update model initialization design, supports custom data/num_classes (#44)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2022-11-15 20:06:29 +05:30
parent 1f3aad86c1
commit 832ea56eb4
8 changed files with 67 additions and 44 deletions
--- a/ultralytics/yolo/utils/configs/default.yaml
+++ b/ultralytics/yolo/utils/configs/default.yaml
@ -3,8 +3,7 @@


 # Train settings -------------------------------------------------------------------------------------------------------
-model: null  # i.e. yolov5s.pt
-cfg: null  # i.e. yolov5s.yaml
+model: null  # e.g. yolov5s.pt, yolo.yaml
 data: null  # i.e. coco128.yaml
 epochs: 300
 batch_size: 16
@ -70,6 +69,7 @@ mosaic: 1.0  # image mosaic (probability)
 mixup: 0.0  # image mixup (probability)
 copy_paste: 0.0  # segment copy-paste (probability)
 label_smoothing: 0.0
+# anchors: 3

 # Hydra configs --------------------------------------------------------------------------------------------------------
 hydra:
--- a/ultralytics/yolo/utils/downloads.py
+++ b/ultralytics/yolo/utils/downloads.py
@ -140,8 +140,3 @@ def download(url, dir=Path.cwd(), unzip=True, delete=True, curl=False, threads=1
    else:
        for u in [url] if isinstance(url, (str, Path)) else url:
            download_one(u, dir)
-
-
-def get_model(model: str):
-    # check for local weights
-    pass
--- a/ultralytics/yolo/utils/modeling/tasks.py
+++ b/ultralytics/yolo/utils/modeling/tasks.py
@ -66,7 +66,7 @@ class BaseModel(nn.Module):
        return self

    def load(self, weights):
-        # Force all tasks implement this function
+        # Abstract hook: every task must override this; the base implementation only raises
        raise NotImplementedError("This function needs to be implemented by derived classes!")


@ -169,10 +169,10 @@ class DetectionModel(BaseModel):
            mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)

    def load(self, weights):
-        ckpt = torch.load(weights, map_location='cpu')  # load checkpoint to CPU to avoid CUDA memory leak
-        csd = ckpt['model'].float().state_dict()  # checkpoint state_dict as FP32
+        # Load parameters from an already-loaded source into this model (non-strict).
+        # `weights` is either a checkpoint dict holding the source module under 'model' or the module itself,
+        # mirroring what ClassificationModel.load accepts. Returns None.
+        model = weights['model'] if isinstance(weights, dict) else weights
+        csd = model.float().state_dict()  # checkpoint state_dict as FP32
        csd = intersect_state_dicts(csd, self.state_dict())  # intersect
        self.load_state_dict(csd, strict=False)  # load
+        # Do not interpolate `weights` into the message: it is the checkpoint object, not a path
+        LOGGER.info(f'Transferred {len(csd)}/{len(self.state_dict())} items from pretrained weights')


 class SegmentationModel(DetectionModel):
@ -203,11 +203,33 @@ class ClassificationModel(BaseModel):
        self.nc = nc

    def _from_yaml(self, cfg):
-        # Create a YOLOv5 classification model from a *.yaml file
+        # TODO: Create a YOLOv5 classification model from a *.yaml file (`cfg` is not used yet)
        self.model = None  # placeholder: no model is built here

    def load(self, weights):
-        ckpt = torch.load(weights, map_location='cpu')  # load checkpoint to CPU to avoid CUDA memory leak
-        csd = ckpt['model'].float().state_dict()  # checkpoint state_dict as FP32
+        # Load into this model the entries of the source state_dict that intersect_state_dicts() keeps
+        model = weights["model"] if isinstance(weights, dict) else weights  # torchvision models are not dicts
+        csd = model.float().state_dict()  # source state_dict after casting the module to FP32
        csd = intersect_state_dicts(csd, self.state_dict())  # intersect
        self.load_state_dict(csd, strict=False)  # load; strict=False tolerates missing/unexpected keys
+
+    @staticmethod
+    def reshape_outputs(model, nc):
+        """Resize, in place, the output layer of a classification model so that it emits `nc` classes.
+
+        Args:
+            model: Module to patch. If it has a `model` attribute, that attribute's children are inspected;
+                otherwise the module's own children are.
+            nc: Required number of output classes.
+
+        Only the last named child is examined. It is handled when it is a YOLO Classify() head, a bare
+        nn.Linear, or an nn.Sequential containing an nn.Linear or nn.Conv2d; anything else is left untouched.
+        A layer that already has `nc` outputs is kept. Replacement layers are newly constructed, and a
+        replacement conv carries over only in_channels, kernel_size, stride and the presence of a bias.
+        Returns None.
+        """
+        from ultralytics.yolo.utils.modeling.modules import Classify
+        parent = model.model if hasattr(model, 'model') else model  # module that actually owns the head
+        name, m = list(parent.named_children())[-1]  # last module
+        if isinstance(m, Classify):  # YOLO Classify() head
+            if m.linear.out_features != nc:
+                m.linear = nn.Linear(m.linear.in_features, nc)
+        elif isinstance(m, nn.Linear):  # ResNet, EfficientNet
+            if m.out_features != nc:
+                # Replace on the owning module; assigning on a wrapper would leave the real head unchanged
+                setattr(parent, name, nn.Linear(m.in_features, nc))
+        elif isinstance(m, nn.Sequential):
+            types = [type(x) for x in m]
+            if nn.Linear in types:
+                i = len(types) - 1 - types[::-1].index(nn.Linear)  # last nn.Linear index (the output layer)
+                if m[i].out_features != nc:
+                    m[i] = nn.Linear(m[i].in_features, nc)
+            elif nn.Conv2d in types:
+                i = len(types) - 1 - types[::-1].index(nn.Conv2d)  # last nn.Conv2d index (the output layer)
+                if m[i].out_channels != nc:
+                    # `bias` is a bool flag; passing the bias tensor itself fails when it is truth-tested
+                    m[i] = nn.Conv2d(m[i].in_channels, nc, m[i].kernel_size, m[i].stride, bias=m[i].bias is not None)