Fix some cuda training issues of segmentation (#46)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2022-11-17 06:44:02 -06:00
parent db1031a1a9
commit 47f1cb3ef4
5 changed files with 38 additions and 21 deletions
--- a/ultralytics/yolo/v8/classify/val.py
+++ b/ultralytics/yolo/v8/classify/val.py
@@ -6,10 +6,11 @@ from ultralytics.yolo.engine.validator import BaseValidator
 class ClassificationValidator(BaseValidator):

    def init_metrics(self, model):
-        self.correct = torch.tensor([])
+        self.correct = torch.tensor([], device=next(model.parameters()).device)

    def preprocess(self, batch):
-        batch["img"] = batch["img"].to(self.device)
+        batch["img"] = batch["img"].to(self.device, non_blocking=True)
+        batch["img"] = batch["img"].half() if self.args.half else batch["img"].float()
        batch["cls"] = batch["cls"].to(self.device)
        return batch

--- a/ultralytics/yolo/v8/segment/train.py
+++ b/ultralytics/yolo/v8/segment/train.py
@@ -23,7 +23,7 @@ class SegmentationTrainer(BaseTrainer):
    def get_dataloader(self, dataset_path, batch_size, rank=0):
        # TODO: manage splits differently
        # calculate stride - check if model is initialized
-        gs = max(int(self.model.stride.max() if self.model else 0), 32)
+        gs = max(int(de_parallel(self.model).stride.max() if self.model else 0), 32)
        loader = build_dataloader(
            img_path=dataset_path,
            img_size=self.args.img_size,
@@ -220,7 +220,7 @@ class SegmentationTrainer(BaseTrainer):
                mxyxy = xywh2xyxy(xywhn[i] * torch.tensor([mask_w, mask_h, mask_w, mask_h], device=self.device))
                for bi in b.unique():
                    j = b == bi  # matching index
-                    if True:
+                    if self.args.overlap_mask:
                        mask_gti = torch.where(masks[bi][None] == tidxs[i][j].view(-1, 1, 1), 1.0, 0.0)
                    else:
                        mask_gti = masks[tidxs[i]][j]
--- a/ultralytics/yolo/v8/segment/val.py
+++ b/ultralytics/yolo/v8/segment/val.py
@@ -30,11 +30,13 @@ class SegmentationValidator(BaseValidator):

    def preprocess(self, batch):
        batch["img"] = batch["img"].to(self.device, non_blocking=True)
-        batch["img"] = (batch["img"].half() if self.args.half else batch["img"].float()) / 225
-        batch["bboxes"] = batch["bboxes"].to(self.device)
+        batch["img"] = (batch["img"].half() if self.args.half else batch["img"].float()) / 255
        batch["masks"] = batch["masks"].to(self.device).float()
        self.nb, _, self.height, self.width = batch["img"].shape  # batch size, channels, height, width
        self.targets = torch.cat((batch["batch_idx"].view(-1, 1), batch["cls"].view(-1, 1), batch["bboxes"]), 1)
+        self.targets = self.targets.to(self.device)
+        height, width = batch["img"].shape[2:]
+        self.targets[:, 2:] *= torch.tensor((width, height, width, height), device=self.device)  # to pixels
        self.lb = [self.targets[self.targets[:, 0] == i, 1:]
                   for i in range(self.nb)] if self.args.save_hybrid else []  # for autolabelling

@@ -75,7 +77,7 @@ class SegmentationValidator(BaseValidator):
                                    agnostic=self.args.single_cls,
                                    max_det=self.args.max_det,
                                    nm=self.nm)
-        return (p, preds[0], preds[2])
+        return (p, preds[1], preds[2])

    def update_metrics(self, preds, batch):
        # Metrics
@@ -83,7 +85,7 @@ class SegmentationValidator(BaseValidator):
        for si, (pred, proto) in enumerate(zip(preds[0], preds[1])):
            labels = self.targets[self.targets[:, 0] == si, 1:]
            nl, npr = labels.shape[0], pred.shape[0]  # number of labels, predictions
-            shape = Path(batch["im_file"][si])
+            shape = batch["shape"][si]
            # path = batch["shape"][si][0]
            correct_masks = torch.zeros(npr, self.niou, dtype=torch.bool, device=self.device)  # init
            correct_bboxes = torch.zeros(npr, self.niou, dtype=torch.bool, device=self.device)  # init
@@ -106,22 +108,29 @@ class SegmentationValidator(BaseValidator):
            if self.args.single_cls:
                pred[:, 5] = 0
            predn = pred.clone()
-            ops.scale_boxes(batch["img"][si].shape[1:], predn[:, :4], shape, batch["shape"][si][1])  # native-space pred
+            ops.scale_boxes(batch["img"][si].shape[1:], predn[:, :4], shape)  # native-space pred

            # Evaluate
            if nl:
                tbox = ops.xywh2xyxy(labels[:, 1:5])  # target boxes
-                ops.scale_boxes(batch["img"][si].shape[1:], tbox, shape, batch["shapes"][si][1])  # native-space labels
+                ops.scale_boxes(batch["img"][si].shape[1:], tbox, shape)  # native-space labels
                labelsn = torch.cat((labels[:, 0:1], tbox), 1)  # native-space labels
                correct_bboxes = self._process_batch(predn, labelsn, self.iouv)
-                correct_masks = self._process_batch(predn, labelsn, self.iouv, pred_masks, gt_masks, masks=True)
+                # TODO: maybe remove these `self.` arguments as they already are member variable
+                correct_masks = self._process_batch(predn,
+                                                    labelsn,
+                                                    self.iouv,
+                                                    pred_masks,
+                                                    gt_masks,
+                                                    overlap=self.args.overlap_mask,
+                                                    masks=True)
                if self.args.plots:
                    self.confusion_matrix.process_batch(predn, labelsn)
            self.stats.append((correct_masks, correct_bboxes, pred[:, 4], pred[:, 5], labels[:,
                                                                                             0]))  # (conf, pcls, tcls)

            pred_masks = torch.as_tensor(pred_masks, dtype=torch.uint8)
-            if self.plots and self.batch_i < 3:
+            if self.args.plots and self.batch_i < 3:
                plot_masks.append(pred_masks[:15].cpu())  # filter top 15 to plot

            # TODO: Save/log