ultralytics 8.0.90
actions and docs improvements (#2326)
Co-authored-by: calmisential <xinyu_std@163.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: triple Mu <gpu@163.com>
Co-authored-by: Laughing <61612323+Laughing-q@users.noreply.github.com>
Co-authored-by: Ayush Chaurasia <ayush.chaurarsia@gmail.com>
Co-authored-by: Laughing-q <1185102784@qq.com>
Co-authored-by: ran xiao <ben.xiao@me.com>
Co-authored-by: rxiao <ran.xiao@silverpond.com.au>
@@ -6,6 +6,18 @@ from ultralytics.yolo.utils.torch_utils import select_device
 def auto_annotate(data, det_model='yolov8x.pt', sam_model='sam_b.pt', device='', output_dir=None):
+    """
+    Automatically annotates images using a YOLO object detection model and a SAM segmentation model.
+
+    Args:
+        data (str): Path to a folder containing images to be annotated.
+        det_model (str, optional): Pre-trained YOLO detection model. Defaults to 'yolov8x.pt'.
+        sam_model (str, optional): Pre-trained SAM segmentation model. Defaults to 'sam_b.pt'.
+        device (str, optional): Device to run the models on. Defaults to an empty string (CPU or GPU, if available).
+        output_dir (str, None, optional): Directory to save the annotated results.
+            Defaults to a 'labels' folder in the same directory as 'data'.
+
+    """
     device = select_device(device)
     det_model = YOLO(det_model)
     sam_model = build_sam(sam_model)
@@ -33,7 +45,7 @@ def auto_annotate(data, det_model='yolov8x.pt', sam_model='sam_b.pt', device='', output_dir=None):
         result.update(masks=masks.squeeze(1))
         segments = result.masks.xyn  # noqa

-        with open(str(Path(output_dir) / Path(result.path).stem) + '.txt', 'w') as f:
+        with open(f'{str(Path(output_dir) / Path(result.path).stem)}.txt', 'w') as f:
             for i in range(len(segments)):
                 s = segments[i]
                 if len(s) == 0:
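Note: a minimal usage sketch of the function documented above (hedged: the import path is assumed from this commit's `ultralytics.yolo` layout, and 'photos' is a placeholder folder):

    from ultralytics.yolo.data.annotator import auto_annotate  # import path assumed, not confirmed by this diff

    # Detect objects with YOLOv8, segment them with SAM; labels default to a 'photos/labels' folder
    auto_annotate(data='photos', det_model='yolov8x.pt', sam_model='sam_b.pt', device='')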
@@ -141,11 +141,8 @@ def load_inference_source(source=None, imgsz=640, vid_stride=1):

     Args:
         source (str, Path, Tensor, PIL.Image, np.ndarray): The input source for inference.
-        transforms (callable, optional): Custom transformations to be applied to the input source.
         imgsz (int, optional): The size of the image for inference. Default is 640.
         vid_stride (int, optional): The frame interval for video sources. Default is 1.
-        stride (int, optional): The model stride. Default is 32.
-        auto (bool, optional): Automatically apply pre-processing. Default is True.

     Returns:
         dataset (Dataset): A dataset object for the specified input source.
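Note: with `transforms`, `stride`, and `auto` dropped from the docstring (matching the trimmed signature in the hunk header), a call now passes only the three remaining arguments; a hedged sketch with a placeholder source:

    # 'bus.jpg' is a placeholder; any str/Path/Tensor/PIL.Image/np.ndarray source works per the Args above
    dataset = load_inference_source(source='bus.jpg', imgsz=640, vid_stride=1)
    for batch in dataset:
        ...  # iterate batches from the returned dataset object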
@@ -72,9 +72,6 @@ class LoadStreams:
         # Check for common shapes
         self.bs = self.__len__()

-        if not self.rect:
-            LOGGER.warning('WARNING ⚠️ Stream shapes differ. For optimal performance supply similarly-shaped streams.')
-
     def update(self, i, cap, stream):
         """Read stream `i` frames in daemon thread."""
         n, f = 0, self.frames[i]  # frame number, frame array
@@ -116,6 +116,9 @@ class BasePredictor:
         """
         if not isinstance(im, torch.Tensor):
             auto = all(x.shape == im[0].shape for x in im) and self.model.pt
+            if not auto:
+                LOGGER.warning(
+                    'WARNING ⚠️ Source shapes differ. For optimal performance supply similarly-shaped sources.')
             im = np.stack([LetterBox(self.imgsz, auto=auto, stride=self.model.stride)(image=x) for x in im])
             im = im[..., ::-1].transpose((0, 3, 1, 2))  # BGR to RGB, BHWC to BCHW, (n, 3, h, w)
             im = np.ascontiguousarray(im)  # contiguous
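Note: the new warning fires only when `auto` is False, i.e. when the input shapes differ (or the model is not a PyTorch checkpoint); a standalone sketch of the shape check with synthetic arrays:

    import numpy as np

    ims = [np.zeros((480, 640, 3), np.uint8), np.zeros((720, 1280, 3), np.uint8)]
    same_shape = all(x.shape == ims[0].shape for x in ims)  # False: 480x640 vs 720x1280
    # same_shape (and model.pt) decide `auto`; when False, letterboxing pads to a fixed size instead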
@@ -217,7 +220,8 @@ class BasePredictor:
             self.run_callbacks('on_predict_batch_start')
             self.batch = batch
             path, im0s, vid_cap, s = batch
-            visualize = increment_path(self.save_dir / Path(path).stem, mkdir=True) if self.args.visualize else False
+            visualize = increment_path(self.save_dir / Path(path[0]).stem,
+                                       mkdir=True) if self.args.visualize and (not self.source_type.tensor) else False

             # Preprocess
             with self.dt[0]:
@@ -298,7 +302,7 @@ class BasePredictor:
         cv2.namedWindow(str(p), cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO)  # allow window resize (Linux)
         cv2.resizeWindow(str(p), im0.shape[1], im0.shape[0])
         cv2.imshow(str(p), im0)
-        cv2.waitKey(500 if self.batch[4].startswith('image') else 1)  # 1 millisecond
+        cv2.waitKey(500 if self.batch[3].startswith('image') else 1)  # 1 millisecond

     def save_preds(self, vid_cap, idx, save_path):
         """Save video predictions as mp4 at specified path."""
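Note: the index fix follows from the four-element batch unpacked in the previous hunk (`path, im0s, vid_cap, s = batch`), so the log string `s` sits at index 3; a tiny illustration with placeholder values:

    batch = ('bus.jpg', None, None, 'image 1/1 bus.jpg')  # (path, im0s, vid_cap, s), values hypothetical
    assert batch[3].startswith('image')  # index 4 would raise IndexError on a 4-tuple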
@@ -205,7 +205,7 @@ class FocalLoss(nn.Module):
             return loss.mean()
         elif self.reduction == 'sum':
             return loss.sum()
-        else:  # 'none'
+        else:  # 'None'
             return loss


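Note: only the comment changes in this hunk; the reduction branch itself is the standard idiom, shown standalone below (a sketch, not the full FocalLoss module):

    import torch

    def reduce_loss(loss: torch.Tensor, reduction: str = 'mean') -> torch.Tensor:
        if reduction == 'mean':
            return loss.mean()
        elif reduction == 'sum':
            return loss.sum()
        else:  # no reduction: keep the per-element loss
            return loss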
@@ -148,7 +148,7 @@ def non_max_suppression(
     Perform non-maximum suppression (NMS) on a set of boxes, with support for masks and multiple labels per box.

     Arguments:
-        prediction (torch.Tensor): A tensor of shape (batch_size, num_boxes, num_classes + 4 + num_masks)
+        prediction (torch.Tensor): A tensor of shape (batch_size, num_classes + 4 + num_masks, num_boxes)
             containing the predicted boxes, classes, and masks. The tensor should be in the format
             output by a model, such as YOLO.
         conf_thres (float): The confidence threshold below which boxes will be filtered out.
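Note: the corrected docstring puts the channel dimension before the box dimension; predictions stored as (batch, num_boxes, channels) need a transpose before NMS. A hedged sketch with illustrative YOLOv8 shapes (80 classes + 4 box + 32 mask coefficients, 8400 candidate boxes):

    import torch

    pred = torch.rand(1, 8400, 116)  # (batch_size, num_boxes, num_classes + 4 + num_masks)
    pred = pred.transpose(1, 2)      # (batch_size, num_classes + 4 + num_masks, num_boxes), as documented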
@@ -469,3 +469,39 @@ def output_to_target(output, max_det=300):
         targets.append(torch.cat((j, cls, xyxy2xywh(box), conf), 1))
     targets = torch.cat(targets, 0).numpy()
     return targets[:, 0], targets[:, 1], targets[:, 2:]
+
+
+def feature_visualization(x, module_type, stage, n=32, save_dir=Path('runs/detect/exp')):
+    """
+    Visualize feature maps of a given model module during inference.
+
+    Args:
+        x (torch.Tensor): Features to be visualized.
+        module_type (str): Module type.
+        stage (int): Module stage within the model.
+        n (int, optional): Maximum number of feature maps to plot. Defaults to 32.
+        save_dir (Path, optional): Directory to save results. Defaults to Path('runs/detect/exp').
+
+    Returns:
+        None: This function does not return any value; it saves the visualization to the specified directory.
+    """
+    for m in ['Detect', 'Pose', 'Segment']:
+        if m in module_type:
+            return
+    batch, channels, height, width = x.shape  # batch, channels, height, width
+    if height > 1 and width > 1:
+        f = save_dir / f"stage{stage}_{module_type.split('.')[-1]}_features.png"  # filename
+
+        blocks = torch.chunk(x[0].cpu(), channels, dim=0)  # select batch index 0, block by channels
+        n = min(n, channels)  # number of plots
+        fig, ax = plt.subplots(math.ceil(n / 8), 8, tight_layout=True)  # 8 rows x n/8 cols
+        ax = ax.ravel()
+        plt.subplots_adjust(wspace=0.05, hspace=0.05)
+        for i in range(n):
+            ax[i].imshow(blocks[i].squeeze())  # cmap='gray'
+            ax[i].axis('off')
+
+        LOGGER.info(f'Saving {f}... ({n}/{channels})')
+        plt.savefig(f, dpi=300, bbox_inches='tight')
+        plt.close()
+        np.save(str(f.with_suffix('.npy')), x[0].cpu().numpy())  # npy save
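Note: a hedged usage sketch of the new helper with a synthetic feature map (the module_type and stage values are illustrative):

    import torch
    from pathlib import Path

    save_dir = Path('runs/detect/exp')
    save_dir.mkdir(parents=True, exist_ok=True)  # the function writes into save_dir but does not create it
    x = torch.rand(1, 64, 80, 80)  # synthetic (batch, channels, height, width) features
    feature_visualization(x, module_type='model.2.C2f', stage=2, n=32, save_dir=save_dir)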
@@ -27,7 +27,7 @@ class DetectionTrainer(BaseTrainer):
         Args:
             img_path (str): Path to the folder containing images.
             mode (str): `train` mode or `val` mode, users are able to customize different augmentations for each mode.
-            batch_size (int, optional): Size of batches, this is for `rect`. Defaults to None.
+            batch (int, optional): Size of batches, this is for `rect`. Defaults to None.
         """
         gs = max(int(de_parallel(self.model).stride.max() if self.model else 0), 32)
         return build_yolo_dataset(self.args, img_path, batch, self.data, mode=mode, rect=mode == 'val', stride=gs)
@@ -177,7 +177,7 @@ class DetectionValidator(BaseValidator):
         Args:
             img_path (str): Path to the folder containing images.
             mode (str): `train` mode or `val` mode, users are able to customize different augmentations for each mode.
-            batch_size (int, optional): Size of batches, this is for `rect`. Defaults to None.
+            batch (int, optional): Size of batches, this is for `rect`. Defaults to None.
         """
         gs = max(int(de_parallel(self.model).stride if self.model else 0), 32)
         return build_yolo_dataset(self.args, img_path, batch, self.data, mode=mode, stride=gs)
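Note: with both docstrings now matching the actual `batch` parameter, a call reads as below (hedged: constructing the trainer is elided and the dataset path is a placeholder):

    # 'trainer' is an already-constructed DetectionTrainer; the same pattern applies to DetectionValidator
    dataset = trainer.build_dataset(img_path='coco128/images/train2017', mode='train', batch=16)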