ultralytics 8.0.89 SAM predict and auto-annotate (#2298)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Yonghye Kwon <developer.0hye@gmail.com> Co-authored-by: Paula Derrenger <107626595+pderrenger@users.noreply.github.com> Co-authored-by: Dhruv Nair <dhruv.nair@gmail.com> Co-authored-by: Laughing <61612323+Laughing-q@users.noreply.github.com> Co-authored-by: Ayush Chaurasia <ayush.chaurarsia@gmail.com> Co-authored-by: Snyk bot <snyk-bot@snyk.io> Co-authored-by: Laughing-q <1185102784@qq.com>
2023-04-28 00:36:50 +02:00
parent 3e118f6170
commit 243fc4b1fe
44 changed files with 2915 additions and 440 deletions
--- a/ultralytics/yolo/data/dataloaders/stream_loaders.py
+++ b/ultralytics/yolo/data/dataloaders/stream_loaders.py
@ -15,7 +15,6 @@ import requests
 import torch
 from PIL import Image

-from ultralytics.yolo.data.augment import LetterBox
 from ultralytics.yolo.data.utils import IMG_FORMATS, VID_FORMATS
 from ultralytics.yolo.utils import LOGGER, ROOT, is_colab, is_kaggle, ops
 from ultralytics.yolo.utils.checks import check_requirements
@ -31,12 +30,11 @@ class SourceTypes:

 class LoadStreams:
    # YOLOv8 streamloader, i.e. `yolo predict source='rtsp://example.com/media.mp4'  # RTSP, RTMP, HTTP streams`
-    def __init__(self, sources='file.streams', imgsz=640, stride=32, auto=True, transforms=None, vid_stride=1):
+    def __init__(self, sources='file.streams', imgsz=640, vid_stride=1):
        """Initialize instance variables and check for consistent input stream shapes."""
        torch.backends.cudnn.benchmark = True  # faster for fixed-size inference
        self.mode = 'stream'
        self.imgsz = imgsz
-        self.stride = stride
        self.vid_stride = vid_stride  # video frame-rate stride
        sources = Path(sources).read_text().rsplit() if os.path.isfile(sources) else [sources]
        n = len(sources)
@ -72,10 +70,6 @@ class LoadStreams:
        LOGGER.info('')  # newline

        # Check for common shapes
-        s = np.stack([LetterBox(imgsz, auto, stride=stride)(image=x).shape for x in self.imgs])
-        self.rect = np.unique(s, axis=0).shape[0] == 1  # rect inference if all shapes equal
-        self.auto = auto and self.rect
-        self.transforms = transforms  # optional
        self.bs = self.__len__()

        if not self.rect:
@ -110,14 +104,7 @@ class LoadStreams:
            raise StopIteration

        im0 = self.imgs.copy()
-        if self.transforms:
-            im = np.stack([self.transforms(x) for x in im0])  # transforms
-        else:
-            im = np.stack([LetterBox(self.imgsz, self.auto, stride=self.stride)(image=x) for x in im0])
-            im = im[..., ::-1].transpose((0, 3, 1, 2))  # BGR to RGB, BHWC to BCHW
-            im = np.ascontiguousarray(im)  # contiguous
-
-        return self.sources, im, im0, None, ''
+        return self.sources, im0, None, ''

    def __len__(self):
        """Return the length of the sources object."""
@ -126,7 +113,7 @@ class LoadStreams:

 class LoadScreenshots:
    # YOLOv8 screenshot dataloader, i.e. `yolo predict source=screen`
-    def __init__(self, source, imgsz=640, stride=32, auto=True, transforms=None):
+    def __init__(self, source, imgsz=640):
        """source = [screen_number left top width height] (pixels)."""
        check_requirements('mss')
        import mss  # noqa
@ -140,9 +127,6 @@ class LoadScreenshots:
        elif len(params) == 5:
            self.screen, left, top, width, height = (int(x) for x in params)
        self.imgsz = imgsz
-        self.stride = stride
-        self.transforms = transforms
-        self.auto = auto
        self.mode = 'stream'
        self.frame = 0
        self.sct = mss.mss()
@ -165,19 +149,13 @@ class LoadScreenshots:
        im0 = np.array(self.sct.grab(self.monitor))[:, :, :3]  # [:, :, :3] BGRA to BGR
        s = f'screen {self.screen} (LTWH): {self.left},{self.top},{self.width},{self.height}: '

-        if self.transforms:
-            im = self.transforms(im0)  # transforms
-        else:
-            im = LetterBox(self.imgsz, self.auto, stride=self.stride)(image=im0)
-            im = im.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
-            im = np.ascontiguousarray(im)  # contiguous
        self.frame += 1
-        return str(self.screen), im, im0, None, s  # screen, img, original img, im0s, s
+        return str(self.screen), im0, None, s  # screen, img, original img, im0s, s


 class LoadImages:
    # YOLOv8 image/video dataloader, i.e. `yolo predict source=image.jpg/vid.mp4`
-    def __init__(self, path, imgsz=640, stride=32, auto=True, transforms=None, vid_stride=1):
+    def __init__(self, path, imgsz=640, vid_stride=1):
        """Initialize the Dataloader and raise FileNotFoundError if file not found."""
        if isinstance(path, str) and Path(path).suffix == '.txt':  # *.txt file with img/vid/dir on each line
            path = Path(path).read_text().rsplit()
@ -198,13 +176,10 @@ class LoadImages:
        ni, nv = len(images), len(videos)

        self.imgsz = imgsz
-        self.stride = stride
        self.files = images + videos
        self.nf = ni + nv  # number of files
        self.video_flag = [False] * ni + [True] * nv
        self.mode = 'image'
-        self.auto = auto
-        self.transforms = transforms  # optional
        self.vid_stride = vid_stride  # video frame-rate stride
        self.bs = 1
        if any(videos):
@ -254,14 +229,7 @@ class LoadImages:
                raise FileNotFoundError(f'Image Not Found {path}')
            s = f'image {self.count}/{self.nf} {path}: '

-        if self.transforms:
-            im = self.transforms(im0)  # transforms
-        else:
-            im = LetterBox(self.imgsz, self.auto, stride=self.stride)(image=im0)
-            im = im.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
-            im = np.ascontiguousarray(im)  # contiguous
-
-        return path, im, im0, self.cap, s
+        return [path], [im0], self.cap, s

    def _new_video(self, path):
        """Create a new video capture object."""
@ -290,16 +258,13 @@ class LoadImages:

 class LoadPilAndNumpy:

-    def __init__(self, im0, imgsz=640, stride=32, auto=True, transforms=None):
+    def __init__(self, im0, imgsz=640):
        """Initialize PIL and Numpy Dataloader."""
        if not isinstance(im0, list):
            im0 = [im0]
        self.paths = [getattr(im, 'filename', f'image{i}.jpg') for i, im in enumerate(im0)]
        self.im0 = [self._single_check(im) for im in im0]
        self.imgsz = imgsz
-        self.stride = stride
-        self.auto = auto
-        self.transforms = transforms
        self.mode = 'image'
        # Generate fake paths
        self.bs = len(self.im0)
@ -315,16 +280,6 @@ class LoadPilAndNumpy:
            im = np.ascontiguousarray(im)  # contiguous
        return im

-    def _single_preprocess(self, im, auto):
-        """Preprocesses a single image for inference."""
-        if self.transforms:
-            im = self.transforms(im)  # transforms
-        else:
-            im = LetterBox(self.imgsz, auto=auto, stride=self.stride)(image=im)
-            im = im.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
-            im = np.ascontiguousarray(im)  # contiguous
-        return im
-
    def __len__(self):
        """Returns the length of the 'im0' attribute."""
        return len(self.im0)
@ -333,11 +288,8 @@ class LoadPilAndNumpy:
        """Returns batch paths, images, processed images, None, ''."""
        if self.count == 1:  # loop only once as it's batch inference
            raise StopIteration
-        auto = all(x.shape == self.im0[0].shape for x in self.im0) and self.auto
-        im = [self._single_preprocess(im, auto) for im in self.im0]
-        im = np.stack(im, 0) if len(im) > 1 else im[0][None]
        self.count += 1
-        return self.paths, im, self.im0, None, ''
+        return self.paths, self.im0, None, ''

    def __iter__(self):
        """Enables iteration for class LoadPilAndNumpy."""
@ -362,7 +314,7 @@ class LoadTensor:
        if self.count == 1:
            raise StopIteration
        self.count += 1
-        return None, self.im0, self.im0, None, ''  # self.paths, im, self.im0, None, ''
+        return None, self.im0, None, ''  # self.paths, im, self.im0, None, ''

    def __len__(self):
        """Returns the batch size."""