From 8ebe94d1e928687feaa1fee6d5668987df5e43be Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sat, 15 Jul 2023 01:38:32 +0200 Subject: [PATCH] `ultralytics 8.0.135` remove deprecated `v5loader` (#3744) --- .../yolo/data/dataloaders/v5augmentations.md | 89 -- .../yolo/data/dataloaders/v5loader.md | 94 -- docs/reference/yolo/data/utils.md | 5 + docs/reference/yolo/utils/__init__.md | 5 + docs/reference/yolo/utils/torch_utils.md | 5 + examples/tutorial.ipynb | 2 +- mkdocs.yml | 2 - tests/test_engine.py | 1 - ultralytics/__init__.py | 2 +- ultralytics/datasets/xView.yaml | 2 +- ultralytics/yolo/cfg/__init__.py | 2 +- ultralytics/yolo/cfg/default.yaml | 3 - .../yolo/data/dataloaders/v5augmentations.py | 407 ------ ultralytics/yolo/data/dataloaders/v5loader.py | 1109 ----------------- ultralytics/yolo/data/utils.py | 33 + ultralytics/yolo/engine/trainer.py | 2 +- ultralytics/yolo/v8/detect/train.py | 28 +- ultralytics/yolo/v8/detect/val.py | 26 +- 18 files changed, 60 insertions(+), 1757 deletions(-) delete mode 100644 docs/reference/yolo/data/dataloaders/v5augmentations.md delete mode 100644 docs/reference/yolo/data/dataloaders/v5loader.md delete mode 100644 ultralytics/yolo/data/dataloaders/v5augmentations.py delete mode 100644 ultralytics/yolo/data/dataloaders/v5loader.py diff --git a/docs/reference/yolo/data/dataloaders/v5augmentations.md b/docs/reference/yolo/data/dataloaders/v5augmentations.md deleted file mode 100644 index 63df369..0000000 --- a/docs/reference/yolo/data/dataloaders/v5augmentations.md +++ /dev/null @@ -1,89 +0,0 @@ ---- -description: Enhance image data with Albumentations CenterCrop, normalize, augment_hsv, replicate, random_perspective, cutout, & box_candidates. -keywords: YOLO, object detection, data loaders, V5 augmentations, CenterCrop, normalize, random_perspective ---- - -## Albumentations ---- -### ::: ultralytics.yolo.data.dataloaders.v5augmentations.Albumentations -

- -## LetterBox ---- -### ::: ultralytics.yolo.data.dataloaders.v5augmentations.LetterBox -

- -## CenterCrop ---- -### ::: ultralytics.yolo.data.dataloaders.v5augmentations.CenterCrop -

- -## ToTensor ---- -### ::: ultralytics.yolo.data.dataloaders.v5augmentations.ToTensor -

- -## normalize ---- -### ::: ultralytics.yolo.data.dataloaders.v5augmentations.normalize -

- -## denormalize ---- -### ::: ultralytics.yolo.data.dataloaders.v5augmentations.denormalize -

- -## augment_hsv ---- -### ::: ultralytics.yolo.data.dataloaders.v5augmentations.augment_hsv -

- -## hist_equalize ---- -### ::: ultralytics.yolo.data.dataloaders.v5augmentations.hist_equalize -

- -## replicate ---- -### ::: ultralytics.yolo.data.dataloaders.v5augmentations.replicate -

- -## letterbox ---- -### ::: ultralytics.yolo.data.dataloaders.v5augmentations.letterbox -

- -## random_perspective ---- -### ::: ultralytics.yolo.data.dataloaders.v5augmentations.random_perspective -

- -## copy_paste ---- -### ::: ultralytics.yolo.data.dataloaders.v5augmentations.copy_paste -

- -## cutout ---- -### ::: ultralytics.yolo.data.dataloaders.v5augmentations.cutout -

- -## mixup ---- -### ::: ultralytics.yolo.data.dataloaders.v5augmentations.mixup -

- -## box_candidates ---- -### ::: ultralytics.yolo.data.dataloaders.v5augmentations.box_candidates -

- -## classify_albumentations ---- -### ::: ultralytics.yolo.data.dataloaders.v5augmentations.classify_albumentations -

- -## classify_transforms ---- -### ::: ultralytics.yolo.data.dataloaders.v5augmentations.classify_transforms -

diff --git a/docs/reference/yolo/data/dataloaders/v5loader.md b/docs/reference/yolo/data/dataloaders/v5loader.md deleted file mode 100644 index 5598695..0000000 --- a/docs/reference/yolo/data/dataloaders/v5loader.md +++ /dev/null @@ -1,94 +0,0 @@ ---- -description: Efficiently load images and labels to models using Ultralytics YOLO's InfiniteDataLoader, LoadScreenshots, and LoadStreams. -keywords: YOLO, data loader, image classification, object detection, Ultralytics ---- - -## InfiniteDataLoader ---- -### ::: ultralytics.yolo.data.dataloaders.v5loader.InfiniteDataLoader -

- -## _RepeatSampler ---- -### ::: ultralytics.yolo.data.dataloaders.v5loader._RepeatSampler -

- -## LoadScreenshots ---- -### ::: ultralytics.yolo.data.dataloaders.v5loader.LoadScreenshots -

- -## LoadImages ---- -### ::: ultralytics.yolo.data.dataloaders.v5loader.LoadImages -

- -## LoadStreams ---- -### ::: ultralytics.yolo.data.dataloaders.v5loader.LoadStreams -

- -## LoadImagesAndLabels ---- -### ::: ultralytics.yolo.data.dataloaders.v5loader.LoadImagesAndLabels -

- -## ClassificationDataset ---- -### ::: ultralytics.yolo.data.dataloaders.v5loader.ClassificationDataset -

- -## get_hash ---- -### ::: ultralytics.yolo.data.dataloaders.v5loader.get_hash -

- -## exif_size ---- -### ::: ultralytics.yolo.data.dataloaders.v5loader.exif_size -

- -## exif_transpose ---- -### ::: ultralytics.yolo.data.dataloaders.v5loader.exif_transpose -

- -## seed_worker ---- -### ::: ultralytics.yolo.data.dataloaders.v5loader.seed_worker -

- -## create_dataloader ---- -### ::: ultralytics.yolo.data.dataloaders.v5loader.create_dataloader -

- -## img2label_paths ---- -### ::: ultralytics.yolo.data.dataloaders.v5loader.img2label_paths -

- -## flatten_recursive ---- -### ::: ultralytics.yolo.data.dataloaders.v5loader.flatten_recursive -

- -## extract_boxes ---- -### ::: ultralytics.yolo.data.dataloaders.v5loader.extract_boxes -

- -## autosplit ---- -### ::: ultralytics.yolo.data.dataloaders.v5loader.autosplit -

- -## verify_image_label ---- -### ::: ultralytics.yolo.data.dataloaders.v5loader.verify_image_label -

- -## create_classification_dataloader ---- -### ::: ultralytics.yolo.data.dataloaders.v5loader.create_classification_dataloader -

diff --git a/docs/reference/yolo/data/utils.md b/docs/reference/yolo/data/utils.md index f0f2e2f..248321b 100644 --- a/docs/reference/yolo/data/utils.md +++ b/docs/reference/yolo/data/utils.md @@ -67,3 +67,8 @@ keywords: YOLOv4, Object Detection, Computer Vision, Deep Learning, Convolutiona --- ### ::: ultralytics.yolo.data.utils.zip_directory

+ +## autosplit +--- +### ::: ultralytics.yolo.data.utils.autosplit +

diff --git a/docs/reference/yolo/utils/__init__.md b/docs/reference/yolo/utils/__init__.md index 77b6584..4fa7604 100644 --- a/docs/reference/yolo/utils/__init__.md +++ b/docs/reference/yolo/utils/__init__.md @@ -18,6 +18,11 @@ keywords: Ultralytics, YOLO, utils, SimpleClass, IterableSimpleNamespace, EmojiF ### ::: ultralytics.yolo.utils.EmojiFilter

+## ThreadingLocked +--- +### ::: ultralytics.yolo.utils.ThreadingLocked +

+ ## TryExcept --- ### ::: ultralytics.yolo.utils.TryExcept diff --git a/docs/reference/yolo/utils/torch_utils.md b/docs/reference/yolo/utils/torch_utils.md index 6d24cac..727a2e1 100644 --- a/docs/reference/yolo/utils/torch_utils.md +++ b/docs/reference/yolo/utils/torch_utils.md @@ -23,6 +23,11 @@ keywords: Ultralytics YOLO, Torch, Utils, Pytorch, Object Detection ### ::: ultralytics.yolo.utils.torch_utils.smart_inference_mode

+## get_cpu_info +--- +### ::: ultralytics.yolo.utils.torch_utils.get_cpu_info +

+ ## select_device --- ### ::: ultralytics.yolo.utils.torch_utils.select_device diff --git a/examples/tutorial.ipynb b/examples/tutorial.ipynb index 9bb7150..9f746e2 100644 --- a/examples/tutorial.ipynb +++ b/examples/tutorial.ipynb @@ -300,7 +300,7 @@ "name": "stdout", "text": [ "Ultralytics YOLOv8.0.71 🚀 Python-3.9.16 torch-2.0.0+cu118 CUDA:0 (Tesla T4, 15102MiB)\n", - "\u001b[34m\u001b[1myolo/engine/trainer: \u001b[0mtask=detect, mode=train, model=yolov8n.pt, data=coco128.yaml, epochs=3, patience=50, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=None, exist_ok=False, pretrained=False, optimizer=SGD, verbose=True, seed=0, deterministic=True, single_cls=False, image_weights=False, rect=False, cos_lr=False, close_mosaic=0, resume=False, amp=True, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, show=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, vid_stride=1, line_width=3, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, boxes=True, format=torchscript, keras=False, optimize=False, int8=False, dynamic=False, simplify=False, opset=None, workspace=4, nms=False, lr0=0.01, lrf=0.01, momentum=0.937, weight_decay=0.0005, warmup_epochs=3.0, warmup_momentum=0.8, warmup_bias_lr=0.1, box=7.5, cls=0.5, dfl=1.5, pose=12.0, kobj=1.0, label_smoothing=0.0, nbs=64, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, degrees=0.0, translate=0.1, scale=0.5, shear=0.0, perspective=0.0, flipud=0.0, fliplr=0.5, mosaic=1.0, mixup=0.0, copy_paste=0.0, cfg=None, v5loader=False, tracker=botsort.yaml, save_dir=runs/detect/train\n", + "\u001b[34m\u001b[1myolo/engine/trainer: \u001b[0mtask=detect, mode=train, model=yolov8n.pt, data=coco128.yaml, epochs=3, patience=50, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=None, exist_ok=False, pretrained=False, optimizer=SGD, verbose=True, seed=0, deterministic=True, single_cls=False, image_weights=False, rect=False, cos_lr=False, close_mosaic=0, resume=False, amp=True, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, show=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, vid_stride=1, line_width=3, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, boxes=True, format=torchscript, keras=False, optimize=False, int8=False, dynamic=False, simplify=False, opset=None, workspace=4, nms=False, lr0=0.01, lrf=0.01, momentum=0.937, weight_decay=0.0005, warmup_epochs=3.0, warmup_momentum=0.8, warmup_bias_lr=0.1, box=7.5, cls=0.5, dfl=1.5, pose=12.0, kobj=1.0, label_smoothing=0.0, nbs=64, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, degrees=0.0, translate=0.1, scale=0.5, shear=0.0, perspective=0.0, flipud=0.0, fliplr=0.5, mosaic=1.0, mixup=0.0, copy_paste=0.0, cfg=None, tracker=botsort.yaml, save_dir=runs/detect/train\n", "\n", " from n params module arguments \n", " 0 -1 1 464 ultralytics.nn.modules.Conv [3, 16, 3, 2] \n", diff --git a/mkdocs.yml b/mkdocs.yml index 219a059..19181ed 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -305,8 +305,6 @@ nav: - converter: reference/yolo/data/converter.md - dataloaders: - stream_loaders: reference/yolo/data/dataloaders/stream_loaders.md - - 
v5augmentations: reference/yolo/data/dataloaders/v5augmentations.md - - v5loader: reference/yolo/data/dataloaders/v5loader.md - dataset: reference/yolo/data/dataset.md - dataset_wrappers: reference/yolo/data/dataset_wrappers.md - utils: reference/yolo/data/utils.md diff --git a/tests/test_engine.py b/tests/test_engine.py index b011044..deb546c 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -65,7 +65,6 @@ def test_detect(): def test_segment(): overrides = {'data': 'coco8-seg.yaml', 'model': CFG_SEG, 'imgsz': 32, 'epochs': 1, 'save': False} CFG.data = 'coco8-seg.yaml' - CFG.v5loader = False # YOLO(CFG_SEG).train(**overrides) # works # trainer diff --git a/ultralytics/__init__.py b/ultralytics/__init__.py index a4d8652..d4ffad6 100644 --- a/ultralytics/__init__.py +++ b/ultralytics/__init__.py @@ -1,6 +1,6 @@ # Ultralytics YOLO 🚀, AGPL-3.0 license -__version__ = '8.0.134' +__version__ = '8.0.135' from ultralytics.hub import start from ultralytics.vit.rtdetr import RTDETR diff --git a/ultralytics/datasets/xView.yaml b/ultralytics/datasets/xView.yaml index 6049f6f..f58c4e5 100644 --- a/ultralytics/datasets/xView.yaml +++ b/ultralytics/datasets/xView.yaml @@ -87,7 +87,7 @@ download: | from PIL import Image from tqdm import tqdm - from ultralytics.yolo.data.dataloaders.v5loader import autosplit + from ultralytics.yolo.data.utils import autosplit from ultralytics.yolo.utils.ops import xyxy2xywhn diff --git a/ultralytics/yolo/cfg/__init__.py b/ultralytics/yolo/cfg/__init__.py index 71a9022..41d86aa 100644 --- a/ultralytics/yolo/cfg/__init__.py +++ b/ultralytics/yolo/cfg/__init__.py @@ -75,7 +75,7 @@ CFG_INT_KEYS = ('epochs', 'patience', 'batch', 'workers', 'seed', 'close_mosaic' CFG_BOOL_KEYS = ('save', 'exist_ok', 'verbose', 'deterministic', 'single_cls', 'rect', 'cos_lr', 'overlap_mask', 'val', 'save_json', 'save_hybrid', 'half', 'dnn', 'plots', 'show', 'save_txt', 'save_conf', 'save_crop', 'show_labels', 'show_conf', 'visualize', 'augment', 'agnostic_nms', 'retina_masks', 'boxes', 'keras', - 'optimize', 'int8', 'dynamic', 'simplify', 'nms', 'v5loader', 'profile') + 'optimize', 'int8', 'dynamic', 'simplify', 'nms', 'profile') def cfg2dict(cfg): diff --git a/ultralytics/yolo/cfg/default.yaml b/ultralytics/yolo/cfg/default.yaml index 25e4001..5babd25 100644 --- a/ultralytics/yolo/cfg/default.yaml +++ b/ultralytics/yolo/cfg/default.yaml @@ -110,8 +110,5 @@ copy_paste: 0.0 # (float) segment copy-paste (probability) # Custom config.yaml --------------------------------------------------------------------------------------------------- cfg: # (str, optional) for overriding defaults.yaml -# Debug, do not modify ------------------------------------------------------------------------------------------------- -v5loader: False # (bool) use legacy YOLOv5 dataloader (deprecated) - # Tracker settings ------------------------------------------------------------------------------------------------------ tracker: botsort.yaml # (str) tracker type, choices=[botsort.yaml, bytetrack.yaml] diff --git a/ultralytics/yolo/data/dataloaders/v5augmentations.py b/ultralytics/yolo/data/dataloaders/v5augmentations.py deleted file mode 100644 index 8e0b3e2..0000000 --- a/ultralytics/yolo/data/dataloaders/v5augmentations.py +++ /dev/null @@ -1,407 +0,0 @@ -# Ultralytics YOLO 🚀, AGPL-3.0 license -""" -Image augmentation functions -""" - -import math -import random - -import cv2 -import numpy as np -import torch -import torchvision.transforms as T -import torchvision.transforms.functional as TF - -from 
ultralytics.yolo.utils import LOGGER, colorstr
-from ultralytics.yolo.utils.checks import check_version
-from ultralytics.yolo.utils.metrics import bbox_ioa
-from ultralytics.yolo.utils.ops import resample_segments, segment2box, xywhn2xyxy
-
-IMAGENET_MEAN = 0.485, 0.456, 0.406  # RGB mean
-IMAGENET_STD = 0.229, 0.224, 0.225  # RGB standard deviation
-
-
-class Albumentations:
-    # YOLOv5 Albumentations class (optional, only used if package is installed)
-    def __init__(self, size=640):
-        """Instantiate object with image augmentations for YOLOv5."""
-        self.transform = None
-        prefix = colorstr('albumentations: ')
-        try:
-            import albumentations as A
-            check_version(A.__version__, '1.0.3', hard=True)  # version requirement
-
-            T = [
-                A.RandomResizedCrop(height=size, width=size, scale=(0.8, 1.0), ratio=(0.9, 1.11), p=0.0),
-                A.Blur(p=0.01),
-                A.MedianBlur(p=0.01),
-                A.ToGray(p=0.01),
-                A.CLAHE(p=0.01),
-                A.RandomBrightnessContrast(p=0.0),
-                A.RandomGamma(p=0.0),
-                A.ImageCompression(quality_lower=75, p=0.0)]  # transforms
-            self.transform = A.Compose(T, bbox_params=A.BboxParams(format='yolo', label_fields=['class_labels']))
-
-            LOGGER.info(prefix + ', '.join(f'{x}'.replace('always_apply=False, ', '') for x in T if x.p))
-        except ImportError:  # package not installed, skip
-            pass
-        except Exception as e:
-            LOGGER.info(f'{prefix}{e}')
-
-    def __call__(self, im, labels, p=1.0):
-        """Transforms input image and labels with probability 'p'."""
-        if self.transform and random.random() < p:
-            new = self.transform(image=im, bboxes=labels[:, 1:], class_labels=labels[:, 0])  # transformed
-            im, labels = new['image'], np.array([[c, *b] for c, b in zip(new['class_labels'], new['bboxes'])])
-        return im, labels
-
-
-def normalize(x, mean=IMAGENET_MEAN, std=IMAGENET_STD, inplace=False):
-    """Normalize RGB images x per ImageNet stats in BCHW format, i.e. x = (x - mean) / std."""
-    return TF.normalize(x, mean, std, inplace=inplace)
-
-
-def denormalize(x, mean=IMAGENET_MEAN, std=IMAGENET_STD):
-    """Denormalize RGB images x per ImageNet stats in BCHW format, i.e.
= x * std + mean.""" - for i in range(3): - x[:, i] = x[:, i] * std[i] + mean[i] - return x - - -def augment_hsv(im, hgain=0.5, sgain=0.5, vgain=0.5): - """HSV color-space augmentation.""" - if hgain or sgain or vgain: - r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1 # random gains - hue, sat, val = cv2.split(cv2.cvtColor(im, cv2.COLOR_BGR2HSV)) - dtype = im.dtype # uint8 - - x = np.arange(0, 256, dtype=r.dtype) - lut_hue = ((x * r[0]) % 180).astype(dtype) - lut_sat = np.clip(x * r[1], 0, 255).astype(dtype) - lut_val = np.clip(x * r[2], 0, 255).astype(dtype) - - im_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))) - cv2.cvtColor(im_hsv, cv2.COLOR_HSV2BGR, dst=im) # no return needed - - -def hist_equalize(im, clahe=True, bgr=False): - """Equalize histogram on BGR image 'im' with im.shape(n,m,3) and range 0-255.""" - yuv = cv2.cvtColor(im, cv2.COLOR_BGR2YUV if bgr else cv2.COLOR_RGB2YUV) - if clahe: - c = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8)) - yuv[:, :, 0] = c.apply(yuv[:, :, 0]) - else: - yuv[:, :, 0] = cv2.equalizeHist(yuv[:, :, 0]) # equalize Y channel histogram - return cv2.cvtColor(yuv, cv2.COLOR_YUV2BGR if bgr else cv2.COLOR_YUV2RGB) # convert YUV image to RGB - - -def replicate(im, labels): - """Replicate labels.""" - h, w = im.shape[:2] - boxes = labels[:, 1:].astype(int) - x1, y1, x2, y2 = boxes.T - s = ((x2 - x1) + (y2 - y1)) / 2 # side length (pixels) - for i in s.argsort()[:round(s.size * 0.5)]: # smallest indices - x1b, y1b, x2b, y2b = boxes[i] - bh, bw = y2b - y1b, x2b - x1b - yc, xc = int(random.uniform(0, h - bh)), int(random.uniform(0, w - bw)) # offset x, y - x1a, y1a, x2a, y2a = [xc, yc, xc + bw, yc + bh] - im[y1a:y2a, x1a:x2a] = im[y1b:y2b, x1b:x2b] # im4[ymin:ymax, xmin:xmax] - labels = np.append(labels, [[labels[i, 0], x1a, y1a, x2a, y2a]], axis=0) - - return im, labels - - -def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32): - """Resize and pad image while meeting stride-multiple constraints.""" - shape = im.shape[:2] # current shape [height, width] - if isinstance(new_shape, int): - new_shape = (new_shape, new_shape) - - # Scale ratio (new / old) - r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) - if not scaleup: # only scale down, do not scale up (for better val mAP) - r = min(r, 1.0) - - # Compute padding - ratio = r, r # width, height ratios - new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) - dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding - if auto: # minimum rectangle - dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding - elif scaleFill: # stretch - dw, dh = 0.0, 0.0 - new_unpad = (new_shape[1], new_shape[0]) - ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios - - dw /= 2 # divide padding into 2 sides - dh /= 2 - - if shape[::-1] != new_unpad: # resize - im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR) - top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) - left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) - im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border - return im, ratio, (dw, dh) - - -def random_perspective(im, - targets=(), - segments=(), - degrees=10, - translate=.1, - scale=.1, - shear=10, - perspective=0.0, - border=(0, 0)): - # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(0.1, 0.1), scale=(0.9, 1.1), shear=(-10, 10)) - # targets = 
[cls, xyxy] - - height = im.shape[0] + border[0] * 2 # shape(h,w,c) - width = im.shape[1] + border[1] * 2 - - # Center - C = np.eye(3) - C[0, 2] = -im.shape[1] / 2 # x translation (pixels) - C[1, 2] = -im.shape[0] / 2 # y translation (pixels) - - # Perspective - P = np.eye(3) - P[2, 0] = random.uniform(-perspective, perspective) # x perspective (about y) - P[2, 1] = random.uniform(-perspective, perspective) # y perspective (about x) - - # Rotation and Scale - R = np.eye(3) - a = random.uniform(-degrees, degrees) - # a += random.choice([-180, -90, 0, 90]) # add 90deg rotations to small rotations - s = random.uniform(1 - scale, 1 + scale) - # s = 2 ** random.uniform(-scale, scale) - R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s) - - # Shear - S = np.eye(3) - S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # x shear (deg) - S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # y shear (deg) - - # Translation - T = np.eye(3) - T[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * width # x translation (pixels) - T[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * height # y translation (pixels) - - # Combined rotation matrix - M = T @ S @ R @ P @ C # order of operations (right to left) is IMPORTANT - if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any(): # image changed - if perspective: - im = cv2.warpPerspective(im, M, dsize=(width, height), borderValue=(114, 114, 114)) - else: # affine - im = cv2.warpAffine(im, M[:2], dsize=(width, height), borderValue=(114, 114, 114)) - - # Visualize - # import matplotlib.pyplot as plt - # ax = plt.subplots(1, 2, figsize=(12, 6))[1].ravel() - # ax[0].imshow(im[:, :, ::-1]) # base - # ax[1].imshow(im2[:, :, ::-1]) # warped - - # Transform label coordinates - n = len(targets) - if n: - use_segments = any(x.any() for x in segments) - new = np.zeros((n, 4)) - if use_segments: # warp segments - segments = resample_segments(segments) # upsample - for i, segment in enumerate(segments): - xy = np.ones((len(segment), 3)) - xy[:, :2] = segment - xy = xy @ M.T # transform - xy = xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2] # perspective rescale or affine - - # Clip - new[i] = segment2box(xy, width, height) - - else: # warp boxes - xy = np.ones((n * 4, 3)) - xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1 - xy = xy @ M.T # transform - xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]).reshape(n, 8) # perspective rescale or affine - - # Create new boxes - x = xy[:, [0, 2, 4, 6]] - y = xy[:, [1, 3, 5, 7]] - new = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T - - # Clip - new[:, [0, 2]] = new[:, [0, 2]].clip(0, width) - new[:, [1, 3]] = new[:, [1, 3]].clip(0, height) - - # Filter candidates - i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.01 if use_segments else 0.10) - targets = targets[i] - targets[:, 1:5] = new[i] - - return im, targets - - -def copy_paste(im, labels, segments, p=0.5): - """Implement Copy-Paste augmentation https://arxiv.org/abs/2012.07177, labels as nx5 np.array(cls, xyxy).""" - n = len(segments) - if p and n: - h, w, c = im.shape # height, width, channels - im_new = np.zeros(im.shape, np.uint8) - - # Calculate ioa first then select indexes randomly - boxes = np.stack([w - labels[:, 3], labels[:, 2], w - labels[:, 1], labels[:, 4]], axis=-1) # (n, 4) - ioa = bbox_ioa(boxes, labels[:, 1:5]) # intersection over area - indexes = np.nonzero((ioa < 0.30).all(1))[0] # 
(N, )
-        n = len(indexes)
-        for j in random.sample(list(indexes), k=round(p * n)):
-            l, box, s = labels[j], boxes[j], segments[j]
-            labels = np.concatenate((labels, [[l[0], *box]]), 0)
-            segments.append(np.concatenate((w - s[:, 0:1], s[:, 1:2]), 1))
-            cv2.drawContours(im_new, [segments[j].astype(np.int32)], -1, (1, 1, 1), cv2.FILLED)
-
-        result = cv2.flip(im, 1)  # augment segments (flip left-right)
-        i = cv2.flip(im_new, 1).astype(bool)
-        im[i] = result[i]  # cv2.imwrite('debug.jpg', im)  # debug
-
-    return im, labels, segments
-
-
-def cutout(im, labels, p=0.5):
-    """Applies image cutout augmentation https://arxiv.org/abs/1708.04552."""
-    if random.random() < p:
-        h, w = im.shape[:2]
-        scales = [0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [0.03125] * 16  # image size fraction
-        for s in scales:
-            mask_h = random.randint(1, int(h * s))  # create random masks
-            mask_w = random.randint(1, int(w * s))
-
-            # Box
-            xmin = max(0, random.randint(0, w) - mask_w // 2)
-            ymin = max(0, random.randint(0, h) - mask_h // 2)
-            xmax = min(w, xmin + mask_w)
-            ymax = min(h, ymin + mask_h)
-
-            # Apply random color mask
-            im[ymin:ymax, xmin:xmax] = [random.randint(64, 191) for _ in range(3)]
-
-            # Return unobscured labels
-            if len(labels) and s > 0.03:
-                box = np.array([[xmin, ymin, xmax, ymax]], dtype=np.float32)
-                ioa = bbox_ioa(box, xywhn2xyxy(labels[:, 1:5], w, h))[0]  # intersection over area
-                labels = labels[ioa < 0.60]  # remove >60% obscured labels
-
-    return labels
-
-
-def mixup(im, labels, im2, labels2):
-    """Applies MixUp augmentation https://arxiv.org/pdf/1710.09412.pdf."""
-    r = np.random.beta(32.0, 32.0)  # mixup ratio, alpha=beta=32.0
-    im = (im * r + im2 * (1 - r)).astype(np.uint8)
-    labels = np.concatenate((labels, labels2), 0)
-    return im, labels
-
-
-def box_candidates(box1, box2, wh_thr=2, ar_thr=100, area_thr=0.1, eps=1e-16):  # box1(4,n), box2(4,n)
-    # Compute candidate boxes: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio
-    w1, h1 = box1[2] - box1[0], box1[3] - box1[1]
-    w2, h2 = box2[2] - box2[0], box2[3] - box2[1]
-    ar = np.maximum(w2 / (h2 + eps), h2 / (w2 + eps))  # aspect ratio
-    return (w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + eps) > area_thr) & (ar < ar_thr)  # candidates
-
-
-def classify_albumentations(
-        augment=True,
-        size=224,
-        scale=(0.08, 1.0),
-        ratio=(0.75, 1.0 / 0.75),  # 0.75, 1.33
-        hflip=0.5,
-        vflip=0.0,
-        jitter=0.4,
-        mean=IMAGENET_MEAN,
-        std=IMAGENET_STD,
-        auto_aug=False):
-    # YOLOv5 classification Albumentations (optional, only used if package is installed)
-    prefix = colorstr('albumentations: ')
-    try:
-        import albumentations as A
-        from albumentations.pytorch import ToTensorV2
-        check_version(A.__version__, '1.0.3', hard=True)  # version requirement
-        if augment:  # Resize and crop
-            T = [A.RandomResizedCrop(height=size, width=size, scale=scale, ratio=ratio)]
-            if auto_aug:
-                # TODO: implement AugMix, AutoAug & RandAug in albumentation
-                LOGGER.info(f'{prefix}auto augmentations are currently not supported')
-            else:
-                if hflip > 0:
-                    T += [A.HorizontalFlip(p=hflip)]
-                if vflip > 0:
-                    T += [A.VerticalFlip(p=vflip)]
-                if jitter > 0:
-                    jitter = float(jitter)
-                    T += [A.ColorJitter(jitter, jitter, jitter, 0)]  # brightness, contrast, saturation, 0 hue
-        else:  # Use fixed crop for eval set (reproducibility)
-            T = [A.SmallestMaxSize(max_size=size), A.CenterCrop(height=size, width=size)]
-        T += [A.Normalize(mean=mean, std=std), ToTensorV2()]  # Normalize and convert to Tensor
-        LOGGER.info(prefix + ', '.join(f'{x}'.replace('always_apply=False, ', '') for x in T if x.p))
-        return A.Compose(T)
-
-    except ImportError:  # package not installed, skip
-        LOGGER.warning(f'{prefix}⚠️ not found, install with `pip install albumentations` (recommended)')
-    except Exception as e:
-        LOGGER.info(f'{prefix}{e}')
-
-
-def classify_transforms(size=224):
-    """Transforms to apply if albumentations not installed."""
-    assert isinstance(size, int), f'ERROR: classify_transforms size {size} must be integer, not (list, tuple)'
-    # T.Compose([T.ToTensor(), T.Resize(size), T.CenterCrop(size), T.Normalize(IMAGENET_MEAN, IMAGENET_STD)])
-    return T.Compose([CenterCrop(size), ToTensor(), T.Normalize(IMAGENET_MEAN, IMAGENET_STD)])
-
-
-class LetterBox:
-    # YOLOv5 LetterBox class for image preprocessing, i.e. T.Compose([LetterBox(size), ToTensor()])
-    def __init__(self, size=(640, 640), auto=False, stride=32):
-        """Resizes and pads an image to a specified size for YOLOv5 preprocessing."""
-        super().__init__()
-        self.h, self.w = (size, size) if isinstance(size, int) else size
-        self.auto = auto  # pass max size integer, automatically solve for short side using stride
-        self.stride = stride  # used with auto
-
-    def __call__(self, im):  # im = np.array HWC
-        imh, imw = im.shape[:2]
-        r = min(self.h / imh, self.w / imw)  # ratio of new/old
-        h, w = round(imh * r), round(imw * r)  # resized image
-        hs, ws = (math.ceil(x / self.stride) * self.stride for x in (h, w)) if self.auto else (self.h, self.w)
-        top, left = round((hs - h) / 2 - 0.1), round((ws - w) / 2 - 0.1)
-        im_out = np.full((hs, ws, 3), 114, dtype=im.dtype)  # stride-multiple canvas when auto, else (h, w)
-        im_out[top:top + h, left:left + w] = cv2.resize(im, (w, h), interpolation=cv2.INTER_LINEAR)
-        return im_out
-
-
-class CenterCrop:
-    # YOLOv5 CenterCrop class for image preprocessing, i.e. T.Compose([CenterCrop(size), ToTensor()])
-    def __init__(self, size=640):
-        """Crops the center square of an input image and resizes it for YOLOv5 processing."""
-        super().__init__()
-        self.h, self.w = (size, size) if isinstance(size, int) else size
-
-    def __call__(self, im):  # im = np.array HWC
-        imh, imw = im.shape[:2]
-        m = min(imh, imw)  # min dimension
-        top, left = (imh - m) // 2, (imw - m) // 2
-        return cv2.resize(im[top:top + m, left:left + m], (self.w, self.h), interpolation=cv2.INTER_LINEAR)
-
-
-class ToTensor:
-    # YOLOv5 ToTensor class for image preprocessing, i.e.
T.Compose([LetterBox(size), ToTensor()]) - def __init__(self, half=False): - """Initialize ToTensor class for YOLOv5 image preprocessing.""" - super().__init__() - self.half = half - - def __call__(self, im): # im = np.array HWC in BGR order - im = np.ascontiguousarray(im.transpose((2, 0, 1))[::-1]) # HWC to CHW -> BGR to RGB -> contiguous - im = torch.from_numpy(im) # to torch - im = im.half() if self.half else im.float() # uint8 to fp16/32 - im /= 255.0 # 0-255 to 0.0-1.0 - return im diff --git a/ultralytics/yolo/data/dataloaders/v5loader.py b/ultralytics/yolo/data/dataloaders/v5loader.py deleted file mode 100644 index 96549dd..0000000 --- a/ultralytics/yolo/data/dataloaders/v5loader.py +++ /dev/null @@ -1,1109 +0,0 @@ -# Ultralytics YOLO 🚀, AGPL-3.0 license -""" -Dataloaders and dataset utils -""" - -import contextlib -import glob -import hashlib -import math -import os -import random -import shutil -import time -from itertools import repeat -from multiprocessing.pool import ThreadPool -from pathlib import Path -from threading import Thread -from urllib.parse import urlparse - -import cv2 -import numpy as np -import psutil -import torch -import torchvision -from PIL import ExifTags, Image, ImageOps -from torch.utils.data import DataLoader, Dataset, dataloader, distributed -from tqdm import tqdm - -from ultralytics.yolo.utils import (DATASETS_DIR, LOGGER, NUM_THREADS, TQDM_BAR_FORMAT, is_colab, is_dir_writeable, - is_kaggle) -from ultralytics.yolo.utils.checks import check_requirements -from ultralytics.yolo.utils.ops import clean_str, segments2boxes, xyn2xy, xywh2xyxy, xywhn2xyxy, xyxy2xywhn -from ultralytics.yolo.utils.torch_utils import torch_distributed_zero_first - -from .v5augmentations import (Albumentations, augment_hsv, classify_albumentations, classify_transforms, copy_paste, - letterbox, mixup, random_perspective) - -# Parameters -HELP_URL = 'See https://docs.ultralytics.com/yolov5/tutorials/train_custom_data' -IMG_FORMATS = 'bmp', 'dng', 'jpeg', 'jpg', 'mpo', 'png', 'tif', 'tiff', 'webp', 'pfm' # include image suffixes -VID_FORMATS = 'asf', 'avi', 'gif', 'm4v', 'mkv', 'mov', 'mp4', 'mpeg', 'mpg', 'ts', 'wmv' # include video suffixes -LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html -RANK = int(os.getenv('RANK', -1)) -PIN_MEMORY = str(os.getenv('PIN_MEMORY', True)).lower() == 'true' # global pin_memory for dataloaders - -# Get orientation exif tag -for orientation in ExifTags.TAGS.keys(): - if ExifTags.TAGS[orientation] == 'Orientation': - break - - -def get_hash(paths): - """Returns a single hash value of a list of paths (files or dirs).""" - size = sum(os.path.getsize(p) for p in paths if os.path.exists(p)) # sizes - h = hashlib.sha256(str(size).encode()) # hash sizes - h.update(''.join(paths).encode()) # hash paths - return h.hexdigest() # return hash - - -def exif_size(img): - """Returns exif-corrected PIL size.""" - s = img.size # (width, height) - with contextlib.suppress(Exception): - rotation = dict(img._getexif().items())[orientation] - if rotation in [6, 8]: # rotation 270 or 90 - s = (s[1], s[0]) - return s - - -def exif_transpose(image): - """ - Transpose a PIL image accordingly if it has an EXIF Orientation tag. - Inplace version of https://github.com/python-pillow/Pillow/blob/master/src/PIL/ImageOps.py exif_transpose() - - :param image: The image to transpose. - :return: An image. 
- """ - exif = image.getexif() - orientation = exif.get(0x0112, 1) # default 1 - if orientation > 1: - method = { - 2: Image.FLIP_LEFT_RIGHT, - 3: Image.ROTATE_180, - 4: Image.FLIP_TOP_BOTTOM, - 5: Image.TRANSPOSE, - 6: Image.ROTATE_270, - 7: Image.TRANSVERSE, - 8: Image.ROTATE_90}.get(orientation) - if method is not None: - image = image.transpose(method) - del exif[0x0112] - image.info['exif'] = exif.tobytes() - return image - - -def seed_worker(worker_id): - """Set dataloader worker seed https://pytorch.org/docs/stable/notes/randomness.html#dataloader.""" - worker_seed = torch.initial_seed() % 2 ** 32 - np.random.seed(worker_seed) - random.seed(worker_seed) - - -def create_dataloader(path, - imgsz, - batch_size, - stride, - single_cls=False, - hyp=None, - augment=False, - cache=False, - pad=0.0, - rect=False, - rank=-1, - workers=8, - image_weights=False, - close_mosaic=False, - min_items=0, - prefix='', - shuffle=False, - seed=0): - if rect and shuffle: - LOGGER.warning('WARNING ⚠️ --rect is incompatible with DataLoader shuffle, setting shuffle=False') - shuffle = False - with torch_distributed_zero_first(rank): # init dataset *.cache only once if DDP - dataset = LoadImagesAndLabels( - path, - imgsz, - batch_size, - augment=augment, # augmentation - hyp=hyp, # hyperparameters - rect=rect, # rectangular batches - cache_images=cache, - single_cls=single_cls, - stride=int(stride), - pad=pad, - image_weights=image_weights, - min_items=min_items, - prefix=prefix) - - batch_size = min(batch_size, len(dataset)) - nd = torch.cuda.device_count() # number of CUDA devices - nw = min([os.cpu_count() // max(nd, 1), batch_size if batch_size > 1 else 0, workers]) # number of workers - sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle) - loader = DataLoader if image_weights or close_mosaic else InfiniteDataLoader # DataLoader allows attribute updates - generator = torch.Generator() - generator.manual_seed(6148914691236517205 + seed + RANK) - return loader(dataset, - batch_size=batch_size, - shuffle=shuffle and sampler is None, - num_workers=nw, - sampler=sampler, - pin_memory=PIN_MEMORY, - collate_fn=LoadImagesAndLabels.collate_fn, - worker_init_fn=seed_worker, - generator=generator), dataset - - -class InfiniteDataLoader(dataloader.DataLoader): - """Dataloader that reuses workers - - Uses same syntax as vanilla DataLoader - """ - - def __init__(self, *args, **kwargs): - """Dataloader that reuses workers for same syntax as vanilla DataLoader.""" - super().__init__(*args, **kwargs) - object.__setattr__(self, 'batch_sampler', _RepeatSampler(self.batch_sampler)) - self.iterator = super().__iter__() - - def __len__(self): - """Returns the length of batch_sampler's sampler.""" - return len(self.batch_sampler.sampler) - - def __iter__(self): - """Creates a sampler that infinitely repeats.""" - for _ in range(len(self)): - yield next(self.iterator) - - -class _RepeatSampler: - """Sampler that repeats forever - - Args: - sampler (Dataset.sampler): The sampler to repeat. - """ - - def __init__(self, sampler): - """Sampler that repeats dataset samples infinitely.""" - self.sampler = sampler - - def __iter__(self): - """Infinite loop iterating over a given sampler.""" - while True: - yield from iter(self.sampler) - - -class LoadScreenshots: - # YOLOv5 screenshot dataloader, i.e. 
`python detect.py --source "screen 0 100 100 512 256"` - def __init__(self, source, img_size=640, stride=32, auto=True, transforms=None): - """source = [screen_number left top width height] (pixels).""" - check_requirements('mss') - import mss - - source, *params = source.split() - self.screen, left, top, width, height = 0, None, None, None, None # default to full screen 0 - if len(params) == 1: - self.screen = int(params[0]) - elif len(params) == 4: - left, top, width, height = (int(x) for x in params) - elif len(params) == 5: - self.screen, left, top, width, height = (int(x) for x in params) - self.img_size = img_size - self.stride = stride - self.transforms = transforms - self.auto = auto - self.mode = 'stream' - self.frame = 0 - self.sct = mss.mss() - - # Parse monitor shape - monitor = self.sct.monitors[self.screen] - self.top = monitor['top'] if top is None else (monitor['top'] + top) - self.left = monitor['left'] if left is None else (monitor['left'] + left) - self.width = width or monitor['width'] - self.height = height or monitor['height'] - self.monitor = {'left': self.left, 'top': self.top, 'width': self.width, 'height': self.height} - - def __iter__(self): - """Iterates over objects with the same structure as the monitor attribute.""" - return self - - def __next__(self): - """mss screen capture: get raw pixels from the screen as np array.""" - im0 = np.array(self.sct.grab(self.monitor))[:, :, :3] # [:, :, :3] BGRA to BGR - s = f'screen {self.screen} (LTWH): {self.left},{self.top},{self.width},{self.height}: ' - - if self.transforms: - im = self.transforms(im0) # transforms - else: - im = letterbox(im0, self.img_size, stride=self.stride, auto=self.auto)[0] # padded resize - im = im.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB - im = np.ascontiguousarray(im) # contiguous - self.frame += 1 - return str(self.screen), im, im0, None, s # screen, img, original img, im0s, s - - -class LoadImages: - # YOLOv5 image/video dataloader, i.e. `python detect.py --source image.jpg/vid.mp4` - def __init__(self, path, img_size=640, stride=32, auto=True, transforms=None, vid_stride=1): - """Initialize instance variables and check for valid input.""" - if isinstance(path, str) and Path(path).suffix == '.txt': # *.txt file with img/vid/dir on each line - path = Path(path).read_text().rsplit() - files = [] - for p in sorted(path) if isinstance(path, (list, tuple)) else [path]: - p = str(Path(p).resolve()) - if '*' in p: - files.extend(sorted(glob.glob(p, recursive=True))) # glob - elif os.path.isdir(p): - files.extend(sorted(glob.glob(os.path.join(p, '*.*')))) # dir - elif os.path.isfile(p): - files.append(p) # files - else: - raise FileNotFoundError(f'{p} does not exist') - - images = [x for x in files if x.split('.')[-1].lower() in IMG_FORMATS] - videos = [x for x in files if x.split('.')[-1].lower() in VID_FORMATS] - ni, nv = len(images), len(videos) - - self.img_size = img_size - self.stride = stride - self.files = images + videos - self.nf = ni + nv # number of files - self.video_flag = [False] * ni + [True] * nv - self.mode = 'image' - self.auto = auto - self.transforms = transforms # optional - self.vid_stride = vid_stride # video frame-rate stride - if any(videos): - self._new_video(videos[0]) # new video - else: - self.cap = None - assert self.nf > 0, f'No images or videos found in {p}. 
' \ - f'Supported formats are:\nimages: {IMG_FORMATS}\nvideos: {VID_FORMATS}' - - def __iter__(self): - """Returns an iterator object for iterating over images or videos found in a directory.""" - self.count = 0 - return self - - def __next__(self): - """Iterator's next item, performs transformation on image and returns path, transformed image, original image, capture and size.""" - if self.count == self.nf: - raise StopIteration - path = self.files[self.count] - - if self.video_flag[self.count]: - # Read video - self.mode = 'video' - for _ in range(self.vid_stride): - self.cap.grab() - ret_val, im0 = self.cap.retrieve() - while not ret_val: - self.count += 1 - self.cap.release() - if self.count == self.nf: # last video - raise StopIteration - path = self.files[self.count] - self._new_video(path) - ret_val, im0 = self.cap.read() - - self.frame += 1 - # im0 = self._cv2_rotate(im0) # for use if cv2 autorotation is False - s = f'video {self.count + 1}/{self.nf} ({self.frame}/{self.frames}) {path}: ' - - else: - # Read image - self.count += 1 - im0 = cv2.imread(path) # BGR - assert im0 is not None, f'Image Not Found {path}' - s = f'image {self.count}/{self.nf} {path}: ' - - if self.transforms: - im = self.transforms(im0) # transforms - else: - im = letterbox(im0, self.img_size, stride=self.stride, auto=self.auto)[0] # padded resize - im = im.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB - im = np.ascontiguousarray(im) # contiguous - - return path, im, im0, self.cap, s - - def _new_video(self, path): - """Create a new video capture object.""" - self.frame = 0 - self.cap = cv2.VideoCapture(path) - self.frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT) / self.vid_stride) - self.orientation = int(self.cap.get(cv2.CAP_PROP_ORIENTATION_META)) # rotation degrees - # self.cap.set(cv2.CAP_PROP_ORIENTATION_AUTO, 0) # disable https://github.com/ultralytics/yolov5/issues/8493 - - def _cv2_rotate(self, im): - """Rotate a cv2 video manually.""" - if self.orientation == 0: - return cv2.rotate(im, cv2.ROTATE_90_CLOCKWISE) - elif self.orientation == 180: - return cv2.rotate(im, cv2.ROTATE_90_COUNTERCLOCKWISE) - elif self.orientation == 90: - return cv2.rotate(im, cv2.ROTATE_180) - return im - - def __len__(self): - """Returns the number of files in the class instance.""" - return self.nf # number of files - - -class LoadStreams: - # YOLOv5 streamloader, i.e. `python detect.py --source 'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP streams` - def __init__(self, sources='file.streams', img_size=640, stride=32, auto=True, transforms=None, vid_stride=1): - """Initialize YOLO detector with optional transforms and check input shapes.""" - torch.backends.cudnn.benchmark = True # faster for fixed-size inference - self.mode = 'stream' - self.img_size = img_size - self.stride = stride - self.vid_stride = vid_stride # video frame-rate stride - sources = Path(sources).read_text().rsplit() if os.path.isfile(sources) else [sources] - n = len(sources) - self.sources = [clean_str(x) for x in sources] # clean source names for later - self.imgs, self.fps, self.frames, self.threads = [None] * n, [0] * n, [0] * n, [None] * n - for i, s in enumerate(sources): # index, source - # Start thread to read frames from video stream - st = f'{i + 1}/{n}: {s}... ' - if urlparse(s).hostname in ('www.youtube.com', 'youtube.com', 'youtu.be'): # if source is YouTube video - # YouTube format i.e. 
'https://www.youtube.com/watch?v=Zgi9g1ksQHc' or 'https://youtu.be/Zgi9g1ksQHc' - check_requirements(('pafy', 'youtube_dl==2020.12.2')) - import pafy - s = pafy.new(s).getbest(preftype='mp4').url # YouTube URL - s = eval(s) if s.isnumeric() else s # i.e. s = '0' local webcam - if s == 0: - assert not is_colab(), '--source 0 webcam unsupported on Colab. Rerun command in a local environment.' - assert not is_kaggle(), '--source 0 webcam unsupported on Kaggle. Rerun command in a local environment.' - cap = cv2.VideoCapture(s) - assert cap.isOpened(), f'{st}Failed to open {s}' - w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) - h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) - fps = cap.get(cv2.CAP_PROP_FPS) # warning: may return 0 or nan - self.frames[i] = max(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)), 0) or float('inf') # infinite stream fallback - self.fps[i] = max((fps if math.isfinite(fps) else 0) % 100, 0) or 30 # 30 FPS fallback - - _, self.imgs[i] = cap.read() # guarantee first frame - self.threads[i] = Thread(target=self.update, args=([i, cap, s]), daemon=True) - LOGGER.info(f'{st} Success ({self.frames[i]} frames {w}x{h} at {self.fps[i]:.2f} FPS)') - self.threads[i].start() - LOGGER.info('') # newline - - # Check for common shapes - s = np.stack([letterbox(x, img_size, stride=stride, auto=auto)[0].shape for x in self.imgs]) - self.rect = np.unique(s, axis=0).shape[0] == 1 # rect inference if all shapes equal - self.auto = auto and self.rect - self.transforms = transforms # optional - if not self.rect: - LOGGER.warning('WARNING ⚠️ Stream shapes differ. For optimal performance supply similarly-shaped streams.') - - def update(self, i, cap, stream): - """Read stream `i` frames in daemon thread.""" - n, f = 0, self.frames[i] # frame number, frame array - while cap.isOpened() and n < f: - n += 1 - cap.grab() # .read() = .grab() followed by .retrieve() - if n % self.vid_stride == 0: - success, im = cap.retrieve() - if success: - self.imgs[i] = im - else: - LOGGER.warning('WARNING ⚠️ Video stream unresponsive, please check your IP camera connection.') - self.imgs[i] = np.zeros_like(self.imgs[i]) - cap.open(stream) # re-open stream if signal was lost - time.sleep(0.0) # wait time - - def __iter__(self): - """Iterator that returns the class instance.""" - self.count = -1 - return self - - def __next__(self): - """Return a tuple containing transformed and resized image data.""" - self.count += 1 - if not all(x.is_alive() for x in self.threads) or cv2.waitKey(1) == ord('q'): # q to quit - cv2.destroyAllWindows() - raise StopIteration - - im0 = self.imgs.copy() - if self.transforms: - im = np.stack([self.transforms(x) for x in im0]) # transforms - else: - im = np.stack([letterbox(x, self.img_size, stride=self.stride, auto=self.auto)[0] for x in im0]) # resize - im = im[..., ::-1].transpose((0, 3, 1, 2)) # BGR to RGB, BHWC to BCHW - im = np.ascontiguousarray(im) # contiguous - - return self.sources, im, im0, None, '' - - def __len__(self): - """Returns the number of sources as the length of the object.""" - return len(self.sources) # 1E12 frames = 32 streams at 30 FPS for 30 years - - -def img2label_paths(img_paths): - """Define label paths as a function of image paths.""" - sa, sb = f'{os.sep}images{os.sep}', f'{os.sep}labels{os.sep}' # /images/, /labels/ substrings - return [sb.join(x.rsplit(sa, 1)).rsplit('.', 1)[0] + '.txt' for x in img_paths] - - -class LoadImagesAndLabels(Dataset): - """YOLOv5 train_loader/val_loader, loads images and labels for training and validation.""" - cache_version = 0.6 # 
dataset labels *.cache version - rand_interp_methods = [cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4] - - def __init__(self, - path, - img_size=640, - batch_size=16, - augment=False, - hyp=None, - rect=False, - image_weights=False, - cache_images=False, - single_cls=False, - stride=32, - pad=0.0, - min_items=0, - prefix=''): - self.img_size = img_size - self.augment = augment - self.hyp = hyp - self.image_weights = image_weights - self.rect = False if image_weights else rect - self.mosaic = self.augment and not self.rect # load 4 images at a time into a mosaic (only during training) - self.mosaic_border = [-img_size // 2, -img_size // 2] - self.stride = stride - self.path = path - self.albumentations = Albumentations(size=img_size) if augment else None - - try: - f = [] # image files - for p in path if isinstance(path, list) else [path]: - p = Path(p) # os-agnostic - if p.is_dir(): # dir - f += glob.glob(str(p / '**' / '*.*'), recursive=True) - # f = list(p.rglob('*.*')) # pathlib - elif p.is_file(): # file - with open(p) as t: - t = t.read().strip().splitlines() - parent = str(p.parent) + os.sep - f += [x.replace('./', parent, 1) if x.startswith('./') else x for x in t] # to global path - # f += [p.parent / x.lstrip(os.sep) for x in t] # to global path (pathlib) - else: - raise FileNotFoundError(f'{prefix}{p} does not exist') - self.im_files = sorted(x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in IMG_FORMATS) - # self.img_files = sorted([x for x in f if x.suffix[1:].lower() in IMG_FORMATS]) # pathlib - assert self.im_files, f'{prefix}No images found' - except Exception as e: - raise FileNotFoundError(f'{prefix}Error loading data from {path}: {e}\n{HELP_URL}') from e - - # Check cache - self.label_files = img2label_paths(self.im_files) # labels - cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix('.cache') - try: - cache, exists = np.load(cache_path, allow_pickle=True).item(), True # load dict - assert cache['version'] == self.cache_version # matches current version - assert cache['hash'] == get_hash(self.label_files + self.im_files) # identical hash - except (FileNotFoundError, AssertionError, AttributeError): - cache, exists = self.cache_labels(cache_path, prefix), False # run cache ops - - # Display cache - nf, nm, ne, nc, n = cache.pop('results') # found, missing, empty, corrupt, total - if exists and LOCAL_RANK in (-1, 0): - d = f'Scanning {cache_path}... {nf} images, {nm + ne} backgrounds, {nc} corrupt' - tqdm(None, desc=prefix + d, total=n, initial=n, bar_format=TQDM_BAR_FORMAT) # display cache results - if cache['msgs']: - LOGGER.info('\n'.join(cache['msgs'])) # display warnings - assert nf > 0 or not augment, f'{prefix}No labels found in {cache_path}, can not start training. {HELP_URL}' - - # Read cache - [cache.pop(k) for k in ('hash', 'version', 'msgs')] # remove items - labels, shapes, self.segments = zip(*cache.values()) - nl = len(np.concatenate(labels, 0)) # number of labels - assert nl > 0 or not augment, f'{prefix}All labels empty in {cache_path}, can not start training. 
{HELP_URL}' - self.labels = list(labels) - self.shapes = np.array(shapes) - self.im_files = list(cache.keys()) # update - self.label_files = img2label_paths(cache.keys()) # update - - # Filter images - if min_items: - include = np.array([len(x) >= min_items for x in self.labels]).nonzero()[0].astype(int) - LOGGER.info(f'{prefix}{n - len(include)}/{n} images filtered from dataset') - self.im_files = [self.im_files[i] for i in include] - self.label_files = [self.label_files[i] for i in include] - self.labels = [self.labels[i] for i in include] - self.segments = [self.segments[i] for i in include] - self.shapes = self.shapes[include] # wh - - # Create indices - n = len(self.shapes) # number of images - bi = np.floor(np.arange(n) / batch_size).astype(int) # batch index - nb = bi[-1] + 1 # number of batches - self.batch = bi # batch index of image - self.n = n - self.indices = range(n) - - # Update labels - include_class = [] # filter labels to include only these classes (optional) - include_class_array = np.array(include_class).reshape(1, -1) - for i, (label, segment) in enumerate(zip(self.labels, self.segments)): - if include_class: - j = (label[:, 0:1] == include_class_array).any(1) - self.labels[i] = label[j] - if segment: - self.segments[i] = [segment[si] for si, idx in enumerate(j) if idx] - if single_cls: # single-class training, merge all classes into 0 - self.labels[i][:, 0] = 0 - - # Rectangular Training - if self.rect: - # Sort by aspect ratio - s = self.shapes # wh - ar = s[:, 1] / s[:, 0] # aspect ratio - irect = ar.argsort() - self.im_files = [self.im_files[i] for i in irect] - self.label_files = [self.label_files[i] for i in irect] - self.labels = [self.labels[i] for i in irect] - self.segments = [self.segments[i] for i in irect] - self.shapes = s[irect] # wh - ar = ar[irect] - - # Set training image shapes - shapes = [[1, 1]] * nb - for i in range(nb): - ari = ar[bi == i] - mini, maxi = ari.min(), ari.max() - if maxi < 1: - shapes[i] = [maxi, 1] - elif mini > 1: - shapes[i] = [1, 1 / mini] - - self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(int) * stride - - # Cache images into RAM/disk for faster training - if cache_images == 'ram' and not self.check_cache_ram(prefix=prefix): - cache_images = False - self.ims = [None] * n - self.npy_files = [Path(f).with_suffix('.npy') for f in self.im_files] - if cache_images: - b, gb = 0, 1 << 30 # bytes of cached images, bytes per gigabytes - self.im_hw0, self.im_hw = [None] * n, [None] * n - fcn = self.cache_images_to_disk if cache_images == 'disk' else self.load_image - with ThreadPool(NUM_THREADS) as pool: - results = pool.imap(fcn, range(n)) - pbar = tqdm(enumerate(results), total=n, bar_format=TQDM_BAR_FORMAT, disable=LOCAL_RANK > 0) - for i, x in pbar: - if cache_images == 'disk': - b += self.npy_files[i].stat().st_size - else: # 'ram' - self.ims[i], self.im_hw0[i], self.im_hw[i] = x # im, hw_orig, hw_resized = load_image(self, i) - b += self.ims[i].nbytes - pbar.desc = f'{prefix}Caching images ({b / gb:.1f}GB {cache_images})' - pbar.close() - - def check_cache_ram(self, safety_margin=0.1, prefix=''): - """Check image caching requirements vs available memory.""" - b, gb = 0, 1 << 30 # bytes of cached images, bytes per gigabytes - n = min(self.n, 30) # extrapolate from 30 random images - for _ in range(n): - im = cv2.imread(random.choice(self.im_files)) # sample image - ratio = self.img_size / max(im.shape[0], im.shape[1]) # max(h, w) # ratio - b += im.nbytes * ratio ** 2 - mem_required = b * self.n / n # 
GB required to cache dataset into RAM - mem = psutil.virtual_memory() - cache = mem_required * (1 + safety_margin) < mem.available # to cache or not to cache, that is the question - if not cache: - LOGGER.info(f'{prefix}{mem_required / gb:.1f}GB RAM required, ' - f'{mem.available / gb:.1f}/{mem.total / gb:.1f}GB available, ' - f"{'caching images ✅' if cache else 'not caching images ⚠️'}") - return cache - - def cache_labels(self, path=Path('./labels.cache'), prefix=''): - """Cache labels and save as numpy file for next time.""" - # Cache dataset labels, check images and read shapes - if path.exists(): - path.unlink() # remove *.cache file if exists - x = {} # dict - nm, nf, ne, nc, msgs = 0, 0, 0, 0, [] # number missing, found, empty, corrupt, messages - desc = f'{prefix}Scanning {path.parent / path.stem}...' - total = len(self.im_files) - with ThreadPool(NUM_THREADS) as pool: - results = pool.imap(verify_image_label, zip(self.im_files, self.label_files, repeat(prefix))) - pbar = tqdm(results, desc=desc, total=total, bar_format=TQDM_BAR_FORMAT) - for im_file, lb, shape, segments, nm_f, nf_f, ne_f, nc_f, msg in pbar: - nm += nm_f - nf += nf_f - ne += ne_f - nc += nc_f - if im_file: - x[im_file] = [lb, shape, segments] - if msg: - msgs.append(msg) - pbar.desc = f'{desc} {nf} images, {nm + ne} backgrounds, {nc} corrupt' - pbar.close() - - if msgs: - LOGGER.info('\n'.join(msgs)) - if nf == 0: - LOGGER.warning(f'{prefix}WARNING ⚠️ No labels found in {path}. {HELP_URL}') - x['hash'] = get_hash(self.label_files + self.im_files) - x['results'] = nf, nm, ne, nc, len(self.im_files) - x['msgs'] = msgs # warnings - x['version'] = self.cache_version # cache version - if is_dir_writeable(path.parent): - np.save(str(path), x) # save cache for next time - path.with_suffix('.cache.npy').rename(path) # remove .npy suffix - LOGGER.info(f'{prefix}New cache created: {path}') - else: - LOGGER.warning(f'{prefix}WARNING ⚠️ Cache directory {path.parent} is not writeable') # not writeable - return x - - def __len__(self): - """Returns the length of 'im_files' attribute.""" - return len(self.im_files) - - def __getitem__(self, index): - """Get a sample and its corresponding label, filename and shape from the dataset.""" - index = self.indices[index] # linear, shuffled, or image_weights - - hyp = self.hyp - mosaic = self.mosaic and random.random() < hyp['mosaic'] - if mosaic: - # Load mosaic - img, labels = self.load_mosaic(index) - shapes = None - - # MixUp augmentation - if random.random() < hyp['mixup']: - img, labels = mixup(img, labels, *self.load_mosaic(random.randint(0, self.n - 1))) - - else: - # Load image - img, (h0, w0), (h, w) = self.load_image(index) - - # Letterbox - shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size # final letterboxed shape - img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment) - shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling - - labels = self.labels[index].copy() - if labels.size: # normalized xywh to pixel xyxy format - labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1]) - - if self.augment: - img, labels = random_perspective(img, - labels, - degrees=hyp['degrees'], - translate=hyp['translate'], - scale=hyp['scale'], - shear=hyp['shear'], - perspective=hyp['perspective']) - - nl = len(labels) # number of labels - if nl: - labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1E-3) - - if self.augment: - # Albumentations - img, labels = 
self.albumentations(img, labels) - nl = len(labels) # update after albumentations - - # HSV color-space - augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v']) - - # Flip up-down - if random.random() < hyp['flipud']: - img = np.flipud(img) - if nl: - labels[:, 2] = 1 - labels[:, 2] - - # Flip left-right - if random.random() < hyp['fliplr']: - img = np.fliplr(img) - if nl: - labels[:, 1] = 1 - labels[:, 1] - - # Cutouts - # labels = cutout(img, labels, p=0.5) - # nl = len(labels) # update after cutout - - labels_out = torch.zeros((nl, 6)) - if nl: - labels_out[:, 1:] = torch.from_numpy(labels) - - # Convert - img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB - img = np.ascontiguousarray(img) - - return torch.from_numpy(img), labels_out, self.im_files[index], shapes - - def load_image(self, i): - """Loads 1 image from dataset index 'i', returns (im, original hw, resized hw).""" - im, f, fn = self.ims[i], self.im_files[i], self.npy_files[i], - if im is None: # not cached in RAM - if fn.exists(): # load npy - im = np.load(fn) - else: # read image - im = cv2.imread(f) # BGR - assert im is not None, f'Image Not Found {f}' - h0, w0 = im.shape[:2] # orig hw - r = self.img_size / max(h0, w0) # ratio - if r != 1: # if sizes are not equal - interp = cv2.INTER_LINEAR if (self.augment or r > 1) else cv2.INTER_AREA - im = cv2.resize(im, (math.ceil(w0 * r), math.ceil(h0 * r)), interpolation=interp) - return im, (h0, w0), im.shape[:2] # im, hw_original, hw_resized - return self.ims[i], self.im_hw0[i], self.im_hw[i] # im, hw_original, hw_resized - - def cache_images_to_disk(self, i): - """Saves an image as an *.npy file for faster loading.""" - f = self.npy_files[i] - if not f.exists(): - np.save(f.as_posix(), cv2.imread(self.im_files[i])) - - def load_mosaic(self, index): - """YOLOv5 4-mosaic loader. 
Loads 1 image + 3 random images into a 4-image mosaic.""" - labels4, segments4 = [], [] - s = self.img_size - yc, xc = (int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border) # mosaic center x, y - indices = [index] + random.choices(self.indices, k=3) # 3 additional image indices - random.shuffle(indices) - for i, index in enumerate(indices): - # Load image - img, _, (h, w) = self.load_image(index) - - # Place img in img4 - if i == 0: # top left - img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles - x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image) - x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image) - elif i == 1: # top right - x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc - x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h - elif i == 2: # bottom left - x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h) - x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h) - elif i == 3: # bottom right - x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h) - x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h) - - img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax] - padw = x1a - x1b - padh = y1a - y1b - - # Labels - labels, segments = self.labels[index].copy(), self.segments[index].copy() - if labels.size: - labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padw, padh) # normalized xywh to pixel xyxy format - segments = [xyn2xy(x, w, h, padw, padh) for x in segments] - labels4.append(labels) - segments4.extend(segments) - - # Concat/clip labels - labels4 = np.concatenate(labels4, 0) - for x in (labels4[:, 1:], *segments4): - np.clip(x, 0, 2 * s, out=x) # clip when using random_perspective() - # img4, labels4 = replicate(img4, labels4) # replicate - - # Augment - img4, labels4, segments4 = copy_paste(img4, labels4, segments4, p=self.hyp['copy_paste']) - img4, labels4 = random_perspective(img4, - labels4, - segments4, - degrees=self.hyp['degrees'], - translate=self.hyp['translate'], - scale=self.hyp['scale'], - shear=self.hyp['shear'], - perspective=self.hyp['perspective'], - border=self.mosaic_border) # border to remove - - return img4, labels4 - - def load_mosaic9(self, index): - """YOLOv5 9-mosaic loader. 
Loads 1 image + 8 random images into a 9-image mosaic.""" - labels9, segments9 = [], [] - s = self.img_size - indices = [index] + random.choices(self.indices, k=8) # 8 additional image indices - random.shuffle(indices) - hp, wp = -1, -1 # height, width previous - for i, index in enumerate(indices): - # Load image - img, _, (h, w) = self.load_image(index) - - # Place img in img9 - if i == 0: # center - img9 = np.full((s * 3, s * 3, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles - h0, w0 = h, w - c = s, s, s + w, s + h # xmin, ymin, xmax, ymax (base) coordinates - elif i == 1: # top - c = s, s - h, s + w, s - elif i == 2: # top right - c = s + wp, s - h, s + wp + w, s - elif i == 3: # right - c = s + w0, s, s + w0 + w, s + h - elif i == 4: # bottom right - c = s + w0, s + hp, s + w0 + w, s + hp + h - elif i == 5: # bottom - c = s + w0 - w, s + h0, s + w0, s + h0 + h - elif i == 6: # bottom left - c = s + w0 - wp - w, s + h0, s + w0 - wp, s + h0 + h - elif i == 7: # left - c = s - w, s + h0 - h, s, s + h0 - elif i == 8: # top left - c = s - w, s + h0 - hp - h, s, s + h0 - hp - - padx, pady = c[:2] - x1, y1, x2, y2 = (max(x, 0) for x in c) # allocate coords - - # Labels - labels, segments = self.labels[index].copy(), self.segments[index].copy() - if labels.size: - labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padx, pady) # normalized xywh to pixel xyxy format - segments = [xyn2xy(x, w, h, padx, pady) for x in segments] - labels9.append(labels) - segments9.extend(segments) - - # Image - img9[y1:y2, x1:x2] = img[y1 - pady:, x1 - padx:] # img9[ymin:ymax, xmin:xmax] - hp, wp = h, w # height, width previous - - # Offset - yc, xc = (int(random.uniform(0, s)) for _ in self.mosaic_border) # mosaic center x, y - img9 = img9[yc:yc + 2 * s, xc:xc + 2 * s] - - # Concat/clip labels - labels9 = np.concatenate(labels9, 0) - labels9[:, [1, 3]] -= xc - labels9[:, [2, 4]] -= yc - c = np.array([xc, yc]) # centers - segments9 = [x - c for x in segments9] - - for x in (labels9[:, 1:], *segments9): - np.clip(x, 0, 2 * s, out=x) # clip when using random_perspective() - # img9, labels9 = replicate(img9, labels9) # replicate - - # Augment - img9, labels9, segments9 = copy_paste(img9, labels9, segments9, p=self.hyp['copy_paste']) - img9, labels9 = random_perspective(img9, - labels9, - segments9, - degrees=self.hyp['degrees'], - translate=self.hyp['translate'], - scale=self.hyp['scale'], - shear=self.hyp['shear'], - perspective=self.hyp['perspective'], - border=self.mosaic_border) # border to remove - - return img9, labels9 - - @staticmethod - def collate_fn(batch): - """YOLOv8 collate function, outputs dict.""" - im, label, path, shapes = zip(*batch) # transposed - for i, lb in enumerate(label): - lb[:, 0] = i # add target image index for build_targets() - batch_idx, cls, bboxes = torch.cat(label, 0).split((1, 1, 4), dim=1) - return { - 'ori_shape': tuple((x[0] if x else None) for x in shapes), - 'ratio_pad': tuple((x[1] if x else None) for x in shapes), - 'im_file': path, - 'img': torch.stack(im, 0), - 'cls': cls, - 'bboxes': bboxes, - 'batch_idx': batch_idx.view(-1)} - - @staticmethod - def collate_fn_old(batch): - """YOLOv5 original collate function.""" - im, label, path, shapes = zip(*batch) # transposed - for i, lb in enumerate(label): - lb[:, 0] = i # add target image index for build_targets() - return torch.stack(im, 0), torch.cat(label, 0), path, shapes - - -# Ancillary functions -------------------------------------------------------------------------------------------------- -def 
flatten_recursive(path=DATASETS_DIR / 'coco128'):
-    """Flatten a recursive directory by bringing all files to top level."""
-    new_path = Path(f'{str(path)}_flat')
-    if os.path.exists(new_path):
-        shutil.rmtree(new_path)  # delete output folder
-    os.makedirs(new_path)  # make new output folder
-    for file in tqdm(glob.glob(f'{str(Path(path))}/**/*.*', recursive=True)):
-        shutil.copyfile(file, new_path / Path(file).name)
-
-
-def extract_boxes(path=DATASETS_DIR / 'coco128'):  # from utils.dataloaders import *; extract_boxes()
-    """Convert a detection dataset into a classification dataset, with one directory per class."""
-    path = Path(path)  # images dir
-    shutil.rmtree(path / 'classification') if (path / 'classification').is_dir() else None  # remove existing
-    files = list(path.rglob('*.*'))
-    n = len(files)  # number of files
-    for im_file in tqdm(files, total=n):
-        if im_file.suffix[1:] in IMG_FORMATS:
-            # Image
-            im = cv2.imread(str(im_file))[..., ::-1]  # BGR to RGB
-            h, w = im.shape[:2]
-
-            # Labels
-            lb_file = Path(img2label_paths([str(im_file)])[0])
-            if Path(lb_file).exists():
-                with open(lb_file) as f:
-                    lb = np.array([x.split() for x in f.read().strip().splitlines()], dtype=np.float32)  # labels
-
-                for j, x in enumerate(lb):
-                    c = int(x[0])  # class
-                    f = (path / 'classification') / f'{c}' / f'{path.stem}_{im_file.stem}_{j}.jpg'  # new filename
-                    if not f.parent.is_dir():
-                        f.parent.mkdir(parents=True)
-
-                    b = x[1:] * [w, h, w, h]  # box
-                    # b[2:] = b[2:].max()  # rectangle to square
-                    b[2:] = b[2:] * 1.2 + 3  # pad
-                    b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(int)
-
-                    b[[0, 2]] = np.clip(b[[0, 2]], 0, w)  # clip boxes outside of image
-                    b[[1, 3]] = np.clip(b[[1, 3]], 0, h)
-                    assert cv2.imwrite(str(f), im[b[1]:b[3], b[0]:b[2]]), f'box failure in {f}'
-
-
-def autosplit(path=DATASETS_DIR / 'coco128/images', weights=(0.9, 0.1, 0.0), annotated_only=False):
-    """Autosplit a dataset into train/val/test splits and save path/autosplit_*.txt files.
-    Usage: from utils.dataloaders import *; autosplit()
-    Arguments
-        path: Path to images directory
-        weights: Train, val, test weights (list, tuple)
-        annotated_only: Only use images with an annotated txt file
-    """
-    path = Path(path)  # images dir
-    files = sorted(x for x in path.rglob('*.*') if x.suffix[1:].lower() in IMG_FORMATS)  # image files only
-    n = len(files)  # number of files
-    random.seed(0)  # for reproducibility
-    indices = random.choices([0, 1, 2], weights=weights, k=n)  # assign each image to a split
-
-    txt = ['autosplit_train.txt', 'autosplit_val.txt', 'autosplit_test.txt']  # 3 txt files
-    for x in txt:
-        if (path.parent / x).exists():
-            (path.parent / x).unlink()  # remove existing
-
-    print(f'Autosplitting images from {path}' + ', using *.txt labeled images only' * annotated_only)
-    for i, img in tqdm(zip(indices, files), total=n):
-        if not annotated_only or Path(img2label_paths([str(img)])[0]).exists():  # check label
-            with open(path.parent / txt[i], 'a') as f:
-                f.write(f'./{img.relative_to(path.parent).as_posix()}' + '\n')  # add image to txt file
-
-
-def verify_image_label(args):
-    """Verify one image-label pair."""
-    im_file, lb_file, prefix = args
-    nm, nf, ne, nc, msg, segments = 0, 0, 0, 0, '', []  # number (missing, found, empty, corrupt), message, segments
-    try:
-        # Verify images
-        im = Image.open(im_file)
-        im.verify()  # PIL verify
-        shape = exif_size(im)  # image size
-        assert (shape[0] > 9) & (shape[1] > 9), f'image size {shape} <10 pixels'
-        assert im.format.lower() in IMG_FORMATS, f'invalid image format {im.format}'
-        if 
im.format.lower() in ('jpg', 'jpeg'): - with open(im_file, 'rb') as f: - f.seek(-2, 2) - if f.read() != b'\xff\xd9': # corrupt JPEG - ImageOps.exif_transpose(Image.open(im_file)).save(im_file, 'JPEG', subsampling=0, quality=100) - msg = f'{prefix}WARNING ⚠️ {im_file}: corrupt JPEG restored and saved' - - # Verify labels - if os.path.isfile(lb_file): - nf = 1 # label found - with open(lb_file) as f: - lb = [x.split() for x in f.read().strip().splitlines() if len(x)] - if any(len(x) > 6 for x in lb): # is segment - classes = np.array([x[0] for x in lb], dtype=np.float32) - segments = [np.array(x[1:], dtype=np.float32).reshape(-1, 2) for x in lb] # (cls, xy1...) - lb = np.concatenate((classes.reshape(-1, 1), segments2boxes(segments)), 1) # (cls, xywh) - lb = np.array(lb, dtype=np.float32) - nl = len(lb) - if nl: - assert lb.shape[1] == 5, f'labels require 5 columns, {lb.shape[1]} columns detected' - assert (lb >= 0).all(), f'negative label values {lb[lb < 0]}' - assert (lb[:, 1:] <= 1).all(), f'non-normalized or out of bounds coordinates {lb[:, 1:][lb[:, 1:] > 1]}' - _, i = np.unique(lb, axis=0, return_index=True) - if len(i) < nl: # duplicate row check - lb = lb[i] # remove duplicates - if segments: - segments = [segments[x] for x in i] - msg = f'{prefix}WARNING ⚠️ {im_file}: {nl - len(i)} duplicate labels removed' - else: - ne = 1 # label empty - lb = np.zeros((0, 5), dtype=np.float32) - else: - nm = 1 # label missing - lb = np.zeros((0, 5), dtype=np.float32) - return im_file, lb, shape, segments, nm, nf, ne, nc, msg - except Exception as e: - nc = 1 - msg = f'{prefix}WARNING ⚠️ {im_file}: ignoring corrupt image/label: {e}' - return [None, None, None, None, nm, nf, ne, nc, msg] - - -# Classification dataloaders ------------------------------------------------------------------------------------------- -class ClassificationDataset(torchvision.datasets.ImageFolder): - """ - YOLOv5 Classification Dataset. 
- Arguments - root: Dataset path - transform: torchvision transforms, used by default - album_transform: Albumentations transforms, used if installed - """ - - def __init__(self, root, augment, imgsz, cache=False): - """Initialize YOLO dataset with root, augmentation, image size, and cache parameters.""" - super().__init__(root=root) - self.torch_transforms = classify_transforms(imgsz) - self.album_transforms = classify_albumentations(augment, imgsz) if augment else None - self.cache_ram = cache is True or cache == 'ram' - self.cache_disk = cache == 'disk' - self.samples = [list(x) + [Path(x[0]).with_suffix('.npy'), None] for x in self.samples] # file, index, npy, im - - def __getitem__(self, i): - """Retrieves data items of 'dataset' via indices & creates InfiniteDataLoader.""" - f, j, fn, im = self.samples[i] # filename, index, filename.with_suffix('.npy'), image - if self.cache_ram and im is None: - im = self.samples[i][3] = cv2.imread(f) - elif self.cache_disk: - if not fn.exists(): # load npy - np.save(fn.as_posix(), cv2.imread(f)) - im = np.load(fn) - else: # read image - im = cv2.imread(f) # BGR - if self.album_transforms: - sample = self.album_transforms(image=cv2.cvtColor(im, cv2.COLOR_BGR2RGB))['image'] - else: - sample = self.torch_transforms(im) - return sample, j - - -def create_classification_dataloader(path, - imgsz=224, - batch_size=16, - augment=True, - cache=False, - rank=-1, - workers=8, - shuffle=True): - """Returns Dataloader object to be used with YOLOv5 Classifier.""" - with torch_distributed_zero_first(rank): # init dataset *.cache only once if DDP - dataset = ClassificationDataset(root=path, imgsz=imgsz, augment=augment, cache=cache) - batch_size = min(batch_size, len(dataset)) - nd = torch.cuda.device_count() - nw = min([os.cpu_count() // max(nd, 1), batch_size if batch_size > 1 else 0, workers]) - sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle) - generator = torch.Generator() - generator.manual_seed(6148914691236517205 + RANK) - return InfiniteDataLoader(dataset, - batch_size=batch_size, - shuffle=shuffle and sampler is None, - num_workers=nw, - sampler=sampler, - pin_memory=PIN_MEMORY, - worker_init_fn=seed_worker, - generator=generator) # or DataLoader(persistent_workers=True) diff --git a/ultralytics/yolo/data/utils.py b/ultralytics/yolo/data/utils.py index 9829671..146f23e 100644 --- a/ultralytics/yolo/data/utils.py +++ b/ultralytics/yolo/data/utils.py @@ -4,6 +4,7 @@ import contextlib import hashlib import json import os +import random import subprocess import time import zipfile @@ -522,3 +523,35 @@ def zip_directory(dir, use_zipfile_library=True): else: import shutil shutil.make_archive(dir, 'zip', dir) + + +def autosplit(path=DATASETS_DIR / 'coco128/images', weights=(0.9, 0.1, 0.0), annotated_only=False): + """ + Autosplit a dataset into train/val/test splits and save the resulting splits into autosplit_*.txt files. + + Args: + path (Path, optional): Path to images directory. Defaults to DATASETS_DIR / 'coco128/images'. + weights (list | tuple, optional): Train, validation, and test split fractions. Defaults to (0.9, 0.1, 0.0). + annotated_only (bool, optional): If True, only images with an associated txt file are used. Defaults to False. 
+
+    Usage:
+        from ultralytics.yolo.data.utils import autosplit
+        autosplit()
+    """
+
+    path = Path(path)  # images dir
+    files = sorted(x for x in path.rglob('*.*') if x.suffix[1:].lower() in IMG_FORMATS)  # image files only
+    n = len(files)  # number of files
+    random.seed(0)  # for reproducibility
+    indices = random.choices([0, 1, 2], weights=weights, k=n)  # assign each image to a split
+
+    txt = ['autosplit_train.txt', 'autosplit_val.txt', 'autosplit_test.txt']  # 3 txt files
+    for x in txt:
+        if (path.parent / x).exists():
+            (path.parent / x).unlink()  # remove existing
+
+    LOGGER.info(f'Autosplitting images from {path}' + ', using *.txt labeled images only' * annotated_only)
+    for i, img in tqdm(zip(indices, files), total=n):
+        if not annotated_only or Path(img2label_paths([str(img)])[0]).exists():  # check label
+            with open(path.parent / txt[i], 'a') as f:
+                f.write(f'./{img.relative_to(path.parent).as_posix()}' + '\n')  # add image to txt file
diff --git a/ultralytics/yolo/engine/trainer.py b/ultralytics/yolo/engine/trainer.py
index 144be9c..26ceb6c 100644
--- a/ultralytics/yolo/engine/trainer.py
+++ b/ultralytics/yolo/engine/trainer.py
@@ -244,7 +244,7 @@ class BaseTrainer:
         metric_keys = self.validator.metrics.keys + self.label_loss_items(prefix='val')
         self.metrics = dict(zip(metric_keys, [0] * len(metric_keys)))  # TODO: init metrics for plot_results()?
         self.ema = ModelEMA(self.model)
-        if self.args.plots and not self.args.v5loader:
+        if self.args.plots:
             self.plot_training_labels()
 
         # Optimizer
diff --git a/ultralytics/yolo/v8/detect/train.py b/ultralytics/yolo/v8/detect/train.py
index 1b475ed..abb94fc 100644
--- a/ultralytics/yolo/v8/detect/train.py
+++ b/ultralytics/yolo/v8/detect/train.py
@@ -6,9 +6,8 @@ import numpy as np
 from ultralytics.nn.tasks import DetectionModel
 from ultralytics.yolo import v8
 from ultralytics.yolo.data import build_dataloader, build_yolo_dataset
-from ultralytics.yolo.data.dataloaders.v5loader import create_dataloader
 from ultralytics.yolo.engine.trainer import BaseTrainer
-from ultralytics.yolo.utils import DEFAULT_CFG, LOGGER, RANK, colorstr
+from ultralytics.yolo.utils import DEFAULT_CFG, LOGGER, RANK
 from ultralytics.yolo.utils.plotting import plot_images, plot_labels, plot_results
 from ultralytics.yolo.utils.torch_utils import de_parallel, torch_distributed_zero_first
 
@@ -17,7 +16,8 @@ from ultralytics.yolo.utils.torch_utils import de_parallel, torch_distributed_ze
 class DetectionTrainer(BaseTrainer):
 
     def build_dataset(self, img_path, mode='train', batch=None):
-        """Build YOLO Dataset
+        """
+        Build YOLO Dataset.
 
         Args:
             img_path (str): Path to the folder containing images.
@@ -28,27 +28,7 @@ class DetectionTrainer(BaseTrainer):
         return build_yolo_dataset(self.args, img_path, batch, self.data, mode=mode, rect=mode == 'val', stride=gs)
 
     def get_dataloader(self, dataset_path, batch_size=16, rank=0, mode='train'):
-        """TODO: manage splits differently."""
-        # Calculate stride - check if model is initialized
-        if self.args.v5loader:
-            LOGGER.warning("WARNING ⚠️ 'v5loader' feature is deprecated and will be removed soon. 
You can train using "
-                           'the default YOLOv8 dataloader instead, no argument is needed.')
-            gs = max(int(de_parallel(self.model).stride.max() if self.model else 0), 32)
-            return create_dataloader(path=dataset_path,
-                                     imgsz=self.args.imgsz,
-                                     batch_size=batch_size,
-                                     stride=gs,
-                                     hyp=vars(self.args),
-                                     augment=mode == 'train',
-                                     cache=self.args.cache,
-                                     pad=0 if mode == 'train' else 0.5,
-                                     rect=self.args.rect or mode == 'val',
-                                     rank=rank,
-                                     workers=self.args.workers,
-                                     close_mosaic=self.args.close_mosaic != 0,
-                                     prefix=colorstr(f'{mode}: '),
-                                     shuffle=mode == 'train',
-                                     seed=self.args.seed)[0]
+        """Construct and return dataloader."""
         assert mode in ['train', 'val']
         with torch_distributed_zero_first(rank):  # init dataset *.cache only once if DDP
             dataset = self.build_dataset(dataset_path, mode, batch_size)
diff --git a/ultralytics/yolo/v8/detect/val.py b/ultralytics/yolo/v8/detect/val.py
index 77d346c..c28dfb5 100644
--- a/ultralytics/yolo/v8/detect/val.py
+++ b/ultralytics/yolo/v8/detect/val.py
@@ -7,9 +7,8 @@ import numpy as np
 import torch
 
 from ultralytics.yolo.data import build_dataloader, build_yolo_dataset
-from ultralytics.yolo.data.dataloaders.v5loader import create_dataloader
 from ultralytics.yolo.engine.validator import BaseValidator
-from ultralytics.yolo.utils import DEFAULT_CFG, LOGGER, colorstr, ops
+from ultralytics.yolo.utils import DEFAULT_CFG, LOGGER, ops
 from ultralytics.yolo.utils.checks import check_requirements
 from ultralytics.yolo.utils.metrics import ConfusionMatrix, DetMetrics, box_iou
 from ultralytics.yolo.utils.plotting import output_to_target, plot_images
@@ -186,28 +185,9 @@ class DetectionValidator(BaseValidator):
         return build_yolo_dataset(self.args, img_path, batch, self.data, mode=mode, stride=gs)
 
     def get_dataloader(self, dataset_path, batch_size):
-        """TODO: manage splits differently."""
-        # Calculate stride - check if model is initialized
-        if self.args.v5loader:
-            LOGGER.warning("WARNING ⚠️ 'v5loader' feature is deprecated and will be removed soon. You can train using "
-                           'the default YOLOv8 dataloader instead, no argument is needed.')
-            gs = max(int(de_parallel(self.model).stride if self.model else 0), 32)
-            return create_dataloader(path=dataset_path,
-                                     imgsz=self.args.imgsz,
-                                     batch_size=batch_size,
-                                     stride=gs,
-                                     hyp=vars(self.args),
-                                     cache=False,
-                                     pad=0.5,
-                                     rect=self.args.rect,
-                                     workers=self.args.workers,
-                                     prefix=colorstr(f'{self.args.mode}: '),
-                                     shuffle=False,
-                                     seed=self.args.seed)[0]
-
+        """Construct and return dataloader."""
         dataset = self.build_dataset(dataset_path, batch=batch_size, mode='val')
-        dataloader = build_dataloader(dataset, batch_size, self.args.workers, shuffle=False, rank=-1)
-        return dataloader
+        return build_dataloader(dataset, batch_size, self.args.workers, shuffle=False, rank=-1)
 
     def plot_val_samples(self, batch, ni):
         """Plot validation image samples."""
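
Note on the label round trip in the removed `__getitem__` above: labels stored as normalized xywh are mapped to letterboxed pixel xyxy with `xywhn2xyxy` before geometric augmentation, then mapped back with `xyxy2xywhn`. A self-contained numpy sketch of that math; these are local stand-ins written only to illustrate the arithmetic, not the library helpers (the real ones, which the deleted loader imported and whose signatures differ slightly, handle clipping and epsilon padding):

import numpy as np

def xywhn2xyxy(x, w, h, padw=0, padh=0):
    y = np.empty_like(x)
    y[:, 0] = w * (x[:, 0] - x[:, 2] / 2) + padw  # left edge in pixels
    y[:, 1] = h * (x[:, 1] - x[:, 3] / 2) + padh  # top edge in pixels
    y[:, 2] = w * (x[:, 0] + x[:, 2] / 2) + padw  # right edge in pixels
    y[:, 3] = h * (x[:, 1] + x[:, 3] / 2) + padh  # bottom edge in pixels
    return y

def xyxy2xywhn(y, w, h, padw=0, padh=0):  # pad handling added locally for symmetry
    x = np.empty_like(y)
    x[:, 0] = ((y[:, 0] + y[:, 2]) / 2 - padw) / w  # normalized center x
    x[:, 1] = ((y[:, 1] + y[:, 3]) / 2 - padh) / h  # normalized center y
    x[:, 2] = (y[:, 2] - y[:, 0]) / w               # normalized width
    x[:, 3] = (y[:, 3] - y[:, 1]) / h               # normalized height
    return x

labels = np.array([[0.5, 0.5, 0.2, 0.4]])            # one centered box, normalized xywh
xyxy = xywhn2xyxy(labels, w=640, h=480, padw=16, padh=12)
assert np.allclose(xyxy2xywhn(xyxy, 640, 480, 16, 12), labels)  # round trip is lossless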
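The quadrant arithmetic in the removed `load_mosaic` is easier to see with concrete numbers. A minimal sketch of the i == 0 (top-left) case; the 640x480 source image and the mosaic centre below are assumed values, not anything from the patch:

s, xc, yc, h, w = 640, 800, 700, 480, 640  # img_size, assumed mosaic centre, assumed image h/w

x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc  # destination slot in the 2s x 2s canvas
x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h  # matching crop from the source image

print((x1a, y1a, x2a, y2a))  # (160, 220, 800, 700): the tile's bottom-right corner is pinned to the centre
print((x1b, y1b, x2b, y2b))  # (0, 0, 640, 480): the whole image fits, so nothing is cropped away
assert (x2a - x1a, y2a - y1a) == (x2b - x1b, y2b - y1b)  # destination and crop sizes always match
# labels are then shifted by padw = x1a - x1b and padh = y1a - y1b (160 and 220 here)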
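The new `collate_fn` above emits one flat batch dict ('img', 'cls', 'bboxes', 'batch_idx') instead of a per-image label tensor, which is why each box carries its source image index. A sketch of recovering per-image boxes from that layout, using a fabricated two-image batch with illustrative shapes and values:

import torch

batch = {
    'img': torch.zeros(2, 3, 640, 640),      # stacked CHW images
    'cls': torch.tensor([[0.], [2.], [1.]]),  # one class id per box
    'bboxes': torch.rand(3, 4),               # normalized xywh boxes
    'batch_idx': torch.tensor([0., 0., 1.])}  # source image of each box

for i in range(batch['img'].shape[0]):
    keep = batch['batch_idx'] == i  # boolean mask selecting this image's boxes
    print(f'image {i}: {int(keep.sum())} boxes', batch['cls'][keep].squeeze(-1), batch['bboxes'][keep])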
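With `v5loader` gone, `autosplit` now lives in `ultralytics.yolo.data.utils` (see the addition above). A minimal usage sketch; the coco128 path is an assumption, and any YOLO-style images directory laid out the same way works:

from ultralytics.yolo.data.utils import autosplit

# Writes autosplit_train.txt and autosplit_val.txt next to the images directory;
# each line is a relative path like ./images/000000000009.jpg. With a 0.0 test
# weight, autosplit_test.txt receives no entries (files are opened on demand).
autosplit(path='datasets/coco128/images', weights=(0.9, 0.1, 0.0), annotated_only=False)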