diff --git a/docs/reference/yolo/data/dataloaders/v5augmentations.md b/docs/reference/yolo/data/dataloaders/v5augmentations.md
deleted file mode 100644
index 63df369..0000000
--- a/docs/reference/yolo/data/dataloaders/v5augmentations.md
+++ /dev/null
@@ -1,89 +0,0 @@
----
-description: Enhance image data with Albumentations CenterCrop, normalize, augment_hsv, replicate, random_perspective, cutout, & box_candidates.
-keywords: YOLO, object detection, data loaders, V5 augmentations, CenterCrop, normalize, random_perspective
----
-
-## Albumentations
----
-### ::: ultralytics.yolo.data.dataloaders.v5augmentations.Albumentations
-
-
-## LetterBox
----
-### ::: ultralytics.yolo.data.dataloaders.v5augmentations.LetterBox
-
-
-## CenterCrop
----
-### ::: ultralytics.yolo.data.dataloaders.v5augmentations.CenterCrop
-
-
-## ToTensor
----
-### ::: ultralytics.yolo.data.dataloaders.v5augmentations.ToTensor
-
-
-## normalize
----
-### ::: ultralytics.yolo.data.dataloaders.v5augmentations.normalize
-
-
-## denormalize
----
-### ::: ultralytics.yolo.data.dataloaders.v5augmentations.denormalize
-
-
-## augment_hsv
----
-### ::: ultralytics.yolo.data.dataloaders.v5augmentations.augment_hsv
-
-
-## hist_equalize
----
-### ::: ultralytics.yolo.data.dataloaders.v5augmentations.hist_equalize
-
-
-## replicate
----
-### ::: ultralytics.yolo.data.dataloaders.v5augmentations.replicate
-
-
-## letterbox
----
-### ::: ultralytics.yolo.data.dataloaders.v5augmentations.letterbox
-
-
-## random_perspective
----
-### ::: ultralytics.yolo.data.dataloaders.v5augmentations.random_perspective
-
-
-## copy_paste
----
-### ::: ultralytics.yolo.data.dataloaders.v5augmentations.copy_paste
-
-
-## cutout
----
-### ::: ultralytics.yolo.data.dataloaders.v5augmentations.cutout
-
-
-## mixup
----
-### ::: ultralytics.yolo.data.dataloaders.v5augmentations.mixup
-
-
-## box_candidates
----
-### ::: ultralytics.yolo.data.dataloaders.v5augmentations.box_candidates
-
-
-## classify_albumentations
----
-### ::: ultralytics.yolo.data.dataloaders.v5augmentations.classify_albumentations
-
-
-## classify_transforms
----
-### ::: ultralytics.yolo.data.dataloaders.v5augmentations.classify_transforms
-
diff --git a/docs/reference/yolo/data/dataloaders/v5loader.md b/docs/reference/yolo/data/dataloaders/v5loader.md
deleted file mode 100644
index 5598695..0000000
--- a/docs/reference/yolo/data/dataloaders/v5loader.md
+++ /dev/null
@@ -1,94 +0,0 @@
----
-description: Efficiently load images and labels to models using Ultralytics YOLO's InfiniteDataLoader, LoadScreenshots, and LoadStreams.
-keywords: YOLO, data loader, image classification, object detection, Ultralytics
----
-
-## InfiniteDataLoader
----
-### ::: ultralytics.yolo.data.dataloaders.v5loader.InfiniteDataLoader
-
-
-## _RepeatSampler
----
-### ::: ultralytics.yolo.data.dataloaders.v5loader._RepeatSampler
-
-
-## LoadScreenshots
----
-### ::: ultralytics.yolo.data.dataloaders.v5loader.LoadScreenshots
-
-
-## LoadImages
----
-### ::: ultralytics.yolo.data.dataloaders.v5loader.LoadImages
-
-
-## LoadStreams
----
-### ::: ultralytics.yolo.data.dataloaders.v5loader.LoadStreams
-
-
-## LoadImagesAndLabels
----
-### ::: ultralytics.yolo.data.dataloaders.v5loader.LoadImagesAndLabels
-
-
-## ClassificationDataset
----
-### ::: ultralytics.yolo.data.dataloaders.v5loader.ClassificationDataset
-
-
-## get_hash
----
-### ::: ultralytics.yolo.data.dataloaders.v5loader.get_hash
-
-
-## exif_size
----
-### ::: ultralytics.yolo.data.dataloaders.v5loader.exif_size
-
-
-## exif_transpose
----
-### ::: ultralytics.yolo.data.dataloaders.v5loader.exif_transpose
-
-
-## seed_worker
----
-### ::: ultralytics.yolo.data.dataloaders.v5loader.seed_worker
-
-
-## create_dataloader
----
-### ::: ultralytics.yolo.data.dataloaders.v5loader.create_dataloader
-
-
-## img2label_paths
----
-### ::: ultralytics.yolo.data.dataloaders.v5loader.img2label_paths
-
-
-## flatten_recursive
----
-### ::: ultralytics.yolo.data.dataloaders.v5loader.flatten_recursive
-
-
-## extract_boxes
----
-### ::: ultralytics.yolo.data.dataloaders.v5loader.extract_boxes
-
-
-## autosplit
----
-### ::: ultralytics.yolo.data.dataloaders.v5loader.autosplit
-
-
-## verify_image_label
----
-### ::: ultralytics.yolo.data.dataloaders.v5loader.verify_image_label
-
-
-## create_classification_dataloader
----
-### ::: ultralytics.yolo.data.dataloaders.v5loader.create_classification_dataloader
-
diff --git a/docs/reference/yolo/data/utils.md b/docs/reference/yolo/data/utils.md
index f0f2e2f..248321b 100644
--- a/docs/reference/yolo/data/utils.md
+++ b/docs/reference/yolo/data/utils.md
@@ -67,3 +67,8 @@ keywords: YOLOv4, Object Detection, Computer Vision, Deep Learning, Convolutiona
---
### ::: ultralytics.yolo.data.utils.zip_directory
+
+## autosplit
+---
+### ::: ultralytics.yolo.data.utils.autosplit
+
diff --git a/docs/reference/yolo/utils/__init__.md b/docs/reference/yolo/utils/__init__.md
index 77b6584..4fa7604 100644
--- a/docs/reference/yolo/utils/__init__.md
+++ b/docs/reference/yolo/utils/__init__.md
@@ -18,6 +18,11 @@ keywords: Ultralytics, YOLO, utils, SimpleClass, IterableSimpleNamespace, EmojiF
### ::: ultralytics.yolo.utils.EmojiFilter
+## ThreadingLocked
+---
+### ::: ultralytics.yolo.utils.ThreadingLocked
+
+
## TryExcept
---
### ::: ultralytics.yolo.utils.TryExcept
diff --git a/docs/reference/yolo/utils/torch_utils.md b/docs/reference/yolo/utils/torch_utils.md
index 6d24cac..727a2e1 100644
--- a/docs/reference/yolo/utils/torch_utils.md
+++ b/docs/reference/yolo/utils/torch_utils.md
@@ -23,6 +23,11 @@ keywords: Ultralytics YOLO, Torch, Utils, Pytorch, Object Detection
### ::: ultralytics.yolo.utils.torch_utils.smart_inference_mode
+## get_cpu_info
+---
+### ::: ultralytics.yolo.utils.torch_utils.get_cpu_info
+
+
## select_device
---
### ::: ultralytics.yolo.utils.torch_utils.select_device
diff --git a/examples/tutorial.ipynb b/examples/tutorial.ipynb
index 9bb7150..9f746e2 100644
--- a/examples/tutorial.ipynb
+++ b/examples/tutorial.ipynb
@@ -300,7 +300,7 @@
"name": "stdout",
"text": [
"Ultralytics YOLOv8.0.71 🚀 Python-3.9.16 torch-2.0.0+cu118 CUDA:0 (Tesla T4, 15102MiB)\n",
- "\u001b[34m\u001b[1myolo/engine/trainer: \u001b[0mtask=detect, mode=train, model=yolov8n.pt, data=coco128.yaml, epochs=3, patience=50, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=None, exist_ok=False, pretrained=False, optimizer=SGD, verbose=True, seed=0, deterministic=True, single_cls=False, image_weights=False, rect=False, cos_lr=False, close_mosaic=0, resume=False, amp=True, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, show=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, vid_stride=1, line_width=3, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, boxes=True, format=torchscript, keras=False, optimize=False, int8=False, dynamic=False, simplify=False, opset=None, workspace=4, nms=False, lr0=0.01, lrf=0.01, momentum=0.937, weight_decay=0.0005, warmup_epochs=3.0, warmup_momentum=0.8, warmup_bias_lr=0.1, box=7.5, cls=0.5, dfl=1.5, pose=12.0, kobj=1.0, label_smoothing=0.0, nbs=64, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, degrees=0.0, translate=0.1, scale=0.5, shear=0.0, perspective=0.0, flipud=0.0, fliplr=0.5, mosaic=1.0, mixup=0.0, copy_paste=0.0, cfg=None, v5loader=False, tracker=botsort.yaml, save_dir=runs/detect/train\n",
+ "\u001b[34m\u001b[1myolo/engine/trainer: \u001b[0mtask=detect, mode=train, model=yolov8n.pt, data=coco128.yaml, epochs=3, patience=50, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=None, exist_ok=False, pretrained=False, optimizer=SGD, verbose=True, seed=0, deterministic=True, single_cls=False, image_weights=False, rect=False, cos_lr=False, close_mosaic=0, resume=False, amp=True, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, show=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, vid_stride=1, line_width=3, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, boxes=True, format=torchscript, keras=False, optimize=False, int8=False, dynamic=False, simplify=False, opset=None, workspace=4, nms=False, lr0=0.01, lrf=0.01, momentum=0.937, weight_decay=0.0005, warmup_epochs=3.0, warmup_momentum=0.8, warmup_bias_lr=0.1, box=7.5, cls=0.5, dfl=1.5, pose=12.0, kobj=1.0, label_smoothing=0.0, nbs=64, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, degrees=0.0, translate=0.1, scale=0.5, shear=0.0, perspective=0.0, flipud=0.0, fliplr=0.5, mosaic=1.0, mixup=0.0, copy_paste=0.0, cfg=None, tracker=botsort.yaml, save_dir=runs/detect/train\n",
"\n",
" from n params module arguments \n",
" 0 -1 1 464 ultralytics.nn.modules.Conv [3, 16, 3, 2] \n",
diff --git a/mkdocs.yml b/mkdocs.yml
index 219a059..19181ed 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -305,8 +305,6 @@ nav:
- converter: reference/yolo/data/converter.md
- dataloaders:
- stream_loaders: reference/yolo/data/dataloaders/stream_loaders.md
- - v5augmentations: reference/yolo/data/dataloaders/v5augmentations.md
- - v5loader: reference/yolo/data/dataloaders/v5loader.md
- dataset: reference/yolo/data/dataset.md
- dataset_wrappers: reference/yolo/data/dataset_wrappers.md
- utils: reference/yolo/data/utils.md
diff --git a/tests/test_engine.py b/tests/test_engine.py
index b011044..deb546c 100644
--- a/tests/test_engine.py
+++ b/tests/test_engine.py
@@ -65,7 +65,6 @@ def test_detect():
def test_segment():
overrides = {'data': 'coco8-seg.yaml', 'model': CFG_SEG, 'imgsz': 32, 'epochs': 1, 'save': False}
CFG.data = 'coco8-seg.yaml'
- CFG.v5loader = False
# YOLO(CFG_SEG).train(**overrides) # works
# trainer
diff --git a/ultralytics/__init__.py b/ultralytics/__init__.py
index a4d8652..d4ffad6 100644
--- a/ultralytics/__init__.py
+++ b/ultralytics/__init__.py
@@ -1,6 +1,6 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
-__version__ = '8.0.134'
+__version__ = '8.0.135'
from ultralytics.hub import start
from ultralytics.vit.rtdetr import RTDETR
diff --git a/ultralytics/datasets/xView.yaml b/ultralytics/datasets/xView.yaml
index 6049f6f..f58c4e5 100644
--- a/ultralytics/datasets/xView.yaml
+++ b/ultralytics/datasets/xView.yaml
@@ -87,7 +87,7 @@ download: |
from PIL import Image
from tqdm import tqdm
- from ultralytics.yolo.data.dataloaders.v5loader import autosplit
+ from ultralytics.yolo.data.utils import autosplit
from ultralytics.yolo.utils.ops import xyxy2xywhn
diff --git a/ultralytics/yolo/cfg/__init__.py b/ultralytics/yolo/cfg/__init__.py
index 71a9022..41d86aa 100644
--- a/ultralytics/yolo/cfg/__init__.py
+++ b/ultralytics/yolo/cfg/__init__.py
@@ -75,7 +75,7 @@ CFG_INT_KEYS = ('epochs', 'patience', 'batch', 'workers', 'seed', 'close_mosaic'
CFG_BOOL_KEYS = ('save', 'exist_ok', 'verbose', 'deterministic', 'single_cls', 'rect', 'cos_lr', 'overlap_mask', 'val',
'save_json', 'save_hybrid', 'half', 'dnn', 'plots', 'show', 'save_txt', 'save_conf', 'save_crop',
'show_labels', 'show_conf', 'visualize', 'augment', 'agnostic_nms', 'retina_masks', 'boxes', 'keras',
- 'optimize', 'int8', 'dynamic', 'simplify', 'nms', 'v5loader', 'profile')
+ 'optimize', 'int8', 'dynamic', 'simplify', 'nms', 'profile')
def cfg2dict(cfg):
diff --git a/ultralytics/yolo/cfg/default.yaml b/ultralytics/yolo/cfg/default.yaml
index 25e4001..5babd25 100644
--- a/ultralytics/yolo/cfg/default.yaml
+++ b/ultralytics/yolo/cfg/default.yaml
@@ -110,8 +110,5 @@ copy_paste: 0.0 # (float) segment copy-paste (probability)
# Custom config.yaml ---------------------------------------------------------------------------------------------------
cfg: # (str, optional) for overriding defaults.yaml
-# Debug, do not modify -------------------------------------------------------------------------------------------------
-v5loader: False # (bool) use legacy YOLOv5 dataloader (deprecated)
-
# Tracker settings ------------------------------------------------------------------------------------------------------
tracker: botsort.yaml # (str) tracker type, choices=[botsort.yaml, bytetrack.yaml]
diff --git a/ultralytics/yolo/data/dataloaders/v5augmentations.py b/ultralytics/yolo/data/dataloaders/v5augmentations.py
deleted file mode 100644
index 8e0b3e2..0000000
--- a/ultralytics/yolo/data/dataloaders/v5augmentations.py
+++ /dev/null
@@ -1,407 +0,0 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-"""
-Image augmentation functions
-"""
-
-import math
-import random
-
-import cv2
-import numpy as np
-import torch
-import torchvision.transforms as T
-import torchvision.transforms.functional as TF
-
-from ultralytics.yolo.utils import LOGGER, colorstr
-from ultralytics.yolo.utils.checks import check_version
-from ultralytics.yolo.utils.metrics import bbox_ioa
-from ultralytics.yolo.utils.ops import resample_segments, segment2box, xywhn2xyxy
-
-IMAGENET_MEAN = 0.485, 0.456, 0.406 # RGB mean
-IMAGENET_STD = 0.229, 0.224, 0.225 # RGB standard deviation
-
-
-class Albumentations:
- # YOLOv5 Albumentations class (optional, only used if package is installed)
- def __init__(self, size=640):
- """Instantiate object with image augmentations for YOLOv5."""
- self.transform = None
- prefix = colorstr('albumentations: ')
- try:
- import albumentations as A
- check_version(A.__version__, '1.0.3', hard=True) # version requirement
-
- T = [
- A.RandomResizedCrop(height=size, width=size, scale=(0.8, 1.0), ratio=(0.9, 1.11), p=0.0),
- A.Blur(p=0.01),
- A.MedianBlur(p=0.01),
- A.ToGray(p=0.01),
- A.CLAHE(p=0.01),
- A.RandomBrightnessContrast(p=0.0),
- A.RandomGamma(p=0.0),
- A.ImageCompression(quality_lower=75, p=0.0)] # transforms
- self.transform = A.Compose(T, bbox_params=A.BboxParams(format='yolo', label_fields=['class_labels']))
-
- LOGGER.info(prefix + ', '.join(f'{x}'.replace('always_apply=False, ', '') for x in T if x.p))
- except ImportError: # package not installed, skip
- pass
- except Exception as e:
- LOGGER.info(f'{prefix}{e}')
-
- def __call__(self, im, labels, p=1.0):
- """Transforms input image and labels with probability 'p'."""
- if self.transform and random.random() < p:
- new = self.transform(image=im, bboxes=labels[:, 1:], class_labels=labels[:, 0]) # transformed
- im, labels = new['image'], np.array([[c, *b] for c, b in zip(new['class_labels'], new['bboxes'])])
- return im, labels
-
-
-def normalize(x, mean=IMAGENET_MEAN, std=IMAGENET_STD, inplace=False):
- """Denormalize RGB images x per ImageNet stats in BCHW format, i.e. = (x - mean) / std."""
- return TF.normalize(x, mean, std, inplace=inplace)
-
-
-def denormalize(x, mean=IMAGENET_MEAN, std=IMAGENET_STD):
- """Denormalize RGB images x per ImageNet stats in BCHW format, i.e. = x * std + mean."""
- for i in range(3):
- x[:, i] = x[:, i] * std[i] + mean[i]
- return x
-
-
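-# Usage sketch (illustrative; not part of the original module): check that normalize()
-# and denormalize() are inverses on a BCHW batch.
-def _demo_normalize_roundtrip():
- x = torch.rand(2, 3, 224, 224) # random RGB batch in [0, 1]
- y = denormalize(normalize(x)) # (x - mean) / std, then x * std + mean
- assert torch.allclose(y, x, atol=1e-6)
-
-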
-def augment_hsv(im, hgain=0.5, sgain=0.5, vgain=0.5):
- """HSV color-space augmentation."""
- if hgain or sgain or vgain:
- r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1 # random gains
- hue, sat, val = cv2.split(cv2.cvtColor(im, cv2.COLOR_BGR2HSV))
- dtype = im.dtype # uint8
-
- x = np.arange(0, 256, dtype=r.dtype)
- lut_hue = ((x * r[0]) % 180).astype(dtype)
- lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
- lut_val = np.clip(x * r[2], 0, 255).astype(dtype)
-
- im_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val)))
- cv2.cvtColor(im_hsv, cv2.COLOR_HSV2BGR, dst=im) # no return needed
-
-
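-# Usage sketch (illustrative; not part of the original module): augment_hsv modifies the
-# image in place via cv2.LUT, so pass a copy if the original frame must be kept.
-def _demo_augment_hsv(im_bgr):
- out = im_bgr.copy() # keep the source uint8 BGR frame intact
- augment_hsv(out, hgain=0.015, sgain=0.7, vgain=0.4) # default train-time gains
- return out
-
-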
-def hist_equalize(im, clahe=True, bgr=False):
- """Equalize histogram on BGR image 'im' with im.shape(n,m,3) and range 0-255."""
- yuv = cv2.cvtColor(im, cv2.COLOR_BGR2YUV if bgr else cv2.COLOR_RGB2YUV)
- if clahe:
- c = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
- yuv[:, :, 0] = c.apply(yuv[:, :, 0])
- else:
- yuv[:, :, 0] = cv2.equalizeHist(yuv[:, :, 0]) # equalize Y channel histogram
- return cv2.cvtColor(yuv, cv2.COLOR_YUV2BGR if bgr else cv2.COLOR_YUV2RGB) # convert YUV image to RGB
-
-
-def replicate(im, labels):
- """Replicate labels."""
- h, w = im.shape[:2]
- boxes = labels[:, 1:].astype(int)
- x1, y1, x2, y2 = boxes.T
- s = ((x2 - x1) + (y2 - y1)) / 2 # side length (pixels)
- for i in s.argsort()[:round(s.size * 0.5)]: # smallest indices
- x1b, y1b, x2b, y2b = boxes[i]
- bh, bw = y2b - y1b, x2b - x1b
- yc, xc = int(random.uniform(0, h - bh)), int(random.uniform(0, w - bw)) # offset x, y
- x1a, y1a, x2a, y2a = [xc, yc, xc + bw, yc + bh]
- im[y1a:y2a, x1a:x2a] = im[y1b:y2b, x1b:x2b] # im4[ymin:ymax, xmin:xmax]
- labels = np.append(labels, [[labels[i, 0], x1a, y1a, x2a, y2a]], axis=0)
-
- return im, labels
-
-
-def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
- """Resize and pad image while meeting stride-multiple constraints."""
- shape = im.shape[:2] # current shape [height, width]
- if isinstance(new_shape, int):
- new_shape = (new_shape, new_shape)
-
- # Scale ratio (new / old)
- r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
- if not scaleup: # only scale down, do not scale up (for better val mAP)
- r = min(r, 1.0)
-
- # Compute padding
- ratio = r, r # width, height ratios
- new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
- dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding
- if auto: # minimum rectangle
- dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding
- elif scaleFill: # stretch
- dw, dh = 0.0, 0.0
- new_unpad = (new_shape[1], new_shape[0])
- ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios
-
- dw /= 2 # divide padding into 2 sides
- dh /= 2
-
- if shape[::-1] != new_unpad: # resize
- im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
- top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
- left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
- im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border
- return im, ratio, (dw, dh)
-
-
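-# Usage sketch (illustrative; not part of the original module): letterbox for inference,
-# then map a predicted xyxy box back to original-image coordinates.
-def _demo_letterbox_unmap(im, box_xyxy):
- _, ratio, (dw, dh) = letterbox(im, new_shape=640, auto=False)
- x1, y1, x2, y2 = box_xyxy # box predicted on the letterboxed image
- return (x1 - dw) / ratio[0], (y1 - dh) / ratio[1], (x2 - dw) / ratio[0], (y2 - dh) / ratio[1]
-
-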
-def random_perspective(im,
- targets=(),
- segments=(),
- degrees=10,
- translate=.1,
- scale=.1,
- shear=10,
- perspective=0.0,
- border=(0, 0)):
- # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(0.1, 0.1), scale=(0.9, 1.1), shear=(-10, 10))
- # targets = [cls, xyxy]
-
- height = im.shape[0] + border[0] * 2 # shape(h,w,c)
- width = im.shape[1] + border[1] * 2
-
- # Center
- C = np.eye(3)
- C[0, 2] = -im.shape[1] / 2 # x translation (pixels)
- C[1, 2] = -im.shape[0] / 2 # y translation (pixels)
-
- # Perspective
- P = np.eye(3)
- P[2, 0] = random.uniform(-perspective, perspective) # x perspective (about y)
- P[2, 1] = random.uniform(-perspective, perspective) # y perspective (about x)
-
- # Rotation and Scale
- R = np.eye(3)
- a = random.uniform(-degrees, degrees)
- # a += random.choice([-180, -90, 0, 90]) # add 90deg rotations to small rotations
- s = random.uniform(1 - scale, 1 + scale)
- # s = 2 ** random.uniform(-scale, scale)
- R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)
-
- # Shear
- S = np.eye(3)
- S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # x shear (deg)
- S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # y shear (deg)
-
- # Translation
- T = np.eye(3)
- T[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * width # x translation (pixels)
- T[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * height # y translation (pixels)
-
- # Combined rotation matrix
- M = T @ S @ R @ P @ C # order of operations (right to left) is IMPORTANT
- if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any(): # image changed
- if perspective:
- im = cv2.warpPerspective(im, M, dsize=(width, height), borderValue=(114, 114, 114))
- else: # affine
- im = cv2.warpAffine(im, M[:2], dsize=(width, height), borderValue=(114, 114, 114))
-
- # Visualize
- # import matplotlib.pyplot as plt
- # ax = plt.subplots(1, 2, figsize=(12, 6))[1].ravel()
- # ax[0].imshow(im[:, :, ::-1]) # base
- # ax[1].imshow(im2[:, :, ::-1]) # warped
-
- # Transform label coordinates
- n = len(targets)
- if n:
- use_segments = any(x.any() for x in segments)
- new = np.zeros((n, 4))
- if use_segments: # warp segments
- segments = resample_segments(segments) # upsample
- for i, segment in enumerate(segments):
- xy = np.ones((len(segment), 3))
- xy[:, :2] = segment
- xy = xy @ M.T # transform
- xy = xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2] # perspective rescale or affine
-
- # Clip
- new[i] = segment2box(xy, width, height)
-
- else: # warp boxes
- xy = np.ones((n * 4, 3))
- xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1
- xy = xy @ M.T # transform
- xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]).reshape(n, 8) # perspective rescale or affine
-
- # Create new boxes
- x = xy[:, [0, 2, 4, 6]]
- y = xy[:, [1, 3, 5, 7]]
- new = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
-
- # Clip
- new[:, [0, 2]] = new[:, [0, 2]].clip(0, width)
- new[:, [1, 3]] = new[:, [1, 3]].clip(0, height)
-
- # Filter candidates
- i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.01 if use_segments else 0.10)
- targets = targets[i]
- targets[:, 1:5] = new[i]
-
- return im, targets
-
-
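-# Usage sketch (illustrative; not part of the original module): pixels and labels are
-# warped with the same combined matrix M = T @ S @ R @ P @ C, mirroring the mosaic call.
-def _demo_random_perspective(img4, labels4, img_size=640):
- border = (-img_size // 2, -img_size // 2) # mosaic border, as used by load_mosaic
- return random_perspective(img4, labels4, degrees=0.0, translate=0.1, scale=0.5,
- shear=0.0, perspective=0.0, border=border)
-
-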
-def copy_paste(im, labels, segments, p=0.5):
- """Implement Copy-Paste augmentation https://arxiv.org/abs/2012.07177, labels as nx5 np.array(cls, xyxy)."""
- n = len(segments)
- if p and n:
- h, w, c = im.shape # height, width, channels
- im_new = np.zeros(im.shape, np.uint8)
-
- # Calculate ioa first then select indexes randomly
- boxes = np.stack([w - labels[:, 3], labels[:, 2], w - labels[:, 1], labels[:, 4]], axis=-1) # (n, 4)
- ioa = bbox_ioa(boxes, labels[:, 1:5]) # intersection over area
- indexes = np.nonzero((ioa < 0.30).all(1))[0] # (N, )
- n = len(indexes)
- for j in random.sample(list(indexes), k=round(p * n)):
- l, box, s = labels[j], boxes[j], segments[j]
- labels = np.concatenate((labels, [[l[0], *box]]), 0)
- segments.append(np.concatenate((w - s[:, 0:1], s[:, 1:2]), 1))
- cv2.drawContours(im_new, [segments[j].astype(np.int32)], -1, (1, 1, 1), cv2.FILLED)
-
- result = cv2.flip(im, 1) # augment segments (flip left-right)
- i = cv2.flip(im_new, 1).astype(bool)
- im[i] = result[i] # cv2.imwrite('debug.jpg', im) # debug
-
- return im, labels, segments
-
-
-def cutout(im, labels, p=0.5):
- """Applies image cutout augmentation https://arxiv.org/abs/1708.04552."""
- if random.random() < p:
- h, w = im.shape[:2]
- scales = [0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [0.03125] * 16 # image size fraction
- for s in scales:
- mask_h = random.randint(1, int(h * s)) # create random masks
- mask_w = random.randint(1, int(w * s))
-
- # Box
- xmin = max(0, random.randint(0, w) - mask_w // 2)
- ymin = max(0, random.randint(0, h) - mask_h // 2)
- xmax = min(w, xmin + mask_w)
- ymax = min(h, ymin + mask_h)
-
- # Apply random color mask
- im[ymin:ymax, xmin:xmax] = [random.randint(64, 191) for _ in range(3)]
-
- # Return unobscured labels
- if len(labels) and s > 0.03:
- box = np.array([[xmin, ymin, xmax, ymax]], dtype=np.float32)
- ioa = bbox_ioa(box, xywhn2xyxy(labels[:, 1:5], w, h))[0] # intersection over area
- labels = labels[ioa < 0.60] # remove >60% obscured labels
-
- return labels
-
-
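-# Usage sketch (illustrative; not part of the original module): cutout draws random masks
-# on the image in place and returns only the labels left less than 60% obscured.
-def _demo_cutout(im, labels):
- im = im.copy() # protect the original image from in-place masking
- labels = cutout(im, labels, p=1.0) # p=1.0 forces the augmentation for the demo
- return im, labels
-
-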
-def mixup(im, labels, im2, labels2):
- """Applies MixUp augmentation https://arxiv.org/pdf/1710.09412.pdf."""
- r = np.random.beta(32.0, 32.0) # mixup ratio, alpha=beta=32.0
- im = (im * r + im2 * (1 - r)).astype(np.uint8)
- labels = np.concatenate((labels, labels2), 0)
- return im, labels
-
-
-def box_candidates(box1, box2, wh_thr=2, ar_thr=100, area_thr=0.1, eps=1e-16): # box1(4,n), box2(4,n)
- # Compute candidate boxes: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio
- w1, h1 = box1[2] - box1[0], box1[3] - box1[1]
- w2, h2 = box2[2] - box2[0], box2[3] - box2[1]
- ar = np.maximum(w2 / (h2 + eps), h2 / (w2 + eps)) # aspect ratio
- return (w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + eps) > area_thr) & (ar < ar_thr) # candidates
-
-
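-# Usage sketch (illustrative; not part of the original module): keep only boxes that
-# survive an augmentation with sane size, area ratio and aspect ratio.
-def _demo_box_candidates(boxes_before, boxes_after):
- i = box_candidates(box1=boxes_before.T, box2=boxes_after.T) # inputs are (n, 4) xyxy
- return boxes_after[i]
-
-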
-def classify_albumentations(
- augment=True,
- size=224,
- scale=(0.08, 1.0),
- ratio=(0.75, 1.0 / 0.75), # 0.75, 1.33
- hflip=0.5,
- vflip=0.0,
- jitter=0.4,
- mean=IMAGENET_MEAN,
- std=IMAGENET_STD,
- auto_aug=False):
- # YOLOv5 classification Albumentations (optional, only used if package is installed)
- prefix = colorstr('albumentations: ')
- try:
- import albumentations as A
- from albumentations.pytorch import ToTensorV2
- check_version(A.__version__, '1.0.3', hard=True) # version requirement
- if augment: # Resize and crop
- T = [A.RandomResizedCrop(height=size, width=size, scale=scale, ratio=ratio)]
- if auto_aug:
- # TODO: implement AugMix, AutoAug & RandAug in albumentation
- LOGGER.info(f'{prefix}auto augmentations are currently not supported')
- else:
- if hflip > 0:
- T += [A.HorizontalFlip(p=hflip)]
- if vflip > 0:
- T += [A.VerticalFlip(p=vflip)]
- if jitter > 0:
- jitter = float(jitter)
- T += [A.ColorJitter(jitter, jitter, jitter, 0)] # brightness, contrast, saturation, 0 hue
- else: # Use fixed crop for eval set (reproducibility)
- T = [A.SmallestMaxSize(max_size=size), A.CenterCrop(height=size, width=size)]
- T += [A.Normalize(mean=mean, std=std), ToTensorV2()] # Normalize and convert to Tensor
- LOGGER.info(prefix + ', '.join(f'{x}'.replace('always_apply=False, ', '') for x in T if x.p))
- return A.Compose(T)
-
- except ImportError: # package not installed, skip
- LOGGER.warning(f'{prefix}⚠️ not found, install with `pip install albumentations` (recommended)')
- except Exception as e:
- LOGGER.info(f'{prefix}{e}')
-
-
-def classify_transforms(size=224):
- """Transforms to apply if albumentations not installed."""
- assert isinstance(size, int), f'ERROR: classify_transforms size {size} must be integer, not (list, tuple)'
- # T.Compose([T.ToTensor(), T.Resize(size), T.CenterCrop(size), T.Normalize(IMAGENET_MEAN, IMAGENET_STD)])
- return T.Compose([CenterCrop(size), ToTensor(), T.Normalize(IMAGENET_MEAN, IMAGENET_STD)])
-
-
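-# Usage sketch (illustrative; not part of the original module): the torchvision-only
-# fallback pipeline applied to a single HWC BGR numpy image.
-def _demo_classify_transforms(im_bgr):
- return classify_transforms(size=224)(im_bgr) # CenterCrop -> ToTensor -> Normalize
-
-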
-class LetterBox:
- # YOLOv5 LetterBox class for image preprocessing, i.e. T.Compose([LetterBox(size), ToTensor()])
- def __init__(self, size=(640, 640), auto=False, stride=32):
- """Resizes and crops an image to a specified size for YOLOv5 preprocessing."""
- super().__init__()
- self.h, self.w = (size, size) if isinstance(size, int) else size
- self.auto = auto # pass max size integer, automatically solve for short side using stride
- self.stride = stride # used with auto
-
- def __call__(self, im): # im = np.array HWC
- imh, imw = im.shape[:2]
- r = min(self.h / imh, self.w / imw) # ratio of new/old
- h, w = round(imh * r), round(imw * r) # resized image
- hs, ws = (math.ceil(x / self.stride) * self.stride for x in (h, w)) if self.auto else (self.h, self.w)
- top, left = round((hs - h) / 2 - 0.1), round((ws - w) / 2 - 0.1)
- im_out = np.full((self.h, self.w, 3), 114, dtype=im.dtype)
- im_out[top:top + h, left:left + w] = cv2.resize(im, (w, h), interpolation=cv2.INTER_LINEAR)
- return im_out
-
-
-class CenterCrop:
- # YOLOv5 CenterCrop class for image preprocessing, i.e. T.Compose([CenterCrop(size), ToTensor()])
- def __init__(self, size=640):
- """Converts input image into tensor for YOLOv5 processing."""
- super().__init__()
- self.h, self.w = (size, size) if isinstance(size, int) else size
-
- def __call__(self, im): # im = np.array HWC
- imh, imw = im.shape[:2]
- m = min(imh, imw) # min dimension
- top, left = (imh - m) // 2, (imw - m) // 2
- return cv2.resize(im[top:top + m, left:left + m], (self.w, self.h), interpolation=cv2.INTER_LINEAR)
-
-
-class ToTensor:
- # YOLOv5 ToTensor class for image preprocessing, i.e. T.Compose([LetterBox(size), ToTensor()])
- def __init__(self, half=False):
- """Initialize ToTensor class for YOLOv5 image preprocessing."""
- super().__init__()
- self.half = half
-
- def __call__(self, im): # im = np.array HWC in BGR order
- im = np.ascontiguousarray(im.transpose((2, 0, 1))[::-1]) # HWC to CHW -> BGR to RGB -> contiguous
- im = torch.from_numpy(im) # to torch
- im = im.half() if self.half else im.float() # uint8 to fp16/32
- im /= 255.0 # 0-255 to 0.0-1.0
- return im
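-
-
-# Usage sketch (illustrative; not part of the original module): the three preprocessing
-# classes compose like standard torchvision transforms for classification inference.
-def _demo_preprocess(im_bgr, half=False):
- pipeline = T.Compose([CenterCrop(224), ToTensor(half)])
- return pipeline(im_bgr) # CHW RGB tensor in [0, 1], fp16 if half=True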
diff --git a/ultralytics/yolo/data/dataloaders/v5loader.py b/ultralytics/yolo/data/dataloaders/v5loader.py
deleted file mode 100644
index 96549dd..0000000
--- a/ultralytics/yolo/data/dataloaders/v5loader.py
+++ /dev/null
@@ -1,1109 +0,0 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-"""
-Dataloaders and dataset utils
-"""
-
-import contextlib
-import glob
-import hashlib
-import math
-import os
-import random
-import shutil
-import time
-from itertools import repeat
-from multiprocessing.pool import ThreadPool
-from pathlib import Path
-from threading import Thread
-from urllib.parse import urlparse
-
-import cv2
-import numpy as np
-import psutil
-import torch
-import torchvision
-from PIL import ExifTags, Image, ImageOps
-from torch.utils.data import DataLoader, Dataset, dataloader, distributed
-from tqdm import tqdm
-
-from ultralytics.yolo.utils import (DATASETS_DIR, LOGGER, NUM_THREADS, TQDM_BAR_FORMAT, is_colab, is_dir_writeable,
- is_kaggle)
-from ultralytics.yolo.utils.checks import check_requirements
-from ultralytics.yolo.utils.ops import clean_str, segments2boxes, xyn2xy, xywh2xyxy, xywhn2xyxy, xyxy2xywhn
-from ultralytics.yolo.utils.torch_utils import torch_distributed_zero_first
-
-from .v5augmentations import (Albumentations, augment_hsv, classify_albumentations, classify_transforms, copy_paste,
- letterbox, mixup, random_perspective)
-
-# Parameters
-HELP_URL = 'See https://docs.ultralytics.com/yolov5/tutorials/train_custom_data'
-IMG_FORMATS = 'bmp', 'dng', 'jpeg', 'jpg', 'mpo', 'png', 'tif', 'tiff', 'webp', 'pfm' # include image suffixes
-VID_FORMATS = 'asf', 'avi', 'gif', 'm4v', 'mkv', 'mov', 'mp4', 'mpeg', 'mpg', 'ts', 'wmv' # include video suffixes
-LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html
-RANK = int(os.getenv('RANK', -1))
-PIN_MEMORY = str(os.getenv('PIN_MEMORY', True)).lower() == 'true' # global pin_memory for dataloaders
-
-# Get orientation exif tag
-for orientation in ExifTags.TAGS.keys():
- if ExifTags.TAGS[orientation] == 'Orientation':
- break
-
-
-def get_hash(paths):
- """Returns a single hash value of a list of paths (files or dirs)."""
- size = sum(os.path.getsize(p) for p in paths if os.path.exists(p)) # sizes
- h = hashlib.sha256(str(size).encode()) # hash sizes
- h.update(''.join(paths).encode()) # hash paths
- return h.hexdigest() # return hash
-
-
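-# Usage sketch (illustrative; not part of the original module): the cache key hashes
-# total file size plus the joined paths, so renames and edits both invalidate *.cache.
-def _demo_cache_key(im_files, label_files):
- return get_hash(label_files + im_files) # same key checked against cache['hash']
-
-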
-def exif_size(img):
- """Returns exif-corrected PIL size."""
- s = img.size # (width, height)
- with contextlib.suppress(Exception):
- rotation = dict(img._getexif().items())[orientation]
- if rotation in [6, 8]: # rotation 270 or 90
- s = (s[1], s[0])
- return s
-
-
-def exif_transpose(image):
- """
- Transpose a PIL image accordingly if it has an EXIF Orientation tag.
- Inplace version of https://github.com/python-pillow/Pillow/blob/master/src/PIL/ImageOps.py exif_transpose()
-
- :param image: The image to transpose.
- :return: An image.
- """
- exif = image.getexif()
- orientation = exif.get(0x0112, 1) # default 1
- if orientation > 1:
- method = {
- 2: Image.FLIP_LEFT_RIGHT,
- 3: Image.ROTATE_180,
- 4: Image.FLIP_TOP_BOTTOM,
- 5: Image.TRANSPOSE,
- 6: Image.ROTATE_270,
- 7: Image.TRANSVERSE,
- 8: Image.ROTATE_90}.get(orientation)
- if method is not None:
- image = image.transpose(method)
- del exif[0x0112]
- image.info['exif'] = exif.tobytes()
- return image
-
-
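-# Usage sketch (illustrative; not part of the original module): apply the EXIF-aware
-# transpose before using pixel data so orientation matches what image viewers display.
-def _demo_exif_transpose(path):
- im = Image.open(path)
- return exif_transpose(im) # rotated/flipped per the EXIF Orientation tag
-
-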
-def seed_worker(worker_id):
- """Set dataloader worker seed https://pytorch.org/docs/stable/notes/randomness.html#dataloader."""
- worker_seed = torch.initial_seed() % 2 ** 32
- np.random.seed(worker_seed)
- random.seed(worker_seed)
-
-
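-# Usage sketch (illustrative; not part of the original module): pair seed_worker with a
-# seeded torch.Generator, as create_dataloader() below does, for reproducible workers.
-def _demo_seeded_loader(dataset, batch_size=16, seed=0):
- g = torch.Generator()
- g.manual_seed(seed)
- return DataLoader(dataset, batch_size=batch_size, worker_init_fn=seed_worker, generator=g)
-
-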
-def create_dataloader(path,
- imgsz,
- batch_size,
- stride,
- single_cls=False,
- hyp=None,
- augment=False,
- cache=False,
- pad=0.0,
- rect=False,
- rank=-1,
- workers=8,
- image_weights=False,
- close_mosaic=False,
- min_items=0,
- prefix='',
- shuffle=False,
- seed=0):
- if rect and shuffle:
- LOGGER.warning('WARNING ⚠️ --rect is incompatible with DataLoader shuffle, setting shuffle=False')
- shuffle = False
- with torch_distributed_zero_first(rank): # init dataset *.cache only once if DDP
- dataset = LoadImagesAndLabels(
- path,
- imgsz,
- batch_size,
- augment=augment, # augmentation
- hyp=hyp, # hyperparameters
- rect=rect, # rectangular batches
- cache_images=cache,
- single_cls=single_cls,
- stride=int(stride),
- pad=pad,
- image_weights=image_weights,
- min_items=min_items,
- prefix=prefix)
-
- batch_size = min(batch_size, len(dataset))
- nd = torch.cuda.device_count() # number of CUDA devices
- nw = min([os.cpu_count() // max(nd, 1), batch_size if batch_size > 1 else 0, workers]) # number of workers
- sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle)
- loader = DataLoader if image_weights or close_mosaic else InfiniteDataLoader # DataLoader allows attribute updates
- generator = torch.Generator()
- generator.manual_seed(6148914691236517205 + seed + RANK)
- return loader(dataset,
- batch_size=batch_size,
- shuffle=shuffle and sampler is None,
- num_workers=nw,
- sampler=sampler,
- pin_memory=PIN_MEMORY,
- collate_fn=LoadImagesAndLabels.collate_fn,
- worker_init_fn=seed_worker,
- generator=generator), dataset
-
-
-class InfiniteDataLoader(dataloader.DataLoader):
- """Dataloader that reuses workers
-
- Uses same syntax as vanilla DataLoader
- """
-
- def __init__(self, *args, **kwargs):
- """Dataloader that reuses workers for same syntax as vanilla DataLoader."""
- super().__init__(*args, **kwargs)
- object.__setattr__(self, 'batch_sampler', _RepeatSampler(self.batch_sampler))
- self.iterator = super().__iter__()
-
- def __len__(self):
- """Returns the length of batch_sampler's sampler."""
- return len(self.batch_sampler.sampler)
-
- def __iter__(self):
- """Creates a sampler that infinitely repeats."""
- for _ in range(len(self)):
- yield next(self.iterator)
-
-
-class _RepeatSampler:
- """Sampler that repeats forever
-
- Args:
- sampler (Dataset.sampler): The sampler to repeat.
- """
-
- def __init__(self, sampler):
- """Sampler that repeats dataset samples infinitely."""
- self.sampler = sampler
-
- def __iter__(self):
- """Infinite loop iterating over a given sampler."""
- while True:
- yield from iter(self.sampler)
-
-
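-# Usage sketch (illustrative; not part of the original module): InfiniteDataLoader keeps
-# its worker processes alive between epochs, so a second pass does not respawn workers.
-def _demo_infinite_loader(dataset):
- loader = InfiniteDataLoader(dataset, batch_size=16, num_workers=2)
- for _ in range(2): # two "epochs" over the same persistent workers
- sum(1 for _ in loader) # workers are reused, not respawned
- return loader
-
-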
-class LoadScreenshots:
- # YOLOv5 screenshot dataloader, i.e. `python detect.py --source "screen 0 100 100 512 256"`
- def __init__(self, source, img_size=640, stride=32, auto=True, transforms=None):
- """source = [screen_number left top width height] (pixels)."""
- check_requirements('mss')
- import mss
-
- source, *params = source.split()
- self.screen, left, top, width, height = 0, None, None, None, None # default to full screen 0
- if len(params) == 1:
- self.screen = int(params[0])
- elif len(params) == 4:
- left, top, width, height = (int(x) for x in params)
- elif len(params) == 5:
- self.screen, left, top, width, height = (int(x) for x in params)
- self.img_size = img_size
- self.stride = stride
- self.transforms = transforms
- self.auto = auto
- self.mode = 'stream'
- self.frame = 0
- self.sct = mss.mss()
-
- # Parse monitor shape
- monitor = self.sct.monitors[self.screen]
- self.top = monitor['top'] if top is None else (monitor['top'] + top)
- self.left = monitor['left'] if left is None else (monitor['left'] + left)
- self.width = width or monitor['width']
- self.height = height or monitor['height']
- self.monitor = {'left': self.left, 'top': self.top, 'width': self.width, 'height': self.height}
-
- def __iter__(self):
- """Iterates over objects with the same structure as the monitor attribute."""
- return self
-
- def __next__(self):
- """mss screen capture: get raw pixels from the screen as np array."""
- im0 = np.array(self.sct.grab(self.monitor))[:, :, :3] # [:, :, :3] BGRA to BGR
- s = f'screen {self.screen} (LTWH): {self.left},{self.top},{self.width},{self.height}: '
-
- if self.transforms:
- im = self.transforms(im0) # transforms
- else:
- im = letterbox(im0, self.img_size, stride=self.stride, auto=self.auto)[0] # padded resize
- im = im.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB
- im = np.ascontiguousarray(im) # contiguous
- self.frame += 1
- return str(self.screen), im, im0, None, s # screen, img, original img, im0s, s
-
-
-class LoadImages:
- # YOLOv5 image/video dataloader, i.e. `python detect.py --source image.jpg/vid.mp4`
- def __init__(self, path, img_size=640, stride=32, auto=True, transforms=None, vid_stride=1):
- """Initialize instance variables and check for valid input."""
- if isinstance(path, str) and Path(path).suffix == '.txt': # *.txt file with img/vid/dir on each line
- path = Path(path).read_text().rsplit()
- files = []
- for p in sorted(path) if isinstance(path, (list, tuple)) else [path]:
- p = str(Path(p).resolve())
- if '*' in p:
- files.extend(sorted(glob.glob(p, recursive=True))) # glob
- elif os.path.isdir(p):
- files.extend(sorted(glob.glob(os.path.join(p, '*.*')))) # dir
- elif os.path.isfile(p):
- files.append(p) # files
- else:
- raise FileNotFoundError(f'{p} does not exist')
-
- images = [x for x in files if x.split('.')[-1].lower() in IMG_FORMATS]
- videos = [x for x in files if x.split('.')[-1].lower() in VID_FORMATS]
- ni, nv = len(images), len(videos)
-
- self.img_size = img_size
- self.stride = stride
- self.files = images + videos
- self.nf = ni + nv # number of files
- self.video_flag = [False] * ni + [True] * nv
- self.mode = 'image'
- self.auto = auto
- self.transforms = transforms # optional
- self.vid_stride = vid_stride # video frame-rate stride
- if any(videos):
- self._new_video(videos[0]) # new video
- else:
- self.cap = None
- assert self.nf > 0, f'No images or videos found in {p}. ' \
- f'Supported formats are:\nimages: {IMG_FORMATS}\nvideos: {VID_FORMATS}'
-
- def __iter__(self):
- """Returns an iterator object for iterating over images or videos found in a directory."""
- self.count = 0
- return self
-
- def __next__(self):
- """Iterator's next item, performs transformation on image and returns path, transformed image, original image, capture and size."""
- if self.count == self.nf:
- raise StopIteration
- path = self.files[self.count]
-
- if self.video_flag[self.count]:
- # Read video
- self.mode = 'video'
- for _ in range(self.vid_stride):
- self.cap.grab()
- ret_val, im0 = self.cap.retrieve()
- while not ret_val:
- self.count += 1
- self.cap.release()
- if self.count == self.nf: # last video
- raise StopIteration
- path = self.files[self.count]
- self._new_video(path)
- ret_val, im0 = self.cap.read()
-
- self.frame += 1
- # im0 = self._cv2_rotate(im0) # for use if cv2 autorotation is False
- s = f'video {self.count + 1}/{self.nf} ({self.frame}/{self.frames}) {path}: '
-
- else:
- # Read image
- self.count += 1
- im0 = cv2.imread(path) # BGR
- assert im0 is not None, f'Image Not Found {path}'
- s = f'image {self.count}/{self.nf} {path}: '
-
- if self.transforms:
- im = self.transforms(im0) # transforms
- else:
- im = letterbox(im0, self.img_size, stride=self.stride, auto=self.auto)[0] # padded resize
- im = im.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB
- im = np.ascontiguousarray(im) # contiguous
-
- return path, im, im0, self.cap, s
-
- def _new_video(self, path):
- """Create a new video capture object."""
- self.frame = 0
- self.cap = cv2.VideoCapture(path)
- self.frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT) / self.vid_stride)
- self.orientation = int(self.cap.get(cv2.CAP_PROP_ORIENTATION_META)) # rotation degrees
- # self.cap.set(cv2.CAP_PROP_ORIENTATION_AUTO, 0) # disable https://github.com/ultralytics/yolov5/issues/8493
-
- def _cv2_rotate(self, im):
- """Rotate a cv2 video manually."""
- if self.orientation == 0:
- return cv2.rotate(im, cv2.ROTATE_90_CLOCKWISE)
- elif self.orientation == 180:
- return cv2.rotate(im, cv2.ROTATE_90_COUNTERCLOCKWISE)
- elif self.orientation == 90:
- return cv2.rotate(im, cv2.ROTATE_180)
- return im
-
- def __len__(self):
- """Returns the number of files in the class instance."""
- return self.nf # number of files
-
-
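-# Usage sketch (illustrative; not part of the original module): iterate a folder of
-# images/videos exactly like `python detect.py --source path/`.
-def _demo_load_images(source):
- for path, im, im0, cap, s in LoadImages(source, img_size=640):
- print(s) # e.g. 'image 1/3 /path/bus.jpg: '
-
-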
-class LoadStreams:
- # YOLOv5 streamloader, i.e. `python detect.py --source 'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP streams`
- def __init__(self, sources='file.streams', img_size=640, stride=32, auto=True, transforms=None, vid_stride=1):
- """Initialize YOLO detector with optional transforms and check input shapes."""
- torch.backends.cudnn.benchmark = True # faster for fixed-size inference
- self.mode = 'stream'
- self.img_size = img_size
- self.stride = stride
- self.vid_stride = vid_stride # video frame-rate stride
- sources = Path(sources).read_text().rsplit() if os.path.isfile(sources) else [sources]
- n = len(sources)
- self.sources = [clean_str(x) for x in sources] # clean source names for later
- self.imgs, self.fps, self.frames, self.threads = [None] * n, [0] * n, [0] * n, [None] * n
- for i, s in enumerate(sources): # index, source
- # Start thread to read frames from video stream
- st = f'{i + 1}/{n}: {s}... '
- if urlparse(s).hostname in ('www.youtube.com', 'youtube.com', 'youtu.be'): # if source is YouTube video
- # YouTube format i.e. 'https://www.youtube.com/watch?v=Zgi9g1ksQHc' or 'https://youtu.be/Zgi9g1ksQHc'
- check_requirements(('pafy', 'youtube_dl==2020.12.2'))
- import pafy
- s = pafy.new(s).getbest(preftype='mp4').url # YouTube URL
- s = int(s) if s.isnumeric() else s # i.e. s = '0' local webcam
- if s == 0:
- assert not is_colab(), '--source 0 webcam unsupported on Colab. Rerun command in a local environment.'
- assert not is_kaggle(), '--source 0 webcam unsupported on Kaggle. Rerun command in a local environment.'
- cap = cv2.VideoCapture(s)
- assert cap.isOpened(), f'{st}Failed to open {s}'
- w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
- h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
- fps = cap.get(cv2.CAP_PROP_FPS) # warning: may return 0 or nan
- self.frames[i] = max(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)), 0) or float('inf') # infinite stream fallback
- self.fps[i] = max((fps if math.isfinite(fps) else 0) % 100, 0) or 30 # 30 FPS fallback
-
- _, self.imgs[i] = cap.read() # guarantee first frame
- self.threads[i] = Thread(target=self.update, args=([i, cap, s]), daemon=True)
- LOGGER.info(f'{st} Success ({self.frames[i]} frames {w}x{h} at {self.fps[i]:.2f} FPS)')
- self.threads[i].start()
- LOGGER.info('') # newline
-
- # Check for common shapes
- s = np.stack([letterbox(x, img_size, stride=stride, auto=auto)[0].shape for x in self.imgs])
- self.rect = np.unique(s, axis=0).shape[0] == 1 # rect inference if all shapes equal
- self.auto = auto and self.rect
- self.transforms = transforms # optional
- if not self.rect:
- LOGGER.warning('WARNING ⚠️ Stream shapes differ. For optimal performance supply similarly-shaped streams.')
-
- def update(self, i, cap, stream):
- """Read stream `i` frames in daemon thread."""
- n, f = 0, self.frames[i] # frame number, frame array
- while cap.isOpened() and n < f:
- n += 1
- cap.grab() # .read() = .grab() followed by .retrieve()
- if n % self.vid_stride == 0:
- success, im = cap.retrieve()
- if success:
- self.imgs[i] = im
- else:
- LOGGER.warning('WARNING ⚠️ Video stream unresponsive, please check your IP camera connection.')
- self.imgs[i] = np.zeros_like(self.imgs[i])
- cap.open(stream) # re-open stream if signal was lost
- time.sleep(0.0) # wait time
-
- def __iter__(self):
- """Iterator that returns the class instance."""
- self.count = -1
- return self
-
- def __next__(self):
- """Return a tuple containing transformed and resized image data."""
- self.count += 1
- if not all(x.is_alive() for x in self.threads) or cv2.waitKey(1) == ord('q'): # q to quit
- cv2.destroyAllWindows()
- raise StopIteration
-
- im0 = self.imgs.copy()
- if self.transforms:
- im = np.stack([self.transforms(x) for x in im0]) # transforms
- else:
- im = np.stack([letterbox(x, self.img_size, stride=self.stride, auto=self.auto)[0] for x in im0]) # resize
- im = im[..., ::-1].transpose((0, 3, 1, 2)) # BGR to RGB, BHWC to BCHW
- im = np.ascontiguousarray(im) # contiguous
-
- return self.sources, im, im0, None, ''
-
- def __len__(self):
- """Returns the number of sources as the length of the object."""
- return len(self.sources) # 1E12 frames = 32 streams at 30 FPS for 30 years
-
-
-def img2label_paths(img_paths):
- """Define label paths as a function of image paths."""
- sa, sb = f'{os.sep}images{os.sep}', f'{os.sep}labels{os.sep}' # /images/, /labels/ substrings
- return [sb.join(x.rsplit(sa, 1)).rsplit('.', 1)[0] + '.txt' for x in img_paths]
-
-
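-# Usage sketch (illustrative; not part of the original module): label paths mirror image
-# paths with /images/ swapped for /labels/ and the suffix replaced by .txt.
-def _demo_img2label_paths():
- # -> ['coco/labels/train2017/000000000009.txt']
- return img2label_paths(['coco/images/train2017/000000000009.jpg'])
-
-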
-class LoadImagesAndLabels(Dataset):
- """YOLOv5 train_loader/val_loader, loads images and labels for training and validation."""
- cache_version = 0.6 # dataset labels *.cache version
- rand_interp_methods = [cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4]
-
- def __init__(self,
- path,
- img_size=640,
- batch_size=16,
- augment=False,
- hyp=None,
- rect=False,
- image_weights=False,
- cache_images=False,
- single_cls=False,
- stride=32,
- pad=0.0,
- min_items=0,
- prefix=''):
- self.img_size = img_size
- self.augment = augment
- self.hyp = hyp
- self.image_weights = image_weights
- self.rect = False if image_weights else rect
- self.mosaic = self.augment and not self.rect # load 4 images at a time into a mosaic (only during training)
- self.mosaic_border = [-img_size // 2, -img_size // 2]
- self.stride = stride
- self.path = path
- self.albumentations = Albumentations(size=img_size) if augment else None
-
- try:
- f = [] # image files
- for p in path if isinstance(path, list) else [path]:
- p = Path(p) # os-agnostic
- if p.is_dir(): # dir
- f += glob.glob(str(p / '**' / '*.*'), recursive=True)
- # f = list(p.rglob('*.*')) # pathlib
- elif p.is_file(): # file
- with open(p) as t:
- t = t.read().strip().splitlines()
- parent = str(p.parent) + os.sep
- f += [x.replace('./', parent, 1) if x.startswith('./') else x for x in t] # to global path
- # f += [p.parent / x.lstrip(os.sep) for x in t] # to global path (pathlib)
- else:
- raise FileNotFoundError(f'{prefix}{p} does not exist')
- self.im_files = sorted(x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in IMG_FORMATS)
- # self.img_files = sorted([x for x in f if x.suffix[1:].lower() in IMG_FORMATS]) # pathlib
- assert self.im_files, f'{prefix}No images found'
- except Exception as e:
- raise FileNotFoundError(f'{prefix}Error loading data from {path}: {e}\n{HELP_URL}') from e
-
- # Check cache
- self.label_files = img2label_paths(self.im_files) # labels
- cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix('.cache')
- try:
- cache, exists = np.load(cache_path, allow_pickle=True).item(), True # load dict
- assert cache['version'] == self.cache_version # matches current version
- assert cache['hash'] == get_hash(self.label_files + self.im_files) # identical hash
- except (FileNotFoundError, AssertionError, AttributeError):
- cache, exists = self.cache_labels(cache_path, prefix), False # run cache ops
-
- # Display cache
- nf, nm, ne, nc, n = cache.pop('results') # found, missing, empty, corrupt, total
- if exists and LOCAL_RANK in (-1, 0):
- d = f'Scanning {cache_path}... {nf} images, {nm + ne} backgrounds, {nc} corrupt'
- tqdm(None, desc=prefix + d, total=n, initial=n, bar_format=TQDM_BAR_FORMAT) # display cache results
- if cache['msgs']:
- LOGGER.info('\n'.join(cache['msgs'])) # display warnings
- assert nf > 0 or not augment, f'{prefix}No labels found in {cache_path}, can not start training. {HELP_URL}'
-
- # Read cache
- [cache.pop(k) for k in ('hash', 'version', 'msgs')] # remove items
- labels, shapes, self.segments = zip(*cache.values())
- nl = len(np.concatenate(labels, 0)) # number of labels
- assert nl > 0 or not augment, f'{prefix}All labels empty in {cache_path}, can not start training. {HELP_URL}'
- self.labels = list(labels)
- self.shapes = np.array(shapes)
- self.im_files = list(cache.keys()) # update
- self.label_files = img2label_paths(cache.keys()) # update
-
- # Filter images
- if min_items:
- include = np.array([len(x) >= min_items for x in self.labels]).nonzero()[0].astype(int)
- LOGGER.info(f'{prefix}{n - len(include)}/{n} images filtered from dataset')
- self.im_files = [self.im_files[i] for i in include]
- self.label_files = [self.label_files[i] for i in include]
- self.labels = [self.labels[i] for i in include]
- self.segments = [self.segments[i] for i in include]
- self.shapes = self.shapes[include] # wh
-
- # Create indices
- n = len(self.shapes) # number of images
- bi = np.floor(np.arange(n) / batch_size).astype(int) # batch index
- nb = bi[-1] + 1 # number of batches
- self.batch = bi # batch index of image
- self.n = n
- self.indices = range(n)
-
- # Update labels
- include_class = [] # filter labels to include only these classes (optional)
- include_class_array = np.array(include_class).reshape(1, -1)
- for i, (label, segment) in enumerate(zip(self.labels, self.segments)):
- if include_class:
- j = (label[:, 0:1] == include_class_array).any(1)
- self.labels[i] = label[j]
- if segment:
- self.segments[i] = [segment[si] for si, idx in enumerate(j) if idx]
- if single_cls: # single-class training, merge all classes into 0
- self.labels[i][:, 0] = 0
-
- # Rectangular Training
- if self.rect:
- # Sort by aspect ratio
- s = self.shapes # wh
- ar = s[:, 1] / s[:, 0] # aspect ratio
- irect = ar.argsort()
- self.im_files = [self.im_files[i] for i in irect]
- self.label_files = [self.label_files[i] for i in irect]
- self.labels = [self.labels[i] for i in irect]
- self.segments = [self.segments[i] for i in irect]
- self.shapes = s[irect] # wh
- ar = ar[irect]
-
- # Set training image shapes
- shapes = [[1, 1]] * nb
- for i in range(nb):
- ari = ar[bi == i]
- mini, maxi = ari.min(), ari.max()
- if maxi < 1:
- shapes[i] = [maxi, 1]
- elif mini > 1:
- shapes[i] = [1, 1 / mini]
-
- self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(int) * stride
-
- # Cache images into RAM/disk for faster training
- if cache_images == 'ram' and not self.check_cache_ram(prefix=prefix):
- cache_images = False
- self.ims = [None] * n
- self.npy_files = [Path(f).with_suffix('.npy') for f in self.im_files]
- if cache_images:
- b, gb = 0, 1 << 30 # bytes of cached images, bytes per gigabytes
- self.im_hw0, self.im_hw = [None] * n, [None] * n
- fcn = self.cache_images_to_disk if cache_images == 'disk' else self.load_image
- with ThreadPool(NUM_THREADS) as pool:
- results = pool.imap(fcn, range(n))
- pbar = tqdm(enumerate(results), total=n, bar_format=TQDM_BAR_FORMAT, disable=LOCAL_RANK > 0)
- for i, x in pbar:
- if cache_images == 'disk':
- b += self.npy_files[i].stat().st_size
- else: # 'ram'
- self.ims[i], self.im_hw0[i], self.im_hw[i] = x # im, hw_orig, hw_resized = load_image(self, i)
- b += self.ims[i].nbytes
- pbar.desc = f'{prefix}Caching images ({b / gb:.1f}GB {cache_images})'
- pbar.close()
-
- def check_cache_ram(self, safety_margin=0.1, prefix=''):
- """Check image caching requirements vs available memory."""
- b, gb = 0, 1 << 30 # bytes of cached images, bytes per gigabytes
- n = min(self.n, 30) # extrapolate from 30 random images
- for _ in range(n):
- im = cv2.imread(random.choice(self.im_files)) # sample image
- ratio = self.img_size / max(im.shape[0], im.shape[1]) # ratio = img_size / max(h, w)
- b += im.nbytes * ratio ** 2
- mem_required = b * self.n / n # GB required to cache dataset into RAM
- mem = psutil.virtual_memory()
- cache = mem_required * (1 + safety_margin) < mem.available # to cache or not to cache, that is the question
- if not cache:
- LOGGER.info(f'{prefix}{mem_required / gb:.1f}GB RAM required, '
- f'{mem.available / gb:.1f}/{mem.total / gb:.1f}GB available, '
- f"{'caching images ✅' if cache else 'not caching images ⚠️'}")
- return cache
-
- def cache_labels(self, path=Path('./labels.cache'), prefix=''):
- """Cache labels and save as numpy file for next time."""
- # Cache dataset labels, check images and read shapes
- if path.exists():
- path.unlink() # remove *.cache file if exists
- x = {} # dict
- nm, nf, ne, nc, msgs = 0, 0, 0, 0, [] # number missing, found, empty, corrupt, messages
- desc = f'{prefix}Scanning {path.parent / path.stem}...'
- total = len(self.im_files)
- with ThreadPool(NUM_THREADS) as pool:
- results = pool.imap(verify_image_label, zip(self.im_files, self.label_files, repeat(prefix)))
- pbar = tqdm(results, desc=desc, total=total, bar_format=TQDM_BAR_FORMAT)
- for im_file, lb, shape, segments, nm_f, nf_f, ne_f, nc_f, msg in pbar:
- nm += nm_f
- nf += nf_f
- ne += ne_f
- nc += nc_f
- if im_file:
- x[im_file] = [lb, shape, segments]
- if msg:
- msgs.append(msg)
- pbar.desc = f'{desc} {nf} images, {nm + ne} backgrounds, {nc} corrupt'
- pbar.close()
-
- if msgs:
- LOGGER.info('\n'.join(msgs))
- if nf == 0:
- LOGGER.warning(f'{prefix}WARNING ⚠️ No labels found in {path}. {HELP_URL}')
- x['hash'] = get_hash(self.label_files + self.im_files)
- x['results'] = nf, nm, ne, nc, len(self.im_files)
- x['msgs'] = msgs # warnings
- x['version'] = self.cache_version # cache version
- if is_dir_writeable(path.parent):
- np.save(str(path), x) # save cache for next time
- path.with_suffix('.cache.npy').rename(path) # remove .npy suffix
- LOGGER.info(f'{prefix}New cache created: {path}')
- else:
- LOGGER.warning(f'{prefix}WARNING ⚠️ Cache directory {path.parent} is not writeable') # not writeable
- return x
-
- def __len__(self):
- """Returns the length of 'im_files' attribute."""
- return len(self.im_files)
-
- def __getitem__(self, index):
- """Get a sample and its corresponding label, filename and shape from the dataset."""
- index = self.indices[index] # linear, shuffled, or image_weights
-
- hyp = self.hyp
- mosaic = self.mosaic and random.random() < hyp['mosaic']
- if mosaic:
- # Load mosaic
- img, labels = self.load_mosaic(index)
- shapes = None
-
- # MixUp augmentation
- if random.random() < hyp['mixup']:
- img, labels = mixup(img, labels, *self.load_mosaic(random.randint(0, self.n - 1)))
-
- else:
- # Load image
- img, (h0, w0), (h, w) = self.load_image(index)
-
- # Letterbox
- shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size # final letterboxed shape
- img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment)
- shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling
-
- labels = self.labels[index].copy()
- if labels.size: # normalized xywh to pixel xyxy format
- labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1])
-
- if self.augment:
- img, labels = random_perspective(img,
- labels,
- degrees=hyp['degrees'],
- translate=hyp['translate'],
- scale=hyp['scale'],
- shear=hyp['shear'],
- perspective=hyp['perspective'])
-
- nl = len(labels) # number of labels
- if nl:
- labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1E-3)
-
- if self.augment:
- # Albumentations
- img, labels = self.albumentations(img, labels)
- nl = len(labels) # update after albumentations
-
- # HSV color-space
- augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v'])
-
- # Flip up-down
- if random.random() < hyp['flipud']:
- img = np.flipud(img)
- if nl:
- labels[:, 2] = 1 - labels[:, 2]
-
- # Flip left-right
- if random.random() < hyp['fliplr']:
- img = np.fliplr(img)
- if nl:
- labels[:, 1] = 1 - labels[:, 1]
-
- # Cutouts
- # labels = cutout(img, labels, p=0.5)
- # nl = len(labels) # update after cutout
-
- labels_out = torch.zeros((nl, 6))
- if nl:
- labels_out[:, 1:] = torch.from_numpy(labels)
-
- # Convert
- img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB
- img = np.ascontiguousarray(img)
-
- return torch.from_numpy(img), labels_out, self.im_files[index], shapes
-
- def load_image(self, i):
- """Loads 1 image from dataset index 'i', returns (im, original hw, resized hw)."""
- im, f, fn = self.ims[i], self.im_files[i], self.npy_files[i]
- if im is None: # not cached in RAM
- if fn.exists(): # load npy
- im = np.load(fn)
- else: # read image
- im = cv2.imread(f) # BGR
- assert im is not None, f'Image Not Found {f}'
- h0, w0 = im.shape[:2] # orig hw
- r = self.img_size / max(h0, w0) # ratio
- if r != 1: # if sizes are not equal
- interp = cv2.INTER_LINEAR if (self.augment or r > 1) else cv2.INTER_AREA
- im = cv2.resize(im, (math.ceil(w0 * r), math.ceil(h0 * r)), interpolation=interp)
- return im, (h0, w0), im.shape[:2] # im, hw_original, hw_resized
- return self.ims[i], self.im_hw0[i], self.im_hw[i] # im, hw_original, hw_resized
-
- def cache_images_to_disk(self, i):
- """Saves an image as an *.npy file for faster loading."""
- f = self.npy_files[i]
- if not f.exists():
- np.save(f.as_posix(), cv2.imread(self.im_files[i]))
-
- def load_mosaic(self, index):
- """YOLOv5 4-mosaic loader. Loads 1 image + 3 random images into a 4-image mosaic."""
- labels4, segments4 = [], []
- s = self.img_size
- yc, xc = (int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border) # mosaic center x, y
- indices = [index] + random.choices(self.indices, k=3) # 3 additional image indices
- random.shuffle(indices)
- for i, index in enumerate(indices):
- # Load image
- img, _, (h, w) = self.load_image(index)
-
- # Place img in img4
- if i == 0: # top left
- img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles
- x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image)
- x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image)
- elif i == 1: # top right
- x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
- x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
- elif i == 2: # bottom left
- x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
- x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
- elif i == 3: # bottom right
- x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
- x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
-
- img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax]
- padw = x1a - x1b
- padh = y1a - y1b
-
- # Labels
- labels, segments = self.labels[index].copy(), self.segments[index].copy()
- if labels.size:
- labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padw, padh) # normalized xywh to pixel xyxy format
- segments = [xyn2xy(x, w, h, padw, padh) for x in segments]
- labels4.append(labels)
- segments4.extend(segments)
-
- # Concat/clip labels
- labels4 = np.concatenate(labels4, 0)
- for x in (labels4[:, 1:], *segments4):
- np.clip(x, 0, 2 * s, out=x) # clip when using random_perspective()
- # img4, labels4 = replicate(img4, labels4) # replicate
-
- # Augment
- img4, labels4, segments4 = copy_paste(img4, labels4, segments4, p=self.hyp['copy_paste'])
- img4, labels4 = random_perspective(img4,
- labels4,
- segments4,
- degrees=self.hyp['degrees'],
- translate=self.hyp['translate'],
- scale=self.hyp['scale'],
- shear=self.hyp['shear'],
- perspective=self.hyp['perspective'],
- border=self.mosaic_border) # border to remove
-
- return img4, labels4
-
- def load_mosaic9(self, index):
- """YOLOv5 9-mosaic loader. Loads 1 image + 8 random images into a 9-image mosaic."""
- labels9, segments9 = [], []
- s = self.img_size
- indices = [index] + random.choices(self.indices, k=8) # 8 additional image indices
- random.shuffle(indices)
- hp, wp = -1, -1 # height, width previous
- for i, index in enumerate(indices):
- # Load image
- img, _, (h, w) = self.load_image(index)
-
- # Place img in img9
- if i == 0: # center
- img9 = np.full((s * 3, s * 3, img.shape[2]), 114, dtype=np.uint8) # base image with 9 tiles
- h0, w0 = h, w
- c = s, s, s + w, s + h # xmin, ymin, xmax, ymax (base) coordinates
- elif i == 1: # top
- c = s, s - h, s + w, s
- elif i == 2: # top right
- c = s + wp, s - h, s + wp + w, s
- elif i == 3: # right
- c = s + w0, s, s + w0 + w, s + h
- elif i == 4: # bottom right
- c = s + w0, s + hp, s + w0 + w, s + hp + h
- elif i == 5: # bottom
- c = s + w0 - w, s + h0, s + w0, s + h0 + h
- elif i == 6: # bottom left
- c = s + w0 - wp - w, s + h0, s + w0 - wp, s + h0 + h
- elif i == 7: # left
- c = s - w, s + h0 - h, s, s + h0
- elif i == 8: # top left
- c = s - w, s + h0 - hp - h, s, s + h0 - hp
-
- padx, pady = c[:2]
- x1, y1, x2, y2 = (max(x, 0) for x in c) # allocate coords
-
- # Labels
- labels, segments = self.labels[index].copy(), self.segments[index].copy()
- if labels.size:
- labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padx, pady) # normalized xywh to pixel xyxy format
- segments = [xyn2xy(x, w, h, padx, pady) for x in segments]
- labels9.append(labels)
- segments9.extend(segments)
-
- # Image
- img9[y1:y2, x1:x2] = img[y1 - pady:, x1 - padx:] # img9[ymin:ymax, xmin:xmax]
- hp, wp = h, w # height, width previous
-
- # Offset
- yc, xc = (int(random.uniform(0, s)) for _ in self.mosaic_border) # mosaic center x, y
- img9 = img9[yc:yc + 2 * s, xc:xc + 2 * s]
-
- # Concat/clip labels
- labels9 = np.concatenate(labels9, 0)
- labels9[:, [1, 3]] -= xc
- labels9[:, [2, 4]] -= yc
- c = np.array([xc, yc]) # centers
- segments9 = [x - c for x in segments9]
-
- for x in (labels9[:, 1:], *segments9):
- np.clip(x, 0, 2 * s, out=x) # clip when using random_perspective()
- # img9, labels9 = replicate(img9, labels9) # replicate
-
- # Augment
- img9, labels9, segments9 = copy_paste(img9, labels9, segments9, p=self.hyp['copy_paste'])
- img9, labels9 = random_perspective(img9,
- labels9,
- segments9,
- degrees=self.hyp['degrees'],
- translate=self.hyp['translate'],
- scale=self.hyp['scale'],
- shear=self.hyp['shear'],
- perspective=self.hyp['perspective'],
- border=self.mosaic_border) # border to remove
-
- return img9, labels9
-
- @staticmethod
- def collate_fn(batch):
- """YOLOv8 collate function, outputs dict."""
- im, label, path, shapes = zip(*batch) # transposed
- for i, lb in enumerate(label):
- lb[:, 0] = i # add target image index for build_targets()
- batch_idx, cls, bboxes = torch.cat(label, 0).split((1, 1, 4), dim=1)
- return {
- 'ori_shape': tuple((x[0] if x else None) for x in shapes),
- 'ratio_pad': tuple((x[1] if x else None) for x in shapes),
- 'im_file': path,
- 'img': torch.stack(im, 0),
- 'cls': cls,
- 'bboxes': bboxes,
- 'batch_idx': batch_idx.view(-1)}
-
- @staticmethod
- def collate_fn_old(batch):
- """YOLOv5 original collate function."""
- im, label, path, shapes = zip(*batch) # transposed
- for i, lb in enumerate(label):
- lb[:, 0] = i # add target image index for build_targets()
- return torch.stack(im, 0), torch.cat(label, 0), path, shapes
-
-
-# Ancillary functions --------------------------------------------------------------------------------------------------
-def flatten_recursive(path=DATASETS_DIR / 'coco128'):
- """Flatten a recursive directory by bringing all files to top level."""
- new_path = Path(f'{str(path)}_flat')
- if os.path.exists(new_path):
- shutil.rmtree(new_path) # delete output folder
- os.makedirs(new_path) # make new output folder
- for file in tqdm(glob.glob(f'{str(Path(path))}/**/*.*', recursive=True)):
- shutil.copyfile(file, new_path / Path(file).name)
-
-
-def extract_boxes(path=DATASETS_DIR / 'coco128'): # from utils.dataloaders import *; extract_boxes()
- """Convert a detection dataset into a classification dataset, with one directory per class."""
- path = Path(path) # images dir
- shutil.rmtree(path / 'classifier') if (path / 'classifier').is_dir() else None # remove existing
- files = list(path.rglob('*.*'))
- n = len(files) # number of files
- for im_file in tqdm(files, total=n):
- if im_file.suffix[1:] in IMG_FORMATS:
- # Image
- im = cv2.imread(str(im_file)) # keep BGR; cv2.imwrite below expects BGR
- h, w = im.shape[:2]
-
- # Labels
- lb_file = Path(img2label_paths([str(im_file)])[0])
- if Path(lb_file).exists():
- with open(lb_file) as f:
- lb = np.array([x.split() for x in f.read().strip().splitlines()], dtype=np.float32) # labels
-
- for j, x in enumerate(lb):
- c = int(x[0]) # class
- f = (path / 'classifier') / f'{c}' / f'{path.stem}_{im_file.stem}_{j}.jpg' # new filename
- if not f.parent.is_dir():
- f.parent.mkdir(parents=True)
-
- b = x[1:] * [w, h, w, h] # box
- # b[2:] = b[2:].max() # rectangle to square
- b[2:] = b[2:] * 1.2 + 3 # pad
- b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(int)
-
- b[[0, 2]] = np.clip(b[[0, 2]], 0, w) # clip boxes outside of image
- b[[1, 3]] = np.clip(b[[1, 3]], 0, h)
- assert cv2.imwrite(str(f), im[b[1]:b[3], b[0]:b[2]]), f'box failure in {f}'
-
-
-def autosplit(path=DATASETS_DIR / 'coco128/images', weights=(0.9, 0.1, 0.0), annotated_only=False):
- """Autosplit a dataset into train/val/test splits and save path/autosplit_*.txt files
- Usage: from utils.dataloaders import *; autosplit()
- Arguments
- path: Path to images directory
- weights: Train, val, test weights (list, tuple)
- annotated_only: Only use images with an annotated txt file
- """
- path = Path(path) # images dir
- files = sorted(x for x in path.rglob('*.*') if x.suffix[1:].lower() in IMG_FORMATS) # image files only
- n = len(files) # number of files
- random.seed(0) # for reproducibility
- indices = random.choices([0, 1, 2], weights=weights, k=n) # assign each image to a split
-
- txt = ['autosplit_train.txt', 'autosplit_val.txt', 'autosplit_test.txt'] # 3 txt files
- for x in txt:
- if (path.parent / x).exists():
- (path.parent / x).unlink() # remove existing
-
- print(f'Autosplitting images from {path}' + ', using *.txt labeled images only' * annotated_only)
- for i, img in tqdm(zip(indices, files), total=n):
- if not annotated_only or Path(img2label_paths([str(img)])[0]).exists(): # check label
- with open(path.parent / txt[i], 'a') as f:
- f.write(f'./{img.relative_to(path.parent).as_posix()}' + '\n') # add image to txt file
-
-
-def verify_image_label(args):
- """Verify one image-label pair."""
- im_file, lb_file, prefix = args
- nm, nf, ne, nc, msg, segments = 0, 0, 0, 0, '', [] # number (missing, found, empty, corrupt), message, segments
- try:
- # Verify images
- im = Image.open(im_file)
- im.verify() # PIL verify
- shape = exif_size(im) # image size
- assert (shape[0] > 9) & (shape[1] > 9), f'image size {shape} <10 pixels'
- assert im.format.lower() in IMG_FORMATS, f'invalid image format {im.format}'
- if im.format.lower() in ('jpg', 'jpeg'):
- with open(im_file, 'rb') as f:
- f.seek(-2, 2)
- if f.read() != b'\xff\xd9': # corrupt JPEG
- ImageOps.exif_transpose(Image.open(im_file)).save(im_file, 'JPEG', subsampling=0, quality=100)
- msg = f'{prefix}WARNING ⚠️ {im_file}: corrupt JPEG restored and saved'
-
- # Verify labels
- if os.path.isfile(lb_file):
- nf = 1 # label found
- with open(lb_file) as f:
- lb = [x.split() for x in f.read().strip().splitlines() if len(x)]
- if any(len(x) > 6 for x in lb): # is segment
- classes = np.array([x[0] for x in lb], dtype=np.float32)
- segments = [np.array(x[1:], dtype=np.float32).reshape(-1, 2) for x in lb] # (cls, xy1...)
- lb = np.concatenate((classes.reshape(-1, 1), segments2boxes(segments)), 1) # (cls, xywh)
- lb = np.array(lb, dtype=np.float32)
- nl = len(lb)
- if nl:
- assert lb.shape[1] == 5, f'labels require 5 columns, {lb.shape[1]} columns detected'
- assert (lb >= 0).all(), f'negative label values {lb[lb < 0]}'
- assert (lb[:, 1:] <= 1).all(), f'non-normalized or out of bounds coordinates {lb[:, 1:][lb[:, 1:] > 1]}'
- _, i = np.unique(lb, axis=0, return_index=True)
- if len(i) < nl: # duplicate row check
- lb = lb[i] # remove duplicates
- if segments:
- segments = [segments[x] for x in i]
- msg = f'{prefix}WARNING ⚠️ {im_file}: {nl - len(i)} duplicate labels removed'
- else:
- ne = 1 # label empty
- lb = np.zeros((0, 5), dtype=np.float32)
- else:
- nm = 1 # label missing
- lb = np.zeros((0, 5), dtype=np.float32)
- return im_file, lb, shape, segments, nm, nf, ne, nc, msg
- except Exception as e:
- nc = 1
- msg = f'{prefix}WARNING ⚠️ {im_file}: ignoring corrupt image/label: {e}'
- return [None, None, None, None, nm, nf, ne, nc, msg]
-
-
-# Classification dataloaders -------------------------------------------------------------------------------------------
-class ClassificationDataset(torchvision.datasets.ImageFolder):
- """
- YOLOv5 Classification Dataset.
- Arguments
- root: Dataset path
- augment: Apply augmentations if True
- imgsz: Target image size
- cache: Cache images to RAM (True or 'ram'), to disk ('disk'), or not at all (False)
- """
-
- def __init__(self, root, augment, imgsz, cache=False):
- """Initialize YOLO dataset with root, augmentation, image size, and cache parameters."""
- super().__init__(root=root)
- self.torch_transforms = classify_transforms(imgsz)
- self.album_transforms = classify_albumentations(augment, imgsz) if augment else None
- self.cache_ram = cache is True or cache == 'ram'
- self.cache_disk = cache == 'disk'
- self.samples = [list(x) + [Path(x[0]).with_suffix('.npy'), None] for x in self.samples] # file, index, npy, im
-
- def __getitem__(self, i):
- """Retrieves data items of 'dataset' via indices & creates InfiniteDataLoader."""
- f, j, fn, im = self.samples[i] # filename, index, filename.with_suffix('.npy'), image
- if self.cache_ram and im is None:
- im = self.samples[i][3] = cv2.imread(f)
- elif self.cache_disk:
- if not fn.exists(): # load npy
- np.save(fn.as_posix(), cv2.imread(f))
- im = np.load(fn)
- else: # read image
- im = cv2.imread(f) # BGR
- if self.album_transforms:
- sample = self.album_transforms(image=cv2.cvtColor(im, cv2.COLOR_BGR2RGB))['image']
- else:
- sample = self.torch_transforms(im)
- return sample, j
-
-
-def create_classification_dataloader(path,
- imgsz=224,
- batch_size=16,
- augment=True,
- cache=False,
- rank=-1,
- workers=8,
- shuffle=True):
- """Returns Dataloader object to be used with YOLOv5 Classifier."""
- with torch_distributed_zero_first(rank): # init dataset *.cache only once if DDP
- dataset = ClassificationDataset(root=path, imgsz=imgsz, augment=augment, cache=cache)
- batch_size = min(batch_size, len(dataset))
- nd = torch.cuda.device_count()
- nw = min([os.cpu_count() // max(nd, 1), batch_size if batch_size > 1 else 0, workers])
- sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle)
- generator = torch.Generator()
- generator.manual_seed(6148914691236517205 + RANK)
- return InfiniteDataLoader(dataset,
- batch_size=batch_size,
- shuffle=shuffle and sampler is None,
- num_workers=nw,
- sampler=sampler,
- pin_memory=PIN_MEMORY,
- worker_init_fn=seed_worker,
- generator=generator) # or DataLoader(persistent_workers=True)
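
The trickiest logic deleted above is the 4-mosaic placement geometry in load_mosaic(). For readers tracking what this removal drops, here is a minimal standalone sketch of just that placement math (hypothetical helper name mosaic4; numpy-only, labels omitted), not the library's API:

import random

import numpy as np

def mosaic4(images, s=640):
    """Place 4 images around a random center on a 2s x 2s gray canvas (fill value 114)."""
    canvas = np.full((2 * s, 2 * s, 3), 114, dtype=np.uint8)
    yc, xc = (int(random.uniform(s // 2, 3 * s // 2)) for _ in range(2))  # random mosaic center
    for i, im in enumerate(images[:4]):
        h, w = im.shape[:2]
        if i == 0:  # top left: the image's bottom-right corner sits on the center
            x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc
            x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h
        elif i == 1:  # top right
            x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, 2 * s), yc
            x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
        elif i == 2:  # bottom left
            x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(2 * s, yc + h)
            x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
        else:  # bottom right
            x1a, y1a, x2a, y2a = xc, yc, min(xc + w, 2 * s), min(2 * s, yc + h)
            x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
        canvas[y1a:y2a, x1a:x2a] = im[y1b:y2b, x1b:x2b]  # paste the matching crop
    return canvas

Each branch pairs a canvas region (x1a..y2a) with a source crop (x1b..y2b) of identical size, so every tile's inner corner meets the random center; labels are then shifted by padw = x1a - x1b and padh = y1a - y1b, exactly as in the deleted code.
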
diff --git a/ultralytics/yolo/data/utils.py b/ultralytics/yolo/data/utils.py
index 9829671..146f23e 100644
--- a/ultralytics/yolo/data/utils.py
+++ b/ultralytics/yolo/data/utils.py
@@ -4,6 +4,7 @@ import contextlib
import hashlib
import json
import os
+import random
import subprocess
import time
import zipfile
@@ -522,3 +523,35 @@ def zip_directory(dir, use_zipfile_library=True):
else:
import shutil
shutil.make_archive(dir, 'zip', dir)
+
+
+def autosplit(path=DATASETS_DIR / 'coco128/images', weights=(0.9, 0.1, 0.0), annotated_only=False):
+ """
+ Autosplit a dataset into train/val/test splits and save the resulting splits into autosplit_*.txt files.
+
+ Args:
+ path (Path, optional): Path to images directory. Defaults to DATASETS_DIR / 'coco128/images'.
+ weights (list | tuple, optional): Train, validation, and test split fractions. Defaults to (0.9, 0.1, 0.0).
+ annotated_only (bool, optional): If True, only images with an associated txt file are used. Defaults to False.
+
+ Usage:
+ from ultralytics.yolo.data.utils import autosplit
+ autosplit()
+ """
+
+ path = Path(path) # images dir
+ files = sorted(x for x in path.rglob('*.*') if x.suffix[1:].lower() in IMG_FORMATS) # image files only
+ n = len(files) # number of files
+ random.seed(0) # for reproducibility
+ indices = random.choices([0, 1, 2], weights=weights, k=n) # assign each image to a split
+
+ txt = ['autosplit_train.txt', 'autosplit_val.txt', 'autosplit_test.txt'] # 3 txt files
+ for x in txt:
+ if (path.parent / x).exists():
+ (path.parent / x).unlink() # remove existing
+
+ LOGGER.info(f'Autosplitting images from {path}' + ', using *.txt labeled images only' * annotated_only)
+ for i, img in tqdm(zip(indices, files), total=n):
+ if not annotated_only or Path(img2label_paths([str(img)])[0]).exists(): # check label
+ with open(path.parent / txt[i], 'a') as f:
+ f.write(f'./{img.relative_to(path.parent).as_posix()}' + '\n') # add image to txt file
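
autosplit() now lives in ultralytics.yolo.data.utils with unchanged behavior. A quick usage sketch against the new module path (the dataset path and weights below are illustrative):

from ultralytics.yolo.data.utils import autosplit

# 80/10/10 train/val/test split, keeping only images with a matching *.txt label file
autosplit(path='datasets/coco128/images', weights=(0.8, 0.1, 0.1), annotated_only=True)

The split files are written next to the images directory as autosplit_train.txt, autosplit_val.txt and autosplit_test.txt.
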
diff --git a/ultralytics/yolo/engine/trainer.py b/ultralytics/yolo/engine/trainer.py
index 144be9c..26ceb6c 100644
--- a/ultralytics/yolo/engine/trainer.py
+++ b/ultralytics/yolo/engine/trainer.py
@@ -244,7 +244,7 @@ class BaseTrainer:
metric_keys = self.validator.metrics.keys + self.label_loss_items(prefix='val')
self.metrics = dict(zip(metric_keys, [0] * len(metric_keys))) # TODO: init metrics for plot_results()?
self.ema = ModelEMA(self.model)
- if self.args.plots and not self.args.v5loader:
+ if self.args.plots:
self.plot_training_labels()
# Optimizer
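
With the v5loader guard removed, plot_training_labels() runs for every trainer whenever plotting is enabled. A minimal sketch via the public API (model and dataset names are illustrative):

from ultralytics import YOLO

model = YOLO('yolov8n.pt')
model.train(data='coco128.yaml', epochs=1, plots=True)  # label plots no longer depend on the loader choice
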
diff --git a/ultralytics/yolo/v8/detect/train.py b/ultralytics/yolo/v8/detect/train.py
index 1b475ed..abb94fc 100644
--- a/ultralytics/yolo/v8/detect/train.py
+++ b/ultralytics/yolo/v8/detect/train.py
@@ -6,9 +6,8 @@ import numpy as np
from ultralytics.nn.tasks import DetectionModel
from ultralytics.yolo import v8
from ultralytics.yolo.data import build_dataloader, build_yolo_dataset
-from ultralytics.yolo.data.dataloaders.v5loader import create_dataloader
from ultralytics.yolo.engine.trainer import BaseTrainer
-from ultralytics.yolo.utils import DEFAULT_CFG, LOGGER, RANK, colorstr
+from ultralytics.yolo.utils import DEFAULT_CFG, LOGGER, RANK
from ultralytics.yolo.utils.plotting import plot_images, plot_labels, plot_results
from ultralytics.yolo.utils.torch_utils import de_parallel, torch_distributed_zero_first
@@ -17,7 +16,8 @@ from ultralytics.yolo.utils.torch_utils import de_parallel, torch_distributed_ze
class DetectionTrainer(BaseTrainer):
def build_dataset(self, img_path, mode='train', batch=None):
- """Build YOLO Dataset
+ """
+ Build YOLO Dataset.
Args:
img_path (str): Path to the folder containing images.
@@ -28,27 +28,7 @@ class DetectionTrainer(BaseTrainer):
return build_yolo_dataset(self.args, img_path, batch, self.data, mode=mode, rect=mode == 'val', stride=gs)
def get_dataloader(self, dataset_path, batch_size=16, rank=0, mode='train'):
- """TODO: manage splits differently."""
- # Calculate stride - check if model is initialized
- if self.args.v5loader:
- LOGGER.warning("WARNING ⚠️ 'v5loader' feature is deprecated and will be removed soon. You can train using "
- 'the default YOLOv8 dataloader instead, no argument is needed.')
- gs = max(int(de_parallel(self.model).stride.max() if self.model else 0), 32)
- return create_dataloader(path=dataset_path,
- imgsz=self.args.imgsz,
- batch_size=batch_size,
- stride=gs,
- hyp=vars(self.args),
- augment=mode == 'train',
- cache=self.args.cache,
- pad=0 if mode == 'train' else 0.5,
- rect=self.args.rect or mode == 'val',
- rank=rank,
- workers=self.args.workers,
- close_mosaic=self.args.close_mosaic != 0,
- prefix=colorstr(f'{mode}: '),
- shuffle=mode == 'train',
- seed=self.args.seed)[0]
+ """Construct and return dataloader."""
assert mode in ['train', 'val']
with torch_distributed_zero_first(rank): # init dataset *.cache only once if DDP
dataset = self.build_dataset(dataset_path, mode, batch_size)
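
get_dataloader() is now a thin wrapper over build_dataset() plus build_dataloader(). Assuming an initialized DetectionTrainer instance named trainer, the equivalent direct calls would look roughly like this sketch (internal API, mirroring the validator change below):

dataset = trainer.build_dataset('coco128/images/train', mode='train', batch=16)
loader = build_dataloader(dataset, 16, trainer.args.workers, shuffle=True, rank=-1)
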
diff --git a/ultralytics/yolo/v8/detect/val.py b/ultralytics/yolo/v8/detect/val.py
index 77d346c..c28dfb5 100644
--- a/ultralytics/yolo/v8/detect/val.py
+++ b/ultralytics/yolo/v8/detect/val.py
@@ -7,9 +7,8 @@ import numpy as np
import torch
from ultralytics.yolo.data import build_dataloader, build_yolo_dataset
-from ultralytics.yolo.data.dataloaders.v5loader import create_dataloader
from ultralytics.yolo.engine.validator import BaseValidator
-from ultralytics.yolo.utils import DEFAULT_CFG, LOGGER, colorstr, ops
+from ultralytics.yolo.utils import DEFAULT_CFG, LOGGER, ops
from ultralytics.yolo.utils.checks import check_requirements
from ultralytics.yolo.utils.metrics import ConfusionMatrix, DetMetrics, box_iou
from ultralytics.yolo.utils.plotting import output_to_target, plot_images
@@ -186,28 +185,9 @@ class DetectionValidator(BaseValidator):
return build_yolo_dataset(self.args, img_path, batch, self.data, mode=mode, stride=gs)
def get_dataloader(self, dataset_path, batch_size):
- """TODO: manage splits differently."""
- # Calculate stride - check if model is initialized
- if self.args.v5loader:
- LOGGER.warning("WARNING ⚠️ 'v5loader' feature is deprecated and will be removed soon. You can train using "
- 'the default YOLOv8 dataloader instead, no argument is needed.')
- gs = max(int(de_parallel(self.model).stride if self.model else 0), 32)
- return create_dataloader(path=dataset_path,
- imgsz=self.args.imgsz,
- batch_size=batch_size,
- stride=gs,
- hyp=vars(self.args),
- cache=False,
- pad=0.5,
- rect=self.args.rect,
- workers=self.args.workers,
- prefix=colorstr(f'{self.args.mode}: '),
- shuffle=False,
- seed=self.args.seed)[0]
-
+ """Construct and return dataloader."""
dataset = self.build_dataset(dataset_path, batch=batch_size, mode='val')
- dataloader = build_dataloader(dataset, batch_size, self.args.workers, shuffle=False, rank=-1)
- return dataloader
+ return build_dataloader(dataset, batch_size, self.args.workers, shuffle=False, rank=-1)
def plot_val_samples(self, batch, ni):
"""Plot validation image samples."""