ultralytics 8.0.89
SAM predict and auto-annotate (#2298)
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Yonghye Kwon <developer.0hye@gmail.com>
Co-authored-by: Paula Derrenger <107626595+pderrenger@users.noreply.github.com>
Co-authored-by: Dhruv Nair <dhruv.nair@gmail.com>
Co-authored-by: Laughing <61612323+Laughing-q@users.noreply.github.com>
Co-authored-by: Ayush Chaurasia <ayush.chaurarsia@gmail.com>
Co-authored-by: Snyk bot <snyk-bot@snyk.io>
Co-authored-by: Laughing-q <1185102784@qq.com>
@@ -25,7 +25,6 @@ verbose: True  # whether to print verbose output
 seed: 0  # random seed for reproducibility
 deterministic: True  # whether to enable deterministic mode
 single_cls: False  # train multi-class data as single-class
-image_weights: False  # use weighted image selection for training
 rect: False  # rectangular training if mode='train' or rectangular validation if mode='val'
 cos_lr: False  # use cosine learning rate scheduler
 close_mosaic: 0  # (int) disable mosaic augmentation for final epochs
@@ -1,9 +1,9 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license

 from .base import BaseDataset
-from .build import build_classification_dataloader, build_dataloader, load_inference_source
+from .build import build_dataloader, build_yolo_dataset, load_inference_source
 from .dataset import ClassificationDataset, SemanticDataset, YOLODataset
 from .dataset_wrappers import MixAndRectDataset

 __all__ = ('BaseDataset', 'ClassificationDataset', 'MixAndRectDataset', 'SemanticDataset', 'YOLODataset',
-           'build_classification_dataloader', 'build_dataloader', 'load_inference_source')
+           'build_yolo_dataset', 'build_dataloader', 'load_inference_source')
ultralytics/yolo/data/annotator.py (new file, 42 lines)
@@ -0,0 +1,42 @@
+from pathlib import Path
+
+from ultralytics import YOLO
+from ultralytics.vit.sam import PromptPredictor, build_sam
+from ultralytics.yolo.utils.torch_utils import select_device
+
+
+def auto_annotate(data, det_model='yolov8x.pt', sam_model='sam_b.pt', device='', output_dir=None):
+    device = select_device(device)
+    det_model = YOLO(det_model)
+    sam_model = build_sam(sam_model)
+    det_model.to(device)
+    sam_model.to(device)
+
+    if not output_dir:
+        output_dir = Path(str(data)).parent / 'labels'
+    Path(output_dir).mkdir(exist_ok=True, parents=True)
+
+    prompt_predictor = PromptPredictor(sam_model)
+    det_results = det_model(data, stream=True)
+
+    for result in det_results:
+        boxes = result.boxes.xyxy  # Boxes object for bbox outputs
+        class_ids = result.boxes.cls.int().tolist()  # noqa
+        prompt_predictor.set_image(result.orig_img)
+        masks, _, _ = prompt_predictor.predict_torch(
+            point_coords=None,
+            point_labels=None,
+            boxes=prompt_predictor.transform.apply_boxes_torch(boxes, result.orig_shape[:2]),
+            multimask_output=False,
+        )
+
+        result.update(masks=masks.squeeze(1))
+        segments = result.masks.xyn  # noqa
+
+        with open(str(Path(output_dir) / Path(result.path).stem) + '.txt', 'w') as f:
+            for i in range(len(segments)):
+                s = segments[i]
+                if len(s) == 0:
+                    continue
+                segment = map(str, segments[i].reshape(-1).tolist())
+                f.write(f'{class_ids[i]} ' + ' '.join(segment) + '\n')
@@ -24,17 +24,17 @@ class BaseDataset(Dataset):
     Base dataset class for loading and processing image data.

     Args:
-        img_path (str): Image path.
-        imgsz (int): Target image size for resizing. Default is 640.
-        cache (bool): Cache images in memory or on disk for faster loading. Default is False.
-        augment (bool): Apply data augmentation. Default is True.
-        hyp (dict): Dictionary of hyperparameters for data augmentation. Default is None.
-        prefix (str): Prefix for file paths. Default is an empty string.
-        rect (bool): Enable rectangular training. Default is False.
-        batch_size (int): Batch size for rectangular training. Default is None.
-        stride (int): Stride for rectangular training. Default is 32.
-        pad (float): Padding for rectangular training. Default is 0.5.
-        single_cls (bool): Use a single class for all labels. Default is False.
+        img_path (str): Path to the folder containing images.
+        imgsz (int, optional): Image size. Defaults to 640.
+        cache (bool, optional): Cache images to RAM or disk during training. Defaults to False.
+        augment (bool, optional): If True, data augmentation is applied. Defaults to True.
+        hyp (dict, optional): Hyperparameters to apply data augmentation. Defaults to None.
+        prefix (str, optional): Prefix to print in log messages. Defaults to ''.
+        rect (bool, optional): If True, rectangular training is used. Defaults to False.
+        batch_size (int, optional): Size of batches. Defaults to None.
+        stride (int, optional): Stride. Defaults to 32.
+        pad (float, optional): Padding. Defaults to 0.0.
+        single_cls (bool, optional): If True, single class training is used. Defaults to False.
+        classes (list): List of included classes. Default is None.

     Attributes:
@@ -14,9 +14,8 @@ from ultralytics.yolo.data.dataloaders.stream_loaders import (LOADERS, LoadImage
 from ultralytics.yolo.data.utils import IMG_FORMATS, VID_FORMATS
 from ultralytics.yolo.utils.checks import check_file

-from ..utils import LOGGER, RANK, colorstr
-from ..utils.torch_utils import torch_distributed_zero_first
-from .dataset import ClassificationDataset, YOLODataset
+from ..utils import RANK, colorstr
+from .dataset import YOLODataset
 from .utils import PIN_MEMORY
@@ -70,34 +69,31 @@ def seed_worker(worker_id):  # noqa
     random.seed(worker_seed)


-def build_dataloader(cfg, batch, img_path, data_info, stride=32, rect=False, rank=-1, mode='train'):
-    """Return an InfiniteDataLoader or DataLoader for training or validation set."""
-    assert mode in ['train', 'val']
-    shuffle = mode == 'train'
-    if cfg.rect and shuffle:
-        LOGGER.warning("WARNING ⚠️ 'rect=True' is incompatible with DataLoader shuffle, setting shuffle=False")
-        shuffle = False
-    with torch_distributed_zero_first(rank):  # init dataset *.cache only once if DDP
-        dataset = YOLODataset(
-            img_path=img_path,
-            imgsz=cfg.imgsz,
-            batch_size=batch,
-            augment=mode == 'train',  # augmentation
-            hyp=cfg,  # TODO: probably add a get_hyps_from_cfg function
-            rect=cfg.rect or rect,  # rectangular batches
-            cache=cfg.cache or None,
-            single_cls=cfg.single_cls or False,
-            stride=int(stride),
-            pad=0.0 if mode == 'train' else 0.5,
-            prefix=colorstr(f'{mode}: '),
-            use_segments=cfg.task == 'segment',
-            use_keypoints=cfg.task == 'pose',
-            classes=cfg.classes,
-            data=data_info)
+def build_yolo_dataset(cfg, img_path, batch, data_info, mode='train', rect=False, stride=32):
+    """Build YOLO Dataset"""
+    dataset = YOLODataset(
+        img_path=img_path,
+        imgsz=cfg.imgsz,
+        batch_size=batch,
+        augment=mode == 'train',  # augmentation
+        hyp=cfg,  # TODO: probably add a get_hyps_from_cfg function
+        rect=cfg.rect or rect,  # rectangular batches
+        cache=cfg.cache or None,
+        single_cls=cfg.single_cls or False,
+        stride=int(stride),
+        pad=0.0 if mode == 'train' else 0.5,
+        prefix=colorstr(f'{mode}: '),
+        use_segments=cfg.task == 'segment',
+        use_keypoints=cfg.task == 'pose',
+        classes=cfg.classes,
+        data=data_info)
+    return dataset
+
+
+def build_dataloader(dataset, batch, workers, shuffle=True, rank=-1):
+    """Return an InfiniteDataLoader or DataLoader for training or validation set."""
     batch = min(batch, len(dataset))
     nd = torch.cuda.device_count()  # number of CUDA devices
-    workers = cfg.workers if mode == 'train' else cfg.workers * 2
     nw = min([os.cpu_count() // max(nd, 1), batch if batch > 1 else 0, workers])  # number of workers
     sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle)
     generator = torch.Generator()
@@ -110,36 +106,7 @@ def build_dataloader(cfg, batch, img_path, data_info, stride=32, rect=False, ran
                               pin_memory=PIN_MEMORY,
                               collate_fn=getattr(dataset, 'collate_fn', None),
                               worker_init_fn=seed_worker,
-                              generator=generator), dataset
-
-
-# Build classification
-# TODO: using cfg like `build_dataloader`
-def build_classification_dataloader(path,
-                                    imgsz=224,
-                                    batch_size=16,
-                                    augment=True,
-                                    cache=False,
-                                    rank=-1,
-                                    workers=8,
-                                    shuffle=True):
-    """Returns Dataloader object to be used with YOLOv5 Classifier."""
-    with torch_distributed_zero_first(rank):  # init dataset *.cache only once if DDP
-        dataset = ClassificationDataset(root=path, imgsz=imgsz, augment=augment, cache=cache)
-    batch_size = min(batch_size, len(dataset))
-    nd = torch.cuda.device_count()
-    nw = min([os.cpu_count() // max(nd, 1), batch_size if batch_size > 1 else 0, workers])
-    sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle)
-    generator = torch.Generator()
-    generator.manual_seed(6148914691236517205 + RANK)
-    return InfiniteDataLoader(dataset,
-                              batch_size=batch_size,
-                              shuffle=shuffle and sampler is None,
-                              num_workers=nw,
-                              sampler=sampler,
-                              pin_memory=PIN_MEMORY,
-                              worker_init_fn=seed_worker,
-                              generator=generator)  # or DataLoader(persistent_workers=True)
+                              generator=generator)


 def check_source(source):
@@ -168,7 +135,7 @@ def check_source(source):
     return source, webcam, screenshot, from_img, in_memory, tensor


-def load_inference_source(source=None, transforms=None, imgsz=640, vid_stride=1, stride=32, auto=True):
+def load_inference_source(source=None, imgsz=640, vid_stride=1):
     """
     Loads an inference source for object detection and applies necessary transformations.

@@ -192,23 +159,13 @@ def load_inference_source(source=None, transforms=None, imgsz=640, vid_stride=1,
     elif in_memory:
         dataset = source
     elif webcam:
-        dataset = LoadStreams(source,
-                              imgsz=imgsz,
-                              stride=stride,
-                              auto=auto,
-                              transforms=transforms,
-                              vid_stride=vid_stride)
+        dataset = LoadStreams(source, imgsz=imgsz, vid_stride=vid_stride)
     elif screenshot:
-        dataset = LoadScreenshots(source, imgsz=imgsz, stride=stride, auto=auto, transforms=transforms)
+        dataset = LoadScreenshots(source, imgsz=imgsz)
     elif from_img:
-        dataset = LoadPilAndNumpy(source, imgsz=imgsz, stride=stride, auto=auto, transforms=transforms)
+        dataset = LoadPilAndNumpy(source, imgsz=imgsz)
     else:
-        dataset = LoadImages(source,
-                             imgsz=imgsz,
-                             stride=stride,
-                             auto=auto,
-                             transforms=transforms,
-                             vid_stride=vid_stride)
+        dataset = LoadImages(source, imgsz=imgsz, vid_stride=vid_stride)

     # Attach source types to the dataset
     setattr(dataset, 'source_type', source_type)
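
Dataset and dataloader construction are now decoupled: build_yolo_dataset builds the YOLODataset, and the slimmed-down build_dataloader only wraps an already-built dataset. An illustrative sketch of the new two-step call (not part of the diff; the cfg namespace lists only assumed minimal fields and the image path stands in for a real YOLO-format folder):

from types import SimpleNamespace

from ultralytics.yolo.data import build_dataloader, build_yolo_dataset

cfg = SimpleNamespace(imgsz=640, rect=False, cache=None, single_cls=False, task='detect',
                      classes=None, mask_ratio=4, overlap_mask=True)  # hypothetical minimal cfg
data = {'names': {0: 'person'}}  # placeholder dataset-YAML dict

dataset = build_yolo_dataset(cfg, 'coco8/images/val', batch=8, data_info=data, mode='val')
loader = build_dataloader(dataset, batch=8, workers=0, shuffle=False, rank=-1)  # rank=-1: no DDP sampler
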
@@ -15,7 +15,6 @@ import requests
 import torch
 from PIL import Image

-from ultralytics.yolo.data.augment import LetterBox
 from ultralytics.yolo.data.utils import IMG_FORMATS, VID_FORMATS
 from ultralytics.yolo.utils import LOGGER, ROOT, is_colab, is_kaggle, ops
 from ultralytics.yolo.utils.checks import check_requirements
@@ -31,12 +30,11 @@ class SourceTypes:

 class LoadStreams:
     # YOLOv8 streamloader, i.e. `yolo predict source='rtsp://example.com/media.mp4'  # RTSP, RTMP, HTTP streams`
-    def __init__(self, sources='file.streams', imgsz=640, stride=32, auto=True, transforms=None, vid_stride=1):
+    def __init__(self, sources='file.streams', imgsz=640, vid_stride=1):
         """Initialize instance variables and check for consistent input stream shapes."""
         torch.backends.cudnn.benchmark = True  # faster for fixed-size inference
         self.mode = 'stream'
         self.imgsz = imgsz
-        self.stride = stride
         self.vid_stride = vid_stride  # video frame-rate stride
         sources = Path(sources).read_text().rsplit() if os.path.isfile(sources) else [sources]
         n = len(sources)
@@ -72,10 +70,6 @@ class LoadStreams:
             LOGGER.info('')  # newline

         # Check for common shapes
-        s = np.stack([LetterBox(imgsz, auto, stride=stride)(image=x).shape for x in self.imgs])
-        self.rect = np.unique(s, axis=0).shape[0] == 1  # rect inference if all shapes equal
-        self.auto = auto and self.rect
-        self.transforms = transforms  # optional
         self.bs = self.__len__()

         if not self.rect:
@@ -110,14 +104,7 @@ class LoadStreams:
             raise StopIteration

         im0 = self.imgs.copy()
-        if self.transforms:
-            im = np.stack([self.transforms(x) for x in im0])  # transforms
-        else:
-            im = np.stack([LetterBox(self.imgsz, self.auto, stride=self.stride)(image=x) for x in im0])
-            im = im[..., ::-1].transpose((0, 3, 1, 2))  # BGR to RGB, BHWC to BCHW
-            im = np.ascontiguousarray(im)  # contiguous
-
-        return self.sources, im, im0, None, ''
+        return self.sources, im0, None, ''

     def __len__(self):
         """Return the length of the sources object."""
@@ -126,7 +113,7 @@

 class LoadScreenshots:
     # YOLOv8 screenshot dataloader, i.e. `yolo predict source=screen`
-    def __init__(self, source, imgsz=640, stride=32, auto=True, transforms=None):
+    def __init__(self, source, imgsz=640):
         """source = [screen_number left top width height] (pixels)."""
         check_requirements('mss')
         import mss  # noqa
@@ -140,9 +127,6 @@ class LoadScreenshots:
         elif len(params) == 5:
             self.screen, left, top, width, height = (int(x) for x in params)
         self.imgsz = imgsz
-        self.stride = stride
-        self.transforms = transforms
-        self.auto = auto
         self.mode = 'stream'
         self.frame = 0
         self.sct = mss.mss()
@@ -165,19 +149,13 @@ class LoadScreenshots:
         im0 = np.array(self.sct.grab(self.monitor))[:, :, :3]  # [:, :, :3] BGRA to BGR
         s = f'screen {self.screen} (LTWH): {self.left},{self.top},{self.width},{self.height}: '

-        if self.transforms:
-            im = self.transforms(im0)  # transforms
-        else:
-            im = LetterBox(self.imgsz, self.auto, stride=self.stride)(image=im0)
-            im = im.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
-            im = np.ascontiguousarray(im)  # contiguous
         self.frame += 1
-        return str(self.screen), im, im0, None, s  # screen, img, original img, im0s, s
+        return str(self.screen), im0, None, s  # screen, img, original img, im0s, s


 class LoadImages:
     # YOLOv8 image/video dataloader, i.e. `yolo predict source=image.jpg/vid.mp4`
-    def __init__(self, path, imgsz=640, stride=32, auto=True, transforms=None, vid_stride=1):
+    def __init__(self, path, imgsz=640, vid_stride=1):
         """Initialize the Dataloader and raise FileNotFoundError if file not found."""
         if isinstance(path, str) and Path(path).suffix == '.txt':  # *.txt file with img/vid/dir on each line
             path = Path(path).read_text().rsplit()
@@ -198,13 +176,10 @@ class LoadImages:
         ni, nv = len(images), len(videos)

         self.imgsz = imgsz
-        self.stride = stride
         self.files = images + videos
         self.nf = ni + nv  # number of files
         self.video_flag = [False] * ni + [True] * nv
         self.mode = 'image'
-        self.auto = auto
-        self.transforms = transforms  # optional
         self.vid_stride = vid_stride  # video frame-rate stride
         self.bs = 1
         if any(videos):
@@ -254,14 +229,7 @@ class LoadImages:
             raise FileNotFoundError(f'Image Not Found {path}')
         s = f'image {self.count}/{self.nf} {path}: '

-        if self.transforms:
-            im = self.transforms(im0)  # transforms
-        else:
-            im = LetterBox(self.imgsz, self.auto, stride=self.stride)(image=im0)
-            im = im.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
-            im = np.ascontiguousarray(im)  # contiguous
-
-        return path, im, im0, self.cap, s
+        return [path], [im0], self.cap, s

     def _new_video(self, path):
         """Create a new video capture object."""
@@ -290,16 +258,13 @@

 class LoadPilAndNumpy:

-    def __init__(self, im0, imgsz=640, stride=32, auto=True, transforms=None):
+    def __init__(self, im0, imgsz=640):
         """Initialize PIL and Numpy Dataloader."""
         if not isinstance(im0, list):
             im0 = [im0]
         self.paths = [getattr(im, 'filename', f'image{i}.jpg') for i, im in enumerate(im0)]
         self.im0 = [self._single_check(im) for im in im0]
         self.imgsz = imgsz
-        self.stride = stride
-        self.auto = auto
-        self.transforms = transforms
         self.mode = 'image'
         # Generate fake paths
         self.bs = len(self.im0)
@@ -315,16 +280,6 @@ class LoadPilAndNumpy:
         im = np.ascontiguousarray(im)  # contiguous
         return im

-    def _single_preprocess(self, im, auto):
-        """Preprocesses a single image for inference."""
-        if self.transforms:
-            im = self.transforms(im)  # transforms
-        else:
-            im = LetterBox(self.imgsz, auto=auto, stride=self.stride)(image=im)
-            im = im.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
-            im = np.ascontiguousarray(im)  # contiguous
-        return im
-
     def __len__(self):
         """Returns the length of the 'im0' attribute."""
         return len(self.im0)
@@ -333,11 +288,8 @@ class LoadPilAndNumpy:
         """Returns batch paths, images, processed images, None, ''."""
         if self.count == 1:  # loop only once as it's batch inference
             raise StopIteration
-        auto = all(x.shape == self.im0[0].shape for x in self.im0) and self.auto
-        im = [self._single_preprocess(im, auto) for im in self.im0]
-        im = np.stack(im, 0) if len(im) > 1 else im[0][None]
         self.count += 1
-        return self.paths, im, self.im0, None, ''
+        return self.paths, self.im0, None, ''

     def __iter__(self):
         """Enables iteration for class LoadPilAndNumpy."""
@@ -362,7 +314,7 @@ class LoadTensor:
         if self.count == 1:
             raise StopIteration
         self.count += 1
-        return None, self.im0, self.im0, None, ''  # self.paths, im, self.im0, None, ''
+        return None, self.im0, None, ''  # self.paths, im, self.im0, None, ''

     def __len__(self):
         """Returns the batch size."""
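
The net effect of these stream_loaders changes: every loader now yields a 4-tuple of raw BGR frames instead of the old 5-tuple that also carried a letterboxed array, because resizing, BGR-to-RGB conversion and normalization all moved into BasePredictor.preprocess (below). A small sketch of the new contract ('bus.jpg' is a placeholder path, the printed shape an example):

from ultralytics.yolo.data import load_inference_source

dataset = load_inference_source('bus.jpg', imgsz=640)
for paths, im0s, cap, s in dataset:  # the preprocessed `im` element is gone
    print(paths, [im.shape for im in im0s])  # raw HWC BGR images, e.g. ['bus.jpg'] [(1080, 810, 3)]
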
@@ -21,21 +21,9 @@ class YOLODataset(BaseDataset):
     Dataset class for loading object detection and/or segmentation labels in YOLO format.

     Args:
-        img_path (str): Path to the folder containing images.
-        imgsz (int, optional): Image size. Defaults to 640.
-        cache (bool, optional): Cache images to RAM or disk during training. Defaults to False.
-        augment (bool, optional): If True, data augmentation is applied. Defaults to True.
-        hyp (dict, optional): Hyperparameters to apply data augmentation. Defaults to None.
-        prefix (str, optional): Prefix to print in log messages. Defaults to ''.
-        rect (bool, optional): If True, rectangular training is used. Defaults to False.
-        batch_size (int, optional): Size of batches. Defaults to None.
-        stride (int, optional): Stride. Defaults to 32.
-        pad (float, optional): Padding. Defaults to 0.0.
-        single_cls (bool, optional): If True, single class training is used. Defaults to False.
-        data (dict, optional): A dataset YAML dictionary. Defaults to None.
         use_segments (bool, optional): If True, segmentation masks are used as labels. Defaults to False.
         use_keypoints (bool, optional): If True, keypoints are used as labels. Defaults to False.
-        classes (list): List of included classes. Default is None.
+        data (dict, optional): A dataset YAML dictionary. Defaults to None.

     Returns:
         (torch.utils.data.Dataset): A PyTorch dataset object that can be used for training an object detection model.
@@ -43,28 +31,12 @@ class YOLODataset(BaseDataset):
     cache_version = '1.0.2'  # dataset labels *.cache version, >= 1.0.0 for YOLOv8
     rand_interp_methods = [cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4]

-    def __init__(self,
-                 img_path,
-                 imgsz=640,
-                 cache=False,
-                 augment=True,
-                 hyp=None,
-                 prefix='',
-                 rect=False,
-                 batch_size=None,
-                 stride=32,
-                 pad=0.0,
-                 single_cls=False,
-                 use_segments=False,
-                 use_keypoints=False,
-                 data=None,
-                 classes=None):
+    def __init__(self, *args, data=None, use_segments=False, use_keypoints=False, **kwargs):
         self.use_segments = use_segments
         self.use_keypoints = use_keypoints
         self.data = data
         assert not (self.use_segments and self.use_keypoints), 'Can not use both segments and keypoints.'
-        super().__init__(img_path, imgsz, cache, augment, hyp, prefix, rect, batch_size, stride, pad, single_cls,
-                         classes)
+        super().__init__(*args, **kwargs)

     def cache_labels(self, path=Path('./labels.cache')):
         """Cache dataset labels, check images and read shapes.
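
The YOLODataset refactor is a standard *args/**kwargs forwarding pattern: the subclass consumes only its own keywords (data, use_segments, use_keypoints) and passes everything else to BaseDataset unchanged, so the long argument list no longer has to be repeated in two places. A self-contained sketch of the pattern (illustrative class names, not the library's):

class Base:
    def __init__(self, img_path, imgsz=640, cache=False):
        self.img_path, self.imgsz, self.cache = img_path, imgsz, cache

class Yolo(Base):
    def __init__(self, *args, data=None, use_segments=False, use_keypoints=False, **kwargs):
        assert not (use_segments and use_keypoints), 'Can not use both segments and keypoints.'
        self.data, self.use_segments, self.use_keypoints = data, use_segments, use_keypoints
        super().__init__(*args, **kwargs)  # everything else forwarded untouched

d = Yolo('images/', imgsz=320, data={'nc': 1}, use_segments=True)
print(d.imgsz, d.use_segments)  # 320 True
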
@@ -453,7 +453,7 @@ class YOLO:
                                              reduction_factor=3)

         # Define the callbacks for the hyperparameter search
-        tuner_callbacks = [WandbLoggerCallback(project='yolov8_tune') if wandb else None]
+        tuner_callbacks = [WandbLoggerCallback(project='yolov8_tune')] if wandb else []

         # Create the Ray Tune hyperparameter search tuner
         tuner = tune.Tuner(trainable_with_resources,
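
This one-line change is a real bug fix: the old conditional applied to the list element, so when wandb was unavailable the list became [None] instead of empty. A standalone illustration:

wandb = None  # simulate wandb not being installed

old = ['cb' if wandb else None]  # -> [None]: Ray Tune would receive a None callback
new = ['cb'] if wandb else []    # -> []: the callback list is simply empty
print(old, new)
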
@@ -31,11 +31,13 @@ import platform
 from pathlib import Path

 import cv2
+import numpy as np
 import torch

 from ultralytics.nn.autobackend import AutoBackend
 from ultralytics.yolo.cfg import get_cfg
 from ultralytics.yolo.data import load_inference_source
-from ultralytics.yolo.data.augment import classify_transforms
+from ultralytics.yolo.data.augment import LetterBox, classify_transforms
 from ultralytics.yolo.utils import DEFAULT_CFG, LOGGER, SETTINGS, callbacks, colorstr, ops
 from ultralytics.yolo.utils.checks import check_imgsz, check_imshow
 from ultralytics.yolo.utils.files import increment_path
@@ -106,9 +108,23 @@ class BasePredictor:
         self.callbacks = _callbacks or callbacks.get_default_callbacks()
         callbacks.add_integration_callbacks(self)

-    def preprocess(self, img):
-        """Prepares input image before inference."""
-        pass
+    def preprocess(self, im):
+        """Prepares input image before inference.
+
+        Args:
+            im (torch.Tensor | List(np.ndarray)): (N, 3, h, w) for tensor, [(h, w, 3) x N] for list.
+        """
+        if not isinstance(im, torch.Tensor):
+            auto = all(x.shape == im[0].shape for x in im) and self.model.pt
+            im = np.stack([LetterBox(self.imgsz, auto=auto, stride=self.model.stride)(image=x) for x in im])
+            im = im[..., ::-1].transpose((0, 3, 1, 2))  # BGR to RGB, BHWC to BCHW, (n, 3, h, w)
+            im = np.ascontiguousarray(im)  # contiguous
+            im = torch.from_numpy(im)
+        # NOTE: assuming im with (b, 3, h, w) if it's a tensor
+        img = im.to(self.device)
+        img = img.half() if self.model.fp16 else img.float()  # uint8 to fp16/32
+        img /= 255  # 0 - 255 to 0.0 - 1.0
+        return img

     def write_results(self, idx, results, batch):
         """Write inference results to a file or directory."""
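
All per-loader preprocessing is now funneled through this single method. A rough standalone equivalent of the numpy branch, assuming a 640-pixel target and stride 32 (shapes only, no model involved; the printed shape is what this particular input should produce):

import numpy as np

from ultralytics.yolo.data.augment import LetterBox

frames = [np.zeros((480, 640, 3), dtype=np.uint8)]  # one fake BGR frame
auto = all(x.shape == frames[0].shape for x in frames)  # uniform shapes -> minimal padding
im = np.stack([LetterBox((640, 640), auto=auto, stride=32)(image=x) for x in frames])
im = np.ascontiguousarray(im[..., ::-1].transpose((0, 3, 1, 2)))  # BGR->RGB, BHWC->BCHW
print(im.shape)  # (1, 3, 480, 640) here, since 480 is already a stride multiple
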
@@ -165,16 +181,9 @@ class BasePredictor:
     def setup_source(self, source):
         """Sets up source and inference mode."""
         self.imgsz = check_imgsz(self.args.imgsz, stride=self.model.stride, min_dim=2)  # check image size
-        if self.args.task == 'classify':
-            transforms = getattr(self.model.model, 'transforms', classify_transforms(self.imgsz[0]))
-        else:  # predict, segment
-            transforms = None
-        self.dataset = load_inference_source(source=source,
-                                             transforms=transforms,
-                                             imgsz=self.imgsz,
-                                             vid_stride=self.args.vid_stride,
-                                             stride=self.model.stride,
-                                             auto=self.model.pt)
+        self.transforms = getattr(self.model.model, 'transforms', classify_transforms(
+            self.imgsz[0])) if self.args.task == 'classify' else None
+        self.dataset = load_inference_source(source=source, imgsz=self.imgsz, vid_stride=self.args.vid_stride)
         self.source_type = self.dataset.source_type
         if not getattr(self, 'stream', True) and (self.dataset.mode == 'stream' or  # streams
                                                   len(self.dataset) > 1000 or  # images
@@ -207,14 +216,12 @@ class BasePredictor:
         for batch in self.dataset:
             self.run_callbacks('on_predict_batch_start')
             self.batch = batch
-            path, im, im0s, vid_cap, s = batch
+            path, im0s, vid_cap, s = batch
             visualize = increment_path(self.save_dir / Path(path).stem, mkdir=True) if self.args.visualize else False

             # Preprocess
             with self.dt[0]:
-                im = self.preprocess(im)
-                if len(im.shape) == 3:
-                    im = im[None]  # expand for batch dim
+                im = self.preprocess(im0s)

             # Inference
             with self.dt[1]:
@@ -226,7 +233,7 @@ class BasePredictor:
             self.run_callbacks('on_predict_postprocess_end')

             # Visualize, save, write results
-            n = len(im)
+            n = len(im0s)
             for i in range(n):
                 self.results[i].speed = {
                     'preprocess': self.dt[0].dt * 1E3 / n,
@@ -234,8 +241,7 @@ class BasePredictor:
                     'postprocess': self.dt[2].dt * 1E3 / n}
                 if self.source_type.tensor:  # skip write, show and plot operations if input is raw tensor
                     continue
-                p, im0 = (path[i], im0s[i].copy()) if self.source_type.webcam or self.source_type.from_img \
-                    else (path, im0s.copy())
+                p, im0 = path[i], im0s[i].copy()
                 p = Path(p)

                 if self.args.verbose or self.args.save or self.args.save_txt or self.args.show:
@@ -213,7 +213,8 @@ class Results(SimpleClass):
             img = LetterBox(pred_masks.shape[1:])(image=annotator.result())
             img_gpu = torch.as_tensor(img, dtype=torch.float16, device=pred_masks.data.device).permute(
                 2, 0, 1).flip(0).contiguous() / 255
-            annotator.masks(pred_masks.data, colors=[colors(x, True) for x in pred_boxes.cls], im_gpu=img_gpu)
+            idx = pred_boxes.cls if pred_boxes else range(len(pred_masks))
+            annotator.masks(pred_masks.data, colors=[colors(x, True) for x in idx], im_gpu=img_gpu)

         if pred_boxes and show_boxes:
             for d in reversed(pred_boxes):
@@ -481,6 +481,10 @@ class BaseTrainer:
         """
         raise NotImplementedError('get_dataloader function not implemented in trainer')

+    def build_dataset(self, img_path, mode='train', batch=None):
+        """Build dataset"""
+        raise NotImplementedError('build_dataset function not implemented in trainer')
+
     def criterion(self, preds, batch):
         """
         Returns loss and individual loss items as Tensor.
@@ -207,6 +207,10 @@ class BaseValidator:
         """Get data loader from dataset path and batch size."""
         raise NotImplementedError('get_dataloader function not implemented for this validator')

+    def build_dataset(self, img_path):
+        """Build dataset"""
+        raise NotImplementedError('build_dataset function not implemented in validator')
+
     def preprocess(self, batch):
         """Preprocesses an input batch."""
         return batch
@@ -13,20 +13,8 @@ try:
 except (ImportError, AssertionError):
     comet_ml = None

-COMET_MODE = os.getenv('COMET_MODE', 'online')
-COMET_MODEL_NAME = os.getenv('COMET_MODEL_NAME', 'YOLOv8')
-# Determines how many batches of image predictions to log from the validation set
-COMET_EVAL_BATCH_LOGGING_INTERVAL = int(os.getenv('COMET_EVAL_BATCH_LOGGING_INTERVAL', 1))
-# Determines whether to log confusion matrix every evaluation epoch
-COMET_EVAL_LOG_CONFUSION_MATRIX = (os.getenv('COMET_EVAL_LOG_CONFUSION_MATRIX', 'true').lower() == 'true')
-# Determines whether to log image predictions every evaluation epoch
-COMET_EVAL_LOG_IMAGE_PREDICTIONS = (os.getenv('COMET_EVAL_LOG_IMAGE_PREDICTIONS', 'true').lower() == 'true')
-COMET_MAX_IMAGE_PREDICTIONS = int(os.getenv('COMET_MAX_IMAGE_PREDICTIONS', 100))
-
 # Ensures certain logging functions only run for supported tasks
 COMET_SUPPORTED_TASKS = ['detect']
-# Scales reported confidence scores (0.0-1.0) by this value
-COMET_MAX_CONFIDENCE_SCORE = int(os.getenv('COMET_MAX_CONFIDENCE_SCORE', 100))

 # Names of plots created by YOLOv8 that are logged to Comet
 EVALUATION_PLOT_NAMES = 'F1_curve', 'P_curve', 'R_curve', 'PR_curve', 'confusion_matrix'
@@ -35,6 +23,35 @@ LABEL_PLOT_NAMES = 'labels', 'labels_correlogram'
 _comet_image_prediction_count = 0


+def _get_comet_mode():
+    return os.getenv('COMET_MODE', 'online')
+
+
+def _get_comet_model_name():
+    return os.getenv('COMET_MODEL_NAME', 'YOLOv8')
+
+
+def _get_eval_batch_logging_interval():
+    return int(os.getenv('COMET_EVAL_BATCH_LOGGING_INTERVAL', 1))
+
+
+def _get_max_image_predictions_to_log():
+    return int(os.getenv('COMET_MAX_IMAGE_PREDICTIONS', 100))
+
+
+def _scale_confidence_score(score):
+    scale = float(os.getenv('COMET_MAX_CONFIDENCE_SCORE', 100.0))
+    return score * scale
+
+
+def _should_log_confusion_matrix():
+    return os.getenv('COMET_EVAL_LOG_CONFUSION_MATRIX', 'true').lower() == 'true'
+
+
+def _should_log_image_predictions():
+    return os.getenv('COMET_EVAL_LOG_IMAGE_PREDICTIONS', 'true').lower() == 'true'
+
+
 def _get_experiment_type(mode, project_name):
     """Return an experiment based on mode and project name."""
     if mode == 'offline':
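
Replacing the module-level constants with these getters means each Comet setting is read from the environment at call time rather than frozen at import time, so users can configure Comet after importing ultralytics. A standalone illustration of the difference:

import os

FROZEN = os.getenv('COMET_MODE', 'online')  # module-level: evaluated once at import

def _get_comet_mode():
    return os.getenv('COMET_MODE', 'online')  # getter: evaluated on every call

os.environ['COMET_MODE'] = 'offline'  # e.g. set by user code after the import
print(FROZEN, _get_comet_mode())  # -> online offline
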
@@ -48,13 +65,14 @@ def _create_experiment(args):
     if RANK not in (-1, 0):
         return
     try:
-        experiment = _get_experiment_type(COMET_MODE, args.project)
+        comet_mode = _get_comet_mode()
+        experiment = _get_experiment_type(comet_mode, args.project)
         experiment.log_parameters(vars(args))
         experiment.log_others({
-            'eval_batch_logging_interval': COMET_EVAL_BATCH_LOGGING_INTERVAL,
-            'log_confusion_matrix': COMET_EVAL_LOG_CONFUSION_MATRIX,
-            'log_image_predictions': COMET_EVAL_LOG_IMAGE_PREDICTIONS,
-            'max_image_predictions': COMET_MAX_IMAGE_PREDICTIONS, })
+            'eval_batch_logging_interval': _get_eval_batch_logging_interval(),
+            'log_confusion_matrix': _should_log_confusion_matrix(),
+            'log_image_predictions': _should_log_image_predictions(),
+            'max_image_predictions': _get_max_image_predictions_to_log(), })
         experiment.log_other('Created from', 'yolov8')

     except Exception as e:
@@ -74,7 +92,12 @@ def _fetch_trainer_metadata(trainer):
     save_interval = curr_epoch % save_period == 0
     save_assets = save and save_period > 0 and save_interval and not final_epoch

-    return dict(curr_epoch=curr_epoch, curr_step=curr_step, save_assets=save_assets, final_epoch=final_epoch)
+    return dict(
+        curr_epoch=curr_epoch,
+        curr_step=curr_step,
+        save_assets=save_assets,
+        final_epoch=final_epoch,
+    )


 def _scale_bounding_box_to_original_image_shape(box, resized_image_shape, original_image_shape, ratio_pad):
@@ -117,7 +140,10 @@ def _format_ground_truth_annotations_for_detection(img_idx, image_path, batch, c
     data = []
     for box, label in zip(bboxes, cls_labels):
         box = _scale_bounding_box_to_original_image_shape(box, resized_image_shape, original_image_shape, ratio_pad)
-        data.append({'boxes': [box], 'label': f'gt_{label}', 'score': COMET_MAX_CONFIDENCE_SCORE})
+        data.append({
+            'boxes': [box],
+            'label': f'gt_{label}',
+            'score': _scale_confidence_score(1.0), })

     return {'name': 'ground_truth', 'data': data}
@@ -135,7 +161,7 @@ def _format_prediction_annotations_for_detection(image_path, metadata, class_lab
     data = []
     for prediction in predictions:
         boxes = prediction['bbox']
-        score = prediction['score'] * COMET_MAX_CONFIDENCE_SCORE
+        score = _scale_confidence_score(prediction['score'])
         cls_label = prediction['category_id']
         if class_label_map:
             cls_label = str(class_label_map[cls_label])
@@ -207,13 +233,16 @@ def _log_image_predictions(experiment, validator, curr_step):
     dataloader = validator.dataloader
     class_label_map = validator.names

+    batch_logging_interval = _get_eval_batch_logging_interval()
+    max_image_predictions = _get_max_image_predictions_to_log()
+
     for batch_idx, batch in enumerate(dataloader):
-        if (batch_idx + 1) % COMET_EVAL_BATCH_LOGGING_INTERVAL != 0:
+        if (batch_idx + 1) % batch_logging_interval != 0:
             continue

         image_paths = batch['im_file']
         for img_idx, image_path in enumerate(image_paths):
-            if _comet_image_prediction_count >= COMET_MAX_IMAGE_PREDICTIONS:
+            if _comet_image_prediction_count >= max_image_predictions:
                 return

             image_path = Path(image_path)
@@ -244,8 +273,9 @@ def _log_plots(experiment, trainer):

 def _log_model(experiment, trainer):
     """Log the best-trained model to Comet.ml."""
+    model_name = _get_comet_model_name()
     experiment.log_model(
-        COMET_MODEL_NAME,
+        model_name,
         file_or_folder=str(trainer.best),
         file_name='best.pt',
         overwrite=True,
|
||||
def on_pretrain_routine_start(trainer):
|
||||
"""Creates or resumes a CometML experiment at the start of a YOLO pre-training routine."""
|
||||
experiment = comet_ml.get_global_experiment()
|
||||
if not experiment:
|
||||
is_alive = getattr(experiment, 'alive', False)
|
||||
if not experiment or not is_alive:
|
||||
_create_experiment(trainer.args)
|
||||
|
||||
|
||||
@@ -296,16 +327,16 @@ def on_fit_epoch_end(trainer):
         model_info = {
             'model/parameters': get_num_params(trainer.model),
             'model/GFLOPs': round(get_flops(trainer.model), 3),
-            'model/speed(ms)': round(trainer.validator.speed['inference'], 3)}
+            'model/speed(ms)': round(trainer.validator.speed['inference'], 3), }
         experiment.log_metrics(model_info, step=curr_step, epoch=curr_epoch)

     if not save_assets:
         return

     _log_model(experiment, trainer)
-    if COMET_EVAL_LOG_CONFUSION_MATRIX:
+    if _should_log_confusion_matrix():
         _log_confusion_matrix(experiment, trainer, curr_step, curr_epoch)
-    if COMET_EVAL_LOG_IMAGE_PREDICTIONS:
+    if _should_log_image_predictions():
         _log_image_predictions(experiment, trainer.validator, curr_step)
@@ -17,7 +17,8 @@ from ultralytics.yolo.utils import LOGGER, checks, clean_url, emojis, is_online,

 GITHUB_ASSET_NAMES = [f'yolov8{k}{suffix}.pt' for k in 'nsmlx' for suffix in ('', '6', '-cls', '-seg', '-pose')] + \
                      [f'yolov5{k}u.pt' for k in 'nsmlx'] + \
-                     [f'yolov3{k}u.pt' for k in ('', '-spp', '-tiny')]
+                     [f'yolov3{k}u.pt' for k in ('', '-spp', '-tiny')] + \
+                     [f'sam_{k}.pt' for k in 'bl']
 GITHUB_ASSET_STEMS = [Path(k).stem for k in GITHUB_ASSET_NAMES]
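
The appended comprehension simply registers the two SAM checkpoints as downloadable GitHub assets:

print([f'sam_{k}.pt' for k in 'bl'])  # -> ['sam_b.pt', 'sam_l.pt']
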
@@ -192,14 +192,27 @@ class Annotator:
         """Add rectangle to image (PIL-only)."""
         self.draw.rectangle(xy, fill, outline, width)

-    def text(self, xy, text, txt_color=(255, 255, 255), anchor='top'):
+    def text(self, xy, text, txt_color=(255, 255, 255), anchor='top', box_style=False):
         """Adds text to an image using PIL or cv2."""
         if anchor == 'bottom':  # start y from font bottom
             w, h = self.font.getsize(text)  # text width, height
             xy[1] += 1 - h
         if self.pil:
+            if box_style:
+                w, h = self.font.getsize(text)
+                self.draw.rectangle((xy[0], xy[1], xy[0] + w + 1, xy[1] + h + 1), fill=txt_color)
+                # Using `txt_color` for background and draw fg with white color
+                txt_color = (255, 255, 255)
             self.draw.text(xy, text, fill=txt_color, font=self.font)
         else:
+            if box_style:
+                tf = max(self.lw - 1, 1)  # font thickness
+                w, h = cv2.getTextSize(text, 0, fontScale=self.lw / 3, thickness=tf)[0]  # text width, height
+                outside = xy[1] - h >= 3
+                p2 = xy[0] + w, xy[1] - h - 3 if outside else xy[1] + h + 3
+                cv2.rectangle(self.im, xy, p2, txt_color, -1, cv2.LINE_AA)  # filled
+                # Using `txt_color` for background and draw fg with white color
+                txt_color = (255, 255, 255)
             tf = max(self.lw - 1, 1)  # font thickness
             cv2.putText(self.im, text, xy, 0, self.lw / 3, txt_color, thickness=tf, lineType=cv2.LINE_AA)
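
The new box_style flag draws a filled rectangle in the given color behind white text; plot_images uses it below to label classification mosaics that have no boxes. A minimal usage sketch (array size and coordinates are arbitrary):

import numpy as np

from ultralytics.yolo.utils.plotting import Annotator

im = np.zeros((240, 320, 3), dtype=np.uint8)
annotator = Annotator(im, line_width=2)
annotator.text((10, 30), 'person', txt_color=(255, 0, 0), box_style=True)  # filled label
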
@@ -283,7 +296,7 @@ def save_one_box(xyxy, im, file=Path('im.jpg'), gain=1.02, pad=10, square=False,
 def plot_images(images,
                 batch_idx,
                 cls,
-                bboxes,
+                bboxes=np.zeros(0, dtype=np.float32),
                 masks=np.zeros(0, dtype=np.uint8),
                 kpts=np.zeros((0, 51), dtype=np.float32),
                 paths=None,
|
||||
annotator.text((x + 5, y + 5), text=Path(paths[i]).name[:40], txt_color=(220, 220, 220)) # filenames
|
||||
if len(cls) > 0:
|
||||
idx = batch_idx == i
|
||||
|
||||
boxes = xywh2xyxy(bboxes[idx, :4]).T
|
||||
classes = cls[idx].astype('int')
|
||||
labels = bboxes.shape[1] == 4 # labels if no conf column
|
||||
conf = None if labels else bboxes[idx, 4] # check for confidence presence (label vs pred)
|
||||
|
||||
if boxes.shape[1]:
|
||||
if boxes.max() <= 1.01: # if normalized with tolerance 0.01
|
||||
boxes[[0, 2]] *= w # scale to pixels
|
||||
boxes[[1, 3]] *= h
|
||||
elif scale < 1: # absolute coords need scale if image scales
|
||||
boxes *= scale
|
||||
boxes[[0, 2]] += x
|
||||
boxes[[1, 3]] += y
|
||||
for j, box in enumerate(boxes.T.tolist()):
|
||||
c = classes[j]
|
||||
color = colors(c)
|
||||
c = names.get(c, c) if names else c
|
||||
if labels or conf[j] > 0.25: # 0.25 conf thresh
|
||||
label = f'{c}' if labels else f'{c} {conf[j]:.1f}'
|
||||
annotator.box_label(box, label, color=color)
|
||||
if len(bboxes):
|
||||
boxes = xywh2xyxy(bboxes[idx, :4]).T
|
||||
labels = bboxes.shape[1] == 4 # labels if no conf column
|
||||
conf = None if labels else bboxes[idx, 4] # check for confidence presence (label vs pred)
|
||||
|
||||
if boxes.shape[1]:
|
||||
if boxes.max() <= 1.01: # if normalized with tolerance 0.01
|
||||
boxes[[0, 2]] *= w # scale to pixels
|
||||
boxes[[1, 3]] *= h
|
||||
elif scale < 1: # absolute coords need scale if image scales
|
||||
boxes *= scale
|
||||
boxes[[0, 2]] += x
|
||||
boxes[[1, 3]] += y
|
||||
for j, box in enumerate(boxes.T.tolist()):
|
||||
c = classes[j]
|
||||
color = colors(c)
|
||||
c = names.get(c, c) if names else c
|
||||
if labels or conf[j] > 0.25: # 0.25 conf thresh
|
||||
label = f'{c}' if labels else f'{c} {conf[j]:.1f}'
|
||||
annotator.box_label(box, label, color=color)
|
||||
elif len(classes):
|
||||
for c in classes:
|
||||
color = colors(c)
|
||||
c = names.get(c, c) if names else c
|
||||
annotator.text((x, y), f'{c}', txt_color=color, box_style=True)
|
||||
|
||||
# Plot keypoints
|
||||
if len(kpts):
|
||||
@ -403,11 +422,14 @@ def plot_images(images,
|
||||
|
||||
|
||||
@plt_settings()
|
||||
def plot_results(file='path/to/results.csv', dir='', segment=False, pose=False):
|
||||
def plot_results(file='path/to/results.csv', dir='', segment=False, pose=False, classify=False):
|
||||
"""Plot training results.csv. Usage: from utils.plots import *; plot_results('path/to/results.csv')."""
|
||||
import pandas as pd
|
||||
save_dir = Path(file).parent if file else Path(dir)
|
||||
if segment:
|
||||
if classify:
|
||||
fig, ax = plt.subplots(2, 2, figsize=(6, 6), tight_layout=True)
|
||||
index = [1, 4, 2, 3]
|
||||
elif segment:
|
||||
fig, ax = plt.subplots(2, 8, figsize=(18, 6), tight_layout=True)
|
||||
index = [1, 2, 3, 4, 5, 6, 9, 10, 13, 14, 15, 16, 7, 8, 11, 12]
|
||||
elif pose:
|
||||
|
@@ -225,7 +225,7 @@ class TaskAlignedAssigner(nn.Module):
         target_bboxes = gt_bboxes.view(-1, 4)[target_gt_idx]

         # Assigned target scores
-        target_labels.clamp(0)
+        target_labels.clamp_(0)
         target_scores = F.one_hot(target_labels, self.num_classes)  # (b, h*w, 80)
         fg_scores_mask = fg_mask[:, :, None].repeat(1, 1, self.num_classes)  # (b, h*w, 80)
         target_scores = torch.where(fg_scores_mask > 0, target_scores, 0)
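
A subtle but real fix: Tensor.clamp returns a new tensor and leaves its operand unchanged, so the old call silently did nothing; clamp_ clamps in place, guaranteeing non-negative labels before one_hot. Standalone illustration:

import torch

t = torch.tensor([-2, -1, 3])
t.clamp(0)   # out-of-place: result discarded, t unchanged
print(t)     # tensor([-2, -1,  3])
t.clamp_(0)  # in-place variant actually modifies t
print(t)     # tensor([0, 0, 3])
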
@@ -9,8 +9,14 @@ from ultralytics.yolo.utils import DEFAULT_CFG, ROOT

 class ClassificationPredictor(BasePredictor):

+    def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
+        super().__init__(cfg, overrides, _callbacks)
+        self.args.task = 'classify'
+
     def preprocess(self, img):
         """Converts input image to model-compatible data type."""
+        if not isinstance(img, torch.Tensor):
+            img = torch.stack([self.transforms(im) for im in img], dim=0)
         img = (img if isinstance(img, torch.Tensor) else torch.from_numpy(img)).to(self.model.device)
         return img.half() if self.model.fp16 else img.float()  # uint8 to fp16/32
@@ -19,7 +25,7 @@ class ClassificationPredictor(BasePredictor):
         results = []
         for i, pred in enumerate(preds):
             orig_img = orig_imgs[i] if isinstance(orig_imgs, list) else orig_imgs
-            path, _, _, _, _ = self.batch
+            path = self.batch[0]
             img_path = path[i] if isinstance(path, list) else path
             results.append(Results(orig_img=orig_img, path=img_path, names=self.model.names, probs=pred))
@@ -5,10 +5,11 @@ import torchvision

 from ultralytics.nn.tasks import ClassificationModel, attempt_load_one_weight
 from ultralytics.yolo import v8
-from ultralytics.yolo.data import build_classification_dataloader
+from ultralytics.yolo.data import ClassificationDataset, build_dataloader
 from ultralytics.yolo.engine.trainer import BaseTrainer
 from ultralytics.yolo.utils import DEFAULT_CFG, LOGGER, RANK, colorstr
-from ultralytics.yolo.utils.torch_utils import is_parallel, strip_optimizer
+from ultralytics.yolo.utils.plotting import plot_images, plot_results
+from ultralytics.yolo.utils.torch_utils import is_parallel, strip_optimizer, torch_distributed_zero_first


 class ClassificationTrainer(BaseTrainer):
|
||||
|
||||
return # dont return ckpt. Classification doesn't support resume
|
||||
|
||||
def build_dataset(self, img_path, mode='train'):
|
||||
dataset = ClassificationDataset(root=img_path, imgsz=self.args.imgsz, augment=mode == 'train')
|
||||
return dataset
|
||||
|
||||
def get_dataloader(self, dataset_path, batch_size=16, rank=0, mode='train'):
|
||||
"""Returns PyTorch DataLoader with transforms to preprocess images for inference."""
|
||||
loader = build_classification_dataloader(path=dataset_path,
|
||||
imgsz=self.args.imgsz,
|
||||
batch_size=batch_size if mode == 'train' else (batch_size * 2),
|
||||
augment=mode == 'train',
|
||||
rank=rank,
|
||||
workers=self.args.workers)
|
||||
with torch_distributed_zero_first(rank): # init dataset *.cache only once if DDP
|
||||
dataset = self.build_dataset(dataset_path, mode)
|
||||
|
||||
loader = build_dataloader(dataset, batch_size, self.args.workers, rank=rank)
|
||||
# Attach inference transforms
|
||||
if mode != 'train':
|
||||
if is_parallel(self.model):
|
||||
@ -124,6 +127,10 @@ class ClassificationTrainer(BaseTrainer):
|
||||
"""Resumes training from a given checkpoint."""
|
||||
pass
|
||||
|
||||
def plot_metrics(self):
|
||||
"""Plots metrics from a CSV file."""
|
||||
plot_results(file=self.csv, classify=True) # save results.png
|
||||
|
||||
def final_eval(self):
|
||||
"""Evaluate trained model and save validation results."""
|
||||
for f in self.last, self.best:
|
||||
@ -138,6 +145,13 @@ class ClassificationTrainer(BaseTrainer):
|
||||
# self.run_callbacks('on_fit_epoch_end')
|
||||
LOGGER.info(f"Results saved to {colorstr('bold', self.save_dir)}")
|
||||
|
||||
def plot_training_samples(self, batch, ni):
|
||||
"""Plots training samples with their annotations."""
|
||||
plot_images(images=batch['img'],
|
||||
batch_idx=torch.arange(len(batch['img'])),
|
||||
cls=batch['cls'].squeeze(-1),
|
||||
fname=self.save_dir / f'train_batch{ni}.jpg')
|
||||
|
||||
|
||||
def train(cfg=DEFAULT_CFG, use_python=False):
|
||||
"""Train the YOLO classification model."""
|
||||
|
@@ -1,9 +1,12 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license

-from ultralytics.yolo.data import build_classification_dataloader
+import torch
+
+from ultralytics.yolo.data import ClassificationDataset, build_dataloader
 from ultralytics.yolo.engine.validator import BaseValidator
 from ultralytics.yolo.utils import DEFAULT_CFG, LOGGER
 from ultralytics.yolo.utils.metrics import ClassifyMetrics, ConfusionMatrix
+from ultralytics.yolo.utils.plotting import plot_images


 class ClassificationValidator(BaseValidator):
@@ -52,20 +55,36 @@ class ClassificationValidator(BaseValidator):
         self.metrics.process(self.targets, self.pred)
         return self.metrics.results_dict

+    def build_dataset(self, img_path):
+        dataset = ClassificationDataset(root=img_path, imgsz=self.args.imgsz, augment=False)
+        return dataset
+
     def get_dataloader(self, dataset_path, batch_size):
         """Builds and returns a data loader for classification tasks with given parameters."""
-        return build_classification_dataloader(path=dataset_path,
-                                               imgsz=self.args.imgsz,
-                                               batch_size=batch_size,
-                                               augment=False,
-                                               shuffle=False,
-                                               workers=self.args.workers)
+        dataset = self.build_dataset(dataset_path)
+        return build_dataloader(dataset, batch_size, self.args.workers, rank=-1)

     def print_results(self):
         """Prints evaluation metrics for YOLO object detection model."""
         pf = '%22s' + '%11.3g' * len(self.metrics.keys)  # print format
         LOGGER.info(pf % ('all', self.metrics.top1, self.metrics.top5))

+    def plot_val_samples(self, batch, ni):
+        """Plot validation image samples."""
+        plot_images(images=batch['img'],
+                    batch_idx=torch.arange(len(batch['img'])),
+                    cls=batch['cls'].squeeze(-1),
+                    fname=self.save_dir / f'val_batch{ni}_labels.jpg',
+                    names=self.names)
+
+    def plot_predictions(self, batch, preds, ni):
+        """Plots predicted bounding boxes on input images and saves the result."""
+        plot_images(batch['img'],
+                    batch_idx=torch.arange(len(batch['img'])),
+                    cls=torch.argmax(preds, dim=1),
+                    fname=self.save_dir / f'val_batch{ni}_pred.jpg',
+                    names=self.names)  # pred
+

 def val(cfg=DEFAULT_CFG, use_python=False):
     """Validate YOLO model using custom data."""
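
The shape shared by all of these trainer/validator changes is a template method: the base class keeps the dataloader plumbing and raises NotImplementedError for build_dataset, while each task subclass contributes only dataset construction. A stripped-down sketch of the pattern (class names illustrative, not the library's):

class Base:
    def build_dataset(self, img_path):
        raise NotImplementedError('build_dataset function not implemented')

    def get_dataloader(self, img_path, batch_size):
        dataset = self.build_dataset(img_path)  # task-specific piece
        return f'DataLoader({dataset}, bs={batch_size})'  # shared plumbing

class ClassifyLike(Base):
    def build_dataset(self, img_path):
        return f'ClassificationDataset(root={img_path})'

print(ClassifyLike().get_dataloader('images/', 16))
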
@@ -9,13 +9,6 @@ from ultralytics.yolo.utils import DEFAULT_CFG, ROOT, ops

 class DetectionPredictor(BasePredictor):

-    def preprocess(self, img):
-        """Convert an image to PyTorch tensor and normalize pixel values."""
-        img = (img if isinstance(img, torch.Tensor) else torch.from_numpy(img)).to(self.model.device)
-        img = img.half() if self.model.fp16 else img.float()  # uint8 to fp16/32
-        img /= 255  # 0 - 255 to 0.0 - 1.0
-        return img
-
     def postprocess(self, preds, img, orig_imgs):
         """Postprocesses predictions and returns a list of Results objects."""
         preds = ops.non_max_suppression(preds,
@@ -30,7 +23,7 @@ class DetectionPredictor(BasePredictor):
             orig_img = orig_imgs[i] if isinstance(orig_imgs, list) else orig_imgs
             if not isinstance(orig_imgs, torch.Tensor):
                 pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape)
-            path, _, _, _, _ = self.batch
+            path = self.batch[0]
             img_path = path[i] if isinstance(path, list) else path
             results.append(Results(orig_img=orig_img, path=img_path, names=self.model.names, boxes=pred))
         return results
@@ -7,41 +7,63 @@ import torch.nn as nn

 from ultralytics.nn.tasks import DetectionModel
 from ultralytics.yolo import v8
-from ultralytics.yolo.data import build_dataloader
+from ultralytics.yolo.data import build_dataloader, build_yolo_dataset
 from ultralytics.yolo.data.dataloaders.v5loader import create_dataloader
 from ultralytics.yolo.engine.trainer import BaseTrainer
-from ultralytics.yolo.utils import DEFAULT_CFG, RANK, colorstr
+from ultralytics.yolo.utils import DEFAULT_CFG, LOGGER, RANK, colorstr
 from ultralytics.yolo.utils.loss import BboxLoss
 from ultralytics.yolo.utils.ops import xywh2xyxy
 from ultralytics.yolo.utils.plotting import plot_images, plot_labels, plot_results
 from ultralytics.yolo.utils.tal import TaskAlignedAssigner, dist2bbox, make_anchors
-from ultralytics.yolo.utils.torch_utils import de_parallel
+from ultralytics.yolo.utils.torch_utils import de_parallel, torch_distributed_zero_first


 # BaseTrainer python usage
 class DetectionTrainer(BaseTrainer):

+    def build_dataset(self, img_path, mode='train', batch=None):
+        """Build YOLO Dataset
+
+        Args:
+            img_path (str): Path to the folder containing images.
+            mode (str): `train` mode or `val` mode, users are able to customize different augmentations for each mode.
+            batch_size (int, optional): Size of batches, this is for `rect`. Defaults to None.
+        """
+        gs = max(int(de_parallel(self.model).stride.max() if self.model else 0), 32)
+        return build_yolo_dataset(self.args, img_path, batch, self.data, mode=mode, rect=mode == 'val', stride=gs)
+
     def get_dataloader(self, dataset_path, batch_size, rank=0, mode='train'):
         """TODO: manage splits differently."""
-        # Calculate stride - check if model is initialized
-        gs = max(int(de_parallel(self.model).stride.max() if self.model else 0), 32)
-        return create_dataloader(path=dataset_path,
-                                 imgsz=self.args.imgsz,
-                                 batch_size=batch_size,
-                                 stride=gs,
-                                 hyp=vars(self.args),
-                                 augment=mode == 'train',
-                                 cache=self.args.cache,
-                                 pad=0 if mode == 'train' else 0.5,
-                                 rect=self.args.rect or mode == 'val',
-                                 rank=rank,
-                                 workers=self.args.workers,
-                                 close_mosaic=self.args.close_mosaic != 0,
-                                 prefix=colorstr(f'{mode}: '),
-                                 shuffle=mode == 'train',
-                                 seed=self.args.seed)[0] if self.args.v5loader else \
-            build_dataloader(self.args, batch_size, img_path=dataset_path, stride=gs, rank=rank, mode=mode,
-                             rect=mode == 'val', data_info=self.data)[0]
+        if self.args.v5loader:
+            LOGGER.warning("WARNING ⚠️ 'v5loader' feature is deprecated and will be removed soon. You can train using "
+                           'the default YOLOv8 dataloader instead, no argument is needed.')
+            gs = max(int(de_parallel(self.model).stride.max() if self.model else 0), 32)
+            return create_dataloader(path=dataset_path,
+                                     imgsz=self.args.imgsz,
+                                     batch_size=batch_size,
+                                     stride=gs,
+                                     hyp=vars(self.args),
+                                     augment=mode == 'train',
+                                     cache=self.args.cache,
+                                     pad=0 if mode == 'train' else 0.5,
+                                     rect=self.args.rect or mode == 'val',
+                                     rank=rank,
+                                     workers=self.args.workers,
+                                     close_mosaic=self.args.close_mosaic != 0,
+                                     prefix=colorstr(f'{mode}: '),
+                                     shuffle=mode == 'train',
+                                     seed=self.args.seed)[0]
+        assert mode in ['train', 'val']
+        with torch_distributed_zero_first(rank):  # init dataset *.cache only once if DDP
+            dataset = self.build_dataset(dataset_path, mode, batch_size)
+        shuffle = mode == 'train'
+        if getattr(dataset, 'rect', False) and shuffle:
+            LOGGER.warning("WARNING ⚠️ 'rect=True' is incompatible with DataLoader shuffle, setting shuffle=False")
+            shuffle = False
+        workers = self.args.workers if mode == 'train' else self.args.workers * 2
+        dataloader = build_dataloader(dataset, batch_size, workers, shuffle, rank)
+        return dataloader

     def preprocess_batch(self, batch):
         """Preprocesses a batch of images by scaling and converting to float."""
@@ -6,7 +6,7 @@ from pathlib import Path
 import numpy as np
 import torch

-from ultralytics.yolo.data import build_dataloader
+from ultralytics.yolo.data import build_dataloader, build_yolo_dataset
 from ultralytics.yolo.data.dataloaders.v5loader import create_dataloader
 from ultralytics.yolo.engine.validator import BaseValidator
 from ultralytics.yolo.utils import DEFAULT_CFG, LOGGER, colorstr, ops
@@ -171,24 +171,40 @@ class DetectionValidator(BaseValidator):
             correct[matches[:, 1].astype(int), i] = True
         return torch.tensor(correct, dtype=torch.bool, device=detections.device)

+    def build_dataset(self, img_path, mode='val', batch=None):
+        """Build YOLO Dataset
+
+        Args:
+            img_path (str): Path to the folder containing images.
+            mode (str): `train` mode or `val` mode, users are able to customize different augmentations for each mode.
+            batch_size (int, optional): Size of batches, this is for `rect`. Defaults to None.
+        """
+        gs = max(int(de_parallel(self.model).stride if self.model else 0), 32)
+        return build_yolo_dataset(self.args, img_path, batch, self.data, mode=mode, stride=gs)
+
     def get_dataloader(self, dataset_path, batch_size):
         """TODO: manage splits differently."""
-        # Calculate stride - check if model is initialized
-        gs = max(int(de_parallel(self.model).stride if self.model else 0), 32)
-        return create_dataloader(path=dataset_path,
-                                 imgsz=self.args.imgsz,
-                                 batch_size=batch_size,
-                                 stride=gs,
-                                 hyp=vars(self.args),
-                                 cache=False,
-                                 pad=0.5,
-                                 rect=self.args.rect,
-                                 workers=self.args.workers,
-                                 prefix=colorstr(f'{self.args.mode}: '),
-                                 shuffle=False,
-                                 seed=self.args.seed)[0] if self.args.v5loader else \
-            build_dataloader(self.args, batch_size, img_path=dataset_path, stride=gs, data_info=self.data,
-                             mode='val')[0]
+        if self.args.v5loader:
+            LOGGER.warning("WARNING ⚠️ 'v5loader' feature is deprecated and will be removed soon. You can train using "
+                           'the default YOLOv8 dataloader instead, no argument is needed.')
+            gs = max(int(de_parallel(self.model).stride if self.model else 0), 32)
+            return create_dataloader(path=dataset_path,
+                                     imgsz=self.args.imgsz,
+                                     batch_size=batch_size,
+                                     stride=gs,
+                                     hyp=vars(self.args),
+                                     cache=False,
+                                     pad=0.5,
+                                     rect=self.args.rect,
+                                     workers=self.args.workers,
+                                     prefix=colorstr(f'{self.args.mode}: '),
+                                     shuffle=False,
+                                     seed=self.args.seed)[0]
+
+        dataset = self.build_dataset(dataset_path, batch=batch_size, mode='val')
+        dataloader = build_dataloader(dataset, batch_size, self.args.workers, shuffle=False, rank=-1)
+        return dataloader

     def plot_val_samples(self, batch, ni):
         """Plot validation image samples."""
@@ -7,6 +7,10 @@ from ultralytics.yolo.v8.detect.predict import DetectionPredictor

 class PosePredictor(DetectionPredictor):

+    def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
+        super().__init__(cfg, overrides, _callbacks)
+        self.args.task = 'pose'
+
     def postprocess(self, preds, img, orig_img):
         """Return detection results for a given input image or list of images."""
         preds = ops.non_max_suppression(preds,
@@ -24,7 +28,7 @@ class PosePredictor(DetectionPredictor):
             pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], shape).round()
             pred_kpts = pred[:, 6:].view(len(pred), *self.model.kpt_shape) if len(pred) else pred[:, 6:]
             pred_kpts = ops.scale_coords(img.shape[2:], pred_kpts, shape)
-            path, _, _, _, _ = self.batch
+            path = self.batch[0]
             img_path = path[i] if isinstance(path, list) else path
             results.append(
                 Results(orig_img=orig_img,
@@ -9,6 +9,10 @@ from ultralytics.yolo.v8.detect.predict import DetectionPredictor

 class SegmentationPredictor(DetectionPredictor):

+    def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
+        super().__init__(cfg, overrides, _callbacks)
+        self.args.task = 'segment'
+
     def postprocess(self, preds, img, orig_imgs):
         """TODO: filter by classes."""
         p = ops.non_max_suppression(preds[0],
@@ -22,7 +26,7 @@ class SegmentationPredictor(DetectionPredictor):
         proto = preds[1][-1] if len(preds[1]) == 3 else preds[1]  # second output is len 3 if pt, but only 1 if exported
         for i, pred in enumerate(p):
             orig_img = orig_imgs[i] if isinstance(orig_imgs, list) else orig_imgs
-            path, _, _, _, _ = self.batch
+            path = self.batch[0]
             img_path = path[i] if isinstance(path, list) else path
             if not len(pred):  # save empty boxes
                 results.append(Results(orig_img=orig_img, path=img_path, names=self.model.names, boxes=pred[:, :6]))