diff --git a/docs/datasets/index.md b/docs/datasets/index.md index a8d1860..c23e16f 100644 --- a/docs/datasets/index.md +++ b/docs/datasets/index.md @@ -52,6 +52,12 @@ Image classification is a computer vision task that involves categorizing an ima - [Imagewoof](classify/imagewoof.md): A more challenging subset of ImageNet containing 10 dog breed categories for image classification tasks. - [MNIST](classify/mnist.md): A dataset of 70,000 grayscale images of handwritten digits for image classification tasks. +## [Oriented Bounding Boxes (OBB)](obb/index.md) + +Oriented Bounding Boxes (OBB) is a method in computer vision for detecting angled objects in images using rotated bounding boxes, often applied to aerial and satellite imagery. + +- [DOTAv2](obb/dota-v2.md): A popular OBB aerial imagery dataset with 1.7 million instances and 11,268 images. + ## [Multi-Object Tracking](track/index.md) Multi-object tracking is a computer vision technique that involves detecting and tracking multiple objects over time in a video sequence. diff --git a/docs/datasets/obb/index.md b/docs/datasets/obb/index.md index 1e1f544..d74f03c 100644 --- a/docs/datasets/obb/index.md +++ b/docs/datasets/obb/index.md @@ -1,12 +1,12 @@ --- comments: true -description: Dive deep into various oriented bounding box (OBB) dataset formats compatible with the Ultralytics YOLO model. Grasp the nuances of using and converting datasets to this format. +description: Dive deep into various oriented bounding box (OBB) dataset formats compatible with Ultralytics YOLO models. Grasp the nuances of using and converting datasets to this format. keywords: Ultralytics, YOLO, oriented bounding boxes, OBB, dataset formats, label formats, DOTA v2, data conversion --- -# Oriented Bounding Box Datasets Overview +# Oriented Bounding Box (OBB) Datasets Overview -Training a precise object detection model with oriented bounding boxes (OBB) requires a thorough dataset. This guide elucidates the various OBB dataset formats compatible with the Ultralytics YOLO model, offering insights into their structure, application, and methods for format conversions. +Training a precise object detection model with oriented bounding boxes (OBB) requires a thorough dataset. This guide explains the various OBB dataset formats compatible with Ultralytics YOLO models, offering insights into their structure, application, and methods for format conversions. ## Supported OBB Dataset Formats diff --git a/docs/modes/train.md b/docs/modes/train.md index b806b07..db9b8b4 100644 --- a/docs/modes/train.md +++ b/docs/modes/train.md @@ -160,7 +160,7 @@ Training settings for YOLO models refer to the various hyperparameters and confi | `single_cls` | `False` | train multi-class data as single-class | | `rect` | `False` | rectangular training with each batch collated for minimum padding | | `cos_lr` | `False` | use cosine learning rate scheduler | -| `close_mosaic` | `0` | (int) disable mosaic augmentation for final epochs | +| `close_mosaic` | `10` | (int) disable mosaic augmentation for final epochs (0 to disable) | | `resume` | `False` | resume training from last checkpoint | | `amp` | `True` | Automatic Mixed Precision (AMP) training, choices=[True, False] | | `fraction` | `1.0` | dataset fraction to train on (default is 1.0, all images in train set) | diff --git a/docs/usage/cfg.md b/docs/usage/cfg.md index 465d26c..8b78cc0 100644 --- a/docs/usage/cfg.md +++ b/docs/usage/cfg.md @@ -102,7 +102,7 @@ The training settings for YOLO models encompass various hyperparameters and conf | `single_cls` | `False` | train multi-class data as single-class | | `rect` | `False` | rectangular training with each batch collated for minimum padding | | `cos_lr` | `False` | use cosine learning rate scheduler | -| `close_mosaic` | `0` | (int) disable mosaic augmentation for final epochs | +| `close_mosaic` | `10` | (int) disable mosaic augmentation for final epochs (0 to disable) | | `resume` | `False` | resume training from last checkpoint | | `amp` | `True` | Automatic Mixed Precision (AMP) training, choices=[True, False] | | `fraction` | `1.0` | dataset fraction to train on (default is 1.0, all images in train set) | diff --git a/ultralytics/__init__.py b/ultralytics/__init__.py index cd947f5..9051e1b 100644 --- a/ultralytics/__init__.py +++ b/ultralytics/__init__.py @@ -1,6 +1,6 @@ # Ultralytics YOLO 🚀, AGPL-3.0 license -__version__ = '8.0.152' +__version__ = '8.0.153' from ultralytics.hub import start from ultralytics.models import RTDETR, SAM, YOLO diff --git a/ultralytics/cfg/default.yaml b/ultralytics/cfg/default.yaml index 5babd25..ac94499 100644 --- a/ultralytics/cfg/default.yaml +++ b/ultralytics/cfg/default.yaml @@ -27,7 +27,7 @@ deterministic: True # (bool) whether to enable deterministic mode single_cls: False # (bool) train multi-class data as single-class rect: False # (bool) rectangular training if mode='train' or rectangular validation if mode='val' cos_lr: False # (bool) use cosine learning rate scheduler -close_mosaic: 10 # (int) disable mosaic augmentation for final epochs +close_mosaic: 10 # (int) disable mosaic augmentation for final epochs (0 to disable) resume: False # (bool) resume training from last checkpoint amp: True # (bool) Automatic Mixed Precision (AMP) training, choices=[True, False], True runs AMP check fraction: 1.0 # (float) dataset fraction to train on (default is 1.0, all images in train set) diff --git a/ultralytics/engine/model.py b/ultralytics/engine/model.py index a6028de..3e9b465 100644 --- a/ultralytics/engine/model.py +++ b/ultralytics/engine/model.py @@ -9,7 +9,7 @@ from ultralytics.cfg import get_cfg from ultralytics.engine.exporter import Exporter from ultralytics.hub.utils import HUB_WEB_ROOT from ultralytics.nn.tasks import attempt_load_one_weight, guess_model_task, nn, yaml_model_load -from ultralytics.utils import (DEFAULT_CFG, DEFAULT_CFG_DICT, DEFAULT_CFG_KEYS, LOGGER, RANK, ROOT, callbacks, +from ultralytics.utils import (DEFAULT_CFG, DEFAULT_CFG_DICT, DEFAULT_CFG_KEYS, LOGGER, RANK, ROOT, callbacks, emojis, is_git_dir, yaml_load) from ultralytics.utils.checks import check_file, check_imgsz, check_pip_update_available, check_yaml from ultralytics.utils.downloads import GITHUB_ASSET_STEMS @@ -448,11 +448,11 @@ class Model: """Load model/trainer/validator/predictor.""" try: return self.task_map[self.task][key] - except Exception: + except Exception as e: name = self.__class__.__name__ mode = inspect.stack()[1][3] # get the function name. raise NotImplementedError( - f'WARNING ⚠️ `{name}` model does not support `{mode}` mode for `{self.task}` task yet.') + emojis(f'WARNING ⚠️ `{name}` model does not support `{mode}` mode for `{self.task}` task yet.')) from e @property def task_map(self): diff --git a/ultralytics/engine/validator.py b/ultralytics/engine/validator.py index 4dfe679..25d5180 100644 --- a/ultralytics/engine/validator.py +++ b/ultralytics/engine/validator.py @@ -51,9 +51,18 @@ class BaseValidator: device (torch.device): Device to use for validation. batch_i (int): Current batch index. training (bool): Whether the model is in training mode. - speed (float): Batch processing speed in seconds. - jdict (dict): Dictionary to store validation results. + names (dict): Class names. + seen: Records the number of images seen so far during validation. + stats: Placeholder for statistics during validation. + confusion_matrix: Placeholder for a confusion matrix. + nc: Number of classes. + iouv: (torch.Tensor): IoU thresholds from 0.50 to 0.95 in spaces of 0.05. + jdict (dict): Dictionary to store JSON validation results. + speed (dict): Dictionary with keys 'preprocess', 'inference', 'loss', 'postprocess' and their respective + batch processing times in milliseconds. save_dir (Path): Directory to save results. + plots (dict): Dictionary to store plots for visualization. + callbacks (dict): Dictionary to store various callback functions. """ def __init__(self, dataloader=None, save_dir=None, pbar=None, args=None, _callbacks=None): @@ -65,6 +74,7 @@ class BaseValidator: save_dir (Path): Directory to save results. pbar (tqdm.tqdm): Progress bar for displaying progress. args (SimpleNamespace): Configuration for the validator. + _callbacks (dict): Dictionary to store various callback functions. """ self.dataloader = dataloader self.pbar = pbar @@ -74,8 +84,14 @@ class BaseValidator: self.device = None self.batch_i = None self.training = True - self.speed = {'preprocess': 0.0, 'inference': 0.0, 'loss': 0.0, 'postprocess': 0.0} + self.names = None + self.seen = None + self.stats = None + self.confusion_matrix = None + self.nc = None + self.iouv = None self.jdict = None + self.speed = {'preprocess': 0.0, 'inference': 0.0, 'loss': 0.0, 'postprocess': 0.0} project = self.args.project or Path(SETTINGS['runs_dir']) / self.args.task name = self.args.name or f'{self.args.mode}' @@ -200,26 +216,26 @@ class BaseValidator: LOGGER.info(f"Results saved to {colorstr('bold', self.save_dir)}") return stats - def match_predictions(self, pred_classes: torch.Tensor, true_classes: torch.Tensor, - iou: torch.Tensor) -> torch.Tensor: + def match_predictions(self, pred_classes, true_classes, iou): """ Matches predictions to ground truth objects (pred_classes, true_classes) using IoU. Args: pred_classes (torch.Tensor): Predicted class indices of shape(N,). true_classes (torch.Tensor): Target class indices of shape(M,). + iou (torch.Tensor): IoU thresholds from 0.50 to 0.95 in space of 0.05. Returns: (torch.Tensor): Correct tensor of shape(N,10) for 10 IoU thresholds. """ correct = np.zeros((pred_classes.shape[0], self.iouv.shape[0])).astype(bool) correct_class = true_classes[:, None] == pred_classes - for i in range(len(self.iouv)): - x = torch.where((iou >= self.iouv[i]) & correct_class) # IoU > threshold and classes match - if x[0].shape[0]: + for i, iouv in enumerate(self.iouv): + x = torch.nonzero(iou.ge(iouv) & correct_class) # IoU > threshold and classes match + if x.shape[0]: # Concatenate [label, detect, iou] - matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy() - if x[0].shape[0] > 1: + matches = torch.cat((x, iou[x[:, 0], x[:, 1]].unsqueeze(1)), 1).cpu().numpy() + if x.shape[0] > 1: matches = matches[matches[:, 2].argsort()[::-1]] matches = matches[np.unique(matches[:, 1], return_index=True)[1]] # matches = matches[matches[:, 2].argsort()[::-1]] diff --git a/ultralytics/models/fastsam/val.py b/ultralytics/models/fastsam/val.py index f1366f9..1d8442a 100644 --- a/ultralytics/models/fastsam/val.py +++ b/ultralytics/models/fastsam/val.py @@ -44,7 +44,7 @@ class FastSAMValidator(DetectionValidator): 'R', 'mAP50', 'mAP50-95)') def postprocess(self, preds): - """Postprocesses YOLO predictions and returns output detections with proto.""" + """Post-processes YOLO predictions and returns output detections with proto.""" p = ops.non_max_suppression(preds[0], self.args.conf, self.args.iou, diff --git a/ultralytics/models/nas/predict.py b/ultralytics/models/nas/predict.py index f5ae430..fe9f486 100644 --- a/ultralytics/models/nas/predict.py +++ b/ultralytics/models/nas/predict.py @@ -11,7 +11,7 @@ from ultralytics.utils.ops import xyxy2xywh class NASPredictor(BasePredictor): def postprocess(self, preds_in, img, orig_imgs): - """Postprocesses predictions and returns a list of Results objects.""" + """Postprocess predictions and returns a list of Results objects.""" # Cat boxes and class scores boxes = xyxy2xywh(preds_in[0][0]) diff --git a/ultralytics/models/sam/predict.py b/ultralytics/models/sam/predict.py index 396b3be..c3cec8c 100644 --- a/ultralytics/models/sam/predict.py +++ b/ultralytics/models/sam/predict.py @@ -310,7 +310,7 @@ class Predictor(BasePredictor): self.done_warmup = True def postprocess(self, preds, img, orig_imgs): - """Postprocesses inference output predictions to create detection masks for objects.""" + """Post-processes inference output predictions to create detection masks for objects.""" # (N, 1, H, W), (N, 1) pred_masks, pred_scores = preds[:2] pred_bboxes = preds[2] if self.segment_all else None diff --git a/ultralytics/models/yolo/classify/predict.py b/ultralytics/models/yolo/classify/predict.py index a21b3fc..5c02458 100644 --- a/ultralytics/models/yolo/classify/predict.py +++ b/ultralytics/models/yolo/classify/predict.py @@ -21,7 +21,7 @@ class ClassificationPredictor(BasePredictor): return img.half() if self.model.fp16 else img.float() # uint8 to fp16/32 def postprocess(self, preds, img, orig_imgs): - """Postprocesses predictions to return Results objects.""" + """Post-processes predictions to return Results objects.""" results = [] for i, pred in enumerate(preds): orig_img = orig_imgs[i] if isinstance(orig_imgs, list) else orig_imgs diff --git a/ultralytics/models/yolo/classify/train.py b/ultralytics/models/yolo/classify/train.py index eda92ad..420322b 100644 --- a/ultralytics/models/yolo/classify/train.py +++ b/ultralytics/models/yolo/classify/train.py @@ -43,11 +43,7 @@ class ClassificationTrainer(BaseTrainer): return model def setup_model(self): - """ - load/create/download model for any task - """ - # Classification models require special handling - + """load/create/download model for any task""" if isinstance(self.model, torch.nn.Module): # if model is loaded beforehand. No setup needed return @@ -65,7 +61,7 @@ class ClassificationTrainer(BaseTrainer): FileNotFoundError(f'ERROR: model={model} not found locally or online. Please check model name.') ClassificationModel.reshape_outputs(self.model, self.data['nc']) - return # dont return ckpt. Classification doesn't support resume + return # do not return ckpt. Classification doesn't support resume def build_dataset(self, img_path, mode='train', batch=None): return ClassificationDataset(root=img_path, args=self.args, augment=mode == 'train') @@ -102,9 +98,9 @@ class ClassificationTrainer(BaseTrainer): def label_loss_items(self, loss_items=None, prefix='train'): """ - Returns a loss dict with labelled training loss items tensor + Returns a loss dict with labelled training loss items tensor. Not needed for classification but necessary for + segmentation & detection """ - # Not needed for classification but necessary for segmentation & detection keys = [f'{prefix}/{x}' for x in self.loss_names] if loss_items is None: return keys @@ -144,7 +140,7 @@ class ClassificationTrainer(BaseTrainer): def train(cfg=DEFAULT_CFG, use_python=False): - """Train the YOLO classification model.""" + """Train a YOLO classification model.""" model = cfg.model or 'yolov8n-cls.pt' # or "resnet18" data = cfg.data or 'mnist160' # or yolo.ClassificationDataset("mnist") device = cfg.device if cfg.device is not None else '' diff --git a/ultralytics/models/yolo/classify/val.py b/ultralytics/models/yolo/classify/val.py index 41fa9f3..0df2a35 100644 --- a/ultralytics/models/yolo/classify/val.py +++ b/ultralytics/models/yolo/classify/val.py @@ -14,6 +14,8 @@ class ClassificationValidator(BaseValidator): def __init__(self, dataloader=None, save_dir=None, pbar=None, args=None, _callbacks=None): """Initializes ClassificationValidator instance with args, dataloader, save_dir, and progress bar.""" super().__init__(dataloader, save_dir, pbar, args, _callbacks) + self.targets = None + self.pred = None self.args.task = 'classify' self.metrics = ClassifyMetrics() diff --git a/ultralytics/models/yolo/detect/predict.py b/ultralytics/models/yolo/detect/predict.py index b7bf16b..f2358c1 100644 --- a/ultralytics/models/yolo/detect/predict.py +++ b/ultralytics/models/yolo/detect/predict.py @@ -10,7 +10,7 @@ from ultralytics.utils import DEFAULT_CFG, ROOT, ops class DetectionPredictor(BasePredictor): def postprocess(self, preds, img, orig_imgs): - """Postprocesses predictions and returns a list of Results objects.""" + """Post-processes predictions and returns a list of Results objects.""" preds = ops.non_max_suppression(preds, self.args.conf, self.args.iou, diff --git a/ultralytics/models/yolo/detect/train.py b/ultralytics/models/yolo/detect/train.py index b321564..732640f 100644 --- a/ultralytics/models/yolo/detect/train.py +++ b/ultralytics/models/yolo/detect/train.py @@ -13,7 +13,6 @@ from ultralytics.utils.plotting import plot_images, plot_labels, plot_results from ultralytics.utils.torch_utils import de_parallel, torch_distributed_zero_first -# BaseTrainer python usage class DetectionTrainer(BaseTrainer): def build_dataset(self, img_path, mode='train', batch=None): @@ -69,9 +68,9 @@ class DetectionTrainer(BaseTrainer): def label_loss_items(self, loss_items=None, prefix='train'): """ - Returns a loss dict with labelled training loss items tensor + Returns a loss dict with labelled training loss items tensor. Not needed for classification but necessary for + segmentation & detection """ - # Not needed for classification but necessary for segmentation & detection keys = [f'{prefix}/{x}' for x in self.loss_names] if loss_items is not None: loss_items = [round(float(x), 5) for x in loss_items] # convert tensors to 5 decimal place floats diff --git a/ultralytics/models/yolo/detect/val.py b/ultralytics/models/yolo/detect/val.py index dd282b9..9eff20d 100644 --- a/ultralytics/models/yolo/detect/val.py +++ b/ultralytics/models/yolo/detect/val.py @@ -20,9 +20,10 @@ class DetectionValidator(BaseValidator): def __init__(self, dataloader=None, save_dir=None, pbar=None, args=None, _callbacks=None): """Initialize detection model with necessary variables and settings.""" super().__init__(dataloader, save_dir, pbar, args, _callbacks) - self.args.task = 'detect' + self.nt_per_class = None self.is_coco = False self.class_map = None + self.args.task = 'detect' self.metrics = DetMetrics(save_dir=self.save_dir, on_plot=self.on_plot) self.iouv = torch.linspace(0.5, 0.95, 10) # iou vector for mAP@0.5:0.95 self.niou = self.iouv.numel() @@ -155,18 +156,23 @@ class DetectionValidator(BaseValidator): def _process_batch(self, detections, labels): """ - Return correct prediction matrix - Arguments: - detections (array[N, 6]), x1, y1, x2, y2, conf, class - labels (array[M, 5]), class, x1, y1, x2, y2 + Return correct prediction matrix. + + Args: + detections (torch.Tensor): Tensor of shape [N, 6] representing detections. + Each detection is of the format: x1, y1, x2, y2, conf, class. + labels (torch.Tensor): Tensor of shape [M, 5] representing labels. + Each label is of the format: class, x1, y1, x2, y2. + Returns: - correct (array[N, 10]), for 10 IoU levels + (torch.Tensor): Correct prediction matrix of shape [N, 10] for 10 IoU levels. """ iou = box_iou(labels[:, 1:], detections[:, :4]) return self.match_predictions(detections[:, 5], labels[:, 0], iou) def build_dataset(self, img_path, mode='val', batch=None): - """Build YOLO Dataset + """ + Build YOLO Dataset. Args: img_path (str): Path to the folder containing images. diff --git a/ultralytics/models/yolo/pose/train.py b/ultralytics/models/yolo/pose/train.py index 72f0a64..979c3f9 100644 --- a/ultralytics/models/yolo/pose/train.py +++ b/ultralytics/models/yolo/pose/train.py @@ -8,7 +8,6 @@ from ultralytics.utils import DEFAULT_CFG, LOGGER from ultralytics.utils.plotting import plot_images, plot_results -# BaseTrainer python usage class PoseTrainer(yolo.detect.DetectionTrainer): def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None): diff --git a/ultralytics/models/yolo/pose/val.py b/ultralytics/models/yolo/pose/val.py index cbf2668..b68fa50 100644 --- a/ultralytics/models/yolo/pose/val.py +++ b/ultralytics/models/yolo/pose/val.py @@ -17,6 +17,8 @@ class PoseValidator(DetectionValidator): def __init__(self, dataloader=None, save_dir=None, pbar=None, args=None, _callbacks=None): """Initialize a 'PoseValidator' object with custom parameters and assigned attributes.""" super().__init__(dataloader, save_dir, pbar, args, _callbacks) + self.sigma = None + self.kpt_shape = None self.args.task = 'pose' self.metrics = PoseMetrics(save_dir=self.save_dir, on_plot=self.on_plot) if isinstance(self.args.device, str) and self.args.device.lower() == 'mps': @@ -112,14 +114,19 @@ class PoseValidator(DetectionValidator): def _process_batch(self, detections, labels, pred_kpts=None, gt_kpts=None): """ - Return correct prediction matrix - Arguments: - detections (array[N, 6]), x1, y1, x2, y2, conf, class - labels (array[M, 5]), class, x1, y1, x2, y2 - pred_kpts (array[N, 51]), 51 = 17 * 3 - gt_kpts (array[N, 51]) + Return correct prediction matrix. + + Args: + detections (torch.Tensor): Tensor of shape [N, 6] representing detections. + Each detection is of the format: x1, y1, x2, y2, conf, class. + labels (torch.Tensor): Tensor of shape [M, 5] representing labels. + Each label is of the format: class, x1, y1, x2, y2. + pred_kpts (torch.Tensor, optional): Tensor of shape [N, 51] representing predicted keypoints. + 51 corresponds to 17 keypoints each with 3 values. + gt_kpts (torch.Tensor, optional): Tensor of shape [N, 51] representing ground truth keypoints. + Returns: - correct (array[N, 10]), for 10 IoU levels + torch.Tensor: Correct prediction matrix of shape [N, 10] for 10 IoU levels. """ if pred_kpts is not None and gt_kpts is not None: # `0.53` is from https://github.com/jin-s13/xtcocoapi/blob/master/xtcocotools/cocoeval.py#L384 diff --git a/ultralytics/models/yolo/segment/train.py b/ultralytics/models/yolo/segment/train.py index 89d5cb2..f3694b9 100644 --- a/ultralytics/models/yolo/segment/train.py +++ b/ultralytics/models/yolo/segment/train.py @@ -8,7 +8,6 @@ from ultralytics.utils import DEFAULT_CFG, RANK from ultralytics.utils.plotting import plot_images, plot_results -# BaseTrainer python usage class SegmentationTrainer(yolo.detect.DetectionTrainer): def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None): diff --git a/ultralytics/models/yolo/segment/val.py b/ultralytics/models/yolo/segment/val.py index 1a295cd..39db266 100644 --- a/ultralytics/models/yolo/segment/val.py +++ b/ultralytics/models/yolo/segment/val.py @@ -19,6 +19,8 @@ class SegmentationValidator(DetectionValidator): def __init__(self, dataloader=None, save_dir=None, pbar=None, args=None, _callbacks=None): """Initialize SegmentationValidator and set task to 'segment', metrics to SegmentMetrics.""" super().__init__(dataloader, save_dir, pbar, args, _callbacks) + self.plot_masks = None + self.process = None self.args.task = 'segment' self.metrics = SegmentMetrics(save_dir=self.save_dir, on_plot=self.on_plot) @@ -44,7 +46,7 @@ class SegmentationValidator(DetectionValidator): 'R', 'mAP50', 'mAP50-95)') def postprocess(self, preds): - """Postprocesses YOLO predictions and returns output detections with proto.""" + """Post-processes YOLO predictions and returns output detections with proto.""" p = ops.non_max_suppression(preds[0], self.args.conf, self.args.iou, diff --git a/ultralytics/utils/callbacks/dvc.py b/ultralytics/utils/callbacks/dvc.py index cfe18f2..2b179a2 100644 --- a/ultralytics/utils/callbacks/dvc.py +++ b/ultralytics/utils/callbacks/dvc.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO 🚀, GPL-3.0 license +# Ultralytics YOLO 🚀, AGPL-3.0 license import os