diff --git a/docs/modes/benchmark.md b/docs/modes/benchmark.md
index cae7aee..cc938ed 100644
--- a/docs/modes/benchmark.md
+++ b/docs/modes/benchmark.md
@@ -42,6 +42,7 @@ the benchmarks to their specific needs and compare the performance of different
| `model` | `None` | path to model file, i.e. yolov8n.pt, yolov8n.yaml |
| `imgsz` | `640` | image size as scalar or (h, w) list, i.e. (640, 480) |
| `half` | `False` | FP16 quantization |
+| `int8` | `False` | INT8 quantization |
| `device` | `None` | device to run on, i.e. cuda device=0 or device=0,1,2,3 or device=cpu |
| `hard_fail` | `False` | do not continue on error (bool), or val floor threshold (float) |
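A minimal usage sketch of the new `int8` argument (assuming the `benchmark` entrypoint lives at `ultralytics.yolo.utils.benchmarks` in this release; arguments are illustrative):

from ultralytics.yolo.utils.benchmarks import benchmark

# Benchmark YOLOv8n across export formats with INT8 quantization on CPU
benchmark(model='yolov8n.pt', imgsz=640, int8=True, device='cpu', hard_fail=False)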
diff --git a/docs/reference/yolo/engine/exporter.md b/docs/reference/yolo/engine/exporter.md
index d3a0f3d..6e30466 100644
--- a/docs/reference/yolo/engine/exporter.md
+++ b/docs/reference/yolo/engine/exporter.md
@@ -1,11 +1,11 @@
-# iOSDetectModel
+# Exporter
---
-:::ultralytics.yolo.engine.exporter.iOSDetectModel
+:::ultralytics.yolo.engine.exporter.Exporter
-# Exporter
+# iOSDetectModel
---
-:::ultralytics.yolo.engine.exporter.Exporter
+:::ultralytics.yolo.engine.exporter.iOSDetectModel
# export_formats
diff --git a/mkdocs.yml b/mkdocs.yml
index 830c257..360f45e 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -1,6 +1,7 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
-site_name: YOLOv8 Docs
+site_name: Ultralytics YOLOv8 Docs
+site_url: https://docs.ultralytics.com
repo_url: https://github.com/ultralytics/ultralytics
edit_uri: https://github.com/ultralytics/ultralytics/tree/main/docs
repo_name: ultralytics/ultralytics
diff --git a/ultralytics/__init__.py b/ultralytics/__init__.py
index dc0a14f..792617e 100644
--- a/ultralytics/__init__.py
+++ b/ultralytics/__init__.py
@@ -1,6 +1,6 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
-__version__ = '8.0.80'
+__version__ = '8.0.81'
from ultralytics.hub import start
from ultralytics.yolo.engine.model import YOLO
diff --git a/ultralytics/hub/utils.py b/ultralytics/hub/utils.py
index bf1eb27..817aaf8 100644
--- a/ultralytics/hub/utils.py
+++ b/ultralytics/hub/utils.py
@@ -130,6 +130,7 @@ def smart_request(method, url, retry=3, timeout=30, thread=True, code=-1, verbos
@TryExcept(verbose=verbose)
def func(func_method, func_url, **func_kwargs):
+ """Make HTTP requests with retries and timeouts, with optional progress tracking."""
r = None # response
t0 = time.time() # initial time for timer
for i in range(retry + 1):
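For context, a minimal sketch of the retry-with-timeout pattern this inner `func` implements (simplified illustration, not the actual HUB code):

import time
import requests

def retry_request(method, url, retry=3, timeout=30, **kwargs):
    """Retry an HTTP request with exponential backoff until success or overall timeout."""
    r = None
    t0 = time.time()
    for i in range(retry + 1):
        if (time.time() - t0) > timeout:
            break
        r = requests.request(method, url, **kwargs)
        if r.status_code < 300:  # 2xx success
            return r
        time.sleep(2 ** i)  # back off 1s, 2s, 4s, ...
    return r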
diff --git a/ultralytics/nn/autobackend.py b/ultralytics/nn/autobackend.py
index 764fd62..5e61049 100644
--- a/ultralytics/nn/autobackend.py
+++ b/ultralytics/nn/autobackend.py
@@ -202,6 +202,7 @@ class AutoBackend(nn.Module):
from ultralytics.yolo.engine.exporter import gd_outputs
def wrap_frozen_graph(gd, inputs, outputs):
+ """Wrap frozen graphs for deployment."""
x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=''), []) # wrapped
ge = x.graph.as_graph_element
return x.prune(tf.nest.map_structure(ge, inputs), tf.nest.map_structure(ge, outputs))
@@ -427,6 +428,7 @@ class AutoBackend(nn.Module):
@staticmethod
def _apply_default_class_names(data):
+ """Applies default class names to an input YAML file or returns numerical class names."""
with contextlib.suppress(Exception):
return yaml_load(check_yaml(data))['names']
return {i: f'class{i}' for i in range(999)} # return default if above errors
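Illustrative behavior of the documented fallback (a sketch; if the YAML cannot be read, the suppressed exception path returns numeric class names):

from ultralytics.nn.autobackend import AutoBackend

names = AutoBackend._apply_default_class_names(None)  # YAML load fails silently here
print(names[0], names[998])  # 'class0' 'class998'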
diff --git a/ultralytics/nn/autoshape.py b/ultralytics/nn/autoshape.py
index 4d2bb7c..9522a09 100644
--- a/ultralytics/nn/autoshape.py
+++ b/ultralytics/nn/autoshape.py
@@ -34,6 +34,7 @@ class AutoShape(nn.Module):
amp = False # Automatic Mixed Precision (AMP) inference
def __init__(self, model, verbose=True):
+ """Initializes object and copies attributes from model object."""
super().__init__()
if verbose:
LOGGER.info('Adding AutoShape... ')
@@ -125,6 +126,7 @@ class AutoShape(nn.Module):
class Detections:
# YOLOv8 detections class for inference results
def __init__(self, ims, pred, files, times=(0, 0, 0), names=None, shape=None):
+ """Initialize object attributes for YOLO detection results."""
super().__init__()
d = pred[0].device # device
gn = [torch.tensor([*(im.shape[i] for i in [1, 0, 1, 0]), 1, 1], device=d) for im in ims] # normalizations
@@ -142,6 +144,7 @@ class Detections:
self.s = tuple(shape) # inference BCHW shape
def _run(self, pprint=False, show=False, save=False, crop=False, render=False, labels=True, save_dir=Path('')):
+ """Return performance metrics and optionally cropped/save images or results."""
s, crops = '', []
for i, (im, pred) in enumerate(zip(self.ims, self.pred)):
s += f'\nimage {i + 1}/{len(self.pred)}: {im.shape[0]}x{im.shape[1]} ' # string
@@ -187,17 +190,21 @@ class Detections:
return crops
def show(self, labels=True):
+ """Displays YOLO results with detected bounding boxes."""
self._run(show=True, labels=labels) # show results
def save(self, labels=True, save_dir='runs/detect/exp', exist_ok=False):
+ """Save detection results with optional labels to specified directory."""
save_dir = increment_path(save_dir, exist_ok, mkdir=True) # increment save_dir
self._run(save=True, labels=labels, save_dir=save_dir) # save results
def crop(self, save=True, save_dir='runs/detect/exp', exist_ok=False):
+ """Crops images into detections and saves them if 'save' is True."""
save_dir = increment_path(save_dir, exist_ok, mkdir=True) if save else None
return self._run(crop=True, save=save, save_dir=save_dir) # crop results
def render(self, labels=True):
+ """Renders detected objects and returns images."""
self._run(render=True, labels=labels) # render results
return self.ims
@@ -222,6 +229,7 @@ class Detections:
return x
def print(self):
+ """Print the results of the `self._run()` function."""
LOGGER.info(self.__str__())
def __len__(self): # override len(results)
@@ -231,4 +239,5 @@ class Detections:
return self._run(pprint=True) # print results
def __repr__(self):
+ """Returns a printable representation of the object."""
return f'YOLOv8 {self.__class__} instance\n' + self.__str__()
diff --git a/ultralytics/nn/modules.py b/ultralytics/nn/modules.py
index 7cda1a2..4c198dc 100644
--- a/ultralytics/nn/modules.py
+++ b/ultralytics/nn/modules.py
@@ -25,15 +25,18 @@ class Conv(nn.Module):
default_act = nn.SiLU() # default activation
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
+ """Initialize Conv layer with given arguments including activation."""
super().__init__()
self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p, d), groups=g, dilation=d, bias=False)
self.bn = nn.BatchNorm2d(c2)
self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity()
def forward(self, x):
+ """Apply convolution, batch normalization and activation to input tensor."""
return self.act(self.bn(self.conv(x)))
def forward_fuse(self, x):
+ """Perform transposed convolution of 2D data."""
return self.act(self.conv(x))
@@ -56,15 +59,18 @@ class ConvTranspose(nn.Module):
default_act = nn.SiLU() # default activation
def __init__(self, c1, c2, k=2, s=2, p=0, bn=True, act=True):
+ """Initialize ConvTranspose2d layer with batch normalization and activation function."""
super().__init__()
self.conv_transpose = nn.ConvTranspose2d(c1, c2, k, s, p, bias=not bn)
self.bn = nn.BatchNorm2d(c2) if bn else nn.Identity()
self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity()
def forward(self, x):
+ """Applies transposed convolutions, batch normalization and activation to input."""
return self.act(self.bn(self.conv_transpose(x)))
def forward_fuse(self, x):
+ """Applies activation and convolution transpose operation to input."""
return self.act(self.conv_transpose(x))
@@ -75,6 +81,7 @@ class DFL(nn.Module):
"""
def __init__(self, c1=16):
+ """Initialize a convolutional layer with a given number of input channels."""
super().__init__()
self.conv = nn.Conv2d(c1, 1, 1, bias=False).requires_grad_(False)
x = torch.arange(c1, dtype=torch.float)
@@ -82,6 +89,7 @@ class DFL(nn.Module):
self.c1 = c1
def forward(self, x):
+ """Applies a transformer layer on input tensor 'x' and returns a tensor."""
b, c, a = x.shape # batch, channels, anchors
return self.conv(x.view(b, 4, self.c1, a).transpose(2, 1).softmax(1)).view(b, 4, a)
# return self.conv(x.view(b, self.c1, 4, a).softmax(1)).view(b, 4, a)
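DFL computes a softmax over c1 discrete bins followed by an expectation, implemented by the frozen 1x1 conv with weights 0..c1-1. A standalone sketch of the equivalent computation:

import torch

c1 = 16                                     # number of bins, as in DFL(c1=16)
bins = torch.arange(c1, dtype=torch.float)  # the fixed conv weights 0..15
logits = torch.randn(4, c1)                 # one distribution per box side
probs = logits.softmax(dim=1)               # discrete distribution over bins
expected = (probs * bins).sum(dim=1)        # integral, equivalent to the 1x1 conv
print(expected.shape)                       # torch.Size([4])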
@@ -91,6 +99,7 @@ class TransformerLayer(nn.Module):
"""Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance)."""
def __init__(self, c, num_heads):
+ """Initializes a self-attention mechanism using linear transformations and multi-head attention."""
super().__init__()
self.q = nn.Linear(c, c, bias=False)
self.k = nn.Linear(c, c, bias=False)
@@ -100,6 +109,7 @@ class TransformerLayer(nn.Module):
self.fc2 = nn.Linear(c, c, bias=False)
def forward(self, x):
+ """Apply a transformer block to the input x and return the output."""
x = self.ma(self.q(x), self.k(x), self.v(x))[0] + x
x = self.fc2(self.fc1(x)) + x
return x
@@ -109,6 +119,7 @@ class TransformerBlock(nn.Module):
"""Vision Transformer https://arxiv.org/abs/2010.11929."""
def __init__(self, c1, c2, num_heads, num_layers):
+ """Initialize a Transformer module with position embedding and specified number of heads and layers."""
super().__init__()
self.conv = None
if c1 != c2:
@@ -118,6 +129,7 @@ class TransformerBlock(nn.Module):
self.c2 = c2
def forward(self, x):
+ """Forward propagates the input through the bottleneck module."""
if self.conv is not None:
x = self.conv(x)
b, _, w, h = x.shape
@@ -136,6 +148,7 @@ class Bottleneck(nn.Module):
self.add = shortcut and c1 == c2
def forward(self, x):
+ """'forward()' applies the YOLOv5 FPN to input data."""
return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
@@ -154,6 +167,7 @@ class BottleneckCSP(nn.Module):
self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
def forward(self, x):
+ """Applies a CSP bottleneck with 3 convolutions."""
y1 = self.cv3(self.m(self.cv1(x)))
y2 = self.cv2(x)
return self.cv4(self.act(self.bn(torch.cat((y1, y2), 1))))
@@ -171,6 +185,7 @@ class C3(nn.Module):
self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, k=((1, 1), (3, 3)), e=1.0) for _ in range(n)))
def forward(self, x):
+ """Forward pass through the CSP bottleneck with 2 convolutions."""
return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), 1))
@@ -186,6 +201,7 @@ class C2(nn.Module):
self.m = nn.Sequential(*(Bottleneck(self.c, self.c, shortcut, g, k=((3, 3), (3, 3)), e=1.0) for _ in range(n)))
def forward(self, x):
+ """Forward pass through the CSP bottleneck with 2 convolutions."""
a, b = self.cv1(x).chunk(2, 1)
return self.cv2(torch.cat((self.m(a), b), 1))
@@ -201,11 +217,13 @@ class C2f(nn.Module):
self.m = nn.ModuleList(Bottleneck(self.c, self.c, shortcut, g, k=((3, 3), (3, 3)), e=1.0) for _ in range(n))
def forward(self, x):
+ """Forward pass of a YOLOv5 CSPDarknet backbone layer."""
y = list(self.cv1(x).chunk(2, 1))
y.extend(m(y[-1]) for m in self.m)
return self.cv2(torch.cat(y, 1))
def forward_split(self, x):
+ """Applies spatial attention to module's input."""
y = list(self.cv1(x).split((self.c, self.c), 1))
y.extend(m(y[-1]) for m in self.m)
return self.cv2(torch.cat(y, 1))
@@ -228,6 +246,7 @@ class SpatialAttention(nn.Module):
"""Spatial-attention module."""
def __init__(self, kernel_size=7):
+ """Initialize Spatial-attention module with kernel size argument."""
super().__init__()
assert kernel_size in (3, 7), 'kernel size must be 3 or 7'
padding = 3 if kernel_size == 7 else 1
@@ -235,6 +254,7 @@ class SpatialAttention(nn.Module):
self.act = nn.Sigmoid()
def forward(self, x):
+ """Apply channel and spatial attention on input for feature recalibration."""
return x * self.act(self.cv1(torch.cat([torch.mean(x, 1, keepdim=True), torch.max(x, 1, keepdim=True)[0]], 1)))
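A quick shape check of the recalibration (a sketch, assuming `SpatialAttention` is importable from `ultralytics.nn.modules`):

import torch
from ultralytics.nn.modules import SpatialAttention

sa = SpatialAttention(kernel_size=7)
x = torch.randn(1, 32, 8, 8)
print(sa(x).shape)  # torch.Size([1, 32, 8, 8]), same shape with rescaled activations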
@@ -247,6 +267,7 @@ class CBAM(nn.Module):
self.spatial_attention = SpatialAttention(kernel_size)
def forward(self, x):
+ """Applies the forward pass through C1 module."""
return self.spatial_attention(self.channel_attention(x))
@@ -259,6 +280,7 @@ class C1(nn.Module):
self.m = nn.Sequential(*(Conv(c2, c2, 3) for _ in range(n)))
def forward(self, x):
+ """Applies cross-convolutions to input in the C3 module."""
y = self.cv1(x)
return self.m(y) + y
@@ -267,6 +289,7 @@ class C3x(C3):
"""C3 module with cross-convolutions."""
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
+ """Initialize C3TR instance and set default parameters."""
super().__init__(c1, c2, n, shortcut, g, e)
self.c_ = int(c2 * e)
self.m = nn.Sequential(*(Bottleneck(self.c_, self.c_, shortcut, g, k=((1, 3), (3, 1)), e=1) for _ in range(n)))
@@ -276,6 +299,7 @@ class C3TR(C3):
"""C3 module with TransformerBlock()."""
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
+ """Initialize C3Ghost module with GhostBottleneck()."""
super().__init__(c1, c2, n, shortcut, g, e)
c_ = int(c2 * e)
self.m = TransformerBlock(c_, c_, 4, n)
@@ -285,6 +309,7 @@ class C3Ghost(C3):
"""C3 module with GhostBottleneck()."""
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
+ """Initialize 'SPP' module with various pooling sizes for spatial pyramid pooling."""
super().__init__(c1, c2, n, shortcut, g, e)
c_ = int(c2 * e) # hidden channels
self.m = nn.Sequential(*(GhostBottleneck(c_, c_) for _ in range(n)))
@@ -294,6 +319,7 @@ class SPP(nn.Module):
"""Spatial Pyramid Pooling (SPP) layer https://arxiv.org/abs/1406.4729."""
def __init__(self, c1, c2, k=(5, 9, 13)):
+ """Initialize the SPP layer with input/output channels and pooling kernel sizes."""
super().__init__()
c_ = c1 // 2 # hidden channels
self.cv1 = Conv(c1, c_, 1, 1)
@@ -301,6 +327,7 @@ class SPP(nn.Module):
self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])
def forward(self, x):
+ """Forward pass of the SPP layer, performing spatial pyramid pooling."""
x = self.cv1(x)
return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))
@@ -316,6 +343,7 @@ class SPPF(nn.Module):
self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)
def forward(self, x):
+ """Forward pass through Ghost Convolution block."""
x = self.cv1(x)
y1 = self.m(x)
y2 = self.m(y1)
@@ -345,6 +373,7 @@ class GhostConv(nn.Module):
self.cv2 = Conv(c_, c_, 5, 1, None, c_, act=act)
def forward(self, x):
+ """Forward propagation through a Ghost Bottleneck layer with skip connection."""
y = self.cv1(x)
return torch.cat((y, self.cv2(y)), 1)
@@ -363,6 +392,7 @@ class GhostBottleneck(nn.Module):
act=False)) if s == 2 else nn.Identity()
def forward(self, x):
+ """Applies skip connection and concatenation to input tensor."""
return self.conv(x) + self.shortcut(x)
@@ -370,10 +400,12 @@ class Concat(nn.Module):
"""Concatenate a list of tensors along dimension."""
def __init__(self, dimension=1):
+ """Concatenates a list of tensors along a specified dimension."""
super().__init__()
self.d = dimension
def forward(self, x):
+ """Forward pass for the YOLOv8 mask Proto module."""
return torch.cat(x, self.d)
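A quick check of what `Concat.forward` does, channel-wise concatenation by default (assuming `Concat` is importable from `ultralytics.nn.modules`):

import torch
from ultralytics.nn.modules import Concat

cat = Concat(dimension=1)
y = cat([torch.randn(1, 8, 4, 4), torch.randn(1, 16, 4, 4)])
print(y.shape)  # torch.Size([1, 24, 4, 4])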
@@ -388,6 +420,7 @@ class Proto(nn.Module):
self.cv3 = Conv(c_, c2)
def forward(self, x):
+ """Performs a forward pass through layers using an upsampled input image."""
return self.cv3(self.cv2(self.upsample(self.cv1(x))))
@@ -395,9 +428,11 @@ class Ensemble(nn.ModuleList):
"""Ensemble of models."""
def __init__(self):
+ """Initialize an ensemble of models."""
super().__init__()
def forward(self, x, augment=False, profile=False, visualize=False):
+ """Function generates the YOLOv5 network's final layer."""
y = [module(x, augment, profile, visualize)[0] for module in self]
# y = torch.stack(y).max(0)[0] # max ensemble
# y = torch.stack(y).mean(0) # mean ensemble
@@ -430,6 +465,7 @@ class Detect(nn.Module):
self.dfl = DFL(self.reg_max) if self.reg_max > 1 else nn.Identity()
def forward(self, x):
+ """Concatenates and returns predicted bounding boxes and class probabilities."""
shape = x[0].shape # BCHW
for i in range(self.nl):
x[i] = torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1)
@@ -463,6 +499,7 @@ class Segment(Detect):
"""YOLOv8 Segment head for segmentation models."""
def __init__(self, nc=80, nm=32, npr=256, ch=()):
+ """Initialize the YOLO model attributes such as the number of masks, prototypes, and the convolution layers."""
super().__init__(nc, ch)
self.nm = nm # number of masks
self.npr = npr # number of protos
@@ -473,6 +510,7 @@ class Segment(Detect):
self.cv4 = nn.ModuleList(nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3), nn.Conv2d(c4, self.nm, 1)) for x in ch)
def forward(self, x):
+ """Return model outputs and mask coefficients if training, otherwise return outputs and mask coefficients."""
p = self.proto(x[0]) # mask protos
bs = p.shape[0] # batch size
@@ -487,6 +525,7 @@ class Pose(Detect):
"""YOLOv8 Pose head for keypoints models."""
def __init__(self, nc=80, kpt_shape=(17, 3), ch=()):
+ """Initialize YOLO network with default parameters and Convolutional Layers."""
super().__init__(nc, ch)
self.kpt_shape = kpt_shape # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
self.nk = kpt_shape[0] * kpt_shape[1] # number of keypoints total
@@ -496,6 +535,7 @@ class Pose(Detect):
self.cv4 = nn.ModuleList(nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3), nn.Conv2d(c4, self.nk, 1)) for x in ch)
def forward(self, x):
+ """Perform forward pass through YOLO model and return predictions."""
bs = x[0].shape[0] # batch size
kpt = torch.cat([self.cv4[i](x[i]).view(bs, self.nk, -1) for i in range(self.nl)], -1) # (bs, 17*3, h*w)
x = self.detect(self, x)
@@ -505,6 +545,7 @@ class Pose(Detect):
return torch.cat([x, pred_kpt], 1) if self.export else (torch.cat([x[0], pred_kpt], 1), (x[1], kpt))
def kpts_decode(self, kpts):
+ """Decodes keypoints."""
ndim = self.kpt_shape[1]
y = kpts.clone()
if ndim == 3:
@@ -526,6 +567,7 @@ class Classify(nn.Module):
self.linear = nn.Linear(c_, c2) # to x(b,c2)
def forward(self, x):
+ """Performs a forward pass of the YOLO model on input image data."""
if isinstance(x, list):
x = torch.cat(x, 1)
x = self.linear(self.drop(self.pool(self.conv(x)).flatten(1)))
diff --git a/ultralytics/nn/tasks.py b/ultralytics/nn/tasks.py
index 1633105..dae593a 100644
--- a/ultralytics/nn/tasks.py
+++ b/ultralytics/nn/tasks.py
@@ -199,11 +199,13 @@ class DetectionModel(BaseModel):
LOGGER.info('')
def forward(self, x, augment=False, profile=False, visualize=False):
+ """Run forward pass on input image(s) with optional augmentation and profiling."""
if augment:
return self._forward_augment(x) # augmented inference, None
return self._forward_once(x, profile, visualize) # single-scale inference, train
def _forward_augment(self, x):
+ """Perform augmentations on input image x and return augmented inference and train outputs."""
img_size = x.shape[-2:] # height, width
s = [1, 0.83, 0.67] # scales
f = [None, 3, None] # flips (2-ud, 3-lr)
@@ -244,9 +246,11 @@ class SegmentationModel(DetectionModel):
"""YOLOv8 segmentation model."""
def __init__(self, cfg='yolov8n-seg.yaml', ch=3, nc=None, verbose=True):
+ """Initialize YOLOv8 segmentation model with given config and parameters."""
super().__init__(cfg=cfg, ch=ch, nc=nc, verbose=verbose)
def _forward_augment(self, x):
+ """Undocumented function."""
raise NotImplementedError(emojis('WARNING ⚠️ SegmentationModel has not supported augment inference yet!'))
@@ -254,6 +258,7 @@ class PoseModel(DetectionModel):
"""YOLOv8 pose model."""
def __init__(self, cfg='yolov8n-pose.yaml', ch=3, nc=None, data_kpt_shape=(None, None), verbose=True):
+ """Initialize YOLOv8 Pose model."""
if not isinstance(cfg, dict):
cfg = yaml_model_load(cfg) # load model YAML
if any(data_kpt_shape) and list(data_kpt_shape) != list(cfg['kpt_shape']):
@@ -292,6 +297,7 @@ class ClassificationModel(BaseModel):
self.nc = nc
def _from_yaml(self, cfg, ch, nc, verbose):
+ """Set YOLOv8 model configurations and define the model architecture."""
self.yaml = cfg if isinstance(cfg, dict) else yaml_model_load(cfg) # cfg dict
# Define model
@@ -501,6 +507,7 @@ def parse_model(d, ch, verbose=True): # model_dict, input_channels(3)
def yaml_model_load(path):
+ """Load a YOLOv8 model from a YAML file."""
import re
path = Path(path)
diff --git a/ultralytics/tracker/track.py b/ultralytics/tracker/track.py
index 6718060..5a4d2d5 100644
--- a/ultralytics/tracker/track.py
+++ b/ultralytics/tracker/track.py
@@ -37,6 +37,7 @@ def on_predict_start(predictor, persist=False):
def on_predict_postprocess_end(predictor):
+ """Postprocess detected boxes and update with object tracking."""
bs = predictor.dataset.bs
im0s = predictor.batch[2]
im0s = im0s if isinstance(im0s, list) else [im0s]
diff --git a/ultralytics/tracker/trackers/basetrack.py b/ultralytics/tracker/trackers/basetrack.py
index d851e7d..3c7b0f7 100644
--- a/ultralytics/tracker/trackers/basetrack.py
+++ b/ultralytics/tracker/trackers/basetrack.py
@@ -6,6 +6,8 @@ import numpy as np
class TrackState:
+ """Enumeration of possible object tracking states."""
+
New = 0
Tracked = 1
Lost = 2
@@ -13,6 +15,8 @@ class TrackState:
class BaseTrack:
+ """Base class for object tracking, handling basic track attributes and operations."""
+
_count = 0
track_id = 0
@@ -32,28 +36,36 @@ class BaseTrack:
@property
def end_frame(self):
+ """Return the last frame ID of the track."""
return self.frame_id
@staticmethod
def next_id():
+ """Increment and return the global track ID counter."""
BaseTrack._count += 1
return BaseTrack._count
def activate(self, *args):
+ """Activate the track with the provided arguments."""
raise NotImplementedError
def predict(self):
+ """Predict the next state of the track."""
raise NotImplementedError
def update(self, *args, **kwargs):
+ """Update the track with new observations."""
raise NotImplementedError
def mark_lost(self):
+ """Mark the track as lost."""
self.state = TrackState.Lost
def mark_removed(self):
+ """Mark the track as removed."""
self.state = TrackState.Removed
@staticmethod
def reset_id():
+ """Reset the global track ID counter."""
BaseTrack._count = 0
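BaseTrack's contract, per the new docstrings: subclasses supply `activate`, `predict` and `update`. A minimal hypothetical subclass for illustration (not part of the library):

from ultralytics.tracker.trackers.basetrack import BaseTrack, TrackState

class CentroidTrack(BaseTrack):
    """Toy track that follows a single (x, y) centroid."""

    def activate(self, xy, frame_id):
        self.track_id = self.next_id()  # claim a unique global ID
        self.xy = xy
        self.state = TrackState.Tracked
        self.frame_id = frame_id

    def predict(self):
        pass  # constant-position model: nothing to extrapolate

    def update(self, xy, frame_id):
        self.xy = xy
        self.frame_id = frame_id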
diff --git a/ultralytics/tracker/trackers/bot_sort.py b/ultralytics/tracker/trackers/bot_sort.py
index 522cdad..10e8868 100644
--- a/ultralytics/tracker/trackers/bot_sort.py
+++ b/ultralytics/tracker/trackers/bot_sort.py
@@ -15,6 +15,7 @@ class BOTrack(STrack):
shared_kalman = KalmanFilterXYWH()
def __init__(self, tlwh, score, cls, feat=None, feat_history=50):
+ """Initialize YOLOv8 object with temporal parameters, such as feature history, alpha and current features."""
super().__init__(tlwh, score, cls)
self.smooth_feat = None
@@ -25,6 +26,7 @@ class BOTrack(STrack):
self.alpha = 0.9
def update_features(self, feat):
+ """Update features vector and smooth it using exponential moving average."""
feat /= np.linalg.norm(feat)
self.curr_feat = feat
if self.smooth_feat is None:
@@ -35,6 +37,7 @@ class BOTrack(STrack):
self.smooth_feat /= np.linalg.norm(self.smooth_feat)
def predict(self):
+ """Predicts the mean and covariance using Kalman filter."""
mean_state = self.mean.copy()
if self.state != TrackState.Tracked:
mean_state[6] = 0
@@ -43,11 +46,13 @@ class BOTrack(STrack):
self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance)
def re_activate(self, new_track, frame_id, new_id=False):
+ """Reactivates a track with updated features and optionally assigns a new ID."""
if new_track.curr_feat is not None:
self.update_features(new_track.curr_feat)
super().re_activate(new_track, frame_id, new_id)
def update(self, new_track, frame_id):
+ """Update the YOLOv8 instance with new track and frame ID."""
if new_track.curr_feat is not None:
self.update_features(new_track.curr_feat)
super().update(new_track, frame_id)
@@ -65,6 +70,7 @@ class BOTrack(STrack):
@staticmethod
def multi_predict(stracks):
+ """Predicts the mean and covariance of multiple object tracks using shared Kalman filter."""
if len(stracks) <= 0:
return
multi_mean = np.asarray([st.mean.copy() for st in stracks])
@@ -79,6 +85,7 @@ class BOTrack(STrack):
stracks[i].covariance = cov
def convert_coords(self, tlwh):
+ """Converts Top-Left-Width-Height bounding box coordinates to X-Y-Width-Height format."""
return self.tlwh_to_xywh(tlwh)
@staticmethod
@@ -94,6 +101,7 @@ class BOTrack(STrack):
class BOTSORT(BYTETracker):
def __init__(self, args, frame_rate=30):
+ """Initialize YOLOv8 object with ReID module and GMC algorithm."""
super().__init__(args, frame_rate)
# ReID module
self.proximity_thresh = args.proximity_thresh
@@ -106,9 +114,11 @@ class BOTSORT(BYTETracker):
self.gmc = GMC(method=args.cmc_method)
def get_kalmanfilter(self):
+ """Returns an instance of KalmanFilterXYWH for object tracking."""
return KalmanFilterXYWH()
def init_track(self, dets, scores, cls, img=None):
+ """Initialize track with detections, scores, and classes."""
if len(dets) == 0:
return []
if self.args.with_reid and self.encoder is not None:
@@ -118,6 +128,7 @@ class BOTSORT(BYTETracker):
return [BOTrack(xyxy, s, c) for (xyxy, s, c) in zip(dets, scores, cls)] # detections
def get_dists(self, tracks, detections):
+ """Get distances between tracks and detections using IoU and (optionally) ReID embeddings."""
dists = matching.iou_distance(tracks, detections)
dists_mask = (dists > self.proximity_thresh)
@@ -133,4 +144,5 @@ class BOTSORT(BYTETracker):
return dists
def multi_predict(self, tracks):
+ """Predict and track multiple objects with YOLOv8 model."""
BOTrack.multi_predict(tracks)
diff --git a/ultralytics/tracker/trackers/byte_tracker.py b/ultralytics/tracker/trackers/byte_tracker.py
index 0c831f1..bb87ae9 100644
--- a/ultralytics/tracker/trackers/byte_tracker.py
+++ b/ultralytics/tracker/trackers/byte_tracker.py
@@ -23,6 +23,7 @@ class STrack(BaseTrack):
self.idx = tlwh[-1]
def predict(self):
+ """Predicts mean and covariance using Kalman filter."""
mean_state = self.mean.copy()
if self.state != TrackState.Tracked:
mean_state[7] = 0
@@ -30,6 +31,7 @@ class STrack(BaseTrack):
@staticmethod
def multi_predict(stracks):
+ """Perform multi-object predictive tracking using Kalman filter for given stracks."""
if len(stracks) <= 0:
return
multi_mean = np.asarray([st.mean.copy() for st in stracks])
@@ -44,6 +46,7 @@ class STrack(BaseTrack):
@staticmethod
def multi_gmc(stracks, H=np.eye(2, 3)):
+ """Update state tracks positions and covariances using a homography matrix."""
if len(stracks) > 0:
multi_mean = np.asarray([st.mean.copy() for st in stracks])
multi_covariance = np.asarray([st.covariance for st in stracks])
@@ -74,6 +77,7 @@ class STrack(BaseTrack):
self.start_frame = frame_id
def re_activate(self, new_track, frame_id, new_id=False):
+ """Reactivates a previously lost track with a new detection."""
self.mean, self.covariance = self.kalman_filter.update(self.mean, self.covariance,
self.convert_coords(new_track.tlwh))
self.tracklet_len = 0
@@ -107,6 +111,7 @@ class STrack(BaseTrack):
self.idx = new_track.idx
def convert_coords(self, tlwh):
+ """Convert a bounding box's top-left-width-height format to its x-y-angle-height equivalent."""
return self.tlwh_to_xyah(tlwh)
@property
@@ -142,23 +147,27 @@ class STrack(BaseTrack):
@staticmethod
def tlbr_to_tlwh(tlbr):
+ """Converts top-left bottom-right format to top-left width height format."""
ret = np.asarray(tlbr).copy()
ret[2:] -= ret[:2]
return ret
@staticmethod
def tlwh_to_tlbr(tlwh):
+ """Converts tlwh bounding box format to tlbr format."""
ret = np.asarray(tlwh).copy()
ret[2:] += ret[:2]
return ret
def __repr__(self):
+ """Return a string representation of the BYTETracker object with start and end frames and track ID."""
return f'OT_{self.track_id}_({self.start_frame}-{self.end_frame})'
class BYTETracker:
def __init__(self, args, frame_rate=30):
+ """Initialize a YOLOv8 object to track objects with given arguments and frame rate."""
self.tracked_stracks = [] # type: list[STrack]
self.lost_stracks = [] # type: list[STrack]
self.removed_stracks = [] # type: list[STrack]
@@ -170,6 +179,7 @@ class BYTETracker:
self.reset_id()
def update(self, results, img=None):
+ """Updates object tracker with new detections and returns tracked object bounding boxes."""
self.frame_id += 1
activated_starcks = []
refind_stracks = []
@@ -285,12 +295,15 @@ class BYTETracker:
dtype=np.float32)
def get_kalmanfilter(self):
+ """Returns a Kalman filter object for tracking bounding boxes."""
return KalmanFilterXYAH()
def init_track(self, dets, scores, cls, img=None):
+ """Initialize object tracking with detections and scores using STrack algorithm."""
return [STrack(xyxy, s, c) for (xyxy, s, c) in zip(dets, scores, cls)] if len(dets) else [] # detections
def get_dists(self, tracks, detections):
+ """Calculates the distance between tracks and detections using IOU and fuses scores."""
dists = matching.iou_distance(tracks, detections)
# TODO: mot20
# if not self.args.mot20:
@@ -298,13 +311,16 @@ class BYTETracker:
return dists
def multi_predict(self, tracks):
+ """Returns the predicted tracks using the YOLOv8 network."""
STrack.multi_predict(tracks)
def reset_id(self):
+ """Resets the ID counter of STrack."""
STrack.reset_id()
@staticmethod
def joint_stracks(tlista, tlistb):
+ """Combine two lists of stracks into a single one."""
exists = {}
res = []
for t in tlista:
@@ -332,6 +348,7 @@ class BYTETracker:
@staticmethod
def remove_duplicate_stracks(stracksa, stracksb):
+ """Remove duplicate stracks with non-maximum IOU distance."""
pdist = matching.iou_distance(stracksa, stracksb)
pairs = np.where(pdist < 0.15)
dupa, dupb = [], []
diff --git a/ultralytics/tracker/utils/gmc.py b/ultralytics/tracker/utils/gmc.py
index 21a0969..a5c910d 100644
--- a/ultralytics/tracker/utils/gmc.py
+++ b/ultralytics/tracker/utils/gmc.py
@@ -11,6 +11,7 @@ from ultralytics.yolo.utils import LOGGER
class GMC:
def __init__(self, method='sparseOptFlow', downscale=2, verbose=None):
+ """Initialize a video tracker with specified parameters."""
super().__init__()
self.method = method
@@ -69,6 +70,7 @@ class GMC:
self.initializedFirstFrame = False
def apply(self, raw_frame, detections=None):
+ """Apply object detection on a raw frame using specified method."""
if self.method in ['orb', 'sift']:
return self.applyFeatures(raw_frame, detections)
elif self.method == 'ecc':
@@ -303,6 +305,7 @@ class GMC:
return H
def applyFile(self, raw_frame, detections=None):
+ """Return the homography matrix based on the GCPs in the next line of the input GMC file."""
line = self.gmcFile.readline()
tokens = line.split('\t')
H = np.eye(2, 3, dtype=np.float_)
diff --git a/ultralytics/tracker/utils/kalman_filter.py b/ultralytics/tracker/utils/kalman_filter.py
index b0c2a19..a0ee498 100644
--- a/ultralytics/tracker/utils/kalman_filter.py
+++ b/ultralytics/tracker/utils/kalman_filter.py
@@ -27,6 +27,7 @@ class KalmanFilterXYAH:
"""
def __init__(self):
+ """Initialize Kalman filter model matrices with motion and observation uncertainty weights."""
ndim, dt = 4, 1.
# Create Kalman filter model matrices.
@@ -253,6 +254,7 @@ class KalmanFilterXYWH:
"""
def __init__(self):
+ """Initialize Kalman filter model matrices with motion and observation uncertainties."""
ndim, dt = 4, 1.
# Create Kalman filter model matrices.
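For reference, a sketch of the constant-velocity model matrices this kind of `__init__` builds (state [x, y, a, h, vx, vy, va, vh]; simplified from the actual file):

import numpy as np

ndim, dt = 4, 1.0
motion_mat = np.eye(2 * ndim)  # F: position components advance by velocity * dt
for i in range(ndim):
    motion_mat[i, ndim + i] = dt
update_mat = np.eye(ndim, 2 * ndim)  # H: only positions are observed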
diff --git a/ultralytics/tracker/utils/matching.py b/ultralytics/tracker/utils/matching.py
index d8f38f1..f2d458e 100644
--- a/ultralytics/tracker/utils/matching.py
+++ b/ultralytics/tracker/utils/matching.py
@@ -18,6 +18,7 @@ except (ImportError, AssertionError, AttributeError):
def merge_matches(m1, m2, shape):
+ """Merge two sets of matches and return matched and unmatched indices."""
O, P, Q = shape
m1 = np.asarray(m1)
m2 = np.asarray(m2)
@@ -35,6 +36,7 @@ def merge_matches(m1, m2, shape):
def _indices_to_matches(cost_matrix, indices, thresh):
+ """_indices_to_matches: Return matched and unmatched indices given a cost matrix, indices, and a threshold."""
matched_cost = cost_matrix[tuple(zip(*indices))]
matched_mask = (matched_cost <= thresh)
@@ -144,6 +146,7 @@ def embedding_distance(tracks, detections, metric='cosine'):
def gate_cost_matrix(kf, cost_matrix, tracks, detections, only_position=False):
+ """Apply gating to the cost matrix based on predicted tracks and detected objects."""
if cost_matrix.size == 0:
return cost_matrix
gating_dim = 2 if only_position else 4
@@ -156,6 +159,7 @@ def gate_cost_matrix(kf, cost_matrix, tracks, detections, only_position=False):
def fuse_motion(kf, cost_matrix, tracks, detections, only_position=False, lambda_=0.98):
+ """Fuse motion between tracks and detections with gating and Kalman filtering."""
if cost_matrix.size == 0:
return cost_matrix
gating_dim = 2 if only_position else 4
@@ -169,6 +173,7 @@ def fuse_motion(kf, cost_matrix, tracks, detections, only_position=False, lambda
def fuse_iou(cost_matrix, tracks, detections):
+ """Fuses ReID and IoU similarity matrices to yield a cost matrix for object tracking."""
if cost_matrix.size == 0:
return cost_matrix
reid_sim = 1 - cost_matrix
@@ -181,6 +186,7 @@ def fuse_iou(cost_matrix, tracks, detections):
def fuse_score(cost_matrix, detections):
+ """Fuses cost matrix with detection scores to produce a single similarity matrix."""
if cost_matrix.size == 0:
return cost_matrix
iou_sim = 1 - cost_matrix
diff --git a/ultralytics/yolo/cfg/__init__.py b/ultralytics/yolo/cfg/__init__.py
index a27197a..01d6c58 100644
--- a/ultralytics/yolo/cfg/__init__.py
+++ b/ultralytics/yolo/cfg/__init__.py
@@ -393,6 +393,7 @@ def entrypoint(debug=''):
# Special modes --------------------------------------------------------------------------------------------------------
def copy_default_cfg():
+ """Copy and create a new default configuration file with '_copy' appended to its name."""
new_file = Path.cwd() / DEFAULT_CFG_PATH.name.replace('.yaml', '_copy.yaml')
shutil.copy2(DEFAULT_CFG_PATH, new_file)
LOGGER.info(f'{DEFAULT_CFG_PATH} copied to {new_file}\n'
diff --git a/ultralytics/yolo/data/augment.py b/ultralytics/yolo/data/augment.py
index b75f972..e333c67 100644
--- a/ultralytics/yolo/data/augment.py
+++ b/ultralytics/yolo/data/augment.py
@@ -26,15 +26,19 @@ class BaseTransform:
pass
def apply_image(self, labels):
+ """Applies image transformation to labels."""
pass
def apply_instances(self, labels):
+ """Applies transformations to input 'labels' and returns object instances."""
pass
def apply_semantic(self, labels):
+ """Applies semantic segmentation to an image."""
pass
def __call__(self, labels):
+ """Applies label transformations to an image, instances and semantic masks."""
self.apply_image(labels)
self.apply_instances(labels)
self.apply_semantic(labels)
@@ -43,20 +47,25 @@ class BaseTransform:
class Compose:
def __init__(self, transforms):
+ """Initializes the Compose object with a list of transforms."""
self.transforms = transforms
def __call__(self, data):
+ """Applies a series of transformations to input data."""
for t in self.transforms:
data = t(data)
return data
def append(self, transform):
+ """Appends a new transform to the existing list of transforms."""
self.transforms.append(transform)
def tolist(self):
+ """Converts list of transforms to a standard Python list."""
return self.transforms
def __repr__(self):
+ """Return string representation of object."""
format_string = f'{self.__class__.__name__}('
for t in self.transforms:
format_string += '\n'
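Since `Compose` simply chains callables over a single `data` argument, any callable works. A toy usage example with hypothetical transforms:

from ultralytics.yolo.data.augment import Compose

def add_one(data):
    return data + 1

pipeline = Compose([add_one, add_one])
pipeline.append(add_one)
assert pipeline(0) == 3  # transforms applied in order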
@@ -74,6 +83,7 @@ class BaseMixTransform:
self.p = p
def __call__(self, labels):
+ """Applies pre-processing transforms and mixup/mosaic transforms to labels data."""
if random.uniform(0, 1) > self.p:
return labels
@@ -96,9 +106,11 @@ class BaseMixTransform:
return labels
def _mix_transform(self, labels):
+ """Applies MixUp or Mosaic augmentation to the label dictionary."""
raise NotImplementedError
def get_indexes(self):
+ """Gets a list of shuffled indexes for mosaic augmentation."""
raise NotImplementedError
@@ -111,6 +123,7 @@ class Mosaic(BaseMixTransform):
"""
def __init__(self, dataset, imgsz=640, p=1.0, border=(0, 0)):
+ """Initializes the object with a dataset, image size, probability, and border."""
assert 0 <= p <= 1.0, 'The probability should be in range [0, 1]. ' f'got {p}.'
super().__init__(dataset=dataset, p=p)
self.dataset = dataset
@@ -118,9 +131,11 @@ class Mosaic(BaseMixTransform):
self.border = border
def get_indexes(self):
+ """Return a list of 3 random indexes from the dataset."""
return [random.randint(0, len(self.dataset) - 1) for _ in range(3)]
def _mix_transform(self, labels):
+ """Apply mixup transformation to the input image and labels."""
mosaic_labels = []
assert labels.get('rect_shape', None) is None, 'rect and mosaic is exclusive.'
assert len(labels.get('mix_labels', [])) > 0, 'There are no other images for mosaic augment.'
@@ -166,6 +181,7 @@ class Mosaic(BaseMixTransform):
return labels
def _cat_labels(self, mosaic_labels):
+ """Return labels with mosaic border instances clipped."""
if len(mosaic_labels) == 0:
return {}
cls = []
@@ -190,6 +206,7 @@ class MixUp(BaseMixTransform):
super().__init__(dataset=dataset, pre_transform=pre_transform, p=p)
def get_indexes(self):
+ """Get a random index from the dataset."""
return random.randint(0, len(self.dataset) - 1)
def _mix_transform(self, labels):
@@ -400,6 +417,7 @@ class RandomHSV:
self.vgain = vgain
def __call__(self, labels):
+ """Applies random horizontal or vertical flip to an image with a given probability."""
img = labels['img']
if self.hgain or self.sgain or self.vgain:
r = np.random.uniform(-1, 1, 3) * [self.hgain, self.sgain, self.vgain] + 1 # random gains
@@ -427,6 +445,7 @@ class RandomFlip:
self.flip_idx = flip_idx
def __call__(self, labels):
+ """Resize image and padding for detection, instance segmentation, pose."""
img = labels['img']
instances = labels.pop('instances')
instances.convert_bbox(format='xywh')
@@ -453,6 +472,7 @@ class LetterBox:
"""Resize image and padding for detection, instance segmentation, pose."""
def __init__(self, new_shape=(640, 640), auto=False, scaleFill=False, scaleup=True, stride=32):
+ """Initialize LetterBox object with specific parameters."""
self.new_shape = new_shape
self.auto = auto
self.scaleFill = scaleFill
@@ -460,6 +480,7 @@ class LetterBox:
self.stride = stride
def __call__(self, labels=None, image=None):
+ """Return updated labels and image with added border."""
if labels is None:
labels = {}
img = labels.get('img') if image is None else image
@@ -556,6 +577,7 @@ class CopyPaste:
class Albumentations:
# YOLOv8 Albumentations class (optional, only used if package is installed)
def __init__(self, p=1.0):
+ """Initialize the transform object for YOLO bbox formatted params."""
self.p = p
self.transform = None
prefix = colorstr('albumentations: ')
@@ -581,6 +603,7 @@ class Albumentations:
LOGGER.info(f'{prefix}{e}')
def __call__(self, labels):
+ """Generates object detections and returns a dictionary with detection results."""
im = labels['img']
cls = labels['cls']
if len(cls):
@@ -618,6 +641,7 @@ class Format:
self.batch_idx = batch_idx # keep the batch indexes
def __call__(self, labels):
+ """Return formatted image, classes, bounding boxes & keypoints to be used by 'collate_fn'."""
img = labels.pop('img')
h, w = img.shape[:2]
cls = labels.pop('cls')
@@ -647,6 +671,7 @@ class Format:
return labels
def _format_img(self, img):
+ """Format the image for YOLOv5 from Numpy array to PyTorch tensor."""
if len(img.shape) < 3:
img = np.expand_dims(img, -1)
img = np.ascontiguousarray(img.transpose(2, 0, 1)[::-1])
@@ -668,6 +693,7 @@ class Format:
def v8_transforms(dataset, imgsz, hyp):
+ """Convert images to a size suitable for YOLOv8 training."""
pre_transform = Compose([
Mosaic(dataset, imgsz=imgsz, p=hyp.mosaic, border=[-imgsz // 2, -imgsz // 2]),
CopyPaste(p=hyp.copy_paste),
@@ -749,6 +775,7 @@ def classify_albumentations(
class ClassifyLetterBox:
# YOLOv8 LetterBox class for image preprocessing, i.e. T.Compose([LetterBox(size), ToTensor()])
def __init__(self, size=(640, 640), auto=False, stride=32):
+ """Resizes image and crops it to center with max dimensions 'h' and 'w'."""
super().__init__()
self.h, self.w = (size, size) if isinstance(size, int) else size
self.auto = auto # pass max size integer, automatically solve for short side using stride
@@ -768,6 +795,7 @@ class ClassifyLetterBox:
class CenterCrop:
# YOLOv8 CenterCrop class for image preprocessing, i.e. T.Compose([CenterCrop(size), ToTensor()])
def __init__(self, size=640):
+ """Converts an image from numpy array to PyTorch tensor."""
super().__init__()
self.h, self.w = (size, size) if isinstance(size, int) else size
@@ -781,6 +809,7 @@ class CenterCrop:
class ToTensor:
# YOLOv8 ToTensor class for image preprocessing, i.e. T.Compose([LetterBox(size), ToTensor()])
def __init__(self, half=False):
+ """Initialize YOLOv8 ToTensor object with optional half-precision support."""
super().__init__()
self.half = half
diff --git a/ultralytics/yolo/data/base.py b/ultralytics/yolo/data/base.py
index b29be34..08265a6 100644
--- a/ultralytics/yolo/data/base.py
+++ b/ultralytics/yolo/data/base.py
@@ -170,6 +170,7 @@ class BaseDataset(Dataset):
np.save(f.as_posix(), cv2.imread(self.im_files[i]))
def set_rectangle(self):
+ """Sets the shape of bounding boxes for YOLO detections as rectangles."""
bi = np.floor(np.arange(self.ni) / self.batch_size).astype(int) # batch index
nb = bi[-1] + 1 # number of batches
@@ -194,9 +195,11 @@ class BaseDataset(Dataset):
self.batch = bi # batch index of image
def __getitem__(self, index):
+ """Returns transformed label information for given index."""
return self.transforms(self.get_label_info(index))
def get_label_info(self, index):
+ """Get and return label information from the dataset."""
label = deepcopy(self.labels[index]) # requires deepcopy() https://github.com/ultralytics/ultralytics/pull/1948
label.pop('shape', None) # shape is for rect, remove it
label['img'], label['ori_shape'], label['resized_shape'] = self.load_image(index)
@@ -208,6 +211,7 @@ class BaseDataset(Dataset):
return label
def __len__(self):
+ """Returns the length of the labels list for the dataset."""
return len(self.labels)
def update_labels_info(self, label):
diff --git a/ultralytics/yolo/data/build.py b/ultralytics/yolo/data/build.py
index df32486..6aeafe9 100644
--- a/ultralytics/yolo/data/build.py
+++ b/ultralytics/yolo/data/build.py
@@ -24,14 +24,17 @@ class InfiniteDataLoader(dataloader.DataLoader):
"""Dataloader that reuses workers. Uses same syntax as vanilla DataLoader."""
def __init__(self, *args, **kwargs):
+ """Dataloader that infinitely recycles workers, inherits from DataLoader."""
super().__init__(*args, **kwargs)
object.__setattr__(self, 'batch_sampler', _RepeatSampler(self.batch_sampler))
self.iterator = super().__iter__()
def __len__(self):
+ """Returns the length of the batch sampler's sampler."""
return len(self.batch_sampler.sampler)
def __iter__(self):
+ """Creates a sampler that repeats indefinitely."""
for _ in range(len(self)):
yield next(self.iterator)
@@ -45,9 +48,11 @@ class _RepeatSampler:
"""
def __init__(self, sampler):
+ """Initializes an object that repeats a given sampler indefinitely."""
self.sampler = sampler
def __iter__(self):
+ """Iterates over the 'sampler' and yields its contents."""
while True:
yield from iter(self.sampler)
@@ -60,6 +65,7 @@ def seed_worker(worker_id): # noqa
def build_dataloader(cfg, batch, img_path, data_info, stride=32, rect=False, rank=-1, mode='train'):
+ """Return an InfiniteDataLoader or DataLoader for training or validation set."""
assert mode in ['train', 'val']
shuffle = mode == 'train'
if cfg.rect and shuffle:
@@ -134,6 +140,7 @@ def build_classification_dataloader(path,
def check_source(source):
+ """Check source type and return corresponding flag values."""
webcam, screenshot, from_img, in_memory, tensor = False, False, False, False, False
if isinstance(source, (str, int, Path)): # int for local usb camera
source = str(source)
diff --git a/ultralytics/yolo/data/dataloaders/stream_loaders.py b/ultralytics/yolo/data/dataloaders/stream_loaders.py
index 6f73cb2..26d3211 100644
--- a/ultralytics/yolo/data/dataloaders/stream_loaders.py
+++ b/ultralytics/yolo/data/dataloaders/stream_loaders.py
@@ -32,6 +32,7 @@ class SourceTypes:
class LoadStreams:
# YOLOv8 streamloader, i.e. `yolo predict source='rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP streams`
def __init__(self, sources='file.streams', imgsz=640, stride=32, auto=True, transforms=None, vid_stride=1):
+ """Initialize instance variables and check for consistent input stream shapes."""
torch.backends.cudnn.benchmark = True # faster for fixed-size inference
self.mode = 'stream'
self.imgsz = imgsz
@@ -97,10 +98,12 @@ class LoadStreams:
time.sleep(0.0) # wait time
def __iter__(self):
+ """Iterates through YOLO image feed and re-opens unresponsive streams."""
self.count = -1
return self
def __next__(self):
+ """Returns source paths, transformed and original images for processing YOLOv5."""
self.count += 1
if not all(x.is_alive() for x in self.threads) or cv2.waitKey(1) == ord('q'): # q to quit
cv2.destroyAllWindows()
@@ -117,6 +120,7 @@ class LoadStreams:
return self.sources, im, im0, None, ''
def __len__(self):
+ """Return the length of the sources object."""
return len(self.sources) # 1E12 frames = 32 streams at 30 FPS for 30 years
@@ -153,6 +157,7 @@ class LoadScreenshots:
self.monitor = {'left': self.left, 'top': self.top, 'width': self.width, 'height': self.height}
def __iter__(self):
+ """Returns an iterator of the object."""
return self
def __next__(self):
@@ -173,6 +178,7 @@ class LoadScreenshots:
class LoadImages:
# YOLOv8 image/video dataloader, i.e. `yolo predict source=image.jpg/vid.mp4`
def __init__(self, path, imgsz=640, stride=32, auto=True, transforms=None, vid_stride=1):
+ """Initialize the Dataloader and raise FileNotFoundError if file not found."""
if isinstance(path, str) and Path(path).suffix == '.txt': # *.txt file with img/vid/dir on each line
path = Path(path).read_text().rsplit()
files = []
@@ -211,10 +217,12 @@ class LoadImages:
f'Supported formats are:\nimages: {IMG_FORMATS}\nvideos: {VID_FORMATS}')
def __iter__(self):
+ """Returns an iterator object for VideoStream or ImageFolder."""
self.count = 0
return self
def __next__(self):
+ """Return next image, path and metadata from dataset."""
if self.count == self.nf:
raise StopIteration
path = self.files[self.count]
@@ -276,12 +284,14 @@ class LoadImages:
return im
def __len__(self):
+ """Returns the number of files in the object."""
return self.nf # number of files
class LoadPilAndNumpy:
def __init__(self, im0, imgsz=640, stride=32, auto=True, transforms=None):
+ """Initialize PIL and Numpy Dataloader."""
if not isinstance(im0, list):
im0 = [im0]
self.paths = [getattr(im, 'filename', f'image{i}.jpg') for i, im in enumerate(im0)]
@@ -296,6 +306,7 @@ class LoadPilAndNumpy:
@staticmethod
def _single_check(im):
+ """Validate and format an image to numpy array."""
assert isinstance(im, (Image.Image, np.ndarray)), f'Expected PIL/np.ndarray image type, but got {type(im)}'
if isinstance(im, Image.Image):
if im.mode != 'RGB':
@@ -305,6 +316,7 @@ class LoadPilAndNumpy:
return im
def _single_preprocess(self, im, auto):
+ """Preprocesses a single image for inference."""
if self.transforms:
im = self.transforms(im) # transforms
else:
@@ -314,9 +326,11 @@ class LoadPilAndNumpy:
return im
def __len__(self):
+ """Returns the length of the 'im0' attribute."""
return len(self.im0)
def __next__(self):
+ """Returns batch paths, images, processed images, None, ''."""
if self.count == 1: # loop only once as it's batch inference
raise StopIteration
auto = all(x.shape == self.im0[0].shape for x in self.im0) and self.auto
@@ -326,6 +340,7 @@ class LoadPilAndNumpy:
return self.paths, im, self.im0, None, ''
def __iter__(self):
+ """Enables iteration for class LoadPilAndNumpy."""
self.count = 0
return self
@@ -338,16 +353,19 @@ class LoadTensor:
self.mode = 'image'
def __iter__(self):
+ """Returns an iterator object."""
self.count = 0
return self
def __next__(self):
+ """Return next item in the iterator."""
if self.count == 1:
raise StopIteration
self.count += 1
return None, self.im0, self.im0, None, '' # self.paths, im, self.im0, None, ''
def __len__(self):
+ """Returns the batch size."""
return self.bs
diff --git a/ultralytics/yolo/data/dataloaders/v5augmentations.py b/ultralytics/yolo/data/dataloaders/v5augmentations.py
index 971a203..8e0b3e2 100644
--- a/ultralytics/yolo/data/dataloaders/v5augmentations.py
+++ b/ultralytics/yolo/data/dataloaders/v5augmentations.py
@@ -24,6 +24,7 @@ IMAGENET_STD = 0.229, 0.224, 0.225 # RGB standard deviation
class Albumentations:
# YOLOv5 Albumentations class (optional, only used if package is installed)
def __init__(self, size=640):
+ """Instantiate object with image augmentations for YOLOv5."""
self.transform = None
prefix = colorstr('albumentations: ')
try:
@@ -48,6 +49,7 @@ class Albumentations:
LOGGER.info(f'{prefix}{e}')
def __call__(self, im, labels, p=1.0):
+ """Transforms input image and labels with probability 'p'."""
if self.transform and random.random() < p:
new = self.transform(image=im, bboxes=labels[:, 1:], class_labels=labels[:, 0]) # transformed
im, labels = new['image'], np.array([[c, *b] for c, b in zip(new['class_labels'], new['bboxes'])])
@@ -111,7 +113,7 @@ def replicate(im, labels):
def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
- # Resize and pad image while meeting stride-multiple constraints
+ """Resize and pad image while meeting stride-multiple constraints."""
shape = im.shape[:2] # current shape [height, width]
if isinstance(new_shape, int):
new_shape = (new_shape, new_shape)
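A short usage sketch of `letterbox`; it returns the padded image, the scale ratio, and the (dw, dh) padding:

import numpy as np
from ultralytics.yolo.data.dataloaders.v5augmentations import letterbox

im = np.zeros((480, 640, 3), dtype=np.uint8)  # dummy HWC BGR image
im_out, ratio, (dw, dh) = letterbox(im, new_shape=(640, 640), auto=False)
print(im_out.shape, ratio, (dw, dh))  # (640, 640, 3) (1.0, 1.0) (0.0, 80.0)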
@@ -359,6 +361,7 @@ def classify_transforms(size=224):
class LetterBox:
# YOLOv5 LetterBox class for image preprocessing, i.e. T.Compose([LetterBox(size), ToTensor()])
def __init__(self, size=(640, 640), auto=False, stride=32):
+ """Resizes and crops an image to a specified size for YOLOv5 preprocessing."""
super().__init__()
self.h, self.w = (size, size) if isinstance(size, int) else size
self.auto = auto # pass max size integer, automatically solve for short side using stride
@@ -378,6 +381,7 @@ class LetterBox:
class CenterCrop:
# YOLOv5 CenterCrop class for image preprocessing, i.e. T.Compose([CenterCrop(size), ToTensor()])
def __init__(self, size=640):
+ """Converts input image into tensor for YOLOv5 processing."""
super().__init__()
self.h, self.w = (size, size) if isinstance(size, int) else size
@@ -391,6 +395,7 @@ class CenterCrop:
class ToTensor:
# YOLOv5 ToTensor class for image preprocessing, i.e. T.Compose([LetterBox(size), ToTensor()])
def __init__(self, half=False):
+ """Initialize ToTensor class for YOLOv5 image preprocessing."""
super().__init__()
self.half = half
diff --git a/ultralytics/yolo/data/dataloaders/v5loader.py b/ultralytics/yolo/data/dataloaders/v5loader.py
index f686497..3797412 100644
--- a/ultralytics/yolo/data/dataloaders/v5loader.py
+++ b/ultralytics/yolo/data/dataloaders/v5loader.py
@@ -162,14 +162,17 @@ class InfiniteDataLoader(dataloader.DataLoader):
"""
def __init__(self, *args, **kwargs):
+ """Dataloader that reuses workers for same syntax as vanilla DataLoader."""
super().__init__(*args, **kwargs)
object.__setattr__(self, 'batch_sampler', _RepeatSampler(self.batch_sampler))
self.iterator = super().__iter__()
def __len__(self):
+ """Returns the length of batch_sampler's sampler."""
return len(self.batch_sampler.sampler)
def __iter__(self):
+ """Creates a sampler that infinitely repeats."""
for _ in range(len(self)):
yield next(self.iterator)
@@ -182,9 +185,11 @@ class _RepeatSampler:
"""
def __init__(self, sampler):
+ """Sampler that repeats dataset samples infinitely."""
self.sampler = sampler
def __iter__(self):
+ """Infinite loop iterating over a given sampler."""
while True:
yield from iter(self.sampler)
@@ -221,6 +226,7 @@ class LoadScreenshots:
self.monitor = {'left': self.left, 'top': self.top, 'width': self.width, 'height': self.height}
def __iter__(self):
+ """Iterates over objects with the same structure as the monitor attribute."""
return self
def __next__(self):
@@ -241,6 +247,7 @@ class LoadScreenshots:
class LoadImages:
# YOLOv5 image/video dataloader, i.e. `python detect.py --source image.jpg/vid.mp4`
def __init__(self, path, img_size=640, stride=32, auto=True, transforms=None, vid_stride=1):
+ """Initialize instance variables and check for valid input."""
if isinstance(path, str) and Path(path).suffix == '.txt': # *.txt file with img/vid/dir on each line
path = Path(path).read_text().rsplit()
files = []
@@ -276,10 +283,12 @@ class LoadImages:
f'Supported formats are:\nimages: {IMG_FORMATS}\nvideos: {VID_FORMATS}'
def __iter__(self):
+ """Returns an iterator object for iterating over images or videos found in a directory."""
self.count = 0
return self
def __next__(self):
+ """Iterator's next item, performs transformation on image and returns path, transformed image, original image, capture and size."""
if self.count == self.nf:
raise StopIteration
path = self.files[self.count]
@@ -338,12 +347,14 @@ class LoadImages:
return im
def __len__(self):
+ """Returns the number of files in the class instance."""
return self.nf # number of files
class LoadStreams:
# YOLOv5 streamloader, i.e. `python detect.py --source 'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP streams`
def __init__(self, sources='file.streams', img_size=640, stride=32, auto=True, transforms=None, vid_stride=1):
+ """Initialize YOLO detector with optional transforms and check input shapes."""
torch.backends.cudnn.benchmark = True # faster for fixed-size inference
self.mode = 'stream'
self.img_size = img_size
@@ -404,10 +415,12 @@ class LoadStreams:
time.sleep(0.0) # wait time
def __iter__(self):
+ """Iterator that returns the class instance."""
self.count = -1
return self
def __next__(self):
+ """Return a tuple containing transformed and resized image data."""
self.count += 1
if not all(x.is_alive() for x in self.threads) or cv2.waitKey(1) == ord('q'): # q to quit
cv2.destroyAllWindows()
@@ -424,6 +437,7 @@ class LoadStreams:
return self.sources, im, im0, None, ''
def __len__(self):
+ """Returns the number of sources as the length of the object."""
return len(self.sources) # 1E12 frames = 32 streams at 30 FPS for 30 years
@@ -607,6 +621,7 @@ class LoadImagesAndLabels(Dataset):
return cache
def cache_labels(self, path=Path('./labels.cache'), prefix=''):
+ """Cache labels and save as numpy file for next time."""
# Cache dataset labels, check images and read shapes
if path.exists():
path.unlink() # remove *.cache file if exists
@@ -646,9 +661,11 @@ class LoadImagesAndLabels(Dataset):
return x
def __len__(self):
+ """Returns the length of 'im_files' attribute."""
return len(self.im_files)
def __getitem__(self, index):
+ """Get a sample and its corresponding label, filename and shape from the dataset."""
index = self.indices[index] # linear, shuffled, or image_weights
hyp = self.hyp
@@ -1039,6 +1056,7 @@ class ClassificationDataset(torchvision.datasets.ImageFolder):
"""
def __init__(self, root, augment, imgsz, cache=False):
+ """Initialize YOLO dataset with root, augmentation, image size, and cache parameters."""
super().__init__(root=root)
self.torch_transforms = classify_transforms(imgsz)
self.album_transforms = classify_albumentations(augment, imgsz) if augment else None
@@ -1047,6 +1065,7 @@ class ClassificationDataset(torchvision.datasets.ImageFolder):
self.samples = [list(x) + [Path(x[0]).with_suffix('.npy'), None] for x in self.samples] # file, index, npy, im
def __getitem__(self, i):
+ """Retrieves data items of 'dataset' via indices & creates InfiniteDataLoader."""
f, j, fn, im = self.samples[i] # filename, index, filename.with_suffix('.npy'), image
if self.cache_ram and im is None:
im = self.samples[i][3] = cv2.imread(f)
diff --git a/ultralytics/yolo/data/dataset.py b/ultralytics/yolo/data/dataset.py
index a44140f..3c8c43f 100644
--- a/ultralytics/yolo/data/dataset.py
+++ b/ultralytics/yolo/data/dataset.py
@@ -127,6 +127,7 @@ class YOLODataset(BaseDataset):
return x
def get_labels(self):
+ """Returns dictionary of labels for YOLO training."""
self.label_files = img2label_paths(self.im_files)
cache_path = Path(self.label_files[0]).parent.with_suffix('.cache')
try:
@@ -170,6 +171,7 @@ class YOLODataset(BaseDataset):
# TODO: use hyp config to set all these augmentations
def build_transforms(self, hyp=None):
+ """Builds and appends transforms to the list."""
if self.augment:
hyp.mosaic = hyp.mosaic if self.augment and not self.rect else 0.0
hyp.mixup = hyp.mixup if self.augment and not self.rect else 0.0
@@ -187,6 +189,7 @@ class YOLODataset(BaseDataset):
return transforms
def close_mosaic(self, hyp):
+ """Sets mosaic, copy_paste and mixup options to 0.0 and builds transformations."""
hyp.mosaic = 0.0 # set mosaic ratio=0.0
hyp.copy_paste = 0.0 # keep the same behavior as previous v8 close-mosaic
hyp.mixup = 0.0 # keep the same behavior as previous v8 close-mosaic
@@ -206,6 +209,7 @@ class YOLODataset(BaseDataset):
@staticmethod
def collate_fn(batch):
+ """Collates data samples into batches."""
new_batch = {}
keys = batch[0].keys()
values = list(zip(*[list(b.values()) for b in batch]))
@@ -234,6 +238,7 @@ class ClassificationDataset(torchvision.datasets.ImageFolder):
"""
def __init__(self, root, augment, imgsz, cache=False):
+ """Initialize YOLO object with root, image size, augmentations, and cache settings"""
super().__init__(root=root)
self.torch_transforms = classify_transforms(imgsz)
self.album_transforms = classify_albumentations(augment, imgsz) if augment else None
@@ -242,6 +247,7 @@ class ClassificationDataset(torchvision.datasets.ImageFolder):
self.samples = [list(x) + [Path(x[0]).with_suffix('.npy'), None] for x in self.samples] # file, index, npy, im
def __getitem__(self, i):
+ """Returns subset of data and targets corresponding to given indices."""
f, j, fn, im = self.samples[i] # filename, index, filename.with_suffix('.npy'), image
if self.cache_ram and im is None:
im = self.samples[i][3] = cv2.imread(f)
@@ -265,4 +271,5 @@ class ClassificationDataset(torchvision.datasets.ImageFolder):
class SemanticDataset(BaseDataset):
def __init__(self):
+ """Initialize a SemanticDataset object."""
pass
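# A small sketch (illustrative only) of the collate_fn contract documented in
# YOLODataset above: per-sample dicts become one batched dict, and 'batch_idx'
# is offset so every label can be traced back to its image.
import torch

samples = [{'img': torch.zeros(3, 4, 4), 'batch_idx': torch.zeros(2)},
           {'img': torch.ones(3, 4, 4), 'batch_idx': torch.zeros(3)}]
img = torch.stack([s['img'] for s in samples], 0)  # (2, 3, 4, 4)
batch_idx = torch.cat([s['batch_idx'] + i for i, s in enumerate(samples)], 0)
print(img.shape, batch_idx.tolist())  # torch.Size([2, 3, 4, 4]) [0.0, 0.0, 1.0, 1.0, 1.0]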
diff --git a/ultralytics/yolo/data/utils.py b/ultralytics/yolo/data/utils.py
index 0e1bd00..a366e0f 100644
--- a/ultralytics/yolo/data/utils.py
+++ b/ultralytics/yolo/data/utils.py
@@ -359,6 +359,7 @@ class HUBDatasetStats():
return True, str(dir), self._find_yaml(dir) # zipped, data_dir, yaml_path
def _hub_ops(self, f):
+ """Saves a compressed image for HUB previews."""
compress_one_image(f, self.im_dir / Path(f).name) # save to dataset-hub
def get_json(self, save=False, verbose=False):
diff --git a/ultralytics/yolo/engine/exporter.py b/ultralytics/yolo/engine/exporter.py
index 2f9f5f4..d50039c 100644
--- a/ultralytics/yolo/engine/exporter.py
+++ b/ultralytics/yolo/engine/exporter.py
@@ -105,6 +105,7 @@ def try_export(inner_func):
inner_args = get_default_args(inner_func)
def outer_func(*args, **kwargs):
+ """Export a model."""
prefix = inner_args['prefix']
try:
with Profile() as dt:
@@ -118,24 +119,6 @@ def try_export(inner_func):
return outer_func
-class iOSDetectModel(torch.nn.Module):
- """Wrap an Ultralytics YOLO model for iOS export."""
-
- def __init__(self, model, im):
- super().__init__()
- b, c, h, w = im.shape # batch, channel, height, width
- self.model = model
- self.nc = len(model.names) # number of classes
- if w == h:
- self.normalize = 1.0 / w # scalar
- else:
- self.normalize = torch.tensor([1.0 / w, 1.0 / h, 1.0 / w, 1.0 / h]) # broadcast (slower, smaller)
-
- def forward(self, x):
- xywh, cls = self.model(x)[0].transpose(0, 1).split((4, self.nc), 1)
- return cls, xywh * self.normalize # confidence (3780, 80), coordinates (3780, 4)
-
-
class Exporter:
"""
A class for exporting a model.
@@ -160,6 +143,7 @@ class Exporter:
@smart_inference_mode()
def __call__(self, model=None):
+ """Returns list of exported files/dirs after running callbacks."""
self.run_callbacks('on_export_start')
t = time.time()
format = self.args.format.lower() # to lowercase
@@ -703,7 +687,7 @@ class Exporter:
tmp_file.unlink()
def _pipeline_coreml(self, model, prefix=colorstr('CoreML Pipeline:')):
- # YOLOv8 CoreML pipeline
+ """YOLOv8 CoreML pipeline."""
import coremltools as ct # noqa
LOGGER.info(f'{prefix} starting pipeline with coremltools {ct.__version__}...')
@@ -826,11 +810,33 @@ class Exporter:
self.callbacks[event].append(callback)
def run_callbacks(self, event: str):
+ """Execute all callbacks for a given event."""
for callback in self.callbacks.get(event, []):
callback(self)
+class iOSDetectModel(torch.nn.Module):
+ """Wrap an Ultralytics YOLO model for iOS export."""
+
+ def __init__(self, model, im):
+ """Initialize the iOSDetectModel class with a YOLO model and example image."""
+ super().__init__()
+ b, c, h, w = im.shape # batch, channel, height, width
+ self.model = model
+ self.nc = len(model.names) # number of classes
+ if w == h:
+ self.normalize = 1.0 / w # scalar
+ else:
+ self.normalize = torch.tensor([1.0 / w, 1.0 / h, 1.0 / w, 1.0 / h]) # broadcast (slower, smaller)
+
+ def forward(self, x):
+ """Normalize predictions of object detection model with input size-dependent factors."""
+ xywh, cls = self.model(x)[0].transpose(0, 1).split((4, self.nc), 1)
+ return cls, xywh * self.normalize # confidence (3780, 80), coordinates (3780, 4)
+
+
def export(cfg=DEFAULT_CFG):
+ """Export a YOLOv model to a specific format."""
cfg.model = cfg.model or 'yolov8n.yaml'
cfg.format = cfg.format or 'torchscript'
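# A numeric sketch of the normalization choice in iOSDetectModel above: square
# inputs divide by a cheap scalar, while non-square inputs broadcast a
# per-coordinate [1/w, 1/h, 1/w, 1/h] tensor. Values below are illustrative.
import torch

h, w = 480, 640                                     # non-square input
xywh = torch.tensor([[320.0, 240.0, 64.0, 48.0]])   # one box in pixels
normalize = torch.tensor([1.0 / w, 1.0 / h, 1.0 / w, 1.0 / h])
print(xywh * normalize)  # tensor([[0.5000, 0.5000, 0.1000, 0.1000]])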
diff --git a/ultralytics/yolo/engine/model.py b/ultralytics/yolo/engine/model.py
index 8b3a1b1..c5e4fca 100644
--- a/ultralytics/yolo/engine/model.py
+++ b/ultralytics/yolo/engine/model.py
@@ -107,14 +107,17 @@ class YOLO:
self._load(model, task)
def __call__(self, source=None, stream=False, **kwargs):
+ """Calls the 'predict' function with given arguments to perform object detection."""
return self.predict(source, stream, **kwargs)
def __getattr__(self, attr):
+ """Raises error if object has no requested attribute."""
name = self.__class__.__name__
raise AttributeError(f"'{name}' object has no attribute '{attr}'. See valid attributes below.\n{self.__doc__}")
@staticmethod
def is_hub_model(model):
+ """Check if the provided model is a HUB model."""
return any((
model.startswith('https://hub.ultra'), # i.e. https://hub.ultralytics.com/models/MODEL_ID
[len(x) for x in model.split('_')] == [42, 20], # APIKEY_MODELID
@@ -209,6 +212,7 @@ class YOLO:
self.model.info(verbose=verbose)
def fuse(self):
+ """Fuse PyTorch Conv2d and BatchNorm2d layers."""
self._check_is_pytorch_model()
self.model.fuse()
@@ -493,9 +497,11 @@ class YOLO:
@staticmethod
def _reset_ckpt_args(args):
+ """Reset arguments when loading a PyTorch model."""
include = {'imgsz', 'data', 'task', 'single_cls'} # only remember these arguments when loading a PyTorch model
return {k: v for k, v in args.items() if k in include}
def _reset_callbacks(self):
+ """Reset all registered callbacks."""
for event in callbacks.default_callbacks.keys():
self.callbacks[event] = [callbacks.default_callbacks[event][0]]
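# Usage sketch for the YOLO methods documented above (assumes 'yolov8n.pt'
# weights and a local image 'bus.jpg'); __call__ simply forwards to predict().
from ultralytics import YOLO

model = YOLO('yolov8n.pt')
results = model('bus.jpg')          # via __call__
results = model.predict('bus.jpg')  # equivalent explicit call
model.fuse()                        # fuse Conv2d + BatchNorm2d layers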
diff --git a/ultralytics/yolo/engine/predictor.py b/ultralytics/yolo/engine/predictor.py
index 882cde8..79b31c3 100644
--- a/ultralytics/yolo/engine/predictor.py
+++ b/ultralytics/yolo/engine/predictor.py
@@ -107,9 +107,11 @@ class BasePredictor:
callbacks.add_integration_callbacks(self)
def preprocess(self, img):
+ """Prepares input image before inference."""
pass
def write_results(self, idx, results, batch):
+ """Write inference results to a file or directory."""
p, im, _ = batch
log_string = ''
if len(im.shape) == 3:
@@ -143,9 +145,11 @@ class BasePredictor:
return log_string
def postprocess(self, preds, img, orig_img):
+ """Post-processes predictions for an image and returns them."""
return preds
def __call__(self, source=None, model=None, stream=False):
+ """Performs inference on an image or stream."""
self.stream = stream
if stream:
return self.stream_inference(source, model)
@@ -159,6 +163,7 @@ class BasePredictor:
pass
def setup_source(self, source):
+ """Sets up source and inference mode."""
self.imgsz = check_imgsz(self.args.imgsz, stride=self.model.stride, min_dim=2) # check image size
if self.args.task == 'classify':
transforms = getattr(self.model.model, 'transforms', classify_transforms(self.imgsz[0]))
@@ -179,6 +184,7 @@ class BasePredictor:
@smart_inference_mode()
def stream_inference(self, source=None, model=None):
+ """Streams real-time inference on camera feed and saves results to file."""
if self.args.verbose:
LOGGER.info('')
@@ -264,6 +270,7 @@ class BasePredictor:
self.run_callbacks('on_predict_end')
def setup_model(self, model, verbose=True):
+ """Initialize YOLO model with given parameters and set it to evaluation mode."""
device = select_device(self.args.device, verbose=verbose)
model = model or self.args.model
self.args.half &= device.type != 'cpu' # half precision only supported on CUDA
@@ -278,6 +285,7 @@ class BasePredictor:
self.model.eval()
def show(self, p):
+ """Display an image in a window using OpenCV imshow()."""
im0 = self.plotted_img
if platform.system() == 'Linux' and p not in self.windows:
self.windows.append(p)
@@ -287,6 +295,7 @@ class BasePredictor:
cv2.waitKey(500 if self.batch[4].startswith('image') else 1) # 1 millisecond
def save_preds(self, vid_cap, idx, save_path):
+ """Save video predictions as mp4 at specified path."""
im0 = self.plotted_img
# Save imgs
if self.dataset.mode == 'image':
@@ -307,6 +316,7 @@ class BasePredictor:
self.vid_writer[idx].write(im0)
def run_callbacks(self, event: str):
+ """Runs all registered callbacks for a specific event."""
for callback in self.callbacks.get(event, []):
callback(self)
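# Usage sketch for stream_inference() above ('0' is a hypothetical webcam
# index; assumes 'yolov8n.pt' weights). stream=True yields results lazily,
# one frame at a time, instead of accumulating them all in memory.
from ultralytics import YOLO

model = YOLO('yolov8n.pt')
for result in model.predict(source='0', stream=True):
    boxes = result.boxes  # per-frame Boxes object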
diff --git a/ultralytics/yolo/engine/results.py b/ultralytics/yolo/engine/results.py
index c39ac50..5ed86c8 100644
--- a/ultralytics/yolo/engine/results.py
+++ b/ultralytics/yolo/engine/results.py
@@ -19,42 +19,41 @@ from ultralytics.yolo.utils.plotting import Annotator, colors, save_one_box
class BaseTensor(SimpleClass):
"""
-
- Attributes:
- data (torch.Tensor): Base tensor.
- orig_shape (tuple): Original image size, in the format (height, width).
-
- Methods:
- cpu(): Returns a copy of the tensor on CPU memory.
- numpy(): Returns a copy of the tensor as a numpy array.
- cuda(): Returns a copy of the tensor on GPU memory.
- to(): Returns a copy of the tensor with the specified device and dtype.
+ Base tensor class with additional methods for easy manipulation and device handling.
"""
def __init__(self, data, orig_shape) -> None:
+ """Initialize BaseTensor with data and original shape."""
self.data = data
self.orig_shape = orig_shape
@property
def shape(self):
+ """Return the shape of the data tensor."""
return self.data.shape
def cpu(self):
+ """Return a copy of the tensor on CPU memory."""
return self.__class__(self.data.cpu(), self.orig_shape)
def numpy(self):
+ """Return a copy of the tensor as a numpy array."""
return self.__class__(self.data.numpy(), self.orig_shape)
def cuda(self):
+ """Return a copy of the tensor on GPU memory."""
return self.__class__(self.data.cuda(), self.orig_shape)
def to(self, *args, **kwargs):
+ """Return a copy of the tensor with the specified device and dtype."""
return self.__class__(self.data.to(*args, **kwargs), self.orig_shape)
def __len__(self): # override len(results)
+ """Return the length of the data tensor."""
return len(self.data)
def __getitem__(self, idx):
+ """Return a BaseTensor with the specified index of the data tensor."""
return self.__class__(self.data[idx], self.orig_shape)
@@ -83,10 +82,10 @@ class Results(SimpleClass):
keypoints (List[List[float]], optional): A list of detected keypoints for each object.
speed (dict): A dictionary of preprocess, inference and postprocess speeds in milliseconds per image.
_keys (tuple): A tuple of attribute names for non-empty attributes.
-
"""
def __init__(self, orig_img, path, names, boxes=None, masks=None, probs=None, keypoints=None) -> None:
+ """Initialize the Results class."""
self.orig_img = orig_img
self.orig_shape = orig_img.shape[:2]
self.boxes = Boxes(boxes, self.orig_shape) if boxes is not None else None # native size boxes
@@ -99,16 +98,19 @@ class Results(SimpleClass):
self._keys = ('boxes', 'masks', 'probs', 'keypoints')
def pandas(self):
+ """Convert the results to a pandas DataFrame."""
pass
# TODO masks.pandas + boxes.pandas + cls.pandas
def __getitem__(self, idx):
+ """Return a Results object for the specified index."""
r = self.new()
for k in self.keys:
setattr(r, k, getattr(self, k)[idx])
return r
def update(self, boxes=None, masks=None, probs=None):
+ """Update the boxes, masks, and probs attributes of the Results object."""
if boxes is not None:
self.boxes = Boxes(boxes, self.orig_shape)
if masks is not None:
@@ -117,38 +119,45 @@ class Results(SimpleClass):
self.probs = probs
def cpu(self):
+ """Return a copy of the Results object with all tensors on CPU memory."""
r = self.new()
for k in self.keys:
setattr(r, k, getattr(self, k).cpu())
return r
def numpy(self):
+ """Return a copy of the Results object with all tensors as numpy arrays."""
r = self.new()
for k in self.keys:
setattr(r, k, getattr(self, k).numpy())
return r
def cuda(self):
+ """Return a copy of the Results object with all tensors on GPU memory."""
r = self.new()
for k in self.keys:
setattr(r, k, getattr(self, k).cuda())
return r
def to(self, *args, **kwargs):
+ """Return a copy of the Results object with tensors on the specified device and dtype."""
r = self.new()
for k in self.keys:
setattr(r, k, getattr(self, k).to(*args, **kwargs))
return r
def __len__(self):
+ """Return the number of detections in the Results object."""
for k in self.keys:
return len(getattr(self, k))
def new(self):
+ """Return a new Results object with the same image, path, and names."""
return Results(orig_img=self.orig_img, path=self.path, names=self.names)
@property
def keys(self):
+ """Return a list of non-empty attribute names."""
return [k for k in self._keys if getattr(self, k) is not None]
def plot(
@@ -250,7 +259,8 @@ class Results(SimpleClass):
return log_string
def save_txt(self, txt_file, save_conf=False):
- """Save predictions into txt file.
+ """
+ Save predictions into txt file.
Args:
txt_file (str): txt file path.
@@ -285,7 +295,8 @@ class Results(SimpleClass):
f.write(text + '\n')
def save_crop(self, save_dir, file_name=Path('im.jpg')):
- """Save cropped predictions to `save_dir/cls/file_name.jpg`.
+ """
+ Save cropped predictions to `save_dir/cls/file_name.jpg`.
Args:
save_dir (str | pathlib.Path): Save path.
@@ -338,6 +349,7 @@ class Boxes(BaseTensor):
"""
def __init__(self, boxes, orig_shape) -> None:
+ """Initialize the Boxes class."""
if boxes.ndim == 1:
boxes = boxes[None, :]
n = boxes.shape[-1]
@@ -349,40 +361,49 @@ class Boxes(BaseTensor):
@property
def xyxy(self):
+ """Return the boxes in xyxy format."""
return self.data[:, :4]
@property
def conf(self):
+ """Return the confidence values of the boxes."""
return self.data[:, -2]
@property
def cls(self):
+ """Return the class values of the boxes."""
return self.data[:, -1]
@property
def id(self):
+ """Return the track IDs of the boxes (if available)."""
return self.data[:, -3] if self.is_track else None
@property
@lru_cache(maxsize=2) # maxsize 1 should suffice
def xywh(self):
+ """Return the boxes in xywh format."""
return ops.xyxy2xywh(self.xyxy)
@property
@lru_cache(maxsize=2)
def xyxyn(self):
+ """Return the boxes in xyxy format normalized by original image size."""
return self.xyxy / self.orig_shape[[1, 0, 1, 0]]
@property
@lru_cache(maxsize=2)
def xywhn(self):
+ """Return the boxes in xywh format normalized by original image size."""
return self.xywh / self.orig_shape[[1, 0, 1, 0]]
def pandas(self):
+ """Convert the object to a pandas DataFrame (not yet implemented)."""
LOGGER.info('results.pandas() method not yet implemented')
@property
def boxes(self):
+ """Return the raw bboxes tensor (deprecated)."""
LOGGER.warning("WARNING ⚠️ 'Boxes.boxes' is deprecated. Use 'Boxes.data' instead.")
return self.data
@@ -411,6 +432,7 @@ class Masks(BaseTensor):
"""
def __init__(self, masks, orig_shape) -> None:
+ """Initialize the Masks class."""
if masks.ndim == 2:
masks = masks[None, :]
super().__init__(masks, orig_shape)
@@ -418,7 +440,7 @@ class Masks(BaseTensor):
@property
@lru_cache(maxsize=1)
def segments(self):
- """Segments-deprecated (normalized)."""
+ """Return segments (deprecated; normalized)."""
LOGGER.warning("WARNING ⚠️ 'Masks.segments' is deprecated. Use 'Masks.xyn' for segments (normalized) and "
"'Masks.xy' for segments (pixels) instead.")
return self.xyn
@@ -426,7 +448,7 @@ class Masks(BaseTensor):
@property
@lru_cache(maxsize=1)
def xyn(self):
- """Segments (normalized)."""
+ """Return segments (normalized)."""
return [
ops.scale_coords(self.data.shape[1:], x, self.orig_shape, normalize=True)
for x in ops.masks2segments(self.data)]
@@ -434,12 +456,13 @@ class Masks(BaseTensor):
@property
@lru_cache(maxsize=1)
def xy(self):
- """Segments (pixels)."""
+ """Return segments (pixels)."""
return [
ops.scale_coords(self.data.shape[1:], x, self.orig_shape, normalize=False)
for x in ops.masks2segments(self.data)]
@property
def masks(self):
+ """Return the raw masks tensor (deprecated)."""
LOGGER.warning("WARNING ⚠️ 'Masks.masks' is deprecated. Use 'Masks.data' instead.")
return self.data
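# A short sketch of the Results/Boxes accessors documented above (assumes
# 'yolov8n.pt' weights and an image 'bus.jpg' with at least one detection):
from ultralytics import YOLO

result = YOLO('yolov8n.pt')('bus.jpg')[0].cpu()  # move all tensors to CPU
for box in result.boxes:
    print(box.cls, box.conf, box.xyxyn)  # class, confidence, normalized xyxy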
diff --git a/ultralytics/yolo/engine/trainer.py b/ultralytics/yolo/engine/trainer.py
index 0f4e74a..eb7e26b 100644
--- a/ultralytics/yolo/engine/trainer.py
+++ b/ultralytics/yolo/engine/trainer.py
@@ -159,6 +159,7 @@ class BaseTrainer:
self.callbacks[event] = [callback]
def run_callbacks(self, event: str):
+ """Run all existing callbacks associated with a particular event."""
for callback in self.callbacks.get(event, []):
callback(self)
@@ -190,6 +191,7 @@ class BaseTrainer:
self._do_train(world_size)
def _setup_ddp(self, world_size):
+ """Initializes and sets the DistributedDataParallel parameters for training."""
torch.cuda.set_device(RANK)
self.device = torch.device('cuda', RANK)
LOGGER.info(f'DDP settings: RANK {RANK}, WORLD_SIZE {world_size}, DEVICE {self.device}')
@@ -259,6 +261,7 @@ class BaseTrainer:
self.run_callbacks('on_pretrain_routine_end')
def _do_train(self, world_size=1):
+ """Train completed, evaluate and plot if specified by arguments."""
if world_size > 1:
self._setup_ddp(world_size)
@@ -392,6 +395,7 @@ class BaseTrainer:
self.run_callbacks('teardown')
def save_model(self):
+ """Save model checkpoints based on various conditions."""
ckpt = {
'epoch': self.epoch,
'best_fitness': self.best_fitness,
@@ -436,6 +440,7 @@ class BaseTrainer:
return ckpt
def optimizer_step(self):
+ """Perform a single step of the training optimizer with gradient clipping and EMA update."""
self.scaler.unscale_(self.optimizer) # unscale gradients
torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=10.0) # clip gradients
self.scaler.step(self.optimizer)
@@ -461,9 +466,11 @@ class BaseTrainer:
return metrics, fitness
def get_model(self, cfg=None, weights=None, verbose=True):
+ """Get model and raise NotImplementedError for loading cfg files."""
raise NotImplementedError("This task trainer doesn't support loading cfg files")
def get_validator(self):
+ """Returns a NotImplementedError when the get_validator function is called."""
raise NotImplementedError('get_validator function not implemented in trainer')
def get_dataloader(self, dataset_path, batch_size=16, rank=0, mode='train'):
@@ -492,19 +499,24 @@ class BaseTrainer:
self.model.names = self.data['names']
def build_targets(self, preds, targets):
+ """Builds target tensors for training YOLO model."""
pass
def progress_string(self):
+ """Returns a string describing training progress."""
return ''
# TODO: may need to put these following functions into callback
def plot_training_samples(self, batch, ni):
+ """Plots training samples during YOLOv5 training."""
pass
def plot_training_labels(self):
+ """Plots training labels for YOLO model."""
pass
def save_metrics(self, metrics):
+ """Saves training metrics to a CSV file."""
keys, vals = list(metrics.keys()), list(metrics.values())
n = len(metrics) + 1 # number of cols
s = '' if self.csv.exists() else (('%23s,' * n % tuple(['epoch'] + keys)).rstrip(',') + '\n') # header
@@ -512,9 +524,11 @@ class BaseTrainer:
f.write(s + ('%23.5g,' * n % tuple([self.epoch] + vals)).rstrip(',') + '\n')
def plot_metrics(self):
+ """Plot and display metrics visually."""
pass
def final_eval(self):
+ """Performs final evaluation and validation for object detection YOLO model."""
for f in self.last, self.best:
if f.exists():
strip_optimizer(f) # strip optimizers
@@ -525,6 +539,7 @@ class BaseTrainer:
self.run_callbacks('on_fit_epoch_end')
def check_resume(self):
+ """Check if resume checkpoint exists and update arguments accordingly."""
resume = self.args.resume
if resume:
try:
@@ -539,6 +554,7 @@ class BaseTrainer:
self.resume = resume
def resume_training(self, ckpt):
+ """Resume YOLO training from given epoch and best fitness."""
if ckpt is None:
return
best_fitness = 0.0
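# Usage sketch for the resume flow documented in check_resume() and
# resume_training() above; assumes a prior run produced
# 'runs/detect/train/weights/last.pt':
from ultralytics import YOLO

model = YOLO('runs/detect/train/weights/last.pt')
model.train(resume=True)  # epoch count and best fitness restored from checkpoint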
diff --git a/ultralytics/yolo/engine/validator.py b/ultralytics/yolo/engine/validator.py
index 81263b3..96c4086 100644
--- a/ultralytics/yolo/engine/validator.py
+++ b/ultralytics/yolo/engine/validator.py
@@ -195,58 +195,72 @@ class BaseValidator:
return stats
def add_callback(self, event: str, callback):
- """
- Appends the given callback.
- """
+ """Appends the given callback."""
self.callbacks[event].append(callback)
def run_callbacks(self, event: str):
+ """Runs all callbacks associated with a specified event."""
for callback in self.callbacks.get(event, []):
callback(self)
def get_dataloader(self, dataset_path, batch_size):
+ """Get data loader from dataset path and batch size."""
raise NotImplementedError('get_dataloader function not implemented for this validator')
def preprocess(self, batch):
+ """Preprocesses an input batch."""
return batch
def postprocess(self, preds):
+ """Describes and summarizes the purpose of 'postprocess()' but no details mentioned."""
return preds
def init_metrics(self, model):
+ """Initialize performance metrics for the YOLO model."""
pass
def update_metrics(self, preds, batch):
+ """Updates metrics based on predictions and batch."""
pass
def finalize_metrics(self, *args, **kwargs):
+ """Finalizes and returns all metrics."""
pass
def get_stats(self):
+ """Returns statistics about the model's performance."""
return {}
def check_stats(self, stats):
+ """Checks statistics."""
pass
def print_results(self):
+ """Prints the results of the model's predictions."""
pass
def get_desc(self):
+ """Get description of the YOLO model."""
pass
@property
def metric_keys(self):
+ """Returns the metric keys used in YOLO training/validation."""
return []
# TODO: may need to put these following functions into callback
def plot_val_samples(self, batch, ni):
+ """Plots validation samples during training."""
pass
def plot_predictions(self, batch, preds, ni):
+ """Plots YOLO model predictions on batch images."""
pass
def pred_to_json(self, preds, batch):
+ """Convert predictions to JSON format."""
pass
def eval_json(self, stats):
+ """Evaluate and return JSON format of prediction statistics."""
pass
diff --git a/ultralytics/yolo/utils/__init__.py b/ultralytics/yolo/utils/__init__.py
index 8fa28d2..94563f8 100644
--- a/ultralytics/yolo/utils/__init__.py
+++ b/ultralytics/yolo/utils/__init__.py
@@ -182,8 +182,10 @@ def plt_settings(rcparams={'font.size': 11}, backend='Agg'):
"""
def decorator(func):
+ """Decorator to apply temporary rc parameters and backend to a function."""
def wrapper(*args, **kwargs):
+ """Sets rc parameters and backend, calls the original function, and restores the settings."""
original_backend = plt.get_backend()
plt.switch_backend(backend)
@@ -229,6 +231,7 @@ class EmojiFilter(logging.Filter):
"""
def filter(self, record):
+ """Filter logs by emoji unicode characters on windows."""
record.msg = emojis(record.msg)
return super().filter(record)
@@ -573,13 +576,16 @@ class TryExcept(contextlib.ContextDecorator):
"""YOLOv8 TryExcept class. Usage: @TryExcept() decorator or 'with TryExcept():' context manager."""
def __init__(self, msg='', verbose=True):
+ """Initialize TryExcept class with optional message and verbosity settings."""
self.msg = msg
self.verbose = verbose
def __enter__(self):
+ """Executes when entering TryExcept context, initializes instance."""
pass
def __exit__(self, exc_type, value, traceback):
+ """Defines behavior when exiting a 'with' block, prints error message if necessary."""
if self.verbose and value:
print(emojis(f"{self.msg}{': ' if self.msg else ''}{value}"))
return True
@@ -589,6 +595,7 @@ def threaded(func):
"""Multi-threads a target function and returns thread. Usage: @threaded decorator."""
def wrapper(*args, **kwargs):
+ """Multi-threads a given function and returns the thread."""
thread = threading.Thread(target=func, args=args, kwargs=kwargs, daemon=True)
thread.start()
return thread
@@ -602,6 +609,7 @@ def set_sentry():
"""
def before_send(event, hint):
+ """A function executed before sending the event to Sentry."""
if 'exc_info' in hint:
exc_type, exc_value, tb = hint['exc_info']
if exc_type in (KeyboardInterrupt, FileNotFoundError) \
@@ -698,6 +706,7 @@ def set_settings(kwargs, file=SETTINGS_YAML):
def deprecation_warn(arg, new_arg, version=None):
+ """Issue a deprecation warning when a deprecated argument is used, suggesting an updated argument."""
if not version:
version = float(__version__[:3]) + 0.2 # deprecate after 2nd major release
LOGGER.warning(f"WARNING ⚠️ '{arg}' is deprecated and will be removed in 'ultralytics {version}' in the future. "
diff --git a/ultralytics/yolo/utils/benchmarks.py b/ultralytics/yolo/utils/benchmarks.py
index 0660343..05c8fef 100644
--- a/ultralytics/yolo/utils/benchmarks.py
+++ b/ultralytics/yolo/utils/benchmarks.py
@@ -35,7 +35,30 @@ from ultralytics.yolo.utils.files import file_size
from ultralytics.yolo.utils.torch_utils import select_device
-def benchmark(model=Path(SETTINGS['weights_dir']) / 'yolov8n.pt', imgsz=160, half=False, device='cpu', hard_fail=False):
+def benchmark(model=Path(SETTINGS['weights_dir']) / 'yolov8n.pt',
+ imgsz=160,
+ half=False,
+ int8=False,
+ device='cpu',
+ hard_fail=False):
+ """
+ Benchmark a YOLO model across different formats for speed and accuracy.
+
+ Args:
+ model (Union[str, Path], optional): Path to the model file or directory. Default is
+ Path(SETTINGS['weights_dir']) / 'yolov8n.pt'.
+ imgsz (int, optional): Image size for the benchmark. Default is 160.
+ half (bool, optional): Use half-precision (FP16) for the model if True. Default is False.
+ int8 (bool, optional): Use INT8 precision for the model if True. Default is False.
+ device (str, optional): Device to run the benchmark on, either 'cpu' or 'cuda'. Default is 'cpu'.
+ hard_fail (Union[bool, float], optional): If True or a float, assert benchmarks pass with given metric.
+ Default is False.
+
+ Returns:
+ df (pandas.DataFrame): A pandas DataFrame with benchmark results for each format, including file size,
+ metric, and inference time.
+ """
+
import pandas as pd
pd.options.display.max_columns = 10
pd.options.display.width = 120
@@ -61,7 +84,7 @@ def benchmark(model=Path(SETTINGS['weights_dir']) / 'yolov8n.pt', imgsz=160, hal
filename = model.ckpt_path or model.cfg
export = model # PyTorch format
else:
- filename = model.export(imgsz=imgsz, format=format, half=half, device=device) # all others
+ filename = model.export(imgsz=imgsz, format=format, half=half, int8=int8, device=device) # all others
export = YOLO(filename, task=model.task)
assert suffix in str(filename), 'export failed'
emoji = '❎' # indicates export succeeded
@@ -83,7 +106,14 @@ def benchmark(model=Path(SETTINGS['weights_dir']) / 'yolov8n.pt', imgsz=160, hal
elif model.task == 'pose':
data, key = 'coco8-pose.yaml', 'metrics/mAP50-95(P)'
- results = export.val(data=data, batch=1, imgsz=imgsz, plots=False, device=device, half=half, verbose=False)
+ results = export.val(data=data,
+ batch=1,
+ imgsz=imgsz,
+ plots=False,
+ device=device,
+ half=half,
+ int8=int8,
+ verbose=False)
metric, speed = results.results_dict[key], results.speed['inference']
y.append([name, '✅', round(file_size(filename), 1), round(metric, 4), round(speed, 2)])
except Exception as e:
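# Usage sketch for the updated benchmark() signature above (assumes the
# 'yolov8n.pt' checkpoint is available locally or can be auto-downloaded):
from ultralytics.yolo.utils.benchmarks import benchmark

df = benchmark(model='yolov8n.pt', imgsz=160, int8=True, device='cpu')
print(df)  # one row per export format: status, size, metric, inference time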
diff --git a/ultralytics/yolo/utils/callbacks/base.py b/ultralytics/yolo/utils/callbacks/base.py
index a2abc4b..4cf06c7 100644
--- a/ultralytics/yolo/utils/callbacks/base.py
+++ b/ultralytics/yolo/utils/callbacks/base.py
@@ -2,111 +2,144 @@
"""
Base callbacks
"""
+
from collections import defaultdict
from copy import deepcopy
-
# Trainer callbacks ----------------------------------------------------------------------------------------------------
+
+
def on_pretrain_routine_start(trainer):
+ """Called before the pretraining routine starts."""
pass
def on_pretrain_routine_end(trainer):
+ """Called after the pretraining routine ends."""
pass
def on_train_start(trainer):
+ """Called when the training starts."""
pass
def on_train_epoch_start(trainer):
+ """Called at the start of each training epoch."""
pass
def on_train_batch_start(trainer):
+ """Called at the start of each training batch."""
pass
def optimizer_step(trainer):
+ """Called when the optimizer takes a step."""
pass
def on_before_zero_grad(trainer):
+ """Called before the gradients are set to zero."""
pass
def on_train_batch_end(trainer):
+ """Called at the end of each training batch."""
pass
def on_train_epoch_end(trainer):
+ """Called at the end of each training epoch."""
pass
def on_fit_epoch_end(trainer):
+ """Called at the end of each fit epoch (train + val)."""
pass
def on_model_save(trainer):
+ """Called when the model is saved."""
pass
def on_train_end(trainer):
+ """Called when the training ends."""
pass
def on_params_update(trainer):
+ """Called when the model parameters are updated."""
pass
def teardown(trainer):
+ """Called during the teardown of the training process."""
pass
# Validator callbacks --------------------------------------------------------------------------------------------------
+
+
def on_val_start(validator):
+ """Called when the validation starts."""
pass
def on_val_batch_start(validator):
+ """Called at the start of each validation batch."""
pass
def on_val_batch_end(validator):
+ """Called at the end of each validation batch."""
pass
def on_val_end(validator):
+ """Called when the validation ends."""
pass
# Predictor callbacks --------------------------------------------------------------------------------------------------
+
+
def on_predict_start(predictor):
+ """Called when the prediction starts."""
pass
def on_predict_batch_start(predictor):
+ """Called at the start of each prediction batch."""
pass
def on_predict_batch_end(predictor):
+ """Called at the end of each prediction batch."""
pass
def on_predict_postprocess_end(predictor):
+ """Called after the post-processing of the prediction ends."""
pass
def on_predict_end(predictor):
+ """Called when the prediction ends."""
pass
# Exporter callbacks ---------------------------------------------------------------------------------------------------
+
+
def on_export_start(exporter):
+ """Called when the model export starts."""
pass
def on_export_end(exporter):
+ """Called when the model export ends."""
pass
@@ -146,10 +179,23 @@ default_callbacks = {
def get_default_callbacks():
+ """
+ Return a copy of the default_callbacks dictionary with lists as default values.
+
+ Returns:
+ (defaultdict): A defaultdict with keys from default_callbacks and empty lists as default values.
+ """
return defaultdict(list, deepcopy(default_callbacks))
def add_integration_callbacks(instance):
+ """
+ Add integration callbacks from various sources to the instance's callbacks.
+
+ Args:
+ instance (Trainer, Predictor, Validator, Exporter): An object with a 'callbacks' attribute that is a dictionary
+ of callback lists.
+ """
from .clearml import callbacks as clearml_callbacks
from .comet import callbacks as comet_callbacks
from .hub import callbacks as hub_callbacks
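# A minimal sketch of how trainers/validators consume the helpers above:
from ultralytics.yolo.utils.callbacks.base import get_default_callbacks

cbs = get_default_callbacks()  # defaultdict(list) copy of default_callbacks
cbs['on_train_start'].append(lambda trainer: print('training begins'))
for cb in cbs.get('on_train_start', []):  # run_callbacks() iterates like this
    cb(trainer=None)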
diff --git a/ultralytics/yolo/utils/callbacks/clearml.py b/ultralytics/yolo/utils/callbacks/clearml.py
index dd0ad6d..f188649 100644
--- a/ultralytics/yolo/utils/callbacks/clearml.py
+++ b/ultralytics/yolo/utils/callbacks/clearml.py
@@ -59,6 +59,7 @@ def _log_plot(title, plot_path) -> None:
def on_pretrain_routine_start(trainer):
+ """Runs at start of pretraining routine; initializes and connects/ logs task to ClearML."""
try:
task = Task.current_task()
if task:
@@ -83,11 +84,13 @@ def on_pretrain_routine_start(trainer):
def on_train_epoch_end(trainer):
+ """Logs debug samples for the first epoch of YOLO training."""
if trainer.epoch == 1 and Task.current_task():
_log_debug_samples(sorted(trainer.save_dir.glob('train_batch*.jpg')), 'Mosaic')
def on_fit_epoch_end(trainer):
+ """Reports model information to logger at the end of an epoch."""
task = Task.current_task()
if task:
# You should have access to the validation bboxes under jdict
@@ -105,12 +108,14 @@ def on_fit_epoch_end(trainer):
def on_val_end(validator):
+ """Logs validation results including labels and predictions."""
if Task.current_task():
# Log val_labels and val_pred
_log_debug_samples(sorted(validator.save_dir.glob('val*.jpg')), 'Validation')
def on_train_end(trainer):
+ """Logs final model and its name on training completion."""
task = Task.current_task()
if task:
# Log final results, CM matrix + PR plots
diff --git a/ultralytics/yolo/utils/callbacks/comet.py b/ultralytics/yolo/utils/callbacks/comet.py
index 8f8de08..2d55df1 100644
--- a/ultralytics/yolo/utils/callbacks/comet.py
+++ b/ultralytics/yolo/utils/callbacks/comet.py
@@ -36,6 +36,7 @@ _comet_image_prediction_count = 0
def _get_experiment_type(mode, project_name):
+ """Return an experiment based on mode and project name."""
if mode == 'offline':
return comet_ml.OfflineExperiment(project_name=project_name)
@@ -61,6 +62,7 @@ def _create_experiment(args):
def _fetch_trainer_metadata(trainer):
+ """Returns metadata for YOLO training including epoch and asset saving status."""
curr_epoch = trainer.epoch + 1
train_num_steps_per_epoch = len(trainer.train_loader.dataset) // trainer.batch_size
@@ -97,6 +99,7 @@ def _scale_bounding_box_to_original_image_shape(box, resized_image_shape, origin
def _format_ground_truth_annotations_for_detection(img_idx, image_path, batch, class_name_map=None):
+ """Format ground truth annotations for detection."""
indices = batch['batch_idx'] == img_idx
bboxes = batch['bboxes'][indices]
if len(bboxes) == 0:
@@ -120,6 +123,7 @@ def _format_ground_truth_annotations_for_detection(img_idx, image_path, batch, c
def _format_prediction_annotations_for_detection(image_path, metadata, class_label_map=None):
+ """Format YOLO predictions for object detection visualization."""
stem = image_path.stem
image_id = int(stem) if stem.isnumeric() else stem
@@ -142,6 +146,7 @@ def _format_prediction_annotations_for_detection(image_path, metadata, class_lab
def _fetch_annotations(img_idx, image_path, batch, prediction_metadata_map, class_label_map):
+ """Join the ground truth and prediction annotations if they exist."""
ground_truth_annotations = _format_ground_truth_annotations_for_detection(img_idx, image_path, batch,
class_label_map)
prediction_annotations = _format_prediction_annotations_for_detection(image_path, prediction_metadata_map,
@@ -153,6 +158,7 @@ def _fetch_annotations(img_idx, image_path, batch, prediction_metadata_map, clas
def _create_prediction_metadata_map(model_predictions):
+ """Create metadata map for model predictions by groupings them based on image ID."""
pred_metadata_map = {}
for prediction in model_predictions:
pred_metadata_map.setdefault(prediction['image_id'], [])
@@ -162,6 +168,7 @@ def _create_prediction_metadata_map(model_predictions):
def _log_confusion_matrix(experiment, trainer, curr_step, curr_epoch):
+ """Log the confusion matrix to Weights and Biases experiment."""
conf_mat = trainer.validator.confusion_matrix.matrix
names = list(trainer.data['names'].values()) + ['background']
experiment.log_confusion_matrix(
@@ -174,6 +181,7 @@ def _log_confusion_matrix(experiment, trainer, curr_step, curr_epoch):
def _log_images(experiment, image_paths, curr_step, annotations=None):
+ """Logs images to the experiment with optional annotations."""
if annotations:
for image_path, annotation in zip(image_paths, annotations):
experiment.log_image(image_path, name=image_path.stem, step=curr_step, annotations=annotation)
@@ -184,6 +192,7 @@ def _log_images(experiment, image_paths, curr_step, annotations=None):
def _log_image_predictions(experiment, validator, curr_step):
+ """Logs predicted boxes for a single image during training."""
global _comet_image_prediction_count
task = validator.args.task
@@ -225,6 +234,7 @@ def _log_image_predictions(experiment, validator, curr_step):
def _log_plots(experiment, trainer):
+ """Logs evaluation plots and label plots for the experiment."""
plot_filenames = [trainer.save_dir / f'{plots}.png' for plots in EVALUATION_PLOT_NAMES]
_log_images(experiment, plot_filenames, None)
@@ -233,6 +243,7 @@ def _log_plots(experiment, trainer):
def _log_model(experiment, trainer):
+ """Log the best-trained model to Comet.ml."""
experiment.log_model(
COMET_MODEL_NAME,
file_or_folder=str(trainer.best),
@@ -242,12 +253,14 @@ def _log_model(experiment, trainer):
def on_pretrain_routine_start(trainer):
+ """Creates or resumes a CometML experiment at the start of a YOLO pre-training routine."""
experiment = comet_ml.get_global_experiment()
if not experiment:
_create_experiment(trainer.args)
def on_train_epoch_end(trainer):
+ """Log metrics and save batch images at the end of training epochs."""
experiment = comet_ml.get_global_experiment()
if not experiment:
return
@@ -267,6 +280,7 @@ def on_train_epoch_end(trainer):
def on_fit_epoch_end(trainer):
+ """Logs model assets at the end of each epoch."""
experiment = comet_ml.get_global_experiment()
if not experiment:
return
@@ -296,6 +310,7 @@ def on_fit_epoch_end(trainer):
def on_train_end(trainer):
+ """Perform operations at the end of training."""
experiment = comet_ml.get_global_experiment()
if not experiment:
return
diff --git a/ultralytics/yolo/utils/callbacks/hub.py b/ultralytics/yolo/utils/callbacks/hub.py
index 4a0c2a8..f467dae 100644
--- a/ultralytics/yolo/utils/callbacks/hub.py
+++ b/ultralytics/yolo/utils/callbacks/hub.py
@@ -9,6 +9,7 @@ from ultralytics.yolo.utils.torch_utils import get_flops, get_num_params
def on_pretrain_routine_end(trainer):
+ """Logs info before starting timer for upload rate limit."""
session = getattr(trainer, 'hub_session', None)
if session:
# Start timer for upload rate limit
@@ -17,6 +18,7 @@ def on_pretrain_routine_end(trainer):
def on_fit_epoch_end(trainer):
+ """Uploads training progress metrics at the end of each epoch."""
session = getattr(trainer, 'hub_session', None)
if session:
# Upload metrics after val end
@@ -35,6 +37,7 @@ def on_fit_epoch_end(trainer):
def on_model_save(trainer):
+ """Saves checkpoints to Ultralytics HUB with rate limiting."""
session = getattr(trainer, 'hub_session', None)
if session:
# Upload checkpoints with rate limiting
@@ -46,6 +49,7 @@ def on_model_save(trainer):
def on_train_end(trainer):
+ """Upload final model and metrics to Ultralytics HUB at the end of training."""
session = getattr(trainer, 'hub_session', None)
if session:
# Upload final model and metrics with exponential standoff
@@ -57,18 +61,22 @@ def on_train_end(trainer):
def on_train_start(trainer):
+ """Run traces on train start."""
traces(trainer.args, traces_sample_rate=1.0)
def on_val_start(validator):
+ """Runs traces on validation start."""
traces(validator.args, traces_sample_rate=1.0)
def on_predict_start(predictor):
+ """Run traces on predict start."""
traces(predictor.args, traces_sample_rate=1.0)
def on_export_start(exporter):
+ """Run traces on export start."""
traces(exporter.args, traces_sample_rate=1.0)
diff --git a/ultralytics/yolo/utils/callbacks/mlflow.py b/ultralytics/yolo/utils/callbacks/mlflow.py
index c71611d..36d092d 100644
--- a/ultralytics/yolo/utils/callbacks/mlflow.py
+++ b/ultralytics/yolo/utils/callbacks/mlflow.py
@@ -16,6 +16,7 @@ except (ImportError, AssertionError):
def on_pretrain_routine_end(trainer):
+ """Logs training parameters to MLflow."""
global mlflow, run, run_id, experiment_name
if os.environ.get('MLFLOW_TRACKING_URI') is None:
@@ -45,17 +46,20 @@ def on_pretrain_routine_end(trainer):
def on_fit_epoch_end(trainer):
+ """Logs training metrics to Mlflow."""
if mlflow:
metrics_dict = {f"{re.sub('[()]', '', k)}": float(v) for k, v in trainer.metrics.items()}
run.log_metrics(metrics=metrics_dict, step=trainer.epoch)
def on_model_save(trainer):
+ """Logs model and metrics to mlflow on save."""
if mlflow:
run.log_artifact(trainer.last)
def on_train_end(trainer):
+ """Called at end of train loop to log model artifact info."""
if mlflow:
root_dir = Path(__file__).resolve().parents[3]
run.log_artifact(trainer.best)
diff --git a/ultralytics/yolo/utils/callbacks/raytune.py b/ultralytics/yolo/utils/callbacks/raytune.py
index a57b4f4..1fff729 100644
--- a/ultralytics/yolo/utils/callbacks/raytune.py
+++ b/ultralytics/yolo/utils/callbacks/raytune.py
@@ -7,6 +7,7 @@ except (ImportError, AssertionError):
def on_fit_epoch_end(trainer):
+ """Sends training metrics to Ray Tune at end of each epoch."""
if ray.tune.is_session_enabled():
metrics = trainer.metrics
metrics['epoch'] = trainer.epoch
diff --git a/ultralytics/yolo/utils/callbacks/tensorboard.py b/ultralytics/yolo/utils/callbacks/tensorboard.py
index 90b4382..8c14dcb 100644
--- a/ultralytics/yolo/utils/callbacks/tensorboard.py
+++ b/ultralytics/yolo/utils/callbacks/tensorboard.py
@@ -12,12 +12,14 @@ writer = None # TensorBoard SummaryWriter instance
def _log_scalars(scalars, step=0):
+ """Logs scalar values to TensorBoard."""
if writer:
for k, v in scalars.items():
writer.add_scalar(k, v, step)
def on_pretrain_routine_start(trainer):
+ """Initialize TensorBoard logging with SummaryWriter."""
if SummaryWriter:
try:
global writer
@@ -29,10 +31,12 @@ def on_pretrain_routine_start(trainer):
def on_batch_end(trainer):
+ """Logs scalar statistics at the end of a training batch."""
_log_scalars(trainer.label_loss_items(trainer.tloss, prefix='train'), trainer.epoch + 1)
def on_fit_epoch_end(trainer):
+ """Logs epoch metrics at end of training epoch."""
_log_scalars(trainer.metrics, trainer.epoch + 1)
diff --git a/ultralytics/yolo/utils/callbacks/wb.py b/ultralytics/yolo/utils/callbacks/wb.py
index c62c4f8..f8776cd 100644
--- a/ultralytics/yolo/utils/callbacks/wb.py
+++ b/ultralytics/yolo/utils/callbacks/wb.py
@@ -11,11 +11,13 @@ except (ImportError, AssertionError):
def on_pretrain_routine_start(trainer):
+ """Initiate and start project if module is present."""
wb.init(project=trainer.args.project or 'YOLOv8', name=trainer.args.name, config=vars(
trainer.args)) if not wb.run else wb.run
def on_fit_epoch_end(trainer):
+ """Logs training metrics and model information at the end of an epoch."""
wb.run.log(trainer.metrics, step=trainer.epoch + 1)
if trainer.epoch == 0:
model_info = {
@@ -26,6 +28,7 @@ def on_fit_epoch_end(trainer):
def on_train_epoch_end(trainer):
+ """Log metrics and save images at the end of each training epoch."""
wb.run.log(trainer.label_loss_items(trainer.tloss, prefix='train'), step=trainer.epoch + 1)
wb.run.log(trainer.lr, step=trainer.epoch + 1)
if trainer.epoch == 1:
@@ -35,6 +38,7 @@ def on_train_epoch_end(trainer):
def on_train_end(trainer):
+ """Save the best model as an artifact at end of training."""
art = wb.Artifact(type='model', name=f'run_{wb.run.id}_model')
if trainer.best.exists():
art.add_file(trainer.best)
diff --git a/ultralytics/yolo/utils/checks.py b/ultralytics/yolo/utils/checks.py
index a40f83a..23738ff 100644
--- a/ultralytics/yolo/utils/checks.py
+++ b/ultralytics/yolo/utils/checks.py
@@ -295,7 +295,7 @@ def check_file(file, suffix='', download=True, hard=True):
def check_yaml(file, suffix=('.yaml', '.yml'), hard=True):
- # Search/download YAML file (if necessary) and return path, checking suffix
+ """Search/download YAML file (if necessary) and return path, checking suffix."""
return check_file(file, suffix, hard=hard)
@@ -315,6 +315,7 @@ def check_imshow(warn=False):
def check_yolo(verbose=True, device=''):
+ """Return a human-readable YOLO software and hardware summary."""
from ultralytics.yolo.utils.torch_utils import select_device
if is_colab():
diff --git a/ultralytics/yolo/utils/dist.py b/ultralytics/yolo/utils/dist.py
index e8ba05b..edd484a 100644
--- a/ultralytics/yolo/utils/dist.py
+++ b/ultralytics/yolo/utils/dist.py
@@ -24,6 +24,7 @@ def find_free_network_port() -> int:
def generate_ddp_file(trainer):
+ """Generates a DDP file and returns its file name."""
module, name = f'{trainer.__class__.__module__}.{trainer.__class__.__name__}'.rsplit('.', 1)
content = f'''cfg = {vars(trainer.args)} \nif __name__ == "__main__":
@@ -43,6 +44,7 @@ def generate_ddp_file(trainer):
def generate_ddp_command(world_size, trainer):
+ """Generates and returns command for distributed training."""
import __main__ # noqa local import to avoid https://github.com/Lightning-AI/lightning/issues/15218
if not trainer.resume:
shutil.rmtree(trainer.save_dir) # remove the save_dir
diff --git a/ultralytics/yolo/utils/downloads.py b/ultralytics/yolo/utils/downloads.py
index 0831b01..a60a74f 100644
--- a/ultralytics/yolo/utils/downloads.py
+++ b/ultralytics/yolo/utils/downloads.py
@@ -192,7 +192,7 @@ def attempt_download_asset(file, repo='ultralytics/assets', release='v0.0.0'):
def download(url, dir=Path.cwd(), unzip=True, delete=False, curl=False, threads=1, retry=3):
- # Multithreaded file download and unzip function, used in data.yaml for autodownload
+ """Downloads and unzips files concurrently if threads > 1, else sequentially."""
dir = Path(dir)
dir.mkdir(parents=True, exist_ok=True) # make directory
if threads > 1:
diff --git a/ultralytics/yolo/utils/errors.py b/ultralytics/yolo/utils/errors.py
index 9d3cff5..7163d4d 100644
--- a/ultralytics/yolo/utils/errors.py
+++ b/ultralytics/yolo/utils/errors.py
@@ -6,4 +6,5 @@ from ultralytics.yolo.utils import emojis
class HUBModelError(Exception):
def __init__(self, message='Model not found. Please check model URL and try again.'):
+ """Create an exception for when a model is not found."""
super().__init__(emojis(message))
diff --git a/ultralytics/yolo/utils/files.py b/ultralytics/yolo/utils/files.py
index cece05c..0f9b7e6 100644
--- a/ultralytics/yolo/utils/files.py
+++ b/ultralytics/yolo/utils/files.py
@@ -11,13 +11,16 @@ class WorkingDirectory(contextlib.ContextDecorator):
"""Usage: @WorkingDirectory(dir) decorator or 'with WorkingDirectory(dir):' context manager."""
def __init__(self, new_dir):
+ """Sets the working directory to 'new_dir' upon instantiation."""
self.dir = new_dir # new dir
self.cwd = Path.cwd().resolve() # current dir
def __enter__(self):
+ """Changes the current directory to the specified directory."""
os.chdir(self.dir)
def __exit__(self, exc_type, exc_val, exc_tb):
+ """Restore the current working directory on context exit."""
os.chdir(self.cwd)
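# Usage sketch for WorkingDirectory above, as context manager or decorator:
from pathlib import Path
from ultralytics.yolo.utils.files import WorkingDirectory

with WorkingDirectory(Path.home()):
    print(Path.cwd())  # home directory while inside the block
print(Path.cwd())      # original directory restored by __exit__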
diff --git a/ultralytics/yolo/utils/instance.py b/ultralytics/yolo/utils/instance.py
index d150086..9f74871 100644
--- a/ultralytics/yolo/utils/instance.py
+++ b/ultralytics/yolo/utils/instance.py
@@ -14,6 +14,7 @@ def _ntuple(n):
"""From PyTorch internals."""
def parse(x):
+ """Parse bounding boxes format between XYWH and LTWH."""
return x if isinstance(x, abc.Iterable) else tuple(repeat(x, n))
return parse
@@ -64,6 +65,7 @@ class Bboxes:
# return Bboxes(bboxes, format)
def convert(self, format):
+ """Converts bounding box format from one type to another."""
assert format in _formats
if self.format == format:
return
@@ -77,6 +79,7 @@ class Bboxes:
self.format = format
def areas(self):
+ """Return box areas."""
self.convert('xyxy')
return (self.bboxes[:, 2] - self.bboxes[:, 0]) * (self.bboxes[:, 3] - self.bboxes[:, 1])
@@ -125,6 +128,7 @@ class Bboxes:
self.bboxes[:, 3] += offset[3]
def __len__(self):
+ """Return the number of boxes."""
return len(self.bboxes)
@classmethod
@@ -202,9 +206,11 @@ class Instances:
self.segments = segments
def convert_bbox(self, format):
+ """Convert bounding box format."""
self._bboxes.convert(format=format)
def bbox_areas(self):
+ """Calculate the area of bounding boxes."""
self._bboxes.areas()
def scale(self, scale_w, scale_h, bbox_only=False):
@@ -219,6 +225,7 @@ class Instances:
self.keypoints[..., 1] *= scale_h
def denormalize(self, w, h):
+ """Denormalizes boxes, segments, and keypoints from normalized coordinates."""
if not self.normalized:
return
self._bboxes.mul(scale=(w, h, w, h))
@@ -230,6 +237,7 @@ class Instances:
self.normalized = False
def normalize(self, w, h):
+ """Normalize bounding boxes, segments, and keypoints to image dimensions."""
if self.normalized:
return
self._bboxes.mul(scale=(1 / w, 1 / h, 1 / w, 1 / h))
@@ -279,6 +287,7 @@ class Instances:
)
def flipud(self, h):
+ """Flips the coordinates of bounding boxes, segments, and keypoints vertically."""
if self._bboxes.format == 'xyxy':
y1 = self.bboxes[:, 1].copy()
y2 = self.bboxes[:, 3].copy()
@@ -291,6 +300,7 @@ class Instances:
self.keypoints[..., 1] = h - self.keypoints[..., 1]
def fliplr(self, w):
+ """Reverses the order of the bounding boxes and segments horizontally."""
if self._bboxes.format == 'xyxy':
x1 = self.bboxes[:, 0].copy()
x2 = self.bboxes[:, 2].copy()
@@ -303,6 +313,7 @@ class Instances:
self.keypoints[..., 0] = w - self.keypoints[..., 0]
def clip(self, w, h):
+ """Clips bounding boxes, segments, and keypoints values to stay within image boundaries."""
ori_format = self._bboxes.format
self.convert_bbox(format='xyxy')
self.bboxes[:, [0, 2]] = self.bboxes[:, [0, 2]].clip(0, w)
@@ -316,6 +327,7 @@ class Instances:
self.keypoints[..., 1] = self.keypoints[..., 1].clip(0, h)
def update(self, bboxes, segments=None, keypoints=None):
+ """Updates instance variables."""
new_bboxes = Bboxes(bboxes, format=self._bboxes.format)
self._bboxes = new_bboxes
if segments is not None:
@@ -324,6 +336,7 @@ class Instances:
self.keypoints = keypoints
def __len__(self):
+ """Return the length of the instance list."""
return len(self.bboxes)
@classmethod
@@ -363,4 +376,5 @@ class Instances:
@property
def bboxes(self):
+ """Return bounding boxes."""
return self._bboxes.bboxes
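# A numeric sketch of Bboxes.convert() and areas() documented above:
import numpy as np
from ultralytics.yolo.utils.instance import Bboxes

boxes = Bboxes(np.array([[10., 20., 50., 80.]]), format='xyxy')
print(boxes.areas())  # [2400.] -> (50 - 10) * (80 - 20)
boxes.convert('xywh')
print(boxes.bboxes)   # [[30. 50. 40. 60.]] center-x, center-y, width, height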
diff --git a/ultralytics/yolo/utils/loss.py b/ultralytics/yolo/utils/loss.py
index 60deed4..52bfaf9 100644
--- a/ultralytics/yolo/utils/loss.py
+++ b/ultralytics/yolo/utils/loss.py
@@ -12,9 +12,11 @@ class VarifocalLoss(nn.Module):
"""Varifocal loss by Zhang et al. https://arxiv.org/abs/2008.13367."""
def __init__(self):
+ """Initialize the VarifocalLoss class."""
super().__init__()
def forward(self, pred_score, gt_score, label, alpha=0.75, gamma=2.0):
+ """Computes varfocal loss."""
weight = alpha * pred_score.sigmoid().pow(gamma) * (1 - label) + gt_score * label
with torch.cuda.amp.autocast(enabled=False):
loss = (F.binary_cross_entropy_with_logits(pred_score.float(), gt_score.float(), reduction='none') *
@@ -25,6 +27,7 @@ class VarifocalLoss(nn.Module):
class BboxLoss(nn.Module):
def __init__(self, reg_max, use_dfl=False):
+ """Initialize the BboxLoss module with regularization maximum and DFL settings."""
super().__init__()
self.reg_max = reg_max
self.use_dfl = use_dfl
@@ -64,6 +67,7 @@ class KeypointLoss(nn.Module):
self.sigmas = sigmas
def forward(self, pred_kpts, gt_kpts, kpt_mask, area):
+ """Calculates keypoint loss factor and Euclidean distance loss for predicted and actual keypoints."""
d = (pred_kpts[..., 0] - gt_kpts[..., 0]) ** 2 + (pred_kpts[..., 1] - gt_kpts[..., 1]) ** 2
kpt_loss_factor = (torch.sum(kpt_mask != 0) + torch.sum(kpt_mask == 0)) / (torch.sum(kpt_mask != 0) + 1e-9)
# e = d / (2 * (area * self.sigmas) ** 2 + 1e-9) # from formula
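# A numeric sketch of the varifocal weighting in VarifocalLoss.forward above
# (alpha=0.75, gamma=2.0): confident negatives get large penalty weights, while
# positives are weighted by their gt_score (e.g. IoU).
import torch

pred_score = torch.tensor([2.0])  # raw logit; sigmoid(2.0) ~= 0.881
neg_weight = 0.75 * pred_score.sigmoid().pow(2.0)  # label == 0 branch
print(float(neg_weight))  # ~0.582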
diff --git a/ultralytics/yolo/utils/metrics.py b/ultralytics/yolo/utils/metrics.py
index 9201a38..70c5fd2 100644
--- a/ultralytics/yolo/utils/metrics.py
+++ b/ultralytics/yolo/utils/metrics.py
@@ -180,6 +180,7 @@ class FocalLoss(nn.Module):
"""Wraps focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5)."""
def __init__(self, loss_fcn, gamma=1.5, alpha=0.25):
+ """Initialize FocalLoss object with given loss function and hyperparameters."""
super().__init__()
self.loss_fcn = loss_fcn # must be nn.BCEWithLogitsLoss()
self.gamma = gamma
@@ -188,6 +189,7 @@ class FocalLoss(nn.Module):
self.loss_fcn.reduction = 'none' # required to apply FL to each element
def forward(self, pred, true):
+ """Calculates and updates confusion matrix for object detection/classification tasks."""
loss = self.loss_fcn(pred, true)
# p_t = torch.exp(-loss)
# loss *= self.alpha * (1.000001 - p_t) ** self.gamma # non-zero power for gradient stability
@@ -220,6 +222,7 @@ class ConfusionMatrix:
"""
def __init__(self, nc, conf=0.25, iou_thres=0.45, task='detect'):
+ """Initialize attributes for the YOLO model."""
self.task = task
self.matrix = np.zeros((nc + 1, nc + 1)) if self.task == 'detect' else np.zeros((nc, nc))
self.nc = nc # number of classes
@@ -285,9 +288,11 @@ class ConfusionMatrix:
self.matrix[dc, self.nc] += 1 # predicted background
def matrix(self):
+ """Returns the confusion matrix."""
return self.matrix
def tp_fp(self):
+ """Returns true positives and false positives."""
tp = self.matrix.diagonal() # true positives
fp = self.matrix.sum(1) - tp # false positives
# fn = self.matrix.sum(0) - tp # false negatives (missed detections)
@@ -679,6 +684,7 @@ class DetMetrics(SimpleClass):
self.speed = {'preprocess': 0.0, 'inference': 0.0, 'loss': 0.0, 'postprocess': 0.0}
def process(self, tp, conf, pred_cls, target_cls):
+ """Process predicted results for object detection and update metrics."""
results = ap_per_class(tp, conf, pred_cls, target_cls, plot=self.plot, save_dir=self.save_dir,
names=self.names)[2:]
self.box.nc = len(self.names)
@@ -686,28 +692,35 @@ class DetMetrics(SimpleClass):
@property
def keys(self):
+ """Returns a list of keys for accessing specific metrics."""
return ['metrics/precision(B)', 'metrics/recall(B)', 'metrics/mAP50(B)', 'metrics/mAP50-95(B)']
def mean_results(self):
+ """Calculate mean of detected objects & return precision, recall, mAP50, and mAP50-95."""
return self.box.mean_results()
def class_result(self, i):
+ """Return the result of evaluating the performance of an object detection model on a specific class."""
return self.box.class_result(i)
@property
def maps(self):
+ """Returns mean Average Precision (mAP) scores per class."""
return self.box.maps
@property
def fitness(self):
+ """Returns the fitness of box object."""
return self.box.fitness()
@property
def ap_class_index(self):
+ """Returns the average precision index per class."""
return self.box.ap_class_index
@property
def results_dict(self):
+ """Returns dictionary of computed performance metrics and statistics."""
return dict(zip(self.keys + ['fitness'], self.mean_results() + [self.fitness]))
@@ -781,22 +794,27 @@ class SegmentMetrics(SimpleClass):
@property
def keys(self):
+ """Returns a list of keys for accessing metrics."""
return [
'metrics/precision(B)', 'metrics/recall(B)', 'metrics/mAP50(B)', 'metrics/mAP50-95(B)',
'metrics/precision(M)', 'metrics/recall(M)', 'metrics/mAP50(M)', 'metrics/mAP50-95(M)']
def mean_results(self):
+ """Return the mean metrics for bounding box and segmentation results."""
return self.box.mean_results() + self.seg.mean_results()
def class_result(self, i):
+ """Returns classification results for a specified class index."""
return self.box.class_result(i) + self.seg.class_result(i)
@property
def maps(self):
+ """Returns mAP scores for object detection and semantic segmentation models."""
return self.box.maps + self.seg.maps
@property
def fitness(self):
+ """Get the fitness score for both segmentation and bounding box models."""
return self.seg.fitness() + self.box.fitness()
@property
@@ -806,6 +824,7 @@ class SegmentMetrics(SimpleClass):
@property
def results_dict(self):
+ """Returns results of object detection model for evaluation."""
return dict(zip(self.keys + ['fitness'], self.mean_results() + [self.fitness]))
@@ -846,6 +865,7 @@ class PoseMetrics(SegmentMetrics):
self.speed = {'preprocess': 0.0, 'inference': 0.0, 'loss': 0.0, 'postprocess': 0.0}
def __getattr__(self, attr):
+ """Raises an AttributeError if an invalid attribute is accessed."""
name = self.__class__.__name__
raise AttributeError(f"'{name}' object has no attribute '{attr}'. See valid attributes below.\n{self.__doc__}")
@@ -884,22 +904,27 @@ class PoseMetrics(SegmentMetrics):
@property
def keys(self):
+ """Returns list of evaluation metric keys."""
return [
'metrics/precision(B)', 'metrics/recall(B)', 'metrics/mAP50(B)', 'metrics/mAP50-95(B)',
'metrics/precision(P)', 'metrics/recall(P)', 'metrics/mAP50(P)', 'metrics/mAP50-95(P)']
def mean_results(self):
+ """Return the mean results of box and pose."""
return self.box.mean_results() + self.pose.mean_results()
def class_result(self, i):
+ """Return the class-wise detection results for a specific class i."""
return self.box.class_result(i) + self.pose.class_result(i)
@property
def maps(self):
+ """Returns the mean average precision (mAP) per class for both box and pose detections."""
return self.box.maps + self.pose.maps
@property
def fitness(self):
+ """Computes classification metrics and speed using the `targets` and `pred` inputs."""
return self.pose.fitness() + self.box.fitness()
@@ -935,12 +960,15 @@ class ClassifyMetrics(SimpleClass):
@property
def fitness(self):
+ """Returns top-5 accuracy as fitness score."""
return self.top5
@property
def results_dict(self):
+ """Returns a dictionary with model's performance metrics and fitness score."""
return dict(zip(self.keys + ['fitness'], [self.top1, self.top5, self.fitness]))
@property
def keys(self):
+ """Returns a list of keys for the results_dict property."""
return ['metrics/accuracy_top1', 'metrics/accuracy_top5']
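
Every metrics class in this file builds `results_dict` the same way: zip the metric `keys` with `mean_results()` plus the scalar `fitness`. A minimal sketch of the pattern with placeholder values (the `DemoMetrics` class is illustrative, not part of the library):

```python
class DemoMetrics:
    """Illustrative stand-in mirroring the keys/mean_results/fitness pattern above."""

    @property
    def keys(self):
        return ['metrics/precision(B)', 'metrics/recall(B)']

    def mean_results(self):
        return [0.82, 0.74]  # placeholder mean precision and recall

    @property
    def fitness(self):
        return 0.78  # placeholder scalar fitness

    @property
    def results_dict(self):
        # Same construction used by DetMetrics, SegmentMetrics, PoseMetrics and ClassifyMetrics
        return dict(zip(self.keys + ['fitness'], self.mean_results() + [self.fitness]))

print(DemoMetrics().results_dict)
# {'metrics/precision(B)': 0.82, 'metrics/recall(B)': 0.74, 'fitness': 0.78}
```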
diff --git a/ultralytics/yolo/utils/plotting.py b/ultralytics/yolo/utils/plotting.py
index 4361b35..774c3ce 100644
--- a/ultralytics/yolo/utils/plotting.py
+++ b/ultralytics/yolo/utils/plotting.py
@@ -33,6 +33,7 @@ class Colors:
dtype=np.uint8)
def __call__(self, i, bgr=False):
+ """Converts hex color codes to rgb values."""
c = self.palette[int(i) % self.n]
return (c[2], c[1], c[0]) if bgr else c
@@ -47,6 +48,7 @@ colors = Colors() # create instance for 'from utils.plots import colors'
class Annotator:
# YOLOv8 Annotator for train/val mosaics and jpgs and detect/hub inference annotations
def __init__(self, im, line_width=None, font_size=None, font='Arial.ttf', pil=False, example='abc'):
+ """Initialize the Annotator class with image and line width along with color palette for keypoints and limbs."""
assert im.data.contiguous, 'Image not contiguous. Apply np.ascontiguousarray(im) to Annotator() input images.'
non_ascii = not is_ascii(example) # non-latin labels, i.e. asian, arabic, cyrillic
self.pil = pil or non_ascii
@@ -71,7 +73,7 @@ class Annotator:
self.kpt_color = colors.pose_palette[[16, 16, 16, 16, 16, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9]]
def box_label(self, box, label='', color=(128, 128, 128), txt_color=(255, 255, 255)):
- # Add one xyxy box to image with label
+ """Add one xyxy box to image with label."""
if isinstance(box, torch.Tensor):
box = box.tolist()
if self.pil or not is_ascii(label):
@@ -191,7 +193,7 @@ class Annotator:
self.draw.rectangle(xy, fill, outline, width)
def text(self, xy, text, txt_color=(255, 255, 255), anchor='top'):
- # Add text to image (PIL-only)
+ """Adds text to an image using PIL or cv2."""
if anchor == 'bottom': # start y from font bottom
w, h = self.font.getsize(text) # text width, height
xy[1] += 1 - h
@@ -214,6 +216,7 @@ class Annotator:
@TryExcept() # known issue https://github.com/ultralytics/yolov5/issues/5395
@plt_settings()
def plot_labels(boxes, cls, names=(), save_dir=Path('')):
+ """Save and plot image with no axis or spines."""
import pandas as pd
import seaborn as sn
@@ -260,7 +263,7 @@ def plot_labels(boxes, cls, names=(), save_dir=Path('')):
def save_one_box(xyxy, im, file=Path('im.jpg'), gain=1.02, pad=10, square=False, BGR=False, save=True):
- # Save image crop as {file} with crop size multiple {gain} and {pad} pixels. Save and/or return crop
+ """Save image crop as {file} with crop size multiple {gain} and {pad} pixels. Save and/or return crop."""
b = xyxy2xywh(xyxy.view(-1, 4)) # boxes
if square:
b[:, 2:] = b[:, 2:].max(1)[0].unsqueeze(1) # attempt rectangle to square
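
`Colors.__call__` is an indexed palette lookup with modulo wraparound, returning BGR for cv2 drawing when requested (the hex-to-RGB conversion itself happens once in `__init__`). A self-contained sketch of the same lookup, using a hypothetical two-entry palette:

```python
import numpy as np

palette = np.array([[255, 56, 56], [255, 157, 151]], dtype=np.uint8)  # hypothetical RGB palette
n = len(palette)

def color(i, bgr=False):
    """Return the palette color for index i, wrapping via modulo; BGR order if bgr=True."""
    c = palette[int(i) % n]
    return (int(c[2]), int(c[1]), int(c[0])) if bgr else (int(c[0]), int(c[1]), int(c[2]))

print(color(0))            # (255, 56, 56) in RGB
print(color(2, bgr=True))  # index wraps to 0, reversed to BGR: (56, 56, 255)
```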
diff --git a/ultralytics/yolo/utils/tal.py b/ultralytics/yolo/utils/tal.py
index 4678e10..09868bd 100644
--- a/ultralytics/yolo/utils/tal.py
+++ b/ultralytics/yolo/utils/tal.py
@@ -69,6 +69,7 @@ class TaskAlignedAssigner(nn.Module):
"""
def __init__(self, topk=13, num_classes=80, alpha=1.0, beta=6.0, eps=1e-9):
+ """Initialize a TaskAlignedAssigner object with customizable hyperparameters."""
super().__init__()
self.topk = topk
self.num_classes = num_classes
@@ -137,6 +138,7 @@ class TaskAlignedAssigner(nn.Module):
return mask_pos, align_metric, overlaps
def get_box_metrics(self, pd_scores, pd_bboxes, gt_labels, gt_bboxes, mask_gt):
+ """Compute alignment metric given predicted and ground truth bounding boxes."""
na = pd_bboxes.shape[-2]
mask_gt = mask_gt.bool() # b, max_num_obj, h*w
overlaps = torch.zeros([self.bs, self.n_max_boxes, na], dtype=pd_bboxes.dtype, device=pd_bboxes.device)
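
The alignment metric that `get_box_metrics` computes combines the predicted class score with box IoU as `score**alpha * iou**beta`, using the `alpha=1.0, beta=6.0` defaults from `__init__` above. A toy calculation with made-up scores and IoUs shows how `beta=6.0` makes overlap dominate the assignment:

```python
import torch

alpha, beta = 1.0, 6.0  # TaskAlignedAssigner.__init__ defaults
scores = torch.tensor([0.9, 0.6, 0.9])  # predicted class scores at three candidate anchors (illustrative)
ious = torch.tensor([0.5, 0.95, 0.95])  # IoU of each predicted box with the GT box (illustrative)

align_metric = scores.pow(alpha) * ious.pow(beta)
print(align_metric)  # tensor([0.0141, 0.4411, 0.6616]): high IoU dominates
```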
diff --git a/ultralytics/yolo/utils/torch_utils.py b/ultralytics/yolo/utils/torch_utils.py
index 84397fd..1ca3205 100644
--- a/ultralytics/yolo/utils/torch_utils.py
+++ b/ultralytics/yolo/utils/torch_utils.py
@@ -43,6 +43,7 @@ def smart_inference_mode():
"""Applies torch.inference_mode() decorator if torch>=1.9.0 else torch.no_grad() decorator."""
def decorate(fn):
+ """Applies appropriate torch decorator for inference mode based on torch version."""
return (torch.inference_mode if TORCH_1_9 else torch.no_grad)()(fn)
return decorate
@@ -232,7 +233,7 @@ def make_divisible(x, divisor):
def copy_attr(a, b, include=(), exclude=()):
- # Copy attributes from 'b' to 'a', options to only include [...] and to exclude [...]
+ """Copies attributes from object 'b' to object 'a', with options to include/exclude certain attributes."""
for k, v in b.__dict__.items():
if (len(include) and k not in include) or k.startswith('_') or k in exclude:
continue
@@ -246,7 +247,7 @@ def get_latest_opset():
def intersect_dicts(da, db, exclude=()):
- # Dictionary intersection of matching keys and shapes, omitting 'exclude' keys, using da values
+ """Returns a dictionary of intersecting keys with matching shapes, excluding 'exclude' keys, using da values."""
return {k: v for k, v in da.items() if k in db and all(x not in k for x in exclude) and v.shape == db[k].shape}
@@ -310,7 +311,7 @@ class ModelEMA:
# assert v.dtype == msd[k].dtype == torch.float32, f'{k}: EMA {v.dtype}, model {msd[k].dtype}'
def update_attr(self, model, include=(), exclude=('process_group', 'reducer')):
- # Update EMA attributes
+ """Updates attributes and saves stripped model with optimizer removed."""
if self.enabled:
copy_attr(self.ema, model, include, exclude)
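
`intersect_dicts` is what makes partial weight transfer work when loading pretrained checkpoints: only keys present in both state dicts with identical tensor shapes survive. A quick demonstration with toy tensors (the layer names are illustrative):

```python
import torch

def intersect_dicts(da, db, exclude=()):
    """Keep entries of da whose key exists in db with the same shape, skipping 'exclude' substrings."""
    return {k: v for k, v in da.items() if k in db and all(x not in k for x in exclude) and v.shape == db[k].shape}

pretrained = {'conv.weight': torch.zeros(16, 3, 3, 3), 'head.weight': torch.zeros(80, 16)}
model_sd = {'conv.weight': torch.zeros(16, 3, 3, 3), 'head.weight': torch.zeros(20, 16)}

print(list(intersect_dicts(pretrained, model_sd)))  # ['conv.weight']: head shapes differ (80 vs 20 classes)
```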
diff --git a/ultralytics/yolo/v8/classify/predict.py b/ultralytics/yolo/v8/classify/predict.py
index 155491e..363448c 100644
--- a/ultralytics/yolo/v8/classify/predict.py
+++ b/ultralytics/yolo/v8/classify/predict.py
@@ -10,10 +10,12 @@ from ultralytics.yolo.utils import DEFAULT_CFG, ROOT
class ClassificationPredictor(BasePredictor):
def preprocess(self, img):
+ """Converts input image to model-compatible data type."""
img = (img if isinstance(img, torch.Tensor) else torch.from_numpy(img)).to(self.model.device)
return img.half() if self.model.fp16 else img.float() # uint8 to fp16/32
def postprocess(self, preds, img, orig_imgs):
+ """Postprocesses predictions to return Results objects."""
results = []
for i, pred in enumerate(preds):
orig_img = orig_imgs[i] if isinstance(orig_imgs, list) else orig_imgs
@@ -25,6 +27,7 @@ class ClassificationPredictor(BasePredictor):
def predict(cfg=DEFAULT_CFG, use_python=False):
+ """Run YOLO model predictions on input images/videos."""
model = cfg.model or 'yolov8n-cls.pt' # or "resnet18"
source = cfg.source if cfg.source is not None else ROOT / 'assets' if (ROOT / 'assets').exists() \
else 'https://ultralytics.com/images/bus.jpg'
diff --git a/ultralytics/yolo/v8/classify/train.py b/ultralytics/yolo/v8/classify/train.py
index 66e257d..5c50b4f 100644
--- a/ultralytics/yolo/v8/classify/train.py
+++ b/ultralytics/yolo/v8/classify/train.py
@@ -14,15 +14,18 @@ from ultralytics.yolo.utils.torch_utils import is_parallel, strip_optimizer
class ClassificationTrainer(BaseTrainer):
def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
+ """Initialize a ClassificationTrainer object with optional configuration overrides and callbacks."""
if overrides is None:
overrides = {}
overrides['task'] = 'classify'
super().__init__(cfg, overrides, _callbacks)
def set_model_attributes(self):
+ """Set the YOLO model's class names from the loaded dataset."""
self.model.names = self.data['names']
def get_model(self, cfg=None, weights=None, verbose=True):
+ """Returns a modified PyTorch model configured for training YOLO."""
model = ClassificationModel(cfg, nc=self.data['nc'], verbose=verbose and RANK == -1)
if weights:
model.load(weights)
@@ -69,6 +72,7 @@ class ClassificationTrainer(BaseTrainer):
return # dont return ckpt. Classification doesn't support resume
def get_dataloader(self, dataset_path, batch_size=16, rank=0, mode='train'):
+ """Returns PyTorch DataLoader with transforms to preprocess images for inference."""
loader = build_classification_dataloader(path=dataset_path,
imgsz=self.args.imgsz,
batch_size=batch_size if mode == 'train' else (batch_size * 2),
@@ -84,19 +88,23 @@ class ClassificationTrainer(BaseTrainer):
return loader
def preprocess_batch(self, batch):
+ """Preprocesses a batch of images and classes."""
batch['img'] = batch['img'].to(self.device)
batch['cls'] = batch['cls'].to(self.device)
return batch
def progress_string(self):
+ """Returns a formatted string showing training progress."""
return ('\n' + '%11s' * (4 + len(self.loss_names))) % \
('Epoch', 'GPU_mem', *self.loss_names, 'Instances', 'Size')
def get_validator(self):
+ """Returns an instance of ClassificationValidator for validation."""
self.loss_names = ['loss']
return v8.classify.ClassificationValidator(self.test_loader, self.save_dir)
def criterion(self, preds, batch):
+ """Compute the classification loss between predictions and true labels."""
loss = torch.nn.functional.cross_entropy(preds, batch['cls'], reduction='sum') / self.args.nbs
loss_items = loss.detach()
return loss, loss_items
@@ -113,9 +121,11 @@ class ClassificationTrainer(BaseTrainer):
return dict(zip(keys, loss_items))
def resume_training(self, ckpt):
+ """Resumes training from a given checkpoint."""
pass
def final_eval(self):
+ """Evaluate trained model and save validation results."""
for f in self.last, self.best:
if f.exists():
strip_optimizer(f) # strip optimizers
@@ -130,6 +140,7 @@ class ClassificationTrainer(BaseTrainer):
def train(cfg=DEFAULT_CFG, use_python=False):
+ """Train the YOLO classification model."""
model = cfg.model or 'yolov8n-cls.pt' # or "resnet18"
data = cfg.data or 'mnist160' # or yolo.ClassificationDataset("mnist")
device = cfg.device if cfg.device is not None else ''
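
The classification `criterion` above is plain cross-entropy with `reduction='sum'`, divided by the nominal batch size `nbs`. A standalone sketch with random tensors (the `nbs=64` value is an assumption for illustration, standing in for `self.args.nbs`):

```python
import torch
import torch.nn.functional as F

nbs = 64  # nominal batch size, standing in for self.args.nbs (assumed value)
preds = torch.randn(8, 10)        # logits for a batch of 8 images over 10 classes
cls = torch.randint(0, 10, (8,))  # ground-truth class indices

loss = F.cross_entropy(preds, cls, reduction='sum') / nbs
loss_items = loss.detach()
print(loss.item(), loss_items.item())
```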
diff --git a/ultralytics/yolo/v8/classify/val.py b/ultralytics/yolo/v8/classify/val.py
index 6e3b212..6722dfc 100644
--- a/ultralytics/yolo/v8/classify/val.py
+++ b/ultralytics/yolo/v8/classify/val.py
@@ -9,14 +9,17 @@ from ultralytics.yolo.utils.metrics import ClassifyMetrics, ConfusionMatrix
class ClassificationValidator(BaseValidator):
def __init__(self, dataloader=None, save_dir=None, pbar=None, args=None, _callbacks=None):
+ """Initializes ClassificationValidator instance with args, dataloader, save_dir, and progress bar."""
super().__init__(dataloader, save_dir, pbar, args, _callbacks)
self.args.task = 'classify'
self.metrics = ClassifyMetrics()
def get_desc(self):
+ """Returns a formatted string summarizing classification metrics."""
return ('%22s' + '%11s' * 2) % ('classes', 'top1_acc', 'top5_acc')
def init_metrics(self, model):
+ """Initialize confusion matrix, class names, and top-1 and top-5 accuracy."""
self.names = model.names
self.nc = len(model.names)
self.confusion_matrix = ConfusionMatrix(nc=self.nc, task='classify')
@@ -24,17 +27,20 @@ class ClassificationValidator(BaseValidator):
self.targets = []
def preprocess(self, batch):
+ """Preprocesses input batch and returns it."""
batch['img'] = batch['img'].to(self.device, non_blocking=True)
batch['img'] = batch['img'].half() if self.args.half else batch['img'].float()
batch['cls'] = batch['cls'].to(self.device)
return batch
def update_metrics(self, preds, batch):
+ """Updates running metrics with model predictions and batch targets."""
n5 = min(len(self.model.names), 5)
self.pred.append(preds.argsort(1, descending=True)[:, :n5])
self.targets.append(batch['cls'])
def finalize_metrics(self, *args, **kwargs):
+ """Finalizes metrics of the model such as confusion_matrix and speed."""
self.confusion_matrix.process_cls_preds(self.pred, self.targets)
if self.args.plots:
self.confusion_matrix.plot(save_dir=self.save_dir, names=list(self.names.values()))
@@ -42,10 +48,12 @@ class ClassificationValidator(BaseValidator):
self.metrics.confusion_matrix = self.confusion_matrix
def get_stats(self):
+ """Returns a dictionary of metrics obtained by processing targets and predictions."""
self.metrics.process(self.targets, self.pred)
return self.metrics.results_dict
def get_dataloader(self, dataset_path, batch_size):
+ """Builds and returns a data loader for classification tasks with given parameters."""
return build_classification_dataloader(path=dataset_path,
imgsz=self.args.imgsz,
batch_size=batch_size,
@@ -54,11 +62,13 @@ class ClassificationValidator(BaseValidator):
workers=self.args.workers)
def print_results(self):
+ """Prints evaluation metrics for YOLO object detection model."""
pf = '%22s' + '%11.3g' * len(self.metrics.keys) # print format
LOGGER.info(pf % ('all', self.metrics.top1, self.metrics.top5))
def val(cfg=DEFAULT_CFG, use_python=False):
+ """Validate YOLO model using custom data."""
model = cfg.model or 'yolov8n-cls.pt' # or "resnet18"
data = cfg.data or 'mnist160'
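
`update_metrics` stores only the top-5 class indices per image via `argsort`; top-1 and top-5 accuracy then fall out by checking where the target appears among those columns. A sketch of that check with made-up scores:

```python
import torch

preds = torch.tensor([[0.1, 0.7, 0.2], [0.5, 0.3, 0.2]])  # scores for 2 images, 3 classes (illustrative)
targets = torch.tensor([1, 2])

n5 = min(preds.shape[1], 5)
top = preds.argsort(1, descending=True)[:, :n5]  # same trick as update_metrics above
correct = top == targets[:, None]

top1 = correct[:, 0].float().mean()   # 0.5: image 0 correct, image 1 not
topk = correct.any(1).float().mean()  # 1.0: target within the kept columns for both
print(top1.item(), topk.item())
```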
diff --git a/ultralytics/yolo/v8/detect/predict.py b/ultralytics/yolo/v8/detect/predict.py
index 5dcc508..68f0937 100644
--- a/ultralytics/yolo/v8/detect/predict.py
+++ b/ultralytics/yolo/v8/detect/predict.py
@@ -10,12 +10,14 @@ from ultralytics.yolo.utils import DEFAULT_CFG, ROOT, ops
class DetectionPredictor(BasePredictor):
def preprocess(self, img):
+ """Convert an image to PyTorch tensor and normalize pixel values."""
img = (img if isinstance(img, torch.Tensor) else torch.from_numpy(img)).to(self.model.device)
img = img.half() if self.model.fp16 else img.float() # uint8 to fp16/32
img /= 255 # 0 - 255 to 0.0 - 1.0
return img
def postprocess(self, preds, img, orig_imgs):
+ """Postprocesses predictions and returns a list of Results objects."""
preds = ops.non_max_suppression(preds,
self.args.conf,
self.args.iou,
@@ -35,6 +37,7 @@ class DetectionPredictor(BasePredictor):
def predict(cfg=DEFAULT_CFG, use_python=False):
+ """Runs YOLO model inference on input image(s)."""
model = cfg.model or 'yolov8n.pt'
source = cfg.source if cfg.source is not None else ROOT / 'assets' if (ROOT / 'assets').exists() \
else 'https://ultralytics.com/images/bus.jpg'
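
`DetectionPredictor.preprocess` converts a batched uint8 image tensor to fp16/fp32 and rescales pixel values into [0, 1]; the classification predictor skips the division because its dataloader transforms already normalize. A minimal sketch of the detection path:

```python
import numpy as np
import torch

img = np.random.randint(0, 256, (1, 3, 640, 640), dtype=np.uint8)  # illustrative batched CHW input

x = torch.from_numpy(img)
x = x.float()  # or x.half() when the model runs FP16
x /= 255       # 0-255 -> 0.0-1.0, matching preprocess above
print(x.dtype, float(x.min()) >= 0.0, float(x.max()) <= 1.0)
```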
diff --git a/ultralytics/yolo/v8/detect/train.py b/ultralytics/yolo/v8/detect/train.py
index 088c215..077bd2c 100644
--- a/ultralytics/yolo/v8/detect/train.py
+++ b/ultralytics/yolo/v8/detect/train.py
@@ -44,6 +44,7 @@ class DetectionTrainer(BaseTrainer):
rect=mode == 'val', data_info=self.data)[0]
def preprocess_batch(self, batch):
+ """Preprocesses a batch of images by scaling and converting to float."""
batch['img'] = batch['img'].to(self.device, non_blocking=True).float() / 255
return batch
@@ -58,16 +59,19 @@ class DetectionTrainer(BaseTrainer):
# TODO: self.model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) * nc
def get_model(self, cfg=None, weights=None, verbose=True):
+ """Return a YOLO detection model."""
model = DetectionModel(cfg, nc=self.data['nc'], verbose=verbose and RANK == -1)
if weights:
model.load(weights)
return model
def get_validator(self):
+ """Returns a DetectionValidator for YOLO model validation."""
self.loss_names = 'box_loss', 'cls_loss', 'dfl_loss'
return v8.detect.DetectionValidator(self.test_loader, save_dir=self.save_dir, args=copy(self.args))
def criterion(self, preds, batch):
+ """Compute loss for YOLO prediction and ground-truth."""
if not hasattr(self, 'compute_loss'):
self.compute_loss = Loss(de_parallel(self.model))
return self.compute_loss(preds, batch)
@@ -85,10 +89,12 @@ class DetectionTrainer(BaseTrainer):
return keys
def progress_string(self):
+ """Returns a formatted string of training progress with epoch, GPU memory, loss, instances and size."""
return ('\n' + '%11s' *
(4 + len(self.loss_names))) % ('Epoch', 'GPU_mem', *self.loss_names, 'Instances', 'Size')
def plot_training_samples(self, batch, ni):
+ """Plots training samples with their annotations."""
plot_images(images=batch['img'],
batch_idx=batch['batch_idx'],
cls=batch['cls'].squeeze(-1),
@@ -97,9 +103,11 @@ class DetectionTrainer(BaseTrainer):
fname=self.save_dir / f'train_batch{ni}.jpg')
def plot_metrics(self):
+ """Plots metrics from a CSV file."""
plot_results(file=self.csv) # save results.png
def plot_training_labels(self):
+ """Create a labeled training plot of the YOLO model."""
boxes = np.concatenate([lb['bboxes'] for lb in self.train_loader.dataset.labels], 0)
cls = np.concatenate([lb['cls'] for lb in self.train_loader.dataset.labels], 0)
plot_labels(boxes, cls.squeeze(), names=self.data['names'], save_dir=self.save_dir)
@@ -129,6 +137,7 @@ class Loss:
self.proj = torch.arange(m.reg_max, dtype=torch.float, device=device)
def preprocess(self, targets, batch_size, scale_tensor):
+ """Preprocesses the target counts and matches with the input batch size to output a tensor."""
if targets.shape[0] == 0:
out = torch.zeros(batch_size, 0, 5, device=self.device)
else:
@@ -145,6 +154,7 @@ class Loss:
return out
def bbox_decode(self, anchor_points, pred_dist):
+ """Decode predicted object bounding box coordinates from anchor points and distribution."""
if self.use_dfl:
b, a, c = pred_dist.shape # batch, anchors, channels
pred_dist = pred_dist.view(b, a, 4, c // 4).softmax(3).matmul(self.proj.type(pred_dist.dtype))
@@ -153,6 +163,7 @@ class Loss:
return dist2bbox(pred_dist, anchor_points, xywh=False)
def __call__(self, preds, batch):
+ """Calculate the sum of the loss for box, cls and dfl multiplied by batch size."""
loss = torch.zeros(3, device=self.device) # box, cls, dfl
feats = preds[1] if isinstance(preds, tuple) else preds
pred_distri, pred_scores = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats], 2).split(
@@ -199,6 +210,7 @@ class Loss:
def train(cfg=DEFAULT_CFG, use_python=False):
+ """Train and optimize YOLO model given training data and device."""
model = cfg.model or 'yolov8n.pt'
data = cfg.data or 'coco128.yaml' # or yolo.ClassificationDataset("mnist")
device = cfg.device if cfg.device is not None else ''
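
`bbox_decode` above implements the Distribution Focal Loss decode: each of the four box sides is predicted as a discrete distribution over `reg_max` bins, and the scalar distance is the softmax-weighted expectation against the projection vector `proj = arange(reg_max)`. A toy decode for a single anchor (logits are random for illustration):

```python
import torch

reg_max = 16
proj = torch.arange(reg_max, dtype=torch.float)  # projection vector, as in Loss.__init__
pred_dist = torch.randn(1, 1, 4 * reg_max)       # (batch, anchors, 4 * reg_max) raw logits

b, a, c = pred_dist.shape
dists = pred_dist.view(b, a, 4, c // 4).softmax(3).matmul(proj)  # expected ltrb distances
print(dists.shape, dists)  # torch.Size([1, 1, 4]); each distance lies in [0, reg_max - 1]
```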
diff --git a/ultralytics/yolo/v8/detect/val.py b/ultralytics/yolo/v8/detect/val.py
index 9d60fd3..1304186 100644
--- a/ultralytics/yolo/v8/detect/val.py
+++ b/ultralytics/yolo/v8/detect/val.py
@@ -19,6 +19,7 @@ from ultralytics.yolo.utils.torch_utils import de_parallel
class DetectionValidator(BaseValidator):
def __init__(self, dataloader=None, save_dir=None, pbar=None, args=None, _callbacks=None):
+ """Initialize detection model with necessary variables and settings."""
super().__init__(dataloader, save_dir, pbar, args, _callbacks)
self.args.task = 'detect'
self.is_coco = False
@@ -28,6 +29,7 @@ class DetectionValidator(BaseValidator):
self.niou = self.iouv.numel()
def preprocess(self, batch):
+ """Preprocesses batch of images for YOLO training."""
batch['img'] = batch['img'].to(self.device, non_blocking=True)
batch['img'] = (batch['img'].half() if self.args.half else batch['img'].float()) / 255
for k in ['batch_idx', 'cls', 'bboxes']:
@@ -40,6 +42,7 @@ class DetectionValidator(BaseValidator):
return batch
def init_metrics(self, model):
+ """Initialize evaluation metrics for YOLO."""
val = self.data.get(self.args.split, '') # validation path
self.is_coco = isinstance(val, str) and 'coco' in val and val.endswith(f'{os.sep}val2017.txt') # is COCO
self.class_map = ops.coco80_to_coco91_class() if self.is_coco else list(range(1000))
@@ -54,9 +57,11 @@ class DetectionValidator(BaseValidator):
self.stats = []
def get_desc(self):
+ """Return a formatted string summarizing class metrics of YOLO model."""
return ('%22s' + '%11s' * 6) % ('Class', 'Images', 'Instances', 'Box(P', 'R', 'mAP50', 'mAP50-95)')
def postprocess(self, preds):
+ """Apply Non-maximum suppression to prediction outputs."""
preds = ops.non_max_suppression(preds,
self.args.conf,
self.args.iou,
@@ -113,10 +118,12 @@ class DetectionValidator(BaseValidator):
self.save_one_txt(predn, self.args.save_conf, shape, file)
def finalize_metrics(self, *args, **kwargs):
+ """Set final values for metrics speed and confusion matrix."""
self.metrics.speed = self.speed
self.metrics.confusion_matrix = self.confusion_matrix
def get_stats(self):
+ """Returns metrics statistics and results dictionary."""
stats = [torch.cat(x, 0).cpu().numpy() for x in zip(*self.stats)] # to numpy
if len(stats) and stats[0].any():
self.metrics.process(*stats)
@@ -124,6 +131,7 @@ class DetectionValidator(BaseValidator):
return self.metrics.results_dict
def print_results(self):
+ """Prints training/validation set metrics per class."""
pf = '%22s' + '%11i' * 2 + '%11.3g' * len(self.metrics.keys) # print format
LOGGER.info(pf % ('all', self.seen, self.nt_per_class.sum(), *self.metrics.mean_results()))
if self.nt_per_class.sum() == 0:
@@ -183,6 +191,7 @@ class DetectionValidator(BaseValidator):
mode='val')[0]
def plot_val_samples(self, batch, ni):
+ """Plot validation image samples."""
plot_images(batch['img'],
batch['batch_idx'],
batch['cls'].squeeze(-1),
@@ -192,6 +201,7 @@ class DetectionValidator(BaseValidator):
names=self.names)
def plot_predictions(self, batch, preds, ni):
+ """Plots predicted bounding boxes on input images and saves the result."""
plot_images(batch['img'],
*output_to_target(preds, max_det=15),
paths=batch['im_file'],
@@ -199,6 +209,7 @@ class DetectionValidator(BaseValidator):
names=self.names) # pred
def save_one_txt(self, predn, save_conf, shape, file):
+ """Save YOLO detections to a txt file in normalized coordinates in a specific format."""
gn = torch.tensor(shape)[[1, 0, 1, 0]] # normalization gain whwh
for *xyxy, conf, cls in predn.tolist():
xywh = (ops.xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh
@@ -207,6 +218,7 @@ class DetectionValidator(BaseValidator):
f.write(('%g ' * len(line)).rstrip() % line + '\n')
def pred_to_json(self, predn, filename):
+ """Serialize YOLO predictions to COCO json format."""
stem = Path(filename).stem
image_id = int(stem) if stem.isnumeric() else stem
box = ops.xyxy2xywh(predn[:, :4]) # xywh
@@ -219,6 +231,7 @@ class DetectionValidator(BaseValidator):
'score': round(p[4], 5)})
def eval_json(self, stats):
+ """Evaluates YOLO output in JSON format and returns performance statistics."""
if self.args.save_json and self.is_coco and len(self.jdict):
anno_json = self.data['path'] / 'annotations/instances_val2017.json' # annotations
pred_json = self.save_dir / 'predictions.json' # predictions
@@ -245,6 +258,7 @@ class DetectionValidator(BaseValidator):
def val(cfg=DEFAULT_CFG, use_python=False):
+ """Validate trained YOLO model on validation dataset."""
model = cfg.model or 'yolov8n.pt'
data = cfg.data or 'coco128.yaml'
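
`pred_to_json` writes boxes in COCO's `[x_min, y_min, width, height]` convention rather than xyxy. A small sketch of that conversion (the helper name `xyxy_to_coco` is hypothetical, not part of the library's `ops` API):

```python
import torch

def xyxy_to_coco(xyxy):
    """Convert xyxy boxes to COCO [x_min, y_min, width, height] format (hypothetical helper)."""
    xy = xyxy[:, :2]
    wh = xyxy[:, 2:] - xyxy[:, :2]
    return torch.cat([xy, wh], 1)

predn = torch.tensor([[10.0, 20.0, 110.0, 70.0, 0.91, 0.0]])  # xyxy, conf, cls (illustrative)
print(xyxy_to_coco(predn[:, :4]))  # tensor([[10., 20., 100., 50.]])
```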
diff --git a/ultralytics/yolo/v8/pose/predict.py b/ultralytics/yolo/v8/pose/predict.py
index 3e19829..a3af259 100644
--- a/ultralytics/yolo/v8/pose/predict.py
+++ b/ultralytics/yolo/v8/pose/predict.py
@@ -8,6 +8,7 @@ from ultralytics.yolo.v8.detect.predict import DetectionPredictor
class PosePredictor(DetectionPredictor):
def postprocess(self, preds, img, orig_img):
+ """Return detection results for a given input image or list of images."""
preds = ops.non_max_suppression(preds,
self.args.conf,
self.args.iou,
@@ -35,6 +36,7 @@ class PosePredictor(DetectionPredictor):
def predict(cfg=DEFAULT_CFG, use_python=False):
+ """Runs YOLO to predict objects in an image or video."""
model = cfg.model or 'yolov8n-pose.pt'
source = cfg.source if cfg.source is not None else ROOT / 'assets' if (ROOT / 'assets').exists() \
else 'https://ultralytics.com/images/bus.jpg'
diff --git a/ultralytics/yolo/v8/pose/train.py b/ultralytics/yolo/v8/pose/train.py
index fd13bfc..4cb0deb 100644
--- a/ultralytics/yolo/v8/pose/train.py
+++ b/ultralytics/yolo/v8/pose/train.py
@@ -21,12 +21,14 @@ from ultralytics.yolo.v8.detect.train import Loss
class PoseTrainer(v8.detect.DetectionTrainer):
def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
+ """Initialize a PoseTrainer object with specified configurations and overrides."""
if overrides is None:
overrides = {}
overrides['task'] = 'pose'
super().__init__(cfg, overrides, _callbacks)
def get_model(self, cfg=None, weights=None, verbose=True):
+ """Get pose estimation model with specified configuration and weights."""
model = PoseModel(cfg, ch=3, nc=self.data['nc'], data_kpt_shape=self.data['kpt_shape'], verbose=verbose)
if weights:
model.load(weights)
@@ -34,19 +36,23 @@ class PoseTrainer(v8.detect.DetectionTrainer):
return model
def set_model_attributes(self):
+ """Sets keypoints shape attribute of PoseModel."""
super().set_model_attributes()
self.model.kpt_shape = self.data['kpt_shape']
def get_validator(self):
+ """Returns an instance of the PoseValidator class for validation."""
self.loss_names = 'box_loss', 'pose_loss', 'kobj_loss', 'cls_loss', 'dfl_loss'
return v8.pose.PoseValidator(self.test_loader, save_dir=self.save_dir, args=copy(self.args))
def criterion(self, preds, batch):
+ """Computes pose loss for the YOLO model."""
if not hasattr(self, 'compute_loss'):
self.compute_loss = PoseLoss(de_parallel(self.model))
return self.compute_loss(preds, batch)
def plot_training_samples(self, batch, ni):
+ """Plot a batch of training samples with annotated class labels, bounding boxes, and keypoints."""
images = batch['img']
kpts = batch['keypoints']
cls = batch['cls'].squeeze(-1)
@@ -62,6 +68,7 @@ class PoseTrainer(v8.detect.DetectionTrainer):
fname=self.save_dir / f'train_batch{ni}.jpg')
def plot_metrics(self):
+ """Plots training/val metrics."""
plot_results(file=self.csv, pose=True) # save results.png
@@ -78,6 +85,7 @@ class PoseLoss(Loss):
self.keypoint_loss = KeypointLoss(sigmas=sigmas)
def __call__(self, preds, batch):
+ """Calculate the total loss and detach it."""
loss = torch.zeros(5, device=self.device) # box, cls, dfl, kpt_location, kpt_visibility
feats, pred_kpts = preds if isinstance(preds[0], list) else preds[1]
pred_distri, pred_scores = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats], 2).split(
@@ -145,6 +153,7 @@ class PoseLoss(Loss):
return loss.sum() * batch_size, loss.detach() # loss(box, cls, dfl)
def kpts_decode(self, anchor_points, pred_kpts):
+ """Decodes predicted keypoints to image coordinates."""
y = pred_kpts.clone()
y[..., :2] *= 2.0
y[..., 0] += anchor_points[:, [0]] - 0.5
@@ -153,6 +162,7 @@ class PoseLoss(Loss):
def train(cfg=DEFAULT_CFG, use_python=False):
+ """Train the YOLO model on the given data and device."""
model = cfg.model or 'yolov8n-pose.yaml'
data = cfg.data or 'coco8-pose.yaml'
device = cfg.device if cfg.device is not None else ''
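
`kpts_decode` maps raw keypoint offsets onto the feature grid: xy offsets are doubled, then shifted by the anchor point minus 0.5, while the visibility channel is left untouched. A toy decode for one anchor and one keypoint (values illustrative):

```python
import torch

anchor_points = torch.tensor([[4.5, 2.5]])       # one anchor center on the feature grid
pred_kpts = torch.tensor([[[0.25, -0.1, 0.9]]])  # (anchors, kpts, 3): x, y, visibility

y = pred_kpts.clone()
y[..., :2] *= 2.0  # same scaling as kpts_decode above
y[..., 0] += anchor_points[:, [0]] - 0.5
y[..., 1] += anchor_points[:, [1]] - 0.5
print(y)  # tensor([[[4.5000, 1.8000, 0.9000]]]): grid coords, visibility unchanged
```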
diff --git a/ultralytics/yolo/v8/pose/val.py b/ultralytics/yolo/v8/pose/val.py
index 010e944..4e7136d 100644
--- a/ultralytics/yolo/v8/pose/val.py
+++ b/ultralytics/yolo/v8/pose/val.py
@@ -15,20 +15,24 @@ from ultralytics.yolo.v8.detect import DetectionValidator
class PoseValidator(DetectionValidator):
def __init__(self, dataloader=None, save_dir=None, pbar=None, args=None, _callbacks=None):
+ """Initialize a 'PoseValidator' object with custom parameters and assigned attributes."""
super().__init__(dataloader, save_dir, pbar, args, _callbacks)
self.args.task = 'pose'
self.metrics = PoseMetrics(save_dir=self.save_dir)
def preprocess(self, batch):
+ """Preprocesses the batch by converting the 'keypoints' data into a float and moving it to the device."""
batch = super().preprocess(batch)
batch['keypoints'] = batch['keypoints'].to(self.device).float()
return batch
def get_desc(self):
+ """Returns description of evaluation metrics in string format."""
return ('%22s' + '%11s' * 10) % ('Class', 'Images', 'Instances', 'Box(P', 'R', 'mAP50', 'mAP50-95)', 'Pose(P',
'R', 'mAP50', 'mAP50-95)')
def postprocess(self, preds):
+ """Apply non-maximum suppression and return detections with high confidence scores."""
preds = ops.non_max_suppression(preds,
self.args.conf,
self.args.iou,
@@ -40,6 +44,7 @@ class PoseValidator(DetectionValidator):
return preds
def init_metrics(self, model):
+ """Initiate pose estimation metrics for YOLO model."""
super().init_metrics(model)
self.kpt_shape = self.data['kpt_shape']
is_pose = self.kpt_shape == [17, 3]
@@ -137,6 +142,7 @@ class PoseValidator(DetectionValidator):
return torch.tensor(correct, dtype=torch.bool, device=detections.device)
def plot_val_samples(self, batch, ni):
+ """Plots and saves validation set samples with predicted bounding boxes and keypoints."""
plot_images(batch['img'],
batch['batch_idx'],
batch['cls'].squeeze(-1),
@@ -147,6 +153,7 @@ class PoseValidator(DetectionValidator):
names=self.names)
def plot_predictions(self, batch, preds, ni):
+ """Plots predictions for YOLO model."""
pred_kpts = torch.cat([p[:, 6:].view(-1, *self.kpt_shape)[:15] for p in preds], 0)
plot_images(batch['img'],
*output_to_target(preds, max_det=15),
@@ -156,6 +163,7 @@ class PoseValidator(DetectionValidator):
names=self.names) # pred
def pred_to_json(self, predn, filename):
+ """Converts YOLO predictions to COCO JSON format."""
stem = Path(filename).stem
image_id = int(stem) if stem.isnumeric() else stem
box = ops.xyxy2xywh(predn[:, :4]) # xywh
@@ -169,6 +177,7 @@ class PoseValidator(DetectionValidator):
'score': round(p[4], 5)})
def eval_json(self, stats):
+ """Evaluates object detection model using COCO JSON format."""
if self.args.save_json and self.is_coco and len(self.jdict):
anno_json = self.data['path'] / 'annotations/person_keypoints_val2017.json' # annotations
pred_json = self.save_dir / 'predictions.json' # predictions
@@ -197,6 +206,7 @@ class PoseValidator(DetectionValidator):
def val(cfg=DEFAULT_CFG, use_python=False):
+ """Performs validation on YOLO model using given data."""
model = cfg.model or 'yolov8n-pose.pt'
data = cfg.data or 'coco8-pose.yaml'
diff --git a/ultralytics/yolo/v8/segment/predict.py b/ultralytics/yolo/v8/segment/predict.py
index b58b6e6..6ac24ed 100644
--- a/ultralytics/yolo/v8/segment/predict.py
+++ b/ultralytics/yolo/v8/segment/predict.py
@@ -41,6 +41,7 @@ class SegmentationPredictor(DetectionPredictor):
def predict(cfg=DEFAULT_CFG, use_python=False):
+ """Runs YOLO object detection on an image or video source."""
model = cfg.model or 'yolov8n-seg.pt'
source = cfg.source if cfg.source is not None else ROOT / 'assets' if (ROOT / 'assets').exists() \
else 'https://ultralytics.com/images/bus.jpg'
diff --git a/ultralytics/yolo/v8/segment/train.py b/ultralytics/yolo/v8/segment/train.py
index 9197e33..5eea38e 100644
--- a/ultralytics/yolo/v8/segment/train.py
+++ b/ultralytics/yolo/v8/segment/train.py
@@ -18,12 +18,14 @@ from ultralytics.yolo.v8.detect.train import Loss
class SegmentationTrainer(v8.detect.DetectionTrainer):
def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
+ """Initialize a SegmentationTrainer object with given arguments."""
if overrides is None:
overrides = {}
overrides['task'] = 'segment'
super().__init__(cfg, overrides, _callbacks)
def get_model(self, cfg=None, weights=None, verbose=True):
+ """Return SegmentationModel initialized with specified config and weights."""
model = SegmentationModel(cfg, ch=3, nc=self.data['nc'], verbose=verbose and RANK == -1)
if weights:
model.load(weights)
@@ -31,15 +33,18 @@ class SegmentationTrainer(v8.detect.DetectionTrainer):
return model
def get_validator(self):
+ """Return an instance of SegmentationValidator for validation of YOLO model."""
self.loss_names = 'box_loss', 'seg_loss', 'cls_loss', 'dfl_loss'
return v8.segment.SegmentationValidator(self.test_loader, save_dir=self.save_dir, args=copy(self.args))
def criterion(self, preds, batch):
+ """Returns the computed loss using the SegLoss class on the given predictions and batch."""
if not hasattr(self, 'compute_loss'):
self.compute_loss = SegLoss(de_parallel(self.model), overlap=self.args.overlap_mask)
return self.compute_loss(preds, batch)
def plot_training_samples(self, batch, ni):
+ """Creates a plot of training sample images with labels and box coordinates."""
images = batch['img']
masks = batch['masks']
cls = batch['cls'].squeeze(-1)
@@ -49,6 +54,7 @@ class SegmentationTrainer(v8.detect.DetectionTrainer):
plot_images(images, batch_idx, cls, bboxes, masks, paths=paths, fname=self.save_dir / f'train_batch{ni}.jpg')
def plot_metrics(self):
+ """Plots training/val metrics."""
plot_results(file=self.csv, segment=True) # save results.png
@@ -61,6 +67,7 @@ class SegLoss(Loss):
self.overlap = overlap
def __call__(self, preds, batch):
+ """Calculate and return the loss for the YOLO model."""
loss = torch.zeros(4, device=self.device) # box, cls, dfl
feats, pred_masks, proto = preds if len(preds) == 3 else preds[1]
batch_size, _, mask_h, mask_w = proto.shape # batch size, number of masks, mask height, mask width
@@ -147,6 +154,7 @@ class SegLoss(Loss):
def train(cfg=DEFAULT_CFG, use_python=False):
+ """Train a YOLO segmentation model based on passed arguments."""
model = cfg.model or 'yolov8n-seg.pt'
data = cfg.data or 'coco128-seg.yaml' # or yolo.ClassificationDataset("mnist")
device = cfg.device if cfg.device is not None else ''
diff --git a/ultralytics/yolo/v8/segment/val.py b/ultralytics/yolo/v8/segment/val.py
index dd9eca8..8d29f71 100644
--- a/ultralytics/yolo/v8/segment/val.py
+++ b/ultralytics/yolo/v8/segment/val.py
@@ -17,16 +17,19 @@ from ultralytics.yolo.v8.detect import DetectionValidator
class SegmentationValidator(DetectionValidator):
def __init__(self, dataloader=None, save_dir=None, pbar=None, args=None, _callbacks=None):
+ """Initialize SegmentationValidator and set task to 'segment', metrics to SegmentMetrics."""
super().__init__(dataloader, save_dir, pbar, args, _callbacks)
self.args.task = 'segment'
self.metrics = SegmentMetrics(save_dir=self.save_dir)
def preprocess(self, batch):
+ """Preprocesses batch by converting masks to float and sending to device."""
batch = super().preprocess(batch)
batch['masks'] = batch['masks'].to(self.device).float()
return batch
def init_metrics(self, model):
+ """Initialize metrics and select mask processing function based on save_json flag."""
super().init_metrics(model)
self.plot_masks = []
if self.args.save_json:
@@ -36,10 +39,12 @@ class SegmentationValidator(DetectionValidator):
self.process = ops.process_mask # faster
def get_desc(self):
+ """Return a formatted description of evaluation metrics."""
return ('%22s' + '%11s' * 10) % ('Class', 'Images', 'Instances', 'Box(P', 'R', 'mAP50', 'mAP50-95)', 'Mask(P',
'R', 'mAP50', 'mAP50-95)')
def postprocess(self, preds):
+ """Postprocesses YOLO predictions and returns output detections with proto."""
p = ops.non_max_suppression(preds[0],
self.args.conf,
self.args.iou,
@@ -119,6 +124,7 @@ class SegmentationValidator(DetectionValidator):
# save_one_txt(predn, save_conf, shape, file=save_dir / 'labels' / f'{path.stem}.txt')
def finalize_metrics(self, *args, **kwargs):
+ """Sets speed and confusion matrix for evaluation metrics."""
self.metrics.speed = self.speed
self.metrics.confusion_matrix = self.confusion_matrix
@@ -160,6 +166,7 @@ class SegmentationValidator(DetectionValidator):
return torch.tensor(correct, dtype=torch.bool, device=detections.device)
def plot_val_samples(self, batch, ni):
+ """Plots validation samples with bounding box labels."""
plot_images(batch['img'],
batch['batch_idx'],
batch['cls'].squeeze(-1),
@@ -170,6 +177,7 @@ class SegmentationValidator(DetectionValidator):
names=self.names)
def plot_predictions(self, batch, preds, ni):
+ """Plots batch predictions with masks and bounding boxes."""
plot_images(batch['img'],
*output_to_target(preds[0], max_det=15),
torch.cat(self.plot_masks, dim=0) if len(self.plot_masks) else self.plot_masks,
@@ -184,6 +192,7 @@ class SegmentationValidator(DetectionValidator):
from pycocotools.mask import encode # noqa
def single_encode(x):
+ """Encode predicted masks as RLE and append results to jdict."""
rle = encode(np.asarray(x[:, :, None], order='F', dtype='uint8'))[0]
rle['counts'] = rle['counts'].decode('utf-8')
return rle
@@ -204,6 +213,7 @@ class SegmentationValidator(DetectionValidator):
'segmentation': rles[i]})
def eval_json(self, stats):
+ """Return COCO-style object detection evaluation metrics."""
if self.args.save_json and self.is_coco and len(self.jdict):
anno_json = self.data['path'] / 'annotations/instances_val2017.json' # annotations
pred_json = self.save_dir / 'predictions.json' # predictions
@@ -232,6 +242,7 @@ class SegmentationValidator(DetectionValidator):
def val(cfg=DEFAULT_CFG, use_python=False):
+ """Validate trained YOLO model on validation data."""
model = cfg.model or 'yolov8n-seg.pt'
data = cfg.data or 'coco128-seg.yaml'
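
`single_encode` relies on pycocotools' RLE encoder, which requires a Fortran-ordered uint8 array, and decodes the resulting byte string so the dict is JSON-serializable; `pred_to_json` then attaches these RLEs under the `'segmentation'` key. A standalone sketch with a toy mask (requires `pycocotools`):

```python
import numpy as np
from pycocotools.mask import encode  # pip install pycocotools

mask = np.zeros((4, 6), dtype=np.uint8)
mask[1:3, 2:5] = 1  # toy binary mask

rle = encode(np.asarray(mask[:, :, None], order='F', dtype='uint8'))[0]
rle['counts'] = rle['counts'].decode('utf-8')  # bytes -> str for JSON, as in single_encode above
print(rle)  # {'size': [4, 6], 'counts': '...'}
```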