`ultralytics 8.0.80` single-line docstring fixes (#2060)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Glenn Jocher 2 years ago committed by GitHub
parent 31db8ed163
commit 5bce1c3021

@@ -1,6 +1,6 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
-__version__ = '8.0.79'
+__version__ = '8.0.80'
 from ultralytics.hub import start
 from ultralytics.yolo.engine.model import YOLO

@@ -54,7 +54,7 @@ model.train()""")
 def reset_model(model_id=''):
-    # Reset a trained model to an untrained state
+    """Reset a trained model to an untrained state."""
     r = requests.post('https://api.ultralytics.com/model-reset', json={'apiKey': Auth().api_key, 'modelId': model_id})
     if r.status_code == 200:
         LOGGER.info(f'{PREFIX}Model reset successfully')
@@ -63,13 +63,13 @@ def reset_model(model_id=''):
 def export_fmts_hub():
-    # Returns a list of HUB-supported export formats
+    """Returns a list of HUB-supported export formats."""
     from ultralytics.yolo.engine.exporter import export_formats
     return list(export_formats()['Argument'][1:]) + ['ultralytics_tflite', 'ultralytics_coreml']
 def export_model(model_id='', format='torchscript'):
-    # Export a model to all formats
+    """Export a model to all formats."""
     assert format in export_fmts_hub(), f"Unsupported export format '{format}', valid formats are {export_fmts_hub()}"
     r = requests.post('https://api.ultralytics.com/export',
                       json={
@@ -81,7 +81,7 @@ def export_model(model_id='', format='torchscript'):
 def get_export(model_id='', format='torchscript'):
-    # Get an exported model dictionary with download URL
+    """Get an exported model dictionary with download URL."""
     assert format in export_fmts_hub(), f"Unsupported export format '{format}', valid formats are {export_fmts_hub()}"
     r = requests.post('https://api.ultralytics.com/get-export',
                       json={
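Most of this commit mechanically converts leading `#` comments into docstrings. The practical difference, shown against the `reset_model` signature from the hunk above (a minimal sketch, body elided):

def reset_model(model_id=''):
    """Reset a trained model to an untrained state."""
    # ... request to the HUB API elided ...

print(reset_model.__doc__)  # -> Reset a trained model to an untrained state.
# A leading '# comment' would leave __doc__ as None and be invisible to help().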

@@ -124,7 +124,7 @@ class HUBTrainingSession:
                 'device': data['device'],
                 'cache': data['cache'],
                 'data': data['data']}
-            self.model_file = data.get('cfg', data['weights'])
+            self.model_file = data.get('cfg') or data.get('weights')  # cfg for pretrained=False
             self.model_file = checks.check_yolov5u_filename(self.model_file, verbose=False)  # YOLOv5->YOLOv5u
         elif data['status'] == 'training':  # existing model to resume training
             self.train_args = {'data': data['data'], 'resume': True}
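The one behavioral fix in this commit is the `model_file` fallback. `dict.get(key, default)` evaluates its default eagerly and only falls back when the key is missing, while `x or y` also falls through when the value is present but falsy. A sketch with a hypothetical HUB payload:

data = {'cfg': None, 'weights': 'yolov8n.pt'}  # hypothetical payload: cfg present but empty

old = data.get('cfg', data.get('weights'))    # -> None: key exists, so the default is ignored
new = data.get('cfg') or data.get('weights')  # -> 'yolov8n.pt': falsy cfg falls through
print(old, new)

The old form `data.get('cfg', data['weights'])` also raised KeyError whenever 'weights' was absent, even if 'cfg' was set.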

@@ -21,11 +21,11 @@ from ultralytics.yolo.utils.ops import xywh2xyxy
 def check_class_names(names):
-    # Check class names. Map imagenet class codes to human-readable names if required. Convert lists to dicts.
+    """Check class names. Map imagenet class codes to human-readable names if required. Convert lists to dicts."""
     if isinstance(names, list):  # names is a list
         names = dict(enumerate(names))  # convert to dict
     if isinstance(names, dict):
-        # convert 1) string keys to int, i.e. '0' to 0, and non-string values to strings, i.e. True to 'True'
+        # Convert 1) string keys to int, i.e. '0' to 0, and non-string values to strings, i.e. True to 'True'
         names = {int(k): str(v) for k, v in names.items()}
         n = len(names)
         if max(names.keys()) >= n:
@@ -229,7 +229,7 @@ class AutoBackend(nn.Module):
             interpreter.allocate_tensors()  # allocate
             input_details = interpreter.get_input_details()  # inputs
             output_details = interpreter.get_output_details()  # outputs
-            # load metadata
+            # Load metadata
             with contextlib.suppress(zipfile.BadZipFile):
                 with zipfile.ZipFile(w, 'r') as model:
                     meta_file = model.namelist()[0]
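For reference, the normalization `check_class_names` performs (a simplified sketch omitting the ImageNet-code mapping and the range check):

def check_class_names(names):
    """Normalize class names to a {int: str} dict (simplified)."""
    if isinstance(names, list):  # names is a list
        names = dict(enumerate(names))  # convert to dict
    if isinstance(names, dict):
        names = {int(k): str(v) for k, v in names.items()}
    return names

print(check_class_names(['person', 'bicycle']))  # {0: 'person', 1: 'bicycle'}
print(check_class_names({'0': True}))            # {0: 'True'}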

@@ -24,7 +24,7 @@ from ultralytics.yolo.utils.torch_utils import copy_attr, smart_inference_mode
 class AutoShape(nn.Module):
-    # YOLOv8 input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS
+    """YOLOv8 input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS."""
     conf = 0.25  # NMS confidence threshold
     iou = 0.45  # NMS IoU threshold
     agnostic = False  # NMS class-agnostic
@@ -47,7 +47,7 @@ class AutoShape(nn.Module):
             m.export = True  # do not output loss values
     def _apply(self, fn):
-        # Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers
+        """Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers."""
         self = super()._apply(fn)
         if self.pt:
             m = self.model.model.model[-1] if self.dmb else self.model.model[-1]  # Detect()
@@ -59,7 +59,7 @@ class AutoShape(nn.Module):
     @smart_inference_mode()
     def forward(self, ims, size=640, augment=False, profile=False):
-        # Inference from various sources. For size(height=640, width=1280), RGB images example inputs are:
+        """Inference from various sources. For size(height=640, width=1280), RGB images example inputs are:."""
         #   file:        ims = 'data/images/zidane.jpg'  # str or PosixPath
         #   URI:             = 'https://ultralytics.com/images/zidane.jpg'
         #   OpenCV:          = cv2.imread('image.jpg')[:,:,::-1]  # HWC BGR to RGB x(640,1280,3)
@@ -202,7 +202,7 @@ class Detections:
         return self.ims
     def pandas(self):
-        # return detections as pandas DataFrames, i.e. print(results.pandas().xyxy[0])
+        """Return detections as pandas DataFrames, i.e. print(results.pandas().xyxy[0])."""
         import pandas
         new = copy(self)  # return copy
         ca = 'xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'class', 'name'  # xyxy columns
@@ -213,7 +213,7 @@ class Detections:
         return new
     def tolist(self):
-        # return a list of Detections objects, i.e. 'for result in results.tolist():'
+        """Return a list of Detections objects, i.e. 'for result in results.tolist():'."""
         r = range(self.n)  # iterable
         x = [Detections([self.ims[i]], [self.pred[i]], [self.files[i]], self.times, self.names, self.s) for i in r]
         # for d in x:

@@ -12,7 +12,7 @@ from ultralytics.yolo.utils.tal import dist2bbox, make_anchors
 def autopad(k, p=None, d=1):  # kernel, padding, dilation
-    # Pad to 'same' shape outputs
+    """Pad to 'same' shape outputs."""
     if d > 1:
         k = d * (k - 1) + 1 if isinstance(k, int) else [d * (x - 1) + 1 for x in k]  # actual kernel-size
     if p is None:
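The hunk cuts off before the fallback branch; completing it from the published function, `autopad` returns half the (dilation-adjusted) kernel so stride-1 convs keep their spatial size:

def autopad(k, p=None, d=1):  # kernel, padding, dilation
    """Pad to 'same' shape outputs."""
    if d > 1:
        k = d * (k - 1) + 1 if isinstance(k, int) else [d * (x - 1) + 1 for x in k]  # actual kernel-size
    if p is None:
        p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # auto-pad
    return p

print(autopad(3), autopad(3, d=2), autopad((3, 5)))  # 1 2 [1, 2]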
@@ -21,7 +21,7 @@ def autopad(k, p=None, d=1):  # kernel, padding, dilation
 class Conv(nn.Module):
-    # Standard convolution with args(ch_in, ch_out, kernel, stride, padding, groups, dilation, activation)
+    """Standard convolution with args(ch_in, ch_out, kernel, stride, padding, groups, dilation, activation)."""
     default_act = nn.SiLU()  # default activation
     def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
@@ -38,19 +38,21 @@ class Conv(nn.Module):
 class DWConv(Conv):
-    # Depth-wise convolution
+    """Depth-wise convolution."""
     def __init__(self, c1, c2, k=1, s=1, d=1, act=True):  # ch_in, ch_out, kernel, stride, dilation, activation
         super().__init__(c1, c2, k, s, g=math.gcd(c1, c2), d=d, act=act)
 class DWConvTranspose2d(nn.ConvTranspose2d):
-    # Depth-wise transpose convolution
+    """Depth-wise transpose convolution."""
     def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0):  # ch_in, ch_out, kernel, stride, padding, padding_out
         super().__init__(c1, c2, k, s, p1, p2, groups=math.gcd(c1, c2))
 class ConvTranspose(nn.Module):
-    # Convolution transpose 2d layer
+    """Convolution transpose 2d layer."""
     default_act = nn.SiLU()  # default activation
     def __init__(self, c1, c2, k=2, s=2, p=0, bn=True, act=True):
@@ -67,8 +69,11 @@ class ConvTranspose(nn.Module):
 class DFL(nn.Module):
-    # Integral module of Distribution Focal Loss (DFL)
-    # Proposed in Generalized Focal Loss https://ieeexplore.ieee.org/document/9792391
+    """
+    Integral module of Distribution Focal Loss (DFL).
+    Proposed in Generalized Focal Loss https://ieeexplore.ieee.org/document/9792391
+    """
     def __init__(self, c1=16):
         super().__init__()
         self.conv = nn.Conv2d(c1, 1, 1, bias=False).requires_grad_(False)
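The DFL comment is now a proper multi-line docstring. For context, the "integral" it names is an expected value: softmax over c1 distance bins, then a frozen 1x1 conv whose weights are 0..c1-1 computes sum(i * p_i). A self-contained sketch mirroring the upstream module:

import torch
import torch.nn as nn

class DFL(nn.Module):
    """Decode a c1-bin distribution into a scalar distance per box side (sketch)."""
    def __init__(self, c1=16):
        super().__init__()
        self.conv = nn.Conv2d(c1, 1, 1, bias=False).requires_grad_(False)
        self.conv.weight.data[:] = torch.arange(c1, dtype=torch.float).view(1, c1, 1, 1)
        self.c1 = c1

    def forward(self, x):  # x: (batch, 4 * c1, anchors)
        b, c, a = x.shape
        # softmax over the c1 bins, then the frozen conv computes E[bin index]
        return self.conv(x.view(b, 4, self.c1, a).transpose(2, 1).softmax(1)).view(b, 4, a)

print(DFL()(torch.randn(2, 64, 100)).shape)  # torch.Size([2, 4, 100])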
@@ -83,7 +88,8 @@ class DFL(nn.Module):
 class TransformerLayer(nn.Module):
-    # Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance)
+    """Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance)."""
     def __init__(self, c, num_heads):
         super().__init__()
         self.q = nn.Linear(c, c, bias=False)
@@ -100,7 +106,8 @@ class TransformerLayer(nn.Module):
 class TransformerBlock(nn.Module):
-    # Vision Transformer https://arxiv.org/abs/2010.11929
+    """Vision Transformer https://arxiv.org/abs/2010.11929."""
     def __init__(self, c1, c2, num_heads, num_layers):
         super().__init__()
         self.conv = None
@@ -119,7 +126,8 @@ class TransformerBlock(nn.Module):
 class Bottleneck(nn.Module):
-    # Standard bottleneck
+    """Standard bottleneck."""
     def __init__(self, c1, c2, shortcut=True, g=1, k=(3, 3), e=0.5):  # ch_in, ch_out, shortcut, groups, kernels, expand
         super().__init__()
         c_ = int(c2 * e)  # hidden channels
@@ -132,7 +140,8 @@ class Bottleneck(nn.Module):
 class BottleneckCSP(nn.Module):
-    # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
+    """CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks."""
     def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
         super().__init__()
         c_ = int(c2 * e)  # hidden channels
@@ -151,7 +160,8 @@ class BottleneckCSP(nn.Module):
 class C3(nn.Module):
-    # CSP Bottleneck with 3 convolutions
+    """CSP Bottleneck with 3 convolutions."""
     def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
         super().__init__()
         c_ = int(c2 * e)  # hidden channels
@@ -165,7 +175,8 @@ class C3(nn.Module):
 class C2(nn.Module):
-    # CSP Bottleneck with 2 convolutions
+    """CSP Bottleneck with 2 convolutions."""
     def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
         super().__init__()
         self.c = int(c2 * e)  # hidden channels
@@ -180,7 +191,8 @@ class C2(nn.Module):
 class C2f(nn.Module):
-    # CSP Bottleneck with 2 convolutions
+    """CSP Bottleneck with 2 convolutions."""
     def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
         super().__init__()
         self.c = int(c2 * e)  # hidden channels
@@ -200,7 +212,8 @@ class C2f(nn.Module):
 class ChannelAttention(nn.Module):
-    # Channel-attention module https://github.com/open-mmlab/mmdetection/tree/v3.0.0rc1/configs/rtmdet
+    """Channel-attention module https://github.com/open-mmlab/mmdetection/tree/v3.0.0rc1/configs/rtmdet."""
     def __init__(self, channels: int) -> None:
         super().__init__()
         self.pool = nn.AdaptiveAvgPool2d(1)
@@ -212,7 +225,8 @@ class ChannelAttention(nn.Module):
 class SpatialAttention(nn.Module):
-    # Spatial-attention module
+    """Spatial-attention module."""
     def __init__(self, kernel_size=7):
         super().__init__()
         assert kernel_size in (3, 7), 'kernel size must be 3 or 7'
@@ -225,7 +239,8 @@ class SpatialAttention(nn.Module):
 class CBAM(nn.Module):
-    # Convolutional Block Attention Module
+    """Convolutional Block Attention Module."""
     def __init__(self, c1, kernel_size=7):  # ch_in, kernels
         super().__init__()
         self.channel_attention = ChannelAttention(c1)
@@ -236,7 +251,8 @@ class CBAM(nn.Module):
 class C1(nn.Module):
-    # CSP Bottleneck with 1 convolution
+    """CSP Bottleneck with 1 convolution."""
     def __init__(self, c1, c2, n=1):  # ch_in, ch_out, number
         super().__init__()
         self.cv1 = Conv(c1, c2, 1, 1)
@@ -248,7 +264,8 @@ class C1(nn.Module):
 class C3x(C3):
-    # C3 module with cross-convolutions
+    """C3 module with cross-convolutions."""
     def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
         super().__init__(c1, c2, n, shortcut, g, e)
         self.c_ = int(c2 * e)
@@ -256,7 +273,8 @@ class C3x(C3):
 class C3TR(C3):
-    # C3 module with TransformerBlock()
+    """C3 module with TransformerBlock()."""
     def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
         super().__init__(c1, c2, n, shortcut, g, e)
         c_ = int(c2 * e)
@@ -264,7 +282,8 @@ class C3TR(C3):
 class C3Ghost(C3):
-    # C3 module with GhostBottleneck()
+    """C3 module with GhostBottleneck()."""
     def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
         super().__init__(c1, c2, n, shortcut, g, e)
         c_ = int(c2 * e)  # hidden channels
@@ -272,7 +291,8 @@ class C3Ghost(C3):
 class SPP(nn.Module):
-    # Spatial Pyramid Pooling (SPP) layer https://arxiv.org/abs/1406.4729
+    """Spatial Pyramid Pooling (SPP) layer https://arxiv.org/abs/1406.4729."""
     def __init__(self, c1, c2, k=(5, 9, 13)):
         super().__init__()
         c_ = c1 // 2  # hidden channels
@@ -286,7 +306,8 @@ class SPP(nn.Module):
 class SPPF(nn.Module):
-    # Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher
+    """Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher."""
     def __init__(self, c1, c2, k=5):  # equivalent to SPP(k=(5, 9, 13))
         super().__init__()
         c_ = c1 // 2  # hidden channels
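The "equivalent to SPP(k=(5, 9, 13))" comment is exact, not approximate: composing stride-1 max-pools grows the window additively, so three sequential k=5 pools reproduce the 5/9/13 pyramid. A quick numerical check:

import torch
import torch.nn as nn

x = torch.randn(1, 8, 32, 32)
p5 = nn.MaxPool2d(kernel_size=5, stride=1, padding=2)
y1 = p5(x)
y2 = p5(y1)
y3 = p5(y2)
assert torch.equal(y2, nn.MaxPool2d(9, 1, 4)(x))   # two k=5 pools == one k=9 pool
assert torch.equal(y3, nn.MaxPool2d(13, 1, 6)(x))  # three k=5 pools == one k=13 pool
print('SPPF pyramid matches SPP(k=(5, 9, 13))')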
@@ -302,7 +323,8 @@ class SPPF(nn.Module):
 class Focus(nn.Module):
-    # Focus wh information into c-space
+    """Focus wh information into c-space."""
     def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
         super().__init__()
         self.conv = Conv(c1 * 4, c2, k, s, p, g, act=act)
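The `c1 * 4` input width comes from the forward pass (not shown in the hunk), which concatenates four pixel-strided slices so every 2x2 spatial block becomes 4 channels; an illustration of that slicing:

import torch

x = torch.arange(16.).view(1, 1, 4, 4)
y = torch.cat((x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]), 1)
print(x.shape, '->', y.shape)  # torch.Size([1, 1, 4, 4]) -> torch.Size([1, 4, 2, 2])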
@@ -314,7 +336,8 @@ class Focus(nn.Module):
 class GhostConv(nn.Module):
-    # Ghost Convolution https://github.com/huawei-noah/ghostnet
+    """Ghost Convolution https://github.com/huawei-noah/ghostnet."""
     def __init__(self, c1, c2, k=1, s=1, g=1, act=True):  # ch_in, ch_out, kernel, stride, groups
         super().__init__()
         c_ = c2 // 2  # hidden channels
@@ -327,7 +350,8 @@ class GhostConv(nn.Module):
 class GhostBottleneck(nn.Module):
-    # Ghost Bottleneck https://github.com/huawei-noah/ghostnet
+    """Ghost Bottleneck https://github.com/huawei-noah/ghostnet."""
     def __init__(self, c1, c2, k=3, s=1):  # ch_in, ch_out, kernel, stride
         super().__init__()
         c_ = c2 // 2
@@ -343,7 +367,8 @@ class GhostBottleneck(nn.Module):
 class Concat(nn.Module):
-    # Concatenate a list of tensors along dimension
+    """Concatenate a list of tensors along dimension."""
     def __init__(self, dimension=1):
         super().__init__()
         self.d = dimension
@@ -353,7 +378,8 @@ class Concat(nn.Module):
 class Proto(nn.Module):
-    # YOLOv8 mask Proto module for segmentation models
+    """YOLOv8 mask Proto module for segmentation models."""
     def __init__(self, c1, c_=256, c2=32):  # ch_in, number of protos, number of masks
         super().__init__()
         self.cv1 = Conv(c1, c_, k=3)
@@ -366,7 +392,8 @@ class Proto(nn.Module):
 class Ensemble(nn.ModuleList):
-    # Ensemble of models
+    """Ensemble of models."""
     def __init__(self):
         super().__init__()
@@ -382,7 +409,7 @@ class Ensemble(nn.ModuleList):
 class Detect(nn.Module):
-    # YOLOv8 Detect head for detection models
+    """YOLOv8 Detect head for detection models."""
     dynamic = False  # force grid reconstruction
     export = False  # export mode
     shape = None
@@ -423,7 +450,7 @@ class Detect(nn.Module):
         return y if self.export else (y, x)
     def bias_init(self):
-        # Initialize Detect() biases, WARNING: requires stride availability
+        """Initialize Detect() biases, WARNING: requires stride availability."""
         m = self  # self.model[-1]  # Detect() module
         # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1
         # ncf = math.log(0.6 / (m.nc - 0.999999)) if cf is None else torch.log(cf / cf.sum())  # nominal class frequency
@@ -433,7 +460,8 @@ class Detect(nn.Module):
 class Segment(Detect):
-    # YOLOv8 Segment head for segmentation models
+    """YOLOv8 Segment head for segmentation models."""
     def __init__(self, nc=80, nm=32, npr=256, ch=()):
         super().__init__(nc, ch)
         self.nm = nm  # number of masks
@@ -456,7 +484,8 @@ class Segment(Detect):
 class Pose(Detect):
-    # YOLOv8 Pose head for keypoints models
+    """YOLOv8 Pose head for keypoints models."""
     def __init__(self, nc=80, kpt_shape=(17, 3), ch=()):
         super().__init__(nc, ch)
         self.kpt_shape = kpt_shape  # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
@@ -486,7 +515,8 @@ class Pose(Detect):
 class Classify(nn.Module):
-    # YOLOv8 classification head, i.e. x(b,c1,20,20) to x(b,c2)
+    """YOLOv8 classification head, i.e. x(b,c1,20,20) to x(b,c2)."""
     def __init__(self, c1, c2, k=1, s=1, p=None, g=1):  # ch_in, ch_out, kernel, stride, padding, groups
         super().__init__()
         c_ = 1280  # efficientnet_b0 size

@@ -167,7 +167,8 @@ class BaseModel(nn.Module):
 class DetectionModel(BaseModel):
-    # YOLOv8 detection model
+    """YOLOv8 detection model."""
     def __init__(self, cfg='yolov8n.yaml', ch=3, nc=None, verbose=True):  # model, input channels, number of classes
         super().__init__()
         self.yaml = cfg if isinstance(cfg, dict) else yaml_model_load(cfg)  # cfg dict
@@ -218,7 +219,7 @@ class DetectionModel(BaseModel):
     @staticmethod
     def _descale_pred(p, flips, scale, img_size, dim=1):
-        # de-scale predictions following augmented inference (inverse operation)
+        """De-scale predictions following augmented inference (inverse operation)."""
         p[:, :4] /= scale  # de-scale
         x, y, wh, cls = p.split((1, 1, 2, p.shape[dim] - 4), dim)
         if flips == 2:
@@ -228,7 +229,7 @@ class DetectionModel(BaseModel):
         return torch.cat((x, y, wh, cls), dim)
     def _clip_augmented(self, y):
-        # Clip YOLOv5 augmented inference tails
+        """Clip YOLOv5 augmented inference tails."""
         nl = self.model[-1].nl  # number of detection layers (P3-P5)
         g = sum(4 ** x for x in range(nl))  # grid points
         e = 1  # exclude layer count
@@ -240,7 +241,8 @@ class DetectionModel(BaseModel):
 class SegmentationModel(DetectionModel):
-    # YOLOv8 segmentation model
+    """YOLOv8 segmentation model."""
     def __init__(self, cfg='yolov8n-seg.yaml', ch=3, nc=None, verbose=True):
         super().__init__(cfg=cfg, ch=ch, nc=nc, verbose=verbose)
@@ -249,7 +251,8 @@ class SegmentationModel(DetectionModel):
 class PoseModel(DetectionModel):
-    # YOLOv8 pose model
+    """YOLOv8 pose model."""
     def __init__(self, cfg='yolov8n-pose.yaml', ch=3, nc=None, data_kpt_shape=(None, None), verbose=True):
         if not isinstance(cfg, dict):
             cfg = yaml_model_load(cfg)  # load model YAML
@@ -260,7 +263,8 @@ class PoseModel(DetectionModel):
 class ClassificationModel(BaseModel):
-    # YOLOv8 classification model
+    """YOLOv8 classification model."""
     def __init__(self,
                  cfg=None,
                  model=None,
@@ -272,7 +276,7 @@ class ClassificationModel(BaseModel):
         self._from_detection_model(model, nc, cutoff) if model is not None else self._from_yaml(cfg, ch, nc, verbose)
     def _from_detection_model(self, model, nc=1000, cutoff=10):
-        # Create a YOLOv5 classification model from a YOLOv5 detection model
+        """Create a YOLOv5 classification model from a YOLOv5 detection model."""
         from ultralytics.nn.autobackend import AutoBackend
         if isinstance(model, AutoBackend):
             model = model.model  # unwrap DetectMultiBackend
@@ -304,7 +308,7 @@ class ClassificationModel(BaseModel):
     @staticmethod
    def reshape_outputs(model, nc):
-        # Update a TorchVision classification model to class count 'n' if required
+        """Update a TorchVision classification model to class count 'n' if required."""
         name, m = list((model.model if hasattr(model, 'model') else model).named_children())[-1]  # last module
         if isinstance(m, Classify):  # YOLO Classify() head
             if m.linear.out_features != nc:
@@ -363,7 +367,7 @@ def torch_safe_load(weight):
 def attempt_load_weights(weights, device=None, inplace=True, fuse=False):
-    # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a
+    """Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a."""
     ensemble = Ensemble()
     for w in weights if isinstance(weights, list) else [weights]:
@@ -403,7 +407,7 @@ def attempt_load_weights(weights, device=None, inplace=True, fuse=False):
 def attempt_load_one_weight(weight, device=None, inplace=True, fuse=False):
-    # Loads a single model weights
+    """Loads a single model weights."""
     ckpt, weight = torch_safe_load(weight)  # load ckpt
     args = {**DEFAULT_CFG_DICT, **ckpt['train_args']}  # combine model and default args, preferring model args
     model = (ckpt.get('ema') or ckpt['model']).to(device).float()  # FP32 model
@@ -546,7 +550,7 @@ def guess_model_task(model):
     """
     def cfg2task(cfg):
-        # Guess from YAML dictionary
+        """Guess from YAML dictionary."""
         m = cfg['head'][-1][-2].lower()  # output module name
         if m in ('classify', 'classifier', 'cls', 'fc'):
             return 'classify'
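For context, `cfg2task` reads the module name out of the last head row of a model YAML; a row has the shape [from, repeats, module, args], so index [-2] is the module. A runnable sketch (the detect/segment/pose branches are assumptions about the part of the function the hunk elides):

def cfg2task(cfg):
    """Guess the task from the last head module (sketch)."""
    m = cfg['head'][-1][-2].lower()  # output module name
    if m in ('classify', 'classifier', 'cls', 'fc'):
        return 'classify'
    if m == 'detect':
        return 'detect'
    if m == 'segment':
        return 'segment'
    if m == 'pose':
        return 'pose'

print(cfg2task({'head': [[[15, 18, 21], 1, 'Detect', ['nc']]]}))  # detect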

@@ -27,7 +27,7 @@ class BaseTrack:
     frame_id = 0
     time_since_update = 0
-    # multi-camera
+    # Multi-camera
     location = (np.inf, np.inf)
     @property

@@ -100,7 +100,7 @@ class BOTSORT(BYTETracker):
         self.appearance_thresh = args.appearance_thresh
         if args.with_reid:
-            # haven't supported BoT-SORT(reid) yet
+            # Haven't supported BoT-SORT(reid) yet
             self.encoder = None
         # self.gmc = GMC(method=args.cmc_method, verbose=[args.name, args.ablation])
         self.gmc = GMC(method=args.cmc_method)

@@ -11,8 +11,7 @@ class STrack(BaseTrack):
     shared_kalman = KalmanFilterXYAH()
     def __init__(self, tlwh, score, cls):
-        # wait activate
+        """wait activate."""
         self._tlwh = np.asarray(self.tlbr_to_tlwh(tlwh[:-1]), dtype=np.float32)
         self.kalman_filter = None
         self.mean, self.covariance = None, None
@@ -62,7 +61,7 @@ class STrack(BaseTrack):
             stracks[i].covariance = cov
     def activate(self, kalman_filter, frame_id):
-        """Start a new tracklet"""
+        """Start a new tracklet."""
         self.kalman_filter = kalman_filter
         self.track_id = self.next_id()
         self.mean, self.covariance = self.kalman_filter.initiate(self.convert_coords(self._tlwh))
@@ -179,7 +178,7 @@ class BYTETracker:
         scores = results.conf
         bboxes = results.xyxy
-        # add index
+        # Add index
         bboxes = np.concatenate([bboxes, np.arange(len(bboxes)).reshape(-1, 1)], axis=-1)
         cls = results.cls
@@ -196,7 +195,7 @@ class BYTETracker:
         cls_second = cls[inds_second]
         detections = self.init_track(dets, scores_keep, cls_keep, img)
-        """ Add newly detected tracklets to tracked_stracks"""
+        # Add newly detected tracklets to tracked_stracks
         unconfirmed = []
         tracked_stracks = []  # type: list[STrack]
         for track in self.tracked_stracks:
@@ -204,7 +203,7 @@ class BYTETracker:
                 unconfirmed.append(track)
             else:
                 tracked_stracks.append(track)
-        """ Step 2: First association, with high score detection boxes"""
+        # Step 2: First association, with high score detection boxes
         strack_pool = self.joint_stracks(tracked_stracks, self.lost_stracks)
         # Predict the current location with KF
         self.multi_predict(strack_pool)
@@ -225,7 +224,7 @@ class BYTETracker:
             else:
                 track.re_activate(det, self.frame_id, new_id=False)
                 refind_stracks.append(track)
-        """ Step 3: Second association, with low score detection boxes"""
+        # Step 3: Second association, with low score detection boxes
         # association the untrack to the low score detections
         detections_second = self.init_track(dets_second, scores_second, cls_second, img)
         r_tracked_stracks = [strack_pool[i] for i in u_track if strack_pool[i].state == TrackState.Tracked]
@@ -247,7 +246,7 @@ class BYTETracker:
             if track.state != TrackState.Lost:
                 track.mark_lost()
                 lost_stracks.append(track)
-        """Deal with unconfirmed tracks, usually tracks with only one beginning frame"""
+        # Deal with unconfirmed tracks, usually tracks with only one beginning frame
         detections = [detections[i] for i in u_detection]
         dists = self.get_dists(unconfirmed, detections)
         matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7)
@@ -258,14 +257,14 @@ class BYTETracker:
             track = unconfirmed[it]
             track.mark_removed()
             removed_stracks.append(track)
-        """ Step 4: Init new stracks"""
+        # Step 4: Init new stracks
         for inew in u_detection:
             track = detections[inew]
             if track.score < self.args.new_track_thresh:
                 continue
             track.activate(self.kalman_filter, self.frame_id)
             activated_starcks.append(track)
-        """ Step 5: Update state"""
+        # Step 5: Update state
         for track in self.lost_stracks:
             if self.frame_id - track.end_frame > self.max_time_lost:
                 track.mark_removed()
@@ -320,7 +319,7 @@ class BYTETracker:
     @staticmethod
     def sub_stracks(tlista, tlistb):
-        """ DEPRECATED CODE in https://github.com/ultralytics/ultralytics/pull/1890/
+        """DEPRECATED CODE in https://github.com/ultralytics/ultralytics/pull/1890/
         stracks = {t.track_id: t for t in tlista}
         for t in tlistb:
             tid = t.track_id
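The `""" Step N ..."""` annotations replaced above were bare string-literal expressions, not comments: only a string in docstring position binds to `__doc__`, and any later bare literal is an inert constant, so `#` comments state the intent directly. A quick check:

def update_demo():
    """Real docstring."""
    tracked = []
    """ Step 2: not a comment or docstring, just an unused string constant"""
    return tracked

print(update_demo.__doc__)  # -> Real docstring.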

@@ -83,8 +83,7 @@ class GMC:
             return np.eye(2, 3)
     def applyEcc(self, raw_frame, detections=None):
-        # Initialize
+        """Initialize."""
         height, width, _ = raw_frame.shape
         frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2GRAY)
         H = np.eye(2, 3, dtype=np.float32)
@@ -116,8 +115,7 @@ class GMC:
         return H
     def applyFeatures(self, raw_frame, detections=None):
-        # Initialize
+        """Initialize."""
         height, width, _ = raw_frame.shape
         frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2GRAY)
         H = np.eye(2, 3)
@@ -129,7 +127,7 @@ class GMC:
             width = width // self.downscale
             height = height // self.downscale
-        # find the keypoints
+        # Find the keypoints
         mask = np.zeros_like(frame)
         # mask[int(0.05 * height): int(0.95 * height), int(0.05 * width): int(0.95 * width)] = 255
         mask[int(0.02 * height):int(0.98 * height), int(0.02 * width):int(0.98 * width)] = 255
@@ -140,7 +138,7 @@ class GMC:
         keypoints = self.detector.detect(frame, mask)
-        # compute the descriptors
+        # Compute the descriptors
         keypoints, descriptors = self.extractor.compute(frame, keypoints)
         # Handle first frame
@@ -243,7 +241,7 @@ class GMC:
         return H
     def applySparseOptFlow(self, raw_frame, detections=None):
-        # Initialize
+        """Initialize."""
         # t0 = time.time()
         height, width, _ = raw_frame.shape
         frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2GRAY)
@@ -254,7 +252,7 @@ class GMC:
         # frame = cv2.GaussianBlur(frame, (3, 3), 1.5)
         frame = cv2.resize(frame, (width // self.downscale, height // self.downscale))
-        # find the keypoints
+        # Find the keypoints
         keypoints = cv2.goodFeaturesToTrack(frame, mask=None, **self.feature_params)
         # Handle first frame
@@ -268,10 +266,10 @@ class GMC:
             return H
-        # find correspondences
+        # Find correspondences
         matchedKeypoints, status, err = cv2.calcOpticalFlowPyrLK(self.prevFrame, frame, self.prevKeyPoints, None)
-        # leave good correspondences only
+        # Leave good correspondences only
         prevPoints = []
        currPoints = []
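The detect-then-track flow that `applySparseOptFlow` implements can be seen in isolation below; a minimal standalone sketch with synthetic frames and illustrative parameters (not GMC's actual `feature_params`):

import cv2
import numpy as np

prev = np.zeros((120, 160), np.uint8)
cv2.rectangle(prev, (40, 40), (80, 80), 255, -1)  # a corner-rich patch
curr = np.roll(prev, 5, axis=1)                   # simulate a 5 px camera shift

pts = cv2.goodFeaturesToTrack(prev, maxCorners=50, qualityLevel=0.01, minDistance=5)
nxt, status, err = cv2.calcOpticalFlowPyrLK(prev, curr, pts, None)
good = status.flatten() == 1
print(np.mean(nxt[good] - pts[good], axis=0))     # ~[[5. 0.]] estimated translation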

@@ -8,6 +8,7 @@ from .kalman_filter import chi2inv95
 try:
     import lap  # for linear_assignment
     assert lap.__version__  # verify package is not directory
 except (ImportError, AssertionError, AttributeError):
     from ultralytics.yolo.utils.checks import check_requirements
@@ -45,7 +46,7 @@ def _indices_to_matches(cost_matrix, indices, thresh):
 def linear_assignment(cost_matrix, thresh, use_lap=True):
-    # Linear assignment implementations with scipy and lap.lapjv
+    """Linear assignment implementations with scipy and lap.lapjv."""
     if cost_matrix.size == 0:
         return np.empty((0, 2), dtype=int), tuple(range(cost_matrix.shape[0])), tuple(range(cost_matrix.shape[1]))
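The lap-with-scipy-fallback strategy the docstring names looks roughly like this (a sketch, not the function's full body; `lap.lapjv` and `scipy.optimize.linear_sum_assignment` are the real entry points each branch uses):

import numpy as np

cost = np.array([[0.1, 0.9], [0.8, 0.2]])
try:
    import lap
    _, x, y = lap.lapjv(cost, extend_cost=True, cost_limit=0.5)
    matches = [[i, j] for i, j in enumerate(x) if j >= 0]
except ImportError:
    from scipy.optimize import linear_sum_assignment
    r, c = linear_sum_assignment(cost)
    matches = [[i, j] for i, j in zip(r, c) if cost[i, j] <= 0.5]
print(matches)  # [[0, 0], [1, 1]]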

@@ -400,5 +400,5 @@ def copy_default_cfg():
 if __name__ == '__main__':
-    # entrypoint(debug='yolo predict model=yolov8n.pt')
+    # Example Usage: entrypoint(debug='yolo predict model=yolov8n.pt')
     entrypoint(debug='')

@@ -66,7 +66,7 @@ class Compose:
 class BaseMixTransform:
-    """This implementation is from mmyolo"""
+    """This implementation is from mmyolo."""
     def __init__(self, dataset, pre_transform=None, p=0.0) -> None:
         self.dataset = dataset
@@ -77,12 +77,12 @@ class BaseMixTransform:
         if random.uniform(0, 1) > self.p:
             return labels
-        # get index of one or three other images
+        # Get index of one or three other images
         indexes = self.get_indexes()
         if isinstance(indexes, int):
             indexes = [indexes]
-        # get images information will be used for Mosaic or MixUp
+        # Get images information will be used for Mosaic or MixUp
         mix_labels = [self.dataset.get_label_info(i) for i in indexes]
         if self.pre_transform is not None:
@@ -132,7 +132,7 @@ class Mosaic(BaseMixTransform):
             img = labels_patch['img']
             h, w = labels_patch.pop('resized_shape')
-            # place img in img4
+            # Place img in img4
             if i == 0:  # top left
                 img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles
                 x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc  # xmin, ymin, xmax, ymax (large image)
@@ -158,7 +158,7 @@ class Mosaic(BaseMixTransform):
         return final_labels
     def _update_labels(self, labels, padw, padh):
-        """Update labels"""
+        """Update labels."""
         nh, nw = labels['img'].shape[:2]
         labels['instances'].convert_bbox(format='xyxy')
         labels['instances'].denormalize(nw, nh)
@@ -193,7 +193,7 @@ class MixUp(BaseMixTransform):
         return random.randint(0, len(self.dataset) - 1)
     def _mix_transform(self, labels):
-        # Applies MixUp augmentation https://arxiv.org/pdf/1710.09412.pdf
+        """Applies MixUp augmentation https://arxiv.org/pdf/1710.09412.pdf."""
         r = np.random.beta(32.0, 32.0)  # mixup ratio, alpha=beta=32.0
         labels2 = labels['mix_labels'][0]
         labels['img'] = (labels['img'] * r + labels2['img'] * (1 - r)).astype(np.uint8)
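With alpha = beta = 32.0 the Beta distribution concentrates tightly around 0.5, so MixUp blends each image pair roughly evenly (labels are concatenated rather than blended). A toy run:

import numpy as np

r = np.random.beta(32.0, 32.0)          # tightly concentrated around 0.5
im1 = np.full((4, 4, 3), 200, np.uint8)
im2 = np.full((4, 4, 3), 50, np.uint8)
mixed = (im1 * r + im2 * (1 - r)).astype(np.uint8)
print(round(float(r), 3), mixed[0, 0])  # e.g. 0.507 [126 126 126]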
@@ -217,12 +217,12 @@ class RandomPerspective:
         self.scale = scale
         self.shear = shear
         self.perspective = perspective
-        # mosaic border
+        # Mosaic border
         self.border = border
         self.pre_transform = pre_transform
     def affine_transform(self, img, border):
-        # Center
+        """Center."""
         C = np.eye(3, dtype=np.float32)
         C[0, 2] = -img.shape[1] / 2  # x translation (pixels)
@@ -253,7 +253,7 @@ class RandomPerspective:
         # Combined rotation matrix
         M = T @ S @ R @ P @ C  # order of operations (right to left) is IMPORTANT
-        # affine image
+        # Affine image
         if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any():  # image changed
             if self.perspective:
                 img = cv2.warpPerspective(img, M, dsize=self.size, borderValue=(114, 114, 114))
@@ -281,7 +281,7 @@ class RandomPerspective:
         xy = xy @ M.T  # transform
         xy = (xy[:, :2] / xy[:, 2:3] if self.perspective else xy[:, :2]).reshape(n, 8)  # perspective rescale or affine
-        # create new boxes
+        # Create new boxes
         x = xy[:, [0, 2, 4, 6]]
         y = xy[:, [1, 3, 5, 7]]
         return np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1)), dtype=bboxes.dtype).reshape(4, n).T
@@ -348,7 +348,7 @@ class RandomPerspective:
         img = labels['img']
         cls = labels['cls']
         instances = labels.pop('instances')
-        # make sure the coord formats are right
+        # Make sure the coord formats are right
         instances.convert_bbox(format='xyxy')
         instances.denormalize(*img.shape[:2][::-1])
@@ -362,19 +362,19 @@ class RandomPerspective:
         segments = instances.segments
         keypoints = instances.keypoints
-        # update bboxes if there are segments.
+        # Update bboxes if there are segments.
         if len(segments):
             bboxes, segments = self.apply_segments(segments, M)
         if keypoints is not None:
             keypoints = self.apply_keypoints(keypoints, M)
         new_instances = Instances(bboxes, segments, keypoints, bbox_format='xyxy', normalized=False)
-        # clip
+        # Clip
         new_instances.clip(*self.size)
-        # filter instances
+        # Filter instances
         instances.scale(scale_w=scale, scale_h=scale, bbox_only=True)
-        # make the bboxes have the same scale with new_bboxes
+        # Make the bboxes have the same scale with new_bboxes
         i = self.box_candidates(box1=instances.bboxes.T,
                                 box2=new_instances.bboxes.T,
                                 area_thr=0.01 if len(segments) else 0.10)
@@ -441,7 +441,7 @@ class RandomFlip:
         if self.direction == 'horizontal' and random.random() < self.p:
             img = np.fliplr(img)
             instances.fliplr(w)
-            # for keypoints
+            # For keypoints
             if self.flip_idx is not None and instances.keypoints is not None:
                 instances.keypoints = np.ascontiguousarray(instances.keypoints[:, self.flip_idx, :])
         labels['img'] = np.ascontiguousarray(img)
@@ -450,7 +450,7 @@ class RandomFlip:
 class LetterBox:
-    """Resize image and padding for detection, instance segmentation, pose"""
+    """Resize image and padding for detection, instance segmentation, pose."""
     def __init__(self, new_shape=(640, 640), auto=False, scaleFill=False, scaleup=True, stride=32):
         self.new_shape = new_shape
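LetterBox's `__call__` (not in the hunk) fits the image to the long side and splits the leftover as symmetric gray padding; the core arithmetic, as a sketch:

h, w, new_shape = 480, 640, (640, 640)       # source hw, target hw
r = min(new_shape[0] / h, new_shape[1] / w)  # fit ratio; capped at 1.0 if scaleup=False
new_unpad = round(w * r), round(h * r)       # (640, 480) resized, unpadded wh
dw, dh = (new_shape[1] - new_unpad[0]) / 2, (new_shape[0] - new_unpad[1]) / 2
print(r, new_unpad, (dw, dh))                # 1.0 (640, 480) (0.0, 80.0)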
@@ -505,7 +505,7 @@ class LetterBox:
         return img
     def _update_labels(self, labels, ratio, padw, padh):
-        """Update labels"""
+        """Update labels."""
         labels['instances'].convert_bbox(format='xyxy')
         labels['instances'].denormalize(*labels['img'].shape[:2][::-1])
         labels['instances'].scale(*ratio)
@@ -519,7 +519,7 @@ class CopyPaste:
         self.p = p
     def __call__(self, labels):
-        # Implement Copy-Paste augmentation https://arxiv.org/abs/2012.07177, labels as nx5 np.array(cls, xyxy)
+        """Implement Copy-Paste augmentation https://arxiv.org/abs/2012.07177, labels as nx5 np.array(cls, xyxy)."""
         im = labels['img']
         cls = labels['cls']
         h, w = im.shape[:2]
@@ -531,7 +531,7 @@ class CopyPaste:
             _, w, _ = im.shape  # height, width, channels
             im_new = np.zeros(im.shape, np.uint8)
-            # calculate ioa first then select indexes randomly
+            # Calculate ioa first then select indexes randomly
             ins_flip = deepcopy(instances)
             ins_flip.fliplr(w)
@@ -641,7 +641,7 @@ class Format:
         labels['bboxes'] = torch.from_numpy(instances.bboxes) if nl else torch.zeros((nl, 4))
         if self.return_keypoint:
             labels['keypoints'] = torch.from_numpy(instances.keypoints)
-        # then we can use collate_fn
+        # Then we can use collate_fn
         if self.batch_idx:
             labels['batch_idx'] = torch.zeros(nl)
         return labels
@@ -654,7 +654,7 @@ class Format:
         return img
     def _format_segments(self, instances, cls, w, h):
-        """convert polygon points to bitmap"""
+        """convert polygon points to bitmap."""
         segments = instances.segments
         if self.mask_overlap:
             masks, sorted_idx = polygons2masks_overlap((h, w), segments, downsample_ratio=self.mask_ratio)

@@ -70,7 +70,7 @@ class BaseDataset(Dataset):
         self.ni = len(self.labels)
-        # rect stuff
+        # Rect stuff
         self.rect = rect
         self.batch_size = batch_size
         self.stride = stride
@@ -79,13 +79,13 @@ class BaseDataset(Dataset):
             assert self.batch_size is not None
             self.set_rectangle()
-        # cache stuff
+        # Cache stuff
         self.ims = [None] * self.ni
         self.npy_files = [Path(f).with_suffix('.npy') for f in self.im_files]
         if cache:
             self.cache_images(cache)
-        # transforms
+        # Transforms
         self.transforms = self.build_transforms(hyp=hyp)
     def get_img_files(self, img_path):
@@ -96,13 +96,13 @@ class BaseDataset(Dataset):
                 p = Path(p)  # os-agnostic
                 if p.is_dir():  # dir
                     f += glob.glob(str(p / '**' / '*.*'), recursive=True)
-                    # f = list(p.rglob('*.*'))  # pathlib
+                    # F = list(p.rglob('*.*'))  # pathlib
                 elif p.is_file():  # file
                     with open(p) as t:
                         t = t.read().strip().splitlines()
                         parent = str(p.parent) + os.sep
                         f += [x.replace('./', parent) if x.startswith('./') else x for x in t]  # local to global path
-                        # f += [p.parent / x.lstrip(os.sep) for x in t]  # local to global path (pathlib)
+                        # F += [p.parent / x.lstrip(os.sep) for x in t]  # local to global path (pathlib)
                 else:
                     raise FileNotFoundError(f'{self.prefix}{p} does not exist')
             im_files = sorted(x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in IMG_FORMATS)
@@ -113,7 +113,7 @@ class BaseDataset(Dataset):
         return im_files
     def update_labels(self, include_class: Optional[list]):
-        """include_class, filter labels to include only these classes (optional)"""
+        """include_class, filter labels to include only these classes (optional)."""
         include_class_array = np.array(include_class).reshape(1, -1)
         for i in range(len(self.labels)):
             if include_class is not None:
@@ -129,7 +129,7 @@ class BaseDataset(Dataset):
                 self.labels[i]['cls'][:, 0] = 0
     def load_image(self, i):
-        # Loads 1 image from dataset index 'i', returns (im, resized hw)
+        """Loads 1 image from dataset index 'i', returns (im, resized hw)."""
         im, f, fn = self.ims[i], self.im_files[i], self.npy_files[i]
         if im is None:  # not cached in RAM
             if fn.exists():  # load npy
@@ -147,7 +147,7 @@ class BaseDataset(Dataset):
         return self.ims[i], self.im_hw0[i], self.im_hw[i]  # im, hw_original, hw_resized
     def cache_images(self, cache):
-        # cache images to memory or disk
+        """Cache images to memory or disk."""
         gb = 0  # Gigabytes of cached images
         self.im_hw0, self.im_hw = [None] * self.ni, [None] * self.ni
         fcn = self.cache_images_to_disk if cache == 'disk' else self.load_image
@@ -164,7 +164,7 @@ class BaseDataset(Dataset):
         pbar.close()
     def cache_images_to_disk(self, i):
-        # Saves an image as an *.npy file for faster loading
+        """Saves an image as an *.npy file for faster loading."""
         f = self.npy_files[i]
         if not f.exists():
             np.save(f.as_posix(), cv2.imread(self.im_files[i]))
@@ -211,17 +211,17 @@ class BaseDataset(Dataset):
         return len(self.labels)
     def update_labels_info(self, label):
-        """custom your label format here"""
+        """custom your label format here."""
         return label
     def build_transforms(self, hyp=None):
         """Users can custom augmentations here
         like:
             if self.augment:
-                # training transforms
+                # Training transforms
                 return Compose([])
             else:
-                # val transforms
+                # Val transforms
                 return Compose([])
         """
         raise NotImplementedError

@@ -104,7 +104,7 @@ def build_dataloader(cfg, batch, img_path, data_info, stride=32, rect=False, ran
                       generator=generator), dataset
-# build classification
+# Build classification
 # TODO: using cfg like `build_dataloader`
 def build_classification_dataloader(path,
                                     imgsz=224,
@@ -114,7 +114,7 @@ def build_classification_dataloader(path,
                                     rank=-1,
                                     workers=8,
                                     shuffle=True):
-    # Returns Dataloader object to be used with YOLOv5 Classifier
+    """Returns Dataloader object to be used with YOLOv5 Classifier."""
     with torch_distributed_zero_first(rank):  # init dataset *.cache only once if DDP
         dataset = ClassificationDataset(root=path, imgsz=imgsz, augment=augment, cache=cache)
     batch_size = min(batch_size, len(dataset))

@ -70,7 +70,7 @@ class LoadStreams:
self.threads[i].start() self.threads[i].start()
LOGGER.info('') # newline LOGGER.info('') # newline
# check for common shapes # Check for common shapes
s = np.stack([LetterBox(imgsz, auto, stride=stride)(image=x).shape for x in self.imgs]) s = np.stack([LetterBox(imgsz, auto, stride=stride)(image=x).shape for x in self.imgs])
self.rect = np.unique(s, axis=0).shape[0] == 1 # rect inference if all shapes equal self.rect = np.unique(s, axis=0).shape[0] == 1 # rect inference if all shapes equal
self.auto = auto and self.rect self.auto = auto and self.rect
@ -81,7 +81,7 @@ class LoadStreams:
LOGGER.warning('WARNING ⚠️ Stream shapes differ. For optimal performance supply similarly-shaped streams.') LOGGER.warning('WARNING ⚠️ Stream shapes differ. For optimal performance supply similarly-shaped streams.')
def update(self, i, cap, stream): def update(self, i, cap, stream):
# Read stream `i` frames in daemon thread """Read stream `i` frames in daemon thread."""
n, f = 0, self.frames[i] # frame number, frame array n, f = 0, self.frames[i] # frame number, frame array
while cap.isOpened() and n < f: while cap.isOpened() and n < f:
n += 1 n += 1
@ -123,7 +123,7 @@ class LoadStreams:
class LoadScreenshots: class LoadScreenshots:
# YOLOv8 screenshot dataloader, i.e. `yolo predict source=screen` # YOLOv8 screenshot dataloader, i.e. `yolo predict source=screen`
def __init__(self, source, imgsz=640, stride=32, auto=True, transforms=None): def __init__(self, source, imgsz=640, stride=32, auto=True, transforms=None):
# source = [screen_number left top width height] (pixels) """Source = [screen_number left top width height] (pixels)."""

check_requirements('mss') check_requirements('mss')
import mss # noqa import mss # noqa
@ -156,7 +156,7 @@ class LoadScreenshots:
return self return self
def __next__(self): def __next__(self):
# mss screen capture: get raw pixels from the screen as np array """mss screen capture: get raw pixels from the screen as np array."""
im0 = np.array(self.sct.grab(self.monitor))[:, :, :3] # [:, :, :3] BGRA to BGR im0 = np.array(self.sct.grab(self.monitor))[:, :, :3] # [:, :, :3] BGRA to BGR
s = f'screen {self.screen} (LTWH): {self.left},{self.top},{self.width},{self.height}: ' s = f'screen {self.screen} (LTWH): {self.left},{self.top},{self.width},{self.height}: '
@ -256,7 +256,7 @@ class LoadImages:
return path, im, im0, self.cap, s return path, im, im0, self.cap, s
def _new_video(self, path): def _new_video(self, path):
# Create a new video capture object """Create a new video capture object."""
self.frame = 0 self.frame = 0
self.cap = cv2.VideoCapture(path) self.cap = cv2.VideoCapture(path)
self.frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT) / self.vid_stride) self.frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT) / self.vid_stride)
@ -266,7 +266,7 @@ class LoadImages:
# self.cap.set(cv2.CAP_PROP_ORIENTATION_AUTO, 0) # self.cap.set(cv2.CAP_PROP_ORIENTATION_AUTO, 0)
def _cv2_rotate(self, im): def _cv2_rotate(self, im):
# Rotate a cv2 video manually """Rotate a cv2 video manually."""
if self.orientation == 0: if self.orientation == 0:
return cv2.rotate(im, cv2.ROTATE_90_CLOCKWISE) return cv2.rotate(im, cv2.ROTATE_90_CLOCKWISE)
elif self.orientation == 180: elif self.orientation == 180:
@ -291,7 +291,7 @@ class LoadPilAndNumpy:
self.auto = auto self.auto = auto
self.transforms = transforms self.transforms = transforms
self.mode = 'image' self.mode = 'image'
# generate fake paths # Generate fake paths
self.bs = len(self.im0) self.bs = len(self.im0)
@staticmethod @staticmethod

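The rect check above reduces to "do all letterboxed stream shapes agree?". A small numpy sketch of that test:

import numpy as np

shapes = [(384, 640, 3), (384, 640, 3)]  # letterboxed HWC shapes, one per stream (illustrative)
s = np.stack([np.array(x) for x in shapes])
rect = np.unique(s, axis=0).shape[0] == 1  # True -> rect inference is safe
assert rect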
@ -55,19 +55,19 @@ class Albumentations:
def normalize(x, mean=IMAGENET_MEAN, std=IMAGENET_STD, inplace=False): def normalize(x, mean=IMAGENET_MEAN, std=IMAGENET_STD, inplace=False):
# Denormalize RGB images x per ImageNet stats in BCHW format, i.e. = (x - mean) / std """Normalize RGB images x per ImageNet stats in BCHW format, i.e. = (x - mean) / std."""
return TF.normalize(x, mean, std, inplace=inplace) return TF.normalize(x, mean, std, inplace=inplace)
def denormalize(x, mean=IMAGENET_MEAN, std=IMAGENET_STD): def denormalize(x, mean=IMAGENET_MEAN, std=IMAGENET_STD):
# Denormalize RGB images x per ImageNet stats in BCHW format, i.e. = x * std + mean """Denormalize RGB images x per ImageNet stats in BCHW format, i.e. = x * std + mean."""
for i in range(3): for i in range(3):
x[:, i] = x[:, i] * std[i] + mean[i] x[:, i] = x[:, i] * std[i] + mean[i]
return x return x
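A round-trip sketch of the two helpers above, assuming torchvision.transforms.functional as TF (a torchvision version that accepts batched BCHW input) and the usual ImageNet constants; normalize() applies (x - mean) / std and denormalize() inverts it channel-wise:

import torch
import torchvision.transforms.functional as TF

IMAGENET_MEAN, IMAGENET_STD = (0.485, 0.456, 0.406), (0.229, 0.224, 0.225)
x = torch.rand(2, 3, 224, 224)                            # BCHW batch in [0, 1]
y = TF.normalize(x.clone(), IMAGENET_MEAN, IMAGENET_STD)  # (x - mean) / std
for i in range(3):                                        # x * std + mean, as in denormalize()
    y[:, i] = y[:, i] * IMAGENET_STD[i] + IMAGENET_MEAN[i]
assert torch.allclose(x, y, atol=1e-6)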
def augment_hsv(im, hgain=0.5, sgain=0.5, vgain=0.5): def augment_hsv(im, hgain=0.5, sgain=0.5, vgain=0.5):
# HSV color-space augmentation """HSV color-space augmentation."""
if hgain or sgain or vgain: if hgain or sgain or vgain:
r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1 # random gains r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1 # random gains
hue, sat, val = cv2.split(cv2.cvtColor(im, cv2.COLOR_BGR2HSV)) hue, sat, val = cv2.split(cv2.cvtColor(im, cv2.COLOR_BGR2HSV))
@ -83,7 +83,7 @@ def augment_hsv(im, hgain=0.5, sgain=0.5, vgain=0.5):
def hist_equalize(im, clahe=True, bgr=False): def hist_equalize(im, clahe=True, bgr=False):
# Equalize histogram on BGR image 'im' with im.shape(n,m,3) and range 0-255 """Equalize histogram on BGR image 'im' with im.shape(n,m,3) and range 0-255."""
yuv = cv2.cvtColor(im, cv2.COLOR_BGR2YUV if bgr else cv2.COLOR_RGB2YUV) yuv = cv2.cvtColor(im, cv2.COLOR_BGR2YUV if bgr else cv2.COLOR_RGB2YUV)
if clahe: if clahe:
c = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8)) c = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
@ -94,7 +94,7 @@ def hist_equalize(im, clahe=True, bgr=False):
def replicate(im, labels): def replicate(im, labels):
# Replicate labels """Replicate labels."""
h, w = im.shape[:2] h, w = im.shape[:2]
boxes = labels[:, 1:].astype(int) boxes = labels[:, 1:].astype(int)
x1, y1, x2, y2 = boxes.T x1, y1, x2, y2 = boxes.T
@ -213,7 +213,7 @@ def random_perspective(im,
xy = xy @ M.T # transform xy = xy @ M.T # transform
xy = xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2] # perspective rescale or affine xy = xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2] # perspective rescale or affine
# clip # Clip
new[i] = segment2box(xy, width, height) new[i] = segment2box(xy, width, height)
else: # warp boxes else: # warp boxes
@ -222,16 +222,16 @@ def random_perspective(im,
xy = xy @ M.T # transform xy = xy @ M.T # transform
xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]).reshape(n, 8) # perspective rescale or affine xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]).reshape(n, 8) # perspective rescale or affine
# create new boxes # Create new boxes
x = xy[:, [0, 2, 4, 6]] x = xy[:, [0, 2, 4, 6]]
y = xy[:, [1, 3, 5, 7]] y = xy[:, [1, 3, 5, 7]]
new = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T new = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
# clip # Clip
new[:, [0, 2]] = new[:, [0, 2]].clip(0, width) new[:, [0, 2]] = new[:, [0, 2]].clip(0, width)
new[:, [1, 3]] = new[:, [1, 3]].clip(0, height) new[:, [1, 3]] = new[:, [1, 3]].clip(0, height)
# filter candidates # Filter candidates
i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.01 if use_segments else 0.10) i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.01 if use_segments else 0.10)
targets = targets[i] targets = targets[i]
targets[:, 1:5] = new[i] targets[:, 1:5] = new[i]
@ -240,13 +240,13 @@ def random_perspective(im,
def copy_paste(im, labels, segments, p=0.5): def copy_paste(im, labels, segments, p=0.5):
# Implement Copy-Paste augmentation https://arxiv.org/abs/2012.07177, labels as nx5 np.array(cls, xyxy) """Implement Copy-Paste augmentation https://arxiv.org/abs/2012.07177, labels as nx5 np.array(cls, xyxy)."""
n = len(segments) n = len(segments)
if p and n: if p and n:
h, w, c = im.shape # height, width, channels h, w, c = im.shape # height, width, channels
im_new = np.zeros(im.shape, np.uint8) im_new = np.zeros(im.shape, np.uint8)
# calculate ioa first then select indexes randomly # Calculate ioa first then select indexes randomly
boxes = np.stack([w - labels[:, 3], labels[:, 2], w - labels[:, 1], labels[:, 4]], axis=-1) # (n, 4) boxes = np.stack([w - labels[:, 3], labels[:, 2], w - labels[:, 1], labels[:, 4]], axis=-1) # (n, 4)
ioa = bbox_ioa(boxes, labels[:, 1:5]) # intersection over area ioa = bbox_ioa(boxes, labels[:, 1:5]) # intersection over area
indexes = np.nonzero((ioa < 0.30).all(1))[0] # (N, ) indexes = np.nonzero((ioa < 0.30).all(1))[0] # (N, )
@ -265,7 +265,7 @@ def copy_paste(im, labels, segments, p=0.5):
def cutout(im, labels, p=0.5): def cutout(im, labels, p=0.5):
# Applies image cutout augmentation https://arxiv.org/abs/1708.04552 """Applies image cutout augmentation https://arxiv.org/abs/1708.04552."""
if random.random() < p: if random.random() < p:
h, w = im.shape[:2] h, w = im.shape[:2]
scales = [0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [0.03125] * 16 # image size fraction scales = [0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [0.03125] * 16 # image size fraction
@ -273,16 +273,16 @@ def cutout(im, labels, p=0.5):
mask_h = random.randint(1, int(h * s)) # create random masks mask_h = random.randint(1, int(h * s)) # create random masks
mask_w = random.randint(1, int(w * s)) mask_w = random.randint(1, int(w * s))
# box # Box
xmin = max(0, random.randint(0, w) - mask_w // 2) xmin = max(0, random.randint(0, w) - mask_w // 2)
ymin = max(0, random.randint(0, h) - mask_h // 2) ymin = max(0, random.randint(0, h) - mask_h // 2)
xmax = min(w, xmin + mask_w) xmax = min(w, xmin + mask_w)
ymax = min(h, ymin + mask_h) ymax = min(h, ymin + mask_h)
# apply random color mask # Apply random color mask
im[ymin:ymax, xmin:xmax] = [random.randint(64, 191) for _ in range(3)] im[ymin:ymax, xmin:xmax] = [random.randint(64, 191) for _ in range(3)]
# return unobscured labels # Return unobscured labels
if len(labels) and s > 0.03: if len(labels) and s > 0.03:
box = np.array([[xmin, ymin, xmax, ymax]], dtype=np.float32) box = np.array([[xmin, ymin, xmax, ymax]], dtype=np.float32)
ioa = bbox_ioa(box, xywhn2xyxy(labels[:, 1:5], w, h))[0] # intersection over area ioa = bbox_ioa(box, xywhn2xyxy(labels[:, 1:5], w, h))[0] # intersection over area
@ -292,7 +292,7 @@ def cutout(im, labels, p=0.5):
def mixup(im, labels, im2, labels2): def mixup(im, labels, im2, labels2):
# Applies MixUp augmentation https://arxiv.org/pdf/1710.09412.pdf """Applies MixUp augmentation https://arxiv.org/pdf/1710.09412.pdf."""
r = np.random.beta(32.0, 32.0) # mixup ratio, alpha=beta=32.0 r = np.random.beta(32.0, 32.0) # mixup ratio, alpha=beta=32.0
im = (im * r + im2 * (1 - r)).astype(np.uint8) im = (im * r + im2 * (1 - r)).astype(np.uint8)
labels = np.concatenate((labels, labels2), 0) labels = np.concatenate((labels, labels2), 0)
@ -350,7 +350,7 @@ def classify_albumentations(
def classify_transforms(size=224): def classify_transforms(size=224):
# Transforms to apply if albumentations not installed """Transforms to apply if albumentations not installed."""
assert isinstance(size, int), f'ERROR: classify_transforms size {size} must be integer, not (list, tuple)' assert isinstance(size, int), f'ERROR: classify_transforms size {size} must be integer, not (list, tuple)'
# T.Compose([T.ToTensor(), T.Resize(size), T.CenterCrop(size), T.Normalize(IMAGENET_MEAN, IMAGENET_STD)]) # T.Compose([T.ToTensor(), T.Resize(size), T.CenterCrop(size), T.Normalize(IMAGENET_MEAN, IMAGENET_STD)])
return T.Compose([CenterCrop(size), ToTensor(), T.Normalize(IMAGENET_MEAN, IMAGENET_STD)]) return T.Compose([CenterCrop(size), ToTensor(), T.Normalize(IMAGENET_MEAN, IMAGENET_STD)])

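A usage sketch for classify_transforms(); the custom CenterCrop/ToTensor in this module are assumed to accept HWC numpy images (hence cv2.imread rather than PIL), matching how this file loads images elsewhere:

import cv2

t = classify_transforms(size=224)
im = cv2.imread('bus.jpg')  # HWC BGR numpy image (illustrative file)
x = t(im)                   # expected: normalized 3x224x224 float tensor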
@ -50,7 +50,7 @@ for orientation in ExifTags.TAGS.keys():
def get_hash(paths): def get_hash(paths):
# Returns a single hash value of a list of paths (files or dirs) """Returns a single hash value of a list of paths (files or dirs)."""
size = sum(os.path.getsize(p) for p in paths if os.path.exists(p)) # sizes size = sum(os.path.getsize(p) for p in paths if os.path.exists(p)) # sizes
h = hashlib.sha256(str(size).encode()) # hash sizes h = hashlib.sha256(str(size).encode()) # hash sizes
h.update(''.join(paths).encode()) # hash paths h.update(''.join(paths).encode()) # hash paths
@ -58,7 +58,7 @@ def get_hash(paths):
def exif_size(img): def exif_size(img):
# Returns exif-corrected PIL size """Returns exif-corrected PIL size."""
s = img.size # (width, height) s = img.size # (width, height)
with contextlib.suppress(Exception): with contextlib.suppress(Exception):
rotation = dict(img._getexif().items())[orientation] rotation = dict(img._getexif().items())[orientation]
@ -94,7 +94,7 @@ def exif_transpose(image):
def seed_worker(worker_id): def seed_worker(worker_id):
# Set dataloader worker seed https://pytorch.org/docs/stable/notes/randomness.html#dataloader """Set dataloader worker seed https://pytorch.org/docs/stable/notes/randomness.html#dataloader."""
worker_seed = torch.initial_seed() % 2 ** 32 worker_seed = torch.initial_seed() % 2 ** 32
np.random.seed(worker_seed) np.random.seed(worker_seed)
random.seed(worker_seed) random.seed(worker_seed)
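seed_worker() pairs with a seeded torch.Generator handed to the DataLoader, which is how create_dataloader() in this file wires it; a minimal sketch (dataset is assumed to be any torch Dataset instance):

import torch
from torch.utils.data import DataLoader

generator = torch.Generator()
generator.manual_seed(0)
loader = DataLoader(dataset,  # assumed defined
                    batch_size=16,
                    shuffle=True,
                    worker_init_fn=seed_worker,
                    generator=generator)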
@ -156,7 +156,7 @@ def create_dataloader(path,
class InfiniteDataLoader(dataloader.DataLoader): class InfiniteDataLoader(dataloader.DataLoader):
""" Dataloader that reuses workers """Dataloader that reuses workers
Uses same syntax as vanilla DataLoader Uses same syntax as vanilla DataLoader
""" """
@ -175,7 +175,7 @@ class InfiniteDataLoader(dataloader.DataLoader):
class _RepeatSampler: class _RepeatSampler:
""" Sampler that repeats forever """Sampler that repeats forever
Args: Args:
sampler (Dataset.sampler): The sampler to repeat. sampler (Dataset.sampler): The sampler to repeat.
@ -192,7 +192,7 @@ class _RepeatSampler:
class LoadScreenshots: class LoadScreenshots:
# YOLOv5 screenshot dataloader, i.e. `python detect.py --source "screen 0 100 100 512 256"` # YOLOv5 screenshot dataloader, i.e. `python detect.py --source "screen 0 100 100 512 256"`
def __init__(self, source, img_size=640, stride=32, auto=True, transforms=None): def __init__(self, source, img_size=640, stride=32, auto=True, transforms=None):
# source = [screen_number left top width height] (pixels) """Source = [screen_number left top width height] (pixels)."""
check_requirements('mss') check_requirements('mss')
import mss import mss
@ -224,7 +224,7 @@ class LoadScreenshots:
return self return self
def __next__(self): def __next__(self):
# mss screen capture: get raw pixels from the screen as np array """mss screen capture: get raw pixels from the screen as np array."""
im0 = np.array(self.sct.grab(self.monitor))[:, :, :3] # [:, :, :3] BGRA to BGR im0 = np.array(self.sct.grab(self.monitor))[:, :, :3] # [:, :, :3] BGRA to BGR
s = f'screen {self.screen} (LTWH): {self.left},{self.top},{self.width},{self.height}: ' s = f'screen {self.screen} (LTWH): {self.left},{self.top},{self.width},{self.height}: '
@ -320,7 +320,7 @@ class LoadImages:
return path, im, im0, self.cap, s return path, im, im0, self.cap, s
def _new_video(self, path): def _new_video(self, path):
# Create a new video capture object """Create a new video capture object."""
self.frame = 0 self.frame = 0
self.cap = cv2.VideoCapture(path) self.cap = cv2.VideoCapture(path)
self.frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT) / self.vid_stride) self.frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT) / self.vid_stride)
@ -328,7 +328,7 @@ class LoadImages:
# self.cap.set(cv2.CAP_PROP_ORIENTATION_AUTO, 0) # disable https://github.com/ultralytics/yolov5/issues/8493 # self.cap.set(cv2.CAP_PROP_ORIENTATION_AUTO, 0) # disable https://github.com/ultralytics/yolov5/issues/8493
def _cv2_rotate(self, im): def _cv2_rotate(self, im):
# Rotate a cv2 video manually """Rotate a cv2 video manually."""
if self.orientation == 0: if self.orientation == 0:
return cv2.rotate(im, cv2.ROTATE_90_CLOCKWISE) return cv2.rotate(im, cv2.ROTATE_90_CLOCKWISE)
elif self.orientation == 180: elif self.orientation == 180:
@ -379,7 +379,7 @@ class LoadStreams:
self.threads[i].start() self.threads[i].start()
LOGGER.info('') # newline LOGGER.info('') # newline
# check for common shapes # Check for common shapes
s = np.stack([letterbox(x, img_size, stride=stride, auto=auto)[0].shape for x in self.imgs]) s = np.stack([letterbox(x, img_size, stride=stride, auto=auto)[0].shape for x in self.imgs])
self.rect = np.unique(s, axis=0).shape[0] == 1 # rect inference if all shapes equal self.rect = np.unique(s, axis=0).shape[0] == 1 # rect inference if all shapes equal
self.auto = auto and self.rect self.auto = auto and self.rect
@ -388,7 +388,7 @@ class LoadStreams:
LOGGER.warning('WARNING ⚠️ Stream shapes differ. For optimal performance supply similarly-shaped streams.') LOGGER.warning('WARNING ⚠️ Stream shapes differ. For optimal performance supply similarly-shaped streams.')
def update(self, i, cap, stream): def update(self, i, cap, stream):
# Read stream `i` frames in daemon thread """Read stream `i` frames in daemon thread."""
n, f = 0, self.frames[i] # frame number, frame array n, f = 0, self.frames[i] # frame number, frame array
while cap.isOpened() and n < f: while cap.isOpened() and n < f:
n += 1 n += 1
@ -428,13 +428,13 @@ class LoadStreams:
def img2label_paths(img_paths): def img2label_paths(img_paths):
# Define label paths as a function of image paths """Define label paths as a function of image paths."""
sa, sb = f'{os.sep}images{os.sep}', f'{os.sep}labels{os.sep}' # /images/, /labels/ substrings sa, sb = f'{os.sep}images{os.sep}', f'{os.sep}labels{os.sep}' # /images/, /labels/ substrings
return [sb.join(x.rsplit(sa, 1)).rsplit('.', 1)[0] + '.txt' for x in img_paths] return [sb.join(x.rsplit(sa, 1)).rsplit('.', 1)[0] + '.txt' for x in img_paths]
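img2label_paths() only swaps the last /images/ path component for /labels/ and the file extension for .txt; a quick example (POSIX separators, illustrative paths):

print(img2label_paths(['coco128/images/train2017/000000000009.jpg']))
# ['coco128/labels/train2017/000000000009.txt']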
class LoadImagesAndLabels(Dataset): class LoadImagesAndLabels(Dataset):
# YOLOv5 train_loader/val_loader, loads images and labels for training and validation """YOLOv5 train_loader/val_loader, loads images and labels for training and validation."""
cache_version = 0.6 # dataset labels *.cache version cache_version = 0.6 # dataset labels *.cache version
rand_interp_methods = [cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4] rand_interp_methods = [cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4]
@ -590,7 +590,7 @@ class LoadImagesAndLabels(Dataset):
pbar.close() pbar.close()
def check_cache_ram(self, safety_margin=0.1, prefix=''): def check_cache_ram(self, safety_margin=0.1, prefix=''):
# Check image caching requirements vs available memory """Check image caching requirements vs available memory."""
b, gb = 0, 1 << 30 # bytes of cached images, bytes per gigabytes b, gb = 0, 1 << 30 # bytes of cached images, bytes per gigabytes
n = min(self.n, 30) # extrapolate from 30 random images n = min(self.n, 30) # extrapolate from 30 random images
for _ in range(n): for _ in range(n):
@ -648,12 +648,6 @@ class LoadImagesAndLabels(Dataset):
def __len__(self): def __len__(self):
return len(self.im_files) return len(self.im_files)
# def __iter__(self):
# self.count = -1
# print('ran dataset iter')
# #self.shuffled_vector = np.random.permutation(self.nF) if self.augment else np.arange(self.nF)
# return self
def __getitem__(self, index): def __getitem__(self, index):
index = self.indices[index] # linear, shuffled, or image_weights index = self.indices[index] # linear, shuffled, or image_weights
@ -729,7 +723,7 @@ class LoadImagesAndLabels(Dataset):
return torch.from_numpy(img), labels_out, self.im_files[index], shapes return torch.from_numpy(img), labels_out, self.im_files[index], shapes
def load_image(self, i): def load_image(self, i):
# Loads 1 image from dataset index 'i', returns (im, original hw, resized hw) """Loads 1 image from dataset index 'i', returns (im, original hw, resized hw)."""
im, f, fn = self.ims[i], self.im_files[i], self.npy_files[i], im, f, fn = self.ims[i], self.im_files[i], self.npy_files[i],
if im is None: # not cached in RAM if im is None: # not cached in RAM
if fn.exists(): # load npy if fn.exists(): # load npy
@ -746,13 +740,13 @@ class LoadImagesAndLabels(Dataset):
return self.ims[i], self.im_hw0[i], self.im_hw[i] # im, hw_original, hw_resized return self.ims[i], self.im_hw0[i], self.im_hw[i] # im, hw_original, hw_resized
def cache_images_to_disk(self, i): def cache_images_to_disk(self, i):
# Saves an image as an *.npy file for faster loading """Saves an image as an *.npy file for faster loading."""
f = self.npy_files[i] f = self.npy_files[i]
if not f.exists(): if not f.exists():
np.save(f.as_posix(), cv2.imread(self.im_files[i])) np.save(f.as_posix(), cv2.imread(self.im_files[i]))
def load_mosaic(self, index): def load_mosaic(self, index):
# YOLOv5 4-mosaic loader. Loads 1 image + 3 random images into a 4-image mosaic """YOLOv5 4-mosaic loader. Loads 1 image + 3 random images into a 4-image mosaic."""
labels4, segments4 = [], [] labels4, segments4 = [], []
s = self.img_size s = self.img_size
yc, xc = (int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border) # mosaic center x, y yc, xc = (int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border) # mosaic center x, y
@ -762,7 +756,7 @@ class LoadImagesAndLabels(Dataset):
# Load image # Load image
img, _, (h, w) = self.load_image(index) img, _, (h, w) = self.load_image(index)
# place img in img4 # Place img in img4
if i == 0: # top left if i == 0: # top left
img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles
x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image) x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image)
@ -810,7 +804,7 @@ class LoadImagesAndLabels(Dataset):
return img4, labels4 return img4, labels4
def load_mosaic9(self, index): def load_mosaic9(self, index):
# YOLOv5 9-mosaic loader. Loads 1 image + 8 random images into a 9-image mosaic """YOLOv5 9-mosaic loader. Loads 1 image + 8 random images into a 9-image mosaic."""
labels9, segments9 = [], [] labels9, segments9 = [], []
s = self.img_size s = self.img_size
indices = [index] + random.choices(self.indices, k=8) # 8 additional image indices indices = [index] + random.choices(self.indices, k=8) # 8 additional image indices
@ -820,7 +814,7 @@ class LoadImagesAndLabels(Dataset):
# Load image # Load image
img, _, (h, w) = self.load_image(index) img, _, (h, w) = self.load_image(index)
# place img in img9 # Place img in img9
if i == 0: # center if i == 0: # center
img9 = np.full((s * 3, s * 3, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles img9 = np.full((s * 3, s * 3, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles
h0, w0 = h, w h0, w0 = h, w
@ -888,7 +882,7 @@ class LoadImagesAndLabels(Dataset):
@staticmethod @staticmethod
def collate_fn(batch): def collate_fn(batch):
# YOLOv8 collate function, outputs dict """YOLOv8 collate function, outputs dict."""
im, label, path, shapes = zip(*batch) # transposed im, label, path, shapes = zip(*batch) # transposed
for i, lb in enumerate(label): for i, lb in enumerate(label):
lb[:, 0] = i # add target image index for build_targets() lb[:, 0] = i # add target image index for build_targets()
@ -904,7 +898,7 @@ class LoadImagesAndLabels(Dataset):
@staticmethod @staticmethod
def collate_fn_old(batch): def collate_fn_old(batch):
# YOLOv5 original collate function """YOLOv5 original collate function."""
im, label, path, shapes = zip(*batch) # transposed im, label, path, shapes = zip(*batch) # transposed
for i, lb in enumerate(label): for i, lb in enumerate(label):
lb[:, 0] = i # add target image index for build_targets() lb[:, 0] = i # add target image index for build_targets()
@ -913,7 +907,7 @@ class LoadImagesAndLabels(Dataset):
# Ancillary functions -------------------------------------------------------------------------------------------------- # Ancillary functions --------------------------------------------------------------------------------------------------
def flatten_recursive(path=DATASETS_DIR / 'coco128'): def flatten_recursive(path=DATASETS_DIR / 'coco128'):
# Flatten a recursive directory by bringing all files to top level """Flatten a recursive directory by bringing all files to top level."""
new_path = Path(f'{str(path)}_flat') new_path = Path(f'{str(path)}_flat')
if os.path.exists(new_path): if os.path.exists(new_path):
shutil.rmtree(new_path) # delete output folder shutil.rmtree(new_path) # delete output folder
@ -930,11 +924,11 @@ def extract_boxes(path=DATASETS_DIR / 'coco128'): # from utils.dataloaders impo
n = len(files) # number of files n = len(files) # number of files
for im_file in tqdm(files, total=n): for im_file in tqdm(files, total=n):
if im_file.suffix[1:] in IMG_FORMATS: if im_file.suffix[1:] in IMG_FORMATS:
# image # Image
im = cv2.imread(str(im_file))[..., ::-1] # BGR to RGB im = cv2.imread(str(im_file))[..., ::-1] # BGR to RGB
h, w = im.shape[:2] h, w = im.shape[:2]
# labels # Labels
lb_file = Path(img2label_paths([str(im_file)])[0]) lb_file = Path(img2label_paths([str(im_file)])[0])
if Path(lb_file).exists(): if Path(lb_file).exists():
with open(lb_file) as f: with open(lb_file) as f:
@ -947,7 +941,7 @@ def extract_boxes(path=DATASETS_DIR / 'coco128'): # from utils.dataloaders impo
f.parent.mkdir(parents=True) f.parent.mkdir(parents=True)
b = x[1:] * [w, h, w, h] # box b = x[1:] * [w, h, w, h] # box
# b[2:] = b[2:].max() # rectangle to square # b[2:] = b[2:].max() # rectangle to square
b[2:] = b[2:] * 1.2 + 3 # pad b[2:] = b[2:] * 1.2 + 3 # pad
b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(int) b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(int)
@ -957,7 +951,7 @@ def extract_boxes(path=DATASETS_DIR / 'coco128'): # from utils.dataloaders impo
def autosplit(path=DATASETS_DIR / 'coco128/images', weights=(0.9, 0.1, 0.0), annotated_only=False): def autosplit(path=DATASETS_DIR / 'coco128/images', weights=(0.9, 0.1, 0.0), annotated_only=False):
""" Autosplit a dataset into train/val/test splits and save path/autosplit_*.txt files """Autosplit a dataset into train/val/test splits and save path/autosplit_*.txt files
Usage: from utils.dataloaders import *; autosplit() Usage: from utils.dataloaders import *; autosplit()
Arguments Arguments
path: Path to images directory path: Path to images directory
@ -983,11 +977,11 @@ def autosplit(path=DATASETS_DIR / 'coco128/images', weights=(0.9, 0.1, 0.0), ann
def verify_image_label(args): def verify_image_label(args):
# Verify one image-label pair """Verify one image-label pair."""
im_file, lb_file, prefix = args im_file, lb_file, prefix = args
nm, nf, ne, nc, msg, segments = 0, 0, 0, 0, '', [] # number (missing, found, empty, corrupt), message, segments nm, nf, ne, nc, msg, segments = 0, 0, 0, 0, '', [] # number (missing, found, empty, corrupt), message, segments
try: try:
# verify images # Verify images
im = Image.open(im_file) im = Image.open(im_file)
im.verify() # PIL verify im.verify() # PIL verify
shape = exif_size(im) # image size shape = exif_size(im) # image size
@ -1000,7 +994,7 @@ def verify_image_label(args):
ImageOps.exif_transpose(Image.open(im_file)).save(im_file, 'JPEG', subsampling=0, quality=100) ImageOps.exif_transpose(Image.open(im_file)).save(im_file, 'JPEG', subsampling=0, quality=100)
msg = f'{prefix}WARNING ⚠️ {im_file}: corrupt JPEG restored and saved' msg = f'{prefix}WARNING ⚠️ {im_file}: corrupt JPEG restored and saved'
# verify labels # Verify labels
if os.path.isfile(lb_file): if os.path.isfile(lb_file):
nf = 1 # label found nf = 1 # label found
with open(lb_file) as f: with open(lb_file) as f:
@ -1077,7 +1071,7 @@ def create_classification_dataloader(path,
rank=-1, rank=-1,
workers=8, workers=8,
shuffle=True): shuffle=True):
# Returns Dataloader object to be used with YOLOv5 Classifier """Returns Dataloader object to be used with YOLOv5 Classifier."""
with torch_distributed_zero_first(rank): # init dataset *.cache only once if DDP with torch_distributed_zero_first(rank): # init dataset *.cache only once if DDP
dataset = ClassificationDataset(root=path, imgsz=imgsz, augment=augment, cache=cache) dataset = ClassificationDataset(root=path, imgsz=imgsz, augment=augment, cache=cache)
batch_size = min(batch_size, len(dataset)) batch_size = min(batch_size, len(dataset))

@ -193,7 +193,7 @@ class YOLODataset(BaseDataset):
self.transforms = self.build_transforms(hyp) self.transforms = self.build_transforms(hyp)
def update_labels_info(self, label): def update_labels_info(self, label):
"""custom your label format here""" """custom your label format here."""
# NOTE: cls is not with bboxes now, classification and semantic segmentation need an independent cls label # NOTE: cls is not with bboxes now, classification and semantic segmentation need an independent cls label
# we can make it also support classification and semantic segmentation by add or remove some dict keys there. # we can make it also support classification and semantic segmentation by add or remove some dict keys there.
bboxes = label.pop('bboxes') bboxes = label.pop('bboxes')

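For YOLODataset the override regroups the popped arrays into a single instances entry. A hedged sketch of that pattern only; the Instances container and its signature are assumptions, not taken from this diff:

def update_labels_info(self, label):
    """Customize your label format here."""
    bboxes = label.pop('bboxes')
    segments = label.pop('segments')
    # Instances is assumed from ultralytics.yolo.utils.instance
    label['instances'] = Instances(bboxes, segments, bbox_format='xywh', normalized=True)
    return label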
@ -39,7 +39,7 @@ class MixAndRectDataset:
""" """
labels = deepcopy(self.dataset[index]) labels = deepcopy(self.dataset[index])
for transform in self.dataset.transforms.tolist(): for transform in self.dataset.transforms.tolist():
# mosaic and mixup # Mosaic and mixup
if hasattr(transform, 'get_indexes'): if hasattr(transform, 'get_indexes'):
indexes = transform.get_indexes(self.dataset) indexes = transform.get_indexes(self.dataset)
if not isinstance(indexes, collections.abc.Sequence): if not isinstance(indexes, collections.abc.Sequence):

@ -37,13 +37,13 @@ for orientation in ExifTags.TAGS.keys():
def img2label_paths(img_paths): def img2label_paths(img_paths):
# Define label paths as a function of image paths """Define label paths as a function of image paths."""
sa, sb = f'{os.sep}images{os.sep}', f'{os.sep}labels{os.sep}' # /images/, /labels/ substrings sa, sb = f'{os.sep}images{os.sep}', f'{os.sep}labels{os.sep}' # /images/, /labels/ substrings
return [sb.join(x.rsplit(sa, 1)).rsplit('.', 1)[0] + '.txt' for x in img_paths] return [sb.join(x.rsplit(sa, 1)).rsplit('.', 1)[0] + '.txt' for x in img_paths]
def get_hash(paths): def get_hash(paths):
# Returns a single hash value of a list of paths (files or dirs) """Returns a single hash value of a list of paths (files or dirs)."""
size = sum(os.path.getsize(p) for p in paths if os.path.exists(p)) # sizes size = sum(os.path.getsize(p) for p in paths if os.path.exists(p)) # sizes
h = hashlib.sha256(str(size).encode()) # hash sizes h = hashlib.sha256(str(size).encode()) # hash sizes
h.update(''.join(paths).encode()) # hash paths h.update(''.join(paths).encode()) # hash paths
@ -51,7 +51,7 @@ def get_hash(paths):
def exif_size(img): def exif_size(img):
# Returns exif-corrected PIL size """Returns exif-corrected PIL size."""
s = img.size # (width, height) s = img.size # (width, height)
with contextlib.suppress(Exception): with contextlib.suppress(Exception):
rotation = dict(img._getexif().items())[orientation] rotation = dict(img._getexif().items())[orientation]
@ -61,12 +61,12 @@ def exif_size(img):
def verify_image_label(args): def verify_image_label(args):
# Verify one image-label pair """Verify one image-label pair."""
im_file, lb_file, prefix, keypoint, num_cls, nkpt, ndim = args im_file, lb_file, prefix, keypoint, num_cls, nkpt, ndim = args
# number (missing, found, empty, corrupt), message, segments, keypoints # Number (missing, found, empty, corrupt), message, segments, keypoints
nm, nf, ne, nc, msg, segments, keypoints = 0, 0, 0, 0, '', [], None nm, nf, ne, nc, msg, segments, keypoints = 0, 0, 0, 0, '', [], None
try: try:
# verify images # Verify images
im = Image.open(im_file) im = Image.open(im_file)
im.verify() # PIL verify im.verify() # PIL verify
shape = exif_size(im) # image size shape = exif_size(im) # image size
@ -80,7 +80,7 @@ def verify_image_label(args):
ImageOps.exif_transpose(Image.open(im_file)).save(im_file, 'JPEG', subsampling=0, quality=100) ImageOps.exif_transpose(Image.open(im_file)).save(im_file, 'JPEG', subsampling=0, quality=100)
msg = f'{prefix}WARNING ⚠️ {im_file}: corrupt JPEG restored and saved' msg = f'{prefix}WARNING ⚠️ {im_file}: corrupt JPEG restored and saved'
# verify labels # Verify labels
if os.path.isfile(lb_file): if os.path.isfile(lb_file):
nf = 1 # label found nf = 1 # label found
with open(lb_file) as f: with open(lb_file) as f:
@ -191,7 +191,7 @@ def polygons2masks_overlap(imgsz, segments, downsample_ratio=1):
def check_det_dataset(dataset, autodownload=True): def check_det_dataset(dataset, autodownload=True):
# Download, check and/or unzip dataset if not found locally """Download, check and/or unzip dataset if not found locally."""
data = check_file(dataset) data = check_file(dataset)
# Download (optional) # Download (optional)
@ -321,7 +321,7 @@ class HUBDatasetStats():
""" """
def __init__(self, path='coco128.yaml', autodownload=False): def __init__(self, path='coco128.yaml', autodownload=False):
# Initialize class """Initialize class."""
zipped, data_dir, yaml_path = self._unzip(Path(path)) zipped, data_dir, yaml_path = self._unzip(Path(path))
try: try:
# data = yaml_load(check_yaml(yaml_path)) # data dict # data = yaml_load(check_yaml(yaml_path)) # data dict
@ -339,7 +339,7 @@ class HUBDatasetStats():
@staticmethod @staticmethod
def _find_yaml(dir): def _find_yaml(dir):
# Return data.yaml file """Return data.yaml file."""
files = list(dir.glob('*.yaml')) or list(dir.rglob('*.yaml')) # try root level first and then recursive files = list(dir.glob('*.yaml')) or list(dir.rglob('*.yaml')) # try root level first and then recursive
assert files, f'No *.yaml file found in {dir}' assert files, f'No *.yaml file found in {dir}'
if len(files) > 1: if len(files) > 1:
@ -349,7 +349,7 @@ class HUBDatasetStats():
return files[0] return files[0]
def _unzip(self, path): def _unzip(self, path):
# Unzip data.zip """Unzip data.zip."""
if not str(path).endswith('.zip'): # path is data.yaml if not str(path).endswith('.zip'): # path is data.yaml
return False, None, path return False, None, path
assert Path(path).is_file(), f'Error unzipping {path}, file not found' assert Path(path).is_file(), f'Error unzipping {path}, file not found'
@ -362,12 +362,12 @@ class HUBDatasetStats():
compress_one_image(f, self.im_dir / Path(f).name) # save to dataset-hub compress_one_image(f, self.im_dir / Path(f).name) # save to dataset-hub
def get_json(self, save=False, verbose=False): def get_json(self, save=False, verbose=False):
# Return dataset JSON for Ultralytics HUB """Return dataset JSON for Ultralytics HUB."""
# from ultralytics.yolo.data import YOLODataset # from ultralytics.yolo.data import YOLODataset
from ultralytics.yolo.data.dataloaders.v5loader import LoadImagesAndLabels from ultralytics.yolo.data.dataloaders.v5loader import LoadImagesAndLabels
def _round(labels): def _round(labels):
# Update labels to integer class and 6 decimal place floats """Update labels to integer class and 4 decimal place floats."""
return [[int(c), *(round(x, 4) for x in points)] for c, *points in labels] return [[int(c), *(round(x, 4) for x in points)] for c, *points in labels]
for split in 'train', 'val', 'test': for split in 'train', 'val', 'test':
@ -400,7 +400,7 @@ class HUBDatasetStats():
return self.stats return self.stats
def process_images(self): def process_images(self):
# Compress images for Ultralytics HUB """Compress images for Ultralytics HUB."""
# from ultralytics.yolo.data import YOLODataset # from ultralytics.yolo.data import YOLODataset
from ultralytics.yolo.data.dataloaders.v5loader import LoadImagesAndLabels from ultralytics.yolo.data.dataloaders.v5loader import LoadImagesAndLabels

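A usage sketch for HUBDatasetStats matching the constructor and methods shown above; the YAML name is illustrative:

from ultralytics.yolo.data.utils import HUBDatasetStats

stats = HUBDatasetStats('coco128.yaml', autodownload=True)  # also accepts a dataset *.zip
stats.get_json(save=False)   # aggregate per-split label statistics
stats.process_images()       # compress images for Ultralytics HUB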
@ -73,7 +73,7 @@ ARM64 = platform.machine() in ('arm64', 'aarch64')
def export_formats(): def export_formats():
"""YOLOv8 export formats""" """YOLOv8 export formats."""
import pandas import pandas
x = [ x = [
['PyTorch', '-', '.pt', True, True], ['PyTorch', '-', '.pt', True, True],
@ -92,7 +92,7 @@ def export_formats():
def gd_outputs(gd): def gd_outputs(gd):
"""TensorFlow GraphDef model output node names""" """TensorFlow GraphDef model output node names."""
name_list, input_list = [], [] name_list, input_list = [], []
for node in gd.node: # tensorflow.core.framework.node_def_pb2.NodeDef for node in gd.node: # tensorflow.core.framework.node_def_pb2.NodeDef
name_list.append(node.name) name_list.append(node.name)
@ -101,7 +101,7 @@ def gd_outputs(gd):
def try_export(inner_func): def try_export(inner_func):
"""YOLOv8 export decorator, i..e @try_export""" """YOLOv8 export decorator, i..e @try_export."""
inner_args = get_default_args(inner_func) inner_args = get_default_args(inner_func)
def outer_func(*args, **kwargs): def outer_func(*args, **kwargs):
@ -119,7 +119,7 @@ def try_export(inner_func):
class iOSDetectModel(torch.nn.Module): class iOSDetectModel(torch.nn.Module):
"""Wrap an Ultralytics YOLO model for iOS export""" """Wrap an Ultralytics YOLO model for iOS export."""
def __init__(self, model, im): def __init__(self, model, im):
super().__init__() super().__init__()
@ -246,28 +246,28 @@ class Exporter:
# Exports # Exports
f = [''] * len(fmts) # exported filenames f = [''] * len(fmts) # exported filenames
if jit: # TorchScript if jit: # TorchScript
f[0], _ = self._export_torchscript() f[0], _ = self.export_torchscript()
if engine: # TensorRT required before ONNX if engine: # TensorRT required before ONNX
f[1], _ = self._export_engine() f[1], _ = self.export_engine()
if onnx or xml: # OpenVINO requires ONNX if onnx or xml: # OpenVINO requires ONNX
f[2], _ = self._export_onnx() f[2], _ = self.export_onnx()
if xml: # OpenVINO if xml: # OpenVINO
f[3], _ = self._export_openvino() f[3], _ = self.export_openvino()
if coreml: # CoreML if coreml: # CoreML
f[4], _ = self._export_coreml() f[4], _ = self.export_coreml()
if any((saved_model, pb, tflite, edgetpu, tfjs)): # TensorFlow formats if any((saved_model, pb, tflite, edgetpu, tfjs)): # TensorFlow formats
self.args.int8 |= edgetpu self.args.int8 |= edgetpu
f[5], s_model = self._export_saved_model() f[5], s_model = self.export_saved_model()
if pb or tfjs: # pb prerequisite to tfjs if pb or tfjs: # pb prerequisite to tfjs
f[6], _ = self._export_pb(s_model) f[6], _ = self.export_pb(s_model)
if tflite: if tflite:
f[7], _ = self._export_tflite(s_model, nms=False, agnostic_nms=self.args.agnostic_nms) f[7], _ = self.export_tflite(s_model, nms=False, agnostic_nms=self.args.agnostic_nms)
if edgetpu: if edgetpu:
f[8], _ = self._export_edgetpu(tflite_model=Path(f[5]) / f'{self.file.stem}_full_integer_quant.tflite') f[8], _ = self.export_edgetpu(tflite_model=Path(f[5]) / f'{self.file.stem}_full_integer_quant.tflite')
if tfjs: if tfjs:
f[9], _ = self._export_tfjs() f[9], _ = self.export_tfjs()
if paddle: # PaddlePaddle if paddle: # PaddlePaddle
f[10], _ = self._export_paddle() f[10], _ = self.export_paddle()
# Finish # Finish
f = [str(x) for x in f if x] # filter out '' and None f = [str(x) for x in f if x] # filter out '' and None
@ -289,8 +289,8 @@ class Exporter:
return f # return list of exported files/dirs return f # return list of exported files/dirs
@try_export @try_export
def _export_torchscript(self, prefix=colorstr('TorchScript:')): def export_torchscript(self, prefix=colorstr('TorchScript:')):
# YOLOv8 TorchScript model export """YOLOv8 TorchScript model export."""
LOGGER.info(f'\n{prefix} starting export with torch {torch.__version__}...') LOGGER.info(f'\n{prefix} starting export with torch {torch.__version__}...')
f = self.file.with_suffix('.torchscript') f = self.file.with_suffix('.torchscript')
@ -305,8 +305,8 @@ class Exporter:
return f, None return f, None
@try_export @try_export
def _export_onnx(self, prefix=colorstr('ONNX:')): def export_onnx(self, prefix=colorstr('ONNX:')):
# YOLOv8 ONNX export """YOLOv8 ONNX export."""
requirements = ['onnx>=1.12.0'] requirements = ['onnx>=1.12.0']
if self.args.simplify: if self.args.simplify:
requirements += ['onnxsim>=0.4.17', 'onnxruntime-gpu' if torch.cuda.is_available() else 'onnxruntime'] requirements += ['onnxsim>=0.4.17', 'onnxruntime-gpu' if torch.cuda.is_available() else 'onnxruntime']
@ -363,8 +363,8 @@ class Exporter:
return f, model_onnx return f, model_onnx
@try_export @try_export
def _export_openvino(self, prefix=colorstr('OpenVINO:')): def export_openvino(self, prefix=colorstr('OpenVINO:')):
# YOLOv8 OpenVINO export """YOLOv8 OpenVINO export."""
check_requirements('openvino-dev>=2022.3') # requires openvino-dev: https://pypi.org/project/openvino-dev/ check_requirements('openvino-dev>=2022.3') # requires openvino-dev: https://pypi.org/project/openvino-dev/
import openvino.runtime as ov # noqa import openvino.runtime as ov # noqa
from openvino.tools import mo # noqa from openvino.tools import mo # noqa
@ -383,8 +383,8 @@ class Exporter:
return f, None return f, None
@try_export @try_export
def _export_paddle(self, prefix=colorstr('PaddlePaddle:')): def export_paddle(self, prefix=colorstr('PaddlePaddle:')):
# YOLOv8 Paddle export """YOLOv8 Paddle export."""
check_requirements(('paddlepaddle', 'x2paddle')) check_requirements(('paddlepaddle', 'x2paddle'))
import x2paddle # noqa import x2paddle # noqa
from x2paddle.convert import pytorch2paddle # noqa from x2paddle.convert import pytorch2paddle # noqa
@ -397,8 +397,8 @@ class Exporter:
return f, None return f, None
@try_export @try_export
def _export_coreml(self, prefix=colorstr('CoreML:')): def export_coreml(self, prefix=colorstr('CoreML:')):
# YOLOv8 CoreML export """YOLOv8 CoreML export."""
check_requirements('coremltools>=6.0') check_requirements('coremltools>=6.0')
import coremltools as ct # noqa import coremltools as ct # noqa
@ -439,8 +439,8 @@ class Exporter:
return f, ct_model return f, ct_model
@try_export @try_export
def _export_engine(self, workspace=4, verbose=False, prefix=colorstr('TensorRT:')): def export_engine(self, workspace=4, verbose=False, prefix=colorstr('TensorRT:')):
# YOLOv8 TensorRT export https://developer.nvidia.com/tensorrt """YOLOv8 TensorRT export https://developer.nvidia.com/tensorrt."""
assert self.im.device.type != 'cpu', "export running on CPU but must be on GPU, i.e. use 'device=0'" assert self.im.device.type != 'cpu', "export running on CPU but must be on GPU, i.e. use 'device=0'"
try: try:
import tensorrt as trt # noqa import tensorrt as trt # noqa
@ -451,7 +451,7 @@ class Exporter:
check_version(trt.__version__, '7.0.0', hard=True) # require tensorrt>=7.0.0 check_version(trt.__version__, '7.0.0', hard=True) # require tensorrt>=7.0.0
self.args.simplify = True self.args.simplify = True
f_onnx, _ = self._export_onnx() f_onnx, _ = self.export_onnx()
LOGGER.info(f'\n{prefix} starting export with TensorRT {trt.__version__}...') LOGGER.info(f'\n{prefix} starting export with TensorRT {trt.__version__}...')
assert Path(f_onnx).exists(), f'failed to export ONNX file: {f_onnx}' assert Path(f_onnx).exists(), f'failed to export ONNX file: {f_onnx}'
@ -504,9 +504,8 @@ class Exporter:
return f, None return f, None
@try_export @try_export
def _export_saved_model(self, prefix=colorstr('TensorFlow SavedModel:')): def export_saved_model(self, prefix=colorstr('TensorFlow SavedModel:')):
"""YOLOv8 TensorFlow SavedModel export."""
# YOLOv8 TensorFlow SavedModel export
try: try:
import tensorflow as tf # noqa import tensorflow as tf # noqa
except ImportError: except ImportError:
@ -525,7 +524,7 @@ class Exporter:
# Export to ONNX # Export to ONNX
self.args.simplify = True self.args.simplify = True
f_onnx, _ = self._export_onnx() f_onnx, _ = self.export_onnx()
# Export to TF # Export to TF
int8 = '-oiqt -qt per-tensor' if self.args.int8 else '' int8 = '-oiqt -qt per-tensor' if self.args.int8 else ''
@ -551,8 +550,8 @@ class Exporter:
return str(f), keras_model return str(f), keras_model
@try_export @try_export
def _export_pb(self, keras_model, prefix=colorstr('TensorFlow GraphDef:')): def export_pb(self, keras_model, prefix=colorstr('TensorFlow GraphDef:')):
# YOLOv8 TensorFlow GraphDef *.pb export https://github.com/leimao/Frozen_Graph_TensorFlow """YOLOv8 TensorFlow GraphDef *.pb export https://github.com/leimao/Frozen_Graph_TensorFlow."""
import tensorflow as tf # noqa import tensorflow as tf # noqa
from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2 # noqa from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2 # noqa
@ -567,8 +566,8 @@ class Exporter:
return f, None return f, None
@try_export @try_export
def _export_tflite(self, keras_model, nms, agnostic_nms, prefix=colorstr('TensorFlow Lite:')): def export_tflite(self, keras_model, nms, agnostic_nms, prefix=colorstr('TensorFlow Lite:')):
# YOLOv8 TensorFlow Lite export """YOLOv8 TensorFlow Lite export."""
import tensorflow as tf # noqa import tensorflow as tf # noqa
LOGGER.info(f'\n{prefix} starting export with tensorflow {tf.__version__}...') LOGGER.info(f'\n{prefix} starting export with tensorflow {tf.__version__}...')
@ -581,44 +580,9 @@ class Exporter:
f = saved_model / f'{self.file.stem}_float32.tflite' f = saved_model / f'{self.file.stem}_float32.tflite'
return str(f), None return str(f), None
# # OLD TFLITE EXPORT CODE BELOW -------------------------------------------------------------------------------
# batch_size, ch, *imgsz = list(self.im.shape) # BCHW
# f = str(self.file).replace(self.file.suffix, '-fp16.tflite')
#
# converter = tf.lite.TFLiteConverter.from_keras_model(keras_model)
# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS]
# converter.target_spec.supported_types = [tf.float16]
# converter.optimizations = [tf.lite.Optimize.DEFAULT]
# if self.args.int8:
#
# def representative_dataset_gen(dataset, n_images=100):
# # Dataset generator for use with converter.representative_dataset, returns a generator of np arrays
# for n, (path, img, im0s, vid_cap, string) in enumerate(dataset):
# im = np.transpose(img, [1, 2, 0])
# im = np.expand_dims(im, axis=0).astype(np.float32)
# im /= 255
# yield [im]
# if n >= n_images:
# break
#
# dataset = LoadImages(check_det_dataset(self.args.data)['train'], imgsz=imgsz, auto=False)
# converter.representative_dataset = lambda: representative_dataset_gen(dataset, n_images=100)
# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
# converter.target_spec.supported_types = []
# converter.inference_input_type = tf.uint8 # or tf.int8
# converter.inference_output_type = tf.uint8 # or tf.int8
# converter.experimental_new_quantizer = True
# f = str(self.file).replace(self.file.suffix, '-int8.tflite')
# if nms or agnostic_nms:
# converter.target_spec.supported_ops.append(tf.lite.OpsSet.SELECT_TF_OPS)
#
# tflite_model = converter.convert()
# open(f, 'wb').write(tflite_model)
# return f, None
@try_export @try_export
def _export_edgetpu(self, tflite_model='', prefix=colorstr('Edge TPU:')): def export_edgetpu(self, tflite_model='', prefix=colorstr('Edge TPU:')):
# YOLOv8 Edge TPU export https://coral.ai/docs/edgetpu/models-intro/ """YOLOv8 Edge TPU export https://coral.ai/docs/edgetpu/models-intro/."""
LOGGER.warning(f'{prefix} WARNING ⚠️ Edge TPU known bug https://github.com/ultralytics/ultralytics/issues/1185') LOGGER.warning(f'{prefix} WARNING ⚠️ Edge TPU known bug https://github.com/ultralytics/ultralytics/issues/1185')
cmd = 'edgetpu_compiler --version' cmd = 'edgetpu_compiler --version'
@ -644,8 +608,8 @@ class Exporter:
return f, None return f, None
@try_export @try_export
def _export_tfjs(self, prefix=colorstr('TensorFlow.js:')): def export_tfjs(self, prefix=colorstr('TensorFlow.js:')):
# YOLOv8 TensorFlow.js export """YOLOv8 TensorFlow.js export."""
check_requirements('tensorflowjs') check_requirements('tensorflowjs')
import tensorflow as tf import tensorflow as tf
import tensorflowjs as tfjs # noqa import tensorflowjs as tfjs # noqa
@ -681,7 +645,7 @@ class Exporter:
return f, None return f, None
def _add_tflite_metadata(self, file): def _add_tflite_metadata(self, file):
# Add metadata to *.tflite models per https://www.tensorflow.org/lite/models/convert/metadata """Add metadata to *.tflite models per https://www.tensorflow.org/lite/models/convert/metadata."""
from tflite_support import flatbuffers # noqa from tflite_support import flatbuffers # noqa
from tflite_support import metadata as _metadata # noqa from tflite_support import metadata as _metadata # noqa
from tflite_support import metadata_schema_py_generated as _metadata_fb # noqa from tflite_support import metadata_schema_py_generated as _metadata_fb # noqa

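The rename from _export_* to export_* makes the per-format exporters public. End to end they are still reached through the model API; a usage sketch:

from ultralytics import YOLO

model = YOLO('yolov8n.pt')
model.export(format='onnx')  # dispatches to Exporter.export_onnx() after this rename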
@ -35,6 +35,7 @@ class YOLO:
Args: Args:
model (str, Path): Path to the model file to load or create. model (str, Path): Path to the model file to load or create.
task (Any, optional): Task type for the YOLO model. Defaults to None.
Attributes: Attributes:
predictor (Any): The predictor object. predictor (Any): The predictor object.
@ -76,7 +77,6 @@ class YOLO:
Args: Args:
model (Union[str, Path], optional): Path or name of the model to load or create. Defaults to 'yolov8n.pt'. model (Union[str, Path], optional): Path or name of the model to load or create. Defaults to 'yolov8n.pt'.
task (Any, optional): Task type for the YOLO model. Defaults to None. task (Any, optional): Task type for the YOLO model. Defaults to None.
""" """
self.callbacks = callbacks.get_default_callbacks() self.callbacks = callbacks.get_default_callbacks()
self.predictor = None # reuse predictor self.predictor = None # reuse predictor
@ -273,7 +273,7 @@ class YOLO:
@smart_inference_mode() @smart_inference_mode()
def val(self, data=None, **kwargs): def val(self, data=None, **kwargs):
""" """
Validate a model on a given dataset . Validate a model on a given dataset.
Args: Args:
data (str): The dataset to validate on. Accepts all formats accepted by yolo data (str): The dataset to validate on. Accepts all formats accepted by yolo
@ -365,7 +365,7 @@ class YOLO:
self.model = self.trainer.model self.model = self.trainer.model
self.trainer.hub_session = self.session # attach optional HUB session self.trainer.hub_session = self.session # attach optional HUB session
self.trainer.train() self.trainer.train()
# update model and cfg after training # Update model and cfg after training
if RANK in (-1, 0): if RANK in (-1, 0):
self.model, _ = attempt_load_one_weight(str(self.trainer.best)) self.model, _ = attempt_load_one_weight(str(self.trainer.best))
self.overrides = self.model.args self.overrides = self.model.args

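A usage sketch for val() with the corrected docstring above; metrics.box.map is assumed to be the mAP50-95 attribute of the returned detection metrics:

from ultralytics import YOLO

model = YOLO('yolov8n.pt')
metrics = model.val(data='coco128.yaml')  # any format accepted by yolo
print(metrics.box.map)                    # mAP50-95 (assumed attribute)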
@ -134,7 +134,7 @@ class BasePredictor:
if not self.args.retina_masks: if not self.args.retina_masks:
plot_args['im_gpu'] = im[idx] plot_args['im_gpu'] = im[idx]
self.plotted_img = result.plot(**plot_args) self.plotted_img = result.plot(**plot_args)
# write # Write
if self.args.save_txt: if self.args.save_txt:
result.save_txt(f'{self.txt_path}.txt', save_conf=self.args.save_conf) result.save_txt(f'{self.txt_path}.txt', save_conf=self.args.save_conf)
if self.args.save_crop: if self.args.save_crop:
@ -153,7 +153,7 @@ class BasePredictor:
return list(self.stream_inference(source, model)) # merge list of Result into one return list(self.stream_inference(source, model)) # merge list of Result into one
def predict_cli(self, source=None, model=None): def predict_cli(self, source=None, model=None):
# Method used for CLI prediction. It uses always generator as outputs as not required by CLI mode """Method used for CLI prediction. It always uses a generator as outputs, since accumulated results are not required in CLI mode."""
gen = self.stream_inference(source, model) gen = self.stream_inference(source, model)
for _ in gen: # running CLI inference without accumulating any outputs (do not modify) for _ in gen: # running CLI inference without accumulating any outputs (do not modify)
pass pass
@ -182,16 +182,16 @@ class BasePredictor:
if self.args.verbose: if self.args.verbose:
LOGGER.info('') LOGGER.info('')
# setup model # Setup model
if not self.model: if not self.model:
self.setup_model(model) self.setup_model(model)
# setup source every time predict is called # Setup source every time predict is called
self.setup_source(source if source is not None else self.args.source) self.setup_source(source if source is not None else self.args.source)
# check if save_dir/ label file exists # Check if save_dir/ label file exists
if self.args.save or self.args.save_txt: if self.args.save or self.args.save_txt:
(self.save_dir / 'labels' if self.args.save_txt else self.save_dir).mkdir(parents=True, exist_ok=True) (self.save_dir / 'labels' if self.args.save_txt else self.save_dir).mkdir(parents=True, exist_ok=True)
# warmup model # Warmup model
if not self.done_warmup: if not self.done_warmup:
self.model.warmup(imgsz=(1 if self.model.pt or self.model.triton else self.dataset.bs, 3, *self.imgsz)) self.model.warmup(imgsz=(1 if self.model.pt or self.model.triton else self.dataset.bs, 3, *self.imgsz))
self.done_warmup = True self.done_warmup = True
@ -204,22 +204,22 @@ class BasePredictor:
path, im, im0s, vid_cap, s = batch path, im, im0s, vid_cap, s = batch
visualize = increment_path(self.save_dir / Path(path).stem, mkdir=True) if self.args.visualize else False visualize = increment_path(self.save_dir / Path(path).stem, mkdir=True) if self.args.visualize else False
# preprocess # Preprocess
with self.dt[0]: with self.dt[0]:
im = self.preprocess(im) im = self.preprocess(im)
if len(im.shape) == 3: if len(im.shape) == 3:
im = im[None] # expand for batch dim im = im[None] # expand for batch dim
# inference # Inference
with self.dt[1]: with self.dt[1]:
preds = self.model(im, augment=self.args.augment, visualize=visualize) preds = self.model(im, augment=self.args.augment, visualize=visualize)
# postprocess # Postprocess
with self.dt[2]: with self.dt[2]:
self.results = self.postprocess(preds, im, im0s) self.results = self.postprocess(preds, im, im0s)
self.run_callbacks('on_predict_postprocess_end') self.run_callbacks('on_predict_postprocess_end')
# visualize, save, write results # Visualize, save, write results
n = len(im) n = len(im)
for i in range(n): for i in range(n):
self.results[i].speed = { self.results[i].speed = {
@ -288,7 +288,7 @@ class BasePredictor:
def save_preds(self, vid_cap, idx, save_path): def save_preds(self, vid_cap, idx, save_path):
im0 = self.plotted_img im0 = self.plotted_img
# save imgs # Save imgs
if self.dataset.mode == 'image': if self.dataset.mode == 'image':
cv2.imwrite(save_path, im0) cv2.imwrite(save_path, im0)
else: # 'video' or 'stream' else: # 'video' or 'stream'

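predict_cli() drains a generator without keeping results; from Python the same streaming behaviour is exposed via stream=True. A sketch:

from ultralytics import YOLO

model = YOLO('yolov8n.pt')
for result in model.predict(source='bus.jpg', stream=True):  # generator of Results
    pass  # consume without accumulating, mirroring the CLI path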
@ -262,12 +262,12 @@ class Results(SimpleClass):
kpts = self.keypoints kpts = self.keypoints
texts = [] texts = []
if probs is not None: if probs is not None:
# classify # Classify
n5 = min(len(self.names), 5) n5 = min(len(self.names), 5)
top5i = probs.argsort(0, descending=True)[:n5].tolist() # top 5 indices top5i = probs.argsort(0, descending=True)[:n5].tolist() # top 5 indices
[texts.append(f'{probs[j]:.2f} {self.names[j]}') for j in top5i] [texts.append(f'{probs[j]:.2f} {self.names[j]}') for j in top5i]
elif boxes: elif boxes:
# detect/segment/pose # Detect/segment/pose
for j, d in enumerate(boxes): for j, d in enumerate(boxes):
c, conf, id = int(d.cls), float(d.conf), None if d.id is None else int(d.id.item()) c, conf, id = int(d.cls), float(d.conf), None if d.id is None else int(d.id.item())
line = (c, *d.xywhn.view(-1)) line = (c, *d.xywhn.view(-1))
@ -418,7 +418,7 @@ class Masks(BaseTensor):
@property @property
@lru_cache(maxsize=1) @lru_cache(maxsize=1)
def segments(self): def segments(self):
# Segments-deprecated (normalized) """Segments-deprecated (normalized)."""
LOGGER.warning("WARNING ⚠️ 'Masks.segments' is deprecated. Use 'Masks.xyn' for segments (normalized) and " LOGGER.warning("WARNING ⚠️ 'Masks.segments' is deprecated. Use 'Masks.xyn' for segments (normalized) and "
"'Masks.xy' for segments (pixels) instead.") "'Masks.xy' for segments (pixels) instead.")
return self.xyn return self.xyn
@ -426,7 +426,7 @@ class Masks(BaseTensor):
@property @property
@lru_cache(maxsize=1) @lru_cache(maxsize=1)
def xyn(self): def xyn(self):
# Segments (normalized) """Segments (normalized)."""
return [ return [
ops.scale_coords(self.data.shape[1:], x, self.orig_shape, normalize=True) ops.scale_coords(self.data.shape[1:], x, self.orig_shape, normalize=True)
for x in ops.masks2segments(self.data)] for x in ops.masks2segments(self.data)]
@ -434,7 +434,7 @@ class Masks(BaseTensor):
@property @property
@lru_cache(maxsize=1) @lru_cache(maxsize=1)
def xy(self): def xy(self):
# Segments (pixels) """Segments (pixels)."""
return [ return [
ops.scale_coords(self.data.shape[1:], x, self.orig_shape, normalize=False) ops.scale_coords(self.data.shape[1:], x, self.orig_shape, normalize=False)
for x in ops.masks2segments(self.data)] for x in ops.masks2segments(self.data)]

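A usage sketch for the Masks accessors above; xy returns pixel-space segments and xyn their normalized counterparts:

from ultralytics import YOLO

model = YOLO('yolov8n-seg.pt')
result = model('bus.jpg')[0]         # illustrative image
if result.masks is not None:
    polygons_px = result.masks.xy    # per-instance segments in pixels
    polygons_n = result.masks.xyn    # same segments normalized to 0-1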
@ -163,7 +163,7 @@ class BaseTrainer:
callback(self) callback(self)
def train(self): def train(self):
# Allow device='', device=None on Multi-GPU systems to default to device=0 """Allow device='', device=None on Multi-GPU systems to default to device=0."""
if isinstance(self.args.device, int) or self.args.device: # i.e. device=0 or device=[0,1,2,3] if isinstance(self.args.device, int) or self.args.device: # i.e. device=0 or device=[0,1,2,3]
world_size = torch.cuda.device_count() world_size = torch.cuda.device_count()
elif torch.cuda.is_available(): # i.e. device=None or device='' elif torch.cuda.is_available(): # i.e. device=None or device=''
@ -306,7 +306,7 @@ class BaseTrainer:
xi = [0, nw] # x interp xi = [0, nw] # x interp
self.accumulate = max(1, np.interp(ni, xi, [1, self.args.nbs / self.batch_size]).round()) self.accumulate = max(1, np.interp(ni, xi, [1, self.args.nbs / self.batch_size]).round())
for j, x in enumerate(self.optimizer.param_groups): for j, x in enumerate(self.optimizer.param_groups):
# bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0 # Bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
x['lr'] = np.interp( x['lr'] = np.interp(
ni, xi, [self.args.warmup_bias_lr if j == 0 else 0.0, x['initial_lr'] * self.lf(epoch)]) ni, xi, [self.args.warmup_bias_lr if j == 0 else 0.0, x['initial_lr'] * self.lf(epoch)])
if 'momentum' in x: if 'momentum' in x:
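
For intuition, a standalone sketch of the warmup interpolation above (nw, lr0 and warmup_bias_lr values are illustrative assumptions, not taken from the source): the bias group (j == 0) decays from 0.1 toward lr0 while every other group rises from 0.0.

import numpy as np

# Hypothetical warmup settings, for illustration only
nw = 1000             # assumed number of warmup iterations
lr0 = 0.01            # assumed base learning rate
warmup_bias_lr = 0.1  # bias group starts high and decays
xi = [0, nw]          # x interp points, as in the loop above

for ni in (0, 250, 500, 1000):  # integrated batch index during warmup
    bias_lr = np.interp(ni, xi, [warmup_bias_lr, lr0])  # j == 0: 0.1 -> lr0
    other_lr = np.interp(ni, xi, [0.0, lr0])            # j > 0:  0.0 -> lr0
    print(f'ni={ni:4d}  bias_lr={bias_lr:.4f}  other_lr={other_lr:.4f}')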
@ -631,7 +631,7 @@ def check_amp(model):
return False # AMP only used on CUDA devices return False # AMP only used on CUDA devices
def amp_allclose(m, im): def amp_allclose(m, im):
# All close FP32 vs AMP results """All close FP32 vs AMP results."""
a = m(im, device=device, verbose=False)[0].boxes.data # FP32 inference a = m(im, device=device, verbose=False)[0].boxes.data # FP32 inference
with torch.cuda.amp.autocast(True): with torch.cuda.amp.autocast(True):
b = m(im, device=device, verbose=False)[0].boxes.data # AMP inference b = m(im, device=device, verbose=False)[0].boxes.data # AMP inference
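
The check then reduces to a shape and tolerance comparison between the two outputs; a minimal self-contained sketch of that final step (the 0.5 absolute tolerance is an assumption for illustration):

import torch

def amp_allclose_sketch(a, b, atol=0.5):
    """Compare FP32 vs AMP box tensors within an absolute tolerance (atol assumed)."""
    return a.shape == b.shape and torch.allclose(a, b.float(), atol=atol)

fp32 = torch.tensor([[10.0, 20.0, 30.0, 40.0]])
amp = fp32 + 0.1  # small AMP numerical drift
print(amp_allclose_sketch(fp32, amp))  # True: within tolerance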

@ -149,20 +149,20 @@ class BaseValidator:
for batch_i, batch in enumerate(bar): for batch_i, batch in enumerate(bar):
self.run_callbacks('on_val_batch_start') self.run_callbacks('on_val_batch_start')
self.batch_i = batch_i self.batch_i = batch_i
# preprocess # Preprocess
with dt[0]: with dt[0]:
batch = self.preprocess(batch) batch = self.preprocess(batch)
# inference # Inference
with dt[1]: with dt[1]:
preds = model(batch['img']) preds = model(batch['img'])
# loss # Loss
with dt[2]: with dt[2]:
if self.training: if self.training:
self.loss += trainer.criterion(preds, batch)[1] self.loss += trainer.criterion(preds, batch)[1]
# postprocess # Postprocess
with dt[3]: with dt[3]:
preds = self.postprocess(preds) preds = self.postprocess(preds)

@ -199,7 +199,7 @@ def plt_settings(rcparams={'font.size': 11}, backend='Agg'):
def set_logging(name=LOGGING_NAME, verbose=True): def set_logging(name=LOGGING_NAME, verbose=True):
# sets up logging for the given name """Sets up logging for the given name."""
rank = int(os.getenv('RANK', -1)) # rank in world for Multi-GPU trainings rank = int(os.getenv('RANK', -1)) # rank in world for Multi-GPU trainings
level = logging.INFO if verbose and rank in {-1, 0} else logging.ERROR level = logging.INFO if verbose and rank in {-1, 0} else logging.ERROR
logging.config.dictConfig({ logging.config.dictConfig({
@ -539,12 +539,12 @@ SETTINGS_YAML = USER_CONFIG_DIR / 'settings.yaml'
def emojis(string=''): def emojis(string=''):
# Return platform-dependent emoji-safe version of string """Return platform-dependent emoji-safe version of string."""
return string.encode().decode('ascii', 'ignore') if WINDOWS else string return string.encode().decode('ascii', 'ignore') if WINDOWS else string
def colorstr(*input): def colorstr(*input):
# Colors a string https://en.wikipedia.org/wiki/ANSI_escape_code, i.e. colorstr('blue', 'hello world') """Colors a string https://en.wikipedia.org/wiki/ANSI_escape_code, i.e. colorstr('blue', 'hello world')."""
*args, string = input if len(input) > 1 else ('blue', 'bold', input[0]) # color arguments, string *args, string = input if len(input) > 1 else ('blue', 'bold', input[0]) # color arguments, string
colors = { colors = {
'black': '\033[30m', # basic colors 'black': '\033[30m', # basic colors
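
A short usage sketch, assuming the usual import path in this codebase:

from ultralytics.yolo.utils import colorstr

print(colorstr('hello world'))             # single argument defaults to blue + bold
print(colorstr('red', 'bold', 'warning'))  # explicit color arguments, then the string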
@ -570,7 +570,8 @@ def colorstr(*input):
class TryExcept(contextlib.ContextDecorator): class TryExcept(contextlib.ContextDecorator):
# YOLOv8 TryExcept class. Usage: @TryExcept() decorator or 'with TryExcept():' context manager """YOLOv8 TryExcept class. Usage: @TryExcept() decorator or 'with TryExcept():' context manager."""
def __init__(self, msg='', verbose=True): def __init__(self, msg='', verbose=True):
self.msg = msg self.msg = msg
self.verbose = verbose self.verbose = verbose
@ -585,7 +586,8 @@ class TryExcept(contextlib.ContextDecorator):
def threaded(func): def threaded(func):
# Multi-threads a target function and returns thread. Usage: @threaded decorator """Multi-threads a target function and returns thread. Usage: @threaded decorator."""
def wrapper(*args, **kwargs): def wrapper(*args, **kwargs):
thread = threading.Thread(target=func, args=args, kwargs=kwargs, daemon=True) thread = threading.Thread(target=func, args=args, kwargs=kwargs, daemon=True)
thread.start() thread.start()
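
A self-contained demo of the decorator pattern (the `threaded` copy below is reproduced locally so the snippet runs on its own):

import threading
import time

def threaded(func):
    """Minimal local copy of the decorator above, for a runnable demo."""
    def wrapper(*args, **kwargs):
        thread = threading.Thread(target=func, args=args, kwargs=kwargs, daemon=True)
        thread.start()
        return thread
    return wrapper

@threaded
def slow_add(a, b):
    time.sleep(0.1)
    print(a + b)

t = slow_add(2, 3)  # returns immediately with the running Thread
t.join()            # block until the result (5) prints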
@ -703,13 +705,13 @@ def deprecation_warn(arg, new_arg, version=None):
def clean_url(url): def clean_url(url):
# Strip auth from URL, i.e. https://url.com/file.txt?auth -> https://url.com/file.txt """Strip auth from URL, i.e. https://url.com/file.txt?auth -> https://url.com/file.txt."""
url = str(Path(url)).replace(':/', '://') # Pathlib turns :// -> :/ url = str(Path(url)).replace(':/', '://') # Pathlib turns :// -> :/
return urllib.parse.unquote(url).split('?')[0] # '%2F' to '/', split https://url.com/file.txt?auth return urllib.parse.unquote(url).split('?')[0] # '%2F' to '/', split https://url.com/file.txt?auth
def url2file(url): def url2file(url):
# Convert URL to filename, i.e. https://url.com/file.txt?auth -> file.txt """Convert URL to filename, i.e. https://url.com/file.txt?auth -> file.txt."""
return Path(clean_url(url)).name return Path(clean_url(url)).name
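
A runnable sketch of the same two transformations, with the logic copied locally:

import urllib.parse
from pathlib import Path

def clean_url(url):
    """Same logic as above: undo Path's :// mangling and strip the auth query."""
    url = str(Path(url)).replace(':/', '://')  # Pathlib turns :// -> :/
    return urllib.parse.unquote(url).split('?')[0]

print(clean_url('https://url.com/file.txt?auth'))             # https://url.com/file.txt
print(Path(clean_url('https://url.com/file.txt?auth')).name)  # file.txt (url2file)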

@ -15,20 +15,20 @@ except (ImportError, AssertionError):
COMET_MODE = os.getenv('COMET_MODE', 'online') COMET_MODE = os.getenv('COMET_MODE', 'online')
COMET_MODEL_NAME = os.getenv('COMET_MODEL_NAME', 'YOLOv8') COMET_MODEL_NAME = os.getenv('COMET_MODEL_NAME', 'YOLOv8')
# determines how many batches of image predictions to log from the validation set # Determines how many batches of image predictions to log from the validation set
COMET_EVAL_BATCH_LOGGING_INTERVAL = int(os.getenv('COMET_EVAL_BATCH_LOGGING_INTERVAL', 1)) COMET_EVAL_BATCH_LOGGING_INTERVAL = int(os.getenv('COMET_EVAL_BATCH_LOGGING_INTERVAL', 1))
# determines whether to log confusion matrix every evaluation epoch # Determines whether to log confusion matrix every evaluation epoch
COMET_EVAL_LOG_CONFUSION_MATRIX = (os.getenv('COMET_EVAL_LOG_CONFUSION_MATRIX', 'true').lower() == 'true') COMET_EVAL_LOG_CONFUSION_MATRIX = (os.getenv('COMET_EVAL_LOG_CONFUSION_MATRIX', 'true').lower() == 'true')
# determines whether to log image predictions every evaluation epoch # Determines whether to log image predictions every evaluation epoch
COMET_EVAL_LOG_IMAGE_PREDICTIONS = (os.getenv('COMET_EVAL_LOG_IMAGE_PREDICTIONS', 'true').lower() == 'true') COMET_EVAL_LOG_IMAGE_PREDICTIONS = (os.getenv('COMET_EVAL_LOG_IMAGE_PREDICTIONS', 'true').lower() == 'true')
COMET_MAX_IMAGE_PREDICTIONS = int(os.getenv('COMET_MAX_IMAGE_PREDICTIONS', 100)) COMET_MAX_IMAGE_PREDICTIONS = int(os.getenv('COMET_MAX_IMAGE_PREDICTIONS', 100))
# ensures certain logging functions only run for supported tasks # Ensures certain logging functions only run for supported tasks
COMET_SUPPORTED_TASKS = ['detect'] COMET_SUPPORTED_TASKS = ['detect']
# scales reported confidence scores (0.0-1.0) by this value # Scales reported confidence scores (0.0-1.0) by this value
COMET_MAX_CONFIDENCE_SCORE = int(os.getenv('COMET_MAX_CONFIDENCE_SCORE', 100)) COMET_MAX_CONFIDENCE_SCORE = int(os.getenv('COMET_MAX_CONFIDENCE_SCORE', 100))
# names of plots created by YOLOv8 that are logged to Comet # Names of plots created by YOLOv8 that are logged to Comet
EVALUATION_PLOT_NAMES = 'F1_curve', 'P_curve', 'R_curve', 'PR_curve', 'confusion_matrix' EVALUATION_PLOT_NAMES = 'F1_curve', 'P_curve', 'R_curve', 'PR_curve', 'confusion_matrix'
LABEL_PLOT_NAMES = 'labels', 'labels_correlogram' LABEL_PLOT_NAMES = 'labels', 'labels_correlogram'
@ -43,7 +43,7 @@ def _get_experiment_type(mode, project_name):
def _create_experiment(args): def _create_experiment(args):
# Ensures that the experiment object is only created in a single process during distributed training. """Ensures that the experiment object is only created in a single process during distributed training."""
if RANK not in (-1, 0): if RANK not in (-1, 0):
return return
try: try:
@ -83,13 +83,13 @@ def _scale_bounding_box_to_original_image_shape(box, resized_image_shape, origin
resized_image_height, resized_image_width = resized_image_shape resized_image_height, resized_image_width = resized_image_shape
# convert normalized xywh format predictions to xyxy in resized scale format # Convert normalized xywh format predictions to xyxy in resized scale format
box = ops.xywhn2xyxy(box, h=resized_image_height, w=resized_image_width) box = ops.xywhn2xyxy(box, h=resized_image_height, w=resized_image_width)
# scale box predictions from resized image scale back to original image scale # Scale box predictions from resized image scale back to original image scale
box = ops.scale_boxes(resized_image_shape, box, original_image_shape, ratio_pad) box = ops.scale_boxes(resized_image_shape, box, original_image_shape, ratio_pad)
# Convert bounding box format from xyxy to xywh for Comet logging # Convert bounding box format from xyxy to xywh for Comet logging
box = ops.xyxy2xywh(box) box = ops.xyxy2xywh(box)
# adjust xy center to correspond top-left corner # Adjust xy center to correspond top-left corner
box[:2] -= box[2:] / 2 box[:2] -= box[2:] / 2
box = box.tolist() box = box.tolist()

@ -244,7 +244,7 @@ def check_requirements(requirements=ROOT.parent / 'requirements.txt', exclude=()
def check_suffix(file='yolov8n.pt', suffix='.pt', msg=''): def check_suffix(file='yolov8n.pt', suffix='.pt', msg=''):
# Check file(s) for acceptable suffix """Check file(s) for acceptable suffix."""
if file and suffix: if file and suffix:
if isinstance(suffix, str): if isinstance(suffix, str):
suffix = (suffix, ) suffix = (suffix, )
@ -255,7 +255,7 @@ def check_suffix(file='yolov8n.pt', suffix='.pt', msg=''):
def check_yolov5u_filename(file: str, verbose: bool = True): def check_yolov5u_filename(file: str, verbose: bool = True):
# Replace legacy YOLOv5 filenames with updated YOLOv5u filenames """Replace legacy YOLOv5 filenames with updated YOLOv5u filenames."""
if ('yolov3' in file or 'yolov5' in file) and 'u' not in file: if ('yolov3' in file or 'yolov5' in file) and 'u' not in file:
original_file = file original_file = file
file = re.sub(r'(.*yolov5([nsmlx]))\.pt', '\\1u.pt', file) # i.e. yolov5n.pt -> yolov5nu.pt file = re.sub(r'(.*yolov5([nsmlx]))\.pt', '\\1u.pt', file) # i.e. yolov5n.pt -> yolov5nu.pt
@ -269,7 +269,7 @@ def check_yolov5u_filename(file: str, verbose: bool = True):
def check_file(file, suffix='', download=True, hard=True): def check_file(file, suffix='', download=True, hard=True):
# Search/download file (if necessary) and return path """Search/download file (if necessary) and return path."""
check_suffix(file, suffix) # optional check_suffix(file, suffix) # optional
file = str(file).strip() # convert to string and strip spaces file = str(file).strip() # convert to string and strip spaces
file = check_yolov5u_filename(file) # yolov5n -> yolov5nu file = check_yolov5u_filename(file) # yolov5n -> yolov5nu
@ -300,7 +300,7 @@ def check_yaml(file, suffix=('.yaml', '.yml'), hard=True):
def check_imshow(warn=False): def check_imshow(warn=False):
# Check if environment supports image displays """Check if environment supports image displays."""
try: try:
assert not any((is_colab(), is_kaggle(), is_docker())) assert not any((is_colab(), is_kaggle(), is_docker()))
cv2.imshow('test', np.zeros((1, 1, 3))) cv2.imshow('test', np.zeros((1, 1, 3)))
@ -346,9 +346,10 @@ def git_describe(path=ROOT): # path must be a directory
def print_args(args: Optional[dict] = None, show_file=True, show_func=False): def print_args(args: Optional[dict] = None, show_file=True, show_func=False):
# Print function arguments (optional args dict) """Print function arguments (optional args dict)."""
def strip_auth(v): def strip_auth(v):
# Clean longer Ultralytics HUB URLs by stripping potential authentication information """Clean longer Ultralytics HUB URLs by stripping potential authentication information."""
return clean_url(v) if (isinstance(v, str) and v.startswith('http') and len(v) > 100) else v return clean_url(v) if (isinstance(v, str) and v.startswith('http') and len(v) > 100) else v
x = inspect.currentframe().f_back # previous frame x = inspect.currentframe().f_back # previous frame

@ -59,6 +59,6 @@ def generate_ddp_command(world_size, trainer):
def ddp_cleanup(trainer, file): def ddp_cleanup(trainer, file):
# delete temp file if created """Delete temp file if created."""
if f'{id(trainer)}.py' in file: # if temp_file suffix in file if f'{id(trainer)}.py' in file: # if temp_file suffix in file
os.remove(file) os.remove(file)

@ -21,7 +21,7 @@ GITHUB_ASSET_STEMS = [Path(k).stem for k in GITHUB_ASSET_NAMES]
def is_url(url, check=True): def is_url(url, check=True):
# Check if string is URL and check if URL exists """Check if string is URL and check if URL exists."""
with contextlib.suppress(Exception): with contextlib.suppress(Exception):
url = str(url) url = str(url)
result = parse.urlparse(url) result = parse.urlparse(url)
@ -141,11 +141,11 @@ def safe_download(url,
def attempt_download_asset(file, repo='ultralytics/assets', release='v0.0.0'): def attempt_download_asset(file, repo='ultralytics/assets', release='v0.0.0'):
# Attempt file download from GitHub release assets if not found locally. release = 'latest', 'v6.2', etc. """Attempt file download from GitHub release assets if not found locally. release = 'latest', 'v6.2', etc."""
from ultralytics.yolo.utils import SETTINGS # scoped for circular import from ultralytics.yolo.utils import SETTINGS # scoped for circular import
def github_assets(repository, version='latest'): def github_assets(repository, version='latest'):
# Return GitHub repo tag and assets (i.e. ['yolov8n.pt', 'yolov8s.pt', ...]) """Return GitHub repo tag and assets (i.e. ['yolov8n.pt', 'yolov8s.pt', ...])."""
if version != 'latest': if version != 'latest':
version = f'tags/{version}' # i.e. tags/v6.2 version = f'tags/{version}' # i.e. tags/v6.2
response = requests.get(f'https://api.github.com/repos/{repository}/releases/{version}').json() # github api response = requests.get(f'https://api.github.com/repos/{repository}/releases/{version}').json() # github api

@ -8,7 +8,8 @@ from pathlib import Path
class WorkingDirectory(contextlib.ContextDecorator): class WorkingDirectory(contextlib.ContextDecorator):
# Usage: @WorkingDirectory(dir) decorator or 'with WorkingDirectory(dir):' context manager """Usage: @WorkingDirectory(dir) decorator or 'with WorkingDirectory(dir):' context manager."""
def __init__(self, new_dir): def __init__(self, new_dir):
self.dir = new_dir # new dir self.dir = new_dir # new dir
self.cwd = Path.cwd().resolve() # current dir self.cwd = Path.cwd().resolve() # current dir
@ -56,19 +57,19 @@ def increment_path(path, exist_ok=False, sep='', mkdir=False):
def file_age(path=__file__): def file_age(path=__file__):
# Return days since last file update """Return days since last file update."""
dt = (datetime.now() - datetime.fromtimestamp(Path(path).stat().st_mtime)) # delta dt = (datetime.now() - datetime.fromtimestamp(Path(path).stat().st_mtime)) # delta
return dt.days # + dt.seconds / 86400 # fractional days return dt.days # + dt.seconds / 86400 # fractional days
def file_date(path=__file__): def file_date(path=__file__):
# Return human-readable file modification date, i.e. '2021-3-26' """Return human-readable file modification date, i.e. '2021-3-26'."""
t = datetime.fromtimestamp(Path(path).stat().st_mtime) t = datetime.fromtimestamp(Path(path).stat().st_mtime)
return f'{t.year}-{t.month}-{t.day}' return f'{t.year}-{t.month}-{t.day}'
def file_size(path): def file_size(path):
# Return file/dir size (MB) """Return file/dir size (MB)."""
if isinstance(path, (str, Path)): if isinstance(path, (str, Path)):
mb = 1 << 20 # bytes to MiB (1024 ** 2) mb = 1 << 20 # bytes to MiB (1024 ** 2)
path = Path(path) path = Path(path)
@ -80,6 +81,6 @@ def file_size(path):
def get_latest_run(search_dir='.'): def get_latest_run(search_dir='.'):
# Return path to most recent 'last.pt' in /runs (i.e. to --resume from) """Return path to most recent 'last.pt' in /runs (i.e. to --resume from)."""
last_list = glob.glob(f'{search_dir}/**/last*.pt', recursive=True) last_list = glob.glob(f'{search_dir}/**/last*.pt', recursive=True)
return max(last_list, key=os.path.getctime) if last_list else '' return max(last_list, key=os.path.getctime) if last_list else ''
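
A hedged usage sketch combining these file helpers (paths are illustrative; run as a script so `__file__` is defined):

import glob
import os
from datetime import datetime
from pathlib import Path

path = Path(__file__)
days = (datetime.now() - datetime.fromtimestamp(path.stat().st_mtime)).days  # file_age
mb = path.stat().st_size / (1 << 20)                                         # file_size
print(f'{path.name}: modified {days} days ago, {mb:.3f} MB')

# get_latest_run equivalent: newest 'last*.pt' checkpoint under a runs directory
last_list = glob.glob('runs/**/last*.pt', recursive=True)
print(max(last_list, key=os.path.getctime) if last_list else 'no checkpoints found')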

@ -11,7 +11,8 @@ from .ops import ltwh2xywh, ltwh2xyxy, resample_segments, xywh2ltwh, xywh2xyxy,
def _ntuple(n): def _ntuple(n):
# From PyTorch internals """From PyTorch internals."""
def parse(x): def parse(x):
return x if isinstance(x, abc.Iterable) else tuple(repeat(x, n)) return x if isinstance(x, abc.Iterable) else tuple(repeat(x, n))
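
The classic broadcast helper in action, reproduced standalone:

from collections import abc
from itertools import repeat

def _ntuple(n):
    """Broadcast a scalar to an n-tuple; pass iterables through unchanged."""
    def parse(x):
        return x if isinstance(x, abc.Iterable) else tuple(repeat(x, n))
    return parse

to_2tuple = _ntuple(2)
print(to_2tuple(640))         # (640, 640)
print(to_2tuple((640, 480)))  # (640, 480), unchanged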
@ -29,7 +30,7 @@ __all__ = 'Bboxes', # tuple or list
class Bboxes: class Bboxes:
"""Now only numpy is supported""" """Now only numpy is supported."""
def __init__(self, bboxes, format='xyxy') -> None: def __init__(self, bboxes, format='xyxy') -> None:
assert format in _formats assert format in _formats
@ -207,7 +208,7 @@ class Instances:
self._bboxes.areas() self._bboxes.areas()
def scale(self, scale_w, scale_h, bbox_only=False): def scale(self, scale_w, scale_h, bbox_only=False):
"""this might be similar with denormalize func but without normalized sign""" """this might be similar with denormalize func but without normalized sign."""
self._bboxes.mul(scale=(scale_w, scale_h, scale_w, scale_h)) self._bboxes.mul(scale=(scale_w, scale_h, scale_w, scale_h))
if bbox_only: if bbox_only:
return return
@ -240,7 +241,7 @@ class Instances:
self.normalized = True self.normalized = True
def add_padding(self, padw, padh): def add_padding(self, padw, padh):
# handle rect and mosaic situation """Handle rect and mosaic situation."""
assert not self.normalized, 'you should add padding with absolute coordinates.' assert not self.normalized, 'you should add padding with absolute coordinates.'
self._bboxes.add(offset=(padw, padh, padw, padh)) self._bboxes.add(offset=(padw, padh, padw, padh))
self.segments[..., 0] += padw self.segments[..., 0] += padw

@ -9,7 +9,8 @@ from .tal import bbox2dist
class VarifocalLoss(nn.Module): class VarifocalLoss(nn.Module):
# Varifocal loss by Zhang et al. https://arxiv.org/abs/2008.13367 """Varifocal loss by Zhang et al. https://arxiv.org/abs/2008.13367."""
def __init__(self): def __init__(self):
super().__init__() super().__init__()
@ -29,7 +30,7 @@ class BboxLoss(nn.Module):
self.use_dfl = use_dfl self.use_dfl = use_dfl
def forward(self, pred_dist, pred_bboxes, anchor_points, target_bboxes, target_scores, target_scores_sum, fg_mask): def forward(self, pred_dist, pred_bboxes, anchor_points, target_bboxes, target_scores, target_scores_sum, fg_mask):
# IoU loss """IoU loss."""
weight = torch.masked_select(target_scores.sum(-1), fg_mask).unsqueeze(-1) weight = torch.masked_select(target_scores.sum(-1), fg_mask).unsqueeze(-1)
iou = bbox_iou(pred_bboxes[fg_mask], target_bboxes[fg_mask], xywh=False, CIoU=True) iou = bbox_iou(pred_bboxes[fg_mask], target_bboxes[fg_mask], xywh=False, CIoU=True)
loss_iou = ((1.0 - iou) * weight).sum() / target_scores_sum loss_iou = ((1.0 - iou) * weight).sum() / target_scores_sum
@ -46,7 +47,7 @@ class BboxLoss(nn.Module):
@staticmethod @staticmethod
def _df_loss(pred_dist, target): def _df_loss(pred_dist, target):
# Return sum of left and right DFL losses """Return sum of left and right DFL losses."""
# Distribution Focal Loss (DFL) proposed in Generalized Focal Loss https://ieeexplore.ieee.org/document/9792391 # Distribution Focal Loss (DFL) proposed in Generalized Focal Loss https://ieeexplore.ieee.org/document/9792391
tl = target.long() # target left tl = target.long() # target left
tr = tl + 1 # target right tr = tl + 1 # target right

@ -16,9 +16,9 @@ from ultralytics.yolo.utils import LOGGER, SimpleClass, TryExcept, plt_settings
OKS_SIGMA = np.array([.26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, .87, .89, .89]) / 10.0 OKS_SIGMA = np.array([.26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, .87, .89, .89]) / 10.0
# boxes # Boxes
def box_area(box): def box_area(box):
# box = xyxy(4,n) """Return box area, where box shape is xyxy(4,n)."""
return (box[2] - box[0]) * (box[3] - box[1]) return (box[2] - box[0]) * (box[3] - box[1])
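
A tiny numeric check of box_area on two boxes stored column-wise, as the (4, n) shape implies:

import numpy as np

def box_area(box):
    """Area of boxes given as xyxy with shape (4, n)."""
    return (box[2] - box[0]) * (box[3] - box[1])

boxes = np.array([[0, 10],   # x1 for each box
                  [0, 10],   # y1
                  [4, 20],   # x2
                  [3, 30]])  # y2
print(box_area(boxes))  # [ 12 200]: (4-0)*(3-0) and (20-10)*(30-10)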
@ -175,9 +175,10 @@ def smooth_BCE(eps=0.1): # https://github.com/ultralytics/yolov3/issues/238#iss
return 1.0 - 0.5 * eps, 0.5 * eps return 1.0 - 0.5 * eps, 0.5 * eps
# losses # Losses
class FocalLoss(nn.Module): class FocalLoss(nn.Module):
# Wraps focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5) """Wraps focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5)."""
def __init__(self, loss_fcn, gamma=1.5, alpha=0.25): def __init__(self, loss_fcn, gamma=1.5, alpha=0.25):
super().__init__() super().__init__()
self.loss_fcn = loss_fcn # must be nn.BCEWithLogitsLoss() self.loss_fcn = loss_fcn # must be nn.BCEWithLogitsLoss()
@ -341,7 +342,7 @@ class ConfusionMatrix:
def smooth(y, f=0.05): def smooth(y, f=0.05):
# Box filter of fraction f """Box filter of fraction f."""
nf = round(len(y) * f * 2) // 2 + 1 # number of filter elements (must be odd) nf = round(len(y) * f * 2) // 2 + 1 # number of filter elements (must be odd)
p = np.ones(nf // 2) # ones padding p = np.ones(nf // 2) # ones padding
yp = np.concatenate((p * y[0], y, p * y[-1]), 0) # y padded yp = np.concatenate((p * y[0], y, p * y[-1]), 0) # y padded
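
A runnable sketch of the full box filter; the final np.convolve line is an assumption reconstructing the step the hunk cuts off at:

import numpy as np

def smooth(y, f=0.25):
    """Box filter of fraction f, as above (convolve step assumed)."""
    nf = round(len(y) * f * 2) // 2 + 1  # number of filter elements (must be odd)
    p = np.ones(nf // 2)                 # ones padding
    yp = np.concatenate((p * y[0], y, p * y[-1]), 0)  # y padded
    return np.convolve(yp, np.ones(nf) / nf, mode='valid')  # moving average

y = np.array([0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0])
print(smooth(y))  # same length as y, oscillation flattened toward 0.5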
@ -350,7 +351,7 @@ def smooth(y, f=0.05):
@plt_settings() @plt_settings()
def plot_pr_curve(px, py, ap, save_dir=Path('pr_curve.png'), names=()): def plot_pr_curve(px, py, ap, save_dir=Path('pr_curve.png'), names=()):
# Precision-recall curve """Plots a precision-recall curve."""
fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True) fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True)
py = np.stack(py, axis=1) py = np.stack(py, axis=1)
@ -373,7 +374,7 @@ def plot_pr_curve(px, py, ap, save_dir=Path('pr_curve.png'), names=()):
@plt_settings() @plt_settings()
def plot_mc_curve(px, py, save_dir=Path('mc_curve.png'), names=(), xlabel='Confidence', ylabel='Metric'): def plot_mc_curve(px, py, save_dir=Path('mc_curve.png'), names=(), xlabel='Confidence', ylabel='Metric'):
# Metric-confidence curve """Plots a metric-confidence curve."""
fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True) fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True)
if 0 < len(names) < 21: # display per-class legend if < 21 classes if 0 < len(names) < 21: # display per-class legend if < 21 classes
@ -614,23 +615,23 @@ class Metric(SimpleClass):
return self.all_ap.mean() if len(self.all_ap) else 0.0 return self.all_ap.mean() if len(self.all_ap) else 0.0
def mean_results(self): def mean_results(self):
"""Mean of results, return mp, mr, map50, map""" """Mean of results, return mp, mr, map50, map."""
return [self.mp, self.mr, self.map50, self.map] return [self.mp, self.mr, self.map50, self.map]
def class_result(self, i): def class_result(self, i):
"""class-aware result, return p[i], r[i], ap50[i], ap[i]""" """class-aware result, return p[i], r[i], ap50[i], ap[i]."""
return self.p[i], self.r[i], self.ap50[i], self.ap[i] return self.p[i], self.r[i], self.ap50[i], self.ap[i]
@property @property
def maps(self): def maps(self):
"""mAP of each class""" """mAP of each class."""
maps = np.zeros(self.nc) + self.map maps = np.zeros(self.nc) + self.map
for i, c in enumerate(self.ap_class_index): for i, c in enumerate(self.ap_class_index):
maps[c] = self.ap[i] maps[c] = self.ap[i]
return maps return maps
def fitness(self): def fitness(self):
# Model fitness as a weighted combination of metrics """Model fitness as a weighted combination of metrics."""
w = [0.0, 0.0, 0.1, 0.9] # weights for [P, R, mAP@0.5, mAP@0.5:0.95] w = [0.0, 0.0, 0.1, 0.9] # weights for [P, R, mAP@0.5, mAP@0.5:0.95]
return (np.array(self.mean_results()) * w).sum() return (np.array(self.mean_results()) * w).sum()
@ -800,7 +801,7 @@ class SegmentMetrics(SimpleClass):
@property @property
def ap_class_index(self): def ap_class_index(self):
# boxes and masks have the same ap_class_index """Boxes and masks have the same ap_class_index."""
return self.box.ap_class_index return self.box.ap_class_index
@property @property
@ -926,7 +927,7 @@ class ClassifyMetrics(SimpleClass):
self.speed = {'preprocess': 0.0, 'inference': 0.0, 'loss': 0.0, 'postprocess': 0.0} self.speed = {'preprocess': 0.0, 'inference': 0.0, 'loss': 0.0, 'postprocess': 0.0}
def process(self, targets, pred): def process(self, targets, pred):
# target classes and predicted classes """Target classes and predicted classes."""
pred, targets = torch.cat(pred), torch.cat(targets) pred, targets = torch.cat(pred), torch.cat(targets)
correct = (targets[:, None] == pred).float() correct = (targets[:, None] == pred).float()
acc = torch.stack((correct[:, 0], correct.max(1).values), dim=1) # (top1, top5) accuracy acc = torch.stack((correct[:, 0], correct.max(1).values), dim=1) # (top1, top5) accuracy

@ -246,7 +246,7 @@ def non_max_suppression(
i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS
i = i[:max_det] # limit detections i = i[:max_det] # limit detections
if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean) if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean)
# update boxes as boxes(i,4) = weights(i,n) * boxes(n,4) # Update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix
weights = iou * scores[None] # box weights weights = iou * scores[None] # box weights
x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) # merged boxes x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) # merged boxes
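
A toy demonstration of the weighted-mean merge on two overlapping boxes (the IoU mask is assumed all-True to keep the demo tiny):

import torch

# Two overlapping boxes with confidences 0.9 and 0.6; the merged box is their
# confidence-weighted mean, exactly the torch.mm step above
boxes = torch.tensor([[0.0, 0.0, 10.0, 10.0],
                      [2.0, 2.0, 12.0, 12.0]])
scores = torch.tensor([0.9, 0.6])

iou = torch.ones(1, 2, dtype=torch.bool)  # pretend both boxes pass the IoU threshold
weights = iou * scores[None]              # (1, 2) box weights
merged = torch.mm(weights, boxes) / weights.sum(1, keepdim=True)
print(merged)  # tensor([[0.8000, 0.8000, 10.8000, 10.8000]])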

@ -21,7 +21,7 @@ from .ops import clip_boxes, scale_image, xywh2xyxy, xyxy2xywh
class Colors: class Colors:
# Ultralytics color palette https://ultralytics.com/ # Ultralytics color palette https://ultralytics.com/
def __init__(self): def __init__(self):
# hex = matplotlib.colors.TABLEAU_COLORS.values() """Initialize colors as hex = matplotlib.colors.TABLEAU_COLORS.values()."""
hexs = ('FF3838', 'FF9D97', 'FF701F', 'FFB21D', 'CFD231', '48F90A', '92CC17', '3DDB86', '1A9334', '00D4BB', hexs = ('FF3838', 'FF9D97', 'FF701F', 'FFB21D', 'CFD231', '48F90A', '92CC17', '3DDB86', '1A9334', '00D4BB',
'2C99A8', '00C2FF', '344593', '6473FF', '0018EC', '8438FF', '520085', 'CB38FF', 'FF95C8', 'FF37C7') '2C99A8', '00C2FF', '344593', '6473FF', '0018EC', '8438FF', '520085', 'CB38FF', 'FF95C8', 'FF37C7')
self.palette = [self.hex2rgb(f'#{c}') for c in hexs] self.palette = [self.hex2rgb(f'#{c}') for c in hexs]
@ -63,7 +63,7 @@ class Annotator:
else: # use cv2 else: # use cv2
self.im = im self.im = im
self.lw = line_width or max(round(sum(im.shape) / 2 * 0.003), 2) # line width self.lw = line_width or max(round(sum(im.shape) / 2 * 0.003), 2) # line width
# pose # Pose
self.skeleton = [[16, 14], [14, 12], [17, 15], [15, 13], [12, 13], [6, 12], [7, 13], [6, 7], [6, 8], [7, 9], self.skeleton = [[16, 14], [14, 12], [17, 15], [15, 13], [12, 13], [6, 12], [7, 13], [6, 7], [6, 8], [7, 9],
[8, 10], [9, 11], [2, 3], [1, 2], [1, 3], [2, 4], [3, 5], [4, 6], [5, 7]] [8, 10], [9, 11], [2, 3], [1, 2], [1, 3], [2, 4], [3, 5], [4, 6], [5, 7]]
@ -115,7 +115,7 @@ class Annotator:
alpha (float): mask transparency: 0.0 fully transparent, 1.0 opaque alpha (float): mask transparency: 0.0 fully transparent, 1.0 opaque
""" """
if self.pil: if self.pil:
# convert to numpy first # Convert to numpy first
self.im = np.asarray(self.im).copy() self.im = np.asarray(self.im).copy()
if len(masks) == 0: if len(masks) == 0:
self.im[:] = im_gpu.permute(1, 2, 0).contiguous().cpu().numpy() * 255 self.im[:] = im_gpu.permute(1, 2, 0).contiguous().cpu().numpy() * 255
@ -136,7 +136,7 @@ class Annotator:
im_mask_np = im_mask.byte().cpu().numpy() im_mask_np = im_mask.byte().cpu().numpy()
self.im[:] = im_mask_np if retina_masks else scale_image(im_mask_np, self.im.shape) self.im[:] = im_mask_np if retina_masks else scale_image(im_mask_np, self.im.shape)
if self.pil: if self.pil:
# convert im back to PIL and update draw # Convert im back to PIL and update draw
self.fromarray(self.im) self.fromarray(self.im)
def kpts(self, kpts, shape=(640, 640), radius=5, kpt_line=True): def kpts(self, kpts, shape=(640, 640), radius=5, kpt_line=True):
@ -152,7 +152,7 @@ class Annotator:
Note: `kpt_line=True` currently only supports human pose plotting. Note: `kpt_line=True` currently only supports human pose plotting.
""" """
if self.pil: if self.pil:
# convert to numpy first # Convert to numpy first
self.im = np.asarray(self.im).copy() self.im = np.asarray(self.im).copy()
nkpt, ndim = kpts.shape nkpt, ndim = kpts.shape
is_pose = nkpt == 17 and ndim == 3 is_pose = nkpt == 17 and ndim == 3
@ -183,11 +183,11 @@ class Annotator:
continue continue
cv2.line(self.im, pos1, pos2, [int(x) for x in self.limb_color[i]], thickness=2, lineType=cv2.LINE_AA) cv2.line(self.im, pos1, pos2, [int(x) for x in self.limb_color[i]], thickness=2, lineType=cv2.LINE_AA)
if self.pil: if self.pil:
# convert im back to PIL and update draw # Convert im back to PIL and update draw
self.fromarray(self.im) self.fromarray(self.im)
def rectangle(self, xy, fill=None, outline=None, width=1): def rectangle(self, xy, fill=None, outline=None, width=1):
# Add rectangle to image (PIL-only) """Add rectangle to image (PIL-only)."""
self.draw.rectangle(xy, fill, outline, width) self.draw.rectangle(xy, fill, outline, width)
def text(self, xy, text, txt_color=(255, 255, 255), anchor='top'): def text(self, xy, text, txt_color=(255, 255, 255), anchor='top'):
@ -202,12 +202,12 @@ class Annotator:
cv2.putText(self.im, text, xy, 0, self.lw / 3, txt_color, thickness=tf, lineType=cv2.LINE_AA) cv2.putText(self.im, text, xy, 0, self.lw / 3, txt_color, thickness=tf, lineType=cv2.LINE_AA)
def fromarray(self, im): def fromarray(self, im):
# Update self.im from a numpy array """Update self.im from a numpy array."""
self.im = im if isinstance(im, Image.Image) else Image.fromarray(im) self.im = im if isinstance(im, Image.Image) else Image.fromarray(im)
self.draw = ImageDraw.Draw(self.im) self.draw = ImageDraw.Draw(self.im)
def result(self): def result(self):
# Return annotated image as array """Return annotated image as array."""
return np.asarray(self.im) return np.asarray(self.im)
@ -217,18 +217,18 @@ def plot_labels(boxes, cls, names=(), save_dir=Path('')):
import pandas as pd import pandas as pd
import seaborn as sn import seaborn as sn
# plot dataset labels # Plot dataset labels
LOGGER.info(f"Plotting labels to {save_dir / 'labels.jpg'}... ") LOGGER.info(f"Plotting labels to {save_dir / 'labels.jpg'}... ")
b = boxes.transpose() # classes, boxes b = boxes.transpose() # classes, boxes
nc = int(cls.max() + 1) # number of classes nc = int(cls.max() + 1) # number of classes
x = pd.DataFrame(b.transpose(), columns=['x', 'y', 'width', 'height']) x = pd.DataFrame(b.transpose(), columns=['x', 'y', 'width', 'height'])
# seaborn correlogram # Seaborn correlogram
sn.pairplot(x, corner=True, diag_kind='auto', kind='hist', diag_kws=dict(bins=50), plot_kws=dict(pmax=0.9)) sn.pairplot(x, corner=True, diag_kind='auto', kind='hist', diag_kws=dict(bins=50), plot_kws=dict(pmax=0.9))
plt.savefig(save_dir / 'labels_correlogram.jpg', dpi=200) plt.savefig(save_dir / 'labels_correlogram.jpg', dpi=200)
plt.close() plt.close()
# matplotlib labels # Matplotlib labels
ax = plt.subplots(2, 2, figsize=(8, 8), tight_layout=True)[1].ravel() ax = plt.subplots(2, 2, figsize=(8, 8), tight_layout=True)[1].ravel()
y = ax[0].hist(cls, bins=np.linspace(0, nc, nc + 1) - 0.5, rwidth=0.8) y = ax[0].hist(cls, bins=np.linspace(0, nc, nc + 1) - 0.5, rwidth=0.8)
with contextlib.suppress(Exception): # color histogram bars by class with contextlib.suppress(Exception): # color histogram bars by class
@ -242,7 +242,7 @@ def plot_labels(boxes, cls, names=(), save_dir=Path('')):
sn.histplot(x, x='x', y='y', ax=ax[2], bins=50, pmax=0.9) sn.histplot(x, x='x', y='y', ax=ax[2], bins=50, pmax=0.9)
sn.histplot(x, x='width', y='height', ax=ax[3], bins=50, pmax=0.9) sn.histplot(x, x='width', y='height', ax=ax[3], bins=50, pmax=0.9)
# rectangles # Rectangles
boxes[:, 0:2] = 0.5 # center boxes[:, 0:2] = 0.5 # center
boxes = xywh2xyxy(boxes) * 1000 boxes = xywh2xyxy(boxes) * 1000
img = Image.fromarray(np.ones((1000, 1000, 3), dtype=np.uint8) * 255) img = Image.fromarray(np.ones((1000, 1000, 3), dtype=np.uint8) * 255)
@ -401,7 +401,7 @@ def plot_images(images,
@plt_settings() @plt_settings()
def plot_results(file='path/to/results.csv', dir='', segment=False, pose=False): def plot_results(file='path/to/results.csv', dir='', segment=False, pose=False):
# Plot training results.csv. Usage: from utils.plots import *; plot_results('path/to/results.csv') """Plot training results.csv. Usage: from utils.plots import *; plot_results('path/to/results.csv')."""
import pandas as pd import pandas as pd
save_dir = Path(file).parent if file else Path(dir) save_dir = Path(file).parent if file else Path(dir)
if segment: if segment:
@ -436,7 +436,7 @@ def plot_results(file='path/to/results.csv', dir='', segment=False, pose=False):
def output_to_target(output, max_det=300): def output_to_target(output, max_det=300):
# Convert model output to target format [batch_id, class_id, x, y, w, h, conf] for plotting """Convert model output to target format [batch_id, class_id, x, y, w, h, conf] for plotting."""
targets = [] targets = []
for i, o in enumerate(output): for i, o in enumerate(output):
box, conf, cls = o[:max_det, :6].cpu().split((4, 1, 1), 1) box, conf, cls = o[:max_det, :6].cpu().split((4, 1, 1), 1)

@ -48,7 +48,7 @@ def select_highest_overlaps(mask_pos, overlaps, n_max_boxes):
is_max_overlaps = is_max_overlaps.permute(0, 2, 1).to(overlaps.dtype) # (b, n_max_boxes, h*w) is_max_overlaps = is_max_overlaps.permute(0, 2, 1).to(overlaps.dtype) # (b, n_max_boxes, h*w)
mask_pos = torch.where(mask_multi_gts, is_max_overlaps, mask_pos) # (b, n_max_boxes, h*w) mask_pos = torch.where(mask_multi_gts, is_max_overlaps, mask_pos) # (b, n_max_boxes, h*w)
fg_mask = mask_pos.sum(-2) fg_mask = mask_pos.sum(-2)
# find each grid serve which gt(index) # Find which gt (index) each grid cell serves
target_gt_idx = mask_pos.argmax(-2) # (b, h*w) target_gt_idx = mask_pos.argmax(-2) # (b, h*w)
return target_gt_idx, fg_mask, mask_pos return target_gt_idx, fg_mask, mask_pos
@ -112,10 +112,10 @@ class TaskAlignedAssigner(nn.Module):
target_gt_idx, fg_mask, mask_pos = select_highest_overlaps(mask_pos, overlaps, self.n_max_boxes) target_gt_idx, fg_mask, mask_pos = select_highest_overlaps(mask_pos, overlaps, self.n_max_boxes)
# assigned target # Assigned target
target_labels, target_bboxes, target_scores = self.get_targets(gt_labels, gt_bboxes, target_gt_idx, fg_mask) target_labels, target_bboxes, target_scores = self.get_targets(gt_labels, gt_bboxes, target_gt_idx, fg_mask)
# normalize # Normalize
align_metric *= mask_pos align_metric *= mask_pos
pos_align_metrics = align_metric.amax(axis=-1, keepdim=True) # b, max_num_obj pos_align_metrics = align_metric.amax(axis=-1, keepdim=True) # b, max_num_obj
pos_overlaps = (overlaps * mask_pos).amax(axis=-1, keepdim=True) # b, max_num_obj pos_overlaps = (overlaps * mask_pos).amax(axis=-1, keepdim=True) # b, max_num_obj
@ -125,13 +125,13 @@ class TaskAlignedAssigner(nn.Module):
return target_labels, target_bboxes, target_scores, fg_mask.bool(), target_gt_idx return target_labels, target_bboxes, target_scores, fg_mask.bool(), target_gt_idx
def get_pos_mask(self, pd_scores, pd_bboxes, gt_labels, gt_bboxes, anc_points, mask_gt): def get_pos_mask(self, pd_scores, pd_bboxes, gt_labels, gt_bboxes, anc_points, mask_gt):
# get in_gts mask, (b, max_num_obj, h*w) """Get in_gts mask, (b, max_num_obj, h*w)."""
mask_in_gts = select_candidates_in_gts(anc_points, gt_bboxes) mask_in_gts = select_candidates_in_gts(anc_points, gt_bboxes)
# get anchor_align metric, (b, max_num_obj, h*w) # Get anchor_align metric, (b, max_num_obj, h*w)
align_metric, overlaps = self.get_box_metrics(pd_scores, pd_bboxes, gt_labels, gt_bboxes, mask_in_gts * mask_gt) align_metric, overlaps = self.get_box_metrics(pd_scores, pd_bboxes, gt_labels, gt_bboxes, mask_in_gts * mask_gt)
# get topk_metric mask, (b, max_num_obj, h*w) # Get topk_metric mask, (b, max_num_obj, h*w)
mask_topk = self.select_topk_candidates(align_metric, topk_mask=mask_gt.repeat([1, 1, self.topk]).bool()) mask_topk = self.select_topk_candidates(align_metric, topk_mask=mask_gt.repeat([1, 1, self.topk]).bool())
# merge all mask to a final mask, (b, max_num_obj, h*w) # Merge all mask to a final mask, (b, max_num_obj, h*w)
mask_pos = mask_topk * mask_in_gts * mask_gt mask_pos = mask_topk * mask_in_gts * mask_gt
return mask_pos, align_metric, overlaps return mask_pos, align_metric, overlaps
@ -145,7 +145,7 @@ class TaskAlignedAssigner(nn.Module):
ind = torch.zeros([2, self.bs, self.n_max_boxes], dtype=torch.long) # 2, b, max_num_obj ind = torch.zeros([2, self.bs, self.n_max_boxes], dtype=torch.long) # 2, b, max_num_obj
ind[0] = torch.arange(end=self.bs).view(-1, 1).repeat(1, self.n_max_boxes) # b, max_num_obj ind[0] = torch.arange(end=self.bs).view(-1, 1).repeat(1, self.n_max_boxes) # b, max_num_obj
ind[1] = gt_labels.long().squeeze(-1) # b, max_num_obj ind[1] = gt_labels.long().squeeze(-1) # b, max_num_obj
# get the scores of each grid for each gt cls # Get the scores of each grid for each gt cls
bbox_scores[mask_gt] = pd_scores[ind[0], :, ind[1]][mask_gt] # b, max_num_obj, h*w bbox_scores[mask_gt] = pd_scores[ind[0], :, ind[1]][mask_gt] # b, max_num_obj, h*w
# (b, max_num_obj, 1, 4), (b, 1, h*w, 4) # (b, max_num_obj, 1, 4), (b, 1, h*w, 4)

@ -30,7 +30,7 @@ TORCH_2_X = check_version(torch.__version__, minimum='2.0')
@contextmanager @contextmanager
def torch_distributed_zero_first(local_rank: int): def torch_distributed_zero_first(local_rank: int):
# Decorator to make all processes in distributed training wait for each local_master to do something """Decorator to make all processes in distributed training wait for each local_master to do something."""
initialized = torch.distributed.is_available() and torch.distributed.is_initialized() initialized = torch.distributed.is_available() and torch.distributed.is_initialized()
if initialized and local_rank not in (-1, 0): if initialized and local_rank not in (-1, 0):
dist.barrier(device_ids=[local_rank]) dist.barrier(device_ids=[local_rank])
@ -40,7 +40,8 @@ def torch_distributed_zero_first(local_rank: int):
def smart_inference_mode(): def smart_inference_mode():
# Applies torch.inference_mode() decorator if torch>=1.9.0 else torch.no_grad() decorator """Applies torch.inference_mode() decorator if torch>=1.9.0 else torch.no_grad() decorator."""
def decorate(fn): def decorate(fn):
return (torch.inference_mode if TORCH_1_9 else torch.no_grad)()(fn) return (torch.inference_mode if TORCH_1_9 else torch.no_grad)()(fn)
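
Usage sketch of the returned decorator (assuming torch>=1.9 so inference_mode is chosen; the local copy makes the snippet self-contained):

import torch

TORCH_1_9 = True  # assumed for this sketch

def smart_inference_mode():
    """Same pattern as above, reproduced standalone."""
    def decorate(fn):
        return (torch.inference_mode if TORCH_1_9 else torch.no_grad)()(fn)
    return decorate

@smart_inference_mode()
def predict(x):
    return x * 2  # runs without autograd tracking

x = torch.ones(3, requires_grad=True)
print(predict(x).requires_grad)  # False: output is an inference tensor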
@ -48,7 +49,7 @@ def smart_inference_mode():
def select_device(device='', batch=0, newline=False, verbose=True): def select_device(device='', batch=0, newline=False, verbose=True):
# device = None or 'cpu' or 0 or '0' or '0,1,2,3' """Selects PyTorch Device. Options are device = None or 'cpu' or 0 or '0' or '0,1,2,3'."""
s = f'Ultralytics YOLOv{__version__} 🚀 Python-{platform.python_version()} torch-{torch.__version__} ' s = f'Ultralytics YOLOv{__version__} 🚀 Python-{platform.python_version()} torch-{torch.__version__} '
device = str(device).lower() device = str(device).lower()
for remove in 'cuda:', 'none', '(', ')', '[', ']', "'", ' ': for remove in 'cuda:', 'none', '(', ')', '[', ']', "'", ' ':
@ -84,7 +85,7 @@ def select_device(device='', batch=0, newline=False, verbose=True):
s += f"{'' if i == 0 else space}CUDA:{d} ({p.name}, {p.total_memory / (1 << 20):.0f}MiB)\n" # bytes to MB s += f"{'' if i == 0 else space}CUDA:{d} ({p.name}, {p.total_memory / (1 << 20):.0f}MiB)\n" # bytes to MB
arg = 'cuda:0' arg = 'cuda:0'
elif mps and getattr(torch, 'has_mps', False) and torch.backends.mps.is_available() and TORCH_2_X: elif mps and getattr(torch, 'has_mps', False) and torch.backends.mps.is_available() and TORCH_2_X:
# prefer MPS if available # Prefer MPS if available
s += 'MPS\n' s += 'MPS\n'
arg = 'mps' arg = 'mps'
else: # revert to CPU else: # revert to CPU
@ -97,14 +98,14 @@ def select_device(device='', batch=0, newline=False, verbose=True):
def time_sync(): def time_sync():
# PyTorch-accurate time """PyTorch-accurate time."""
if torch.cuda.is_available(): if torch.cuda.is_available():
torch.cuda.synchronize() torch.cuda.synchronize()
return time.time() return time.time()
def fuse_conv_and_bn(conv, bn): def fuse_conv_and_bn(conv, bn):
# Fuse Conv2d() and BatchNorm2d() layers https://tehnokv.com/posts/fusing-batchnorm-and-conv/ """Fuse Conv2d() and BatchNorm2d() layers https://tehnokv.com/posts/fusing-batchnorm-and-conv/."""
fusedconv = nn.Conv2d(conv.in_channels, fusedconv = nn.Conv2d(conv.in_channels,
conv.out_channels, conv.out_channels,
kernel_size=conv.kernel_size, kernel_size=conv.kernel_size,
@ -128,7 +129,7 @@ def fuse_conv_and_bn(conv, bn):
def fuse_deconv_and_bn(deconv, bn): def fuse_deconv_and_bn(deconv, bn):
# Fuse ConvTranspose2d() and BatchNorm2d() layers """Fuse ConvTranspose2d() and BatchNorm2d() layers."""
fuseddconv = nn.ConvTranspose2d(deconv.in_channels, fuseddconv = nn.ConvTranspose2d(deconv.in_channels,
deconv.out_channels, deconv.out_channels,
kernel_size=deconv.kernel_size, kernel_size=deconv.kernel_size,
@ -139,7 +140,7 @@ def fuse_deconv_and_bn(deconv, bn):
groups=deconv.groups, groups=deconv.groups,
bias=True).requires_grad_(False).to(deconv.weight.device) bias=True).requires_grad_(False).to(deconv.weight.device)
# prepare filters # Prepare filters
w_deconv = deconv.weight.clone().view(deconv.out_channels, -1) w_deconv = deconv.weight.clone().view(deconv.out_channels, -1)
w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var))) w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var)))
fuseddconv.weight.copy_(torch.mm(w_bn, w_deconv).view(fuseddconv.weight.shape)) fuseddconv.weight.copy_(torch.mm(w_bn, w_deconv).view(fuseddconv.weight.shape))
@ -153,7 +154,7 @@ def fuse_deconv_and_bn(deconv, bn):
def model_info(model, detailed=False, verbose=True, imgsz=640): def model_info(model, detailed=False, verbose=True, imgsz=640):
# Model information. imgsz may be int or list, i.e. imgsz=640 or imgsz=[640, 320] """Model information. imgsz may be int or list, i.e. imgsz=640 or imgsz=[640, 320]."""
if not verbose: if not verbose:
return return
n_p = get_num_params(model) n_p = get_num_params(model)
@ -174,17 +175,17 @@ def model_info(model, detailed=False, verbose=True, imgsz=640):
def get_num_params(model): def get_num_params(model):
# Return the total number of parameters in a YOLO model """Return the total number of parameters in a YOLO model."""
return sum(x.numel() for x in model.parameters()) return sum(x.numel() for x in model.parameters())
def get_num_gradients(model): def get_num_gradients(model):
# Return the total number of parameters with gradients in a YOLO model """Return the total number of parameters with gradients in a YOLO model."""
return sum(x.numel() for x in model.parameters() if x.requires_grad) return sum(x.numel() for x in model.parameters() if x.requires_grad)
def get_flops(model, imgsz=640): def get_flops(model, imgsz=640):
# Return a YOLO model's FLOPs """Return a YOLO model's FLOPs."""
try: try:
model = de_parallel(model) model = de_parallel(model)
p = next(model.parameters()) p = next(model.parameters())
@ -199,7 +200,7 @@ def get_flops(model, imgsz=640):
def initialize_weights(model): def initialize_weights(model):
# Initialize model weights to random values """Initialize model weights to random values."""
for m in model.modules(): for m in model.modules():
t = type(m) t = type(m)
if t is nn.Conv2d: if t is nn.Conv2d:
@ -224,7 +225,7 @@ def scale_img(img, ratio=1.0, same_shape=False, gs=32): # img(16,3,256,416)
def make_divisible(x, divisor): def make_divisible(x, divisor):
# Returns nearest x divisible by divisor """Returns nearest x divisible by divisor."""
if isinstance(divisor, torch.Tensor): if isinstance(divisor, torch.Tensor):
divisor = int(divisor.max()) # to int divisor = int(divisor.max()) # to int
return math.ceil(x / divisor) * divisor return math.ceil(x / divisor) * divisor
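
A quick numeric check; note the rounding is a ceiling, so values are rounded up to the next multiple:

import math

def make_divisible(x, divisor):
    """Smallest value >= x that is divisible by divisor, as above."""
    return math.ceil(x / divisor) * divisor

print(make_divisible(97, 32))   # 128: channel counts rounded up to the stride
print(make_divisible(640, 32))  # 640: already divisible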
@ -240,7 +241,7 @@ def copy_attr(a, b, include=(), exclude=()):
def get_latest_opset(): def get_latest_opset():
# Return second-most (for maturity) recently supported ONNX opset by this version of torch """Return second-most (for maturity) recently supported ONNX opset by this version of torch."""
return max(int(k[14:]) for k in vars(torch.onnx) if 'symbolic_opset' in k) - 1 # opset return max(int(k[14:]) for k in vars(torch.onnx) if 'symbolic_opset' in k) - 1 # opset
@ -250,22 +251,22 @@ def intersect_dicts(da, db, exclude=()):
def is_parallel(model): def is_parallel(model):
# Returns True if model is of type DP or DDP """Returns True if model is of type DP or DDP."""
return isinstance(model, (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel)) return isinstance(model, (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel))
def de_parallel(model): def de_parallel(model):
# De-parallelize a model: returns single-GPU model if model is of type DP or DDP """De-parallelize a model: returns single-GPU model if model is of type DP or DDP."""
return model.module if is_parallel(model) else model return model.module if is_parallel(model) else model
def one_cycle(y1=0.0, y2=1.0, steps=100): def one_cycle(y1=0.0, y2=1.0, steps=100):
# lambda function for sinusoidal ramp from y1 to y2 https://arxiv.org/pdf/1812.01187.pdf """Returns a lambda function for sinusoidal ramp from y1 to y2 https://arxiv.org/pdf/1812.01187.pdf."""
return lambda x: ((1 - math.cos(x * math.pi / steps)) / 2) * (y2 - y1) + y1 return lambda x: ((1 - math.cos(x * math.pi / steps)) / 2) * (y2 - y1) + y1
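
A worked example of the ramp at a few epochs (the endpoints and step count are illustrative):

import math

def one_cycle(y1=0.0, y2=1.0, steps=100):
    """Sinusoidal ramp from y1 to y2 over the given steps, as above."""
    return lambda x: ((1 - math.cos(x * math.pi / steps)) / 2) * (y2 - y1) + y1

lf = one_cycle(1.0, 0.01, steps=100)  # lr multiplier decaying 1.0 -> 0.01
for epoch in (0, 25, 50, 100):
    print(f'epoch {epoch:3d}: lf = {lf(epoch):.4f}')  # 1.0000 ... 0.0100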
def init_seeds(seed=0, deterministic=False): def init_seeds(seed=0, deterministic=False):
# Initialize random number generator (RNG) seeds https://pytorch.org/docs/stable/notes/randomness.html """Initialize random number generator (RNG) seeds https://pytorch.org/docs/stable/notes/randomness.html."""
random.seed(seed) random.seed(seed)
np.random.seed(seed) np.random.seed(seed)
torch.manual_seed(seed) torch.manual_seed(seed)
@ -280,14 +281,14 @@ def init_seeds(seed=0, deterministic=False):
class ModelEMA: class ModelEMA:
""" Updated Exponential Moving Average (EMA) from https://github.com/rwightman/pytorch-image-models """Updated Exponential Moving Average (EMA) from https://github.com/rwightman/pytorch-image-models
Keeps a moving average of everything in the model state_dict (parameters and buffers) Keeps a moving average of everything in the model state_dict (parameters and buffers)
For EMA details see https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage For EMA details see https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage
To disable EMA set the `enabled` attribute to `False`. To disable EMA set the `enabled` attribute to `False`.
""" """
def __init__(self, model, decay=0.9999, tau=2000, updates=0): def __init__(self, model, decay=0.9999, tau=2000, updates=0):
# Create EMA """Create EMA."""
self.ema = deepcopy(de_parallel(model)).eval() # FP32 EMA self.ema = deepcopy(de_parallel(model)).eval() # FP32 EMA
self.updates = updates # number of EMA updates self.updates = updates # number of EMA updates
self.decay = lambda x: decay * (1 - math.exp(-x / tau)) # decay exponential ramp (to help early epochs) self.decay = lambda x: decay * (1 - math.exp(-x / tau)) # decay exponential ramp (to help early epochs)
@ -296,7 +297,7 @@ class ModelEMA:
self.enabled = True self.enabled = True
def update(self, model): def update(self, model):
# Update EMA parameters """Update EMA parameters."""
if self.enabled: if self.enabled:
self.updates += 1 self.updates += 1
d = self.decay(self.updates) d = self.decay(self.updates)
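
A small numeric sketch of the decay ramp using the defaults above, showing why early updates track the raw model closely while late updates average heavily:

import math

decay, tau = 0.9999, 2000
ramp = lambda x: decay * (1 - math.exp(-x / tau))  # effective decay ramps up from ~0

for updates in (1, 100, 2000, 10000):
    print(f'update {updates:5d}: effective decay = {ramp(updates):.4f}')
# ~0.0005 at update 1 (EMA copies the model), ~0.9932 by update 10000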

@ -46,7 +46,7 @@ class ClassificationTrainer(BaseTrainer):
""" """
load/create/download model for any task load/create/download model for any task
""" """
# classification models require special handling # Classification models require special handling
if isinstance(self.model, torch.nn.Module): # if model is loaded beforehand. No setup needed if isinstance(self.model, torch.nn.Module): # if model is loaded beforehand. No setup needed
return return

@ -22,8 +22,8 @@ from ultralytics.yolo.utils.torch_utils import de_parallel
class DetectionTrainer(BaseTrainer): class DetectionTrainer(BaseTrainer):
def get_dataloader(self, dataset_path, batch_size, rank=0, mode='train'): def get_dataloader(self, dataset_path, batch_size, rank=0, mode='train'):
# TODO: manage splits differently """TODO: manage splits differently."""
# calculate stride - check if model is initialized # Calculate stride - check if model is initialized
gs = max(int(de_parallel(self.model).stride.max() if self.model else 0), 32) gs = max(int(de_parallel(self.model).stride.max() if self.model else 0), 32)
return create_dataloader(path=dataset_path, return create_dataloader(path=dataset_path,
imgsz=self.args.imgsz, imgsz=self.args.imgsz,
@ -48,7 +48,7 @@ class DetectionTrainer(BaseTrainer):
return batch return batch
def set_model_attributes(self): def set_model_attributes(self):
# nl = de_parallel(self.model).model[-1].nl # number of detection layers (to scale hyps) """nl = de_parallel(self.model).model[-1].nl # number of detection layers (to scale hyps)."""
# self.args.box *= 3 / nl # scale to layers # self.args.box *= 3 / nl # scale to layers
# self.args.cls *= self.data["nc"] / 80 * 3 / nl # scale to classes and layers # self.args.cls *= self.data["nc"] / 80 * 3 / nl # scale to classes and layers
# self.args.cls *= (self.args.imgsz / 640) ** 2 * 3 / nl # scale to image size and layers # self.args.cls *= (self.args.imgsz / 640) ** 2 * 3 / nl # scale to image size and layers

@ -67,7 +67,7 @@ class DetectionValidator(BaseValidator):
return preds return preds
def update_metrics(self, preds, batch): def update_metrics(self, preds, batch):
# Metrics """Metrics."""
for si, pred in enumerate(preds): for si, pred in enumerate(preds):
idx = batch['batch_idx'] == si idx = batch['batch_idx'] == si
cls = batch['cls'][idx] cls = batch['cls'][idx]
@ -164,8 +164,8 @@ class DetectionValidator(BaseValidator):
return torch.tensor(correct, dtype=torch.bool, device=detections.device) return torch.tensor(correct, dtype=torch.bool, device=detections.device)
def get_dataloader(self, dataset_path, batch_size): def get_dataloader(self, dataset_path, batch_size):
# TODO: manage splits differently """TODO: manage splits differently."""
# calculate stride - check if model is initialized # Calculate stride - check if model is initialized
gs = max(int(de_parallel(self.model).stride if self.model else 0), 32) gs = max(int(de_parallel(self.model).stride if self.model else 0), 32)
return create_dataloader(path=dataset_path, return create_dataloader(path=dataset_path,
imgsz=self.args.imgsz, imgsz=self.args.imgsz,

@ -47,7 +47,7 @@ class PoseValidator(DetectionValidator):
self.sigma = OKS_SIGMA if is_pose else np.ones(nkpt) / nkpt self.sigma = OKS_SIGMA if is_pose else np.ones(nkpt) / nkpt
def update_metrics(self, preds, batch): def update_metrics(self, preds, batch):
# Metrics """Metrics."""
for si, pred in enumerate(preds): for si, pred in enumerate(preds):
idx = batch['batch_idx'] == si idx = batch['batch_idx'] == si
cls = batch['cls'][idx] cls = batch['cls'][idx]

@ -10,7 +10,7 @@ from ultralytics.yolo.v8.detect.predict import DetectionPredictor
class SegmentationPredictor(DetectionPredictor): class SegmentationPredictor(DetectionPredictor):
def postprocess(self, preds, img, orig_imgs): def postprocess(self, preds, img, orig_imgs):
# TODO: filter by classes """TODO: filter by classes."""
p = ops.non_max_suppression(preds[0], p = ops.non_max_suppression(preds[0],
self.args.conf, self.args.conf,
self.args.iou, self.args.iou,

@ -140,7 +140,7 @@ class SegLoss(Loss):
return loss.sum() * batch_size, loss.detach() # loss(box, cls, dfl) return loss.sum() * batch_size, loss.detach() # loss(box, cls, dfl)
def single_mask_loss(self, gt_mask, pred, proto, xyxy, area): def single_mask_loss(self, gt_mask, pred, proto, xyxy, area):
# Mask loss for one image """Mask loss for one image."""
pred_mask = (pred @ proto.view(self.nm, -1)).view(-1, *proto.shape[1:]) # (n, 32) @ (32,80,80) -> (n,80,80) pred_mask = (pred @ proto.view(self.nm, -1)).view(-1, *proto.shape[1:]) # (n, 32) @ (32,80,80) -> (n,80,80)
loss = F.binary_cross_entropy_with_logits(pred_mask, gt_mask, reduction='none') loss = F.binary_cross_entropy_with_logits(pred_mask, gt_mask, reduction='none')
return (crop_mask(loss, xyxy).mean(dim=(1, 2)) / area).mean() return (crop_mask(loss, xyxy).mean(dim=(1, 2)) / area).mean()

@ -52,7 +52,7 @@ class SegmentationValidator(DetectionValidator):
return p, proto return p, proto
def update_metrics(self, preds, batch): def update_metrics(self, preds, batch):
# Metrics """Metrics."""
for si, (pred, proto) in enumerate(zip(preds[0], preds[1])): for si, (pred, proto) in enumerate(zip(preds[0], preds[1])):
idx = batch['batch_idx'] == si idx = batch['batch_idx'] == si
cls = batch['cls'][idx] cls = batch['cls'][idx]
@ -179,7 +179,7 @@ class SegmentationValidator(DetectionValidator):
self.plot_masks.clear() self.plot_masks.clear()
def pred_to_json(self, predn, filename, pred_masks): def pred_to_json(self, predn, filename, pred_masks):
# Save one JSON result """Save one JSON result."""
# Example result = {"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236} # Example result = {"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}
from pycocotools.mask import encode # noqa from pycocotools.mask import encode # noqa
