ultralytics 8.0.136 refactor and simplify package (#3748)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com>
This commit is contained in:
Laughing
2023-07-16 23:47:45 +08:00
committed by GitHub
parent 8ebe94d1e9
commit 620f3eb218
383 changed files with 4213 additions and 4646 deletions

View File

@ -0,0 +1,809 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
import contextlib
import inspect
import logging.config
import os
import platform
import re
import subprocess
import sys
import threading
import urllib
import uuid
from pathlib import Path
from types import SimpleNamespace
from typing import Union
import cv2
import matplotlib.pyplot as plt
import numpy as np
import torch
import yaml
from ultralytics import __version__
# PyTorch Multi-GPU DDP Constants (read from the environment; the defaults mean "not running distributed")
RANK = int(os.getenv('RANK', -1))
LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1))  # https://pytorch.org/docs/stable/elastic/run.html
WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1))

# Other Constants
FILE = Path(__file__).resolve()
ROOT = FILE.parents[1]  # YOLO
DEFAULT_CFG_PATH = ROOT / 'cfg/default.yaml'
# os.cpu_count() returns None when the CPU count cannot be determined, so fall back to 1 before subtracting
NUM_THREADS = min(8, max(1, (os.cpu_count() or 1) - 1))  # number of YOLOv5 multiprocessing threads
# Boolean flags are enabled unless the environment variable is set to something other than 'true' (case-insensitive)
AUTOINSTALL = str(os.getenv('YOLO_AUTOINSTALL', True)).lower() == 'true'  # global auto-install mode
VERBOSE = str(os.getenv('YOLO_VERBOSE', True)).lower() == 'true'  # global verbose mode
TQDM_BAR_FORMAT = '{l_bar}{bar:10}{r_bar}'  # tqdm bar format
LOGGING_NAME = 'ultralytics'
MACOS, LINUX, WINDOWS = (platform.system() == x for x in ['Darwin', 'Linux', 'Windows'])  # environment booleans
ARM64 = platform.machine() in ('arm64', 'aarch64')  # ARM64 booleans
# Help text with usage examples: package installation, the Python SDK, and the 'yolo' CLI syntax and special commands
HELP_MSG = \
"""
Usage examples for running YOLOv8:
1. Install the ultralytics package:
pip install ultralytics
2. Use the Python SDK:
from ultralytics import YOLO
# Load a model
model = YOLO('yolov8n.yaml') # build a new model from scratch
model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training)
# Use the model
results = model.train(data="coco128.yaml", epochs=3) # train the model
results = model.val() # evaluate model performance on the validation set
results = model('https://ultralytics.com/images/bus.jpg') # predict on an image
success = model.export(format='onnx') # export the model to ONNX format
3. Use the command line interface (CLI):
YOLOv8 'yolo' CLI commands use the following syntax:
yolo TASK MODE ARGS
Where TASK (optional) is one of [detect, segment, classify]
MODE (required) is one of [train, val, predict, export]
ARGS (optional) are any number of custom 'arg=value' pairs like 'imgsz=320' that override defaults.
See all ARGS at https://docs.ultralytics.com/usage/cfg or with 'yolo cfg'
- Train a detection model for 10 epochs with an initial learning_rate of 0.01
yolo detect train data=coco128.yaml model=yolov8n.pt epochs=10 lr0=0.01
- Predict a YouTube video using a pretrained segmentation model at image size 320:
yolo segment predict model=yolov8n-seg.pt source='https://youtu.be/Zgi9g1ksQHc' imgsz=320
- Val a pretrained detection model at batch-size 1 and image size 640:
yolo detect val model=yolov8n.pt data=coco128.yaml batch=1 imgsz=640
- Export a YOLOv8n classification model to ONNX format at image size 224 by 128 (no TASK required)
yolo export model=yolov8n-cls.pt format=onnx imgsz=224,128
- Run special commands:
yolo help
yolo checks
yolo version
yolo settings
yolo copy-cfg
yolo cfg
Docs: https://docs.ultralytics.com
Community: https://community.ultralytics.com
GitHub: https://github.com/ultralytics/ultralytics
"""
# Settings: process-wide side effects applied once, when this module is imported
torch.set_printoptions(linewidth=320, precision=4, profile='default')
np.set_printoptions(linewidth=320, formatter={'float_kind': '{:11.5g}'.format})  # format short g, %precision=5
cv2.setNumThreads(0)  # prevent OpenCV from multithreading (incompatible with PyTorch DataLoader)
# The assignments below overwrite any value already present in the environment
os.environ['NUMEXPR_MAX_THREADS'] = str(NUM_THREADS)  # NumExpr max threads
os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8'  # for deterministic training
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # suppress verbose TF compiler warnings in Colab
class SimpleClass:
    """
    Ultralytics SimpleClass is a base class providing helpful string representation, error reporting, and attribute
    access methods for easier debugging and usage.
    """

    def __str__(self):
        """List every public, non-callable attribute of the object, one 'name: value' entry per line."""
        lines = []
        for key in dir(self):
            value = getattr(self, key)
            if callable(value) or key.startswith('_'):
                continue  # methods and private/dunder members are not listed
            if isinstance(value, SimpleClass):
                # Nested SimpleClass objects are summarised by module and class name only
                lines.append(f'{key}: {value.__module__}.{value.__class__.__name__} object')
            else:
                lines.append(f'{key}: {repr(value)}')
        header = f'{self.__module__}.{self.__class__.__name__} object with attributes:\n\n'
        return header + '\n'.join(lines)

    def __repr__(self):
        """Use the same attribute listing as __str__."""
        return self.__str__()

    def __getattr__(self, attr):
        """Raise an AttributeError that names the class and appends the class docstring as a hint."""
        name = self.__class__.__name__
        raise AttributeError(f"'{name}' object has no attribute '{attr}'. See valid attributes below.\n{self.__doc__}")
class IterableSimpleNamespace(SimpleNamespace):
"""
Ultralytics IterableSimpleNamespace is an extension class of SimpleNamespace that adds iterable functionality and
enables usage with dict() and for loops.
"""
def __iter__(self):
"""Return an iterator of key-value pairs from the namespace's attributes."""
return iter(vars(self).items())
def __str__(self):
"""Return a human-readable string representation of the object."""
return '\n'.join(f'{k}={v}' for k, v in vars(self).items())
def __getattr__(self, attr):
"""Custom attribute access error message with helpful information."""
name = self.__class__.__name__
raise AttributeError(f"""
'{name}' object has no attribute '{attr}'. This may be caused by a modified or out of date ultralytics
'default.yaml' file.\nPlease update your code with 'pip install -U ultralytics' and if necessary replace
{DEFAULT_CFG_PATH} with the latest version from
https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/default.yaml
""")
def get(self, key, default=None):
"""Return the value of the specified key if it exists; otherwise, return the default value."""
return getattr(self, key, default)
def plt_settings(rcparams=None, backend='Agg'):
    """
    Decorator factory that runs a plotting function under temporary matplotlib rc parameters and backend.

    Usage:
        @plt_settings({"font.size": 12})
        def plot():
            ...

    Note: the returned object is a plain function decorator; it does not implement the context-manager protocol.

    Args:
        rcparams (dict, optional): Dictionary of rc parameters to set. Defaults to {'font.size': 11}.
        backend (str, optional): Name of the backend to use. Defaults to 'Agg'.

    Returns:
        (Callable): A decorator. The decorated function switches to `backend`, applies `rcparams` for the duration
            of the call, and switches back to the previously active backend afterwards, also when the call raises.
    """
    from functools import wraps  # local import, only needed to build the wrapper

    if rcparams is None:
        rcparams = {'font.size': 11}

    def decorator(func):
        """Wrap `func` so it executes with the temporary rc parameters and backend."""

        @wraps(func)  # preserve the wrapped function's name and docstring
        def wrapper(*args, **kwargs):
            """Set the backend and rc parameters, call the original function, then restore the backend."""
            original_backend = plt.get_backend()
            plt.switch_backend(backend)
            try:
                with plt.rc_context(rcparams):
                    return func(*args, **kwargs)
            finally:
                # Restore the caller's backend even if the plotting function raised
                plt.switch_backend(original_backend)

        return wrapper

    return decorator
def set_logging(name=LOGGING_NAME, verbose=True):
    """
    Configure a stream logger called `name` that emits bare messages.

    The level is INFO when `verbose` is true and the RANK environment variable is -1 or 0 (or unset); otherwise
    it is ERROR. Existing loggers are left enabled and the configured logger does not propagate.
    """
    rank = int(os.getenv('RANK', -1))  # rank in world for Multi-GPU trainings
    if verbose and rank in {-1, 0}:
        level = logging.INFO
    else:
        level = logging.ERROR

    # Formatter, handler and logger all share the same identifier
    formatter_cfg = {'format': '%(message)s'}
    handler_cfg = {'class': 'logging.StreamHandler', 'formatter': name, 'level': level}
    logger_cfg = {'level': level, 'handlers': [name], 'propagate': False}
    logging.config.dictConfig({
        'version': 1,
        'disable_existing_loggers': False,
        'formatters': {name: formatter_cfg},
        'handlers': {name: handler_cfg},
        'loggers': {name: logger_cfg}})
def emojis(string=''):
    """Return `string` unchanged, except on Windows where every non-ASCII character (e.g. emojis) is dropped."""
    if not WINDOWS:
        return string
    return string.encode().decode('ascii', 'ignore')
class EmojiFilter(logging.Filter):
    """
    A custom logging filter class for removing emojis in log messages.

    This filter is particularly useful for ensuring compatibility with Windows terminals
    that may not support the display of emojis in log messages.
    """

    def filter(self, record):
        """Pass string log messages through emojis() and then defer to the base-class filter."""
        # logging accepts arbitrary objects as the message (e.g. an exception instance); emojis() calls
        # str.encode(), so only rewrite real strings and leave other message objects untouched
        if isinstance(record.msg, str):
            record.msg = emojis(record.msg)
        return super().filter(record)
# Set logger: configure logging for the package name first, then expose the configured logger as LOGGER
set_logging(LOGGING_NAME, verbose=VERBOSE)  # run before defining LOGGER
LOGGER = logging.getLogger(LOGGING_NAME)  # define globally (used in train.py, val.py, detect.py, etc.)
if WINDOWS:  # emoji-safe logging
    # EmojiFilter rewrites each record's message through emojis() before it is emitted
    LOGGER.addFilter(EmojiFilter())
class ThreadingLocked:
    """
    Decorator class that serialises calls to the decorated function.

    Each ThreadingLocked instance owns one lock; every call to a function decorated with that instance holds
    the lock for the duration of the call, so only one thread at a time executes the function body.

    Attributes:
        lock (threading.Lock): The lock acquired around each call of the decorated function.

    Usage:
        @ThreadingLocked()
        def my_function():
            # Your code here
            pass
    """

    def __init__(self):
        """Create the lock used by the functions this instance decorates."""
        self.lock = threading.Lock()

    def __call__(self, f):
        """Return a wrapper around `f` that runs it while holding the lock."""
        import functools

        def decorated(*args, **kwargs):
            """Acquire the lock, call the wrapped function, and always release the lock afterwards."""
            self.lock.acquire()
            try:
                return f(*args, **kwargs)
            finally:
                self.lock.release()

        return functools.wraps(f)(decorated)
def yaml_save(file='data.yaml', data=None):
    """
    Save YAML data to a file.

    Args:
        file (str | Path, optional): File name. Default is 'data.yaml'. Missing parent directories are created.
        data (dict, optional): Data to save in YAML format. Path values are written as strings. The dictionary
            passed in is not modified. Defaults to an empty dictionary.

    Returns:
        (None): Data is saved to the specified file.
    """
    if data is None:
        data = {}
    file = Path(file)
    file.parent.mkdir(parents=True, exist_ok=True)  # no-op when the directory already exists

    # Convert Path objects to strings in a new dict so the caller's dictionary is left untouched
    serializable = {k: str(v) if isinstance(v, Path) else v for k, v in data.items()}

    # Dump data to file in YAML format; explicit UTF-8 because allow_unicode=True writes non-ASCII characters
    # verbatim and the platform default encoding may not be able to represent them
    with open(file, 'w', encoding='utf-8') as f:
        yaml.safe_dump(serializable, f, sort_keys=False, allow_unicode=True)
def yaml_load(file='data.yaml', append_filename=False):
    """
    Load YAML data from a file.

    Args:
        file (str | Path, optional): File name. Default is 'data.yaml'.
        append_filename (bool): Add the YAML filename to the YAML dictionary under 'yaml_file'. Default is False.

    Returns:
        (dict): YAML data, plus the file name when `append_filename` is True. An empty file yields an empty dict.
    """
    with open(file, errors='ignore', encoding='utf-8') as f:
        s = f.read()  # string

    # Remove special characters
    if not s.isprintable():
        s = re.sub(r'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD\U00010000-\U0010ffff]+', '', s)

    # safe_load() returns None for an empty document; normalise to {} so callers always receive a dict and the
    # filename can be added without a TypeError
    data = yaml.safe_load(s) or {}

    # Add YAML filename to dict and return
    if append_filename:
        data['yaml_file'] = str(file)
    return data
def yaml_print(yaml_file: Union[str, Path, dict]) -> None:
    """
    Log a YAML file, or a dictionary, as formatted YAML text.

    Args:
        yaml_file: Path of a YAML file to load, or an already-loaded dictionary to dump as-is.

    Returns:
        None
    """
    if isinstance(yaml_file, (str, Path)):
        yaml_dict = yaml_load(yaml_file)
    else:
        yaml_dict = yaml_file
    dump = yaml.dump(yaml_dict, sort_keys=False, allow_unicode=True)
    title = colorstr('bold', 'black', yaml_file)  # the argument itself is used as the title, file path or dict
    LOGGER.info(f"Printing '{title}'\n\n{dump}")
# Default configuration: loaded once at import time from DEFAULT_CFG_PATH
DEFAULT_CFG_DICT = yaml_load(DEFAULT_CFG_PATH)
# Replace string values spelling 'none' (any letter case) with a real None
for k, v in DEFAULT_CFG_DICT.items():
    if isinstance(v, str) and v.lower() == 'none':
        DEFAULT_CFG_DICT[k] = None  # only values are replaced, so the dict size is stable while iterating
DEFAULT_CFG_KEYS = DEFAULT_CFG_DICT.keys()  # live view of the default keys
DEFAULT_CFG = IterableSimpleNamespace(**DEFAULT_CFG_DICT)  # attribute-style access to the defaults
def is_colab():
    """
    Report whether the process appears to run inside a Google Colab notebook.

    Returns:
        (bool): True if either Colab marker variable is present in the environment, False otherwise.
    """
    colab_vars = ('COLAB_RELEASE_TAG', 'COLAB_BACKEND_VERSION')
    return any(name in os.environ for name in colab_vars)
def is_kaggle():
    """
    Report whether the process appears to run inside a Kaggle kernel.

    Returns:
        (bool): True if PWD is '/kaggle/working' and KAGGLE_URL_BASE is 'https://www.kaggle.com'.
    """
    env = os.environ
    if env.get('PWD') != '/kaggle/working':
        return False
    return env.get('KAGGLE_URL_BASE') == 'https://www.kaggle.com'
def is_jupyter():
    """
    Report whether the code runs inside an IPython/Jupyter session.
    Verified on Colab, Jupyterlab, Kaggle, Paperspace.

    Returns:
        (bool): True if IPython is importable and reports an active shell, False otherwise (including on any
            error raised while checking).
    """
    try:
        from IPython import get_ipython
        return get_ipython() is not None
    except Exception:
        return False
def is_docker() -> bool:
    """
    Report whether the process appears to run inside a Docker container.

    Returns:
        (bool): True if '/proc/self/cgroup' exists and its content mentions 'docker', False otherwise.
    """
    cgroup = Path('/proc/self/cgroup')
    if not cgroup.exists():
        return False
    with open(cgroup) as f:
        return 'docker' in f.read()
def is_online() -> bool:
    """
    Check internet connectivity by trying to open a TCP connection to well-known DNS servers on port 53.

    Returns:
        (bool): True as soon as one host accepts the connection, False if all attempts fail.
    """
    import socket

    dns_hosts = ('1.1.1.1', '8.8.8.8', '223.5.5.5')  # Cloudflare, Google, AliDNS
    for host in dns_hosts:
        try:
            connection = socket.create_connection(address=(host, 53), timeout=2)
        except (socket.timeout, socket.gaierror, OSError):
            continue  # unreachable or timed out, try the next host
        # Connected: close the socket to avoid a ResourceWarning and report success
        connection.close()
        return True
    return False
ONLINE = is_online()  # connectivity is probed once, at import time, and the result is cached in this flag
def is_pip_package(filepath: str = __name__) -> bool:
    """
    Report whether an import spec with a known origin can be found for the given module name.

    Args:
        filepath (str): Name passed to importlib.util.find_spec(). Defaults to this module's __name__.

    Returns:
        (bool): True if a spec is found and its origin is not None, False otherwise.
    """
    from importlib.util import find_spec

    spec = find_spec(filepath)
    if spec is None:
        return False
    return spec.origin is not None
def is_dir_writeable(dir_path: Union[str, Path]) -> bool:
    """
    Check whether the current process has write access to a directory.

    Args:
        dir_path (str | Path): The path to the directory.

    Returns:
        (bool): True if os.access() reports write permission for the path, False otherwise.
    """
    target = str(dir_path)
    return os.access(target, os.W_OK)
def is_pytest_running():
    """
    Report whether pytest appears to be running.

    Returns:
        (bool): True if PYTEST_CURRENT_TEST is set, the pytest module is loaded, or the name of the running
            script contains 'pytest'; False otherwise.
    """
    if 'PYTEST_CURRENT_TEST' in os.environ:
        return True
    if 'pytest' in sys.modules:
        return True
    return 'pytest' in Path(sys.argv[0]).stem
def is_github_actions_ci() -> bool:
    """
    Report whether the environment looks like a GitHub Actions CI Python runner.

    Returns:
        (bool): True only if GITHUB_ACTIONS, RUNNER_OS and RUNNER_TOOL_CACHE are all present in the environment.
    """
    required = ('GITHUB_ACTIONS', 'RUNNER_OS', 'RUNNER_TOOL_CACHE')
    return all(var in os.environ for var in required)
def is_git_dir():
    """
    Report whether the current file lives inside a git repository.

    Returns:
        (bool): True if get_git_dir() finds a repository root, False otherwise.
    """
    repo_root = get_git_dir()
    return repo_root is not None
def get_git_dir():
    """
    Find the root of the git repository that contains the current file.

    The parent directories of this file are searched from nearest to farthest for one containing a '.git'
    directory.

    Returns:
        (Path | None): Git root directory if found or None if not found.
    """
    repo_roots = (d for d in Path(__file__).parents if (d / '.git').is_dir())
    return next(repo_roots, None)  # None when no .git dir is found
def get_git_origin_url():
    """
    Retrieves the origin URL of a git repository.

    Returns:
        (str | None): The origin URL of the git repository, or None if this file is not in a git repository,
            the git executable cannot be found, or the git command fails.
    """
    if is_git_dir():
        # FileNotFoundError covers a checkout whose .git directory exists while git itself is not installed
        with contextlib.suppress(subprocess.CalledProcessError, FileNotFoundError):
            origin = subprocess.check_output(['git', 'config', '--get', 'remote.origin.url'])
            return origin.decode().strip()
    return None  # if not git dir or on error
def get_git_branch():
    """
    Returns the current git branch name. If not in a git repository, returns None.

    Returns:
        (str | None): The current git branch name, or None if this file is not in a git repository, the git
            executable cannot be found, or the git command fails.
    """
    if is_git_dir():
        # FileNotFoundError covers a checkout whose .git directory exists while git itself is not installed
        with contextlib.suppress(subprocess.CalledProcessError, FileNotFoundError):
            branch = subprocess.check_output(['git', 'rev-parse', '--abbrev-ref', 'HEAD'])
            return branch.decode().strip()
    return None  # if not git dir or on error
def get_default_args(func):
    """
    Collect the default values declared in a function's signature.

    Args:
        func (callable): The function to inspect.

    Returns:
        (dict): Maps each parameter that has a default to that default, in signature order.
    """
    defaults = {}
    for name, param in inspect.signature(func).parameters.items():
        if param.default is not inspect.Parameter.empty:
            defaults[name] = param.default
    return defaults
def get_user_config_dir(sub_dir='Ultralytics'):
    """
    Return the per-user configuration directory for the current operating system, creating it if needed.

    Args:
        sub_dir (str): The name of the subdirectory to create.

    Returns:
        (Path): The path to the user config directory.

    Raises:
        ValueError: If the operating system is not Windows, macOS or Linux.
    """
    # Pick the OS-specific location, relative to the home directory
    if WINDOWS:
        parts = ('AppData', 'Roaming')
    elif MACOS:  # macOS
        parts = ('Library', 'Application Support')
    elif LINUX:
        parts = ('.config', )
    else:
        raise ValueError(f'Unsupported operating system: {platform.system()}')
    path = Path.home().joinpath(*parts, sub_dir)

    # GCP and AWS lambda fix, only /tmp is writeable
    if not is_dir_writeable(str(path.parent)):
        path = Path('/tmp') / sub_dir
        LOGGER.warning(f"WARNING ⚠️ user config directory is not writeable, defaulting to '{path}'.")

    # Create the subdirectory if it does not exist
    path.mkdir(parents=True, exist_ok=True)
    return path
# Ultralytics settings dir. `or` is used instead of os.getenv(key, default) so get_user_config_dir(), which creates
# a directory and raises on unsupported operating systems, only runs when YOLO_CONFIG_DIR is unset or empty.
USER_CONFIG_DIR = Path(os.getenv('YOLO_CONFIG_DIR') or get_user_config_dir())  # Ultralytics settings dir
SETTINGS_YAML = USER_CONFIG_DIR / 'settings.yaml'
def colorstr(*input):
    """
    Wrap a string in ANSI escape codes, i.e. colorstr('blue', 'hello world').

    The last positional argument is the text; any preceding arguments are colour/style names. A single argument
    is rendered blue and bold. See https://en.wikipedia.org/wiki/ANSI_escape_code.
    """
    if len(input) > 1:
        *args, string = input  # color arguments, string
    else:
        args, string = ['blue', 'bold'], input[0]

    # Basic colours are codes 30-37, their bright variants 90-97, in the same order
    basic = ('black', 'red', 'green', 'yellow', 'blue', 'magenta', 'cyan', 'white')
    colors = {name: f'\033[{30 + i}m' for i, name in enumerate(basic)}
    colors.update({f'bright_{name}': f'\033[{90 + i}m' for i, name in enumerate(basic)})
    colors.update(end='\033[0m', bold='\033[1m', underline='\033[4m')  # misc

    prefix = ''.join(colors[x] for x in args)
    return prefix + f'{string}' + colors['end']
class TryExcept(contextlib.ContextDecorator):
    """Suppress any exception raised in the wrapped code. Usage: @TryExcept() decorator or 'with TryExcept():'."""

    def __init__(self, msg='', verbose=True):
        """Store the optional message prefix and whether suppressed errors should be printed."""
        self.msg = msg
        self.verbose = verbose

    def __enter__(self):
        """Nothing to set up when entering the context."""
        return None

    def __exit__(self, exc_type, value, traceback):
        """Print the suppressed error (prefixed with msg, if any) when verbose, then swallow the exception."""
        if self.verbose and value:
            separator = ': ' if self.msg else ''
            print(emojis(f'{self.msg}{separator}{value}'))
        return True  # returning True suppresses the exception
def threaded(func):
    """
    Multi-threads a target function and returns thread. Usage: @threaded decorator.

    Each call to the decorated function starts a new daemon thread running `func` with the given arguments and
    returns the started threading.Thread instead of the function's result.
    """
    from functools import wraps  # local import, only needed to build the wrapper

    @wraps(func)  # keep the wrapped function's name and docstring on the wrapper
    def wrapper(*args, **kwargs):
        """Multi-threads a given function and returns the thread."""
        thread = threading.Thread(target=func, args=args, kwargs=kwargs, daemon=True)
        thread.start()
        return thread

    return wrapper
def set_sentry():
    """
    Initialize the Sentry SDK for error reporting when every precondition holds.

    Nothing is initialized unless ALL of the following are true:
        - sync=True in the YOLO settings (run 'yolo settings' to view and update them)
        - running with rank -1 or 0
        - the CLI is in use (the running script is named 'yolo')
        - tests are not running
        - the environment is online
        - running from a pip package installation
        - not running from a git directory
        - the sentry_sdk package can be imported

    Events for KeyboardInterrupt and FileNotFoundError, and events whose exception message contains
    'out of memory', are dropped. Custom tags and an anonymized user id are attached to the events that are sent.
    """

    def before_send(event, hint):
        """
        Filter and tag an event before it is sent to Sentry.

        Args:
            event (dict): The event dictionary containing information about the error.
            hint (dict): A dictionary containing additional information about the error.

        Returns:
            dict: The tagged event, or None if the event should not be sent to Sentry.
        """
        if 'exc_info' in hint:
            exc_type, exc_value, tb = hint['exc_info']
            ignored = exc_type in (KeyboardInterrupt, FileNotFoundError) or 'out of memory' in str(exc_value)
            if ignored:
                return None  # do not send event

        event['tags'] = {
            'sys_argv': sys.argv[0],
            'sys_argv_name': Path(sys.argv[0]).name,
            'install': 'git' if is_git_dir() else 'pip' if is_pip_package() else 'other',
            'os': ENVIRONMENT}
        return event

    # Conditions are evaluated left to right and short-circuit, cheapest checks first
    enabled = (SETTINGS['sync'] and RANK in (-1, 0) and Path(sys.argv[0]).name == 'yolo' and not TESTS_RUNNING
               and ONLINE and is_pip_package() and not is_git_dir())
    if not enabled:
        return

    # If sentry_sdk package is not installed then return and do not use Sentry
    try:
        import sentry_sdk  # noqa
    except ImportError:
        return

    sentry_sdk.init(
        dsn='https://5ff1556b71594bfea135ff0203a0d290@o4504521589325824.ingest.sentry.io/4504521592406016',
        debug=False,
        traces_sample_rate=1.0,
        release=__version__,
        environment='production',  # 'dev' or 'production'
        before_send=before_send,
        ignore_errors=[KeyboardInterrupt, FileNotFoundError])
    sentry_sdk.set_user({'id': SETTINGS['uuid']})  # SHA-256 anonymized UUID hash

    # Disable all sentry logging
    for sentry_logger in ('sentry_sdk', 'sentry_sdk.errors'):
        logging.getLogger(sentry_logger).setLevel(logging.CRITICAL)
def get_settings(file=SETTINGS_YAML, version='0.0.3'):
    """
    Loads a global Ultralytics settings YAML file or creates one with default values if it does not exist.

    The loaded settings are accepted only if they form a dictionary with exactly the default keys, every value has
    the same type as the corresponding default, and check_version() accepts the stored settings version.
    Otherwise a warning is logged and the file is overwritten with the defaults.

    Args:
        file (Path): Path to the Ultralytics settings YAML file. Defaults to 'settings.yaml' in the USER_CONFIG_DIR.
        version (str): Settings version. If min settings version not met, new default settings will be saved.

    Returns:
        (dict): Dictionary of settings key-value pairs.
    """
    import hashlib

    from ultralytics.utils.checks import check_version
    from ultralytics.utils.torch_utils import torch_distributed_zero_first

    git_dir = get_git_dir()
    root = git_dir or Path()
    # Inside a git checkout, datasets go next to the repository when its parent directory is writeable
    datasets_root = (root.parent if git_dir and is_dir_writeable(root.parent) else root).resolve()
    defaults = {
        'datasets_dir': str(datasets_root / 'datasets'),  # default datasets directory.
        'weights_dir': str(root / 'weights'),  # default weights directory.
        'runs_dir': str(root / 'runs'),  # default runs directory.
        'uuid': hashlib.sha256(str(uuid.getnode()).encode()).hexdigest(),  # SHA-256 anonymized UUID hash
        'sync': True,  # sync analytics to help with YOLO development
        'api_key': '',  # Ultralytics HUB API key (https://hub.ultralytics.com/)
        'settings_version': version}  # Ultralytics settings version

    # NOTE(review): presumably lets rank 0 create the file before other ranks read it; confirm in torch_utils
    with torch_distributed_zero_first(RANK):
        if not file.exists():
            yaml_save(file, defaults)
        settings = yaml_load(file)

        # Check that settings keys and types match defaults. Types are compared key by key, not positionally, so
        # a settings file whose keys were reordered is not mistaken for an invalid one.
        correct = \
            isinstance(settings, dict) \
            and settings.keys() == defaults.keys() \
            and all(type(settings[k]) is type(v) for k, v in defaults.items()) \
            and check_version(settings['settings_version'], version)
        if not correct:
            LOGGER.warning('WARNING ⚠️ Ultralytics settings reset to defaults. This is normal and may be due to a '
                           'recent ultralytics package update, but may have overwritten previous settings. '
                           f"\nView and update settings with 'yolo settings' or at '{file}'")
            settings = defaults  # stored values are discarded, nothing is merged
            yaml_save(file, settings)  # save updated defaults

        return settings
def set_settings(kwargs, file=SETTINGS_YAML):
    """
    Update the global SETTINGS dictionary in place and write the result to the settings YAML file.

    Args:
        kwargs (dict): Settings key-value pairs to merge into SETTINGS.
        file (Path): Destination YAML file. Defaults to SETTINGS_YAML.
    """
    SETTINGS.update(kwargs)  # in-place update of the shared dictionary
    yaml_save(file=file, data=SETTINGS)
def deprecation_warn(arg, new_arg, version=None):
    """
    Issue a deprecation warning when a deprecated argument is used, suggesting an updated argument.

    Args:
        arg (str): Name of the deprecated argument.
        new_arg (str): Name of the argument to use instead.
        version (str | float, optional): Version in which `arg` will be removed. When omitted it is derived from
            the first three characters of the installed package version plus 0.2.
    """
    if not version:
        # Round to one decimal place: binary floating point would otherwise leak into the message,
        # e.g. 8.1 + 0.2 evaluates to 8.299999999999999
        version = round(float(__version__[:3]) + 0.2, 1)  # deprecate after 2nd major release
    LOGGER.warning(f"WARNING ⚠️ '{arg}' is deprecated and will be removed in 'ultralytics {version}' in the future. "
                   f"Please use '{new_arg}' instead.")
def clean_url(url):
    """
    Strip auth from URL, i.e. https://url.com/file.txt?auth -> https://url.com/file.txt.

    Args:
        url (str | Path): The URL to clean.

    Returns:
        (str): The percent-decoded URL with everything from the first '?' onwards removed.
    """
    # Import the submodule explicitly: a bare 'import urllib' does not guarantee that urllib.parse is loaded
    from urllib.parse import unquote

    # as_posix() keeps forward slashes on every platform, whereas str(Path) uses backslashes on Windows
    url = Path(url).as_posix().replace(':/', '://')  # Pathlib turns :// -> :/
    return unquote(url).split('?')[0]  # '%2F' to '/', split https://url.com/file.txt?auth
def url2file(url):
    """Return the file name of a URL with auth removed, i.e. https://url.com/file.txt?auth -> file.txt."""
    cleaned = clean_url(url)
    return Path(cleaned).name
# Run below code on utils init ------------------------------------------------------------------------------------
# Check first-install steps
PREFIX = colorstr('Ultralytics: ')
SETTINGS = get_settings()  # loads the settings YAML, creating or resetting it when needed
DATASETS_DIR = Path(SETTINGS['datasets_dir'])  # global datasets directory
# First match wins: Colab, then Kaggle, then Jupyter, then Docker, else the plain OS name
ENVIRONMENT = 'Colab' if is_colab() else 'Kaggle' if is_kaggle() else 'Jupyter' if is_jupyter() else \
    'Docker' if is_docker() else platform.system()
TESTS_RUNNING = is_pytest_running() or is_github_actions_ci()
set_sentry()  # must come after SETTINGS, ENVIRONMENT and TESTS_RUNNING, which set_sentry() reads

# Apply monkey patches if the script is being run from within the parent directory of the script's location
from .patches import imread, imshow, imwrite

# torch.save = torch_save
# Compares the grandparent directory of this file with the filename of the outermost stack frame
if Path(inspect.stack()[0].filename).parent.parent.as_posix() in inspect.stack()[-1].filename:
    cv2.imread, cv2.imwrite, cv2.imshow = imread, imwrite, imshow

View File

@ -0,0 +1,90 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
Functions for estimating the best YOLO batch size to use a fraction of the available CUDA memory in PyTorch.
"""
from copy import deepcopy
import numpy as np
import torch
from ultralytics.utils import DEFAULT_CFG, LOGGER, colorstr
from ultralytics.utils.torch_utils import profile
def check_train_batch_size(model, imgsz=640, amp=True):
    """
    Estimate a training batch size for a YOLO model by delegating to autobatch().

    Args:
        model (torch.nn.Module): YOLO model to check batch size for.
        imgsz (int): Image size used for training.
        amp (bool): If True, use automatic mixed precision (AMP) for training.

    Returns:
        (int): Batch size computed by autobatch() on a deep copy of the model put in train mode.
    """
    with torch.cuda.amp.autocast(amp):
        training_copy = deepcopy(model).train()  # the caller's model is left untouched
        return autobatch(training_copy, imgsz)  # compute optimal batch size
def autobatch(model, imgsz=640, fraction=0.67, batch_size=DEFAULT_CFG.batch):
    """
    Estimate the YOLO batch size that uses a given fraction of the free CUDA memory.

    Memory use is profiled at a few small batch sizes, a straight line is fitted through the measurements, and
    the line is solved for the batch size that reaches `fraction` of the free memory.

    Args:
        model (torch.nn.module): YOLO model to compute batch size for.
        imgsz (int, optional): The image size used as input for the YOLO model. Defaults to 640.
        fraction (float, optional): The fraction of available CUDA memory to use. Defaults to 0.67.
        batch_size (int, optional): Fallback batch size returned on CPU, when cudnn.benchmark is enabled, when
            the estimate is outside 1..1024, or on any error. Defaults to DEFAULT_CFG.batch.

    Returns:
        (int): The optimal batch size.
    """
    # Check device
    prefix = colorstr('AutoBatch: ')
    LOGGER.info(f'{prefix}Computing optimal batch size for imgsz={imgsz}')
    device = next(model.parameters()).device  # get model device
    if device.type == 'cpu':
        LOGGER.info(f'{prefix}CUDA not detected, using default CPU batch-size {batch_size}')
        return batch_size
    if torch.backends.cudnn.benchmark:
        LOGGER.info(f'{prefix} ⚠️ Requires torch.backends.cudnn.benchmark=False, using default batch-size {batch_size}')
        return batch_size

    # Inspect CUDA memory
    gib = 1 << 30  # bytes to GiB (1024 ** 3)
    device_name = str(device).upper()  # 'CUDA:0'
    properties = torch.cuda.get_device_properties(device)  # device properties
    total = properties.total_memory / gib  # GiB total
    reserved = torch.cuda.memory_reserved(device) / gib  # GiB reserved
    allocated = torch.cuda.memory_allocated(device) / gib  # GiB allocated
    free = total - (reserved + allocated)  # GiB free
    LOGGER.info(f'{prefix}{device_name} ({properties.name}) {total:.2f}G total, {reserved:.2f}G reserved, '
                f'{allocated:.2f}G allocated, {free:.2f}G free')

    # Profile batch sizes
    batch_sizes = [1, 2, 4, 8, 16]
    try:
        img = [torch.empty(n, 3, imgsz, imgsz) for n in batch_sizes]
        results = profile(img, model, n=3, device=device)

        # Fit a solution
        memory = [res[2] for res in results if res]  # memory [2]
        coeffs = np.polyfit(batch_sizes[:len(memory)], memory, deg=1)  # first degree polynomial fit
        best = int((free * fraction - coeffs[1]) / coeffs[0])  # y intercept (optimal batch size)
        if None in results:  # some sizes failed
            first_fail = results.index(None)  # first fail index
            if best >= batch_sizes[first_fail]:  # y intercept above failure point
                best = batch_sizes[max(first_fail - 1, 0)]  # select prior safe point
        if not 1 <= best <= 1024:  # b outside of safe range
            best = batch_size
            LOGGER.info(f'{prefix}WARNING ⚠️ CUDA anomaly detected, using default batch-size {batch_size}.')

        fraction = (np.polyval(coeffs, best) + reserved + allocated) / total  # actual fraction predicted
        LOGGER.info(f'{prefix}Using batch-size {best} for {device_name} {total * fraction:.2f}G/{total:.2f}G '
                    f'({fraction * 100:.0f}%) ✅')
        return best
    except Exception as e:
        LOGGER.warning(f'{prefix}WARNING ⚠️ error detected: {e}, using default batch-size {batch_size}.')
        return batch_size

View File

@ -0,0 +1,358 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
Benchmark YOLO model formats for speed and accuracy
Usage:
from ultralytics.utils.benchmarks import ProfileModels, benchmark
ProfileModels(['yolov8n.yaml', 'yolov8s.yaml']).profile()
benchmark(model='yolov8n.pt', imgsz=160)
Format | `format=argument` | Model
--- | --- | ---
PyTorch | - | yolov8n.pt
TorchScript | `torchscript` | yolov8n.torchscript
ONNX | `onnx` | yolov8n.onnx
OpenVINO | `openvino` | yolov8n_openvino_model/
TensorRT | `engine` | yolov8n.engine
CoreML | `coreml` | yolov8n.mlmodel
TensorFlow SavedModel | `saved_model` | yolov8n_saved_model/
TensorFlow GraphDef | `pb` | yolov8n.pb
TensorFlow Lite | `tflite` | yolov8n.tflite
TensorFlow Edge TPU | `edgetpu` | yolov8n_edgetpu.tflite
TensorFlow.js | `tfjs` | yolov8n_web_model/
PaddlePaddle | `paddle` | yolov8n_paddle_model/
ncnn | `ncnn` | yolov8n_ncnn_model/
"""
import glob
import platform
import time
from pathlib import Path
import numpy as np
import torch.cuda
from tqdm import tqdm
from ultralytics import YOLO
from ultralytics.cfg import TASK2DATA, TASK2METRIC
from ultralytics.engine.exporter import export_formats
from ultralytics.utils import LINUX, LOGGER, MACOS, ROOT, SETTINGS
from ultralytics.utils.checks import check_requirements, check_yolo
from ultralytics.utils.downloads import download
from ultralytics.utils.files import file_size
from ultralytics.utils.torch_utils import select_device
def benchmark(model=Path(SETTINGS['weights_dir']) / 'yolov8n.pt',
              imgsz=160,
              half=False,
              int8=False,
              device='cpu',
              hard_fail=False):
    """
    Benchmark a YOLO model across different formats for speed and accuracy.

    Args:
        model (str | Path | optional): Path to the model file or directory. Default is
            Path(SETTINGS['weights_dir']) / 'yolov8n.pt'.
        imgsz (int, optional): Image size for the benchmark. Default is 160.
        half (bool, optional): Use half-precision for the model if True. Default is False.
        int8 (bool, optional): Use int8-precision for the model if True. Default is False.
        device (str, optional): Device to run the benchmark on, either 'cpu' or 'cuda'. Default is 'cpu'.
        hard_fail (bool | float | optional): If True or a float, assert benchmarks pass with given metric.
            Default is False.

    Returns:
        df (pandas.DataFrame): A pandas DataFrame with benchmark results for each format, including file size,
            metric, and inference time.

    Raises:
        AssertionError: If hard_fail is set and a format fails with a non-assertion error, or if hard_fail is a
            float and any reported metric is not above it.
    """
    import pandas as pd
    pd.options.display.max_columns = 10
    pd.options.display.width = 120
    device = select_device(device, verbose=False)
    if isinstance(model, (str, Path)):
        model = YOLO(model)

    # Task lookups do not change between formats. Resolving them once up front also guarantees that `data` and
    # `key` exist for the summary below even when every format fails before it reaches validation.
    data = TASK2DATA[model.task]  # task to dataset, i.e. coco8.yaml for task=detect
    key = TASK2METRIC[model.task]  # task to metric, i.e. metrics/mAP50-95(B) for task=detect

    y = []
    t0 = time.time()
    for i, (name, fmt, suffix, cpu, gpu) in export_formats().iterrows():  # index, (name, format, suffix, CPU, GPU)
        emoji, filename = '❌', None  # export defaults
        try:
            assert i != 9 or LINUX, 'Edge TPU export only supported on Linux'
            if i == 10:
                assert MACOS or LINUX, 'TF.js export only supported on macOS and Linux'
            if 'cpu' in device.type:
                assert cpu, 'inference not supported on CPU'
            if 'cuda' in device.type:
                assert gpu, 'inference not supported on GPU'

            # Export
            if fmt == '-':
                filename = model.ckpt_path or model.cfg
                export = model  # PyTorch format
            else:
                filename = model.export(imgsz=imgsz, format=fmt, half=half, int8=int8, device=device, verbose=False)
                export = YOLO(filename, task=model.task)
                assert suffix in str(filename), 'export failed'
            emoji = '❎'  # indicates export succeeded

            # Predict
            assert model.task != 'pose' or i != 7, 'GraphDef Pose inference is not supported'
            assert i not in (9, 10), 'inference not supported'  # Edge TPU and TF.js are unsupported
            assert i != 5 or MACOS, 'inference only supported on macOS>=10.13'  # CoreML
            if not (ROOT / 'assets/bus.jpg').exists():
                download(url='https://ultralytics.com/images/bus.jpg', dir=ROOT / 'assets')
            export.predict(ROOT / 'assets/bus.jpg', imgsz=imgsz, device=device, half=half)

            # Validate
            results = export.val(data=data,
                                 batch=1,
                                 imgsz=imgsz,
                                 plots=False,
                                 device=device,
                                 half=half,
                                 int8=int8,
                                 verbose=False)
            metric, speed = results.results_dict[key], results.speed['inference']
            y.append([name, '✅', round(file_size(filename), 1), round(metric, 4), round(speed, 2)])
        except Exception as e:
            if hard_fail:
                # Assertion failures are expected "unsupported" outcomes; anything else is a hard failure
                assert type(e) is AssertionError, f'Benchmark hard_fail for {name}: {e}'
            LOGGER.warning(f'ERROR ❌️ Benchmark failure for {name}: {e}')
            y.append([name, emoji, round(file_size(filename), 1), None, None])  # mAP, t_inference

    # Print results
    check_yolo(device=device)  # print system info
    df = pd.DataFrame(y, columns=['Format', 'Status❔', 'Size (MB)', key, 'Inference time (ms/im)'])

    # A model built from a YAML config has no checkpoint path, so fall back to the config name as the export step does
    name = Path(model.ckpt_path or model.cfg).name
    s = f'\nBenchmarks complete for {name} on {data} at imgsz={imgsz} ({time.time() - t0:.2f}s)\n{df}\n'
    LOGGER.info(s)
    with open('benchmarks.log', 'a', errors='ignore', encoding='utf-8') as f:
        f.write(s)

    if hard_fail and isinstance(hard_fail, float):
        metrics = df[key].array  # values to compare to floor
        floor = hard_fail  # minimum metric floor to pass, i.e. = 0.29 mAP for YOLOv5n
        assert all(x > floor for x in metrics if pd.notna(x)), f'HARD FAIL: one or more metric(s) < floor {floor}'

    return df
class ProfileModels:
    """
    ProfileModels class for profiling different models on ONNX and TensorRT.

    This class profiles the performance of different models, provided their paths. The profiling includes parameters
    such as model speed and FLOPs.

    Attributes:
        paths (list): Paths of the models to profile.
        num_timed_runs (int): Number of timed runs for the profiling. Default is 100.
        num_warmup_runs (int): Number of warmup runs before profiling. Default is 10.
        min_time (float): Minimum number of seconds to profile for. Default is 60.
        imgsz (int): Image size used in the models. Default is 640.
        trt (bool): Run TensorRT profiling if True. Default is True.
        device: Device passed to model export. Defaults to CUDA device 0 when CUDA is available, else CPU.
            profile() reads `device.type` when trt is enabled, so a torch.device-like object is expected.

    Methods:
        profile(): Profiles the models and prints the result.
    """

    def __init__(self,
                 paths: list,
                 num_timed_runs=100,
                 num_warmup_runs=10,
                 min_time=60,
                 imgsz=640,
                 trt=True,
                 device=None):
        """Store the profiling configuration; no files are read or models loaded here."""
        self.paths = paths
        self.num_timed_runs = num_timed_runs
        self.num_warmup_runs = num_warmup_runs
        self.min_time = min_time
        self.imgsz = imgsz
        self.trt = trt  # run TensorRT profiling
        self.device = device or torch.device(0 if torch.cuda.is_available() else 'cpu')

    def profile(self):
        """
        Profile every discovered model file and print a results table.

        Returns:
            (list[dict] | None): One results dict per profiled model, or None when no files were found.
        """
        files = self.get_files()

        if not files:
            print('No matching *.pt or *.onnx files found.')
            return

        table_rows = []
        output = []
        for file in files:
            engine_file = file.with_suffix('.engine')
            if file.suffix in ('.pt', '.yaml'):
                model = YOLO(str(file))
                model.fuse()  # to report correct params and GFLOPs in model.info()
                model_info = model.info()
                # Only build an engine when TensorRT is requested, a non-CPU device is used and none exists yet
                if self.trt and self.device.type != 'cpu' and not engine_file.is_file():
                    engine_file = model.export(format='engine',
                                               half=True,
                                               imgsz=self.imgsz,
                                               device=self.device,
                                               verbose=False)
                onnx_file = model.export(format='onnx',
                                         half=True,
                                         imgsz=self.imgsz,
                                         simplify=True,
                                         device=self.device,
                                         verbose=False)
            elif file.suffix == '.onnx':
                model_info = self.get_onnx_model_info(file)
                onnx_file = file
            else:
                continue

            t_engine = self.profile_tensorrt_model(str(engine_file))
            t_onnx = self.profile_onnx_model(str(onnx_file))
            table_rows.append(self.generate_table_row(file.stem, t_onnx, t_engine, model_info))
            output.append(self.generate_results_dict(file.stem, t_onnx, t_engine, model_info))

        self.print_table(table_rows)
        return output

    def get_files(self):
        """Expand self.paths (directories, model paths or glob patterns) into a sorted list of Path objects."""
        files = []
        for path in self.paths:
            path = Path(path)
            if path.is_dir():
                extensions = ['*.pt', '*.onnx', '*.yaml']
                files.extend([file for ext in extensions for file in glob.glob(str(path / ext))])
            elif path.suffix in {'.pt', '.yaml'}:  # add non-existing
                files.append(str(path))
            else:
                files.extend(glob.glob(str(path)))

        print(f'Profiling: {sorted(files)}')
        return [Path(file) for file in sorted(files)]

    def get_onnx_model_info(self, onnx_file: str):
        """Placeholder that returns zeros for (num_layers, num_params, num_gradients, num_flops)."""
        # return (num_layers, num_params, num_gradients, num_flops)
        return 0.0, 0.0, 0.0, 0.0

    def iterative_sigma_clipping(self, data, sigma=2, max_iters=3):
        """
        Iteratively drop samples lying `sigma` standard deviations or more from the mean.

        Args:
            data (array-like): Samples to clip.
            sigma (float): Clipping width in standard deviations. Default is 2.
            max_iters (int): Maximum number of clipping passes. Default is 3.

        Returns:
            (np.ndarray): The retained samples.
        """
        data = np.array(data)
        for _ in range(max_iters):
            mean, std = np.mean(data), np.std(data)
            clipped_data = data[(data > mean - sigma * std) & (data < mean + sigma * std)]
            # Stop when nothing was removed, and also when everything would be removed: with zero spread the
            # strict bounds reject every sample, which would leave an empty array and NaN statistics.
            if len(clipped_data) == len(data) or len(clipped_data) == 0:
                break
            data = clipped_data
        return data

    def _num_runs(self, elapsed, min_runs):
        """Return the number of timed runs needed to fill min_time, never fewer than min_runs."""
        if elapsed <= 0:  # warmup too fast to time (or no warmup runs); avoid division by zero
            return min_runs
        return max(round(self.min_time / elapsed * self.num_warmup_runs), min_runs)

    def profile_tensorrt_model(self, engine_file: str):
        """
        Time inference of a TensorRT engine file.

        Returns:
            (tuple): (mean, std) of the sigma-clipped inference speeds, or (0.0, 0.0) when TensorRT profiling is
                disabled or the engine file does not exist.
        """
        if not self.trt or not Path(engine_file).is_file():
            return 0.0, 0.0

        # Model and input
        model = YOLO(engine_file)
        input_data = np.random.rand(self.imgsz, self.imgsz, 3).astype(np.float32)  # must be FP32

        # Warmup runs; `elapsed` keeps the duration of the last of the three warmup rounds
        elapsed = 0.0
        for _ in range(3):
            start_time = time.time()
            for _ in range(self.num_warmup_runs):
                model(input_data, imgsz=self.imgsz, verbose=False)
            elapsed = time.time() - start_time

        # Compute number of runs as higher of min_time or num_timed_runs
        num_runs = self._num_runs(elapsed, self.num_timed_runs * 50)

        # Timed runs
        run_times = []
        for _ in tqdm(range(num_runs), desc=engine_file):
            results = model(input_data, imgsz=self.imgsz, verbose=False)
            run_times.append(results[0].speed['inference'])  # speed as reported by the result, used unconverted

        run_times = self.iterative_sigma_clipping(np.array(run_times), sigma=2, max_iters=3)  # sigma clipping
        return np.mean(run_times), np.std(run_times)

    def profile_onnx_model(self, onnx_file: str):
        """
        Time inference of an ONNX model with onnxruntime on the CPU execution provider.

        Returns:
            (tuple): (mean, std) of the sigma-clipped run times in milliseconds.

        Raises:
            ValueError: If the model's first input has an unsupported datatype.
        """
        check_requirements('onnxruntime')
        import onnxruntime as ort

        # Session with either 'TensorrtExecutionProvider', 'CUDAExecutionProvider', 'CPUExecutionProvider'
        sess_options = ort.SessionOptions()
        sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
        sess_options.intra_op_num_threads = 8  # Limit the number of threads
        sess = ort.InferenceSession(onnx_file, sess_options, providers=['CPUExecutionProvider'])

        input_tensor = sess.get_inputs()[0]
        input_type = input_tensor.type

        # Mapping ONNX datatype to numpy datatype ('float16' must be tested before the broader 'float')
        if 'float16' in input_type:
            input_dtype = np.float16
        elif 'float' in input_type:
            input_dtype = np.float32
        elif 'double' in input_type:
            input_dtype = np.float64
        elif 'int64' in input_type:
            input_dtype = np.int64
        elif 'int32' in input_type:
            input_dtype = np.int32
        else:
            raise ValueError(f'Unsupported ONNX datatype {input_type}')

        input_data = np.random.rand(*input_tensor.shape).astype(input_dtype)
        input_name = input_tensor.name
        output_name = sess.get_outputs()[0].name

        # Warmup runs; `elapsed` keeps the duration of the last of the three warmup rounds
        elapsed = 0.0
        for _ in range(3):
            start_time = time.time()
            for _ in range(self.num_warmup_runs):
                sess.run([output_name], {input_name: input_data})
            elapsed = time.time() - start_time

        # Compute number of runs as higher of min_time or num_timed_runs
        num_runs = self._num_runs(elapsed, self.num_timed_runs)

        # Timed runs
        run_times = []
        for _ in tqdm(range(num_runs), desc=onnx_file):
            start_time = time.time()
            sess.run([output_name], {input_name: input_data})
            run_times.append((time.time() - start_time) * 1000)  # Convert to milliseconds

        run_times = self.iterative_sigma_clipping(np.array(run_times), sigma=2, max_iters=5)  # sigma clipping
        return np.mean(run_times), np.std(run_times)

    def generate_table_row(self, model_name, t_onnx, t_engine, model_info):
        """Format one table row from the (mean, std) timings and the model info tuple."""
        layers, params, gradients, flops = model_info
        return f'| {model_name:18s} | {self.imgsz} | - | {t_onnx[0]:.2f} ± {t_onnx[1]:.2f} ms | {t_engine[0]:.2f} ± {t_engine[1]:.2f} ms | {params / 1e6:.1f} | {flops:.1f} |'

    def generate_results_dict(self, model_name, t_onnx, t_engine, model_info):
        """Build a results dict holding the model name, parameter count, GFLOPs and mean ONNX/TensorRT speeds."""
        layers, params, gradients, flops = model_info
        return {
            'model/name': model_name,
            'model/parameters': params,
            'model/GFLOPs': round(flops, 3),
            'model/speed_ONNX(ms)': round(t_onnx[0], 3),
            'model/speed_TensorRT(ms)': round(t_engine[0], 3)}

    def print_table(self, table_rows):
        """Print the header, separator and the given rows of the results table."""
        gpu = torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'GPU'
        header = f'| Model | size<br><sup>(pixels) | mAP<sup>val<br>50-95 | Speed<br><sup>CPU ONNX<br>(ms) | Speed<br><sup>{gpu} TensorRT<br>(ms) | params<br><sup>(M) | FLOPs<br><sup>(B) |'
        separator = '|-------------|---------------------|--------------------|------------------------------|-----------------------------------|------------------|-----------------|'

        print(f'\n\n{header}')
        print(separator)
        for row in table_rows:
            print(row)
if __name__ == '__main__':
    # Benchmark all export formats
    benchmark()

    # Profiling models on ONNX and TensorRT (constructing the profiler alone does nothing; profile() runs it)
    ProfileModels(['yolov8n.yaml', 'yolov8s.yaml']).profile()

View File

@ -0,0 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
from .base import add_integration_callbacks, default_callbacks, get_default_callbacks
__all__ = 'add_integration_callbacks', 'default_callbacks', 'get_default_callbacks'

View File

@ -0,0 +1,212 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
Base callbacks
"""
from collections import defaultdict
from copy import deepcopy
# Trainer callbacks ----------------------------------------------------------------------------------------------------
def on_pretrain_routine_start(trainer):
    """Default no-op hook for the point just before the pretraining routine starts."""
def on_pretrain_routine_end(trainer):
    """Default no-op hook for the point just after the pretraining routine ends."""
def on_train_start(trainer):
    """Default no-op hook for the start of training."""
def on_train_epoch_start(trainer):
    """Default no-op hook for the start of every training epoch."""
def on_train_batch_start(trainer):
    """Default no-op hook for the start of every training batch."""
def optimizer_step(trainer):
    """Default no-op hook for an optimizer step."""
def on_before_zero_grad(trainer):
    """Default no-op hook for the point just before gradients are zeroed."""
def on_train_batch_end(trainer):
    """Default no-op hook for the end of every training batch."""
def on_train_epoch_end(trainer):
    """Default no-op hook for the end of every training epoch."""
def on_fit_epoch_end(trainer):
    """Default no-op hook for the end of every fit epoch (train + val)."""
def on_model_save(trainer):
    """Default no-op hook for a model save."""
def on_train_end(trainer):
    """Default no-op hook for the end of training."""
def on_params_update(trainer):
    """Default no-op hook for an update of the model parameters."""
def teardown(trainer):
"""Called during the teardown of the training process."""
pass
# Validator callbacks --------------------------------------------------------------------------------------------------
def on_val_start(validator):
    """Default no-op hook for the start of validation."""
def on_val_batch_start(validator):
    """Default no-op hook for the start of every validation batch."""
def on_val_batch_end(validator):
    """Default no-op hook for the end of every validation batch."""
def on_val_end(validator):
    """Default no-op hook for the end of validation."""
# Predictor callbacks --------------------------------------------------------------------------------------------------
def on_predict_start(predictor):
    """Default no-op hook for the start of prediction."""
def on_predict_batch_start(predictor):
    """Default no-op hook for the start of every prediction batch."""
def on_predict_batch_end(predictor):
    """Default no-op hook for the end of every prediction batch."""
def on_predict_postprocess_end(predictor):
    """Default no-op hook for the point just after prediction post-processing ends."""
def on_predict_end(predictor):
    """Default no-op hook for the end of prediction."""
# Exporter callbacks ---------------------------------------------------------------------------------------------------
def on_export_start(exporter):
    """Default no-op hook for the start of model export."""
def on_export_end(exporter):
    """Default no-op hook for the end of model export."""
# Registry mapping each callback event name to a list holding this module's no-op function for that event.
# get_default_callbacks() hands out deep copies of this dict.
default_callbacks = {
    # Run in trainer
    'on_pretrain_routine_start': [on_pretrain_routine_start],
    'on_pretrain_routine_end': [on_pretrain_routine_end],
    'on_train_start': [on_train_start],
    'on_train_epoch_start': [on_train_epoch_start],
    'on_train_batch_start': [on_train_batch_start],
    'optimizer_step': [optimizer_step],
    'on_before_zero_grad': [on_before_zero_grad],
    'on_train_batch_end': [on_train_batch_end],
    'on_train_epoch_end': [on_train_epoch_end],
    'on_fit_epoch_end': [on_fit_epoch_end],  # fit = train + val
    'on_model_save': [on_model_save],
    'on_train_end': [on_train_end],
    'on_params_update': [on_params_update],
    'teardown': [teardown],

    # Run in validator
    'on_val_start': [on_val_start],
    'on_val_batch_start': [on_val_batch_start],
    'on_val_batch_end': [on_val_batch_end],
    'on_val_end': [on_val_end],

    # Run in predictor
    'on_predict_start': [on_predict_start],
    'on_predict_batch_start': [on_predict_batch_start],
    'on_predict_postprocess_end': [on_predict_postprocess_end],
    'on_predict_batch_end': [on_predict_batch_end],
    'on_predict_end': [on_predict_end],

    # Run in exporter
    'on_export_start': [on_export_start],
    'on_export_end': [on_export_end]}
def get_default_callbacks():
    """
    Build an independent callbacks mapping seeded from default_callbacks.

    Returns:
        (defaultdict): A deep copy of default_callbacks wrapped in a defaultdict, so that looking up an unknown
            event name yields a new empty list.
    """
    callbacks_copy = deepcopy(default_callbacks)
    return defaultdict(list, callbacks_copy)
def add_integration_callbacks(instance):
    """
    Register the callbacks of every logging integration on the given instance.

    Args:
        instance (Trainer, Predictor, Validator, Exporter): An object with a 'callbacks' attribute that is a dictionary
            of callback lists.
    """
    from .clearml import callbacks as clearml_cb
    from .comet import callbacks as comet_cb
    from .dvc import callbacks as dvc_cb
    from .hub import callbacks as hub_cb
    from .mlflow import callbacks as mlflow_cb
    from .neptune import callbacks as neptune_cb
    from .raytune import callbacks as tune_cb
    from .tensorboard import callbacks as tensorboard_cb
    from .wb import callbacks as wb_cb

    integrations = (clearml_cb, comet_cb, hub_cb, mlflow_cb, neptune_cb, tune_cb, tensorboard_cb, wb_cb, dvc_cb)
    for integration in integrations:
        for event, func in integration.items():
            if func in instance.callbacks[event]:  # already registered, skip to avoid duplicates
                continue
            instance.callbacks[event].append(func)

View File

@ -0,0 +1,143 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
import re
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
from ultralytics.utils import LOGGER, TESTS_RUNNING
from ultralytics.utils.torch_utils import model_info_for_loggers
try:
import clearml
from clearml import Task
from clearml.binding.frameworks.pytorch_bind import PatchPyTorchModelIO
from clearml.binding.matplotlib_bind import PatchedMatplotlib
assert hasattr(clearml, '__version__') # verify package is not directory
assert not TESTS_RUNNING # do not log pytest
except (ImportError, AssertionError):
clearml = None
def _log_debug_samples(files, title='Debug Samples') -> None:
    """
    Log files (images) as debug samples in the ClearML task.

    Files that do not exist are skipped, and nothing is logged when there is no current task.

    Args:
        files (list): A list of file paths in PosixPath format.
        title (str): A title that groups together images with the same values.
    """
    task = Task.current_task()
    if not task:
        return
    for f in files:
        if not f.exists():
            continue
        it = re.search(r'_batch(\d+)', f.name)
        iteration = int(it.groups()[0]) if it else 0
        # Strip the '_batch<N>' tag from the series name only when it is present; a file name without the tag
        # previously crashed on `it.group()` because `it` is None
        series = f.name.replace(it.group(), '') if it else f.name
        task.get_logger().report_image(title=title, series=series, local_path=str(f), iteration=iteration)
def _log_plot(title, plot_path) -> None:
    """
    Report a saved image file to ClearML as a non-interactive matplotlib figure.

    Args:
        title (str): The title of the plot.
        plot_path (str): The path to the saved image file.
    """
    image = mpimg.imread(plot_path)
    figure = plt.figure()
    # A single frameless axes spanning the whole figure, without ticks
    axes = figure.add_axes([0, 0, 1, 1], frameon=False, aspect='auto', xticks=[], yticks=[])
    axes.imshow(image)

    logger = Task.current_task().get_logger()
    logger.report_matplotlib_figure(title=title, series='', figure=figure, report_interactive=False)
def on_pretrain_routine_start(trainer):
    """Reuse the current ClearML task or create a new one, then connect the trainer arguments to it."""
    try:
        task = Task.current_task()
        if not task:
            task = Task.init(project_name=trainer.args.project or 'YOLOv8',
                             task_name=trainer.args.name,
                             tags=['YOLOv8'],
                             output_uri=True,
                             reuse_last_task_id=False,
                             auto_connect_frameworks={
                                 'pytorch': False,
                                 'matplotlib': False})
            LOGGER.warning('ClearML Initialized a new task. If you want to run remotely, '
                           'please add clearml-init and connect your arguments before initializing YOLO.')
        else:
            # Make sure the automatic pytorch and matplotlib bindings are disabled!
            # We are logging these plots and model files manually in the integration
            PatchPyTorchModelIO.update_current_task(None)
            PatchedMatplotlib.update_current_task(None)
        task.connect(vars(trainer.args), name='General')
    except Exception as e:
        LOGGER.warning(f'WARNING ⚠️ ClearML installed but not initialized correctly, not logging this run. {e}')
def on_train_epoch_end(trainer):
    """Logs mosaic debug samples when trainer.epoch == 1 and reports the validator metrics for the epoch."""
    task = Task.current_task()
    if task:
        # Log debug samples once, when trainer.epoch == 1
        if trainer.epoch == 1:
            _log_debug_samples(sorted(trainer.save_dir.glob('train_batch*.jpg')), 'Mosaic')
        # Report the current training progress
        for k, v in trainer.validator.metrics.results_dict.items():
            task.get_logger().report_scalar('train', k, v, iteration=trainer.epoch)
def on_fit_epoch_end(trainer):
    """Report the epoch time to ClearML and, when trainer.epoch == 0, the model information values."""
    task = Task.current_task()
    if not task:
        return
    # You should have access to the validation bboxes under jdict
    task.get_logger().report_scalar(title='Epoch Time',
                                    series='Epoch Time',
                                    value=trainer.epoch_time,
                                    iteration=trainer.epoch)
    if trainer.epoch != 0:
        return
    for name, value in model_info_for_loggers(trainer).items():
        task.get_logger().report_single_value(name, value)
def on_val_end(validator):
    """Log the validator's 'val*.jpg' images as 'Validation' debug samples when a ClearML task is active."""
    if not Task.current_task():
        return
    # Log val_labels and val_pred
    sample_images = sorted(validator.save_dir.glob('val*.jpg'))
    _log_debug_samples(sample_images, 'Validation')
def on_train_end(trainer):
    """Log the final plots, final metrics and the best model to the current ClearML task."""
    task = Task.current_task()
    if not task:
        return

    # Log final results, CM matrix + PR plots
    plot_names = ['results.png', 'confusion_matrix.png', 'confusion_matrix_normalized.png']
    plot_names.extend(f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R'))
    existing = [(trainer.save_dir / name) for name in plot_names if (trainer.save_dir / name).exists()]  # filter
    for plot_file in existing:
        _log_plot(title=plot_file.stem, plot_path=plot_file)

    # Report final metrics
    for name, value in trainer.validator.metrics.results_dict.items():
        task.get_logger().report_single_value(name, value)

    # Log the final model
    task.update_output_model(model_path=str(trainer.best), model_name=trainer.args.name, auto_delete_file=False)
# Event name -> ClearML callback; left empty when the clearml import guard above set `clearml` to None
callbacks = {
    'on_pretrain_routine_start': on_pretrain_routine_start,
    'on_train_epoch_end': on_train_epoch_end,
    'on_fit_epoch_end': on_fit_epoch_end,
    'on_val_end': on_val_end,
    'on_train_end': on_train_end} if clearml else {}

View File

@ -0,0 +1,368 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
import os
from pathlib import Path
from ultralytics.utils import LOGGER, RANK, TESTS_RUNNING, ops
from ultralytics.utils.torch_utils import model_info_for_loggers
try:
import comet_ml
assert not TESTS_RUNNING # do not log pytest
assert hasattr(comet_ml, '__version__') # verify package is not directory
except (ImportError, AssertionError):
comet_ml = None
# Ensures certain logging functions only run for supported tasks
COMET_SUPPORTED_TASKS = ['detect']
# Names of plots created by YOLOv8 that are logged to Comet
EVALUATION_PLOT_NAMES = 'F1_curve', 'P_curve', 'R_curve', 'PR_curve', 'confusion_matrix'
LABEL_PLOT_NAMES = 'labels', 'labels_correlogram'
# Running count of images logged by _log_image_predictions; reset to 0 in on_train_end
_comet_image_prediction_count = 0
def _get_comet_mode():
    """Return the value of the COMET_MODE environment variable, 'online' when unset."""
    return os.environ.get('COMET_MODE', 'online')
def _get_comet_model_name():
    """Return the value of the COMET_MODEL_NAME environment variable, 'YOLOv8' when unset."""
    return os.environ.get('COMET_MODEL_NAME', 'YOLOv8')
def _get_eval_batch_logging_interval():
    """Return COMET_EVAL_BATCH_LOGGING_INTERVAL from the environment as an int, 1 when unset."""
    interval = os.getenv('COMET_EVAL_BATCH_LOGGING_INTERVAL', 1)
    return int(interval)
def _get_max_image_predictions_to_log():
    """Return COMET_MAX_IMAGE_PREDICTIONS from the environment as an int, 100 when unset."""
    limit = os.getenv('COMET_MAX_IMAGE_PREDICTIONS', 100)
    return int(limit)
def _scale_confidence_score(score):
    """Multiply score by COMET_MAX_CONFIDENCE_SCORE from the environment (100.0 when unset)."""
    return score * float(os.getenv('COMET_MAX_CONFIDENCE_SCORE', 100.0))
def _should_log_confusion_matrix():
    """Return True when COMET_EVAL_LOG_CONFUSION_MATRIX equals 'true' (case-insensitive); 'false' when unset."""
    flag = os.getenv('COMET_EVAL_LOG_CONFUSION_MATRIX', 'false')
    return flag.lower() == 'true'
def _should_log_image_predictions():
    """Return True when COMET_EVAL_LOG_IMAGE_PREDICTIONS equals 'true' (case-insensitive); 'true' when unset."""
    flag = os.getenv('COMET_EVAL_LOG_IMAGE_PREDICTIONS', 'true')
    return flag.lower() == 'true'
def _get_experiment_type(mode, project_name):
    """Create an OfflineExperiment when mode is 'offline', otherwise an Experiment, for the given project name."""
    experiment_cls = comet_ml.OfflineExperiment if mode == 'offline' else comet_ml.Experiment
    return experiment_cls(project_name=project_name)
def _create_experiment(args):
    """Create a Comet experiment and log the run parameters; does nothing unless RANK is -1 or 0."""
    if RANK not in (-1, 0):
        return
    try:
        mode = _get_comet_mode()
        project = os.getenv('COMET_PROJECT_NAME', args.project)
        experiment = _get_experiment_type(mode, project)
        experiment.log_parameters(vars(args))
        logging_settings = {
            'eval_batch_logging_interval': _get_eval_batch_logging_interval(),
            'log_confusion_matrix_on_eval': _should_log_confusion_matrix(),
            'log_image_predictions': _should_log_image_predictions(),
            'max_image_predictions': _get_max_image_predictions_to_log(), }
        experiment.log_others(logging_settings)
        experiment.log_other('Created from', 'yolov8')

    except Exception as e:
        LOGGER.warning(f'WARNING ⚠️ Comet installed but not initialized correctly, not logging this run. {e}')
def _fetch_trainer_metadata(trainer):
    """
    Returns metadata for YOLO training including epoch and asset saving status.

    Args:
        trainer: Object exposing epoch, epochs, batch_size, train_loader.dataset, args.save and args.save_period.

    Returns:
        (dict): Keys 'curr_epoch' (trainer.epoch + 1), 'curr_step' (curr_epoch times the number of whole batches
            in the training dataset), 'save_assets' and 'final_epoch'.
    """
    curr_epoch = trainer.epoch + 1

    train_num_steps_per_epoch = len(trainer.train_loader.dataset) // trainer.batch_size
    curr_step = curr_epoch * train_num_steps_per_epoch
    final_epoch = curr_epoch == trainer.epochs

    save = trainer.args.save
    save_period = trainer.args.save_period
    # The modulo is evaluated only after `save_period > 0` has passed, so save_period=0 can no longer raise
    # ZeroDivisionError
    save_assets = save and save_period > 0 and curr_epoch % save_period == 0 and not final_epoch

    return dict(
        curr_epoch=curr_epoch,
        curr_step=curr_step,
        save_assets=save_assets,
        final_epoch=final_epoch,
    )
def _scale_bounding_box_to_original_image_shape(box, resized_image_shape, original_image_shape, ratio_pad):
    """
    Rescale a bounding box label from the resized image back to the original image shape.

    YOLOv8 resizes images during training and the label values are normalized based on this resized shape. The
    result is returned as a list after the first two values are shifted from box center to top-left corner.
    """
    resized_h, resized_w = resized_image_shape

    # Convert normalized xywh format predictions to xyxy in resized scale format
    xyxy = ops.xywhn2xyxy(box, h=resized_h, w=resized_w)
    # Scale box predictions from resized image scale back to original image scale
    xyxy = ops.scale_boxes(resized_image_shape, xyxy, original_image_shape, ratio_pad)
    # Convert bounding box format from xyxy to xywh for Comet logging
    xywh = ops.xyxy2xywh(xyxy)
    # Adjust xy center to correspond top-left corner
    xywh[:2] -= xywh[2:] / 2
    return xywh.tolist()
def _format_ground_truth_annotations_for_detection(img_idx, image_path, batch, class_name_map=None):
    """Build the 'ground_truth' annotation dict for one image of a batch, or return None if it has no boxes."""
    selected = batch['batch_idx'] == img_idx
    bboxes = batch['bboxes'][selected]
    if len(bboxes) == 0:
        LOGGER.debug(f'COMET WARNING: Image: {image_path} has no bounding boxes labels')
        return None

    cls_labels = batch['cls'][selected].squeeze(1).tolist()
    if class_name_map:
        cls_labels = [str(class_name_map[label]) for label in cls_labels]

    original_image_shape = batch['ori_shape'][img_idx]
    resized_image_shape = batch['resized_shape'][img_idx]
    ratio_pad = batch['ratio_pad'][img_idx]

    entries = []
    for raw_box, label in zip(bboxes, cls_labels):
        scaled_box = _scale_bounding_box_to_original_image_shape(raw_box, resized_image_shape, original_image_shape,
                                                                 ratio_pad)
        entry = {
            'boxes': [scaled_box],
            'label': f'gt_{label}',
            'score': _scale_confidence_score(1.0), }
        entries.append(entry)

    return {'name': 'ground_truth', 'data': entries}
def _format_prediction_annotations_for_detection(image_path, metadata, class_label_map=None):
    """Build the 'prediction' annotation dict for one image, or return None if it has no predictions."""
    stem = image_path.stem
    # Numeric file stems are looked up as ints, anything else as the stem string
    image_id = int(stem) if stem.isnumeric() else stem

    predictions = metadata.get(image_id)
    if not predictions:
        LOGGER.debug(f'COMET WARNING: Image: {image_path} has no bounding boxes predictions')
        return None

    entries = []
    for pred in predictions:
        bbox = pred['bbox']
        scaled_score = _scale_confidence_score(pred['score'])
        label = pred['category_id']
        if class_label_map:
            label = str(class_label_map[label])
        entries.append({'boxes': [bbox], 'label': label, 'score': scaled_score})

    return {'name': 'prediction', 'data': entries}
def _fetch_annotations(img_idx, image_path, batch, prediction_metadata_map, class_label_map):
    """Collect the ground truth and prediction annotations that exist; return None when neither does."""
    ground_truth = _format_ground_truth_annotations_for_detection(img_idx, image_path, batch, class_label_map)
    prediction = _format_prediction_annotations_for_detection(image_path, prediction_metadata_map, class_label_map)

    available = []
    for candidate in (ground_truth, prediction):
        if candidate is not None:
            available.append(candidate)
    if available:
        return [available]
    return None
def _create_prediction_metadata_map(model_predictions):
    """Group model predictions into a dict keyed by each prediction's 'image_id'."""
    grouped = {}
    for pred in model_predictions:
        grouped.setdefault(pred['image_id'], []).append(pred)
    return grouped
def _log_confusion_matrix(experiment, trainer, curr_step, curr_epoch):
    """Log the validator's confusion matrix to the Comet experiment, labelled with class names + 'background'."""
    matrix = trainer.validator.confusion_matrix.matrix
    labels = [*trainer.data['names'].values(), 'background']
    experiment.log_confusion_matrix(matrix=matrix,
                                    labels=labels,
                                    max_categories=len(labels),
                                    epoch=curr_epoch,
                                    step=curr_step)
def _log_images(experiment, image_paths, curr_step, annotations=None):
    """Log each image to the experiment under its file stem, pairing it with an annotation when given."""
    if not annotations:
        for path in image_paths:
            experiment.log_image(path, name=path.stem, step=curr_step)
        return

    for path, annotation in zip(image_paths, annotations):
        experiment.log_image(path, name=path.stem, step=curr_step, annotations=annotation)
def _log_image_predictions(experiment, validator, curr_step):
    """
    Log validation images with their annotations until the configured maximum number of images is reached.

    Nothing is logged when the validator task is not in COMET_SUPPORTED_TASKS or validator.jdict is empty.
    """
    global _comet_image_prediction_count

    if validator.args.task not in COMET_SUPPORTED_TASKS:
        return

    jdict = validator.jdict
    if not jdict:
        return

    predictions_by_image = _create_prediction_metadata_map(jdict)
    dataloader = validator.dataloader
    class_label_map = validator.names

    interval = _get_eval_batch_logging_interval()
    image_limit = _get_max_image_predictions_to_log()

    for batch_number, batch in enumerate(dataloader, start=1):
        if batch_number % interval != 0:  # only every `interval`-th batch is logged
            continue

        for img_idx, raw_path in enumerate(batch['im_file']):
            if _comet_image_prediction_count >= image_limit:
                return

            image_path = Path(raw_path)
            annotations = _fetch_annotations(img_idx, image_path, batch, predictions_by_image, class_label_map)
            _log_images(experiment, [image_path], curr_step, annotations=annotations)
            _comet_image_prediction_count += 1
def _log_plots(experiment, trainer):
    """Log the evaluation plots (.png) and label plots (.jpg) from trainer.save_dir to the experiment."""
    evaluation_plots = []
    for stem in EVALUATION_PLOT_NAMES:
        evaluation_plots.append(trainer.save_dir / f'{stem}.png')
    _log_images(experiment, evaluation_plots, None)

    label_plots = []
    for stem in LABEL_PLOT_NAMES:
        label_plots.append(trainer.save_dir / f'{stem}.jpg')
    _log_images(experiment, label_plots, None)
def _log_model(experiment, trainer):
    """Upload trainer.best to Comet as 'best.pt' under the configured model name, overwriting any existing file."""
    experiment.log_model(_get_comet_model_name(),
                         file_or_folder=str(trainer.best),
                         file_name='best.pt',
                         overwrite=True)
def on_pretrain_routine_start(trainer):
    """Create a Comet experiment unless a global experiment already exists and reports itself as alive."""
    experiment = comet_ml.get_global_experiment()
    alive = getattr(experiment, 'alive', False)
    if experiment and alive:
        return
    _create_experiment(trainer.args)
def on_train_epoch_end(trainer):
    """Log the labelled training loss items, plus the train batch images on the first epoch."""
    experiment = comet_ml.get_global_experiment()
    if not experiment:
        return

    metadata = _fetch_trainer_metadata(trainer)
    epoch, step = metadata['curr_epoch'], metadata['curr_step']

    train_losses = trainer.label_loss_items(trainer.tloss, prefix='train')
    experiment.log_metrics(train_losses, step=step, epoch=epoch)

    if epoch == 1:
        _log_images(experiment, trainer.save_dir.glob('train_batch*.jpg'), step)
def on_fit_epoch_end(trainer):
    """Log metrics and learning rates for the epoch and, when assets are due, the model and evaluation data."""
    experiment = comet_ml.get_global_experiment()
    if not experiment:
        return

    metadata = _fetch_trainer_metadata(trainer)
    epoch, step = metadata['curr_epoch'], metadata['curr_step']

    experiment.log_metrics(trainer.metrics, step=step, epoch=epoch)
    experiment.log_metrics(trainer.lr, step=step, epoch=epoch)
    if epoch == 1:
        experiment.log_metrics(model_info_for_loggers(trainer), step=step, epoch=epoch)

    if metadata['save_assets']:
        _log_model(experiment, trainer)
        if _should_log_confusion_matrix():
            _log_confusion_matrix(experiment, trainer, step, epoch)
        if _should_log_image_predictions():
            _log_image_predictions(experiment, trainer.validator, step)
def on_train_end(trainer):
    """Log the final model, plots, confusion matrix and image predictions, then end the Comet experiment."""
    global _comet_image_prediction_count

    experiment = comet_ml.get_global_experiment()
    if not experiment:
        return

    metadata = _fetch_trainer_metadata(trainer)
    epoch, step = metadata['curr_epoch'], metadata['curr_step']
    plots_enabled = trainer.args.plots

    _log_model(experiment, trainer)
    if plots_enabled:
        _log_plots(experiment, trainer)

    _log_confusion_matrix(experiment, trainer, step, epoch)
    _log_image_predictions(experiment, trainer.validator, step)
    experiment.end()

    # Reset the logged-image counter now that the experiment has ended
    _comet_image_prediction_count = 0
# Event name -> Comet callback; left empty when the comet_ml import guard above set `comet_ml` to None
callbacks = {
    'on_pretrain_routine_start': on_pretrain_routine_start,
    'on_train_epoch_end': on_train_epoch_end,
    'on_fit_epoch_end': on_fit_epoch_end,
    'on_train_end': on_train_end} if comet_ml else {}

View File

@ -0,0 +1,136 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
import os
import pkg_resources as pkg
from ultralytics.utils import LOGGER, TESTS_RUNNING
from ultralytics.utils.torch_utils import model_info_for_loggers
try:
from importlib.metadata import version
import dvclive
assert not TESTS_RUNNING # do not log pytest
ver = version('dvclive')
if pkg.parse_version(ver) < pkg.parse_version('2.11.0'):
LOGGER.debug(f'DVCLive is detected but version {ver} is incompatible (>=2.11 required).')
dvclive = None # noqa: F811
except (ImportError, AssertionError, TypeError):
dvclive = None
# DVCLive logger instance; assigned in on_pretrain_routine_start and None while logging is off
live = None
# Maps each already-logged plot to the timestamp it was logged with, so unchanged plots are not logged again
_processed_plots = {}
# `on_fit_epoch_end` is also called on the final validation (this probably needs to be fixed).
# For now, this flag is how we distinguish the final evaluation of the best model from the
# last-epoch validation: it is set in on_train_epoch_start and cleared in on_fit_epoch_end.
_training_epoch = False
def _logger_disabled():
    """Return True when ULTRALYTICS_DVC_DISABLED equals 'true' (case-insensitive); 'false' when unset."""
    flag = os.getenv('ULTRALYTICS_DVC_DISABLED', 'false')
    return flag.lower() == 'true'
def _log_images(image_path, prefix=''):
    """Log one image to DVCLive under '<prefix>/<file name>'; does nothing when no logger is active."""
    if not live:
        return
    logged_name = os.path.join(prefix, image_path.name)
    live.log_image(logged_name, image_path)
def _log_plots(plots, prefix=''):
    """Log every plot whose timestamp differs from the one recorded in _processed_plots, then record it."""
    for plot_path, meta in plots.items():
        stamp = meta['timestamp']
        if _processed_plots.get(plot_path) == stamp:  # already logged at this timestamp
            continue
        _log_images(plot_path, prefix)
        _processed_plots[plot_path] = stamp
def _log_confusion_matrix(validator):
    """Expand the validator's confusion matrix counts into target/prediction label lists and log them to DVCLive."""
    matrix = validator.confusion_matrix.matrix
    names = list(validator.names.values())
    if validator.confusion_matrix.task == 'detect':
        names += ['background']

    targets, preds = [], []
    # Each cell count becomes that many repeated (target, prediction) label pairs
    for target_idx, column in enumerate(matrix.T.astype(int)):
        for pred_idx, count in enumerate(column):
            targets.extend([names[target_idx]] * count)
            preds.extend([names[pred_idx]] * count)

    live.log_sklearn_plot('confusion_matrix', targets, preds, name='cf.json', normalized=True)
def on_pretrain_routine_start(trainer):
    """Create the module-level DVCLive `Live` instance unless logging is disabled via the environment."""
    global live
    try:
        if _logger_disabled():
            LOGGER.debug('DVCLive is detected and auto logging is disabled via `ULTRALYTICS_DVC_DISABLED`.')
            live = None
        else:
            live = dvclive.Live(save_dvc_exp=True, cache_images=True)
            LOGGER.info(
                'DVCLive is detected and auto logging is enabled (can be disabled with `ULTRALYTICS_DVC_DISABLED=true`).'
            )
    except Exception as e:
        # Best effort: a failed initialization must not stop training
        LOGGER.warning(f'WARNING ⚠️ DVCLive installed but not initialized correctly, not logging this run. {e}')
def on_pretrain_routine_end(trainer):
    """Log the trainer's current plots under the 'train' prefix."""
    train_plots = trainer.plots
    _log_plots(train_plots, 'train')
def on_train_start(trainer):
    """Log the training arguments as DVCLive params when a Live instance exists."""
    if not live:
        return
    live.log_params(trainer.args)
def on_train_epoch_start(trainer):
    """Mark that a training epoch is in progress so the next `on_fit_epoch_end` call logs a step."""
    global _training_epoch

    _training_epoch = True
def on_fit_epoch_end(trainer):
    """
    Log train losses, metrics, learning rates and new plots for the epoch, then advance the DVCLive step.

    Runs only when a Live instance exists and `on_train_epoch_start` flagged a training epoch; the flag is
    cleared afterwards so the final validation of the best model is not logged as another step.
    """
    global _training_epoch
    if not (live and _training_epoch):
        return

    train_losses = trainer.label_loss_items(trainer.tloss, prefix='train')
    for key, val in {**train_losses, **trainer.metrics, **trainer.lr}.items():
        live.log_metric(key, val)

    if trainer.epoch == 0:
        # Model info is logged once, on the first epoch, without a plot
        for key, val in model_info_for_loggers(trainer).items():
            live.log_metric(key, val, plot=False)

    _log_plots(trainer.plots, 'train')
    _log_plots(trainer.validator.plots, 'val')

    live.next_step()
    _training_epoch = False
def on_train_end(trainer):
    """Log final metrics, evaluation plots, the confusion matrix and the best checkpoint, then end the run."""
    if not live:
        return

    # At the end log the best metrics. It runs validator on the best model internally.
    train_losses = trainer.label_loss_items(trainer.tloss, prefix='train')
    final_metrics = {**train_losses, **trainer.metrics, **trainer.lr}
    for key, val in final_metrics.items():
        live.log_metric(key, val, plot=False)

    for plot_group in (trainer.plots, trainer.validator.plots):
        _log_plots(plot_group, 'eval')
    _log_confusion_matrix(trainer.validator)

    if trainer.best.exists():
        live.log_artifact(trainer.best, copy=True)

    live.end()
# Register the DVCLive callbacks only when the `dvclive` import (and version check) succeeded.
if dvclive:
    callbacks = {
        'on_pretrain_routine_start': on_pretrain_routine_start,
        'on_pretrain_routine_end': on_pretrain_routine_end,
        'on_train_start': on_train_start,
        'on_train_epoch_start': on_train_epoch_start,
        'on_fit_epoch_end': on_fit_epoch_end,
        'on_train_end': on_train_end}
else:
    callbacks = {}

View File

@ -0,0 +1,87 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
import json
from time import time
from ultralytics.hub.utils import PREFIX, events
from ultralytics.utils import LOGGER
from ultralytics.utils.torch_utils import model_info_for_loggers
def on_pretrain_routine_end(trainer):
    """Log the HUB model URL and start the upload rate-limit timers; no-op when the trainer has no HUB session."""
    session = getattr(trainer, 'hub_session', None)
    if not session:
        return
    LOGGER.info(f'{PREFIX}View model at https://hub.ultralytics.com/models/{session.model_id} 🚀')
    # Separate timers for metric uploads and checkpoint uploads
    session.timers = {'metrics': time(), 'ckpt': time()}
def on_fit_epoch_end(trainer):
    """
    Queue this epoch's metrics as JSON and upload the queue once the metrics rate limit has elapsed.

    Train losses and validation metrics are merged (plus model info on epoch 0) and stored in
    `session.metrics_queue` keyed by epoch. After an upload the timer and the queue are reset.
    """
    session = getattr(trainer, 'hub_session', None)
    if not session:
        return

    payload = {**trainer.label_loss_items(trainer.tloss, prefix='train'), **trainer.metrics}
    if trainer.epoch == 0:
        payload = {**payload, **model_info_for_loggers(trainer)}
    session.metrics_queue[trainer.epoch] = json.dumps(payload)

    elapsed = time() - session.timers['metrics']
    if elapsed > session.rate_limits['metrics']:
        session.upload_metrics()
        session.timers['metrics'] = time()  # reset timer
        session.metrics_queue = {}  # reset queue
def on_model_save(trainer):
    """Upload the latest checkpoint to Ultralytics HUB when the checkpoint rate limit has elapsed."""
    session = getattr(trainer, 'hub_session', None)
    if not session:
        return

    is_best = trainer.best_fitness == trainer.fitness
    waited = time() - session.timers['ckpt']
    if waited > session.rate_limits['ckpt']:
        LOGGER.info(f'{PREFIX}Uploading checkpoint https://hub.ultralytics.com/models/{session.model_id}')
        session.upload_model(trainer.epoch, trainer.last, is_best)
        session.timers['ckpt'] = time()  # reset timer
def on_train_end(trainer):
    """Upload the final (best) model with its mAP50-95 to Ultralytics HUB and stop the session heartbeats."""
    session = getattr(trainer, 'hub_session', None)
    if not session:
        return

    LOGGER.info(f'{PREFIX}Syncing final model...')
    final_map = trainer.metrics.get('metrics/mAP50-95(B)', 0)
    session.upload_model(trainer.epoch, trainer.best, map=final_map, final=True)
    session.alive = False  # stop heartbeats
    LOGGER.info(f'{PREFIX}Done ✅\n'
                f'{PREFIX}View model at https://hub.ultralytics.com/models/{session.model_id} 🚀')
def on_train_start(trainer):
    """Pass the trainer's arguments to `events` when training starts."""
    train_args = trainer.args
    events(train_args)
def on_val_start(validator):
    """Pass the validator's arguments to `events` when validation starts."""
    val_args = validator.args
    events(val_args)
def on_predict_start(predictor):
    """Pass the predictor's arguments to `events` when prediction starts."""
    predict_args = predictor.args
    events(predict_args)
def on_export_start(exporter):
    """Pass the exporter's arguments to `events` when export starts."""
    export_args = exporter.args
    events(export_args)
# HUB callbacks are always registered; each one checks for a HUB session (or just reports an event) itself.
callbacks = dict(
    on_pretrain_routine_end=on_pretrain_routine_end,
    on_fit_epoch_end=on_fit_epoch_end,
    on_model_save=on_model_save,
    on_train_end=on_train_end,
    on_train_start=on_train_start,
    on_val_start=on_val_start,
    on_predict_start=on_predict_start,
    on_export_start=on_export_start)

View File

@ -0,0 +1,71 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
import os
import re
from pathlib import Path
from ultralytics.utils import LOGGER, TESTS_RUNNING, colorstr
# Optional MLflow import: `mlflow` becomes None (integration disabled) when the package cannot be
# imported, when tests are running, or when the imported `mlflow` has no `__version__` attribute.
try:
    import mlflow

    assert not TESTS_RUNNING  # do not log pytest
    assert hasattr(mlflow, '__version__')  # verify package is not directory
except (ImportError, AssertionError):
    mlflow = None
def on_pretrain_routine_end(trainer):
    """
    Configure MLflow from the environment, start (or reuse) a run and log the training parameters.

    MLflow logging is switched off for the rest of the process (module-level `mlflow` set to None) when
    `MLFLOW_TRACKING_URI` is not set. The experiment name comes from `MLFLOW_EXPERIMENT_NAME`, then
    `trainer.args.project`, then '/Shared/YOLOv8'; the run name from `MLFLOW_RUN`, then `trainer.args.name`.

    Args:
        trainer: Trainer providing `args.project`, `args.name` and `model.args`.
    """
    global mlflow, run, run_id, experiment_name

    if os.environ.get('MLFLOW_TRACKING_URI') is None:
        mlflow = None

    if mlflow:
        mlflow_location = os.environ['MLFLOW_TRACKING_URI']  # "http://192.168.xxx.xxx:5000"
        mlflow.set_tracking_uri(mlflow_location)

        experiment_name = os.environ.get('MLFLOW_EXPERIMENT_NAME') or trainer.args.project or '/Shared/YOLOv8'
        run_name = os.environ.get('MLFLOW_RUN') or trainer.args.name
        experiment = mlflow.get_experiment_by_name(experiment_name)
        # Keep the ID returned by create_experiment(): on this path `experiment` is still None, and reading
        # `experiment.experiment_id` from it made start_run() fail for every newly created experiment.
        if experiment is None:
            experiment_id = mlflow.create_experiment(experiment_name)
        else:
            experiment_id = experiment.experiment_id
        mlflow.set_experiment(experiment_name)

        prefix = colorstr('MLFlow: ')
        try:
            run, active_run = mlflow, mlflow.active_run()
            if not active_run:
                active_run = mlflow.start_run(experiment_id=experiment_id, run_name=run_name)
            run_id = active_run.info.run_id
            LOGGER.info(f'{prefix}Using run_id({run_id}) at {mlflow_location}')
            run.log_params(vars(trainer.model.args))
        except Exception as err:
            # Best effort: a tracking failure must not stop training
            LOGGER.error(f'{prefix}Failing init - {repr(err)}')
            LOGGER.warning(f'{prefix}Continuing without Mlflow')
def on_fit_epoch_end(trainer):
    """Log the epoch's metrics to MLflow as floats, with parentheses removed from the metric names."""
    if not mlflow:
        return
    cleaned = {}
    for key, value in trainer.metrics.items():
        cleaned[re.sub('[()]', '', key)] = float(value)
    run.log_metrics(metrics=cleaned, step=trainer.epoch)
def on_train_end(trainer):
    """Log the last and best checkpoints as MLflow artifacts and register a pyfunc model for the run."""
    if not mlflow:
        return
    root_dir = Path(__file__).resolve().parents[3]
    for checkpoint in (trainer.last, trainer.best):
        run.log_artifact(checkpoint)
    run.pyfunc.log_model(
        artifact_path=experiment_name,
        code_path=[str(root_dir)],
        artifacts={'model_path': str(trainer.save_dir)},
        python_model=run.pyfunc.PythonModel(),
    )
# Register the MLflow callbacks only when the `mlflow` import succeeded.
if mlflow:
    callbacks = {
        'on_pretrain_routine_end': on_pretrain_routine_end,
        'on_fit_epoch_end': on_fit_epoch_end,
        'on_train_end': on_train_end}
else:
    callbacks = {}

View File

@ -0,0 +1,103 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
from ultralytics.utils import LOGGER, TESTS_RUNNING
from ultralytics.utils.torch_utils import model_info_for_loggers
# Optional Neptune import: `neptune` becomes None (integration disabled) when the package cannot be
# imported, when tests are running, or when the imported `neptune` has no `__version__` attribute.
try:
    import neptune
    from neptune.types import File

    assert not TESTS_RUNNING  # do not log pytest
    assert hasattr(neptune, '__version__')
except (ImportError, AssertionError):
    neptune = None

run = None  # NeptuneAI experiment logger instance (set in `on_pretrain_routine_start`)
def _log_scalars(scalars, step=0):
    """Append each scalar to its NeptuneAI series at the given step; no-op when no run is active."""
    if not run:
        return
    for name, value in scalars.items():
        run[name].append(value=value, step=step)
def _log_images(imgs_dict, group=''):
    """Upload each image in `imgs_dict` to NeptuneAI under `group/<key>`; no-op when no run is active."""
    if not run:
        return
    for name, image in imgs_dict.items():
        run[f'{group}/{name}'].upload(File(image))
def _log_plot(title, plot_path):
    """
    Log a saved image as a plot in the 'Plots' section of NeptuneAI.

    The image is drawn on a borderless, tick-free matplotlib figure, uploaded, and the figure is then closed
    so that repeated calls do not accumulate open figures.

    Args:
        title (str): Title of the plot.
        plot_path (PosixPath | str): Path to the saved image file.
    """
    img = mpimg.imread(plot_path)
    fig = plt.figure()
    try:
        ax = fig.add_axes([0, 0, 1, 1], frameon=False, aspect='auto', xticks=[], yticks=[])  # no ticks
        ax.imshow(img)
        run[f'Plots/{title}'].upload(fig)
    finally:
        plt.close(fig)  # pyplot keeps every figure alive until it is explicitly closed
def on_pretrain_routine_start(trainer):
    """Start a NeptuneAI run for this training and record the hyperparameters (None values become '')."""
    global run
    try:
        run = neptune.init_run(project=trainer.args.project or 'YOLOv8', name=trainer.args.name, tags=['YOLOv8'])
        hyperparameters = {}
        for key, value in vars(trainer.args).items():
            hyperparameters[key] = '' if value is None else value
        run['Configuration/Hyperparameters'] = hyperparameters
    except Exception as e:
        # Best effort: a failed initialization must not stop training
        LOGGER.warning(f'WARNING ⚠️ NeptuneAI installed but not initialized correctly, not logging this run. {e}')
def on_train_epoch_end(trainer):
    """Log train losses and learning rates for the epoch; when `trainer.epoch == 1` also upload train batch images."""
    step = trainer.epoch + 1
    _log_scalars(trainer.label_loss_items(trainer.tloss, prefix='train'), step)
    _log_scalars(trainer.lr, step)
    if trainer.epoch == 1:
        mosaics = {}
        for image_file in trainer.save_dir.glob('train_batch*.jpg'):
            mosaics[image_file.stem] = str(image_file)
        _log_images(mosaics, 'Mosaic')
def on_fit_epoch_end(trainer):
    """Log the epoch's metrics; on the first epoch of an active run also record the model info."""
    first_epoch = trainer.epoch == 0
    if run and first_epoch:
        run['Configuration/Model'] = model_info_for_loggers(trainer)
    _log_scalars(trainer.metrics, trainer.epoch + 1)
def on_val_end(validator):
    """Upload every `val*.jpg` image in the validator's save directory to the 'Validation' group."""
    if not run:
        return
    # Log val_labels and val_pred
    val_images = {image.stem: str(image) for image in validator.save_dir.glob('val*.jpg')}
    _log_images(val_images, 'Validation')
def on_train_end(trainer):
    """Upload the final result plots that exist in the save directory and the best model weights."""
    if not run:
        return

    # Log final results, CM matrix + PR plots
    plot_names = ['results.png', 'confusion_matrix.png', 'confusion_matrix_normalized.png']
    plot_names.extend(f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R'))
    for plot_name in plot_names:
        plot_file = trainer.save_dir / plot_name
        if plot_file.exists():  # filter
            _log_plot(title=plot_file.stem, plot_path=plot_file)

    # Log the final model
    weights_key = f'weights/{trainer.args.name or trainer.args.task}/{str(trainer.best.name)}'
    run[weights_key].upload(File(str(trainer.best)))
# Register the NeptuneAI callbacks only when the `neptune` import succeeded.
if neptune:
    callbacks = {
        'on_pretrain_routine_start': on_pretrain_routine_start,
        'on_train_epoch_end': on_train_epoch_end,
        'on_fit_epoch_end': on_fit_epoch_end,
        'on_val_end': on_val_end,
        'on_train_end': on_train_end}
else:
    callbacks = {}

View File

@ -0,0 +1,20 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Optional Ray Tune import: `tune` becomes None when the import fails, which leaves `callbacks` empty.
# Note that `ray` and `session` stay undefined in that case; they are only used by the callback.
try:
    import ray
    from ray import tune
    from ray.air import session
except (ImportError, AssertionError):
    tune = None
def on_fit_epoch_end(trainer):
    """Report the epoch's metrics (with an added 'epoch' entry) to Ray Tune when a Tune session is enabled."""
    if not ray.tune.is_session_enabled():
        return
    report = trainer.metrics  # same dict object: the 'epoch' key is written into trainer.metrics itself
    report['epoch'] = trainer.epoch
    session.report(report)
# Register the Ray Tune callback only when the `tune` import succeeded.
if tune:
    callbacks = {'on_fit_epoch_end': on_fit_epoch_end}
else:
    callbacks = {}

View File

@ -0,0 +1,47 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
from ultralytics.utils import LOGGER, TESTS_RUNNING, colorstr
# Optional TensorBoard import: `SummaryWriter` becomes None when the import fails or tests are running.
try:
    from torch.utils.tensorboard import SummaryWriter

    assert not TESTS_RUNNING  # do not log pytest
except (ImportError, AssertionError):
    SummaryWriter = None

writer = None  # TensorBoard SummaryWriter instance (set in `on_pretrain_routine_start`)
def _log_scalars(scalars, step=0):
    """Write each scalar to TensorBoard at the given step; no-op when no writer exists."""
    if not writer:
        return
    for tag, value in scalars.items():
        writer.add_scalar(tag, value, step)
def on_pretrain_routine_start(trainer):
    """Create the module-level TensorBoard SummaryWriter in the trainer's save directory."""
    global writer
    if not SummaryWriter:
        return
    try:
        writer = SummaryWriter(str(trainer.save_dir))
        prefix = colorstr('TensorBoard: ')
        LOGGER.info(f"{prefix}Start with 'tensorboard --logdir {trainer.save_dir}', view at http://localhost:6006/")
    except Exception as e:
        # Best effort: a failed initialization must not stop training
        LOGGER.warning(f'WARNING ⚠️ TensorBoard not initialized correctly, not logging this run. {e}')
def on_batch_end(trainer):
    """Log the current train loss items to TensorBoard, using `epoch + 1` as the step."""
    train_losses = trainer.label_loss_items(trainer.tloss, prefix='train')
    _log_scalars(train_losses, trainer.epoch + 1)
def on_fit_epoch_end(trainer):
    """Log the epoch's metrics to TensorBoard, using `epoch + 1` as the step."""
    step = trainer.epoch + 1
    _log_scalars(trainer.metrics, step)
# TensorBoard callbacks are always registered; they do nothing until a writer has been created.
callbacks = dict(
    on_pretrain_routine_start=on_pretrain_routine_start,
    on_fit_epoch_end=on_fit_epoch_end,
    on_batch_end=on_batch_end)

View File

@ -0,0 +1,60 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
from ultralytics.utils import TESTS_RUNNING
from ultralytics.utils.torch_utils import model_info_for_loggers
# Optional Weights & Biases import: `wb` becomes None (integration disabled) when the package cannot be
# imported, when it has no `__version__` attribute, or when tests are running.
try:
    import wandb as wb

    assert hasattr(wb, '__version__')
    assert not TESTS_RUNNING  # do not log pytest
except (ImportError, AssertionError):
    wb = None

# Maps each already-logged plot to the timestamp it had when it was logged, so unchanged plots are skipped
_processed_plots = {}
def _log_plots(plots, step):
    """Log each plot whose timestamp differs from the one stored in `_processed_plots`, then remember it."""
    for plot_path, info in plots.items():
        stamp = info['timestamp']
        if _processed_plots.get(plot_path, None) == stamp:
            continue  # this exact version of the plot was already logged
        wb.run.log({plot_path.stem: wb.Image(str(plot_path))}, step=step)
        _processed_plots[plot_path] = stamp
def on_pretrain_routine_start(trainer):
    """Start a W&B run for this training unless one is already active."""
    if not wb.run:
        wb.init(project=trainer.args.project or 'YOLOv8', name=trainer.args.name, config=vars(trainer.args))
def on_fit_epoch_end(trainer):
    """Log the epoch's metrics and new plots; on the first epoch also log the model info."""
    step = trainer.epoch + 1
    wb.run.log(trainer.metrics, step=step)
    for plot_group in (trainer.plots, trainer.validator.plots):
        _log_plots(plot_group, step=step)
    if trainer.epoch == 0:
        wb.run.log(model_info_for_loggers(trainer), step=step)
def on_train_epoch_end(trainer):
    """Log train losses and learning rates for the epoch; when `trainer.epoch == 1` also log the trainer's plots."""
    step = trainer.epoch + 1
    train_losses = trainer.label_loss_items(trainer.tloss, prefix='train')
    wb.run.log(train_losses, step=step)
    wb.run.log(trainer.lr, step=step)
    if trainer.epoch == 1:
        _log_plots(trainer.plots, step=step)
def on_train_end(trainer):
    """Log the remaining plots and, when the best checkpoint exists, upload it as a 'best' model artifact."""
    step = trainer.epoch + 1
    for plot_group in (trainer.validator.plots, trainer.plots):
        _log_plots(plot_group, step=step)
    artifact = wb.Artifact(type='model', name=f'run_{wb.run.id}_model')
    if not trainer.best.exists():
        return
    artifact.add_file(trainer.best)
    wb.run.log_artifact(artifact, aliases=['best'])
# Register the W&B callbacks only when the `wandb` import succeeded.
if wb:
    callbacks = {
        'on_pretrain_routine_start': on_pretrain_routine_start,
        'on_train_epoch_end': on_train_epoch_end,
        'on_fit_epoch_end': on_fit_epoch_end,
        'on_train_end': on_train_end}
else:
    callbacks = {}

457
ultralytics/utils/checks.py Normal file
View File

@ -0,0 +1,457 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
import contextlib
import glob
import inspect
import math
import os
import platform
import re
import shutil
import subprocess
import time
from pathlib import Path
from typing import Optional
import cv2
import numpy as np
import pkg_resources as pkg
import psutil
import requests
import torch
from matplotlib import font_manager
from ultralytics.utils import (AUTOINSTALL, LOGGER, ONLINE, ROOT, USER_CONFIG_DIR, ThreadingLocked, TryExcept,
clean_url, colorstr, downloads, emojis, is_colab, is_docker, is_jupyter, is_kaggle,
is_online, is_pip_package, url2file)
def is_ascii(s) -> bool:
    """
    Check whether the string form of `s` contains only ASCII characters.

    Args:
        s (Any): Value to check; it is converted with `str()` first, so lists, tuples, None, etc. are accepted.

    Returns:
        bool: True if every character has a code point below 128 (also for an empty string), False otherwise.
    """
    text = str(s)
    for char in text:
        if ord(char) >= 128:
            return False
    return True
def check_imgsz(imgsz, stride=32, min_dim=1, max_dim=2, floor=0):
    """
    Verify image size is a multiple of the given stride in each dimension, rounding up where it is not.

    Each dimension is rounded up to the next multiple of `stride` and raised to at least `floor`; a warning is
    logged when this changes the size.

    Args:
        imgsz (int | List[int] | Tuple[int]): Image size.
        stride (int | torch.Tensor): Stride value; for a tensor its maximum is used.
        min_dim (int): Minimum number of dimensions. A single size is returned as an int when `min_dim=1` and
            duplicated to `[size, size]` when `min_dim=2`.
        max_dim (int): Maximum number of dimensions. With `max_dim=1` extra dimensions are collapsed to their
            maximum (with a warning); otherwise too many dimensions raise a ValueError.
        floor (int): Minimum allowed value for image size.

    Returns:
        (int | List[int]): Updated image size.

    Raises:
        TypeError: If `imgsz` is not an int, list or tuple.
        ValueError: If `imgsz` has more than `max_dim` dimensions and `max_dim` is not 1.
    """
    # Convert stride to integer if it is a tensor
    if isinstance(stride, torch.Tensor):
        stride = stride.max()
    stride = int(stride)

    # Normalize the image size to a list
    if isinstance(imgsz, int):
        imgsz = [imgsz]
    elif isinstance(imgsz, (list, tuple)):
        imgsz = list(imgsz)
    else:
        raise TypeError(f"'imgsz={imgsz}' is of invalid type {type(imgsz).__name__}. "
                        f"Valid imgsz types are int i.e. 'imgsz=640' or list i.e. 'imgsz=[640,640]'")

    # Apply max_dim
    if len(imgsz) > max_dim:
        msg = "'train' and 'val' imgsz must be an integer, while 'predict' and 'export' imgsz may be a [h, w] list " \
              "or an integer, i.e. 'yolo export imgsz=640,480' or 'yolo export imgsz=640'"
        if max_dim != 1:
            raise ValueError(f'imgsz={imgsz} is not a valid image size. {msg}')
        LOGGER.warning(f"WARNING ⚠️ updating to 'imgsz={max(imgsz)}'. {msg}")
        imgsz = [max(imgsz)]

    # Round every dimension up to a multiple of the stride
    sz = []
    for dim in imgsz:
        sz.append(max(math.ceil(dim / stride) * stride, floor))

    if sz != imgsz:
        LOGGER.warning(f'WARNING ⚠️ imgsz={imgsz} must be multiple of max stride {stride}, updating to {sz}')

    # Expand or unwrap a single dimension according to min_dim
    if len(sz) == 1:
        if min_dim == 2:
            return [sz[0], sz[0]]
        if min_dim == 1:
            return sz[0]
    return sz
def check_version(current: str = '0.0.0',
                  minimum: str = '0.0.0',
                  name: str = 'version ',
                  pinned: bool = False,
                  hard: bool = False,
                  verbose: bool = False) -> bool:
    """
    Compare the current version with the required version.

    Args:
        current (str): Current version.
        minimum (str): Required minimum version.
        name (str): Name to be used in warning message.
        pinned (bool): If True, versions must match exactly. If False, minimum version must be satisfied.
        hard (bool): If True, assert that the requirement is met (AssertionError otherwise).
        verbose (bool): If True, log a warning when the requirement is not met.

    Returns:
        (bool): True if the requirement is met, False otherwise.
    """
    current, minimum = pkg.parse_version(current), pkg.parse_version(minimum)
    if pinned:
        satisfied = current == minimum
    else:
        satisfied = current >= minimum
    warning_message = f'WARNING ⚠️ {name}{minimum} is required by YOLOv8, but {name}{current} is currently installed'
    if hard:
        assert satisfied, emojis(warning_message)  # assert min requirements met
    if not satisfied and verbose:
        LOGGER.warning(warning_message)
    return satisfied
def check_latest_pypi_version(package_name='ultralytics'):
    """
    Return the latest version of a PyPI package by querying the PyPI JSON API (nothing is downloaded or installed).

    Args:
        package_name (str): The name of the package to find the latest version for.

    Returns:
        (str | None): The latest version, or None when the request fails, errors, or does not return HTTP 200.
    """
    try:
        requests.packages.urllib3.disable_warnings()  # Disable the InsecureRequestWarning
        reply = requests.get(f'https://pypi.org/pypi/{package_name}/json', timeout=3)
        if reply.status_code == 200:
            return reply.json()['info']['version']
    except Exception:  # deliberate best effort: any failure simply means "unknown"
        pass
    return None
def check_pip_update_available():
    """
    Check whether a newer version of the ultralytics package is available on PyPI and log a hint if so.

    The check only runs when online and installed as a pip package; any error during the check is ignored.

    Returns:
        (bool): True if an update is available, False otherwise.
    """
    if not (ONLINE and is_pip_package()):
        return False
    try:
        from ultralytics import __version__
        latest = check_latest_pypi_version()
        if pkg.parse_version(__version__) < pkg.parse_version(latest):  # update is available
            LOGGER.info(f'New https://pypi.org/project/ultralytics/{latest} available 😃 '
                        f"Update with 'pip install -U ultralytics'")
            return True
    except Exception:  # deliberate best effort: a failed check is reported as "no update"
        pass
    return False
@ThreadingLocked()
def check_font(font='Arial.ttf'):
    """
    Locate a font locally, or download it to the user's configuration directory if it is not found.

    Lookup order: `USER_CONFIG_DIR`, then the system fonts reported by matplotlib, then a download from
    ultralytics.com.

    Args:
        font (str): Path or name of font.

    Returns:
        file (Path | str | None): Font path; a `Path` for the config-dir copy, the matching system font string
            otherwise, or None when nothing was found and the download URL check failed.
    """
    name = Path(font).name

    # Check USER_CONFIG_DIR
    file = USER_CONFIG_DIR / name
    if file.exists():
        return file

    # Check system fonts
    system_fonts = font_manager.findSystemFonts()
    matches = [path for path in system_fonts if font in path]
    if any(matches):
        return matches[0]

    # Download to USER_CONFIG_DIR if missing
    url = f'https://ultralytics.com/assets/{name}'
    if downloads.is_url(url):
        downloads.safe_download(url=url, file=file)
        return file
def check_python(minimum: str = '3.7.0') -> bool:
    """
    Check the running Python version against the required minimum version.

    Args:
        minimum (str): Required minimum version of python.

    Returns:
        (bool): The result of `check_version` called with `hard=True` for the running interpreter.
    """
    running = platform.python_version()
    return check_version(running, minimum, name='Python ', hard=True)
@TryExcept()
def check_requirements(requirements=ROOT.parent / 'requirements.txt', exclude=(), install=True, cmds=''):
    """
    Check if installed dependencies meet YOLOv8 requirements and attempt to auto-update if needed.

    Args:
        requirements (Union[Path, str, List[str]]): Path to a requirements.txt file, a single package requirement as a
            string, or a list of package requirements as strings.
        exclude (Tuple[str]): Tuple of package names to exclude from checking.
        install (bool): If True, attempt to auto-update packages that don't meet requirements.
        cmds (str): Additional commands to pass to the pip install command when auto-updating.

    Returns:
        (bool): True when all requirements are met or the auto-update succeeded; False when requirements are
            missing and were not installed (auto-install off, offline, or pip failed).
    """
    prefix = colorstr('red', 'bold', 'requirements:')
    check_python()  # check python version
    check_torchvision()  # check torch-torchvision compatibility
    if isinstance(requirements, Path):  # requirements.txt file
        file = requirements.resolve()
        assert file.exists(), f'{prefix} {file} not found, check failed.'
        with file.open() as f:
            requirements = [f'{x.name}{x.specifier}' for x in pkg.parse_requirements(f) if x.name not in exclude]
    elif isinstance(requirements, str):
        requirements = [requirements]

    s = ''  # console string
    pkgs = []  # requirements that are neither satisfied nor importable
    for r in requirements:
        r_stripped = r.split('/')[-1].replace('.git', '')  # replace git+https://org/repo.git -> 'repo'
        try:
            pkg.require(r_stripped)
        except (pkg.VersionConflict, pkg.DistributionNotFound):  # exception if requirements not met
            try:  # attempt to import (slower but more accurate)
                import importlib
                importlib.import_module(next(pkg.parse_requirements(r_stripped)).name)
            except ImportError:
                s += f'"{r}" '
                pkgs.append(r)

    if s:
        if install and AUTOINSTALL:  # check environment variable
            n = len(pkgs)  # number of packages updates
            LOGGER.info(f"{prefix} Ultralytics requirement{'s' * (n > 1)} {pkgs} not found, attempting AutoUpdate...")
            try:
                t = time.time()
                assert is_online(), 'AutoUpdate skipped (offline)'
                # NOTE(review): the command is a shell string built from `s` and `cmds`; only pass trusted
                # requirement strings here, since anything in them is interpreted by the shell.
                LOGGER.info(subprocess.check_output(f'pip install --no-cache {s} {cmds}', shell=True).decode())
                dt = time.time() - t
                LOGGER.info(
                    f"{prefix} AutoUpdate success ✅ {dt:.1f}s, installed {n} package{'s' * (n > 1)}: {pkgs}\n"
                    f"{prefix} ⚠️ {colorstr('bold', 'Restart runtime or rerun command for updates to take effect')}\n")
            except Exception as e:
                LOGGER.warning(f'{prefix}{e}')
                return False
        else:
            return False

    return True
def check_torchvision():
    """
    Warn when the installed Torchvision version is not listed as compatible with the installed PyTorch version.

    Only the major.minor parts of both versions are compared (any '+local' suffix is dropped), using a small
    table based on https://github.com/pytorch/vision#installation whose keys are PyTorch versions and whose
    values are lists of compatible Torchvision versions. PyTorch versions missing from the table are not checked.
    """
    import torchvision

    def major_minor(version_string):
        """Return 'major.minor' of a version string, ignoring any '+local' suffix."""
        return '.'.join(version_string.split('+')[0].split('.')[:2])

    # Compatibility table
    compatibility_table = {'2.0': ['0.15'], '1.13': ['0.14'], '1.12': ['0.13']}

    v_torch = major_minor(torch.__version__)
    v_torchvision = major_minor(torchvision.__version__)

    if v_torch not in compatibility_table:
        return
    compatible_versions = compatibility_table[v_torch]
    installed = pkg.parse_version(v_torchvision)
    if not any(installed == pkg.parse_version(v) for v in compatible_versions):
        print(f'WARNING ⚠️ torchvision=={v_torchvision} is incompatible with torch=={v_torch}.\n'
              f"Run 'pip install torchvision=={compatible_versions[0]}' to fix torchvision or "
              "'pip install -U torch torchvision' to update both.\n"
              'For a full compatibility table see https://github.com/pytorch/vision#installation')
def check_suffix(file='yolov8n.pt', suffix='.pt', msg=''):
    """
    Assert that every given file has one of the acceptable suffixes.

    Files without a suffix are accepted, and nothing is checked when `file` or `suffix` is empty.

    Args:
        file (str | Path | list | tuple): File or files to check.
        suffix (str | tuple): Acceptable suffix or suffixes (compared against the lower-cased file suffix).
        msg (str): Text prepended to the assertion message.

    Raises:
        AssertionError: If a file has a non-empty suffix that is not acceptable.
    """
    if not (file and suffix):
        return
    if isinstance(suffix, str):
        suffix = (suffix, )
    candidates = file if isinstance(file, (list, tuple)) else [file]
    for f in candidates:
        s = Path(f).suffix.lower().strip()  # file suffix
        if len(s):
            assert s in suffix, f'{msg}{f} acceptable suffix is {suffix}, not {s}'
def check_yolov5u_filename(file: str, verbose: bool = True):
    """
    Replace legacy YOLOv5/YOLOv3 '.pt' filenames with the updated 'u' filenames.

    The replacement is only attempted when the string mentions 'yolov3' or 'yolov5' and contains no letter 'u'
    anywhere (so a path such as 'runs/yolov5n.pt' is returned unchanged).

    Args:
        file (str): File name or path.
        verbose (bool): If True, log a tip when the name was changed.

    Returns:
        (str): The updated (or unchanged) file name.
    """
    is_legacy = ('yolov3' in file or 'yolov5' in file) and 'u' not in file
    if is_legacy:
        original_file = file
        patterns = (
            r'(.*yolov5([nsmlx]))\.pt',  # i.e. yolov5n.pt -> yolov5nu.pt
            r'(.*yolov5([nsmlx])6)\.pt',  # i.e. yolov5n6.pt -> yolov5n6u.pt
            r'(.*yolov3(|-tiny|-spp))\.pt',  # i.e. yolov3-spp.pt -> yolov3-sppu.pt
        )
        for pattern in patterns:
            file = re.sub(pattern, '\\1u.pt', file)
        if file != original_file and verbose:
            LOGGER.info(f"PRO TIP 💡 Replace 'model={original_file}' with new 'model={file}'.\nYOLOv5 'u' models are "
                        f'trained with https://github.com/ultralytics/ultralytics and feature improved performance vs '
                        f'standard YOLOv5 models trained with https://github.com/ultralytics/yolov5.\n')
    return file
def check_file(file, suffix='', download=True, hard=True):
    """
    Resolve a file: return it if it exists locally, download it if it is a URL, else search the package `cfg` tree.

    Args:
        file (str | Path): File name, path or URL.
        suffix (str | tuple): Acceptable suffix(es); checked first when given.
        download (bool): If True, http(s)/rtsp/rtmp URLs are downloaded unless a local copy already exists.
        hard (bool): If True, raise when the search finds no file or more than one.

    Returns:
        (str | list): The resolved path; the input itself when empty or existing; `[]` when nothing was found
            and `hard` is False.

    Raises:
        FileNotFoundError: With `hard=True`, when no file or multiple files match.
    """
    check_suffix(file, suffix)  # optional
    file = check_yolov5u_filename(str(file).strip())  # yolov5n -> yolov5nu

    # Empty input or existing local path ('://' check required in Windows Python<3.10)
    if not file or ('://' not in file and Path(file).exists()):
        return file

    # Remote file
    if download and file.lower().startswith(('https://', 'http://', 'rtsp://', 'rtmp://')):
        url = file  # warning: Pathlib turns :// -> :/
        file = url2file(file)  # '%2F' to '/', split https://url.com/file.txt?auth
        if Path(file).exists():
            LOGGER.info(f'Found {clean_url(url)} locally at {file}')  # file already exists
        else:
            downloads.safe_download(url=url, file=file, unzip=False)
        return file

    # Search the cfg directory recursively
    found = glob.glob(str(ROOT / 'cfg' / '**' / file), recursive=True)
    if hard:
        if not found:
            raise FileNotFoundError(f"'{file}' does not exist")
        if len(found) > 1:
            raise FileNotFoundError(f"Multiple files match '{file}', specify exact path: {found}")
    return found[0] if len(found) else []  # return file
def check_yaml(file, suffix=('.yaml', '.yml'), hard=True):
    """Resolve a YAML file via `check_file` (search/download if necessary), checking its suffix, and return the path."""
    resolved = check_file(file, suffix, hard=hard)
    return resolved
def check_imshow(warn=False):
    """
    Check if the environment supports image displays by opening and closing a tiny OpenCV window.

    Args:
        warn (bool): If True, log a warning with the failure reason when displays are not supported.

    Returns:
        (bool): True if the test window could be shown; False on Colab, Kaggle, Docker, or any display error.
    """
    try:
        headless = any((is_colab(), is_kaggle(), is_docker()))
        assert not headless
        test_image = np.zeros((1, 1, 3))
        cv2.imshow('test', test_image)
        cv2.waitKey(1)
        cv2.destroyAllWindows()
        cv2.waitKey(1)
    except Exception as e:
        if warn:
            LOGGER.warning(f'WARNING ⚠️ Environment does not support cv2.imshow() or PIL Image.show()\n{e}')
        return False
    return True
def check_yolo(verbose=True, device=''):
    """
    Prepare the environment and log a 'Setup complete' line with an optional hardware summary.

    In Jupyter, an installed wandb is uninstalled; on Colab the 'sample_data' directory is also removed.
    The selected device is reported through `select_device`. Nothing is returned.

    Args:
        verbose (bool): If True, include CPU count, RAM and disk usage in the log line and clear IPython output.
        device (str): Device string passed to `select_device`.
    """
    from ultralytics.utils.torch_utils import select_device

    if is_jupyter():
        wandb_installed = check_requirements('wandb', install=False)
        if wandb_installed:
            os.system('pip uninstall -y wandb')  # uninstall wandb: unwanted account creation prompt with infinite hang
        if is_colab():
            shutil.rmtree('sample_data', ignore_errors=True)  # remove colab /sample_data directory

    if not verbose:
        summary = ''
    else:
        # System info
        bytes_per_gib = 1 << 30
        ram = psutil.virtual_memory().total
        total, _, free = shutil.disk_usage('/')
        summary = (f'({os.cpu_count()} CPUs, {ram / bytes_per_gib:.1f} GB RAM, '
                   f'{(total - free) / bytes_per_gib:.1f}/{total / bytes_per_gib:.1f} GB disk)')
        with contextlib.suppress(Exception):  # clear display if ipython is installed
            from IPython import display
            display.clear_output()

    select_device(device=device, newline=False)
    LOGGER.info(f'Setup complete ✅ {summary}')
def check_amp(model):
    """
    Check whether PyTorch Automatic Mixed Precision (AMP) produces results close to FP32 on this system.

    The given model is only used to determine the device. The comparison itself runs a freshly loaded
    YOLOv8n ('yolov8n.pt') on a test image, once in FP32 and once under `torch.cuda.amp.autocast`. If the two
    results differ, there are anomalies with AMP on the system that may cause NaN losses or zero-mAP results,
    so False is returned and AMP should be disabled during training.

    Args:
        model (nn.Module): A YOLOv8 model instance.

    Returns:
        (bool): False on 'cpu'/'mps' devices or when the AMP result differs from FP32. True when the checks
            pass, and also when they are skipped (offline, or YOLOv8n cannot be loaded).

    Note:
        The failing assertion is caught inside this function; no AssertionError reaches the caller.
    """
    device = next(model.parameters()).device  # get model device
    if device.type in ('cpu', 'mps'):
        return False  # AMP only used on CUDA devices

    def amp_allclose(m, im):
        """All close FP32 vs AMP results."""
        a = m(im, device=device, verbose=False)[0].boxes.data  # FP32 inference
        with torch.cuda.amp.autocast(True):
            b = m(im, device=device, verbose=False)[0].boxes.data  # AMP inference
        del m
        return a.shape == b.shape and torch.allclose(a, b.float(), atol=0.5)  # close to 0.5 absolute tolerance

    f = ROOT / 'assets/bus.jpg'  # image to check
    # Fall back to the hosted image when online, else to an all-ones dummy array
    im = f if f.exists() else 'https://ultralytics.com/images/bus.jpg' if ONLINE else np.ones((640, 640, 3))
    prefix = colorstr('AMP: ')
    LOGGER.info(f'{prefix}running Automatic Mixed Precision (AMP) checks with YOLOv8n...')
    warning_msg = "Setting 'amp=True'. If you experience zero-mAP or NaN losses you can disable AMP with amp=False."
    try:
        from ultralytics import YOLO
        assert amp_allclose(YOLO('yolov8n.pt'), im)
        LOGGER.info(f'{prefix}checks passed ✅')
    except ConnectionError:
        # Skipped checks fall through to `return True`
        LOGGER.warning(f'{prefix}checks skipped ⚠️, offline and unable to download YOLOv8n. {warning_msg}')
    except (AttributeError, ModuleNotFoundError):
        LOGGER.warning(
            f'{prefix}checks skipped ⚠️. Unable to load YOLOv8n due to possible Ultralytics package modifications. {warning_msg}'
        )
    except AssertionError:
        LOGGER.warning(f'{prefix}checks failed ❌. Anomalies were detected with AMP on your system that may lead to '
                       f'NaN losses or zero-mAP results, so AMP will be disabled during training.')
        return False
    return True
def git_describe(path=ROOT):  # path must be a directory
    """
    Return human-readable git description, i.e. v5.0-5-g3e25f1e https://git-scm.com/docs/git-describe.

    Args:
        path (str | Path): Directory expected to contain a `.git` directory.

    Returns:
        (str): Output of `git describe --tags --long --always`, or '' when `path` has no `.git` directory.

    Raises:
        subprocess.CalledProcessError: If the git command itself fails.
    """
    # Plain check instead of `assert`, so the guard still works when Python runs with -O
    if not (Path(path) / '.git').is_dir():
        return ''
    # Argument list (no shell): paths containing spaces or shell metacharacters are passed through verbatim
    cmd = ['git', '-C', str(path), 'describe', '--tags', '--long', '--always']
    return subprocess.check_output(cmd).decode().strip()
def print_args(args: Optional[dict] = None, show_file=True, show_func=False):
    """
    Log the arguments of the calling function (or the given args dict) as 'key=value' pairs.

    Args:
        args (dict, optional): Arguments to print; when None they are read from the caller's frame.
        show_file (bool): If True, prefix the line with the caller's file (relative to ROOT when possible).
        show_func (bool): If True, prefix the line with the caller's function name.
    """

    def strip_auth(v):
        """Clean longer Ultralytics HUB URLs by stripping potential authentication information."""
        is_long_url = isinstance(v, str) and v.startswith('http') and len(v) > 100
        return clean_url(v) if is_long_url else v

    caller = inspect.currentframe().f_back  # previous frame (must be taken directly inside this function)
    file, _, func, _, _ = inspect.getframeinfo(caller)
    if args is None:  # get args automatically
        arg_names, _, _, frame_locals = inspect.getargvalues(caller)
        args = {k: v for k, v in frame_locals.items() if k in arg_names}
    try:
        file = Path(file).resolve().relative_to(ROOT).with_suffix('')
    except ValueError:
        file = Path(file).stem
    header = ''
    if show_file:
        header += f'{file}: '
    if show_func:
        header += f'{func}: '
    pairs = [f'{k}={strip_auth(v)}' for k, v in args.items()]
    LOGGER.info(colorstr(header) + ', '.join(pairs))

67
ultralytics/utils/dist.py Normal file
View File

@ -0,0 +1,67 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
import os
import re
import shutil
import socket
import sys
import tempfile
from pathlib import Path
from . import USER_CONFIG_DIR
from .torch_utils import TORCH_1_9
def find_free_network_port() -> int:
    """
    Find a free port on localhost by binding a TCP socket to port 0.

    It is useful in single-node training when we don't want to connect to a real main node but have to set the
    `MASTER_PORT` environment variable.

    Returns:
        (int): The port number the socket was bound to (the socket is closed before returning).
    """
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        sock.bind(('127.0.0.1', 0))
        _, port = sock.getsockname()
        return port
    finally:
        sock.close()
def generate_ddp_file(trainer):
    """
    Write a temporary Python script that re-creates this trainer and calls `train()`, and return its path.

    The script is written to `USER_CONFIG_DIR / 'DDP'` with a name ending in `<id(trainer)>.py` and is not
    deleted automatically (see `ddp_cleanup`).

    Args:
        trainer: Trainer instance; its class is imported by the script and `vars(trainer.args)` become the overrides.

    Returns:
        (str): Path of the generated file.
    """
    module, name = f'{trainer.__class__.__module__}.{trainer.__class__.__name__}'.rsplit('.', 1)

    # The statements after the `if __name__` guard must be indented, otherwise the generated file is a SyntaxError
    content = f'''overrides = {vars(trainer.args)} \nif __name__ == "__main__":
    from {module} import {name}
    from ultralytics.utils import DEFAULT_CFG_DICT

    cfg = DEFAULT_CFG_DICT.copy()
    cfg.update(save_dir='')   # handle the extra key 'save_dir'
    trainer = {name}(cfg=cfg, overrides=overrides)
    trainer.train()'''
    ddp_dir = USER_CONFIG_DIR / 'DDP'
    ddp_dir.mkdir(parents=True, exist_ok=True)  # also works when the config directory does not exist yet
    with tempfile.NamedTemporaryFile(prefix='_temp_',
                                     suffix=f'{id(trainer)}.py',
                                     mode='w+',
                                     encoding='utf-8',
                                     dir=ddp_dir,
                                     delete=False) as file:
        file.write(content)
    return file.name
def generate_ddp_command(world_size, trainer):
    """
    Build the command line used to launch distributed training.

    Args:
        world_size (int): Number of processes to start (passed as `--nproc_per_node`).
        trainer: Trainer instance; `trainer.resume` and `trainer.save_dir` are read here.

    Returns:
        (tuple): `(cmd, file)` where `cmd` is the argument list and `file` is the script that will be launched
            (either the current entry script or a generated temporary one).
    """
    import __main__  # noqa local import to avoid https://github.com/Lightning-AI/lightning/issues/15218
    if not trainer.resume:
        shutil.rmtree(trainer.save_dir)  # remove the save_dir
    script = str(Path(sys.argv[0]).resolve())
    safe_pattern = re.compile(r'^[a-zA-Z0-9_. /\\-]{1,128}$')  # allowed characters, at most 128 of them
    script_is_usable = safe_pattern.match(script) and Path(script).exists() and script.endswith('.py')
    if not script_is_usable:  # using CLI
        script = generate_ddp_file(trainer)
    launcher = 'torch.distributed.run' if TORCH_1_9 else 'torch.distributed.launch'
    port = find_free_network_port()
    cmd = [sys.executable, '-m', launcher, '--nproc_per_node', f'{world_size}', '--master_port', f'{port}', script]
    return cmd, script
def ddp_cleanup(trainer, file):
    """
    Delete `file` if its name carries this trainer's temp-file suffix.

    Args:
        trainer: Trainer instance whose `id()` forms the suffix `{id(trainer)}.py`.
        file (str): Path of the script that was launched.
    """
    temp_suffix = f'{id(trainer)}.py'
    if temp_suffix not in file:  # not a file generated for this trainer, leave it alone
        return
    os.remove(file)

View File

@ -0,0 +1,271 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
import contextlib
import shutil
import subprocess
from itertools import repeat
from multiprocessing.pool import ThreadPool
from pathlib import Path
from urllib import parse, request
from zipfile import BadZipFile, ZipFile, is_zipfile
import requests
import torch
from tqdm import tqdm
from ultralytics.utils import LOGGER, checks, clean_url, emojis, is_online, url2file
# File names of the weights that are looked up as GitHub release assets
GITHUB_ASSET_NAMES = [
    *(f'yolov8{k}{suffix}.pt' for k in 'nsmlx' for suffix in ('', '6', '-cls', '-seg', '-pose')),
    *(f'yolov5{k}u.pt' for k in 'nsmlx'),
    *(f'yolov3{k}u.pt' for k in ('', '-spp', '-tiny')),
    *(f'yolo_nas_{k}.pt' for k in 'sml'),
    *(f'sam_{k}.pt' for k in 'bl'),
    *(f'FastSAM-{k}.pt' for k in 'sx'),
    *(f'rtdetr-{k}.pt' for k in 'lx'),
    'mobile_sam.pt']
# Same names without the '.pt' extension
GITHUB_ASSET_STEMS = [Path(asset).stem for asset in GITHUB_ASSET_NAMES]
def is_url(url, check=True):
    """
    Check whether a string is a URL and, optionally, whether it can be fetched.

    Args:
        url (str | Path): Candidate URL; it is converted with `str()` first.
        check (bool): If True, open the URL and require an HTTP 200 response.

    Returns:
        (bool): True if `url` has both a scheme and a network location (and, with `check=True`, responds with
            status 200). Any exception raised while parsing or fetching results in False.
    """
    with contextlib.suppress(Exception):
        url = str(url)
        result = parse.urlparse(url)
        # Explicit test instead of `assert`, which is stripped under `python -O` and would accept any string
        if not (result.scheme and result.netloc):
            return False
        if check:
            with request.urlopen(url) as response:
                return response.getcode() == 200  # check if exists online
        return True
    return False
def unzip_file(file, path=None, exclude=('.DS_Store', '__MACOSX'), exist_ok=False):
    """
    Unzips a *.zip file to the specified path, excluding files containing strings in the exclude list.

    If the zipfile does not contain a single top-level directory, the function will create a new
    directory with the same name as the zipfile (without the extension) to extract its contents.
    If a path is not provided, the function will use the parent directory of the zipfile as the default path.

    Args:
        file (str): The path to the zipfile to be extracted.
        path (str, optional): The path to extract the zipfile to. Defaults to None.
        exclude (tuple, optional): A tuple of filename strings to be excluded. Defaults to ('.DS_Store', '__MACOSX').
        exist_ok (bool, optional): Whether to overwrite existing contents if they exist. Defaults to False.

    Raises:
        BadZipFile: If the provided file does not exist or is not a valid zipfile.

    Returns:
        (Path | str): The directory the archive was (or already had been) extracted into. If the archive has no
            extractable members, `path` is returned unchanged and nothing is written.
    """
    if not (Path(file).exists() and is_zipfile(file)):
        raise BadZipFile(f"File '{file}' does not exist or is a bad zip file.")
    if path is None:
        path = Path(file).parent  # default path

    # Unzip the file contents
    with ZipFile(file) as zipObj:
        file_list = [f for f in zipObj.namelist() if all(x not in f for x in exclude)]
        if not file_list:
            # Empty archive or everything excluded: indexing file_list[0] below would raise IndexError
            LOGGER.warning(f'WARNING ⚠️ Skipping {file} unzip (no files to extract)')
            return path

        top_level_dirs = {Path(f).parts[0] for f in file_list}
        if len(top_level_dirs) > 1 or not file_list[0].endswith('/'):
            path = Path(path) / Path(file).stem  # define new unzip directory

        # Check if destination directory already exists and contains files. The first archive member is used as
        # the probe so the result does not depend on set ordering; is_dir() guards iterdir() against plain files.
        extract_path = Path(path) / Path(file_list[0]).parts[0]
        if extract_path.is_dir() and any(extract_path.iterdir()) and not exist_ok:
            # If it exists and is not empty, return the path without unzipping
            LOGGER.info(f'Skipping {file} unzip (already unzipped)')
            return path

        for f in file_list:
            zipObj.extract(f, path=path)

    return path  # return unzip dir
def check_disk_space(url='https://ultralytics.com/assets/coco128.zip', sf=1.5, hard=True):
    """
    Check if there is sufficient disk space to download and store a file.

    Args:
        url (str, optional): The URL to the file. Defaults to 'https://ultralytics.com/assets/coco128.zip'.
        sf (float, optional): Safety factor, the multiplier for the required free space. Defaults to 1.5.
        hard (bool, optional): Whether to throw an error or not on insufficient disk space. Defaults to True.

    Returns:
        (bool): True if there is sufficient disk space (or the check itself could not be performed),
            False otherwise.

    Raises:
        MemoryError: If free space is insufficient and `hard` is True.
    """
    gib = 1 << 30  # bytes per GiB
    # Only the measurements are best-effort. The MemoryError raised below must stay OUTSIDE this guard:
    # MemoryError is a subclass of Exception, so a blanket suppress around it would silently swallow it.
    try:
        data = int(requests.head(url).headers['Content-Length']) / gib  # file size (GB)
        total, used, free = (x / gib for x in shutil.disk_usage('/'))  # GiB
    except Exception:
        return True  # pass if the size or disk usage cannot be determined

    if data * sf < free:
        return True  # sufficient space

    # Insufficient space
    text = (f'WARNING ⚠️ Insufficient free disk space {free:.1f} GB < {data * sf:.3f} GB required, '
            f'Please free {data * sf - free:.1f} GB additional disk space and try again.')
    if hard:
        raise MemoryError(text)
    LOGGER.warning(text)
    return False
def safe_download(url,
                  file=None,
                  dir=None,
                  unzip=True,
                  delete=False,
                  curl=False,
                  retry=3,
                  min_bytes=1E0,
                  progress=True):
    """
    Downloads files from a URL, with options for retrying, unzipping, and deleting the downloaded file.

    Args:
        url (str): The URL of the file to be downloaded, or the path of an already existing local file.
        file (str, optional): The filename of the downloaded file. Used only when `dir` is not given.
        dir (Path, optional): The directory to save the downloaded file in; the file name is derived from the URL.
            One of `dir` or `file` is required unless `url` is an existing local file.
        unzip (bool, optional): Whether to unzip the downloaded file. Default: True.
        delete (bool, optional): Whether to delete the downloaded file after unzipping. Default: False.
        curl (bool, optional): Whether to use curl command line tool for downloading. Default: False.
        retry (int, optional): The number of times to retry the download in case of failure. Default: 3.
        min_bytes (float, optional): The minimum number of bytes that the downloaded file should have, to be considered
            a successful download. Default: 1E0.
        progress (bool, optional): Whether to display a progress bar during the download. Default: True.

    Returns:
        (Path | None): The unzip directory when the file was handled by the unzip step, otherwise None.

    Raises:
        AssertionError: If a download is needed but neither `dir` nor `file` was given.
        ConnectionError: If the download fails while offline or after the retry limit is reached.
    """
    if '://' not in str(url) and Path(url).is_file():  # URL exists ('://' check required in Windows Python<3.10)
        f = Path(url)  # filename
    else:
        # Validate before building the path: Path(None) would raise an unrelated TypeError
        assert dir or file, 'dir or file required for download'
        f = dir / url2file(url) if dir else Path(file)  # URL converted to filename

    if not f.is_file():  # URL and file do not exist
        desc = f'Downloading {clean_url(url)} to {f}'
        LOGGER.info(f'{desc}...')
        f.parent.mkdir(parents=True, exist_ok=True)  # make directory if missing
        check_disk_space(url)
        for i in range(retry + 1):
            try:
                if curl or i > 0:  # curl download with retry, continue
                    s = 'sS' * (not progress)  # silent
                    r = subprocess.run(['curl', '-#', f'-{s}L', url, '-o', f, '--retry', '3', '-C', '-']).returncode
                    assert r == 0, f'Curl return value {r}'
                else:  # urllib download
                    method = 'torch'  # hard-coded switch; the else-branch below is an alternative implementation
                    if method == 'torch':
                        torch.hub.download_url_to_file(url, f, progress=progress)
                    else:
                        from ultralytics.utils import TQDM_BAR_FORMAT
                        with request.urlopen(url) as response, tqdm(total=int(response.getheader('Content-Length', 0)),
                                                                    desc=desc,
                                                                    disable=not progress,
                                                                    unit='B',
                                                                    unit_scale=True,
                                                                    unit_divisor=1024,
                                                                    bar_format=TQDM_BAR_FORMAT) as pbar:
                            with open(f, 'wb') as f_opened:
                                for data in response:
                                    f_opened.write(data)
                                    pbar.update(len(data))

                if f.exists():
                    if f.stat().st_size > min_bytes:
                        break  # success
                    f.unlink()  # remove partial downloads
            except Exception as e:
                if i == 0 and not is_online():
                    raise ConnectionError(emojis(f'❌  Download failure for {url}. Environment is not online.')) from e
                elif i >= retry:
                    raise ConnectionError(emojis(f'❌  Download failure for {url}. Retry limit reached.')) from e
                LOGGER.warning(f'⚠️ Download failure, retrying {i + 1}/{retry} {url}...')

    if unzip and f.exists() and f.suffix in ('', '.zip', '.tar', '.gz'):
        unzip_dir = dir or f.parent  # unzip to dir if provided else unzip in place
        LOGGER.info(f'Unzipping {f} to {unzip_dir.absolute()}...')
        if is_zipfile(f):
            unzip_dir = unzip_file(file=f, path=unzip_dir)  # unzip
        elif f.suffix == '.tar':
            subprocess.run(['tar', 'xf', f, '--directory', unzip_dir], check=True)  # unzip
        elif f.suffix == '.gz':
            subprocess.run(['tar', 'xfz', f, '--directory', unzip_dir], check=True)  # unzip
        if delete:
            f.unlink()  # remove zip
        return unzip_dir
def get_github_assets(repo='ultralytics/assets', version='latest'):
    """
    Query the GitHub releases API for a release's tag and asset names.

    Args:
        repo (str): Repository in 'owner/name' form.
        version (str): 'latest', or a tag name such as 'v6.2'.

    Returns:
        (tuple): `(tag, assets)`, i.e. the release tag name and a list like ['yolov8n.pt', 'yolov8s.pt', ...].
    """
    release = version if version == 'latest' else f'tags/{version}'  # i.e. tags/v6.2
    api_url = f'https://api.github.com/repos/{repo}/releases/{release}'
    payload = requests.get(api_url).json()  # github api
    tag = payload['tag_name']
    asset_names = [asset['name'] for asset in payload['assets']]
    return tag, asset_names
def attempt_download_asset(file, repo='ultralytics/assets', release='v0.0.0'):
    """
    Return a local path for `file`, downloading it first when it is not found locally.

    Lookup order: the path as given, then `SETTINGS['weights_dir'] / file`, then a direct URL download when `file`
    starts with 'http:/' or 'https:/', and finally the GitHub release assets of `repo`.

    Args:
        file (str | Path): File name, path or URL.
        repo (str): GitHub repository to take release assets from.
        release (str): Release to query, e.g. 'latest', 'v6.2', etc.

    Returns:
        (str): Path of the file (it may not exist if no matching asset was available).
    """
    from ultralytics.utils import SETTINGS  # scoped for circular import

    # YOLOv3/5u updates
    file = checks.check_yolov5u_filename(str(file))
    file = Path(file.strip().replace("'", ''))

    if file.exists():
        return str(file)
    in_weights_dir = SETTINGS['weights_dir'] / file
    if in_weights_dir.exists():
        return str(SETTINGS['weights_dir'] / file)

    # URL specified
    name = Path(parse.unquote(str(file))).name  # decode '%2F' to '/' etc.
    if str(file).startswith(('http:/', 'https:/')):  # download
        url = str(file).replace(':/', '://')  # Pathlib turns :// -> :/
        file = url2file(name)  # parse authentication https://url.com/file.txt?auth...
        if not Path(file).is_file():
            safe_download(url=url, file=file, min_bytes=1E5)
        else:
            LOGGER.info(f'Found {clean_url(url)} locally at {file}')  # file already exists
        return file

    # GitHub assets: try the requested release, then the latest one, then fall back to local information
    assets = GITHUB_ASSET_NAMES
    try:
        tag, assets = get_github_assets(repo, release)
    except Exception:
        try:
            tag, assets = get_github_assets(repo)  # latest release
        except Exception:
            try:
                tag = subprocess.check_output(['git', 'tag']).decode().split()[-1]
            except Exception:
                tag = release

    file.parent.mkdir(parents=True, exist_ok=True)  # make parent dir (if required)
    if name in assets:
        asset_url = f'https://github.com/{repo}/releases/download/{tag}/{name}'
        safe_download(url=asset_url, file=file, min_bytes=1E5)

    return str(file)
def download(url, dir=Path.cwd(), unzip=True, delete=False, curl=False, threads=1, retry=3):
    """
    Downloads and unzips files concurrently if threads > 1, else sequentially.

    Args:
        url (str | Path | list): A single URL or an iterable of URLs.
        dir (str | Path): Target directory; created if missing. NOTE: the default is evaluated once, when this
            module is imported, not on every call.
        unzip (bool): Forwarded to `safe_download`.
        delete (bool): Forwarded to `safe_download`.
        curl (bool): Forwarded to `safe_download`.
        threads (int): Number of worker threads; values > 1 download in parallel without progress bars.
        retry (int): Forwarded to `safe_download`.
    """
    dir = Path(dir)
    dir.mkdir(parents=True, exist_ok=True)  # make directory
    # Normalise once for both branches; zipping over a bare string would iterate its characters
    urls = [url] if isinstance(url, (str, Path)) else url
    if threads > 1:
        with ThreadPool(threads) as pool:
            pool.map(
                lambda x: safe_download(
                    url=x[0], dir=x[1], unzip=unzip, delete=delete, curl=curl, retry=retry, progress=threads <= 1),
                zip(urls, repeat(dir)))
            pool.close()
            pool.join()
    else:
        for u in urls:
            safe_download(url=u, dir=dir, unzip=unzip, delete=delete, curl=curl, retry=retry)

View File

@ -0,0 +1,10 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
from ultralytics.utils import emojis
class HUBModelError(Exception):
    """Exception raised when a requested model cannot be found."""

    def __init__(self, message='Model not found. Please check model URL and try again.'):
        """Build the exception; the message is passed through `emojis()` before being stored."""
        text = emojis(message)
        super().__init__(text)

100
ultralytics/utils/files.py Normal file
View File

@ -0,0 +1,100 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
import contextlib
import glob
import os
import shutil
from datetime import datetime
from pathlib import Path
class WorkingDirectory(contextlib.ContextDecorator):
    """
    Temporarily switch the process working directory.

    Usage: @WorkingDirectory(dir) decorator or 'with WorkingDirectory(dir):' context manager.
    """

    def __init__(self, new_dir):
        """Remember the target directory and the directory that is current at instantiation time."""
        self.cwd = Path.cwd().resolve()  # directory to return to on exit
        self.dir = new_dir  # directory to switch into on enter

    def __enter__(self):
        """Switch into the target directory."""
        os.chdir(self.dir)

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Switch back to the directory recorded at instantiation."""
        os.chdir(self.cwd)
def increment_path(path, exist_ok=False, sep='', mkdir=False):
    """
    Return a non-clashing variant of a file or directory path, i.e. runs/exp --> runs/exp{sep}2, runs/exp{sep}3, ...

    When `path` already exists and `exist_ok` is False, the numbers 2..9998 are tried in order and the first
    candidate that does not exist is used. For an existing file the number goes before the extension; for a
    directory it is appended to the end.

    Args:
        path (str, pathlib.Path): Path to increment.
        exist_ok (bool, optional): If True, the path will not be incremented and returned as-is. Defaults to False.
        sep (str, optional): Separator to use between the path and the incrementation number. Defaults to ''.
        mkdir (bool, optional): Create the resulting path as a directory if it does not exist. Defaults to False.

    Returns:
        (pathlib.Path): Incremented path.
    """
    path = Path(path)  # os-agnostic
    needs_increment = path.exists() and not exist_ok
    if needs_increment:
        if path.is_file():
            base, suffix = path.with_suffix(''), path.suffix
        else:
            base, suffix = path, ''

        candidates = (f'{base}{sep}{n}{suffix}' for n in range(2, 9999))
        for candidate in candidates:
            if not os.path.exists(candidate):
                break
        path = Path(candidate)

    if mkdir:
        path.mkdir(parents=True, exist_ok=True)  # make directory

    return path
def file_age(path=__file__):
    """Return the whole number of days elapsed since the file was last modified."""
    modified = datetime.fromtimestamp(Path(path).stat().st_mtime)
    elapsed = datetime.now() - modified  # timedelta
    return elapsed.days  # + elapsed.seconds / 86400  # fractional days
def file_date(path=__file__):
    """Return the file's modification date as 'year-month-day' without zero padding, i.e. '2021-3-26'."""
    modified = datetime.fromtimestamp(Path(path).stat().st_mtime)
    year, month, day = modified.year, modified.month, modified.day
    return f'{year}-{month}-{day}'
def file_size(path):
    """
    Return the size of a file, or the summed size of all files below a directory, in MiB.

    Returns 0.0 when `path` is neither a str nor a Path, or when it is neither an existing file nor a directory.
    """
    if not isinstance(path, (str, Path)):
        return 0.0
    mb = 1 << 20  # bytes to MiB (1024 ** 2)
    target = Path(path)
    if target.is_file():
        return target.stat().st_size / mb
    if target.is_dir():
        total_bytes = sum(f.stat().st_size for f in target.glob('**/*') if f.is_file())
        return total_bytes / mb
    return 0.0
def get_latest_run(search_dir='.'):
    """
    Return the path of the most recent 'last*.pt' below `search_dir` (i.e. to --resume from).

    Files are compared by `os.path.getctime`; an empty string is returned when nothing matches.
    """
    checkpoints = glob.glob(f'{search_dir}/**/last*.pt', recursive=True)
    if not checkpoints:
        return ''
    return max(checkpoints, key=os.path.getctime)
def make_dirs(dir='new_dir/'):
    """
    Create a fresh directory containing 'labels' and 'images' sub-directories.

    Any existing directory at `dir` is deleted first, together with its contents.

    Returns:
        (Path): The created directory.
    """
    dir = Path(dir)
    if dir.exists():
        shutil.rmtree(dir)  # delete dir
    for sub in (dir, dir / 'labels', dir / 'images'):
        sub.mkdir(parents=True, exist_ok=True)  # make dir
    return dir

View File

@ -0,0 +1,392 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
from collections import abc
from itertools import repeat
from numbers import Number
from typing import List
import numpy as np
from .ops import ltwh2xywh, ltwh2xyxy, resample_segments, xywh2ltwh, xywh2xyxy, xyxy2ltwh, xyxy2xywh
def _ntuple(n):
    """Return a function that expands a non-iterable value into an `n`-tuple (from PyTorch internals)."""

    def parse(x):
        """Return `x` unchanged if it is iterable, otherwise a tuple holding `x` repeated `n` times."""
        return x if isinstance(x, abc.Iterable) else tuple(repeat(x, n))

    return parse


to_2tuple = _ntuple(2)
to_4tuple = _ntuple(4)

# `xyxy` means left top and right bottom
# `xywh` means center x, center y and width, height(yolo format)
# `ltwh` means left top and width, height(coco format)
_formats = ['xyxy', 'xywh', 'ltwh']

__all__ = 'Bboxes',  # tuple or list


class Bboxes:
    """
    A set of bounding boxes stored as an (N, 4) array together with their coordinate format.

    Now only numpy is supported.
    """

    def __init__(self, bboxes, format='xyxy') -> None:
        """
        Args:
            bboxes (ndarray): Boxes with shape (4,) or (N, 4); a 1-D input is promoted to (1, 4).
            format (str): One of 'xyxy', 'xywh', 'ltwh'.
        """
        assert format in _formats, f'Invalid bounding box format: {format}, format must be one of {_formats}'
        bboxes = bboxes[None, :] if bboxes.ndim == 1 else bboxes
        assert bboxes.ndim == 2
        assert bboxes.shape[1] == 4
        self.bboxes = bboxes
        self.format = format

    def convert(self, format):
        """Converts bounding box format from one type to another (in place; no-op if already in `format`)."""
        assert format in _formats, f'Invalid bounding box format: {format}, format must be one of {_formats}'
        if self.format == format:
            return
        elif self.format == 'xyxy':
            bboxes = xyxy2xywh(self.bboxes) if format == 'xywh' else xyxy2ltwh(self.bboxes)
        elif self.format == 'xywh':
            bboxes = xywh2xyxy(self.bboxes) if format == 'xyxy' else xywh2ltwh(self.bboxes)
        else:
            bboxes = ltwh2xyxy(self.bboxes) if format == 'xyxy' else ltwh2xywh(self.bboxes)
        self.bboxes = bboxes
        self.format = format

    def areas(self):
        """Return box areas without changing the stored boxes or their format."""
        if self.format == 'xyxy':
            return (self.bboxes[:, 2] - self.bboxes[:, 0]) * (self.bboxes[:, 3] - self.bboxes[:, 1])
        # 'xywh' and 'ltwh' both keep width and height in columns 2 and 3
        return self.bboxes[:, 2] * self.bboxes[:, 3]

    def mul(self, scale):
        """
        Multiply the four box coordinates in place.

        Args:
            scale (tuple | list | int): the scale for four coords.
        """
        if isinstance(scale, Number):
            scale = to_4tuple(scale)
        assert isinstance(scale, (tuple, list))
        assert len(scale) == 4
        self.bboxes[:, 0] *= scale[0]
        self.bboxes[:, 1] *= scale[1]
        self.bboxes[:, 2] *= scale[2]
        self.bboxes[:, 3] *= scale[3]

    def add(self, offset):
        """
        Add an offset to the four box coordinates in place.

        Args:
            offset (tuple | list | int): the offset for four coords.
        """
        if isinstance(offset, Number):
            offset = to_4tuple(offset)
        assert isinstance(offset, (tuple, list))
        assert len(offset) == 4
        self.bboxes[:, 0] += offset[0]
        self.bboxes[:, 1] += offset[1]
        self.bboxes[:, 2] += offset[2]
        self.bboxes[:, 3] += offset[3]

    def __len__(self):
        """Return the number of boxes."""
        return len(self.bboxes)

    @classmethod
    def concatenate(cls, boxes_list: List['Bboxes'], axis=0) -> 'Bboxes':
        """
        Concatenate a list of Bboxes objects into a single Bboxes object.

        Args:
            boxes_list (List[Bboxes]): A list of Bboxes objects to concatenate.
            axis (int, optional): The axis along which to concatenate the bounding boxes.
                                   Defaults to 0.

        Returns:
            Bboxes: A new Bboxes object containing the concatenated bounding boxes. An empty input gives an
                empty (0, 4) 'xyxy' object; a single-element input is returned as-is (not copied).

        Note:
            The input should be a list or tuple of Bboxes objects. The result takes the format of the first
            element; the elements are expected to share one format.
        """
        assert isinstance(boxes_list, (list, tuple))
        if not boxes_list:
            return cls(np.empty((0, 4)))  # (0, 4) keeps the constructor's shape assertion satisfied
        assert all(isinstance(box, Bboxes) for box in boxes_list)

        if len(boxes_list) == 1:
            return boxes_list[0]
        return cls(np.concatenate([b.bboxes for b in boxes_list], axis=axis), format=boxes_list[0].format)

    def __getitem__(self, index) -> 'Bboxes':
        """
        Retrieve a specific bounding box or a set of bounding boxes using indexing.

        Args:
            index (int, slice, or np.ndarray): The index, slice, or boolean array to select
                                               the desired bounding boxes.

        Returns:
            Bboxes: A new Bboxes object containing the selected bounding boxes, in the same format as this one.

        Raises:
            AssertionError: If the indexed bounding boxes do not form a 2-dimensional matrix.

        Note:
            When using boolean indexing, make sure to provide a boolean array with the same
            length as the number of bounding boxes.
        """
        if isinstance(index, (int, np.integer)):
            # reshape (not the torch-style view(1, -1)) so a single numpy row becomes a (1, 4) matrix
            return Bboxes(self.bboxes[index].reshape(1, -1), format=self.format)
        b = self.bboxes[index]
        assert b.ndim == 2, f'Indexing on Bboxes with {index} failed to return a matrix!'
        return Bboxes(b, format=self.format)
class Instances:
    """Container bundling the bounding boxes, segments and optional keypoints of a set of object instances."""

    def __init__(self, bboxes, segments=None, keypoints=None, bbox_format='xywh', normalized=True) -> None:
        """
        Args:
            bboxes (ndarray): bboxes with shape [N, 4].
            segments (list | ndarray): segments.
            keypoints (ndarray): keypoints(x, y, visible) with shape [N, 17, 3].
            bbox_format (str): Format of `bboxes`, passed on to `Bboxes`.
            normalized (bool): Whether the coordinates are currently normalized.
        """
        if segments is None:
            segments = []
        self._bboxes = Bboxes(bboxes=bboxes, format=bbox_format)
        self.keypoints = keypoints
        self.normalized = normalized

        if len(segments) > 0:
            # list[np.array(1000, 2)] * num_samples
            segments = resample_segments(segments)
            # (N, 1000, 2)
            segments = np.stack(segments, axis=0)
        else:
            segments = np.zeros((0, 1000, 2), dtype=np.float32)
        self.segments = segments

    def convert_bbox(self, format):
        """Convert bounding box format."""
        self._bboxes.convert(format=format)

    @property
    def bbox_areas(self):
        """Calculate the area of bounding boxes."""
        return self._bboxes.areas()

    def scale(self, scale_w, scale_h, bbox_only=False):
        """Scale boxes (and, unless `bbox_only`, segments and keypoints); the `normalized` flag is left untouched."""
        self._bboxes.mul(scale=(scale_w, scale_h, scale_w, scale_h))
        if bbox_only:
            return
        self.segments[..., 0] *= scale_w
        self.segments[..., 1] *= scale_h
        if self.keypoints is not None:
            self.keypoints[..., 0] *= scale_w
            self.keypoints[..., 1] *= scale_h

    def denormalize(self, w, h):
        """Denormalizes boxes, segments, and keypoints from normalized coordinates."""
        if not self.normalized:
            return
        self._bboxes.mul(scale=(w, h, w, h))
        self.segments[..., 0] *= w
        self.segments[..., 1] *= h
        if self.keypoints is not None:
            self.keypoints[..., 0] *= w
            self.keypoints[..., 1] *= h
        self.normalized = False

    def normalize(self, w, h):
        """Normalize bounding boxes, segments, and keypoints to image dimensions."""
        if self.normalized:
            return
        self._bboxes.mul(scale=(1 / w, 1 / h, 1 / w, 1 / h))
        self.segments[..., 0] /= w
        self.segments[..., 1] /= h
        if self.keypoints is not None:
            self.keypoints[..., 0] /= w
            self.keypoints[..., 1] /= h
        self.normalized = True

    def add_padding(self, padw, padh):
        """Handle rect and mosaic situation."""
        assert not self.normalized, 'you should add padding with absolute coordinates.'
        self._bboxes.add(offset=(padw, padh, padw, padh))
        self.segments[..., 0] += padw
        self.segments[..., 1] += padh
        if self.keypoints is not None:
            self.keypoints[..., 0] += padw
            self.keypoints[..., 1] += padh

    def __getitem__(self, index) -> 'Instances':
        """
        Retrieve a specific instance or a set of instances using indexing.

        Args:
            index (int, slice, or np.ndarray): The index, slice, or boolean array to select
                                               the desired instances.

        Returns:
            Instances: A new Instances object containing the selected bounding boxes,
                       segments, and keypoints if present.

        Note:
            When using boolean indexing, make sure to provide a boolean array with the same
            length as the number of instances.
        """
        segments = self.segments[index] if len(self.segments) else self.segments
        keypoints = self.keypoints[index] if self.keypoints is not None else None
        bboxes = self.bboxes[index]
        bbox_format = self._bboxes.format
        return Instances(
            bboxes=bboxes,
            segments=segments,
            keypoints=keypoints,
            bbox_format=bbox_format,
            normalized=self.normalized,
        )

    def flipud(self, h):
        """Flips the coordinates of bounding boxes, segments, and keypoints vertically."""
        if self._bboxes.format == 'xyxy':
            y1 = self.bboxes[:, 1].copy()
            y2 = self.bboxes[:, 3].copy()
            self.bboxes[:, 1] = h - y2
            self.bboxes[:, 3] = h - y1
        else:
            self.bboxes[:, 1] = h - self.bboxes[:, 1]
        self.segments[..., 1] = h - self.segments[..., 1]
        if self.keypoints is not None:
            self.keypoints[..., 1] = h - self.keypoints[..., 1]

    def fliplr(self, w):
        """Flips the coordinates of bounding boxes, segments, and keypoints horizontally."""
        if self._bboxes.format == 'xyxy':
            x1 = self.bboxes[:, 0].copy()
            x2 = self.bboxes[:, 2].copy()
            self.bboxes[:, 0] = w - x2
            self.bboxes[:, 2] = w - x1
        else:
            self.bboxes[:, 0] = w - self.bboxes[:, 0]
        self.segments[..., 0] = w - self.segments[..., 0]
        if self.keypoints is not None:
            self.keypoints[..., 0] = w - self.keypoints[..., 0]

    def clip(self, w, h):
        """Clips bounding boxes, segments, and keypoints values to stay within image boundaries."""
        ori_format = self._bboxes.format
        self.convert_bbox(format='xyxy')  # clipping is done on corner coordinates
        self.bboxes[:, [0, 2]] = self.bboxes[:, [0, 2]].clip(0, w)
        self.bboxes[:, [1, 3]] = self.bboxes[:, [1, 3]].clip(0, h)
        if ori_format != 'xyxy':
            self.convert_bbox(format=ori_format)  # restore the caller's format
        self.segments[..., 0] = self.segments[..., 0].clip(0, w)
        self.segments[..., 1] = self.segments[..., 1].clip(0, h)
        if self.keypoints is not None:
            self.keypoints[..., 0] = self.keypoints[..., 0].clip(0, w)
            self.keypoints[..., 1] = self.keypoints[..., 1].clip(0, h)

    def remove_zero_area_boxes(self):
        """
        Remove zero-area boxes, i.e. after clipping some boxes may have zero width or height. This removes them.

        Returns:
            The boolean mask of the boxes that have positive area (computed before removal).
        """
        good = self.bbox_areas > 0
        if not all(good):
            self._bboxes = self._bboxes[good]
            if len(self.segments):
                self.segments = self.segments[good]
            if self.keypoints is not None:
                self.keypoints = self.keypoints[good]
        return good

    def update(self, bboxes, segments=None, keypoints=None):
        """Replace the boxes (keeping the current format) and, when given, the segments and keypoints."""
        self._bboxes = Bboxes(bboxes, format=self._bboxes.format)
        if segments is not None:
            self.segments = segments
        if keypoints is not None:
            self.keypoints = keypoints

    def __len__(self):
        """Return the length of the instance list."""
        return len(self.bboxes)

    @classmethod
    def concatenate(cls, instances_list: List['Instances'], axis=0) -> 'Instances':
        """
        Concatenates a list of Instances objects into a single Instances object.

        Args:
            instances_list (List[Instances]): A list of Instances objects to concatenate.
            axis (int, optional): The axis along which the arrays will be concatenated. Defaults to 0.

        Returns:
            Instances: A new Instances object containing the concatenated bounding boxes,
                       segments, and keypoints if present. An empty input gives an Instances object with zero
                       boxes; a single-element input is returned as-is (not copied).

        Note:
            The `Instances` objects in the list should have the same properties, such as
            the format of the bounding boxes, whether keypoints are present, and if the
            coordinates are normalized.
        """
        assert isinstance(instances_list, (list, tuple))
        if not instances_list:
            return cls(np.empty((0, 4)))  # (0, 4): a 1-D empty array fails the Bboxes shape assertion
        assert all(isinstance(instance, Instances) for instance in instances_list)

        if len(instances_list) == 1:
            return instances_list[0]

        # Format, normalization and keypoint presence are taken from the first element
        use_keypoint = instances_list[0].keypoints is not None
        bbox_format = instances_list[0]._bboxes.format
        normalized = instances_list[0].normalized

        cat_boxes = np.concatenate([ins.bboxes for ins in instances_list], axis=axis)
        cat_segments = np.concatenate([b.segments for b in instances_list], axis=axis)
        cat_keypoints = np.concatenate([b.keypoints for b in instances_list], axis=axis) if use_keypoint else None
        return cls(cat_boxes, cat_segments, cat_keypoints, bbox_format, normalized)

    @property
    def bboxes(self):
        """Return bounding boxes."""
        return self._bboxes.bboxes

392
ultralytics/utils/loss.py Normal file
View File

@ -0,0 +1,392 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
import torch
import torch.nn as nn
import torch.nn.functional as F
from ultralytics.utils.metrics import OKS_SIGMA
from ultralytics.utils.ops import crop_mask, xywh2xyxy, xyxy2xywh
from ultralytics.utils.tal import TaskAlignedAssigner, dist2bbox, make_anchors
from .metrics import bbox_iou
from .tal import bbox2dist
class VarifocalLoss(nn.Module):
    """Varifocal loss by Zhang et al. https://arxiv.org/abs/2008.13367."""

    def __init__(self):
        """Initialize the VarifocalLoss class."""
        super().__init__()

    def forward(self, pred_score, gt_score, label, alpha=0.75, gamma=2.0):
        """
        Compute the varifocal loss.

        The per-element weight is `alpha * sigmoid(pred_score) ** gamma` where `label` is 0 and `gt_score` where
        `label` is 1. The weighted BCE-with-logits is averaged over dim 1 and summed.
        """
        negative_weight = alpha * pred_score.sigmoid().pow(gamma) * (1 - label)
        positive_weight = gt_score * label
        weight = negative_weight + positive_weight
        with torch.cuda.amp.autocast(enabled=False):  # BCE is evaluated on float32 inputs
            bce = F.binary_cross_entropy_with_logits(pred_score.float(), gt_score.float(), reduction='none')
            loss = (bce * weight).mean(1).sum()
        return loss
# Losses
class FocalLoss(nn.Module):
    """Focal loss computed on top of binary cross-entropy with logits."""

    def __init__(self, ):
        """Initialize the FocalLoss class."""
        super().__init__()

    def forward(self, pred, label, gamma=1.5, alpha=0.25):
        """
        Compute the focal loss for logits `pred` against targets `label`.

        The element-wise BCE is scaled by `(1 - p_t) ** gamma` and, when `alpha > 0`, by the alpha balancing
        factor. The result is averaged over dim 1 and summed.
        """
        bce = F.binary_cross_entropy_with_logits(pred, label, reduction='none')

        # TF implementation https://github.com/tensorflow/addons/blob/v0.7.1/tensorflow_addons/losses/focal_loss.py
        prob = pred.sigmoid()  # prob from logits
        p_t = label * prob + (1 - label) * (1 - prob)
        loss = bce * (1.0 - p_t) ** gamma
        if alpha > 0:
            loss = loss * (label * alpha + (1 - label) * (1 - alpha))
        return loss.mean(1).sum()
class BboxLoss(nn.Module):
    """Box regression loss: a CIoU term plus an optional Distribution Focal Loss (DFL) term."""

    def __init__(self, reg_max, use_dfl=False):
        """Initialize the BboxLoss module with regularization maximum and DFL settings."""
        super().__init__()
        self.use_dfl = use_dfl
        self.reg_max = reg_max

    def forward(self, pred_dist, pred_bboxes, anchor_points, target_bboxes, target_scores, target_scores_sum, fg_mask):
        """Return `(loss_iou, loss_dfl)` for the entries selected by `fg_mask`, weighted by summed target scores."""
        weight = target_scores.sum(-1)[fg_mask].unsqueeze(-1)

        # IoU loss
        iou = bbox_iou(pred_bboxes[fg_mask], target_bboxes[fg_mask], xywh=False, CIoU=True)
        loss_iou = ((1.0 - iou) * weight).sum() / target_scores_sum

        # DFL loss
        if not self.use_dfl:
            return loss_iou, torch.tensor(0.0).to(pred_dist.device)

        target_ltrb = bbox2dist(anchor_points, target_bboxes, self.reg_max)
        per_box_dfl = self._df_loss(pred_dist[fg_mask].view(-1, self.reg_max + 1), target_ltrb[fg_mask]) * weight
        loss_dfl = per_box_dfl.sum() / target_scores_sum
        return loss_iou, loss_dfl

    @staticmethod
    def _df_loss(pred_dist, target):
        """Return sum of left and right DFL losses."""
        # Distribution Focal Loss (DFL) proposed in Generalized Focal Loss https://ieeexplore.ieee.org/document/9792391
        left = target.long()  # target left
        right = left + 1  # target right
        weight_left = right - target  # weight left
        weight_right = 1 - weight_left  # weight right
        loss_left = F.cross_entropy(pred_dist, left.view(-1), reduction='none').view(left.shape) * weight_left
        loss_right = F.cross_entropy(pred_dist, right.view(-1), reduction='none').view(left.shape) * weight_right
        return (loss_left + loss_right).mean(-1, keepdim=True)
class KeypointLoss(nn.Module):
    """Keypoint loss based on squared distances scaled by per-keypoint sigmas and the object area."""

    def __init__(self, sigmas) -> None:
        """Store the per-keypoint sigmas used to scale the distances."""
        super().__init__()
        self.sigmas = sigmas

    def forward(self, pred_kpts, gt_kpts, kpt_mask, area):
        """Calculates keypoint loss factor and Euclidean distance loss for predicted and actual keypoints."""
        dx = pred_kpts[..., 0] - gt_kpts[..., 0]
        dy = pred_kpts[..., 1] - gt_kpts[..., 1]
        d = dx ** 2 + dy ** 2  # squared distance per keypoint
        n_nonzero = torch.sum(kpt_mask != 0)
        n_zero = torch.sum(kpt_mask == 0)
        kpt_loss_factor = (n_nonzero + n_zero) / (n_nonzero + 1e-9)
        # e = d / (2 * (area * self.sigmas) ** 2 + 1e-9)  # from formula
        e = d / (2 * self.sigmas) ** 2 / (area + 1e-9) / 2  # from cocoeval
        return kpt_loss_factor * ((1 - torch.exp(-e)) * kpt_mask).mean()
# Criterion class for computing Detection training losses
class v8DetectionLoss:
def __init__(self, model):  # model must be de-paralleled
    """Set up the detection loss from the model's last module (the Detect() head) and its `args`."""
    device = next(model.parameters()).device  # get model device
    head = model.model[-1]  # Detect() module

    self.device = device
    self.hyp = model.args  # hyperparameters
    self.bce = nn.BCEWithLogitsLoss(reduction='none')

    # Values copied from the detection head
    self.stride = head.stride  # model strides
    self.nc = head.nc  # number of classes
    self.no = head.no
    self.reg_max = head.reg_max
    self.use_dfl = head.reg_max > 1

    self.assigner = TaskAlignedAssigner(topk=10, num_classes=self.nc, alpha=0.5, beta=6.0)
    self.bbox_loss = BboxLoss(head.reg_max - 1, use_dfl=self.use_dfl).to(device)
    self.proj = torch.arange(head.reg_max, dtype=torch.float, device=device)
def preprocess(self, targets, batch_size, scale_tensor):
"""Preprocesses the target counts and matches with the input batch size to output a tensor."""
if targets.shape[0] == 0:
out = torch.zeros(batch_size, 0, 5, device=self.device)
else:
i = targets[:, 0] # image index
_, counts = i.unique(return_counts=True)
counts = counts.to(dtype=torch.int32)
out = torch.zeros(batch_size, counts.max(), 5, device=self.device)
for j in range(batch_size):
matches = i == j
n = matches.sum()
if n:
out[j, :n] = targets[matches, 1:]
out[..., 1:5] = xywh2xyxy(out[..., 1:5].mul_(scale_tensor))
return out
def bbox_decode(self, anchor_points, pred_dist):
"""Decode predicted object bounding box coordinates from anchor points and distribution."""
if self.use_dfl:
b, a, c = pred_dist.shape # batch, anchors, channels
pred_dist = pred_dist.view(b, a, 4, c // 4).softmax(3).matmul(self.proj.type(pred_dist.dtype))
# pred_dist = pred_dist.view(b, a, c // 4, 4).transpose(2,3).softmax(3).matmul(self.proj.type(pred_dist.dtype))
# pred_dist = (pred_dist.view(b, a, c // 4, 4).softmax(2) * self.proj.type(pred_dist.dtype).view(1, 1, -1, 1)).sum(2)
return dist2bbox(pred_dist, anchor_points, xywh=False)
def __call__(self, preds, batch):
"""Calculate the sum of the loss for box, cls and dfl multiplied by batch size."""
loss = torch.zeros(3, device=self.device) # box, cls, dfl
feats = preds[1] if isinstance(preds, tuple) else preds
pred_distri, pred_scores = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats], 2).split(
(self.reg_max * 4, self.nc), 1)
pred_scores = pred_scores.permute(0, 2, 1).contiguous()
pred_distri = pred_distri.permute(0, 2, 1).contiguous()
dtype = pred_scores.dtype
batch_size = pred_scores.shape[0]
imgsz = torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) * self.stride[0] # image size (h,w)
anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5)
# targets
targets = torch.cat((batch['batch_idx'].view(-1, 1), batch['cls'].view(-1, 1), batch['bboxes']), 1)
targets = self.preprocess(targets.to(self.device), batch_size, scale_tensor=imgsz[[1, 0, 1, 0]])
gt_labels, gt_bboxes = targets.split((1, 4), 2) # cls, xyxy
mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0)
# pboxes
pred_bboxes = self.bbox_decode(anchor_points, pred_distri) # xyxy, (b, h*w, 4)
_, target_bboxes, target_scores, fg_mask, _ = self.assigner(
pred_scores.detach().sigmoid(), (pred_bboxes.detach() * stride_tensor).type(gt_bboxes.dtype),
anchor_points * stride_tensor, gt_labels, gt_bboxes, mask_gt)
target_scores_sum = max(target_scores.sum(), 1)
# cls loss
# loss[1] = self.varifocal_loss(pred_scores, target_scores, target_labels) / target_scores_sum # VFL way
loss[1] = self.bce(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum # BCE
# bbox loss
if fg_mask.sum():
target_bboxes /= stride_tensor
loss[0], loss[2] = self.bbox_loss(pred_distri, pred_bboxes, anchor_points, target_bboxes, target_scores,
target_scores_sum, fg_mask)
loss[0] *= self.hyp.box # box gain
loss[1] *= self.hyp.cls # cls gain
loss[2] *= self.hyp.dfl # dfl gain
return loss.sum() * batch_size, loss.detach() # loss(box, cls, dfl)
# Criterion class for computing training losses
class v8SegmentationLoss(v8DetectionLoss):
    """Criterion adding a mask (segmentation) loss on top of the detection box, cls and DFL losses."""

    def __init__(self, model):  # model must be de-paralleled
        """Initialise the detection criterion, then read the mask count and overlap flag from `model`."""
        super().__init__(model)
        self.nm = model.model[-1].nm  # number of masks
        self.overlap = model.args.overlap_mask

    def __call__(self, preds, batch):
        """
        Calculate and return the loss for the YOLO model.

        Args:
            preds: Either a 3-item sequence (feats, pred_masks, proto) or a sequence whose item 1 holds those three.
            batch: Mapping providing 'batch_idx', 'cls', 'bboxes' and 'masks'.

        Returns:
            (summed loss multiplied by batch size, detached 4-vector of [box, seg, cls, dfl] losses).

        Raises:
            TypeError: If building the targets raises a RuntimeError (reported as a mis-formatted segment dataset).
        """
        loss = torch.zeros(4, device=self.device)  # box, seg, cls, dfl
        feats, pred_masks, proto = preds if len(preds) == 3 else preds[1]
        batch_size, _, mask_h, mask_w = proto.shape  # batch size, number of masks, mask height, mask width
        pred_distri, pred_scores = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats], 2).split(
            (self.reg_max * 4, self.nc), 1)
        # b, grids, ..
        pred_scores = pred_scores.permute(0, 2, 1).contiguous()
        pred_distri = pred_distri.permute(0, 2, 1).contiguous()
        pred_masks = pred_masks.permute(0, 2, 1).contiguous()
        dtype = pred_scores.dtype
        imgsz = torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) * self.stride[0]  # image size (h,w)
        anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5)
        # targets
        try:
            batch_idx = batch['batch_idx'].view(-1, 1)
            targets = torch.cat((batch_idx, batch['cls'].view(-1, 1), batch['bboxes']), 1)
            targets = self.preprocess(targets.to(self.device), batch_size, scale_tensor=imgsz[[1, 0, 1, 0]])
            gt_labels, gt_bboxes = targets.split((1, 4), 2)  # cls, xyxy
            mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0)
        except RuntimeError as e:
            raise TypeError('ERROR ❌ segment dataset incorrectly formatted or not a segment dataset.\n'
                            "This error can occur when incorrectly training a 'segment' model on a 'detect' dataset, "
                            "i.e. 'yolo train model=yolov8n-seg.pt data=coco128.yaml'.\nVerify your dataset is a "
                            "correctly formatted 'segment' dataset using 'data=coco128-seg.yaml' "
                            'as an example.\nSee https://docs.ultralytics.com/tasks/segment/ for help.') from e
        # pboxes
        pred_bboxes = self.bbox_decode(anchor_points, pred_distri)  # xyxy, (b, h*w, 4)
        _, target_bboxes, target_scores, fg_mask, target_gt_idx = self.assigner(
            pred_scores.detach().sigmoid(), (pred_bboxes.detach() * stride_tensor).type(gt_bboxes.dtype),
            anchor_points * stride_tensor, gt_labels, gt_bboxes, mask_gt)
        target_scores_sum = max(target_scores.sum(), 1)
        # cls loss (stored in slot 2 for this criterion)
        # loss[1] = self.varifocal_loss(pred_scores, target_scores, target_labels) / target_scores_sum  # VFL way
        loss[2] = self.bce(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum  # BCE
        if fg_mask.sum():
            # bbox loss; target_bboxes is divided out-of-place because the un-divided boxes are reused below
            loss[0], loss[3] = self.bbox_loss(pred_distri, pred_bboxes, anchor_points, target_bboxes / stride_tensor,
                                              target_scores, target_scores_sum, fg_mask)
            # masks loss
            masks = batch['masks'].to(self.device).float()
            if tuple(masks.shape[-2:]) != (mask_h, mask_w):  # downsample
                masks = F.interpolate(masks[None], (mask_h, mask_w), mode='nearest')[0]
            for i in range(batch_size):
                if fg_mask[i].sum():
                    mask_idx = target_gt_idx[i][fg_mask[i]]
                    if self.overlap:
                        # overlap mode: one mask per image whose pixel value equals (target index + 1)
                        gt_mask = torch.where(masks[[i]] == (mask_idx + 1).view(-1, 1, 1), 1.0, 0.0)
                    else:
                        gt_mask = masks[batch_idx.view(-1) == i][mask_idx]
                    xyxyn = target_bboxes[i][fg_mask[i]] / imgsz[[1, 0, 1, 0]]  # boxes normalised by image size
                    marea = xyxy2xywh(xyxyn)[:, 2:].prod(1)  # normalised box area (w * h)
                    mxyxy = xyxyn * torch.tensor([mask_w, mask_h, mask_w, mask_h], device=self.device)
                    loss[1] += self.single_mask_loss(gt_mask, pred_masks[i][fg_mask[i]], proto[i], mxyxy, marea)  # seg
                # WARNING: lines below prevent Multi-GPU DDP 'unused gradient' PyTorch errors, do not remove
                else:
                    loss[1] += (proto * 0).sum() + (pred_masks * 0).sum()  # inf sums may lead to nan loss
        # WARNING: lines below prevent Multi-GPU DDP 'unused gradient' PyTorch errors, do not remove
        else:
            loss[1] += (proto * 0).sum() + (pred_masks * 0).sum()  # inf sums may lead to nan loss
        loss[0] *= self.hyp.box  # box gain
        loss[1] *= self.hyp.box / batch_size  # seg gain
        loss[2] *= self.hyp.cls  # cls gain
        loss[3] *= self.hyp.dfl  # dfl gain
        return loss.sum() * batch_size, loss.detach()  # loss(box, seg, cls, dfl)

    def single_mask_loss(self, gt_mask, pred, proto, xyxy, area):
        """
        Mask loss for one image.

        Builds per-object mask logits as `pred @ proto`, takes the per-pixel BCE against `gt_mask`, crops it with
        `crop_mask` to each box in `xyxy`, then averages per object, divides by `area` and averages over objects.
        """
        pred_mask = (pred @ proto.view(self.nm, -1)).view(-1, *proto.shape[1:])  # (n, 32) @ (32,80,80) -> (n,80,80)
        loss = F.binary_cross_entropy_with_logits(pred_mask, gt_mask, reduction='none')
        return (crop_mask(loss, xyxy).mean(dim=(1, 2)) / area).mean()
# Criterion class for computing training losses
class v8PoseLoss(v8DetectionLoss):
    """Criterion adding keypoint location and keypoint visibility losses to the detection losses."""

    def __init__(self, model):  # model must be de-paralleled
        """Initialise the detection criterion, then set up keypoint shape, sigmas and the keypoint losses."""
        super().__init__(model)
        self.kpt_shape = model.model[-1].kpt_shape
        self.bce_pose = nn.BCEWithLogitsLoss()
        # compare as a list so that a tuple kpt_shape such as (17, 3) also selects the OKS sigmas
        is_pose = list(self.kpt_shape) == [17, 3]
        nkpt = self.kpt_shape[0]  # number of keypoints
        # OKS_SIGMA for the 17-keypoint layout, otherwise a uniform 1 / nkpt per keypoint
        sigmas = torch.from_numpy(OKS_SIGMA).to(self.device) if is_pose else torch.ones(nkpt, device=self.device) / nkpt
        self.keypoint_loss = KeypointLoss(sigmas=sigmas)

    def __call__(self, preds, batch):
        """
        Calculate the total loss and detach it.

        Args:
            preds: Either (feats, pred_kpts) with `feats` a list, or a sequence whose item 1 holds that pair.
            batch: Mapping providing 'batch_idx', 'cls', 'bboxes' and 'keypoints'.

        Returns:
            (summed loss multiplied by batch size, detached 5-vector of [box, pose, kobj, cls, dfl] losses).
        """
        loss = torch.zeros(5, device=self.device)  # box, pose (kpt location), kobj (kpt visibility), cls, dfl
        feats, pred_kpts = preds if isinstance(preds[0], list) else preds[1]
        pred_distri, pred_scores = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats], 2).split(
            (self.reg_max * 4, self.nc), 1)

        # b, grids, ..
        pred_scores = pred_scores.permute(0, 2, 1).contiguous()
        pred_distri = pred_distri.permute(0, 2, 1).contiguous()
        pred_kpts = pred_kpts.permute(0, 2, 1).contiguous()

        dtype = pred_scores.dtype
        imgsz = torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) * self.stride[0]  # image size (h,w)
        anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5)

        # targets
        batch_size = pred_scores.shape[0]
        batch_idx = batch['batch_idx'].view(-1, 1)
        targets = torch.cat((batch_idx, batch['cls'].view(-1, 1), batch['bboxes']), 1)
        targets = self.preprocess(targets.to(self.device), batch_size, scale_tensor=imgsz[[1, 0, 1, 0]])
        gt_labels, gt_bboxes = targets.split((1, 4), 2)  # cls, xyxy
        mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0)

        # pboxes
        pred_bboxes = self.bbox_decode(anchor_points, pred_distri)  # xyxy, (b, h*w, 4)
        pred_kpts = self.kpts_decode(anchor_points, pred_kpts.view(batch_size, -1, *self.kpt_shape))  # (b, h*w, 17, 3)

        _, target_bboxes, target_scores, fg_mask, target_gt_idx = self.assigner(
            pred_scores.detach().sigmoid(), (pred_bboxes.detach() * stride_tensor).type(gt_bboxes.dtype),
            anchor_points * stride_tensor, gt_labels, gt_bboxes, mask_gt)

        target_scores_sum = max(target_scores.sum(), 1)

        # cls loss (stored in slot 3 for this criterion)
        loss[3] = self.bce(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum  # BCE

        # bbox loss
        if fg_mask.sum():
            # in-place: the boxes stay divided by stride for the area computation below
            target_bboxes /= stride_tensor
            loss[0], loss[4] = self.bbox_loss(pred_distri, pred_bboxes, anchor_points, target_bboxes, target_scores,
                                              target_scores_sum, fg_mask)
            keypoints = batch['keypoints'].to(self.device).float().clone()
            keypoints[..., 0] *= imgsz[1]
            keypoints[..., 1] *= imgsz[0]
            for i in range(batch_size):
                if fg_mask[i].sum():
                    idx = target_gt_idx[i][fg_mask[i]]
                    gt_kpt = keypoints[batch_idx.view(-1) == i][idx]  # (n, 51)
                    # divide x/y by each anchor's stride, matching the units of the decoded predictions
                    gt_kpt[..., 0] /= stride_tensor[fg_mask[i]]
                    gt_kpt[..., 1] /= stride_tensor[fg_mask[i]]
                    area = xyxy2xywh(target_bboxes[i][fg_mask[i]])[:, 2:].prod(1, keepdim=True)
                    pred_kpt = pred_kpts[i][fg_mask[i]]
                    kpt_mask = gt_kpt[..., 2] != 0
                    loss[1] += self.keypoint_loss(pred_kpt, gt_kpt, kpt_mask, area)  # pose loss
                    # kpt_score loss, only when a third (score) channel is predicted
                    if pred_kpt.shape[-1] == 3:
                        loss[2] += self.bce_pose(pred_kpt[..., 2], kpt_mask.float())  # keypoint obj loss

        loss[0] *= self.hyp.box  # box gain
        loss[1] *= self.hyp.pose / batch_size  # pose gain
        loss[2] *= self.hyp.kobj / batch_size  # kobj gain
        loss[3] *= self.hyp.cls  # cls gain
        loss[4] *= self.hyp.dfl  # dfl gain

        return loss.sum() * batch_size, loss.detach()  # loss(box, pose, kobj, cls, dfl)

    def kpts_decode(self, anchor_points, pred_kpts):
        """Decode raw keypoint predictions: x/y are doubled and offset by (anchor point - 0.5); input is not modified."""
        y = pred_kpts.clone()
        y[..., :2] *= 2.0
        y[..., 0] += anchor_points[:, [0]] - 0.5
        y[..., 1] += anchor_points[:, [1]] - 0.5
        return y
class v8ClassificationLoss:
    """Criterion computing the classification training loss."""

    def __call__(self, preds, batch):
        """
        Compute the classification loss between predictions and true labels.

        Returns:
            (loss, detached loss), where loss is the summed cross-entropy of `preds` against
            batch['cls'] divided by the constant 64.
        """
        summed = torch.nn.functional.cross_entropy(preds, batch['cls'], reduction='sum')
        loss = summed / 64
        return loss, loss.detach()

View File

@ -0,0 +1,977 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
Model validation metrics
"""
import math
import warnings
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
import torch
from ultralytics.utils import LOGGER, SimpleClass, TryExcept, plt_settings
# Per-keypoint sigma constants (17 values, each divided by 10) for keypoint-similarity computations.
# NOTE(review): the values look like the COCO keypoint OKS sigmas from pycocotools' cocoeval — confirm.
OKS_SIGMA = np.array([.26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, .87, .89, .89]) / 10.0
# Boxes
def box_area(box):
    """Compute width * height for boxes laid out as xyxy(4, n): box[0]=x1, box[1]=y1, box[2]=x2, box[3]=y2."""
    width = box[2] - box[0]
    height = box[3] - box[1]
    return width * height
def bbox_ioa(box1, box2, eps=1e-7):
    """
    Compute, for every pair of boxes, the intersection area divided by the area of the box2 box.

    Both inputs use the x1y1x2y2 layout.

    Args:
        box1 (np.array): Array of shape (n, 4) holding n boxes.
        box2 (np.array): Array of shape (m, 4) holding m boxes.
        eps (float, optional): Added to the box2 area to avoid division by zero. Defaults to 1e-7.

    Returns:
        (np.array): Array of shape (n, m) with intersection / box2 area.
    """
    ax1, ay1, ax2, ay2 = box1.T
    bx1, by1, bx2, by2 = box2.T

    # pairwise overlap extents, clipped at zero for disjoint boxes
    overlap_w = (np.minimum(ax2[:, None], bx2) - np.maximum(ax1[:, None], bx1)).clip(0)
    overlap_h = (np.minimum(ay2[:, None], by2) - np.maximum(ay1[:, None], by1)).clip(0)
    inter_area = overlap_w * overlap_h

    box2_area = (bx2 - bx1) * (by2 - by1) + eps
    return inter_area / box2_area
def box_iou(box1, box2, eps=1e-7):
    """
    Pairwise intersection-over-union (IoU) between two sets of (x1, y1, x2, y2) boxes.

    Based on https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py

    Args:
        box1 (torch.Tensor): Tensor of shape (N, 4) holding N boxes.
        box2 (torch.Tensor): Tensor of shape (M, 4) holding M boxes.
        eps (float, optional): Added to the denominator to avoid division by zero. Defaults to 1e-7.

    Returns:
        (torch.Tensor): NxM tensor with the IoU of every box1/box2 pair.
    """
    # split each box into its top-left and bottom-right corner, broadcast to (N, M, 2)
    top_left1, bottom_right1 = box1.unsqueeze(1).chunk(2, 2)
    top_left2, bottom_right2 = box2.unsqueeze(0).chunk(2, 2)

    overlap = (torch.min(bottom_right1, bottom_right2) - torch.max(top_left1, top_left2)).clamp(min=0)
    inter = overlap.prod(2)

    area1 = (bottom_right1 - top_left1).prod(2)
    area2 = (bottom_right2 - top_left2).prod(2)
    return inter / (area1 + area2 - inter + eps)
def bbox_iou(box1, box2, xywh=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7):
    """
    Compute IoU, or one of its GIoU / DIoU / CIoU variants, of box1(1, 4) against box2(n, 4).

    Args:
        box1 (torch.Tensor): A single bounding box with shape (1, 4).
        box2 (torch.Tensor): n bounding boxes with shape (n, 4).
        xywh (bool, optional): True when boxes are (x, y, w, h); False when they are (x1, y1, x2, y2).
            Defaults to True.
        GIoU (bool, optional): Return Generalized IoU. Defaults to False.
        DIoU (bool, optional): Return Distance IoU. Defaults to False.
        CIoU (bool, optional): Return Complete IoU. Defaults to False.
        eps (float, optional): Small value guarding divisions. Defaults to 1e-7.

    Returns:
        (torch.Tensor): IoU, GIoU, DIoU, or CIoU values depending on the flags (CIoU wins over DIoU over GIoU).
    """
    # Corner coordinates plus widths/heights of both boxes
    if xywh:  # centre/size -> corners
        (cx1, cy1, w1, h1), (cx2, cy2, w2, h2) = box1.chunk(4, -1), box2.chunk(4, -1)
        half_w1, half_h1, half_w2, half_h2 = w1 / 2, h1 / 2, w2 / 2, h2 / 2
        b1_x1, b1_x2, b1_y1, b1_y2 = cx1 - half_w1, cx1 + half_w1, cy1 - half_h1, cy1 + half_h1
        b2_x1, b2_x2, b2_y1, b2_y2 = cx2 - half_w2, cx2 + half_w2, cy2 - half_h2, cy2 + half_h2
    else:  # already corners
        b1_x1, b1_y1, b1_x2, b1_y2 = box1.chunk(4, -1)
        b2_x1, b2_y1, b2_x2, b2_y2 = box2.chunk(4, -1)
        w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps
        w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps

    # Plain IoU
    overlap_w = (b1_x2.minimum(b2_x2) - b1_x1.maximum(b2_x1)).clamp(min=0)
    overlap_h = (b1_y2.minimum(b2_y2) - b1_y1.maximum(b2_y1)).clamp(min=0)
    inter = overlap_w * overlap_h
    union = w1 * h1 + w2 * h2 - inter + eps
    iou = inter / union
    if not (CIoU or DIoU or GIoU):
        return iou  # IoU

    # Smallest enclosing (convex) box
    cw = b1_x2.maximum(b2_x2) - b1_x1.minimum(b2_x1)  # convex width
    ch = b1_y2.maximum(b2_y2) - b1_y1.minimum(b2_y1)  # convex height
    if not (CIoU or DIoU):
        c_area = cw * ch + eps  # convex area
        return iou - (c_area - union) / c_area  # GIoU https://arxiv.org/pdf/1902.09630.pdf

    # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1
    c2 = cw ** 2 + ch ** 2 + eps  # convex diagonal squared
    rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 + (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4  # center dist ** 2
    if not CIoU:
        return iou - rho2 / c2  # DIoU

    # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47
    v = (4 / math.pi ** 2) * (torch.atan(w2 / h2) - torch.atan(w1 / h1)).pow(2)
    with torch.no_grad():  # alpha is treated as a constant weight
        alpha = v / (v - iou + (1 + eps))
    return iou - (rho2 / c2 + v * alpha)  # CIoU
def mask_iou(mask1, mask2, eps=1e-7):
    """
    Pairwise IoU between two sets of flattened masks.

    Args:
        mask1 (torch.Tensor): Tensor of shape (N, n), N ground-truth objects with n = image width * height.
        mask2 (torch.Tensor): Tensor of shape (M, n), M predicted objects with n = image width * height.
        eps (float, optional): Added to the union to avoid division by zero. Defaults to 1e-7.

    Returns:
        (torch.Tensor): Tensor of shape (N, M) with the masks IoU.
    """
    intersection = (mask1 @ mask2.T).clamp(min=0)
    area1 = mask1.sum(1)[:, None]
    area2 = mask2.sum(1)[None]
    union = (area1 + area2) - intersection  # (area1 + area2) - intersection
    return intersection / (union + eps)
def kpt_iou(kpt1, kpt2, area, sigma, eps=1e-7):
    """
    Compute the Object Keypoint Similarity (OKS) between ground-truth and predicted keypoints.

    Args:
        kpt1 (torch.Tensor): Tensor of shape (N, 17, 3) with ground-truth keypoints.
        kpt2 (torch.Tensor): Tensor of shape (M, 17, 3) with predicted keypoints.
        area (torch.Tensor): Tensor of shape (N,) with ground-truth areas.
        sigma (list): 17 per-keypoint scale values.
        eps (float, optional): Small value guarding divisions. Defaults to 1e-7.

    Returns:
        (torch.Tensor): Tensor of shape (N, M) with the keypoint similarities.
    """
    dx = kpt1[:, None, :, 0] - kpt2[..., 0]
    dy = kpt1[:, None, :, 1] - kpt2[..., 1]
    sq_dist = dx ** 2 + dy ** 2  # (N, M, 17)

    sigma = torch.tensor(sigma, device=kpt1.device, dtype=kpt1.dtype)  # (17, )
    visible = kpt1[..., 2] != 0  # (N, 17), ground-truth keypoints with a non-zero third channel

    exponent = sq_dist / (2 * sigma) ** 2 / (area[:, None, None] + eps) / 2  # from cocoeval
    similarity = torch.exp(-exponent) * visible[:, None]
    # average over the visible ground-truth keypoints only
    return similarity.sum(-1) / (visible.sum(-1)[:, None] + eps)
def smooth_BCE(eps=0.1):  # https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441
    """Return the (positive, negative) label-smoothed BCE targets: (1 - eps / 2, eps / 2)."""
    negative = 0.5 * eps
    positive = 1.0 - negative
    return positive, negative
class ConfusionMatrix:
    """
    A class for calculating and updating a confusion matrix for object detection and classification tasks.

    Rows index the predicted class and columns the true class. For task='detect' one extra row and column
    (index `nc`) represents the background.

    Attributes:
        task (str): The type of task, either 'detect' or 'classify'.
        matrix (np.array): The confusion matrix, (nc + 1, nc + 1) for 'detect', otherwise (nc, nc).
        nc (int): The number of classes.
        conf (float): The confidence threshold for detections.
        iou_thres (float): The Intersection over Union threshold.
    """

    def __init__(self, nc, conf=0.25, iou_thres=0.45, task='detect'):
        """Initialize attributes for the YOLO model."""
        self.task = task
        self.matrix = np.zeros((nc + 1, nc + 1)) if self.task == 'detect' else np.zeros((nc, nc))
        self.nc = nc  # number of classes
        self.conf = conf
        self.iou_thres = iou_thres

    def process_cls_preds(self, preds, targets):
        """
        Update confusion matrix for classification task

        Args:
            preds (Array[N, min(nc,5)]): Predicted class labels; only column 0 is used.
            targets (Array[N, 1]): Ground truth class labels.
        """
        preds, targets = torch.cat(preds)[:, 0], torch.cat(targets)
        for p, t in zip(preds.cpu().numpy(), targets.cpu().numpy()):
            self.matrix[p][t] += 1

    def process_batch(self, detections, labels):
        """
        Update confusion matrix for object detection task.

        Args:
            detections (Array[N, 6] | None): Detected bounding boxes and their associated information.
                Each row should contain (x1, y1, x2, y2, conf, class). None means "no detections".
            labels (Array[M, 5]): Ground truth bounding boxes and their associated class labels.
                Each row should contain (class, x1, y1, x2, y2). When `detections` is None, a 1-D array
                of class ids is accepted as well.
        """
        if detections is None:
            # Only the class ids are needed here; accept both full label rows and bare class ids.
            gt_classes = (labels[:, 0] if labels.ndim > 1 else labels).int()
            for gc in gt_classes:
                self.matrix[self.nc, gc] += 1  # background FN
            return

        detections = detections[detections[:, 4] > self.conf]
        gt_classes = labels[:, 0].int()
        detection_classes = detections[:, 5].int()
        iou = box_iou(labels[:, 1:], detections[:, :4])

        x = torch.where(iou > self.iou_thres)
        if x[0].shape[0]:
            # rows of (label index, detection index, iou)
            matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()
            if x[0].shape[0] > 1:
                # keep the best-IoU pair per detection, then per label
                matches = matches[matches[:, 2].argsort()[::-1]]
                matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
                matches = matches[matches[:, 2].argsort()[::-1]]
                matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
        else:
            matches = np.zeros((0, 3))

        n = matches.shape[0] > 0
        m0, m1, _ = matches.transpose().astype(int)
        for i, gc in enumerate(gt_classes):
            j = m0 == i
            if n and sum(j) == 1:
                self.matrix[detection_classes[m1[j]], gc] += 1  # correct
            else:
                self.matrix[self.nc, gc] += 1  # true background

        if n:
            for i, dc in enumerate(detection_classes):
                if not any(m1 == i):
                    self.matrix[dc, self.nc] += 1  # predicted background

    def matrix(self):
        """Returns the confusion matrix."""
        # NOTE: on instances this method is shadowed by the `matrix` array attribute assigned in __init__,
        # so `cm.matrix` yields the array directly. The definition is kept for interface compatibility.
        return self.matrix

    def tp_fp(self):
        """Returns true positives and false positives."""
        tp = self.matrix.diagonal()  # true positives
        fp = self.matrix.sum(1) - tp  # false positives
        # fn = self.matrix.sum(0) - tp  # false negatives (missed detections)
        return (tp[:-1], fp[:-1]) if self.task == 'detect' else (tp, fp)  # remove background class if task=detect

    @TryExcept('WARNING ⚠️ ConfusionMatrix plot failure')
    @plt_settings()
    def plot(self, normalize=True, save_dir='', names=(), on_plot=None):
        """
        Plot the confusion matrix using seaborn and save it to a file.

        Args:
            normalize (bool): Whether to normalize the confusion matrix.
            save_dir (str): Directory where the plot will be saved.
            names (tuple): Names of classes, used as labels on the plot.
            on_plot (func): An optional callback to pass plots path and data when they are rendered.
        """
        import seaborn as sn

        array = self.matrix / ((self.matrix.sum(0).reshape(1, -1) + 1E-9) if normalize else 1)  # normalize columns
        array[array < 0.005] = np.nan  # don't annotate (would appear as 0.00)

        fig, ax = plt.subplots(1, 1, figsize=(12, 9), tight_layout=True)
        nc, nn = self.nc, len(names)  # number of classes, names
        sn.set(font_scale=1.0 if nc < 50 else 0.8)  # for label size
        labels = (0 < nn < 99) and (nn == nc)  # apply names to ticklabels
        if labels:
            # only the 'detect' matrix has the extra background row/column
            ticklabels = list(names) + (['background'] if self.task == 'detect' else [])
        else:
            ticklabels = 'auto'
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')  # suppress empty matrix RuntimeWarning: All-NaN slice encountered
            sn.heatmap(array,
                       ax=ax,
                       annot=nc < 30,
                       annot_kws={
                           'size': 8},
                       cmap='Blues',
                       fmt='.2f' if normalize else '.0f',
                       square=True,
                       vmin=0.0,
                       xticklabels=ticklabels,
                       yticklabels=ticklabels).set_facecolor((1, 1, 1))
        title = 'Confusion Matrix' + ' Normalized' * normalize
        ax.set_xlabel('True')
        ax.set_ylabel('Predicted')
        ax.set_title(title)
        plot_fname = Path(save_dir) / f'{title.lower().replace(" ", "_")}.png'
        fig.savefig(plot_fname, dpi=250)
        plt.close(fig)
        if on_plot:
            on_plot(plot_fname)

    def print(self):
        """
        Print the confusion matrix to the console.
        """
        # iterate the actual rows so both the (nc + 1)-row 'detect' and nc-row 'classify' matrices work
        for row in self.matrix:
            LOGGER.info(' '.join(map(str, row)))
def smooth(y, f=0.05):
    """
    Box filter of fraction f.

    Args:
        y (np.ndarray): Non-empty 1-D array to smooth.
        f (float): Approximate fraction of len(y) covered by the filter window.

    Returns:
        (np.ndarray): Smoothed array with the same length as `y`; edges are padded with the end values.
    """
    nf = round(len(y) * f * 2) // 2 + 1  # number of filter elements
    if nf % 2 == 0:
        nf += 1  # must be odd: an even window would make the 'valid' convolution one element longer than y
    p = np.ones(nf // 2)  # ones padding
    yp = np.concatenate((p * y[0], y, p * y[-1]), 0)  # y padded
    return np.convolve(yp, np.ones(nf) / nf, mode='valid')  # y-smoothed
@plt_settings()
def plot_pr_curve(px, py, ap, save_dir=Path('pr_curve.png'), names=(), on_plot=None):
    """
    Draw the precision-recall curves, save the figure to `save_dir` and optionally notify `on_plot`.

    `py` is a sequence of per-class precision curves sampled at `px`; `ap[:, 0]` provides the values
    shown in the legend.
    """
    fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True)
    py = np.stack(py, axis=1)

    per_class_legend = 0 < len(names) < 21  # display per-class legend if < 21 classes
    if per_class_legend:
        for i, class_curve in enumerate(py.T):
            ax.plot(px, class_curve, linewidth=1, label=f'{names[i]} {ap[i, 0]:.3f}')  # plot(recall, precision)
    else:
        ax.plot(px, py, linewidth=1, color='grey')  # plot(recall, precision)

    mean_label = 'all classes %.3f mAP@0.5' % ap[:, 0].mean()
    ax.plot(px, py.mean(1), linewidth=3, color='blue', label=mean_label)

    ax.set_xlabel('Recall')
    ax.set_ylabel('Precision')
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.legend(bbox_to_anchor=(1.04, 1), loc='upper left')
    ax.set_title('Precision-Recall Curve')

    fig.savefig(save_dir, dpi=250)
    plt.close(fig)
    if on_plot:
        on_plot(save_dir)
@plt_settings()
def plot_mc_curve(px, py, save_dir=Path('mc_curve.png'), names=(), xlabel='Confidence', ylabel='Metric', on_plot=None):
    """
    Draw metric-vs-confidence curves, save the figure to `save_dir` and optionally notify `on_plot`.

    `py` holds one curve per class sampled at `px`; the bold line is the smoothed mean over classes.
    """
    fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True)

    per_class_legend = 0 < len(names) < 21  # display per-class legend if < 21 classes
    if per_class_legend:
        for i, class_curve in enumerate(py):
            ax.plot(px, class_curve, linewidth=1, label=f'{names[i]}')  # plot(confidence, metric)
    else:
        ax.plot(px, py.T, linewidth=1, color='grey')  # plot(confidence, metric)

    mean_curve = smooth(py.mean(0), 0.05)
    best = mean_curve.argmax()
    ax.plot(px, mean_curve, linewidth=3, color='blue',
            label=f'all classes {mean_curve.max():.2f} at {px[best]:.3f}')

    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.legend(bbox_to_anchor=(1.04, 1), loc='upper left')
    ax.set_title(f'{ylabel}-Confidence Curve')

    fig.savefig(save_dir, dpi=250)
    plt.close(fig)
    if on_plot:
        on_plot(save_dir)
def compute_ap(recall, precision, method='interp'):
    """
    Compute the average precision (AP) given the recall and precision curves.

    Arguments:
        recall (list): The recall curve.
        precision (list): The precision curve.
        method (str, optional): Integration method, 'interp' (101-point interpolation, COCO style) or
            'continuous' (sum of rectangles where recall changes). Defaults to 'interp'.

    Returns:
        (float): Average precision.
        (np.ndarray): Precision envelope curve.
        (np.ndarray): Modified recall curve with sentinel values added at the beginning and end.

    Raises:
        ValueError: If `method` is neither 'interp' nor 'continuous'.
    """
    # Append sentinel values to beginning and end
    mrec = np.concatenate(([0.0], recall, [1.0]))
    mpre = np.concatenate(([1.0], precision, [0.0]))

    # Compute the precision envelope (running maximum taken from the right)
    mpre = np.flip(np.maximum.accumulate(np.flip(mpre)))

    # Integrate area under curve
    if method == 'interp':
        x = np.linspace(0, 1, 101)  # 101-point interp (COCO)
        # np.trapz is deprecated since NumPy 2.0 in favour of np.trapezoid; fall back on older NumPy
        integrate = getattr(np, 'trapezoid', None) or np.trapz
        ap = integrate(np.interp(x, mrec, mpre), x)  # integrate
    elif method == 'continuous':
        i = np.where(mrec[1:] != mrec[:-1])[0]  # points where x-axis (recall) changes
        ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])  # area under curve
    else:
        raise ValueError(f"method must be 'interp' or 'continuous', got {method!r}")

    return ap, mpre, mrec
def ap_per_class(tp,
                 conf,
                 pred_cls,
                 target_cls,
                 plot=False,
                 on_plot=None,
                 save_dir=Path(),
                 names=(),
                 eps=1e-16,
                 prefix=''):
    """
    Computes the average precision per class for object detection evaluation.

    Args:
        tp (np.ndarray): Binary array indicating whether the detection is correct (True) or not (False),
            one column per IoU threshold.
        conf (np.ndarray): Array of confidence scores of the detections.
        pred_cls (np.ndarray): Array of predicted classes of the detections.
        target_cls (np.ndarray): Array of true classes of the detections.
        plot (bool, optional): Whether to plot PR curves or not. Defaults to False.
        on_plot (func, optional): A callback to pass plots path and data when they are rendered. Defaults to None.
        save_dir (Path, optional): Directory to save the PR curves. Defaults to an empty path.
        names (dict | tuple | list, optional): Class names, either a {class index: name} mapping or a sequence
            indexed by class. Defaults to an empty tuple.
        eps (float, optional): A small value to avoid division by zero. Defaults to 1e-16.
        prefix (str, optional): A prefix string for saving the plot files. Defaults to an empty string.

    Returns:
        (tuple): A tuple of six arrays and one array of unique classes, where:
            tp (np.ndarray): True positive counts for each class.
            fp (np.ndarray): False positive counts for each class.
            p (np.ndarray): Precision for each class at the max-F1 confidence threshold.
            r (np.ndarray): Recall for each class at the max-F1 confidence threshold.
            f1 (np.ndarray): F1-score for each class at the max-F1 confidence threshold.
            ap (np.ndarray): Average precision for each class at different IoU thresholds.
            unique_classes (np.ndarray): An array of unique classes that have data.
    """
    # Sort by objectness
    i = np.argsort(-conf)
    tp, conf, pred_cls = tp[i], conf[i], pred_cls[i]

    # Find unique classes
    unique_classes, nt = np.unique(target_cls, return_counts=True)
    nc = unique_classes.shape[0]  # number of classes

    # Create Precision-Recall curve and compute AP for each class
    px = np.linspace(0, 1, 1000)  # for plotting
    # One PR row per class: rows of skipped classes stay zero so row i always matches names[i] / ap[i]
    py = np.zeros((nc, 1000))
    ap, p, r = np.zeros((nc, tp.shape[1])), np.zeros((nc, 1000)), np.zeros((nc, 1000))
    for ci, c in enumerate(unique_classes):
        i = pred_cls == c
        n_l = nt[ci]  # number of labels
        n_p = i.sum()  # number of predictions
        if n_p == 0 or n_l == 0:
            continue

        # Accumulate FPs and TPs
        fpc = (1 - tp[i]).cumsum(0)
        tpc = tp[i].cumsum(0)

        # Recall
        recall = tpc / (n_l + eps)  # recall curve
        r[ci] = np.interp(-px, -conf[i], recall[:, 0], left=0)  # negative x, xp because xp decreases

        # Precision
        precision = tpc / (tpc + fpc)  # precision curve
        p[ci] = np.interp(-px, -conf[i], precision[:, 0], left=1)  # p at pr_score

        # AP from recall-precision curve
        for j in range(tp.shape[1]):
            ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j])
            if plot and j == 0:
                py[ci] = np.interp(px, mrec, mpre)  # precision at mAP@0.5

    # Compute F1 (harmonic mean of precision and recall)
    f1 = 2 * p * r / (p + r + eps)

    # Keep only the names of classes that have data; accept a mapping or a plain sequence of names
    if isinstance(names, dict):
        names = [v for k, v in names.items() if k in unique_classes]
    else:
        names = [v for k, v in enumerate(names) if k in unique_classes]
    names = dict(enumerate(names))  # to dict
    if plot:
        plot_pr_curve(px, list(py), ap, save_dir / f'{prefix}PR_curve.png', names, on_plot=on_plot)
        plot_mc_curve(px, f1, save_dir / f'{prefix}F1_curve.png', names, ylabel='F1', on_plot=on_plot)
        plot_mc_curve(px, p, save_dir / f'{prefix}P_curve.png', names, ylabel='Precision', on_plot=on_plot)
        plot_mc_curve(px, r, save_dir / f'{prefix}R_curve.png', names, ylabel='Recall', on_plot=on_plot)

    i = smooth(f1.mean(0), 0.1).argmax()  # max F1 index
    p, r, f1 = p[:, i], r[:, i], f1[:, i]
    tp = (r * nt).round()  # true positives
    fp = (tp / (p + eps) - tp).round()  # false positives
    return tp, fp, p, r, f1, ap, unique_classes.astype(int)
class Metric(SimpleClass):
"""
Class for computing evaluation metrics for YOLOv8 model.
Attributes:
p (list): Precision for each class. Shape: (nc,).
r (list): Recall for each class. Shape: (nc,).
f1 (list): F1 score for each class. Shape: (nc,).
all_ap (list): AP scores for all classes and all IoU thresholds. Shape: (nc, 10).
ap_class_index (list): Index of class for each AP score. Shape: (nc,).
nc (int): Number of classes.
Methods:
ap50(): AP at IoU threshold of 0.5 for all classes. Returns: List of AP scores. Shape: (nc,) or [].
ap(): AP at IoU thresholds from 0.5 to 0.95 for all classes. Returns: List of AP scores. Shape: (nc,) or [].
mp(): Mean precision of all classes. Returns: Float.
mr(): Mean recall of all classes. Returns: Float.
map50(): Mean AP at IoU threshold of 0.5 for all classes. Returns: Float.
map75(): Mean AP at IoU threshold of 0.75 for all classes. Returns: Float.
map(): Mean AP at IoU thresholds from 0.5 to 0.95 for all classes. Returns: Float.
mean_results(): Mean of results, returns mp, mr, map50, map.
class_result(i): Class-aware result, returns p[i], r[i], ap50[i], ap[i].
maps(): mAP of each class. Returns: Array of mAP scores, shape: (nc,).
fitness(): Model fitness as a weighted combination of metrics. Returns: Float.
update(results): Update metric attributes with new evaluation results.
"""
def __init__(self) -> None:
self.p = [] # (nc, )
self.r = [] # (nc, )
self.f1 = [] # (nc, )
self.all_ap = [] # (nc, 10)
self.ap_class_index = [] # (nc, )
self.nc = 0
@property
def ap50(self):
"""
Returns the Average Precision (AP) at an IoU threshold of 0.5 for all classes.
Returns:
(np.ndarray, list): Array of shape (nc,) with AP50 values per class, or an empty list if not available.
"""
return self.all_ap[:, 0] if len(self.all_ap) else []
@property
def ap(self):
"""
Returns the Average Precision (AP) at an IoU threshold of 0.5-0.95 for all classes.
Returns:
(np.ndarray, list): Array of shape (nc,) with AP50-95 values per class, or an empty list if not available.
"""
return self.all_ap.mean(1) if len(self.all_ap) else []
@property
def mp(self):
"""
Returns the Mean Precision of all classes.
Returns:
(float): The mean precision of all classes.
"""
return self.p.mean() if len(self.p) else 0.0
@property
def mr(self):
"""
Returns the Mean Recall of all classes.
Returns:
(float): The mean recall of all classes.
"""
return self.r.mean() if len(self.r) else 0.0
@property
def map50(self):
"""
Returns the mean Average Precision (mAP) at an IoU threshold of 0.5.
Returns:
(float): The mAP50 at an IoU threshold of 0.5.
"""
return self.all_ap[:, 0].mean() if len(self.all_ap) else 0.0
@property
def map75(self):
"""
Returns the mean Average Precision (mAP) at an IoU threshold of 0.75.
Returns:
(float): The mAP50 at an IoU threshold of 0.75.
"""
return self.all_ap[:, 5].mean() if len(self.all_ap) else 0.0
@property
def map(self):
"""
Returns the mean Average Precision (mAP) over IoU thresholds of 0.5 - 0.95 in steps of 0.05.
Returns:
(float): The mAP over IoU thresholds of 0.5 - 0.95 in steps of 0.05.
"""
return self.all_ap.mean() if len(self.all_ap) else 0.0
def mean_results(self):
    """Return the class-averaged metrics as the list [mp, mr, map50, map]."""
    precision, recall = self.mp, self.mr
    return [precision, recall, self.map50, self.map]
def class_result(self, i):
    """Return the tuple (p[i], r[i], ap50[i], ap[i]) for the class stored at position `i`."""
    precision, recall = self.p[i], self.r[i]
    return precision, recall, self.ap50[i], self.ap[i]
@property
def maps(self):
    """
    mAP of each class.

    Returns:
        (np.ndarray): Array of shape (nc,). Classes listed in `ap_class_index` get their own AP50-95; every other
            class is filled with the overall `map`.
    """
    per_class = np.zeros(self.nc) + self.map  # default every class to the overall mAP
    class_ap = self.ap
    for row, class_id in enumerate(self.ap_class_index):
        per_class[class_id] = class_ap[row]
    return per_class
def fitness(self):
    """
    Model fitness as a weighted combination of the mean metrics.

    Returns:
        (float): 0.1 * mAP@0.5 + 0.9 * mAP@0.5:0.95; precision and recall carry zero weight.
    """
    weights = [0.0, 0.0, 0.1, 0.9]  # weights for [P, R, mAP@0.5, mAP@0.5:0.95]
    means = np.array(self.mean_results())
    return (means * weights).sum()
def update(self, results):
    """
    Replace the stored metric values with a new set of evaluation results.

    Args:
        results (tuple): A 5-item tuple unpacked in the order (p, r, f1, all_ap, ap_class_index).
    """
    precision, recall, f1, all_ap, ap_class_index = results
    self.p = precision
    self.r = recall
    self.f1 = f1
    self.all_ap = all_ap
    self.ap_class_index = ap_class_index
class DetMetrics(SimpleClass):
    """
    Utility class that computes and exposes object-detection metrics: precision, recall and mean average
    precision (mAP).

    Args:
        save_dir (Path): A path to the directory where the output plots will be saved. Defaults to current directory.
        plot (bool): A flag that indicates whether to plot precision-recall curves for each class. Defaults to False.
        on_plot (func): An optional callback to pass plots path and data when they are rendered. Defaults to None.
        names (tuple of str): A tuple of strings that represents the names of the classes. Defaults to an empty tuple.

    Attributes:
        save_dir (Path): Forwarded to `ap_per_class` as the plot output directory.
        plot (bool): Forwarded to `ap_per_class`.
        on_plot (func): Forwarded to `ap_per_class`.
        names (tuple of str): Class names; `len(names)` becomes the class count of `box`.
        box (Metric): Stores the box metrics produced by `process()`.
        speed (dict): Timings keyed by 'preprocess', 'inference', 'loss' and 'postprocess', initialised to 0.0.

    Methods:
        process(tp, conf, pred_cls, target_cls): Computes per-class statistics and stores them in `box`.
        mean_results(): Returns the mean precision, recall, mAP50 and mAP50-95 of `box`.
        class_result(i): Returns precision, recall, AP50 and AP50-95 of `box` at position `i`.

    Properties:
        keys: Names of the values returned by `mean_results()`.
        maps: Per-class mAP values of `box`.
        fitness: Fitness score of `box`.
        ap_class_index: The `ap_class_index` stored in `box`.
        results_dict: Mapping of `keys` plus 'fitness' to their values.
    """

    def __init__(self, save_dir=Path('.'), plot=False, on_plot=None, names=()) -> None:
        """Store the plotting options and class names, and create an empty box `Metric`."""
        self.save_dir, self.plot, self.on_plot = save_dir, plot, on_plot
        self.names = names
        self.box = Metric()
        self.speed = {'preprocess': 0.0, 'inference': 0.0, 'loss': 0.0, 'postprocess': 0.0}

    def process(self, tp, conf, pred_cls, target_cls):
        """Run `ap_per_class` on the predictions and store everything after its first two outputs in `box`."""
        stats = ap_per_class(tp,
                             conf,
                             pred_cls,
                             target_cls,
                             plot=self.plot,
                             save_dir=self.save_dir,
                             names=self.names,
                             on_plot=self.on_plot)
        self.box.nc = len(self.names)
        self.box.update(stats[2:])

    @property
    def keys(self):
        """Returns the metric names, in the same order as `mean_results()`."""
        return ['metrics/precision(B)', 'metrics/recall(B)', 'metrics/mAP50(B)', 'metrics/mAP50-95(B)']

    def mean_results(self):
        """Returns the mean precision, recall, mAP50 and mAP50-95 of the box metrics."""
        return self.box.mean_results()

    def class_result(self, i):
        """Returns the box metrics (precision, recall, AP50, AP50-95) stored at position `i`."""
        return self.box.class_result(i)

    @property
    def maps(self):
        """Returns the per-class mAP scores of the box metrics."""
        return self.box.maps

    @property
    def fitness(self):
        """Returns the fitness score of the box metrics."""
        return self.box.fitness()

    @property
    def ap_class_index(self):
        """Returns the `ap_class_index` held by the box metrics."""
        return self.box.ap_class_index

    @property
    def results_dict(self):
        """Returns a dict mapping every entry of `keys`, plus 'fitness', to its current value."""
        labels = self.keys + ['fitness']
        values = self.mean_results() + [self.fitness]
        return dict(zip(labels, values))
class SegmentMetrics(SimpleClass):
    """
    Calculates and aggregates detection and segmentation metrics over a given set of classes.

    Args:
        save_dir (Path): Path to the directory where the output plots should be saved. Default is the current directory.
        plot (bool): Whether to save the detection and segmentation plots. Default is False.
        on_plot (func): An optional callback to pass plots path and data when they are rendered. Defaults to None.
        names (list): List of class names. Default is an empty tuple.

    Attributes:
        save_dir (Path): Forwarded to `ap_per_class` as the plot output directory.
        plot (bool): Forwarded to `ap_per_class`.
        on_plot (func): Forwarded to `ap_per_class`.
        names (list): Class names; `len(names)` becomes the class count of `box` and `seg`.
        box (Metric): Stores the box metrics.
        seg (Metric): Stores the mask metrics.
        speed (dict): Timings keyed by 'preprocess', 'inference', 'loss' and 'postprocess', initialised to 0.0.

    Methods:
        process(tp_b, tp_m, conf, pred_cls, target_cls): Computes mask and box statistics and stores them.
        mean_results(): Returns the box mean metrics followed by the mask mean metrics.
        class_result(i): Returns the box metrics followed by the mask metrics at position `i`.

    Properties:
        keys: Names of the values returned by `mean_results()`.
        maps: Sum of the box and mask per-class mAP values.
        fitness: Sum of the mask and box fitness scores.
        ap_class_index: The `ap_class_index` stored in `box`.
        results_dict: Mapping of `keys` plus 'fitness' to their values.
    """

    def __init__(self, save_dir=Path('.'), plot=False, on_plot=None, names=()) -> None:
        """Store the plotting options and class names, and create empty box and mask `Metric` objects."""
        self.save_dir, self.plot, self.on_plot = save_dir, plot, on_plot
        self.names = names
        self.box = Metric()
        self.seg = Metric()
        self.speed = {'preprocess': 0.0, 'inference': 0.0, 'loss': 0.0, 'postprocess': 0.0}

    def process(self, tp_b, tp_m, conf, pred_cls, target_cls):
        """
        Processes the detection and segmentation metrics over the given set of predictions.

        Args:
            tp_b (list): List of True Positive boxes.
            tp_m (list): List of True Positive masks.
            conf (list): List of confidence scores.
            pred_cls (list): List of predicted classes.
            target_cls (list): List of target classes.
        """
        # Options that are identical for the mask and the box evaluation.
        shared = dict(plot=self.plot, on_plot=self.on_plot, save_dir=self.save_dir, names=self.names)

        # Masks are evaluated first, then boxes.
        mask_stats = ap_per_class(tp_m, conf, pred_cls, target_cls, prefix='Mask', **shared)
        self.seg.nc = len(self.names)
        self.seg.update(mask_stats[2:])

        box_stats = ap_per_class(tp_b, conf, pred_cls, target_cls, prefix='Box', **shared)
        self.box.nc = len(self.names)
        self.box.update(box_stats[2:])

    @property
    def keys(self):
        """Returns the metric names: the four box (B) names followed by the four mask (M) names."""
        box_keys = ['metrics/precision(B)', 'metrics/recall(B)', 'metrics/mAP50(B)', 'metrics/mAP50-95(B)']
        mask_keys = ['metrics/precision(M)', 'metrics/recall(M)', 'metrics/mAP50(M)', 'metrics/mAP50-95(M)']
        return box_keys + mask_keys

    def mean_results(self):
        """Returns the box mean metrics followed by the mask mean metrics."""
        return self.box.mean_results() + self.seg.mean_results()

    def class_result(self, i):
        """Returns the box metrics followed by the mask metrics stored at position `i`."""
        return self.box.class_result(i) + self.seg.class_result(i)

    @property
    def maps(self):
        """Returns the sum of the box and mask per-class mAP values."""
        return self.box.maps + self.seg.maps

    @property
    def fitness(self):
        """Returns the sum of the mask fitness and the box fitness."""
        return self.seg.fitness() + self.box.fitness()

    @property
    def ap_class_index(self):
        """Boxes and masks have the same ap_class_index, so the box one is returned."""
        return self.box.ap_class_index

    @property
    def results_dict(self):
        """Returns a dict mapping every entry of `keys`, plus 'fitness', to its current value."""
        labels = self.keys + ['fitness']
        values = self.mean_results() + [self.fitness]
        return dict(zip(labels, values))
class PoseMetrics(SegmentMetrics):
    """
    Calculates and aggregates detection and pose metrics over a given set of classes.

    Args:
        save_dir (Path): Path to the directory where the output plots should be saved. Default is the current directory.
        plot (bool): Whether to save the detection and pose plots. Default is False.
        on_plot (func): An optional callback to pass plots path and data when they are rendered. Defaults to None.
        names (list): List of class names. Default is an empty tuple.

    Attributes:
        save_dir (Path): Path to the directory where the output plots should be saved.
        plot (bool): Whether to save the detection and pose plots.
        on_plot (func): An optional callback to pass plots path and data when they are rendered.
        names (list): List of class names.
        box (Metric): An instance of the Metric class to calculate box detection metrics.
        pose (Metric): An instance of the Metric class to calculate pose metrics.
        seg (Metric): Created by the parent initialiser; this class never updates it.
        speed (dict): Dictionary to store the time taken in different phases of inference.

    Methods:
        process(tp_b, tp_p, conf, pred_cls, target_cls): Processes metrics over the given set of predictions.
        mean_results(): Returns the box mean metrics followed by the pose mean metrics.
        class_result(i): Returns the box metrics followed by the pose metrics at position `i`.

    Properties:
        keys: Names of the values returned by `mean_results()`.
        maps: Sum of the box and pose per-class mAP values.
        fitness: Sum of the pose and box fitness scores.
        ap_class_index: Inherited; the `ap_class_index` stored in `box`.
        results_dict: Inherited; mapping of `keys` plus 'fitness' to their values.
    """

    def __init__(self, save_dir=Path('.'), plot=False, on_plot=None, names=()) -> None:
        """Initialise the shared state through the parent class and add the pose `Metric`."""
        # Forward by keyword: the parent signature is (save_dir, plot, on_plot, names), so passing `names` as the
        # third positional argument would store it as `on_plot`.
        super().__init__(save_dir=save_dir, plot=plot, on_plot=on_plot, names=names)
        self.pose = Metric()

    def __getattr__(self, attr):
        """Raises an AttributeError if an invalid attribute is accessed."""
        name = self.__class__.__name__
        raise AttributeError(f"'{name}' object has no attribute '{attr}'. See valid attributes below.\n{self.__doc__}")

    def process(self, tp_b, tp_p, conf, pred_cls, target_cls):
        """
        Processes the detection and pose metrics over the given set of predictions.

        Args:
            tp_b (list): List of True Positive boxes.
            tp_p (list): List of True Positive keypoints.
            conf (list): List of confidence scores.
            pred_cls (list): List of predicted classes.
            target_cls (list): List of target classes.
        """
        # Pose is evaluated first, then boxes; only the outputs after the first two are stored.
        results_pose = ap_per_class(tp_p,
                                    conf,
                                    pred_cls,
                                    target_cls,
                                    plot=self.plot,
                                    on_plot=self.on_plot,
                                    save_dir=self.save_dir,
                                    names=self.names,
                                    prefix='Pose')[2:]
        self.pose.nc = len(self.names)
        self.pose.update(results_pose)
        results_box = ap_per_class(tp_b,
                                   conf,
                                   pred_cls,
                                   target_cls,
                                   plot=self.plot,
                                   on_plot=self.on_plot,
                                   save_dir=self.save_dir,
                                   names=self.names,
                                   prefix='Box')[2:]
        self.box.nc = len(self.names)
        self.box.update(results_box)

    @property
    def keys(self):
        """Returns the metric names: the four box (B) names followed by the four pose (P) names."""
        return [
            'metrics/precision(B)', 'metrics/recall(B)', 'metrics/mAP50(B)', 'metrics/mAP50-95(B)',
            'metrics/precision(P)', 'metrics/recall(P)', 'metrics/mAP50(P)', 'metrics/mAP50-95(P)']

    def mean_results(self):
        """Return the box mean metrics followed by the pose mean metrics."""
        return self.box.mean_results() + self.pose.mean_results()

    def class_result(self, i):
        """Return the box metrics followed by the pose metrics stored at position `i`."""
        return self.box.class_result(i) + self.pose.class_result(i)

    @property
    def maps(self):
        """Returns the sum of the box and pose per-class mAP values."""
        return self.box.maps + self.pose.maps

    @property
    def fitness(self):
        """Returns the sum of the pose fitness and the box fitness."""
        return self.pose.fitness() + self.box.fitness()
class ClassifyMetrics(SimpleClass):
    """
    Class for computing classification metrics including top-1 and top-5 accuracy.

    Attributes:
        top1 (float): The top-1 accuracy.
        top5 (float): The top-5 accuracy.
        speed (Dict[str, float]): Timings keyed by 'preprocess', 'inference', 'loss' and 'postprocess'.

    Properties:
        fitness (float): The fitness of the model, which is equal to top-5 accuracy.
        results_dict (Dict[str, Union[float, str]]): A dictionary containing the classification metrics and fitness.
        keys (List[str]): A list of keys for the results_dict.

    Methods:
        process(targets, pred): Processes the targets and predictions to compute classification metrics.
    """

    def __init__(self) -> None:
        """Start with zero accuracies and zeroed timings."""
        self.top1 = self.top5 = 0
        self.speed = {'preprocess': 0.0, 'inference': 0.0, 'loss': 0.0, 'postprocess': 0.0}

    def process(self, targets, pred):
        """
        Compute top-1 and top-5 accuracy from batched targets and predictions.

        Args:
            targets (list): Sequence of target tensors, concatenated with `torch.cat`.
            pred (list): Sequence of prediction tensors, concatenated with `torch.cat`; column 0 is compared
                for top-1 and any matching column counts for top-5.
        """
        pred, targets = torch.cat(pred), torch.cat(targets)
        hits = (targets[:, None] == pred).float()
        first_hit = hits[:, 0]  # 1.0 where the first predicted class equals the target
        any_hit = hits.max(1).values  # 1.0 where any predicted class equals the target
        acc = torch.stack((first_hit, any_hit), dim=1)  # (top1, top5) accuracy
        self.top1, self.top5 = acc.mean(0).tolist()

    @property
    def fitness(self):
        """Returns top-5 accuracy as fitness score."""
        return self.top5

    @property
    def results_dict(self):
        """Returns a dict mapping every entry of `keys`, plus 'fitness', to its current value."""
        labels = self.keys + ['fitness']
        values = [self.top1, self.top5, self.fitness]
        return dict(zip(labels, values))

    @property
    def keys(self):
        """Returns a list of keys for the results_dict property."""
        return ['metrics/accuracy_top1', 'metrics/accuracy_top5']

739
ultralytics/utils/ops.py Normal file
View File

@ -0,0 +1,739 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
import contextlib
import math
import re
import time
import cv2
import numpy as np
import torch
import torch.nn.functional as F
import torchvision
from ultralytics.utils import LOGGER
from .metrics import box_iou
class Profile(contextlib.ContextDecorator):
    """
    YOLOv8 wall-clock timer that accumulates elapsed time across uses.

    Usage: as a decorator with @Profile() or as a context manager with 'with Profile():'
    """

    def __init__(self, t=0.0):
        """
        Create the timer.

        Args:
            t (float): Initial accumulated time. Defaults to 0.0.
        """
        self.t = t
        self.cuda = torch.cuda.is_available()

    def __enter__(self):
        """Record the start time and return the timer itself."""
        self.start = self.time()
        return self

    def __exit__(self, type, value, traceback):
        """Store the elapsed time of this run in `dt` and add it to the running total `t`."""
        elapsed = self.time() - self.start
        self.dt = elapsed  # delta-time
        self.t += elapsed  # accumulate dt

    def time(self):
        """Return the current time, synchronizing CUDA first when it is available."""
        if self.cuda:
            torch.cuda.synchronize()
        return time.time()
def coco80_to_coco91_class():
    """
    Converts 80-index (val2014) to 91-index (paper).

    For details see https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/.

    Returns:
        (list): 80 integers; entry `i` is the 91-index category id of 80-index class `i`. The ids run from 1 to 90
            with the ten ids listed in `unused_ids` left out.
    """
    unused_ids = {12, 26, 29, 30, 45, 66, 68, 69, 71, 83}
    return [category_id for category_id in range(1, 91) if category_id not in unused_ids]
def segment2box(segment, width=640, height=640):
    """
    Convert 1 segment label to 1 box label, applying inside-image constraint, i.e. (xy1, xy2, ...) to (xyxy)

    Args:
        segment (np.ndarray): the segment label, an (n, 2) array of xy points
        width (int): the width of the image. Defaults to 640
        height (int): The height of the image. Defaults to 640

    Returns:
        (np.ndarray): the minimum and maximum x and y values of the points lying inside the image, as
            (x_min, y_min, x_max, y_max) in the segment's dtype; four zeros when no point lies inside.
    """
    x, y = segment.T  # segment xy
    inside = (x >= 0) & (y >= 0) & (x <= width) & (y <= height)
    x, y = x[inside], y[inside]
    # Test how many points remain, not their values: points on the x == 0 edge are valid but falsy.
    if not len(x):
        return np.zeros(4, dtype=segment.dtype)
    return np.array([x.min(), y.min(), x.max(), y.max()], dtype=segment.dtype)  # xyxy
def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None, padding=True):
    """
    Rescales xyxy bounding boxes from the image shape they were predicted on (img1_shape) to another image shape
    (img0_shape). The boxes are modified in place and also returned.

    Args:
        img1_shape (tuple): The shape of the image that the bounding boxes are for, in the format of (height, width).
        boxes (torch.Tensor): the bounding boxes of the objects in the image, in the format of (x1, y1, x2, y2)
        img0_shape (tuple): the shape of the target image, in the format of (height, width).
        ratio_pad (tuple): a tuple of (ratio, pad) for scaling the boxes. If not provided, the ratio and pad will be
            calculated based on the size difference between the two images.
        padding (bool): If True, the pad offsets are subtracted before scaling (boxes come from a yolo-style padded
            image). If False, only the division by the gain is applied.

    Returns:
        boxes (torch.Tensor): The scaled bounding boxes, in the format of (x1, y1, x2, y2)
    """
    if ratio_pad is not None:
        gain, pad = ratio_pad[0][0], ratio_pad[1]
    else:  # derive gain and padding from the two shapes
        src_h, src_w = img1_shape[0], img1_shape[1]
        dst_h, dst_w = img0_shape[0], img0_shape[1]
        gain = min(src_h / dst_h, src_w / dst_w)  # gain = old / new
        pad_w = round((src_w - dst_w * gain) / 2 - 0.1)
        pad_h = round((src_h - dst_h * gain) / 2 - 0.1)
        pad = pad_w, pad_h  # wh padding

    if padding:
        boxes[..., [0, 2]] -= pad[0]  # x padding
        boxes[..., [1, 3]] -= pad[1]  # y padding
    boxes[..., :4] /= gain
    clip_boxes(boxes, img0_shape)
    return boxes
def make_divisible(x, divisor):
    """
    Rounds `x` up to a multiple of the given divisor.

    Args:
        x (int): The number to make divisible.
        divisor (int | torch.Tensor): The divisor. For a tensor, its maximum element is used, converted to int.

    Returns:
        (int): The smallest multiple of the divisor that is greater than or equal to `x`.
    """
    step = int(divisor.max()) if isinstance(divisor, torch.Tensor) else divisor
    multiples = math.ceil(x / step)
    return multiples * step
def non_max_suppression(
        prediction,
        conf_thres=0.25,
        iou_thres=0.45,
        classes=None,
        agnostic=False,
        multi_label=False,
        labels=(),
        max_det=300,
        nc=0,  # number of classes (optional)
        max_time_img=0.05,
        max_nms=30000,
        max_wh=7680,
):
    """
    Perform non-maximum suppression (NMS) on a set of boxes, with support for masks and multiple labels per box.

    Note: unless the input lives on an MPS device (in which case a CPU copy is used), the box rows of `prediction`
    are converted from xywh to xyxy in place.

    Args:
        prediction (torch.Tensor): A tensor of shape (batch_size, num_classes + 4 + num_masks, num_boxes)
            containing the predicted boxes, classes, and masks. A list/tuple is also accepted, in which case its
            first element is used.
        conf_thres (float): The confidence threshold below which boxes will be filtered out.
            Valid values are between 0.0 and 1.0.
        iou_thres (float): The IoU threshold passed to `torchvision.ops.nms`; a box overlapping a higher-scoring box
            by more than this value is suppressed. Valid values are between 0.0 and 1.0.
        classes (List[int]): A list of class indices to consider. If None, all classes will be considered.
        agnostic (bool): If True, NMS ignores the class of the boxes, so boxes of different classes can suppress
            each other.
        multi_label (bool): If True, each box may have multiple labels (only effective when there is more than
            one class).
        labels (List[List[Union[int, float, torch.Tensor]]]): A list of lists, where each inner
            list contains the apriori labels for a given image. The list should be in the format
            output by a dataloader, with each label being a tuple of (class_index, x1, y1, x2, y2).
        max_det (int): The maximum number of boxes to keep after NMS.
        nc (int, optional): The number of classes output by the model. Any indices after this will be considered masks.
        max_time_img (float): The maximum time (seconds) for processing one image.
        max_nms (int): The maximum number of boxes into torchvision.ops.nms().
        max_wh (int): The maximum box width and height in pixels; also used as the per-class box offset.

    Returns:
        (List[torch.Tensor]): A list of length batch_size, where each element is a tensor of
            shape (num_boxes, 6 + num_masks) containing the kept boxes, with columns
            (x1, y1, x2, y2, confidence, class, mask1, mask2, ...). Images that are skipped, or not reached before
            the time limit, keep an empty (0, 6 + num_masks) tensor.
    """

    # Checks
    assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
    assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'
    if isinstance(prediction, (list, tuple)):  # YOLOv8 model in validation model, output = (inference_out, loss_out)
        prediction = prediction[0]  # select only inference output

    device = prediction.device
    mps = 'mps' in device.type  # Apple MPS
    if mps:  # MPS not fully supported yet, convert tensors to CPU before NMS
        prediction = prediction.cpu()
    bs = prediction.shape[0]  # batch size
    nc = nc or (prediction.shape[1] - 4)  # number of classes
    nm = prediction.shape[1] - nc - 4  # number of mask coefficients
    mi = 4 + nc  # mask start index
    xc = prediction[:, 4:mi].amax(1) > conf_thres  # candidates

    # Settings
    # min_wh = 2  # (pixels) minimum box width and height
    time_limit = 0.5 + max_time_img * bs  # seconds to quit after
    redundant = True  # require redundant detections
    multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)
    merge = False  # use merge-NMS

    prediction = prediction.transpose(-1, -2)  # shape(1,84,6300) to shape(1,6300,84)
    prediction[..., :4] = xywh2xyxy(prediction[..., :4])  # xywh to xyxy

    t = time.time()
    output = [torch.zeros((0, 6 + nm), device=prediction.device)] * bs
    for xi, x in enumerate(prediction):  # image index, image inference
        # Apply constraints
        # x[((x[:, 2:4] < min_wh) | (x[:, 2:4] > max_wh)).any(1), 4] = 0  # width-height
        x = x[xc[xi]]  # confidence

        # Cat apriori labels if autolabelling
        if labels and len(labels[xi]):
            lb = labels[xi]
            # Rows of x are laid out as (4 box, nc class, nm mask) columns, so the label rows must have exactly
            # 4 + nc + nm columns for the concatenation below to be valid.
            v = torch.zeros((len(lb), nc + nm + 4), device=x.device)
            v[:, :4] = lb[:, 1:5]  # box
            v[range(len(lb)), lb[:, 0].long() + 4] = 1.0  # cls
            x = torch.cat((x, v), 0)

        # If none remain process next image
        if not x.shape[0]:
            continue

        # Detections matrix nx6 (xyxy, conf, cls)
        box, cls, mask = x.split((4, nc, nm), 1)

        if multi_label:
            i, j = torch.where(cls > conf_thres)
            x = torch.cat((box[i], x[i, 4 + j, None], j[:, None].float(), mask[i]), 1)
        else:  # best class only
            conf, j = cls.max(1, keepdim=True)
            x = torch.cat((box, conf, j.float(), mask), 1)[conf.view(-1) > conf_thres]

        # Filter by class
        if classes is not None:
            x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]

        # Apply finite constraint
        # if not torch.isfinite(x).all():
        #     x = x[torch.isfinite(x).all(1)]

        # Check shape
        n = x.shape[0]  # number of boxes
        if not n:  # no boxes
            continue
        if n > max_nms:  # excess boxes
            x = x[x[:, 4].argsort(descending=True)[:max_nms]]  # sort by confidence and remove excess boxes

        # Batched NMS: shifting each box by class * max_wh keeps boxes of different classes from overlapping
        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
        i = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS
        i = i[:max_det]  # limit detections
        if merge and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean); disabled while merge is False
            # Update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
            iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix
            weights = iou * scores[None]  # box weights
            x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True)  # merged boxes
            if redundant:
                i = i[iou.sum(1) > 1]  # require redundancy

        output[xi] = x[i]
        if mps:
            output[xi] = output[xi].to(device)
        if (time.time() - t) > time_limit:
            LOGGER.warning(f'WARNING ⚠️ NMS time limit {time_limit:.3f}s exceeded')
            break  # time limit exceeded

    return output
def clip_boxes(boxes, shape):
    """
    Clip xyxy bounding boxes in place so that x lies in [0, width] and y lies in [0, height].

    Args:
        boxes (torch.Tensor | np.ndarray): the bounding boxes to clip; the last axis holds (x1, y1, x2, y2)
        shape (tuple): the shape of the image, in the format (height, width)
    """
    height, width = shape[0], shape[1]
    if isinstance(boxes, torch.Tensor):  # faster individually
        for column, upper in enumerate((width, height, width, height)):  # x1, y1, x2, y2
            boxes[..., column].clamp_(0, upper)
        return
    # np.array (faster grouped)
    boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, width)  # x1, x2
    boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, height)  # y1, y2
def clip_coords(coords, shape):
    """
    Clip xy coordinates to the image boundaries.

    Args:
        coords (torch.Tensor | numpy.ndarray): Coordinates whose last axis starts with (x, y).
        shape (tuple): A tuple of integers representing the size of the image in the format (height, width).

    Returns:
        (None): The function modifies the input `coords` in place, by clipping each coordinate to the image
            boundaries.
    """
    height, width = shape[0], shape[1]
    if isinstance(coords, torch.Tensor):  # faster individually
        for axis, upper in enumerate((width, height)):  # x, y
            coords[..., axis].clamp_(0, upper)
        return
    # np.array (faster grouped)
    coords[..., 0] = coords[..., 0].clip(0, width)  # x
    coords[..., 1] = coords[..., 1].clip(0, height)  # y
def scale_image(masks, im0_shape, ratio_pad=None):
    """
    Takes a padded mask/image, crops away the padding and resizes it to the original image size.

    Args:
        masks: resized and padded masks/images, [h, w, num]/[h, w, 3]. Passed to `cv2.resize`, so presumably an
            np.ndarray rather than a tensor — confirm against callers.
        im0_shape (tuple): the original image shape
        ratio_pad (tuple): a tuple of (ratio, pad); when None, the padding is derived from the two shapes.

    Returns:
        masks: The input itself when the first two dimensions already match `im0_shape`; otherwise the cropped and
            resized masks, always with three dimensions.
    """
    im1_shape = masks.shape
    if im1_shape[:2] == im0_shape[:2]:  # already at the target size
        return masks

    if ratio_pad is not None:
        gain = ratio_pad[0][0]
        pad = ratio_pad[1]
    else:  # calculate from im0_shape
        gain = min(im1_shape[0] / im0_shape[0], im1_shape[1] / im0_shape[1])  # gain = old / new
        pad = (im1_shape[1] - im0_shape[1] * gain) / 2, (im1_shape[0] - im0_shape[0] * gain) / 2  # wh padding

    pad_x, pad_y = pad[0], pad[1]
    top, left = int(pad_y), int(pad_x)  # y, x
    bottom, right = int(im1_shape[0] - pad_y), int(im1_shape[1] - pad_x)

    if len(masks.shape) < 2:
        raise ValueError(f'"len of masks shape" should be 2 or 3, but got {len(masks.shape)}')
    cropped = masks[top:bottom, left:right]
    resized = cv2.resize(cropped, (im0_shape[1], im0_shape[0]))
    if len(resized.shape) == 2:  # restore the channel axis for 2-D results
        resized = resized[:, :, None]
    return resized
def xyxy2xywh(x):
    """
    Convert bounding box coordinates from (x1, y1, x2, y2) format to (x, y, width, height) format, where (x, y) is
    the box center. The input is not modified.

    Args:
        x (np.ndarray | torch.Tensor): The input bounding box coordinates in (x1, y1, x2, y2) format.

    Returns:
        y (np.ndarray | torch.Tensor): The bounding box coordinates in (x, y, width, height) format.
    """
    if isinstance(x, torch.Tensor):
        y = x.clone()
    else:
        y = np.copy(x)
    left, top, right, bottom = x[..., 0], x[..., 1], x[..., 2], x[..., 3]
    y[..., 0] = (left + right) / 2  # x center
    y[..., 1] = (top + bottom) / 2  # y center
    y[..., 2] = right - left  # width
    y[..., 3] = bottom - top  # height
    return y
def xywh2xyxy(x):
    """
    Convert bounding box coordinates from (x, y, width, height) format, with (x, y) the box center, to
    (x1, y1, x2, y2) format where (x1, y1) is the top-left corner and (x2, y2) is the bottom-right corner.
    The input is not modified.

    Args:
        x (np.ndarray | torch.Tensor): The input bounding box coordinates in (x, y, width, height) format.

    Returns:
        y (np.ndarray | torch.Tensor): The bounding box coordinates in (x1, y1, x2, y2) format.
    """
    if isinstance(x, torch.Tensor):
        y = x.clone()
    else:
        y = np.copy(x)
    center_x, center_y = x[..., 0], x[..., 1]
    half_w, half_h = x[..., 2] / 2, x[..., 3] / 2
    y[..., 0] = center_x - half_w  # top left x
    y[..., 1] = center_y - half_h  # top left y
    y[..., 2] = center_x + half_w  # bottom right x
    y[..., 3] = center_y + half_h  # bottom right y
    return y
def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0):
    """
    Convert normalized (x, y, width, height) boxes to pixel (x1, y1, x2, y2) boxes. The input is not modified.

    Args:
        x (np.ndarray | torch.Tensor): The bounding box coordinates.
        w (int): Width of the image. Defaults to 640
        h (int): Height of the image. Defaults to 640
        padw (int): Padding width, added to the x coordinates. Defaults to 0
        padh (int): Padding height, added to the y coordinates. Defaults to 0

    Returns:
        y (np.ndarray | torch.Tensor): The coordinates of the bounding box in the format [x1, y1, x2, y2] where
            x1,y1 is the top-left corner, x2,y2 is the bottom-right corner of the bounding box.
    """
    if isinstance(x, torch.Tensor):
        y = x.clone()
    else:
        y = np.copy(x)
    center_x, center_y = x[..., 0], x[..., 1]
    half_w, half_h = x[..., 2] / 2, x[..., 3] / 2
    y[..., 0] = w * (center_x - half_w) + padw  # top left x
    y[..., 1] = h * (center_y - half_h) + padh  # top left y
    y[..., 2] = w * (center_x + half_w) + padw  # bottom right x
    y[..., 3] = h * (center_y + half_h) + padh  # bottom right y
    return y
def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0):
    """
    Convert bounding box coordinates from (x1, y1, x2, y2) format to (x, y, width, height) format with all four
    values normalized by the image dimensions; (x, y) is the box center.

    Args:
        x (np.ndarray | torch.Tensor): The input bounding box coordinates in (x1, y1, x2, y2) format.
        w (int): The width of the image. Defaults to 640
        h (int): The height of the image. Defaults to 640
        clip (bool): If True, the input boxes are first clipped in place to (h - eps, w - eps). Defaults to False
        eps (float): Margin subtracted from the image size when clipping. Defaults to 0.0

    Returns:
        y (np.ndarray | torch.Tensor): The bounding box coordinates in (x, y, width, height, normalized) format
    """
    if clip:
        clip_boxes(x, (h - eps, w - eps))  # warning: inplace clip
    if isinstance(x, torch.Tensor):
        y = x.clone()
    else:
        y = np.copy(x)
    left, top, right, bottom = x[..., 0], x[..., 1], x[..., 2], x[..., 3]
    y[..., 0] = ((left + right) / 2) / w  # x center
    y[..., 1] = ((top + bottom) / 2) / h  # y center
    y[..., 2] = (right - left) / w  # width
    y[..., 3] = (bottom - top) / h  # height
    return y
def xyn2xy(x, w=640, h=640, padw=0, padh=0):
    """
    Convert normalized xy coordinates to pixel coordinates. The input is not modified.

    Args:
        x (np.ndarray | torch.Tensor): Normalized coordinates; the last axis starts with (x, y).
        w (int): The width of the image. Defaults to 640
        h (int): The height of the image. Defaults to 640
        padw (int): The width of the padding, added to x. Defaults to 0
        padh (int): The height of the padding, added to y. Defaults to 0

    Returns:
        y (np.ndarray | torch.Tensor): A copy of the input with x replaced by `w * x + padw` and y replaced by
            `h * y + padh`.
    """
    if isinstance(x, torch.Tensor):
        y = x.clone()
    else:
        y = np.copy(x)
    norm_x, norm_y = x[..., 0], x[..., 1]
    y[..., 0] = w * norm_x + padw  # pixel x
    y[..., 1] = h * norm_y + padh  # pixel y
    return y
def xywh2ltwh(x):
    """
    Convert the bounding box format from [x, y, w, h] to [x1, y1, w, h], where x, y is the box center and x1, y1
    are the top-left coordinates. Boxes are read along the last axis, so a single box of shape (4,) and batches of
    shape (..., 4) are both accepted. The input is not modified.

    Args:
        x (np.ndarray | torch.Tensor): The input tensor with the bounding box coordinates in the xywh format

    Returns:
        y (np.ndarray | torch.Tensor): The bounding box coordinates in the ltwh format
    """
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    # Ellipsis indexing matches the other converters in this module and works for any number of leading axes.
    y[..., 0] = x[..., 0] - x[..., 2] / 2  # top left x
    y[..., 1] = x[..., 1] - x[..., 3] / 2  # top left y
    return y
def xyxy2ltwh(x):
    """
    Convert bounding boxes from [x1, y1, x2, y2] to [x1, y1, w, h], where xy1=top-left, xy2=bottom-right. Boxes are
    read along the last axis, so a single box of shape (4,) and batches of shape (..., 4) are both accepted. The
    input is not modified.

    Args:
        x (np.ndarray | torch.Tensor): The input tensor with the bounding boxes coordinates in the xyxy format

    Returns:
        y (np.ndarray | torch.Tensor): The bounding box coordinates in the ltwh format.
    """
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    # Ellipsis indexing matches the other converters in this module and works for any number of leading axes.
    y[..., 2] = x[..., 2] - x[..., 0]  # width
    y[..., 3] = x[..., 3] - x[..., 1]  # height
    return y
def ltwh2xywh(x):
    """
    Convert boxes from [x1, y1, w, h] to [x, y, w, h] where xy1=top-left, xy=center. Boxes are read along the last
    axis, so a single box of shape (4,) and batches of shape (..., 4) are both accepted. The input is not modified.

    Args:
        x (np.ndarray | torch.Tensor): The input boxes in the ltwh format

    Returns:
        y (np.ndarray | torch.Tensor): The bounding box coordinates in the xywh format
    """
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    # Ellipsis indexing matches the other converters in this module and works for any number of leading axes.
    y[..., 0] = x[..., 0] + x[..., 2] / 2  # center x
    y[..., 1] = x[..., 1] + x[..., 3] / 2  # center y
    return y
def ltwh2xyxy(x):
    """
    Convert bounding boxes from [x1, y1, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right. Boxes are
    read along the last axis, so a single box of shape (4,) and batches of shape (..., 4) are both accepted. The
    input is not modified.

    Args:
        x (np.ndarray | torch.Tensor): The input boxes in the ltwh format

    Returns:
        y (np.ndarray | torch.Tensor): the xyxy coordinates of the bounding boxes.
    """
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    # Ellipsis indexing matches the other converters in this module and works for any number of leading axes.
    y[..., 2] = x[..., 2] + x[..., 0]  # bottom right x = width + left
    y[..., 3] = x[..., 3] + x[..., 1]  # bottom right y = height + top
    return y
def segments2boxes(segments):
    """
    Convert segment labels to box labels, i.e. (xy1, xy2, ...) to (xywh): each segment is reduced to its
    min/max extent and the extents are converted with `xyxy2xywh`.

    Args:
        segments (list): list of segments, each segment is an (n, 2) array of x, y points

    Returns:
        (np.ndarray): the xywh coordinates of the bounding boxes.
    """

    def extent(points):
        """Return [x_min, y_min, x_max, y_max] of one segment."""
        xs, ys = points.T  # segment xy
        return [xs.min(), ys.min(), xs.max(), ys.max()]

    corners = np.array([extent(segment) for segment in segments])  # xyxy
    return xyxy2xywh(corners)  # xywh
def resample_segments(segments, n=1000):
    """
    Resample every segment to exactly n points by linear interpolation along its closed outline.

    Each segment is closed by appending its first point, then x and y are interpolated independently at n evenly
    spaced positions along the point index.

    Args:
        segments (list): A list of (m, 2) arrays of points. The list is updated in place.
        n (int): Number of points each segment is resampled to. Defaults to 1000.

    Returns:
        segments (list): The same list object, whose entries are now (n, 2) float32 arrays.
    """
    for idx, segment in enumerate(segments):
        closed = np.concatenate((segment, segment[0:1, :]), axis=0)  # repeat first point to close the outline
        count = len(closed)
        known = np.arange(count)
        query = np.linspace(0, count - 1, n)
        xs = np.interp(query, known, closed[:, 0])
        ys = np.interp(query, known, closed[:, 1])
        segments[idx] = np.concatenate([xs, ys], dtype=np.float32).reshape(2, -1).T  # segment xy
    return segments
def crop_mask(masks, boxes):
    """
    Zero out every mask outside its own bounding box.

    A pixel at column c and row r of mask i is kept when x1 <= c < x2 and y1 <= r < y2 for box i; everything else
    is multiplied by zero.

    Args:
        masks (torch.Tensor): [n, h, w] tensor of masks.
        boxes (torch.Tensor): [n, 4] tensor of (x1, y1, x2, y2) boxes, expressed in the masks' pixel grid.

    Returns:
        (torch.Tensor): The masks multiplied by the per-box inside/outside indicator, shape [n, h, w].
    """
    _, height, width = masks.shape
    left, top, right, bottom = boxes[:, :, None].split(1, dim=1)  # each shape(n,1,1)
    xs = torch.arange(width, device=masks.device, dtype=left.dtype)[None, None, :]  # column index, shape(1,1,w)
    ys = torch.arange(height, device=masks.device, dtype=left.dtype)[None, :, None]  # row index, shape(1,h,1)
    inside = (xs >= left) * (xs < right) * (ys >= top) * (ys < bottom)
    return masks * inside
def process_mask_upsample(protos, masks_in, bboxes, shape):
    """
    Build binary masks from the mask-head output, upsampling to `shape` BEFORE cropping to the boxes.

    The mask coefficients are combined with the prototypes, passed through a sigmoid, bilinearly resized to `shape`,
    cropped with `crop_mask` and finally thresholded at 0.5.

    Args:
        protos (torch.Tensor): [mask_dim, mask_h, mask_w]
        masks_in (torch.Tensor): [n, mask_dim], n is number of masks after nms
        bboxes (torch.Tensor): [n, 4], n is number of masks after nms
        shape (tuple): the size of the input image (h,w)

    Returns:
        (torch.Tensor): The upsampled, cropped and thresholded masks.
    """
    mask_dim, mh, mw = protos.shape  # CHW
    flat_protos = protos.float().view(mask_dim, -1)
    masks = (masks_in @ flat_protos).sigmoid().view(-1, mh, mw)
    upsampled = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0]  # CHW
    cropped = crop_mask(upsampled, bboxes)  # CHW
    return cropped.gt_(0.5)
def process_mask(protos, masks_in, bboxes, shape, upsample=False):
    """
    Build binary masks from the mask-head output, cropping at prototype resolution.

    The boxes are rescaled from image size to prototype size, the masks are cropped with `crop_mask` at that
    resolution, optionally resized to `shape`, and finally thresholded at 0.5.

    Args:
        protos (torch.Tensor): A tensor of shape [mask_dim, mask_h, mask_w].
        masks_in (torch.Tensor): A tensor of shape [n, mask_dim], where n is the number of masks after NMS.
        bboxes (torch.Tensor): A tensor of shape [n, 4], where n is the number of masks after NMS. Not modified;
            a clone is rescaled.
        shape (tuple): A tuple of integers representing the size of the input image in the format (h, w).
        upsample (bool): Whether to bilinearly resize the masks to `shape` before thresholding. Default is False.

    Returns:
        (torch.Tensor): A binary mask tensor of shape [n, h, w] when `upsample` is True, otherwise
            [n, mask_h, mask_w].
    """
    mask_dim, mh, mw = protos.shape  # CHW
    ih, iw = shape
    x_ratio, y_ratio = mw / iw, mh / ih  # image -> prototype scale factors

    masks = (masks_in @ protos.float().view(mask_dim, -1)).sigmoid().view(-1, mh, mw)  # CHW

    proto_boxes = bboxes.clone()
    proto_boxes[:, 0] *= x_ratio
    proto_boxes[:, 1] *= y_ratio
    proto_boxes[:, 2] *= x_ratio
    proto_boxes[:, 3] *= y_ratio

    masks = crop_mask(masks, proto_boxes)  # CHW
    if upsample:
        masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0]  # CHW
    return masks.gt_(0.5)
def process_mask_native(protos, masks_in, bboxes, shape):
    """
    Build binary masks from the mask-head output, rescaling with `scale_masks` before cropping to the boxes.

    Args:
        protos (torch.Tensor): [mask_dim, mask_h, mask_w]
        masks_in (torch.Tensor): [n, mask_dim], n is number of masks after nms
        bboxes (torch.Tensor): [n, 4], n is number of masks after nms
        shape (tuple): the size of the input image (h,w)

    Returns:
        masks (torch.Tensor): The thresholded masks, laid out as [n, h, w].
    """
    mask_dim, mh, mw = protos.shape  # CHW
    coarse = (masks_in @ protos.float().view(mask_dim, -1)).sigmoid().view(-1, mh, mw)
    rescaled = scale_masks(coarse[None], shape)[0]  # CHW
    return crop_mask(rescaled, bboxes).gt_(0.5)  # CHW
def scale_masks(masks, shape, padding=True):
    """
    Crop the padding out of a batch of masks and bilinearly resize what remains to `shape`.

    Args:
        masks (torch.Tensor): (N, C, H, W).
        shape (tuple): Target height and width.
        padding (bool): If True, the total padding is assumed to be split evenly between both sides (yolo-style
            letterbox) and is trimmed from top/left and bottom/right. If False, nothing is trimmed from the top/left
            and the full padding amount is trimmed from the bottom/right.

    Returns:
        (torch.Tensor): The resized masks, (N, C, shape[0], shape[1]).
    """
    mh, mw = masks.shape[2:]
    new_h, new_w = shape[0], shape[1]
    gain = min(mh / new_h, mw / new_w)  # gain = old / new
    pad_w = mw - new_w * gain
    pad_h = mh - new_h * gain
    if padding:
        pad_w /= 2
        pad_h /= 2
        top, left = int(pad_h), int(pad_w)  # y, x
    else:
        top, left = 0, 0
    bottom, right = int(mh - pad_h), int(mw - pad_w)
    trimmed = masks[..., top:bottom, left:right]
    return F.interpolate(trimmed, shape, mode='bilinear', align_corners=False)  # NCHW
def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None, normalize=False, padding=True):
    """
    Rescale point coordinates from img1_shape to img0_shape, in place.

    Only the first two entries of the last dimension (x and y) are touched.

    Args:
        img1_shape (tuple): The (h, w) shape of the image that the coords are from.
        coords (torch.Tensor): The coords to be scaled; modified in place and returned.
        img0_shape (tuple): The (h, w) shape of the image the coords are mapped onto.
        ratio_pad (tuple): Optional precomputed values; `ratio_pad[0][0]` is used as the gain and `ratio_pad[1]` as
            the (x, y) padding. When None both are derived from the two shapes.
        normalize (bool): If True, x is divided by img0 width and y by img0 height after clipping. Defaults to False.
        padding (bool): If True, the padding is subtracted before dividing by the gain. If False only the gain is
            applied.

    Returns:
        coords (torch.Tensor): The same `coords` object after rescaling, clipping via `clip_coords` and optional
            normalization.
    """
    if ratio_pad is not None:
        gain = ratio_pad[0][0]
        pad = ratio_pad[1]
    else:  # calculate from img0_shape
        h1, w1 = img1_shape[0], img1_shape[1]
        h0, w0 = img0_shape[0], img0_shape[1]
        gain = min(h1 / h0, w1 / w0)  # gain = old / new
        pad = (w1 - w0 * gain) / 2, (h1 - h0 * gain) / 2  # wh padding

    if padding:
        coords[..., 0] -= pad[0]  # x padding
        coords[..., 1] -= pad[1]  # y padding
    coords[..., 0] /= gain
    coords[..., 1] /= gain
    clip_coords(coords, img0_shape)

    if normalize:
        coords[..., 0] /= img0_shape[1]  # width
        coords[..., 1] /= img0_shape[0]  # height
    return coords
def masks2segments(masks, strategy='largest'):
    """
    Convert a batch of masks (n, h, w) into a list of n point arrays using OpenCV external contours.

    Args:
        masks (torch.Tensor): Masks of shape (n, h, w); they are cast to int, moved to CPU and converted to uint8
            before contour extraction.
        strategy (str): 'concat' joins the points of every contour found in a mask, 'largest' keeps only the contour
            with the most points. Defaults to largest.

    Returns:
        segments (List): One float32 array of shape (k, 2) per mask; (0, 2) when no contour was found.
    """
    segments = []
    for mask in masks.int().cpu().numpy().astype('uint8'):
        contours = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0]
        if not contours:
            points = np.zeros((0, 2))  # no segments found
        elif strategy == 'concat':  # concatenate all segments
            points = np.concatenate([contour.reshape(-1, 2) for contour in contours])
        elif strategy == 'largest':  # select largest segment
            longest = np.array([len(contour) for contour in contours]).argmax()
            points = np.array(contours[longest]).reshape(-1, 2)
        else:
            points = contours  # unrecognised strategy: contours are passed through untouched
        segments.append(points.astype('float32'))
    return segments
def clean_str(s):
    """
    Replace every special character in a string with an underscore.

    The characters replaced are exactly those listed in the character class below; all others (including spaces,
    dots, slashes and hyphens) are left as they are.

    Args:
        s (str): a string needing special characters replaced

    Returns:
        (str): a copy of `s` with each special character replaced by an underscore _
    """
    special_chars = '[|@#!¡·$€%&()=?¿^*;:,¨´><+]'
    return re.sub(special_chars, '_', s)

View File

@ -0,0 +1,45 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
Monkey patches to update/extend functionality of existing functions
"""
from pathlib import Path
import cv2
import numpy as np
import torch
# OpenCV Multilanguage-friendly functions ------------------------------------------------------------------------------
# Reference to cv2.imshow captured at import time; imshow() below calls it through this name.
# NOTE(review): presumably cv2.imshow is rebound to the wrapper elsewhere, which is why a saved copy is needed.
_imshow = cv2.imshow  # copy to avoid recursion errors
def imread(filename, flags=cv2.IMREAD_COLOR):
    """
    Read an image by loading the file's raw bytes with NumPy and decoding them with OpenCV.

    Args:
        filename (str | Path): File to read.
        flags (int): Flag forwarded to `cv2.imdecode`. Defaults to cv2.IMREAD_COLOR.

    Returns:
        The value returned by `cv2.imdecode` for the file contents.
    """
    raw_bytes = np.fromfile(filename, np.uint8)
    return cv2.imdecode(raw_bytes, flags)
def imwrite(filename, img):
    """
    Write an image by encoding it with OpenCV and dumping the encoded buffer to disk with NumPy.

    The encoding format is chosen from the file name's suffix.

    Args:
        filename (str | Path): Destination file.
        img: Image passed to `cv2.imencode`.

    Returns:
        (bool): True when encoding and writing finished without raising, False if any exception occurred.
    """
    try:
        encoded = cv2.imencode(Path(filename).suffix, img)[1]
        encoded.tofile(filename)
    except Exception:
        return False
    return True
def imshow(path, im):
    """
    Show an image through the saved original `cv2.imshow`, using an escaped window name.

    Args:
        path (str): Window name; it is re-encoded with 'unicode_escape' so non-ASCII characters become escape
            sequences.
        im: Image to display.
    """
    window_name = path.encode('unicode_escape').decode()
    _imshow(window_name, im)
# PyTorch functions ----------------------------------------------------------------------------------------------------
# Reference to torch.save captured at import time; torch_save() below delegates to it through this name.
# NOTE(review): presumably torch.save is rebound to the wrapper elsewhere, which is why a saved copy is needed.
_torch_save = torch.save  # copy to avoid recursion errors
def torch_save(*args, **kwargs):
    """
    Call the saved original `torch.save`, defaulting `pickle_module` to dill when it is installed.

    dill is used (if it exists) to serialize the lambda functions where pickle does not do this. An explicit
    `pickle_module` keyword supplied by the caller is left untouched.

    Args:
        *args: Positional arguments forwarded unchanged.
        **kwargs: Keyword arguments forwarded, with `pickle_module` filled in when absent.

    Returns:
        Whatever the original `torch.save` returns.
    """
    try:
        import dill as serializer
    except ImportError:
        import pickle as serializer

    kwargs.setdefault('pickle_module', serializer)
    return _torch_save(*args, **kwargs)

View File

@ -0,0 +1,527 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
import contextlib
import math
import warnings
from pathlib import Path
import cv2
import matplotlib.pyplot as plt
import numpy as np
import torch
from PIL import Image, ImageDraw, ImageFont
from PIL import __version__ as pil_version
from scipy.ndimage import gaussian_filter1d
from ultralytics.utils import LOGGER, TryExcept, plt_settings, threaded
from .checks import check_font, check_version, is_ascii
from .files import increment_path
from .ops import clip_boxes, scale_image, xywh2xyxy, xyxy2xywh
class Colors:
    """Ultralytics color palette https://ultralytics.com/."""

    def __init__(self):
        """Build the 20-entry RGB palette from hex codes and the 20-entry uint8 pose palette."""
        hex_codes = ('FF3838', 'FF9D97', 'FF701F', 'FFB21D', 'CFD231', '48F90A', '92CC17', '3DDB86', '1A9334',
                     '00D4BB', '2C99A8', '00C2FF', '344593', '6473FF', '0018EC', '8438FF', '520085', 'CB38FF',
                     'FF95C8', 'FF37C7')
        self.palette = [self.hex2rgb(f'#{code}') for code in hex_codes]  # list of (r, g, b) tuples
        self.n = len(self.palette)
        self.pose_palette = np.array(
            [[255, 128, 0], [255, 153, 51], [255, 178, 102], [230, 230, 0], [255, 153, 255], [153, 204, 255],
             [255, 102, 255], [255, 51, 255], [102, 178, 255], [51, 153, 255], [255, 153, 153], [255, 102, 102],
             [255, 51, 51], [153, 255, 153], [102, 255, 102], [51, 255, 51], [0, 255, 0], [0, 0, 255], [255, 0, 0],
             [255, 255, 255]],
            dtype=np.uint8)

    def __call__(self, i, bgr=False):
        """Return palette color number `int(i)` (wrapping around the palette length), as RGB or, if `bgr`, BGR."""
        rgb = self.palette[int(i) % self.n]
        return rgb[::-1] if bgr else rgb

    @staticmethod
    def hex2rgb(h):  # rgb order (PIL)
        """Convert a '#RRGGBB' string into an (r, g, b) tuple of ints."""
        return tuple(int(h[start:start + 2], 16) for start in (1, 3, 5))
# Shared module-level palette instance; Annotator and the plotting functions in this module read it directly.
colors = Colors()  # create instance for 'from utils.plots import colors'
class Annotator:
    """
    YOLOv8 Annotator for train/val mosaics and jpgs and detect/hub inference annotations.

    Drawing is done either with PIL (`self.pil` is True; `self.im` is a PIL image and `self.draw` its ImageDraw
    handle) or with OpenCV (`self.im` is the numpy array passed in, annotated in place).
    """

    def __init__(self, im, line_width=None, font_size=None, font='Arial.ttf', pil=False, example='abc'):
        """
        Initialize the Annotator class with image and line width along with color palette for keypoints and limbs.

        Args:
            im (np.ndarray | PIL.Image.Image): Image to annotate.
            line_width (int, optional): Line width; derived from the image size when falsy.
            font_size (int, optional): Font size for PIL text; derived from the image size when falsy.
            font (str): Font file name used in PIL mode for ASCII labels.
            pil (bool): Force PIL drawing. PIL is also selected automatically when `example` is not ASCII.
            example: Sample label text, only used to decide whether non-ASCII rendering is needed.
        """
        assert im.data.contiguous, 'Image not contiguous. Apply np.ascontiguousarray(im) to Annotator() input images.'
        non_ascii = not is_ascii(example)  # non-latin labels, i.e. asian, arabic, cyrillic
        self.pil = pil or non_ascii
        if self.pil:  # use PIL
            self.im = im if isinstance(im, Image.Image) else Image.fromarray(im)
            self.draw = ImageDraw.Draw(self.im)
            try:
                font = check_font('Arial.Unicode.ttf' if non_ascii else font)
                size = font_size or max(round(sum(self.im.size) / 2 * 0.035), 12)
                self.font = ImageFont.truetype(str(font), size)
            except Exception:
                # Best effort: fall back to PIL's built-in font when the requested one cannot be loaded
                self.font = ImageFont.load_default()
            # Deprecation fix for w, h = getsize(string) -> _, _, w, h = getbox(string)
            if check_version(pil_version, '9.2.0'):
                self.font.getsize = lambda x: self.font.getbbox(x)[2:4]  # text width, height
        else:  # use cv2
            self.im = im
        self.lw = line_width or max(round(sum(im.shape) / 2 * 0.003), 2)  # line width
        # Pose
        # Pairs of 1-based keypoint indices joined by a limb (kpts() subtracts 1 before indexing)
        self.skeleton = [[16, 14], [14, 12], [17, 15], [15, 13], [12, 13], [6, 12], [7, 13], [6, 7], [6, 8], [7, 9],
                         [8, 10], [9, 11], [2, 3], [1, 2], [1, 3], [2, 4], [3, 5], [4, 6], [5, 7]]

        self.limb_color = colors.pose_palette[[9, 9, 9, 9, 7, 7, 7, 0, 0, 0, 0, 0, 16, 16, 16, 16, 16, 16, 16]]
        self.kpt_color = colors.pose_palette[[16, 16, 16, 16, 16, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9]]

    def box_label(self, box, label='', color=(128, 128, 128), txt_color=(255, 255, 255)):
        """
        Add one xyxy box to image with label.

        Args:
            box (torch.Tensor | sequence): Box as (x1, y1, x2, y2); tensors are converted with `tolist()`.
            label (str): Optional text drawn on a filled background at the box's top-left corner.
            color (tuple): Box outline and label background color.
            txt_color (tuple): Label text color.
        """
        if isinstance(box, torch.Tensor):
            box = box.tolist()
        if self.pil or not is_ascii(label):
            self.draw.rectangle(box, width=self.lw, outline=color)  # box
            if label:
                w, h = self.font.getsize(label)  # text width, height
                outside = box[1] - h >= 0  # label fits outside box
                self.draw.rectangle(
                    (box[0], box[1] - h if outside else box[1], box[0] + w + 1,
                     box[1] + 1 if outside else box[1] + h + 1),
                    fill=color,
                )
                # self.draw.text((box[0], box[1]), label, fill=txt_color, font=self.font, anchor='ls')  # for PIL>8.0
                self.draw.text((box[0], box[1] - h if outside else box[1]), label, fill=txt_color, font=self.font)
        else:  # cv2
            p1, p2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3]))
            cv2.rectangle(self.im, p1, p2, color, thickness=self.lw, lineType=cv2.LINE_AA)
            if label:
                tf = max(self.lw - 1, 1)  # font thickness
                w, h = cv2.getTextSize(label, 0, fontScale=self.lw / 3, thickness=tf)[0]  # text width, height
                outside = p1[1] - h >= 3
                p2 = p1[0] + w, p1[1] - h - 3 if outside else p1[1] + h + 3
                cv2.rectangle(self.im, p1, p2, color, -1, cv2.LINE_AA)  # filled
                cv2.putText(self.im,
                            label, (p1[0], p1[1] - 2 if outside else p1[1] + h + 2),
                            0,
                            self.lw / 3,
                            txt_color,
                            thickness=tf,
                            lineType=cv2.LINE_AA)

    def masks(self, masks, colors, im_gpu, alpha=0.5, retina_masks=False):
        """Plot masks at once.

        Args:
            masks (tensor): predicted masks on cuda, shape: [n, h, w]
            colors (List[List[Int]]): colors for predicted masks, [[r, g, b] * n]
            im_gpu (tensor): img is in cuda, shape: [3, h, w], range: [0, 1]
            alpha (float): mask transparency: 0.0 fully transparent, 1.0 opaque
            retina_masks (bool): If True the blended image is written to `self.im` as is; otherwise it is first
                passed through `scale_image` with `self.im.shape`.
        """
        if self.pil:
            # Convert to numpy first
            self.im = np.asarray(self.im).copy()
        if len(masks) == 0:
            self.im[:] = im_gpu.permute(1, 2, 0).contiguous().cpu().numpy() * 255
            # Nothing to blend: stop here, the reductions below cannot run on an empty batch of masks
            if self.pil:
                self.fromarray(self.im)
            return
        if im_gpu.device != masks.device:
            im_gpu = im_gpu.to(masks.device)
        colors = torch.tensor(colors, device=masks.device, dtype=torch.float32) / 255.0  # shape(n,3)
        colors = colors[:, None, None]  # shape(n,1,1,3)
        masks = masks.unsqueeze(3)  # shape(n,h,w,1)
        masks_color = masks * (colors * alpha)  # shape(n,h,w,3)

        inv_alph_masks = (1 - masks * alpha).cumprod(0)  # shape(n,h,w,1)
        mcs = masks_color.max(dim=0).values  # shape(h,w,3)

        im_gpu = im_gpu.flip(dims=[0])  # flip channel
        im_gpu = im_gpu.permute(1, 2, 0).contiguous()  # shape(h,w,3)
        im_gpu = im_gpu * inv_alph_masks[-1] + mcs
        im_mask = (im_gpu * 255)
        im_mask_np = im_mask.byte().cpu().numpy()
        self.im[:] = im_mask_np if retina_masks else scale_image(im_mask_np, self.im.shape)
        if self.pil:
            # Convert im back to PIL and update draw
            self.fromarray(self.im)

    def kpts(self, kpts, shape=(640, 640), radius=5, kpt_line=True):
        """Plot keypoints on the image.

        Args:
            kpts (tensor): Predicted keypoints with shape [17, 3]. Each keypoint has (x, y, confidence).
            shape (tuple): Image shape as a tuple (h, w), where h is the height and w is the width.
            radius (int, optional): Radius of the drawn keypoints. Default is 5.
            kpt_line (bool, optional): If True, the function will draw lines connecting keypoints
                                       for human pose. Default is True.

        Note: `kpt_line=True` currently only supports human pose plotting.
        """
        if self.pil:
            # Convert to numpy first
            self.im = np.asarray(self.im).copy()
        nkpt, ndim = kpts.shape
        is_pose = nkpt == 17 and ndim == 3
        kpt_line &= is_pose  # `kpt_line=True` for now only supports human pose plotting
        for i, k in enumerate(kpts):
            color_k = [int(x) for x in self.kpt_color[i]] if is_pose else colors(i)
            x_coord, y_coord = k[0], k[1]
            # Skip points whose x or y is an exact multiple of the image width/height (includes 0)
            if x_coord % shape[1] != 0 and y_coord % shape[0] != 0:
                if len(k) == 3:
                    conf = k[2]
                    if conf < 0.5:
                        continue
                cv2.circle(self.im, (int(x_coord), int(y_coord)), radius, color_k, -1, lineType=cv2.LINE_AA)

        if kpt_line:
            ndim = kpts.shape[-1]
            for i, sk in enumerate(self.skeleton):
                pos1 = (int(kpts[(sk[0] - 1), 0]), int(kpts[(sk[0] - 1), 1]))
                pos2 = (int(kpts[(sk[1] - 1), 0]), int(kpts[(sk[1] - 1), 1]))
                if ndim == 3:
                    conf1 = kpts[(sk[0] - 1), 2]
                    conf2 = kpts[(sk[1] - 1), 2]
                    if conf1 < 0.5 or conf2 < 0.5:
                        continue
                if pos1[0] % shape[1] == 0 or pos1[1] % shape[0] == 0 or pos1[0] < 0 or pos1[1] < 0:
                    continue
                if pos2[0] % shape[1] == 0 or pos2[1] % shape[0] == 0 or pos2[0] < 0 or pos2[1] < 0:
                    continue
                cv2.line(self.im, pos1, pos2, [int(x) for x in self.limb_color[i]], thickness=2, lineType=cv2.LINE_AA)
        if self.pil:
            # Convert im back to PIL and update draw
            self.fromarray(self.im)

    def rectangle(self, xy, fill=None, outline=None, width=1):
        """Add rectangle to image (PIL-only)."""
        self.draw.rectangle(xy, fill, outline, width)

    def text(self, xy, text, txt_color=(255, 255, 255), anchor='top', box_style=False):
        """
        Adds text to an image using PIL or cv2.

        Args:
            xy (sequence): (x, y) position of the text. Any indexable pair is accepted, including tuples; it is
                never modified.
            text (str): Text to draw. In PIL mode a text containing newlines is drawn line by line.
            txt_color (tuple): Text color, or the background color when `box_style` is True.
            anchor (str): 'bottom' shifts y up by the text height (uses `self.font`, so PIL mode only).
            box_style (bool): If True, draw a filled `txt_color` rectangle behind the text and render the text in
                white.
        """
        # Work on local copies so immutable tuples are accepted and the caller's sequence is left untouched
        x, y = xy[0], xy[1]
        if anchor == 'bottom':  # start y from font bottom
            w, h = self.font.getsize(text)  # text width, height
            y += 1 - h
        if self.pil:
            if box_style:
                w, h = self.font.getsize(text)
                self.draw.rectangle((x, y, x + w + 1, y + h + 1), fill=txt_color)
                # Using `txt_color` for background and draw fg with white color
                txt_color = (255, 255, 255)
            if '\n' in text:
                lines = text.split('\n')
                _, h = self.font.getsize(text)
                for line in lines:
                    self.draw.text((x, y), line, fill=txt_color, font=self.font)
                    y += h
            else:
                self.draw.text((x, y), text, fill=txt_color, font=self.font)
        else:
            if box_style:
                tf = max(self.lw - 1, 1)  # font thickness
                w, h = cv2.getTextSize(text, 0, fontScale=self.lw / 3, thickness=tf)[0]  # text width, height
                outside = y - h >= 3
                p2 = x + w, y - h - 3 if outside else y + h + 3
                cv2.rectangle(self.im, (x, y), p2, txt_color, -1, cv2.LINE_AA)  # filled
                # Using `txt_color` for background and draw fg with white color
                txt_color = (255, 255, 255)
            tf = max(self.lw - 1, 1)  # font thickness
            cv2.putText(self.im, text, (x, y), 0, self.lw / 3, txt_color, thickness=tf, lineType=cv2.LINE_AA)

    def fromarray(self, im):
        """Update self.im from a numpy array (or PIL image) and recreate the PIL drawing handle."""
        self.im = im if isinstance(im, Image.Image) else Image.fromarray(im)
        self.draw = ImageDraw.Draw(self.im)

    def result(self):
        """Return annotated image as array."""
        return np.asarray(self.im)
@TryExcept()  # known issue https://github.com/ultralytics/yolov5/issues/5395
@plt_settings()
def plot_labels(boxes, cls, names=(), save_dir=Path(''), on_plot=None):
    """
    Plot dataset label statistics and save them as 'labels_correlogram.jpg' and 'labels.jpg' in `save_dir`.

    The second figure is a 2x2 grid: class histogram, up to 500 boxes drawn centered on a white canvas, an x/y
    histogram and a width/height histogram. Axis spines are hidden on all four panels.

    Args:
        boxes (np.ndarray): Boxes with four columns used as (x, y, width, height). The array is not modified.
        cls (np.ndarray): Class index per box.
        names (dict): Class names; its values label the histogram ticks when it has between 1 and 29 entries.
        save_dir (Path): Directory the two images are written to.
        on_plot (callable, optional): Called with the path of 'labels.jpg' after saving.
    """
    import pandas as pd
    import seaborn as sn

    # Filter matplotlib>=3.7.2 warning
    warnings.filterwarnings('ignore', category=UserWarning, message='The figure layout has changed to tight')

    # Plot dataset labels
    LOGGER.info(f"Plotting labels to {save_dir / 'labels.jpg'}... ")
    nc = int(cls.max() + 1)  # number of classes
    x = pd.DataFrame(boxes, columns=['x', 'y', 'width', 'height'])

    # Seaborn correlogram
    sn.pairplot(x, corner=True, diag_kind='auto', kind='hist', diag_kws=dict(bins=50), plot_kws=dict(pmax=0.9))
    plt.savefig(save_dir / 'labels_correlogram.jpg', dpi=200)
    plt.close()

    # Matplotlib labels
    ax = plt.subplots(2, 2, figsize=(8, 8), tight_layout=True)[1].ravel()
    y = ax[0].hist(cls, bins=np.linspace(0, nc, nc + 1) - 0.5, rwidth=0.8)
    with contextlib.suppress(Exception):  # color histogram bars by class
        for i in range(nc):  # known issue #3195
            y[2].patches[i].set_color([channel / 255 for channel in colors(i)])
    ax[0].set_ylabel('instances')
    if 0 < len(names) < 30:
        ax[0].set_xticks(range(len(names)))
        ax[0].set_xticklabels(list(names.values()), rotation=90, fontsize=10)
    else:
        ax[0].set_xlabel('classes')
    sn.histplot(x, x='x', y='y', ax=ax[2], bins=50, pmax=0.9)
    sn.histplot(x, x='width', y='height', ax=ax[3], bins=50, pmax=0.9)

    # Rectangles
    centered = np.copy(boxes)  # copy so the caller's array keeps its real centers
    centered[:, 0:2] = 0.5  # center
    rects = xywh2xyxy(centered) * 1000
    img = Image.fromarray(np.ones((1000, 1000, 3), dtype=np.uint8) * 255)
    draw = ImageDraw.Draw(img)
    for box_cls, box in zip(cls[:500], rects[:500]):
        draw.rectangle(box, width=1, outline=colors(box_cls))  # plot
    ax[1].imshow(img)
    ax[1].axis('off')

    for a in [0, 1, 2, 3]:
        for s in ['top', 'right', 'left', 'bottom']:
            ax[a].spines[s].set_visible(False)

    fname = save_dir / 'labels.jpg'
    plt.savefig(fname, dpi=200)
    plt.close()
    if on_plot:
        on_plot(fname)
def save_one_box(xyxy, im, file=Path('im.jpg'), gain=1.02, pad=10, square=False, BGR=False, save=True):
    """
    Save image crop as {file} with crop size multiple {gain} and {pad} pixels. Save and/or return crop.

    Args:
        xyxy (torch.Tensor): Box to crop, viewed as (-1, 4); only the first row is used for the crop.
        im (np.ndarray): Source image, indexed as [rows, cols, channels].
        file (Path): Target file; the actual name comes from `increment_path(file)` with a '.jpg' suffix.
        gain (float): Multiplier applied to the box width and height.
        pad (int): Pixels added to the box width and height after the gain.
        square (bool): If True, width and height are both set to the larger of the two before gain/pad.
        BGR (bool): If True the crop keeps the image's channel order, otherwise the channel axis is reversed.
        save (bool): Whether to write the crop to disk.

    Returns:
        (np.ndarray): The cropped image region.
    """
    box = xyxy2xywh(xyxy.view(-1, 4))  # boxes
    if square:
        longest_side = box[:, 2:].max(1)[0].unsqueeze(1)
        box[:, 2:] = longest_side  # attempt rectangle to square
    box[:, 2:] = box[:, 2:] * gain + pad  # box wh * gain + pad
    corners = xywh2xyxy(box).long()
    clip_boxes(corners, im.shape)

    x1, y1, x2, y2 = (int(v) for v in corners[0])
    channel_step = 1 if BGR else -1
    crop = im[y1:y2, x1:x2, ::channel_step]

    if save:
        file.parent.mkdir(parents=True, exist_ok=True)  # make directory
        target = str(increment_path(file).with_suffix('.jpg'))
        # subsampling=0 avoids the chroma subsampling issue, https://github.com/ultralytics/yolov5/issues/7007
        Image.fromarray(crop[..., ::-1]).save(target, quality=95, subsampling=0)  # save RGB
    return crop
@threaded
def plot_images(images,
                batch_idx,
                cls,
                bboxes=np.zeros(0, dtype=np.float32),
                masks=np.zeros(0, dtype=np.uint8),
                kpts=np.zeros((0, 51), dtype=np.float32),
                paths=None,
                fname='images.jpg',
                names=None,
                on_plot=None):
    """
    Plot image grid with labels.

    Tiles up to 16 images of a batch into a square mosaic, draws the boxes, class labels, keypoints and masks that
    belong to each image (selected with `batch_idx == i`) and saves the mosaic to `fname`.

    Args:
        images (torch.Tensor | np.ndarray): 4-D batch unpacked as (bs, _, h, w); each image is transposed from
            channel-first to channel-last before being pasted into the mosaic.
        batch_idx (torch.Tensor | np.ndarray): Image index of every label/prediction row.
        cls (torch.Tensor | np.ndarray): Class of every label/prediction row.
        bboxes (torch.Tensor | np.ndarray): Boxes passed to `xywh2xyxy`. With exactly 4 columns rows are treated as
            labels; otherwise column 4 is read as confidence and rows at or below 0.25 are not drawn.
        masks (torch.Tensor | np.ndarray): Masks; either one per row of `batch_idx` or one per image holding
            1-based instance indices (see the `overlap_masks` comments below).
        kpts (torch.Tensor | np.ndarray): Keypoints per row.
        paths (list, optional): Image paths; the file name (first 40 characters) is written on each tile.
        fname (str): Output file the annotated mosaic is saved to.
        names (dict, optional): Class index to name mapping used for the label text.
        on_plot (callable, optional): Called with `fname` after saving.

    NOTE(review): `@threaded` is defined outside this block; presumably the call runs in a background thread, so the
    return value should not be relied on — confirm against its definition.
    """
    # Bring every tensor input to numpy so the rest of the function is framework-agnostic
    if isinstance(images, torch.Tensor):
        images = images.cpu().float().numpy()
    if isinstance(cls, torch.Tensor):
        cls = cls.cpu().numpy()
    if isinstance(bboxes, torch.Tensor):
        bboxes = bboxes.cpu().numpy()
    if isinstance(masks, torch.Tensor):
        masks = masks.cpu().numpy().astype(int)
    if isinstance(kpts, torch.Tensor):
        kpts = kpts.cpu().numpy()
    if isinstance(batch_idx, torch.Tensor):
        batch_idx = batch_idx.cpu().numpy()

    max_size = 1920  # max image size
    max_subplots = 16  # max image subplots, i.e. 4x4
    bs, _, h, w = images.shape  # batch size, _, height, width
    bs = min(bs, max_subplots)  # limit plot images
    ns = np.ceil(bs ** 0.5)  # number of subplots (square)
    if np.max(images[0]) <= 1:
        images *= 255  # de-normalise (optional); in place, so a numpy `images` argument is modified too

    # Build Image
    mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8)  # init
    for i, im in enumerate(images):
        if i == max_subplots:  # if last batch has fewer images than we expect
            break
        x, y = int(w * (i // ns)), int(h * (i % ns))  # block origin (tiles fill column by column)
        im = im.transpose(1, 2, 0)
        mosaic[y:y + h, x:x + w, :] = im

    # Resize (optional)
    scale = max_size / ns / max(h, w)
    if scale < 1:
        h = math.ceil(scale * h)
        w = math.ceil(scale * w)
        mosaic = cv2.resize(mosaic, tuple(int(x * ns) for x in (w, h)))

    # Annotate
    fs = int((h + w) * ns * 0.01)  # font size
    annotator = Annotator(mosaic, line_width=round(fs / 10), font_size=fs, pil=True, example=names)
    # NOTE(review): `i` below is seeded from the last value left by the mosaic loop above
    for i in range(i + 1):
        x, y = int(w * (i // ns)), int(h * (i % ns))  # block origin
        annotator.rectangle([x, y, x + w, y + h], None, (255, 255, 255), width=2)  # borders
        if paths:
            annotator.text((x + 5, y + 5), text=Path(paths[i]).name[:40], txt_color=(220, 220, 220))  # filenames
        if len(cls) > 0:
            idx = batch_idx == i  # rows that belong to image i
            classes = cls[idx].astype('int')

            if len(bboxes):
                boxes = xywh2xyxy(bboxes[idx, :4]).T  # shape (4, k): one column per box
                labels = bboxes.shape[1] == 4  # labels if no conf column
                conf = None if labels else bboxes[idx, 4]  # check for confidence presence (label vs pred)

                if boxes.shape[1]:
                    if boxes.max() <= 1.01:  # if normalized with tolerance 0.01
                        boxes[[0, 2]] *= w  # scale to pixels
                        boxes[[1, 3]] *= h
                    elif scale < 1:  # absolute coords need scale if image scales
                        boxes *= scale
                boxes[[0, 2]] += x  # shift into this tile of the mosaic
                boxes[[1, 3]] += y
                for j, box in enumerate(boxes.T.tolist()):
                    c = classes[j]
                    color = colors(c)
                    c = names.get(c, c) if names else c
                    if labels or conf[j] > 0.25:  # 0.25 conf thresh
                        label = f'{c}' if labels else f'{c} {conf[j]:.1f}'
                        annotator.box_label(box, label, color=color)
            elif len(classes):
                # No boxes at all: write the class names at the tile origin instead
                for c in classes:
                    color = colors(c)
                    c = names.get(c, c) if names else c
                    annotator.text((x, y), f'{c}', txt_color=color, box_style=True)

            # Plot keypoints
            # NOTE(review): `labels`/`conf` (and `boxes` in the mask section) are only assigned in the
            # `len(bboxes)` branch above, so these sections rely on bboxes being provided
            if len(kpts):
                kpts_ = kpts[idx].copy()
                if len(kpts_):
                    if kpts_[..., 0].max() <= 1.01 or kpts_[..., 1].max() <= 1.01:  # if normalized with tolerance .01
                        kpts_[..., 0] *= w  # scale to pixels
                        kpts_[..., 1] *= h
                    elif scale < 1:  # absolute coords need scale if image scales
                        kpts_ *= scale
                kpts_[..., 0] += x
                kpts_[..., 1] += y
                for j in range(len(kpts_)):
                    if labels or conf[j] > 0.25:  # 0.25 conf thresh
                        annotator.kpts(kpts_[j])

            # Plot masks
            if len(masks):
                if idx.shape[0] == masks.shape[0]:  # overlap_masks=False
                    image_masks = masks[idx]
                else:  # overlap_masks=True
                    image_masks = masks[[i]]  # (1, 640, 640)
                    nl = idx.sum()
                    # Split the single index map into one binary mask per instance (instance ids start at 1)
                    index = np.arange(nl).reshape((nl, 1, 1)) + 1
                    image_masks = np.repeat(image_masks, nl, axis=0)
                    image_masks = np.where(image_masks == index, 1.0, 0.0)

                im = np.asarray(annotator.im).copy()
                for j, box in enumerate(boxes.T.tolist()):
                    if labels or conf[j] > 0.25:  # 0.25 conf thresh
                        color = colors(classes[j])
                        mh, mw = image_masks[j].shape
                        if mh != h or mw != w:
                            mask = image_masks[j].astype(np.uint8)
                            mask = cv2.resize(mask, (w, h))
                            mask = mask.astype(bool)
                        else:
                            mask = image_masks[j].astype(bool)
                        # Blend 40% image with 60% class color; any failure here is deliberately ignored
                        with contextlib.suppress(Exception):
                            im[y:y + h, x:x + w, :][mask] = im[y:y + h, x:x + w, :][mask] * 0.4 + np.array(color) * 0.6
                annotator.fromarray(im)
    annotator.im.save(fname)  # save
    if on_plot:
        on_plot(fname)
@plt_settings()
def plot_results(file='path/to/results.csv', dir='', segment=False, pose=False, classify=False, on_plot=None):
    """
    Plot training results.csv. Usage: from utils.plots import *; plot_results('path/to/results.csv').

    Every 'results*.csv' in the directory is drawn onto a shared grid of axes (raw curve plus a gaussian-smoothed
    curve per column) and the figure is saved as 'results.png' in that directory.

    Args:
        file (str | Path): A results file; only its parent directory is used. When falsy, `dir` is used instead.
        dir (str): Directory to search when `file` is falsy.
        segment (bool): Use the 2x8 layout and column order for segmentation runs.
        pose (bool): Use the 2x9 layout and column order for pose runs.
        classify (bool): Use the 2x2 layout and column order for classification runs (checked first).
        on_plot (callable, optional): Called with the path of the saved figure.
    """
    import pandas as pd
    save_dir = Path(file).parent if file else Path(dir)

    # Grid shape, figure size and CSV column order for each task
    if classify:
        grid, figsize, index = (2, 2), (6, 6), [1, 4, 2, 3]
    elif segment:
        grid, figsize, index = (2, 8), (18, 6), [1, 2, 3, 4, 5, 6, 9, 10, 13, 14, 15, 16, 7, 8, 11, 12]
    elif pose:
        grid, figsize, index = (2, 9), (21, 6), [1, 2, 3, 4, 5, 6, 7, 10, 11, 14, 15, 16, 17, 18, 8, 9, 12, 13]
    else:
        grid, figsize, index = (2, 5), (12, 6), [1, 2, 3, 4, 5, 8, 9, 10, 6, 7]
    fig, ax = plt.subplots(grid[0], grid[1], figsize=figsize, tight_layout=True)
    ax = ax.ravel()

    files = list(save_dir.glob('results*.csv'))
    assert len(files), f'No results.csv files found in {save_dir.resolve()}, nothing to plot.'
    for f in files:
        try:
            data = pd.read_csv(f)
            titles = [column.strip() for column in data.columns]
            epochs = data.values[:, 0]
            for panel, column in enumerate(index):
                series = data.values[:, column].astype('float')
                axis = ax[panel]
                axis.plot(epochs, series, marker='.', label=f.stem, linewidth=2, markersize=8)  # actual results
                axis.plot(epochs, gaussian_filter1d(series, sigma=3), ':', label='smooth', linewidth=2)  # smoothing line
                axis.set_title(titles[column], fontsize=12)
        except Exception as e:
            LOGGER.warning(f'WARNING: Plotting error for {f}: {e}')
    ax[1].legend()

    fname = save_dir / 'results.png'
    fig.savefig(fname, dpi=200)
    plt.close()
    if on_plot:
        on_plot(fname)
def output_to_target(output, max_det=300):
    """
    Convert model output to target format [batch_id, class_id, x, y, w, h, conf] for plotting.

    Args:
        output (iterable of torch.Tensor): One tensor per image; the first six columns are split into box (4),
            confidence (1) and class (1), and the box is passed through `xyxy2xywh`.
        max_det (int): Maximum number of rows taken from each image.

    Returns:
        (tuple): Three numpy arrays sliced from the stacked targets: batch ids `[:, 0]`, class ids `[:, 1]` and the
            remaining columns `[:, 2:]` (x, y, w, h, conf).
    """
    per_image = []
    for image_id, detections in enumerate(output):
        box, conf, cls = detections[:max_det, :6].cpu().split((4, 1, 1), 1)
        batch_col = torch.full((conf.shape[0], 1), image_id)
        per_image.append(torch.cat((batch_col, cls, xyxy2xywh(box), conf), 1))
    stacked = torch.cat(per_image, 0).numpy()
    batch_ids, class_ids, rest = stacked[:, 0], stacked[:, 1], stacked[:, 2:]
    return batch_ids, class_ids, rest
def feature_visualization(x, module_type, stage, n=32, save_dir=Path('runs/detect/exp')):
    """
    Visualize feature maps of a given model module during inference.

    Nothing is done for modules whose type contains 'Detect', 'Pose' or 'Segment', or when the feature map is not
    larger than 1 in both spatial dimensions. Otherwise the first `n` channels of batch item 0 are plotted in rows
    of 8 and saved as a PNG, and the full feature map of batch item 0 is saved next to it as a .npy file.

    Args:
        x (torch.Tensor): Features to be visualized, unpacked as (batch, channels, height, width).
        module_type (str): Module type.
        stage (int): Module stage within the model.
        n (int, optional): Maximum number of feature maps to plot. Defaults to 32.
        save_dir (Path, optional): Directory to save results. Defaults to Path('runs/detect/exp').
    """
    if any(head in module_type for head in ('Detect', 'Pose', 'Segment')):
        return

    _, channels, height, width = x.shape  # batch, channels, height, width
    if height <= 1 or width <= 1:
        return

    f = save_dir / f"stage{stage}_{module_type.split('.')[-1]}_features.png"  # filename
    first_item = x[0].cpu()  # select batch index 0
    blocks = torch.chunk(first_item, channels, dim=0)  # block by channels
    n = min(n, channels)  # number of plots

    fig, ax = plt.subplots(math.ceil(n / 8), 8, tight_layout=True)  # ceil(n/8) rows x 8 cols
    ax = ax.ravel()
    plt.subplots_adjust(wspace=0.05, hspace=0.05)
    for axis, block in zip(ax[:n], blocks[:n]):
        axis.imshow(block.squeeze())  # cmap='gray'
        axis.axis('off')

    LOGGER.info(f'Saving {f}... ({n}/{channels})')
    plt.savefig(f, dpi=300, bbox_inches='tight')
    plt.close()
    np.save(str(f.with_suffix('.npy')), x[0].cpu().numpy())  # npy save

276
ultralytics/utils/tal.py Normal file
View File

@ -0,0 +1,276 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
import torch
import torch.nn as nn
from .checks import check_version
from .metrics import bbox_iou
# Result of comparing the installed torch version against '1.10.0', evaluated once at import time.
# NOTE(review): presumably True when torch >= 1.10.0 — confirm against check_version in .checks.
TORCH_1_10 = check_version(torch.__version__, '1.10.0')
def select_candidates_in_gts(xy_centers, gt_bboxes, eps=1e-9):
    """
    Select the anchor centers that lie strictly inside each gt box.

    An anchor is inside a box when its smallest distance to the four box edges is greater than `eps`.

    Args:
        xy_centers (Tensor): shape(h*w, 2), the (x, y) anchor centers
        gt_bboxes (Tensor): shape(b, n_boxes, 4), boxes as (x1, y1, x2, y2)
        eps (float): minimum edge distance for an anchor to count as inside

    Return:
        (Tensor): shape(b, n_boxes, h*w), 1 where the anchor is inside the box and 0 elsewhere
    """
    bs, n_boxes, _ = gt_bboxes.shape
    n_anchors = xy_centers.shape[0]
    corners = gt_bboxes.view(-1, 1, 4)
    lt, rb = corners[..., :2], corners[..., 2:]  # left-top, right-bottom
    centers = xy_centers[None]
    edge_dist = torch.cat((centers - lt, rb - centers), dim=2).view(bs, n_boxes, n_anchors, -1)
    return edge_dist.amin(3).gt_(eps)
def select_highest_overlaps(mask_pos, overlaps, n_max_boxes):
    """
    Resolve anchors assigned to several gts by keeping only the gt with the highest overlap.

    Args:
        mask_pos (Tensor): shape(b, n_max_boxes, h*w)
        overlaps (Tensor): shape(b, n_max_boxes, h*w)
        n_max_boxes (int): size of the gt dimension

    Return:
        target_gt_idx (Tensor): shape(b, h*w), index of the gt each anchor is assigned to
        fg_mask (Tensor): shape(b, h*w), number of gts assigned to each anchor after resolution
        mask_pos (Tensor): shape(b, n_max_boxes, h*w), the (possibly updated) assignment mask
    """
    # (b, n_max_boxes, h*w) -> (b, h*w)
    fg_mask = mask_pos.sum(-2)
    if fg_mask.max() > 1:  # one anchor is assigned to multiple gt_bboxes
        contested = (fg_mask.unsqueeze(1) > 1).expand(-1, n_max_boxes, -1)  # (b, n_max_boxes, h*w)
        best_gt = overlaps.argmax(1).unsqueeze(1)  # (b, 1, h*w)

        best_one_hot = torch.zeros(mask_pos.shape, dtype=mask_pos.dtype, device=mask_pos.device)
        best_one_hot.scatter_(1, best_gt, 1)

        # Contested anchors keep only their best gt; all other anchors keep their original assignment
        mask_pos = torch.where(contested, best_one_hot, mask_pos).float()  # (b, n_max_boxes, h*w)
        fg_mask = mask_pos.sum(-2)
    # Find each grid serve which gt(index)
    return mask_pos.argmax(-2), fg_mask, mask_pos
class TaskAlignedAssigner(nn.Module):
"""
A task-aligned assigner for object detection.
This class assigns ground-truth (gt) objects to anchors based on the task-aligned metric,
which combines both classification and localization information.
Attributes:
topk (int): The number of top candidates to consider.
num_classes (int): The number of object classes.
alpha (float): The alpha parameter for the classification component of the task-aligned metric.
beta (float): The beta parameter for the localization component of the task-aligned metric.
eps (float): A small value to prevent division by zero.
"""
def __init__(self, topk=13, num_classes=80, alpha=1.0, beta=6.0, eps=1e-9):
"""Initialize a TaskAlignedAssigner object with customizable hyperparameters."""
super().__init__()
self.topk = topk
self.num_classes = num_classes
self.bg_idx = num_classes
self.alpha = alpha
self.beta = beta
self.eps = eps
@torch.no_grad()
def forward(self, pd_scores, pd_bboxes, anc_points, gt_labels, gt_bboxes, mask_gt):
    """
    Compute the task-aligned assignment.
    Reference https://github.com/Nioolek/PPYOLOE_pytorch/blob/master/ppyoloe/assigner/tal_assigner.py

    The batch size and the number of gt boxes are cached on the instance (`self.bs`, `self.n_max_boxes`) for use
    by the helper methods. When there are no gt boxes, background labels and all-zero targets are returned.

    Args:
        pd_scores (Tensor): shape(bs, num_total_anchors, num_classes)
        pd_bboxes (Tensor): shape(bs, num_total_anchors, 4)
        anc_points (Tensor): shape(num_total_anchors, 2)
        gt_labels (Tensor): shape(bs, n_max_boxes, 1)
        gt_bboxes (Tensor): shape(bs, n_max_boxes, 4)
        mask_gt (Tensor): shape(bs, n_max_boxes, 1)

    Returns:
        target_labels (Tensor): shape(bs, num_total_anchors)
        target_bboxes (Tensor): shape(bs, num_total_anchors, 4)
        target_scores (Tensor): shape(bs, num_total_anchors, num_classes)
        fg_mask (Tensor): shape(bs, num_total_anchors)
        target_gt_idx (Tensor): shape(bs, num_total_anchors)
    """
    self.bs = pd_scores.size(0)
    self.n_max_boxes = gt_bboxes.size(1)

    if self.n_max_boxes == 0:
        device = gt_bboxes.device
        per_anchor = pd_scores[..., 0]  # (bs, num_total_anchors) template for the per-anchor outputs
        background_labels = torch.full_like(per_anchor, self.bg_idx).to(device)
        empty_bboxes = torch.zeros_like(pd_bboxes).to(device)
        empty_scores = torch.zeros_like(pd_scores).to(device)
        empty_fg_mask = torch.zeros_like(per_anchor).to(device)
        empty_gt_idx = torch.zeros_like(per_anchor).to(device)
        return background_labels, empty_bboxes, empty_scores, empty_fg_mask, empty_gt_idx

    mask_pos, align_metric, overlaps = self.get_pos_mask(pd_scores, pd_bboxes, gt_labels, gt_bboxes, anc_points,
                                                         mask_gt)

    target_gt_idx, fg_mask, mask_pos = select_highest_overlaps(mask_pos, overlaps, self.n_max_boxes)

    # Assigned target
    target_labels, target_bboxes, target_scores = self.get_targets(gt_labels, gt_bboxes, target_gt_idx, fg_mask)

    # Normalize
    align_metric *= mask_pos
    best_metric_per_gt = align_metric.amax(axis=-1, keepdim=True)  # b, max_num_obj
    best_overlap_per_gt = (overlaps * mask_pos).amax(axis=-1, keepdim=True)  # b, max_num_obj
    scaled_metric = align_metric * best_overlap_per_gt / (best_metric_per_gt + self.eps)
    norm_align_metric = scaled_metric.amax(-2).unsqueeze(-1)
    target_scores = target_scores * norm_align_metric

    return target_labels, target_bboxes, target_scores, fg_mask.bool(), target_gt_idx
def get_pos_mask(self, pd_scores, pd_bboxes, gt_labels, gt_bboxes, anc_points, mask_gt):
    """
    Build the positive-candidate mask, shape (b, max_num_obj, h*w), together with the metrics it is based on.

    Returns:
        (Tuple[Tensor, Tensor, Tensor]): mask_pos, align_metric and overlaps, each (b, max_num_obj, h*w).
    """
    # Anchors lying inside each ground-truth box, (b, max_num_obj, h*w)
    in_gts = select_candidates_in_gts(anc_points, gt_bboxes)

    # Alignment metric and overlaps, evaluated only for anchors inside a valid ground truth
    align_metric, overlaps = self.get_box_metrics(pd_scores, pd_bboxes, gt_labels, gt_bboxes, in_gts * mask_gt)

    # Top-k anchors per ground truth; the expanded mask_gt marks which ground-truth rows are valid
    valid_rows = mask_gt.expand(-1, -1, self.topk).bool()
    in_topk = self.select_topk_candidates(align_metric, topk_mask=valid_rows)

    # An anchor is positive when it passes all three masks
    return in_topk * in_gts * mask_gt, align_metric, overlaps
def get_box_metrics(self, pd_scores, pd_bboxes, gt_labels, gt_bboxes, mask_gt):
    """
    Compute the alignment metric and overlaps between predicted and ground-truth boxes.

    Note that `mask_gt` here is the per-anchor mask (b, max_num_obj, h*w) passed by `get_pos_mask`; pairs outside
    the mask keep a value of zero in both outputs.

    Returns:
        (Tuple[Tensor, Tensor]): align_metric and overlaps, each of shape (b, max_num_obj, h*w), where
            align_metric = score ** alpha * overlap ** beta.
    """
    num_anchors = pd_bboxes.shape[-2]
    mask_gt = mask_gt.bool()  # b, max_num_obj, h*w
    grid = [self.bs, self.n_max_boxes, num_anchors]
    overlaps = torch.zeros(grid, dtype=pd_bboxes.dtype, device=pd_bboxes.device)
    bbox_scores = torch.zeros(grid, dtype=pd_scores.dtype, device=pd_scores.device)

    # Index pair (batch index, class index) for every ground-truth slot
    ind = torch.zeros([2, self.bs, self.n_max_boxes], dtype=torch.long)  # 2, b, max_num_obj
    ind[0] = torch.arange(end=self.bs).view(-1, 1).expand(-1, self.n_max_boxes)  # b, max_num_obj
    ind[1] = gt_labels.squeeze(-1)  # b, max_num_obj
    batch_idx, cls_idx = ind[0], ind[1]

    # Predicted score of each anchor for the class of each ground truth
    bbox_scores[mask_gt] = pd_scores[batch_idx, :, cls_idx][mask_gt]  # b, max_num_obj, h*w

    # Pair every ground truth with every anchor, keeping only the masked pairs
    pd_boxes = pd_bboxes.unsqueeze(1).expand(-1, self.n_max_boxes, -1, -1)[mask_gt]
    gt_boxes = gt_bboxes.unsqueeze(2).expand(-1, -1, num_anchors, -1)[mask_gt]
    overlaps[mask_gt] = bbox_iou(gt_boxes, pd_boxes, xywh=False, CIoU=True).squeeze(-1).clamp_(0)

    return bbox_scores.pow(self.alpha) * overlaps.pow(self.beta), overlaps
def select_topk_candidates(self, metrics, largest=True, topk_mask=None):
    """
    Mark, for every ground truth, the `self.topk` anchors with the best metric.

    Args:
        metrics (Tensor): Shape (b, max_num_obj, h*w), where b is the batch size, max_num_obj the maximum number
            of objects and h*w the total number of anchor points.
        largest (bool): Pick the largest values when True, the smallest otherwise.
        topk_mask (Tensor): Optional boolean tensor of shape (b, max_num_obj, topk) flagging valid entries. When
            omitted, a row is valid if its best selected metric exceeds `self.eps`.

    Returns:
        (Tensor): Shape (b, max_num_obj, h*w), in the dtype of `metrics`, holding 1 at selected anchors and 0
            elsewhere.
    """
    # (b, max_num_obj, topk)
    topk_metrics, topk_idxs = torch.topk(metrics, self.topk, dim=-1, largest=largest)
    if topk_mask is None:
        row_has_signal = topk_metrics.max(-1, keepdim=True)[0] > self.eps
        topk_mask = row_has_signal.expand_as(topk_idxs)
    # Invalid entries are redirected to anchor index 0
    topk_idxs.masked_fill_(~topk_mask, 0)

    # Count how many times each anchor is hit, one top-k column at a time
    counts = torch.zeros(metrics.shape, dtype=torch.int8, device=topk_idxs.device)
    unit = torch.ones_like(topk_idxs[:, :, :1], dtype=torch.int8, device=topk_idxs.device)
    for column in topk_idxs.split(1, dim=-1):
        counts.scatter_add_(-1, column, unit)

    # Anchors hit more than once (e.g. index 0 collecting redirected entries) are dropped
    counts.masked_fill_(counts > 1, 0)
    return counts.to(metrics.dtype)
def get_targets(self, gt_labels, gt_bboxes, target_gt_idx, fg_mask):
    """
    Gather the labels, boxes and one-hot scores of the ground truth assigned to every anchor.

    Args:
        gt_labels (Tensor): Ground truth labels of shape (b, max_num_obj, 1), where b is the batch size and
            max_num_obj is the maximum number of objects.
        gt_bboxes (Tensor): Ground truth bounding boxes of shape (b, max_num_obj, 4).
        target_gt_idx (Tensor): Index of the assigned ground truth for each anchor, shape (b, h*w), where h*w is
            the total number of anchor points.
        fg_mask (Tensor): Shape (b, h*w), non-zero at positive (foreground) anchor points.

    Returns:
        (Tuple[Tensor, Tensor, Tensor]): A tuple containing the following tensors:
            - target_labels (Tensor): Shape (b, h*w), label of the assigned ground truth.
            - target_bboxes (Tensor): Shape (b, h*w, 4), box of the assigned ground truth.
            - target_scores (Tensor): Shape (b, h*w, num_classes), int64 one-hot of the label, zeroed for
              anchors outside `fg_mask`.
    """
    # Turn per-image indices into indices over the flattened (b * max_num_obj) ground truths
    image_offset = torch.arange(end=self.bs, dtype=torch.int64, device=gt_labels.device)[..., None]  # (b, 1)
    flat_idx = target_gt_idx + image_offset * self.n_max_boxes  # (b, h*w)

    target_labels = gt_labels.long().flatten()[flat_idx]  # (b, h*w)
    target_bboxes = gt_bboxes.view(-1, 4)[flat_idx]  # (b, h*w, 4)

    # Negative labels are clamped to 0 so they are valid scatter indices
    target_labels.clamp_(0)

    # One-hot via scatter; the original author notes this is 10x faster than F.one_hot()
    one_hot_shape = (target_labels.shape[0], target_labels.shape[1], self.num_classes)
    target_scores = torch.zeros(one_hot_shape, dtype=torch.int64, device=target_labels.device)
    target_scores.scatter_(2, target_labels.unsqueeze(-1), 1)

    # Keep scores only for foreground anchors
    is_fg = fg_mask[:, :, None].repeat(1, 1, self.num_classes) > 0  # (b, h*w, num_classes)
    target_scores = torch.where(is_fg, target_scores, 0)

    return target_labels, target_bboxes, target_scores
def make_anchors(feats, strides, grid_cell_offset=0.5):
    """
    Generate anchor points and their strides from a list of feature maps.

    Args:
        feats: Indexable collection of feature maps; `feats[i].shape` is unpacked as four values ending in h, w.
        strides: Iterable of stride values, one per feature level.
        grid_cell_offset (float): Offset added to every integer grid coordinate. Defaults to 0.5.

    Returns:
        (Tuple[Tensor, Tensor]): Anchor points in (x, y) order, shape (sum(h*w), 2), and the stride of each
            anchor, shape (sum(h*w), 1), concatenated over all levels.
    """
    assert feats is not None
    points_per_level, strides_per_level = [], []
    dtype, device = feats[0].dtype, feats[0].device
    for level, stride in enumerate(strides):
        _, _, h, w = feats[level].shape
        xs = torch.arange(end=w, device=device, dtype=dtype) + grid_cell_offset  # shift x
        ys = torch.arange(end=h, device=device, dtype=dtype) + grid_cell_offset  # shift y
        # The `indexing` keyword is only passed when the TORCH_1_10 flag is set
        if TORCH_1_10:
            grid_y, grid_x = torch.meshgrid(ys, xs, indexing='ij')
        else:
            grid_y, grid_x = torch.meshgrid(ys, xs)
        points_per_level.append(torch.stack((grid_x, grid_y), -1).view(-1, 2))
        strides_per_level.append(torch.full((h * w, 1), stride, dtype=dtype, device=device))
    return torch.cat(points_per_level), torch.cat(strides_per_level)
def dist2bbox(distance, anchor_points, xywh=True, dim=-1):
    """
    Convert distances (left, top, right, bottom) from anchor points into boxes.

    Args:
        distance (Tensor): ltrb distances; split into two halves (lt, rb) along `dim`.
        anchor_points (Tensor): Anchor coordinates, broadcastable against each half of `distance`.
        xywh (bool): Return (center_x, center_y, w, h) when True, (x1, y1, x2, y2) otherwise.
        dim (int): Dimension holding the box coordinates. Defaults to -1.

    Returns:
        (Tensor): Boxes concatenated along `dim`.
    """
    lt, rb = distance.chunk(2, dim)
    top_left = anchor_points - lt
    bottom_right = anchor_points + rb
    if not xywh:
        return torch.cat((top_left, bottom_right), dim)  # xyxy bbox
    center = (top_left + bottom_right) / 2
    size = bottom_right - top_left
    return torch.cat((center, size), dim)  # xywh bbox
def bbox2dist(anchor_points, bbox, reg_max):
    """
    Convert boxes (x1, y1, x2, y2) into distances (left, top, right, bottom) from anchor points.

    Args:
        anchor_points (Tensor): Anchor coordinates, broadcastable against each corner pair of `bbox`.
        bbox (Tensor): xyxy boxes; the last dimension is split into (x1, y1) and (x2, y2).
        reg_max: Upper bound of the distance range; results are clamped to [0, reg_max - 0.01].

    Returns:
        (Tensor): ltrb distances with the same leading shape as `bbox`.
    """
    x1y1, x2y2 = bbox.chunk(2, -1)
    ltrb = torch.cat((anchor_points - x1y1, x2y2 - anchor_points), -1)
    return ltrb.clamp_(0, reg_max - 0.01)  # dist (lt, rb)

View File

@ -0,0 +1,518 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
import math
import os
import platform
import random
import time
from contextlib import contextmanager
from copy import deepcopy
from pathlib import Path
from typing import Union
import numpy as np
import torch
import torch.distributed as dist
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from ultralytics.utils import DEFAULT_CFG_DICT, DEFAULT_CFG_KEYS, LOGGER, RANK, __version__
from ultralytics.utils.checks import check_requirements, check_version
try:
import thop
except ImportError:
thop = None
# Version feature flags, evaluated once at import time. Each holds the result of check_version() for the installed
# package version against the given version string (presumably True when installed >= that version, as the
# `minimum=` keyword below suggests — confirm against ultralytics.utils.checks.check_version).
TORCHVISION_0_10 = check_version(torchvision.__version__, '0.10.0')
TORCH_1_9 = check_version(torch.__version__, '1.9.0')  # selects torch.inference_mode in smart_inference_mode()
TORCH_1_11 = check_version(torch.__version__, '1.11.0')
TORCH_1_12 = check_version(torch.__version__, '1.12.0')
TORCH_2_0 = check_version(torch.__version__, minimum='2.0')  # gates MPS selection and deterministic training
@contextmanager
def torch_distributed_zero_first(local_rank: int):
    """
    Context manager that lets local rank 0 run the wrapped block before the other ranks.

    When torch.distributed is available and initialized, ranks other than -1 and 0 wait at a barrier before
    entering the block, and rank 0 reaches a barrier after leaving it. Otherwise the block simply runs.

    Args:
        local_rank (int): Local rank of the calling process.
    """
    ddp_active = torch.distributed.is_available() and torch.distributed.is_initialized()
    must_wait = local_rank not in (-1, 0)
    if ddp_active and must_wait:
        dist.barrier(device_ids=[local_rank])
    yield
    if ddp_active and local_rank == 0:
        dist.barrier(device_ids=[0])
def smart_inference_mode():
    """Return a decorator applying torch.inference_mode() if torch>=1.9.0, else torch.no_grad()."""

    def decorate(fn):
        """Wrap `fn` with the context manager chosen by the TORCH_1_9 flag."""
        context_factory = torch.inference_mode if TORCH_1_9 else torch.no_grad
        return context_factory()(fn)

    return decorate
def get_cpu_info():
    """Return the CPU brand string with '(R)', 'CPU ' and '@ ' removed, i.e. 'Apple M2'."""
    check_requirements('py-cpuinfo')
    import cpuinfo  # noqa

    brand = cpuinfo.get_cpu_info()['brand_raw']
    for noise in ('(R)', 'CPU ', '@ '):
        brand = brand.replace(noise, '')
    return brand
def select_device(device='', batch=0, newline=False, verbose=True):
    """
    Select the PyTorch device to run on.

    Args:
        device (str | int | tuple | list | None): Requested device, i.e. None, 'cpu', 'mps', 0, '0', 'cuda:0' or
            '0,1,2,3'. An empty request picks CUDA when available and CPU otherwise.
        batch (int): Batch size; when more than one GPU is requested it must be a multiple of the GPU count.
        newline (bool): Keep the trailing newline of the logged summary.
        verbose (bool): Log the summary (only when RANK == -1).

    Returns:
        (torch.device): 'cuda:0', 'mps' or 'cpu'.

    Raises:
        ValueError: If the requested CUDA devices are not available, or `batch` is not divisible by the number
            of requested GPUs.

    Note:
        Sets the CUDA_VISIBLE_DEVICES environment variable as a side effect.
    """
    s = f'Ultralytics YOLOv{__version__} 🚀 Python-{platform.python_version()} torch-{torch.__version__} '
    device = str(device).lower()
    for remove in 'cuda:', 'none', '(', ')', '[', ']', "'", ' ':
        device = device.replace(remove, '')  # to string, 'cuda:0' -> '0' and '(0, 1)' -> '0,1'
    cpu = device == 'cpu'
    mps = device == 'mps'  # Apple Metal Performance Shaders (MPS)
    if cpu or mps:
        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'  # force torch.cuda.is_available() = False
    elif device:  # non-cpu device requested
        if device == 'cuda':
            device = '0'
        visible = os.environ.get('CUDA_VISIBLE_DEVICES', None)
        os.environ['CUDA_VISIBLE_DEVICES'] = device  # set environment variable - must be before assert is_available()
        # Count comma-separated ids, not characters, so that multi-digit ids such as '10' count as one device
        n_requested = len([d for d in device.split(',') if d])
        if not (torch.cuda.is_available() and torch.cuda.device_count() >= n_requested):
            LOGGER.info(s)
            install = 'See https://pytorch.org/get-started/locally/ for up-to-date torch install instructions if no ' \
                      'CUDA devices are seen by torch.\n' if torch.cuda.device_count() == 0 else ''
            raise ValueError(f"Invalid CUDA 'device={device}' requested."
                             f" Use 'device=cpu' or pass valid CUDA device(s) if available,"
                             f" i.e. 'device=0' or 'device=0,1,2,3' for Multi-GPU.\n"
                             f'\ntorch.cuda.is_available(): {torch.cuda.is_available()}'
                             f'\ntorch.cuda.device_count(): {torch.cuda.device_count()}'
                             f"\nos.environ['CUDA_VISIBLE_DEVICES']: {visible}\n"
                             f'{install}')

    if not cpu and not mps and torch.cuda.is_available():  # prefer GPU if available
        devices = device.split(',') if device else '0'  # range(torch.cuda.device_count())  # i.e. 0,1,6,7
        n = len(devices)  # device count
        if n > 1 and batch > 0 and batch % n != 0:  # check batch_size is divisible by device_count
            raise ValueError(f"'batch={batch}' must be a multiple of GPU count {n}. Try 'batch={batch // n * n}' or "
                             f"'batch={batch // n * n + n}', the nearest batch sizes evenly divisible by {n}.")
        space = ' ' * (len(s) + 1)
        for i, d in enumerate(devices):
            # Properties are looked up by position i while the requested id d is shown in the label
            p = torch.cuda.get_device_properties(i)
            s += f"{'' if i == 0 else space}CUDA:{d} ({p.name}, {p.total_memory / (1 << 20):.0f}MiB)\n"  # bytes to MB
        arg = 'cuda:0'
    elif mps and getattr(torch, 'has_mps', False) and torch.backends.mps.is_available() and TORCH_2_0:
        # Prefer MPS if available
        s += f'MPS ({get_cpu_info()})\n'
        arg = 'mps'
    else:  # revert to CPU
        s += f'CPU ({get_cpu_info()})\n'
        arg = 'cpu'

    if verbose and RANK == -1:
        LOGGER.info(s if newline else s.rstrip())
    return torch.device(arg)
def time_sync():
    """Return time.time() after first synchronizing CUDA when it is available."""
    cuda_ready = torch.cuda.is_available()
    if cuda_ready:
        torch.cuda.synchronize()
    now = time.time()
    return now
def fuse_conv_and_bn(conv, bn):
    """
    Fuse Conv2d() and BatchNorm2d() layers https://tehnokv.com/posts/fusing-batchnorm-and-conv/.

    Args:
        conv (nn.Conv2d): Convolution whose output feeds `bn`.
        bn (nn.BatchNorm2d): Batch norm whose running statistics and affine parameters are folded in.

    Returns:
        (nn.Conv2d): New convolution with bias and requires_grad disabled, on the device of `conv.weight`.
    """
    fused = nn.Conv2d(conv.in_channels,
                      conv.out_channels,
                      kernel_size=conv.kernel_size,
                      stride=conv.stride,
                      padding=conv.padding,
                      dilation=conv.dilation,
                      groups=conv.groups,
                      bias=True).requires_grad_(False).to(conv.weight.device)

    # Filters: scale every output-channel row by gamma / sqrt(eps + running_var)
    bn_scale = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var)))
    flat_weight = conv.weight.clone().view(conv.out_channels, -1)
    fused.weight.copy_(torch.mm(bn_scale, flat_weight).view(fused.weight.shape))

    # Bias: scaled convolution bias (zeros when the convolution has none) plus the batch-norm shift
    if conv.bias is None:
        conv_bias = torch.zeros(conv.weight.size(0), device=conv.weight.device)
    else:
        conv_bias = conv.bias
    bn_shift = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps))
    fused.bias.copy_(torch.mm(bn_scale, conv_bias.reshape(-1, 1)).reshape(-1) + bn_shift)

    return fused
def fuse_deconv_and_bn(deconv, bn):
    """
    Fuse ConvTranspose2d() and BatchNorm2d() layers.

    Args:
        deconv (nn.ConvTranspose2d): Transposed convolution whose output feeds `bn`.
        bn (nn.BatchNorm2d): Batch norm whose running statistics and affine parameters are folded in.

    Returns:
        (nn.ConvTranspose2d): New layer with bias and requires_grad disabled, on the device of `deconv.weight`,
            producing the same output as `bn(deconv(x))` with `bn` in eval mode.
    """
    fuseddconv = nn.ConvTranspose2d(deconv.in_channels,
                                    deconv.out_channels,
                                    kernel_size=deconv.kernel_size,
                                    stride=deconv.stride,
                                    padding=deconv.padding,
                                    output_padding=deconv.output_padding,
                                    dilation=deconv.dilation,
                                    groups=deconv.groups,
                                    bias=True).requires_grad_(False).to(deconv.weight.device)

    # Per-output-channel batch-norm scale: gamma / sqrt(eps + running_var)
    scale = bn.weight.div(torch.sqrt(bn.eps + bn.running_var))

    # Prepare filters. ConvTranspose2d stores weights as (in_channels, out_channels // groups, kH, kW), so the
    # output channels live on dim 1 (per group), not dim 0 as in Conv2d. Expose the group dimension and scale
    # along the output-channel axis.
    groups = deconv.groups
    out_per_group = deconv.out_channels // groups
    w_deconv = deconv.weight.clone()
    w_grouped = w_deconv.reshape(groups, deconv.in_channels // groups, out_per_group, *w_deconv.shape[2:])
    w_fused = w_grouped * scale.view(groups, 1, out_per_group, 1, 1)
    fuseddconv.weight.copy_(w_fused.reshape(fuseddconv.weight.shape))

    # Prepare spatial bias: one entry per output channel (zeros when the layer has no bias)
    if deconv.bias is None:
        b_conv = torch.zeros(deconv.out_channels, device=deconv.weight.device)
    else:
        b_conv = deconv.bias
    b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps))
    fuseddconv.bias.copy_(scale * b_conv + b_bn)

    return fuseddconv
def model_info(model, detailed=False, verbose=True, imgsz=640):
    """
    Log a model summary. imgsz may be int or list, i.e. imgsz=640 or imgsz=[640, 320].

    Args:
        model: Model to summarise.
        detailed (bool): Also log one line per named parameter.
        verbose (bool): When False nothing is computed or logged and None is returned.
        imgsz (int | list): Image size forwarded to get_flops().

    Returns:
        (tuple | None): (number of layers, number of parameters, number of gradients, GFLOPs), or None when
            `verbose` is False.
    """
    if not verbose:
        return

    n_p = get_num_params(model)  # number of parameters
    n_g = get_num_gradients(model)  # number of gradients
    n_l = len(list(model.modules()))  # number of layers

    if detailed:
        LOGGER.info(
            f"{'layer':>5} {'name':>40} {'gradient':>9} {'parameters':>12} {'shape':>20} {'mu':>10} {'sigma':>10}")
        row_format = '%5g %40s %9s %12g %20s %10.3g %10.3g %10s'
        for idx, (pname, param) in enumerate(model.named_parameters()):
            pname = pname.replace('module_list.', '')
            row = (idx, pname, param.requires_grad, param.numel(), list(param.shape), param.mean(), param.std(),
                   param.dtype)
            LOGGER.info(row_format % row)

    flops = get_flops(model, imgsz)
    fused = ' (fused)' if getattr(model, 'is_fused', lambda: False)() else ''
    fs = f', {flops:.1f} GFLOPs' if flops else ''

    # Model name comes from the yaml file name when one is recorded on the model
    yaml_file = getattr(model, 'yaml_file', '') or getattr(model, 'yaml', {}).get('yaml_file', '')
    model_name = Path(yaml_file).stem.replace('yolo', 'YOLO') or 'Model'
    LOGGER.info(f'{model_name} summary{fused}: {n_l} layers, {n_p} parameters, {n_g} gradients{fs}')
    return n_l, n_p, n_g, flops
def get_num_params(model):
    """Return the total number of parameter elements in a YOLO model."""
    total = 0
    for param in model.parameters():
        total += param.numel()
    return total
def get_num_gradients(model):
    """Return the total number of parameter elements that require gradients in a YOLO model."""
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total
def model_info_for_loggers(trainer):
    """
    Return model info dict with useful model information.

    Example for YOLOv8n:
        {'model/parameters': 3151904,
         'model/GFLOPs': 8.746,
         'model/speed_ONNX(ms)': 41.244,
         'model/speed_TensorRT(ms)': 3.211,
         'model/speed_PyTorch(ms)': 18.755}
    """
    if not trainer.args.profile:
        # No profiling requested: report only parameter count and GFLOPs of the in-memory model
        results = {
            'model/parameters': get_num_params(trainer.model),
            'model/GFLOPs': round(get_flops(trainer.model), 3)}
    else:
        # Profile ONNX and TensorRT times from the last checkpoint
        from ultralytics.utils.benchmarks import ProfileModels

        profiler = ProfileModels([trainer.last], device=trainer.device)
        results = profiler.profile()[0]
        results.pop('model/name')

    # PyTorch time always comes from the most recent validation
    results['model/speed_PyTorch(ms)'] = round(trainer.validator.speed['inference'], 3)
    return results
def get_flops(model, imgsz=640):
    """
    Return a YOLO model's GFLOPs at `imgsz`, or 0 when thop is missing or anything fails.

    The model is profiled on a small stride x stride input and the result is scaled to the requested image size.
    """
    try:
        model = de_parallel(model)
        p = next(model.parameters())
        if hasattr(model, 'stride'):
            stride = max(int(model.stride.max()), 32)  # max stride
        else:
            stride = 32
        im = torch.empty((1, p.shape[1], stride, stride), device=p.device)  # input image in BCHW format
        if thop:
            flops = thop.profile(deepcopy(model), inputs=[im], verbose=False)[0] / 1E9 * 2  # stride GFLOPs
        else:
            flops = 0
        if not isinstance(imgsz, list):
            imgsz = [imgsz, imgsz]  # expand if int/float
        return flops * imgsz[0] / stride * imgsz[1] / stride  # 640x640 GFLOPs
    except Exception:
        return 0
def get_flops_with_torch_profiler(model, imgsz=640):
    """
    Compute model GFLOPs with torch.profiler (thop alternative).

    The model is run once on a zero image whose side is twice the (minimum 32) max stride, and the profiled
    FLOPs are scaled to `imgsz`.
    """
    model = de_parallel(model)
    p = next(model.parameters())
    max_stride = max(int(model.stride.max()), 32) if hasattr(model, 'stride') else 32
    stride = max_stride * 2
    im = torch.zeros((1, p.shape[1], stride, stride), device=p.device)  # input image in BCHW format
    with torch.profiler.profile(with_flops=True) as prof:
        model(im)
    flops = sum(x.flops for x in prof.key_averages()) / 1E9
    if not isinstance(imgsz, list):
        imgsz = [imgsz, imgsz]  # expand if int/float
    return flops * imgsz[0] / stride * imgsz[1] / stride  # 640x640 GFLOPs
def initialize_weights(model):
    """
    Adjust layer hyper-parameters in place; no weight values are changed.

    BatchNorm2d layers get eps=1e-3 and momentum=0.03, and Hardswish/LeakyReLU/ReLU/ReLU6/SiLU activations are
    switched to in-place mode. Only exact types match (subclasses are skipped). Conv2d layers are left as is.
    """
    inplace_activations = (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU)
    for module in model.modules():
        kind = type(module)
        if kind is nn.BatchNorm2d:
            module.eps = 1e-3
            module.momentum = 0.03
        elif kind in inplace_activations:
            module.inplace = True
        # nn.Conv2d: nothing to do (kaiming_normal_ init is intentionally not applied)
def scale_img(img, ratio=1.0, same_shape=False, gs=32):  # img(16,3,256,416)
    """
    Scale img(bs,3,y,x) by ratio and pad the result up to a gs-multiple (or back to the original size).

    Args:
        img (Tensor): Batch of images in BCHW layout.
        ratio (float): Scale factor; 1.0 returns `img` untouched.
        same_shape (bool): Pad back to the input height/width instead of the next gs-multiple.
        gs (int): Grid size the padded output is constrained to when `same_shape` is False.

    Returns:
        (Tensor): Bilinearly resized image, padded on the right/bottom with 0.447 (imagenet mean).
    """
    if ratio == 1.0:
        return img

    height, width = img.shape[2:]
    new_size = (int(height * ratio), int(width * ratio))
    resized = F.interpolate(img, size=new_size, mode='bilinear', align_corners=False)

    if same_shape:
        target_h, target_w = height, width
    else:
        target_h = math.ceil(height * ratio / gs) * gs
        target_w = math.ceil(width * ratio / gs) * gs
    padding = [0, target_w - new_size[1], 0, target_h - new_size[0]]
    return F.pad(resized, padding, value=0.447)  # value = imagenet mean
def make_divisible(x, divisor):
    """Round x up to the nearest multiple of divisor (a tensor divisor is reduced to its int max)."""
    step = int(divisor.max()) if isinstance(divisor, torch.Tensor) else divisor
    return math.ceil(x / step) * step
def copy_attr(a, b, include=(), exclude=()):
    """
    Copy instance attributes from object 'b' to object 'a'.

    Attributes whose name starts with '_' or is listed in `exclude` are skipped; when `include` is non-empty only
    the names it lists are copied.
    """
    for key, value in b.__dict__.items():
        selected = not len(include) or key in include
        if selected and not key.startswith('_') and key not in exclude:
            setattr(a, key, value)
def get_latest_opset():
    """Return second-most (for maturity) recently supported ONNX opset by this version of torch."""
    # Names in torch.onnx look like 'symbolic_opset<N>'; the prefix is 14 characters long
    opsets = [int(name[14:]) for name in vars(torch.onnx) if 'symbolic_opset' in name]
    return max(opsets) - 1  # opset
def intersect_dicts(da, db, exclude=()):
    """
    Return the entries of `da` whose key also exists in `db` with a value of the same shape.

    Keys containing any of the `exclude` substrings are dropped. Values are taken from `da`.
    """
    matched = {}
    for key, value in da.items():
        if key not in db:
            continue
        if any(pattern in key for pattern in exclude):
            continue
        if value.shape == db[key].shape:
            matched[key] = value
    return matched
def is_parallel(model):
    """Return True if model is a DataParallel (DP) or DistributedDataParallel (DDP) wrapper."""
    parallel_types = (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel)
    return isinstance(model, parallel_types)
def de_parallel(model):
    """De-parallelize a model: return the wrapped `.module` for DP/DDP models, else the model itself."""
    if is_parallel(model):
        return model.module
    return model
def one_cycle(y1=0.0, y2=1.0, steps=100):
    """
    Return a function for a sinusoidal ramp from y1 to y2 https://arxiv.org/pdf/1812.01187.pdf.

    The returned callable maps x=0 to y1 and x=steps to y2 along half a cosine period.
    """

    def ramp(x):
        """Value of the ramp at step x."""
        return ((1 - math.cos(x * math.pi / steps)) / 2) * (y2 - y1) + y1

    return ramp
def init_seeds(seed=0, deterministic=False):
    """
    Initialize random number generator (RNG) seeds https://pytorch.org/docs/stable/notes/randomness.html.

    Args:
        seed (int): Seed applied to `random`, NumPy and torch (CPU and all CUDA devices).
        deterministic (bool): Request deterministic torch algorithms (effective only when the TORCH_2_0 flag is
            set; otherwise a warning is logged). When False, deterministic mode is switched off explicitly.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # for Multi-GPU, exception safe
    # cudnn.benchmark is deliberately left untouched, AutoBatch problem
    # https://github.com/ultralytics/yolov5/issues/9287

    if not deterministic:
        torch.use_deterministic_algorithms(False)
        torch.backends.cudnn.deterministic = False
        return

    if not TORCH_2_0:
        LOGGER.warning('WARNING ⚠️ Upgrade to torch>=2.0.0 for deterministic training.')
        return

    torch.use_deterministic_algorithms(True, warn_only=True)  # warn if deterministic is not possible
    torch.backends.cudnn.deterministic = True
    os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8'
    os.environ['PYTHONHASHSEED'] = str(seed)
class ModelEMA:
    """
    Updated Exponential Moving Average (EMA) from https://github.com/rwightman/pytorch-image-models.

    Keeps a moving average of everything in the model state_dict (parameters and buffers).
    For EMA details see https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage
    To disable EMA set the `enabled` attribute to `False`.
    """

    def __init__(self, model, decay=0.9999, tau=2000, updates=0):
        """
        Create the EMA as an eval-mode deep copy of the (de-parallelized) model with gradients disabled.

        Args:
            model: Model to track.
            decay (float): Asymptotic decay factor.
            tau (float): Time constant of the decay ramp, in updates.
            updates (int): Number of EMA updates already performed.
        """
        self.ema = deepcopy(de_parallel(model)).eval()  # FP32 EMA
        self.updates = updates  # number of EMA updates
        # Decay ramps exponentially from 0 towards `decay` (to help early epochs)
        self.decay = lambda x: decay * (1 - math.exp(-x / tau))
        for param in self.ema.parameters():
            param.requires_grad_(False)
        self.enabled = True

    def update(self, model):
        """Blend the current model state into every floating-point EMA tensor, in place."""
        if not self.enabled:
            return

        self.updates += 1
        d = self.decay(self.updates)
        source_state = de_parallel(model).state_dict()  # model state_dict
        for key, ema_value in self.ema.state_dict().items():
            if not ema_value.dtype.is_floating_point:  # only FP16 and FP32 tensors are averaged
                continue
            ema_value *= d
            ema_value += (1 - d) * source_state[key].detach()

    def update_attr(self, model, include=(), exclude=('process_group', 'reducer')):
        """Copy attributes from `model` onto the EMA model via copy_attr(), honouring include/exclude."""
        if self.enabled:
            copy_attr(self.ema, model, include, exclude)
def strip_optimizer(f: Union[str, Path] = 'best.pt', s: str = '') -> None:
    """
    Strip optimizer from 'f' to finalize training, optionally save as 's'.

    The checkpoint's EMA weights (if present) replace the model, the model is converted to FP16 with gradients
    disabled, optimizer/EMA bookkeeping keys are cleared, and `train_args` is reduced to the default config keys.
    Checkpoints without a `train_args` entry are handled by falling back to the default configuration.

    Args:
        f (str): file path to model to strip the optimizer from. Default is 'best.pt'.
        s (str): file path to save the model with stripped optimizer to. If not provided, 'f' will be overwritten.

    Returns:
        None

    Usage:
        from pathlib import Path
        from ultralytics.utils.torch_utils import strip_optimizer
        for f in Path('/Users/glennjocher/Downloads/weights').rglob('*.pt'):
            strip_optimizer(f)
    """
    # Use dill (if exists) to serialize the lambda functions where pickle does not do this
    try:
        import dill as pickle
    except ImportError:
        import pickle

    # NOTE: torch.load() unpickles the file and can execute arbitrary code; only use on trusted checkpoints
    x = torch.load(f, map_location=torch.device('cpu'))
    if 'model' not in x:
        LOGGER.info(f'Skipping {f}, not a valid Ultralytics model.')
        return

    if hasattr(x['model'], 'args'):
        x['model'].args = dict(x['model'].args)  # convert from IterableSimpleNamespace to dict
    # Combine defaults with the stored training args; a missing or empty entry falls back to the defaults
    args = {**DEFAULT_CFG_DICT, **(x.get('train_args') or {})}
    if x.get('ema'):
        x['model'] = x['ema']  # replace model with ema
    for k in 'optimizer', 'best_fitness', 'ema', 'updates':  # keys
        x[k] = None
    x['epoch'] = -1
    x['model'].half()  # to FP16
    for p in x['model'].parameters():
        p.requires_grad = False
    x['train_args'] = {k: v for k, v in args.items() if k in DEFAULT_CFG_KEYS}  # strip non-default keys
    # x['model'].args = x['train_args']
    torch.save(x, s or f, pickle_module=pickle)
    mb = os.path.getsize(s or f) / 1E6  # filesize
    LOGGER.info(f"Optimizer stripped from {f},{f' saved as {s},' if s else ''} {mb:.1f}MB")
def profile(input, ops, n=10, device=None):
    """
    YOLOv8 speed/memory/FLOPs profiler.

    Every op is run `n` times forward and backward on every input; one result row is logged and collected per
    (input, op) pair.

    Args:
        input (Tensor | list): Input tensor or list of input tensors.
        ops (callable | list): Module/callable or list of them.
        n (int): Number of timed iterations.
        device: torch.device to use; anything else is passed to select_device().

    Returns:
        (list): One [params, GFLOPs, GPU_mem (GB), forward (ms), backward (ms), input shape, output shape] row
            per (input, op) pair, or None for pairs that raised.

    Usage:
        input = torch.randn(16, 3, 640, 640)
        m1 = lambda x: x * torch.sigmoid(x)
        m2 = nn.SiLU()
        profile(input, [m1, m2], n=100)  # profile over 100 iterations
    """
    results = []
    if not isinstance(device, torch.device):
        device = select_device(device)
    LOGGER.info(f"{'Params':>12s}{'GFLOPs':>12s}{'GPU_mem (GB)':>14s}{'forward (ms)':>14s}{'backward (ms)':>14s}"
                f"{'input':>24s}{'output':>24s}")

    all_inputs = input if isinstance(input, list) else [input]
    all_ops = ops if isinstance(ops, list) else [ops]
    for x in all_inputs:
        x = x.to(device)
        x.requires_grad = True
        for m in all_ops:
            if hasattr(m, 'to'):
                m = m.to(device)  # device
            if hasattr(m, 'half') and isinstance(x, torch.Tensor) and x.dtype is torch.float16:
                m = m.half()
            tf, tb = 0, 0  # accumulated dt forward, backward
            t_start, t_fwd, t_bwd = 0, 0, 0

            try:
                flops = thop.profile(m, inputs=[x], verbose=False)[0] / 1E9 * 2 if thop else 0  # GFLOPs
            except Exception:
                flops = 0

            try:
                for _ in range(n):
                    t_start = time_sync()
                    y = m(x)
                    t_fwd = time_sync()
                    try:
                        _ = (sum(yi.sum() for yi in y) if isinstance(y, list) else y).sum().backward()
                        t_bwd = time_sync()
                    except Exception:  # no backward method
                        t_bwd = float('nan')
                    tf += (t_fwd - t_start) * 1000 / n  # ms per op forward
                    tb += (t_bwd - t_fwd) * 1000 / n  # ms per op backward
                mem = torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0  # (GB)
                s_in = tuple(x.shape) if isinstance(x, torch.Tensor) else 'list'
                s_out = tuple(y.shape) if isinstance(y, torch.Tensor) else 'list'
                if isinstance(m, nn.Module):
                    p = sum(w.numel() for w in m.parameters())  # parameters
                else:
                    p = 0
                LOGGER.info(f'{p:12}{flops:12.4g}{mem:>14.3f}{tf:14.4g}{tb:14.4g}{str(s_in):>24s}{str(s_out):>24s}')
                results.append([p, flops, mem, tf, tb, s_in, s_out])
            except Exception as e:
                # Best effort: a failing pair is logged and recorded as None so the remaining pairs still run
                LOGGER.info(e)
                results.append(None)
            torch.cuda.empty_cache()
    return results
class EarlyStopping:
    """Early stopping class that stops training when a specified number of epochs have passed without improvement."""

    def __init__(self, patience=50):
        """
        Initialize early stopping object.

        Args:
            patience (int, optional): Number of epochs to wait after fitness stops improving before stopping.
                A falsy value (e.g. 0) disables early stopping by using an infinite patience.
        """
        self.best_fitness = 0.0  # i.e. mAP
        self.best_epoch = 0
        self.patience = patience or float('inf')  # epochs to wait after fitness stops improving to stop
        self.possible_stop = False  # possible stop may occur next epoch

    def __call__(self, epoch, fitness):
        """
        Check whether to stop training.

        Args:
            epoch (int): Current epoch of training
            fitness (float): Fitness value of current epoch

        Returns:
            (bool): True if training should stop, False otherwise
        """
        if fitness is None:  # happens when val=False
            return False

        # Ties count as an improvement (>=) to allow for the early zero-fitness stage of training
        if fitness >= self.best_fitness:
            self.best_epoch = epoch
            self.best_fitness = fitness

        stale_epochs = epoch - self.best_epoch  # epochs without improvement
        self.possible_stop = stale_epochs >= (self.patience - 1)  # possible stop may occur next epoch
        should_stop = stale_epochs >= self.patience  # stop training if patience exceeded
        if should_stop:
            LOGGER.info(f'Stopping training early as no improvement observed in last {self.patience} epochs. '
                        f'Best results observed at epoch {self.best_epoch}, best model saved as best.pt.\n'
                        f'To update EarlyStopping(patience={self.patience}) pass a new patience value, '
                        f'i.e. `patience=300` or use `patience=0` to disable EarlyStopping.')
        return should_stop

120
ultralytics/utils/tuner.py Normal file
View File

@ -0,0 +1,120 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
from ultralytics.cfg import TASK2DATA, TASK2METRIC
from ultralytics.utils import DEFAULT_CFG_DICT, LOGGER, NUM_THREADS
def run_ray_tune(model,
                 space: dict = None,
                 grace_period: int = 10,
                 gpu_per_trial: int = None,
                 max_samples: int = 10,
                 **train_args):
    """
    Runs hyperparameter tuning using Ray Tune.

    Args:
        model (YOLO): Model to run the tuner on.
        space (dict, optional): The hyperparameter search space. Defaults to None, which selects the built-in
            default space. The passed dict is copied, not modified.
        grace_period (int, optional): The grace period in epochs of the ASHA scheduler. Defaults to 10.
        gpu_per_trial (int, optional): The number of GPUs to allocate per trial. Defaults to None.
        max_samples (int, optional): The maximum number of trials to run. Defaults to 10.
        train_args (dict, optional): Additional arguments to pass to the `train()` method. Defaults to {}.

    Returns:
        The results of the hyperparameter search, as returned by `tuner.get_results()`.

    Raises:
        ModuleNotFoundError: If Ray Tune is not installed.
    """
    try:
        from ray import tune
        from ray.air import RunConfig
        from ray.air.integrations.wandb import WandbLoggerCallback
        from ray.tune.schedulers import ASHAScheduler
    except ImportError as e:
        raise ModuleNotFoundError(
            "Tuning hyperparameters requires Ray Tune. Install with: pip install 'ray[tune]'") from e

    # Weights & Biases logging is optional: fall back to False when it is missing or is not the real package
    try:
        import wandb

        assert hasattr(wandb, '__version__')
    except (ImportError, AssertionError):
        wandb = False

    default_space = {
        # 'optimizer': tune.choice(['SGD', 'Adam', 'AdamW', 'NAdam', 'RAdam', 'RMSProp']),
        'lr0': tune.uniform(1e-5, 1e-1),
        'lrf': tune.uniform(0.01, 1.0),  # final OneCycleLR learning rate (lr0 * lrf)
        'momentum': tune.uniform(0.6, 0.98),  # SGD momentum/Adam beta1
        'weight_decay': tune.uniform(0.0, 0.001),  # optimizer weight decay 5e-4
        'warmup_epochs': tune.uniform(0.0, 5.0),  # warmup epochs (fractions ok)
        'warmup_momentum': tune.uniform(0.0, 0.95),  # warmup initial momentum
        'box': tune.uniform(0.02, 0.2),  # box loss gain
        'cls': tune.uniform(0.2, 4.0),  # cls loss gain (scale with pixels)
        'hsv_h': tune.uniform(0.0, 0.1),  # image HSV-Hue augmentation (fraction)
        'hsv_s': tune.uniform(0.0, 0.9),  # image HSV-Saturation augmentation (fraction)
        'hsv_v': tune.uniform(0.0, 0.9),  # image HSV-Value augmentation (fraction)
        'degrees': tune.uniform(0.0, 45.0),  # image rotation (+/- deg)
        'translate': tune.uniform(0.0, 0.9),  # image translation (+/- fraction)
        'scale': tune.uniform(0.0, 0.9),  # image scale (+/- gain)
        'shear': tune.uniform(0.0, 10.0),  # image shear (+/- deg)
        'perspective': tune.uniform(0.0, 0.001),  # image perspective (+/- fraction), range 0-0.001
        'flipud': tune.uniform(0.0, 1.0),  # image flip up-down (probability)
        'fliplr': tune.uniform(0.0, 1.0),  # image flip left-right (probability)
        'mosaic': tune.uniform(0.0, 1.0),  # image mosaic (probability)
        'mixup': tune.uniform(0.0, 1.0),  # image mixup (probability)
        'copy_paste': tune.uniform(0.0, 1.0)}  # segment copy-paste (probability)

    def _tune(config):
        """
        Trains the YOLO model with the specified hyperparameters and additional arguments.

        Args:
            config (dict): A dictionary of hyperparameters to use for training.

        Returns:
            None.
        """
        model._reset_callbacks()
        config.update(train_args)  # explicit train arguments override sampled values
        model.train(**config)

    # Get search space
    if space:
        space = dict(space)  # shallow copy so that adding 'data' below does not mutate the caller's dict
    else:
        space = default_space
        LOGGER.warning('WARNING ⚠️ search space not provided, using default search space.')

    # Get dataset
    data = train_args.get('data', TASK2DATA[model.task])
    space['data'] = data
    if 'data' not in train_args:
        LOGGER.warning(f'WARNING ⚠️ data not provided, using default "data={data}".')

    # Define the trainable function with allocated resources
    trainable_with_resources = tune.with_resources(_tune, {'cpu': NUM_THREADS, 'gpu': gpu_per_trial or 0})

    # Define the ASHA scheduler for hyperparameter search
    asha_scheduler = ASHAScheduler(time_attr='epoch',
                                   metric=TASK2METRIC[model.task],
                                   mode='max',
                                   max_t=train_args.get('epochs') or DEFAULT_CFG_DICT['epochs'] or 100,
                                   grace_period=grace_period,
                                   reduction_factor=3)

    # Define the callbacks for the hyperparameter search
    tuner_callbacks = [WandbLoggerCallback(project='YOLOv8-tune')] if wandb else []

    # Create the Ray Tune hyperparameter search tuner
    tuner = tune.Tuner(trainable_with_resources,
                       param_space=space,
                       tune_config=tune.TuneConfig(scheduler=asha_scheduler, num_samples=max_samples),
                       run_config=RunConfig(callbacks=tuner_callbacks, storage_path='./runs/tune'))

    # Run the hyperparameter search
    tuner.fit()

    # Return the results of the hyperparameter search
    return tuner.get_results()