ultralytics 8.0.136 refactor and simplify package (#3748)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com>
Author: Laughing
Date: 2023-07-16 23:47:45 +08:00
Committed by: GitHub
Parent: 8ebe94d1e9
Commit: 620f3eb218
383 changed files with 4213 additions and 4646 deletions

@@ -0,0 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
from .base import add_integration_callbacks, default_callbacks, get_default_callbacks
__all__ = 'add_integration_callbacks', 'default_callbacks', 'get_default_callbacks'

@@ -0,0 +1,212 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
Base callbacks
"""
from collections import defaultdict
from copy import deepcopy
# Trainer callbacks ----------------------------------------------------------------------------------------------------
def on_pretrain_routine_start(trainer):
"""Called before the pretraining routine starts."""
pass
def on_pretrain_routine_end(trainer):
"""Called after the pretraining routine ends."""
pass
def on_train_start(trainer):
"""Called when the training starts."""
pass
def on_train_epoch_start(trainer):
"""Called at the start of each training epoch."""
pass
def on_train_batch_start(trainer):
"""Called at the start of each training batch."""
pass
def optimizer_step(trainer):
"""Called when the optimizer takes a step."""
pass
def on_before_zero_grad(trainer):
"""Called before the gradients are set to zero."""
pass
def on_train_batch_end(trainer):
"""Called at the end of each training batch."""
pass
def on_train_epoch_end(trainer):
"""Called at the end of each training epoch."""
pass
def on_fit_epoch_end(trainer):
"""Called at the end of each fit epoch (train + val)."""
pass
def on_model_save(trainer):
"""Called when the model is saved."""
pass
def on_train_end(trainer):
"""Called when the training ends."""
pass
def on_params_update(trainer):
"""Called when the model parameters are updated."""
pass
def teardown(trainer):
"""Called during the teardown of the training process."""
pass
# Validator callbacks --------------------------------------------------------------------------------------------------
def on_val_start(validator):
"""Called when the validation starts."""
pass
def on_val_batch_start(validator):
"""Called at the start of each validation batch."""
pass
def on_val_batch_end(validator):
"""Called at the end of each validation batch."""
pass
def on_val_end(validator):
"""Called when the validation ends."""
pass
# Predictor callbacks --------------------------------------------------------------------------------------------------
def on_predict_start(predictor):
"""Called when the prediction starts."""
pass
def on_predict_batch_start(predictor):
"""Called at the start of each prediction batch."""
pass
def on_predict_batch_end(predictor):
"""Called at the end of each prediction batch."""
pass
def on_predict_postprocess_end(predictor):
"""Called after the post-processing of the prediction ends."""
pass
def on_predict_end(predictor):
"""Called when the prediction ends."""
pass
# Exporter callbacks ---------------------------------------------------------------------------------------------------
def on_export_start(exporter):
"""Called when the model export starts."""
pass
def on_export_end(exporter):
"""Called when the model export ends."""
pass
default_callbacks = {
# Run in trainer
'on_pretrain_routine_start': [on_pretrain_routine_start],
'on_pretrain_routine_end': [on_pretrain_routine_end],
'on_train_start': [on_train_start],
'on_train_epoch_start': [on_train_epoch_start],
'on_train_batch_start': [on_train_batch_start],
'optimizer_step': [optimizer_step],
'on_before_zero_grad': [on_before_zero_grad],
'on_train_batch_end': [on_train_batch_end],
'on_train_epoch_end': [on_train_epoch_end],
'on_fit_epoch_end': [on_fit_epoch_end], # fit = train + val
'on_model_save': [on_model_save],
'on_train_end': [on_train_end],
'on_params_update': [on_params_update],
'teardown': [teardown],
# Run in validator
'on_val_start': [on_val_start],
'on_val_batch_start': [on_val_batch_start],
'on_val_batch_end': [on_val_batch_end],
'on_val_end': [on_val_end],
# Run in predictor
'on_predict_start': [on_predict_start],
'on_predict_batch_start': [on_predict_batch_start],
'on_predict_postprocess_end': [on_predict_postprocess_end],
'on_predict_batch_end': [on_predict_batch_end],
'on_predict_end': [on_predict_end],
# Run in exporter
'on_export_start': [on_export_start],
'on_export_end': [on_export_end]}
def get_default_callbacks():
"""
Return a copy of the default_callbacks dictionary with lists as default values.
Returns:
(defaultdict): A defaultdict with keys from default_callbacks and empty lists as default values.
"""
return defaultdict(list, deepcopy(default_callbacks))
def add_integration_callbacks(instance):
"""
Add integration callbacks from various sources to the instance's callbacks.
Args:
instance (Trainer, Predictor, Validator, Exporter): An object with a 'callbacks' attribute that is a dictionary
of callback lists.
"""
from .clearml import callbacks as clearml_cb
from .comet import callbacks as comet_cb
from .dvc import callbacks as dvc_cb
from .hub import callbacks as hub_cb
from .mlflow import callbacks as mlflow_cb
from .neptune import callbacks as neptune_cb
from .raytune import callbacks as tune_cb
from .tensorboard import callbacks as tensorboard_cb
from .wb import callbacks as wb_cb
for x in clearml_cb, comet_cb, hub_cb, mlflow_cb, neptune_cb, tune_cb, tensorboard_cb, wb_cb, dvc_cb:
for k, v in x.items():
if v not in instance.callbacks[k]: # prevent duplicate callbacks addition
instance.callbacks[k].append(v) # callback[name].append(func)
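
As a usage sketch (an editor's illustration, not part of this diff): any function that accepts the trainer can be appended to one of these event lists. The `add_callback` helper and the dispatch loop shown in the comments reflect the wider Ultralytics API and are assumptions here rather than code from this file.

from ultralytics import YOLO

def print_epoch(trainer):
    """Print the index of the epoch that just finished."""
    print(f'Finished epoch {trainer.epoch}')

model = YOLO('yolov8n.pt')
model.add_callback('on_train_epoch_end', print_epoch)  # appended to the 'on_train_epoch_end' list
model.train(data='coco128.yaml', epochs=3)

# Inside the trainer, each event is dispatched roughly as:
#   for callback in self.callbacks.get('on_train_epoch_end', []):
#       callback(self)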

@@ -0,0 +1,143 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
import re
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
from ultralytics.utils import LOGGER, TESTS_RUNNING
from ultralytics.utils.torch_utils import model_info_for_loggers
try:
import clearml
from clearml import Task
from clearml.binding.frameworks.pytorch_bind import PatchPyTorchModelIO
from clearml.binding.matplotlib_bind import PatchedMatplotlib
assert hasattr(clearml, '__version__') # verify package is not directory
assert not TESTS_RUNNING # do not log pytest
except (ImportError, AssertionError):
clearml = None
def _log_debug_samples(files, title='Debug Samples') -> None:
"""
Log files (images) as debug samples in the ClearML task.
Args:
files (list): A list of file paths in PosixPath format.
title (str): A title that groups together images with the same values.
"""
task = Task.current_task()
if task:
for f in files:
if f.exists():
it = re.search(r'_batch(\d+)', f.name)
iteration = int(it.groups()[0]) if it else 0
task.get_logger().report_image(title=title,
series=f.name.replace(it.group(), ''),
local_path=str(f),
iteration=iteration)
def _log_plot(title, plot_path) -> None:
"""
Log an image as a plot in the plot section of ClearML.
Args:
title (str): The title of the plot.
plot_path (str): The path to the saved image file.
"""
img = mpimg.imread(plot_path)
fig = plt.figure()
ax = fig.add_axes([0, 0, 1, 1], frameon=False, aspect='auto', xticks=[], yticks=[]) # no ticks
ax.imshow(img)
Task.current_task().get_logger().report_matplotlib_figure(title=title,
series='',
figure=fig,
report_interactive=False)
def on_pretrain_routine_start(trainer):
"""Runs at start of pretraining routine; initializes and connects/ logs task to ClearML."""
try:
task = Task.current_task()
if task:
# Make sure the automatic pytorch and matplotlib bindings are disabled!
# We are logging these plots and model files manually in the integration
PatchPyTorchModelIO.update_current_task(None)
PatchedMatplotlib.update_current_task(None)
else:
task = Task.init(project_name=trainer.args.project or 'YOLOv8',
task_name=trainer.args.name,
tags=['YOLOv8'],
output_uri=True,
reuse_last_task_id=False,
auto_connect_frameworks={
'pytorch': False,
'matplotlib': False})
LOGGER.warning('ClearML initialized a new task. If you want to run remotely, '
'please run clearml-init and connect your arguments before initializing YOLO.')
task.connect(vars(trainer.args), name='General')
except Exception as e:
LOGGER.warning(f'WARNING ⚠️ ClearML installed but not initialized correctly, not logging this run. {e}')
def on_train_epoch_end(trainer):
"""Logs debug samples for the first epoch and reports training progress at the end of each epoch."""
task = Task.current_task()
if task:
# Log debug samples for the first epoch of YOLO training
if trainer.epoch == 1:
_log_debug_samples(sorted(trainer.save_dir.glob('train_batch*.jpg')), 'Mosaic')
# Report the current training progress
for k, v in trainer.validator.metrics.results_dict.items():
task.get_logger().report_scalar('train', k, v, iteration=trainer.epoch)
def on_fit_epoch_end(trainer):
"""Reports model information to logger at the end of an epoch."""
task = Task.current_task()
if task:
# You should have access to the validation bboxes under jdict
task.get_logger().report_scalar(title='Epoch Time',
series='Epoch Time',
value=trainer.epoch_time,
iteration=trainer.epoch)
if trainer.epoch == 0:
for k, v in model_info_for_loggers(trainer).items():
task.get_logger().report_single_value(k, v)
def on_val_end(validator):
"""Logs validation results including labels and predictions."""
if Task.current_task():
# Log val_labels and val_pred
_log_debug_samples(sorted(validator.save_dir.glob('val*.jpg')), 'Validation')
def on_train_end(trainer):
"""Logs final model and its name on training completion."""
task = Task.current_task()
if task:
# Log final results, CM matrix + PR plots
files = [
'results.png', 'confusion_matrix.png', 'confusion_matrix_normalized.png',
*(f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R'))]
files = [(trainer.save_dir / f) for f in files if (trainer.save_dir / f).exists()] # filter
for f in files:
_log_plot(title=f.stem, plot_path=f)
# Report final metrics
for k, v in trainer.validator.metrics.results_dict.items():
task.get_logger().report_single_value(k, v)
# Log the final model
task.update_output_model(model_path=str(trainer.best), model_name=trainer.args.name, auto_delete_file=False)
callbacks = {
'on_pretrain_routine_start': on_pretrain_routine_start,
'on_train_epoch_end': on_train_epoch_end,
'on_fit_epoch_end': on_fit_epoch_end,
'on_val_end': on_val_end,
'on_train_end': on_train_end} if clearml else {}
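
A minimal sketch (illustrative, not from this commit) of the flow the callback above expects when you want control over the ClearML task: create the task yourself before constructing the model, and on_pretrain_routine_start will reuse Task.current_task() instead of initializing a new one. The project and task names below are hypothetical.

from clearml import Task
from ultralytics import YOLO

task = Task.init(project_name='my-yolo-project',  # hypothetical names
                 task_name='yolov8n-coco128',
                 auto_connect_frameworks={'pytorch': False, 'matplotlib': False})
model = YOLO('yolov8n.pt')
model.train(data='coco128.yaml', epochs=3)  # the callback finds and reuses the existing task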

@@ -0,0 +1,368 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
import os
from pathlib import Path
from ultralytics.utils import LOGGER, RANK, TESTS_RUNNING, ops
from ultralytics.utils.torch_utils import model_info_for_loggers
try:
import comet_ml
assert not TESTS_RUNNING # do not log pytest
assert hasattr(comet_ml, '__version__') # verify package is not directory
except (ImportError, AssertionError):
comet_ml = None
# Ensures certain logging functions only run for supported tasks
COMET_SUPPORTED_TASKS = ['detect']
# Names of plots created by YOLOv8 that are logged to Comet
EVALUATION_PLOT_NAMES = 'F1_curve', 'P_curve', 'R_curve', 'PR_curve', 'confusion_matrix'
LABEL_PLOT_NAMES = 'labels', 'labels_correlogram'
_comet_image_prediction_count = 0
def _get_comet_mode():
return os.getenv('COMET_MODE', 'online')
def _get_comet_model_name():
return os.getenv('COMET_MODEL_NAME', 'YOLOv8')
def _get_eval_batch_logging_interval():
return int(os.getenv('COMET_EVAL_BATCH_LOGGING_INTERVAL', 1))
def _get_max_image_predictions_to_log():
return int(os.getenv('COMET_MAX_IMAGE_PREDICTIONS', 100))
def _scale_confidence_score(score):
scale = float(os.getenv('COMET_MAX_CONFIDENCE_SCORE', 100.0))
return score * scale
def _should_log_confusion_matrix():
return os.getenv('COMET_EVAL_LOG_CONFUSION_MATRIX', 'false').lower() == 'true'
def _should_log_image_predictions():
return os.getenv('COMET_EVAL_LOG_IMAGE_PREDICTIONS', 'true').lower() == 'true'
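
A short sketch (editor's illustration) of how these getters are typically driven: set the corresponding environment variables before training starts.

import os

os.environ['COMET_MODE'] = 'offline'                    # default 'online'
os.environ['COMET_MODEL_NAME'] = 'my-yolov8'            # default 'YOLOv8'
os.environ['COMET_EVAL_BATCH_LOGGING_INTERVAL'] = '2'   # log every 2nd eval batch
os.environ['COMET_MAX_IMAGE_PREDICTIONS'] = '50'        # cap on logged prediction images
os.environ['COMET_EVAL_LOG_CONFUSION_MATRIX'] = 'true'  # also log a confusion matrix on eval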
def _get_experiment_type(mode, project_name):
"""Return an experiment based on mode and project name."""
if mode == 'offline':
return comet_ml.OfflineExperiment(project_name=project_name)
return comet_ml.Experiment(project_name=project_name)
def _create_experiment(args):
"""Ensures that the experiment object is only created in a single process during distributed training."""
if RANK not in (-1, 0):
return
try:
comet_mode = _get_comet_mode()
_project_name = os.getenv('COMET_PROJECT_NAME', args.project)
experiment = _get_experiment_type(comet_mode, _project_name)
experiment.log_parameters(vars(args))
experiment.log_others({
'eval_batch_logging_interval': _get_eval_batch_logging_interval(),
'log_confusion_matrix_on_eval': _should_log_confusion_matrix(),
'log_image_predictions': _should_log_image_predictions(),
'max_image_predictions': _get_max_image_predictions_to_log(), })
experiment.log_other('Created from', 'yolov8')
except Exception as e:
LOGGER.warning(f'WARNING ⚠️ Comet installed but not initialized correctly, not logging this run. {e}')
def _fetch_trainer_metadata(trainer):
"""Returns metadata for YOLO training including epoch and asset saving status."""
curr_epoch = trainer.epoch + 1
train_num_steps_per_epoch = len(trainer.train_loader.dataset) // trainer.batch_size
curr_step = curr_epoch * train_num_steps_per_epoch
final_epoch = curr_epoch == trainer.epochs
save = trainer.args.save
save_period = trainer.args.save_period
save_interval = curr_epoch % save_period == 0
save_assets = save and save_period > 0 and save_interval and not final_epoch
return dict(
curr_epoch=curr_epoch,
curr_step=curr_step,
save_assets=save_assets,
final_epoch=final_epoch,
)
def _scale_bounding_box_to_original_image_shape(box, resized_image_shape, original_image_shape, ratio_pad):
"""YOLOv8 resizes images during training and the label values
are normalized based on this resized shape. This function rescales the
bounding box labels to the original image shape.
"""
resized_image_height, resized_image_width = resized_image_shape
# Convert normalized xywh format predictions to xyxy in resized scale format
box = ops.xywhn2xyxy(box, h=resized_image_height, w=resized_image_width)
# Scale box predictions from resized image scale back to original image scale
box = ops.scale_boxes(resized_image_shape, box, original_image_shape, ratio_pad)
# Convert bounding box format from xyxy to xywh for Comet logging
box = ops.xyxy2xywh(box)
# Adjust xy center to correspond top-left corner
box[:2] -= box[2:] / 2
box = box.tolist()
return box
def _format_ground_truth_annotations_for_detection(img_idx, image_path, batch, class_name_map=None):
"""Format ground truth annotations for detection."""
indices = batch['batch_idx'] == img_idx
bboxes = batch['bboxes'][indices]
if len(bboxes) == 0:
LOGGER.debug(f'COMET WARNING: Image: {image_path} has no bounding box labels')
return None
cls_labels = batch['cls'][indices].squeeze(1).tolist()
if class_name_map:
cls_labels = [str(class_name_map[label]) for label in cls_labels]
original_image_shape = batch['ori_shape'][img_idx]
resized_image_shape = batch['resized_shape'][img_idx]
ratio_pad = batch['ratio_pad'][img_idx]
data = []
for box, label in zip(bboxes, cls_labels):
box = _scale_bounding_box_to_original_image_shape(box, resized_image_shape, original_image_shape, ratio_pad)
data.append({
'boxes': [box],
'label': f'gt_{label}',
'score': _scale_confidence_score(1.0), })
return {'name': 'ground_truth', 'data': data}
def _format_prediction_annotations_for_detection(image_path, metadata, class_label_map=None):
"""Format YOLO predictions for object detection visualization."""
stem = image_path.stem
image_id = int(stem) if stem.isnumeric() else stem
predictions = metadata.get(image_id)
if not predictions:
LOGGER.debug(f'COMET WARNING: Image: {image_path} has no bounding box predictions')
return None
data = []
for prediction in predictions:
boxes = prediction['bbox']
score = _scale_confidence_score(prediction['score'])
cls_label = prediction['category_id']
if class_label_map:
cls_label = str(class_label_map[cls_label])
data.append({'boxes': [boxes], 'label': cls_label, 'score': score})
return {'name': 'prediction', 'data': data}
def _fetch_annotations(img_idx, image_path, batch, prediction_metadata_map, class_label_map):
"""Join the ground truth and prediction annotations if they exist."""
ground_truth_annotations = _format_ground_truth_annotations_for_detection(img_idx, image_path, batch,
class_label_map)
prediction_annotations = _format_prediction_annotations_for_detection(image_path, prediction_metadata_map,
class_label_map)
annotations = [
annotation for annotation in [ground_truth_annotations, prediction_annotations] if annotation is not None]
return [annotations] if annotations else None
def _create_prediction_metadata_map(model_predictions):
"""Create metadata map for model predictions by groupings them based on image ID."""
pred_metadata_map = {}
for prediction in model_predictions:
pred_metadata_map.setdefault(prediction['image_id'], [])
pred_metadata_map[prediction['image_id']].append(prediction)
return pred_metadata_map
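
A small illustration (hypothetical values) of what this grouping produces for COCO-style prediction dicts:

predictions = [
    {'image_id': 1, 'category_id': 0, 'bbox': [10.0, 10.0, 20.0, 20.0], 'score': 0.9},
    {'image_id': 1, 'category_id': 2, 'bbox': [30.0, 40.0, 15.0, 25.0], 'score': 0.4},
    {'image_id': 7, 'category_id': 0, 'bbox': [5.0, 5.0, 50.0, 60.0], 'score': 0.8},
]
metadata_map = _create_prediction_metadata_map(predictions)
# metadata_map == {1: [<first two predictions>], 7: [<last prediction>]}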
def _log_confusion_matrix(experiment, trainer, curr_step, curr_epoch):
"""Log the confusion matrix to Comet experiment."""
conf_mat = trainer.validator.confusion_matrix.matrix
names = list(trainer.data['names'].values()) + ['background']
experiment.log_confusion_matrix(
matrix=conf_mat,
labels=names,
max_categories=len(names),
epoch=curr_epoch,
step=curr_step,
)
def _log_images(experiment, image_paths, curr_step, annotations=None):
"""Logs images to the experiment with optional annotations."""
if annotations:
for image_path, annotation in zip(image_paths, annotations):
experiment.log_image(image_path, name=image_path.stem, step=curr_step, annotations=annotation)
else:
for image_path in image_paths:
experiment.log_image(image_path, name=image_path.stem, step=curr_step)
def _log_image_predictions(experiment, validator, curr_step):
"""Logs predicted boxes for a single image during training."""
global _comet_image_prediction_count
task = validator.args.task
if task not in COMET_SUPPORTED_TASKS:
return
jdict = validator.jdict
if not jdict:
return
predictions_metadata_map = _create_prediction_metadata_map(jdict)
dataloader = validator.dataloader
class_label_map = validator.names
batch_logging_interval = _get_eval_batch_logging_interval()
max_image_predictions = _get_max_image_predictions_to_log()
for batch_idx, batch in enumerate(dataloader):
if (batch_idx + 1) % batch_logging_interval != 0:
continue
image_paths = batch['im_file']
for img_idx, image_path in enumerate(image_paths):
if _comet_image_prediction_count >= max_image_predictions:
return
image_path = Path(image_path)
annotations = _fetch_annotations(
img_idx,
image_path,
batch,
predictions_metadata_map,
class_label_map,
)
_log_images(
experiment,
[image_path],
curr_step,
annotations=annotations,
)
_comet_image_prediction_count += 1
def _log_plots(experiment, trainer):
"""Logs evaluation plots and label plots for the experiment."""
plot_filenames = [trainer.save_dir / f'{plots}.png' for plots in EVALUATION_PLOT_NAMES]
_log_images(experiment, plot_filenames, None)
label_plot_filenames = [trainer.save_dir / f'{labels}.jpg' for labels in LABEL_PLOT_NAMES]
_log_images(experiment, label_plot_filenames, None)
def _log_model(experiment, trainer):
"""Log the best-trained model to Comet.ml."""
model_name = _get_comet_model_name()
experiment.log_model(
model_name,
file_or_folder=str(trainer.best),
file_name='best.pt',
overwrite=True,
)
def on_pretrain_routine_start(trainer):
"""Creates or resumes a CometML experiment at the start of a YOLO pre-training routine."""
experiment = comet_ml.get_global_experiment()
is_alive = getattr(experiment, 'alive', False)
if not experiment or not is_alive:
_create_experiment(trainer.args)
def on_train_epoch_end(trainer):
"""Log metrics and save batch images at the end of training epochs."""
experiment = comet_ml.get_global_experiment()
if not experiment:
return
metadata = _fetch_trainer_metadata(trainer)
curr_epoch = metadata['curr_epoch']
curr_step = metadata['curr_step']
experiment.log_metrics(
trainer.label_loss_items(trainer.tloss, prefix='train'),
step=curr_step,
epoch=curr_epoch,
)
if curr_epoch == 1:
_log_images(experiment, trainer.save_dir.glob('train_batch*.jpg'), curr_step)
def on_fit_epoch_end(trainer):
"""Logs model assets at the end of each epoch."""
experiment = comet_ml.get_global_experiment()
if not experiment:
return
metadata = _fetch_trainer_metadata(trainer)
curr_epoch = metadata['curr_epoch']
curr_step = metadata['curr_step']
save_assets = metadata['save_assets']
experiment.log_metrics(trainer.metrics, step=curr_step, epoch=curr_epoch)
experiment.log_metrics(trainer.lr, step=curr_step, epoch=curr_epoch)
if curr_epoch == 1:
experiment.log_metrics(model_info_for_loggers(trainer), step=curr_step, epoch=curr_epoch)
if not save_assets:
return
_log_model(experiment, trainer)
if _should_log_confusion_matrix():
_log_confusion_matrix(experiment, trainer, curr_step, curr_epoch)
if _should_log_image_predictions():
_log_image_predictions(experiment, trainer.validator, curr_step)
def on_train_end(trainer):
"""Perform operations at the end of training."""
experiment = comet_ml.get_global_experiment()
if not experiment:
return
metadata = _fetch_trainer_metadata(trainer)
curr_epoch = metadata['curr_epoch']
curr_step = metadata['curr_step']
plots = trainer.args.plots
_log_model(experiment, trainer)
if plots:
_log_plots(experiment, trainer)
_log_confusion_matrix(experiment, trainer, curr_step, curr_epoch)
_log_image_predictions(experiment, trainer.validator, curr_step)
experiment.end()
global _comet_image_prediction_count
_comet_image_prediction_count = 0
callbacks = {
'on_pretrain_routine_start': on_pretrain_routine_start,
'on_train_epoch_end': on_train_epoch_end,
'on_fit_epoch_end': on_fit_epoch_end,
'on_train_end': on_train_end} if comet_ml else {}

@@ -0,0 +1,136 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
import os
import pkg_resources as pkg
from ultralytics.utils import LOGGER, TESTS_RUNNING
from ultralytics.utils.torch_utils import model_info_for_loggers
try:
from importlib.metadata import version
import dvclive
assert not TESTS_RUNNING # do not log pytest
ver = version('dvclive')
if pkg.parse_version(ver) < pkg.parse_version('2.11.0'):
LOGGER.debug(f'DVCLive is detected but version {ver} is incompatible (>=2.11 required).')
dvclive = None # noqa: F811
except (ImportError, AssertionError, TypeError):
dvclive = None
# DVCLive logger instance
live = None
_processed_plots = {}
# `on_fit_epoch_end` is also called on the final validation (probably needs to be fixed);
# for now this is how we distinguish the final evaluation of the best model
# from the last-epoch validation
_training_epoch = False
def _logger_disabled():
"""Check whether DVCLive logging has been disabled via the ULTRALYTICS_DVC_DISABLED environment variable."""
return os.getenv('ULTRALYTICS_DVC_DISABLED', 'false').lower() == 'true'
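
For example (editor's sketch), a run can opt out of DVCLive logging by exporting the variable this helper reads before training starts:

import os
os.environ['ULTRALYTICS_DVC_DISABLED'] = 'true'  # _logger_disabled() now returns True, so no DVCLive run is created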
def _log_images(image_path, prefix=''):
"""Log an image file to the active DVCLive logger under an optional prefix."""
if live:
live.log_image(os.path.join(prefix, image_path.name), image_path)
def _log_plots(plots, prefix=''):
"""Log plot images that have not been logged yet, using their timestamps to skip duplicates."""
for name, params in plots.items():
timestamp = params['timestamp']
if _processed_plots.get(name) != timestamp:
_log_images(name, prefix)
_processed_plots[name] = timestamp
def _log_confusion_matrix(validator):
"""Log the validator's confusion matrix to DVCLive as a sklearn-style plot."""
targets = []
preds = []
matrix = validator.confusion_matrix.matrix
names = list(validator.names.values())
if validator.confusion_matrix.task == 'detect':
names += ['background']
for ti, pred in enumerate(matrix.T.astype(int)):
for pi, num in enumerate(pred):
targets.extend([names[ti]] * num)
preds.extend([names[pi]] * num)
live.log_sklearn_plot('confusion_matrix', targets, preds, name='cf.json', normalized=True)
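
A standalone sketch (editor's illustration; the 2x2 matrix and class names are made up) of how the loop above flattens cell counts into the paired target/prediction label lists that log_sklearn_plot consumes:

import numpy as np

names = ['cat', 'dog']
matrix = np.array([[3, 1],
                   [0, 2]])
targets, preds = [], []
for ti, pred in enumerate(matrix.T.astype(int)):
    for pi, num in enumerate(pred):
        targets.extend([names[ti]] * num)  # one entry per counted sample
        preds.extend([names[pi]] * num)
# targets -> ['cat', 'cat', 'cat', 'dog', 'dog', 'dog']
# preds   -> ['cat', 'cat', 'cat', 'cat', 'dog', 'dog']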
def on_pretrain_routine_start(trainer):
"""Initialize the DVCLive logger at the start of the pretraining routine unless logging is disabled."""
try:
global live
if not _logger_disabled():
live = dvclive.Live(save_dvc_exp=True, cache_images=True)
LOGGER.info(
'DVCLive is detected and auto logging is enabled (can be disabled with `ULTRALYTICS_DVC_DISABLED=true`).'
)
else:
LOGGER.debug('DVCLive is detected and auto logging is disabled via `ULTRALYTICS_DVC_DISABLED`.')
live = None
except Exception as e:
LOGGER.warning(f'WARNING ⚠️ DVCLive installed but not initialized correctly, not logging this run. {e}')
def on_pretrain_routine_end(trainer):
"""Log training plots at the end of the pretraining routine."""
_log_plots(trainer.plots, 'train')
def on_train_start(trainer):
"""Log the training parameters when training starts."""
if live:
live.log_params(trainer.args)
def on_train_epoch_start(trainer):
"""Mark the start of a training epoch so the final validation can be distinguished later."""
global _training_epoch
_training_epoch = True
def on_fit_epoch_end(trainer):
"""Log training metrics, model info and plots at the end of each fit (train + val) epoch."""
global _training_epoch
if live and _training_epoch:
all_metrics = {**trainer.label_loss_items(trainer.tloss, prefix='train'), **trainer.metrics, **trainer.lr}
for metric, value in all_metrics.items():
live.log_metric(metric, value)
if trainer.epoch == 0:
for metric, value in model_info_for_loggers(trainer).items():
live.log_metric(metric, value, plot=False)
_log_plots(trainer.plots, 'train')
_log_plots(trainer.validator.plots, 'val')
live.next_step()
_training_epoch = False
def on_train_end(trainer):
"""Log the best metrics, plots, confusion matrix and best model at the end of training."""
if live:
# At train end, log the best metrics; the trainer runs the validator on the best model internally.
all_metrics = {**trainer.label_loss_items(trainer.tloss, prefix='train'), **trainer.metrics, **trainer.lr}
for metric, value in all_metrics.items():
live.log_metric(metric, value, plot=False)
_log_plots(trainer.plots, 'eval')
_log_plots(trainer.validator.plots, 'eval')
_log_confusion_matrix(trainer.validator)
if trainer.best.exists():
live.log_artifact(trainer.best, copy=True)
live.end()
callbacks = {
'on_pretrain_routine_start': on_pretrain_routine_start,
'on_pretrain_routine_end': on_pretrain_routine_end,
'on_train_start': on_train_start,
'on_train_epoch_start': on_train_epoch_start,
'on_fit_epoch_end': on_fit_epoch_end,
'on_train_end': on_train_end} if dvclive else {}

@@ -0,0 +1,87 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
import json
from time import time
from ultralytics.hub.utils import PREFIX, events
from ultralytics.utils import LOGGER
from ultralytics.utils.torch_utils import model_info_for_loggers
def on_pretrain_routine_end(trainer):
"""Logs info before starting timer for upload rate limit."""
session = getattr(trainer, 'hub_session', None)
if session:
# Start timer for upload rate limit
LOGGER.info(f'{PREFIX}View model at https://hub.ultralytics.com/models/{session.model_id} 🚀')
session.timers = {'metrics': time(), 'ckpt': time()}  # start timers for session.rate_limits
def on_fit_epoch_end(trainer):
"""Uploads training progress metrics at the end of each epoch."""
session = getattr(trainer, 'hub_session', None)
if session:
# Upload metrics after val end
all_plots = {**trainer.label_loss_items(trainer.tloss, prefix='train'), **trainer.metrics}
if trainer.epoch == 0:
all_plots = {**all_plots, **model_info_for_loggers(trainer)}
session.metrics_queue[trainer.epoch] = json.dumps(all_plots)
if time() - session.timers['metrics'] > session.rate_limits['metrics']:
session.upload_metrics()
session.timers['metrics'] = time() # reset timer
session.metrics_queue = {} # reset queue
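
The same rate-limit pattern in a standalone sketch (illustrative names, not the HUB session API): queue results every epoch, but only fire the upload once the configured interval has elapsed, then reset both timer and queue.

from time import time

class RateLimitedUploader:
    """Minimal sketch of the metric-upload rate limiting used above."""

    def __init__(self, interval_seconds):
        self.interval = interval_seconds
        self.last_upload = time()
        self.queue = {}

    def add(self, epoch, metrics):
        self.queue[epoch] = metrics
        if time() - self.last_upload > self.interval:
            self.flush()

    def flush(self):
        print(f'uploading {len(self.queue)} queued epochs')  # stand-in for session.upload_metrics()
        self.last_upload = time()  # reset timer
        self.queue = {}  # reset queue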
def on_model_save(trainer):
"""Saves checkpoints to Ultralytics HUB with rate limiting."""
session = getattr(trainer, 'hub_session', None)
if session:
# Upload checkpoints with rate limiting
is_best = trainer.best_fitness == trainer.fitness
if time() - session.timers['ckpt'] > session.rate_limits['ckpt']:
LOGGER.info(f'{PREFIX}Uploading checkpoint https://hub.ultralytics.com/models/{session.model_id}')
session.upload_model(trainer.epoch, trainer.last, is_best)
session.timers['ckpt'] = time() # reset timer
def on_train_end(trainer):
"""Upload final model and metrics to Ultralytics HUB at the end of training."""
session = getattr(trainer, 'hub_session', None)
if session:
# Upload the final model and metrics with exponential backoff
LOGGER.info(f'{PREFIX}Syncing final model...')
session.upload_model(trainer.epoch, trainer.best, map=trainer.metrics.get('metrics/mAP50-95(B)', 0), final=True)
session.alive = False # stop heartbeats
LOGGER.info(f'{PREFIX}Done ✅\n'
f'{PREFIX}View model at https://hub.ultralytics.com/models/{session.model_id} 🚀')
def on_train_start(trainer):
"""Run events on train start."""
events(trainer.args)
def on_val_start(validator):
"""Runs events on validation start."""
events(validator.args)
def on_predict_start(predictor):
"""Run events on predict start."""
events(predictor.args)
def on_export_start(exporter):
"""Run events on export start."""
events(exporter.args)
callbacks = {
'on_pretrain_routine_end': on_pretrain_routine_end,
'on_fit_epoch_end': on_fit_epoch_end,
'on_model_save': on_model_save,
'on_train_end': on_train_end,
'on_train_start': on_train_start,
'on_val_start': on_val_start,
'on_predict_start': on_predict_start,
'on_export_start': on_export_start}

@@ -0,0 +1,71 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
import os
import re
from pathlib import Path
from ultralytics.utils import LOGGER, TESTS_RUNNING, colorstr
try:
import mlflow
assert not TESTS_RUNNING # do not log pytest
assert hasattr(mlflow, '__version__') # verify package is not directory
except (ImportError, AssertionError):
mlflow = None
def on_pretrain_routine_end(trainer):
"""Logs training parameters to MLflow."""
global mlflow, run, run_id, experiment_name
if os.environ.get('MLFLOW_TRACKING_URI') is None:
mlflow = None
if mlflow:
mlflow_location = os.environ['MLFLOW_TRACKING_URI'] # "http://192.168.xxx.xxx:5000"
mlflow.set_tracking_uri(mlflow_location)
experiment_name = os.environ.get('MLFLOW_EXPERIMENT_NAME') or trainer.args.project or '/Shared/YOLOv8'
run_name = os.environ.get('MLFLOW_RUN') or trainer.args.name
experiment = mlflow.get_experiment_by_name(experiment_name)
if experiment is None:
experiment = mlflow.get_experiment(mlflow.create_experiment(experiment_name))  # create, then fetch so experiment_id is available below
mlflow.set_experiment(experiment_name)
prefix = colorstr('MLflow: ')
try:
run, active_run = mlflow, mlflow.active_run()
if not active_run:
active_run = mlflow.start_run(experiment_id=experiment.experiment_id, run_name=run_name)
run_id = active_run.info.run_id
LOGGER.info(f'{prefix}Using run_id({run_id}) at {mlflow_location}')
run.log_params(vars(trainer.model.args))
except Exception as err:
LOGGER.error(f'{prefix}Failing init - {repr(err)}')
LOGGER.warning(f'{prefix}Continuing without MLflow')
def on_fit_epoch_end(trainer):
"""Logs training metrics to MLflow."""
if mlflow:
metrics_dict = {f"{re.sub('[()]', '', k)}": float(v) for k, v in trainer.metrics.items()}
run.log_metrics(metrics=metrics_dict, step=trainer.epoch)
def on_train_end(trainer):
"""Called at end of train loop to log model artifact info."""
if mlflow:
root_dir = Path(__file__).resolve().parents[3]
run.log_artifact(trainer.last)
run.log_artifact(trainer.best)
run.pyfunc.log_model(artifact_path=experiment_name,
code_path=[str(root_dir)],
artifacts={'model_path': str(trainer.save_dir)},
python_model=run.pyfunc.PythonModel())
callbacks = {
'on_pretrain_routine_end': on_pretrain_routine_end,
'on_fit_epoch_end': on_fit_epoch_end,
'on_train_end': on_train_end} if mlflow else {}
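
A configuration sketch (editor's illustration; the URI and names are hypothetical) showing the environment variables this callback consults; without MLFLOW_TRACKING_URI it disables itself:

import os

os.environ['MLFLOW_TRACKING_URI'] = 'http://127.0.0.1:5000'  # hypothetical local tracking server
os.environ['MLFLOW_EXPERIMENT_NAME'] = 'yolov8-experiments'  # falls back to trainer.args.project or '/Shared/YOLOv8'
os.environ['MLFLOW_RUN'] = 'baseline-run'                    # falls back to trainer.args.name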

@@ -0,0 +1,103 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
from ultralytics.utils import LOGGER, TESTS_RUNNING
from ultralytics.utils.torch_utils import model_info_for_loggers
try:
import neptune
from neptune.types import File
assert not TESTS_RUNNING # do not log pytest
assert hasattr(neptune, '__version__')
except (ImportError, AssertionError):
neptune = None
run = None # NeptuneAI experiment logger instance
def _log_scalars(scalars, step=0):
"""Log scalars to the NeptuneAI experiment logger."""
if run:
for k, v in scalars.items():
run[k].append(value=v, step=step)
def _log_images(imgs_dict, group=''):
"""Log scalars to the NeptuneAI experiment logger."""
if run:
for k, v in imgs_dict.items():
run[f'{group}/{k}'].upload(File(v))
def _log_plot(title, plot_path):
"""
Log an image as a plot in the plot section of NeptuneAI.
Args:
title (str): Title of the plot.
plot_path (PosixPath | str): Path to the saved image file.
"""
img = mpimg.imread(plot_path)
fig = plt.figure()
ax = fig.add_axes([0, 0, 1, 1], frameon=False, aspect='auto', xticks=[], yticks=[]) # no ticks
ax.imshow(img)
run[f'Plots/{title}'].upload(fig)
def on_pretrain_routine_start(trainer):
"""Callback function called before the training routine starts."""
try:
global run
run = neptune.init_run(project=trainer.args.project or 'YOLOv8', name=trainer.args.name, tags=['YOLOv8'])
run['Configuration/Hyperparameters'] = {k: '' if v is None else v for k, v in vars(trainer.args).items()}
except Exception as e:
LOGGER.warning(f'WARNING ⚠️ NeptuneAI installed but not initialized correctly, not logging this run. {e}')
def on_train_epoch_end(trainer):
"""Callback function called at end of each training epoch."""
_log_scalars(trainer.label_loss_items(trainer.tloss, prefix='train'), trainer.epoch + 1)
_log_scalars(trainer.lr, trainer.epoch + 1)
if trainer.epoch == 1:
_log_images({f.stem: str(f) for f in trainer.save_dir.glob('train_batch*.jpg')}, 'Mosaic')
def on_fit_epoch_end(trainer):
"""Callback function called at end of each fit (train+val) epoch."""
if run and trainer.epoch == 0:
run['Configuration/Model'] = model_info_for_loggers(trainer)
_log_scalars(trainer.metrics, trainer.epoch + 1)
def on_val_end(validator):
"""Callback function called at end of each validation."""
if run:
# Log val_labels and val_pred
_log_images({f.stem: str(f) for f in validator.save_dir.glob('val*.jpg')}, 'Validation')
def on_train_end(trainer):
"""Callback function called at end of training."""
if run:
# Log final results, CM matrix + PR plots
files = [
'results.png', 'confusion_matrix.png', 'confusion_matrix_normalized.png',
*(f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R'))]
files = [(trainer.save_dir / f) for f in files if (trainer.save_dir / f).exists()] # filter
for f in files:
_log_plot(title=f.stem, plot_path=f)
# Log the final model
run[f'weights/{trainer.args.name or trainer.args.task}/{str(trainer.best.name)}'].upload(File(str(
trainer.best)))
callbacks = {
'on_pretrain_routine_start': on_pretrain_routine_start,
'on_train_epoch_end': on_train_epoch_end,
'on_fit_epoch_end': on_fit_epoch_end,
'on_val_end': on_val_end,
'on_train_end': on_train_end} if neptune else {}

@@ -0,0 +1,20 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
try:
import ray
from ray import tune
from ray.air import session
except (ImportError, AssertionError):
tune = None
def on_fit_epoch_end(trainer):
"""Sends training metrics to Ray Tune at end of each epoch."""
if ray.tune.is_session_enabled():
metrics = trainer.metrics
metrics['epoch'] = trainer.epoch
session.report(metrics)
callbacks = {
'on_fit_epoch_end': on_fit_epoch_end, } if tune else {}

@@ -0,0 +1,47 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
from ultralytics.utils import LOGGER, TESTS_RUNNING, colorstr
try:
from torch.utils.tensorboard import SummaryWriter
assert not TESTS_RUNNING # do not log pytest
except (ImportError, AssertionError):
SummaryWriter = None
writer = None # TensorBoard SummaryWriter instance
def _log_scalars(scalars, step=0):
"""Logs scalar values to TensorBoard."""
if writer:
for k, v in scalars.items():
writer.add_scalar(k, v, step)
def on_pretrain_routine_start(trainer):
"""Initialize TensorBoard logging with SummaryWriter."""
if SummaryWriter:
try:
global writer
writer = SummaryWriter(str(trainer.save_dir))
prefix = colorstr('TensorBoard: ')
LOGGER.info(f"{prefix}Start with 'tensorboard --logdir {trainer.save_dir}', view at http://localhost:6006/")
except Exception as e:
LOGGER.warning(f'WARNING ⚠️ TensorBoard not initialized correctly, not logging this run. {e}')
def on_batch_end(trainer):
"""Logs scalar statistics at the end of a training batch."""
_log_scalars(trainer.label_loss_items(trainer.tloss, prefix='train'), trainer.epoch + 1)
def on_fit_epoch_end(trainer):
"""Logs epoch metrics at end of training epoch."""
_log_scalars(trainer.metrics, trainer.epoch + 1)
callbacks = {
'on_pretrain_routine_start': on_pretrain_routine_start,
'on_fit_epoch_end': on_fit_epoch_end,
'on_batch_end': on_batch_end}

@@ -0,0 +1,60 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
from ultralytics.utils import TESTS_RUNNING
from ultralytics.utils.torch_utils import model_info_for_loggers
try:
import wandb as wb
assert hasattr(wb, '__version__')
assert not TESTS_RUNNING # do not log pytest
except (ImportError, AssertionError):
wb = None
_processed_plots = {}
def _log_plots(plots, step):
"""Log plot images to W&B, skipping plots already logged at the same timestamp."""
for name, params in plots.items():
timestamp = params['timestamp']
if _processed_plots.get(name, None) != timestamp:
wb.run.log({name.stem: wb.Image(str(name))}, step=step)
_processed_plots[name] = timestamp
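
A standalone sketch (editor's illustration) of the timestamp check above: a plot is only re-logged when the trainer regenerates it and its recorded timestamp changes.

_seen = {}

def should_log(name, timestamp):
    if _seen.get(name) == timestamp:
        return False  # unchanged since the last log call, skip
    _seen[name] = timestamp
    return True

print(should_log('results.png', 100.0))  # True, first time seen
print(should_log('results.png', 100.0))  # False, same timestamp
print(should_log('results.png', 101.5))  # True, the plot was regenerated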
def on_pretrain_routine_start(trainer):
"""Initiate and start project if module is present."""
wb.run or wb.init(project=trainer.args.project or 'YOLOv8', name=trainer.args.name, config=vars(trainer.args))
def on_fit_epoch_end(trainer):
"""Logs training metrics and model information at the end of an epoch."""
wb.run.log(trainer.metrics, step=trainer.epoch + 1)
_log_plots(trainer.plots, step=trainer.epoch + 1)
_log_plots(trainer.validator.plots, step=trainer.epoch + 1)
if trainer.epoch == 0:
wb.run.log(model_info_for_loggers(trainer), step=trainer.epoch + 1)
def on_train_epoch_end(trainer):
"""Log metrics and save images at the end of each training epoch."""
wb.run.log(trainer.label_loss_items(trainer.tloss, prefix='train'), step=trainer.epoch + 1)
wb.run.log(trainer.lr, step=trainer.epoch + 1)
if trainer.epoch == 1:
_log_plots(trainer.plots, step=trainer.epoch + 1)
def on_train_end(trainer):
"""Save the best model as an artifact at end of training."""
_log_plots(trainer.validator.plots, step=trainer.epoch + 1)
_log_plots(trainer.plots, step=trainer.epoch + 1)
art = wb.Artifact(type='model', name=f'run_{wb.run.id}_model')
if trainer.best.exists():
art.add_file(trainer.best)
wb.run.log_artifact(art, aliases=['best'])
callbacks = {
'on_pretrain_routine_start': on_pretrain_routine_start,
'on_train_epoch_end': on_train_epoch_end,
'on_fit_epoch_end': on_fit_epoch_end,
'on_train_end': on_train_end} if wb else {}