You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

141 lines
4.3 KiB

# Ultralytics YOLO 🚀, AGPL-3.0 license
import os
import re
from pathlib import Path
import pkg_resources as pkg
from ultralytics.utils import LOGGER, SETTINGS, TESTS_RUNNING
from ultralytics.utils.torch_utils import model_info_for_loggers
try:
from importlib.metadata import version
import dvclive
assert not TESTS_RUNNING # do not log pytest
assert SETTINGS['dvc'] is True # verify integration is enabled
ver = version('dvclive')
if pkg.parse_version(ver) < pkg.parse_version('2.11.0'):
LOGGER.debug(f'DVCLive is detected but version {ver} is incompatible (>=2.11 required).')
dvclive = None # noqa: F811
except (ImportError, AssertionError, TypeError):
dvclive = None
# DVCLive logger instance
live = None
_processed_plots = {}
# `on_fit_epoch_end` is called on final validation (probably need to be fixed)
# for now this is the way we distinguish final evaluation of the best model vs
# last epoch validation
_training_epoch = False
def _log_images(path, prefix=''):
if live:
name = path.name
# Group images by batch to enable sliders in UI
if m := re.search(r'_batch(\d+)', name):
ni = m.group(1)
new_stem = re.sub(r'_batch(\d+)', '_batch', path.stem)
name = (Path(new_stem) / ni).with_suffix(path.suffix)
live.log_image(os.path.join(prefix, name), path)
def _log_plots(plots, prefix=''):
for name, params in plots.items():
timestamp = params['timestamp']
if _processed_plots.get(name) != timestamp:
_log_images(name, prefix)
_processed_plots[name] = timestamp
def _log_confusion_matrix(validator):
targets = []
preds = []
matrix = validator.confusion_matrix.matrix
names = list(validator.names.values())
if validator.confusion_matrix.task == 'detect':
names += ['background']
for ti, pred in enumerate(matrix.T.astype(int)):
for pi, num in enumerate(pred):
targets.extend([names[ti]] * num)
preds.extend([names[pi]] * num)
live.log_sklearn_plot('confusion_matrix', targets, preds, name='cf.json', normalized=True)
def on_pretrain_routine_start(trainer):
try:
global live
live = dvclive.Live(save_dvc_exp=True, cache_images=True)
LOGGER.info(
f'DVCLive is detected and auto logging is enabled (can be disabled in the {SETTINGS.file} with `dvc: false`).'
)
except Exception as e:
LOGGER.warning(f'WARNING ⚠️ DVCLive installed but not initialized correctly, not logging this run. {e}')
def on_pretrain_routine_end(trainer):
_log_plots(trainer.plots, 'train')
def on_train_start(trainer):
if live:
live.log_params(trainer.args)
def on_train_epoch_start(trainer):
global _training_epoch
_training_epoch = True
def on_fit_epoch_end(trainer):
global _training_epoch
if live and _training_epoch:
all_metrics = {**trainer.label_loss_items(trainer.tloss, prefix='train'), **trainer.metrics, **trainer.lr}
for metric, value in all_metrics.items():
live.log_metric(metric, value)
if trainer.epoch == 0:
for metric, value in model_info_for_loggers(trainer).items():
live.log_metric(metric, value, plot=False)
_log_plots(trainer.plots, 'train')
_log_plots(trainer.validator.plots, 'val')
live.next_step()
_training_epoch = False
def on_train_end(trainer):
if live:
# At the end log the best metrics. It runs validator on the best model internally.
all_metrics = {**trainer.label_loss_items(trainer.tloss, prefix='train'), **trainer.metrics, **trainer.lr}
for metric, value in all_metrics.items():
live.log_metric(metric, value, plot=False)
_log_plots(trainer.plots, 'val')
_log_plots(trainer.validator.plots, 'val')
_log_confusion_matrix(trainer.validator)
if trainer.best.exists():
live.log_artifact(trainer.best, copy=True, type='model')
live.end()
callbacks = {
'on_pretrain_routine_start': on_pretrain_routine_start,
'on_pretrain_routine_end': on_pretrain_routine_end,
'on_train_start': on_train_start,
'on_train_epoch_start': on_train_epoch_start,
'on_fit_epoch_end': on_fit_epoch_end,
'on_train_end': on_train_end} if dvclive else {}