ultralytics 8.0.53
DDP AMP and Edge TPU fixes (#1362)
Co-authored-by: Richard Aljaste <richardaljasteabramson@gmail.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Vuong Kha Sieu <75152429+hotfur@users.noreply.github.com>
This commit is contained in:
@ -299,7 +299,7 @@ def entrypoint(debug=''):
|
||||
task = model.task
|
||||
|
||||
# Mode
|
||||
if mode in {'predict', 'track'} and 'source' not in overrides:
|
||||
if mode in ('predict', 'track') and 'source' not in overrides:
|
||||
overrides['source'] = DEFAULT_CFG.source or ROOT / 'assets' if (ROOT / 'assets').exists() \
|
||||
else 'https://ultralytics.com/images/bus.jpg'
|
||||
LOGGER.warning(f"WARNING ⚠️ 'source' is missing. Using default 'source={overrides['source']}'.")
|
||||
|
@ -14,7 +14,7 @@ from ..utils.checks import check_version
|
||||
from ..utils.instance import Instances
|
||||
from ..utils.metrics import bbox_ioa
|
||||
from ..utils.ops import segment2box
|
||||
from .utils import IMAGENET_MEAN, IMAGENET_STD, polygons2masks, polygons2masks_overlap
|
||||
from .utils import polygons2masks, polygons2masks_overlap
|
||||
|
||||
|
||||
# TODO: we might need a BaseTransform to make all these augments be compatible with both classification and semantic
|
||||
@ -682,12 +682,14 @@ def v8_transforms(dataset, imgsz, hyp):
|
||||
|
||||
|
||||
# Classification augmentations -----------------------------------------------------------------------------------------
|
||||
def classify_transforms(size=224):
|
||||
def classify_transforms(size=224, mean=(0.0, 0.0, 0.0), std=(1.0, 1.0, 1.0)): # IMAGENET_MEAN, IMAGENET_STD
|
||||
# Transforms to apply if albumentations not installed
|
||||
if not isinstance(size, int):
|
||||
raise TypeError(f'classify_transforms() size {size} must be integer, not (list, tuple)')
|
||||
# T.Compose([T.ToTensor(), T.Resize(size), T.CenterCrop(size), T.Normalize(IMAGENET_MEAN, IMAGENET_STD)])
|
||||
return T.Compose([CenterCrop(size), ToTensor(), T.Normalize(IMAGENET_MEAN, IMAGENET_STD)])
|
||||
if any(mean) or any(std):
|
||||
return T.Compose([CenterCrop(size), ToTensor(), T.Normalize(mean, std, inplace=True)])
|
||||
else:
|
||||
return T.Compose([CenterCrop(size), ToTensor()])
|
||||
|
||||
|
||||
def classify_albumentations(
|
||||
@ -697,8 +699,8 @@ def classify_albumentations(
|
||||
hflip=0.5,
|
||||
vflip=0.0,
|
||||
jitter=0.4,
|
||||
mean=IMAGENET_MEAN,
|
||||
std=IMAGENET_STD,
|
||||
mean=(0.0, 0.0, 0.0), # IMAGENET_MEAN
|
||||
std=(1.0, 1.0, 1.0), # IMAGENET_STD
|
||||
auto_aug=False,
|
||||
):
|
||||
# YOLOv8 classification Albumentations (optional, only used if package is installed)
|
||||
|
@ -496,7 +496,7 @@ class LoadImagesAndLabels(Dataset):
|
||||
|
||||
# Display cache
|
||||
nf, nm, ne, nc, n = cache.pop('results') # found, missing, empty, corrupt, total
|
||||
if exists and LOCAL_RANK in {-1, 0}:
|
||||
if exists and LOCAL_RANK in (-1, 0):
|
||||
d = f'Scanning {cache_path}... {nf} images, {nm + ne} backgrounds, {nc} corrupt'
|
||||
tqdm(None, desc=prefix + d, total=n, initial=n, bar_format=TQDM_BAR_FORMAT) # display cache results
|
||||
if cache['msgs']:
|
||||
|
@ -133,7 +133,7 @@ class YOLODataset(BaseDataset):
|
||||
|
||||
# Display cache
|
||||
nf, nm, ne, nc, n = cache.pop('results') # found, missing, empty, corrupt, total
|
||||
if exists and LOCAL_RANK in {-1, 0}:
|
||||
if exists and LOCAL_RANK in (-1, 0):
|
||||
d = f'Scanning {cache_path}... {nf} images, {nm + ne} backgrounds, {nc} corrupt'
|
||||
tqdm(None, desc=self.prefix + d, total=n, initial=n, bar_format=TQDM_BAR_FORMAT) # display cache results
|
||||
if cache['msgs']:
|
||||
|
@ -63,7 +63,6 @@ from ultralytics.nn.autobackend import check_class_names
|
||||
from ultralytics.nn.modules import C2f, Detect, Segment
|
||||
from ultralytics.nn.tasks import DetectionModel, SegmentationModel
|
||||
from ultralytics.yolo.cfg import get_cfg
|
||||
from ultralytics.yolo.data.utils import IMAGENET_MEAN, IMAGENET_STD
|
||||
from ultralytics.yolo.utils import (DEFAULT_CFG, LINUX, LOGGER, MACOS, __version__, callbacks, colorstr,
|
||||
get_default_args, yaml_save)
|
||||
from ultralytics.yolo.utils.checks import check_imgsz, check_requirements, check_version
|
||||
@ -148,7 +147,7 @@ class Exporter:
|
||||
self.run_callbacks('on_export_start')
|
||||
t = time.time()
|
||||
format = self.args.format.lower() # to lowercase
|
||||
if format in {'tensorrt', 'trt'}: # engine aliases
|
||||
if format in ('tensorrt', 'trt'): # engine aliases
|
||||
format = 'engine'
|
||||
fmts = tuple(export_formats()['Argument'][1:]) # available export formats
|
||||
flags = [x == format for x in fmts]
|
||||
@ -408,8 +407,6 @@ class Exporter:
|
||||
scale = 1 / 255
|
||||
classifier_config = None
|
||||
if self.model.task == 'classify':
|
||||
bias = [-x for x in IMAGENET_MEAN]
|
||||
scale = 1 / 255 / (sum(IMAGENET_STD) / 3)
|
||||
classifier_config = ct.ClassifierConfig(list(self.model.names.values())) if self.args.nms else None
|
||||
model = self.model
|
||||
elif self.model.task == 'detect':
|
||||
@ -531,7 +528,7 @@ class Exporter:
|
||||
# Export to TF
|
||||
int8 = '-oiqt -qt per-tensor' if self.args.int8 else ''
|
||||
cmd = f'onnx2tf -i {f_onnx} -o {f} -nuo --non_verbose {int8}'
|
||||
LOGGER.info(f"\n{prefix} running '{cmd}'")
|
||||
LOGGER.info(f"\n{prefix} running '{cmd.strip()}'")
|
||||
subprocess.run(cmd, shell=True)
|
||||
yaml_save(f / 'metadata.yaml', self.metadata) # add metadata.yaml
|
||||
|
||||
|
@ -319,7 +319,7 @@ class YOLO:
|
||||
self.trainer.hub_session = self.session # attach optional HUB session
|
||||
self.trainer.train()
|
||||
# update model and cfg after training
|
||||
if RANK in {0, -1}:
|
||||
if RANK in (-1, 0):
|
||||
self.model, _ = attempt_load_one_weight(str(self.trainer.best))
|
||||
self.overrides = self.model.args
|
||||
self.metrics = getattr(self.trainer.validator, 'metrics', None) # TODO: no metrics returned by DDP
|
||||
|
@ -185,7 +185,7 @@ class Boxes:
|
||||
if boxes.ndim == 1:
|
||||
boxes = boxes[None, :]
|
||||
n = boxes.shape[-1]
|
||||
assert n in {6, 7}, f'expected `n` in [6, 7], but got {n}' # xyxy, (track_id), conf, cls
|
||||
assert n in (6, 7), f'expected `n` in [6, 7], but got {n}' # xyxy, (track_id), conf, cls
|
||||
# TODO
|
||||
self.is_track = n == 7
|
||||
self.boxes = boxes
|
||||
|
@ -95,9 +95,9 @@ class BaseTrainer:
|
||||
self.save_dir = Path(self.args.save_dir)
|
||||
else:
|
||||
self.save_dir = Path(
|
||||
increment_path(Path(project) / name, exist_ok=self.args.exist_ok if RANK in {-1, 0} else True))
|
||||
increment_path(Path(project) / name, exist_ok=self.args.exist_ok if RANK in (-1, 0) else True))
|
||||
self.wdir = self.save_dir / 'weights' # weights dir
|
||||
if RANK in {-1, 0}:
|
||||
if RANK in (-1, 0):
|
||||
self.wdir.mkdir(parents=True, exist_ok=True) # make dir
|
||||
self.args.save_dir = str(self.save_dir)
|
||||
yaml_save(self.save_dir / 'args.yaml', vars(self.args)) # save run args
|
||||
@ -144,7 +144,7 @@ class BaseTrainer:
|
||||
|
||||
# Callbacks
|
||||
self.callbacks = defaultdict(list, callbacks.default_callbacks) # add callbacks
|
||||
if RANK in {0, -1}:
|
||||
if RANK in (-1, 0):
|
||||
callbacks.add_integration_callbacks(self)
|
||||
|
||||
def add_callback(self, event: str, callback):
|
||||
@ -203,9 +203,14 @@ class BaseTrainer:
|
||||
self.model = self.model.to(self.device)
|
||||
self.set_model_attributes()
|
||||
# Check AMP
|
||||
callbacks_backup = callbacks.default_callbacks.copy() # backup callbacks as they are reset by check_amp()
|
||||
self.amp = check_amp(self.model)
|
||||
callbacks.default_callbacks = callbacks_backup # restore callbacks
|
||||
self.amp = torch.tensor(True).to(self.device)
|
||||
if RANK in (-1, 0): # Single-GPU and DDP
|
||||
callbacks_backup = callbacks.default_callbacks.copy() # backup callbacks as check_amp() resets them
|
||||
self.amp = torch.tensor(check_amp(self.model), device=self.device)
|
||||
callbacks.default_callbacks = callbacks_backup # restore callbacks
|
||||
if RANK > -1: # DDP
|
||||
dist.broadcast(self.amp, src=0) # broadcast the tensor from rank 0 to all other ranks (returns None)
|
||||
self.amp = bool(self.amp) # as boolean
|
||||
self.scaler = amp.GradScaler(enabled=self.amp)
|
||||
if world_size > 1:
|
||||
self.model = DDP(self.model, device_ids=[rank])
|
||||
@ -239,7 +244,7 @@ class BaseTrainer:
|
||||
# dataloaders
|
||||
batch_size = self.batch_size // world_size if world_size > 1 else self.batch_size
|
||||
self.train_loader = self.get_dataloader(self.trainset, batch_size=batch_size, rank=rank, mode='train')
|
||||
if rank in {0, -1}:
|
||||
if rank in (-1, 0):
|
||||
self.test_loader = self.get_dataloader(self.testset, batch_size=batch_size * 2, rank=-1, mode='val')
|
||||
self.validator = self.get_validator()
|
||||
metric_keys = self.validator.metrics.keys + self.label_loss_items(prefix='val')
|
||||
@ -286,7 +291,7 @@ class BaseTrainer:
|
||||
if hasattr(self.train_loader.dataset, 'close_mosaic'):
|
||||
self.train_loader.dataset.close_mosaic(hyp=self.args)
|
||||
|
||||
if rank in {-1, 0}:
|
||||
if rank in (-1, 0):
|
||||
LOGGER.info(self.progress_string())
|
||||
pbar = tqdm(enumerate(self.train_loader), total=nb, bar_format=TQDM_BAR_FORMAT)
|
||||
self.tloss = None
|
||||
@ -327,7 +332,7 @@ class BaseTrainer:
|
||||
mem = f'{torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0:.3g}G' # (GB)
|
||||
loss_len = self.tloss.shape[0] if len(self.tloss.size()) else 1
|
||||
losses = self.tloss if loss_len > 1 else torch.unsqueeze(self.tloss, 0)
|
||||
if rank in {-1, 0}:
|
||||
if rank in (-1, 0):
|
||||
pbar.set_description(
|
||||
('%11s' * 2 + '%11.4g' * (2 + loss_len)) %
|
||||
(f'{epoch + 1}/{self.epochs}', mem, *losses, batch['cls'].shape[0], batch['img'].shape[-1]))
|
||||
@ -342,7 +347,7 @@ class BaseTrainer:
|
||||
self.scheduler.step()
|
||||
self.run_callbacks('on_train_epoch_end')
|
||||
|
||||
if rank in {-1, 0}:
|
||||
if rank in (-1, 0):
|
||||
|
||||
# Validation
|
||||
self.ema.update_attr(self.model, include=['yaml', 'nc', 'args', 'names', 'stride', 'class_weights'])
|
||||
@ -372,7 +377,7 @@ class BaseTrainer:
|
||||
if self.stop:
|
||||
break # must break all DDP ranks
|
||||
|
||||
if rank in {-1, 0}:
|
||||
if rank in (-1, 0):
|
||||
# Do final val with best.pt
|
||||
LOGGER.info(f'\n{epoch - self.start_epoch + 1} epochs completed in '
|
||||
f'{(time.time() - self.train_time_start) / 3600:.3f} hours.')
|
||||
@ -603,7 +608,20 @@ class BaseTrainer:
|
||||
|
||||
|
||||
def check_amp(model):
|
||||
# Check PyTorch Automatic Mixed Precision (AMP) functionality. Return True on correct operation
|
||||
"""
|
||||
This function checks the PyTorch Automatic Mixed Precision (AMP) functionality of a YOLOv8 model.
|
||||
If the checks fail, it means there are anomalies with AMP on the system that may cause NaN losses or zero-mAP
|
||||
results, so AMP will be disabled during training.
|
||||
|
||||
Args:
|
||||
model (nn.Module): A YOLOv8 model instance.
|
||||
|
||||
Returns:
|
||||
bool: Returns True if the AMP functionality works correctly with YOLOv8 model, else False.
|
||||
|
||||
Raises:
|
||||
AssertionError: If the AMP checks fail, indicating anomalies with the AMP functionality on the system.
|
||||
"""
|
||||
device = next(model.parameters()).device # get model device
|
||||
if device.type in ('cpu', 'mps'):
|
||||
return False # AMP only used on CUDA devices
|
||||
@ -613,18 +631,21 @@ def check_amp(model):
|
||||
a = m(im, device=device, verbose=False)[0].boxes.boxes # FP32 inference
|
||||
with torch.cuda.amp.autocast(True):
|
||||
b = m(im, device=device, verbose=False)[0].boxes.boxes # AMP inference
|
||||
return a.shape == b.shape and torch.allclose(a, b.float(), rtol=0.1) # close to 10% absolute tolerance
|
||||
del m
|
||||
return a.shape == b.shape and torch.allclose(a, b.float(), atol=0.5) # close to 0.5 absolute tolerance
|
||||
|
||||
f = ROOT / 'assets/bus.jpg' # image to check
|
||||
im = f if f.exists() else 'https://ultralytics.com/images/bus.jpg' if ONLINE else np.ones((640, 640, 3))
|
||||
prefix = colorstr('AMP: ')
|
||||
LOGGER.info(f'{prefix}running Automatic Mixed Precision (AMP) checks with YOLOv8n...')
|
||||
try:
|
||||
from ultralytics import YOLO
|
||||
LOGGER.info(f'{prefix}running Automatic Mixed Precision (AMP) checks with YOLOv8n...')
|
||||
assert amp_allclose(YOLO('yolov8n.pt'), im)
|
||||
LOGGER.info(f'{prefix}checks passed ✅')
|
||||
return True
|
||||
except ConnectionError:
|
||||
LOGGER.warning(f"{prefix}checks skipped ⚠️, offline and unable to download YOLOv8n. Setting 'amp=True'.")
|
||||
except AssertionError:
|
||||
LOGGER.warning(f'{prefix}checks failed ❌. Anomalies were detected with AMP on your system that may lead to '
|
||||
f'NaN losses or zero-mAP results, so AMP will be disabled during training.')
|
||||
return False
|
||||
return True
|
||||
|
@ -79,7 +79,7 @@ class BaseValidator:
|
||||
project = self.args.project or Path(SETTINGS['runs_dir']) / self.args.task
|
||||
name = self.args.name or f'{self.args.mode}'
|
||||
self.save_dir = save_dir or increment_path(Path(project) / name,
|
||||
exist_ok=self.args.exist_ok if RANK in {-1, 0} else True)
|
||||
exist_ok=self.args.exist_ok if RANK in (-1, 0) else True)
|
||||
(self.save_dir / 'labels' if self.args.save_txt else self.save_dir).mkdir(parents=True, exist_ok=True)
|
||||
|
||||
if self.args.conf is None:
|
||||
|
@ -126,7 +126,7 @@ class IterableSimpleNamespace(SimpleNamespace):
|
||||
def set_logging(name=LOGGING_NAME, verbose=True):
|
||||
# sets up logging for the given name
|
||||
rank = int(os.getenv('RANK', -1)) # rank in world for Multi-GPU trainings
|
||||
level = logging.INFO if verbose and rank in {-1, 0} else logging.ERROR
|
||||
level = logging.INFO if verbose and rank in (-1, 0) else logging.ERROR
|
||||
logging.config.dictConfig({
|
||||
'version': 1,
|
||||
'disable_existing_loggers': False,
|
||||
@ -524,7 +524,7 @@ def set_sentry():
|
||||
return event
|
||||
|
||||
if SETTINGS['sync'] and \
|
||||
RANK in {-1, 0} and \
|
||||
RANK in (-1, 0) and \
|
||||
Path(sys.argv[0]).name == 'yolo' and \
|
||||
not TESTS_RUNNING and \
|
||||
ONLINE and \
|
||||
|
@ -28,7 +28,7 @@ from pathlib import Path
|
||||
|
||||
from ultralytics import YOLO
|
||||
from ultralytics.yolo.engine.exporter import export_formats
|
||||
from ultralytics.yolo.utils import LINUX, LOGGER, ROOT, SETTINGS
|
||||
from ultralytics.yolo.utils import LINUX, LOGGER, MACOS, ROOT, SETTINGS
|
||||
from ultralytics.yolo.utils.checks import check_yolo
|
||||
from ultralytics.yolo.utils.downloads import download
|
||||
from ultralytics.yolo.utils.files import file_size
|
||||
@ -51,6 +51,8 @@ def benchmark(model=Path(SETTINGS['weights_dir']) / 'yolov8n.pt', imgsz=160, hal
|
||||
if model.task == 'classify':
|
||||
assert i != 11, 'paddle cls exports coming soon'
|
||||
assert i != 9 or LINUX, 'Edge TPU export only supported on Linux'
|
||||
if i == 10:
|
||||
assert MACOS or LINUX, 'TF.js export only supported on macOS and Linux'
|
||||
if 'cpu' in device.type:
|
||||
assert cpu, 'inference not supported on CPU'
|
||||
if 'cuda' in device.type:
|
||||
|
@ -118,7 +118,7 @@ def safe_download(url,
|
||||
raise ConnectionError(f'❌ Download failure for {url}. Retry limit reached.') from e
|
||||
LOGGER.warning(f'⚠️ Download failure, retrying {i + 1}/{retry} {url}...')
|
||||
|
||||
if unzip and f.exists() and f.suffix in {'.zip', '.tar', '.gz'}:
|
||||
if unzip and f.exists() and f.suffix in ('.zip', '.tar', '.gz'):
|
||||
unzip_dir = dir or f.parent # unzip to dir if provided else unzip in place
|
||||
LOGGER.info(f'Unzipping {f} to {unzip_dir}...')
|
||||
if f.suffix == '.zip':
|
||||
|
@ -33,7 +33,7 @@ TORCH_1_12 = check_version(torch.__version__, '1.12.0')
|
||||
def torch_distributed_zero_first(local_rank: int):
|
||||
# Decorator to make all processes in distributed training wait for each local_master to do something
|
||||
initialized = torch.distributed.is_available() and torch.distributed.is_initialized()
|
||||
if initialized and local_rank not in {-1, 0}:
|
||||
if initialized and local_rank not in (-1, 0):
|
||||
dist.barrier(device_ids=[local_rank])
|
||||
yield
|
||||
if initialized and local_rank == 0:
|
||||
|
@ -43,6 +43,8 @@ class ClassificationValidator(BaseValidator):
|
||||
return build_classification_dataloader(path=dataset_path,
|
||||
imgsz=self.args.imgsz,
|
||||
batch_size=batch_size,
|
||||
augment=False,
|
||||
shuffle=False,
|
||||
workers=self.args.workers)
|
||||
|
||||
def print_results(self):
|
||||
|
@ -30,8 +30,8 @@ class DetectionPredictor(BasePredictor):
|
||||
results = []
|
||||
for i, pred in enumerate(preds):
|
||||
orig_img = orig_imgs[i] if isinstance(orig_imgs, list) else orig_imgs
|
||||
shape = orig_img.shape
|
||||
pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], shape).round()
|
||||
if not isinstance(orig_imgs, torch.Tensor):
|
||||
pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape)
|
||||
path, _, _, _, _ = self.batch
|
||||
img_path = path[i] if isinstance(path, list) else path
|
||||
results.append(Results(orig_img=orig_img, path=img_path, names=self.model.names, boxes=pred))
|
||||
|
@ -23,18 +23,19 @@ class SegmentationPredictor(DetectionPredictor):
|
||||
proto = preds[1][-1] if len(preds[1]) == 3 else preds[1] # second output is len 3 if pt, but only 1 if exported
|
||||
for i, pred in enumerate(p):
|
||||
orig_img = orig_imgs[i] if isinstance(orig_imgs, list) else orig_imgs
|
||||
shape = orig_img.shape
|
||||
path, _, _, _, _ = self.batch
|
||||
img_path = path[i] if isinstance(path, list) else path
|
||||
if not len(pred): # save empty boxes
|
||||
results.append(Results(orig_img=orig_img, path=img_path, names=self.model.names, boxes=pred[:, :6]))
|
||||
continue
|
||||
if self.args.retina_masks:
|
||||
pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], shape).round()
|
||||
masks = ops.process_mask_native(proto[i], pred[:, 6:], pred[:, :4], shape[:2]) # HWC
|
||||
if not isinstance(orig_imgs, torch.Tensor):
|
||||
pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape)
|
||||
masks = ops.process_mask_native(proto[i], pred[:, 6:], pred[:, :4], orig_img.shape[:2]) # HWC
|
||||
else:
|
||||
masks = ops.process_mask(proto[i], pred[:, 6:], pred[:, :4], img.shape[2:], upsample=True) # HWC
|
||||
pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], shape).round()
|
||||
if not isinstance(orig_imgs, torch.Tensor):
|
||||
pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape)
|
||||
results.append(
|
||||
Results(orig_img=orig_img, path=img_path, names=self.model.names, boxes=pred[:, :6], masks=masks))
|
||||
return results
|
||||
|
Reference in New Issue
Block a user