ultralytics 8.0.104 bug fixes and thop dependency removal (#2665)

Co-authored-by: Kevin Abraham <5976139+abraha2d@users.noreply.github.com>
Co-authored-by: Kevin Abraham <abraha2d@users.noreply.github.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Laughing <61612323+Laughing-q@users.noreply.github.com>
This commit is contained in:
Glenn Jocher
2023-05-17 02:26:01 +02:00
committed by GitHub
parent 7884098857
commit b1119d512e
13 changed files with 132 additions and 27 deletions

View File

@ -9,7 +9,7 @@ import os
import subprocess
import time
from copy import deepcopy
from datetime import datetime
from datetime import datetime, timedelta
from pathlib import Path
import numpy as np
@ -181,8 +181,6 @@ class BaseTrainer:
# Command
cmd, file = generate_ddp_command(world_size, self)
try:
LOGGER.info('Pre-caching dataset to avoid NCCL timeout before running DDP command')
deepcopy(self)._setup_train(world_size=0)
LOGGER.info(f'Running DDP command {cmd}')
subprocess.run(cmd, check=True)
except Exception as e:
@ -197,7 +195,11 @@ class BaseTrainer:
torch.cuda.set_device(RANK)
self.device = torch.device('cuda', RANK)
LOGGER.info(f'DDP settings: RANK {RANK}, WORLD_SIZE {world_size}, DEVICE {self.device}')
dist.init_process_group('nccl' if dist.is_nccl_available() else 'gloo', rank=RANK, world_size=world_size)
os.environ['NCCL_BLOCKING_WAIT'] = '1' # make NCCL collectives block so the init_process_group timeout below is enforced
dist.init_process_group('nccl' if dist.is_nccl_available() else 'gloo',
timeout=timedelta(seconds=3600),
rank=RANK,
world_size=world_size)
def _setup_train(self, world_size):
"""