`ultralytics 8.0.104` bug fixes and `thop` dependency removal (#2665)
Co-authored-by: Kevin Abraham <5976139+abraha2d@users.noreply.github.com>
Co-authored-by: Kevin Abraham <abraha2d@users.noreply.github.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Laughing <61612323+Laughing-q@users.noreply.github.com>
This commit is contained in:
@@ -9,7 +9,7 @@ import os
|
||||
import subprocess
|
||||
import time
|
||||
from copy import deepcopy
|
||||
from datetime import datetime
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
@@ -181,8 +181,6 @@ class BaseTrainer:
|
||||
# Command
|
||||
cmd, file = generate_ddp_command(world_size, self)
|
||||
try:
|
||||
LOGGER.info('Pre-caching dataset to avoid NCCL timeout before running DDP command')
|
||||
deepcopy(self)._setup_train(world_size=0)
|
||||
LOGGER.info(f'Running DDP command {cmd}')
|
||||
subprocess.run(cmd, check=True)
|
||||
except Exception as e:
|
||||
@@ -197,7 +195,11 @@ class BaseTrainer:
|
||||
torch.cuda.set_device(RANK)
|
||||
self.device = torch.device('cuda', RANK)
|
||||
LOGGER.info(f'DDP settings: RANK {RANK}, WORLD_SIZE {world_size}, DEVICE {self.device}')
|
||||
dist.init_process_group('nccl' if dist.is_nccl_available() else 'gloo', rank=RANK, world_size=world_size)
|
||||
os.environ['NCCL_BLOCKING_WAIT'] = '1' # set to enforce timeout
|
||||
dist.init_process_group('nccl' if dist.is_nccl_available() else 'gloo',
|
||||
timeout=timedelta(seconds=3600),
|
||||
rank=RANK,
|
||||
world_size=world_size)
|
||||
|
||||
def _setup_train(self, world_size):
|
||||
"""
|
||||
|
Reference in New Issue
Block a user