Fix some CUDA training issues in segmentation (#46)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
This commit is contained in:
Laughing
2022-11-17 06:44:02 -06:00
committed by GitHub
parent db1031a1a9
commit 47f1cb3ef4
5 changed files with 38 additions and 21 deletions

View File

@ -142,7 +142,7 @@ class BaseTrainer:
self.train_loader = self.get_dataloader(self.trainset, batch_size=self.args.batch_size, rank=rank)
if rank in {0, -1}:
print(" Creating testloader rank :", rank)
self.test_loader = self.get_dataloader(self.testset, batch_size=self.args.batch_size * 2, rank=rank)
self.test_loader = self.get_dataloader(self.testset, batch_size=self.args.batch_size * 2, rank=-1)
self.validator = self.get_validator()
print("created testloader :", rank)
self.console.info(self.progress_string())
@ -150,6 +150,8 @@ class BaseTrainer:
def _do_train(self, rank, world_size):
if world_size > 1:
self._setup_ddp(rank, world_size)
else:
self.model = self.model.to(self.device)
# callback hook. before_train
self._setup_train(rank)
@ -192,8 +194,8 @@ class BaseTrainer:
losses = tloss if loss_len > 1 else torch.unsqueeze(tloss, 0)
if rank in {-1, 0}:
pbar.set_description(
(" {} " + "{:.3f} " * (2 + loss_len)).format(f'{epoch + 1}/{self.args.epochs}', mem, *losses,
batch["img"].shape[-1]))
(" {} " + "{:.3f} " * (1 + loss_len) + ' {} ').format(f'{epoch + 1}/{self.args.epochs}', mem,
*losses, batch["img"].shape[-1]))
if rank in [-1, 0]:
# validation
@ -286,7 +288,8 @@ class BaseTrainer:
"fitness" metric.
"""
self.metrics = self.validator(self)
self.fitness = self.metrics.get("fitness") or (-self.loss) # use loss as fitness measure if not found
self.fitness = self.metrics.get("fitness",
-self.loss.detach().cpu().numpy()) # use loss as fitness measure if not found
if not self.best_fitness or self.best_fitness < self.fitness:
self.best_fitness = self.fitness

View File

@ -6,7 +6,7 @@ from tqdm import tqdm
from ultralytics.yolo.engine.trainer import DEFAULT_CONFIG
from ultralytics.yolo.utils.ops import Profile
from ultralytics.yolo.utils.torch_utils import select_device
from ultralytics.yolo.utils.torch_utils import de_parallel, select_device
class BaseValidator:
@ -36,7 +36,9 @@ class BaseValidator:
if training:
model = trainer.model
self.args.half &= self.device.type != 'cpu'
model = model.half() if self.args.half else model
# NOTE: running evaluation with the model in half() precision causes training to get stuck,
# so it is commented out for now; half mode can probably be re-enabled once EMA is added.
# model = model.half() if self.args.half else model
else: # TODO: handle this when detectMultiBackend is supported
# model = DetectMultiBackend(model)
pass
@ -48,8 +50,8 @@ class BaseValidator:
n_batches = len(self.dataloader)
desc = self.get_desc()
bar = tqdm(self.dataloader, desc, n_batches, not training, bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}')
self.init_metrics(model)
with torch.cuda.amp.autocast(enabled=self.device.type != 'cpu'):
self.init_metrics(de_parallel(model))
with torch.no_grad():
for batch_i, batch in enumerate(bar):
self.batch_i = batch_i
# pre-process
@ -58,7 +60,7 @@ class BaseValidator:
# inference
with dt[1]:
preds = model(batch["img"])
preds = model(batch["img"].float())
# TODO: remember to add native augmentation support when implementing model, like:
# preds, train_out = model(im, augment=augment)
@ -85,6 +87,8 @@ class BaseValidator:
self.logger.info(
'Speed: %.1fms pre-process, %.1fms inference, %.1fms loss, %.1fms post-process per image at shape ' % t)
if self.training:
model.float()
# TODO: implement save json
return stats