|
|
@ -136,20 +136,20 @@ class BaseTrainer:
|
|
|
|
if RANK in {0, -1}:
|
|
|
|
if RANK in {0, -1}:
|
|
|
|
callbacks.add_integration_callbacks(self)
|
|
|
|
callbacks.add_integration_callbacks(self)
|
|
|
|
|
|
|
|
|
|
|
|
def add_callback(self, onevent: str, callback):
|
|
|
|
def add_callback(self, event: str, callback):
|
|
|
|
"""
|
|
|
|
"""
|
|
|
|
appends the given callback
|
|
|
|
appends the given callback
|
|
|
|
"""
|
|
|
|
"""
|
|
|
|
self.callbacks[onevent].append(callback)
|
|
|
|
self.callbacks[event].append(callback)
|
|
|
|
|
|
|
|
|
|
|
|
def set_callback(self, onevent: str, callback):
|
|
|
|
def set_callback(self, event: str, callback):
|
|
|
|
"""
|
|
|
|
"""
|
|
|
|
overrides the existing callbacks with the given callback
|
|
|
|
overrides the existing callbacks with the given callback
|
|
|
|
"""
|
|
|
|
"""
|
|
|
|
self.callbacks[onevent] = [callback]
|
|
|
|
self.callbacks[event] = [callback]
|
|
|
|
|
|
|
|
|
|
|
|
def trigger_callbacks(self, onevent: str):
|
|
|
|
def run_callbacks(self, event: str):
|
|
|
|
for callback in self.callbacks.get(onevent, []):
|
|
|
|
for callback in self.callbacks.get(event, []):
|
|
|
|
callback(self)
|
|
|
|
callback(self)
|
|
|
|
|
|
|
|
|
|
|
|
def train(self):
|
|
|
|
def train(self):
|
|
|
@ -178,7 +178,7 @@ class BaseTrainer:
|
|
|
|
Builds dataloaders and optimizer on correct rank process
|
|
|
|
Builds dataloaders and optimizer on correct rank process
|
|
|
|
"""
|
|
|
|
"""
|
|
|
|
# model
|
|
|
|
# model
|
|
|
|
self.trigger_callbacks("on_pretrain_routine_start")
|
|
|
|
self.run_callbacks("on_pretrain_routine_start")
|
|
|
|
ckpt = self.setup_model()
|
|
|
|
ckpt = self.setup_model()
|
|
|
|
self.model = self.model.to(self.device)
|
|
|
|
self.model = self.model.to(self.device)
|
|
|
|
self.set_model_attributes()
|
|
|
|
self.set_model_attributes()
|
|
|
@ -210,7 +210,7 @@ class BaseTrainer:
|
|
|
|
metric_keys = self.validator.metric_keys + self.label_loss_items(prefix="val")
|
|
|
|
metric_keys = self.validator.metric_keys + self.label_loss_items(prefix="val")
|
|
|
|
self.metrics = dict(zip(metric_keys, [0] * len(metric_keys))) # TODO: init metrics for plot_results()?
|
|
|
|
self.metrics = dict(zip(metric_keys, [0] * len(metric_keys))) # TODO: init metrics for plot_results()?
|
|
|
|
self.ema = ModelEMA(self.model)
|
|
|
|
self.ema = ModelEMA(self.model)
|
|
|
|
self.trigger_callbacks("on_pretrain_routine_end")
|
|
|
|
self.run_callbacks("on_pretrain_routine_end")
|
|
|
|
|
|
|
|
|
|
|
|
def _do_train(self, rank=-1, world_size=1):
|
|
|
|
def _do_train(self, rank=-1, world_size=1):
|
|
|
|
if world_size > 1:
|
|
|
|
if world_size > 1:
|
|
|
@ -224,14 +224,14 @@ class BaseTrainer:
|
|
|
|
nb = len(self.train_loader) # number of batches
|
|
|
|
nb = len(self.train_loader) # number of batches
|
|
|
|
nw = max(round(self.args.warmup_epochs * nb), 100) # number of warmup iterations
|
|
|
|
nw = max(round(self.args.warmup_epochs * nb), 100) # number of warmup iterations
|
|
|
|
last_opt_step = -1
|
|
|
|
last_opt_step = -1
|
|
|
|
self.trigger_callbacks("on_train_start")
|
|
|
|
self.run_callbacks("on_train_start")
|
|
|
|
self.log(f"Image sizes {self.args.imgsz} train, {self.args.imgsz} val\n"
|
|
|
|
self.log(f"Image sizes {self.args.imgsz} train, {self.args.imgsz} val\n"
|
|
|
|
f'Using {self.train_loader.num_workers * (world_size or 1)} dataloader workers\n'
|
|
|
|
f'Using {self.train_loader.num_workers * (world_size or 1)} dataloader workers\n'
|
|
|
|
f"Logging results to {colorstr('bold', self.save_dir)}\n"
|
|
|
|
f"Logging results to {colorstr('bold', self.save_dir)}\n"
|
|
|
|
f"Starting training for {self.epochs} epochs...")
|
|
|
|
f"Starting training for {self.epochs} epochs...")
|
|
|
|
for epoch in range(self.start_epoch, self.epochs):
|
|
|
|
for epoch in range(self.start_epoch, self.epochs):
|
|
|
|
self.epoch = epoch
|
|
|
|
self.epoch = epoch
|
|
|
|
self.trigger_callbacks("on_train_epoch_start")
|
|
|
|
self.run_callbacks("on_train_epoch_start")
|
|
|
|
self.model.train()
|
|
|
|
self.model.train()
|
|
|
|
if rank != -1:
|
|
|
|
if rank != -1:
|
|
|
|
self.train_loader.sampler.set_epoch(epoch)
|
|
|
|
self.train_loader.sampler.set_epoch(epoch)
|
|
|
@ -242,7 +242,7 @@ class BaseTrainer:
|
|
|
|
self.tloss = None
|
|
|
|
self.tloss = None
|
|
|
|
self.optimizer.zero_grad()
|
|
|
|
self.optimizer.zero_grad()
|
|
|
|
for i, batch in pbar:
|
|
|
|
for i, batch in pbar:
|
|
|
|
self.trigger_callbacks("on_train_batch_start")
|
|
|
|
self.run_callbacks("on_train_batch_start")
|
|
|
|
|
|
|
|
|
|
|
|
# Update dataloader attributes (optional)
|
|
|
|
# Update dataloader attributes (optional)
|
|
|
|
if epoch == (self.epochs - self.args.close_mosaic) and hasattr(self.train_loader.dataset, 'mosaic'):
|
|
|
|
if epoch == (self.epochs - self.args.close_mosaic) and hasattr(self.train_loader.dataset, 'mosaic'):
|
|
|
@ -287,35 +287,34 @@ class BaseTrainer:
|
|
|
|
pbar.set_description(
|
|
|
|
pbar.set_description(
|
|
|
|
('%11s' * 2 + '%11.4g' * (2 + loss_len)) %
|
|
|
|
('%11s' * 2 + '%11.4g' * (2 + loss_len)) %
|
|
|
|
(f'{epoch + 1}/{self.epochs}', mem, *losses, batch["cls"].shape[0], batch["img"].shape[-1]))
|
|
|
|
(f'{epoch + 1}/{self.epochs}', mem, *losses, batch["cls"].shape[0], batch["img"].shape[-1]))
|
|
|
|
self.trigger_callbacks('on_batch_end')
|
|
|
|
self.run_callbacks('on_batch_end')
|
|
|
|
if self.args.plots and ni < 3:
|
|
|
|
if self.args.plots and ni < 3:
|
|
|
|
self.plot_training_samples(batch, ni)
|
|
|
|
self.plot_training_samples(batch, ni)
|
|
|
|
|
|
|
|
|
|
|
|
self.trigger_callbacks("on_train_batch_end")
|
|
|
|
self.run_callbacks("on_train_batch_end")
|
|
|
|
|
|
|
|
|
|
|
|
lr = {f"lr{ir}": x['lr'] for ir, x in enumerate(self.optimizer.param_groups)} # for loggers
|
|
|
|
lr = {f"lr{ir}": x['lr'] for ir, x in enumerate(self.optimizer.param_groups)} # for loggers
|
|
|
|
self.scheduler.step()
|
|
|
|
self.scheduler.step()
|
|
|
|
self.trigger_callbacks("on_train_epoch_end")
|
|
|
|
self.run_callbacks("on_train_epoch_end")
|
|
|
|
|
|
|
|
|
|
|
|
if rank in {-1, 0}:
|
|
|
|
if rank in {-1, 0}:
|
|
|
|
|
|
|
|
|
|
|
|
# Validation
|
|
|
|
# Validation
|
|
|
|
self.trigger_callbacks('on_val_start')
|
|
|
|
|
|
|
|
self.ema.update_attr(self.model, include=['yaml', 'nc', 'args', 'names', 'stride', 'class_weights'])
|
|
|
|
self.ema.update_attr(self.model, include=['yaml', 'nc', 'args', 'names', 'stride', 'class_weights'])
|
|
|
|
final_epoch = (epoch + 1 == self.epochs)
|
|
|
|
final_epoch = (epoch + 1 == self.epochs)
|
|
|
|
if self.args.val or final_epoch:
|
|
|
|
if self.args.val or final_epoch:
|
|
|
|
self.metrics, self.fitness = self.validate()
|
|
|
|
self.metrics, self.fitness = self.validate()
|
|
|
|
self.trigger_callbacks('on_val_end')
|
|
|
|
|
|
|
|
self.save_metrics(metrics={**self.label_loss_items(self.tloss), **self.metrics, **lr})
|
|
|
|
self.save_metrics(metrics={**self.label_loss_items(self.tloss), **self.metrics, **lr})
|
|
|
|
|
|
|
|
|
|
|
|
# Save model
|
|
|
|
# Save model
|
|
|
|
if self.args.save or (epoch + 1 == self.epochs):
|
|
|
|
if self.args.save or (epoch + 1 == self.epochs):
|
|
|
|
self.save_model()
|
|
|
|
self.save_model()
|
|
|
|
self.trigger_callbacks('on_model_save')
|
|
|
|
self.run_callbacks('on_model_save')
|
|
|
|
|
|
|
|
|
|
|
|
tnow = time.time()
|
|
|
|
tnow = time.time()
|
|
|
|
self.epoch_time = tnow - self.epoch_time_start
|
|
|
|
self.epoch_time = tnow - self.epoch_time_start
|
|
|
|
self.epoch_time_start = tnow
|
|
|
|
self.epoch_time_start = tnow
|
|
|
|
|
|
|
|
self.run_callbacks("on_fit_epoch_end")
|
|
|
|
# TODO: termination condition
|
|
|
|
# TODO: termination condition
|
|
|
|
|
|
|
|
|
|
|
|
if rank in {-1, 0}:
|
|
|
|
if rank in {-1, 0}:
|
|
|
@ -326,9 +325,9 @@ class BaseTrainer:
|
|
|
|
if self.args.plots:
|
|
|
|
if self.args.plots:
|
|
|
|
self.plot_metrics()
|
|
|
|
self.plot_metrics()
|
|
|
|
self.log(f"Results saved to {colorstr('bold', self.save_dir)}")
|
|
|
|
self.log(f"Results saved to {colorstr('bold', self.save_dir)}")
|
|
|
|
self.trigger_callbacks('on_train_end')
|
|
|
|
self.run_callbacks('on_train_end')
|
|
|
|
torch.cuda.empty_cache()
|
|
|
|
torch.cuda.empty_cache()
|
|
|
|
self.trigger_callbacks('teardown')
|
|
|
|
self.run_callbacks('teardown')
|
|
|
|
|
|
|
|
|
|
|
|
def save_model(self):
|
|
|
|
def save_model(self):
|
|
|
|
ckpt = {
|
|
|
|
ckpt = {
|
|
|
@ -470,7 +469,7 @@ class BaseTrainer:
|
|
|
|
self.validator.args.save_json = True
|
|
|
|
self.validator.args.save_json = True
|
|
|
|
self.metrics = self.validator(model=f)
|
|
|
|
self.metrics = self.validator(model=f)
|
|
|
|
self.metrics.pop('fitness', None)
|
|
|
|
self.metrics.pop('fitness', None)
|
|
|
|
self.trigger_callbacks('on_val_end')
|
|
|
|
self.run_callbacks('on_val_end')
|
|
|
|
|
|
|
|
|
|
|
|
def check_resume(self):
|
|
|
|
def check_resume(self):
|
|
|
|
resume = self.args.resume
|
|
|
|
resume = self.args.resume
|
|
|
|