@@ -24,11 +24,11 @@ from tqdm import tqdm
 import ultralytics.yolo.utils as utils
 import ultralytics.yolo.utils.callbacks as callbacks
 from ultralytics.yolo.data.utils import check_dataset, check_dataset_yaml
-from ultralytics.yolo.utils import LOGGER, ROOT, TQDM_BAR_FORMAT
+from ultralytics.yolo.utils import LOGGER, ROOT, TQDM_BAR_FORMAT, colorstr
 from ultralytics.yolo.utils.checks import print_args
 from ultralytics.yolo.utils.files import increment_path, save_yaml
 from ultralytics.yolo.utils.modeling import get_model
-from ultralytics.yolo.utils.torch_utils import ModelEMA, de_parallel, init_seeds, one_cycle
+from ultralytics.yolo.utils.torch_utils import ModelEMA, de_parallel, init_seeds, one_cycle, strip_optimizer
 
 DEFAULT_CONFIG = ROOT / "yolo/utils/configs/default.yaml"
 RANK = int(os.getenv('RANK', -1))
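Note on the import changes: `colorstr` is used in the updated `build_optimizer` log line at the bottom of this diff, and `strip_optimizer` backs the new `final_eval` further down, which rewrites `last.pt`/`best.pt` without training-only state so they shrink to inference size. A minimal sketch of the idea behind optimizer stripping (the checkpoint keys are illustrative, not ultralytics' exact implementation):

    import torch

    def strip_optimizer_sketch(path):
        ckpt = torch.load(path, map_location='cpu')
        for k in ('optimizer', 'best_fitness', 'ema', 'updates'):  # training-only entries
            ckpt.pop(k, None)
        torch.save(ckpt, path)  # overwrite with the slimmed checkpoint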
@@ -48,13 +48,15 @@ class BaseTrainer:
         self.wdir = self.save_dir / 'weights'  # weights dir
         self.wdir.mkdir(parents=True, exist_ok=True)  # make dir
         self.last, self.best = self.wdir / 'last.pt', self.wdir / 'best.pt'  # checkpoint paths
+        self.batch_size = self.args.batch_size
+        self.epochs = self.args.epochs
         print_args(dict(self.args))
 
         # Save run settings
         save_yaml(self.save_dir / 'args.yaml', OmegaConf.to_container(self.args, resolve=True))
 
         # device
-        self.device = utils.torch_utils.select_device(self.args.device, self.args.batch_size)
+        self.device = utils.torch_utils.select_device(self.args.device, self.batch_size)
         self.scaler = amp.GradScaler(enabled=self.device.type != 'cpu')
 
         # Model and Dataloaders.
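Caching `self.batch_size` and `self.epochs` up front lets the rest of the trainer stop reaching through `self.args`, which matters once DDP starts deriving per-rank batch sizes below. The `GradScaler` created here (enabled only off-CPU) participates in the standard torch AMP recipe; a self-contained sketch of that loop with a dummy model:

    import torch
    from torch.cuda import amp

    model = torch.nn.Linear(4, 1)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
    scaler = amp.GradScaler(enabled=torch.cuda.is_available())  # no-op on CPU, as in the trainer

    x, y = torch.randn(8, 4), torch.randn(8, 1)
    with amp.autocast(enabled=torch.cuda.is_available()):
        loss = torch.nn.functional.mse_loss(model(x), y)
    scaler.scale(loss).backward()  # scaled backward, mirroring _do_train
    scaler.step(optimizer)         # unscales grads, skips the step on inf/nan
    scaler.update()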
@@ -73,10 +75,11 @@ class BaseTrainer:
         self.scheduler = None
 
         # epoch level metrics
+        self.metrics = {}  # handle metrics returned by validator
         self.best_fitness = None
         self.fitness = None
         self.loss = None
+        self.tloss = None
+        self.csv = self.save_dir / 'results.csv'
 
         for callback, func in callbacks.default_callbacks.items():
             self.add_callback(callback, func)
@@ -122,6 +125,7 @@ class BaseTrainer:
         if world_size > 1:
             mp.spawn(self._do_train, args=(world_size,), nprocs=world_size, join=True)
         else:
+            # self._do_train(int(os.getenv("RANK", -1)), world_size)
             self._do_train()
 
     def _setup_ddp(self, rank, world_size):
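`mp.spawn` prepends the process index to the target's arguments, so each spawned `_do_train` receives its `rank` implicitly while `args=(world_size,)` supplies the rest; the commented-out line records the equivalent explicit call. A standalone sketch of the same launch pattern:

    import torch.multiprocessing as mp

    def worker(rank, world_size):  # rank is injected by mp.spawn
        print(f"process {rank} of {world_size}")

    if __name__ == "__main__":
        world_size = 2
        mp.spawn(worker, args=(world_size,), nprocs=world_size, join=True)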
@@ -129,21 +133,20 @@ class BaseTrainer:
         os.environ['MASTER_PORT'] = '9020'
         torch.cuda.set_device(rank)
         self.device = torch.device('cuda', rank)
-        print(f"RANK - WORLD_SIZE - DEVICE: {rank} - {world_size} - {self.device} ")
+        self.console.info(f"RANK - WORLD_SIZE - DEVICE: {rank} - {world_size} - {self.device} ")
 
         dist.init_process_group("nccl" if dist.is_nccl_available() else "gloo", rank=rank, world_size=world_size)
         self.model = self.model.to(self.device)
         self.model = DDP(self.model, device_ids=[rank])
-        self.args.batch_size = self.args.batch_size // world_size
 
-    def _setup_train(self, rank):
+    def _setup_train(self, rank, world_size):
         """
         Builds dataloaders and optimizer on correct rank process
         """
         # Optimizer
         self.set_model_attributes()
-        accumulate = max(round(self.args.nbs / self.args.batch_size), 1)  # accumulate loss before optimizing
-        self.args.weight_decay *= self.args.batch_size * accumulate / self.args.nbs  # scale weight_decay
+        self.accumulate = max(round(self.args.nbs / self.batch_size), 1)  # accumulate loss before optimizing
+        self.args.weight_decay *= self.batch_size * self.accumulate / self.args.nbs  # scale weight_decay
         self.optimizer = build_optimizer(model=self.model,
                                          name=self.args.optimizer,
                                          lr=self.args.lr0,
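Moving `accumulate` onto `self` fixes a real bug: the warmup code inside `_do_train` recomputes it every iteration, and the optimizer-step condition needs to see that updated value rather than the local computed here. The arithmetic, assuming a nominal batch size nbs=64 and an actual batch size of 16:

    nbs, batch_size, weight_decay = 64, 16, 0.0005
    accumulate = max(round(nbs / batch_size), 1)   # 4: sum gradients over 4 batches per step
    weight_decay *= batch_size * accumulate / nbs  # x1.0: unchanged when the product matches nbs

so weight decay is only rescaled when gradient accumulation cannot exactly reproduce the nominal batch size.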
@@ -151,18 +154,21 @@ class BaseTrainer:
                                          decay=self.args.weight_decay)
         # Scheduler
         if self.args.cos_lr:
-            self.lf = one_cycle(1, self.args.lrf, self.args.epochs)  # cosine 1->hyp['lrf']
+            self.lf = one_cycle(1, self.args.lrf, self.epochs)  # cosine 1->hyp['lrf']
         else:
-            self.lf = lambda x: (1 - x / self.args.epochs) * (1.0 - self.args.lrf + self.args.lrf)  # linear
+            self.lf = lambda x: (1 - x / self.epochs) * (1.0 - self.args.lrf) + self.args.lrf  # linear
         self.scheduler = lr_scheduler.LambdaLR(self.optimizer, lr_lambda=self.lf)
 
         # dataloaders
-        self.train_loader = self.get_dataloader(self.trainset, batch_size=self.args.batch_size, rank=rank)
+        batch_size = self.batch_size // world_size
+        self.train_loader = self.get_dataloader(self.trainset, batch_size=batch_size, rank=rank, mode="train")
         if rank in {0, -1}:
-            print(" Creating testloader rank :", rank)
-            self.test_loader = self.get_dataloader(self.testset, batch_size=self.args.batch_size * 2, rank=-1)
-            self.validator = self.get_validator()
-            print("created testloader :", rank)
+            self.test_loader = self.get_dataloader(self.testset, batch_size=batch_size * 2, rank=-1, mode="val")
+            validator = self.get_validator()
+            # init metric, for plot_results
+            metric_keys = validator.metric_keys + self.label_loss_items(prefix="val")
+            self.metrics = dict(zip(metric_keys, [0] * len(metric_keys)))
+            self.validator = validator
         self.ema = ModelEMA(self.model)
 
     def _do_train(self, rank=-1, world_size=1):
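The linear-schedule rewrite is the important fix here: the old lambda multiplied by `(1.0 - self.args.lrf + self.args.lrf)`, which is just 1.0, so the LR decayed linearly to zero instead of flattening out at lr0 * lrf. A quick endpoint check of the corrected form:

    epochs, lrf = 100, 0.01
    lf = lambda x: (1 - x / epochs) * (1.0 - lrf) + lrf
    assert abs(lf(0) - 1.0) < 1e-9       # epoch 0: full lr0
    assert abs(lf(epochs) - lrf) < 1e-9  # final epoch: lr0 * lrf, not 0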
@@ -172,7 +178,7 @@ class BaseTrainer:
         self.model = self.model.to(self.device)
 
         self.trigger_callbacks("before_train")
-        self._setup_train(rank)
+        self._setup_train(rank, world_size)
 
         self.epoch = 0
         self.epoch_time = None
@@ -181,13 +187,17 @@ class BaseTrainer:
         nb = len(self.train_loader)  # number of batches
         nw = max(round(self.args.warmup_epochs * nb), 100)  # number of warmup iterations
         last_opt_step = -1
-        for epoch in range(self.args.epochs):
+        for epoch in range(self.epochs):
             self.trigger_callbacks("on_epoch_start")
             self.model.train()
+            if rank != -1:
+                self.train_loader.sampler.set_epoch(epoch)
             pbar = enumerate(self.train_loader)
             if rank in {-1, 0}:
+                self.console.info(self.progress_string())
                 pbar = tqdm(enumerate(self.train_loader), total=len(self.train_loader), bar_format=TQDM_BAR_FORMAT)
             self.tloss = None
+            self.optimizer.zero_grad()
             for i, batch in pbar:
                 self.trigger_callbacks("on_batch_start")
                 # forward
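The new `sampler.set_epoch(epoch)` call is what actually reshuffles data under DDP: `DistributedSampler` seeds its permutation with the epoch number, so without it every epoch replays the same order on every rank. A generic sketch of the pattern (plain tensors, not the ultralytics dataloader):

    import torch
    from torch.utils.data import DataLoader, TensorDataset
    from torch.utils.data.distributed import DistributedSampler

    dataset = TensorDataset(torch.arange(16).float())
    sampler = DistributedSampler(dataset, num_replicas=2, rank=0)  # normally taken from the process group
    loader = DataLoader(dataset, batch_size=4, sampler=sampler)
    for epoch in range(3):
        sampler.set_epoch(epoch)  # new shuffle seed each epoch
        for batch in loader:
            pass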
@@ -197,7 +207,7 @@ class BaseTrainer:
                 ni = i + nb * epoch
                 if ni <= nw:
                     xi = [0, nw]  # x interp
-                    accumulate = max(1, np.interp(ni, xi, [1, self.args.nbs / self.args.batch_size]).round())
+                    self.accumulate = max(1, np.interp(ni, xi, [1, self.args.nbs / self.batch_size]).round())
                     for j, x in enumerate(self.optimizer.param_groups):
                         # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
                         x['lr'] = np.interp(
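During warmup `np.interp` ramps the accumulation count from 1 up to nbs / batch_size as ni runs from 0 to nw; writing it to `self.accumulate` makes the ramp visible to the optimizer-step check below. With nbs=64, batch_size=16, nw=1000:

    import numpy as np

    xi = [0, 1000]  # [0, nw]
    for ni in (0, 500, 1000):
        print(max(1, np.interp(ni, xi, [1, 64 / 16]).round()))  # 1.0, 2.0, 4.0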
@@ -207,37 +217,47 @@ class BaseTrainer:
                 preds = self.model(batch["img"])
                 self.loss, self.loss_items = self.criterion(preds, batch)
+                if rank != -1:
+                    self.loss *= world_size
                 self.tloss = (self.tloss * i + self.loss_items) / (i + 1) if self.tloss is not None \
                     else self.loss_items
 
                 # backward
-                self.model.zero_grad(set_to_none=True)
                 self.scaler.scale(self.loss).backward()
 
                 # optimize
-                if ni - last_opt_step >= accumulate:
+                if ni - last_opt_step >= self.accumulate:
                     self.optimizer_step()
                     last_opt_step = ni
 
                 # log
-                mem = (torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0)  # (GB)
+                mem = f'{torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0:.3g}G'  # (GB)
                 loss_len = self.tloss.shape[0] if len(self.tloss.size()) else 1
                 losses = self.tloss if loss_len > 1 else torch.unsqueeze(self.tloss, 0)
                 if rank in {-1, 0}:
                     pbar.set_description(
-                        (" {} " + "{:.3f} " * (1 + loss_len) + ' {} ').format(f'{epoch + 1}/{self.args.epochs}', mem,
-                                                                              *losses, batch["img"].shape[-1]))
+                        ('%11s' * 2 + '%11.4g' * (2 + loss_len)) %
+                        (f'{epoch + 1}/{self.epochs}', mem, *losses, batch["cls"].shape[0], batch["img"].shape[-1]))
                 self.trigger_callbacks('on_batch_end')
+                if self.args.plots and ni < 3:
+                    self.plot_training_samples(batch, ni)
 
+            lr = {f"lr{ir}": x['lr'] for ir, x in enumerate(self.optimizer.param_groups)}  # for loggers
+            self.scheduler.step()
 
             if rank in [-1, 0]:
                 # validation
                 self.trigger_callbacks('on_val_start')
                 self.ema.update_attr(self.model, include=['yaml', 'nc', 'args', 'names', 'stride', 'class_weights'])
-                self.metrics, self.fitness = self.validate()
+                final_epoch = (epoch + 1 == self.epochs)
+                if not self.args.noval or final_epoch:
+                    self.metrics, self.fitness = self.validate()
                 self.trigger_callbacks('on_val_end')
+                log_vals = self.label_loss_items(self.tloss) | self.metrics | lr
+                self.save_metrics(metrics=log_vals)
 
                 # save model
-                if (not self.args.nosave) or (self.epoch + 1 == self.args.epochs):
+                if (not self.args.nosave) or (self.epoch + 1 == self.epochs):
                     self.save_model()
                     self.trigger_callbacks('on_model_save')
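The new `self.loss *= world_size` compensates for DDP's gradient averaging: each rank computes its loss on a 1 / world_size share of the global batch, and DDP averages (rather than sums) gradients across ranks, so the extra factor restores the gradient a single process would have produced on the full batch. In stand-in numbers, with world_size=2:

    world_size = 2
    per_rank_grad = 0.5                                          # stand-in per-rank gradient value
    ddp_averaged = (per_rank_grad + per_rank_grad) / world_size  # 0.5 after DDP's all-reduce mean
    restored = ddp_averaged * world_size                         # 1.0, the summed-batch gradient

Dropping `self.model.zero_grad(set_to_none=True)` also matters: zeroing before every backward discarded exactly the gradients being accumulated across batches. The new placement zeroes once before the loop, with `optimizer_step` presumably zeroing again after each actual step.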
@@ -248,9 +268,15 @@ class BaseTrainer:
 
             # TODO: termination condition
 
-        self.log(f"\nTraining complete ({(time.time() - self.train_time_start) / 3600:.3f} hours)")
-        self.trigger_callbacks('on_train_end')
+        if rank in [-1, 0]:
+            # do the last evaluation with best.pt
+            self.final_eval()
+            if self.args.plots:
+                self.plot_metrics()
+            self.log(f"\nTraining complete ({(time.time() - self.train_time_start) / 3600:.3f} hours)")
+            self.trigger_callbacks('on_train_end')
         dist.destroy_process_group() if world_size != 1 else None
+        torch.cuda.empty_cache()
 
     def save_model(self):
         ckpt = {
@@ -306,7 +332,7 @@ class BaseTrainer:
         "fitness" metric.
         """
         metrics = self.validator(self)
-        fitness = metrics.get("fitness", -self.loss.detach().cpu().numpy())  # use loss as fitness measure if not found
+        fitness = metrics.pop("fitness", -self.loss.detach().cpu().numpy())  # use loss as fitness measure if not found
         if not self.best_fitness or self.best_fitness < fitness:
             self.best_fitness = self.fitness
         return metrics, fitness
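Switching `metrics.get` to `metrics.pop` keeps the internal "fitness" entry out of the metrics dict that later flows into `save_metrics`, while still falling back to the negated detached loss when the validator reports no fitness (lower loss, higher fitness). One context line still looks suspect: `self.best_fitness = self.fitness` presumably means the local `fitness`, since `self.fitness` is only assigned from this method's return value. The pop-with-default behaviour:

    metrics = {"metrics/precision": 0.7, "metrics/recall": 0.6}
    fitness = metrics.pop("fitness", -0.25)  # -0.25; metrics itself is otherwise untouched
    print(fitness, metrics)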
@@ -339,12 +365,12 @@ class BaseTrainer:
         """
         raise NotImplementedError("criterion function not implemented in trainer")
 
-    def label_loss_items(self, loss_items):
+    def label_loss_items(self, loss_items=None, prefix="train"):
         """
         Returns a loss dict with labelled training loss items tensor
         """
         # Not needed for classification but necessary for segmentation & detection
-        return {"loss": loss_items}
+        return {"loss": loss_items} if loss_items is not None else ["loss"]
 
     def set_model_attributes(self):
         """
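`label_loss_items` now doubles as a key provider: called without a tensor it returns just the label list, which `_setup_train` concatenates with `validator.metric_keys` to pre-size the metrics dict (the `prefix` only matters to task subclasses that emit keys like `val/box_loss`; the base class ignores it). Both modes of the base behaviour:

    def label_loss_items(loss_items=None, prefix="train"):
        return {"loss": loss_items} if loss_items is not None else ["loss"]

    print(label_loss_items(0.42))  # {'loss': 0.42}
    print(label_loss_items())      # ['loss']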
@@ -355,6 +381,31 @@ class BaseTrainer:
     def build_targets(self, preds, targets):
         pass
 
+    def progress_string(self):
+        return ""
+
+    # TODO: may need to put these following functions into callback
+    def plot_training_samples(self, batch, ni):
+        pass
+
+    def save_metrics(self, metrics):
+        keys, vals = list(metrics.keys()), list(metrics.values())
+        n = len(metrics) + 1  # number of cols
+        s = '' if self.csv.exists() else (('%23s,' * n % tuple(['epoch'] + keys)).rstrip(',') + '\n')  # header
+        with open(self.csv, 'a') as f:
+            f.write(s + ('%23.5g,' * n % tuple([self.epoch] + vals)).rstrip(',') + '\n')
+
+    def plot_metrics(self):
+        pass
+
+    def final_eval(self):
+        # TODO: need standalone evaluator to do this
+        for f in self.last, self.best:
+            if f.exists():
+                strip_optimizer(f)  # strip optimizers
+                if f is self.best:
+                    self.console.info(f'\nValidating {f}...')
+
 
 def build_optimizer(model, name='Adam', lr=0.001, momentum=0.9, decay=1e-5):
     # TODO: 1. docstring with example? 2. Move this inside Trainer? or utils?
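`save_metrics` appends one fixed-width row per epoch to results.csv, writing the header only when the file does not exist yet. Reproducing the exact formatting it emits:

    metrics = {"train/loss": 1.234, "metrics/mAP50": 0.56}
    keys, vals = list(metrics.keys()), list(metrics.values())
    n = len(metrics) + 1  # +1 for the epoch column
    header = ('%23s,' * n % tuple(['epoch'] + keys)).rstrip(',') + '\n'
    row = ('%23.5g,' * n % tuple([0] + vals)).rstrip(',') + '\n'
    print(header + row, end='')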
@@ -382,7 +433,7 @@ def build_optimizer(model, name='Adam', lr=0.001, momentum=0.9, decay=1e-5):
 
     optimizer.add_param_group({'params': g[0], 'weight_decay': decay})  # add g0 with weight_decay
     optimizer.add_param_group({'params': g[1], 'weight_decay': 0.0})  # add g1 (BatchNorm2d weights)
-    LOGGER.info(f"optimizer: {type(optimizer).__name__}(lr={lr}) with parameter groups "
+    LOGGER.info(f"{colorstr('optimizer:')} {type(optimizer).__name__}(lr={lr}) with parameter groups "
                 f"{len(g[1])} weight(decay=0.0), {len(g[0])} weight(decay={decay}), {len(g[2])} bias")
     return optimizer
 
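For context on the log line: g[0]/g[1]/g[2] are the usual YOLOv5-style parameter groups, i.e. ordinary weights (decayed), normalization weights (decay 0.0), and biases. A condensed sketch of how such a split is typically gathered (this mirrors, but is not copied from, the function body elided in this hunk):

    import torch.nn as nn

    model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.BatchNorm2d(8))
    g = [], [], []  # weights (decay), norm weights (no decay), biases
    for module in model.modules():
        for name, p in module.named_parameters(recurse=False):
            if name == 'bias':
                g[2].append(p)
            elif isinstance(module, nn.BatchNorm2d):
                g[1].append(p)
            else:
                g[0].append(p)
    print(len(g[0]), len(g[1]), len(g[2]))  # 1 1 2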