Add RTDETR Trainer (#2745)
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com> Co-authored-by: Laughing-q <1185102784@qq.com> Co-authored-by: Kayzwer <68285002+Kayzwer@users.noreply.github.com> Co-authored-by: Laughing <61612323+Laughing-q@users.noreply.github.com>
This commit is contained in:
@ -5,15 +5,15 @@
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from ultralytics.nn.tasks import DetectionModel, attempt_load_one_weight, yaml_model_load
|
||||
from ultralytics.nn.tasks import RTDETRDetectionModel, attempt_load_one_weight, yaml_model_load
|
||||
from ultralytics.yolo.cfg import get_cfg
|
||||
from ultralytics.yolo.engine.exporter import Exporter
|
||||
from ultralytics.yolo.utils import DEFAULT_CFG, DEFAULT_CFG_DICT, LOGGER, ROOT, is_git_dir
|
||||
from ultralytics.yolo.utils import DEFAULT_CFG, DEFAULT_CFG_DICT, LOGGER, RANK, ROOT, is_git_dir
|
||||
from ultralytics.yolo.utils.checks import check_imgsz
|
||||
from ultralytics.yolo.utils.torch_utils import model_info
|
||||
from ultralytics.yolo.utils.torch_utils import model_info, smart_inference_mode
|
||||
|
||||
from ...yolo.utils.torch_utils import smart_inference_mode
|
||||
from .predict import RTDETRPredictor
|
||||
from .train import RTDETRTrainer
|
||||
from .val import RTDETRValidator
|
||||
|
||||
|
||||
@ -24,6 +24,7 @@ class RTDETR:
|
||||
raise NotImplementedError('RT-DETR only supports creating from pt file or yaml file.')
|
||||
# Load or create new YOLO model
|
||||
self.predictor = None
|
||||
self.ckpt = None
|
||||
suffix = Path(model).suffix
|
||||
if suffix == '.yaml':
|
||||
self._new(model)
|
||||
@ -34,7 +35,7 @@ class RTDETR:
|
||||
cfg_dict = yaml_model_load(cfg)
|
||||
self.cfg = cfg
|
||||
self.task = 'detect'
|
||||
self.model = DetectionModel(cfg_dict, verbose=verbose) # build model
|
||||
self.model = RTDETRDetectionModel(cfg_dict, verbose=verbose) # build model
|
||||
|
||||
# Below added to allow export from yamls
|
||||
self.model.args = DEFAULT_CFG_DICT # attach args to model
|
||||
@ -42,10 +43,20 @@ class RTDETR:
|
||||
|
||||
@smart_inference_mode()
|
||||
def _load(self, weights: str):
|
||||
self.model, _ = attempt_load_one_weight(weights)
|
||||
self.model, self.ckpt = attempt_load_one_weight(weights)
|
||||
self.model.args = DEFAULT_CFG_DICT # attach args to model
|
||||
self.task = self.model.args['task']
|
||||
|
||||
@smart_inference_mode()
|
||||
def load(self, weights='yolov8n.pt'):
|
||||
"""
|
||||
Transfers parameters with matching names and shapes from 'weights' to model.
|
||||
"""
|
||||
if isinstance(weights, (str, Path)):
|
||||
weights, self.ckpt = attempt_load_one_weight(weights)
|
||||
self.model.load(weights)
|
||||
return self
|
||||
|
||||
@smart_inference_mode()
|
||||
def predict(self, source=None, stream=False, **kwargs):
|
||||
"""
|
||||
@ -74,8 +85,30 @@ class RTDETR:
|
||||
return self.predictor(source, stream=stream)
|
||||
|
||||
def train(self, **kwargs):
|
||||
"""Function trains models but raises an error as RTDETR models do not support training."""
|
||||
raise NotImplementedError("RTDETR models don't support training")
|
||||
"""
|
||||
Trains the model on a given dataset.
|
||||
|
||||
Args:
|
||||
**kwargs (Any): Any number of arguments representing the training configuration.
|
||||
"""
|
||||
overrides = dict(task='detect', mode='train')
|
||||
overrides.update(kwargs)
|
||||
overrides['deterministic'] = False
|
||||
if not overrides.get('data'):
|
||||
raise AttributeError("Dataset required but missing, i.e. pass 'data=coco128.yaml'")
|
||||
if overrides.get('resume'):
|
||||
overrides['resume'] = self.ckpt_path
|
||||
self.task = overrides.get('task') or self.task
|
||||
self.trainer = RTDETRTrainer(overrides=overrides)
|
||||
if not overrides.get('resume'): # manually set model only if not resuming
|
||||
self.trainer.model = self.trainer.get_model(weights=self.model if self.ckpt else None, cfg=self.model.yaml)
|
||||
self.model = self.trainer.model
|
||||
self.trainer.train()
|
||||
# Update model and cfg after training
|
||||
if RANK in (-1, 0):
|
||||
self.model, _ = attempt_load_one_weight(str(self.trainer.best))
|
||||
self.overrides = self.model.args
|
||||
self.metrics = getattr(self.trainer.validator, 'metrics', None) # TODO: no metrics returned by DDP
|
||||
|
||||
def val(self, **kwargs):
|
||||
"""Run validation given dataset."""
|
||||
|
78
ultralytics/vit/rtdetr/train.py
Normal file
78
ultralytics/vit/rtdetr/train.py
Normal file
@ -0,0 +1,78 @@
|
||||
from copy import copy
|
||||
|
||||
import torch
|
||||
|
||||
from ultralytics.nn.tasks import RTDETRDetectionModel
|
||||
from ultralytics.yolo.utils import DEFAULT_CFG, RANK, colorstr
|
||||
from ultralytics.yolo.v8.detect import DetectionTrainer
|
||||
|
||||
from .val import RTDETRDataset, RTDETRValidator
|
||||
|
||||
|
||||
class RTDETRTrainer(DetectionTrainer):
|
||||
|
||||
def get_model(self, cfg=None, weights=None, verbose=True):
|
||||
"""Return a YOLO detection model."""
|
||||
model = RTDETRDetectionModel(cfg, nc=self.data['nc'], verbose=verbose and RANK == -1)
|
||||
if weights:
|
||||
model.load(weights)
|
||||
return model
|
||||
|
||||
def build_dataset(self, img_path, mode='val', batch=None):
|
||||
"""Build RTDETR Dataset
|
||||
|
||||
Args:
|
||||
img_path (str): Path to the folder containing images.
|
||||
mode (str): `train` mode or `val` mode, users are able to customize different augmentations for each mode.
|
||||
batch (int, optional): Size of batches, this is for `rect`. Defaults to None.
|
||||
"""
|
||||
return RTDETRDataset(
|
||||
img_path=img_path,
|
||||
imgsz=self.args.imgsz,
|
||||
batch_size=batch,
|
||||
augment=mode == 'train', # no augmentation
|
||||
hyp=self.args,
|
||||
rect=False, # no rect
|
||||
cache=self.args.cache or None,
|
||||
prefix=colorstr(f'{mode}: '),
|
||||
data=self.data)
|
||||
|
||||
def get_validator(self):
|
||||
"""Returns a DetectionValidator for RTDETR model validation."""
|
||||
self.loss_names = 'giou_loss', 'cls_loss', 'l1_loss'
|
||||
return RTDETRValidator(self.test_loader, save_dir=self.save_dir, args=copy(self.args))
|
||||
|
||||
def preprocess_batch(self, batch):
|
||||
"""Preprocesses a batch of images by scaling and converting to float."""
|
||||
batch = super().preprocess_batch(batch)
|
||||
bs = len(batch['img'])
|
||||
batch_idx = batch['batch_idx']
|
||||
gt_bbox, gt_class = [], []
|
||||
for i in range(bs):
|
||||
gt_bbox.append(batch['bboxes'][batch_idx == i].to(batch_idx.device))
|
||||
gt_class.append(batch['cls'][batch_idx == i].to(device=batch_idx.device, dtype=torch.long))
|
||||
return batch
|
||||
|
||||
|
||||
def train(cfg=DEFAULT_CFG, use_python=False):
|
||||
"""Train and optimize RTDETR model given training data and device."""
|
||||
model = 'rtdetr-l.yaml'
|
||||
data = cfg.data or 'coco128.yaml' # or yolo.ClassificationDataset("mnist")
|
||||
device = cfg.device if cfg.device is not None else ''
|
||||
|
||||
# NOTE: F.grid_sample which is in rt-detr does not support deterministic=True
|
||||
# NOTE: amp training causes nan outputs and end with error while doing bipartite graph matching
|
||||
args = dict(model=model,
|
||||
data=data,
|
||||
device=device,
|
||||
imgsz=640,
|
||||
exist_ok=True,
|
||||
batch=4,
|
||||
deterministic=False,
|
||||
amp=False)
|
||||
trainer = RTDETRTrainer(overrides=args)
|
||||
trainer.train()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
train()
|
@ -2,10 +2,12 @@
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
import torch
|
||||
|
||||
from ultralytics.yolo.data import YOLODataset
|
||||
from ultralytics.yolo.data.augment import Compose, Format, LetterBox
|
||||
from ultralytics.yolo.data.augment import Compose, Format, v8_transforms
|
||||
from ultralytics.yolo.utils import colorstr, ops
|
||||
from ultralytics.yolo.v8.detect import DetectionValidator
|
||||
|
||||
@ -18,9 +20,41 @@ class RTDETRDataset(YOLODataset):
|
||||
def __init__(self, *args, data=None, **kwargs):
|
||||
super().__init__(*args, data=data, use_segments=False, use_keypoints=False, **kwargs)
|
||||
|
||||
# NOTE: add stretch version load_image for rtdetr mosaic
|
||||
def load_image(self, i):
|
||||
"""Loads 1 image from dataset index 'i', returns (im, resized hw)."""
|
||||
im, f, fn = self.ims[i], self.im_files[i], self.npy_files[i]
|
||||
if im is None: # not cached in RAM
|
||||
if fn.exists(): # load npy
|
||||
im = np.load(fn)
|
||||
else: # read image
|
||||
im = cv2.imread(f) # BGR
|
||||
if im is None:
|
||||
raise FileNotFoundError(f'Image Not Found {f}')
|
||||
h0, w0 = im.shape[:2] # orig hw
|
||||
im = cv2.resize(im, (self.imgsz, self.imgsz), interpolation=cv2.INTER_LINEAR)
|
||||
|
||||
# Add to buffer if training with augmentations
|
||||
if self.augment:
|
||||
self.ims[i], self.im_hw0[i], self.im_hw[i] = im, (h0, w0), im.shape[:2] # im, hw_original, hw_resized
|
||||
self.buffer.append(i)
|
||||
if len(self.buffer) >= self.max_buffer_length:
|
||||
j = self.buffer.pop(0)
|
||||
self.ims[j], self.im_hw0[j], self.im_hw[j] = None, None, None
|
||||
|
||||
return im, (h0, w0), im.shape[:2]
|
||||
|
||||
return self.ims[i], self.im_hw0[i], self.im_hw[i]
|
||||
|
||||
def build_transforms(self, hyp=None):
|
||||
"""Temporarily, only for evaluation."""
|
||||
transforms = Compose([LetterBox(new_shape=(self.imgsz, self.imgsz), auto=False, scaleFill=True)])
|
||||
if self.augment:
|
||||
hyp.mosaic = hyp.mosaic if self.augment and not self.rect else 0.0
|
||||
hyp.mixup = hyp.mixup if self.augment and not self.rect else 0.0
|
||||
transforms = v8_transforms(self, self.imgsz, hyp, stretch=True)
|
||||
else:
|
||||
# transforms = Compose([LetterBox(new_shape=(self.imgsz, self.imgsz), auto=False, scaleFill=True)])
|
||||
transforms = Compose([])
|
||||
transforms.append(
|
||||
Format(bbox_format='xywh',
|
||||
normalize=True,
|
||||
@ -65,6 +99,8 @@ class RTDETRValidator(DetectionValidator):
|
||||
# Do not need threshold for evaluation as only got 300 boxes here.
|
||||
# idx = score > self.args.conf
|
||||
pred = torch.cat([bbox, score[..., None], cls[..., None]], dim=-1) # filter
|
||||
# sort by confidence to correctly get internal metrics.
|
||||
pred = pred[score.argsort(descending=True)]
|
||||
outputs[i] = pred # [idx]
|
||||
|
||||
return outputs
|
||||
@ -100,7 +136,8 @@ class RTDETRValidator(DetectionValidator):
|
||||
tbox[..., [0, 2]] *= shape[1] # native-space pred
|
||||
tbox[..., [1, 3]] *= shape[0] # native-space pred
|
||||
labelsn = torch.cat((cls, tbox), 1) # native-space labels
|
||||
correct_bboxes = self._process_batch(predn, labelsn)
|
||||
# NOTE: To get correct metrics, the inputs of `_process_batch` should always be float32 type.
|
||||
correct_bboxes = self._process_batch(predn.float(), labelsn)
|
||||
# TODO: maybe remove these `self.` arguments as they already are member variable
|
||||
if self.args.plots:
|
||||
self.confusion_matrix.process_batch(predn, labelsn)
|
||||
|
Reference in New Issue
Block a user