YOLOv5 updates (#90)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2022-12-25 14:33:18 +01:00
parent ebd3cfb2fd
commit 98815d560f
27 changed files with 281 additions and 161 deletions
--- a/requirements.txt
+++ b/requirements.txt
@ -26,7 +26,7 @@ seaborn>=0.11.0
 # Export --------------------------------------
 # coremltools>=5.2  # CoreML export
-# onnx>=1.9.0  # ONNX export
+# onnx>=1.12.0  # ONNX export
 # onnx-simplifier>=0.4.1  # ONNX simplifier
 # nvidia-pyindex  # TensorRT export
 # nvidia-tensorrt  # TensorRT export
--- a/ultralytics/tests/data/dataloader/yolodetection.py
+++ b/ultralytics/tests/data/dataloader/yolodetection.py
@ -55,7 +55,7 @@ def plot_one_box(x, img, color=None, label=None, line_thickness=None):
        )
-@hydra.main(version_base=None, config_path=DEFAULT_CONFIG.parent, config_name=DEFAULT_CONFIG.name)
+@hydra.main(version_base=None, config_path=str(DEFAULT_CONFIG.parent), config_name=DEFAULT_CONFIG.name)
 def test(cfg):
    cfg.task = "detect"
    cfg.mode = "train"
--- a/ultralytics/tests/data/dataloader/yolosegment.py
+++ b/ultralytics/tests/data/dataloader/yolosegment.py
@ -54,7 +54,7 @@ def plot_one_box(x, img, color=None, label=None, line_thickness=None):
        )
-@hydra.main(version_base=None, config_path=DEFAULT_CONFIG.parent, config_name=DEFAULT_CONFIG.name)
+@hydra.main(version_base=None, config_path=str(DEFAULT_CONFIG.parent), config_name=DEFAULT_CONFIG.name)
 def test(cfg):
    cfg.task = "segment"
    cfg.mode = "train"
--- a/ultralytics/yolo/data/augment.py
+++ b/ultralytics/yolo/data/augment.py
@ -82,7 +82,7 @@ class BaseMixTransform:
            indexes = [indexes]
        # get the image information that will be used for Mosaic or MixUp
-        mix_labels = [deepcopy(dataset.get_label_info(index)) for index in indexes]
+        mix_labels = [dataset.get_label_info(index) for index in indexes]
        if self.pre_transform is not None:
            for i, data in enumerate(mix_labels):
@ -134,9 +134,8 @@ class Mosaic(BaseMixTransform):
        assert len(labels.get("mix_labels", [])) > 0, "There are no other images for mosaic augment."
        s = self.imgsz
        yc, xc = (int(random.uniform(-x, 2 * s + x)) for x in self.border)  # mosaic center x, y
        mix_labels = labels["mix_labels"]
        for i in range(4):
-            labels_patch = deepcopy(labels) if i == 0 else deepcopy(mix_labels[i - 1])
+            labels_patch = (labels if i == 0 else labels["mix_labels"][i - 1]).copy()
            # Load image
            img = labels_patch["img"]
            h, w = labels_patch["resized_shape"]
@ -186,9 +185,8 @@ class Mosaic(BaseMixTransform):
            "ori_shape": mosaic_labels[0]["ori_shape"],
            "resized_shape": (self.imgsz * 2, self.imgsz * 2),
            "im_file": mosaic_labels[0]["im_file"],
-            "cls": np.concatenate(cls, 0)}
+            "cls": np.concatenate(cls, 0),
-
+            "instances": Instances.concatenate(instances, axis=0)}
        final_labels["instances"] = Instances.concatenate(instances, axis=0)
        final_labels["instances"].clip(self.imgsz * 2, self.imgsz * 2)
        return final_labels
@ -345,7 +343,6 @@ class RandomPerspective:
        Affine images and targets.
        Args:
            img(ndarray): image.
            labels(Dict): a dict of `bboxes`, `segments`, `keypoints`.
        """
        img = labels["img"]
@ -387,7 +384,7 @@ class RandomPerspective:
        return labels
    def box_candidates(self, box1, box2, wh_thr=2, ar_thr=100, area_thr=0.1, eps=1e-16):  # box1(4,n), box2(4,n)
-        # Compute candidate boxes: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio
+        # Compute box candidates: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio
        w1, h1 = box1[2] - box1[0], box1[3] - box1[1]
        w2, h2 = box2[2] - box2[0], box2[3] - box2[1]
        ar = np.maximum(w2 / (h2 + eps), h2 / (w2 + eps))  # aspect ratio
@ -609,6 +606,7 @@ class Format:
        self.batch_idx = batch_idx  # keep the batch indexes
    def __call__(self, labels):
        labels.pop("dataset", None)
        img = labels["img"]
        h, w = img.shape[:2]
        cls = labels.pop("cls")
@ -672,10 +670,7 @@ def mosaic_transforms(imgsz, hyp):
        ),])
    return Compose([
        pre_transform,
-        MixUp(
+        MixUp(pre_transform=pre_transform, p=hyp.mixup),
            pre_transform=pre_transform,
            p=hyp.mixup,
        ),
        Albumentations(p=1.0),
        RandomHSV(hgain=hyp.hsv_h, sgain=hyp.hsv_s, vgain=hyp.hsv_v),
        RandomFlip(direction="vertical", p=hyp.flipud),
--- a/ultralytics/yolo/data/base.py
+++ b/ultralytics/yolo/data/base.py
@ -1,4 +1,5 @@
 import glob
 import math
 import os
 from multiprocessing.pool import ThreadPool
 from pathlib import Path
@ -121,7 +122,7 @@ class BaseDataset(Dataset):
            r = self.imgsz / max(h0, w0)  # ratio
            if r != 1:  # if sizes are not equal
                interp = cv2.INTER_LINEAR if (self.augment or r > 1) else cv2.INTER_AREA
-                im = cv2.resize(im, (int(w0 * r), int(h0 * r)), interpolation=interp)
+                im = cv2.resize(im, (math.ceil(w0 * r), math.ceil(h0 * r)), interpolation=interp)
            return im, (h0, w0), im.shape[:2]  # im, hw_original, hw_resized
        return self.ims[i], self.im_hw0[i], self.im_hw[i]  # im, hw_original, hw_resized
@ -179,10 +180,7 @@ class BaseDataset(Dataset):
    def get_label_info(self, index):
        label = self.labels[index].copy()
-        img, (h0, w0), (h, w) = self.load_image(index)
+        label["img"], label["ori_shape"], label["resized_shape"] = self.load_image(index)
        label["img"] = img
        label["ori_shape"] = (h0, w0)
        label["resized_shape"] = (h, w)
        if self.rect:
            label["rect_shape"] = self.batch_shapes[self.batch[index]]
        label = self.update_labels_info(label)
--- a/ultralytics/yolo/data/build.py
+++ b/ultralytics/yolo/data/build.py
@ -64,7 +64,7 @@ def build_dataloader(cfg, batch_size, img_path, stride=32, label_path=None, rank
            label_path=label_path,
            imgsz=cfg.imgsz,
            batch_size=batch_size,
-            augment=True if mode == "train" else False,  # augmentation
+            augment=mode == "train",  # augmentation
            hyp=cfg,  # TODO: probably add a get_hyps_from_cfg function
            rect=cfg.rect if mode == "train" else True,  # rectangular batches
            cache=None if cfg.noval else cfg.get("cache", None),
@ -73,31 +73,25 @@ def build_dataloader(cfg, batch_size, img_path, stride=32, label_path=None, rank
            pad=0.0 if mode == "train" else 0.5,
            prefix=colorstr(f"{mode}: "),
            use_segments=cfg.task == "segment",
-            use_keypoints=cfg.task == "keypoint",
+            use_keypoints=cfg.task == "keypoint")
        )
    batch_size = min(batch_size, len(dataset))
    nd = torch.cuda.device_count()  # number of CUDA devices
    workers = cfg.workers if mode == "train" else cfg.workers * 2
    nw = min([os.cpu_count() // max(nd, 1), batch_size if batch_size > 1 else 0, workers])  # number of workers
    sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle)
-    loader = DataLoader if cfg.image_weights else InfiniteDataLoader  # only DataLoader allows for attribute updates
+    loader = DataLoader if cfg.image_weights or cfg.close_mosaic else InfiniteDataLoader  # allow attribute updates
    generator = torch.Generator()
    generator.manual_seed(6148914691236517205 + RANK)
-    return (
+    return loader(dataset=dataset,
-        loader(
+                  batch_size=batch_size,
-            dataset=dataset,
+                  shuffle=shuffle and sampler is None,
-            batch_size=batch_size,
+                  num_workers=nw,
-            shuffle=shuffle and sampler is None,
+                  sampler=sampler,
-            num_workers=nw,
+                  pin_memory=PIN_MEMORY,
-            sampler=sampler,
+                  collate_fn=getattr(dataset, "collate_fn", None),
-            pin_memory=PIN_MEMORY,
+                  worker_init_fn=seed_worker,
-            collate_fn=getattr(dataset, "collate_fn", None),
+                  generator=generator), dataset
            worker_init_fn=seed_worker,
            generator=generator,
        ),
        dataset,
    )
 # build classification
--- a/ultralytics/yolo/data/dataset.py
+++ b/ultralytics/yolo/data/dataset.py
@ -124,13 +124,9 @@ class YOLODataset(BaseDataset):
    # TODO: use hyp config to set all these augmentations
    def build_transforms(self, hyp=None):
        mosaic = self.augment and not self.rect
        # mosaic = False
        if self.augment:
-            if mosaic:
+            mosaic = self.augment and not self.rect
-                transforms = mosaic_transforms(self.imgsz, hyp)
+            transforms = mosaic_transforms(self.imgsz, hyp) if mosaic else affine_transforms(self.imgsz, hyp)
            else:
                transforms = affine_transforms(self.imgsz, hyp)
        else:
            transforms = Compose([LetterBox(new_shape=(self.imgsz, self.imgsz))])
        transforms.append(
@ -143,7 +139,7 @@ class YOLODataset(BaseDataset):
    def update_labels_info(self, label):
        """custom your label format here"""
-        # NOTE: cls is not with bboxes now, since other tasks like classification and semantic segmentation need a independent cls label
+        # NOTE: cls is not with bboxes now, classification and semantic segmentation need an independent cls label
        # we can also make it support classification and semantic segmentation by adding or removing some dict keys here.
        bboxes = label.pop("bboxes")
        segments = label.pop("segments")
@ -206,7 +202,7 @@ class ClassificationDataset(torchvision.datasets.ImageFolder):
            sample = self.album_transforms(image=cv2.cvtColor(im, cv2.COLOR_BGR2RGB))["image"]
        else:
            sample = self.torch_transforms(im)
-        return OrderedDict(img=sample, cls=j)
+        return {'img': sample, 'cls': j}
    def __len__(self) -> int:
        return len(self.samples)
--- a/ultralytics/yolo/data/datasets/coco.yaml
+++ b/ultralytics/yolo/data/datasets/coco.yaml
@ -0,0 +1,113 @@
 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
 # COCO 2017 dataset http://cocodataset.org by Microsoft
 # Example usage: python train.py --data coco.yaml
 # parent
 # ├── yolov5
 # └── datasets
 #     └── coco  ← downloads here (20.1 GB)
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
 path: ../datasets/coco  # dataset root dir
 train: train2017.txt  # train images (relative to 'path') 118287 images
 val: val2017.txt  # val images (relative to 'path') 5000 images
 test: test-dev2017.txt  # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794
 # Classes
 names:
  0: person
  1: bicycle
  2: car
  3: motorcycle
  4: airplane
  5: bus
  6: train
  7: truck
  8: boat
  9: traffic light
  10: fire hydrant
  11: stop sign
  12: parking meter
  13: bench
  14: bird
  15: cat
  16: dog
  17: horse
  18: sheep
  19: cow
  20: elephant
  21: bear
  22: zebra
  23: giraffe
  24: backpack
  25: umbrella
  26: handbag
  27: tie
  28: suitcase
  29: frisbee
  30: skis
  31: snowboard
  32: sports ball
  33: kite
  34: baseball bat
  35: baseball glove
  36: skateboard
  37: surfboard
  38: tennis racket
  39: bottle
  40: wine glass
  41: cup
  42: fork
  43: knife
  44: spoon
  45: bowl
  46: banana
  47: apple
  48: sandwich
  49: orange
  50: broccoli
  51: carrot
  52: hot dog
  53: pizza
  54: donut
  55: cake
  56: chair
  57: couch
  58: potted plant
  59: bed
  60: dining table
  61: toilet
  62: tv
  63: laptop
  64: mouse
  65: remote
  66: keyboard
  67: cell phone
  68: microwave
  69: oven
  70: toaster
  71: sink
  72: refrigerator
  73: book
  74: clock
  75: vase
  76: scissors
  77: teddy bear
  78: hair drier
  79: toothbrush
 # Download script/URL (optional)
 download: |
  from utils.general import download, Path
  # Download labels
  segments = True  # segment or box labels
  dir = Path(yaml['path'])  # dataset root dir
  url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'
  urls = [url + ('coco2017labels-segments.zip' if segments else 'coco2017labels.zip')]  # labels
  download(urls, dir=dir.parent)
  # Download data
  urls = ['http://images.cocodataset.org/zips/train2017.zip',  # 19G, 118k images
          'http://images.cocodataset.org/zips/val2017.zip',  # 1G, 5k images
          'http://images.cocodataset.org/zips/test2017.zip']  # 7G, 41k images (optional)
  download(urls, dir=dir / 'images', threads=3)
--- a/ultralytics/yolo/engine/predictor.py
+++ b/ultralytics/yolo/engine/predictor.py
@ -29,16 +29,14 @@ import platform
 from pathlib import Path
 import cv2
 import torch
 from ultralytics.yolo.data.dataloaders.stream_loaders import LoadImages, LoadScreenshots, LoadStreams
 from ultralytics.yolo.data.utils import IMG_FORMATS, VID_FORMATS, check_dataset, check_dataset_yaml
-from ultralytics.yolo.utils import LOGGER, ROOT, TQDM_BAR_FORMAT, colorstr, ops
+from ultralytics.yolo.utils import LOGGER, ROOT, colorstr, ops
 from ultralytics.yolo.utils.checks import check_file, check_imshow
 from ultralytics.yolo.utils.configs import get_config
 from ultralytics.yolo.utils.files import increment_path
 from ultralytics.yolo.utils.modeling.autobackend import AutoBackend
 from ultralytics.yolo.utils.plotting import Annotator
 from ultralytics.yolo.utils.torch_utils import check_imgsz, select_device, smart_inference_mode
 DEFAULT_CONFIG = ROOT / "yolo/utils/configs/default.yaml"
@ -125,11 +123,7 @@ class BasePredictor:
    @smart_inference_mode()
    def __call__(self, source=None, model=None):
-        if not self.done_setup:
+        model = self.model if self.done_setup else self.setup(source, model)
            model = self.setup(source, model)
        else:
            model = self.model
        self.seen, self.windows, self.dt = 0, [], (ops.Profile(), ops.Profile(), ops.Profile())
        for batch in self.dataset:
            path, im, im0s, vid_cap, s = batch
--- a/ultralytics/yolo/engine/trainer.py
+++ b/ultralytics/yolo/engine/trainer.py
@ -60,7 +60,8 @@ class BaseTrainer:
        # device
        self.device = utils.torch_utils.select_device(self.args.device, self.batch_size)
-        self.scaler = amp.GradScaler(enabled=self.device.type != 'cpu')
+        self.amp = self.device.type != 'cpu'
        self.scaler = amp.GradScaler(enabled=self.amp)
        # Model and Dataloaders.
        self.model = self.args.model
@ -175,6 +176,10 @@ class BaseTrainer:
        nw = max(round(self.args.warmup_epochs * nb), 100)  # number of warmup iterations
        last_opt_step = -1
        self.trigger_callbacks("on_train_start")
        self.log(f"Image sizes {self.args.imgsz} train, {self.args.imgsz} val\n"
                 f'Using {self.train_loader.num_workers * (world_size or 1)} dataloader workers\n'
                 f"Logging results to {colorstr('bold', self.save_dir)}\n"
                 f"Starting training for {self.epochs} epochs...")
        for epoch in range(self.start_epoch, self.epochs):
            self.epoch = epoch
            self.trigger_callbacks("on_train_epoch_start")
@ -189,8 +194,6 @@ class BaseTrainer:
            self.optimizer.zero_grad()
            for i, batch in pbar:
                self.trigger_callbacks("on_train_batch_start")
                # forward
                batch = self.preprocess_batch(batch)
                # warmup
                ni = i + nb * epoch
@ -204,17 +207,20 @@ class BaseTrainer:
                        if 'momentum' in x:
                            x['momentum'] = np.interp(ni, xi, [self.args.warmup_momentum, self.args.momentum])
-                preds = self.model(batch["img"])
+                # Forward
-                self.loss, self.loss_items = self.criterion(preds, batch)
+                with torch.cuda.amp.autocast(self.amp):
-                if rank != -1:
+                    batch = self.preprocess_batch(batch)
-                    self.loss *= world_size
+                    preds = self.model(batch["img"])
-                self.tloss = (self.tloss * i + self.loss_items) / (i + 1) if self.tloss is not None \
+                    self.loss, self.loss_items = self.criterion(preds, batch)
-                    else self.loss_items
+                    if rank != -1:
                        self.loss *= world_size
                    self.tloss = (self.tloss * i + self.loss_items) / (i + 1) if self.tloss is not None \
                        else self.loss_items
-                # backward
+                # Backward
                self.scaler.scale(self.loss).backward()
-                # optimize
+                # Optimize - https://pytorch.org/docs/master/notes/amp_examples.html
                if ni - last_opt_step >= self.accumulate:
                    self.optimizer_step()
                    last_opt_step = ni
@ -237,7 +243,7 @@ class BaseTrainer:
            self.scheduler.step()
            self.trigger_callbacks("on_train_epoch_end")
-            if rank in [-1, 0]:
+            if rank in {-1, 0}:
                # validation
                self.trigger_callbacks('on_val_start')
                self.ema.update_attr(self.model, include=['yaml', 'nc', 'args', 'names', 'stride', 'class_weights'])
@ -245,7 +251,7 @@ class BaseTrainer:
                if not self.args.noval or final_epoch:
                    self.metrics, self.fitness = self.validate()
                self.trigger_callbacks('on_val_end')
-                log_vals = self.label_loss_items(self.tloss) | self.metrics | lr
+                log_vals = {**self.label_loss_items(self.tloss), **self.metrics, **lr}
                self.save_metrics(metrics=log_vals)
                # save model
@ -259,12 +265,13 @@ class BaseTrainer:
            # TODO: termination condition
-        if rank in [-1, 0]:
+        if rank in {-1, 0}:
            # do the last evaluation with best.pt
            self.log(f'\n{epoch - self.start_epoch + 1} epochs completed in '
                     f'{(time.time() - self.train_time_start) / 3600:.3f} hours.')
            self.final_eval()
            if self.args.plots:
                self.plot_metrics()
            self.log(f"\nTraining complete ({(time.time() - self.train_time_start) / 3600:.3f} hours)")
            self.log(f"Results saved to {colorstr('bold', self.save_dir)}")
            self.trigger_callbacks('on_train_end')
        dist.destroy_process_group() if world_size > 1 else None
--- a/ultralytics/yolo/engine/validator.py
+++ b/ultralytics/yolo/engine/validator.py
@ -1,4 +1,3 @@
 import logging
 from pathlib import Path
 import torch
@ -9,10 +8,9 @@ from ultralytics.yolo.data.utils import check_dataset, check_dataset_yaml
 from ultralytics.yolo.engine.trainer import DEFAULT_CONFIG
 from ultralytics.yolo.utils import LOGGER, TQDM_BAR_FORMAT
 from ultralytics.yolo.utils.files import increment_path
 from ultralytics.yolo.utils.modeling import get_model
 from ultralytics.yolo.utils.modeling.autobackend import AutoBackend
 from ultralytics.yolo.utils.ops import Profile
-from ultralytics.yolo.utils.torch_utils import check_imgsz, de_parallel, select_device
+from ultralytics.yolo.utils.torch_utils import check_imgsz, de_parallel, select_device, smart_inference_mode
 class BaseValidator:
@ -32,8 +30,9 @@ class BaseValidator:
        self.training = True
        self.speed = None
        self.save_dir = save_dir if save_dir is not None else \
-                increment_path(Path(self.args.project) / self.args.name, exist_ok=self.args.exist_ok)
+            increment_path(Path(self.args.project) / self.args.name, exist_ok=self.args.exist_ok)
    @smart_inference_mode()
    def __call__(self, trainer=None, model=None):
        """
        Supports validation of a pre-trained model if passed or a model being trained
@ -76,35 +75,34 @@ class BaseValidator:
        dt = Profile(), Profile(), Profile(), Profile()
        n_batches = len(self.dataloader)
        desc = self.get_desc()
-        # NOTE: keeping this `not self.training` in tqdm will eliminate pbar after finishing segmantation evaluation during training,
+        # NOTE: keeping `not self.training` in tqdm will eliminate pbar after segmentation evaluation during training,
-        # so I removed it, not sure if this will affect classification task cause I saw we use this arg in yolov5/classify/val.py.
+        # which may affect classification task since this arg is in yolov5/classify/val.py.
        # bar = tqdm(self.dataloader, desc, n_batches, not self.training, bar_format=TQDM_BAR_FORMAT)
        bar = tqdm(self.dataloader, desc, n_batches, bar_format=TQDM_BAR_FORMAT)
        self.init_metrics(de_parallel(model))
-        with torch.no_grad():
+        for batch_i, batch in enumerate(bar):
-            for batch_i, batch in enumerate(bar):
+            self.batch_i = batch_i
-                self.batch_i = batch_i
+            # pre-process
-                # pre-process
+            with dt[0]:
-                with dt[0]:
+                batch = self.preprocess(batch)
                    batch = self.preprocess(batch)
-                # inference
+            # inference
-                with dt[1]:
+            with dt[1]:
-                    preds = model(batch["img"])
+                preds = model(batch["img"])
-                # loss
+            # loss
-                with dt[2]:
+            with dt[2]:
-                    if self.training:
+                if self.training:
-                        self.loss += trainer.criterion(preds, batch)[1]
+                    self.loss += trainer.criterion(preds, batch)[1]
-                # pre-process predictions
+            # pre-process predictions
-                with dt[3]:
+            with dt[3]:
-                    preds = self.postprocess(preds)
+                preds = self.postprocess(preds)
-                self.update_metrics(preds, batch)
+            self.update_metrics(preds, batch)
-                if self.args.plots and batch_i < 3:
+            if self.args.plots and batch_i < 3:
-                    self.plot_val_samples(batch, batch_i)
+                self.plot_val_samples(batch, batch_i)
-                    self.plot_predictions(batch, preds, batch_i)
+                self.plot_predictions(batch, preds, batch_i)
        stats = self.get_stats()
        self.check_stats(stats)
@ -113,22 +111,21 @@ class BaseValidator:
        # calculate speed only once when training
        if not self.training or trainer.epoch == 0:
-            t = tuple(x.t / len(self.dataloader.dataset) * 1E3 for x in dt)  # speeds per image
+            self.speed = tuple(x.t / len(self.dataloader.dataset) * 1E3 for x in dt)  # speeds per image
            self.speed = t
-            if not self.training:  # print only at inference
+        if not self.training:  # print only at inference
-                self.logger.info(
+            self.logger.info('Speed: %.1fms pre-process, %.1fms inference, %.1fms loss, %.1fms post-process per image' %
-                    'Speed: %.1fms pre-process, %.1fms inference, %.1fms loss, %.1fms post-process per image' % t)
+                             self.speed)
        if self.training:
            model.float()
        # TODO: implement save json
-        return stats | trainer.label_loss_items(self.loss.cpu() / len(self.dataloader), prefix="val") \
+        return {**stats, **trainer.label_loss_items(self.loss.cpu() / len(self.dataloader), prefix="val")} \
-                if self.training else stats
+            if self.training else stats
    def get_dataloader(self, dataset_path, batch_size):
-        raise Exception("get_dataloder function not implemented for this validator")
+        raise NotImplementedError("get_dataloader function not implemented for this validator")
    def preprocess(self, batch):
        return batch
--- a/ultralytics/yolo/utils/init.py
+++ b/ultralytics/yolo/utils/init.py
@ -17,7 +17,7 @@ NUM_THREADS = min(8, max(1, os.cpu_count() - 1))  # number of YOLOv5 multiproces
 AUTOINSTALL = str(os.getenv('YOLOv5_AUTOINSTALL', True)).lower() == 'true'  # global auto-install mode
 FONT = 'Arial.ttf'  # https://ultralytics.com/assets/Arial.ttf
 VERBOSE = str(os.getenv('YOLOv5_VERBOSE', True)).lower() == 'true'  # global verbose mode
-TQDM_BAR_FORMAT = '{l_bar}{bar:10}| {n_fmt}/{total_fmt} {elapsed}'  # tqdm bar format
+TQDM_BAR_FORMAT = '{l_bar}{bar:10}{r_bar}'  # tqdm bar format
 LOGGING_NAME = 'yolov5'
--- a/ultralytics/yolo/utils/callbacks/clearml.py
+++ b/ultralytics/yolo/utils/callbacks/clearml.py
@ -23,9 +23,9 @@ def on_train_start(trainer):
 def on_val_end(trainer):
    if trainer.epoch == 0:
        model_info = {
-            "Inference speed (ms/img)": round(trainer.validator.speed[1], 1),
+            "Parameters": get_num_params(trainer.model),
            "GFLOPs": round(get_flops(trainer.model), 1),
-            "Parameters": get_num_params(trainer.model)}
+            "Inference speed (ms/img)": round(trainer.validator.speed[1], 1)}
        Task.current_task().connect(model_info, name='Model')
--- a/ultralytics/yolo/utils/callbacks/tb.py
+++ b/ultralytics/yolo/utils/callbacks/tb.py
@ -11,8 +11,6 @@ def _log_scalars(scalars, step=0):
 def on_train_start(trainer):
    global writer
    writer = SummaryWriter(str(trainer.save_dir))
    trainer.console.info(f"Logging results to {trainer.save_dir}\n"
                         f"Starting training for {trainer.args.epochs} epochs...")
 def on_batch_end(trainer):
--- a/ultralytics/yolo/utils/configs/default.yaml
+++ b/ultralytics/yolo/utils/configs/default.yaml
@ -27,12 +27,13 @@ local_rank: -1
 single_cls: False  # train multi-class data as single-class
 image_weights: False  # use weighted image selection for training
 rect: False  # support rectangular training
-cos_lr: False # Use cosine LR scheduler
+cos_lr: False  # use cosine LR scheduler
 close_mosaic: 10  # disable mosaic for final 10 epochs
 # Segmentation
 overlap_mask: True  # masks overlap
 mask_ratio: 4  # mask downsample ratio
 # Classification
-dropout: False # use dropout
+dropout: False  # use dropout
 resume: False
--- a/ultralytics/yolo/utils/modeling/init.py
+++ b/ultralytics/yolo/utils/modeling/init.py
@ -45,7 +45,7 @@ def attempt_load_weights(weights, device=None, inplace=True, fuse=True):
 def parse_model(d, ch):  # model_dict, input_channels(3)
    # Parse a YOLOv5 model.yaml dictionary
-    LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10}  {'module':<40}{'arguments':<30}")
+    LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10}  {'module':<50}{'arguments':<30}")
    nc, gd, gw, act = d['nc'], d['depth_multiple'], d['width_multiple'], d.get('activation')
    if act:
        Conv.default_act = eval(act)  # redefine default activation, i.e. Conv.default_act = nn.SiLU()
@ -87,7 +87,7 @@ def parse_model(d, ch):  # model_dict, input_channels(3)
        t = str(m)[8:-2].replace('__main__.', '')  # module type
        m.np = sum(x.numel() for x in m_.parameters())  # number params
        m_.i, m_.f, m_.type = i, f, t  # attach index, 'from' index, type
-        LOGGER.info(f'{i:>3}{str(f):>18}{n_:>3}{m.np:10.0f}  {t:<40}{str(args):<30}')  # print
+        LOGGER.info(f'{i:>3}{str(f):>18}{n_:>3}{m.np:10.0f}  {t:<50}{str(args):<30}')  # print
        save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1)  # append to savelist
        layers.append(m_)
        if i == 0:
--- a/ultralytics/yolo/utils/ops.py
+++ b/ultralytics/yolo/utils/ops.py
@ -63,9 +63,9 @@ def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None):
        gain = ratio_pad[0][0]
        pad = ratio_pad[1]
-    boxes[:, [0, 2]] -= pad[0]  # x padding
+    boxes[..., [0, 2]] -= pad[0]  # x padding
-    boxes[:, [1, 3]] -= pad[1]  # y padding
+    boxes[..., [1, 3]] -= pad[1]  # y padding
-    boxes[:, :4] /= gain
+    boxes[..., :4] /= gain
    clip_boxes(boxes, img0_shape)
    return boxes
@ -73,13 +73,13 @@ def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None):
 def clip_boxes(boxes, shape):
    # Clip boxes (xyxy) to image shape (height, width)
    if isinstance(boxes, torch.Tensor):  # faster individually
-        boxes[:, 0].clamp_(0, shape[1])  # x1
+        boxes[..., 0].clamp_(0, shape[1])  # x1
-        boxes[:, 1].clamp_(0, shape[0])  # y1
+        boxes[..., 1].clamp_(0, shape[0])  # y1
-        boxes[:, 2].clamp_(0, shape[1])  # x2
+        boxes[..., 2].clamp_(0, shape[1])  # x2
-        boxes[:, 3].clamp_(0, shape[0])  # y2
+        boxes[..., 3].clamp_(0, shape[0])  # y2
    else:  # np.array (faster grouped)
-        boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1])  # x1, x2
+        boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, shape[1])  # x1, x2
-        boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0])  # y1, y2
+        boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, shape[0])  # y1, y2
 def make_divisible(x, divisor):
@ -106,6 +106,9 @@ def non_max_suppression(
         list of detections, one (n,6) tensor per image [xyxy, conf, cls]
    """
    # Checks
    assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
    assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'
    if isinstance(prediction, (list, tuple)):  # YOLOv5 model in validation mode, output = (inference_out, loss_out)
        prediction = prediction[0]  # select only inference output
@ -118,10 +121,6 @@ def non_max_suppression(
    mi = 4 + nc  # mask start index
    xc = prediction[:, 4:mi].amax(1) > conf_thres  # candidates
    # Checks
    assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
    assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'
    # Settings
    # min_wh = 2  # (pixels) minimum box width and height
    max_wh = 7680  # (pixels) maximum box width and height
@ -172,17 +171,13 @@ def non_max_suppression(
        n = x.shape[0]  # number of boxes
        if not n:  # no boxes
            continue
-        elif n > max_nms:  # excess boxes
+        x = x[x[:, 4].argsort(descending=True)[:max_nms]]  # sort by confidence and remove excess boxes
            x = x[x[:, 4].argsort(descending=True)[:max_nms]]  # sort by confidence
        else:
            x = x[x[:, 4].argsort(descending=True)]  # sort by confidence
        # Batched NMS
        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
        i = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS
-        if i.shape[0] > max_det:  # limit detections
+        i = i[:max_det]  # limit detections
            i = i[:max_det]
        if merge and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean)
            # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
            iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix
@ -244,20 +239,50 @@ def scale_image(im1_shape, masks, im0_shape, ratio_pad=None):
 def xyxy2xywh(x):
    # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] where xy1=top-left, xy2=bottom-right
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
-    y[:, 0] = (x[:, 0] + x[:, 2]) / 2  # x center
+    y[..., 0] = (x[..., 0] + x[..., 2]) / 2  # x center
-    y[:, 1] = (x[:, 1] + x[:, 3]) / 2  # y center
+    y[..., 1] = (x[..., 1] + x[..., 3]) / 2  # y center
-    y[:, 2] = x[:, 2] - x[:, 0]  # width
+    y[..., 2] = x[..., 2] - x[..., 0]  # width
-    y[:, 3] = x[:, 3] - x[:, 1]  # height
+    y[..., 3] = x[..., 3] - x[..., 1]  # height
    return y
 def xywh2xyxy(x):
    # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
-    y[:, 0] = x[:, 0] - x[:, 2] / 2  # top left x
+    y[..., 0] = x[..., 0] - x[..., 2] / 2  # top left x
-    y[:, 1] = x[:, 1] - x[:, 3] / 2  # top left y
+    y[..., 1] = x[..., 1] - x[..., 3] / 2  # top left y
-    y[:, 2] = x[:, 0] + x[:, 2] / 2  # bottom right x
+    y[..., 2] = x[..., 0] + x[..., 2] / 2  # bottom right x
-    y[:, 3] = x[:, 1] + x[:, 3] / 2  # bottom right y
+    y[..., 3] = x[..., 1] + x[..., 3] / 2  # bottom right y
    return y
 def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0):
     # Map normalized center-format boxes [x, y, w, h] to pixel corner-format boxes [x1, y1, x2, y2].
     # The last axis holds the box; any leading axes (and any columns past the fourth) are carried over unchanged.
     # w/h scale the normalized values to pixels, padw/padh are added afterwards as offsets.
     if isinstance(x, torch.Tensor):
         boxes = x.clone()
     else:
         boxes = np.copy(x)  # work on a copy so the caller's data is left untouched
     center_x, center_y = x[..., 0], x[..., 1]
     half_width, half_height = x[..., 2] / 2, x[..., 3] / 2
     boxes[..., 0] = w * (center_x - half_width) + padw  # x1 (left)
     boxes[..., 1] = h * (center_y - half_height) + padh  # y1 (top)
     boxes[..., 2] = w * (center_x + half_width) + padw  # x2 (right)
     boxes[..., 3] = h * (center_y + half_height) + padh  # y2 (bottom)
     return boxes
 def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0):
     # Map pixel corner-format boxes [x1, y1, x2, y2] to normalized center-format boxes [x, y, w, h].
     # The last axis holds the box; x-values are divided by w and y-values by h.
     # With clip=True the INPUT is clipped in place to (h - eps, w - eps) before conversion.
     if clip:
         clip_boxes(x, (h - eps, w - eps))  # warning: modifies the caller's array/tensor
     if isinstance(x, torch.Tensor):
         boxes = x.clone()
     else:
         boxes = np.copy(x)
     left, top, right, bottom = x[..., 0], x[..., 1], x[..., 2], x[..., 3]
     boxes[..., 0] = ((left + right) / 2) / w  # normalized x center
     boxes[..., 1] = ((top + bottom) / 2) / h  # normalized y center
     boxes[..., 2] = (right - left) / w  # normalized width
     boxes[..., 3] = (bottom - top) / h  # normalized height
     return boxes
 def xyn2xy(x, w=640, h=640, padw=0, padh=0):
     # Scale normalized (x, y) points to pixel coordinates: x is multiplied by w, y by h,
     # then padw/padh are added. The last axis holds the point; the input is not modified.
     if isinstance(x, torch.Tensor):
         points = x.clone()
     else:
         points = np.copy(x)
     points[..., 0] = w * x[..., 0] + padw  # pixel x
     points[..., 1] = h * x[..., 1] + padh  # pixel y
     return points
--- a/ultralytics/yolo/utils/torch_utils.py
+++ b/ultralytics/yolo/utils/torch_utils.py
@ -29,7 +29,7 @@ WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1))
@contextmanager
 def torch_distributed_zero_first(local_rank: int):
    # Decorator to make all processes in distributed training wait for each local_master to do something
-    if local_rank not in [-1, 0]:
+    if local_rank not in {-1, 0}:
        dist.barrier(device_ids=[local_rank])
    yield
    if local_rank == 0:
--- a/ultralytics/yolo/v8/classify/predict.py
+++ b/ultralytics/yolo/v8/classify/predict.py
@ -52,7 +52,7 @@ class ClassificationPredictor(BasePredictor):
        return log_string
-@hydra.main(version_base=None, config_path=DEFAULT_CONFIG.parent, config_name=DEFAULT_CONFIG.name)
+@hydra.main(version_base=None, config_path=str(DEFAULT_CONFIG.parent), config_name=DEFAULT_CONFIG.name)
 def predict(cfg):
    cfg.model = cfg.model or "squeezenet1_0"
    sz = cfg.imgsz
--- a/ultralytics/yolo/v8/classify/train.py
+++ b/ultralytics/yolo/v8/classify/train.py
@ -59,7 +59,7 @@ class ClassificationTrainer(BaseTrainer):
        pass
-@hydra.main(version_base=None, config_path=DEFAULT_CONFIG.parent, config_name=DEFAULT_CONFIG.name)
+@hydra.main(version_base=None, config_path=str(DEFAULT_CONFIG.parent), config_name=DEFAULT_CONFIG.name)
 def train(cfg):
    cfg.model = cfg.model or "resnet18"
    cfg.data = cfg.data or "imagenette160"  # or yolo.ClassificationDataset("mnist")
--- a/ultralytics/yolo/v8/classify/val.py
+++ b/ultralytics/yolo/v8/classify/val.py
@ -35,7 +35,7 @@ class ClassificationValidator(BaseValidator):
        return ["top1", "top5"]
-@hydra.main(version_base=None, config_path=DEFAULT_CONFIG.parent, config_name=DEFAULT_CONFIG.name)
+@hydra.main(version_base=None, config_path=str(DEFAULT_CONFIG.parent), config_name=DEFAULT_CONFIG.name)
 def val(cfg):
    cfg.data = cfg.data or "imagenette160"
    cfg.model = cfg.model or "resnet18"
--- a/ultralytics/yolo/v8/detect/predict.py
+++ b/ultralytics/yolo/v8/detect/predict.py
@ -81,7 +81,7 @@ class DetectionPredictor(BasePredictor):
        return log_string
-@hydra.main(version_base=None, config_path=DEFAULT_CONFIG.parent, config_name=DEFAULT_CONFIG.name)
+@hydra.main(version_base=None, config_path=str(DEFAULT_CONFIG.parent), config_name=DEFAULT_CONFIG.name)
 def predict(cfg):
    cfg.model = cfg.model or "n.pt"
    sz = cfg.imgsz
--- a/ultralytics/yolo/v8/detect/train.py
+++ b/ultralytics/yolo/v8/detect/train.py
@ -53,7 +53,9 @@ class DetectionTrainer(BaseTrainer):
                                            args=self.args)
    def criterion(self, preds, batch):
-        return Loss(self.model)(preds, batch)
+        if not hasattr(self, 'compute_loss'):
            self.compute_loss = Loss(de_parallel(self.model))
        return self.compute_loss(preds, batch)
    def label_loss_items(self, loss_items=None, prefix="train"):
        # We should just use named tensors here in future
@ -61,8 +63,8 @@ class DetectionTrainer(BaseTrainer):
        return dict(zip(keys, loss_items)) if loss_items is not None else keys
    def progress_string(self):
-        return ('\n' + '%11s' * 6) % \
+        return ('\n' + '%11s' * 7) % \
-               ('Epoch', 'GPU_mem', *self.loss_names, 'Size')
+               ('Epoch', 'GPU_mem', *self.loss_names, 'Instances', 'Size')
    def plot_training_samples(self, batch, ni):
        images = batch["img"]
@ -79,7 +81,7 @@ class DetectionTrainer(BaseTrainer):
 # Criterion class for computing training losses
 class Loss:
-    def __init__(self, model):
+    def __init__(self, model):  # model must be de-paralleled
        device = next(model.parameters()).device  # get model device
        h = model.args  # hyperparameters
@ -90,7 +92,7 @@ class Loss:
        # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3
        self.cp, self.cn = smooth_BCE(eps=h.get("label_smoothing", 0.0))  # positive, negative BCE targets
-        m = de_parallel(model).model[-1]  # Detect() module
+        m = model.model[-1]  # Detect() module
        self.BCEcls = BCEcls
        self.hyp = h
        self.stride = m.stride  # model strides
@ -169,12 +171,12 @@ class Loss:
        return loss.sum() * batch_size, loss.detach()  # loss(box, cls, dfl)
-@hydra.main(version_base=None, config_path=DEFAULT_CONFIG.parent, config_name=DEFAULT_CONFIG.name)
+@hydra.main(version_base=None, config_path=str(DEFAULT_CONFIG.parent), config_name=DEFAULT_CONFIG.name)
 def train(cfg):
    cfg.model = cfg.model or "models/yolov8n.yaml"
    cfg.data = cfg.data or "coco128.yaml"  # or yolo.ClassificationDataset("mnist")
-    cfg.imgsz = 160
+    # cfg.imgsz = 160
-    cfg.epochs = 5
+    # cfg.epochs = 5
    trainer = DetectionTrainer(cfg)
    trainer.train()
--- a/ultralytics/yolo/v8/detect/val.py
+++ b/ultralytics/yolo/v8/detect/val.py
@ -119,9 +119,9 @@ class DetectionValidator(BaseValidator):
        if len(stats) and stats[0].any():
            self.metrics.process(*stats)
        self.nt_per_class = np.bincount(stats[-1].astype(int), minlength=self.nc)  # number of targets per class
-        metrics = {"fitness": self.metrics.fitness()}
+        fitness = {"fitness": self.metrics.fitness()}
-        metrics |= zip(self.metric_keys, self.metrics.mean_results())
+        metrics = dict(zip(self.metric_keys, self.metrics.mean_results()))
-        return metrics
+        return {**metrics, **fitness}
    def print_results(self):
        pf = '%22s' + '%11i' * 2 + '%11.3g' * len(self.metric_keys)  # print format
@ -198,7 +198,7 @@ class DetectionValidator(BaseValidator):
                    names=self.names)  # pred
-@hydra.main(version_base=None, config_path=DEFAULT_CONFIG.parent, config_name=DEFAULT_CONFIG.name)
+@hydra.main(version_base=None, config_path=str(DEFAULT_CONFIG.parent), config_name=DEFAULT_CONFIG.name)
 def val(cfg):
    cfg.data = cfg.data or "coco128.yaml"
    validator = DetectionValidator(args=cfg)
--- a/ultralytics/yolo/v8/segment/predict.py
+++ b/ultralytics/yolo/v8/segment/predict.py
@ -99,7 +99,7 @@ class SegmentationPredictor(DetectionPredictor):
        return log_string
-@hydra.main(version_base=None, config_path=DEFAULT_CONFIG.parent, config_name=DEFAULT_CONFIG.name)
+@hydra.main(version_base=None, config_path=str(DEFAULT_CONFIG.parent), config_name=DEFAULT_CONFIG.name)
 def predict(cfg):
    cfg.model = cfg.model or "n.pt"
    sz = cfg.imgsz
--- a/ultralytics/yolo/v8/segment/train.py
+++ b/ultralytics/yolo/v8/segment/train.py
@ -214,8 +214,8 @@ class SegmentationTrainer(DetectionTrainer):
        return dict(zip(keys, loss_items)) if loss_items is not None else keys
    def progress_string(self):
-        return ('\n' + '%11s' * 7) % \
+        return ('\n' + '%11s' * 8) % \
-               ('Epoch', 'GPU_mem', *self.loss_names, 'Size')
+               ('Epoch', 'GPU_mem', *self.loss_names, 'Instances', 'Size')
    def plot_training_samples(self, batch, ni):
        images = batch["img"]
@ -230,7 +230,7 @@ class SegmentationTrainer(DetectionTrainer):
        plot_results(file=self.csv, segment=True)  # save results.png
-@hydra.main(version_base=None, config_path=DEFAULT_CONFIG.parent, config_name=DEFAULT_CONFIG.name)
+@hydra.main(version_base=None, config_path=str(DEFAULT_CONFIG.parent), config_name=DEFAULT_CONFIG.name)
 def train(cfg):
    cfg.model = cfg.model or "models/yolov8n-seg.yaml"
    cfg.data = cfg.data or "coco128-seg.yaml"  # or yolo.ClassificationDataset("mnist")
--- a/ultralytics/yolo/v8/segment/val.py
+++ b/ultralytics/yolo/v8/segment/val.py
@ -211,7 +211,7 @@ class SegmentationValidator(DetectionValidator):
        self.plot_masks.clear()
-@hydra.main(version_base=None, config_path=DEFAULT_CONFIG.parent, config_name=DEFAULT_CONFIG.name)
+@hydra.main(version_base=None, config_path=str(DEFAULT_CONFIG.parent), config_name=DEFAULT_CONFIG.name)
 def val(cfg):
    cfg.data = cfg.data or "coco128-seg.yaml"
    validator = SegmentationValidator(args=cfg)