ultralytics 8.0.125 NMS speed improvements (#3463)

Co-authored-by: Compunet <117437050+dme-compunet@users.noreply.github.com>
2023-07-03 16:34:13 +02:00
parent 0e5a6b8158
commit 8a11eda4a9
6 changed files with 9 additions and 8 deletions
--- a/ultralytics/__init__.py
+++ b/ultralytics/__init__.py
@@ -1,6 +1,6 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license

-__version__ = '8.0.124'
+__version__ = '8.0.125'

 from ultralytics.hub import start
 from ultralytics.vit.rtdetr import RTDETR
--- a/ultralytics/nn/autobackend.py
+++ b/ultralytics/nn/autobackend.py
@@ -340,7 +340,7 @@ class AutoBackend(nn.Module):
        elif self.coreml:  # CoreML
            im = im[0].cpu().numpy()
            im_pil = Image.fromarray((im * 255).astype('uint8'))
-            # im = im.resize((192, 320), Image.ANTIALIAS)
+            # im = im.resize((192, 320), Image.BILINEAR)
            y = self.model.predict({'image': im_pil})  # coordinates are xywh normalized
            if 'confidence' in y:
                box = xywh2xyxy(y['coordinates'] * [[w, h, w, h]])  # xyxy pixels
--- a/ultralytics/yolo/engine/exporter.py
+++ b/ultralytics/yolo/engine/exporter.py
@@ -447,7 +447,7 @@ class Exporter:
                check_requirements('nvidia-tensorrt', cmds='-U --index-url https://pypi.ngc.nvidia.com')
            import tensorrt as trt  # noqa

-        check_version(trt.__version__, '7.0.0', hard=True)  # require tensorrt>=8.0.0
+        check_version(trt.__version__, '7.0.0', hard=True)  # require tensorrt>=7.0.0
        self.args.simplify = True
        f_onnx, _ = self.export_onnx()

--- a/ultralytics/yolo/utils/ops.py
+++ b/ultralytics/yolo/utils/ops.py
@@ -200,8 +200,7 @@ def non_max_suppression(
    multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)
    merge = False  # use merge-NMS

-    prediction = prediction.clone()  # don't modify original
-    prediction = prediction.transpose(-1, -2)  # to (batch, boxes, items)
+    prediction = prediction.transpose(-1, -2)  # shape(1,84,6300) to shape(1,6300,84)
    prediction[..., :4] = xywh2xyxy(prediction[..., :4])  # xywh to xyxy

    t = time.time()
@@ -245,7 +244,6 @@ def non_max_suppression(
        n = x.shape[0]  # number of boxes
        if not n:  # no boxes
            continue
-
        if n > max_nms:  # excess boxes
            x = x[x[:, 4].argsort(descending=True)[:max_nms]]  # sort by confidence and remove excess boxes