Improve tests coverage and speed (#4340)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2023-08-13 22:24:01 +02:00
parent d704507217
commit 9f6d48d3cf
10 changed files with 183 additions and 347 deletions
--- a/docs/models/fast-sam.md
+++ b/docs/models/fast-sam.md
@ -47,22 +47,17 @@ To perform object detection on an image, use the `predict` method as shown below
        from ultralytics import FastSAM
        from ultralytics.models.fastsam import FastSAMPrompt

-        # Define image path and inference device
-        IMAGE_PATH = 'ultralytics/assets/bus.jpg'
-        DEVICE = 'cpu'
+        # Define an inference source
+        source = 'path/to/bus.jpg'

        # Create a FastSAM model
        model = FastSAM('FastSAM-s.pt')  # or FastSAM-x.pt

        # Run inference on an image
-        everything_results = model(IMAGE_PATH,
-                                 device=DEVICE,
-                                 retina_masks=True,
-                                 imgsz=1024,
-                                 conf=0.4,
-                                 iou=0.9)
+        everything_results = model(source, device='cpu', retina_masks=True, imgsz=1024, conf=0.4, iou=0.9)
      
-        prompt_process = FastSAMPrompt(IMAGE_PATH, everything_results, device=DEVICE)
+        # Prepare a Prompt Process object
+        prompt_process = FastSAMPrompt(source, everything_results, device='cpu')

        # Everything prompt
        ann = prompt_process.everything_prompt()
@ -80,6 +75,12 @@ To perform object detection on an image, use the `predict` method as shown below
        prompt_process.plot(annotations=ann, output='./')
        ```
      
+    === "CLI"
+        ```bash
+        # Load a FastSAM model and segment everything with it
+        yolo segment predict model=FastSAM-s.pt source=path/to/bus.jpg imgsz=640
+        ```
+
 This snippet demonstrates the simplicity of loading a pre-trained model and running a prediction on an image.

 #### Val Usage
@ -89,7 +90,6 @@ Validation of the model on a dataset can be done as follows:
 !!! example ""

    === "Python"
-
        ```python
        from ultralytics import FastSAM

@ -100,6 +100,12 @@ Validation of the model on a dataset can be done as follows:
        results = model.val(data='coco8-seg.yaml')
        ```

+    === "CLI"
+        ```bash
+        # Load a FastSAM model and validate it on the COCO8-seg example dataset at image size 640
+        yolo segment val model=FastSAM-s.pt data=coco8-seg.yaml imgsz=640
+        ```
+
 Please note that FastSAM only supports detection and segmentation of a single class of object. This means it will recognize and segment all objects as the same class. Therefore, when preparing the dataset, you need to convert all object category IDs to 0.

 ### FastSAM official Usage
--- a/tests/conftest.py
+++ b/tests/conftest.py
@ -1,3 +1,5 @@
+# Ultralytics YOLO 🚀, AGPL-3.0 license
+
 import pytest


--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@ -8,12 +8,16 @@ import pytest
 from ultralytics.utils import ONLINE, ROOT, SETTINGS

 WEIGHT_DIR = Path(SETTINGS['weights_dir'])
-TASK_ARGS = [  # (task, model, data)
-    ('detect', 'yolov8n', 'coco8.yaml'), ('segment', 'yolov8n-seg', 'coco8-seg.yaml'),
-    ('classify', 'yolov8n-cls', 'imagenet10'), ('pose', 'yolov8n-pose', 'coco8-pose.yaml')]
-EXPORT_ARGS = [  # (model, format)
-    ('yolov8n', 'torchscript'), ('yolov8n-seg', 'torchscript'), ('yolov8n-cls', 'torchscript'),
-    ('yolov8n-pose', 'torchscript')]
+TASK_ARGS = [
+    ('detect', 'yolov8n', 'coco8.yaml'),
+    ('segment', 'yolov8n-seg', 'coco8-seg.yaml'),
+    ('classify', 'yolov8n-cls', 'imagenet10'),
+    ('pose', 'yolov8n-pose', 'coco8-pose.yaml'), ]  # (task, model, data)
+EXPORT_ARGS = [
+    ('yolov8n', 'torchscript'),
+    ('yolov8n-seg', 'torchscript'),
+    ('yolov8n-cls', 'torchscript'),
+    ('yolov8n-pose', 'torchscript'), ]  # (model, format)


 def run(cmd):
@ -22,9 +26,12 @@ def run(cmd):


 def test_special_modes():
-    run('yolo checks')
-    run('yolo settings')
    run('yolo help')
+    run('yolo checks')
+    run('yolo version')
+    run('yolo settings reset')
+    run('yolo copy-cfg')
+    run('yolo cfg')


@pytest.mark.parametrize('task,model,data', TASK_ARGS)
@ -34,21 +41,82 @@ def test_train(task, model, data):

@pytest.mark.parametrize('task,model,data', TASK_ARGS)
 def test_val(task, model, data):
-    run(f'yolo val {task} model={model}.pt data={data} imgsz=32')
+    run(f'yolo val {task} model={WEIGHT_DIR / model}.pt data={data} imgsz=32')


@pytest.mark.parametrize('task,model,data', TASK_ARGS)
 def test_predict(task, model, data):
-    run(f"yolo predict model={model}.pt source={ROOT / 'assets'} imgsz=32 save save_crop save_txt")
-    if ONLINE:
-        run(f'yolo predict model={model}.pt source=https://ultralytics.com/images/bus.jpg imgsz=32')
-        run(f'yolo predict model={model}.pt source=https://ultralytics.com/assets/decelera_landscape_min.mov imgsz=32')
-        run(f'yolo predict model={model}.pt source=https://ultralytics.com/assets/decelera_portrait_min.mov imgsz=32')
+    run(f"yolo predict model={WEIGHT_DIR / model}.pt source={ROOT / 'assets'} imgsz=32 save save_crop save_txt")
+
+
+@pytest.mark.skipif(not ONLINE, reason='environment is offline')
+@pytest.mark.parametrize('task,model,data', TASK_ARGS)
+def test_predict_online(task, model, data):
+    mode = 'track' if task in ('detect', 'segment', 'pose') else 'predict'  # mode for video inference
+    run(f'yolo predict model={WEIGHT_DIR / model}.pt source=https://ultralytics.com/images/bus.jpg imgsz=32')
+    run(f'yolo {mode} model={WEIGHT_DIR / model}.pt source=https://ultralytics.com/assets/decelera_landscape_min.mov imgsz=32'
+        )
+
+    # YouTube tracking is tested via the Python API (test_track_stream) as the CLI is broken. TODO: fix CLI YouTube
+    # run(f'yolo {mode} model={model}.pt source=https://youtu.be/G17sBkb38XQ imgsz=32 tracker=bytetrack.yaml')


@pytest.mark.parametrize('model,format', EXPORT_ARGS)
 def test_export(model, format):
-    run(f'yolo export model={model}.pt format={format}')
+    run(f'yolo export model={WEIGHT_DIR / model}.pt format={format} imgsz=32')
+
+
+# Test SAM, RTDETR Models
+def test_rtdetr(task='detect', model='yolov8n-rtdetr.yaml', data='coco8.yaml'):
+    # Warning: MUST use imgsz=640
+    run(f'yolo train {task} model={model} data={data} imgsz=640 epochs=1 cache=disk')
+    run(f'yolo val {task} model={model} data={data} imgsz=640')
+    run(f"yolo predict {task} model={model} source={ROOT / 'assets/bus.jpg'} imgsz=640 save save_crop save_txt")
+
+
+def test_fastsam(task='segment', model='FastSAM-s.pt', data='coco8-seg.yaml'):
+    """Run FastSAM val/predict through the CLI, then exercise every FastSAMPrompt prompt type in Python."""
+    source = ROOT / 'assets/bus.jpg'
+    run(f'yolo {task} val model={model} data={data} imgsz=32')
+    run(f'yolo {task} predict model={model} source={source} imgsz=32 save save_crop save_txt')

+    from ultralytics import FastSAM
+    from ultralytics.models.fastsam import FastSAMPrompt

+    # Create a FastSAM model from the same weights used for the CLI runs (keeps the `model` argument a str)
+    sam_model = FastSAM(model)  # or FastSAM-x.pt

+    # Run inference on an image
+    everything_results = sam_model(source, device='cpu', retina_masks=True, imgsz=1024, conf=0.4, iou=0.9)

+    # Everything prompt
+    prompt_process = FastSAMPrompt(source, everything_results, device='cpu')
+    ann = prompt_process.everything_prompt()

+    # Bbox default shape [0,0,0,0] -> [x1,y1,x2,y2]
+    ann = prompt_process.box_prompt(bbox=[200, 200, 300, 300])

+    # Text prompt
+    ann = prompt_process.text_prompt(text='a photo of a dog')

+    # Point prompt
+    # points default [[0,0]] [[x1,y1],[x2,y2]]
+    # point_label default [0] [1,0] 0:background, 1:foreground
+    ann = prompt_process.point_prompt(points=[[200, 200]], pointlabel=[1])
+    prompt_process.plot(annotations=ann, output='./')
+
+
+def test_mobilesam():
+    from ultralytics import SAM
+
+    # Load the model
+    model = SAM('mobile_sam.pt')
+
+    # Predict a segment based on a point prompt
+    model.predict(ROOT / 'assets/zidane.jpg', points=[900, 370], labels=[1])
+
+    # Predict a segment based on a box prompt
+    model.predict(ROOT / 'assets/zidane.jpg', bboxes=[439, 437, 524, 709])


 # Slow Tests
--- a/tests/test_engine.py
+++ b/tests/test_engine.py
@ -10,13 +10,13 @@ from ultralytics.utils import DEFAULT_CFG, ROOT, SETTINGS

 CFG_DET = 'yolov8n.yaml'
 CFG_SEG = 'yolov8n-seg.yaml'
-CFG_CLS = 'squeezenet1_0'
+CFG_CLS = 'yolov8n-cls.yaml'  # or 'squeezenet1_0'
 CFG = get_cfg(DEFAULT_CFG)
 MODEL = Path(SETTINGS['weights_dir']) / 'yolov8n'
 SOURCE = ROOT / 'assets'


-def test_func(model=None):
+def test_func(*args):  # noqa
    print('callback test passed')


@ -31,6 +31,7 @@ def test_export():
 def test_detect():
    overrides = {'data': 'coco8.yaml', 'model': CFG_DET, 'imgsz': 32, 'epochs': 1, 'save': False}
    CFG.data = 'coco8.yaml'
+    CFG.imgsz = 32

    # Trainer
    trainer = detect.DetectionTrainer(overrides=overrides)
@ -65,6 +66,7 @@ def test_detect():
 def test_segment():
    overrides = {'data': 'coco8-seg.yaml', 'model': CFG_SEG, 'imgsz': 32, 'epochs': 1, 'save': False}
    CFG.data = 'coco8-seg.yaml'
+    CFG.imgsz = 32
    # YOLO(CFG_SEG).train(**overrides)  # works

    # trainer
@ -99,7 +101,7 @@ def test_segment():


 def test_classify():
-    overrides = {'data': 'imagenet10', 'model': 'yolov8n-cls.yaml', 'imgsz': 32, 'epochs': 1, 'save': False}
+    overrides = {'data': 'imagenet10', 'model': CFG_CLS, 'imgsz': 32, 'epochs': 1, 'save': False}
    CFG.data = 'imagenet10'
    CFG.imgsz = 32
    # YOLO(CFG_SEG).train(**overrides)  # works
--- a/tests/test_python.py
+++ b/tests/test_python.py
@ -10,9 +10,11 @@ from torchvision.transforms import ToTensor

 from ultralytics import RTDETR, YOLO
 from ultralytics.data.build import load_inference_source
-from ultralytics.utils import LINUX, ONLINE, ROOT, SETTINGS
+from ultralytics.utils import LINUX, MACOS, ONLINE, ROOT, SETTINGS
+from ultralytics.utils.torch_utils import TORCH_1_9

-MODEL = Path(SETTINGS['weights_dir']) / 'path with spaces' / 'yolov8n.pt'  # test spaces in path
+WEIGHTS_DIR = Path(SETTINGS['weights_dir'])
+MODEL = WEIGHTS_DIR / 'path with spaces' / 'yolov8n.pt'  # test spaces in path
 CFG = 'yolov8n.yaml'
 SOURCE = ROOT / 'assets/bus.jpg'
 SOURCE_GREYSCALE = Path(f'{SOURCE.parent / SOURCE.stem}_greyscale.jpg')
@ -26,39 +28,35 @@ im.convert('RGBA').save(SOURCE_RGBA)  # 4-ch PNG with alpha

 def test_model_forward():
    model = YOLO(CFG)
-    model(SOURCE)
+    model(SOURCE, imgsz=32)


 def test_model_info():
-    model = YOLO(CFG)
-    model.info()
    model = YOLO(MODEL)
    model.info(verbose=True)


 def test_model_fuse():
-    model = YOLO(CFG)
-    model.fuse()
    model = YOLO(MODEL)
    model.fuse()


 def test_predict_dir():
    model = YOLO(MODEL)
-    model(source=ROOT / 'assets')
+    model(source=ROOT / 'assets', imgsz=32)


 def test_predict_img():
    model = YOLO(MODEL)
-    seg_model = YOLO('yolov8n-seg.pt')
-    cls_model = YOLO('yolov8n-cls.pt')
-    pose_model = YOLO('yolov8n-pose.pt')
+    seg_model = YOLO(WEIGHTS_DIR / 'yolov8n-seg.pt')
+    cls_model = YOLO(WEIGHTS_DIR / 'yolov8n-cls.pt')
+    pose_model = YOLO(WEIGHTS_DIR / 'yolov8n-pose.pt')
    im = cv2.imread(str(SOURCE))
-    assert len(model(source=Image.open(SOURCE), save=True, verbose=True)) == 1  # PIL
-    assert len(model(source=im, save=True, save_txt=True)) == 1  # ndarray
-    assert len(model(source=[im, im], save=True, save_txt=True)) == 2  # batch
-    assert len(list(model(source=[im, im], save=True, stream=True))) == 2  # stream
-    assert len(model(torch.zeros(320, 640, 3).numpy())) == 1  # tensor to numpy
+    assert len(model(source=Image.open(SOURCE), save=True, verbose=True, imgsz=32)) == 1  # PIL
+    assert len(model(source=im, save=True, save_txt=True, imgsz=32)) == 1  # ndarray
+    assert len(model(source=[im, im], save=True, save_txt=True, imgsz=32)) == 2  # batch
+    assert len(list(model(source=[im, im], save=True, stream=True, imgsz=32))) == 2  # stream
+    assert len(model(torch.zeros(320, 640, 3).numpy(), imgsz=32)) == 1  # tensor to numpy
    batch = [
        str(SOURCE),  # filename
        Path(SOURCE),  # Path
@ -66,20 +64,20 @@ def test_predict_img():
        cv2.imread(str(SOURCE)),  # OpenCV
        Image.open(SOURCE),  # PIL
        np.zeros((320, 640, 3))]  # numpy
-    assert len(model(batch, visualize=True)) == len(batch)  # multiple sources in a batch
+    assert len(model(batch, imgsz=32)) == len(batch)  # multiple sources in a batch

    # Test tensor inference
    im = cv2.imread(str(SOURCE))  # OpenCV
    t = cv2.resize(im, (32, 32))
    t = ToTensor()(t)
    t = torch.stack([t, t, t, t])
-    results = model(t, visualize=True)
+    results = model(t, imgsz=32)
    assert len(results) == t.shape[0]
-    results = seg_model(t, visualize=True)
+    results = seg_model(t, imgsz=32)
    assert len(results) == t.shape[0]
-    results = cls_model(t, visualize=True)
+    results = cls_model(t, imgsz=32)
    assert len(results) == t.shape[0]
-    results = pose_model(t, visualize=True)
+    results = pose_model(t, imgsz=32)
    assert len(results) == t.shape[0]


@ -87,7 +85,13 @@ def test_predict_grey_and_4ch():
    model = YOLO(MODEL)
    for f in SOURCE_RGBA, SOURCE_GREYSCALE:
        for source in Image.open(f), cv2.imread(str(f)), f:
-            model(source, save=True, verbose=True)
+            model(source, save=True, verbose=True, imgsz=32)
+
+
+def test_track_stream():
+    # Test YouTube streaming inference (short 10 frame video) with non-default ByteTrack tracker
+    model = YOLO(MODEL)
+    model.track('https://youtu.be/G17sBkb38XQ', imgsz=32, tracker='bytetrack.yaml')


 def test_val():
@ -95,11 +99,6 @@ def test_val():
    model.val(data='coco8.yaml', imgsz=32)


-def test_val_scratch():
-    model = YOLO(CFG)
-    model.val(data='coco8.yaml', imgsz=32)
-
-
 def test_amp():
    if torch.cuda.is_available():
        from ultralytics.utils.checks import check_amp
@ -109,7 +108,7 @@ def test_amp():

 def test_train_scratch():
    model = YOLO(CFG)
-    model.train(data='coco8.yaml', epochs=1, imgsz=32, cache='disk')  # test disk caching
+    model.train(data='coco8.yaml', epochs=1, imgsz=32, cache='disk', batch=-1)  # test disk caching with AutoBatch
    model(SOURCE)


@ -125,12 +124,6 @@ def test_export_torchscript():
    YOLO(f)(SOURCE)  # exported model inference


-def test_export_torchscript_scratch():
-    model = YOLO(CFG)
-    f = model.export(format='torchscript')
-    YOLO(f)(SOURCE)  # exported model inference
-
-
 def test_export_onnx():
    model = YOLO(MODEL)
    f = model.export(format='onnx')
@ -138,6 +131,7 @@ def test_export_onnx():


 def test_export_openvino():
+    if not MACOS:
        model = YOLO(MODEL)
        f = model.export(format='openvino')
        YOLO(f)(SOURCE)  # exported model inference
@ -145,7 +139,7 @@ def test_export_openvino():

 def test_export_coreml():  # sourcery skip: move-assign
    model = YOLO(MODEL)
-    model.export(format='coreml')
+    model.export(format='coreml', nms=True)
    # if MACOS:
    #    YOLO(f)(SOURCE)  # model prediction only supported on macOS

@ -174,8 +168,9 @@ def test_export_paddle(enabled=False):


 def test_all_model_yamls():
-    for m in list((ROOT / 'models').rglob('yolo*.yaml')):
-        if m.name == 'yolov8-rtdetr.yaml':  # except the rtdetr model
+    for m in (ROOT / 'cfg' / 'models').rglob('*.yaml'):
+        if 'rtdetr' in m.name:
+            if TORCH_1_9:  # torch<=1.8 issue - TypeError: __init__() got an unexpected keyword argument 'batch_first'
                RTDETR(m.name)
        else:
            YOLO(m.name)
@ -190,10 +185,9 @@ def test_workflow():


 def test_predict_callback_and_setup():
-    # test callback addition for prediction
+    # Test callback addition for prediction
    def on_predict_batch_end(predictor):  # results -> List[batch_size]
        path, im0s, _, _ = predictor.batch
-        # print('on_predict_batch_end', im0s[0].shape)
        im0s = im0s if isinstance(im0s, list) else [im0s]
        bs = [predictor.dataset.bs for _ in range(len(path))]
        predictor.results = zip(predictor.results, im0s, bs)
@ -204,42 +198,26 @@ def test_predict_callback_and_setup():
    dataset = load_inference_source(source=SOURCE)
    bs = dataset.bs  # noqa access predictor properties
    results = model.predict(dataset, stream=True)  # source already setup
-    for _, (result, im0, bs) in enumerate(results):
+    for r, im0, bs in results:
        print('test_callback', im0.shape)
        print('test_callback', bs)
-        boxes = result.boxes  # Boxes object for bbox outputs
+        boxes = r.boxes  # Boxes object for bbox outputs
        print(boxes)


-def _test_results_api(res):
-    # General apis except plot
-    res = res.cpu().numpy()
-    # res = res.cuda()
-    res = res.to(device='cpu', dtype=torch.float32)
-    res.save_txt('label.txt', save_conf=False)
-    res.save_txt('label.txt', save_conf=True)
-    res.save_crop('crops/')
-    res.tojson(normalize=False)
-    res.tojson(normalize=True)
-    res.plot(pil=True)
-    res.plot(conf=True, boxes=False)
-    res.plot()
-    print(res)
-    print(res.path)
-    for k in res.keys:
-        print(getattr(res, k))
-
-
 def test_results():
-    for m in ['yolov8n-pose.pt', 'yolov8n-seg.pt', 'yolov8n.pt', 'yolov8n-cls.pt']:
+    for m in 'yolov8n-pose.pt', 'yolov8n-seg.pt', 'yolov8n.pt', 'yolov8n-cls.pt':
        model = YOLO(m)
-        res = model([SOURCE, SOURCE])
-        _test_results_api(res[0])
-
-
-def test_track():
-    im = cv2.imread(str(SOURCE))
-    for m in ['yolov8n-pose.pt', 'yolov8n-seg.pt', 'yolov8n.pt']:
-        model = YOLO(m)
-        res = model.track(source=im)
-        _test_results_api(res[0])
+        results = model([SOURCE, SOURCE])
+        for r in results:
+            r = r.cpu().numpy()
+            r = r.to(device='cpu', dtype=torch.float32)
+            r.save_txt(txt_file='label.txt', save_conf=True)
+            r.save_crop(save_dir='crops/')
+            r.tojson(normalize=True)
+            r.plot(pil=True)
+            r.plot(conf=True, boxes=True)
+            print(r)
+            print(r.path)
+            for k in r.keys:
+                print(getattr(r, k))
--- a/ultralytics/models/fastsam/prompt.py
+++ b/ultralytics/models/fastsam/prompt.py
@ -1,6 +1,7 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license

 import os
+from pathlib import Path

 import cv2
 import matplotlib.pyplot as plt
@ -8,6 +9,8 @@ import numpy as np
 import torch
 from PIL import Image

+from ultralytics.utils import LOGGER
+

 class FastSAMPrompt:

@ -15,8 +18,8 @@ class FastSAMPrompt:
        # self.img_path = img_path
        self.device = device
        self.results = results
-        self.img_path = img_path
-        self.ori_img = cv2.imread(img_path)
+        self.img_path = str(img_path)
+        self.ori_img = cv2.imread(self.img_path)

        # Import and assign clip
        try:
@ -111,7 +114,7 @@ class FastSAMPrompt:
        original_w = image.shape[1]
        # for macOS only
        # plt.switch_backend('TkAgg')
-        plt.figure(figsize=(original_w / 100, original_h / 100))
+        fig = plt.figure(figsize=(original_w / 100, original_h / 100))
        # Add subplot with no margin.
        plt.subplots_adjust(top=1, bottom=0, right=1, left=0, hspace=0, wspace=0)
        plt.margins(0, 0)
@ -174,21 +177,11 @@ class FastSAMPrompt:
            contour_mask = temp / 255 * color.reshape(1, 1, -1)
            plt.imshow(contour_mask)

-        save_path = output
-        if not os.path.exists(save_path):
-            os.makedirs(save_path)
+        save_path = Path(output) / result_name
+        save_path.parent.mkdir(exist_ok=True, parents=True)
        plt.axis('off')
-        fig = plt.gcf()
-        plt.draw()
-
-        try:
-            buf = fig.canvas.tostring_rgb()
-        except AttributeError:
-            fig.canvas.draw()
-            buf = fig.canvas.tostring_rgb()
-        cols, rows = fig.canvas.get_width_height()
-        img_array = np.frombuffer(buf, dtype=np.uint8).reshape(rows, cols, 3)
-        cv2.imwrite(os.path.join(save_path, result_name), cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR))
+        fig.savefig(save_path)
+        LOGGER.info(f'Saved to {save_path.absolute()}')

    #   CPU post process
    def fast_show_mask(
--- a/ultralytics/models/fastsam/val.py
+++ b/ultralytics/models/fastsam/val.py
@ -1,231 +1,14 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license

-from multiprocessing.pool import ThreadPool
-from pathlib import Path
-
-import numpy as np
-import torch
-import torch.nn.functional as F
-
-from ultralytics.models.yolo.detect import DetectionValidator
-from ultralytics.utils import LOGGER, NUM_THREADS, ops
-from ultralytics.utils.checks import check_requirements
-from ultralytics.utils.metrics import SegmentMetrics, box_iou, mask_iou
-from ultralytics.utils.plotting import output_to_target, plot_images
+from ultralytics.models.yolo.segment import SegmentationValidator
+from ultralytics.utils.metrics import SegmentMetrics


-class FastSAMValidator(DetectionValidator):
+class FastSAMValidator(SegmentationValidator):

    def __init__(self, dataloader=None, save_dir=None, pbar=None, args=None, _callbacks=None):
        """Initialize SegmentationValidator and set task to 'segment', metrics to SegmentMetrics."""
        super().__init__(dataloader, save_dir, pbar, args, _callbacks)
        self.args.task = 'segment'
+        self.args.plots = False  # disable ConfusionMatrix and other plots to avoid errors
        self.metrics = SegmentMetrics(save_dir=self.save_dir, on_plot=self.on_plot)
-
-    def preprocess(self, batch):
-        """Preprocesses batch by converting masks to float and sending to device."""
-        batch = super().preprocess(batch)
-        batch['masks'] = batch['masks'].to(self.device).float()
-        return batch
-
-    def init_metrics(self, model):
-        """Initialize metrics and select mask processing function based on save_json flag."""
-        super().init_metrics(model)
-        self.plot_masks = []
-        if self.args.save_json:
-            check_requirements('pycocotools>=2.0.6')
-            self.process = ops.process_mask_upsample  # more accurate
-        else:
-            self.process = ops.process_mask  # faster
-
-    def get_desc(self):
-        """Return a formatted description of evaluation metrics."""
-        return ('%22s' + '%11s' * 10) % ('Class', 'Images', 'Instances', 'Box(P', 'R', 'mAP50', 'mAP50-95)', 'Mask(P',
-                                         'R', 'mAP50', 'mAP50-95)')
-
-    def postprocess(self, preds):
-        """Post-processes YOLO predictions and returns output detections with proto."""
-        p = ops.non_max_suppression(preds[0],
-                                    self.args.conf,
-                                    self.args.iou,
-                                    labels=self.lb,
-                                    multi_label=True,
-                                    agnostic=self.args.single_cls,
-                                    max_det=self.args.max_det,
-                                    nc=self.nc)
-        proto = preds[1][-1] if len(preds[1]) == 3 else preds[1]  # second output is len 3 if pt, but only 1 if exported
-        return p, proto
-
-    def update_metrics(self, preds, batch):
-        """Metrics."""
-        for si, (pred, proto) in enumerate(zip(preds[0], preds[1])):
-            idx = batch['batch_idx'] == si
-            cls = batch['cls'][idx]
-            bbox = batch['bboxes'][idx]
-            nl, npr = cls.shape[0], pred.shape[0]  # number of labels, predictions
-            shape = batch['ori_shape'][si]
-            correct_masks = torch.zeros(npr, self.niou, dtype=torch.bool, device=self.device)  # init
-            correct_bboxes = torch.zeros(npr, self.niou, dtype=torch.bool, device=self.device)  # init
-            self.seen += 1
-
-            if npr == 0:
-                if nl:
-                    self.stats.append((correct_bboxes, correct_masks, *torch.zeros(
-                        (2, 0), device=self.device), cls.squeeze(-1)))
-                    if self.args.plots:
-                        self.confusion_matrix.process_batch(detections=None, labels=cls.squeeze(-1))
-                continue
-
-            # Masks
-            midx = [si] if self.args.overlap_mask else idx
-            gt_masks = batch['masks'][midx]
-            pred_masks = self.process(proto, pred[:, 6:], pred[:, :4], shape=batch['img'][si].shape[1:])
-
-            # Predictions
-            if self.args.single_cls:
-                pred[:, 5] = 0
-            predn = pred.clone()
-            ops.scale_boxes(batch['img'][si].shape[1:], predn[:, :4], shape,
-                            ratio_pad=batch['ratio_pad'][si])  # native-space pred
-
-            # Evaluate
-            if nl:
-                height, width = batch['img'].shape[2:]
-                tbox = ops.xywh2xyxy(bbox) * torch.tensor(
-                    (width, height, width, height), device=self.device)  # target boxes
-                ops.scale_boxes(batch['img'][si].shape[1:], tbox, shape,
-                                ratio_pad=batch['ratio_pad'][si])  # native-space labels
-                labelsn = torch.cat((cls, tbox), 1)  # native-space labels
-                correct_bboxes = self._process_batch(predn, labelsn)
-                # TODO: maybe remove these `self.` arguments as they already are member variable
-                correct_masks = self._process_batch(predn,
-                                                    labelsn,
-                                                    pred_masks,
-                                                    gt_masks,
-                                                    overlap=self.args.overlap_mask,
-                                                    masks=True)
-                if self.args.plots:
-                    self.confusion_matrix.process_batch(predn, labelsn)
-
-            # Append correct_masks, correct_boxes, pconf, pcls, tcls
-            self.stats.append((correct_bboxes, correct_masks, pred[:, 4], pred[:, 5], cls.squeeze(-1)))
-
-            pred_masks = torch.as_tensor(pred_masks, dtype=torch.uint8)
-            if self.args.plots and self.batch_i < 3:
-                self.plot_masks.append(pred_masks[:15].cpu())  # filter top 15 to plot
-
-            # Save
-            if self.args.save_json:
-                pred_masks = ops.scale_image(pred_masks.permute(1, 2, 0).contiguous().cpu().numpy(),
-                                             shape,
-                                             ratio_pad=batch['ratio_pad'][si])
-                self.pred_to_json(predn, batch['im_file'][si], pred_masks)
-            # if self.args.save_txt:
-            #    save_one_txt(predn, save_conf, shape, file=save_dir / 'labels' / f'{path.stem}.txt')
-
-    def finalize_metrics(self, *args, **kwargs):
-        """Sets speed and confusion matrix for evaluation metrics."""
-        self.metrics.speed = self.speed
-        self.metrics.confusion_matrix = self.confusion_matrix
-
-    def _process_batch(self, detections, labels, pred_masks=None, gt_masks=None, overlap=False, masks=False):
-        """
-        Return correct prediction matrix
-        Arguments:
-            detections (array[N, 6]), x1, y1, x2, y2, conf, class
-            labels (array[M, 5]), class, x1, y1, x2, y2
-        Returns:
-            correct (array[N, 10]), for 10 IoU levels
-        """
-        if masks:
-            if overlap:
-                nl = len(labels)
-                index = torch.arange(nl, device=gt_masks.device).view(nl, 1, 1) + 1
-                gt_masks = gt_masks.repeat(nl, 1, 1)  # shape(1,640,640) -> (n,640,640)
-                gt_masks = torch.where(gt_masks == index, 1.0, 0.0)
-            if gt_masks.shape[1:] != pred_masks.shape[1:]:
-                gt_masks = F.interpolate(gt_masks[None], pred_masks.shape[1:], mode='bilinear', align_corners=False)[0]
-                gt_masks = gt_masks.gt_(0.5)
-            iou = mask_iou(gt_masks.view(gt_masks.shape[0], -1), pred_masks.view(pred_masks.shape[0], -1))
-        else:  # boxes
-            iou = box_iou(labels[:, 1:], detections[:, :4])
-
-        return self.match_predictions(detections[:, 5], labels[:, 0], iou)
-
-    def plot_val_samples(self, batch, ni):
-        """Plots validation samples with bounding box labels."""
-        plot_images(batch['img'],
-                    batch['batch_idx'],
-                    batch['cls'].squeeze(-1),
-                    batch['bboxes'],
-                    batch['masks'],
-                    paths=batch['im_file'],
-                    fname=self.save_dir / f'val_batch{ni}_labels.jpg',
-                    names=self.names,
-                    on_plot=self.on_plot)
-
-    def plot_predictions(self, batch, preds, ni):
-        """Plots batch predictions with masks and bounding boxes."""
-        plot_images(
-            batch['img'],
-            *output_to_target(preds[0], max_det=15),  # not set to self.args.max_det due to slow plotting speed
-            torch.cat(self.plot_masks, dim=0) if len(self.plot_masks) else self.plot_masks,
-            paths=batch['im_file'],
-            fname=self.save_dir / f'val_batch{ni}_pred.jpg',
-            names=self.names,
-            on_plot=self.on_plot)  # pred
-        self.plot_masks.clear()
-
-    def pred_to_json(self, predn, filename, pred_masks):
-        """Save one JSON result."""
-        # Example result = {"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}
-        from pycocotools.mask import encode  # noqa
-
-        def single_encode(x):
-            """Encode predicted masks as RLE and append results to jdict."""
-            rle = encode(np.asarray(x[:, :, None], order='F', dtype='uint8'))[0]
-            rle['counts'] = rle['counts'].decode('utf-8')
-            return rle
-
-        stem = Path(filename).stem
-        image_id = int(stem) if stem.isnumeric() else stem
-        box = ops.xyxy2xywh(predn[:, :4])  # xywh
-        box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
-        pred_masks = np.transpose(pred_masks, (2, 0, 1))
-        with ThreadPool(NUM_THREADS) as pool:
-            rles = pool.map(single_encode, pred_masks)
-        for i, (p, b) in enumerate(zip(predn.tolist(), box.tolist())):
-            self.jdict.append({
-                'image_id': image_id,
-                'category_id': self.class_map[int(p[5])],
-                'bbox': [round(x, 3) for x in b],
-                'score': round(p[4], 5),
-                'segmentation': rles[i]})
-
-    def eval_json(self, stats):
-        """Return COCO-style object detection evaluation metrics."""
-        if self.args.save_json and self.is_coco and len(self.jdict):
-            anno_json = self.data['path'] / 'annotations/instances_val2017.json'  # annotations
-            pred_json = self.save_dir / 'predictions.json'  # predictions
-            LOGGER.info(f'\nEvaluating pycocotools mAP using {pred_json} and {anno_json}...')
-            try:  # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
-                check_requirements('pycocotools>=2.0.6')
-                from pycocotools.coco import COCO  # noqa
-                from pycocotools.cocoeval import COCOeval  # noqa
-
-                for x in anno_json, pred_json:
-                    assert x.is_file(), f'{x} file not found'
-                anno = COCO(str(anno_json))  # init annotations api
-                pred = anno.loadRes(str(pred_json))  # init predictions api (must pass string, not Path)
-                for i, eval in enumerate([COCOeval(anno, pred, 'bbox'), COCOeval(anno, pred, 'segm')]):
-                    if self.is_coco:
-                        eval.params.imgIds = [int(Path(x).stem) for x in self.dataloader.dataset.im_files]  # im to eval
-                    eval.evaluate()
-                    eval.accumulate()
-                    eval.summarize()
-                    idx = i * 4 + 2
-                    stats[self.metrics.keys[idx + 1]], stats[
-                        self.metrics.keys[idx]] = eval.stats[:2]  # update mAP50-95 and mAP50
-            except Exception as e:
-                LOGGER.warning(f'pycocotools unable to run: {e}')
-        return stats
--- a/ultralytics/models/rtdetr/model.py
+++ b/ultralytics/models/rtdetr/model.py
@ -16,7 +16,7 @@ class RTDETR(Model):
    """

    def __init__(self, model='rtdetr-l.pt') -> None:
-        if model and not model.split('.')[-1] in ('pt', 'yaml', 'yml'):
+        if model and model.split('.')[-1] not in ('pt', 'yaml', 'yml'):
            raise NotImplementedError('RT-DETR only supports creating from *.pt file or *.yaml file.')
        super().__init__(model=model, task='detect')

--- a/ultralytics/nn/modules/head.py
+++ b/ultralytics/nn/modules/head.py
@ -9,7 +9,7 @@ import torch
 import torch.nn as nn
 from torch.nn.init import constant_, xavier_uniform_

-from ultralytics.utils.tal import dist2bbox, make_anchors
+from ultralytics.utils.tal import TORCH_1_10, dist2bbox, make_anchors

 from .block import DFL, Proto
 from .conv import Conv
@ -267,9 +267,9 @@ class RTDETRDecoder(nn.Module):
    def _generate_anchors(self, shapes, grid_size=0.05, dtype=torch.float32, device='cpu', eps=1e-2):
        anchors = []
        for i, (h, w) in enumerate(shapes):
-            grid_y, grid_x = torch.meshgrid(torch.arange(end=h, dtype=dtype, device=device),
-                                            torch.arange(end=w, dtype=dtype, device=device),
-                                            indexing='ij')
+            sy = torch.arange(end=h, dtype=dtype, device=device)
+            sx = torch.arange(end=w, dtype=dtype, device=device)
+            grid_y, grid_x = torch.meshgrid(sy, sx, indexing='ij') if TORCH_1_10 else torch.meshgrid(sy, sx)
            grid_xy = torch.stack([grid_x, grid_y], -1)  # (h, w, 2)

            valid_WH = torch.tensor([h, w], dtype=dtype, device=device)
--- a/ultralytics/nn/modules/transformer.py
+++ b/ultralytics/nn/modules/transformer.py
@ -22,6 +22,10 @@ class TransformerEncoderLayer(nn.Module):

    def __init__(self, c1, cm=2048, num_heads=8, dropout=0.0, act=nn.GELU(), normalize_before=False):
        super().__init__()
+        from ...utils.torch_utils import TORCH_1_9
+        if not TORCH_1_9:
+            raise ModuleNotFoundError(
+                'TransformerEncoderLayer() requires torch>=1.9 to use nn.MultiheadAttention(batch_first=True).')
        self.ma = nn.MultiheadAttention(c1, num_heads, dropout=dropout, batch_first=True)
        # Implementation of Feedforward model
        self.fc1 = nn.Linear(c1, cm)