diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 29a5387..947bd24 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -43,21 +43,7 @@ jobs: python --version pip --version pip list - - name: Test HUB training (Python Usage 1) - shell: python - env: - APIKEY: ${{ secrets.ULTRALYTICS_HUB_APIKEY }} - run: | - import os - from pathlib import Path - from ultralytics import YOLO, hub - from ultralytics.yolo.utils import USER_CONFIG_DIR - Path(USER_CONFIG_DIR / 'settings.yaml').unlink() - key = os.environ['APIKEY'] - hub.reset_model(key) - model = YOLO('https://hub.ultralytics.com/models/' + key) - model.train() - - name: Test HUB training (Python Usage 2) + - name: Test HUB training shell: python env: APIKEY: ${{ secrets.ULTRALYTICS_HUB_APIKEY }} @@ -73,36 +59,6 @@ jobs: hub.login(key) model = YOLO('https://hub.ultralytics.com/models/' + model_id) model.train() - - name: Test HUB training (Python Usage 3) - shell: python - env: - APIKEY: ${{ secrets.ULTRALYTICS_HUB_APIKEY }} - run: | - import os - from pathlib import Path - from ultralytics import YOLO, hub - from ultralytics.yolo.utils import USER_CONFIG_DIR - Path(USER_CONFIG_DIR / 'settings.yaml').unlink() - key = os.environ['APIKEY'] - hub.reset_model(key) - model = YOLO(key) - model.train() - - name: Test HUB training (Python Usage 4) - shell: python - env: - APIKEY: ${{ secrets.ULTRALYTICS_HUB_APIKEY }} - run: | - import os - from pathlib import Path - from ultralytics import YOLO, hub - from ultralytics.yolo.utils import USER_CONFIG_DIR - Path(USER_CONFIG_DIR / 'settings.yaml').unlink() - key = os.environ['APIKEY'] - hub.reset_model(key) - key, model_id = key.split('_') - hub.login(key) - model = YOLO(model_id) - model.train() Benchmarks: runs-on: ${{ matrix.os }} @@ -154,6 +110,11 @@ jobs: run: | from ultralytics.yolo.utils.benchmarks import benchmark benchmark(model='${{ matrix.model }}-cls.pt', imgsz=160, half=False, hard_fail=0.61) + - name: Benchmark PoseModel + shell: python + run: | + from ultralytics.yolo.utils.benchmarks import benchmark + benchmark(model='${{ matrix.model }}-pose.pt', imgsz=160, half=False, hard_fail=0.0) - name: Benchmark Summary run: | cat benchmarks.log @@ -200,30 +161,38 @@ jobs: python --version pip --version pip list - - name: Test detection + - name: Test Detect + shell: bash # for Windows compatibility + run: | + yolo detect train data=coco8.yaml model=yolov8n.yaml epochs=1 imgsz=32 + yolo detect train data=coco8.yaml model=yolov8n.pt epochs=1 imgsz=32 + yolo detect val data=coco8.yaml model=runs/detect/train/weights/last.pt imgsz=32 + yolo detect predict model=runs/detect/train/weights/last.pt imgsz=32 source=ultralytics/assets/bus.jpg + yolo export model=runs/detect/train/weights/last.pt imgsz=32 format=torchscript + - name: Test Segment shell: bash # for Windows compatibility run: | - yolo task=detect mode=train data=coco8.yaml model=yolov8n.yaml epochs=1 imgsz=32 - yolo task=detect mode=train data=coco8.yaml model=yolov8n.pt epochs=1 imgsz=32 - yolo task=detect mode=val data=coco8.yaml model=runs/detect/train/weights/last.pt imgsz=32 - yolo task=detect mode=predict model=runs/detect/train/weights/last.pt imgsz=32 source=ultralytics/assets/bus.jpg - yolo mode=export model=runs/detect/train/weights/last.pt imgsz=32 format=torchscript - - name: Test segmentation + yolo segment train data=coco8-seg.yaml model=yolov8n-seg.yaml epochs=1 imgsz=32 + yolo segment train data=coco8-seg.yaml model=yolov8n-seg.pt epochs=1 imgsz=32 + yolo segment val data=coco8-seg.yaml 
model=runs/segment/train/weights/last.pt imgsz=32 + yolo segment predict model=runs/segment/train/weights/last.pt imgsz=32 source=ultralytics/assets/bus.jpg + yolo export model=runs/segment/train/weights/last.pt imgsz=32 format=torchscript + - name: Test Classify shell: bash # for Windows compatibility run: | - yolo task=segment mode=train data=coco8-seg.yaml model=yolov8n-seg.yaml epochs=1 imgsz=32 - yolo task=segment mode=train data=coco8-seg.yaml model=yolov8n-seg.pt epochs=1 imgsz=32 - yolo task=segment mode=val data=coco8-seg.yaml model=runs/segment/train/weights/last.pt imgsz=32 - yolo task=segment mode=predict model=runs/segment/train/weights/last.pt imgsz=32 source=ultralytics/assets/bus.jpg - yolo mode=export model=runs/segment/train/weights/last.pt imgsz=32 format=torchscript - - name: Test classification + yolo classify train data=imagenet10 model=yolov8n-cls.yaml epochs=1 imgsz=32 + yolo classify train data=imagenet10 model=yolov8n-cls.pt epochs=1 imgsz=32 + yolo classify val data=imagenet10 model=runs/classify/train/weights/last.pt imgsz=32 + yolo classify predict model=runs/classify/train/weights/last.pt imgsz=32 source=ultralytics/assets/bus.jpg + yolo export model=runs/classify/train/weights/last.pt imgsz=32 format=torchscript + - name: Test Pose shell: bash # for Windows compatibility run: | - yolo task=classify mode=train data=imagenet10 model=yolov8n-cls.yaml epochs=1 imgsz=32 - yolo task=classify mode=train data=imagenet10 model=yolov8n-cls.pt epochs=1 imgsz=32 - yolo task=classify mode=val data=imagenet10 model=runs/classify/train/weights/last.pt imgsz=32 - yolo task=classify mode=predict model=runs/classify/train/weights/last.pt imgsz=32 source=ultralytics/assets/bus.jpg - yolo mode=export model=runs/classify/train/weights/last.pt imgsz=32 format=torchscript + yolo pose train data=coco8-pose.yaml model=yolov8n-pose.yaml epochs=1 imgsz=32 + yolo pose train data=coco8-pose.yaml model=yolov8n-pose.pt epochs=1 imgsz=32 + yolo pose val data=coco8-pose.yaml model=runs/pose/train/weights/last.pt imgsz=32 + yolo pose predict model=runs/pose/train/weights/last.pt imgsz=32 source=ultralytics/assets/bus.jpg + yolo export model=runs/pose/train/weights/last.pt imgsz=32 format=torchscript - name: Pytest tests shell: bash # for Windows compatibility run: pytest tests diff --git a/README.md b/README.md index c1683e4..f922504 100644 --- a/README.md +++ b/README.md @@ -109,7 +109,10 @@ YOLOv8 [Python Docs](https://docs.ultralytics.com/usage/python) for more example ##
Models
-All YOLOv8 pretrained models are available here. Detect, Segment and Pose models are pretrained on the [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/datasets/coco.yaml) dataset, while Classify models are pretrained on the [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/datasets/ImageNet.yaml) dataset. +All YOLOv8 pretrained models are available here. Detect, Segment and Pose models are pretrained on +the [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/datasets/coco.yaml) dataset, while Classify +models are pretrained on +the [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/datasets/ImageNet.yaml) dataset. [Models](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/models) download automatically from the latest Ultralytics [release](https://github.com/ultralytics/assets/releases) on first use. @@ -174,6 +177,28 @@ See [Classification Docs](https://docs.ultralytics.com/tasks/classify/) for usag +
Pose + +See [Pose Docs](https://docs.ultralytics.com/tasks/pose/) for usage examples with these models. + +| Model | size
(pixels) | mAPbox
50-95 | mAPpose
50-95 | Speed
CPU ONNX
(ms) | Speed
A100 TensorRT
(ms) | params
(M) | FLOPs
(B) | +| ---------------------------------------------------------------------------------------------------- | --------------------- | -------------------- | --------------------- | ------------------------------ | ----------------------------------- | ------------------ | ----------------- | +| [YOLOv8n-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-pose.pt) | 640 | - | 49.7 | - | - | 3.3 | 9.2 | +| [YOLOv8s-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-pose.pt) | 640 | - | 59.2 | - | - | 11.6 | 30.2 | +| [YOLOv8m-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-pose.pt) | 640 | - | 63.6 | - | - | 26.4 | 81.0 | +| [YOLOv8l-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-pose.pt) | 640 | - | 67.0 | - | - | 44.4 | 168.6 | +| [YOLOv8x-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose.pt) | 640 | - | 68.9 | - | - | 69.4 | 263.2 | +| [YOLOv8x-pose-p6](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose-p6.pt) | 1280 | - | 71.5 | - | - | 99.1 | 1066.4 | + +- **mAPval** values are for single-model single-scale on [COCO Keypoints val2017](http://cocodataset.org) + dataset. +
Reproduce by `yolo val pose data=coco-pose.yaml device=0` +- **Speed** averaged over COCO val images using an [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) + instance. +
Reproduce by `yolo val pose data=coco-pose.yaml batch=1 device=0|cpu` + +
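The notes above give CLI reproduction commands; the snippet below is a minimal Python sketch of driving the same checks programmatically, using only the `YOLO` and `benchmark` calls that already appear elsewhere in this diff. Passing `data=` to `val()` is assumed to work the same way as on the CLI, and the tiny `coco8-pose.yaml` dataset added in this change is substituted so the sketch runs without the full COCO-Pose download.

```python
from ultralytics import YOLO
from ultralytics.yolo.utils.benchmarks import benchmark

# Accuracy: validate a pretrained pose checkpoint (use data='coco-pose.yaml' to reproduce the table)
model = YOLO('yolov8n-pose.pt')
metrics = model.val(data='coco8-pose.yaml')
print(metrics.box.map)  # mAP50-95

# Speed/format profiling via the benchmark utility the CI workflow in this diff already calls
benchmark(model='yolov8n-pose.pt', imgsz=160, half=False)
```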
+ ##
Integrations

diff --git a/README.zh-CN.md b/README.zh-CN.md index 81e568c..b709b3d 100644 --- a/README.zh-CN.md +++ b/README.zh-CN.md @@ -163,6 +163,28 @@ Ultralytics [发布页](https://github.com/ultralytics/ultralytics/releases) 自 +
Pose + +See [Pose Docs](https://docs.ultralytics.com/tasks/pose/) for usage examples with these models. + +| Model | size
(pixels) | mAPbox
50-95 | mAPpose
50-95 | Speed
CPU ONNX
(ms) | Speed
A100 TensorRT
(ms) | params
(M) | FLOPs
(B) | +| ---------------------------------------------------------------------------------------------------- | --------------------- | -------------------- | --------------------- | ------------------------------ | ----------------------------------- | ------------------ | ----------------- | +| [YOLOv8n-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-pose.pt) | 640 | - | 49.7 | - | - | 3.3 | 9.2 | +| [YOLOv8s-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-pose.pt) | 640 | - | 59.2 | - | - | 11.6 | 30.2 | +| [YOLOv8m-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-pose.pt) | 640 | - | 63.6 | - | - | 26.4 | 81.0 | +| [YOLOv8l-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-pose.pt) | 640 | - | 67.0 | - | - | 44.4 | 168.6 | +| [YOLOv8x-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose.pt) | 640 | - | 68.9 | - | - | 69.4 | 263.2 | +| [YOLOv8x-pose-p6](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose-p6.pt) | 1280 | - | 71.5 | - | - | 99.1 | 1066.4 | + +- **mAPval** values are for single-model single-scale on [COCO Keypoints val2017](http://cocodataset.org) + dataset. +
Reproduce by `yolo val pose data=coco-pose.yaml device=0` +- **Speed** averaged over COCO val images using an [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) + instance. +
Reproduce by `yolo val pose data=coco-pose.yaml batch=1 device=0|cpu` + +
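A short Python sketch of running one of the pose checkpoints listed above on an image. Loading and calling the model mirrors the predict examples elsewhere in this diff; the `results[0].keypoints` accessor is an assumption about how pose results are exposed and may differ by version.

```python
from ultralytics import YOLO

model = YOLO('yolov8n-pose.pt')  # pretrained pose checkpoint from the table above
results = model('https://ultralytics.com/images/bus.jpg')  # run inference on an image

# Keypoints are returned per detected person; the attribute name below is an assumption
kpts = results[0].keypoints  # assumed accessor, roughly (num_persons, 17, 3) for COCO keypoints
print(kpts.shape if hasattr(kpts, 'shape') else kpts)
```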
+ ##
模块集成

diff --git a/docker/Dockerfile b/docker/Dockerfile index f9d0c4c..22cd80b 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -2,7 +2,7 @@ # Builds ultralytics/ultralytics:latest image on DockerHub https://hub.docker.com/r/ultralytics/ultralytics # Image is CUDA-optimized for YOLOv8 single/multi-GPU training and inference -# Start FROM PyTorch image https://hub.docker.com/r/pytorch/pytorch +# Start FROM PyTorch image https://hub.docker.com/r/pytorch/pytorch or nvcr.io/nvidia/pytorch:23.03-py3 FROM pytorch/pytorch:2.0.0-cuda11.7-cudnn8-runtime # Downloads to user config dir diff --git a/docs/index.md b/docs/index.md index e054ee1..ec61e49 100644 --- a/docs/index.md +++ b/docs/index.md @@ -42,4 +42,4 @@ Since its launch YOLO has been employed in various applications, including auton YOLOv8 is designed with a strong focus on speed, size, and accuracy, making it a compelling choice for various vision AI tasks. It outperforms previous versions by incorporating innovations like a new backbone network, a new anchor-free split head, and new loss functions. These improvements enable YOLOv8 to deliver superior results, while maintaining a compact size and exceptional speed. -Additionally, YOLOv8 supports a full range of vision AI tasks, including [detection](tasks/detect.md), [segmentation](tasks/segment.md), [pose estimation](tasks/keypoints.md), [tracking](modes/track.md), and [classification](tasks/classify.md). This versatility allows users to leverage YOLOv8's capabilities across diverse applications and domains. +Additionally, YOLOv8 supports a full range of vision AI tasks, including [detection](tasks/detect.md), [segmentation](tasks/segment.md), [pose estimation](tasks/pose.md), [tracking](modes/track.md), and [classification](tasks/classify.md). This versatility allows users to leverage YOLOv8's capabilities across diverse applications and domains. diff --git a/docs/modes/benchmark.md b/docs/modes/benchmark.md index b57e093..662a013 100644 --- a/docs/modes/benchmark.md +++ b/docs/modes/benchmark.md @@ -1,7 +1,7 @@ **Benchmark mode** is used to profile the speed and accuracy of various export formats for YOLOv8. The benchmarks -provide information on the size of the exported format, its `mAP50-95` metrics (for object detection and segmentation) +provide information on the size of the exported format, its `mAP50-95` metrics (for object detection, segmentation and pose) or `accuracy_top5` metrics (for classification), and the inference time in milliseconds per image across various export formats like ONNX, OpenVINO, TensorRT and others. This information can help users choose the optimal export format for their specific use case based on their requirements for speed and accuracy. diff --git a/docs/modes/index.md b/docs/modes/index.md index ffa544a..1ca2383 100644 --- a/docs/modes/index.md +++ b/docs/modes/index.md @@ -54,7 +54,7 @@ for applications such as surveillance systems or self-driving cars. ## [Benchmark](benchmark.md) Benchmark mode is used to profile the speed and accuracy of various export formats for YOLOv8. The benchmarks provide -information on the size of the exported format, its `mAP50-95` metrics (for object detection and segmentation) +information on the size of the exported format, its `mAP50-95` metrics (for object detection, segmentation and pose) or `accuracy_top5` metrics (for classification), and the inference time in milliseconds per image across various export formats like ONNX, OpenVINO, TensorRT and others. 
This information can help users choose the optimal export format for their specific use case based on their requirements for speed and accuracy. diff --git a/docs/modes/train.md b/docs/modes/train.md index e68c8b8..a9275a0 100644 --- a/docs/modes/train.md +++ b/docs/modes/train.md @@ -88,6 +88,8 @@ task. | `box` | `7.5` | box loss gain | | `cls` | `0.5` | cls loss gain (scale with pixels) | | `dfl` | `1.5` | dfl loss gain | +| `pose` | `12.0` | pose loss gain (pose-only) | +| `kobj` | `2.0` | keypoint obj loss gain (pose-only) | | `fl_gamma` | `0.0` | focal loss gamma (efficientDet default gamma=1.5) | | `label_smoothing` | `0.0` | label smoothing (fraction) | | `nbs` | `64` | nominal batch size | diff --git a/docs/reference/ops.md b/docs/reference/ops.md index 8c4f1b7..3f8246d 100644 --- a/docs/reference/ops.md +++ b/docs/reference/ops.md @@ -175,9 +175,9 @@ show_source: false show_root_toc_entry: false --- -## scale_segments +## scale_coords -:::ultralytics.yolo.utils.ops.scale_segments +:::ultralytics.yolo.utils.ops.scale_coords handler: python options: show_source: false @@ -193,9 +193,9 @@ show_source: false show_root_toc_entry: false --- -## clip_segments +## clip_coords -:::ultralytics.yolo.utils.ops.clip_segments +:::ultralytics.yolo.utils.ops.clip_coords handler: python options: show_source: false diff --git a/docs/tasks/classify.md b/docs/tasks/classify.md index 3c8cb6f..d985d46 100644 --- a/docs/tasks/classify.md +++ b/docs/tasks/classify.md @@ -122,7 +122,7 @@ Use a trained YOLOv8n-cls model to run predictions on images. yolo classify predict model=path/to/best.pt source='https://ultralytics.com/images/bus.jpg' # predict with custom model ``` -Read more details of `predict` in our [Predict](https://docs.ultralytics.com/modes/predict/) page. +See full `predict` mode details in the [Predict](https://docs.ultralytics.com/modes/predict/) page. ## Export @@ -150,7 +150,7 @@ Export a YOLOv8n-cls model to a different format like ONNX, CoreML, etc. ``` Available YOLOv8-cls export formats are in the table below. You can predict or validate directly on exported models, -i.e. `yolo predict model=yolov8n-cls.onnx`. +i.e. `yolo predict model=yolov8n-cls.onnx`. Usage examples are shown for your model after export completes. | Format | `format` Argument | Model | Metadata | |--------------------------------------------------------------------|-------------------|-------------------------------|----------| @@ -167,3 +167,4 @@ i.e. `yolo predict model=yolov8n-cls.onnx`. | [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n-cls_web_model/` | ✅ | | [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n-cls_paddle_model/` | ✅ | +See full `export` details in the [Export](https://docs.ultralytics.com/modes/export/) page. diff --git a/docs/tasks/detect.md b/docs/tasks/detect.md index 34b580d..89cd1d2 100644 --- a/docs/tasks/detect.md +++ b/docs/tasks/detect.md @@ -123,7 +123,7 @@ Use a trained YOLOv8n model to run predictions on images. yolo detect predict model=path/to/best.pt source='https://ultralytics.com/images/bus.jpg' # predict with custom model ``` -Read more details of `predict` in our [Predict](https://docs.ultralytics.com/modes/predict/) page. +See full `predict` mode details in the [Predict](https://docs.ultralytics.com/modes/predict/) page. ## Export @@ -151,7 +151,7 @@ Export a YOLOv8n model to a different format like ONNX, CoreML, etc. ``` Available YOLOv8 export formats are in the table below. You can predict or validate directly on exported models, -i.e. 
`yolo predict model=yolov8n.onnx`. +i.e. `yolo predict model=yolov8n.onnx`. Usage examples are shown for your model after export completes. | Format | `format` Argument | Model | Metadata | |--------------------------------------------------------------------|-------------------|---------------------------|----------| @@ -167,3 +167,5 @@ i.e. `yolo predict model=yolov8n.onnx`. | [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ | | [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n_web_model/` | ✅ | | [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n_paddle_model/` | ✅ | + +See full `export` details in the [Export](https://docs.ultralytics.com/modes/export/) page. diff --git a/docs/tasks/index.md b/docs/tasks/index.md index 3276d53..47bbd39 100644 --- a/docs/tasks/index.md +++ b/docs/tasks/index.md @@ -2,7 +2,7 @@ YOLOv8 is an AI framework that supports multiple computer vision **tasks**. The framework can be used to perform [detection](detect.md), [segmentation](segment.md), [classification](classify.md), -and [keypoints](keypoints.md) detection. Each of these tasks has a different objective and use case. +and [pose](pose.md) estimation. Each of these tasks has a different objective and use case. @@ -29,15 +29,13 @@ images based on their content. It uses a variant of the EfficientNet architectur [Classification Examples](classify.md){ .md-button .md-button--primary} - +[Pose Examples](pose.md){ .md-button .md-button--primary} ## Conclusion diff --git a/docs/tasks/keypoints.md b/docs/tasks/keypoints.md deleted file mode 100644 index d9f2484..0000000 --- a/docs/tasks/keypoints.md +++ /dev/null @@ -1,149 +0,0 @@ -Key Point Estimation is a task that involves identifying the location of specific points in an image, usually referred -to as keypoints. The keypoints can represent various parts of the object such as joints, landmarks, or other distinctive -features. The locations of the keypoints are usually represented as a set of 2D `[x, y]` or 3D `[x, y, visible]` -coordinates. - - - -The output of a keypoint detector is a set of points that represent the keypoints on the object in the image, usually -along with the confidence scores for each point. Keypoint estimation is a good choice when you need to identify specific -parts of an object in a scene, and their location in relation to each other. - -!!! tip "Tip" - - YOLOv8 _keypoints_ models use the `-kpts` suffix, i.e. `yolov8n-kpts.pt`. These models are trained on the COCO dataset and are suitable for a variety of keypoint estimation tasks. - -[Models](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/models/v8){ .md-button .md-button--primary} - -## Train TODO - -Train an OpenPose model on a custom dataset of keypoints using the OpenPose framework. For more information on how to -train an OpenPose model on a custom dataset, see the OpenPose Training page. - -!!! 
example "" - - === "Python" - - ```python - from ultralytics import YOLO - - # Load a model - model = YOLO('yolov8n.yaml') # build a new model from YAML - model = YOLO('yolov8n.pt') # load a pretrained model (recommended for training) - model = YOLO('yolov8n.yaml').load('yolov8n.pt') # build from YAML and transfer weights - - # Train the model - model.train(data='coco128.yaml', epochs=100, imgsz=640) - ``` - === "CLI" - - ```bash - # Build a new model from YAML and start training from scratch - yolo detect train data=coco128.yaml model=yolov8n.yaml epochs=100 imgsz=640 - - # Start training from a pretrained *.pt model - yolo detect train data=coco128.yaml model=yolov8n.pt epochs=100 imgsz=640 - - # Build a new model from YAML, transfer pretrained weights to it and start training - yolo detect train data=coco128.yaml model=yolov8n.yaml pretrained=yolov8n.pt epochs=100 imgsz=640 - ``` - -## Val TODO - -Validate trained YOLOv8n model accuracy on the COCO128 dataset. No argument need to passed as the `model` retains it's -training `data` and arguments as model attributes. - -!!! example "" - - === "Python" - - ```python - from ultralytics import YOLO - - # Load a model - model = YOLO('yolov8n.pt') # load an official model - model = YOLO('path/to/best.pt') # load a custom model - - # Validate the model - metrics = model.val() # no arguments needed, dataset and settings remembered - metrics.box.map # map50-95 - metrics.box.map50 # map50 - metrics.box.map75 # map75 - metrics.box.maps # a list contains map50-95 of each category - ``` - === "CLI" - - ```bash - yolo detect val model=yolov8n.pt # val official model - yolo detect val model=path/to/best.pt # val custom model - ``` - -## Predict TODO - -Use a trained YOLOv8n model to run predictions on images. - -!!! example "" - - === "Python" - - ```python - from ultralytics import YOLO - - # Load a model - model = YOLO('yolov8n.pt') # load an official model - model = YOLO('path/to/best.pt') # load a custom model - - # Predict with the model - results = model('https://ultralytics.com/images/bus.jpg') # predict on an image - ``` - === "CLI" - - ```bash - yolo detect predict model=yolov8n.pt source='https://ultralytics.com/images/bus.jpg' # predict with official model - yolo detect predict model=path/to/best.pt source='https://ultralytics.com/images/bus.jpg' # predict with custom model - ``` - -Read more details of `predict` in our [Predict](https://docs.ultralytics.com/modes/predict/) page. - -## Export TODO - -Export a YOLOv8n model to a different format like ONNX, CoreML, etc. - -!!! example "" - - === "Python" - - ```python - from ultralytics import YOLO - - # Load a model - model = YOLO('yolov8n.pt') # load an official model - model = YOLO('path/to/best.pt') # load a custom trained - - # Export the model - model.export(format='onnx') - ``` - === "CLI" - - ```bash - yolo export model=yolov8n.pt format=onnx # export official model - yolo export model=path/to/best.pt format=onnx # export custom trained model - ``` - -Available YOLOv8-pose export formats are in the table below. You can predict or validate directly on exported models, -i.e. `yolo predict model=yolov8n-pose.onnx`. 
- -| Format | `format` Argument | Model | Metadata | -|--------------------------------------------------------------------|-------------------|---------------------------|----------| -| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | ✅ | -| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n.torchscript` | ✅ | -| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n.onnx` | ✅ | -| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n_openvino_model/` | ✅ | -| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n.engine` | ✅ | -| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n.mlmodel` | ✅ | -| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n_saved_model/` | ✅ | -| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n.pb` | ❌ | -| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n.tflite` | ✅ | -| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ | -| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n_web_model/` | ✅ | -| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n_paddle_model/` | ✅ | diff --git a/docs/tasks/pose.md b/docs/tasks/pose.md new file mode 100644 index 0000000..e32bc7d --- /dev/null +++ b/docs/tasks/pose.md @@ -0,0 +1,175 @@ +Pose estimation is a task that involves identifying the location of specific points in an image, usually referred +to as keypoints. The keypoints can represent various parts of the object such as joints, landmarks, or other distinctive +features. The locations of the keypoints are usually represented as a set of 2D `[x, y]` or 3D `[x, y, visible]` +coordinates. + + + +The output of a pose estimation model is a set of points that represent the keypoints on an object in the image, usually +along with the confidence scores for each point. Pose estimation is a good choice when you need to identify specific +parts of an object in a scene, and their location in relation to each other. + +!!! tip "Tip" + + YOLOv8 _pose_ models use the `-pose` suffix, i.e. `yolov8n-pose.pt`. These models are trained on the [COCO keypoints](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/datasets/coco-pose.yaml) dataset and are suitable for a variety of pose estimation tasks. + +## [Models](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/models/v8) + +YOLOv8 pretrained Pose models are shown here. Detect, Segment and Pose models are pretrained on +the [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/datasets/coco.yaml) dataset, while Classify +models are pretrained on +the [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/datasets/ImageNet.yaml) dataset. + +[Models](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/models) download automatically from the latest +Ultralytics [release](https://github.com/ultralytics/assets/releases) on first use. + +| Model | size
(pixels) | mAPbox
50-95 | mAPpose
50-95 | Speed
CPU ONNX
(ms) | Speed
A100 TensorRT
(ms) | params
(M) | FLOPs
(B) | +|------------------------------------------------------------------------------------------------------|-----------------------|----------------------|-----------------------|--------------------------------|-------------------------------------|--------------------|-------------------| +| [YOLOv8n-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-pose.pt) | 640 | - | 49.7 | - | - | 3.3 | 9.2 | +| [YOLOv8s-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-pose.pt) | 640 | - | 59.2 | - | - | 11.6 | 30.2 | +| [YOLOv8m-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-pose.pt) | 640 | - | 63.6 | - | - | 26.4 | 81.0 | +| [YOLOv8l-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-pose.pt) | 640 | - | 67.0 | - | - | 44.4 | 168.6 | +| [YOLOv8x-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose.pt) | 640 | - | 68.9 | - | - | 69.4 | 263.2 | +| [YOLOv8x-pose-p6](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose-p6.pt) | 1280 | - | 71.5 | - | - | 99.1 | 1066.4 | + +- **mAPval** values are for single-model single-scale on [COCO Keypoints val2017](http://cocodataset.org) + dataset. +
Reproduce by `yolo val pose data=coco-pose.yaml device=0` +- **Speed** averaged over COCO val images using an [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) + instance. +
Reproduce by `yolo val pose data=coco-pose.yaml batch=1 device=0|cpu`
+
+## Train
+
+Train a YOLOv8-pose model on the COCO128-pose dataset.
+
+!!! example ""
+
+    === "Python"
+
+        ```python
+        from ultralytics import YOLO
+
+        # Load a model
+        model = YOLO('yolov8n-pose.yaml') # build a new model from YAML
+        model = YOLO('yolov8n-pose.pt') # load a pretrained model (recommended for training)
+        model = YOLO('yolov8n-pose.yaml').load('yolov8n-pose.pt') # build from YAML and transfer weights
+
+        # Train the model
+        model.train(data='coco128-pose.yaml', epochs=100, imgsz=640)
+        ```
+    === "CLI"
+
+        ```bash
+        # Build a new model from YAML and start training from scratch
+        yolo pose train data=coco128-pose.yaml model=yolov8n-pose.yaml epochs=100 imgsz=640
+
+        # Start training from a pretrained *.pt model
+        yolo pose train data=coco128-pose.yaml model=yolov8n-pose.pt epochs=100 imgsz=640
+
+        # Build a new model from YAML, transfer pretrained weights to it and start training
+        yolo pose train data=coco128-pose.yaml model=yolov8n-pose.yaml pretrained=yolov8n-pose.pt epochs=100 imgsz=640
+        ```
+
+## Val
+
+Validate trained YOLOv8n-pose model accuracy on the COCO128-pose dataset. No arguments need to be passed as the `model`
+retains its training `data` and arguments as model attributes.
+
+!!! example ""
+
+    === "Python"
+
+        ```python
+        from ultralytics import YOLO
+
+        # Load a model
+        model = YOLO('yolov8n-pose.pt') # load an official model
+        model = YOLO('path/to/best.pt') # load a custom model
+
+        # Validate the model
+        metrics = model.val() # no arguments needed, dataset and settings remembered
+        metrics.box.map # map50-95
+        metrics.box.map50 # map50
+        metrics.box.map75 # map75
+        metrics.box.maps # a list containing map50-95 of each category
+        ```
+    === "CLI"
+
+        ```bash
+        yolo pose val model=yolov8n-pose.pt # val official model
+        yolo pose val model=path/to/best.pt # val custom model
+        ```
+
+## Predict
+
+Use a trained YOLOv8n-pose model to run predictions on images.
+
+!!! example ""
+
+    === "Python"
+
+        ```python
+        from ultralytics import YOLO
+
+        # Load a model
+        model = YOLO('yolov8n-pose.pt') # load an official model
+        model = YOLO('path/to/best.pt') # load a custom model
+
+        # Predict with the model
+        results = model('https://ultralytics.com/images/bus.jpg') # predict on an image
+        ```
+    === "CLI"
+
+        ```bash
+        yolo pose predict model=yolov8n-pose.pt source='https://ultralytics.com/images/bus.jpg' # predict with official model
+        yolo pose predict model=path/to/best.pt source='https://ultralytics.com/images/bus.jpg' # predict with custom model
+        ```
+
+See full `predict` mode details in the [Predict](https://docs.ultralytics.com/modes/predict/) page.
+
+## Export
+
+Export a YOLOv8n-pose model to a different format like ONNX, CoreML, etc.
+
+!!! example ""
+
+    === "Python"
+
+        ```python
+        from ultralytics import YOLO
+
+        # Load a model
+        model = YOLO('yolov8n-pose.pt') # load an official model
+        model = YOLO('path/to/best.pt') # load a custom trained model
+
+        # Export the model
+        model.export(format='onnx')
+        ```
+    === "CLI"
+
+        ```bash
+        yolo export model=yolov8n-pose.pt format=onnx # export official model
+        yolo export model=path/to/best.pt format=onnx # export custom trained model
+        ```
+
+Available YOLOv8-pose export formats are in the table below. You can predict or validate directly on exported models,
+i.e. `yolo predict model=yolov8n-pose.onnx`. Usage examples are shown for your model after export completes.
+ +| Format | `format` Argument | Model | Metadata | +|--------------------------------------------------------------------|-------------------|--------------------------------|----------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n-pose.pt` | ✅ | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n-pose.torchscript` | ✅ | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n-pose.onnx` | ✅ | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n-pose_openvino_model/` | ✅ | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n-pose.engine` | ✅ | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n-pose.mlmodel` | ✅ | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n-pose_saved_model/` | ✅ | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n-pose.pb` | ❌ | +| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n-pose.tflite` | ✅ | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n-pose_edgetpu.tflite` | ✅ | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n-pose_web_model/` | ✅ | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n-pose_paddle_model/` | ✅ | + +See full `export` details in the [Export](https://docs.ultralytics.com/modes/export/) page. diff --git a/docs/tasks/segment.md b/docs/tasks/segment.md index 2ff7f58..67e4c6b 100644 --- a/docs/tasks/segment.md +++ b/docs/tasks/segment.md @@ -127,7 +127,7 @@ Use a trained YOLOv8n-seg model to run predictions on images. yolo segment predict model=path/to/best.pt source='https://ultralytics.com/images/bus.jpg' # predict with custom model ``` -Read more details of `predict` in our [Predict](https://docs.ultralytics.com/modes/predict/) page. +See full `predict` mode details in the [Predict](https://docs.ultralytics.com/modes/predict/) page. ## Export @@ -155,7 +155,7 @@ Export a YOLOv8n-seg model to a different format like ONNX, CoreML, etc. ``` Available YOLOv8-seg export formats are in the table below. You can predict or validate directly on exported models, -i.e. `yolo predict model=yolov8n-seg.onnx`. +i.e. `yolo predict model=yolov8n-seg.onnx`. Usage examples are shown for your model after export completes. | Format | `format` Argument | Model | Metadata | |--------------------------------------------------------------------|-------------------|-------------------------------|----------| @@ -172,4 +172,4 @@ i.e. `yolo predict model=yolov8n-seg.onnx`. | [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n-seg_web_model/` | ✅ | | [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n-seg_paddle_model/` | ✅ | - +See full `export` details in the [Export](https://docs.ultralytics.com/modes/export/) page. 
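The task pages above show train, val, predict and export in isolation; the sketch below strings the documented calls together for the new pose task, using the `coco8-pose.yaml` dataset added in this diff. The `pose=12.0` and `kobj=2.0` overrides mirror the new loss-gain rows in the training settings tables; passing them as keyword arguments to `model.train()` is an assumption about how those settings are overridden from Python.

```python
from ultralytics import YOLO

# Train a nano pose model on the 8-image COCO8-pose dataset added in this change
model = YOLO('yolov8n-pose.pt')  # pretrained pose checkpoint
model.train(data='coco8-pose.yaml', epochs=3, imgsz=640, pose=12.0, kobj=2.0)  # pose/kobj gains from the settings table

# Validate; box metrics are read exactly as in the new pose.md Val example
metrics = model.val()
print(metrics.box.map)  # mAP50-95

# Predict on an image, then export for deployment
model('https://ultralytics.com/images/bus.jpg')
model.export(format='onnx')  # the exported file can be used directly, e.g. `yolo predict model=yolov8n-pose.onnx`
```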
diff --git a/docs/usage/cfg.md b/docs/usage/cfg.md index 0367ac4..c540f72 100644 --- a/docs/usage/cfg.md +++ b/docs/usage/cfg.md @@ -110,6 +110,8 @@ The training settings for YOLO models encompass various hyperparameters and conf | `box` | `7.5` | box loss gain | | `cls` | `0.5` | cls loss gain (scale with pixels) | | `dfl` | `1.5` | dfl loss gain | +| `pose` | `12.0` | pose loss gain (pose-only) | +| `kobj` | `2.0` | keypoint obj loss gain (pose-only) | | `fl_gamma` | `0.0` | focal loss gamma (efficientDet default gamma=1.5) | | `label_smoothing` | `0.0` | label smoothing (fraction) | | `nbs` | `64` | nominal batch size | diff --git a/docs/usage/engine.md b/docs/usage/engine.md index 5597be2..9cc85a1 100644 --- a/docs/usage/engine.md +++ b/docs/usage/engine.md @@ -74,7 +74,7 @@ trainer.add_callback("on_train_epoch_end", log_model) # Adds to existing callba trainer.train() ``` -To know more about Callback triggering events and entry point, checkout our Callbacks guide # TODO +To know more about Callback triggering events and entry point, checkout our [Callbacks Guide](callbacks.md) ## Other engine components diff --git a/docs/usage/python.md b/docs/usage/python.md index b5149ad..a4f6944 100644 --- a/docs/usage/python.md +++ b/docs/usage/python.md @@ -59,7 +59,6 @@ accurately predict the classes and locations of objects in an image. === "Resume" ```python - # TODO: Resume feature is under development and should be released soon. model = YOLO("last.pt") model.train(resume=True) ``` diff --git a/examples/tutorial.ipynb b/examples/tutorial.ipynb index 22f271b..deeb945 100644 --- a/examples/tutorial.ipynb +++ b/examples/tutorial.ipynb @@ -32,11 +32,11 @@ " \"Open\n", "
\n", "\n", - "Welcome to the Ultralytics YOLOv8 🚀 notebook! YOLOv8 is the latest version of the YOLO (You Only Look Once) object detection and image segmentation model developed by Ultralytics. This notebook serves as the starting point for exploring the various resources available to help you get started with YOLOv8 and understand its features and capabilities.\n", + "Welcome to the Ultralytics YOLOv8 🚀 notebook! YOLOv8 is the latest version of the YOLO (You Only Look Once) AI models developed by Ultralytics. This notebook serves as the starting point for exploring the various resources available to help you get started with YOLOv8 and understand its features and capabilities.\n", "\n", - "The YOLOv8 models are designed to be fast, accurate, and easy to use, making them an excellent choice for a wide range of object detection and image segmentation tasks. They can be trained on large datasets and are capable of running on a variety of hardware platforms, from CPUs to GPUs.\n", + "YOLOv8 models are fast, accurate, and easy to use, making them ideal for various object detection and image segmentation tasks. They can be trained on large datasets and run on diverse hardware platforms, from CPUs to GPUs.\n", "\n", - "Whether you are a seasoned machine learning practitioner or new to the field, we hope that the resources in this notebook will help you get the most out of YOLOv8. Please feel free to browse the YOLOv8 Docs and reach out to us with any questions or feedback.\n", + "We hope that the resources in this notebook will help you get the most out of YOLOv8. Please browse the YOLOv8 Docs for details, raise an issue on GitHub for support, and join our Discord community for questions and discussions!\n", "\n", "" ] @@ -66,7 +66,7 @@ "import ultralytics\n", "ultralytics.checks()" ], - "execution_count": 1, + "execution_count": null, "outputs": [ { "output_type": "stream", @@ -86,7 +86,7 @@ "source": [ "# 1. Predict\n", "\n", - "YOLOv8 may be used directly in the Command Line Interface (CLI) with a `yolo` command for a variety of tasks and modes and accepts additional arguments, i.e. `imgsz=640`. See a full list of available `yolo` [arguments](https://docs.ultralytics.com/usage/cfg/) in the YOLOv8 [Docs](https://docs.ultralytics.com).\n" + "YOLOv8 may be used directly in the Command Line Interface (CLI) with a `yolo` command for a variety of tasks and modes and accepts additional arguments, i.e. `imgsz=640`. See a full list of available `yolo` [arguments](https://docs.ultralytics.com/usage/cfg/) and other details in the [YOLOv8 Predict Docs](https://docs.ultralytics.com/modes/train/).\n" ] }, { @@ -102,7 +102,7 @@ "# Run inference on an image with YOLOv8n\n", "!yolo predict model=yolov8n.pt source='https://ultralytics.com/images/zidane.jpg'" ], - "execution_count": 3, + "execution_count": null, "outputs": [ { "output_type": "stream", @@ -135,7 +135,7 @@ }, "source": [ "# 2. Val\n", - "Validate a model's accuracy on the [COCO](https://cocodataset.org/#home) dataset's `val` or `test` splits. The latest YOLOv8 [models](https://github.com/ultralytics/ultralytics#models) are downloaded automatically the first time they are used." + "Validate a model's accuracy on the [COCO](https://cocodataset.org/#home) dataset's `val` or `test` splits. The latest YOLOv8 [models](https://github.com/ultralytics/ultralytics#models) are downloaded automatically the first time they are used. See [YOLOv8 Val Docs](https://docs.ultralytics.com/modes/val/) for more information." 
] }, { @@ -165,7 +165,7 @@ "# Validate YOLOv8n on COCO128 val\n", "!yolo val model=yolov8n.pt data=coco128.yaml" ], - "execution_count": 4, + "execution_count": null, "outputs": [ { "output_type": "stream", @@ -273,7 +273,7 @@ "\n", "

\n", "\n", - "Train YOLOv8 on [Detection](https://docs.ultralytics.com/tasks/detect/), [Segmentation](https://docs.ultralytics.com/tasks/segment/) and [Classification](https://docs.ultralytics.com/tasks/classify/) datasets." + "Train YOLOv8 on [Detect](https://docs.ultralytics.com/tasks/detect/), [Segment](https://docs.ultralytics.com/tasks/segment/), [Classify](https://docs.ultralytics.com/tasks/classify/) and [Pose](https://docs.ultralytics.com/tasks/pose/) datasets. See [YOLOv8 Train Docs](https://docs.ultralytics.com/modes/train/) for more information." ] }, { @@ -289,7 +289,7 @@ "# Train YOLOv8n on COCO128 for 3 epochs\n", "!yolo train model=yolov8n.pt data=coco128.yaml epochs=3 imgsz=640" ], - "execution_count": 5, + "execution_count": null, "outputs": [ { "output_type": "stream", @@ -449,7 +449,7 @@ "source": [ "# 4. Export\n", "\n", - "Export a YOLOv8 model to any supported format with the `format` argument, i.e. `format=onnx`.\n", + "Export a YOLOv8 model to any supported format below with the `format` argument, i.e. `format=onnx`. See [YOLOv8 Export Docs](https://docs.ultralytics.com/modes/export/) for more information.\n", "\n", "- 💡 ProTip: Export to [ONNX](https://onnx.ai/) or [OpenVINO](https://docs.openvino.ai/latest/index.html) for up to 3x CPU speedup. \n", "- 💡 ProTip: Export to [TensorRT](https://developer.nvidia.com/tensorrt) for up to 5x GPU speedup.\n", @@ -487,7 +487,7 @@ "id": "CYIjW4igCjqD", "outputId": "49b5bb9d-2c16-415b-c3e7-ec95c15a9e62" }, - "execution_count": 6, + "execution_count": null, "outputs": [ { "output_type": "stream", @@ -515,7 +515,7 @@ "source": [ "# 5. Python Usage\n", "\n", - "YOLOv8 was reimagined using Python-first principles for the most seamless Python YOLO experience yet. YOLOv8 models can be loaded from a trained checkpoint or created from scratch. Then methods are used to train, val, predict, and export the model. See a detailed Python usage examples in the YOLOv8 [Docs](https://docs.ultralytics.com/usage/python/)." + "YOLOv8 was reimagined using Python-first principles for the most seamless Python YOLO experience yet. YOLOv8 models can be loaded from a trained checkpoint or created from scratch. Then methods are used to train, val, predict, and export the model. See detailed Python usage examples in the [YOLOv8 Python Docs](https://docs.ultralytics.com/usage/python/)." ], "metadata": { "id": "kUMOQ0OeDBJG" @@ -547,7 +547,7 @@ "source": [ "# 6. Tasks\n", "\n", - "YOLOv8 can train, val, predict and export models for the 3 primary tasks in vision AI: detection, segmentation and classification.\n", + "YOLOv8 can train, val, predict and export models for the most common tasks in vision AI: [Detect](https://docs.ultralytics.com/tasks/detect/), [Segment](https://docs.ultralytics.com/tasks/segment/), [Classify](https://docs.ultralytics.com/tasks/classify/) and [Pose](https://docs.ultralytics.com/tasks/pose/). See [YOLOv8 Tasks Docs](https://docs.ultralytics.com/tasks/) for more information.\n", "\n", "\n" ], @@ -636,6 +636,33 @@ "execution_count": null, "outputs": [] }, + { + "cell_type": "markdown", + "source": [ + "## 4. Pose\n", + "\n", + "YOLOv8 _pose_ models use the `-pose` suffix, i.e. `yolov8n-pose.pt` and are pretrained on COCO Keypoints. See [Pose Docs](https://docs.ultralytics.com/tasks/pose/) for full details." 
+ ], + "metadata": { + "id": "SpIaFLiO11TG" + } + }, + { + "cell_type": "code", + "source": [ + "# Load YOLOv8n-pose, train it on COCO8-pose for 3 epochs and predict an image with it\n", + "from ultralytics import YOLO\n", + "\n", + "model = YOLO('yolov8n-pose.pt') # load a pretrained YOLOv8n pose model\n", + "model.train(data='coco8-pose.yaml', epochs=3) # train the model\n", + "model('https://ultralytics.com/images/bus.jpg') # predict on an image" + ], + "metadata": { + "id": "si4aKFNg19vX" + }, + "execution_count": null, + "outputs": [] + }, { "cell_type": "markdown", "metadata": { diff --git a/mkdocs.yml b/mkdocs.yml index 3ca07b3..85ade16 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -65,7 +65,7 @@ extra: data: 0 note: >- Thanks for your feedback!
- Tell us what we can improve. + Tell us what we can improve. social: - icon: fontawesome/brands/github @@ -134,7 +134,7 @@ nav: - Detect: tasks/detect.md - Segment: tasks/segment.md - Classify: tasks/classify.md -# - Keypoints: tasks/keypoints.md + - Pose: tasks/pose.md - Usage: - CLI: usage/cli.md - Python: usage/python.md diff --git a/tests/test_cli.py b/tests/test_cli.py index 3e5b8ce..2dca8a8 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -33,6 +33,10 @@ def test_train_cls(): run(f'yolo train classify model={CFG}-cls.yaml data=imagenet10 imgsz=32 epochs=1') +def test_train_pose(): + run(f'yolo train pose model={CFG}-pose.yaml data=coco8-pose.yaml imgsz=32 epochs=1') + + # Val checks ----------------------------------------------------------------------------------------------------------- def test_val_detect(): run(f'yolo val detect model={MODEL}.pt data=coco8.yaml imgsz=32') @@ -46,6 +50,10 @@ def test_val_classify(): run(f'yolo val classify model={MODEL}-cls.pt data=imagenet10 imgsz=32') +def test_val_pose(): + run(f'yolo val pose model={MODEL}-pose.pt data=coco8-pose.yaml imgsz=32') + + # Predict checks ------------------------------------------------------------------------------------------------------- def test_predict_detect(): run(f"yolo predict model={MODEL}.pt source={ROOT / 'assets'} imgsz=32 save save_crop save_txt") @@ -63,6 +71,10 @@ def test_predict_classify(): run(f"yolo predict model={MODEL}-cls.pt source={ROOT / 'assets'} imgsz=32 save save_txt") +def test_predict_pose(): + run(f"yolo predict model={MODEL}-pose.pt source={ROOT / 'assets'} imgsz=32 save save_txt") + + # Export checks -------------------------------------------------------------------------------------------------------- def test_export_detect_torchscript(): run(f'yolo export model={MODEL}.pt format=torchscript') @@ -76,6 +88,10 @@ def test_export_classify_torchscript(): run(f'yolo export model={MODEL}-cls.pt format=torchscript') +def test_export_classify_pose(): + run(f'yolo export model={MODEL}-pose.pt format=torchscript') + + def test_export_detect_edgetpu(enabled=False): if enabled and LINUX: run(f'yolo export model={MODEL}.pt format=edgetpu') diff --git a/ultralytics/__init__.py b/ultralytics/__init__.py index 1fb443f..496d5fd 100644 --- a/ultralytics/__init__.py +++ b/ultralytics/__init__.py @@ -1,6 +1,6 @@ # Ultralytics YOLO 🚀, GPL-3.0 license -__version__ = '8.0.65' +__version__ = '8.0.66' from ultralytics.hub import start from ultralytics.yolo.engine.model import YOLO diff --git a/ultralytics/datasets/coco-pose.yaml b/ultralytics/datasets/coco-pose.yaml new file mode 100644 index 0000000..537affd --- /dev/null +++ b/ultralytics/datasets/coco-pose.yaml @@ -0,0 +1,38 @@ +# Ultralytics YOLO 🚀, GPL-3.0 license +# COCO 2017 dataset http://cocodataset.org by Microsoft +# Example usage: yolo train data=coco-pose.yaml +# parent +# ├── ultralytics +# └── datasets +# └── coco-pose ← downloads here (20.1 GB) + + +# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 
+path: ../datasets/coco-pose # dataset root dir +train: train2017.txt # train images (relative to 'path') 118287 images +val: val2017.txt # val images (relative to 'path') 5000 images +test: test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794 + +# Keypoints +kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible) +flip_idx: [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15] + +# Classes +names: + 0: person + +# Download script/URL (optional) +download: | + from ultralytics.yolo.utils.downloads import download + from pathlib import Path + + # Download labels + dir = Path(yaml['path']) # dataset root dir + url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/' + urls = [url + 'coco2017labels-pose.zip'] # labels + download(urls, dir=dir.parent) + # Download data + urls = ['http://images.cocodataset.org/zips/train2017.zip', # 19G, 118k images + 'http://images.cocodataset.org/zips/val2017.zip', # 1G, 5k images + 'http://images.cocodataset.org/zips/test2017.zip'] # 7G, 41k images (optional) + download(urls, dir=dir / 'images', threads=3) diff --git a/ultralytics/datasets/coco8-pose.yaml b/ultralytics/datasets/coco8-pose.yaml new file mode 100644 index 0000000..367a4bb --- /dev/null +++ b/ultralytics/datasets/coco8-pose.yaml @@ -0,0 +1,25 @@ +# Ultralytics YOLO 🚀, GPL-3.0 license +# COCO8-pose dataset (first 8 images from COCO train2017) by Ultralytics +# Example usage: yolo train data=coco8-pose.yaml +# parent +# ├── ultralytics +# └── datasets +# └── coco8-pose ← downloads here (1 MB) + + +# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] +path: ../datasets/coco8-pose # dataset root dir +train: images/train # train images (relative to 'path') 4 images +val: images/val # val images (relative to 'path') 4 images +test: # test images (optional) + +# Keypoints +kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible) +flip_idx: [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15] + +# Classes +names: + 0: person + +# Download script/URL (optional) +download: https://ultralytics.com/assets/coco8-pose.zip diff --git a/ultralytics/models/README.md b/ultralytics/models/README.md index 7a291d9..e4d9da1 100644 --- a/ultralytics/models/README.md +++ b/ultralytics/models/README.md @@ -44,13 +44,14 @@ Any of these models can be used by loading their configs or pretrained checkpoin ### 1. YOLOv8 -**About** - Cutting edge Detection, Segmentation and Classification models developed by Ultralytics.
+**About** - Cutting edge Detection, Segmentation, Classification and Pose models developed by Ultralytics.
Available Models: - Detection - `yolov8n`, `yolov8s`, `yolov8m`, `yolov8l`, `yolov8x` - Instance Segmentation - `yolov8n-seg`, `yolov8s-seg`, `yolov8m-seg`, `yolov8l-seg`, `yolov8x-seg` - Classification - `yolov8n-cls`, `yolov8s-cls`, `yolov8m-cls`, `yolov8l-cls`, `yolov8x-cls` +- Pose - `yolov8n-pose`, `yolov8s-pose`, `yolov8m-pose`, `yolov8l-pose`, `yolov8x-pose`, `yolov8x-pose-p6`
Performance @@ -84,6 +85,17 @@ Available Models: | [YOLOv8l-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-cls.pt) | 224 | 78.0 | 94.1 | 163.0 | 0.87 | 37.5 | 99.7 | | [YOLOv8x-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-cls.pt) | 224 | 78.4 | 94.3 | 232.0 | 1.01 | 57.4 | 154.8 | +### Pose + +| Model | size
(pixels) | mAPbox
50-95 | mAPpose
50-95 | Speed
CPU ONNX
(ms) | Speed
A100 TensorRT
(ms) | params
(M) | FLOPs
(B) | +| ---------------------------------------------------------------------------------------------------- | --------------------- | -------------------- | --------------------- | ------------------------------ | ----------------------------------- | ------------------ | ----------------- | +| [YOLOv8n-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-pose.pt) | 640 | - | 49.7 | - | - | 3.3 | 9.2 | +| [YOLOv8s-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-pose.pt) | 640 | - | 59.2 | - | - | 11.6 | 30.2 | +| [YOLOv8m-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-pose.pt) | 640 | - | 63.6 | - | - | 26.4 | 81.0 | +| [YOLOv8l-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-pose.pt) | 640 | - | 67.0 | - | - | 44.4 | 168.6 | +| [YOLOv8x-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose.pt) | 640 | - | 68.9 | - | - | 69.4 | 263.2 | +| [YOLOv8x-pose-p6](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose-p6.pt) | 1280 | - | 71.5 | - | - | 99.1 | 1066.4 | +
### 2. YOLOv5u diff --git a/ultralytics/models/v8/yolov8-pose-p6.yaml b/ultralytics/models/v8/yolov8-pose-p6.yaml new file mode 100644 index 0000000..3d436b8 --- /dev/null +++ b/ultralytics/models/v8/yolov8-pose-p6.yaml @@ -0,0 +1,57 @@ +# Ultralytics YOLO 🚀, GPL-3.0 license +# YOLOv8 object detection model with P3-P6 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect + +# Parameters +nc: 1 # number of classes +kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible) +scales: # model compound scaling constants, i.e. 'model=yolov8n-p6.yaml' will call yolov8-p6.yaml with scale 'n' + # [depth, width, max_channels] + n: [0.33, 0.25, 1024] + s: [0.33, 0.50, 1024] + m: [0.67, 0.75, 768] + l: [1.00, 1.00, 512] + x: [1.00, 1.25, 512] + +# YOLOv8.0x6 backbone +backbone: + # [from, repeats, module, args] + - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 + - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 + - [-1, 3, C2f, [128, True]] + - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 + - [-1, 6, C2f, [256, True]] + - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 + - [-1, 6, C2f, [512, True]] + - [-1, 1, Conv, [768, 3, 2]] # 7-P5/32 + - [-1, 3, C2f, [768, True]] + - [-1, 1, Conv, [1024, 3, 2]] # 9-P6/64 + - [-1, 3, C2f, [1024, True]] + - [-1, 1, SPPF, [1024, 5]] # 11 + +# YOLOv8.0x6 head +head: + - [-1, 1, nn.Upsample, [None, 2, 'nearest']] + - [[-1, 8], 1, Concat, [1]] # cat backbone P5 + - [-1, 3, C2, [768, False]] # 14 + + - [-1, 1, nn.Upsample, [None, 2, 'nearest']] + - [[-1, 6], 1, Concat, [1]] # cat backbone P4 + - [-1, 3, C2, [512, False]] # 17 + + - [-1, 1, nn.Upsample, [None, 2, 'nearest']] + - [[-1, 4], 1, Concat, [1]] # cat backbone P3 + - [-1, 3, C2, [256, False]] # 20 (P3/8-small) + + - [-1, 1, Conv, [256, 3, 2]] + - [[-1, 17], 1, Concat, [1]] # cat head P4 + - [-1, 3, C2, [512, False]] # 23 (P4/16-medium) + + - [-1, 1, Conv, [512, 3, 2]] + - [[-1, 14], 1, Concat, [1]] # cat head P5 + - [-1, 3, C2, [768, False]] # 26 (P5/32-large) + + - [-1, 1, Conv, [768, 3, 2]] + - [[-1, 11], 1, Concat, [1]] # cat head P6 + - [-1, 3, C2, [1024, False]] # 29 (P6/64-xlarge) + + - [[20, 23, 26, 29], 1, Pose, [nc, kpt_shape]] # Pose(P3, P4, P5, P6) diff --git a/ultralytics/models/v8/yolov8-pose.yaml b/ultralytics/models/v8/yolov8-pose.yaml new file mode 100644 index 0000000..d7f6dda --- /dev/null +++ b/ultralytics/models/v8/yolov8-pose.yaml @@ -0,0 +1,47 @@ +# Ultralytics YOLO 🚀, GPL-3.0 license +# YOLOv8-pose keypoints/pose estimation model. For Usage examples see https://docs.ultralytics.com/tasks/pose + +# Parameters +nc: 1 # number of classes +kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible) +scales: # model compound scaling constants, i.e. 
'model=yolov8n-pose.yaml' will call yolov8-pose.yaml with scale 'n' + # [depth, width, max_channels] + n: [0.33, 0.25, 1024] + s: [0.33, 0.50, 1024] + m: [0.67, 0.75, 768] + l: [1.00, 1.00, 512] + x: [1.00, 1.25, 512] + +# YOLOv8.0n backbone +backbone: + # [from, repeats, module, args] + - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 + - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 + - [-1, 3, C2f, [128, True]] + - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 + - [-1, 6, C2f, [256, True]] + - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 + - [-1, 6, C2f, [512, True]] + - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32 + - [-1, 3, C2f, [1024, True]] + - [-1, 1, SPPF, [1024, 5]] # 9 + +# YOLOv8.0n head +head: + - [-1, 1, nn.Upsample, [None, 2, 'nearest']] + - [[-1, 6], 1, Concat, [1]] # cat backbone P4 + - [-1, 3, C2f, [512]] # 12 + + - [-1, 1, nn.Upsample, [None, 2, 'nearest']] + - [[-1, 4], 1, Concat, [1]] # cat backbone P3 + - [-1, 3, C2f, [256]] # 15 (P3/8-small) + + - [-1, 1, Conv, [256, 3, 2]] + - [[-1, 12], 1, Concat, [1]] # cat head P4 + - [-1, 3, C2f, [512]] # 18 (P4/16-medium) + + - [-1, 1, Conv, [512, 3, 2]] + - [[-1, 9], 1, Concat, [1]] # cat head P5 + - [-1, 3, C2f, [1024]] # 21 (P5/32-large) + + - [[15, 18, 21], 1, Pose, [nc, kpt_shape]] # Pose(P3, P4, P5) diff --git a/ultralytics/nn/autobackend.py b/ultralytics/nn/autobackend.py index 7e35508..9e2c792 100644 --- a/ultralytics/nn/autobackend.py +++ b/ultralytics/nn/autobackend.py @@ -91,8 +91,10 @@ class AutoBackend(nn.Module): if nn_module: model = weights.to(device) model = model.fuse(verbose=verbose) if fuse else model - names = model.module.names if hasattr(model, 'module') else model.names # get class names + if hasattr(model, 'kpt_shape'): + kpt_shape = model.kpt_shape # pose-only stride = max(int(model.stride.max()), 32) # model stride + names = model.module.names if hasattr(model, 'module') else model.names # get class names model.half() if fp16 else model.float() self.model = model # explicitly assign for to(), cpu(), cuda(), half() pt = True @@ -102,6 +104,8 @@ class AutoBackend(nn.Module): device=device, inplace=True, fuse=fuse) + if hasattr(model, 'kpt_shape'): + kpt_shape = model.kpt_shape # pose-only stride = max(int(model.stride.max()), 32) # model stride names = model.module.names if hasattr(model, 'module') else model.names # get class names model.half() if fp16 else model.float() @@ -268,13 +272,14 @@ class AutoBackend(nn.Module): for k, v in metadata.items(): if k in ('stride', 'batch'): metadata[k] = int(v) - elif k in ('imgsz', 'names') and isinstance(v, str): + elif k in ('imgsz', 'names', 'kpt_shape') and isinstance(v, str): metadata[k] = eval(v) stride = metadata['stride'] task = metadata['task'] batch = metadata['batch'] imgsz = metadata['imgsz'] names = metadata['names'] + kpt_shape = metadata.get('kpt_shape') elif not (pt or triton or nn_module): LOGGER.warning(f"WARNING ⚠️ Metadata not found for 'model={weights}'") diff --git a/ultralytics/nn/modules.py b/ultralytics/nn/modules.py index ddf0085..ee21d79 100644 --- a/ultralytics/nn/modules.py +++ b/ultralytics/nn/modules.py @@ -378,7 +378,9 @@ class Ensemble(nn.ModuleList): return y, None # inference, train output -# heads +# Model heads below ---------------------------------------------------------------------------------------------------- + + class Detect(nn.Module): # YOLOv8 Detect head for detection models dynamic = False # force grid reconstruction @@ -394,7 +396,6 @@ class Detect(nn.Module): self.reg_max = 16 # DFL channels (ch[0] // 16 to scale 4/8/12/16/20 for n/s/m/l/x) self.no = nc + 
self.reg_max * 4 # number of outputs per anchor self.stride = torch.zeros(self.nl) # strides computed during build - c2, c3 = max((16, ch[0] // 4, self.reg_max * 4)), max(ch[0], self.nc) # channels self.cv2 = nn.ModuleList( nn.Sequential(Conv(x, c2, 3), Conv(c2, c2, 3), nn.Conv2d(c2, 4 * self.reg_max, 1)) for x in ch) @@ -454,6 +455,36 @@ class Segment(Detect): return (torch.cat([x, mc], 1), p) if self.export else (torch.cat([x[0], mc], 1), (x[1], mc, p)) +class Pose(Detect): + # YOLOv8 Pose head for keypoints models + def __init__(self, nc=80, kpt_shape=(17, 3), ch=()): + super().__init__(nc, ch) + self.kpt_shape = kpt_shape # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible) + self.nk = kpt_shape[0] * kpt_shape[1] # number of keypoints total + self.detect = Detect.forward + + c4 = max(ch[0] // 4, self.nk) + self.cv4 = nn.ModuleList(nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3), nn.Conv2d(c4, self.nk, 1)) for x in ch) + + def forward(self, x): + bs = x[0].shape[0] # batch size + kpt = torch.cat([self.cv4[i](x[i]).view(bs, self.nk, -1) for i in range(self.nl)], -1) # (bs, 17*3, h*w) + x = self.detect(self, x) + if self.training: + return x, kpt + pred_kpt = self.kpts_decode(kpt) + return torch.cat([x, pred_kpt], 1) if self.export else (torch.cat([x[0], pred_kpt], 1), (x[1], kpt)) + + def kpts_decode(self, kpts): + ndim = self.kpt_shape[1] + y = kpts.clone() + if ndim == 3: + y[:, 2::3].sigmoid_() # inplace sigmoid + y[:, 0::ndim] = (y[:, 0::ndim] * 2.0 + (self.anchors[0] - 0.5)) * self.strides + y[:, 1::ndim] = (y[:, 1::ndim] * 2.0 + (self.anchors[1] - 0.5)) * self.strides + return y + + class Classify(nn.Module): # YOLOv8 classification head, i.e. x(b,c1,20,20) to x(b,c2) def __init__(self, c1, c2, k=1, s=1, p=None, g=1): # ch_in, ch_out, kernel, stride, padding, groups diff --git a/ultralytics/nn/tasks.py b/ultralytics/nn/tasks.py index dffd8e6..c8f4627 100644 --- a/ultralytics/nn/tasks.py +++ b/ultralytics/nn/tasks.py @@ -10,7 +10,7 @@ import torch.nn as nn from ultralytics.nn.modules import (C1, C2, C3, C3TR, SPP, SPPF, Bottleneck, BottleneckCSP, C2f, C3Ghost, C3x, Classify, Concat, Conv, ConvTranspose, Detect, DWConv, DWConvTranspose2d, Ensemble, Focus, - GhostBottleneck, GhostConv, Segment) + GhostBottleneck, GhostConv, Pose, Segment) from ultralytics.yolo.utils import DEFAULT_CFG_DICT, DEFAULT_CFG_KEYS, LOGGER, colorstr, emojis, yaml_load from ultralytics.yolo.utils.checks import check_requirements, check_suffix, check_yaml from ultralytics.yolo.utils.torch_utils import (fuse_conv_and_bn, fuse_deconv_and_bn, initialize_weights, @@ -183,10 +183,10 @@ class DetectionModel(BaseModel): # Build strides m = self.model[-1] # Detect() - if isinstance(m, (Detect, Segment)): + if isinstance(m, (Detect, Segment, Pose)): s = 256 # 2x min stride m.inplace = self.inplace - forward = lambda x: self.forward(x)[0] if isinstance(m, Segment) else self.forward(x) + forward = lambda x: self.forward(x)[0] if isinstance(m, (Segment, Pose)) else self.forward(x) m.stride = torch.tensor([s / x.shape[-2] for x in forward(torch.zeros(1, ch, s, s))]) # forward self.stride = m.stride m.bias_init() # only run once @@ -242,12 +242,23 @@ class DetectionModel(BaseModel): class SegmentationModel(DetectionModel): # YOLOv8 segmentation model def __init__(self, cfg='yolov8n-seg.yaml', ch=3, nc=None, verbose=True): - super().__init__(cfg, ch, nc, verbose) + super().__init__(cfg=cfg, ch=ch, nc=nc, verbose=verbose) def _forward_augment(self, x): raise NotImplementedError(emojis('WARNING ⚠️ 
SegmentationModel has not supported augment inference yet!')) +class PoseModel(DetectionModel): + # YOLOv8 pose model + def __init__(self, cfg='yolov8n-pose.yaml', ch=3, nc=None, data_kpt_shape=(None, None), verbose=True): + if not isinstance(cfg, dict): + cfg = yaml_model_load(cfg) # load model YAML + if any(data_kpt_shape) and list(data_kpt_shape) != list(cfg['kpt_shape']): + LOGGER.info(f"Overriding model.yaml kpt_shape={cfg['kpt_shape']} with kpt_shape={data_kpt_shape}") + cfg['kpt_shape'] = data_kpt_shape + super().__init__(cfg=cfg, ch=ch, nc=nc, verbose=verbose) + + class ClassificationModel(BaseModel): # YOLOv8 classification model def __init__(self, @@ -425,7 +436,7 @@ def parse_model(d, ch, verbose=True): # model_dict, input_channels(3) # Args max_channels = float('inf') nc, act, scales = (d.get(x) for x in ('nc', 'act', 'scales')) - depth, width = (d.get(x, 1.0) for x in ('depth_multiple', 'width_multiple')) + depth, width, kpt_shape = (d.get(x, 1.0) for x in ('depth_multiple', 'width_multiple', 'kpt_shape')) if scales: scale = d.get('scale') if not scale: @@ -464,7 +475,7 @@ def parse_model(d, ch, verbose=True): # model_dict, input_channels(3) args = [ch[f]] elif m is Concat: c2 = sum(ch[x] for x in f) - elif m in (Detect, Segment): + elif m in (Detect, Segment, Pose): args.append([ch[x] for x in f]) if m is Segment: args[2] = make_divisible(min(args[2], max_channels) * width, 8) @@ -543,6 +554,8 @@ def guess_model_task(model): return 'detect' if m == 'segment': return 'segment' + if m == 'pose': + return 'pose' # Guess from model cfg if isinstance(model, dict): @@ -565,6 +578,8 @@ def guess_model_task(model): return 'segment' elif isinstance(m, Classify): return 'classify' + elif isinstance(m, Pose): + return 'pose' # Guess from model filename if isinstance(model, (str, Path)): @@ -573,10 +588,12 @@ def guess_model_task(model): return 'segment' elif '-cls' in model.stem or 'classify' in model.parts: return 'classify' + elif '-pose' in model.stem or 'pose' in model.parts: + return 'pose' elif 'detect' in model.parts: return 'detect' # Unable to determine task from model LOGGER.warning("WARNING ⚠️ Unable to automatically guess model task, assuming 'task=detect'. " - "Explicitly define task for your model, i.e. 'task=detect', 'task=segment' or 'task=classify'.") + "Explicitly define task for your model, i.e. 
'task=detect', 'segment', 'classify', or 'pose'.") return 'detect' # assume detect diff --git a/ultralytics/tracker/track.py b/ultralytics/tracker/track.py index 78e32c6..88c54df 100644 --- a/ultralytics/tracker/track.py +++ b/ultralytics/tracker/track.py @@ -33,10 +33,9 @@ def on_predict_postprocess_end(predictor): tracks = predictor.trackers[i].update(det, im0s[i]) if len(tracks) == 0: continue + idx = tracks[:, -1].tolist() + predictor.results[i] = predictor.results[i][idx] predictor.results[i].update(boxes=torch.as_tensor(tracks[:, :-1])) - if predictor.results[i].masks is not None: - idx = tracks[:, -1].tolist() - predictor.results[i].masks = predictor.results[i].masks[idx] def register_tracker(model): diff --git a/ultralytics/yolo/cfg/__init__.py b/ultralytics/yolo/cfg/__init__.py index 83e997d..7f3f8a9 100644 --- a/ultralytics/yolo/cfg/__init__.py +++ b/ultralytics/yolo/cfg/__init__.py @@ -18,13 +18,13 @@ TASKS = 'detect', 'segment', 'classify', 'pose' TASK2DATA = { 'detect': 'coco128.yaml', 'segment': 'coco128-seg.yaml', - 'pose': 'coco128-pose.yaml', - 'classify': 'imagenet100'} + 'classify': 'imagenet100', + 'pose': 'coco128-pose.yaml'} TASK2MODEL = { 'detect': 'yolov8n.pt', 'segment': 'yolov8n-seg.pt', - 'pose': 'yolov8n-pose.yaml', - 'classify': 'yolov8n-cls.pt'} # temp + 'classify': 'yolov8n-cls.pt', + 'pose': 'yolov8n-pose.yaml'} CLI_HELP_MSG = \ f""" diff --git a/ultralytics/yolo/cfg/default.yaml b/ultralytics/yolo/cfg/default.yaml index f2ab36c..b31c004 100644 --- a/ultralytics/yolo/cfg/default.yaml +++ b/ultralytics/yolo/cfg/default.yaml @@ -88,6 +88,8 @@ warmup_bias_lr: 0.1 # warmup initial bias lr box: 7.5 # box loss gain cls: 0.5 # cls loss gain (scale with pixels) dfl: 1.5 # dfl loss gain +pose: 12.0 # pose loss gain +kobj: 1.0 # keypoint obj loss gain fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5) label_smoothing: 0.0 # label smoothing (fraction) nbs: 64 # nominal batch size diff --git a/ultralytics/yolo/data/augment.py b/ultralytics/yolo/data/augment.py index 1658e12..a234da4 100644 --- a/ultralytics/yolo/data/augment.py +++ b/ultralytics/yolo/data/augment.py @@ -16,6 +16,8 @@ from ..utils.metrics import bbox_ioa from ..utils.ops import segment2box from .utils import polygons2masks, polygons2masks_overlap +POSE_FLIPLR_INDEX = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15] + # TODO: we might need a BaseTransform to make all these augments be compatible with both classification and semantic class BaseTransform: @@ -309,27 +311,22 @@ class RandomPerspective: """apply affine to keypoints. Args: - keypoints(ndarray): keypoints, [N, 17, 2]. + keypoints(ndarray): keypoints, [N, 17, 3]. M(ndarray): affine matrix. Return: - new_keypoints(ndarray): keypoints after affine, [N, 17, 2]. + new_keypoints(ndarray): keypoints after affine, [N, 17, 3]. 
""" - n = len(keypoints) + n, nkpt = keypoints.shape[:2] if n == 0: return keypoints - new_keypoints = np.ones((n * 17, 3)) - new_keypoints[:, :2] = keypoints.reshape(n * 17, 2) # num_kpt is hardcoded to 17 - new_keypoints = new_keypoints @ M.T # transform - new_keypoints = (new_keypoints[:, :2] / new_keypoints[:, 2:3]).reshape(n, 34) # perspective rescale or affine - new_keypoints[keypoints.reshape(-1, 34) == 0] = 0 - x_kpts = new_keypoints[:, list(range(0, 34, 2))] - y_kpts = new_keypoints[:, list(range(1, 34, 2))] - - x_kpts[np.logical_or.reduce((x_kpts < 0, x_kpts > self.size[0], y_kpts < 0, y_kpts > self.size[1]))] = 0 - y_kpts[np.logical_or.reduce((x_kpts < 0, x_kpts > self.size[0], y_kpts < 0, y_kpts > self.size[1]))] = 0 - new_keypoints[:, list(range(0, 34, 2))] = x_kpts - new_keypoints[:, list(range(1, 34, 2))] = y_kpts - return new_keypoints.reshape(n, 17, 2) + xy = np.ones((n * nkpt, 3)) + visible = keypoints[..., 2].reshape(n * nkpt, 1) + xy[:, :2] = keypoints[..., :2].reshape(n * nkpt, 2) + xy = xy @ M.T # transform + xy = xy[:, :2] / xy[:, 2:3] # perspective rescale or affine + out_mask = (xy[:, 0] < 0) | (xy[:, 1] < 0) | (xy[:, 0] > self.size[0]) | (xy[:, 1] > self.size[1]) + visible[out_mask] = 0 + return np.concatenate([xy, visible], axis=-1).reshape(n, nkpt, 3) def __call__(self, labels): """ @@ -415,12 +412,13 @@ class RandomHSV: class RandomFlip: - def __init__(self, p=0.5, direction='horizontal') -> None: + def __init__(self, p=0.5, direction='horizontal', flip_idx=None) -> None: assert direction in ['horizontal', 'vertical'], f'Support direction `horizontal` or `vertical`, got {direction}' assert 0 <= p <= 1.0 self.p = p self.direction = direction + self.flip_idx = flip_idx def __call__(self, labels): img = labels['img'] @@ -437,6 +435,9 @@ class RandomFlip: if self.direction == 'horizontal' and random.random() < self.p: img = np.fliplr(img) instances.fliplr(w) + # for keypoints + if self.flip_idx is not None and instances.keypoints is not None: + instances.keypoints = np.ascontiguousarray(instances.keypoints[:, self.flip_idx, :]) labels['img'] = np.ascontiguousarray(img) labels['instances'] = instances return labels @@ -633,7 +634,7 @@ class Format: labels['cls'] = torch.from_numpy(cls) if nl else torch.zeros(nl) labels['bboxes'] = torch.from_numpy(instances.bboxes) if nl else torch.zeros((nl, 4)) if self.return_keypoint: - labels['keypoints'] = torch.from_numpy(instances.keypoints) if nl else torch.zeros((nl, 17, 2)) + labels['keypoints'] = torch.from_numpy(instances.keypoints) # then we can use collate_fn if self.batch_idx: labels['batch_idx'] = torch.zeros(nl) @@ -672,13 +673,17 @@ def v8_transforms(dataset, imgsz, hyp): perspective=hyp.perspective, pre_transform=LetterBox(new_shape=(imgsz, imgsz)), )]) + flip_idx = dataset.data.get('flip_idx', None) # for keypoints augmentation + if dataset.use_keypoints and flip_idx is None and hyp.fliplr > 0.0: + hyp.fliplr = 0.0 + LOGGER.warning("WARNING ⚠️ No `flip_idx` provided while training keypoints, setting augmentation 'fliplr=0.0'") return Compose([ pre_transform, MixUp(dataset, pre_transform=pre_transform, p=hyp.mixup), Albumentations(p=1.0), RandomHSV(hgain=hyp.hsv_h, sgain=hyp.hsv_s, vgain=hyp.hsv_v), RandomFlip(direction='vertical', p=hyp.flipud), - RandomFlip(direction='horizontal', p=hyp.fliplr)]) # transforms + RandomFlip(direction='horizontal', p=hyp.fliplr, flip_idx=flip_idx)]) # transforms # Classification augmentations ----------------------------------------------------------------------------------------- 
diff --git a/ultralytics/yolo/data/build.py b/ultralytics/yolo/data/build.py index d4e0b07..e2f756c 100644 --- a/ultralytics/yolo/data/build.py +++ b/ultralytics/yolo/data/build.py @@ -61,7 +61,7 @@ def seed_worker(worker_id): # noqa random.seed(worker_seed) -def build_dataloader(cfg, batch, img_path, stride=32, rect=False, names=None, rank=-1, mode='train'): +def build_dataloader(cfg, batch, img_path, data_info, stride=32, rect=False, rank=-1, mode='train'): assert mode in ['train', 'val'] shuffle = mode == 'train' if cfg.rect and shuffle: @@ -81,9 +81,9 @@ def build_dataloader(cfg, batch, img_path, stride=32, rect=False, names=None, ra pad=0.0 if mode == 'train' else 0.5, prefix=colorstr(f'{mode}: '), use_segments=cfg.task == 'segment', - use_keypoints=cfg.task == 'keypoint', - names=names, - classes=cfg.classes) + use_keypoints=cfg.task == 'pose', + classes=cfg.classes, + data=data_info) batch = min(batch, len(dataset)) nd = torch.cuda.device_count() # number of CUDA devices diff --git a/ultralytics/yolo/data/dataset.py b/ultralytics/yolo/data/dataset.py index 2bc7536..4a511f6 100644 --- a/ultralytics/yolo/data/dataset.py +++ b/ultralytics/yolo/data/dataset.py @@ -57,11 +57,11 @@ class YOLODataset(BaseDataset): single_cls=False, use_segments=False, use_keypoints=False, - names=None, + data=None, classes=None): self.use_segments = use_segments self.use_keypoints = use_keypoints - self.names = names + self.data = data assert not (self.use_segments and self.use_keypoints), 'Can not use both segments and keypoints.' super().__init__(img_path, imgsz, cache, augment, hyp, prefix, rect, batch_size, stride, pad, single_cls, classes) @@ -77,10 +77,16 @@ class YOLODataset(BaseDataset): nm, nf, ne, nc, msgs = 0, 0, 0, 0, [] # number missing, found, empty, corrupt, messages desc = f'{self.prefix}Scanning {path.parent / path.stem}...' total = len(self.im_files) + nc = len(self.data['names']) + nkpt, ndim = self.data.get('kpt_shape', (0, 0)) + if self.use_keypoints and (nkpt <= 0 or ndim not in (2, 3)): + raise ValueError("'kpt_shape' in data.yaml missing or incorrect. Should be a list with [number of " + "keypoints, number of dims (2 for x,y or 3 for x,y,visible)], i.e. 
'kpt_shape: [17, 3]'") with ThreadPool(NUM_THREADS) as pool: results = pool.imap(func=verify_image_label, iterable=zip(self.im_files, self.label_files, repeat(self.prefix), - repeat(self.use_keypoints), repeat(len(self.names)))) + repeat(self.use_keypoints), repeat(len(self.data['names'])), repeat(nkpt), + repeat(ndim))) pbar = tqdm(results, desc=desc, total=total, bar_format=TQDM_BAR_FORMAT) for im_file, lb, shape, segments, keypoint, nm_f, nf_f, ne_f, nc_f, msg in pbar: nm += nm_f diff --git a/ultralytics/yolo/data/utils.py b/ultralytics/yolo/data/utils.py index 98e0ed8..d62bb8b 100644 --- a/ultralytics/yolo/data/utils.py +++ b/ultralytics/yolo/data/utils.py @@ -6,10 +6,10 @@ import json import os import subprocess import time +import zipfile from multiprocessing.pool import ThreadPool from pathlib import Path from tarfile import is_tarfile -from zipfile import is_zipfile import cv2 import numpy as np @@ -61,7 +61,7 @@ def exif_size(img): def verify_image_label(args): # Verify one image-label pair - im_file, lb_file, prefix, keypoint, num_cls = args + im_file, lb_file, prefix, keypoint, num_cls, nkpt, ndim = args # number (missing, found, empty, corrupt), message, segments, keypoints nm, nf, ne, nc, msg, segments, keypoints = 0, 0, 0, 0, '', [], None try: @@ -92,25 +92,19 @@ def verify_image_label(args): nl = len(lb) if nl: if keypoint: - assert lb.shape[1] == 56, 'labels require 56 columns each' - assert (lb[:, 5::3] <= 1).all(), 'non-normalized or out of bounds coordinate labels' - assert (lb[:, 6::3] <= 1).all(), 'non-normalized or out of bounds coordinate labels' - kpts = np.zeros((lb.shape[0], 39)) - for i in range(len(lb)): - kpt = np.delete(lb[i, 5:], np.arange(2, lb.shape[1] - 5, 3)) # remove occlusion param from GT - kpts[i] = np.hstack((lb[i, :5], kpt)) - lb = kpts - assert lb.shape[1] == 39, 'labels require 39 columns each after removing occlusion parameter' + assert lb.shape[1] == (5 + nkpt * ndim), f'labels require {(5 + nkpt * ndim)} columns each' + assert (lb[:, 5::ndim] <= 1).all(), 'non-normalized or out of bounds coordinate labels' + assert (lb[:, 6::ndim] <= 1).all(), 'non-normalized or out of bounds coordinate labels' else: assert lb.shape[1] == 5, f'labels require 5 columns, {lb.shape[1]} columns detected' assert (lb[:, 1:] <= 1).all(), \ f'non-normalized or out of bounds coordinates {lb[:, 1:][lb[:, 1:] > 1]}' + assert (lb >= 0).all(), f'negative label values {lb[lb < 0]}' # All labels max_cls = int(lb[:, 0].max()) # max label count assert max_cls <= num_cls, \ f'Label class {max_cls} exceeds dataset class count {num_cls}. 
' \ f'Possible class labels are 0-{num_cls - 1}' - assert (lb >= 0).all(), f'negative label values {lb[lb < 0]}' _, i = np.unique(lb, axis=0, return_index=True) if len(i) < nl: # duplicate row check lb = lb[i] # remove duplicates @@ -119,12 +113,18 @@ def verify_image_label(args): msg = f'{prefix}WARNING ⚠️ {im_file}: {nl - len(i)} duplicate labels removed' else: ne = 1 # label empty - lb = np.zeros((0, 39), dtype=np.float32) if keypoint else np.zeros((0, 5), dtype=np.float32) + lb = np.zeros((0, (5 + nkpt * ndim)), dtype=np.float32) if keypoint else np.zeros( + (0, 5), dtype=np.float32) else: nm = 1 # label missing - lb = np.zeros((0, 39), dtype=np.float32) if keypoint else np.zeros((0, 5), dtype=np.float32) + lb = np.zeros((0, (5 + nkpt * ndim)), dtype=np.float32) if keypoint else np.zeros((0, 5), dtype=np.float32) if keypoint: - keypoints = lb[:, 5:].reshape(-1, 17, 2) + keypoints = lb[:, 5:].reshape(-1, nkpt, ndim) + if ndim == 2: + kpt_mask = np.ones(keypoints.shape[:2], dtype=np.float32) + kpt_mask = np.where(keypoints[..., 0] < 0, 0.0, kpt_mask) + kpt_mask = np.where(keypoints[..., 1] < 0, 0.0, kpt_mask) + keypoints = np.concatenate([keypoints, kpt_mask[..., None]], axis=-1) # (nl, nkpt, 3) lb = lb[:, :5] return im_file, lb, shape, segments, keypoints, nm, nf, ne, nc, msg except Exception as e: @@ -195,7 +195,7 @@ def check_det_dataset(dataset, autodownload=True): # Download (optional) extract_dir = '' - if isinstance(data, (str, Path)) and (is_zipfile(data) or is_tarfile(data)): + if isinstance(data, (str, Path)) and (zipfile.is_zipfile(data) or is_tarfile(data)): new_dir = safe_download(data, dir=DATASETS_DIR, unzip=True, delete=False, curl=False) data = next((DATASETS_DIR / new_dir).rglob('*.yaml')) extract_dir, autodownload = data.parent, False @@ -356,23 +356,8 @@ class HUBDatasetStats(): assert dir.is_dir(), f'Error unzipping {path}, {dir} not found. path/to/abc.zip MUST unzip to path/to/abc/' return True, str(dir), self._find_yaml(dir) # zipped, data_dir, yaml_path - def _hub_ops(self, f, max_dim=1920): - # HUB ops for 1 image 'f': resize and save at reduced quality in /dataset-hub for web/app viewing - f_new = self.im_dir / Path(f).name # dataset-hub image filename - try: # use PIL - im = Image.open(f) - r = max_dim / max(im.height, im.width) # ratio - if r < 1.0: # image too large - im = im.resize((int(im.width * r), int(im.height * r))) - im.save(f_new, 'JPEG', quality=50, optimize=True) # save - except Exception as e: # use OpenCV - LOGGER.info(f'WARNING ⚠️ HUB ops PIL failure {f}: {e}') - im = cv2.imread(f) - im_height, im_width = im.shape[:2] - r = max_dim / max(im_height, im_width) # ratio - if r < 1.0: # image too large - im = cv2.resize(im, (int(im_width * r), int(im_height * r)), interpolation=cv2.INTER_AREA) - cv2.imwrite(str(f_new), im) + def _hub_ops(self, f): + compress_one_image(f, self.im_dir / Path(f).name) # save to dataset-hub def get_json(self, save=False, verbose=False): # Return dataset JSON for Ultralytics HUB @@ -426,3 +411,93 @@ class HUBDatasetStats(): pass LOGGER.info(f'Done. All images saved to {self.im_dir}') return self.im_dir + + +def compress_one_image(f, f_new=None, max_dim=1920, quality=50): + """ + Compresses a single image file to reduced size while preserving its aspect ratio and quality using either the + Python Imaging Library (PIL) or OpenCV library. If the input image is smaller than the maximum dimension, it will + not be resized. + + Args: + f (str): The path to the input image file. 
+ f_new (str, optional): The path to the output image file. If not specified, the input file will be overwritten. + max_dim (int, optional): The maximum dimension (width or height) of the output image. Default is 1920 pixels. + quality (int, optional): The image compression quality as a percentage. Default is 50%. + + Returns: + None + + Usage: + from pathlib import Path + from ultralytics.yolo.data.utils import compress_one_image + for f in Path('/Users/glennjocher/Downloads/dataset').rglob('*.jpg'): + compress_one_image(f) + """ + try: # use PIL + im = Image.open(f) + r = max_dim / max(im.height, im.width) # ratio + if r < 1.0: # image too large + im = im.resize((int(im.width * r), int(im.height * r))) + im.save(f_new or f, 'JPEG', quality=quality, optimize=True) # save + except Exception as e: # use OpenCV + LOGGER.info(f'WARNING ⚠️ HUB ops PIL failure {f}: {e}') + im = cv2.imread(f) + im_height, im_width = im.shape[:2] + r = max_dim / max(im_height, im_width) # ratio + if r < 1.0: # image too large + im = cv2.resize(im, (int(im_width * r), int(im_height * r)), interpolation=cv2.INTER_AREA) + cv2.imwrite(str(f_new or f), im) + + +def delete_dsstore(path): + """ + Deletes all ".DS_store" files under a specified directory. + + Args: + path (str, optional): The directory path where the ".DS_store" files should be deleted. + + Returns: + None + + Usage: + from ultralytics.yolo.data.utils import delete_dsstore + delete_dsstore('/Users/glennjocher/Downloads/dataset') + + Note: + ".DS_store" files are created by the Apple operating system and contain metadata about folders and files. They + are hidden system files and can cause issues when transferring files between different operating systems. + """ + # Delete Apple .DS_store files + files = list(Path(path).rglob('.DS_store')) + LOGGER.info(f'Deleting *.DS_store files: {files}') + for f in files: + f.unlink() + + +def zip_directory(dir, use_zipfile_library=True): + """Zips a directory and saves the archive to the specified output path. + + Args: + dir (str): The path to the directory to be zipped. + use_zipfile_library (bool): Whether to use zipfile library or shutil for zipping. 
+ + Returns: + None + + Usage: + from ultralytics.yolo.data.utils import zip_directory + zip_directory('/Users/glennjocher/Downloads/playground') + + zip -r coco8-pose.zip coco8-pose + """ + delete_dsstore(dir) + if use_zipfile_library: + dir = Path(dir) + with zipfile.ZipFile(dir.with_suffix('.zip'), 'w', zipfile.ZIP_DEFLATED) as zip_file: + for file_path in dir.glob('**/*'): + if file_path.is_file(): + zip_file.write(file_path, file_path.relative_to(dir)) + else: + import shutil + shutil.make_archive(dir, 'zip', dir) diff --git a/ultralytics/yolo/engine/exporter.py b/ultralytics/yolo/engine/exporter.py index a77c08e..a8cdf3a 100644 --- a/ultralytics/yolo/engine/exporter.py +++ b/ultralytics/yolo/engine/exporter.py @@ -209,8 +209,8 @@ class Exporter: self.file = file self.output_shape = tuple(y.shape) if isinstance(y, torch.Tensor) else tuple(tuple(x.shape) for x in y) self.pretty_name = Path(self.model.yaml.get('yaml_file', self.file)).stem.replace('yolo', 'YOLO') - description = f'Ultralytics {self.pretty_name} model ' + f'trained on {Path(self.args.data).name}' \ - if self.args.data else '(untrained)' + trained_on = f'trained on {Path(self.args.data).name}' if self.args.data else '(untrained)' + description = f'Ultralytics {self.pretty_name} model {trained_on}' self.metadata = { 'description': description, 'author': 'Ultralytics', @@ -221,6 +221,8 @@ class Exporter: 'batch': self.args.batch, 'imgsz': self.imgsz, 'names': model.names} # model metadata + if model.task == 'pose': + self.metadata['kpt_shape'] = model.kpt_shape LOGGER.info(f"\n{colorstr('PyTorch:')} starting from {file} with input shape {tuple(im.shape)} BCHW and " f'output shape(s) {self.output_shape} ({file_size(file):.1f} MB)') @@ -295,7 +297,8 @@ class Exporter: check_requirements(requirements) import onnx # noqa - LOGGER.info(f'\n{prefix} starting export with onnx {onnx.__version__}...') + opset_version = self.args.opset or get_latest_opset() + LOGGER.info(f'\n{prefix} starting export with onnx {onnx.__version__} opset {opset_version}...') f = str(self.file.with_suffix('.onnx')) output_names = ['output0', 'output1'] if isinstance(self.model, SegmentationModel) else ['output0'] @@ -313,7 +316,7 @@ class Exporter: self.im.cpu() if dynamic else self.im, f, verbose=False, - opset_version=self.args.opset or get_latest_opset(), + opset_version=opset_version, do_constant_folding=True, # WARNING: DNN inference with torch>=1.12 may require do_constant_folding=False input_names=['images'], output_names=output_names, @@ -377,7 +380,6 @@ class Exporter: yaml_save(Path(f) / 'metadata.yaml', self.metadata) # add metadata.yaml return f, None - @try_export def _export_coreml(self, prefix=colorstr('CoreML:')): # YOLOv8 CoreML export check_requirements('coremltools>=6.0') @@ -410,8 +412,8 @@ class Exporter: model = self.model elif self.model.task == 'detect': model = iOSDetectModel(self.model, self.im) if self.args.nms else self.model - elif self.model.task == 'segment': - # TODO CoreML Segmentation model pipelining + else: + # TODO CoreML Segment and Pose model pipelining model = self.model ts = torch.jit.trace(model.eval(), self.im, strict=False) # TorchScript model diff --git a/ultralytics/yolo/engine/model.py b/ultralytics/yolo/engine/model.py index a67166d..aea3661 100644 --- a/ultralytics/yolo/engine/model.py +++ b/ultralytics/yolo/engine/model.py @@ -5,8 +5,8 @@ from pathlib import Path from typing import Union from ultralytics import yolo # noqa -from ultralytics.nn.tasks import (ClassificationModel, DetectionModel, 
SegmentationModel, attempt_load_one_weight, - guess_model_task, nn, yaml_model_load) +from ultralytics.nn.tasks import (ClassificationModel, DetectionModel, PoseModel, SegmentationModel, + attempt_load_one_weight, guess_model_task, nn, yaml_model_load) from ultralytics.yolo.cfg import get_cfg from ultralytics.yolo.engine.exporter import Exporter from ultralytics.yolo.utils import (DEFAULT_CFG, DEFAULT_CFG_DICT, DEFAULT_CFG_KEYS, LOGGER, RANK, ROOT, callbacks, @@ -25,7 +25,8 @@ TASK_MAP = { yolo.v8.detect.DetectionPredictor], 'segment': [ SegmentationModel, yolo.v8.segment.SegmentationTrainer, yolo.v8.segment.SegmentationValidator, - yolo.v8.segment.SegmentationPredictor]} + yolo.v8.segment.SegmentationPredictor], + 'pose': [PoseModel, yolo.v8.pose.PoseTrainer, yolo.v8.pose.PoseValidator, yolo.v8.pose.PosePredictor]} class YOLO: @@ -195,7 +196,7 @@ class YOLO: self.model.load(weights) return self - def info(self, verbose=False): + def info(self, verbose=True): """ Logs model info. diff --git a/ultralytics/yolo/engine/predictor.py b/ultralytics/yolo/engine/predictor.py index 82905ca..f511459 100644 --- a/ultralytics/yolo/engine/predictor.py +++ b/ultralytics/yolo/engine/predictor.py @@ -246,6 +246,7 @@ class BasePredictor: dnn=self.args.dnn, data=self.args.data, fp16=self.args.half, + fuse=True, verbose=verbose) self.device = device self.model.eval() diff --git a/ultralytics/yolo/engine/results.py b/ultralytics/yolo/engine/results.py index cbaa543..8fbcb4b 100644 --- a/ultralytics/yolo/engine/results.py +++ b/ultralytics/yolo/engine/results.py @@ -17,6 +17,53 @@ from ultralytics.yolo.utils.plotting import Annotator, colors from ultralytics.yolo.utils.torch_utils import TORCHVISION_0_10 +class BaseTensor(SimpleClass): + """ + + Attributes: + tensor (torch.Tensor): A tensor. + orig_shape (tuple): Original image size, in the format (height, width). + + Methods: + cpu(): Returns a copy of the tensor on CPU memory. + numpy(): Returns a copy of the tensor as a numpy array. + cuda(): Returns a copy of the tensor on GPU memory. + to(): Returns a copy of the tensor with the specified device and dtype. + """ + + def __init__(self, tensor, orig_shape) -> None: + super().__init__() + assert isinstance(tensor, torch.Tensor) + self.tensor = tensor + self.orig_shape = orig_shape + + @property + def shape(self): + return self.data.shape + + @property + def data(self): + return self.tensor + + def cpu(self): + return self.__class__(self.data.cpu(), self.orig_shape) + + def numpy(self): + return self.__class__(self.data.numpy(), self.orig_shape) + + def cuda(self): + return self.__class__(self.data.cuda(), self.orig_shape) + + def to(self, *args, **kwargs): + return self.__class__(self.data.to(*args, **kwargs), self.orig_shape) + + def __len__(self): # override len(results) + return len(self.data) + + def __getitem__(self, idx): + return self.__class__(self.data[idx], self.orig_shape) + + class Results(SimpleClass): """ A class for storing and manipulating inference results. @@ -40,22 +87,23 @@ class Results(SimpleClass): _keys (tuple): A tuple of attribute names for non-empty attributes. 
""" - def __init__(self, orig_img, path, names, boxes=None, masks=None, probs=None) -> None: + def __init__(self, orig_img, path, names, boxes=None, masks=None, probs=None, keypoints=None) -> None: self.orig_img = orig_img self.orig_shape = orig_img.shape[:2] self.boxes = Boxes(boxes, self.orig_shape) if boxes is not None else None # native size boxes self.masks = Masks(masks, self.orig_shape) if masks is not None else None # native size or imgsz masks self.probs = probs if probs is not None else None + self.keypoints = keypoints if keypoints is not None else None self.names = names self.path = path - self._keys = ('boxes', 'masks', 'probs') + self._keys = ('boxes', 'masks', 'probs', 'keypoints') def pandas(self): pass # TODO masks.pandas + boxes.pandas + cls.pandas def __getitem__(self, idx): - r = Results(orig_img=self.orig_img, path=self.path, names=self.names) + r = self.new() for k in self.keys: setattr(r, k, getattr(self, k)[idx]) return r @@ -69,25 +117,25 @@ class Results(SimpleClass): self.probs = probs def cpu(self): - r = Results(orig_img=self.orig_img, path=self.path, names=self.names) + r = self.new() for k in self.keys: setattr(r, k, getattr(self, k).cpu()) return r def numpy(self): - r = Results(orig_img=self.orig_img, path=self.path, names=self.names) + r = self.new() for k in self.keys: setattr(r, k, getattr(self, k).numpy()) return r def cuda(self): - r = Results(orig_img=self.orig_img, path=self.path, names=self.names) + r = self.new() for k in self.keys: setattr(r, k, getattr(self, k).cuda()) return r def to(self, *args, **kwargs): - r = Results(orig_img=self.orig_img, path=self.path, names=self.names) + r = self.new() for k in self.keys: setattr(r, k, getattr(self, k).to(*args, **kwargs)) return r @@ -96,6 +144,9 @@ class Results(SimpleClass): for k in self.keys: return len(getattr(self, k)) + def new(self): + return Results(orig_img=self.orig_img, path=self.path, names=self.names) + @property def keys(self): return [k for k in self._keys if getattr(self, k) is not None] @@ -109,6 +160,7 @@ class Results(SimpleClass): pil=False, example='abc', img=None, + kpt_line=True, labels=True, boxes=True, masks=True, @@ -126,6 +178,7 @@ class Results(SimpleClass): pil (bool): Whether to return the image as a PIL Image. example (str): An example string to display. Useful for indicating the expected format of the output. img (numpy.ndarray): Plot to another image. if not, plot to original image. + kpt_line (bool): Whether to draw lines connecting keypoints. labels (bool): Whether to plot the label of bounding boxes. boxes (bool): Whether to plot the bounding boxes. masks (bool): Whether to plot the masks. 
@@ -146,11 +199,12 @@ class Results(SimpleClass): pred_masks, show_masks = self.masks, masks pred_probs, show_probs = self.probs, probs names = self.names + keypoints = self.keypoints if pred_boxes and show_boxes: for d in reversed(pred_boxes): c, conf, id = int(d.cls), float(d.conf) if conf else None, None if d.id is None else int(d.id.item()) name = ('' if id is None else f'id:{id} ') + names[c] - label = (name if not conf else f'{name} {conf:.2f}') if labels else None + label = (f'{name} {conf:.2f}' if conf else name) if labels else None annotator.box_label(d.xyxy.squeeze(), label, color=colors(c, True)) if pred_masks and show_masks: @@ -168,10 +222,14 @@ class Results(SimpleClass): text = f"{', '.join(f'{names[j] if names else j} {pred_probs[j]:.2f}' for j in top5i)}, " annotator.text((32, 32), text, txt_color=(255, 255, 255)) # TODO: allow setting colors + if keypoints is not None: + for k in reversed(keypoints): + annotator.kpts(k, self.orig_shape, kpt_line=kpt_line) + return np.asarray(annotator.im) if annotator.pil else annotator.im -class Boxes(SimpleClass): +class Boxes(BaseTensor): """ A class for storing and manipulating detection boxes. @@ -246,37 +304,15 @@ class Boxes(SimpleClass): def xywhn(self): return self.xywh / self.orig_shape[[1, 0, 1, 0]] - def cpu(self): - return Boxes(self.boxes.cpu(), self.orig_shape) - - def numpy(self): - return Boxes(self.boxes.numpy(), self.orig_shape) - - def cuda(self): - return Boxes(self.boxes.cuda(), self.orig_shape) - - def to(self, *args, **kwargs): - return Boxes(self.boxes.to(*args, **kwargs), self.orig_shape) - def pandas(self): LOGGER.info('results.pandas() method not yet implemented') - @property - def shape(self): - return self.boxes.shape - @property def data(self): return self.boxes - def __len__(self): # override len(results) - return len(self.boxes) - - def __getitem__(self, idx): - return Boxes(self.boxes[idx], self.orig_shape) - -class Masks(SimpleClass): +class Masks(BaseTensor): """ A class for storing and manipulating detection masks. 
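The Boxes and Masks edits above drop their duplicated cpu()/numpy()/cuda()/to()/__len__/__getitem__ implementations in favour of the shared BaseTensor introduced earlier in results.py: subclasses expose their payload through a data property and inherit the device and indexing helpers. A minimal self-contained sketch of the pattern with illustrative names (not the library classes):

import torch

class TensorContainer:  # stand-in for BaseTensor
    def __init__(self, tensor, orig_shape):
        self.tensor, self.orig_shape = tensor, orig_shape

    @property
    def data(self):  # subclasses only need to override this
        return self.tensor

    def cpu(self):
        return self.__class__(self.data.cpu(), self.orig_shape)

    def to(self, *args, **kwargs):
        return self.__class__(self.data.to(*args, **kwargs), self.orig_shape)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.__class__(self.data[idx], self.orig_shape)

class BoxContainer(TensorContainer):  # stand-in for Boxes: adds views over the same payload
    @property
    def xyxy(self):
        return self.data[:, :4]

b = BoxContainer(torch.zeros(3, 6), orig_shape=(640, 480))
print(len(b), b[:2].xyxy.shape)  # 3 torch.Size([2, 4])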
@@ -316,7 +352,7 @@ class Masks(SimpleClass): def xyn(self): # Segments (normalized) return [ - ops.scale_segments(self.masks.shape[1:], x, self.orig_shape, normalize=True) + ops.scale_coords(self.masks.shape[1:], x, self.orig_shape, normalize=True) for x in ops.masks2segments(self.masks)] @property @@ -324,31 +360,9 @@ class Masks(SimpleClass): def xy(self): # Segments (pixels) return [ - ops.scale_segments(self.masks.shape[1:], x, self.orig_shape, normalize=False) + ops.scale_coords(self.masks.shape[1:], x, self.orig_shape, normalize=False) for x in ops.masks2segments(self.masks)] - @property - def shape(self): - return self.masks.shape - @property def data(self): return self.masks - - def cpu(self): - return Masks(self.masks.cpu(), self.orig_shape) - - def numpy(self): - return Masks(self.masks.numpy(), self.orig_shape) - - def cuda(self): - return Masks(self.masks.cuda(), self.orig_shape) - - def to(self, *args, **kwargs): - return Masks(self.masks.to(*args, **kwargs), self.orig_shape) - - def __len__(self): # override len(results) - return len(self.masks) - - def __getitem__(self, idx): - return Masks(self.masks[idx], self.orig_shape) diff --git a/ultralytics/yolo/utils/benchmarks.py b/ultralytics/yolo/utils/benchmarks.py index 5b5e24c..5f5c529 100644 --- a/ultralytics/yolo/utils/benchmarks.py +++ b/ultralytics/yolo/utils/benchmarks.py @@ -75,11 +75,13 @@ def benchmark(model=Path(SETTINGS['weights_dir']) / 'yolov8n.pt', imgsz=160, hal # Validate if model.task == 'detect': - data, key = 'coco128.yaml', 'metrics/mAP50-95(B)' + data, key = 'coco8.yaml', 'metrics/mAP50-95(B)' elif model.task == 'segment': - data, key = 'coco128-seg.yaml', 'metrics/mAP50-95(M)' + data, key = 'coco8-seg.yaml', 'metrics/mAP50-95(M)' elif model.task == 'classify': data, key = 'imagenet100', 'metrics/accuracy_top5' + elif model.task == 'pose': + data, key = 'coco8-pose.yaml', 'metrics/mAP50-95(P)' results = export.val(data=data, batch=1, imgsz=imgsz, plots=False, device=device, half=half, verbose=False) metric, speed = results.results_dict[key], results.speed['inference'] diff --git a/ultralytics/yolo/utils/downloads.py b/ultralytics/yolo/utils/downloads.py index 2caefef..b11a789 100644 --- a/ultralytics/yolo/utils/downloads.py +++ b/ultralytics/yolo/utils/downloads.py @@ -14,9 +14,9 @@ from tqdm import tqdm from ultralytics.yolo.utils import LOGGER, checks, emojis, is_online -GITHUB_ASSET_NAMES = [f'yolov8{size}{suffix}.pt' for size in 'nsmlx' for suffix in ('', '6', '-cls', '-seg')] + \ - [f'yolov5{size}u.pt' for size in 'nsmlx'] + \ - [f'yolov3{size}u.pt' for size in ('', '-spp', '-tiny')] +GITHUB_ASSET_NAMES = [f'yolov8{k}{suffix}.pt' for k in 'nsmlx' for suffix in ('', '6', '-cls', '-seg', '-pose')] + \ + [f'yolov5{k}u.pt' for k in 'nsmlx'] + \ + [f'yolov3{k}u.pt' for k in ('', '-spp', '-tiny')] GITHUB_ASSET_STEMS = [Path(k).stem for k in GITHUB_ASSET_NAMES] diff --git a/ultralytics/yolo/utils/instance.py b/ultralytics/yolo/utils/instance.py index 95a62ca..37c1496 100644 --- a/ultralytics/yolo/utils/instance.py +++ b/ultralytics/yolo/utils/instance.py @@ -168,7 +168,7 @@ class Instances: Args: bboxes (ndarray): bboxes with shape [N, 4]. segments (list | ndarray): segments. - keypoints (ndarray): keypoints with shape [N, 17, 2]. + keypoints (ndarray): keypoints(x, y, visible) with shape [N, 17, 3]. 
""" if segments is None: segments = [] diff --git a/ultralytics/yolo/utils/loss.py b/ultralytics/yolo/utils/loss.py index e365006..60d4727 100644 --- a/ultralytics/yolo/utils/loss.py +++ b/ultralytics/yolo/utils/loss.py @@ -54,3 +54,17 @@ class BboxLoss(nn.Module): wr = 1 - wl # weight right return (F.cross_entropy(pred_dist, tl.view(-1), reduction='none').view(tl.shape) * wl + F.cross_entropy(pred_dist, tr.view(-1), reduction='none').view(tl.shape) * wr).mean(-1, keepdim=True) + + +class KeypointLoss(nn.Module): + + def __init__(self, sigmas) -> None: + super().__init__() + self.sigmas = sigmas + + def forward(self, pred_kpts, gt_kpts, kpt_mask, area): + d = (pred_kpts[..., 0] - gt_kpts[..., 0]) ** 2 + (pred_kpts[..., 1] - gt_kpts[..., 1]) ** 2 + kpt_loss_factor = (torch.sum(kpt_mask != 0) + torch.sum(kpt_mask == 0)) / (torch.sum(kpt_mask != 0) + 1e-9) + # e = d / (2 * (area * self.sigmas) ** 2 + 1e-9) # from formula + e = d / (2 * self.sigmas) ** 2 / (area + 1e-9) / 2 # from cocoeval + return kpt_loss_factor * ((1 - torch.exp(-e)) * kpt_mask).mean() diff --git a/ultralytics/yolo/utils/metrics.py b/ultralytics/yolo/utils/metrics.py index 26e2b0e..d7da8e6 100644 --- a/ultralytics/yolo/utils/metrics.py +++ b/ultralytics/yolo/utils/metrics.py @@ -13,6 +13,8 @@ import torch.nn as nn from ultralytics.yolo.utils import LOGGER, SimpleClass, TryExcept +OKS_SIGMA = np.array([.26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, .87, .89, .89]) / 10.0 + # boxes def box_area(box): @@ -108,8 +110,8 @@ def bbox_iou(box1, box2, xywh=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7 def mask_iou(mask1, mask2, eps=1e-7): """ - mask1: [N, n] m1 means number of predicted objects - mask2: [M, n] m2 means number of gt objects + mask1: [N, n] m1 means number of gt objects + mask2: [M, n] m2 means number of predicted objects Note: n means image_w x image_h Returns: masks iou, [N, M] """ @@ -118,16 +120,18 @@ def mask_iou(mask1, mask2, eps=1e-7): return intersection / (union + eps) -def masks_iou(mask1, mask2, eps=1e-7): - """ - mask1: [N, n] m1 means number of predicted objects - mask2: [N, n] m2 means number of gt objects - Note: n means image_w x image_h - Returns: masks iou, (N, ) +def kpt_iou(kpt1, kpt2, area, sigma, eps=1e-7): + """OKS + kpt1: [N, 17, 3], gt + kpt2: [M, 17, 3], pred + area: [N], areas from gt """ - intersection = (mask1 * mask2).sum(1).clamp(0) # (N, ) - union = (mask1.sum(1) + mask2.sum(1))[None] - intersection # (area1 + area2) - intersection - return intersection / (union + eps) + d = (kpt1[:, None, :, 0] - kpt2[..., 0]) ** 2 + (kpt1[:, None, :, 1] - kpt2[..., 1]) ** 2 # (N, M, 17) + sigma = torch.tensor(sigma, device=kpt1.device, dtype=kpt1.dtype) # (17, ) + kpt_mask = kpt1[..., 2] != 0 # (N, 17) + e = d / (2 * sigma) ** 2 / (area[:, None, None] + eps) / 2 # from cocoeval + # e = d / ((area[None, :, None] + eps) * sigma) ** 2 / 2 # from formula + return (torch.exp(-e) * kpt_mask[:, None]).sum(-1) / (kpt_mask.sum(-1)[:, None] + eps) def smooth_BCE(eps=0.1): # https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441 @@ -649,13 +653,13 @@ class SegmentMetrics(SimpleClass): self.seg = Metric() self.speed = {'preprocess': 0.0, 'inference': 0.0, 'loss': 0.0, 'postprocess': 0.0} - def process(self, tp_m, tp_b, conf, pred_cls, target_cls): + def process(self, tp_b, tp_m, conf, pred_cls, target_cls): """ Processes the detection and segmentation metrics over the given set of predictions. Args: - tp_m (list): List of True Positive masks. 
tp_b (list): List of True Positive boxes. + tp_m (list): List of True Positive masks. conf (list): List of confidence scores. pred_cls (list): List of predicted classes. target_cls (list): List of target classes. @@ -712,6 +716,100 @@ class SegmentMetrics(SimpleClass): return dict(zip(self.keys + ['fitness'], self.mean_results() + [self.fitness])) +class PoseMetrics(SegmentMetrics): + """ + Calculates and aggregates detection and pose metrics over a given set of classes. + + Args: + save_dir (Path): Path to the directory where the output plots should be saved. Default is the current directory. + plot (bool): Whether to save the detection and segmentation plots. Default is False. + names (list): List of class names. Default is an empty list. + + Attributes: + save_dir (Path): Path to the directory where the output plots should be saved. + plot (bool): Whether to save the detection and segmentation plots. + names (list): List of class names. + box (Metric): An instance of the Metric class to calculate box detection metrics. + pose (Metric): An instance of the Metric class to calculate mask segmentation metrics. + speed (dict): Dictionary to store the time taken in different phases of inference. + + Methods: + process(tp_m, tp_b, conf, pred_cls, target_cls): Processes metrics over the given set of predictions. + mean_results(): Returns the mean of the detection and segmentation metrics over all the classes. + class_result(i): Returns the detection and segmentation metrics of class `i`. + maps: Returns the mean Average Precision (mAP) scores for IoU thresholds ranging from 0.50 to 0.95. + fitness: Returns the fitness scores, which are a single weighted combination of metrics. + ap_class_index: Returns the list of indices of classes used to compute Average Precision (AP). + results_dict: Returns the dictionary containing all the detection and segmentation metrics and fitness score. + """ + + def __init__(self, save_dir=Path('.'), plot=False, names=()) -> None: + super().__init__(save_dir, plot, names) + self.save_dir = save_dir + self.plot = plot + self.names = names + self.box = Metric() + self.pose = Metric() + self.speed = {'preprocess': 0.0, 'inference': 0.0, 'loss': 0.0, 'postprocess': 0.0} + + def __getattr__(self, attr): + name = self.__class__.__name__ + raise AttributeError(f"'{name}' object has no attribute '{attr}'. See valid attributes below.\n{self.__doc__}") + + def process(self, tp_b, tp_p, conf, pred_cls, target_cls): + """ + Processes the detection and pose metrics over the given set of predictions. + + Args: + tp_b (list): List of True Positive boxes. + tp_p (list): List of True Positive keypoints. + conf (list): List of confidence scores. + pred_cls (list): List of predicted classes. + target_cls (list): List of target classes. 
+ """ + + results_pose = ap_per_class(tp_p, + conf, + pred_cls, + target_cls, + plot=self.plot, + save_dir=self.save_dir, + names=self.names, + prefix='Pose')[2:] + self.pose.nc = len(self.names) + self.pose.update(results_pose) + results_box = ap_per_class(tp_b, + conf, + pred_cls, + target_cls, + plot=self.plot, + save_dir=self.save_dir, + names=self.names, + prefix='Box')[2:] + self.box.nc = len(self.names) + self.box.update(results_box) + + @property + def keys(self): + return [ + 'metrics/precision(B)', 'metrics/recall(B)', 'metrics/mAP50(B)', 'metrics/mAP50-95(B)', + 'metrics/precision(P)', 'metrics/recall(P)', 'metrics/mAP50(P)', 'metrics/mAP50-95(P)'] + + def mean_results(self): + return self.box.mean_results() + self.pose.mean_results() + + def class_result(self, i): + return self.box.class_result(i) + self.pose.class_result(i) + + @property + def maps(self): + return self.box.maps + self.pose.maps + + @property + def fitness(self): + return self.pose.fitness() + self.box.fitness() + + class ClassifyMetrics(SimpleClass): """ Class for computing classification metrics including top-1 and top-5 accuracy. diff --git a/ultralytics/yolo/utils/ops.py b/ultralytics/yolo/utils/ops.py index 2dd89c3..c879d8d 100644 --- a/ultralytics/yolo/utils/ops.py +++ b/ultralytics/yolo/utils/ops.py @@ -281,28 +281,23 @@ def clip_boxes(boxes, shape): boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, shape[0]) # y1, y2 -def clip_coords(boxes, shape): +def clip_coords(coords, shape): """ - Clip bounding xyxy bounding boxes to image shape (height, width). + Clip line coordinates to the image boundaries. Args: - boxes (torch.Tensor or numpy.ndarray): Bounding boxes to be clipped. - shape (tuple): The shape of the image. (height, width) + coords (torch.Tensor) or (numpy.ndarray): A list of line coordinates. + shape (tuple): A tuple of integers representing the size of the image in the format (height, width). Returns: - None - - Note: - The input `boxes` is modified in-place, there is no return value. + (None): The function modifies the input `coordinates` in place, by clipping each coordinate to the image boundaries. """ - if isinstance(boxes, torch.Tensor): # faster individually - boxes[:, 0].clamp_(0, shape[1]) # x1 - boxes[:, 1].clamp_(0, shape[0]) # y1 - boxes[:, 2].clamp_(0, shape[1]) # x2 - boxes[:, 3].clamp_(0, shape[0]) # y2 + if isinstance(coords, torch.Tensor): # faster individually + coords[..., 0].clamp_(0, shape[1]) # x + coords[..., 1].clamp_(0, shape[0]) # y else: # np.array (faster grouped) - boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1]) # x1, x2 - boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0]) # y1, y2 + coords[..., 0] = coords[..., 0].clip(0, shape[1]) # x + coords[..., 1] = coords[..., 1].clip(0, shape[0]) # y def scale_image(im1_shape, masks, im0_shape, ratio_pad=None): @@ -577,17 +572,18 @@ def process_mask_upsample(protos, masks_in, bboxes, shape): def process_mask(protos, masks_in, bboxes, shape, upsample=False): """ - It takes the output of the mask head, and applies the mask to the bounding boxes. This is faster but produces - downsampled quality of mask + Apply masks to bounding boxes using the output of the mask head. Args: - protos (torch.Tensor): [mask_dim, mask_h, mask_w] - masks_in (torch.Tensor): [n, mask_dim], n is number of masks after nms - bboxes (torch.Tensor): [n, 4], n is number of masks after nms - shape (tuple): the size of the input image (h,w) + protos (torch.Tensor): A tensor of shape [mask_dim, mask_h, mask_w]. 
+ masks_in (torch.Tensor): A tensor of shape [n, mask_dim], where n is the number of masks after NMS. + bboxes (torch.Tensor): A tensor of shape [n, 4], where n is the number of masks after NMS. + shape (tuple): A tuple of integers representing the size of the input image in the format (h, w). + upsample (bool): A flag to indicate whether to upsample the mask to the original image size. Default is False. Returns: - (torch.Tensor): The processed masks. + (torch.Tensor): A binary mask tensor of shape [n, h, w], where n is the number of masks after NMS, and h and w + are the height and width of the input image. The mask is applied to the bounding boxes. """ c, mh, mw = protos.shape # CHW @@ -632,19 +628,19 @@ def process_mask_native(protos, masks_in, bboxes, shape): return masks.gt_(0.5) -def scale_segments(img1_shape, segments, img0_shape, ratio_pad=None, normalize=False): +def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None, normalize=False): """ Rescale segment coordinates (xyxy) from img1_shape to img0_shape Args: - img1_shape (tuple): The shape of the image that the segments are from. - segments (torch.Tensor): the segments to be scaled + img1_shape (tuple): The shape of the image that the coords are from. + coords (torch.Tensor): the coords to be scaled img0_shape (tuple): the shape of the image that the segmentation is being applied to ratio_pad (tuple): the ratio of the image size to the padded image size. normalize (bool): If True, the coordinates will be normalized to the range [0, 1]. Defaults to False Returns: - segments (torch.Tensor): the segmented image. + coords (torch.Tensor): the segmented image. """ if ratio_pad is None: # calculate from img0_shape gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new @@ -653,14 +649,15 @@ def scale_segments(img1_shape, segments, img0_shape, ratio_pad=None, normalize=F gain = ratio_pad[0][0] pad = ratio_pad[1] - segments[:, 0] -= pad[0] # x padding - segments[:, 1] -= pad[1] # y padding - segments /= gain - clip_segments(segments, img0_shape) + coords[..., 0] -= pad[0] # x padding + coords[..., 1] -= pad[1] # y padding + coords[..., 0] /= gain + coords[..., 1] /= gain + clip_coords(coords, img0_shape) if normalize: - segments[:, 0] /= img0_shape[1] # width - segments[:, 1] /= img0_shape[0] # height - return segments + coords[..., 0] /= img0_shape[1] # width + coords[..., 1] /= img0_shape[0] # height + return coords def masks2segments(masks, strategy='largest'): @@ -688,23 +685,6 @@ def masks2segments(masks, strategy='largest'): return segments -def clip_segments(segments, shape): - """ - It takes a list of line segments (x1,y1,x2,y2) and clips them to the image shape (height, width) - - Args: - segments (list): a list of segments, each segment is a list of points, each point is a list of x,y - coordinates - shape (tuple): the shape of the image - """ - if isinstance(segments, torch.Tensor): # faster individually - segments[:, 0].clamp_(0, shape[1]) # x - segments[:, 1].clamp_(0, shape[0]) # y - else: # np.array (faster grouped) - segments[:, 0] = segments[:, 0].clip(0, shape[1]) # x - segments[:, 1] = segments[:, 1].clip(0, shape[0]) # y - - def clean_str(s): """ Cleans a string by replacing special characters with underscore _ diff --git a/ultralytics/yolo/utils/plotting.py b/ultralytics/yolo/utils/plotting.py index e40d486..b139f97 100644 --- a/ultralytics/yolo/utils/plotting.py +++ b/ultralytics/yolo/utils/plotting.py @@ -16,7 +16,7 @@ from ultralytics.yolo.utils import LOGGER, TryExcept, 
threaded from .checks import check_font, check_version, is_ascii from .files import increment_path -from .ops import clip_coords, scale_image, xywh2xyxy, xyxy2xywh +from .ops import clip_boxes, scale_image, xywh2xyxy, xyxy2xywh matplotlib.rc('font', **{'size': 11}) matplotlib.use('Agg') # for writing to files only @@ -30,6 +30,11 @@ class Colors: '2C99A8', '00C2FF', '344593', '6473FF', '0018EC', '8438FF', '520085', 'CB38FF', 'FF95C8', 'FF37C7') self.palette = [self.hex2rgb(f'#{c}') for c in hexs] self.n = len(self.palette) + self.pose_palette = np.array([[255, 128, 0], [255, 153, 51], [255, 178, 102], [230, 230, 0], [255, 153, 255], + [153, 204, 255], [255, 102, 255], [255, 51, 255], [102, 178, 255], [51, 153, 255], + [255, 153, 153], [255, 102, 102], [255, 51, 51], [153, 255, 153], [102, 255, 102], + [51, 255, 51], [0, 255, 0], [0, 0, 255], [255, 0, 0], [255, 255, 255]], + dtype=np.uint8) def __call__(self, i, bgr=False): c = self.palette[int(i) % self.n] @@ -62,6 +67,12 @@ class Annotator: else: # use cv2 self.im = im self.lw = line_width or max(round(sum(im.shape) / 2 * 0.003), 2) # line width + # pose + self.skeleton = [[16, 14], [14, 12], [17, 15], [15, 13], [12, 13], [6, 12], [7, 13], [6, 7], [6, 8], [7, 9], + [8, 10], [9, 11], [2, 3], [1, 2], [1, 3], [2, 4], [3, 5], [4, 6], [5, 7]] + + self.limb_color = colors.pose_palette[[9, 9, 9, 9, 7, 7, 7, 0, 0, 0, 0, 0, 16, 16, 16, 16, 16, 16, 16]] + self.kpt_color = colors.pose_palette[[16, 16, 16, 16, 16, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9]] def box_label(self, box, label='', color=(128, 128, 128), txt_color=(255, 255, 255)): # Add one xyxy box to image with label @@ -132,6 +143,49 @@ class Annotator: # convert im back to PIL and update draw self.fromarray(self.im) + def kpts(self, kpts, shape=(640, 640), radius=5, kpt_line=True): + """Plot keypoints. 
+ Args: + kpts (tensor): predicted kpts, shape: [17, 3] + shape (tuple): image shape, (h, w) + steps (int): keypoints step + radius (int): size of drawing points + """ + if self.pil: + # convert to numpy first + self.im = np.asarray(self.im).copy() + nkpt, ndim = kpts.shape + is_pose = nkpt == 17 and ndim == 3 + kpt_line &= is_pose # `kpt_line=True` for now only supports human pose plotting + for i, k in enumerate(kpts): + color_k = [int(x) for x in self.kpt_color[i]] if is_pose else colors(i) + x_coord, y_coord = k[0], k[1] + if x_coord % shape[1] != 0 and y_coord % shape[0] != 0: + if len(k) == 3: + conf = k[2] + if conf < 0.5: + continue + cv2.circle(self.im, (int(x_coord), int(y_coord)), radius, color_k, -1) + + if kpt_line: + ndim = kpts.shape[-1] + for sk_id, sk in enumerate(self.skeleton): + pos1 = (int(kpts[(sk[0] - 1), 0]), int(kpts[(sk[0] - 1), 1])) + pos2 = (int(kpts[(sk[1] - 1), 0]), int(kpts[(sk[1] - 1), 1])) + if ndim == 3: + conf1 = kpts[(sk[0] - 1), 2] + conf2 = kpts[(sk[1] - 1), 2] + if conf1 < 0.5 or conf2 < 0.5: + continue + if pos1[0] % shape[1] == 0 or pos1[1] % shape[0] == 0 or pos1[0] < 0 or pos1[1] < 0: + continue + if pos2[0] % shape[1] == 0 or pos2[1] % shape[0] == 0 or pos2[0] < 0 or pos2[1] < 0: + continue + cv2.line(self.im, pos1, pos2, [int(x) for x in self.limb_color[sk_id]], thickness=2) + if self.pil: + # convert im back to PIL and update draw + self.fromarray(self.im) + def rectangle(self, xy, fill=None, outline=None, width=1): # Add rectangle to image (PIL-only) self.draw.rectangle(xy, fill, outline, width) @@ -213,7 +267,7 @@ def save_one_box(xyxy, im, file=Path('im.jpg'), gain=1.02, pad=10, square=False, b[:, 2:] = b[:, 2:].max(1)[0].unsqueeze(1) # attempt rectangle to square b[:, 2:] = b[:, 2:] * gain + pad # box wh * gain + pad xyxy = xywh2xyxy(b).long() - clip_coords(xyxy, im.shape) + clip_boxes(xyxy, im.shape) crop = im[int(xyxy[0, 1]):int(xyxy[0, 3]), int(xyxy[0, 0]):int(xyxy[0, 2]), ::(1 if BGR else -1)] if save: file.parent.mkdir(parents=True, exist_ok=True) # make directory @@ -229,6 +283,7 @@ def plot_images(images, cls, bboxes, masks=np.zeros(0, dtype=np.uint8), + kpts=np.zeros((0, 51), dtype=np.float32), paths=None, fname='images.jpg', names=None): @@ -241,6 +296,8 @@ def plot_images(images, bboxes = bboxes.cpu().numpy() if isinstance(masks, torch.Tensor): masks = masks.cpu().numpy().astype(int) + if isinstance(kpts, torch.Tensor): + kpts = kpts.cpu().numpy() if isinstance(batch_idx, torch.Tensor): batch_idx = batch_idx.cpu().numpy() @@ -300,6 +357,21 @@ def plot_images(images, label = f'{c}' if labels else f'{c} {conf[j]:.1f}' annotator.box_label(box, label, color=color) + # Plot keypoints + if len(kpts): + kpts_ = kpts[idx].copy() + if len(kpts_): + if kpts_[..., 0].max() <= 1.01 or kpts_[..., 1].max() <= 1.01: # if normalized with tolerance .01 + kpts_[..., 0] *= w # scale to pixels + kpts_[..., 1] *= h + elif scale < 1: # absolute coords need scale if image scales + kpts_ *= scale + kpts_[..., 0] += x + kpts_[..., 1] += y + for j in range(len(kpts_)): + if labels or conf[j] > 0.25: # 0.25 conf thresh + annotator.kpts(kpts_[j]) + # Plot masks if len(masks): if idx.shape[0] == masks.shape[0]: # overlap_masks=False @@ -307,7 +379,7 @@ def plot_images(images, else: # overlap_masks=True image_masks = masks[[i]] # (1, 640, 640) nl = idx.sum() - index = np.arange(nl).reshape(nl, 1, 1) + 1 + index = np.arange(nl).reshape((nl, 1, 1)) + 1 image_masks = np.repeat(image_masks, nl, axis=0) image_masks = np.where(image_masks == index, 1.0, 0.0) @@ 
-328,13 +400,16 @@ def plot_images(images, annotator.im.save(fname) # save -def plot_results(file='path/to/results.csv', dir='', segment=False): +def plot_results(file='path/to/results.csv', dir='', segment=False, pose=False): # Plot training results.csv. Usage: from utils.plots import *; plot_results('path/to/results.csv') import pandas as pd save_dir = Path(file).parent if file else Path(dir) if segment: fig, ax = plt.subplots(2, 8, figsize=(18, 6), tight_layout=True) index = [1, 2, 3, 4, 5, 6, 9, 10, 13, 14, 15, 16, 7, 8, 11, 12] + elif pose: + fig, ax = plt.subplots(2, 9, figsize=(21, 6), tight_layout=True) + index = [1, 2, 3, 4, 5, 6, 7, 10, 11, 14, 15, 16, 17, 18, 8, 9, 12, 13] else: fig, ax = plt.subplots(2, 5, figsize=(12, 6), tight_layout=True) index = [1, 2, 3, 4, 5, 8, 9, 10, 6, 7] diff --git a/ultralytics/yolo/utils/torch_utils.py b/ultralytics/yolo/utils/torch_utils.py index b906019..84b5483 100644 --- a/ultralytics/yolo/utils/torch_utils.py +++ b/ultralytics/yolo/utils/torch_utils.py @@ -240,8 +240,8 @@ def copy_attr(a, b, include=(), exclude=()): def get_latest_opset(): - # Return max supported ONNX opset by this version of torch - return max(int(k[14:]) for k in vars(torch.onnx) if 'symbolic_opset' in k) # opset + # Return second-most (for maturity) recently supported ONNX opset by this version of torch + return max(int(k[14:]) for k in vars(torch.onnx) if 'symbolic_opset' in k) - 1 # opset def intersect_dicts(da, db, exclude=()): @@ -318,18 +318,18 @@ def strip_optimizer(f: Union[str, Path] = 'best.pt', s: str = '') -> None: """ Strip optimizer from 'f' to finalize training, optionally save as 's'. - Usage: - from ultralytics.yolo.utils.torch_utils import strip_optimizer - from pathlib import Path - for f in Path('/Users/glennjocher/Downloads/weights').glob('*.pt'): - strip_optimizer(f) - Args: f (str): file path to model to strip the optimizer from. Default is 'best.pt'. s (str): file path to save the model with stripped optimizer to. If not provided, 'f' will be overwritten. 
Returns: None + + Usage: + from pathlib import Path + from ultralytics.yolo.utils.torch_utils import strip_optimizer + for f in Path('/Users/glennjocher/Downloads/weights').rglob('*.pt'): + strip_optimizer(f) """ x = torch.load(f, map_location=torch.device('cpu')) args = {**DEFAULT_CFG_DICT, **x['train_args']} # combine model args with default args, preferring model args @@ -349,7 +349,9 @@ def strip_optimizer(f: Union[str, Path] = 'best.pt', s: str = '') -> None: def profile(input, ops, n=10, device=None): - """ YOLOv8 speed/memory/FLOPs profiler + """ + YOLOv8 speed/memory/FLOPs profiler + Usage: input = torch.randn(16, 3, 640, 640) m1 = lambda x: x * torch.sigmoid(x) diff --git a/ultralytics/yolo/v8/__init__.py b/ultralytics/yolo/v8/__init__.py index 1f03762..7b2d158 100644 --- a/ultralytics/yolo/v8/__init__.py +++ b/ultralytics/yolo/v8/__init__.py @@ -1,5 +1,5 @@ # Ultralytics YOLO 🚀, GPL-3.0 license -from ultralytics.yolo.v8 import classify, detect, segment +from ultralytics.yolo.v8 import classify, detect, pose, segment -__all__ = 'classify', 'segment', 'detect' +__all__ = 'classify', 'segment', 'detect', 'pose' diff --git a/ultralytics/yolo/v8/detect/train.py b/ultralytics/yolo/v8/detect/train.py index 6484cd7..33d463d 100644 --- a/ultralytics/yolo/v8/detect/train.py +++ b/ultralytics/yolo/v8/detect/train.py @@ -41,7 +41,7 @@ class DetectionTrainer(BaseTrainer): shuffle=mode == 'train', seed=self.args.seed)[0] if self.args.v5loader else \ build_dataloader(self.args, batch_size, img_path=dataset_path, stride=gs, rank=rank, mode=mode, - rect=mode == 'val', names=self.data['names'])[0] + rect=mode == 'val', data_info=self.data)[0] def preprocess_batch(self, batch): batch['img'] = batch['img'].to(self.device, non_blocking=True).float() / 255 diff --git a/ultralytics/yolo/v8/detect/val.py b/ultralytics/yolo/v8/detect/val.py index 5d09942..71044f8 100644 --- a/ultralytics/yolo/v8/detect/val.py +++ b/ultralytics/yolo/v8/detect/val.py @@ -41,7 +41,7 @@ class DetectionValidator(BaseValidator): def init_metrics(self, model): val = self.data.get(self.args.split, '') # validation path - self.is_coco = isinstance(val, str) and val.endswith(f'coco{os.sep}val2017.txt') # is COCO dataset + self.is_coco = isinstance(val, str) and 'coco' in val and val.endswith(f'{os.sep}val2017.txt') # is COCO self.class_map = ops.coco80_to_coco91_class() if self.is_coco else list(range(1000)) self.args.save_json |= self.is_coco and not self.training # run on final val if training COCO self.names = model.names @@ -179,7 +179,7 @@ class DetectionValidator(BaseValidator): prefix=colorstr(f'{self.args.mode}: '), shuffle=False, seed=self.args.seed)[0] if self.args.v5loader else \ - build_dataloader(self.args, batch_size, img_path=dataset_path, stride=gs, names=self.data['names'], + build_dataloader(self.args, batch_size, img_path=dataset_path, stride=gs, data_info=self.data, mode='val')[0] def plot_val_samples(self, batch, ni): diff --git a/ultralytics/yolo/v8/pose/__init__.py b/ultralytics/yolo/v8/pose/__init__.py new file mode 100644 index 0000000..3556660 --- /dev/null +++ b/ultralytics/yolo/v8/pose/__init__.py @@ -0,0 +1,7 @@ +# Ultralytics YOLO 🚀, GPL-3.0 license + +from .predict import PosePredictor, predict +from .train import PoseTrainer, train +from .val import PoseValidator, val + +__all__ = 'PoseTrainer', 'train', 'PoseValidator', 'val', 'PosePredictor', 'predict' diff --git a/ultralytics/yolo/v8/pose/predict.py b/ultralytics/yolo/v8/pose/predict.py new file mode 100644 index 0000000..c121f80 --- /dev/null 
+++ b/ultralytics/yolo/v8/pose/predict.py @@ -0,0 +1,103 @@ +# Ultralytics YOLO 🚀, GPL-3.0 license + +from ultralytics.yolo.engine.results import Results +from ultralytics.yolo.utils import DEFAULT_CFG, ROOT, ops +from ultralytics.yolo.utils.plotting import colors, save_one_box +from ultralytics.yolo.v8.detect.predict import DetectionPredictor + + +class PosePredictor(DetectionPredictor): + + def postprocess(self, preds, img, orig_img): + preds = ops.non_max_suppression(preds, + self.args.conf, + self.args.iou, + agnostic=self.args.agnostic_nms, + max_det=self.args.max_det, + classes=self.args.classes, + nc=len(self.model.names)) + + results = [] + for i, pred in enumerate(preds): + orig_img = orig_img[i] if isinstance(orig_img, list) else orig_img + shape = orig_img.shape + pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], shape).round() + pred_kpts = pred[:, 6:].view(len(pred), *self.model.kpt_shape) if len(pred) else pred[:, 6:] + pred_kpts = ops.scale_coords(img.shape[2:], pred_kpts, shape) + path, _, _, _, _ = self.batch + img_path = path[i] if isinstance(path, list) else path + results.append( + Results(orig_img=orig_img, + path=img_path, + names=self.model.names, + boxes=pred[:, :6], + keypoints=pred_kpts)) + return results + + def write_results(self, idx, results, batch): + p, im, im0 = batch + log_string = '' + if len(im.shape) == 3: + im = im[None] # expand for batch dim + self.seen += 1 + imc = im0.copy() if self.args.save_crop else im0 + if self.source_type.webcam or self.source_type.from_img: # batch_size >= 1 + log_string += f'{idx}: ' + frame = self.dataset.count + else: + frame = getattr(self.dataset, 'frame', 0) + self.data_path = p + self.txt_path = str(self.save_dir / 'labels' / p.stem) + ('' if self.dataset.mode == 'image' else f'_{frame}') + log_string += '%gx%g ' % im.shape[2:] # print string + self.annotator = self.get_annotator(im0) + + det = results[idx].boxes # TODO: make boxes inherit from tensors + if len(det) == 0: + return f'{log_string}(no detections), ' + for c in det.cls.unique(): + n = (det.cls == c).sum() # detections per class + log_string += f"{n} {self.model.names[int(c)]}{'s' * (n > 1)}, " + + kpts = reversed(results[idx].keypoints) + for k in kpts: + self.annotator.kpts(k, shape=results[idx].orig_shape) + + # write + for j, d in enumerate(reversed(det)): + c, conf, id = int(d.cls), float(d.conf), None if d.id is None else int(d.id.item()) + if self.args.save_txt: # Write to file + kpt = (kpts[j][:, :2] / d.orig_shape[[1, 0]]).reshape(-1).tolist() + box = d.xywhn.view(-1).tolist() + line = (c, *box, *kpt) + (conf, ) * self.args.save_conf + (() if id is None else (id, )) + with open(f'{self.txt_path}.txt', 'a') as f: + f.write(('%g ' * len(line)).rstrip() % line + '\n') + if self.args.save or self.args.show: # Add bbox to image + name = ('' if id is None else f'id:{id} ') + self.model.names[c] + label = (f'{name} {conf:.2f}' if self.args.show_conf else name) if self.args.show_labels else None + if self.args.boxes: + self.annotator.box_label(d.xyxy.squeeze(), label, color=colors(c, True)) + if self.args.save_crop: + save_one_box(d.xyxy, + imc, + file=self.save_dir / 'crops' / self.model.model.names[c] / f'{self.data_path.stem}.jpg', + BGR=True) + + return log_string + + +def predict(cfg=DEFAULT_CFG, use_python=False): + model = cfg.model or 'yolov8n-pose.pt' + source = cfg.source if cfg.source is not None else ROOT / 'assets' if (ROOT / 'assets').exists() \ + else 'https://ultralytics.com/images/bus.jpg' + + args = dict(model=model, 
source=source) + if use_python: + from ultralytics import YOLO + YOLO(model)(**args) + else: + predictor = PosePredictor(overrides=args) + predictor.predict_cli() + + +if __name__ == '__main__': + predict() diff --git a/ultralytics/yolo/v8/pose/train.py b/ultralytics/yolo/v8/pose/train.py new file mode 100644 index 0000000..dd2dbb9 --- /dev/null +++ b/ultralytics/yolo/v8/pose/train.py @@ -0,0 +1,170 @@ +# Ultralytics YOLO 🚀, GPL-3.0 license + +from copy import copy + +import torch +import torch.nn as nn + +from ultralytics.nn.tasks import PoseModel +from ultralytics.yolo import v8 +from ultralytics.yolo.utils import DEFAULT_CFG +from ultralytics.yolo.utils.loss import KeypointLoss +from ultralytics.yolo.utils.metrics import OKS_SIGMA +from ultralytics.yolo.utils.ops import xyxy2xywh +from ultralytics.yolo.utils.plotting import plot_images, plot_results +from ultralytics.yolo.utils.tal import make_anchors +from ultralytics.yolo.utils.torch_utils import de_parallel +from ultralytics.yolo.v8.detect.train import Loss + + +# BaseTrainer python usage +class PoseTrainer(v8.detect.DetectionTrainer): + + def __init__(self, cfg=DEFAULT_CFG, overrides=None): + if overrides is None: + overrides = {} + overrides['task'] = 'pose' + super().__init__(cfg, overrides) + + def get_model(self, cfg=None, weights=None, verbose=True): + model = PoseModel(cfg, ch=3, nc=self.data['nc'], data_kpt_shape=self.data['kpt_shape'], verbose=verbose) + if weights: + model.load(weights) + + return model + + def set_model_attributes(self): + super().set_model_attributes() + self.model.kpt_shape = self.data['kpt_shape'] + + def get_validator(self): + self.loss_names = 'box_loss', 'pose_loss', 'kobj_loss', 'cls_loss', 'dfl_loss' + return v8.pose.PoseValidator(self.test_loader, save_dir=self.save_dir, args=copy(self.args)) + + def criterion(self, preds, batch): + if not hasattr(self, 'compute_loss'): + self.compute_loss = PoseLoss(de_parallel(self.model)) + return self.compute_loss(preds, batch) + + def plot_training_samples(self, batch, ni): + images = batch['img'] + kpts = batch['keypoints'] + cls = batch['cls'].squeeze(-1) + bboxes = batch['bboxes'] + paths = batch['im_file'] + batch_idx = batch['batch_idx'] + plot_images(images, + batch_idx, + cls, + bboxes, + kpts=kpts, + paths=paths, + fname=self.save_dir / f'train_batch{ni}.jpg') + + def plot_metrics(self): + plot_results(file=self.csv, pose=True) # save results.png + + +# Criterion class for computing training losses +class PoseLoss(Loss): + + def __init__(self, model): # model must be de-paralleled + super().__init__(model) + self.kpt_shape = model.model[-1].kpt_shape + self.bce_pose = nn.BCEWithLogitsLoss() + is_pose = self.kpt_shape == [17, 3] + nkpt = self.kpt_shape[0] # number of keypoints + sigmas = torch.from_numpy(OKS_SIGMA).to(self.device) if is_pose else torch.ones(nkpt, device=self.device) / nkpt + self.keypoint_loss = KeypointLoss(sigmas=sigmas) + + def __call__(self, preds, batch): + loss = torch.zeros(5, device=self.device) # box, cls, dfl, kpt_location, kpt_visibility + feats, pred_kpts = preds if isinstance(preds[0], list) else preds[1] + pred_distri, pred_scores = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats], 2).split( + (self.reg_max * 4, self.nc), 1) + + # b, grids, .. 
+ pred_scores = pred_scores.permute(0, 2, 1).contiguous() + pred_distri = pred_distri.permute(0, 2, 1).contiguous() + pred_kpts = pred_kpts.permute(0, 2, 1).contiguous() + + dtype = pred_scores.dtype + imgsz = torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) * self.stride[0] # image size (h,w) + anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5) + + # targets + batch_size = pred_scores.shape[0] + batch_idx = batch['batch_idx'].view(-1, 1) + targets = torch.cat((batch_idx, batch['cls'].view(-1, 1), batch['bboxes']), 1) + targets = self.preprocess(targets.to(self.device), batch_size, scale_tensor=imgsz[[1, 0, 1, 0]]) + gt_labels, gt_bboxes = targets.split((1, 4), 2) # cls, xyxy + mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0) + + # pboxes + pred_bboxes = self.bbox_decode(anchor_points, pred_distri) # xyxy, (b, h*w, 4) + pred_kpts = self.kpts_decode(anchor_points, pred_kpts.view(batch_size, -1, *self.kpt_shape)) # (b, h*w, 17, 3) + + _, target_bboxes, target_scores, fg_mask, target_gt_idx = self.assigner( + pred_scores.detach().sigmoid(), (pred_bboxes.detach() * stride_tensor).type(gt_bboxes.dtype), + anchor_points * stride_tensor, gt_labels, gt_bboxes, mask_gt) + + target_scores_sum = max(target_scores.sum(), 1) + + # cls loss + # loss[1] = self.varifocal_loss(pred_scores, target_scores, target_labels) / target_scores_sum # VFL way + loss[3] = self.bce(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum # BCE + + # bbox loss + if fg_mask.sum(): + target_bboxes /= stride_tensor + loss[0], loss[4] = self.bbox_loss(pred_distri, pred_bboxes, anchor_points, target_bboxes, target_scores, + target_scores_sum, fg_mask) + keypoints = batch['keypoints'].to(self.device).float().clone() + keypoints[..., 0] *= imgsz[1] + keypoints[..., 1] *= imgsz[0] + for i in range(batch_size): + if fg_mask[i].sum(): + idx = target_gt_idx[i][fg_mask[i]] + gt_kpt = keypoints[batch_idx.view(-1) == i][idx] # (n, 51) + gt_kpt[..., 0] /= stride_tensor[fg_mask[i]] + gt_kpt[..., 1] /= stride_tensor[fg_mask[i]] + area = xyxy2xywh(target_bboxes[i][fg_mask[i]])[:, 2:].prod(1, keepdim=True) + pred_kpt = pred_kpts[i][fg_mask[i]] + kpt_mask = gt_kpt[..., 2] != 0 + loss[1] += self.keypoint_loss(pred_kpt, gt_kpt, kpt_mask, area) # pose loss + # kpt_score loss + if pred_kpt.shape[-1] == 3: + loss[2] += self.bce_pose(pred_kpt[..., 2], kpt_mask.float()) # keypoint obj loss + + loss[0] *= self.hyp.box # box gain + loss[1] *= self.hyp.pose / batch_size # pose gain + loss[2] *= self.hyp.kobj / batch_size # kobj gain + loss[3] *= self.hyp.cls # cls gain + loss[4] *= self.hyp.dfl # dfl gain + + return loss.sum() * batch_size, loss.detach() # loss(box, cls, dfl) + + def kpts_decode(self, anchor_points, pred_kpts): + y = pred_kpts.clone() + y[..., :2] *= 2.0 + y[..., 0] += anchor_points[:, [0]] - 0.5 + y[..., 1] += anchor_points[:, [1]] - 0.5 + return y + + +def train(cfg=DEFAULT_CFG, use_python=False): + model = cfg.model or 'yolov8n-pose.yaml' + data = cfg.data or 'coco8-pose.yaml' + device = cfg.device if cfg.device is not None else '' + + args = dict(model=model, data=data, device=device) + if use_python: + from ultralytics import YOLO + YOLO(model).train(**args) + else: + trainer = PoseTrainer(overrides=args) + trainer.train() + + +if __name__ == '__main__': + train() diff --git a/ultralytics/yolo/v8/pose/val.py b/ultralytics/yolo/v8/pose/val.py new file mode 100644 index 0000000..b849abb --- /dev/null +++ b/ultralytics/yolo/v8/pose/val.py @@ -0,0 +1,213 @@ +# Ultralytics YOLO 🚀, GPL-3.0 
license + +from pathlib import Path + +import numpy as np +import torch + +from ultralytics.yolo.utils import DEFAULT_CFG, LOGGER, ops +from ultralytics.yolo.utils.checks import check_requirements +from ultralytics.yolo.utils.metrics import OKS_SIGMA, PoseMetrics, box_iou, kpt_iou +from ultralytics.yolo.utils.plotting import output_to_target, plot_images +from ultralytics.yolo.v8.detect import DetectionValidator + + +class PoseValidator(DetectionValidator): + + def __init__(self, dataloader=None, save_dir=None, pbar=None, args=None): + super().__init__(dataloader, save_dir, pbar, args) + self.args.task = 'pose' + self.metrics = PoseMetrics(save_dir=self.save_dir) + + def preprocess(self, batch): + batch = super().preprocess(batch) + batch['keypoints'] = batch['keypoints'].to(self.device).float() + return batch + + def get_desc(self): + return ('%22s' + '%11s' * 10) % ('Class', 'Images', 'Instances', 'Box(P', 'R', 'mAP50', 'mAP50-95)', 'Pose(P', + 'R', 'mAP50', 'mAP50-95)') + + def postprocess(self, preds): + preds = ops.non_max_suppression(preds, + self.args.conf, + self.args.iou, + labels=self.lb, + multi_label=True, + agnostic=self.args.single_cls, + max_det=self.args.max_det, + nc=self.nc) + return preds + + def init_metrics(self, model): + super().init_metrics(model) + self.kpt_shape = self.data['kpt_shape'] + is_pose = self.kpt_shape == [17, 3] + nkpt = self.kpt_shape[0] + self.sigma = OKS_SIGMA if is_pose else np.ones(nkpt) / nkpt + + def update_metrics(self, preds, batch): + # Metrics + for si, pred in enumerate(preds): + idx = batch['batch_idx'] == si + cls = batch['cls'][idx] + bbox = batch['bboxes'][idx] + kpts = batch['keypoints'][idx] + nl, npr = cls.shape[0], pred.shape[0] # number of labels, predictions + nk = kpts.shape[1] # number of keypoints + shape = batch['ori_shape'][si] + correct_kpts = torch.zeros(npr, self.niou, dtype=torch.bool, device=self.device) # init + correct_bboxes = torch.zeros(npr, self.niou, dtype=torch.bool, device=self.device) # init + self.seen += 1 + + if npr == 0: + if nl: + self.stats.append((correct_bboxes, correct_kpts, *torch.zeros( + (2, 0), device=self.device), cls.squeeze(-1))) + if self.args.plots: + self.confusion_matrix.process_batch(detections=None, labels=cls.squeeze(-1)) + continue + + # Predictions + if self.args.single_cls: + pred[:, 5] = 0 + predn = pred.clone() + ops.scale_boxes(batch['img'][si].shape[1:], predn[:, :4], shape, + ratio_pad=batch['ratio_pad'][si]) # native-space pred + pred_kpts = predn[:, 6:].view(npr, nk, -1) + ops.scale_coords(batch['img'][si].shape[1:], pred_kpts, shape, ratio_pad=batch['ratio_pad'][si]) + + # Evaluate + if nl: + height, width = batch['img'].shape[2:] + tbox = ops.xywh2xyxy(bbox) * torch.tensor( + (width, height, width, height), device=self.device) # target boxes + ops.scale_boxes(batch['img'][si].shape[1:], tbox, shape, + ratio_pad=batch['ratio_pad'][si]) # native-space labels + tkpts = kpts.clone() + tkpts[..., 0] *= width + tkpts[..., 1] *= height + tkpts = ops.scale_coords(batch['img'][si].shape[1:], tkpts, shape, ratio_pad=batch['ratio_pad'][si]) + labelsn = torch.cat((cls, tbox), 1) # native-space labels + correct_bboxes = self._process_batch(predn[:, :6], labelsn) + correct_kpts = self._process_batch(predn[:, :6], labelsn, pred_kpts, tkpts) + if self.args.plots: + self.confusion_matrix.process_batch(predn, labelsn) + + # Append correct_masks, correct_boxes, pconf, pcls, tcls + self.stats.append((correct_bboxes, correct_kpts, pred[:, 4], pred[:, 5], cls.squeeze(-1))) + + # Save + if 
self.args.save_json: + self.pred_to_json(predn, batch['im_file'][si]) + # if self.args.save_txt: + # save_one_txt(predn, save_conf, shape, file=save_dir / 'labels' / f'{path.stem}.txt') + + def _process_batch(self, detections, labels, pred_kpts=None, gt_kpts=None): + """ + Return correct prediction matrix + Arguments: + detections (array[N, 6]), x1, y1, x2, y2, conf, class + labels (array[M, 5]), class, x1, y1, x2, y2 + pred_kpts (array[N, 51]), 51 = 17 * 3 + gt_kpts (array[N, 51]) + Returns: + correct (array[N, 10]), for 10 IoU levels + """ + if pred_kpts is not None and gt_kpts is not None: + # `0.53` is from https://github.com/jin-s13/xtcocoapi/blob/master/xtcocotools/cocoeval.py#L384 + area = ops.xyxy2xywh(labels[:, 1:])[:, 2:].prod(1) * 0.53 + iou = kpt_iou(gt_kpts, pred_kpts, sigma=self.sigma, area=area) + else: # boxes + iou = box_iou(labels[:, 1:], detections[:, :4]) + + correct = np.zeros((detections.shape[0], self.iouv.shape[0])).astype(bool) + correct_class = labels[:, 0:1] == detections[:, 5] + for i in range(len(self.iouv)): + x = torch.where((iou >= self.iouv[i]) & correct_class) # IoU > threshold and classes match + if x[0].shape[0]: + matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), + 1).cpu().numpy() # [label, detect, iou] + if x[0].shape[0] > 1: + matches = matches[matches[:, 2].argsort()[::-1]] + matches = matches[np.unique(matches[:, 1], return_index=True)[1]] + # matches = matches[matches[:, 2].argsort()[::-1]] + matches = matches[np.unique(matches[:, 0], return_index=True)[1]] + correct[matches[:, 1].astype(int), i] = True + return torch.tensor(correct, dtype=torch.bool, device=detections.device) + + def plot_val_samples(self, batch, ni): + plot_images(batch['img'], + batch['batch_idx'], + batch['cls'].squeeze(-1), + batch['bboxes'], + kpts=batch['keypoints'], + paths=batch['im_file'], + fname=self.save_dir / f'val_batch{ni}_labels.jpg', + names=self.names) + + def plot_predictions(self, batch, preds, ni): + pred_kpts = torch.cat([p[:, 6:].view(-1, *self.kpt_shape)[:15] for p in preds], 0) + plot_images(batch['img'], + *output_to_target(preds, max_det=15), + kpts=pred_kpts, + paths=batch['im_file'], + fname=self.save_dir / f'val_batch{ni}_pred.jpg', + names=self.names) # pred + + def pred_to_json(self, predn, filename): + stem = Path(filename).stem + image_id = int(stem) if stem.isnumeric() else stem + box = ops.xyxy2xywh(predn[:, :4]) # xywh + box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner + for p, b in zip(predn.tolist(), box.tolist()): + self.jdict.append({ + 'image_id': image_id, + 'category_id': self.class_map[int(p[5])], + 'bbox': [round(x, 3) for x in b], + 'keypoints': p[6:], + 'score': round(p[4], 5)}) + + def eval_json(self, stats): + if self.args.save_json and self.is_coco and len(self.jdict): + anno_json = self.data['path'] / 'annotations/person_keypoints_val2017.json' # annotations + pred_json = self.save_dir / 'predictions.json' # predictions + LOGGER.info(f'\nEvaluating pycocotools mAP using {pred_json} and {anno_json}...') + try: # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb + check_requirements('pycocotools>=2.0.6') + from pycocotools.coco import COCO # noqa + from pycocotools.cocoeval import COCOeval # noqa + + for x in anno_json, pred_json: + assert x.is_file(), f'{x} file not found' + anno = COCO(str(anno_json)) # init annotations api + pred = anno.loadRes(str(pred_json)) # init predictions api (must pass string, not Path) + for i, eval in enumerate([COCOeval(anno, pred, 'bbox'), 
COCOeval(anno, pred, 'keypoints')]): + if self.is_coco: + eval.params.imgIds = [int(Path(x).stem) for x in self.dataloader.dataset.im_files] # im to eval + eval.evaluate() + eval.accumulate() + eval.summarize() + idx = i * 4 + 2 + stats[self.metrics.keys[idx + 1]], stats[ + self.metrics.keys[idx]] = eval.stats[:2] # update mAP50-95 and mAP50 + except Exception as e: + LOGGER.warning(f'pycocotools unable to run: {e}') + return stats + + +def val(cfg=DEFAULT_CFG, use_python=False): + model = cfg.model or 'yolov8n-pose.pt' + data = cfg.data or 'coco128-pose.yaml' + + args = dict(model=model, data=data) + if use_python: + from ultralytics import YOLO + YOLO(model).val(**args) + else: + validator = PoseValidator(args=args) + validator(model=args['model']) + + +if __name__ == '__main__': + val() diff --git a/ultralytics/yolo/v8/segment/val.py b/ultralytics/yolo/v8/segment/val.py index 403f880..52b56e9 100644 --- a/ultralytics/yolo/v8/segment/val.py +++ b/ultralytics/yolo/v8/segment/val.py @@ -65,7 +65,7 @@ class SegmentationValidator(DetectionValidator): if npr == 0: if nl: - self.stats.append((correct_masks, correct_bboxes, *torch.zeros( + self.stats.append((correct_bboxes, correct_masks, *torch.zeros( (2, 0), device=self.device), cls.squeeze(-1))) if self.args.plots: self.confusion_matrix.process_batch(detections=None, labels=cls.squeeze(-1)) @@ -103,7 +103,7 @@ class SegmentationValidator(DetectionValidator): self.confusion_matrix.process_batch(predn, labelsn) # Append correct_masks, correct_boxes, pconf, pcls, tcls - self.stats.append((correct_masks, correct_bboxes, pred[:, 4], pred[:, 5], cls.squeeze(-1))) + self.stats.append((correct_bboxes, correct_masks, pred[:, 4], pred[:, 5], cls.squeeze(-1))) pred_masks = torch.as_tensor(pred_masks, dtype=torch.uint8) if self.args.plots and self.batch_i < 3: @@ -220,8 +220,7 @@ class SegmentationValidator(DetectionValidator): pred = anno.loadRes(str(pred_json)) # init predictions api (must pass string, not Path) for i, eval in enumerate([COCOeval(anno, pred, 'bbox'), COCOeval(anno, pred, 'segm')]): if self.is_coco: - eval.params.imgIds = [int(Path(x).stem) - for x in self.dataloader.dataset.im_files] # images to eval + eval.params.imgIds = [int(Path(x).stem) for x in self.dataloader.dataset.im_files] # im to eval eval.evaluate() eval.accumulate() eval.summarize()
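The new ultralytics/yolo/v8/pose package above mirrors the existing detect and segment tasks, so it can be exercised through the same entry points this diff defines. The sketch below is a minimal illustration only, not part of the change: it uses only constructs visible in the diff (the train()/val()/predict() helpers and the PoseTrainer, PoseValidator and PosePredictor classes), the default asset names yolov8n-pose.yaml, yolov8n-pose.pt and coco8-pose.yaml, and it assumes the high-level YOLO wrapper dispatches task='pose' to these classes once the rest of the PR is wired up. Values such as epochs=1 are arbitrary.

# Minimal usage sketch for the new pose task (assumptions noted above).
from ultralytics import YOLO
from ultralytics.yolo.v8.pose import PosePredictor, PoseTrainer, PoseValidator

# High-level route, mirroring the train()/val()/predict() helpers in this diff
model = YOLO('yolov8n-pose.yaml')                         # builds a PoseModel
model.train(data='coco8-pose.yaml', epochs=1)             # PoseTrainer + PoseLoss under the hood
model.val(data='coco8-pose.yaml')                         # PoseValidator reports Box(...) and Pose(...) columns
results = model('https://ultralytics.com/images/bus.jpg')
for r in results:
    print(r.boxes.xyxy, r.keypoints)                      # keypoints shaped (n, *kpt_shape), e.g. (n, 17, 3)

# Low-level route, identical to what the helpers construct internally
trainer = PoseTrainer(overrides=dict(model='yolov8n-pose.yaml', data='coco8-pose.yaml', epochs=1))
trainer.train()
validator = PoseValidator(args=dict(model='yolov8n-pose.pt', data='coco8-pose.yaml'))
validator(model='yolov8n-pose.pt')
predictor = PosePredictor(overrides=dict(model='yolov8n-pose.pt', source='https://ultralytics.com/images/bus.jpg'))
predictor.predict_cli()

Note that the val() helper defaults to coco128-pose.yaml while train() defaults to coco8-pose.yaml; the sketch uses coco8-pose.yaml for both simply to keep the round trip small.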