`ultralytics 8.0.65` YOLOv8 Pose models (#1347)

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Mert Can Demir <validatedev@gmail.com>
Co-authored-by: Laughing <61612323+Laughing-q@users.noreply.github.com>
Co-authored-by: Fabian Greavu <fabiangreavu@gmail.com>
Co-authored-by: Yonghye Kwon <developer.0hye@gmail.com>
Co-authored-by: Eric Pedley <ericpedley@gmail.com>
Co-authored-by: JustasBart <40023722+JustasBart@users.noreply.github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Aarni Koskela <akx@iki.fi>
Co-authored-by: Sergio Sanchez <sergio.ssm.97@gmail.com>
Co-authored-by: Bogdan Gheorghe <112427971+bogdan-galileo@users.noreply.github.com>
Co-authored-by: Jaap van de Loosdrecht <jaap@vdlmv.nl>
Co-authored-by: Noobtoss <96134731+Noobtoss@users.noreply.github.com>
Co-authored-by: nerdyespresso <106761627+nerdyespresso@users.noreply.github.com>
Co-authored-by: Farid Inawan <frdteknikelektro@gmail.com>
Co-authored-by: Laughing-q <1185102784@qq.com>
Co-authored-by: Alexander Duda <Alexander.Duda@me.com>
Co-authored-by: Mehran Ghandehari <mehran.maps@gmail.com>
Co-authored-by: Snyk bot <snyk-bot@snyk.io>
Co-authored-by: majid nasiri <majnasai@gmail.com>
Committed by Ayush Chaurasia via GitHub · parent 9af3e69b1a · commit 1cb92d7f42

@@ -43,21 +43,7 @@ jobs:
        python --version
        pip --version
        pip list
-    - name: Test HUB training (Python Usage 1)
-      shell: python
-      env:
-        APIKEY: ${{ secrets.ULTRALYTICS_HUB_APIKEY }}
-      run: |
-        import os
-        from pathlib import Path
-        from ultralytics import YOLO, hub
-        from ultralytics.yolo.utils import USER_CONFIG_DIR
-        Path(USER_CONFIG_DIR / 'settings.yaml').unlink()
-        key = os.environ['APIKEY']
-        hub.reset_model(key)
-        model = YOLO('https://hub.ultralytics.com/models/' + key)
-        model.train()
-    - name: Test HUB training (Python Usage 2)
+    - name: Test HUB training
      shell: python
      env:
        APIKEY: ${{ secrets.ULTRALYTICS_HUB_APIKEY }}
@@ -73,36 +59,6 @@ jobs:
        hub.login(key)
        model = YOLO('https://hub.ultralytics.com/models/' + model_id)
        model.train()
-    - name: Test HUB training (Python Usage 3)
-      shell: python
-      env:
-        APIKEY: ${{ secrets.ULTRALYTICS_HUB_APIKEY }}
-      run: |
-        import os
-        from pathlib import Path
-        from ultralytics import YOLO, hub
-        from ultralytics.yolo.utils import USER_CONFIG_DIR
-        Path(USER_CONFIG_DIR / 'settings.yaml').unlink()
-        key = os.environ['APIKEY']
-        hub.reset_model(key)
-        model = YOLO(key)
-        model.train()
-    - name: Test HUB training (Python Usage 4)
-      shell: python
-      env:
-        APIKEY: ${{ secrets.ULTRALYTICS_HUB_APIKEY }}
-      run: |
-        import os
-        from pathlib import Path
-        from ultralytics import YOLO, hub
-        from ultralytics.yolo.utils import USER_CONFIG_DIR
-        Path(USER_CONFIG_DIR / 'settings.yaml').unlink()
-        key = os.environ['APIKEY']
-        hub.reset_model(key)
-        key, model_id = key.split('_')
-        hub.login(key)
-        model = YOLO(model_id)
-        model.train()

  Benchmarks:
    runs-on: ${{ matrix.os }}
@@ -154,6 +110,11 @@ jobs:
      run: |
        from ultralytics.yolo.utils.benchmarks import benchmark
        benchmark(model='${{ matrix.model }}-cls.pt', imgsz=160, half=False, hard_fail=0.61)
+    - name: Benchmark PoseModel
+      shell: python
+      run: |
+        from ultralytics.yolo.utils.benchmarks import benchmark
+        benchmark(model='${{ matrix.model }}-pose.pt', imgsz=160, half=False, hard_fail=0.0)
    - name: Benchmark Summary
      run: |
        cat benchmarks.log
@@ -200,30 +161,38 @@ jobs:
        python --version
        pip --version
        pip list
-    - name: Test detection
-      shell: bash # for Windows compatibility
-      run: |
-        yolo task=detect mode=train data=coco8.yaml model=yolov8n.yaml epochs=1 imgsz=32
-        yolo task=detect mode=train data=coco8.yaml model=yolov8n.pt epochs=1 imgsz=32
-        yolo task=detect mode=val data=coco8.yaml model=runs/detect/train/weights/last.pt imgsz=32
-        yolo task=detect mode=predict model=runs/detect/train/weights/last.pt imgsz=32 source=ultralytics/assets/bus.jpg
-        yolo mode=export model=runs/detect/train/weights/last.pt imgsz=32 format=torchscript
-    - name: Test segmentation
-      shell: bash # for Windows compatibility
-      run: |
-        yolo task=segment mode=train data=coco8-seg.yaml model=yolov8n-seg.yaml epochs=1 imgsz=32
-        yolo task=segment mode=train data=coco8-seg.yaml model=yolov8n-seg.pt epochs=1 imgsz=32
-        yolo task=segment mode=val data=coco8-seg.yaml model=runs/segment/train/weights/last.pt imgsz=32
-        yolo task=segment mode=predict model=runs/segment/train/weights/last.pt imgsz=32 source=ultralytics/assets/bus.jpg
-        yolo mode=export model=runs/segment/train/weights/last.pt imgsz=32 format=torchscript
-    - name: Test classification
-      shell: bash # for Windows compatibility
-      run: |
-        yolo task=classify mode=train data=imagenet10 model=yolov8n-cls.yaml epochs=1 imgsz=32
-        yolo task=classify mode=train data=imagenet10 model=yolov8n-cls.pt epochs=1 imgsz=32
-        yolo task=classify mode=val data=imagenet10 model=runs/classify/train/weights/last.pt imgsz=32
-        yolo task=classify mode=predict model=runs/classify/train/weights/last.pt imgsz=32 source=ultralytics/assets/bus.jpg
-        yolo mode=export model=runs/classify/train/weights/last.pt imgsz=32 format=torchscript
+    - name: Test Detect
+      shell: bash # for Windows compatibility
+      run: |
+        yolo detect train data=coco8.yaml model=yolov8n.yaml epochs=1 imgsz=32
+        yolo detect train data=coco8.yaml model=yolov8n.pt epochs=1 imgsz=32
+        yolo detect val data=coco8.yaml model=runs/detect/train/weights/last.pt imgsz=32
+        yolo detect predict model=runs/detect/train/weights/last.pt imgsz=32 source=ultralytics/assets/bus.jpg
+        yolo export model=runs/detect/train/weights/last.pt imgsz=32 format=torchscript
+    - name: Test Segment
+      shell: bash # for Windows compatibility
+      run: |
+        yolo segment train data=coco8-seg.yaml model=yolov8n-seg.yaml epochs=1 imgsz=32
+        yolo segment train data=coco8-seg.yaml model=yolov8n-seg.pt epochs=1 imgsz=32
+        yolo segment val data=coco8-seg.yaml model=runs/segment/train/weights/last.pt imgsz=32
+        yolo segment predict model=runs/segment/train/weights/last.pt imgsz=32 source=ultralytics/assets/bus.jpg
+        yolo export model=runs/segment/train/weights/last.pt imgsz=32 format=torchscript
+    - name: Test Classify
+      shell: bash # for Windows compatibility
+      run: |
+        yolo classify train data=imagenet10 model=yolov8n-cls.yaml epochs=1 imgsz=32
+        yolo classify train data=imagenet10 model=yolov8n-cls.pt epochs=1 imgsz=32
+        yolo classify val data=imagenet10 model=runs/classify/train/weights/last.pt imgsz=32
+        yolo classify predict model=runs/classify/train/weights/last.pt imgsz=32 source=ultralytics/assets/bus.jpg
+        yolo export model=runs/classify/train/weights/last.pt imgsz=32 format=torchscript
+    - name: Test Pose
+      shell: bash # for Windows compatibility
+      run: |
+        yolo pose train data=coco8-pose.yaml model=yolov8n-pose.yaml epochs=1 imgsz=32
+        yolo pose train data=coco8-pose.yaml model=yolov8n-pose.pt epochs=1 imgsz=32
+        yolo pose val data=coco8-pose.yaml model=runs/pose/train/weights/last.pt imgsz=32
+        yolo pose predict model=runs/pose/train/weights/last.pt imgsz=32 source=ultralytics/assets/bus.jpg
+        yolo export model=runs/pose/train/weights/last.pt imgsz=32 format=torchscript
    - name: Pytest tests
      shell: bash # for Windows compatibility
      run: pytest tests

@@ -109,7 +109,10 @@ YOLOv8 [Python Docs](https://docs.ultralytics.com/usage/python) for more example

## <div align="center">Models</div>

-All YOLOv8 pretrained models are available here. Detect, Segment and Pose models are pretrained on the [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/datasets/coco.yaml) dataset, while Classify models are pretrained on the [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/datasets/ImageNet.yaml) dataset.
+All YOLOv8 pretrained models are available here. Detect, Segment and Pose models are pretrained on
+the [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/datasets/coco.yaml) dataset, while Classify
+models are pretrained on
+the [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/datasets/ImageNet.yaml) dataset.

[Models](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/models) download automatically from the latest
Ultralytics [release](https://github.com/ultralytics/assets/releases) on first use.
@@ -174,6 +177,28 @@ See [Classification Docs](https://docs.ultralytics.com/tasks/classify/) for usag

</details>

+<details><summary>Pose</summary>
+
+See [Pose Docs](https://docs.ultralytics.com/tasks/pose/) for usage examples with these models.
+
+| Model | size<br><sup>(pixels) | mAP<sup>box<br>50-95 | mAP<sup>pose<br>50-95 | Speed<br><sup>CPU ONNX<br>(ms) | Speed<br><sup>A100 TensorRT<br>(ms) | params<br><sup>(M) | FLOPs<br><sup>(B) |
+| ---------------------------------------------------------------------------------------------------- | --------------------- | -------------------- | --------------------- | ------------------------------ | ----------------------------------- | ------------------ | ----------------- |
+| [YOLOv8n-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-pose.pt) | 640 | - | 49.7 | - | - | 3.3 | 9.2 |
+| [YOLOv8s-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-pose.pt) | 640 | - | 59.2 | - | - | 11.6 | 30.2 |
+| [YOLOv8m-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-pose.pt) | 640 | - | 63.6 | - | - | 26.4 | 81.0 |
+| [YOLOv8l-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-pose.pt) | 640 | - | 67.0 | - | - | 44.4 | 168.6 |
+| [YOLOv8x-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose.pt) | 640 | - | 68.9 | - | - | 69.4 | 263.2 |
+| [YOLOv8x-pose-p6](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose-p6.pt) | 1280 | - | 71.5 | - | - | 99.1 | 1066.4 |
+
+- **mAP<sup>val</sup>** values are for single-model single-scale on [COCO Keypoints val2017](http://cocodataset.org)
+  dataset.
+  <br>Reproduce by `yolo val pose data=coco-pose.yaml device=0`
+- **Speed** averaged over COCO val images using an [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/)
+  instance.
+  <br>Reproduce by `yolo val pose data=coco8-pose.yaml batch=1 device=0|cpu`
+
+</details>

## <div align="center">Integrations</div>

<br>

@@ -163,6 +163,28 @@ Ultralytics [发布页](https://github.com/ultralytics/ultralytics/releases) 自

</details>

+<details><summary>Pose</summary>
+
+See [Pose Docs](https://docs.ultralytics.com/tasks/pose/) for usage examples with these models.
+
+| Model | size<br><sup>(pixels) | mAP<sup>box<br>50-95 | mAP<sup>pose<br>50-95 | Speed<br><sup>CPU ONNX<br>(ms) | Speed<br><sup>A100 TensorRT<br>(ms) | params<br><sup>(M) | FLOPs<br><sup>(B) |
+| ---------------------------------------------------------------------------------------------------- | --------------------- | -------------------- | --------------------- | ------------------------------ | ----------------------------------- | ------------------ | ----------------- |
+| [YOLOv8n-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-pose.pt) | 640 | - | 49.7 | - | - | 3.3 | 9.2 |
+| [YOLOv8s-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-pose.pt) | 640 | - | 59.2 | - | - | 11.6 | 30.2 |
+| [YOLOv8m-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-pose.pt) | 640 | - | 63.6 | - | - | 26.4 | 81.0 |
+| [YOLOv8l-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-pose.pt) | 640 | - | 67.0 | - | - | 44.4 | 168.6 |
+| [YOLOv8x-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose.pt) | 640 | - | 68.9 | - | - | 69.4 | 263.2 |
+| [YOLOv8x-pose-p6](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose-p6.pt) | 1280 | - | 71.5 | - | - | 99.1 | 1066.4 |
+
+- **mAP<sup>val</sup>** values are for single-model single-scale on [COCO Keypoints val2017](http://cocodataset.org)
+  dataset.
+  <br>Reproduce by `yolo val pose data=coco-pose.yaml device=0`
+- **Speed** averaged over COCO val images using an [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/)
+  instance.
+  <br>Reproduce by `yolo val pose data=coco8-pose.yaml batch=1 device=0|cpu`
+
+</details>

## <div align="center">模块集成</div>

<br>

@@ -2,7 +2,7 @@
# Builds ultralytics/ultralytics:latest image on DockerHub https://hub.docker.com/r/ultralytics/ultralytics
# Image is CUDA-optimized for YOLOv8 single/multi-GPU training and inference

-# Start FROM PyTorch image https://hub.docker.com/r/pytorch/pytorch
+# Start FROM PyTorch image https://hub.docker.com/r/pytorch/pytorch or nvcr.io/nvidia/pytorch:23.03-py3
FROM pytorch/pytorch:2.0.0-cuda11.7-cudnn8-runtime

# Downloads to user config dir

@@ -42,4 +42,4 @@ Since its launch YOLO has been employed in various applications, including auton

YOLOv8 is designed with a strong focus on speed, size, and accuracy, making it a compelling choice for various vision AI tasks. It outperforms previous versions by incorporating innovations like a new backbone network, a new anchor-free split head, and new loss functions. These improvements enable YOLOv8 to deliver superior results, while maintaining a compact size and exceptional speed.

-Additionally, YOLOv8 supports a full range of vision AI tasks, including [detection](tasks/detect.md), [segmentation](tasks/segment.md), [pose estimation](tasks/keypoints.md), [tracking](modes/track.md), and [classification](tasks/classify.md). This versatility allows users to leverage YOLOv8's capabilities across diverse applications and domains.
+Additionally, YOLOv8 supports a full range of vision AI tasks, including [detection](tasks/detect.md), [segmentation](tasks/segment.md), [pose estimation](tasks/pose.md), [tracking](modes/track.md), and [classification](tasks/classify.md). This versatility allows users to leverage YOLOv8's capabilities across diverse applications and domains.

@@ -1,7 +1,7 @@
<img width="1024" src="https://github.com/ultralytics/assets/raw/main/yolov8/banner-integrations.png">

**Benchmark mode** is used to profile the speed and accuracy of various export formats for YOLOv8. The benchmarks
-provide information on the size of the exported format, its `mAP50-95` metrics (for object detection and segmentation)
+provide information on the size of the exported format, its `mAP50-95` metrics (for object detection, segmentation and pose)
or `accuracy_top5` metrics (for classification), and the inference time in milliseconds per image across various export
formats like ONNX, OpenVINO, TensorRT and others. This information can help users choose the optimal export format for
their specific use case based on their requirements for speed and accuracy.
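As a concrete illustration, here is a minimal sketch of invoking this mode from Python, mirroring the `benchmark()` call this commit adds to the CI workflow above; `hard_fail` sets a metric floor below which the run is treated as a failure.

```python
# Minimal sketch, mirroring the benchmark() call added to the CI workflow above
from ultralytics.yolo.utils.benchmarks import benchmark

# Profile a pose model across export formats at a small image size;
# hard_fail=0.0 accepts any run whose metric stays above 0.0
benchmark(model='yolov8n-pose.pt', imgsz=160, half=False, hard_fail=0.0)
```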

@@ -54,7 +54,7 @@ for applications such as surveillance systems or self-driving cars.
## [Benchmark](benchmark.md)

Benchmark mode is used to profile the speed and accuracy of various export formats for YOLOv8. The benchmarks provide
-information on the size of the exported format, its `mAP50-95` metrics (for object detection and segmentation)
+information on the size of the exported format, its `mAP50-95` metrics (for object detection, segmentation and pose)
or `accuracy_top5` metrics (for classification), and the inference time in milliseconds per image across various export
formats like ONNX, OpenVINO, TensorRT and others. This information can help users choose the optimal export format for
their specific use case based on their requirements for speed and accuracy.

@@ -88,6 +88,8 @@ task.
| `box` | `7.5` | box loss gain |
| `cls` | `0.5` | cls loss gain (scale with pixels) |
| `dfl` | `1.5` | dfl loss gain |
+| `pose` | `12.0` | pose loss gain (pose-only) |
+| `kobj` | `2.0` | keypoint obj loss gain (pose-only) |
| `fl_gamma` | `0.0` | focal loss gamma (efficientDet default gamma=1.5) |
| `label_smoothing` | `0.0` | label smoothing (fraction) |
| `nbs` | `64` | nominal batch size |
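The two new gains apply only to pose training. As a hedged sketch of overriding them (assuming `train()` accepts them as keyword overrides, the same way the CLI accepts `key=value` pairs):

```python
# Sketch: overriding the new pose-specific loss gains at train time,
# assuming they can be passed as keyword overrides like other train settings
from ultralytics import YOLO

model = YOLO('yolov8n-pose.pt')
model.train(data='coco8-pose.yaml', epochs=1, imgsz=320, pose=12.0, kobj=2.0)
```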

@@ -175,9 +175,9 @@ show_source: false
show_root_toc_entry: false
---

-## scale_segments
+## scale_coords

-:::ultralytics.yolo.utils.ops.scale_segments
+:::ultralytics.yolo.utils.ops.scale_coords
handler: python
options:
show_source: false
@@ -193,9 +193,9 @@ show_source: false
show_root_toc_entry: false
---

-## clip_segments
+## clip_coords

-:::ultralytics.yolo.utils.ops.clip_segments
+:::ultralytics.yolo.utils.ops.clip_coords
handler: python
options:
show_source: false

@@ -122,7 +122,7 @@ Use a trained YOLOv8n-cls model to run predictions on images.
        yolo classify predict model=path/to/best.pt source='https://ultralytics.com/images/bus.jpg'  # predict with custom model
        ```

-Read more details of `predict` in our [Predict](https://docs.ultralytics.com/modes/predict/) page.
+See full `predict` mode details in the [Predict](https://docs.ultralytics.com/modes/predict/) page.

## Export

@@ -150,7 +150,7 @@ Export a YOLOv8n-cls model to a different format like ONNX, CoreML, etc.
        ```

Available YOLOv8-cls export formats are in the table below. You can predict or validate directly on exported models,
-i.e. `yolo predict model=yolov8n-cls.onnx`.
+i.e. `yolo predict model=yolov8n-cls.onnx`. Usage examples are shown for your model after export completes.

| Format | `format` Argument | Model | Metadata |
|--------------------------------------------------------------------|-------------------|-------------------------------|----------|

@@ -167,3 +167,4 @@ i.e. `yolo predict model=yolov8n-cls.onnx`.
| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n-cls_web_model/` | ✅ |
| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n-cls_paddle_model/` | ✅ |
+
+See full `export` details in the [Export](https://docs.ultralytics.com/modes/export/) page.

@@ -123,7 +123,7 @@ Use a trained YOLOv8n model to run predictions on images.
        yolo detect predict model=path/to/best.pt source='https://ultralytics.com/images/bus.jpg'  # predict with custom model
        ```

-Read more details of `predict` in our [Predict](https://docs.ultralytics.com/modes/predict/) page.
+See full `predict` mode details in the [Predict](https://docs.ultralytics.com/modes/predict/) page.

## Export

@@ -151,7 +151,7 @@ Export a YOLOv8n model to a different format like ONNX, CoreML, etc.
        ```

Available YOLOv8 export formats are in the table below. You can predict or validate directly on exported models,
-i.e. `yolo predict model=yolov8n.onnx`.
+i.e. `yolo predict model=yolov8n.onnx`. Usage examples are shown for your model after export completes.

| Format | `format` Argument | Model | Metadata |
|--------------------------------------------------------------------|-------------------|---------------------------|----------|

@@ -167,3 +167,5 @@ i.e. `yolo predict model=yolov8n.onnx`.
| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ |
| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n_web_model/` | ✅ |
| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n_paddle_model/` | ✅ |
+
+See full `export` details in the [Export](https://docs.ultralytics.com/modes/export/) page.

@@ -2,7 +2,7 @@

YOLOv8 is an AI framework that supports multiple computer vision **tasks**. The framework can be used to
perform [detection](detect.md), [segmentation](segment.md), [classification](classify.md),
-and [keypoints](keypoints.md) detection. Each of these tasks has a different objective and use case.
+and [pose](pose.md) estimation. Each of these tasks has a different objective and use case.

<img width="1024" src="https://user-images.githubusercontent.com/26833433/212094133-6bb8c21c-3d47-41df-a512-81c5931054ae.png">

@@ -29,15 +29,13 @@ images based on their content. It uses a variant of the EfficientNet architectur

[Classification Examples](classify.md){ .md-button .md-button--primary}

-<!--
-## [Keypoints](keypoints.md)
+## [Pose](pose.md)

-Keypoints detection is a task that involves detecting specific points in an image or video frame. These points are
+Pose/keypoint detection is a task that involves detecting specific points in an image or video frame. These points are
referred to as keypoints and are used to track movement or pose estimation. YOLOv8 can detect keypoints in an image or
video frame with high accuracy and speed.

-[Keypoints Examples](keypoints.md){ .md-button .md-button--primary}
--->
+[Pose Examples](pose.md){ .md-button .md-button--primary}

## Conclusion

@ -1,149 +0,0 @@
Key Point Estimation is a task that involves identifying the location of specific points in an image, usually referred
to as keypoints. The keypoints can represent various parts of the object such as joints, landmarks, or other distinctive
features. The locations of the keypoints are usually represented as a set of 2D `[x, y]` or 3D `[x, y, visible]`
coordinates.
<img width="1024" src="https://user-images.githubusercontent.com/26833433/212094133-6bb8c21c-3d47-41df-a512-81c5931054ae.png">
The output of a keypoint detector is a set of points that represent the keypoints on the object in the image, usually
along with the confidence scores for each point. Keypoint estimation is a good choice when you need to identify specific
parts of an object in a scene, and their location in relation to each other.
!!! tip "Tip"
YOLOv8 _keypoints_ models use the `-kpts` suffix, i.e. `yolov8n-kpts.pt`. These models are trained on the COCO dataset and are suitable for a variety of keypoint estimation tasks.
[Models](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/models/v8){ .md-button .md-button--primary}
## Train TODO
Train an OpenPose model on a custom dataset of keypoints using the OpenPose framework. For more information on how to
train an OpenPose model on a custom dataset, see the OpenPose Training page.
!!! example ""
=== "Python"
```python
from ultralytics import YOLO
# Load a model
model = YOLO('yolov8n.yaml') # build a new model from YAML
model = YOLO('yolov8n.pt') # load a pretrained model (recommended for training)
model = YOLO('yolov8n.yaml').load('yolov8n.pt') # build from YAML and transfer weights
# Train the model
model.train(data='coco128.yaml', epochs=100, imgsz=640)
```
=== "CLI"
```bash
# Build a new model from YAML and start training from scratch
yolo detect train data=coco128.yaml model=yolov8n.yaml epochs=100 imgsz=640
# Start training from a pretrained *.pt model
yolo detect train data=coco128.yaml model=yolov8n.pt epochs=100 imgsz=640
# Build a new model from YAML, transfer pretrained weights to it and start training
yolo detect train data=coco128.yaml model=yolov8n.yaml pretrained=yolov8n.pt epochs=100 imgsz=640
```
## Val TODO
Validate trained YOLOv8n model accuracy on the COCO128 dataset. No arguments need to be passed as the `model` retains its
training `data` and arguments as model attributes.
!!! example ""
=== "Python"
```python
from ultralytics import YOLO
# Load a model
model = YOLO('yolov8n.pt') # load an official model
model = YOLO('path/to/best.pt') # load a custom model
# Validate the model
metrics = model.val() # no arguments needed, dataset and settings remembered
metrics.box.map # map50-95
metrics.box.map50 # map50
metrics.box.map75 # map75
metrics.box.maps # a list containing mAP50-95 for each category
```
=== "CLI"
```bash
yolo detect val model=yolov8n.pt # val official model
yolo detect val model=path/to/best.pt # val custom model
```
## Predict TODO
Use a trained YOLOv8n model to run predictions on images.
!!! example ""
=== "Python"
```python
from ultralytics import YOLO
# Load a model
model = YOLO('yolov8n.pt') # load an official model
model = YOLO('path/to/best.pt') # load a custom model
# Predict with the model
results = model('https://ultralytics.com/images/bus.jpg') # predict on an image
```
=== "CLI"
```bash
yolo detect predict model=yolov8n.pt source='https://ultralytics.com/images/bus.jpg' # predict with official model
yolo detect predict model=path/to/best.pt source='https://ultralytics.com/images/bus.jpg' # predict with custom model
```
Read more details of `predict` in our [Predict](https://docs.ultralytics.com/modes/predict/) page.
## Export TODO
Export a YOLOv8n model to a different format like ONNX, CoreML, etc.
!!! example ""
=== "Python"
```python
from ultralytics import YOLO
# Load a model
model = YOLO('yolov8n.pt') # load an official model
model = YOLO('path/to/best.pt') # load a custom trained model
# Export the model
model.export(format='onnx')
```
=== "CLI"
```bash
yolo export model=yolov8n.pt format=onnx # export official model
yolo export model=path/to/best.pt format=onnx # export custom trained model
```
Available YOLOv8-pose export formats are in the table below. You can predict or validate directly on exported models,
i.e. `yolo predict model=yolov8n-pose.onnx`.
| Format | `format` Argument | Model | Metadata |
|--------------------------------------------------------------------|-------------------|---------------------------|----------|
| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | ✅ |
| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n.torchscript` | ✅ |
| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n.onnx` | ✅ |
| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n_openvino_model/` | ✅ |
| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n.engine` | ✅ |
| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n.mlmodel` | ✅ |
| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n_saved_model/` | ✅ |
| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n.pb` | ❌ |
| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n.tflite` | ✅ |
| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ |
| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n_web_model/` | ✅ |
| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n_paddle_model/` | ✅ |

@ -0,0 +1,175 @@
Pose estimation is a task that involves identifying the location of specific points in an image, usually referred
to as keypoints. The keypoints can represent various parts of the object such as joints, landmarks, or other distinctive
features. The locations of the keypoints are usually represented as a set of 2D `[x, y]` or 3D `[x, y, visible]`
coordinates.
<img width="1024" src="https://user-images.githubusercontent.com/26833433/212094133-6bb8c21c-3d47-41df-a512-81c5931054ae.png">
The output of a pose estimation model is a set of points that represent the keypoints on an object in the image, usually
along with the confidence scores for each point. Pose estimation is a good choice when you need to identify specific
parts of an object in a scene, and their location in relation to each other.
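For illustration, a minimal sketch of inspecting that output in Python; the `keypoints` attribute on prediction results is an assumption here, not something this diff confirms.

```python
# Minimal sketch, assuming each prediction result exposes a `keypoints` attribute
from ultralytics import YOLO

model = YOLO('yolov8n-pose.pt')  # pretrained pose model
results = model('https://ultralytics.com/images/bus.jpg')  # run inference

for result in results:
    print(result.keypoints)  # one set of [x, y, visible] keypoints per detected person
```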
!!! tip "Tip"

    YOLOv8 _pose_ models use the `-pose` suffix, i.e. `yolov8n-pose.pt`. These models are trained on the [COCO keypoints](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/datasets/coco-pose.yaml) dataset and are suitable for a variety of pose estimation tasks.
## [Models](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/models/v8)
YOLOv8 pretrained Pose models are shown here. Detect, Segment and Pose models are pretrained on
the [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/datasets/coco.yaml) dataset, while Classify
models are pretrained on
the [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/datasets/ImageNet.yaml) dataset.
[Models](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/models) download automatically from the latest
Ultralytics [release](https://github.com/ultralytics/assets/releases) on first use.
| Model | size<br><sup>(pixels) | mAP<sup>box<br>50-95 | mAP<sup>pose<br>50-95 | Speed<br><sup>CPU ONNX<br>(ms) | Speed<br><sup>A100 TensorRT<br>(ms) | params<br><sup>(M) | FLOPs<br><sup>(B) |
|------------------------------------------------------------------------------------------------------|-----------------------|----------------------|-----------------------|--------------------------------|-------------------------------------|--------------------|-------------------|
| [YOLOv8n-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-pose.pt) | 640 | - | 49.7 | - | - | 3.3 | 9.2 |
| [YOLOv8s-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-pose.pt) | 640 | - | 59.2 | - | - | 11.6 | 30.2 |
| [YOLOv8m-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-pose.pt) | 640 | - | 63.6 | - | - | 26.4 | 81.0 |
| [YOLOv8l-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-pose.pt) | 640 | - | 67.0 | - | - | 44.4 | 168.6 |
| [YOLOv8x-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose.pt) | 640 | - | 68.9 | - | - | 69.4 | 263.2 |
| [YOLOv8x-pose-p6](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose-p6.pt) | 1280 | - | 71.5 | - | - | 99.1 | 1066.4 |
- **mAP<sup>val</sup>** values are for single-model single-scale on [COCO Keypoints val2017](http://cocodataset.org)
dataset.
<br>Reproduce by `yolo val pose data=coco-pose.yaml device=0`
- **Speed** averaged over COCO val images using an [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/)
instance.
<br>Reproduce by `yolo val pose data=coco8-pose.yaml batch=1 device=0|cpu`
## Train
Train a YOLOv8-pose model on the COCO128-pose dataset.
!!! example ""
=== "Python"
```python
from ultralytics import YOLO
# Load a model
model = YOLO('yolov8n-pose.yaml') # build a new model from YAML
model = YOLO('yolov8n-pose.pt') # load a pretrained model (recommended for training)
model = YOLO('yolov8n-pose.yaml').load('yolov8n-pose.pt') # build from YAML and transfer weights
# Train the model
model.train(data='coco128-pose.yaml', epochs=100, imgsz=640)
```
=== "CLI"
```bash
# Build a new model from YAML and start training from scratch
yolo pose train data=coco128-pose.yaml model=yolov8n-pose.yaml epochs=100 imgsz=640
# Start training from a pretrained *.pt model
yolo pose train data=coco128-pose.yaml model=yolov8n-pose.pt epochs=100 imgsz=640
# Build a new model from YAML, transfer pretrained weights to it and start training
yolo pose train data=coco128-pose.yaml model=yolov8n-pose.yaml pretrained=yolov8n-pose.pt epochs=100 imgsz=640
```
## Val
Validate trained YOLOv8n-pose model accuracy on the COCO128-pose dataset. No arguments need to be passed as the `model`
retains its training `data` and arguments as model attributes.
!!! example ""
=== "Python"
```python
from ultralytics import YOLO
# Load a model
model = YOLO('yolov8n-pose.pt') # load an official model
model = YOLO('path/to/best.pt') # load a custom model
# Validate the model
metrics = model.val() # no arguments needed, dataset and settings remembered
metrics.box.map # map50-95
metrics.box.map50 # map50
metrics.box.map75 # map75
metrics.box.maps # a list containing mAP50-95 for each category
```
=== "CLI"
```bash
yolo pose val model=yolov8n-pose.pt # val official model
yolo pose val model=path/to/best.pt # val custom model
```
## Predict
Use a trained YOLOv8n-pose model to run predictions on images.
!!! example ""
=== "Python"
```python
from ultralytics import YOLO
# Load a model
model = YOLO('yolov8n-pose.pt') # load an official model
model = YOLO('path/to/best.pt') # load a custom model
# Predict with the model
results = model('https://ultralytics.com/images/bus.jpg') # predict on an image
```
=== "CLI"
```bash
yolo pose predict model=yolov8n-pose.pt source='https://ultralytics.com/images/bus.jpg' # predict with official model
yolo pose predict model=path/to/best.pt source='https://ultralytics.com/images/bus.jpg' # predict with custom model
```
See full `predict` mode details in the [Predict](https://docs.ultralytics.com/modes/predict/) page.
## Export
Export a YOLOv8n-pose model to a different format like ONNX, CoreML, etc.
!!! example ""
=== "Python"
```python
from ultralytics import YOLO
# Load a model
model = YOLO('yolov8n-pose.pt') # load an official model
model = YOLO('path/to/best.pt') # load a custom trained model
# Export the model
model.export(format='onnx')
```
=== "CLI"
```bash
yolo export model=yolov8n-pose.pt format=onnx # export official model
yolo export model=path/to/best.pt format=onnx # export custom trained model
```
Available YOLOv8-pose export formats are in the table below. You can predict or validate directly on exported models,
i.e. `yolo predict model=yolov8n-pose.onnx`. Usage examples are shown for your model after export completes.
| Format | `format` Argument | Model | Metadata |
|--------------------------------------------------------------------|-------------------|--------------------------------|----------|
| [PyTorch](https://pytorch.org/) | - | `yolov8n-pose.pt` | ✅ |
| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n-pose.torchscript` | ✅ |
| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n-pose.onnx` | ✅ |
| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n-pose_openvino_model/` | ✅ |
| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n-pose.engine` | ✅ |
| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n-pose.mlmodel` | ✅ |
| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n-pose_saved_model/` | ✅ |
| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n-pose.pb` | ❌ |
| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n-pose.tflite` | ✅ |
| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n-pose_edgetpu.tflite` | ✅ |
| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n-pose_web_model/` | ✅ |
| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n-pose_paddle_model/` | ✅ |
See full `export` details in the [Export](https://docs.ultralytics.com/modes/export/) page.

@@ -127,7 +127,7 @@ Use a trained YOLOv8n-seg model to run predictions on images.
        yolo segment predict model=path/to/best.pt source='https://ultralytics.com/images/bus.jpg'  # predict with custom model
        ```

-Read more details of `predict` in our [Predict](https://docs.ultralytics.com/modes/predict/) page.
+See full `predict` mode details in the [Predict](https://docs.ultralytics.com/modes/predict/) page.

## Export

@@ -155,7 +155,7 @@ Export a YOLOv8n-seg model to a different format like ONNX, CoreML, etc.
        ```

Available YOLOv8-seg export formats are in the table below. You can predict or validate directly on exported models,
-i.e. `yolo predict model=yolov8n-seg.onnx`.
+i.e. `yolo predict model=yolov8n-seg.onnx`. Usage examples are shown for your model after export completes.

| Format | `format` Argument | Model | Metadata |
|--------------------------------------------------------------------|-------------------|-------------------------------|----------|

@@ -172,4 +172,4 @@ i.e. `yolo predict model=yolov8n-seg.onnx`.
| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n-seg_web_model/` | ✅ |
| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n-seg_paddle_model/` | ✅ |

See full `export` details in the [Export](https://docs.ultralytics.com/modes/export/) page.

@@ -110,6 +110,8 @@ The training settings for YOLO models encompass various hyperparameters and conf
| `box` | `7.5` | box loss gain |
| `cls` | `0.5` | cls loss gain (scale with pixels) |
| `dfl` | `1.5` | dfl loss gain |
+| `pose` | `12.0` | pose loss gain (pose-only) |
+| `kobj` | `2.0` | keypoint obj loss gain (pose-only) |
| `fl_gamma` | `0.0` | focal loss gamma (efficientDet default gamma=1.5) |
| `label_smoothing` | `0.0` | label smoothing (fraction) |
| `nbs` | `64` | nominal batch size |

@@ -74,7 +74,7 @@ trainer.add_callback("on_train_epoch_end", log_model)  # Adds to existing callba
trainer.train()
```

-To know more about Callback triggering events and entry point, checkout our Callbacks guide # TODO
+To learn more about callback triggering events and entry points, check out our [Callbacks Guide](callbacks.md)

## Other engine components

@@ -59,7 +59,6 @@ accurately predict the classes and locations of objects in an image.
    === "Resume"

        ```python
-        # TODO: Resume feature is under development and should be released soon.
        model = YOLO("last.pt")
        model.train(resume=True)
        ```

@@ -32,11 +32,11 @@
    " <a href=\"https://www.kaggle.com/ultralytics/yolov8\"><img src=\"https://kaggle.com/static/images/open-in-kaggle.svg\" alt=\"Open In Kaggle\"></a>\n",
    "<br>\n",
    "\n",
-   "Welcome to the Ultralytics YOLOv8 🚀 notebook! <a href=\"https://github.com/ultralytics/ultralytics\">YOLOv8</a> is the latest version of the YOLO (You Only Look Once) object detection and image segmentation model developed by <a href=\"https://ultralytics.com\">Ultralytics</a>. This notebook serves as the starting point for exploring the various resources available to help you get started with YOLOv8 and understand its features and capabilities.\n",
+   "Welcome to the Ultralytics YOLOv8 🚀 notebook! <a href=\"https://github.com/ultralytics/ultralytics\">YOLOv8</a> is the latest version of the YOLO (You Only Look Once) AI models developed by <a href=\"https://ultralytics.com\">Ultralytics</a>. This notebook serves as the starting point for exploring the various resources available to help you get started with YOLOv8 and understand its features and capabilities.\n",
    "\n",
-   "The YOLOv8 models are designed to be fast, accurate, and easy to use, making them an excellent choice for a wide range of object detection and image segmentation tasks. They can be trained on large datasets and are capable of running on a variety of hardware platforms, from CPUs to GPUs.\n",
+   "YOLOv8 models are fast, accurate, and easy to use, making them ideal for various object detection and image segmentation tasks. They can be trained on large datasets and run on diverse hardware platforms, from CPUs to GPUs.\n",
    "\n",
-   "Whether you are a seasoned machine learning practitioner or new to the field, we hope that the resources in this notebook will help you get the most out of YOLOv8. Please feel free to browse the <a href=\"https://docs.ultralytics.com/\">YOLOv8 Docs</a> and reach out to us with any questions or feedback.\n",
+   "We hope that the resources in this notebook will help you get the most out of YOLOv8. Please browse the YOLOv8 <a href=\"https://docs.ultralytics.com/\">Docs</a> for details, raise an issue on <a href=\"https://github.com/ultralytics/ultralytics\">GitHub</a> for support, and join our <a href=\"https://discord.gg/n6cFeSPZdD\">Discord</a> community for questions and discussions!\n",
    "\n",
    "</div>"
   ]
@@ -66,7 +66,7 @@
    "import ultralytics\n",
    "ultralytics.checks()"
   ],
-  "execution_count": 1,
+  "execution_count": null,
   "outputs": [
    {
     "output_type": "stream",
@@ -86,7 +86,7 @@
   "source": [
    "# 1. Predict\n",
    "\n",
-   "YOLOv8 may be used directly in the Command Line Interface (CLI) with a `yolo` command for a variety of tasks and modes and accepts additional arguments, i.e. `imgsz=640`. See a full list of available `yolo` [arguments](https://docs.ultralytics.com/usage/cfg/) in the YOLOv8 [Docs](https://docs.ultralytics.com).\n"
+   "YOLOv8 may be used directly in the Command Line Interface (CLI) with a `yolo` command for a variety of tasks and modes and accepts additional arguments, i.e. `imgsz=640`. See a full list of available `yolo` [arguments](https://docs.ultralytics.com/usage/cfg/) and other details in the [YOLOv8 Predict Docs](https://docs.ultralytics.com/modes/predict/).\n"
   ]
  },
  {
@@ -102,7 +102,7 @@
    "# Run inference on an image with YOLOv8n\n",
    "!yolo predict model=yolov8n.pt source='https://ultralytics.com/images/zidane.jpg'"
   ],
-  "execution_count": 3,
+  "execution_count": null,
   "outputs": [
    {
     "output_type": "stream",
@@ -135,7 +135,7 @@
  },
  "source": [
   "# 2. Val\n",
-  "Validate a model's accuracy on the [COCO](https://cocodataset.org/#home) dataset's `val` or `test` splits. The latest YOLOv8 [models](https://github.com/ultralytics/ultralytics#models) are downloaded automatically the first time they are used."
+  "Validate a model's accuracy on the [COCO](https://cocodataset.org/#home) dataset's `val` or `test` splits. The latest YOLOv8 [models](https://github.com/ultralytics/ultralytics#models) are downloaded automatically the first time they are used. See [YOLOv8 Val Docs](https://docs.ultralytics.com/modes/val/) for more information."
  ]
 },
 {
@@ -165,7 +165,7 @@
    "# Validate YOLOv8n on COCO128 val\n",
    "!yolo val model=yolov8n.pt data=coco128.yaml"
   ],
-  "execution_count": 4,
+  "execution_count": null,
   "outputs": [
    {
     "output_type": "stream",
@@ -273,7 +273,7 @@
    "\n",
    "<p align=\"\"><a href=\"https://roboflow.com/?ref=ultralytics\"><img width=\"1000\" src=\"https://github.com/ultralytics/assets/raw/main/yolov8/banner-integrations.png\"/></a></p>\n",
    "\n",
-   "Train YOLOv8 on [Detection](https://docs.ultralytics.com/tasks/detect/), [Segmentation](https://docs.ultralytics.com/tasks/segment/) and [Classification](https://docs.ultralytics.com/tasks/classify/) datasets."
+   "Train YOLOv8 on [Detect](https://docs.ultralytics.com/tasks/detect/), [Segment](https://docs.ultralytics.com/tasks/segment/), [Classify](https://docs.ultralytics.com/tasks/classify/) and [Pose](https://docs.ultralytics.com/tasks/pose/) datasets. See [YOLOv8 Train Docs](https://docs.ultralytics.com/modes/train/) for more information."
   ]
  },
  {
@@ -289,7 +289,7 @@
    "# Train YOLOv8n on COCO128 for 3 epochs\n",
    "!yolo train model=yolov8n.pt data=coco128.yaml epochs=3 imgsz=640"
   ],
-  "execution_count": 5,
+  "execution_count": null,
   "outputs": [
    {
     "output_type": "stream",
@@ -449,7 +449,7 @@
   "source": [
    "# 4. Export\n",
    "\n",
-   "Export a YOLOv8 model to any supported format with the `format` argument, i.e. `format=onnx`.\n",
+   "Export a YOLOv8 model to any supported format below with the `format` argument, i.e. `format=onnx`. See [YOLOv8 Export Docs](https://docs.ultralytics.com/modes/export/) for more information.\n",
    "\n",
    "- 💡 ProTip: Export to [ONNX](https://onnx.ai/) or [OpenVINO](https://docs.openvino.ai/latest/index.html) for up to 3x CPU speedup. \n",
    "- 💡 ProTip: Export to [TensorRT](https://developer.nvidia.com/tensorrt) for up to 5x GPU speedup.\n",
@@ -487,7 +487,7 @@
   "id": "CYIjW4igCjqD",
   "outputId": "49b5bb9d-2c16-415b-c3e7-ec95c15a9e62"
  },
- "execution_count": 6,
+ "execution_count": null,
  "outputs": [
   {
    "output_type": "stream",
@@ -515,7 +515,7 @@
  "source": [
   "# 5. Python Usage\n",
   "\n",
-  "YOLOv8 was reimagined using Python-first principles for the most seamless Python YOLO experience yet. YOLOv8 models can be loaded from a trained checkpoint or created from scratch. Then methods are used to train, val, predict, and export the model. See a detailed Python usage examples in the YOLOv8 [Docs](https://docs.ultralytics.com/usage/python/)."
+  "YOLOv8 was reimagined using Python-first principles for the most seamless Python YOLO experience yet. YOLOv8 models can be loaded from a trained checkpoint or created from scratch. Then methods are used to train, val, predict, and export the model. See detailed Python usage examples in the [YOLOv8 Python Docs](https://docs.ultralytics.com/usage/python/)."
  ],
  "metadata": {
   "id": "kUMOQ0OeDBJG"
@@ -547,7 +547,7 @@
  "source": [
   "# 6. Tasks\n",
   "\n",
-  "YOLOv8 can train, val, predict and export models for the 3 primary tasks in vision AI: detection, segmentation and classification.\n",
+  "YOLOv8 can train, val, predict and export models for the most common tasks in vision AI: [Detect](https://docs.ultralytics.com/tasks/detect/), [Segment](https://docs.ultralytics.com/tasks/segment/), [Classify](https://docs.ultralytics.com/tasks/classify/) and [Pose](https://docs.ultralytics.com/tasks/pose/). See [YOLOv8 Tasks Docs](https://docs.ultralytics.com/tasks/) for more information.\n",
   "\n",
   "<img width=\"1024\" src=\"https://user-images.githubusercontent.com/26833433/212094133-6bb8c21c-3d47-41df-a512-81c5931054ae.png\">\n"
  ],
@@ -636,6 +636,33 @@
   "execution_count": null,
   "outputs": []
  },
+ {
+  "cell_type": "markdown",
+  "source": [
+   "## 4. Pose\n",
+   "\n",
+   "YOLOv8 _pose_ models use the `-pose` suffix, i.e. `yolov8n-pose.pt` and are pretrained on COCO Keypoints. See [Pose Docs](https://docs.ultralytics.com/tasks/pose/) for full details."
+  ],
+  "metadata": {
+   "id": "SpIaFLiO11TG"
+  }
+ },
+ {
+  "cell_type": "code",
+  "source": [
+   "# Load YOLOv8n-pose, train it on COCO8-pose for 3 epochs and predict an image with it\n",
+   "from ultralytics import YOLO\n",
+   "\n",
+   "model = YOLO('yolov8n-pose.pt')  # load a pretrained YOLOv8n pose model\n",
+   "model.train(data='coco8-pose.yaml', epochs=3)  # train the model\n",
+   "model('https://ultralytics.com/images/bus.jpg')  # predict on an image"
+  ],
+  "metadata": {
+   "id": "si4aKFNg19vX"
+  },
+  "execution_count": null,
+  "outputs": []
+ },
 {
  "cell_type": "markdown",
  "metadata": {

@@ -65,7 +65,7 @@ extra:
      data: 0
      note: >-
        Thanks for your feedback!<br>
-        <a href="https://github.com/ultralytics/ultralytics/issues/new?title=Docs+Feedback+for+{title}+page+at+{url}&labels=enhancement&template=feature-request.yml" target="_blank" rel="noopener">Tell us what we can improve.</a>
+        <a href="https://github.com/ultralytics/ultralytics/issues/new?title=Docs+Feedback+for+{title}+page+at+https://docs.ultralytics.com/{url}&labels=enhancement&template=feature-request.yml" target="_blank" rel="noopener">Tell us what we can improve.</a>

  social:
    - icon: fontawesome/brands/github
@@ -134,7 +134,7 @@ nav:
      - Detect: tasks/detect.md
      - Segment: tasks/segment.md
      - Classify: tasks/classify.md
-      # - Keypoints: tasks/keypoints.md
+      - Pose: tasks/pose.md
  - Usage:
      - CLI: usage/cli.md
      - Python: usage/python.md

@@ -33,6 +33,10 @@ def test_train_cls():
    run(f'yolo train classify model={CFG}-cls.yaml data=imagenet10 imgsz=32 epochs=1')


+def test_train_pose():
+    run(f'yolo train pose model={CFG}-pose.yaml data=coco8-pose.yaml imgsz=32 epochs=1')
+
+
# Val checks -----------------------------------------------------------------------------------------------------------
def test_val_detect():
    run(f'yolo val detect model={MODEL}.pt data=coco8.yaml imgsz=32')

@@ -46,6 +50,10 @@ def test_val_classify():
    run(f'yolo val classify model={MODEL}-cls.pt data=imagenet10 imgsz=32')


+def test_val_pose():
+    run(f'yolo val pose model={MODEL}-pose.pt data=coco8-pose.yaml imgsz=32')
+
+
# Predict checks -------------------------------------------------------------------------------------------------------
def test_predict_detect():
    run(f"yolo predict model={MODEL}.pt source={ROOT / 'assets'} imgsz=32 save save_crop save_txt")

@@ -63,6 +71,10 @@ def test_predict_classify():
    run(f"yolo predict model={MODEL}-cls.pt source={ROOT / 'assets'} imgsz=32 save save_txt")


+def test_predict_pose():
+    run(f"yolo predict model={MODEL}-pose.pt source={ROOT / 'assets'} imgsz=32 save save_txt")
+
+
# Export checks --------------------------------------------------------------------------------------------------------
def test_export_detect_torchscript():
    run(f'yolo export model={MODEL}.pt format=torchscript')
@@ -76,6 +88,10 @@ def test_export_classify_torchscript():
    run(f'yolo export model={MODEL}-cls.pt format=torchscript')


+def test_export_pose_torchscript():
+    run(f'yolo export model={MODEL}-pose.pt format=torchscript')
+
+
def test_export_detect_edgetpu(enabled=False):
    if enabled and LINUX:
        run(f'yolo export model={MODEL}.pt format=edgetpu')

@@ -1,6 +1,6 @@
# Ultralytics YOLO 🚀, GPL-3.0 license

-__version__ = '8.0.65'
+__version__ = '8.0.66'

from ultralytics.hub import start
from ultralytics.yolo.engine.model import YOLO

@@ -0,0 +1,38 @@
# Ultralytics YOLO 🚀, GPL-3.0 license
# COCO 2017 dataset http://cocodataset.org by Microsoft
# Example usage: yolo train data=coco-pose.yaml
# parent
# ├── ultralytics
# └── datasets
#     └── coco-pose  ← downloads here (20.1 GB)

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/coco-pose  # dataset root dir
train: train2017.txt  # train images (relative to 'path') 118287 images
val: val2017.txt  # val images (relative to 'path') 5000 images
test: test-dev2017.txt  # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794

# Keypoints
kpt_shape: [17, 3]  # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
flip_idx: [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]

# Classes
names:
  0: person

# Download script/URL (optional)
download: |
  from ultralytics.yolo.utils.downloads import download
  from pathlib import Path

  # Download labels
  dir = Path(yaml['path'])  # dataset root dir
  url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'
  urls = [url + 'coco2017labels-pose.zip']  # labels
  download(urls, dir=dir.parent)
  # Download data
  urls = ['http://images.cocodataset.org/zips/train2017.zip',  # 19G, 118k images
          'http://images.cocodataset.org/zips/val2017.zip',  # 1G, 5k images
          'http://images.cocodataset.org/zips/test2017.zip']  # 7G, 41k images (optional)
  download(urls, dir=dir / 'images', threads=3)

@@ -0,0 +1,25 @@
# Ultralytics YOLO 🚀, GPL-3.0 license
# COCO8-pose dataset (first 8 images from COCO train2017) by Ultralytics
# Example usage: yolo train data=coco8-pose.yaml
# parent
# ├── ultralytics
# └── datasets
# └── coco8-pose ← downloads here (1 MB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/coco8-pose # dataset root dir
train: images/train # train images (relative to 'path') 4 images
val: images/val # val images (relative to 'path') 4 images
test: # test images (optional)
# Keypoints
kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
flip_idx: [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]
# Classes
names:
0: person
# Download script/URL (optional)
download: https://ultralytics.com/assets/coco8-pose.zip
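As a quick orientation, here is a minimal Python sketch of training against this config (assuming the `ultralytics` release this diff targets; `coco8-pose` auto-downloads from the URL above on first use):

from ultralytics import YOLO

# Build a pose model from its YAML config, or load pretrained weights via YOLO('yolov8n-pose.pt')
model = YOLO('yolov8n-pose.yaml')
model.train(data='coco8-pose.yaml', epochs=1, imgsz=64)  # short smoke-test run
metrics = model.val()  # pose mAP is reported under 'metrics/mAP50-95(P)'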

@@ -44,13 +44,14 @@ Any of these models can be used by loading their configs or pretrained checkpoints
### 1. YOLOv8
**About** - Cutting-edge Detection, Segmentation, Classification and Pose models developed by Ultralytics. <br>
Available Models:
- Detection - `yolov8n`, `yolov8s`, `yolov8m`, `yolov8l`, `yolov8x`
- Instance Segmentation - `yolov8n-seg`, `yolov8s-seg`, `yolov8m-seg`, `yolov8l-seg`, `yolov8x-seg`
- Classification - `yolov8n-cls`, `yolov8s-cls`, `yolov8m-cls`, `yolov8l-cls`, `yolov8x-cls`
- Pose - `yolov8n-pose`, `yolov8s-pose`, `yolov8m-pose`, `yolov8l-pose`, `yolov8x-pose`, `yolov8x-pose-p6`
<details><summary>Performance</summary>
@@ -84,6 +85,17 @@ Available Models:
| [YOLOv8l-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-cls.pt) | 224 | 78.0 | 94.1 | 163.0 | 0.87 | 37.5 | 99.7 |
| [YOLOv8x-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-cls.pt) | 224 | 78.4 | 94.3 | 232.0 | 1.01 | 57.4 | 154.8 |
### Pose
| Model | size<br><sup>(pixels) | mAP<sup>box<br>50-95 | mAP<sup>pose<br>50-95 | Speed<br><sup>CPU ONNX<br>(ms) | Speed<br><sup>A100 TensorRT<br>(ms) | params<br><sup>(M) | FLOPs<br><sup>(B) |
| ---------------------------------------------------------------------------------------------------- | --------------------- | -------------------- | --------------------- | ------------------------------ | ----------------------------------- | ------------------ | ----------------- |
| [YOLOv8n-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-pose.pt) | 640 | - | 49.7 | - | - | 3.3 | 9.2 |
| [YOLOv8s-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-pose.pt) | 640 | - | 59.2 | - | - | 11.6 | 30.2 |
| [YOLOv8m-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-pose.pt) | 640 | - | 63.6 | - | - | 26.4 | 81.0 |
| [YOLOv8l-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-pose.pt) | 640 | - | 67.0 | - | - | 44.4 | 168.6 |
| [YOLOv8x-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose.pt) | 640 | - | 68.9 | - | - | 69.4 | 263.2 |
| [YOLOv8x-pose-p6](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose-p6.pt) | 1280 | - | 71.5 | - | - | 99.1 | 1066.4 |
</details>
### 2. YOLOv5u

@@ -0,0 +1,57 @@
# Ultralytics YOLO 🚀, GPL-3.0 license
# YOLOv8-pose keypoints/pose estimation model with P3-P6 outputs. For Usage examples see https://docs.ultralytics.com/tasks/pose
# Parameters
nc: 1 # number of classes
kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
scales: # model compound scaling constants, i.e. 'model=yolov8n-pose-p6.yaml' will call yolov8-pose-p6.yaml with scale 'n'
# [depth, width, max_channels]
n: [0.33, 0.25, 1024]
s: [0.33, 0.50, 1024]
m: [0.67, 0.75, 768]
l: [1.00, 1.00, 512]
x: [1.00, 1.25, 512]
# YOLOv8.0x6 backbone
backbone:
# [from, repeats, module, args]
- [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
- [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
- [-1, 3, C2f, [128, True]]
- [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
- [-1, 6, C2f, [256, True]]
- [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
- [-1, 6, C2f, [512, True]]
- [-1, 1, Conv, [768, 3, 2]] # 7-P5/32
- [-1, 3, C2f, [768, True]]
- [-1, 1, Conv, [1024, 3, 2]] # 9-P6/64
- [-1, 3, C2f, [1024, True]]
- [-1, 1, SPPF, [1024, 5]] # 11
# YOLOv8.0x6 head
head:
- [-1, 1, nn.Upsample, [None, 2, 'nearest']]
- [[-1, 8], 1, Concat, [1]] # cat backbone P5
- [-1, 3, C2, [768, False]] # 14
- [-1, 1, nn.Upsample, [None, 2, 'nearest']]
- [[-1, 6], 1, Concat, [1]] # cat backbone P4
- [-1, 3, C2, [512, False]] # 17
- [-1, 1, nn.Upsample, [None, 2, 'nearest']]
- [[-1, 4], 1, Concat, [1]] # cat backbone P3
- [-1, 3, C2, [256, False]] # 20 (P3/8-small)
- [-1, 1, Conv, [256, 3, 2]]
- [[-1, 17], 1, Concat, [1]] # cat head P4
- [-1, 3, C2, [512, False]] # 23 (P4/16-medium)
- [-1, 1, Conv, [512, 3, 2]]
- [[-1, 14], 1, Concat, [1]] # cat head P5
- [-1, 3, C2, [768, False]] # 26 (P5/32-large)
- [-1, 1, Conv, [768, 3, 2]]
- [[-1, 11], 1, Concat, [1]] # cat head P6
- [-1, 3, C2, [1024, False]] # 29 (P6/64-xlarge)
- [[20, 23, 26, 29], 1, Pose, [nc, kpt_shape]] # Pose(P3, P4, P5, P6)

@@ -0,0 +1,47 @@
# Ultralytics YOLO 🚀, GPL-3.0 license
# YOLOv8-pose keypoints/pose estimation model. For Usage examples see https://docs.ultralytics.com/tasks/pose
# Parameters
nc: 1 # number of classes
kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
scales: # model compound scaling constants, i.e. 'model=yolov8n-pose.yaml' will call yolov8-pose.yaml with scale 'n'
# [depth, width, max_channels]
n: [0.33, 0.25, 1024]
s: [0.33, 0.50, 1024]
m: [0.67, 0.75, 768]
l: [1.00, 1.00, 512]
x: [1.00, 1.25, 512]
# YOLOv8.0n backbone
backbone:
# [from, repeats, module, args]
- [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
- [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
- [-1, 3, C2f, [128, True]]
- [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
- [-1, 6, C2f, [256, True]]
- [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
- [-1, 6, C2f, [512, True]]
- [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
- [-1, 3, C2f, [1024, True]]
- [-1, 1, SPPF, [1024, 5]] # 9
# YOLOv8.0n head
head:
- [-1, 1, nn.Upsample, [None, 2, 'nearest']]
- [[-1, 6], 1, Concat, [1]] # cat backbone P4
- [-1, 3, C2f, [512]] # 12
- [-1, 1, nn.Upsample, [None, 2, 'nearest']]
- [[-1, 4], 1, Concat, [1]] # cat backbone P3
- [-1, 3, C2f, [256]] # 15 (P3/8-small)
- [-1, 1, Conv, [256, 3, 2]]
- [[-1, 12], 1, Concat, [1]] # cat head P4
- [-1, 3, C2f, [512]] # 18 (P4/16-medium)
- [-1, 1, Conv, [512, 3, 2]]
- [[-1, 9], 1, Concat, [1]] # cat head P5
- [-1, 3, C2f, [1024]] # 21 (P5/32-large)
- [[15, 18, 21], 1, Pose, [nc, kpt_shape]] # Pose(P3, P4, P5)
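As a sanity check, the config above can be instantiated directly; a hedged sketch (assuming the `PoseModel` import path introduced later in this diff):

from ultralytics.nn.tasks import PoseModel

model = PoseModel(cfg='yolov8n-pose.yaml', ch=3, verbose=False)  # nano scale selected by the 'n' prefix
print(type(model.model[-1]).__name__)  # expected: 'Pose', the head wired to layers 15, 18 and 21
print(model.model[-1].kpt_shape)       # expected: [17, 3]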

@@ -91,8 +91,10 @@ class AutoBackend(nn.Module):
if nn_module:
model = weights.to(device)
model = model.fuse(verbose=verbose) if fuse else model
if hasattr(model, 'kpt_shape'):
kpt_shape = model.kpt_shape # pose-only
stride = max(int(model.stride.max()), 32) # model stride
names = model.module.names if hasattr(model, 'module') else model.names # get class names
model.half() if fp16 else model.float()
self.model = model # explicitly assign for to(), cpu(), cuda(), half()
pt = True
@@ -102,6 +104,8 @@ class AutoBackend(nn.Module):
device=device,
inplace=True,
fuse=fuse)
if hasattr(model, 'kpt_shape'):
kpt_shape = model.kpt_shape # pose-only
stride = max(int(model.stride.max()), 32) # model stride
names = model.module.names if hasattr(model, 'module') else model.names # get class names
model.half() if fp16 else model.float()
@@ -268,13 +272,14 @@ class AutoBackend(nn.Module):
for k, v in metadata.items():
if k in ('stride', 'batch'):
metadata[k] = int(v)
elif k in ('imgsz', 'names', 'kpt_shape') and isinstance(v, str):
metadata[k] = eval(v)
stride = metadata['stride']
task = metadata['task']
batch = metadata['batch']
imgsz = metadata['imgsz']
names = metadata['names']
kpt_shape = metadata.get('kpt_shape')
elif not (pt or triton or nn_module):
LOGGER.warning(f"WARNING ⚠️ Metadata not found for 'model={weights}'")

@@ -378,7 +378,9 @@ class Ensemble(nn.ModuleList):
return y, None # inference, train output
# Model heads below ----------------------------------------------------------------------------------------------------
class Detect(nn.Module):
# YOLOv8 Detect head for detection models
dynamic = False # force grid reconstruction
@@ -394,7 +396,6 @@ class Detect(nn.Module):
self.reg_max = 16 # DFL channels (ch[0] // 16 to scale 4/8/12/16/20 for n/s/m/l/x)
self.no = nc + self.reg_max * 4 # number of outputs per anchor
self.stride = torch.zeros(self.nl) # strides computed during build
c2, c3 = max((16, ch[0] // 4, self.reg_max * 4)), max(ch[0], self.nc) # channels
self.cv2 = nn.ModuleList(
nn.Sequential(Conv(x, c2, 3), Conv(c2, c2, 3), nn.Conv2d(c2, 4 * self.reg_max, 1)) for x in ch)
@@ -454,6 +455,36 @@ class Segment(Detect):
return (torch.cat([x, mc], 1), p) if self.export else (torch.cat([x[0], mc], 1), (x[1], mc, p))
class Pose(Detect):
# YOLOv8 Pose head for keypoints models
def __init__(self, nc=80, kpt_shape=(17, 3), ch=()):
super().__init__(nc, ch)
self.kpt_shape = kpt_shape # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
self.nk = kpt_shape[0] * kpt_shape[1] # number of keypoints total
self.detect = Detect.forward
c4 = max(ch[0] // 4, self.nk)
self.cv4 = nn.ModuleList(nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3), nn.Conv2d(c4, self.nk, 1)) for x in ch)
def forward(self, x):
bs = x[0].shape[0] # batch size
kpt = torch.cat([self.cv4[i](x[i]).view(bs, self.nk, -1) for i in range(self.nl)], -1) # (bs, 17*3, h*w)
x = self.detect(self, x)
if self.training:
return x, kpt
pred_kpt = self.kpts_decode(kpt)
return torch.cat([x, pred_kpt], 1) if self.export else (torch.cat([x[0], pred_kpt], 1), (x[1], kpt))
def kpts_decode(self, kpts):
ndim = self.kpt_shape[1]
y = kpts.clone()
if ndim == 3:
y[:, 2::3].sigmoid_() # inplace sigmoid
y[:, 0::ndim] = (y[:, 0::ndim] * 2.0 + (self.anchors[0] - 0.5)) * self.strides
y[:, 1::ndim] = (y[:, 1::ndim] * 2.0 + (self.anchors[1] - 0.5)) * self.strides
return y
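The decode above maps raw keypoint predictions into image space: x and y are scaled by 2, shifted by their anchor cell (minus 0.5), then multiplied by the stride, while the visibility channel only gets a sigmoid. A standalone numeric sketch of the same arithmetic, with made-up anchor/stride values:

import torch

# Hypothetical single keypoint at one cell: raw x offset 0.3, anchor center 10.0 (grid units), stride 8
raw_x, anchor_x, stride = 0.3, 10.0, 8.0
pixel_x = (raw_x * 2.0 + (anchor_x - 0.5)) * stride
print(pixel_x)  # 80.8 pixels
print(torch.tensor([0.5]).sigmoid())  # visibility logit -> probability, ~0.62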
class Classify(nn.Module):
# YOLOv8 classification head, i.e. x(b,c1,20,20) to x(b,c2)
def __init__(self, c1, c2, k=1, s=1, p=None, g=1): # ch_in, ch_out, kernel, stride, padding, groups

@@ -10,7 +10,7 @@ import torch.nn as nn
from ultralytics.nn.modules import (C1, C2, C3, C3TR, SPP, SPPF, Bottleneck, BottleneckCSP, C2f, C3Ghost, C3x, Classify,
Concat, Conv, ConvTranspose, Detect, DWConv, DWConvTranspose2d, Ensemble, Focus,
GhostBottleneck, GhostConv, Pose, Segment)
from ultralytics.yolo.utils import DEFAULT_CFG_DICT, DEFAULT_CFG_KEYS, LOGGER, colorstr, emojis, yaml_load
from ultralytics.yolo.utils.checks import check_requirements, check_suffix, check_yaml
from ultralytics.yolo.utils.torch_utils import (fuse_conv_and_bn, fuse_deconv_and_bn, initialize_weights,
@@ -183,10 +183,10 @@ class DetectionModel(BaseModel):
# Build strides
m = self.model[-1] # Detect()
if isinstance(m, (Detect, Segment, Pose)):
s = 256 # 2x min stride
m.inplace = self.inplace
forward = lambda x: self.forward(x)[0] if isinstance(m, (Segment, Pose)) else self.forward(x)
m.stride = torch.tensor([s / x.shape[-2] for x in forward(torch.zeros(1, ch, s, s))]) # forward
self.stride = m.stride
m.bias_init() # only run once
@@ -242,12 +242,23 @@ class DetectionModel(BaseModel):
class SegmentationModel(DetectionModel):
# YOLOv8 segmentation model
def __init__(self, cfg='yolov8n-seg.yaml', ch=3, nc=None, verbose=True):
super().__init__(cfg=cfg, ch=ch, nc=nc, verbose=verbose)
def _forward_augment(self, x):
raise NotImplementedError(emojis('WARNING ⚠️ SegmentationModel has not supported augment inference yet!'))
class PoseModel(DetectionModel):
# YOLOv8 pose model
def __init__(self, cfg='yolov8n-pose.yaml', ch=3, nc=None, data_kpt_shape=(None, None), verbose=True):
if not isinstance(cfg, dict):
cfg = yaml_model_load(cfg) # load model YAML
if any(data_kpt_shape) and list(data_kpt_shape) != list(cfg['kpt_shape']):
LOGGER.info(f"Overriding model.yaml kpt_shape={cfg['kpt_shape']} with kpt_shape={data_kpt_shape}")
cfg['kpt_shape'] = data_kpt_shape
super().__init__(cfg=cfg, ch=ch, nc=nc, verbose=verbose)
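The `data_kpt_shape` hook above lets a dataset's keypoint layout override the model YAML. A hedged sketch of that behavior (hypothetical 13-keypoint, x/y-only dataset):

from ultralytics.nn.tasks import PoseModel

# (13, 2) from a hypothetical dataset overrides the YAML's [17, 3] and is logged as an override
model = PoseModel(cfg='yolov8n-pose.yaml', data_kpt_shape=(13, 2), verbose=False)
print(model.yaml['kpt_shape'])  # expected: (13, 2)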
class ClassificationModel(BaseModel):
# YOLOv8 classification model
def __init__(self,
@@ -425,7 +436,7 @@ def parse_model(d, ch, verbose=True): # model_dict, input_channels(3)
# Args
max_channels = float('inf')
nc, act, scales = (d.get(x) for x in ('nc', 'act', 'scales'))
depth, width, kpt_shape = (d.get(x, 1.0) for x in ('depth_multiple', 'width_multiple', 'kpt_shape'))
if scales:
scale = d.get('scale')
if not scale:
@@ -464,7 +475,7 @@ def parse_model(d, ch, verbose=True): # model_dict, input_channels(3)
args = [ch[f]]
elif m is Concat:
c2 = sum(ch[x] for x in f)
elif m in (Detect, Segment, Pose):
args.append([ch[x] for x in f])
if m is Segment:
args[2] = make_divisible(min(args[2], max_channels) * width, 8)
@@ -543,6 +554,8 @@ def guess_model_task(model):
return 'detect'
if m == 'segment':
return 'segment'
if m == 'pose':
return 'pose'
# Guess from model cfg
if isinstance(model, dict):
@@ -565,6 +578,8 @@ def guess_model_task(model):
return 'segment'
elif isinstance(m, Classify):
return 'classify'
elif isinstance(m, Pose):
return 'pose'
# Guess from model filename
if isinstance(model, (str, Path)):
@@ -573,10 +588,12 @@ def guess_model_task(model):
return 'segment'
elif '-cls' in model.stem or 'classify' in model.parts:
return 'classify'
elif '-pose' in model.stem or 'pose' in model.parts:
return 'pose'
elif 'detect' in model.parts:
return 'detect'
# Unable to determine task from model
LOGGER.warning("WARNING ⚠️ Unable to automatically guess model task, assuming 'task=detect'. "
"Explicitly define task for your model, i.e. 'task=detect', 'segment', 'classify', or 'pose'.")
return 'detect' # assume detect

@@ -33,10 +33,9 @@ def on_predict_postprocess_end(predictor):
tracks = predictor.trackers[i].update(det, im0s[i])
if len(tracks) == 0:
continue
idx = tracks[:, -1].tolist()
predictor.results[i] = predictor.results[i][idx]
predictor.results[i].update(boxes=torch.as_tensor(tracks[:, :-1]))
def register_tracker(model):

@@ -18,13 +18,13 @@ TASKS = 'detect', 'segment', 'classify', 'pose'
TASK2DATA = {
'detect': 'coco128.yaml',
'segment': 'coco128-seg.yaml',
'classify': 'imagenet100',
'pose': 'coco128-pose.yaml'}
TASK2MODEL = {
'detect': 'yolov8n.pt',
'segment': 'yolov8n-seg.pt',
'classify': 'yolov8n-cls.pt',
'pose': 'yolov8n-pose.yaml'}
CLI_HELP_MSG = \
f"""

@@ -88,6 +88,8 @@ warmup_bias_lr: 0.1 # warmup initial bias lr
box: 7.5 # box loss gain
cls: 0.5 # cls loss gain (scale with pixels)
dfl: 1.5 # dfl loss gain
pose: 12.0 # pose loss gain
kobj: 1.0 # keypoint obj loss gain
fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5)
label_smoothing: 0.0 # label smoothing (fraction)
nbs: 64 # nominal batch size

@@ -16,6 +16,8 @@ from ..utils.metrics import bbox_ioa
from ..utils.ops import segment2box
from .utils import polygons2masks, polygons2masks_overlap
POSE_FLIPLR_INDEX = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]
# TODO: we might need a BaseTransform to make all these augments be compatible with both classification and semantic
class BaseTransform:
@@ -309,27 +311,22 @@ class RandomPerspective:
"""apply affine to keypoints.
Args:
keypoints(ndarray): keypoints, [N, 17, 3].
M(ndarray): affine matrix.
Return:
new_keypoints(ndarray): keypoints after affine, [N, 17, 3].
"""
n, nkpt = keypoints.shape[:2]
if n == 0:
return keypoints
xy = np.ones((n * nkpt, 3))
visible = keypoints[..., 2].reshape(n * nkpt, 1)
xy[:, :2] = keypoints[..., :2].reshape(n * nkpt, 2)
xy = xy @ M.T # transform
xy = xy[:, :2] / xy[:, 2:3] # perspective rescale or affine
out_mask = (xy[:, 0] < 0) | (xy[:, 1] < 0) | (xy[:, 0] > self.size[0]) | (xy[:, 1] > self.size[1])
visible[out_mask] = 0
return np.concatenate([xy, visible], axis=-1).reshape(n, nkpt, 3)
def __call__(self, labels):
"""
@@ -415,12 +412,13 @@ class RandomHSV:
class RandomFlip:
def __init__(self, p=0.5, direction='horizontal', flip_idx=None) -> None:
assert direction in ['horizontal', 'vertical'], f'Support direction `horizontal` or `vertical`, got {direction}'
assert 0 <= p <= 1.0
self.p = p
self.direction = direction
self.flip_idx = flip_idx
def __call__(self, labels):
img = labels['img']
@@ -437,6 +435,9 @@ class RandomFlip:
if self.direction == 'horizontal' and random.random() < self.p:
img = np.fliplr(img)
instances.fliplr(w)
# for keypoints
if self.flip_idx is not None and instances.keypoints is not None:
instances.keypoints = np.ascontiguousarray(instances.keypoints[:, self.flip_idx, :])
labels['img'] = np.ascontiguousarray(img)
labels['instances'] = instances
return labels
@@ -633,7 +634,7 @@ class Format:
labels['cls'] = torch.from_numpy(cls) if nl else torch.zeros(nl)
labels['bboxes'] = torch.from_numpy(instances.bboxes) if nl else torch.zeros((nl, 4))
if self.return_keypoint:
labels['keypoints'] = torch.from_numpy(instances.keypoints)
# then we can use collate_fn
if self.batch_idx:
labels['batch_idx'] = torch.zeros(nl)
@@ -672,13 +673,17 @@ def v8_transforms(dataset, imgsz, hyp):
perspective=hyp.perspective,
pre_transform=LetterBox(new_shape=(imgsz, imgsz)),
)])
flip_idx = dataset.data.get('flip_idx', None) # for keypoints augmentation
if dataset.use_keypoints and flip_idx is None and hyp.fliplr > 0.0:
hyp.fliplr = 0.0
LOGGER.warning("WARNING ⚠️ No `flip_idx` provided while training keypoints, setting augmentation 'fliplr=0.0'")
return Compose([
pre_transform,
MixUp(dataset, pre_transform=pre_transform, p=hyp.mixup),
Albumentations(p=1.0),
RandomHSV(hgain=hyp.hsv_h, sgain=hyp.hsv_s, vgain=hyp.hsv_v),
RandomFlip(direction='vertical', p=hyp.flipud),
RandomFlip(direction='horizontal', p=hyp.fliplr, flip_idx=flip_idx)]) # transforms
# Classification augmentations -----------------------------------------------------------------------------------------
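`flip_idx` is what keeps left/right keypoints consistent after a horizontal flip: mirroring the image turns a left eye into a right eye, so keypoint rows must be swapped too. A small self-contained sketch of that reindexing, using the COCO ordering from `POSE_FLIPLR_INDEX` above:

import numpy as np

flip_idx = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]  # COCO: swap L/R pairs
kpts = np.arange(17 * 3, dtype=np.float32).reshape(1, 17, 3)  # (n_instances, n_kpts, xyv)
flipped = np.ascontiguousarray(kpts[:, flip_idx, :])  # same op as RandomFlip above
print(np.array_equal(flipped[0, 1], kpts[0, 2]))  # True: right-eye row now sits at the left-eye index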

@@ -61,7 +61,7 @@ def seed_worker(worker_id): # noqa
random.seed(worker_seed)
def build_dataloader(cfg, batch, img_path, data_info, stride=32, rect=False, rank=-1, mode='train'):
assert mode in ['train', 'val']
shuffle = mode == 'train'
if cfg.rect and shuffle:
@@ -81,9 +81,9 @@ def build_dataloader(cfg, batch, img_path, stride=32, rect=False, names=None, ra
pad=0.0 if mode == 'train' else 0.5,
prefix=colorstr(f'{mode}: '),
use_segments=cfg.task == 'segment',
use_keypoints=cfg.task == 'pose',
classes=cfg.classes,
data=data_info)
batch = min(batch, len(dataset))
nd = torch.cuda.device_count() # number of CUDA devices

@@ -57,11 +57,11 @@ class YOLODataset(BaseDataset):
single_cls=False,
use_segments=False,
use_keypoints=False,
data=None,
classes=None):
self.use_segments = use_segments
self.use_keypoints = use_keypoints
self.data = data
assert not (self.use_segments and self.use_keypoints), 'Can not use both segments and keypoints.'
super().__init__(img_path, imgsz, cache, augment, hyp, prefix, rect, batch_size, stride, pad, single_cls,
classes)
@@ -77,10 +77,16 @@ class YOLODataset(BaseDataset):
nm, nf, ne, nc, msgs = 0, 0, 0, 0, [] # number missing, found, empty, corrupt, messages
desc = f'{self.prefix}Scanning {path.parent / path.stem}...'
total = len(self.im_files)
nc = len(self.data['names'])
nkpt, ndim = self.data.get('kpt_shape', (0, 0))
if self.use_keypoints and (nkpt <= 0 or ndim not in (2, 3)):
raise ValueError("'kpt_shape' in data.yaml missing or incorrect. Should be a list with [number of "
"keypoints, number of dims (2 for x,y or 3 for x,y,visible)], i.e. 'kpt_shape: [17, 3]'")
with ThreadPool(NUM_THREADS) as pool:
results = pool.imap(func=verify_image_label,
iterable=zip(self.im_files, self.label_files, repeat(self.prefix),
repeat(self.use_keypoints), repeat(len(self.data['names'])), repeat(nkpt),
repeat(ndim)))
pbar = tqdm(results, desc=desc, total=total, bar_format=TQDM_BAR_FORMAT)
for im_file, lb, shape, segments, keypoint, nm_f, nf_f, ne_f, nc_f, msg in pbar:
nm += nm_f

@@ -6,10 +6,10 @@ import json
import os
import subprocess
import time
import zipfile
from multiprocessing.pool import ThreadPool
from pathlib import Path
from tarfile import is_tarfile
import cv2
import numpy as np
@@ -61,7 +61,7 @@ def exif_size(img):
def verify_image_label(args):
# Verify one image-label pair
im_file, lb_file, prefix, keypoint, num_cls, nkpt, ndim = args
# number (missing, found, empty, corrupt), message, segments, keypoints
nm, nf, ne, nc, msg, segments, keypoints = 0, 0, 0, 0, '', [], None
try:
@@ -92,25 +92,19 @@ def verify_image_label(args):
nl = len(lb)
if nl:
if keypoint:
assert lb.shape[1] == (5 + nkpt * ndim), f'labels require {(5 + nkpt * ndim)} columns each'
assert (lb[:, 5::ndim] <= 1).all(), 'non-normalized or out of bounds coordinate labels'
assert (lb[:, 6::ndim] <= 1).all(), 'non-normalized or out of bounds coordinate labels'
else:
assert lb.shape[1] == 5, f'labels require 5 columns, {lb.shape[1]} columns detected'
assert (lb[:, 1:] <= 1).all(), \
f'non-normalized or out of bounds coordinates {lb[:, 1:][lb[:, 1:] > 1]}'
assert (lb >= 0).all(), f'negative label values {lb[lb < 0]}'
# All labels
max_cls = int(lb[:, 0].max()) # max label count
assert max_cls <= num_cls, \
f'Label class {max_cls} exceeds dataset class count {num_cls}. ' \
f'Possible class labels are 0-{num_cls - 1}'
_, i = np.unique(lb, axis=0, return_index=True)
if len(i) < nl: # duplicate row check
lb = lb[i] # remove duplicates
@@ -119,12 +113,18 @@ def verify_image_label(args):
msg = f'{prefix}WARNING ⚠️ {im_file}: {nl - len(i)} duplicate labels removed'
else:
ne = 1 # label empty
lb = np.zeros((0, (5 + nkpt * ndim)), dtype=np.float32) if keypoint else np.zeros(
(0, 5), dtype=np.float32)
else:
nm = 1 # label missing
lb = np.zeros((0, (5 + nkpt * ndim)), dtype=np.float32) if keypoint else np.zeros((0, 5), dtype=np.float32)
if keypoint:
keypoints = lb[:, 5:].reshape(-1, nkpt, ndim)
if ndim == 2:
kpt_mask = np.ones(keypoints.shape[:2], dtype=np.float32)
kpt_mask = np.where(keypoints[..., 0] < 0, 0.0, kpt_mask)
kpt_mask = np.where(keypoints[..., 1] < 0, 0.0, kpt_mask)
keypoints = np.concatenate([keypoints, kpt_mask[..., None]], axis=-1) # (nl, nkpt, 3)
lb = lb[:, :5]
return im_file, lb, shape, segments, keypoints, nm, nf, ne, nc, msg
except Exception as e:
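The column check above generalizes the old hardcoded 56: each label row is `class, x, y, w, h` plus `nkpt * ndim` keypoint values, so COCO pose with `kpt_shape: [17, 3]` still expects 5 + 17 * 3 = 56 columns. A tiny sketch of the same arithmetic:

import numpy as np

nkpt, ndim = 17, 3  # kpt_shape from coco-pose.yaml
ncols = 5 + nkpt * ndim
print(ncols)  # 56: class + xywh + 17 (x, y, visible) triplets
lb = np.zeros((2, ncols), dtype=np.float32)  # two well-formed label rows
assert lb.shape[1] == (5 + nkpt * ndim), f'labels require {5 + nkpt * ndim} columns each'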
@@ -195,7 +195,7 @@ def check_det_dataset(dataset, autodownload=True):
# Download (optional)
extract_dir = ''
if isinstance(data, (str, Path)) and (zipfile.is_zipfile(data) or is_tarfile(data)):
new_dir = safe_download(data, dir=DATASETS_DIR, unzip=True, delete=False, curl=False)
data = next((DATASETS_DIR / new_dir).rglob('*.yaml'))
extract_dir, autodownload = data.parent, False
@@ -356,23 +356,8 @@ class HUBDatasetStats():
assert dir.is_dir(), f'Error unzipping {path}, {dir} not found. path/to/abc.zip MUST unzip to path/to/abc/'
return True, str(dir), self._find_yaml(dir) # zipped, data_dir, yaml_path
def _hub_ops(self, f):
compress_one_image(f, self.im_dir / Path(f).name) # save to dataset-hub
def get_json(self, save=False, verbose=False):
# Return dataset JSON for Ultralytics HUB
@@ -426,3 +411,93 @@ class HUBDatasetStats():
pass
LOGGER.info(f'Done. All images saved to {self.im_dir}')
return self.im_dir
def compress_one_image(f, f_new=None, max_dim=1920, quality=50):
"""
Compresses a single image file to reduced size while preserving its aspect ratio and quality using either the
Python Imaging Library (PIL) or OpenCV library. If the input image is smaller than the maximum dimension, it will
not be resized.
Args:
f (str): The path to the input image file.
f_new (str, optional): The path to the output image file. If not specified, the input file will be overwritten.
max_dim (int, optional): The maximum dimension (width or height) of the output image. Default is 1920 pixels.
quality (int, optional): The image compression quality as a percentage. Default is 50%.
Returns:
None
Usage:
from pathlib import Path
from ultralytics.yolo.data.utils import compress_one_image
for f in Path('/Users/glennjocher/Downloads/dataset').rglob('*.jpg'):
compress_one_image(f)
"""
try: # use PIL
im = Image.open(f)
r = max_dim / max(im.height, im.width) # ratio
if r < 1.0: # image too large
im = im.resize((int(im.width * r), int(im.height * r)))
im.save(f_new or f, 'JPEG', quality=quality, optimize=True) # save
except Exception as e: # use OpenCV
LOGGER.info(f'WARNING ⚠️ HUB ops PIL failure {f}: {e}')
im = cv2.imread(f)
im_height, im_width = im.shape[:2]
r = max_dim / max(im_height, im_width) # ratio
if r < 1.0: # image too large
im = cv2.resize(im, (int(im_width * r), int(im_height * r)), interpolation=cv2.INTER_AREA)
cv2.imwrite(str(f_new or f), im)
def delete_dsstore(path):
"""
Deletes all ".DS_store" files under a specified directory.
Args:
path (str, optional): The directory path where the ".DS_store" files should be deleted.
Returns:
None
Usage:
from ultralytics.yolo.data.utils import delete_dsstore
delete_dsstore('/Users/glennjocher/Downloads/dataset')
Note:
".DS_store" files are created by the Apple operating system and contain metadata about folders and files. They
are hidden system files and can cause issues when transferring files between different operating systems.
"""
# Delete Apple .DS_store files
files = list(Path(path).rglob('.DS_store'))
LOGGER.info(f'Deleting *.DS_store files: {files}')
for f in files:
f.unlink()
def zip_directory(dir, use_zipfile_library=True):
"""Zips a directory and saves the archive to the specified output path.
Args:
dir (str): The path to the directory to be zipped.
use_zipfile_library (bool): Whether to use zipfile library or shutil for zipping.
Returns:
None
Usage:
from ultralytics.yolo.data.utils import zip_directory
zip_directory('/Users/glennjocher/Downloads/playground')
zip -r coco8-pose.zip coco8-pose
"""
delete_dsstore(dir)
if use_zipfile_library:
dir = Path(dir)
with zipfile.ZipFile(dir.with_suffix('.zip'), 'w', zipfile.ZIP_DEFLATED) as zip_file:
for file_path in dir.glob('**/*'):
if file_path.is_file():
zip_file.write(file_path, file_path.relative_to(dir))
else:
import shutil
shutil.make_archive(dir, 'zip', dir)

@@ -209,8 +209,8 @@ class Exporter:
self.file = file
self.output_shape = tuple(y.shape) if isinstance(y, torch.Tensor) else tuple(tuple(x.shape) for x in y)
self.pretty_name = Path(self.model.yaml.get('yaml_file', self.file)).stem.replace('yolo', 'YOLO')
trained_on = f'trained on {Path(self.args.data).name}' if self.args.data else '(untrained)'
description = f'Ultralytics {self.pretty_name} model {trained_on}'
self.metadata = {
'description': description,
'author': 'Ultralytics',
@@ -221,6 +221,8 @@ class Exporter:
'batch': self.args.batch,
'imgsz': self.imgsz,
'names': model.names} # model metadata
if model.task == 'pose':
self.metadata['kpt_shape'] = model.kpt_shape
LOGGER.info(f"\n{colorstr('PyTorch:')} starting from {file} with input shape {tuple(im.shape)} BCHW and "
f'output shape(s) {self.output_shape} ({file_size(file):.1f} MB)')
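With `kpt_shape` now written into export metadata, downstream backends can recover the keypoint layout without the original YAML. A hedged sketch of reading it back from a format that writes the `metadata.yaml` sidecar shown in this diff (the directory name here is hypothetical; `yaml` is PyYAML):

import yaml

with open('yolov8n-pose_openvino_model/metadata.yaml') as f:  # hypothetical export path
    meta = yaml.safe_load(f)
print(meta['task'], meta.get('kpt_shape'))  # e.g. 'pose' [17, 3]; AutoBackend eval()s it if stored as a string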
@@ -295,7 +297,8 @@ class Exporter:
check_requirements(requirements)
import onnx # noqa
opset_version = self.args.opset or get_latest_opset()
LOGGER.info(f'\n{prefix} starting export with onnx {onnx.__version__} opset {opset_version}...')
f = str(self.file.with_suffix('.onnx'))
output_names = ['output0', 'output1'] if isinstance(self.model, SegmentationModel) else ['output0']
@@ -313,7 +316,7 @@ class Exporter:
self.im.cpu() if dynamic else self.im,
f,
verbose=False,
opset_version=opset_version,
do_constant_folding=True, # WARNING: DNN inference with torch>=1.12 may require do_constant_folding=False
input_names=['images'],
output_names=output_names,
@@ -377,7 +380,6 @@ class Exporter:
yaml_save(Path(f) / 'metadata.yaml', self.metadata) # add metadata.yaml
return f, None
def _export_coreml(self, prefix=colorstr('CoreML:')):
# YOLOv8 CoreML export
check_requirements('coremltools>=6.0')
@@ -410,8 +412,8 @@ class Exporter:
model = self.model
elif self.model.task == 'detect':
model = iOSDetectModel(self.model, self.im) if self.args.nms else self.model
else:
# TODO CoreML Segment and Pose model pipelining
model = self.model
ts = torch.jit.trace(model.eval(), self.im, strict=False) # TorchScript model

@@ -5,8 +5,8 @@ from pathlib import Path
from typing import Union
from ultralytics import yolo # noqa
from ultralytics.nn.tasks import (ClassificationModel, DetectionModel, PoseModel, SegmentationModel,
attempt_load_one_weight, guess_model_task, nn, yaml_model_load)
from ultralytics.yolo.cfg import get_cfg
from ultralytics.yolo.engine.exporter import Exporter
from ultralytics.yolo.utils import (DEFAULT_CFG, DEFAULT_CFG_DICT, DEFAULT_CFG_KEYS, LOGGER, RANK, ROOT, callbacks,
@@ -25,7 +25,8 @@ TASK_MAP = {
yolo.v8.detect.DetectionPredictor],
'segment': [
SegmentationModel, yolo.v8.segment.SegmentationTrainer, yolo.v8.segment.SegmentationValidator,
yolo.v8.segment.SegmentationPredictor],
'pose': [PoseModel, yolo.v8.pose.PoseTrainer, yolo.v8.pose.PoseValidator, yolo.v8.pose.PosePredictor]}
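With the 'pose' entry registered in TASK_MAP, the high-level API routes pose weights to the new trainer, validator and predictor automatically. A minimal usage sketch (assuming the released `yolov8n-pose.pt` asset):

from ultralytics import YOLO

model = YOLO('yolov8n-pose.pt')  # task guessed as 'pose' from the checkpoint/filename
results = model('https://ultralytics.com/images/bus.jpg')
print(results[0].keypoints.shape)  # e.g. (n_persons, 17, 3): x, y, visibility per keypoint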
class YOLO:
@@ -195,7 +196,7 @@ class YOLO:
self.model.load(weights)
return self
def info(self, verbose=True):
"""
Logs model info.

@@ -246,6 +246,7 @@ class BasePredictor:
dnn=self.args.dnn,
data=self.args.data,
fp16=self.args.half,
fuse=True,
verbose=verbose)
self.device = device
self.model.eval()

@@ -17,6 +17,53 @@ from ultralytics.yolo.utils.plotting import Annotator, colors
from ultralytics.yolo.utils.torch_utils import TORCHVISION_0_10
class BaseTensor(SimpleClass):
"""
Attributes:
tensor (torch.Tensor): A tensor.
orig_shape (tuple): Original image size, in the format (height, width).
Methods:
cpu(): Returns a copy of the tensor on CPU memory.
numpy(): Returns a copy of the tensor as a numpy array.
cuda(): Returns a copy of the tensor on GPU memory.
to(): Returns a copy of the tensor with the specified device and dtype.
"""
def __init__(self, tensor, orig_shape) -> None:
super().__init__()
assert isinstance(tensor, torch.Tensor)
self.tensor = tensor
self.orig_shape = orig_shape
@property
def shape(self):
return self.data.shape
@property
def data(self):
return self.tensor
def cpu(self):
return self.__class__(self.data.cpu(), self.orig_shape)
def numpy(self):
return self.__class__(self.data.numpy(), self.orig_shape)
def cuda(self):
return self.__class__(self.data.cuda(), self.orig_shape)
def to(self, *args, **kwargs):
return self.__class__(self.data.to(*args, **kwargs), self.orig_shape)
def __len__(self): # override len(results)
return len(self.data)
def __getitem__(self, idx):
return self.__class__(self.data[idx], self.orig_shape)
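Factoring `cpu()/numpy()/cuda()/to()`, `len()` and indexing into `BaseTensor` is what lets `Boxes` and `Masks` below drop their duplicated copies of those methods: each subclass only has to expose `data`. A hedged sketch of the pattern (import path assumed from this diff):

import torch
from ultralytics.yolo.engine.results import BaseTensor

t = BaseTensor(torch.rand(3, 6), orig_shape=(480, 640))  # hypothetical per-box tensor
print(len(t), t[0].shape)      # 3 torch.Size([6]); __getitem__ re-wraps the sliced tensor
print(type(t.cpu()).__name__)  # 'BaseTensor': device helpers preserve the subclass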
class Results(SimpleClass):
"""
A class for storing and manipulating inference results.
@@ -40,22 +87,23 @@ class Results(SimpleClass):
_keys (tuple): A tuple of attribute names for non-empty attributes.
"""
def __init__(self, orig_img, path, names, boxes=None, masks=None, probs=None, keypoints=None) -> None:
self.orig_img = orig_img
self.orig_shape = orig_img.shape[:2]
self.boxes = Boxes(boxes, self.orig_shape) if boxes is not None else None # native size boxes
self.masks = Masks(masks, self.orig_shape) if masks is not None else None # native size or imgsz masks
self.probs = probs if probs is not None else None
self.keypoints = keypoints if keypoints is not None else None
self.names = names
self.path = path
self._keys = ('boxes', 'masks', 'probs', 'keypoints')
def pandas(self):
pass
# TODO masks.pandas + boxes.pandas + cls.pandas
def __getitem__(self, idx):
r = self.new()
for k in self.keys:
setattr(r, k, getattr(self, k)[idx])
return r
@@ -69,25 +117,25 @@ class Results(SimpleClass):
self.probs = probs
def cpu(self):
r = self.new()
for k in self.keys:
setattr(r, k, getattr(self, k).cpu())
return r
def numpy(self):
r = self.new()
for k in self.keys:
setattr(r, k, getattr(self, k).numpy())
return r
def cuda(self):
r = self.new()
for k in self.keys:
setattr(r, k, getattr(self, k).cuda())
return r
def to(self, *args, **kwargs):
r = self.new()
for k in self.keys:
setattr(r, k, getattr(self, k).to(*args, **kwargs))
return r
@@ -96,6 +144,9 @@ class Results(SimpleClass):
for k in self.keys:
return len(getattr(self, k))
def new(self):
return Results(orig_img=self.orig_img, path=self.path, names=self.names)
@property
def keys(self):
return [k for k in self._keys if getattr(self, k) is not None]
@@ -109,6 +160,7 @@ class Results(SimpleClass):
pil=False,
example='abc',
img=None,
kpt_line=True,
labels=True,
boxes=True,
masks=True,
@@ -126,6 +178,7 @@ class Results(SimpleClass):
pil (bool): Whether to return the image as a PIL Image.
example (str): An example string to display. Useful for indicating the expected format of the output.
img (numpy.ndarray): Plot to another image; if None, plot to the original image.
kpt_line (bool): Whether to draw lines connecting keypoints.
labels (bool): Whether to plot the label of bounding boxes.
boxes (bool): Whether to plot the bounding boxes.
masks (bool): Whether to plot the masks.
@@ -146,11 +199,12 @@ class Results(SimpleClass):
pred_masks, show_masks = self.masks, masks
pred_probs, show_probs = self.probs, probs
names = self.names
keypoints = self.keypoints
if pred_boxes and show_boxes:
for d in reversed(pred_boxes):
c, conf, id = int(d.cls), float(d.conf) if conf else None, None if d.id is None else int(d.id.item())
name = ('' if id is None else f'id:{id} ') + names[c]
label = (f'{name} {conf:.2f}' if conf else name) if labels else None
annotator.box_label(d.xyxy.squeeze(), label, color=colors(c, True))
if pred_masks and show_masks:
@@ -168,10 +222,14 @@ class Results(SimpleClass):
text = f"{', '.join(f'{names[j] if names else j} {pred_probs[j]:.2f}' for j in top5i)}, "
annotator.text((32, 32), text, txt_color=(255, 255, 255)) # TODO: allow setting colors
if keypoints is not None:
for k in reversed(keypoints):
annotator.kpts(k, self.orig_shape, kpt_line=kpt_line)
return np.asarray(annotator.im) if annotator.pil else annotator.im
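A short usage note for the new keypoint plotting path, continuing the hypothetical predict sketch earlier:

import cv2

im = results[0].plot(kpt_line=True)  # draws boxes plus keypoints and skeleton lines
cv2.imwrite('bus_pose.jpg', im)      # set kpt_line=False to render keypoint dots only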
class Boxes(BaseTensor):
"""
A class for storing and manipulating detection boxes.
@@ -246,37 +304,15 @@ class Boxes(SimpleClass):
def xywhn(self):
return self.xywh / self.orig_shape[[1, 0, 1, 0]]
def pandas(self):
LOGGER.info('results.pandas() method not yet implemented')
@property
def data(self):
return self.boxes
class Masks(BaseTensor):
"""
A class for storing and manipulating detection masks.
@@ -316,7 +352,7 @@ class Masks(SimpleClass):
def xyn(self):
# Segments (normalized)
return [
ops.scale_coords(self.masks.shape[1:], x, self.orig_shape, normalize=True)
for x in ops.masks2segments(self.masks)]
@property
@@ -324,31 +360,9 @@ class Masks(SimpleClass):
def xy(self):
# Segments (pixels)
return [
ops.scale_coords(self.masks.shape[1:], x, self.orig_shape, normalize=False)
for x in ops.masks2segments(self.masks)]
@property
def data(self):
return self.masks

@@ -75,11 +75,13 @@ def benchmark(model=Path(SETTINGS['weights_dir']) / 'yolov8n.pt', imgsz=160, hal
# Validate
if model.task == 'detect':
data, key = 'coco8.yaml', 'metrics/mAP50-95(B)'
elif model.task == 'segment':
data, key = 'coco8-seg.yaml', 'metrics/mAP50-95(M)'
elif model.task == 'classify':
data, key = 'imagenet100', 'metrics/accuracy_top5'
elif model.task == 'pose':
data, key = 'coco8-pose.yaml', 'metrics/mAP50-95(P)'
results = export.val(data=data, batch=1, imgsz=imgsz, plots=False, device=device, half=half, verbose=False)
metric, speed = results.results_dict[key], results.speed['inference']

@ -14,9 +14,9 @@ from tqdm import tqdm
from ultralytics.yolo.utils import LOGGER, checks, emojis, is_online from ultralytics.yolo.utils import LOGGER, checks, emojis, is_online
GITHUB_ASSET_NAMES = [f'yolov8{size}{suffix}.pt' for size in 'nsmlx' for suffix in ('', '6', '-cls', '-seg')] + \ GITHUB_ASSET_NAMES = [f'yolov8{k}{suffix}.pt' for k in 'nsmlx' for suffix in ('', '6', '-cls', '-seg', '-pose')] + \
[f'yolov5{size}u.pt' for size in 'nsmlx'] + \ [f'yolov5{k}u.pt' for k in 'nsmlx'] + \
[f'yolov3{size}u.pt' for size in ('', '-spp', '-tiny')] [f'yolov3{k}u.pt' for k in ('', '-spp', '-tiny')]
GITHUB_ASSET_STEMS = [Path(k).stem for k in GITHUB_ASSET_NAMES] GITHUB_ASSET_STEMS = [Path(k).stem for k in GITHUB_ASSET_NAMES]

@ -168,7 +168,7 @@ class Instances:
Args: Args:
bboxes (ndarray): bboxes with shape [N, 4]. bboxes (ndarray): bboxes with shape [N, 4].
segments (list | ndarray): segments. segments (list | ndarray): segments.
keypoints (ndarray): keypoints with shape [N, 17, 2]. keypoints (ndarray): keypoints(x, y, visible) with shape [N, 17, 3].
""" """
if segments is None: if segments is None:
segments = [] segments = []

@ -54,3 +54,17 @@ class BboxLoss(nn.Module):
wr = 1 - wl # weight right wr = 1 - wl # weight right
return (F.cross_entropy(pred_dist, tl.view(-1), reduction='none').view(tl.shape) * wl + return (F.cross_entropy(pred_dist, tl.view(-1), reduction='none').view(tl.shape) * wl +
F.cross_entropy(pred_dist, tr.view(-1), reduction='none').view(tl.shape) * wr).mean(-1, keepdim=True) F.cross_entropy(pred_dist, tr.view(-1), reduction='none').view(tl.shape) * wr).mean(-1, keepdim=True)
class KeypointLoss(nn.Module):
def __init__(self, sigmas) -> None:
super().__init__()
self.sigmas = sigmas
def forward(self, pred_kpts, gt_kpts, kpt_mask, area):
d = (pred_kpts[..., 0] - gt_kpts[..., 0]) ** 2 + (pred_kpts[..., 1] - gt_kpts[..., 1]) ** 2
kpt_loss_factor = (torch.sum(kpt_mask != 0) + torch.sum(kpt_mask == 0)) / (torch.sum(kpt_mask != 0) + 1e-9)
# e = d / (2 * (area * self.sigmas) ** 2 + 1e-9) # from formula
e = d / (2 * self.sigmas) ** 2 / (area + 1e-9) / 2 # from cocoeval
return kpt_loss_factor * ((1 - torch.exp(-e)) * kpt_mask).mean()
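Editor's sketch: a minimal standalone check of the new KeypointLoss. The shapes and sigmas follow the COCO 17-keypoint layout used elsewhere in this commit; random tensors stand in for real batches.

import torch
from ultralytics.yolo.utils.loss import KeypointLoss
from ultralytics.yolo.utils.metrics import OKS_SIGMA

loss_fn = KeypointLoss(sigmas=torch.from_numpy(OKS_SIGMA).float())
pred_kpts = torch.rand(8, 17, 3)          # decoded predictions (x, y, score)
gt_kpts = torch.rand(8, 17, 3)            # ground truth (x, y, visible)
kpt_mask = gt_kpts[..., 2] != 0           # only labelled keypoints contribute
area = torch.rand(8, 1) * 100             # ground-truth box areas
print(loss_fn(pred_kpts, gt_kpts, kpt_mask, area))   # scalar loss tensor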

@ -13,6 +13,8 @@ import torch.nn as nn
from ultralytics.yolo.utils import LOGGER, SimpleClass, TryExcept from ultralytics.yolo.utils import LOGGER, SimpleClass, TryExcept
OKS_SIGMA = np.array([.26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, .87, .89, .89]) / 10.0
# boxes # boxes
def box_area(box): def box_area(box):
@ -108,8 +110,8 @@ def bbox_iou(box1, box2, xywh=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7
def mask_iou(mask1, mask2, eps=1e-7): def mask_iou(mask1, mask2, eps=1e-7):
""" """
mask1: [N, n] m1 means number of predicted objects mask1: [N, n] m1 means number of gt objects
mask2: [M, n] m2 means number of gt objects mask2: [M, n] m2 means number of predicted objects
Note: n means image_w x image_h Note: n means image_w x image_h
Returns: masks iou, [N, M] Returns: masks iou, [N, M]
""" """
@ -118,16 +120,18 @@ def mask_iou(mask1, mask2, eps=1e-7):
return intersection / (union + eps) return intersection / (union + eps)
def masks_iou(mask1, mask2, eps=1e-7): def kpt_iou(kpt1, kpt2, area, sigma, eps=1e-7):
""" """OKS
mask1: [N, n] m1 means number of predicted objects kpt1: [N, 17, 3], gt
mask2: [N, n] m2 means number of gt objects kpt2: [M, 17, 3], pred
Note: n means image_w x image_h area: [N], areas from gt
Returns: masks iou, (N, )
""" """
intersection = (mask1 * mask2).sum(1).clamp(0) # (N, ) d = (kpt1[:, None, :, 0] - kpt2[..., 0]) ** 2 + (kpt1[:, None, :, 1] - kpt2[..., 1]) ** 2 # (N, M, 17)
union = (mask1.sum(1) + mask2.sum(1))[None] - intersection # (area1 + area2) - intersection sigma = torch.tensor(sigma, device=kpt1.device, dtype=kpt1.dtype) # (17, )
return intersection / (union + eps) kpt_mask = kpt1[..., 2] != 0 # (N, 17)
e = d / (2 * sigma) ** 2 / (area[:, None, None] + eps) / 2 # from cocoeval
# e = d / ((area[None, :, None] + eps) * sigma) ** 2 / 2 # from formula
return (torch.exp(-e) * kpt_mask[:, None]).sum(-1) / (kpt_mask.sum(-1)[:, None] + eps)
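Editor's sketch of the new OKS computation (kpt_iou and OKS_SIGMA are defined in this file; random tensors stand in for real detections):

import torch
gt = torch.rand(4, 17, 3)       # [N, 17, 3] ground truth (x, y, visible)
pred = torch.rand(6, 17, 3)     # [M, 17, 3] predictions
area = torch.rand(4) * 100      # [N] ground-truth box areas
oks = kpt_iou(gt, pred, area=area, sigma=OKS_SIGMA)
print(oks.shape)                # torch.Size([4, 6]), values in [0, 1]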
def smooth_BCE(eps=0.1): # https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441 def smooth_BCE(eps=0.1): # https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441
@ -649,13 +653,13 @@ class SegmentMetrics(SimpleClass):
self.seg = Metric() self.seg = Metric()
self.speed = {'preprocess': 0.0, 'inference': 0.0, 'loss': 0.0, 'postprocess': 0.0} self.speed = {'preprocess': 0.0, 'inference': 0.0, 'loss': 0.0, 'postprocess': 0.0}
def process(self, tp_m, tp_b, conf, pred_cls, target_cls): def process(self, tp_b, tp_m, conf, pred_cls, target_cls):
""" """
Processes the detection and segmentation metrics over the given set of predictions. Processes the detection and segmentation metrics over the given set of predictions.
Args: Args:
tp_m (list): List of True Positive masks.
tp_b (list): List of True Positive boxes. tp_b (list): List of True Positive boxes.
tp_m (list): List of True Positive masks.
conf (list): List of confidence scores. conf (list): List of confidence scores.
pred_cls (list): List of predicted classes. pred_cls (list): List of predicted classes.
target_cls (list): List of target classes. target_cls (list): List of target classes.
@ -712,6 +716,100 @@ class SegmentMetrics(SimpleClass):
return dict(zip(self.keys + ['fitness'], self.mean_results() + [self.fitness])) return dict(zip(self.keys + ['fitness'], self.mean_results() + [self.fitness]))
class PoseMetrics(SegmentMetrics):
"""
Calculates and aggregates detection and pose metrics over a given set of classes.
Args:
save_dir (Path): Path to the directory where the output plots should be saved. Default is the current directory.
plot (bool): Whether to save the detection and pose plots. Default is False.
names (list): List of class names. Default is an empty list.
Attributes:
save_dir (Path): Path to the directory where the output plots should be saved.
plot (bool): Whether to save the detection and pose plots.
names (list): List of class names.
box (Metric): An instance of the Metric class to calculate box detection metrics.
pose (Metric): An instance of the Metric class to calculate keypoint (pose) metrics.
speed (dict): Dictionary to store the time taken in different phases of inference.
Methods:
process(tp_b, tp_p, conf, pred_cls, target_cls): Processes metrics over the given set of predictions.
mean_results(): Returns the mean of the detection and pose metrics over all the classes.
class_result(i): Returns the detection and pose metrics of class `i`.
maps: Returns the mean Average Precision (mAP) scores for IoU thresholds ranging from 0.50 to 0.95.
fitness: Returns the fitness scores, which are a single weighted combination of metrics.
ap_class_index: Returns the list of indices of classes used to compute Average Precision (AP).
results_dict: Returns the dictionary containing all the detection and pose metrics and fitness score.
"""
def __init__(self, save_dir=Path('.'), plot=False, names=()) -> None:
super().__init__(save_dir, plot, names)
self.save_dir = save_dir
self.plot = plot
self.names = names
self.box = Metric()
self.pose = Metric()
self.speed = {'preprocess': 0.0, 'inference': 0.0, 'loss': 0.0, 'postprocess': 0.0}
def __getattr__(self, attr):
name = self.__class__.__name__
raise AttributeError(f"'{name}' object has no attribute '{attr}'. See valid attributes below.\n{self.__doc__}")
def process(self, tp_b, tp_p, conf, pred_cls, target_cls):
"""
Processes the detection and pose metrics over the given set of predictions.
Args:
tp_b (list): List of True Positive boxes.
tp_p (list): List of True Positive keypoints.
conf (list): List of confidence scores.
pred_cls (list): List of predicted classes.
target_cls (list): List of target classes.
"""
results_pose = ap_per_class(tp_p,
conf,
pred_cls,
target_cls,
plot=self.plot,
save_dir=self.save_dir,
names=self.names,
prefix='Pose')[2:]
self.pose.nc = len(self.names)
self.pose.update(results_pose)
results_box = ap_per_class(tp_b,
conf,
pred_cls,
target_cls,
plot=self.plot,
save_dir=self.save_dir,
names=self.names,
prefix='Box')[2:]
self.box.nc = len(self.names)
self.box.update(results_box)
@property
def keys(self):
return [
'metrics/precision(B)', 'metrics/recall(B)', 'metrics/mAP50(B)', 'metrics/mAP50-95(B)',
'metrics/precision(P)', 'metrics/recall(P)', 'metrics/mAP50(P)', 'metrics/mAP50-95(P)']
def mean_results(self):
return self.box.mean_results() + self.pose.mean_results()
def class_result(self, i):
return self.box.class_result(i) + self.pose.class_result(i)
@property
def maps(self):
return self.box.maps + self.pose.maps
@property
def fitness(self):
return self.pose.fitness() + self.box.fitness()
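Editor's sketch of how PoseMetrics is consumed: random stand-in statistics replace the TP matrices a validator accumulates (the class-name dict is an assumption mirroring model.names).

import numpy as np
tp_b = np.random.rand(100, 10) > 0.5     # box TP matrix at 10 IoU thresholds
tp_p = np.random.rand(100, 10) > 0.5     # keypoint TP matrix at 10 OKS thresholds
conf, pred_cls = np.random.rand(100), np.zeros(100)
target_cls = np.zeros(80)
metrics = PoseMetrics(names={0: 'person'})
metrics.process(tp_b, tp_p, conf, pred_cls, target_cls)
print(metrics.results_dict)              # includes 'metrics/mAP50-95(P)' and fitness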
class ClassifyMetrics(SimpleClass): class ClassifyMetrics(SimpleClass):
""" """
Class for computing classification metrics including top-1 and top-5 accuracy. Class for computing classification metrics including top-1 and top-5 accuracy.

@ -281,28 +281,23 @@ def clip_boxes(boxes, shape):
boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, shape[0]) # y1, y2 boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, shape[0]) # y1, y2
def clip_coords(boxes, shape): def clip_coords(coords, shape):
""" """
Clip bounding xyxy bounding boxes to image shape (height, width). Clip line coordinates to the image boundaries.
Args: Args:
boxes (torch.Tensor or numpy.ndarray): Bounding boxes to be clipped. coords (torch.Tensor) or (numpy.ndarray): A list of line coordinates.
shape (tuple): The shape of the image. (height, width) shape (tuple): A tuple of integers representing the size of the image in the format (height, width).
Returns: Returns:
None (None): The function modifies the input `coordinates` in place, by clipping each coordinate to the image boundaries.
Note:
The input `boxes` is modified in-place, there is no return value.
""" """
if isinstance(boxes, torch.Tensor): # faster individually if isinstance(coords, torch.Tensor): # faster individually
boxes[:, 0].clamp_(0, shape[1]) # x1 coords[..., 0].clamp_(0, shape[1]) # x
boxes[:, 1].clamp_(0, shape[0]) # y1 coords[..., 1].clamp_(0, shape[0]) # y
boxes[:, 2].clamp_(0, shape[1]) # x2
boxes[:, 3].clamp_(0, shape[0]) # y2
else: # np.array (faster grouped) else: # np.array (faster grouped)
boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1]) # x1, x2 coords[..., 0] = coords[..., 0].clip(0, shape[1]) # x
boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0]) # y1, y2 coords[..., 1] = coords[..., 1].clip(0, shape[0]) # y
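Editor's sketch: the rewritten clip_coords clips (x, y) pairs in place for any leading shape, so it now handles keypoint tensors as well as polygon points.

import torch
kpts = torch.tensor([[[-5.0, 10.0, 0.9], [650.0, 500.0, 0.8]]])   # (1, 2, 3) rows of (x, y, conf)
clip_coords(kpts, (480, 640))            # shape = (height, width); modifies in place
print(kpts[..., :2])                     # x clamped to [0, 640], y clamped to [0, 480]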
def scale_image(im1_shape, masks, im0_shape, ratio_pad=None): def scale_image(im1_shape, masks, im0_shape, ratio_pad=None):
@ -577,17 +572,18 @@ def process_mask_upsample(protos, masks_in, bboxes, shape):
def process_mask(protos, masks_in, bboxes, shape, upsample=False): def process_mask(protos, masks_in, bboxes, shape, upsample=False):
""" """
It takes the output of the mask head, and applies the mask to the bounding boxes. This is faster but produces Apply masks to bounding boxes using the output of the mask head.
downsampled quality of mask
Args: Args:
protos (torch.Tensor): [mask_dim, mask_h, mask_w] protos (torch.Tensor): A tensor of shape [mask_dim, mask_h, mask_w].
masks_in (torch.Tensor): [n, mask_dim], n is number of masks after nms masks_in (torch.Tensor): A tensor of shape [n, mask_dim], where n is the number of masks after NMS.
bboxes (torch.Tensor): [n, 4], n is number of masks after nms bboxes (torch.Tensor): A tensor of shape [n, 4], where n is the number of masks after NMS.
shape (tuple): the size of the input image (h,w) shape (tuple): A tuple of integers representing the size of the input image in the format (h, w).
upsample (bool): A flag to indicate whether to upsample the mask to the original image size. Default is False.
Returns: Returns:
(torch.Tensor): The processed masks. (torch.Tensor): A binary mask tensor of shape [n, h, w], where n is the number of masks after NMS, and h and w
are the height and width of the input image. The mask is applied to the bounding boxes.
""" """
c, mh, mw = protos.shape # CHW c, mh, mw = protos.shape # CHW
@ -632,19 +628,19 @@ def process_mask_native(protos, masks_in, bboxes, shape):
return masks.gt_(0.5) return masks.gt_(0.5)
def scale_segments(img1_shape, segments, img0_shape, ratio_pad=None, normalize=False): def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None, normalize=False):
""" """
Rescale segment coordinates (xyxy) from img1_shape to img0_shape Rescale coordinates (x, y) from img1_shape to img0_shape
Args: Args:
img1_shape (tuple): The shape of the image that the segments are from. img1_shape (tuple): The shape of the image that the coords are from.
segments (torch.Tensor): the segments to be scaled coords (torch.Tensor): the coords to be scaled
img0_shape (tuple): the shape of the image that the segmentation is being applied to img0_shape (tuple): the shape of the image that the segmentation is being applied to
ratio_pad (tuple): the ratio of the image size to the padded image size. ratio_pad (tuple): the ratio of the image size to the padded image size.
normalize (bool): If True, the coordinates will be normalized to the range [0, 1]. Defaults to False normalize (bool): If True, the coordinates will be normalized to the range [0, 1]. Defaults to False
Returns: Returns:
segments (torch.Tensor): the segmented image. coords (torch.Tensor): the rescaled coordinates.
""" """
if ratio_pad is None: # calculate from img0_shape if ratio_pad is None: # calculate from img0_shape
gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new
@ -653,14 +649,15 @@ def scale_segments(img1_shape, segments, img0_shape, ratio_pad=None, normalize=F
gain = ratio_pad[0][0] gain = ratio_pad[0][0]
pad = ratio_pad[1] pad = ratio_pad[1]
segments[:, 0] -= pad[0] # x padding coords[..., 0] -= pad[0] # x padding
segments[:, 1] -= pad[1] # y padding coords[..., 1] -= pad[1] # y padding
segments /= gain coords[..., 0] /= gain
clip_segments(segments, img0_shape) coords[..., 1] /= gain
clip_coords(coords, img0_shape)
if normalize: if normalize:
segments[:, 0] /= img0_shape[1] # width coords[..., 0] /= img0_shape[1] # width
segments[:, 1] /= img0_shape[0] # height coords[..., 1] /= img0_shape[0] # height
return segments return coords
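Editor's sketch: mapping keypoints from a 640x640 letterboxed input back to a 480x640 original, with the gain/pad values worked out in the comments.

import torch
kpts = torch.rand(3, 17, 3) * 640              # predictions in letterbox space
out = scale_coords((640, 640), kpts, (480, 640))
# gain = min(640/480, 640/640) = 1.0 and pad = (0, 80): x is unchanged,
# y is shifted up by 80, then both are clipped to the 480x640 bounds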
def masks2segments(masks, strategy='largest'): def masks2segments(masks, strategy='largest'):
@ -688,23 +685,6 @@ def masks2segments(masks, strategy='largest'):
return segments return segments
def clip_segments(segments, shape):
"""
It takes a list of line segments (x1,y1,x2,y2) and clips them to the image shape (height, width)
Args:
segments (list): a list of segments, each segment is a list of points, each point is a list of x,y
coordinates
shape (tuple): the shape of the image
"""
if isinstance(segments, torch.Tensor): # faster individually
segments[:, 0].clamp_(0, shape[1]) # x
segments[:, 1].clamp_(0, shape[0]) # y
else: # np.array (faster grouped)
segments[:, 0] = segments[:, 0].clip(0, shape[1]) # x
segments[:, 1] = segments[:, 1].clip(0, shape[0]) # y
def clean_str(s): def clean_str(s):
""" """
Cleans a string by replacing special characters with underscore _ Cleans a string by replacing special characters with underscore _

@ -16,7 +16,7 @@ from ultralytics.yolo.utils import LOGGER, TryExcept, threaded
from .checks import check_font, check_version, is_ascii from .checks import check_font, check_version, is_ascii
from .files import increment_path from .files import increment_path
from .ops import clip_coords, scale_image, xywh2xyxy, xyxy2xywh from .ops import clip_boxes, scale_image, xywh2xyxy, xyxy2xywh
matplotlib.rc('font', **{'size': 11}) matplotlib.rc('font', **{'size': 11})
matplotlib.use('Agg') # for writing to files only matplotlib.use('Agg') # for writing to files only
@ -30,6 +30,11 @@ class Colors:
'2C99A8', '00C2FF', '344593', '6473FF', '0018EC', '8438FF', '520085', 'CB38FF', 'FF95C8', 'FF37C7') '2C99A8', '00C2FF', '344593', '6473FF', '0018EC', '8438FF', '520085', 'CB38FF', 'FF95C8', 'FF37C7')
self.palette = [self.hex2rgb(f'#{c}') for c in hexs] self.palette = [self.hex2rgb(f'#{c}') for c in hexs]
self.n = len(self.palette) self.n = len(self.palette)
self.pose_palette = np.array([[255, 128, 0], [255, 153, 51], [255, 178, 102], [230, 230, 0], [255, 153, 255],
[153, 204, 255], [255, 102, 255], [255, 51, 255], [102, 178, 255], [51, 153, 255],
[255, 153, 153], [255, 102, 102], [255, 51, 51], [153, 255, 153], [102, 255, 102],
[51, 255, 51], [0, 255, 0], [0, 0, 255], [255, 0, 0], [255, 255, 255]],
dtype=np.uint8)
def __call__(self, i, bgr=False): def __call__(self, i, bgr=False):
c = self.palette[int(i) % self.n] c = self.palette[int(i) % self.n]
@ -62,6 +67,12 @@ class Annotator:
else: # use cv2 else: # use cv2
self.im = im self.im = im
self.lw = line_width or max(round(sum(im.shape) / 2 * 0.003), 2) # line width self.lw = line_width or max(round(sum(im.shape) / 2 * 0.003), 2) # line width
# pose
self.skeleton = [[16, 14], [14, 12], [17, 15], [15, 13], [12, 13], [6, 12], [7, 13], [6, 7], [6, 8], [7, 9],
[8, 10], [9, 11], [2, 3], [1, 2], [1, 3], [2, 4], [3, 5], [4, 6], [5, 7]]
self.limb_color = colors.pose_palette[[9, 9, 9, 9, 7, 7, 7, 0, 0, 0, 0, 0, 16, 16, 16, 16, 16, 16, 16]]
self.kpt_color = colors.pose_palette[[16, 16, 16, 16, 16, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9]]
def box_label(self, box, label='', color=(128, 128, 128), txt_color=(255, 255, 255)): def box_label(self, box, label='', color=(128, 128, 128), txt_color=(255, 255, 255)):
# Add one xyxy box to image with label # Add one xyxy box to image with label
@ -132,6 +143,49 @@ class Annotator:
# convert im back to PIL and update draw # convert im back to PIL and update draw
self.fromarray(self.im) self.fromarray(self.im)
def kpts(self, kpts, shape=(640, 640), radius=5, kpt_line=True):
"""Plot keypoints.
Args:
kpts (tensor): predicted kpts, shape: [17, 3]
shape (tuple): image shape, (h, w)
radius (int): radius of the drawn keypoint circles
kpt_line (bool): whether to draw skeleton lines between keypoints
"""
if self.pil:
# convert to numpy first
self.im = np.asarray(self.im).copy()
nkpt, ndim = kpts.shape
is_pose = nkpt == 17 and ndim == 3
kpt_line &= is_pose # `kpt_line=True` for now only supports human pose plotting
for i, k in enumerate(kpts):
color_k = [int(x) for x in self.kpt_color[i]] if is_pose else colors(i)
x_coord, y_coord = k[0], k[1]
if x_coord % shape[1] != 0 and y_coord % shape[0] != 0:
if len(k) == 3:
conf = k[2]
if conf < 0.5:
continue
cv2.circle(self.im, (int(x_coord), int(y_coord)), radius, color_k, -1)
if kpt_line:
ndim = kpts.shape[-1]
for sk_id, sk in enumerate(self.skeleton):
pos1 = (int(kpts[(sk[0] - 1), 0]), int(kpts[(sk[0] - 1), 1]))
pos2 = (int(kpts[(sk[1] - 1), 0]), int(kpts[(sk[1] - 1), 1]))
if ndim == 3:
conf1 = kpts[(sk[0] - 1), 2]
conf2 = kpts[(sk[1] - 1), 2]
if conf1 < 0.5 or conf2 < 0.5:
continue
if pos1[0] % shape[1] == 0 or pos1[1] % shape[0] == 0 or pos1[0] < 0 or pos1[1] < 0:
continue
if pos2[0] % shape[1] == 0 or pos2[1] % shape[0] == 0 or pos2[0] < 0 or pos2[1] < 0:
continue
cv2.line(self.im, pos1, pos2, [int(x) for x in self.limb_color[sk_id]], thickness=2)
if self.pil:
# convert im back to PIL and update draw
self.fromarray(self.im)
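Editor's sketch of the new kpts() call on a cv2-backed Annotator; random keypoints stand in for model output.

import numpy as np
from ultralytics.yolo.utils.plotting import Annotator
im = np.zeros((480, 640, 3), dtype=np.uint8)
annotator = Annotator(im)
kpts = np.random.rand(17, 3) * np.array([640, 480, 1])   # rows of (x, y, conf)
annotator.kpts(kpts, shape=(480, 640))                   # draws circles + skeleton lines
annotated = annotator.im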
def rectangle(self, xy, fill=None, outline=None, width=1): def rectangle(self, xy, fill=None, outline=None, width=1):
# Add rectangle to image (PIL-only) # Add rectangle to image (PIL-only)
self.draw.rectangle(xy, fill, outline, width) self.draw.rectangle(xy, fill, outline, width)
@ -213,7 +267,7 @@ def save_one_box(xyxy, im, file=Path('im.jpg'), gain=1.02, pad=10, square=False,
b[:, 2:] = b[:, 2:].max(1)[0].unsqueeze(1) # attempt rectangle to square b[:, 2:] = b[:, 2:].max(1)[0].unsqueeze(1) # attempt rectangle to square
b[:, 2:] = b[:, 2:] * gain + pad # box wh * gain + pad b[:, 2:] = b[:, 2:] * gain + pad # box wh * gain + pad
xyxy = xywh2xyxy(b).long() xyxy = xywh2xyxy(b).long()
clip_coords(xyxy, im.shape) clip_boxes(xyxy, im.shape)
crop = im[int(xyxy[0, 1]):int(xyxy[0, 3]), int(xyxy[0, 0]):int(xyxy[0, 2]), ::(1 if BGR else -1)] crop = im[int(xyxy[0, 1]):int(xyxy[0, 3]), int(xyxy[0, 0]):int(xyxy[0, 2]), ::(1 if BGR else -1)]
if save: if save:
file.parent.mkdir(parents=True, exist_ok=True) # make directory file.parent.mkdir(parents=True, exist_ok=True) # make directory
@ -229,6 +283,7 @@ def plot_images(images,
cls, cls,
bboxes, bboxes,
masks=np.zeros(0, dtype=np.uint8), masks=np.zeros(0, dtype=np.uint8),
kpts=np.zeros((0, 51), dtype=np.float32),
paths=None, paths=None,
fname='images.jpg', fname='images.jpg',
names=None): names=None):
@ -241,6 +296,8 @@ def plot_images(images,
bboxes = bboxes.cpu().numpy() bboxes = bboxes.cpu().numpy()
if isinstance(masks, torch.Tensor): if isinstance(masks, torch.Tensor):
masks = masks.cpu().numpy().astype(int) masks = masks.cpu().numpy().astype(int)
if isinstance(kpts, torch.Tensor):
kpts = kpts.cpu().numpy()
if isinstance(batch_idx, torch.Tensor): if isinstance(batch_idx, torch.Tensor):
batch_idx = batch_idx.cpu().numpy() batch_idx = batch_idx.cpu().numpy()
@ -300,6 +357,21 @@ def plot_images(images,
label = f'{c}' if labels else f'{c} {conf[j]:.1f}' label = f'{c}' if labels else f'{c} {conf[j]:.1f}'
annotator.box_label(box, label, color=color) annotator.box_label(box, label, color=color)
# Plot keypoints
if len(kpts):
kpts_ = kpts[idx].copy()
if len(kpts_):
if kpts_[..., 0].max() <= 1.01 or kpts_[..., 1].max() <= 1.01: # if normalized with tolerance .01
kpts_[..., 0] *= w # scale to pixels
kpts_[..., 1] *= h
elif scale < 1: # absolute coords need scale if image scales
kpts_ *= scale
kpts_[..., 0] += x
kpts_[..., 1] += y
for j in range(len(kpts_)):
if labels or conf[j] > 0.25: # 0.25 conf thresh
annotator.kpts(kpts_[j])
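# Editor's note: mirroring the box handling above, keypoints arrive either
# normalized (max <= 1.01) and are scaled to tile pixels, or already absolute
# and are only shrunk when the mosaic itself was resized (scale < 1), before
# being offset by the tile origin (x, y).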
# Plot masks # Plot masks
if len(masks): if len(masks):
if idx.shape[0] == masks.shape[0]: # overlap_masks=False if idx.shape[0] == masks.shape[0]: # overlap_masks=False
@ -307,7 +379,7 @@ def plot_images(images,
else: # overlap_masks=True else: # overlap_masks=True
image_masks = masks[[i]] # (1, 640, 640) image_masks = masks[[i]] # (1, 640, 640)
nl = idx.sum() nl = idx.sum()
index = np.arange(nl).reshape(nl, 1, 1) + 1 index = np.arange(nl).reshape((nl, 1, 1)) + 1
image_masks = np.repeat(image_masks, nl, axis=0) image_masks = np.repeat(image_masks, nl, axis=0)
image_masks = np.where(image_masks == index, 1.0, 0.0) image_masks = np.where(image_masks == index, 1.0, 0.0)
@ -328,13 +400,16 @@ def plot_images(images,
annotator.im.save(fname) # save annotator.im.save(fname) # save
def plot_results(file='path/to/results.csv', dir='', segment=False): def plot_results(file='path/to/results.csv', dir='', segment=False, pose=False):
# Plot training results.csv. Usage: from utils.plots import *; plot_results('path/to/results.csv') # Plot training results.csv. Usage: from utils.plots import *; plot_results('path/to/results.csv')
import pandas as pd import pandas as pd
save_dir = Path(file).parent if file else Path(dir) save_dir = Path(file).parent if file else Path(dir)
if segment: if segment:
fig, ax = plt.subplots(2, 8, figsize=(18, 6), tight_layout=True) fig, ax = plt.subplots(2, 8, figsize=(18, 6), tight_layout=True)
index = [1, 2, 3, 4, 5, 6, 9, 10, 13, 14, 15, 16, 7, 8, 11, 12] index = [1, 2, 3, 4, 5, 6, 9, 10, 13, 14, 15, 16, 7, 8, 11, 12]
elif pose:
fig, ax = plt.subplots(2, 9, figsize=(21, 6), tight_layout=True)
index = [1, 2, 3, 4, 5, 6, 7, 10, 11, 14, 15, 16, 17, 18, 8, 9, 12, 13]
else: else:
fig, ax = plt.subplots(2, 5, figsize=(12, 6), tight_layout=True) fig, ax = plt.subplots(2, 5, figsize=(12, 6), tight_layout=True)
index = [1, 2, 3, 4, 5, 8, 9, 10, 6, 7] index = [1, 2, 3, 4, 5, 8, 9, 10, 6, 7]

@ -240,8 +240,8 @@ def copy_attr(a, b, include=(), exclude=()):
def get_latest_opset(): def get_latest_opset():
# Return max supported ONNX opset by this version of torch # Return second-most (for maturity) recently supported ONNX opset by this version of torch
return max(int(k[14:]) for k in vars(torch.onnx) if 'symbolic_opset' in k) # opset return max(int(k[14:]) for k in vars(torch.onnx) if 'symbolic_opset' in k) - 1 # opset
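Editor's sketch of the new behaviour; the exact opset numbers depend on the installed torch build.

import torch
opsets = sorted(int(k[14:]) for k in vars(torch.onnx) if 'symbolic_opset' in k)
print(opsets[-1] - 1)    # the value get_latest_opset() now returns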
def intersect_dicts(da, db, exclude=()): def intersect_dicts(da, db, exclude=()):
@ -318,18 +318,18 @@ def strip_optimizer(f: Union[str, Path] = 'best.pt', s: str = '') -> None:
""" """
Strip optimizer from 'f' to finalize training, optionally save as 's'. Strip optimizer from 'f' to finalize training, optionally save as 's'.
Usage:
from ultralytics.yolo.utils.torch_utils import strip_optimizer
from pathlib import Path
for f in Path('/Users/glennjocher/Downloads/weights').glob('*.pt'):
strip_optimizer(f)
Args: Args:
f (str): file path to model to strip the optimizer from. Default is 'best.pt'. f (str): file path to model to strip the optimizer from. Default is 'best.pt'.
s (str): file path to save the model with stripped optimizer to. If not provided, 'f' will be overwritten. s (str): file path to save the model with stripped optimizer to. If not provided, 'f' will be overwritten.
Returns: Returns:
None None
Usage:
from pathlib import Path
from ultralytics.yolo.utils.torch_utils import strip_optimizer
for f in Path('/Users/glennjocher/Downloads/weights').rglob('*.pt'):
strip_optimizer(f)
""" """
x = torch.load(f, map_location=torch.device('cpu')) x = torch.load(f, map_location=torch.device('cpu'))
args = {**DEFAULT_CFG_DICT, **x['train_args']} # combine model args with default args, preferring model args args = {**DEFAULT_CFG_DICT, **x['train_args']} # combine model args with default args, preferring model args
@ -349,7 +349,9 @@ def strip_optimizer(f: Union[str, Path] = 'best.pt', s: str = '') -> None:
def profile(input, ops, n=10, device=None): def profile(input, ops, n=10, device=None):
""" YOLOv8 speed/memory/FLOPs profiler """
YOLOv8 speed/memory/FLOPs profiler
Usage: Usage:
input = torch.randn(16, 3, 640, 640) input = torch.randn(16, 3, 640, 640)
m1 = lambda x: x * torch.sigmoid(x) m1 = lambda x: x * torch.sigmoid(x)

@ -1,5 +1,5 @@
# Ultralytics YOLO 🚀, GPL-3.0 license # Ultralytics YOLO 🚀, GPL-3.0 license
from ultralytics.yolo.v8 import classify, detect, segment from ultralytics.yolo.v8 import classify, detect, pose, segment
__all__ = 'classify', 'segment', 'detect' __all__ = 'classify', 'segment', 'detect', 'pose'

@ -41,7 +41,7 @@ class DetectionTrainer(BaseTrainer):
shuffle=mode == 'train', shuffle=mode == 'train',
seed=self.args.seed)[0] if self.args.v5loader else \ seed=self.args.seed)[0] if self.args.v5loader else \
build_dataloader(self.args, batch_size, img_path=dataset_path, stride=gs, rank=rank, mode=mode, build_dataloader(self.args, batch_size, img_path=dataset_path, stride=gs, rank=rank, mode=mode,
rect=mode == 'val', names=self.data['names'])[0] rect=mode == 'val', data_info=self.data)[0]
def preprocess_batch(self, batch): def preprocess_batch(self, batch):
batch['img'] = batch['img'].to(self.device, non_blocking=True).float() / 255 batch['img'] = batch['img'].to(self.device, non_blocking=True).float() / 255

@ -41,7 +41,7 @@ class DetectionValidator(BaseValidator):
def init_metrics(self, model): def init_metrics(self, model):
val = self.data.get(self.args.split, '') # validation path val = self.data.get(self.args.split, '') # validation path
self.is_coco = isinstance(val, str) and val.endswith(f'coco{os.sep}val2017.txt') # is COCO dataset self.is_coco = isinstance(val, str) and 'coco' in val and val.endswith(f'{os.sep}val2017.txt') # is COCO
self.class_map = ops.coco80_to_coco91_class() if self.is_coco else list(range(1000)) self.class_map = ops.coco80_to_coco91_class() if self.is_coco else list(range(1000))
self.args.save_json |= self.is_coco and not self.training # run on final val if training COCO self.args.save_json |= self.is_coco and not self.training # run on final val if training COCO
self.names = model.names self.names = model.names
@ -179,7 +179,7 @@ class DetectionValidator(BaseValidator):
prefix=colorstr(f'{self.args.mode}: '), prefix=colorstr(f'{self.args.mode}: '),
shuffle=False, shuffle=False,
seed=self.args.seed)[0] if self.args.v5loader else \ seed=self.args.seed)[0] if self.args.v5loader else \
build_dataloader(self.args, batch_size, img_path=dataset_path, stride=gs, names=self.data['names'], build_dataloader(self.args, batch_size, img_path=dataset_path, stride=gs, data_info=self.data,
mode='val')[0] mode='val')[0]
def plot_val_samples(self, batch, ni): def plot_val_samples(self, batch, ni):

@ -0,0 +1,7 @@
# Ultralytics YOLO 🚀, GPL-3.0 license
from .predict import PosePredictor, predict
from .train import PoseTrainer, train
from .val import PoseValidator, val
__all__ = 'PoseTrainer', 'train', 'PoseValidator', 'val', 'PosePredictor', 'predict'

@ -0,0 +1,103 @@
# Ultralytics YOLO 🚀, GPL-3.0 license
from ultralytics.yolo.engine.results import Results
from ultralytics.yolo.utils import DEFAULT_CFG, ROOT, ops
from ultralytics.yolo.utils.plotting import colors, save_one_box
from ultralytics.yolo.v8.detect.predict import DetectionPredictor
class PosePredictor(DetectionPredictor):
def postprocess(self, preds, img, orig_img):
preds = ops.non_max_suppression(preds,
self.args.conf,
self.args.iou,
agnostic=self.args.agnostic_nms,
max_det=self.args.max_det,
classes=self.args.classes,
nc=len(self.model.names))
results = []
for i, pred in enumerate(preds):
orig_img = orig_img[i] if isinstance(orig_img, list) else orig_img
shape = orig_img.shape
pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], shape).round()
pred_kpts = pred[:, 6:].view(len(pred), *self.model.kpt_shape) if len(pred) else pred[:, 6:]
pred_kpts = ops.scale_coords(img.shape[2:], pred_kpts, shape)
path, _, _, _, _ = self.batch
img_path = path[i] if isinstance(path, list) else path
results.append(
Results(orig_img=orig_img,
path=img_path,
names=self.model.names,
boxes=pred[:, :6],
keypoints=pred_kpts))
return results
def write_results(self, idx, results, batch):
p, im, im0 = batch
log_string = ''
if len(im.shape) == 3:
im = im[None] # expand for batch dim
self.seen += 1
imc = im0.copy() if self.args.save_crop else im0
if self.source_type.webcam or self.source_type.from_img: # batch_size >= 1
log_string += f'{idx}: '
frame = self.dataset.count
else:
frame = getattr(self.dataset, 'frame', 0)
self.data_path = p
self.txt_path = str(self.save_dir / 'labels' / p.stem) + ('' if self.dataset.mode == 'image' else f'_{frame}')
log_string += '%gx%g ' % im.shape[2:] # print string
self.annotator = self.get_annotator(im0)
det = results[idx].boxes # TODO: make boxes inherit from tensors
if len(det) == 0:
return f'{log_string}(no detections), '
for c in det.cls.unique():
n = (det.cls == c).sum() # detections per class
log_string += f"{n} {self.model.names[int(c)]}{'s' * (n > 1)}, "
kpts = reversed(results[idx].keypoints)
for k in kpts:
self.annotator.kpts(k, shape=results[idx].orig_shape)
# write
for j, d in enumerate(reversed(det)):
c, conf, id = int(d.cls), float(d.conf), None if d.id is None else int(d.id.item())
if self.args.save_txt: # Write to file
kpt = (kpts[j][:, :2] / d.orig_shape[[1, 0]]).reshape(-1).tolist()
box = d.xywhn.view(-1).tolist()
line = (c, *box, *kpt) + (conf, ) * self.args.save_conf + (() if id is None else (id, ))
with open(f'{self.txt_path}.txt', 'a') as f:
f.write(('%g ' * len(line)).rstrip() % line + '\n')
if self.args.save or self.args.show: # Add bbox to image
name = ('' if id is None else f'id:{id} ') + self.model.names[c]
label = (f'{name} {conf:.2f}' if self.args.show_conf else name) if self.args.show_labels else None
if self.args.boxes:
self.annotator.box_label(d.xyxy.squeeze(), label, color=colors(c, True))
if self.args.save_crop:
save_one_box(d.xyxy,
imc,
file=self.save_dir / 'crops' / self.model.model.names[c] / f'{self.data_path.stem}.jpg',
BGR=True)
return log_string
def predict(cfg=DEFAULT_CFG, use_python=False):
model = cfg.model or 'yolov8n-pose.pt'
source = cfg.source if cfg.source is not None else ROOT / 'assets' if (ROOT / 'assets').exists() \
else 'https://ultralytics.com/images/bus.jpg'
args = dict(model=model, source=source)
if use_python:
from ultralytics import YOLO
YOLO(model)(**args)
else:
predictor = PosePredictor(overrides=args)
predictor.predict_cli()
if __name__ == '__main__':
predict()
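A quick end-to-end sketch of the new pose task through the public API (editor's addition; weight auto-download and the keypoints layout are assumptions based on this commit):

from ultralytics import YOLO
model = YOLO('yolov8n-pose.pt')
results = model('https://ultralytics.com/images/bus.jpg')
for r in results:
    print(r.keypoints.shape)    # (num_detections, 17, 3) -> x, y, confidence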

@ -0,0 +1,170 @@
# Ultralytics YOLO 🚀, GPL-3.0 license
from copy import copy
import torch
import torch.nn as nn
from ultralytics.nn.tasks import PoseModel
from ultralytics.yolo import v8
from ultralytics.yolo.utils import DEFAULT_CFG
from ultralytics.yolo.utils.loss import KeypointLoss
from ultralytics.yolo.utils.metrics import OKS_SIGMA
from ultralytics.yolo.utils.ops import xyxy2xywh
from ultralytics.yolo.utils.plotting import plot_images, plot_results
from ultralytics.yolo.utils.tal import make_anchors
from ultralytics.yolo.utils.torch_utils import de_parallel
from ultralytics.yolo.v8.detect.train import Loss
# BaseTrainer python usage
class PoseTrainer(v8.detect.DetectionTrainer):
def __init__(self, cfg=DEFAULT_CFG, overrides=None):
if overrides is None:
overrides = {}
overrides['task'] = 'pose'
super().__init__(cfg, overrides)
def get_model(self, cfg=None, weights=None, verbose=True):
model = PoseModel(cfg, ch=3, nc=self.data['nc'], data_kpt_shape=self.data['kpt_shape'], verbose=verbose)
if weights:
model.load(weights)
return model
def set_model_attributes(self):
super().set_model_attributes()
self.model.kpt_shape = self.data['kpt_shape']
def get_validator(self):
self.loss_names = 'box_loss', 'pose_loss', 'kobj_loss', 'cls_loss', 'dfl_loss'
return v8.pose.PoseValidator(self.test_loader, save_dir=self.save_dir, args=copy(self.args))
def criterion(self, preds, batch):
if not hasattr(self, 'compute_loss'):
self.compute_loss = PoseLoss(de_parallel(self.model))
return self.compute_loss(preds, batch)
def plot_training_samples(self, batch, ni):
images = batch['img']
kpts = batch['keypoints']
cls = batch['cls'].squeeze(-1)
bboxes = batch['bboxes']
paths = batch['im_file']
batch_idx = batch['batch_idx']
plot_images(images,
batch_idx,
cls,
bboxes,
kpts=kpts,
paths=paths,
fname=self.save_dir / f'train_batch{ni}.jpg')
def plot_metrics(self):
plot_results(file=self.csv, pose=True) # save results.png
# Criterion class for computing training losses
class PoseLoss(Loss):
def __init__(self, model): # model must be de-paralleled
super().__init__(model)
self.kpt_shape = model.model[-1].kpt_shape
self.bce_pose = nn.BCEWithLogitsLoss()
is_pose = self.kpt_shape == [17, 3]
nkpt = self.kpt_shape[0] # number of keypoints
sigmas = torch.from_numpy(OKS_SIGMA).to(self.device) if is_pose else torch.ones(nkpt, device=self.device) / nkpt
self.keypoint_loss = KeypointLoss(sigmas=sigmas)
def __call__(self, preds, batch):
loss = torch.zeros(5, device=self.device) # box, cls, dfl, kpt_location, kpt_visibility
feats, pred_kpts = preds if isinstance(preds[0], list) else preds[1]
pred_distri, pred_scores = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats], 2).split(
(self.reg_max * 4, self.nc), 1)
# b, grids, ..
pred_scores = pred_scores.permute(0, 2, 1).contiguous()
pred_distri = pred_distri.permute(0, 2, 1).contiguous()
pred_kpts = pred_kpts.permute(0, 2, 1).contiguous()
dtype = pred_scores.dtype
imgsz = torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) * self.stride[0] # image size (h,w)
anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5)
# targets
batch_size = pred_scores.shape[0]
batch_idx = batch['batch_idx'].view(-1, 1)
targets = torch.cat((batch_idx, batch['cls'].view(-1, 1), batch['bboxes']), 1)
targets = self.preprocess(targets.to(self.device), batch_size, scale_tensor=imgsz[[1, 0, 1, 0]])
gt_labels, gt_bboxes = targets.split((1, 4), 2) # cls, xyxy
mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0)
# pboxes
pred_bboxes = self.bbox_decode(anchor_points, pred_distri) # xyxy, (b, h*w, 4)
pred_kpts = self.kpts_decode(anchor_points, pred_kpts.view(batch_size, -1, *self.kpt_shape)) # (b, h*w, 17, 3)
_, target_bboxes, target_scores, fg_mask, target_gt_idx = self.assigner(
pred_scores.detach().sigmoid(), (pred_bboxes.detach() * stride_tensor).type(gt_bboxes.dtype),
anchor_points * stride_tensor, gt_labels, gt_bboxes, mask_gt)
target_scores_sum = max(target_scores.sum(), 1)
# cls loss
# loss[1] = self.varifocal_loss(pred_scores, target_scores, target_labels) / target_scores_sum # VFL way
loss[3] = self.bce(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum # BCE
# bbox loss
if fg_mask.sum():
target_bboxes /= stride_tensor
loss[0], loss[4] = self.bbox_loss(pred_distri, pred_bboxes, anchor_points, target_bboxes, target_scores,
target_scores_sum, fg_mask)
keypoints = batch['keypoints'].to(self.device).float().clone()
keypoints[..., 0] *= imgsz[1]
keypoints[..., 1] *= imgsz[0]
for i in range(batch_size):
if fg_mask[i].sum():
idx = target_gt_idx[i][fg_mask[i]]
gt_kpt = keypoints[batch_idx.view(-1) == i][idx] # (n, 51)
gt_kpt[..., 0] /= stride_tensor[fg_mask[i]]
gt_kpt[..., 1] /= stride_tensor[fg_mask[i]]
area = xyxy2xywh(target_bboxes[i][fg_mask[i]])[:, 2:].prod(1, keepdim=True)
pred_kpt = pred_kpts[i][fg_mask[i]]
kpt_mask = gt_kpt[..., 2] != 0
loss[1] += self.keypoint_loss(pred_kpt, gt_kpt, kpt_mask, area) # pose loss
# kpt_score loss
if pred_kpt.shape[-1] == 3:
loss[2] += self.bce_pose(pred_kpt[..., 2], kpt_mask.float()) # keypoint obj loss
loss[0] *= self.hyp.box # box gain
loss[1] *= self.hyp.pose / batch_size # pose gain
loss[2] *= self.hyp.kobj / batch_size # kobj gain
loss[3] *= self.hyp.cls # cls gain
loss[4] *= self.hyp.dfl # dfl gain
return loss.sum() * batch_size, loss.detach() # loss(box, pose, kobj, cls, dfl)
def kpts_decode(self, anchor_points, pred_kpts):
y = pred_kpts.clone()
y[..., :2] *= 2.0
y[..., 0] += anchor_points[:, [0]] - 0.5
y[..., 1] += anchor_points[:, [1]] - 0.5
return y
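# Editor's note - numeric check of kpts_decode (standalone sketch, values assumed):
# raw x/y offsets are doubled and shifted so that a prediction of 0.25
# decodes exactly onto an anchor at (4.5, 6.5):
#   anchor = torch.tensor([[4.5, 6.5]]); pred = torch.zeros(1, 1, 17, 3)
#   pred[..., :2] = 0.25  ->  decoded[..., 0] == 4.5, decoded[..., 1] == 6.5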
def train(cfg=DEFAULT_CFG, use_python=False):
model = cfg.model or 'yolov8n-pose.yaml'
data = cfg.data or 'coco8-pose.yaml'
device = cfg.device if cfg.device is not None else ''
args = dict(model=model, data=data, device=device)
if use_python:
from ultralytics import YOLO
YOLO(model).train(**args)
else:
trainer = PoseTrainer(overrides=args)
trainer.train()
if __name__ == '__main__':
train()
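Minimal training sketch through the public API (editor's addition, mirroring the train() helper above; dataset auto-download is assumed):

from ultralytics import YOLO
model = YOLO('yolov8n-pose.yaml')   # build a fresh pose model from config
model.train(data='coco8-pose.yaml', epochs=1, imgsz=640)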

@ -0,0 +1,213 @@
# Ultralytics YOLO 🚀, GPL-3.0 license
from pathlib import Path
import numpy as np
import torch
from ultralytics.yolo.utils import DEFAULT_CFG, LOGGER, ops
from ultralytics.yolo.utils.checks import check_requirements
from ultralytics.yolo.utils.metrics import OKS_SIGMA, PoseMetrics, box_iou, kpt_iou
from ultralytics.yolo.utils.plotting import output_to_target, plot_images
from ultralytics.yolo.v8.detect import DetectionValidator
class PoseValidator(DetectionValidator):
def __init__(self, dataloader=None, save_dir=None, pbar=None, args=None):
super().__init__(dataloader, save_dir, pbar, args)
self.args.task = 'pose'
self.metrics = PoseMetrics(save_dir=self.save_dir)
def preprocess(self, batch):
batch = super().preprocess(batch)
batch['keypoints'] = batch['keypoints'].to(self.device).float()
return batch
def get_desc(self):
return ('%22s' + '%11s' * 10) % ('Class', 'Images', 'Instances', 'Box(P', 'R', 'mAP50', 'mAP50-95)', 'Pose(P',
'R', 'mAP50', 'mAP50-95)')
def postprocess(self, preds):
preds = ops.non_max_suppression(preds,
self.args.conf,
self.args.iou,
labels=self.lb,
multi_label=True,
agnostic=self.args.single_cls,
max_det=self.args.max_det,
nc=self.nc)
return preds
def init_metrics(self, model):
super().init_metrics(model)
self.kpt_shape = self.data['kpt_shape']
is_pose = self.kpt_shape == [17, 3]
nkpt = self.kpt_shape[0]
self.sigma = OKS_SIGMA if is_pose else np.ones(nkpt) / nkpt
def update_metrics(self, preds, batch):
# Metrics
for si, pred in enumerate(preds):
idx = batch['batch_idx'] == si
cls = batch['cls'][idx]
bbox = batch['bboxes'][idx]
kpts = batch['keypoints'][idx]
nl, npr = cls.shape[0], pred.shape[0] # number of labels, predictions
nk = kpts.shape[1] # number of keypoints
shape = batch['ori_shape'][si]
correct_kpts = torch.zeros(npr, self.niou, dtype=torch.bool, device=self.device) # init
correct_bboxes = torch.zeros(npr, self.niou, dtype=torch.bool, device=self.device) # init
self.seen += 1
if npr == 0:
if nl:
self.stats.append((correct_bboxes, correct_kpts, *torch.zeros(
(2, 0), device=self.device), cls.squeeze(-1)))
if self.args.plots:
self.confusion_matrix.process_batch(detections=None, labels=cls.squeeze(-1))
continue
# Predictions
if self.args.single_cls:
pred[:, 5] = 0
predn = pred.clone()
ops.scale_boxes(batch['img'][si].shape[1:], predn[:, :4], shape,
ratio_pad=batch['ratio_pad'][si]) # native-space pred
pred_kpts = predn[:, 6:].view(npr, nk, -1)
ops.scale_coords(batch['img'][si].shape[1:], pred_kpts, shape, ratio_pad=batch['ratio_pad'][si])
# Evaluate
if nl:
height, width = batch['img'].shape[2:]
tbox = ops.xywh2xyxy(bbox) * torch.tensor(
(width, height, width, height), device=self.device) # target boxes
ops.scale_boxes(batch['img'][si].shape[1:], tbox, shape,
ratio_pad=batch['ratio_pad'][si]) # native-space labels
tkpts = kpts.clone()
tkpts[..., 0] *= width
tkpts[..., 1] *= height
tkpts = ops.scale_coords(batch['img'][si].shape[1:], tkpts, shape, ratio_pad=batch['ratio_pad'][si])
labelsn = torch.cat((cls, tbox), 1) # native-space labels
correct_bboxes = self._process_batch(predn[:, :6], labelsn)
correct_kpts = self._process_batch(predn[:, :6], labelsn, pred_kpts, tkpts)
if self.args.plots:
self.confusion_matrix.process_batch(predn, labelsn)
# Append correct_masks, correct_boxes, pconf, pcls, tcls
self.stats.append((correct_bboxes, correct_kpts, pred[:, 4], pred[:, 5], cls.squeeze(-1)))
# Save
if self.args.save_json:
self.pred_to_json(predn, batch['im_file'][si])
# if self.args.save_txt:
# save_one_txt(predn, save_conf, shape, file=save_dir / 'labels' / f'{path.stem}.txt')
def _process_batch(self, detections, labels, pred_kpts=None, gt_kpts=None):
"""
Return correct prediction matrix
Arguments:
detections (array[N, 6]), x1, y1, x2, y2, conf, class
labels (array[M, 5]), class, x1, y1, x2, y2
pred_kpts (array[N, 51]), 51 = 17 * 3
gt_kpts (array[N, 51])
Returns:
correct (array[N, 10]), for 10 IoU levels
"""
if pred_kpts is not None and gt_kpts is not None:
# `0.53` is from https://github.com/jin-s13/xtcocoapi/blob/master/xtcocotools/cocoeval.py#L384
area = ops.xyxy2xywh(labels[:, 1:])[:, 2:].prod(1) * 0.53
iou = kpt_iou(gt_kpts, pred_kpts, sigma=self.sigma, area=area)
else: # boxes
iou = box_iou(labels[:, 1:], detections[:, :4])
correct = np.zeros((detections.shape[0], self.iouv.shape[0])).astype(bool)
correct_class = labels[:, 0:1] == detections[:, 5]
for i in range(len(self.iouv)):
x = torch.where((iou >= self.iouv[i]) & correct_class) # IoU > threshold and classes match
if x[0].shape[0]:
matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]),
1).cpu().numpy() # [label, detect, iou]
if x[0].shape[0] > 1:
matches = matches[matches[:, 2].argsort()[::-1]]
matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
# matches = matches[matches[:, 2].argsort()[::-1]]
matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
correct[matches[:, 1].astype(int), i] = True
return torch.tensor(correct, dtype=torch.bool, device=detections.device)
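# Editor's note: the matching above is the same greedy scheme the detection
# validator uses for boxes - sort candidate (label, detection) pairs by IoU/OKS,
# then keep at most one match per detection and per label at each threshold.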
def plot_val_samples(self, batch, ni):
plot_images(batch['img'],
batch['batch_idx'],
batch['cls'].squeeze(-1),
batch['bboxes'],
kpts=batch['keypoints'],
paths=batch['im_file'],
fname=self.save_dir / f'val_batch{ni}_labels.jpg',
names=self.names)
def plot_predictions(self, batch, preds, ni):
pred_kpts = torch.cat([p[:, 6:].view(-1, *self.kpt_shape)[:15] for p in preds], 0)
plot_images(batch['img'],
*output_to_target(preds, max_det=15),
kpts=pred_kpts,
paths=batch['im_file'],
fname=self.save_dir / f'val_batch{ni}_pred.jpg',
names=self.names) # pred
def pred_to_json(self, predn, filename):
stem = Path(filename).stem
image_id = int(stem) if stem.isnumeric() else stem
box = ops.xyxy2xywh(predn[:, :4]) # xywh
box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner
for p, b in zip(predn.tolist(), box.tolist()):
self.jdict.append({
'image_id': image_id,
'category_id': self.class_map[int(p[5])],
'bbox': [round(x, 3) for x in b],
'keypoints': p[6:],
'score': round(p[4], 5)})
def eval_json(self, stats):
if self.args.save_json and self.is_coco and len(self.jdict):
anno_json = self.data['path'] / 'annotations/person_keypoints_val2017.json' # annotations
pred_json = self.save_dir / 'predictions.json' # predictions
LOGGER.info(f'\nEvaluating pycocotools mAP using {pred_json} and {anno_json}...')
try: # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
check_requirements('pycocotools>=2.0.6')
from pycocotools.coco import COCO # noqa
from pycocotools.cocoeval import COCOeval # noqa
for x in anno_json, pred_json:
assert x.is_file(), f'{x} file not found'
anno = COCO(str(anno_json)) # init annotations api
pred = anno.loadRes(str(pred_json)) # init predictions api (must pass string, not Path)
for i, eval in enumerate([COCOeval(anno, pred, 'bbox'), COCOeval(anno, pred, 'keypoints')]):
if self.is_coco:
eval.params.imgIds = [int(Path(x).stem) for x in self.dataloader.dataset.im_files] # im to eval
eval.evaluate()
eval.accumulate()
eval.summarize()
idx = i * 4 + 2
stats[self.metrics.keys[idx + 1]], stats[
self.metrics.keys[idx]] = eval.stats[:2] # update mAP50-95 and mAP50
except Exception as e:
LOGGER.warning(f'pycocotools unable to run: {e}')
return stats
def val(cfg=DEFAULT_CFG, use_python=False):
model = cfg.model or 'yolov8n-pose.pt'
data = cfg.data or 'coco128-pose.yaml'
args = dict(model=model, data=data)
if use_python:
from ultralytics import YOLO
YOLO(model).val(**args)
else:
validator = PoseValidator(args=args)
validator(model=args['model'])
if __name__ == '__main__':
val()
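Validation sketch through the public API (editor's addition; the returned metrics object is assumed to be the PoseMetrics defined above):

from ultralytics import YOLO
metrics = YOLO('yolov8n-pose.pt').val(data='coco8-pose.yaml')
print(metrics.results_dict)    # box and pose precision/recall/mAP plus fitness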

@ -65,7 +65,7 @@ class SegmentationValidator(DetectionValidator):
if npr == 0: if npr == 0:
if nl: if nl:
self.stats.append((correct_masks, correct_bboxes, *torch.zeros( self.stats.append((correct_bboxes, correct_masks, *torch.zeros(
(2, 0), device=self.device), cls.squeeze(-1))) (2, 0), device=self.device), cls.squeeze(-1)))
if self.args.plots: if self.args.plots:
self.confusion_matrix.process_batch(detections=None, labels=cls.squeeze(-1)) self.confusion_matrix.process_batch(detections=None, labels=cls.squeeze(-1))
@ -103,7 +103,7 @@ class SegmentationValidator(DetectionValidator):
self.confusion_matrix.process_batch(predn, labelsn) self.confusion_matrix.process_batch(predn, labelsn)
# Append correct_masks, correct_boxes, pconf, pcls, tcls # Append correct_masks, correct_boxes, pconf, pcls, tcls
self.stats.append((correct_masks, correct_bboxes, pred[:, 4], pred[:, 5], cls.squeeze(-1))) self.stats.append((correct_bboxes, correct_masks, pred[:, 4], pred[:, 5], cls.squeeze(-1)))
pred_masks = torch.as_tensor(pred_masks, dtype=torch.uint8) pred_masks = torch.as_tensor(pred_masks, dtype=torch.uint8)
if self.args.plots and self.batch_i < 3: if self.args.plots and self.batch_i < 3:
@ -220,8 +220,7 @@ class SegmentationValidator(DetectionValidator):
pred = anno.loadRes(str(pred_json)) # init predictions api (must pass string, not Path) pred = anno.loadRes(str(pred_json)) # init predictions api (must pass string, not Path)
for i, eval in enumerate([COCOeval(anno, pred, 'bbox'), COCOeval(anno, pred, 'segm')]): for i, eval in enumerate([COCOeval(anno, pred, 'bbox'), COCOeval(anno, pred, 'segm')]):
if self.is_coco: if self.is_coco:
eval.params.imgIds = [int(Path(x).stem) for x in self.dataloader.dataset.im_files] # images to eval eval.params.imgIds = [int(Path(x).stem) for x in self.dataloader.dataset.im_files] # im to eval
eval.evaluate() eval.evaluate()
eval.accumulate() eval.accumulate()
eval.summarize() eval.summarize()
