ultralytics 8.0.66
YOLOv8 Pose models (#1347)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Mert Can Demir <validatedev@gmail.com>
Co-authored-by: Laughing <61612323+Laughing-q@users.noreply.github.com>
Co-authored-by: Fabian Greavu <fabiangreavu@gmail.com>
Co-authored-by: Yonghye Kwon <developer.0hye@gmail.com>
Co-authored-by: Eric Pedley <ericpedley@gmail.com>
Co-authored-by: JustasBart <40023722+JustasBart@users.noreply.github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Aarni Koskela <akx@iki.fi>
Co-authored-by: Sergio Sanchez <sergio.ssm.97@gmail.com>
Co-authored-by: Bogdan Gheorghe <112427971+bogdan-galileo@users.noreply.github.com>
Co-authored-by: Jaap van de Loosdrecht <jaap@vdlmv.nl>
Co-authored-by: Noobtoss <96134731+Noobtoss@users.noreply.github.com>
Co-authored-by: nerdyespresso <106761627+nerdyespresso@users.noreply.github.com>
Co-authored-by: Farid Inawan <frdteknikelektro@gmail.com>
Co-authored-by: Laughing-q <1185102784@qq.com>
Co-authored-by: Alexander Duda <Alexander.Duda@me.com>
Co-authored-by: Mehran Ghandehari <mehran.maps@gmail.com>
Co-authored-by: Snyk bot <snyk-bot@snyk.io>
Co-authored-by: majid nasiri <majnasai@gmail.com>
ultralytics/__init__.py
@@ -1,6 +1,6 @@
 # Ultralytics YOLO 🚀, GPL-3.0 license
 
-__version__ = '8.0.65'
+__version__ = '8.0.66'
 
 from ultralytics.hub import start
 from ultralytics.yolo.engine.model import YOLO
ultralytics/datasets/coco-pose.yaml (new file, 38 lines)
@@ -0,0 +1,38 @@
+# Ultralytics YOLO 🚀, GPL-3.0 license
+# COCO 2017 dataset http://cocodataset.org by Microsoft
+# Example usage: yolo train data=coco-pose.yaml
+# parent
+# ├── ultralytics
+# └── datasets
+#     └── coco-pose  ← downloads here (20.1 GB)
+
+
+# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
+path: ../datasets/coco-pose  # dataset root dir
+train: train2017.txt  # train images (relative to 'path') 118287 images
+val: val2017.txt  # val images (relative to 'path') 5000 images
+test: test-dev2017.txt  # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794
+
+# Keypoints
+kpt_shape: [17, 3]  # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
+flip_idx: [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]
+
+# Classes
+names:
+  0: person
+
+# Download script/URL (optional)
+download: |
+  from ultralytics.yolo.utils.downloads import download
+  from pathlib import Path
+
+  # Download labels
+  dir = Path(yaml['path'])  # dataset root dir
+  url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'
+  urls = [url + 'coco2017labels-pose.zip']  # labels
+  download(urls, dir=dir.parent)
+  # Download data
+  urls = ['http://images.cocodataset.org/zips/train2017.zip',  # 19G, 118k images
+          'http://images.cocodataset.org/zips/val2017.zip',  # 1G, 5k images
+          'http://images.cocodataset.org/zips/test2017.zip']  # 7G, 41k images (optional)
+  download(urls, dir=dir / 'images', threads=3)
ultralytics/datasets/coco8-pose.yaml (new file, 25 lines)
@@ -0,0 +1,25 @@
+# Ultralytics YOLO 🚀, GPL-3.0 license
+# COCO8-pose dataset (first 8 images from COCO train2017) by Ultralytics
+# Example usage: yolo train data=coco8-pose.yaml
+# parent
+# ├── ultralytics
+# └── datasets
+#     └── coco8-pose  ← downloads here (1 MB)
+
+
+# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
+path: ../datasets/coco8-pose  # dataset root dir
+train: images/train  # train images (relative to 'path') 4 images
+val: images/val  # val images (relative to 'path') 4 images
+test:  # test images (optional)
+
+# Keypoints
+kpt_shape: [17, 3]  # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
+flip_idx: [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]
+
+# Classes
+names:
+  0: person
+
+# Download script/URL (optional)
+download: https://ultralytics.com/assets/coco8-pose.zip
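The tiny coco8-pose split above is intended for quick end-to-end smoke tests of the new task. A minimal training sketch, assuming the pose model YAMLs introduced further down in this commit are on the package's config path:

from ultralytics import YOLO

model = YOLO('yolov8n-pose.yaml')  # builds a PoseModel; task 'pose' is guessed from the '-pose' stem
model.train(data='coco8-pose.yaml', epochs=3, imgsz=640)  # downloads the 1 MB dataset on first use
metrics = model.val()  # reports box mAP and OKS-based pose mAP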
@@ -44,13 +44,14 @@ Any of these models can be used by loading their configs or pretrained checkpoints

 ### 1. YOLOv8

-**About** - Cutting edge Detection, Segmentation and Classification models developed by Ultralytics. </br>
+**About** - Cutting edge Detection, Segmentation, Classification and Pose models developed by Ultralytics. </br>

 Available Models:

 - Detection - `yolov8n`, `yolov8s`, `yolov8m`, `yolov8l`, `yolov8x`
 - Instance Segmentation - `yolov8n-seg`, `yolov8s-seg`, `yolov8m-seg`, `yolov8l-seg`, `yolov8x-seg`
 - Classification - `yolov8n-cls`, `yolov8s-cls`, `yolov8m-cls`, `yolov8l-cls`, `yolov8x-cls`
+- Pose - `yolov8n-pose`, `yolov8s-pose`, `yolov8m-pose`, `yolov8l-pose`, `yolov8x-pose`, `yolov8x-pose-p6`

 <details><summary>Performance</summary>

@@ -84,6 +85,17 @@ Available Models:
 | [YOLOv8l-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-cls.pt) | 224 | 78.0 | 94.1 | 163.0 | 0.87 | 37.5 | 99.7 |
 | [YOLOv8x-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-cls.pt) | 224 | 78.4 | 94.3 | 232.0 | 1.01 | 57.4 | 154.8 |

+### Pose
+
+| Model | size<br><sup>(pixels) | mAP<sup>box<br>50-95 | mAP<sup>pose<br>50-95 | Speed<br><sup>CPU ONNX<br>(ms) | Speed<br><sup>A100 TensorRT<br>(ms) | params<br><sup>(M) | FLOPs<br><sup>(B) |
+| ---------------------------------------------------------------------------------------------------- | --------------------- | -------------------- | --------------------- | ------------------------------ | ----------------------------------- | ------------------ | ----------------- |
+| [YOLOv8n-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-pose.pt) | 640 | - | 49.7 | - | - | 3.3 | 9.2 |
+| [YOLOv8s-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-pose.pt) | 640 | - | 59.2 | - | - | 11.6 | 30.2 |
+| [YOLOv8m-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-pose.pt) | 640 | - | 63.6 | - | - | 26.4 | 81.0 |
+| [YOLOv8l-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-pose.pt) | 640 | - | 67.0 | - | - | 44.4 | 168.6 |
+| [YOLOv8x-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose.pt) | 640 | - | 68.9 | - | - | 69.4 | 263.2 |
+| [YOLOv8x-pose-p6](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose-p6.pt) | 1280 | - | 71.5 | - | - | 99.1 | 1066.4 |
+
 </details>

 ### 2. YOLOv5u
ultralytics/models/v8/yolov8-pose-p6.yaml (new file, 57 lines)
@@ -0,0 +1,57 @@
+# Ultralytics YOLO 🚀, GPL-3.0 license
+# YOLOv8-pose keypoints/pose estimation model with P3-P6 outputs. For Usage examples see https://docs.ultralytics.com/tasks/pose
+
+# Parameters
+nc: 1  # number of classes
+kpt_shape: [17, 3]  # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
+scales: # model compound scaling constants, i.e. 'model=yolov8n-p6.yaml' will call yolov8-p6.yaml with scale 'n'
+  # [depth, width, max_channels]
+  n: [0.33, 0.25, 1024]
+  s: [0.33, 0.50, 1024]
+  m: [0.67, 0.75, 768]
+  l: [1.00, 1.00, 512]
+  x: [1.00, 1.25, 512]
+
+# YOLOv8.0x6 backbone
+backbone:
+  # [from, repeats, module, args]
+  - [-1, 1, Conv, [64, 3, 2]]  # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]]  # 1-P2/4
+  - [-1, 3, C2f, [128, True]]
+  - [-1, 1, Conv, [256, 3, 2]]  # 3-P3/8
+  - [-1, 6, C2f, [256, True]]
+  - [-1, 1, Conv, [512, 3, 2]]  # 5-P4/16
+  - [-1, 6, C2f, [512, True]]
+  - [-1, 1, Conv, [768, 3, 2]]  # 7-P5/32
+  - [-1, 3, C2f, [768, True]]
+  - [-1, 1, Conv, [1024, 3, 2]]  # 9-P6/64
+  - [-1, 3, C2f, [1024, True]]
+  - [-1, 1, SPPF, [1024, 5]]  # 11
+
+# YOLOv8.0x6 head
+head:
+  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
+  - [[-1, 8], 1, Concat, [1]]  # cat backbone P5
+  - [-1, 3, C2, [768, False]]  # 14
+
+  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
+  - [[-1, 6], 1, Concat, [1]]  # cat backbone P4
+  - [-1, 3, C2, [512, False]]  # 17
+
+  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
+  - [[-1, 4], 1, Concat, [1]]  # cat backbone P3
+  - [-1, 3, C2, [256, False]]  # 20 (P3/8-small)
+
+  - [-1, 1, Conv, [256, 3, 2]]
+  - [[-1, 17], 1, Concat, [1]]  # cat head P4
+  - [-1, 3, C2, [512, False]]  # 23 (P4/16-medium)
+
+  - [-1, 1, Conv, [512, 3, 2]]
+  - [[-1, 14], 1, Concat, [1]]  # cat head P5
+  - [-1, 3, C2, [768, False]]  # 26 (P5/32-large)
+
+  - [-1, 1, Conv, [768, 3, 2]]
+  - [[-1, 11], 1, Concat, [1]]  # cat head P6
+  - [-1, 3, C2, [1024, False]]  # 29 (P6/64-xlarge)
+
+  - [[20, 23, 26, 29], 1, Pose, [nc, kpt_shape]]  # Pose(P3, P4, P5, P6)
ultralytics/models/v8/yolov8-pose.yaml (new file, 47 lines)
@@ -0,0 +1,47 @@
+# Ultralytics YOLO 🚀, GPL-3.0 license
+# YOLOv8-pose keypoints/pose estimation model. For Usage examples see https://docs.ultralytics.com/tasks/pose
+
+# Parameters
+nc: 1  # number of classes
+kpt_shape: [17, 3]  # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
+scales: # model compound scaling constants, i.e. 'model=yolov8n-pose.yaml' will call yolov8-pose.yaml with scale 'n'
+  # [depth, width, max_channels]
+  n: [0.33, 0.25, 1024]
+  s: [0.33, 0.50, 1024]
+  m: [0.67, 0.75, 768]
+  l: [1.00, 1.00, 512]
+  x: [1.00, 1.25, 512]
+
+# YOLOv8.0n backbone
+backbone:
+  # [from, repeats, module, args]
+  - [-1, 1, Conv, [64, 3, 2]]  # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]]  # 1-P2/4
+  - [-1, 3, C2f, [128, True]]
+  - [-1, 1, Conv, [256, 3, 2]]  # 3-P3/8
+  - [-1, 6, C2f, [256, True]]
+  - [-1, 1, Conv, [512, 3, 2]]  # 5-P4/16
+  - [-1, 6, C2f, [512, True]]
+  - [-1, 1, Conv, [1024, 3, 2]]  # 7-P5/32
+  - [-1, 3, C2f, [1024, True]]
+  - [-1, 1, SPPF, [1024, 5]]  # 9
+
+# YOLOv8.0n head
+head:
+  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
+  - [[-1, 6], 1, Concat, [1]]  # cat backbone P4
+  - [-1, 3, C2f, [512]]  # 12
+
+  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
+  - [[-1, 4], 1, Concat, [1]]  # cat backbone P3
+  - [-1, 3, C2f, [256]]  # 15 (P3/8-small)
+
+  - [-1, 1, Conv, [256, 3, 2]]
+  - [[-1, 12], 1, Concat, [1]]  # cat head P4
+  - [-1, 3, C2f, [512]]  # 18 (P4/16-medium)
+
+  - [-1, 1, Conv, [512, 3, 2]]
+  - [[-1, 9], 1, Concat, [1]]  # cat head P5
+  - [-1, 3, C2f, [1024]]  # 21 (P5/32-large)
+
+  - [[15, 18, 21], 1, Pose, [nc, kpt_shape]]  # Pose(P3, P4, P5)
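The scale letter parsed from the filename picks one [depth, width, max_channels] row above, and parse_model then rounds every channel count to a multiple of 8. A small sketch of that arithmetic; make_divisible here mirrors the ultralytics helper and is written out as an assumption:

import math

def make_divisible(x, divisor=8):
    # round up to the nearest multiple of divisor, as ultralytics does for channel widths
    return math.ceil(x / divisor) * divisor

width, max_channels = 0.25, 1024  # the 'n' row: [0.33, 0.25, 1024]
print(make_divisible(min(1024, max_channels) * width))  # 256 channels for the SPPF layer at 'n' scale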
ultralytics/nn/autobackend.py
@@ -91,8 +91,10 @@ class AutoBackend(nn.Module):
         if nn_module:
             model = weights.to(device)
             model = model.fuse(verbose=verbose) if fuse else model
-            names = model.module.names if hasattr(model, 'module') else model.names  # get class names
+            if hasattr(model, 'kpt_shape'):
+                kpt_shape = model.kpt_shape  # pose-only
             stride = max(int(model.stride.max()), 32)  # model stride
+            names = model.module.names if hasattr(model, 'module') else model.names  # get class names
             model.half() if fp16 else model.float()
             self.model = model  # explicitly assign for to(), cpu(), cuda(), half()
             pt = True
@@ -102,6 +104,8 @@ class AutoBackend(nn.Module):
                                              device=device,
                                              inplace=True,
                                              fuse=fuse)
+            if hasattr(model, 'kpt_shape'):
+                kpt_shape = model.kpt_shape  # pose-only
             stride = max(int(model.stride.max()), 32)  # model stride
             names = model.module.names if hasattr(model, 'module') else model.names  # get class names
             model.half() if fp16 else model.float()
@@ -268,13 +272,14 @@ class AutoBackend(nn.Module):
             for k, v in metadata.items():
                 if k in ('stride', 'batch'):
                     metadata[k] = int(v)
-                elif k in ('imgsz', 'names') and isinstance(v, str):
+                elif k in ('imgsz', 'names', 'kpt_shape') and isinstance(v, str):
                     metadata[k] = eval(v)
             stride = metadata['stride']
             task = metadata['task']
             batch = metadata['batch']
             imgsz = metadata['imgsz']
             names = metadata['names']
+            kpt_shape = metadata.get('kpt_shape')
         elif not (pt or triton or nn_module):
             LOGGER.warning(f"WARNING ⚠️ Metadata not found for 'model={weights}'")
ultralytics/nn/modules.py
@@ -378,7 +378,9 @@ class Ensemble(nn.ModuleList):
         return y, None  # inference, train output


-# heads
+# Model heads below ----------------------------------------------------------------------------------------------------


 class Detect(nn.Module):
     # YOLOv8 Detect head for detection models
     dynamic = False  # force grid reconstruction
@@ -394,7 +396,6 @@ class Detect(nn.Module):
         self.reg_max = 16  # DFL channels (ch[0] // 16 to scale 4/8/12/16/20 for n/s/m/l/x)
         self.no = nc + self.reg_max * 4  # number of outputs per anchor
         self.stride = torch.zeros(self.nl)  # strides computed during build
-
         c2, c3 = max((16, ch[0] // 4, self.reg_max * 4)), max(ch[0], self.nc)  # channels
         self.cv2 = nn.ModuleList(
             nn.Sequential(Conv(x, c2, 3), Conv(c2, c2, 3), nn.Conv2d(c2, 4 * self.reg_max, 1)) for x in ch)
@@ -454,6 +455,36 @@ class Segment(Detect):
         return (torch.cat([x, mc], 1), p) if self.export else (torch.cat([x[0], mc], 1), (x[1], mc, p))


+class Pose(Detect):
+    # YOLOv8 Pose head for keypoints models
+    def __init__(self, nc=80, kpt_shape=(17, 3), ch=()):
+        super().__init__(nc, ch)
+        self.kpt_shape = kpt_shape  # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
+        self.nk = kpt_shape[0] * kpt_shape[1]  # number of keypoints total
+        self.detect = Detect.forward
+
+        c4 = max(ch[0] // 4, self.nk)
+        self.cv4 = nn.ModuleList(nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3), nn.Conv2d(c4, self.nk, 1)) for x in ch)
+
+    def forward(self, x):
+        bs = x[0].shape[0]  # batch size
+        kpt = torch.cat([self.cv4[i](x[i]).view(bs, self.nk, -1) for i in range(self.nl)], -1)  # (bs, 17*3, h*w)
+        x = self.detect(self, x)
+        if self.training:
+            return x, kpt
+        pred_kpt = self.kpts_decode(kpt)
+        return torch.cat([x, pred_kpt], 1) if self.export else (torch.cat([x[0], pred_kpt], 1), (x[1], kpt))
+
+    def kpts_decode(self, kpts):
+        ndim = self.kpt_shape[1]
+        y = kpts.clone()
+        if ndim == 3:
+            y[:, 2::3].sigmoid_()  # inplace sigmoid
+        y[:, 0::ndim] = (y[:, 0::ndim] * 2.0 + (self.anchors[0] - 0.5)) * self.strides
+        y[:, 1::ndim] = (y[:, 1::ndim] * 2.0 + (self.anchors[1] - 0.5)) * self.strides
+        return y
+
+
 class Classify(nn.Module):
     # YOLOv8 classification head, i.e. x(b,c1,20,20) to x(b,c2)
     def __init__(self, c1, c2, k=1, s=1, p=None, g=1):  # ch_in, ch_out, kernel, stride, padding, groups
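To make the decode step in the new Pose head concrete, here is a standalone sketch of the same arithmetic outside the module; the anchor centers and stride values are invented for illustration:

import torch

def kpts_decode(kpts, anchors, strides, ndim=3):
    # mirrors Pose.kpts_decode: raw per-cell offsets -> image-space keypoint coordinates
    y = kpts.clone()
    if ndim == 3:
        y[:, 2::3] = y[:, 2::3].sigmoid()  # visibility logits -> [0, 1]
    y[:, 0::ndim] = (y[:, 0::ndim] * 2.0 + (anchors[0] - 0.5)) * strides
    y[:, 1::ndim] = (y[:, 1::ndim] * 2.0 + (anchors[1] - 0.5)) * strides
    return y

anchors = torch.tensor([[0.5, 1.5], [0.5, 0.5]])  # x and y anchor centers for two grid cells
strides = torch.tensor([8.0, 8.0])                # both cells on the P3/8 feature map
kpts = torch.zeros(1, 51, 2)                      # (bs, 17*3, num_cells) of raw predictions
print(kpts_decode(kpts, anchors, strides)[0, 0])  # tensor([0., 8.]): x of keypoint 0 at each cell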
ultralytics/nn/tasks.py
@@ -10,7 +10,7 @@ import torch.nn as nn

 from ultralytics.nn.modules import (C1, C2, C3, C3TR, SPP, SPPF, Bottleneck, BottleneckCSP, C2f, C3Ghost, C3x, Classify,
                                     Concat, Conv, ConvTranspose, Detect, DWConv, DWConvTranspose2d, Ensemble, Focus,
-                                    GhostBottleneck, GhostConv, Segment)
+                                    GhostBottleneck, GhostConv, Pose, Segment)
 from ultralytics.yolo.utils import DEFAULT_CFG_DICT, DEFAULT_CFG_KEYS, LOGGER, colorstr, emojis, yaml_load
 from ultralytics.yolo.utils.checks import check_requirements, check_suffix, check_yaml
 from ultralytics.yolo.utils.torch_utils import (fuse_conv_and_bn, fuse_deconv_and_bn, initialize_weights,
@@ -183,10 +183,10 @@ class DetectionModel(BaseModel):

         # Build strides
         m = self.model[-1]  # Detect()
-        if isinstance(m, (Detect, Segment)):
+        if isinstance(m, (Detect, Segment, Pose)):
             s = 256  # 2x min stride
             m.inplace = self.inplace
-            forward = lambda x: self.forward(x)[0] if isinstance(m, Segment) else self.forward(x)
+            forward = lambda x: self.forward(x)[0] if isinstance(m, (Segment, Pose)) else self.forward(x)
             m.stride = torch.tensor([s / x.shape[-2] for x in forward(torch.zeros(1, ch, s, s))])  # forward
             self.stride = m.stride
             m.bias_init()  # only run once
@@ -242,12 +242,23 @@ class DetectionModel(BaseModel):
 class SegmentationModel(DetectionModel):
     # YOLOv8 segmentation model
     def __init__(self, cfg='yolov8n-seg.yaml', ch=3, nc=None, verbose=True):
-        super().__init__(cfg, ch, nc, verbose)
+        super().__init__(cfg=cfg, ch=ch, nc=nc, verbose=verbose)

     def _forward_augment(self, x):
         raise NotImplementedError(emojis('WARNING ⚠️ SegmentationModel has not supported augment inference yet!'))


+class PoseModel(DetectionModel):
+    # YOLOv8 pose model
+    def __init__(self, cfg='yolov8n-pose.yaml', ch=3, nc=None, data_kpt_shape=(None, None), verbose=True):
+        if not isinstance(cfg, dict):
+            cfg = yaml_model_load(cfg)  # load model YAML
+        if any(data_kpt_shape) and list(data_kpt_shape) != list(cfg['kpt_shape']):
+            LOGGER.info(f"Overriding model.yaml kpt_shape={cfg['kpt_shape']} with kpt_shape={data_kpt_shape}")
+            cfg['kpt_shape'] = data_kpt_shape
+        super().__init__(cfg=cfg, ch=ch, nc=nc, verbose=verbose)
+
+
 class ClassificationModel(BaseModel):
     # YOLOv8 classification model
     def __init__(self,
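A minimal construction sketch for the new class, assuming the bundled yolov8n-pose.yaml resolves on the config search path:

from ultralytics.nn.tasks import PoseModel

# data_kpt_shape, when set, overrides the YAML's kpt_shape (a log line announces the override)
model = PoseModel(cfg='yolov8n-pose.yaml', ch=3, nc=1, data_kpt_shape=(17, 3), verbose=False)
print(model.stride)  # strides computed during build, e.g. tensor([ 8., 16., 32.])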
@@ -425,7 +436,7 @@ def parse_model(d, ch, verbose=True):  # model_dict, input_channels(3)
     # Args
     max_channels = float('inf')
     nc, act, scales = (d.get(x) for x in ('nc', 'act', 'scales'))
-    depth, width = (d.get(x, 1.0) for x in ('depth_multiple', 'width_multiple'))
+    depth, width, kpt_shape = (d.get(x, 1.0) for x in ('depth_multiple', 'width_multiple', 'kpt_shape'))
     if scales:
         scale = d.get('scale')
         if not scale:
@@ -464,7 +475,7 @@ def parse_model(d, ch, verbose=True):  # model_dict, input_channels(3)
             args = [ch[f]]
         elif m is Concat:
             c2 = sum(ch[x] for x in f)
-        elif m in (Detect, Segment):
+        elif m in (Detect, Segment, Pose):
             args.append([ch[x] for x in f])
             if m is Segment:
                 args[2] = make_divisible(min(args[2], max_channels) * width, 8)
@@ -543,6 +554,8 @@ def guess_model_task(model):
             return 'detect'
         if m == 'segment':
             return 'segment'
+        if m == 'pose':
+            return 'pose'

     # Guess from model cfg
     if isinstance(model, dict):
@@ -565,6 +578,8 @@ def guess_model_task(model):
             return 'segment'
         elif isinstance(m, Classify):
             return 'classify'
+        elif isinstance(m, Pose):
+            return 'pose'

     # Guess from model filename
     if isinstance(model, (str, Path)):
@@ -573,10 +588,12 @@ def guess_model_task(model):
             return 'segment'
         elif '-cls' in model.stem or 'classify' in model.parts:
             return 'classify'
+        elif '-pose' in model.stem or 'pose' in model.parts:
+            return 'pose'
         elif 'detect' in model.parts:
             return 'detect'

     # Unable to determine task from model
     LOGGER.warning("WARNING ⚠️ Unable to automatically guess model task, assuming 'task=detect'. "
-                   "Explicitly define task for your model, i.e. 'task=detect', 'task=segment' or 'task=classify'.")
+                   "Explicitly define task for your model, i.e. 'task=detect', 'segment', 'classify', or 'pose'.")
     return 'detect'  # assume detect
ultralytics/tracker/track.py
@@ -33,10 +33,9 @@ def on_predict_postprocess_end(predictor):
         tracks = predictor.trackers[i].update(det, im0s[i])
         if len(tracks) == 0:
             continue
+        idx = tracks[:, -1].tolist()
+        predictor.results[i] = predictor.results[i][idx]
         predictor.results[i].update(boxes=torch.as_tensor(tracks[:, :-1]))
-        if predictor.results[i].masks is not None:
-            idx = tracks[:, -1].tolist()
-            predictor.results[i].masks = predictor.results[i].masks[idx]


 def register_tracker(model):
ultralytics/yolo/cfg/__init__.py
@@ -18,13 +18,13 @@ TASKS = 'detect', 'segment', 'classify', 'pose'
 TASK2DATA = {
     'detect': 'coco128.yaml',
     'segment': 'coco128-seg.yaml',
-    'pose': 'coco128-pose.yaml',
-    'classify': 'imagenet100'}
+    'classify': 'imagenet100',
+    'pose': 'coco128-pose.yaml'}
 TASK2MODEL = {
     'detect': 'yolov8n.pt',
     'segment': 'yolov8n-seg.pt',
-    'pose': 'yolov8n-pose.yaml',
-    'classify': 'yolov8n-cls.pt'}  # temp
+    'classify': 'yolov8n-cls.pt',
+    'pose': 'yolov8n-pose.yaml'}  # temp

 CLI_HELP_MSG = \
     f"""
ultralytics/yolo/cfg/default.yaml
@@ -88,6 +88,8 @@ warmup_bias_lr: 0.1  # warmup initial bias lr
 box: 7.5  # box loss gain
 cls: 0.5  # cls loss gain (scale with pixels)
 dfl: 1.5  # dfl loss gain
+pose: 12.0  # pose loss gain
+kobj: 1.0  # keypoint obj loss gain
 fl_gamma: 0.0  # focal loss gamma (efficientDet default gamma=1.5)
 label_smoothing: 0.0  # label smoothing (fraction)
 nbs: 64  # nominal batch size
ultralytics/yolo/data/augment.py
@@ -16,6 +16,8 @@ from ..utils.metrics import bbox_ioa
 from ..utils.ops import segment2box
 from .utils import polygons2masks, polygons2masks_overlap

+POSE_FLIPLR_INDEX = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]
+

 # TODO: we might need a BaseTransform to make all these augments be compatible with both classification and semantic
 class BaseTransform:
@@ -309,27 +311,22 @@ class RandomPerspective:
         """apply affine to keypoints.

         Args:
-            keypoints(ndarray): keypoints, [N, 17, 2].
+            keypoints(ndarray): keypoints, [N, 17, 3].
             M(ndarray): affine matrix.
         Return:
-            new_keypoints(ndarray): keypoints after affine, [N, 17, 2].
+            new_keypoints(ndarray): keypoints after affine, [N, 17, 3].
         """
-        n = len(keypoints)
+        n, nkpt = keypoints.shape[:2]
         if n == 0:
             return keypoints
-        new_keypoints = np.ones((n * 17, 3))
-        new_keypoints[:, :2] = keypoints.reshape(n * 17, 2)  # num_kpt is hardcoded to 17
-        new_keypoints = new_keypoints @ M.T  # transform
-        new_keypoints = (new_keypoints[:, :2] / new_keypoints[:, 2:3]).reshape(n, 34)  # perspective rescale or affine
-        new_keypoints[keypoints.reshape(-1, 34) == 0] = 0
-        x_kpts = new_keypoints[:, list(range(0, 34, 2))]
-        y_kpts = new_keypoints[:, list(range(1, 34, 2))]
-
-        x_kpts[np.logical_or.reduce((x_kpts < 0, x_kpts > self.size[0], y_kpts < 0, y_kpts > self.size[1]))] = 0
-        y_kpts[np.logical_or.reduce((x_kpts < 0, x_kpts > self.size[0], y_kpts < 0, y_kpts > self.size[1]))] = 0
-        new_keypoints[:, list(range(0, 34, 2))] = x_kpts
-        new_keypoints[:, list(range(1, 34, 2))] = y_kpts
-        return new_keypoints.reshape(n, 17, 2)
+        xy = np.ones((n * nkpt, 3))
+        visible = keypoints[..., 2].reshape(n * nkpt, 1)
+        xy[:, :2] = keypoints[..., :2].reshape(n * nkpt, 2)
+        xy = xy @ M.T  # transform
+        xy = xy[:, :2] / xy[:, 2:3]  # perspective rescale or affine
+        out_mask = (xy[:, 0] < 0) | (xy[:, 1] < 0) | (xy[:, 0] > self.size[0]) | (xy[:, 1] > self.size[1])
+        visible[out_mask] = 0
+        return np.concatenate([xy, visible], axis=-1).reshape(n, nkpt, 3)

     def __call__(self, labels):
         """
@@ -415,12 +412,13 @@ class RandomHSV:

 class RandomFlip:

-    def __init__(self, p=0.5, direction='horizontal') -> None:
+    def __init__(self, p=0.5, direction='horizontal', flip_idx=None) -> None:
         assert direction in ['horizontal', 'vertical'], f'Support direction `horizontal` or `vertical`, got {direction}'
         assert 0 <= p <= 1.0

         self.p = p
         self.direction = direction
+        self.flip_idx = flip_idx

     def __call__(self, labels):
         img = labels['img']
@@ -437,6 +435,9 @@ class RandomFlip:
         if self.direction == 'horizontal' and random.random() < self.p:
             img = np.fliplr(img)
             instances.fliplr(w)
+            # for keypoints
+            if self.flip_idx is not None and instances.keypoints is not None:
+                instances.keypoints = np.ascontiguousarray(instances.keypoints[:, self.flip_idx, :])
         labels['img'] = np.ascontiguousarray(img)
         labels['instances'] = instances
         return labels
@@ -633,7 +634,7 @@ class Format:
         labels['cls'] = torch.from_numpy(cls) if nl else torch.zeros(nl)
         labels['bboxes'] = torch.from_numpy(instances.bboxes) if nl else torch.zeros((nl, 4))
         if self.return_keypoint:
-            labels['keypoints'] = torch.from_numpy(instances.keypoints) if nl else torch.zeros((nl, 17, 2))
+            labels['keypoints'] = torch.from_numpy(instances.keypoints)
         # then we can use collate_fn
         if self.batch_idx:
             labels['batch_idx'] = torch.zeros(nl)
@@ -672,13 +673,17 @@ def v8_transforms(dataset, imgsz, hyp):
         perspective=hyp.perspective,
         pre_transform=LetterBox(new_shape=(imgsz, imgsz)),
     )])
+    flip_idx = dataset.data.get('flip_idx', None)  # for keypoints augmentation
+    if dataset.use_keypoints and flip_idx is None and hyp.fliplr > 0.0:
+        hyp.fliplr = 0.0
+        LOGGER.warning("WARNING ⚠️ No `flip_idx` provided while training keypoints, setting augmentation 'fliplr=0.0'")
     return Compose([
         pre_transform,
         MixUp(dataset, pre_transform=pre_transform, p=hyp.mixup),
         Albumentations(p=1.0),
         RandomHSV(hgain=hyp.hsv_h, sgain=hyp.hsv_s, vgain=hyp.hsv_v),
         RandomFlip(direction='vertical', p=hyp.flipud),
-        RandomFlip(direction='horizontal', p=hyp.fliplr)])  # transforms
+        RandomFlip(direction='horizontal', p=hyp.fliplr, flip_idx=flip_idx)])  # transforms


 # Classification augmentations -----------------------------------------------------------------------------------------
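The flip_idx list wired through RandomFlip above exists because a horizontal mirror swaps a person's left and right side; mirroring x alone would leave the "left eye" slot pointing at what is now the right eye. A toy demonstration with two labeled keypoints (coordinates invented):

import numpy as np

flip_idx = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]  # COCO left/right pairs

kpts = np.zeros((1, 17, 3))
kpts[0, 1] = [10, 20, 1]  # left eye
kpts[0, 2] = [30, 20, 1]  # right eye
w = 100
kpts[0, 1, 0] = w - kpts[0, 1, 0]  # mirror x of the labeled points, as instances.fliplr(w) does
kpts[0, 2, 0] = w - kpts[0, 2, 0]
kpts = kpts[:, flip_idx, :]  # re-index so index 1 again names the anatomical left eye
print(kpts[0, 1], kpts[0, 2])  # [70. 20.  1.] [90. 20.  1.]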
ultralytics/yolo/data/build.py
@@ -61,7 +61,7 @@ def seed_worker(worker_id):  # noqa
     random.seed(worker_seed)


-def build_dataloader(cfg, batch, img_path, stride=32, rect=False, names=None, rank=-1, mode='train'):
+def build_dataloader(cfg, batch, img_path, data_info, stride=32, rect=False, rank=-1, mode='train'):
     assert mode in ['train', 'val']
     shuffle = mode == 'train'
     if cfg.rect and shuffle:
@@ -81,9 +81,9 @@ def build_dataloader(cfg, batch, img_path, stride=32, rect=False, names=None, rank=-1, mode='train'):
         pad=0.0 if mode == 'train' else 0.5,
         prefix=colorstr(f'{mode}: '),
         use_segments=cfg.task == 'segment',
-        use_keypoints=cfg.task == 'keypoint',
-        names=names,
-        classes=cfg.classes)
+        use_keypoints=cfg.task == 'pose',
+        classes=cfg.classes,
+        data=data_info)

     batch = min(batch, len(dataset))
     nd = torch.cuda.device_count()  # number of CUDA devices
ultralytics/yolo/data/dataset.py
@@ -57,11 +57,11 @@ class YOLODataset(BaseDataset):
                  single_cls=False,
                  use_segments=False,
                  use_keypoints=False,
-                 names=None,
+                 data=None,
                  classes=None):
         self.use_segments = use_segments
         self.use_keypoints = use_keypoints
-        self.names = names
+        self.data = data
         assert not (self.use_segments and self.use_keypoints), 'Can not use both segments and keypoints.'
         super().__init__(img_path, imgsz, cache, augment, hyp, prefix, rect, batch_size, stride, pad, single_cls,
                          classes)
@@ -77,10 +77,16 @@ class YOLODataset(BaseDataset):
         nm, nf, ne, nc, msgs = 0, 0, 0, 0, []  # number missing, found, empty, corrupt, messages
         desc = f'{self.prefix}Scanning {path.parent / path.stem}...'
         total = len(self.im_files)
+        nc = len(self.data['names'])
+        nkpt, ndim = self.data.get('kpt_shape', (0, 0))
+        if self.use_keypoints and (nkpt <= 0 or ndim not in (2, 3)):
+            raise ValueError("'kpt_shape' in data.yaml missing or incorrect. Should be a list with [number of "
+                             "keypoints, number of dims (2 for x,y or 3 for x,y,visible)], i.e. 'kpt_shape: [17, 3]'")
         with ThreadPool(NUM_THREADS) as pool:
             results = pool.imap(func=verify_image_label,
                                 iterable=zip(self.im_files, self.label_files, repeat(self.prefix),
-                                             repeat(self.use_keypoints), repeat(len(self.names))))
+                                             repeat(self.use_keypoints), repeat(len(self.data['names'])), repeat(nkpt),
+                                             repeat(ndim)))
             pbar = tqdm(results, desc=desc, total=total, bar_format=TQDM_BAR_FORMAT)
             for im_file, lb, shape, segments, keypoint, nm_f, nf_f, ne_f, nc_f, msg in pbar:
                 nm += nm_f
ultralytics/yolo/data/utils.py
@@ -6,10 +6,10 @@ import json
 import os
 import subprocess
 import time
+import zipfile
 from multiprocessing.pool import ThreadPool
 from pathlib import Path
 from tarfile import is_tarfile
-from zipfile import is_zipfile

 import cv2
 import numpy as np
@@ -61,7 +61,7 @@ def exif_size(img):

 def verify_image_label(args):
     # Verify one image-label pair
-    im_file, lb_file, prefix, keypoint, num_cls = args
+    im_file, lb_file, prefix, keypoint, num_cls, nkpt, ndim = args
     # number (missing, found, empty, corrupt), message, segments, keypoints
     nm, nf, ne, nc, msg, segments, keypoints = 0, 0, 0, 0, '', [], None
     try:
@@ -92,25 +92,19 @@ def verify_image_label(args):
             nl = len(lb)
             if nl:
                 if keypoint:
-                    assert lb.shape[1] == 56, 'labels require 56 columns each'
-                    assert (lb[:, 5::3] <= 1).all(), 'non-normalized or out of bounds coordinate labels'
-                    assert (lb[:, 6::3] <= 1).all(), 'non-normalized or out of bounds coordinate labels'
-                    kpts = np.zeros((lb.shape[0], 39))
-                    for i in range(len(lb)):
-                        kpt = np.delete(lb[i, 5:], np.arange(2, lb.shape[1] - 5, 3))  # remove occlusion param from GT
-                        kpts[i] = np.hstack((lb[i, :5], kpt))
-                    lb = kpts
-                    assert lb.shape[1] == 39, 'labels require 39 columns each after removing occlusion parameter'
+                    assert lb.shape[1] == (5 + nkpt * ndim), f'labels require {(5 + nkpt * ndim)} columns each'
+                    assert (lb[:, 5::ndim] <= 1).all(), 'non-normalized or out of bounds coordinate labels'
+                    assert (lb[:, 6::ndim] <= 1).all(), 'non-normalized or out of bounds coordinate labels'
                 else:
                     assert lb.shape[1] == 5, f'labels require 5 columns, {lb.shape[1]} columns detected'
                     assert (lb[:, 1:] <= 1).all(), \
                         f'non-normalized or out of bounds coordinates {lb[:, 1:][lb[:, 1:] > 1]}'
-                assert (lb >= 0).all(), f'negative label values {lb[lb < 0]}'
+                # All labels
                 max_cls = int(lb[:, 0].max())  # max label count
                 assert max_cls <= num_cls, \
                     f'Label class {max_cls} exceeds dataset class count {num_cls}. ' \
                     f'Possible class labels are 0-{num_cls - 1}'
+                assert (lb >= 0).all(), f'negative label values {lb[lb < 0]}'
                 _, i = np.unique(lb, axis=0, return_index=True)
                 if len(i) < nl:  # duplicate row check
                     lb = lb[i]  # remove duplicates
@@ -119,12 +113,18 @@ def verify_image_label(args):
                     msg = f'{prefix}WARNING ⚠️ {im_file}: {nl - len(i)} duplicate labels removed'
             else:
                 ne = 1  # label empty
-                lb = np.zeros((0, 39), dtype=np.float32) if keypoint else np.zeros((0, 5), dtype=np.float32)
+                lb = np.zeros((0, (5 + nkpt * ndim)), dtype=np.float32) if keypoint else np.zeros(
+                    (0, 5), dtype=np.float32)
         else:
             nm = 1  # label missing
-            lb = np.zeros((0, 39), dtype=np.float32) if keypoint else np.zeros((0, 5), dtype=np.float32)
+            lb = np.zeros((0, (5 + nkpt * ndim)), dtype=np.float32) if keypoint else np.zeros((0, 5), dtype=np.float32)
         if keypoint:
-            keypoints = lb[:, 5:].reshape(-1, 17, 2)
+            keypoints = lb[:, 5:].reshape(-1, nkpt, ndim)
+            if ndim == 2:
+                kpt_mask = np.ones(keypoints.shape[:2], dtype=np.float32)
+                kpt_mask = np.where(keypoints[..., 0] < 0, 0.0, kpt_mask)
+                kpt_mask = np.where(keypoints[..., 1] < 0, 0.0, kpt_mask)
+                keypoints = np.concatenate([keypoints, kpt_mask[..., None]], axis=-1)  # (nl, nkpt, 3)
         lb = lb[:, :5]
         return im_file, lb, shape, segments, keypoints, nm, nf, ne, nc, msg
     except Exception as e:
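For reference, the column count the new assert enforces follows directly from kpt_shape; a toy check with the COCO defaults taken from coco-pose.yaml above:

nkpt, ndim = 17, 3      # kpt_shape from the dataset YAML
cols = 5 + nkpt * ndim  # class, cx, cy, w, h, then (x, y, visible) per keypoint
print(cols)             # 56 columns per label row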
@@ -195,7 +195,7 @@ def check_det_dataset(dataset, autodownload=True):

     # Download (optional)
     extract_dir = ''
-    if isinstance(data, (str, Path)) and (is_zipfile(data) or is_tarfile(data)):
+    if isinstance(data, (str, Path)) and (zipfile.is_zipfile(data) or is_tarfile(data)):
         new_dir = safe_download(data, dir=DATASETS_DIR, unzip=True, delete=False, curl=False)
         data = next((DATASETS_DIR / new_dir).rglob('*.yaml'))
         extract_dir, autodownload = data.parent, False
@@ -356,23 +356,8 @@ class HUBDatasetStats():
         assert dir.is_dir(), f'Error unzipping {path}, {dir} not found. path/to/abc.zip MUST unzip to path/to/abc/'
         return True, str(dir), self._find_yaml(dir)  # zipped, data_dir, yaml_path

-    def _hub_ops(self, f, max_dim=1920):
-        # HUB ops for 1 image 'f': resize and save at reduced quality in /dataset-hub for web/app viewing
-        f_new = self.im_dir / Path(f).name  # dataset-hub image filename
-        try:  # use PIL
-            im = Image.open(f)
-            r = max_dim / max(im.height, im.width)  # ratio
-            if r < 1.0:  # image too large
-                im = im.resize((int(im.width * r), int(im.height * r)))
-            im.save(f_new, 'JPEG', quality=50, optimize=True)  # save
-        except Exception as e:  # use OpenCV
-            LOGGER.info(f'WARNING ⚠️ HUB ops PIL failure {f}: {e}')
-            im = cv2.imread(f)
-            im_height, im_width = im.shape[:2]
-            r = max_dim / max(im_height, im_width)  # ratio
-            if r < 1.0:  # image too large
-                im = cv2.resize(im, (int(im_width * r), int(im_height * r)), interpolation=cv2.INTER_AREA)
-            cv2.imwrite(str(f_new), im)
+    def _hub_ops(self, f):
+        compress_one_image(f, self.im_dir / Path(f).name)  # save to dataset-hub

     def get_json(self, save=False, verbose=False):
         # Return dataset JSON for Ultralytics HUB
@@ -426,3 +411,93 @@ class HUBDatasetStats():
         pass
         LOGGER.info(f'Done. All images saved to {self.im_dir}')
         return self.im_dir


+def compress_one_image(f, f_new=None, max_dim=1920, quality=50):
+    """
+    Compresses a single image file to reduced size while preserving its aspect ratio and quality using either the
+    Python Imaging Library (PIL) or OpenCV library. If the input image is smaller than the maximum dimension, it will
+    not be resized.
+
+    Args:
+        f (str): The path to the input image file.
+        f_new (str, optional): The path to the output image file. If not specified, the input file will be overwritten.
+        max_dim (int, optional): The maximum dimension (width or height) of the output image. Default is 1920 pixels.
+        quality (int, optional): The image compression quality as a percentage. Default is 50%.
+
+    Returns:
+        None
+
+    Usage:
+        from pathlib import Path
+        from ultralytics.yolo.data.utils import compress_one_image
+        for f in Path('/Users/glennjocher/Downloads/dataset').rglob('*.jpg'):
+            compress_one_image(f)
+    """
+    try:  # use PIL
+        im = Image.open(f)
+        r = max_dim / max(im.height, im.width)  # ratio
+        if r < 1.0:  # image too large
+            im = im.resize((int(im.width * r), int(im.height * r)))
+        im.save(f_new or f, 'JPEG', quality=quality, optimize=True)  # save
+    except Exception as e:  # use OpenCV
+        LOGGER.info(f'WARNING ⚠️ HUB ops PIL failure {f}: {e}')
+        im = cv2.imread(f)
+        im_height, im_width = im.shape[:2]
+        r = max_dim / max(im_height, im_width)  # ratio
+        if r < 1.0:  # image too large
+            im = cv2.resize(im, (int(im_width * r), int(im_height * r)), interpolation=cv2.INTER_AREA)
+        cv2.imwrite(str(f_new or f), im)
+
+
+def delete_dsstore(path):
+    """
+    Deletes all ".DS_store" files under a specified directory.
+
+    Args:
+        path (str, optional): The directory path where the ".DS_store" files should be deleted.
+
+    Returns:
+        None
+
+    Usage:
+        from ultralytics.yolo.data.utils import delete_dsstore
+        delete_dsstore('/Users/glennjocher/Downloads/dataset')
+
+    Note:
+        ".DS_store" files are created by the Apple operating system and contain metadata about folders and files. They
+        are hidden system files and can cause issues when transferring files between different operating systems.
+    """
+    # Delete Apple .DS_store files
+    files = list(Path(path).rglob('.DS_store'))
+    LOGGER.info(f'Deleting *.DS_store files: {files}')
+    for f in files:
+        f.unlink()
+
+
+def zip_directory(dir, use_zipfile_library=True):
+    """Zips a directory and saves the archive to the specified output path.
+
+    Args:
+        dir (str): The path to the directory to be zipped.
+        use_zipfile_library (bool): Whether to use zipfile library or shutil for zipping.
+
+    Returns:
+        None
+
+    Usage:
+        from ultralytics.yolo.data.utils import zip_directory
+        zip_directory('/Users/glennjocher/Downloads/playground')
+
+        zip -r coco8-pose.zip coco8-pose
+    """
+    delete_dsstore(dir)
+    if use_zipfile_library:
+        dir = Path(dir)
+        with zipfile.ZipFile(dir.with_suffix('.zip'), 'w', zipfile.ZIP_DEFLATED) as zip_file:
+            for file_path in dir.glob('**/*'):
+                if file_path.is_file():
+                    zip_file.write(file_path, file_path.relative_to(dir))
+    else:
+        import shutil
+        shutil.make_archive(dir, 'zip', dir)
ultralytics/yolo/engine/exporter.py
@@ -209,8 +209,8 @@ class Exporter:
         self.file = file
         self.output_shape = tuple(y.shape) if isinstance(y, torch.Tensor) else tuple(tuple(x.shape) for x in y)
         self.pretty_name = Path(self.model.yaml.get('yaml_file', self.file)).stem.replace('yolo', 'YOLO')
-        description = f'Ultralytics {self.pretty_name} model ' + f'trained on {Path(self.args.data).name}' \
-            if self.args.data else '(untrained)'
+        trained_on = f'trained on {Path(self.args.data).name}' if self.args.data else '(untrained)'
+        description = f'Ultralytics {self.pretty_name} model {trained_on}'
         self.metadata = {
             'description': description,
             'author': 'Ultralytics',
@@ -221,6 +221,8 @@ class Exporter:
             'batch': self.args.batch,
             'imgsz': self.imgsz,
             'names': model.names}  # model metadata
+        if model.task == 'pose':
+            self.metadata['kpt_shape'] = model.kpt_shape

         LOGGER.info(f"\n{colorstr('PyTorch:')} starting from {file} with input shape {tuple(im.shape)} BCHW and "
                     f'output shape(s) {self.output_shape} ({file_size(file):.1f} MB)')
@@ -295,7 +297,8 @@ class Exporter:
         check_requirements(requirements)
         import onnx  # noqa

-        LOGGER.info(f'\n{prefix} starting export with onnx {onnx.__version__}...')
+        opset_version = self.args.opset or get_latest_opset()
+        LOGGER.info(f'\n{prefix} starting export with onnx {onnx.__version__} opset {opset_version}...')
         f = str(self.file.with_suffix('.onnx'))

         output_names = ['output0', 'output1'] if isinstance(self.model, SegmentationModel) else ['output0']
@@ -313,7 +316,7 @@ class Exporter:
             self.im.cpu() if dynamic else self.im,
             f,
             verbose=False,
-            opset_version=self.args.opset or get_latest_opset(),
+            opset_version=opset_version,
             do_constant_folding=True,  # WARNING: DNN inference with torch>=1.12 may require do_constant_folding=False
             input_names=['images'],
             output_names=output_names,
@@ -377,7 +380,6 @@ class Exporter:
         yaml_save(Path(f) / 'metadata.yaml', self.metadata)  # add metadata.yaml
         return f, None

-
     @try_export
     def _export_coreml(self, prefix=colorstr('CoreML:')):
         # YOLOv8 CoreML export
         check_requirements('coremltools>=6.0')
@@ -410,8 +412,8 @@ class Exporter:
             model = self.model
         elif self.model.task == 'detect':
             model = iOSDetectModel(self.model, self.im) if self.args.nms else self.model
-        elif self.model.task == 'segment':
-            # TODO CoreML Segmentation model pipelining
+        else:
+            # TODO CoreML Segment and Pose model pipelining
             model = self.model

         ts = torch.jit.trace(model.eval(), self.im, strict=False)  # TorchScript model
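Because kpt_shape now travels with exports, a converted pose model can be reloaded without its training YAML. A minimal sketch, assuming the released yolov8n-pose.pt asset:

from ultralytics import YOLO

model = YOLO('yolov8n-pose.pt')  # '-pose' weights are auto-downloadable (see GITHUB_ASSET_NAMES below)
model.export(format='onnx')      # export metadata now records kpt_shape for AutoBackend to read back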
ultralytics/yolo/engine/model.py
@@ -5,8 +5,8 @@ from pathlib import Path
 from typing import Union

 from ultralytics import yolo  # noqa
-from ultralytics.nn.tasks import (ClassificationModel, DetectionModel, SegmentationModel, attempt_load_one_weight,
-                                  guess_model_task, nn, yaml_model_load)
+from ultralytics.nn.tasks import (ClassificationModel, DetectionModel, PoseModel, SegmentationModel,
+                                  attempt_load_one_weight, guess_model_task, nn, yaml_model_load)
 from ultralytics.yolo.cfg import get_cfg
 from ultralytics.yolo.engine.exporter import Exporter
 from ultralytics.yolo.utils import (DEFAULT_CFG, DEFAULT_CFG_DICT, DEFAULT_CFG_KEYS, LOGGER, RANK, ROOT, callbacks,
@@ -25,7 +25,8 @@ TASK_MAP = {
         yolo.v8.detect.DetectionPredictor],
     'segment': [
         SegmentationModel, yolo.v8.segment.SegmentationTrainer, yolo.v8.segment.SegmentationValidator,
-        yolo.v8.segment.SegmentationPredictor]}
+        yolo.v8.segment.SegmentationPredictor],
+    'pose': [PoseModel, yolo.v8.pose.PoseTrainer, yolo.v8.pose.PoseValidator, yolo.v8.pose.PosePredictor]}


 class YOLO:
@@ -195,7 +196,7 @@ class YOLO:
         self.model.load(weights)
         return self

-    def info(self, verbose=False):
+    def info(self, verbose=True):
         """
         Logs model info.
ultralytics/yolo/engine/predictor.py
@@ -246,6 +246,7 @@ class BasePredictor:
                                     dnn=self.args.dnn,
                                     data=self.args.data,
                                     fp16=self.args.half,
+                                    fuse=True,
                                     verbose=verbose)
         self.device = device
         self.model.eval()
ultralytics/yolo/engine/results.py
@@ -17,6 +17,53 @@ from ultralytics.yolo.utils.plotting import Annotator, colors
 from ultralytics.yolo.utils.torch_utils import TORCHVISION_0_10


+class BaseTensor(SimpleClass):
+    """
+
+    Attributes:
+        tensor (torch.Tensor): A tensor.
+        orig_shape (tuple): Original image size, in the format (height, width).
+
+    Methods:
+        cpu(): Returns a copy of the tensor on CPU memory.
+        numpy(): Returns a copy of the tensor as a numpy array.
+        cuda(): Returns a copy of the tensor on GPU memory.
+        to(): Returns a copy of the tensor with the specified device and dtype.
+    """
+
+    def __init__(self, tensor, orig_shape) -> None:
+        super().__init__()
+        assert isinstance(tensor, torch.Tensor)
+        self.tensor = tensor
+        self.orig_shape = orig_shape
+
+    @property
+    def shape(self):
+        return self.data.shape
+
+    @property
+    def data(self):
+        return self.tensor
+
+    def cpu(self):
+        return self.__class__(self.data.cpu(), self.orig_shape)
+
+    def numpy(self):
+        return self.__class__(self.data.numpy(), self.orig_shape)
+
+    def cuda(self):
+        return self.__class__(self.data.cuda(), self.orig_shape)
+
+    def to(self, *args, **kwargs):
+        return self.__class__(self.data.to(*args, **kwargs), self.orig_shape)
+
+    def __len__(self):  # override len(results)
+        return len(self.data)
+
+    def __getitem__(self, idx):
+        return self.__class__(self.data[idx], self.orig_shape)
+
+
 class Results(SimpleClass):
     """
     A class for storing and manipulating inference results.
@@ -40,22 +87,23 @@ class Results(SimpleClass):
         _keys (tuple): A tuple of attribute names for non-empty attributes.
     """

-    def __init__(self, orig_img, path, names, boxes=None, masks=None, probs=None) -> None:
+    def __init__(self, orig_img, path, names, boxes=None, masks=None, probs=None, keypoints=None) -> None:
         self.orig_img = orig_img
         self.orig_shape = orig_img.shape[:2]
         self.boxes = Boxes(boxes, self.orig_shape) if boxes is not None else None  # native size boxes
         self.masks = Masks(masks, self.orig_shape) if masks is not None else None  # native size or imgsz masks
         self.probs = probs if probs is not None else None
+        self.keypoints = keypoints if keypoints is not None else None
         self.names = names
         self.path = path
-        self._keys = ('boxes', 'masks', 'probs')
+        self._keys = ('boxes', 'masks', 'probs', 'keypoints')

     def pandas(self):
         pass
         # TODO masks.pandas + boxes.pandas + cls.pandas

     def __getitem__(self, idx):
-        r = Results(orig_img=self.orig_img, path=self.path, names=self.names)
+        r = self.new()
         for k in self.keys:
             setattr(r, k, getattr(self, k)[idx])
         return r
@@ -69,25 +117,25 @@ class Results(SimpleClass):
         self.probs = probs

     def cpu(self):
-        r = Results(orig_img=self.orig_img, path=self.path, names=self.names)
+        r = self.new()
         for k in self.keys:
             setattr(r, k, getattr(self, k).cpu())
         return r

     def numpy(self):
-        r = Results(orig_img=self.orig_img, path=self.path, names=self.names)
+        r = self.new()
         for k in self.keys:
             setattr(r, k, getattr(self, k).numpy())
         return r

     def cuda(self):
-        r = Results(orig_img=self.orig_img, path=self.path, names=self.names)
+        r = self.new()
         for k in self.keys:
             setattr(r, k, getattr(self, k).cuda())
         return r

     def to(self, *args, **kwargs):
-        r = Results(orig_img=self.orig_img, path=self.path, names=self.names)
+        r = self.new()
         for k in self.keys:
             setattr(r, k, getattr(self, k).to(*args, **kwargs))
         return r
@@ -96,6 +144,9 @@ class Results(SimpleClass):
         for k in self.keys:
             return len(getattr(self, k))

+    def new(self):
+        return Results(orig_img=self.orig_img, path=self.path, names=self.names)
+
     @property
     def keys(self):
         return [k for k in self._keys if getattr(self, k) is not None]
@@ -109,6 +160,7 @@ class Results(SimpleClass):
              pil=False,
              example='abc',
              img=None,
+             kpt_line=True,
              labels=True,
              boxes=True,
              masks=True,
@@ -126,6 +178,7 @@ class Results(SimpleClass):
             pil (bool): Whether to return the image as a PIL Image.
             example (str): An example string to display. Useful for indicating the expected format of the output.
             img (numpy.ndarray): Plot to another image. if not, plot to original image.
+            kpt_line (bool): Whether to draw lines connecting keypoints.
             labels (bool): Whether to plot the label of bounding boxes.
             boxes (bool): Whether to plot the bounding boxes.
             masks (bool): Whether to plot the masks.
@@ -146,11 +199,12 @@ class Results(SimpleClass):
         pred_masks, show_masks = self.masks, masks
         pred_probs, show_probs = self.probs, probs
         names = self.names
+        keypoints = self.keypoints
         if pred_boxes and show_boxes:
             for d in reversed(pred_boxes):
                 c, conf, id = int(d.cls), float(d.conf) if conf else None, None if d.id is None else int(d.id.item())
                 name = ('' if id is None else f'id:{id} ') + names[c]
-                label = (name if not conf else f'{name} {conf:.2f}') if labels else None
+                label = (f'{name} {conf:.2f}' if conf else name) if labels else None
                 annotator.box_label(d.xyxy.squeeze(), label, color=colors(c, True))

         if pred_masks and show_masks:
@@ -168,10 +222,14 @@ class Results(SimpleClass):
             text = f"{', '.join(f'{names[j] if names else j} {pred_probs[j]:.2f}' for j in top5i)}, "
             annotator.text((32, 32), text, txt_color=(255, 255, 255))  # TODO: allow setting colors

+        if keypoints is not None:
+            for k in reversed(keypoints):
+                annotator.kpts(k, self.orig_shape, kpt_line=kpt_line)
+
         return np.asarray(annotator.im) if annotator.pil else annotator.im


-class Boxes(SimpleClass):
+class Boxes(BaseTensor):
     """
     A class for storing and manipulating detection boxes.

@@ -246,37 +304,15 @@ class Boxes(SimpleClass):
     def xywhn(self):
         return self.xywh / self.orig_shape[[1, 0, 1, 0]]

-    def cpu(self):
-        return Boxes(self.boxes.cpu(), self.orig_shape)
-
-    def numpy(self):
-        return Boxes(self.boxes.numpy(), self.orig_shape)
-
-    def cuda(self):
-        return Boxes(self.boxes.cuda(), self.orig_shape)
-
-    def to(self, *args, **kwargs):
-        return Boxes(self.boxes.to(*args, **kwargs), self.orig_shape)
-
     def pandas(self):
         LOGGER.info('results.pandas() method not yet implemented')

     @property
     def shape(self):
         return self.boxes.shape

     @property
     def data(self):
         return self.boxes

-    def __len__(self):  # override len(results)
-        return len(self.boxes)
-
-    def __getitem__(self, idx):
-        return Boxes(self.boxes[idx], self.orig_shape)
-

-class Masks(SimpleClass):
+class Masks(BaseTensor):
     """
     A class for storing and manipulating detection masks.

@@ -316,7 +352,7 @@ class Masks(SimpleClass):
     def xyn(self):
         # Segments (normalized)
         return [
-            ops.scale_segments(self.masks.shape[1:], x, self.orig_shape, normalize=True)
+            ops.scale_coords(self.masks.shape[1:], x, self.orig_shape, normalize=True)
             for x in ops.masks2segments(self.masks)]

     @property
@@ -324,31 +360,9 @@ class Masks(SimpleClass):
     def xy(self):
         # Segments (pixels)
         return [
-            ops.scale_segments(self.masks.shape[1:], x, self.orig_shape, normalize=False)
+            ops.scale_coords(self.masks.shape[1:], x, self.orig_shape, normalize=False)
             for x in ops.masks2segments(self.masks)]

-    @property
-    def shape(self):
-        return self.masks.shape
-
-    @property
-    def data(self):
-        return self.masks
-
-    def cpu(self):
-        return Masks(self.masks.cpu(), self.orig_shape)
-
-    def numpy(self):
-        return Masks(self.masks.numpy(), self.orig_shape)
-
-    def cuda(self):
-        return Masks(self.masks.cuda(), self.orig_shape)
-
-    def to(self, *args, **kwargs):
-        return Masks(self.masks.to(*args, **kwargs), self.orig_shape)
-
     def __len__(self):  # override len(results)
         return len(self.masks)

     def __getitem__(self, idx):
         return Masks(self.masks[idx], self.orig_shape)
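Downstream, the new keypoints attribute rides along with boxes through indexing, device moves, and plotting. A usage sketch, again assuming a released pose checkpoint:

from ultralytics import YOLO

for r in YOLO('yolov8n-pose.pt')('https://ultralytics.com/images/bus.jpg'):
    print(r.keypoints.shape)    # e.g. (num_persons, 17, 3): x, y, confidence per keypoint
    im = r.plot(kpt_line=True)  # draws skeletons via annotator.kpts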
ultralytics/yolo/utils/benchmarks.py
@@ -75,11 +75,13 @@ def benchmark(model=Path(SETTINGS['weights_dir']) / 'yolov8n.pt', imgsz=160, half=False,

             # Validate
             if model.task == 'detect':
-                data, key = 'coco128.yaml', 'metrics/mAP50-95(B)'
+                data, key = 'coco8.yaml', 'metrics/mAP50-95(B)'
             elif model.task == 'segment':
-                data, key = 'coco128-seg.yaml', 'metrics/mAP50-95(M)'
+                data, key = 'coco8-seg.yaml', 'metrics/mAP50-95(M)'
             elif model.task == 'classify':
                 data, key = 'imagenet100', 'metrics/accuracy_top5'
+            elif model.task == 'pose':
+                data, key = 'coco8-pose.yaml', 'metrics/mAP50-95(P)'

             results = export.val(data=data, batch=1, imgsz=imgsz, plots=False, device=device, half=half, verbose=False)
             metric, speed = results.results_dict[key], results.speed['inference']
ultralytics/yolo/utils/downloads.py
@@ -14,9 +14,9 @@ from tqdm import tqdm

 from ultralytics.yolo.utils import LOGGER, checks, emojis, is_online

-GITHUB_ASSET_NAMES = [f'yolov8{size}{suffix}.pt' for size in 'nsmlx' for suffix in ('', '6', '-cls', '-seg')] + \
-                     [f'yolov5{size}u.pt' for size in 'nsmlx'] + \
-                     [f'yolov3{size}u.pt' for size in ('', '-spp', '-tiny')]
+GITHUB_ASSET_NAMES = [f'yolov8{k}{suffix}.pt' for k in 'nsmlx' for suffix in ('', '6', '-cls', '-seg', '-pose')] + \
+                     [f'yolov5{k}u.pt' for k in 'nsmlx'] + \
+                     [f'yolov3{k}u.pt' for k in ('', '-spp', '-tiny')]
 GITHUB_ASSET_STEMS = [Path(k).stem for k in GITHUB_ASSET_NAMES]
ultralytics/yolo/utils/instance.py
@@ -168,7 +168,7 @@ class Instances:
         Args:
             bboxes (ndarray): bboxes with shape [N, 4].
             segments (list | ndarray): segments.
-            keypoints (ndarray): keypoints with shape [N, 17, 2].
+            keypoints (ndarray): keypoints(x, y, visible) with shape [N, 17, 3].
         """
         if segments is None:
             segments = []
ultralytics/yolo/utils/loss.py
@@ -54,3 +54,17 @@ class BboxLoss(nn.Module):
         wr = 1 - wl  # weight right
         return (F.cross_entropy(pred_dist, tl.view(-1), reduction='none').view(tl.shape) * wl +
                 F.cross_entropy(pred_dist, tr.view(-1), reduction='none').view(tl.shape) * wr).mean(-1, keepdim=True)
+
+
+class KeypointLoss(nn.Module):
+
+    def __init__(self, sigmas) -> None:
+        super().__init__()
+        self.sigmas = sigmas
+
+    def forward(self, pred_kpts, gt_kpts, kpt_mask, area):
+        d = (pred_kpts[..., 0] - gt_kpts[..., 0]) ** 2 + (pred_kpts[..., 1] - gt_kpts[..., 1]) ** 2
+        kpt_loss_factor = (torch.sum(kpt_mask != 0) + torch.sum(kpt_mask == 0)) / (torch.sum(kpt_mask != 0) + 1e-9)
+        # e = d / (2 * (area * self.sigmas) ** 2 + 1e-9)  # from formula
+        e = d / (2 * self.sigmas) ** 2 / (area + 1e-9) / 2  # from cocoeval
+        return kpt_loss_factor * ((1 - torch.exp(-e)) * kpt_mask).mean()
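A toy invocation to sanity-check shapes; the import path is assumed from this file's location, and the uniform sigmas are placeholders rather than the COCO OKS_SIGMA vector defined in metrics.py below:

import torch
from ultralytics.yolo.utils.loss import KeypointLoss  # assumed import path for this release

criterion = KeypointLoss(sigmas=torch.ones(17) / 17)
pred = torch.rand(4, 17, 2) * 640     # predicted (x, y) per keypoint, in pixels
gt = pred + torch.randn(4, 17, 2)     # ground truth close to the predictions
kpt_mask = torch.ones(4, 17)          # every keypoint labeled
area = torch.full((4, 1), 100.0)      # ground-truth box areas
print(criterion(pred, gt, kpt_mask, area))  # scalar that shrinks toward 0 as pred -> gt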
|
||||
|
@ -13,6 +13,8 @@ import torch.nn as nn
|
||||
|
||||
from ultralytics.yolo.utils import LOGGER, SimpleClass, TryExcept
|
||||
|
||||
OKS_SIGMA = np.array([.26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, .87, .89, .89]) / 10.0
|
||||
|
||||
|
||||
# boxes
|
||||
def box_area(box):
|
||||
@ -108,8 +110,8 @@ def bbox_iou(box1, box2, xywh=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7
|
||||
|
||||
def mask_iou(mask1, mask2, eps=1e-7):
|
||||
"""
|
||||
mask1: [N, n] m1 means number of predicted objects
|
||||
mask2: [M, n] m2 means number of gt objects
|
||||
mask1: [N, n] m1 means number of gt objects
|
||||
mask2: [M, n] m2 means number of predicted objects
|
||||
Note: n means image_w x image_h
|
||||
Returns: masks iou, [N, M]
|
||||
"""
|
||||
@ -118,16 +120,18 @@ def mask_iou(mask1, mask2, eps=1e-7):
    return intersection / (union + eps)


def masks_iou(mask1, mask2, eps=1e-7):
def kpt_iou(kpt1, kpt2, area, sigma, eps=1e-7):
    """OKS
    kpt1: [N, 17, 3], gt
    kpt2: [M, 17, 3], pred
    area: [N], areas from gt
    """
    mask1: [N, n] m1 means number of predicted objects
    mask2: [N, n] m2 means number of gt objects
    Note: n means image_w x image_h
    Returns: masks iou, (N, )
    """
    intersection = (mask1 * mask2).sum(1).clamp(0)  # (N, )
    union = (mask1.sum(1) + mask2.sum(1))[None] - intersection  # (area1 + area2) - intersection
    return intersection / (union + eps)
    d = (kpt1[:, None, :, 0] - kpt2[..., 0]) ** 2 + (kpt1[:, None, :, 1] - kpt2[..., 1]) ** 2  # (N, M, 17)
    sigma = torch.tensor(sigma, device=kpt1.device, dtype=kpt1.dtype)  # (17, )
    kpt_mask = kpt1[..., 2] != 0  # (N, 17)
    e = d / (2 * sigma) ** 2 / (area[:, None, None] + eps) / 2  # from cocoeval
    # e = d / ((area[None, :, None] + eps) * sigma) ** 2 / 2  # from formula
    return (torch.exp(-e) * kpt_mask[:, None]).sum(-1) / (kpt_mask.sum(-1)[:, None] + eps)

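For reference, my reading of the cocoeval-style expression implemented above (not text from the commit): with $d_i$ the squared pixel distance for keypoint $i$, $s$ the ground-truth box area, $\sigma_i$ the per-keypoint constant and $v_i$ the visibility flag, the returned similarity is

OKS = \frac{\sum_{i} \exp\!\left(-\frac{d_i}{2\,(2\sigma_i)^2\,(s + \varepsilon)}\right)\,\mathbf{1}[v_i > 0]}{\sum_{i}\mathbf{1}[v_i > 0] + \varepsilon}
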
def smooth_BCE(eps=0.1):  # https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441
@ -649,13 +653,13 @@ class SegmentMetrics(SimpleClass):
        self.seg = Metric()
        self.speed = {'preprocess': 0.0, 'inference': 0.0, 'loss': 0.0, 'postprocess': 0.0}

    def process(self, tp_m, tp_b, conf, pred_cls, target_cls):
    def process(self, tp_b, tp_m, conf, pred_cls, target_cls):
        """
        Processes the detection and segmentation metrics over the given set of predictions.

        Args:
            tp_m (list): List of True Positive masks.
            tp_b (list): List of True Positive boxes.
            tp_m (list): List of True Positive masks.
            conf (list): List of confidence scores.
            pred_cls (list): List of predicted classes.
            target_cls (list): List of target classes.
@ -712,6 +716,100 @@ class SegmentMetrics(SimpleClass):
        return dict(zip(self.keys + ['fitness'], self.mean_results() + [self.fitness]))

class PoseMetrics(SegmentMetrics):
    """
    Calculates and aggregates detection and pose metrics over a given set of classes.

    Args:
        save_dir (Path): Path to the directory where the output plots should be saved. Default is the current directory.
        plot (bool): Whether to save the detection and pose plots. Default is False.
        names (list): List of class names. Default is an empty list.

    Attributes:
        save_dir (Path): Path to the directory where the output plots should be saved.
        plot (bool): Whether to save the detection and pose plots.
        names (list): List of class names.
        box (Metric): An instance of the Metric class to calculate box detection metrics.
        pose (Metric): An instance of the Metric class to calculate pose keypoint metrics.
        speed (dict): Dictionary to store the time taken in different phases of inference.

    Methods:
        process(tp_b, tp_p, conf, pred_cls, target_cls): Processes metrics over the given set of predictions.
        mean_results(): Returns the mean of the detection and pose metrics over all the classes.
        class_result(i): Returns the detection and pose metrics of class `i`.
        maps: Returns the mean Average Precision (mAP) scores for IoU thresholds ranging from 0.50 to 0.95.
        fitness: Returns the fitness scores, which are a single weighted combination of metrics.
        ap_class_index: Returns the list of indices of classes used to compute Average Precision (AP).
        results_dict: Returns the dictionary containing all the detection and pose metrics and fitness score.
    """

    def __init__(self, save_dir=Path('.'), plot=False, names=()) -> None:
        super().__init__(save_dir, plot, names)
        self.save_dir = save_dir
        self.plot = plot
        self.names = names
        self.box = Metric()
        self.pose = Metric()
        self.speed = {'preprocess': 0.0, 'inference': 0.0, 'loss': 0.0, 'postprocess': 0.0}

    def __getattr__(self, attr):
        name = self.__class__.__name__
        raise AttributeError(f"'{name}' object has no attribute '{attr}'. See valid attributes below.\n{self.__doc__}")

    def process(self, tp_b, tp_p, conf, pred_cls, target_cls):
        """
        Processes the detection and pose metrics over the given set of predictions.

        Args:
            tp_b (list): List of True Positive boxes.
            tp_p (list): List of True Positive keypoints.
            conf (list): List of confidence scores.
            pred_cls (list): List of predicted classes.
            target_cls (list): List of target classes.
        """

        results_pose = ap_per_class(tp_p,
                                    conf,
                                    pred_cls,
                                    target_cls,
                                    plot=self.plot,
                                    save_dir=self.save_dir,
                                    names=self.names,
                                    prefix='Pose')[2:]
        self.pose.nc = len(self.names)
        self.pose.update(results_pose)
        results_box = ap_per_class(tp_b,
                                   conf,
                                   pred_cls,
                                   target_cls,
                                   plot=self.plot,
                                   save_dir=self.save_dir,
                                   names=self.names,
                                   prefix='Box')[2:]
        self.box.nc = len(self.names)
        self.box.update(results_box)

    @property
    def keys(self):
        return [
            'metrics/precision(B)', 'metrics/recall(B)', 'metrics/mAP50(B)', 'metrics/mAP50-95(B)',
            'metrics/precision(P)', 'metrics/recall(P)', 'metrics/mAP50(P)', 'metrics/mAP50-95(P)']

    def mean_results(self):
        return self.box.mean_results() + self.pose.mean_results()

    def class_result(self, i):
        return self.box.class_result(i) + self.pose.class_result(i)

    @property
    def maps(self):
        return self.box.maps + self.pose.maps

    @property
    def fitness(self):
        return self.pose.fitness() + self.box.fitness()

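A quick, hedged illustration of the aggregate interface this class exposes, assuming Metric, SegmentMetrics and ap_per_class from this module are importable (the names mapping is a placeholder):

from pathlib import Path

metrics = PoseMetrics(save_dir=Path('.'), plot=False, names={0: 'person'})
print(metrics.keys)
# eight box (B) and pose (P) precision/recall/mAP keys, which results_dict later
# zips with mean_results() + [fitness]
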
class ClassifyMetrics(SimpleClass):
    """
    Class for computing classification metrics including top-1 and top-5 accuracy.

@ -281,28 +281,23 @@ def clip_boxes(boxes, shape):
        boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, shape[0])  # y1, y2


def clip_coords(boxes, shape):
def clip_coords(coords, shape):
    """
    Clip bounding xyxy bounding boxes to image shape (height, width).
    Clip line coordinates to the image boundaries.

    Args:
        boxes (torch.Tensor or numpy.ndarray): Bounding boxes to be clipped.
        shape (tuple): The shape of the image. (height, width)
        coords (torch.Tensor) or (numpy.ndarray): A list of line coordinates.
        shape (tuple): A tuple of integers representing the size of the image in the format (height, width).

    Returns:
        None

    Note:
        The input `boxes` is modified in-place, there is no return value.
        (None): The function modifies the input `coordinates` in place, by clipping each coordinate to the image boundaries.
    """
    if isinstance(boxes, torch.Tensor):  # faster individually
        boxes[:, 0].clamp_(0, shape[1])  # x1
        boxes[:, 1].clamp_(0, shape[0])  # y1
        boxes[:, 2].clamp_(0, shape[1])  # x2
        boxes[:, 3].clamp_(0, shape[0])  # y2
    if isinstance(coords, torch.Tensor):  # faster individually
        coords[..., 0].clamp_(0, shape[1])  # x
        coords[..., 1].clamp_(0, shape[0])  # y
    else:  # np.array (faster grouped)
        boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1])  # x1, x2
        boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0])  # y1, y2
        coords[..., 0] = coords[..., 0].clip(0, shape[1])  # x
        coords[..., 1] = coords[..., 1].clip(0, shape[0])  # y

def scale_image(im1_shape, masks, im0_shape, ratio_pad=None):
@ -577,17 +572,18 @@ def process_mask_upsample(protos, masks_in, bboxes, shape):


def process_mask(protos, masks_in, bboxes, shape, upsample=False):
    """
    It takes the output of the mask head, and applies the mask to the bounding boxes. This is faster but produces
    downsampled quality of mask
    Apply masks to bounding boxes using the output of the mask head.

    Args:
        protos (torch.Tensor): [mask_dim, mask_h, mask_w]
        masks_in (torch.Tensor): [n, mask_dim], n is number of masks after nms
        bboxes (torch.Tensor): [n, 4], n is number of masks after nms
        shape (tuple): the size of the input image (h,w)
        protos (torch.Tensor): A tensor of shape [mask_dim, mask_h, mask_w].
        masks_in (torch.Tensor): A tensor of shape [n, mask_dim], where n is the number of masks after NMS.
        bboxes (torch.Tensor): A tensor of shape [n, 4], where n is the number of masks after NMS.
        shape (tuple): A tuple of integers representing the size of the input image in the format (h, w).
        upsample (bool): A flag to indicate whether to upsample the mask to the original image size. Default is False.

    Returns:
        (torch.Tensor): The processed masks.
        (torch.Tensor): A binary mask tensor of shape [n, h, w], where n is the number of masks after NMS, and h and w
        are the height and width of the input image. The mask is applied to the bounding boxes.
    """

    c, mh, mw = protos.shape  # CHW
@ -632,19 +628,19 @@ def process_mask_native(protos, masks_in, bboxes, shape):
    return masks.gt_(0.5)


def scale_segments(img1_shape, segments, img0_shape, ratio_pad=None, normalize=False):
def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None, normalize=False):
    """
    Rescale segment coordinates (xyxy) from img1_shape to img0_shape

    Args:
        img1_shape (tuple): The shape of the image that the segments are from.
        segments (torch.Tensor): the segments to be scaled
        img1_shape (tuple): The shape of the image that the coords are from.
        coords (torch.Tensor): the coords to be scaled
        img0_shape (tuple): the shape of the image that the segmentation is being applied to
        ratio_pad (tuple): the ratio of the image size to the padded image size.
        normalize (bool): If True, the coordinates will be normalized to the range [0, 1]. Defaults to False

    Returns:
        segments (torch.Tensor): the segmented image.
        coords (torch.Tensor): the segmented image.
    """
    if ratio_pad is None:  # calculate from img0_shape
        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain = old / new
@ -653,14 +649,15 @@ def scale_segments(img1_shape, segments, img0_shape, ratio_pad=None, normalize=F
        gain = ratio_pad[0][0]
        pad = ratio_pad[1]

    segments[:, 0] -= pad[0]  # x padding
    segments[:, 1] -= pad[1]  # y padding
    segments /= gain
    clip_segments(segments, img0_shape)
    coords[..., 0] -= pad[0]  # x padding
    coords[..., 1] -= pad[1]  # y padding
    coords[..., 0] /= gain
    coords[..., 1] /= gain
    clip_coords(coords, img0_shape)
    if normalize:
        segments[:, 0] /= img0_shape[1]  # width
        segments[:, 1] /= img0_shape[0]  # height
        return segments
        coords[..., 0] /= img0_shape[1]  # width
        coords[..., 1] /= img0_shape[0]  # height
    return coords

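A small sketch of the new ellipsis-indexed coords pathway, calling the function with an explicit ratio_pad; the (480, 640) original shape and 80 px vertical padding are made-up numbers, not from the commit:

import torch

kpts = torch.tensor([[[320.0, 400.0, 0.9]]])  # one (x, y, conf) keypoint in a 640x640 letterboxed image
out = scale_coords((640, 640), kpts.clone(), (480, 640), ratio_pad=((1.0, 1.0), (0.0, 80.0)))
print(out)  # tensor([[[320., 320., 0.9000]]]) — y shifted up by the top padding, trailing conf channel untouched
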
def masks2segments(masks, strategy='largest'):
@ -688,23 +685,6 @@ def masks2segments(masks, strategy='largest'):
    return segments


def clip_segments(segments, shape):
    """
    It takes a list of line segments (x1,y1,x2,y2) and clips them to the image shape (height, width)

    Args:
        segments (list): a list of segments, each segment is a list of points, each point is a list of x,y
        coordinates
        shape (tuple): the shape of the image
    """
    if isinstance(segments, torch.Tensor):  # faster individually
        segments[:, 0].clamp_(0, shape[1])  # x
        segments[:, 1].clamp_(0, shape[0])  # y
    else:  # np.array (faster grouped)
        segments[:, 0] = segments[:, 0].clip(0, shape[1])  # x
        segments[:, 1] = segments[:, 1].clip(0, shape[0])  # y

def clean_str(s):
    """
    Cleans a string by replacing special characters with underscore _
@ -16,7 +16,7 @@ from ultralytics.yolo.utils import LOGGER, TryExcept, threaded

from .checks import check_font, check_version, is_ascii
from .files import increment_path
from .ops import clip_coords, scale_image, xywh2xyxy, xyxy2xywh
from .ops import clip_boxes, scale_image, xywh2xyxy, xyxy2xywh

matplotlib.rc('font', **{'size': 11})
matplotlib.use('Agg')  # for writing to files only
@ -30,6 +30,11 @@ class Colors:
            '2C99A8', '00C2FF', '344593', '6473FF', '0018EC', '8438FF', '520085', 'CB38FF', 'FF95C8', 'FF37C7')
        self.palette = [self.hex2rgb(f'#{c}') for c in hexs]
        self.n = len(self.palette)
        self.pose_palette = np.array([[255, 128, 0], [255, 153, 51], [255, 178, 102], [230, 230, 0], [255, 153, 255],
                                      [153, 204, 255], [255, 102, 255], [255, 51, 255], [102, 178, 255], [51, 153, 255],
                                      [255, 153, 153], [255, 102, 102], [255, 51, 51], [153, 255, 153], [102, 255, 102],
                                      [51, 255, 51], [0, 255, 0], [0, 0, 255], [255, 0, 0], [255, 255, 255]],
                                     dtype=np.uint8)

    def __call__(self, i, bgr=False):
        c = self.palette[int(i) % self.n]
@ -62,6 +67,12 @@ class Annotator:
        else:  # use cv2
            self.im = im
            self.lw = line_width or max(round(sum(im.shape) / 2 * 0.003), 2)  # line width
        # pose
        self.skeleton = [[16, 14], [14, 12], [17, 15], [15, 13], [12, 13], [6, 12], [7, 13], [6, 7], [6, 8], [7, 9],
                         [8, 10], [9, 11], [2, 3], [1, 2], [1, 3], [2, 4], [3, 5], [4, 6], [5, 7]]

        self.limb_color = colors.pose_palette[[9, 9, 9, 9, 7, 7, 7, 0, 0, 0, 0, 0, 16, 16, 16, 16, 16, 16, 16]]
        self.kpt_color = colors.pose_palette[[16, 16, 16, 16, 16, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9]]

    def box_label(self, box, label='', color=(128, 128, 128), txt_color=(255, 255, 255)):
        # Add one xyxy box to image with label
@ -132,6 +143,49 @@ class Annotator:
            # convert im back to PIL and update draw
            self.fromarray(self.im)

    def kpts(self, kpts, shape=(640, 640), radius=5, kpt_line=True):
        """Plot keypoints.
        Args:
            kpts (tensor): predicted kpts, shape: [17, 3]
            shape (tuple): image shape, (h, w)
            radius (int): size of drawing points
            kpt_line (bool): whether to draw lines between keypoints
        """
        if self.pil:
            # convert to numpy first
            self.im = np.asarray(self.im).copy()
        nkpt, ndim = kpts.shape
        is_pose = nkpt == 17 and ndim == 3
        kpt_line &= is_pose  # `kpt_line=True` for now only supports human pose plotting
        for i, k in enumerate(kpts):
            color_k = [int(x) for x in self.kpt_color[i]] if is_pose else colors(i)
            x_coord, y_coord = k[0], k[1]
            if x_coord % shape[1] != 0 and y_coord % shape[0] != 0:
                if len(k) == 3:
                    conf = k[2]
                    if conf < 0.5:
                        continue
                cv2.circle(self.im, (int(x_coord), int(y_coord)), radius, color_k, -1)

        if kpt_line:
            ndim = kpts.shape[-1]
            for sk_id, sk in enumerate(self.skeleton):
                pos1 = (int(kpts[(sk[0] - 1), 0]), int(kpts[(sk[0] - 1), 1]))
                pos2 = (int(kpts[(sk[1] - 1), 0]), int(kpts[(sk[1] - 1), 1]))
                if ndim == 3:
                    conf1 = kpts[(sk[0] - 1), 2]
                    conf2 = kpts[(sk[1] - 1), 2]
                    if conf1 < 0.5 or conf2 < 0.5:
                        continue
                if pos1[0] % shape[1] == 0 or pos1[1] % shape[0] == 0 or pos1[0] < 0 or pos1[1] < 0:
                    continue
                if pos2[0] % shape[1] == 0 or pos2[1] % shape[0] == 0 or pos2[0] < 0 or pos2[1] < 0:
                    continue
                cv2.line(self.im, pos1, pos2, [int(x) for x in self.limb_color[sk_id]], thickness=2)
        if self.pil:
            # convert im back to PIL and update draw
            self.fromarray(self.im)

    def rectangle(self, xy, fill=None, outline=None, width=1):
        # Add rectangle to image (PIL-only)
        self.draw.rectangle(xy, fill, outline, width)
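A short sketch of driving the new kpts() call directly on a BGR numpy canvas, assuming the Annotator class above is importable and defaults to the cv2 branch; the random 17-row (x, y, conf) tensor is illustrative:

import numpy as np
import torch

im = np.zeros((640, 640, 3), dtype=np.uint8)                  # blank BGR canvas
annotator = Annotator(im, line_width=2)
kpts = torch.rand(17, 3) * torch.tensor([640.0, 640.0, 1.0])  # COCO-ordered keypoints with conf
annotator.kpts(kpts, shape=(640, 640), radius=5, kpt_line=True)
annotated = annotator.im  # points drawn per kpt_color; limbs only where both endpoint confs >= 0.5
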
@ -213,7 +267,7 @@ def save_one_box(xyxy, im, file=Path('im.jpg'), gain=1.02, pad=10, square=False,
        b[:, 2:] = b[:, 2:].max(1)[0].unsqueeze(1)  # attempt rectangle to square
    b[:, 2:] = b[:, 2:] * gain + pad  # box wh * gain + pad
    xyxy = xywh2xyxy(b).long()
    clip_coords(xyxy, im.shape)
    clip_boxes(xyxy, im.shape)
    crop = im[int(xyxy[0, 1]):int(xyxy[0, 3]), int(xyxy[0, 0]):int(xyxy[0, 2]), ::(1 if BGR else -1)]
    if save:
        file.parent.mkdir(parents=True, exist_ok=True)  # make directory
@ -229,6 +283,7 @@ def plot_images(images,
                cls,
                bboxes,
                masks=np.zeros(0, dtype=np.uint8),
                kpts=np.zeros((0, 51), dtype=np.float32),
                paths=None,
                fname='images.jpg',
                names=None):
@ -241,6 +296,8 @@ def plot_images(images,
        bboxes = bboxes.cpu().numpy()
    if isinstance(masks, torch.Tensor):
        masks = masks.cpu().numpy().astype(int)
    if isinstance(kpts, torch.Tensor):
        kpts = kpts.cpu().numpy()
    if isinstance(batch_idx, torch.Tensor):
        batch_idx = batch_idx.cpu().numpy()

@ -300,6 +357,21 @@ def plot_images(images,
                label = f'{c}' if labels else f'{c} {conf[j]:.1f}'
                annotator.box_label(box, label, color=color)

            # Plot keypoints
            if len(kpts):
                kpts_ = kpts[idx].copy()
                if len(kpts_):
                    if kpts_[..., 0].max() <= 1.01 or kpts_[..., 1].max() <= 1.01:  # if normalized with tolerance .01
                        kpts_[..., 0] *= w  # scale to pixels
                        kpts_[..., 1] *= h
                    elif scale < 1:  # absolute coords need scale if image scales
                        kpts_ *= scale
                kpts_[..., 0] += x
                kpts_[..., 1] += y
                for j in range(len(kpts_)):
                    if labels or conf[j] > 0.25:  # 0.25 conf thresh
                        annotator.kpts(kpts_[j])

            # Plot masks
            if len(masks):
                if idx.shape[0] == masks.shape[0]:  # overlap_masks=False
@ -307,7 +379,7 @@ def plot_images(images,
                else:  # overlap_masks=True
                    image_masks = masks[[i]]  # (1, 640, 640)
                    nl = idx.sum()
                    index = np.arange(nl).reshape(nl, 1, 1) + 1
                    index = np.arange(nl).reshape((nl, 1, 1)) + 1
                    image_masks = np.repeat(image_masks, nl, axis=0)
                    image_masks = np.where(image_masks == index, 1.0, 0.0)

@ -328,13 +400,16 @@ def plot_images(images,
    annotator.im.save(fname)  # save


def plot_results(file='path/to/results.csv', dir='', segment=False):
def plot_results(file='path/to/results.csv', dir='', segment=False, pose=False):
    # Plot training results.csv. Usage: from utils.plots import *; plot_results('path/to/results.csv')
    import pandas as pd
    save_dir = Path(file).parent if file else Path(dir)
    if segment:
        fig, ax = plt.subplots(2, 8, figsize=(18, 6), tight_layout=True)
        index = [1, 2, 3, 4, 5, 6, 9, 10, 13, 14, 15, 16, 7, 8, 11, 12]
    elif pose:
        fig, ax = plt.subplots(2, 9, figsize=(21, 6), tight_layout=True)
        index = [1, 2, 3, 4, 5, 6, 7, 10, 11, 14, 15, 16, 17, 18, 8, 9, 12, 13]
    else:
        fig, ax = plt.subplots(2, 5, figsize=(12, 6), tight_layout=True)
        index = [1, 2, 3, 4, 5, 8, 9, 10, 6, 7]

@ -240,8 +240,8 @@ def copy_attr(a, b, include=(), exclude=()):


def get_latest_opset():
    # Return max supported ONNX opset by this version of torch
    return max(int(k[14:]) for k in vars(torch.onnx) if 'symbolic_opset' in k)  # opset
    # Return second-most (for maturity) recently supported ONNX opset by this version of torch
    return max(int(k[14:]) for k in vars(torch.onnx) if 'symbolic_opset' in k) - 1  # opset
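A quick illustration of what the trailing '- 1' changes, runnable against any torch build (the printed numbers depend on the installed version):

import torch

opsets = sorted(int(k[14:]) for k in vars(torch.onnx) if 'symbolic_opset' in k)
print(opsets[-1], opsets[-1] - 1)  # e.g. 18 17 (hypothetical): export now targets the older, more mature opset
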

def intersect_dicts(da, db, exclude=()):
@ -318,18 +318,18 @@ def strip_optimizer(f: Union[str, Path] = 'best.pt', s: str = '') -> None:
    """
    Strip optimizer from 'f' to finalize training, optionally save as 's'.

    Usage:
        from ultralytics.yolo.utils.torch_utils import strip_optimizer
        from pathlib import Path
        for f in Path('/Users/glennjocher/Downloads/weights').glob('*.pt'):
            strip_optimizer(f)

    Args:
        f (str): file path to model to strip the optimizer from. Default is 'best.pt'.
        s (str): file path to save the model with stripped optimizer to. If not provided, 'f' will be overwritten.

    Returns:
        None

    Usage:
        from pathlib import Path
        from ultralytics.yolo.utils.torch_utils import strip_optimizer
        for f in Path('/Users/glennjocher/Downloads/weights').rglob('*.pt'):
            strip_optimizer(f)
    """
    x = torch.load(f, map_location=torch.device('cpu'))
    args = {**DEFAULT_CFG_DICT, **x['train_args']}  # combine model args with default args, preferring model args
@ -349,7 +349,9 @@ def strip_optimizer(f: Union[str, Path] = 'best.pt', s: str = '') -> None:


def profile(input, ops, n=10, device=None):
    """ YOLOv8 speed/memory/FLOPs profiler
    """
    YOLOv8 speed/memory/FLOPs profiler

    Usage:
        input = torch.randn(16, 3, 640, 640)
        m1 = lambda x: x * torch.sigmoid(x)

@ -1,5 +1,5 @@
# Ultralytics YOLO 🚀, GPL-3.0 license

from ultralytics.yolo.v8 import classify, detect, segment
from ultralytics.yolo.v8 import classify, detect, pose, segment

__all__ = 'classify', 'segment', 'detect'
__all__ = 'classify', 'segment', 'detect', 'pose'

@ -41,7 +41,7 @@ class DetectionTrainer(BaseTrainer):
                             shuffle=mode == 'train',
                             seed=self.args.seed)[0] if self.args.v5loader else \
            build_dataloader(self.args, batch_size, img_path=dataset_path, stride=gs, rank=rank, mode=mode,
                             rect=mode == 'val', names=self.data['names'])[0]
                             rect=mode == 'val', data_info=self.data)[0]

    def preprocess_batch(self, batch):
        batch['img'] = batch['img'].to(self.device, non_blocking=True).float() / 255

@ -41,7 +41,7 @@ class DetectionValidator(BaseValidator):

    def init_metrics(self, model):
        val = self.data.get(self.args.split, '')  # validation path
        self.is_coco = isinstance(val, str) and val.endswith(f'coco{os.sep}val2017.txt')  # is COCO dataset
        self.is_coco = isinstance(val, str) and 'coco' in val and val.endswith(f'{os.sep}val2017.txt')  # is COCO
        self.class_map = ops.coco80_to_coco91_class() if self.is_coco else list(range(1000))
        self.args.save_json |= self.is_coco and not self.training  # run on final val if training COCO
        self.names = model.names
@ -179,7 +179,7 @@ class DetectionValidator(BaseValidator):
                             prefix=colorstr(f'{self.args.mode}: '),
                             shuffle=False,
                             seed=self.args.seed)[0] if self.args.v5loader else \
            build_dataloader(self.args, batch_size, img_path=dataset_path, stride=gs, names=self.data['names'],
            build_dataloader(self.args, batch_size, img_path=dataset_path, stride=gs, data_info=self.data,
                             mode='val')[0]

    def plot_val_samples(self, batch, ni):

7
ultralytics/yolo/v8/pose/__init__.py
Normal file
@ -0,0 +1,7 @@
# Ultralytics YOLO 🚀, GPL-3.0 license

from .predict import PosePredictor, predict
from .train import PoseTrainer, train
from .val import PoseValidator, val

__all__ = 'PoseTrainer', 'train', 'PoseValidator', 'val', 'PosePredictor', 'predict'

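With the package wired up this way, the new task is reachable through the usual high-level API. A hedged sketch (the weights name follows the defaults used in predict.py below; keypoints access assumes the Results field populated there):

from ultralytics import YOLO

model = YOLO('yolov8n-pose.pt')                            # pose checkpoint
results = model('https://ultralytics.com/images/bus.jpg')  # runs PosePredictor under the hood
for r in results:
    print(r.keypoints.shape)                               # (num_persons, 17, 3): x, y, confidence
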
103
ultralytics/yolo/v8/pose/predict.py
Normal file
@ -0,0 +1,103 @@
# Ultralytics YOLO 🚀, GPL-3.0 license

from ultralytics.yolo.engine.results import Results
from ultralytics.yolo.utils import DEFAULT_CFG, ROOT, ops
from ultralytics.yolo.utils.plotting import colors, save_one_box
from ultralytics.yolo.v8.detect.predict import DetectionPredictor


class PosePredictor(DetectionPredictor):

    def postprocess(self, preds, img, orig_img):
        preds = ops.non_max_suppression(preds,
                                        self.args.conf,
                                        self.args.iou,
                                        agnostic=self.args.agnostic_nms,
                                        max_det=self.args.max_det,
                                        classes=self.args.classes,
                                        nc=len(self.model.names))

        results = []
        for i, pred in enumerate(preds):
            orig_img = orig_img[i] if isinstance(orig_img, list) else orig_img
            shape = orig_img.shape
            pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], shape).round()
            pred_kpts = pred[:, 6:].view(len(pred), *self.model.kpt_shape) if len(pred) else pred[:, 6:]
            pred_kpts = ops.scale_coords(img.shape[2:], pred_kpts, shape)
            path, _, _, _, _ = self.batch
            img_path = path[i] if isinstance(path, list) else path
            results.append(
                Results(orig_img=orig_img,
                        path=img_path,
                        names=self.model.names,
                        boxes=pred[:, :6],
                        keypoints=pred_kpts))
        return results

    def write_results(self, idx, results, batch):
        p, im, im0 = batch
        log_string = ''
        if len(im.shape) == 3:
            im = im[None]  # expand for batch dim
        self.seen += 1
        imc = im0.copy() if self.args.save_crop else im0
        if self.source_type.webcam or self.source_type.from_img:  # batch_size >= 1
            log_string += f'{idx}: '
            frame = self.dataset.count
        else:
            frame = getattr(self.dataset, 'frame', 0)
        self.data_path = p
        self.txt_path = str(self.save_dir / 'labels' / p.stem) + ('' if self.dataset.mode == 'image' else f'_{frame}')
        log_string += '%gx%g ' % im.shape[2:]  # print string
        self.annotator = self.get_annotator(im0)

        det = results[idx].boxes  # TODO: make boxes inherit from tensors
        if len(det) == 0:
            return f'{log_string}(no detections), '
        for c in det.cls.unique():
            n = (det.cls == c).sum()  # detections per class
            log_string += f"{n} {self.model.names[int(c)]}{'s' * (n > 1)}, "

        kpts = reversed(results[idx].keypoints)
        for k in kpts:
            self.annotator.kpts(k, shape=results[idx].orig_shape)

        # write
        for j, d in enumerate(reversed(det)):
            c, conf, id = int(d.cls), float(d.conf), None if d.id is None else int(d.id.item())
            if self.args.save_txt:  # Write to file
                kpt = (kpts[j][:, :2] / d.orig_shape[[1, 0]]).reshape(-1).tolist()
                box = d.xywhn.view(-1).tolist()
                line = (c, *box, *kpt) + (conf, ) * self.args.save_conf + (() if id is None else (id, ))
                with open(f'{self.txt_path}.txt', 'a') as f:
                    f.write(('%g ' * len(line)).rstrip() % line + '\n')
            if self.args.save or self.args.show:  # Add bbox to image
                name = ('' if id is None else f'id:{id} ') + self.model.names[c]
                label = (f'{name} {conf:.2f}' if self.args.show_conf else name) if self.args.show_labels else None
                if self.args.boxes:
                    self.annotator.box_label(d.xyxy.squeeze(), label, color=colors(c, True))
            if self.args.save_crop:
                save_one_box(d.xyxy,
                             imc,
                             file=self.save_dir / 'crops' / self.model.model.names[c] / f'{self.data_path.stem}.jpg',
                             BGR=True)

        return log_string


def predict(cfg=DEFAULT_CFG, use_python=False):
    model = cfg.model or 'yolov8n-pose.pt'
    source = cfg.source if cfg.source is not None else ROOT / 'assets' if (ROOT / 'assets').exists() \
        else 'https://ultralytics.com/images/bus.jpg'

    args = dict(model=model, source=source)
    if use_python:
        from ultralytics import YOLO
        YOLO(model)(**args)
    else:
        predictor = PosePredictor(overrides=args)
        predictor.predict_cli()


if __name__ == '__main__':
    predict()

170
ultralytics/yolo/v8/pose/train.py
Normal file
@ -0,0 +1,170 @@
# Ultralytics YOLO 🚀, GPL-3.0 license

from copy import copy

import torch
import torch.nn as nn

from ultralytics.nn.tasks import PoseModel
from ultralytics.yolo import v8
from ultralytics.yolo.utils import DEFAULT_CFG
from ultralytics.yolo.utils.loss import KeypointLoss
from ultralytics.yolo.utils.metrics import OKS_SIGMA
from ultralytics.yolo.utils.ops import xyxy2xywh
from ultralytics.yolo.utils.plotting import plot_images, plot_results
from ultralytics.yolo.utils.tal import make_anchors
from ultralytics.yolo.utils.torch_utils import de_parallel
from ultralytics.yolo.v8.detect.train import Loss


# BaseTrainer python usage
class PoseTrainer(v8.detect.DetectionTrainer):

    def __init__(self, cfg=DEFAULT_CFG, overrides=None):
        if overrides is None:
            overrides = {}
        overrides['task'] = 'pose'
        super().__init__(cfg, overrides)

    def get_model(self, cfg=None, weights=None, verbose=True):
        model = PoseModel(cfg, ch=3, nc=self.data['nc'], data_kpt_shape=self.data['kpt_shape'], verbose=verbose)
        if weights:
            model.load(weights)

        return model

    def set_model_attributes(self):
        super().set_model_attributes()
        self.model.kpt_shape = self.data['kpt_shape']

    def get_validator(self):
        self.loss_names = 'box_loss', 'pose_loss', 'kobj_loss', 'cls_loss', 'dfl_loss'
        return v8.pose.PoseValidator(self.test_loader, save_dir=self.save_dir, args=copy(self.args))

    def criterion(self, preds, batch):
        if not hasattr(self, 'compute_loss'):
            self.compute_loss = PoseLoss(de_parallel(self.model))
        return self.compute_loss(preds, batch)

    def plot_training_samples(self, batch, ni):
        images = batch['img']
        kpts = batch['keypoints']
        cls = batch['cls'].squeeze(-1)
        bboxes = batch['bboxes']
        paths = batch['im_file']
        batch_idx = batch['batch_idx']
        plot_images(images,
                    batch_idx,
                    cls,
                    bboxes,
                    kpts=kpts,
                    paths=paths,
                    fname=self.save_dir / f'train_batch{ni}.jpg')

    def plot_metrics(self):
        plot_results(file=self.csv, pose=True)  # save results.png


# Criterion class for computing training losses
class PoseLoss(Loss):

    def __init__(self, model):  # model must be de-paralleled
        super().__init__(model)
        self.kpt_shape = model.model[-1].kpt_shape
        self.bce_pose = nn.BCEWithLogitsLoss()
        is_pose = self.kpt_shape == [17, 3]
        nkpt = self.kpt_shape[0]  # number of keypoints
        sigmas = torch.from_numpy(OKS_SIGMA).to(self.device) if is_pose else torch.ones(nkpt, device=self.device) / nkpt
        self.keypoint_loss = KeypointLoss(sigmas=sigmas)

    def __call__(self, preds, batch):
        loss = torch.zeros(5, device=self.device)  # box, kpt_location, kpt_visibility, cls, dfl
        feats, pred_kpts = preds if isinstance(preds[0], list) else preds[1]
        pred_distri, pred_scores = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats], 2).split(
            (self.reg_max * 4, self.nc), 1)

        # b, grids, ..
        pred_scores = pred_scores.permute(0, 2, 1).contiguous()
        pred_distri = pred_distri.permute(0, 2, 1).contiguous()
        pred_kpts = pred_kpts.permute(0, 2, 1).contiguous()

        dtype = pred_scores.dtype
        imgsz = torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) * self.stride[0]  # image size (h,w)
        anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5)

        # targets
        batch_size = pred_scores.shape[0]
        batch_idx = batch['batch_idx'].view(-1, 1)
        targets = torch.cat((batch_idx, batch['cls'].view(-1, 1), batch['bboxes']), 1)
        targets = self.preprocess(targets.to(self.device), batch_size, scale_tensor=imgsz[[1, 0, 1, 0]])
        gt_labels, gt_bboxes = targets.split((1, 4), 2)  # cls, xyxy
        mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0)

        # pboxes
        pred_bboxes = self.bbox_decode(anchor_points, pred_distri)  # xyxy, (b, h*w, 4)
        pred_kpts = self.kpts_decode(anchor_points, pred_kpts.view(batch_size, -1, *self.kpt_shape))  # (b, h*w, 17, 3)

        _, target_bboxes, target_scores, fg_mask, target_gt_idx = self.assigner(
            pred_scores.detach().sigmoid(), (pred_bboxes.detach() * stride_tensor).type(gt_bboxes.dtype),
            anchor_points * stride_tensor, gt_labels, gt_bboxes, mask_gt)

        target_scores_sum = max(target_scores.sum(), 1)

        # cls loss
        # loss[1] = self.varifocal_loss(pred_scores, target_scores, target_labels) / target_scores_sum  # VFL way
        loss[3] = self.bce(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum  # BCE

        # bbox loss
        if fg_mask.sum():
            target_bboxes /= stride_tensor
            loss[0], loss[4] = self.bbox_loss(pred_distri, pred_bboxes, anchor_points, target_bboxes, target_scores,
                                              target_scores_sum, fg_mask)
            keypoints = batch['keypoints'].to(self.device).float().clone()
            keypoints[..., 0] *= imgsz[1]
            keypoints[..., 1] *= imgsz[0]
            for i in range(batch_size):
                if fg_mask[i].sum():
                    idx = target_gt_idx[i][fg_mask[i]]
                    gt_kpt = keypoints[batch_idx.view(-1) == i][idx]  # (n, 51)
                    gt_kpt[..., 0] /= stride_tensor[fg_mask[i]]
                    gt_kpt[..., 1] /= stride_tensor[fg_mask[i]]
                    area = xyxy2xywh(target_bboxes[i][fg_mask[i]])[:, 2:].prod(1, keepdim=True)
                    pred_kpt = pred_kpts[i][fg_mask[i]]
                    kpt_mask = gt_kpt[..., 2] != 0
                    loss[1] += self.keypoint_loss(pred_kpt, gt_kpt, kpt_mask, area)  # pose loss
                    # kpt_score loss
                    if pred_kpt.shape[-1] == 3:
                        loss[2] += self.bce_pose(pred_kpt[..., 2], kpt_mask.float())  # keypoint obj loss

        loss[0] *= self.hyp.box  # box gain
        loss[1] *= self.hyp.pose / batch_size  # pose gain
        loss[2] *= self.hyp.kobj / batch_size  # kobj gain
        loss[3] *= self.hyp.cls  # cls gain
        loss[4] *= self.hyp.dfl  # dfl gain

        return loss.sum() * batch_size, loss.detach()  # loss(box, pose, kobj, cls, dfl)

    def kpts_decode(self, anchor_points, pred_kpts):
        y = pred_kpts.clone()
        y[..., :2] *= 2.0
        y[..., 0] += anchor_points[:, [0]] - 0.5
        y[..., 1] += anchor_points[:, [1]] - 0.5
        return y


def train(cfg=DEFAULT_CFG, use_python=False):
    model = cfg.model or 'yolov8n-pose.yaml'
    data = cfg.data or 'coco8-pose.yaml'
    device = cfg.device if cfg.device is not None else ''

    args = dict(model=model, data=data, device=device)
    if use_python:
        from ultralytics import YOLO
        YOLO(model).train(**args)
    else:
        trainer = PoseTrainer(overrides=args)
        trainer.train()


if __name__ == '__main__':
    train()
213
ultralytics/yolo/v8/pose/val.py
Normal file
@ -0,0 +1,213 @@
# Ultralytics YOLO 🚀, GPL-3.0 license

from pathlib import Path

import numpy as np
import torch

from ultralytics.yolo.utils import DEFAULT_CFG, LOGGER, ops
from ultralytics.yolo.utils.checks import check_requirements
from ultralytics.yolo.utils.metrics import OKS_SIGMA, PoseMetrics, box_iou, kpt_iou
from ultralytics.yolo.utils.plotting import output_to_target, plot_images
from ultralytics.yolo.v8.detect import DetectionValidator


class PoseValidator(DetectionValidator):

    def __init__(self, dataloader=None, save_dir=None, pbar=None, args=None):
        super().__init__(dataloader, save_dir, pbar, args)
        self.args.task = 'pose'
        self.metrics = PoseMetrics(save_dir=self.save_dir)

    def preprocess(self, batch):
        batch = super().preprocess(batch)
        batch['keypoints'] = batch['keypoints'].to(self.device).float()
        return batch

    def get_desc(self):
        return ('%22s' + '%11s' * 10) % ('Class', 'Images', 'Instances', 'Box(P', 'R', 'mAP50', 'mAP50-95)', 'Pose(P',
                                         'R', 'mAP50', 'mAP50-95)')

    def postprocess(self, preds):
        preds = ops.non_max_suppression(preds,
                                        self.args.conf,
                                        self.args.iou,
                                        labels=self.lb,
                                        multi_label=True,
                                        agnostic=self.args.single_cls,
                                        max_det=self.args.max_det,
                                        nc=self.nc)
        return preds

    def init_metrics(self, model):
        super().init_metrics(model)
        self.kpt_shape = self.data['kpt_shape']
        is_pose = self.kpt_shape == [17, 3]
        nkpt = self.kpt_shape[0]
        self.sigma = OKS_SIGMA if is_pose else np.ones(nkpt) / nkpt

    def update_metrics(self, preds, batch):
        # Metrics
        for si, pred in enumerate(preds):
            idx = batch['batch_idx'] == si
            cls = batch['cls'][idx]
            bbox = batch['bboxes'][idx]
            kpts = batch['keypoints'][idx]
            nl, npr = cls.shape[0], pred.shape[0]  # number of labels, predictions
            nk = kpts.shape[1]  # number of keypoints
            shape = batch['ori_shape'][si]
            correct_kpts = torch.zeros(npr, self.niou, dtype=torch.bool, device=self.device)  # init
            correct_bboxes = torch.zeros(npr, self.niou, dtype=torch.bool, device=self.device)  # init
            self.seen += 1

            if npr == 0:
                if nl:
                    self.stats.append((correct_bboxes, correct_kpts, *torch.zeros(
                        (2, 0), device=self.device), cls.squeeze(-1)))
                    if self.args.plots:
                        self.confusion_matrix.process_batch(detections=None, labels=cls.squeeze(-1))
                continue

            # Predictions
            if self.args.single_cls:
                pred[:, 5] = 0
            predn = pred.clone()
            ops.scale_boxes(batch['img'][si].shape[1:], predn[:, :4], shape,
                            ratio_pad=batch['ratio_pad'][si])  # native-space pred
            pred_kpts = predn[:, 6:].view(npr, nk, -1)
            ops.scale_coords(batch['img'][si].shape[1:], pred_kpts, shape, ratio_pad=batch['ratio_pad'][si])

            # Evaluate
            if nl:
                height, width = batch['img'].shape[2:]
                tbox = ops.xywh2xyxy(bbox) * torch.tensor(
                    (width, height, width, height), device=self.device)  # target boxes
                ops.scale_boxes(batch['img'][si].shape[1:], tbox, shape,
                                ratio_pad=batch['ratio_pad'][si])  # native-space labels
                tkpts = kpts.clone()
                tkpts[..., 0] *= width
                tkpts[..., 1] *= height
                tkpts = ops.scale_coords(batch['img'][si].shape[1:], tkpts, shape, ratio_pad=batch['ratio_pad'][si])
                labelsn = torch.cat((cls, tbox), 1)  # native-space labels
                correct_bboxes = self._process_batch(predn[:, :6], labelsn)
                correct_kpts = self._process_batch(predn[:, :6], labelsn, pred_kpts, tkpts)
                if self.args.plots:
                    self.confusion_matrix.process_batch(predn, labelsn)

            # Append correct_bboxes, correct_kpts, pconf, pcls, tcls
            self.stats.append((correct_bboxes, correct_kpts, pred[:, 4], pred[:, 5], cls.squeeze(-1)))

            # Save
            if self.args.save_json:
                self.pred_to_json(predn, batch['im_file'][si])
            # if self.args.save_txt:
            #    save_one_txt(predn, save_conf, shape, file=save_dir / 'labels' / f'{path.stem}.txt')

    def _process_batch(self, detections, labels, pred_kpts=None, gt_kpts=None):
        """
        Return correct prediction matrix
        Arguments:
            detections (array[N, 6]), x1, y1, x2, y2, conf, class
            labels (array[M, 5]), class, x1, y1, x2, y2
            pred_kpts (array[N, 51]), 51 = 17 * 3
            gt_kpts (array[N, 51])
        Returns:
            correct (array[N, 10]), for 10 IoU levels
        """
        if pred_kpts is not None and gt_kpts is not None:
            # `0.53` is from https://github.com/jin-s13/xtcocoapi/blob/master/xtcocotools/cocoeval.py#L384
            area = ops.xyxy2xywh(labels[:, 1:])[:, 2:].prod(1) * 0.53
            iou = kpt_iou(gt_kpts, pred_kpts, sigma=self.sigma, area=area)
        else:  # boxes
            iou = box_iou(labels[:, 1:], detections[:, :4])

        correct = np.zeros((detections.shape[0], self.iouv.shape[0])).astype(bool)
        correct_class = labels[:, 0:1] == detections[:, 5]
        for i in range(len(self.iouv)):
            x = torch.where((iou >= self.iouv[i]) & correct_class)  # IoU > threshold and classes match
            if x[0].shape[0]:
                matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]),
                                    1).cpu().numpy()  # [label, detect, iou]
                if x[0].shape[0] > 1:
                    matches = matches[matches[:, 2].argsort()[::-1]]
                    matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
                    # matches = matches[matches[:, 2].argsort()[::-1]]
                    matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
                correct[matches[:, 1].astype(int), i] = True
        return torch.tensor(correct, dtype=torch.bool, device=detections.device)

    def plot_val_samples(self, batch, ni):
        plot_images(batch['img'],
                    batch['batch_idx'],
                    batch['cls'].squeeze(-1),
                    batch['bboxes'],
                    kpts=batch['keypoints'],
                    paths=batch['im_file'],
                    fname=self.save_dir / f'val_batch{ni}_labels.jpg',
                    names=self.names)

    def plot_predictions(self, batch, preds, ni):
        pred_kpts = torch.cat([p[:, 6:].view(-1, *self.kpt_shape)[:15] for p in preds], 0)
        plot_images(batch['img'],
                    *output_to_target(preds, max_det=15),
                    kpts=pred_kpts,
                    paths=batch['im_file'],
                    fname=self.save_dir / f'val_batch{ni}_pred.jpg',
                    names=self.names)  # pred

    def pred_to_json(self, predn, filename):
        stem = Path(filename).stem
        image_id = int(stem) if stem.isnumeric() else stem
        box = ops.xyxy2xywh(predn[:, :4])  # xywh
        box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
        for p, b in zip(predn.tolist(), box.tolist()):
            self.jdict.append({
                'image_id': image_id,
                'category_id': self.class_map[int(p[5])],
                'bbox': [round(x, 3) for x in b],
                'keypoints': p[6:],
                'score': round(p[4], 5)})

    def eval_json(self, stats):
        if self.args.save_json and self.is_coco and len(self.jdict):
            anno_json = self.data['path'] / 'annotations/person_keypoints_val2017.json'  # annotations
            pred_json = self.save_dir / 'predictions.json'  # predictions
            LOGGER.info(f'\nEvaluating pycocotools mAP using {pred_json} and {anno_json}...')
            try:  # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
                check_requirements('pycocotools>=2.0.6')
                from pycocotools.coco import COCO  # noqa
                from pycocotools.cocoeval import COCOeval  # noqa

                for x in anno_json, pred_json:
                    assert x.is_file(), f'{x} file not found'
                anno = COCO(str(anno_json))  # init annotations api
                pred = anno.loadRes(str(pred_json))  # init predictions api (must pass string, not Path)
                for i, eval in enumerate([COCOeval(anno, pred, 'bbox'), COCOeval(anno, pred, 'keypoints')]):
                    if self.is_coco:
                        eval.params.imgIds = [int(Path(x).stem) for x in self.dataloader.dataset.im_files]  # im to eval
                    eval.evaluate()
                    eval.accumulate()
                    eval.summarize()
                    idx = i * 4 + 2
                    stats[self.metrics.keys[idx + 1]], stats[
                        self.metrics.keys[idx]] = eval.stats[:2]  # update mAP50-95 and mAP50
            except Exception as e:
                LOGGER.warning(f'pycocotools unable to run: {e}')
        return stats


def val(cfg=DEFAULT_CFG, use_python=False):
    model = cfg.model or 'yolov8n-pose.pt'
    data = cfg.data or 'coco128-pose.yaml'

    args = dict(model=model, data=data)
    if use_python:
        from ultralytics import YOLO
        YOLO(model).val(**args)
    else:
        validator = PoseValidator(args=args)
        validator(model=args['model'])


if __name__ == '__main__':
    val()

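The idx = i * 4 + 2 bookkeeping in eval_json maps the two COCOeval runs onto PoseMetrics.keys; a short illustration of that pairing, assuming the key order defined in metrics.py earlier in this diff:

keys = [
    'metrics/precision(B)', 'metrics/recall(B)', 'metrics/mAP50(B)', 'metrics/mAP50-95(B)',
    'metrics/precision(P)', 'metrics/recall(P)', 'metrics/mAP50(P)', 'metrics/mAP50-95(P)']
for i, task in enumerate(('bbox', 'keypoints')):
    idx = i * 4 + 2
    print(task, '->', keys[idx + 1], 'and', keys[idx])
# bbox -> metrics/mAP50-95(B) and metrics/mAP50(B)
# keypoints -> metrics/mAP50-95(P) and metrics/mAP50(P)
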
@ -65,7 +65,7 @@ class SegmentationValidator(DetectionValidator):

            if npr == 0:
                if nl:
                    self.stats.append((correct_masks, correct_bboxes, *torch.zeros(
                    self.stats.append((correct_bboxes, correct_masks, *torch.zeros(
                        (2, 0), device=self.device), cls.squeeze(-1)))
                    if self.args.plots:
                        self.confusion_matrix.process_batch(detections=None, labels=cls.squeeze(-1))
@ -103,7 +103,7 @@ class SegmentationValidator(DetectionValidator):
                    self.confusion_matrix.process_batch(predn, labelsn)

            # Append correct_bboxes, correct_masks, pconf, pcls, tcls
            self.stats.append((correct_masks, correct_bboxes, pred[:, 4], pred[:, 5], cls.squeeze(-1)))
            self.stats.append((correct_bboxes, correct_masks, pred[:, 4], pred[:, 5], cls.squeeze(-1)))

            pred_masks = torch.as_tensor(pred_masks, dtype=torch.uint8)
            if self.args.plots and self.batch_i < 3:
@ -220,8 +220,7 @@ class SegmentationValidator(DetectionValidator):
                pred = anno.loadRes(str(pred_json))  # init predictions api (must pass string, not Path)
                for i, eval in enumerate([COCOeval(anno, pred, 'bbox'), COCOeval(anno, pred, 'segm')]):
                    if self.is_coco:
                        eval.params.imgIds = [int(Path(x).stem)
                                              for x in self.dataloader.dataset.im_files]  # images to eval
                        eval.params.imgIds = [int(Path(x).stem) for x in self.dataloader.dataset.im_files]  # im to eval
                    eval.evaluate()
                    eval.accumulate()
                    eval.summarize()