From e18ae9d8e160c1635b66650793f80f2728a192d1 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Wed, 11 Jan 2023 21:54:41 +0100 Subject: [PATCH] README and Docs updates with A100 TensorRT times (#270) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- README.md | 42 +++--- README.zh-CN.md | 42 +++--- docs/assets/favicon.ico | Bin 0 -> 5430 bytes docs/engine.md | 2 +- docs/index.md | 10 +- mkdocs.yml | 27 ++-- ultralytics/nn/autobackend.py | 76 +++++----- ultralytics/nn/tasks.py | 51 ++++--- ultralytics/yolo/engine/trainer.py | 12 +- ultralytics/yolo/utils/ops.py | 229 ++++++++++++++--------------- 10 files changed, 250 insertions(+), 241 deletions(-) create mode 100644 docs/assets/favicon.ico diff --git a/README.md b/README.md index 19057d4..adf2ee3 100644 --- a/README.md +++ b/README.md @@ -121,13 +121,13 @@ Ultralytics [release](https://github.com/ultralytics/ultralytics/releases) on fi
Detection

-| Model | size (pixels) | mAPval 50-95 | Speed CPU (ms) | Speed T4 GPU (ms) | params (M) | FLOPs (B) |
-| ----- | ------------- | ------------ | -------------- | ----------------- | ---------- | --------- |
-| [YOLOv8n](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt) | 640 | 37.3 | - | - | 3.2 | 8.7 |
-| [YOLOv8s](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s.pt) | 640 | 44.9 | - | - | 11.2 | 28.6 |
-| [YOLOv8m](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m.pt) | 640 | 50.2 | - | - | 25.9 | 78.9 |
-| [YOLOv8l](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l.pt) | 640 | 52.9 | - | - | 43.7 | 165.2 |
-| [YOLOv8x](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x.pt) | 640 | 53.9 | - | - | 68.2 | 257.8 |
+| Model | size (pixels) | mAPval 50-95 | Speed CPU ONNX (ms) | Speed A100 TensorRT (ms) | params (M) | FLOPs (B) |
+| ----- | ------------- | ------------ | ------------------- | ------------------------ | ---------- | --------- |
+| [YOLOv8n](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt) | 640 | 37.3 | - | 0.99 | 3.2 | 8.7 |
+| [YOLOv8s](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s.pt) | 640 | 44.9 | - | 1.20 | 11.2 | 28.6 |
+| [YOLOv8m](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m.pt) | 640 | 50.2 | - | 1.83 | 25.9 | 78.9 |
+| [YOLOv8l](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l.pt) | 640 | 52.9 | - | 2.39 | 43.7 | 165.2 |
+| [YOLOv8x](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x.pt) | 640 | 53.9 | - | 3.53 | 68.2 | 257.8 |

- **mAPval** values are for single-model single-scale on [COCO val2017](http://cocodataset.org) dataset.
Reproduce by `yolo mode=val task=detect data=coco.yaml device=0` @@ -138,13 +138,13 @@ Ultralytics [release](https://github.com/ultralytics/ultralytics/releases) on fi
Segmentation

-| Model | size (pixels) | mAPbox 50-95 | mAPmask 50-95 | Speed CPU (ms) | Speed T4 GPU (ms) | params (M) | FLOPs (B) |
-| ----- | ------------- | ------------ | ------------- | -------------- | ----------------- | ---------- | --------- |
-| [YOLOv8n](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-seg.pt) | 640 | 36.7 | 30.5 | - | - | 3.4 | 12.6 |
-| [YOLOv8s](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-seg.pt) | 640 | 44.6 | 36.8 | - | - | 11.8 | 42.6 |
-| [YOLOv8m](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-seg.pt) | 640 | 49.9 | 40.8 | - | - | 27.3 | 110.2 |
-| [YOLOv8l](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-seg.pt) | 640 | 52.3 | 42.6 | - | - | 46.0 | 220.5 |
-| [YOLOv8x](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-seg.pt) | 640 | 53.4 | 43.4 | - | - | 71.8 | 344.1 |
+| Model | size (pixels) | mAPbox 50-95 | mAPmask 50-95 | Speed CPU ONNX (ms) | Speed A100 TensorRT (ms) | params (M) | FLOPs (B) |
+| ----- | ------------- | ------------ | ------------- | ------------------- | ------------------------ | ---------- | --------- |
+| [YOLOv8n](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-seg.pt) | 640 | 36.7 | 30.5 | - | - | 3.4 | 12.6 |
+| [YOLOv8s](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-seg.pt) | 640 | 44.6 | 36.8 | - | - | 11.8 | 42.6 |
+| [YOLOv8m](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-seg.pt) | 640 | 49.9 | 40.8 | - | - | 27.3 | 110.2 |
+| [YOLOv8l](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-seg.pt) | 640 | 52.3 | 42.6 | - | - | 46.0 | 220.5 |
+| [YOLOv8x](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-seg.pt) | 640 | 53.4 | 43.4 | - | - | 71.8 | 344.1 |

- **mAPval** values are for single-model single-scale on [COCO val2017](http://cocodataset.org) dataset.
Reproduce by `yolo mode=val task=segment data=coco.yaml device=0` @@ -155,13 +155,13 @@ Ultralytics [release](https://github.com/ultralytics/ultralytics/releases) on fi
Classification

-| Model | size (pixels) | acc top1 | acc top5 | Speed CPU (ms) | Speed T4 GPU (ms) | params (M) | FLOPs (B) at 640 |
-| ----- | ------------- | -------- | -------- | -------------- | ----------------- | ---------- | ---------------- |
-| [YOLOv8n](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-cls.pt) | 224 | 66.6 | 87.0 | - | - | 2.7 | 4.3 |
-| [YOLOv8s](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-cls.pt) | 224 | 72.3 | 91.1 | - | - | 6.4 | 13.5 |
-| [YOLOv8m](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-cls.pt) | 224 | 76.4 | 93.2 | - | - | 17.0 | 42.7 |
-| [YOLOv8l](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-cls.pt) | 224 | 78.0 | 94.1 | - | - | 37.5 | 99.7 |
-| [YOLOv8x](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-cls.pt) | 224 | 78.4 | 94.3 | - | - | 57.4 | 154.8 |
+| Model | size (pixels) | acc top1 | acc top5 | Speed CPU ONNX (ms) | Speed A100 TensorRT (ms) | params (M) | FLOPs (B) at 640 |
+| ----- | ------------- | -------- | -------- | ------------------- | ------------------------ | ---------- | ---------------- |
+| [YOLOv8n](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-cls.pt) | 224 | 66.6 | 87.0 | - | - | 2.7 | 4.3 |
+| [YOLOv8s](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-cls.pt) | 224 | 72.3 | 91.1 | - | - | 6.4 | 13.5 |
+| [YOLOv8m](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-cls.pt) | 224 | 76.4 | 93.2 | - | - | 17.0 | 42.7 |
+| [YOLOv8l](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-cls.pt) | 224 | 78.0 | 94.1 | - | - | 37.5 | 99.7 |
+| [YOLOv8x](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-cls.pt) | 224 | 78.4 | 94.3 | - | - | 57.4 | 154.8 |

- **acc** values are for single-model single-scale on [ImageNet](https://www.image-net.org/) dataset.
Reproduce by `yolo mode=val task=classify data=imagenet device=0` diff --git a/README.zh-CN.md b/README.zh-CN.md index 262dd4b..506126e 100644 --- a/README.zh-CN.md +++ b/README.zh-CN.md @@ -115,13 +115,13 @@ success = YOLO("yolov8n.pt").export(format="onnx") # 将模型导出为 ONNX
目标检测

-| 模型 | 尺寸 (像素) | mAPval 50-95 | 推理速度 CPU (ms) | 推理速度 T4 GPU (ms) | 参数量 (M) | FLOPs (B) |
-| ---- | ----------- | ------------ | ----------------- | -------------------- | ---------- | --------- |
-| [YOLOv8n](https://github.com/ultralytics/ultralytics/releases/download/v8.0.0/yolov8n.pt) | 640 | 37.3 | - | - | 3.2 | 8.7 |
-| [YOLOv8s](https://github.com/ultralytics/ultralytics/releases/download/v8.0.0/yolov8s.pt) | 640 | 44.9 | - | - | 11.2 | 28.6 |
-| [YOLOv8m](https://github.com/ultralytics/ultralytics/releases/download/v8.0.0/yolov8m.pt) | 640 | 50.2 | - | - | 25.9 | 78.9 |
-| [YOLOv8l](https://github.com/ultralytics/ultralytics/releases/download/v8.0.0/yolov8l.pt) | 640 | 52.9 | - | - | 43.7 | 165.2 |
-| [YOLOv8x](https://github.com/ultralytics/ultralytics/releases/download/v8.0.0/yolov8x.pt) | 640 | 53.9 | - | - | 68.2 | 257.8 |
+| 模型 | 尺寸 (像素) | mAPval 50-95 | 推理速度 CPU ONNX (ms) | 推理速度 A100 TensorRT (ms) | 参数量 (M) | FLOPs (B) |
+| ---- | ----------- | ------------ | ---------------------- | --------------------------- | ---------- | --------- |
+| [YOLOv8n](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt) | 640 | 37.3 | - | 0.99 | 3.2 | 8.7 |
+| [YOLOv8s](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s.pt) | 640 | 44.9 | - | 1.20 | 11.2 | 28.6 |
+| [YOLOv8m](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m.pt) | 640 | 50.2 | - | 1.83 | 25.9 | 78.9 |
+| [YOLOv8l](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l.pt) | 640 | 52.9 | - | 2.39 | 43.7 | 165.2 |
+| [YOLOv8x](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x.pt) | 640 | 53.9 | - | 3.53 | 68.2 | 257.8 |

- **mAPval** 结果都在 [COCO val2017](http://cocodataset.org) 数据集上,使用单模型单尺度测试得到。
复现命令 `yolo mode=val task=detect data=coco.yaml device=0` @@ -132,13 +132,13 @@ success = YOLO("yolov8n.pt").export(format="onnx") # 将模型导出为 ONNX
实例分割

-| 模型 | 尺寸 (像素) | mAPbox 50-95 | mAPmask 50-95 | 推理速度 CPU (ms) | 推理速度 T4 GPU (ms) | 参数量 (M) | FLOPs (B) |
-| ---- | ----------- | ------------ | ------------- | ----------------- | -------------------- | ---------- | --------- |
-| [YOLOv8n](https://github.com/ultralytics/ultralytics/releases/download/v8.0.0/yolov8n-seg.pt) | 640 | 36.7 | 30.5 | - | - | 3.4 | 12.6 |
-| [YOLOv8s](https://github.com/ultralytics/ultralytics/releases/download/v8.0.0/yolov8s-seg.pt) | 640 | 44.6 | 36.8 | - | - | 11.8 | 42.6 |
-| [YOLOv8m](https://github.com/ultralytics/ultralytics/releases/download/v8.0.0/yolov8m-seg.pt) | 640 | 49.9 | 40.8 | - | - | 27.3 | 110.2 |
-| [YOLOv8l](https://github.com/ultralytics/ultralytics/releases/download/v8.0.0/yolov8l-seg.pt) | 640 | 52.3 | 42.6 | - | - | 46.0 | 220.5 |
-| [YOLOv8x](https://github.com/ultralytics/ultralytics/releases/download/v8.0.0/yolov8x-seg.pt) | 640 | 53.4 | 43.4 | - | - | 71.8 | 344.1 |
+| 模型 | 尺寸 (像素) | mAPbox 50-95 | mAPmask 50-95 | 推理速度 CPU ONNX (ms) | 推理速度 A100 TensorRT (ms) | 参数量 (M) | FLOPs (B) |
+| ---- | ----------- | ------------ | ------------- | ---------------------- | --------------------------- | ---------- | --------- |
+| [YOLOv8n](https://github.com/ultralytics/ultralytics/releases/download/v8.0.0/yolov8n-seg.pt) | 640 | 36.7 | 30.5 | - | - | 3.4 | 12.6 |
+| [YOLOv8s](https://github.com/ultralytics/ultralytics/releases/download/v8.0.0/yolov8s-seg.pt) | 640 | 44.6 | 36.8 | - | - | 11.8 | 42.6 |
+| [YOLOv8m](https://github.com/ultralytics/ultralytics/releases/download/v8.0.0/yolov8m-seg.pt) | 640 | 49.9 | 40.8 | - | - | 27.3 | 110.2 |
+| [YOLOv8l](https://github.com/ultralytics/ultralytics/releases/download/v8.0.0/yolov8l-seg.pt) | 640 | 52.3 | 42.6 | - | - | 46.0 | 220.5 |
+| [YOLOv8x](https://github.com/ultralytics/ultralytics/releases/download/v8.0.0/yolov8x-seg.pt) | 640 | 53.4 | 43.4 | - | - | 71.8 | 344.1 |

- **mAPval** 结果都在 [COCO val2017](http://cocodataset.org) 数据集上,使用单模型单尺度测试得到。
复现命令 `yolo mode=val task=segment data=coco.yaml device=0` @@ -149,13 +149,13 @@ success = YOLO("yolov8n.pt").export(format="onnx") # 将模型导出为 ONNX
分类

-| 模型 | 尺寸 (像素) | acc top1 | acc top5 | 推理速度 CPU (ms) | 推理速度 T4 GPU (ms) | 参数量 (M) | FLOPs (B) at 640 |
-| ---- | ----------- | -------- | -------- | ----------------- | -------------------- | ---------- | ---------------- |
-| [YOLOv8n](https://github.com/ultralytics/ultralytics/releases/download/v8.0.0/yolov8n-cls.pt) | 224 | 66.6 | 87.0 | - | - | 2.7 | 4.3 |
-| [YOLOv8s](https://github.com/ultralytics/ultralytics/releases/download/v8.0.0/yolov8s-cls.pt) | 224 | 72.3 | 91.1 | - | - | 6.4 | 13.5 |
-| [YOLOv8m](https://github.com/ultralytics/ultralytics/releases/download/v8.0.0/yolov8m-cls.pt) | 224 | 76.4 | 93.2 | - | - | 17.0 | 42.7 |
-| [YOLOv8l](https://github.com/ultralytics/ultralytics/releases/download/v8.0.0/yolov8l-cls.pt) | 224 | 78.0 | 94.1 | - | - | 37.5 | 99.7 |
-| [YOLOv8x](https://github.com/ultralytics/ultralytics/releases/download/v8.0.0/yolov8x-cls.pt) | 224 | 78.4 | 94.3 | - | - | 57.4 | 154.8 |
+| 模型 | 尺寸 (像素) | acc top1 | acc top5 | 推理速度 CPU ONNX (ms) | 推理速度 A100 TensorRT (ms) | 参数量 (M) | FLOPs (B) at 640 |
+| ---- | ----------- | -------- | -------- | ---------------------- | --------------------------- | ---------- | ---------------- |
+| [YOLOv8n](https://github.com/ultralytics/ultralytics/releases/download/v8.0.0/yolov8n-cls.pt) | 224 | 66.6 | 87.0 | - | - | 2.7 | 4.3 |
+| [YOLOv8s](https://github.com/ultralytics/ultralytics/releases/download/v8.0.0/yolov8s-cls.pt) | 224 | 72.3 | 91.1 | - | - | 6.4 | 13.5 |
+| [YOLOv8m](https://github.com/ultralytics/ultralytics/releases/download/v8.0.0/yolov8m-cls.pt) | 224 | 76.4 | 93.2 | - | - | 17.0 | 42.7 |
+| [YOLOv8l](https://github.com/ultralytics/ultralytics/releases/download/v8.0.0/yolov8l-cls.pt) | 224 | 78.0 | 94.1 | - | - | 37.5 | 99.7 |
+| [YOLOv8x](https://github.com/ultralytics/ultralytics/releases/download/v8.0.0/yolov8x-cls.pt) | 224 | 78.4 | 94.3 | - | - | 57.4 | 154.8 |

- **acc** 结果都在 [ImageNet](https://www.image-net.org/) 数据集上,使用单模型单尺度测试得到。
复现命令 `yolo mode=val task=detect data=coco.yaml device=0` diff --git a/docs/assets/favicon.ico b/docs/assets/favicon.ico new file mode 100644 index 0000000000000000000000000000000000000000..b71e7ec58990df7ba6cc96f6dc0e77998c8ca9a4 GIT binary patch literal 5430 zcmeI0eNYuu7{(9asC$+XhH0GsU?xV%tjx&DtO!WUv=6f~L;T3hh)m7QtjzTKk&&61 znHghN=J)}K2nq@aS3p2SKtx1EBqT&sWE{s~9LKlk-Q(W7fP6RoQ#(Afd-tCA{Lb0C z=Y5}p5H7-1czOz)eMDF{A^Hg+e0Au_xj@=F402W$LU`=E*5E?wRu|DxlApx zDp|{^h?6;GTjl1`U*wcyYovQigScGUQ(P+6!Y<@%bu|YxLM$Pc5KAXENILeTq$A(R z+QT+ExxmIVa~^faHBom=TZdQzm(C_3b|+%DNCKC@r6bEF9bP2q;3tx@X2=!$r%Fc$ zmyT+o7YelXHCfu6suXQ;MJ&d=8L{gn75^Zq@N2{_MC@Ei2c}C(e_K*&h(68i0ORxA z)>jXPRzmY3U&IC%ua?&e!zJY{Kn*ElFauc+22I$ zSj4`l)9#@$K+u#6FGKAv$dzs1y3HK^q>c zm2178lQv$4r;I`D^E&McLhNIReE_j{8!=Z9o6<`BkmS*N_@0r74b&-iFk%NF*4M$~ z_(9Os+w`%Kf1iZm`hr~nh<#M2==%}pjOB6GavPinLeWqqR0z$6u!@9;dr}YB@d#q? zYZlA#Te(eJN7TLg2DG(47ry3a=hId!e~)&qP$-lND6{^1#;5* z%0wA~e7L-Py9`549DHh%G~Umy)0{mZcLyMcZmP}It{@ju^?u~x6iJnd$fxm=&csMM zy-gC>bmCWe@py!cKyDr6?4fg4btRe~a&RhgFo8{+|1oc{HL#^o2QY6sjaqPvO<)@} z;pi%(F2o{N`gW9#n&S%gJg~2G?KltiIg^e0Q<(rZ>Ik#B4xRi%)uQ7+H)0dmTwAUl znlD3})~1%mbh%XP4t6BiWUvpa+Vxi&*FHS+1XaVzqg5Sab}8zh!A3nh`kkt2x7Y+W z<)XGB->*3^L%O!a<@>0Y-N25$R0Q^6%-h1|+WF^h%sUqIHrX3h9W7a>YN><$CFX6D z2Ak_@_H3iJQf8Qpu-4b6V?nK6g?CM{bF@0t>g?Xa}pR};K78k8l z^|-)ff3E6s_J^uAXN0NxjG8sKDVIOPV?2*}r{bB1)E?4$Tif9Gz7xOqlCwp zS013`C-{KXPImrM-ngi&E5rO zK}(@>$Xb`m@Yx0#1`F&y#ox>FA}bqSCigStXB_NM_BA@CO;Fw@nc1UI^HI0qae~cx z@fm#&g}C-vxlOOD@x4E{FIpk{6@De_^A{^0#ccLNSs!5D@I)E!C{L9(K{vcn@~g@p z8SG~a-;l-q@PR%9plfZg&9Shj7{YN_+o9~kaz6$8BgJMvX0YMO(#AJtCyszm8U{9e zN8A&JZ{gq99b$iB_TQc}{~kT_=F4timl^Dt@PhDj=~MBZnQZvF-JS2X9;k#i06;e)-|_uD7& z+E#v;+3blEhbvzk@2~u=$%gMUeJ-!F8=d4|Wj?&Wmz~%4ILsS<8f)9|)L37%3!dD; z9;E!W!FGDxDzJG?b!6UV+cM_E$9q}wijxgb507kk^BpF;KR8Y{_rrYl*Jhs`IU7EH z-&pC@D6jC^wy^Pz8+(JOdz!IL{)kTUgLwY?@m{>PdEdu-AtSFa+fVIluor9XnO1QhU~{c0YUdd8(ay2aP8jJf zkkjfL<(2lad0f^U%=_KVt&D-!$!}0E9<%j8UWw&>EAPi*IIpO+-P*j(@0MBn;cN8o z2Q7zEAl_5)c^TBGK3n6kcO7A_?GCVcf858~Pgkz7?~b% iU~49un@zCnJ#3=F!zLO6?JnpXssNzggXkt;>i8Q=l+%#_ literal 0 HcmV?d00001 diff --git a/docs/engine.md b/docs/engine.md index b0318d6..962c128 100644 --- a/docs/engine.md +++ b/docs/engine.md @@ -5,7 +5,7 @@ BaseTrainer contains the generic boilerplate training routine. It can be customi * `get_model(cfg, weights)` - The function that builds a the model to be trained * `get_dataloder()` - The function that builds the dataloder -More details and source code can be found in [`BaseTrainer` Reference](../reference/base_trainer.md) +More details and source code can be found in [`BaseTrainer` Reference](reference/base_trainer.md) ## DetectionTrainer Here's how you can use the YOLOv8 `DetectionTrainer` and customize it. diff --git a/docs/index.md b/docs/index.md index f7edd1b..cde7e11 100644 --- a/docs/index.md +++ b/docs/index.md @@ -2,9 +2,13 @@
- Run on Gradient - Open In Colab - Open In Kaggle + Ultralytics CI + YOLOv8 Citation + Docker Pulls +
+ Run on Gradient + Open In Colab + Open In Kaggle

diff --git a/mkdocs.yml b/mkdocs.yml index f71d4bd..b7d358c 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -1,25 +1,14 @@ site_name: Ultralytics Docs repo_url: https://github.com/ultralytics/ultralytics -repo_name: Ultralytics +edit_uri: https://github.com/ultralytics/ultralytics/tree/main/docs +repo_name: ultralytics/ultralytics theme: name: "material" logo: https://github.com/ultralytics/assets/raw/main/logo/Ultralytics-logomark-white.png - icon: - repo: fontawesome/brands/github - admonition: - note: octicons/tag-16 - abstract: octicons/checklist-16 - info: octicons/info-16 - tip: octicons/squirrel-16 - success: octicons/check-16 - question: octicons/question-16 - warning: octicons/alert-16 - failure: octicons/x-circle-16 - danger: octicons/zap-16 - bug: octicons/bug-16 - example: octicons/beaker-16 - quote: octicons/quote-16 + favicon: assets/favicon.ico + font: + text: Roboto palette: # Palette toggle for light mode @@ -34,12 +23,16 @@ theme: icon: material/brightness-4 name: Switch to light mode features: + - content.action.edit - content.code.annotate - content.tooltips - search.highlight - search.share - search.suggest - toc.follow + - navigation.top + - navigation.expand + - navigation.footer extra_css: - stylesheets/style.css @@ -72,8 +65,10 @@ markdown_extensions: - pymdownx.keys - pymdownx.mark - pymdownx.tilde + plugins: - mkdocstrings + - search # Primary navigation nav: diff --git a/ultralytics/nn/autobackend.py b/ultralytics/nn/autobackend.py index 5ffe000..4d862ba 100644 --- a/ultralytics/nn/autobackend.py +++ b/ultralytics/nn/autobackend.py @@ -22,32 +22,31 @@ class AutoBackend(nn.Module): def __init__(self, weights='yolov8n.pt', device=torch.device('cpu'), dnn=False, data=None, fp16=False, fuse=True): """ - Ultralytics YOLO MultiBackend class for python inference on various backends + MultiBackend class for python inference on various platforms using Ultralytics YOLO. Args: - weights: the path to the weights file. Defaults to yolov8n.pt - device: The device to run the model on. - dnn: If you want to use OpenCV's DNN module to run the inference, set this to True. Defaults to - False - data: a dictionary containing the following keys: - fp16: If true, will use half precision. Defaults to False - fuse: whether to fuse the model or not. Defaults to True + weights (str): The path to the weights file. Default: 'yolov8n.pt' + device (torch.device): The device to run the model on. + dnn (bool): Use OpenCV's DNN module for inference if True, defaults to False. + data (dict): Additional data, optional + fp16 (bool): If True, use half precision. Default: False + fuse (bool): Whether to fuse the model or not. 
Default: True - Supported format and their usage: - | Platform | weights | - |-----------------------|------------------| - | PyTorch | *.pt | - | TorchScript | *.torchscript | - | ONNX Runtime | *.onnx | - | ONNX OpenCV DNN | *.onnx --dnn | - | OpenVINO | *.xml | - | CoreML | *.mlmodel | - | TensorRT | *.engine | - | TensorFlow SavedModel | *_saved_model | - | TensorFlow GraphDef | *.pb | - | TensorFlow Lite | *.tflite | - | TensorFlow Edge TPU | *_edgetpu.tflite | - | PaddlePaddle | *_paddle_model | + Supported formats and their usage: + Platform | Weights Format + -----------------------|------------------ + PyTorch | *.pt + TorchScript | *.torchscript + ONNX Runtime | *.onnx + ONNX OpenCV DNN | *.onnx --dnn + OpenVINO | *.xml + CoreML | *.mlmodel + TensorRT | *.engine + TensorFlow SavedModel | *_saved_model + TensorFlow GraphDef | *.pb + TensorFlow Lite | *.tflite + TensorFlow Edge TPU | *_edgetpu.tflite + PaddlePaddle | *_paddle_model """ super().__init__() w = str(weights[0] if isinstance(weights, list) else weights) @@ -234,15 +233,16 @@ class AutoBackend(nn.Module): def forward(self, im, augment=False, visualize=False): """ - Runs inference on the given model + Runs inference on the YOLOv8 MultiBackend model. Args: - im: the image tensor - augment: whether to augment the image. Defaults to False - visualize: if True, then the network will output the feature maps of the last convolutional layer. - Defaults to False + im (torch.tensor): The image tensor to perform inference on. + augment (bool): whether to perform data augmentation during inference, defaults to False + visualize (bool): whether to visualize the output predictions, defaults to False + + Returns: + (tuple): Tuple containing the raw output tensor, and the processed output for visualization (if visualize=True) """ - # YOLOv5 MultiBackend inference b, ch, h, w = im.shape # batch, channel, height, width if self.fp16 and im.dtype != torch.float16: im = im.half() # to FP16 @@ -325,19 +325,25 @@ class AutoBackend(nn.Module): def from_numpy(self, x): """ - `from_numpy` converts a numpy array to a tensor + Convert a numpy array to a tensor. - Args: - x: the numpy array to convert - """ + Args: + x (numpy.ndarray): The array to be converted. + + Returns: + (torch.tensor): The converted tensor + """ return torch.from_numpy(x).to(self.device) if isinstance(x, np.ndarray) else x def warmup(self, imgsz=(1, 3, 640, 640)): """ - Warmup model by running inference once + Warm up the model by running one forward pass with a dummy input. Args: - imgsz: the size of the image you want to run inference on. + imgsz (tuple): The shape of the dummy input tensor in the format (batch_size, channels, height, width) + + Returns: + (None): This method runs the forward pass and don't return any value """ warmup_types = self.pt, self.jit, self.onnx, self.engine, self.saved_model, self.pb, self.triton, self.nn_module if any(warmup_types) and (self.device.type != 'cpu' or self.triton): diff --git a/ultralytics/nn/tasks.py b/ultralytics/nn/tasks.py index f143c14..a7d0118 100644 --- a/ultralytics/nn/tasks.py +++ b/ultralytics/nn/tasks.py @@ -17,35 +17,36 @@ from ultralytics.yolo.utils.torch_utils import (fuse_conv_and_bn, initialize_wei class BaseModel(nn.Module): - ''' - The BaseModel class is a base class for all the models in the Ultralytics YOLO family. - ''' + """ + The BaseModel class serves as a base class for all the models in the Ultralytics YOLO family. 
+ """ def forward(self, x, profile=False, visualize=False): """ - > `forward` is a wrapper for `_forward_once` that runs the model on a single scale + Forward pass of the model on a single scale. + Wrapper for `_forward_once` method. Args: - x: the input image - profile: whether to profile the model. Defaults to False - visualize: if True, will return the intermediate feature maps. Defaults to False + x (torch.tensor): The input image tensor + profile (bool): Whether to profile the model, defaults to False + visualize (bool): Whether to return the intermediate feature maps, defaults to False Returns: - The output of the network. + (torch.tensor): The output of the network. """ return self._forward_once(x, profile, visualize) def _forward_once(self, x, profile=False, visualize=False): """ - > Forward pass of the network + Perform a forward pass through the network. Args: - x: input to the model - profile: if True, the time taken for each layer will be printed. Defaults to False - visualize: If True, it will save the feature maps of the model. Defaults to False + x (torch.tensor): The input tensor to the model + profile (bool): Print the computation time of each layer if True, defaults to False. + visualize (bool): Save the feature maps of the model if True, defaults to False Returns: - The last layer of the model. + (torch.tensor): The last output of the model. """ y, dt = [], [] # outputs for m in self.model: @@ -62,13 +63,15 @@ class BaseModel(nn.Module): def _profile_one_layer(self, m, x, dt): """ - It takes a model, an input, and a list of times, and it profiles the model on the input, appending - the time to the list + Profile the computation time and FLOPs of a single layer of the model on a given input. Appends the results to the provided list. Args: - m: the model - x: the input image - dt: list of time taken for each layer + m (nn.Module): The layer to be profiled. + x (torch.Tensor): The input data to the layer. + dt (list): A list to store the computation time of the layer. + + Returns: + None """ c = m == self.model[-1] # is final layer, copy input as inplace fix o = thop.profile(m, inputs=(x.copy() if c else x,), verbose=False)[0] / 1E9 * 2 if thop else 0 # FLOPs @@ -84,10 +87,10 @@ class BaseModel(nn.Module): def fuse(self): """ - > It takes a model and fuses the Conv2d() and BatchNorm2d() layers into a single layer + Fuse the `Conv2d()` and `BatchNorm2d()` layers of the model into a single layer, in order to improve the computation efficiency. Returns: - The model is being returned. + (nn.Module): The fused model is returned. """ LOGGER.info('Fusing layers... ') for m in self.model.modules(): @@ -103,8 +106,8 @@ class BaseModel(nn.Module): Prints model information Args: - verbose: if True, prints out the model information. Defaults to False - imgsz: the size of the image that the model will be trained on. Defaults to 640 + verbose (bool): if True, prints out the model information. Defaults to False + imgsz (int): the size of the image that the model will be trained on. Defaults to 640 """ model_info(self, verbose, imgsz) @@ -129,10 +132,10 @@ class BaseModel(nn.Module): def load(self, weights): """ - > This function loads the weights of the model from a file + This function loads the weights of the model from a file Args: - weights: The weights to load into the model. + weights (str): The weights to load into the model. 
""" # Force all tasks to implement this function raise NotImplementedError("This function needs to be implemented by derived classes!") diff --git a/ultralytics/yolo/engine/trainer.py b/ultralytics/yolo/engine/trainer.py index f6be047..727a50f 100644 --- a/ultralytics/yolo/engine/trainer.py +++ b/ultralytics/yolo/engine/trainer.py @@ -84,6 +84,7 @@ class BaseTrainer: if overrides is None: overrides = {} self.args = get_config(config, overrides) + self.device = utils.torch_utils.select_device(self.args.device, self.args.batch) self.check_resume() self.console = LOGGER self.validator = None @@ -113,7 +114,6 @@ class BaseTrainer: print_args(dict(self.args)) # Device - self.device = utils.torch_utils.select_device(self.args.device, self.batch_size) self.amp = self.device.type != 'cpu' self.scaler = amp.GradScaler(enabled=self.amp) if self.device.type == 'cpu': @@ -164,7 +164,15 @@ class BaseTrainer: callback(self) def train(self): - world_size = torch.cuda.device_count() + # Allow device='', device=None on Multi-GPU systems to default to device=0 + if isinstance(self.args.device, int) or self.args.device: # i.e. device=0 or device=[0,1,2,3] + world_size = torch.cuda.device_count() + elif torch.cuda.is_available(): # i.e. device=None or device='' + world_size = 1 # default to device 0 + else: # i.e. device='cpu' or 'mps' + world_size = 0 + + # Run subprocess if DDP training, else train normally if world_size > 1 and "LOCAL_RANK" not in os.environ: command = generate_ddp_command(world_size, self) try: diff --git a/ultralytics/yolo/utils/ops.py b/ultralytics/yolo/utils/ops.py index 67e5d52..677df60 100644 --- a/ultralytics/yolo/utils/ops.py +++ b/ultralytics/yolo/utils/ops.py @@ -1,5 +1,3 @@ -# Ultralytics YOLO 🚀, GPL-3.0 license - import contextlib import math import re @@ -50,15 +48,15 @@ def coco80_to_coco91_class(): # converts 80-index (val2014) to 91-index (paper) def segment2box(segment, width=640, height=640): """ - > Convert 1 segment label to 1 box label, applying inside-image constraint, i.e. (xy1, xy2, ...) to + Convert 1 segment label to 1 box label, applying inside-image constraint, i.e. (xy1, xy2, ...) to (xyxy) Args: - segment: the segment label - width: the width of the image. Defaults to 640 - height: The height of the image. Defaults to 640 + segment (torch.tensor): the segment label + width (int): the width of the image. Defaults to 640 + height (int): The height of the image. Defaults to 640 Returns: - the minimum and maximum x and y values of the segment. + (np.array): the minimum and maximum x and y values of the segment. """ # Convert 1 segment label to 1 box label, applying inside-image constraint, i.e. (xy1, xy2, ...) to (xyxy) x, y = segment.T # segment xy @@ -69,17 +67,16 @@ def segment2box(segment, width=640, height=640): def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None): """ - > Rescale boxes (xyxy) from img1_shape to img0_shape + Rescales bounding boxes (in the format of xyxy) from the shape of the image they were originally specified in (img1_shape) to the shape of a different image (img0_shape). Args: - img1_shape: The shape of the image that the bounding boxes are for. - boxes: the bounding boxes of the objects in the image - img0_shape: the shape of the original image - ratio_pad: a tuple of (ratio, pad) + img1_shape (tuple): The shape of the image that the bounding boxes are for, in the format of (height, width). 
+ boxes (torch.tensor): the bounding boxes of the objects in the image, in the format of (x1, y1, x2, y2) + img0_shape (tuple): the shape of the target image, in the format of (height, width). + ratio_pad (tuple): a tuple of (ratio, pad) for scaling the boxes. If not provided, the ratio and pad will be calculated based on the size difference between the two images. Returns: - The boxes are being returned. + boxes (torch.tensor): The scaled bounding boxes, in the format of (x1, y1, x2, y2) """ - # if ratio_pad is None: # calculate from img0_shape gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding @@ -113,7 +110,7 @@ def non_max_suppression( nm=0, # number of masks ): """ - > Perform non-maximum suppression (NMS) on a set of boxes, with support for masks and multiple labels per box. + Perform non-maximum suppression (NMS) on a set of boxes, with support for masks and multiple labels per box. Arguments: prediction (torch.Tensor): A tensor of shape (batch_size, num_boxes, num_classes + 4 + num_masks) @@ -134,7 +131,7 @@ def non_max_suppression( nm (int): The number of masks output by the model. Returns: - List[torch.Tensor]: A list of length batch_size, where each element is a tensor of + (List[torch.Tensor]): A list of length batch_size, where each element is a tensor of shape (num_boxes, 6 + num_masks) containing the kept boxes, with columns (x1, y1, x2, y2, confidence, class, mask1, mask2, ...). """ @@ -231,12 +228,12 @@ def non_max_suppression( def clip_boxes(boxes, shape): """ - > It takes a list of bounding boxes and a shape (height, width) and clips the bounding boxes to the + It takes a list of bounding boxes and a shape (height, width) and clips the bounding boxes to the shape Args: - boxes: the bounding boxes to clip - shape: the shape of the image + boxes (torch.tensor): the bounding boxes to clip + shape (tuple): the shape of the image """ if isinstance(boxes, torch.Tensor): # faster individually boxes[..., 0].clamp_(0, shape[1]) # x1 @@ -262,16 +259,16 @@ def clip_coords(boxes, shape): def scale_image(im1_shape, masks, im0_shape, ratio_pad=None): """ - > It takes a mask, and resizes it to the original image size + Takes a mask, and resizes it to the original image size Args: - im1_shape: model input shape, [h, w] - masks: [h, w, num] - im0_shape: the original image shape - ratio_pad: the ratio of the padding to the original image. + im1_shape (tuple): model input shape, [h, w] + masks (torch.tensor): [h, w, num] + im0_shape (tuple): the original image shape + ratio_pad (tuple): the ratio of the padding to the original image. Returns: - The masks are being returned. + masks (torch.tensor): The masks that are being returned. """ # Rescale coordinates (xyxy) from im1_shape to im0_shape if ratio_pad is None: # calculate from im0_shape @@ -297,14 +294,12 @@ def scale_image(im1_shape, masks, im0_shape, ratio_pad=None): def xyxy2xywh(x): """ - > It takes a list of bounding boxes, and converts them from the format [x1, y1, x2, y2] to [x, y, w, - h] where xy1=top-left, xy2=bottom-right + Convert bounding box coordinates from (x1, y1, x2, y2) format to (x, y, width, height) format. Args: - x: the input tensor - + x (np.ndarray) or (torch.Tensor): The input tensor containing the bounding box coordinates in (x1, y1, x2, y2) format. Returns: - the center of the box, the width and the height of the box. 
+ y (numpy.ndarray) or (torch.Tensor): The bounding box coordinates in (x, y, width, height) format. """ y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) y[..., 0] = (x[..., 0] + x[..., 2]) / 2 # x center @@ -316,13 +311,12 @@ def xyxy2xywh(x): def xywh2xyxy(x): """ - > It converts the bounding box from x,y,w,h to x1,y1,x2,y2 where xy1=top-left, xy2=bottom-right + Convert bounding box coordinates from (x, y, width, height) format to (x1, y1, x2, y2) format where (x1, y1) is the top-left corner and (x2, y2) is the bottom-right corner. Args: - x: the input tensor - + x (np.ndarray) or (torch.Tensor): The input tensor containing the bounding box coordinates in (x, y, width, height) format. Returns: - the top left and bottom right coordinates of the bounding box. + y (numpy.ndarray) or (torch.Tensor): The bounding box coordinates in (x1, y1, x2, y2) format. """ y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) y[..., 0] = x[..., 0] - x[..., 2] / 2 # top left x @@ -334,17 +328,16 @@ def xywh2xyxy(x): def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0): """ - > It converts the normalized coordinates to the actual coordinates [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right + Convert normalized bounding box coordinates to pixel coordinates. Args: - x: the bounding box coordinates - w: width of the image. Defaults to 640 - h: height of the image. Defaults to 640 - padw: padding width. Defaults to 0 - padh: height of the padding. Defaults to 0 - + x (np.ndarray) or (torch.Tensor): The bounding box coordinates. + w (int): Width of the image. Defaults to 640 + h (int): Height of the image. Defaults to 640 + padw (int): Padding width. Defaults to 0 + padh (int): Padding height. Defaults to 0 Returns: - the xyxy coordinates of the bounding box. + y (numpy.ndarray) or (torch.Tensor): The coordinates of the bounding box in the format [x1, y1, x2, y2] where x1,y1 is the top-left corner, x2,y2 is the bottom-right corner of the bounding box. """ y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) y[..., 0] = w * (x[..., 0] - x[..., 2] / 2) + padw # top left x @@ -356,18 +349,16 @@ def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0): def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0): """ - > It takes in a list of bounding boxes, and returns a list of bounding boxes, but with the x and y - coordinates normalized to the width and height of the image + Convert bounding box coordinates from (x1, y1, x2, y2) format to (x, y, width, height, normalized) format. x, y, width and height are normalized to image dimensions Args: - x: the bounding box coordinates - w: width of the image. Defaults to 640 - h: height of the image. Defaults to 640 - clip: If True, the boxes will be clipped to the image boundaries. Defaults to False - eps: the minimum value of the box's width and height. - + x (np.ndarray) or (torch.Tensor): The input tensor containing the bounding box coordinates in (x1, y1, x2, y2) format. + w (int): The width of the image. Defaults to 640 + h (int): The height of the image. Defaults to 640 + clip (bool): If True, the boxes will be clipped to the image boundaries. Defaults to False + eps (float): The minimum value of the box's width and height. Defaults to 0.0 Returns: - the xywhn format of the bounding boxes. 
+ y (numpy.ndarray) or (torch.Tensor): The bounding box coordinates in (x, y, width, height, normalized) format """ if clip: clip_boxes(x, (h - eps, w - eps)) # warning: inplace clip @@ -381,17 +372,16 @@ def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0): def xyn2xy(x, w=640, h=640, padw=0, padh=0): """ - > It converts normalized segments into pixel segments of shape (n,2) + Convert normalized coordinates to pixel coordinates of shape (n,2) Args: - x: the normalized coordinates of the bounding box - w: width of the image. Defaults to 640 - h: height of the image. Defaults to 640 - padw: padding width. Defaults to 0 - padh: padding height. Defaults to 0 - + x (numpy.ndarray) or (torch.Tensor): The input tensor of normalized bounding box coordinates + w (int): The width of the image. Defaults to 640 + h (int): The height of the image. Defaults to 640 + padw (int): The width of the padding. Defaults to 0 + padh (int): The height of the padding. Defaults to 0 Returns: - the x and y coordinates of the top left corner of the bounding box. + y (numpy.ndarray) or (torch.Tensor): The x and y coordinates of the top left corner of the bounding box """ y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) y[..., 0] = w * x[..., 0] + padw # top left x @@ -401,13 +391,12 @@ def xyn2xy(x, w=640, h=640, padw=0, padh=0): def xywh2ltwh(x): """ - > It converts the bounding box from [x, y, w, h] to [x1, y1, w, h] where xy1=top-left + Convert the bounding box format from [x, y, w, h] to [x1, y1, w, h], where x1, y1 are the top-left coordinates. Args: - x: the x coordinate of the center of the bounding box - + x (numpy.ndarray) or (torch.Tensor): The input tensor with the bounding box coordinates in the xywh format Returns: - the top left x and y coordinates of the bounding box. + y (numpy.ndarray) or (torch.Tensor): The bounding box coordinates in the xyltwh format """ y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x @@ -417,13 +406,12 @@ def xywh2ltwh(x): def xyxy2ltwh(x): """ - > Convert nx4 boxes from [x1, y1, x2, y2] to [x1, y1, w, h] where xy1=top-left, xy2=bottom-right + Convert nx4 bounding boxes from [x1, y1, x2, y2] to [x1, y1, w, h], where xy1=top-left, xy2=bottom-right Args: - x: the input tensor - + x (numpy.ndarray) or (torch.Tensor): The input tensor with the bounding boxes coordinates in the xyxy format Returns: - the xyxy2ltwh function. + y (numpy.ndarray) or (torch.Tensor): The bounding box coordinates in the xyltwh format. """ y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) y[:, 2] = x[:, 2] - x[:, 0] # width @@ -433,10 +421,10 @@ def xyxy2ltwh(x): def ltwh2xywh(x): """ - > Convert nx4 boxes from [x1, y1, w, h] to [x, y, w, h] where xy1=top-left, xy=center + Convert nx4 boxes from [x1, y1, w, h] to [x, y, w, h] where xy1=top-left, xy=center Args: - x: the input tensor + x (torch.tensor): the input tensor """ y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) y[:, 0] = x[:, 0] + x[:, 2] / 2 # center x @@ -446,14 +434,13 @@ def ltwh2xywh(x): def ltwh2xyxy(x): """ - > It converts the bounding box from [x1, y1, w, h] to [x1, y1, x2, y2] where xy1=top-left, - xy2=bottom-right + It converts the bounding box from [x1, y1, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right Args: - x: the input image + x (numpy.ndarray) or (torch.Tensor): the input image Returns: - the xyxy coordinates of the bounding boxes. + y (numpy.ndarray) or (torch.Tensor): the xyxy coordinates of the bounding boxes. 
""" y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) y[:, 2] = x[:, 2] + x[:, 0] # width @@ -463,14 +450,13 @@ def ltwh2xyxy(x): def segments2boxes(segments): """ - > It converts segment labels to box labels, i.e. (cls, xy1, xy2, ...) to (cls, xywh) + It converts segment labels to box labels, i.e. (cls, xy1, xy2, ...) to (cls, xywh) Args: - segments: list of segments, each segment is a list of points, each point is a list of x, y - coordinates + segments (list): list of segments, each segment is a list of points, each point is a list of x, y coordinates Returns: - the xywh coordinates of the bounding boxes. + (np.array): the xywh coordinates of the bounding boxes. """ boxes = [] for s in segments: @@ -481,15 +467,14 @@ def segments2boxes(segments): def resample_segments(segments, n=1000): """ - > It takes a list of segments (n,2) and returns a list of segments (n,2) where each segment has been - up-sampled to n points + It takes a list of segments (n,2) and returns a list of segments (n,2) where each segment has been up-sampled to n points Args: - segments: a list of (n,2) arrays, where n is the number of points in the segment. - n: number of points to resample the segment to. Defaults to 1000 + segments (list): a list of (n,2) arrays, where n is the number of points in the segment. + n (int): number of points to resample the segment to. Defaults to 1000 Returns: - the resampled segments. + segments (list): the resampled segments. """ for i, s in enumerate(segments): s = np.concatenate((s, s[0:1, :]), axis=0) @@ -501,14 +486,14 @@ def resample_segments(segments, n=1000): def crop_mask(masks, boxes): """ - > It takes a mask and a bounding box, and returns a mask that is cropped to the bounding box + It takes a mask and a bounding box, and returns a mask that is cropped to the bounding box Args: - masks: [h, w, n] tensor of masks - boxes: [n, 4] tensor of bbox coords in relative point form + masks (torch.tensor): [h, w, n] tensor of masks + boxes (torch.tensor): [n, 4] tensor of bbox coordinates in relative point form Returns: - The masks are being cropped to the bounding box. + (torch.tensor): The masks are being cropped to the bounding box. """ n, h, w = masks.shape x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1) # x1 shape(1,1,n) @@ -520,17 +505,17 @@ def crop_mask(masks, boxes): def process_mask_upsample(protos, masks_in, bboxes, shape): """ - > It takes the output of the mask head, and applies the mask to the bounding boxes. This produces masks of higher + It takes the output of the mask head, and applies the mask to the bounding boxes. This produces masks of higher quality but is slower. Args: - protos: [mask_dim, mask_h, mask_w] - masks_in: [n, mask_dim], n is number of masks after nms - bboxes: [n, 4], n is number of masks after nms - shape: the size of the input image + protos (torch.tensor): [mask_dim, mask_h, mask_w] + masks_in (torch.tensor): [n, mask_dim], n is number of masks after nms + bboxes (torch.tensor): [n, 4], n is number of masks after nms + shape (tuple): the size of the input image (h,w) Returns: - mask + (torch.tensor): The upsampled masks. """ c, mh, mw = protos.shape # CHW masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw) @@ -541,17 +526,17 @@ def process_mask_upsample(protos, masks_in, bboxes, shape): def process_mask(protos, masks_in, bboxes, shape, upsample=False): """ - > It takes the output of the mask head, and applies the mask to the bounding boxes. 
This is faster but produces + It takes the output of the mask head, and applies the mask to the bounding boxes. This is faster but produces downsampled quality of mask Args: - protos: [mask_dim, mask_h, mask_w] - masks_in: [n, mask_dim], n is number of masks after nms - bboxes: [n, 4], n is number of masks after nms - shape: the size of the input image + protos (torch.tensor): [mask_dim, mask_h, mask_w] + masks_in (torch.tensor): [n, mask_dim], n is number of masks after nms + bboxes (torch.tensor): [n, 4], n is number of masks after nms + shape (tuple): the size of the input image (h,w) Returns: - mask + (torch.tensor): The processed masks. """ c, mh, mw = protos.shape # CHW @@ -572,16 +557,16 @@ def process_mask(protos, masks_in, bboxes, shape, upsample=False): def process_mask_native(protos, masks_in, bboxes, shape): """ - > It takes the output of the mask head, and crops it after upsampling to the bounding boxes. + It takes the output of the mask head, and crops it after upsampling to the bounding boxes. Args: - protos: [mask_dim, mask_h, mask_w] - masks_in: [n, mask_dim], n is number of masks after nms - bboxes: [n, 4], n is number of masks after nms - shape: input_image_size, (h, w) + protos (torch.tensor): [mask_dim, mask_h, mask_w] + masks_in (torch.tensor): [n, mask_dim], n is number of masks after nms + bboxes (torch.tensor): [n, 4], n is number of masks after nms + shape (tuple): the size of the input image (h,w) Returns: - masks: [h, w, n] + masks (torch.tensor): The returned masks with dimensions [h, w, n] """ c, mh, mw = protos.shape # CHW masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw) @@ -598,17 +583,17 @@ def process_mask_native(protos, masks_in, bboxes, shape): def scale_segments(img1_shape, segments, img0_shape, ratio_pad=None, normalize=False): """ - > Rescale segment coords (xyxy) from img1_shape to img0_shape + Rescale segment coordinates (xyxy) from img1_shape to img0_shape Args: - img1_shape: The shape of the image that the segments are from. - segments: the segments to be scaled - img0_shape: the shape of the image that the segmentation is being applied to - ratio_pad: the ratio of the image size to the padded image size. - normalize: If True, the coordinates will be normalized to the range [0, 1]. Defaults to False + img1_shape (tuple): The shape of the image that the segments are from. + segments (torch.tensor): the segments to be scaled + img0_shape (tuple): the shape of the image that the segmentation is being applied to + ratio_pad (tuple): the ratio of the image size to the padded image size. + normalize (bool): If True, the coordinates will be normalized to the range [0, 1]. Defaults to False Returns: - the segmented image. + segments (torch.tensor): the segmented image. """ if ratio_pad is None: # calculate from img0_shape gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new @@ -629,11 +614,11 @@ def scale_segments(img1_shape, segments, img0_shape, ratio_pad=None, normalize=F def masks2segments(masks, strategy='largest'): """ - > It takes a list of masks(n,h,w) and returns a list of segments(n,xy) + It takes a list of masks(n,h,w) and returns a list of segments(n,xy) Args: - masks: the output of the model, which is a tensor of shape (batch_size, 160, 160) - strategy: 'concat' or 'largest'. Defaults to largest + masks (torch.tensor): the output of the model, which is a tensor of shape (batch_size, 160, 160) + strategy (str): 'concat' or 'largest'. 
Defaults to largest Returns: segments (List): list of segment masks @@ -654,12 +639,12 @@ def masks2segments(masks, strategy='largest'): def clip_segments(segments, shape): """ - > It takes a list of line segments (x1,y1,x2,y2) and clips them to the image shape (height, width) + It takes a list of line segments (x1,y1,x2,y2) and clips them to the image shape (height, width) Args: - segments: a list of segments, each segment is a list of points, each point is a list of x,y + segments (list): a list of segments, each segment is a list of points, each point is a list of x,y coordinates - shape: the shape of the image + shape (tuple): the shape of the image """ if isinstance(segments, torch.Tensor): # faster individually segments[:, 0].clamp_(0, shape[1]) # x @@ -670,5 +655,13 @@ def clip_segments(segments, shape): def clean_str(s): - # Cleans a string by replacing special characters with underscore _ + """ + Cleans a string by replacing special characters with underscore _ + + Args: + s (str): a string needing special characters replaced + + Returns: + (str): a string with special characters replaced by an underscore _ + """ return re.sub(pattern="[|@#!¡·$€%&()=?¿^*;:,¨´><+]", repl="_", string=s)
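
As a quick illustration of the `ops.py` helpers documented above, `xyxy2xywh` and `xywh2xyxy` are inverses of each other and `clean_str` replaces special characters with underscores. The following is a minimal sketch, not part of the patch itself, assuming this repository's `ultralytics` package is importable:

```python
# Minimal sketch (not part of this patch): exercising helpers defined in
# ultralytics/yolo/utils/ops.py whose docstrings are updated above.
import numpy as np

from ultralytics.yolo.utils.ops import clean_str, xywh2xyxy, xyxy2xywh

box_xyxy = np.array([[10.0, 20.0, 110.0, 220.0]])   # one box as (x1, y1, x2, y2)
box_xywh = xyxy2xywh(box_xyxy)                       # -> [[60., 120., 100., 200.]] (x center, y center, w, h)
assert np.allclose(xywh2xyxy(box_xywh), box_xyxy)    # converting back recovers the original corners

print(clean_str("rtsp://user@host:8554/live?ch=1"))  # special characters are replaced with '_'
```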