From f921e1ac21f9d89976ea0504e58d65714f7ab10c Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sun, 12 Mar 2023 02:08:13 +0100 Subject: [PATCH] `ultralytics 8.0.53` DDP AMP and Edge TPU fixes (#1362) Co-authored-by: Richard Aljaste Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Vuong Kha Sieu <75152429+hotfur@users.noreply.github.com> --- .github/workflows/ci.yaml | 2 +- .github/workflows/publish.yml | 4 +- README.md | 2 +- README.zh-CN.md | 2 +- docs/app.md | 35 ++- docs/cfg.md | 236 ----------------- docs/hub.md | 2 - docs/modes/benchmark.md | 65 +++++ docs/modes/export.md | 81 ++++++ docs/modes/index.md | 62 +++++ docs/{ => modes}/predict.md | 48 +++- docs/{tasks/tracking.md => modes/track.md} | 5 +- docs/modes/train.md | 88 ++++++ docs/modes/val.md | 86 ++++++ docs/quickstart.md | 4 +- docs/tasks/{classification.md => classify.md} | 35 +-- docs/tasks/{detection.md => detect.md} | 35 +-- docs/tasks/index.md | 46 ++++ docs/tasks/keypoints.md | 141 ++++++++++ docs/tasks/{segmentation.md => segment.md} | 35 +-- docs/{ => usage}/callbacks.md | 0 docs/usage/cfg.md | 250 ++++++++++++++++++ docs/{ => usage}/cli.md | 0 docs/{ => usage}/engine.md | 2 +- docs/{ => usage}/python.md | 2 +- mkdocs.yml | 31 ++- tests/test_python.py | 10 +- ultralytics/__init__.py | 2 +- ultralytics/hub/utils.py | 2 +- ultralytics/nn/autobackend.py | 11 +- ultralytics/yolo/cfg/__init__.py | 2 +- ultralytics/yolo/data/augment.py | 14 +- ultralytics/yolo/data/dataloaders/v5loader.py | 2 +- ultralytics/yolo/data/dataset.py | 2 +- ultralytics/yolo/engine/exporter.py | 7 +- ultralytics/yolo/engine/model.py | 2 +- ultralytics/yolo/engine/results.py | 2 +- ultralytics/yolo/engine/trainer.py | 51 ++-- ultralytics/yolo/engine/validator.py | 2 +- ultralytics/yolo/utils/__init__.py | 4 +- ultralytics/yolo/utils/benchmarks.py | 4 +- ultralytics/yolo/utils/downloads.py | 2 +- ultralytics/yolo/utils/torch_utils.py | 2 +- ultralytics/yolo/v8/classify/val.py | 2 + ultralytics/yolo/v8/detect/predict.py | 4 +- ultralytics/yolo/v8/segment/predict.py | 9 +- 46 files changed, 1048 insertions(+), 387 deletions(-) delete mode 100644 docs/cfg.md create mode 100644 docs/modes/benchmark.md create mode 100644 docs/modes/export.md create mode 100644 docs/modes/index.md rename docs/{ => modes}/predict.md (54%) rename docs/{tasks/tracking.md => modes/track.md} (96%) create mode 100644 docs/modes/train.md create mode 100644 docs/modes/val.md rename docs/tasks/{classification.md => classify.md} (79%) rename docs/tasks/{detection.md => detect.md} (80%) create mode 100644 docs/tasks/index.md create mode 100644 docs/tasks/keypoints.md rename docs/tasks/{segmentation.md => segment.md} (80%) rename docs/{ => usage}/callbacks.md (100%) create mode 100644 docs/usage/cfg.md rename docs/{ => usage}/cli.md (100%) rename docs/{ => usage}/engine.md (98%) rename docs/{ => usage}/python.md (98%) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 50a5482..0b1be69 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -103,7 +103,7 @@ jobs: shell: python run: | from ultralytics.yolo.utils.benchmarks import benchmark - benchmark(model='${{ matrix.model }}-cls.pt', imgsz=160, half=False, hard_fail=0.60) + benchmark(model='${{ matrix.model }}-cls.pt', imgsz=160, half=False, hard_fail=0.61) - name: Benchmark Summary run: cat benchmarks.log diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 7ed500b..c533e33 100644 --- 
a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -61,7 +61,9 @@ jobs: - name: Deploy Docs continue-on-error: true if: (github.event_name == 'push' && steps.check_pypi.outputs.increment == 'True') || github.event.inputs.docs == 'true' + env: + PERSONAL_ACCESS_TOKEN: ${{ secrets.PERSONAL_ACCESS_TOKEN }} run: | mkdocs gh-deploy || true git checkout gh-pages - git push https://${{ secrets.PERSONAL_ACCESS_TOKEN }}@github.com/ultralytics/docs gh-pages --force + git push https://$PERSONAL_ACCESS_TOKEN@github.com/ultralytics/docs gh-pages --force diff --git a/README.md b/README.md index eae7e0a..88e0cd8 100644 --- a/README.md +++ b/README.md @@ -240,7 +240,7 @@ on your experience. Thank you 🙏 to all our contributors! - + ##
License
diff --git a/README.zh-CN.md b/README.zh-CN.md index 355c281..42822e4 100644 --- a/README.zh-CN.md +++ b/README.zh-CN.md @@ -219,7 +219,7 @@ Ultralytics [发布页](https://github.com/ultralytics/ultralytics/releases) 自 - + ##
License
diff --git a/docs/app.md b/docs/app.md index 6870874..1a15f61 100644 --- a/docs/app.md +++ b/docs/app.md @@ -3,7 +3,6 @@
-
@@ -27,26 +26,26 @@

- - CI CPU - - Open In Colab + +   + +
-
-Welcome to the Ultralytics HUB app for demonstrating YOLOv5 and YOLOv8 models! In this app, available on the [Apple App -Store](https://apps.apple.com/xk/app/ultralytics/id1583935240) and the -[Google Play Store](https://play.google.com/store/apps/details?id=com.ultralytics.ultralytics_app), you will be able -to see the power and capabilities of YOLOv5, a state-of-the-art object detection model developed by Ultralytics. +Welcome to the Ultralytics HUB app, which is designed to demonstrate the power and capabilities of the YOLOv5 and YOLOv8 +models. This app is available for download on +the [Apple App Store](https://apps.apple.com/xk/app/ultralytics/id1583935240) and +the [Google Play Store](https://play.google.com/store/apps/details?id=com.ultralytics.ultralytics_app). -**To install simply scan the QR code above**. The App currently features YOLOv5 models, with YOLOv8 models coming soon. +**To install the app, simply scan the QR code provided above**. At the moment, the app features YOLOv5 models, with +YOLOv8 models set to be available soon. -With YOLOv5, you can detect and classify objects in images and videos with high accuracy and speed. The model has been -trained on a large dataset and is able to detect a wide range of objects, including cars, pedestrians, and traffic -signs. +With the YOLOv5 model, you can easily detect and classify objects in images and videos with high accuracy and speed. The +model has been trained on a vast dataset and can recognize a wide range of objects, including pedestrians, traffic +signs, and cars. -In this app, you will be able to try out YOLOv5 on your own images and videos, and see the model in action. You can also -learn more about how YOLOv5 works and how it can be used in real-world applications. +Using this app, you can try out YOLOv5 on your images and videos, and observe how the model works in real-time. +Additionally, you can learn more about YOLOv5's functionality and how it can be integrated into real-world applications. -We hope you enjoy using YOLOv5 and seeing its capabilities firsthand. Thank you for choosing Ultralytics for your object -detection needs! \ No newline at end of file +We are confident that you will enjoy using YOLOv5 and be amazed at its capabilities. Thank you for choosing Ultralytics +for your AI solutions. \ No newline at end of file diff --git a/docs/cfg.md b/docs/cfg.md deleted file mode 100644 index d7e2c60..0000000 --- a/docs/cfg.md +++ /dev/null @@ -1,236 +0,0 @@ -YOLO settings and hyperparameters play a critical role in the model's performance, speed, and accuracy. These settings -and hyperparameters can affect the model's behavior at various stages of the model development process, including -training, validation, and prediction. - -YOLOv8 'yolo' CLI commands use the following syntax: - -!!! example "" - - === "CLI" - - ```bash - yolo TASK MODE ARGS - ``` - -Where: - -- `TASK` (optional) is one of `[detect, segment, classify]`. If it is not passed explicitly YOLOv8 will try to guess - the `TASK` from the model type. -- `MODE` (required) is one of `[train, val, predict, export]` -- `ARGS` (optional) are any number of custom `arg=value` pairs like `imgsz=320` that override defaults. - For a full list of available `ARGS` see the [Configuration](cfg.md) page and `defaults.yaml` - GitHub [source](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/yolo/cfg/default.yaml). - -#### Tasks - -YOLO models can be used for a variety of tasks, including detection, segmentation, and classification. 
These tasks -differ in the type of output they produce and the specific problem they are designed to solve. - -- **Detect**: Detection tasks involve identifying and localizing objects or regions of interest in an image or video. - YOLO models can be used for object detection tasks by predicting the bounding boxes and class labels of objects in an - image. -- **Segment**: Segmentation tasks involve dividing an image or video into regions or pixels that correspond to - different objects or classes. YOLO models can be used for image segmentation tasks by predicting a mask or label for - each pixel in an image. -- **Classify**: Classification tasks involve assigning a class label to an input, such as an image or text. YOLO - models can be used for image classification tasks by predicting the class label of an input image. - -#### Modes - -YOLO models can be used in different modes depending on the specific problem you are trying to solve. These modes -include train, val, and predict. - -- **Train**: The train mode is used to train the model on a dataset. This mode is typically used during the development - and - testing phase of a model. -- **Val**: The val mode is used to evaluate the model's performance on a validation dataset. This mode is typically used - to - tune the model's hyperparameters and detect overfitting. -- **Predict**: The predict mode is used to make predictions with the model on new data. This mode is typically used in - production or when deploying the model to users. - -| Key | Value | Description | -|--------|----------|-----------------------------------------------------------------------------------------------| -| task | 'detect' | inference task, i.e. detect, segment, or classify | -| mode | 'train' | YOLO mode, i.e. train, val, predict, or export | -| resume | False | resume training from last checkpoint or custom checkpoint if passed as resume=path/to/best.pt | -| model | null | path to model file, i.e. yolov8n.pt, yolov8n.yaml | -| data | null | path to data file, i.e. coco128.yaml | - -### Training - -Training settings for YOLO models refer to the various hyperparameters and configurations used to train the model on a -dataset. These settings can affect the model's performance, speed, and accuracy. Some common YOLO training settings -include the batch size, learning rate, momentum, and weight decay. Other factors that may affect the training process -include the choice of optimizer, the choice of loss function, and the size and composition of the training dataset. It -is important to carefully tune and experiment with these settings to achieve the best possible performance for a given -task. - -| Key | Value | Description | -|-----------------|--------|--------------------------------------------------------------------------------| -| model | null | path to model file, i.e. yolov8n.pt, yolov8n.yaml | -| data | null | path to data file, i.e. coco128.yaml | -| epochs | 100 | number of epochs to train for | -| patience | 50 | epochs to wait for no observable improvement for early stopping of training | -| batch | 16 | number of images per batch (-1 for AutoBatch) | -| imgsz | 640 | size of input images as integer or w,h | -| save | True | save train checkpoints and predict results | -| save_period | -1 | Save checkpoint every x epochs (disabled if < 1) | -| cache | False | True/ram, disk or False. Use cache for data loading | -| device | null | device to run on, i.e. 
cuda device=0 or device=0,1,2,3 or device=cpu | -| workers | 8 | number of worker threads for data loading (per RANK if DDP) | -| project | null | project name | -| name | null | experiment name | -| exist_ok | False | whether to overwrite existing experiment | -| pretrained | False | whether to use a pretrained model | -| optimizer | 'SGD' | optimizer to use, choices=['SGD', 'Adam', 'AdamW', 'RMSProp'] | -| verbose | False | whether to print verbose output | -| seed | 0 | random seed for reproducibility | -| deterministic | True | whether to enable deterministic mode | -| single_cls | False | train multi-class data as single-class | -| image_weights | False | use weighted image selection for training | -| rect | False | support rectangular training | -| cos_lr | False | use cosine learning rate scheduler | -| close_mosaic | 10 | disable mosaic augmentation for final 10 epochs | -| resume | False | resume training from last checkpoint | -| lr0 | 0.01 | initial learning rate (i.e. SGD=1E-2, Adam=1E-3) | -| lrf | 0.01 | final learning rate (lr0 * lrf) | -| momentum | 0.937 | SGD momentum/Adam beta1 | -| weight_decay | 0.0005 | optimizer weight decay 5e-4 | -| warmup_epochs | 3.0 | warmup epochs (fractions ok) | -| warmup_momentum | 0.8 | warmup initial momentum | -| warmup_bias_lr | 0.1 | warmup initial bias lr | -| box | 7.5 | box loss gain | -| cls | 0.5 | cls loss gain (scale with pixels) | -| dfl | 1.5 | dfl loss gain | -| fl_gamma | 0.0 | focal loss gamma (efficientDet default gamma=1.5) | -| label_smoothing | 0.0 | label smoothing (fraction) | -| nbs | 64 | nominal batch size | -| overlap_mask | True | masks should overlap during training (segment train only) | -| mask_ratio | 4 | mask downsample ratio (segment train only) | -| dropout | 0.0 | use dropout regularization (classify train only) | -| val | True | validate/test during training | - -### Prediction - -Prediction settings for YOLO models refer to the various hyperparameters and configurations used to make predictions -with the model on new data. These settings can affect the model's performance, speed, and accuracy. Some common YOLO -prediction settings include the confidence threshold, non-maximum suppression (NMS) threshold, and the number of classes -to consider. Other factors that may affect the prediction process include the size and format of the input data, the -presence of additional features such as masks or multiple labels per box, and the specific task the model is being used -for. It is important to carefully tune and experiment with these settings to achieve the best possible performance for a -given task. - -| Key | Value | Description | -|----------------|----------------------|----------------------------------------------------------| -| source | 'ultralytics/assets' | source directory for images or videos | -| conf | 0.25 | object confidence threshold for detection | -| iou | 0.7 | intersection over union (IoU) threshold for NMS | -| half | False | use half precision (FP16) | -| device | null | device to run on, i.e. 
cuda device=0/1/2/3 or device=cpu | -| show | False | show results if possible | -| save | False | save images with results | -| save_txt | False | save results as .txt file | -| save_conf | False | save results with confidence scores | -| save_crop | False | save cropped images with results | -| hide_labels | False | hide labels | -| hide_conf | False | hide confidence scores | -| max_det | 300 | maximum number of detections per image | -| vid_stride | False | video frame-rate stride | -| line_thickness | 3 | bounding box thickness (pixels) | -| visualize | False | visualize model features | -| augment | False | apply image augmentation to prediction sources | -| agnostic_nms | False | class-agnostic NMS | -| retina_masks | False | use high-resolution segmentation masks | -| classes | null | filter results by class, i.e. class=0, or class=[0,2,3] | -| box | True | Show boxes in segmentation predictions | - -### Validation - -Validation settings for YOLO models refer to the various hyperparameters and configurations used to -evaluate the model's performance on a validation dataset. These settings can affect the model's performance, speed, and -accuracy. Some common YOLO validation settings include the batch size, the frequency with which validation is performed -during training, and the metrics used to evaluate the model's performance. Other factors that may affect the validation -process include the size and composition of the validation dataset and the specific task the model is being used for. It -is important to carefully tune and experiment with these settings to ensure that the model is performing well on the -validation dataset and to detect and prevent overfitting. - -| Key | Value | Description | -|-------------|-------|--------------------------------------------------------------------| -| save_json | False | save results to JSON file | -| save_hybrid | False | save hybrid version of labels (labels + additional predictions) | -| conf | 0.001 | object confidence threshold for detection | -| iou | 0.6 | intersection over union (IoU) threshold for NMS | -| max_det | 300 | maximum number of detections per image | -| half | True | use half precision (FP16) | -| device | null | device to run on, i.e. cuda device=0/1/2/3 or device=cpu | -| dnn | False | use OpenCV DNN for ONNX inference | -| plots | False | show plots during training | -| rect | False | support rectangular evaluation | -| split | val | dataset split to use for validation, i.e. 'val', 'test' or 'train' | - -### Export - -Export settings for YOLO models refer to the various configurations and options used to save or -export the model for use in other environments or platforms. These settings can affect the model's performance, size, -and compatibility with different systems. Some common YOLO export settings include the format of the exported model -file (e.g. ONNX, TensorFlow SavedModel), the device on which the model will be run (e.g. CPU, GPU), and the presence of -additional features such as masks or multiple labels per box. Other factors that may affect the export process include -the specific task the model is being used for and the requirements or constraints of the target environment or platform. -It is important to carefully consider and configure these settings to ensure that the exported model is optimized for -the intended use case and can be used effectively in the target environment. 
- -### Augmentation - -Augmentation settings for YOLO models refer to the various transformations and modifications -applied to the training data to increase the diversity and size of the dataset. These settings can affect the model's -performance, speed, and accuracy. Some common YOLO augmentation settings include the type and intensity of the -transformations applied (e.g. random flips, rotations, cropping, color changes), the probability with which each -transformation is applied, and the presence of additional features such as masks or multiple labels per box. Other -factors that may affect the augmentation process include the size and composition of the original dataset and the -specific task the model is being used for. It is important to carefully tune and experiment with these settings to -ensure that the augmented dataset is diverse and representative enough to train a high-performing model. - -| Key | Value | Description | -|-------------|-------|-------------------------------------------------| -| hsv_h | 0.015 | image HSV-Hue augmentation (fraction) | -| hsv_s | 0.7 | image HSV-Saturation augmentation (fraction) | -| hsv_v | 0.4 | image HSV-Value augmentation (fraction) | -| degrees | 0.0 | image rotation (+/- deg) | -| translate | 0.1 | image translation (+/- fraction) | -| scale | 0.5 | image scale (+/- gain) | -| shear | 0.0 | image shear (+/- deg) | -| perspective | 0.0 | image perspective (+/- fraction), range 0-0.001 | -| flipud | 0.0 | image flip up-down (probability) | -| fliplr | 0.5 | image flip left-right (probability) | -| mosaic | 1.0 | image mosaic (probability) | -| mixup | 0.0 | image mixup (probability) | -| copy_paste | 0.0 | segment copy-paste (probability) | - -### Logging, checkpoints, plotting and file management - -Logging, checkpoints, plotting, and file management are important considerations when training a YOLO model. - -- Logging: It is often helpful to log various metrics and statistics during training to track the model's progress and - diagnose any issues that may arise. This can be done using a logging library such as TensorBoard or by writing log - messages to a file. -- Checkpoints: It is a good practice to save checkpoints of the model at regular intervals during training. This allows - you to resume training from a previous point if the training process is interrupted or if you want to experiment with - different training configurations. -- Plotting: Visualizing the model's performance and training progress can be helpful for understanding how the model is - behaving and identifying potential issues. This can be done using a plotting library such as matplotlib or by - generating plots using a logging library such as TensorBoard. -- File management: Managing the various files generated during the training process, such as model checkpoints, log - files, and plots, can be challenging. It is important to have a clear and organized file structure to keep track of - these files and make it easy to access and analyze them as needed. - -Effective logging, checkpointing, plotting, and file management can help you keep track of the model's progress and make -it easier to debug and optimize the training process. - -| Key | Value | Description | -|----------|--------|------------------------------------------------------------------------------------------------| -| project | 'runs' | project name | -| name | 'exp' | experiment name. `exp` gets automatically incremented if not specified, i.e, `exp`, `exp2` ... 
| -| exist_ok | False | whether to overwrite existing experiment | -| plots | False | save plots during train/val | -| save | False | save train checkpoints and predict results | diff --git a/docs/hub.md b/docs/hub.md index cf45130..c7d8c0d 100644 --- a/docs/hub.md +++ b/docs/hub.md @@ -3,7 +3,6 @@
-
@@ -32,7 +31,6 @@ Open In Colab
-
[Ultralytics HUB](https://hub.ultralytics.com) is a new no-code online tool developed diff --git a/docs/modes/benchmark.md b/docs/modes/benchmark.md new file mode 100644 index 0000000..b57e093 --- /dev/null +++ b/docs/modes/benchmark.md @@ -0,0 +1,65 @@ + + +**Benchmark mode** is used to profile the speed and accuracy of various export formats for YOLOv8. The benchmarks +provide information on the size of the exported format, its `mAP50-95` metrics (for object detection and segmentation) +or `accuracy_top5` metrics (for classification), and the inference time in milliseconds per image across various export +formats like ONNX, OpenVINO, TensorRT and others. This information can help users choose the optimal export format for +their specific use case based on their requirements for speed and accuracy. + +!!! tip "Tip" + + * Export to ONNX or OpenVINO for up to 3x CPU speedup. + * Export to TensorRT for up to 5x GPU speedup. + +## Usage Examples + +Run YOLOv8n benchmarks on all supported export formats, including ONNX, TensorRT, etc. See the Arguments section below for a +full list of benchmark arguments. + +!!! example "" + + === "Python" + + ```python + from ultralytics.yolo.utils.benchmarks import benchmark + + # Benchmark + benchmark(model='yolov8n.pt', imgsz=640, half=False, device=0) + ``` + === "CLI" + + ```bash + yolo benchmark model=yolov8n.pt imgsz=640 half=False device=0 + ``` + +## Arguments + +Arguments such as `model`, `imgsz`, `half`, `device`, and `hard_fail` provide users with the flexibility to fine-tune +the benchmarks to their specific needs and compare the performance of different export formats with ease. + +| Key | Value | Description |
|-------------|---------|----------------------------------------------------------------------|
| `model` | `None` | path to model file, i.e. yolov8n.pt, yolov8n.yaml |
| `imgsz` | `640` | image size as scalar or (h, w) list, i.e. (640, 480) |
| `half` | `False` | FP16 quantization |
| `device` | `None` | device to run on, i.e. cuda device=0 or device=0,1,2,3 or device=cpu |
| `hard_fail` | `False` | do not continue on error (bool), or val floor threshold (float) |

## Export Formats

Benchmarks will attempt to run automatically on all possible export formats below.
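When benchmarks gate a CI run, `hard_fail` doubles as a quality floor: per the table above, a float value sets a minimum validation-metric threshold rather than a simple continue-on-error flag. A minimal sketch (the `0.26` floor here is purely illustrative; the CI workflow earlier in this patch gates classification models at `hard_fail=0.61`):

```python
from ultralytics.yolo.utils.benchmarks import benchmark

# Benchmark all export formats and treat any format scoring below
# 0.26 mAP50-95 as a failure (floor value chosen only for illustration)
benchmark(model='yolov8n.pt', imgsz=640, half=False, hard_fail=0.26)
```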
+
+| Format | `format` Argument | Model | Metadata |
+|--------------------------------------------------------------------|-------------------|---------------------------|----------|
+| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | ✅ |
+| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n.torchscript` | ✅ |
+| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n.onnx` | ✅ |
+| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n_openvino_model/` | ✅ |
+| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n.engine` | ✅ |
+| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n.mlmodel` | ✅ |
+| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n_saved_model/` | ✅ |
+| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n.pb` | ❌ |
+| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n.tflite` | ✅ |
+| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ |
+| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n_web_model/` | ✅ |
+| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n_paddle_model/` | ✅ |
diff --git a/docs/modes/export.md b/docs/modes/export.md new file mode 100644 index 0000000..3c4deed --- /dev/null +++ b/docs/modes/export.md @@ -0,0 +1,81 @@ + + +**Export mode** is used for exporting a YOLOv8 model to a format that can be used for deployment. In this mode, the +model is converted to a format that can be used by other software applications or hardware devices. This mode is useful +when deploying the model to production environments. + +!!! tip "Tip" + + * Export to ONNX or OpenVINO for up to 3x CPU speedup. + * Export to TensorRT for up to 5x GPU speedup. + +## Usage Examples + +Export a YOLOv8n model to a different format like ONNX or TensorRT. See Arguments section below for a full list of +export arguments. + +!!! example "" + + === "Python" + + ```python + from ultralytics import YOLO + + # Load a model + model = YOLO("yolov8n.pt") # load an official model + model = YOLO("path/to/best.pt") # load a custom trained + + # Export the model + model.export(format="onnx") + ``` + === "CLI" + + ```bash + yolo export model=yolov8n.pt format=onnx # export official model + yolo export model=path/to/best.pt format=onnx # export custom trained model + ``` + +## Arguments + +Export settings for YOLO models refer to the various configurations and options used to save or +export the model for use in other environments or platforms. These settings can affect the model's performance, size, +and compatibility with different systems. Some common YOLO export settings include the format of the exported model +file (e.g. ONNX, TensorFlow SavedModel), the device on which the model will be run (e.g. CPU, GPU), and the presence of +additional features such as masks or multiple labels per box. Other factors that may affect the export process include +the specific task the model is being used for and the requirements or constraints of the target environment or platform. +It is important to carefully consider and configure these settings to ensure that the exported model is optimized for +the intended use case and can be used effectively in the target environment.
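To make these settings concrete, here is a minimal sketch combining a few of the arguments from the table below (flag support varies by format and package version, so treat the exact combination as illustrative rather than canonical):

```python
from ultralytics import YOLO

# Export YOLOv8n to ONNX with dynamic input axes and a simplified graph
model = YOLO("yolov8n.pt")
model.export(format="onnx", dynamic=True, simplify=True)  # writes yolov8n.onnx
```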
+
+| Key | Value | Description |
+|-------------|-----------------|------------------------------------------------------|
+| `format` | `'torchscript'` | format to export to |
+| `imgsz` | `640` | image size as scalar or (h, w) list, i.e. (640, 480) |
+| `keras` | `False` | use Keras for TF SavedModel export |
+| `optimize` | `False` | TorchScript: optimize for mobile |
+| `half` | `False` | FP16 quantization |
+| `int8` | `False` | INT8 quantization |
+| `dynamic` | `False` | ONNX/TF/TensorRT: dynamic axes |
+| `simplify` | `False` | ONNX: simplify model |
+| `opset` | `None` | ONNX: opset version (optional, defaults to latest) |
+| `workspace` | `4` | TensorRT: workspace size (GB) |
+| `nms` | `False` | CoreML: add NMS |
+
+## Export Formats
+
+Available YOLOv8 export formats are in the table below. You can export to any format using the `format` argument,
+i.e. `format='onnx'` or `format='engine'`.
+
+| Format | `format` Argument | Model | Metadata |
+|--------------------------------------------------------------------|-------------------|---------------------------|----------|
+| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | ✅ |
+| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n.torchscript` | ✅ |
+| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n.onnx` | ✅ |
+| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n_openvino_model/` | ✅ |
+| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n.engine` | ✅ |
+| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n.mlmodel` | ✅ |
+| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n_saved_model/` | ✅ |
+| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n.pb` | ❌ |
+| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n.tflite` | ✅ |
+| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ |
+| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n_web_model/` | ✅ |
+| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n_paddle_model/` | ✅ |
diff --git a/docs/modes/index.md b/docs/modes/index.md new file mode 100644 index 0000000..14e2d85 --- /dev/null +++ b/docs/modes/index.md @@ -0,0 +1,62 @@ +# YOLOv8 Modes + + + +Ultralytics YOLOv8 supports several **modes** that can be used to perform different tasks. These modes are: + +**Train**: For training a YOLOv8 model on a custom dataset. +**Val**: For validating a YOLOv8 model after it has been trained. +**Predict**: For making predictions using a trained YOLOv8 model on new images or videos. +**Export**: For exporting a YOLOv8 model to a format that can be used for deployment. +**Track**: For tracking objects in real-time using a YOLOv8 model. +**Benchmark**: For benchmarking YOLOv8 exports (ONNX, TensorRT, etc.) speed and accuracy. + +## [Train](train.md) + +Train mode is used for training a YOLOv8 model on a custom dataset. In this mode, the model is trained using the +specified dataset and hyperparameters. The training process involves optimizing the model's parameters so that it can +accurately predict the classes and locations of objects in an image. + +[Train Examples](train.md){ .md-button .md-button--primary} + +## [Val](val.md) + +Val mode is used for validating a YOLOv8 model after it has been trained.
In this mode, the model is evaluated on a +validation set to measure its accuracy and generalization performance. This mode can be used to tune the hyperparameters +of the model to improve its performance. + +[Val Examples](val.md){ .md-button .md-button--primary} + +## [Predict](predict.md) + +Predict mode is used for making predictions using a trained YOLOv8 model on new images or videos. In this mode, the +model is loaded from a checkpoint file, and the user can provide images or videos to perform inference. The model +predicts the classes and locations of objects in the input images or videos. + +[Predict Examples](predict.md){ .md-button .md-button--primary} + +## [Export](export.md) + +Export mode is used for exporting a YOLOv8 model to a format that can be used for deployment. In this mode, the model is +converted to a format that can be used by other software applications or hardware devices. This mode is useful when +deploying the model to production environments. + +[Export Examples](export.md){ .md-button .md-button--primary} + +## [Track](track.md) + +Track mode is used for tracking objects in real-time using a YOLOv8 model. In this mode, the model is loaded from a +checkpoint file, and the user can provide a live video stream to perform real-time object tracking. This mode is useful +for applications such as surveillance systems or self-driving cars. + +[Track Examples](track.md){ .md-button .md-button--primary} + +## [Benchmark](benchmark.md) + +Benchmark mode is used to profile the speed and accuracy of various export formats for YOLOv8. The benchmarks provide +information on the size of the exported format, its `mAP50-95` metrics (for object detection and segmentation) +or `accuracy_top5` metrics (for classification), and the inference time in milliseconds per image across various export +formats like ONNX, OpenVINO, TensorRT and others. This information can help users choose the optimal export format for +their specific use case based on their requirements for speed and accuracy. + +[Benchmark Examples](benchmark.md){ .md-button .md-button--primary} diff --git a/docs/predict.md b/docs/modes/predict.md similarity index 54% rename from docs/predict.md rename to docs/modes/predict.md index a52b0d8..ed60327 100644 --- a/docs/predict.md +++ b/docs/modes/predict.md @@ -1,10 +1,12 @@ + + Inference or prediction of a task returns a list of `Results` objects. Alternatively, in the streaming mode, it returns a generator of `Results` objects which is memory efficient. Streaming mode can be enabled by passing `stream=True` in predictor's call method. !!! example "Predict" - === "Getting a List" + === "Return a List" ```python inputs = [img, img] # list of np arrays @@ -16,7 +18,7 @@ predictor's call method. probs = result.probs # Class probabilities for classification outputs ``` - === "Getting a Generator" + === "Return a Generator" ```python inputs = [img, img] # list of numpy arrays @@ -51,6 +53,46 @@ source can be used as a stream and the model argument required for that source. | YouTube | ✓ | `'https://youtu.be/Zgi9g1ksQHc'` | `str` | | | stream | ✓ | `'rtsp://example.com/media.mp4'` | `str` | RTSP, RTMP, HTTP | +## Image Formats + +For images, YOLOv8 supports a variety of image formats defined +in [yolo/data/utils.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/yolo/data/utils.py). 
The +following suffixes are valid for images: + +| Image Suffixes | Example Predict Command | Reference | +|----------------|----------------------------------|--------------------------------------------------------------------------------------| +| bmp | `yolo predict source=image.bmp` | [Microsoft](https://docs.microsoft.com/en-us/windows/win32/gdi/bitmap-file-format) | +| dng | `yolo predict source=image.dng` | [Adobe](https://helpx.adobe.com/photoshop/using/digital-negative.html) | +| jpeg | `yolo predict source=image.jpeg` | [Joint Photographic Experts Group](https://jpeg.org/jpeg/) | +| jpg | `yolo predict source=image.jpg` | [Joint Photographic Experts Group](https://jpeg.org/jpeg/) | +| mpo | `yolo predict source=image.mpo` | [CIPA](https://www.cipa.jp/std/documents/e/DC-007-Translation-2018-E.pdf) | +| png | `yolo predict source=image.png` | [Portable Network Graphics](https://www.w3.org/TR/PNG/) | +| tif | `yolo predict source=image.tif` | [Adobe](https://www.adobe.com/content/dam/acom/en/products/photoshop/pdfs/tiff6.pdf) | +| tiff | `yolo predict source=image.tiff` | [Adobe](https://www.adobe.com/content/dam/acom/en/products/photoshop/pdfs/tiff6.pdf) | +| webp | `yolo predict source=image.webp` | [Google Developers](https://developers.google.com/speed/webp) | +| pfm | `yolo predict source=image.pfm` | [HDR Labs](http://hdrlabs.com/tools/pfrenchy/) | + +## Video Formats + +For videos, YOLOv8 also supports a variety of video formats defined +in [yolo/data/utils.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/yolo/data/utils.py). The +following suffixes are valid for videos: + +| Video Suffixes | Example Predict Command | Reference | +|----------------|----------------------------------|----------------------------------------------------------------------------------------------------------------| +| asf | `yolo predict source=video.asf` | [Microsoft](https://docs.microsoft.com/en-us/windows/win32/wmformat/asf-file-structure) | +| avi | `yolo predict source=video.avi` | [Microsoft](https://docs.microsoft.com/en-us/windows/win32/directshow/avi-riff-file-reference) | +| gif | `yolo predict source=video.gif` | [CompuServe](https://www.w3.org/Graphics/GIF/spec-gif89a.txt) | +| m4v | `yolo predict source=video.m4v` | [Apple](https://developer.apple.com/library/archive/documentation/QuickTime/QTFF/QTFFChap2/qtff2.html) | +| mkv | `yolo predict source=video.mkv` | [Matroska](https://matroska.org/technical/specs/index.html) | +| mov | `yolo predict source=video.mov` | [Apple](https://developer.apple.com/library/archive/documentation/QuickTime/QTFF/QTFFPreface/qtffPreface.html) | +| mp4 | `yolo predict source=video.mp4` | [ISO 68939](https://www.iso.org/standard/68939.html) | +| mpeg | `yolo predict source=video.mpeg` | [ISO 56021](https://www.iso.org/standard/56021.html) | +| mpg | `yolo predict source=video.mpg` | [ISO 56021](https://www.iso.org/standard/56021.html) | +| ts | `yolo predict source=video.ts` | [MPEG Transport Stream](https://en.wikipedia.org/wiki/MPEG_transport_stream) | +| wmv | `yolo predict source=video.wmv` | [Microsoft](https://docs.microsoft.com/en-us/windows/win32/wmformat/wmv-file-structure) | +| webm | `yolo predict source=video.webm` | [Google Developers](https://developers.google.com/media/vp9/getting-started/webm-file-format) | + ## Working with Results Results object consists of these component objects: @@ -116,7 +158,7 @@ results = model(inputs) results[0].probs # cls prob, (num_class, ) ``` -Class reference documentation for `Results` 
module and its components can be found [here](reference/results.md) +Class reference documentation for `Results` module and its components can be found [here](../reference/results.md) ## Plotting results diff --git a/docs/tasks/tracking.md b/docs/modes/track.md similarity index 96% rename from docs/tasks/tracking.md rename to docs/modes/track.md index 81734fe..551c502 100644 --- a/docs/tasks/tracking.md +++ b/docs/modes/track.md @@ -1,3 +1,5 @@ + + Object tracking is a task that involves identifying the location and class of objects, then assigning a unique ID to that detection in video streams. @@ -87,9 +89,8 @@ any configurations (except the `tracker_type`) you need to. ```bash yolo track model=yolov8n.pt source="https://youtu.be/Zgi9g1ksQHc" tracker='custom_tracker.yaml' - ``` Please refer to [ultralytics/tracker/cfg](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/tracker/cfg) -page. +page diff --git a/docs/modes/train.md b/docs/modes/train.md new file mode 100644 index 0000000..dc1f8a6 --- /dev/null +++ b/docs/modes/train.md @@ -0,0 +1,88 @@ + + +**Train mode** is used for training a YOLOv8 model on a custom dataset. In this mode, the model is trained using the +specified dataset and hyperparameters. The training process involves optimizing the model's parameters so that it can +accurately predict the classes and locations of objects in an image. + +!!! tip "Tip" + + * YOLOv8 datasets like COCO, VOC, ImageNet and many others automatically download on first use, i.e. `yolo train data=coco.yaml` + +## Usage Examples + +Train YOLOv8n on the COCO128 dataset for 100 epochs at image size 640. See Arguments section below for a full list of +training arguments. + +!!! example "" + + === "Python" + + ```python + from ultralytics import YOLO + + # Load a model + model = YOLO("yolov8n.yaml") # build a new model from scratch + model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training) + + # Train the model + model.train(data="coco128.yaml", epochs=100, imgsz=640) + ``` + === "CLI" + + ```bash + yolo detect train data=coco128.yaml model=yolov8n.pt epochs=100 imgsz=640 + ``` + +## Arguments + +Training settings for YOLO models refer to the various hyperparameters and configurations used to train the model on a +dataset. These settings can affect the model's performance, speed, and accuracy. Some common YOLO training settings +include the batch size, learning rate, momentum, and weight decay. Other factors that may affect the training process +include the choice of optimizer, the choice of loss function, and the size and composition of the training dataset. It +is important to carefully tune and experiment with these settings to achieve the best possible performance for a given +task. + +| Key | Value | Description |
|-------------------|----------|-----------------------------------------------------------------------------|
| `model` | `None` | path to model file, i.e. yolov8n.pt, yolov8n.yaml |
| `data` | `None` | path to data file, i.e. coco128.yaml |
| `epochs` | `100` | number of epochs to train for |
| `patience` | `50` | epochs to wait for no observable improvement for early stopping of training |
| `batch` | `16` | number of images per batch (-1 for AutoBatch) |
| `imgsz` | `640` | size of input images as integer or w,h |
| `save` | `True` | save train checkpoints and predict results |
| `save_period` | `-1` | Save checkpoint every x epochs (disabled if < 1) |
| `cache` | `False` | True/ram, disk or False.
Use cache for data loading |
| `device` | `None` | device to run on, i.e. cuda device=0 or device=0,1,2,3 or device=cpu |
| `workers` | `8` | number of worker threads for data loading (per RANK if DDP) |
| `project` | `None` | project name |
| `name` | `None` | experiment name |
| `exist_ok` | `False` | whether to overwrite existing experiment |
| `pretrained` | `False` | whether to use a pretrained model |
| `optimizer` | `'SGD'` | optimizer to use, choices=['SGD', 'Adam', 'AdamW', 'RMSProp'] |
| `verbose` | `False` | whether to print verbose output |
| `seed` | `0` | random seed for reproducibility |
| `deterministic` | `True` | whether to enable deterministic mode |
| `single_cls` | `False` | train multi-class data as single-class |
| `image_weights` | `False` | use weighted image selection for training |
| `rect` | `False` | support rectangular training |
| `cos_lr` | `False` | use cosine learning rate scheduler |
| `close_mosaic` | `10` | disable mosaic augmentation for final 10 epochs |
| `resume` | `False` | resume training from last checkpoint |
| `lr0` | `0.01` | initial learning rate (i.e. SGD=1E-2, Adam=1E-3) |
| `lrf` | `0.01` | final learning rate (lr0 * lrf) |
| `momentum` | `0.937` | SGD momentum/Adam beta1 |
| `weight_decay` | `0.0005` | optimizer weight decay 5e-4 |
| `warmup_epochs` | `3.0` | warmup epochs (fractions ok) |
| `warmup_momentum` | `0.8` | warmup initial momentum |
| `warmup_bias_lr` | `0.1` | warmup initial bias lr |
| `box` | `7.5` | box loss gain |
| `cls` | `0.5` | cls loss gain (scale with pixels) |
| `dfl` | `1.5` | dfl loss gain |
| `fl_gamma` | `0.0` | focal loss gamma (efficientDet default gamma=1.5) |
| `label_smoothing` | `0.0` | label smoothing (fraction) |
| `nbs` | `64` | nominal batch size |
| `overlap_mask` | `True` | masks should overlap during training (segment train only) |
| `mask_ratio` | `4` | mask downsample ratio (segment train only) |
| `dropout` | `0.0` | use dropout regularization (classify train only) |
| `val` | `True` | validate/test during training |
diff --git a/docs/modes/val.md b/docs/modes/val.md new file mode 100644 index 0000000..be4175b --- /dev/null +++ b/docs/modes/val.md @@ -0,0 +1,86 @@ + + +**Val mode** is used for validating a YOLOv8 model after it has been trained. In this mode, the model is evaluated on a +validation set to measure its accuracy and generalization performance. This mode can be used to tune the hyperparameters +of the model to improve its performance. + +!!! tip "Tip" + + * YOLOv8 models automatically remember their training settings, so you can validate a model at the same image size and on the original dataset easily with just `yolo val model=yolov8n.pt` or `model('yolov8n.pt').val()` + +## Usage Examples + +Validate trained YOLOv8n model accuracy on the COCO128 dataset. No arguments need to be passed as the `model` retains its +training `data` and arguments as model attributes. See Arguments section below for a full list of validation arguments. + +!!!
example "" + + === "Python" + + ```python + from ultralytics import YOLO + + # Load a model + model = YOLO("yolov8n.pt") # load an official model + model = YOLO("path/to/best.pt") # load a custom model + + # Validate the model + metrics = model.val() # no arguments needed, dataset and settings remembered + metrics.box.map # map50-95 + metrics.box.map50 # map50 + metrics.box.map75 # map75 + metrics.box.maps # a list contains map50-95 of each category + ``` + === "CLI" + + ```bash + yolo detect val model=yolov8n.pt # val official model + yolo detect val model=path/to/best.pt # val custom model + ``` + +## Arguments + +Validation settings for YOLO models refer to the various hyperparameters and configurations used to +evaluate the model's performance on a validation dataset. These settings can affect the model's performance, speed, and +accuracy. Some common YOLO validation settings include the batch size, the frequency with which validation is performed +during training, and the metrics used to evaluate the model's performance. Other factors that may affect the validation +process include the size and composition of the validation dataset and the specific task the model is being used for. It +is important to carefully tune and experiment with these settings to ensure that the model is performing well on the +validation dataset and to detect and prevent overfitting. + +| Key | Value | Description | +|---------------|---------|--------------------------------------------------------------------| +| `data` | `None` | path to data file, i.e. coco128.yaml | +| `imgsz` | `640` | image size as scalar or (h, w) list, i.e. (640, 480) | +| `batch` | `16` | number of images per batch (-1 for AutoBatch) | +| `save_json` | `False` | save results to JSON file | +| `save_hybrid` | `False` | save hybrid version of labels (labels + additional predictions) | +| `conf` | `0.001` | object confidence threshold for detection | +| `iou` | `0.6` | intersection over union (IoU) threshold for NMS | +| `max_det` | `300` | maximum number of detections per image | +| `half` | `True` | use half precision (FP16) | +| `device` | `None` | device to run on, i.e. cuda device=0/1/2/3 or device=cpu | +| `dnn` | `False` | use OpenCV DNN for ONNX inference | +| `plots` | `False` | show plots during training | +| `rect` | `False` | support rectangular evaluation | +| `split` | `val` | dataset split to use for validation, i.e. 'val', 'test' or 'train' | + +## Export Formats + +Available YOLOv8 export formats are in the table below. You can export to any format using the `format` argument, +i.e. `format='onnx'` or `format='engine'`. 
+
+| Format | `format` Argument | Model | Metadata |
+|--------------------------------------------------------------------|-------------------|---------------------------|----------|
+| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | ✅ |
+| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n.torchscript` | ✅ |
+| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n.onnx` | ✅ |
+| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n_openvino_model/` | ✅ |
+| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n.engine` | ✅ |
+| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n.mlmodel` | ✅ |
+| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n_saved_model/` | ✅ |
+| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n.pb` | ❌ |
+| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n.tflite` | ✅ |
+| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ |
+| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n_web_model/` | ✅ |
+| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n_paddle_model/` | ✅ |
diff --git a/docs/quickstart.md b/docs/quickstart.md index ac5e791..40777b1 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -43,7 +43,7 @@ CLI requires no customization or code. You can simply run all tasks from the ter yolo detect train model=yolov8n.pt data=coco128.yaml device=\'0,1,2,3\' ``` -[CLI Guide](cli.md){ .md-button .md-button--primary} +[CLI Guide](usage/cli.md){ .md-button .md-button--primary} ## Use with Python @@ -70,4 +70,4 @@ classification into their Python projects using YOLOv8. success = model.export(format="onnx") # export the model to ONNX format ``` -[Python Guide](python.md){.md-button .md-button--primary} +[Python Guide](usage/python.md){.md-button .md-button--primary} diff --git a/docs/tasks/classification.md b/docs/tasks/classify.md similarity index 79% rename from docs/tasks/classification.md rename to docs/tasks/classify.md index 6b60df1..b2da5b8 100644 --- a/docs/tasks/classification.md +++ b/docs/tasks/classify.md @@ -16,7 +16,7 @@ of that class are located or what their exact shape is. ## Train Train YOLOv8n-cls on the MNIST160 dataset for 100 epochs at image size 64. For a full list of available arguments -see the [Configuration](../cfg.md) page. +see the [Configuration](../usage/cfg.md) page. @@ -118,20 +118,21 @@ Export a YOLOv8n-cls model to a different format like ONNX, CoreML, etc.
yolo export model=path/to/best.pt format=onnx # export custom trained model ```
-Available YOLOv8-cls export formats include:
-
-| Format | `format=` | Model | Metadata |
-|--------------------------------------------------------------------|---------------|-------------------------------|----------|
-| [PyTorch](https://pytorch.org/) | - | `yolov8n-cls.pt` | ✅ |
-| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n-cls.torchscript` | ✅ |
-| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n-cls.onnx` | ✅ |
-| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n-cls_openvino_model/` | ✅ |
-| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n-cls.engine` | ✅ |
-| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n-cls.mlmodel` | ✅ |
-| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n-cls_saved_model/` | ✅ |
-| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n-cls.pb` | ❌ |
-| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n-cls.tflite` | ✅ |
-| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n-cls_edgetpu.tflite` | ✅ |
-| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n-cls_web_model/` | ✅ |
-| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n-cls_paddle_model/` | ✅ |
+Available YOLOv8-cls export formats are in the table below. You can predict or validate directly on exported models,
+i.e. `yolo predict model=yolov8n-cls.onnx`.
+
+| Format | `format` Argument | Model | Metadata |
+|--------------------------------------------------------------------|-------------------|-------------------------------|----------|
+| [PyTorch](https://pytorch.org/) | - | `yolov8n-cls.pt` | ✅ |
+| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n-cls.torchscript` | ✅ |
+| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n-cls.onnx` | ✅ |
+| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n-cls_openvino_model/` | ✅ |
+| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n-cls.engine` | ✅ |
+| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n-cls.mlmodel` | ✅ |
+| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n-cls_saved_model/` | ✅ |
+| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n-cls.pb` | ❌ |
+| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n-cls.tflite` | ✅ |
+| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n-cls_edgetpu.tflite` | ✅ |
+| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n-cls_web_model/` | ✅ |
+| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n-cls_paddle_model/` | ✅ |
diff --git a/docs/tasks/detection.md b/docs/tasks/detect.md similarity index 80% rename from docs/tasks/detection.md rename to docs/tasks/detect.md index d2f7c4f..5b5e545 100644 --- a/docs/tasks/detection.md +++ b/docs/tasks/detect.md @@ -16,7 +16,7 @@ scene, but don't need to know exactly where the object is or its exact shape. ## Train Train YOLOv8n on the COCO128 dataset for 100 epochs at image size 640. For a full list of available arguments see -the [Configuration](../cfg.md) page. +the [Configuration](../usage/cfg.md) page. !!!
example "" @@ -120,19 +120,20 @@ Export a YOLOv8n model to a different format like ONNX, CoreML, etc. yolo export model=path/to/best.pt format=onnx # export custom trained model ``` -Available YOLOv8 export formats include: - -| Format | `format=` | Model | Metadata | -|--------------------------------------------------------------------|---------------|---------------------------|----------| -| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | โœ… | -| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n.torchscript` | โœ… | -| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n.onnx` | โœ… | -| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n_openvino_model/` | โœ… | -| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n.engine` | โœ… | -| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n.mlmodel` | โœ… | -| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n_saved_model/` | โœ… | -| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n.pb` | โŒ | -| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n.tflite` | โœ… | -| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n_edgetpu.tflite` | โœ… | -| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n_web_model/` | โœ… | -| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n_paddle_model/` | โœ… | +Available YOLOv8 export formats are in the table below. You can predict or validate directly on exported models, +i.e. `yolo predict model=yolov8n.onnx`. + +| Format | `format` Argument | Model | Metadata | +|--------------------------------------------------------------------|-------------------|---------------------------|----------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | โœ… | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n.torchscript` | โœ… | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n.onnx` | โœ… | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n_openvino_model/` | โœ… | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n.engine` | โœ… | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n.mlmodel` | โœ… | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n_saved_model/` | โœ… | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n.pb` | โŒ | +| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n.tflite` | โœ… | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n_edgetpu.tflite` | โœ… | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n_web_model/` | โœ… | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n_paddle_model/` | โœ… | diff --git a/docs/tasks/index.md b/docs/tasks/index.md new file mode 100644 index 0000000..3276d53 --- /dev/null +++ b/docs/tasks/index.md @@ -0,0 +1,46 @@ +# Ultralytics YOLOv8 Tasks + +YOLOv8 is an AI framework that supports multiple computer vision **tasks**. The framework can be used to +perform [detection](detect.md), [segmentation](segment.md), [classification](classify.md), +and [keypoints](keypoints.md) detection. Each of these tasks has a different objective and use case. + + + +## [Detection](detect.md) + +Detection is the primary task supported by YOLOv8. 
It involves detecting objects in an image or video frame and drawing
+bounding boxes around them. The detected objects are classified into different categories based on their features.
+YOLOv8 can detect multiple objects in a single image or video frame with high accuracy and speed.
+
+[Detection Examples](detect.md){ .md-button .md-button--primary}
+
+## [Segmentation](segment.md)
+
+Segmentation is a task that involves segmenting an image into different regions based on the content of the image. Each
+region is assigned a label based on its content. This task is useful in applications such as medical imaging and
+scene understanding. YOLOv8 segmentation models extend the detection architecture with a mask prediction head.
+
+[Segmentation Examples](segment.md){ .md-button .md-button--primary}
+
+## [Classification](classify.md)
+
+Classification is a task that involves classifying an image into different categories. YOLOv8 can be used to classify
+images based on their content. Classification models reuse the YOLOv8 detection backbone with a classification head.
+
+[Classification Examples](classify.md){ .md-button .md-button--primary}
+
+
+
+## Conclusion
+
+YOLOv8 supports multiple tasks, including detection, segmentation, classification, and keypoints detection. Each of
+these tasks has different objectives and use cases. By understanding the differences between these tasks, you can choose
+the appropriate task for your computer vision application.
\ No newline at end of file
diff --git a/docs/tasks/keypoints.md b/docs/tasks/keypoints.md
new file mode 100644
index 0000000..6c4c74d
--- /dev/null
+++ b/docs/tasks/keypoints.md
@@ -0,0 +1,141 @@
+Keypoint estimation is a task that involves identifying the location of specific points in an image, usually referred
+to as keypoints. The keypoints can represent various parts of an object such as joints, landmarks, or other distinctive
+features. The locations of the keypoints are usually represented as a set of 2D `[x, y]` or 3D `[x, y, visible]`
+coordinates.
+
+
+
+The output of a keypoint detector is a set of points that represent the keypoints on the object in the image, usually
+along with the confidence scores for each point. Keypoint estimation is a good choice when you need to identify specific
+parts of an object in a scene and their location in relation to each other.
+
+!!! tip "Tip"
+
+    YOLOv8 _keypoints_ models use the `-kpts` suffix, i.e. `yolov8n-kpts.pt`. These models are trained on the COCO dataset and are suitable for a variety of keypoint estimation tasks.
+
+[Models](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/models/v8){ .md-button .md-button--primary}
+
+## Train TODO
+
+Train an OpenPose model on a custom keypoints dataset using the OpenPose framework. For more information, see the
+OpenPose Training page.
+
+!!! example ""
+
+    === "Python"
+
+        ```python
+        from ultralytics import YOLO
+
+        # Load a model
+        model = YOLO("yolov8n.yaml")  # build a new model from scratch
+        model = YOLO("yolov8n.pt")  # load a pretrained model (recommended for training)
+
+        # Train the model
+        model.train(data="coco128.yaml", epochs=100, imgsz=640)
+        ```
+    === "CLI"
+
+        ```bash
+        yolo detect train data=coco128.yaml model=yolov8n.pt epochs=100 imgsz=640
+        ```
+
+## Val TODO
+
+Validate trained YOLOv8n model accuracy on the COCO128 dataset. No arguments need to be passed as the `model` retains
+its training `data` and arguments as model attributes.
+
+!!!
example "" + + === "Python" + + ```python + from ultralytics import YOLO + + # Load a model + model = YOLO("yolov8n.pt") # load an official model + model = YOLO("path/to/best.pt") # load a custom model + + # Validate the model + metrics = model.val() # no arguments needed, dataset and settings remembered + metrics.box.map # map50-95 + metrics.box.map50 # map50 + metrics.box.map75 # map75 + metrics.box.maps # a list contains map50-95 of each category + ``` + === "CLI" + + ```bash + yolo detect val model=yolov8n.pt # val official model + yolo detect val model=path/to/best.pt # val custom model + ``` + +## Predict TODO + +Use a trained YOLOv8n model to run predictions on images. + +!!! example "" + + === "Python" + + ```python + from ultralytics import YOLO + + # Load a model + model = YOLO("yolov8n.pt") # load an official model + model = YOLO("path/to/best.pt") # load a custom model + + # Predict with the model + results = model("https://ultralytics.com/images/bus.jpg") # predict on an image + ``` + === "CLI" + + ```bash + yolo detect predict model=yolov8n.pt source="https://ultralytics.com/images/bus.jpg" # predict with official model + yolo detect predict model=path/to/best.pt source="https://ultralytics.com/images/bus.jpg" # predict with custom model + ``` + +Read more details of `predict` in our [Predict](https://docs.ultralytics.com/predict/) page. + +## Export TODO + +Export a YOLOv8n model to a different format like ONNX, CoreML, etc. + +!!! example "" + + === "Python" + + ```python + from ultralytics import YOLO + + # Load a model + model = YOLO("yolov8n.pt") # load an official model + model = YOLO("path/to/best.pt") # load a custom trained + + # Export the model + model.export(format="onnx") + ``` + === "CLI" + + ```bash + yolo export model=yolov8n.pt format=onnx # export official model + yolo export model=path/to/best.pt format=onnx # export custom trained model + ``` + +Available YOLOv8-pose export formats are in the table below. You can predict or validate directly on exported models, +i.e. `yolo predict model=yolov8n-pose.onnx`. 
+
+| Format                                                             | `format` Argument | Model                          | Metadata |
+|--------------------------------------------------------------------|-------------------|--------------------------------|----------|
+| [PyTorch](https://pytorch.org/)                                    | -                 | `yolov8n-pose.pt`              | โœ… |
+| [TorchScript](https://pytorch.org/docs/stable/jit.html)            | `torchscript`     | `yolov8n-pose.torchscript`     | โœ… |
+| [ONNX](https://onnx.ai/)                                           | `onnx`            | `yolov8n-pose.onnx`            | โœ… |
+| [OpenVINO](https://docs.openvino.ai/latest/index.html)             | `openvino`        | `yolov8n-pose_openvino_model/` | โœ… |
+| [TensorRT](https://developer.nvidia.com/tensorrt)                  | `engine`          | `yolov8n-pose.engine`          | โœ… |
+| [CoreML](https://github.com/apple/coremltools)                     | `coreml`          | `yolov8n-pose.mlmodel`         | โœ… |
+| [TF SavedModel](https://www.tensorflow.org/guide/saved_model)      | `saved_model`     | `yolov8n-pose_saved_model/`    | โœ… |
+| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb`              | `yolov8n-pose.pb`              | โŒ |
+| [TF Lite](https://www.tensorflow.org/lite)                         | `tflite`          | `yolov8n-pose.tflite`          | โœ… |
+| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/)         | `edgetpu`         | `yolov8n-pose_edgetpu.tflite`  | โœ… |
+| [TF.js](https://www.tensorflow.org/js)                             | `tfjs`            | `yolov8n-pose_web_model/`      | โœ… |
+| [PaddlePaddle](https://github.com/PaddlePaddle)                    | `paddle`          | `yolov8n-pose_paddle_model/`   | โœ… |
diff --git a/docs/tasks/segmentation.md b/docs/tasks/segment.md
similarity index 80%
rename from docs/tasks/segmentation.md
rename to docs/tasks/segment.md
index 5155115..d38f317 100644
--- a/docs/tasks/segmentation.md
+++ b/docs/tasks/segment.md
@@ -16,7 +16,7 @@ segmentation is useful when you need to know not only where objects are in an im
 ## Train
 
 Train YOLOv8n-seg on the COCO128-seg dataset for 100 epochs at image size 640. For a full list of available
-arguments see the [Configuration](../cfg.md) page.
+arguments see the [Configuration](../usage/cfg.md) page.
 
 !!! example ""
 
@@ -124,21 +124,22 @@ Export a YOLOv8n-seg model to a different format like ONNX, CoreML, etc.
         yolo export model=path/to/best.pt format=onnx  # export custom trained model
         ```
 
-Available YOLOv8-seg export formats include:
-
-| Format                                                             | `format=`     | Model                         | Metadata |
-|--------------------------------------------------------------------|---------------|-------------------------------|----------|
-| [PyTorch](https://pytorch.org/)                                    | -             | `yolov8n-seg.pt`              | โœ… |
-| [TorchScript](https://pytorch.org/docs/stable/jit.html)            | `torchscript` | `yolov8n-seg.torchscript`     | โœ… |
-| [ONNX](https://onnx.ai/)                                           | `onnx`        | `yolov8n-seg.onnx`            | โœ… |
-| [OpenVINO](https://docs.openvino.ai/latest/index.html)             | `openvino`    | `yolov8n-seg_openvino_model/` | โœ… |
-| [TensorRT](https://developer.nvidia.com/tensorrt)                  | `engine`      | `yolov8n-seg.engine`          | โœ… |
-| [CoreML](https://github.com/apple/coremltools)                     | `coreml`      | `yolov8n-seg.mlmodel`         | โœ… |
-| [TF SavedModel](https://www.tensorflow.org/guide/saved_model)      | `saved_model` | `yolov8n-seg_saved_model/`    | โœ… |
-| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb`          | `yolov8n-seg.pb`              | โŒ |
-| [TF Lite](https://www.tensorflow.org/lite)                         | `tflite`      | `yolov8n-seg.tflite`          | โœ… |
-| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/)         | `edgetpu`     | `yolov8n-seg_edgetpu.tflite`  | โœ… |
-| [TF.js](https://www.tensorflow.org/js)                             | `tfjs`        | `yolov8n-seg_web_model/`      | โœ… |
-| [PaddlePaddle](https://github.com/PaddlePaddle)                    | `paddle`      | `yolov8n-seg_paddle_model/`   | โœ… |
+Available YOLOv8-seg export formats are in the table below. You can predict or validate directly on exported models,
+i.e.
`yolo predict model=yolov8n-seg.onnx`.
+
+| Format                                                             | `format` Argument | Model                         | Metadata |
+|--------------------------------------------------------------------|-------------------|-------------------------------|----------|
+| [PyTorch](https://pytorch.org/)                                    | -                 | `yolov8n-seg.pt`              | โœ… |
+| [TorchScript](https://pytorch.org/docs/stable/jit.html)            | `torchscript`     | `yolov8n-seg.torchscript`     | โœ… |
+| [ONNX](https://onnx.ai/)                                           | `onnx`            | `yolov8n-seg.onnx`            | โœ… |
+| [OpenVINO](https://docs.openvino.ai/latest/index.html)             | `openvino`        | `yolov8n-seg_openvino_model/` | โœ… |
+| [TensorRT](https://developer.nvidia.com/tensorrt)                  | `engine`          | `yolov8n-seg.engine`          | โœ… |
+| [CoreML](https://github.com/apple/coremltools)                     | `coreml`          | `yolov8n-seg.mlmodel`         | โœ… |
+| [TF SavedModel](https://www.tensorflow.org/guide/saved_model)      | `saved_model`     | `yolov8n-seg_saved_model/`    | โœ… |
+| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb`              | `yolov8n-seg.pb`              | โŒ |
+| [TF Lite](https://www.tensorflow.org/lite)                         | `tflite`          | `yolov8n-seg.tflite`          | โœ… |
+| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/)         | `edgetpu`         | `yolov8n-seg_edgetpu.tflite`  | โœ… |
+| [TF.js](https://www.tensorflow.org/js)                             | `tfjs`            | `yolov8n-seg_web_model/`      | โœ… |
+| [PaddlePaddle](https://github.com/PaddlePaddle)                    | `paddle`          | `yolov8n-seg_paddle_model/`   | โœ… |
diff --git a/docs/callbacks.md b/docs/usage/callbacks.md
similarity index 100%
rename from docs/callbacks.md
rename to docs/usage/callbacks.md
diff --git a/docs/usage/cfg.md b/docs/usage/cfg.md
new file mode 100644
index 0000000..763b892
--- /dev/null
+++ b/docs/usage/cfg.md
@@ -0,0 +1,250 @@
+YOLO settings and hyperparameters play a critical role in the model's performance, speed, and accuracy. These settings
+and hyperparameters can affect the model's behavior at various stages of the model development process, including
+training, validation, and prediction.
+
+YOLOv8 'yolo' CLI commands use the following syntax:
+
+!!! example ""
+
+    === "CLI"
+
+        ```bash
+        yolo TASK MODE ARGS
+        ```
+
+Where:
+
+- `TASK` (optional) is one of `[detect, segment, classify]`. If it is not passed explicitly YOLOv8 will try to guess
+  the `TASK` from the model type.
+- `MODE` (required) is one of `[train, val, predict, export]`
+- `ARGS` (optional) are any number of custom `arg=value` pairs like `imgsz=320` that override defaults.
+  For a full list of available `ARGS` see the [Configuration](cfg.md) page and `default.yaml`
+  GitHub [source](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/yolo/cfg/default.yaml).
+
+#### Tasks
+
+YOLO models can be used for a variety of tasks, including detection, segmentation, and classification. These tasks
+differ in the type of output they produce and the specific problem they are designed to solve.
+
+- **Detect**: Detection tasks involve identifying and localizing objects or regions of interest in an image or video.
+  YOLO models can be used for object detection tasks by predicting the bounding boxes and class labels of objects in an
+  image.
+- **Segment**: Segmentation tasks involve dividing an image or video into regions or pixels that correspond to
+  different objects or classes. YOLO models can be used for image segmentation tasks by predicting a mask or label for
+  each pixel in an image.
+- **Classify**: Classification tasks involve assigning a class label to an input, such as an image or text.
YOLO
+  models can be used for image classification tasks by predicting the class label of an input image.
+
+#### Modes
+
+YOLO models can be used in different modes depending on the specific problem you are trying to solve. These modes
+include train, val, predict, and export.
+
+- **Train**: The train mode is used to train the model on a dataset. This mode is typically used during the development
+  and testing phase of a model.
+- **Val**: The val mode is used to evaluate the model's performance on a validation dataset. This mode is typically
+  used to tune the model's hyperparameters and detect overfitting.
+- **Predict**: The predict mode is used to make predictions with the model on new data. This mode is typically used in
+  production or when deploying the model to users.
+- **Export**: The export mode is used to convert the model to a format that can be used for deployment in other
+  software or hardware environments.
+
+| Key      | Value      | Description                                                                                     |
+|----------|------------|-------------------------------------------------------------------------------------------------|
+| `task`   | `'detect'` | inference task, i.e. detect, segment, or classify                                               |
+| `mode`   | `'train'`  | YOLO mode, i.e. train, val, predict, or export                                                  |
+| `resume` | `False`    | resume training from last checkpoint or custom checkpoint if passed as resume=path/to/best.pt   |
+| `model`  | `None`     | path to model file, i.e. yolov8n.pt, yolov8n.yaml                                               |
+| `data`   | `None`     | path to data file, i.e. coco128.yaml                                                            |
+
+### Training
+
+Training settings for YOLO models refer to the various hyperparameters and configurations used to train the model on a
+dataset. These settings can affect the model's performance, speed, and accuracy. Some common YOLO training settings
+include the batch size, learning rate, momentum, and weight decay. Other factors that may affect the training process
+include the choice of optimizer, the choice of loss function, and the size and composition of the training dataset. It
+is important to carefully tune and experiment with these settings to achieve the best possible performance for a given
+task.
+
+| Key               | Value    | Description                                                                  |
+|-------------------|----------|------------------------------------------------------------------------------|
+| `model`           | `None`   | path to model file, i.e. yolov8n.pt, yolov8n.yaml                            |
+| `data`            | `None`   | path to data file, i.e. coco128.yaml                                         |
+| `epochs`          | `100`    | number of epochs to train for                                                |
+| `patience`        | `50`     | epochs to wait for no observable improvement for early stopping of training  |
+| `batch`           | `16`     | number of images per batch (-1 for AutoBatch)                                |
+| `imgsz`           | `640`    | size of input images as integer or w,h                                       |
+| `save`            | `True`   | save train checkpoints and predict results                                   |
+| `save_period`     | `-1`     | save checkpoint every x epochs (disabled if < 1)                             |
+| `cache`           | `False`  | True/ram, disk or False. Use cache for data loading                          |
+| `device`          | `None`   | device to run on, i.e.
cuda device=0 or device=0,1,2,3 or device=cpu |
+| `workers`         | `8`      | number of worker threads for data loading (per RANK if DDP)                 |
+| `project`         | `None`   | project name                                                                 |
+| `name`            | `None`   | experiment name                                                              |
+| `exist_ok`        | `False`  | whether to overwrite existing experiment                                     |
+| `pretrained`      | `False`  | whether to use a pretrained model                                            |
+| `optimizer`       | `'SGD'`  | optimizer to use, choices=['SGD', 'Adam', 'AdamW', 'RMSProp']                |
+| `verbose`         | `False`  | whether to print verbose output                                              |
+| `seed`            | `0`      | random seed for reproducibility                                              |
+| `deterministic`   | `True`   | whether to enable deterministic mode                                         |
+| `single_cls`      | `False`  | train multi-class data as single-class                                       |
+| `image_weights`   | `False`  | use weighted image selection for training                                    |
+| `rect`            | `False`  | support rectangular training                                                 |
+| `cos_lr`          | `False`  | use cosine learning rate scheduler                                           |
+| `close_mosaic`    | `10`     | disable mosaic augmentation for final 10 epochs                              |
+| `resume`          | `False`  | resume training from last checkpoint                                         |
+| `lr0`             | `0.01`   | initial learning rate (i.e. SGD=1E-2, Adam=1E-3)                             |
+| `lrf`             | `0.01`   | final learning rate (lr0 * lrf)                                              |
+| `momentum`        | `0.937`  | SGD momentum/Adam beta1                                                      |
+| `weight_decay`    | `0.0005` | optimizer weight decay 5e-4                                                  |
+| `warmup_epochs`   | `3.0`    | warmup epochs (fractions ok)                                                 |
+| `warmup_momentum` | `0.8`    | warmup initial momentum                                                      |
+| `warmup_bias_lr`  | `0.1`    | warmup initial bias lr                                                       |
+| `box`             | `7.5`    | box loss gain                                                                |
+| `cls`             | `0.5`    | cls loss gain (scale with pixels)                                            |
+| `dfl`             | `1.5`    | dfl loss gain                                                                |
+| `fl_gamma`        | `0.0`    | focal loss gamma (EfficientDet default gamma=1.5)                            |
+| `label_smoothing` | `0.0`    | label smoothing (fraction)                                                   |
+| `nbs`             | `64`     | nominal batch size                                                           |
+| `overlap_mask`    | `True`   | masks should overlap during training (segment train only)                    |
+| `mask_ratio`      | `4`      | mask downsample ratio (segment train only)                                   |
+| `dropout`         | `0.0`    | use dropout regularization (classify train only)                             |
+| `val`             | `True`   | validate/test during training                                                |
+
+### Prediction
+
+Prediction settings for YOLO models refer to the various hyperparameters and configurations used to make predictions
+with the model on new data. These settings can affect the model's performance, speed, and accuracy. Some common YOLO
+prediction settings include the confidence threshold, non-maximum suppression (NMS) threshold, and the number of classes
+to consider. Other factors that may affect the prediction process include the size and format of the input data, the
+presence of additional features such as masks or multiple labels per box, and the specific task the model is being used
+for. It is important to carefully tune and experiment with these settings to achieve the best possible performance for a
+given task.
+
+| Key              | Value                  | Description                                                |
+|------------------|------------------------|------------------------------------------------------------|
+| `source`         | `'ultralytics/assets'` | source directory for images or videos                      |
+| `conf`           | `0.25`                 | object confidence threshold for detection                  |
+| `iou`            | `0.7`                  | intersection over union (IoU) threshold for NMS            |
+| `half`           | `False`                | use half precision (FP16)                                  |
+| `device`         | `None`                 | device to run on, i.e.
cuda device=0/1/2/3 or device=cpu |
+| `show`           | `False`                | show results if possible                                   |
+| `save`           | `False`                | save images with results                                   |
+| `save_txt`       | `False`                | save results as .txt file                                  |
+| `save_conf`      | `False`                | save results with confidence scores                        |
+| `save_crop`      | `False`                | save cropped images with results                           |
+| `hide_labels`    | `False`                | hide labels                                                |
+| `hide_conf`      | `False`                | hide confidence scores                                     |
+| `max_det`        | `300`                  | maximum number of detections per image                     |
+| `vid_stride`     | `1`                    | video frame-rate stride                                    |
+| `line_thickness` | `3`                    | bounding box thickness (pixels)                            |
+| `visualize`      | `False`                | visualize model features                                   |
+| `augment`        | `False`                | apply image augmentation to prediction sources             |
+| `agnostic_nms`   | `False`                | class-agnostic NMS                                         |
+| `retina_masks`   | `False`                | use high-resolution segmentation masks                     |
+| `classes`        | `None`                 | filter results by class, i.e. classes=0, or classes=[0,2,3] |
+| `box`            | `True`                 | show boxes in segmentation predictions                     |
+
+### Validation
+
+Validation settings for YOLO models refer to the various hyperparameters and configurations used to
+evaluate the model's performance on a validation dataset. These settings can affect the model's performance, speed, and
+accuracy. Some common YOLO validation settings include the batch size, the frequency with which validation is performed
+during training, and the metrics used to evaluate the model's performance. Other factors that may affect the validation
+process include the size and composition of the validation dataset and the specific task the model is being used for. It
+is important to carefully tune and experiment with these settings to ensure that the model is performing well on the
+validation dataset and to detect and prevent overfitting.
+
+| Key           | Value   | Description                                                         |
+|---------------|---------|---------------------------------------------------------------------|
+| `save_json`   | `False` | save results to JSON file                                           |
+| `save_hybrid` | `False` | save hybrid version of labels (labels + additional predictions)     |
+| `conf`        | `0.001` | object confidence threshold for detection                           |
+| `iou`         | `0.6`   | intersection over union (IoU) threshold for NMS                     |
+| `max_det`     | `300`   | maximum number of detections per image                              |
+| `half`        | `True`  | use half precision (FP16)                                           |
+| `device`      | `None`  | device to run on, i.e. cuda device=0/1/2/3 or device=cpu            |
+| `dnn`         | `False` | use OpenCV DNN for ONNX inference                                   |
+| `plots`       | `False` | show plots during training                                          |
+| `rect`        | `False` | support rectangular evaluation                                      |
+| `split`       | `'val'` | dataset split to use for validation, i.e. 'val', 'test' or 'train'  |
+
+### Export
+
+Export settings for YOLO models refer to the various configurations and options used to save or
+export the model for use in other environments or platforms. These settings can affect the model's performance, size,
+and compatibility with different systems. Some common YOLO export settings include the format of the exported model
+file (e.g. ONNX, TensorFlow SavedModel), the device on which the model will be run (e.g. CPU, GPU), and the presence of
+additional features such as masks or multiple labels per box. Other factors that may affect the export process include
+the specific task the model is being used for and the requirements or constraints of the target environment or platform.
+It is important to carefully consider and configure these settings to ensure that the exported model is optimized for
+the intended use case and can be used effectively in the target environment.
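+
+As a minimal sketch (assuming a standard `yolov8n.pt` checkpoint), the keys in the table below map directly to
+keyword arguments of `model.export()` in the Python API:
+
+!!! example ""
+
+    === "Python"
+
+        ```python
+        from ultralytics import YOLO
+
+        # Load a trained model, then export it with explicit settings (illustrative values; any key below works)
+        model = YOLO("yolov8n.pt")
+        model.export(format="onnx", imgsz=640, half=False, simplify=True)
+        ```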
+
+| Key         | Value           | Description                                           |
+|-------------|-----------------|-------------------------------------------------------|
+| `format`    | `'torchscript'` | format to export to                                   |
+| `imgsz`     | `640`           | image size as scalar or (h, w) list, i.e. (640, 480)  |
+| `keras`     | `False`         | use Keras for TF SavedModel export                    |
+| `optimize`  | `False`         | TorchScript: optimize for mobile                      |
+| `half`      | `False`         | FP16 quantization                                     |
+| `int8`      | `False`         | INT8 quantization                                     |
+| `dynamic`   | `False`         | ONNX/TF/TensorRT: dynamic axes                        |
+| `simplify`  | `False`         | ONNX: simplify model                                  |
+| `opset`     | `None`          | ONNX: opset version (optional, defaults to latest)    |
+| `workspace` | `4`             | TensorRT: workspace size (GB)                         |
+| `nms`       | `False`         | CoreML: add NMS                                       |
+
+### Augmentation
+
+Augmentation settings for YOLO models refer to the various transformations and modifications
+applied to the training data to increase the diversity and size of the dataset. These settings can affect the model's
+performance, speed, and accuracy. Some common YOLO augmentation settings include the type and intensity of the
+transformations applied (e.g. random flips, rotations, cropping, color changes), the probability with which each
+transformation is applied, and the presence of additional features such as masks or multiple labels per box. Other
+factors that may affect the augmentation process include the size and composition of the original dataset and the
+specific task the model is being used for. It is important to carefully tune and experiment with these settings to
+ensure that the augmented dataset is diverse and representative enough to train a high-performing model.
+
+| Key           | Value   | Description                                      |
+|---------------|---------|--------------------------------------------------|
+| `hsv_h`       | `0.015` | image HSV-Hue augmentation (fraction)            |
+| `hsv_s`       | `0.7`   | image HSV-Saturation augmentation (fraction)     |
+| `hsv_v`       | `0.4`   | image HSV-Value augmentation (fraction)          |
+| `degrees`     | `0.0`   | image rotation (+/- deg)                         |
+| `translate`   | `0.1`   | image translation (+/- fraction)                 |
+| `scale`       | `0.5`   | image scale (+/- gain)                           |
+| `shear`       | `0.0`   | image shear (+/- deg)                            |
+| `perspective` | `0.0`   | image perspective (+/- fraction), range 0-0.001  |
+| `flipud`      | `0.0`   | image flip up-down (probability)                 |
+| `fliplr`      | `0.5`   | image flip left-right (probability)              |
+| `mosaic`      | `1.0`   | image mosaic (probability)                       |
+| `mixup`       | `0.0`   | image mixup (probability)                        |
+| `copy_paste`  | `0.0`   | segment copy-paste (probability)                 |
+
+### Logging, checkpoints, plotting and file management
+
+Logging, checkpoints, plotting, and file management are important considerations when training a YOLO model.
+
+- Logging: It is often helpful to log various metrics and statistics during training to track the model's progress and
+  diagnose any issues that may arise. This can be done using a logging library such as TensorBoard or by writing log
+  messages to a file.
+- Checkpoints: It is a good practice to save checkpoints of the model at regular intervals during training. This allows
+  you to resume training from a previous point if the training process is interrupted or if you want to experiment with
+  different training configurations.
+- Plotting: Visualizing the model's performance and training progress can be helpful for understanding how the model is
+  behaving and identifying potential issues. This can be done using a plotting library such as matplotlib or by
+  generating plots using a logging library such as TensorBoard.
+- File management: Managing the various files generated during the training process, such as model checkpoints, log
+  files, and plots, can be challenging. It is important to have a clear and organized file structure to keep track of
+  these files and make it easy to access and analyze them as needed.
+
+Effective logging, checkpointing, plotting, and file management can help you keep track of the model's progress and make
+it easier to debug and optimize the training process.
+
+| Key        | Value    | Description                                                                                      |
+|------------|----------|--------------------------------------------------------------------------------------------------|
+| `project`  | `'runs'` | project name                                                                                     |
+| `name`     | `'exp'`  | experiment name. `exp` gets automatically incremented if not specified, i.e. `exp`, `exp2` ...   |
+| `exist_ok` | `False`  | whether to overwrite existing experiment                                                         |
+| `plots`    | `False`  | save plots during train/val                                                                      |
+| `save`     | `False`  | save train checkpoints and predict results                                                      |
diff --git a/docs/cli.md b/docs/usage/cli.md
similarity index 100%
rename from docs/cli.md
rename to docs/usage/cli.md
diff --git a/docs/engine.md b/docs/usage/engine.md
similarity index 98%
rename from docs/engine.md
rename to docs/usage/engine.md
index 3f90a1f..5597be2 100644
--- a/docs/engine.md
+++ b/docs/usage/engine.md
@@ -9,7 +9,7 @@ custom model and dataloader by just overriding these functions:
 
 * `get_model(cfg, weights)` - The function that builds the model to be trained
 * `get_dataloder()` - The function that builds the dataloader
- More details and source code can be found in [`BaseTrainer` Reference](reference/base_trainer.md)
+ More details and source code can be found in [`BaseTrainer` Reference](../reference/base_trainer.md)
 
 ## DetectionTrainer
 
diff --git a/docs/python.md b/docs/usage/python.md
similarity index 98%
rename from docs/python.md
rename to docs/usage/python.md
index 3083af2..10ddef9 100644
--- a/docs/python.md
+++ b/docs/usage/python.md
@@ -127,7 +127,7 @@ The simplest way of simply using YOLOv8 directly in a Python environment.
To know more about using `YOLO` models, refer Model class Reference -[Model reference](reference/model.md){ .md-button .md-button--primary} +[Model reference](../reference/model.md){ .md-button .md-button--primary} --- diff --git a/mkdocs.yml b/mkdocs.yml index 29f92ca..8d07614 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -38,6 +38,9 @@ theme: - navigation.top - navigation.expand - navigation.footer + - navigation.tracking + - navigation.instant + - navigation.indexes - content.tabs.link # all code tabs change simultaneously # Customization @@ -102,18 +105,26 @@ plugins: nav: - Home: index.md - Quickstart: quickstart.md + - Modes: + - modes/index.md + - Train: modes/train.md + - Val: modes/val.md + - Predict: modes/predict.md + - Export: modes/export.md + - Track: modes/track.md + - Benchmark: modes/benchmark.md - Tasks: - - Detection: tasks/detection.md - - Segmentation: tasks/segmentation.md - - Multi-Object Tracking: tasks/tracking.md - - Classification: tasks/classification.md + - tasks/index.md + - Detect: tasks/detect.md + - Segment: tasks/segment.md + - Classify: tasks/classify.md +# - Keypoints: tasks/keypoints.md - Usage: - - CLI: cli.md - - Python: python.md - - Predict: predict.md - - Configuration: cfg.md - - Customization using callbacks: callbacks.md - - Advanced customization: engine.md + - CLI: usage/cli.md + - Python: usage/python.md + - Callbacks: usage/callbacks.md + - Configuration: usage/cfg.md + - Advanced Customization: usage/engine.md - Ultralytics HUB: hub.md - iOS and Android App: app.md - Reference: diff --git a/tests/test_python.py b/tests/test_python.py index 243fbe5..3ec0bf7 100644 --- a/tests/test_python.py +++ b/tests/test_python.py @@ -96,6 +96,13 @@ def test_val_scratch(): model.val(data='coco8.yaml', imgsz=32) +def test_amp(): + if torch.cuda.is_available(): + from ultralytics.yolo.engine.trainer import check_amp + model = YOLO(MODEL).model.cuda() + assert check_amp(model) + + def test_train_scratch(): model = YOLO(CFG) model.train(data='coco8.yaml', epochs=1, imgsz=32) @@ -213,6 +220,3 @@ def test_result(): res = model(SOURCE) res[0].plot() print(res[0].path) - - -test_predict_img() diff --git a/ultralytics/__init__.py b/ultralytics/__init__.py index 42bd947..dfddfe9 100644 --- a/ultralytics/__init__.py +++ b/ultralytics/__init__.py @@ -1,6 +1,6 @@ # Ultralytics YOLO ๐Ÿš€, GPL-3.0 license -__version__ = '8.0.52' +__version__ = '8.0.53' from ultralytics.yolo.engine.model import YOLO from ultralytics.yolo.utils.checks import check_yolo as checks diff --git a/ultralytics/hub/utils.py b/ultralytics/hub/utils.py index 60af743..267e28b 100644 --- a/ultralytics/hub/utils.py +++ b/ultralytics/hub/utils.py @@ -182,7 +182,7 @@ class Traces: 'environment': ENVIRONMENT} self.enabled = \ SETTINGS['sync'] and \ - RANK in {-1, 0} and \ + RANK in (-1, 0) and \ not TESTS_RUNNING and \ ONLINE and \ (is_pip_package() or get_git_origin_url() == 'https://github.com/ultralytics/ultralytics.git') diff --git a/ultralytics/nn/autobackend.py b/ultralytics/nn/autobackend.py index a0b79d4..0f4945e 100644 --- a/ultralytics/nn/autobackend.py +++ b/ultralytics/nn/autobackend.py @@ -332,13 +332,6 @@ class AutoBackend(nn.Module): y = [self.bindings[x].data for x in sorted(self.output_names)] elif self.coreml: # CoreML im = im[0].cpu().numpy() - if self.task == 'classify': - from ultralytics.yolo.data.utils import IMAGENET_MEAN, IMAGENET_STD - - # im_pil = Image.fromarray(((im / 6 + 0.5) * 255).astype('uint8')) - for i in range(3): - im[..., i] *= IMAGENET_STD[i] - im[..., i] += 
IMAGENET_MEAN[i] im_pil = Image.fromarray((im * 255).astype('uint8')) # im = im.resize((192, 320), Image.ANTIALIAS) y = self.model.predict({'image': im_pil}) # coordinates are xywh normalized @@ -371,10 +364,10 @@ class AutoBackend(nn.Module): self.names = {i: f'class{i}' for i in range(nc)} else: # Lite or Edge TPU input = self.input_details[0] - int8 = input['dtype'] == np.uint8 # is TFLite quantized uint8 model + int8 = input['dtype'] == np.int8 # is TFLite quantized int8 model if int8: scale, zero_point = input['quantization'] - im = (im / scale + zero_point).astype(np.uint8) # de-scale + im = (im / scale + zero_point).astype(np.int8) # de-scale self.interpreter.set_tensor(input['index'], im) self.interpreter.invoke() y = [] diff --git a/ultralytics/yolo/cfg/__init__.py b/ultralytics/yolo/cfg/__init__.py index 22b6719..91a6e66 100644 --- a/ultralytics/yolo/cfg/__init__.py +++ b/ultralytics/yolo/cfg/__init__.py @@ -299,7 +299,7 @@ def entrypoint(debug=''): task = model.task # Mode - if mode in {'predict', 'track'} and 'source' not in overrides: + if mode in ('predict', 'track') and 'source' not in overrides: overrides['source'] = DEFAULT_CFG.source or ROOT / 'assets' if (ROOT / 'assets').exists() \ else 'https://ultralytics.com/images/bus.jpg' LOGGER.warning(f"WARNING โš ๏ธ 'source' is missing. Using default 'source={overrides['source']}'.") diff --git a/ultralytics/yolo/data/augment.py b/ultralytics/yolo/data/augment.py index 136e015..1658e12 100644 --- a/ultralytics/yolo/data/augment.py +++ b/ultralytics/yolo/data/augment.py @@ -14,7 +14,7 @@ from ..utils.checks import check_version from ..utils.instance import Instances from ..utils.metrics import bbox_ioa from ..utils.ops import segment2box -from .utils import IMAGENET_MEAN, IMAGENET_STD, polygons2masks, polygons2masks_overlap +from .utils import polygons2masks, polygons2masks_overlap # TODO: we might need a BaseTransform to make all these augments be compatible with both classification and semantic @@ -682,12 +682,14 @@ def v8_transforms(dataset, imgsz, hyp): # Classification augmentations ----------------------------------------------------------------------------------------- -def classify_transforms(size=224): +def classify_transforms(size=224, mean=(0.0, 0.0, 0.0), std=(1.0, 1.0, 1.0)): # IMAGENET_MEAN, IMAGENET_STD # Transforms to apply if albumentations not installed if not isinstance(size, int): raise TypeError(f'classify_transforms() size {size} must be integer, not (list, tuple)') - # T.Compose([T.ToTensor(), T.Resize(size), T.CenterCrop(size), T.Normalize(IMAGENET_MEAN, IMAGENET_STD)]) - return T.Compose([CenterCrop(size), ToTensor(), T.Normalize(IMAGENET_MEAN, IMAGENET_STD)]) + if any(mean) or any(std): + return T.Compose([CenterCrop(size), ToTensor(), T.Normalize(mean, std, inplace=True)]) + else: + return T.Compose([CenterCrop(size), ToTensor()]) def classify_albumentations( @@ -697,8 +699,8 @@ def classify_albumentations( hflip=0.5, vflip=0.0, jitter=0.4, - mean=IMAGENET_MEAN, - std=IMAGENET_STD, + mean=(0.0, 0.0, 0.0), # IMAGENET_MEAN + std=(1.0, 1.0, 1.0), # IMAGENET_STD auto_aug=False, ): # YOLOv8 classification Albumentations (optional, only used if package is installed) diff --git a/ultralytics/yolo/data/dataloaders/v5loader.py b/ultralytics/yolo/data/dataloaders/v5loader.py index 4a6c709..f6b6734 100644 --- a/ultralytics/yolo/data/dataloaders/v5loader.py +++ b/ultralytics/yolo/data/dataloaders/v5loader.py @@ -496,7 +496,7 @@ class LoadImagesAndLabels(Dataset): # Display cache nf, nm, ne, nc, n = 
cache.pop('results') # found, missing, empty, corrupt, total - if exists and LOCAL_RANK in {-1, 0}: + if exists and LOCAL_RANK in (-1, 0): d = f'Scanning {cache_path}... {nf} images, {nm + ne} backgrounds, {nc} corrupt' tqdm(None, desc=prefix + d, total=n, initial=n, bar_format=TQDM_BAR_FORMAT) # display cache results if cache['msgs']: diff --git a/ultralytics/yolo/data/dataset.py b/ultralytics/yolo/data/dataset.py index af1123e..80fe24b 100644 --- a/ultralytics/yolo/data/dataset.py +++ b/ultralytics/yolo/data/dataset.py @@ -133,7 +133,7 @@ class YOLODataset(BaseDataset): # Display cache nf, nm, ne, nc, n = cache.pop('results') # found, missing, empty, corrupt, total - if exists and LOCAL_RANK in {-1, 0}: + if exists and LOCAL_RANK in (-1, 0): d = f'Scanning {cache_path}... {nf} images, {nm + ne} backgrounds, {nc} corrupt' tqdm(None, desc=self.prefix + d, total=n, initial=n, bar_format=TQDM_BAR_FORMAT) # display cache results if cache['msgs']: diff --git a/ultralytics/yolo/engine/exporter.py b/ultralytics/yolo/engine/exporter.py index cb0fc44..a920c3d 100644 --- a/ultralytics/yolo/engine/exporter.py +++ b/ultralytics/yolo/engine/exporter.py @@ -63,7 +63,6 @@ from ultralytics.nn.autobackend import check_class_names from ultralytics.nn.modules import C2f, Detect, Segment from ultralytics.nn.tasks import DetectionModel, SegmentationModel from ultralytics.yolo.cfg import get_cfg -from ultralytics.yolo.data.utils import IMAGENET_MEAN, IMAGENET_STD from ultralytics.yolo.utils import (DEFAULT_CFG, LINUX, LOGGER, MACOS, __version__, callbacks, colorstr, get_default_args, yaml_save) from ultralytics.yolo.utils.checks import check_imgsz, check_requirements, check_version @@ -148,7 +147,7 @@ class Exporter: self.run_callbacks('on_export_start') t = time.time() format = self.args.format.lower() # to lowercase - if format in {'tensorrt', 'trt'}: # engine aliases + if format in ('tensorrt', 'trt'): # engine aliases format = 'engine' fmts = tuple(export_formats()['Argument'][1:]) # available export formats flags = [x == format for x in fmts] @@ -408,8 +407,6 @@ class Exporter: scale = 1 / 255 classifier_config = None if self.model.task == 'classify': - bias = [-x for x in IMAGENET_MEAN] - scale = 1 / 255 / (sum(IMAGENET_STD) / 3) classifier_config = ct.ClassifierConfig(list(self.model.names.values())) if self.args.nms else None model = self.model elif self.model.task == 'detect': @@ -531,7 +528,7 @@ class Exporter: # Export to TF int8 = '-oiqt -qt per-tensor' if self.args.int8 else '' cmd = f'onnx2tf -i {f_onnx} -o {f} -nuo --non_verbose {int8}' - LOGGER.info(f"\n{prefix} running '{cmd}'") + LOGGER.info(f"\n{prefix} running '{cmd.strip()}'") subprocess.run(cmd, shell=True) yaml_save(f / 'metadata.yaml', self.metadata) # add metadata.yaml diff --git a/ultralytics/yolo/engine/model.py b/ultralytics/yolo/engine/model.py index fecca7e..f9f73b5 100644 --- a/ultralytics/yolo/engine/model.py +++ b/ultralytics/yolo/engine/model.py @@ -319,7 +319,7 @@ class YOLO: self.trainer.hub_session = self.session # attach optional HUB session self.trainer.train() # update model and cfg after training - if RANK in {0, -1}: + if RANK in (-1, 0): self.model, _ = attempt_load_one_weight(str(self.trainer.best)) self.overrides = self.model.args self.metrics = getattr(self.trainer.validator, 'metrics', None) # TODO: no metrics returned by DDP diff --git a/ultralytics/yolo/engine/results.py b/ultralytics/yolo/engine/results.py index a8a2120..bd23fcc 100644 --- a/ultralytics/yolo/engine/results.py +++ 
b/ultralytics/yolo/engine/results.py @@ -185,7 +185,7 @@ class Boxes: if boxes.ndim == 1: boxes = boxes[None, :] n = boxes.shape[-1] - assert n in {6, 7}, f'expected `n` in [6, 7], but got {n}' # xyxy, (track_id), conf, cls + assert n in (6, 7), f'expected `n` in [6, 7], but got {n}' # xyxy, (track_id), conf, cls # TODO self.is_track = n == 7 self.boxes = boxes diff --git a/ultralytics/yolo/engine/trainer.py b/ultralytics/yolo/engine/trainer.py index 55159c3..9c2d1f9 100644 --- a/ultralytics/yolo/engine/trainer.py +++ b/ultralytics/yolo/engine/trainer.py @@ -95,9 +95,9 @@ class BaseTrainer: self.save_dir = Path(self.args.save_dir) else: self.save_dir = Path( - increment_path(Path(project) / name, exist_ok=self.args.exist_ok if RANK in {-1, 0} else True)) + increment_path(Path(project) / name, exist_ok=self.args.exist_ok if RANK in (-1, 0) else True)) self.wdir = self.save_dir / 'weights' # weights dir - if RANK in {-1, 0}: + if RANK in (-1, 0): self.wdir.mkdir(parents=True, exist_ok=True) # make dir self.args.save_dir = str(self.save_dir) yaml_save(self.save_dir / 'args.yaml', vars(self.args)) # save run args @@ -144,7 +144,7 @@ class BaseTrainer: # Callbacks self.callbacks = defaultdict(list, callbacks.default_callbacks) # add callbacks - if RANK in {0, -1}: + if RANK in (-1, 0): callbacks.add_integration_callbacks(self) def add_callback(self, event: str, callback): @@ -203,9 +203,14 @@ class BaseTrainer: self.model = self.model.to(self.device) self.set_model_attributes() # Check AMP - callbacks_backup = callbacks.default_callbacks.copy() # backup callbacks as they are reset by check_amp() - self.amp = check_amp(self.model) - callbacks.default_callbacks = callbacks_backup # restore callbacks + self.amp = torch.tensor(True).to(self.device) + if RANK in (-1, 0): # Single-GPU and DDP + callbacks_backup = callbacks.default_callbacks.copy() # backup callbacks as check_amp() resets them + self.amp = torch.tensor(check_amp(self.model), device=self.device) + callbacks.default_callbacks = callbacks_backup # restore callbacks + if RANK > -1: # DDP + dist.broadcast(self.amp, src=0) # broadcast the tensor from rank 0 to all other ranks (returns None) + self.amp = bool(self.amp) # as boolean self.scaler = amp.GradScaler(enabled=self.amp) if world_size > 1: self.model = DDP(self.model, device_ids=[rank]) @@ -239,7 +244,7 @@ class BaseTrainer: # dataloaders batch_size = self.batch_size // world_size if world_size > 1 else self.batch_size self.train_loader = self.get_dataloader(self.trainset, batch_size=batch_size, rank=rank, mode='train') - if rank in {0, -1}: + if rank in (-1, 0): self.test_loader = self.get_dataloader(self.testset, batch_size=batch_size * 2, rank=-1, mode='val') self.validator = self.get_validator() metric_keys = self.validator.metrics.keys + self.label_loss_items(prefix='val') @@ -286,7 +291,7 @@ class BaseTrainer: if hasattr(self.train_loader.dataset, 'close_mosaic'): self.train_loader.dataset.close_mosaic(hyp=self.args) - if rank in {-1, 0}: + if rank in (-1, 0): LOGGER.info(self.progress_string()) pbar = tqdm(enumerate(self.train_loader), total=nb, bar_format=TQDM_BAR_FORMAT) self.tloss = None @@ -327,7 +332,7 @@ class BaseTrainer: mem = f'{torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0:.3g}G' # (GB) loss_len = self.tloss.shape[0] if len(self.tloss.size()) else 1 losses = self.tloss if loss_len > 1 else torch.unsqueeze(self.tloss, 0) - if rank in {-1, 0}: + if rank in (-1, 0): pbar.set_description( ('%11s' * 2 + '%11.4g' * (2 + loss_len)) % (f'{epoch + 
1}/{self.epochs}', mem, *losses, batch['cls'].shape[0], batch['img'].shape[-1]))
@@ -342,7 +347,7 @@
             self.scheduler.step()
             self.run_callbacks('on_train_epoch_end')
 
-            if rank in {-1, 0}:
+            if rank in (-1, 0):
 
                 # Validation
                 self.ema.update_attr(self.model, include=['yaml', 'nc', 'args', 'names', 'stride', 'class_weights'])
@@ -372,7 +377,7 @@
             if self.stop:
                 break  # must break all DDP ranks
 
-        if rank in {-1, 0}:
+        if rank in (-1, 0):
             # Do final val with best.pt
             LOGGER.info(f'\n{epoch - self.start_epoch + 1} epochs completed in '
                         f'{(time.time() - self.train_time_start) / 3600:.3f} hours.')
@@ -603,7 +608,20 @@ def check_amp(model):
-    # Check PyTorch Automatic Mixed Precision (AMP) functionality. Return True on correct operation
+    """
+    This function checks the PyTorch Automatic Mixed Precision (AMP) functionality of a YOLOv8 model.
+    If the checks fail, it means there are anomalies with AMP on the system that may cause NaN losses or zero-mAP
+    results, so AMP will be disabled during training.
+
+    Args:
+        model (nn.Module): A YOLOv8 model instance.
+
+    Returns:
+        bool: Returns True if the AMP functionality works correctly with a YOLOv8 model, else False.
+
+    Note:
+        AMP check failures are caught internally; a warning is logged and False is returned rather than raising.
+    """
     device = next(model.parameters()).device  # get model device
     if device.type in ('cpu', 'mps'):
         return False  # AMP only used on CUDA devices
@@ -613,18 +631,21 @@
         a = m(im, device=device, verbose=False)[0].boxes.boxes  # FP32 inference
         with torch.cuda.amp.autocast(True):
             b = m(im, device=device, verbose=False)[0].boxes.boxes  # AMP inference
-        return a.shape == b.shape and torch.allclose(a, b.float(), rtol=0.1)  # close to 10% absolute tolerance
+        del m
+        return a.shape == b.shape and torch.allclose(a, b.float(), atol=0.5)  # close to 0.5 absolute tolerance
 
     f = ROOT / 'assets/bus.jpg'  # image to check
     im = f if f.exists() else 'https://ultralytics.com/images/bus.jpg' if ONLINE else np.ones((640, 640, 3))
     prefix = colorstr('AMP: ')
+    LOGGER.info(f'{prefix}running Automatic Mixed Precision (AMP) checks with YOLOv8n...')
     try:
         from ultralytics import YOLO
-        LOGGER.info(f'{prefix}running Automatic Mixed Precision (AMP) checks with YOLOv8n...')
         assert amp_allclose(YOLO('yolov8n.pt'), im)
         LOGGER.info(f'{prefix}checks passed โœ…')
-        return True
+    except ConnectionError:
+        LOGGER.warning(f"{prefix}checks skipped โš ๏ธ, offline and unable to download YOLOv8n. Setting 'amp=True'.")
     except AssertionError:
         LOGGER.warning(f'{prefix}checks failed โŒ.
Anomalies were detected with AMP on your system that may lead to ' f'NaN losses or zero-mAP results, so AMP will be disabled during training.') return False + return True diff --git a/ultralytics/yolo/engine/validator.py b/ultralytics/yolo/engine/validator.py index 5b29a36..dddca2a 100644 --- a/ultralytics/yolo/engine/validator.py +++ b/ultralytics/yolo/engine/validator.py @@ -79,7 +79,7 @@ class BaseValidator: project = self.args.project or Path(SETTINGS['runs_dir']) / self.args.task name = self.args.name or f'{self.args.mode}' self.save_dir = save_dir or increment_path(Path(project) / name, - exist_ok=self.args.exist_ok if RANK in {-1, 0} else True) + exist_ok=self.args.exist_ok if RANK in (-1, 0) else True) (self.save_dir / 'labels' if self.args.save_txt else self.save_dir).mkdir(parents=True, exist_ok=True) if self.args.conf is None: diff --git a/ultralytics/yolo/utils/__init__.py b/ultralytics/yolo/utils/__init__.py index 5b4dfdd..b582d8e 100644 --- a/ultralytics/yolo/utils/__init__.py +++ b/ultralytics/yolo/utils/__init__.py @@ -126,7 +126,7 @@ class IterableSimpleNamespace(SimpleNamespace): def set_logging(name=LOGGING_NAME, verbose=True): # sets up logging for the given name rank = int(os.getenv('RANK', -1)) # rank in world for Multi-GPU trainings - level = logging.INFO if verbose and rank in {-1, 0} else logging.ERROR + level = logging.INFO if verbose and rank in (-1, 0) else logging.ERROR logging.config.dictConfig({ 'version': 1, 'disable_existing_loggers': False, @@ -524,7 +524,7 @@ def set_sentry(): return event if SETTINGS['sync'] and \ - RANK in {-1, 0} and \ + RANK in (-1, 0) and \ Path(sys.argv[0]).name == 'yolo' and \ not TESTS_RUNNING and \ ONLINE and \ diff --git a/ultralytics/yolo/utils/benchmarks.py b/ultralytics/yolo/utils/benchmarks.py index 4a16114..20a1853 100644 --- a/ultralytics/yolo/utils/benchmarks.py +++ b/ultralytics/yolo/utils/benchmarks.py @@ -28,7 +28,7 @@ from pathlib import Path from ultralytics import YOLO from ultralytics.yolo.engine.exporter import export_formats -from ultralytics.yolo.utils import LINUX, LOGGER, ROOT, SETTINGS +from ultralytics.yolo.utils import LINUX, LOGGER, MACOS, ROOT, SETTINGS from ultralytics.yolo.utils.checks import check_yolo from ultralytics.yolo.utils.downloads import download from ultralytics.yolo.utils.files import file_size @@ -51,6 +51,8 @@ def benchmark(model=Path(SETTINGS['weights_dir']) / 'yolov8n.pt', imgsz=160, hal if model.task == 'classify': assert i != 11, 'paddle cls exports coming soon' assert i != 9 or LINUX, 'Edge TPU export only supported on Linux' + if i == 10: + assert MACOS or LINUX, 'TF.js export only supported on macOS and Linux' if 'cpu' in device.type: assert cpu, 'inference not supported on CPU' if 'cuda' in device.type: diff --git a/ultralytics/yolo/utils/downloads.py b/ultralytics/yolo/utils/downloads.py index 527c6f6..25137ec 100644 --- a/ultralytics/yolo/utils/downloads.py +++ b/ultralytics/yolo/utils/downloads.py @@ -118,7 +118,7 @@ def safe_download(url, raise ConnectionError(f'โŒ Download failure for {url}. 
Retry limit reached.') from e LOGGER.warning(f'โš ๏ธ Download failure, retrying {i + 1}/{retry} {url}...') - if unzip and f.exists() and f.suffix in {'.zip', '.tar', '.gz'}: + if unzip and f.exists() and f.suffix in ('.zip', '.tar', '.gz'): unzip_dir = dir or f.parent # unzip to dir if provided else unzip in place LOGGER.info(f'Unzipping {f} to {unzip_dir}...') if f.suffix == '.zip': diff --git a/ultralytics/yolo/utils/torch_utils.py b/ultralytics/yolo/utils/torch_utils.py index 61c7b72..6ab54d0 100644 --- a/ultralytics/yolo/utils/torch_utils.py +++ b/ultralytics/yolo/utils/torch_utils.py @@ -33,7 +33,7 @@ TORCH_1_12 = check_version(torch.__version__, '1.12.0') def torch_distributed_zero_first(local_rank: int): # Decorator to make all processes in distributed training wait for each local_master to do something initialized = torch.distributed.is_available() and torch.distributed.is_initialized() - if initialized and local_rank not in {-1, 0}: + if initialized and local_rank not in (-1, 0): dist.barrier(device_ids=[local_rank]) yield if initialized and local_rank == 0: diff --git a/ultralytics/yolo/v8/classify/val.py b/ultralytics/yolo/v8/classify/val.py index 9eed580..f4b503b 100644 --- a/ultralytics/yolo/v8/classify/val.py +++ b/ultralytics/yolo/v8/classify/val.py @@ -43,6 +43,8 @@ class ClassificationValidator(BaseValidator): return build_classification_dataloader(path=dataset_path, imgsz=self.args.imgsz, batch_size=batch_size, + augment=False, + shuffle=False, workers=self.args.workers) def print_results(self): diff --git a/ultralytics/yolo/v8/detect/predict.py b/ultralytics/yolo/v8/detect/predict.py index c83f39c..6443585 100644 --- a/ultralytics/yolo/v8/detect/predict.py +++ b/ultralytics/yolo/v8/detect/predict.py @@ -30,8 +30,8 @@ class DetectionPredictor(BasePredictor): results = [] for i, pred in enumerate(preds): orig_img = orig_imgs[i] if isinstance(orig_imgs, list) else orig_imgs - shape = orig_img.shape - pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], shape).round() + if not isinstance(orig_imgs, torch.Tensor): + pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape) path, _, _, _, _ = self.batch img_path = path[i] if isinstance(path, list) else path results.append(Results(orig_img=orig_img, path=img_path, names=self.model.names, boxes=pred)) diff --git a/ultralytics/yolo/v8/segment/predict.py b/ultralytics/yolo/v8/segment/predict.py index 2c004f0..41b436d 100644 --- a/ultralytics/yolo/v8/segment/predict.py +++ b/ultralytics/yolo/v8/segment/predict.py @@ -23,18 +23,19 @@ class SegmentationPredictor(DetectionPredictor): proto = preds[1][-1] if len(preds[1]) == 3 else preds[1] # second output is len 3 if pt, but only 1 if exported for i, pred in enumerate(p): orig_img = orig_imgs[i] if isinstance(orig_imgs, list) else orig_imgs - shape = orig_img.shape path, _, _, _, _ = self.batch img_path = path[i] if isinstance(path, list) else path if not len(pred): # save empty boxes results.append(Results(orig_img=orig_img, path=img_path, names=self.model.names, boxes=pred[:, :6])) continue if self.args.retina_masks: - pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], shape).round() - masks = ops.process_mask_native(proto[i], pred[:, 6:], pred[:, :4], shape[:2]) # HWC + if not isinstance(orig_imgs, torch.Tensor): + pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape) + masks = ops.process_mask_native(proto[i], pred[:, 6:], pred[:, :4], orig_img.shape[:2]) # HWC else: masks = ops.process_mask(proto[i], pred[:, 6:], pred[:, :4], 
img.shape[2:], upsample=True) # HWC - pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], shape).round() + if not isinstance(orig_imgs, torch.Tensor): + pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape) results.append( Results(orig_img=orig_img, path=img_path, names=self.model.names, boxes=pred[:, :6], masks=masks)) return results