From 4db686a31535968d34588fb4113bff66df8733e4 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Mon, 22 May 2023 13:31:19 +0200 Subject: [PATCH] `ultralytics 8.0.106` (#2736) Signed-off-by: dependabot[bot] Co-authored-by: vyskocj Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: triple Mu Co-authored-by: Ayush Chaurasia Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Laughing <61612323+Laughing-q@users.noreply.github.com> --- .github/workflows/ci.yaml | 34 +------ .github/workflows/publish.yml | 4 +- docker/Dockerfile | 2 +- docs/datasets/classify/caltech101.md | 75 +++++++++++++- docs/datasets/classify/caltech256.md | 70 ++++++++++++- docs/datasets/classify/cifar10.md | 72 +++++++++++++- docs/datasets/classify/cifar100.md | 72 +++++++++++++- docs/datasets/classify/fashion-mnist.md | 75 +++++++++++++- docs/datasets/classify/imagenet.md | 77 ++++++++++++++- docs/datasets/classify/imagenet10.md | 74 +++++++++++++- docs/datasets/classify/imagenette.md | 109 ++++++++++++++++++++- docs/datasets/classify/imagewoof.md | 80 ++++++++++++++- docs/datasets/classify/mnist.md | 2 +- docs/datasets/pose/coco.md | 87 +++++++++++++++- docs/datasets/pose/coco8-pose.md | 4 +- docs/datasets/segment/coco.md | 86 +++++++++++++++- docs/datasets/segment/coco8-seg.md | 4 +- docs/help/CI.md | 34 +++++++ docs/help/index.md | 1 + docs/hub/inference_api.md | 5 +- docs/models/yolov3.md | 74 +++++++++++++- docs/models/yolov5.md | 50 ++++++++-- docs/models/yolov8.md | 39 +++++++- docs/tasks/classify.md | 18 +--- docs/tasks/detect.md | 4 +- docs/tasks/pose.md | 8 +- docs/tasks/segment.md | 8 +- docs/yolov5/tutorials/train_custom_data.md | 2 +- mkdocs.yml | 1 + ultralytics/__init__.py | 2 +- ultralytics/nn/modules/head.py | 2 +- ultralytics/vit/rtdetr/predict.py | 8 +- ultralytics/yolo/data/dataset.py | 2 - ultralytics/yolo/engine/trainer.py | 12 ++- ultralytics/yolo/utils/__init__.py | 24 +---- ultralytics/yolo/utils/ops.py | 2 + ultralytics/yolo/utils/patches.py | 45 +++++++++ ultralytics/yolo/utils/torch_utils.py | 8 +- ultralytics/yolo/utils/tuner.py | 2 + ultralytics/yolo/v8/detect/val.py | 15 ++- ultralytics/yolo/v8/pose/val.py | 17 ++-- 41 files changed, 1159 insertions(+), 151 deletions(-) create mode 100644 docs/help/CI.md create mode 100644 ultralytics/yolo/utils/patches.py diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 04717be..5e9e07a 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -158,38 +158,6 @@ jobs: python --version pip --version pip list - - name: Test Detect - shell: bash # for Windows compatibility - run: | - yolo detect train data=coco8.yaml model=yolov8n.yaml epochs=1 imgsz=32 - yolo detect train data=coco8.yaml model=yolov8n.pt epochs=1 imgsz=32 - yolo detect val data=coco8.yaml model=runs/detect/train/weights/last.pt imgsz=32 - yolo detect predict model=runs/detect/train/weights/last.pt imgsz=32 source=ultralytics/assets/bus.jpg - yolo export model=runs/detect/train/weights/last.pt imgsz=32 format=torchscript - - name: Test Segment - shell: bash # for Windows compatibility - run: | - yolo segment train data=coco8-seg.yaml model=yolov8n-seg.yaml epochs=1 imgsz=32 - yolo segment train data=coco8-seg.yaml model=yolov8n-seg.pt epochs=1 imgsz=32 - yolo segment val data=coco8-seg.yaml model=runs/segment/train/weights/last.pt imgsz=32 - yolo segment predict model=runs/segment/train/weights/last.pt imgsz=32 source=ultralytics/assets/bus.jpg - yolo 
export model=runs/segment/train/weights/last.pt imgsz=32 format=torchscript - - name: Test Classify - shell: bash # for Windows compatibility - run: | - yolo classify train data=imagenet10 model=yolov8n-cls.yaml epochs=1 imgsz=32 - yolo classify train data=imagenet10 model=yolov8n-cls.pt epochs=1 imgsz=32 - yolo classify val data=imagenet10 model=runs/classify/train/weights/last.pt imgsz=32 - yolo classify predict model=runs/classify/train/weights/last.pt imgsz=32 source=ultralytics/assets/bus.jpg - yolo export model=runs/classify/train/weights/last.pt imgsz=32 format=torchscript - - name: Test Pose - shell: bash # for Windows compatibility - run: | - yolo pose train data=coco8-pose.yaml model=yolov8n-pose.yaml epochs=1 imgsz=32 - yolo pose train data=coco8-pose.yaml model=yolov8n-pose.pt epochs=1 imgsz=32 - yolo pose val data=coco8-pose.yaml model=runs/pose/train/weights/last.pt imgsz=32 - yolo pose predict model=runs/pose/train/weights/last.pt imgsz=32 source=ultralytics/assets/bus.jpg - yolo export model=runs/pose/train/weights/last.pt imgsz=32 format=torchscript - name: Pytest tests shell: bash # for Windows compatibility run: pytest tests @@ -201,7 +169,7 @@ jobs: steps: - name: Check for failure and notify if: (needs.HUB.result == 'failure' || needs.Benchmarks.result == 'failure' || needs.Tests.result == 'failure') && github.repository == 'ultralytics/ultralytics' && (github.event_name == 'schedule' || github.event_name == 'push') - uses: slackapi/slack-github-action@v1.23.0 + uses: slackapi/slack-github-action@v1.24.0 with: payload: | {"text": " GitHub Actions error for ${{ github.workflow }} ❌\n\n\n*Repository:* https://github.com/${{ github.repository }}\n*Action:* https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}\n*Author:* ${{ github.actor }}\n*Event:* ${{ github.event_name }}\n"} diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 38f92d2..c9be02c 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -94,7 +94,7 @@ jobs: echo "PR_TITLE=$PR_TITLE" >> $GITHUB_ENV - name: Notify on Slack (Success) if: success() && github.event_name == 'push' && steps.check_pypi.outputs.increment == 'True' - uses: slackapi/slack-github-action@v1.23.0 + uses: slackapi/slack-github-action@v1.24.0 with: payload: | {"text": " GitHub Actions success for ${{ github.workflow }} βœ…\n\n\n*Repository:* https://github.com/${{ github.repository }}\n*Action:* https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}\n*Author:* ${{ github.actor }}\n*Event:* NEW 'ultralytics ${{ steps.check_pypi.outputs.version }}' pip package published πŸ˜ƒ\n*Job Status:* ${{ job.status }}\n*Pull Request:* ${{ env.PR_TITLE }}\n"} @@ -102,7 +102,7 @@ jobs: SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_YOLO }} - name: Notify on Slack (Failure) if: failure() - uses: slackapi/slack-github-action@v1.23.0 + uses: slackapi/slack-github-action@v1.24.0 with: payload: | {"text": " GitHub Actions error for ${{ github.workflow }} ❌\n\n\n*Repository:* https://github.com/${{ github.repository }}\n*Action:* https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}\n*Author:* ${{ github.actor }}\n*Event:* ${{ github.event_name }}\n*Job Status:* ${{ job.status }}\n*Pull Request:* ${{ env.PR_TITLE }}\n"} diff --git a/docker/Dockerfile b/docker/Dockerfile index 656c074..fd8d809 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -29,7 +29,7 @@ ADD 
https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt /u # Install pip packages RUN python3 -m pip install --upgrade pip wheel -RUN pip install --no-cache -e . albumentations comet tensorboard +RUN pip install --no-cache -e . albumentations comet tensorboard thop # Set environment variables ENV OMP_NUM_THREADS=1 diff --git a/docs/datasets/classify/caltech101.md b/docs/datasets/classify/caltech101.md index 19480d2..44f34e8 100644 --- a/docs/datasets/classify/caltech101.md +++ b/docs/datasets/classify/caltech101.md @@ -1,7 +1,78 @@ --- comments: true +description: Learn about the Caltech-101 dataset, a collection of images for object recognition tasks in machine learning and computer vision algorithms. --- -# 🚧 Page Under Construction βš’ +# Caltech-101 Dataset -This page is currently under construction!️ πŸ‘·Please check back later for updates. πŸ˜ƒπŸ”œ +The [Caltech-101](https://data.caltech.edu/records/mzrjq-6wc02) dataset is a widely used dataset for object recognition tasks, containing around 9,000 images from 101 object categories. The categories were chosen to reflect a variety of real-world objects, and the images themselves were carefully selected and annotated to provide a challenging benchmark for object recognition algorithms. + +## Key Features + +- The Caltech-101 dataset comprises around 9,000 color images divided into 101 categories. +- The categories encompass a wide variety of objects, including animals, vehicles, household items, and people. +- The number of images per category varies, with about 40 to 800 images in each category. +- Images are of variable sizes, with most images being medium resolution. +- Caltech-101 is widely used for training and testing in the field of machine learning, particularly for object recognition tasks. + +## Dataset Structure + +Unlike many other datasets, the Caltech-101 dataset is not formally split into training and testing sets. Users typically create their own splits based on their specific needs. However, a common practice is to use a random subset of images for training (e.g., 30 images per category) and the remaining images for testing. + +## Applications + +The Caltech-101 dataset is extensively used for training and evaluating deep learning models in object recognition tasks, such as Convolutional Neural Networks (CNNs), Support Vector Machines (SVMs), and various other machine learning algorithms. Its wide variety of categories and high-quality images make it an excellent dataset for research and development in the field of machine learning and computer vision. + +## Usage + +To train a YOLO model on the Caltech-101 dataset for 100 epochs, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page. + +!!! example "Train Example" + + === "Python" + + ```python + from ultralytics import YOLO + + # Load a model + model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training) + + # Train the model + model.train(data='caltech101', epochs=100, imgsz=416) + ``` + + === "CLI" + + ```bash + # Start training from a pretrained *.pt model + yolo detect train data=caltech101 model=yolov8n-cls.pt epochs=100 imgsz=416 + ``` + +## Sample Images and Annotations + +The Caltech-101 dataset contains high-quality color images of various objects, providing a well-structured dataset for object recognition tasks. 
Here are some examples of images from the dataset: + +![Dataset sample image](https://user-images.githubusercontent.com/26833433/239366386-44171121-b745-4206-9b59-a3be41e16089.png) + +The example showcases the variety and complexity of the objects in the Caltech-101 dataset, emphasizing the significance of a diverse dataset for training robust object recognition models. + +## Citations and Acknowledgments + +If you use the Caltech-101 dataset in your research or development work, please cite the following paper: + +```bibtex +@article{fei2007learning, + title={Learning generative visual models from few training examples: An incremental Bayesian approach tested on 101 object categories}, + author={Fei-Fei, Li and Fergus, Rob and Perona, Pietro}, + journal={Computer vision and Image understanding}, + volume={106}, + number={1}, + pages={59--70}, + year={2007}, + publisher={Elsevier} +} +``` + +We would like to acknowledge Li Fei-Fei, Rob Fergus, and Pietro Perona for creating and maintaining the Caltech-101 dataset as a valuable resource for the machine learning and computer vision research community. For more information about the Caltech-101 dataset and its creators, visit the [Caltech-101 dataset website](https://data.caltech.edu/records/mzrjq-6wc02). \ No newline at end of file diff --git a/docs/datasets/classify/caltech256.md b/docs/datasets/classify/caltech256.md index 19480d2..fbd444e 100644 --- a/docs/datasets/classify/caltech256.md +++ b/docs/datasets/classify/caltech256.md @@ -1,7 +1,73 @@ --- comments: true +description: Learn about the Caltech-256 dataset, a broad collection of images used for object classification tasks in machine learning and computer vision algorithms. --- -# 🚧 Page Under Construction βš’ +# Caltech-256 Dataset -This page is currently under construction!️ πŸ‘·Please check back later for updates. πŸ˜ƒπŸ”œ +The [Caltech-256](https://data.caltech.edu/records/nyy15-4j048) dataset is an extensive collection of images used for object classification tasks. It contains around 30,000 images divided into 257 categories (256 object categories and 1 background category). The images are carefully curated and annotated to provide a challenging and diverse benchmark for object recognition algorithms. + +## Key Features + +- The Caltech-256 dataset comprises around 30,000 color images divided into 257 categories. +- Each category contains a minimum of 80 images. +- The categories encompass a wide variety of real-world objects, including animals, vehicles, household items, and people. +- Images are of variable sizes and resolutions. +- Caltech-256 is widely used for training and testing in the field of machine learning, particularly for object recognition tasks. + +## Dataset Structure + +Like Caltech-101, the Caltech-256 dataset does not have a formal split between training and testing sets. Users typically create their own splits according to their specific needs. A common practice is to use a random subset of images for training and the remaining images for testing. + +## Applications + +The Caltech-256 dataset is extensively used for training and evaluating deep learning models in object recognition tasks, such as Convolutional Neural Networks (CNNs), Support Vector Machines (SVMs), and various other machine learning algorithms. Its diverse set of categories and high-quality images make it an invaluable dataset for research and development in the field of machine learning and computer vision.
+ +## Usage + +To train a YOLO model on the Caltech-256 dataset for 100 epochs, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page. + +!!! example "Train Example" + + === "Python" + + ```python + from ultralytics import YOLO + + # Load a model + model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training) + + # Train the model + model.train(data='caltech256', epochs=100, imgsz=416) + ``` + + === "CLI" + + ```bash + # Start training from a pretrained *.pt model + yolo detect train data=caltech256 model=yolov8n-cls.pt epochs=100 imgsz=416 + ``` + +## Sample Images and Annotations + +The Caltech-256 dataset contains high-quality color images of various objects, providing a comprehensive dataset for object recognition tasks. Here are some examples of images from the dataset ([credit](https://ml4a.github.io/demos/tsne_viewer.html)): + +![Dataset sample image](https://user-images.githubusercontent.com/26833433/239365061-1e5f7857-b1e8-44ca-b3d7-d0befbcd33f9.jpg) + +The example showcases the diversity and complexity of the objects in the Caltech-256 dataset, emphasizing the importance of a varied dataset for training robust object recognition models. + +## Citations and Acknowledgments + +If you use the Caltech-256 dataset in your research or development work, please cite the following paper: + +```bibtex +@article{griffin2007caltech, + title={Caltech-256 object category dataset}, + author={Griffin, Gregory and Holub, Alex and Perona, Pietro}, + year={2007} +} +``` + +We would like to acknowledge Gregory Griffin, Alex Holub, and Pietro Perona for creating and maintaining the Caltech-256 dataset as a valuable resource for the machine learning and computer vision research community. For more information about the Caltech-256 dataset and its creators, visit the [Caltech-256 dataset website](https://data.caltech.edu/records/nyy15-4j048). \ No newline at end of file diff --git a/docs/datasets/classify/cifar10.md b/docs/datasets/classify/cifar10.md index 19480d2..c01829f 100644 --- a/docs/datasets/classify/cifar10.md +++ b/docs/datasets/classify/cifar10.md @@ -1,7 +1,75 @@ --- comments: true +description: Learn about the CIFAR-10 dataset, a collection of images that are commonly used to train machine learning and computer vision algorithms. --- -# 🚧 Page Under Construction βš’ +# CIFAR-10 Dataset -This page is currently under construction!️ πŸ‘·Please check back later for updates. πŸ˜ƒπŸ”œ +The [CIFAR-10](https://www.cs.toronto.edu/~kriz/cifar.html) (Canadian Institute For Advanced Research) dataset is a collection of images used widely for machine learning and computer vision algorithms. It was developed by researchers at the CIFAR institute and consists of 60,000 32x32 color images in 10 different classes. + +## Key Features + +- The CIFAR-10 dataset consists of 60,000 images, divided into 10 classes. +- Each class contains 6,000 images, split into 5,000 for training and 1,000 for testing. +- The images are colored and of size 32x32 pixels. +- The 10 different classes represent airplanes, cars, birds, cats, deer, dogs, frogs, horses, ships, and trucks. +- CIFAR-10 is commonly used for training and testing in the field of machine learning and computer vision. + +## Dataset Structure + +The CIFAR-10 dataset is split into two subsets: + +1. **Training Set**: This subset contains 50,000 images used for training machine learning models. +2.
**Testing Set**: This subset consists of 10,000 images used for testing and benchmarking the trained models. + +## Applications + +The CIFAR-10 dataset is widely used for training and evaluating deep learning models in image classification tasks, such as Convolutional Neural Networks (CNNs), Support Vector Machines (SVMs), and various other machine learning algorithms. The diversity of the dataset in terms of classes and the presence of color images make it a well-rounded dataset for research and development in the field of machine learning and computer vision. + +## Usage + +To train a YOLO model on the CIFAR-10 dataset for 100 epochs with an image size of 32x32, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page. + +!!! example "Train Example" + + === "Python" + + ```python + from ultralytics import YOLO + + # Load a model + model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training) + + # Train the model + model.train(data='cifar10', epochs=100, imgsz=32) + ``` + + === "CLI" + + ```bash + # Start training from a pretrained *.pt model + yolo detect train data=cifar10 model=yolov8n-cls.pt epochs=100 imgsz=32 + ``` + +## Sample Images and Annotations + +The CIFAR-10 dataset contains color images of various objects, providing a well-structured dataset for image classification tasks. Here are some examples of images from the dataset: + +![Dataset sample image](https://miro.medium.com/max/1100/1*SZnidBt7CQ4Xqcag6rd8Ew.png) + +The example showcases the variety and complexity of the objects in the CIFAR-10 dataset, highlighting the importance of a diverse dataset for training robust image classification models. + +## Citations and Acknowledgments + +If you use the CIFAR-10 dataset in your research or development work, please cite the following paper: + +```bibtex +@TECHREPORT{Krizhevsky09learningmultiple, + author = {Alex Krizhevsky}, + title = {Learning multiple layers of features from tiny images}, + institution = {}, + year = {2009} +} +``` + +We would like to acknowledge Alex Krizhevsky for creating and maintaining the CIFAR-10 dataset as a valuable resource for the machine learning and computer vision research community. For more information about the CIFAR-10 dataset and its creator, visit the [CIFAR-10 dataset website](https://www.cs.toronto.edu/~kriz/cifar.html). \ No newline at end of file diff --git a/docs/datasets/classify/cifar100.md b/docs/datasets/classify/cifar100.md index 19480d2..f7d2ac2 100644 --- a/docs/datasets/classify/cifar100.md +++ b/docs/datasets/classify/cifar100.md @@ -1,7 +1,75 @@ --- comments: true +description: Learn about the CIFAR-100 dataset, a collection of images that are commonly used to train machine learning and computer vision algorithms. --- -# 🚧 Page Under Construction βš’ +# CIFAR-100 Dataset -This page is currently under construction!️ πŸ‘·Please check back later for updates. πŸ˜ƒπŸ”œ +The [CIFAR-100](https://www.cs.toronto.edu/~kriz/cifar.html) (Canadian Institute For Advanced Research) dataset is a significant extension of the CIFAR-10 dataset, composed of 60,000 32x32 color images in 100 different classes. It was developed by researchers at the CIFAR institute, offering a more challenging dataset for more complex machine learning and computer vision tasks. + +## Key Features + +- The CIFAR-100 dataset consists of 60,000 images, divided into 100 classes. 
+- Each class contains 600 images, split into 500 for training and 100 for testing. +- The images are colored and of size 32x32 pixels. +- The 100 different classes are grouped into 20 coarse categories for higher level classification. +- CIFAR-100 is commonly used for training and testing in the field of machine learning and computer vision. + +## Dataset Structure + +The CIFAR-100 dataset is split into two subsets: + +1. **Training Set**: This subset contains 50,000 images used for training machine learning models. +2. **Testing Set**: This subset consists of 10,000 images used for testing and benchmarking the trained models. + +## Applications + +The CIFAR-100 dataset is extensively used for training and evaluating deep learning models in image classification tasks, such as Convolutional Neural Networks (CNNs), Support Vector Machines (SVMs), and various other machine learning algorithms. The diversity of the dataset in terms of classes and the presence of color images make it a more challenging and comprehensive dataset for research and development in the field of machine learning and computer vision. + +## Usage + +To train a YOLO model on the CIFAR-100 dataset for 100 epochs with an image size of 32x32, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page. + +!!! example "Train Example" + + === "Python" + + ```python + from ultralytics import YOLO + + # Load a model + model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training) + + # Train the model + model.train(data='cifar100', epochs=100, imgsz=32) + ``` + + === "CLI" + + ```bash + # Start training from a pretrained *.pt model + yolo detect train data=cifar100 model=yolov8n-cls.pt epochs=100 imgsz=32 + ``` + +## Sample Images and Annotations + +The CIFAR-100 dataset contains color images of various objects, providing a well-structured dataset for image classification tasks. Here are some examples of images from the dataset: + +![Dataset sample image](https://user-images.githubusercontent.com/26833433/239363319-62ebf02f-7469-4178-b066-ccac3cd334db.jpg) + +The example showcases the variety and complexity of the objects in the CIFAR-100 dataset, highlighting the importance of a diverse dataset for training robust image classification models. + +## Citations and Acknowledgments + +If you use the CIFAR-100 dataset in your research or development work, please cite the following paper: + +```bibtex +@TECHREPORT{Krizhevsky09learningmultiple, + author = {Alex Krizhevsky}, + title = {Learning multiple layers of features from tiny images}, + institution = {}, + year = {2009} +} +``` + +We would like to acknowledge Alex Krizhevsky for creating and maintaining the CIFAR-100 dataset as a valuable resource for the machine learning and computer vision research community. For more information about the CIFAR-100 dataset and its creator, visit the [CIFAR-100 dataset website](https://www.cs.toronto.edu/~kriz/cifar.html). \ No newline at end of file diff --git a/docs/datasets/classify/fashion-mnist.md b/docs/datasets/classify/fashion-mnist.md index 19480d2..4a947a2 100644 --- a/docs/datasets/classify/fashion-mnist.md +++ b/docs/datasets/classify/fashion-mnist.md @@ -1,7 +1,78 @@ --- comments: true +description: Learn about the Fashion-MNIST dataset, a large database of Zalando's article images used for training various image processing systems and machine learning models. 
--- -# 🚧 Page Under Construction βš’ +# Fashion-MNIST Dataset -This page is currently under construction!️ πŸ‘·Please check back later for updates. πŸ˜ƒπŸ”œ +The [Fashion-MNIST](https://github.com/zalandoresearch/fashion-mnist) dataset is a database of Zalando's article imagesβ€”consisting of a training set of 60,000 examples and a test set of 10,000 examples. Each example is a 28x28 grayscale image, associated with a label from 10 classes. Fashion-MNIST is intended to serve as a direct drop-in replacement for the original MNIST dataset for benchmarking machine learning algorithms. + +## Key Features + +- Fashion-MNIST contains 60,000 training images and 10,000 testing images of Zalando's article images. +- The dataset comprises grayscale images of size 28x28 pixels. +- Each pixel has a single pixel-value associated with it, indicating the lightness or darkness of that pixel, with higher numbers meaning darker. This pixel-value is an integer between 0 and 255. +- Fashion-MNIST is widely used for training and testing in the field of machine learning, especially for image classification tasks. + +## Dataset Structure + +The Fashion-MNIST dataset is split into two subsets: + +1. **Training Set**: This subset contains 60,000 images used for training machine learning models. +2. **Testing Set**: This subset consists of 10,000 images used for testing and benchmarking the trained models. + +## Labels + +Each training and test example is assigned to one of the following labels: + +0. T-shirt/top +1. Trouser +2. Pullover +3. Dress +4. Coat +5. Sandal +6. Shirt +7. Sneaker +8. Bag +9. Ankle boot + +## Applications + +The Fashion-MNIST dataset is widely used for training and evaluating deep learning models in image classification tasks, such as Convolutional Neural Networks (CNNs), Support Vector Machines (SVMs), and various other machine learning algorithms. The dataset's simple and well-structured format makes it an essential resource for researchers and practitioners in the field of machine learning and computer vision. + +## Usage + +To train a CNN model on the Fashion-MNIST dataset for 100 epochs with an image size of 28x28, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page. + +!!! example "Train Example" + + === "Python" + + ```python + from ultralytics import YOLO + + # Load a model + model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training) + + # Train the model + model.train(data='fashion-mnist', epochs=100, imgsz=28) + ``` + + === "CLI" + + ```bash + # Start training from a pretrained *.pt model + yolo detect train data=fashion-mnist model=yolov8n-cls.pt epochs=100 imgsz=28 + ``` + +## Sample Images and Annotations + +The Fashion-MNIST dataset contains grayscale images of Zalando's article images, providing a well-structured dataset for image classification tasks. Here are some examples of images from the dataset: + +![Dataset sample image](https://user-images.githubusercontent.com/26833433/239359139-ce0a434e-9056-43e0-a306-3214f193dcce.png) + +The example showcases the variety and complexity of the images in the Fashion-MNIST dataset, highlighting the importance of a diverse dataset for training robust image classification models. + +## Acknowledgments + +If you use the Fashion-MNIST dataset in your research or development work, please acknowledge the dataset by linking to the [GitHub repository](https://github.com/zalandoresearch/fashion-mnist). 
This dataset was made available by Zalando Research. diff --git a/docs/datasets/classify/imagenet.md b/docs/datasets/classify/imagenet.md index 19480d2..1fc6a42 100644 --- a/docs/datasets/classify/imagenet.md +++ b/docs/datasets/classify/imagenet.md @@ -1,7 +1,80 @@ --- comments: true +description: Learn about the ImageNet dataset, a large-scale database of annotated images commonly used for training deep learning models in computer vision tasks. --- -# 🚧 Page Under Construction βš’ +# ImageNet Dataset -This page is currently under construction!️ πŸ‘·Please check back later for updates. πŸ˜ƒπŸ”œ +[ImageNet](https://www.image-net.org/) is a large-scale database of annotated images designed for use in visual object recognition research. It contains over 14 million images, with each image annotated using WordNet synsets, making it one of the most extensive resources available for training deep learning models in computer vision tasks. + +## Key Features + +- ImageNet contains over 14 million high-resolution images spanning thousands of object categories. +- The dataset is organized according to the WordNet hierarchy, with each synset representing a category. +- ImageNet is widely used for training and benchmarking in the field of computer vision, particularly for image classification and object detection tasks. +- The annual ImageNet Large Scale Visual Recognition Challenge (ILSVRC) has been instrumental in advancing computer vision research. + +## Dataset Structure + +The ImageNet dataset is organized using the WordNet hierarchy. Each node in the hierarchy represents a category, and each category is described by a synset (a collection of synonymous terms). The images in ImageNet are annotated with one or more synsets, providing a rich resource for training models to recognize various objects and their relationships. + +## ImageNet Large Scale Visual Recognition Challenge (ILSVRC) + +The annual [ImageNet Large Scale Visual Recognition Challenge (ILSVRC)](http://image-net.org/challenges/LSVRC/) has been an important event in the field of computer vision. It has provided a platform for researchers and developers to evaluate their algorithms and models on a large-scale dataset with standardized evaluation metrics. The ILSVRC has led to significant advancements in the development of deep learning models for image classification, object detection, and other computer vision tasks. + +## Applications + +The ImageNet dataset is widely used for training and evaluating deep learning models in various computer vision tasks, such as image classification, object detection, and object localization. Some popular deep learning architectures, such as AlexNet, VGG, and ResNet, were developed and benchmarked using the ImageNet dataset. + +## Usage + +To train a deep learning model on the ImageNet dataset for 100 epochs with an image size of 224x224, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page. + +!!! 
example "Train Example" + + === "Python" + + ```python + from ultralytics import YOLO + + # Load a model + model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training) + + # Train the model + model.train(data='imagenet', epochs=100, imgsz=224) + ``` + + === "CLI" + + ```bash + # Start training from a pretrained *.pt model + yolo train data=imagenet model=yolov8n-cls.pt epochs=100 imgsz=224 + ``` + +## Sample Images and Annotations + +The ImageNet dataset contains high-resolution images spanning thousands of object categories, providing a diverse and extensive dataset for training and evaluating computer vision models. Here are some examples of images from the dataset: + +![Dataset sample images](https://user-images.githubusercontent.com/26833433/239280348-3d8f30c7-6f05-4dda-9cfe-d62ad9faecc9.png) + +The example showcases the variety and complexity of the images in the ImageNet dataset, highlighting the importance of a diverse dataset for training robust computer vision models. + +## Citations and Acknowledgments + +If you use the ImageNet dataset in your research or development work, please cite the following paper: + +```bibtex +@article{ILSVRC15, + Author = {Olga Russakovsky + + and Jia Deng and Hao Su and Jonathan Krause and Sanjeev Satheesh and Sean Ma and Zhiheng Huang and Andrej Karpathy and Aditya Khosla and Michael Bernstein and Alexander C. Berg and Li Fei-Fei}, + Title = { {ImageNet Large Scale Visual Recognition Challenge}}, + Year = {2015}, + journal = {International Journal of Computer Vision (IJCV)}, + volume={115}, + number={3}, + pages={211-252} +} +``` + +We would like to acknowledge the ImageNet team, led by Olga Russakovsky, Jia Deng, and Li Fei-Fei, for creating and maintaining the ImageNet dataset as a valuable resource for the machine learning and computer vision research community. For more information about the ImageNet dataset and its creators, visit the [ImageNet website](https://www.image-net.org/). \ No newline at end of file diff --git a/docs/datasets/classify/imagenet10.md b/docs/datasets/classify/imagenet10.md index 19480d2..4258582 100644 --- a/docs/datasets/classify/imagenet10.md +++ b/docs/datasets/classify/imagenet10.md @@ -1,7 +1,77 @@ --- comments: true +description: Learn about the ImageNet10 dataset, a compact subset of the original ImageNet dataset designed for quick testing, CI tests, and sanity checks. --- -# 🚧 Page Under Construction βš’ +# ImageNet10 Dataset -This page is currently under construction!️ πŸ‘·Please check back later for updates. πŸ˜ƒπŸ”œ +The [ImageNet10](https://github.com/ultralytics/yolov5/releases/download/v1.0/imagenet10.zip) dataset is a small-scale subset of the [ImageNet](https://www.image-net.org/) database, developed by [Ultralytics](https://ultralytics.com) and designed for CI tests, sanity checks, and fast testing of training pipelines. This dataset is composed of the first image in the training set and the first image from the validation set of the first 10 classes in ImageNet. Although significantly smaller, it retains the structure and diversity of the original ImageNet dataset. + +## Key Features + +- ImageNet10 is a compact version of ImageNet, with 20 images representing the first 10 classes of the original dataset. +- The dataset is organized according to the WordNet hierarchy, mirroring the structure of the full ImageNet dataset. +- It is ideally suited for CI tests, sanity checks, and rapid testing of training pipelines in computer vision tasks. 
+- Although not designed for model benchmarking, it can provide a quick indication of a model's basic functionality and correctness. + +## Dataset Structure + +The ImageNet10 dataset, like the original ImageNet, is organized using the WordNet hierarchy. Each of the 10 classes in ImageNet10 is described by a synset (a collection of synonymous terms). The images in ImageNet10 are annotated with one or more synsets, providing a compact resource for testing models to recognize various objects and their relationships. + +## Applications + +The ImageNet10 dataset is useful for quickly testing and debugging computer vision models and pipelines. Its small size allows for rapid iteration, making it ideal for continuous integration tests and sanity checks. It can also be used for fast preliminary testing of new models or changes to existing models before moving on to full-scale testing with the complete ImageNet dataset. + +## Usage + +To test a deep learning model on the ImageNet10 dataset with an image size of 224x224, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page. + +!!! example "Test Example" + + === "Python" + + ```python + from ultralytics import YOLO + + # Load a model + model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training) + + # Train the model + model.train(data='imagenet10', epochs=5, imgsz=224) + ``` + + === "CLI" + + ```bash + # Start training from a pretrained *.pt model + yolo train data=imagenet10 model=yolov8n-cls.pt epochs=5 imgsz=224 + ``` + +## Sample Images and Annotations + +The ImageNet10 dataset contains a subset of images from the original ImageNet dataset. These images are chosen to represent the first 10 classes in the dataset, providing a diverse yet compact dataset for quick testing and evaluation. + +![Dataset sample images](https://user-images.githubusercontent.com/26833433/239689723-16f9b4a7-becc-4deb-b875-d3e5c28eb03b.png) +The example showcases the variety and complexity of the images in the ImageNet10 dataset, highlighting its usefulness for sanity checks and quick testing of computer vision models. + +## Citations and Acknowledgments + +If you use the ImageNet10 dataset in your research or development work, please cite the original ImageNet paper: + +```bibtex +@article{ILSVRC15, + Author = {Olga Russakovsky and Jia Deng and Hao Su and Jonathan Krause and Sanjeev Satheesh and Sean Ma and Zhiheng Huang and Andrej Karpathy and Aditya Khosla and Michael Bernstein and Alexander C. Berg and Li Fei-Fei}, + Title = { {ImageNet Large Scale Visual Recognition Challenge}}, + Year = {2015}, + journal = {International Journal of Computer Vision (IJCV)}, + volume={115}, + number={3}, + pages={211-252} +} +``` + +We would like to acknowledge the ImageNet team, led by Olga Russakovsky, Jia Deng, and Li Fei-Fei, for creating and maintaining the ImageNet dataset. The ImageNet10 dataset, while a compact subset, is a valuable resource for quick testing and debugging in the machine learning and computer vision research community. For more information about the ImageNet dataset and its creators, visit the [ImageNet website](https://www.image-net.org/).
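Since ImageNet10 exists for sanity checks rather than benchmarking, a quick validation pass is often all that is needed once the short training run above completes. The following is a minimal sketch, assuming the same `yolov8n-cls.pt` checkpoint and `imagenet10` dataset shortcut used above; the `top1`/`top5` attribute names on the returned metrics object are an assumption for illustration:

```python
from ultralytics import YOLO

# Load the small classification checkpoint used in the examples above
model = YOLO('yolov8n-cls.pt')

# Run a quick validation pass on ImageNet10 as a sanity check
metrics = model.val(data='imagenet10', imgsz=224)

# Assumed attribute names for top-1/top-5 accuracy on the metrics object
print(metrics.top1, metrics.top5)
```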
\ No newline at end of file diff --git a/docs/datasets/classify/imagenette.md b/docs/datasets/classify/imagenette.md index 19480d2..0f77bfa 100644 --- a/docs/datasets/classify/imagenette.md +++ b/docs/datasets/classify/imagenette.md @@ -1,7 +1,112 @@ --- comments: true +description: Learn about the ImageNette dataset, a subset of 10 easily classified classes from the Imagenet dataset commonly used for training various image processing systems and machine learning models. --- -# 🚧 Page Under Construction βš’ +# ImageNette Dataset -This page is currently under construction!️ πŸ‘·Please check back later for updates. πŸ˜ƒπŸ”œ +The [ImageNette](https://github.com/fastai/imagenette) dataset is a subset of the larger [Imagenet](http://www.image-net.org/) dataset, but it only includes 10 easily distinguishable classes. It was created to provide a quicker, easier-to-use version of Imagenet for software development and education. + +## Key Features + +- ImageNette contains images from 10 different classes such as tench, English springer, cassette player, chain saw, church, French horn, garbage truck, gas pump, golf ball, parachute. +- The dataset comprises colored images of varying dimensions. +- ImageNette is widely used for training and testing in the field of machine learning, especially for image classification tasks. + +## Dataset Structure + +The ImageNette dataset is split into two subsets: + +1. **Training Set**: This subset contains several thousands of images used for training machine learning models. The exact number varies per class. +2. **Validation Set**: This subset consists of several hundreds of images used for validating and benchmarking the trained models. Again, the exact number varies per class. + +## Applications + +The ImageNette dataset is widely used for training and evaluating deep learning models in image classification tasks, such as Convolutional Neural Networks (CNNs), and various other machine learning algorithms. The dataset's straightforward format and well-chosen classes make it a handy resource for both beginner and experienced practitioners in the field of machine learning and computer vision. + +## Usage + +To train a model on the ImageNette dataset for 100 epochs with a standard image size of 224x224, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page. + +!!! example "Train Example" + + === "Python" + + ```python + from ultralytics import YOLO + + # Load a model + model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training) + + # Train the model + model.train(data='imagenette', epochs=100, imgsz=224) + ``` + + === "CLI" + + ```bash + # Start training from a pretrained *.pt model + yolo detect train data=imagenette model=yolov8n-cls.pt epochs=100 imgsz=224 + ``` + +## Sample Images and Annotations + +The ImageNette dataset contains colored images of various objects and scenes, providing a diverse dataset for image classification tasks. Here are some examples of images from the dataset: + +![Dataset sample image](https://docs.fast.ai/22_tutorial.imagenette_files/figure-html/cell-21-output-1.png) + +The example showcases the variety and complexity of the images in the ImageNette dataset, highlighting the importance of a diverse dataset for training robust image classification models. 
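After training on ImageNette, the resulting classifier can be applied to new images with the same Python API. The snippet below is a minimal inference sketch; the checkpoint path is hypothetical (adjust it to your own run directory) and the `probs` attribute access is an assumption for illustration:

```python
from ultralytics import YOLO

# Hypothetical path to a checkpoint produced by the training example above
model = YOLO('runs/classify/train/weights/best.pt')

# Classify a sample image (local path or URL)
results = model('https://ultralytics.com/images/bus.jpg')

# Class probability output for the first image -- attribute name assumed
print(results[0].probs)
```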
+ +## ImageNette160 and ImageNette320 + +For faster prototyping and training, the ImageNette dataset is also available in two reduced sizes: ImageNette160 and ImageNette320. These datasets maintain the same classes and structure as the full ImageNette dataset, but the images are resized to a smaller dimension. As such, these versions of the dataset are particularly useful for preliminary model testing, or when computational resources are limited. + +To use these datasets, simply replace 'imagenette' with 'imagenette160' or 'imagenette320' in the training command. The following code snippets illustrate this: + +!!! example "Train Example with ImageNette160" + + === "Python" + + ```python + from ultralytics import YOLO + + # Load a model + model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training) + + # Train the model with ImageNette160 + model.train(data='imagenette160', epochs=100, imgsz=160) + ``` + + === "CLI" + + ```bash + # Start training from a pretrained *.pt model with ImageNette160 + yolo detect train data=imagenette160 model=yolov8n-cls.pt epochs=100 imgsz=160 + ``` + +!!! example "Train Example with ImageNette320" + + === "Python" + + ```python + from ultralytics import YOLO + + # Load a model + model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training) + + # Train the model with ImageNette320 + model.train(data='imagenette320', epochs=100, imgsz=320) + ``` + + === "CLI" + + ```bash + # Start training from a pretrained *.pt model with ImageNette320 + yolo detect train data=imagenette320 model=yolov8n-cls.pt epochs=100 imgsz=320 + ``` + +These smaller versions of the dataset allow for rapid iterations during the development process while still providing valuable and realistic image classification tasks. + +## Citations and Acknowledgments + +If you use the ImageNette dataset in your research or development work, please acknowledge it appropriately. For more information about the ImageNette dataset, visit the [ImageNette dataset GitHub page](https://github.com/fastai/imagenette). \ No newline at end of file diff --git a/docs/datasets/classify/imagewoof.md b/docs/datasets/classify/imagewoof.md index 19480d2..63d0f54 100644 --- a/docs/datasets/classify/imagewoof.md +++ b/docs/datasets/classify/imagewoof.md @@ -1,7 +1,83 @@ --- comments: true +description: Learn about the ImageWoof dataset, a subset of the ImageNet consisting of 10 challenging-to-classify dog breed classes. --- -# 🚧 Page Under Construction βš’ +# ImageWoof Dataset -This page is currently under construction!️ πŸ‘·Please check back later for updates. πŸ˜ƒπŸ”œ +The [ImageWoof](https://github.com/fastai/imagenette) dataset is a subset of the ImageNet consisting of 10 classes that are challenging to classify, since they're all dog breeds. It was created as a more difficult task for image classification algorithms to solve, aiming at encouraging development of more advanced models. + +## Key Features + +- ImageWoof contains images of 10 different dog breeds: Australian terrier, Border terrier, Samoyed, Beagle, Shih-Tzu, English foxhound, Rhodesian ridgeback, Dingo, Golden retriever, and Old English sheepdog. +- The dataset provides images at various resolutions (full size, 320px, 160px), accommodating for different computational capabilities and research needs. +- It also includes a version with noisy labels, providing a more realistic scenario where labels might not always be reliable. 
+ +## Dataset Structure + +The ImageWoof dataset structure is based on the dog breed classes, with each breed having its own directory of images. + +## Applications + +The ImageWoof dataset is widely used for training and evaluating deep learning models in image classification tasks, especially when it comes to more complex and similar classes. The dataset's challenge lies in the subtle differences between the dog breeds, pushing the limits of models' performance and generalization. + +## Usage + +To train a CNN model on the ImageWoof dataset for 100 epochs with an image size of 224x224, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page. + +!!! example "Train Example" + + === "Python" + + ```python + from ultralytics import YOLO + + # Load a model + model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training) + + # Train the model + model.train(data='imagewoof', epochs=100, imgsz=224) + ``` + + === "CLI" + + ```bash + # Start training from a pretrained *.pt model + yolo detect train data=imagewoof model=yolov8n-cls.pt epochs=100 imgsz=224 + ``` + +## Dataset Variants + +The ImageWoof dataset comes in three different sizes to accommodate various research needs and computational capabilities: + +1. **Full Size (imagewoof)**: This is the original version of the ImageWoof dataset. It contains full-sized images and is ideal for final training and performance benchmarking. + +2. **Medium Size (imagewoof320)**: This version contains images resized to have a maximum edge length of 320 pixels. It's suitable for faster training without significantly sacrificing model performance. + +3. **Small Size (imagewoof160)**: This version contains images resized to have a maximum edge length of 160 pixels. It's designed for rapid prototyping and experimentation where training speed is a priority. + +To use these variants in your training, simply replace 'imagewoof' in the dataset argument with 'imagewoof320' or 'imagewoof160'. For example: + +```python +# For medium-sized dataset +model.train(data='imagewoof320', epochs=100, imgsz=224) + +# For small-sized dataset +model.train(data='imagewoof160', epochs=100, imgsz=224) +``` + +It's important to note that using smaller images will likely yield lower performance in terms of classification accuracy. However, it's an excellent way to iterate quickly in the early stages of model development and prototyping. + +## Sample Images and Annotations + +The ImageWoof dataset contains colorful images of various dog breeds, providing a challenging dataset for image classification tasks. Here are some examples of images from the dataset: + +![Dataset sample image](https://user-images.githubusercontent.com/26833433/239357533-ec833254-4351-491b-8cb3-59578ea5d0b2.png) + +The example showcases the subtle differences and similarities among the different dog breeds in the ImageWoof dataset, highlighting the complexity and difficulty of the classification task. + +## Citations and Acknowledgments + +If you use the ImageWoof dataset in your research or development work, please make sure to acknowledge the creators of the dataset by linking to the [official dataset repository](https://github.com/fastai/imagenette). At the time of writing, there is no official publication specifically about ImageWoof to cite.
+ +We would like to acknowledge the FastAI team for creating and maintaining the ImageWoof dataset as a valuable resource for the machine learning and computer vision research community. For more information about the ImageWoof dataset, visit the [ImageWoof dataset repository](https://github.com/fastai/imagenette). \ No newline at end of file diff --git a/docs/datasets/classify/mnist.md b/docs/datasets/classify/mnist.md index c19708a..5f99583 100644 --- a/docs/datasets/classify/mnist.md +++ b/docs/datasets/classify/mnist.md @@ -51,7 +51,7 @@ To train a CNN model on the MNIST dataset for 100 epochs with an image size of 3 ```bash # Start training from a pretrained *.pt model - cnn detect train data=MNIST.yaml model=cnn_mnist.pt epochs=100 imgsz=28 + cnn detect train data=mnist model=yolov8n-cls.pt epochs=100 imgsz=28 ``` ## Sample Images and Annotations diff --git a/docs/datasets/pose/coco.md b/docs/datasets/pose/coco.md index 19480d2..772b327 100644 --- a/docs/datasets/pose/coco.md +++ b/docs/datasets/pose/coco.md @@ -1,7 +1,90 @@ --- comments: true +description: Learn about the COCO-Pose dataset, designed to encourage research on pose estimation tasks with standardized evaluation metrics. --- -# 🚧 Page Under Construction βš’ +# COCO-Pose Dataset -This page is currently under construction!️ πŸ‘·Please check back later for updates. πŸ˜ƒπŸ”œ +The [COCO-Pose](https://cocodataset.org/#keypoints-2017) dataset is a specialized version of the COCO (Common Objects in Context) dataset, designed for pose estimation tasks. It leverages the COCO Keypoints 2017 images and labels to enable the training of models like YOLO for pose estimation tasks. + +![Pose sample image](https://user-images.githubusercontent.com/26833433/239691398-d62692dc-713e-4207-9908-2f6710050e5c.jpg) + +## Key Features + +- COCO-Pose builds upon the COCO Keypoints 2017 dataset which contains 200K images labeled with keypoints for pose estimation tasks. +- The dataset supports 17 keypoints for human figures, facilitating detailed pose estimation. +- Like COCO, it provides standardized evaluation metrics, including Object Keypoint Similarity (OKS) for pose estimation tasks, making it suitable for comparing model performance. + +## Dataset Structure + +The COCO-Pose dataset is split into three subsets: + +1. **Train2017**: This subset contains a portion of the 118K images from the COCO dataset, annotated for training pose estimation models. +2. **Val2017**: This subset has a selection of images used for validation purposes during model training. +3. **Test2017**: This subset consists of images used for testing and benchmarking the trained models. Ground truth annotations for this subset are not publicly available, and the results are submitted to the [COCO evaluation server](https://competitions.codalab.org/competitions/5181) for performance evaluation. + +## Applications + +The COCO-Pose dataset is specifically used for training and evaluating deep learning models in keypoint detection and pose estimation tasks, such as OpenPose. The dataset's large number of annotated images and standardized evaluation metrics make it an essential resource for computer vision researchers and practitioners focused on pose estimation. + +## Dataset YAML + +A YAML (Yet Another Markup Language) file is used to define the dataset configuration. It contains information about the dataset's paths, classes, and other relevant information. 
In the case of the COCO-Pose dataset, the `coco-pose.yaml` file is maintained at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/datasets/coco-pose.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/datasets/coco-pose.yaml). + +!!! example "ultralytics/datasets/coco-pose.yaml" + + ```yaml + --8<-- "ultralytics/datasets/coco-pose.yaml" + ``` + +## Usage + +To train a YOLOv8n-pose model on the COCO-Pose dataset for 100 epochs with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page. + +!!! example "Train Example" + + === "Python" + + ```python + from ultralytics import YOLO + + # Load a model + model = YOLO('yolov8n-pose.pt') # load a pretrained model (recommended for training) + + # Train the model + model.train(data='coco-pose.yaml', epochs=100, imgsz=640) + ``` + + === "CLI" + + ```bash + # Start training from a pretrained *.pt model + yolo detect train data=coco-pose.yaml model=yolov8n.pt epochs=100 imgsz=640 + ``` + +## Sample Images and Annotations + +The COCO-Pose dataset contains a diverse set of images with human figures annotated with keypoints. Here are some examples of images from the dataset, along with their corresponding annotations: + +![Dataset sample image](https://user-images.githubusercontent.com/26833433/239690150-a9dc0bd0-7ad9-4b78-a30f-189ed727ea0e.jpg) + +- **Mosaiced Image**: This image demonstrates a training batch composed of mosaiced dataset images. Mosaicing is a technique used during training that combines multiple images into a single image to increase the variety of objects and scenes within each training batch. This helps improve the model's ability to generalize to different object sizes, aspect ratios, and contexts. + +The example showcases the variety and complexity of the images in the COCO-Pose dataset and the benefits of using mosaicing during the training process. + +## Citations and Acknowledgments + +If you use the COCO-Pose dataset in your research or development work, please cite the following paper: + +```bibtex +@misc{lin2015microsoft, + title={Microsoft COCO: Common Objects in Context}, + author={Tsung-Yi Lin and Michael Maire and Serge Belongie and Lubomir Bourdev and Ross Girshick and James Hays and Pietro Perona and Deva Ramanan and C. Lawrence Zitnick and Piotr DollΓ‘r}, + year={2015}, + eprint={1405.0312}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} +``` + +We would like to acknowledge the COCO Consortium for creating and maintaining this valuable resource for the computer vision community. For more information about the COCO-Pose dataset and its creators, visit the [COCO dataset website](https://cocodataset.org/#home). \ No newline at end of file diff --git a/docs/datasets/pose/coco8-pose.md b/docs/datasets/pose/coco8-pose.md index cff6067..6540f7d 100644 --- a/docs/datasets/pose/coco8-pose.md +++ b/docs/datasets/pose/coco8-pose.md @@ -28,7 +28,7 @@ A YAML (Yet Another Markup Language) file is used to define the dataset configur ## Usage -To train a YOLOv8n model on the COCO8-Pose dataset for 100 epochs with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page. +To train a YOLOv8n-pose model on the COCO8-Pose dataset for 100 epochs with an image size of 640, you can use the following code snippets. 
For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page. !!! example "Train Example" @@ -38,7 +38,7 @@ To train a YOLOv8n model on the COCO8-Pose dataset for 100 epochs with an image from ultralytics import YOLO # Load a model - model = YOLO('yolov8n.pt') # load a pretrained model (recommended for training) + model = YOLO('yolov8n-pose.pt') # load a pretrained model (recommended for training) # Train the model model.train(data='coco8-pose.yaml', epochs=100, imgsz=640) diff --git a/docs/datasets/segment/coco.md b/docs/datasets/segment/coco.md index 19480d2..44d2e57 100644 --- a/docs/datasets/segment/coco.md +++ b/docs/datasets/segment/coco.md @@ -1,7 +1,89 @@ --- comments: true +description: Learn about the COCO-Seg dataset, designed for simple training of YOLO models on instance segmentation tasks. --- -# 🚧 Page Under Construction βš’ +# COCO-Seg Dataset -This page is currently under construction!️ πŸ‘·Please check back later for updates. πŸ˜ƒπŸ”œ +The [COCO-Seg](https://cocodataset.org/#home) dataset, an extension of the COCO (Common Objects in Context) dataset, is specially designed to aid research in object instance segmentation. It uses the same images as COCO but introduces more detailed segmentation annotations. This dataset is a crucial resource for researchers and developers working on instance segmentation tasks, especially for training YOLO models. + +## Key Features + +- COCO-Seg retains the original 330K images from COCO. +- The dataset consists of the same 80 object categories found in the original COCO dataset. +- Annotations now include more detailed instance segmentation masks for each object in the images. +- COCO-Seg provides standardized evaluation metrics like mean Average Precision (mAP) for object detection, and mean Average Recall (mAR) for instance segmentation tasks, enabling effective comparison of model performance. + +## Dataset Structure + +The COCO-Seg dataset is partitioned into three subsets: + +1. **Train2017**: This subset contains 118K images for training instance segmentation models. +2. **Val2017**: This subset includes 5K images used for validation purposes during model training. +3. **Test2017**: This subset encompasses 20K images used for testing and benchmarking the trained models. Ground truth annotations for this subset are not publicly available, and the results are submitted to the [COCO evaluation server](https://competitions.codalab.org/competitions/5181) for performance evaluation. + +## Applications + +COCO-Seg is widely used for training and evaluating deep learning models in instance segmentation, such as the YOLO models. The large number of annotated images, the diversity of object categories, and the standardized evaluation metrics make it an indispensable resource for computer vision researchers and practitioners. + +## Dataset YAML + +A YAML (Yet Another Markup Language) file is used to define the dataset configuration. It contains information about the dataset's paths, classes, and other relevant information. In the case of the COCO-Seg dataset, the `coco.yaml` file is maintained at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/datasets/coco.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/datasets/coco.yaml). + +!!! 
example "ultralytics/datasets/coco.yaml" + + ```yaml + --8<-- "ultralytics/datasets/coco.yaml" + ``` + +## Usage + +To train a YOLOv8n-seg model on the COCO-Seg dataset for 100 epochs with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page. + +!!! example "Train Example" + + === "Python" + + ```python + from ultralytics import YOLO + + # Load a model + model = YOLO('yolov8n-seg.pt') # load a pretrained model (recommended for training) + + # Train the model + model.train(data='coco-seg.yaml', epochs=100, imgsz=640) + ``` + + === "CLI" + + ```bash + # Start training from a pretrained *.pt model + yolo detect train data=coco-seg.yaml model=yolov8n.pt epochs=100 imgsz=640 + ``` + +## Sample Images and Annotations + +COCO-Seg, like its predecessor COCO, contains a diverse set of images with various object categories and complex scenes. However, COCO-Seg introduces more detailed instance segmentation masks for each object in the images. Here are some examples of images from the dataset, along with their corresponding instance segmentation masks: + +![Dataset sample image](https://user-images.githubusercontent.com/26833433/239690696-93fa8765-47a2-4b34-a6e5-516d0d1c725b.jpg) + +- **Mosaiced Image**: This image demonstrates a training batch composed of mosaiced dataset images. Mosaicing is a technique used during training that combines multiple images into a single image to increase the variety of objects and scenes within each training batch. This aids the model's ability to generalize to different object sizes, aspect ratios, and contexts. + +The example showcases the variety and complexity of the images in the COCO-Seg dataset and the benefits of using mosaicing during the training process. + +## Citations and Acknowledgments + +If you use the COCO-Seg dataset in your research or development work, please cite the original COCO paper and acknowledge the extension to COCO-Seg: + +```bibtex +@misc{lin2015microsoft, + title={Microsoft COCO: Common Objects in Context}, + author={Tsung-Yi Lin and Michael Maire and Serge Belongie and Lubomir Bourdev and Ross Girshick and James Hays and Pietro Perona and Deva Ramanan and C. Lawrence Zitnick and Piotr DollΓ‘r}, + year={2015}, + eprint={1405.0312}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} +``` + +We extend our thanks to the COCO Consortium for creating and maintaining this invaluable resource for the computer vision community. For more information about the COCO dataset and its creators, visit the [COCO dataset website](https://cocodataset.org/#home). \ No newline at end of file diff --git a/docs/datasets/segment/coco8-seg.md b/docs/datasets/segment/coco8-seg.md index 2ff85a7..fd737fa 100644 --- a/docs/datasets/segment/coco8-seg.md +++ b/docs/datasets/segment/coco8-seg.md @@ -28,7 +28,7 @@ A YAML (Yet Another Markup Language) file is used to define the dataset configur ## Usage -To train a YOLOv8n model on the COCO8-Seg dataset for 100 epochs with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page. +To train a YOLOv8n-seg model on the COCO8-Seg dataset for 100 epochs with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page. !!! 
example "Train Example" @@ -38,7 +38,7 @@ To train a YOLOv8n model on the COCO8-Seg dataset for 100 epochs with an image s from ultralytics import YOLO # Load a model - model = YOLO('yolov8n.pt') # load a pretrained model (recommended for training) + model = YOLO('yolov8n-seg.pt') # load a pretrained model (recommended for training) # Train the model model.train(data='coco8-seg.yaml', epochs=100, imgsz=640) diff --git a/docs/help/CI.md b/docs/help/CI.md new file mode 100644 index 0000000..45bf774 --- /dev/null +++ b/docs/help/CI.md @@ -0,0 +1,34 @@ +--- +comments: true +description: Understand all the Continuous Integration (CI) tests for Ultralytics repositories and see their statuses in a clear, concise table. +--- + +# Continuous Integration (CI) + +Continuous Integration (CI) is an essential aspect of software development which involves integrating changes and testing them automatically. CI allows us to maintain high-quality code by catching issues early and often in the development process. At Ultralytics, we use various CI tests to ensure the quality and integrity of our codebase. + +Here's a brief description of our CI tests: + +- **CI:** This is our primary CI test that involves running unit tests, linting checks, and sometimes more comprehensive tests depending on the repository. +- **Docker Deployment:** This test checks the deployment of the project using Docker to ensure the Dockerfile and related scripts are working correctly. +- **Broken Links:** This test scans the codebase for any broken or dead links in our markdown or HTML files. +- **CodeQL:** CodeQL is a tool from GitHub that performs semantic analysis on our code, helping to find potential security vulnerabilities and maintain high-quality code. +- **PyPi Publishing:** This test checks if the project can be packaged and published to PyPi without any errors. 
+ +Below is the table showing the status of these CI tests for our main repositories: + +| Repository | CI | Docker Deployment | Broken Links | CodeQL | PyPi and Docs Publishing | +|-----------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| [yolov3](https://github.com/ultralytics/yolov3) | [![YOLOv3 CI](https://github.com/ultralytics/yolov3/actions/workflows/ci-testing.yml/badge.svg)](https://github.com/ultralytics/yolov3/actions/workflows/ci-testing.yml) | [![Publish Docker Images](https://github.com/ultralytics/yolov3/actions/workflows/docker.yml/badge.svg)](https://github.com/ultralytics/yolov3/actions/workflows/docker.yml) | [![Check Broken links](https://github.com/ultralytics/yolov3/actions/workflows/links.yml/badge.svg)](https://github.com/ultralytics/yolov3/actions/workflows/links.yml) | [![CodeQL](https://github.com/ultralytics/yolov3/actions/workflows/codeql-analysis.yml/badge.svg)](https://github.com/ultralytics/yolov3/actions/workflows/codeql-analysis.yml) | | +| [yolov5](https://github.com/ultralytics/yolov5) | [![YOLOv5 CI](https://github.com/ultralytics/yolov5/actions/workflows/ci-testing.yml/badge.svg)](https://github.com/ultralytics/yolov5/actions/workflows/ci-testing.yml) | [![Publish Docker Images](https://github.com/ultralytics/yolov5/actions/workflows/docker.yml/badge.svg)](https://github.com/ultralytics/yolov5/actions/workflows/docker.yml) | [![Check Broken links](https://github.com/ultralytics/yolov5/actions/workflows/links.yml/badge.svg)](https://github.com/ultralytics/yolov5/actions/workflows/links.yml) | [![CodeQL](https://github.com/ultralytics/yolov5/actions/workflows/codeql-analysis.yml/badge.svg)](https://github.com/ultralytics/yolov5/actions/workflows/codeql-analysis.yml) | | +| [ultralytics](https://github.com/ultralytics/ultralytics) | [![ultralytics CI](https://github.com/ultralytics/ultralytics/actions/workflows/ci.yaml/badge.svg)](https://github.com/ultralytics/ultralytics/actions/workflows/ci.yaml) | [![Publish Docker Images](https://github.com/ultralytics/ultralytics/actions/workflows/docker.yaml/badge.svg)](https://github.com/ultralytics/ultralytics/actions/workflows/docker.yaml) | [![Check Broken links](https://github.com/ultralytics/ultralytics/actions/workflows/links.yml/badge.svg)](https://github.com/ultralytics/ultralytics/actions/workflows/links.yml) | [![CodeQL](https://github.com/ultralytics/ultralytics/actions/workflows/codeql.yaml/badge.svg)](https://github.com/ultralytics/ultralytics/actions/workflows/codeql.yaml) | [![Publish to PyPI and Deploy 
Docs](https://github.com/ultralytics/ultralytics/actions/workflows/publish.yml/badge.svg)](https://github.com/ultralytics/ultralytics/actions/workflows/publish.yml) | +| [hub](https://github.com/ultralytics/hub) | [![HUB CI](https://github.com/ultralytics/hub/actions/workflows/ci.yaml/badge.svg)](https://github.com/ultralytics/hub/actions/workflows/ci.yaml) | | [![Check Broken links](https://github.com/ultralytics/hub/actions/workflows/links.yml/badge.svg)](https://github.com/ultralytics/hub/actions/workflows/links.yml) | | | +| [docs](https://github.com/ultralytics/docs) | | | | | [![pages-build-deployment](https://github.com/ultralytics/docs/actions/workflows/pages/pages-build-deployment/badge.svg)](https://github.com/ultralytics/docs/actions/workflows/pages/pages-build-deployment) | + +Each badge shows the status of the last run of the corresponding CI test on the `main` branch of the respective repository. If a test fails, the badge will display a "failing" status, and if it passes, it will display a "passing" status. + +If you notice a test failing, it would be a great help if you could report it through a GitHub issue in the respective repository. + +Remember, a successful CI test does not mean that everything is perfect. It is always recommended to manually review the code before deployment or merging changes. + +Happy coding! diff --git a/docs/help/index.md b/docs/help/index.md index 25e3ebc..529b6ee 100644 --- a/docs/help/index.md +++ b/docs/help/index.md @@ -7,6 +7,7 @@ Welcome to the Ultralytics Help page! We are committed to providing you with com - [Frequently Asked Questions (FAQ)](FAQ.md): Find answers to common questions and issues faced by users and contributors of Ultralytics YOLO repositories. - [Contributing Guide](contributing.md): Learn the best practices for submitting pull requests, reporting bugs, and contributing to the development of our repositories. +- [Continuous Integration (CI) Guide](CI.md): Understand the CI tests we perform for each Ultralytics repository and see their current statuses. - [Contributor License Agreement (CLA)](CLA.md): Familiarize yourself with our CLA to understand the terms and conditions for contributing to Ultralytics projects. - [Minimum Reproducible Example (MRE) Guide](minimum_reproducible_example.md): Understand how to create an MRE when submitting bug reports to ensure that our team can quickly and efficiently address the issue. - [Code of Conduct](code_of_conduct.md): Learn about our community guidelines and expectations to ensure a welcoming and inclusive environment for all participants. diff --git a/docs/hub/inference_api.md b/docs/hub/inference_api.md index b69b13a..4bc8f99 100644 --- a/docs/hub/inference_api.md +++ b/docs/hub/inference_api.md @@ -1,11 +1,8 @@ --- comments: true +description: Explore Ultralytics YOLOv8 Inference API for efficient object detection. Check out our Python and CLI examples to streamline your image analysis. --- -# 🚧 Page Under Construction βš’ - -This page is currently under construction!️ πŸ‘·Please check back later for updates. πŸ˜ƒπŸ”œ - # YOLO Inference API The YOLO Inference API allows you to access the YOLOv8 object detection capabilities via a RESTful API. This enables you to run object detection on images without the need to install and set up the YOLOv8 environment locally. 
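As a rough illustration of the request flow described above, the minimal Python sketch below posts an image to the API with the `requests` library. The endpoint path, header name, and argument names used here are placeholders based on typical Ultralytics HUB usage rather than guaranteed values; refer to the Python and CLI examples on this page for the exact form.

```python
import requests

# Placeholder values -- substitute your own HUB model ID and API key
url = "https://api.ultralytics.com/v1/predict/MODEL_ID"  # hypothetical endpoint path
headers = {"x-api-key": "API_KEY"}                        # hypothetical auth header
data = {"size": 640, "confidence": 0.25, "iou": 0.45}     # common inference arguments

# Send the image and print the JSON detections returned by the API
with open("image.jpg", "rb") as f:
    response = requests.post(url, headers=headers, data=data, files={"image": f})
response.raise_for_status()
print(response.json())
```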
diff --git a/docs/models/yolov3.md b/docs/models/yolov3.md index 7b7f76d..0f02445 100644 --- a/docs/models/yolov3.md +++ b/docs/models/yolov3.md @@ -1,7 +1,77 @@ --- comments: true +description: YOLOv3, YOLOv3-Ultralytics and YOLOv3u by Ultralytics explained. Learn the evolution of these models and their specifications. --- -# 🚧Page Under Construction βš’ +# YOLOv3, YOLOv3-Ultralytics, and YOLOv3u -This page is currently under construction!οΈπŸ‘·Please check back later for updates. πŸ˜ƒπŸ”œ +## Overview + +This document presents an overview of three closely related object detection models, namely [YOLOv3](https://pjreddie.com/darknet/yolo/), [YOLOv3-Ultralytics](https://github.com/ultralytics/yolov3), and [YOLOv3u](https://github.com/ultralytics/ultralytics). + +1. **YOLOv3:** This is the third version of the You Only Look Once (YOLO) object detection algorithm. Originally developed by Joseph Redmon, YOLOv3 improved on its predecessors by introducing features such as multiscale predictions and three different sizes of detection kernels. + +2. **YOLOv3-Ultralytics:** This is Ultralytics' implementation of the YOLOv3 model. It reproduces the original YOLOv3 architecture and offers additional functionalities, such as support for more pre-trained models and easier customization options. + +3. **YOLOv3u:** This is an updated version of YOLOv3-Ultralytics that incorporates the anchor-free, objectness-free split head used in YOLOv8 models. YOLOv3u maintains the same backbone and neck architecture as YOLOv3 but with the updated detection head from YOLOv8. + +## Key Features + +- **YOLOv3:** Introduced the use of three different scales for detection, leveraging three different sizes of detection kernels: 13x13, 26x26, and 52x52. This significantly improved detection accuracy for objects of different sizes. Additionally, YOLOv3 added features such as multi-label predictions for each bounding box and a better feature extractor network. + +- **YOLOv3-Ultralytics:** Ultralytics' implementation of YOLOv3 provides the same performance as the original model but comes with added support for more pre-trained models, additional training methods, and easier customization options. This makes it more versatile and user-friendly for practical applications. + +- **YOLOv3u:** This updated model incorporates the anchor-free, objectness-free split head from YOLOv8. By eliminating the need for pre-defined anchor boxes and objectness scores, this detection head design can improve the model's ability to detect objects of varying sizes and shapes. This makes YOLOv3u more robust and accurate for object detection tasks. + +## Supported Tasks + +YOLOv3, YOLOv3-Ultralytics, and YOLOv3u all support the following tasks: + +- Object Detection + +## Supported Modes + +All three models support the following modes: + +- Inference +- Validation +- Training +- Export + +## Performance + +Below is a comparison of the performance of the three models. The performance is measured in terms of the Mean Average Precision (mAP) on the COCO dataset: + +TODO + +## Usage + +You can use these models for object detection tasks using the Ultralytics YOLOv3 repository. 
The following is a sample code snippet showing how to use the YOLOv3u model for inference: + +```python +from ultralytics import YOLO + +# Load the model +model = YOLO('yolov3.pt') # load a pretrained model + +# Perform inference +results = model('image.jpg') + +# Print the results +results.print() +``` + +## Citations and Acknowledgments + +If you use YOLOv3 in your research, please cite the original YOLO papers and the Ultralytics YOLOv3 repository: + +```bibtex +@article{redmon2018yolov3, + title={YOLOv3: An Incremental Improvement}, + author={Redmon, Joseph and Farhadi, Ali}, + journal={arXiv preprint arXiv:1804.02767}, + year={2018} +} +``` + +Thank you to Joseph Redmon and Ali Farhadi for developing the original YOLOv3. \ No newline at end of file diff --git a/docs/models/yolov5.md b/docs/models/yolov5.md index 00a0a4b..1f40631 100644 --- a/docs/models/yolov5.md +++ b/docs/models/yolov5.md @@ -1,19 +1,21 @@ --- comments: true -description: Detect objects faster and more accurately using Ultralytics YOLOv5u. Find pre-trained models for each task, including Inference, Validation and Training. +description: YOLOv5u by Ultralytics explained. Discover the evolution of this model and its key specifications. Experience faster and more accurate object detection. --- # YOLOv5u ## Overview -YOLOv5u is an updated version of YOLOv5 that incorporates the anchor-free split Ultralytics head used in the YOLOv8 models. It retains the same backbone and neck architecture as YOLOv5 but offers improved accuracy-speed tradeoff for object detection tasks. +YOLOv5u is an enhanced version of the [YOLOv5](https://github.com/ultralytics/yolov5) object detection model from Ultralytics. This iteration incorporates the anchor-free, objectness-free split head that is featured in the [YOLOv8](./yolov8.md) models. Although it maintains the same backbone and neck architecture as YOLOv5, YOLOv5u provides an improved accuracy-speed tradeoff for object detection tasks, making it a robust choice for numerous applications. ## Key Features -- **Anchor-free Split Ultralytics Head:** YOLOv5u replaces the traditional anchor-based detection head with an anchor-free split Ultralytics head, resulting in improved performance. -- **Optimized Accuracy-Speed Tradeoff:** The updated model offers a better balance between accuracy and speed, making it more suitable for a wider range of applications. -- **Variety of Pre-trained Models:** YOLOv5u offers a range of pre-trained models tailored for various tasks, including Inference, Validation, and Training. +- **Anchor-free Split Ultralytics Head:** YOLOv5u replaces the conventional anchor-based detection head with an anchor-free split Ultralytics head, boosting performance in object detection tasks. + +- **Optimized Accuracy-Speed Tradeoff:** By delivering a better balance between accuracy and speed, YOLOv5u is suitable for a diverse range of real-time applications, from autonomous driving to video surveillance. + +- **Variety of Pre-trained Models:** YOLOv5u includes numerous pre-trained models for tasks like Inference, Validation, and Training, providing the flexibility to tackle various object detection challenges. 
## Supported Tasks @@ -45,4 +47,40 @@ YOLOv5u is an updated version of YOLOv5 that incorporates the anchor-free split | [YOLOv5s6u](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5s6u.pt) | 1280 | 48.6 | - | - | 15.3 | 24.6 | | [YOLOv5m6u](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5m6u.pt) | 1280 | 53.6 | - | - | 41.2 | 65.7 | | [YOLOv5l6u](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5l6u.pt) | 1280 | 55.7 | - | - | 86.1 | 137.4 | - | [YOLOv5x6u](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5x6u.pt) | 1280 | 56.8 | - | - | 155.4 | 250.7 | \ No newline at end of file + | [YOLOv5x6u](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5x6u.pt) | 1280 | 56.8 | - | - | 155.4 | 250.7 | + +## Usage + +You can use YOLOv5u for object detection tasks using the Ultralytics repository. The following is a sample code snippet showing how to use YOLOv5u model for inference: + +```python +from ultralytics import YOLO + +# Load the model +model = YOLO('yolov5n.pt') # load a pretrained model + +# Perform inference +results = model('image.jpg') + +# Print the results +results.print() +``` + +## Citations and Acknowledgments + +If you use YOLOv5 or YOLOv5u in your research, please cite the Ultralytics YOLOv5 repository as follows: + +```bibtex +@software{yolov5, + title = {YOLOv5 by Ultralytics}, + author = {Glenn Jocher}, + year = {2020}, + version = {7.0}, + license = {AGPL-3.0}, + url = {https://github.com/ultralytics/yolov5}, + doi = {10.5281/zenodo.3908559}, + orcid = {0000-0001-5950-6979} +} +``` + +Special thanks to Glenn Jocher and the Ultralytics team for their work on developing and maintaining the YOLOv5 and YOLOv5u models. \ No newline at end of file diff --git a/docs/models/yolov8.md b/docs/models/yolov8.md index 53df59c..159ed56 100644 --- a/docs/models/yolov8.md +++ b/docs/models/yolov8.md @@ -74,4 +74,41 @@ YOLOv8 is the latest iteration in the YOLO series of real-time object detectors, | [YOLOv8m-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-pose.pt) | 640 | 65.0 | 88.8 | 456.3 | 2.00 | 26.4 | 81.0 | | [YOLOv8l-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-pose.pt) | 640 | 67.6 | 90.0 | 784.5 | 2.59 | 44.4 | 168.6 | | [YOLOv8x-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose.pt) | 640 | 69.2 | 90.2 | 1607.1 | 3.73 | 69.4 | 263.2 | - | [YOLOv8x-pose-p6](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose-p6.pt) | 1280 | 71.6 | 91.2 | 4088.7 | 10.04 | 99.1 | 1066.4 | \ No newline at end of file + | [YOLOv8x-pose-p6](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose-p6.pt) | 1280 | 71.6 | 91.2 | 4088.7 | 10.04 | 99.1 | 1066.4 | + + +## Usage + +You can use YOLOv8 for object detection tasks using the Ultralytics pip package. 
The following is a sample code snippet showing how to use YOLOv8 models for inference: + +```python +from ultralytics import YOLO + +# Load the model +model = YOLO('yolov8n.pt') # load a pretrained model + +# Perform inference +results = model('image.jpg') + +# Print the results +results.print() +``` + + +## Citation + +If you use the YOLOv8 model or any other software from this repository in your work, please cite it using the following format: + +```bibtex +@software{yolov8_ultralytics, + author = {Glenn Jocher and Ayush Chaurasia and Jing Qiu}, + title = {YOLO by Ultralytics}, + version = {8.0.0}, + year = {2023}, + url = {https://github.com/ultralytics/ultralytics}, + orcid = {0000-0001-5950-6979, 0000-0002-7603-6750, 0000-0003-3783-7069}, + license = {AGPL-3.0} +} +``` + +Please note that the DOI is pending and will be added to the citation once it is available. The usage of the software is in accordance with the AGPL-3.0 license. \ No newline at end of file diff --git a/docs/tasks/classify.md b/docs/tasks/classify.md index 8411e2b..fbdd96b 100644 --- a/docs/tasks/classify.md +++ b/docs/tasks/classify.md @@ -76,21 +76,7 @@ see the [Configuration](../usage/cfg.md) page. ### Dataset format -The YOLO classification dataset format is same as the torchvision format. Each class of images has its own folder and you have to simply pass the path of the dataset folder, i.e, `yolo classify train data="path/to/dataset"` - -``` -dataset/ -β”œβ”€β”€ train/ -β”œβ”€β”€β”€β”€ class1/ -β”œβ”€β”€β”€β”€ class2/ -β”œβ”€β”€β”€β”€ class3/ -β”œβ”€β”€β”€β”€ ... -β”œβ”€β”€ val/ -β”œβ”€β”€β”€β”€ class1/ -β”œβ”€β”€β”€β”€ class2/ -β”œβ”€β”€β”€β”€ class3/ -β”œβ”€β”€β”€β”€ ... -``` +YOLO classification dataset format can be found in detail in the [Dataset Guide](../datasets/classify/index.md). ## Val @@ -190,4 +176,4 @@ i.e. `yolo predict model=yolov8n-cls.onnx`. Usage examples are shown for your mo | [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n-cls_web_model/` | βœ… | `imgsz` | | [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n-cls_paddle_model/` | βœ… | `imgsz` | -See full `export` details in the [Export](https://docs.ultralytics.com/modes/export/) page. \ No newline at end of file +See full `export` details in the [Export](https://docs.ultralytics.com/modes/export/) page. diff --git a/docs/tasks/detect.md b/docs/tasks/detect.md index a3e5728..44a18c7 100644 --- a/docs/tasks/detect.md +++ b/docs/tasks/detect.md @@ -67,7 +67,7 @@ Train YOLOv8n on the COCO128 dataset for 100 epochs at image size 640. For a ful ### Dataset format -YOLO detection dataset format can be found in detail in the [Dataset Guide](../yolov5/tutorials/train_custom_data.md). To convert your existing dataset from other formats( like COCO, VOC etc.) to YOLO format, please use [json2yolo tool](https://github.com/ultralytics/JSON2YOLO) by Ultralytics. +YOLO detection dataset format can be found in detail in the [Dataset Guide](../datasets/detect/index.md). To convert your existing dataset from other formats( like COCO etc.) to YOLO format, please use [json2yolo tool](https://github.com/ultralytics/JSON2YOLO) by Ultralytics. ## Val @@ -167,4 +167,4 @@ Available YOLOv8 export formats are in the table below. You can predict or valid | [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n_web_model/` | βœ… | `imgsz` | | [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n_paddle_model/` | βœ… | `imgsz` | -See full `export` details in the [Export](https://docs.ultralytics.com/modes/export/) page. 
\ No newline at end of file +See full `export` details in the [Export](https://docs.ultralytics.com/modes/export/) page. diff --git a/docs/tasks/pose.md b/docs/tasks/pose.md index 6c43d6c..65148fe 100644 --- a/docs/tasks/pose.md +++ b/docs/tasks/pose.md @@ -8,7 +8,7 @@ to as keypoints. The keypoints can represent various parts of the object such as features. The locations of the keypoints are usually represented as a set of 2D `[x, y]` or 3D `[x, y, visible]` coordinates. - + The output of a pose estimation model is a set of points that represent the keypoints on an object in the image, usually along with the confidence scores for each point. Pose estimation is a good choice when you need to identify specific @@ -76,6 +76,10 @@ Train a YOLOv8-pose model on the COCO128-pose dataset. yolo pose train data=coco8-pose.yaml model=yolov8n-pose.yaml pretrained=yolov8n-pose.pt epochs=100 imgsz=640 ``` +### Dataset format + +YOLO pose dataset format can be found in detail in the [Dataset Guide](../datasets/pose/index.md). To convert your existing dataset from other formats( like COCO etc.) to YOLO format, please use [json2yolo tool](https://github.com/ultralytics/JSON2YOLO) by Ultralytics. + ## Val Validate trained YOLOv8n-pose model accuracy on the COCO128-pose dataset. No argument need to passed as the `model` @@ -177,4 +181,4 @@ i.e. `yolo predict model=yolov8n-pose.onnx`. Usage examples are shown for your m | [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n-pose_web_model/` | βœ… | `imgsz` | | [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n-pose_paddle_model/` | βœ… | `imgsz` | -See full `export` details in the [Export](https://docs.ultralytics.com/modes/export/) page. \ No newline at end of file +See full `export` details in the [Export](https://docs.ultralytics.com/modes/export/) page. diff --git a/docs/tasks/segment.md b/docs/tasks/segment.md index 0b9d4d2..b12f83e 100644 --- a/docs/tasks/segment.md +++ b/docs/tasks/segment.md @@ -75,11 +75,7 @@ arguments see the [Configuration](../usage/cfg.md) page. ### Dataset format -YOLO segmentation dataset label format extends detection format with segment points. - -`cls x1 y1 x2 y2 p1 p2 ... pn` - -To convert your existing dataset from other formats( like COCO, VOC etc.) to YOLO format, please use [json2yolo tool](https://github.com/ultralytics/JSON2YOLO) by Ultralytics. +YOLO segmentation dataset format can be found in detail in the [Dataset Guide](../datasets/segment/index.md). To convert your existing dataset from other formats( like COCO etc.) to YOLO format, please use [json2yolo tool](https://github.com/ultralytics/JSON2YOLO) by Ultralytics. ## Val @@ -185,4 +181,4 @@ i.e. `yolo predict model=yolov8n-seg.onnx`. Usage examples are shown for your mo | [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n-seg_web_model/` | βœ… | `imgsz` | | [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n-seg_paddle_model/` | βœ… | `imgsz` | -See full `export` details in the [Export](https://docs.ultralytics.com/modes/export/) page. \ No newline at end of file +See full `export` details in the [Export](https://docs.ultralytics.com/modes/export/) page. 
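For readers following the dataset-format links above, a minimal sketch of the YOLO detection label convention may help: each image has a `*.txt` file with one `class x_center y_center width height` line per object, with all values normalized to the 0-1 range; segmentation labels follow the same idea with a class id followed by normalized polygon points. The helper below is purely illustrative and is not part of the Ultralytics API.

```python
def to_yolo_detect_line(cls_id, box_xyxy, img_w, img_h):
    """Convert a pixel-space [x1, y1, x2, y2] box into a normalized YOLO detection label line."""
    x1, y1, x2, y2 = box_xyxy
    xc = (x1 + x2) / 2 / img_w  # normalized box center x
    yc = (y1 + y2) / 2 / img_h  # normalized box center y
    w = (x2 - x1) / img_w       # normalized box width
    h = (y2 - y1) / img_h       # normalized box height
    return f"{cls_id} {xc:.6f} {yc:.6f} {w:.6f} {h:.6f}"

# Toy example: a 'person' (class 0) box of 100x200 pixels in a 640x480 image
print(to_yolo_detect_line(0, (50, 40, 150, 240), 640, 480))
# -> '0 0.156250 0.291667 0.156250 0.416667'
```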
diff --git a/docs/yolov5/tutorials/train_custom_data.md b/docs/yolov5/tutorials/train_custom_data.md index 4b0febc..b018a78 100644 --- a/docs/yolov5/tutorials/train_custom_data.md +++ b/docs/yolov5/tutorials/train_custom_data.md @@ -29,7 +29,7 @@ Creating a custom model to detect your objects is an iterative process of collec YOLOv5 models must be trained on labelled data in order to learn classes of objects in that data. There are two options for creating your dataset before you start training: -
+
Use Roboflow to create your dataset in YOLO format ### 1.1 Collect Images diff --git a/mkdocs.yml b/mkdocs.yml index f68fafd..45195e7 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -335,6 +335,7 @@ nav: - Help: help/index.md - Frequently Asked Questions (FAQ): help/FAQ.md - Contributing Guide: help/contributing.md + - Continuous Integration (CI) Guide: help/CI.md - Contributor License Agreement (CLA): help/CLA.md - Minimum Reproducible Example (MRE) Guide: help/minimum_reproducible_example.md - Code of Conduct: help/code_of_conduct.md diff --git a/ultralytics/__init__.py b/ultralytics/__init__.py index fed200b..51e4bbd 100644 --- a/ultralytics/__init__.py +++ b/ultralytics/__init__.py @@ -1,6 +1,6 @@ # Ultralytics YOLO πŸš€, AGPL-3.0 license -__version__ = '8.0.105' +__version__ = '8.0.106' from ultralytics.hub import start from ultralytics.vit.rtdetr import RTDETR diff --git a/ultralytics/nn/modules/head.py b/ultralytics/nn/modules/head.py index 26b2342..f9145f0 100644 --- a/ultralytics/nn/modules/head.py +++ b/ultralytics/nn/modules/head.py @@ -127,7 +127,7 @@ class Pose(Detect): y = kpts.view(bs, *self.kpt_shape, -1) a = (y[:, :, :2] * 2.0 + (self.anchors - 0.5)) * self.strides if ndim == 3: - a = torch.cat((a, y[:, :, 1:2].sigmoid()), 2) + a = torch.cat((a, y[:, :, 2:3].sigmoid()), 2) return a.view(bs, self.nk, -1) else: y = kpts.clone() diff --git a/ultralytics/vit/rtdetr/predict.py b/ultralytics/vit/rtdetr/predict.py index ee47b37..78219b2 100644 --- a/ultralytics/vit/rtdetr/predict.py +++ b/ultralytics/vit/rtdetr/predict.py @@ -17,9 +17,11 @@ class RTDETRPredictor(BasePredictor): results = [] for i, bbox in enumerate(bboxes): # (300, 4) bbox = ops.xywh2xyxy(bbox) - score, cls = scores[i].max(-1) # (300, ) - idx = score > self.args.conf - pred = torch.cat([bbox, score[..., None], cls[..., None]], dim=-1)[idx] # filter + score, cls = scores[i].max(-1, keepdim=True) # (300, 1) + idx = score.squeeze(-1) > self.args.conf # (300, ) + if self.args.classes is not None: + idx = (cls == torch.tensor(self.args.classes, device=cls.device)).any(1) & idx + pred = torch.cat([bbox, score, cls], dim=-1)[idx] # filter orig_img = orig_imgs[i] if isinstance(orig_imgs, list) else orig_imgs oh, ow = orig_img.shape[:2] if not isinstance(orig_imgs, torch.Tensor): diff --git a/ultralytics/yolo/data/dataset.py b/ultralytics/yolo/data/dataset.py index dbfc182..bc0cf7c 100644 --- a/ultralytics/yolo/data/dataset.py +++ b/ultralytics/yolo/data/dataset.py @@ -206,8 +206,6 @@ class ClassificationDataset(torchvision.datasets.ImageFolder): Args: root (str): Dataset path. - transform (callable, optional): torchvision transforms, used by default. - album_transform (callable, optional): Albumentations transforms, used if installed. Attributes: cache_ram (bool): True if images should be cached in RAM, False otherwise. 
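# Illustrative sketch (toy tensors, not library code): the class-filtering mask added to
# RTDETRPredictor.postprocess above keeps a prediction only when its class id is listed in
# args.classes AND its score already passed the confidence threshold.
import torch

scores = torch.tensor([[0.9], [0.2], [0.8]])       # (3, 1) top score per prediction
cls = torch.tensor([[0], [2], [5]])                # (3, 1) predicted class ids
conf, classes = 0.5, [0, 5]                        # confidence threshold and allowed classes

idx = scores.squeeze(-1) > conf                    # confidence mask -> tensor([True, False, True])
idx = (cls == torch.tensor(classes)).any(1) & idx  # class mask combined with confidence mask
print(idx)                                         # tensor([True, False, True])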
diff --git a/ultralytics/yolo/engine/trainer.py b/ultralytics/yolo/engine/trainer.py index 3757a86..d9eecbc 100644 --- a/ultralytics/yolo/engine/trainer.py +++ b/ultralytics/yolo/engine/trainer.py @@ -414,12 +414,18 @@ class BaseTrainer: 'date': datetime.now().isoformat(), 'version': __version__} + # Use dill (if exists) to serialize the lambda functions where pickle does not do this + try: + import dill as pickle + except ImportError: + import pickle + # Save last, best and delete - torch.save(ckpt, self.last) + torch.save(ckpt, self.last, pickle_module=pickle) if self.best_fitness == self.fitness: - torch.save(ckpt, self.best) + torch.save(ckpt, self.best, pickle_module=pickle) if (self.epoch > 0) and (self.save_period > 0) and (self.epoch % self.save_period == 0): - torch.save(ckpt, self.wdir / f'epoch{self.epoch}.pt') + torch.save(ckpt, self.wdir / f'epoch{self.epoch}.pt', pickle_module=pickle) del ckpt @staticmethod diff --git a/ultralytics/yolo/utils/__init__.py b/ultralytics/yolo/utils/__init__.py index 96010aa..98a4e05 100644 --- a/ultralytics/yolo/utils/__init__.py +++ b/ultralytics/yolo/utils/__init__.py @@ -754,25 +754,9 @@ ENVIRONMENT = 'Colab' if is_colab() else 'Kaggle' if is_kaggle() else 'Jupyter' TESTS_RUNNING = is_pytest_running() or is_github_actions_ci() set_sentry() -# OpenCV Multilanguage-friendly functions ------------------------------------------------------------------------------ -imshow_ = cv2.imshow # copy to avoid recursion errors - - -def imread(filename, flags=cv2.IMREAD_COLOR): - return cv2.imdecode(np.fromfile(filename, np.uint8), flags) - - -def imwrite(filename, img): - try: - cv2.imencode(Path(filename).suffix, img)[1].tofile(filename) - return True - except Exception: - return False - - -def imshow(path, im): - imshow_(path.encode('unicode_escape').decode(), im) - +# Apply monkey patches if the script is being run from within the parent directory of the script's location +from .patches import imread, imshow, imwrite +# torch.save = torch_save if Path(inspect.stack()[0].filename).parent.parent.as_posix() in inspect.stack()[-1].filename: - cv2.imread, cv2.imwrite, cv2.imshow = imread, imwrite, imshow # redefine + cv2.imread, cv2.imwrite, cv2.imshow = imread, imwrite, imshow diff --git a/ultralytics/yolo/utils/ops.py b/ultralytics/yolo/utils/ops.py index 17dbd4a..b85ea84 100644 --- a/ultralytics/yolo/utils/ops.py +++ b/ultralytics/yolo/utils/ops.py @@ -1,3 +1,5 @@ +# Ultralytics YOLO πŸš€, AGPL-3.0 license + import contextlib import math import re diff --git a/ultralytics/yolo/utils/patches.py b/ultralytics/yolo/utils/patches.py new file mode 100644 index 0000000..2b023b9 --- /dev/null +++ b/ultralytics/yolo/utils/patches.py @@ -0,0 +1,45 @@ +# Ultralytics YOLO πŸš€, AGPL-3.0 license +""" +Monkey patches to update/extend functionality of existing functions +""" + +from pathlib import Path + +import cv2 +import numpy as np +import torch + +# OpenCV Multilanguage-friendly functions ------------------------------------------------------------------------------ +_imshow = cv2.imshow # copy to avoid recursion errors + + +def imread(filename, flags=cv2.IMREAD_COLOR): + return cv2.imdecode(np.fromfile(filename, np.uint8), flags) + + +def imwrite(filename, img): + try: + cv2.imencode(Path(filename).suffix, img)[1].tofile(filename) + return True + except Exception: + return False + + +def imshow(path, im): + _imshow(path.encode('unicode_escape').decode(), im) + + +# PyTorch functions 
---------------------------------------------------------------------------------------------------- +_torch_save = torch.save # copy to avoid recursion errors + + +def torch_save(*args, **kwargs): + # Use dill (if exists) to serialize the lambda functions where pickle does not do this + try: + import dill as pickle + except ImportError: + import pickle + + if 'pickle_module' not in kwargs: + kwargs['pickle_module'] = pickle + return _torch_save(*args, **kwargs) diff --git a/ultralytics/yolo/utils/torch_utils.py b/ultralytics/yolo/utils/torch_utils.py index a519936..0c69dc3 100644 --- a/ultralytics/yolo/utils/torch_utils.py +++ b/ultralytics/yolo/utils/torch_utils.py @@ -341,6 +341,12 @@ def strip_optimizer(f: Union[str, Path] = 'best.pt', s: str = '') -> None: for f in Path('/Users/glennjocher/Downloads/weights').rglob('*.pt'): strip_optimizer(f) """ + # Use dill (if exists) to serialize the lambda functions where pickle does not do this + try: + import dill as pickle + except ImportError: + import pickle + x = torch.load(f, map_location=torch.device('cpu')) args = {**DEFAULT_CFG_DICT, **x['train_args']} # combine model args with default args, preferring model args if x.get('ema'): @@ -353,7 +359,7 @@ def strip_optimizer(f: Union[str, Path] = 'best.pt', s: str = '') -> None: p.requires_grad = False x['train_args'] = {k: v for k, v in args.items() if k in DEFAULT_CFG_KEYS} # strip non-default keys # x['model'].args = x['train_args'] - torch.save(x, s or f) + torch.save(x, s or f, pickle_module=pickle) mb = os.path.getsize(s or f) / 1E6 # filesize LOGGER.info(f"Optimizer stripped from {f},{f' saved as {s},' if s else ''} {mb:.1f}MB") diff --git a/ultralytics/yolo/utils/tuner.py b/ultralytics/yolo/utils/tuner.py index 0de0708..54e1b01 100644 --- a/ultralytics/yolo/utils/tuner.py +++ b/ultralytics/yolo/utils/tuner.py @@ -1,3 +1,5 @@ +# Ultralytics YOLO πŸš€, AGPL-3.0 license + from ultralytics.yolo.utils import LOGGER try: diff --git a/ultralytics/yolo/v8/detect/val.py b/ultralytics/yolo/v8/detect/val.py index 37e742a..7759a43 100644 --- a/ultralytics/yolo/v8/detect/val.py +++ b/ultralytics/yolo/v8/detect/val.py @@ -62,14 +62,13 @@ class DetectionValidator(BaseValidator): def postprocess(self, preds): """Apply Non-maximum suppression to prediction outputs.""" - preds = ops.non_max_suppression(preds, - self.args.conf, - self.args.iou, - labels=self.lb, - multi_label=True, - agnostic=self.args.single_cls, - max_det=self.args.max_det) - return preds + return ops.non_max_suppression(preds, + self.args.conf, + self.args.iou, + labels=self.lb, + multi_label=True, + agnostic=self.args.single_cls, + max_det=self.args.max_det) def update_metrics(self, preds, batch): """Metrics.""" diff --git a/ultralytics/yolo/v8/pose/val.py b/ultralytics/yolo/v8/pose/val.py index 7514d2f..a062727 100644 --- a/ultralytics/yolo/v8/pose/val.py +++ b/ultralytics/yolo/v8/pose/val.py @@ -33,15 +33,14 @@ class PoseValidator(DetectionValidator): def postprocess(self, preds): """Apply non-maximum suppression and return detections with high confidence scores.""" - preds = ops.non_max_suppression(preds, - self.args.conf, - self.args.iou, - labels=self.lb, - multi_label=True, - agnostic=self.args.single_cls, - max_det=self.args.max_det, - nc=self.nc) - return preds + return ops.non_max_suppression(preds, + self.args.conf, + self.args.iou, + labels=self.lb, + multi_label=True, + agnostic=self.args.single_cls, + max_det=self.args.max_det, + nc=self.nc) def init_metrics(self, model): """Initiate pose estimation metrics for YOLO 
model."""