From 1e5702a5b5a05809bfcb873a556bbcb8e49bbb05 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sat, 17 Jun 2023 16:44:41 +0200 Subject: [PATCH] Add Ultralytics ViT Docs (#3230) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- docs/hub/datasets.md | 2 +- docs/reference/vit/rtdetr/model.md | 9 ++ docs/reference/vit/rtdetr/predict.md | 9 ++ docs/reference/vit/rtdetr/train.md | 14 +++ docs/reference/vit/rtdetr/val.md | 14 +++ docs/reference/vit/sam/amg.md | 89 +++++++++++++++++++ docs/reference/vit/sam/autosize.md | 9 ++ docs/reference/vit/sam/build.md | 29 ++++++ docs/reference/vit/sam/model.md | 9 ++ docs/reference/vit/sam/modules/decoders.md | 9 ++ docs/reference/vit/sam/modules/encoders.md | 54 +++++++++++ .../vit/sam/modules/mask_generator.md | 9 ++ .../vit/sam/modules/prompt_predictor.md | 9 ++ docs/reference/vit/sam/modules/sam.md | 9 ++ docs/reference/vit/sam/modules/transformer.md | 19 ++++ docs/reference/vit/sam/predict.md | 9 ++ docs/reference/vit/utils/loss.md | 14 +++ docs/reference/vit/utils/ops.md | 19 ++++ mkdocs.yml | 22 +++++ ultralytics/vit/rtdetr/model.py | 2 +- ultralytics/vit/rtdetr/train.py | 2 + ultralytics/vit/sam/__init__.py | 2 + ultralytics/vit/sam/amg.py | 2 + ultralytics/vit/sam/autosize.py | 2 + ultralytics/vit/sam/build.py | 2 + ultralytics/vit/sam/model.py | 5 +- ultralytics/vit/sam/modules/__init__.py | 1 + ultralytics/vit/sam/modules/decoders.py | 2 + ultralytics/vit/sam/modules/encoders.py | 2 + ultralytics/vit/sam/modules/mask_generator.py | 2 + .../vit/sam/modules/prompt_predictor.py | 2 + ultralytics/vit/sam/modules/sam.py | 2 + ultralytics/vit/sam/modules/transformer.py | 2 + ultralytics/vit/sam/predict.py | 2 + ultralytics/vit/utils/__init__.py | 1 + ultralytics/vit/utils/loss.py | 5 +- ultralytics/vit/utils/ops.py | 88 ++++++++++++------ 37 files changed, 451 insertions(+), 32 deletions(-) create mode 100644 docs/reference/vit/rtdetr/model.md create mode 100644 
docs/reference/vit/rtdetr/predict.md create mode 100644 docs/reference/vit/rtdetr/train.md create mode 100644 docs/reference/vit/rtdetr/val.md create mode 100644 docs/reference/vit/sam/amg.md create mode 100644 docs/reference/vit/sam/autosize.md create mode 100644 docs/reference/vit/sam/build.md create mode 100644 docs/reference/vit/sam/model.md create mode 100644 docs/reference/vit/sam/modules/decoders.md create mode 100644 docs/reference/vit/sam/modules/encoders.md create mode 100644 docs/reference/vit/sam/modules/mask_generator.md create mode 100644 docs/reference/vit/sam/modules/prompt_predictor.md create mode 100644 docs/reference/vit/sam/modules/sam.md create mode 100644 docs/reference/vit/sam/modules/transformer.md create mode 100644 docs/reference/vit/sam/predict.md create mode 100644 docs/reference/vit/utils/loss.md create mode 100644 docs/reference/vit/utils/ops.md create mode 100644 ultralytics/vit/utils/__init__.py diff --git a/docs/hub/datasets.md b/docs/hub/datasets.md index 0a984a8..d09f77b 100644 --- a/docs/hub/datasets.md +++ b/docs/hub/datasets.md @@ -1,6 +1,6 @@ --- comments: true -description: Upload custom datasets to Ultralytics HUB for YOLOv5 and YOLOv8 models. Follow YAML structure, zip and upload. Scan & train new models. +description: Efficiently manage and use custom datasets on Ultralytics HUB for streamlined training with YOLOv5 and YOLOv8 models. keywords: Ultralytics, HUB, Datasets, Upload, Visualize, Train, Custom Data, YAML, YOLOv5, YOLOv8 --- diff --git a/docs/reference/vit/rtdetr/model.md b/docs/reference/vit/rtdetr/model.md new file mode 100644 index 0000000..c979186 --- /dev/null +++ b/docs/reference/vit/rtdetr/model.md @@ -0,0 +1,9 @@ +--- +description: Learn about the RTDETR model in Ultralytics YOLO Docs and how it can be used for object detection with improved speed and accuracy. Find implementation details and more. 
+keywords: RTDETR, Ultralytics, YOLO, object detection, speed, accuracy, implementation details +--- + +## RTDETR +--- +### ::: ultralytics.vit.rtdetr.model.RTDETR +

\ No newline at end of file diff --git a/docs/reference/vit/rtdetr/predict.md b/docs/reference/vit/rtdetr/predict.md new file mode 100644 index 0000000..c5b5420 --- /dev/null +++ b/docs/reference/vit/rtdetr/predict.md @@ -0,0 +1,9 @@ +--- +description: Learn about the RTDETRPredictor class and how to use it for vision transformer object detection with Ultralytics YOLO. +keywords: RTDETRPredictor, object detection, vision transformer, Ultralytics YOLO +--- + +## RTDETRPredictor +--- +### ::: ultralytics.vit.rtdetr.predict.RTDETRPredictor +

\ No newline at end of file diff --git a/docs/reference/vit/rtdetr/train.md b/docs/reference/vit/rtdetr/train.md new file mode 100644 index 0000000..b7bb384 --- /dev/null +++ b/docs/reference/vit/rtdetr/train.md @@ -0,0 +1,14 @@ +--- +description: Learn how to use RTDETRTrainer from Ultralytics YOLO Docs. Train object detection models with the latest VIT-based RTDETR system. +keywords: RTDETRTrainer, Ultralytics YOLO Docs, object detection, VIT-based RTDETR system, train +--- + +## RTDETRTrainer +--- +### ::: ultralytics.vit.rtdetr.train.RTDETRTrainer +

+ +## train +--- +### ::: ultralytics.vit.rtdetr.train.train +

\ No newline at end of file diff --git a/docs/reference/vit/rtdetr/val.md b/docs/reference/vit/rtdetr/val.md new file mode 100644 index 0000000..43c1898 --- /dev/null +++ b/docs/reference/vit/rtdetr/val.md @@ -0,0 +1,14 @@ +--- +description: Documentation for RTDETRValidator data validation tool in Ultralytics RTDETRDataset. +keywords: RTDETRDataset, RTDETRValidator, data validation, documentation +--- + +## RTDETRDataset +--- +### ::: ultralytics.vit.rtdetr.val.RTDETRDataset +

+ +## RTDETRValidator +--- +### ::: ultralytics.vit.rtdetr.val.RTDETRValidator +

\ No newline at end of file diff --git a/docs/reference/vit/sam/amg.md b/docs/reference/vit/sam/amg.md new file mode 100644 index 0000000..82c66e8 --- /dev/null +++ b/docs/reference/vit/sam/amg.md @@ -0,0 +1,89 @@ +--- +description: Explore and learn about functions in Ultralytics MaskData library such as mask_to_rle_pytorch, area_from_rle, generate_crop_boxes, and more. +keywords: Ultralytics, SAM, MaskData, mask_to_rle_pytorch, area_from_rle, generate_crop_boxes, batched_mask_to_box, documentation +--- + +## MaskData +--- +### ::: ultralytics.vit.sam.amg.MaskData +

+ +## is_box_near_crop_edge +--- +### ::: ultralytics.vit.sam.amg.is_box_near_crop_edge +

+ +## box_xyxy_to_xywh +--- +### ::: ultralytics.vit.sam.amg.box_xyxy_to_xywh +

+ +## batch_iterator +--- +### ::: ultralytics.vit.sam.amg.batch_iterator +

+ +## mask_to_rle_pytorch +--- +### ::: ultralytics.vit.sam.amg.mask_to_rle_pytorch +

+ +## rle_to_mask +--- +### ::: ultralytics.vit.sam.amg.rle_to_mask +

+ +## area_from_rle +--- +### ::: ultralytics.vit.sam.amg.area_from_rle +

+ +## calculate_stability_score +--- +### ::: ultralytics.vit.sam.amg.calculate_stability_score +

+ +## build_point_grid +--- +### ::: ultralytics.vit.sam.amg.build_point_grid +

+ +## build_all_layer_point_grids +--- +### ::: ultralytics.vit.sam.amg.build_all_layer_point_grids +

+ +## generate_crop_boxes +--- +### ::: ultralytics.vit.sam.amg.generate_crop_boxes +

+ +## uncrop_boxes_xyxy +--- +### ::: ultralytics.vit.sam.amg.uncrop_boxes_xyxy +

+ +## uncrop_points +--- +### ::: ultralytics.vit.sam.amg.uncrop_points +

+ +## uncrop_masks +--- +### ::: ultralytics.vit.sam.amg.uncrop_masks +

+ +## remove_small_regions +--- +### ::: ultralytics.vit.sam.amg.remove_small_regions +

+ +## coco_encode_rle +--- +### ::: ultralytics.vit.sam.amg.coco_encode_rle +

+ +## batched_mask_to_box +--- +### ::: ultralytics.vit.sam.amg.batched_mask_to_box +

\ No newline at end of file diff --git a/docs/reference/vit/sam/autosize.md b/docs/reference/vit/sam/autosize.md new file mode 100644 index 0000000..cbb0ca7 --- /dev/null +++ b/docs/reference/vit/sam/autosize.md @@ -0,0 +1,9 @@ +--- +description: Learn how to use the ResizeLongestSide module in Ultralytics YOLO for automatic image resizing. Resize your images with ease. +keywords: ResizeLongestSide, Ultralytics YOLO, automatic image resizing, image resizing +--- + +## ResizeLongestSide +--- +### ::: ultralytics.vit.sam.autosize.ResizeLongestSide +

\ No newline at end of file diff --git a/docs/reference/vit/sam/build.md b/docs/reference/vit/sam/build.md new file mode 100644 index 0000000..faa26ee --- /dev/null +++ b/docs/reference/vit/sam/build.md @@ -0,0 +1,29 @@ +--- +description: Learn how to build SAM and VIT models with Ultralytics YOLO Docs. Enhance your understanding of computer vision models today! +keywords: SAM, VIT, computer vision models, build SAM models, build VIT models, Ultralytics YOLO Docs +--- + +## build_sam_vit_h +--- +### ::: ultralytics.vit.sam.build.build_sam_vit_h +

+ +## build_sam_vit_l +--- +### ::: ultralytics.vit.sam.build.build_sam_vit_l +

+ +## build_sam_vit_b +--- +### ::: ultralytics.vit.sam.build.build_sam_vit_b +

+ +## _build_sam +--- +### ::: ultralytics.vit.sam.build._build_sam +

+ +## build_sam +--- +### ::: ultralytics.vit.sam.build.build_sam +

\ No newline at end of file diff --git a/docs/reference/vit/sam/model.md b/docs/reference/vit/sam/model.md new file mode 100644 index 0000000..7d924d4 --- /dev/null +++ b/docs/reference/vit/sam/model.md @@ -0,0 +1,9 @@ +--- +description: Learn about the Ultralytics VIT SAM (Segment Anything Model) interface for image segmentation and how it can help streamline your computer vision workflow. Check out the documentation for implementation details and examples. +keywords: Ultralytics, VIT, SAM, image segmentation, computer vision, deep learning, implementation, examples +--- + +## SAM +--- +### ::: ultralytics.vit.sam.model.SAM +

\ No newline at end of file diff --git a/docs/reference/vit/sam/modules/decoders.md b/docs/reference/vit/sam/modules/decoders.md new file mode 100644 index 0000000..e89ca9d --- /dev/null +++ b/docs/reference/vit/sam/modules/decoders.md @@ -0,0 +1,9 @@ +## MaskDecoder +--- +### ::: ultralytics.vit.sam.modules.decoders.MaskDecoder +

+ +## MLP +--- +### ::: ultralytics.vit.sam.modules.decoders.MLP +

diff --git a/docs/reference/vit/sam/modules/encoders.md b/docs/reference/vit/sam/modules/encoders.md new file mode 100644 index 0000000..8c338bc --- /dev/null +++ b/docs/reference/vit/sam/modules/encoders.md @@ -0,0 +1,54 @@ +--- +description: Learn about Ultralytics ViT encoder, position embeddings, attention, window partition, and more in our comprehensive documentation. +keywords: Ultralytics YOLO, ViT Encoder, Position Embeddings, Attention, Window Partition, Rel Pos Encoding +--- + +## ImageEncoderViT +--- +### ::: ultralytics.vit.sam.modules.encoders.ImageEncoderViT +

+ +## PromptEncoder +--- +### ::: ultralytics.vit.sam.modules.encoders.PromptEncoder +

+ +## PositionEmbeddingRandom +--- +### ::: ultralytics.vit.sam.modules.encoders.PositionEmbeddingRandom +

+ +## Block +--- +### ::: ultralytics.vit.sam.modules.encoders.Block +

+ +## Attention +--- +### ::: ultralytics.vit.sam.modules.encoders.Attention +

+ +## PatchEmbed +--- +### ::: ultralytics.vit.sam.modules.encoders.PatchEmbed +

+ +## window_partition +--- +### ::: ultralytics.vit.sam.modules.encoders.window_partition +

+ +## window_unpartition +--- +### ::: ultralytics.vit.sam.modules.encoders.window_unpartition +

+ +## get_rel_pos +--- +### ::: ultralytics.vit.sam.modules.encoders.get_rel_pos +

+ +## add_decomposed_rel_pos +--- +### ::: ultralytics.vit.sam.modules.encoders.add_decomposed_rel_pos +

\ No newline at end of file diff --git a/docs/reference/vit/sam/modules/mask_generator.md b/docs/reference/vit/sam/modules/mask_generator.md new file mode 100644 index 0000000..e2e1251 --- /dev/null +++ b/docs/reference/vit/sam/modules/mask_generator.md @@ -0,0 +1,9 @@ +--- +description: Learn about the SamAutomaticMaskGenerator module in Ultralytics YOLO, an automatic mask generator for image segmentation. +keywords: SamAutomaticMaskGenerator, Ultralytics YOLO, automatic mask generator, image segmentation +--- + +## SamAutomaticMaskGenerator +--- +### ::: ultralytics.vit.sam.modules.mask_generator.SamAutomaticMaskGenerator +

\ No newline at end of file diff --git a/docs/reference/vit/sam/modules/prompt_predictor.md b/docs/reference/vit/sam/modules/prompt_predictor.md new file mode 100644 index 0000000..f7e3b37 --- /dev/null +++ b/docs/reference/vit/sam/modules/prompt_predictor.md @@ -0,0 +1,9 @@ +--- +description: Learn about PromptPredictor - a module in Ultralytics VIT SAM that predicts segmentation masks based on prompts. Get started today! +keywords: PromptPredictor, Ultralytics, YOLO, VIT SAM, image segmentation, deep learning, computer vision +--- + +## PromptPredictor +--- +### ::: ultralytics.vit.sam.modules.prompt_predictor.PromptPredictor +

\ No newline at end of file diff --git a/docs/reference/vit/sam/modules/sam.md b/docs/reference/vit/sam/modules/sam.md new file mode 100644 index 0000000..acd467b --- /dev/null +++ b/docs/reference/vit/sam/modules/sam.md @@ -0,0 +1,9 @@ +--- +description: Explore the Sam module in Ultralytics VIT, a PyTorch-based vision library, and learn how to improve your image classification and segmentation tasks. +keywords: Ultralytics VIT, Sam module, PyTorch vision library, image classification, segmentation tasks +--- + +## Sam +--- +### ::: ultralytics.vit.sam.modules.sam.Sam +

\ No newline at end of file diff --git a/docs/reference/vit/sam/modules/transformer.md b/docs/reference/vit/sam/modules/transformer.md new file mode 100644 index 0000000..994b984 --- /dev/null +++ b/docs/reference/vit/sam/modules/transformer.md @@ -0,0 +1,19 @@ +--- +description: Explore the Attention and TwoWayTransformer modules in Ultralytics YOLO documentation. Learn how to integrate them in your project efficiently. +keywords: Ultralytics YOLO, Attention module, TwoWayTransformer module, Object Detection, Deep Learning +--- + +## TwoWayTransformer +--- +### ::: ultralytics.vit.sam.modules.transformer.TwoWayTransformer +

+ +## TwoWayAttentionBlock +--- +### ::: ultralytics.vit.sam.modules.transformer.TwoWayAttentionBlock +

+ +## Attention +--- +### ::: ultralytics.vit.sam.modules.transformer.Attention +

\ No newline at end of file diff --git a/docs/reference/vit/sam/predict.md b/docs/reference/vit/sam/predict.md new file mode 100644 index 0000000..836d91e --- /dev/null +++ b/docs/reference/vit/sam/predict.md @@ -0,0 +1,9 @@ +--- +description: The VIT SAM Predictor from Ultralytics provides image segmentation capabilities. Learn how to use it to generate segmentation masks for your images. +keywords: Ultralytics, VIT SAM Predictor, image segmentation, YOLO +--- + +## Predictor +--- +### ::: ultralytics.vit.sam.predict.Predictor +

\ No newline at end of file diff --git a/docs/reference/vit/utils/loss.md b/docs/reference/vit/utils/loss.md new file mode 100644 index 0000000..3eb366e --- /dev/null +++ b/docs/reference/vit/utils/loss.md @@ -0,0 +1,14 @@ +--- +description: DETRLoss is a method for optimizing detection of objects in images. Learn how to use it in RTDETRDetectionLoss at Ultralytics Docs. +keywords: DETRLoss, RTDETRDetectionLoss, Ultralytics, object detection, image classification, computer vision +--- + +## DETRLoss +--- +### ::: ultralytics.vit.utils.loss.DETRLoss +

+ +## RTDETRDetectionLoss +--- +### ::: ultralytics.vit.utils.loss.RTDETRDetectionLoss +

\ No newline at end of file diff --git a/docs/reference/vit/utils/ops.md b/docs/reference/vit/utils/ops.md new file mode 100644 index 0000000..f4b7c81 --- /dev/null +++ b/docs/reference/vit/utils/ops.md @@ -0,0 +1,19 @@ +--- +description: Learn about the HungarianMatcher class and the get_cdn_group and inverse_sigmoid functions in the Ultralytics YOLO Docs. Improve your object detection skills today! +keywords: Ultralytics, YOLO, object detection, HungarianMatcher, get_cdn_group, inverse_sigmoid +--- + +## HungarianMatcher +--- +### ::: ultralytics.vit.utils.ops.HungarianMatcher +

+ +## get_cdn_group +--- +### ::: ultralytics.vit.utils.ops.get_cdn_group +

+ +## inverse_sigmoid +--- +### ::: ultralytics.vit.utils.ops.inverse_sigmoid +

\ No newline at end of file diff --git a/mkdocs.yml b/mkdocs.yml index b6930aa..146da12 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -273,6 +273,28 @@ nav: - gmc: reference/tracker/utils/gmc.md - kalman_filter: reference/tracker/utils/kalman_filter.md - matching: reference/tracker/utils/matching.md + - vit: + - rtdetr: + - model: reference/vit/rtdetr/model.md + - predict: reference/vit/rtdetr/predict.md + - train: reference/vit/rtdetr/train.md + - val: reference/vit/rtdetr/val.md + - sam: + - amg: reference/vit/sam/amg.md + - autosize: reference/vit/sam/autosize.md + - build: reference/vit/sam/build.md + - model: reference/vit/sam/model.md + - modules: + - decoders: reference/vit/sam/modules/decoders.md + - encoders: reference/vit/sam/modules/encoders.md + - mask_generator: reference/vit/sam/modules/mask_generator.md + - prompt_predictor: reference/vit/sam/modules/prompt_predictor.md + - sam: reference/vit/sam/modules/sam.md + - transformer: reference/vit/sam/modules/transformer.md + - predict: reference/vit/sam/predict.md + - utils: + - loss: reference/vit/utils/loss.md + - ops: reference/vit/utils/ops.md - yolo: - cfg: - __init__: reference/yolo/cfg/__init__.md diff --git a/ultralytics/vit/rtdetr/model.py b/ultralytics/vit/rtdetr/model.py index 322912c..5142056 100644 --- a/ultralytics/vit/rtdetr/model.py +++ b/ultralytics/vit/rtdetr/model.py @@ -1,6 +1,6 @@ # Ultralytics YOLO 🚀, AGPL-3.0 license """ -# RT-DETR model interface +RT-DETR model interface """ from pathlib import Path diff --git a/ultralytics/vit/rtdetr/train.py b/ultralytics/vit/rtdetr/train.py index 5a29589..54eeaf4 100644 --- a/ultralytics/vit/rtdetr/train.py +++ b/ultralytics/vit/rtdetr/train.py @@ -1,3 +1,5 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + from copy import copy import torch diff --git a/ultralytics/vit/sam/__init__.py b/ultralytics/vit/sam/__init__.py index 64d8d05..b47c043 100644 --- a/ultralytics/vit/sam/__init__.py +++ b/ultralytics/vit/sam/__init__.py @@ -1,3 +1,5 @@ +# Ultralytics 
YOLO 🚀, AGPL-3.0 license + from .build import build_sam # noqa from .model import SAM # noqa from .modules.prompt_predictor import PromptPredictor # noqa diff --git a/ultralytics/vit/sam/amg.py b/ultralytics/vit/sam/amg.py index 1522931..29f0bcf 100644 --- a/ultralytics/vit/sam/amg.py +++ b/ultralytics/vit/sam/amg.py @@ -1,3 +1,5 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + import math from copy import deepcopy from itertools import product diff --git a/ultralytics/vit/sam/autosize.py b/ultralytics/vit/sam/autosize.py index d0a298c..ef33644 100644 --- a/ultralytics/vit/sam/autosize.py +++ b/ultralytics/vit/sam/autosize.py @@ -1,3 +1,5 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + # Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. diff --git a/ultralytics/vit/sam/build.py b/ultralytics/vit/sam/build.py index 67127ba..b2e0986 100644 --- a/ultralytics/vit/sam/build.py +++ b/ultralytics/vit/sam/build.py @@ -1,3 +1,5 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + # Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. 
diff --git a/ultralytics/vit/sam/model.py b/ultralytics/vit/sam/model.py index a60c8cf..420d6a6 100644 --- a/ultralytics/vit/sam/model.py +++ b/ultralytics/vit/sam/model.py @@ -1,4 +1,7 @@ -# SAM model interface +# Ultralytics YOLO 🚀, AGPL-3.0 license +""" +SAM model interface +""" from ultralytics.yolo.cfg import get_cfg diff --git a/ultralytics/vit/sam/modules/__init__.py b/ultralytics/vit/sam/modules/__init__.py index e69de29..9e68dc1 100644 --- a/ultralytics/vit/sam/modules/__init__.py +++ b/ultralytics/vit/sam/modules/__init__.py @@ -0,0 +1 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license diff --git a/ultralytics/vit/sam/modules/decoders.py b/ultralytics/vit/sam/modules/decoders.py index 743dcb4..43a2932 100644 --- a/ultralytics/vit/sam/modules/decoders.py +++ b/ultralytics/vit/sam/modules/decoders.py @@ -1,3 +1,5 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + from typing import List, Tuple, Type import torch diff --git a/ultralytics/vit/sam/modules/encoders.py b/ultralytics/vit/sam/modules/encoders.py index 52be592..0da032d 100644 --- a/ultralytics/vit/sam/modules/encoders.py +++ b/ultralytics/vit/sam/modules/encoders.py @@ -1,3 +1,5 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + from typing import Any, Optional, Tuple, Type import numpy as np diff --git a/ultralytics/vit/sam/modules/mask_generator.py b/ultralytics/vit/sam/modules/mask_generator.py index 6157534..8c1e00e 100644 --- a/ultralytics/vit/sam/modules/mask_generator.py +++ b/ultralytics/vit/sam/modules/mask_generator.py @@ -1,3 +1,5 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + # Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. 
diff --git a/ultralytics/vit/sam/modules/prompt_predictor.py b/ultralytics/vit/sam/modules/prompt_predictor.py index bf61186..bf89893 100644 --- a/ultralytics/vit/sam/modules/prompt_predictor.py +++ b/ultralytics/vit/sam/modules/prompt_predictor.py @@ -1,3 +1,5 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + from typing import Optional, Tuple import numpy as np diff --git a/ultralytics/vit/sam/modules/sam.py b/ultralytics/vit/sam/modules/sam.py index 34963f1..49f4bfc 100644 --- a/ultralytics/vit/sam/modules/sam.py +++ b/ultralytics/vit/sam/modules/sam.py @@ -1,3 +1,5 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + # Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. diff --git a/ultralytics/vit/sam/modules/transformer.py b/ultralytics/vit/sam/modules/transformer.py index 3f32b94..d5275bf 100644 --- a/ultralytics/vit/sam/modules/transformer.py +++ b/ultralytics/vit/sam/modules/transformer.py @@ -1,3 +1,5 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + import math from typing import Tuple, Type diff --git a/ultralytics/vit/sam/predict.py b/ultralytics/vit/sam/predict.py index 5bbccac..063955d 100644 --- a/ultralytics/vit/sam/predict.py +++ b/ultralytics/vit/sam/predict.py @@ -1,3 +1,5 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + import numpy as np import torch diff --git a/ultralytics/vit/utils/__init__.py b/ultralytics/vit/utils/__init__.py new file mode 100644 index 0000000..9e68dc1 --- /dev/null +++ b/ultralytics/vit/utils/__init__.py @@ -0,0 +1 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license diff --git a/ultralytics/vit/utils/loss.py b/ultralytics/vit/utils/loss.py index 1a5ba29..6ba24c2 100644 --- a/ultralytics/vit/utils/loss.py +++ b/ultralytics/vit/utils/loss.py @@ -1,3 +1,5 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + import torch import torch.nn as nn import torch.nn.functional as F @@ -18,11 +20,12 @@ class DETRLoss(nn.Module): use_uni_match=False, uni_match_ind=0): """ + DETR loss function. + Args: nc (int): The number of classes. 
loss_gain (dict): The coefficient of loss. aux_loss (bool): If 'aux_loss = True', loss at each decoder layer are to be used. - use_focal_loss (bool): Use focal loss or not. use_vfl (bool): Use VarifocalLoss or not. use_uni_match (bool): Whether to use a fixed layer to assign labels for auxiliary branch. uni_match_ind (int): The fixed indices of a layer. diff --git a/ultralytics/vit/utils/ops.py b/ultralytics/vit/utils/ops.py index 5b92963..164c874 100644 --- a/ultralytics/vit/utils/ops.py +++ b/ultralytics/vit/utils/ops.py @@ -1,4 +1,4 @@ -# TODO: license +# Ultralytics YOLO 🚀, AGPL-3.0 license import torch import torch.nn as nn @@ -10,12 +10,31 @@ from ultralytics.yolo.utils.ops import xywh2xyxy, xyxy2xywh class HungarianMatcher(nn.Module): + """ + A module implementing the HungarianMatcher, which is a differentiable module to solve the assignment problem in + an end-to-end fashion. + + HungarianMatcher performs optimal assignment over predicted and ground truth bounding boxes using a cost function + that considers classification scores, bounding box coordinates, and optionally, mask predictions. + + Attributes: + cost_gain (dict): Dictionary of cost coefficients for different components: 'class', 'bbox', 'giou', 'mask', and 'dice'. + use_fl (bool): Indicates whether to use Focal Loss for the classification cost calculation. + with_mask (bool): Indicates whether the model makes mask predictions. + num_sample_points (int): The number of sample points used in mask cost calculation. + alpha (float): The alpha factor in Focal Loss calculation. + gamma (float): The gamma factor in Focal Loss calculation. + + Methods: + forward(pred_bboxes, pred_scores, gt_bboxes, gt_cls, gt_groups, masks=None, gt_mask=None): Computes the assignment + between predictions and ground truths for a batch. + _cost_mask(bs, num_gts, masks=None, gt_mask=None): Computes the mask cost and dice cost if masks are predicted. + """ + + class HungarianMatcher(nn.Module): + ... 
def __init__(self, cost_gain=None, use_fl=True, with_mask=False, num_sample_points=12544, alpha=0.25, gamma=2.0): - """ - Args: - matcher_coeff (dict): The coefficient of hungarian matcher cost. - """ super().__init__() if cost_gain is None: cost_gain = {'class': 1, 'bbox': 5, 'giou': 2, 'mask': 1, 'dice': 1} @@ -28,22 +47,30 @@ class HungarianMatcher(nn.Module): def forward(self, pred_bboxes, pred_scores, gt_bboxes, gt_cls, gt_groups, masks=None, gt_mask=None): """ + Forward pass for HungarianMatcher. This function computes costs based on prediction and ground truth + (classification cost, L1 cost between boxes and GIoU cost between boxes) and finds the optimal matching + between predictions and ground truth based on these costs. + Args: - pred_bboxes (Tensor): [b, query, 4] - pred_scores (Tensor): [b, query, num_classes] - gt_cls (torch.Tensor) with shape [num_gts, ] - gt_bboxes (torch.Tensor): [num_gts, 4] - gt_groups (List(int)): a list of batch size length includes the number of gts of each image. - masks (Tensor|None): [b, query, h, w] - gt_mask (List(Tensor)): list[[n, H, W]] + pred_bboxes (Tensor): Predicted bounding boxes with shape [batch_size, num_queries, 4]. + pred_scores (Tensor): Predicted scores with shape [batch_size, num_queries, num_classes]. + gt_cls (torch.Tensor): Ground truth classes with shape [num_gts, ]. + gt_bboxes (torch.Tensor): Ground truth bounding boxes with shape [num_gts, 4]. + gt_groups (List[int]): List of length equal to batch size, containing the number of ground truths for + each image. + masks (Tensor, optional): Predicted masks with shape [batch_size, num_queries, height, width]. + Defaults to None. + gt_mask (List[Tensor], optional): List of ground truth masks, each with shape [num_masks, Height, Width]. + Defaults to None. 
Returns: - A list of size batch_size, containing tuples of (index_i, index_j) where: - - index_i is the indices of the selected predictions (in order) - - index_j is the indices of the corresponding selected targets (in order) - For each batch element, it holds: - len(index_i) = len(index_j) = min(num_queries, num_target_boxes) + (List[Tuple[Tensor, Tensor]]): A list of size batch_size, each element is a tuple (index_i, index_j), where: + - index_i is the tensor of indices of the selected predictions (in order) + - index_j is the tensor of indices of the corresponding selected ground truth targets (in order) + For each batch element, it holds: + len(index_i) = len(index_j) = min(num_queries, num_target_boxes) """ + bs, nq, nc = pred_scores.shape if sum(gt_groups) == 0: @@ -124,24 +151,29 @@ def get_cdn_group(batch, cls_noise_ratio=0.5, box_noise_scale=1.0, training=False): - """Get contrastive denoising training group + """ + Get contrastive denoising training group. This function creates a contrastive denoising training group with + positive and negative samples from the ground truths (gt). It applies noise to the class labels and bounding + box coordinates, and returns the modified labels, bounding boxes, attention mask and meta information. Args: - batch (dict): A dict includes: - gt_cls (torch.Tensor) with shape [num_gts, ], - gt_bboxes (torch.Tensor): [num_gts, 4], - gt_groups (List(int)): a list of batch size length includes the number of gts of each image. + batch (dict): A dict that includes 'gt_cls' (torch.Tensor with shape [num_gts, ]), 'gt_bboxes' + (torch.Tensor with shape [num_gts, 4]), 'gt_groups' (List(int)) which is a list of batch size length + indicating the number of gts of each image. num_classes (int): Number of classes. num_queries (int): Number of queries. - class_embed (torch.Tensor): Embedding weights to map cls to embedding space. - num_dn (int): Number of denoising. - cls_noise_ratio (float): Noise ratio for class. 
- box_noise_scale (float): Noise scale for bbox. - training (bool): If it's training or not. + class_embed (torch.Tensor): Embedding weights to map class labels to embedding space. + num_dn (int, optional): Number of denoising. Defaults to 100. + cls_noise_ratio (float, optional): Noise ratio for class labels. Defaults to 0.5. + box_noise_scale (float, optional): Noise scale for bounding box coordinates. Defaults to 1.0. + training (bool, optional): If it's in training mode. Defaults to False. Returns: - + (Tuple[Optional[Tensor], Optional[Tensor], Optional[Tensor], Optional[Dict]]): The modified class embeddings, + bounding boxes, attention mask and meta information for denoising. If not in training mode or 'num_dn' + is less than or equal to 0, the function returns None for all elements in the tuple. """ + if (not training) or num_dn <= 0: return None, None, None, None gt_groups = batch['gt_groups']