diff --git a/docs/models/fast-sam.md b/docs/models/fast-sam.md
index 4e022ac..79d9f49 100644
--- a/docs/models/fast-sam.md
+++ b/docs/models/fast-sam.md
@@ -40,42 +40,45 @@ The FastSAM models are easy to integrate into your Python applications. Ultralyt
 
 To perform object detection on an image, use the `predict` method as shown below:
 
-```python
-from ultralytics import FastSAM
-from ultralytics.models.fastsam import FastSAMPrompt
-
-# Define image path and inference device
-IMAGE_PATH = 'ultralytics/assets/bus.jpg'
-DEVICE = 'cpu'
-
-# Create a FastSAM model
-model = FastSAM('FastSAM-s.pt')  # or FastSAM-x.pt
-
-# Run inference on an image
-everything_results = model(IMAGE_PATH,
-                           device=DEVICE,
-                           retina_masks=True,
-                           imgsz=1024,
-                           conf=0.4,
-                           iou=0.9)
-
-prompt_process = FastSAMPrompt(IMAGE_PATH, everything_results, device=DEVICE)
-
-# Everything prompt
-ann = prompt_process.everything_prompt()
-
-# Bbox default shape [0,0,0,0] -> [x1,y1,x2,y2]
-ann = prompt_process.box_prompt(bbox=[200, 200, 300, 300])
-
-# Text prompt
-ann = prompt_process.text_prompt(text='a photo of a dog')
-
-# Point prompt
-# points default [[0,0]] [[x1,y1],[x2,y2]]
-# point_label default [0] [1,0] 0:background, 1:foreground
-ann = prompt_process.point_prompt(points=[[200, 200]], pointlabel=[1])
-prompt_process.plot(annotations=ann, output='./')
-```
+!!! example ""
+
+    === "Python"
+        ```python
+        from ultralytics import FastSAM
+        from ultralytics.models.fastsam import FastSAMPrompt
+
+        # Define image path and inference device
+        IMAGE_PATH = 'ultralytics/assets/bus.jpg'
+        DEVICE = 'cpu'
+
+        # Create a FastSAM model
+        model = FastSAM('FastSAM-s.pt')  # or FastSAM-x.pt
+
+        # Run inference on an image
+        everything_results = model(IMAGE_PATH,
+                                   device=DEVICE,
+                                   retina_masks=True,
+                                   imgsz=1024,
+                                   conf=0.4,
+                                   iou=0.9)
+
+        prompt_process = FastSAMPrompt(IMAGE_PATH, everything_results, device=DEVICE)
+
+        # Everything prompt
+        ann = prompt_process.everything_prompt()
+
+        # Bbox default shape [0,0,0,0] -> [x1,y1,x2,y2]
+        ann = prompt_process.box_prompt(bbox=[200, 200, 300, 300])
+
+        # Text prompt
+        ann = prompt_process.text_prompt(text='a photo of a dog')
+
+        # Point prompt
+        # points default [[0,0]] [[x1,y1],[x2,y2]]
+        # point_label default [0] [1,0] 0:background, 1:foreground
+        ann = prompt_process.point_prompt(points=[[200, 200]], pointlabel=[1])
+        prompt_process.plot(annotations=ann, output='./')
+        ```
 
 This snippet demonstrates the simplicity of loading a pre-trained model and running a prediction on an image.
 
@@ -83,15 +86,19 @@ This snippet demonstrates the simplicity of loading a pre-trained model and runn
 
 Validation of the model on a dataset can be done as follows:
 
-```python
-from ultralytics import FastSAM
+!!! example ""
+
+    === "Python"
+
+        ```python
+        from ultralytics import FastSAM
 
-# Create a FastSAM model
-model = FastSAM('FastSAM-s.pt')  # or FastSAM-x.pt
+        # Create a FastSAM model
+        model = FastSAM('FastSAM-s.pt')  # or FastSAM-x.pt
 
-# Validate the model
-results = model.val(data='coco8-seg.yaml')
-```
+        # Validate the model
+        results = model.val(data='coco8-seg.yaml')
+        ```
 
 Please note that FastSAM only supports detection and segmentation of a single class of object. This means it will recognize and segment all objects as the same class. Therefore, when preparing the dataset, you need to convert all object category IDs to 0.
 
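For the dataset-preparation step described above (converting all object category IDs to 0 for FastSAM's single-class output), a minimal sketch of the remapping for YOLO-format label files might look like the following — `path/to/labels` is a placeholder and this snippet is not part of the patch:

```python
from pathlib import Path

# Rewrite YOLO-format label files (one 'cls x y w h' row per object) so that
# every object uses class ID 0, since FastSAM segments all objects as one class.
for label_file in Path('path/to/labels').rglob('*.txt'):
    rows = [r.split(maxsplit=1) for r in label_file.read_text().splitlines() if r.strip()]
    label_file.write_text(''.join(f'0 {coords}\n' for _, coords in rows))
```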
diff --git a/docs/models/index.md b/docs/models/index.md
index 887189f..8210aa9 100644
--- a/docs/models/index.md
+++ b/docs/models/index.md
@@ -26,7 +26,7 @@ You can use many of these models directly in the Command Line Interface (CLI) or
 
 ## Usage
 
- This example provides simple inference code for YOLO, SAM and RTDETR models. For more options including handling inference results see [Predict](../modes/predict.md) mode. For using models with additional modes see [Train](../modes/train.md), [Val](../modes/val.md) and [Export](../modes/export.md).
+This example provides simple inference code for YOLO, SAM and RTDETR models. For more options including handling inference results see [Predict](../modes/predict.md) mode. For using models with additional modes see [Train](../modes/train.md), [Val](../modes/val.md) and [Export](../modes/export.md).
 
 !!! example ""
 
diff --git a/docs/models/mobile-sam.md b/docs/models/mobile-sam.md
index 7c49d4f..e224046 100644
--- a/docs/models/mobile-sam.md
+++ b/docs/models/mobile-sam.md
@@ -61,27 +61,33 @@ You can download the model [here](https://github.com/ChaoningZhang/MobileSAM/blo
 
 ### Point Prompt
 
-```python
-from ultralytics import SAM
-
-# Load the model
-model = SAM('mobile_sam.pt')
-
-# Predict a segment based on a point prompt
-model.predict('ultralytics/assets/zidane.jpg', points=[900, 370], labels=[1])
-```
+!!! example ""
+
+    === "Python"
+        ```python
+        from ultralytics import SAM
+
+        # Load the model
+        model = SAM('mobile_sam.pt')
+
+        # Predict a segment based on a point prompt
+        model.predict('ultralytics/assets/zidane.jpg', points=[900, 370], labels=[1])
+        ```
 
 ### Box Prompt
 
-```python
-from ultralytics import SAM
-
-# Load the model
-model = SAM('mobile_sam.pt')
-
-# Predict a segment based on a box prompt
-model.predict('ultralytics/assets/zidane.jpg', bboxes=[439, 437, 524, 709])
-```
+!!! example ""
+
+    === "Python"
+        ```python
+        from ultralytics import SAM
+
+        # Load the model
+        model = SAM('mobile_sam.pt')
+
+        # Predict a segment based on a box prompt
+        model.predict('ultralytics/assets/zidane.jpg', bboxes=[439, 437, 524, 709])
+        ```
 
 We have implemented `MobileSAM` and `SAM` using the same API. For more usage information, please see the [SAM page](./sam.md).
 
diff --git a/docs/models/sam.md b/docs/models/sam.md
index 2aad7d8..d6ffd4d 100644
--- a/docs/models/sam.md
+++ b/docs/models/sam.md
@@ -152,29 +152,33 @@ This comparison shows the order-of-magnitude differences in the model sizes and
 
 Tests run on a 2023 Apple M2 Macbook with 16GB of RAM. To reproduce this test:
 
-```python
-from ultralytics import FastSAM, SAM, YOLO
-
-# Profile SAM-b
-model = SAM('sam_b.pt')
-model.info()
-model('ultralytics/assets')
-
-# Profile MobileSAM
-model = SAM('mobile_sam.pt')
-model.info()
-model('ultralytics/assets')
-
-# Profile FastSAM-s
-model = FastSAM('FastSAM-s.pt')
-model.info()
-model('ultralytics/assets')
-
-# Profile YOLOv8n-seg
-model = YOLO('yolov8n-seg.pt')
-model.info()
-model('ultralytics/assets')
-```
+
+!!! example ""
+
+    === "Python"
+        ```python
+        from ultralytics import FastSAM, SAM, YOLO
+
+        # Profile SAM-b
+        model = SAM('sam_b.pt')
+        model.info()
+        model('ultralytics/assets')
+
+        # Profile MobileSAM
+        model = SAM('mobile_sam.pt')
+        model.info()
+        model('ultralytics/assets')
+
+        # Profile FastSAM-s
+        model = FastSAM('FastSAM-s.pt')
+        model.info()
+        model('ultralytics/assets')
+
+        # Profile YOLOv8n-seg
+        model = YOLO('yolov8n-seg.pt')
+        model.info()
+        model('ultralytics/assets')
+        ```
 
 ## Auto-Annotation: A Quick Path to Segmentation Datasets
 
@@ -184,11 +188,14 @@ Auto-annotation is a key feature of SAM, allowing users to generate a [segmentat
 
 To auto-annotate your dataset with the Ultralytics framework, use the `auto_annotate` function as shown below:
 
-```python
-from ultralytics.data.annotator import auto_annotate
+!!! example ""
 
-auto_annotate(data="path/to/images", det_model="yolov8x.pt", sam_model='sam_b.pt')
-```
+    === "Python"
+        ```python
+        from ultralytics.data.annotator import auto_annotate
+
+        auto_annotate(data="path/to/images", det_model="yolov8x.pt", sam_model='sam_b.pt')
+        ```
 
 | Argument   | Type                | Description                                                                                               | Default      |
 |------------|---------------------|---------------------------------------------------------------------------------------------------------|--------------|
diff --git a/docs/modes/train.md b/docs/modes/train.md
index db9b8b4..e95b652 100644
--- a/docs/modes/train.md
+++ b/docs/modes/train.md
@@ -33,6 +33,7 @@ Train YOLOv8n on the COCO128 dataset for 100 epochs at image size 640. See Argum
         # Train the model
         results = model.train(data='coco128.yaml', epochs=100, imgsz=640)
         ```
+
     === "CLI"
 
         ```bash
@@ -63,6 +64,7 @@ The training device can be specified using the `device` argument. If no argument
         # Train the model with 2 GPUs
         results = model.train(data='coco128.yaml', epochs=100, imgsz=640, device=[0, 1])
         ```
+
     === "CLI"
 
         ```bash
@@ -89,6 +91,7 @@ To enable training on Apple M1 and M2 chips, you should specify 'mps' as your de
         # Train the model with 2 GPUs
         results = model.train(data='coco128.yaml', epochs=100, imgsz=640, device='mps')
         ```
+
    === "CLI"
 
         ```bash
@@ -121,6 +124,7 @@ Below is an example of how to resume an interrupted training using Python and vi
         # Resume training
         results = model.train(resume=True)
         ```
+
     === "CLI"
 
         ```bash
@@ -196,12 +200,15 @@ To use a logger, select it from the dropdown menu in the code snippet above and
 
 To use Comet:
 
-```python
-# pip install comet_ml
-import comet_ml
+!!! example ""
 
-comet_ml.init()
-```
+    === "Python"
+        ```python
+        # pip install comet_ml
+        import comet_ml
+
+        comet_ml.init()
+        ```
 
 Remember to sign in to your Comet account on their website and get your API key. You will need to add this to your environment variables or your script to log your experiments.
 
@@ -211,12 +218,15 @@ Remember to sign in to your Comet account on their website and get your API key
 
 To use ClearML:
 
-```python
-# pip install clearml
-import clearml
+!!! example ""
 
-clearml.browser_login()
-```
+    === "Python"
+        ```python
+        # pip install clearml
+        import clearml
+
+        clearml.browser_login()
+        ```
 
 After running this script, you will need to sign in to your ClearML account on the browser and authenticate your session.
 
@@ -226,16 +236,22 @@ After running this script, you will need to sign in to your ClearML account on t
 
 To use TensorBoard in [Google Colab](https://colab.research.google.com/github/ultralytics/ultralytics/blob/main/examples/tutorial.ipynb):
 
-```bash
-load_ext tensorboard
-tensorboard --logdir ultralytics/runs  # replace with 'runs' directory
-```
+!!! example ""
+
+    === "CLI"
+        ```bash
+        %load_ext tensorboard
+        %tensorboard --logdir ultralytics/runs  # replace with 'runs' directory
+        ```
 
 To use TensorBoard locally run the below command and view results at http://localhost:6006/.
 
-```bash
-tensorboard --logdir ultralytics/runs  # replace with 'runs' directory
-```
+!!! example ""
+
+    === "CLI"
+        ```bash
+        tensorboard --logdir ultralytics/runs  # replace with 'runs' directory
+        ```
 
 This will load TensorBoard and direct it to the directory where your training logs are saved.
 
diff --git a/docs/reference/models/sam/amg.md b/docs/reference/models/sam/amg.md
index e7cf406..1ec59eb 100644
--- a/docs/reference/models/sam/amg.md
+++ b/docs/reference/models/sam/amg.md
@@ -9,34 +9,14 @@ keywords: Ultralytics, Mask Data, Transformation, Encoding, RLE encoding, Image
 
 Full source code for this file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/sam/amg.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/sam/amg.py). Help us fix any issues you see by submitting a [Pull Request](https://docs.ultralytics.com/help/contributing/) 🛠️. Thank you 🙏!
 
----
-## ::: ultralytics.models.sam.amg.MaskData
-<br><br>
-
 ---
 ## ::: ultralytics.models.sam.amg.is_box_near_crop_edge
 <br><br>
 
----
-## ::: ultralytics.models.sam.amg.box_xyxy_to_xywh
-<br><br>
-
 ---
 ## ::: ultralytics.models.sam.amg.batch_iterator
 <br><br>
 
----
-## ::: ultralytics.models.sam.amg.mask_to_rle_pytorch
-<br><br>
-
----
-## ::: ultralytics.models.sam.amg.rle_to_mask
-<br><br>
-
----
-## ::: ultralytics.models.sam.amg.area_from_rle
-<br><br>
-
 ---
 ## ::: ultralytics.models.sam.amg.calculate_stability_score
 <br><br>
 
@@ -69,10 +49,6 @@ keywords: Ultralytics, Mask Data, Transformation, Encoding, RLE encoding, Image
 ## ::: ultralytics.models.sam.amg.remove_small_regions
 <br><br>
 
----
-## ::: ultralytics.models.sam.amg.coco_encode_rle
-<br><br>
-
 ---
 ## ::: ultralytics.models.sam.amg.batched_mask_to_box
 <br><br>
diff --git a/ultralytics/data/augment.py b/ultralytics/data/augment.py
index 12d09cf..053a3a5 100644
--- a/ultralytics/data/augment.py
+++ b/ultralytics/data/augment.py
@@ -17,8 +17,6 @@ from ultralytics.utils.ops import segment2box
 
 from .utils import polygons2masks, polygons2masks_overlap
 
-POSE_FLIPLR_INDEX = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]
-
 
 # TODO: we might need a BaseTransform to make all these augments be compatible with both classification and semantic
 class BaseTransform:
diff --git a/ultralytics/data/dataset.py b/ultralytics/data/dataset.py
index 1d13261..6d62039 100644
--- a/ultralytics/data/dataset.py
+++ b/ultralytics/data/dataset.py
@@ -30,7 +30,6 @@ class YOLODataset(BaseDataset):
         (torch.utils.data.Dataset): A PyTorch dataset object that can be used for training an object detection model.
     """
     cache_version = '1.0.2'  # dataset labels *.cache version, >= 1.0.0 for YOLOv8
-    rand_interp_methods = [cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4]
 
     def __init__(self, *args, data=None, use_segments=False, use_keypoints=False, **kwargs):
         self.use_segments = use_segments
diff --git a/ultralytics/data/utils.py b/ultralytics/data/utils.py
index b728546..2f48367 100644
--- a/ultralytics/data/utils.py
+++ b/ultralytics/data/utils.py
@@ -28,8 +28,6 @@ HELP_URL = 'See https://docs.ultralytics.com/datasets/detect for dataset formatt
 IMG_FORMATS = 'bmp', 'dng', 'jpeg', 'jpg', 'mpo', 'png', 'tif', 'tiff', 'webp', 'pfm'  # image suffixes
 VID_FORMATS = 'asf', 'avi', 'gif', 'm4v', 'mkv', 'mov', 'mp4', 'mpeg', 'mpg', 'ts', 'wmv', 'webm'  # video suffixes
 PIN_MEMORY = str(os.getenv('PIN_MEMORY', True)).lower() == 'true'  # global pin_memory for dataloaders
-IMAGENET_MEAN = 0.485, 0.456, 0.406  # RGB mean
-IMAGENET_STD = 0.229, 0.224, 0.225  # RGB standard deviation
 
 # Get orientation exif tag
 for orientation in ExifTags.TAGS.keys():
diff --git a/ultralytics/models/sam/amg.py b/ultralytics/models/sam/amg.py
index 29f0bcf..f251fe4 100644
--- a/ultralytics/models/sam/amg.py
+++ b/ultralytics/models/sam/amg.py
@@ -1,82 +1,13 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 
 import math
-from copy import deepcopy
 from itertools import product
-from typing import Any, Dict, Generator, ItemsView, List, Tuple
+from typing import Any, Generator, List, Tuple
 
 import numpy as np
 import torch
 
 
-class MaskData:
-    """
-    A structure for storing masks and their related data in batched format.
-    Implements basic filtering and concatenation.
-    """
-
-    def __init__(self, **kwargs) -> None:
-        """Initialize a MaskData object, ensuring all values are supported types."""
-        for v in kwargs.values():
-            assert isinstance(
-                v, (list, np.ndarray, torch.Tensor)), 'MaskData only supports list, numpy arrays, and torch tensors.'
-        self._stats = dict(**kwargs)
-
-    def __setitem__(self, key: str, item: Any) -> None:
-        """Set an item in the MaskData object, ensuring it is a supported type."""
-        assert isinstance(
-            item, (list, np.ndarray, torch.Tensor)), 'MaskData only supports list, numpy arrays, and torch tensors.'
-        self._stats[key] = item
-
-    def __delitem__(self, key: str) -> None:
-        """Delete an item from the MaskData object."""
-        del self._stats[key]
-
-    def __getitem__(self, key: str) -> Any:
-        """Get an item from the MaskData object."""
-        return self._stats[key]
-
-    def items(self) -> ItemsView[str, Any]:
-        """Return an ItemsView of the MaskData object."""
-        return self._stats.items()
-
-    def filter(self, keep: torch.Tensor) -> None:
-        """Filter the MaskData object based on the given boolean tensor."""
-        for k, v in self._stats.items():
-            if v is None:
-                self._stats[k] = None
-            elif isinstance(v, torch.Tensor):
-                self._stats[k] = v[torch.as_tensor(keep, device=v.device)]
-            elif isinstance(v, np.ndarray):
-                self._stats[k] = v[keep.detach().cpu().numpy()]
-            elif isinstance(v, list) and keep.dtype == torch.bool:
-                self._stats[k] = [a for i, a in enumerate(v) if keep[i]]
-            elif isinstance(v, list):
-                self._stats[k] = [v[i] for i in keep]
-            else:
-                raise TypeError(f'MaskData key {k} has an unsupported type {type(v)}.')
-
-    def cat(self, new_stats: 'MaskData') -> None:
-        """Concatenate a new MaskData object to the current one."""
-        for k, v in new_stats.items():
-            if k not in self._stats or self._stats[k] is None:
-                self._stats[k] = deepcopy(v)
-            elif isinstance(v, torch.Tensor):
-                self._stats[k] = torch.cat([self._stats[k], v], dim=0)
-            elif isinstance(v, np.ndarray):
-                self._stats[k] = np.concatenate([self._stats[k], v], axis=0)
-            elif isinstance(v, list):
-                self._stats[k] = self._stats[k] + deepcopy(v)
-            else:
-                raise TypeError(f'MaskData key {k} has an unsupported type {type(v)}.')
-
-    def to_numpy(self) -> None:
-        """Convert all torch tensors in the MaskData object to numpy arrays."""
-        for k, v in self._stats.items():
-            if isinstance(v, torch.Tensor):
-                self._stats[k] = v.detach().cpu().numpy()
-
-
 def is_box_near_crop_edge(boxes: torch.Tensor,
                           crop_box: List[int],
                           orig_box: List[int],
@@ -91,14 +22,6 @@ def is_box_near_crop_edge(boxes: torch.Tensor,
     return torch.any(near_crop_edge, dim=1)
 
 
-def box_xyxy_to_xywh(box_xyxy: torch.Tensor) -> torch.Tensor:
-    """Convert bounding boxes from XYXY format to XYWH format."""
-    box_xywh = deepcopy(box_xyxy)
-    box_xywh[2] = box_xywh[2] - box_xywh[0]
-    box_xywh[3] = box_xywh[3] - box_xywh[1]
-    return box_xywh
-
-
 def batch_iterator(batch_size: int, *args) -> Generator[List[Any], None, None]:
     """Yield batches of data from the input arguments."""
     assert args and all(len(a) == len(args[0]) for a in args), 'Batched iteration must have same-size inputs.'
@@ -107,50 +30,6 @@ def batch_iterator(batch_size: int, *args) -> Generator[List[Any], None, None]:
         yield [arg[b * batch_size:(b + 1) * batch_size] for arg in args]
 
 
-def mask_to_rle_pytorch(tensor: torch.Tensor) -> List[Dict[str, Any]]:
-    """Encode masks as uncompressed RLEs in the format expected by pycocotools."""
-    # Put in fortran order and flatten h,w
-    b, h, w = tensor.shape
-    tensor = tensor.permute(0, 2, 1).flatten(1)
-
-    # Compute change indices
-    diff = tensor[:, 1:] ^ tensor[:, :-1]
-    change_indices = diff.nonzero()
-
-    # Encode run length
-    out = []
-    for i in range(b):
-        cur_idxs = change_indices[change_indices[:, 0] == i, 1]
-        cur_idxs = torch.cat([
-            torch.tensor([0], dtype=cur_idxs.dtype, device=cur_idxs.device),
-            cur_idxs + 1,
-            torch.tensor([h * w], dtype=cur_idxs.dtype, device=cur_idxs.device), ])
-        btw_idxs = cur_idxs[1:] - cur_idxs[:-1]
-        counts = [] if tensor[i, 0] == 0 else [0]
-        counts.extend(btw_idxs.detach().cpu().tolist())
-        out.append({'size': [h, w], 'counts': counts})
-    return out
-
-
-def rle_to_mask(rle: Dict[str, Any]) -> np.ndarray:
-    """Compute a binary mask from an uncompressed RLE."""
-    h, w = rle['size']
-    mask = np.empty(h * w, dtype=bool)
-    idx = 0
-    parity = False
-    for count in rle['counts']:
-        mask[idx:idx + count] = parity
-        idx += count
-        parity ^= True
-    mask = mask.reshape(w, h)
-    return mask.transpose()  # Put in C order
-
-
-def area_from_rle(rle: Dict[str, Any]) -> int:
-    """Calculate the area of a mask from its uncompressed RLE."""
-    return sum(rle['counts'][1::2])
-
-
 def calculate_stability_score(masks: torch.Tensor, mask_threshold: float, threshold_offset: float) -> torch.Tensor:
     """
     Computes the stability score for a batch of masks. The stability
@@ -264,16 +143,6 @@ def remove_small_regions(mask: np.ndarray, area_thresh: float, mode: str) -> Tup
     return mask, True
 
 
-def coco_encode_rle(uncompressed_rle: Dict[str, Any]) -> Dict[str, Any]:
-    """Encode uncompressed RLE (run-length encoding) to COCO RLE format."""
-    from pycocotools import mask as mask_utils  # type: ignore
-
-    h, w = uncompressed_rle['size']
-    rle = mask_utils.frPyObjects(uncompressed_rle, h, w)
-    rle['counts'] = rle['counts'].decode('utf-8')  # Necessary to serialize with json
-    return rle
-
-
 def batched_mask_to_box(masks: torch.Tensor) -> torch.Tensor:
     """
     Calculates boxes in XYXY format around masks. Return [0,0,0,0] for
diff --git a/ultralytics/utils/__init__.py b/ultralytics/utils/__init__.py
index 0d213f3..c1eb585 100644
--- a/ultralytics/utils/__init__.py
+++ b/ultralytics/utils/__init__.py
@@ -26,7 +26,6 @@ from ultralytics import __version__
 # PyTorch Multi-GPU DDP Constants
 RANK = int(os.getenv('RANK', -1))
 LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1))  # https://pytorch.org/docs/stable/elastic/run.html
-WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1))
 
 # Other Constants
 FILE = Path(__file__).resolve()
diff --git a/ultralytics/utils/files.py b/ultralytics/utils/files.py
index 750dfa7..8771ea1 100644
--- a/ultralytics/utils/files.py
+++ b/ultralytics/utils/files.py
@@ -22,7 +22,7 @@ class WorkingDirectory(contextlib.ContextDecorator):
         """Changes the current directory to the specified directory."""
         os.chdir(self.dir)
 
-    def __exit__(self, exc_type, exc_val, exc_tb):
+    def __exit__(self, exc_type, exc_val, exc_tb):  # noqa
         """Restore the current working directory on context exit."""
         os.chdir(self.cwd)
 
diff --git a/ultralytics/utils/ops.py b/ultralytics/utils/ops.py
index d729526..6f11d55 100644
--- a/ultralytics/utils/ops.py
+++ b/ultralytics/utils/ops.py
@@ -39,7 +39,7 @@ class Profile(contextlib.ContextDecorator):
         self.start = self.time()
         return self
 
-    def __exit__(self, type, value, traceback):
+    def __exit__(self, type, value, traceback):  # noqa
         """
         Stop timing.
         """
diff --git a/ultralytics/utils/torch_utils.py b/ultralytics/utils/torch_utils.py
index c593ac2..4cebcb0 100644
--- a/ultralytics/utils/torch_utils.py
+++ b/ultralytics/utils/torch_utils.py
@@ -15,7 +15,6 @@ import torch
 import torch.distributed as dist
 import torch.nn as nn
 import torch.nn.functional as F
-import torchvision
 
 from ultralytics.utils import DEFAULT_CFG_DICT, DEFAULT_CFG_KEYS, LOGGER, RANK, __version__
 from ultralytics.utils.checks import check_version
@@ -25,10 +24,7 @@ try:
 except ImportError:
     thop = None
 
-TORCHVISION_0_10 = check_version(torchvision.__version__, '0.10.0')
 TORCH_1_9 = check_version(torch.__version__, '1.9.0')
-TORCH_1_11 = check_version(torch.__version__, '1.11.0')
-TORCH_1_12 = check_version(torch.__version__, '1.12.0')
 TORCH_2_0 = check_version(torch.__version__, '2.0.0')
 
 
@@ -457,7 +453,7 @@ def profile(input, ops, n=10, device=None):
                     y = m(x)
                     t[1] = time_sync()
                     try:
-                        _ = (sum(yi.sum() for yi in y) if isinstance(y, list) else y).sum().backward()
+                        (sum(yi.sum() for yi in y) if isinstance(y, list) else y).sum().backward()
                         t[2] = time_sync()
                     except Exception:  # no backward method
                         # print(e)  # for debug
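If downstream code imported the RLE helpers deleted above (`mask_to_rle_pytorch`, `rle_to_mask`, `area_from_rle`, `coco_encode_rle`), `pycocotools` offers equivalent mask/RLE round-tripping. A minimal sketch follows — it assumes `pycocotools` is installed and is not a drop-in replacement for the deleted batched PyTorch API:

```python
import numpy as np
from pycocotools import mask as mask_utils

# Binary mask -> compressed COCO RLE (pycocotools expects a Fortran-ordered uint8 array)
binary_mask = np.zeros((480, 640), dtype=np.uint8)
binary_mask[100:200, 150:300] = 1
rle = mask_utils.encode(np.asfortranarray(binary_mask))

# COCO RLE -> binary mask and mask area, covering what rle_to_mask/area_from_rle did
restored = mask_utils.decode(rle)
area = int(mask_utils.area(rle))
assert restored.sum() == area

# For JSON serialization (what coco_encode_rle did), decode the counts bytes to str
rle['counts'] = rle['counts'].decode('utf-8')
```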