[Docs]: Link buttons, add autobackend, BaseModel and ops (#130)
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com>
This commit is contained in:
		| @ -34,11 +34,10 @@ To use pythonic interface of Ultralytics YOLO model | ||||
| ```python | ||||
| from ultralytics import YOLO | ||||
|  | ||||
| model = YOLO.new("yolov8n.yaml")  # create a new model from scratch | ||||
| model = YOLO.load( | ||||
| model = YOLO("yolov8n.yaml")  # create a new model from scratch | ||||
| model = YOLO( | ||||
|     "yolov8n.pt" | ||||
| )  # load a pretrained model (recommended for best training results) | ||||
|  | ||||
| results = model.train(data="coco128.yaml", epochs=100, imgsz=640, ...) | ||||
| results = model.val() | ||||
| results = model.predict(source="bus.jpg") | ||||
|  | ||||
| @ -33,7 +33,7 @@ CLI requires no customization or code. You can simply run all tasks from the ter | ||||
|         ```bash | ||||
|         yolo task=detect mode=train model=s.yaml device=\'0,1,2,3\' | ||||
|         ``` | ||||
| [CLI Guide](#){ .md-button .md-button--primary} | ||||
| [CLI Guide](cli.md){ .md-button .md-button--primary} | ||||
|  | ||||
| ## Python API | ||||
| Ultralytics YOLO comes with a Pythonic Model and Trainer interface. | ||||
| @ -42,10 +42,9 @@ Ultralytics YOLO comes with pythonic Model and Trainer interface. | ||||
|     import ultralytics | ||||
|     from ultralytics import YOLO | ||||
|  | ||||
|     model = YOLO() | ||||
|     model.new("s-seg.yaml") # automatically detects task type | ||||
|     model.load("s-seg.pt") # load checkpoint | ||||
|     model = YOLO("s-seg.yaml") # automatically detects task type | ||||
|     model = YOLO("s-seg.pt") # load checkpoint | ||||
|     model.train(data="coco128-segments", epochs=1, lr0=0.01, ...) | ||||
|     model.train(data="coco128-segments", epochs=1, lr0=0.01, device="0,1,2,3") # DDP mode | ||||
|     ``` | ||||
| [API Guide](#){ .md-button .md-button--primary} | ||||
| [API Guide](sdk.md){ .md-button .md-button--primary} | ||||
|  | ||||
							
								
								
									
										15
									
								
								docs/reference/nn.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										15
									
								
								docs/reference/nn.md
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,15 @@ | ||||
| # nn Module | ||||
| Ultralytics nn module contains 3 main components: | ||||
|  | ||||
| 1. **AutoBackend**: A module that can run inference on all popular model formats | ||||
| 2. **BaseModel**: `BaseModel` class defines the operations supported by tasks like Detection and Segmentation | ||||
| 3. **modules**: Optimized and reusable neural network blocks built on PyTorch. | ||||
|  | ||||
| ## AutoBackend | ||||
| :::ultralytics.nn.autobackend.AutoBackend | ||||
|  | ||||
| ## BaseModel | ||||
| :::ultralytics.nn.tasks.BaseModel | ||||
|  | ||||
| ## Modules | ||||
| TODO | ||||
							
								
								
									
										162
									
								
								docs/reference/ops.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										162
									
								
								docs/reference/ops.md
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,162 @@ | ||||
| This module contains optimized deep-learning-related operations used in the Ultralytics YOLO framework. | ||||
| ## Non-max suppression | ||||
| :::ultralytics.ops.non_max_suppression | ||||
|     handler: python | ||||
|     options: | ||||
|         show_source: false | ||||
|         show_root_toc_entry: false | ||||
| --- | ||||
| ## Scale boxes | ||||
| :::ultralytics.ops.scale_boxes | ||||
|     handler: python | ||||
|     options: | ||||
|         show_source: false | ||||
|         show_root_toc_entry: false | ||||
| --- | ||||
| ## Scale image | ||||
| :::ultralytics.ops.scale_image | ||||
|     handler: python | ||||
|     options: | ||||
|         show_source: false | ||||
|         show_root_toc_entry: false | ||||
| --- | ||||
| ## Clip boxes | ||||
| :::ultralytics.ops.clip_boxes | ||||
|     handler: python | ||||
|     options: | ||||
|         show_source: false | ||||
|         show_root_toc_entry: false | ||||
| --- | ||||
| # Box Format Conversion | ||||
| ## xyxy2xywh | ||||
| :::ultralytics.ops.xyxy2xywh | ||||
|     handler: python | ||||
|     options: | ||||
|         show_source: false | ||||
|         show_root_toc_entry: false | ||||
| --- | ||||
| ## xywh2xyxy | ||||
| :::ultralytics.ops.xywh2xyxy | ||||
|     handler: python | ||||
|     options: | ||||
|         show_source: false | ||||
|         show_root_toc_entry: false | ||||
| --- | ||||
| ## xywhn2xyxy | ||||
| :::ultralytics.ops.xywhn2xyxy | ||||
|     handler: python | ||||
|     options: | ||||
|         show_source: false | ||||
|         show_root_toc_entry: false | ||||
| --- | ||||
| ## xyxy2xywhn | ||||
| :::ultralytics.ops.xyxy2xywhn | ||||
|     handler: python | ||||
|     options: | ||||
|         show_source: false | ||||
|         show_root_toc_entry: false | ||||
| --- | ||||
| ## xyn2xy | ||||
| :::ultralytics.ops.xyn2xy | ||||
|     handler: python | ||||
|     options: | ||||
|         show_source: false | ||||
|         show_root_toc_entry: false | ||||
| --- | ||||
| ## xywh2ltwh | ||||
| :::ultralytics.ops.xywh2ltwh | ||||
|     handler: python | ||||
|     options: | ||||
|         show_source: false | ||||
|         show_root_toc_entry: false | ||||
| --- | ||||
| ## xyxy2ltwh | ||||
| :::ultralytics.ops.xyxy2ltwh | ||||
|     handler: python | ||||
|     options: | ||||
|         show_source: false | ||||
|         show_root_toc_entry: false | ||||
| --- | ||||
| ## ltwh2xywh | ||||
| :::ultralytics.ops.ltwh2xywh | ||||
|     handler: python | ||||
|     options: | ||||
|         show_source: false | ||||
|         show_root_toc_entry: false | ||||
| --- | ||||
| ## ltwh2xyxy | ||||
| :::ultralytics.ops.ltwh2xyxy | ||||
|     handler: python | ||||
|     options: | ||||
|         show_source: false | ||||
|         show_root_toc_entry: false | ||||
| --- | ||||
| ## segment2box | ||||
| :::ultralytics.ops.segment2box | ||||
|     handler: python | ||||
|     options: | ||||
|         show_source: false | ||||
|         show_root_toc_entry: false | ||||
| --- | ||||
| # Mask Operations | ||||
| ## resample_segments | ||||
| :::ultralytics.ops.resample_segments | ||||
|     handler: python | ||||
|     options: | ||||
|         show_source: false | ||||
|         show_root_toc_entry: false | ||||
| --- | ||||
| ## crop_mask | ||||
| :::ultralytics.ops.crop_mask | ||||
|     handler: python | ||||
|     options: | ||||
|         show_source: false | ||||
|         show_root_toc_entry: false | ||||
| --- | ||||
| ## process_mask_upsample | ||||
| :::ultralytics.ops.process_mask_upsample | ||||
|     handler: python | ||||
|     options: | ||||
|         show_source: false | ||||
|         show_root_toc_entry: false | ||||
| --- | ||||
| ## process_mask | ||||
| :::ultralytics.ops.process_mask | ||||
|     handler: python | ||||
|     options: | ||||
|         show_source: false | ||||
|         show_root_toc_entry: false | ||||
| --- | ||||
| ## process_mask_native | ||||
| :::ultralytics.ops.process_mask_native | ||||
|     handler: python | ||||
|     options: | ||||
|         show_source: false | ||||
|         show_root_toc_entry: false | ||||
| --- | ||||
| ## scale_segments | ||||
| :::ultralytics.ops.scale_segments | ||||
|     handler: python | ||||
|     options: | ||||
|         show_source: false | ||||
|         show_root_toc_entry: false | ||||
| --- | ||||
| ## masks2segments | ||||
| :::ultralytics.ops.masks2segments | ||||
|     handler: python | ||||
|     options: | ||||
|         show_source: false | ||||
|         show_root_toc_entry: false | ||||
| --- | ||||
| ## clip_segments | ||||
| :::ultralytics.ops.clip_segments | ||||
|     handler: python | ||||
|     options: | ||||
|         show_source: false | ||||
|         show_root_toc_entry: false | ||||
| --- | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
							
								
								
									
										15
									
								
								docs/sdk.md
									
									
									
									
									
								
							
							
						
						
									
										15
									
								
								docs/sdk.md
									
									
									
									
									
								
							| @ -6,8 +6,7 @@ This is the simplest way of simply using yolo models in a python environment. It | ||||
|         ```python | ||||
|         from ultralytics import YOLO | ||||
|  | ||||
|         model = YOLO() | ||||
|         model.new("n.yaml") # pass any model type | ||||
|         model = YOLO("yolov8n.yaml") | ||||
|         model(img_tensor) # Or model.forward(). inference. | ||||
|         model.train(data="coco128.yaml", epochs=5) | ||||
|         ``` | ||||
| @ -16,10 +15,9 @@ This is the simplest way of simply using yolo models in a python environment. It | ||||
|         ```python | ||||
|         from ultralytics import YOLO | ||||
|  | ||||
|         model = YOLO() | ||||
|         model.load("n.pt") # pass any model type | ||||
|         model = YOLO("yolov8n.pt") # pass any model type | ||||
|         model(...) # inference | ||||
|         model.train(data="coco128.yaml", epochs=5) | ||||
|         model.train(epochs=5) | ||||
|         ``` | ||||
|  | ||||
|     === "Resume Training" | ||||
| @ -35,8 +33,7 @@ This is the simplest way of simply using yolo models in a python environment. It | ||||
|     ```python | ||||
|     from ultralytics import YOLO | ||||
|  | ||||
|     model = YOLO() | ||||
|     model.load("model.pt") | ||||
|     model = YOLO("model.pt") | ||||
|     model.predict(source="0") # accepts all formats - img/folder/vid.*(mp4/format). 0 for webcam | ||||
|     model.predict(source="folder", view_img=True) # Display preds. Accepts all yolo predict arguments | ||||
|  | ||||
| @ -48,7 +45,7 @@ This is the simplest way of simply using yolo models in a python environment. It | ||||
|         ```python | ||||
|         from ultralytics import YOLO | ||||
|  | ||||
|         model = YOLO() | ||||
|         model = YOLO("model.pt") | ||||
|         model.fuse()   | ||||
|         model.info(verbose=True)  # Print model information | ||||
|         model.export(format=)  # TODO:  | ||||
| @ -61,7 +58,7 @@ This is the simplest way of simply using yolo models in a python environment. It | ||||
|  | ||||
| To learn more about using `YOLO` models, refer to the Model class reference. | ||||
|  | ||||
| [Model reference](#){ .md-button .md-button--primary} | ||||
| [Model reference](reference/model.md){ .md-button .md-button--primary} | ||||
|  | ||||
| --- | ||||
| ### Customizing Tasks with Trainers | ||||
|  | ||||
| @ -51,6 +51,7 @@ markdown_extensions: | ||||
|   - pymdownx.superfences | ||||
|   - tables | ||||
|   - attr_list | ||||
|   - def_list | ||||
|   # Syntax highlight | ||||
|   - pymdownx.highlight: | ||||
|       anchor_linenums: true | ||||
| @ -84,14 +85,16 @@ nav: | ||||
|       - Detection: tasks/detection.md | ||||
|       - Segmentation: tasks/segmentation.md | ||||
|       - Classification: tasks/classification.md | ||||
|   - Customization Tutorials: | ||||
|   - Advanced Tutorials: | ||||
|       - Customize Trainer: customize/train.md | ||||
|       - Customize Validator: customize/val.md | ||||
|       - Customize Predictor: customize/predict.md | ||||
|   - Reference: | ||||
|       - YOLO Models: reference/model.md | ||||
|       - Python Model interface: reference/model.md | ||||
|       - Engine: | ||||
|           - Trainer: reference/base_trainer.md | ||||
|           - Validator: reference/base_val.md | ||||
|           - Predictor: reference/base_pred.md | ||||
|           - Exporter: reference/exporter.md | ||||
|       - nn Module: reference/nn.md | ||||
|       - operations: reference/ops.md | ||||
|  | ||||
| @ -1,5 +1,6 @@ | ||||
| __version__ = "8.0.0.dev0" | ||||
|  | ||||
| from ultralytics.yolo.engine.model import YOLO | ||||
| from ultralytics.yolo.utils import ops | ||||
|  | ||||
| __all__ = ["__version__", "YOLO", "hub"]  # allow simpler import | ||||
|  | ||||
| @ -17,22 +17,36 @@ from ultralytics.yolo.utils.ops import xywh2xyxy | ||||
|  | ||||
|  | ||||
| class AutoBackend(nn.Module): | ||||
|     # YOLOv5 MultiBackend class for python inference on various backends | ||||
|     def __init__(self, weights='yolov8n.pt', device=torch.device('cpu'), dnn=False, data=None, fp16=False, fuse=True): | ||||
|         # Usage: | ||||
|         #   PyTorch:              weights = *.pt | ||||
|         #   TorchScript:                    *.torchscript | ||||
|         #   ONNX Runtime:                   *.onnx | ||||
|         #   ONNX OpenCV DNN:                *.onnx --dnn | ||||
|         #   OpenVINO:                       *.xml | ||||
|         #   CoreML:                         *.mlmodel | ||||
|         #   TensorRT:                       *.engine | ||||
|         #   TensorFlow SavedModel:          *_saved_model | ||||
|         #   TensorFlow GraphDef:            *.pb | ||||
|         #   TensorFlow Lite:                *.tflite | ||||
|         #   TensorFlow Edge TPU:            *_edgetpu.tflite | ||||
|         #   PaddlePaddle:                   *_paddle_model | ||||
|  | ||||
|     def __init__(self, weights='yolov8n.pt', device=torch.device('cpu'), dnn=False, data=None, fp16=False, fuse=True): | ||||
|         """ | ||||
|         Ultralytics YOLO MultiBackend class for python inference on various backends | ||||
|  | ||||
|         Args: | ||||
|           weights: the path to the weights file. Defaults to yolov8n.pt | ||||
|           device: The device to run the model on. | ||||
|           dnn: If you want to use OpenCV's DNN module to run the inference, set this to True. Defaults to | ||||
|         False | ||||
|           data: a dictionary containing the following keys: | ||||
|           fp16: If true, will use half precision. Defaults to False | ||||
|           fuse: whether to fuse the model or not. Defaults to True | ||||
|  | ||||
|         Supported formats and their usage: | ||||
|             | Platform              | weights          | | ||||
|             |-----------------------|------------------| | ||||
|             | PyTorch               | *.pt             | | ||||
|             | TorchScript           | *.torchscript    | | ||||
|             | ONNX Runtime          | *.onnx           | | ||||
|             | ONNX OpenCV DNN       | *.onnx --dnn     | | ||||
|             | OpenVINO              | *.xml            | | ||||
|             | CoreML                | *.mlmodel        | | ||||
|             | TensorRT              | *.engine         | | ||||
|             | TensorFlow SavedModel | *_saved_model    | | ||||
|             | TensorFlow GraphDef   | *.pb             | | ||||
|             | TensorFlow Lite       | *.tflite         | | ||||
|             | TensorFlow Edge TPU   | *_edgetpu.tflite | | ||||
|             | PaddlePaddle          | *_paddle_model   | | ||||
|         """ | ||||
|         super().__init__() | ||||
|         w = str(weights[0] if isinstance(weights, list) else weights) | ||||
|         nn_module = isinstance(weights, torch.nn.Module) | ||||
| @ -215,6 +229,15 @@ class AutoBackend(nn.Module): | ||||
|         self.__dict__.update(locals())  # assign all variables to self | ||||
|  | ||||
|     def forward(self, im, augment=False, visualize=False): | ||||
|         """ | ||||
|         Runs inference on the given model | ||||
|  | ||||
|         Args: | ||||
|           im: the image tensor | ||||
|           augment: whether to augment the image. Defaults to False | ||||
|           visualize: if True, then the network will output the feature maps of the last convolutional layer. | ||||
|         Defaults to False | ||||
|         """ | ||||
|         # YOLOv5 MultiBackend inference | ||||
|         b, ch, h, w = im.shape  # batch, channel, height, width | ||||
|         if self.fp16 and im.dtype != torch.float16: | ||||
| @ -297,10 +320,21 @@ class AutoBackend(nn.Module): | ||||
|             return self.from_numpy(y) | ||||
|  | ||||
|     def from_numpy(self, x): | ||||
|         """ | ||||
|         `from_numpy` converts a numpy array to a tensor | ||||
|  | ||||
|         Args: | ||||
|           x: the numpy array to convert | ||||
|         """ | ||||
|         return torch.from_numpy(x).to(self.device) if isinstance(x, np.ndarray) else x | ||||
|  | ||||
|     def warmup(self, imgsz=(1, 3, 640, 640)): | ||||
|         # Warmup model by running inference once | ||||
|         """ | ||||
|         Warmup model by running inference once | ||||
|  | ||||
|         Args: | ||||
|           imgsz: the size of the image you want to run inference on. | ||||
|         """ | ||||
|         warmup_types = self.pt, self.jit, self.onnx, self.engine, self.saved_model, self.pb, self.triton, self.nn_module | ||||
|         if any(warmup_types) and (self.device.type != 'cpu' or self.triton): | ||||
|             im = torch.empty(*imgsz, dtype=torch.half if self.fp16 else torch.float, device=self.device)  # input | ||||
| @ -309,6 +343,12 @@ class AutoBackend(nn.Module): | ||||
|  | ||||
|     @staticmethod | ||||
|     def _model_type(p='path/to/model.pt'): | ||||
|         """ | ||||
|         This function takes a path to a model file and returns the model type | ||||
|  | ||||
|         Args: | ||||
|           p: path to the model file. Defaults to path/to/model.pt | ||||
|         """ | ||||
|         # Return model type from model path, i.e. path='path/to/model.onnx' -> type=onnx | ||||
|         # types = [pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle] | ||||
|         from ultralytics.yolo.engine.exporter import export_formats | ||||
| @ -323,6 +363,12 @@ class AutoBackend(nn.Module): | ||||
|  | ||||
|     @staticmethod | ||||
|     def _load_metadata(f=Path('path/to/meta.yaml')): | ||||
|         """ | ||||
|         > Loads the metadata from a yaml file | ||||
|  | ||||
|         Args: | ||||
|           f: The path to the metadata file. | ||||
|         """ | ||||
|         from ultralytics.yolo.utils.files import yaml_load | ||||
|  | ||||
|         # Load metadata from meta.yaml if it exists | ||||
|  | ||||
| @ -17,11 +17,36 @@ from ultralytics.yolo.utils.torch_utils import (fuse_conv_and_bn, initialize_wei | ||||
|  | ||||
|  | ||||
| class BaseModel(nn.Module): | ||||
|     # YOLOv5 base model | ||||
|     ''' | ||||
|      The BaseModel class is a base class for all the models in the Ultralytics YOLO family. | ||||
|     ''' | ||||
|  | ||||
|     def forward(self, x, profile=False, visualize=False): | ||||
|         return self._forward_once(x, profile, visualize)  # single-scale inference, train | ||||
|         """ | ||||
|         > `forward` is a wrapper for `_forward_once` that runs the model on a single scale | ||||
|  | ||||
|         Args: | ||||
|           x: the input image | ||||
|           profile: whether to profile the model. Defaults to False | ||||
|           visualize: if True, will return the intermediate feature maps. Defaults to False | ||||
|  | ||||
|         Returns: | ||||
|           The output of the network. | ||||
|         """ | ||||
|         return self._forward_once(x, profile, visualize) | ||||
|  | ||||
|     def _forward_once(self, x, profile=False, visualize=False): | ||||
|         """ | ||||
|         > Forward pass of the network | ||||
|  | ||||
|         Args: | ||||
|           x: input to the model | ||||
|           profile: if True, the time taken for each layer will be printed. Defaults to False | ||||
|           visualize: If True, it will save the feature maps of the model. Defaults to False | ||||
|  | ||||
|         Returns: | ||||
|           The last layer of the model. | ||||
|         """ | ||||
|         y, dt = [], []  # outputs | ||||
|         for m in self.model: | ||||
|             if m.f != -1:  # if not from previous layer | ||||
| @ -36,6 +61,15 @@ class BaseModel(nn.Module): | ||||
|         return x | ||||
|  | ||||
|     def _profile_one_layer(self, m, x, dt): | ||||
|         """ | ||||
|         It takes a model, an input, and a list of times, and it profiles the model on the input, appending | ||||
|         the time to the list | ||||
|  | ||||
|         Args: | ||||
|           m: the model | ||||
|           x: the input image | ||||
|           dt: list of time taken for each layer | ||||
|         """ | ||||
|         c = m == self.model[-1]  # is final layer, copy input as inplace fix | ||||
|         o = thop.profile(m, inputs=(x.copy() if c else x,), verbose=False)[0] / 1E9 * 2 if thop else 0  # FLOPs | ||||
|         t = time_sync() | ||||
| @ -48,7 +82,13 @@ class BaseModel(nn.Module): | ||||
|         if c: | ||||
|             LOGGER.info(f"{sum(dt):10.2f} {'-':>10s} {'-':>10s}  Total") | ||||
|  | ||||
|     def fuse(self):  # fuse model Conv2d() + BatchNorm2d() layers | ||||
|     def fuse(self): | ||||
|         """ | ||||
|         > It takes a model and fuses the Conv2d() and BatchNorm2d() layers into a single layer | ||||
|  | ||||
|         Returns: | ||||
|           The model itself (self), after fusing. | ||||
|         """ | ||||
|         LOGGER.info('Fusing layers... ') | ||||
|         for m in self.model.modules(): | ||||
|             if isinstance(m, (Conv, DWConv)) and hasattr(m, 'bn'): | ||||
| @ -58,11 +98,27 @@ class BaseModel(nn.Module): | ||||
|         self.info() | ||||
|         return self | ||||
|  | ||||
|     def info(self, verbose=False, imgsz=640):  # print model information | ||||
|     def info(self, verbose=False, imgsz=640): | ||||
|         """ | ||||
|         Prints model information | ||||
|  | ||||
|         Args: | ||||
|           verbose: if True, prints out the model information. Defaults to False | ||||
|           imgsz: the size of the image that the model will be trained on. Defaults to 640 | ||||
|         """ | ||||
|         model_info(self, verbose, imgsz) | ||||
|  | ||||
|     def _apply(self, fn): | ||||
|         # Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers | ||||
|         """ | ||||
|         `_apply()` is a function that applies a function to all the tensors in the model that are not | ||||
|         parameters or registered buffers | ||||
|  | ||||
|         Args: | ||||
|           fn: the function to apply to the model | ||||
|  | ||||
|         Returns: | ||||
|           The model itself (self), after `fn` has been applied. | ||||
|         """ | ||||
|         self = super()._apply(fn) | ||||
|         m = self.model[-1]  # Detect() | ||||
|         if isinstance(m, (Detect, Segment)): | ||||
| @ -72,6 +128,12 @@ class BaseModel(nn.Module): | ||||
|         return self | ||||
|  | ||||
|     def load(self, weights): | ||||
|         """ | ||||
|         > This function loads the weights of the model from a file | ||||
|  | ||||
|         Args: | ||||
|           weights: The weights to load into the model. | ||||
|         """ | ||||
|         # Force all tasks to implement this function | ||||
|         raise NotImplementedError("This function needs to be implemented by derived classes!") | ||||
|  | ||||
|  | ||||
| @ -47,6 +47,17 @@ def coco80_to_coco91_class():  # converts 80-index (val2014) to 91-index (paper) | ||||
|  | ||||
|  | ||||
| def segment2box(segment, width=640, height=640): | ||||
|     """ | ||||
|     > Convert 1 segment label to 1 box label, applying inside-image constraint, i.e. (xy1, xy2, ...) to | ||||
|     (xyxy) | ||||
|     Args: | ||||
|       segment: the segment label | ||||
|       width: the width of the image. Defaults to 640 | ||||
|       height: The height of the image. Defaults to 640 | ||||
|  | ||||
|     Returns: | ||||
|       the minimum and maximum x and y values of the segment. | ||||
|     """ | ||||
|     # Convert 1 segment label to 1 box label, applying inside-image constraint, i.e. (xy1, xy2, ...) to (xyxy) | ||||
|     x, y = segment.T  # segment xy | ||||
|     inside = (x >= 0) & (y >= 0) & (x <= width) & (y <= height) | ||||
| @ -55,7 +66,18 @@ def segment2box(segment, width=640, height=640): | ||||
|  | ||||
|  | ||||
| def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None): | ||||
|     # Rescale boxes (xyxy) from img1_shape to img0_shape | ||||
|     """ | ||||
|     > Rescale boxes (xyxy) from img1_shape to img0_shape | ||||
|     Args: | ||||
|       img1_shape: The shape of the image that the bounding boxes are for. | ||||
|       boxes: the bounding boxes of the objects in the image | ||||
|       img0_shape: the shape of the original image | ||||
|       ratio_pad: a tuple of (ratio, pad) | ||||
|  | ||||
|     Returns: | ||||
|       The boxes are being returned. | ||||
|     """ | ||||
|     # | ||||
|     if ratio_pad is None:  # calculate from img0_shape | ||||
|         gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain  = old / new | ||||
|         pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2  # wh padding | ||||
| @ -70,18 +92,6 @@ def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None): | ||||
|     return boxes | ||||
|  | ||||
|  | ||||
| def clip_boxes(boxes, shape): | ||||
|     # Clip boxes (xyxy) to image shape (height, width) | ||||
|     if isinstance(boxes, torch.Tensor):  # faster individually | ||||
|         boxes[..., 0].clamp_(0, shape[1])  # x1 | ||||
|         boxes[..., 1].clamp_(0, shape[0])  # y1 | ||||
|         boxes[..., 2].clamp_(0, shape[1])  # x2 | ||||
|         boxes[..., 3].clamp_(0, shape[0])  # y2 | ||||
|     else:  # np.array (faster grouped) | ||||
|         boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, shape[1])  # x1, x2 | ||||
|         boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, shape[0])  # y1, y2 | ||||
|  | ||||
|  | ||||
| def make_divisible(x, divisor): | ||||
|     # Returns nearest x divisible by divisor | ||||
|     if isinstance(divisor, torch.Tensor): | ||||
| @ -101,7 +111,7 @@ def non_max_suppression( | ||||
|         nm=0,  # number of masks | ||||
| ): | ||||
|     """ | ||||
|     Perform non-maximum suppression (NMS) on a set of boxes, with support for masks and multiple labels per box. | ||||
|     > Perform non-maximum suppression (NMS) on a set of boxes, with support for masks and multiple labels per box. | ||||
|  | ||||
|     Arguments: | ||||
|         prediction (torch.Tensor): A tensor of shape (batch_size, num_boxes, num_classes + 4 + num_masks) | ||||
| @ -217,6 +227,25 @@ def non_max_suppression( | ||||
|     return output | ||||
|  | ||||
|  | ||||
| def clip_boxes(boxes, shape): | ||||
|     """ | ||||
|     > It takes a list of bounding boxes and a shape (height, width) and clips the bounding boxes to the | ||||
|     shape | ||||
|  | ||||
|     Args: | ||||
|       boxes: the bounding boxes to clip | ||||
|       shape: the shape of the image | ||||
|     """ | ||||
|     if isinstance(boxes, torch.Tensor):  # faster individually | ||||
|         boxes[..., 0].clamp_(0, shape[1])  # x1 | ||||
|         boxes[..., 1].clamp_(0, shape[0])  # y1 | ||||
|         boxes[..., 2].clamp_(0, shape[1])  # x2 | ||||
|         boxes[..., 3].clamp_(0, shape[0])  # y2 | ||||
|     else:  # np.array (faster grouped) | ||||
|         boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, shape[1])  # x1, x2 | ||||
|         boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, shape[0])  # y1, y2 | ||||
|  | ||||
|  | ||||
| def clip_coords(boxes, shape): | ||||
|     # Clip bounding xyxy bounding boxes to image shape (height, width) | ||||
|     if isinstance(boxes, torch.Tensor):  # faster individually | ||||
| @ -231,9 +260,16 @@ def clip_coords(boxes, shape): | ||||
|  | ||||
| def scale_image(im1_shape, masks, im0_shape, ratio_pad=None): | ||||
|     """ | ||||
|     img1_shape: model input shape, [h, w] | ||||
|     img0_shape: origin pic shape, [h, w, 3] | ||||
|     masks: [h, w, num] | ||||
|     > It takes a mask, and resizes it to the original image size | ||||
|  | ||||
|     Args: | ||||
|       im1_shape: model input shape, [h, w] | ||||
|       masks: [h, w, num] | ||||
|       im0_shape: the original image shape | ||||
|       ratio_pad: the ratio of the padding to the original image. | ||||
|  | ||||
|     Returns: | ||||
|       The masks are being returned. | ||||
|     """ | ||||
|     # Rescale coordinates (xyxy) from im1_shape to im0_shape | ||||
|     if ratio_pad is None:  # calculate from im0_shape | ||||
| @ -258,7 +294,16 @@ def scale_image(im1_shape, masks, im0_shape, ratio_pad=None): | ||||
|  | ||||
|  | ||||
| def xyxy2xywh(x): | ||||
|     # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] where xy1=top-left, xy2=bottom-right | ||||
|     """ | ||||
|     > It takes a list of bounding boxes, and converts them from the format [x1, y1, x2, y2] to [x, y, w, | ||||
|     h]  where xy1=top-left, xy2=bottom-right | ||||
|  | ||||
|     Args: | ||||
|       x: the input tensor | ||||
|  | ||||
|     Returns: | ||||
|       the center of the box, the width and the height of the box. | ||||
|     """ | ||||
|     y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) | ||||
|     y[..., 0] = (x[..., 0] + x[..., 2]) / 2  # x center | ||||
|     y[..., 1] = (x[..., 1] + x[..., 3]) / 2  # y center | ||||
| @ -268,7 +313,15 @@ def xyxy2xywh(x): | ||||
|  | ||||
|  | ||||
| def xywh2xyxy(x): | ||||
|     # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right | ||||
|     """ | ||||
|     > It converts the bounding box from x,y,w,h to x1,y1,x2,y2 where xy1=top-left, xy2=bottom-right | ||||
|  | ||||
|     Args: | ||||
|       x: the input tensor | ||||
|  | ||||
|     Returns: | ||||
|       the top left and bottom right coordinates of the bounding box. | ||||
|     """ | ||||
|     y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) | ||||
|     y[..., 0] = x[..., 0] - x[..., 2] / 2  # top left x | ||||
|     y[..., 1] = x[..., 1] - x[..., 3] / 2  # top left y | ||||
| @ -278,7 +331,19 @@ def xywh2xyxy(x): | ||||
|  | ||||
|  | ||||
| def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0): | ||||
|     # Convert nx4 boxes from [x, y, w, h] normalized to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right | ||||
|     """ | ||||
|     > It converts the normalized coordinates to the actual coordinates [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right | ||||
|  | ||||
|     Args: | ||||
|       x: the bounding box coordinates | ||||
|       w: width of the image. Defaults to 640 | ||||
|       h: height of the image. Defaults to 640 | ||||
|       padw: padding width. Defaults to 0 | ||||
|       padh: height of the padding. Defaults to 0 | ||||
|  | ||||
|     Returns: | ||||
|       the xyxy coordinates of the bounding box. | ||||
|     """ | ||||
|     y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) | ||||
|     y[..., 0] = w * (x[..., 0] - x[..., 2] / 2) + padw  # top left x | ||||
|     y[..., 1] = h * (x[..., 1] - x[..., 3] / 2) + padh  # top left y | ||||
| @ -288,7 +353,20 @@ def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0): | ||||
|  | ||||
|  | ||||
| def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0): | ||||
|     # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] normalized where xy1=top-left, xy2=bottom-right | ||||
|     """ | ||||
|     > It takes in a list of bounding boxes, and returns a list of bounding boxes, but with the x and y | ||||
|     coordinates normalized to the width and height of the image | ||||
|  | ||||
|     Args: | ||||
|       x: the bounding box coordinates | ||||
|       w: width of the image. Defaults to 640 | ||||
|       h: height of the image. Defaults to 640 | ||||
|       clip: If True, the boxes will be clipped to the image boundaries. Defaults to False | ||||
|       eps: margin subtracted from h and w when clipping, i.e. boxes are clipped to (h - eps, w - eps). Defaults to 0.0 | ||||
|  | ||||
|     Returns: | ||||
|       the xywhn format of the bounding boxes. | ||||
|     """ | ||||
|     if clip: | ||||
|         clip_boxes(x, (h - eps, w - eps))  # warning: inplace clip | ||||
|     y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) | ||||
| @ -300,7 +378,19 @@ def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0): | ||||
|  | ||||
|  | ||||
| def xyn2xy(x, w=640, h=640, padw=0, padh=0): | ||||
|     # Convert normalized segments into pixel segments, shape (n,2) | ||||
|     """ | ||||
|     > It converts normalized segments into pixel segments of shape (n,2) | ||||
|  | ||||
|     Args: | ||||
|       x: the normalized segment coordinates, shape (n,2) | ||||
|       w: width of the image. Defaults to 640 | ||||
|       h: height of the image. Defaults to 640 | ||||
|       padw: padding width. Defaults to 0 | ||||
|       padh: padding height. Defaults to 0 | ||||
|  | ||||
|     Returns: | ||||
|       the segment coordinates in pixels, shape (n,2). | ||||
|     """ | ||||
|     y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) | ||||
|     y[..., 0] = w * x[..., 0] + padw  # top left x | ||||
|     y[..., 1] = h * x[..., 1] + padh  # top left y | ||||
| @ -308,7 +398,15 @@ def xyn2xy(x, w=640, h=640, padw=0, padh=0): | ||||
|  | ||||
|  | ||||
| def xywh2ltwh(x): | ||||
|     # Convert nx4 boxes from [x, y, w, h] to [x1, y1, w, h] where xy1=top-left | ||||
|     """ | ||||
|     > It converts the bounding box from [x, y, w, h] to [x1, y1, w, h] where xy1=top-left | ||||
|  | ||||
|     Args: | ||||
|       x: the input boxes in [x, y, w, h] format, where xy=center | ||||
|  | ||||
|     Returns: | ||||
|       the boxes in [x1, y1, w, h] format, where xy1=top-left. | ||||
|     """ | ||||
|     y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) | ||||
|     y[:, 0] = x[:, 0] - x[:, 2] / 2  # top left x | ||||
|     y[:, 1] = x[:, 1] - x[:, 3] / 2  # top left y | ||||
| @ -316,7 +414,15 @@ def xywh2ltwh(x): | ||||
|  | ||||
|  | ||||
| def xyxy2ltwh(x): | ||||
|     # Convert nx4 boxes from [x1, y1, x2, y2] to [x1, y1, w, h] where xy1=top-left, xy2=bottom-right | ||||
|     """ | ||||
|     > Convert nx4 boxes from [x1, y1, x2, y2] to [x1, y1, w, h] where xy1=top-left, xy2=bottom-right | ||||
|  | ||||
|     Args: | ||||
|       x: the input tensor | ||||
|  | ||||
|     Returns: | ||||
|       the boxes in [x1, y1, w, h] format, where xy1=top-left. | ||||
|     """ | ||||
|     y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) | ||||
|     y[:, 2] = x[:, 2] - x[:, 0]  # width | ||||
|     y[:, 3] = x[:, 3] - x[:, 1]  # height | ||||
| @ -324,7 +430,12 @@ def xyxy2ltwh(x): | ||||
|  | ||||
|  | ||||
| def ltwh2xywh(x): | ||||
|     # Convert nx4 boxes from [x1, y1, w, h] to [x, y, w, h] where xy1=top-left, xy=center | ||||
|     """ | ||||
|     > Convert nx4 boxes from [x1, y1, w, h] to [x, y, w, h] where xy1=top-left, xy=center | ||||
|  | ||||
|     Args: | ||||
|       x: the input tensor | ||||
|     """ | ||||
|     y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) | ||||
|     y[:, 0] = x[:, 0] + x[:, 2] / 2  # center x | ||||
|     y[:, 1] = x[:, 1] + x[:, 3] / 2  # center y | ||||
| @ -332,7 +443,16 @@ def ltwh2xywh(x): | ||||
|  | ||||
|  | ||||
| def ltwh2xyxy(x): | ||||
|     # Convert nx4 boxes from [x1, y1, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right | ||||
|     """ | ||||
|     > It converts the bounding box from [x1, y1, w, h] to [x1, y1, x2, y2] where xy1=top-left, | ||||
|     xy2=bottom-right | ||||
|  | ||||
|     Args: | ||||
|       x: the input boxes in [x1, y1, w, h] format, where xy1=top-left | ||||
|  | ||||
|     Returns: | ||||
|       the xyxy coordinates of the bounding boxes. | ||||
|     """ | ||||
|     y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) | ||||
|     y[:, 2] = x[:, 2] + x[:, 0]  # width | ||||
|     y[:, 3] = x[:, 3] + x[:, 1]  # height | ||||
| @ -340,7 +460,16 @@ def ltwh2xyxy(x): | ||||
|  | ||||
|  | ||||
| def segments2boxes(segments): | ||||
|     # Convert segment labels to box labels, i.e. (cls, xy1, xy2, ...) to (cls, xywh) | ||||
|     """ | ||||
|     > It converts segment labels to box labels, i.e. (cls, xy1, xy2, ...) to (cls, xywh) | ||||
|  | ||||
|     Args: | ||||
|       segments: list of segments, each segment is a list of points, each point is a list of x, y | ||||
|     coordinates | ||||
|  | ||||
|     Returns: | ||||
|       the xywh coordinates of the bounding boxes. | ||||
|     """ | ||||
|     boxes = [] | ||||
|     for s in segments: | ||||
|         x, y = s.T  # segment xy | ||||
| @ -349,7 +478,17 @@ def segments2boxes(segments): | ||||
|  | ||||
|  | ||||
| def resample_segments(segments, n=1000): | ||||
|     # Up-sample an (n,2) segment | ||||
|     """ | ||||
|     > It takes a list of segments (n,2) and returns a list of segments (n,2) where each segment has been | ||||
|     up-sampled to n points | ||||
|  | ||||
|     Args: | ||||
|       segments: a list of (n,2) arrays, where n is the number of points in the segment. | ||||
|       n: number of points to resample the segment to. Defaults to 1000 | ||||
|  | ||||
|     Returns: | ||||
|       the resampled segments. | ||||
|     """ | ||||
|     for i, s in enumerate(segments): | ||||
|         s = np.concatenate((s, s[0:1, :]), axis=0) | ||||
|         x = np.linspace(0, len(s) - 1, n) | ||||
| @ -360,13 +499,15 @@ def resample_segments(segments, n=1000): | ||||
|  | ||||
| def crop_mask(masks, boxes): | ||||
|     """ | ||||
|     "Crop" predicted masks by zeroing out everything not in the predicted bbox. | ||||
|     Vectorized by Chong (thanks Chong). | ||||
|     Args: | ||||
|         - masks should be a size [h, w, n] tensor of masks | ||||
|         - boxes should be a size [n, 4] tensor of bbox coords in relative point form | ||||
|     """ | ||||
|     > It takes a mask and a bounding box, and returns a mask that is cropped to the bounding box | ||||
|  | ||||
|     Args: | ||||
|       masks: [n, h, w] tensor of masks | ||||
|       boxes: [n, 4] tensor of bbox coords in relative point form | ||||
|  | ||||
|     Returns: | ||||
|       the masks cropped to their bounding boxes. | ||||
|     """ | ||||
|     n, h, w = masks.shape | ||||
|     x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1)  # x1 shape(1,1,n) | ||||
|     r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, None, :]  # rows shape(1,w,1) | ||||
| @ -377,14 +518,18 @@ def crop_mask(masks, boxes): | ||||
|  | ||||
| def process_mask_upsample(protos, masks_in, bboxes, shape): | ||||
|     """ | ||||
|     Crop after upsample. | ||||
|     proto_out: [mask_dim, mask_h, mask_w] | ||||
|     out_masks: [n, mask_dim], n is number of masks after nms | ||||
|     bboxes: [n, 4], n is number of masks after nms | ||||
|     shape:input_image_size, (h, w) | ||||
|     return: h, w, n | ||||
|     """ | ||||
|     > It takes the output of the mask head, and applies the mask to the bounding boxes. This produces masks of higher | ||||
|     quality but is slower. | ||||
|  | ||||
|     Args: | ||||
|       protos: [mask_dim, mask_h, mask_w] | ||||
|       masks_in: [n, mask_dim], n is number of masks after nms | ||||
|       bboxes: [n, 4], n is number of masks after nms | ||||
|       shape: the size of the input image | ||||
|  | ||||
|     Returns: | ||||
|       mask | ||||
|     """ | ||||
|     c, mh, mw = protos.shape  # CHW | ||||
|     masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw) | ||||
|     masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0]  # CHW | ||||
| @ -394,12 +539,17 @@ def process_mask_upsample(protos, masks_in, bboxes, shape): | ||||
|  | ||||
| def process_mask(protos, masks_in, bboxes, shape, upsample=False): | ||||
|     """ | ||||
|     Crop before upsample. | ||||
|     proto_out: [mask_dim, mask_h, mask_w] | ||||
|     out_masks: [n, mask_dim], n is number of masks after nms | ||||
|     bboxes: [n, 4], n is number of masks after nms | ||||
|     shape:input_image_size, (h, w) | ||||
|     return: h, w, n | ||||
|     > It takes the output of the mask head, and applies the mask to the bounding boxes. This is faster but produces | ||||
|     downsampled quality of mask | ||||
|  | ||||
|     Args: | ||||
|       protos: [mask_dim, mask_h, mask_w] | ||||
|       masks_in: [n, mask_dim], n is number of masks after nms | ||||
|       bboxes: [n, 4], n is number of masks after nms | ||||
|       shape: the size of the input image | ||||
|  | ||||
|     Returns: | ||||
|       mask | ||||
|     """ | ||||
|  | ||||
|     c, mh, mw = protos.shape  # CHW | ||||
| @ -420,12 +570,16 @@ def process_mask(protos, masks_in, bboxes, shape, upsample=False): | ||||
|  | ||||
| def process_mask_native(protos, masks_in, bboxes, shape): | ||||
|     """ | ||||
|     Crop after upsample. | ||||
|     protos: [mask_dim, mask_h, mask_w] | ||||
|     masks_in: [n, mask_dim], n is number of masks after nms | ||||
|     bboxes: [n, 4], n is number of masks after nms | ||||
|     shape: input_image_size, (h, w) | ||||
|     return: h, w, n | ||||
|     > It takes the output of the mask head, and crops it after upsampling to the bounding boxes. | ||||
|  | ||||
|     Args: | ||||
|       protos: [mask_dim, mask_h, mask_w] | ||||
|       masks_in: [n, mask_dim], n is number of masks after nms | ||||
|       bboxes: [n, 4], n is number of masks after nms | ||||
|       shape: input_image_size, (h, w) | ||||
|  | ||||
|     Returns: | ||||
|       masks: [h, w, n] | ||||
|     """ | ||||
|     c, mh, mw = protos.shape  # CHW | ||||
|     masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw) | ||||
| @ -441,7 +595,19 @@ def process_mask_native(protos, masks_in, bboxes, shape): | ||||
|  | ||||
|  | ||||
| def scale_segments(img1_shape, segments, img0_shape, ratio_pad=None, normalize=False): | ||||
|     # Rescale coords (xyxy) from img1_shape to img0_shape | ||||
|     """ | ||||
|     > Rescale segment coords (xyxy) from img1_shape to img0_shape | ||||
|  | ||||
|     Args: | ||||
|       img1_shape: The shape of the image that the segments are from. | ||||
|       segments: the segments to be scaled | ||||
|       img0_shape: the shape of the image that the segmentation is being applied to | ||||
|       ratio_pad: the ratio of the image size to the padded image size. | ||||
|       normalize: If True, the coordinates will be normalized to the range [0, 1]. Defaults to False | ||||
|  | ||||
|     Returns: | ||||
|       the rescaled segment coordinates. | ||||
|     """ | ||||
|     if ratio_pad is None:  # calculate from img0_shape | ||||
|         gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain  = old / new | ||||
|         pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2  # wh padding | ||||
| @ -460,7 +626,16 @@ def scale_segments(img1_shape, segments, img0_shape, ratio_pad=None, normalize=F | ||||
|  | ||||
|  | ||||
| def masks2segments(masks, strategy='largest'): | ||||
|     # Convert masks(n,160,160) into segments(n,xy) | ||||
|     """ | ||||
|     > It takes a list of masks(n,h,w) and returns a list of segments(n,xy) | ||||
|  | ||||
|     Args: | ||||
|       masks: the output of the model, which is a tensor of shape (n, h, w), e.g. (n, 160, 160) | ||||
|       strategy: 'concat' or 'largest'. Defaults to largest | ||||
|  | ||||
|     Returns: | ||||
|       segments (List): list of segment masks | ||||
|     """ | ||||
|     segments = [] | ||||
|     for x in masks.int().cpu().numpy().astype('uint8'): | ||||
|         c = cv2.findContours(x, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0] | ||||
| @ -476,7 +651,14 @@ def masks2segments(masks, strategy='largest'): | ||||
|  | ||||
|  | ||||
| def clip_segments(segments, shape): | ||||
|     # Clip segments (xy1,xy2,...) to image shape (height, width) | ||||
|     """ | ||||
|     > It takes segments (xy1,xy2,...) and clips their x and y coordinates to the image shape (height, width) | ||||
|  | ||||
|     Args: | ||||
|       segments: a list of segments, each segment is a list of points, each point is a list of x,y | ||||
|     coordinates | ||||
|       shape: the shape of the image | ||||
|     """ | ||||
|     if isinstance(segments, torch.Tensor):  # faster individually | ||||
|         segments[:, 0].clamp_(0, shape[1])  # x | ||||
|         segments[:, 1].clamp_(0, shape[0])  # y | ||||
|  | ||||
		Reference in New Issue
	
	Block a user