From 8996c5c6cf6470109bed60573ac7edc75de9f4c4 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Mon, 2 Jan 2023 20:42:30 +0530 Subject: [PATCH] [Docs]: Link buttons, add autobackend, BaseModel and ops (#130) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Glenn Jocher --- README.md | 5 +- docs/quickstart.md | 9 +- docs/reference/nn.md | 15 ++ docs/reference/ops.md | 162 +++++++++++++++++++ docs/sdk.md | 15 +- mkdocs.yml | 7 +- ultralytics/__init__.py | 1 + ultralytics/nn/autobackend.py | 76 +++++++-- ultralytics/nn/tasks.py | 72 ++++++++- ultralytics/yolo/utils/ops.py | 292 +++++++++++++++++++++++++++------- 10 files changed, 560 insertions(+), 94 deletions(-) create mode 100644 docs/reference/nn.md create mode 100644 docs/reference/ops.md diff --git a/README.md b/README.md index 1dcfe92..d4993b6 100644 --- a/README.md +++ b/README.md @@ -34,11 +34,10 @@ To use pythonic interface of Ultralytics YOLO model ```python from ultralytics import YOLO -model = YOLO.new("yolov8n.yaml") # create a new model from scratch -model = YOLO.load( +model = YOLO("yolov8n.yaml") # create a new model from scratch +model = YOLO( "yolov8n.pt" ) # load a pretrained model (recommended for best training results) - results = model.train(data="coco128.yaml", epochs=100, imgsz=640, ...) results = model.val() results = model.predict(source="bus.jpg") diff --git a/docs/quickstart.md b/docs/quickstart.md index 42d0f2a..d73ac8a 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -33,7 +33,7 @@ CLI requires no customization or code. You can simply run all tasks from the ter ```bash yolo task=detect mode=train model=s.yaml device=\'0,1,2,3\' ``` -[CLI Guide](#){ .md-button .md-button--primary} +[CLI Guide](cli.md){ .md-button .md-button--primary} ## Python API Ultralytics YOLO comes with pythonic Model and Trainer interface. @@ -42,10 +42,9 @@ Ultralytics YOLO comes with pythonic Model and Trainer interface. import ultralytics from ultralytics import YOLO - model = YOLO() - model.new("s-seg.yaml") # automatically detects task type - model.load("s-seg.pt") # load checkpoint + model = YOLO("s-seg.yaml") # automatically detects task type + model = YOLO("s-seg.pt") # load checkpoint model.train(data="coco128-segments", epochs=1, lr0=0.01, ...) model.train(data="coco128-segments", epochs=1, lr0=0.01, device="0,1,2,3") # DDP mode ``` -[API Guide](#){ .md-button .md-button--primary} +[API Guide](sdk.md){ .md-button .md-button--primary} diff --git a/docs/reference/nn.md b/docs/reference/nn.md new file mode 100644 index 0000000..8a66fce --- /dev/null +++ b/docs/reference/nn.md @@ -0,0 +1,15 @@ +# nn Module +Ultralytics nn module contains 3 main components: + +1. **AutoBackend**: A module that can run inference on all popular model formats +2. **BaseModel**: `BaseModel` class defines the operations supported by tasks like Detection and Segmentation +3. **modules**: Optimized and reusable neural network blocks built on PyTorch. 
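
As a quick orientation for reviewers, here is a minimal usage sketch of the `AutoBackend` component listed above. It is illustrative only (not part of this patch) and relies solely on the constructor, `warmup()`, and `forward()` signatures added in `ultralytics/nn/autobackend.py`; the weights path and input tensor are placeholders.

```python
import torch

from ultralytics.nn.autobackend import AutoBackend

# Construct directly from a weights path; the backend (PyTorch, ONNX, TensorRT, ...)
# is inferred from the file suffix via AutoBackend._model_type().
model = AutoBackend("yolov8n.pt", device=torch.device("cpu"), fp16=False, fuse=True)

model.warmup(imgsz=(1, 3, 640, 640))  # optional warmup (skipped on CPU in the implementation)
im = torch.zeros(1, 3, 640, 640)      # dummy BCHW input tensor
y = model(im)                         # forward pass returning raw predictions
```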
+ +## AutoBackend +:::ultralytics.nn.autobackend.AutoBackend + +## BaseModel +:::ultralytics.nn.tasks.BaseModel + +## Modules +TODO \ No newline at end of file diff --git a/docs/reference/ops.md b/docs/reference/ops.md new file mode 100644 index 0000000..ed85005 --- /dev/null +++ b/docs/reference/ops.md @@ -0,0 +1,162 @@ +This module contains optimized deep learning related operations used in the Ultralytics YOLO framework +## Non-max suppression +:::ultralytics.ops.non_max_suppression + handler: python + options: + show_source: false + show_root_toc_entry: false +--- +## Scale boxes +:::ultralytics.ops.scale_boxes + handler: python + options: + show_source: false + show_root_toc_entry: false +--- +## Scale image +:::ultralytics.ops.scale_image + handler: python + options: + show_source: false + show_root_toc_entry: false +--- +## clip boxes +:::ultralytics.ops.clip_boxes + handler: python + options: + show_source: false + show_root_toc_entry: false +--- +# Box Format Conversion +## xyxy2xywh +:::ultralytics.ops.xyxy2xywh + handler: python + options: + show_source: false + show_root_toc_entry: false +--- +## xywh2xyxy +:::ultralytics.ops.xywh2xyxy + handler: python + options: + show_source: false + show_root_toc_entry: false +--- +## xywhn2xyxy +:::ultralytics.ops.xywhn2xyxy + handler: python + options: + show_source: false + show_root_toc_entry: false +--- +## xyxy2xywhn +:::ultralytics.ops.xyxy2xywhn + handler: python + options: + show_source: false + show_root_toc_entry: false +--- +## xyn2xy +:::ultralytics.ops.xyn2xy + handler: python + options: + show_source: false + show_root_toc_entry: false +--- +## xywh2ltwh +:::ultralytics.ops.xywh2ltwh + handler: python + options: + show_source: false + show_root_toc_entry: false +--- +## xyxy2ltwh +:::ultralytics.ops.xyxy2ltwh + handler: python + options: + show_source: false + show_root_toc_entry: false +--- +## ltwh2xywh +:::ultralytics.ops.ltwh2xywh + handler: python + options: + show_source: false + show_root_toc_entry: false +--- +## ltwh2xyxy +:::ultralytics.ops.ltwh2xyxy + handler: python + options: + show_source: false + show_root_toc_entry: false +--- +## segment2box +:::ultralytics.ops.segment2box + handler: python + options: + show_source: false + show_root_toc_entry: false +--- +# Mask Operations +## resample_segments +:::ultralytics.ops.resample_segments + handler: python + options: + show_source: false + show_root_toc_entry: false +--- +## crop_mask +:::ultralytics.ops.crop_mask + handler: python + options: + show_source: false + show_root_toc_entry: false +--- +## process_mask_upsample +:::ultralytics.ops.process_mask_upsample + handler: python + options: + show_source: false + show_root_toc_entry: false +--- +## process_mask +:::ultralytics.ops.process_mask + handler: python + options: + show_source: false + show_root_toc_entry: false +--- +## process_mask_native +:::ultralytics.ops.process_mask_native + handler: python + options: + show_source: false + show_root_toc_entry: false +--- +## scale_segments +:::ultralytics.ops.scale_segments + handler: python + options: + show_source: false + show_root_toc_entry: false +--- +## masks2segments +:::ultralytics.ops.masks2segments + handler: python + options: + show_source: false + show_root_toc_entry: false +--- +## clip_segments +:::ultralytics.ops.clip_segments + handler: python + options: + show_source: false + show_root_toc_entry: false +--- + + + + + diff --git a/docs/sdk.md b/docs/sdk.md index 148f9fd..f858d8c 100644 --- a/docs/sdk.md +++ b/docs/sdk.md @@ -6,8 +6,7 @@ This is 
the simplest way of simply using yolo models in a python environment. It ```python from ultralytics import YOLO - model = YOLO() - model.new("n.yaml") # pass any model type + model = YOLO("yolov8n.yaml") model(img_tensor) # Or model.forward(). inference. model.train(data="coco128.yaml", epochs=5) ``` @@ -16,10 +15,9 @@ This is the simplest way of simply using yolo models in a python environment. It ```python from ultralytics import YOLO - model = YOLO() - model.load("n.pt") # pass any model type + model = YOLO("yolov8n.pt") # pass any model type model(...) # inference - model.train(data="coco128.yaml", epochs=5) + model.train(epochs=5) ``` === "Resume Training" @@ -35,8 +33,7 @@ This is the simplest way of simply using yolo models in a python environment. It ```python from ultralytics import YOLO - model = YOLO() - model.load("model.pt") + model = YOLO("model.pt") model.predict(source="0") # accepts all formats - img/folder/vid.*(mp4/format). 0 for webcam model.predict(source="folder", view_img=True) # Display preds. Accepts all yolo predict arguments @@ -48,7 +45,7 @@ This is the simplest way of simply using yolo models in a python environment. It ```python from ultralytics import YOLO - model = YOLO() + model = YOLO("model.pt") model.fuse() model.info(verbose=True) # Print model information model.export(format=) # TODO: @@ -61,7 +58,7 @@ This is the simplest way of simply using yolo models in a python environment. It To know more about using `YOLO` models, refer Model class refernce -[Model reference](#){ .md-button .md-button--primary} +[Model reference](reference/model.md){ .md-button .md-button--primary} --- ### Customizing Tasks with Trainers diff --git a/mkdocs.yml b/mkdocs.yml index 9e42752..764e08d 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -51,6 +51,7 @@ markdown_extensions: - pymdownx.superfences - tables - attr_list + - def_list # Syntax highlight - pymdownx.highlight: anchor_linenums: true @@ -84,14 +85,16 @@ nav: - Detection: tasks/detection.md - Segmentation: tasks/segmentation.md - Classification: tasks/classification.md - - Customization Tutorials: + - Advanced Tutorials: - Customize Trainer: customize/train.md - Customize Validator: customize/val.md - Customize Predictor: customize/predict.md - Reference: - - YOLO Models: reference/model.md + - Python Model interface: reference/model.md - Engine: - Trainer: reference/base_trainer.md - Validator: reference/base_val.md - Predictor: reference/base_pred.md - Exporter: reference/exporter.md + - nn Module: reference/nn.md + - operations: reference/ops.md diff --git a/ultralytics/__init__.py b/ultralytics/__init__.py index 8ca0777..46aaedc 100644 --- a/ultralytics/__init__.py +++ b/ultralytics/__init__.py @@ -1,5 +1,6 @@ __version__ = "8.0.0.dev0" from ultralytics.yolo.engine.model import YOLO +from ultralytics.yolo.utils import ops __all__ = ["__version__", "YOLO", "hub"] # allow simpler import diff --git a/ultralytics/nn/autobackend.py b/ultralytics/nn/autobackend.py index d4ad519..0b50e52 100644 --- a/ultralytics/nn/autobackend.py +++ b/ultralytics/nn/autobackend.py @@ -17,22 +17,36 @@ from ultralytics.yolo.utils.ops import xywh2xyxy class AutoBackend(nn.Module): - # YOLOv5 MultiBackend class for python inference on various backends + def __init__(self, weights='yolov8n.pt', device=torch.device('cpu'), dnn=False, data=None, fp16=False, fuse=True): - # Usage: - # PyTorch: weights = *.pt - # TorchScript: *.torchscript - # ONNX Runtime: *.onnx - # ONNX OpenCV DNN: *.onnx --dnn - # OpenVINO: *.xml - # CoreML: *.mlmodel - # 
TensorRT: *.engine - # TensorFlow SavedModel: *_saved_model - # TensorFlow GraphDef: *.pb - # TensorFlow Lite: *.tflite - # TensorFlow Edge TPU: *_edgetpu.tflite - # PaddlePaddle: *_paddle_model + """ + Ultralytics YOLO MultiBackend class for python inference on various backends + + Args: + weights: the path to the weights file. Defaults to yolov8n.pt + device: The device to run the model on. + dnn: If you want to use OpenCV's DNN module to run the inference, set this to True. Defaults to + False + data: a dictionary containing the following keys: + fp16: If true, will use half precision. Defaults to False + fuse: whether to fuse the model or not. Defaults to True + Supported format and their usage: + | Platform | weights | + |-----------------------|------------------| + | PyTorch | *.pt | + | TorchScript | *.torchscript | + | ONNX Runtime | *.onnx | + | ONNX OpenCV DNN | *.onnx --dnn | + | OpenVINO | *.xml | + | CoreML | *.mlmodel | + | TensorRT | *.engine | + | TensorFlow SavedModel | *_saved_model | + | TensorFlow GraphDef | *.pb | + | TensorFlow Lite | *.tflite | + | TensorFlow Edge TPU | *_edgetpu.tflite | + | PaddlePaddle | *_paddle_model | + """ super().__init__() w = str(weights[0] if isinstance(weights, list) else weights) nn_module = isinstance(weights, torch.nn.Module) @@ -215,6 +229,15 @@ class AutoBackend(nn.Module): self.__dict__.update(locals()) # assign all variables to self def forward(self, im, augment=False, visualize=False): + """ + Runs inference on the given model + + Args: + im: the image tensor + augment: whether to augment the image. Defaults to False + visualize: if True, then the network will output the feature maps of the last convolutional layer. + Defaults to False + """ # YOLOv5 MultiBackend inference b, ch, h, w = im.shape # batch, channel, height, width if self.fp16 and im.dtype != torch.float16: @@ -297,10 +320,21 @@ class AutoBackend(nn.Module): return self.from_numpy(y) def from_numpy(self, x): + """ + `from_numpy` converts a numpy array to a tensor + + Args: + x: the numpy array to convert + """ return torch.from_numpy(x).to(self.device) if isinstance(x, np.ndarray) else x def warmup(self, imgsz=(1, 3, 640, 640)): - # Warmup model by running inference once + """ + Warmup model by running inference once + + Args: + imgsz: the size of the image you want to run inference on. + """ warmup_types = self.pt, self.jit, self.onnx, self.engine, self.saved_model, self.pb, self.triton, self.nn_module if any(warmup_types) and (self.device.type != 'cpu' or self.triton): im = torch.empty(*imgsz, dtype=torch.half if self.fp16 else torch.float, device=self.device) # input @@ -309,6 +343,12 @@ class AutoBackend(nn.Module): @staticmethod def _model_type(p='path/to/model.pt'): + """ + This function takes a path to a model file and returns the model type + + Args: + p: path to the model file. Defaults to path/to/model.pt + """ # Return model type from model path, i.e. path='path/to/model.onnx' -> type=onnx # types = [pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle] from ultralytics.yolo.engine.exporter import export_formats @@ -323,6 +363,12 @@ class AutoBackend(nn.Module): @staticmethod def _load_metadata(f=Path('path/to/meta.yaml')): + """ + > Loads the metadata from a yaml file + + Args: + f: The path to the metadata file. 
+ """ from ultralytics.yolo.utils.files import yaml_load # Load metadata from meta.yaml if it exists diff --git a/ultralytics/nn/tasks.py b/ultralytics/nn/tasks.py index 7edbb53..afe0bfa 100644 --- a/ultralytics/nn/tasks.py +++ b/ultralytics/nn/tasks.py @@ -17,11 +17,36 @@ from ultralytics.yolo.utils.torch_utils import (fuse_conv_and_bn, initialize_wei class BaseModel(nn.Module): - # YOLOv5 base model + ''' + The BaseModel class is a base class for all the models in the Ultralytics YOLO family. + ''' + def forward(self, x, profile=False, visualize=False): - return self._forward_once(x, profile, visualize) # single-scale inference, train + """ + > `forward` is a wrapper for `_forward_once` that runs the model on a single scale + + Args: + x: the input image + profile: whether to profile the model. Defaults to False + visualize: if True, will return the intermediate feature maps. Defaults to False + + Returns: + The output of the network. + """ + return self._forward_once(x, profile, visualize) def _forward_once(self, x, profile=False, visualize=False): + """ + > Forward pass of the network + + Args: + x: input to the model + profile: if True, the time taken for each layer will be printed. Defaults to False + visualize: If True, it will save the feature maps of the model. Defaults to False + + Returns: + The last layer of the model. + """ y, dt = [], [] # outputs for m in self.model: if m.f != -1: # if not from previous layer @@ -36,6 +61,15 @@ class BaseModel(nn.Module): return x def _profile_one_layer(self, m, x, dt): + """ + It takes a model, an input, and a list of times, and it profiles the model on the input, appending + the time to the list + + Args: + m: the model + x: the input image + dt: list of time taken for each layer + """ c = m == self.model[-1] # is final layer, copy input as inplace fix o = thop.profile(m, inputs=(x.copy() if c else x,), verbose=False)[0] / 1E9 * 2 if thop else 0 # FLOPs t = time_sync() @@ -48,7 +82,13 @@ class BaseModel(nn.Module): if c: LOGGER.info(f"{sum(dt):10.2f} {'-':>10s} {'-':>10s} Total") - def fuse(self): # fuse model Conv2d() + BatchNorm2d() layers + def fuse(self): + """ + > It takes a model and fuses the Conv2d() and BatchNorm2d() layers into a single layer + + Returns: + The model is being returned. + """ LOGGER.info('Fusing layers... ') for m in self.model.modules(): if isinstance(m, (Conv, DWConv)) and hasattr(m, 'bn'): @@ -58,11 +98,27 @@ class BaseModel(nn.Module): self.info() return self - def info(self, verbose=False, imgsz=640): # print model information + def info(self, verbose=False, imgsz=640): + """ + Prints model information + + Args: + verbose: if True, prints out the model information. Defaults to False + imgsz: the size of the image that the model will be trained on. Defaults to 640 + """ model_info(self, verbose, imgsz) def _apply(self, fn): - # Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers + """ + `_apply()` is a function that applies a function to all the tensors in the model that are not + parameters or registered buffers + + Args: + fn: the function to apply to the model + + Returns: + A model that is a Detect() object. + """ self = super()._apply(fn) m = self.model[-1] # Detect() if isinstance(m, (Detect, Segment)): @@ -72,6 +128,12 @@ class BaseModel(nn.Module): return self def load(self, weights): + """ + > This function loads the weights of the model from a file + + Args: + weights: The weights to load into the model. 
+ """ # Force all tasks to implement this function raise NotImplementedError("This function needs to be implemented by derived classes!") diff --git a/ultralytics/yolo/utils/ops.py b/ultralytics/yolo/utils/ops.py index 41b0db0..edb32b0 100644 --- a/ultralytics/yolo/utils/ops.py +++ b/ultralytics/yolo/utils/ops.py @@ -47,6 +47,17 @@ def coco80_to_coco91_class(): # converts 80-index (val2014) to 91-index (paper) def segment2box(segment, width=640, height=640): + """ + > Convert 1 segment label to 1 box label, applying inside-image constraint, i.e. (xy1, xy2, ...) to + (xyxy) + Args: + segment: the segment label + width: the width of the image. Defaults to 640 + height: The height of the image. Defaults to 640 + + Returns: + the minimum and maximum x and y values of the segment. + """ # Convert 1 segment label to 1 box label, applying inside-image constraint, i.e. (xy1, xy2, ...) to (xyxy) x, y = segment.T # segment xy inside = (x >= 0) & (y >= 0) & (x <= width) & (y <= height) @@ -55,7 +66,18 @@ def segment2box(segment, width=640, height=640): def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None): - # Rescale boxes (xyxy) from img1_shape to img0_shape + """ + > Rescale boxes (xyxy) from img1_shape to img0_shape + Args: + img1_shape: The shape of the image that the bounding boxes are for. + boxes: the bounding boxes of the objects in the image + img0_shape: the shape of the original image + ratio_pad: a tuple of (ratio, pad) + + Returns: + The boxes are being returned. + """ + # if ratio_pad is None: # calculate from img0_shape gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding @@ -70,18 +92,6 @@ def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None): return boxes -def clip_boxes(boxes, shape): - # Clip boxes (xyxy) to image shape (height, width) - if isinstance(boxes, torch.Tensor): # faster individually - boxes[..., 0].clamp_(0, shape[1]) # x1 - boxes[..., 1].clamp_(0, shape[0]) # y1 - boxes[..., 2].clamp_(0, shape[1]) # x2 - boxes[..., 3].clamp_(0, shape[0]) # y2 - else: # np.array (faster grouped) - boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, shape[1]) # x1, x2 - boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, shape[0]) # y1, y2 - - def make_divisible(x, divisor): # Returns nearest x divisible by divisor if isinstance(divisor, torch.Tensor): @@ -101,7 +111,7 @@ def non_max_suppression( nm=0, # number of masks ): """ - Perform non-maximum suppression (NMS) on a set of boxes, with support for masks and multiple labels per box. + > Perform non-maximum suppression (NMS) on a set of boxes, with support for masks and multiple labels per box. 
Arguments: prediction (torch.Tensor): A tensor of shape (batch_size, num_boxes, num_classes + 4 + num_masks) @@ -217,6 +227,25 @@ def non_max_suppression( return output +def clip_boxes(boxes, shape): + """ + > It takes a list of bounding boxes and a shape (height, width) and clips the bounding boxes to the + shape + + Args: + boxes: the bounding boxes to clip + shape: the shape of the image + """ + if isinstance(boxes, torch.Tensor): # faster individually + boxes[..., 0].clamp_(0, shape[1]) # x1 + boxes[..., 1].clamp_(0, shape[0]) # y1 + boxes[..., 2].clamp_(0, shape[1]) # x2 + boxes[..., 3].clamp_(0, shape[0]) # y2 + else: # np.array (faster grouped) + boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, shape[1]) # x1, x2 + boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, shape[0]) # y1, y2 + + def clip_coords(boxes, shape): # Clip bounding xyxy bounding boxes to image shape (height, width) if isinstance(boxes, torch.Tensor): # faster individually @@ -231,9 +260,16 @@ def clip_coords(boxes, shape): def scale_image(im1_shape, masks, im0_shape, ratio_pad=None): """ - img1_shape: model input shape, [h, w] - img0_shape: origin pic shape, [h, w, 3] - masks: [h, w, num] + > It takes a mask, and resizes it to the original image size + + Args: + im1_shape: model input shape, [h, w] + masks: [h, w, num] + im0_shape: the original image shape + ratio_pad: the ratio of the padding to the original image. + + Returns: + The masks are being returned. """ # Rescale coordinates (xyxy) from im1_shape to im0_shape if ratio_pad is None: # calculate from im0_shape @@ -258,7 +294,16 @@ def scale_image(im1_shape, masks, im0_shape, ratio_pad=None): def xyxy2xywh(x): - # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] where xy1=top-left, xy2=bottom-right + """ + > It takes a list of bounding boxes, and converts them from the format [x1, y1, x2, y2] to [x, y, w, + h] where xy1=top-left, xy2=bottom-right + + Args: + x: the input tensor + + Returns: + the center of the box, the width and the height of the box. + """ y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) y[..., 0] = (x[..., 0] + x[..., 2]) / 2 # x center y[..., 1] = (x[..., 1] + x[..., 3]) / 2 # y center @@ -268,7 +313,15 @@ def xyxy2xywh(x): def xywh2xyxy(x): - # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right + """ + > It converts the bounding box from x,y,w,h to x1,y1,x2,y2 where xy1=top-left, xy2=bottom-right + + Args: + x: the input tensor + + Returns: + the top left and bottom right coordinates of the bounding box. + """ y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) y[..., 0] = x[..., 0] - x[..., 2] / 2 # top left x y[..., 1] = x[..., 1] - x[..., 3] / 2 # top left y @@ -278,7 +331,19 @@ def xywh2xyxy(x): def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0): - # Convert nx4 boxes from [x, y, w, h] normalized to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right + """ + > It converts the normalized coordinates to the actual coordinates [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right + + Args: + x: the bounding box coordinates + w: width of the image. Defaults to 640 + h: height of the image. Defaults to 640 + padw: padding width. Defaults to 0 + padh: height of the padding. Defaults to 0 + + Returns: + the xyxy coordinates of the bounding box. 
+ """ y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) y[..., 0] = w * (x[..., 0] - x[..., 2] / 2) + padw # top left x y[..., 1] = h * (x[..., 1] - x[..., 3] / 2) + padh # top left y @@ -288,7 +353,20 @@ def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0): def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0): - # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] normalized where xy1=top-left, xy2=bottom-right + """ + > It takes in a list of bounding boxes, and returns a list of bounding boxes, but with the x and y + coordinates normalized to the width and height of the image + + Args: + x: the bounding box coordinates + w: width of the image. Defaults to 640 + h: height of the image. Defaults to 640 + clip: If True, the boxes will be clipped to the image boundaries. Defaults to False + eps: the minimum value of the box's width and height. + + Returns: + the xywhn format of the bounding boxes. + """ if clip: clip_boxes(x, (h - eps, w - eps)) # warning: inplace clip y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) @@ -300,7 +378,19 @@ def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0): def xyn2xy(x, w=640, h=640, padw=0, padh=0): - # Convert normalized segments into pixel segments, shape (n,2) + """ + > It converts normalized segments into pixel segments of shape (n,2) + + Args: + x: the normalized coordinates of the bounding box + w: width of the image. Defaults to 640 + h: height of the image. Defaults to 640 + padw: padding width. Defaults to 0 + padh: padding height. Defaults to 0 + + Returns: + the x and y coordinates of the top left corner of the bounding box. + """ y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) y[..., 0] = w * x[..., 0] + padw # top left x y[..., 1] = h * x[..., 1] + padh # top left y @@ -308,7 +398,15 @@ def xyn2xy(x, w=640, h=640, padw=0, padh=0): def xywh2ltwh(x): - # Convert nx4 boxes from [x, y, w, h] to [x1, y1, w, h] where xy1=top-left + """ + > It converts the bounding box from [x, y, w, h] to [x1, y1, w, h] where xy1=top-left + + Args: + x: the x coordinate of the center of the bounding box + + Returns: + the top left x and y coordinates of the bounding box. + """ y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y @@ -316,7 +414,15 @@ def xywh2ltwh(x): def xyxy2ltwh(x): - # Convert nx4 boxes from [x1, y1, x2, y2] to [x1, y1, w, h] where xy1=top-left, xy2=bottom-right + """ + > Convert nx4 boxes from [x1, y1, x2, y2] to [x1, y1, w, h] where xy1=top-left, xy2=bottom-right + + Args: + x: the input tensor + + Returns: + the xyxy2ltwh function. 
+ """ y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) y[:, 2] = x[:, 2] - x[:, 0] # width y[:, 3] = x[:, 3] - x[:, 1] # height @@ -324,7 +430,12 @@ def xyxy2ltwh(x): def ltwh2xywh(x): - # Convert nx4 boxes from [x1, y1, w, h] to [x, y, w, h] where xy1=top-left, xy=center + """ + > Convert nx4 boxes from [x1, y1, w, h] to [x, y, w, h] where xy1=top-left, xy=center + + Args: + x: the input tensor + """ y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) y[:, 0] = x[:, 0] + x[:, 2] / 2 # center x y[:, 1] = x[:, 1] + x[:, 3] / 2 # center y @@ -332,7 +443,16 @@ def ltwh2xywh(x): def ltwh2xyxy(x): - # Convert nx4 boxes from [x1, y1, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right + """ + > It converts the bounding box from [x1, y1, w, h] to [x1, y1, x2, y2] where xy1=top-left, + xy2=bottom-right + + Args: + x: the input image + + Returns: + the xyxy coordinates of the bounding boxes. + """ y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) y[:, 2] = x[:, 2] + x[:, 0] # width y[:, 3] = x[:, 3] + x[:, 1] # height @@ -340,7 +460,16 @@ def ltwh2xyxy(x): def segments2boxes(segments): - # Convert segment labels to box labels, i.e. (cls, xy1, xy2, ...) to (cls, xywh) + """ + > It converts segment labels to box labels, i.e. (cls, xy1, xy2, ...) to (cls, xywh) + + Args: + segments: list of segments, each segment is a list of points, each point is a list of x, y + coordinates + + Returns: + the xywh coordinates of the bounding boxes. + """ boxes = [] for s in segments: x, y = s.T # segment xy @@ -349,7 +478,17 @@ def segments2boxes(segments): def resample_segments(segments, n=1000): - # Up-sample an (n,2) segment + """ + > It takes a list of segments (n,2) and returns a list of segments (n,2) where each segment has been + up-sampled to n points + + Args: + segments: a list of (n,2) arrays, where n is the number of points in the segment. + n: number of points to resample the segment to. Defaults to 1000 + + Returns: + the resampled segments. + """ for i, s in enumerate(segments): s = np.concatenate((s, s[0:1, :]), axis=0) x = np.linspace(0, len(s) - 1, n) @@ -360,13 +499,15 @@ def resample_segments(segments, n=1000): def crop_mask(masks, boxes): """ - "Crop" predicted masks by zeroing out everything not in the predicted bbox. - Vectorized by Chong (thanks Chong). + > It takes a mask and a bounding box, and returns a mask that is cropped to the bounding box + Args: - - masks should be a size [h, w, n] tensor of masks - - boxes should be a size [n, 4] tensor of bbox coords in relative point form - """ + masks: [h, w, n] tensor of masks + boxes: [n, 4] tensor of bbox coords in relative point form + Returns: + The masks are being cropped to the bounding box. + """ n, h, w = masks.shape x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1) # x1 shape(1,1,n) r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, None, :] # rows shape(1,w,1) @@ -377,14 +518,18 @@ def crop_mask(masks, boxes): def process_mask_upsample(protos, masks_in, bboxes, shape): """ - Crop after upsample. - proto_out: [mask_dim, mask_h, mask_w] - out_masks: [n, mask_dim], n is number of masks after nms - bboxes: [n, 4], n is number of masks after nms - shape:input_image_size, (h, w) - return: h, w, n - """ + > It takes the output of the mask head, and applies the mask to the bounding boxes. This produces masks of higher + quality but is slower. 
+ + Args: + protos: [mask_dim, mask_h, mask_w] + masks_in: [n, mask_dim], n is number of masks after nms + bboxes: [n, 4], n is number of masks after nms + shape: the size of the input image + Returns: + mask + """ c, mh, mw = protos.shape # CHW masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw) masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0] # CHW @@ -394,12 +539,17 @@ def process_mask_upsample(protos, masks_in, bboxes, shape): def process_mask(protos, masks_in, bboxes, shape, upsample=False): """ - Crop before upsample. - proto_out: [mask_dim, mask_h, mask_w] - out_masks: [n, mask_dim], n is number of masks after nms - bboxes: [n, 4], n is number of masks after nms - shape:input_image_size, (h, w) - return: h, w, n + > It takes the output of the mask head, and applies the mask to the bounding boxes. This is faster but produces + downsampled quality of mask + + Args: + protos: [mask_dim, mask_h, mask_w] + masks_in: [n, mask_dim], n is number of masks after nms + bboxes: [n, 4], n is number of masks after nms + shape: the size of the input image + + Returns: + mask """ c, mh, mw = protos.shape # CHW @@ -420,12 +570,16 @@ def process_mask(protos, masks_in, bboxes, shape, upsample=False): def process_mask_native(protos, masks_in, bboxes, shape): """ - Crop after upsample. - protos: [mask_dim, mask_h, mask_w] - masks_in: [n, mask_dim], n is number of masks after nms - bboxes: [n, 4], n is number of masks after nms - shape: input_image_size, (h, w) - return: h, w, n + > It takes the output of the mask head, and crops it after upsampling to the bounding boxes. + + Args: + protos: [mask_dim, mask_h, mask_w] + masks_in: [n, mask_dim], n is number of masks after nms + bboxes: [n, 4], n is number of masks after nms + shape: input_image_size, (h, w) + + Returns: + masks: [h, w, n] """ c, mh, mw = protos.shape # CHW masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw) @@ -441,7 +595,19 @@ def process_mask_native(protos, masks_in, bboxes, shape): def scale_segments(img1_shape, segments, img0_shape, ratio_pad=None, normalize=False): - # Rescale coords (xyxy) from img1_shape to img0_shape + """ + > Rescale segment coords (xyxy) from img1_shape to img0_shape + + Args: + img1_shape: The shape of the image that the segments are from. + segments: the segments to be scaled + img0_shape: the shape of the image that the segmentation is being applied to + ratio_pad: the ratio of the image size to the padded image size. + normalize: If True, the coordinates will be normalized to the range [0, 1]. Defaults to False + + Returns: + the segmented image. + """ if ratio_pad is None: # calculate from img0_shape gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding @@ -460,7 +626,16 @@ def scale_segments(img1_shape, segments, img0_shape, ratio_pad=None, normalize=F def masks2segments(masks, strategy='largest'): - # Convert masks(n,160,160) into segments(n,xy) + """ + > It takes a list of masks(n,h,w) and returns a list of segments(n,xy) + + Args: + masks: the output of the model, which is a tensor of shape (batch_size, 160, 160) + strategy: 'concat' or 'largest'. 
Defaults to largest + + Returns: + segments (List): list of segment masks + """ segments = [] for x in masks.int().cpu().numpy().astype('uint8'): c = cv2.findContours(x, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0] @@ -476,7 +651,14 @@ def masks2segments(masks, strategy='largest'): def clip_segments(segments, shape): - # Clip segments (xy1,xy2,...) to image shape (height, width) + """ + > It takes a list of line segments (x1,y1,x2,y2) and clips them to the image shape (height, width) + + Args: + segments: a list of segments, each segment is a list of points, each point is a list of x,y + coordinates + shape: the shape of the image + """ if isinstance(segments, torch.Tensor): # faster individually segments[:, 0].clamp_(0, shape[1]) # x segments[:, 1].clamp_(0, shape[0]) # y
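
To round off the ops changes, a small self-contained sketch (illustrative only, not part of the diff) exercising the box-format helpers documented above through the `ultralytics.yolo.utils.ops` module exposed by the new `__init__.py` import; the box values are made up.

```python
import torch

from ultralytics.yolo.utils import ops

# Two xywh boxes (center-x, center-y, width, height); the second extends past a 640x640 image.
xywh = torch.tensor([[320.0, 320.0, 100.0, 50.0],
                     [630.0, 10.0, 40.0, 40.0]])

xyxy = ops.xywh2xyxy(xywh)                  # convert to [x1, y1, x2, y2]
ops.clip_boxes(xyxy, (640, 640))            # clip in place to the (height, width) image shape
xywhn = ops.xyxy2xywhn(xyxy, w=640, h=640)  # back to normalized xywh
print(xyxy)
print(xywhn)
```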