README and Docs updates with A100 TensorRT times (#270)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2023-01-11 21:54:41 +01:00
parent 216cf2ddb6
commit e18ae9d8e1
10 changed files with 250 additions and 241 deletions
--- a/ultralytics/yolo/engine/trainer.py
+++ b/ultralytics/yolo/engine/trainer.py
@ -84,6 +84,7 @@ class BaseTrainer:
        if overrides is None:
            overrides = {}
        self.args = get_config(config, overrides)
+        self.device = utils.torch_utils.select_device(self.args.device, self.args.batch)
        self.check_resume()
        self.console = LOGGER
        self.validator = None
@ -113,7 +114,6 @@ class BaseTrainer:
            print_args(dict(self.args))

        # Device
-        self.device = utils.torch_utils.select_device(self.args.device, self.batch_size)
        self.amp = self.device.type != 'cpu'
        self.scaler = amp.GradScaler(enabled=self.amp)
        if self.device.type == 'cpu':
@ -164,7 +164,15 @@ class BaseTrainer:
            callback(self)

    def train(self):
-        world_size = torch.cuda.device_count()
+        # Allow device='', device=None on Multi-GPU systems to default to device=0
+        if isinstance(self.args.device, int) or self.args.device:  # i.e. device=0 or device=[0,1,2,3]
+            world_size = torch.cuda.device_count()
+        elif torch.cuda.is_available():  # i.e. device=None or device=''
+            world_size = 1  # default to device 0
+        else:  # i.e. device=None or device='' with no CUDA available (CPU or MPS)
+            world_size = 0
+
+        # Run subprocess if DDP training, else train normally
        if world_size > 1 and "LOCAL_RANK" not in os.environ:
            command = generate_ddp_command(world_size, self)
            try:
--- a/ultralytics/yolo/utils/ops.py
+++ b/ultralytics/yolo/utils/ops.py
@ -1,5 +1,3 @@
-# Ultralytics YOLO 🚀, GPL-3.0 license
-
 import contextlib
 import math
 import re
@ -50,15 +48,15 @@ def coco80_to_coco91_class():  # converts 80-index (val2014) to 91-index (paper)

 def segment2box(segment, width=640, height=640):
    """
-    > Convert 1 segment label to 1 box label, applying inside-image constraint, i.e. (xy1, xy2, ...) to
+    Convert 1 segment label to 1 box label, applying inside-image constraint, i.e. (xy1, xy2, ...) to
    (xyxy)
    Args:
-      segment: the segment label
-      width: the width of the image. Defaults to 640
-      height: The height of the image. Defaults to 640
+      segment (torch.tensor): the segment label
+      width (int): the width of the image. Defaults to 640
+      height (int): The height of the image. Defaults to 640

    Returns:
-      the minimum and maximum x and y values of the segment.
+      (np.array): the minimum and maximum x and y values of the segment.
    """
    # Convert 1 segment label to 1 box label, applying inside-image constraint, i.e. (xy1, xy2, ...) to (xyxy)
    x, y = segment.T  # segment xy
@ -69,17 +67,16 @@ def segment2box(segment, width=640, height=640):

 def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None):
    """
-    > Rescale boxes (xyxy) from img1_shape to img0_shape
+    Rescales bounding boxes (in the format of xyxy) from the shape of the image they were originally specified in (img1_shape) to the shape of a different image (img0_shape).
    Args:
-      img1_shape: The shape of the image that the bounding boxes are for.
-      boxes: the bounding boxes of the objects in the image
-      img0_shape: the shape of the original image
-      ratio_pad: a tuple of (ratio, pad)
+      img1_shape (tuple): The shape of the image that the bounding boxes are for, in the format of (height, width).
+      boxes (torch.tensor): the bounding boxes of the objects in the image, in the format of (x1, y1, x2, y2)
+      img0_shape (tuple): the shape of the target image, in the format of (height, width).
+      ratio_pad (tuple): a tuple of (ratio, pad) for scaling the boxes. If not provided, the ratio and pad will be calculated based on the size difference between the two images.

    Returns:
-      The boxes are being returned.
+      boxes (torch.tensor): The scaled bounding boxes, in the format of (x1, y1, x2, y2)
    """
-    #
    if ratio_pad is None:  # calculate from img0_shape
        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain  = old / new
        pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2  # wh padding
@ -113,7 +110,7 @@ def non_max_suppression(
        nm=0,  # number of masks
 ):
    """
-    > Perform non-maximum suppression (NMS) on a set of boxes, with support for masks and multiple labels per box.
+    Perform non-maximum suppression (NMS) on a set of boxes, with support for masks and multiple labels per box.

    Arguments:
        prediction (torch.Tensor): A tensor of shape (batch_size, num_boxes, num_classes + 4 + num_masks)
@ -134,7 +131,7 @@ def non_max_suppression(
        nm (int): The number of masks output by the model.

    Returns:
-        List[torch.Tensor]: A list of length batch_size, where each element is a tensor of
+        (List[torch.Tensor]): A list of length batch_size, where each element is a tensor of
            shape (num_boxes, 6 + num_masks) containing the kept boxes, with columns
            (x1, y1, x2, y2, confidence, class, mask1, mask2, ...).
    """
@ -231,12 +228,12 @@ def non_max_suppression(

 def clip_boxes(boxes, shape):
    """
-    > It takes a list of bounding boxes and a shape (height, width) and clips the bounding boxes to the
+    It takes a list of bounding boxes and a shape (height, width) and clips the bounding boxes to the
    shape

    Args:
-      boxes: the bounding boxes to clip
-      shape: the shape of the image
+      boxes (torch.tensor): the bounding boxes to clip
+      shape (tuple): the shape of the image
    """
    if isinstance(boxes, torch.Tensor):  # faster individually
        boxes[..., 0].clamp_(0, shape[1])  # x1
@ -262,16 +259,16 @@ def clip_coords(boxes, shape):

 def scale_image(im1_shape, masks, im0_shape, ratio_pad=None):
    """
-    > It takes a mask, and resizes it to the original image size
+    Takes a mask, and resizes it to the original image size

    Args:
-      im1_shape: model input shape, [h, w]
-      masks: [h, w, num]
-      im0_shape: the original image shape
-      ratio_pad: the ratio of the padding to the original image.
+      im1_shape (tuple): model input shape, [h, w]
+      masks (torch.tensor): [h, w, num]
+      im0_shape (tuple): the original image shape
+      ratio_pad (tuple): the ratio of the padding to the original image.

    Returns:
-      The masks are being returned.
+      masks (torch.tensor): The masks resized to the original image size.
    """
    # Rescale coordinates (xyxy) from im1_shape to im0_shape
    if ratio_pad is None:  # calculate from im0_shape
@ -297,14 +294,12 @@ def scale_image(im1_shape, masks, im0_shape, ratio_pad=None):

 def xyxy2xywh(x):
    """
-    > It takes a list of bounding boxes, and converts them from the format [x1, y1, x2, y2] to [x, y, w,
-    h]  where xy1=top-left, xy2=bottom-right
+    Convert bounding box coordinates from (x1, y1, x2, y2) format to (x, y, width, height) format.

    Args:
-      x: the input tensor
-
+        x (np.ndarray) or (torch.Tensor): The input tensor containing the bounding box coordinates in (x1, y1, x2, y2) format.
    Returns:
-      the center of the box, the width and the height of the box.
+       y (numpy.ndarray) or (torch.Tensor): The bounding box coordinates in (x, y, width, height) format.
    """
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[..., 0] = (x[..., 0] + x[..., 2]) / 2  # x center
@ -316,13 +311,12 @@ def xyxy2xywh(x):

 def xywh2xyxy(x):
    """
-    > It converts the bounding box from x,y,w,h to x1,y1,x2,y2 where xy1=top-left, xy2=bottom-right
+    Convert bounding box coordinates from (x, y, width, height) format to (x1, y1, x2, y2) format where (x1, y1) is the top-left corner and (x2, y2) is the bottom-right corner.

    Args:
-      x: the input tensor
-
+        x (np.ndarray) or (torch.Tensor): The input tensor containing the bounding box coordinates in (x, y, width, height) format.
    Returns:
-      the top left and bottom right coordinates of the bounding box.
+        y (numpy.ndarray) or (torch.Tensor): The bounding box coordinates in (x1, y1, x2, y2) format.
    """
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[..., 0] = x[..., 0] - x[..., 2] / 2  # top left x
@ -334,17 +328,16 @@ def xywh2xyxy(x):

 def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0):
    """
-    > It converts the normalized coordinates to the actual coordinates [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
+    Convert normalized bounding box coordinates to pixel coordinates.

    Args:
-      x: the bounding box coordinates
-      w: width of the image. Defaults to 640
-      h: height of the image. Defaults to 640
-      padw: padding width. Defaults to 0
-      padh: height of the padding. Defaults to 0
-
+        x (np.ndarray) or (torch.Tensor): The bounding box coordinates.
+        w (int): Width of the image. Defaults to 640
+        h (int): Height of the image. Defaults to 640
+        padw (int): Padding width. Defaults to 0
+        padh (int): Padding height. Defaults to 0
    Returns:
-      the xyxy coordinates of the bounding box.
+        y (numpy.ndarray) or (torch.Tensor): The coordinates of the bounding box in the format [x1, y1, x2, y2] where x1,y1 is the top-left corner, x2,y2 is the bottom-right corner of the bounding box.
    """
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[..., 0] = w * (x[..., 0] - x[..., 2] / 2) + padw  # top left x
@ -356,18 +349,16 @@ def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0):

 def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0):
    """
-    > It takes in a list of bounding boxes, and returns a list of bounding boxes, but with the x and y
-    coordinates normalized to the width and height of the image
+    Convert bounding box coordinates from (x1, y1, x2, y2) format to (x, y, width, height, normalized) format. x, y, width and height are normalized to image dimensions

    Args:
-      x: the bounding box coordinates
-      w: width of the image. Defaults to 640
-      h: height of the image. Defaults to 640
-      clip: If True, the boxes will be clipped to the image boundaries. Defaults to False
-      eps: the minimum value of the box's width and height.
-
+        x (np.ndarray) or (torch.Tensor): The input tensor containing the bounding box coordinates in (x1, y1, x2, y2) format.
+        w (int): The width of the image. Defaults to 640
+        h (int): The height of the image. Defaults to 640
+        clip (bool): If True, the boxes will be clipped to the image boundaries. Defaults to False
+        eps (float): The minimum value of the box's width and height. Defaults to 0.0
    Returns:
-      the xywhn format of the bounding boxes.
+        y (numpy.ndarray) or (torch.Tensor): The bounding box coordinates in (x, y, width, height, normalized) format
    """
    if clip:
        clip_boxes(x, (h - eps, w - eps))  # warning: inplace clip
@ -381,17 +372,16 @@ def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0):

 def xyn2xy(x, w=640, h=640, padw=0, padh=0):
    """
-    > It converts normalized segments into pixel segments of shape (n,2)
+    Convert normalized coordinates to pixel coordinates of shape (n,2)

    Args:
-      x: the normalized coordinates of the bounding box
-      w: width of the image. Defaults to 640
-      h: height of the image. Defaults to 640
-      padw: padding width. Defaults to 0
-      padh: padding height. Defaults to 0
-
+        x (numpy.ndarray) or (torch.Tensor): The input tensor of normalized bounding box coordinates
+        w (int): The width of the image. Defaults to 640
+        h (int): The height of the image. Defaults to 640
+        padw (int): The width of the padding. Defaults to 0
+        padh (int): The height of the padding. Defaults to 0
    Returns:
-      the x and y coordinates of the top left corner of the bounding box.
+        y (numpy.ndarray) or (torch.Tensor): The x and y coordinates in pixels, shape (n,2)
    """
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[..., 0] = w * x[..., 0] + padw  # top left x
@ -401,13 +391,12 @@ def xyn2xy(x, w=640, h=640, padw=0, padh=0):

 def xywh2ltwh(x):
    """
-    > It converts the bounding box from [x, y, w, h] to [x1, y1, w, h] where xy1=top-left
+    Convert the bounding box format from [x, y, w, h] to [x1, y1, w, h], where x1, y1 are the top-left coordinates.

    Args:
-      x: the x coordinate of the center of the bounding box
-
+        x (numpy.ndarray) or (torch.Tensor): The input tensor with the bounding box coordinates in the xywh format
    Returns:
-      the top left x and y coordinates of the bounding box.
+        y (numpy.ndarray) or (torch.Tensor): The bounding box coordinates in the ltwh format, i.e. [x1, y1, w, h]
    """
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[:, 0] = x[:, 0] - x[:, 2] / 2  # top left x
@ -417,13 +406,12 @@ def xywh2ltwh(x):

 def xyxy2ltwh(x):
    """
-    > Convert nx4 boxes from [x1, y1, x2, y2] to [x1, y1, w, h] where xy1=top-left, xy2=bottom-right
+    Convert nx4 bounding boxes from [x1, y1, x2, y2] to [x1, y1, w, h], where xy1=top-left, xy2=bottom-right

    Args:
-      x: the input tensor
-
+      x (numpy.ndarray) or (torch.Tensor): The input tensor with the bounding boxes coordinates in the xyxy format
    Returns:
-      the xyxy2ltwh function.
+      y (numpy.ndarray) or (torch.Tensor): The bounding box coordinates in the ltwh format, i.e. [x1, y1, w, h].
    """
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[:, 2] = x[:, 2] - x[:, 0]  # width
@ -433,10 +421,10 @@ def xyxy2ltwh(x):

 def ltwh2xywh(x):
    """
-    > Convert nx4 boxes from [x1, y1, w, h] to [x, y, w, h] where xy1=top-left, xy=center
+    Convert nx4 boxes from [x1, y1, w, h] to [x, y, w, h] where xy1=top-left, xy=center

    Args:
-      x: the input tensor
+      x (numpy.ndarray) or (torch.Tensor): the input boxes in [x1, y1, w, h] format
    """
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[:, 0] = x[:, 0] + x[:, 2] / 2  # center x
@ -446,14 +434,13 @@ def ltwh2xywh(x):

 def ltwh2xyxy(x):
    """
-    > It converts the bounding box from [x1, y1, w, h] to [x1, y1, x2, y2] where xy1=top-left,
-    xy2=bottom-right
+    It converts the bounding box from [x1, y1, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right

    Args:
-      x: the input image
+      x (numpy.ndarray) or (torch.Tensor): the input bounding boxes in [x1, y1, w, h] format

    Returns:
-      the xyxy coordinates of the bounding boxes.
+      y (numpy.ndarray) or (torch.Tensor): the xyxy coordinates of the bounding boxes.
    """
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[:, 2] = x[:, 2] + x[:, 0]  # width
@ -463,14 +450,13 @@ def ltwh2xyxy(x):

 def segments2boxes(segments):
    """
-    > It converts segment labels to box labels, i.e. (cls, xy1, xy2, ...) to (cls, xywh)
+    It converts segment labels to box labels, i.e. (cls, xy1, xy2, ...) to (cls, xywh)

    Args:
-      segments: list of segments, each segment is a list of points, each point is a list of x, y
-    coordinates
+      segments (list): list of segments, each segment is a list of points, each point is a list of x, y coordinates

    Returns:
-      the xywh coordinates of the bounding boxes.
+      (np.array): the xywh coordinates of the bounding boxes.
    """
    boxes = []
    for s in segments:
@ -481,15 +467,14 @@ def segments2boxes(segments):

 def resample_segments(segments, n=1000):
    """
-    > It takes a list of segments (n,2) and returns a list of segments (n,2) where each segment has been
-    up-sampled to n points
+    It takes a list of segments (n,2) and returns a list of segments (n,2) where each segment has been up-sampled to n points

    Args:
-      segments: a list of (n,2) arrays, where n is the number of points in the segment.
-      n: number of points to resample the segment to. Defaults to 1000
+      segments (list): a list of (n,2) arrays, where n is the number of points in the segment.
+      n (int): number of points to resample the segment to. Defaults to 1000

    Returns:
-      the resampled segments.
+      segments (list): the resampled segments.
    """
    for i, s in enumerate(segments):
        s = np.concatenate((s, s[0:1, :]), axis=0)
@ -501,14 +486,14 @@ def resample_segments(segments, n=1000):

 def crop_mask(masks, boxes):
    """
-    > It takes a mask and a bounding box, and returns a mask that is cropped to the bounding box
+    It takes a mask and a bounding box, and returns a mask that is cropped to the bounding box

    Args:
-      masks: [h, w, n] tensor of masks
-      boxes: [n, 4] tensor of bbox coords in relative point form
+      masks (torch.tensor): [n, h, w] tensor of masks
+      boxes (torch.tensor): [n, 4] tensor of bbox coordinates in relative point form

    Returns:
-      The masks are being cropped to the bounding box.
+      (torch.tensor): The masks cropped to the bounding boxes.
    """
    n, h, w = masks.shape
    x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1)  # x1 shape(1,1,n)
@ -520,17 +505,17 @@ def crop_mask(masks, boxes):

 def process_mask_upsample(protos, masks_in, bboxes, shape):
    """
-    > It takes the output of the mask head, and applies the mask to the bounding boxes. This produces masks of higher
+    It takes the output of the mask head, and applies the mask to the bounding boxes. This produces masks of higher
    quality but is slower.

    Args:
-      protos: [mask_dim, mask_h, mask_w]
-      masks_in: [n, mask_dim], n is number of masks after nms
-      bboxes: [n, 4], n is number of masks after nms
-      shape: the size of the input image
+      protos (torch.tensor): [mask_dim, mask_h, mask_w]
+      masks_in (torch.tensor): [n, mask_dim], n is number of masks after nms
+      bboxes (torch.tensor): [n, 4], n is number of masks after nms
+      shape (tuple): the size of the input image (h,w)

    Returns:
-      mask
+      (torch.tensor): The upsampled masks.
    """
    c, mh, mw = protos.shape  # CHW
    masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw)
@ -541,17 +526,17 @@ def process_mask_upsample(protos, masks_in, bboxes, shape):

 def process_mask(protos, masks_in, bboxes, shape, upsample=False):
    """
-    > It takes the output of the mask head, and applies the mask to the bounding boxes. This is faster but produces
+    It takes the output of the mask head, and applies the mask to the bounding boxes. This is faster but produces
    downsampled quality of mask

    Args:
-      protos: [mask_dim, mask_h, mask_w]
-      masks_in: [n, mask_dim], n is number of masks after nms
-      bboxes: [n, 4], n is number of masks after nms
-      shape: the size of the input image
+      protos (torch.tensor): [mask_dim, mask_h, mask_w]
+      masks_in (torch.tensor): [n, mask_dim], n is number of masks after nms
+      bboxes (torch.tensor): [n, 4], n is number of masks after nms
+      shape (tuple): the size of the input image (h,w)

    Returns:
-      mask
+      (torch.tensor): The processed masks.
    """

    c, mh, mw = protos.shape  # CHW
@ -572,16 +557,16 @@ def process_mask(protos, masks_in, bboxes, shape, upsample=False):

 def process_mask_native(protos, masks_in, bboxes, shape):
    """
-    > It takes the output of the mask head, and crops it after upsampling to the bounding boxes.
+    It takes the output of the mask head, and crops it after upsampling to the bounding boxes.

    Args:
-      protos: [mask_dim, mask_h, mask_w]
-      masks_in: [n, mask_dim], n is number of masks after nms
-      bboxes: [n, 4], n is number of masks after nms
-      shape: input_image_size, (h, w)
+      protos (torch.tensor): [mask_dim, mask_h, mask_w]
+      masks_in (torch.tensor): [n, mask_dim], n is number of masks after nms
+      bboxes (torch.tensor): [n, 4], n is number of masks after nms
+      shape (tuple): the size of the input image (h,w)

    Returns:
-      masks: [h, w, n]
+      masks (torch.tensor): The returned masks with dimensions [h, w, n]
    """
    c, mh, mw = protos.shape  # CHW
    masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw)
@ -598,17 +583,17 @@ def process_mask_native(protos, masks_in, bboxes, shape):

 def scale_segments(img1_shape, segments, img0_shape, ratio_pad=None, normalize=False):
    """
-    > Rescale segment coords (xyxy) from img1_shape to img0_shape
+    Rescale segment coordinates (xyxy) from img1_shape to img0_shape

    Args:
-      img1_shape: The shape of the image that the segments are from.
-      segments: the segments to be scaled
-      img0_shape: the shape of the image that the segmentation is being applied to
-      ratio_pad: the ratio of the image size to the padded image size.
-      normalize: If True, the coordinates will be normalized to the range [0, 1]. Defaults to False
+      img1_shape (tuple): The shape of the image that the segments are from.
+      segments (torch.tensor): the segments to be scaled
+      img0_shape (tuple): the shape of the image that the segmentation is being applied to
+      ratio_pad (tuple): the ratio of the image size to the padded image size.
+      normalize (bool): If True, the coordinates will be normalized to the range [0, 1]. Defaults to False

    Returns:
-      the segmented image.
+      segments (torch.tensor): the rescaled segment coordinates.
    """
    if ratio_pad is None:  # calculate from img0_shape
        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain  = old / new
@ -629,11 +614,11 @@ def scale_segments(img1_shape, segments, img0_shape, ratio_pad=None, normalize=F

 def masks2segments(masks, strategy='largest'):
    """
-    > It takes a list of masks(n,h,w) and returns a list of segments(n,xy)
+    It takes a list of masks(n,h,w) and returns a list of segments(n,xy)

    Args:
-      masks: the output of the model, which is a tensor of shape (batch_size, 160, 160)
-      strategy: 'concat' or 'largest'. Defaults to largest
+      masks (torch.tensor): the output of the model, which is a tensor of shape (batch_size, 160, 160)
+      strategy (str): 'concat' or 'largest'. Defaults to largest

    Returns:
      segments (List): list of segment masks
@ -654,12 +639,12 @@ def masks2segments(masks, strategy='largest'):

 def clip_segments(segments, shape):
    """
-    > It takes a list of line segments (x1,y1,x2,y2) and clips them to the image shape (height, width)
+    It takes a list of line segments (x1,y1,x2,y2) and clips them to the image shape (height, width)

    Args:
-      segments: a list of segments, each segment is a list of points, each point is a list of x,y
+      segments (list): a list of segments, each segment is a list of points, each point is a list of x,y
    coordinates
-      shape: the shape of the image
+      shape (tuple): the shape of the image
    """
    if isinstance(segments, torch.Tensor):  # faster individually
        segments[:, 0].clamp_(0, shape[1])  # x
@ -670,5 +655,13 @@ def clip_segments(segments, shape):


 def clean_str(s):
-    # Cleans a string by replacing special characters with underscore _
+    """
+    Cleans a string by replacing special characters with underscore _
+
+    Args:
+      s (str): a string needing special characters replaced
+
+    Returns:
+      (str): a string with special characters replaced by an underscore _
+    """
    return re.sub(pattern="[|@#!¡·$€%&()=?¿^*;:,¨´><+]", repl="_", string=s)