default.yaml type comments (#3237)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2023-06-18 01:24:59 +02:00
parent 21df296425
commit e78fb683f4
5 changed files with 118 additions and 123 deletions
--- a/ultralytics/models/v6/yolov6.yaml
+++ b/ultralytics/models/v6/yolov6.yaml
@ -2,8 +2,8 @@
 # YOLOv6 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/models/yolov6
 # Parameters
-act: nn.ReLU()
 nc: 80  # number of classes
+activation: nn.ReLU()  # (optional) model default activation function
 scales: # model compound scaling constants, i.e. 'model=yolov6n.yaml' will call yolov6.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024]
--- a/ultralytics/nn/tasks.py
+++ b/ultralytics/nn/tasks.py
@ -422,9 +422,7 @@ class RTDETRDetectionModel(DetectionModel):
        # NOTE: preprocess gt_bbox and gt_labels to list.
        bs = len(img)
        batch_idx = batch['batch_idx']
-        gt_groups = []
+        gt_groups = [(batch_idx == i).sum().item() for i in range(bs)]
-        for i in range(bs):
-            gt_groups.append((batch_idx == i).sum().item())
        targets = {
            'cls': batch['cls'].to(img.device, dtype=torch.long).view(-1),
            'bboxes': batch['bboxes'].to(device=img.device),
@ -606,7 +604,7 @@ def parse_model(d, ch, verbose=True):  # model_dict, input_channels(3)
    # Args
    max_channels = float('inf')
-    nc, act, scales = (d.get(x) for x in ('nc', 'act', 'scales'))
+    nc, act, scales = (d.get(x) for x in ('nc', 'activation', 'scales'))
    depth, width, kpt_shape = (d.get(x, 1.0) for x in ('depth_multiple', 'width_multiple', 'kpt_shape'))
    if scales:
        scale = d.get('scale')
--- a/ultralytics/vit/sam/modules/decoders.py
+++ b/ultralytics/vit/sam/modules/decoders.py
@ -22,20 +22,15 @@ class MaskDecoder(nn.Module):
        iou_head_hidden_dim: int = 256,
    ) -> None:
        """
-        Predicts masks given an image and prompt embeddings, using a
+        Predicts masks given an image and prompt embeddings, using a transformer architecture.
-        transformer architecture.
        Arguments:
-          transformer_dim (int): the channel dimension of the transformer
+            transformer_dim (int): the channel dimension of the transformer module
            transformer (nn.Module): the transformer used to predict masks
-          num_multimask_outputs (int): the number of masks to predict
+            num_multimask_outputs (int): the number of masks to predict when disambiguating masks
-            when disambiguating masks
+            activation (nn.Module): the type of activation to use when upscaling masks
-          activation (nn.Module): the type of activation to use when
+            iou_head_depth (int): the depth of the MLP used to predict mask quality
-            upscaling masks
+            iou_head_hidden_dim (int): the hidden dimension of the MLP used to predict mask quality
-          iou_head_depth (int): the depth of the MLP used to predict
-            mask quality
-          iou_head_hidden_dim (int): the hidden dimension of the MLP
-            used to predict mask quality
        """
        super().__init__()
        self.transformer_dim = transformer_dim
@ -75,8 +70,7 @@ class MaskDecoder(nn.Module):
            image_pe (torch.Tensor): positional encoding with the shape of image_embeddings
            sparse_prompt_embeddings (torch.Tensor): the embeddings of the points and boxes
            dense_prompt_embeddings (torch.Tensor): the embeddings of the mask inputs
-          multimask_output (bool): Whether to return multiple masks or a single
+            multimask_output (bool): Whether to return multiple masks or a single mask.
-            mask.
        Returns:
            torch.Tensor: batched predicted masks
@ -136,9 +130,11 @@ class MaskDecoder(nn.Module):
        return masks, iou_pred
-# Lightly adapted from
-# https://github.com/facebookresearch/MaskFormer/blob/main/mask_former/modeling/transformer/transformer_predictor.py # noqa
 class MLP(nn.Module):
+    """
+    Lightly adapted from
+    https://github.com/facebookresearch/MaskFormer/blob/main/mask_former/modeling/transformer/transformer_predictor.py
+    """
    def __init__(
        self,
--- a/ultralytics/vit/utils/ops.py
+++ b/ultralytics/vit/utils/ops.py
@ -249,7 +249,7 @@ def get_cdn_group(batch,
            attn_mask[max_nums * 2 * i:max_nums * 2 * (i + 1), max_nums * 2 * (i + 1):num_dn] = True
            attn_mask[max_nums * 2 * i:max_nums * 2 * (i + 1), :max_nums * 2 * i] = True
    dn_meta = {
-        'dn_pos_idx': [p.reshape(-1) for p in pos_idx.cpu().split([n for n in gt_groups], dim=1)],
+        'dn_pos_idx': [p.reshape(-1) for p in pos_idx.cpu().split(list(gt_groups), dim=1)],
        'dn_num_group': num_group,
        'dn_num_split': [num_dn, num_queries]}
@ -258,5 +258,6 @@ def get_cdn_group(batch,
 def inverse_sigmoid(x, eps=1e-6):
     """Return the inverse sigmoid (logit) of x after clipping x to [0, 1], using eps for stability."""
     bounded = x.clip(min=0., max=1.)
     # eps keeps the denominator non-zero at x == 1 and the log argument non-zero at x == 0
     ratio = bounded / (1 - bounded + eps)
     return torch.log(ratio + eps)
--- a/ultralytics/yolo/cfg/default.yaml
+++ b/ultralytics/yolo/cfg/default.yaml
@ -1,117 +1,117 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 # Default training settings and hyperparameters for medium-augmentation COCO training
-task: detect  # YOLO task, i.e. detect, segment, classify, pose
+task: detect  # (str) YOLO task, i.e. detect, segment, classify, pose
-mode: train  # YOLO mode, i.e. train, val, predict, export, track, benchmark
+mode: train  # (str) YOLO mode, i.e. train, val, predict, export, track, benchmark
 # Train settings -------------------------------------------------------------------------------------------------------
-model:  # path to model file, i.e. yolov8n.pt, yolov8n.yaml
+model:  # (str, optional) path to model file, i.e. yolov8n.pt, yolov8n.yaml
-data:  # path to data file, i.e. coco128.yaml
+data:  # (str, optional) path to data file, i.e. coco128.yaml
-epochs: 100  # number of epochs to train for
+epochs: 100  # (int) number of epochs to train for
-patience: 50  # epochs to wait for no observable improvement for early stopping of training
+patience: 50  # (int) epochs to wait for no observable improvement for early stopping of training
-batch: 16  # number of images per batch (-1 for AutoBatch)
+batch: 16  # (int) number of images per batch (-1 for AutoBatch)
-imgsz: 640  # size of input images as integer or w,h
+imgsz: 640  # (int | list) size of input images as integer or w,h
-save: True  # save train checkpoints and predict results
+save: True  # (bool) save train checkpoints and predict results
-save_period: -1 # Save checkpoint every x epochs (disabled if < 1)
+save_period: -1 # (int) Save checkpoint every x epochs (disabled if < 1)
-cache: False  # True/ram, disk or False. Use cache for data loading
+cache: False  # (bool | str) True/ram, disk or False. Use cache for data loading
-device:  # device to run on, i.e. cuda device=0 or device=0,1,2,3 or device=cpu
+device:  # (int | str | list, optional) device to run on, i.e. cuda device=0 or device=0,1,2,3 or device=cpu
-workers: 8  # number of worker threads for data loading (per RANK if DDP)
+workers: 8  # (int) number of worker threads for data loading (per RANK if DDP)
-project:  # project name
+project:  # (str, optional) project name
-name:  # experiment name, results saved to 'project/name' directory
+name:  # (str, optional) experiment name, results saved to 'project/name' directory
-exist_ok: False  # whether to overwrite existing experiment
+exist_ok: False  # (bool) whether to overwrite existing experiment
-pretrained: False  # whether to use a pretrained model
+pretrained: False  # (bool) whether to use a pretrained model
-optimizer: auto  # optimizer to use, choices=[SGD, Adam, Adamax, AdamW, NAdam, RAdam, RMSProp, auto]
+optimizer: auto  # (str) optimizer to use, choices=[SGD, Adam, Adamax, AdamW, NAdam, RAdam, RMSProp, auto]
-verbose: True  # whether to print verbose output
+verbose: True  # (bool) whether to print verbose output
-seed: 0  # random seed for reproducibility
+seed: 0  # (int) random seed for reproducibility
-deterministic: True  # whether to enable deterministic mode
+deterministic: True  # (bool) whether to enable deterministic mode
-single_cls: False  # train multi-class data as single-class
+single_cls: False  # (bool) train multi-class data as single-class
-rect: False  # rectangular training if mode='train' or rectangular validation if mode='val'
+rect: False  # (bool) rectangular training if mode='train' or rectangular validation if mode='val'
-cos_lr: False  # use cosine learning rate scheduler
+cos_lr: False  # (bool) use cosine learning rate scheduler
 close_mosaic: 0  # (int) disable mosaic augmentation for final epochs
-resume: False  # resume training from last checkpoint
+resume: False  # (bool) resume training from last checkpoint
-amp: True  # Automatic Mixed Precision (AMP) training, choices=[True, False], True runs AMP check
+amp: True  # (bool) Automatic Mixed Precision (AMP) training, choices=[True, False], True runs AMP check
-fraction: 1.0  # dataset fraction to train on (default is 1.0, all images in train set)
+fraction: 1.0  # (float) dataset fraction to train on (default is 1.0, all images in train set)
-profile: False  # profile ONNX and TensorRT speeds during training for loggers
+profile: False  # (bool) profile ONNX and TensorRT speeds during training for loggers
 # Segmentation
-overlap_mask: True  # masks should overlap during training (segment train only)
+overlap_mask: True  # (bool) masks should overlap during training (segment train only)
-mask_ratio: 4  # mask downsample ratio (segment train only)
+mask_ratio: 4  # (int) mask downsample ratio (segment train only)
 # Classification
-dropout: 0.0  # use dropout regularization (classify train only)
+dropout: 0.0  # (float) use dropout regularization (classify train only)
 # Val/Test settings ----------------------------------------------------------------------------------------------------
-val: True  # validate/test during training
+val: True  # (bool) validate/test during training
-split: val  # dataset split to use for validation, i.e. 'val', 'test' or 'train'
+split: val  # (str) dataset split to use for validation, i.e. 'val', 'test' or 'train'
-save_json: False  # save results to JSON file
+save_json: False  # (bool) save results to JSON file
-save_hybrid: False  # save hybrid version of labels (labels + additional predictions)
+save_hybrid: False  # (bool) save hybrid version of labels (labels + additional predictions)
-conf:  # object confidence threshold for detection (default 0.25 predict, 0.001 val)
+conf:  # (float, optional) object confidence threshold for detection (default 0.25 predict, 0.001 val)
-iou: 0.7  # intersection over union (IoU) threshold for NMS
+iou: 0.7  # (float) intersection over union (IoU) threshold for NMS
-max_det: 300  # maximum number of detections per image
+max_det: 300  # (int) maximum number of detections per image
-half: False  # use half precision (FP16)
+half: False  # (bool) use half precision (FP16)
-dnn: False  # use OpenCV DNN for ONNX inference
+dnn: False  # (bool) use OpenCV DNN for ONNX inference
-plots: True  # save plots during train/val
+plots: True  # (bool) save plots during train/val
 # Prediction settings --------------------------------------------------------------------------------------------------
-source:  # source directory for images or videos
+source:  # (str, optional) source directory for images or videos
-show: False  # show results if possible
+show: False  # (bool) show results if possible
-save_txt: False  # save results as .txt file
+save_txt: False  # (bool) save results as .txt file
-save_conf: False  # save results with confidence scores
+save_conf: False  # (bool) save results with confidence scores
-save_crop: False  # save cropped images with results
+save_crop: False  # (bool) save cropped images with results
-show_labels: True  # show object labels in plots
+show_labels: True  # (bool) show object labels in plots
-show_conf: True  # show object confidence scores in plots
+show_conf: True  # (bool) show object confidence scores in plots
-vid_stride: 1  # video frame-rate stride
+vid_stride: 1  # (int) video frame-rate stride
-line_width:   # line width of the bounding boxes
+line_width:   # (int, optional) line width of the bounding boxes, auto if missing
-visualize: False  # visualize model features
+visualize: False  # (bool) visualize model features
-augment: False  # apply image augmentation to prediction sources
+augment: False  # (bool) apply image augmentation to prediction sources
-agnostic_nms: False  # class-agnostic NMS
+agnostic_nms: False  # (bool) class-agnostic NMS
-classes:  # filter results by class, i.e. class=0, or class=[0,2,3]
+classes:  # (int | list[int], optional) filter results by class, i.e. class=0, or class=[0,2,3]
-retina_masks: False  # use high-resolution segmentation masks
+retina_masks: False  # (bool) use high-resolution segmentation masks
-boxes: True  # Show boxes in segmentation predictions
+boxes: True  # (bool) Show boxes in segmentation predictions
 # Export settings ------------------------------------------------------------------------------------------------------
-format: torchscript  # format to export to
+format: torchscript  # (str) format to export to, choices at https://docs.ultralytics.com/modes/export/#export-formats
-keras: False  # use Keras
+keras: False  # (bool) use Keras
-optimize: False  # TorchScript: optimize for mobile
+optimize: False  # (bool) TorchScript: optimize for mobile
-int8: False  # CoreML/TF INT8 quantization
+int8: False  # (bool) CoreML/TF INT8 quantization
-dynamic: False  # ONNX/TF/TensorRT: dynamic axes
+dynamic: False  # (bool) ONNX/TF/TensorRT: dynamic axes
-simplify: False  # ONNX: simplify model
+simplify: False  # (bool) ONNX: simplify model
-opset:  # ONNX: opset version (optional)
+opset:  # (int, optional) ONNX: opset version
-workspace: 4  # TensorRT: workspace size (GB)
+workspace: 4  # (int) TensorRT: workspace size (GB)
-nms: False  # CoreML: add NMS
+nms: False  # (bool) CoreML: add NMS
 # Hyperparameters ------------------------------------------------------------------------------------------------------
-lr0: 0.01  # initial learning rate (i.e. SGD=1E-2, Adam=1E-3)
+lr0: 0.01  # (float) initial learning rate (i.e. SGD=1E-2, Adam=1E-3)
-lrf: 0.01  # final learning rate (lr0 * lrf)
+lrf: 0.01  # (float) final learning rate (lr0 * lrf)
-momentum: 0.937  # SGD momentum/Adam beta1
+momentum: 0.937  # (float) SGD momentum/Adam beta1
-weight_decay: 0.0005  # optimizer weight decay 5e-4
+weight_decay: 0.0005  # (float) optimizer weight decay 5e-4
-warmup_epochs: 3.0  # warmup epochs (fractions ok)
+warmup_epochs: 3.0  # (float) warmup epochs (fractions ok)
-warmup_momentum: 0.8  # warmup initial momentum
+warmup_momentum: 0.8  # (float) warmup initial momentum
-warmup_bias_lr: 0.1  # warmup initial bias lr
+warmup_bias_lr: 0.1  # (float) warmup initial bias lr
-box: 7.5  # box loss gain
+box: 7.5  # (float) box loss gain
-cls: 0.5  # cls loss gain (scale with pixels)
+cls: 0.5  # (float) cls loss gain (scale with pixels)
-dfl: 1.5  # dfl loss gain
+dfl: 1.5  # (float) dfl loss gain
-pose: 12.0  # pose loss gain
+pose: 12.0  # (float) pose loss gain
-kobj: 1.0  # keypoint obj loss gain
+kobj: 1.0  # (float) keypoint obj loss gain
-label_smoothing: 0.0  # label smoothing (fraction)
+label_smoothing: 0.0  # (float) label smoothing (fraction)
-nbs: 64  # nominal batch size
+nbs: 64  # (int) nominal batch size
-hsv_h: 0.015  # image HSV-Hue augmentation (fraction)
+hsv_h: 0.015  # (float) image HSV-Hue augmentation (fraction)
-hsv_s: 0.7  # image HSV-Saturation augmentation (fraction)
+hsv_s: 0.7  # (float) image HSV-Saturation augmentation (fraction)
-hsv_v: 0.4  # image HSV-Value augmentation (fraction)
+hsv_v: 0.4  # (float) image HSV-Value augmentation (fraction)
-degrees: 0.0  # image rotation (+/- deg)
+degrees: 0.0  # (float) image rotation (+/- deg)
-translate: 0.1  # image translation (+/- fraction)
+translate: 0.1  # (float) image translation (+/- fraction)
-scale: 0.5  # image scale (+/- gain)
+scale: 0.5  # (float) image scale (+/- gain)
-shear: 0.0  # image shear (+/- deg)
+shear: 0.0  # (float) image shear (+/- deg)
-perspective: 0.0  # image perspective (+/- fraction), range 0-0.001
+perspective: 0.0  # (float) image perspective (+/- fraction), range 0-0.001
-flipud: 0.0  # image flip up-down (probability)
+flipud: 0.0  # (float) image flip up-down (probability)
-fliplr: 0.5  # image flip left-right (probability)
+fliplr: 0.5  # (float) image flip left-right (probability)
-mosaic: 1.0  # image mosaic (probability)
+mosaic: 1.0  # (float) image mosaic (probability)
-mixup: 0.0  # image mixup (probability)
+mixup: 0.0  # (float) image mixup (probability)
-copy_paste: 0.0  # segment copy-paste (probability)
+copy_paste: 0.0  # (float) segment copy-paste (probability)
 # Custom config.yaml ---------------------------------------------------------------------------------------------------
-cfg:  # for overriding defaults.yaml
+cfg:  # (str, optional) for overriding default.yaml
 # Debug, do not modify -------------------------------------------------------------------------------------------------
-v5loader: False  # use legacy YOLOv5 dataloader
+v5loader: False  # (bool) use legacy YOLOv5 dataloader (deprecated)
 # Tracker settings ------------------------------------------------------------------------------------------------------
-tracker: botsort.yaml  # tracker type, ['botsort.yaml', 'bytetrack.yaml']
+tracker: botsort.yaml  # (str) tracker type, choices=[botsort.yaml, bytetrack.yaml]