From d9a0fba251d723e55858aa301342822e3b002978 Mon Sep 17 00:00:00 2001
From: Glenn Jocher <glenn.jocher@ultralytics.com>
Date: Sat, 21 Jan 2023 21:22:40 +0100
Subject: [PATCH] `ultralytics 8.0.14` Hydra removal fixes and cleanup (#542)

Co-authored-by: ayush chaurasia <ayush.chaurarsia@gmail.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Kamlesh Kumar <patelkamleshpatel364@gmail.com>
---
 docs/cfg.md                                   | 214 ++++++++++++++++++
 docs/cli.md                                   |   4 +-
 docs/config.md                                | 202 -----------------
 docs/predict.md                               |  18 +-
 docs/tasks/classification.md                  |   2 +-
 docs/tasks/detection.md                       |   2 +-
 docs/tasks/segmentation.md                    |   2 +-
 mkdocs.yml                                    |   2 +-
 setup.py                                      |   3 +-
 tests/test_engine.py                          |   4 +-
 ultralytics/__init__.py                       |   2 +-
 ultralytics/hub/utils.py                      |   4 +-
 ultralytics/nn/tasks.py                       |   1 -
 ultralytics/yolo/{configs => cfg}/__init__.py |  37 +--
 .../yolo/{configs => cfg}/default.yaml        |  39 ++--
 ultralytics/yolo/data/dataloaders/v5loader.py |   2 +-
 ultralytics/yolo/data/dataset.py              |   4 +-
 ultralytics/yolo/engine/exporter.py           |   4 +-
 ultralytics/yolo/engine/model.py              |  10 +-
 ultralytics/yolo/engine/predictor.py          |   9 +-
 ultralytics/yolo/engine/results.py            |  44 ++--
 ultralytics/yolo/engine/trainer.py            |   6 +-
 ultralytics/yolo/engine/validator.py          |   4 +-
 ultralytics/yolo/utils/__init__.py            |   2 +-
 ultralytics/yolo/utils/callbacks/clearml.py   |   2 +-
 ultralytics/yolo/utils/callbacks/comet.py     |   2 +-
 ultralytics/yolo/utils/torch_utils.py         |   3 +-
 ultralytics/yolo/v8/classify/predict.py       |   2 +-
 ultralytics/yolo/v8/detect/predict.py         |   5 +-
 ultralytics/yolo/v8/segment/predict.py        |   5 +-
 30 files changed, 339 insertions(+), 301 deletions(-)
 create mode 100644 docs/cfg.md
 delete mode 100644 docs/config.md
 rename ultralytics/yolo/{configs => cfg}/__init__.py (88%)
 rename ultralytics/yolo/{configs => cfg}/default.yaml (78%)

diff --git a/docs/cfg.md b/docs/cfg.md
new file mode 100644
index 0000000..c1bdab7
--- /dev/null
+++ b/docs/cfg.md
@@ -0,0 +1,214 @@
+YOLO settings and hyperparameters play a critical role in the model's performance, speed, and accuracy. These settings
+and hyperparameters can affect the model's behavior at various stages of the model development process, including
+training, validation, and prediction.
+
+Properly setting and tuning these parameters can have a significant impact on the model's ability to learn effectively
+from the training data and generalize to new data. For example, choosing an appropriate learning rate, batch size, and
+optimization algorithm can greatly affect the model's convergence speed and accuracy. Similarly, setting the correct
+confidence threshold and non-maximum suppression (NMS) threshold can affect the model's performance on detection tasks.
+
+It is important to carefully consider and experiment with these settings and hyperparameters to achieve the best
+possible performance for a given task. This can involve trial and error, as well as using techniques such as
+hyperparameter optimization to search for the optimal set of parameters.
+
+In summary, YOLO settings and hyperparameters are a key factor in the success of a YOLO model, and it is important to
+pay careful attention to them to achieve the desired results.
+
+### Setting the operation type
+
+YOLO models can be used for a variety of tasks, including detection, segmentation, and classification. These tasks
+differ in the type of output they produce and the specific problem they are designed to solve.
+
+- Detection: Detection tasks involve identifying and localizing objects or regions of interest in an image or video.
+  YOLO models can be used for object detection tasks by predicting the bounding boxes and class labels of objects in an
+  image.
+- Segmentation: Segmentation tasks involve dividing an image or video into regions or pixels that correspond to
+  different objects or classes. YOLO models can be used for image segmentation tasks by predicting a mask or label for
+  each pixel in an image.
+- Classification: Classification tasks involve assigning a class label to an input, such as an image or text. YOLO
+  models can be used for image classification tasks by predicting the class label of an input image.
+
+YOLO models can be used in different modes depending on the specific problem you are trying to solve. These modes
+include train, val, and predict.
+
+- Train: The train mode is used to train the model on a dataset. This mode is typically used during the development and
+  testing phase of a model.
+- Val: The val mode is used to evaluate the model's performance on a validation dataset. This mode is typically used to
+  tune the model's hyperparameters and detect overfitting.
+- Predict: The predict mode is used to make predictions with the model on new data. This mode is typically used in
+  production or when deploying the model to users.
+
+| Key    | Value    | Description                                                                                   |
+|--------|----------|-----------------------------------------------------------------------------------------------|
+| task   | 'detect' | inference task, i.e. detect, segment, or classify                                             |
+| mode   | 'train'  | YOLO mode, i.e. train, val, predict, or export                                                |
+| resume | False    | resume training from last checkpoint or custom checkpoint if passed as resume=path/to/best.pt |
+| model  | null     | path to model file, i.e. yolov8n.pt, yolov8n.yaml                                             |
+| data   | null     | path to data file, i.e. coco128.yaml                                                          |
+
+### Training
+
+Training settings for YOLO models refer to the various hyperparameters and configurations used to train the model on a
+dataset. These settings can affect the model's performance, speed, and accuracy. Some common YOLO training settings
+include the batch size, learning rate, momentum, and weight decay. Other factors that may affect the training process
+include the choice of optimizer, the choice of loss function, and the size and composition of the training dataset. It
+is important to carefully tune and experiment with these settings to achieve the best possible performance for a given
+task.
+
+| Key             | Value  | Description                                                                 |
+|-----------------|--------|-----------------------------------------------------------------------------|
+| model           | null   | path to model file, i.e. yolov8n.pt, yolov8n.yaml                           |
+| data            | null   | path to data file, i.e. coco128.yaml                                        |
+| epochs          | 100    | number of epochs to train for                                               |
+| patience        | 50     | epochs to wait for no observable improvement for early stopping of training |
+| batch           | 16     | number of images per batch (-1 for AutoBatch)                               |
+| imgsz           | 640    | size of input images as integer or w,h                                      |
+| save            | True   | save train checkpoints and predict results                                  |
+| cache           | False  | True/ram, disk or False. Use cache for data loading                         |
+| device          | null   | device to run on, i.e. cuda device=0 or device=0,1,2,3 or device=cpu        |
+| workers         | 8      | number of worker threads for data loading (per RANK if DDP)                 |
+| project         | null   | project name                                                                |
+| name            | null   | experiment name                                                             |
+| exist_ok        | False  | whether to overwrite existing experiment                                    |
+| pretrained      | False  | whether to use a pretrained model                                           |
+| optimizer       | 'SGD'  | optimizer to use, choices=['SGD', 'Adam', 'AdamW', 'RMSProp']               |
+| verbose         | False  | whether to print verbose output                                             |
+| seed            | 0      | random seed for reproducibility                                             |
+| deterministic   | True   | whether to enable deterministic mode                                        |
+| single_cls      | False  | train multi-class data as single-class                                      |
+| image_weights   | False  | use weighted image selection for training                                   |
+| rect            | False  | support rectangular training                                                |
+| cos_lr          | False  | use cosine learning rate scheduler                                          |
+| close_mosaic    | 10     | disable mosaic augmentation for final 10 epochs                             |
+| resume          | False  | resume training from last checkpoint                                        |
+| lr0             | 0.01   | initial learning rate (i.e. SGD=1E-2, Adam=1E-3)                            |
+| lrf             | 0.01   | final learning rate (lr0 * lrf)                                             |
+| momentum        | 0.937  | SGD momentum/Adam beta1                                                     |
+| weight_decay    | 0.0005 | optimizer weight decay 5e-4                                                 |
+| warmup_epochs   | 3.0    | warmup epochs (fractions ok)                                                |
+| warmup_momentum | 0.8    | warmup initial momentum                                                     |
+| warmup_bias_lr  | 0.1    | warmup initial bias lr                                                      |
+| box             | 7.5    | box loss gain                                                               |
+| cls             | 0.5    | cls loss gain (scale with pixels)                                           |
+| dfl             | 1.5    | dfl loss gain                                                               |
+| fl_gamma        | 0.0    | focal loss gamma (efficientDet default gamma=1.5)                           |
+| label_smoothing | 0.0    | label smoothing (fraction)                                                  |
+| nbs             | 64     | nominal batch size                                                          |
+| overlap_mask    | True   | masks should overlap during training (segment train only)                   |
+| mask_ratio      | 4      | mask downsample ratio (segment train only)                                  |
+| dropout         | 0.0    | use dropout regularization (classify train only)                            |
+
+### Prediction
+
+Prediction settings for YOLO models refer to the various hyperparameters and configurations used to make predictions
+with the model on new data. These settings can affect the model's performance, speed, and accuracy. Some common YOLO
+prediction settings include the confidence threshold, non-maximum suppression (NMS) threshold, and the number of classes
+to consider. Other factors that may affect the prediction process include the size and format of the input data, the
+presence of additional features such as masks or multiple labels per box, and the specific task the model is being used
+for. It is important to carefully tune and experiment with these settings to achieve the best possible performance for a
+given task.
+
+| Key            | Value                | Description                                             |
+|----------------|----------------------|---------------------------------------------------------|
+| source         | 'ultralytics/assets' | source directory for images or videos                   |
+| show           | False                | show results if possible                                |
+| save_txt       | False                | save results as .txt file                               |
+| save_conf      | False                | save results with confidence scores                     |
+| save_crop      | False                | save cropped images with results                        |
+| hide_labels    | False                | hide labels                                             |
+| hide_conf      | False                | hide confidence scores                                  |
+| vid_stride     | False                | video frame-rate stride                                 |
+| line_thickness | 3                    | bounding box thickness (pixels)                         |
+| visualize      | False                | visualize model features                                |
+| augment        | False                | apply image augmentation to prediction sources          |
+| agnostic_nms   | False                | class-agnostic NMS                                      |
+| retina_masks   | False                | use high-resolution segmentation masks                  |
+| classes        | null                 | filter by class, i.e. classes=0, or classes=[0,2,3]     |
+
+### Validation
+
+Validation settings for YOLO models refer to the various hyperparameters and configurations used to
+evaluate the model's performance on a validation dataset. These settings can affect the model's performance, speed, and
+accuracy. Some common YOLO validation settings include the batch size, the frequency with which validation is performed
+during training, and the metrics used to evaluate the model's performance. Other factors that may affect the validation
+process include the size and composition of the validation dataset and the specific task the model is being used for. It
+is important to carefully tune and experiment with these settings to ensure that the model is performing well on the
+validation dataset and to detect and prevent overfitting.
+
+| Key         | Value | Description                                                                 |
+|-------------|-------|-----------------------------------------------------------------------------|
+| val         | True  | validate/test during training                                               |
+| save_json   | False | save results to JSON file                                                   |
+| save_hybrid | False | save hybrid version of labels (labels + additional predictions)             |
+| conf        | 0.001 | object confidence threshold for detection (default 0.25 predict, 0.001 val) |
+| iou         | 0.6   | intersection over union (IoU) threshold for NMS                             |
+| max_det     | 300   | maximum number of detections per image                                      |
+| half        | True  | use half precision (FP16)                                                   |
+| dnn         | False | use OpenCV DNN for ONNX inference                                           |
+| plots       | False | save plots during train/val                                                 |
+
+### Export
+
+Export settings for YOLO models refer to the various configurations and options used to save or
+export the model for use in other environments or platforms. These settings can affect the model's performance, size,
+and compatibility with different systems. Some common YOLO export settings include the format of the exported model
+file (e.g. ONNX, TensorFlow SavedModel), the device on which the model will be run (e.g. CPU, GPU), and the presence of
+additional features such as masks or multiple labels per box. Other factors that may affect the export process include
+the specific task the model is being used for and the requirements or constraints of the target environment or platform.
+It is important to carefully consider and configure these settings to ensure that the exported model is optimized for
+the intended use case and can be used effectively in the target environment.
+
+### Augmentation
+
+Augmentation settings for YOLO models refer to the various transformations and modifications
+applied to the training data to increase the diversity and size of the dataset. These settings can affect the model's
+performance, speed, and accuracy. Some common YOLO augmentation settings include the type and intensity of the
+transformations applied (e.g. random flips, rotations, cropping, color changes), the probability with which each
+transformation is applied, and the presence of additional features such as masks or multiple labels per box. Other
+factors that may affect the augmentation process include the size and composition of the original dataset and the
+specific task the model is being used for. It is important to carefully tune and experiment with these settings to
+ensure that the augmented dataset is diverse and representative enough to train a high-performing model.
+
+| Key         | Value | Description                                     |
+|-------------|-------|-------------------------------------------------|
+| hsv_h       | 0.015 | image HSV-Hue augmentation (fraction)           |
+| hsv_s       | 0.7   | image HSV-Saturation augmentation (fraction)    |
+| hsv_v       | 0.4   | image HSV-Value augmentation (fraction)         |
+| degrees     | 0.0   | image rotation (+/- deg)                        |
+| translate   | 0.1   | image translation (+/- fraction)                |
+| scale       | 0.5   | image scale (+/- gain)                          |
+| shear       | 0.0   | image shear (+/- deg)                           |
+| perspective | 0.0   | image perspective (+/- fraction), range 0-0.001 |
+| flipud      | 0.0   | image flip up-down (probability)                |
+| fliplr      | 0.5   | image flip left-right (probability)             |
+| mosaic      | 1.0   | image mosaic (probability)                      |
+| mixup       | 0.0   | image mixup (probability)                       |
+| copy_paste  | 0.0   | segment copy-paste (probability)                |
+
+### Logging, checkpoints, plotting and file management
+
+Logging, checkpoints, plotting, and file management are important considerations when training a YOLO model.
+
+- Logging: It is often helpful to log various metrics and statistics during training to track the model's progress and
+  diagnose any issues that may arise. This can be done using a logging library such as TensorBoard or by writing log
+  messages to a file.
+- Checkpoints: It is a good practice to save checkpoints of the model at regular intervals during training. This allows
+  you to resume training from a previous point if the training process is interrupted or if you want to experiment with
+  different training configurations.
+- Plotting: Visualizing the model's performance and training progress can be helpful for understanding how the model is
+  behaving and identifying potential issues. This can be done using a plotting library such as matplotlib or by
+  generating plots using a logging library such as TensorBoard.
+- File management: Managing the various files generated during the training process, such as model checkpoints, log
+  files, and plots, can be challenging. It is important to have a clear and organized file structure to keep track of
+  these files and make it easy to access and analyze them as needed.
+
+Effective logging, checkpointing, plotting, and file management can help you keep track of the model's progress and make
+it easier to debug and optimize the training process.
+
+| Key      | Value  | Description                                                                                    |
+|----------|--------|------------------------------------------------------------------------------------------------|
+| project  | 'runs' | project name                                                                                   |
+| name     | 'exp'  | experiment name. `exp` gets automatically incremented if not specified, i.e. `exp`, `exp2` ... |
+| exist_ok | False  | whether to overwrite existing experiment                                                       |
+| plots    | False  | save plots during train/val                                                                    |
+| save     | False  | save train checkpoints and predict results                                                     |
\ No newline at end of file
diff --git a/docs/cli.md b/docs/cli.md
index d5b8e93..976001f 100644
--- a/docs/cli.md
+++ b/docs/cli.md
@@ -17,7 +17,7 @@ Where:
   the `TASK` from the model type.
 - `MODE` (required) is one of `[train, val, predict, export]`
 - `ARGS` (optional) are any number of custom `arg=value` pairs like `imgsz=320` that override defaults. 
-  For a full list of available `ARGS` see the [Configuration](config.md) page.
+  For a full list of available `ARGS` see the [Configuration](cfg.md) page.
 
 !!! note ""
 
@@ -30,7 +30,7 @@ Where:
 ## Train
 
 Train YOLOv8n on the COCO128 dataset for 100 epochs at image size 640. For a full list of available arguments see
-the [Configuration](config.md) page.
+the [Configuration](cfg.md) page.
 
 !!! example ""
 
diff --git a/docs/config.md b/docs/config.md
deleted file mode 100644
index 09226d6..0000000
--- a/docs/config.md
+++ /dev/null
@@ -1,202 +0,0 @@
-YOLO settings and hyperparameters play a critical role in the model's performance, speed, and accuracy. These settings
-and hyperparameters can affect the model's behavior at various stages of the model development process, including
-training, validation, and prediction.
-
-Properly setting and tuning these parameters can have a significant impact on the model's ability to learn effectively
-from the training data and generalize to new data. For example, choosing an appropriate learning rate, batch size, and
-optimization algorithm can greatly affect the model's convergence speed and accuracy. Similarly, setting the correct
-confidence threshold and non-maximum suppression (NMS) threshold can affect the model's performance on detection tasks.
-
-It is important to carefully consider and experiment with these settings and hyperparameters to achieve the best
-possible performance for a given task. This can involve trial and error, as well as using techniques such as
-hyperparameter optimization to search for the optimal set of parameters.
-
-In summary, YOLO settings and hyperparameters are a key factor in the success of a YOLO model, and it is important to
-pay careful attention to them to achieve the desired results.
-
-### Setting the operation type
-
-YOLO models can be used for a variety of tasks, including detection, segmentation, and classification. These tasks
-differ in the type of output they produce and the specific problem they are designed to solve.
-
-- Detection: Detection tasks involve identifying and localizing objects or regions of interest in an image or video.
-  YOLO models can be used for object detection tasks by predicting the bounding boxes and class labels of objects in an
-  image.
-- Segmentation: Segmentation tasks involve dividing an image or video into regions or pixels that correspond to
-  different objects or classes. YOLO models can be used for image segmentation tasks by predicting a mask or label for
-  each pixel in an image.
-- Classification: Classification tasks involve assigning a class label to an input, such as an image or text. YOLO
-  models can be used for image classification tasks by predicting the class label of an input image.
-
-YOLO models can be used in different modes depending on the specific problem you are trying to solve. These modes
-include train, val, and predict.
-
-- Train: The train mode is used to train the model on a dataset. This mode is typically used during the development and
-  testing phase of a model.
-- Val: The val mode is used to evaluate the model's performance on a validation dataset. This mode is typically used to
-  tune the model's hyperparameters and detect overfitting.
-- Predict: The predict mode is used to make predictions with the model on new data. This mode is typically used in
-  production or when deploying the model to users.
-
-| Key    | Value    | Description                                                                                            |
-|--------|----------|--------------------------------------------------------------------------------------------------------|
-| task   | `detect` | Set the task via CLI. See Tasks for all supported tasks like - `detect`, `segment`, `classify`         |
-| mode   | `train`  | Set the mode via CLI. It can be `train`, `val`, `predict`, `export`                                    |
-| resume | `False`  | Resume last given task when set to `True`. <br> Resume from a given checkpoint is `model.pt` is passed |
-| model  | null     | Set the model. Format can differ for task type. Supports `model_name`, `model.yaml` & `model.pt`       |
-| data   | null     | Set the data. Format can differ for task type. Supports `data.yaml`, `data_folder`, `dataset_name`     |
-
-### Training
-
-Training settings for YOLO models refer to the various hyperparameters and configurations used to train the model on a
-dataset. These settings can affect the model's performance, speed, and accuracy. Some common YOLO training settings
-include the batch size, learning rate, momentum, and weight decay. Other factors that may affect the training process
-include the choice of optimizer, the choice of loss function, and the size and composition of the training dataset. It
-is important to carefully tune and experiment with these settings to achieve the best possible performance for a given
-task.
-
-| Key             | Value   | Description                                                                 |
-|-----------------|---------|-----------------------------------------------------------------------------|
-| device          | ''      | cuda device, i.e. 0 or 0,1,2,3 or cpu. `''` selects available cuda 0 device |
-| epochs          | 100     | Number of epochs to train                                                   |
-| workers         | 8       | Number of cpu workers used per process. Scales automatically with DDP       |
-| batch           | 16      | Batch size of the dataloader                                                |
-| imgsz           | 640     | Image size of data in dataloader                                            |
-| optimizer       | SGD     | Optimizer used. Supported optimizer are: `Adam`, `SGD`, `RMSProp`           |
-| single_cls      | False   | Train on multi-class data as single-class                                   |
-| image_weights   | False   | Use weighted image selection for training                                   |
-| rect            | False   | Enable rectangular training                                                 |
-| cos_lr          | False   | Use cosine LR scheduler                                                     |
-| lr0             | 0.01    | Initial learning rate                                                       |
-| lrf             | 0.01    | Final OneCycleLR learning rate                                              |
-| momentum        | 0.937   | Use as `momentum` for SGD and `beta1` for Adam                              |
-| weight_decay    | 0.0005  | Optimizer weight decay                                                      |
-| warmup_epochs   | 3.0     | Warmup epochs. Fractions are ok.                                            |
-| warmup_momentum | 0.8     | Warmup initial momentum                                                     |
-| warmup_bias_lr  | 0.1     | Warmup initial bias lr                                                      |
-| box             | 0.05    | Box loss gain                                                               |
-| cls             | 0.5     | cls loss gain                                                               |
-| cls_pw          | 1.0     | cls BCELoss positive_weight                                                 |
-| obj             | 1.0     | bj loss gain (scale with pixels)                                            |
-| obj_pw          | 1.0     | obj BCELoss positive_weight                                                 |
-| iou_t           | 0.20    | IOU training threshold                                                      |
-| anchor_t        | 4.0     | anchor-multiple threshold                                                   |
-| fl_gamma        | 0.0     | focal loss gamma                                                            |
-| label_smoothing | 0.0     |                                                                             |
-| nbs             | 64      | nominal batch size                                                          |
-| overlap_mask    | `True`  | **Segmentation**: Use mask overlapping during training                      |
-| mask_ratio      | 4       | **Segmentation**: Set mask downsampling                                     |
-| dropout         | `False` | **Classification**: Use dropout while training                              |
-
-### Prediction
-
-Prediction settings for YOLO models refer to the various hyperparameters and configurations used to make predictions
-with the model on new data. These settings can affect the model's performance, speed, and accuracy. Some common YOLO
-prediction settings include the confidence threshold, non-maximum suppression (NMS) threshold, and the number of classes
-to consider. Other factors that may affect the prediction process include the size and format of the input data, the
-presence of additional features such as masks or multiple labels per box, and the specific task the model is being used
-for. It is important to carefully tune and experiment with these settings to achieve the best possible performance for a
-given task.
-
-| Key            | Value                | Description                                     |
-|----------------|----------------------|-------------------------------------------------|
-| source         | `ultralytics/assets` | Input source. Accepts image, folder, video, url |
-| show           | `False`              | View the prediction images                      |
-| save_txt       | `False`              | Save the results in a txt file                  |
-| save_conf      | `False`              | Save the condidence scores                      |
-| save_crop      | `Fasle`              |                                                 |
-| hide_labels    | `False`              | Hide the labels                                 |
-| hide_conf      | `False`              | Hide the confidence scores                      |
-| vid_stride     | `False`              | Input video frame-rate stride                   |
-| line_thickness | `3`                  | Bounding-box thickness (pixels)                 |
-| visualize      | `False`              | Visualize model features                        |
-| augment        | `False`              | Augmented inference                             |
-| agnostic_nms   | `False`              | Class-agnostic NMS                              |
-| retina_masks   | `False`              | **Segmentation:** High resolution masks         |
-
-### Validation
-
-Validation settings for YOLO models refer to the various hyperparameters and configurations used to
-evaluate the model's performance on a validation dataset. These settings can affect the model's performance, speed, and
-accuracy. Some common YOLO validation settings include the batch size, the frequency with which validation is performed
-during training, and the metrics used to evaluate the model's performance. Other factors that may affect the validation
-process include the size and composition of the validation dataset and the specific task the model is being used for. It
-is important to carefully tune and experiment with these settings to ensure that the model is performing well on the
-validation dataset and to detect and prevent overfitting.
-
-| Key         | Value   | Description                       |
-|-------------|---------|-----------------------------------|
-| noval       | `False` | ???                               |
-| save_json   | `False` |                                   |
-| save_hybrid | `False` |                                   |
-| conf        | `0.001` | Confidence threshold              |
-| iou         | `0.6`   | IoU threshold                     |
-| max_det     | `300`   | Maximum number of detections      |
-| half        | `True`  | Use .half() mode.                 |
-| dnn         | `False` | Use OpenCV DNN for ONNX inference |
-| plots       | `False` |                                   |
-
-### Export
-
-Export settings for YOLO models refer to the various configurations and options used to save or
-export the model for use in other environments or platforms. These settings can affect the model's performance, size,
-and compatibility with different systems. Some common YOLO export settings include the format of the exported model
-file (e.g. ONNX, TensorFlow SavedModel), the device on which the model will be run (e.g. CPU, GPU), and the presence of
-additional features such as masks or multiple labels per box. Other factors that may affect the export process include
-the specific task the model is being used for and the requirements or constraints of the target environment or platform.
-It is important to carefully consider and configure these settings to ensure that the exported model is optimized for
-the intended use case and can be used effectively in the target environment.
-
-### Augmentation
-
-Augmentation settings for YOLO models refer to the various transformations and modifications
-applied to the training data to increase the diversity and size of the dataset. These settings can affect the model's
-performance, speed, and accuracy. Some common YOLO augmentation settings include the type and intensity of the
-transformations applied (e.g. random flips, rotations, cropping, color changes), the probability with which each
-transformation is applied, and the presence of additional features such as masks or multiple labels per box. Other
-factors that may affect the augmentation process include the size and composition of the original dataset and the
-specific task the model is being used for. It is important to carefully tune and experiment with these settings to
-ensure that the augmented dataset is diverse and representative enough to train a high-performing model.
-
-| hsv_h       | 0.015 | Image HSV-Hue augmentation (fraction)           |
-|-------------|-------|-------------------------------------------------|
-| hsv_s       | 0.7   | Image HSV-Saturation augmentation (fraction)    |
-| hsv_v       | 0.4   | Image HSV-Value augmentation (fraction)         |
-| degrees     | 0.0   | Image rotation (+/- deg)                        |
-| translate   | 0.1   | Image translation (+/- fraction)                |
-| scale       | 0.5   | Image scale (+/- gain)                          |
-| shear       | 0.0   | Image shear (+/- deg)                           |
-| perspective | 0.0   | Image perspective (+/- fraction), range 0-0.001 |
-| flipud      | 0.0   | Image flip up-down (probability)                |
-| fliplr      | 0.5   | Image flip left-right (probability)             |
-| mosaic      | 1.0   | Image mosaic (probability)                      |
-| mixup       | 0.0   | Image mixup (probability)                       |
-| copy_paste  | 0.0   | Segment copy-paste (probability)                |
-
-### Logging, checkpoints, plotting and file management
-
-Logging, checkpoints, plotting, and file management are important considerations when training a YOLO model.
-
-- Logging: It is often helpful to log various metrics and statistics during training to track the model's progress and
-  diagnose any issues that may arise. This can be done using a logging library such as TensorBoard or by writing log
-  messages to a file.
-- Checkpoints: It is a good practice to save checkpoints of the model at regular intervals during training. This allows
-  you to resume training from a previous point if the training process is interrupted or if you want to experiment with
-  different training configurations.
-- Plotting: Visualizing the model's performance and training progress can be helpful for understanding how the model is
-  behaving and identifying potential issues. This can be done using a plotting library such as matplotlib or by
-  generating plots using a logging library such as TensorBoard.
-- File management: Managing the various files generated during the training process, such as model checkpoints, log
-  files, and plots, can be challenging. It is important to have a clear and organized file structure to keep track of
-  these files and make it easy to access and analyze them as needed.
-
-Effective logging, checkpointing, plotting, and file management can help you keep track of the model's progress and make
-it easier to debug and optimize the training process.
-
-| Key       | Value   | Description                                                                                 |
-|-----------|---------|---------------------------------------------------------------------------------------------|
-| project:  | 'runs'  | The project name                                                                            |
-| name:     | 'exp'   | The run name. `exp` gets automatically incremented if not specified, i.e, `exp`, `exp2` ... |
-| exist_ok: | `False` | Will replace current directory contents if set to True and output directory exists.         |
-| plots     | `False` | **Validation**: Save plots while validation                                                 |
-| save      | `False` | Save any plots, models or files                                                             |
\ No newline at end of file
diff --git a/docs/predict.md b/docs/predict.md
index a4df359..8716520 100644
--- a/docs/predict.md
+++ b/docs/predict.md
@@ -48,19 +48,21 @@ box.xyxy
 ```
 - Properties and conversions
 ```
-results.boxes.xyxy   # box with xyxy format, (N, 4)
-results.boxes.xywh   # box with xywh format, (N, 4)
-results.boxes.xyxyn  # box with xyxy format but normalized, (N, 4)
-results.boxes.xywhn  # box with xywh format but normalized, (N, 4)
-results.boxes.conf   # confidence score, (N, 1)
-results.boxes.cls    # cls, (N, 1)
+boxes.xyxy   # box with xyxy format, (N, 4)
+boxes.xywh   # box with xywh format, (N, 4)
+boxes.xyxyn  # box with xyxy format but normalized, (N, 4)
+boxes.xywhn  # box with xywh format but normalized, (N, 4)
+boxes.conf   # confidence score, (N, 1)
+boxes.cls    # cls, (N, 1)
+boxes.data   # raw bboxes tensor, (N, 6); same as boxes.boxes
 ```
 ### Masks
 `Masks` object can be used index, manipulate and convert masks to segments. The segment conversion operation is cached.
 
 ```python
-results.masks.masks     # masks, (N, H, W)
-results.masks.segments  # bounding coordinates of masks, List[segment] * N
+masks = results.masks # Masks object
+masks.segments  # bounding coordinates of masks, List[segment] * N
+masks.data      # raw masks tensor, (N, H, W); same as masks.masks
 ```
 
 ### probs
diff --git a/docs/tasks/classification.md b/docs/tasks/classification.md
index a6e7a99..34e9dea 100644
--- a/docs/tasks/classification.md
+++ b/docs/tasks/classification.md
@@ -16,7 +16,7 @@ of that class are located or what their exact shape is.
 ## Train
 
 Train YOLOv8n-cls on the MNIST160 dataset for 100 epochs at image size 64. For a full list of available arguments
-see the [Configuration](../config.md) page.
+see the [Configuration](../cfg.md) page.
 
 !!! example ""
 
diff --git a/docs/tasks/detection.md b/docs/tasks/detection.md
index 4a7df4b..ac2af7c 100644
--- a/docs/tasks/detection.md
+++ b/docs/tasks/detection.md
@@ -16,7 +16,7 @@ scene, but don't need to know exactly where the object is or its exact shape.
 ## Train
 
 Train YOLOv8n on the COCO128 dataset for 100 epochs at image size 640. For a full list of available arguments see
-the [Configuration](../config.md) page.
+the [Configuration](../cfg.md) page.
 
 !!! example ""
 
diff --git a/docs/tasks/segmentation.md b/docs/tasks/segmentation.md
index 977819e..0a0ffc1 100644
--- a/docs/tasks/segmentation.md
+++ b/docs/tasks/segmentation.md
@@ -16,7 +16,7 @@ segmentation is useful when you need to know not only where objects are in an im
 ## Train
 
 Train YOLOv8n-seg on the COCO128-seg dataset for 100 epochs at image size 640. For a full list of available
-arguments see the [Configuration](../config.md) page.
+arguments see the [Configuration](../cfg.md) page.
 
 !!! example ""
 
diff --git a/mkdocs.yml b/mkdocs.yml
index 249701c..0652d4b 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -85,7 +85,7 @@ nav:
       - CLI: cli.md
       - Python: python.md
       - Predict: predict.md
-      - Configuration: config.md
+      - Configuration: cfg.md
       - Customization Guide: engine.md
   - Ultralytics HUB: hub.md
   - iOS and Android App: app.md
diff --git a/setup.py b/setup.py
index 6aca904..fef391b 100644
--- a/setup.py
+++ b/setup.py
@@ -51,5 +51,4 @@ setup(
         "Operating System :: MacOS", "Operating System :: Microsoft :: Windows"],
     keywords="machine-learning, deep-learning, vision, ML, DL, AI, YOLO, YOLOv3, YOLOv5, YOLOv8, HUB, Ultralytics",
     entry_points={
-        'console_scripts':
-        ['yolo = ultralytics.yolo.configs:entrypoint', 'ultralytics = ultralytics.yolo.configs:entrypoint']})
+        'console_scripts': ['yolo = ultralytics.yolo.cfg:entrypoint', 'ultralytics = ultralytics.yolo.cfg:entrypoint']})
diff --git a/tests/test_engine.py b/tests/test_engine.py
index e3c1d52..eea6bb9 100644
--- a/tests/test_engine.py
+++ b/tests/test_engine.py
@@ -2,14 +2,14 @@
 
 from pathlib import Path
 
-from ultralytics.yolo.configs import get_config
+from ultralytics.yolo.cfg import get_cfg
 from ultralytics.yolo.utils import DEFAULT_CFG_PATH, ROOT, SETTINGS
 from ultralytics.yolo.v8 import classify, detect, segment
 
 CFG_DET = 'yolov8n.yaml'
 CFG_SEG = 'yolov8n-seg.yaml'
 CFG_CLS = 'squeezenet1_0'
-CFG = get_config(DEFAULT_CFG_PATH)
+CFG = get_cfg(DEFAULT_CFG_PATH)
 MODEL = Path(SETTINGS['weights_dir']) / 'yolov8n'
 SOURCE = ROOT / "assets"
 
diff --git a/ultralytics/__init__.py b/ultralytics/__init__.py
index 4682c0c..e6d9b95 100644
--- a/ultralytics/__init__.py
+++ b/ultralytics/__init__.py
@@ -1,6 +1,6 @@
 # Ultralytics YOLO 🚀, GPL-3.0 license
 
-__version__ = "8.0.12"
+__version__ = "8.0.14"
 
 from ultralytics.yolo.engine.model import YOLO
 from ultralytics.yolo.utils import ops
diff --git a/ultralytics/hub/utils.py b/ultralytics/hub/utils.py
index 4428cb0..fadb615 100644
--- a/ultralytics/hub/utils.py
+++ b/ultralytics/hub/utils.py
@@ -136,12 +136,12 @@ def sync_analytics(cfg, all_keys=False, enabled=False):
    Sync analytics data if enabled in the global settings
 
     Args:
-        cfg (DictConfig): Configuration for the task and mode.
+        cfg (UltralyticsCFG): Configuration for the task and mode.
         all_keys (bool): Sync all items, not just non-default values.
         enabled (bool): For debugging.
     """
     if SETTINGS['sync'] and RANK in {-1, 0} and enabled:
-        cfg = dict(cfg)  # convert type from DictConfig to dict
+        cfg = dict(cfg)  # convert type from UltralyticsCFG to dict
         if not all_keys:
             cfg = {k: v for k, v in cfg.items() if v != DEFAULT_CFG_DICT.get(k, None)}  # retain non-default values
         cfg['uuid'] = SETTINGS['uuid']  # add the device UUID to the configuration data
diff --git a/ultralytics/nn/tasks.py b/ultralytics/nn/tasks.py
index 77b6bf0..c76ce08 100644
--- a/ultralytics/nn/tasks.py
+++ b/ultralytics/nn/tasks.py
@@ -95,7 +95,6 @@ class BaseModel(nn.Module):
             (nn.Module): The fused model is returned.
         """
         if not self.is_fused():
-            LOGGER.info('Fusing... ')
             for m in self.model.modules():
                 if isinstance(m, (Conv, DWConv)) and hasattr(m, 'bn'):
                     m.conv = fuse_conv_and_bn(m.conv, m.bn)  # update conv
diff --git a/ultralytics/yolo/configs/__init__.py b/ultralytics/yolo/cfg/__init__.py
similarity index 88%
rename from ultralytics/yolo/configs/__init__.py
rename to ultralytics/yolo/cfg/__init__.py
index c1aff56..b6ab510 100644
--- a/ultralytics/yolo/configs/__init__.py
+++ b/ultralytics/yolo/cfg/__init__.py
@@ -28,7 +28,7 @@ CLI_HELP_MSG = \
             Where   TASK (optional) is one of [detect, segment, classify]
                     MODE (required) is one of [train, val, predict, export]
                     ARGS (optional) are any number of custom 'arg=value' pairs like 'imgsz=320' that override defaults.
-                        For a full list of available ARGS see https://docs.ultralytics.com/config.
+                        For a full list of available ARGS see https://docs.ultralytics.com/cfg.
 
         Train a detection model for 10 epochs with an initial learning_rate of 0.01
             yolo detect train data=coco128.yaml model=yolov8n.pt epochs=10 lr0=0.01
@@ -48,7 +48,7 @@ CLI_HELP_MSG = \
         yolo checks
         yolo version
         yolo settings
-        yolo copy-config
+        yolo copy-cfg
 
     Docs: https://docs.ultralytics.com/cli
     Community: https://community.ultralytics.com
@@ -56,6 +56,15 @@ CLI_HELP_MSG = \
     """
 
 
+class UltralyticsCFG(SimpleNamespace):
+    """
+    UltralyticsCFG iterable SimpleNamespace class to allow SimpleNamespace to be used with dict() and in for loops
+    """
+
+    def __iter__(self):
+        return iter(vars(self).items())
+
+
 def cfg2dict(cfg):
     """
     Convert a configuration object to a dictionary.
@@ -75,30 +84,30 @@ def cfg2dict(cfg):
     return cfg
 
 
-def get_config(config: Union[str, Path, Dict, SimpleNamespace], overrides: Dict = None):
+def get_cfg(cfg: Union[str, Path, Dict, SimpleNamespace], overrides: Dict = None):
     """
     Load and merge configuration data from a file or dictionary.
 
     Args:
-        config (str) or (Path) or (Dict) or (SimpleNamespace): Configuration data.
+        cfg (str) or (Path) or (Dict) or (SimpleNamespace): Configuration data.
         overrides (str) or (Dict), optional: Overrides in the form of a file name or a dictionary. Default is None.
 
     Returns:
         (SimpleNamespace): Training arguments namespace.
     """
-    config = cfg2dict(config)
+    cfg = cfg2dict(cfg)
 
     # Merge overrides
     if overrides:
         overrides = cfg2dict(overrides)
-        check_config_mismatch(config, overrides)
-        config = {**config, **overrides}  # merge config and overrides dicts (prefer overrides)
+        check_cfg_mismatch(cfg, overrides)
+        cfg = {**cfg, **overrides}  # merge cfg and overrides dicts (prefer overrides)
 
     # Return instance
-    return SimpleNamespace(**config)
+    return UltralyticsCFG(**cfg)
 
 
-def check_config_mismatch(base: Dict, custom: Dict):
+def check_cfg_mismatch(base: Dict, custom: Dict):
     """
     This function checks for any mismatched keys between a custom configuration list and a base configuration list.
     If any mismatched keys are found, the function prints out similar keys from the base list and exits the program.
@@ -127,8 +136,8 @@ def entrypoint(debug=False):
     - running special modes like 'checks'
     - passing overrides to the package's configuration
 
-    It uses the package's default config and initializes it using the passed overrides.
-    Then it calls the CLI function with the composed config
+    It uses the package's default cfg and initializes it using the passed overrides.
+    Then it calls the CLI function with the composed cfg
     """
     if debug:
         args = ['train', 'predict', 'model=yolov8n.pt']  # for testing
@@ -149,7 +158,7 @@ def entrypoint(debug=False):
         'checks': checks.check_yolo,
         'version': lambda: LOGGER.info(__version__),
         'settings': print_settings,
-        'copy-config': copy_default_config}
+        'copy-cfg': copy_default_config}
 
     overrides = {}  # basic overrides, i.e. imgsz=320
     defaults = yaml_load(DEFAULT_CFG_PATH)
@@ -190,7 +199,7 @@ def entrypoint(debug=False):
                 f"https://github.com/ultralytics/ultralytics/blob/main/ultralytics/yolo/configs/default.yaml"
                 f"\n{CLI_HELP_MSG}")
 
-    cfg = get_config(defaults, overrides)  # create CFG instance
+    cfg = get_cfg(defaults, overrides)  # create CFG instance
 
     # Mapping from task to module
     module = {"detect": yolo.v8.detect, "segment": yolo.v8.segment, "classify": yolo.v8.classify}.get(cfg.task)
@@ -214,7 +223,7 @@ def copy_default_config():
     new_file = Path.cwd() / DEFAULT_CFG_PATH.name.replace('.yaml', '_copy.yaml')
     shutil.copy2(DEFAULT_CFG_PATH, new_file)
     LOGGER.info(f"{PREFIX}{DEFAULT_CFG_PATH} copied to {new_file}\n"
-                f"Usage for running YOLO with this new custom config:\nyolo cfg={new_file} args...")
+                f"Usage for running YOLO with this new custom cfg:\nyolo cfg={new_file} args...")
 
 
 if __name__ == '__main__':
diff --git a/ultralytics/yolo/configs/default.yaml b/ultralytics/yolo/cfg/default.yaml
similarity index 78%
rename from ultralytics/yolo/configs/default.yaml
rename to ultralytics/yolo/cfg/default.yaml
index 1ee17ce..c50686b 100644
--- a/ultralytics/yolo/configs/default.yaml
+++ b/ultralytics/yolo/cfg/default.yaml
@@ -1,20 +1,20 @@
 # Ultralytics YOLO 🚀, GPL-3.0 license
 # Default training settings and hyperparameters for medium-augmentation COCO training
 
-task: "detect"  # choices=['detect', 'segment', 'classify', 'init']  # init is a special case. Specify task to run.
-mode: "train"  # choices=['train', 'val', 'predict']  # mode to run task in.
+task: "detect"  # inference task, i.e. detect, segment, classify
+mode: "train"  # YOLO mode, i.e. train, val, predict, export
 
 # Train settings -------------------------------------------------------------------------------------------------------
-model: null  # i.e. yolov8n.pt, yolov8n.yaml. Path to model file
-data: null  # i.e. coco128.yaml. Path to data file
+model: null  # path to model file, i.e. yolov8n.pt, yolov8n.yaml
+data: null  # path to data file, i.e. coco128.yaml
 epochs: 100  # number of epochs to train for
 patience: 50  # epochs to wait for no observable improvement for early stopping of training
-batch: 16  # number of images per batch
-imgsz: 640  # size of input images
-save: True  # save checkpoints
+batch: 16  # number of images per batch (-1 for AutoBatch)
+imgsz: 640  # size of input images as integer or w,h
+save: True  # save train checkpoints and predict results
 cache: False  # True/ram, disk or False. Use cache for data loading
-device: null  # cuda device, i.e. 0 or 0,1,2,3 or cpu. Device to run on
-workers: 8  # number of worker threads for data loading
+device: null  # device to run on, i.e. cuda device=0 or device=0,1,2,3 or device=cpu
+workers: 8  # number of worker threads for data loading (per RANK if DDP)
 project: null  # project name
 name: null  # experiment name
 exist_ok: False  # whether to overwrite existing experiment
@@ -30,10 +30,10 @@ cos_lr: False  # use cosine learning rate scheduler
 close_mosaic: 10  # disable mosaic augmentation for final 10 epochs
 resume: False  # resume training from last checkpoint
 # Segmentation
-overlap_mask: True  # masks should overlap during training
-mask_ratio: 4  # mask downsample ratio
+overlap_mask: True  # masks should overlap during training (segment train only)
+mask_ratio: 4  # mask downsample ratio (segment train only)
 # Classification
-dropout: 0.0  # use dropout regularization
+dropout: 0.0  # use dropout regularization (classify train only)
 
 # Val/Test settings ----------------------------------------------------------------------------------------------------
 val: True  # validate/test during training
@@ -44,7 +44,7 @@ iou: 0.7  # intersection over union (IoU) threshold for NMS
 max_det: 300  # maximum number of detections per image
 half: False  # use half precision (FP16)
 dnn: False  # use OpenCV DNN for ONNX inference
-plots: True  # show plots during training
+plots: True  # save plots during train/val
 
 # Prediction settings --------------------------------------------------------------------------------------------------
 source: null  # source directory for images or videos
@@ -56,10 +56,11 @@ hide_labels: False  # hide labels
 hide_conf: False  # hide confidence scores
 vid_stride: 1  # video frame-rate stride
 line_thickness: 3  # bounding box thickness (pixels)
-visualize: False  # visualize results
-augment: False  # apply data augmentation to images
+visualize: False  # visualize model features
+augment: False  # apply image augmentation to prediction sources
 agnostic_nms: False  # class-agnostic NMS
-retina_masks: False  # use retina masks for object detection
+retina_masks: False  # use high-resolution segmentation masks
+classes: null  # filter results by class, i.e. classes=0, or classes=[0,2,3]
 
 # Export settings ------------------------------------------------------------------------------------------------------
 format: torchscript  # format to export to
@@ -73,8 +74,8 @@ workspace: 4  # TensorRT: workspace size (GB)
 nms: False  # CoreML: add NMS
 
 # Hyperparameters ------------------------------------------------------------------------------------------------------
-lr0: 0.01  # initial learning rate (SGD=1E-2, Adam=1E-3)
-lrf: 0.01  # final OneCycleLR learning rate (lr0 * lrf)
+lr0: 0.01  # initial learning rate (i.e. SGD=1E-2, Adam=1E-3)
+lrf: 0.01  # final learning rate (lr0 * lrf)
 momentum: 0.937  # SGD momentum/Adam beta1
 weight_decay: 0.0005  # optimizer weight decay 5e-4
 warmup_epochs: 3.0  # warmup epochs (fractions ok)
@@ -84,7 +85,7 @@ box: 7.5  # box loss gain
 cls: 0.5  # cls loss gain (scale with pixels)
 dfl: 1.5  # dfl loss gain
 fl_gamma: 0.0  # focal loss gamma (efficientDet default gamma=1.5)
-label_smoothing: 0.0
+label_smoothing: 0.0  # label smoothing (fraction)
 nbs: 64  # nominal batch size
 hsv_h: 0.015  # image HSV-Hue augmentation (fraction)
 hsv_s: 0.7  # image HSV-Saturation augmentation (fraction)
diff --git a/ultralytics/yolo/data/dataloaders/v5loader.py b/ultralytics/yolo/data/dataloaders/v5loader.py
index 3b4f5f3..cf4feee 100644
--- a/ultralytics/yolo/data/dataloaders/v5loader.py
+++ b/ultralytics/yolo/data/dataloaders/v5loader.py
@@ -615,7 +615,7 @@ class LoadImagesAndLabels(Dataset):
         nm, nf, ne, nc, msgs = 0, 0, 0, 0, []  # number missing, found, empty, corrupt, messages
         desc = f"{prefix}Scanning {path.parent / path.stem}..."
         total = len(self.im_files)
-        with (Pool if total > 10000 else ThreadPool)(NUM_THREADS) as pool:
+        with ThreadPool(NUM_THREADS) as pool:
             results = pool.imap(verify_image_label, zip(self.im_files, self.label_files, repeat(prefix)))
             pbar = tqdm(results, desc=desc, total=total, bar_format=TQDM_BAR_FORMAT)
             for im_file, lb, shape, segments, nm_f, nf_f, ne_f, nc_f, msg in pbar:
diff --git a/ultralytics/yolo/data/dataset.py b/ultralytics/yolo/data/dataset.py
index 94029fd..bff01e9 100644
--- a/ultralytics/yolo/data/dataset.py
+++ b/ultralytics/yolo/data/dataset.py
@@ -1,7 +1,7 @@
 # Ultralytics YOLO 🚀, GPL-3.0 license
 
 from itertools import repeat
-from multiprocessing.pool import Pool, ThreadPool
+from multiprocessing.pool import ThreadPool
 from pathlib import Path
 
 import torchvision
@@ -51,7 +51,7 @@ class YOLODataset(BaseDataset):
         nm, nf, ne, nc, msgs = 0, 0, 0, 0, []  # number missing, found, empty, corrupt, messages
         desc = f"{self.prefix}Scanning {path.parent / path.stem}..."
         total = len(self.im_files)
-        with (Pool if total > 10000 else ThreadPool)(NUM_THREADS) as pool:
+        with ThreadPool(NUM_THREADS) as pool:
             results = pool.imap(func=verify_image_label,
                                 iterable=zip(self.im_files, self.label_files, repeat(self.prefix),
                                              repeat(self.use_keypoints)))
diff --git a/ultralytics/yolo/engine/exporter.py b/ultralytics/yolo/engine/exporter.py
index 3753cd8..7c1cfac 100644
--- a/ultralytics/yolo/engine/exporter.py
+++ b/ultralytics/yolo/engine/exporter.py
@@ -67,7 +67,7 @@ import torch
 import ultralytics
 from ultralytics.nn.modules import Detect, Segment
 from ultralytics.nn.tasks import ClassificationModel, DetectionModel, SegmentationModel
-from ultralytics.yolo.configs import get_config
+from ultralytics.yolo.cfg import get_cfg
 from ultralytics.yolo.data.dataloaders.stream_loaders import LoadImages
 from ultralytics.yolo.data.utils import check_dataset
 from ultralytics.yolo.utils import DEFAULT_CFG, LOGGER, callbacks, colorstr, get_default_args, yaml_save
@@ -134,7 +134,7 @@ class Exporter:
             config (str, optional): Path to a configuration file. Defaults to DEFAULT_CONFIG.
             overrides (dict, optional): Configuration overrides. Defaults to None.
         """
-        self.args = get_config(config, overrides)
+        self.args = get_cfg(config, overrides)
         self.callbacks = defaultdict(list, {k: [v] for k, v in callbacks.default_callbacks.items()})  # add callbacks
         callbacks.add_integration_callbacks(self)
 
diff --git a/ultralytics/yolo/engine/model.py b/ultralytics/yolo/engine/model.py
index ce59e75..f8de777 100644
--- a/ultralytics/yolo/engine/model.py
+++ b/ultralytics/yolo/engine/model.py
@@ -4,7 +4,7 @@ from pathlib import Path
 
 from ultralytics import yolo  # noqa
 from ultralytics.nn.tasks import ClassificationModel, DetectionModel, SegmentationModel, attempt_load_one_weight
-from ultralytics.yolo.configs import get_config
+from ultralytics.yolo.cfg import get_cfg
 from ultralytics.yolo.engine.exporter import Exporter
 from ultralytics.yolo.utils import DEFAULT_CFG_PATH, LOGGER, yaml_load
 from ultralytics.yolo.utils.checks import check_yaml
@@ -136,7 +136,7 @@ class YOLO:
             self.predictor = self.PredictorClass(overrides=overrides)
             self.predictor.setup_model(model=self.model)
         else:  # only update args if predictor is already setup
-            self.predictor.args = get_config(self.predictor.args, overrides)
+            self.predictor.args = get_cfg(self.predictor.args, overrides)
         return self.predictor(source=source, stream=stream, verbose=verbose)
 
     @smart_inference_mode()
@@ -151,7 +151,7 @@ class YOLO:
         overrides = self.overrides.copy()
         overrides.update(kwargs)
         overrides["mode"] = "val"
-        args = get_config(config=DEFAULT_CFG_PATH, overrides=overrides)
+        args = get_cfg(cfg=DEFAULT_CFG_PATH, overrides=overrides)
         args.data = data or args.data
         args.task = self.task
 
@@ -169,7 +169,7 @@ class YOLO:
 
         overrides = self.overrides.copy()
         overrides.update(kwargs)
-        args = get_config(config=DEFAULT_CFG_PATH, overrides=overrides)
+        args = get_cfg(cfg=DEFAULT_CFG_PATH, overrides=overrides)
         args.task = self.task
 
         print(args)
@@ -201,7 +201,7 @@ class YOLO:
             self.trainer.model = self.trainer.get_model(weights=self.model if self.ckpt else None, cfg=self.model.yaml)
             self.model = self.trainer.model
         self.trainer.train()
-        # update model and configs after training
+        # update model and cfg after training
         self.model, _ = attempt_load_one_weight(str(self.trainer.best))
         self.overrides = self.model.args
 
diff --git a/ultralytics/yolo/engine/predictor.py b/ultralytics/yolo/engine/predictor.py
index 546e1ee..32f4eff 100644
--- a/ultralytics/yolo/engine/predictor.py
+++ b/ultralytics/yolo/engine/predictor.py
@@ -33,7 +33,7 @@ from pathlib import Path
 import cv2
 
 from ultralytics.nn.autobackend import AutoBackend
-from ultralytics.yolo.configs import get_config
+from ultralytics.yolo.cfg import get_cfg
 from ultralytics.yolo.data.dataloaders.stream_loaders import LoadImages, LoadPilAndNumpy, LoadScreenshots, LoadStreams
 from ultralytics.yolo.data.utils import IMG_FORMATS, VID_FORMATS
 from ultralytics.yolo.utils import DEFAULT_CFG_PATH, LOGGER, SETTINGS, callbacks, colorstr, ops
@@ -70,7 +70,7 @@ class BasePredictor:
             config (str, optional): Path to a configuration file. Defaults to DEFAULT_CONFIG.
             overrides (dict, optional): Configuration overrides. Defaults to None.
         """
-        self.args = get_config(config, overrides)
+        self.args = get_cfg(config, overrides)
         project = self.args.project or Path(SETTINGS['runs_dir']) / self.args.task
         name = self.args.name or f"{self.args.mode}"
         self.save_dir = increment_path(Path(project) / name, exist_ok=self.args.exist_ok)
@@ -84,6 +84,7 @@ class BasePredictor:
         self.bs = None
         self.imgsz = None
         self.device = None
+        self.classes = self.args.classes
         self.dataset = None
         self.vid_path, self.vid_writer = None, None
         self.annotator = None
@@ -100,7 +101,7 @@ class BasePredictor:
     def write_results(self, results, batch, print_string):
         raise NotImplementedError("print_results function needs to be implemented")
 
-    def postprocess(self, preds, img, orig_img):
+    def postprocess(self, preds, img, orig_img, classes=None):
         return preds
 
     def setup_source(self, source=None):
@@ -195,7 +196,7 @@ class BasePredictor:
 
             # postprocess
             with self.dt[2]:
-                results = self.postprocess(preds, im, im0s)
+                results = self.postprocess(preds, im, im0s, self.classes)
             for i in range(len(im)):
                 p, im0 = (path[i], im0s[i]) if self.webcam or self.from_img else (path, im0s)
                 p = Path(p)
diff --git a/ultralytics/yolo/engine/results.py b/ultralytics/yolo/engine/results.py
index 910abb1..b168da0 100644
--- a/ultralytics/yolo/engine/results.py
+++ b/ultralytics/yolo/engine/results.py
@@ -21,6 +21,8 @@ class Results:
             masks (Masks, optional): A Masks object containing the detection masks.
             probs (torch.Tensor, optional): A tensor containing the detection class probabilities.
             orig_shape (tuple, optional): Original image size.
+            data (torch.Tensor): The raw masks tensor
+
         """
 
     def __init__(self, boxes=None, masks=None, probs=None, orig_shape=None) -> None:
@@ -81,19 +83,20 @@ class Results:
             return len(getattr(self, item))
 
     def __str__(self):
-        return self.__repr__()
+        str_out = ""
+        for item in self.comp:
+            if getattr(self, item) is None:
+                continue
+            str_out = str_out + getattr(self, item).__str__()
+        return str_out
 
     def __repr__(self):
-        s = f'Ultralytics YOLO {self.__class__} instance\n'  # string
-        if self.boxes is not None:
-            s = s + self.boxes.__repr__() + '\n'
-        if self.masks is not None:
-            s = s + self.masks.__repr__() + '\n'
-        if self.probs is not None:
-            s = s + self.probs.__repr__()
-        s += f'original size: {self.orig_shape}\n'
-
-        return s
+        str_out = ""
+        for item in self.comp:
+            if getattr(self, item) is None:
+                continue
+            str_out = str_out + getattr(self, item).__repr__()
+        return str_out
 
     def __getattr__(self, attr):
         name = self.__class__.__name__
@@ -129,6 +132,7 @@ class Boxes:
         xywh (torch.Tensor) or (numpy.ndarray): The boxes in xywh format.
         xyxyn (torch.Tensor) or (numpy.ndarray): The boxes in xyxy format normalized by original image size.
         xywhn (torch.Tensor) or (numpy.ndarray): The boxes in xywh format normalized by original image size.
+        data (torch.Tensor): The raw bboxes tensor
     """
 
     def __init__(self, boxes, orig_shape) -> None:
@@ -198,15 +202,19 @@ class Boxes:
     def shape(self):
         return self.boxes.shape
 
+    @property
+    def data(self):  # exposes the wrapped self.boxes object as-is (no copy or conversion)
+        return self.boxes
+
     def __len__(self):  # override len(results)
         return len(self.boxes)
 
     def __str__(self):
-        return self.__repr__()
+        return self.boxes.__str__()  # delegate to the wrapped object's own string form
 
     def __repr__(self):
         return (f"Ultralytics YOLO {self.__class__} masks\n" + f"type: {type(self.boxes)}\n" +
-                f"shape: {self.boxes.shape}\n" + f"dtype: {self.boxes.dtype}")
+                f"shape: {self.boxes.shape}\n" + f"dtype: {self.boxes.dtype}\n{self.boxes.__repr__()}")
 
     def __getitem__(self, idx):
         boxes = self.boxes[idx]
@@ -257,12 +265,16 @@ class Masks:
     def segments(self):
         return [
             ops.scale_segments(self.masks.shape[1:], x, self.orig_shape, normalize=True)
-            for x in reversed(ops.masks2segments(self.masks))]
+            for x in ops.masks2segments(self.masks)]
 
     @property
     def shape(self):
         return self.masks.shape
 
+    @property
+    def data(self):  # exposes the wrapped self.masks object as-is (no copy or conversion)
+        return self.masks
+
     def cpu(self):
         masks = self.masks.cpu()
         return Masks(masks, self.orig_shape)
@@ -283,11 +295,11 @@ class Masks:
         return len(self.masks)
 
     def __str__(self):
-        return self.__repr__()
+        return self.masks.__str__()  # delegate to the wrapped object's own string form
 
     def __repr__(self):
         return (f"Ultralytics YOLO {self.__class__} masks\n" + f"type: {type(self.masks)}\n" +
-                f"shape: {self.masks.shape}\n" + f"dtype: {self.masks.dtype}")
+                f"shape: {self.masks.shape}\n" + f"dtype: {self.masks.dtype}\n{self.masks.__repr__()}")
 
     def __getitem__(self, idx):
         masks = self.masks[idx]
diff --git a/ultralytics/yolo/engine/trainer.py b/ultralytics/yolo/engine/trainer.py
index 5e47f97..e28fa8a 100644
--- a/ultralytics/yolo/engine/trainer.py
+++ b/ultralytics/yolo/engine/trainer.py
@@ -23,7 +23,7 @@ from tqdm import tqdm
 import ultralytics.yolo.utils as utils
 from ultralytics import __version__
 from ultralytics.nn.tasks import attempt_load_one_weight
-from ultralytics.yolo.configs import get_config
+from ultralytics.yolo.cfg import get_cfg
 from ultralytics.yolo.data.utils import check_dataset, check_dataset_yaml
 from ultralytics.yolo.utils import (DEFAULT_CFG_PATH, LOGGER, RANK, SETTINGS, TQDM_BAR_FORMAT, callbacks, colorstr,
                                     yaml_save)
@@ -79,7 +79,7 @@ class BaseTrainer:
             config (str, optional): Path to a configuration file. Defaults to DEFAULT_CONFIG.
             overrides (dict, optional): Configuration overrides. Defaults to None.
         """
-        self.args = get_config(config, overrides)
+        self.args = get_cfg(config, overrides)
         self.device = utils.torch_utils.select_device(self.args.device, self.args.batch)
         self.check_resume()
         self.console = LOGGER
@@ -509,7 +509,7 @@ class BaseTrainer:
             assert args_yaml.is_file(), \
                 FileNotFoundError('Resume checkpoint f{last} not found. '
                                   'Please pass a valid checkpoint to resume from, i.e. yolo resume=path/to/last.pt')
-            args = get_config(args_yaml)  # replace
+            args = get_cfg(args_yaml)  # replace
             args.model, resume = str(last), True  # reinstate
             self.args = args
         self.resume = resume
diff --git a/ultralytics/yolo/engine/validator.py b/ultralytics/yolo/engine/validator.py
index 2277521..b903c11 100644
--- a/ultralytics/yolo/engine/validator.py
+++ b/ultralytics/yolo/engine/validator.py
@@ -8,7 +8,7 @@ import torch
 from tqdm import tqdm
 
 from ultralytics.nn.autobackend import AutoBackend
-from ultralytics.yolo.configs import get_config
+from ultralytics.yolo.cfg import get_cfg
 from ultralytics.yolo.data.utils import check_dataset, check_dataset_yaml
 from ultralytics.yolo.utils import DEFAULT_CFG_PATH, LOGGER, RANK, SETTINGS, TQDM_BAR_FORMAT, callbacks
 from ultralytics.yolo.utils.checks import check_imgsz
@@ -52,7 +52,7 @@ class BaseValidator:
         self.dataloader = dataloader
         self.pbar = pbar
         self.logger = logger or LOGGER
-        self.args = args or get_config(DEFAULT_CFG_PATH)
+        self.args = args or get_cfg(DEFAULT_CFG_PATH)
         self.model = None
         self.data = None
         self.device = None
diff --git a/ultralytics/yolo/utils/__init__.py b/ultralytics/yolo/utils/__init__.py
index e039e26..24b3b29 100644
--- a/ultralytics/yolo/utils/__init__.py
+++ b/ultralytics/yolo/utils/__init__.py
@@ -23,7 +23,7 @@ import yaml
 # Constants
 FILE = Path(__file__).resolve()
 ROOT = FILE.parents[2]  # YOLO
-DEFAULT_CFG_PATH = ROOT / "yolo/configs/default.yaml"
+DEFAULT_CFG_PATH = ROOT / "yolo/cfg/default.yaml"
 RANK = int(os.getenv('RANK', -1))
 NUM_THREADS = min(8, max(1, os.cpu_count() - 1))  # number of YOLOv5 multiprocessing threads
 AUTOINSTALL = str(os.getenv('YOLO_AUTOINSTALL', True)).lower() == 'true'  # global auto-install mode
diff --git a/ultralytics/yolo/utils/callbacks/clearml.py b/ultralytics/yolo/utils/callbacks/clearml.py
index 7a02979..4053f06 100644
--- a/ultralytics/yolo/utils/callbacks/clearml.py
+++ b/ultralytics/yolo/utils/callbacks/clearml.py
@@ -26,7 +26,7 @@ def on_pretrain_routine_start(trainer):
                      output_uri=True,
                      reuse_last_task_id=False,
                      auto_connect_frameworks={'pytorch': False})
-    task.connect(dict(trainer.args), name='General')
+    task.connect(vars(trainer.args), name='General')
 
 
 def on_train_epoch_end(trainer):
diff --git a/ultralytics/yolo/utils/callbacks/comet.py b/ultralytics/yolo/utils/callbacks/comet.py
index 0f6d4f2..6c62f2d 100644
--- a/ultralytics/yolo/utils/callbacks/comet.py
+++ b/ultralytics/yolo/utils/callbacks/comet.py
@@ -11,7 +11,7 @@ except (ModuleNotFoundError, ImportError):
 
 def on_pretrain_routine_start(trainer):
     experiment = comet_ml.Experiment(project_name=trainer.args.project or "YOLOv8")
-    experiment.log_parameters(dict(trainer.args))
+    experiment.log_parameters(vars(trainer.args))  # logs args.__dict__; assumes args is namespace-like (confirm)
 
 
 def on_train_epoch_end(trainer):
diff --git a/ultralytics/yolo/utils/torch_utils.py b/ultralytics/yolo/utils/torch_utils.py
index ef318f7..c01ea77 100644
--- a/ultralytics/yolo/utils/torch_utils.py
+++ b/ultralytics/yolo/utils/torch_utils.py
@@ -137,9 +137,10 @@ def model_info(model, verbose=False, imgsz=640):
                   (i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std()))
 
     flops = get_flops(model, imgsz)
+    fused = ' (fused)' if model.is_fused() else ''
     fs = f', {flops:.1f} GFLOPs' if flops else ''
     m = Path(getattr(model, 'yaml_file', '') or model.yaml.get('yaml_file', '')).stem.replace('yolo', 'YOLO') or 'Model'
-    LOGGER.info(f"{m} summary: {len(list(model.modules()))} layers, {n_p} parameters, {n_g} gradients{fs}")
+    LOGGER.info(f"{m} summary{fused}: {len(list(model.modules()))} layers, {n_p} parameters, {n_g} gradients{fs}")
 
 
 def get_num_params(model):
diff --git a/ultralytics/yolo/v8/classify/predict.py b/ultralytics/yolo/v8/classify/predict.py
index 2e62cac..bd7768b 100644
--- a/ultralytics/yolo/v8/classify/predict.py
+++ b/ultralytics/yolo/v8/classify/predict.py
@@ -18,7 +18,7 @@ class ClassificationPredictor(BasePredictor):
         img = img.half() if self.model.fp16 else img.float()  # uint8 to fp16/32
         return img
 
-    def postprocess(self, preds, img, orig_img):
+    def postprocess(self, preds, img, orig_img, classes=None):
         results = []
         for i, pred in enumerate(preds):
             shape = orig_img[i].shape if isinstance(orig_img, list) else orig_img.shape
diff --git a/ultralytics/yolo/v8/detect/predict.py b/ultralytics/yolo/v8/detect/predict.py
index 3413d90..452c4f6 100644
--- a/ultralytics/yolo/v8/detect/predict.py
+++ b/ultralytics/yolo/v8/detect/predict.py
@@ -19,12 +19,13 @@ class DetectionPredictor(BasePredictor):
         img /= 255  # 0 - 255 to 0.0 - 1.0
         return img
 
-    def postprocess(self, preds, img, orig_img):
+    def postprocess(self, preds, img, orig_img, classes=None):
         preds = ops.non_max_suppression(preds,
                                         self.args.conf,
                                         self.args.iou,
                                         agnostic=self.args.agnostic_nms,
-                                        max_det=self.args.max_det)
+                                        max_det=self.args.max_det,
+                                        classes=self.args.classes)
 
         results = []
         for i, pred in enumerate(preds):
diff --git a/ultralytics/yolo/v8/segment/predict.py b/ultralytics/yolo/v8/segment/predict.py
index 58597f4..ed8f365 100644
--- a/ultralytics/yolo/v8/segment/predict.py
+++ b/ultralytics/yolo/v8/segment/predict.py
@@ -10,14 +10,15 @@ from ultralytics.yolo.v8.detect.predict import DetectionPredictor
 
 class SegmentationPredictor(DetectionPredictor):
 
-    def postprocess(self, preds, img, orig_img):
+    def postprocess(self, preds, img, orig_img, classes=None):
         # TODO: filter by classes
         p = ops.non_max_suppression(preds[0],
                                     self.args.conf,
                                     self.args.iou,
                                     agnostic=self.args.agnostic_nms,
                                     max_det=self.args.max_det,
-                                    nm=32)
+                                    nm=32,
+                                    classes=self.args.classes)
         results = []
         proto = preds[1][-1]
         for i, pred in enumerate(p):