diff --git a/docs/modes/train.md b/docs/modes/train.md index 32186cd..1d629a9 100644 --- a/docs/modes/train.md +++ b/docs/modes/train.md @@ -82,6 +82,7 @@ task. | `close_mosaic` | `0` | (int) disable mosaic augmentation for final epochs | | `resume` | `False` | resume training from last checkpoint | | `amp` | `True` | Automatic Mixed Precision (AMP) training, choices=[True, False] | +| `fraction` | `1.0` | dataset fraction to train on (default is 1.0, all images in train set) | | `lr0` | `0.01` | initial learning rate (i.e. SGD=1E-2, Adam=1E-3) | | `lrf` | `0.01` | final learning rate (lr0 * lrf) | | `momentum` | `0.937` | SGD momentum/Adam beta1 | diff --git a/docs/usage/cfg.md b/docs/usage/cfg.md index 9113ecf..ae3853c 100644 --- a/docs/usage/cfg.md +++ b/docs/usage/cfg.md @@ -104,6 +104,7 @@ The training settings for YOLO models encompass various hyperparameters and conf | `close_mosaic` | `0` | (int) disable mosaic augmentation for final epochs | | `resume` | `False` | resume training from last checkpoint | | `amp` | `True` | Automatic Mixed Precision (AMP) training, choices=[True, False] | +| `fraction` | `1.0` | dataset fraction to train on (default is 1.0, all images in train set) | | `lr0` | `0.01` | initial learning rate (i.e. SGD=1E-2, Adam=1E-3) | | `lrf` | `0.01` | final learning rate (lr0 * lrf) | | `momentum` | `0.937` | SGD momentum/Adam beta1 | diff --git a/ultralytics/yolo/cfg/__init__.py b/ultralytics/yolo/cfg/__init__.py index a9168a4..c3c8d60 100644 --- a/ultralytics/yolo/cfg/__init__.py +++ b/ultralytics/yolo/cfg/__init__.py @@ -66,7 +66,7 @@ CLI_HELP_MSG = \ CFG_FLOAT_KEYS = 'warmup_epochs', 'box', 'cls', 'dfl', 'degrees', 'shear' CFG_FRACTION_KEYS = ('dropout', 'iou', 'lr0', 'lrf', 'momentum', 'weight_decay', 'warmup_momentum', 'warmup_bias_lr', 'label_smoothing', 'hsv_h', 'hsv_s', 'hsv_v', 'translate', 'scale', 'perspective', 'flipud', - 'fliplr', 'mosaic', 'mixup', 'copy_paste', 'conf', 'iou') # fractional floats limited to 0.0 - 1.0 + 'fliplr', 'mosaic', 'mixup', 'copy_paste', 'conf', 'iou', 'fraction') # fraction floats 0.0 - 1.0 CFG_INT_KEYS = ('epochs', 'patience', 'batch', 'workers', 'seed', 'close_mosaic', 'mask_ratio', 'max_det', 'vid_stride', 'line_width', 'workspace', 'nbs', 'save_period') CFG_BOOL_KEYS = ('save', 'exist_ok', 'verbose', 'deterministic', 'single_cls', 'rect', 'cos_lr', 'overlap_mask', 'val', diff --git a/ultralytics/yolo/cfg/default.yaml b/ultralytics/yolo/cfg/default.yaml index 86087b7..abf12c3 100644 --- a/ultralytics/yolo/cfg/default.yaml +++ b/ultralytics/yolo/cfg/default.yaml @@ -30,6 +30,7 @@ cos_lr: False # use cosine learning rate scheduler close_mosaic: 0 # (int) disable mosaic augmentation for final epochs resume: False # resume training from last checkpoint amp: True # Automatic Mixed Precision (AMP) training, choices=[True, False], True runs AMP check +fraction: 1.0 # dataset fraction to train on (default is 1.0, all images in train set) # Segmentation overlap_mask: True # masks should overlap during training (segment train only) mask_ratio: 4 # mask downsample ratio (segment train only) diff --git a/ultralytics/yolo/data/base.py b/ultralytics/yolo/data/base.py index cbb4843..d2e9793 100644 --- a/ultralytics/yolo/data/base.py +++ b/ultralytics/yolo/data/base.py @@ -36,6 +36,7 @@ class BaseDataset(Dataset): pad (float, optional): Padding. Defaults to 0.0. single_cls (bool, optional): If True, single class training is used. Defaults to False. classes (list): List of included classes. Default is None. + fraction (float): Fraction of dataset to utilize. Default is 1.0 (use all data). Attributes: im_files (list): List of image file paths. @@ -58,13 +59,15 @@ class BaseDataset(Dataset): stride=32, pad=0.5, single_cls=False, - classes=None): + classes=None, + fraction=1.0): super().__init__() self.img_path = img_path self.imgsz = imgsz self.augment = augment self.single_cls = single_cls self.prefix = prefix + self.fraction = fraction self.im_files = self.get_img_files(self.img_path) self.labels = self.get_labels() self.update_labels(include_class=classes) # single_cls and include_class @@ -114,6 +117,8 @@ class BaseDataset(Dataset): assert im_files, f'{self.prefix}No images found' except Exception as e: raise FileNotFoundError(f'{self.prefix}Error loading data from {img_path}\n{HELP_URL}') from e + if self.fraction < 1: + im_files = im_files[:round(len(im_files) * self.fraction)] return im_files def update_labels(self, include_class: Optional[list]): diff --git a/ultralytics/yolo/data/build.py b/ultralytics/yolo/data/build.py index 0b58b4f..54d2d16 100644 --- a/ultralytics/yolo/data/build.py +++ b/ultralytics/yolo/data/build.py @@ -69,7 +69,7 @@ def seed_worker(worker_id): # noqa random.seed(worker_seed) -def build_yolo_dataset(cfg, img_path, batch, data_info, mode='train', rect=False, stride=32): +def build_yolo_dataset(cfg, img_path, batch, data, mode='train', rect=False, stride=32): """Build YOLO Dataset""" return YOLODataset( img_path=img_path, @@ -86,7 +86,8 @@ def build_yolo_dataset(cfg, img_path, batch, data_info, mode='train', rect=False use_segments=cfg.task == 'segment', use_keypoints=cfg.task == 'pose', classes=cfg.classes, - data=data_info) + data=data, + fraction=cfg.fraction if mode == 'train' else 1.0) def build_dataloader(dataset, batch, workers, shuffle=True, rank=-1): diff --git a/ultralytics/yolo/data/dataset.py b/ultralytics/yolo/data/dataset.py index bc0cf7c..b1e7856 100644 --- a/ultralytics/yolo/data/dataset.py +++ b/ultralytics/yolo/data/dataset.py @@ -226,6 +226,8 @@ class ClassificationDataset(torchvision.datasets.ImageFolder): cache (Union[bool, str], optional): Cache setting, can be True, False, 'ram' or 'disk'. Defaults to False. """ super().__init__(root=root) + if augment and args.fraction < 1.0: # reduce training fraction + self.samples = self.samples[:round(len(self.samples) * args.fraction)] self.cache_ram = cache is True or cache == 'ram' self.cache_disk = cache == 'disk' self.samples = [list(x) + [Path(x[0]).with_suffix('.npy'), None] for x in self.samples] # file, index, npy, im @@ -269,4 +271,4 @@ class SemanticDataset(BaseDataset): def __init__(self): """Initialize a SemanticDataset object.""" - pass + super().__init__()