New dataset `fraction=1.0` argument (#2860)

single_channel
Glenn Jocher 2 years ago committed by GitHub
parent 61fa5efe6d
commit 0bdd4ad379
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -82,6 +82,7 @@ task.
| `close_mosaic` | `0` | (int) disable mosaic augmentation for final epochs | | `close_mosaic` | `0` | (int) disable mosaic augmentation for final epochs |
| `resume` | `False` | resume training from last checkpoint | | `resume` | `False` | resume training from last checkpoint |
| `amp` | `True` | Automatic Mixed Precision (AMP) training, choices=[True, False] | | `amp` | `True` | Automatic Mixed Precision (AMP) training, choices=[True, False] |
| `fraction` | `1.0` | dataset fraction to train on (default is 1.0, all images in train set) |
| `lr0` | `0.01` | initial learning rate (e.g. SGD=1E-2, Adam=1E-3) | | `lr0` | `0.01` | initial learning rate (e.g. SGD=1E-2, Adam=1E-3) |
| `lrf` | `0.01` | final learning rate (lr0 * lrf) | | `lrf` | `0.01` | final learning rate (lr0 * lrf) |
| `momentum` | `0.937` | SGD momentum/Adam beta1 | | `momentum` | `0.937` | SGD momentum/Adam beta1 |

@ -104,6 +104,7 @@ The training settings for YOLO models encompass various hyperparameters and conf
| `close_mosaic` | `0` | (int) disable mosaic augmentation for final epochs | | `close_mosaic` | `0` | (int) disable mosaic augmentation for final epochs |
| `resume` | `False` | resume training from last checkpoint | | `resume` | `False` | resume training from last checkpoint |
| `amp` | `True` | Automatic Mixed Precision (AMP) training, choices=[True, False] | | `amp` | `True` | Automatic Mixed Precision (AMP) training, choices=[True, False] |
| `fraction` | `1.0` | dataset fraction to train on (default is 1.0, all images in train set) |
| `lr0` | `0.01` | initial learning rate (e.g. SGD=1E-2, Adam=1E-3) | | `lr0` | `0.01` | initial learning rate (e.g. SGD=1E-2, Adam=1E-3) |
| `lrf` | `0.01` | final learning rate (lr0 * lrf) | | `lrf` | `0.01` | final learning rate (lr0 * lrf) |
| `momentum` | `0.937` | SGD momentum/Adam beta1 | | `momentum` | `0.937` | SGD momentum/Adam beta1 |

@ -66,7 +66,7 @@ CLI_HELP_MSG = \
CFG_FLOAT_KEYS = 'warmup_epochs', 'box', 'cls', 'dfl', 'degrees', 'shear' CFG_FLOAT_KEYS = 'warmup_epochs', 'box', 'cls', 'dfl', 'degrees', 'shear'
CFG_FRACTION_KEYS = ('dropout', 'iou', 'lr0', 'lrf', 'momentum', 'weight_decay', 'warmup_momentum', 'warmup_bias_lr', CFG_FRACTION_KEYS = ('dropout', 'iou', 'lr0', 'lrf', 'momentum', 'weight_decay', 'warmup_momentum', 'warmup_bias_lr',
'label_smoothing', 'hsv_h', 'hsv_s', 'hsv_v', 'translate', 'scale', 'perspective', 'flipud', 'label_smoothing', 'hsv_h', 'hsv_s', 'hsv_v', 'translate', 'scale', 'perspective', 'flipud',
'fliplr', 'mosaic', 'mixup', 'copy_paste', 'conf', 'iou') # fractional floats limited to 0.0 - 1.0 'fliplr', 'mosaic', 'mixup', 'copy_paste', 'conf', 'iou', 'fraction') # fraction floats 0.0 - 1.0
CFG_INT_KEYS = ('epochs', 'patience', 'batch', 'workers', 'seed', 'close_mosaic', 'mask_ratio', 'max_det', 'vid_stride', CFG_INT_KEYS = ('epochs', 'patience', 'batch', 'workers', 'seed', 'close_mosaic', 'mask_ratio', 'max_det', 'vid_stride',
'line_width', 'workspace', 'nbs', 'save_period') 'line_width', 'workspace', 'nbs', 'save_period')
CFG_BOOL_KEYS = ('save', 'exist_ok', 'verbose', 'deterministic', 'single_cls', 'rect', 'cos_lr', 'overlap_mask', 'val', CFG_BOOL_KEYS = ('save', 'exist_ok', 'verbose', 'deterministic', 'single_cls', 'rect', 'cos_lr', 'overlap_mask', 'val',

@ -30,6 +30,7 @@ cos_lr: False # use cosine learning rate scheduler
close_mosaic: 0 # (int) disable mosaic augmentation for final epochs close_mosaic: 0 # (int) disable mosaic augmentation for final epochs
resume: False # resume training from last checkpoint resume: False # resume training from last checkpoint
amp: True # Automatic Mixed Precision (AMP) training, choices=[True, False], True runs AMP check amp: True # Automatic Mixed Precision (AMP) training, choices=[True, False], True runs AMP check
fraction: 1.0 # dataset fraction to train on (default is 1.0, all images in train set)
# Segmentation # Segmentation
overlap_mask: True # masks should overlap during training (segment train only) overlap_mask: True # masks should overlap during training (segment train only)
mask_ratio: 4 # mask downsample ratio (segment train only) mask_ratio: 4 # mask downsample ratio (segment train only)

@ -36,6 +36,7 @@ class BaseDataset(Dataset):
pad (float, optional): Padding. Defaults to 0.0. pad (float, optional): Padding. Defaults to 0.0.
single_cls (bool, optional): If True, single class training is used. Defaults to False. single_cls (bool, optional): If True, single class training is used. Defaults to False.
classes (list): List of included classes. Default is None. classes (list): List of included classes. Default is None.
fraction (float): Fraction of dataset to utilize. Default is 1.0 (use all data).
Attributes: Attributes:
im_files (list): List of image file paths. im_files (list): List of image file paths.
@ -58,13 +59,15 @@ class BaseDataset(Dataset):
stride=32, stride=32,
pad=0.5, pad=0.5,
single_cls=False, single_cls=False,
classes=None): classes=None,
fraction=1.0):
super().__init__() super().__init__()
self.img_path = img_path self.img_path = img_path
self.imgsz = imgsz self.imgsz = imgsz
self.augment = augment self.augment = augment
self.single_cls = single_cls self.single_cls = single_cls
self.prefix = prefix self.prefix = prefix
self.fraction = fraction
self.im_files = self.get_img_files(self.img_path) self.im_files = self.get_img_files(self.img_path)
self.labels = self.get_labels() self.labels = self.get_labels()
self.update_labels(include_class=classes) # single_cls and include_class self.update_labels(include_class=classes) # single_cls and include_class
@ -114,6 +117,8 @@ class BaseDataset(Dataset):
assert im_files, f'{self.prefix}No images found' assert im_files, f'{self.prefix}No images found'
except Exception as e: except Exception as e:
raise FileNotFoundError(f'{self.prefix}Error loading data from {img_path}\n{HELP_URL}') from e raise FileNotFoundError(f'{self.prefix}Error loading data from {img_path}\n{HELP_URL}') from e
if self.fraction < 1:
im_files = im_files[:round(len(im_files) * self.fraction)]
return im_files return im_files
def update_labels(self, include_class: Optional[list]): def update_labels(self, include_class: Optional[list]):

@ -69,7 +69,7 @@ def seed_worker(worker_id): # noqa
random.seed(worker_seed) random.seed(worker_seed)
def build_yolo_dataset(cfg, img_path, batch, data_info, mode='train', rect=False, stride=32): def build_yolo_dataset(cfg, img_path, batch, data, mode='train', rect=False, stride=32):
"""Build YOLO Dataset""" """Build YOLO Dataset"""
return YOLODataset( return YOLODataset(
img_path=img_path, img_path=img_path,
@ -86,7 +86,8 @@ def build_yolo_dataset(cfg, img_path, batch, data_info, mode='train', rect=False
use_segments=cfg.task == 'segment', use_segments=cfg.task == 'segment',
use_keypoints=cfg.task == 'pose', use_keypoints=cfg.task == 'pose',
classes=cfg.classes, classes=cfg.classes,
data=data_info) data=data,
fraction=cfg.fraction if mode == 'train' else 1.0)
def build_dataloader(dataset, batch, workers, shuffle=True, rank=-1): def build_dataloader(dataset, batch, workers, shuffle=True, rank=-1):

@ -226,6 +226,8 @@ class ClassificationDataset(torchvision.datasets.ImageFolder):
cache (Union[bool, str], optional): Cache setting, can be True, False, 'ram' or 'disk'. Defaults to False. cache (Union[bool, str], optional): Cache setting, can be True, False, 'ram' or 'disk'. Defaults to False.
""" """
super().__init__(root=root) super().__init__(root=root)
if augment and args.fraction < 1.0: # reduce training fraction
self.samples = self.samples[:round(len(self.samples) * args.fraction)]
self.cache_ram = cache is True or cache == 'ram' self.cache_ram = cache is True or cache == 'ram'
self.cache_disk = cache == 'disk' self.cache_disk = cache == 'disk'
self.samples = [list(x) + [Path(x[0]).with_suffix('.npy'), None] for x in self.samples] # file, index, npy, im self.samples = [list(x) + [Path(x[0]).with_suffix('.npy'), None] for x in self.samples] # file, index, npy, im
@ -269,4 +271,4 @@ class SemanticDataset(BaseDataset):
def __init__(self): def __init__(self):
"""Initialize a SemanticDataset object.""" """Initialize a SemanticDataset object."""
pass super().__init__()

Loading…
Cancel
Save