New dataset `fraction=1.0` argument (#2860)

single_channel
Glenn Jocher 2 years ago committed by GitHub
parent 61fa5efe6d
commit 0bdd4ad379
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -82,6 +82,7 @@ task.
| `close_mosaic` | `0` | (int) disable mosaic augmentation for final epochs |
| `resume` | `False` | resume training from last checkpoint |
| `amp` | `True` | Automatic Mixed Precision (AMP) training, choices=[True, False] |
| `fraction` | `1.0` | dataset fraction to train on (default is 1.0, all images in train set) |
| `lr0` | `0.01` | initial learning rate (i.e. SGD=1E-2, Adam=1E-3) |
| `lrf` | `0.01` | final learning rate (lr0 * lrf) |
| `momentum` | `0.937` | SGD momentum/Adam beta1 |

@ -104,6 +104,7 @@ The training settings for YOLO models encompass various hyperparameters and conf
| `close_mosaic` | `0` | (int) disable mosaic augmentation for final epochs |
| `resume` | `False` | resume training from last checkpoint |
| `amp` | `True` | Automatic Mixed Precision (AMP) training, choices=[True, False] |
| `fraction` | `1.0` | dataset fraction to train on (default is 1.0, all images in train set) |
| `lr0` | `0.01` | initial learning rate (i.e. SGD=1E-2, Adam=1E-3) |
| `lrf` | `0.01` | final learning rate (lr0 * lrf) |
| `momentum` | `0.937` | SGD momentum/Adam beta1 |

@ -66,7 +66,7 @@ CLI_HELP_MSG = \
CFG_FLOAT_KEYS = 'warmup_epochs', 'box', 'cls', 'dfl', 'degrees', 'shear'
CFG_FRACTION_KEYS = ('dropout', 'iou', 'lr0', 'lrf', 'momentum', 'weight_decay', 'warmup_momentum', 'warmup_bias_lr',
'label_smoothing', 'hsv_h', 'hsv_s', 'hsv_v', 'translate', 'scale', 'perspective', 'flipud',
'fliplr', 'mosaic', 'mixup', 'copy_paste', 'conf', 'iou') # fractional floats limited to 0.0 - 1.0
'fliplr', 'mosaic', 'mixup', 'copy_paste', 'conf', 'iou', 'fraction') # fraction floats 0.0 - 1.0
CFG_INT_KEYS = ('epochs', 'patience', 'batch', 'workers', 'seed', 'close_mosaic', 'mask_ratio', 'max_det', 'vid_stride',
'line_width', 'workspace', 'nbs', 'save_period')
CFG_BOOL_KEYS = ('save', 'exist_ok', 'verbose', 'deterministic', 'single_cls', 'rect', 'cos_lr', 'overlap_mask', 'val',

@ -30,6 +30,7 @@ cos_lr: False # use cosine learning rate scheduler
close_mosaic: 0 # (int) disable mosaic augmentation for final epochs
resume: False # resume training from last checkpoint
amp: True # Automatic Mixed Precision (AMP) training, choices=[True, False], True runs AMP check
fraction: 1.0 # dataset fraction to train on (default is 1.0, all images in train set)
# Segmentation
overlap_mask: True # masks should overlap during training (segment train only)
mask_ratio: 4 # mask downsample ratio (segment train only)

@ -36,6 +36,7 @@ class BaseDataset(Dataset):
pad (float, optional): Padding. Defaults to 0.0.
single_cls (bool, optional): If True, single class training is used. Defaults to False.
classes (list): List of included classes. Default is None.
fraction (float): Fraction of dataset to utilize. Default is 1.0 (use all data).
Attributes:
im_files (list): List of image file paths.
@ -58,13 +59,15 @@ class BaseDataset(Dataset):
stride=32,
pad=0.5,
single_cls=False,
classes=None):
classes=None,
fraction=1.0):
super().__init__()
self.img_path = img_path
self.imgsz = imgsz
self.augment = augment
self.single_cls = single_cls
self.prefix = prefix
self.fraction = fraction
self.im_files = self.get_img_files(self.img_path)
self.labels = self.get_labels()
self.update_labels(include_class=classes) # single_cls and include_class
@ -114,6 +117,8 @@ class BaseDataset(Dataset):
assert im_files, f'{self.prefix}No images found'
except Exception as e:
raise FileNotFoundError(f'{self.prefix}Error loading data from {img_path}\n{HELP_URL}') from e
if self.fraction < 1:
im_files = im_files[:round(len(im_files) * self.fraction)]
return im_files
def update_labels(self, include_class: Optional[list]):

@ -69,7 +69,7 @@ def seed_worker(worker_id): # noqa
random.seed(worker_seed)
def build_yolo_dataset(cfg, img_path, batch, data_info, mode='train', rect=False, stride=32):
def build_yolo_dataset(cfg, img_path, batch, data, mode='train', rect=False, stride=32):
"""Build YOLO Dataset"""
return YOLODataset(
img_path=img_path,
@ -86,7 +86,8 @@ def build_yolo_dataset(cfg, img_path, batch, data_info, mode='train', rect=False
use_segments=cfg.task == 'segment',
use_keypoints=cfg.task == 'pose',
classes=cfg.classes,
data=data_info)
data=data,
fraction=cfg.fraction if mode == 'train' else 1.0)
def build_dataloader(dataset, batch, workers, shuffle=True, rank=-1):

@ -226,6 +226,8 @@ class ClassificationDataset(torchvision.datasets.ImageFolder):
cache (Union[bool, str], optional): Cache setting, can be True, False, 'ram' or 'disk'. Defaults to False.
"""
super().__init__(root=root)
if augment and args.fraction < 1.0: # reduce training fraction
self.samples = self.samples[:round(len(self.samples) * args.fraction)]
self.cache_ram = cache is True or cache == 'ram'
self.cache_disk = cache == 'disk'
self.samples = [list(x) + [Path(x[0]).with_suffix('.npy'), None] for x in self.samples] # file, index, npy, im
@ -269,4 +271,4 @@ class SemanticDataset(BaseDataset):
def __init__(self):
"""Initialize a SemanticDataset object."""
pass
super().__init__()

Loading…
Cancel
Save