ultralytics 8.0.81 single-line docstring updates (#2061)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2023-04-17 00:45:36 +02:00
parent 5bce1c3021
commit a38f227672
64 changed files with 620 additions and 58 deletions
--- a/ultralytics/yolo/data/augment.py
+++ b/ultralytics/yolo/data/augment.py
@ -26,15 +26,19 @@ class BaseTransform:
        pass

    def apply_image(self, labels):
+        """Applies image transformation to labels."""
        pass

    def apply_instances(self, labels):
+        """Applies transformations to input 'labels' and returns object instances."""
        pass

    def apply_semantic(self, labels):
+        """Applies transformations to the semantic segmentation data in 'labels'."""
        pass

    def __call__(self, labels):
+        """Applies label transformations to an image, instances and semantic masks."""
        self.apply_image(labels)
        self.apply_instances(labels)
        self.apply_semantic(labels)
@ -43,20 +47,25 @@ class BaseTransform:
 class Compose:

    def __init__(self, transforms):
+        """Initializes the Compose object with a list of transforms."""
        self.transforms = transforms

    def __call__(self, data):
+        """Applies a series of transformations to input data."""
        for t in self.transforms:
            data = t(data)
        return data

    def append(self, transform):
+        """Appends a new transform to the existing list of transforms."""
        self.transforms.append(transform)

    def tolist(self):
+        """Converts list of transforms to a standard Python list."""
        return self.transforms

    def __repr__(self):
+        """Return string representation of object."""
        format_string = f'{self.__class__.__name__}('
        for t in self.transforms:
            format_string += '\n'
@ -74,6 +83,7 @@ class BaseMixTransform:
        self.p = p

    def __call__(self, labels):
+        """Applies pre-processing transforms and mixup/mosaic transforms to labels data."""
        if random.uniform(0, 1) > self.p:
            return labels

@ -96,9 +106,11 @@ class BaseMixTransform:
        return labels

    def _mix_transform(self, labels):
+        """Applies MixUp or Mosaic augmentation to the label dictionary."""
        raise NotImplementedError

    def get_indexes(self):
+        """Returns the dataset index(es) of the images to mix in; implemented by subclasses."""
        raise NotImplementedError


@ -111,6 +123,7 @@ class Mosaic(BaseMixTransform):
    """

    def __init__(self, dataset, imgsz=640, p=1.0, border=(0, 0)):
+        """Initializes the object with a dataset, image size, probability, and border."""
        assert 0 <= p <= 1.0, 'The probability should be in range [0, 1]. ' f'got {p}.'
        super().__init__(dataset=dataset, p=p)
        self.dataset = dataset
@ -118,9 +131,11 @@ class Mosaic(BaseMixTransform):
        self.border = border

    def get_indexes(self):
+        """Return a list of 3 random indexes from the dataset."""
        return [random.randint(0, len(self.dataset) - 1) for _ in range(3)]

    def _mix_transform(self, labels):
+        """Apply mosaic transformation to the input image and labels."""
        mosaic_labels = []
        assert labels.get('rect_shape', None) is None, 'rect and mosaic is exclusive.'
        assert len(labels.get('mix_labels', [])) > 0, 'There are no other images for mosaic augment.'
@ -166,6 +181,7 @@ class Mosaic(BaseMixTransform):
        return labels

    def _cat_labels(self, mosaic_labels):
+        """Return labels with mosaic border instances clipped."""
        if len(mosaic_labels) == 0:
            return {}
        cls = []
@ -190,6 +206,7 @@ class MixUp(BaseMixTransform):
        super().__init__(dataset=dataset, pre_transform=pre_transform, p=p)

    def get_indexes(self):
+        """Get a random index from the dataset."""
        return random.randint(0, len(self.dataset) - 1)

    def _mix_transform(self, labels):
@ -400,6 +417,7 @@ class RandomHSV:
        self.vgain = vgain

    def __call__(self, labels):
+        """Applies random hue, saturation and value gains to the image in 'labels'."""
        img = labels['img']
        if self.hgain or self.sgain or self.vgain:
            r = np.random.uniform(-1, 1, 3) * [self.hgain, self.sgain, self.vgain] + 1  # random gains
@ -427,6 +445,7 @@ class RandomFlip:
        self.flip_idx = flip_idx

    def __call__(self, labels):
+        """Applies random flip to the image and its instances in 'labels'."""
        img = labels['img']
        instances = labels.pop('instances')
        instances.convert_bbox(format='xywh')
@ -453,6 +472,7 @@ class LetterBox:
    """Resize image and padding for detection, instance segmentation, pose."""

    def __init__(self, new_shape=(640, 640), auto=False, scaleFill=False, scaleup=True, stride=32):
+        """Initialize LetterBox object with specific parameters."""
        self.new_shape = new_shape
        self.auto = auto
        self.scaleFill = scaleFill
@ -460,6 +480,7 @@ class LetterBox:
        self.stride = stride

    def __call__(self, labels=None, image=None):
+        """Return updated labels and image with added border."""
        if labels is None:
            labels = {}
        img = labels.get('img') if image is None else image
@ -556,6 +577,7 @@ class CopyPaste:
 class Albumentations:
    # YOLOv8 Albumentations class (optional, only used if package is installed)
    def __init__(self, p=1.0):
+        """Initialize the transform object for YOLO bbox formatted params."""
        self.p = p
        self.transform = None
        prefix = colorstr('albumentations: ')
@ -581,6 +603,7 @@ class Albumentations:
            LOGGER.info(f'{prefix}{e}')

    def __call__(self, labels):
+        """Applies the Albumentations transform to the image and labels in 'labels'."""
        im = labels['img']
        cls = labels['cls']
        if len(cls):
@ -618,6 +641,7 @@ class Format:
        self.batch_idx = batch_idx  # keep the batch indexes

    def __call__(self, labels):
+        """Return formatted image, classes, bounding boxes & keypoints to be used by 'collate_fn'."""
        img = labels.pop('img')
        h, w = img.shape[:2]
        cls = labels.pop('cls')
@ -647,6 +671,7 @@ class Format:
        return labels

    def _format_img(self, img):
+        """Format the image for YOLOv8 from Numpy array to PyTorch tensor."""
        if len(img.shape) < 3:
            img = np.expand_dims(img, -1)
        img = np.ascontiguousarray(img.transpose(2, 0, 1)[::-1])
@ -668,6 +693,7 @@ class Format:


 def v8_transforms(dataset, imgsz, hyp):
+    """Builds the augmentation transform pipeline for YOLOv8 training."""
    pre_transform = Compose([
        Mosaic(dataset, imgsz=imgsz, p=hyp.mosaic, border=[-imgsz // 2, -imgsz // 2]),
        CopyPaste(p=hyp.copy_paste),
@ -749,6 +775,7 @@ def classify_albumentations(
 class ClassifyLetterBox:
    # YOLOv8 LetterBox class for image preprocessing, i.e. T.Compose([LetterBox(size), ToTensor()])
    def __init__(self, size=(640, 640), auto=False, stride=32):
+        """Initializes ClassifyLetterBox with target size, auto flag and stride."""
        super().__init__()
        self.h, self.w = (size, size) if isinstance(size, int) else size
        self.auto = auto  # pass max size integer, automatically solve for short side using stride
@ -768,6 +795,7 @@ class ClassifyLetterBox:
 class CenterCrop:
    # YOLOv8 CenterCrop class for image preprocessing, i.e. T.Compose([CenterCrop(size), ToTensor()])
    def __init__(self, size=640):
+        """Initializes CenterCrop with the target size, given as an int or an (h, w) pair."""
        super().__init__()
        self.h, self.w = (size, size) if isinstance(size, int) else size

@ -781,6 +809,7 @@ class CenterCrop:
 class ToTensor:
    # YOLOv8 ToTensor class for image preprocessing, i.e. T.Compose([LetterBox(size), ToTensor()])
    def __init__(self, half=False):
+        """Initialize YOLOv8 ToTensor object with optional half-precision support."""
        super().__init__()
        self.half = half

--- a/ultralytics/yolo/data/base.py
+++ b/ultralytics/yolo/data/base.py
@ -170,6 +170,7 @@ class BaseDataset(Dataset):
            np.save(f.as_posix(), cv2.imread(self.im_files[i]))

    def set_rectangle(self):
+        """Sets the batch index of each image for rectangular batching."""
        bi = np.floor(np.arange(self.ni) / self.batch_size).astype(int)  # batch index
        nb = bi[-1] + 1  # number of batches

@ -194,9 +195,11 @@ class BaseDataset(Dataset):
        self.batch = bi  # batch index of image

    def __getitem__(self, index):
+        """Returns transformed label information for given index."""
        return self.transforms(self.get_label_info(index))

    def get_label_info(self, index):
+        """Get and return label information from the dataset."""
        label = deepcopy(self.labels[index])  # requires deepcopy() https://github.com/ultralytics/ultralytics/pull/1948
        label.pop('shape', None)  # shape is for rect, remove it
        label['img'], label['ori_shape'], label['resized_shape'] = self.load_image(index)
@ -208,6 +211,7 @@ class BaseDataset(Dataset):
        return label

    def __len__(self):
+        """Returns the length of the labels list for the dataset."""
        return len(self.labels)

    def update_labels_info(self, label):
--- a/ultralytics/yolo/data/build.py
+++ b/ultralytics/yolo/data/build.py
@ -24,14 +24,17 @@ class InfiniteDataLoader(dataloader.DataLoader):
    """Dataloader that reuses workers. Uses same syntax as vanilla DataLoader."""

    def __init__(self, *args, **kwargs):
+        """Dataloader that infinitely recycles workers, inherits from DataLoader."""
        super().__init__(*args, **kwargs)
        object.__setattr__(self, 'batch_sampler', _RepeatSampler(self.batch_sampler))
        self.iterator = super().__iter__()

    def __len__(self):
+        """Returns the length of the batch sampler's sampler."""
        return len(self.batch_sampler.sampler)

    def __iter__(self):
+        """Yields len(self) items from the persistent underlying iterator."""
        for _ in range(len(self)):
            yield next(self.iterator)

@ -45,9 +48,11 @@ class _RepeatSampler:
    """

    def __init__(self, sampler):
+        """Initializes an object that repeats a given sampler indefinitely."""
        self.sampler = sampler

    def __iter__(self):
+        """Iterates over the 'sampler' and yields its contents."""
        while True:
            yield from iter(self.sampler)

@ -60,6 +65,7 @@ def seed_worker(worker_id):  # noqa


 def build_dataloader(cfg, batch, img_path, data_info, stride=32, rect=False, rank=-1, mode='train'):
+    """Return an InfiniteDataLoader or DataLoader for training or validation set."""
    assert mode in ['train', 'val']
    shuffle = mode == 'train'
    if cfg.rect and shuffle:
@ -134,6 +140,7 @@ def build_classification_dataloader(path,


 def check_source(source):
+    """Check source type and return corresponding flag values."""
    webcam, screenshot, from_img, in_memory, tensor = False, False, False, False, False
    if isinstance(source, (str, int, Path)):  # int for local usb camera
        source = str(source)
--- a/ultralytics/yolo/data/dataloaders/stream_loaders.py
+++ b/ultralytics/yolo/data/dataloaders/stream_loaders.py
@ -32,6 +32,7 @@ class SourceTypes:
 class LoadStreams:
    # YOLOv8 streamloader, i.e. `yolo predict source='rtsp://example.com/media.mp4'  # RTSP, RTMP, HTTP streams`
    def __init__(self, sources='file.streams', imgsz=640, stride=32, auto=True, transforms=None, vid_stride=1):
+        """Initialize instance variables and check for consistent input stream shapes."""
        torch.backends.cudnn.benchmark = True  # faster for fixed-size inference
        self.mode = 'stream'
        self.imgsz = imgsz
@ -97,10 +98,12 @@ class LoadStreams:
            time.sleep(0.0)  # wait time

    def __iter__(self):
+        """Resets the iteration counter and returns the object itself as the iterator."""
        self.count = -1
        return self

    def __next__(self):
+        """Returns source paths, transformed and original images for processing."""
        self.count += 1
        if not all(x.is_alive() for x in self.threads) or cv2.waitKey(1) == ord('q'):  # q to quit
            cv2.destroyAllWindows()
@ -117,6 +120,7 @@ class LoadStreams:
        return self.sources, im, im0, None, ''

    def __len__(self):
+        """Return the length of the sources object."""
        return len(self.sources)  # 1E12 frames = 32 streams at 30 FPS for 30 years


@ -153,6 +157,7 @@ class LoadScreenshots:
        self.monitor = {'left': self.left, 'top': self.top, 'width': self.width, 'height': self.height}

    def __iter__(self):
+        """Returns an iterator of the object."""
        return self

    def __next__(self):
@ -173,6 +178,7 @@ class LoadScreenshots:
 class LoadImages:
    # YOLOv8 image/video dataloader, i.e. `yolo predict source=image.jpg/vid.mp4`
    def __init__(self, path, imgsz=640, stride=32, auto=True, transforms=None, vid_stride=1):
+        """Initialize the Dataloader and raise FileNotFoundError if file not found."""
        if isinstance(path, str) and Path(path).suffix == '.txt':  # *.txt file with img/vid/dir on each line
            path = Path(path).read_text().rsplit()
        files = []
@ -211,10 +217,12 @@ class LoadImages:
                                    f'Supported formats are:\nimages: {IMG_FORMATS}\nvideos: {VID_FORMATS}')

    def __iter__(self):
+        """Returns an iterator object for VideoStream or ImageFolder."""
        self.count = 0
        return self

    def __next__(self):
+        """Return next image, path and metadata from dataset."""
        if self.count == self.nf:
            raise StopIteration
        path = self.files[self.count]
@ -276,12 +284,14 @@ class LoadImages:
        return im

    def __len__(self):
+        """Returns the number of files in the object."""
        return self.nf  # number of files


 class LoadPilAndNumpy:

    def __init__(self, im0, imgsz=640, stride=32, auto=True, transforms=None):
+        """Initialize PIL and Numpy Dataloader."""
        if not isinstance(im0, list):
            im0 = [im0]
        self.paths = [getattr(im, 'filename', f'image{i}.jpg') for i, im in enumerate(im0)]
@ -296,6 +306,7 @@ class LoadPilAndNumpy:

    @staticmethod
    def _single_check(im):
+        """Validate and format an image to numpy array."""
        assert isinstance(im, (Image.Image, np.ndarray)), f'Expected PIL/np.ndarray image type, but got {type(im)}'
        if isinstance(im, Image.Image):
            if im.mode != 'RGB':
@ -305,6 +316,7 @@ class LoadPilAndNumpy:
        return im

    def _single_preprocess(self, im, auto):
+        """Preprocesses a single image for inference."""
        if self.transforms:
            im = self.transforms(im)  # transforms
        else:
@ -314,9 +326,11 @@ class LoadPilAndNumpy:
        return im

    def __len__(self):
+        """Returns the length of the 'im0' attribute."""
        return len(self.im0)

    def __next__(self):
+        """Returns batch paths, images, processed images, None, ''."""
        if self.count == 1:  # loop only once as it's batch inference
            raise StopIteration
        auto = all(x.shape == self.im0[0].shape for x in self.im0) and self.auto
@ -326,6 +340,7 @@ class LoadPilAndNumpy:
        return self.paths, im, self.im0, None, ''

    def __iter__(self):
+        """Enables iteration for class LoadPilAndNumpy."""
        self.count = 0
        return self

@ -338,16 +353,19 @@ class LoadTensor:
        self.mode = 'image'

    def __iter__(self):
+        """Returns an iterator object."""
        self.count = 0
        return self

    def __next__(self):
+        """Return next item in the iterator."""
        if self.count == 1:
            raise StopIteration
        self.count += 1
        return None, self.im0, self.im0, None, ''  # self.paths, im, self.im0, None, ''

    def __len__(self):
+        """Returns the batch size."""
        return self.bs


--- a/ultralytics/yolo/data/dataloaders/v5augmentations.py
+++ b/ultralytics/yolo/data/dataloaders/v5augmentations.py
@ -24,6 +24,7 @@ IMAGENET_STD = 0.229, 0.224, 0.225  # RGB standard deviation
 class Albumentations:
    # YOLOv5 Albumentations class (optional, only used if package is installed)
    def __init__(self, size=640):
+        """Instantiate object with image augmentations for YOLOv5."""
        self.transform = None
        prefix = colorstr('albumentations: ')
        try:
@ -48,6 +49,7 @@ class Albumentations:
            LOGGER.info(f'{prefix}{e}')

    def __call__(self, im, labels, p=1.0):
+        """Transforms input image and labels with probability 'p'."""
        if self.transform and random.random() < p:
            new = self.transform(image=im, bboxes=labels[:, 1:], class_labels=labels[:, 0])  # transformed
            im, labels = new['image'], np.array([[c, *b] for c, b in zip(new['class_labels'], new['bboxes'])])
@ -111,7 +113,7 @@ def replicate(im, labels):


 def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
-    # Resize and pad image while meeting stride-multiple constraints
+    """Resize and pad image while meeting stride-multiple constraints."""
    shape = im.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)
@ -359,6 +361,7 @@ def classify_transforms(size=224):
 class LetterBox:
    # YOLOv5 LetterBox class for image preprocessing, i.e. T.Compose([LetterBox(size), ToTensor()])
    def __init__(self, size=(640, 640), auto=False, stride=32):
+        """Initializes LetterBox with target size, auto flag and stride for YOLOv5 preprocessing."""
        super().__init__()
        self.h, self.w = (size, size) if isinstance(size, int) else size
        self.auto = auto  # pass max size integer, automatically solve for short side using stride
@ -378,6 +381,7 @@ class LetterBox:
 class CenterCrop:
    # YOLOv5 CenterCrop class for image preprocessing, i.e. T.Compose([CenterCrop(size), ToTensor()])
    def __init__(self, size=640):
+        """Initializes CenterCrop with the target size for YOLOv5 preprocessing."""
        super().__init__()
        self.h, self.w = (size, size) if isinstance(size, int) else size

@ -391,6 +395,7 @@ class CenterCrop:
 class ToTensor:
    # YOLOv5 ToTensor class for image preprocessing, i.e. T.Compose([LetterBox(size), ToTensor()])
    def __init__(self, half=False):
+        """Initialize ToTensor class for YOLOv5 image preprocessing."""
        super().__init__()
        self.half = half

--- a/ultralytics/yolo/data/dataloaders/v5loader.py
+++ b/ultralytics/yolo/data/dataloaders/v5loader.py
@ -162,14 +162,17 @@ class InfiniteDataLoader(dataloader.DataLoader):
    """

    def __init__(self, *args, **kwargs):
+        """Dataloader that reuses workers for same syntax as vanilla DataLoader."""
        super().__init__(*args, **kwargs)
        object.__setattr__(self, 'batch_sampler', _RepeatSampler(self.batch_sampler))
        self.iterator = super().__iter__()

    def __len__(self):
+        """Returns the length of batch_sampler's sampler."""
        return len(self.batch_sampler.sampler)

    def __iter__(self):
+        """Yields len(self) items from the persistent underlying iterator."""
        for _ in range(len(self)):
            yield next(self.iterator)

@ -182,9 +185,11 @@ class _RepeatSampler:
    """

    def __init__(self, sampler):
+        """Sampler that repeats dataset samples infinitely."""
        self.sampler = sampler

    def __iter__(self):
+        """Infinite loop iterating over a given sampler."""
        while True:
            yield from iter(self.sampler)

@ -221,6 +226,7 @@ class LoadScreenshots:
        self.monitor = {'left': self.left, 'top': self.top, 'width': self.width, 'height': self.height}

    def __iter__(self):
+        """Returns the object itself as the iterator."""
        return self

    def __next__(self):
@ -241,6 +247,7 @@ class LoadScreenshots:
 class LoadImages:
    # YOLOv5 image/video dataloader, i.e. `python detect.py --source image.jpg/vid.mp4`
    def __init__(self, path, img_size=640, stride=32, auto=True, transforms=None, vid_stride=1):
+        """Initialize instance variables and check for valid input."""
        if isinstance(path, str) and Path(path).suffix == '.txt':  # *.txt file with img/vid/dir on each line
            path = Path(path).read_text().rsplit()
        files = []
@ -276,10 +283,12 @@ class LoadImages:
                            f'Supported formats are:\nimages: {IMG_FORMATS}\nvideos: {VID_FORMATS}'

    def __iter__(self):
+        """Returns an iterator object for iterating over images or videos found in a directory."""
        self.count = 0
        return self

    def __next__(self):
+        """Iterator's next item, performs transformation on image and returns path, transformed image, original image, capture and size."""
        if self.count == self.nf:
            raise StopIteration
        path = self.files[self.count]
@ -338,12 +347,14 @@ class LoadImages:
        return im

    def __len__(self):
+        """Returns the number of files in the class instance."""
        return self.nf  # number of files


 class LoadStreams:
    # YOLOv5 streamloader, i.e. `python detect.py --source 'rtsp://example.com/media.mp4'  # RTSP, RTMP, HTTP streams`
    def __init__(self, sources='file.streams', img_size=640, stride=32, auto=True, transforms=None, vid_stride=1):
+        """Initialize YOLOv5 stream loader with optional transforms and check input shapes."""
        torch.backends.cudnn.benchmark = True  # faster for fixed-size inference
        self.mode = 'stream'
        self.img_size = img_size
@ -404,10 +415,12 @@ class LoadStreams:
            time.sleep(0.0)  # wait time

    def __iter__(self):
+        """Iterator that returns the class instance."""
        self.count = -1
        return self

    def __next__(self):
+        """Return a tuple containing transformed and resized image data."""
        self.count += 1
        if not all(x.is_alive() for x in self.threads) or cv2.waitKey(1) == ord('q'):  # q to quit
            cv2.destroyAllWindows()
@ -424,6 +437,7 @@ class LoadStreams:
        return self.sources, im, im0, None, ''

    def __len__(self):
+        """Returns the number of sources as the length of the object."""
        return len(self.sources)  # 1E12 frames = 32 streams at 30 FPS for 30 years


@ -607,6 +621,7 @@ class LoadImagesAndLabels(Dataset):
        return cache

    def cache_labels(self, path=Path('./labels.cache'), prefix=''):
+        """Cache labels and save as numpy file for next time."""
        # Cache dataset labels, check images and read shapes
        if path.exists():
            path.unlink()  # remove *.cache file if exists
@ -646,9 +661,11 @@ class LoadImagesAndLabels(Dataset):
        return x

    def __len__(self):
+        """Returns the length of 'im_files' attribute."""
        return len(self.im_files)

    def __getitem__(self, index):
+        """Get a sample and its corresponding label, filename and shape from the dataset."""
        index = self.indices[index]  # linear, shuffled, or image_weights

        hyp = self.hyp
@ -1039,6 +1056,7 @@ class ClassificationDataset(torchvision.datasets.ImageFolder):
    """

    def __init__(self, root, augment, imgsz, cache=False):
+        """Initialize YOLO dataset with root, augmentation, image size, and cache parameters."""
        super().__init__(root=root)
        self.torch_transforms = classify_transforms(imgsz)
        self.album_transforms = classify_albumentations(augment, imgsz) if augment else None
@ -1047,6 +1065,7 @@ class ClassificationDataset(torchvision.datasets.ImageFolder):
        self.samples = [list(x) + [Path(x[0]).with_suffix('.npy'), None] for x in self.samples]  # file, index, npy, im

    def __getitem__(self, i):
+        """Retrieves the sample at index 'i', loading its image (cached in RAM if enabled)."""
        f, j, fn, im = self.samples[i]  # filename, index, filename.with_suffix('.npy'), image
        if self.cache_ram and im is None:
            im = self.samples[i][3] = cv2.imread(f)
--- a/ultralytics/yolo/data/dataset.py
+++ b/ultralytics/yolo/data/dataset.py
@ -127,6 +127,7 @@ class YOLODataset(BaseDataset):
        return x

    def get_labels(self):
+        """Returns dictionary of labels for YOLO training."""
        self.label_files = img2label_paths(self.im_files)
        cache_path = Path(self.label_files[0]).parent.with_suffix('.cache')
        try:
@ -170,6 +171,7 @@ class YOLODataset(BaseDataset):

    # TODO: use hyp config to set all these augmentations
    def build_transforms(self, hyp=None):
+        """Builds and appends transforms to the list."""
        if self.augment:
            hyp.mosaic = hyp.mosaic if self.augment and not self.rect else 0.0
            hyp.mixup = hyp.mixup if self.augment and not self.rect else 0.0
@ -187,6 +189,7 @@ class YOLODataset(BaseDataset):
        return transforms

    def close_mosaic(self, hyp):
+        """Sets mosaic, copy_paste and mixup options to 0.0 and builds transformations."""
        hyp.mosaic = 0.0  # set mosaic ratio=0.0
        hyp.copy_paste = 0.0  # keep the same behavior as previous v8 close-mosaic
        hyp.mixup = 0.0  # keep the same behavior as previous v8 close-mosaic
@ -206,6 +209,7 @@ class YOLODataset(BaseDataset):

    @staticmethod
    def collate_fn(batch):
+        """Collates data samples into batches."""
        new_batch = {}
        keys = batch[0].keys()
        values = list(zip(*[list(b.values()) for b in batch]))
@ -234,6 +238,7 @@ class ClassificationDataset(torchvision.datasets.ImageFolder):
    """

    def __init__(self, root, augment, imgsz, cache=False):
+        """Initialize ClassificationDataset with root, augmentation, image size, and cache settings."""
        super().__init__(root=root)
        self.torch_transforms = classify_transforms(imgsz)
        self.album_transforms = classify_albumentations(augment, imgsz) if augment else None
@ -242,6 +247,7 @@ class ClassificationDataset(torchvision.datasets.ImageFolder):
        self.samples = [list(x) + [Path(x[0]).with_suffix('.npy'), None] for x in self.samples]  # file, index, npy, im

    def __getitem__(self, i):
+        """Retrieves the sample at index 'i', loading its image (cached in RAM if enabled)."""
        f, j, fn, im = self.samples[i]  # filename, index, filename.with_suffix('.npy'), image
        if self.cache_ram and im is None:
            im = self.samples[i][3] = cv2.imread(f)
@ -265,4 +271,5 @@ class ClassificationDataset(torchvision.datasets.ImageFolder):
 class SemanticDataset(BaseDataset):

    def __init__(self):
+        """Initialize a SemanticDataset object."""
        pass
--- a/ultralytics/yolo/data/utils.py
+++ b/ultralytics/yolo/data/utils.py
@ -359,6 +359,7 @@ class HUBDatasetStats():
        return True, str(dir), self._find_yaml(dir)  # zipped, data_dir, yaml_path

    def _hub_ops(self, f):
+        """Saves a compressed image for HUB previews."""
        compress_one_image(f, self.im_dir / Path(f).name)  # save to dataset-hub

    def get_json(self, save=False, verbose=False):