Fix PIL Image exif_size() function (#4355)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2023-08-14 17:48:54 +02:00
parent c940d29d4f
commit 834f94f899
6 changed files with 55 additions and 23 deletions
--- a/setup.cfg
+++ b/setup.cfg
@@ -15,9 +15,6 @@ addopts =
    --doctest-modules
    --durations=25
    --color=yes
-    --cov=ultralytics/
-    --cov-report=xml
-    --no-cov-on-fail
 [coverage:run]
 source = ultralytics/
--- a/tests/test_python.py
+++ b/tests/test_python.py
@@ -1,5 +1,4 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 from pathlib import Path
 import cv2
@@ -221,3 +220,24 @@ def test_results():
            print(r.path)
            for k in r.keys:
                print(getattr(r, k))
+def test_data_utils():
+    # Test functions in ultralytics/data/utils.py
+    from ultralytics.data.utils import autosplit, zip_directory
+    # from ultralytics.utils.files import WorkingDirectory
+    # with WorkingDirectory(ROOT.parent / 'tests'):
+    autosplit()
+    zip_directory(ROOT / 'assets')  # zip
+    Path(ROOT / 'assets.zip').unlink()  # delete zip
+    # from ultralytics.data.utils import HUBDatasetStats
+    # from ultralytics.utils.downloads import download
+    # Path('coco8.zip').unlink(missing_ok=True)
+    # download('https://github.com/ultralytics/hub/raw/master/example_datasets/coco8.zip', unzip=False)
+    # shutil.move('coco8.zip', 'tests')
+    # stats = HUBDatasetStats('tests/coco8.zip', task='detect')
+    # stats.get_json(save=False)
+    # stats.process_images()
--- a/ultralytics/data/loaders.py
+++ b/ultralytics/data/loaders.py
@@ -405,14 +405,14 @@ def get_best_youtube_url(url, use_pafy=True):
    if use_pafy:
        check_requirements(('pafy', 'youtube_dl==2020.12.2'))
        import pafy  # noqa
-        return pafy.new(url).getbest(preftype='mp4').url
+        return pafy.new(url).getbestvideo(preftype='mp4').url
    else:
        check_requirements('yt-dlp')
        import yt_dlp
        with yt_dlp.YoutubeDL({'quiet': True}) as ydl:
            info_dict = ydl.extract_info(url, download=False)  # extract info
        for f in info_dict.get('formats', None):
-            if f['vcodec'] != 'none' and f['acodec'] == 'none' and f['ext'] == 'mp4':
+            if f['vcodec'] != 'none' and f['acodec'] == 'none' and f['ext'] == 'mp4' and f.get('width') > 1280:
                return f.get('url', None)
--- a/ultralytics/data/utils.py
+++ b/ultralytics/data/utils.py
@@ -1,6 +1,5 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 import contextlib
 import hashlib
 import json
 import os
@@ -49,13 +48,14 @@ def get_hash(paths):
    return h.hexdigest()  # return hash
-def exif_size(img):
+def exif_size(img: Image.Image):
    """Returns exif-corrected PIL size."""
    s = img.size  # (width, height)
-    with contextlib.suppress(Exception):
+    exif = img.getexif()
-        rotation = dict(img._getexif().items())[orientation]
+    if exif:
+        rotation = exif.get(274, None)  # the key for the orientation tag in the EXIF data is 274 (in decimal)
        if rotation in [6, 8]:  # rotation 270 or 90
-            s = (s[1], s[0])
+            s = s[1], s[0]
    return s
@@ -190,7 +190,21 @@ def polygons2masks_overlap(imgsz, segments, downsample_ratio=1):
 def check_det_dataset(dataset, autodownload=True):
-    """Download, check and/or unzip dataset if not found locally."""
+    """
+    Download, verify, and/or unzip a dataset if not found locally.
+    This function checks the availability of a specified dataset, and if not found, it has the option to download and
+    unzip the dataset. It then reads and parses the accompanying YAML data, ensuring key requirements are met and also
+    resolves paths related to the dataset.
+    Args:
+        dataset (str): Path to the dataset or dataset descriptor (like a YAML file).
+        autodownload (bool, optional): Whether to automatically download the dataset if not found. Defaults to True.
+    Returns:
+        (dict): Parsed dataset information and paths.
+    """
    data = check_file(dataset)
    # Download (optional)
@@ -327,7 +341,7 @@ def check_cls_dataset(dataset: str, split=''):
    return {'train': train_set, 'val': val_set or test_set, 'test': test_set or val_set, 'nc': nc, 'names': names}
-class HUBDatasetStats():
+class HUBDatasetStats:
    """
    A class for generating HUB dataset JSON and `-hub` dataset directory.
@@ -371,11 +385,10 @@ class HUBDatasetStats():
    def _find_yaml(dir):
        """Return data.yaml file."""
        files = list(dir.glob('*.yaml')) or list(dir.rglob('*.yaml'))  # try root level first and then recursive
-        assert files, f'No *.yaml file found in {dir}'
+        assert files, f'No *.yaml file found in {dir.resolve()}'
        if len(files) > 1:
            files = [f for f in files if f.stem == dir.stem]  # prefer *.yaml files that match dir name
-            assert files, f'Multiple *.yaml files found in {dir}, only 1 *.yaml file allowed'
+        assert len(files) == 1, f"Expected 1 *.yaml file in '{dir.resolve()}', but found {len(files)}.\n{files}"
-        assert len(files) == 1, f'Multiple *.yaml files found: {files}, only 1 *.yaml file allowed in {dir}'
        return files[0]
    def _unzip(self, path):
@@ -478,6 +491,7 @@ def compress_one_image(f, f_new=None, max_dim=1920, quality=50):
            compress_one_image(f)
        ```
    """
    try:  # use PIL
        im = Image.open(f)
        r = max_dim / max(im.height, im.width)  # ratio
@@ -546,18 +560,18 @@ def zip_directory(dir, use_zipfile_library=True):
        shutil.make_archive(dir, 'zip', dir)
-def autosplit(path=DATASETS_DIR / 'coco128/images', weights=(0.9, 0.1, 0.0), annotated_only=False):
+def autosplit(path=DATASETS_DIR / 'coco8/images', weights=(0.9, 0.1, 0.0), annotated_only=False):
    """
    Autosplit a dataset into train/val/test splits and save the resulting splits into autosplit_*.txt files.
    Args:
-        path (Path, optional): Path to images directory. Defaults to DATASETS_DIR / 'coco128/images'.
+        path (Path, optional): Path to images directory. Defaults to DATASETS_DIR / 'coco8/images'.
        weights (list | tuple, optional): Train, validation, and test split fractions. Defaults to (0.9, 0.1, 0.0).
        annotated_only (bool, optional): If True, only images with an associated txt file are used. Defaults to False.
    Example:
        ```python
-        from ultralytics.utils.dataloaders import autosplit
+        from ultralytics.data.utils import autosplit
        autosplit()
        ```
--- a/ultralytics/engine/model.py
+++ b/ultralytics/engine/model.py
@@ -282,6 +282,8 @@ class Model:
        overrides['rect'] = True  # rect batches as default
        overrides.update(kwargs)
        overrides['mode'] = 'val'
+        if overrides.get('imgsz') is None:
+            overrides['imgsz'] = self.model.args['imgsz']  # use trained imgsz unless custom value is passed
        args = get_cfg(cfg=DEFAULT_CFG, overrides=overrides)
        args.data = data or args.data
        if 'task' in overrides:
@@ -289,8 +291,6 @@ class Model:
        else:
            args.task = self.task
        validator = validator or self.smart_load('validator')
-        if args.imgsz == DEFAULT_CFG.imgsz and not isinstance(self.model, (str, Path)):
-            args.imgsz = self.model.args['imgsz']  # use trained imgsz unless custom value is passed
        args.imgsz = check_imgsz(args.imgsz, max_dim=1)
        validator = validator(args=args, _callbacks=self.callbacks)
--- a/ultralytics/engine/trainer.py
+++ b/ultralytics/engine/trainer.py
@@ -5,6 +5,7 @@ Train a model on a dataset
 Usage:
    $ yolo mode=train model=yolov8n.pt data=coco128.yaml imgsz=640 epochs=100 batch=16
 """
 import math
 import os
 import subprocess
@@ -48,8 +49,8 @@ class BaseTrainer:
        callbacks (defaultdict): Dictionary of callbacks.
        save_dir (Path): Directory to save results.
        wdir (Path): Directory to save weights.
-        last (Path): Path to last checkpoint.
+        last (Path): Path to the last checkpoint.
-        best (Path): Path to best checkpoint.
+        best (Path): Path to the best checkpoint.
        save_period (int): Save checkpoint every x epochs (disabled if < 1).
        batch_size (int): Batch size for training.
        epochs (int): Number of epochs to train for.