From 834f94f8994a881fbc12dc583fbb917355a38317 Mon Sep 17 00:00:00 2001
From: Glenn Jocher <glenn.jocher@ultralytics.com>
Date: Mon, 14 Aug 2023 17:48:54 +0200
Subject: [PATCH] Fix PIL Image `exif_size()` function (#4355)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 setup.cfg                     |  3 ---
 tests/test_python.py          | 22 ++++++++++++++++++-
 ultralytics/data/loaders.py   |  4 ++--
 ultralytics/data/utils.py     | 40 +++++++++++++++++++++++------------
 ultralytics/engine/model.py   |  4 ++--
 ultralytics/engine/trainer.py |  5 +++--
 6 files changed, 55 insertions(+), 23 deletions(-)

diff --git a/setup.cfg b/setup.cfg
index 1cac7ec..6ce9c00 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -15,9 +15,6 @@ addopts =
     --doctest-modules
     --durations=25
     --color=yes
-    --cov=ultralytics/
-    --cov-report=xml
-    --no-cov-on-fail
 
 [coverage:run]
 source = ultralytics/
diff --git a/tests/test_python.py b/tests/test_python.py
index f5de134..561b832 100644
--- a/tests/test_python.py
+++ b/tests/test_python.py
@@ -1,5 +1,4 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
-
 from pathlib import Path
 
 import cv2
@@ -221,3 +220,24 @@ def test_results():
             print(r.path)
             for k in r.keys:
                 print(getattr(r, k))
+
+
+def test_data_utils():
+    """Exercise autosplit() and zip_directory() from ultralytics/data/utils.py."""
+    from ultralytics.data.utils import autosplit, zip_directory
+
+    # from ultralytics.utils.files import WorkingDirectory
+    # with WorkingDirectory(ROOT.parent / 'tests'):
+
+    autosplit()
+    zip_directory(ROOT / 'assets')  # zip
+    Path(ROOT / 'assets.zip').unlink()  # delete zip
+
+    # from ultralytics.data.utils import HUBDatasetStats
+    # from ultralytics.utils.downloads import download
+    # Path('coco8.zip').unlink(missing_ok=True)
+    # download('https://github.com/ultralytics/hub/raw/master/example_datasets/coco8.zip', unzip=False)
+    # shutil.move('coco8.zip', 'tests')
+    # stats = HUBDatasetStats('tests/coco8.zip', task='detect')
+    # stats.get_json(save=False)
+    # stats.process_images()
diff --git a/ultralytics/data/loaders.py b/ultralytics/data/loaders.py
index 88246e5..f84bcad 100644
--- a/ultralytics/data/loaders.py
+++ b/ultralytics/data/loaders.py
@@ -405,14 +405,14 @@ def get_best_youtube_url(url, use_pafy=True):
     if use_pafy:
         check_requirements(('pafy', 'youtube_dl==2020.12.2'))
         import pafy  # noqa
-        return pafy.new(url).getbest(preftype='mp4').url
+        return pafy.new(url).getbestvideo(preftype='mp4').url
     else:
         check_requirements('yt-dlp')
         import yt_dlp
         with yt_dlp.YoutubeDL({'quiet': True}) as ydl:
             info_dict = ydl.extract_info(url, download=False)  # extract info
         for f in info_dict.get('formats', None):
-            if f['vcodec'] != 'none' and f['acodec'] == 'none' and f['ext'] == 'mp4':
+            if f['vcodec'] != 'none' and f['acodec'] == 'none' and f['ext'] == 'mp4' and (f.get('width') or 0) > 1280:
                 return f.get('url', None)
 
 
diff --git a/ultralytics/data/utils.py b/ultralytics/data/utils.py
index 2f48367..42fdd8e 100644
--- a/ultralytics/data/utils.py
+++ b/ultralytics/data/utils.py
@@ -1,6 +1,5 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 
-import contextlib
 import hashlib
 import json
 import os
@@ -49,13 +48,14 @@ def get_hash(paths):
     return h.hexdigest()  # return hash
 
 
-def exif_size(img):
+def exif_size(img: Image.Image):
     """Returns exif-corrected PIL size."""
     s = img.size  # (width, height)
-    with contextlib.suppress(Exception):
-        rotation = dict(img._getexif().items())[orientation]
+    exif = img.getexif()
+    if exif:
+        rotation = exif.get(274, None)  # the key for the orientation tag in the EXIF data is 274 (in decimal)
         if rotation in [6, 8]:  # rotation 270 or 90
-            s = (s[1], s[0])
+            s = s[1], s[0]
     return s
 
 
@@ -190,7 +190,21 @@ def polygons2masks_overlap(imgsz, segments, downsample_ratio=1):
 
 
 def check_det_dataset(dataset, autodownload=True):
-    """Download, check and/or unzip dataset if not found locally."""
+    """
+    Download, verify, and/or unzip a dataset if not found locally.
+
+    This function checks the availability of a specified dataset, and if not found, it has the option to download and
+    unzip the dataset. It then reads and parses the accompanying YAML data, ensuring key requirements are met and also
+    resolves paths related to the dataset.
+
+    Args:
+        dataset (str): Path to the dataset or dataset descriptor (like a YAML file).
+        autodownload (bool, optional): Whether to automatically download the dataset if not found. Defaults to True.
+
+    Returns:
+        (dict): Parsed dataset information and paths.
+    """
+
     data = check_file(dataset)
 
     # Download (optional)
@@ -327,7 +341,7 @@ def check_cls_dataset(dataset: str, split=''):
     return {'train': train_set, 'val': val_set or test_set, 'test': test_set or val_set, 'nc': nc, 'names': names}
 
 
-class HUBDatasetStats():
+class HUBDatasetStats:
     """
     A class for generating HUB dataset JSON and `-hub` dataset directory.
 
@@ -371,11 +385,10 @@ class HUBDatasetStats():
     def _find_yaml(dir):
         """Return data.yaml file."""
         files = list(dir.glob('*.yaml')) or list(dir.rglob('*.yaml'))  # try root level first and then recursive
-        assert files, f'No *.yaml file found in {dir}'
+        assert files, f'No *.yaml file found in {dir.resolve()}'
         if len(files) > 1:
             files = [f for f in files if f.stem == dir.stem]  # prefer *.yaml files that match dir name
-            assert files, f'Multiple *.yaml files found in {dir}, only 1 *.yaml file allowed'
-        assert len(files) == 1, f'Multiple *.yaml files found: {files}, only 1 *.yaml file allowed in {dir}'
+        assert len(files) == 1, f"Expected 1 *.yaml file in '{dir.resolve()}', but found {len(files)}.\n{files}"
         return files[0]
 
     def _unzip(self, path):
@@ -478,6 +491,7 @@ def compress_one_image(f, f_new=None, max_dim=1920, quality=50):
             compress_one_image(f)
         ```
     """
+
     try:  # use PIL
         im = Image.open(f)
         r = max_dim / max(im.height, im.width)  # ratio
@@ -546,18 +560,18 @@ def zip_directory(dir, use_zipfile_library=True):
         shutil.make_archive(dir, 'zip', dir)
 
 
-def autosplit(path=DATASETS_DIR / 'coco128/images', weights=(0.9, 0.1, 0.0), annotated_only=False):
+def autosplit(path=DATASETS_DIR / 'coco8/images', weights=(0.9, 0.1, 0.0), annotated_only=False):
     """
     Autosplit a dataset into train/val/test splits and save the resulting splits into autosplit_*.txt files.
 
     Args:
-        path (Path, optional): Path to images directory. Defaults to DATASETS_DIR / 'coco128/images'.
+        path (Path, optional): Path to images directory. Defaults to DATASETS_DIR / 'coco8/images'.
         weights (list | tuple, optional): Train, validation, and test split fractions. Defaults to (0.9, 0.1, 0.0).
         annotated_only (bool, optional): If True, only images with an associated txt file are used. Defaults to False.
 
     Example:
         ```python
-        from ultralytics.utils.dataloaders import autosplit
+        from ultralytics.data.utils import autosplit
 
         autosplit()
         ```
diff --git a/ultralytics/engine/model.py b/ultralytics/engine/model.py
index 3e9b465..435ca85 100644
--- a/ultralytics/engine/model.py
+++ b/ultralytics/engine/model.py
@@ -282,6 +282,8 @@ class Model:
         overrides['rect'] = True  # rect batches as default
         overrides.update(kwargs)
         overrides['mode'] = 'val'
+        if overrides.get('imgsz') is None and not isinstance(self.model, (str, Path)):
+            overrides['imgsz'] = self.model.args['imgsz']  # use trained imgsz unless custom value is passed
         args = get_cfg(cfg=DEFAULT_CFG, overrides=overrides)
         args.data = data or args.data
         if 'task' in overrides:
@@ -289,8 +291,6 @@ class Model:
         else:
             args.task = self.task
         validator = validator or self.smart_load('validator')
-        if args.imgsz == DEFAULT_CFG.imgsz and not isinstance(self.model, (str, Path)):
-            args.imgsz = self.model.args['imgsz']  # use trained imgsz unless custom value is passed
         args.imgsz = check_imgsz(args.imgsz, max_dim=1)
 
         validator = validator(args=args, _callbacks=self.callbacks)
diff --git a/ultralytics/engine/trainer.py b/ultralytics/engine/trainer.py
index a6c52a4..a91cf67 100644
--- a/ultralytics/engine/trainer.py
+++ b/ultralytics/engine/trainer.py
@@ -5,6 +5,7 @@ Train a model on a dataset
 Usage:
     $ yolo mode=train model=yolov8n.pt data=coco128.yaml imgsz=640 epochs=100 batch=16
 """
+
 import math
 import os
 import subprocess
@@ -48,8 +49,8 @@ class BaseTrainer:
         callbacks (defaultdict): Dictionary of callbacks.
         save_dir (Path): Directory to save results.
         wdir (Path): Directory to save weights.
-        last (Path): Path to last checkpoint.
-        best (Path): Path to best checkpoint.
+        last (Path): Path to the last checkpoint.
+        best (Path): Path to the best checkpoint.
         save_period (int): Save checkpoint every x epochs (disabled if < 1).
         batch_size (int): Batch size for training.
         epochs (int): Number of epochs to train for.