From 834f94f8994a881fbc12dc583fbb917355a38317 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Mon, 14 Aug 2023 17:48:54 +0200 Subject: [PATCH] Fix PIL Image `exif_size()` function (#4355) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- setup.cfg | 3 --- tests/test_python.py | 22 ++++++++++++++++++- ultralytics/data/loaders.py | 4 ++-- ultralytics/data/utils.py | 40 +++++++++++++++++++++++------------ ultralytics/engine/model.py | 4 ++-- ultralytics/engine/trainer.py | 5 +++-- 6 files changed, 55 insertions(+), 23 deletions(-) diff --git a/setup.cfg b/setup.cfg index 1cac7ec..6ce9c00 100644 --- a/setup.cfg +++ b/setup.cfg @@ -15,9 +15,6 @@ addopts = --doctest-modules --durations=25 --color=yes - --cov=ultralytics/ - --cov-report=xml - --no-cov-on-fail [coverage:run] source = ultralytics/ diff --git a/tests/test_python.py b/tests/test_python.py index f5de134..561b832 100644 --- a/tests/test_python.py +++ b/tests/test_python.py @@ -1,5 +1,4 @@ # Ultralytics YOLO 🚀, AGPL-3.0 license - from pathlib import Path import cv2 @@ -221,3 +220,24 @@ def test_results(): print(r.path) for k in r.keys: print(getattr(r, k)) + + +def test_data_utils(): + # Test functions in ultralytics/data/utils.py + from ultralytics.data.utils import autosplit, zip_directory + + # from ultralytics.utils.files import WorkingDirectory + # with WorkingDirectory(ROOT.parent / 'tests'): + + autosplit() + zip_directory(ROOT / 'assets') # zip + Path(ROOT / 'assets.zip').unlink() # delete zip + + # from ultralytics.data.utils import HUBDatasetStats + # from ultralytics.utils.downloads import download + # Path('coco8.zip').unlink(missing_ok=True) + # download('https://github.com/ultralytics/hub/raw/master/example_datasets/coco8.zip', unzip=False) + # shutil.move('coco8.zip', 'tests') + # stats = HUBDatasetStats('tests/coco8.zip', task='detect') + # stats.get_json(save=False) + # stats.process_images() diff --git a/ultralytics/data/loaders.py b/ultralytics/data/loaders.py index 88246e5..f84bcad 100644 --- a/ultralytics/data/loaders.py +++ b/ultralytics/data/loaders.py @@ -405,14 +405,14 @@ def get_best_youtube_url(url, use_pafy=True): if use_pafy: check_requirements(('pafy', 'youtube_dl==2020.12.2')) import pafy # noqa - return pafy.new(url).getbest(preftype='mp4').url + return pafy.new(url).getbestvideo(preftype='mp4').url else: check_requirements('yt-dlp') import yt_dlp with yt_dlp.YoutubeDL({'quiet': True}) as ydl: info_dict = ydl.extract_info(url, download=False) # extract info for f in info_dict.get('formats', None): - if f['vcodec'] != 'none' and f['acodec'] == 'none' and f['ext'] == 'mp4': + if f['vcodec'] != 'none' and f['acodec'] == 'none' and f['ext'] == 'mp4' and f.get('width') > 1280: return f.get('url', None) diff --git a/ultralytics/data/utils.py b/ultralytics/data/utils.py index 2f48367..42fdd8e 100644 --- a/ultralytics/data/utils.py +++ b/ultralytics/data/utils.py @@ -1,6 +1,5 @@ # Ultralytics YOLO 🚀, AGPL-3.0 license -import contextlib import hashlib import json import os @@ -49,13 +48,14 @@ def get_hash(paths): return h.hexdigest() # return hash -def exif_size(img): +def exif_size(img: Image.Image): """Returns exif-corrected PIL size.""" s = img.size # (width, height) - with contextlib.suppress(Exception): - rotation = dict(img._getexif().items())[orientation] + exif = img.getexif() + if exif: + rotation = exif.get(274, None) # the key for the orientation tag in the EXIF data is 274 (in decimal) if rotation in [6, 8]: # rotation 270 or 90 - s = (s[1], s[0]) + s = s[1], s[0] return s @@ -190,7 +190,21 @@ def polygons2masks_overlap(imgsz, segments, downsample_ratio=1): def check_det_dataset(dataset, autodownload=True): - """Download, check and/or unzip dataset if not found locally.""" + """ + Download, verify, and/or unzip a dataset if not found locally. + + This function checks the availability of a specified dataset, and if not found, it has the option to download and + unzip the dataset. It then reads and parses the accompanying YAML data, ensuring key requirements are met and also + resolves paths related to the dataset. + + Args: + dataset (str): Path to the dataset or dataset descriptor (like a YAML file). + autodownload (bool, optional): Whether to automatically download the dataset if not found. Defaults to True. + + Returns: + (dict): Parsed dataset information and paths. + """ + data = check_file(dataset) # Download (optional) @@ -327,7 +341,7 @@ def check_cls_dataset(dataset: str, split=''): return {'train': train_set, 'val': val_set or test_set, 'test': test_set or val_set, 'nc': nc, 'names': names} -class HUBDatasetStats(): +class HUBDatasetStats: """ A class for generating HUB dataset JSON and `-hub` dataset directory. @@ -371,11 +385,10 @@ class HUBDatasetStats(): def _find_yaml(dir): """Return data.yaml file.""" files = list(dir.glob('*.yaml')) or list(dir.rglob('*.yaml')) # try root level first and then recursive - assert files, f'No *.yaml file found in {dir}' + assert files, f'No *.yaml file found in {dir.resolve()}' if len(files) > 1: files = [f for f in files if f.stem == dir.stem] # prefer *.yaml files that match dir name - assert files, f'Multiple *.yaml files found in {dir}, only 1 *.yaml file allowed' - assert len(files) == 1, f'Multiple *.yaml files found: {files}, only 1 *.yaml file allowed in {dir}' + assert len(files) == 1, f"Expected 1 *.yaml file in '{dir.resolve()}', but found {len(files)}.\n{files}" return files[0] def _unzip(self, path): @@ -478,6 +491,7 @@ def compress_one_image(f, f_new=None, max_dim=1920, quality=50): compress_one_image(f) ``` """ + try: # use PIL im = Image.open(f) r = max_dim / max(im.height, im.width) # ratio @@ -546,18 +560,18 @@ def zip_directory(dir, use_zipfile_library=True): shutil.make_archive(dir, 'zip', dir) -def autosplit(path=DATASETS_DIR / 'coco128/images', weights=(0.9, 0.1, 0.0), annotated_only=False): +def autosplit(path=DATASETS_DIR / 'coco8/images', weights=(0.9, 0.1, 0.0), annotated_only=False): """ Autosplit a dataset into train/val/test splits and save the resulting splits into autosplit_*.txt files. Args: - path (Path, optional): Path to images directory. Defaults to DATASETS_DIR / 'coco128/images'. + path (Path, optional): Path to images directory. Defaults to DATASETS_DIR / 'coco8/images'. weights (list | tuple, optional): Train, validation, and test split fractions. Defaults to (0.9, 0.1, 0.0). annotated_only (bool, optional): If True, only images with an associated txt file are used. Defaults to False. Example: ```python - from ultralytics.utils.dataloaders import autosplit + from ultralytics.data.utils import autosplit autosplit() ``` diff --git a/ultralytics/engine/model.py b/ultralytics/engine/model.py index 3e9b465..435ca85 100644 --- a/ultralytics/engine/model.py +++ b/ultralytics/engine/model.py @@ -282,6 +282,8 @@ class Model: overrides['rect'] = True # rect batches as default overrides.update(kwargs) overrides['mode'] = 'val' + if overrides.get('imgsz') is None: + overrides['imgsz'] = self.model.args['imgsz'] # use trained imgsz unless custom value is passed args = get_cfg(cfg=DEFAULT_CFG, overrides=overrides) args.data = data or args.data if 'task' in overrides: @@ -289,8 +291,6 @@ class Model: else: args.task = self.task validator = validator or self.smart_load('validator') - if args.imgsz == DEFAULT_CFG.imgsz and not isinstance(self.model, (str, Path)): - args.imgsz = self.model.args['imgsz'] # use trained imgsz unless custom value is passed args.imgsz = check_imgsz(args.imgsz, max_dim=1) validator = validator(args=args, _callbacks=self.callbacks) diff --git a/ultralytics/engine/trainer.py b/ultralytics/engine/trainer.py index a6c52a4..a91cf67 100644 --- a/ultralytics/engine/trainer.py +++ b/ultralytics/engine/trainer.py @@ -5,6 +5,7 @@ Train a model on a dataset Usage: $ yolo mode=train model=yolov8n.pt data=coco128.yaml imgsz=640 epochs=100 batch=16 """ + import math import os import subprocess @@ -48,8 +49,8 @@ class BaseTrainer: callbacks (defaultdict): Dictionary of callbacks. save_dir (Path): Directory to save results. wdir (Path): Directory to save weights. - last (Path): Path to last checkpoint. - best (Path): Path to best checkpoint. + last (Path): Path to the last checkpoint. + best (Path): Path to the best checkpoint. save_period (int): Save checkpoint every x epochs (disabled if < 1). batch_size (int): Batch size for training. epochs (int): Number of epochs to train for.