From 9a0555eca477d98cf044c698cb0e9803030441a6 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Tue, 15 Aug 2023 22:02:23 +0200 Subject: [PATCH] `ultralytics 8.0.155` allow `imgsz` and `batch` resume changes (#4366) Co-authored-by: Mostafa Nemati <58460889+monemati@users.noreply.github.com> Co-authored-by: Eduard Voiculescu --- docs/modes/predict.md | 2 +- docs/reference/trackers/utils/matching.md | 32 --------------- docs/usage/cfg.md | 2 +- examples/YOLOv8-CPP-Inference/README.md | 2 +- tests/test_cli.py | 2 +- tests/test_python.py | 20 ++++----- ultralytics/__init__.py | 2 +- ultralytics/cfg/default.yaml | 2 +- ultralytics/data/build.py | 2 +- ultralytics/data/loaders.py | 29 ++++---------- ultralytics/data/utils.py | 22 ++++------ ultralytics/engine/exporter.py | 11 ++--- ultralytics/engine/trainer.py | 11 +++-- ultralytics/trackers/utils/__init__.py | 1 + ultralytics/trackers/utils/matching.py | 49 ++++++++++++++++++----- 15 files changed, 84 insertions(+), 105 deletions(-) diff --git a/docs/modes/predict.md b/docs/modes/predict.md index ea65d39..e373d01 100644 --- a/docs/modes/predict.md +++ b/docs/modes/predict.md @@ -321,7 +321,7 @@ All supported arguments: | `augment` | `bool` | `False` | apply image augmentation to prediction sources | | `agnostic_nms` | `bool` | `False` | class-agnostic NMS | | `retina_masks` | `bool` | `False` | use high-resolution segmentation masks | -| `classes` | `None or list` | `None` | filter results by class, i.e. class=0, or class=[0,2,3] | +| `classes` | `None or list` | `None` | filter results by class, i.e. classes=0, or classes=[0,2,3] | | `boxes` | `bool` | `True` | Show boxes in segmentation predictions | ## Image and Video Formats diff --git a/docs/reference/trackers/utils/matching.md b/docs/reference/trackers/utils/matching.md index b9bff02..8a4ca90 100644 --- a/docs/reference/trackers/utils/matching.md +++ b/docs/reference/trackers/utils/matching.md @@ -9,50 +9,18 @@ keywords: Ultralytics, Trackers Utils, Matching, merge_matches, linear_assignmen Full source code for this file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/trackers/utils/matching.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/trackers/utils/matching.py). Help us fix any issues you see by submitting a [Pull Request](https://docs.ultralytics.com/help/contributing/) 🛠️. Thank you 🙏! ---- -## ::: ultralytics.trackers.utils.matching.merge_matches -

- ---- -## ::: ultralytics.trackers.utils.matching._indices_to_matches -

- --- ## ::: ultralytics.trackers.utils.matching.linear_assignment

---- -## ::: ultralytics.trackers.utils.matching.ious -

- --- ## ::: ultralytics.trackers.utils.matching.iou_distance

---- -## ::: ultralytics.trackers.utils.matching.v_iou_distance -

- --- ## ::: ultralytics.trackers.utils.matching.embedding_distance

---- -## ::: ultralytics.trackers.utils.matching.gate_cost_matrix -

- ---- -## ::: ultralytics.trackers.utils.matching.fuse_motion -

- ---- -## ::: ultralytics.trackers.utils.matching.fuse_iou -

- --- ## ::: ultralytics.trackers.utils.matching.fuse_score

- ---- -## ::: ultralytics.trackers.utils.matching.bbox_ious -

diff --git a/docs/usage/cfg.md b/docs/usage/cfg.md index 8db9c6d..f2c0d2f 100644 --- a/docs/usage/cfg.md +++ b/docs/usage/cfg.md @@ -154,7 +154,7 @@ The prediction settings for YOLO models encompass a range of hyperparameters and | `augment` | `False` | apply image augmentation to prediction sources | | `agnostic_nms` | `False` | class-agnostic NMS | | `retina_masks` | `False` | use high-resolution segmentation masks | -| `classes` | `None` | filter results by class, i.e. class=0, or class=[0,2,3] | +| `classes` | `None` | filter results by class, i.e. classes=0, or classes=[0,2,3] | | `boxes` | `True` | Show boxes in segmentation predictions | [Predict Guide](../modes/predict.md){ .md-button .md-button--primary} diff --git a/examples/YOLOv8-CPP-Inference/README.md b/examples/YOLOv8-CPP-Inference/README.md index 8e32cbb..601c1d0 100644 --- a/examples/YOLOv8-CPP-Inference/README.md +++ b/examples/YOLOv8-CPP-Inference/README.md @@ -8,7 +8,7 @@ This example demonstrates how to perform inference using YOLOv8 and YOLOv5 model git clone ultralytics cd ultralytics pip install . -cd examples/cpp_ +cd examples/YOLOv8-CPP-Inference # Add a **yolov8\_.onnx** and/or **yolov5\_.onnx** model(s) to the ultralytics folder. # Edit the **main.cpp** to change the **projectBasePath** to match your user. diff --git a/tests/test_cli.py b/tests/test_cli.py index a5dc8f1..24dc4d2 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -55,7 +55,7 @@ def test_predict_online(task, model, data): mode = 'track' if task in ('detect', 'segment', 'pose') else 'predict' # mode for video inference model = WEIGHT_DIR / model run(f'yolo predict model={model}.pt source=https://ultralytics.com/images/bus.jpg imgsz=32') - run(f'yolo {mode} model={model}.pt source=https://ultralytics.com/assets/decelera_landscape_min.mov imgsz=32') + run(f'yolo {mode} model={model}.pt source=https://ultralytics.com/assets/decelera_landscape_min.mov imgsz=96') # Run Python YouTube tracking because CLI is broken. TODO: fix CLI YouTube # run(f'yolo {mode} model={model}.pt source=https://youtu.be/G17sBkb38XQ imgsz=32 tracker=bytetrack.yaml') diff --git a/tests/test_python.py b/tests/test_python.py index a441a55..da59a81 100644 --- a/tests/test_python.py +++ b/tests/test_python.py @@ -18,6 +18,7 @@ WEIGHTS_DIR = Path(SETTINGS['weights_dir']) MODEL = WEIGHTS_DIR / 'path with spaces' / 'yolov8n.pt' # test spaces in path CFG = 'yolov8n.yaml' SOURCE = ROOT / 'assets/bus.jpg' +TMP = (ROOT / '../tests/tmp').resolve() # temp directory for test files SOURCE_GREYSCALE = Path(f'{SOURCE.parent / SOURCE.stem}_greyscale.jpg') SOURCE_RGBA = Path(f'{SOURCE.parent / SOURCE.stem}_4ch.png') @@ -92,7 +93,7 @@ def test_predict_grey_and_4ch(): def test_track_stream(): # Test YouTube streaming inference (short 10 frame video) with non-default ByteTrack tracker model = YOLO(MODEL) - model.track('https://youtu.be/G17sBkb38XQ', imgsz=32, tracker='bytetrack.yaml') + model.track('https://youtu.be/G17sBkb38XQ', imgsz=96, tracker='bytetrack.yaml') def test_val(): @@ -232,16 +233,15 @@ def test_data_utils(): # from ultralytics.utils.files import WorkingDirectory # with WorkingDirectory(ROOT.parent / 'tests'): - Path('tests/coco8.zip').unlink(missing_ok=True) - Path('coco8.zip').unlink(missing_ok=True) + shutil.rmtree(TMP, ignore_errors=True) + TMP.mkdir(parents=True) + download('https://github.com/ultralytics/hub/raw/master/example_datasets/coco8.zip', unzip=False) - shutil.move('coco8.zip', 'tests') - shutil.rmtree('tests/coco8', ignore_errors=True) - stats = HUBDatasetStats('tests/coco8.zip', task='detect') + shutil.move('coco8.zip', TMP) + stats = HUBDatasetStats(TMP / 'coco8.zip', task='detect') stats.get_json(save=False) stats.process_images() - autosplit('tests/coco8') - zip_directory('tests/coco8/images/val') # zip - shutil.rmtree('tests/coco8', ignore_errors=True) - shutil.rmtree('tests/coco8-hub', ignore_errors=True) + autosplit(TMP / 'coco8') + zip_directory(TMP / 'coco8/images/val') # zip + shutil.rmtree(TMP) diff --git a/ultralytics/__init__.py b/ultralytics/__init__.py index 204098b..5586d04 100644 --- a/ultralytics/__init__.py +++ b/ultralytics/__init__.py @@ -1,6 +1,6 @@ # Ultralytics YOLO 🚀, AGPL-3.0 license -__version__ = '8.0.154' +__version__ = '8.0.155' from ultralytics.hub import start from ultralytics.models import RTDETR, SAM, YOLO diff --git a/ultralytics/cfg/default.yaml b/ultralytics/cfg/default.yaml index 4a99b8e..cdeb959 100644 --- a/ultralytics/cfg/default.yaml +++ b/ultralytics/cfg/default.yaml @@ -64,7 +64,7 @@ line_width: # (int, optional) line width of the bounding boxes, auto if missin visualize: False # (bool) visualize model features augment: False # (bool) apply image augmentation to prediction sources agnostic_nms: False # (bool) class-agnostic NMS -classes: # (int | list[int], optional) filter results by class, i.e. class=0, or class=[0,2,3] +classes: # (int | list[int], optional) filter results by class, i.e. classes=0, or classes=[0,2,3] retina_masks: False # (bool) use high-resolution segmentation masks boxes: True # (bool) Show boxes in segmentation predictions diff --git a/ultralytics/data/build.py b/ultralytics/data/build.py index 8fd8602..3248a7e 100644 --- a/ultralytics/data/build.py +++ b/ultralytics/data/build.py @@ -120,7 +120,7 @@ def check_source(source): screenshot = source.lower() == 'screen' if is_url and is_file: source = check_file(source) # download - elif isinstance(source, tuple(LOADERS)): + elif isinstance(source, LOADERS): in_memory = True elif isinstance(source, (list, tuple)): source = autocast_list(source) # convert all list elements to PIL or np arrays diff --git a/ultralytics/data/loaders.py b/ultralytics/data/loaders.py index f84bcad..fdf6167 100644 --- a/ultralytics/data/loaders.py +++ b/ultralytics/data/loaders.py @@ -98,7 +98,7 @@ class LoadStreams: def close(self): """Close stream loader and release resources.""" self.running = False # stop flag for Thread - for i, thread in enumerate(self.threads): + for thread in self.threads: if thread.is_alive(): thread.join(timeout=5) # Add timeout for cap in self.caps: # Iterate through the stored VideoCapture objects @@ -210,7 +210,6 @@ class LoadImages: self.vid_stride = vid_stride # video frame-rate stride self.bs = 1 if any(videos): - self.orientation = None # rotation degrees self._new_video(videos[0]) # new video else: self.cap = None @@ -263,20 +262,6 @@ class LoadImages: self.frame = 0 self.cap = cv2.VideoCapture(path) self.frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT) / self.vid_stride) - if hasattr(cv2, 'CAP_PROP_ORIENTATION_META'): # cv2<4.6.0 compatibility - self.orientation = int(self.cap.get(cv2.CAP_PROP_ORIENTATION_META)) # rotation degrees - # Disable auto-orientation due to known issues in https://github.com/ultralytics/yolov5/issues/8493 - # self.cap.set(cv2.CAP_PROP_ORIENTATION_AUTO, 0) - - def _cv2_rotate(self, im): - """Rotate a cv2 video manually.""" - if self.orientation == 0: - return cv2.rotate(im, cv2.ROTATE_90_CLOCKWISE) - elif self.orientation == 180: - return cv2.rotate(im, cv2.ROTATE_90_COUNTERCLOCKWISE) - elif self.orientation == 90: - return cv2.rotate(im, cv2.ROTATE_180) - return im def __len__(self): """Returns the number of files in the object.""" @@ -385,10 +370,10 @@ def autocast_list(source): return files -LOADERS = [LoadStreams, LoadPilAndNumpy, LoadImages, LoadScreenshots] +LOADERS = LoadStreams, LoadPilAndNumpy, LoadImages, LoadScreenshots # tuple -def get_best_youtube_url(url, use_pafy=True): +def get_best_youtube_url(url, use_pafy=False): """ Retrieves the URL of the best quality MP4 video stream from a given YouTube video. @@ -411,9 +396,11 @@ def get_best_youtube_url(url, use_pafy=True): import yt_dlp with yt_dlp.YoutubeDL({'quiet': True}) as ydl: info_dict = ydl.extract_info(url, download=False) # extract info - for f in info_dict.get('formats', None): - if f['vcodec'] != 'none' and f['acodec'] == 'none' and f['ext'] == 'mp4' and f.get('width') > 1280: - return f.get('url', None) + for f in reversed(info_dict.get('formats', [])): # reversed because best is usually last + # Find a format with video codec, no audio, *.mp4 extension at least 1920x1080 size + good_size = (f.get('width') or 0) >= 1920 or (f.get('height') or 0) >= 1080 + if good_size and f['vcodec'] != 'none' and f['acodec'] == 'none' and f['ext'] == 'mp4': + return f.get('url') if __name__ == '__main__': diff --git a/ultralytics/data/utils.py b/ultralytics/data/utils.py index 52ce9c4..68b423b 100644 --- a/ultralytics/data/utils.py +++ b/ultralytics/data/utils.py @@ -142,16 +142,12 @@ def polygon2mask(imgsz, polygons, color=1, downsample_ratio=1): downsample_ratio (int): downsample ratio """ mask = np.zeros(imgsz, dtype=np.uint8) - polygons = np.asarray(polygons) - polygons = polygons.astype(np.int32) - shape = polygons.shape - polygons = polygons.reshape(shape[0], -1, 2) + polygons = np.asarray(polygons, dtype=np.int32) + polygons = polygons.reshape((polygons.shape[0], -1, 2)) cv2.fillPoly(mask, polygons, color=color) nh, nw = (imgsz[0] // downsample_ratio, imgsz[1] // downsample_ratio) - # NOTE: fillPoly firstly then resize is trying the keep the same way - # of loss calculation when mask-ratio=1. - mask = cv2.resize(mask, (nw, nh)) - return mask + # NOTE: fillPoly first then resize is trying to keep the same way of loss calculation when mask-ratio=1. + return cv2.resize(mask, (nw, nh)) def polygons2masks(imgsz, polygons, color, downsample_ratio=1): @@ -162,11 +158,7 @@ def polygons2masks(imgsz, polygons, color, downsample_ratio=1): color (int): color downsample_ratio (int): downsample ratio """ - masks = [] - for si in range(len(polygons)): - mask = polygon2mask(imgsz, [polygons[si].reshape(-1)], color, downsample_ratio) - masks.append(mask) - return np.array(masks) + return np.array([polygon2mask(imgsz, [x.reshape(-1)], color, downsample_ratio) for x in polygons]) def polygons2masks_overlap(imgsz, segments, downsample_ratio=1): @@ -421,7 +413,7 @@ class HUBDatasetStats: else: raise ValueError('Undefined dataset task.') zipped = zip(labels['cls'], coordinates) - return [[int(c), *(round(float(x), 4) for x in points)] for c, points in zipped] + return [[int(c[0]), *(round(float(x), 4) for x in points)] for c, points in zipped] for split in 'train', 'val', 'test': if self.data.get(split) is None: @@ -563,7 +555,7 @@ def zip_directory(dir, use_zipfile_library=True): def autosplit(path=DATASETS_DIR / 'coco8/images', weights=(0.9, 0.1, 0.0), annotated_only=False): """ - Autosplit a dataset into train/val/test splits and save the resulting splits into autosplit_*.txt files. + Automatically split a dataset into train/val/test splits and save the resulting splits into autosplit_*.txt files. Args: path (Path, optional): Path to images directory. Defaults to DATASETS_DIR / 'coco8/images'. diff --git a/ultralytics/engine/exporter.py b/ultralytics/engine/exporter.py index 02cacf0..275ba88 100644 --- a/ultralytics/engine/exporter.py +++ b/ultralytics/engine/exporter.py @@ -249,11 +249,11 @@ class Exporter: f[4], _ = self.export_coreml() if any((saved_model, pb, tflite, edgetpu, tfjs)): # TensorFlow formats self.args.int8 |= edgetpu - f[5], s_model = self.export_saved_model() + f[5], keras_model = self.export_saved_model() if pb or tfjs: # pb prerequisite to tfjs - f[6], _ = self.export_pb(s_model) + f[6], _ = self.export_pb(keras_model=keras_model) if tflite: - f[7], _ = self.export_tflite(s_model, nms=False, agnostic_nms=self.args.agnostic_nms) + f[7], _ = self.export_tflite(keras_model=keras_model, nms=False, agnostic_nms=self.args.agnostic_nms) if edgetpu: f[8], _ = self.export_edgetpu(tflite_model=Path(f[5]) / f'{self.file.stem}_full_integer_quant.tflite') if tfjs: @@ -671,10 +671,7 @@ class Exporter: for file in f.rglob('*.tflite'): f.unlink() if 'quant_with_int16_act.tflite' in str(f) else self._add_tflite_metadata(file) - # Load saved_model - keras_model = tf.saved_model.load(f, tags=None, options=None) - - return str(f), keras_model + return str(f), tf.saved_model.load(f, tags=None, options=None) # load saved_model as Keras model @try_export def export_pb(self, keras_model, prefix=colorstr('TensorFlow GraphDef:')): diff --git a/ultralytics/engine/trainer.py b/ultralytics/engine/trainer.py index a91cf67..812d3d5 100644 --- a/ultralytics/engine/trainer.py +++ b/ultralytics/engine/trainer.py @@ -81,7 +81,7 @@ class BaseTrainer: overrides (dict, optional): Configuration overrides. Defaults to None. """ self.args = get_cfg(cfg, overrides) - self.check_resume() + self.check_resume(overrides) self.device = select_device(self.args.device, self.args.batch) self.validator = None self.model = None @@ -576,7 +576,7 @@ class BaseTrainer: self.metrics.pop('fitness', None) self.run_callbacks('on_fit_epoch_end') - def check_resume(self): + def check_resume(self, overrides): """Check if resume checkpoint exists and update arguments accordingly.""" resume = self.args.resume if resume: @@ -589,8 +589,13 @@ class BaseTrainer: if not Path(ckpt_args['data']).exists(): ckpt_args['data'] = self.args.data + resume = True self.args = get_cfg(ckpt_args) - self.args.model, resume = str(last), True # reinstate + self.args.model = str(last) # reinstate model + for k in 'imgsz', 'batch': # allow arg updates to reduce memory on resume if crashed due to CUDA OOM + if k in overrides: + setattr(self.args, k, overrides[k]) + except Exception as e: raise FileNotFoundError('Resume checkpoint not found. Please pass a valid checkpoint to resume from, ' "i.e. 'yolo train resume model=path/to/last.pt'") from e diff --git a/ultralytics/trackers/utils/__init__.py b/ultralytics/trackers/utils/__init__.py index e69de29..9e68dc1 100644 --- a/ultralytics/trackers/utils/__init__.py +++ b/ultralytics/trackers/utils/__init__.py @@ -0,0 +1 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license diff --git a/ultralytics/trackers/utils/matching.py b/ultralytics/trackers/utils/matching.py index ef84085..e6dc67d 100644 --- a/ultralytics/trackers/utils/matching.py +++ b/ultralytics/trackers/utils/matching.py @@ -18,7 +18,18 @@ except (ImportError, AssertionError, AttributeError): def linear_assignment(cost_matrix, thresh, use_lap=True): - """Linear assignment implementations with scipy and lap.lapjv.""" + """ + Perform linear assignment using scipy or lap.lapjv. + + Args: + cost_matrix (np.ndarray): The matrix containing cost values for assignments. + thresh (float): Threshold for considering an assignment valid. + use_lap (bool, optional): Whether to use lap.lapjv. Defaults to True. + + Returns: + (tuple): Tuple containing matched indices, unmatched indices from 'a', and unmatched indices from 'b'. + """ + if cost_matrix.size == 0: return np.empty((0, 2), dtype=int), tuple(range(cost_matrix.shape[0])), tuple(range(cost_matrix.shape[1])) @@ -42,11 +53,14 @@ def linear_assignment(cost_matrix, thresh, use_lap=True): def iou_distance(atracks, btracks): """ - Compute cost based on IoU - :type atracks: list[STrack] - :type btracks: list[STrack] + Compute cost based on Intersection over Union (IoU) between tracks. + + Args: + atracks (list[STrack] | list[np.ndarray]): List of tracks 'a' or bounding boxes. + btracks (list[STrack] | list[np.ndarray]): List of tracks 'b' or bounding boxes. - :rtype cost_matrix np.ndarray + Returns: + (np.ndarray): Cost matrix computed based on IoU. """ if (len(atracks) > 0 and isinstance(atracks[0], np.ndarray)) \ @@ -67,10 +81,15 @@ def iou_distance(atracks, btracks): def embedding_distance(tracks, detections, metric='cosine'): """ - :param tracks: list[STrack] - :param detections: list[BaseTrack] - :param metric: - :return: cost_matrix np.ndarray + Compute distance between tracks and detections based on embeddings. + + Args: + tracks (list[STrack]): List of tracks. + detections (list[BaseTrack]): List of detections. + metric (str, optional): Metric for distance computation. Defaults to 'cosine'. + + Returns: + (np.ndarray): Cost matrix computed based on embeddings. """ cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float32) @@ -85,7 +104,17 @@ def embedding_distance(tracks, detections, metric='cosine'): def fuse_score(cost_matrix, detections): - """Fuses cost matrix with detection scores to produce a single similarity matrix.""" + """ + Fuses cost matrix with detection scores to produce a single similarity matrix. + + Args: + cost_matrix (np.ndarray): The matrix containing cost values for assignments. + detections (list[BaseTrack]): List of detections with scores. + + Returns: + (np.ndarray): Fused similarity matrix. + """ + if cost_matrix.size == 0: return cost_matrix iou_sim = 1 - cost_matrix