ultralytics 8.0.19 seg/det dataset warning and DDP-cls/seg fixes (#595)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: 曾逸夫(Zeng Yifu) <41098760+Zengyf-CVer@users.noreply.github.com>
Co-authored-by: Laughing <61612323+Laughing-q@users.noreply.github.com>
Glenn Jocher
2023-01-24 23:22:02 +01:00
committed by GitHub
parent 936414c615
commit 520825c4b2
21 changed files with 103 additions and 63 deletions

View File

@@ -611,6 +611,8 @@ class LoadImagesAndLabels(Dataset):

    def cache_labels(self, path=Path('./labels.cache'), prefix=''):
        # Cache dataset labels, check images and read shapes
        if path.exists():
            path.unlink()  # remove *.cache file if exists
        x = {}  # dict
        nm, nf, ne, nc, msgs = 0, 0, 0, 0, []  # number missing, found, empty, corrupt, messages
        desc = f"{prefix}Scanning {path.parent / path.stem}..."

View File

@@ -47,6 +47,8 @@ class YOLODataset(BaseDataset):

    def cache_labels(self, path=Path("./labels.cache")):
        # Cache dataset labels, check images and read shapes
        if path.exists():
            path.unlink()  # remove *.cache file if exists
        x = {"labels": []}
        nm, nf, ne, nc, msgs = 0, 0, 0, 0, []  # number missing, found, empty, corrupt, messages
        desc = f"{self.prefix}Scanning {path.parent / path.stem}..."
@@ -85,7 +87,7 @@ class YOLODataset(BaseDataset):
        x["results"] = nf, nm, ne, nc, len(self.im_files)
        x["msgs"] = msgs  # warnings
        x["version"] = self.cache_version  # cache version
        self.im_files = [lb["im_file"] for lb in x["labels"]]
        self.im_files = [lb["im_file"] for lb in x["labels"]]  # update im_files
        if is_dir_writeable(path.parent):
            np.save(str(path), x)  # save cache for next time
            path.with_suffix(".cache.npy").rename(path)  # remove .npy suffix
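The finished cache dict is written with np.save, which always appends a .npy suffix, and is then renamed back to *.cache; the is_dir_writeable() guard skips the write entirely on read-only dataset directories. A simplified sketch of that save path, using os.access as a stand-in for the ultralytics is_dir_writeable helper (the save_cache name is illustrative):

import os
from pathlib import Path

import numpy as np

def save_cache(path, cache):
    # Sketch of the write-then-rename pattern shown above (not part of this commit)
    # e.g. save_cache(Path("labels.cache"), {"labels": []})
    if os.access(path.parent, os.W_OK):  # stand-in for is_dir_writeable(path.parent)
        np.save(str(path), cache)  # writes labels.cache.npy
        path.with_suffix(".cache.npy").rename(path)  # restore the labels.cache name
    else:
        print(f"WARNING: {path.parent} is not writeable, cache not saved")

A side effect of writing under the temporary .npy name and renaming at the end is that an interrupted write leaves no half-written labels.cache behind; the next run simply rescans.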
@@ -116,6 +118,17 @@ class YOLODataset(BaseDataset):
        # Read cache
        [cache.pop(k) for k in ("hash", "version", "msgs")]  # remove items
        labels = cache["labels"]
        # Check if the dataset is all boxes or all segments
        len_boxes = sum(len(lb["bboxes"]) for lb in labels)
        len_segments = sum(len(lb["segments"]) for lb in labels)
        if len_segments and len_boxes != len_segments:
            LOGGER.warning(
                f"WARNING ⚠️ Box and segment counts should be equal, but got len(segments) = {len_segments}, "
                f"len(boxes) = {len_boxes}. To resolve this only boxes will be used and all segments will be removed. "
                "To avoid this please supply either a detect or segment dataset, not a detect-segment mixed dataset.")
            for lb in labels:
                lb["segments"] = []
        nl = len(np.concatenate([label["cls"] for label in labels], 0))  # number of labels
        assert nl > 0, f"{self.prefix}All labels empty in {cache_path}, can not start training. {HELP_URL}"
        return labels
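The new block guards against mixed detect/segment label caches: when the polygon count does not match the box count, all segments are dropped with a warning and training proceeds on boxes only. A toy reproduction of the check on hand-built labels (the dummy arrays below are just enough to trigger the branch, not the real YOLODataset cache contents):

import numpy as np

# Two images: the first has one box with a polygon, the second has a box only
labels = [
    {"cls": np.zeros((1, 1)), "bboxes": np.zeros((1, 4)), "segments": [np.zeros((5, 2))]},
    {"cls": np.zeros((1, 1)), "bboxes": np.zeros((1, 4)), "segments": []},
]

len_boxes = sum(len(lb["bboxes"]) for lb in labels)       # 2
len_segments = sum(len(lb["segments"]) for lb in labels)  # 1
if len_segments and len_boxes != len_segments:
    print(f"mixed dataset: {len_boxes} boxes vs {len_segments} segments, dropping segments")
    for lb in labels:
        lb["segments"] = []  # fall back to a boxes-only (detect) dataset

A dataset with no segments at all (len_segments == 0) passes through untouched, so plain detection datasets never see the warning.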

View File

@@ -14,7 +14,7 @@ import numpy as np
import torch
from PIL import ExifTags, Image, ImageOps

from ultralytics.yolo.utils import DATASETS_DIR, LOGGER, ROOT, colorstr, yaml_load
from ultralytics.yolo.utils import DATASETS_DIR, LOGGER, ROOT, colorstr, emojis, yaml_load
from ultralytics.yolo.utils.checks import check_file, check_font, is_ascii
from ultralytics.yolo.utils.downloads import download
from ultralytics.yolo.utils.files import unzip_file
@@ -202,7 +202,10 @@ def check_det_dataset(dataset, autodownload=True):
    # Checks
    for k in 'train', 'val', 'names':
        assert k in data, f"data.yaml '{k}:' field missing ❌"
        if k not in data:
            raise SyntaxError(
                emojis(f"{dataset} '{k}:' key missing ❌.\n"
                       f"'train', 'val' and 'names' are required in data.yaml files."))
    if isinstance(data['names'], (list, tuple)):  # old array format
        data['names'] = dict(enumerate(data['names']))  # convert to dict
    data['nc'] = len(data['names'])
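check_det_dataset now raises an explicit SyntaxError, wrapped in the emojis() helper so the message displays cleanly on consoles that cannot render emoji, rather than an assert (which python -O would strip). A stripped-down sketch of the validation on a plain dict, with 'coco128.yaml' used purely as an illustrative dataset name and the emojis() wrapper omitted:

data = {"train": "images/train", "val": "images/val"}  # 'names' deliberately missing

for k in "train", "val", "names":
    if k not in data:
        raise SyntaxError(f"coco128.yaml '{k}:' key missing.\n"
                          f"'train', 'val' and 'names' are required in data.yaml files.")

Running the sketch raises SyntaxError naming the missing 'names' key and listing the required ones, mirroring the message format of the real check.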