ultralytics 8.0.94 HUBDatasetStats() Segment and Pose support (#2450)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: JF Chen <k-2feng@hotmail.com>
Co-authored-by: Ayush Chaurasia <ayush.chaurarsia@gmail.com>
Co-authored-by: Laughing-q <1185102784@qq.com>
This commit is contained in:
Glenn Jocher
2023-05-08 01:59:12 +02:00
committed by GitHub
parent af49a85cf3
commit e21428ca4e
51 changed files with 948 additions and 81 deletions

View File

@ -15,7 +15,7 @@ import psutil
from torch.utils.data import Dataset
from tqdm import tqdm
from ..utils import LOCAL_RANK, LOGGER, NUM_THREADS, TQDM_BAR_FORMAT
from ..utils import DEFAULT_CFG, LOCAL_RANK, LOGGER, NUM_THREADS, TQDM_BAR_FORMAT
from .utils import HELP_URL, IMG_FORMATS
@ -51,7 +51,7 @@ class BaseDataset(Dataset):
imgsz=640,
cache=False,
augment=True,
hyp=None,
hyp=DEFAULT_CFG,
prefix='',
rect=False,
batch_size=None,

View File

@ -71,7 +71,7 @@ def seed_worker(worker_id): # noqa
def build_yolo_dataset(cfg, img_path, batch, data_info, mode='train', rect=False, stride=32):
"""Build YOLO Dataset"""
dataset = YOLODataset(
return YOLODataset(
img_path=img_path,
imgsz=cfg.imgsz,
batch_size=batch,
@ -87,7 +87,6 @@ def build_yolo_dataset(cfg, img_path, batch, data_info, mode='train', rect=False
use_keypoints=cfg.task == 'pose',
classes=cfg.classes,
data=data_info)
return dataset
def build_dataloader(dataset, batch, workers, shuffle=True, rank=-1):

View File

@ -209,7 +209,7 @@ class ClassificationDataset(torchvision.datasets.ImageFolder):
album_transform: Albumentations transforms, used if installed
"""
def __init__(self, root, augment, imgsz, cache=False):
def __init__(self, root, augment=False, imgsz=224, cache=False):
"""Initialize YOLO object with root, image size, augmentations, and cache settings"""
super().__init__(root=root)
self.torch_transforms = classify_transforms(imgsz)

View File

@ -310,17 +310,19 @@ class HUBDatasetStats():
Arguments
path: Path to data.yaml or data.zip (with data.yaml inside data.zip)
task: Dataset task. Options are 'detect', 'segment', 'pose', 'classify'.
autodownload: Attempt to download dataset if not found locally
Usage
from ultralytics.yolo.data.utils import HUBDatasetStats
stats = HUBDatasetStats('coco128.yaml', autodownload=True) # usage 1
stats = HUBDatasetStats('/Users/glennjocher/Downloads/coco6.zip') # usage 2
stats = HUBDatasetStats('/Users/glennjocher/Downloads/coco8.zip', task='detect') # detect dataset
stats = HUBDatasetStats('/Users/glennjocher/Downloads/coco8-seg.zip', task='segment') # segment dataset
stats = HUBDatasetStats('/Users/glennjocher/Downloads/coco8-pose.zip', task='pose') # pose dataset
stats.get_json(save=False)
stats.process_images()
"""
def __init__(self, path='coco128.yaml', autodownload=False):
def __init__(self, path='coco128.yaml', task='detect', autodownload=False):
"""Initialize class."""
zipped, data_dir, yaml_path = self._unzip(Path(path))
try:
@ -336,6 +338,7 @@ class HUBDatasetStats():
self.im_dir.mkdir(parents=True, exist_ok=True) # makes /images
self.stats = {'nc': len(data['names']), 'names': list(data['names'].values())} # statistics dictionary
self.data = data
self.task = task # detect, segment, pose, classify
@staticmethod
def _find_yaml(dir):
@ -352,11 +355,10 @@ class HUBDatasetStats():
"""Unzip data.zip."""
if not str(path).endswith('.zip'): # path is data.yaml
return False, None, path
assert Path(path).is_file(), f'Error unzipping {path}, file not found'
unzip_file(path, path=path.parent)
dir = path.with_suffix('') # dataset directory == zip name
assert dir.is_dir(), f'Error unzipping {path}, {dir} not found. path/to/abc.zip MUST unzip to path/to/abc/'
return True, str(dir), self._find_yaml(dir) # zipped, data_dir, yaml_path
unzip_dir = unzip_file(path, path=path.parent)
assert unzip_dir.is_dir(), f'Error unzipping {path}, {unzip_dir} not found. ' \
f'path/to/abc.zip MUST unzip to path/to/abc/'
return True, str(unzip_dir), self._find_yaml(unzip_dir) # zipped, data_dir, yaml_path
def _hub_ops(self, f):
"""Saves a compressed image for HUB previews."""
@ -364,20 +366,33 @@ class HUBDatasetStats():
def get_json(self, save=False, verbose=False):
"""Return dataset JSON for Ultralytics HUB."""
# from ultralytics.yolo.data import YOLODataset
from ultralytics.yolo.data.dataloaders.v5loader import LoadImagesAndLabels
from ultralytics.yolo.data import YOLODataset # ClassificationDataset
def _round(labels):
"""Update labels to integer class and 6 decimal place floats."""
return [[int(c), *(round(x, 4) for x in points)] for c, *points in labels]
"""Update labels to integer class and 4 decimal place floats."""
if self.task == 'detect':
coordinates = labels['bboxes']
elif self.task == 'segment':
coordinates = [x.flatten() for x in labels['segments']]
elif self.task == 'pose':
n = labels['keypoints'].shape[0]
coordinates = np.concatenate((labels['bboxes'], labels['keypoints'].reshape(n, -1)), 1)
else:
raise ValueError('Undefined dataset task.')
zipped = zip(labels['cls'], coordinates)
return [[int(c), *(round(float(x), 4) for x in points)] for c, points in zipped]
for split in 'train', 'val', 'test':
if self.data.get(split) is None:
self.stats[split] = None # i.e. no test set
continue
dataset = LoadImagesAndLabels(self.data[split]) # load dataset
dataset = YOLODataset(img_path=self.data[split],
data=self.data,
use_segments=self.task == 'segment',
use_keypoints=self.task == 'pose')
x = np.array([
np.bincount(label[:, 0].astype(int), minlength=self.data['nc'])
np.bincount(label['cls'].astype(int).flatten(), minlength=self.data['nc'])
for label in tqdm(dataset.labels, total=len(dataset), desc='Statistics')]) # shape(128x80)
self.stats[split] = {
'instance_stats': {
@ -388,7 +403,7 @@ class HUBDatasetStats():
'unlabelled': int(np.all(x == 0, 1).sum()),
'per_class': (x > 0).sum(0).tolist()},
'labels': [{
str(Path(k).name): _round(v.tolist())} for k, v in zip(dataset.im_files, dataset.labels)]}
Path(k).name: _round(v)} for k, v in zip(dataset.im_files, dataset.labels)]}
# Save, print and return
if save:
@ -402,13 +417,12 @@ class HUBDatasetStats():
def process_images(self):
"""Compress images for Ultralytics HUB."""
# from ultralytics.yolo.data import YOLODataset
from ultralytics.yolo.data.dataloaders.v5loader import LoadImagesAndLabels
from ultralytics.yolo.data import YOLODataset # ClassificationDataset
for split in 'train', 'val', 'test':
if self.data.get(split) is None:
continue
dataset = LoadImagesAndLabels(self.data[split]) # load dataset
dataset = YOLODataset(img_path=self.data[split], data=self.data)
with ThreadPool(NUM_THREADS) as pool:
for _ in tqdm(pool.imap(self._hub_ops, dataset.im_files), total=len(dataset), desc=f'{split} images'):
pass