new check_dataset functions (#43)
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
This commit is contained in:
101
ultralytics/yolo/data/datasets/coco128-seg.yaml
Normal file
101
ultralytics/yolo/data/datasets/coco128-seg.yaml
Normal file
@ -0,0 +1,101 @@
|
||||
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
|
||||
# COCO128-seg dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics
|
||||
# Example usage: python train.py --data coco128.yaml
|
||||
# parent
|
||||
# ├── yolov5
|
||||
# └── datasets
|
||||
# └── coco128-seg ← downloads here (7 MB)
|
||||
|
||||
|
||||
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
|
||||
path: ../datasets/coco128-seg # dataset root dir
|
||||
train: images/train2017 # train images (relative to 'path') 128 images
|
||||
val: images/train2017 # val images (relative to 'path') 128 images
|
||||
test: # test images (optional)
|
||||
|
||||
# Classes
|
||||
names:
|
||||
0: person
|
||||
1: bicycle
|
||||
2: car
|
||||
3: motorcycle
|
||||
4: airplane
|
||||
5: bus
|
||||
6: train
|
||||
7: truck
|
||||
8: boat
|
||||
9: traffic light
|
||||
10: fire hydrant
|
||||
11: stop sign
|
||||
12: parking meter
|
||||
13: bench
|
||||
14: bird
|
||||
15: cat
|
||||
16: dog
|
||||
17: horse
|
||||
18: sheep
|
||||
19: cow
|
||||
20: elephant
|
||||
21: bear
|
||||
22: zebra
|
||||
23: giraffe
|
||||
24: backpack
|
||||
25: umbrella
|
||||
26: handbag
|
||||
27: tie
|
||||
28: suitcase
|
||||
29: frisbee
|
||||
30: skis
|
||||
31: snowboard
|
||||
32: sports ball
|
||||
33: kite
|
||||
34: baseball bat
|
||||
35: baseball glove
|
||||
36: skateboard
|
||||
37: surfboard
|
||||
38: tennis racket
|
||||
39: bottle
|
||||
40: wine glass
|
||||
41: cup
|
||||
42: fork
|
||||
43: knife
|
||||
44: spoon
|
||||
45: bowl
|
||||
46: banana
|
||||
47: apple
|
||||
48: sandwich
|
||||
49: orange
|
||||
50: broccoli
|
||||
51: carrot
|
||||
52: hot dog
|
||||
53: pizza
|
||||
54: donut
|
||||
55: cake
|
||||
56: chair
|
||||
57: couch
|
||||
58: potted plant
|
||||
59: bed
|
||||
60: dining table
|
||||
61: toilet
|
||||
62: tv
|
||||
63: laptop
|
||||
64: mouse
|
||||
65: remote
|
||||
66: keyboard
|
||||
67: cell phone
|
||||
68: microwave
|
||||
69: oven
|
||||
70: toaster
|
||||
71: sink
|
||||
72: refrigerator
|
||||
73: book
|
||||
74: clock
|
||||
75: vase
|
||||
76: scissors
|
||||
77: teddy bear
|
||||
78: hair drier
|
||||
79: toothbrush
|
||||
|
||||
|
||||
# Download script/URL (optional)
|
||||
download: https://ultralytics.com/assets/coco128-seg.zip
|
101
ultralytics/yolo/data/datasets/coco128.yaml
Normal file
101
ultralytics/yolo/data/datasets/coco128.yaml
Normal file
@ -0,0 +1,101 @@
|
||||
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
|
||||
# COCO128 dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics
|
||||
# Example usage: python train.py --data coco128.yaml
|
||||
# parent
|
||||
# ├── yolov5
|
||||
# └── datasets
|
||||
# └── coco128 ← downloads here (7 MB)
|
||||
|
||||
|
||||
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
|
||||
path: ../datasets/coco128 # dataset root dir
|
||||
train: images/train2017 # train images (relative to 'path') 128 images
|
||||
val: images/train2017 # val images (relative to 'path') 128 images
|
||||
test: # test images (optional)
|
||||
|
||||
# Classes
|
||||
names:
|
||||
0: person
|
||||
1: bicycle
|
||||
2: car
|
||||
3: motorcycle
|
||||
4: airplane
|
||||
5: bus
|
||||
6: train
|
||||
7: truck
|
||||
8: boat
|
||||
9: traffic light
|
||||
10: fire hydrant
|
||||
11: stop sign
|
||||
12: parking meter
|
||||
13: bench
|
||||
14: bird
|
||||
15: cat
|
||||
16: dog
|
||||
17: horse
|
||||
18: sheep
|
||||
19: cow
|
||||
20: elephant
|
||||
21: bear
|
||||
22: zebra
|
||||
23: giraffe
|
||||
24: backpack
|
||||
25: umbrella
|
||||
26: handbag
|
||||
27: tie
|
||||
28: suitcase
|
||||
29: frisbee
|
||||
30: skis
|
||||
31: snowboard
|
||||
32: sports ball
|
||||
33: kite
|
||||
34: baseball bat
|
||||
35: baseball glove
|
||||
36: skateboard
|
||||
37: surfboard
|
||||
38: tennis racket
|
||||
39: bottle
|
||||
40: wine glass
|
||||
41: cup
|
||||
42: fork
|
||||
43: knife
|
||||
44: spoon
|
||||
45: bowl
|
||||
46: banana
|
||||
47: apple
|
||||
48: sandwich
|
||||
49: orange
|
||||
50: broccoli
|
||||
51: carrot
|
||||
52: hot dog
|
||||
53: pizza
|
||||
54: donut
|
||||
55: cake
|
||||
56: chair
|
||||
57: couch
|
||||
58: potted plant
|
||||
59: bed
|
||||
60: dining table
|
||||
61: toilet
|
||||
62: tv
|
||||
63: laptop
|
||||
64: mouse
|
||||
65: remote
|
||||
66: keyboard
|
||||
67: cell phone
|
||||
68: microwave
|
||||
69: oven
|
||||
70: toaster
|
||||
71: sink
|
||||
72: refrigerator
|
||||
73: book
|
||||
74: clock
|
||||
75: vase
|
||||
76: scissors
|
||||
77: teddy bear
|
||||
78: hair drier
|
||||
79: toothbrush
|
||||
|
||||
|
||||
# Download script/URL (optional)
|
||||
download: https://ultralytics.com/assets/coco128.zip
|
@ -1,11 +1,22 @@
|
||||
import contextlib
|
||||
import hashlib
|
||||
import os
|
||||
import subprocess
|
||||
import time
|
||||
from pathlib import Path
|
||||
from tarfile import is_tarfile
|
||||
from zipfile import is_zipfile
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
import torch
|
||||
from PIL import ExifTags, Image, ImageOps
|
||||
|
||||
from ultralytics.yolo.utils import LOGGER, ROOT, colorstr
|
||||
from ultralytics.yolo.utils.checks import check_file, check_font, is_ascii
|
||||
from ultralytics.yolo.utils.downloads import download
|
||||
from ultralytics.yolo.utils.files import unzip_file, yaml_load
|
||||
|
||||
from ..utils.ops import segments2boxes
|
||||
|
||||
HELP_URL = "See https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data"
|
||||
@ -176,3 +187,89 @@ def polygons2masks_overlap(img_size, segments, downsample_ratio=1):
|
||||
masks = masks + mask
|
||||
masks = np.clip(masks, a_min=0, a_max=i + 1)
|
||||
return masks, index
|
||||
|
||||
|
||||
def check_dataset_yaml(data, autodownload=True):
|
||||
# Download, check and/or unzip dataset if not found locally
|
||||
data = check_file(data)
|
||||
DATASETS_DIR = Path.cwd() / "../datasets"
|
||||
# Download (optional)
|
||||
extract_dir = ''
|
||||
if isinstance(data, (str, Path)) and (is_zipfile(data) or is_tarfile(data)):
|
||||
download(data, dir=f'{DATASETS_DIR}/{Path(data).stem}', unzip=True, delete=False, curl=False, threads=1)
|
||||
data = next((DATASETS_DIR / Path(data).stem).rglob('*.yaml'))
|
||||
extract_dir, autodownload = data.parent, False
|
||||
# Read yaml (optional)
|
||||
if isinstance(data, (str, Path)):
|
||||
data = yaml_load(data) # dictionary
|
||||
|
||||
# Checks
|
||||
for k in 'train', 'val', 'names':
|
||||
assert k in data, f"data.yaml '{k}:' field missing ❌"
|
||||
if isinstance(data['names'], (list, tuple)): # old array format
|
||||
data['names'] = dict(enumerate(data['names'])) # convert to dict
|
||||
data['nc'] = len(data['names'])
|
||||
|
||||
# Resolve paths
|
||||
path = Path(extract_dir or data.get('path') or '') # optional 'path' default to '.'
|
||||
if not path.is_absolute():
|
||||
path = (Path.cwd() / path).resolve()
|
||||
data['path'] = path # download scripts
|
||||
for k in 'train', 'val', 'test':
|
||||
if data.get(k): # prepend path
|
||||
if isinstance(data[k], str):
|
||||
x = (path / data[k]).resolve()
|
||||
if not x.exists() and data[k].startswith('../'):
|
||||
x = (path / data[k][3:]).resolve()
|
||||
data[k] = str(x)
|
||||
else:
|
||||
data[k] = [str((path / x).resolve()) for x in data[k]]
|
||||
|
||||
# Parse yaml
|
||||
train, val, test, s = (data.get(x) for x in ('train', 'val', 'test', 'download'))
|
||||
if val:
|
||||
val = [Path(x).resolve() for x in (val if isinstance(val, list) else [val])] # val path
|
||||
if not all(x.exists() for x in val):
|
||||
LOGGER.info('\nDataset not found ⚠️, missing paths %s' % [str(x) for x in val if not x.exists()])
|
||||
if not s or not autodownload:
|
||||
raise Exception('Dataset not found ❌')
|
||||
t = time.time()
|
||||
if s.startswith('http') and s.endswith('.zip'): # URL
|
||||
f = Path(s).name # filename
|
||||
LOGGER.info(f'Downloading {s} to {f}...')
|
||||
torch.hub.download_url_to_file(s, f)
|
||||
Path(DATASETS_DIR).mkdir(parents=True, exist_ok=True) # create root
|
||||
unzip_file(f, path=DATASETS_DIR) # unzip
|
||||
Path(f).unlink() # remove zip
|
||||
r = None # success
|
||||
elif s.startswith('bash '): # bash script
|
||||
LOGGER.info(f'Running {s} ...')
|
||||
r = os.system(s)
|
||||
else: # python script
|
||||
r = exec(s, {'yaml': data}) # return None
|
||||
dt = f'({round(time.time() - t, 1)}s)'
|
||||
s = f"success ✅ {dt}, saved to {colorstr('bold', DATASETS_DIR)}" if r in (0, None) else f"failure {dt} ❌"
|
||||
LOGGER.info(f"Dataset download {s}")
|
||||
check_font('Arial.ttf' if is_ascii(data['names']) else 'Arial.Unicode.ttf', progress=True) # download fonts
|
||||
return data # dictionary
|
||||
|
||||
|
||||
def check_dataset(dataset: str):
|
||||
data = Path.cwd() / "datasets" / dataset
|
||||
data_dir = data if data.is_dir() else (Path.cwd() / data)
|
||||
if not data_dir.is_dir():
|
||||
LOGGER.info(f'\nDataset not found ⚠️, missing path {data_dir}, attempting download...')
|
||||
t = time.time()
|
||||
if str(data) == 'imagenet':
|
||||
subprocess.run(f"bash {ROOT / 'data/scripts/get_imagenet.sh'}", shell=True, check=True)
|
||||
else:
|
||||
url = f'https://github.com/ultralytics/yolov5/releases/download/v1.0/{dataset}.zip'
|
||||
download(url, dir=data_dir.parent)
|
||||
s = f"Dataset download success ✅ ({time.time() - t:.1f}s), saved to {colorstr('bold', data_dir)}\n"
|
||||
LOGGER.info(s)
|
||||
train_set = data_dir / "train"
|
||||
test_set = data_dir / 'test' if (data_dir / 'test').exists() else data_dir / 'val' # data/test or data/val
|
||||
nc = len([x for x in (data_dir / 'train').glob('*') if x.is_dir()]) # number of classes
|
||||
names = [name for name in os.listdir(data_dir / 'train') if os.path.isdir(data_dir / 'train' / name)]
|
||||
data = {"train": train_set, "val": test_set, "nc": nc, "names": names}
|
||||
return data
|
||||
|
Reference in New Issue
Block a user