ultralytics 8.0.32 HUB and TensorFlow fixes (#870)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
This commit is contained in:
Glenn Jocher
2023-02-09 01:47:34 +04:00
committed by GitHub
parent f5d003d05a
commit c9893810c7
14 changed files with 118 additions and 85 deletions

View File

@ -28,7 +28,6 @@ class BaseDataset(Dataset):
self,
img_path,
imgsz=640,
label_path=None,
cache=False,
augment=True,
hyp=None,
@ -42,7 +41,6 @@ class BaseDataset(Dataset):
super().__init__()
self.img_path = img_path
self.imgsz = imgsz
self.label_path = label_path
self.augment = augment
self.single_cls = single_cls
self.prefix = prefix

View File

@ -61,7 +61,7 @@ def seed_worker(worker_id):
random.seed(worker_seed)
def build_dataloader(cfg, batch_size, img_path, stride=32, rect=False, label_path=None, rank=-1, mode="train"):
def build_dataloader(cfg, batch, img_path, stride=32, rect=False, names=None, rank=-1, mode="train"):
assert mode in ["train", "val"]
shuffle = mode == "train"
if cfg.rect and shuffle:
@ -70,9 +70,8 @@ def build_dataloader(cfg, batch_size, img_path, stride=32, rect=False, label_pat
with torch_distributed_zero_first(rank): # init dataset *.cache only once if DDP
dataset = YOLODataset(
img_path=img_path,
label_path=label_path,
imgsz=cfg.imgsz,
batch_size=batch_size,
batch_size=batch,
augment=mode == "train", # augmentation
hyp=cfg, # TODO: probably add a get_hyps_from_cfg function
rect=cfg.rect or rect, # rectangular batches
@ -82,18 +81,19 @@ def build_dataloader(cfg, batch_size, img_path, stride=32, rect=False, label_pat
pad=0.0 if mode == "train" else 0.5,
prefix=colorstr(f"{mode}: "),
use_segments=cfg.task == "segment",
use_keypoints=cfg.task == "keypoint")
use_keypoints=cfg.task == "keypoint",
names=names)
batch_size = min(batch_size, len(dataset))
batch = min(batch, len(dataset))
nd = torch.cuda.device_count() # number of CUDA devices
workers = cfg.workers if mode == "train" else cfg.workers * 2
nw = min([os.cpu_count() // max(nd, 1), batch_size if batch_size > 1 else 0, workers]) # number of workers
nw = min([os.cpu_count() // max(nd, 1), batch if batch > 1 else 0, workers]) # number of workers
sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle)
loader = DataLoader if cfg.image_weights or cfg.close_mosaic else InfiniteDataLoader # allow attribute updates
generator = torch.Generator()
generator.manual_seed(6148914691236517205 + RANK)
return loader(dataset=dataset,
batch_size=batch_size,
batch_size=batch,
shuffle=shuffle and sampler is None,
num_workers=nw,
sampler=sampler,

View File

@ -14,7 +14,7 @@ from .utils import HELP_URL, LOCAL_RANK, get_hash, img2label_paths, verify_image
class YOLODataset(BaseDataset):
cache_version = 1.0 # dataset labels *.cache version, >= 1.0 for YOLOv8
cache_version = '1.0.1' # dataset labels *.cache version, >= 1.0.0 for YOLOv8
rand_interp_methods = [cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4]
"""YOLO Dataset.
Args:
@ -22,28 +22,26 @@ class YOLODataset(BaseDataset):
prefix (str): prefix.
"""
def __init__(
self,
img_path,
imgsz=640,
label_path=None,
cache=False,
augment=True,
hyp=None,
prefix="",
rect=False,
batch_size=None,
stride=32,
pad=0.0,
single_cls=False,
use_segments=False,
use_keypoints=False,
):
def __init__(self,
img_path,
imgsz=640,
cache=False,
augment=True,
hyp=None,
prefix="",
rect=False,
batch_size=None,
stride=32,
pad=0.0,
single_cls=False,
use_segments=False,
use_keypoints=False,
names=None):
self.use_segments = use_segments
self.use_keypoints = use_keypoints
self.names = names
assert not (self.use_segments and self.use_keypoints), "Can not use both segments and keypoints."
super().__init__(img_path, imgsz, label_path, cache, augment, hyp, prefix, rect, batch_size, stride, pad,
single_cls)
super().__init__(img_path, imgsz, cache, augment, hyp, prefix, rect, batch_size, stride, pad, single_cls)
def cache_labels(self, path=Path("./labels.cache")):
# Cache dataset labels, check images and read shapes
@ -56,7 +54,7 @@ class YOLODataset(BaseDataset):
with ThreadPool(NUM_THREADS) as pool:
results = pool.imap(func=verify_image_label,
iterable=zip(self.im_files, self.label_files, repeat(self.prefix),
repeat(self.use_keypoints)))
repeat(self.use_keypoints), repeat(len(self.names))))
pbar = tqdm(results, desc=desc, total=total, bar_format=TQDM_BAR_FORMAT)
for im_file, lb, shape, segments, keypoint, nm_f, nf_f, ne_f, nc_f, msg in pbar:
nm += nm_f

View File

@ -61,7 +61,7 @@ def exif_size(img):
def verify_image_label(args):
# Verify one image-label pair
im_file, lb_file, prefix, keypoint = args
im_file, lb_file, prefix, keypoint, num_cls = args
# number (missing, found, empty, corrupt), message, segments, keypoints
nm, nf, ne, nc, msg, segments, keypoints = 0, 0, 0, 0, "", [], None
try:
@ -97,16 +97,20 @@ def verify_image_label(args):
assert (lb[:, 6::3] <= 1).all(), "non-normalized or out of bounds coordinate labels"
kpts = np.zeros((lb.shape[0], 39))
for i in range(len(lb)):
kpt = np.delete(lb[i, 5:], np.arange(2, lb.shape[1] - 5,
3)) # remove the occlusion parameter from the GT
kpt = np.delete(lb[i, 5:], np.arange(2, lb.shape[1] - 5, 3)) # remove occlusion param from GT
kpts[i] = np.hstack((lb[i, :5], kpt))
lb = kpts
assert lb.shape[1] == 39, "labels require 39 columns each after removing occlusion parameter"
else:
assert lb.shape[1] == 5, f"labels require 5 columns, {lb.shape[1]} columns detected"
assert (lb >= 0).all(), f"negative label values {lb[lb < 0]}"
assert (lb[:, 1:] <=
1).all(), f"non-normalized or out of bounds coordinates {lb[:, 1:][lb[:, 1:] > 1]}"
assert (lb[:, 1:] <= 1).all(), \
f"non-normalized or out of bounds coordinates {lb[:, 1:][lb[:, 1:] > 1]}"
# All labels
max_cls = int(lb[:, 0].max()) # max label count
assert max_cls <= num_cls, \
f'Label class {max_cls} exceeds dataset class count {num_cls}. ' \
f'Possible class labels are 0-{num_cls - 1}'
assert (lb >= 0).all(), f"negative label values {lb[lb < 0]}"
_, i = np.unique(lb, axis=0, return_index=True)
if len(i) < nl: # duplicate row check
lb = lb[i] # remove duplicates
@ -192,8 +196,8 @@ def check_det_dataset(dataset, autodownload=True):
# Download (optional)
extract_dir = ''
if isinstance(data, (str, Path)) and (is_zipfile(data) or is_tarfile(data)):
download(data, dir=DATASETS_DIR, unzip=True, delete=False, curl=False, threads=1)
data = next((DATASETS_DIR / Path(data).stem).rglob('*.yaml'))
new_dir = safe_download(data, dir=DATASETS_DIR, unzip=True, delete=False, curl=False)
data = next((DATASETS_DIR / new_dir).rglob('*.yaml'))
extract_dir, autodownload = data.parent, False
# Read yaml (optional)