Rename img_size to imgsz (#86)

This commit is contained in:
Glenn Jocher
2022-12-24 00:39:09 +01:00
committed by GitHub
parent ae2443c210
commit 6432afc5f9
25 changed files with 98 additions and 98 deletions

View File

@ -51,7 +51,7 @@ def check_anchors(dataset, model, thr=4.0, imgsz=640):
else:
LOGGER.info(f'{s}Anchors are a poor fit to dataset ⚠️, attempting to improve...')
na = m.anchors.numel() // 2 # number of anchors
anchors = kmean_anchors(dataset, n=na, img_size=imgsz, thr=thr, gen=1000, verbose=False)
anchors = kmean_anchors(dataset, n=na, imgsz=imgsz, thr=thr, gen=1000, verbose=False)
new_bpr = metric(anchors)[0]
if new_bpr > bpr: # replace anchors
anchors = torch.tensor(anchors, device=m.anchors.device).type_as(m.anchors)
@ -64,13 +64,13 @@ def check_anchors(dataset, model, thr=4.0, imgsz=640):
LOGGER.info(s)
def kmean_anchors(dataset='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen=1000, verbose=True):
def kmean_anchors(dataset='./data/coco128.yaml', n=9, imgsz=640, thr=4.0, gen=1000, verbose=True):
""" Creates kmeans-evolved anchors from training dataset
Arguments:
dataset: path to data.yaml, or a loaded dataset
n: number of anchors
img_size: image size used for training
imgsz: image size used for training
thr: anchor-label wh ratio threshold hyperparameter hyp['anchor_t'] used for training, default=4.0
gen: generations to evolve anchors using genetic algorithm
verbose: print all results
@ -101,7 +101,7 @@ def kmean_anchors(dataset='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen
x, best = metric(k, wh0)
bpr, aat = (best > thr).float().mean(), (x > thr).float().mean() * n # best possible recall, anch > thr
s = f'{PREFIX}thr={thr:.2f}: {bpr:.4f} best possible recall, {aat:.2f} anchors past thr\n' \
f'{PREFIX}n={n}, img_size={img_size}, metric_all={x.mean():.3f}/{best.mean():.3f}-mean/best, ' \
f'{PREFIX}n={n}, imgsz={imgsz}, metric_all={x.mean():.3f}/{best.mean():.3f}-mean/best, ' \
f'past_thr={x[x > thr].mean():.3f}-mean: '
for x in k:
s += '%i,%i, ' % (round(x[0]), round(x[1]))
@ -116,7 +116,7 @@ def kmean_anchors(dataset='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen
dataset = BaseDataset(data_dict['train'], augment=True, rect=True)
# Get label wh
shapes = img_size * dataset.shapes / dataset.shapes.max(1, keepdims=True)
shapes = imgsz * dataset.shapes / dataset.shapes.max(1, keepdims=True)
wh0 = np.concatenate([l[:, 3:5] * s for s, l in zip(shapes, dataset.labels)]) # wh
# Filter
@ -135,7 +135,7 @@ def kmean_anchors(dataset='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen
assert n == len(k) # kmeans may return fewer points than requested if wh is insufficient or too similar
except Exception:
LOGGER.warning(f'{PREFIX}WARNING ⚠️ switching strategies from kmeans to random init')
k = np.sort(npr.rand(n * 2)).reshape(n, 2) * img_size # random init
k = np.sort(npr.rand(n * 2)).reshape(n, 2) * imgsz # random init
wh, wh0 = (torch.tensor(x, dtype=torch.float32) for x in (wh, wh0))
k = print_results(k, verbose=False)

View File

@ -10,7 +10,7 @@ model: null # i.e. yolov5s.pt, yolo.yaml
data: null # e.g. coco128.yaml
epochs: 300
batch_size: 16
img_size: 640
imgsz: 640
nosave: False
cache: False # True/ram, disk or False
device: '' # cuda device, e.g. 0 or 0,1,2,3 or cpu

View File

@ -51,8 +51,8 @@ class BaseModel(nn.Module):
self.info()
return self
def info(self, verbose=False, img_size=640): # print model information
model_info(self, verbose, img_size)
def info(self, verbose=False, imgsz=640): # print model information
model_info(self, verbose, imgsz)
def _apply(self, fn):
# Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers
@ -117,7 +117,7 @@ class DetectionModel(BaseModel):
return self._forward_once(x, profile, visualize) # single-scale inference, train
def _forward_augment(self, x):
img_size = x.shape[-2:] # height, width
imgsz = x.shape[-2:] # height, width
s = [1, 0.83, 0.67] # scales
f = [None, 3, None] # flips (2-ud, 3-lr)
y = [] # outputs
@ -125,25 +125,25 @@ class DetectionModel(BaseModel):
xi = scale_img(x.flip(fi) if fi else x, si, gs=int(self.stride.max()))
yi = self._forward_once(xi)[0] # forward
# cv2.imwrite(f'img_{si}.jpg', 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1]) # save
yi = self._descale_pred(yi, fi, si, img_size)
yi = self._descale_pred(yi, fi, si, imgsz)
y.append(yi)
y = self._clip_augmented(y) # clip augmented tails
return torch.cat(y, 1), None # augmented inference, train
def _descale_pred(self, p, flips, scale, img_size):
def _descale_pred(self, p, flips, scale, imgsz):
# de-scale predictions following augmented inference (inverse operation)
if self.inplace:
p[..., :4] /= scale # de-scale
if flips == 2:
p[..., 1] = img_size[0] - p[..., 1] # de-flip ud
p[..., 1] = imgsz[0] - p[..., 1] # de-flip ud
elif flips == 3:
p[..., 0] = img_size[1] - p[..., 0] # de-flip lr
p[..., 0] = imgsz[1] - p[..., 0] # de-flip lr
else:
x, y, wh = p[..., 0:1] / scale, p[..., 1:2] / scale, p[..., 2:4] / scale # de-scale
if flips == 2:
y = img_size[0] - y # de-flip ud
y = imgsz[0] - y # de-flip ud
elif flips == 3:
x = img_size[1] - x # de-flip lr
x = imgsz[1] - x # de-flip lr
p = torch.cat((x, y, wh, p[..., 4:]), -1)
return p

View File

@ -124,7 +124,7 @@ def fuse_conv_and_bn(conv, bn):
def model_info(model, verbose=False, imgsz=640):
# Model information. img_size may be int or list, i.e. img_size=640 or img_size=[640, 320]
# Model information. imgsz may be int or list, e.g. imgsz=640 or imgsz=[640, 320]
n_p = get_num_params(model)
n_g = get_num_gradients(model) # number gradients
if verbose:
@ -185,11 +185,11 @@ def scale_img(img, ratio=1.0, same_shape=False, gs=32): # img(16,3,256,416)
return F.pad(img, [0, w - s[1], 0, h - s[0]], value=0.447) # value = imagenet mean
def check_img_size(imgsz, s=32, floor=0):
def check_imgsz(imgsz, s=32, floor=0):
# Verify image size is a multiple of stride s in each dimension
if isinstance(imgsz, int): # integer i.e. img_size=640
if isinstance(imgsz, int): # integer i.e. imgsz=640
new_size = max(make_divisible(imgsz, int(s)), floor)
else: # list i.e. img_size=[640, 480]
else: # list i.e. imgsz=[640, 480]
imgsz = list(imgsz) # convert to list if tuple
new_size = [max(make_divisible(x, int(s)), floor) for x in imgsz]
if new_size != imgsz: