Fix dataloader2 (#35)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com>
single_channel
Laughing 2 years ago committed by GitHub
parent 523eff99e2
commit c617ee1c79
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -26,4 +26,4 @@ flipud: 0.0 # image flip up-down (probability)
fliplr: 0.5 # image flip left-right (probability) fliplr: 0.5 # image flip left-right (probability)
mosaic: 1.0 # image mosaic (probability) mosaic: 1.0 # image mosaic (probability)
mixup: 0.0 # image mixup (probability) mixup: 0.0 # image mixup (probability)
copy_paste: 0.0 # segment copy-paste (probability) copy_paste: 0.5 # segment copy-paste (probability)

@ -67,23 +67,25 @@ def plot_keypoint(img, keypoints, color, tl):
with open("ultralytics/tests/data/dataloader/hyp_test.yaml") as f: with open("ultralytics/tests/data/dataloader/hyp_test.yaml") as f:
hyp = OmegaConf.load(f) hyp = OmegaConf.load(f)
dataloader, dataset = build_dataloader(
def test(augment, rect):
dataloader, _ = build_dataloader(
img_path="/d/dataset/COCO/images/val2017", img_path="/d/dataset/COCO/images/val2017",
img_size=640, img_size=640,
label_path=None, label_path=None,
cache=False, cache=False,
hyp=hyp, hyp=hyp,
augment=False, augment=augment,
prefix="", prefix="",
rect=False, rect=rect,
batch_size=4, batch_size=4,
stride=32, stride=32,
pad=0.5, pad=0.5,
use_segments=False, use_segments=False,
use_keypoints=True, use_keypoints=True,
) )
for d in dataloader: for d in dataloader:
idx = 1 # show which image inside one batch idx = 1 # show which image inside one batch
img = d["img"][idx].numpy() img = d["img"][idx].numpy()
img = np.ascontiguousarray(img.transpose(1, 2, 0)) img = np.ascontiguousarray(img.transpose(1, 2, 0))
@ -108,7 +110,17 @@ for d in dataloader:
y2 = y + h / 2 y2 = y + h / 2
c = int(cls[i][0]) c = int(cls[i][0])
# print(x1, y1, x2, y2) # print(x1, y1, x2, y2)
plot_one_box([int(x1), int(y1), int(x2), int(y2)], img, keypoints=keypoints[i], label=f"{c}", color=colors(c)) plot_one_box([int(x1), int(y1), int(x2), int(y2)],
img,
keypoints=keypoints[i],
label=f"{c}",
color=colors(c))
cv2.imshow("p", img) cv2.imshow("p", img)
if cv2.waitKey(0) == ord("q"): if cv2.waitKey(0) == ord("q"):
break break
if __name__ == "__main__":
test(augment=True, rect=False)
test(augment=False, rect=True)
test(augment=False, rect=False)

@ -55,23 +55,33 @@ def plot_one_box(x, img, color=None, label=None, line_thickness=None):
with open("ultralytics/tests/data/dataloader/hyp_test.yaml") as f: with open("ultralytics/tests/data/dataloader/hyp_test.yaml") as f:
hyp = OmegaConf.load(f) hyp = OmegaConf.load(f)
dataloader, dataset = build_dataloader(
def test(augment, rect):
dataloader, _ = build_dataloader(
img_path="/d/dataset/COCO/coco128-seg/images", img_path="/d/dataset/COCO/coco128-seg/images",
img_size=640, img_size=640,
label_path=None, label_path=None,
cache=False, cache=False,
hyp=hyp, hyp=hyp,
augment=False, augment=augment,
prefix="", prefix="",
rect=False, rect=rect,
batch_size=4, batch_size=4,
stride=32, stride=32,
pad=0.5, pad=0.5,
use_segments=True, use_segments=True,
use_keypoints=False, use_keypoints=False,
) )
for d in dataloader:
# info
im_file = d["im_file"]
ori_shape = d["ori_shape"]
resize_shape = d["resized_shape"]
print(ori_shape, resize_shape)
print(im_file)
for d in dataloader: # labels
idx = 1 # show which image inside one batch idx = 1 # show which image inside one batch
img = d["img"][idx].numpy() img = d["img"][idx].numpy()
img = np.ascontiguousarray(img.transpose(1, 2, 0)) img = np.ascontiguousarray(img.transpose(1, 2, 0))
@ -110,3 +120,9 @@ for d in dataloader:
cv2.imshow("p", img) cv2.imshow("p", img)
if cv2.waitKey(0) == ord("q"): if cv2.waitKey(0) == ord("q"):
break break
if __name__ == "__main__":
test(augment=True, rect=False)
test(augment=False, rect=True)
test(augment=False, rect=False)

@ -184,7 +184,7 @@ class Mosaic(BaseMixTransform):
cls.append(labels["cls"]) cls.append(labels["cls"])
instances.append(labels["instances"]) instances.append(labels["instances"])
final_labels = { final_labels = {
"ori_shape": (self.img_size * 2, self.img_size * 2), "ori_shape": mosaic_labels[0]["ori_shape"],
"resized_shape": (self.img_size * 2, self.img_size * 2), "resized_shape": (self.img_size * 2, self.img_size * 2),
"im_file": mosaic_labels[0]["im_file"], "im_file": mosaic_labels[0]["im_file"],
"cls": np.concatenate(cls, 0)} "cls": np.concatenate(cls, 0)}
@ -351,7 +351,7 @@ class RandomPerspective:
""" """
img = labels["img"] img = labels["img"]
cls = labels["cls"] cls = labels["cls"]
instances = labels["instances"] instances = labels.pop("instances")
# make sure the coord formats are right # make sure the coord formats are right
instances.convert_bbox(format="xyxy") instances.convert_bbox(format="xyxy")
instances.denormalize(*img.shape[:2][::-1]) instances.denormalize(*img.shape[:2][::-1])
@ -372,6 +372,7 @@ class RandomPerspective:
if keypoints is not None: if keypoints is not None:
keypoints = self.apply_keypoints(keypoints, M) keypoints = self.apply_keypoints(keypoints, M)
new_instances = Instances(bboxes, segments, keypoints, bbox_format="xyxy", normalized=False) new_instances = Instances(bboxes, segments, keypoints, bbox_format="xyxy", normalized=False)
# clip
new_instances.clip(*self.size) new_instances.clip(*self.size)
# filter instances # filter instances
@ -381,9 +382,9 @@ class RandomPerspective:
box2=new_instances.bboxes.T, box2=new_instances.bboxes.T,
area_thr=0.01 if len(segments) else 0.10) area_thr=0.01 if len(segments) else 0.10)
labels["instances"] = new_instances[i] labels["instances"] = new_instances[i]
# clip
labels["cls"] = cls[i] labels["cls"] = cls[i]
labels["img"] = img labels["img"] = img
labels["resized_shape"] = img.shape[:2]
return labels return labels
def box_candidates(self, box1, box2, wh_thr=2, ar_thr=100, area_thr=0.1, eps=1e-16): # box1(4,n), box2(4,n) def box_candidates(self, box1, box2, wh_thr=2, ar_thr=100, area_thr=0.1, eps=1e-16): # box1(4,n), box2(4,n)
@ -430,7 +431,7 @@ class RandomFlip:
def __call__(self, labels): def __call__(self, labels):
img = labels["img"] img = labels["img"]
instances = labels["instances"] instances = labels.pop("instances")
instances.convert_bbox(format="xywh") instances.convert_bbox(format="xywh")
h, w = img.shape[:2] h, w = img.shape[:2]
h = 1 if instances.normalized else h h = 1 if instances.normalized else h
@ -439,13 +440,11 @@ class RandomFlip:
# Flip up-down # Flip up-down
if self.direction == "vertical" and random.random() < self.p: if self.direction == "vertical" and random.random() < self.p:
img = np.flipud(img) img = np.flipud(img)
img = np.ascontiguousarray(img)
instances.flipud(h) instances.flipud(h)
if self.direction == "horizontal" and random.random() < self.p: if self.direction == "horizontal" and random.random() < self.p:
img = np.fliplr(img) img = np.fliplr(img)
img = np.ascontiguousarray(img)
instances.fliplr(w) instances.fliplr(w)
labels["img"] = img labels["img"] = np.ascontiguousarray(img)
labels["instances"] = instances labels["instances"] = instances
return labels return labels
@ -463,7 +462,7 @@ class LetterBox:
def __call__(self, labels={}, image=None): def __call__(self, labels={}, image=None):
img = image or labels["img"] img = image or labels["img"]
shape = img.shape[:2] # current shape [height, width] shape = img.shape[:2] # current shape [height, width]
new_shape = labels.get("rect_shape", self.new_shape) new_shape = labels.pop("rect_shape", self.new_shape)
if isinstance(new_shape, int): if isinstance(new_shape, int):
new_shape = (new_shape, new_shape) new_shape = (new_shape, new_shape)
@ -495,6 +494,7 @@ class LetterBox:
labels = self._update_labels(labels, ratio, dw, dh) labels = self._update_labels(labels, ratio, dw, dh)
labels["img"] = img labels["img"] = img
labels["resized_shape"] = new_shape
return labels return labels
def _update_labels(self, labels, ratio, padw, padh): def _update_labels(self, labels, ratio, padw, padh):
@ -515,26 +515,21 @@ class CopyPaste:
# Implement Copy-Paste augmentation https://arxiv.org/abs/2012.07177, labels as nx5 np.array(cls, xyxy) # Implement Copy-Paste augmentation https://arxiv.org/abs/2012.07177, labels as nx5 np.array(cls, xyxy)
im = labels["img"] im = labels["img"]
cls = labels["cls"] cls = labels["cls"]
bboxes = labels["instances"].bboxes instances = labels.pop("instances")
segments = labels["instances"].segments # n, 1000, 2 instances.convert_bbox(format="xyxy")
keypoints = labels["instances"].keypoints if self.p and len(instances.segments):
if self.p and len(segments): n = len(instances)
n = len(segments)
h, w, _ = im.shape # height, width, channels h, w, _ = im.shape # height, width, channels
im_new = np.zeros(im.shape, np.uint8) im_new = np.zeros(im.shape, np.uint8)
# TODO: this implement can be parallel since segments are ndarray, also might work with Instances inside j = random.sample(range(n), k=round(self.p * n))
for j in random.sample(range(n), k=round(self.p * n)): c, instance = cls[j], instances[j]
c, b, s = cls[j], bboxes[j], segments[j] instance.fliplr(w)
box = w - b[2], b[1], w - b[0], b[3] ioa = bbox_ioa(instance.bboxes, instances.bboxes) # intersection over area, (N, M)
ioa = bbox_ioa(box, bboxes) # intersection over area i = (ioa < 0.30).all(1) # (N, )
if (ioa < 0.30).all(): # allow 30% obscuration of existing labels if i.sum():
bboxes = np.concatenate((bboxes, [box]), 0) cls = np.concatenate((cls, c[i]), axis=0)
cls = np.concatenate((cls, c[None]), axis=0) instances = Instances.concatenate((instances, instance[i]), axis=0)
segments = np.concatenate((segments, np.concatenate((w - s[:, 0:1], s[:, 1:2]), 1)[None]), 0) cv2.drawContours(im_new, instances.segments[j][i].astype(np.int32), -1, (255, 255, 255), cv2.FILLED)
if keypoints is not None:
keypoints = np.concatenate(
(keypoints, np.concatenate((w - keypoints[j][:, 0:1], keypoints[j][:, 1:2]), 1)), 0)
cv2.drawContours(im_new, [segments[j].astype(np.int32)], -1, (255, 255, 255), cv2.FILLED)
result = cv2.bitwise_and(src1=im, src2=im_new) result = cv2.bitwise_and(src1=im, src2=im_new)
result = cv2.flip(result, 1) # augment segments (flip left-right) result = cv2.flip(result, 1) # augment segments (flip left-right)
@ -543,7 +538,7 @@ class CopyPaste:
im[i] = result[i] # cv2.imwrite('debug.jpg', im) # debug im[i] = result[i] # cv2.imwrite('debug.jpg', im) # debug
labels["img"] = im labels["img"] = im
labels["cls"] = cls labels["cls"] = cls
labels["instances"].update(bboxes, segments, keypoints) labels["instances"] = instances
return labels return labels

@ -252,23 +252,36 @@ class Instances:
) )
def flipud(self, h): def flipud(self, h):
# this function may not be very logical, just for clean code when using augment flipud if self._bboxes.format == "xyxy":
y1 = self.bboxes[:, 1].copy()
y2 = self.bboxes[:, 3].copy()
self.bboxes[:, 1] = h - y2
self.bboxes[:, 3] = h - y1
else:
self.bboxes[:, 1] = h - self.bboxes[:, 1] self.bboxes[:, 1] = h - self.bboxes[:, 1]
self.segments[..., 1] = h - self.segments[..., 1] self.segments[..., 1] = h - self.segments[..., 1]
if self.keypoints is not None: if self.keypoints is not None:
self.keypoints[..., 1] = h - self.keypoints[..., 1] self.keypoints[..., 1] = h - self.keypoints[..., 1]
def fliplr(self, w): def fliplr(self, w):
# this function may not be very logical, just for clean code when using augment fliplr if self._bboxes.format == "xyxy":
x1 = self.bboxes[:, 0].copy()
x2 = self.bboxes[:, 2].copy()
self.bboxes[:, 0] = w - x2
self.bboxes[:, 2] = w - x1
else:
self.bboxes[:, 0] = w - self.bboxes[:, 0] self.bboxes[:, 0] = w - self.bboxes[:, 0]
self.segments[..., 0] = w - self.segments[..., 0] self.segments[..., 0] = w - self.segments[..., 0]
if self.keypoints is not None: if self.keypoints is not None:
self.keypoints[..., 0] = w - self.keypoints[..., 0] self.keypoints[..., 0] = w - self.keypoints[..., 0]
def clip(self, w, h): def clip(self, w, h):
ori_format = self._bboxes.format
self.convert_bbox(format="xyxy") self.convert_bbox(format="xyxy")
self.bboxes[:, [0, 2]] = self.bboxes[:, [0, 2]].clip(0, w) self.bboxes[:, [0, 2]] = self.bboxes[:, [0, 2]].clip(0, w)
self.bboxes[:, [1, 3]] = self.bboxes[:, [1, 3]].clip(0, h) self.bboxes[:, [1, 3]] = self.bboxes[:, [1, 3]].clip(0, h)
if ori_format != "xyxy":
self.convert_bbox(format=ori_format)
self.segments[..., 0] = self.segments[..., 0].clip(0, w) self.segments[..., 0] = self.segments[..., 0].clip(0, w)
self.segments[..., 1] = self.segments[..., 1].clip(0, h) self.segments[..., 1] = self.segments[..., 1].clip(0, h)
if self.keypoints is not None: if self.keypoints is not None:

@ -14,18 +14,18 @@ def box_area(box):
def bbox_ioa(box1, box2, eps=1e-7): def bbox_ioa(box1, box2, eps=1e-7):
"""Returns the intersection over box2 area given box1, box2. Boxes are x1y1x2y2 """Returns the intersection over box2 area given box1, box2. Boxes are x1y1x2y2
box1: np.array of shape(4) box1: np.array of shape(nx4)
box2: np.array of shape(nx4) box2: np.array of shape(mx4)
returns: np.array of shape(n) returns: np.array of shape(nxm)
""" """
# Get the coordinates of bounding boxes # Get the coordinates of bounding boxes
b1_x1, b1_y1, b1_x2, b1_y2 = box1 b1_x1, b1_y1, b1_x2, b1_y2 = box1.T
b2_x1, b2_y1, b2_x2, b2_y2 = box2.T b2_x1, b2_y1, b2_x2, b2_y2 = box2.T
# Intersection area # Intersection area
inter_area = (np.minimum(b1_x2, b2_x2) - np.maximum(b1_x1, b2_x1)).clip(0) * \ inter_area = (np.minimum(b1_x2[:, None], b2_x2) - np.maximum(b1_x1[:, None], b2_x1)).clip(0) * \
(np.minimum(b1_y2, b2_y2) - np.maximum(b1_y1, b2_y1)).clip(0) (np.minimum(b1_y2[:, None], b2_y2) - np.maximum(b1_y1[:, None], b2_y1)).clip(0)
# box2 area # box2 area
box2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) + eps box2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) + eps

Loading…
Cancel
Save