From d92dce3a46b7b91a2128e8b20327f3a7c4587084 Mon Sep 17 00:00:00 2001
From: Samuel Dobron
Date: Mon, 29 Apr 2024 20:57:53 +0200
Subject: [PATCH] augmentations for 16bit grayscaled

---
 ultralytics/data/augment.py             | 13 +++++++------
 ultralytics/data/dataset.py             |  1 -
 ultralytics/models/yolo/detect/train.py |  2 +-
 ultralytics/models/yolo/detect/val.py   |  2 +-
 4 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/ultralytics/data/augment.py b/ultralytics/data/augment.py
index f0e5e44..7cffdf8 100644
--- a/ultralytics/data/augment.py
+++ b/ultralytics/data/augment.py
@@ -163,7 +163,7 @@ class Mosaic(BaseMixTransform):

             # Place img in img4
             if i == 0:  # top left
-                img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles
+                img4 = np.full((s * 2, s * 2, 1), 114, dtype=np.float32)  # base image with 4 tiles
                 x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc  # xmin, ymin, xmax, ymax (large image)
                 x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h  # xmin, ymin, xmax, ymax (small image)
             elif i == 1:  # top right
@@ -176,7 +176,7 @@ class Mosaic(BaseMixTransform):
                 x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
                 x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)

-            img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
+            img4[y1a:y2a, x1a:x2a, 0] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
             padw = x1a - x1b
             padh = y1a - y1b

@@ -199,7 +199,7 @@ class Mosaic(BaseMixTransform):

             # Place img in img9
             if i == 0:  # center
-                img9 = np.full((s * 3, s * 3, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles
+                img9 = np.full((s * 3, s * 3, 1), 114, dtype=np.float32)  # base image with 9 tiles
                 h0, w0 = h, w
                 c = s, s, s + w, s + h  # xmin, ymin, xmax, ymax (base) coordinates
             elif i == 1:  # top
@@ -223,7 +223,7 @@ class Mosaic(BaseMixTransform):
             x1, y1, x2, y2 = (max(x, 0) for x in c)  # allocate coords

             # Image
-            img9[y1:y2, x1:x2] = img[y1 - padh:, x1 - padw:]  # img9[ymin:ymax, xmin:xmax]
+            img9[y1:y2, x1:x2, 0] = img[y1 - padh:, x1 - padw:]  # img9[ymin:ymax, xmin:xmax]
             hp, wp = h, w  # height, width previous for next iteration

             # Labels assuming imgsz*2 mosaic size
@@ -485,6 +485,7 @@ class RandomHSV:
     def __call__(self, labels):
         """Applies random horizontal or vertical flip to an image with a given probability."""
         img = labels['img']
+        return labels  # TODO:
         if self.hgain or self.sgain or self.vgain:
             r = np.random.uniform(-1, 1, 3) * [self.hgain, self.sgain, self.vgain] + 1  # random gains
             hue, sat, val = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV))
@@ -712,7 +713,7 @@ class Format:
     def __call__(self, labels):
         """Return formatted image, classes, bounding boxes & keypoints to be used by 'collate_fn'."""
         img = labels.pop('img')
-        h, w = img.shape[:2]
+        h, w = img.shape[0], img.shape[1]
         cls = labels.pop('cls')
         instances = labels.pop('instances')
         instances.convert_bbox(format=self.bbox_format)
@@ -906,5 +907,5 @@ class ToTensor:
         im = np.ascontiguousarray(im.transpose((2, 0, 1))[::-1])  # HWC to CHW -> BGR to RGB -> contiguous
         im = torch.from_numpy(im)  # to torch
         im = im.half() if self.half else im.float()  # uint8 to fp16/32
-        im /= 255.0  # 0-255 to 0.0-1.0
+        im /= 65535  # 0-65535 to 0.0-1.0
         return im
diff --git a/ultralytics/data/dataset.py b/ultralytics/data/dataset.py
index 041d8eb..6d62039 100644
--- a/ultralytics/data/dataset.py
+++ b/ultralytics/data/dataset.py
@@ -145,7 +145,6 @@ class YOLODataset(BaseDataset):
     # TODO: use hyp config to set all these augmentations
     def build_transforms(self, hyp=None):
         """Builds and appends transforms to the list."""
-        self.augment = False
         if self.augment:
             hyp.mosaic = hyp.mosaic if self.augment and not self.rect else 0.0
             hyp.mixup = hyp.mixup if self.augment and not self.rect else 0.0
diff --git a/ultralytics/models/yolo/detect/train.py b/ultralytics/models/yolo/detect/train.py
index 56d9243..7f183e9 100644
--- a/ultralytics/models/yolo/detect/train.py
+++ b/ultralytics/models/yolo/detect/train.py
@@ -53,7 +53,7 @@ class DetectionTrainer(BaseTrainer):

     def preprocess_batch(self, batch):
         """Preprocesses a batch of images by scaling and converting to float."""
-        batch['img'] = batch['img'].to(self.device, non_blocking=True).float() / 255
+        batch['img'] = batch['img'].to(self.device, non_blocking=True).float() / 65535.0  # uint16 (0-65535) to float32 (0.0-1.0)
         return batch

     def set_model_attributes(self):
diff --git a/ultralytics/models/yolo/detect/val.py b/ultralytics/models/yolo/detect/val.py
index 6199f77..f979455 100644
--- a/ultralytics/models/yolo/detect/val.py
+++ b/ultralytics/models/yolo/detect/val.py
@@ -44,7 +44,7 @@ class DetectionValidator(BaseValidator):
     def preprocess(self, batch):
         """Preprocesses batch of images for YOLO training."""
         batch['img'] = batch['img'].to(self.device, non_blocking=True)
-        batch['img'] = (batch['img'].half() if self.args.half else batch['img'].float()) / 255
+        batch['img'] = (batch['img'].half() if self.args.half else batch['img'].float()) / 65535.0  # uint16 (0-65535) to fp16/32 (0.0-1.0)
         for k in ['batch_idx', 'cls', 'bboxes']:
             batch[k] = batch[k].to(self.device)
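
Note (not part of the patch): the sketch below illustrates the input contract these changes assume, namely images arriving as single-channel uint16 arrays kept in (H, W, 1) layout and normalized by 65535 instead of 255, mirroring the modified Mosaic buffers, ToTensor, and preprocess_batch()/preprocess() above. The file name and the use of OpenCV here are illustrative assumptions, not something the patch itself introduces.

    import cv2
    import numpy as np
    import torch

    # Hypothetical input path; any 16-bit grayscale PNG/TIFF works.
    im = cv2.imread("sample_16bit.png", cv2.IMREAD_UNCHANGED)  # shape (H, W), dtype uint16
    assert im is not None and im.dtype == np.uint16

    # Keep an explicit channel axis, matching the single-channel (H, W, 1) mosaic buffers above.
    im = im[..., None]  # (H, W, 1)

    # HWC -> CHW, then scale 0-65535 to 0.0-1.0 as the patched ToTensor does.
    # torch.from_numpy() does not accept uint16, so cast to float32 first.
    t = torch.from_numpy(np.ascontiguousarray(im.transpose(2, 0, 1)).astype(np.float32)) / 65535.0
    print(t.shape, float(t.min()), float(t.max()))

With this layout, DetectionTrainer.preprocess_batch() and DetectionValidator.preprocess() above apply the same 1/65535 scaling to the batched tensors.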