ultralytics 8.0.41 TF SavedModel and EdgeTPU export (#1034)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Noobtoss <96134731+Noobtoss@users.noreply.github.com> Co-authored-by: Ayush Chaurasia <ayush.chaurarsia@gmail.com>
2023-02-20 01:27:28 +01:00
parent 4b866c9718
commit f6e393c1d2
64 changed files with 604 additions and 351 deletions
--- a/ultralytics/nn/autobackend.py
+++ b/ultralytics/nn/autobackend.py
@ -28,13 +28,18 @@ def check_class_names(names):
        if not all(isinstance(k, int) for k in names.keys()):  # convert string keys to int, i.e. '0' to 0
            names = {int(k): v for k, v in names.items()}
        if isinstance(names[0], str) and names[0].startswith('n0'):  # imagenet class codes, i.e. 'n01440764'
-            map = yaml_load(ROOT / 'yolo/data/datasets/ImageNet.yaml')['map']  # human-readable names
+            map = yaml_load(ROOT / 'datasets/ImageNet.yaml')['map']  # human-readable names
            names = {k: map[v] for k, v in names.items()}
    return names


 class AutoBackend(nn.Module):

+    def _apply_default_class_names(self, data):
+        with contextlib.suppress(Exception):
+            return yaml_load(check_yaml(data))['names']
+        return {i: f'class{i}' for i in range(999)}  # return default if above errors
+
    def __init__(self, weights='yolov8n.pt', device=torch.device('cpu'), dnn=False, data=None, fp16=False, fuse=True):
        """
        MultiBackend class for python inference on various platforms using Ultralytics YOLO.
@ -53,7 +58,7 @@ class AutoBackend(nn.Module):
            | PyTorch               | *.pt             |
            | TorchScript           | *.torchscript    |
            | ONNX Runtime          | *.onnx           |
-            | ONNX OpenCV DNN       | *.onnx --dnn     |
+            | ONNX OpenCV DNN       | *.onnx dnn=True  |
            | OpenVINO              | *.xml            |
            | CoreML                | *.mlmodel        |
            | TensorRT              | *.engine         |
@ -142,13 +147,9 @@ class AutoBackend(nn.Module):
            logger = trt.Logger(trt.Logger.INFO)
            # Read file
            with open(w, 'rb') as f, trt.Runtime(logger) as runtime:
-                # Read metadata length
-                meta_len = int.from_bytes(f.read(4), byteorder='little')
-                # Read metadata
-                meta = json.loads(f.read(meta_len).decode('utf-8'))
-                stride, names = int(meta['stride']), meta['names']
-                # Read engine
-                model = runtime.deserialize_cuda_engine(f.read())
+                meta_len = int.from_bytes(f.read(4), byteorder='little')  # read metadata length
+                meta = json.loads(f.read(meta_len).decode('utf-8'))  # read metadata
+                model = runtime.deserialize_cuda_engine(f.read())  # read engine
            context = model.create_execution_context()
            bindings = OrderedDict()
            output_names = []
@ -170,6 +171,7 @@ class AutoBackend(nn.Module):
                bindings[name] = Binding(name, dtype, shape, im, int(im.data_ptr()))
            binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())
            batch_size = bindings['images'].shape[0]  # if dynamic, this is instead max batch size
+            stride, names = int(meta['stride']), meta['names']
        elif coreml:  # CoreML
            LOGGER.info(f'Loading {w} for CoreML inference...')
            import coremltools as ct
@ -179,6 +181,7 @@ class AutoBackend(nn.Module):
            import tensorflow as tf
            keras = False  # assume TF1 saved_model
            model = tf.keras.models.load_model(w) if keras else tf.saved_model.load(w)
+            w = Path(w) / 'metadata.yaml'
        elif pb:  # GraphDef https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
            LOGGER.info(f'Loading {w} for TensorFlow GraphDef inference...')
            import tensorflow as tf
@ -265,7 +268,7 @@ class AutoBackend(nn.Module):

        # Check names
        if 'names' not in locals():  # names missing
-            names = yaml_load(check_yaml(data))['names'] if data else {i: f'class{i}' for i in range(999)}  # assign
+            names = self._apply_default_class_names(data)
        names = check_class_names(names)

        self.__dict__.update(locals())  # assign all variables to self
@ -324,7 +327,9 @@ class AutoBackend(nn.Module):
                box = xywh2xyxy(y['coordinates'] * [[w, h, w, h]])  # xyxy pixels
                conf, cls = y['confidence'].max(1), y['confidence'].argmax(1).astype(np.float)
                y = np.concatenate((box, conf.reshape(-1, 1), cls.reshape(-1, 1)), 1)
-            else:
+            elif len(y) == 1:  # classification model
+                y = list(y.values())
+            elif len(y) == 2:  # segmentation model
                y = list(reversed(y.values()))  # reversed for segmentation models (pred, proto)
        elif self.paddle:  # PaddlePaddle
            im = im.cpu().numpy().astype(np.float32)
@ -337,8 +342,14 @@ class AutoBackend(nn.Module):
            im = im.cpu().numpy()
            if self.saved_model:  # SavedModel
                y = self.model(im, training=False) if self.keras else self.model(im)
+                if not isinstance(y, list):
+                    y = [y]
            elif self.pb:  # GraphDef
                y = self.frozen_func(x=self.tf.constant(im))
+                if len(y) == 2 and len(self.names) == 999:  # segments and names not defined
+                    ip, ib = (0, 1) if len(y[0].shape) == 4 else (1, 0)  # index of protos, boxes
+                    nc = y[ib].shape[1] - y[ip].shape[3] - 4  # y = (1, 160, 160, 32), (1, 116, 8400)
+                    self.names = {i: f'class{i}' for i in range(nc)}
            else:  # Lite or Edge TPU
                input = self.input_details[0]
                int8 = input['dtype'] == np.uint8  # is TFLite quantized uint8 model
@ -354,12 +365,16 @@ class AutoBackend(nn.Module):
                        scale, zero_point = output['quantization']
                        x = (x.astype(np.float32) - zero_point) * scale  # re-scale
                    y.append(x)
-                # TF segment fixes: export is reversed vs ONNX export and protos are transposed
-                if len(self.output_details) == 2:  # segment
-                    y = [y[1], np.transpose(y[0], (0, 3, 1, 2))]
+            # TF segment fixes: export is reversed vs ONNX export and protos are transposed
+            if len(y) == 2:  # segment with (det, proto) output order reversed
+                if len(y[1].shape) != 4:
+                    y = list(reversed(y))  # should be y = (1, 116, 8400), (1, 160, 160, 32)
+                y[1] = np.transpose(y[1], (0, 3, 1, 2))  # should be y = (1, 116, 8400), (1, 32, 160, 160)
            y = [x if isinstance(x, np.ndarray) else x.numpy() for x in y]
            # y[0][..., :4] *= [w, h, w, h]  # xywh normalized to pixels

+        # for x in y:
+        #     print(type(x), len(x)) if isinstance(x, (list, tuple)) else print(type(x), x.shape)  # debug shapes
        if isinstance(y, (list, tuple)):
            return self.from_numpy(y[0]) if len(y) == 1 else [self.from_numpy(x) for x in y]
        else:
@ -375,7 +390,7 @@ class AutoBackend(nn.Module):
         Returns:
             (torch.Tensor): The converted tensor
         """
-        return torch.from_numpy(x).to(self.device) if isinstance(x, np.ndarray) else x
+        return torch.tensor(x).to(self.device) if isinstance(x, np.ndarray) else x

    def warmup(self, imgsz=(1, 3, 640, 640)):
        """
--- a/ultralytics/nn/autoshape.py
+++ b/ultralytics/nn/autoshape.py
@ -79,7 +79,7 @@ class AutoShape(nn.Module):
                with amp.autocast(autocast):
                    return self.model(ims.to(p.device).type_as(p), augment=augment)  # inference

-            # Pre-process
+            # Preprocess
            n, ims = (len(ims), list(ims)) if isinstance(ims, (list, tuple)) else (1, [ims])  # number, list of images
            shape0, shape1, files = [], [], []  # image and inference shapes, filenames
            for i, im in enumerate(ims):
@ -108,7 +108,7 @@ class AutoShape(nn.Module):
            with dt[1]:
                y = self.model(x, augment=augment)  # forward

-            # Post-process
+            # Postprocess
            with dt[2]:
                y = non_max_suppression(y if self.dmb else y[0],
                                        self.conf,
@ -181,7 +181,7 @@ class Detections:
                self.ims[i] = np.asarray(im)
        if pprint:
            s = s.lstrip('\n')
-            return f'{s}\nSpeed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {self.s}' % self.t
+            return f'{s}\nSpeed: %.1fms preprocess, %.1fms inference, %.1fms NMS per image at shape {self.s}' % self.t
        if crop:
            if save:
                LOGGER.info(f'Saved results to {save_dir}\n')
--- a/ultralytics/nn/modules.py
+++ b/ultralytics/nn/modules.py
@ -174,7 +174,7 @@ class C2(nn.Module):
        self.m = nn.Sequential(*(Bottleneck(self.c, self.c, shortcut, g, k=((3, 3), (3, 3)), e=1.0) for _ in range(n)))

    def forward(self, x):
-        a, b = self.cv1(x).split((self.c, self.c), 1)
+        a, b = self.cv1(x).chunk(2, 1)
        return self.cv2(torch.cat((self.m(a), b), 1))


@ -188,6 +188,11 @@ class C2f(nn.Module):
        self.m = nn.ModuleList(Bottleneck(self.c, self.c, shortcut, g, k=((3, 3), (3, 3)), e=1.0) for _ in range(n))

    def forward(self, x):
+        y = list(self.cv1(x).chunk(2, 1))
+        y.extend(m(y[-1]) for m in self.m)
+        return self.cv2(torch.cat(y, 1))
+
+    def forward_split(self, x):
        y = list(self.cv1(x).split((self.c, self.c), 1))
        y.extend(m(y[-1]) for m in self.m)
        return self.cv2(torch.cat(y, 1))
@ -405,7 +410,12 @@ class Detect(nn.Module):
            self.anchors, self.strides = (x.transpose(0, 1) for x in make_anchors(x, self.stride, 0.5))
            self.shape = shape

-        box, cls = torch.cat([xi.view(shape[0], self.no, -1) for xi in x], 2).split((self.reg_max * 4, self.nc), 1)
+        if self.export and self.format == 'edgetpu':  # FlexSplitV ops issue
+            x_cat = torch.cat([xi.view(shape[0], self.no, -1) for xi in x], 2)
+            box = x_cat[:, :self.reg_max * 4]
+            cls = x_cat[:, self.reg_max * 4:]
+        else:
+            box, cls = torch.cat([xi.view(shape[0], self.no, -1) for xi in x], 2).split((self.reg_max * 4, self.nc), 1)
        dbox = dist2bbox(self.dfl(box), self.anchors.unsqueeze(0), xywh=True, dim=1) * self.strides
        y = torch.cat((dbox, cls.sigmoid()), 1)
        return y if self.export else (y, x)
--- a/ultralytics/nn/tasks.py
+++ b/ultralytics/nn/tasks.py
@ -1,6 +1,5 @@
 # Ultralytics YOLO 🚀, GPL-3.0 license

-import ast
 import contextlib
 from copy import deepcopy
 from pathlib import Path
@ -338,7 +337,7 @@ def torch_safe_load(weight):

    file = attempt_download_asset(weight)  # search online if missing locally
    try:
-        return torch.load(file, map_location='cpu')  # load
+        return torch.load(file, map_location='cpu'), file  # load
    except ModuleNotFoundError as e:
        if e.name == 'omegaconf':  # e.name is missing module name
            LOGGER.warning(f'WARNING ⚠️ {weight} requires {e.name}, which is not in ultralytics requirements.'
@ -347,7 +346,7 @@ def torch_safe_load(weight):
                           f'download updated models from https://github.com/ultralytics/assets/releases/tag/v0.0.0')
        if e.name != 'models':
            check_requirements(e.name)  # install missing module
-        return torch.load(file, map_location='cpu')  # load
+        return torch.load(file, map_location='cpu'), file  # load


 def attempt_load_weights(weights, device=None, inplace=True, fuse=False):
@ -355,13 +354,13 @@ def attempt_load_weights(weights, device=None, inplace=True, fuse=False):

    ensemble = Ensemble()
    for w in weights if isinstance(weights, list) else [weights]:
-        ckpt = torch_safe_load(w)  # load ckpt
+        ckpt, w = torch_safe_load(w)  # load ckpt
        args = {**DEFAULT_CFG_DICT, **ckpt['train_args']}  # combine model and default args, preferring model args
        model = (ckpt.get('ema') or ckpt['model']).to(device).float()  # FP32 model

        # Model compatibility updates
        model.args = args  # attach args to model
-        model.pt_path = weights  # attach *.pt file path to model
+        model.pt_path = w  # attach *.pt file path to model
        model.task = guess_model_task(model)
        if not hasattr(model, 'stride'):
            model.stride = torch.tensor([32.])
@ -392,7 +391,7 @@ def attempt_load_weights(weights, device=None, inplace=True, fuse=False):

 def attempt_load_one_weight(weight, device=None, inplace=True, fuse=False):
    # Loads a single model weights
-    ckpt = torch_safe_load(weight)  # load ckpt
+    ckpt, weight = torch_safe_load(weight)  # load ckpt
    args = {**DEFAULT_CFG_DICT, **ckpt['train_args']}  # combine model and default args, preferring model args
    model = (ckpt.get('ema') or ckpt['model']).to(device).float()  # FP32 model