ultralytics 8.0.41 TF SavedModel and EdgeTPU export (#1034)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Noobtoss <96134731+Noobtoss@users.noreply.github.com>
Co-authored-by: Ayush Chaurasia <ayush.chaurarsia@gmail.com>
This commit is contained in:
Glenn Jocher
2023-02-20 01:27:28 +01:00
committed by GitHub
parent 4b866c9718
commit f6e393c1d2
64 changed files with 604 additions and 351 deletions

View File

@ -28,13 +28,18 @@ def check_class_names(names):
if not all(isinstance(k, int) for k in names.keys()): # convert string keys to int, i.e. '0' to 0
names = {int(k): v for k, v in names.items()}
if isinstance(names[0], str) and names[0].startswith('n0'): # imagenet class codes, i.e. 'n01440764'
map = yaml_load(ROOT / 'yolo/data/datasets/ImageNet.yaml')['map'] # human-readable names
map = yaml_load(ROOT / 'datasets/ImageNet.yaml')['map'] # human-readable names
names = {k: map[v] for k, v in names.items()}
return names
class AutoBackend(nn.Module):
def _apply_default_class_names(self, data):
with contextlib.suppress(Exception):
return yaml_load(check_yaml(data))['names']
return {i: f'class{i}' for i in range(999)} # return default if above errors
def __init__(self, weights='yolov8n.pt', device=torch.device('cpu'), dnn=False, data=None, fp16=False, fuse=True):
"""
MultiBackend class for python inference on various platforms using Ultralytics YOLO.
@ -53,7 +58,7 @@ class AutoBackend(nn.Module):
| PyTorch | *.pt |
| TorchScript | *.torchscript |
| ONNX Runtime | *.onnx |
| ONNX OpenCV DNN | *.onnx --dnn |
| ONNX OpenCV DNN | *.onnx dnn=True |
| OpenVINO | *.xml |
| CoreML | *.mlmodel |
| TensorRT | *.engine |
@ -142,13 +147,9 @@ class AutoBackend(nn.Module):
logger = trt.Logger(trt.Logger.INFO)
# Read file
with open(w, 'rb') as f, trt.Runtime(logger) as runtime:
# Read metadata length
meta_len = int.from_bytes(f.read(4), byteorder='little')
# Read metadata
meta = json.loads(f.read(meta_len).decode('utf-8'))
stride, names = int(meta['stride']), meta['names']
# Read engine
model = runtime.deserialize_cuda_engine(f.read())
meta_len = int.from_bytes(f.read(4), byteorder='little') # read metadata length
meta = json.loads(f.read(meta_len).decode('utf-8')) # read metadata
model = runtime.deserialize_cuda_engine(f.read()) # read engine
context = model.create_execution_context()
bindings = OrderedDict()
output_names = []
@ -170,6 +171,7 @@ class AutoBackend(nn.Module):
bindings[name] = Binding(name, dtype, shape, im, int(im.data_ptr()))
binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())
batch_size = bindings['images'].shape[0] # if dynamic, this is instead max batch size
stride, names = int(meta['stride']), meta['names']
elif coreml: # CoreML
LOGGER.info(f'Loading {w} for CoreML inference...')
import coremltools as ct
@ -179,6 +181,7 @@ class AutoBackend(nn.Module):
import tensorflow as tf
keras = False # assume TF1 saved_model
model = tf.keras.models.load_model(w) if keras else tf.saved_model.load(w)
w = Path(w) / 'metadata.yaml'
elif pb: # GraphDef https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
LOGGER.info(f'Loading {w} for TensorFlow GraphDef inference...')
import tensorflow as tf
@ -265,7 +268,7 @@ class AutoBackend(nn.Module):
# Check names
if 'names' not in locals(): # names missing
names = yaml_load(check_yaml(data))['names'] if data else {i: f'class{i}' for i in range(999)} # assign
names = self._apply_default_class_names(data)
names = check_class_names(names)
self.__dict__.update(locals()) # assign all variables to self
@ -324,7 +327,9 @@ class AutoBackend(nn.Module):
box = xywh2xyxy(y['coordinates'] * [[w, h, w, h]]) # xyxy pixels
conf, cls = y['confidence'].max(1), y['confidence'].argmax(1).astype(np.float)
y = np.concatenate((box, conf.reshape(-1, 1), cls.reshape(-1, 1)), 1)
else:
elif len(y) == 1: # classification model
y = list(y.values())
elif len(y) == 2: # segmentation model
y = list(reversed(y.values())) # reversed for segmentation models (pred, proto)
elif self.paddle: # PaddlePaddle
im = im.cpu().numpy().astype(np.float32)
@ -337,8 +342,14 @@ class AutoBackend(nn.Module):
im = im.cpu().numpy()
if self.saved_model: # SavedModel
y = self.model(im, training=False) if self.keras else self.model(im)
if not isinstance(y, list):
y = [y]
elif self.pb: # GraphDef
y = self.frozen_func(x=self.tf.constant(im))
if len(y) == 2 and len(self.names) == 999: # segments and names not defined
ip, ib = (0, 1) if len(y[0].shape) == 4 else (1, 0) # index of protos, boxes
nc = y[ib].shape[1] - y[ip].shape[3] - 4 # y = (1, 160, 160, 32), (1, 116, 8400)
self.names = {i: f'class{i}' for i in range(nc)}
else: # Lite or Edge TPU
input = self.input_details[0]
int8 = input['dtype'] == np.uint8 # is TFLite quantized uint8 model
@ -354,12 +365,16 @@ class AutoBackend(nn.Module):
scale, zero_point = output['quantization']
x = (x.astype(np.float32) - zero_point) * scale # re-scale
y.append(x)
# TF segment fixes: export is reversed vs ONNX export and protos are transposed
if len(self.output_details) == 2: # segment
y = [y[1], np.transpose(y[0], (0, 3, 1, 2))]
# TF segment fixes: export is reversed vs ONNX export and protos are transposed
if len(y) == 2: # segment with (det, proto) output order reversed
if len(y[1].shape) != 4:
y = list(reversed(y)) # should be y = (1, 116, 8400), (1, 160, 160, 32)
y[1] = np.transpose(y[1], (0, 3, 1, 2)) # should be y = (1, 116, 8400), (1, 32, 160, 160)
y = [x if isinstance(x, np.ndarray) else x.numpy() for x in y]
# y[0][..., :4] *= [w, h, w, h] # xywh normalized to pixels
# for x in y:
# print(type(x), len(x)) if isinstance(x, (list, tuple)) else print(type(x), x.shape) # debug shapes
if isinstance(y, (list, tuple)):
return self.from_numpy(y[0]) if len(y) == 1 else [self.from_numpy(x) for x in y]
else:
@ -375,7 +390,7 @@ class AutoBackend(nn.Module):
Returns:
(torch.Tensor): The converted tensor
"""
return torch.from_numpy(x).to(self.device) if isinstance(x, np.ndarray) else x
return torch.tensor(x).to(self.device) if isinstance(x, np.ndarray) else x
def warmup(self, imgsz=(1, 3, 640, 640)):
"""

View File

@ -79,7 +79,7 @@ class AutoShape(nn.Module):
with amp.autocast(autocast):
return self.model(ims.to(p.device).type_as(p), augment=augment) # inference
# Pre-process
# Preprocess
n, ims = (len(ims), list(ims)) if isinstance(ims, (list, tuple)) else (1, [ims]) # number, list of images
shape0, shape1, files = [], [], [] # image and inference shapes, filenames
for i, im in enumerate(ims):
@ -108,7 +108,7 @@ class AutoShape(nn.Module):
with dt[1]:
y = self.model(x, augment=augment) # forward
# Post-process
# Postprocess
with dt[2]:
y = non_max_suppression(y if self.dmb else y[0],
self.conf,
@ -181,7 +181,7 @@ class Detections:
self.ims[i] = np.asarray(im)
if pprint:
s = s.lstrip('\n')
return f'{s}\nSpeed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {self.s}' % self.t
return f'{s}\nSpeed: %.1fms preprocess, %.1fms inference, %.1fms NMS per image at shape {self.s}' % self.t
if crop:
if save:
LOGGER.info(f'Saved results to {save_dir}\n')

View File

@ -174,7 +174,7 @@ class C2(nn.Module):
self.m = nn.Sequential(*(Bottleneck(self.c, self.c, shortcut, g, k=((3, 3), (3, 3)), e=1.0) for _ in range(n)))
def forward(self, x):
a, b = self.cv1(x).split((self.c, self.c), 1)
a, b = self.cv1(x).chunk(2, 1)
return self.cv2(torch.cat((self.m(a), b), 1))
@ -188,6 +188,11 @@ class C2f(nn.Module):
self.m = nn.ModuleList(Bottleneck(self.c, self.c, shortcut, g, k=((3, 3), (3, 3)), e=1.0) for _ in range(n))
def forward(self, x):
y = list(self.cv1(x).chunk(2, 1))
y.extend(m(y[-1]) for m in self.m)
return self.cv2(torch.cat(y, 1))
def forward_split(self, x):
y = list(self.cv1(x).split((self.c, self.c), 1))
y.extend(m(y[-1]) for m in self.m)
return self.cv2(torch.cat(y, 1))
@ -405,7 +410,12 @@ class Detect(nn.Module):
self.anchors, self.strides = (x.transpose(0, 1) for x in make_anchors(x, self.stride, 0.5))
self.shape = shape
box, cls = torch.cat([xi.view(shape[0], self.no, -1) for xi in x], 2).split((self.reg_max * 4, self.nc), 1)
if self.export and self.format == 'edgetpu': # FlexSplitV ops issue
x_cat = torch.cat([xi.view(shape[0], self.no, -1) for xi in x], 2)
box = x_cat[:, :self.reg_max * 4]
cls = x_cat[:, self.reg_max * 4:]
else:
box, cls = torch.cat([xi.view(shape[0], self.no, -1) for xi in x], 2).split((self.reg_max * 4, self.nc), 1)
dbox = dist2bbox(self.dfl(box), self.anchors.unsqueeze(0), xywh=True, dim=1) * self.strides
y = torch.cat((dbox, cls.sigmoid()), 1)
return y if self.export else (y, x)

View File

@ -1,6 +1,5 @@
# Ultralytics YOLO 🚀, GPL-3.0 license
import ast
import contextlib
from copy import deepcopy
from pathlib import Path
@ -338,7 +337,7 @@ def torch_safe_load(weight):
file = attempt_download_asset(weight) # search online if missing locally
try:
return torch.load(file, map_location='cpu') # load
return torch.load(file, map_location='cpu'), file # load
except ModuleNotFoundError as e:
if e.name == 'omegaconf': # e.name is missing module name
LOGGER.warning(f'WARNING ⚠️ {weight} requires {e.name}, which is not in ultralytics requirements.'
@ -347,7 +346,7 @@ def torch_safe_load(weight):
f'download updated models from https://github.com/ultralytics/assets/releases/tag/v0.0.0')
if e.name != 'models':
check_requirements(e.name) # install missing module
return torch.load(file, map_location='cpu') # load
return torch.load(file, map_location='cpu'), file # load
def attempt_load_weights(weights, device=None, inplace=True, fuse=False):
@ -355,13 +354,13 @@ def attempt_load_weights(weights, device=None, inplace=True, fuse=False):
ensemble = Ensemble()
for w in weights if isinstance(weights, list) else [weights]:
ckpt = torch_safe_load(w) # load ckpt
ckpt, w = torch_safe_load(w) # load ckpt
args = {**DEFAULT_CFG_DICT, **ckpt['train_args']} # combine model and default args, preferring model args
model = (ckpt.get('ema') or ckpt['model']).to(device).float() # FP32 model
# Model compatibility updates
model.args = args # attach args to model
model.pt_path = weights # attach *.pt file path to model
model.pt_path = w # attach *.pt file path to model
model.task = guess_model_task(model)
if not hasattr(model, 'stride'):
model.stride = torch.tensor([32.])
@ -392,7 +391,7 @@ def attempt_load_weights(weights, device=None, inplace=True, fuse=False):
def attempt_load_one_weight(weight, device=None, inplace=True, fuse=False):
# Loads a single model weights
ckpt = torch_safe_load(weight) # load ckpt
ckpt, weight = torch_safe_load(weight) # load ckpt
args = {**DEFAULT_CFG_DICT, **ckpt['train_args']} # combine model and default args, preferring model args
model = (ckpt.get('ema') or ckpt['model']).to(device).float() # FP32 model