@@ -83,16 +83,23 @@ class AutoBackend(nn.Module):
         nn_module = isinstance(weights, torch.nn.Module)
         pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle, ncnn, triton = \
             self._model_type(w)
-        fp16 &= pt or jit or onnx or engine or nn_module or triton  # FP16
+        fp16 &= pt or jit or onnx or xml or engine or nn_module or triton  # FP16
         nhwc = coreml or saved_model or pb or tflite or edgetpu  # BHWC formats (vs torch BCWH)
         stride = 32  # default stride
         model, metadata = None, None
+
+        # Set device
         cuda = torch.cuda.is_available() and device.type != 'cpu'  # use CUDA
+        if cuda and not any([nn_module, pt, jit, engine]):  # GPU dataloader formats
+            device = torch.device('cpu')
+            cuda = False
+
+        # Download if not local
         if not (pt or triton or nn_module):
-            w = attempt_download_asset(w)  # download if not local
+            w = attempt_download_asset(w)
 
-        # NOTE: special case: in-memory pytorch model
-        if nn_module:
+        # Load model
+        if nn_module:  # in-memory PyTorch model
             model = weights.to(device)
             model = model.fuse(verbose=verbose) if fuse else model
             if hasattr(model, 'kpt_shape'):
@@ -269,14 +276,13 @@ class AutoBackend(nn.Module):
             net.load_model(str(w.with_suffix('.bin')))
             metadata = w.parent / 'metadata.yaml'
         elif triton:  # NVIDIA Triton Inference Server
-            LOGGER.info('Triton Inference Server not supported...')
-            '''
-            TODO:
+            """TODO
             check_requirements('tritonclient[all]')
             from utils.triton import TritonRemoteModel
             model = TritonRemoteModel(url=w)
             nhwc = model.runtime.startswith("tensorflow")
-            '''
+            """
+            raise NotImplementedError('Triton Inference Server is not currently supported.')
         else:
             from ultralytics.yolo.engine.exporter import export_formats
             raise TypeError(f"model='{w}' is not a supported model format. "
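
Below is a minimal, self-contained sketch (not part of the patch) of the CPU-fallback behaviour the first hunk adds: only in-memory modules and PyTorch/TorchScript/TensorRT weights keep running on the GPU, while every other export format is pushed back to the CPU even if CUDA is available. The helper name resolve_device and its boolean parameters are hypothetical stand-ins for the corresponding AutoBackend locals.

    import torch

    def resolve_device(device: torch.device, nn_module: bool, pt: bool, jit: bool, engine: bool):
        # Hypothetical helper mirroring the device-fallback logic added in the hunk above.
        cuda = torch.cuda.is_available() and device.type != 'cpu'  # use CUDA
        if cuda and not any([nn_module, pt, jit, engine]):  # GPU dataloader formats only
            device = torch.device('cpu')  # e.g. ONNX/TFLite weights fall back to CPU
            cuda = False
        return device, cuda

    # Example: an ONNX model requested on 'cuda:0' ends up on CPU
    device, cuda = resolve_device(torch.device('cuda:0'), nn_module=False, pt=False, jit=False, engine=False)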