diff --git a/ultralytics/engine/exporter.py b/ultralytics/engine/exporter.py
index 31be6f2..6654862 100644
--- a/ultralytics/engine/exporter.py
+++ b/ultralytics/engine/exporter.py
@@ -593,14 +593,43 @@ class Exporter:
         f_onnx, _ = self.export_onnx()
 
         # Export to TF
-        int8 = '-oiqt -qt per-tensor' if self.args.int8 else ''
-        cmd = f'onnx2tf -i "{f_onnx}" -o "{f}" -nuo --non_verbose {int8}'
-        LOGGER.info(f"\n{prefix} running '{cmd}'")
+        tmp_file = f / 'tmp_tflite_int8_calibration_images.npy'  # int8 calibration images file
+        if self.args.int8:
+            if self.args.data:
+                import numpy as np
+
+                from ultralytics.data.dataset import YOLODataset
+                from ultralytics.data.utils import check_det_dataset
+
+                # Generate calibration data for integer quantization
+                LOGGER.info(f"{prefix} collecting INT8 calibration images from 'data={self.args.data}'")
+                dataset = YOLODataset(check_det_dataset(self.args.data)['val'], imgsz=self.imgsz[0], augment=False)
+                images = []
+                n_images = 100  # maximum number of images
+                for n, batch in enumerate(dataset):
+                    if n >= n_images:
+                        break
+                    im = batch['img'].permute(1, 2, 0)[None]  # list to nparray, CHW to BHWC,
+                    images.append(im)
+                f.mkdir()
+                images = torch.cat(images, 0).float()
+                # mean = images.view(-1, 3).mean(0)  # imagenet mean [123.675, 116.28, 103.53]
+                # std = images.view(-1, 3).std(0)  # imagenet std [58.395, 57.12, 57.375]
+                np.save(str(tmp_file), images.numpy())  # BHWC
+                int8 = f'-oiqt -qt per-tensor -cind images "{tmp_file}" "[[[[0, 0, 0]]]]" "[[[[255, 255, 255]]]]"'
+            else:
+                int8 = '-oiqt -qt per-tensor'
+        else:
+            int8 = ''
+
+        cmd = f'onnx2tf -i "{f_onnx}" -o "{f}" -nuo --non_verbose {int8}'.strip()
+        LOGGER.info(f"{prefix} running '{cmd}'")
         subprocess.run(cmd, shell=True)
         yaml_save(f / 'metadata.yaml', self.metadata)  # add metadata.yaml
 
         # Remove/rename TFLite models
         if self.args.int8:
+            tmp_file.unlink(missing_ok=True)
             for file in f.rglob('*_dynamic_range_quant.tflite'):
                 file.rename(file.with_name(file.stem.replace('_dynamic_range_quant', '_int8') + file.suffix))
             for file in f.rglob('*_integer_quant_with_int16_act.tflite'):
diff --git a/ultralytics/engine/model.py b/ultralytics/engine/model.py
index 5ff1147..9ca82f9 100644
--- a/ultralytics/engine/model.py
+++ b/ultralytics/engine/model.py
@@ -343,6 +343,8 @@ class YOLO:
             overrides['imgsz'] = self.model.args['imgsz']  # use trained imgsz unless custom value is passed
         if 'batch' not in kwargs:
             overrides['batch'] = 1  # default to 1 if not modified
+        if 'data' not in kwargs:
+            overrides['data'] = None  # default to None if not modified (avoid int8 calibration with coco.yaml)
         args = get_cfg(cfg=DEFAULT_CFG, overrides=overrides)
         args.task = self.task
         return Exporter(overrides=args, _callbacks=self.callbacks)(model=self.model)
diff --git a/ultralytics/nn/autobackend.py b/ultralytics/nn/autobackend.py
index d409268..df41dec 100644
--- a/ultralytics/nn/autobackend.py
+++ b/ultralytics/nn/autobackend.py
@@ -400,21 +400,21 @@ class AutoBackend(nn.Module):
                     nc = y[ib].shape[1] - y[ip].shape[3] - 4  # y = (1, 160, 160, 32), (1, 116, 8400)
                     self.names = {i: f'class{i}' for i in range(nc)}
             else:  # Lite or Edge TPU
-                input = self.input_details[0]
-                int8 = input['dtype'] == np.int8  # is TFLite quantized int8 model
-                if int8:
-                    scale, zero_point = input['quantization']
-                    im = (im / scale + zero_point).astype(np.int8)  # de-scale
-                self.interpreter.set_tensor(input['index'], im)
+                details = self.input_details[0]
+                integer = details['dtype'] in (np.int8, np.int16)  # is TFLite quantized int8 or int16 model
+                if integer:
+                    scale, zero_point = details['quantization']
+                    im = (im / scale + zero_point).astype(details['dtype'])  # de-scale
+                self.interpreter.set_tensor(details['index'], im)
                 self.interpreter.invoke()
                 y = []
                 for output in self.output_details:
                     x = self.interpreter.get_tensor(output['index'])
-                    if int8:
+                    if integer:
                         scale, zero_point = output['quantization']
                         x = (x.astype(np.float32) - zero_point) * scale  # re-scale
                     if x.ndim > 2:  # if task is not classification
-                        # Unnormalize xywh with input image size
+                        # Denormalize xywh with input image size
                         # xywh are normalized in TFLite/EdgeTPU to mitigate quantization error of integer models
                         # See this PR for details: https://github.com/ultralytics/ultralytics/pull/1695
                         x[:, 0] *= w