From a7c249e9c125564fb2cdfb35d4761e611337eb5c Mon Sep 17 00:00:00 2001
From: Semih Demirel <85176438+semihhdemirel@users.noreply.github.com>
Date: Mon, 5 Jun 2023 19:57:03 +0300
Subject: [PATCH] [Example] YOLOv8-ONNXRuntime (#2992)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Ayush Chaurasia
---
 examples/README.md                    |   1 +
 examples/YOLOv8-ONNXRuntime/README.md |  43 +++++
 examples/YOLOv8-ONNXRuntime/main.py   | 230 ++++++++++++++++++++++++++
 3 files changed, 274 insertions(+)
 create mode 100644 examples/YOLOv8-ONNXRuntime/README.md
 create mode 100644 examples/YOLOv8-ONNXRuntime/main.py

diff --git a/examples/README.md b/examples/README.md
index e77cebd..d2c27c1 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -10,6 +10,7 @@ This repository features a collection of real-world applications and walkthrough
 | [YOLO OpenCV ONNX Detection Python](./YOLOv8-OpenCV-ONNX-Python) | OpenCV/Python/ONNX | [Farid Inawan](https://github.com/frdteknikelektro) |
 | [YOLO .Net ONNX Detection C#](https://www.nuget.org/packages/Yolov8.Net) | C# .Net | [Samuel Stainback](https://github.com/sstainba) |
 | [YOLOv8 on NVIDIA Jetson(TensorRT and DeepStream)](https://wiki.seeedstudio.com/YOLOv8-DeepStream-TRT-Jetson/) | Python | [Lakshantha](https://github.com/lakshanthad) |
+| [YOLOv8 ONNXRuntime Python](./YOLOv8-ONNXRuntime) | Python/ONNXRuntime | [Semih Demirel](https://github.com/semihhdemirel) |
 
 ### How to Contribute
 
diff --git a/examples/YOLOv8-ONNXRuntime/README.md b/examples/YOLOv8-ONNXRuntime/README.md
new file mode 100644
index 0000000..b206b2e
--- /dev/null
+++ b/examples/YOLOv8-ONNXRuntime/README.md
@@ -0,0 +1,43 @@
+# YOLOv8 - ONNX Runtime
+
+This project implements YOLOv8 object detection using ONNX Runtime.
+
+## Installation
+
+To run this project, install the required dependencies by following the instructions below.
+
+### Installing Required Dependencies
+
+You can install the required dependencies by running the following command:
+
+```bash
+pip install -r requirements.txt
+```
+
+### Installing `onnxruntime-gpu`
+
+If you have an NVIDIA GPU and want to leverage GPU acceleration, install the `onnxruntime-gpu` package with the following command:
+
+```bash
+pip install onnxruntime-gpu
+```
+
+Note: Make sure you have compatible NVIDIA drivers and CUDA libraries installed on your system.
+
+### Installing `onnxruntime` (CPU version)
+
+If you don't have an NVIDIA GPU or prefer to run on the CPU, install the `onnxruntime` package instead:
+
+```bash
+pip install onnxruntime
+```
+
+## Usage
+
+After successfully installing the required packages, you can run the YOLOv8 implementation with the following command:
+
+```bash
+python main.py --model yolov8n.onnx --img image.jpg --conf-thres 0.5 --iou-thres 0.5
+```
+
+Make sure to replace `yolov8n.onnx` with the path to your YOLOv8 ONNX model file and `image.jpg` with the path to your input image, and adjust the confidence threshold (`--conf-thres`) and IoU threshold (`--iou-thres`) values as needed.
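+
+### Exporting a YOLOv8 Model to ONNX
+
+If you don't already have an ONNX model, a minimal way to produce `yolov8n.onnx` is the Ultralytics export command (this assumes the `ultralytics` package is installed; the model name is just an example):
+
+```bash
+yolo export model=yolov8n.pt format=onnx
+```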
diff --git a/examples/YOLOv8-ONNXRuntime/main.py b/examples/YOLOv8-ONNXRuntime/main.py
new file mode 100644
index 0000000..9c59100
--- /dev/null
+++ b/examples/YOLOv8-ONNXRuntime/main.py
@@ -0,0 +1,230 @@
+import argparse
+
+import cv2
+import numpy as np
+import onnxruntime as ort
+import torch
+
+from ultralytics.yolo.utils import ROOT, yaml_load
+from ultralytics.yolo.utils.checks import check_requirements, check_yaml
+
+
+class Yolov8:
+
+    def __init__(self, onnx_model, input_image, confidence_thres, iou_thres):
+        """
+        Initializes an instance of the Yolov8 class.
+
+        Args:
+            onnx_model: Path to the ONNX model.
+            input_image: Path to the input image.
+            confidence_thres: Confidence threshold for filtering detections.
+            iou_thres: IoU (Intersection over Union) threshold for non-maximum suppression.
+        """
+        self.onnx_model = onnx_model
+        self.input_image = input_image
+        self.confidence_thres = confidence_thres
+        self.iou_thres = iou_thres
+
+        # Load the class names from the COCO dataset
+        self.classes = yaml_load(check_yaml('coco128.yaml'))['names']
+
+        # Generate a color palette for the classes
+        self.color_palette = np.random.uniform(0, 255, size=(len(self.classes), 3))
+
+    def draw_detections(self, img, box, score, class_id):
+        """
+        Draws bounding boxes and labels on the input image based on the detected objects.
+
+        Args:
+            img: The input image to draw detections on.
+            box: Detected bounding box.
+            score: Corresponding detection score.
+            class_id: Class ID for the detected object.
+
+        Returns:
+            None
+        """
+
+        # Extract the coordinates of the bounding box
+        x1, y1, w, h = box
+
+        # Retrieve the color for the class ID
+        color = self.color_palette[class_id]
+
+        # Draw the bounding box on the image
+        cv2.rectangle(img, (int(x1), int(y1)), (int(x1 + w), int(y1 + h)), color, 2)
+
+        # Create the label text with class name and score
+        label = f'{self.classes[class_id]}: {score:.2f}'
+
+        # Calculate the dimensions of the label text
+        (label_width, label_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
+
+        # Calculate the position of the label text
+        label_x = x1
+        label_y = y1 - 10 if y1 - 10 > label_height else y1 + 10
+
+        # Draw a filled rectangle as the background for the label text
+        cv2.rectangle(img, (label_x, label_y - label_height), (label_x + label_width, label_y + label_height), color,
+                      cv2.FILLED)
+
+        # Draw the label text on the image
+        cv2.putText(img, label, (label_x, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
+
+    def preprocess(self):
+        """
+        Preprocesses the input image before performing inference.
+
+        Returns:
+            image_data: Preprocessed image data ready for inference.
+ """ + # Read the input image using OpenCV + self.img = cv2.imread(self.input_image) + + # Get the height and width of the input image + self.img_height, self.img_width = self.img.shape[:2] + + # Convert the image color space from BGR to RGB + img = cv2.cvtColor(self.img, cv2.COLOR_BGR2RGB) + + # Resize the image to match the input shape + img = cv2.resize(img, (self.input_width, self.input_height)) + + # Normalize the image data by dividing it by 255.0 + image_data = np.array(img) / 255.0 + + # Transpose the image to have the channel dimension as the first dimension + image_data = np.transpose(image_data, (2, 0, 1)) # Channel first + + # Expand the dimensions of the image data to match the expected input shape + image_data = np.expand_dims(image_data, axis=0).astype(np.float32) + + # Return the preprocessed image data + return image_data + + def postprocess(self, input_image, output): + """ + Performs post-processing on the model's output to extract bounding boxes, scores, and class IDs. + + Args: + input_image (numpy.ndarray): The input image. + output (numpy.ndarray): The output of the model. + + Returns: + numpy.ndarray: The input image with detections drawn on it. + """ + + # Transpose and squeeze the output to match the expected shape + outputs = np.transpose(np.squeeze(output[0])) + + # Get the number of rows in the outputs array + rows = outputs.shape[0] + + # Lists to store the bounding boxes, scores, and class IDs of the detections + boxes = [] + scores = [] + class_ids = [] + + # Calculate the scaling factors for the bounding box coordinates + x_factor = self.img_width / self.input_width + y_factor = self.img_height / self.input_height + + # Iterate over each row in the outputs array + for i in range(rows): + # Extract the class scores from the current row + classes_scores = outputs[i][4:] + + # Find the maximum score among the class scores + max_score = np.amax(classes_scores) + + # If the maximum score is above the confidence threshold + if max_score >= self.confidence_thres: + # Get the class ID with the highest score + class_id = np.argmax(classes_scores) + + # Extract the bounding box coordinates from the current row + x, y, w, h = outputs[i][0], outputs[i][1], outputs[i][2], outputs[i][3] + + # Calculate the scaled coordinates of the bounding box + left = int((x - w / 2) * x_factor) + top = int((y - h / 2) * y_factor) + width = int(w * x_factor) + height = int(h * y_factor) + + # Add the class ID, score, and box coordinates to the respective lists + class_ids.append(class_id) + scores.append(max_score) + boxes.append([left, top, width, height]) + + # Apply non-maximum suppression to filter out overlapping bounding boxes + indices = cv2.dnn.NMSBoxes(boxes, scores, self.confidence_thres, self.iou_thres) + + # Iterate over the selected indices after non-maximum suppression + for i in indices: + # Get the box, score, and class ID corresponding to the index + box = boxes[i] + score = scores[i] + class_id = class_ids[i] + + # Draw the detection on the input image + self.draw_detections(input_image, box, score, class_id) + + # Return the modified input image + return input_image + + def main(self): + """ + Performs inference using an ONNX model and returns the output image with drawn detections. + + Returns: + output_img: The output image with drawn detections. 
+ """ + # Create an inference session using the ONNX model and specify execution providers + session = ort.InferenceSession(self.onnx_model, providers=['CUDAExecutionProvider', 'CPUExecutionProvider']) + + # Get the model inputs + model_inputs = session.get_inputs() + + # Store the shape of the input for later use + input_shape = model_inputs[0].shape + self.input_width = input_shape[2] + self.input_height = input_shape[3] + + # Preprocess the image data + img_data = self.preprocess() + + # Run inference using the preprocessed image data + outputs = session.run(None, {model_inputs[0].name: img_data}) + + # Perform post-processing on the outputs to obtain output image. + output_img = self.postprocess(self.img, outputs) + + # Return the resulting output image + return output_img + + +if __name__ == '__main__': + # Create an argument parser to handle command-line arguments + parser = argparse.ArgumentParser() + parser.add_argument('--model', type=str, default='yolov8n.onnx', help='Input your ONNX model.') + parser.add_argument('--img', type=str, default=str(ROOT / 'assets/bus.jpg'), help='Path to input image.') + parser.add_argument('--conf-thres', type=float, default=0.5, help='Confidence threshold') + parser.add_argument('--iou-thres', type=float, default=0.5, help='NMS IoU threshold') + args = parser.parse_args() + + # Check the requirements and select the appropriate backend (CPU or GPU) + check_requirements('onnxruntime-gpu' if torch.cuda.is_available() else 'onnxruntime') + + # Create an instance of the Yolov8 class with the specified arguments + detection = Yolov8(args.model, args.img, args.conf_thres, args.iou_thres) + + # Perform object detection and obtain the output image + output_image = detection.main() + + # Display the output image in a window + cv2.namedWindow('Output', cv2.WINDOW_NORMAL) + cv2.imshow('Output', output_image) + + # Wait for a key press to exit + cv2.waitKey(0)