From a7c249e9c125564fb2cdfb35d4761e611337eb5c Mon Sep 17 00:00:00 2001
From: Semih Demirel <85176438+semihhdemirel@users.noreply.github.com>
Date: Mon, 5 Jun 2023 19:57:03 +0300
Subject: [PATCH] [Example] YOLOv8-ONNXRuntime (#2992)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Ayush Chaurasia
---
 examples/README.md                    |   1 +
 examples/YOLOv8-ONNXRuntime/README.md |  43 +++++
 examples/YOLOv8-ONNXRuntime/main.py   | 230 ++++++++++++++++++++++++++
 3 files changed, 274 insertions(+)
 create mode 100644 examples/YOLOv8-ONNXRuntime/README.md
 create mode 100644 examples/YOLOv8-ONNXRuntime/main.py

diff --git a/examples/README.md b/examples/README.md
index e77cebd..d2c27c1 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -10,6 +10,7 @@ This repository features a collection of real-world applications and walkthrough
 | [YOLO OpenCV ONNX Detection Python](./YOLOv8-OpenCV-ONNX-Python) | OpenCV/Python/ONNX | [Farid Inawan](https://github.com/frdteknikelektro) |
 | [YOLO .Net ONNX Detection C#](https://www.nuget.org/packages/Yolov8.Net) | C# .Net | [Samuel Stainback](https://github.com/sstainba) |
 | [YOLOv8 on NVIDIA Jetson(TensorRT and DeepStream)](https://wiki.seeedstudio.com/YOLOv8-DeepStream-TRT-Jetson/) | Python | [Lakshantha](https://github.com/lakshanthad) |
+| [YOLOv8 ONNXRuntime Python](./YOLOv8-ONNXRuntime) | Python/ONNXRuntime | [Semih Demirel](https://github.com/semihhdemirel) |
 
 ### How to Contribute
 
diff --git a/examples/YOLOv8-ONNXRuntime/README.md b/examples/YOLOv8-ONNXRuntime/README.md
new file mode 100644
index 0000000..b206b2e
--- /dev/null
+++ b/examples/YOLOv8-ONNXRuntime/README.md
@@ -0,0 +1,43 @@
+# YOLOv8 - ONNX Runtime
+
+This project implements YOLOv8 object detection using ONNX Runtime.
+
+## Installation
+
+To run this project, install the required dependencies by following the instructions below.
+
+### Installing Required Dependencies
+
+You can install the required dependencies by running the following command:
+
+```bash
+pip install -r requirements.txt
+```
+
+### Installing `onnxruntime-gpu`
+
+If you have an NVIDIA GPU and want to leverage GPU acceleration, install the `onnxruntime-gpu` package with the following command:
+
+```bash
+pip install onnxruntime-gpu
+```
+
+Note: Make sure you have compatible NVIDIA drivers and CUDA libraries installed on your system.
+
+### Installing `onnxruntime` (CPU version)
+
+If you don't have an NVIDIA GPU or prefer to run on the CPU, install the `onnxruntime` package instead:
+
+```bash
+pip install onnxruntime
+```
+
+## Usage
+
+After successfully installing the required packages, you can run the YOLOv8 implementation with the following command:
+
+```bash
+python main.py --model yolov8n.onnx --img image.jpg --conf-thres 0.5 --iou-thres 0.5
+```
+
+Make sure to replace `yolov8n.onnx` with the path to your YOLOv8 ONNX model file and `image.jpg` with the path to your input image, and adjust the confidence threshold (`--conf-thres`) and IoU threshold (`--iou-thres`) values as needed.
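+
+### Exporting a YOLOv8 Model to ONNX
+
+If you don't already have an ONNX model, a minimal way to produce `yolov8n.onnx` is the Ultralytics export command (this assumes the `ultralytics` package is installed; the model name is just an example):
+
+```bash
+yolo export model=yolov8n.pt format=onnx
+```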
diff --git a/examples/YOLOv8-ONNXRuntime/main.py b/examples/YOLOv8-ONNXRuntime/main.py
new file mode 100644
index 0000000..9c59100
--- /dev/null
+++ b/examples/YOLOv8-ONNXRuntime/main.py
@@ -0,0 +1,230 @@
+import argparse
+
+import cv2
+import numpy as np
+import onnxruntime as ort
+import torch
+
+from ultralytics.yolo.utils import ROOT, yaml_load
+from ultralytics.yolo.utils.checks import check_requirements, check_yaml
+
+
+class Yolov8:
+
+    def __init__(self, onnx_model, input_image, confidence_thres, iou_thres):
+        """
+        Initializes an instance of the Yolov8 class.
+
+        Args:
+            onnx_model: Path to the ONNX model.
+            input_image: Path to the input image.
+            confidence_thres: Confidence threshold for filtering detections.
+            iou_thres: IoU (Intersection over Union) threshold for non-maximum suppression.
+        """
+        self.onnx_model = onnx_model
+        self.input_image = input_image
+        self.confidence_thres = confidence_thres
+        self.iou_thres = iou_thres
+
+        # Load the class names from the COCO dataset
+        self.classes = yaml_load(check_yaml('coco128.yaml'))['names']
+
+        # Generate a color palette for the classes
+        self.color_palette = np.random.uniform(0, 255, size=(len(self.classes), 3))
+
+    def draw_detections(self, img, box, score, class_id):
+        """
+        Draws bounding boxes and labels on the input image based on the detected objects.
+
+        Args:
+            img: The input image to draw detections on.
+            box: Detected bounding box.
+            score: Corresponding detection score.
+            class_id: Class ID for the detected object.
+
+        Returns:
+            None
+        """
+
+        # Extract the coordinates of the bounding box
+        x1, y1, w, h = box
+
+        # Retrieve the color for the class ID
+        color = self.color_palette[class_id]
+
+        # Draw the bounding box on the image
+        cv2.rectangle(img, (int(x1), int(y1)), (int(x1 + w), int(y1 + h)), color, 2)
+
+        # Create the label text with class name and score
+        label = f'{self.classes[class_id]}: {score:.2f}'
+
+        # Calculate the dimensions of the label text
+        (label_width, label_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
+
+        # Calculate the position of the label text
+        label_x = x1
+        label_y = y1 - 10 if y1 - 10 > label_height else y1 + 10
+
+        # Draw a filled rectangle as the background for the label text
+        cv2.rectangle(img, (label_x, label_y - label_height), (label_x + label_width, label_y + label_height), color,
+                      cv2.FILLED)
+
+        # Draw the label text on the image
+        cv2.putText(img, label, (label_x, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
+
+    def preprocess(self):
+        """
+        Preprocesses the input image before performing inference.
+
+        Returns:
+            image_data: Preprocessed image data ready for inference.
+ """ + # Read the input image using OpenCV + self.img = cv2.imread(self.input_image) + + # Get the height and width of the input image + self.img_height, self.img_width = self.img.shape[:2] + + # Convert the image color space from BGR to RGB + img = cv2.cvtColor(self.img, cv2.COLOR_BGR2RGB) + + # Resize the image to match the input shape + img = cv2.resize(img, (self.input_width, self.input_height)) + + # Normalize the image data by dividing it by 255.0 + image_data = np.array(img) / 255.0 + + # Transpose the image to have the channel dimension as the first dimension + image_data = np.transpose(image_data, (2, 0, 1)) # Channel first + + # Expand the dimensions of the image data to match the expected input shape + image_data = np.expand_dims(image_data, axis=0).astype(np.float32) + + # Return the preprocessed image data + return image_data + + def postprocess(self, input_image, output): + """ + Performs post-processing on the model's output to extract bounding boxes, scores, and class IDs. + + Args: + input_image (numpy.ndarray): The input image. + output (numpy.ndarray): The output of the model. + + Returns: + numpy.ndarray: The input image with detections drawn on it. + """ + + # Transpose and squeeze the output to match the expected shape + outputs = np.transpose(np.squeeze(output[0])) + + # Get the number of rows in the outputs array + rows = outputs.shape[0] + + # Lists to store the bounding boxes, scores, and class IDs of the detections + boxes = [] + scores = [] + class_ids = [] + + # Calculate the scaling factors for the bounding box coordinates + x_factor = self.img_width / self.input_width + y_factor = self.img_height / self.input_height + + # Iterate over each row in the outputs array + for i in range(rows): + # Extract the class scores from the current row + classes_scores = outputs[i][4:] + + # Find the maximum score among the class scores + max_score = np.amax(classes_scores) + + # If the maximum score is above the confidence threshold + if max_score >= self.confidence_thres: + # Get the class ID with the highest score + class_id = np.argmax(classes_scores) + + # Extract the bounding box coordinates from the current row + x, y, w, h = outputs[i][0], outputs[i][1], outputs[i][2], outputs[i][3] + + # Calculate the scaled coordinates of the bounding box + left = int((x - w / 2) * x_factor) + top = int((y - h / 2) * y_factor) + width = int(w * x_factor) + height = int(h * y_factor) + + # Add the class ID, score, and box coordinates to the respective lists + class_ids.append(class_id) + scores.append(max_score) + boxes.append([left, top, width, height]) + + # Apply non-maximum suppression to filter out overlapping bounding boxes + indices = cv2.dnn.NMSBoxes(boxes, scores, self.confidence_thres, self.iou_thres) + + # Iterate over the selected indices after non-maximum suppression + for i in indices: + # Get the box, score, and class ID corresponding to the index + box = boxes[i] + score = scores[i] + class_id = class_ids[i] + + # Draw the detection on the input image + self.draw_detections(input_image, box, score, class_id) + + # Return the modified input image + return input_image + + def main(self): + """ + Performs inference using an ONNX model and returns the output image with drawn detections. + + Returns: + output_img: The output image with drawn detections. 
+ """ + # Create an inference session using the ONNX model and specify execution providers + session = ort.InferenceSession(self.onnx_model, providers=['CUDAExecutionProvider', 'CPUExecutionProvider']) + + # Get the model inputs + model_inputs = session.get_inputs() + + # Store the shape of the input for later use + input_shape = model_inputs[0].shape + self.input_width = input_shape[2] + self.input_height = input_shape[3] + + # Preprocess the image data + img_data = self.preprocess() + + # Run inference using the preprocessed image data + outputs = session.run(None, {model_inputs[0].name: img_data}) + + # Perform post-processing on the outputs to obtain output image. + output_img = self.postprocess(self.img, outputs) + + # Return the resulting output image + return output_img + + +if __name__ == '__main__': + # Create an argument parser to handle command-line arguments + parser = argparse.ArgumentParser() + parser.add_argument('--model', type=str, default='yolov8n.onnx', help='Input your ONNX model.') + parser.add_argument('--img', type=str, default=str(ROOT / 'assets/bus.jpg'), help='Path to input image.') + parser.add_argument('--conf-thres', type=float, default=0.5, help='Confidence threshold') + parser.add_argument('--iou-thres', type=float, default=0.5, help='NMS IoU threshold') + args = parser.parse_args() + + # Check the requirements and select the appropriate backend (CPU or GPU) + check_requirements('onnxruntime-gpu' if torch.cuda.is_available() else 'onnxruntime') + + # Create an instance of the Yolov8 class with the specified arguments + detection = Yolov8(args.model, args.img, args.conf_thres, args.iou_thres) + + # Perform object detection and obtain the output image + output_image = detection.main() + + # Display the output image in a window + cv2.namedWindow('Output', cv2.WINDOW_NORMAL) + cv2.imshow('Output', output_image) + + # Wait for a key press to exit + cv2.waitKey(0)