From fd5be10c663d51e0054bcff2c45c62af80ae4708 Mon Sep 17 00:00:00 2001
From: JustasBart <40023722+JustasBart@users.noreply.github.com>
Date: Mon, 13 Feb 2023 08:47:24 +0000
Subject: [PATCH] Add YOLOv8 ONNX C++ inference Example (#856)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com>
Co-authored-by: Ayush Chaurasia <ayush.chaurarsia@gmail.com>
---
 examples/README.md                           |   6 +-
 examples/Yolov8_CPP_Inference/CMakeLists.txt |  28 +++
 examples/Yolov8_CPP_Inference/README.md      |  53 ++++++
 examples/Yolov8_CPP_Inference/inference.cpp  | 185 +++++++++++++++++++
 examples/Yolov8_CPP_Inference/inference.h    |  52 ++++++
 examples/Yolov8_CPP_Inference/main.cpp       |  70 +++++++
 examples/inventory-management/README.md      |   1 -
 7 files changed, 391 insertions(+), 4 deletions(-)
 create mode 100644 examples/Yolov8_CPP_Inference/CMakeLists.txt
 create mode 100644 examples/Yolov8_CPP_Inference/README.md
 create mode 100644 examples/Yolov8_CPP_Inference/inference.cpp
 create mode 100644 examples/Yolov8_CPP_Inference/inference.h
 create mode 100644 examples/Yolov8_CPP_Inference/main.cpp
 delete mode 100644 examples/inventory-management/README.md

diff --git a/examples/README.md b/examples/README.md
index 1f185da..2c3367d 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -2,9 +2,9 @@ This is a list of real-world applications and walkthroughs. These can be folders
 
 ## Ultralytics YOLO example applications
 
-| Title                                                              | Format          | Contributor                               |
-| ------------------------------------------------------------------ | --------------- | ----------------------------------------- |
-| \[Dummy\] [Inventory managenet using YOLO](./inventory-management) | python/notebook | [AyushExel](https://github.com/AyushExel) |
+| Title                                                           | Format   | Contributor                                       |
+| --------------------------------------------------------------- | -------- | ------------------------------------------------- |
+| [Yolov8/yolov5 ONNX Inference with C++](./Yolov8_CPP_Inference) | C++/ONNX | [Justas Bartnykas](https://github.com/JustasBart) |
 
 ## How can you contribute ?
 
diff --git a/examples/Yolov8_CPP_Inference/CMakeLists.txt b/examples/Yolov8_CPP_Inference/CMakeLists.txt
new file mode 100644
index 0000000..bc2f33f
--- /dev/null
+++ b/examples/Yolov8_CPP_Inference/CMakeLists.txt
@@ -0,0 +1,28 @@
+cmake_minimum_required(VERSION 3.5)
+
+project(Yolov8CPPInference VERSION 0.1)
+
+set(CMAKE_INCLUDE_CURRENT_DIR ON) # Adds the current source and build directories to the include path
+
+# CUDA: used for GPU inference through OpenCV's dnn module; remove this section for a CPU-only OpenCV build
+set(CUDA_TOOLKIT_ROOT_DIR "/usr/local/cuda") # Hard-coded toolkit location; adjust if CUDA is installed elsewhere
+find_package(CUDA 11 REQUIRED) # REQUIRED: configuration stops with an error if no matching CUDA is found
+
+set(CMAKE_CUDA_STANDARD 11)
+set(CMAKE_CUDA_STANDARD_REQUIRED ON)
+# !CUDA
+
+# OpenCV: headers and libraries used by main.cpp and inference.cpp
+find_package(OpenCV REQUIRED)
+include_directories(${OpenCV_INCLUDE_DIRS})
+# !OpenCV
+
+set(PROJECT_SOURCES
+    main.cpp
+
+    inference.h
+    inference.cpp
+)
+
+add_executable(Yolov8CPPInference ${PROJECT_SOURCES})
+target_link_libraries(Yolov8CPPInference ${OpenCV_LIBS})
diff --git a/examples/Yolov8_CPP_Inference/README.md b/examples/Yolov8_CPP_Inference/README.md
new file mode 100644
index 0000000..b138146
--- /dev/null
+++ b/examples/Yolov8_CPP_Inference/README.md
@@ -0,0 +1,53 @@
+# yolov8/yolov5 Inference C++
+
+Usage:
+
+```
+# git clone ultralytics
+pip install .
+cd examples/Yolov8_CPP_Inference
+```
+Add a **yolov8\_.onnx** and/or **yolov5\_.onnx** model(s) to the ultralytics folder.
+Edit the **main.cpp** to change the **projectBasePath** to match your user.
+
+Note that by default the CMake file will try to import the CUDA library to be used with OpenCV's dnn (cuDNN) GPU inference.
+If your OpenCV build does not use CUDA/cuDNN you can remove that import call and run the example on CPU.
+```
+mkdir build
+cd build
+cmake ..
+make
+./Yolov8CPPInference
+```
+
+To export yolov8 models:
+
+```
+yolo export \
+model=yolov8s.pt \
+imgsz=[480,640] \
+format=onnx \
+opset=12
+```
+
+To export yolov5 models:
+
+```
+python3 export.py \
+--weights yolov5s.pt \
+--img 480 640 \
+--include onnx \
+--opset 12
+```
+
+yolov8s.onnx:
+
+![image](https://user-images.githubusercontent.com/40023722/217356132-a4cecf2e-2729-4acb-b80a-6559022d7707.png)
+
+yolov5s.onnx:
+
+![image](https://user-images.githubusercontent.com/40023722/217357005-07464492-d1da-42e3-98a7-fc753f87d5e6.png)
+
+This example uses OpenCV's dnn API to run an ONNX-exported yolov5 or yolov8 model (in theory it should also work for yolov6 and yolov7, but this has not been tested). Note that for this example the networks are exported at a rectangular (640x480) resolution; any other resolution you export at should work as well, although you might want to use the letterBox approach for square images depending on your use case.
+
+The **main** branch version is based on using Qt as a GUI wrapper; the main interest here is the **Inference** class file, which shows how to transpose yolov8 models to work as yolov5 models.
diff --git a/examples/Yolov8_CPP_Inference/inference.cpp b/examples/Yolov8_CPP_Inference/inference.cpp
new file mode 100644
index 0000000..b45830e
--- /dev/null
+++ b/examples/Yolov8_CPP_Inference/inference.cpp
@@ -0,0 +1,185 @@
+#include "inference.h"
+
+// Stores the model path, input shape, classes file and CUDA flag, then loads the ONNX network.
+Inference::Inference(const std::string &onnxModelPath, const cv::Size &modelInputShape, const std::string &classesTxtFile, const bool &runWithCuda)
+    : modelPath(onnxModelPath),
+      classesPath(classesTxtFile),
+      cudaEnabled(runWithCuda),
+      modelShape(modelInputShape)
+{
+    loadOnnxNetwork();
+    // loadClassesFromFile(); The classes are hard-coded for this example
+}
+
+// Runs the network on one image and returns the detections that survive non-maximum suppression.
+// Handles both output layouts: yolov5 (batch, rows, 5 + classes) and yolov8 (batch, 4 + classes, rows).
+// Returns an empty vector when the image is empty or the network output cannot be decoded.
+std::vector<Detection> Inference::runInference(const cv::Mat &input)
+{
+    // A failed cv::imread() yields an empty Mat; bail out instead of letting OpenCV throw.
+    if (input.empty())
+        return {};
+
+    cv::Mat modelInput = input;
+    if (letterBoxForSquare && modelShape.width == modelShape.height)
+        modelInput = formatToSquare(modelInput);
+
+    // Scale by 1/255, resize to the model shape and swap the R/B channels (no cropping).
+    cv::Mat blob;
+    cv::dnn::blobFromImage(modelInput, blob, 1.0/255.0, modelShape, cv::Scalar(), true, false);
+    net.setInput(blob);
+
+    std::vector<cv::Mat> outputs;
+    net.forward(outputs, net.getUnconnectedOutLayersNames());
+    if (outputs.empty() || outputs[0].dims < 3)
+        return {};
+
+    int rows = outputs[0].size[1];
+    int dimensions = outputs[0].size[2];
+
+    // yolov5 has an output of shape (batchSize, 25200, 85) (Num classes + box[x,y,w,h] + confidence[c])
+    // yolov8 has an output of shape (batchSize, 84,  8400) (Num classes + box[x,y,w,h])
+    const bool yolov8 = dimensions > rows; // shape[2] larger than shape[1] means yolov8
+    if (yolov8)
+    {
+        rows = outputs[0].size[2];
+        dimensions = outputs[0].size[1];
+
+        // Bring the yolov8 output into the yolov5-style (rows, dimensions) layout.
+        outputs[0] = outputs[0].reshape(1, dimensions);
+        cv::transpose(outputs[0], outputs[0]);
+    }
+
+    // Class scores start after the box (and, for yolov5, after the confidence value).
+    const int numClasses = static_cast<int>(classes.size());
+    const int scoresOffset = yolov8 ? 4 : 5;
+    if (dimensions < scoresOffset + numClasses)
+        return {}; // Fewer scores per row than class names: reading them would overrun the row.
+
+    float *data = reinterpret_cast<float *>(outputs[0].data);
+
+    // Ratio between the (possibly padded) input image and the model input size, applied to every box.
+    float x_factor = modelInput.cols / modelShape.width;
+    float y_factor = modelInput.rows / modelShape.height;
+
+    std::vector<int> class_ids;
+    std::vector<float> confidences;
+    std::vector<cv::Rect> boxes;
+
+    // Each row describes one candidate; `data` always points at the start of the current row.
+    for (int i = 0; i < rows; ++i, data += dimensions)
+    {
+        float confidence = 0.0f; // Value stored for the candidate and later used by NMS
+        cv::Point class_id;
+        double maxClassScore = 0.0;
+
+        if (yolov8)
+        {
+            cv::Mat scores(1, numClasses, CV_32FC1, data + scoresOffset);
+            cv::minMaxLoc(scores, nullptr, &maxClassScore, nullptr, &class_id);
+
+            if (!(maxClassScore > modelScoreThreshold))
+                continue;
+
+            // yolov8 rows carry no separate confidence value, so the best class score is used.
+            confidence = static_cast<float>(maxClassScore);
+        }
+        else // yolov5
+        {
+            confidence = data[4];
+            if (!(confidence >= modelConfidenseThreshold))
+                continue;
+
+            cv::Mat scores(1, numClasses, CV_32FC1, data + scoresOffset);
+            cv::minMaxLoc(scores, nullptr, &maxClassScore, nullptr, &class_id);
+
+            if (!(maxClassScore > modelScoreThreshold))
+                continue;
+        }
+
+        confidences.push_back(confidence);
+        class_ids.push_back(class_id.x);
+
+        // The row starts with the box as centre x/y followed by width/height.
+        const float x = data[0];
+        const float y = data[1];
+        const float w = data[2];
+        const float h = data[3];
+
+        const int left = static_cast<int>((x - 0.5 * w) * x_factor);
+        const int top = static_cast<int>((y - 0.5 * h) * y_factor);
+        const int width = static_cast<int>(w * x_factor);
+        const int height = static_cast<int>(h * y_factor);
+
+        boxes.push_back(cv::Rect(left, top, width, height));
+    }
+
+    // Drop overlapping candidates; nms_result receives the indices of the boxes that are kept.
+    std::vector<int> nms_result;
+    cv::dnn::NMSBoxes(boxes, confidences, modelScoreThreshold, modelNMSThreshold, nms_result);
+
+    // One generator for the whole call; seeding a new engine per detection is needlessly expensive.
+    std::random_device rd;
+    std::mt19937 gen(rd());
+    std::uniform_int_distribution<int> dis(100, 255);
+
+    std::vector<Detection> detections{};
+    detections.reserve(nms_result.size());
+    for (const int idx : nms_result)
+    {
+        Detection result;
+        result.class_id = class_ids[idx];
+        result.confidence = confidences[idx];
+
+        // Random colour with every channel in [100, 255].
+        result.color = cv::Scalar(dis(gen),
+                                  dis(gen),
+                                  dis(gen));
+
+        result.className = classes[result.class_id];
+        result.box = boxes[idx];
+
+        detections.push_back(result);
+    }
+
+    return detections;
+}
+
+// Appends one class name per line of the file at classesPath to `classes`; does nothing if it cannot be opened.
+void Inference::loadClassesFromFile()
+{
+    std::ifstream classFile(classesPath);
+    if (!classFile.is_open())
+        return;
+
+    // NOTE: entries already present in `classes` are kept, not replaced.
+    for (std::string name; std::getline(classFile, name);)
+        classes.push_back(name);
+}
+
+// Reads the ONNX model at modelPath and selects either the CUDA or the OpenCV/CPU backend and target.
+void Inference::loadOnnxNetwork()
+{
+    net = cv::dnn::readNetFromONNX(modelPath);
+    if (!cudaEnabled)
+    {
+        std::cout << "\nRunning on CPU" << std::endl;
+        net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
+        net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
+        return;
+    }
+
+    std::cout << "\nRunning on CUDA" << std::endl;
+    net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
+    net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);
+}
+
+// Returns a square, zero-filled canvas (side = longer image edge) with `source` copied into its top-left corner.
+cv::Mat Inference::formatToSquare(const cv::Mat &source)
+{
+    const int side = MAX(source.cols, source.rows);
+    // Use the source's own type (not a fixed CV_8UC3): copyTo() into an ROI of another type reallocates the ROI and leaves the canvas black.
+    cv::Mat result = cv::Mat::zeros(side, side, source.type());
+    source.copyTo(result(cv::Rect(0, 0, source.cols, source.rows)));
+    return result;
+}
diff --git a/examples/Yolov8_CPP_Inference/inference.h b/examples/Yolov8_CPP_Inference/inference.h
new file mode 100644
index 0000000..5763e10
--- /dev/null
+++ b/examples/Yolov8_CPP_Inference/inference.h
@@ -0,0 +1,52 @@
+#ifndef INFERENCE_H
+#define INFERENCE_H
+
+// Cpp native
+#include <fstream>
+#include <vector>
+#include <string>
+#include <random>
+
+// OpenCV / DNN / Inference
+#include <opencv2/imgproc.hpp>
+#include <opencv2/opencv.hpp>
+#include <opencv2/dnn.hpp>
+
+struct Detection // One detected object as returned by Inference::runInference()
+{
+    int class_id = 0;        // Index of the detected class
+    std::string className;   // Name of the detected class
+    float confidence = 0.0f; // Score kept for this detection
+    cv::Scalar color;        // Colour assigned to this detection for drawing
+    cv::Rect box;            // Bounding box of the detection
+};
+
+class Inference // Loads an ONNX model through OpenCV's dnn module and decodes yolov5/yolov8 detections
+{
+public:
+    Inference(const std::string &onnxModelPath, const cv::Size &modelInputShape = {640, 640}, const std::string &classesTxtFile = "", const bool &runWithCuda = true);
+    std::vector<Detection> runInference(const cv::Mat &input); // Detects objects in one image
+
+private:
+    void loadClassesFromFile();                    // Reads class names from classesPath
+    void loadOnnxNetwork();                        // Reads the model and picks the backend/target
+    cv::Mat formatToSquare(const cv::Mat &source); // Pads an image to a square canvas
+
+    std::string modelPath;    // Path of the ONNX model file
+    std::string classesPath;  // Path of the class-name text file
+    bool cudaEnabled = false; // Selects the CUDA backend/target when true
+    // Class names indexed by class id (hard-coded for this example)
+    std::vector<std::string> classes{"person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"};
+
+    cv::Size2f modelShape; // Input size passed to cv::dnn::blobFromImage
+
+    float modelConfidenseThreshold = 0.25; // Minimum yolov5 box confidence
+    float modelScoreThreshold = 0.45;      // Minimum best-class score; also the NMS score threshold
+    float modelNMSThreshold = 0.50;        // NMS threshold passed to cv::dnn::NMSBoxes
+
+    bool letterBoxForSquare{true}; // Pad the input to a square when the model shape is square
+
+    cv::dnn::Net net;
+};
+
+#endif // INFERENCE_H
diff --git a/examples/Yolov8_CPP_Inference/main.cpp b/examples/Yolov8_CPP_Inference/main.cpp
new file mode 100644
index 0000000..6d1ba98
--- /dev/null
+++ b/examples/Yolov8_CPP_Inference/main.cpp
@@ -0,0 +1,70 @@
+#include <iostream>
+#include <vector>
+#include <getopt.h>
+
+#include <opencv2/opencv.hpp>
+
+#include "inference.h"
+
+using namespace std;
+using namespace cv;
+
+// Loads the ONNX model, runs detection on two sample images and previews the annotated results.
+int main(int argc, char **argv)
+{
+    std::string projectBasePath = "/home/user/ultralytics"; // Set your ultralytics base path
+
+    bool runOnGPU = true;
+
+    // Pass in either "yolov8s.onnx" or "yolov5s.onnx"
+    // to run Inference with yolov8/yolov5 (ONNX).
+
+    // Note that in this example the classes are hard-coded and 'classes.txt' is a place holder.
+    Inference inf(projectBasePath + "/yolov8s.onnx", cv::Size(640, 480), "classes.txt", runOnGPU);
+
+    std::vector<std::string> imageNames;
+    imageNames.push_back(projectBasePath + "/ultralytics/assets/bus.jpg");
+    imageNames.push_back(projectBasePath + "/ultralytics/assets/zidane.jpg");
+
+    for (const std::string &imageName : imageNames)
+    {
+        cv::Mat frame = cv::imread(imageName);
+        if (frame.empty()) // imread() returns an empty Mat when the file is missing or unreadable
+        {
+            std::cerr << "Could not read image: " << imageName << std::endl;
+            continue;
+        }
+
+        // Inference starts here...
+        std::vector<Detection> output = inf.runInference(frame);
+
+        std::cout << "Number of detections:" << output.size() << std::endl;
+
+        for (const Detection &detection : output)
+        {
+            const cv::Rect &box = detection.box;
+            const cv::Scalar &color = detection.color;
+
+            // Detection box
+            cv::rectangle(frame, box, color, 2);
+
+            // Detection box text
+            std::string classString = detection.className + ' ' + std::to_string(detection.confidence).substr(0, 4);
+            cv::Size textSize = cv::getTextSize(classString, cv::FONT_HERSHEY_DUPLEX, 1, 2, 0);
+            cv::Rect textBox(box.x, box.y - 40, textSize.width + 10, textSize.height + 20);
+
+            cv::rectangle(frame, textBox, color, cv::FILLED);
+            cv::putText(frame, classString, cv::Point(box.x + 5, box.y - 10), cv::FONT_HERSHEY_DUPLEX, 1, cv::Scalar(0, 0, 0), 2, 0);
+        }
+        // Inference ends here...
+
+        // This is only for preview purposes
+        float scale = 0.8;
+        cv::resize(frame, frame, cv::Size(frame.cols*scale, frame.rows*scale));
+        cv::imshow("Inference", frame);
+
+        cv::waitKey(-1);
+    }
+
+    return 0;
+}
diff --git a/examples/inventory-management/README.md b/examples/inventory-management/README.md
deleted file mode 100644
index 8f56257..0000000
--- a/examples/inventory-management/README.md
+++ /dev/null
@@ -1 +0,0 @@
-This is a dummy project