Add YOLOv8 ONNX C++ inference Example (#856)
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com> Co-authored-by: Ayush Chaurasia <ayush.chaurarsia@gmail.com>single_channel
parent
977fd8f0b8
commit
fd5be10c66
@ -0,0 +1,28 @@
|
||||
# Build script for the YOLOv8/YOLOv5 OpenCV-dnn inference example.
cmake_minimum_required(VERSION 3.5)

project(Yolov8CPPInference VERSION 0.1)

# Add the current source/binary directories to the include path automatically.
set(CMAKE_INCLUDE_CURRENT_DIR ON)

# ---- CUDA -------------------------------------------------------------------
# The toolkit is looked up under the path below and is REQUIRED, so configure
# fails when it is not found there.
set(CUDA_TOOLKIT_ROOT_DIR "/usr/local/cuda")
find_package(CUDA 11 REQUIRED)

set(CMAKE_CUDA_STANDARD 11)
set(CMAKE_CUDA_STANDARD_REQUIRED ON)
# ---- !CUDA ------------------------------------------------------------------

# ---- OpenCV -----------------------------------------------------------------
find_package(OpenCV REQUIRED)
include_directories(${OpenCV_INCLUDE_DIRS})
# ---- !OpenCV ----------------------------------------------------------------

# Sources that make up the example executable.
set(PROJECT_SOURCES
    main.cpp

    inference.h
    inference.cpp
)

add_executable(Yolov8CPPInference ${PROJECT_SOURCES})
target_link_libraries(Yolov8CPPInference ${OpenCV_LIBS})
|
@ -0,0 +1,53 @@
|
||||
# yolov8/yolov5 Inference C++
|
||||
|
||||
Usage:
|
||||
|
||||
```
|
||||
# git clone ultralytics
|
||||
pip install .
|
||||
cd examples/cpp_
|
||||
|
||||
Add a **yolov8\_.onnx** and/or **yolov5\_.onnx** model(s) to the ultralytics folder.
|
||||
Edit the **main.cpp** to change the **projectBasePath** to match your user.
|
||||
|
||||
Note that by default the CMake file will try to import the CUDA library so that it can be used with OpenCV's dnn module (cuDNN) for GPU inference.
|
||||
If your OpenCV build does not use CUDA/cuDNN you can remove that import call and run the example on CPU.
|
||||
|
||||
mkdir build
|
||||
cd build
|
||||
cmake ..
|
||||
make
|
||||
./Yolov8CPPInference
|
||||
```
|
||||
|
||||
To export yolov8 models:
|
||||
|
||||
```
|
||||
yolo export \
|
||||
model=yolov8s.pt \
|
||||
imgsz=[480,640] \
|
||||
format=onnx \
|
||||
opset=12
|
||||
```
|
||||
|
||||
To export yolov5 models:
|
||||
|
||||
```
|
||||
python3 export.py \
|
||||
--weights yolov5s.pt \
|
||||
--img 480 640 \
|
||||
--include onnx \
|
||||
--opset 12
|
||||
```
|
||||
|
||||
yolov8s.onnx:
|
||||
|
||||
![image](https://user-images.githubusercontent.com/40023722/217356132-a4cecf2e-2729-4acb-b80a-6559022d7707.png)
|
||||
|
||||
yolov5s.onnx:
|
||||
|
||||
![image](https://user-images.githubusercontent.com/40023722/217357005-07464492-d1da-42e3-98a7-fc753f87d5e6.png)
|
||||
|
||||
This repository is based on OpenCV's dnn API and runs an ONNX-exported yolov5 or yolov8 model (in theory it should also work for yolov6 and yolov7, but this has not been tested). Note that for this example the networks are exported with a rectangular (640x480) resolution, but any export resolution will work, although you might want to use the letterBox approach for square images depending on your use case.
|
||||
|
||||
The **main** branch version uses Qt as a GUI wrapper; the main interest here is the **Inference** class file, which shows how to transpose yolov8 models to work as yolov5 models.
|
@ -0,0 +1,185 @@
|
||||
#include "inference.h"
|
||||
|
||||
/// Stores the configuration and immediately loads the ONNX network.
///
/// @param onnxModelPath   Path handed to cv::dnn::readNetFromONNX.
/// @param modelInputShape Input size later passed to cv::dnn::blobFromImage.
/// @param classesTxtFile  Path kept for loadClassesFromFile(); not read here.
/// @param runWithCuda     Selects the CUDA backend/target when true, CPU otherwise.
Inference::Inference(const std::string &onnxModelPath, const cv::Size &modelInputShape, const std::string &classesTxtFile, const bool &runWithCuda)
    : modelPath(onnxModelPath),
      classesPath(classesTxtFile),
      cudaEnabled(runWithCuda),
      modelShape(modelInputShape)
{
    loadOnnxNetwork();

    // loadClassesFromFile() is intentionally not called: the class names are
    // hard-coded for this example.
}
|
||||
|
||||
/// Runs the loaded network on one image and returns the detections that
/// survive non-maximum suppression.
///
/// The network output layout decides how each candidate row is decoded:
///  - yolov5: (batchSize, 25200, 85) -> box[x,y,w,h] + confidence + class scores
///  - yolov8: (batchSize, 84, 8400)  -> box[x,y,w,h] + class scores
/// A yolov8 output is detected by shape[2] > shape[1] and transposed so both
/// layouts can be walked row by row.
///
/// @param input Image to run detection on.
/// @return One Detection per box kept by cv::dnn::NMSBoxes; each gets a random
///         colour with channels in [100, 255].
std::vector<Detection> Inference::runInference(const cv::Mat &input)
{
    cv::Mat modelInput = input;
    if (letterBoxForSquare && modelShape.width == modelShape.height)
        modelInput = formatToSquare(modelInput);

    cv::Mat blob;
    cv::dnn::blobFromImage(modelInput, blob, 1.0/255.0, modelShape, cv::Scalar(), true, false);
    net.setInput(blob);

    std::vector<cv::Mat> outputs;
    net.forward(outputs, net.getUnconnectedOutLayersNames());

    int rows = outputs[0].size[1];
    int dimensions = outputs[0].size[2];

    // Check if shape[2] is more than shape[1] (yolov8).
    const bool yolov8 = dimensions > rows;
    if (yolov8)
    {
        rows = outputs[0].size[2];
        dimensions = outputs[0].size[1];

        // Bring the yolov8 output into the yolov5-style "one candidate per row" layout.
        outputs[0] = outputs[0].reshape(1, dimensions);
        cv::transpose(outputs[0], outputs[0]);
    }
    float *data = outputs[0].ptr<float>();

    // Scale from network input coordinates back to the (possibly padded) image.
    const float x_factor = modelInput.cols / modelShape.width;
    const float y_factor = modelInput.rows / modelShape.height;

    std::vector<int> class_ids;
    std::vector<float> confidences;
    std::vector<cv::Rect> boxes;

    // Class scores start right after the box (yolov8) or after box + confidence (yolov5).
    const int classOffset = yolov8 ? 4 : 5;
    const int classCount = static_cast<int>(classes.size());

    for (int i = 0; i < rows; ++i, data += dimensions)
    {
        // yolov5 only: drop rows whose confidence is below the threshold.
        // Written as !(a >= b) so a NaN confidence is rejected exactly as before.
        if (!yolov8 && !(data[4] >= modelConfidenseThreshold))
            continue;

        cv::Mat scores(1, classCount, CV_32FC1, data + classOffset);
        cv::Point class_id;
        double maxClassScore;
        cv::minMaxLoc(scores, nullptr, &maxClassScore, nullptr, &class_id);

        if (!(maxClassScore > modelScoreThreshold))
            continue;

        // yolov8 has no separate confidence value, so the best class score is used.
        confidences.push_back(yolov8 ? static_cast<float>(maxClassScore) : data[4]);
        class_ids.push_back(class_id.x);

        const float x = data[0];
        const float y = data[1];
        const float w = data[2];
        const float h = data[3];

        // (x, y) is the box centre; convert to a top-left anchored rectangle.
        const int left = static_cast<int>((x - 0.5 * w) * x_factor);
        const int top = static_cast<int>((y - 0.5 * h) * y_factor);
        const int width = static_cast<int>(w * x_factor);
        const int height = static_cast<int>(h * y_factor);

        boxes.push_back(cv::Rect(left, top, width, height));
    }

    std::vector<int> nms_result;
    cv::dnn::NMSBoxes(boxes, confidences, modelScoreThreshold, modelNMSThreshold, nms_result);

    // Seed the generator once per call instead of once per detection.
    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_int_distribution<int> dis(100, 255);

    std::vector<Detection> detections{};
    detections.reserve(nms_result.size());
    for (const int idx : nms_result)
    {
        Detection result;
        result.class_id = class_ids[idx];
        result.confidence = confidences[idx];

        // Draw the three channels in separate statements so their order is defined.
        const int c0 = dis(gen);
        const int c1 = dis(gen);
        const int c2 = dis(gen);
        result.color = cv::Scalar(c0, c1, c2);

        result.className = classes[result.class_id];
        result.box = boxes[idx];

        detections.push_back(result);
    }

    return detections;
}
|
||||
|
||||
void Inference::loadClassesFromFile()
|
||||
{
|
||||
std::ifstream inputFile(classesPath);
|
||||
if (inputFile.is_open())
|
||||
{
|
||||
std::string classLine;
|
||||
while (std::getline(inputFile, classLine))
|
||||
classes.push_back(classLine);
|
||||
inputFile.close();
|
||||
}
|
||||
}
|
||||
|
||||
void Inference::loadOnnxNetwork()
|
||||
{
|
||||
net = cv::dnn::readNetFromONNX(modelPath);
|
||||
if (cudaEnabled)
|
||||
{
|
||||
std::cout << "\nRunning on CUDA" << std::endl;
|
||||
net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
|
||||
net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);
|
||||
}
|
||||
else
|
||||
{
|
||||
std::cout << "\nRunning on CPU" << std::endl;
|
||||
net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
|
||||
net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
|
||||
}
|
||||
}
|
||||
|
||||
/// Pads an image to a square canvas.
///
/// The canvas side is the larger of the source width and height; the source is
/// copied into the top-left corner and the remaining area stays zero.
///
/// @param source Image to pad.
/// @return A new square image of the same type as `source`.
cv::Mat Inference::formatToSquare(const cv::Mat &source)
{
    const int col = source.cols;
    const int row = source.rows;
    const int side = (col > row) ? col : row;

    // Allocate with the source's own type: copying into a region of a canvas
    // with a different type (e.g. a grayscale input into a CV_8UC3 canvas)
    // cannot succeed.
    cv::Mat result = cv::Mat::zeros(side, side, source.type());
    source.copyTo(result(cv::Rect(0, 0, col, row)));
    return result;
}
|
@ -0,0 +1,52 @@
|
||||
#ifndef INFERENCE_H
|
||||
#define INFERENCE_H
|
||||
|
||||
// Cpp native
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <random>
|
||||
|
||||
// OpenCV / DNN / Inference
|
||||
#include <opencv2/imgproc.hpp>
|
||||
#include <opencv2/opencv.hpp>
|
||||
#include <opencv2/dnn.hpp>
|
||||
|
||||
struct Detection
|
||||
{
|
||||
int class_id{0};
|
||||
std::string className{};
|
||||
float confidence{0.0};
|
||||
cv::Scalar color{};
|
||||
cv::Rect box{};
|
||||
};
|
||||
|
||||
class Inference
|
||||
{
|
||||
public:
|
||||
Inference(const std::string &onnxModelPath, const cv::Size &modelInputShape = {640, 640}, const std::string &classesTxtFile = "", const bool &runWithCuda = true);
|
||||
std::vector<Detection> runInference(const cv::Mat &input);
|
||||
|
||||
private:
|
||||
void loadClassesFromFile();
|
||||
void loadOnnxNetwork();
|
||||
cv::Mat formatToSquare(const cv::Mat &source);
|
||||
|
||||
std::string modelPath{};
|
||||
std::string classesPath{};
|
||||
bool cudaEnabled{};
|
||||
|
||||
std::vector<std::string> classes{"person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"};
|
||||
|
||||
cv::Size2f modelShape{};
|
||||
|
||||
float modelConfidenseThreshold {0.25};
|
||||
float modelScoreThreshold {0.45};
|
||||
float modelNMSThreshold {0.50};
|
||||
|
||||
bool letterBoxForSquare = true;
|
||||
|
||||
cv::dnn::Net net;
|
||||
};
|
||||
|
||||
#endif // INFERENCE_H
|
@ -0,0 +1,70 @@
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <getopt.h>
|
||||
|
||||
#include <opencv2/opencv.hpp>
|
||||
|
||||
#include "inference.h"
|
||||
|
||||
using namespace std;
|
||||
using namespace cv;
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
std::string projectBasePath = "/home/user/ultralytics"; // Set your ultralytics base path
|
||||
|
||||
bool runOnGPU = true;
|
||||
|
||||
//
|
||||
// Pass in either:
|
||||
//
|
||||
// "yolov8s.onnx" or "yolov5s.onnx"
|
||||
//
|
||||
// To run Inference with yolov8/yolov5 (ONNX)
|
||||
//
|
||||
|
||||
// Note that in this example the classes are hard-coded and 'classes.txt' is a place holder.
|
||||
Inference inf(projectBasePath + "/yolov8s.onnx", cv::Size(640, 480), "classes.txt", runOnGPU);
|
||||
|
||||
std::vector<std::string> imageNames;
|
||||
imageNames.push_back(projectBasePath + "/ultralytics/assets/bus.jpg");
|
||||
imageNames.push_back(projectBasePath + "/ultralytics/assets/zidane.jpg");
|
||||
|
||||
for (int i = 0; i < imageNames.size(); ++i)
|
||||
{
|
||||
cv::Mat frame = cv::imread(imageNames[i]);
|
||||
|
||||
// Inference starts here...
|
||||
std::vector<Detection> output = inf.runInference(frame);
|
||||
|
||||
int detections = output.size();
|
||||
std::cout << "Number of detections:" << detections << std::endl;
|
||||
|
||||
for (int i = 0; i < detections; ++i)
|
||||
{
|
||||
Detection detection = output[i];
|
||||
|
||||
cv::Rect box = detection.box;
|
||||
cv::Scalar color = detection.color;
|
||||
|
||||
// Detection box
|
||||
cv::rectangle(frame, box, color, 2);
|
||||
|
||||
// Detection box text
|
||||
std::string classString = detection.className + ' ' + std::to_string(detection.confidence).substr(0, 4);
|
||||
cv::Size textSize = cv::getTextSize(classString, cv::FONT_HERSHEY_DUPLEX, 1, 2, 0);
|
||||
cv::Rect textBox(box.x, box.y - 40, textSize.width + 10, textSize.height + 20);
|
||||
|
||||
cv::rectangle(frame, textBox, color, cv::FILLED);
|
||||
cv::putText(frame, classString, cv::Point(box.x + 5, box.y - 10), cv::FONT_HERSHEY_DUPLEX, 1, cv::Scalar(0, 0, 0), 2, 0);
|
||||
}
|
||||
// Inference ends here...
|
||||
|
||||
// This is only for preview purposes
|
||||
float scale = 0.8;
|
||||
cv::resize(frame, frame, cv::Size(frame.cols*scale, frame.rows*scale));
|
||||
cv::imshow("Inference", frame);
|
||||
|
||||
cv::waitKey(-1);
|
||||
}
|
||||
}
|
@ -1 +0,0 @@
|
||||
This is a dummy project
|
Loading…
Reference in new issue