commit
157639e9e3
@ -0,0 +1,17 @@
|
|||||||
|
# ide related
|
||||||
|
.vs/*
|
||||||
|
.idea/*
|
||||||
|
# build
|
||||||
|
.build/*
|
||||||
|
x64/*
|
||||||
|
cmake-build-debug/*
|
||||||
|
cmake-build-debug-visual-studio/*
|
||||||
|
# ignore CMakeLists.txt but add example file
|
||||||
|
CMakeLists.txt
|
||||||
|
# dlls:
|
||||||
|
*.dll
|
||||||
|
# vs-like objects
|
||||||
|
*.sln
|
||||||
|
*vcxproj*
|
||||||
|
packages.config
|
||||||
|
packages/*
|
@ -0,0 +1,64 @@
|
|||||||
|
CMAKE_MINIMUM_REQUIRED(VERSION 3.0.0)
|
||||||
|
project(YOLOv8CPP)
|
||||||
|
|
||||||
|
SET(CMAKE_CXX_STANDARD 17)
|
||||||
|
SET(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||||
|
# SET (OpenCV_DIR your/path/to/opencv/build/x64/vc16/lib) # opencv lib root
|
||||||
|
# SET (OpenCV_BIN_DIR your/path/to/opencv/build/x64/vc16/bin) #opencv bin root
|
||||||
|
|
||||||
|
# SET (OpenCV_DEBUG_DLL_FILENAME opencv_world480d.dll) # change filenames
|
||||||
|
# SET (OpenCV_RELEASE_DLL_FILENAME opencv_world480.dll) # change filenames
|
||||||
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17")
|
||||||
|
SET (ONNXRUNTIME_DIR /home/xdobro23/PP1/onnxruntime/onnxruntime-linux-x64-gpu-1.18.1) # onnxruntime root
|
||||||
|
|
||||||
|
FIND_PACKAGE(OpenCV REQUIRED)
|
||||||
|
set(CMAKE_BUILD_TYPE Debug)
|
||||||
|
# --- Configure your project files ---
|
||||||
|
include_directories(include) # Include your header files directory
|
||||||
|
|
||||||
|
# Recursively collect all source files under 'src' directory
|
||||||
|
file(GLOB_RECURSE CURR_SOURCES src/*.cpp)
|
||||||
|
|
||||||
|
# Create the executable
|
||||||
|
add_executable(YOLOv8CPP ${CURR_SOURCES})
|
||||||
|
|
||||||
|
target_compile_features(YOLOv8CPP PRIVATE cxx_std_17)
|
||||||
|
|
||||||
|
TARGET_INCLUDE_DIRECTORIES(YOLOv8CPP PRIVATE "${ONNXRUNTIME_DIR}/include")
|
||||||
|
|
||||||
|
|
||||||
|
TARGET_LINK_LIBRARIES(YOLOv8CPP ${OpenCV_LIBS})
|
||||||
|
|
||||||
|
if (WIN32)
|
||||||
|
TARGET_LINK_LIBRARIES(YOLOv8CPP "${ONNXRUNTIME_DIR}/lib/onnxruntime.lib")
|
||||||
|
|
||||||
|
# some changes to the original version:
|
||||||
|
# copy onnxruntime dll
|
||||||
|
add_custom_command(TARGET YOLOv8CPP POST_BUILD
|
||||||
|
COMMAND ${CMAKE_COMMAND} -E copy_if_different
|
||||||
|
"${ONNXRUNTIME_DIR}/lib/onnxruntime.dll"
|
||||||
|
"$<TARGET_FILE_DIR:YOLOv8CPP>"
|
||||||
|
)
|
||||||
|
# copy opencv
|
||||||
|
#[[ add_custom_command(TARGET YOLOv8CPP POST_BUILD
|
||||||
|
COMMAND ${CMAKE_COMMAND} -E copy_if_differentdpkg -L libcv2
|
||||||
|
"${OpenCV_DIR}/${OpenCV_DLL_FILENAME}"
|
||||||
|
"$<TARGET_FILE_DIR:YOLOv8CPP>"
|
||||||
|
)]]
|
||||||
|
add_custom_command(TARGET YOLOv8CPP POST_BUILD
|
||||||
|
COMMAND ${CMAKE_COMMAND} -E copy_if_different
|
||||||
|
"${OpenCV_BIN_DIR}/${OpenCV_DEBUG_DLL_FILENAME}"
|
||||||
|
"$<TARGET_FILE_DIR:YOLOv8CPP>"
|
||||||
|
)
|
||||||
|
# add release
|
||||||
|
add_custom_command(TARGET YOLOv8CPP POST_BUILD
|
||||||
|
COMMAND ${CMAKE_COMMAND} -E copy_if_different
|
||||||
|
"${OpenCV_BIN_DIR}/${OpenCV_RELEASE_DLL_FILENAME}"
|
||||||
|
"$<TARGET_FILE_DIR:YOLOv8CPP>"
|
||||||
|
)
|
||||||
|
|
||||||
|
endif(WIN32)
|
||||||
|
|
||||||
|
if (UNIX)
|
||||||
|
TARGET_LINK_LIBRARIES(YOLOv8CPP "${ONNXRUNTIME_DIR}/lib/libonnxruntime.so")
|
||||||
|
endif(UNIX)
|
@ -0,0 +1,34 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
namespace MetadataConstants {
|
||||||
|
inline const std::string IMGSZ = "imgsz";
|
||||||
|
inline const std::string STRIDE = "stride";
|
||||||
|
inline const std::string NC = "nc";
|
||||||
|
inline const std::string CH = "ch";
|
||||||
|
inline const std::string DATE = "date";
|
||||||
|
inline const std::string VERSION = "version";
|
||||||
|
inline const std::string TASK = "task";
|
||||||
|
inline const std::string BATCH = "batch";
|
||||||
|
inline const std::string NAMES = "names";
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace OnnxProviders {
|
||||||
|
inline const std::string CPU = "cpu";
|
||||||
|
inline const std::string CUDA = "cuda";
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace OnnxInitializers
|
||||||
|
{
|
||||||
|
inline const int UNINITIALIZED_STRIDE = -1;
|
||||||
|
inline const int UNINITIALIZED_NC = -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
namespace YoloTasks
|
||||||
|
{
|
||||||
|
inline const std::string SEGMENT = "segment";
|
||||||
|
inline const std::string DETECT = "detect";
|
||||||
|
inline const std::string POSE = "pose";
|
||||||
|
inline const std::string CLASSIFY = "classify";
|
||||||
|
}
|
@ -0,0 +1,90 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <filesystem>
|
||||||
|
#include <vector>
|
||||||
|
#include <unordered_map>
|
||||||
|
#include <opencv2/core/mat.hpp>
|
||||||
|
|
||||||
|
#include "onnx_model_base.h"
|
||||||
|
#include "constants.h"
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Represents the results of YOLO prediction.
|
||||||
|
*
|
||||||
|
* This structure stores information about a detected object, including its class index,
|
||||||
|
* confidence score, bounding box, semantic segmentation mask, and keypoints (if available).
|
||||||
|
*/
|
||||||
|
struct YoloResults {
|
||||||
|
int class_idx{}; ///< The class index of the detected object.
|
||||||
|
float conf{}; ///< The confidence score of the detection.
|
||||||
|
cv::Rect_<float> bbox; ///< The bounding box of the detected object.
|
||||||
|
cv::Mat mask; ///< The semantic segmentation mask (if available).
|
||||||
|
std::vector<float> keypoints{}; ///< Keypoints representing the object's pose (if available).
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ImageInfo {
|
||||||
|
cv::Size raw_size; // add additional attrs if you need
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
class AutoBackendOnnx : public OnnxModelBase {
|
||||||
|
public:
|
||||||
|
// constructors
|
||||||
|
AutoBackendOnnx(const char* modelPath, const char* logid, const char* provider,
|
||||||
|
const std::vector<int>& imgsz, const int& stride,
|
||||||
|
const int& nc, std::unordered_map<int, std::string> names);
|
||||||
|
|
||||||
|
AutoBackendOnnx(const char* modelPath, const char* logid, const char* provider);
|
||||||
|
|
||||||
|
// getters
|
||||||
|
virtual const std::vector<int>& getImgsz();
|
||||||
|
virtual const int& getStride();
|
||||||
|
virtual const int& getCh();
|
||||||
|
virtual const int& getNc();
|
||||||
|
virtual const std::unordered_map<int, std::string>& getNames();
|
||||||
|
virtual const std::vector<int64_t>& getInputTensorShape();
|
||||||
|
virtual const int& getWidth();
|
||||||
|
virtual const int& getHeight();
|
||||||
|
virtual const cv::Size& getCvSize();
|
||||||
|
virtual const std::string& getTask();
|
||||||
|
/**
|
||||||
|
* @brief Runs object detection on an input image.
|
||||||
|
*
|
||||||
|
* This method performs object detection on the input image and returns the detected objects as YoloResults.
|
||||||
|
*
|
||||||
|
* @param image The input image to run object detection on.
|
||||||
|
* @param conf The confidence threshold for object detection.
|
||||||
|
* @param iou The intersection-over-union (IoU) threshold for non-maximum suppression.
|
||||||
|
* @param mask_threshold The threshold for the semantic segmentation mask.
|
||||||
|
* @param conversionCode An optional conversion code for image format conversion (e.g., cv::COLOR_BGR2RGB).
|
||||||
|
* Default value is -1, indicating no conversion.
|
||||||
|
* TODO: use some constant from some namespace rather than hardcoded values here
|
||||||
|
*
|
||||||
|
* @return A vector of YoloResults representing the detected objects.
|
||||||
|
*/
|
||||||
|
virtual std::vector<YoloResults> predict_once(cv::Mat& image, float& conf, float& iou, float& mask_threshold, int conversionCode = -1, bool verbose = true);
|
||||||
|
virtual std::vector<YoloResults> predict_once(const std::filesystem::path& imagePath, float& conf, float& iou, float& mask_threshold, int conversionCode = -1, bool verbose = true);
|
||||||
|
virtual std::vector<YoloResults> predict_once(const std::string& imagePath, float& conf, float& iou, float& mask_threshold, int conversionCode = -1, bool verbose = true);
|
||||||
|
|
||||||
|
virtual void fill_blob(cv::Mat& image, float*& blob, std::vector<int64_t>& inputTensorShape);
|
||||||
|
virtual void postprocess_masks(cv::Mat& output0, cv::Mat& output1, ImageInfo para, std::vector<YoloResults>& output,
|
||||||
|
int& class_names_num, float& conf_threshold, float& iou_threshold,
|
||||||
|
int& iw, int& ih, int& mw, int& mh, int& masks_features_num, float mask_threshold = 0.50f);
|
||||||
|
|
||||||
|
virtual void postprocess_detects(cv::Mat& output0, ImageInfo image_info, std::vector<YoloResults>& output,
|
||||||
|
int& class_names_num, float& conf_threshold, float& iou_threshold);
|
||||||
|
virtual void postprocess_kpts(cv::Mat& output0, ImageInfo& image_info, std::vector<YoloResults>& output,
|
||||||
|
int& class_names_num, float& conf_threshold, float& iou_threshold);
|
||||||
|
static void _get_mask2(const cv::Mat& mask_info, const cv::Mat& mask_data, const ImageInfo& image_info, cv::Rect bound, cv::Mat& mask_out,
|
||||||
|
float& mask_thresh, int& iw, int& ih, int& mw, int& mh, int& masks_features_num, bool round_downsampled = false);
|
||||||
|
|
||||||
|
protected:
|
||||||
|
std::vector<int> imgsz_;
|
||||||
|
int stride_ = OnnxInitializers::UNINITIALIZED_STRIDE;
|
||||||
|
int nc_ = OnnxInitializers::UNINITIALIZED_NC; //
|
||||||
|
int ch_ = 3;
|
||||||
|
std::unordered_map<int, std::string> names_;
|
||||||
|
std::vector<int64_t> inputTensorShape_;
|
||||||
|
cv::Size cvSize_;
|
||||||
|
std::string task_;
|
||||||
|
//cv::MatSize cvMatSize_;
|
||||||
|
};
|
@ -0,0 +1,39 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <onnxruntime_cxx_api.h>
|
||||||
|
#include <string>
|
||||||
|
#include <unordered_map>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This interface must provide only required arguments to load any onnx model regarding specific info -
|
||||||
|
* - i.e. modelPath will always be required, provider like "cpu" or "cuda" the same, since these are parameters you need
|
||||||
|
* to set up `sessionOptions` or `session` objects properly, but image size is not needed for pure onnx graph to be loaded so do NOT include it here
|
||||||
|
*/
|
||||||
|
class OnnxModelBase {
|
||||||
|
public:
|
||||||
|
OnnxModelBase(const char* modelPath, const char* logid, const char* provider);
|
||||||
|
//OnnxModelBase(); // no default constructor should be there
|
||||||
|
//virtual ~OnnxModelBase();
|
||||||
|
virtual const std::vector<std::string>& getInputNames(); // = 0
|
||||||
|
virtual const std::vector<std::string>& getOutputNames();
|
||||||
|
virtual const std::vector<const char*> getOutputNamesCStr();
|
||||||
|
virtual const std::vector<const char*> getInputNamesCStr();
|
||||||
|
virtual const Ort::ModelMetadata& getModelMetadata();
|
||||||
|
virtual const std::unordered_map<std::string, std::string>& getMetadata();
|
||||||
|
virtual const char* getModelPath();
|
||||||
|
virtual const Ort::Session& getSession();
|
||||||
|
//virtual std::vector<Ort::Value> forward(std::vector<Ort::Value> inputTensors);
|
||||||
|
virtual std::vector<Ort::Value> forward(std::vector<Ort::Value>& inputTensors);
|
||||||
|
Ort::Session session{ nullptr };
|
||||||
|
|
||||||
|
protected:
|
||||||
|
const char* modelPath_;
|
||||||
|
Ort::Env env{ nullptr };
|
||||||
|
|
||||||
|
std::vector<std::string> inputNodeNames;
|
||||||
|
std::vector<std::string> outputNodeNames;
|
||||||
|
Ort::ModelMetadata model_metadata{ nullptr };
|
||||||
|
std::unordered_map<std::string, std::string> metadata;
|
||||||
|
std::vector<const char*> outputNamesCStr;
|
||||||
|
std::vector<const char*> inputNamesCStr;
|
||||||
|
};
|
@ -0,0 +1,20 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <opencv2/core/types.hpp>
|
||||||
|
|
||||||
|
void letterbox(const cv::Mat& image,
|
||||||
|
cv::Mat& outImage,
|
||||||
|
const cv::Size& newShape = cv::Size(640, 640),
|
||||||
|
cv::Scalar_<double> color = cv::Scalar(), bool auto_ = true,
|
||||||
|
bool scaleFill = false,
|
||||||
|
bool scaleUp = true,
|
||||||
|
int stride = 32
|
||||||
|
);
|
||||||
|
|
||||||
|
|
||||||
|
cv::Mat scale_image(const cv::Mat& resized_mask, const cv::Size& im0_shape, const std::pair<float,
|
||||||
|
cv::Point2f>& ratio_pad = std::make_pair(-1.0f, cv::Point2f(-1.0f, -1.0f)));
|
||||||
|
|
||||||
|
void scale_image2(
|
||||||
|
cv::Mat& scaled_mask, const cv::Mat& resized_mask, const cv::Size& im0_shape,
|
||||||
|
const std::pair<float, cv::Point2f>& ratio_pad = std::make_pair(-1.0f, cv::Point2f(-1.0f, -1.0f))
|
||||||
|
);
|
@ -0,0 +1,44 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <opencv2/core/types.hpp>
|
||||||
|
|
||||||
|
//cv::Rect scaleCoords(const cv::Size& imageShape, const cv::Rect& coords, const cv::Size& imageOriginalShape);
|
||||||
|
/**
|
||||||
|
* Scales a bounding box from the shape of the input image to the shape of an original image.
|
||||||
|
*
|
||||||
|
* @param img1_shape The shape (height, width) of the input image for the model.
|
||||||
|
* @param box The bounding box to be scaled, specified as cv::Rect_<float>.
|
||||||
|
* @param img0_shape The shape (height, width) of the original target image.
|
||||||
|
* @param ratio_pad An optional parameter that specifies scaling and padding factors as a pair of values.
|
||||||
|
* The first value (ratio) is used for scaling, and the second value (pad) is used for padding.
|
||||||
|
* If not provided, default values will be used.
|
||||||
|
* @param padding An optional boolean parameter that specifies whether padding should be applied.
|
||||||
|
* If set to true, padding will be applied to the bounding box.
|
||||||
|
*
|
||||||
|
* @return A scaled bounding box specified as cv::Rect_<float>.
|
||||||
|
*
|
||||||
|
* This function rescales a bounding box from the shape of the input image (img1_shape) to the shape of an original image (img0_shape).
|
||||||
|
*/
|
||||||
|
cv::Rect_<float> scale_boxes(const cv::Size& img1_shape, cv::Rect_<float>& box, const cv::Size& img0_shape, std::pair<float, cv::Point2f> ratio_pad = std::make_pair(-1.0f, cv::Point2f(-1.0f, -1.0f)), bool padding = true);
|
||||||
|
void clip_boxes(cv::Rect& box, const cv::Size& shape);
|
||||||
|
void clip_boxes(cv::Rect_<float>& box, const cv::Size& shape);
|
||||||
|
void clip_boxes(std::vector<cv::Rect>& boxes, const cv::Size& shape);
|
||||||
|
void clip_boxes(std::vector<cv::Rect_<float>>& boxes, const cv::Size& shape);
|
||||||
|
|
||||||
|
//void clip_coords(cv::Mat& coords, const cv::Size& shape);
|
||||||
|
//cv::Mat scale_coords(const cv::Size& img1_shape, cv::Mat& coords, const cv::Size& img0_shape);
|
||||||
|
void clip_coords(std::vector<float>& coords, const cv::Size& shape);
|
||||||
|
std::vector<float> scale_coords(const cv::Size& img1_shape, std::vector<float>& coords, const cv::Size& img0_shape);
|
||||||
|
|
||||||
|
cv::Mat crop_mask(const cv::Mat& mask, const cv::Rect& box);
|
||||||
|
|
||||||
|
|
||||||
|
struct NMSResult{
|
||||||
|
std::vector<cv::Rect> bboxes;
|
||||||
|
std::vector<float> confidences;
|
||||||
|
std::vector<int> classes;
|
||||||
|
std::vector<std::vector<float>> rest;
|
||||||
|
};
|
||||||
|
|
||||||
|
//std::tuple<std::vector<cv::Rect_<float>>, std::vector<float>, std::vector<int>, std::vector<std::vector<float>>>
|
||||||
|
std::tuple<std::vector<cv::Rect>, std::vector<float>, std::vector<int>, std::vector<std::vector<float>>>
|
||||||
|
non_max_suppression(const cv::Mat& output0, int class_names_num, int total_features_num, double conf_threshold, float iou_threshold);
|
@ -0,0 +1,359 @@
|
|||||||
|
#include <random>
|
||||||
|
|
||||||
|
#include <filesystem>
|
||||||
|
#include "nn/onnx_model_base.h"
|
||||||
|
#include "nn/autobackend.h"
|
||||||
|
#include <opencv2/opencv.hpp>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "utils/augment.h"
|
||||||
|
#include "constants.h"
|
||||||
|
#include "utils/common.h"
|
||||||
|
|
||||||
|
|
||||||
|
namespace fs = std::filesystem;
|
||||||
|
|
||||||
|
|
||||||
|
// Define the skeleton and color mappings
|
||||||
|
std::vector<std::vector<int>> skeleton = {{16, 14}, {14, 12}, {17, 15}, {15, 13}, {12, 13}, {6, 12}, {7, 13}, {6, 7},
|
||||||
|
{6, 8}, {7, 9}, {8, 10}, {9, 11}, {2, 3}, {1, 2}, {1, 3}, {2, 4}, {3, 5}, {4, 6}, {5, 7}};
|
||||||
|
|
||||||
|
std::vector<cv::Scalar> posePalette = {
|
||||||
|
cv::Scalar(255, 128, 0), cv::Scalar(255, 153, 51), cv::Scalar(255, 178, 102), cv::Scalar(230, 230, 0), cv::Scalar(255, 153, 255),
|
||||||
|
cv::Scalar(153, 204, 255), cv::Scalar(255, 102, 255), cv::Scalar(255, 51, 255), cv::Scalar(102, 178, 255), cv::Scalar(51, 153, 255),
|
||||||
|
cv::Scalar(255, 153, 153), cv::Scalar(255, 102, 102), cv::Scalar(255, 51, 51), cv::Scalar(153, 255, 153), cv::Scalar(102, 255, 102),
|
||||||
|
cv::Scalar(51, 255, 51), cv::Scalar(0, 255, 0), cv::Scalar(0, 0, 255), cv::Scalar(255, 0, 0), cv::Scalar(255, 255, 255)
|
||||||
|
};
|
||||||
|
|
||||||
|
std::vector<int> limbColorIndices = {9, 9, 9, 9, 7, 7, 7, 0, 0, 0, 0, 0, 16, 16, 16, 16, 16, 16, 16};
|
||||||
|
std::vector<int> kptColorIndices = {16, 16, 16, 16, 16, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
cv::Scalar generateRandomColor(int numChannels) {
|
||||||
|
if (numChannels < 1 || numChannels > 3) {
|
||||||
|
throw std::invalid_argument("Invalid number of channels. Must be between 1 and 3.");
|
||||||
|
}
|
||||||
|
|
||||||
|
std::random_device rd;
|
||||||
|
std::mt19937 gen(rd());
|
||||||
|
std::uniform_int_distribution<int> dis(0, 255);
|
||||||
|
|
||||||
|
cv::Scalar color;
|
||||||
|
for (int i = 0; i < numChannels; i++) {
|
||||||
|
color[i] = dis(gen); // for each channel separately generate value
|
||||||
|
}
|
||||||
|
|
||||||
|
return color;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<cv::Scalar> generateRandomColors(int class_names_num, int numChannels) {
|
||||||
|
std::vector<cv::Scalar> colors;
|
||||||
|
for (int i = 0; i < class_names_num; i++) {
|
||||||
|
cv::Scalar color = generateRandomColor(numChannels);
|
||||||
|
colors.push_back(color);
|
||||||
|
}
|
||||||
|
return colors;
|
||||||
|
}
|
||||||
|
|
||||||
|
void plot_masks(cv::Mat img, std::vector<YoloResults>& result, std::vector<cv::Scalar> color,
|
||||||
|
std::unordered_map<int, std::string>& names)
|
||||||
|
{
|
||||||
|
cv::Mat mask = img.clone();
|
||||||
|
for (int i = 0; i < result.size(); i++)
|
||||||
|
{
|
||||||
|
float left, top;
|
||||||
|
left = result[i].bbox.x;
|
||||||
|
top = result[i].bbox.y;
|
||||||
|
int color_num = i;
|
||||||
|
int& class_idx = result[i].class_idx;
|
||||||
|
rectangle(img, result[i].bbox, color[result[i].class_idx], 2);
|
||||||
|
|
||||||
|
// try to get string value corresponding to given class_idx
|
||||||
|
std::string class_name;
|
||||||
|
auto it = names.find(class_idx);
|
||||||
|
if (it != names.end()) {
|
||||||
|
class_name = it->second;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
std::cerr << "Warning: class_idx not found in names for class_idx = " << class_idx << std::endl;
|
||||||
|
// then convert it to string anyway
|
||||||
|
class_name = std::to_string(class_idx);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (result[i].mask.rows && result[i].mask.cols > 0)
|
||||||
|
{
|
||||||
|
mask(result[i].bbox).setTo(color[result[i].class_idx], result[i].mask);
|
||||||
|
}
|
||||||
|
std::stringstream labelStream;
|
||||||
|
labelStream << class_name << " " << std::fixed << std::setprecision(2) << result[i].conf;
|
||||||
|
std::string label = labelStream.str();
|
||||||
|
|
||||||
|
cv::Size text_size = cv::getTextSize(label, cv::FONT_HERSHEY_SIMPLEX, 0.6, 2, nullptr);
|
||||||
|
cv::Rect rect_to_fill(left - 1, top - text_size.height - 5, text_size.width + 2, text_size.height + 5);
|
||||||
|
cv::Scalar text_color = cv::Scalar(255.0, 255.0, 255.0);
|
||||||
|
rectangle(img, rect_to_fill, color[result[i].class_idx], -1);
|
||||||
|
|
||||||
|
putText(img, label, cv::Point(left - 1.5, top - 2.5), cv::FONT_HERSHEY_SIMPLEX, 0.6, text_color, 2);
|
||||||
|
}
|
||||||
|
addWeighted(img, 0.6, mask, 0.4, 0, img); //add mask to src
|
||||||
|
resize(img, img, img.size());
|
||||||
|
imshow("img", img);
|
||||||
|
cv::waitKey();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//void plot_keypoints(cv::Mat& image, const std::vector<std::vector<float>>& keypoints, const cv::Size& shape) {
|
||||||
|
void plot_keypoints(cv::Mat& image, const std::vector<YoloResults>& results, const cv::Size& shape) {
|
||||||
|
|
||||||
|
int radius = 5;
|
||||||
|
bool drawLines = true;
|
||||||
|
|
||||||
|
if (results.empty()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<cv::Scalar> limbColorPalette;
|
||||||
|
std::vector<cv::Scalar> kptColorPalette;
|
||||||
|
|
||||||
|
for (int index : limbColorIndices) {
|
||||||
|
limbColorPalette.push_back(posePalette[index]);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int index : kptColorIndices) {
|
||||||
|
kptColorPalette.push_back(posePalette[index]);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const auto& res: results) {
|
||||||
|
auto keypoint = res.keypoints;
|
||||||
|
bool isPose = keypoint.size() == 51; // numKeypoints == 17 && keypoints[0].size() == 3;
|
||||||
|
drawLines &= isPose;
|
||||||
|
|
||||||
|
// draw points
|
||||||
|
for (int i = 0; i < 17; i++) {
|
||||||
|
int idx = i * 3;
|
||||||
|
int x_coord = static_cast<int>(keypoint[idx]);
|
||||||
|
int y_coord = static_cast<int>(keypoint[idx + 1]);
|
||||||
|
|
||||||
|
if (x_coord % shape.width != 0 && y_coord % shape.height != 0) {
|
||||||
|
if (keypoint.size() == 3) {
|
||||||
|
float conf = keypoint[2];
|
||||||
|
if (conf < 0.5) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
cv::Scalar color_k = isPose ? kptColorPalette[i] : cv::Scalar(0, 0,
|
||||||
|
255); // Default to red if not in pose mode
|
||||||
|
cv::circle(image, cv::Point(x_coord, y_coord), radius, color_k, -1, cv::LINE_AA);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// draw lines
|
||||||
|
if (drawLines) {
|
||||||
|
for (int i = 0; i < skeleton.size(); i++) {
|
||||||
|
const std::vector<int> &sk = skeleton[i];
|
||||||
|
int idx1 = sk[0] - 1;
|
||||||
|
int idx2 = sk[1] - 1;
|
||||||
|
|
||||||
|
int idx1_x_pos = idx1 * 3;
|
||||||
|
int idx2_x_pos = idx2 * 3;
|
||||||
|
|
||||||
|
int x1 = static_cast<int>(keypoint[idx1_x_pos]);
|
||||||
|
int y1 = static_cast<int>(keypoint[idx1_x_pos + 1]);
|
||||||
|
int x2 = static_cast<int>(keypoint[idx2_x_pos]);
|
||||||
|
int y2 = static_cast<int>(keypoint[idx2_x_pos + 1]);
|
||||||
|
|
||||||
|
float conf1 = keypoint[idx1_x_pos + 2];
|
||||||
|
float conf2 = keypoint[idx2_x_pos + 2];
|
||||||
|
|
||||||
|
// Check confidence thresholds
|
||||||
|
if (conf1 < 0.5 || conf2 < 0.5) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if positions are within bounds
|
||||||
|
if (x1 % shape.width == 0 || y1 % shape.height == 0 || x1 < 0 || y1 < 0 ||
|
||||||
|
x2 % shape.width == 0 || y2 % shape.height == 0 || x2 < 0 || y2 < 0) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Draw a line between keypoints
|
||||||
|
cv::Scalar color_limb = limbColorPalette[i];
|
||||||
|
cv::line(image, cv::Point(x1, y1), cv::Point(x2, y2), color_limb, 2, cv::LINE_AA);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void plot_results(cv::Mat img, std::vector<YoloResults>& results,
|
||||||
|
std::vector<cv::Scalar> color, std::unordered_map<int, std::string>& names,
|
||||||
|
const cv::Size& shape
|
||||||
|
) {
|
||||||
|
|
||||||
|
cv::Mat mask = img.clone();
|
||||||
|
|
||||||
|
int radius = 5;
|
||||||
|
bool drawLines = true;
|
||||||
|
|
||||||
|
auto raw_image_shape = img.size();
|
||||||
|
std::vector<cv::Scalar> limbColorPalette;
|
||||||
|
std::vector<cv::Scalar> kptColorPalette;
|
||||||
|
|
||||||
|
for (int index : limbColorIndices) {
|
||||||
|
limbColorPalette.push_back(posePalette[index]);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int index : kptColorIndices) {
|
||||||
|
kptColorPalette.push_back(posePalette[index]);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const auto& res : results) {
|
||||||
|
float left = res.bbox.x;
|
||||||
|
float top = res.bbox.y;
|
||||||
|
int color_num = res.class_idx;
|
||||||
|
|
||||||
|
// Draw bounding box
|
||||||
|
rectangle(img, res.bbox, color[res.class_idx], 2);
|
||||||
|
|
||||||
|
// Try to get the class name corresponding to the given class_idx
|
||||||
|
std::string class_name;
|
||||||
|
auto it = names.find(res.class_idx);
|
||||||
|
if (it != names.end()) {
|
||||||
|
class_name = it->second;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
std::cerr << "Warning: class_idx not found in names for class_idx = " << res.class_idx << std::endl;
|
||||||
|
// Then convert it to a string anyway
|
||||||
|
class_name = std::to_string(res.class_idx);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Draw mask if available
|
||||||
|
if (res.mask.rows && res.mask.cols > 0) {
|
||||||
|
mask(res.bbox).setTo(color[res.class_idx], res.mask);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create label
|
||||||
|
std::stringstream labelStream;
|
||||||
|
labelStream << class_name << " " << std::fixed << std::setprecision(2) << res.conf;
|
||||||
|
std::string label = labelStream.str();
|
||||||
|
|
||||||
|
cv::Size text_size = cv::getTextSize(label, cv::FONT_HERSHEY_SIMPLEX, 0.6, 2, nullptr);
|
||||||
|
cv::Rect rect_to_fill(left - 1, top - text_size.height - 5, text_size.width + 2, text_size.height + 5);
|
||||||
|
cv::Scalar text_color = cv::Scalar(255.0, 255.0, 255.0);
|
||||||
|
rectangle(img, rect_to_fill, color[res.class_idx], -1);
|
||||||
|
putText(img, label, cv::Point(left - 1.5, top - 2.5), cv::FONT_HERSHEY_SIMPLEX, 0.6, text_color, 2);
|
||||||
|
|
||||||
|
// Check if keypoints are available
|
||||||
|
if (!res.keypoints.empty()) {
|
||||||
|
auto keypoint = res.keypoints;
|
||||||
|
bool isPose = keypoint.size() == 51; // numKeypoints == 17 && keypoints[0].size() == 3;
|
||||||
|
drawLines &= isPose;
|
||||||
|
|
||||||
|
// draw points
|
||||||
|
for (int i = 0; i < 17; i++) {
|
||||||
|
int idx = i * 3;
|
||||||
|
int x_coord = static_cast<int>(keypoint[idx]);
|
||||||
|
int y_coord = static_cast<int>(keypoint[idx + 1]);
|
||||||
|
|
||||||
|
if (x_coord % raw_image_shape.width != 0 && y_coord % raw_image_shape.height != 0) {
|
||||||
|
if (keypoint.size() == 3) {
|
||||||
|
float conf = keypoint[2];
|
||||||
|
if (conf < 0.5) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
cv::Scalar color_k = isPose ? kptColorPalette[i] : cv::Scalar(0, 0,
|
||||||
|
255); // Default to red if not in pose mode
|
||||||
|
cv::circle(img, cv::Point(x_coord, y_coord), radius, color_k, -1, cv::LINE_AA);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// draw lines
|
||||||
|
if (drawLines) {
|
||||||
|
for (int i = 0; i < skeleton.size(); i++) {
|
||||||
|
const std::vector<int> &sk = skeleton[i];
|
||||||
|
int idx1 = sk[0] - 1;
|
||||||
|
int idx2 = sk[1] - 1;
|
||||||
|
|
||||||
|
int idx1_x_pos = idx1 * 3;
|
||||||
|
int idx2_x_pos = idx2 * 3;
|
||||||
|
|
||||||
|
int x1 = static_cast<int>(keypoint[idx1_x_pos]);
|
||||||
|
int y1 = static_cast<int>(keypoint[idx1_x_pos + 1]);
|
||||||
|
int x2 = static_cast<int>(keypoint[idx2_x_pos]);
|
||||||
|
int y2 = static_cast<int>(keypoint[idx2_x_pos + 1]);
|
||||||
|
|
||||||
|
float conf1 = keypoint[idx1_x_pos + 2];
|
||||||
|
float conf2 = keypoint[idx2_x_pos + 2];
|
||||||
|
|
||||||
|
// Check confidence thresholds
|
||||||
|
if (conf1 < 0.5 || conf2 < 0.5) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if positions are within bounds
|
||||||
|
if (x1 % raw_image_shape.width == 0 || y1 % raw_image_shape.height == 0 || x1 < 0 || y1 < 0 ||
|
||||||
|
x2 % raw_image_shape.width == 0 || y2 % raw_image_shape.height == 0 || x2 < 0 || y2 < 0) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Draw a line between keypoints
|
||||||
|
cv::Scalar color_limb = limbColorPalette[i];
|
||||||
|
cv::line(img, cv::Point(x1, y1), cv::Point(x2, y2), color_limb, 2, cv::LINE_AA);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Combine the image and mask
|
||||||
|
addWeighted(img, 0.6, mask, 0.4, 0, img);
|
||||||
|
// resize(img, img, img.size());
|
||||||
|
// resize(img, img, shape);
|
||||||
|
// // Show the image
|
||||||
|
// imshow("img", img);
|
||||||
|
// cv::waitKey();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
std::string img_path = "/home/xdobro23/path_to_16bit_3_channel_image.png";
|
||||||
|
const std::string& modelPath = "/home/xdobro23/PP1/ultralytics/runs/detect/train35/weights/best.onnx"; // pose
|
||||||
|
|
||||||
|
fs::path imageFilePath(img_path);
|
||||||
|
fs::path newFilePath = imageFilePath.stem();
|
||||||
|
newFilePath += "-kpt-cpp";
|
||||||
|
newFilePath += imageFilePath.extension();
|
||||||
|
assert(newFilePath != imageFilePath);
|
||||||
|
std::cout << "newFilePath: " << newFilePath << std::endl;
|
||||||
|
|
||||||
|
const std::string& onnx_provider = OnnxProviders::CPU; // "cpu";
|
||||||
|
const std::string& onnx_logid = "yolov8_inference2";
|
||||||
|
float mask_threshold = 0.5f; // in python it's 0.5 and you can see that at ultralytics/utils/ops.process_mask line 705 (ultralytics.__version__ == .160)
|
||||||
|
float conf_threshold = 0.30f;
|
||||||
|
float iou_threshold = 0.45f; // 0.70f;
|
||||||
|
int conversion_code = -1;
|
||||||
|
cv::Mat img = cv::imread(img_path, cv::IMREAD_UNCHANGED);
|
||||||
|
if (img.empty()) {
|
||||||
|
std::cerr << "Error: Unable to load image" << std::endl;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
AutoBackendOnnx model(modelPath.c_str(), onnx_logid.c_str(), onnx_provider.c_str());
|
||||||
|
std::cout << "Model loaded" << std::endl;
|
||||||
|
std::vector<YoloResults> objs = model.predict_once(img, conf_threshold, iou_threshold, mask_threshold);
|
||||||
|
std::cout << "Got results" << std::endl;
|
||||||
|
std::vector<cv::Scalar> colors = generateRandomColors(model.getNc(), model.getCh());
|
||||||
|
std::unordered_map<int, std::string> names = model.getNames();
|
||||||
|
|
||||||
|
std::vector<std::vector<float>> keypointsVector;
|
||||||
|
for (const YoloResults& result : objs) {
|
||||||
|
keypointsVector.push_back(result.keypoints);
|
||||||
|
}
|
||||||
|
|
||||||
|
cv::cvtColor(img, img, cv::COLOR_RGB2BGR);
|
||||||
|
cv::Size show_shape = img.size(); // cv::Size(1280, 720); // img.size()
|
||||||
|
plot_results(img, objs, colors, names, show_shape);
|
||||||
|
// plot_masks(img, objs, colors, names);
|
||||||
|
cv::imshow("img", img);
|
||||||
|
cv::waitKey();
|
||||||
|
return -1;
|
||||||
|
}
|
@ -0,0 +1,520 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "nn/autobackend.h"
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <ostream>
|
||||||
|
#include <filesystem>
|
||||||
|
|
||||||
|
#include <opencv2/opencv.hpp>
|
||||||
|
#include <opencv2/imgcodecs.hpp>
|
||||||
|
#include <opencv2/imgproc.hpp>
|
||||||
|
#include <opencv2/core/mat.hpp>
|
||||||
|
|
||||||
|
#include "utils/augment.h"
|
||||||
|
#include "constants.h"
|
||||||
|
#include "utils/common.h"
|
||||||
|
#include "utils/ops.h"
|
||||||
|
|
||||||
|
|
||||||
|
namespace fs = std::filesystem;
|
||||||
|
|
||||||
|
|
||||||
|
AutoBackendOnnx::AutoBackendOnnx(const char* modelPath, const char* logid, const char* provider,
|
||||||
|
const std::vector<int>& imgsz, const int& stride,
|
||||||
|
const int& nc, const std::unordered_map<int, std::string> names)
|
||||||
|
: OnnxModelBase(modelPath, logid, provider), imgsz_(imgsz), stride_(stride), nc_(nc), names_(names),
|
||||||
|
inputTensorShape_()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
AutoBackendOnnx::AutoBackendOnnx(const char* modelPath, const char* logid, const char* provider)
|
||||||
|
: OnnxModelBase(modelPath, logid, provider) {
|
||||||
|
// init metadata etc
|
||||||
|
OnnxModelBase(modelPath, logid, provider);
|
||||||
|
// then try to get additional info from metadata like imgsz, stride etc;
|
||||||
|
// ideally you should get all of them but you'll raise error if smth is not in metadata (or not under the appropriate keys)
|
||||||
|
const std::unordered_map<std::string, std::string>& base_metadata = OnnxModelBase::getMetadata();
|
||||||
|
|
||||||
|
// post init imgsz
|
||||||
|
auto imgsz_iterator = base_metadata.find(MetadataConstants::IMGSZ);
|
||||||
|
if (imgsz_iterator != base_metadata.end()) {
|
||||||
|
// parse it and convert to int iterable
|
||||||
|
std::vector<int> imgsz = convertStringVectorToInts(parseVectorString(imgsz_iterator->second));
|
||||||
|
// set it here:
|
||||||
|
if (imgsz_.empty()) {
|
||||||
|
imgsz_ = imgsz;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
std::cerr << "Warning: Cannot get imgsz value from metadata" << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
// post init stride
|
||||||
|
auto stride_item = base_metadata.find(MetadataConstants::STRIDE);
|
||||||
|
if (stride_item != base_metadata.end()) {
|
||||||
|
// parse it and convert to int iterable
|
||||||
|
int stide_int = std::stoi(stride_item->second);
|
||||||
|
// set it here:
|
||||||
|
if (stride_ == OnnxInitializers::UNINITIALIZED_STRIDE) {
|
||||||
|
stride_ = stide_int;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
std::cerr << "Warning: Cannot get stride value from metadata" << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
// post init names
|
||||||
|
auto names_item = base_metadata.find(MetadataConstants::NAMES);
|
||||||
|
if (names_item != base_metadata.end()) {
|
||||||
|
// parse it and convert to int iterable
|
||||||
|
std::unordered_map<int, std::string> names = parseNames(names_item->second);
|
||||||
|
std::cout << "***Names from metadata***" << std::endl;
|
||||||
|
for (const auto& pair : names) {
|
||||||
|
std::cout << "Key: " << pair.first << ", Value: " << pair.second << std::endl;
|
||||||
|
}
|
||||||
|
// set it here:
|
||||||
|
if (names_.empty()) {
|
||||||
|
names_ = names;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
std::cerr << "Warning: Cannot get names value from metadata" << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
// post init number of classes - you can do that only and only if names_ is not empty and nc was not initialized previously
|
||||||
|
if (nc_ == OnnxInitializers::UNINITIALIZED_NC && !names_.empty()) {
|
||||||
|
nc_ = names_.size();
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
std::cerr << "Warning: Cannot get nc value from metadata (probably names wasn't set)" << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!imgsz_.empty() && inputTensorShape_.empty())
|
||||||
|
{
|
||||||
|
inputTensorShape_ = { 1, ch_, getHeight(), getWidth() };
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!imgsz_.empty())
|
||||||
|
{
|
||||||
|
// Initialize cvSize_ using getHeight() and getWidth()
|
||||||
|
//cvSize_ = cv::MatSize()
|
||||||
|
cvSize_ = cv::Size(getWidth(), getHeight());
|
||||||
|
//cvMatSize_ = cv::MatSize(cvSize_.width, cvSize_.height);
|
||||||
|
}
|
||||||
|
|
||||||
|
// task init:
|
||||||
|
auto task_item = base_metadata.find(MetadataConstants::TASK);
|
||||||
|
if (task_item != base_metadata.end()) {
|
||||||
|
// parse it and convert to int iterable
|
||||||
|
std::string task = std::string(task_item->second);
|
||||||
|
// set it here:
|
||||||
|
if (task_.empty())
|
||||||
|
{
|
||||||
|
task_ = task;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
std::cerr << "Warning: Cannot get task value from metadata" << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: raise assert if imgsz_ and task_ were not initialized (since you don't know in that case which postprocessing to use)
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
const std::vector<int>& AutoBackendOnnx::getImgsz() {
|
||||||
|
return imgsz_;
|
||||||
|
}
|
||||||
|
|
||||||
|
const int& AutoBackendOnnx::getHeight()
|
||||||
|
{
|
||||||
|
return imgsz_[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
const int& AutoBackendOnnx::getWidth()
|
||||||
|
{
|
||||||
|
return imgsz_[1];
|
||||||
|
}
|
||||||
|
|
||||||
|
const int& AutoBackendOnnx::getStride() {
|
||||||
|
return stride_;
|
||||||
|
}
|
||||||
|
|
||||||
|
const int& AutoBackendOnnx::getCh() {
|
||||||
|
return ch_;
|
||||||
|
}
|
||||||
|
|
||||||
|
const int& AutoBackendOnnx::getNc() {
|
||||||
|
return nc_;
|
||||||
|
}
|
||||||
|
|
||||||
|
const std::unordered_map<int, std::string>& AutoBackendOnnx::getNames() {
|
||||||
|
return names_;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
const cv::Size& AutoBackendOnnx::getCvSize()
|
||||||
|
{
|
||||||
|
return cvSize_;
|
||||||
|
}
|
||||||
|
|
||||||
|
const std::vector<int64_t>& AutoBackendOnnx::getInputTensorShape()
|
||||||
|
{
|
||||||
|
return inputTensorShape_;
|
||||||
|
}
|
||||||
|
|
||||||
|
const std::string& AutoBackendOnnx::getTask()
|
||||||
|
{
|
||||||
|
return task_;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<YoloResults> AutoBackendOnnx::predict_once(const std::string& imagePath, float& conf, float& iou, float& mask_threshold,
|
||||||
|
int conversionCode, bool verbose) {
|
||||||
|
// Convert the string imagePath to an object of type std::filesystem::path
|
||||||
|
fs::path imageFilePath(imagePath);
|
||||||
|
// Call the predict_once method, converting the image to a cv::Mat
|
||||||
|
return predict_once(imageFilePath, conf, iou, mask_threshold, conversionCode);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<YoloResults> AutoBackendOnnx::predict_once(const fs::path& imagePath, float& conf, float& iou, float& mask_threshold,
|
||||||
|
int conversionCode, bool verbose) {
|
||||||
|
// Check if the specified path exists
|
||||||
|
if (!fs::exists(imagePath)) {
|
||||||
|
std::cerr << "Error: File does not exist: " << imagePath << std::endl;
|
||||||
|
// Return an empty vector or throw an exception, depending on your logic
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
|
||||||
|
// Load the image into a cv::Mat
|
||||||
|
cv::Mat image = cv::imread(imagePath.string(), cv::IMREAD_UNCHANGED);
|
||||||
|
|
||||||
|
// Check if loading the image was successful
|
||||||
|
if (image.empty()) {
|
||||||
|
std::cerr << "Error: Failed to load image: " << imagePath << std::endl;
|
||||||
|
// Return an empty vector or throw an exception, depending on your logic
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
|
||||||
|
// now do some preprocessing based on channels info:
|
||||||
|
int required_image_channels = this->getCh();
|
||||||
|
/*assert(required_image_channels == image.channels() && "");*/
|
||||||
|
// Assert that the number of channels in the input image matches the required number of channels for the model
|
||||||
|
if (required_image_channels != image.channels()) {
|
||||||
|
const std::string& errorMessage = "Error: Number of image channels does not match the required channels.\n"
|
||||||
|
"Number of channels in the image: " + std::to_string(image.channels());
|
||||||
|
throw std::runtime_error(errorMessage);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Call overloaded one
|
||||||
|
return predict_once(image, conf, iou, mask_threshold, conversionCode);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
std::vector<YoloResults> AutoBackendOnnx::predict_once(cv::Mat& image, float& conf, float& iou, float& mask_threshold, int conversionCode, bool verbose) {
|
||||||
|
double preprocess_time = 0.0;
|
||||||
|
double inference_time = 0.0;
|
||||||
|
double postprocess_time = 0.0;
|
||||||
|
Timer preprocess_timer = Timer(preprocess_time, verbose);
|
||||||
|
// 1. preprocess
|
||||||
|
float* blob = nullptr;
|
||||||
|
//double* blob = nullptr;
|
||||||
|
std::vector<Ort::Value> inputTensors;
|
||||||
|
if (conversionCode >= 0) {
|
||||||
|
cv::cvtColor(image, image, conversionCode);
|
||||||
|
}
|
||||||
|
std::vector<int64_t> inputTensorShape;
|
||||||
|
// TODO: for classify task preprocessed image will be different (!):
|
||||||
|
cv::Mat preprocessed_img;
|
||||||
|
cv::Size new_shape = cv::Size(getWidth(), getHeight());
|
||||||
|
const bool& scaleFill = false; // false
|
||||||
|
const bool& auto_ = false; // true
|
||||||
|
letterbox(image, preprocessed_img, new_shape, cv::Scalar(), auto_, scaleFill, true, getStride());
|
||||||
|
fill_blob(preprocessed_img, blob, inputTensorShape);
|
||||||
|
int64_t inputTensorSize = vector_product(inputTensorShape);
|
||||||
|
std::vector<float> inputTensorValues(blob, blob + inputTensorSize);
|
||||||
|
|
||||||
|
Ort::MemoryInfo memoryInfo = Ort::MemoryInfo::CreateCpu(
|
||||||
|
OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault);
|
||||||
|
|
||||||
|
inputTensors.push_back(Ort::Value::CreateTensor<float>(
|
||||||
|
memoryInfo, inputTensorValues.data(), inputTensorSize,
|
||||||
|
inputTensorShape.data(), inputTensorShape.size()
|
||||||
|
));
|
||||||
|
preprocess_timer.Stop();
|
||||||
|
Timer inference_timer = Timer(inference_time, verbose);
|
||||||
|
// 2. inference
|
||||||
|
std::vector<Ort::Value> outputTensors = forward(inputTensors);
|
||||||
|
inference_timer.Stop();
|
||||||
|
Timer postprocess_timer = Timer(postprocess_time, verbose);
|
||||||
|
// create container for the results
|
||||||
|
std::vector<YoloResults> results;
|
||||||
|
// 3. postprocess based on task:
|
||||||
|
std::unordered_map<int, std::string> names = this->getNames();
|
||||||
|
// 4. cleanup blob since it was created using the "new" keyword during the `fill_blob` func call
|
||||||
|
delete[] blob;
|
||||||
|
|
||||||
|
int class_names_num = names.size();
|
||||||
|
if (task_ == YoloTasks::SEGMENT) {
|
||||||
|
|
||||||
|
// get outputs info
|
||||||
|
std::vector<int64_t> outputTensor0Shape = outputTensors[0].GetTensorTypeAndShapeInfo().GetShape();
|
||||||
|
std::vector<int64_t> outputTensor1Shape = outputTensors[1].GetTensorTypeAndShapeInfo().GetShape();
|
||||||
|
// get outputs
|
||||||
|
float* all_data0 = outputTensors[0].GetTensorMutableData<float>();
|
||||||
|
|
||||||
|
cv::Mat output0 = cv::Mat(cv::Size((int)outputTensor0Shape[2], (int)outputTensor0Shape[1]), CV_32F, all_data0).t(); // [bs, features, preds_num]=>[bs, preds_num, features]
|
||||||
|
auto mask_shape = outputTensor1Shape;
|
||||||
|
std::vector<int> mask_sz = { 1,(int)mask_shape[1],(int)mask_shape[2],(int)mask_shape[3] };
|
||||||
|
cv::Mat output1 = cv::Mat(mask_sz, CV_32F, outputTensors[1].GetTensorMutableData<float>());
|
||||||
|
|
||||||
|
int iw = this->getWidth();
|
||||||
|
int ih = this->getHeight();
|
||||||
|
int mask_features_num = outputTensor1Shape[1];
|
||||||
|
int mh = outputTensor1Shape[2];
|
||||||
|
int mw = outputTensor1Shape[3];
|
||||||
|
ImageInfo img_info = { image.size() };
|
||||||
|
postprocess_masks(output0, output1, img_info, results, class_names_num, conf, iou,
|
||||||
|
iw, ih, mw, mh, mask_features_num, mask_threshold);
|
||||||
|
}
|
||||||
|
else if (task_ == YoloTasks::DETECT) {
|
||||||
|
ImageInfo img_info = { image.size() };
|
||||||
|
std::vector<int64_t> outputTensor0Shape = outputTensors[0].GetTensorTypeAndShapeInfo().GetShape();
|
||||||
|
float* all_data0 = outputTensors[0].GetTensorMutableData<float>();
|
||||||
|
cv::Mat output0 = cv::Mat(cv::Size((int)outputTensor0Shape[2], (int)outputTensor0Shape[1]), CV_32F, all_data0).t(); // [bs, features, preds_num]=>[bs, preds_num, features]
|
||||||
|
postprocess_detects(output0, img_info, results, class_names_num, conf, iou);
|
||||||
|
}
|
||||||
|
else if (task_ == YoloTasks::POSE) {
|
||||||
|
ImageInfo image_info = { image.size() };
|
||||||
|
std::vector<int64_t> outputTensor0Shape = outputTensors[0].GetTensorTypeAndShapeInfo().GetShape();
|
||||||
|
float* all_data0 = outputTensors[0].GetTensorMutableData<float>();
|
||||||
|
cv::Mat output0 = cv::Mat(cv::Size((int)outputTensor0Shape[2], (int)outputTensor0Shape[1]), CV_32F, all_data0).t(); // [bs, features, preds_num]=>[bs, preds_num, features]
|
||||||
|
postprocess_kpts(output0, image_info, results, class_names_num, conf, iou);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
throw std::runtime_error("NotImplementedError: task: " + task_);
|
||||||
|
}
|
||||||
|
|
||||||
|
postprocess_timer.Stop();
|
||||||
|
if (verbose) {
|
||||||
|
std::cout << std::fixed << std::setprecision(1);
|
||||||
|
std::cout << "image: " << preprocessed_img.rows << "x" << preprocessed_img.cols << " " << results.size() << " objs, ";
|
||||||
|
std::cout << (preprocess_time + inference_time + postprocess_time) * 1000.0 << "ms" << std::endl;
|
||||||
|
std::cout << "Speed: " << (preprocess_time * 1000.0) << "ms preprocess, ";
|
||||||
|
std::cout << (inference_time * 1000.0) << "ms inference, ";
|
||||||
|
std::cout << (postprocess_time * 1000.0) << "ms postprocess per image ";
|
||||||
|
std::cout << "at shape (1, " << image.channels() << ", " << preprocessed_img.rows << ", " << preprocessed_img.cols << ")" << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void AutoBackendOnnx::postprocess_masks(cv::Mat& output0, cv::Mat& output1, ImageInfo image_info, std::vector<YoloResults>& output,
|
||||||
|
int& class_names_num, float& conf_threshold, float& iou_threshold,
|
||||||
|
int& iw, int& ih, int& mw, int& mh, int& masks_features_num, float mask_threshold /* = 0.5f */)
|
||||||
|
{
|
||||||
|
output.clear();
|
||||||
|
std::vector<int> class_ids;
|
||||||
|
std::vector<float> confidences;
|
||||||
|
std::vector<cv::Rect> boxes;
|
||||||
|
std::vector<std::vector<float>> masks;
|
||||||
|
// 4 - your default number of rect parameters {x, y, w, h}
|
||||||
|
int data_width = class_names_num + 4 + masks_features_num;
|
||||||
|
int rows = output0.rows;
|
||||||
|
float* pdata = (float*)output0.data;
|
||||||
|
for (int r = 0; r < rows; ++r)
|
||||||
|
{
|
||||||
|
cv::Mat scores(1, class_names_num, CV_32FC1, pdata + 4);
|
||||||
|
cv::Point class_id;
|
||||||
|
double max_conf;
|
||||||
|
minMaxLoc(scores, 0, &max_conf, 0, &class_id);
|
||||||
|
if (max_conf > conf_threshold)
|
||||||
|
{
|
||||||
|
masks.push_back(std::vector<float>(pdata + 4 + class_names_num, pdata + data_width));
|
||||||
|
class_ids.push_back(class_id.x);
|
||||||
|
confidences.push_back(max_conf);
|
||||||
|
|
||||||
|
float out_w = pdata[2];
|
||||||
|
float out_h = pdata[3];
|
||||||
|
float out_left = MAX((pdata[0] - 0.5 * out_w + 0.5), 0);
|
||||||
|
float out_top = MAX((pdata[1] - 0.5 * out_h + 0.5), 0);
|
||||||
|
cv::Rect_ <float> bbox = cv::Rect(out_left, out_top, (out_w + 0.5), (out_h + 0.5));
|
||||||
|
cv::Rect_<float> scaled_bbox = scale_boxes(getCvSize(), bbox, image_info.raw_size);
|
||||||
|
boxes.push_back(scaled_bbox);
|
||||||
|
}
|
||||||
|
pdata += data_width; // next pred
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
//float masks_threshold = 0.50;
|
||||||
|
//int top_k = 500;
|
||||||
|
//const float& nmsde_eta = 1.0f;
|
||||||
|
std::vector<int> nms_result;
|
||||||
|
cv::dnn::NMSBoxes(boxes, confidences, conf_threshold, iou_threshold, nms_result); // , nms_eta, top_k);
|
||||||
|
|
||||||
|
// select all of the protos tensor
|
||||||
|
cv::Size downsampled_size = cv::Size(mw, mh);
|
||||||
|
std::vector<cv::Range> roi_rangs = { cv::Range(0, 1), cv::Range::all(),
|
||||||
|
cv::Range(0, downsampled_size.height), cv::Range(0, downsampled_size.width) };
|
||||||
|
cv::Mat temp_mask = output1(roi_rangs).clone();
|
||||||
|
cv::Mat proto = temp_mask.reshape(0, { masks_features_num, downsampled_size.width * downsampled_size.height });
|
||||||
|
|
||||||
|
for (int i = 0; i < nms_result.size(); ++i)
|
||||||
|
{
|
||||||
|
int idx = nms_result[i];
|
||||||
|
boxes[idx] = boxes[idx] & cv::Rect(0, 0, image_info.raw_size.width, image_info.raw_size.height);
|
||||||
|
YoloResults result = { class_ids[idx] ,confidences[idx] ,boxes[idx] };
|
||||||
|
_get_mask2(cv::Mat(masks[idx]).t(), proto, image_info, boxes[idx], result.mask, mask_threshold,
|
||||||
|
iw, ih, mw, mh, masks_features_num);
|
||||||
|
output.push_back(result);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void AutoBackendOnnx::postprocess_detects(cv::Mat& output0, ImageInfo image_info, std::vector<YoloResults>& output,
|
||||||
|
int& class_names_num, float& conf_threshold, float& iou_threshold)
|
||||||
|
{
|
||||||
|
output.clear();
|
||||||
|
std::vector<int> class_ids;
|
||||||
|
std::vector<float> confidences;
|
||||||
|
std::vector<cv::Rect> boxes;
|
||||||
|
std::vector<std::vector<float>> masks;
|
||||||
|
// 4 - your default number of rect parameters {x, y, w, h}
|
||||||
|
int data_width = class_names_num + 4;
|
||||||
|
int rows = output0.rows;
|
||||||
|
float* pdata = (float*)output0.data;
|
||||||
|
|
||||||
|
for (int r = 0; r < rows; ++r)
|
||||||
|
{
|
||||||
|
cv::Mat scores(1, class_names_num, CV_32FC1, pdata + 4);
|
||||||
|
cv::Point class_id;
|
||||||
|
double max_conf;
|
||||||
|
minMaxLoc(scores, nullptr, &max_conf, nullptr, &class_id);
|
||||||
|
|
||||||
|
if (max_conf > conf_threshold)
|
||||||
|
{
|
||||||
|
masks.emplace_back(pdata + 4 + class_names_num, pdata + data_width);
|
||||||
|
class_ids.push_back(class_id.x);
|
||||||
|
confidences.push_back((float) max_conf);
|
||||||
|
|
||||||
|
float out_w = pdata[2];
|
||||||
|
float out_h = pdata[3];
|
||||||
|
float out_left = MAX((pdata[0] - 0.5 * out_w + 0.5), 0);
|
||||||
|
float out_top = MAX((pdata[1] - 0.5 * out_h + 0.5), 0);
|
||||||
|
|
||||||
|
cv::Rect_ <float> bbox = cv::Rect_ <float> (out_left, out_top, (out_w + 0.5), (out_h + 0.5));
|
||||||
|
cv::Rect_<float> scaled_bbox = scale_boxes(getCvSize(), bbox, image_info.raw_size);
|
||||||
|
|
||||||
|
boxes.push_back(scaled_bbox);
|
||||||
|
}
|
||||||
|
pdata += data_width; // next pred
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<int> nms_result;
|
||||||
|
cv::dnn::NMSBoxes(boxes, confidences, conf_threshold, iou_threshold, nms_result); // , nms_eta, top_k);
|
||||||
|
for (int idx : nms_result)
|
||||||
|
{
|
||||||
|
boxes[idx] = boxes[idx] & cv::Rect(0, 0, image_info.raw_size.width, image_info.raw_size.height);
|
||||||
|
YoloResults result = { class_ids[idx] ,confidences[idx] ,boxes[idx] };
|
||||||
|
output.push_back(result);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void AutoBackendOnnx::postprocess_kpts(cv::Mat& output0, ImageInfo& image_info, std::vector<YoloResults>& output,
|
||||||
|
int& class_names_num, float& conf_threshold, float& iou_threshold)
|
||||||
|
{
|
||||||
|
std::vector<cv::Rect> boxes;
|
||||||
|
std::vector<float> confidences;
|
||||||
|
std::vector<int> class_ids;
|
||||||
|
std::vector<std::vector<float>> rest;
|
||||||
|
std::tie(boxes, confidences, class_ids, rest) = non_max_suppression(output0, class_names_num, output0.cols, conf_threshold, iou_threshold);
|
||||||
|
cv::Size img1_shape = getCvSize();
|
||||||
|
auto bound_bbox = cv::Rect_ <float> (0, 0, image_info.raw_size.width, image_info.raw_size.height);
|
||||||
|
for (int i = 0; i < boxes.size(); i++) {
|
||||||
|
// pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], shape).round()
|
||||||
|
// pred_kpts = pred[:, 6:].view(len(pred), *self.model.kpt_shape) if len(pred) else pred[:, 6:]
|
||||||
|
// pred_kpts = ops.scale_coords(img.shape[2:], pred_kpts, shape)
|
||||||
|
// path = self.batch[0]
|
||||||
|
// img_path = path[i] if isinstance(path, list) else path
|
||||||
|
// results.append(
|
||||||
|
// Results(orig_img=orig_img,
|
||||||
|
// path=img_path,
|
||||||
|
// names=self.model.names,
|
||||||
|
// boxes=pred[:, :6],
|
||||||
|
// keypoints=pred_kpts))
|
||||||
|
cv::Rect_<float> bbox = boxes[i];
|
||||||
|
auto scaled_bbox = scale_boxes(img1_shape, bbox, image_info.raw_size);
|
||||||
|
scaled_bbox = scaled_bbox & bound_bbox;
|
||||||
|
// cv::Mat kpt = cv::Mat(rest[i]).t();
|
||||||
|
// scale_coords(img1_shape, kpt, image_info.raw_size);
|
||||||
|
// TODO: overload scale_coords so that will accept cv::Mat of shape [17, 3]
|
||||||
|
// so that it will be more similar to what we have in python
|
||||||
|
std::vector<float> kpt = scale_coords(img1_shape, rest[i], image_info.raw_size);
|
||||||
|
YoloResults tmp_res = { class_ids[i], confidences[i], scaled_bbox, {}, kpt};
|
||||||
|
output.push_back(tmp_res);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void AutoBackendOnnx::_get_mask2(const cv::Mat& masks_features,
|
||||||
|
const cv::Mat& proto,
|
||||||
|
const ImageInfo& image_info, const cv::Rect bound, cv::Mat& mask_out,
|
||||||
|
float& mask_thresh, int& iw, int& ih, int& mw, int& mh, int& masks_features_num,
|
||||||
|
bool round_downsampled)
|
||||||
|
|
||||||
|
{
|
||||||
|
cv::Size img0_shape = image_info.raw_size;
|
||||||
|
cv::Size img1_shape = cv::Size(iw, ih);
|
||||||
|
cv::Size downsampled_size = cv::Size(mw, mh);
|
||||||
|
|
||||||
|
cv::Rect_<float> bound_float(
|
||||||
|
static_cast<float>(bound.x),
|
||||||
|
static_cast<float>(bound.y),
|
||||||
|
static_cast<float>(bound.width),
|
||||||
|
static_cast<float>(bound.height)
|
||||||
|
);
|
||||||
|
|
||||||
|
cv::Rect_<float> downsampled_bbox = scale_boxes(img0_shape, bound_float, downsampled_size);
|
||||||
|
cv::Size bound_size = cv::Size(mw, mh);
|
||||||
|
clip_boxes(downsampled_bbox, bound_size);
|
||||||
|
|
||||||
|
cv::Mat matmul_res = (masks_features * proto).t();
|
||||||
|
matmul_res = matmul_res.reshape(1, { downsampled_size.height, downsampled_size.width });
|
||||||
|
// apply sigmoid to the mask:
|
||||||
|
cv::Mat sigmoid_mask;
|
||||||
|
exp(-matmul_res, sigmoid_mask);
|
||||||
|
sigmoid_mask = 1.0 / (1.0 + sigmoid_mask);
|
||||||
|
cv::Mat resized_mask;
|
||||||
|
cv::Rect_<float> input_bbox = scale_boxes(img0_shape, bound_float, img1_shape);
|
||||||
|
cv::resize(sigmoid_mask, resized_mask, img1_shape, 0, 0, cv::INTER_LANCZOS4);
|
||||||
|
cv::Mat pre_out_mask = resized_mask(input_bbox);
|
||||||
|
cv::Mat scaled_mask;
|
||||||
|
scale_image2(scaled_mask, resized_mask, img0_shape);
|
||||||
|
cv::resize(scaled_mask, mask_out, img0_shape);
|
||||||
|
mask_out = mask_out(bound) > mask_thresh;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void AutoBackendOnnx::fill_blob(cv::Mat& image, float*& blob, std::vector<int64_t>& inputTensorShape) {
|
||||||
|
|
||||||
|
cv::Mat floatImage;
|
||||||
|
if (inputTensorShape.empty())
|
||||||
|
{
|
||||||
|
inputTensorShape = getInputTensorShape();
|
||||||
|
}
|
||||||
|
int inputChannelsNum = inputTensorShape[1];
|
||||||
|
int rtype = CV_32FC3;
|
||||||
|
image.convertTo(floatImage, rtype, 1.0f / 65635.0);
|
||||||
|
blob = new float[floatImage.cols * floatImage.rows * floatImage.channels()];
|
||||||
|
cv::Size floatImageSize{ floatImage.cols, floatImage.rows };
|
||||||
|
|
||||||
|
// hwc -> chw
|
||||||
|
std::vector<cv::Mat> chw(floatImage.channels());
|
||||||
|
for (int i = 0; i < floatImage.channels(); ++i)
|
||||||
|
{
|
||||||
|
chw[i] = cv::Mat(floatImageSize, CV_32FC1, blob + i * floatImageSize.width * floatImageSize.height);
|
||||||
|
}
|
||||||
|
cv::split(floatImage, chw);
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,181 @@
|
|||||||
|
#include "nn/onnx_model_base.h"
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <onnxruntime_cxx_api.h>
|
||||||
|
#include <onnxruntime_c_api.h>
|
||||||
|
|
||||||
|
#include "constants.h"
|
||||||
|
#include "utils/common.h"
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Base class for any onnx model regarding the target.
|
||||||
|
*
|
||||||
|
* Wraps OrtApi.
|
||||||
|
*
|
||||||
|
* The caller provides a model path, logid, and provider.
|
||||||
|
*
|
||||||
|
* See the output logs for more information on warnings/errors that occur while processing the model.
|
||||||
|
*
|
||||||
|
* @param[in] modelPath Path to the model file.
|
||||||
|
* @param[in] logid Log identifier.
|
||||||
|
* @param[in] provider Provider (e.g., "CPU" or "CUDA"). (NOTE: for now only CPU is supported)
|
||||||
|
*/
|
||||||
|
|
||||||
|
OnnxModelBase::OnnxModelBase(const char* modelPath, const char* logid, const char* provider)
|
||||||
|
//: modelPath_(modelPath), env(std::move(env)), session(std::move(session))
|
||||||
|
: modelPath_(modelPath)
|
||||||
|
{
|
||||||
|
|
||||||
|
// TODO: too bad passing `ORT_LOGGING_LEVEL_WARNING` by default - for some cases
|
||||||
|
// info level would make sense too
|
||||||
|
env = Ort::Env(ORT_LOGGING_LEVEL_WARNING, logid);
|
||||||
|
Ort::SessionOptions sessionOptions = Ort::SessionOptions();
|
||||||
|
|
||||||
|
std::vector<std::string> availableProviders = Ort::GetAvailableProviders();
|
||||||
|
auto cudaAvailable = std::find(availableProviders.begin(), availableProviders.end(), "CUDAExecutionProvider");
|
||||||
|
OrtCUDAProviderOptions cudaOption;
|
||||||
|
|
||||||
|
if (provider == OnnxProviders::CUDA.c_str()) { // strcmp(provider, OnnxProviders::CUDA.c_str()) == true strcmp(provider, "cuda") // (providerStr == "cuda")
|
||||||
|
if (cudaAvailable == availableProviders.end()) {
|
||||||
|
std::cout << "CUDA is not supported by your ONNXRuntime build. Fallback to CPU." << std::endl;
|
||||||
|
//std::cout << "Inference device: CPU" << std::endl;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
//std::cout << "Inference device: GPU" << std::endl;
|
||||||
|
sessionOptions.AppendExecutionProvider_CUDA(cudaOption);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
else if (provider == OnnxProviders::CPU.c_str()) { // strcmp(provider, OnnxProviders::CPU.c_str()) == true) (providerStr == "cpu") {
|
||||||
|
// "cpu" by default
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
throw std::runtime_error("NotImplemented provider=" + std::string(provider));
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout << "Inference device: " << std::string(provider) << std::endl;
|
||||||
|
#ifdef _WIN32
|
||||||
|
auto modelPathW = get_win_path(modelPath); // For Windows (wstring)
|
||||||
|
session = Ort::Session(env, modelPathW.c_str(), sessionOptions);
|
||||||
|
#else
|
||||||
|
session = Ort::Session(env, modelPath, sessionOptions); // For Linux (string)
|
||||||
|
#endif
|
||||||
|
//session = Ort::Session(env)
|
||||||
|
// https://github.com/microsoft/onnxruntime/issues/14157
|
||||||
|
//std::vector<const char*> inputNodeNames; //
|
||||||
|
// ----------------
|
||||||
|
// init input names
|
||||||
|
inputNodeNames;
|
||||||
|
std::vector<Ort::AllocatedStringPtr> inputNodeNameAllocatedStrings; // <-- newly added
|
||||||
|
Ort::AllocatorWithDefaultOptions allocator;
|
||||||
|
auto inputNodesNum = session.GetInputCount();
|
||||||
|
for (int i = 0; i < inputNodesNum; i++) {
|
||||||
|
auto input_name = session.GetInputNameAllocated(i, allocator);
|
||||||
|
inputNodeNameAllocatedStrings.push_back(std::move(input_name));
|
||||||
|
inputNodeNames.push_back(inputNodeNameAllocatedStrings.back().get());
|
||||||
|
}
|
||||||
|
// -----------------
|
||||||
|
// init output names
|
||||||
|
outputNodeNames;
|
||||||
|
auto outputNodesNum = session.GetOutputCount();
|
||||||
|
std::vector<Ort::AllocatedStringPtr> outputNodeNameAllocatedStrings; // <-- newly added
|
||||||
|
Ort::AllocatorWithDefaultOptions output_names_allocator;
|
||||||
|
for (int i = 0; i < outputNodesNum; i++)
|
||||||
|
{
|
||||||
|
auto output_name = session.GetOutputNameAllocated(i, output_names_allocator);
|
||||||
|
outputNodeNameAllocatedStrings.push_back(std::move(output_name));
|
||||||
|
outputNodeNames.push_back(outputNodeNameAllocatedStrings.back().get());
|
||||||
|
}
|
||||||
|
// -------------------------
|
||||||
|
// initialize model metadata
|
||||||
|
model_metadata = session.GetModelMetadata();
|
||||||
|
Ort::AllocatorWithDefaultOptions metadata_allocator;
|
||||||
|
|
||||||
|
std::vector<Ort::AllocatedStringPtr> metadataAllocatedKeys = model_metadata.GetCustomMetadataMapKeysAllocated(metadata_allocator);
|
||||||
|
std::vector<std::string> metadata_keys;
|
||||||
|
metadata_keys.reserve(metadataAllocatedKeys.size());
|
||||||
|
|
||||||
|
for (const Ort::AllocatedStringPtr& allocatedString : metadataAllocatedKeys) {
|
||||||
|
metadata_keys.emplace_back(allocatedString.get());
|
||||||
|
}
|
||||||
|
|
||||||
|
// -------------------------
|
||||||
|
// initialize metadata as the dict
|
||||||
|
// even though we know exactly what metadata we intend to use
|
||||||
|
// base onnx class should not have any ultralytics yolo-specific attributes like stride, task etc, so keep it clean as much as possible
|
||||||
|
for (const std::string& key : metadata_keys) {
|
||||||
|
Ort::AllocatedStringPtr metadata_value = model_metadata.LookupCustomMetadataMapAllocated(key.c_str(), metadata_allocator);
|
||||||
|
if (metadata_value != nullptr) {
|
||||||
|
auto raw_metadata_value = metadata_value.get();
|
||||||
|
metadata[key] = std::string(raw_metadata_value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// initialize cstr
|
||||||
|
for (const std::string& name : outputNodeNames) {
|
||||||
|
outputNamesCStr.push_back(name.c_str());
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const std::string& name : inputNodeNames)
|
||||||
|
{
|
||||||
|
inputNamesCStr.push_back(name.c_str());
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
const std::vector<std::string>& OnnxModelBase::getInputNames() {
|
||||||
|
return inputNodeNames;
|
||||||
|
}
|
||||||
|
|
||||||
|
const std::vector<std::string>& OnnxModelBase::getOutputNames() {
|
||||||
|
return outputNodeNames;
|
||||||
|
}
|
||||||
|
|
||||||
|
const Ort::ModelMetadata& OnnxModelBase::getModelMetadata()
|
||||||
|
{
|
||||||
|
return model_metadata;
|
||||||
|
}
|
||||||
|
|
||||||
|
const std::unordered_map<std::string, std::string>& OnnxModelBase::getMetadata()
|
||||||
|
{
|
||||||
|
return metadata;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
const Ort::Session& OnnxModelBase::getSession()
|
||||||
|
{
|
||||||
|
return session;
|
||||||
|
}
|
||||||
|
|
||||||
|
const char* OnnxModelBase::getModelPath()
|
||||||
|
{
|
||||||
|
return modelPath_;
|
||||||
|
}
|
||||||
|
|
||||||
|
const std::vector<const char*> OnnxModelBase::getOutputNamesCStr()
|
||||||
|
{
|
||||||
|
return outputNamesCStr;
|
||||||
|
}
|
||||||
|
|
||||||
|
const std::vector<const char*> OnnxModelBase::getInputNamesCStr()
|
||||||
|
{
|
||||||
|
return inputNamesCStr;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<Ort::Value> OnnxModelBase::forward(std::vector<Ort::Value>& inputTensors)
|
||||||
|
{
|
||||||
|
// todo: make runOptions parameter here
|
||||||
|
|
||||||
|
return session.Run(Ort::RunOptions{ nullptr },
|
||||||
|
inputNamesCStr.data(),
|
||||||
|
inputTensors.data(),
|
||||||
|
inputNamesCStr.size(),
|
||||||
|
outputNamesCStr.data(),
|
||||||
|
outputNamesCStr.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
//OnnxModelBase::~OnnxModelBase() {
|
||||||
|
// // empty body
|
||||||
|
//}
|
@ -0,0 +1,145 @@
|
|||||||
|
#include <opencv2/core.hpp>
|
||||||
|
#include <opencv2/imgproc.hpp>
|
||||||
|
#include <opencv2/core/mat.hpp>
|
||||||
|
#include <opencv2/core/types.hpp>
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief padding value when letterbox changes image size ratio
|
||||||
|
*/
|
||||||
|
const int& DEFAULT_LETTERBOX_PAD_VALUE = 114;
|
||||||
|
|
||||||
|
|
||||||
|
void letterbox(const cv::Mat& image,
|
||||||
|
cv::Mat& outImage,
|
||||||
|
const cv::Size& newShape,
|
||||||
|
cv::Scalar_<double> color,
|
||||||
|
bool auto_,
|
||||||
|
bool scaleFill,
|
||||||
|
bool scaleUp, int stride
|
||||||
|
) {
|
||||||
|
cv::Size shape = image.size();
|
||||||
|
float r = std::min(static_cast<float>(newShape.height) / static_cast<float>(shape.height),
|
||||||
|
static_cast<float>(newShape.width) / static_cast<float>(shape.width));
|
||||||
|
if (!scaleUp)
|
||||||
|
r = std::min(r, 1.0f);
|
||||||
|
|
||||||
|
float ratio[2]{ r, r };
|
||||||
|
int newUnpad[2]{ static_cast<int>(std::round(static_cast<float>(shape.width) * r)),
|
||||||
|
static_cast<int>(std::round(static_cast<float>(shape.height) * r)) };
|
||||||
|
|
||||||
|
auto dw = static_cast<float>(newShape.width - newUnpad[0]);
|
||||||
|
auto dh = static_cast<float>(newShape.height - newUnpad[1]);
|
||||||
|
|
||||||
|
if (auto_)
|
||||||
|
{
|
||||||
|
dw = static_cast<float>((static_cast<int>(dw) % stride));
|
||||||
|
dh = static_cast<float>((static_cast<int>(dh) % stride));
|
||||||
|
}
|
||||||
|
else if (scaleFill)
|
||||||
|
{
|
||||||
|
dw = 0.0f;
|
||||||
|
dh = 0.0f;
|
||||||
|
newUnpad[0] = newShape.width;
|
||||||
|
newUnpad[1] = newShape.height;
|
||||||
|
ratio[0] = static_cast<float>(newShape.width) / static_cast<float>(shape.width);
|
||||||
|
ratio[1] = static_cast<float>(newShape.height) / static_cast<float>(shape.height);
|
||||||
|
}
|
||||||
|
|
||||||
|
dw /= 2.0f;
|
||||||
|
dh /= 2.0f;
|
||||||
|
|
||||||
|
//cv::Mat outImage;
|
||||||
|
if (shape.width != newUnpad[0] || shape.height != newUnpad[1])
|
||||||
|
{
|
||||||
|
cv::resize(image, outImage, cv::Size(newUnpad[0], newUnpad[1]));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
outImage = image.clone();
|
||||||
|
}
|
||||||
|
|
||||||
|
int top = static_cast<int>(std::round(dh - 0.1f));
|
||||||
|
int bottom = static_cast<int>(std::round(dh + 0.1f));
|
||||||
|
int left = static_cast<int>(std::round(dw - 0.1f));
|
||||||
|
int right = static_cast<int>(std::round(dw + 0.1f));
|
||||||
|
|
||||||
|
|
||||||
|
if (color == cv::Scalar()) {
|
||||||
|
color = cv::Scalar(DEFAULT_LETTERBOX_PAD_VALUE, DEFAULT_LETTERBOX_PAD_VALUE, DEFAULT_LETTERBOX_PAD_VALUE);
|
||||||
|
}
|
||||||
|
|
||||||
|
cv::copyMakeBorder(outImage, outImage, top, bottom, left, right, cv::BORDER_CONSTANT, color);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
cv::Mat scale_image(const cv::Mat& resized_mask, const cv::Size& im0_shape, const std::pair<float, cv::Point2f>& ratio_pad) {
|
||||||
|
cv::Size im1_shape = resized_mask.size();
|
||||||
|
|
||||||
|
// Check if resizing is needed
|
||||||
|
if (im1_shape == im0_shape) {
|
||||||
|
return resized_mask.clone();
|
||||||
|
}
|
||||||
|
|
||||||
|
float gain, pad_x, pad_y;
|
||||||
|
|
||||||
|
if (ratio_pad.first < 0.0f) {
|
||||||
|
gain = std::min(static_cast<float>(im1_shape.height) / static_cast<float>(im0_shape.height),
|
||||||
|
static_cast<float>(im1_shape.width) / static_cast<float>(im0_shape.width));
|
||||||
|
pad_x = (im1_shape.width - im0_shape.width * gain) / 2.0f;
|
||||||
|
pad_y = (im1_shape.height - im0_shape.height * gain) / 2.0f;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
gain = ratio_pad.first;
|
||||||
|
pad_x = ratio_pad.second.x;
|
||||||
|
pad_y = ratio_pad.second.y;
|
||||||
|
}
|
||||||
|
|
||||||
|
int top = static_cast<int>(pad_y);
|
||||||
|
int left = static_cast<int>(pad_x);
|
||||||
|
int bottom = static_cast<int>(im1_shape.height - pad_y);
|
||||||
|
int right = static_cast<int>(im1_shape.width - pad_x);
|
||||||
|
|
||||||
|
// Clip and resize the mask
|
||||||
|
cv::Rect clipped_rect(left, top, right - left, bottom - top);
|
||||||
|
cv::Mat clipped_mask = resized_mask(clipped_rect);
|
||||||
|
cv::Mat scaled_mask;
|
||||||
|
cv::resize(clipped_mask, scaled_mask, im0_shape);
|
||||||
|
|
||||||
|
return scaled_mask;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void scale_image2(cv::Mat& scaled_mask, const cv::Mat& resized_mask, const cv::Size& im0_shape, const std::pair<float, cv::Point2f>& ratio_pad) {
|
||||||
|
cv::Size im1_shape = resized_mask.size();
|
||||||
|
|
||||||
|
// Check if resizing is needed
|
||||||
|
if (im1_shape == im0_shape) {
|
||||||
|
scaled_mask = resized_mask.clone();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
float gain, pad_x, pad_y;
|
||||||
|
|
||||||
|
if (ratio_pad.first < 0.0f) {
|
||||||
|
gain = std::min(static_cast<float>(im1_shape.height) / static_cast<float>(im0_shape.height),
|
||||||
|
static_cast<float>(im1_shape.width) / static_cast<float>(im0_shape.width));
|
||||||
|
pad_x = (im1_shape.width - im0_shape.width * gain) / 2.0f;
|
||||||
|
pad_y = (im1_shape.height - im0_shape.height * gain) / 2.0f;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
gain = ratio_pad.first;
|
||||||
|
pad_x = ratio_pad.second.x;
|
||||||
|
pad_y = ratio_pad.second.y;
|
||||||
|
}
|
||||||
|
|
||||||
|
int top = static_cast<int>(pad_y);
|
||||||
|
int left = static_cast<int>(pad_x);
|
||||||
|
int bottom = static_cast<int>(im1_shape.height - pad_y);
|
||||||
|
int right = static_cast<int>(im1_shape.width - pad_x);
|
||||||
|
|
||||||
|
// Clip and resize the mask
|
||||||
|
cv::Rect clipped_rect(left, top, right - left, bottom - top);
|
||||||
|
cv::Mat clipped_mask = resized_mask(clipped_rect);
|
||||||
|
cv::resize(clipped_mask, scaled_mask, im0_shape);
|
||||||
|
}
|
@ -0,0 +1,217 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <opencv2/opencv.hpp>
|
||||||
|
//#include <opencv2/imgcodecs.hpp>
|
||||||
|
//#include <opencv2/imgproc.hpp>
|
||||||
|
|
||||||
|
#include <opencv2/core/mat.hpp>
|
||||||
|
#include <opencv2/core/types.hpp>
|
||||||
|
#include <opencv2/core.hpp>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
void clip_boxes(cv::Rect& box, const cv::Size& shape) {
|
||||||
|
box.x = std::max(0, std::min(box.x, shape.width));
|
||||||
|
box.y = std::max(0, std::min(box.y, shape.height));
|
||||||
|
box.width = std::max(0, std::min(box.width, shape.width - box.x));
|
||||||
|
box.height = std::max(0, std::min(box.height, shape.height - box.y));
|
||||||
|
}
|
||||||
|
|
||||||
|
void clip_boxes(cv::Rect_<float>& box, const cv::Size& shape) {
|
||||||
|
box.x = std::max(0.0f, std::min(box.x, static_cast<float>(shape.width)));
|
||||||
|
box.y = std::max(0.0f, std::min(box.y, static_cast<float>(shape.height)));
|
||||||
|
box.width = std::max(0.0f, std::min(box.width, static_cast<float>(shape.width - box.x)));
|
||||||
|
box.height = std::max(0.0f, std::min(box.height, static_cast<float>(shape.height - box.y)));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void clip_boxes(std::vector<cv::Rect>& boxes, const cv::Size& shape) {
|
||||||
|
for (cv::Rect& box : boxes) {
|
||||||
|
clip_boxes(box, shape);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void clip_boxes(std::vector<cv::Rect_<float>>& boxes, const cv::Size& shape) {
|
||||||
|
for (cv::Rect_<float>& box : boxes) {
|
||||||
|
clip_boxes(box, shape);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// source: ultralytics/utils/ops.py scale_boxes lines 99+ (ultralytics==8.0.160)
|
||||||
|
cv::Rect_<float> scale_boxes(const cv::Size& img1_shape, cv::Rect_<float>& box, const cv::Size& img0_shape,
|
||||||
|
std::pair<float, cv::Point2f> ratio_pad = std::make_pair(-1.0f, cv::Point2f(-1.0f, -1.0f)), bool padding = true) {
|
||||||
|
|
||||||
|
float gain, pad_x, pad_y;
|
||||||
|
|
||||||
|
if (ratio_pad.first < 0.0f) {
|
||||||
|
gain = std::min(static_cast<float>(img1_shape.height) / static_cast<float>(img0_shape.height),
|
||||||
|
static_cast<float>(img1_shape.width) / static_cast<float>(img0_shape.width));
|
||||||
|
pad_x = roundf((img1_shape.width - img0_shape.width * gain) / 2.0f - 0.1f);
|
||||||
|
pad_y = roundf((img1_shape.height - img0_shape.height * gain) / 2.0f - 0.1f);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
gain = ratio_pad.first;
|
||||||
|
pad_x = ratio_pad.second.x;
|
||||||
|
pad_y = ratio_pad.second.y;
|
||||||
|
}
|
||||||
|
|
||||||
|
//cv::Rect scaledCoords(box);
|
||||||
|
cv::Rect_<float> scaledCoords(box);
|
||||||
|
|
||||||
|
if (padding) {
|
||||||
|
scaledCoords.x -= pad_x;
|
||||||
|
scaledCoords.y -= pad_y;
|
||||||
|
}
|
||||||
|
|
||||||
|
scaledCoords.x /= gain;
|
||||||
|
scaledCoords.y /= gain;
|
||||||
|
scaledCoords.width /= gain;
|
||||||
|
scaledCoords.height /= gain;
|
||||||
|
|
||||||
|
// Clip the box to the bounds of the image
|
||||||
|
clip_boxes(scaledCoords, img0_shape);
|
||||||
|
|
||||||
|
return scaledCoords;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//void clip_coords(cv::Mat& coords, const cv::Size& shape) {
|
||||||
|
// // Clip x-coordinates to the image width
|
||||||
|
// cv::Mat xCoords = coords.col(0);
|
||||||
|
// cv::Mat yCoords = coords.col(1);
|
||||||
|
//
|
||||||
|
// for (int i = 0; i < coords.rows; ++i) {
|
||||||
|
// xCoords.at<float>(i) = std::max(std::min(xCoords.at<float>(i), static_cast<float>(shape.width - 1)), 0.0f);
|
||||||
|
// yCoords.at<float>(i) = std::max(std::min(yCoords.at<float>(i), static_cast<float>(shape.height - 1)), 0.0f);
|
||||||
|
// }
|
||||||
|
//}
|
||||||
|
|
||||||
|
void clip_coords(std::vector<float>& coords, const cv::Size& shape) {
|
||||||
|
// Assuming coords are of shape [1, 17, 3]
|
||||||
|
for (int i = 0; i < coords.size(); i += 3) {
|
||||||
|
coords[i] = std::min(std::max(coords[i], 0.0f), static_cast<float>(shape.width - 1)); // x
|
||||||
|
coords[i + 1] = std::min(std::max(coords[i + 1], 0.0f), static_cast<float>(shape.height - 1)); // y
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// source: ultralytics/utils/ops.py scale_coords lines 753+ (ultralytics==8.0.160)
|
||||||
|
//cv::Mat scale_coords(const cv::Size& img1_shape, cv::Mat& coords, const cv::Size& img0_shape)
|
||||||
|
//cv::Mat scale_coords(const cv::Size& img1_shape, std::vector<float> coords, const cv::Size& img0_shape)
|
||||||
|
std::vector<float> scale_coords(const cv::Size& img1_shape, std::vector<float>& coords, const cv::Size& img0_shape)
|
||||||
|
{
|
||||||
|
// cv::Mat scaledCoords = coords.clone();
|
||||||
|
std::vector<float> scaledCoords = coords;
|
||||||
|
|
||||||
|
// Calculate gain and padding
|
||||||
|
double gain = std::min(static_cast<double>(img1_shape.width) / img0_shape.width, static_cast<double>(img1_shape.height) / img0_shape.height);
|
||||||
|
cv::Point2d pad((img1_shape.width - img0_shape.width * gain) / 2, (img1_shape.height - img0_shape.height * gain) / 2);
|
||||||
|
|
||||||
|
// Apply padding
|
||||||
|
// scaledCoords.col(0) = (scaledCoords.col(0) - pad.x);
|
||||||
|
// scaledCoords.col(1) = (scaledCoords.col(1) - pad.y);
|
||||||
|
// Assuming coords are of shape [1, 17, 3]
|
||||||
|
for (int i = 0; i < scaledCoords.size(); i += 3) {
|
||||||
|
scaledCoords[i] -= pad.x; // x padding
|
||||||
|
scaledCoords[i + 1] -= pad.y; // y padding
|
||||||
|
}
|
||||||
|
|
||||||
|
// Scale coordinates
|
||||||
|
// scaledCoords.col(0) /= gain;
|
||||||
|
// scaledCoords.col(1) /= gain;
|
||||||
|
// Assuming coords are of shape [1, 17, 3]
|
||||||
|
for (int i = 0; i < scaledCoords.size(); i += 3) {
|
||||||
|
scaledCoords[i] /= gain;
|
||||||
|
scaledCoords[i + 1] /= gain;
|
||||||
|
}
|
||||||
|
|
||||||
|
clip_coords(scaledCoords, img0_shape);
|
||||||
|
return scaledCoords;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
cv::Mat crop_mask(const cv::Mat& mask, const cv::Rect& box) {
|
||||||
|
int h = mask.rows;
|
||||||
|
int w = mask.cols;
|
||||||
|
|
||||||
|
int x1 = box.x;
|
||||||
|
int y1 = box.y;
|
||||||
|
int x2 = box.x + box.width;
|
||||||
|
int y2 = box.y + box.height;
|
||||||
|
|
||||||
|
cv::Mat cropped_mask = cv::Mat::zeros(h, w, mask.type());
|
||||||
|
|
||||||
|
for (int r = 0; r < h; ++r) {
|
||||||
|
for (int c = 0; c < w; ++c) {
|
||||||
|
if (r >= y1 && r < y2 && c >= x1 && c < x2) {
|
||||||
|
cropped_mask.at<float>(r, c) = mask.at<float>(r, c);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return cropped_mask;
|
||||||
|
}
|
||||||
|
|
||||||
|
//std::tuple<std::vector<cv::Rect_<float>>, std::vector<float>, std::vector<int>, std::vector<std::vector<float>>>
|
||||||
|
std::tuple<std::vector<cv::Rect>, std::vector<float>, std::vector<int>, std::vector<std::vector<float>>>
|
||||||
|
non_max_suppression(const cv::Mat& output0, int class_names_num, int data_width, double conf_threshold,
|
||||||
|
float iou_threshold) {
|
||||||
|
|
||||||
|
std::vector<int> class_ids;
|
||||||
|
std::vector<float> confidences;
|
||||||
|
// std::vector<cv::Rect_<float>> boxes;
|
||||||
|
std::vector<cv::Rect> boxes;
|
||||||
|
std::vector<std::vector<float>> rest;
|
||||||
|
|
||||||
|
int rest_start_pos = class_names_num + 4;
|
||||||
|
int rest_features = data_width - rest_start_pos;
|
||||||
|
// int data_width = rest_start_pos + total_features_num;
|
||||||
|
|
||||||
|
int rows = output0.rows;
|
||||||
|
float* pdata = (float*) output0.data;
|
||||||
|
|
||||||
|
for (int r = 0; r < rows; ++r) {
|
||||||
|
cv::Mat scores(1, class_names_num, CV_32FC1, pdata + 4);
|
||||||
|
cv::Point class_id;
|
||||||
|
double max_conf;
|
||||||
|
minMaxLoc(scores, nullptr, &max_conf, nullptr, &class_id);
|
||||||
|
|
||||||
|
if (max_conf > conf_threshold) {
|
||||||
|
std::vector<float> mask_data(pdata + 4 + class_names_num, pdata + data_width);
|
||||||
|
class_ids.push_back(class_id.x);
|
||||||
|
confidences.push_back((float) max_conf);
|
||||||
|
|
||||||
|
float out_w = pdata[2];
|
||||||
|
float out_h = pdata[3];
|
||||||
|
float out_left = MAX((pdata[0] - 0.5 * out_w + 0.5), 0);
|
||||||
|
float out_top = MAX((pdata[1] - 0.5 * out_h + 0.5), 0);
|
||||||
|
cv::Rect_<float> bbox(out_left, out_top, (out_w + 0.5), (out_h + 0.5));
|
||||||
|
boxes.push_back(bbox);
|
||||||
|
if (rest_features > 0) {
|
||||||
|
std::vector<float> rest_data(pdata + rest_start_pos, pdata + data_width);
|
||||||
|
rest.push_back(rest_data);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pdata += data_width; // next prediction
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
//float masks_threshold = 0.50;
|
||||||
|
//int top_k = 500;
|
||||||
|
//const float& nmsde_eta = 1.0f;
|
||||||
|
std::vector<int> nms_result;
|
||||||
|
cv::dnn::NMSBoxes(boxes, confidences, conf_threshold, iou_threshold, nms_result); // , nms_eta, top_k);
|
||||||
|
// cv::dnn::NMSBoxes(boxes, confidences, );
|
||||||
|
std::vector<int> nms_class_ids;
|
||||||
|
std::vector<float> nms_confidences;
|
||||||
|
// std::vector<cv::Rect_<float>> boxes;
|
||||||
|
std::vector<cv::Rect> nms_boxes;
|
||||||
|
std::vector<std::vector<float>> nms_rest;
|
||||||
|
for (int idx: nms_result) {
|
||||||
|
nms_class_ids.push_back(class_ids[idx]);
|
||||||
|
nms_confidences.push_back(confidences[idx]);
|
||||||
|
nms_boxes.push_back(boxes[idx]);
|
||||||
|
nms_rest.push_back(rest[idx]);
|
||||||
|
}
|
||||||
|
return std::make_tuple(nms_boxes, nms_confidences, nms_class_ids, nms_rest);
|
||||||
|
}
|
Loading…
Reference in new issue