init commit

This commit is contained in:
2024-07-13 13:15:28 +00:00
commit 157639e9e3
14 changed files with 1903 additions and 0 deletions

34
include/constants.h Normal file
View File

@ -0,0 +1,34 @@
#pragma once
#include <string>
/// String keys used for model metadata entries.
/// NOTE(review): presumably these are the keys looked up in the ONNX model's
/// custom metadata map - confirm against the code that reads the metadata.
namespace MetadataConstants {
    inline const std::string IMGSZ{"imgsz"};
    inline const std::string STRIDE{"stride"};
    inline const std::string NC{"nc"};
    inline const std::string CH{"ch"};
    inline const std::string DATE{"date"};
    inline const std::string VERSION{"version"};
    inline const std::string TASK{"task"};
    inline const std::string BATCH{"batch"};
    inline const std::string NAMES{"names"};
}  // namespace MetadataConstants
/// Names of the supported execution providers, as plain lowercase strings.
namespace OnnxProviders {
    inline const std::string CPU{"cpu"};
    inline const std::string CUDA{"cuda"};
}  // namespace OnnxProviders
/// Sentinel values marking model attributes that have not been set yet.
/// They are used as the default member initializers of AutoBackendOnnx::stride_ and ::nc_.
namespace OnnxInitializers {
    inline constexpr int UNINITIALIZED_STRIDE{-1};
    inline constexpr int UNINITIALIZED_NC{-1};
}  // namespace OnnxInitializers
/// Task identifiers, one string per YOLO task type.
/// NOTE(review): presumably compared against the model's "task" metadata value - confirm at the call sites.
namespace YoloTasks {
    inline const std::string SEGMENT{"segment"};
    inline const std::string DETECT{"detect"};
    inline const std::string POSE{"pose"};
    inline const std::string CLASSIFY{"classify"};
}  // namespace YoloTasks

90
include/nn/autobackend.h Normal file
View File

@ -0,0 +1,90 @@
#pragma once
#include <filesystem>
#include <vector>
#include <unordered_map>
#include <opencv2/core/mat.hpp>
#include "onnx_model_base.h"
#include "constants.h"
/**
 * @brief One prediction produced by the YOLO backend.
 *
 * Bundles everything reported for a single detected object: its class index, confidence score,
 * bounding box and, when available, a semantic segmentation mask and pose keypoints.
 */
struct YoloResults {
    int class_idx = 0;             ///< Class index of the detected object.
    float conf = 0.0f;             ///< Confidence score of the detection.
    cv::Rect_<float> bbox;         ///< Bounding box of the detected object.
    cv::Mat mask;                  ///< Semantic segmentation mask (if available).
    std::vector<float> keypoints;  ///< Keypoints describing the object's pose (if available).
};
// Per-image information passed to the post-processing routines of AutoBackendOnnx.
// NOTE(review): raw_size is presumably the size of the original (un-resized) input image - confirm at the call sites.
struct ImageInfo {
cv::Size raw_size; // add additional attrs if you need
};
class AutoBackendOnnx : public OnnxModelBase {
public:
// constructors
AutoBackendOnnx(const char* modelPath, const char* logid, const char* provider,
const std::vector<int>& imgsz, const int& stride,
const int& nc, std::unordered_map<int, std::string> names);
AutoBackendOnnx(const char* modelPath, const char* logid, const char* provider);
// getters
virtual const std::vector<int>& getImgsz();
virtual const int& getStride();
virtual const int& getCh();
virtual const int& getNc();
virtual const std::unordered_map<int, std::string>& getNames();
virtual const std::vector<int64_t>& getInputTensorShape();
virtual const int& getWidth();
virtual const int& getHeight();
virtual const cv::Size& getCvSize();
virtual const std::string& getTask();
/**
* @brief Runs object detection on an input image.
*
* This method performs object detection on the input image and returns the detected objects as YoloResults.
*
* @param image The input image to run object detection on.
* @param conf The confidence threshold for object detection.
* @param iou The intersection-over-union (IoU) threshold for non-maximum suppression.
* @param mask_threshold The threshold for the semantic segmentation mask.
* @param conversionCode An optional conversion code for image format conversion (e.g., cv::COLOR_BGR2RGB).
* Default value is -1, indicating no conversion.
* TODO: use some constant from some namespace rather than hardcoded values here
*
* @return A vector of YoloResults representing the detected objects.
*/
virtual std::vector<YoloResults> predict_once(cv::Mat& image, float& conf, float& iou, float& mask_threshold, int conversionCode = -1, bool verbose = true);
virtual std::vector<YoloResults> predict_once(const std::filesystem::path& imagePath, float& conf, float& iou, float& mask_threshold, int conversionCode = -1, bool verbose = true);
virtual std::vector<YoloResults> predict_once(const std::string& imagePath, float& conf, float& iou, float& mask_threshold, int conversionCode = -1, bool verbose = true);
virtual void fill_blob(cv::Mat& image, float*& blob, std::vector<int64_t>& inputTensorShape);
virtual void postprocess_masks(cv::Mat& output0, cv::Mat& output1, ImageInfo para, std::vector<YoloResults>& output,
int& class_names_num, float& conf_threshold, float& iou_threshold,
int& iw, int& ih, int& mw, int& mh, int& masks_features_num, float mask_threshold = 0.50f);
virtual void postprocess_detects(cv::Mat& output0, ImageInfo image_info, std::vector<YoloResults>& output,
int& class_names_num, float& conf_threshold, float& iou_threshold);
virtual void postprocess_kpts(cv::Mat& output0, ImageInfo& image_info, std::vector<YoloResults>& output,
int& class_names_num, float& conf_threshold, float& iou_threshold);
static void _get_mask2(const cv::Mat& mask_info, const cv::Mat& mask_data, const ImageInfo& image_info, cv::Rect bound, cv::Mat& mask_out,
float& mask_thresh, int& iw, int& ih, int& mw, int& mh, int& masks_features_num, bool round_downsampled = false);
protected:
std::vector<int> imgsz_;
int stride_ = OnnxInitializers::UNINITIALIZED_STRIDE;
int nc_ = OnnxInitializers::UNINITIALIZED_NC; //
int ch_ = 3;
std::unordered_map<int, std::string> names_;
std::vector<int64_t> inputTensorShape_;
cv::Size cvSize_;
std::string task_;
//cv::MatSize cvMatSize_;
};

View File

@ -0,0 +1,39 @@
#pragma once
#include <onnxruntime_cxx_api.h>
#include <string>
#include <unordered_map>
#include <vector>
/*
 * Minimal base class for loading an ONNX model through ONNX Runtime.
 *
 * This interface must take only the arguments required to load any ONNX model, independent of
 * model-specific information: `modelPath` is always required, and so is the provider ("cpu" or
 * "cuda"), because both are needed to set up the `sessionOptions` / `session` objects properly.
 * Image size is not needed to load a pure ONNX graph, so do NOT include it here.
 */
class OnnxModelBase {
public:
    OnnxModelBase(const char* modelPath, const char* logid, const char* provider);
    //OnnxModelBase(); // no default constructor should be there

    // The class declares virtual functions and is used as a base (AutoBackendOnnx), so it needs a
    // virtual destructor: destroying a derived object through an OnnxModelBase* is undefined
    // behaviour otherwise. Defined inline so no source file has to change.
    virtual ~OnnxModelBase() = default;

    // A user-declared destructor suppresses the implicit move operations; default them explicitly
    // so the class stays movable exactly as before. Copying remains unavailable because the
    // ONNX Runtime handle members are move-only.
    OnnxModelBase(OnnxModelBase&&) = default;
    OnnxModelBase& operator=(OnnxModelBase&&) = default;

    // Accessors. The *CStr variants return the name lists as vectors of C strings, by value.
    virtual const std::vector<std::string>& getInputNames();  // = 0
    virtual const std::vector<std::string>& getOutputNames();
    virtual const std::vector<const char*> getOutputNamesCStr();
    virtual const std::vector<const char*> getInputNamesCStr();
    virtual const Ort::ModelMetadata& getModelMetadata();
    virtual const std::unordered_map<std::string, std::string>& getMetadata();
    virtual const char* getModelPath();
    virtual const Ort::Session& getSession();

    // Runs the model on `inputTensors` and returns the output tensors.
    //virtual std::vector<Ort::Value> forward(std::vector<Ort::Value> inputTensors);
    virtual std::vector<Ort::Value> forward(std::vector<Ort::Value>& inputTensors);

    // NOTE(review): `session` is declared before `env`, so `env` is destroyed first (members are
    // destroyed in reverse declaration order). Confirm ONNX Runtime tolerates the Env being
    // released while the Session is still alive; otherwise the two declarations should be swapped.
    Ort::Session session{ nullptr };

protected:
    // Non-owning pointer: the string passed to the constructor must outlive this object.
    // NOTE(review): assumes the constructor stores the pointer rather than copying the text - confirm.
    const char* modelPath_;
    Ort::Env env{ nullptr };
    std::vector<std::string> inputNodeNames;
    std::vector<std::string> outputNodeNames;
    Ort::ModelMetadata model_metadata{ nullptr };
    std::unordered_map<std::string, std::string> metadata;
    // NOTE(review): presumably point into the std::string elements of the name vectors above;
    // if so they are invalidated when those vectors change - confirm in the implementation.
    std::vector<const char*> outputNamesCStr;
    std::vector<const char*> inputNamesCStr;
};

20
include/utils/augment.h Normal file
View File

@ -0,0 +1,20 @@
#pragma once
#include <opencv2/core/types.hpp>
/**
 * Letterbox helper: reads `image` and writes its result to `outImage`.
 *
 * NOTE(review): presumably resizes and pads the image to `newShape` in the usual YOLO
 * "letterbox" fashion; the exact effect of each flag is defined by the implementation - confirm there.
 *
 * @param image     Source image (read-only).
 * @param outImage  Destination image.
 * @param newShape  Requested output size; defaults to 640x640.
 * @param color     Scalar colour; defaults to a default-constructed cv::Scalar.
 * @param auto_     Defaults to true.
 * @param scaleFill Defaults to false.
 * @param scaleUp   Defaults to true.
 * @param stride    Defaults to 32.
 */
void letterbox(const cv::Mat& image, cv::Mat& outImage,
               const cv::Size& newShape = cv::Size(640, 640),
               cv::Scalar_<double> color = cv::Scalar_<double>(),
               bool auto_ = true, bool scaleFill = false, bool scaleUp = true,
               int stride = 32);
/**
 * Returns a scaled version of `resized_mask` as a new cv::Mat.
 *
 * NOTE(review): presumably maps the mask back to the original image size `im0_shape`,
 * undoing the resize/padding described by `ratio_pad` - confirm in the implementation.
 *
 * @param resized_mask Input mask (read-only).
 * @param im0_shape    Target size.
 * @param ratio_pad    (ratio, pad) pair; defaults to (-1, (-1, -1)).
 *                     NOTE(review): the default presumably acts as a "not provided" sentinel - confirm.
 */
cv::Mat scale_image(const cv::Mat& resized_mask,
                    const cv::Size& im0_shape,
                    const std::pair<float, cv::Point2f>& ratio_pad =
                        std::pair<float, cv::Point2f>(-1.0f, cv::Point2f(-1.0f, -1.0f)));
void scale_image2(
cv::Mat& scaled_mask, const cv::Mat& resized_mask, const cv::Size& im0_shape,
const std::pair<float, cv::Point2f>& ratio_pad = std::make_pair(-1.0f, cv::Point2f(-1.0f, -1.0f))
);

26
include/utils/common.h Normal file
View File

@ -0,0 +1,26 @@
#ifndef COMMON_UTILS_H
#define COMMON_UTILS_H
#include <chrono>
#include <string>
#include <unordered_map>
#include <vector>
/**
 * Timing helper tied to a caller-owned accumulator.
 *
 * The accumulator is held by reference, so the referenced double must outlive the Timer.
 * NOTE(review): presumably the constructor records the start time and Stop() adds the elapsed
 * time to the accumulator when `isEnabled` is true - confirm in the implementation.
 */
class Timer {
public:
    /// @param accumulator Caller-owned value this timer is bound to.
    /// @param isEnabled   Defaults to true.
    Timer(double& accumulator, bool isEnabled = true);

    void Stop();

private:
    using Clock = std::chrono::high_resolution_clock;

    double& accumulator;
    bool isEnabled;
    Clock::time_point start;
};
// Free helper functions; only the prototypes are visible in this header.

// Returns `path` as a wide string.
// NOTE(review): presumably for Windows APIs that expect wchar_t paths - confirm the encoding handling in the implementation.
std::wstring get_win_path(const std::string& path);
// Splits `input` into a vector of strings.
// NOTE(review): the expected textual format (delimiters, brackets) is defined by the implementation - confirm there.
std::vector<std::string> parseVectorString(const std::string& input);
// Converts each string of `input` to an int.
// NOTE(review): behaviour on non-numeric entries is not visible here - confirm.
std::vector<int> convertStringVectorToInts(const std::vector<std::string>& input);
// Parses `input` into an index -> name map.
// NOTE(review): presumably consumes the model's "names" metadata string - confirm the accepted format.
std::unordered_map<int, std::string> parseNames(const std::string& input);
// Returns a single int64_t computed from `vec`; by its name, the product of the elements.
// NOTE(review): the result for an empty vector and overflow handling are not visible here - confirm.
int64_t vector_product(const std::vector<int64_t>& vec);
#endif // COMMON_UTILS_H

44
include/utils/ops.h Normal file
View File

@ -0,0 +1,44 @@
#pragma once
#include <opencv2/core/types.hpp>
//cv::Rect scaleCoords(const cv::Size& imageShape, const cv::Rect& coords, const cv::Size& imageOriginalShape);
/**
 * Rescales a bounding box from the shape of the model input image (img1_shape) to the shape of
 * the original image (img0_shape).
 *
 * Both shapes are passed as cv::Size, which stores width first and height second.
 *
 * @param img1_shape Size of the input image for the model.
 * @param box        The bounding box to be scaled, specified as cv::Rect_<float>.
 *                   Taken by non-const reference.
 *                   NOTE(review): whether the argument itself is modified is not visible here - confirm.
 * @param img0_shape Size of the original target image.
 * @param ratio_pad  Optional (ratio, pad) pair: the first value is used for scaling, the second
 *                   for padding. Defaults to (-1, (-1, -1)).
 *                   NOTE(review): the default presumably means "derive from the two shapes" - confirm.
 * @param padding    Optional flag specifying whether padding should be applied to the bounding box.
 *                   Defaults to true.
 *
 * @return A scaled bounding box specified as cv::Rect_<float>.
 */
cv::Rect_<float> scale_boxes(const cv::Size& img1_shape,
                             cv::Rect_<float>& box,
                             const cv::Size& img0_shape,
                             std::pair<float, cv::Point2f> ratio_pad =
                                 std::pair<float, cv::Point2f>(-1.0f, cv::Point2f(-1.0f, -1.0f)),
                             bool padding = true);
// clip_boxes overloads: each takes the box(es) by mutable reference plus a `shape`, and returns nothing,
// so the result is delivered by modifying the argument in place.
// NOTE(review): presumably clamps each box to lie within an image of size `shape` - confirm in the implementation.
// Single integer box.
void clip_boxes(cv::Rect& box, const cv::Size& shape);
// Single floating-point box.
void clip_boxes(cv::Rect_<float>& box, const cv::Size& shape);
// Vector of integer boxes.
void clip_boxes(std::vector<cv::Rect>& boxes, const cv::Size& shape);
// Vector of floating-point boxes.
void clip_boxes(std::vector<cv::Rect_<float>>& boxes, const cv::Size& shape);
// Earlier cv::Mat-based signatures, kept for reference; the std::vector<float> versions below replace them.
//void clip_coords(cv::Mat& coords, const cv::Size& shape);
//cv::Mat scale_coords(const cv::Size& img1_shape, cv::Mat& coords, const cv::Size& img0_shape);
// Takes `coords` by mutable reference and returns nothing, so any result is written into `coords`.
// NOTE(review): presumably clamps the coordinates to an image of size `shape`; the layout of the flat
// vector (e.g. x, y[, score] per point) is defined by the implementation - confirm there.
void clip_coords(std::vector<float>& coords, const cv::Size& shape);
// Returns a vector of coordinates computed from `coords` and the two image sizes.
// NOTE(review): presumably rescales from the model input size (img1_shape) to the original image size
// (img0_shape); `coords` is a non-const reference, so it may also be modified - confirm.
std::vector<float> scale_coords(const cv::Size& img1_shape, std::vector<float>& coords, const cv::Size& img0_shape);
// Returns a new cv::Mat derived from `mask` and `box`; neither argument is modified (both are const references).
// NOTE(review): presumably keeps only the part of the mask inside `box` - confirm whether the result is
// cropped to the box size or keeps the full size with the outside zeroed.
cv::Mat crop_mask(const cv::Mat& mask, const cv::Rect& box);
// Aggregate with the same four element types, in the same order, as the tuple returned by non_max_suppression.
// NOTE(review): not referenced by any declaration visible in this header - confirm whether it is still used.
struct NMSResult{
std::vector<cv::Rect> bboxes; // bounding boxes
std::vector<float> confidences; // one confidence value per box (presumably index-aligned with bboxes - confirm)
std::vector<int> classes; // one class index per box (presumably index-aligned with bboxes - confirm)
std::vector<std::vector<float>> rest; // NOTE(review): presumably the remaining per-box features (mask coefficients / keypoints) - confirm
};
// Non-maximum suppression over the raw model output `output0`.
// Returns a tuple of (vector of cv::Rect, vector of float, vector of int, vector of float vectors); the element
// types match the fields of NMSResult (bboxes, confidences, classes, rest).
// NOTE(review): the tuple order presumably follows the NMSResult field order - confirm in the implementation.
// NOTE(review): conf_threshold is a double while iou_threshold is a float.
// The commented-out line below is an earlier return type that used floating-point boxes.
//std::tuple<std::vector<cv::Rect_<float>>, std::vector<float>, std::vector<int>, std::vector<std::vector<float>>>
std::tuple<std::vector<cv::Rect>, std::vector<float>, std::vector<int>, std::vector<std::vector<float>>>
non_max_suppression(const cv::Mat& output0, int class_names_num, int total_features_num, double conf_threshold, float iou_threshold);