init commit

2024-07-13 13:15:28 +00:00
commit 157639e9e3
14 changed files with 1903 additions and 0 deletions
--- a/include/nn/autobackend.h
+++ b/include/nn/autobackend.h
@ -0,0 +1,90 @@
+#pragma once
+#include <filesystem>
+#include <vector>
+#include <unordered_map>
+#include <opencv2/core/mat.hpp>
+
+#include "onnx_model_base.h"
+#include "constants.h"
+
+/**
+ * @brief Represents the results of YOLO prediction.
+ *
+ * This structure stores information about a detected object, including its class index,
+ * confidence score, bounding box, semantic segmentation mask, and keypoints (if available).
+ */
+struct YoloResults {
+    int class_idx{};                  ///< The class index of the detected object.
+    float conf{};                     ///< The confidence score of the detection.
+    cv::Rect_<float> bbox;            ///< The bounding box of the detected object.
+    cv::Mat mask;                     ///< The semantic segmentation mask (if available).
+    std::vector<float> keypoints{};   ///< Keypoints representing the object's pose (if available).
+};
+
+struct ImageInfo {
+    cv::Size raw_size;  // add additional attrs if you need
+};
+
+
+class AutoBackendOnnx : public OnnxModelBase {
+public:
+    // constructors
+    AutoBackendOnnx(const char* modelPath, const char* logid, const char* provider,
+        const std::vector<int>& imgsz, const int& stride,
+        const int& nc, std::unordered_map<int, std::string> names);
+
+    AutoBackendOnnx(const char* modelPath, const char* logid, const char* provider);
+
+    // getters
+    virtual const std::vector<int>& getImgsz();
+    virtual const int& getStride();
+    virtual const int& getCh();
+    virtual const int& getNc();
+    virtual const std::unordered_map<int, std::string>& getNames();
+    virtual const std::vector<int64_t>& getInputTensorShape();
+    virtual const int& getWidth();
+    virtual const int& getHeight();
+    virtual const cv::Size& getCvSize();
+    virtual const std::string& getTask();
+    /**
+     * @brief Runs object detection on an input image.
+     *
+     * This method performs object detection on the input image and returns the detected objects as YoloResults.
+     *
+     * @param image The input image to run object detection on.
+     * @param conf The confidence threshold for object detection.
+     * @param iou The intersection-over-union (IoU) threshold for non-maximum suppression.
+     * @param mask_threshold The threshold for the semantic segmentation mask.
+     * @param conversionCode An optional conversion code for image format conversion (e.g., cv::COLOR_BGR2RGB).
+     *                      Default value is -1, indicating no conversion.
+     *                      TODO: use some constant from some namespace rather than hardcoded values here
+     *
+     * @return A vector of YoloResults representing the detected objects.
+     */
+    virtual std::vector<YoloResults> predict_once(cv::Mat& image, float& conf, float& iou, float& mask_threshold, int conversionCode = -1, bool verbose = true);
+    virtual std::vector<YoloResults> predict_once(const std::filesystem::path& imagePath, float& conf, float& iou, float& mask_threshold, int conversionCode = -1, bool verbose = true);
+    virtual std::vector<YoloResults> predict_once(const std::string& imagePath, float& conf, float& iou, float& mask_threshold, int conversionCode = -1, bool verbose = true);
+
+    virtual void fill_blob(cv::Mat& image, float*& blob, std::vector<int64_t>& inputTensorShape);
+    virtual void postprocess_masks(cv::Mat& output0, cv::Mat& output1, ImageInfo para, std::vector<YoloResults>& output,
+        int& class_names_num, float& conf_threshold, float& iou_threshold,
+        int& iw, int& ih, int& mw, int& mh, int& masks_features_num, float mask_threshold = 0.50f);
+
+    virtual void postprocess_detects(cv::Mat& output0, ImageInfo image_info, std::vector<YoloResults>& output,
+        int& class_names_num, float& conf_threshold, float& iou_threshold);
+    virtual void postprocess_kpts(cv::Mat& output0, ImageInfo& image_info, std::vector<YoloResults>& output,
+                                  int& class_names_num, float& conf_threshold, float& iou_threshold);
+    static void _get_mask2(const cv::Mat& mask_info, const cv::Mat& mask_data, const ImageInfo& image_info, cv::Rect bound, cv::Mat& mask_out,
+        float& mask_thresh, int& iw, int& ih, int& mw, int& mh, int& masks_features_num, bool round_downsampled = false);
+
+protected:
+    std::vector<int> imgsz_;
+    int stride_ = OnnxInitializers::UNINITIALIZED_STRIDE;
+    int nc_ = OnnxInitializers::UNINITIALIZED_NC; //
+    int ch_ = 3;
+    std::unordered_map<int, std::string> names_;
+    std::vector<int64_t> inputTensorShape_;
+    cv::Size cvSize_;
+    std::string task_;
+    //cv::MatSize cvMatSize_;
+};
--- a/include/nn/onnx_model_base.h
+++ b/include/nn/onnx_model_base.h
@ -0,0 +1,39 @@
+#pragma once
+#include <onnxruntime_cxx_api.h>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+/*
+ * This interface must provide only required arguments to load any onnx model regarding specific info -
+ *  - i.e. modelPath will always be required, provider like "cpu" or "cuda" the same, since these are parameters you need
+ *  to set up `sessionOptions` or `session` objects properly, but image size is not needed for pure onnx graph to be loaded so do NOT include it here
+ */
+class OnnxModelBase {
+public:
+    OnnxModelBase(const char* modelPath, const char* logid, const char* provider);
+    //OnnxModelBase();  // no default constructor should be there
+    //virtual ~OnnxModelBase();
+    virtual const std::vector<std::string>& getInputNames(); // = 0
+    virtual const std::vector<std::string>& getOutputNames();
+    virtual const std::vector<const char*> getOutputNamesCStr();
+    virtual const std::vector<const char*> getInputNamesCStr();
+    virtual const Ort::ModelMetadata& getModelMetadata();
+    virtual const std::unordered_map<std::string, std::string>& getMetadata();
+    virtual const char* getModelPath();
+    virtual const Ort::Session& getSession();
+    //virtual std::vector<Ort::Value> forward(std::vector<Ort::Value> inputTensors);
+    virtual std::vector<Ort::Value> forward(std::vector<Ort::Value>& inputTensors);
+    Ort::Session session{ nullptr };
+
+protected:
+    const char* modelPath_;
+    Ort::Env env{ nullptr };
+
+    std::vector<std::string> inputNodeNames;
+    std::vector<std::string> outputNodeNames;
+    Ort::ModelMetadata model_metadata{ nullptr };
+    std::unordered_map<std::string, std::string> metadata;
+    std::vector<const char*> outputNamesCStr;
+    std::vector<const char*> inputNamesCStr;
+};