@ -3,39 +3,35 @@
# define benchmark
# define benchmark
DCSP_CORE : : DCSP_CORE ( )
DCSP_CORE : : DCSP_CORE ( ) {
{
}
}
DCSP_CORE : : ~ DCSP_CORE ( )
DCSP_CORE : : ~ DCSP_CORE ( ) {
{
delete session ;
delete session ;
}
}
# ifdef USE_CUDA
namespace Ort
namespace Ort
{
{
template < >
template < >
struct TypeToTensorType < half > { static constexpr ONNXTensorElementDataType type = ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16 ; } ;
struct TypeToTensorType < half > { static constexpr ONNXTensorElementDataType type = ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16 ; } ;
}
}
# endif
template < typename T >
template < typename T >
char * BlobFromImage ( cv : : Mat & iImg , T & iBlob )
char * BlobFromImage ( cv : : Mat & iImg , T & iBlob ) {
{
int channels = iImg . channels ( ) ;
int channels = iImg . channels ( ) ;
int imgHeight = iImg . rows ;
int imgHeight = iImg . rows ;
int imgWidth = iImg . cols ;
int imgWidth = iImg . cols ;
for ( int c = 0 ; c < channels ; c + + )
for ( int c = 0 ; c < channels ; c + + ) {
{
for ( int h = 0 ; h < imgHeight ; h + + ) {
for ( int h = 0 ; h < imgHeight ; h + + )
for ( int w = 0 ; w < imgWidth ; w + + ) {
{
iBlob [ c * imgWidth * imgHeight + h * imgWidth + w ] = typename std : : remove_pointer < T > : : type (
for ( int w = 0 ; w < imgWidth ; w + + )
( iImg . at < cv : : Vec3b > ( h , w ) [ c ] ) / 255.0f ) ;
{
iBlob [ c * imgWidth * imgHeight + h * imgWidth + w ] = typename std : : remove_pointer < T > : : type ( ( iImg . at < cv : : Vec3b > ( h , w ) [ c ] ) / 255.0f ) ;
}
}
}
}
}
}
@ -43,12 +39,10 @@ char* BlobFromImage(cv::Mat& iImg, T& iBlob)
}
}
char * PostProcess ( cv : : Mat & iImg , std : : vector < int > iImgSize , cv : : Mat & oImg )
char * PostProcess ( cv : : Mat & iImg , std : : vector < int > iImgSize , cv : : Mat & oImg ) {
{
cv : : Mat img = iImg . clone ( ) ;
cv : : Mat img = iImg . clone ( ) ;
cv : : resize ( iImg , oImg , cv : : Size ( iImgSize . at ( 0 ) , iImgSize . at ( 1 ) ) ) ;
cv : : resize ( iImg , oImg , cv : : Size ( iImgSize . at ( 0 ) , iImgSize . at ( 1 ) ) ) ;
if ( img . channels ( ) = = 1 )
if ( img . channels ( ) = = 1 ) {
{
cv : : cvtColor ( oImg , oImg , cv : : COLOR_GRAY2BGR ) ;
cv : : cvtColor ( oImg , oImg , cv : : COLOR_GRAY2BGR ) ;
}
}
cv : : cvtColor ( oImg , oImg , cv : : COLOR_BGR2RGB ) ;
cv : : cvtColor ( oImg , oImg , cv : : COLOR_BGR2RGB ) ;
@ -56,27 +50,23 @@ char* PostProcess(cv::Mat& iImg, std::vector<int> iImgSize, cv::Mat& oImg)
}
}
char * DCSP_CORE : : CreateSession ( DCSP_INIT_PARAM & iParams )
char * DCSP_CORE : : CreateSession ( DCSP_INIT_PARAM & iParams ) {
{
char * Ret = RET_OK ;
char * Ret = RET_OK ;
std : : regex pattern ( " [ \u4e00 - \u9fa5 ] " ) ;
std : : regex pattern ( " [ \u4e00 - \u9fa5 ] " ) ;
bool result = std : : regex_search ( iParams . ModelPath , pattern ) ;
bool result = std : : regex_search ( iParams . ModelPath , pattern ) ;
if ( result )
if ( result ) {
{
Ret = " [DCSP_ONNX]:Model path error.Change your model path without chinese characters. " ;
Ret = " [DCSP_ONNX]:Model path error.Change your model path without chinese characters. " ;
std : : cout < < Ret < < std : : endl ;
std : : cout < < Ret < < std : : endl ;
return Ret ;
return Ret ;
}
}
try
try {
{
rectConfidenceThreshold = iParams . RectConfidenceThreshold ;
rectConfidenceThreshold = iParams . RectConfidenceThreshold ;
iouThreshold = iParams . iouThreshold ;
iouThreshold = iParams . iouThreshold ;
imgSize = iParams . imgSize ;
imgSize = iParams . imgSize ;
modelType = iParams . ModelType ;
modelType = iParams . ModelType ;
env = Ort : : Env ( ORT_LOGGING_LEVEL_WARNING , " Yolo " ) ;
env = Ort : : Env ( ORT_LOGGING_LEVEL_WARNING , " Yolo " ) ;
Ort : : SessionOptions sessionOption ;
Ort : : SessionOptions sessionOption ;
if ( iParams . CudaEnable )
if ( iParams . CudaEnable ) {
{
cudaEnable = iParams . CudaEnable ;
cudaEnable = iParams . CudaEnable ;
OrtCUDAProviderOptions cudaOption ;
OrtCUDAProviderOptions cudaOption ;
cudaOption . device_id = 0 ;
cudaOption . device_id = 0 ;
@ -99,16 +89,14 @@ char* DCSP_CORE::CreateSession(DCSP_INIT_PARAM &iParams)
session = new Ort : : Session ( env , modelPath , sessionOption ) ;
session = new Ort : : Session ( env , modelPath , sessionOption ) ;
Ort : : AllocatorWithDefaultOptions allocator ;
Ort : : AllocatorWithDefaultOptions allocator ;
size_t inputNodesNum = session - > GetInputCount ( ) ;
size_t inputNodesNum = session - > GetInputCount ( ) ;
for ( size_t i = 0 ; i < inputNodesNum ; i + + )
for ( size_t i = 0 ; i < inputNodesNum ; i + + ) {
{
Ort : : AllocatedStringPtr input_node_name = session - > GetInputNameAllocated ( i , allocator ) ;
Ort : : AllocatedStringPtr input_node_name = session - > GetInputNameAllocated ( i , allocator ) ;
char * temp_buf = new char [ 50 ] ;
char * temp_buf = new char [ 50 ] ;
strcpy ( temp_buf , input_node_name . get ( ) ) ;
strcpy ( temp_buf , input_node_name . get ( ) ) ;
inputNodeNames . push_back ( temp_buf ) ;
inputNodeNames . push_back ( temp_buf ) ;
}
}
size_t OutputNodesNum = session - > GetOutputCount ( ) ;
size_t OutputNodesNum = session - > GetOutputCount ( ) ;
for ( size_t i = 0 ; i < OutputNodesNum ; i + + )
for ( size_t i = 0 ; i < OutputNodesNum ; i + + ) {
{
Ort : : AllocatedStringPtr output_node_name = session - > GetOutputNameAllocated ( i , allocator ) ;
Ort : : AllocatedStringPtr output_node_name = session - > GetOutputNameAllocated ( i , allocator ) ;
char * temp_buf = new char [ 10 ] ;
char * temp_buf = new char [ 10 ] ;
strcpy ( temp_buf , output_node_name . get ( ) ) ;
strcpy ( temp_buf , output_node_name . get ( ) ) ;
@ -118,8 +106,7 @@ char* DCSP_CORE::CreateSession(DCSP_INIT_PARAM &iParams)
WarmUpSession ( ) ;
WarmUpSession ( ) ;
return RET_OK ;
return RET_OK ;
}
}
catch ( const std : : exception & e )
catch ( const std : : exception & e ) {
{
const char * str1 = " [DCSP_ONNX]: " ;
const char * str1 = " [DCSP_ONNX]: " ;
const char * str2 = e . what ( ) ;
const char * str2 = e . what ( ) ;
std : : string result = std : : string ( str1 ) + std : : string ( str2 ) ;
std : : string result = std : : string ( str1 ) + std : : string ( str2 ) ;
@ -133,8 +120,7 @@ char* DCSP_CORE::CreateSession(DCSP_INIT_PARAM &iParams)
}
}
char * DCSP_CORE : : RunSession ( cv : : Mat & iImg , std : : vector < DCSP_RESULT > & oResult )
char * DCSP_CORE : : RunSession ( cv : : Mat & iImg , std : : vector < DCSP_RESULT > & oResult ) {
{
# ifdef benchmark
# ifdef benchmark
clock_t starttime_1 = clock ( ) ;
clock_t starttime_1 = clock ( ) ;
# endif // benchmark
# endif // benchmark
@ -142,19 +128,18 @@ char* DCSP_CORE::RunSession(cv::Mat &iImg, std::vector<DCSP_RESULT>& oResult)
char * Ret = RET_OK ;
char * Ret = RET_OK ;
cv : : Mat processedImg ;
cv : : Mat processedImg ;
PostProcess ( iImg , imgSize , processedImg ) ;
PostProcess ( iImg , imgSize , processedImg ) ;
if ( modelType < 4 )
if ( modelType < 4 ) {
{
float * blob = new float [ processedImg . total ( ) * 3 ] ;
float * blob = new float [ processedImg . total ( ) * 3 ] ;
BlobFromImage ( processedImg , blob ) ;
BlobFromImage ( processedImg , blob ) ;
std : : vector < int64_t > inputNodeDims = { 1 , 3 , imgSize . at ( 0 ) , imgSize . at ( 1 ) } ;
std : : vector < int64_t > inputNodeDims = { 1 , 3 , imgSize . at ( 0 ) , imgSize . at ( 1 ) } ;
TensorProcess ( starttime_1 , iImg , blob , inputNodeDims , oResult ) ;
TensorProcess ( starttime_1 , iImg , blob , inputNodeDims , oResult ) ;
}
} else {
else
# ifdef USE_CUDA
{
half * blob = new half [ processedImg . total ( ) * 3 ] ;
half * blob = new half [ processedImg . total ( ) * 3 ] ;
BlobFromImage ( processedImg , blob ) ;
BlobFromImage ( processedImg , blob ) ;
std : : vector < int64_t > inputNodeDims = { 1 , 3 , imgSize . at ( 0 ) , imgSize . at ( 1 ) } ;
std : : vector < int64_t > inputNodeDims = { 1 , 3 , imgSize . at ( 0 ) , imgSize . at ( 1 ) } ;
TensorProcess ( starttime_1 , iImg , blob , inputNodeDims , oResult ) ;
TensorProcess ( starttime_1 , iImg , blob , inputNodeDims , oResult ) ;
# endif
}
}
return Ret ;
return Ret ;
@ -162,13 +147,16 @@ char* DCSP_CORE::RunSession(cv::Mat &iImg, std::vector<DCSP_RESULT>& oResult)
template < typename N >
template < typename N >
char * DCSP_CORE : : TensorProcess ( clock_t & starttime_1 , cv : : Mat & iImg , N & blob , std : : vector < int64_t > & inputNodeDims , std : : vector < DCSP_RESULT > & oResult )
char * DCSP_CORE : : TensorProcess ( clock_t & starttime_1 , cv : : Mat & iImg , N & blob , std : : vector < int64_t > & inputNodeDims ,
{
std : : vector < DCSP_RESULT > & oResult ) {
Ort : : Value inputTensor = Ort : : Value : : CreateTensor < typename std : : remove_pointer < N > : : type > ( Ort : : MemoryInfo : : CreateCpu ( OrtDeviceAllocator , OrtMemTypeCPU ) , blob , 3 * imgSize . at ( 0 ) * imgSize . at ( 1 ) , inputNodeDims . data ( ) , inputNodeDims . size ( ) ) ;
Ort : : Value inputTensor = Ort : : Value : : CreateTensor < typename std : : remove_pointer < N > : : type > (
Ort : : MemoryInfo : : CreateCpu ( OrtDeviceAllocator , OrtMemTypeCPU ) , blob , 3 * imgSize . at ( 0 ) * imgSize . at ( 1 ) ,
inputNodeDims . data ( ) , inputNodeDims . size ( ) ) ;
# ifdef benchmark
# ifdef benchmark
clock_t starttime_2 = clock ( ) ;
clock_t starttime_2 = clock ( ) ;
# endif // benchmark
# endif // benchmark
auto outputTensor = session - > Run ( options , inputNodeNames . data ( ) , & inputTensor , 1 , outputNodeNames . data ( ) , outputNodeNames . size ( ) ) ;
auto outputTensor = session - > Run ( options , inputNodeNames . data ( ) , & inputTensor , 1 , outputNodeNames . data ( ) ,
outputNodeNames . size ( ) ) ;
# ifdef benchmark
# ifdef benchmark
clock_t starttime_3 = clock ( ) ;
clock_t starttime_3 = clock ( ) ;
# endif // benchmark
# endif // benchmark
@ -178,8 +166,7 @@ char* DCSP_CORE::TensorProcess(clock_t& starttime_1, cv::Mat& iImg, N& blob, std
std : : vector < int64_t > outputNodeDims = tensor_info . GetShape ( ) ;
std : : vector < int64_t > outputNodeDims = tensor_info . GetShape ( ) ;
auto output = outputTensor . front ( ) . GetTensorMutableData < typename std : : remove_pointer < N > : : type > ( ) ;
auto output = outputTensor . front ( ) . GetTensorMutableData < typename std : : remove_pointer < N > : : type > ( ) ;
delete blob ;
delete blob ;
switch ( modelType )
switch ( modelType ) {
{
case 1 : //V8_ORIGIN_FP32
case 1 : //V8_ORIGIN_FP32
case 4 : //V8_ORIGIN_FP16
case 4 : //V8_ORIGIN_FP16
{
{
@ -195,15 +182,13 @@ char* DCSP_CORE::TensorProcess(clock_t& starttime_1, cv::Mat& iImg, N& blob, std
float x_factor = iImg . cols / 640. ;
float x_factor = iImg . cols / 640. ;
float y_factor = iImg . rows / 640. ;
float y_factor = iImg . rows / 640. ;
for ( int i = 0 ; i < strideNum ; + + i )
for ( int i = 0 ; i < strideNum ; + + i ) {
{
float * classesScores = data + 4 ;
float * classesScores = data + 4 ;
cv : : Mat scores ( 1 , this - > classes . size ( ) , CV_32FC1 , classesScores ) ;
cv : : Mat scores ( 1 , this - > classes . size ( ) , CV_32FC1 , classesScores ) ;
cv : : Point class_id ;
cv : : Point class_id ;
double maxClassScore ;
double maxClassScore ;
cv : : minMaxLoc ( scores , 0 , & maxClassScore , 0 , & class_id ) ;
cv : : minMaxLoc ( scores , 0 , & maxClassScore , 0 , & class_id ) ;
if ( maxClassScore > rectConfidenceThreshold )
if ( maxClassScore > rectConfidenceThreshold ) {
{
confidences . push_back ( maxClassScore ) ;
confidences . push_back ( maxClassScore ) ;
class_ids . push_back ( class_id . x ) ;
class_ids . push_back ( class_id . x ) ;
@ -226,8 +211,7 @@ char* DCSP_CORE::TensorProcess(clock_t& starttime_1, cv::Mat& iImg, N& blob, std
std : : vector < int > nmsResult ;
std : : vector < int > nmsResult ;
cv : : dnn : : NMSBoxes ( boxes , confidences , rectConfidenceThreshold , iouThreshold , nmsResult ) ;
cv : : dnn : : NMSBoxes ( boxes , confidences , rectConfidenceThreshold , iouThreshold , nmsResult ) ;
for ( int i = 0 ; i < nmsResult . size ( ) ; + + i )
for ( int i = 0 ; i < nmsResult . size ( ) ; + + i ) {
{
int idx = nmsResult [ i ] ;
int idx = nmsResult [ i ] ;
DCSP_RESULT result ;
DCSP_RESULT result ;
result . classId = class_ids [ idx ] ;
result . classId = class_ids [ idx ] ;
@ -242,13 +226,12 @@ char* DCSP_CORE::TensorProcess(clock_t& starttime_1, cv::Mat& iImg, N& blob, std
double pre_process_time = ( double ) ( starttime_2 - starttime_1 ) / CLOCKS_PER_SEC * 1000 ;
double pre_process_time = ( double ) ( starttime_2 - starttime_1 ) / CLOCKS_PER_SEC * 1000 ;
double process_time = ( double ) ( starttime_3 - starttime_2 ) / CLOCKS_PER_SEC * 1000 ;
double process_time = ( double ) ( starttime_3 - starttime_2 ) / CLOCKS_PER_SEC * 1000 ;
double post_process_time = ( double ) ( starttime_4 - starttime_3 ) / CLOCKS_PER_SEC * 1000 ;
double post_process_time = ( double ) ( starttime_4 - starttime_3 ) / CLOCKS_PER_SEC * 1000 ;
if ( cudaEnable )
if ( cudaEnable ) {
{
std : : cout < < " [DCSP_ONNX(CUDA)]: " < < pre_process_time < < " ms pre-process, " < < process_time
std : : cout < < " [DCSP_ONNX(CUDA)]: " < < pre_process_time < < " ms pre-process, " < < process_time < < " ms inference, " < < post_process_time < < " ms post-process. " < < std : : endl ;
< < " ms inference, " < < post_process_time < < " ms post-process. " < < std : : endl ;
}
} else {
else
std : : cout < < " [DCSP_ONNX(CPU)]: " < < pre_process_time < < " ms pre-process, " < < process_time
{
< < " ms inference, " < < post_process_time < < " ms post-process. " < < std : : endl ;
std : : cout < < " [DCSP_ONNX(CPU)]: " < < pre_process_time < < " ms pre-process, " < < process_time < < " ms inference, " < < post_process_time < < " ms post-process. " < < std : : endl ;
}
}
# endif // benchmark
# endif // benchmark
@ -259,29 +242,28 @@ char* DCSP_CORE::TensorProcess(clock_t& starttime_1, cv::Mat& iImg, N& blob, std
}
}
char * DCSP_CORE : : WarmUpSession ( )
char * DCSP_CORE : : WarmUpSession ( ) {
{
clock_t starttime_1 = clock ( ) ;
clock_t starttime_1 = clock ( ) ;
cv : : Mat iImg = cv : : Mat ( cv : : Size ( imgSize . at ( 0 ) , imgSize . at ( 1 ) ) , CV_8UC3 ) ;
cv : : Mat iImg = cv : : Mat ( cv : : Size ( imgSize . at ( 0 ) , imgSize . at ( 1 ) ) , CV_8UC3 ) ;
cv : : Mat processedImg ;
cv : : Mat processedImg ;
PostProcess ( iImg , imgSize , processedImg ) ;
PostProcess ( iImg , imgSize , processedImg ) ;
if ( modelType < 4 )
if ( modelType < 4 ) {
{
float * blob = new float [ iImg . total ( ) * 3 ] ;
float * blob = new float [ iImg . total ( ) * 3 ] ;
BlobFromImage ( processedImg , blob ) ;
BlobFromImage ( processedImg , blob ) ;
std : : vector < int64_t > YOLO_input_node_dims = { 1 , 3 , imgSize . at ( 0 ) , imgSize . at ( 1 ) } ;
std : : vector < int64_t > YOLO_input_node_dims = { 1 , 3 , imgSize . at ( 0 ) , imgSize . at ( 1 ) } ;
Ort : : Value input_tensor = Ort : : Value : : CreateTensor < float > ( Ort : : MemoryInfo : : CreateCpu ( OrtDeviceAllocator , OrtMemTypeCPU ) , blob , 3 * imgSize . at ( 0 ) * imgSize . at ( 1 ) , YOLO_input_node_dims . data ( ) , YOLO_input_node_dims . size ( ) ) ;
Ort : : Value input_tensor = Ort : : Value : : CreateTensor < float > (
auto output_tensors = session - > Run ( options , inputNodeNames . data ( ) , & input_tensor , 1 , outputNodeNames . data ( ) , outputNodeNames . size ( ) ) ;
Ort : : MemoryInfo : : CreateCpu ( OrtDeviceAllocator , OrtMemTypeCPU ) , blob , 3 * imgSize . at ( 0 ) * imgSize . at ( 1 ) ,
YOLO_input_node_dims . data ( ) , YOLO_input_node_dims . size ( ) ) ;
auto output_tensors = session - > Run ( options , inputNodeNames . data ( ) , & input_tensor , 1 , outputNodeNames . data ( ) ,
outputNodeNames . size ( ) ) ;
delete [ ] blob ;
delete [ ] blob ;
clock_t starttime_4 = clock ( ) ;
clock_t starttime_4 = clock ( ) ;
double post_process_time = ( double ) ( starttime_4 - starttime_1 ) / CLOCKS_PER_SEC * 1000 ;
double post_process_time = ( double ) ( starttime_4 - starttime_1 ) / CLOCKS_PER_SEC * 1000 ;
if ( cudaEnable )
if ( cudaEnable ) {
{
std : : cout < < " [DCSP_ONNX(CUDA)]: " < < " Cuda warm-up cost " < < post_process_time < < " ms. " < < std : : endl ;
std : : cout < < " [DCSP_ONNX(CUDA)]: " < < " Cuda warm-up cost " < < post_process_time < < " ms. " < < std : : endl ;
}
}
}
} else {
else
# ifdef USE_CUDA
{
half * blob = new half [ iImg . total ( ) * 3 ] ;
half * blob = new half [ iImg . total ( ) * 3 ] ;
BlobFromImage ( processedImg , blob ) ;
BlobFromImage ( processedImg , blob ) ;
std : : vector < int64_t > YOLO_input_node_dims = { 1 , 3 , imgSize . at ( 0 ) , imgSize . at ( 1 ) } ;
std : : vector < int64_t > YOLO_input_node_dims = { 1 , 3 , imgSize . at ( 0 ) , imgSize . at ( 1 ) } ;
@ -294,6 +276,7 @@ char* DCSP_CORE::WarmUpSession()
{
{
std : : cout < < " [DCSP_ONNX(CUDA)]: " < < " Cuda warm-up cost " < < post_process_time < < " ms. " < < std : : endl ;
std : : cout < < " [DCSP_ONNX(CUDA)]: " < < " Cuda warm-up cost " < < post_process_time < < " ms. " < < std : : endl ;
}
}
# endif
}
}
return RET_OK ;
return RET_OK ;
}
}