Initial commit.
Final release of the project Anonymizer (2015). Project settings for the Qt Creator (ver. 3.6).
This commit is contained in:
522
3rdparty/include/opencv2/core/affine.hpp
vendored
Normal file
522
3rdparty/include/opencv2/core/affine.hpp
vendored
Normal file
@ -0,0 +1,522 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_CORE_AFFINE3_HPP__
|
||||
#define __OPENCV_CORE_AFFINE3_HPP__
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
#include <opencv2/core.hpp>
|
||||
|
||||
namespace cv
|
||||
{
|
||||
|
||||
//! @addtogroup core
|
||||
//! @{
|
||||
|
||||
/** @brief Affine transform
|
||||
@todo document
|
||||
*/
|
||||
template<typename T>
|
||||
class Affine3
|
||||
{
|
||||
public:
|
||||
typedef T float_type;
|
||||
typedef Matx<float_type, 3, 3> Mat3;
|
||||
typedef Matx<float_type, 4, 4> Mat4;
|
||||
typedef Vec<float_type, 3> Vec3;
|
||||
|
||||
Affine3();
|
||||
|
||||
//! Augmented affine matrix
|
||||
Affine3(const Mat4& affine);
|
||||
|
||||
//! Rotation matrix
|
||||
Affine3(const Mat3& R, const Vec3& t = Vec3::all(0));
|
||||
|
||||
//! Rodrigues vector
|
||||
Affine3(const Vec3& rvec, const Vec3& t = Vec3::all(0));
|
||||
|
||||
//! Combines all contructors above. Supports 4x4, 4x3, 3x3, 1x3, 3x1 sizes of data matrix
|
||||
explicit Affine3(const Mat& data, const Vec3& t = Vec3::all(0));
|
||||
|
||||
//! From 16th element array
|
||||
explicit Affine3(const float_type* vals);
|
||||
|
||||
//! Create identity transform
|
||||
static Affine3 Identity();
|
||||
|
||||
//! Rotation matrix
|
||||
void rotation(const Mat3& R);
|
||||
|
||||
//! Rodrigues vector
|
||||
void rotation(const Vec3& rvec);
|
||||
|
||||
//! Combines rotation methods above. Suports 3x3, 1x3, 3x1 sizes of data matrix;
|
||||
void rotation(const Mat& data);
|
||||
|
||||
void linear(const Mat3& L);
|
||||
void translation(const Vec3& t);
|
||||
|
||||
Mat3 rotation() const;
|
||||
Mat3 linear() const;
|
||||
Vec3 translation() const;
|
||||
|
||||
//! Rodrigues vector
|
||||
Vec3 rvec() const;
|
||||
|
||||
Affine3 inv(int method = cv::DECOMP_SVD) const;
|
||||
|
||||
//! a.rotate(R) is equivalent to Affine(R, 0) * a;
|
||||
Affine3 rotate(const Mat3& R) const;
|
||||
|
||||
//! a.rotate(R) is equivalent to Affine(rvec, 0) * a;
|
||||
Affine3 rotate(const Vec3& rvec) const;
|
||||
|
||||
//! a.translate(t) is equivalent to Affine(E, t) * a;
|
||||
Affine3 translate(const Vec3& t) const;
|
||||
|
||||
//! a.concatenate(affine) is equivalent to affine * a;
|
||||
Affine3 concatenate(const Affine3& affine) const;
|
||||
|
||||
template <typename Y> operator Affine3<Y>() const;
|
||||
|
||||
template <typename Y> Affine3<Y> cast() const;
|
||||
|
||||
Mat4 matrix;
|
||||
|
||||
#if defined EIGEN_WORLD_VERSION && defined EIGEN_GEOMETRY_MODULE_H
|
||||
Affine3(const Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)>& affine);
|
||||
Affine3(const Eigen::Transform<T, 3, Eigen::Affine>& affine);
|
||||
operator Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)>() const;
|
||||
operator Eigen::Transform<T, 3, Eigen::Affine>() const;
|
||||
#endif
|
||||
};
|
||||
|
||||
template<typename T> static
|
||||
Affine3<T> operator*(const Affine3<T>& affine1, const Affine3<T>& affine2);
|
||||
|
||||
template<typename T, typename V> static
|
||||
V operator*(const Affine3<T>& affine, const V& vector);
|
||||
|
||||
typedef Affine3<float> Affine3f;
|
||||
typedef Affine3<double> Affine3d;
|
||||
|
||||
static Vec3f operator*(const Affine3f& affine, const Vec3f& vector);
|
||||
static Vec3d operator*(const Affine3d& affine, const Vec3d& vector);
|
||||
|
||||
template<typename _Tp> class DataType< Affine3<_Tp> >
|
||||
{
|
||||
public:
|
||||
typedef Affine3<_Tp> value_type;
|
||||
typedef Affine3<typename DataType<_Tp>::work_type> work_type;
|
||||
typedef _Tp channel_type;
|
||||
|
||||
enum { generic_type = 0,
|
||||
depth = DataType<channel_type>::depth,
|
||||
channels = 16,
|
||||
fmt = DataType<channel_type>::fmt + ((channels - 1) << 8),
|
||||
type = CV_MAKETYPE(depth, channels)
|
||||
};
|
||||
|
||||
typedef Vec<channel_type, channels> vec_type;
|
||||
};
|
||||
|
||||
//! @} core
|
||||
|
||||
}
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////
|
||||
// Implementaiton
|
||||
|
||||
template<typename T> inline
|
||||
cv::Affine3<T>::Affine3()
|
||||
: matrix(Mat4::eye())
|
||||
{}
|
||||
|
||||
template<typename T> inline
|
||||
cv::Affine3<T>::Affine3(const Mat4& affine)
|
||||
: matrix(affine)
|
||||
{}
|
||||
|
||||
template<typename T> inline
|
||||
cv::Affine3<T>::Affine3(const Mat3& R, const Vec3& t)
|
||||
{
|
||||
rotation(R);
|
||||
translation(t);
|
||||
matrix.val[12] = matrix.val[13] = matrix.val[14] = 0;
|
||||
matrix.val[15] = 1;
|
||||
}
|
||||
|
||||
template<typename T> inline
|
||||
cv::Affine3<T>::Affine3(const Vec3& _rvec, const Vec3& t)
|
||||
{
|
||||
rotation(_rvec);
|
||||
translation(t);
|
||||
matrix.val[12] = matrix.val[13] = matrix.val[14] = 0;
|
||||
matrix.val[15] = 1;
|
||||
}
|
||||
|
||||
template<typename T> inline
|
||||
cv::Affine3<T>::Affine3(const cv::Mat& data, const Vec3& t)
|
||||
{
|
||||
CV_Assert(data.type() == cv::DataType<T>::type);
|
||||
|
||||
if (data.cols == 4 && data.rows == 4)
|
||||
{
|
||||
data.copyTo(matrix);
|
||||
return;
|
||||
}
|
||||
else if (data.cols == 4 && data.rows == 3)
|
||||
{
|
||||
rotation(data(Rect(0, 0, 3, 3)));
|
||||
translation(data(Rect(3, 0, 1, 3)));
|
||||
return;
|
||||
}
|
||||
|
||||
rotation(data);
|
||||
translation(t);
|
||||
matrix.val[12] = matrix.val[13] = matrix.val[14] = 0;
|
||||
matrix.val[15] = 1;
|
||||
}
|
||||
|
||||
template<typename T> inline
|
||||
cv::Affine3<T>::Affine3(const float_type* vals) : matrix(vals)
|
||||
{}
|
||||
|
||||
template<typename T> inline
|
||||
cv::Affine3<T> cv::Affine3<T>::Identity()
|
||||
{
|
||||
return Affine3<T>(cv::Affine3<T>::Mat4::eye());
|
||||
}
|
||||
|
||||
template<typename T> inline
|
||||
void cv::Affine3<T>::rotation(const Mat3& R)
|
||||
{
|
||||
linear(R);
|
||||
}
|
||||
|
||||
template<typename T> inline
|
||||
void cv::Affine3<T>::rotation(const Vec3& _rvec)
|
||||
{
|
||||
double rx = _rvec[0], ry = _rvec[1], rz = _rvec[2];
|
||||
double theta = std::sqrt(rx*rx + ry*ry + rz*rz);
|
||||
|
||||
if (theta < DBL_EPSILON)
|
||||
rotation(Mat3::eye());
|
||||
else
|
||||
{
|
||||
const double I[] = { 1, 0, 0, 0, 1, 0, 0, 0, 1 };
|
||||
|
||||
double c = std::cos(theta);
|
||||
double s = std::sin(theta);
|
||||
double c1 = 1. - c;
|
||||
double itheta = (theta != 0) ? 1./theta : 0.;
|
||||
|
||||
rx *= itheta; ry *= itheta; rz *= itheta;
|
||||
|
||||
double rrt[] = { rx*rx, rx*ry, rx*rz, rx*ry, ry*ry, ry*rz, rx*rz, ry*rz, rz*rz };
|
||||
double _r_x_[] = { 0, -rz, ry, rz, 0, -rx, -ry, rx, 0 };
|
||||
Mat3 R;
|
||||
|
||||
// R = cos(theta)*I + (1 - cos(theta))*r*rT + sin(theta)*[r_x]
|
||||
// where [r_x] is [0 -rz ry; rz 0 -rx; -ry rx 0]
|
||||
for(int k = 0; k < 9; ++k)
|
||||
R.val[k] = static_cast<float_type>(c*I[k] + c1*rrt[k] + s*_r_x_[k]);
|
||||
|
||||
rotation(R);
|
||||
}
|
||||
}
|
||||
|
||||
//Combines rotation methods above. Suports 3x3, 1x3, 3x1 sizes of data matrix;
|
||||
template<typename T> inline
|
||||
void cv::Affine3<T>::rotation(const cv::Mat& data)
|
||||
{
|
||||
CV_Assert(data.type() == cv::DataType<T>::type);
|
||||
|
||||
if (data.cols == 3 && data.rows == 3)
|
||||
{
|
||||
Mat3 R;
|
||||
data.copyTo(R);
|
||||
rotation(R);
|
||||
}
|
||||
else if ((data.cols == 3 && data.rows == 1) || (data.cols == 1 && data.rows == 3))
|
||||
{
|
||||
Vec3 _rvec;
|
||||
data.reshape(1, 3).copyTo(_rvec);
|
||||
rotation(_rvec);
|
||||
}
|
||||
else
|
||||
CV_Assert(!"Input marix can be 3x3, 1x3 or 3x1");
|
||||
}
|
||||
|
||||
template<typename T> inline
|
||||
void cv::Affine3<T>::linear(const Mat3& L)
|
||||
{
|
||||
matrix.val[0] = L.val[0]; matrix.val[1] = L.val[1]; matrix.val[ 2] = L.val[2];
|
||||
matrix.val[4] = L.val[3]; matrix.val[5] = L.val[4]; matrix.val[ 6] = L.val[5];
|
||||
matrix.val[8] = L.val[6]; matrix.val[9] = L.val[7]; matrix.val[10] = L.val[8];
|
||||
}
|
||||
|
||||
template<typename T> inline
|
||||
void cv::Affine3<T>::translation(const Vec3& t)
|
||||
{
|
||||
matrix.val[3] = t[0]; matrix.val[7] = t[1]; matrix.val[11] = t[2];
|
||||
}
|
||||
|
||||
template<typename T> inline
|
||||
typename cv::Affine3<T>::Mat3 cv::Affine3<T>::rotation() const
|
||||
{
|
||||
return linear();
|
||||
}
|
||||
|
||||
template<typename T> inline
|
||||
typename cv::Affine3<T>::Mat3 cv::Affine3<T>::linear() const
|
||||
{
|
||||
typename cv::Affine3<T>::Mat3 R;
|
||||
R.val[0] = matrix.val[0]; R.val[1] = matrix.val[1]; R.val[2] = matrix.val[ 2];
|
||||
R.val[3] = matrix.val[4]; R.val[4] = matrix.val[5]; R.val[5] = matrix.val[ 6];
|
||||
R.val[6] = matrix.val[8]; R.val[7] = matrix.val[9]; R.val[8] = matrix.val[10];
|
||||
return R;
|
||||
}
|
||||
|
||||
template<typename T> inline
|
||||
typename cv::Affine3<T>::Vec3 cv::Affine3<T>::translation() const
|
||||
{
|
||||
return Vec3(matrix.val[3], matrix.val[7], matrix.val[11]);
|
||||
}
|
||||
|
||||
template<typename T> inline
|
||||
typename cv::Affine3<T>::Vec3 cv::Affine3<T>::rvec() const
|
||||
{
|
||||
cv::Vec3d w;
|
||||
cv::Matx33d u, vt, R = rotation();
|
||||
cv::SVD::compute(R, w, u, vt, cv::SVD::FULL_UV + cv::SVD::MODIFY_A);
|
||||
R = u * vt;
|
||||
|
||||
double rx = R.val[7] - R.val[5];
|
||||
double ry = R.val[2] - R.val[6];
|
||||
double rz = R.val[3] - R.val[1];
|
||||
|
||||
double s = std::sqrt((rx*rx + ry*ry + rz*rz)*0.25);
|
||||
double c = (R.val[0] + R.val[4] + R.val[8] - 1) * 0.5;
|
||||
c = c > 1.0 ? 1.0 : c < -1.0 ? -1.0 : c;
|
||||
double theta = acos(c);
|
||||
|
||||
if( s < 1e-5 )
|
||||
{
|
||||
if( c > 0 )
|
||||
rx = ry = rz = 0;
|
||||
else
|
||||
{
|
||||
double t;
|
||||
t = (R.val[0] + 1) * 0.5;
|
||||
rx = std::sqrt(std::max(t, 0.0));
|
||||
t = (R.val[4] + 1) * 0.5;
|
||||
ry = std::sqrt(std::max(t, 0.0)) * (R.val[1] < 0 ? -1.0 : 1.0);
|
||||
t = (R.val[8] + 1) * 0.5;
|
||||
rz = std::sqrt(std::max(t, 0.0)) * (R.val[2] < 0 ? -1.0 : 1.0);
|
||||
|
||||
if( fabs(rx) < fabs(ry) && fabs(rx) < fabs(rz) && (R.val[5] > 0) != (ry*rz > 0) )
|
||||
rz = -rz;
|
||||
theta /= std::sqrt(rx*rx + ry*ry + rz*rz);
|
||||
rx *= theta;
|
||||
ry *= theta;
|
||||
rz *= theta;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
double vth = 1/(2*s);
|
||||
vth *= theta;
|
||||
rx *= vth; ry *= vth; rz *= vth;
|
||||
}
|
||||
|
||||
return cv::Vec3d(rx, ry, rz);
|
||||
}
|
||||
|
||||
template<typename T> inline
|
||||
cv::Affine3<T> cv::Affine3<T>::inv(int method) const
|
||||
{
|
||||
return matrix.inv(method);
|
||||
}
|
||||
|
||||
template<typename T> inline
|
||||
cv::Affine3<T> cv::Affine3<T>::rotate(const Mat3& R) const
|
||||
{
|
||||
Mat3 Lc = linear();
|
||||
Vec3 tc = translation();
|
||||
Mat4 result;
|
||||
result.val[12] = result.val[13] = result.val[14] = 0;
|
||||
result.val[15] = 1;
|
||||
|
||||
for(int j = 0; j < 3; ++j)
|
||||
{
|
||||
for(int i = 0; i < 3; ++i)
|
||||
{
|
||||
float_type value = 0;
|
||||
for(int k = 0; k < 3; ++k)
|
||||
value += R(j, k) * Lc(k, i);
|
||||
result(j, i) = value;
|
||||
}
|
||||
|
||||
result(j, 3) = R.row(j).dot(tc.t());
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
template<typename T> inline
|
||||
cv::Affine3<T> cv::Affine3<T>::rotate(const Vec3& _rvec) const
|
||||
{
|
||||
return rotate(Affine3f(_rvec).rotation());
|
||||
}
|
||||
|
||||
template<typename T> inline
|
||||
cv::Affine3<T> cv::Affine3<T>::translate(const Vec3& t) const
|
||||
{
|
||||
Mat4 m = matrix;
|
||||
m.val[ 3] += t[0];
|
||||
m.val[ 7] += t[1];
|
||||
m.val[11] += t[2];
|
||||
return m;
|
||||
}
|
||||
|
||||
template<typename T> inline
|
||||
cv::Affine3<T> cv::Affine3<T>::concatenate(const Affine3<T>& affine) const
|
||||
{
|
||||
return (*this).rotate(affine.rotation()).translate(affine.translation());
|
||||
}
|
||||
|
||||
template<typename T> template <typename Y> inline
|
||||
cv::Affine3<T>::operator Affine3<Y>() const
|
||||
{
|
||||
return Affine3<Y>(matrix);
|
||||
}
|
||||
|
||||
template<typename T> template <typename Y> inline
|
||||
cv::Affine3<Y> cv::Affine3<T>::cast() const
|
||||
{
|
||||
return Affine3<Y>(matrix);
|
||||
}
|
||||
|
||||
template<typename T> inline
|
||||
cv::Affine3<T> cv::operator*(const cv::Affine3<T>& affine1, const cv::Affine3<T>& affine2)
|
||||
{
|
||||
return affine2.concatenate(affine1);
|
||||
}
|
||||
|
||||
template<typename T, typename V> inline
|
||||
V cv::operator*(const cv::Affine3<T>& affine, const V& v)
|
||||
{
|
||||
const typename Affine3<T>::Mat4& m = affine.matrix;
|
||||
|
||||
V r;
|
||||
r.x = m.val[0] * v.x + m.val[1] * v.y + m.val[ 2] * v.z + m.val[ 3];
|
||||
r.y = m.val[4] * v.x + m.val[5] * v.y + m.val[ 6] * v.z + m.val[ 7];
|
||||
r.z = m.val[8] * v.x + m.val[9] * v.y + m.val[10] * v.z + m.val[11];
|
||||
return r;
|
||||
}
|
||||
|
||||
static inline
|
||||
cv::Vec3f cv::operator*(const cv::Affine3f& affine, const cv::Vec3f& v)
|
||||
{
|
||||
const cv::Matx44f& m = affine.matrix;
|
||||
cv::Vec3f r;
|
||||
r.val[0] = m.val[0] * v[0] + m.val[1] * v[1] + m.val[ 2] * v[2] + m.val[ 3];
|
||||
r.val[1] = m.val[4] * v[0] + m.val[5] * v[1] + m.val[ 6] * v[2] + m.val[ 7];
|
||||
r.val[2] = m.val[8] * v[0] + m.val[9] * v[1] + m.val[10] * v[2] + m.val[11];
|
||||
return r;
|
||||
}
|
||||
|
||||
static inline
|
||||
cv::Vec3d cv::operator*(const cv::Affine3d& affine, const cv::Vec3d& v)
|
||||
{
|
||||
const cv::Matx44d& m = affine.matrix;
|
||||
cv::Vec3d r;
|
||||
r.val[0] = m.val[0] * v[0] + m.val[1] * v[1] + m.val[ 2] * v[2] + m.val[ 3];
|
||||
r.val[1] = m.val[4] * v[0] + m.val[5] * v[1] + m.val[ 6] * v[2] + m.val[ 7];
|
||||
r.val[2] = m.val[8] * v[0] + m.val[9] * v[1] + m.val[10] * v[2] + m.val[11];
|
||||
return r;
|
||||
}
|
||||
|
||||
|
||||
|
||||
#if defined EIGEN_WORLD_VERSION && defined EIGEN_GEOMETRY_MODULE_H
|
||||
|
||||
template<typename T> inline
|
||||
cv::Affine3<T>::Affine3(const Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)>& affine)
|
||||
{
|
||||
cv::Mat(4, 4, cv::DataType<T>::type, affine.matrix().data()).copyTo(matrix);
|
||||
}
|
||||
|
||||
template<typename T> inline
|
||||
cv::Affine3<T>::Affine3(const Eigen::Transform<T, 3, Eigen::Affine>& affine)
|
||||
{
|
||||
Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)> a = affine;
|
||||
cv::Mat(4, 4, cv::DataType<T>::type, a.matrix().data()).copyTo(matrix);
|
||||
}
|
||||
|
||||
template<typename T> inline
|
||||
cv::Affine3<T>::operator Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)>() const
|
||||
{
|
||||
Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)> r;
|
||||
cv::Mat hdr(4, 4, cv::DataType<T>::type, r.matrix().data());
|
||||
cv::Mat(matrix, false).copyTo(hdr);
|
||||
return r;
|
||||
}
|
||||
|
||||
template<typename T> inline
|
||||
cv::Affine3<T>::operator Eigen::Transform<T, 3, Eigen::Affine>() const
|
||||
{
|
||||
return this->operator Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)>();
|
||||
}
|
||||
|
||||
#endif /* defined EIGEN_WORLD_VERSION && defined EIGEN_GEOMETRY_MODULE_H */
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif /* __cplusplus */
|
||||
|
||||
#endif /* __OPENCV_CORE_AFFINE3_HPP__ */
|
745
3rdparty/include/opencv2/core/base.hpp
vendored
Normal file
745
3rdparty/include/opencv2/core/base.hpp
vendored
Normal file
@ -0,0 +1,745 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Copyright (C) 2014, Itseez Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_CORE_BASE_HPP__
|
||||
#define __OPENCV_CORE_BASE_HPP__
|
||||
|
||||
#ifndef __cplusplus
|
||||
# error base.hpp header must be compiled as C++
|
||||
#endif
|
||||
|
||||
#include <climits>
|
||||
|
||||
#include "opencv2/core/cvdef.h"
|
||||
#include "opencv2/core/cvstd.hpp"
|
||||
#include "opencv2/hal.hpp"
|
||||
|
||||
namespace cv
|
||||
{
|
||||
|
||||
//! @addtogroup core_utils
|
||||
//! @{
|
||||
|
||||
namespace Error {
|
||||
//! error codes
|
||||
enum Code {
|
||||
StsOk= 0, //!< everithing is ok
|
||||
StsBackTrace= -1, //!< pseudo error for back trace
|
||||
StsError= -2, //!< unknown /unspecified error
|
||||
StsInternal= -3, //!< internal error (bad state)
|
||||
StsNoMem= -4, //!< insufficient memory
|
||||
StsBadArg= -5, //!< function arg/param is bad
|
||||
StsBadFunc= -6, //!< unsupported function
|
||||
StsNoConv= -7, //!< iter. didn't converge
|
||||
StsAutoTrace= -8, //!< tracing
|
||||
HeaderIsNull= -9, //!< image header is NULL
|
||||
BadImageSize= -10, //!< image size is invalid
|
||||
BadOffset= -11, //!< offset is invalid
|
||||
BadDataPtr= -12, //!<
|
||||
BadStep= -13, //!<
|
||||
BadModelOrChSeq= -14, //!<
|
||||
BadNumChannels= -15, //!<
|
||||
BadNumChannel1U= -16, //!<
|
||||
BadDepth= -17, //!<
|
||||
BadAlphaChannel= -18, //!<
|
||||
BadOrder= -19, //!<
|
||||
BadOrigin= -20, //!<
|
||||
BadAlign= -21, //!<
|
||||
BadCallBack= -22, //!<
|
||||
BadTileSize= -23, //!<
|
||||
BadCOI= -24, //!<
|
||||
BadROISize= -25, //!<
|
||||
MaskIsTiled= -26, //!<
|
||||
StsNullPtr= -27, //!< null pointer
|
||||
StsVecLengthErr= -28, //!< incorrect vector length
|
||||
StsFilterStructContentErr= -29, //!< incorr. filter structure content
|
||||
StsKernelStructContentErr= -30, //!< incorr. transform kernel content
|
||||
StsFilterOffsetErr= -31, //!< incorrect filter ofset value
|
||||
StsBadSize= -201, //!< the input/output structure size is incorrect
|
||||
StsDivByZero= -202, //!< division by zero
|
||||
StsInplaceNotSupported= -203, //!< in-place operation is not supported
|
||||
StsObjectNotFound= -204, //!< request can't be completed
|
||||
StsUnmatchedFormats= -205, //!< formats of input/output arrays differ
|
||||
StsBadFlag= -206, //!< flag is wrong or not supported
|
||||
StsBadPoint= -207, //!< bad CvPoint
|
||||
StsBadMask= -208, //!< bad format of mask (neither 8uC1 nor 8sC1)
|
||||
StsUnmatchedSizes= -209, //!< sizes of input/output structures do not match
|
||||
StsUnsupportedFormat= -210, //!< the data format/type is not supported by the function
|
||||
StsOutOfRange= -211, //!< some of parameters are out of range
|
||||
StsParseError= -212, //!< invalid syntax/structure of the parsed file
|
||||
StsNotImplemented= -213, //!< the requested function/feature is not implemented
|
||||
StsBadMemBlock= -214, //!< an allocated block has been corrupted
|
||||
StsAssert= -215, //!< assertion failed
|
||||
GpuNotSupported= -216,
|
||||
GpuApiCallError= -217,
|
||||
OpenGlNotSupported= -218,
|
||||
OpenGlApiCallError= -219,
|
||||
OpenCLApiCallError= -220,
|
||||
OpenCLDoubleNotSupported= -221,
|
||||
OpenCLInitError= -222,
|
||||
OpenCLNoAMDBlasFft= -223
|
||||
};
|
||||
} //Error
|
||||
|
||||
//! @} core_utils
|
||||
|
||||
//! @addtogroup core_array
|
||||
//! @{
|
||||
|
||||
//! matrix decomposition types
|
||||
enum DecompTypes {
|
||||
/** Gaussian elimination with the optimal pivot element chosen. */
|
||||
DECOMP_LU = 0,
|
||||
/** singular value decomposition (SVD) method; the system can be over-defined and/or the matrix
|
||||
src1 can be singular */
|
||||
DECOMP_SVD = 1,
|
||||
/** eigenvalue decomposition; the matrix src1 must be symmetrical */
|
||||
DECOMP_EIG = 2,
|
||||
/** Cholesky \f$LL^T\f$ factorization; the matrix src1 must be symmetrical and positively
|
||||
defined */
|
||||
DECOMP_CHOLESKY = 3,
|
||||
/** QR factorization; the system can be over-defined and/or the matrix src1 can be singular */
|
||||
DECOMP_QR = 4,
|
||||
/** while all the previous flags are mutually exclusive, this flag can be used together with
|
||||
any of the previous; it means that the normal equations
|
||||
\f$\texttt{src1}^T\cdot\texttt{src1}\cdot\texttt{dst}=\texttt{src1}^T\texttt{src2}\f$ are
|
||||
solved instead of the original system
|
||||
\f$\texttt{src1}\cdot\texttt{dst}=\texttt{src2}\f$ */
|
||||
DECOMP_NORMAL = 16
|
||||
};
|
||||
|
||||
/** norm types
|
||||
- For one array:
|
||||
\f[norm = \forkthree{\|\texttt{src1}\|_{L_{\infty}} = \max _I | \texttt{src1} (I)|}{if \(\texttt{normType} = \texttt{NORM\_INF}\) }
|
||||
{ \| \texttt{src1} \| _{L_1} = \sum _I | \texttt{src1} (I)|}{if \(\texttt{normType} = \texttt{NORM\_L1}\) }
|
||||
{ \| \texttt{src1} \| _{L_2} = \sqrt{\sum_I \texttt{src1}(I)^2} }{if \(\texttt{normType} = \texttt{NORM\_L2}\) }\f]
|
||||
|
||||
- Absolute norm for two arrays
|
||||
\f[norm = \forkthree{\|\texttt{src1}-\texttt{src2}\|_{L_{\infty}} = \max _I | \texttt{src1} (I) - \texttt{src2} (I)|}{if \(\texttt{normType} = \texttt{NORM\_INF}\) }
|
||||
{ \| \texttt{src1} - \texttt{src2} \| _{L_1} = \sum _I | \texttt{src1} (I) - \texttt{src2} (I)|}{if \(\texttt{normType} = \texttt{NORM\_L1}\) }
|
||||
{ \| \texttt{src1} - \texttt{src2} \| _{L_2} = \sqrt{\sum_I (\texttt{src1}(I) - \texttt{src2}(I))^2} }{if \(\texttt{normType} = \texttt{NORM\_L2}\) }\f]
|
||||
|
||||
- Relative norm for two arrays
|
||||
\f[norm = \forkthree{\frac{\|\texttt{src1}-\texttt{src2}\|_{L_{\infty}} }{\|\texttt{src2}\|_{L_{\infty}} }}{if \(\texttt{normType} = \texttt{NORM\_RELATIVE\_INF}\) }
|
||||
{ \frac{\|\texttt{src1}-\texttt{src2}\|_{L_1} }{\|\texttt{src2}\|_{L_1}} }{if \(\texttt{normType} = \texttt{NORM\_RELATIVE\_L1}\) }
|
||||
{ \frac{\|\texttt{src1}-\texttt{src2}\|_{L_2} }{\|\texttt{src2}\|_{L_2}} }{if \(\texttt{normType} = \texttt{NORM\_RELATIVE\_L2}\) }\f]
|
||||
*/
|
||||
enum NormTypes { NORM_INF = 1,
|
||||
NORM_L1 = 2,
|
||||
NORM_L2 = 4,
|
||||
NORM_L2SQR = 5,
|
||||
NORM_HAMMING = 6,
|
||||
NORM_HAMMING2 = 7,
|
||||
NORM_TYPE_MASK = 7,
|
||||
NORM_RELATIVE = 8, //!< flag
|
||||
NORM_MINMAX = 32 //!< flag
|
||||
};
|
||||
|
||||
//! comparison types
|
||||
enum CmpTypes { CMP_EQ = 0, //!< src1 is equal to src2.
|
||||
CMP_GT = 1, //!< src1 is greater than src2.
|
||||
CMP_GE = 2, //!< src1 is greater than or equal to src2.
|
||||
CMP_LT = 3, //!< src1 is less than src2.
|
||||
CMP_LE = 4, //!< src1 is less than or equal to src2.
|
||||
CMP_NE = 5 //!< src1 is unequal to src2.
|
||||
};
|
||||
|
||||
//! generalized matrix multiplication flags
|
||||
enum GemmFlags { GEMM_1_T = 1, //!< transposes src1
|
||||
GEMM_2_T = 2, //!< transposes src2
|
||||
GEMM_3_T = 4 //!< transposes src3
|
||||
};
|
||||
|
||||
enum DftFlags {
|
||||
/** performs an inverse 1D or 2D transform instead of the default forward
|
||||
transform. */
|
||||
DFT_INVERSE = 1,
|
||||
/** scales the result: divide it by the number of array elements. Normally, it is
|
||||
combined with DFT_INVERSE. */
|
||||
DFT_SCALE = 2,
|
||||
/** performs a forward or inverse transform of every individual row of the input
|
||||
matrix; this flag enables you to transform multiple vectors simultaneously and can be used to
|
||||
decrease the overhead (which is sometimes several times larger than the processing itself) to
|
||||
perform 3D and higher-dimensional transformations and so forth.*/
|
||||
DFT_ROWS = 4,
|
||||
/** performs a forward transformation of 1D or 2D real array; the result,
|
||||
though being a complex array, has complex-conjugate symmetry (*CCS*, see the function
|
||||
description below for details), and such an array can be packed into a real array of the same
|
||||
size as input, which is the fastest option and which is what the function does by default;
|
||||
however, you may wish to get a full complex array (for simpler spectrum analysis, and so on) -
|
||||
pass the flag to enable the function to produce a full-size complex output array. */
|
||||
DFT_COMPLEX_OUTPUT = 16,
|
||||
/** performs an inverse transformation of a 1D or 2D complex array; the
|
||||
result is normally a complex array of the same size, however, if the input array has
|
||||
conjugate-complex symmetry (for example, it is a result of forward transformation with
|
||||
DFT_COMPLEX_OUTPUT flag), the output is a real array; while the function itself does not
|
||||
check whether the input is symmetrical or not, you can pass the flag and then the function
|
||||
will assume the symmetry and produce the real output array (note that when the input is packed
|
||||
into a real array and inverse transformation is executed, the function treats the input as a
|
||||
packed complex-conjugate symmetrical array, and the output will also be a real array). */
|
||||
DFT_REAL_OUTPUT = 32,
|
||||
/** performs an inverse 1D or 2D transform instead of the default forward transform. */
|
||||
DCT_INVERSE = DFT_INVERSE,
|
||||
/** performs a forward or inverse transform of every individual row of the input
|
||||
matrix. This flag enables you to transform multiple vectors simultaneously and can be used to
|
||||
decrease the overhead (which is sometimes several times larger than the processing itself) to
|
||||
perform 3D and higher-dimensional transforms and so forth.*/
|
||||
DCT_ROWS = DFT_ROWS
|
||||
};
|
||||
|
||||
//! Various border types, image boundaries are denoted with `|`
|
||||
//! @see borderInterpolate, copyMakeBorder
|
||||
enum BorderTypes {
|
||||
BORDER_CONSTANT = 0, //!< `iiiiii|abcdefgh|iiiiiii` with some specified `i`
|
||||
BORDER_REPLICATE = 1, //!< `aaaaaa|abcdefgh|hhhhhhh`
|
||||
BORDER_REFLECT = 2, //!< `fedcba|abcdefgh|hgfedcb`
|
||||
BORDER_WRAP = 3, //!< `cdefgh|abcdefgh|abcdefg`
|
||||
BORDER_REFLECT_101 = 4, //!< `gfedcb|abcdefgh|gfedcba`
|
||||
BORDER_TRANSPARENT = 5, //!< `uvwxyz|absdefgh|ijklmno`
|
||||
|
||||
BORDER_REFLECT101 = BORDER_REFLECT_101, //!< same as BORDER_REFLECT_101
|
||||
BORDER_DEFAULT = BORDER_REFLECT_101, //!< same as BORDER_REFLECT_101
|
||||
BORDER_ISOLATED = 16 //!< do not look outside of ROI
|
||||
};
|
||||
|
||||
//! @} core_array
|
||||
|
||||
//! @addtogroup core_utils
|
||||
//! @{
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
//////////////// static assert /////////////////
|
||||
#define CVAUX_CONCAT_EXP(a, b) a##b
|
||||
#define CVAUX_CONCAT(a, b) CVAUX_CONCAT_EXP(a,b)
|
||||
|
||||
#if defined(__clang__)
|
||||
# ifndef __has_extension
|
||||
# define __has_extension __has_feature /* compatibility, for older versions of clang */
|
||||
# endif
|
||||
# if __has_extension(cxx_static_assert)
|
||||
# define CV_StaticAssert(condition, reason) static_assert((condition), reason " " #condition)
|
||||
# endif
|
||||
#elif defined(__GNUC__)
|
||||
# if (defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L)
|
||||
# define CV_StaticAssert(condition, reason) static_assert((condition), reason " " #condition)
|
||||
# endif
|
||||
#elif defined(_MSC_VER)
|
||||
# if _MSC_VER >= 1600 /* MSVC 10 */
|
||||
# define CV_StaticAssert(condition, reason) static_assert((condition), reason " " #condition)
|
||||
# endif
|
||||
#endif
|
||||
#ifndef CV_StaticAssert
|
||||
# if defined(__GNUC__) && (__GNUC__ > 3) && (__GNUC_MINOR__ > 2)
|
||||
# define CV_StaticAssert(condition, reason) ({ extern int __attribute__((error("CV_StaticAssert: " reason " " #condition))) CV_StaticAssert(); ((condition) ? 0 : CV_StaticAssert()); })
|
||||
# else
|
||||
template <bool x> struct CV_StaticAssert_failed;
|
||||
template <> struct CV_StaticAssert_failed<true> { enum { val = 1 }; };
|
||||
template<int x> struct CV_StaticAssert_test {};
|
||||
# define CV_StaticAssert(condition, reason)\
|
||||
typedef cv::CV_StaticAssert_test< sizeof(cv::CV_StaticAssert_failed< static_cast<bool>(condition) >) > CVAUX_CONCAT(CV_StaticAssert_failed_at_, __LINE__)
|
||||
# endif
|
||||
#endif
|
||||
|
||||
// Suppress warning "-Wdeprecated-declarations" / C4996
|
||||
#if defined(_MSC_VER)
|
||||
#define CV_DO_PRAGMA(x) __pragma(x)
|
||||
#elif defined(__GNUC__)
|
||||
#define CV_DO_PRAGMA(x) _Pragma (#x)
|
||||
#else
|
||||
#define CV_DO_PRAGMA(x)
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#define CV_SUPPRESS_DEPRECATED_START \
|
||||
CV_DO_PRAGMA(warning(push)) \
|
||||
CV_DO_PRAGMA(warning(disable: 4996))
|
||||
#define CV_SUPPRESS_DEPRECATED_END CV_DO_PRAGMA(warning(pop))
|
||||
#elif defined (__clang__) || ((__GNUC__) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 5))))
|
||||
#define CV_SUPPRESS_DEPRECATED_START \
|
||||
CV_DO_PRAGMA(GCC diagnostic push) \
|
||||
CV_DO_PRAGMA(GCC diagnostic ignored "-Wdeprecated-declarations")
|
||||
#define CV_SUPPRESS_DEPRECATED_END CV_DO_PRAGMA(GCC diagnostic pop)
|
||||
#else
|
||||
#define CV_SUPPRESS_DEPRECATED_START
|
||||
#define CV_SUPPRESS_DEPRECATED_END
|
||||
#endif
|
||||
//! @endcond
|
||||
|
||||
/*! @brief Signals an error and raises the exception.
|
||||
|
||||
By default the function prints information about the error to stderr,
|
||||
then it either stops if setBreakOnError() had been called before or raises the exception.
|
||||
It is possible to alternate error processing by using redirectError().
|
||||
@param _code - error code (Error::Code)
|
||||
@param _err - error description
|
||||
@param _func - function name. Available only when the compiler supports getting it
|
||||
@param _file - source file name where the error has occured
|
||||
@param _line - line number in the source file where the error has occured
|
||||
@see CV_Error, CV_Error_, CV_ErrorNoReturn, CV_ErrorNoReturn_, CV_Assert, CV_DbgAssert
|
||||
*/
|
||||
CV_EXPORTS void error(int _code, const String& _err, const char* _func, const char* _file, int _line);
|
||||
|
||||
#ifdef __GNUC__
|
||||
# if defined __clang__ || defined __APPLE__
|
||||
# pragma GCC diagnostic push
|
||||
# pragma GCC diagnostic ignored "-Winvalid-noreturn"
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/** same as cv::error, but does not return */
|
||||
CV_INLINE CV_NORETURN void errorNoReturn(int _code, const String& _err, const char* _func, const char* _file, int _line)
|
||||
{
|
||||
error(_code, _err, _func, _file, _line);
|
||||
#ifdef __GNUC__
|
||||
# if !defined __clang__ && !defined __APPLE__
|
||||
// this suppresses this warning: "noreturn" function does return [enabled by default]
|
||||
__builtin_trap();
|
||||
// or use infinite loop: for (;;) {}
|
||||
# endif
|
||||
#endif
|
||||
}
|
||||
#ifdef __GNUC__
|
||||
# if defined __clang__ || defined __APPLE__
|
||||
# pragma GCC diagnostic pop
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if defined __GNUC__
|
||||
#define CV_Func __func__
|
||||
#elif defined _MSC_VER
|
||||
#define CV_Func __FUNCTION__
|
||||
#else
|
||||
#define CV_Func ""
|
||||
#endif
|
||||
|
||||
/** @brief Call the error handler.
|
||||
|
||||
Currently, the error handler prints the error code and the error message to the standard
|
||||
error stream `stderr`. In the Debug configuration, it then provokes memory access violation, so that
|
||||
the execution stack and all the parameters can be analyzed by the debugger. In the Release
|
||||
configuration, the exception is thrown.
|
||||
|
||||
@param code one of Error::Code
|
||||
@param msg error message
|
||||
*/
|
||||
#define CV_Error( code, msg ) cv::error( code, msg, CV_Func, __FILE__, __LINE__ )
|
||||
|
||||
/** @brief Call the error handler.
|
||||
|
||||
This macro can be used to construct an error message on-fly to include some dynamic information,
|
||||
for example:
|
||||
@code
|
||||
// note the extra parentheses around the formatted text message
|
||||
CV_Error_( CV_StsOutOfRange,
|
||||
("the value at (%d, %d)=%g is out of range", badPt.x, badPt.y, badValue));
|
||||
@endcode
|
||||
@param code one of Error::Code
|
||||
@param args printf-like formatted error message in parentheses
|
||||
*/
|
||||
#define CV_Error_( code, args ) cv::error( code, cv::format args, CV_Func, __FILE__, __LINE__ )
|
||||
|
||||
/** @brief Checks a condition at runtime and throws exception if it fails
|
||||
|
||||
The macros CV_Assert (and CV_DbgAssert(expr)) evaluate the specified expression. If it is 0, the macros
|
||||
raise an error (see cv::error). The macro CV_Assert checks the condition in both Debug and Release
|
||||
configurations while CV_DbgAssert is only retained in the Debug configuration.
|
||||
*/
|
||||
#define CV_Assert( expr ) if(!!(expr)) ; else cv::error( cv::Error::StsAssert, #expr, CV_Func, __FILE__, __LINE__ )
|
||||
|
||||
/** same as CV_Error(code,msg), but does not return */
|
||||
#define CV_ErrorNoReturn( code, msg ) cv::errorNoReturn( code, msg, CV_Func, __FILE__, __LINE__ )
|
||||
|
||||
/** same as CV_Error_(code,args), but does not return */
|
||||
#define CV_ErrorNoReturn_( code, args ) cv::errorNoReturn( code, cv::format args, CV_Func, __FILE__, __LINE__ )
|
||||
|
||||
/** replaced with CV_Assert(expr) in Debug configuration */
|
||||
#ifdef _DEBUG
|
||||
# define CV_DbgAssert(expr) CV_Assert(expr)
|
||||
#else
|
||||
# define CV_DbgAssert(expr)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Hamming distance functor - counts the bit differences between two strings - useful for the Brief descriptor
|
||||
* bit count of A exclusive XOR'ed with B
|
||||
*/
|
||||
struct CV_EXPORTS Hamming
|
||||
{
|
||||
enum { normType = NORM_HAMMING };
|
||||
typedef unsigned char ValueType;
|
||||
typedef int ResultType;
|
||||
|
||||
/** this will count the bits in a ^ b
|
||||
*/
|
||||
ResultType operator()( const unsigned char* a, const unsigned char* b, int size ) const;
|
||||
};
|
||||
|
||||
typedef Hamming HammingLUT;
|
||||
|
||||
/////////////////////////////////// inline norms ////////////////////////////////////
|
||||
|
||||
template<typename _Tp> inline _Tp cv_abs(_Tp x) { return std::abs(x); }
|
||||
inline int cv_abs(uchar x) { return x; }
|
||||
inline int cv_abs(schar x) { return std::abs(x); }
|
||||
inline int cv_abs(ushort x) { return x; }
|
||||
inline int cv_abs(short x) { return std::abs(x); }
|
||||
|
||||
template<typename _Tp, typename _AccTp> static inline
|
||||
_AccTp normL2Sqr(const _Tp* a, int n)
|
||||
{
|
||||
_AccTp s = 0;
|
||||
int i=0;
|
||||
#if CV_ENABLE_UNROLLED
|
||||
for( ; i <= n - 4; i += 4 )
|
||||
{
|
||||
_AccTp v0 = a[i], v1 = a[i+1], v2 = a[i+2], v3 = a[i+3];
|
||||
s += v0*v0 + v1*v1 + v2*v2 + v3*v3;
|
||||
}
|
||||
#endif
|
||||
for( ; i < n; i++ )
|
||||
{
|
||||
_AccTp v = a[i];
|
||||
s += v*v;
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
template<typename _Tp, typename _AccTp> static inline
|
||||
_AccTp normL1(const _Tp* a, int n)
|
||||
{
|
||||
_AccTp s = 0;
|
||||
int i = 0;
|
||||
#if CV_ENABLE_UNROLLED
|
||||
for(; i <= n - 4; i += 4 )
|
||||
{
|
||||
s += (_AccTp)cv_abs(a[i]) + (_AccTp)cv_abs(a[i+1]) +
|
||||
(_AccTp)cv_abs(a[i+2]) + (_AccTp)cv_abs(a[i+3]);
|
||||
}
|
||||
#endif
|
||||
for( ; i < n; i++ )
|
||||
s += cv_abs(a[i]);
|
||||
return s;
|
||||
}
|
||||
|
||||
template<typename _Tp, typename _AccTp> static inline
|
||||
_AccTp normInf(const _Tp* a, int n)
|
||||
{
|
||||
_AccTp s = 0;
|
||||
for( int i = 0; i < n; i++ )
|
||||
s = std::max(s, (_AccTp)cv_abs(a[i]));
|
||||
return s;
|
||||
}
|
||||
|
||||
template<typename _Tp, typename _AccTp> static inline
|
||||
_AccTp normL2Sqr(const _Tp* a, const _Tp* b, int n)
|
||||
{
|
||||
_AccTp s = 0;
|
||||
int i= 0;
|
||||
#if CV_ENABLE_UNROLLED
|
||||
for(; i <= n - 4; i += 4 )
|
||||
{
|
||||
_AccTp v0 = _AccTp(a[i] - b[i]), v1 = _AccTp(a[i+1] - b[i+1]), v2 = _AccTp(a[i+2] - b[i+2]), v3 = _AccTp(a[i+3] - b[i+3]);
|
||||
s += v0*v0 + v1*v1 + v2*v2 + v3*v3;
|
||||
}
|
||||
#endif
|
||||
for( ; i < n; i++ )
|
||||
{
|
||||
_AccTp v = _AccTp(a[i] - b[i]);
|
||||
s += v*v;
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
static inline float normL2Sqr(const float* a, const float* b, int n)
|
||||
{
|
||||
float s = 0.f;
|
||||
for( int i = 0; i < n; i++ )
|
||||
{
|
||||
float v = a[i] - b[i];
|
||||
s += v*v;
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
template<typename _Tp, typename _AccTp> static inline
|
||||
_AccTp normL1(const _Tp* a, const _Tp* b, int n)
|
||||
{
|
||||
_AccTp s = 0;
|
||||
int i= 0;
|
||||
#if CV_ENABLE_UNROLLED
|
||||
for(; i <= n - 4; i += 4 )
|
||||
{
|
||||
_AccTp v0 = _AccTp(a[i] - b[i]), v1 = _AccTp(a[i+1] - b[i+1]), v2 = _AccTp(a[i+2] - b[i+2]), v3 = _AccTp(a[i+3] - b[i+3]);
|
||||
s += std::abs(v0) + std::abs(v1) + std::abs(v2) + std::abs(v3);
|
||||
}
|
||||
#endif
|
||||
for( ; i < n; i++ )
|
||||
{
|
||||
_AccTp v = _AccTp(a[i] - b[i]);
|
||||
s += std::abs(v);
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
inline float normL1(const float* a, const float* b, int n)
|
||||
{
|
||||
float s = 0.f;
|
||||
for( int i = 0; i < n; i++ )
|
||||
{
|
||||
s += std::abs(a[i] - b[i]);
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
inline int normL1(const uchar* a, const uchar* b, int n)
|
||||
{
|
||||
int s = 0;
|
||||
for( int i = 0; i < n; i++ )
|
||||
{
|
||||
s += std::abs(a[i] - b[i]);
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
template<typename _Tp, typename _AccTp> static inline
|
||||
_AccTp normInf(const _Tp* a, const _Tp* b, int n)
|
||||
{
|
||||
_AccTp s = 0;
|
||||
for( int i = 0; i < n; i++ )
|
||||
{
|
||||
_AccTp v0 = a[i] - b[i];
|
||||
s = std::max(s, std::abs(v0));
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
/** @brief Computes the cube root of an argument.
|
||||
|
||||
The function cubeRoot computes \f$\sqrt[3]{\texttt{val}}\f$. Negative arguments are handled correctly.
|
||||
NaN and Inf are not handled. The accuracy approaches the maximum possible accuracy for
|
||||
single-precision data.
|
||||
@param val A function argument.
|
||||
*/
|
||||
CV_EXPORTS_W float cubeRoot(float val);
|
||||
|
||||
/** @brief Calculates the angle of a 2D vector in degrees.
|
||||
|
||||
The function fastAtan2 calculates the full-range angle of an input 2D vector. The angle is measured
|
||||
in degrees and varies from 0 to 360 degrees. The accuracy is about 0.3 degrees.
|
||||
@param x x-coordinate of the vector.
|
||||
@param y y-coordinate of the vector.
|
||||
*/
|
||||
CV_EXPORTS_W float fastAtan2(float y, float x);
|
||||
|
||||
/** proxy for hal::LU */
|
||||
CV_EXPORTS int LU(float* A, size_t astep, int m, float* b, size_t bstep, int n);
|
||||
/** proxy for hal::LU */
|
||||
CV_EXPORTS int LU(double* A, size_t astep, int m, double* b, size_t bstep, int n);
|
||||
/** proxy for hal::Cholesky */
|
||||
CV_EXPORTS bool Cholesky(float* A, size_t astep, int m, float* b, size_t bstep, int n);
|
||||
/** proxy for hal::Cholesky */
|
||||
CV_EXPORTS bool Cholesky(double* A, size_t astep, int m, double* b, size_t bstep, int n);
|
||||
|
||||
////////////////// forward declarations for important OpenCV types //////////////////
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
template<typename _Tp, int cn> class Vec;
|
||||
template<typename _Tp, int m, int n> class Matx;
|
||||
|
||||
template<typename _Tp> class Complex;
|
||||
template<typename _Tp> class Point_;
|
||||
template<typename _Tp> class Point3_;
|
||||
template<typename _Tp> class Size_;
|
||||
template<typename _Tp> class Rect_;
|
||||
template<typename _Tp> class Scalar_;
|
||||
|
||||
class CV_EXPORTS RotatedRect;
|
||||
class CV_EXPORTS Range;
|
||||
class CV_EXPORTS TermCriteria;
|
||||
class CV_EXPORTS KeyPoint;
|
||||
class CV_EXPORTS DMatch;
|
||||
class CV_EXPORTS RNG;
|
||||
|
||||
class CV_EXPORTS Mat;
|
||||
class CV_EXPORTS MatExpr;
|
||||
|
||||
class CV_EXPORTS UMat;
|
||||
|
||||
class CV_EXPORTS SparseMat;
|
||||
typedef Mat MatND;
|
||||
|
||||
template<typename _Tp> class Mat_;
|
||||
template<typename _Tp> class SparseMat_;
|
||||
|
||||
class CV_EXPORTS MatConstIterator;
|
||||
class CV_EXPORTS SparseMatIterator;
|
||||
class CV_EXPORTS SparseMatConstIterator;
|
||||
template<typename _Tp> class MatIterator_;
|
||||
template<typename _Tp> class MatConstIterator_;
|
||||
template<typename _Tp> class SparseMatIterator_;
|
||||
template<typename _Tp> class SparseMatConstIterator_;
|
||||
|
||||
namespace ogl
|
||||
{
|
||||
class CV_EXPORTS Buffer;
|
||||
class CV_EXPORTS Texture2D;
|
||||
class CV_EXPORTS Arrays;
|
||||
}
|
||||
|
||||
namespace cuda
|
||||
{
|
||||
class CV_EXPORTS GpuMat;
|
||||
class CV_EXPORTS HostMem;
|
||||
class CV_EXPORTS Stream;
|
||||
class CV_EXPORTS Event;
|
||||
}
|
||||
|
||||
namespace cudev
|
||||
{
|
||||
template <typename _Tp> class GpuMat_;
|
||||
}
|
||||
|
||||
namespace ipp
|
||||
{
|
||||
CV_EXPORTS void setIppStatus(int status, const char * const funcname = NULL, const char * const filename = NULL,
|
||||
int line = 0);
|
||||
CV_EXPORTS int getIppStatus();
|
||||
CV_EXPORTS String getIppErrorLocation();
|
||||
CV_EXPORTS bool useIPP();
|
||||
CV_EXPORTS void setUseIPP(bool flag);
|
||||
|
||||
} // ipp
|
||||
|
||||
//! @endcond
|
||||
|
||||
//! @} core_utils
|
||||
|
||||
//! @addtogroup core_utils_neon
|
||||
//! @{
|
||||
|
||||
#if CV_NEON
|
||||
|
||||
inline int32x2_t cv_vrnd_s32_f32(float32x2_t v)
|
||||
{
|
||||
static int32x2_t v_sign = vdup_n_s32(1 << 31),
|
||||
v_05 = vreinterpret_s32_f32(vdup_n_f32(0.5f));
|
||||
|
||||
int32x2_t v_addition = vorr_s32(v_05, vand_s32(v_sign, vreinterpret_s32_f32(v)));
|
||||
return vcvt_s32_f32(vadd_f32(v, vreinterpret_f32_s32(v_addition)));
|
||||
}
|
||||
|
||||
inline int32x4_t cv_vrndq_s32_f32(float32x4_t v)
|
||||
{
|
||||
static int32x4_t v_sign = vdupq_n_s32(1 << 31),
|
||||
v_05 = vreinterpretq_s32_f32(vdupq_n_f32(0.5f));
|
||||
|
||||
int32x4_t v_addition = vorrq_s32(v_05, vandq_s32(v_sign, vreinterpretq_s32_f32(v)));
|
||||
return vcvtq_s32_f32(vaddq_f32(v, vreinterpretq_f32_s32(v_addition)));
|
||||
}
|
||||
|
||||
inline uint32x2_t cv_vrnd_u32_f32(float32x2_t v)
|
||||
{
|
||||
static float32x2_t v_05 = vdup_n_f32(0.5f);
|
||||
return vcvt_u32_f32(vadd_f32(v, v_05));
|
||||
}
|
||||
|
||||
inline uint32x4_t cv_vrndq_u32_f32(float32x4_t v)
|
||||
{
|
||||
static float32x4_t v_05 = vdupq_n_f32(0.5f);
|
||||
return vcvtq_u32_f32(vaddq_f32(v, v_05));
|
||||
}
|
||||
|
||||
inline float32x4_t cv_vrecpq_f32(float32x4_t val)
|
||||
{
|
||||
float32x4_t reciprocal = vrecpeq_f32(val);
|
||||
reciprocal = vmulq_f32(vrecpsq_f32(val, reciprocal), reciprocal);
|
||||
reciprocal = vmulq_f32(vrecpsq_f32(val, reciprocal), reciprocal);
|
||||
return reciprocal;
|
||||
}
|
||||
|
||||
inline float32x2_t cv_vrecp_f32(float32x2_t val)
|
||||
{
|
||||
float32x2_t reciprocal = vrecpe_f32(val);
|
||||
reciprocal = vmul_f32(vrecps_f32(val, reciprocal), reciprocal);
|
||||
reciprocal = vmul_f32(vrecps_f32(val, reciprocal), reciprocal);
|
||||
return reciprocal;
|
||||
}
|
||||
|
||||
inline float32x4_t cv_vrsqrtq_f32(float32x4_t val)
|
||||
{
|
||||
float32x4_t e = vrsqrteq_f32(val);
|
||||
e = vmulq_f32(vrsqrtsq_f32(vmulq_f32(e, e), val), e);
|
||||
e = vmulq_f32(vrsqrtsq_f32(vmulq_f32(e, e), val), e);
|
||||
return e;
|
||||
}
|
||||
|
||||
inline float32x2_t cv_vrsqrt_f32(float32x2_t val)
|
||||
{
|
||||
float32x2_t e = vrsqrte_f32(val);
|
||||
e = vmul_f32(vrsqrts_f32(vmul_f32(e, e), val), e);
|
||||
e = vmul_f32(vrsqrts_f32(vmul_f32(e, e), val), e);
|
||||
return e;
|
||||
}
|
||||
|
||||
inline float32x4_t cv_vsqrtq_f32(float32x4_t val)
|
||||
{
|
||||
return cv_vrecpq_f32(cv_vrsqrtq_f32(val));
|
||||
}
|
||||
|
||||
inline float32x2_t cv_vsqrt_f32(float32x2_t val)
|
||||
{
|
||||
return cv_vrecp_f32(cv_vrsqrt_f32(val));
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
//! @} core_utils_neon
|
||||
|
||||
} // cv
|
||||
|
||||
#include "sse_utils.hpp"
|
||||
|
||||
#endif //__OPENCV_CORE_BASE_HPP__
|
31
3rdparty/include/opencv2/core/bufferpool.hpp
vendored
Normal file
31
3rdparty/include/opencv2/core/bufferpool.hpp
vendored
Normal file
@ -0,0 +1,31 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
//
|
||||
// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved.
|
||||
|
||||
#ifndef __OPENCV_CORE_BUFFER_POOL_HPP__
|
||||
#define __OPENCV_CORE_BUFFER_POOL_HPP__
|
||||
|
||||
namespace cv
|
||||
{
|
||||
|
||||
//! @addtogroup core
|
||||
//! @{
|
||||
|
||||
class BufferPoolController
|
||||
{
|
||||
protected:
|
||||
~BufferPoolController() { }
|
||||
public:
|
||||
virtual size_t getReservedSize() const = 0;
|
||||
virtual size_t getMaxReservedSize() const = 0;
|
||||
virtual void setMaxReservedSize(size_t size) = 0;
|
||||
virtual void freeAllReservedBuffers() = 0;
|
||||
};
|
||||
|
||||
//! @}
|
||||
|
||||
}
|
||||
|
||||
#endif // __OPENCV_CORE_BUFFER_POOL_HPP__
|
48
3rdparty/include/opencv2/core/core.hpp
vendored
Normal file
48
3rdparty/include/opencv2/core/core.hpp
vendored
Normal file
@ -0,0 +1,48 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifdef __OPENCV_BUILD
|
||||
#error this is a compatibility header which should not be used inside the OpenCV library
|
||||
#endif
|
||||
|
||||
#include "opencv2/core.hpp"
|
3152
3rdparty/include/opencv2/core/core_c.h
vendored
Normal file
3152
3rdparty/include/opencv2/core/core_c.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
845
3rdparty/include/opencv2/core/cuda.hpp
vendored
Normal file
845
3rdparty/include/opencv2/core/cuda.hpp
vendored
Normal file
@ -0,0 +1,845 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_CORE_CUDA_HPP__
|
||||
#define __OPENCV_CORE_CUDA_HPP__
|
||||
|
||||
#ifndef __cplusplus
|
||||
# error cuda.hpp header must be compiled as C++
|
||||
#endif
|
||||
|
||||
#include "opencv2/core.hpp"
|
||||
#include "opencv2/core/cuda_types.hpp"
|
||||
|
||||
/**
|
||||
@defgroup cuda CUDA-accelerated Computer Vision
|
||||
@{
|
||||
@defgroup cudacore Core part
|
||||
@{
|
||||
@defgroup cudacore_init Initalization and Information
|
||||
@defgroup cudacore_struct Data Structures
|
||||
@}
|
||||
@}
|
||||
*/
|
||||
|
||||
namespace cv { namespace cuda {
|
||||
|
||||
//! @addtogroup cudacore_struct
|
||||
//! @{
|
||||
|
||||
//===================================================================================
|
||||
// GpuMat
|
||||
//===================================================================================
|
||||
|
||||
/** @brief Base storage class for GPU memory with reference counting.
|
||||
|
||||
Its interface matches the Mat interface with the following limitations:
|
||||
|
||||
- no arbitrary dimensions support (only 2D)
|
||||
- no functions that return references to their data (because references on GPU are not valid for
|
||||
CPU)
|
||||
- no expression templates technique support
|
||||
|
||||
Beware that the latter limitation may lead to overloaded matrix operators that cause memory
|
||||
allocations. The GpuMat class is convertible to cuda::PtrStepSz and cuda::PtrStep so it can be
|
||||
passed directly to the kernel.
|
||||
|
||||
@note In contrast with Mat, in most cases GpuMat::isContinuous() == false . This means that rows are
|
||||
aligned to a size depending on the hardware. Single-row GpuMat is always a continuous matrix.
|
||||
|
||||
@note You are not recommended to leave static or global GpuMat variables allocated, that is, to rely
|
||||
on its destructor. The destruction order of such variables and CUDA context is undefined. GPU memory
|
||||
release function returns error if the CUDA context has been destroyed before.
|
||||
|
||||
@sa Mat
|
||||
*/
|
||||
class CV_EXPORTS GpuMat
|
||||
{
|
||||
public:
|
||||
class CV_EXPORTS Allocator
|
||||
{
|
||||
public:
|
||||
virtual ~Allocator() {}
|
||||
|
||||
// allocator must fill data, step and refcount fields
|
||||
virtual bool allocate(GpuMat* mat, int rows, int cols, size_t elemSize) = 0;
|
||||
virtual void free(GpuMat* mat) = 0;
|
||||
};
|
||||
|
||||
//! default allocator
|
||||
static Allocator* defaultAllocator();
|
||||
static void setDefaultAllocator(Allocator* allocator);
|
||||
|
||||
//! default constructor
|
||||
explicit GpuMat(Allocator* allocator = defaultAllocator());
|
||||
|
||||
//! constructs GpuMat of the specified size and type
|
||||
GpuMat(int rows, int cols, int type, Allocator* allocator = defaultAllocator());
|
||||
GpuMat(Size size, int type, Allocator* allocator = defaultAllocator());
|
||||
|
||||
//! constucts GpuMat and fills it with the specified value _s
|
||||
GpuMat(int rows, int cols, int type, Scalar s, Allocator* allocator = defaultAllocator());
|
||||
GpuMat(Size size, int type, Scalar s, Allocator* allocator = defaultAllocator());
|
||||
|
||||
//! copy constructor
|
||||
GpuMat(const GpuMat& m);
|
||||
|
||||
//! constructor for GpuMat headers pointing to user-allocated data
|
||||
GpuMat(int rows, int cols, int type, void* data, size_t step = Mat::AUTO_STEP);
|
||||
GpuMat(Size size, int type, void* data, size_t step = Mat::AUTO_STEP);
|
||||
|
||||
//! creates a GpuMat header for a part of the bigger matrix
|
||||
GpuMat(const GpuMat& m, Range rowRange, Range colRange);
|
||||
GpuMat(const GpuMat& m, Rect roi);
|
||||
|
||||
//! builds GpuMat from host memory (Blocking call)
|
||||
explicit GpuMat(InputArray arr, Allocator* allocator = defaultAllocator());
|
||||
|
||||
//! destructor - calls release()
|
||||
~GpuMat();
|
||||
|
||||
//! assignment operators
|
||||
GpuMat& operator =(const GpuMat& m);
|
||||
|
||||
//! allocates new GpuMat data unless the GpuMat already has specified size and type
|
||||
void create(int rows, int cols, int type);
|
||||
void create(Size size, int type);
|
||||
|
||||
//! decreases reference counter, deallocate the data when reference counter reaches 0
|
||||
void release();
|
||||
|
||||
//! swaps with other smart pointer
|
||||
void swap(GpuMat& mat);
|
||||
|
||||
//! pefroms upload data to GpuMat (Blocking call)
|
||||
void upload(InputArray arr);
|
||||
|
||||
//! pefroms upload data to GpuMat (Non-Blocking call)
|
||||
void upload(InputArray arr, Stream& stream);
|
||||
|
||||
//! pefroms download data from device to host memory (Blocking call)
|
||||
void download(OutputArray dst) const;
|
||||
|
||||
//! pefroms download data from device to host memory (Non-Blocking call)
|
||||
void download(OutputArray dst, Stream& stream) const;
|
||||
|
||||
//! returns deep copy of the GpuMat, i.e. the data is copied
|
||||
GpuMat clone() const;
|
||||
|
||||
//! copies the GpuMat content to device memory (Blocking call)
|
||||
void copyTo(OutputArray dst) const;
|
||||
|
||||
//! copies the GpuMat content to device memory (Non-Blocking call)
|
||||
void copyTo(OutputArray dst, Stream& stream) const;
|
||||
|
||||
//! copies those GpuMat elements to "m" that are marked with non-zero mask elements (Blocking call)
|
||||
void copyTo(OutputArray dst, InputArray mask) const;
|
||||
|
||||
//! copies those GpuMat elements to "m" that are marked with non-zero mask elements (Non-Blocking call)
|
||||
void copyTo(OutputArray dst, InputArray mask, Stream& stream) const;
|
||||
|
||||
//! sets some of the GpuMat elements to s (Blocking call)
|
||||
GpuMat& setTo(Scalar s);
|
||||
|
||||
//! sets some of the GpuMat elements to s (Non-Blocking call)
|
||||
GpuMat& setTo(Scalar s, Stream& stream);
|
||||
|
||||
//! sets some of the GpuMat elements to s, according to the mask (Blocking call)
|
||||
GpuMat& setTo(Scalar s, InputArray mask);
|
||||
|
||||
//! sets some of the GpuMat elements to s, according to the mask (Non-Blocking call)
|
||||
GpuMat& setTo(Scalar s, InputArray mask, Stream& stream);
|
||||
|
||||
//! converts GpuMat to another datatype (Blocking call)
|
||||
void convertTo(OutputArray dst, int rtype) const;
|
||||
|
||||
//! converts GpuMat to another datatype (Non-Blocking call)
|
||||
void convertTo(OutputArray dst, int rtype, Stream& stream) const;
|
||||
|
||||
//! converts GpuMat to another datatype with scaling (Blocking call)
|
||||
void convertTo(OutputArray dst, int rtype, double alpha, double beta = 0.0) const;
|
||||
|
||||
//! converts GpuMat to another datatype with scaling (Non-Blocking call)
|
||||
void convertTo(OutputArray dst, int rtype, double alpha, Stream& stream) const;
|
||||
|
||||
//! converts GpuMat to another datatype with scaling (Non-Blocking call)
|
||||
void convertTo(OutputArray dst, int rtype, double alpha, double beta, Stream& stream) const;
|
||||
|
||||
void assignTo(GpuMat& m, int type=-1) const;
|
||||
|
||||
//! returns pointer to y-th row
|
||||
uchar* ptr(int y = 0);
|
||||
const uchar* ptr(int y = 0) const;
|
||||
|
||||
//! template version of the above method
|
||||
template<typename _Tp> _Tp* ptr(int y = 0);
|
||||
template<typename _Tp> const _Tp* ptr(int y = 0) const;
|
||||
|
||||
template <typename _Tp> operator PtrStepSz<_Tp>() const;
|
||||
template <typename _Tp> operator PtrStep<_Tp>() const;
|
||||
|
||||
//! returns a new GpuMat header for the specified row
|
||||
GpuMat row(int y) const;
|
||||
|
||||
//! returns a new GpuMat header for the specified column
|
||||
GpuMat col(int x) const;
|
||||
|
||||
//! ... for the specified row span
|
||||
GpuMat rowRange(int startrow, int endrow) const;
|
||||
GpuMat rowRange(Range r) const;
|
||||
|
||||
//! ... for the specified column span
|
||||
GpuMat colRange(int startcol, int endcol) const;
|
||||
GpuMat colRange(Range r) const;
|
||||
|
||||
//! extracts a rectangular sub-GpuMat (this is a generalized form of row, rowRange etc.)
|
||||
GpuMat operator ()(Range rowRange, Range colRange) const;
|
||||
GpuMat operator ()(Rect roi) const;
|
||||
|
||||
//! creates alternative GpuMat header for the same data, with different
|
||||
//! number of channels and/or different number of rows
|
||||
GpuMat reshape(int cn, int rows = 0) const;
|
||||
|
||||
//! locates GpuMat header within a parent GpuMat
|
||||
void locateROI(Size& wholeSize, Point& ofs) const;
|
||||
|
||||
//! moves/resizes the current GpuMat ROI inside the parent GpuMat
|
||||
GpuMat& adjustROI(int dtop, int dbottom, int dleft, int dright);
|
||||
|
||||
//! returns true iff the GpuMat data is continuous
|
||||
//! (i.e. when there are no gaps between successive rows)
|
||||
bool isContinuous() const;
|
||||
|
||||
//! returns element size in bytes
|
||||
size_t elemSize() const;
|
||||
|
||||
//! returns the size of element channel in bytes
|
||||
size_t elemSize1() const;
|
||||
|
||||
//! returns element type
|
||||
int type() const;
|
||||
|
||||
//! returns element type
|
||||
int depth() const;
|
||||
|
||||
//! returns number of channels
|
||||
int channels() const;
|
||||
|
||||
//! returns step/elemSize1()
|
||||
size_t step1() const;
|
||||
|
||||
//! returns GpuMat size : width == number of columns, height == number of rows
|
||||
Size size() const;
|
||||
|
||||
//! returns true if GpuMat data is NULL
|
||||
bool empty() const;
|
||||
|
||||
/*! includes several bit-fields:
|
||||
- the magic signature
|
||||
- continuity flag
|
||||
- depth
|
||||
- number of channels
|
||||
*/
|
||||
int flags;
|
||||
|
||||
//! the number of rows and columns
|
||||
int rows, cols;
|
||||
|
||||
//! a distance between successive rows in bytes; includes the gap if any
|
||||
size_t step;
|
||||
|
||||
//! pointer to the data
|
||||
uchar* data;
|
||||
|
||||
//! pointer to the reference counter;
|
||||
//! when GpuMat points to user-allocated data, the pointer is NULL
|
||||
int* refcount;
|
||||
|
||||
//! helper fields used in locateROI and adjustROI
|
||||
uchar* datastart;
|
||||
const uchar* dataend;
|
||||
|
||||
//! allocator
|
||||
Allocator* allocator;
|
||||
};
|
||||
|
||||
/** @brief Creates a continuous matrix.
|
||||
|
||||
@param rows Row count.
|
||||
@param cols Column count.
|
||||
@param type Type of the matrix.
|
||||
@param arr Destination matrix. This parameter changes only if it has a proper type and area (
|
||||
\f$\texttt{rows} \times \texttt{cols}\f$ ).
|
||||
|
||||
Matrix is called continuous if its elements are stored continuously, that is, without gaps at the
|
||||
end of each row.
|
||||
*/
|
||||
CV_EXPORTS void createContinuous(int rows, int cols, int type, OutputArray arr);
|
||||
|
||||
/** @brief Ensures that the size of a matrix is big enough and the matrix has a proper type.
|
||||
|
||||
@param rows Minimum desired number of rows.
|
||||
@param cols Minimum desired number of columns.
|
||||
@param type Desired matrix type.
|
||||
@param arr Destination matrix.
|
||||
|
||||
The function does not reallocate memory if the matrix has proper attributes already.
|
||||
*/
|
||||
CV_EXPORTS void ensureSizeIsEnough(int rows, int cols, int type, OutputArray arr);
|
||||
|
||||
//! BufferPool management (must be called before Stream creation)
|
||||
CV_EXPORTS void setBufferPoolUsage(bool on);
|
||||
CV_EXPORTS void setBufferPoolConfig(int deviceId, size_t stackSize, int stackCount);
|
||||
|
||||
//===================================================================================
|
||||
// HostMem
|
||||
//===================================================================================
|
||||
|
||||
/** @brief Class with reference counting wrapping special memory type allocation functions from CUDA.
|
||||
|
||||
Its interface is also Mat-like but with additional memory type parameters.
|
||||
|
||||
- **PAGE_LOCKED** sets a page locked memory type used commonly for fast and asynchronous
|
||||
uploading/downloading data from/to GPU.
|
||||
- **SHARED** specifies a zero copy memory allocation that enables mapping the host memory to GPU
|
||||
address space, if supported.
|
||||
- **WRITE_COMBINED** sets the write combined buffer that is not cached by CPU. Such buffers are
|
||||
used to supply GPU with data when GPU only reads it. The advantage is a better CPU cache
|
||||
utilization.
|
||||
|
||||
@note Allocation size of such memory types is usually limited. For more details, see *CUDA 2.2
|
||||
Pinned Memory APIs* document or *CUDA C Programming Guide*.
|
||||
*/
|
||||
class CV_EXPORTS HostMem
|
||||
{
|
||||
public:
|
||||
enum AllocType { PAGE_LOCKED = 1, SHARED = 2, WRITE_COMBINED = 4 };
|
||||
|
||||
static MatAllocator* getAllocator(AllocType alloc_type = PAGE_LOCKED);
|
||||
|
||||
explicit HostMem(AllocType alloc_type = PAGE_LOCKED);
|
||||
|
||||
HostMem(const HostMem& m);
|
||||
|
||||
HostMem(int rows, int cols, int type, AllocType alloc_type = PAGE_LOCKED);
|
||||
HostMem(Size size, int type, AllocType alloc_type = PAGE_LOCKED);
|
||||
|
||||
//! creates from host memory with coping data
|
||||
explicit HostMem(InputArray arr, AllocType alloc_type = PAGE_LOCKED);
|
||||
|
||||
~HostMem();
|
||||
|
||||
HostMem& operator =(const HostMem& m);
|
||||
|
||||
//! swaps with other smart pointer
|
||||
void swap(HostMem& b);
|
||||
|
||||
//! returns deep copy of the matrix, i.e. the data is copied
|
||||
HostMem clone() const;
|
||||
|
||||
//! allocates new matrix data unless the matrix already has specified size and type.
|
||||
void create(int rows, int cols, int type);
|
||||
void create(Size size, int type);
|
||||
|
||||
//! creates alternative HostMem header for the same data, with different
|
||||
//! number of channels and/or different number of rows
|
||||
HostMem reshape(int cn, int rows = 0) const;
|
||||
|
||||
//! decrements reference counter and released memory if needed.
|
||||
void release();
|
||||
|
||||
//! returns matrix header with disabled reference counting for HostMem data.
|
||||
Mat createMatHeader() const;
|
||||
|
||||
/** @brief Maps CPU memory to GPU address space and creates the cuda::GpuMat header without reference counting
|
||||
for it.
|
||||
|
||||
This can be done only if memory was allocated with the SHARED flag and if it is supported by the
|
||||
hardware. Laptops often share video and CPU memory, so address spaces can be mapped, which
|
||||
eliminates an extra copy.
|
||||
*/
|
||||
GpuMat createGpuMatHeader() const;
|
||||
|
||||
// Please see cv::Mat for descriptions
|
||||
bool isContinuous() const;
|
||||
size_t elemSize() const;
|
||||
size_t elemSize1() const;
|
||||
int type() const;
|
||||
int depth() const;
|
||||
int channels() const;
|
||||
size_t step1() const;
|
||||
Size size() const;
|
||||
bool empty() const;
|
||||
|
||||
// Please see cv::Mat for descriptions
|
||||
int flags;
|
||||
int rows, cols;
|
||||
size_t step;
|
||||
|
||||
uchar* data;
|
||||
int* refcount;
|
||||
|
||||
uchar* datastart;
|
||||
const uchar* dataend;
|
||||
|
||||
AllocType alloc_type;
|
||||
};
|
||||
|
||||
/** @brief Page-locks the memory of matrix and maps it for the device(s).
|
||||
|
||||
@param m Input matrix.
|
||||
*/
|
||||
CV_EXPORTS void registerPageLocked(Mat& m);
|
||||
|
||||
/** @brief Unmaps the memory of matrix and makes it pageable again.
|
||||
|
||||
@param m Input matrix.
|
||||
*/
|
||||
CV_EXPORTS void unregisterPageLocked(Mat& m);
|
||||
|
||||
//===================================================================================
|
||||
// Stream
|
||||
//===================================================================================
|
||||
|
||||
/** @brief This class encapsulates a queue of asynchronous calls.
|
||||
|
||||
@note Currently, you may face problems if an operation is enqueued twice with different data. Some
|
||||
functions use the constant GPU memory, and next call may update the memory before the previous one
|
||||
has been finished. But calling different operations asynchronously is safe because each operation
|
||||
has its own constant buffer. Memory copy/upload/download/set operations to the buffers you hold are
|
||||
also safe. :
|
||||
*/
|
||||
class CV_EXPORTS Stream
|
||||
{
|
||||
typedef void (Stream::*bool_type)() const;
|
||||
void this_type_does_not_support_comparisons() const {}
|
||||
|
||||
public:
|
||||
typedef void (*StreamCallback)(int status, void* userData);
|
||||
|
||||
//! creates a new asynchronous stream
|
||||
Stream();
|
||||
|
||||
/** @brief Returns true if the current stream queue is finished. Otherwise, it returns false.
|
||||
*/
|
||||
bool queryIfComplete() const;
|
||||
|
||||
/** @brief Blocks the current CPU thread until all operations in the stream are complete.
|
||||
*/
|
||||
void waitForCompletion();
|
||||
|
||||
/** @brief Makes a compute stream wait on an event.
|
||||
*/
|
||||
void waitEvent(const Event& event);
|
||||
|
||||
/** @brief Adds a callback to be called on the host after all currently enqueued items in the stream have
|
||||
completed.
|
||||
|
||||
@note Callbacks must not make any CUDA API calls. Callbacks must not perform any synchronization
|
||||
that may depend on outstanding device work or other callbacks that are not mandated to run earlier.
|
||||
Callbacks without a mandated order (in independent streams) execute in undefined order and may be
|
||||
serialized.
|
||||
*/
|
||||
void enqueueHostCallback(StreamCallback callback, void* userData);
|
||||
|
||||
//! return Stream object for default CUDA stream
|
||||
static Stream& Null();
|
||||
|
||||
//! returns true if stream object is not default (!= 0)
|
||||
operator bool_type() const;
|
||||
|
||||
class Impl;
|
||||
|
||||
private:
|
||||
Ptr<Impl> impl_;
|
||||
Stream(const Ptr<Impl>& impl);
|
||||
|
||||
friend struct StreamAccessor;
|
||||
friend class BufferPool;
|
||||
friend class DefaultDeviceInitializer;
|
||||
};
|
||||
|
||||
class CV_EXPORTS Event
|
||||
{
|
||||
public:
|
||||
enum CreateFlags
|
||||
{
|
||||
DEFAULT = 0x00, /**< Default event flag */
|
||||
BLOCKING_SYNC = 0x01, /**< Event uses blocking synchronization */
|
||||
DISABLE_TIMING = 0x02, /**< Event will not record timing data */
|
||||
INTERPROCESS = 0x04 /**< Event is suitable for interprocess use. DisableTiming must be set */
|
||||
};
|
||||
|
||||
explicit Event(CreateFlags flags = DEFAULT);
|
||||
|
||||
//! records an event
|
||||
void record(Stream& stream = Stream::Null());
|
||||
|
||||
//! queries an event's status
|
||||
bool queryIfComplete() const;
|
||||
|
||||
//! waits for an event to complete
|
||||
void waitForCompletion();
|
||||
|
||||
//! computes the elapsed time between events
|
||||
static float elapsedTime(const Event& start, const Event& end);
|
||||
|
||||
class Impl;
|
||||
|
||||
private:
|
||||
Ptr<Impl> impl_;
|
||||
|
||||
friend struct EventAccessor;
|
||||
};
|
||||
|
||||
//! @} cudacore_struct
|
||||
|
||||
//===================================================================================
|
||||
// Initialization & Info
|
||||
//===================================================================================
|
||||
|
||||
//! @addtogroup cudacore_init
|
||||
//! @{
|
||||
|
||||
/** @brief Returns the number of installed CUDA-enabled devices.
|
||||
|
||||
Use this function before any other CUDA functions calls. If OpenCV is compiled without CUDA support,
|
||||
this function returns 0.
|
||||
*/
|
||||
CV_EXPORTS int getCudaEnabledDeviceCount();
|
||||
|
||||
/** @brief Sets a device and initializes it for the current thread.
|
||||
|
||||
@param device System index of a CUDA device starting with 0.
|
||||
|
||||
If the call of this function is omitted, a default device is initialized at the fist CUDA usage.
|
||||
*/
|
||||
CV_EXPORTS void setDevice(int device);
|
||||
|
||||
/** @brief Returns the current device index set by cuda::setDevice or initialized by default.
|
||||
*/
|
||||
CV_EXPORTS int getDevice();
|
||||
|
||||
/** @brief Explicitly destroys and cleans up all resources associated with the current device in the current
|
||||
process.
|
||||
|
||||
Any subsequent API call to this device will reinitialize the device.
|
||||
*/
|
||||
CV_EXPORTS void resetDevice();
|
||||
|
||||
/** @brief Enumeration providing CUDA computing features.
|
||||
*/
|
||||
enum FeatureSet
|
||||
{
|
||||
FEATURE_SET_COMPUTE_10 = 10,
|
||||
FEATURE_SET_COMPUTE_11 = 11,
|
||||
FEATURE_SET_COMPUTE_12 = 12,
|
||||
FEATURE_SET_COMPUTE_13 = 13,
|
||||
FEATURE_SET_COMPUTE_20 = 20,
|
||||
FEATURE_SET_COMPUTE_21 = 21,
|
||||
FEATURE_SET_COMPUTE_30 = 30,
|
||||
FEATURE_SET_COMPUTE_32 = 32,
|
||||
FEATURE_SET_COMPUTE_35 = 35,
|
||||
FEATURE_SET_COMPUTE_50 = 50,
|
||||
|
||||
GLOBAL_ATOMICS = FEATURE_SET_COMPUTE_11,
|
||||
SHARED_ATOMICS = FEATURE_SET_COMPUTE_12,
|
||||
NATIVE_DOUBLE = FEATURE_SET_COMPUTE_13,
|
||||
WARP_SHUFFLE_FUNCTIONS = FEATURE_SET_COMPUTE_30,
|
||||
DYNAMIC_PARALLELISM = FEATURE_SET_COMPUTE_35
|
||||
};
|
||||
|
||||
//! checks whether current device supports the given feature
|
||||
CV_EXPORTS bool deviceSupports(FeatureSet feature_set);
|
||||
|
||||
/** @brief Class providing a set of static methods to check what NVIDIA\* card architecture the CUDA module was
|
||||
built for.
|
||||
|
||||
According to the CUDA C Programming Guide Version 3.2: "PTX code produced for some specific compute
|
||||
capability can always be compiled to binary code of greater or equal compute capability".
|
||||
*/
|
||||
class CV_EXPORTS TargetArchs
|
||||
{
|
||||
public:
|
||||
/** @brief The following method checks whether the module was built with the support of the given feature:
|
||||
|
||||
@param feature_set Features to be checked. See :ocvcuda::FeatureSet.
|
||||
*/
|
||||
static bool builtWith(FeatureSet feature_set);
|
||||
|
||||
/** @brief There is a set of methods to check whether the module contains intermediate (PTX) or binary CUDA
|
||||
code for the given architecture(s):
|
||||
|
||||
@param major Major compute capability version.
|
||||
@param minor Minor compute capability version.
|
||||
*/
|
||||
static bool has(int major, int minor);
|
||||
static bool hasPtx(int major, int minor);
|
||||
static bool hasBin(int major, int minor);
|
||||
|
||||
static bool hasEqualOrLessPtx(int major, int minor);
|
||||
static bool hasEqualOrGreater(int major, int minor);
|
||||
static bool hasEqualOrGreaterPtx(int major, int minor);
|
||||
static bool hasEqualOrGreaterBin(int major, int minor);
|
||||
};
|
||||
|
||||
/** @brief Class providing functionality for querying the specified GPU properties.
|
||||
*/
|
||||
class CV_EXPORTS DeviceInfo
|
||||
{
|
||||
public:
|
||||
//! creates DeviceInfo object for the current GPU
|
||||
DeviceInfo();
|
||||
|
||||
/** @brief The constructors.
|
||||
|
||||
@param device_id System index of the CUDA device starting with 0.
|
||||
|
||||
Constructs the DeviceInfo object for the specified device. If device_id parameter is missed, it
|
||||
constructs an object for the current device.
|
||||
*/
|
||||
DeviceInfo(int device_id);
|
||||
|
||||
/** @brief Returns system index of the CUDA device starting with 0.
|
||||
*/
|
||||
int deviceID() const;
|
||||
|
||||
//! ASCII string identifying device
|
||||
const char* name() const;
|
||||
|
||||
//! global memory available on device in bytes
|
||||
size_t totalGlobalMem() const;
|
||||
|
||||
//! shared memory available per block in bytes
|
||||
size_t sharedMemPerBlock() const;
|
||||
|
||||
//! 32-bit registers available per block
|
||||
int regsPerBlock() const;
|
||||
|
||||
//! warp size in threads
|
||||
int warpSize() const;
|
||||
|
||||
//! maximum pitch in bytes allowed by memory copies
|
||||
size_t memPitch() const;
|
||||
|
||||
//! maximum number of threads per block
|
||||
int maxThreadsPerBlock() const;
|
||||
|
||||
//! maximum size of each dimension of a block
|
||||
Vec3i maxThreadsDim() const;
|
||||
|
||||
//! maximum size of each dimension of a grid
|
||||
Vec3i maxGridSize() const;
|
||||
|
||||
//! clock frequency in kilohertz
|
||||
int clockRate() const;
|
||||
|
||||
//! constant memory available on device in bytes
|
||||
size_t totalConstMem() const;
|
||||
|
||||
//! major compute capability
|
||||
int majorVersion() const;
|
||||
|
||||
//! minor compute capability
|
||||
int minorVersion() const;
|
||||
|
||||
//! alignment requirement for textures
|
||||
size_t textureAlignment() const;
|
||||
|
||||
//! pitch alignment requirement for texture references bound to pitched memory
|
||||
size_t texturePitchAlignment() const;
|
||||
|
||||
//! number of multiprocessors on device
|
||||
int multiProcessorCount() const;
|
||||
|
||||
//! specified whether there is a run time limit on kernels
|
||||
bool kernelExecTimeoutEnabled() const;
|
||||
|
||||
//! device is integrated as opposed to discrete
|
||||
bool integrated() const;
|
||||
|
||||
//! device can map host memory with cudaHostAlloc/cudaHostGetDevicePointer
|
||||
bool canMapHostMemory() const;
|
||||
|
||||
enum ComputeMode
|
||||
{
|
||||
ComputeModeDefault, /**< default compute mode (Multiple threads can use cudaSetDevice with this device) */
|
||||
ComputeModeExclusive, /**< compute-exclusive-thread mode (Only one thread in one process will be able to use cudaSetDevice with this device) */
|
||||
ComputeModeProhibited, /**< compute-prohibited mode (No threads can use cudaSetDevice with this device) */
|
||||
ComputeModeExclusiveProcess /**< compute-exclusive-process mode (Many threads in one process will be able to use cudaSetDevice with this device) */
|
||||
};
|
||||
|
||||
//! compute mode
|
||||
ComputeMode computeMode() const;
|
||||
|
||||
//! maximum 1D texture size
|
||||
int maxTexture1D() const;
|
||||
|
||||
//! maximum 1D mipmapped texture size
|
||||
int maxTexture1DMipmap() const;
|
||||
|
||||
//! maximum size for 1D textures bound to linear memory
|
||||
int maxTexture1DLinear() const;
|
||||
|
||||
//! maximum 2D texture dimensions
|
||||
Vec2i maxTexture2D() const;
|
||||
|
||||
//! maximum 2D mipmapped texture dimensions
|
||||
Vec2i maxTexture2DMipmap() const;
|
||||
|
||||
//! maximum dimensions (width, height, pitch) for 2D textures bound to pitched memory
|
||||
Vec3i maxTexture2DLinear() const;
|
||||
|
||||
//! maximum 2D texture dimensions if texture gather operations have to be performed
|
||||
Vec2i maxTexture2DGather() const;
|
||||
|
||||
//! maximum 3D texture dimensions
|
||||
Vec3i maxTexture3D() const;
|
||||
|
||||
//! maximum Cubemap texture dimensions
|
||||
int maxTextureCubemap() const;
|
||||
|
||||
//! maximum 1D layered texture dimensions
|
||||
Vec2i maxTexture1DLayered() const;
|
||||
|
||||
//! maximum 2D layered texture dimensions
|
||||
Vec3i maxTexture2DLayered() const;
|
||||
|
||||
//! maximum Cubemap layered texture dimensions
|
||||
Vec2i maxTextureCubemapLayered() const;
|
||||
|
||||
//! maximum 1D surface size
|
||||
int maxSurface1D() const;
|
||||
|
||||
//! maximum 2D surface dimensions
|
||||
Vec2i maxSurface2D() const;
|
||||
|
||||
//! maximum 3D surface dimensions
|
||||
Vec3i maxSurface3D() const;
|
||||
|
||||
//! maximum 1D layered surface dimensions
|
||||
Vec2i maxSurface1DLayered() const;
|
||||
|
||||
//! maximum 2D layered surface dimensions
|
||||
Vec3i maxSurface2DLayered() const;
|
||||
|
||||
//! maximum Cubemap surface dimensions
|
||||
int maxSurfaceCubemap() const;
|
||||
|
||||
//! maximum Cubemap layered surface dimensions
|
||||
Vec2i maxSurfaceCubemapLayered() const;
|
||||
|
||||
//! alignment requirements for surfaces
|
||||
size_t surfaceAlignment() const;
|
||||
|
||||
//! device can possibly execute multiple kernels concurrently
|
||||
bool concurrentKernels() const;
|
||||
|
||||
//! device has ECC support enabled
|
||||
bool ECCEnabled() const;
|
||||
|
||||
//! PCI bus ID of the device
|
||||
int pciBusID() const;
|
||||
|
||||
//! PCI device ID of the device
|
||||
int pciDeviceID() const;
|
||||
|
||||
//! PCI domain ID of the device
|
||||
int pciDomainID() const;
|
||||
|
||||
//! true if device is a Tesla device using TCC driver, false otherwise
|
||||
bool tccDriver() const;
|
||||
|
||||
//! number of asynchronous engines
|
||||
int asyncEngineCount() const;
|
||||
|
||||
//! device shares a unified address space with the host
|
||||
bool unifiedAddressing() const;
|
||||
|
||||
//! peak memory clock frequency in kilohertz
|
||||
int memoryClockRate() const;
|
||||
|
||||
//! global memory bus width in bits
|
||||
int memoryBusWidth() const;
|
||||
|
||||
//! size of L2 cache in bytes
|
||||
int l2CacheSize() const;
|
||||
|
||||
//! maximum resident threads per multiprocessor
|
||||
int maxThreadsPerMultiProcessor() const;
|
||||
|
||||
//! gets free and total device memory
|
||||
void queryMemory(size_t& totalMemory, size_t& freeMemory) const;
|
||||
size_t freeMemory() const;
|
||||
size_t totalMemory() const;
|
||||
|
||||
/** @brief Provides information on CUDA feature support.
|
||||
|
||||
@param feature_set Features to be checked. See cuda::FeatureSet.
|
||||
|
||||
This function returns true if the device has the specified CUDA feature. Otherwise, it returns false
|
||||
*/
|
||||
bool supports(FeatureSet feature_set) const;
|
||||
|
||||
/** @brief Checks the CUDA module and device compatibility.
|
||||
|
||||
This function returns true if the CUDA module can be run on the specified device. Otherwise, it
|
||||
returns false .
|
||||
*/
|
||||
bool isCompatible() const;
|
||||
|
||||
private:
|
||||
int device_id_;
|
||||
};
|
||||
|
||||
CV_EXPORTS void printCudaDeviceInfo(int device);
|
||||
CV_EXPORTS void printShortCudaDeviceInfo(int device);
|
||||
|
||||
//! @} cudacore_init
|
||||
|
||||
}} // namespace cv { namespace cuda {
|
||||
|
||||
|
||||
#include "opencv2/core/cuda.inl.hpp"
|
||||
|
||||
#endif /* __OPENCV_CORE_CUDA_HPP__ */
|
621
3rdparty/include/opencv2/core/cuda.inl.hpp
vendored
Normal file
621
3rdparty/include/opencv2/core/cuda.inl.hpp
vendored
Normal file
@ -0,0 +1,621 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_CORE_CUDAINL_HPP__
|
||||
#define __OPENCV_CORE_CUDAINL_HPP__
|
||||
|
||||
#include "opencv2/core/cuda.hpp"
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv { namespace cuda {
|
||||
|
||||
//===================================================================================
|
||||
// GpuMat
|
||||
//===================================================================================
|
||||
|
||||
inline
|
||||
GpuMat::GpuMat(Allocator* allocator_)
|
||||
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
|
||||
{}
|
||||
|
||||
inline
|
||||
GpuMat::GpuMat(int rows_, int cols_, int type_, Allocator* allocator_)
|
||||
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
|
||||
{
|
||||
if (rows_ > 0 && cols_ > 0)
|
||||
create(rows_, cols_, type_);
|
||||
}
|
||||
|
||||
inline
|
||||
GpuMat::GpuMat(Size size_, int type_, Allocator* allocator_)
|
||||
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
|
||||
{
|
||||
if (size_.height > 0 && size_.width > 0)
|
||||
create(size_.height, size_.width, type_);
|
||||
}
|
||||
|
||||
inline
|
||||
GpuMat::GpuMat(int rows_, int cols_, int type_, Scalar s_, Allocator* allocator_)
|
||||
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
|
||||
{
|
||||
if (rows_ > 0 && cols_ > 0)
|
||||
{
|
||||
create(rows_, cols_, type_);
|
||||
setTo(s_);
|
||||
}
|
||||
}
|
||||
|
||||
inline
|
||||
GpuMat::GpuMat(Size size_, int type_, Scalar s_, Allocator* allocator_)
|
||||
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
|
||||
{
|
||||
if (size_.height > 0 && size_.width > 0)
|
||||
{
|
||||
create(size_.height, size_.width, type_);
|
||||
setTo(s_);
|
||||
}
|
||||
}
|
||||
|
||||
inline
|
||||
GpuMat::GpuMat(const GpuMat& m)
|
||||
: flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), refcount(m.refcount), datastart(m.datastart), dataend(m.dataend), allocator(m.allocator)
|
||||
{
|
||||
if (refcount)
|
||||
CV_XADD(refcount, 1);
|
||||
}
|
||||
|
||||
inline
|
||||
GpuMat::GpuMat(InputArray arr, Allocator* allocator_) :
|
||||
flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
|
||||
{
|
||||
upload(arr);
|
||||
}
|
||||
|
||||
inline
|
||||
GpuMat::~GpuMat()
|
||||
{
|
||||
release();
|
||||
}
|
||||
|
||||
inline
|
||||
GpuMat& GpuMat::operator =(const GpuMat& m)
|
||||
{
|
||||
if (this != &m)
|
||||
{
|
||||
GpuMat temp(m);
|
||||
swap(temp);
|
||||
}
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
inline
|
||||
void GpuMat::create(Size size_, int type_)
|
||||
{
|
||||
create(size_.height, size_.width, type_);
|
||||
}
|
||||
|
||||
inline
|
||||
void GpuMat::swap(GpuMat& b)
|
||||
{
|
||||
std::swap(flags, b.flags);
|
||||
std::swap(rows, b.rows);
|
||||
std::swap(cols, b.cols);
|
||||
std::swap(step, b.step);
|
||||
std::swap(data, b.data);
|
||||
std::swap(datastart, b.datastart);
|
||||
std::swap(dataend, b.dataend);
|
||||
std::swap(refcount, b.refcount);
|
||||
std::swap(allocator, b.allocator);
|
||||
}
|
||||
|
||||
inline
|
||||
GpuMat GpuMat::clone() const
|
||||
{
|
||||
GpuMat m;
|
||||
copyTo(m);
|
||||
return m;
|
||||
}
|
||||
|
||||
inline
|
||||
void GpuMat::copyTo(OutputArray dst, InputArray mask) const
|
||||
{
|
||||
copyTo(dst, mask, Stream::Null());
|
||||
}
|
||||
|
||||
inline
|
||||
GpuMat& GpuMat::setTo(Scalar s)
|
||||
{
|
||||
return setTo(s, Stream::Null());
|
||||
}
|
||||
|
||||
inline
|
||||
GpuMat& GpuMat::setTo(Scalar s, InputArray mask)
|
||||
{
|
||||
return setTo(s, mask, Stream::Null());
|
||||
}
|
||||
|
||||
inline
|
||||
void GpuMat::convertTo(OutputArray dst, int rtype) const
|
||||
{
|
||||
convertTo(dst, rtype, Stream::Null());
|
||||
}
|
||||
|
||||
inline
|
||||
void GpuMat::convertTo(OutputArray dst, int rtype, double alpha, double beta) const
|
||||
{
|
||||
convertTo(dst, rtype, alpha, beta, Stream::Null());
|
||||
}
|
||||
|
||||
inline
|
||||
void GpuMat::convertTo(OutputArray dst, int rtype, double alpha, Stream& stream) const
|
||||
{
|
||||
convertTo(dst, rtype, alpha, 0.0, stream);
|
||||
}
|
||||
|
||||
inline
|
||||
void GpuMat::assignTo(GpuMat& m, int _type) const
|
||||
{
|
||||
if (_type < 0)
|
||||
m = *this;
|
||||
else
|
||||
convertTo(m, _type);
|
||||
}
|
||||
|
||||
inline
|
||||
uchar* GpuMat::ptr(int y)
|
||||
{
|
||||
CV_DbgAssert( (unsigned)y < (unsigned)rows );
|
||||
return data + step * y;
|
||||
}
|
||||
|
||||
inline
|
||||
const uchar* GpuMat::ptr(int y) const
|
||||
{
|
||||
CV_DbgAssert( (unsigned)y < (unsigned)rows );
|
||||
return data + step * y;
|
||||
}
|
||||
|
||||
template<typename _Tp> inline
|
||||
_Tp* GpuMat::ptr(int y)
|
||||
{
|
||||
return (_Tp*)ptr(y);
|
||||
}
|
||||
|
||||
template<typename _Tp> inline
|
||||
const _Tp* GpuMat::ptr(int y) const
|
||||
{
|
||||
return (const _Tp*)ptr(y);
|
||||
}
|
||||
|
||||
template <class T> inline
|
||||
GpuMat::operator PtrStepSz<T>() const
|
||||
{
|
||||
return PtrStepSz<T>(rows, cols, (T*)data, step);
|
||||
}
|
||||
|
||||
template <class T> inline
|
||||
GpuMat::operator PtrStep<T>() const
|
||||
{
|
||||
return PtrStep<T>((T*)data, step);
|
||||
}
|
||||
|
||||
inline
|
||||
GpuMat GpuMat::row(int y) const
|
||||
{
|
||||
return GpuMat(*this, Range(y, y+1), Range::all());
|
||||
}
|
||||
|
||||
inline
|
||||
GpuMat GpuMat::col(int x) const
|
||||
{
|
||||
return GpuMat(*this, Range::all(), Range(x, x+1));
|
||||
}
|
||||
|
||||
inline
|
||||
GpuMat GpuMat::rowRange(int startrow, int endrow) const
|
||||
{
|
||||
return GpuMat(*this, Range(startrow, endrow), Range::all());
|
||||
}
|
||||
|
||||
inline
|
||||
GpuMat GpuMat::rowRange(Range r) const
|
||||
{
|
||||
return GpuMat(*this, r, Range::all());
|
||||
}
|
||||
|
||||
inline
|
||||
GpuMat GpuMat::colRange(int startcol, int endcol) const
|
||||
{
|
||||
return GpuMat(*this, Range::all(), Range(startcol, endcol));
|
||||
}
|
||||
|
||||
inline
|
||||
GpuMat GpuMat::colRange(Range r) const
|
||||
{
|
||||
return GpuMat(*this, Range::all(), r);
|
||||
}
|
||||
|
||||
inline
|
||||
GpuMat GpuMat::operator ()(Range rowRange_, Range colRange_) const
|
||||
{
|
||||
return GpuMat(*this, rowRange_, colRange_);
|
||||
}
|
||||
|
||||
inline
|
||||
GpuMat GpuMat::operator ()(Rect roi) const
|
||||
{
|
||||
return GpuMat(*this, roi);
|
||||
}
|
||||
|
||||
inline
|
||||
bool GpuMat::isContinuous() const
|
||||
{
|
||||
return (flags & Mat::CONTINUOUS_FLAG) != 0;
|
||||
}
|
||||
|
||||
inline
|
||||
size_t GpuMat::elemSize() const
|
||||
{
|
||||
return CV_ELEM_SIZE(flags);
|
||||
}
|
||||
|
||||
inline
|
||||
size_t GpuMat::elemSize1() const
|
||||
{
|
||||
return CV_ELEM_SIZE1(flags);
|
||||
}
|
||||
|
||||
inline
|
||||
int GpuMat::type() const
|
||||
{
|
||||
return CV_MAT_TYPE(flags);
|
||||
}
|
||||
|
||||
inline
|
||||
int GpuMat::depth() const
|
||||
{
|
||||
return CV_MAT_DEPTH(flags);
|
||||
}
|
||||
|
||||
inline
|
||||
int GpuMat::channels() const
|
||||
{
|
||||
return CV_MAT_CN(flags);
|
||||
}
|
||||
|
||||
inline
|
||||
size_t GpuMat::step1() const
|
||||
{
|
||||
return step / elemSize1();
|
||||
}
|
||||
|
||||
inline
|
||||
Size GpuMat::size() const
|
||||
{
|
||||
return Size(cols, rows);
|
||||
}
|
||||
|
||||
inline
|
||||
bool GpuMat::empty() const
|
||||
{
|
||||
return data == 0;
|
||||
}
|
||||
|
||||
static inline
|
||||
GpuMat createContinuous(int rows, int cols, int type)
|
||||
{
|
||||
GpuMat m;
|
||||
createContinuous(rows, cols, type, m);
|
||||
return m;
|
||||
}
|
||||
|
||||
static inline
|
||||
void createContinuous(Size size, int type, OutputArray arr)
|
||||
{
|
||||
createContinuous(size.height, size.width, type, arr);
|
||||
}
|
||||
|
||||
static inline
|
||||
GpuMat createContinuous(Size size, int type)
|
||||
{
|
||||
GpuMat m;
|
||||
createContinuous(size, type, m);
|
||||
return m;
|
||||
}
|
||||
|
||||
static inline
|
||||
void ensureSizeIsEnough(Size size, int type, OutputArray arr)
|
||||
{
|
||||
ensureSizeIsEnough(size.height, size.width, type, arr);
|
||||
}
|
||||
|
||||
static inline
|
||||
void swap(GpuMat& a, GpuMat& b)
|
||||
{
|
||||
a.swap(b);
|
||||
}
|
||||
|
||||
//===================================================================================
|
||||
// HostMem
|
||||
//===================================================================================
|
||||
|
||||
inline
|
||||
HostMem::HostMem(AllocType alloc_type_)
|
||||
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(alloc_type_)
|
||||
{
|
||||
}
|
||||
|
||||
inline
|
||||
HostMem::HostMem(const HostMem& m)
|
||||
: flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), refcount(m.refcount), datastart(m.datastart), dataend(m.dataend), alloc_type(m.alloc_type)
|
||||
{
|
||||
if( refcount )
|
||||
CV_XADD(refcount, 1);
|
||||
}
|
||||
|
||||
inline
|
||||
HostMem::HostMem(int rows_, int cols_, int type_, AllocType alloc_type_)
|
||||
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(alloc_type_)
|
||||
{
|
||||
if (rows_ > 0 && cols_ > 0)
|
||||
create(rows_, cols_, type_);
|
||||
}
|
||||
|
||||
inline
|
||||
HostMem::HostMem(Size size_, int type_, AllocType alloc_type_)
|
||||
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(alloc_type_)
|
||||
{
|
||||
if (size_.height > 0 && size_.width > 0)
|
||||
create(size_.height, size_.width, type_);
|
||||
}
|
||||
|
||||
inline
|
||||
HostMem::HostMem(InputArray arr, AllocType alloc_type_)
|
||||
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(alloc_type_)
|
||||
{
|
||||
arr.getMat().copyTo(*this);
|
||||
}
|
||||
|
||||
inline
|
||||
HostMem::~HostMem()
|
||||
{
|
||||
release();
|
||||
}
|
||||
|
||||
inline
|
||||
HostMem& HostMem::operator =(const HostMem& m)
|
||||
{
|
||||
if (this != &m)
|
||||
{
|
||||
HostMem temp(m);
|
||||
swap(temp);
|
||||
}
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
inline
|
||||
void HostMem::swap(HostMem& b)
|
||||
{
|
||||
std::swap(flags, b.flags);
|
||||
std::swap(rows, b.rows);
|
||||
std::swap(cols, b.cols);
|
||||
std::swap(step, b.step);
|
||||
std::swap(data, b.data);
|
||||
std::swap(datastart, b.datastart);
|
||||
std::swap(dataend, b.dataend);
|
||||
std::swap(refcount, b.refcount);
|
||||
std::swap(alloc_type, b.alloc_type);
|
||||
}
|
||||
|
||||
inline
|
||||
HostMem HostMem::clone() const
|
||||
{
|
||||
HostMem m(size(), type(), alloc_type);
|
||||
createMatHeader().copyTo(m);
|
||||
return m;
|
||||
}
|
||||
|
||||
inline
|
||||
void HostMem::create(Size size_, int type_)
|
||||
{
|
||||
create(size_.height, size_.width, type_);
|
||||
}
|
||||
|
||||
inline
|
||||
Mat HostMem::createMatHeader() const
|
||||
{
|
||||
return Mat(size(), type(), data, step);
|
||||
}
|
||||
|
||||
inline
|
||||
bool HostMem::isContinuous() const
|
||||
{
|
||||
return (flags & Mat::CONTINUOUS_FLAG) != 0;
|
||||
}
|
||||
|
||||
inline
|
||||
size_t HostMem::elemSize() const
|
||||
{
|
||||
return CV_ELEM_SIZE(flags);
|
||||
}
|
||||
|
||||
inline
|
||||
size_t HostMem::elemSize1() const
|
||||
{
|
||||
return CV_ELEM_SIZE1(flags);
|
||||
}
|
||||
|
||||
inline
|
||||
int HostMem::type() const
|
||||
{
|
||||
return CV_MAT_TYPE(flags);
|
||||
}
|
||||
|
||||
inline
|
||||
int HostMem::depth() const
|
||||
{
|
||||
return CV_MAT_DEPTH(flags);
|
||||
}
|
||||
|
||||
inline
|
||||
int HostMem::channels() const
|
||||
{
|
||||
return CV_MAT_CN(flags);
|
||||
}
|
||||
|
||||
inline
|
||||
size_t HostMem::step1() const
|
||||
{
|
||||
return step / elemSize1();
|
||||
}
|
||||
|
||||
inline
|
||||
Size HostMem::size() const
|
||||
{
|
||||
return Size(cols, rows);
|
||||
}
|
||||
|
||||
inline
|
||||
bool HostMem::empty() const
|
||||
{
|
||||
return data == 0;
|
||||
}
|
||||
|
||||
static inline
|
||||
void swap(HostMem& a, HostMem& b)
|
||||
{
|
||||
a.swap(b);
|
||||
}
|
||||
|
||||
//===================================================================================
|
||||
// Stream
|
||||
//===================================================================================
|
||||
|
||||
inline
|
||||
Stream::Stream(const Ptr<Impl>& impl)
|
||||
: impl_(impl)
|
||||
{
|
||||
}
|
||||
|
||||
//===================================================================================
|
||||
// Initialization & Info
|
||||
//===================================================================================
|
||||
|
||||
inline
|
||||
bool TargetArchs::has(int major, int minor)
|
||||
{
|
||||
return hasPtx(major, minor) || hasBin(major, minor);
|
||||
}
|
||||
|
||||
inline
|
||||
bool TargetArchs::hasEqualOrGreater(int major, int minor)
|
||||
{
|
||||
return hasEqualOrGreaterPtx(major, minor) || hasEqualOrGreaterBin(major, minor);
|
||||
}
|
||||
|
||||
inline
|
||||
DeviceInfo::DeviceInfo()
|
||||
{
|
||||
device_id_ = getDevice();
|
||||
}
|
||||
|
||||
inline
|
||||
DeviceInfo::DeviceInfo(int device_id)
|
||||
{
|
||||
CV_Assert( device_id >= 0 && device_id < getCudaEnabledDeviceCount() );
|
||||
device_id_ = device_id;
|
||||
}
|
||||
|
||||
inline
|
||||
int DeviceInfo::deviceID() const
|
||||
{
|
||||
return device_id_;
|
||||
}
|
||||
|
||||
inline
|
||||
size_t DeviceInfo::freeMemory() const
|
||||
{
|
||||
size_t _totalMemory, _freeMemory;
|
||||
queryMemory(_totalMemory, _freeMemory);
|
||||
return _freeMemory;
|
||||
}
|
||||
|
||||
inline
|
||||
size_t DeviceInfo::totalMemory() const
|
||||
{
|
||||
size_t _totalMemory, _freeMemory;
|
||||
queryMemory(_totalMemory, _freeMemory);
|
||||
return _totalMemory;
|
||||
}
|
||||
|
||||
inline
|
||||
bool DeviceInfo::supports(FeatureSet feature_set) const
|
||||
{
|
||||
int version = majorVersion() * 10 + minorVersion();
|
||||
return version >= feature_set;
|
||||
}
|
||||
|
||||
|
||||
}} // namespace cv { namespace cuda {
|
||||
|
||||
//===================================================================================
|
||||
// Mat
|
||||
//===================================================================================
|
||||
|
||||
namespace cv {
|
||||
|
||||
inline
|
||||
Mat::Mat(const cuda::GpuMat& m)
|
||||
: flags(0), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0), datalimit(0), allocator(0), u(0), size(&rows)
|
||||
{
|
||||
m.download(*this);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif // __OPENCV_CORE_CUDAINL_HPP__
|
211
3rdparty/include/opencv2/core/cuda/block.hpp
vendored
Normal file
211
3rdparty/include/opencv2/core/cuda/block.hpp
vendored
Normal file
@ -0,0 +1,211 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_CUDA_DEVICE_BLOCK_HPP__
|
||||
#define __OPENCV_CUDA_DEVICE_BLOCK_HPP__
|
||||
|
||||
/** @file
|
||||
* @deprecated Use @ref cudev instead.
|
||||
*/
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
struct Block
|
||||
{
|
||||
static __device__ __forceinline__ unsigned int id()
|
||||
{
|
||||
return blockIdx.x;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int stride()
|
||||
{
|
||||
return blockDim.x * blockDim.y * blockDim.z;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ void sync()
|
||||
{
|
||||
__syncthreads();
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ int flattenedThreadId()
|
||||
{
|
||||
return threadIdx.z * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x;
|
||||
}
|
||||
|
||||
template<typename It, typename T>
|
||||
static __device__ __forceinline__ void fill(It beg, It end, const T& value)
|
||||
{
|
||||
int STRIDE = stride();
|
||||
It t = beg + flattenedThreadId();
|
||||
|
||||
for(; t < end; t += STRIDE)
|
||||
*t = value;
|
||||
}
|
||||
|
||||
template<typename OutIt, typename T>
|
||||
static __device__ __forceinline__ void yota(OutIt beg, OutIt end, T value)
|
||||
{
|
||||
int STRIDE = stride();
|
||||
int tid = flattenedThreadId();
|
||||
value += tid;
|
||||
|
||||
for(OutIt t = beg + tid; t < end; t += STRIDE, value += STRIDE)
|
||||
*t = value;
|
||||
}
|
||||
|
||||
template<typename InIt, typename OutIt>
|
||||
static __device__ __forceinline__ void copy(InIt beg, InIt end, OutIt out)
|
||||
{
|
||||
int STRIDE = stride();
|
||||
InIt t = beg + flattenedThreadId();
|
||||
OutIt o = out + (t - beg);
|
||||
|
||||
for(; t < end; t += STRIDE, o += STRIDE)
|
||||
*o = *t;
|
||||
}
|
||||
|
||||
template<typename InIt, typename OutIt, class UnOp>
|
||||
static __device__ __forceinline__ void transfrom(InIt beg, InIt end, OutIt out, UnOp op)
|
||||
{
|
||||
int STRIDE = stride();
|
||||
InIt t = beg + flattenedThreadId();
|
||||
OutIt o = out + (t - beg);
|
||||
|
||||
for(; t < end; t += STRIDE, o += STRIDE)
|
||||
*o = op(*t);
|
||||
}
|
||||
|
||||
template<typename InIt1, typename InIt2, typename OutIt, class BinOp>
|
||||
static __device__ __forceinline__ void transfrom(InIt1 beg1, InIt1 end1, InIt2 beg2, OutIt out, BinOp op)
|
||||
{
|
||||
int STRIDE = stride();
|
||||
InIt1 t1 = beg1 + flattenedThreadId();
|
||||
InIt2 t2 = beg2 + flattenedThreadId();
|
||||
OutIt o = out + (t1 - beg1);
|
||||
|
||||
for(; t1 < end1; t1 += STRIDE, t2 += STRIDE, o += STRIDE)
|
||||
*o = op(*t1, *t2);
|
||||
}
|
||||
|
||||
template<int CTA_SIZE, typename T, class BinOp>
|
||||
static __device__ __forceinline__ void reduce(volatile T* buffer, BinOp op)
|
||||
{
|
||||
int tid = flattenedThreadId();
|
||||
T val = buffer[tid];
|
||||
|
||||
if (CTA_SIZE >= 1024) { if (tid < 512) buffer[tid] = val = op(val, buffer[tid + 512]); __syncthreads(); }
|
||||
if (CTA_SIZE >= 512) { if (tid < 256) buffer[tid] = val = op(val, buffer[tid + 256]); __syncthreads(); }
|
||||
if (CTA_SIZE >= 256) { if (tid < 128) buffer[tid] = val = op(val, buffer[tid + 128]); __syncthreads(); }
|
||||
if (CTA_SIZE >= 128) { if (tid < 64) buffer[tid] = val = op(val, buffer[tid + 64]); __syncthreads(); }
|
||||
|
||||
if (tid < 32)
|
||||
{
|
||||
if (CTA_SIZE >= 64) { buffer[tid] = val = op(val, buffer[tid + 32]); }
|
||||
if (CTA_SIZE >= 32) { buffer[tid] = val = op(val, buffer[tid + 16]); }
|
||||
if (CTA_SIZE >= 16) { buffer[tid] = val = op(val, buffer[tid + 8]); }
|
||||
if (CTA_SIZE >= 8) { buffer[tid] = val = op(val, buffer[tid + 4]); }
|
||||
if (CTA_SIZE >= 4) { buffer[tid] = val = op(val, buffer[tid + 2]); }
|
||||
if (CTA_SIZE >= 2) { buffer[tid] = val = op(val, buffer[tid + 1]); }
|
||||
}
|
||||
}
|
||||
|
||||
template<int CTA_SIZE, typename T, class BinOp>
|
||||
static __device__ __forceinline__ T reduce(volatile T* buffer, T init, BinOp op)
|
||||
{
|
||||
int tid = flattenedThreadId();
|
||||
T val = buffer[tid] = init;
|
||||
__syncthreads();
|
||||
|
||||
if (CTA_SIZE >= 1024) { if (tid < 512) buffer[tid] = val = op(val, buffer[tid + 512]); __syncthreads(); }
|
||||
if (CTA_SIZE >= 512) { if (tid < 256) buffer[tid] = val = op(val, buffer[tid + 256]); __syncthreads(); }
|
||||
if (CTA_SIZE >= 256) { if (tid < 128) buffer[tid] = val = op(val, buffer[tid + 128]); __syncthreads(); }
|
||||
if (CTA_SIZE >= 128) { if (tid < 64) buffer[tid] = val = op(val, buffer[tid + 64]); __syncthreads(); }
|
||||
|
||||
if (tid < 32)
|
||||
{
|
||||
if (CTA_SIZE >= 64) { buffer[tid] = val = op(val, buffer[tid + 32]); }
|
||||
if (CTA_SIZE >= 32) { buffer[tid] = val = op(val, buffer[tid + 16]); }
|
||||
if (CTA_SIZE >= 16) { buffer[tid] = val = op(val, buffer[tid + 8]); }
|
||||
if (CTA_SIZE >= 8) { buffer[tid] = val = op(val, buffer[tid + 4]); }
|
||||
if (CTA_SIZE >= 4) { buffer[tid] = val = op(val, buffer[tid + 2]); }
|
||||
if (CTA_SIZE >= 2) { buffer[tid] = val = op(val, buffer[tid + 1]); }
|
||||
}
|
||||
__syncthreads();
|
||||
return buffer[0];
|
||||
}
|
||||
|
||||
template <typename T, class BinOp>
|
||||
static __device__ __forceinline__ void reduce_n(T* data, unsigned int n, BinOp op)
|
||||
{
|
||||
int ftid = flattenedThreadId();
|
||||
int sft = stride();
|
||||
|
||||
if (sft < n)
|
||||
{
|
||||
for (unsigned int i = sft + ftid; i < n; i += sft)
|
||||
data[ftid] = op(data[ftid], data[i]);
|
||||
|
||||
__syncthreads();
|
||||
|
||||
n = sft;
|
||||
}
|
||||
|
||||
while (n > 1)
|
||||
{
|
||||
unsigned int half = n/2;
|
||||
|
||||
if (ftid < half)
|
||||
data[ftid] = op(data[ftid], data[n - ftid - 1]);
|
||||
|
||||
__syncthreads();
|
||||
|
||||
n = n - half;
|
||||
}
|
||||
}
|
||||
};
|
||||
}}}
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif /* __OPENCV_CUDA_DEVICE_BLOCK_HPP__ */
|
722
3rdparty/include/opencv2/core/cuda/border_interpolate.hpp
vendored
Normal file
722
3rdparty/include/opencv2/core/cuda/border_interpolate.hpp
vendored
Normal file
@ -0,0 +1,722 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_CUDA_BORDER_INTERPOLATE_HPP__
|
||||
#define __OPENCV_CUDA_BORDER_INTERPOLATE_HPP__
|
||||
|
||||
#include "saturate_cast.hpp"
|
||||
#include "vec_traits.hpp"
|
||||
#include "vec_math.hpp"
|
||||
|
||||
/** @file
|
||||
* @deprecated Use @ref cudev instead.
|
||||
*/
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
//////////////////////////////////////////////////////////////
|
||||
// BrdConstant
|
||||
|
||||
template <typename D> struct BrdRowConstant
|
||||
{
|
||||
typedef D result_type;
|
||||
|
||||
explicit __host__ __device__ __forceinline__ BrdRowConstant(int width_, const D& val_ = VecTraits<D>::all(0)) : width(width_), val(val_) {}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const
|
||||
{
|
||||
return x >= 0 ? saturate_cast<D>(data[x]) : val;
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const
|
||||
{
|
||||
return x < width ? saturate_cast<D>(data[x]) : val;
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at(int x, const T* data) const
|
||||
{
|
||||
return (x >= 0 && x < width) ? saturate_cast<D>(data[x]) : val;
|
||||
}
|
||||
|
||||
int width;
|
||||
D val;
|
||||
};
|
||||
|
||||
template <typename D> struct BrdColConstant
|
||||
{
|
||||
typedef D result_type;
|
||||
|
||||
explicit __host__ __device__ __forceinline__ BrdColConstant(int height_, const D& val_ = VecTraits<D>::all(0)) : height(height_), val(val_) {}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const
|
||||
{
|
||||
return y >= 0 ? saturate_cast<D>(*(const T*)((const char*)data + y * step)) : val;
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const
|
||||
{
|
||||
return y < height ? saturate_cast<D>(*(const T*)((const char*)data + y * step)) : val;
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const
|
||||
{
|
||||
return (y >= 0 && y < height) ? saturate_cast<D>(*(const T*)((const char*)data + y * step)) : val;
|
||||
}
|
||||
|
||||
int height;
|
||||
D val;
|
||||
};
|
||||
|
||||
template <typename D> struct BrdConstant
|
||||
{
|
||||
typedef D result_type;
|
||||
|
||||
__host__ __device__ __forceinline__ BrdConstant(int height_, int width_, const D& val_ = VecTraits<D>::all(0)) : height(height_), width(width_), val(val_)
|
||||
{
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const
|
||||
{
|
||||
return (x >= 0 && x < width && y >= 0 && y < height) ? saturate_cast<D>(((const T*)((const uchar*)data + y * step))[x]) : val;
|
||||
}
|
||||
|
||||
template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const
|
||||
{
|
||||
return (x >= 0 && x < width && y >= 0 && y < height) ? saturate_cast<D>(src(y, x)) : val;
|
||||
}
|
||||
|
||||
int height;
|
||||
int width;
|
||||
D val;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////
|
||||
// BrdReplicate
|
||||
|
||||
template <typename D> struct BrdRowReplicate
|
||||
{
|
||||
typedef D result_type;
|
||||
|
||||
explicit __host__ __device__ __forceinline__ BrdRowReplicate(int width) : last_col(width - 1) {}
|
||||
template <typename U> __host__ __device__ __forceinline__ BrdRowReplicate(int width, U) : last_col(width - 1) {}
|
||||
|
||||
__device__ __forceinline__ int idx_col_low(int x) const
|
||||
{
|
||||
return ::max(x, 0);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_col_high(int x) const
|
||||
{
|
||||
return ::min(x, last_col);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_col(int x) const
|
||||
{
|
||||
return idx_col_low(idx_col_high(x));
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const
|
||||
{
|
||||
return saturate_cast<D>(data[idx_col_low(x)]);
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const
|
||||
{
|
||||
return saturate_cast<D>(data[idx_col_high(x)]);
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at(int x, const T* data) const
|
||||
{
|
||||
return saturate_cast<D>(data[idx_col(x)]);
|
||||
}
|
||||
|
||||
int last_col;
|
||||
};
|
||||
|
||||
template <typename D> struct BrdColReplicate
|
||||
{
|
||||
typedef D result_type;
|
||||
|
||||
explicit __host__ __device__ __forceinline__ BrdColReplicate(int height) : last_row(height - 1) {}
|
||||
template <typename U> __host__ __device__ __forceinline__ BrdColReplicate(int height, U) : last_row(height - 1) {}
|
||||
|
||||
__device__ __forceinline__ int idx_row_low(int y) const
|
||||
{
|
||||
return ::max(y, 0);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_row_high(int y) const
|
||||
{
|
||||
return ::min(y, last_row);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_row(int y) const
|
||||
{
|
||||
return idx_row_low(idx_row_high(y));
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const
|
||||
{
|
||||
return saturate_cast<D>(*(const T*)((const char*)data + idx_row_low(y) * step));
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const
|
||||
{
|
||||
return saturate_cast<D>(*(const T*)((const char*)data + idx_row_high(y) * step));
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const
|
||||
{
|
||||
return saturate_cast<D>(*(const T*)((const char*)data + idx_row(y) * step));
|
||||
}
|
||||
|
||||
int last_row;
|
||||
};
|
||||
|
||||
template <typename D> struct BrdReplicate
|
||||
{
|
||||
typedef D result_type;
|
||||
|
||||
__host__ __device__ __forceinline__ BrdReplicate(int height, int width) : last_row(height - 1), last_col(width - 1) {}
|
||||
template <typename U> __host__ __device__ __forceinline__ BrdReplicate(int height, int width, U) : last_row(height - 1), last_col(width - 1) {}
|
||||
|
||||
__device__ __forceinline__ int idx_row_low(int y) const
|
||||
{
|
||||
return ::max(y, 0);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_row_high(int y) const
|
||||
{
|
||||
return ::min(y, last_row);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_row(int y) const
|
||||
{
|
||||
return idx_row_low(idx_row_high(y));
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_col_low(int x) const
|
||||
{
|
||||
return ::max(x, 0);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_col_high(int x) const
|
||||
{
|
||||
return ::min(x, last_col);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_col(int x) const
|
||||
{
|
||||
return idx_col_low(idx_col_high(x));
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const
|
||||
{
|
||||
return saturate_cast<D>(((const T*)((const char*)data + idx_row(y) * step))[idx_col(x)]);
|
||||
}
|
||||
|
||||
template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const
|
||||
{
|
||||
return saturate_cast<D>(src(idx_row(y), idx_col(x)));
|
||||
}
|
||||
|
||||
int last_row;
|
||||
int last_col;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////
|
||||
// BrdReflect101
|
||||
|
||||
template <typename D> struct BrdRowReflect101
|
||||
{
|
||||
typedef D result_type;
|
||||
|
||||
explicit __host__ __device__ __forceinline__ BrdRowReflect101(int width) : last_col(width - 1) {}
|
||||
template <typename U> __host__ __device__ __forceinline__ BrdRowReflect101(int width, U) : last_col(width - 1) {}
|
||||
|
||||
__device__ __forceinline__ int idx_col_low(int x) const
|
||||
{
|
||||
return ::abs(x) % (last_col + 1);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_col_high(int x) const
|
||||
{
|
||||
return ::abs(last_col - ::abs(last_col - x)) % (last_col + 1);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_col(int x) const
|
||||
{
|
||||
return idx_col_low(idx_col_high(x));
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const
|
||||
{
|
||||
return saturate_cast<D>(data[idx_col_low(x)]);
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const
|
||||
{
|
||||
return saturate_cast<D>(data[idx_col_high(x)]);
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at(int x, const T* data) const
|
||||
{
|
||||
return saturate_cast<D>(data[idx_col(x)]);
|
||||
}
|
||||
|
||||
int last_col;
|
||||
};
|
||||
|
||||
template <typename D> struct BrdColReflect101
|
||||
{
|
||||
typedef D result_type;
|
||||
|
||||
explicit __host__ __device__ __forceinline__ BrdColReflect101(int height) : last_row(height - 1) {}
|
||||
template <typename U> __host__ __device__ __forceinline__ BrdColReflect101(int height, U) : last_row(height - 1) {}
|
||||
|
||||
__device__ __forceinline__ int idx_row_low(int y) const
|
||||
{
|
||||
return ::abs(y) % (last_row + 1);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_row_high(int y) const
|
||||
{
|
||||
return ::abs(last_row - ::abs(last_row - y)) % (last_row + 1);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_row(int y) const
|
||||
{
|
||||
return idx_row_low(idx_row_high(y));
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const
|
||||
{
|
||||
return saturate_cast<D>(*(const D*)((const char*)data + idx_row_low(y) * step));
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const
|
||||
{
|
||||
return saturate_cast<D>(*(const D*)((const char*)data + idx_row_high(y) * step));
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const
|
||||
{
|
||||
return saturate_cast<D>(*(const D*)((const char*)data + idx_row(y) * step));
|
||||
}
|
||||
|
||||
int last_row;
|
||||
};
|
||||
|
||||
template <typename D> struct BrdReflect101
|
||||
{
|
||||
typedef D result_type;
|
||||
|
||||
__host__ __device__ __forceinline__ BrdReflect101(int height, int width) : last_row(height - 1), last_col(width - 1) {}
|
||||
template <typename U> __host__ __device__ __forceinline__ BrdReflect101(int height, int width, U) : last_row(height - 1), last_col(width - 1) {}
|
||||
|
||||
__device__ __forceinline__ int idx_row_low(int y) const
|
||||
{
|
||||
return ::abs(y) % (last_row + 1);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_row_high(int y) const
|
||||
{
|
||||
return ::abs(last_row - ::abs(last_row - y)) % (last_row + 1);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_row(int y) const
|
||||
{
|
||||
return idx_row_low(idx_row_high(y));
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_col_low(int x) const
|
||||
{
|
||||
return ::abs(x) % (last_col + 1);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_col_high(int x) const
|
||||
{
|
||||
return ::abs(last_col - ::abs(last_col - x)) % (last_col + 1);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_col(int x) const
|
||||
{
|
||||
return idx_col_low(idx_col_high(x));
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const
|
||||
{
|
||||
return saturate_cast<D>(((const T*)((const char*)data + idx_row(y) * step))[idx_col(x)]);
|
||||
}
|
||||
|
||||
template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const
|
||||
{
|
||||
return saturate_cast<D>(src(idx_row(y), idx_col(x)));
|
||||
}
|
||||
|
||||
int last_row;
|
||||
int last_col;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////
|
||||
// BrdReflect
|
||||
|
||||
template <typename D> struct BrdRowReflect
|
||||
{
|
||||
typedef D result_type;
|
||||
|
||||
explicit __host__ __device__ __forceinline__ BrdRowReflect(int width) : last_col(width - 1) {}
|
||||
template <typename U> __host__ __device__ __forceinline__ BrdRowReflect(int width, U) : last_col(width - 1) {}
|
||||
|
||||
__device__ __forceinline__ int idx_col_low(int x) const
|
||||
{
|
||||
return (::abs(x) - (x < 0)) % (last_col + 1);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_col_high(int x) const
|
||||
{
|
||||
return ::abs(last_col - ::abs(last_col - x) + (x > last_col)) % (last_col + 1);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_col(int x) const
|
||||
{
|
||||
return idx_col_high(::abs(x) - (x < 0));
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const
|
||||
{
|
||||
return saturate_cast<D>(data[idx_col_low(x)]);
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const
|
||||
{
|
||||
return saturate_cast<D>(data[idx_col_high(x)]);
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at(int x, const T* data) const
|
||||
{
|
||||
return saturate_cast<D>(data[idx_col(x)]);
|
||||
}
|
||||
|
||||
int last_col;
|
||||
};
|
||||
|
||||
template <typename D> struct BrdColReflect
|
||||
{
|
||||
typedef D result_type;
|
||||
|
||||
explicit __host__ __device__ __forceinline__ BrdColReflect(int height) : last_row(height - 1) {}
|
||||
template <typename U> __host__ __device__ __forceinline__ BrdColReflect(int height, U) : last_row(height - 1) {}
|
||||
|
||||
__device__ __forceinline__ int idx_row_low(int y) const
|
||||
{
|
||||
return (::abs(y) - (y < 0)) % (last_row + 1);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_row_high(int y) const
|
||||
{
|
||||
return ::abs(last_row - ::abs(last_row - y) + (y > last_row)) % (last_row + 1);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_row(int y) const
|
||||
{
|
||||
return idx_row_high(::abs(y) - (y < 0));
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const
|
||||
{
|
||||
return saturate_cast<D>(*(const D*)((const char*)data + idx_row_low(y) * step));
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const
|
||||
{
|
||||
return saturate_cast<D>(*(const D*)((const char*)data + idx_row_high(y) * step));
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const
|
||||
{
|
||||
return saturate_cast<D>(*(const D*)((const char*)data + idx_row(y) * step));
|
||||
}
|
||||
|
||||
int last_row;
|
||||
};
|
||||
|
||||
template <typename D> struct BrdReflect
|
||||
{
|
||||
typedef D result_type;
|
||||
|
||||
__host__ __device__ __forceinline__ BrdReflect(int height, int width) : last_row(height - 1), last_col(width - 1) {}
|
||||
template <typename U> __host__ __device__ __forceinline__ BrdReflect(int height, int width, U) : last_row(height - 1), last_col(width - 1) {}
|
||||
|
||||
__device__ __forceinline__ int idx_row_low(int y) const
|
||||
{
|
||||
return (::abs(y) - (y < 0)) % (last_row + 1);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_row_high(int y) const
|
||||
{
|
||||
return /*::abs*/(last_row - ::abs(last_row - y) + (y > last_row)) /*% (last_row + 1)*/;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_row(int y) const
|
||||
{
|
||||
return idx_row_low(idx_row_high(y));
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_col_low(int x) const
|
||||
{
|
||||
return (::abs(x) - (x < 0)) % (last_col + 1);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_col_high(int x) const
|
||||
{
|
||||
return (last_col - ::abs(last_col - x) + (x > last_col));
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_col(int x) const
|
||||
{
|
||||
return idx_col_low(idx_col_high(x));
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const
|
||||
{
|
||||
return saturate_cast<D>(((const T*)((const char*)data + idx_row(y) * step))[idx_col(x)]);
|
||||
}
|
||||
|
||||
template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const
|
||||
{
|
||||
return saturate_cast<D>(src(idx_row(y), idx_col(x)));
|
||||
}
|
||||
|
||||
int last_row;
|
||||
int last_col;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////
|
||||
// BrdWrap
|
||||
|
||||
template <typename D> struct BrdRowWrap
|
||||
{
|
||||
typedef D result_type;
|
||||
|
||||
explicit __host__ __device__ __forceinline__ BrdRowWrap(int width_) : width(width_) {}
|
||||
template <typename U> __host__ __device__ __forceinline__ BrdRowWrap(int width_, U) : width(width_) {}
|
||||
|
||||
__device__ __forceinline__ int idx_col_low(int x) const
|
||||
{
|
||||
return (x >= 0) * x + (x < 0) * (x - ((x - width + 1) / width) * width);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_col_high(int x) const
|
||||
{
|
||||
return (x < width) * x + (x >= width) * (x % width);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_col(int x) const
|
||||
{
|
||||
return idx_col_high(idx_col_low(x));
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const
|
||||
{
|
||||
return saturate_cast<D>(data[idx_col_low(x)]);
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const
|
||||
{
|
||||
return saturate_cast<D>(data[idx_col_high(x)]);
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at(int x, const T* data) const
|
||||
{
|
||||
return saturate_cast<D>(data[idx_col(x)]);
|
||||
}
|
||||
|
||||
int width;
|
||||
};
|
||||
|
||||
template <typename D> struct BrdColWrap
|
||||
{
|
||||
typedef D result_type;
|
||||
|
||||
explicit __host__ __device__ __forceinline__ BrdColWrap(int height_) : height(height_) {}
|
||||
template <typename U> __host__ __device__ __forceinline__ BrdColWrap(int height_, U) : height(height_) {}
|
||||
|
||||
__device__ __forceinline__ int idx_row_low(int y) const
|
||||
{
|
||||
return (y >= 0) * y + (y < 0) * (y - ((y - height + 1) / height) * height);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_row_high(int y) const
|
||||
{
|
||||
return (y < height) * y + (y >= height) * (y % height);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_row(int y) const
|
||||
{
|
||||
return idx_row_high(idx_row_low(y));
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const
|
||||
{
|
||||
return saturate_cast<D>(*(const D*)((const char*)data + idx_row_low(y) * step));
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const
|
||||
{
|
||||
return saturate_cast<D>(*(const D*)((const char*)data + idx_row_high(y) * step));
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const
|
||||
{
|
||||
return saturate_cast<D>(*(const D*)((const char*)data + idx_row(y) * step));
|
||||
}
|
||||
|
||||
int height;
|
||||
};
|
||||
|
||||
template <typename D> struct BrdWrap
|
||||
{
|
||||
typedef D result_type;
|
||||
|
||||
__host__ __device__ __forceinline__ BrdWrap(int height_, int width_) :
|
||||
height(height_), width(width_)
|
||||
{
|
||||
}
|
||||
template <typename U>
|
||||
__host__ __device__ __forceinline__ BrdWrap(int height_, int width_, U) :
|
||||
height(height_), width(width_)
|
||||
{
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_row_low(int y) const
|
||||
{
|
||||
return (y >= 0) * y + (y < 0) * (y - ((y - height + 1) / height) * height);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_row_high(int y) const
|
||||
{
|
||||
return (y < height) * y + (y >= height) * (y % height);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_row(int y) const
|
||||
{
|
||||
return idx_row_high(idx_row_low(y));
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_col_low(int x) const
|
||||
{
|
||||
return (x >= 0) * x + (x < 0) * (x - ((x - width + 1) / width) * width);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_col_high(int x) const
|
||||
{
|
||||
return (x < width) * x + (x >= width) * (x % width);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ int idx_col(int x) const
|
||||
{
|
||||
return idx_col_high(idx_col_low(x));
|
||||
}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const
|
||||
{
|
||||
return saturate_cast<D>(((const T*)((const char*)data + idx_row(y) * step))[idx_col(x)]);
|
||||
}
|
||||
|
||||
template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const
|
||||
{
|
||||
return saturate_cast<D>(src(idx_row(y), idx_col(x)));
|
||||
}
|
||||
|
||||
int height;
|
||||
int width;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////
|
||||
// BorderReader
|
||||
|
||||
template <typename Ptr2D, typename B> struct BorderReader
|
||||
{
|
||||
typedef typename B::result_type elem_type;
|
||||
typedef typename Ptr2D::index_type index_type;
|
||||
|
||||
__host__ __device__ __forceinline__ BorderReader(const Ptr2D& ptr_, const B& b_) : ptr(ptr_), b(b_) {}
|
||||
|
||||
__device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const
|
||||
{
|
||||
return b.at(y, x, ptr);
|
||||
}
|
||||
|
||||
Ptr2D ptr;
|
||||
B b;
|
||||
};
|
||||
|
||||
// under win32 there is some bug with templated types that passed as kernel parameters
|
||||
// with this specialization all works fine
|
||||
template <typename Ptr2D, typename D> struct BorderReader< Ptr2D, BrdConstant<D> >
|
||||
{
|
||||
typedef typename BrdConstant<D>::result_type elem_type;
|
||||
typedef typename Ptr2D::index_type index_type;
|
||||
|
||||
__host__ __device__ __forceinline__ BorderReader(const Ptr2D& src_, const BrdConstant<D>& b) :
|
||||
src(src_), height(b.height), width(b.width), val(b.val)
|
||||
{
|
||||
}
|
||||
|
||||
__device__ __forceinline__ D operator ()(index_type y, index_type x) const
|
||||
{
|
||||
return (x >= 0 && x < width && y >= 0 && y < height) ? saturate_cast<D>(src(y, x)) : val;
|
||||
}
|
||||
|
||||
Ptr2D src;
|
||||
int height;
|
||||
int width;
|
||||
D val;
|
||||
};
|
||||
}}} // namespace cv { namespace cuda { namespace cudev
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif // __OPENCV_CUDA_BORDER_INTERPOLATE_HPP__
|
309
3rdparty/include/opencv2/core/cuda/color.hpp
vendored
Normal file
309
3rdparty/include/opencv2/core/cuda/color.hpp
vendored
Normal file
@ -0,0 +1,309 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_CUDA_COLOR_HPP__
|
||||
#define __OPENCV_CUDA_COLOR_HPP__
|
||||
|
||||
#include "detail/color_detail.hpp"
|
||||
|
||||
/** @file
|
||||
* @deprecated Use @ref cudev instead.
|
||||
*/
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
// All OPENCV_CUDA_IMPLEMENT_*_TRAITS(ColorSpace1_to_ColorSpace2, ...) macros implements
|
||||
// template <typename T> class ColorSpace1_to_ColorSpace2_traits
|
||||
// {
|
||||
// typedef ... functor_type;
|
||||
// static __host__ __device__ functor_type create_functor();
|
||||
// };
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgr_to_rgb, 3, 3, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgr_to_bgra, 3, 4, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgr_to_rgba, 3, 4, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgra_to_bgr, 4, 3, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgra_to_rgb, 4, 3, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgra_to_rgba, 4, 4, 2)
|
||||
|
||||
#undef OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(bgr_to_bgr555, 3, 0, 5)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(bgr_to_bgr565, 3, 0, 6)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(rgb_to_bgr555, 3, 2, 5)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(rgb_to_bgr565, 3, 2, 6)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(bgra_to_bgr555, 4, 0, 5)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(bgra_to_bgr565, 4, 0, 6)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(rgba_to_bgr555, 4, 2, 5)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(rgba_to_bgr565, 4, 2, 6)
|
||||
|
||||
#undef OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_rgb, 3, 2, 5)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_rgb, 3, 2, 6)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_bgr, 3, 0, 5)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_bgr, 3, 0, 6)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_rgba, 4, 2, 5)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_rgba, 4, 2, 6)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_bgra, 4, 0, 5)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_bgra, 4, 0, 6)
|
||||
|
||||
#undef OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_GRAY2RGB_TRAITS(gray_to_bgr, 3)
|
||||
OPENCV_CUDA_IMPLEMENT_GRAY2RGB_TRAITS(gray_to_bgra, 4)
|
||||
|
||||
#undef OPENCV_CUDA_IMPLEMENT_GRAY2RGB_TRAITS
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_GRAY2RGB5x5_TRAITS(gray_to_bgr555, 5)
|
||||
OPENCV_CUDA_IMPLEMENT_GRAY2RGB5x5_TRAITS(gray_to_bgr565, 6)
|
||||
|
||||
#undef OPENCV_CUDA_IMPLEMENT_GRAY2RGB5x5_TRAITS
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_RGB5x52GRAY_TRAITS(bgr555_to_gray, 5)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB5x52GRAY_TRAITS(bgr565_to_gray, 6)
|
||||
|
||||
#undef OPENCV_CUDA_IMPLEMENT_RGB5x52GRAY_TRAITS
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS(rgb_to_gray, 3, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS(bgr_to_gray, 3, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS(rgba_to_gray, 4, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS(bgra_to_gray, 4, 0)
|
||||
|
||||
#undef OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(rgb_to_yuv, 3, 3, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(rgba_to_yuv, 4, 3, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(rgb_to_yuv4, 3, 4, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(rgba_to_yuv4, 4, 4, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(bgr_to_yuv, 3, 3, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(bgra_to_yuv, 4, 3, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(bgr_to_yuv4, 3, 4, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(bgra_to_yuv4, 4, 4, 0)
|
||||
|
||||
#undef OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_rgb, 3, 3, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_rgba, 3, 4, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_rgb, 4, 3, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_rgba, 4, 4, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_bgr, 3, 3, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_bgra, 3, 4, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_bgr, 4, 3, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_bgra, 4, 4, 0)
|
||||
|
||||
#undef OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(rgb_to_YCrCb, 3, 3, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(rgba_to_YCrCb, 4, 3, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(rgb_to_YCrCb4, 3, 4, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(rgba_to_YCrCb4, 4, 4, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(bgr_to_YCrCb, 3, 3, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(bgra_to_YCrCb, 4, 3, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(bgr_to_YCrCb4, 3, 4, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(bgra_to_YCrCb4, 4, 4, 0)
|
||||
|
||||
#undef OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_rgb, 3, 3, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_rgba, 3, 4, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_rgb, 4, 3, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_rgba, 4, 4, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_bgr, 3, 3, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_bgra, 3, 4, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_bgr, 4, 3, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_bgra, 4, 4, 0)
|
||||
|
||||
#undef OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(rgb_to_xyz, 3, 3, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(rgba_to_xyz, 4, 3, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(rgb_to_xyz4, 3, 4, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(rgba_to_xyz4, 4, 4, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(bgr_to_xyz, 3, 3, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(bgra_to_xyz, 4, 3, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(bgr_to_xyz4, 3, 4, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(bgra_to_xyz4, 4, 4, 0)
|
||||
|
||||
#undef OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_rgb, 3, 3, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_rgb, 4, 3, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_rgba, 3, 4, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_rgba, 4, 4, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_bgr, 3, 3, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_bgr, 4, 3, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_bgra, 3, 4, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_bgra, 4, 4, 0)
|
||||
|
||||
#undef OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(rgb_to_hsv, 3, 3, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(rgba_to_hsv, 4, 3, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(rgb_to_hsv4, 3, 4, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(rgba_to_hsv4, 4, 4, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(bgr_to_hsv, 3, 3, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(bgra_to_hsv, 4, 3, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(bgr_to_hsv4, 3, 4, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(bgra_to_hsv4, 4, 4, 0)
|
||||
|
||||
#undef OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_rgb, 3, 3, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_rgba, 3, 4, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_rgb, 4, 3, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_rgba, 4, 4, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_bgr, 3, 3, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_bgra, 3, 4, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_bgr, 4, 3, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_bgra, 4, 4, 0)
|
||||
|
||||
#undef OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(rgb_to_hls, 3, 3, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(rgba_to_hls, 4, 3, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(rgb_to_hls4, 3, 4, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(rgba_to_hls4, 4, 4, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(bgr_to_hls, 3, 3, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(bgra_to_hls, 4, 3, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(bgr_to_hls4, 3, 4, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(bgra_to_hls4, 4, 4, 0)
|
||||
|
||||
#undef OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls_to_rgb, 3, 3, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls_to_rgba, 3, 4, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_rgb, 4, 3, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_rgba, 4, 4, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls_to_bgr, 3, 3, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls_to_bgra, 3, 4, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_bgr, 4, 3, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_bgra, 4, 4, 0)
|
||||
|
||||
#undef OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(rgb_to_lab, 3, 3, true, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(rgba_to_lab, 4, 3, true, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(rgb_to_lab4, 3, 4, true, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(rgba_to_lab4, 4, 4, true, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(bgr_to_lab, 3, 3, true, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(bgra_to_lab, 4, 3, true, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(bgr_to_lab4, 3, 4, true, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(bgra_to_lab4, 4, 4, true, 0)
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lrgb_to_lab, 3, 3, false, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lrgba_to_lab, 4, 3, false, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lrgb_to_lab4, 3, 4, false, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lrgba_to_lab4, 4, 4, false, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lbgr_to_lab, 3, 3, false, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lbgra_to_lab, 4, 3, false, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lbgr_to_lab4, 3, 4, false, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lbgra_to_lab4, 4, 4, false, 0)
|
||||
|
||||
#undef OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_rgb, 3, 3, true, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_rgb, 4, 3, true, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_rgba, 3, 4, true, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_rgba, 4, 4, true, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_bgr, 3, 3, true, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_bgr, 4, 3, true, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_bgra, 3, 4, true, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_bgra, 4, 4, true, 0)
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_lrgb, 3, 3, false, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_lrgb, 4, 3, false, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_lrgba, 3, 4, false, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_lrgba, 4, 4, false, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_lbgr, 3, 3, false, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_lbgr, 4, 3, false, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_lbgra, 3, 4, false, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_lbgra, 4, 4, false, 0)
|
||||
|
||||
#undef OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(rgb_to_luv, 3, 3, true, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(rgba_to_luv, 4, 3, true, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(rgb_to_luv4, 3, 4, true, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(rgba_to_luv4, 4, 4, true, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(bgr_to_luv, 3, 3, true, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(bgra_to_luv, 4, 3, true, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(bgr_to_luv4, 3, 4, true, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(bgra_to_luv4, 4, 4, true, 0)
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lrgb_to_luv, 3, 3, false, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lrgba_to_luv, 4, 3, false, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lrgb_to_luv4, 3, 4, false, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lrgba_to_luv4, 4, 4, false, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lbgr_to_luv, 3, 3, false, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lbgra_to_luv, 4, 3, false, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lbgr_to_luv4, 3, 4, false, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lbgra_to_luv4, 4, 4, false, 0)
|
||||
|
||||
#undef OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_rgb, 3, 3, true, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_rgb, 4, 3, true, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_rgba, 3, 4, true, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_rgba, 4, 4, true, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_bgr, 3, 3, true, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_bgr, 4, 3, true, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_bgra, 3, 4, true, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_bgra, 4, 4, true, 0)
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_lrgb, 3, 3, false, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_lrgb, 4, 3, false, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_lrgba, 3, 4, false, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_lrgba, 4, 4, false, 2)
|
||||
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_lbgr, 3, 3, false, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_lbgr, 4, 3, false, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_lbgra, 3, 4, false, 0)
|
||||
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_lbgra, 4, 4, false, 0)
|
||||
|
||||
#undef OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS
|
||||
}}} // namespace cv { namespace cuda { namespace cudev
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif // __OPENCV_CUDA_BORDER_INTERPOLATE_HPP__
|
109
3rdparty/include/opencv2/core/cuda/common.hpp
vendored
Normal file
109
3rdparty/include/opencv2/core/cuda/common.hpp
vendored
Normal file
@ -0,0 +1,109 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_CUDA_COMMON_HPP__
|
||||
#define __OPENCV_CUDA_COMMON_HPP__
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include "opencv2/core/cuda_types.hpp"
|
||||
#include "opencv2/core/cvdef.h"
|
||||
#include "opencv2/core/base.hpp"
|
||||
|
||||
/** @file
|
||||
* @deprecated Use @ref cudev instead.
|
||||
*/
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
#ifndef CV_PI_F
|
||||
#ifndef CV_PI
|
||||
#define CV_PI_F 3.14159265f
|
||||
#else
|
||||
#define CV_PI_F ((float)CV_PI)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
namespace cv { namespace cuda {
|
||||
static inline void checkCudaError(cudaError_t err, const char* file, const int line, const char* func)
|
||||
{
|
||||
if (cudaSuccess != err)
|
||||
cv::error(cv::Error::GpuApiCallError, cudaGetErrorString(err), func, file, line);
|
||||
}
|
||||
}}
|
||||
|
||||
#ifndef cudaSafeCall
|
||||
#define cudaSafeCall(expr) cv::cuda::checkCudaError(expr, __FILE__, __LINE__, CV_Func)
|
||||
#endif
|
||||
|
||||
namespace cv { namespace cuda
|
||||
{
|
||||
template <typename T> static inline bool isAligned(const T* ptr, size_t size)
|
||||
{
|
||||
return reinterpret_cast<size_t>(ptr) % size == 0;
|
||||
}
|
||||
|
||||
static inline bool isAligned(size_t step, size_t size)
|
||||
{
|
||||
return step % size == 0;
|
||||
}
|
||||
}}
|
||||
|
||||
namespace cv { namespace cuda
|
||||
{
|
||||
namespace device
|
||||
{
|
||||
__host__ __device__ __forceinline__ int divUp(int total, int grain)
|
||||
{
|
||||
return (total + grain - 1) / grain;
|
||||
}
|
||||
|
||||
template<class T> inline void bindTexture(const textureReference* tex, const PtrStepSz<T>& img)
|
||||
{
|
||||
cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
|
||||
cudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) );
|
||||
}
|
||||
}
|
||||
}}
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif // __OPENCV_CUDA_COMMON_HPP__
|
113
3rdparty/include/opencv2/core/cuda/datamov_utils.hpp
vendored
Normal file
113
3rdparty/include/opencv2/core/cuda/datamov_utils.hpp
vendored
Normal file
@ -0,0 +1,113 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_CUDA_DATAMOV_UTILS_HPP__
|
||||
#define __OPENCV_CUDA_DATAMOV_UTILS_HPP__
|
||||
|
||||
#include "common.hpp"
|
||||
|
||||
/** @file
|
||||
* @deprecated Use @ref cudev instead.
|
||||
*/
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 200
|
||||
|
||||
// for Fermi memory space is detected automatically
|
||||
template <typename T> struct ForceGlob
|
||||
{
|
||||
__device__ __forceinline__ static void Load(const T* ptr, int offset, T& val) { val = ptr[offset]; }
|
||||
};
|
||||
|
||||
#else // __CUDA_ARCH__ >= 200
|
||||
|
||||
#if defined(_WIN64) || defined(__LP64__)
|
||||
// 64-bit register modifier for inlined asm
|
||||
#define OPENCV_CUDA_ASM_PTR "l"
|
||||
#else
|
||||
// 32-bit register modifier for inlined asm
|
||||
#define OPENCV_CUDA_ASM_PTR "r"
|
||||
#endif
|
||||
|
||||
template<class T> struct ForceGlob;
|
||||
|
||||
#define OPENCV_CUDA_DEFINE_FORCE_GLOB(base_type, ptx_type, reg_mod) \
|
||||
template <> struct ForceGlob<base_type> \
|
||||
{ \
|
||||
__device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \
|
||||
{ \
|
||||
asm("ld.global."#ptx_type" %0, [%1];" : "="#reg_mod(val) : OPENCV_CUDA_ASM_PTR(ptr + offset)); \
|
||||
} \
|
||||
};
|
||||
|
||||
#define OPENCV_CUDA_DEFINE_FORCE_GLOB_B(base_type, ptx_type) \
|
||||
template <> struct ForceGlob<base_type> \
|
||||
{ \
|
||||
__device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \
|
||||
{ \
|
||||
asm("ld.global."#ptx_type" %0, [%1];" : "=r"(*reinterpret_cast<uint*>(&val)) : OPENCV_CUDA_ASM_PTR(ptr + offset)); \
|
||||
} \
|
||||
};
|
||||
|
||||
OPENCV_CUDA_DEFINE_FORCE_GLOB_B(uchar, u8)
|
||||
OPENCV_CUDA_DEFINE_FORCE_GLOB_B(schar, s8)
|
||||
OPENCV_CUDA_DEFINE_FORCE_GLOB_B(char, b8)
|
||||
OPENCV_CUDA_DEFINE_FORCE_GLOB (ushort, u16, h)
|
||||
OPENCV_CUDA_DEFINE_FORCE_GLOB (short, s16, h)
|
||||
OPENCV_CUDA_DEFINE_FORCE_GLOB (uint, u32, r)
|
||||
OPENCV_CUDA_DEFINE_FORCE_GLOB (int, s32, r)
|
||||
OPENCV_CUDA_DEFINE_FORCE_GLOB (float, f32, f)
|
||||
OPENCV_CUDA_DEFINE_FORCE_GLOB (double, f64, d)
|
||||
|
||||
#undef OPENCV_CUDA_DEFINE_FORCE_GLOB
|
||||
#undef OPENCV_CUDA_DEFINE_FORCE_GLOB_B
|
||||
#undef OPENCV_CUDA_ASM_PTR
|
||||
|
||||
#endif // __CUDA_ARCH__ >= 200
|
||||
}}} // namespace cv { namespace cuda { namespace cudev
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif // __OPENCV_CUDA_DATAMOV_UTILS_HPP__
|
1980
3rdparty/include/opencv2/core/cuda/detail/color_detail.hpp
vendored
Normal file
1980
3rdparty/include/opencv2/core/cuda/detail/color_detail.hpp
vendored
Normal file
File diff suppressed because one or more lines are too long
365
3rdparty/include/opencv2/core/cuda/detail/reduce.hpp
vendored
Normal file
365
3rdparty/include/opencv2/core/cuda/detail/reduce.hpp
vendored
Normal file
@ -0,0 +1,365 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_CUDA_REDUCE_DETAIL_HPP__
|
||||
#define __OPENCV_CUDA_REDUCE_DETAIL_HPP__
|
||||
|
||||
#include <thrust/tuple.h>
|
||||
#include "../warp.hpp"
|
||||
#include "../warp_shuffle.hpp"
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
namespace reduce_detail
|
||||
{
|
||||
template <typename T> struct GetType;
|
||||
template <typename T> struct GetType<T*>
|
||||
{
|
||||
typedef T type;
|
||||
};
|
||||
template <typename T> struct GetType<volatile T*>
|
||||
{
|
||||
typedef T type;
|
||||
};
|
||||
template <typename T> struct GetType<T&>
|
||||
{
|
||||
typedef T type;
|
||||
};
|
||||
|
||||
template <unsigned int I, unsigned int N>
|
||||
struct For
|
||||
{
|
||||
template <class PointerTuple, class ValTuple>
|
||||
static __device__ void loadToSmem(const PointerTuple& smem, const ValTuple& val, unsigned int tid)
|
||||
{
|
||||
thrust::get<I>(smem)[tid] = thrust::get<I>(val);
|
||||
|
||||
For<I + 1, N>::loadToSmem(smem, val, tid);
|
||||
}
|
||||
template <class PointerTuple, class ValTuple>
|
||||
static __device__ void loadFromSmem(const PointerTuple& smem, const ValTuple& val, unsigned int tid)
|
||||
{
|
||||
thrust::get<I>(val) = thrust::get<I>(smem)[tid];
|
||||
|
||||
For<I + 1, N>::loadFromSmem(smem, val, tid);
|
||||
}
|
||||
|
||||
template <class PointerTuple, class ValTuple, class OpTuple>
|
||||
static __device__ void merge(const PointerTuple& smem, const ValTuple& val, unsigned int tid, unsigned int delta, const OpTuple& op)
|
||||
{
|
||||
typename GetType<typename thrust::tuple_element<I, PointerTuple>::type>::type reg = thrust::get<I>(smem)[tid + delta];
|
||||
thrust::get<I>(smem)[tid] = thrust::get<I>(val) = thrust::get<I>(op)(thrust::get<I>(val), reg);
|
||||
|
||||
For<I + 1, N>::merge(smem, val, tid, delta, op);
|
||||
}
|
||||
template <class ValTuple, class OpTuple>
|
||||
static __device__ void mergeShfl(const ValTuple& val, unsigned int delta, unsigned int width, const OpTuple& op)
|
||||
{
|
||||
typename GetType<typename thrust::tuple_element<I, ValTuple>::type>::type reg = shfl_down(thrust::get<I>(val), delta, width);
|
||||
thrust::get<I>(val) = thrust::get<I>(op)(thrust::get<I>(val), reg);
|
||||
|
||||
For<I + 1, N>::mergeShfl(val, delta, width, op);
|
||||
}
|
||||
};
|
||||
template <unsigned int N>
|
||||
struct For<N, N>
|
||||
{
|
||||
template <class PointerTuple, class ValTuple>
|
||||
static __device__ void loadToSmem(const PointerTuple&, const ValTuple&, unsigned int)
|
||||
{
|
||||
}
|
||||
template <class PointerTuple, class ValTuple>
|
||||
static __device__ void loadFromSmem(const PointerTuple&, const ValTuple&, unsigned int)
|
||||
{
|
||||
}
|
||||
|
||||
template <class PointerTuple, class ValTuple, class OpTuple>
|
||||
static __device__ void merge(const PointerTuple&, const ValTuple&, unsigned int, unsigned int, const OpTuple&)
|
||||
{
|
||||
}
|
||||
template <class ValTuple, class OpTuple>
|
||||
static __device__ void mergeShfl(const ValTuple&, unsigned int, unsigned int, const OpTuple&)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
__device__ __forceinline__ void loadToSmem(volatile T* smem, T& val, unsigned int tid)
|
||||
{
|
||||
smem[tid] = val;
|
||||
}
|
||||
template <typename T>
|
||||
__device__ __forceinline__ void loadFromSmem(volatile T* smem, T& val, unsigned int tid)
|
||||
{
|
||||
val = smem[tid];
|
||||
}
|
||||
template <typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9,
|
||||
typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9>
|
||||
__device__ __forceinline__ void loadToSmem(const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
|
||||
const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
|
||||
unsigned int tid)
|
||||
{
|
||||
For<0, thrust::tuple_size<thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> >::value>::loadToSmem(smem, val, tid);
|
||||
}
|
||||
template <typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9,
|
||||
typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9>
|
||||
__device__ __forceinline__ void loadFromSmem(const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
|
||||
const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
|
||||
unsigned int tid)
|
||||
{
|
||||
For<0, thrust::tuple_size<thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> >::value>::loadFromSmem(smem, val, tid);
|
||||
}
|
||||
|
||||
template <typename T, class Op>
|
||||
__device__ __forceinline__ void merge(volatile T* smem, T& val, unsigned int tid, unsigned int delta, const Op& op)
|
||||
{
|
||||
T reg = smem[tid + delta];
|
||||
smem[tid] = val = op(val, reg);
|
||||
}
|
||||
template <typename T, class Op>
|
||||
__device__ __forceinline__ void mergeShfl(T& val, unsigned int delta, unsigned int width, const Op& op)
|
||||
{
|
||||
T reg = shfl_down(val, delta, width);
|
||||
val = op(val, reg);
|
||||
}
|
||||
template <typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9,
|
||||
typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9,
|
||||
class Op0, class Op1, class Op2, class Op3, class Op4, class Op5, class Op6, class Op7, class Op8, class Op9>
|
||||
__device__ __forceinline__ void merge(const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
|
||||
const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
|
||||
unsigned int tid,
|
||||
unsigned int delta,
|
||||
const thrust::tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>& op)
|
||||
{
|
||||
For<0, thrust::tuple_size<thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> >::value>::merge(smem, val, tid, delta, op);
|
||||
}
|
||||
template <typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9,
|
||||
class Op0, class Op1, class Op2, class Op3, class Op4, class Op5, class Op6, class Op7, class Op8, class Op9>
|
||||
__device__ __forceinline__ void mergeShfl(const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
|
||||
unsigned int delta,
|
||||
unsigned int width,
|
||||
const thrust::tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>& op)
|
||||
{
|
||||
For<0, thrust::tuple_size<thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9> >::value>::mergeShfl(val, delta, width, op);
|
||||
}
|
||||
|
||||
template <unsigned int N> struct Generic
|
||||
{
|
||||
template <typename Pointer, typename Reference, class Op>
|
||||
static __device__ void reduce(Pointer smem, Reference val, unsigned int tid, Op op)
|
||||
{
|
||||
loadToSmem(smem, val, tid);
|
||||
if (N >= 32)
|
||||
__syncthreads();
|
||||
|
||||
if (N >= 2048)
|
||||
{
|
||||
if (tid < 1024)
|
||||
merge(smem, val, tid, 1024, op);
|
||||
|
||||
__syncthreads();
|
||||
}
|
||||
if (N >= 1024)
|
||||
{
|
||||
if (tid < 512)
|
||||
merge(smem, val, tid, 512, op);
|
||||
|
||||
__syncthreads();
|
||||
}
|
||||
if (N >= 512)
|
||||
{
|
||||
if (tid < 256)
|
||||
merge(smem, val, tid, 256, op);
|
||||
|
||||
__syncthreads();
|
||||
}
|
||||
if (N >= 256)
|
||||
{
|
||||
if (tid < 128)
|
||||
merge(smem, val, tid, 128, op);
|
||||
|
||||
__syncthreads();
|
||||
}
|
||||
if (N >= 128)
|
||||
{
|
||||
if (tid < 64)
|
||||
merge(smem, val, tid, 64, op);
|
||||
|
||||
__syncthreads();
|
||||
}
|
||||
if (N >= 64)
|
||||
{
|
||||
if (tid < 32)
|
||||
merge(smem, val, tid, 32, op);
|
||||
}
|
||||
|
||||
if (tid < 16)
|
||||
{
|
||||
merge(smem, val, tid, 16, op);
|
||||
merge(smem, val, tid, 8, op);
|
||||
merge(smem, val, tid, 4, op);
|
||||
merge(smem, val, tid, 2, op);
|
||||
merge(smem, val, tid, 1, op);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <unsigned int I, typename Pointer, typename Reference, class Op>
|
||||
struct Unroll
|
||||
{
|
||||
static __device__ void loopShfl(Reference val, Op op, unsigned int N)
|
||||
{
|
||||
mergeShfl(val, I, N, op);
|
||||
Unroll<I / 2, Pointer, Reference, Op>::loopShfl(val, op, N);
|
||||
}
|
||||
static __device__ void loop(Pointer smem, Reference val, unsigned int tid, Op op)
|
||||
{
|
||||
merge(smem, val, tid, I, op);
|
||||
Unroll<I / 2, Pointer, Reference, Op>::loop(smem, val, tid, op);
|
||||
}
|
||||
};
|
||||
template <typename Pointer, typename Reference, class Op>
|
||||
struct Unroll<0, Pointer, Reference, Op>
|
||||
{
|
||||
static __device__ void loopShfl(Reference, Op, unsigned int)
|
||||
{
|
||||
}
|
||||
static __device__ void loop(Pointer, Reference, unsigned int, Op)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
template <unsigned int N> struct WarpOptimized
|
||||
{
|
||||
template <typename Pointer, typename Reference, class Op>
|
||||
static __device__ void reduce(Pointer smem, Reference val, unsigned int tid, Op op)
|
||||
{
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
(void) smem;
|
||||
(void) tid;
|
||||
|
||||
Unroll<N / 2, Pointer, Reference, Op>::loopShfl(val, op, N);
|
||||
#else
|
||||
loadToSmem(smem, val, tid);
|
||||
|
||||
if (tid < N / 2)
|
||||
Unroll<N / 2, Pointer, Reference, Op>::loop(smem, val, tid, op);
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
template <unsigned int N> struct GenericOptimized32
|
||||
{
|
||||
enum { M = N / 32 };
|
||||
|
||||
template <typename Pointer, typename Reference, class Op>
|
||||
static __device__ void reduce(Pointer smem, Reference val, unsigned int tid, Op op)
|
||||
{
|
||||
const unsigned int laneId = Warp::laneId();
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
Unroll<16, Pointer, Reference, Op>::loopShfl(val, op, warpSize);
|
||||
|
||||
if (laneId == 0)
|
||||
loadToSmem(smem, val, tid / 32);
|
||||
#else
|
||||
loadToSmem(smem, val, tid);
|
||||
|
||||
if (laneId < 16)
|
||||
Unroll<16, Pointer, Reference, Op>::loop(smem, val, tid, op);
|
||||
|
||||
__syncthreads();
|
||||
|
||||
if (laneId == 0)
|
||||
loadToSmem(smem, val, tid / 32);
|
||||
#endif
|
||||
|
||||
__syncthreads();
|
||||
|
||||
loadFromSmem(smem, val, tid);
|
||||
|
||||
if (tid < 32)
|
||||
{
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
Unroll<M / 2, Pointer, Reference, Op>::loopShfl(val, op, M);
|
||||
#else
|
||||
Unroll<M / 2, Pointer, Reference, Op>::loop(smem, val, tid, op);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <bool val, class T1, class T2> struct StaticIf;
|
||||
template <class T1, class T2> struct StaticIf<true, T1, T2>
|
||||
{
|
||||
typedef T1 type;
|
||||
};
|
||||
template <class T1, class T2> struct StaticIf<false, T1, T2>
|
||||
{
|
||||
typedef T2 type;
|
||||
};
|
||||
|
||||
template <unsigned int N> struct IsPowerOf2
|
||||
{
|
||||
enum { value = ((N != 0) && !(N & (N - 1))) };
|
||||
};
|
||||
|
||||
template <unsigned int N> struct Dispatcher
|
||||
{
|
||||
typedef typename StaticIf<
|
||||
(N <= 32) && IsPowerOf2<N>::value,
|
||||
WarpOptimized<N>,
|
||||
typename StaticIf<
|
||||
(N <= 1024) && IsPowerOf2<N>::value,
|
||||
GenericOptimized32<N>,
|
||||
Generic<N>
|
||||
>::type
|
||||
>::type reductor;
|
||||
};
|
||||
}
|
||||
}}}
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif // __OPENCV_CUDA_REDUCE_DETAIL_HPP__
|
502
3rdparty/include/opencv2/core/cuda/detail/reduce_key_val.hpp
vendored
Normal file
502
3rdparty/include/opencv2/core/cuda/detail/reduce_key_val.hpp
vendored
Normal file
@ -0,0 +1,502 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_CUDA_PRED_VAL_REDUCE_DETAIL_HPP__
|
||||
#define __OPENCV_CUDA_PRED_VAL_REDUCE_DETAIL_HPP__
|
||||
|
||||
#include <thrust/tuple.h>
|
||||
#include "../warp.hpp"
|
||||
#include "../warp_shuffle.hpp"
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
namespace reduce_key_val_detail
|
||||
{
|
||||
template <typename T> struct GetType;
|
||||
template <typename T> struct GetType<T*>
|
||||
{
|
||||
typedef T type;
|
||||
};
|
||||
template <typename T> struct GetType<volatile T*>
|
||||
{
|
||||
typedef T type;
|
||||
};
|
||||
template <typename T> struct GetType<T&>
|
||||
{
|
||||
typedef T type;
|
||||
};
|
||||
|
||||
template <unsigned int I, unsigned int N>
|
||||
struct For
|
||||
{
|
||||
template <class PointerTuple, class ReferenceTuple>
|
||||
static __device__ void loadToSmem(const PointerTuple& smem, const ReferenceTuple& data, unsigned int tid)
|
||||
{
|
||||
thrust::get<I>(smem)[tid] = thrust::get<I>(data);
|
||||
|
||||
For<I + 1, N>::loadToSmem(smem, data, tid);
|
||||
}
|
||||
template <class PointerTuple, class ReferenceTuple>
|
||||
static __device__ void loadFromSmem(const PointerTuple& smem, const ReferenceTuple& data, unsigned int tid)
|
||||
{
|
||||
thrust::get<I>(data) = thrust::get<I>(smem)[tid];
|
||||
|
||||
For<I + 1, N>::loadFromSmem(smem, data, tid);
|
||||
}
|
||||
|
||||
template <class ReferenceTuple>
|
||||
static __device__ void copyShfl(const ReferenceTuple& val, unsigned int delta, int width)
|
||||
{
|
||||
thrust::get<I>(val) = shfl_down(thrust::get<I>(val), delta, width);
|
||||
|
||||
For<I + 1, N>::copyShfl(val, delta, width);
|
||||
}
|
||||
template <class PointerTuple, class ReferenceTuple>
|
||||
static __device__ void copy(const PointerTuple& svals, const ReferenceTuple& val, unsigned int tid, unsigned int delta)
|
||||
{
|
||||
thrust::get<I>(svals)[tid] = thrust::get<I>(val) = thrust::get<I>(svals)[tid + delta];
|
||||
|
||||
For<I + 1, N>::copy(svals, val, tid, delta);
|
||||
}
|
||||
|
||||
template <class KeyReferenceTuple, class ValReferenceTuple, class CmpTuple>
|
||||
static __device__ void mergeShfl(const KeyReferenceTuple& key, const ValReferenceTuple& val, const CmpTuple& cmp, unsigned int delta, int width)
|
||||
{
|
||||
typename GetType<typename thrust::tuple_element<I, KeyReferenceTuple>::type>::type reg = shfl_down(thrust::get<I>(key), delta, width);
|
||||
|
||||
if (thrust::get<I>(cmp)(reg, thrust::get<I>(key)))
|
||||
{
|
||||
thrust::get<I>(key) = reg;
|
||||
thrust::get<I>(val) = shfl_down(thrust::get<I>(val), delta, width);
|
||||
}
|
||||
|
||||
For<I + 1, N>::mergeShfl(key, val, cmp, delta, width);
|
||||
}
|
||||
template <class KeyPointerTuple, class KeyReferenceTuple, class ValPointerTuple, class ValReferenceTuple, class CmpTuple>
|
||||
static __device__ void merge(const KeyPointerTuple& skeys, const KeyReferenceTuple& key,
|
||||
const ValPointerTuple& svals, const ValReferenceTuple& val,
|
||||
const CmpTuple& cmp,
|
||||
unsigned int tid, unsigned int delta)
|
||||
{
|
||||
typename GetType<typename thrust::tuple_element<I, KeyPointerTuple>::type>::type reg = thrust::get<I>(skeys)[tid + delta];
|
||||
|
||||
if (thrust::get<I>(cmp)(reg, thrust::get<I>(key)))
|
||||
{
|
||||
thrust::get<I>(skeys)[tid] = thrust::get<I>(key) = reg;
|
||||
thrust::get<I>(svals)[tid] = thrust::get<I>(val) = thrust::get<I>(svals)[tid + delta];
|
||||
}
|
||||
|
||||
For<I + 1, N>::merge(skeys, key, svals, val, cmp, tid, delta);
|
||||
}
|
||||
};
|
||||
template <unsigned int N>
|
||||
struct For<N, N>
|
||||
{
|
||||
template <class PointerTuple, class ReferenceTuple>
|
||||
static __device__ void loadToSmem(const PointerTuple&, const ReferenceTuple&, unsigned int)
|
||||
{
|
||||
}
|
||||
template <class PointerTuple, class ReferenceTuple>
|
||||
static __device__ void loadFromSmem(const PointerTuple&, const ReferenceTuple&, unsigned int)
|
||||
{
|
||||
}
|
||||
|
||||
template <class ReferenceTuple>
|
||||
static __device__ void copyShfl(const ReferenceTuple&, unsigned int, int)
|
||||
{
|
||||
}
|
||||
template <class PointerTuple, class ReferenceTuple>
|
||||
static __device__ void copy(const PointerTuple&, const ReferenceTuple&, unsigned int, unsigned int)
|
||||
{
|
||||
}
|
||||
|
||||
template <class KeyReferenceTuple, class ValReferenceTuple, class CmpTuple>
|
||||
static __device__ void mergeShfl(const KeyReferenceTuple&, const ValReferenceTuple&, const CmpTuple&, unsigned int, int)
|
||||
{
|
||||
}
|
||||
template <class KeyPointerTuple, class KeyReferenceTuple, class ValPointerTuple, class ValReferenceTuple, class CmpTuple>
|
||||
static __device__ void merge(const KeyPointerTuple&, const KeyReferenceTuple&,
|
||||
const ValPointerTuple&, const ValReferenceTuple&,
|
||||
const CmpTuple&,
|
||||
unsigned int, unsigned int)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// loadToSmem
|
||||
|
||||
template <typename T>
|
||||
__device__ __forceinline__ void loadToSmem(volatile T* smem, T& data, unsigned int tid)
|
||||
{
|
||||
smem[tid] = data;
|
||||
}
|
||||
template <typename T>
|
||||
__device__ __forceinline__ void loadFromSmem(volatile T* smem, T& data, unsigned int tid)
|
||||
{
|
||||
data = smem[tid];
|
||||
}
|
||||
template <typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
|
||||
typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9>
|
||||
__device__ __forceinline__ void loadToSmem(const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& smem,
|
||||
const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& data,
|
||||
unsigned int tid)
|
||||
{
|
||||
For<0, thrust::tuple_size<thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> >::value>::loadToSmem(smem, data, tid);
|
||||
}
|
||||
template <typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
|
||||
typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9>
|
||||
__device__ __forceinline__ void loadFromSmem(const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& smem,
|
||||
const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& data,
|
||||
unsigned int tid)
|
||||
{
|
||||
For<0, thrust::tuple_size<thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> >::value>::loadFromSmem(smem, data, tid);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// copyVals
|
||||
|
||||
template <typename V>
|
||||
__device__ __forceinline__ void copyValsShfl(V& val, unsigned int delta, int width)
|
||||
{
|
||||
val = shfl_down(val, delta, width);
|
||||
}
|
||||
template <typename V>
|
||||
__device__ __forceinline__ void copyVals(volatile V* svals, V& val, unsigned int tid, unsigned int delta)
|
||||
{
|
||||
svals[tid] = val = svals[tid + delta];
|
||||
}
|
||||
template <typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9>
|
||||
__device__ __forceinline__ void copyValsShfl(const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
|
||||
unsigned int delta,
|
||||
int width)
|
||||
{
|
||||
For<0, thrust::tuple_size<thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9> >::value>::copyShfl(val, delta, width);
|
||||
}
|
||||
template <typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
|
||||
typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9>
|
||||
__device__ __forceinline__ void copyVals(const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
|
||||
const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
|
||||
unsigned int tid, unsigned int delta)
|
||||
{
|
||||
For<0, thrust::tuple_size<thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> >::value>::copy(svals, val, tid, delta);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// merge
|
||||
|
||||
template <typename K, typename V, class Cmp>
|
||||
__device__ __forceinline__ void mergeShfl(K& key, V& val, const Cmp& cmp, unsigned int delta, int width)
|
||||
{
|
||||
K reg = shfl_down(key, delta, width);
|
||||
|
||||
if (cmp(reg, key))
|
||||
{
|
||||
key = reg;
|
||||
copyValsShfl(val, delta, width);
|
||||
}
|
||||
}
|
||||
template <typename K, typename V, class Cmp>
|
||||
__device__ __forceinline__ void merge(volatile K* skeys, K& key, volatile V* svals, V& val, const Cmp& cmp, unsigned int tid, unsigned int delta)
|
||||
{
|
||||
K reg = skeys[tid + delta];
|
||||
|
||||
if (cmp(reg, key))
|
||||
{
|
||||
skeys[tid] = key = reg;
|
||||
copyVals(svals, val, tid, delta);
|
||||
}
|
||||
}
|
||||
template <typename K,
|
||||
typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
|
||||
class Cmp>
|
||||
__device__ __forceinline__ void mergeShfl(K& key,
|
||||
const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
|
||||
const Cmp& cmp,
|
||||
unsigned int delta, int width)
|
||||
{
|
||||
K reg = shfl_down(key, delta, width);
|
||||
|
||||
if (cmp(reg, key))
|
||||
{
|
||||
key = reg;
|
||||
copyValsShfl(val, delta, width);
|
||||
}
|
||||
}
|
||||
template <typename K,
|
||||
typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
|
||||
typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
|
||||
class Cmp>
|
||||
__device__ __forceinline__ void merge(volatile K* skeys, K& key,
|
||||
const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
|
||||
const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
|
||||
const Cmp& cmp, unsigned int tid, unsigned int delta)
|
||||
{
|
||||
K reg = skeys[tid + delta];
|
||||
|
||||
if (cmp(reg, key))
|
||||
{
|
||||
skeys[tid] = key = reg;
|
||||
copyVals(svals, val, tid, delta);
|
||||
}
|
||||
}
|
||||
template <typename KR0, typename KR1, typename KR2, typename KR3, typename KR4, typename KR5, typename KR6, typename KR7, typename KR8, typename KR9,
|
||||
typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
|
||||
class Cmp0, class Cmp1, class Cmp2, class Cmp3, class Cmp4, class Cmp5, class Cmp6, class Cmp7, class Cmp8, class Cmp9>
|
||||
__device__ __forceinline__ void mergeShfl(const thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9>& key,
|
||||
const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
|
||||
const thrust::tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9>& cmp,
|
||||
unsigned int delta, int width)
|
||||
{
|
||||
For<0, thrust::tuple_size<thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9> >::value>::mergeShfl(key, val, cmp, delta, width);
|
||||
}
|
||||
template <typename KP0, typename KP1, typename KP2, typename KP3, typename KP4, typename KP5, typename KP6, typename KP7, typename KP8, typename KP9,
|
||||
typename KR0, typename KR1, typename KR2, typename KR3, typename KR4, typename KR5, typename KR6, typename KR7, typename KR8, typename KR9,
|
||||
typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
|
||||
typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
|
||||
class Cmp0, class Cmp1, class Cmp2, class Cmp3, class Cmp4, class Cmp5, class Cmp6, class Cmp7, class Cmp8, class Cmp9>
|
||||
__device__ __forceinline__ void merge(const thrust::tuple<KP0, KP1, KP2, KP3, KP4, KP5, KP6, KP7, KP8, KP9>& skeys,
|
||||
const thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9>& key,
|
||||
const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
|
||||
const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
|
||||
const thrust::tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9>& cmp,
|
||||
unsigned int tid, unsigned int delta)
|
||||
{
|
||||
For<0, thrust::tuple_size<thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> >::value>::merge(skeys, key, svals, val, cmp, tid, delta);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// Generic
|
||||
|
||||
template <unsigned int N> struct Generic
|
||||
{
|
||||
template <class KP, class KR, class VP, class VR, class Cmp>
|
||||
static __device__ void reduce(KP skeys, KR key, VP svals, VR val, unsigned int tid, Cmp cmp)
|
||||
{
|
||||
loadToSmem(skeys, key, tid);
|
||||
loadValsToSmem(svals, val, tid);
|
||||
if (N >= 32)
|
||||
__syncthreads();
|
||||
|
||||
if (N >= 2048)
|
||||
{
|
||||
if (tid < 1024)
|
||||
merge(skeys, key, svals, val, cmp, tid, 1024);
|
||||
|
||||
__syncthreads();
|
||||
}
|
||||
if (N >= 1024)
|
||||
{
|
||||
if (tid < 512)
|
||||
merge(skeys, key, svals, val, cmp, tid, 512);
|
||||
|
||||
__syncthreads();
|
||||
}
|
||||
if (N >= 512)
|
||||
{
|
||||
if (tid < 256)
|
||||
merge(skeys, key, svals, val, cmp, tid, 256);
|
||||
|
||||
__syncthreads();
|
||||
}
|
||||
if (N >= 256)
|
||||
{
|
||||
if (tid < 128)
|
||||
merge(skeys, key, svals, val, cmp, tid, 128);
|
||||
|
||||
__syncthreads();
|
||||
}
|
||||
if (N >= 128)
|
||||
{
|
||||
if (tid < 64)
|
||||
merge(skeys, key, svals, val, cmp, tid, 64);
|
||||
|
||||
__syncthreads();
|
||||
}
|
||||
if (N >= 64)
|
||||
{
|
||||
if (tid < 32)
|
||||
merge(skeys, key, svals, val, cmp, tid, 32);
|
||||
}
|
||||
|
||||
if (tid < 16)
|
||||
{
|
||||
merge(skeys, key, svals, val, cmp, tid, 16);
|
||||
merge(skeys, key, svals, val, cmp, tid, 8);
|
||||
merge(skeys, key, svals, val, cmp, tid, 4);
|
||||
merge(skeys, key, svals, val, cmp, tid, 2);
|
||||
merge(skeys, key, svals, val, cmp, tid, 1);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <unsigned int I, class KP, class KR, class VP, class VR, class Cmp>
|
||||
struct Unroll
|
||||
{
|
||||
static __device__ void loopShfl(KR key, VR val, Cmp cmp, unsigned int N)
|
||||
{
|
||||
mergeShfl(key, val, cmp, I, N);
|
||||
Unroll<I / 2, KP, KR, VP, VR, Cmp>::loopShfl(key, val, cmp, N);
|
||||
}
|
||||
static __device__ void loop(KP skeys, KR key, VP svals, VR val, unsigned int tid, Cmp cmp)
|
||||
{
|
||||
merge(skeys, key, svals, val, cmp, tid, I);
|
||||
Unroll<I / 2, KP, KR, VP, VR, Cmp>::loop(skeys, key, svals, val, tid, cmp);
|
||||
}
|
||||
};
|
||||
template <class KP, class KR, class VP, class VR, class Cmp>
|
||||
struct Unroll<0, KP, KR, VP, VR, Cmp>
|
||||
{
|
||||
static __device__ void loopShfl(KR, VR, Cmp, unsigned int)
|
||||
{
|
||||
}
|
||||
static __device__ void loop(KP, KR, VP, VR, unsigned int, Cmp)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
template <unsigned int N> struct WarpOptimized
|
||||
{
|
||||
template <class KP, class KR, class VP, class VR, class Cmp>
|
||||
static __device__ void reduce(KP skeys, KR key, VP svals, VR val, unsigned int tid, Cmp cmp)
|
||||
{
|
||||
#if 0 // __CUDA_ARCH__ >= 300
|
||||
(void) skeys;
|
||||
(void) svals;
|
||||
(void) tid;
|
||||
|
||||
Unroll<N / 2, KP, KR, VP, VR, Cmp>::loopShfl(key, val, cmp, N);
|
||||
#else
|
||||
loadToSmem(skeys, key, tid);
|
||||
loadToSmem(svals, val, tid);
|
||||
|
||||
if (tid < N / 2)
|
||||
Unroll<N / 2, KP, KR, VP, VR, Cmp>::loop(skeys, key, svals, val, tid, cmp);
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
template <unsigned int N> struct GenericOptimized32
|
||||
{
|
||||
enum { M = N / 32 };
|
||||
|
||||
template <class KP, class KR, class VP, class VR, class Cmp>
|
||||
static __device__ void reduce(KP skeys, KR key, VP svals, VR val, unsigned int tid, Cmp cmp)
|
||||
{
|
||||
const unsigned int laneId = Warp::laneId();
|
||||
|
||||
#if 0 // __CUDA_ARCH__ >= 300
|
||||
Unroll<16, KP, KR, VP, VR, Cmp>::loopShfl(key, val, cmp, warpSize);
|
||||
|
||||
if (laneId == 0)
|
||||
{
|
||||
loadToSmem(skeys, key, tid / 32);
|
||||
loadToSmem(svals, val, tid / 32);
|
||||
}
|
||||
#else
|
||||
loadToSmem(skeys, key, tid);
|
||||
loadToSmem(svals, val, tid);
|
||||
|
||||
if (laneId < 16)
|
||||
Unroll<16, KP, KR, VP, VR, Cmp>::loop(skeys, key, svals, val, tid, cmp);
|
||||
|
||||
__syncthreads();
|
||||
|
||||
if (laneId == 0)
|
||||
{
|
||||
loadToSmem(skeys, key, tid / 32);
|
||||
loadToSmem(svals, val, tid / 32);
|
||||
}
|
||||
#endif
|
||||
|
||||
__syncthreads();
|
||||
|
||||
loadFromSmem(skeys, key, tid);
|
||||
|
||||
if (tid < 32)
|
||||
{
|
||||
#if 0 // __CUDA_ARCH__ >= 300
|
||||
loadFromSmem(svals, val, tid);
|
||||
|
||||
Unroll<M / 2, KP, KR, VP, VR, Cmp>::loopShfl(key, val, cmp, M);
|
||||
#else
|
||||
Unroll<M / 2, KP, KR, VP, VR, Cmp>::loop(skeys, key, svals, val, tid, cmp);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <bool val, class T1, class T2> struct StaticIf;
|
||||
template <class T1, class T2> struct StaticIf<true, T1, T2>
|
||||
{
|
||||
typedef T1 type;
|
||||
};
|
||||
template <class T1, class T2> struct StaticIf<false, T1, T2>
|
||||
{
|
||||
typedef T2 type;
|
||||
};
|
||||
|
||||
template <unsigned int N> struct IsPowerOf2
|
||||
{
|
||||
enum { value = ((N != 0) && !(N & (N - 1))) };
|
||||
};
|
||||
|
||||
template <unsigned int N> struct Dispatcher
|
||||
{
|
||||
typedef typename StaticIf<
|
||||
(N <= 32) && IsPowerOf2<N>::value,
|
||||
WarpOptimized<N>,
|
||||
typename StaticIf<
|
||||
(N <= 1024) && IsPowerOf2<N>::value,
|
||||
GenericOptimized32<N>,
|
||||
Generic<N>
|
||||
>::type
|
||||
>::type reductor;
|
||||
};
|
||||
}
|
||||
}}}
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif // __OPENCV_CUDA_PRED_VAL_REDUCE_DETAIL_HPP__
|
399
3rdparty/include/opencv2/core/cuda/detail/transform_detail.hpp
vendored
Normal file
399
3rdparty/include/opencv2/core/cuda/detail/transform_detail.hpp
vendored
Normal file
@ -0,0 +1,399 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_CUDA_TRANSFORM_DETAIL_HPP__
|
||||
#define __OPENCV_CUDA_TRANSFORM_DETAIL_HPP__
|
||||
|
||||
#include "../common.hpp"
|
||||
#include "../vec_traits.hpp"
|
||||
#include "../functional.hpp"
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
namespace transform_detail
|
||||
{
|
||||
//! Read Write Traits
|
||||
|
||||
template <typename T, typename D, int shift> struct UnaryReadWriteTraits
|
||||
{
|
||||
typedef typename TypeVec<T, shift>::vec_type read_type;
|
||||
typedef typename TypeVec<D, shift>::vec_type write_type;
|
||||
};
|
||||
|
||||
template <typename T1, typename T2, typename D, int shift> struct BinaryReadWriteTraits
|
||||
{
|
||||
typedef typename TypeVec<T1, shift>::vec_type read_type1;
|
||||
typedef typename TypeVec<T2, shift>::vec_type read_type2;
|
||||
typedef typename TypeVec<D, shift>::vec_type write_type;
|
||||
};
|
||||
|
||||
//! Transform kernels
|
||||
|
||||
template <int shift> struct OpUnroller;
|
||||
template <> struct OpUnroller<1>
|
||||
{
|
||||
template <typename T, typename D, typename UnOp, typename Mask>
|
||||
static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)
|
||||
{
|
||||
if (mask(y, x_shifted))
|
||||
dst.x = op(src.x);
|
||||
}
|
||||
|
||||
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
|
||||
static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)
|
||||
{
|
||||
if (mask(y, x_shifted))
|
||||
dst.x = op(src1.x, src2.x);
|
||||
}
|
||||
};
|
||||
template <> struct OpUnroller<2>
|
||||
{
|
||||
template <typename T, typename D, typename UnOp, typename Mask>
|
||||
static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)
|
||||
{
|
||||
if (mask(y, x_shifted))
|
||||
dst.x = op(src.x);
|
||||
if (mask(y, x_shifted + 1))
|
||||
dst.y = op(src.y);
|
||||
}
|
||||
|
||||
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
|
||||
static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)
|
||||
{
|
||||
if (mask(y, x_shifted))
|
||||
dst.x = op(src1.x, src2.x);
|
||||
if (mask(y, x_shifted + 1))
|
||||
dst.y = op(src1.y, src2.y);
|
||||
}
|
||||
};
|
||||
template <> struct OpUnroller<3>
|
||||
{
|
||||
template <typename T, typename D, typename UnOp, typename Mask>
|
||||
static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, const UnOp& op, int x_shifted, int y)
|
||||
{
|
||||
if (mask(y, x_shifted))
|
||||
dst.x = op(src.x);
|
||||
if (mask(y, x_shifted + 1))
|
||||
dst.y = op(src.y);
|
||||
if (mask(y, x_shifted + 2))
|
||||
dst.z = op(src.z);
|
||||
}
|
||||
|
||||
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
|
||||
static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y)
|
||||
{
|
||||
if (mask(y, x_shifted))
|
||||
dst.x = op(src1.x, src2.x);
|
||||
if (mask(y, x_shifted + 1))
|
||||
dst.y = op(src1.y, src2.y);
|
||||
if (mask(y, x_shifted + 2))
|
||||
dst.z = op(src1.z, src2.z);
|
||||
}
|
||||
};
|
||||
template <> struct OpUnroller<4>
|
||||
{
|
||||
template <typename T, typename D, typename UnOp, typename Mask>
|
||||
static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, const UnOp& op, int x_shifted, int y)
|
||||
{
|
||||
if (mask(y, x_shifted))
|
||||
dst.x = op(src.x);
|
||||
if (mask(y, x_shifted + 1))
|
||||
dst.y = op(src.y);
|
||||
if (mask(y, x_shifted + 2))
|
||||
dst.z = op(src.z);
|
||||
if (mask(y, x_shifted + 3))
|
||||
dst.w = op(src.w);
|
||||
}
|
||||
|
||||
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
|
||||
static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y)
|
||||
{
|
||||
if (mask(y, x_shifted))
|
||||
dst.x = op(src1.x, src2.x);
|
||||
if (mask(y, x_shifted + 1))
|
||||
dst.y = op(src1.y, src2.y);
|
||||
if (mask(y, x_shifted + 2))
|
||||
dst.z = op(src1.z, src2.z);
|
||||
if (mask(y, x_shifted + 3))
|
||||
dst.w = op(src1.w, src2.w);
|
||||
}
|
||||
};
|
||||
template <> struct OpUnroller<8>
|
||||
{
|
||||
template <typename T, typename D, typename UnOp, typename Mask>
|
||||
static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, const UnOp& op, int x_shifted, int y)
|
||||
{
|
||||
if (mask(y, x_shifted))
|
||||
dst.a0 = op(src.a0);
|
||||
if (mask(y, x_shifted + 1))
|
||||
dst.a1 = op(src.a1);
|
||||
if (mask(y, x_shifted + 2))
|
||||
dst.a2 = op(src.a2);
|
||||
if (mask(y, x_shifted + 3))
|
||||
dst.a3 = op(src.a3);
|
||||
if (mask(y, x_shifted + 4))
|
||||
dst.a4 = op(src.a4);
|
||||
if (mask(y, x_shifted + 5))
|
||||
dst.a5 = op(src.a5);
|
||||
if (mask(y, x_shifted + 6))
|
||||
dst.a6 = op(src.a6);
|
||||
if (mask(y, x_shifted + 7))
|
||||
dst.a7 = op(src.a7);
|
||||
}
|
||||
|
||||
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
|
||||
static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y)
|
||||
{
|
||||
if (mask(y, x_shifted))
|
||||
dst.a0 = op(src1.a0, src2.a0);
|
||||
if (mask(y, x_shifted + 1))
|
||||
dst.a1 = op(src1.a1, src2.a1);
|
||||
if (mask(y, x_shifted + 2))
|
||||
dst.a2 = op(src1.a2, src2.a2);
|
||||
if (mask(y, x_shifted + 3))
|
||||
dst.a3 = op(src1.a3, src2.a3);
|
||||
if (mask(y, x_shifted + 4))
|
||||
dst.a4 = op(src1.a4, src2.a4);
|
||||
if (mask(y, x_shifted + 5))
|
||||
dst.a5 = op(src1.a5, src2.a5);
|
||||
if (mask(y, x_shifted + 6))
|
||||
dst.a6 = op(src1.a6, src2.a6);
|
||||
if (mask(y, x_shifted + 7))
|
||||
dst.a7 = op(src1.a7, src2.a7);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T, typename D, typename UnOp, typename Mask>
|
||||
static __global__ void transformSmart(const PtrStepSz<T> src_, PtrStep<D> dst_, const Mask mask, const UnOp op)
|
||||
{
|
||||
typedef TransformFunctorTraits<UnOp> ft;
|
||||
typedef typename UnaryReadWriteTraits<T, D, ft::smart_shift>::read_type read_type;
|
||||
typedef typename UnaryReadWriteTraits<T, D, ft::smart_shift>::write_type write_type;
|
||||
|
||||
const int x = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
const int y = threadIdx.y + blockIdx.y * blockDim.y;
|
||||
const int x_shifted = x * ft::smart_shift;
|
||||
|
||||
if (y < src_.rows)
|
||||
{
|
||||
const T* src = src_.ptr(y);
|
||||
D* dst = dst_.ptr(y);
|
||||
|
||||
if (x_shifted + ft::smart_shift - 1 < src_.cols)
|
||||
{
|
||||
const read_type src_n_el = ((const read_type*)src)[x];
|
||||
write_type dst_n_el = ((const write_type*)dst)[x];
|
||||
|
||||
OpUnroller<ft::smart_shift>::unroll(src_n_el, dst_n_el, mask, op, x_shifted, y);
|
||||
|
||||
((write_type*)dst)[x] = dst_n_el;
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int real_x = x_shifted; real_x < src_.cols; ++real_x)
|
||||
{
|
||||
if (mask(y, real_x))
|
||||
dst[real_x] = op(src[real_x]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T, typename D, typename UnOp, typename Mask>
|
||||
__global__ static void transformSimple(const PtrStepSz<T> src, PtrStep<D> dst, const Mask mask, const UnOp op)
|
||||
{
|
||||
const int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
const int y = blockDim.y * blockIdx.y + threadIdx.y;
|
||||
|
||||
if (x < src.cols && y < src.rows && mask(y, x))
|
||||
{
|
||||
dst.ptr(y)[x] = op(src.ptr(y)[x]);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
|
||||
static __global__ void transformSmart(const PtrStepSz<T1> src1_, const PtrStep<T2> src2_, PtrStep<D> dst_,
|
||||
const Mask mask, const BinOp op)
|
||||
{
|
||||
typedef TransformFunctorTraits<BinOp> ft;
|
||||
typedef typename BinaryReadWriteTraits<T1, T2, D, ft::smart_shift>::read_type1 read_type1;
|
||||
typedef typename BinaryReadWriteTraits<T1, T2, D, ft::smart_shift>::read_type2 read_type2;
|
||||
typedef typename BinaryReadWriteTraits<T1, T2, D, ft::smart_shift>::write_type write_type;
|
||||
|
||||
const int x = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
const int y = threadIdx.y + blockIdx.y * blockDim.y;
|
||||
const int x_shifted = x * ft::smart_shift;
|
||||
|
||||
if (y < src1_.rows)
|
||||
{
|
||||
const T1* src1 = src1_.ptr(y);
|
||||
const T2* src2 = src2_.ptr(y);
|
||||
D* dst = dst_.ptr(y);
|
||||
|
||||
if (x_shifted + ft::smart_shift - 1 < src1_.cols)
|
||||
{
|
||||
const read_type1 src1_n_el = ((const read_type1*)src1)[x];
|
||||
const read_type2 src2_n_el = ((const read_type2*)src2)[x];
|
||||
write_type dst_n_el = ((const write_type*)dst)[x];
|
||||
|
||||
OpUnroller<ft::smart_shift>::unroll(src1_n_el, src2_n_el, dst_n_el, mask, op, x_shifted, y);
|
||||
|
||||
((write_type*)dst)[x] = dst_n_el;
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int real_x = x_shifted; real_x < src1_.cols; ++real_x)
|
||||
{
|
||||
if (mask(y, real_x))
|
||||
dst[real_x] = op(src1[real_x], src2[real_x]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
|
||||
static __global__ void transformSimple(const PtrStepSz<T1> src1, const PtrStep<T2> src2, PtrStep<D> dst,
|
||||
const Mask mask, const BinOp op)
|
||||
{
|
||||
const int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
const int y = blockDim.y * blockIdx.y + threadIdx.y;
|
||||
|
||||
if (x < src1.cols && y < src1.rows && mask(y, x))
|
||||
{
|
||||
const T1 src1_data = src1.ptr(y)[x];
|
||||
const T2 src2_data = src2.ptr(y)[x];
|
||||
dst.ptr(y)[x] = op(src1_data, src2_data);
|
||||
}
|
||||
}
|
||||
|
||||
template <bool UseSmart> struct TransformDispatcher;
|
||||
template<> struct TransformDispatcher<false>
|
||||
{
|
||||
template <typename T, typename D, typename UnOp, typename Mask>
|
||||
static void call(PtrStepSz<T> src, PtrStepSz<D> dst, UnOp op, Mask mask, cudaStream_t stream)
|
||||
{
|
||||
typedef TransformFunctorTraits<UnOp> ft;
|
||||
|
||||
const dim3 threads(ft::simple_block_dim_x, ft::simple_block_dim_y, 1);
|
||||
const dim3 grid(divUp(src.cols, threads.x), divUp(src.rows, threads.y), 1);
|
||||
|
||||
transformSimple<T, D><<<grid, threads, 0, stream>>>(src, dst, mask, op);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
|
||||
static void call(PtrStepSz<T1> src1, PtrStepSz<T2> src2, PtrStepSz<D> dst, BinOp op, Mask mask, cudaStream_t stream)
|
||||
{
|
||||
typedef TransformFunctorTraits<BinOp> ft;
|
||||
|
||||
const dim3 threads(ft::simple_block_dim_x, ft::simple_block_dim_y, 1);
|
||||
const dim3 grid(divUp(src1.cols, threads.x), divUp(src1.rows, threads.y), 1);
|
||||
|
||||
transformSimple<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, mask, op);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
};
|
||||
template<> struct TransformDispatcher<true>
|
||||
{
|
||||
template <typename T, typename D, typename UnOp, typename Mask>
|
||||
static void call(PtrStepSz<T> src, PtrStepSz<D> dst, UnOp op, Mask mask, cudaStream_t stream)
|
||||
{
|
||||
typedef TransformFunctorTraits<UnOp> ft;
|
||||
|
||||
CV_StaticAssert(ft::smart_shift != 1, "");
|
||||
|
||||
if (!isAligned(src.data, ft::smart_shift * sizeof(T)) || !isAligned(src.step, ft::smart_shift * sizeof(T)) ||
|
||||
!isAligned(dst.data, ft::smart_shift * sizeof(D)) || !isAligned(dst.step, ft::smart_shift * sizeof(D)))
|
||||
{
|
||||
TransformDispatcher<false>::call(src, dst, op, mask, stream);
|
||||
return;
|
||||
}
|
||||
|
||||
const dim3 threads(ft::smart_block_dim_x, ft::smart_block_dim_y, 1);
|
||||
const dim3 grid(divUp(src.cols, threads.x * ft::smart_shift), divUp(src.rows, threads.y), 1);
|
||||
|
||||
transformSmart<T, D><<<grid, threads, 0, stream>>>(src, dst, mask, op);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
|
||||
static void call(PtrStepSz<T1> src1, PtrStepSz<T2> src2, PtrStepSz<D> dst, BinOp op, Mask mask, cudaStream_t stream)
|
||||
{
|
||||
typedef TransformFunctorTraits<BinOp> ft;
|
||||
|
||||
CV_StaticAssert(ft::smart_shift != 1, "");
|
||||
|
||||
if (!isAligned(src1.data, ft::smart_shift * sizeof(T1)) || !isAligned(src1.step, ft::smart_shift * sizeof(T1)) ||
|
||||
!isAligned(src2.data, ft::smart_shift * sizeof(T2)) || !isAligned(src2.step, ft::smart_shift * sizeof(T2)) ||
|
||||
!isAligned(dst.data, ft::smart_shift * sizeof(D)) || !isAligned(dst.step, ft::smart_shift * sizeof(D)))
|
||||
{
|
||||
TransformDispatcher<false>::call(src1, src2, dst, op, mask, stream);
|
||||
return;
|
||||
}
|
||||
|
||||
const dim3 threads(ft::smart_block_dim_x, ft::smart_block_dim_y, 1);
|
||||
const dim3 grid(divUp(src1.cols, threads.x * ft::smart_shift), divUp(src1.rows, threads.y), 1);
|
||||
|
||||
transformSmart<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, mask, op);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
};
|
||||
} // namespace transform_detail
|
||||
}}} // namespace cv { namespace cuda { namespace cudev
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif // __OPENCV_CUDA_TRANSFORM_DETAIL_HPP__
|
191
3rdparty/include/opencv2/core/cuda/detail/type_traits_detail.hpp
vendored
Normal file
191
3rdparty/include/opencv2/core/cuda/detail/type_traits_detail.hpp
vendored
Normal file
@ -0,0 +1,191 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_CUDA_TYPE_TRAITS_DETAIL_HPP__
|
||||
#define __OPENCV_CUDA_TYPE_TRAITS_DETAIL_HPP__
|
||||
|
||||
#include "../common.hpp"
|
||||
#include "../vec_traits.hpp"
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
namespace type_traits_detail
|
||||
{
|
||||
template <bool, typename T1, typename T2> struct Select { typedef T1 type; };
|
||||
template <typename T1, typename T2> struct Select<false, T1, T2> { typedef T2 type; };
|
||||
|
||||
template <typename T> struct IsSignedIntergral { enum {value = 0}; };
|
||||
template <> struct IsSignedIntergral<schar> { enum {value = 1}; };
|
||||
template <> struct IsSignedIntergral<char1> { enum {value = 1}; };
|
||||
template <> struct IsSignedIntergral<short> { enum {value = 1}; };
|
||||
template <> struct IsSignedIntergral<short1> { enum {value = 1}; };
|
||||
template <> struct IsSignedIntergral<int> { enum {value = 1}; };
|
||||
template <> struct IsSignedIntergral<int1> { enum {value = 1}; };
|
||||
|
||||
template <typename T> struct IsUnsignedIntegral { enum {value = 0}; };
|
||||
template <> struct IsUnsignedIntegral<uchar> { enum {value = 1}; };
|
||||
template <> struct IsUnsignedIntegral<uchar1> { enum {value = 1}; };
|
||||
template <> struct IsUnsignedIntegral<ushort> { enum {value = 1}; };
|
||||
template <> struct IsUnsignedIntegral<ushort1> { enum {value = 1}; };
|
||||
template <> struct IsUnsignedIntegral<uint> { enum {value = 1}; };
|
||||
template <> struct IsUnsignedIntegral<uint1> { enum {value = 1}; };
|
||||
|
||||
template <typename T> struct IsIntegral { enum {value = IsSignedIntergral<T>::value || IsUnsignedIntegral<T>::value}; };
|
||||
template <> struct IsIntegral<char> { enum {value = 1}; };
|
||||
template <> struct IsIntegral<bool> { enum {value = 1}; };
|
||||
|
||||
template <typename T> struct IsFloat { enum {value = 0}; };
|
||||
template <> struct IsFloat<float> { enum {value = 1}; };
|
||||
template <> struct IsFloat<double> { enum {value = 1}; };
|
||||
|
||||
template <typename T> struct IsVec { enum {value = 0}; };
|
||||
template <> struct IsVec<uchar1> { enum {value = 1}; };
|
||||
template <> struct IsVec<uchar2> { enum {value = 1}; };
|
||||
template <> struct IsVec<uchar3> { enum {value = 1}; };
|
||||
template <> struct IsVec<uchar4> { enum {value = 1}; };
|
||||
template <> struct IsVec<uchar8> { enum {value = 1}; };
|
||||
template <> struct IsVec<char1> { enum {value = 1}; };
|
||||
template <> struct IsVec<char2> { enum {value = 1}; };
|
||||
template <> struct IsVec<char3> { enum {value = 1}; };
|
||||
template <> struct IsVec<char4> { enum {value = 1}; };
|
||||
template <> struct IsVec<char8> { enum {value = 1}; };
|
||||
template <> struct IsVec<ushort1> { enum {value = 1}; };
|
||||
template <> struct IsVec<ushort2> { enum {value = 1}; };
|
||||
template <> struct IsVec<ushort3> { enum {value = 1}; };
|
||||
template <> struct IsVec<ushort4> { enum {value = 1}; };
|
||||
template <> struct IsVec<ushort8> { enum {value = 1}; };
|
||||
template <> struct IsVec<short1> { enum {value = 1}; };
|
||||
template <> struct IsVec<short2> { enum {value = 1}; };
|
||||
template <> struct IsVec<short3> { enum {value = 1}; };
|
||||
template <> struct IsVec<short4> { enum {value = 1}; };
|
||||
template <> struct IsVec<short8> { enum {value = 1}; };
|
||||
template <> struct IsVec<uint1> { enum {value = 1}; };
|
||||
template <> struct IsVec<uint2> { enum {value = 1}; };
|
||||
template <> struct IsVec<uint3> { enum {value = 1}; };
|
||||
template <> struct IsVec<uint4> { enum {value = 1}; };
|
||||
template <> struct IsVec<uint8> { enum {value = 1}; };
|
||||
template <> struct IsVec<int1> { enum {value = 1}; };
|
||||
template <> struct IsVec<int2> { enum {value = 1}; };
|
||||
template <> struct IsVec<int3> { enum {value = 1}; };
|
||||
template <> struct IsVec<int4> { enum {value = 1}; };
|
||||
template <> struct IsVec<int8> { enum {value = 1}; };
|
||||
template <> struct IsVec<float1> { enum {value = 1}; };
|
||||
template <> struct IsVec<float2> { enum {value = 1}; };
|
||||
template <> struct IsVec<float3> { enum {value = 1}; };
|
||||
template <> struct IsVec<float4> { enum {value = 1}; };
|
||||
template <> struct IsVec<float8> { enum {value = 1}; };
|
||||
template <> struct IsVec<double1> { enum {value = 1}; };
|
||||
template <> struct IsVec<double2> { enum {value = 1}; };
|
||||
template <> struct IsVec<double3> { enum {value = 1}; };
|
||||
template <> struct IsVec<double4> { enum {value = 1}; };
|
||||
template <> struct IsVec<double8> { enum {value = 1}; };
|
||||
|
||||
template <class U> struct AddParameterType { typedef const U& type; };
|
||||
template <class U> struct AddParameterType<U&> { typedef U& type; };
|
||||
template <> struct AddParameterType<void> { typedef void type; };
|
||||
|
||||
template <class U> struct ReferenceTraits
|
||||
{
|
||||
enum { value = false };
|
||||
typedef U type;
|
||||
};
|
||||
template <class U> struct ReferenceTraits<U&>
|
||||
{
|
||||
enum { value = true };
|
||||
typedef U type;
|
||||
};
|
||||
|
||||
template <class U> struct PointerTraits
|
||||
{
|
||||
enum { value = false };
|
||||
typedef void type;
|
||||
};
|
||||
template <class U> struct PointerTraits<U*>
|
||||
{
|
||||
enum { value = true };
|
||||
typedef U type;
|
||||
};
|
||||
template <class U> struct PointerTraits<U*&>
|
||||
{
|
||||
enum { value = true };
|
||||
typedef U type;
|
||||
};
|
||||
|
||||
template <class U> struct UnConst
|
||||
{
|
||||
typedef U type;
|
||||
enum { value = 0 };
|
||||
};
|
||||
template <class U> struct UnConst<const U>
|
||||
{
|
||||
typedef U type;
|
||||
enum { value = 1 };
|
||||
};
|
||||
template <class U> struct UnConst<const U&>
|
||||
{
|
||||
typedef U& type;
|
||||
enum { value = 1 };
|
||||
};
|
||||
|
||||
template <class U> struct UnVolatile
|
||||
{
|
||||
typedef U type;
|
||||
enum { value = 0 };
|
||||
};
|
||||
template <class U> struct UnVolatile<volatile U>
|
||||
{
|
||||
typedef U type;
|
||||
enum { value = 1 };
|
||||
};
|
||||
template <class U> struct UnVolatile<volatile U&>
|
||||
{
|
||||
typedef U& type;
|
||||
enum { value = 1 };
|
||||
};
|
||||
} // namespace type_traits_detail
|
||||
}}} // namespace cv { namespace cuda { namespace cudev
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif // __OPENCV_CUDA_TYPE_TRAITS_DETAIL_HPP__
|
121
3rdparty/include/opencv2/core/cuda/detail/vec_distance_detail.hpp
vendored
Normal file
121
3rdparty/include/opencv2/core/cuda/detail/vec_distance_detail.hpp
vendored
Normal file
@ -0,0 +1,121 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_CUDA_VEC_DISTANCE_DETAIL_HPP__
|
||||
#define __OPENCV_CUDA_VEC_DISTANCE_DETAIL_HPP__
|
||||
|
||||
#include "../datamov_utils.hpp"
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
namespace vec_distance_detail
|
||||
{
|
||||
template <int THREAD_DIM, int N> struct UnrollVecDiffCached
|
||||
{
|
||||
template <typename Dist, typename T1, typename T2>
|
||||
static __device__ void calcCheck(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, int ind)
|
||||
{
|
||||
if (ind < len)
|
||||
{
|
||||
T1 val1 = *vecCached++;
|
||||
|
||||
T2 val2;
|
||||
ForceGlob<T2>::Load(vecGlob, ind, val2);
|
||||
|
||||
dist.reduceIter(val1, val2);
|
||||
|
||||
UnrollVecDiffCached<THREAD_DIM, N - 1>::calcCheck(vecCached, vecGlob, len, dist, ind + THREAD_DIM);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Dist, typename T1, typename T2>
|
||||
static __device__ void calcWithoutCheck(const T1* vecCached, const T2* vecGlob, Dist& dist)
|
||||
{
|
||||
T1 val1 = *vecCached++;
|
||||
|
||||
T2 val2;
|
||||
ForceGlob<T2>::Load(vecGlob, 0, val2);
|
||||
vecGlob += THREAD_DIM;
|
||||
|
||||
dist.reduceIter(val1, val2);
|
||||
|
||||
UnrollVecDiffCached<THREAD_DIM, N - 1>::calcWithoutCheck(vecCached, vecGlob, dist);
|
||||
}
|
||||
};
|
||||
template <int THREAD_DIM> struct UnrollVecDiffCached<THREAD_DIM, 0>
|
||||
{
|
||||
template <typename Dist, typename T1, typename T2>
|
||||
static __device__ __forceinline__ void calcCheck(const T1*, const T2*, int, Dist&, int)
|
||||
{
|
||||
}
|
||||
|
||||
template <typename Dist, typename T1, typename T2>
|
||||
static __device__ __forceinline__ void calcWithoutCheck(const T1*, const T2*, Dist&)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN> struct VecDiffCachedCalculator;
|
||||
template <int THREAD_DIM, int MAX_LEN> struct VecDiffCachedCalculator<THREAD_DIM, MAX_LEN, false>
|
||||
{
|
||||
template <typename Dist, typename T1, typename T2>
|
||||
static __device__ __forceinline__ void calc(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, int tid)
|
||||
{
|
||||
UnrollVecDiffCached<THREAD_DIM, MAX_LEN / THREAD_DIM>::calcCheck(vecCached, vecGlob, len, dist, tid);
|
||||
}
|
||||
};
|
||||
template <int THREAD_DIM, int MAX_LEN> struct VecDiffCachedCalculator<THREAD_DIM, MAX_LEN, true>
|
||||
{
|
||||
template <typename Dist, typename T1, typename T2>
|
||||
static __device__ __forceinline__ void calc(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, int tid)
|
||||
{
|
||||
UnrollVecDiffCached<THREAD_DIM, MAX_LEN / THREAD_DIM>::calcWithoutCheck(vecCached, vecGlob + tid, dist);
|
||||
}
|
||||
};
|
||||
} // namespace vec_distance_detail
|
||||
}}} // namespace cv { namespace cuda { namespace cudev
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif // __OPENCV_CUDA_VEC_DISTANCE_DETAIL_HPP__
|
88
3rdparty/include/opencv2/core/cuda/dynamic_smem.hpp
vendored
Normal file
88
3rdparty/include/opencv2/core/cuda/dynamic_smem.hpp
vendored
Normal file
@ -0,0 +1,88 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_CUDA_DYNAMIC_SMEM_HPP__
|
||||
#define __OPENCV_CUDA_DYNAMIC_SMEM_HPP__
|
||||
|
||||
/** @file
|
||||
* @deprecated Use @ref cudev instead.
|
||||
*/
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
template<class T> struct DynamicSharedMem
|
||||
{
|
||||
__device__ __forceinline__ operator T*()
|
||||
{
|
||||
extern __shared__ int __smem[];
|
||||
return (T*)__smem;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ operator const T*() const
|
||||
{
|
||||
extern __shared__ int __smem[];
|
||||
return (T*)__smem;
|
||||
}
|
||||
};
|
||||
|
||||
// specialize for double to avoid unaligned memory access compile errors
|
||||
template<> struct DynamicSharedMem<double>
|
||||
{
|
||||
__device__ __forceinline__ operator double*()
|
||||
{
|
||||
extern __shared__ double __smem_d[];
|
||||
return (double*)__smem_d;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ operator const double*() const
|
||||
{
|
||||
extern __shared__ double __smem_d[];
|
||||
return (double*)__smem_d;
|
||||
}
|
||||
};
|
||||
}}}
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif // __OPENCV_CUDA_DYNAMIC_SMEM_HPP__
|
269
3rdparty/include/opencv2/core/cuda/emulation.hpp
vendored
Normal file
269
3rdparty/include/opencv2/core/cuda/emulation.hpp
vendored
Normal file
@ -0,0 +1,269 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef OPENCV_CUDA_EMULATION_HPP_
|
||||
#define OPENCV_CUDA_EMULATION_HPP_
|
||||
|
||||
#include "common.hpp"
|
||||
#include "warp_reduce.hpp"
|
||||
|
||||
/** @file
|
||||
* @deprecated Use @ref cudev instead.
|
||||
*/
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
struct Emulation
|
||||
{
|
||||
|
||||
static __device__ __forceinline__ int syncthreadsOr(int pred)
|
||||
{
|
||||
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 200)
|
||||
// just campilation stab
|
||||
return 0;
|
||||
#else
|
||||
return __syncthreads_or(pred);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<int CTA_SIZE>
|
||||
static __forceinline__ __device__ int Ballot(int predicate)
|
||||
{
|
||||
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ >= 200)
|
||||
return __ballot(predicate);
|
||||
#else
|
||||
__shared__ volatile int cta_buffer[CTA_SIZE];
|
||||
|
||||
int tid = threadIdx.x;
|
||||
cta_buffer[tid] = predicate ? (1 << (tid & 31)) : 0;
|
||||
return warp_reduce(cta_buffer);
|
||||
#endif
|
||||
}
|
||||
|
||||
struct smem
|
||||
{
|
||||
enum { TAG_MASK = (1U << ( (sizeof(unsigned int) << 3) - 5U)) - 1U };
|
||||
|
||||
template<typename T>
|
||||
static __device__ __forceinline__ T atomicInc(T* address, T val)
|
||||
{
|
||||
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
|
||||
T count;
|
||||
unsigned int tag = threadIdx.x << ( (sizeof(unsigned int) << 3) - 5U);
|
||||
do
|
||||
{
|
||||
count = *address & TAG_MASK;
|
||||
count = tag | (count + 1);
|
||||
*address = count;
|
||||
} while (*address != count);
|
||||
|
||||
return (count & TAG_MASK) - 1;
|
||||
#else
|
||||
return ::atomicInc(address, val);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
static __device__ __forceinline__ T atomicAdd(T* address, T val)
|
||||
{
|
||||
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
|
||||
T count;
|
||||
unsigned int tag = threadIdx.x << ( (sizeof(unsigned int) << 3) - 5U);
|
||||
do
|
||||
{
|
||||
count = *address & TAG_MASK;
|
||||
count = tag | (count + val);
|
||||
*address = count;
|
||||
} while (*address != count);
|
||||
|
||||
return (count & TAG_MASK) - val;
|
||||
#else
|
||||
return ::atomicAdd(address, val);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
static __device__ __forceinline__ T atomicMin(T* address, T val)
|
||||
{
|
||||
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
|
||||
T count = ::min(*address, val);
|
||||
do
|
||||
{
|
||||
*address = count;
|
||||
} while (*address > count);
|
||||
|
||||
return count;
|
||||
#else
|
||||
return ::atomicMin(address, val);
|
||||
#endif
|
||||
}
|
||||
}; // struct cmem
|
||||
|
||||
struct glob
|
||||
{
|
||||
static __device__ __forceinline__ int atomicAdd(int* address, int val)
|
||||
{
|
||||
return ::atomicAdd(address, val);
|
||||
}
|
||||
static __device__ __forceinline__ unsigned int atomicAdd(unsigned int* address, unsigned int val)
|
||||
{
|
||||
return ::atomicAdd(address, val);
|
||||
}
|
||||
static __device__ __forceinline__ float atomicAdd(float* address, float val)
|
||||
{
|
||||
#if __CUDA_ARCH__ >= 200
|
||||
return ::atomicAdd(address, val);
|
||||
#else
|
||||
int* address_as_i = (int*) address;
|
||||
int old = *address_as_i, assumed;
|
||||
do {
|
||||
assumed = old;
|
||||
old = ::atomicCAS(address_as_i, assumed,
|
||||
__float_as_int(val + __int_as_float(assumed)));
|
||||
} while (assumed != old);
|
||||
return __int_as_float(old);
|
||||
#endif
|
||||
}
|
||||
static __device__ __forceinline__ double atomicAdd(double* address, double val)
|
||||
{
|
||||
#if __CUDA_ARCH__ >= 130
|
||||
unsigned long long int* address_as_ull = (unsigned long long int*) address;
|
||||
unsigned long long int old = *address_as_ull, assumed;
|
||||
do {
|
||||
assumed = old;
|
||||
old = ::atomicCAS(address_as_ull, assumed,
|
||||
__double_as_longlong(val + __longlong_as_double(assumed)));
|
||||
} while (assumed != old);
|
||||
return __longlong_as_double(old);
|
||||
#else
|
||||
(void) address;
|
||||
(void) val;
|
||||
return 0.0;
|
||||
#endif
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ int atomicMin(int* address, int val)
|
||||
{
|
||||
return ::atomicMin(address, val);
|
||||
}
|
||||
static __device__ __forceinline__ float atomicMin(float* address, float val)
|
||||
{
|
||||
#if __CUDA_ARCH__ >= 120
|
||||
int* address_as_i = (int*) address;
|
||||
int old = *address_as_i, assumed;
|
||||
do {
|
||||
assumed = old;
|
||||
old = ::atomicCAS(address_as_i, assumed,
|
||||
__float_as_int(::fminf(val, __int_as_float(assumed))));
|
||||
} while (assumed != old);
|
||||
return __int_as_float(old);
|
||||
#else
|
||||
(void) address;
|
||||
(void) val;
|
||||
return 0.0f;
|
||||
#endif
|
||||
}
|
||||
static __device__ __forceinline__ double atomicMin(double* address, double val)
|
||||
{
|
||||
#if __CUDA_ARCH__ >= 130
|
||||
unsigned long long int* address_as_ull = (unsigned long long int*) address;
|
||||
unsigned long long int old = *address_as_ull, assumed;
|
||||
do {
|
||||
assumed = old;
|
||||
old = ::atomicCAS(address_as_ull, assumed,
|
||||
__double_as_longlong(::fmin(val, __longlong_as_double(assumed))));
|
||||
} while (assumed != old);
|
||||
return __longlong_as_double(old);
|
||||
#else
|
||||
(void) address;
|
||||
(void) val;
|
||||
return 0.0;
|
||||
#endif
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ int atomicMax(int* address, int val)
|
||||
{
|
||||
return ::atomicMax(address, val);
|
||||
}
|
||||
static __device__ __forceinline__ float atomicMax(float* address, float val)
|
||||
{
|
||||
#if __CUDA_ARCH__ >= 120
|
||||
int* address_as_i = (int*) address;
|
||||
int old = *address_as_i, assumed;
|
||||
do {
|
||||
assumed = old;
|
||||
old = ::atomicCAS(address_as_i, assumed,
|
||||
__float_as_int(::fmaxf(val, __int_as_float(assumed))));
|
||||
} while (assumed != old);
|
||||
return __int_as_float(old);
|
||||
#else
|
||||
(void) address;
|
||||
(void) val;
|
||||
return 0.0f;
|
||||
#endif
|
||||
}
|
||||
static __device__ __forceinline__ double atomicMax(double* address, double val)
|
||||
{
|
||||
#if __CUDA_ARCH__ >= 130
|
||||
unsigned long long int* address_as_ull = (unsigned long long int*) address;
|
||||
unsigned long long int old = *address_as_ull, assumed;
|
||||
do {
|
||||
assumed = old;
|
||||
old = ::atomicCAS(address_as_ull, assumed,
|
||||
__double_as_longlong(::fmax(val, __longlong_as_double(assumed))));
|
||||
} while (assumed != old);
|
||||
return __longlong_as_double(old);
|
||||
#else
|
||||
(void) address;
|
||||
(void) val;
|
||||
return 0.0;
|
||||
#endif
|
||||
}
|
||||
};
|
||||
}; //struct Emulation
|
||||
}}} // namespace cv { namespace cuda { namespace cudev
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif /* OPENCV_CUDA_EMULATION_HPP_ */
|
286
3rdparty/include/opencv2/core/cuda/filters.hpp
vendored
Normal file
286
3rdparty/include/opencv2/core/cuda/filters.hpp
vendored
Normal file
@ -0,0 +1,286 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_CUDA_FILTERS_HPP__
|
||||
#define __OPENCV_CUDA_FILTERS_HPP__
|
||||
|
||||
#include "saturate_cast.hpp"
|
||||
#include "vec_traits.hpp"
|
||||
#include "vec_math.hpp"
|
||||
#include "type_traits.hpp"
|
||||
|
||||
/** @file
|
||||
* @deprecated Use @ref cudev instead.
|
||||
*/
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
template <typename Ptr2D> struct PointFilter
|
||||
{
|
||||
typedef typename Ptr2D::elem_type elem_type;
|
||||
typedef float index_type;
|
||||
|
||||
explicit __host__ __device__ __forceinline__ PointFilter(const Ptr2D& src_, float fx = 0.f, float fy = 0.f)
|
||||
: src(src_)
|
||||
{
|
||||
(void)fx;
|
||||
(void)fy;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ elem_type operator ()(float y, float x) const
|
||||
{
|
||||
return src(__float2int_rz(y), __float2int_rz(x));
|
||||
}
|
||||
|
||||
Ptr2D src;
|
||||
};
|
||||
|
||||
template <typename Ptr2D> struct LinearFilter
|
||||
{
|
||||
typedef typename Ptr2D::elem_type elem_type;
|
||||
typedef float index_type;
|
||||
|
||||
explicit __host__ __device__ __forceinline__ LinearFilter(const Ptr2D& src_, float fx = 0.f, float fy = 0.f)
|
||||
: src(src_)
|
||||
{
|
||||
(void)fx;
|
||||
(void)fy;
|
||||
}
|
||||
__device__ __forceinline__ elem_type operator ()(float y, float x) const
|
||||
{
|
||||
typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;
|
||||
|
||||
work_type out = VecTraits<work_type>::all(0);
|
||||
|
||||
const int x1 = __float2int_rd(x);
|
||||
const int y1 = __float2int_rd(y);
|
||||
const int x2 = x1 + 1;
|
||||
const int y2 = y1 + 1;
|
||||
|
||||
elem_type src_reg = src(y1, x1);
|
||||
out = out + src_reg * ((x2 - x) * (y2 - y));
|
||||
|
||||
src_reg = src(y1, x2);
|
||||
out = out + src_reg * ((x - x1) * (y2 - y));
|
||||
|
||||
src_reg = src(y2, x1);
|
||||
out = out + src_reg * ((x2 - x) * (y - y1));
|
||||
|
||||
src_reg = src(y2, x2);
|
||||
out = out + src_reg * ((x - x1) * (y - y1));
|
||||
|
||||
return saturate_cast<elem_type>(out);
|
||||
}
|
||||
|
||||
Ptr2D src;
|
||||
};
|
||||
|
||||
template <typename Ptr2D> struct CubicFilter
|
||||
{
|
||||
typedef typename Ptr2D::elem_type elem_type;
|
||||
typedef float index_type;
|
||||
typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;
|
||||
|
||||
explicit __host__ __device__ __forceinline__ CubicFilter(const Ptr2D& src_, float fx = 0.f, float fy = 0.f)
|
||||
: src(src_)
|
||||
{
|
||||
(void)fx;
|
||||
(void)fy;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ float bicubicCoeff(float x_)
|
||||
{
|
||||
float x = fabsf(x_);
|
||||
if (x <= 1.0f)
|
||||
{
|
||||
return x * x * (1.5f * x - 2.5f) + 1.0f;
|
||||
}
|
||||
else if (x < 2.0f)
|
||||
{
|
||||
return x * (x * (-0.5f * x + 2.5f) - 4.0f) + 2.0f;
|
||||
}
|
||||
else
|
||||
{
|
||||
return 0.0f;
|
||||
}
|
||||
}
|
||||
|
||||
__device__ elem_type operator ()(float y, float x) const
|
||||
{
|
||||
const float xmin = ::ceilf(x - 2.0f);
|
||||
const float xmax = ::floorf(x + 2.0f);
|
||||
|
||||
const float ymin = ::ceilf(y - 2.0f);
|
||||
const float ymax = ::floorf(y + 2.0f);
|
||||
|
||||
work_type sum = VecTraits<work_type>::all(0);
|
||||
float wsum = 0.0f;
|
||||
|
||||
for (float cy = ymin; cy <= ymax; cy += 1.0f)
|
||||
{
|
||||
for (float cx = xmin; cx <= xmax; cx += 1.0f)
|
||||
{
|
||||
const float w = bicubicCoeff(x - cx) * bicubicCoeff(y - cy);
|
||||
sum = sum + w * src(__float2int_rd(cy), __float2int_rd(cx));
|
||||
wsum += w;
|
||||
}
|
||||
}
|
||||
|
||||
work_type res = (!wsum)? VecTraits<work_type>::all(0) : sum / wsum;
|
||||
|
||||
return saturate_cast<elem_type>(res);
|
||||
}
|
||||
|
||||
Ptr2D src;
|
||||
};
|
||||
// for integer scaling
|
||||
template <typename Ptr2D> struct IntegerAreaFilter
|
||||
{
|
||||
typedef typename Ptr2D::elem_type elem_type;
|
||||
typedef float index_type;
|
||||
|
||||
explicit __host__ __device__ __forceinline__ IntegerAreaFilter(const Ptr2D& src_, float scale_x_, float scale_y_)
|
||||
: src(src_), scale_x(scale_x_), scale_y(scale_y_), scale(1.f / (scale_x * scale_y)) {}
|
||||
|
||||
__device__ __forceinline__ elem_type operator ()(float y, float x) const
|
||||
{
|
||||
float fsx1 = x * scale_x;
|
||||
float fsx2 = fsx1 + scale_x;
|
||||
|
||||
int sx1 = __float2int_ru(fsx1);
|
||||
int sx2 = __float2int_rd(fsx2);
|
||||
|
||||
float fsy1 = y * scale_y;
|
||||
float fsy2 = fsy1 + scale_y;
|
||||
|
||||
int sy1 = __float2int_ru(fsy1);
|
||||
int sy2 = __float2int_rd(fsy2);
|
||||
|
||||
typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;
|
||||
work_type out = VecTraits<work_type>::all(0.f);
|
||||
|
||||
for(int dy = sy1; dy < sy2; ++dy)
|
||||
for(int dx = sx1; dx < sx2; ++dx)
|
||||
{
|
||||
out = out + src(dy, dx) * scale;
|
||||
}
|
||||
|
||||
return saturate_cast<elem_type>(out);
|
||||
}
|
||||
|
||||
Ptr2D src;
|
||||
float scale_x, scale_y ,scale;
|
||||
};
|
||||
|
||||
template <typename Ptr2D> struct AreaFilter
|
||||
{
|
||||
typedef typename Ptr2D::elem_type elem_type;
|
||||
typedef float index_type;
|
||||
|
||||
explicit __host__ __device__ __forceinline__ AreaFilter(const Ptr2D& src_, float scale_x_, float scale_y_)
|
||||
: src(src_), scale_x(scale_x_), scale_y(scale_y_){}
|
||||
|
||||
__device__ __forceinline__ elem_type operator ()(float y, float x) const
|
||||
{
|
||||
float fsx1 = x * scale_x;
|
||||
float fsx2 = fsx1 + scale_x;
|
||||
|
||||
int sx1 = __float2int_ru(fsx1);
|
||||
int sx2 = __float2int_rd(fsx2);
|
||||
|
||||
float fsy1 = y * scale_y;
|
||||
float fsy2 = fsy1 + scale_y;
|
||||
|
||||
int sy1 = __float2int_ru(fsy1);
|
||||
int sy2 = __float2int_rd(fsy2);
|
||||
|
||||
float scale = 1.f / (fminf(scale_x, src.width - fsx1) * fminf(scale_y, src.height - fsy1));
|
||||
|
||||
typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;
|
||||
work_type out = VecTraits<work_type>::all(0.f);
|
||||
|
||||
for (int dy = sy1; dy < sy2; ++dy)
|
||||
{
|
||||
for (int dx = sx1; dx < sx2; ++dx)
|
||||
out = out + src(dy, dx) * scale;
|
||||
|
||||
if (sx1 > fsx1)
|
||||
out = out + src(dy, (sx1 -1) ) * ((sx1 - fsx1) * scale);
|
||||
|
||||
if (sx2 < fsx2)
|
||||
out = out + src(dy, sx2) * ((fsx2 -sx2) * scale);
|
||||
}
|
||||
|
||||
if (sy1 > fsy1)
|
||||
for (int dx = sx1; dx < sx2; ++dx)
|
||||
out = out + src( (sy1 - 1) , dx) * ((sy1 -fsy1) * scale);
|
||||
|
||||
if (sy2 < fsy2)
|
||||
for (int dx = sx1; dx < sx2; ++dx)
|
||||
out = out + src(sy2, dx) * ((fsy2 -sy2) * scale);
|
||||
|
||||
if ((sy1 > fsy1) && (sx1 > fsx1))
|
||||
out = out + src( (sy1 - 1) , (sx1 - 1)) * ((sy1 -fsy1) * (sx1 -fsx1) * scale);
|
||||
|
||||
if ((sy1 > fsy1) && (sx2 < fsx2))
|
||||
out = out + src( (sy1 - 1) , sx2) * ((sy1 -fsy1) * (fsx2 -sx2) * scale);
|
||||
|
||||
if ((sy2 < fsy2) && (sx2 < fsx2))
|
||||
out = out + src(sy2, sx2) * ((fsy2 -sy2) * (fsx2 -sx2) * scale);
|
||||
|
||||
if ((sy2 < fsy2) && (sx1 > fsx1))
|
||||
out = out + src(sy2, (sx1 - 1)) * ((fsy2 -sy2) * (sx1 -fsx1) * scale);
|
||||
|
||||
return saturate_cast<elem_type>(out);
|
||||
}
|
||||
|
||||
Ptr2D src;
|
||||
float scale_x, scale_y;
|
||||
int width, haight;
|
||||
};
|
||||
}}} // namespace cv { namespace cuda { namespace cudev
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif // __OPENCV_CUDA_FILTERS_HPP__
|
79
3rdparty/include/opencv2/core/cuda/funcattrib.hpp
vendored
Normal file
79
3rdparty/include/opencv2/core/cuda/funcattrib.hpp
vendored
Normal file
@ -0,0 +1,79 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_CUDA_DEVICE_FUNCATTRIB_HPP_
|
||||
#define __OPENCV_CUDA_DEVICE_FUNCATTRIB_HPP_
|
||||
|
||||
#include <cstdio>
|
||||
|
||||
/** @file
|
||||
* @deprecated Use @ref cudev instead.
|
||||
*/
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
template<class Func>
|
||||
void printFuncAttrib(Func& func)
|
||||
{
|
||||
|
||||
cudaFuncAttributes attrs;
|
||||
cudaFuncGetAttributes(&attrs, func);
|
||||
|
||||
printf("=== Function stats ===\n");
|
||||
printf("Name: \n");
|
||||
printf("sharedSizeBytes = %d\n", attrs.sharedSizeBytes);
|
||||
printf("constSizeBytes = %d\n", attrs.constSizeBytes);
|
||||
printf("localSizeBytes = %d\n", attrs.localSizeBytes);
|
||||
printf("maxThreadsPerBlock = %d\n", attrs.maxThreadsPerBlock);
|
||||
printf("numRegs = %d\n", attrs.numRegs);
|
||||
printf("ptxVersion = %d\n", attrs.ptxVersion);
|
||||
printf("binaryVersion = %d\n", attrs.binaryVersion);
|
||||
printf("\n");
|
||||
fflush(stdout);
|
||||
}
|
||||
}}} // namespace cv { namespace cuda { namespace cudev
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif /* __OPENCV_CUDA_DEVICE_FUNCATTRIB_HPP_ */
|
797
3rdparty/include/opencv2/core/cuda/functional.hpp
vendored
Normal file
797
3rdparty/include/opencv2/core/cuda/functional.hpp
vendored
Normal file
@ -0,0 +1,797 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_CUDA_FUNCTIONAL_HPP__
|
||||
#define __OPENCV_CUDA_FUNCTIONAL_HPP__
|
||||
|
||||
#include <functional>
|
||||
#include "saturate_cast.hpp"
|
||||
#include "vec_traits.hpp"
|
||||
#include "type_traits.hpp"
|
||||
#include "device_functions.h"
|
||||
|
||||
/** @file
|
||||
* @deprecated Use @ref cudev instead.
|
||||
*/
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
// Function Objects
|
||||
template<typename Argument, typename Result> struct unary_function : public std::unary_function<Argument, Result> {};
|
||||
template<typename Argument1, typename Argument2, typename Result> struct binary_function : public std::binary_function<Argument1, Argument2, Result> {};
|
||||
|
||||
// Arithmetic Operations
|
||||
template <typename T> struct plus : binary_function<T, T, T>
|
||||
{
|
||||
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
|
||||
typename TypeTraits<T>::ParameterType b) const
|
||||
{
|
||||
return a + b;
|
||||
}
|
||||
__host__ __device__ __forceinline__ plus() {}
|
||||
__host__ __device__ __forceinline__ plus(const plus&) {}
|
||||
};
|
||||
|
||||
template <typename T> struct minus : binary_function<T, T, T>
|
||||
{
|
||||
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
|
||||
typename TypeTraits<T>::ParameterType b) const
|
||||
{
|
||||
return a - b;
|
||||
}
|
||||
__host__ __device__ __forceinline__ minus() {}
|
||||
__host__ __device__ __forceinline__ minus(const minus&) {}
|
||||
};
|
||||
|
||||
template <typename T> struct multiplies : binary_function<T, T, T>
|
||||
{
|
||||
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
|
||||
typename TypeTraits<T>::ParameterType b) const
|
||||
{
|
||||
return a * b;
|
||||
}
|
||||
__host__ __device__ __forceinline__ multiplies() {}
|
||||
__host__ __device__ __forceinline__ multiplies(const multiplies&) {}
|
||||
};
|
||||
|
||||
template <typename T> struct divides : binary_function<T, T, T>
|
||||
{
|
||||
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
|
||||
typename TypeTraits<T>::ParameterType b) const
|
||||
{
|
||||
return a / b;
|
||||
}
|
||||
__host__ __device__ __forceinline__ divides() {}
|
||||
__host__ __device__ __forceinline__ divides(const divides&) {}
|
||||
};
|
||||
|
||||
template <typename T> struct modulus : binary_function<T, T, T>
|
||||
{
|
||||
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
|
||||
typename TypeTraits<T>::ParameterType b) const
|
||||
{
|
||||
return a % b;
|
||||
}
|
||||
__host__ __device__ __forceinline__ modulus() {}
|
||||
__host__ __device__ __forceinline__ modulus(const modulus&) {}
|
||||
};
|
||||
|
||||
template <typename T> struct negate : unary_function<T, T>
|
||||
{
|
||||
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a) const
|
||||
{
|
||||
return -a;
|
||||
}
|
||||
__host__ __device__ __forceinline__ negate() {}
|
||||
__host__ __device__ __forceinline__ negate(const negate&) {}
|
||||
};
|
||||
|
||||
// Comparison Operations
|
||||
template <typename T> struct equal_to : binary_function<T, T, bool>
|
||||
{
|
||||
__device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
|
||||
typename TypeTraits<T>::ParameterType b) const
|
||||
{
|
||||
return a == b;
|
||||
}
|
||||
__host__ __device__ __forceinline__ equal_to() {}
|
||||
__host__ __device__ __forceinline__ equal_to(const equal_to&) {}
|
||||
};
|
||||
|
||||
template <typename T> struct not_equal_to : binary_function<T, T, bool>
|
||||
{
|
||||
__device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
|
||||
typename TypeTraits<T>::ParameterType b) const
|
||||
{
|
||||
return a != b;
|
||||
}
|
||||
__host__ __device__ __forceinline__ not_equal_to() {}
|
||||
__host__ __device__ __forceinline__ not_equal_to(const not_equal_to&) {}
|
||||
};
|
||||
|
||||
template <typename T> struct greater : binary_function<T, T, bool>
|
||||
{
|
||||
__device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
|
||||
typename TypeTraits<T>::ParameterType b) const
|
||||
{
|
||||
return a > b;
|
||||
}
|
||||
__host__ __device__ __forceinline__ greater() {}
|
||||
__host__ __device__ __forceinline__ greater(const greater&) {}
|
||||
};
|
||||
|
||||
template <typename T> struct less : binary_function<T, T, bool>
|
||||
{
|
||||
__device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
|
||||
typename TypeTraits<T>::ParameterType b) const
|
||||
{
|
||||
return a < b;
|
||||
}
|
||||
__host__ __device__ __forceinline__ less() {}
|
||||
__host__ __device__ __forceinline__ less(const less&) {}
|
||||
};
|
||||
|
||||
template <typename T> struct greater_equal : binary_function<T, T, bool>
|
||||
{
|
||||
__device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
|
||||
typename TypeTraits<T>::ParameterType b) const
|
||||
{
|
||||
return a >= b;
|
||||
}
|
||||
__host__ __device__ __forceinline__ greater_equal() {}
|
||||
__host__ __device__ __forceinline__ greater_equal(const greater_equal&) {}
|
||||
};
|
||||
|
||||
template <typename T> struct less_equal : binary_function<T, T, bool>
|
||||
{
|
||||
__device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
|
||||
typename TypeTraits<T>::ParameterType b) const
|
||||
{
|
||||
return a <= b;
|
||||
}
|
||||
__host__ __device__ __forceinline__ less_equal() {}
|
||||
__host__ __device__ __forceinline__ less_equal(const less_equal&) {}
|
||||
};
|
||||
|
||||
// Logical Operations
|
||||
template <typename T> struct logical_and : binary_function<T, T, bool>
|
||||
{
|
||||
__device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
|
||||
typename TypeTraits<T>::ParameterType b) const
|
||||
{
|
||||
return a && b;
|
||||
}
|
||||
__host__ __device__ __forceinline__ logical_and() {}
|
||||
__host__ __device__ __forceinline__ logical_and(const logical_and&) {}
|
||||
};
|
||||
|
||||
template <typename T> struct logical_or : binary_function<T, T, bool>
|
||||
{
|
||||
__device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
|
||||
typename TypeTraits<T>::ParameterType b) const
|
||||
{
|
||||
return a || b;
|
||||
}
|
||||
__host__ __device__ __forceinline__ logical_or() {}
|
||||
__host__ __device__ __forceinline__ logical_or(const logical_or&) {}
|
||||
};
|
||||
|
||||
template <typename T> struct logical_not : unary_function<T, bool>
|
||||
{
|
||||
__device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a) const
|
||||
{
|
||||
return !a;
|
||||
}
|
||||
__host__ __device__ __forceinline__ logical_not() {}
|
||||
__host__ __device__ __forceinline__ logical_not(const logical_not&) {}
|
||||
};
|
||||
|
||||
// Bitwise Operations
|
||||
template <typename T> struct bit_and : binary_function<T, T, T>
|
||||
{
|
||||
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
|
||||
typename TypeTraits<T>::ParameterType b) const
|
||||
{
|
||||
return a & b;
|
||||
}
|
||||
__host__ __device__ __forceinline__ bit_and() {}
|
||||
__host__ __device__ __forceinline__ bit_and(const bit_and&) {}
|
||||
};
|
||||
|
||||
template <typename T> struct bit_or : binary_function<T, T, T>
|
||||
{
|
||||
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
|
||||
typename TypeTraits<T>::ParameterType b) const
|
||||
{
|
||||
return a | b;
|
||||
}
|
||||
__host__ __device__ __forceinline__ bit_or() {}
|
||||
__host__ __device__ __forceinline__ bit_or(const bit_or&) {}
|
||||
};
|
||||
|
||||
template <typename T> struct bit_xor : binary_function<T, T, T>
|
||||
{
|
||||
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
|
||||
typename TypeTraits<T>::ParameterType b) const
|
||||
{
|
||||
return a ^ b;
|
||||
}
|
||||
__host__ __device__ __forceinline__ bit_xor() {}
|
||||
__host__ __device__ __forceinline__ bit_xor(const bit_xor&) {}
|
||||
};
|
||||
|
||||
template <typename T> struct bit_not : unary_function<T, T>
|
||||
{
|
||||
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType v) const
|
||||
{
|
||||
return ~v;
|
||||
}
|
||||
__host__ __device__ __forceinline__ bit_not() {}
|
||||
__host__ __device__ __forceinline__ bit_not(const bit_not&) {}
|
||||
};
|
||||
|
||||
// Generalized Identity Operations
|
||||
template <typename T> struct identity : unary_function<T, T>
|
||||
{
|
||||
__device__ __forceinline__ typename TypeTraits<T>::ParameterType operator()(typename TypeTraits<T>::ParameterType x) const
|
||||
{
|
||||
return x;
|
||||
}
|
||||
__host__ __device__ __forceinline__ identity() {}
|
||||
__host__ __device__ __forceinline__ identity(const identity&) {}
|
||||
};
|
||||
|
||||
template <typename T1, typename T2> struct project1st : binary_function<T1, T2, T1>
|
||||
{
|
||||
__device__ __forceinline__ typename TypeTraits<T1>::ParameterType operator()(typename TypeTraits<T1>::ParameterType lhs, typename TypeTraits<T2>::ParameterType rhs) const
|
||||
{
|
||||
return lhs;
|
||||
}
|
||||
__host__ __device__ __forceinline__ project1st() {}
|
||||
__host__ __device__ __forceinline__ project1st(const project1st&) {}
|
||||
};
|
||||
|
||||
template <typename T1, typename T2> struct project2nd : binary_function<T1, T2, T2>
|
||||
{
|
||||
__device__ __forceinline__ typename TypeTraits<T2>::ParameterType operator()(typename TypeTraits<T1>::ParameterType lhs, typename TypeTraits<T2>::ParameterType rhs) const
|
||||
{
|
||||
return rhs;
|
||||
}
|
||||
__host__ __device__ __forceinline__ project2nd() {}
|
||||
__host__ __device__ __forceinline__ project2nd(const project2nd&) {}
|
||||
};
|
||||
|
||||
// Min/Max Operations
|
||||
|
||||
#define OPENCV_CUDA_IMPLEMENT_MINMAX(name, type, op) \
|
||||
template <> struct name<type> : binary_function<type, type, type> \
|
||||
{ \
|
||||
__device__ __forceinline__ type operator()(type lhs, type rhs) const {return op(lhs, rhs);} \
|
||||
__host__ __device__ __forceinline__ name() {}\
|
||||
__host__ __device__ __forceinline__ name(const name&) {}\
|
||||
};
|
||||
|
||||
template <typename T> struct maximum : binary_function<T, T, T>
|
||||
{
|
||||
__device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType lhs, typename TypeTraits<T>::ParameterType rhs) const
|
||||
{
|
||||
return max(lhs, rhs);
|
||||
}
|
||||
__host__ __device__ __forceinline__ maximum() {}
|
||||
__host__ __device__ __forceinline__ maximum(const maximum&) {}
|
||||
};
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, uchar, ::max)
|
||||
OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, schar, ::max)
|
||||
OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, char, ::max)
|
||||
OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, ushort, ::max)
|
||||
OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, short, ::max)
|
||||
OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, int, ::max)
|
||||
OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, uint, ::max)
|
||||
OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, float, ::fmax)
|
||||
OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, double, ::fmax)
|
||||
|
||||
template <typename T> struct minimum : binary_function<T, T, T>
|
||||
{
|
||||
__device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType lhs, typename TypeTraits<T>::ParameterType rhs) const
|
||||
{
|
||||
return min(lhs, rhs);
|
||||
}
|
||||
__host__ __device__ __forceinline__ minimum() {}
|
||||
__host__ __device__ __forceinline__ minimum(const minimum&) {}
|
||||
};
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, uchar, ::min)
|
||||
OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, schar, ::min)
|
||||
OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, char, ::min)
|
||||
OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, ushort, ::min)
|
||||
OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, short, ::min)
|
||||
OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, int, ::min)
|
||||
OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, uint, ::min)
|
||||
OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, float, ::fmin)
|
||||
OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, double, ::fmin)
|
||||
|
||||
#undef OPENCV_CUDA_IMPLEMENT_MINMAX
|
||||
|
||||
// Math functions
|
||||
|
||||
template <typename T> struct abs_func : unary_function<T, T>
|
||||
{
|
||||
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType x) const
|
||||
{
|
||||
return abs(x);
|
||||
}
|
||||
|
||||
__host__ __device__ __forceinline__ abs_func() {}
|
||||
__host__ __device__ __forceinline__ abs_func(const abs_func&) {}
|
||||
};
|
||||
template <> struct abs_func<unsigned char> : unary_function<unsigned char, unsigned char>
|
||||
{
|
||||
__device__ __forceinline__ unsigned char operator ()(unsigned char x) const
|
||||
{
|
||||
return x;
|
||||
}
|
||||
|
||||
__host__ __device__ __forceinline__ abs_func() {}
|
||||
__host__ __device__ __forceinline__ abs_func(const abs_func&) {}
|
||||
};
|
||||
template <> struct abs_func<signed char> : unary_function<signed char, signed char>
|
||||
{
|
||||
__device__ __forceinline__ signed char operator ()(signed char x) const
|
||||
{
|
||||
return ::abs((int)x);
|
||||
}
|
||||
|
||||
__host__ __device__ __forceinline__ abs_func() {}
|
||||
__host__ __device__ __forceinline__ abs_func(const abs_func&) {}
|
||||
};
|
||||
template <> struct abs_func<char> : unary_function<char, char>
|
||||
{
|
||||
__device__ __forceinline__ char operator ()(char x) const
|
||||
{
|
||||
return ::abs((int)x);
|
||||
}
|
||||
|
||||
__host__ __device__ __forceinline__ abs_func() {}
|
||||
__host__ __device__ __forceinline__ abs_func(const abs_func&) {}
|
||||
};
|
||||
template <> struct abs_func<unsigned short> : unary_function<unsigned short, unsigned short>
|
||||
{
|
||||
__device__ __forceinline__ unsigned short operator ()(unsigned short x) const
|
||||
{
|
||||
return x;
|
||||
}
|
||||
|
||||
__host__ __device__ __forceinline__ abs_func() {}
|
||||
__host__ __device__ __forceinline__ abs_func(const abs_func&) {}
|
||||
};
|
||||
template <> struct abs_func<short> : unary_function<short, short>
|
||||
{
|
||||
__device__ __forceinline__ short operator ()(short x) const
|
||||
{
|
||||
return ::abs((int)x);
|
||||
}
|
||||
|
||||
__host__ __device__ __forceinline__ abs_func() {}
|
||||
__host__ __device__ __forceinline__ abs_func(const abs_func&) {}
|
||||
};
|
||||
template <> struct abs_func<unsigned int> : unary_function<unsigned int, unsigned int>
|
||||
{
|
||||
__device__ __forceinline__ unsigned int operator ()(unsigned int x) const
|
||||
{
|
||||
return x;
|
||||
}
|
||||
|
||||
__host__ __device__ __forceinline__ abs_func() {}
|
||||
__host__ __device__ __forceinline__ abs_func(const abs_func&) {}
|
||||
};
|
||||
template <> struct abs_func<int> : unary_function<int, int>
|
||||
{
|
||||
__device__ __forceinline__ int operator ()(int x) const
|
||||
{
|
||||
return ::abs(x);
|
||||
}
|
||||
|
||||
__host__ __device__ __forceinline__ abs_func() {}
|
||||
__host__ __device__ __forceinline__ abs_func(const abs_func&) {}
|
||||
};
|
||||
template <> struct abs_func<float> : unary_function<float, float>
|
||||
{
|
||||
__device__ __forceinline__ float operator ()(float x) const
|
||||
{
|
||||
return ::fabsf(x);
|
||||
}
|
||||
|
||||
__host__ __device__ __forceinline__ abs_func() {}
|
||||
__host__ __device__ __forceinline__ abs_func(const abs_func&) {}
|
||||
};
|
||||
template <> struct abs_func<double> : unary_function<double, double>
|
||||
{
|
||||
__device__ __forceinline__ double operator ()(double x) const
|
||||
{
|
||||
return ::fabs(x);
|
||||
}
|
||||
|
||||
__host__ __device__ __forceinline__ abs_func() {}
|
||||
__host__ __device__ __forceinline__ abs_func(const abs_func&) {}
|
||||
};
|
||||
|
||||
#define OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(name, func) \
|
||||
template <typename T> struct name ## _func : unary_function<T, float> \
|
||||
{ \
|
||||
__device__ __forceinline__ float operator ()(typename TypeTraits<T>::ParameterType v) const \
|
||||
{ \
|
||||
return func ## f(v); \
|
||||
} \
|
||||
__host__ __device__ __forceinline__ name ## _func() {} \
|
||||
__host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \
|
||||
}; \
|
||||
template <> struct name ## _func<double> : unary_function<double, double> \
|
||||
{ \
|
||||
__device__ __forceinline__ double operator ()(double v) const \
|
||||
{ \
|
||||
return func(v); \
|
||||
} \
|
||||
__host__ __device__ __forceinline__ name ## _func() {} \
|
||||
__host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \
|
||||
};
|
||||
|
||||
#define OPENCV_CUDA_IMPLEMENT_BIN_FUNCTOR(name, func) \
|
||||
template <typename T> struct name ## _func : binary_function<T, T, float> \
|
||||
{ \
|
||||
__device__ __forceinline__ float operator ()(typename TypeTraits<T>::ParameterType v1, typename TypeTraits<T>::ParameterType v2) const \
|
||||
{ \
|
||||
return func ## f(v1, v2); \
|
||||
} \
|
||||
__host__ __device__ __forceinline__ name ## _func() {} \
|
||||
__host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \
|
||||
}; \
|
||||
template <> struct name ## _func<double> : binary_function<double, double, double> \
|
||||
{ \
|
||||
__device__ __forceinline__ double operator ()(double v1, double v2) const \
|
||||
{ \
|
||||
return func(v1, v2); \
|
||||
} \
|
||||
__host__ __device__ __forceinline__ name ## _func() {} \
|
||||
__host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \
|
||||
};
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(sqrt, ::sqrt)
|
||||
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(exp, ::exp)
|
||||
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(exp2, ::exp2)
|
||||
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(exp10, ::exp10)
|
||||
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(log, ::log)
|
||||
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(log2, ::log2)
|
||||
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(log10, ::log10)
|
||||
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(sin, ::sin)
|
||||
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(cos, ::cos)
|
||||
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(tan, ::tan)
|
||||
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(asin, ::asin)
|
||||
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(acos, ::acos)
|
||||
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(atan, ::atan)
|
||||
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(sinh, ::sinh)
|
||||
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(cosh, ::cosh)
|
||||
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(tanh, ::tanh)
|
||||
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(asinh, ::asinh)
|
||||
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(acosh, ::acosh)
|
||||
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(atanh, ::atanh)
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_BIN_FUNCTOR(hypot, ::hypot)
|
||||
OPENCV_CUDA_IMPLEMENT_BIN_FUNCTOR(atan2, ::atan2)
|
||||
OPENCV_CUDA_IMPLEMENT_BIN_FUNCTOR(pow, ::pow)
|
||||
|
||||
#undef OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR
|
||||
#undef OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR_NO_DOUBLE
|
||||
#undef OPENCV_CUDA_IMPLEMENT_BIN_FUNCTOR
|
||||
|
||||
template<typename T> struct hypot_sqr_func : binary_function<T, T, float>
|
||||
{
|
||||
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType src1, typename TypeTraits<T>::ParameterType src2) const
|
||||
{
|
||||
return src1 * src1 + src2 * src2;
|
||||
}
|
||||
__host__ __device__ __forceinline__ hypot_sqr_func() {}
|
||||
__host__ __device__ __forceinline__ hypot_sqr_func(const hypot_sqr_func&) {}
|
||||
};
|
||||
|
||||
// Saturate Cast Functor
|
||||
template <typename T, typename D> struct saturate_cast_func : unary_function<T, D>
|
||||
{
|
||||
__device__ __forceinline__ D operator ()(typename TypeTraits<T>::ParameterType v) const
|
||||
{
|
||||
return saturate_cast<D>(v);
|
||||
}
|
||||
__host__ __device__ __forceinline__ saturate_cast_func() {}
|
||||
__host__ __device__ __forceinline__ saturate_cast_func(const saturate_cast_func&) {}
|
||||
};
|
||||
|
||||
// Threshold Functors
|
||||
template <typename T> struct thresh_binary_func : unary_function<T, T>
|
||||
{
|
||||
__host__ __device__ __forceinline__ thresh_binary_func(T thresh_, T maxVal_) : thresh(thresh_), maxVal(maxVal_) {}
|
||||
|
||||
__device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType src) const
|
||||
{
|
||||
return (src > thresh) * maxVal;
|
||||
}
|
||||
|
||||
__host__ __device__ __forceinline__ thresh_binary_func() {}
|
||||
__host__ __device__ __forceinline__ thresh_binary_func(const thresh_binary_func& other)
|
||||
: thresh(other.thresh), maxVal(other.maxVal) {}
|
||||
|
||||
T thresh;
|
||||
T maxVal;
|
||||
};
|
||||
|
||||
template <typename T> struct thresh_binary_inv_func : unary_function<T, T>
|
||||
{
|
||||
__host__ __device__ __forceinline__ thresh_binary_inv_func(T thresh_, T maxVal_) : thresh(thresh_), maxVal(maxVal_) {}
|
||||
|
||||
__device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType src) const
|
||||
{
|
||||
return (src <= thresh) * maxVal;
|
||||
}
|
||||
|
||||
__host__ __device__ __forceinline__ thresh_binary_inv_func() {}
|
||||
__host__ __device__ __forceinline__ thresh_binary_inv_func(const thresh_binary_inv_func& other)
|
||||
: thresh(other.thresh), maxVal(other.maxVal) {}
|
||||
|
||||
T thresh;
|
||||
T maxVal;
|
||||
};
|
||||
|
||||
template <typename T> struct thresh_trunc_func : unary_function<T, T>
|
||||
{
|
||||
explicit __host__ __device__ __forceinline__ thresh_trunc_func(T thresh_, T maxVal_ = 0) : thresh(thresh_) {(void)maxVal_;}
|
||||
|
||||
__device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType src) const
|
||||
{
|
||||
return minimum<T>()(src, thresh);
|
||||
}
|
||||
|
||||
__host__ __device__ __forceinline__ thresh_trunc_func() {}
|
||||
__host__ __device__ __forceinline__ thresh_trunc_func(const thresh_trunc_func& other)
|
||||
: thresh(other.thresh) {}
|
||||
|
||||
T thresh;
|
||||
};
|
||||
|
||||
template <typename T> struct thresh_to_zero_func : unary_function<T, T>
|
||||
{
|
||||
explicit __host__ __device__ __forceinline__ thresh_to_zero_func(T thresh_, T maxVal_ = 0) : thresh(thresh_) {(void)maxVal_;}
|
||||
|
||||
__device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType src) const
|
||||
{
|
||||
return (src > thresh) * src;
|
||||
}
|
||||
|
||||
__host__ __device__ __forceinline__ thresh_to_zero_func() {}
|
||||
__host__ __device__ __forceinline__ thresh_to_zero_func(const thresh_to_zero_func& other)
|
||||
: thresh(other.thresh) {}
|
||||
|
||||
T thresh;
|
||||
};
|
||||
|
||||
template <typename T> struct thresh_to_zero_inv_func : unary_function<T, T>
|
||||
{
|
||||
explicit __host__ __device__ __forceinline__ thresh_to_zero_inv_func(T thresh_, T maxVal_ = 0) : thresh(thresh_) {(void)maxVal_;}
|
||||
|
||||
__device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType src) const
|
||||
{
|
||||
return (src <= thresh) * src;
|
||||
}
|
||||
|
||||
__host__ __device__ __forceinline__ thresh_to_zero_inv_func() {}
|
||||
__host__ __device__ __forceinline__ thresh_to_zero_inv_func(const thresh_to_zero_inv_func& other)
|
||||
: thresh(other.thresh) {}
|
||||
|
||||
T thresh;
|
||||
};
|
||||
|
||||
// Function Object Adaptors
|
||||
template <typename Predicate> struct unary_negate : unary_function<typename Predicate::argument_type, bool>
|
||||
{
|
||||
explicit __host__ __device__ __forceinline__ unary_negate(const Predicate& p) : pred(p) {}
|
||||
|
||||
__device__ __forceinline__ bool operator()(typename TypeTraits<typename Predicate::argument_type>::ParameterType x) const
|
||||
{
|
||||
return !pred(x);
|
||||
}
|
||||
|
||||
__host__ __device__ __forceinline__ unary_negate() {}
|
||||
__host__ __device__ __forceinline__ unary_negate(const unary_negate& other) : pred(other.pred) {}
|
||||
|
||||
Predicate pred;
|
||||
};
|
||||
|
||||
template <typename Predicate> __host__ __device__ __forceinline__ unary_negate<Predicate> not1(const Predicate& pred)
|
||||
{
|
||||
return unary_negate<Predicate>(pred);
|
||||
}
|
||||
|
||||
template <typename Predicate> struct binary_negate : binary_function<typename Predicate::first_argument_type, typename Predicate::second_argument_type, bool>
|
||||
{
|
||||
explicit __host__ __device__ __forceinline__ binary_negate(const Predicate& p) : pred(p) {}
|
||||
|
||||
__device__ __forceinline__ bool operator()(typename TypeTraits<typename Predicate::first_argument_type>::ParameterType x,
|
||||
typename TypeTraits<typename Predicate::second_argument_type>::ParameterType y) const
|
||||
{
|
||||
return !pred(x,y);
|
||||
}
|
||||
|
||||
__host__ __device__ __forceinline__ binary_negate() {}
|
||||
__host__ __device__ __forceinline__ binary_negate(const binary_negate& other) : pred(other.pred) {}
|
||||
|
||||
Predicate pred;
|
||||
};
|
||||
|
||||
template <typename BinaryPredicate> __host__ __device__ __forceinline__ binary_negate<BinaryPredicate> not2(const BinaryPredicate& pred)
|
||||
{
|
||||
return binary_negate<BinaryPredicate>(pred);
|
||||
}
|
||||
|
||||
template <typename Op> struct binder1st : unary_function<typename Op::second_argument_type, typename Op::result_type>
|
||||
{
|
||||
__host__ __device__ __forceinline__ binder1st(const Op& op_, const typename Op::first_argument_type& arg1_) : op(op_), arg1(arg1_) {}
|
||||
|
||||
__device__ __forceinline__ typename Op::result_type operator ()(typename TypeTraits<typename Op::second_argument_type>::ParameterType a) const
|
||||
{
|
||||
return op(arg1, a);
|
||||
}
|
||||
|
||||
__host__ __device__ __forceinline__ binder1st() {}
|
||||
__host__ __device__ __forceinline__ binder1st(const binder1st& other) : op(other.op), arg1(other.arg1) {}
|
||||
|
||||
Op op;
|
||||
typename Op::first_argument_type arg1;
|
||||
};
|
||||
|
||||
template <typename Op, typename T> __host__ __device__ __forceinline__ binder1st<Op> bind1st(const Op& op, const T& x)
|
||||
{
|
||||
return binder1st<Op>(op, typename Op::first_argument_type(x));
|
||||
}
|
||||
|
||||
template <typename Op> struct binder2nd : unary_function<typename Op::first_argument_type, typename Op::result_type>
|
||||
{
|
||||
__host__ __device__ __forceinline__ binder2nd(const Op& op_, const typename Op::second_argument_type& arg2_) : op(op_), arg2(arg2_) {}
|
||||
|
||||
__forceinline__ __device__ typename Op::result_type operator ()(typename TypeTraits<typename Op::first_argument_type>::ParameterType a) const
|
||||
{
|
||||
return op(a, arg2);
|
||||
}
|
||||
|
||||
__host__ __device__ __forceinline__ binder2nd() {}
|
||||
__host__ __device__ __forceinline__ binder2nd(const binder2nd& other) : op(other.op), arg2(other.arg2) {}
|
||||
|
||||
Op op;
|
||||
typename Op::second_argument_type arg2;
|
||||
};
|
||||
|
||||
template <typename Op, typename T> __host__ __device__ __forceinline__ binder2nd<Op> bind2nd(const Op& op, const T& x)
|
||||
{
|
||||
return binder2nd<Op>(op, typename Op::second_argument_type(x));
|
||||
}
|
||||
|
||||
// Functor Traits
|
||||
template <typename F> struct IsUnaryFunction
|
||||
{
|
||||
typedef char Yes;
|
||||
struct No {Yes a[2];};
|
||||
|
||||
template <typename T, typename D> static Yes check(unary_function<T, D>);
|
||||
static No check(...);
|
||||
|
||||
static F makeF();
|
||||
|
||||
enum { value = (sizeof(check(makeF())) == sizeof(Yes)) };
|
||||
};
|
||||
|
||||
template <typename F> struct IsBinaryFunction
|
||||
{
|
||||
typedef char Yes;
|
||||
struct No {Yes a[2];};
|
||||
|
||||
template <typename T1, typename T2, typename D> static Yes check(binary_function<T1, T2, D>);
|
||||
static No check(...);
|
||||
|
||||
static F makeF();
|
||||
|
||||
enum { value = (sizeof(check(makeF())) == sizeof(Yes)) };
|
||||
};
|
||||
|
||||
namespace functional_detail
|
||||
{
|
||||
template <size_t src_elem_size, size_t dst_elem_size> struct UnOpShift { enum { shift = 1 }; };
|
||||
template <size_t src_elem_size> struct UnOpShift<src_elem_size, 1> { enum { shift = 4 }; };
|
||||
template <size_t src_elem_size> struct UnOpShift<src_elem_size, 2> { enum { shift = 2 }; };
|
||||
|
||||
template <typename T, typename D> struct DefaultUnaryShift
|
||||
{
|
||||
enum { shift = UnOpShift<sizeof(T), sizeof(D)>::shift };
|
||||
};
|
||||
|
||||
template <size_t src_elem_size1, size_t src_elem_size2, size_t dst_elem_size> struct BinOpShift { enum { shift = 1 }; };
|
||||
template <size_t src_elem_size1, size_t src_elem_size2> struct BinOpShift<src_elem_size1, src_elem_size2, 1> { enum { shift = 4 }; };
|
||||
template <size_t src_elem_size1, size_t src_elem_size2> struct BinOpShift<src_elem_size1, src_elem_size2, 2> { enum { shift = 2 }; };
|
||||
|
||||
template <typename T1, typename T2, typename D> struct DefaultBinaryShift
|
||||
{
|
||||
enum { shift = BinOpShift<sizeof(T1), sizeof(T2), sizeof(D)>::shift };
|
||||
};
|
||||
|
||||
template <typename Func, bool unary = IsUnaryFunction<Func>::value> struct ShiftDispatcher;
|
||||
template <typename Func> struct ShiftDispatcher<Func, true>
|
||||
{
|
||||
enum { shift = DefaultUnaryShift<typename Func::argument_type, typename Func::result_type>::shift };
|
||||
};
|
||||
template <typename Func> struct ShiftDispatcher<Func, false>
|
||||
{
|
||||
enum { shift = DefaultBinaryShift<typename Func::first_argument_type, typename Func::second_argument_type, typename Func::result_type>::shift };
|
||||
};
|
||||
}
|
||||
|
||||
template <typename Func> struct DefaultTransformShift
|
||||
{
|
||||
enum { shift = functional_detail::ShiftDispatcher<Func>::shift };
|
||||
};
|
||||
|
||||
template <typename Func> struct DefaultTransformFunctorTraits
|
||||
{
|
||||
enum { simple_block_dim_x = 16 };
|
||||
enum { simple_block_dim_y = 16 };
|
||||
|
||||
enum { smart_block_dim_x = 16 };
|
||||
enum { smart_block_dim_y = 16 };
|
||||
enum { smart_shift = DefaultTransformShift<Func>::shift };
|
||||
};
|
||||
|
||||
template <typename Func> struct TransformFunctorTraits : DefaultTransformFunctorTraits<Func> {};
|
||||
|
||||
#define OPENCV_CUDA_TRANSFORM_FUNCTOR_TRAITS(type) \
|
||||
template <> struct TransformFunctorTraits< type > : DefaultTransformFunctorTraits< type >
|
||||
}}} // namespace cv { namespace cuda { namespace cudev
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif // __OPENCV_CUDA_FUNCTIONAL_HPP__
|
128
3rdparty/include/opencv2/core/cuda/limits.hpp
vendored
Normal file
128
3rdparty/include/opencv2/core/cuda/limits.hpp
vendored
Normal file
@ -0,0 +1,128 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_CUDA_LIMITS_HPP__
|
||||
#define __OPENCV_CUDA_LIMITS_HPP__
|
||||
|
||||
#include <limits.h>
|
||||
#include <float.h>
|
||||
#include "common.hpp"
|
||||
|
||||
/** @file
|
||||
* @deprecated Use @ref cudev instead.
|
||||
*/
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
template <class T> struct numeric_limits;
|
||||
|
||||
template <> struct numeric_limits<bool>
|
||||
{
|
||||
__device__ __forceinline__ static bool min() { return false; }
|
||||
__device__ __forceinline__ static bool max() { return true; }
|
||||
static const bool is_signed = false;
|
||||
};
|
||||
|
||||
template <> struct numeric_limits<signed char>
|
||||
{
|
||||
__device__ __forceinline__ static signed char min() { return SCHAR_MIN; }
|
||||
__device__ __forceinline__ static signed char max() { return SCHAR_MAX; }
|
||||
static const bool is_signed = true;
|
||||
};
|
||||
|
||||
template <> struct numeric_limits<unsigned char>
|
||||
{
|
||||
__device__ __forceinline__ static unsigned char min() { return 0; }
|
||||
__device__ __forceinline__ static unsigned char max() { return UCHAR_MAX; }
|
||||
static const bool is_signed = false;
|
||||
};
|
||||
|
||||
template <> struct numeric_limits<short>
|
||||
{
|
||||
__device__ __forceinline__ static short min() { return SHRT_MIN; }
|
||||
__device__ __forceinline__ static short max() { return SHRT_MAX; }
|
||||
static const bool is_signed = true;
|
||||
};
|
||||
|
||||
template <> struct numeric_limits<unsigned short>
|
||||
{
|
||||
__device__ __forceinline__ static unsigned short min() { return 0; }
|
||||
__device__ __forceinline__ static unsigned short max() { return USHRT_MAX; }
|
||||
static const bool is_signed = false;
|
||||
};
|
||||
|
||||
template <> struct numeric_limits<int>
|
||||
{
|
||||
__device__ __forceinline__ static int min() { return INT_MIN; }
|
||||
__device__ __forceinline__ static int max() { return INT_MAX; }
|
||||
static const bool is_signed = true;
|
||||
};
|
||||
|
||||
template <> struct numeric_limits<unsigned int>
|
||||
{
|
||||
__device__ __forceinline__ static unsigned int min() { return 0; }
|
||||
__device__ __forceinline__ static unsigned int max() { return UINT_MAX; }
|
||||
static const bool is_signed = false;
|
||||
};
|
||||
|
||||
template <> struct numeric_limits<float>
|
||||
{
|
||||
__device__ __forceinline__ static float min() { return FLT_MIN; }
|
||||
__device__ __forceinline__ static float max() { return FLT_MAX; }
|
||||
__device__ __forceinline__ static float epsilon() { return FLT_EPSILON; }
|
||||
static const bool is_signed = true;
|
||||
};
|
||||
|
||||
template <> struct numeric_limits<double>
|
||||
{
|
||||
__device__ __forceinline__ static double min() { return DBL_MIN; }
|
||||
__device__ __forceinline__ static double max() { return DBL_MAX; }
|
||||
__device__ __forceinline__ static double epsilon() { return DBL_EPSILON; }
|
||||
static const bool is_signed = true;
|
||||
};
|
||||
}}} // namespace cv { namespace cuda { namespace cudev {
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif // __OPENCV_CUDA_LIMITS_HPP__
|
205
3rdparty/include/opencv2/core/cuda/reduce.hpp
vendored
Normal file
205
3rdparty/include/opencv2/core/cuda/reduce.hpp
vendored
Normal file
@ -0,0 +1,205 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_CUDA_REDUCE_HPP__
|
||||
#define __OPENCV_CUDA_REDUCE_HPP__
|
||||
|
||||
#include <thrust/tuple.h>
|
||||
#include "detail/reduce.hpp"
|
||||
#include "detail/reduce_key_val.hpp"
|
||||
|
||||
/** @file
|
||||
* @deprecated Use @ref cudev instead.
|
||||
*/
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
template <int N, typename T, class Op>
|
||||
__device__ __forceinline__ void reduce(volatile T* smem, T& val, unsigned int tid, const Op& op)
|
||||
{
|
||||
reduce_detail::Dispatcher<N>::reductor::template reduce<volatile T*, T&, const Op&>(smem, val, tid, op);
|
||||
}
|
||||
template <int N,
|
||||
typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9,
|
||||
typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9,
|
||||
class Op0, class Op1, class Op2, class Op3, class Op4, class Op5, class Op6, class Op7, class Op8, class Op9>
|
||||
__device__ __forceinline__ void reduce(const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
|
||||
const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
|
||||
unsigned int tid,
|
||||
const thrust::tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>& op)
|
||||
{
|
||||
reduce_detail::Dispatcher<N>::reductor::template reduce<
|
||||
const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>&,
|
||||
const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>&,
|
||||
const thrust::tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>&>(smem, val, tid, op);
|
||||
}
|
||||
|
||||
template <unsigned int N, typename K, typename V, class Cmp>
|
||||
__device__ __forceinline__ void reduceKeyVal(volatile K* skeys, K& key, volatile V* svals, V& val, unsigned int tid, const Cmp& cmp)
|
||||
{
|
||||
reduce_key_val_detail::Dispatcher<N>::reductor::template reduce<volatile K*, K&, volatile V*, V&, const Cmp&>(skeys, key, svals, val, tid, cmp);
|
||||
}
|
||||
template <unsigned int N,
|
||||
typename K,
|
||||
typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
|
||||
typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
|
||||
class Cmp>
|
||||
__device__ __forceinline__ void reduceKeyVal(volatile K* skeys, K& key,
|
||||
const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
|
||||
const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
|
||||
unsigned int tid, const Cmp& cmp)
|
||||
{
|
||||
reduce_key_val_detail::Dispatcher<N>::reductor::template reduce<volatile K*, K&,
|
||||
const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>&,
|
||||
const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>&,
|
||||
const Cmp&>(skeys, key, svals, val, tid, cmp);
|
||||
}
|
||||
template <unsigned int N,
|
||||
typename KP0, typename KP1, typename KP2, typename KP3, typename KP4, typename KP5, typename KP6, typename KP7, typename KP8, typename KP9,
|
||||
typename KR0, typename KR1, typename KR2, typename KR3, typename KR4, typename KR5, typename KR6, typename KR7, typename KR8, typename KR9,
|
||||
typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
|
||||
typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
|
||||
class Cmp0, class Cmp1, class Cmp2, class Cmp3, class Cmp4, class Cmp5, class Cmp6, class Cmp7, class Cmp8, class Cmp9>
|
||||
__device__ __forceinline__ void reduceKeyVal(const thrust::tuple<KP0, KP1, KP2, KP3, KP4, KP5, KP6, KP7, KP8, KP9>& skeys,
|
||||
const thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9>& key,
|
||||
const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
|
||||
const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
|
||||
unsigned int tid,
|
||||
const thrust::tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9>& cmp)
|
||||
{
|
||||
reduce_key_val_detail::Dispatcher<N>::reductor::template reduce<
|
||||
const thrust::tuple<KP0, KP1, KP2, KP3, KP4, KP5, KP6, KP7, KP8, KP9>&,
|
||||
const thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9>&,
|
||||
const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>&,
|
||||
const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>&,
|
||||
const thrust::tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9>&
|
||||
>(skeys, key, svals, val, tid, cmp);
|
||||
}
|
||||
|
||||
// smem_tuple
|
||||
|
||||
template <typename T0>
|
||||
__device__ __forceinline__
|
||||
thrust::tuple<volatile T0*>
|
||||
smem_tuple(T0* t0)
|
||||
{
|
||||
return thrust::make_tuple((volatile T0*) t0);
|
||||
}
|
||||
|
||||
template <typename T0, typename T1>
|
||||
__device__ __forceinline__
|
||||
thrust::tuple<volatile T0*, volatile T1*>
|
||||
smem_tuple(T0* t0, T1* t1)
|
||||
{
|
||||
return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1);
|
||||
}
|
||||
|
||||
template <typename T0, typename T1, typename T2>
|
||||
__device__ __forceinline__
|
||||
thrust::tuple<volatile T0*, volatile T1*, volatile T2*>
|
||||
smem_tuple(T0* t0, T1* t1, T2* t2)
|
||||
{
|
||||
return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2);
|
||||
}
|
||||
|
||||
template <typename T0, typename T1, typename T2, typename T3>
|
||||
__device__ __forceinline__
|
||||
thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*>
|
||||
smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3)
|
||||
{
|
||||
return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3);
|
||||
}
|
||||
|
||||
template <typename T0, typename T1, typename T2, typename T3, typename T4>
|
||||
__device__ __forceinline__
|
||||
thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*>
|
||||
smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4)
|
||||
{
|
||||
return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4);
|
||||
}
|
||||
|
||||
template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5>
|
||||
__device__ __forceinline__
|
||||
thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*>
|
||||
smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5)
|
||||
{
|
||||
return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5);
|
||||
}
|
||||
|
||||
template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6>
|
||||
__device__ __forceinline__
|
||||
thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*, volatile T6*>
|
||||
smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6)
|
||||
{
|
||||
return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5, (volatile T6*) t6);
|
||||
}
|
||||
|
||||
template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7>
|
||||
__device__ __forceinline__
|
||||
thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*, volatile T6*, volatile T7*>
|
||||
smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6, T7* t7)
|
||||
{
|
||||
return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5, (volatile T6*) t6, (volatile T7*) t7);
|
||||
}
|
||||
|
||||
template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8>
|
||||
__device__ __forceinline__
|
||||
thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*, volatile T6*, volatile T7*, volatile T8*>
|
||||
smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6, T7* t7, T8* t8)
|
||||
{
|
||||
return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5, (volatile T6*) t6, (volatile T7*) t7, (volatile T8*) t8);
|
||||
}
|
||||
|
||||
template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8, typename T9>
|
||||
__device__ __forceinline__
|
||||
thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*, volatile T6*, volatile T7*, volatile T8*, volatile T9*>
|
||||
smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6, T7* t7, T8* t8, T9* t9)
|
||||
{
|
||||
return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5, (volatile T6*) t6, (volatile T7*) t7, (volatile T8*) t8, (volatile T9*) t9);
|
||||
}
|
||||
}}}
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif // __OPENCV_CUDA_UTILITY_HPP__
|
292
3rdparty/include/opencv2/core/cuda/saturate_cast.hpp
vendored
Normal file
292
3rdparty/include/opencv2/core/cuda/saturate_cast.hpp
vendored
Normal file
@ -0,0 +1,292 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_CUDA_SATURATE_CAST_HPP__
|
||||
#define __OPENCV_CUDA_SATURATE_CAST_HPP__
|
||||
|
||||
#include "common.hpp"
|
||||
|
||||
/** @file
|
||||
* @deprecated Use @ref cudev instead.
|
||||
*/
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(uchar v) { return _Tp(v); }
|
||||
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(schar v) { return _Tp(v); }
|
||||
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(ushort v) { return _Tp(v); }
|
||||
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(short v) { return _Tp(v); }
|
||||
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(uint v) { return _Tp(v); }
|
||||
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(int v) { return _Tp(v); }
|
||||
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(float v) { return _Tp(v); }
|
||||
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(double v) { return _Tp(v); }
|
||||
|
||||
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(schar v)
|
||||
{
|
||||
uint res = 0;
|
||||
int vi = v;
|
||||
asm("cvt.sat.u8.s8 %0, %1;" : "=r"(res) : "r"(vi));
|
||||
return res;
|
||||
}
|
||||
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(short v)
|
||||
{
|
||||
uint res = 0;
|
||||
asm("cvt.sat.u8.s16 %0, %1;" : "=r"(res) : "h"(v));
|
||||
return res;
|
||||
}
|
||||
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(ushort v)
|
||||
{
|
||||
uint res = 0;
|
||||
asm("cvt.sat.u8.u16 %0, %1;" : "=r"(res) : "h"(v));
|
||||
return res;
|
||||
}
|
||||
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(int v)
|
||||
{
|
||||
uint res = 0;
|
||||
asm("cvt.sat.u8.s32 %0, %1;" : "=r"(res) : "r"(v));
|
||||
return res;
|
||||
}
|
||||
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(uint v)
|
||||
{
|
||||
uint res = 0;
|
||||
asm("cvt.sat.u8.u32 %0, %1;" : "=r"(res) : "r"(v));
|
||||
return res;
|
||||
}
|
||||
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(float v)
|
||||
{
|
||||
uint res = 0;
|
||||
asm("cvt.rni.sat.u8.f32 %0, %1;" : "=r"(res) : "f"(v));
|
||||
return res;
|
||||
}
|
||||
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(double v)
|
||||
{
|
||||
#if __CUDA_ARCH__ >= 130
|
||||
uint res = 0;
|
||||
asm("cvt.rni.sat.u8.f64 %0, %1;" : "=r"(res) : "d"(v));
|
||||
return res;
|
||||
#else
|
||||
return saturate_cast<uchar>((float)v);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<> __device__ __forceinline__ schar saturate_cast<schar>(uchar v)
|
||||
{
|
||||
uint res = 0;
|
||||
uint vi = v;
|
||||
asm("cvt.sat.s8.u8 %0, %1;" : "=r"(res) : "r"(vi));
|
||||
return res;
|
||||
}
|
||||
template<> __device__ __forceinline__ schar saturate_cast<schar>(short v)
|
||||
{
|
||||
uint res = 0;
|
||||
asm("cvt.sat.s8.s16 %0, %1;" : "=r"(res) : "h"(v));
|
||||
return res;
|
||||
}
|
||||
template<> __device__ __forceinline__ schar saturate_cast<schar>(ushort v)
|
||||
{
|
||||
uint res = 0;
|
||||
asm("cvt.sat.s8.u16 %0, %1;" : "=r"(res) : "h"(v));
|
||||
return res;
|
||||
}
|
||||
template<> __device__ __forceinline__ schar saturate_cast<schar>(int v)
|
||||
{
|
||||
uint res = 0;
|
||||
asm("cvt.sat.s8.s32 %0, %1;" : "=r"(res) : "r"(v));
|
||||
return res;
|
||||
}
|
||||
template<> __device__ __forceinline__ schar saturate_cast<schar>(uint v)
|
||||
{
|
||||
uint res = 0;
|
||||
asm("cvt.sat.s8.u32 %0, %1;" : "=r"(res) : "r"(v));
|
||||
return res;
|
||||
}
|
||||
template<> __device__ __forceinline__ schar saturate_cast<schar>(float v)
|
||||
{
|
||||
uint res = 0;
|
||||
asm("cvt.rni.sat.s8.f32 %0, %1;" : "=r"(res) : "f"(v));
|
||||
return res;
|
||||
}
|
||||
template<> __device__ __forceinline__ schar saturate_cast<schar>(double v)
|
||||
{
|
||||
#if __CUDA_ARCH__ >= 130
|
||||
uint res = 0;
|
||||
asm("cvt.rni.sat.s8.f64 %0, %1;" : "=r"(res) : "d"(v));
|
||||
return res;
|
||||
#else
|
||||
return saturate_cast<schar>((float)v);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(schar v)
|
||||
{
|
||||
ushort res = 0;
|
||||
int vi = v;
|
||||
asm("cvt.sat.u16.s8 %0, %1;" : "=h"(res) : "r"(vi));
|
||||
return res;
|
||||
}
|
||||
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(short v)
|
||||
{
|
||||
ushort res = 0;
|
||||
asm("cvt.sat.u16.s16 %0, %1;" : "=h"(res) : "h"(v));
|
||||
return res;
|
||||
}
|
||||
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(int v)
|
||||
{
|
||||
ushort res = 0;
|
||||
asm("cvt.sat.u16.s32 %0, %1;" : "=h"(res) : "r"(v));
|
||||
return res;
|
||||
}
|
||||
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(uint v)
|
||||
{
|
||||
ushort res = 0;
|
||||
asm("cvt.sat.u16.u32 %0, %1;" : "=h"(res) : "r"(v));
|
||||
return res;
|
||||
}
|
||||
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(float v)
|
||||
{
|
||||
ushort res = 0;
|
||||
asm("cvt.rni.sat.u16.f32 %0, %1;" : "=h"(res) : "f"(v));
|
||||
return res;
|
||||
}
|
||||
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(double v)
|
||||
{
|
||||
#if __CUDA_ARCH__ >= 130
|
||||
ushort res = 0;
|
||||
asm("cvt.rni.sat.u16.f64 %0, %1;" : "=h"(res) : "d"(v));
|
||||
return res;
|
||||
#else
|
||||
return saturate_cast<ushort>((float)v);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<> __device__ __forceinline__ short saturate_cast<short>(ushort v)
|
||||
{
|
||||
short res = 0;
|
||||
asm("cvt.sat.s16.u16 %0, %1;" : "=h"(res) : "h"(v));
|
||||
return res;
|
||||
}
|
||||
template<> __device__ __forceinline__ short saturate_cast<short>(int v)
|
||||
{
|
||||
short res = 0;
|
||||
asm("cvt.sat.s16.s32 %0, %1;" : "=h"(res) : "r"(v));
|
||||
return res;
|
||||
}
|
||||
template<> __device__ __forceinline__ short saturate_cast<short>(uint v)
|
||||
{
|
||||
short res = 0;
|
||||
asm("cvt.sat.s16.u32 %0, %1;" : "=h"(res) : "r"(v));
|
||||
return res;
|
||||
}
|
||||
template<> __device__ __forceinline__ short saturate_cast<short>(float v)
|
||||
{
|
||||
short res = 0;
|
||||
asm("cvt.rni.sat.s16.f32 %0, %1;" : "=h"(res) : "f"(v));
|
||||
return res;
|
||||
}
|
||||
template<> __device__ __forceinline__ short saturate_cast<short>(double v)
|
||||
{
|
||||
#if __CUDA_ARCH__ >= 130
|
||||
short res = 0;
|
||||
asm("cvt.rni.sat.s16.f64 %0, %1;" : "=h"(res) : "d"(v));
|
||||
return res;
|
||||
#else
|
||||
return saturate_cast<short>((float)v);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<> __device__ __forceinline__ int saturate_cast<int>(uint v)
|
||||
{
|
||||
int res = 0;
|
||||
asm("cvt.sat.s32.u32 %0, %1;" : "=r"(res) : "r"(v));
|
||||
return res;
|
||||
}
|
||||
template<> __device__ __forceinline__ int saturate_cast<int>(float v)
|
||||
{
|
||||
return __float2int_rn(v);
|
||||
}
|
||||
template<> __device__ __forceinline__ int saturate_cast<int>(double v)
|
||||
{
|
||||
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
|
||||
return __double2int_rn(v);
|
||||
#else
|
||||
return saturate_cast<int>((float)v);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<> __device__ __forceinline__ uint saturate_cast<uint>(schar v)
|
||||
{
|
||||
uint res = 0;
|
||||
int vi = v;
|
||||
asm("cvt.sat.u32.s8 %0, %1;" : "=r"(res) : "r"(vi));
|
||||
return res;
|
||||
}
|
||||
template<> __device__ __forceinline__ uint saturate_cast<uint>(short v)
|
||||
{
|
||||
uint res = 0;
|
||||
asm("cvt.sat.u32.s16 %0, %1;" : "=r"(res) : "h"(v));
|
||||
return res;
|
||||
}
|
||||
template<> __device__ __forceinline__ uint saturate_cast<uint>(int v)
|
||||
{
|
||||
uint res = 0;
|
||||
asm("cvt.sat.u32.s32 %0, %1;" : "=r"(res) : "r"(v));
|
||||
return res;
|
||||
}
|
||||
template<> __device__ __forceinline__ uint saturate_cast<uint>(float v)
|
||||
{
|
||||
return __float2uint_rn(v);
|
||||
}
|
||||
template<> __device__ __forceinline__ uint saturate_cast<uint>(double v)
|
||||
{
|
||||
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
|
||||
return __double2uint_rn(v);
|
||||
#else
|
||||
return saturate_cast<uint>((float)v);
|
||||
#endif
|
||||
}
|
||||
}}}
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif /* __OPENCV_CUDA_SATURATE_CAST_HPP__ */
|
258
3rdparty/include/opencv2/core/cuda/scan.hpp
vendored
Normal file
258
3rdparty/include/opencv2/core/cuda/scan.hpp
vendored
Normal file
@ -0,0 +1,258 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_CUDA_SCAN_HPP__
|
||||
#define __OPENCV_CUDA_SCAN_HPP__
|
||||
|
||||
#include "opencv2/core/cuda/common.hpp"
|
||||
#include "opencv2/core/cuda/utility.hpp"
|
||||
#include "opencv2/core/cuda/warp.hpp"
|
||||
#include "opencv2/core/cuda/warp_shuffle.hpp"
|
||||
|
||||
/** @file
|
||||
* @deprecated Use @ref cudev instead.
|
||||
*/
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
enum ScanKind { EXCLUSIVE = 0, INCLUSIVE = 1 };
|
||||
|
||||
template <ScanKind Kind, typename T, typename F> struct WarpScan
|
||||
{
|
||||
__device__ __forceinline__ WarpScan() {}
|
||||
__device__ __forceinline__ WarpScan(const WarpScan& other) { (void)other; }
|
||||
|
||||
__device__ __forceinline__ T operator()( volatile T *ptr , const unsigned int idx)
|
||||
{
|
||||
const unsigned int lane = idx & 31;
|
||||
F op;
|
||||
|
||||
if ( lane >= 1) ptr [idx ] = op(ptr [idx - 1], ptr [idx]);
|
||||
if ( lane >= 2) ptr [idx ] = op(ptr [idx - 2], ptr [idx]);
|
||||
if ( lane >= 4) ptr [idx ] = op(ptr [idx - 4], ptr [idx]);
|
||||
if ( lane >= 8) ptr [idx ] = op(ptr [idx - 8], ptr [idx]);
|
||||
if ( lane >= 16) ptr [idx ] = op(ptr [idx - 16], ptr [idx]);
|
||||
|
||||
if( Kind == INCLUSIVE )
|
||||
return ptr [idx];
|
||||
else
|
||||
return (lane > 0) ? ptr [idx - 1] : 0;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ unsigned int index(const unsigned int tid)
|
||||
{
|
||||
return tid;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ void init(volatile T *ptr){}
|
||||
|
||||
static const int warp_offset = 0;
|
||||
|
||||
typedef WarpScan<INCLUSIVE, T, F> merge;
|
||||
};
|
||||
|
||||
template <ScanKind Kind , typename T, typename F> struct WarpScanNoComp
|
||||
{
|
||||
__device__ __forceinline__ WarpScanNoComp() {}
|
||||
__device__ __forceinline__ WarpScanNoComp(const WarpScanNoComp& other) { (void)other; }
|
||||
|
||||
__device__ __forceinline__ T operator()( volatile T *ptr , const unsigned int idx)
|
||||
{
|
||||
const unsigned int lane = threadIdx.x & 31;
|
||||
F op;
|
||||
|
||||
ptr [idx ] = op(ptr [idx - 1], ptr [idx]);
|
||||
ptr [idx ] = op(ptr [idx - 2], ptr [idx]);
|
||||
ptr [idx ] = op(ptr [idx - 4], ptr [idx]);
|
||||
ptr [idx ] = op(ptr [idx - 8], ptr [idx]);
|
||||
ptr [idx ] = op(ptr [idx - 16], ptr [idx]);
|
||||
|
||||
if( Kind == INCLUSIVE )
|
||||
return ptr [idx];
|
||||
else
|
||||
return (lane > 0) ? ptr [idx - 1] : 0;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ unsigned int index(const unsigned int tid)
|
||||
{
|
||||
return (tid >> warp_log) * warp_smem_stride + 16 + (tid & warp_mask);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ void init(volatile T *ptr)
|
||||
{
|
||||
ptr[threadIdx.x] = 0;
|
||||
}
|
||||
|
||||
static const int warp_smem_stride = 32 + 16 + 1;
|
||||
static const int warp_offset = 16;
|
||||
static const int warp_log = 5;
|
||||
static const int warp_mask = 31;
|
||||
|
||||
typedef WarpScanNoComp<INCLUSIVE, T, F> merge;
|
||||
};
|
||||
|
||||
template <ScanKind Kind , typename T, typename Sc, typename F> struct BlockScan
|
||||
{
|
||||
__device__ __forceinline__ BlockScan() {}
|
||||
__device__ __forceinline__ BlockScan(const BlockScan& other) { (void)other; }
|
||||
|
||||
__device__ __forceinline__ T operator()(volatile T *ptr)
|
||||
{
|
||||
const unsigned int tid = threadIdx.x;
|
||||
const unsigned int lane = tid & warp_mask;
|
||||
const unsigned int warp = tid >> warp_log;
|
||||
|
||||
Sc scan;
|
||||
typename Sc::merge merge_scan;
|
||||
const unsigned int idx = scan.index(tid);
|
||||
|
||||
T val = scan(ptr, idx);
|
||||
__syncthreads ();
|
||||
|
||||
if( warp == 0)
|
||||
scan.init(ptr);
|
||||
__syncthreads ();
|
||||
|
||||
if( lane == 31 )
|
||||
ptr [scan.warp_offset + warp ] = (Kind == INCLUSIVE) ? val : ptr [idx];
|
||||
__syncthreads ();
|
||||
|
||||
if( warp == 0 )
|
||||
merge_scan(ptr, idx);
|
||||
__syncthreads();
|
||||
|
||||
if ( warp > 0)
|
||||
val = ptr [scan.warp_offset + warp - 1] + val;
|
||||
__syncthreads ();
|
||||
|
||||
ptr[idx] = val;
|
||||
__syncthreads ();
|
||||
|
||||
return val ;
|
||||
}
|
||||
|
||||
static const int warp_log = 5;
|
||||
static const int warp_mask = 31;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
__device__ T warpScanInclusive(T idata, volatile T* s_Data, unsigned int tid)
|
||||
{
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
const unsigned int laneId = cv::cuda::device::Warp::laneId();
|
||||
|
||||
// scan on shuffl functions
|
||||
#pragma unroll
|
||||
for (int i = 1; i <= (OPENCV_CUDA_WARP_SIZE / 2); i *= 2)
|
||||
{
|
||||
const T n = cv::cuda::device::shfl_up(idata, i);
|
||||
if (laneId >= i)
|
||||
idata += n;
|
||||
}
|
||||
|
||||
return idata;
|
||||
#else
|
||||
unsigned int pos = 2 * tid - (tid & (OPENCV_CUDA_WARP_SIZE - 1));
|
||||
s_Data[pos] = 0;
|
||||
pos += OPENCV_CUDA_WARP_SIZE;
|
||||
s_Data[pos] = idata;
|
||||
|
||||
s_Data[pos] += s_Data[pos - 1];
|
||||
s_Data[pos] += s_Data[pos - 2];
|
||||
s_Data[pos] += s_Data[pos - 4];
|
||||
s_Data[pos] += s_Data[pos - 8];
|
||||
s_Data[pos] += s_Data[pos - 16];
|
||||
|
||||
return s_Data[pos];
|
||||
#endif
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ __forceinline__ T warpScanExclusive(T idata, volatile T* s_Data, unsigned int tid)
|
||||
{
|
||||
return warpScanInclusive(idata, s_Data, tid) - idata;
|
||||
}
|
||||
|
||||
template <int tiNumScanThreads, typename T>
|
||||
__device__ T blockScanInclusive(T idata, volatile T* s_Data, unsigned int tid)
|
||||
{
|
||||
if (tiNumScanThreads > OPENCV_CUDA_WARP_SIZE)
|
||||
{
|
||||
//Bottom-level inclusive warp scan
|
||||
T warpResult = warpScanInclusive(idata, s_Data, tid);
|
||||
|
||||
//Save top elements of each warp for exclusive warp scan
|
||||
//sync to wait for warp scans to complete (because s_Data is being overwritten)
|
||||
__syncthreads();
|
||||
if ((tid & (OPENCV_CUDA_WARP_SIZE - 1)) == (OPENCV_CUDA_WARP_SIZE - 1))
|
||||
{
|
||||
s_Data[tid >> OPENCV_CUDA_LOG_WARP_SIZE] = warpResult;
|
||||
}
|
||||
|
||||
//wait for warp scans to complete
|
||||
__syncthreads();
|
||||
|
||||
if (tid < (tiNumScanThreads / OPENCV_CUDA_WARP_SIZE) )
|
||||
{
|
||||
//grab top warp elements
|
||||
T val = s_Data[tid];
|
||||
//calculate exclusive scan and write back to shared memory
|
||||
s_Data[tid] = warpScanExclusive(val, s_Data, tid);
|
||||
}
|
||||
|
||||
//return updated warp scans with exclusive scan results
|
||||
__syncthreads();
|
||||
|
||||
return warpResult + s_Data[tid >> OPENCV_CUDA_LOG_WARP_SIZE];
|
||||
}
|
||||
else
|
||||
{
|
||||
return warpScanInclusive(idata, s_Data, tid);
|
||||
}
|
||||
}
|
||||
}}}
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif // __OPENCV_CUDA_SCAN_HPP__
|
869
3rdparty/include/opencv2/core/cuda/simd_functions.hpp
vendored
Normal file
869
3rdparty/include/opencv2/core/cuda/simd_functions.hpp
vendored
Normal file
@ -0,0 +1,869 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2013 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* Neither the name of NVIDIA Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef __OPENCV_CUDA_SIMD_FUNCTIONS_HPP__
|
||||
#define __OPENCV_CUDA_SIMD_FUNCTIONS_HPP__
|
||||
|
||||
#include "common.hpp"
|
||||
|
||||
/** @file
|
||||
* @deprecated Use @ref cudev instead.
|
||||
*/
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
// 2
|
||||
|
||||
static __device__ __forceinline__ unsigned int vadd2(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r = 0;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
asm("vadd2.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#elif __CUDA_ARCH__ >= 200
|
||||
asm("vadd.u32.u32.u32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
asm("vadd.u32.u32.u32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#else
|
||||
unsigned int s;
|
||||
s = a ^ b; // sum bits
|
||||
r = a + b; // actual sum
|
||||
s = s ^ r; // determine carry-ins for each bit position
|
||||
s = s & 0x00010000; // carry-in to high word (= carry-out from low word)
|
||||
r = r - s; // subtract out carry-out from low word
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vsub2(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r = 0;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
asm("vsub2.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#elif __CUDA_ARCH__ >= 200
|
||||
asm("vsub.u32.u32.u32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
asm("vsub.u32.u32.u32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#else
|
||||
unsigned int s;
|
||||
s = a ^ b; // sum bits
|
||||
r = a - b; // actual sum
|
||||
s = s ^ r; // determine carry-ins for each bit position
|
||||
s = s & 0x00010000; // borrow to high word
|
||||
r = r + s; // compensate for borrow from low word
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vabsdiff2(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r = 0;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
asm("vabsdiff2.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#elif __CUDA_ARCH__ >= 200
|
||||
asm("vabsdiff.u32.u32.u32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
asm("vabsdiff.u32.u32.u32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#else
|
||||
unsigned int s, t, u, v;
|
||||
s = a & 0x0000ffff; // extract low halfword
|
||||
r = b & 0x0000ffff; // extract low halfword
|
||||
u = ::max(r, s); // maximum of low halfwords
|
||||
v = ::min(r, s); // minimum of low halfwords
|
||||
s = a & 0xffff0000; // extract high halfword
|
||||
r = b & 0xffff0000; // extract high halfword
|
||||
t = ::max(r, s); // maximum of high halfwords
|
||||
s = ::min(r, s); // minimum of high halfwords
|
||||
r = u | t; // maximum of both halfwords
|
||||
s = v | s; // minimum of both halfwords
|
||||
r = r - s; // |a - b| = max(a,b) - min(a,b);
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vavg2(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r, s;
|
||||
|
||||
// HAKMEM #23: a + b = 2 * (a & b) + (a ^ b) ==>
|
||||
// (a + b) / 2 = (a & b) + ((a ^ b) >> 1)
|
||||
s = a ^ b;
|
||||
r = a & b;
|
||||
s = s & 0xfffefffe; // ensure shift doesn't cross halfword boundaries
|
||||
s = s >> 1;
|
||||
s = r + s;
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vavrg2(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r = 0;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
asm("vavrg2.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#else
|
||||
// HAKMEM #23: a + b = 2 * (a | b) - (a ^ b) ==>
|
||||
// (a + b + 1) / 2 = (a | b) - ((a ^ b) >> 1)
|
||||
unsigned int s;
|
||||
s = a ^ b;
|
||||
r = a | b;
|
||||
s = s & 0xfffefffe; // ensure shift doesn't cross half-word boundaries
|
||||
s = s >> 1;
|
||||
r = r - s;
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vseteq2(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r = 0;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
asm("vset2.u32.u32.eq %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#else
|
||||
// inspired by Alan Mycroft's null-byte detection algorithm:
|
||||
// null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
|
||||
unsigned int c;
|
||||
r = a ^ b; // 0x0000 if a == b
|
||||
c = r | 0x80008000; // set msbs, to catch carry out
|
||||
r = r ^ c; // extract msbs, msb = 1 if r < 0x8000
|
||||
c = c - 0x00010001; // msb = 0, if r was 0x0000 or 0x8000
|
||||
c = r & ~c; // msb = 1, if r was 0x0000
|
||||
r = c >> 15; // convert to bool
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vcmpeq2(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r, c;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
r = vseteq2(a, b);
|
||||
c = r << 16; // convert bool
|
||||
r = c - r; // into mask
|
||||
#else
|
||||
// inspired by Alan Mycroft's null-byte detection algorithm:
|
||||
// null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
|
||||
r = a ^ b; // 0x0000 if a == b
|
||||
c = r | 0x80008000; // set msbs, to catch carry out
|
||||
r = r ^ c; // extract msbs, msb = 1 if r < 0x8000
|
||||
c = c - 0x00010001; // msb = 0, if r was 0x0000 or 0x8000
|
||||
c = r & ~c; // msb = 1, if r was 0x0000
|
||||
r = c >> 15; // convert
|
||||
r = c - r; // msbs to
|
||||
r = c | r; // mask
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vsetge2(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r = 0;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
asm("vset2.u32.u32.ge %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#else
|
||||
unsigned int c;
|
||||
asm("not.b32 %0, %0;" : "+r"(b));
|
||||
c = vavrg2(a, b); // (a + ~b + 1) / 2 = (a - b) / 2
|
||||
c = c & 0x80008000; // msb = carry-outs
|
||||
r = c >> 15; // convert to bool
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vcmpge2(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r, c;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
r = vsetge2(a, b);
|
||||
c = r << 16; // convert bool
|
||||
r = c - r; // into mask
|
||||
#else
|
||||
asm("not.b32 %0, %0;" : "+r"(b));
|
||||
c = vavrg2(a, b); // (a + ~b + 1) / 2 = (a - b) / 2
|
||||
c = c & 0x80008000; // msb = carry-outs
|
||||
r = c >> 15; // convert
|
||||
r = c - r; // msbs to
|
||||
r = c | r; // mask
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vsetgt2(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r = 0;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
asm("vset2.u32.u32.gt %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#else
|
||||
unsigned int c;
|
||||
asm("not.b32 %0, %0;" : "+r"(b));
|
||||
c = vavg2(a, b); // (a + ~b) / 2 = (a - b) / 2 [rounded down]
|
||||
c = c & 0x80008000; // msbs = carry-outs
|
||||
r = c >> 15; // convert to bool
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vcmpgt2(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r, c;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
r = vsetgt2(a, b);
|
||||
c = r << 16; // convert bool
|
||||
r = c - r; // into mask
|
||||
#else
|
||||
asm("not.b32 %0, %0;" : "+r"(b));
|
||||
c = vavg2(a, b); // (a + ~b) / 2 = (a - b) / 2 [rounded down]
|
||||
c = c & 0x80008000; // msbs = carry-outs
|
||||
r = c >> 15; // convert
|
||||
r = c - r; // msbs to
|
||||
r = c | r; // mask
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vsetle2(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r = 0;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
asm("vset2.u32.u32.le %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#else
|
||||
unsigned int c;
|
||||
asm("not.b32 %0, %0;" : "+r"(a));
|
||||
c = vavrg2(a, b); // (b + ~a + 1) / 2 = (b - a) / 2
|
||||
c = c & 0x80008000; // msb = carry-outs
|
||||
r = c >> 15; // convert to bool
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vcmple2(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r, c;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
r = vsetle2(a, b);
|
||||
c = r << 16; // convert bool
|
||||
r = c - r; // into mask
|
||||
#else
|
||||
asm("not.b32 %0, %0;" : "+r"(a));
|
||||
c = vavrg2(a, b); // (b + ~a + 1) / 2 = (b - a) / 2
|
||||
c = c & 0x80008000; // msb = carry-outs
|
||||
r = c >> 15; // convert
|
||||
r = c - r; // msbs to
|
||||
r = c | r; // mask
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vsetlt2(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r = 0;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
asm("vset2.u32.u32.lt %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#else
|
||||
unsigned int c;
|
||||
asm("not.b32 %0, %0;" : "+r"(a));
|
||||
c = vavg2(a, b); // (b + ~a) / 2 = (b - a) / 2 [rounded down]
|
||||
c = c & 0x80008000; // msb = carry-outs
|
||||
r = c >> 15; // convert to bool
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vcmplt2(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r, c;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
r = vsetlt2(a, b);
|
||||
c = r << 16; // convert bool
|
||||
r = c - r; // into mask
|
||||
#else
|
||||
asm("not.b32 %0, %0;" : "+r"(a));
|
||||
c = vavg2(a, b); // (b + ~a) / 2 = (b - a) / 2 [rounded down]
|
||||
c = c & 0x80008000; // msb = carry-outs
|
||||
r = c >> 15; // convert
|
||||
r = c - r; // msbs to
|
||||
r = c | r; // mask
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vsetne2(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r = 0;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
asm ("vset2.u32.u32.ne %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#else
|
||||
// inspired by Alan Mycroft's null-byte detection algorithm:
|
||||
// null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
|
||||
unsigned int c;
|
||||
r = a ^ b; // 0x0000 if a == b
|
||||
c = r | 0x80008000; // set msbs, to catch carry out
|
||||
c = c - 0x00010001; // msb = 0, if r was 0x0000 or 0x8000
|
||||
c = r | c; // msb = 1, if r was not 0x0000
|
||||
c = c & 0x80008000; // extract msbs
|
||||
r = c >> 15; // convert to bool
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vcmpne2(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r, c;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
r = vsetne2(a, b);
|
||||
c = r << 16; // convert bool
|
||||
r = c - r; // into mask
|
||||
#else
|
||||
// inspired by Alan Mycroft's null-byte detection algorithm:
|
||||
// null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
|
||||
r = a ^ b; // 0x0000 if a == b
|
||||
c = r | 0x80008000; // set msbs, to catch carry out
|
||||
c = c - 0x00010001; // msb = 0, if r was 0x0000 or 0x8000
|
||||
c = r | c; // msb = 1, if r was not 0x0000
|
||||
c = c & 0x80008000; // extract msbs
|
||||
r = c >> 15; // convert
|
||||
r = c - r; // msbs to
|
||||
r = c | r; // mask
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vmax2(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r = 0;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
asm("vmax2.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#elif __CUDA_ARCH__ >= 200
|
||||
asm("vmax.u32.u32.u32 %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
asm("vmax.u32.u32.u32 %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#else
|
||||
unsigned int s, t, u;
|
||||
r = a & 0x0000ffff; // extract low halfword
|
||||
s = b & 0x0000ffff; // extract low halfword
|
||||
t = ::max(r, s); // maximum of low halfwords
|
||||
r = a & 0xffff0000; // extract high halfword
|
||||
s = b & 0xffff0000; // extract high halfword
|
||||
u = ::max(r, s); // maximum of high halfwords
|
||||
r = t | u; // combine halfword maximums
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vmin2(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r = 0;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
asm("vmin2.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#elif __CUDA_ARCH__ >= 200
|
||||
asm("vmin.u32.u32.u32 %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
asm("vmin.u32.u32.u32 %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#else
|
||||
unsigned int s, t, u;
|
||||
r = a & 0x0000ffff; // extract low halfword
|
||||
s = b & 0x0000ffff; // extract low halfword
|
||||
t = ::min(r, s); // minimum of low halfwords
|
||||
r = a & 0xffff0000; // extract high halfword
|
||||
s = b & 0xffff0000; // extract high halfword
|
||||
u = ::min(r, s); // minimum of high halfwords
|
||||
r = t | u; // combine halfword minimums
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
// 4
|
||||
|
||||
static __device__ __forceinline__ unsigned int vadd4(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r = 0;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
asm("vadd4.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#elif __CUDA_ARCH__ >= 200
|
||||
asm("vadd.u32.u32.u32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
asm("vadd.u32.u32.u32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
asm("vadd.u32.u32.u32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
asm("vadd.u32.u32.u32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#else
|
||||
unsigned int s, t;
|
||||
s = a ^ b; // sum bits
|
||||
r = a & 0x7f7f7f7f; // clear msbs
|
||||
t = b & 0x7f7f7f7f; // clear msbs
|
||||
s = s & 0x80808080; // msb sum bits
|
||||
r = r + t; // add without msbs, record carry-out in msbs
|
||||
r = r ^ s; // sum of msb sum and carry-in bits, w/o carry-out
|
||||
#endif /* __CUDA_ARCH__ >= 300 */
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vsub4(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r = 0;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
asm("vsub4.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#elif __CUDA_ARCH__ >= 200
|
||||
asm("vsub.u32.u32.u32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
asm("vsub.u32.u32.u32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
asm("vsub.u32.u32.u32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
asm("vsub.u32.u32.u32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#else
|
||||
unsigned int s, t;
|
||||
s = a ^ ~b; // inverted sum bits
|
||||
r = a | 0x80808080; // set msbs
|
||||
t = b & 0x7f7f7f7f; // clear msbs
|
||||
s = s & 0x80808080; // inverted msb sum bits
|
||||
r = r - t; // subtract w/o msbs, record inverted borrows in msb
|
||||
r = r ^ s; // combine inverted msb sum bits and borrows
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vavg4(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r, s;
|
||||
|
||||
// HAKMEM #23: a + b = 2 * (a & b) + (a ^ b) ==>
|
||||
// (a + b) / 2 = (a & b) + ((a ^ b) >> 1)
|
||||
s = a ^ b;
|
||||
r = a & b;
|
||||
s = s & 0xfefefefe; // ensure following shift doesn't cross byte boundaries
|
||||
s = s >> 1;
|
||||
s = r + s;
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vavrg4(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r = 0;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
asm("vavrg4.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#else
|
||||
// HAKMEM #23: a + b = 2 * (a | b) - (a ^ b) ==>
|
||||
// (a + b + 1) / 2 = (a | b) - ((a ^ b) >> 1)
|
||||
unsigned int c;
|
||||
c = a ^ b;
|
||||
r = a | b;
|
||||
c = c & 0xfefefefe; // ensure following shift doesn't cross byte boundaries
|
||||
c = c >> 1;
|
||||
r = r - c;
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vseteq4(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r = 0;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
asm("vset4.u32.u32.eq %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#else
|
||||
// inspired by Alan Mycroft's null-byte detection algorithm:
|
||||
// null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
|
||||
unsigned int c;
|
||||
r = a ^ b; // 0x00 if a == b
|
||||
c = r | 0x80808080; // set msbs, to catch carry out
|
||||
r = r ^ c; // extract msbs, msb = 1 if r < 0x80
|
||||
c = c - 0x01010101; // msb = 0, if r was 0x00 or 0x80
|
||||
c = r & ~c; // msb = 1, if r was 0x00
|
||||
r = c >> 7; // convert to bool
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vcmpeq4(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r, t;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
r = vseteq4(a, b);
|
||||
t = r << 8; // convert bool
|
||||
r = t - r; // to mask
|
||||
#else
|
||||
// inspired by Alan Mycroft's null-byte detection algorithm:
|
||||
// null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
|
||||
t = a ^ b; // 0x00 if a == b
|
||||
r = t | 0x80808080; // set msbs, to catch carry out
|
||||
t = t ^ r; // extract msbs, msb = 1 if t < 0x80
|
||||
r = r - 0x01010101; // msb = 0, if t was 0x00 or 0x80
|
||||
r = t & ~r; // msb = 1, if t was 0x00
|
||||
t = r >> 7; // build mask
|
||||
t = r - t; // from
|
||||
r = t | r; // msbs
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vsetle4(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r = 0;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
asm("vset4.u32.u32.le %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#else
|
||||
unsigned int c;
|
||||
asm("not.b32 %0, %0;" : "+r"(a));
|
||||
c = vavrg4(a, b); // (b + ~a + 1) / 2 = (b - a) / 2
|
||||
c = c & 0x80808080; // msb = carry-outs
|
||||
r = c >> 7; // convert to bool
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vcmple4(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r, c;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
r = vsetle4(a, b);
|
||||
c = r << 8; // convert bool
|
||||
r = c - r; // to mask
|
||||
#else
|
||||
asm("not.b32 %0, %0;" : "+r"(a));
|
||||
c = vavrg4(a, b); // (b + ~a + 1) / 2 = (b - a) / 2
|
||||
c = c & 0x80808080; // msbs = carry-outs
|
||||
r = c >> 7; // convert
|
||||
r = c - r; // msbs to
|
||||
r = c | r; // mask
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vsetlt4(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r = 0;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
asm("vset4.u32.u32.lt %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#else
|
||||
unsigned int c;
|
||||
asm("not.b32 %0, %0;" : "+r"(a));
|
||||
c = vavg4(a, b); // (b + ~a) / 2 = (b - a) / 2 [rounded down]
|
||||
c = c & 0x80808080; // msb = carry-outs
|
||||
r = c >> 7; // convert to bool
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vcmplt4(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r, c;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
r = vsetlt4(a, b);
|
||||
c = r << 8; // convert bool
|
||||
r = c - r; // to mask
|
||||
#else
|
||||
asm("not.b32 %0, %0;" : "+r"(a));
|
||||
c = vavg4(a, b); // (b + ~a) / 2 = (b - a) / 2 [rounded down]
|
||||
c = c & 0x80808080; // msbs = carry-outs
|
||||
r = c >> 7; // convert
|
||||
r = c - r; // msbs to
|
||||
r = c | r; // mask
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vsetge4(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r = 0;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
asm("vset4.u32.u32.ge %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#else
|
||||
unsigned int c;
|
||||
asm("not.b32 %0, %0;" : "+r"(b));
|
||||
c = vavrg4(a, b); // (a + ~b + 1) / 2 = (a - b) / 2
|
||||
c = c & 0x80808080; // msb = carry-outs
|
||||
r = c >> 7; // convert to bool
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vcmpge4(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r, s;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
r = vsetge4(a, b);
|
||||
s = r << 8; // convert bool
|
||||
r = s - r; // to mask
|
||||
#else
|
||||
asm ("not.b32 %0,%0;" : "+r"(b));
|
||||
r = vavrg4 (a, b); // (a + ~b + 1) / 2 = (a - b) / 2
|
||||
r = r & 0x80808080; // msb = carry-outs
|
||||
s = r >> 7; // build mask
|
||||
s = r - s; // from
|
||||
r = s | r; // msbs
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vsetgt4(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r = 0;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
asm("vset4.u32.u32.gt %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#else
|
||||
unsigned int c;
|
||||
asm("not.b32 %0, %0;" : "+r"(b));
|
||||
c = vavg4(a, b); // (a + ~b) / 2 = (a - b) / 2 [rounded down]
|
||||
c = c & 0x80808080; // msb = carry-outs
|
||||
r = c >> 7; // convert to bool
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vcmpgt4(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r, c;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
r = vsetgt4(a, b);
|
||||
c = r << 8; // convert bool
|
||||
r = c - r; // to mask
|
||||
#else
|
||||
asm("not.b32 %0, %0;" : "+r"(b));
|
||||
c = vavg4(a, b); // (a + ~b) / 2 = (a - b) / 2 [rounded down]
|
||||
c = c & 0x80808080; // msb = carry-outs
|
||||
r = c >> 7; // convert
|
||||
r = c - r; // msbs to
|
||||
r = c | r; // mask
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vsetne4(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r = 0;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
asm("vset4.u32.u32.ne %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#else
|
||||
// inspired by Alan Mycroft's null-byte detection algorithm:
|
||||
// null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
|
||||
unsigned int c;
|
||||
r = a ^ b; // 0x00 if a == b
|
||||
c = r | 0x80808080; // set msbs, to catch carry out
|
||||
c = c - 0x01010101; // msb = 0, if r was 0x00 or 0x80
|
||||
c = r | c; // msb = 1, if r was not 0x00
|
||||
c = c & 0x80808080; // extract msbs
|
||||
r = c >> 7; // convert to bool
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vcmpne4(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r, c;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
r = vsetne4(a, b);
|
||||
c = r << 8; // convert bool
|
||||
r = c - r; // to mask
|
||||
#else
|
||||
// inspired by Alan Mycroft's null-byte detection algorithm:
|
||||
// null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
|
||||
r = a ^ b; // 0x00 if a == b
|
||||
c = r | 0x80808080; // set msbs, to catch carry out
|
||||
c = c - 0x01010101; // msb = 0, if r was 0x00 or 0x80
|
||||
c = r | c; // msb = 1, if r was not 0x00
|
||||
c = c & 0x80808080; // extract msbs
|
||||
r = c >> 7; // convert
|
||||
r = c - r; // msbs to
|
||||
r = c | r; // mask
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vabsdiff4(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r = 0;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
asm("vabsdiff4.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#elif __CUDA_ARCH__ >= 200
|
||||
asm("vabsdiff.u32.u32.u32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
asm("vabsdiff.u32.u32.u32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
asm("vabsdiff.u32.u32.u32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
asm("vabsdiff.u32.u32.u32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#else
|
||||
unsigned int s;
|
||||
s = vcmpge4(a, b); // mask = 0xff if a >= b
|
||||
r = a ^ b; //
|
||||
s = (r & s) ^ b; // select a when a >= b, else select b => max(a,b)
|
||||
r = s ^ r; // select a when b >= a, else select b => min(a,b)
|
||||
r = s - r; // |a - b| = max(a,b) - min(a,b);
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vmax4(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r = 0;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
asm("vmax4.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#elif __CUDA_ARCH__ >= 200
|
||||
asm("vmax.u32.u32.u32 %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
asm("vmax.u32.u32.u32 %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
asm("vmax.u32.u32.u32 %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
asm("vmax.u32.u32.u32 %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#else
|
||||
unsigned int s;
|
||||
s = vcmpge4(a, b); // mask = 0xff if a >= b
|
||||
r = a & s; // select a when b >= a
|
||||
s = b & ~s; // select b when b < a
|
||||
r = r | s; // combine byte selections
|
||||
#endif
|
||||
|
||||
return r; // byte-wise unsigned maximum
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ unsigned int vmin4(unsigned int a, unsigned int b)
|
||||
{
|
||||
unsigned int r = 0;
|
||||
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
asm("vmin4.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#elif __CUDA_ARCH__ >= 200
|
||||
asm("vmin.u32.u32.u32 %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
asm("vmin.u32.u32.u32 %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
asm("vmin.u32.u32.u32 %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
asm("vmin.u32.u32.u32 %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||
#else
|
||||
unsigned int s;
|
||||
s = vcmpge4(b, a); // mask = 0xff if a >= b
|
||||
r = a & s; // select a when b >= a
|
||||
s = b & ~s; // select b when b < a
|
||||
r = r | s; // combine byte selections
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
}}}
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif // __OPENCV_CUDA_SIMD_FUNCTIONS_HPP__
|
75
3rdparty/include/opencv2/core/cuda/transform.hpp
vendored
Normal file
75
3rdparty/include/opencv2/core/cuda/transform.hpp
vendored
Normal file
@ -0,0 +1,75 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_CUDA_TRANSFORM_HPP__
|
||||
#define __OPENCV_CUDA_TRANSFORM_HPP__
|
||||
|
||||
#include "common.hpp"
|
||||
#include "utility.hpp"
|
||||
#include "detail/transform_detail.hpp"
|
||||
|
||||
/** @file
|
||||
* @deprecated Use @ref cudev instead.
|
||||
*/
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
template <typename T, typename D, typename UnOp, typename Mask>
|
||||
static inline void transform(PtrStepSz<T> src, PtrStepSz<D> dst, UnOp op, const Mask& mask, cudaStream_t stream)
|
||||
{
|
||||
typedef TransformFunctorTraits<UnOp> ft;
|
||||
transform_detail::TransformDispatcher<VecTraits<T>::cn == 1 && VecTraits<D>::cn == 1 && ft::smart_shift != 1>::call(src, dst, op, mask, stream);
|
||||
}
|
||||
|
||||
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
|
||||
static inline void transform(PtrStepSz<T1> src1, PtrStepSz<T2> src2, PtrStepSz<D> dst, BinOp op, const Mask& mask, cudaStream_t stream)
|
||||
{
|
||||
typedef TransformFunctorTraits<BinOp> ft;
|
||||
transform_detail::TransformDispatcher<VecTraits<T1>::cn == 1 && VecTraits<T2>::cn == 1 && VecTraits<D>::cn == 1 && ft::smart_shift != 1>::call(src1, src2, dst, op, mask, stream);
|
||||
}
|
||||
}}}
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif // __OPENCV_CUDA_TRANSFORM_HPP__
|
90
3rdparty/include/opencv2/core/cuda/type_traits.hpp
vendored
Normal file
90
3rdparty/include/opencv2/core/cuda/type_traits.hpp
vendored
Normal file
@ -0,0 +1,90 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_CUDA_TYPE_TRAITS_HPP__
|
||||
#define __OPENCV_CUDA_TYPE_TRAITS_HPP__
|
||||
|
||||
#include "detail/type_traits_detail.hpp"
|
||||
|
||||
/** @file
|
||||
* @deprecated Use @ref cudev instead.
|
||||
*/
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
template <typename T> struct IsSimpleParameter
|
||||
{
|
||||
enum {value = type_traits_detail::IsIntegral<T>::value || type_traits_detail::IsFloat<T>::value ||
|
||||
type_traits_detail::PointerTraits<typename type_traits_detail::ReferenceTraits<T>::type>::value};
|
||||
};
|
||||
|
||||
template <typename T> struct TypeTraits
|
||||
{
|
||||
typedef typename type_traits_detail::UnConst<T>::type NonConstType;
|
||||
typedef typename type_traits_detail::UnVolatile<T>::type NonVolatileType;
|
||||
typedef typename type_traits_detail::UnVolatile<typename type_traits_detail::UnConst<T>::type>::type UnqualifiedType;
|
||||
typedef typename type_traits_detail::PointerTraits<UnqualifiedType>::type PointeeType;
|
||||
typedef typename type_traits_detail::ReferenceTraits<T>::type ReferredType;
|
||||
|
||||
enum { isConst = type_traits_detail::UnConst<T>::value };
|
||||
enum { isVolatile = type_traits_detail::UnVolatile<T>::value };
|
||||
|
||||
enum { isReference = type_traits_detail::ReferenceTraits<UnqualifiedType>::value };
|
||||
enum { isPointer = type_traits_detail::PointerTraits<typename type_traits_detail::ReferenceTraits<UnqualifiedType>::type>::value };
|
||||
|
||||
enum { isUnsignedInt = type_traits_detail::IsUnsignedIntegral<UnqualifiedType>::value };
|
||||
enum { isSignedInt = type_traits_detail::IsSignedIntergral<UnqualifiedType>::value };
|
||||
enum { isIntegral = type_traits_detail::IsIntegral<UnqualifiedType>::value };
|
||||
enum { isFloat = type_traits_detail::IsFloat<UnqualifiedType>::value };
|
||||
enum { isArith = isIntegral || isFloat };
|
||||
enum { isVec = type_traits_detail::IsVec<UnqualifiedType>::value };
|
||||
|
||||
typedef typename type_traits_detail::Select<IsSimpleParameter<UnqualifiedType>::value,
|
||||
T, typename type_traits_detail::AddParameterType<T>::type>::type ParameterType;
|
||||
};
|
||||
}}}
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif // __OPENCV_CUDA_TYPE_TRAITS_HPP__
|
221
3rdparty/include/opencv2/core/cuda/utility.hpp
vendored
Normal file
221
3rdparty/include/opencv2/core/cuda/utility.hpp
vendored
Normal file
@ -0,0 +1,221 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_CUDA_UTILITY_HPP__
|
||||
#define __OPENCV_CUDA_UTILITY_HPP__
|
||||
|
||||
#include "saturate_cast.hpp"
|
||||
#include "datamov_utils.hpp"
|
||||
|
||||
/** @file
|
||||
* @deprecated Use @ref cudev instead.
|
||||
*/
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
#define OPENCV_CUDA_LOG_WARP_SIZE (5)
|
||||
#define OPENCV_CUDA_WARP_SIZE (1 << OPENCV_CUDA_LOG_WARP_SIZE)
|
||||
#define OPENCV_CUDA_LOG_MEM_BANKS ((__CUDA_ARCH__ >= 200) ? 5 : 4) // 32 banks on fermi, 16 on tesla
|
||||
#define OPENCV_CUDA_MEM_BANKS (1 << OPENCV_CUDA_LOG_MEM_BANKS)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// swap
|
||||
|
||||
template <typename T> void __device__ __host__ __forceinline__ swap(T& a, T& b)
|
||||
{
|
||||
const T temp = a;
|
||||
a = b;
|
||||
b = temp;
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Mask Reader
|
||||
|
||||
struct SingleMask
|
||||
{
|
||||
explicit __host__ __device__ __forceinline__ SingleMask(PtrStepb mask_) : mask(mask_) {}
|
||||
__host__ __device__ __forceinline__ SingleMask(const SingleMask& mask_): mask(mask_.mask){}
|
||||
|
||||
__device__ __forceinline__ bool operator()(int y, int x) const
|
||||
{
|
||||
return mask.ptr(y)[x] != 0;
|
||||
}
|
||||
|
||||
PtrStepb mask;
|
||||
};
|
||||
|
||||
struct SingleMaskChannels
|
||||
{
|
||||
__host__ __device__ __forceinline__ SingleMaskChannels(PtrStepb mask_, int channels_)
|
||||
: mask(mask_), channels(channels_) {}
|
||||
__host__ __device__ __forceinline__ SingleMaskChannels(const SingleMaskChannels& mask_)
|
||||
:mask(mask_.mask), channels(mask_.channels){}
|
||||
|
||||
__device__ __forceinline__ bool operator()(int y, int x) const
|
||||
{
|
||||
return mask.ptr(y)[x / channels] != 0;
|
||||
}
|
||||
|
||||
PtrStepb mask;
|
||||
int channels;
|
||||
};
|
||||
|
||||
struct MaskCollection
|
||||
{
|
||||
explicit __host__ __device__ __forceinline__ MaskCollection(PtrStepb* maskCollection_)
|
||||
: maskCollection(maskCollection_) {}
|
||||
|
||||
__device__ __forceinline__ MaskCollection(const MaskCollection& masks_)
|
||||
: maskCollection(masks_.maskCollection), curMask(masks_.curMask){}
|
||||
|
||||
__device__ __forceinline__ void next()
|
||||
{
|
||||
curMask = *maskCollection++;
|
||||
}
|
||||
__device__ __forceinline__ void setMask(int z)
|
||||
{
|
||||
curMask = maskCollection[z];
|
||||
}
|
||||
|
||||
__device__ __forceinline__ bool operator()(int y, int x) const
|
||||
{
|
||||
uchar val;
|
||||
return curMask.data == 0 || (ForceGlob<uchar>::Load(curMask.ptr(y), x, val), (val != 0));
|
||||
}
|
||||
|
||||
const PtrStepb* maskCollection;
|
||||
PtrStepb curMask;
|
||||
};
|
||||
|
||||
struct WithOutMask
|
||||
{
|
||||
__host__ __device__ __forceinline__ WithOutMask(){}
|
||||
__host__ __device__ __forceinline__ WithOutMask(const WithOutMask&){}
|
||||
|
||||
__device__ __forceinline__ void next() const
|
||||
{
|
||||
}
|
||||
__device__ __forceinline__ void setMask(int) const
|
||||
{
|
||||
}
|
||||
|
||||
__device__ __forceinline__ bool operator()(int, int) const
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ bool operator()(int, int, int) const
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ bool check(int, int)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ bool check(int, int, int)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Solve linear system
|
||||
|
||||
// solve 2x2 linear system Ax=b
|
||||
template <typename T> __device__ __forceinline__ bool solve2x2(const T A[2][2], const T b[2], T x[2])
|
||||
{
|
||||
T det = A[0][0] * A[1][1] - A[1][0] * A[0][1];
|
||||
|
||||
if (det != 0)
|
||||
{
|
||||
double invdet = 1.0 / det;
|
||||
|
||||
x[0] = saturate_cast<T>(invdet * (b[0] * A[1][1] - b[1] * A[0][1]));
|
||||
|
||||
x[1] = saturate_cast<T>(invdet * (A[0][0] * b[1] - A[1][0] * b[0]));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// solve 3x3 linear system Ax=b
|
||||
template <typename T> __device__ __forceinline__ bool solve3x3(const T A[3][3], const T b[3], T x[3])
|
||||
{
|
||||
T det = A[0][0] * (A[1][1] * A[2][2] - A[1][2] * A[2][1])
|
||||
- A[0][1] * (A[1][0] * A[2][2] - A[1][2] * A[2][0])
|
||||
+ A[0][2] * (A[1][0] * A[2][1] - A[1][1] * A[2][0]);
|
||||
|
||||
if (det != 0)
|
||||
{
|
||||
double invdet = 1.0 / det;
|
||||
|
||||
x[0] = saturate_cast<T>(invdet *
|
||||
(b[0] * (A[1][1] * A[2][2] - A[1][2] * A[2][1]) -
|
||||
A[0][1] * (b[1] * A[2][2] - A[1][2] * b[2] ) +
|
||||
A[0][2] * (b[1] * A[2][1] - A[1][1] * b[2] )));
|
||||
|
||||
x[1] = saturate_cast<T>(invdet *
|
||||
(A[0][0] * (b[1] * A[2][2] - A[1][2] * b[2] ) -
|
||||
b[0] * (A[1][0] * A[2][2] - A[1][2] * A[2][0]) +
|
||||
A[0][2] * (A[1][0] * b[2] - b[1] * A[2][0])));
|
||||
|
||||
x[2] = saturate_cast<T>(invdet *
|
||||
(A[0][0] * (A[1][1] * b[2] - b[1] * A[2][1]) -
|
||||
A[0][1] * (A[1][0] * b[2] - b[1] * A[2][0]) +
|
||||
b[0] * (A[1][0] * A[2][1] - A[1][1] * A[2][0])));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
}}} // namespace cv { namespace cuda { namespace cudev
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif // __OPENCV_CUDA_UTILITY_HPP__
|
232
3rdparty/include/opencv2/core/cuda/vec_distance.hpp
vendored
Normal file
232
3rdparty/include/opencv2/core/cuda/vec_distance.hpp
vendored
Normal file
@ -0,0 +1,232 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_CUDA_VEC_DISTANCE_HPP__
|
||||
#define __OPENCV_CUDA_VEC_DISTANCE_HPP__
|
||||
|
||||
#include "reduce.hpp"
|
||||
#include "functional.hpp"
|
||||
#include "detail/vec_distance_detail.hpp"
|
||||
|
||||
/** @file
|
||||
* @deprecated Use @ref cudev instead.
|
||||
*/
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
template <typename T> struct L1Dist
|
||||
{
|
||||
typedef int value_type;
|
||||
typedef int result_type;
|
||||
|
||||
__device__ __forceinline__ L1Dist() : mySum(0) {}
|
||||
|
||||
__device__ __forceinline__ void reduceIter(int val1, int val2)
|
||||
{
|
||||
mySum = __sad(val1, val2, mySum);
|
||||
}
|
||||
|
||||
template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(int* smem, int tid)
|
||||
{
|
||||
reduce<THREAD_DIM>(smem, mySum, tid, plus<int>());
|
||||
}
|
||||
|
||||
__device__ __forceinline__ operator int() const
|
||||
{
|
||||
return mySum;
|
||||
}
|
||||
|
||||
int mySum;
|
||||
};
|
||||
template <> struct L1Dist<float>
|
||||
{
|
||||
typedef float value_type;
|
||||
typedef float result_type;
|
||||
|
||||
__device__ __forceinline__ L1Dist() : mySum(0.0f) {}
|
||||
|
||||
__device__ __forceinline__ void reduceIter(float val1, float val2)
|
||||
{
|
||||
mySum += ::fabs(val1 - val2);
|
||||
}
|
||||
|
||||
template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(float* smem, int tid)
|
||||
{
|
||||
reduce<THREAD_DIM>(smem, mySum, tid, plus<float>());
|
||||
}
|
||||
|
||||
__device__ __forceinline__ operator float() const
|
||||
{
|
||||
return mySum;
|
||||
}
|
||||
|
||||
float mySum;
|
||||
};
|
||||
|
||||
struct L2Dist
|
||||
{
|
||||
typedef float value_type;
|
||||
typedef float result_type;
|
||||
|
||||
__device__ __forceinline__ L2Dist() : mySum(0.0f) {}
|
||||
|
||||
__device__ __forceinline__ void reduceIter(float val1, float val2)
|
||||
{
|
||||
float reg = val1 - val2;
|
||||
mySum += reg * reg;
|
||||
}
|
||||
|
||||
template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(float* smem, int tid)
|
||||
{
|
||||
reduce<THREAD_DIM>(smem, mySum, tid, plus<float>());
|
||||
}
|
||||
|
||||
__device__ __forceinline__ operator float() const
|
||||
{
|
||||
return sqrtf(mySum);
|
||||
}
|
||||
|
||||
float mySum;
|
||||
};
|
||||
|
||||
struct HammingDist
|
||||
{
|
||||
typedef int value_type;
|
||||
typedef int result_type;
|
||||
|
||||
__device__ __forceinline__ HammingDist() : mySum(0) {}
|
||||
|
||||
__device__ __forceinline__ void reduceIter(int val1, int val2)
|
||||
{
|
||||
mySum += __popc(val1 ^ val2);
|
||||
}
|
||||
|
||||
template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(int* smem, int tid)
|
||||
{
|
||||
reduce<THREAD_DIM>(smem, mySum, tid, plus<int>());
|
||||
}
|
||||
|
||||
__device__ __forceinline__ operator int() const
|
||||
{
|
||||
return mySum;
|
||||
}
|
||||
|
||||
int mySum;
|
||||
};
|
||||
|
||||
// calc distance between two vectors in global memory
|
||||
template <int THREAD_DIM, typename Dist, typename T1, typename T2>
|
||||
__device__ void calcVecDiffGlobal(const T1* vec1, const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid)
|
||||
{
|
||||
for (int i = tid; i < len; i += THREAD_DIM)
|
||||
{
|
||||
T1 val1;
|
||||
ForceGlob<T1>::Load(vec1, i, val1);
|
||||
|
||||
T2 val2;
|
||||
ForceGlob<T2>::Load(vec2, i, val2);
|
||||
|
||||
dist.reduceIter(val1, val2);
|
||||
}
|
||||
|
||||
dist.reduceAll<THREAD_DIM>(smem, tid);
|
||||
}
|
||||
|
||||
// calc distance between two vectors, first vector is cached in register or shared memory, second vector is in global memory
|
||||
template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN, typename Dist, typename T1, typename T2>
|
||||
__device__ __forceinline__ void calcVecDiffCached(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, typename Dist::result_type* smem, int tid)
|
||||
{
|
||||
vec_distance_detail::VecDiffCachedCalculator<THREAD_DIM, MAX_LEN, LEN_EQ_MAX_LEN>::calc(vecCached, vecGlob, len, dist, tid);
|
||||
|
||||
dist.reduceAll<THREAD_DIM>(smem, tid);
|
||||
}
|
||||
|
||||
// calc distance between two vectors in global memory
|
||||
template <int THREAD_DIM, typename T1> struct VecDiffGlobal
|
||||
{
|
||||
explicit __device__ __forceinline__ VecDiffGlobal(const T1* vec1_, int = 0, void* = 0, int = 0, int = 0)
|
||||
{
|
||||
vec1 = vec1_;
|
||||
}
|
||||
|
||||
template <typename T2, typename Dist>
|
||||
__device__ __forceinline__ void calc(const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid) const
|
||||
{
|
||||
calcVecDiffGlobal<THREAD_DIM>(vec1, vec2, len, dist, smem, tid);
|
||||
}
|
||||
|
||||
const T1* vec1;
|
||||
};
|
||||
|
||||
// calc distance between two vectors, first vector is cached in register memory, second vector is in global memory
|
||||
template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN, typename U> struct VecDiffCachedRegister
|
||||
{
|
||||
template <typename T1> __device__ __forceinline__ VecDiffCachedRegister(const T1* vec1, int len, U* smem, int glob_tid, int tid)
|
||||
{
|
||||
if (glob_tid < len)
|
||||
smem[glob_tid] = vec1[glob_tid];
|
||||
__syncthreads();
|
||||
|
||||
U* vec1ValsPtr = vec1Vals;
|
||||
|
||||
#pragma unroll
|
||||
for (int i = tid; i < MAX_LEN; i += THREAD_DIM)
|
||||
*vec1ValsPtr++ = smem[i];
|
||||
|
||||
__syncthreads();
|
||||
}
|
||||
|
||||
template <typename T2, typename Dist>
|
||||
__device__ __forceinline__ void calc(const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid) const
|
||||
{
|
||||
calcVecDiffCached<THREAD_DIM, MAX_LEN, LEN_EQ_MAX_LEN>(vec1Vals, vec2, len, dist, smem, tid);
|
||||
}
|
||||
|
||||
U vec1Vals[MAX_LEN / THREAD_DIM];
|
||||
};
|
||||
}}} // namespace cv { namespace cuda { namespace cudev
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif // __OPENCV_CUDA_VEC_DISTANCE_HPP__
|
930
3rdparty/include/opencv2/core/cuda/vec_math.hpp
vendored
Normal file
930
3rdparty/include/opencv2/core/cuda/vec_math.hpp
vendored
Normal file
@ -0,0 +1,930 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_CUDA_VECMATH_HPP__
|
||||
#define __OPENCV_CUDA_VECMATH_HPP__
|
||||
|
||||
#include "vec_traits.hpp"
|
||||
#include "saturate_cast.hpp"
|
||||
|
||||
/** @file
|
||||
* @deprecated Use @ref cudev instead.
|
||||
*/
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
|
||||
// saturate_cast
|
||||
|
||||
namespace vec_math_detail
|
||||
{
|
||||
template <int cn, typename VecD> struct SatCastHelper;
|
||||
template <typename VecD> struct SatCastHelper<1, VecD>
|
||||
{
|
||||
template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
|
||||
{
|
||||
typedef typename VecTraits<VecD>::elem_type D;
|
||||
return VecTraits<VecD>::make(saturate_cast<D>(v.x));
|
||||
}
|
||||
};
|
||||
template <typename VecD> struct SatCastHelper<2, VecD>
|
||||
{
|
||||
template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
|
||||
{
|
||||
typedef typename VecTraits<VecD>::elem_type D;
|
||||
return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y));
|
||||
}
|
||||
};
|
||||
template <typename VecD> struct SatCastHelper<3, VecD>
|
||||
{
|
||||
template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
|
||||
{
|
||||
typedef typename VecTraits<VecD>::elem_type D;
|
||||
return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y), saturate_cast<D>(v.z));
|
||||
}
|
||||
};
|
||||
template <typename VecD> struct SatCastHelper<4, VecD>
|
||||
{
|
||||
template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
|
||||
{
|
||||
typedef typename VecTraits<VecD>::elem_type D;
|
||||
return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y), saturate_cast<D>(v.z), saturate_cast<D>(v.w));
|
||||
}
|
||||
};
|
||||
|
||||
template <typename VecD, typename VecS> static __device__ __forceinline__ VecD saturate_cast_helper(const VecS& v)
|
||||
{
|
||||
return SatCastHelper<VecTraits<VecD>::cn, VecD>::cast(v);
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const uchar1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const char1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const ushort1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const short1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const uint1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const int1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const float1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const double1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const uchar2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const char2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const ushort2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const short2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const uint2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const int2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const float2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const double2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const uchar3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const char3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const ushort3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const short3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const uint3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const int3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const float3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const double3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const uchar4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const char4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const ushort4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const short4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const uint4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const int4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const float4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
template<typename T> static __device__ __forceinline__ T saturate_cast(const double4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||
|
||||
// unary operators
|
||||
|
||||
#define CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(op, input_type, output_type) \
|
||||
__device__ __forceinline__ output_type ## 1 operator op(const input_type ## 1 & a) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 1>::make(op (a.x)); \
|
||||
} \
|
||||
__device__ __forceinline__ output_type ## 2 operator op(const input_type ## 2 & a) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 2>::make(op (a.x), op (a.y)); \
|
||||
} \
|
||||
__device__ __forceinline__ output_type ## 3 operator op(const input_type ## 3 & a) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 3>::make(op (a.x), op (a.y), op (a.z)); \
|
||||
} \
|
||||
__device__ __forceinline__ output_type ## 4 operator op(const input_type ## 4 & a) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 4>::make(op (a.x), op (a.y), op (a.z), op (a.w)); \
|
||||
}
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, char, char)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, short, short)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, int, int)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, float, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, uchar, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, char, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, ushort, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, short, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, int, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, uint, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, float, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, double, uchar)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, uchar, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, char, char)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, ushort, ushort)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, short, short)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, int, int)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, uint, uint)
|
||||
|
||||
#undef CV_CUDEV_IMPLEMENT_VEC_UNARY_OP
|
||||
|
||||
// unary functions
|
||||
|
||||
#define CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(func_name, func, input_type, output_type) \
|
||||
__device__ __forceinline__ output_type ## 1 func_name(const input_type ## 1 & a) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 1>::make(func (a.x)); \
|
||||
} \
|
||||
__device__ __forceinline__ output_type ## 2 func_name(const input_type ## 2 & a) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 2>::make(func (a.x), func (a.y)); \
|
||||
} \
|
||||
__device__ __forceinline__ output_type ## 3 func_name(const input_type ## 3 & a) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 3>::make(func (a.x), func (a.y), func (a.z)); \
|
||||
} \
|
||||
__device__ __forceinline__ output_type ## 4 func_name(const input_type ## 4 & a) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 4>::make(func (a.x), func (a.y), func (a.z), func (a.w)); \
|
||||
}
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, /*::abs*/, uchar, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::abs, char, char)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, /*::abs*/, ushort, ushort)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::abs, short, short)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::abs, int, int)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, /*::abs*/, uint, uint)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::fabsf, float, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::fabs, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, uchar, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, char, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, ushort, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, short, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, int, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, uint, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, float, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrt, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, uchar, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, char, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, ushort, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, short, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, int, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, uint, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, float, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::exp, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, uchar, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, char, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, ushort, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, short, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, int, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, uint, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, float, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, uchar, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, char, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, ushort, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, short, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, int, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, uint, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, float, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, uchar, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, char, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, ushort, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, short, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, int, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, uint, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, float, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::log, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, uchar, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, char, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, ushort, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, short, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, int, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, uint, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, float, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, uchar, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, char, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, ushort, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, short, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, int, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, uint, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, float, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, uchar, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, char, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, ushort, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, short, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, int, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, uint, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, float, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sin, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, uchar, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, char, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, ushort, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, short, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, int, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, uint, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, float, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cos, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, uchar, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, char, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, ushort, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, short, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, int, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, uint, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, float, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tan, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, uchar, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, char, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, ushort, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, short, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, int, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, uint, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, float, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asin, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, uchar, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, char, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, ushort, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, short, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, int, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, uint, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, float, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acos, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, uchar, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, char, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, ushort, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, short, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, int, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, uint, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, float, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atan, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, uchar, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, char, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, ushort, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, short, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, int, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, uint, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, float, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinh, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, uchar, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, char, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, ushort, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, short, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, int, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, uint, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, float, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::cosh, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, uchar, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, char, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, ushort, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, short, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, int, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, uint, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, float, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanh, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, uchar, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, char, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, ushort, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, short, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, int, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, uint, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, float, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinh, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, uchar, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, char, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, ushort, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, short, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, int, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, uint, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, float, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acosh, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, uchar, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, char, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, ushort, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, short, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, int, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, uint, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, float, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanh, double, double)
|
||||
|
||||
#undef CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC
|
||||
|
||||
// binary operators (vec & vec)
|
||||
|
||||
#define CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(op, input_type, output_type) \
|
||||
__device__ __forceinline__ output_type ## 1 operator op(const input_type ## 1 & a, const input_type ## 1 & b) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 1>::make(a.x op b.x); \
|
||||
} \
|
||||
__device__ __forceinline__ output_type ## 2 operator op(const input_type ## 2 & a, const input_type ## 2 & b) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 2>::make(a.x op b.x, a.y op b.y); \
|
||||
} \
|
||||
__device__ __forceinline__ output_type ## 3 operator op(const input_type ## 3 & a, const input_type ## 3 & b) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 3>::make(a.x op b.x, a.y op b.y, a.z op b.z); \
|
||||
} \
|
||||
__device__ __forceinline__ output_type ## 4 operator op(const input_type ## 4 & a, const input_type ## 4 & b) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 4>::make(a.x op b.x, a.y op b.y, a.z op b.z, a.w op b.w); \
|
||||
}
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, uchar, int)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, char, int)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, ushort, int)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, short, int)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, int, int)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, uint, uint)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, float, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, uchar, int)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, char, int)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, ushort, int)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, short, int)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, int, int)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, uint, uint)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, float, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, uchar, int)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, char, int)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, ushort, int)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, short, int)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, int, int)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, uint, uint)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, float, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, uchar, int)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, char, int)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, ushort, int)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, short, int)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, int, int)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, uint, uint)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, float, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, uchar, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, char, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, ushort, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, short, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, int, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, uint, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, float, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, double, uchar)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, uchar, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, char, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, ushort, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, short, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, int, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, uint, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, float, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, double, uchar)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, uchar, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, char, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, ushort, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, short, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, int, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, uint, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, float, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, double, uchar)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, uchar, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, char, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, ushort, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, short, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, int, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, uint, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, float, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, double, uchar)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, uchar, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, char, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, ushort, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, short, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, int, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, uint, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, float, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, double, uchar)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, uchar, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, char, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, ushort, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, short, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, int, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, uint, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, float, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, double, uchar)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, uchar, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, char, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, ushort, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, short, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, int, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, uint, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, float, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, double, uchar)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, uchar, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, char, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, ushort, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, short, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, int, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, uint, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, float, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, double, uchar)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, uchar, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, char, char)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, ushort, ushort)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, short, short)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, int, int)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, uint, uint)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, uchar, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, char, char)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, ushort, ushort)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, short, short)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, int, int)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, uint, uint)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, uchar, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, char, char)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, ushort, ushort)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, short, short)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, int, int)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, uint, uint)
|
||||
|
||||
#undef CV_CUDEV_IMPLEMENT_VEC_BINARY_OP
|
||||
|
||||
// binary operators (vec & scalar)
|
||||
|
||||
#define CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(op, input_type, scalar_type, output_type) \
|
||||
__device__ __forceinline__ output_type ## 1 operator op(const input_type ## 1 & a, scalar_type s) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 1>::make(a.x op s); \
|
||||
} \
|
||||
__device__ __forceinline__ output_type ## 1 operator op(scalar_type s, const input_type ## 1 & b) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 1>::make(s op b.x); \
|
||||
} \
|
||||
__device__ __forceinline__ output_type ## 2 operator op(const input_type ## 2 & a, scalar_type s) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 2>::make(a.x op s, a.y op s); \
|
||||
} \
|
||||
__device__ __forceinline__ output_type ## 2 operator op(scalar_type s, const input_type ## 2 & b) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 2>::make(s op b.x, s op b.y); \
|
||||
} \
|
||||
__device__ __forceinline__ output_type ## 3 operator op(const input_type ## 3 & a, scalar_type s) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 3>::make(a.x op s, a.y op s, a.z op s); \
|
||||
} \
|
||||
__device__ __forceinline__ output_type ## 3 operator op(scalar_type s, const input_type ## 3 & b) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 3>::make(s op b.x, s op b.y, s op b.z); \
|
||||
} \
|
||||
__device__ __forceinline__ output_type ## 4 operator op(const input_type ## 4 & a, scalar_type s) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 4>::make(a.x op s, a.y op s, a.z op s, a.w op s); \
|
||||
} \
|
||||
__device__ __forceinline__ output_type ## 4 operator op(scalar_type s, const input_type ## 4 & b) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 4>::make(s op b.x, s op b.y, s op b.z, s op b.w); \
|
||||
}
|
||||
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uchar, int, int)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uchar, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uchar, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, char, int, int)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, char, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, char, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, ushort, int, int)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, ushort, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, ushort, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, short, int, int)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, short, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, short, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, int, int, int)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, int, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, int, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uint, uint, uint)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uint, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uint, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, float, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, float, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, double, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uchar, int, int)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uchar, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uchar, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, char, int, int)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, char, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, char, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, ushort, int, int)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, ushort, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, ushort, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, short, int, int)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, short, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, short, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, int, int, int)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, int, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, int, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uint, uint, uint)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uint, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uint, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, float, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, float, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, double, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uchar, int, int)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uchar, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uchar, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, char, int, int)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, char, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, char, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, ushort, int, int)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, ushort, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, ushort, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, short, int, int)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, short, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, short, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, int, int, int)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, int, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, int, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uint, uint, uint)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uint, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uint, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, float, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, float, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, double, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uchar, int, int)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uchar, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uchar, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, char, int, int)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, char, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, char, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, ushort, int, int)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, ushort, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, ushort, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, short, int, int)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, short, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, short, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, int, int, int)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, int, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, int, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uint, uint, uint)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uint, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uint, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, float, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, float, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, double, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, uchar, uchar, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, char, char, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, ushort, ushort, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, short, short, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, int, int, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, uint, uint, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, float, float, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, double, double, uchar)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, uchar, uchar, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, char, char, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, ushort, ushort, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, short, short, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, int, int, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, uint, uint, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, float, float, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, double, double, uchar)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, uchar, uchar, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, char, char, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, ushort, ushort, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, short, short, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, int, int, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, uint, uint, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, float, float, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, double, double, uchar)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, uchar, uchar, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, char, char, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, ushort, ushort, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, short, short, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, int, int, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, uint, uint, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, float, float, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, double, double, uchar)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, uchar, uchar, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, char, char, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, ushort, ushort, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, short, short, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, int, int, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, uint, uint, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, float, float, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, double, double, uchar)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, uchar, uchar, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, char, char, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, ushort, ushort, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, short, short, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, int, int, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, uint, uint, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, float, float, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, double, double, uchar)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, uchar, uchar, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, char, char, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, ushort, ushort, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, short, short, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, int, int, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, uint, uint, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, float, float, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, double, double, uchar)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, uchar, uchar, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, char, char, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, ushort, ushort, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, short, short, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, int, int, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, uint, uint, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, float, float, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, double, double, uchar)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, uchar, uchar, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, char, char, char)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, ushort, ushort, ushort)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, short, short, short)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, int, int, int)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, uint, uint, uint)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, uchar, uchar, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, char, char, char)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, ushort, ushort, ushort)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, short, short, short)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, int, int, int)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, uint, uint, uint)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, uchar, uchar, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, char, char, char)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, ushort, ushort, ushort)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, short, short, short)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, int, int, int)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, uint, uint, uint)
|
||||
|
||||
#undef CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP
|
||||
|
||||
// binary function (vec & vec)
|
||||
|
||||
#define CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(func_name, func, input_type, output_type) \
|
||||
__device__ __forceinline__ output_type ## 1 func_name(const input_type ## 1 & a, const input_type ## 1 & b) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 1>::make(func (a.x, b.x)); \
|
||||
} \
|
||||
__device__ __forceinline__ output_type ## 2 func_name(const input_type ## 2 & a, const input_type ## 2 & b) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 2>::make(func (a.x, b.x), func (a.y, b.y)); \
|
||||
} \
|
||||
__device__ __forceinline__ output_type ## 3 func_name(const input_type ## 3 & a, const input_type ## 3 & b) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 3>::make(func (a.x, b.x), func (a.y, b.y), func (a.z, b.z)); \
|
||||
} \
|
||||
__device__ __forceinline__ output_type ## 4 func_name(const input_type ## 4 & a, const input_type ## 4 & b) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 4>::make(func (a.x, b.x), func (a.y, b.y), func (a.z, b.z), func (a.w, b.w)); \
|
||||
}
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, uchar, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, char, char)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, ushort, ushort)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, short, short)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, uint, uint)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, int, int)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::fmaxf, float, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::fmax, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, uchar, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, char, char)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, ushort, ushort)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, short, short)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, uint, uint)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, int, int)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::fminf, float, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::fmin, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, uchar, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, char, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, ushort, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, short, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, uint, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, int, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, float, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypot, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, uchar, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, char, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, ushort, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, short, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, uint, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, int, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, float, float)
|
||||
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2, double, double)
|
||||
|
||||
#undef CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC
|
||||
|
||||
// binary function (vec & scalar)
|
||||
|
||||
#define CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(func_name, func, input_type, scalar_type, output_type) \
|
||||
__device__ __forceinline__ output_type ## 1 func_name(const input_type ## 1 & a, scalar_type s) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 1>::make(func ((output_type) a.x, (output_type) s)); \
|
||||
} \
|
||||
__device__ __forceinline__ output_type ## 1 func_name(scalar_type s, const input_type ## 1 & b) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 1>::make(func ((output_type) s, (output_type) b.x)); \
|
||||
} \
|
||||
__device__ __forceinline__ output_type ## 2 func_name(const input_type ## 2 & a, scalar_type s) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 2>::make(func ((output_type) a.x, (output_type) s), func ((output_type) a.y, (output_type) s)); \
|
||||
} \
|
||||
__device__ __forceinline__ output_type ## 2 func_name(scalar_type s, const input_type ## 2 & b) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 2>::make(func ((output_type) s, (output_type) b.x), func ((output_type) s, (output_type) b.y)); \
|
||||
} \
|
||||
__device__ __forceinline__ output_type ## 3 func_name(const input_type ## 3 & a, scalar_type s) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 3>::make(func ((output_type) a.x, (output_type) s), func ((output_type) a.y, (output_type) s), func ((output_type) a.z, (output_type) s)); \
|
||||
} \
|
||||
__device__ __forceinline__ output_type ## 3 func_name(scalar_type s, const input_type ## 3 & b) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 3>::make(func ((output_type) s, (output_type) b.x), func ((output_type) s, (output_type) b.y), func ((output_type) s, (output_type) b.z)); \
|
||||
} \
|
||||
__device__ __forceinline__ output_type ## 4 func_name(const input_type ## 4 & a, scalar_type s) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 4>::make(func ((output_type) a.x, (output_type) s), func ((output_type) a.y, (output_type) s), func ((output_type) a.z, (output_type) s), func ((output_type) a.w, (output_type) s)); \
|
||||
} \
|
||||
__device__ __forceinline__ output_type ## 4 func_name(scalar_type s, const input_type ## 4 & b) \
|
||||
{ \
|
||||
return VecTraits<output_type ## 4>::make(func ((output_type) s, (output_type) b.x), func ((output_type) s, (output_type) b.y), func ((output_type) s, (output_type) b.z), func ((output_type) s, (output_type) b.w)); \
|
||||
}
|
||||
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, uchar, uchar, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, uchar, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, uchar, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, char, char, char)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, char, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, char, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, ushort, ushort, ushort)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, ushort, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, ushort, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, short, short, short)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, short, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, short, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, uint, uint, uint)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, uint, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, uint, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, int, int, int)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, int, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, int, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, float, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, float, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, double, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, uchar, uchar, uchar)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, uchar, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, uchar, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, char, char, char)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, char, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, char, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, ushort, ushort, ushort)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, ushort, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, ushort, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, short, short, short)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, short, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, short, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, uint, uint, uint)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, uint, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, uint, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, int, int, int)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, int, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, int, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, float, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, float, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, double, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, uchar, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, uchar, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, char, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, char, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, ushort, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, ushort, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, short, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, short, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, uint, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, uint, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, int, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, int, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, float, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, float, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, double, double, double)
|
||||
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, uchar, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, uchar, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, char, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, char, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, ushort, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, ushort, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, short, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, short, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, uint, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, uint, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, int, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, int, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, float, float, float)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, float, double, double)
|
||||
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, double, double, double)
|
||||
|
||||
#undef CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC
|
||||
|
||||
}}} // namespace cv { namespace cuda { namespace device
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif // __OPENCV_CUDA_VECMATH_HPP__
|
288
3rdparty/include/opencv2/core/cuda/vec_traits.hpp
vendored
Normal file
288
3rdparty/include/opencv2/core/cuda/vec_traits.hpp
vendored
Normal file
@ -0,0 +1,288 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_CUDA_VEC_TRAITS_HPP__
|
||||
#define __OPENCV_CUDA_VEC_TRAITS_HPP__
|
||||
|
||||
#include "common.hpp"
|
||||
|
||||
/** @file
|
||||
* @deprecated Use @ref cudev instead.
|
||||
*/
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
template<typename T, int N> struct TypeVec;
|
||||
|
||||
struct __align__(8) uchar8
|
||||
{
|
||||
uchar a0, a1, a2, a3, a4, a5, a6, a7;
|
||||
};
|
||||
static __host__ __device__ __forceinline__ uchar8 make_uchar8(uchar a0, uchar a1, uchar a2, uchar a3, uchar a4, uchar a5, uchar a6, uchar a7)
|
||||
{
|
||||
uchar8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
|
||||
return val;
|
||||
}
|
||||
struct __align__(8) char8
|
||||
{
|
||||
schar a0, a1, a2, a3, a4, a5, a6, a7;
|
||||
};
|
||||
static __host__ __device__ __forceinline__ char8 make_char8(schar a0, schar a1, schar a2, schar a3, schar a4, schar a5, schar a6, schar a7)
|
||||
{
|
||||
char8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
|
||||
return val;
|
||||
}
|
||||
struct __align__(16) ushort8
|
||||
{
|
||||
ushort a0, a1, a2, a3, a4, a5, a6, a7;
|
||||
};
|
||||
static __host__ __device__ __forceinline__ ushort8 make_ushort8(ushort a0, ushort a1, ushort a2, ushort a3, ushort a4, ushort a5, ushort a6, ushort a7)
|
||||
{
|
||||
ushort8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
|
||||
return val;
|
||||
}
|
||||
struct __align__(16) short8
|
||||
{
|
||||
short a0, a1, a2, a3, a4, a5, a6, a7;
|
||||
};
|
||||
static __host__ __device__ __forceinline__ short8 make_short8(short a0, short a1, short a2, short a3, short a4, short a5, short a6, short a7)
|
||||
{
|
||||
short8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
|
||||
return val;
|
||||
}
|
||||
struct __align__(32) uint8
|
||||
{
|
||||
uint a0, a1, a2, a3, a4, a5, a6, a7;
|
||||
};
|
||||
static __host__ __device__ __forceinline__ uint8 make_uint8(uint a0, uint a1, uint a2, uint a3, uint a4, uint a5, uint a6, uint a7)
|
||||
{
|
||||
uint8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
|
||||
return val;
|
||||
}
|
||||
struct __align__(32) int8
|
||||
{
|
||||
int a0, a1, a2, a3, a4, a5, a6, a7;
|
||||
};
|
||||
static __host__ __device__ __forceinline__ int8 make_int8(int a0, int a1, int a2, int a3, int a4, int a5, int a6, int a7)
|
||||
{
|
||||
int8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
|
||||
return val;
|
||||
}
|
||||
struct __align__(32) float8
|
||||
{
|
||||
float a0, a1, a2, a3, a4, a5, a6, a7;
|
||||
};
|
||||
static __host__ __device__ __forceinline__ float8 make_float8(float a0, float a1, float a2, float a3, float a4, float a5, float a6, float a7)
|
||||
{
|
||||
float8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
|
||||
return val;
|
||||
}
|
||||
struct double8
|
||||
{
|
||||
double a0, a1, a2, a3, a4, a5, a6, a7;
|
||||
};
|
||||
static __host__ __device__ __forceinline__ double8 make_double8(double a0, double a1, double a2, double a3, double a4, double a5, double a6, double a7)
|
||||
{
|
||||
double8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
|
||||
return val;
|
||||
}
|
||||
|
||||
#define OPENCV_CUDA_IMPLEMENT_TYPE_VEC(type) \
|
||||
template<> struct TypeVec<type, 1> { typedef type vec_type; }; \
|
||||
template<> struct TypeVec<type ## 1, 1> { typedef type ## 1 vec_type; }; \
|
||||
template<> struct TypeVec<type, 2> { typedef type ## 2 vec_type; }; \
|
||||
template<> struct TypeVec<type ## 2, 2> { typedef type ## 2 vec_type; }; \
|
||||
template<> struct TypeVec<type, 3> { typedef type ## 3 vec_type; }; \
|
||||
template<> struct TypeVec<type ## 3, 3> { typedef type ## 3 vec_type; }; \
|
||||
template<> struct TypeVec<type, 4> { typedef type ## 4 vec_type; }; \
|
||||
template<> struct TypeVec<type ## 4, 4> { typedef type ## 4 vec_type; }; \
|
||||
template<> struct TypeVec<type, 8> { typedef type ## 8 vec_type; }; \
|
||||
template<> struct TypeVec<type ## 8, 8> { typedef type ## 8 vec_type; };
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_TYPE_VEC(uchar)
|
||||
OPENCV_CUDA_IMPLEMENT_TYPE_VEC(char)
|
||||
OPENCV_CUDA_IMPLEMENT_TYPE_VEC(ushort)
|
||||
OPENCV_CUDA_IMPLEMENT_TYPE_VEC(short)
|
||||
OPENCV_CUDA_IMPLEMENT_TYPE_VEC(int)
|
||||
OPENCV_CUDA_IMPLEMENT_TYPE_VEC(uint)
|
||||
OPENCV_CUDA_IMPLEMENT_TYPE_VEC(float)
|
||||
OPENCV_CUDA_IMPLEMENT_TYPE_VEC(double)
|
||||
|
||||
#undef OPENCV_CUDA_IMPLEMENT_TYPE_VEC
|
||||
|
||||
template<> struct TypeVec<schar, 1> { typedef schar vec_type; };
|
||||
template<> struct TypeVec<schar, 2> { typedef char2 vec_type; };
|
||||
template<> struct TypeVec<schar, 3> { typedef char3 vec_type; };
|
||||
template<> struct TypeVec<schar, 4> { typedef char4 vec_type; };
|
||||
template<> struct TypeVec<schar, 8> { typedef char8 vec_type; };
|
||||
|
||||
template<> struct TypeVec<bool, 1> { typedef uchar vec_type; };
|
||||
template<> struct TypeVec<bool, 2> { typedef uchar2 vec_type; };
|
||||
template<> struct TypeVec<bool, 3> { typedef uchar3 vec_type; };
|
||||
template<> struct TypeVec<bool, 4> { typedef uchar4 vec_type; };
|
||||
template<> struct TypeVec<bool, 8> { typedef uchar8 vec_type; };
|
||||
|
||||
template<typename T> struct VecTraits;
|
||||
|
||||
#define OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(type) \
|
||||
template<> struct VecTraits<type> \
|
||||
{ \
|
||||
typedef type elem_type; \
|
||||
enum {cn=1}; \
|
||||
static __device__ __host__ __forceinline__ type all(type v) {return v;} \
|
||||
static __device__ __host__ __forceinline__ type make(type x) {return x;} \
|
||||
static __device__ __host__ __forceinline__ type make(const type* v) {return *v;} \
|
||||
}; \
|
||||
template<> struct VecTraits<type ## 1> \
|
||||
{ \
|
||||
typedef type elem_type; \
|
||||
enum {cn=1}; \
|
||||
static __device__ __host__ __forceinline__ type ## 1 all(type v) {return make_ ## type ## 1(v);} \
|
||||
static __device__ __host__ __forceinline__ type ## 1 make(type x) {return make_ ## type ## 1(x);} \
|
||||
static __device__ __host__ __forceinline__ type ## 1 make(const type* v) {return make_ ## type ## 1(*v);} \
|
||||
}; \
|
||||
template<> struct VecTraits<type ## 2> \
|
||||
{ \
|
||||
typedef type elem_type; \
|
||||
enum {cn=2}; \
|
||||
static __device__ __host__ __forceinline__ type ## 2 all(type v) {return make_ ## type ## 2(v, v);} \
|
||||
static __device__ __host__ __forceinline__ type ## 2 make(type x, type y) {return make_ ## type ## 2(x, y);} \
|
||||
static __device__ __host__ __forceinline__ type ## 2 make(const type* v) {return make_ ## type ## 2(v[0], v[1]);} \
|
||||
}; \
|
||||
template<> struct VecTraits<type ## 3> \
|
||||
{ \
|
||||
typedef type elem_type; \
|
||||
enum {cn=3}; \
|
||||
static __device__ __host__ __forceinline__ type ## 3 all(type v) {return make_ ## type ## 3(v, v, v);} \
|
||||
static __device__ __host__ __forceinline__ type ## 3 make(type x, type y, type z) {return make_ ## type ## 3(x, y, z);} \
|
||||
static __device__ __host__ __forceinline__ type ## 3 make(const type* v) {return make_ ## type ## 3(v[0], v[1], v[2]);} \
|
||||
}; \
|
||||
template<> struct VecTraits<type ## 4> \
|
||||
{ \
|
||||
typedef type elem_type; \
|
||||
enum {cn=4}; \
|
||||
static __device__ __host__ __forceinline__ type ## 4 all(type v) {return make_ ## type ## 4(v, v, v, v);} \
|
||||
static __device__ __host__ __forceinline__ type ## 4 make(type x, type y, type z, type w) {return make_ ## type ## 4(x, y, z, w);} \
|
||||
static __device__ __host__ __forceinline__ type ## 4 make(const type* v) {return make_ ## type ## 4(v[0], v[1], v[2], v[3]);} \
|
||||
}; \
|
||||
template<> struct VecTraits<type ## 8> \
|
||||
{ \
|
||||
typedef type elem_type; \
|
||||
enum {cn=8}; \
|
||||
static __device__ __host__ __forceinline__ type ## 8 all(type v) {return make_ ## type ## 8(v, v, v, v, v, v, v, v);} \
|
||||
static __device__ __host__ __forceinline__ type ## 8 make(type a0, type a1, type a2, type a3, type a4, type a5, type a6, type a7) {return make_ ## type ## 8(a0, a1, a2, a3, a4, a5, a6, a7);} \
|
||||
static __device__ __host__ __forceinline__ type ## 8 make(const type* v) {return make_ ## type ## 8(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);} \
|
||||
};
|
||||
|
||||
OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(uchar)
|
||||
OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(ushort)
|
||||
OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(short)
|
||||
OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(int)
|
||||
OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(uint)
|
||||
OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(float)
|
||||
OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(double)
|
||||
|
||||
#undef OPENCV_CUDA_IMPLEMENT_VEC_TRAITS
|
||||
|
||||
template<> struct VecTraits<char>
|
||||
{
|
||||
typedef char elem_type;
|
||||
enum {cn=1};
|
||||
static __device__ __host__ __forceinline__ char all(char v) {return v;}
|
||||
static __device__ __host__ __forceinline__ char make(char x) {return x;}
|
||||
static __device__ __host__ __forceinline__ char make(const char* x) {return *x;}
|
||||
};
|
||||
template<> struct VecTraits<schar>
|
||||
{
|
||||
typedef schar elem_type;
|
||||
enum {cn=1};
|
||||
static __device__ __host__ __forceinline__ schar all(schar v) {return v;}
|
||||
static __device__ __host__ __forceinline__ schar make(schar x) {return x;}
|
||||
static __device__ __host__ __forceinline__ schar make(const schar* x) {return *x;}
|
||||
};
|
||||
template<> struct VecTraits<char1>
|
||||
{
|
||||
typedef schar elem_type;
|
||||
enum {cn=1};
|
||||
static __device__ __host__ __forceinline__ char1 all(schar v) {return make_char1(v);}
|
||||
static __device__ __host__ __forceinline__ char1 make(schar x) {return make_char1(x);}
|
||||
static __device__ __host__ __forceinline__ char1 make(const schar* v) {return make_char1(v[0]);}
|
||||
};
|
||||
template<> struct VecTraits<char2>
|
||||
{
|
||||
typedef schar elem_type;
|
||||
enum {cn=2};
|
||||
static __device__ __host__ __forceinline__ char2 all(schar v) {return make_char2(v, v);}
|
||||
static __device__ __host__ __forceinline__ char2 make(schar x, schar y) {return make_char2(x, y);}
|
||||
static __device__ __host__ __forceinline__ char2 make(const schar* v) {return make_char2(v[0], v[1]);}
|
||||
};
|
||||
template<> struct VecTraits<char3>
|
||||
{
|
||||
typedef schar elem_type;
|
||||
enum {cn=3};
|
||||
static __device__ __host__ __forceinline__ char3 all(schar v) {return make_char3(v, v, v);}
|
||||
static __device__ __host__ __forceinline__ char3 make(schar x, schar y, schar z) {return make_char3(x, y, z);}
|
||||
static __device__ __host__ __forceinline__ char3 make(const schar* v) {return make_char3(v[0], v[1], v[2]);}
|
||||
};
|
||||
template<> struct VecTraits<char4>
|
||||
{
|
||||
typedef schar elem_type;
|
||||
enum {cn=4};
|
||||
static __device__ __host__ __forceinline__ char4 all(schar v) {return make_char4(v, v, v, v);}
|
||||
static __device__ __host__ __forceinline__ char4 make(schar x, schar y, schar z, schar w) {return make_char4(x, y, z, w);}
|
||||
static __device__ __host__ __forceinline__ char4 make(const schar* v) {return make_char4(v[0], v[1], v[2], v[3]);}
|
||||
};
|
||||
template<> struct VecTraits<char8>
|
||||
{
|
||||
typedef schar elem_type;
|
||||
enum {cn=8};
|
||||
static __device__ __host__ __forceinline__ char8 all(schar v) {return make_char8(v, v, v, v, v, v, v, v);}
|
||||
static __device__ __host__ __forceinline__ char8 make(schar a0, schar a1, schar a2, schar a3, schar a4, schar a5, schar a6, schar a7) {return make_char8(a0, a1, a2, a3, a4, a5, a6, a7);}
|
||||
static __device__ __host__ __forceinline__ char8 make(const schar* v) {return make_char8(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);}
|
||||
};
|
||||
}}} // namespace cv { namespace cuda { namespace cudev
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif // __OPENCV_CUDA_VEC_TRAITS_HPP__
|
139
3rdparty/include/opencv2/core/cuda/warp.hpp
vendored
Normal file
139
3rdparty/include/opencv2/core/cuda/warp.hpp
vendored
Normal file
@ -0,0 +1,139 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_CUDA_DEVICE_WARP_HPP__
|
||||
#define __OPENCV_CUDA_DEVICE_WARP_HPP__
|
||||
|
||||
/** @file
|
||||
* @deprecated Use @ref cudev instead.
|
||||
*/
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
struct Warp
|
||||
{
|
||||
enum
|
||||
{
|
||||
LOG_WARP_SIZE = 5,
|
||||
WARP_SIZE = 1 << LOG_WARP_SIZE,
|
||||
STRIDE = WARP_SIZE
|
||||
};
|
||||
|
||||
/** \brief Returns the warp lane ID of the calling thread. */
|
||||
static __device__ __forceinline__ unsigned int laneId()
|
||||
{
|
||||
unsigned int ret;
|
||||
asm("mov.u32 %0, %laneid;" : "=r"(ret) );
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<typename It, typename T>
|
||||
static __device__ __forceinline__ void fill(It beg, It end, const T& value)
|
||||
{
|
||||
for(It t = beg + laneId(); t < end; t += STRIDE)
|
||||
*t = value;
|
||||
}
|
||||
|
||||
template<typename InIt, typename OutIt>
|
||||
static __device__ __forceinline__ OutIt copy(InIt beg, InIt end, OutIt out)
|
||||
{
|
||||
for(InIt t = beg + laneId(); t < end; t += STRIDE, out += STRIDE)
|
||||
*out = *t;
|
||||
return out;
|
||||
}
|
||||
|
||||
template<typename InIt, typename OutIt, class UnOp>
|
||||
static __device__ __forceinline__ OutIt transform(InIt beg, InIt end, OutIt out, UnOp op)
|
||||
{
|
||||
for(InIt t = beg + laneId(); t < end; t += STRIDE, out += STRIDE)
|
||||
*out = op(*t);
|
||||
return out;
|
||||
}
|
||||
|
||||
template<typename InIt1, typename InIt2, typename OutIt, class BinOp>
|
||||
static __device__ __forceinline__ OutIt transform(InIt1 beg1, InIt1 end1, InIt2 beg2, OutIt out, BinOp op)
|
||||
{
|
||||
unsigned int lane = laneId();
|
||||
|
||||
InIt1 t1 = beg1 + lane;
|
||||
InIt2 t2 = beg2 + lane;
|
||||
for(; t1 < end1; t1 += STRIDE, t2 += STRIDE, out += STRIDE)
|
||||
*out = op(*t1, *t2);
|
||||
return out;
|
||||
}
|
||||
|
||||
template <class T, class BinOp>
|
||||
static __device__ __forceinline__ T reduce(volatile T *ptr, BinOp op)
|
||||
{
|
||||
const unsigned int lane = laneId();
|
||||
|
||||
if (lane < 16)
|
||||
{
|
||||
T partial = ptr[lane];
|
||||
|
||||
ptr[lane] = partial = op(partial, ptr[lane + 16]);
|
||||
ptr[lane] = partial = op(partial, ptr[lane + 8]);
|
||||
ptr[lane] = partial = op(partial, ptr[lane + 4]);
|
||||
ptr[lane] = partial = op(partial, ptr[lane + 2]);
|
||||
ptr[lane] = partial = op(partial, ptr[lane + 1]);
|
||||
}
|
||||
|
||||
return *ptr;
|
||||
}
|
||||
|
||||
template<typename OutIt, typename T>
|
||||
static __device__ __forceinline__ void yota(OutIt beg, OutIt end, T value)
|
||||
{
|
||||
unsigned int lane = laneId();
|
||||
value += lane;
|
||||
|
||||
for(OutIt t = beg + lane; t < end; t += STRIDE, value += STRIDE)
|
||||
*t = value;
|
||||
}
|
||||
};
|
||||
}}} // namespace cv { namespace cuda { namespace cudev
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif /* __OPENCV_CUDA_DEVICE_WARP_HPP__ */
|
76
3rdparty/include/opencv2/core/cuda/warp_reduce.hpp
vendored
Normal file
76
3rdparty/include/opencv2/core/cuda/warp_reduce.hpp
vendored
Normal file
@ -0,0 +1,76 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef OPENCV_CUDA_WARP_REDUCE_HPP__
|
||||
#define OPENCV_CUDA_WARP_REDUCE_HPP__
|
||||
|
||||
/** @file
|
||||
* @deprecated Use @ref cudev instead.
|
||||
*/
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
template <class T>
|
||||
__device__ __forceinline__ T warp_reduce(volatile T *ptr , const unsigned int tid = threadIdx.x)
|
||||
{
|
||||
const unsigned int lane = tid & 31; // index of thread in warp (0..31)
|
||||
|
||||
if (lane < 16)
|
||||
{
|
||||
T partial = ptr[tid];
|
||||
|
||||
ptr[tid] = partial = partial + ptr[tid + 16];
|
||||
ptr[tid] = partial = partial + ptr[tid + 8];
|
||||
ptr[tid] = partial = partial + ptr[tid + 4];
|
||||
ptr[tid] = partial = partial + ptr[tid + 2];
|
||||
ptr[tid] = partial = partial + ptr[tid + 1];
|
||||
}
|
||||
|
||||
return ptr[tid - lane];
|
||||
}
|
||||
}}} // namespace cv { namespace cuda { namespace cudev {
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif /* OPENCV_CUDA_WARP_REDUCE_HPP__ */
|
153
3rdparty/include/opencv2/core/cuda/warp_shuffle.hpp
vendored
Normal file
153
3rdparty/include/opencv2/core/cuda/warp_shuffle.hpp
vendored
Normal file
@ -0,0 +1,153 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_CUDA_WARP_SHUFFLE_HPP__
|
||||
#define __OPENCV_CUDA_WARP_SHUFFLE_HPP__
|
||||
|
||||
/** @file
|
||||
* @deprecated Use @ref cudev instead.
|
||||
*/
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
template <typename T>
|
||||
__device__ __forceinline__ T shfl(T val, int srcLane, int width = warpSize)
|
||||
{
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
return __shfl(val, srcLane, width);
|
||||
#else
|
||||
return T();
|
||||
#endif
|
||||
}
|
||||
__device__ __forceinline__ unsigned int shfl(unsigned int val, int srcLane, int width = warpSize)
|
||||
{
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
return (unsigned int) __shfl((int) val, srcLane, width);
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
__device__ __forceinline__ double shfl(double val, int srcLane, int width = warpSize)
|
||||
{
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
int lo = __double2loint(val);
|
||||
int hi = __double2hiint(val);
|
||||
|
||||
lo = __shfl(lo, srcLane, width);
|
||||
hi = __shfl(hi, srcLane, width);
|
||||
|
||||
return __hiloint2double(hi, lo);
|
||||
#else
|
||||
return 0.0;
|
||||
#endif
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ __forceinline__ T shfl_down(T val, unsigned int delta, int width = warpSize)
|
||||
{
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
return __shfl_down(val, delta, width);
|
||||
#else
|
||||
return T();
|
||||
#endif
|
||||
}
|
||||
__device__ __forceinline__ unsigned int shfl_down(unsigned int val, unsigned int delta, int width = warpSize)
|
||||
{
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
return (unsigned int) __shfl_down((int) val, delta, width);
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
__device__ __forceinline__ double shfl_down(double val, unsigned int delta, int width = warpSize)
|
||||
{
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
int lo = __double2loint(val);
|
||||
int hi = __double2hiint(val);
|
||||
|
||||
lo = __shfl_down(lo, delta, width);
|
||||
hi = __shfl_down(hi, delta, width);
|
||||
|
||||
return __hiloint2double(hi, lo);
|
||||
#else
|
||||
return 0.0;
|
||||
#endif
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ __forceinline__ T shfl_up(T val, unsigned int delta, int width = warpSize)
|
||||
{
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
return __shfl_up(val, delta, width);
|
||||
#else
|
||||
return T();
|
||||
#endif
|
||||
}
|
||||
__device__ __forceinline__ unsigned int shfl_up(unsigned int val, unsigned int delta, int width = warpSize)
|
||||
{
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
return (unsigned int) __shfl_up((int) val, delta, width);
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
__device__ __forceinline__ double shfl_up(double val, unsigned int delta, int width = warpSize)
|
||||
{
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
int lo = __double2loint(val);
|
||||
int hi = __double2hiint(val);
|
||||
|
||||
lo = __shfl_up(lo, delta, width);
|
||||
hi = __shfl_up(hi, delta, width);
|
||||
|
||||
return __hiloint2double(hi, lo);
|
||||
#else
|
||||
return 0.0;
|
||||
#endif
|
||||
}
|
||||
}}}
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif // __OPENCV_CUDA_WARP_SHUFFLE_HPP__
|
87
3rdparty/include/opencv2/core/cuda_stream_accessor.hpp
vendored
Normal file
87
3rdparty/include/opencv2/core/cuda_stream_accessor.hpp
vendored
Normal file
@ -0,0 +1,87 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_CORE_CUDA_STREAM_ACCESSOR_HPP__
|
||||
#define __OPENCV_CORE_CUDA_STREAM_ACCESSOR_HPP__
|
||||
|
||||
#ifndef __cplusplus
|
||||
# error cuda_stream_accessor.hpp header must be compiled as C++
|
||||
#endif
|
||||
|
||||
/** @file cuda_stream_accessor.hpp
|
||||
* This is only header file that depends on CUDA Runtime API. All other headers are independent.
|
||||
*/
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include "opencv2/core/cvdef.h"
|
||||
|
||||
namespace cv
|
||||
{
|
||||
namespace cuda
|
||||
{
|
||||
|
||||
//! @addtogroup cudacore_struct
|
||||
//! @{
|
||||
|
||||
class Stream;
|
||||
class Event;
|
||||
|
||||
/** @brief Class that enables getting cudaStream_t from cuda::Stream
|
||||
*/
|
||||
struct StreamAccessor
|
||||
{
|
||||
CV_EXPORTS static cudaStream_t getStream(const Stream& stream);
|
||||
};
|
||||
|
||||
/** @brief Class that enables getting cudaEvent_t from cuda::Event
|
||||
*/
|
||||
struct EventAccessor
|
||||
{
|
||||
CV_EXPORTS static cudaEvent_t getEvent(const Event& event);
|
||||
};
|
||||
|
||||
//! @}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* __OPENCV_CORE_CUDA_STREAM_ACCESSOR_HPP__ */
|
135
3rdparty/include/opencv2/core/cuda_types.hpp
vendored
Normal file
135
3rdparty/include/opencv2/core/cuda_types.hpp
vendored
Normal file
@ -0,0 +1,135 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_CORE_CUDA_TYPES_HPP__
|
||||
#define __OPENCV_CORE_CUDA_TYPES_HPP__
|
||||
|
||||
#ifndef __cplusplus
|
||||
# error cuda_types.hpp header must be compiled as C++
|
||||
#endif
|
||||
|
||||
/** @file
|
||||
* @deprecated Use @ref cudev instead.
|
||||
*/
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
#ifdef __CUDACC__
|
||||
#define __CV_CUDA_HOST_DEVICE__ __host__ __device__ __forceinline__
|
||||
#else
|
||||
#define __CV_CUDA_HOST_DEVICE__
|
||||
#endif
|
||||
|
||||
namespace cv
|
||||
{
|
||||
namespace cuda
|
||||
{
|
||||
|
||||
// Simple lightweight structures that encapsulates information about an image on device.
|
||||
// It is intended to pass to nvcc-compiled code. GpuMat depends on headers that nvcc can't compile
|
||||
|
||||
template <typename T> struct DevPtr
|
||||
{
|
||||
typedef T elem_type;
|
||||
typedef int index_type;
|
||||
|
||||
enum { elem_size = sizeof(elem_type) };
|
||||
|
||||
T* data;
|
||||
|
||||
__CV_CUDA_HOST_DEVICE__ DevPtr() : data(0) {}
|
||||
__CV_CUDA_HOST_DEVICE__ DevPtr(T* data_) : data(data_) {}
|
||||
|
||||
__CV_CUDA_HOST_DEVICE__ size_t elemSize() const { return elem_size; }
|
||||
__CV_CUDA_HOST_DEVICE__ operator T*() { return data; }
|
||||
__CV_CUDA_HOST_DEVICE__ operator const T*() const { return data; }
|
||||
};
|
||||
|
||||
template <typename T> struct PtrSz : public DevPtr<T>
|
||||
{
|
||||
__CV_CUDA_HOST_DEVICE__ PtrSz() : size(0) {}
|
||||
__CV_CUDA_HOST_DEVICE__ PtrSz(T* data_, size_t size_) : DevPtr<T>(data_), size(size_) {}
|
||||
|
||||
size_t size;
|
||||
};
|
||||
|
||||
template <typename T> struct PtrStep : public DevPtr<T>
|
||||
{
|
||||
__CV_CUDA_HOST_DEVICE__ PtrStep() : step(0) {}
|
||||
__CV_CUDA_HOST_DEVICE__ PtrStep(T* data_, size_t step_) : DevPtr<T>(data_), step(step_) {}
|
||||
|
||||
size_t step;
|
||||
|
||||
__CV_CUDA_HOST_DEVICE__ T* ptr(int y = 0) { return ( T*)( ( char*)DevPtr<T>::data + y * step); }
|
||||
__CV_CUDA_HOST_DEVICE__ const T* ptr(int y = 0) const { return (const T*)( (const char*)DevPtr<T>::data + y * step); }
|
||||
|
||||
__CV_CUDA_HOST_DEVICE__ T& operator ()(int y, int x) { return ptr(y)[x]; }
|
||||
__CV_CUDA_HOST_DEVICE__ const T& operator ()(int y, int x) const { return ptr(y)[x]; }
|
||||
};
|
||||
|
||||
template <typename T> struct PtrStepSz : public PtrStep<T>
|
||||
{
|
||||
__CV_CUDA_HOST_DEVICE__ PtrStepSz() : cols(0), rows(0) {}
|
||||
__CV_CUDA_HOST_DEVICE__ PtrStepSz(int rows_, int cols_, T* data_, size_t step_)
|
||||
: PtrStep<T>(data_, step_), cols(cols_), rows(rows_) {}
|
||||
|
||||
template <typename U>
|
||||
explicit PtrStepSz(const PtrStepSz<U>& d) : PtrStep<T>((T*)d.data, d.step), cols(d.cols), rows(d.rows){}
|
||||
|
||||
int cols;
|
||||
int rows;
|
||||
};
|
||||
|
||||
typedef PtrStepSz<unsigned char> PtrStepSzb;
|
||||
typedef PtrStepSz<float> PtrStepSzf;
|
||||
typedef PtrStepSz<int> PtrStepSzi;
|
||||
|
||||
typedef PtrStep<unsigned char> PtrStepb;
|
||||
typedef PtrStep<float> PtrStepf;
|
||||
typedef PtrStep<int> PtrStepi;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif /* __OPENCV_CORE_CUDA_TYPES_HPP__ */
|
231
3rdparty/include/opencv2/core/cvdef.h
vendored
Normal file
231
3rdparty/include/opencv2/core/cvdef.h
vendored
Normal file
@ -0,0 +1,231 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Copyright (C) 2015, Itseez Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_CORE_CVDEF_H__
|
||||
#define __OPENCV_CORE_CVDEF_H__
|
||||
|
||||
#if !defined _CRT_SECURE_NO_DEPRECATE && defined _MSC_VER && _MSC_VER > 1300
|
||||
# define _CRT_SECURE_NO_DEPRECATE /* to avoid multiple Visual Studio warnings */
|
||||
#endif
|
||||
|
||||
// undef problematic defines sometimes defined by system headers (windows.h in particular)
|
||||
#undef small
|
||||
#undef min
|
||||
#undef max
|
||||
#undef abs
|
||||
#undef Complex
|
||||
|
||||
#include "opencv2/hal/defs.h"
|
||||
|
||||
#ifdef __OPENCV_BUILD
|
||||
# define DISABLE_OPENCV_24_COMPATIBILITY
|
||||
#endif
|
||||
|
||||
#if (defined WIN32 || defined _WIN32 || defined WINCE || defined __CYGWIN__) && defined CVAPI_EXPORTS
|
||||
# define CV_EXPORTS __declspec(dllexport)
|
||||
#elif defined __GNUC__ && __GNUC__ >= 4
|
||||
# define CV_EXPORTS __attribute__ ((visibility ("default")))
|
||||
#else
|
||||
# define CV_EXPORTS
|
||||
#endif
|
||||
|
||||
#ifndef CV_EXTERN_C
|
||||
# ifdef __cplusplus
|
||||
# define CV_EXTERN_C extern "C"
|
||||
# else
|
||||
# define CV_EXTERN_C
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* special informative macros for wrapper generators */
|
||||
#define CV_EXPORTS_W CV_EXPORTS
|
||||
#define CV_EXPORTS_W_SIMPLE CV_EXPORTS
|
||||
#define CV_EXPORTS_AS(synonym) CV_EXPORTS
|
||||
#define CV_EXPORTS_W_MAP CV_EXPORTS
|
||||
#define CV_IN_OUT
|
||||
#define CV_OUT
|
||||
#define CV_PROP
|
||||
#define CV_PROP_RW
|
||||
#define CV_WRAP
|
||||
#define CV_WRAP_AS(synonym)
|
||||
|
||||
/****************************************************************************************\
|
||||
* Matrix type (Mat) *
|
||||
\****************************************************************************************/
|
||||
|
||||
#define CV_CN_MAX 512
|
||||
#define CV_CN_SHIFT 3
|
||||
#define CV_DEPTH_MAX (1 << CV_CN_SHIFT)
|
||||
|
||||
#define CV_8U 0
|
||||
#define CV_8S 1
|
||||
#define CV_16U 2
|
||||
#define CV_16S 3
|
||||
#define CV_32S 4
|
||||
#define CV_32F 5
|
||||
#define CV_64F 6
|
||||
#define CV_USRTYPE1 7
|
||||
|
||||
#define CV_MAT_DEPTH_MASK (CV_DEPTH_MAX - 1)
|
||||
#define CV_MAT_DEPTH(flags) ((flags) & CV_MAT_DEPTH_MASK)
|
||||
|
||||
#define CV_MAKETYPE(depth,cn) (CV_MAT_DEPTH(depth) + (((cn)-1) << CV_CN_SHIFT))
|
||||
#define CV_MAKE_TYPE CV_MAKETYPE
|
||||
|
||||
#define CV_8UC1 CV_MAKETYPE(CV_8U,1)
|
||||
#define CV_8UC2 CV_MAKETYPE(CV_8U,2)
|
||||
#define CV_8UC3 CV_MAKETYPE(CV_8U,3)
|
||||
#define CV_8UC4 CV_MAKETYPE(CV_8U,4)
|
||||
#define CV_8UC(n) CV_MAKETYPE(CV_8U,(n))
|
||||
|
||||
#define CV_8SC1 CV_MAKETYPE(CV_8S,1)
|
||||
#define CV_8SC2 CV_MAKETYPE(CV_8S,2)
|
||||
#define CV_8SC3 CV_MAKETYPE(CV_8S,3)
|
||||
#define CV_8SC4 CV_MAKETYPE(CV_8S,4)
|
||||
#define CV_8SC(n) CV_MAKETYPE(CV_8S,(n))
|
||||
|
||||
#define CV_16UC1 CV_MAKETYPE(CV_16U,1)
|
||||
#define CV_16UC2 CV_MAKETYPE(CV_16U,2)
|
||||
#define CV_16UC3 CV_MAKETYPE(CV_16U,3)
|
||||
#define CV_16UC4 CV_MAKETYPE(CV_16U,4)
|
||||
#define CV_16UC(n) CV_MAKETYPE(CV_16U,(n))
|
||||
|
||||
#define CV_16SC1 CV_MAKETYPE(CV_16S,1)
|
||||
#define CV_16SC2 CV_MAKETYPE(CV_16S,2)
|
||||
#define CV_16SC3 CV_MAKETYPE(CV_16S,3)
|
||||
#define CV_16SC4 CV_MAKETYPE(CV_16S,4)
|
||||
#define CV_16SC(n) CV_MAKETYPE(CV_16S,(n))
|
||||
|
||||
#define CV_32SC1 CV_MAKETYPE(CV_32S,1)
|
||||
#define CV_32SC2 CV_MAKETYPE(CV_32S,2)
|
||||
#define CV_32SC3 CV_MAKETYPE(CV_32S,3)
|
||||
#define CV_32SC4 CV_MAKETYPE(CV_32S,4)
|
||||
#define CV_32SC(n) CV_MAKETYPE(CV_32S,(n))
|
||||
|
||||
#define CV_32FC1 CV_MAKETYPE(CV_32F,1)
|
||||
#define CV_32FC2 CV_MAKETYPE(CV_32F,2)
|
||||
#define CV_32FC3 CV_MAKETYPE(CV_32F,3)
|
||||
#define CV_32FC4 CV_MAKETYPE(CV_32F,4)
|
||||
#define CV_32FC(n) CV_MAKETYPE(CV_32F,(n))
|
||||
|
||||
#define CV_64FC1 CV_MAKETYPE(CV_64F,1)
|
||||
#define CV_64FC2 CV_MAKETYPE(CV_64F,2)
|
||||
#define CV_64FC3 CV_MAKETYPE(CV_64F,3)
|
||||
#define CV_64FC4 CV_MAKETYPE(CV_64F,4)
|
||||
#define CV_64FC(n) CV_MAKETYPE(CV_64F,(n))
|
||||
|
||||
#define CV_MAT_CN_MASK ((CV_CN_MAX - 1) << CV_CN_SHIFT)
|
||||
#define CV_MAT_CN(flags) ((((flags) & CV_MAT_CN_MASK) >> CV_CN_SHIFT) + 1)
|
||||
#define CV_MAT_TYPE_MASK (CV_DEPTH_MAX*CV_CN_MAX - 1)
|
||||
#define CV_MAT_TYPE(flags) ((flags) & CV_MAT_TYPE_MASK)
|
||||
#define CV_MAT_CONT_FLAG_SHIFT 14
|
||||
#define CV_MAT_CONT_FLAG (1 << CV_MAT_CONT_FLAG_SHIFT)
|
||||
#define CV_IS_MAT_CONT(flags) ((flags) & CV_MAT_CONT_FLAG)
|
||||
#define CV_IS_CONT_MAT CV_IS_MAT_CONT
|
||||
#define CV_SUBMAT_FLAG_SHIFT 15
|
||||
#define CV_SUBMAT_FLAG (1 << CV_SUBMAT_FLAG_SHIFT)
|
||||
#define CV_IS_SUBMAT(flags) ((flags) & CV_MAT_SUBMAT_FLAG)
|
||||
|
||||
/* Size of each channel item,
|
||||
0x124489 = 1000 0100 0100 0010 0010 0001 0001 ~ array of sizeof(arr_type_elem) */
|
||||
#define CV_ELEM_SIZE1(type) \
|
||||
((((sizeof(size_t)<<28)|0x8442211) >> CV_MAT_DEPTH(type)*4) & 15)
|
||||
|
||||
/* 0x3a50 = 11 10 10 01 01 00 00 ~ array of log2(sizeof(arr_type_elem)) */
|
||||
#define CV_ELEM_SIZE(type) \
|
||||
(CV_MAT_CN(type) << ((((sizeof(size_t)/4+1)*16384|0x3a50) >> CV_MAT_DEPTH(type)*2) & 3))
|
||||
|
||||
#ifndef MIN
|
||||
# define MIN(a,b) ((a) > (b) ? (b) : (a))
|
||||
#endif
|
||||
|
||||
#ifndef MAX
|
||||
# define MAX(a,b) ((a) < (b) ? (b) : (a))
|
||||
#endif
|
||||
|
||||
/****************************************************************************************\
|
||||
* exchange-add operation for atomic operations on reference counters *
|
||||
\****************************************************************************************/
|
||||
|
||||
#if defined __INTEL_COMPILER && !(defined WIN32 || defined _WIN32)
|
||||
// atomic increment on the linux version of the Intel(tm) compiler
|
||||
# define CV_XADD(addr, delta) (int)_InterlockedExchangeAdd(const_cast<void*>(reinterpret_cast<volatile void*>(addr)), delta)
|
||||
#elif defined __GNUC__
|
||||
# if defined __clang__ && __clang_major__ >= 3 && !defined __ANDROID__ && !defined __EMSCRIPTEN__ && !defined(__CUDACC__)
|
||||
# ifdef __ATOMIC_ACQ_REL
|
||||
# define CV_XADD(addr, delta) __c11_atomic_fetch_add((_Atomic(int)*)(addr), delta, __ATOMIC_ACQ_REL)
|
||||
# else
|
||||
# define CV_XADD(addr, delta) __atomic_fetch_add((_Atomic(int)*)(addr), delta, 4)
|
||||
# endif
|
||||
# else
|
||||
# if defined __ATOMIC_ACQ_REL && !defined __clang__
|
||||
// version for gcc >= 4.7
|
||||
# define CV_XADD(addr, delta) (int)__atomic_fetch_add((unsigned*)(addr), (unsigned)(delta), __ATOMIC_ACQ_REL)
|
||||
# else
|
||||
# define CV_XADD(addr, delta) (int)__sync_fetch_and_add((unsigned*)(addr), (unsigned)(delta))
|
||||
# endif
|
||||
# endif
|
||||
#elif defined _MSC_VER && !defined RC_INVOKED
|
||||
# include <intrin.h>
|
||||
# define CV_XADD(addr, delta) (int)_InterlockedExchangeAdd((long volatile*)addr, delta)
|
||||
#else
|
||||
CV_INLINE CV_XADD(int* addr, int delta) { int tmp = *addr; *addr += delta; return tmp; }
|
||||
#endif
|
||||
|
||||
|
||||
/****************************************************************************************\
|
||||
* CV_NORETURN attribute *
|
||||
\****************************************************************************************/
|
||||
|
||||
#ifndef CV_NORETURN
|
||||
# if defined(__GNUC__)
|
||||
# define CV_NORETURN __attribute__((__noreturn__))
|
||||
# elif defined(_MSC_VER) && (_MSC_VER >= 1300)
|
||||
# define CV_NORETURN __declspec(noreturn)
|
||||
# else
|
||||
# define CV_NORETURN /* nothing by default */
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#endif // __OPENCV_CORE_CVDEF_H__
|
1059
3rdparty/include/opencv2/core/cvstd.hpp
vendored
Normal file
1059
3rdparty/include/opencv2/core/cvstd.hpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
267
3rdparty/include/opencv2/core/cvstd.inl.hpp
vendored
Normal file
267
3rdparty/include/opencv2/core/cvstd.inl.hpp
vendored
Normal file
@ -0,0 +1,267 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_CORE_CVSTDINL_HPP__
|
||||
#define __OPENCV_CORE_CVSTDINL_HPP__
|
||||
|
||||
#ifndef OPENCV_NOSTL
|
||||
# include <complex>
|
||||
# include <ostream>
|
||||
#endif
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv
|
||||
{
|
||||
#ifndef OPENCV_NOSTL
|
||||
|
||||
template<typename _Tp> class DataType< std::complex<_Tp> >
|
||||
{
|
||||
public:
|
||||
typedef std::complex<_Tp> value_type;
|
||||
typedef value_type work_type;
|
||||
typedef _Tp channel_type;
|
||||
|
||||
enum { generic_type = 0,
|
||||
depth = DataType<channel_type>::depth,
|
||||
channels = 2,
|
||||
fmt = DataType<channel_type>::fmt + ((channels - 1) << 8),
|
||||
type = CV_MAKETYPE(depth, channels) };
|
||||
|
||||
typedef Vec<channel_type, channels> vec_type;
|
||||
};
|
||||
|
||||
inline
|
||||
String::String(const std::string& str)
|
||||
: cstr_(0), len_(0)
|
||||
{
|
||||
if (!str.empty())
|
||||
{
|
||||
size_t len = str.size();
|
||||
memcpy(allocate(len), str.c_str(), len);
|
||||
}
|
||||
}
|
||||
|
||||
inline
|
||||
String::String(const std::string& str, size_t pos, size_t len)
|
||||
: cstr_(0), len_(0)
|
||||
{
|
||||
size_t strlen = str.size();
|
||||
pos = max(pos, strlen);
|
||||
len = min(strlen - pos, len);
|
||||
if (!len) return;
|
||||
memcpy(allocate(len), str.c_str() + pos, len);
|
||||
}
|
||||
|
||||
inline
|
||||
String& String::operator = (const std::string& str)
|
||||
{
|
||||
deallocate();
|
||||
if (!str.empty())
|
||||
{
|
||||
size_t len = str.size();
|
||||
memcpy(allocate(len), str.c_str(), len);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
inline
|
||||
String& String::operator += (const std::string& str)
|
||||
{
|
||||
*this = *this + str;
|
||||
return *this;
|
||||
}
|
||||
|
||||
inline
|
||||
String::operator std::string() const
|
||||
{
|
||||
return std::string(cstr_, len_);
|
||||
}
|
||||
|
||||
inline
|
||||
String operator + (const String& lhs, const std::string& rhs)
|
||||
{
|
||||
String s;
|
||||
size_t rhslen = rhs.size();
|
||||
s.allocate(lhs.len_ + rhslen);
|
||||
memcpy(s.cstr_, lhs.cstr_, lhs.len_);
|
||||
memcpy(s.cstr_ + lhs.len_, rhs.c_str(), rhslen);
|
||||
return s;
|
||||
}
|
||||
|
||||
inline
|
||||
String operator + (const std::string& lhs, const String& rhs)
|
||||
{
|
||||
String s;
|
||||
size_t lhslen = lhs.size();
|
||||
s.allocate(lhslen + rhs.len_);
|
||||
memcpy(s.cstr_, lhs.c_str(), lhslen);
|
||||
memcpy(s.cstr_ + lhslen, rhs.cstr_, rhs.len_);
|
||||
return s;
|
||||
}
|
||||
|
||||
inline
|
||||
FileNode::operator std::string() const
|
||||
{
|
||||
String value;
|
||||
read(*this, value, value);
|
||||
return value;
|
||||
}
|
||||
|
||||
template<> inline
|
||||
void operator >> (const FileNode& n, std::string& value)
|
||||
{
|
||||
String val;
|
||||
read(n, val, val);
|
||||
value = val;
|
||||
}
|
||||
|
||||
template<> inline
|
||||
FileStorage& operator << (FileStorage& fs, const std::string& value)
|
||||
{
|
||||
return fs << cv::String(value);
|
||||
}
|
||||
|
||||
static inline
|
||||
std::ostream& operator << (std::ostream& os, const String& str)
|
||||
{
|
||||
return os << str.c_str();
|
||||
}
|
||||
|
||||
static inline
|
||||
std::ostream& operator << (std::ostream& out, Ptr<Formatted> fmtd)
|
||||
{
|
||||
fmtd->reset();
|
||||
for(const char* str = fmtd->next(); str; str = fmtd->next())
|
||||
out << str;
|
||||
return out;
|
||||
}
|
||||
|
||||
static inline
|
||||
std::ostream& operator << (std::ostream& out, const Mat& mtx)
|
||||
{
|
||||
return out << Formatter::get()->format(mtx);
|
||||
}
|
||||
|
||||
template<typename _Tp> static inline
|
||||
std::ostream& operator << (std::ostream& out, const std::vector<Point_<_Tp> >& vec)
|
||||
{
|
||||
return out << Formatter::get()->format(Mat(vec));
|
||||
}
|
||||
|
||||
|
||||
template<typename _Tp> static inline
|
||||
std::ostream& operator << (std::ostream& out, const std::vector<Point3_<_Tp> >& vec)
|
||||
{
|
||||
return out << Formatter::get()->format(Mat(vec));
|
||||
}
|
||||
|
||||
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
std::ostream& operator << (std::ostream& out, const Matx<_Tp, m, n>& matx)
|
||||
{
|
||||
return out << Formatter::get()->format(Mat(matx));
|
||||
}
|
||||
|
||||
template<typename _Tp> static inline
|
||||
std::ostream& operator << (std::ostream& out, const Point_<_Tp>& p)
|
||||
{
|
||||
out << "[" << p.x << ", " << p.y << "]";
|
||||
return out;
|
||||
}
|
||||
|
||||
template<typename _Tp> static inline
|
||||
std::ostream& operator << (std::ostream& out, const Point3_<_Tp>& p)
|
||||
{
|
||||
out << "[" << p.x << ", " << p.y << ", " << p.z << "]";
|
||||
return out;
|
||||
}
|
||||
|
||||
template<typename _Tp, int n> static inline
|
||||
std::ostream& operator << (std::ostream& out, const Vec<_Tp, n>& vec)
|
||||
{
|
||||
out << "[";
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning( push )
|
||||
#pragma warning( disable: 4127 )
|
||||
#endif
|
||||
if(Vec<_Tp, n>::depth < CV_32F)
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning( pop )
|
||||
#endif
|
||||
{
|
||||
for (int i = 0; i < n - 1; ++i) {
|
||||
out << (int)vec[i] << ", ";
|
||||
}
|
||||
out << (int)vec[n-1] << "]";
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int i = 0; i < n - 1; ++i) {
|
||||
out << vec[i] << ", ";
|
||||
}
|
||||
out << vec[n-1] << "]";
|
||||
}
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
template<typename _Tp> static inline
|
||||
std::ostream& operator << (std::ostream& out, const Size_<_Tp>& size)
|
||||
{
|
||||
return out << "[" << size.width << " x " << size.height << "]";
|
||||
}
|
||||
|
||||
template<typename _Tp> static inline
|
||||
std::ostream& operator << (std::ostream& out, const Rect_<_Tp>& rect)
|
||||
{
|
||||
return out << "[" << rect.width << " x " << rect.height << " from (" << rect.x << ", " << rect.y << ")]";
|
||||
}
|
||||
|
||||
|
||||
#endif // OPENCV_NOSTL
|
||||
} // cv
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif // __OPENCV_CORE_CVSTDINL_HPP__
|
105
3rdparty/include/opencv2/core/directx.hpp
vendored
Normal file
105
3rdparty/include/opencv2/core/directx.hpp
vendored
Normal file
@ -0,0 +1,105 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the copyright holders or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_CORE_DIRECTX_HPP__
|
||||
#define __OPENCV_CORE_DIRECTX_HPP__
|
||||
|
||||
#include "mat.hpp"
|
||||
#include "ocl.hpp"
|
||||
|
||||
#if !defined(__d3d11_h__)
|
||||
struct ID3D11Device;
|
||||
struct ID3D11Texture2D;
|
||||
#endif
|
||||
|
||||
#if !defined(__d3d10_h__)
|
||||
struct ID3D10Device;
|
||||
struct ID3D10Texture2D;
|
||||
#endif
|
||||
|
||||
#if !defined(_D3D9_H_)
|
||||
struct IDirect3DDevice9;
|
||||
struct IDirect3DDevice9Ex;
|
||||
struct IDirect3DSurface9;
|
||||
#endif
|
||||
|
||||
|
||||
namespace cv { namespace directx {
|
||||
|
||||
namespace ocl {
|
||||
using namespace cv::ocl;
|
||||
|
||||
//! @addtogroup core_directx
|
||||
//! @{
|
||||
|
||||
// TODO static functions in the Context class
|
||||
CV_EXPORTS Context& initializeContextFromD3D11Device(ID3D11Device* pD3D11Device);
|
||||
CV_EXPORTS Context& initializeContextFromD3D10Device(ID3D10Device* pD3D10Device);
|
||||
CV_EXPORTS Context& initializeContextFromDirect3DDevice9Ex(IDirect3DDevice9Ex* pDirect3DDevice9Ex);
|
||||
CV_EXPORTS Context& initializeContextFromDirect3DDevice9(IDirect3DDevice9* pDirect3DDevice9);
|
||||
|
||||
//! @}
|
||||
|
||||
} // namespace cv::directx::ocl
|
||||
|
||||
//! @addtogroup core_directx
|
||||
//! @{
|
||||
|
||||
CV_EXPORTS void convertToD3D11Texture2D(InputArray src, ID3D11Texture2D* pD3D11Texture2D);
|
||||
CV_EXPORTS void convertFromD3D11Texture2D(ID3D11Texture2D* pD3D11Texture2D, OutputArray dst);
|
||||
|
||||
CV_EXPORTS void convertToD3D10Texture2D(InputArray src, ID3D10Texture2D* pD3D10Texture2D);
|
||||
CV_EXPORTS void convertFromD3D10Texture2D(ID3D10Texture2D* pD3D10Texture2D, OutputArray dst);
|
||||
|
||||
CV_EXPORTS void convertToDirect3DSurface9(InputArray src, IDirect3DSurface9* pDirect3DSurface9, void* surfaceSharedHandle = NULL);
|
||||
CV_EXPORTS void convertFromDirect3DSurface9(IDirect3DSurface9* pDirect3DSurface9, OutputArray dst, void* surfaceSharedHandle = NULL);
|
||||
|
||||
// Get OpenCV type from DirectX type, return -1 if there is no equivalent
|
||||
CV_EXPORTS int getTypeFromDXGI_FORMAT(const int iDXGI_FORMAT); // enum DXGI_FORMAT for D3D10/D3D11
|
||||
|
||||
// Get OpenCV type from DirectX type, return -1 if there is no equivalent
|
||||
CV_EXPORTS int getTypeFromD3DFORMAT(const int iD3DFORMAT); // enum D3DTYPE for D3D9
|
||||
|
||||
//! @}
|
||||
|
||||
} } // namespace cv::directx
|
||||
|
||||
#endif // __OPENCV_CORE_DIRECTX_HPP__
|
280
3rdparty/include/opencv2/core/eigen.hpp
vendored
Normal file
280
3rdparty/include/opencv2/core/eigen.hpp
vendored
Normal file
@ -0,0 +1,280 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
|
||||
#ifndef __OPENCV_CORE_EIGEN_HPP__
|
||||
#define __OPENCV_CORE_EIGEN_HPP__
|
||||
|
||||
#include "opencv2/core.hpp"
|
||||
|
||||
#if defined _MSC_VER && _MSC_VER >= 1200
|
||||
#pragma warning( disable: 4714 ) //__forceinline is not inlined
|
||||
#pragma warning( disable: 4127 ) //conditional expression is constant
|
||||
#pragma warning( disable: 4244 ) //conversion from '__int64' to 'int', possible loss of data
|
||||
#endif
|
||||
|
||||
namespace cv
|
||||
{
|
||||
|
||||
//! @addtogroup core_eigen
|
||||
//! @{
|
||||
|
||||
template<typename _Tp, int _rows, int _cols, int _options, int _maxRows, int _maxCols> static inline
|
||||
void eigen2cv( const Eigen::Matrix<_Tp, _rows, _cols, _options, _maxRows, _maxCols>& src, Mat& dst )
|
||||
{
|
||||
if( !(src.Flags & Eigen::RowMajorBit) )
|
||||
{
|
||||
Mat _src(src.cols(), src.rows(), DataType<_Tp>::type,
|
||||
(void*)src.data(), src.stride()*sizeof(_Tp));
|
||||
transpose(_src, dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
Mat _src(src.rows(), src.cols(), DataType<_Tp>::type,
|
||||
(void*)src.data(), src.stride()*sizeof(_Tp));
|
||||
_src.copyTo(dst);
|
||||
}
|
||||
}
|
||||
|
||||
// Matx case
|
||||
template<typename _Tp, int _rows, int _cols, int _options, int _maxRows, int _maxCols> static inline
|
||||
void eigen2cv( const Eigen::Matrix<_Tp, _rows, _cols, _options, _maxRows, _maxCols>& src,
|
||||
Matx<_Tp, _rows, _cols>& dst )
|
||||
{
|
||||
if( !(src.Flags & Eigen::RowMajorBit) )
|
||||
{
|
||||
dst = Matx<_Tp, _cols, _rows>(static_cast<const _Tp*>(src.data())).t();
|
||||
}
|
||||
else
|
||||
{
|
||||
dst = Matx<_Tp, _rows, _cols>(static_cast<const _Tp*>(src.data()));
|
||||
}
|
||||
}
|
||||
|
||||
template<typename _Tp, int _rows, int _cols, int _options, int _maxRows, int _maxCols> static inline
|
||||
void cv2eigen( const Mat& src,
|
||||
Eigen::Matrix<_Tp, _rows, _cols, _options, _maxRows, _maxCols>& dst )
|
||||
{
|
||||
CV_DbgAssert(src.rows == _rows && src.cols == _cols);
|
||||
if( !(dst.Flags & Eigen::RowMajorBit) )
|
||||
{
|
||||
const Mat _dst(src.cols, src.rows, DataType<_Tp>::type,
|
||||
dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
|
||||
if( src.type() == _dst.type() )
|
||||
transpose(src, _dst);
|
||||
else if( src.cols == src.rows )
|
||||
{
|
||||
src.convertTo(_dst, _dst.type());
|
||||
transpose(_dst, _dst);
|
||||
}
|
||||
else
|
||||
Mat(src.t()).convertTo(_dst, _dst.type());
|
||||
}
|
||||
else
|
||||
{
|
||||
const Mat _dst(src.rows, src.cols, DataType<_Tp>::type,
|
||||
dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
|
||||
src.convertTo(_dst, _dst.type());
|
||||
}
|
||||
}
|
||||
|
||||
// Matx case
|
||||
template<typename _Tp, int _rows, int _cols, int _options, int _maxRows, int _maxCols> static inline
|
||||
void cv2eigen( const Matx<_Tp, _rows, _cols>& src,
|
||||
Eigen::Matrix<_Tp, _rows, _cols, _options, _maxRows, _maxCols>& dst )
|
||||
{
|
||||
if( !(dst.Flags & Eigen::RowMajorBit) )
|
||||
{
|
||||
const Mat _dst(_cols, _rows, DataType<_Tp>::type,
|
||||
dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
|
||||
transpose(src, _dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
const Mat _dst(_rows, _cols, DataType<_Tp>::type,
|
||||
dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
|
||||
Mat(src).copyTo(_dst);
|
||||
}
|
||||
}
|
||||
|
||||
template<typename _Tp> static inline
|
||||
void cv2eigen( const Mat& src,
|
||||
Eigen::Matrix<_Tp, Eigen::Dynamic, Eigen::Dynamic>& dst )
|
||||
{
|
||||
dst.resize(src.rows, src.cols);
|
||||
if( !(dst.Flags & Eigen::RowMajorBit) )
|
||||
{
|
||||
const Mat _dst(src.cols, src.rows, DataType<_Tp>::type,
|
||||
dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
|
||||
if( src.type() == _dst.type() )
|
||||
transpose(src, _dst);
|
||||
else if( src.cols == src.rows )
|
||||
{
|
||||
src.convertTo(_dst, _dst.type());
|
||||
transpose(_dst, _dst);
|
||||
}
|
||||
else
|
||||
Mat(src.t()).convertTo(_dst, _dst.type());
|
||||
}
|
||||
else
|
||||
{
|
||||
const Mat _dst(src.rows, src.cols, DataType<_Tp>::type,
|
||||
dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
|
||||
src.convertTo(_dst, _dst.type());
|
||||
}
|
||||
}
|
||||
|
||||
// Matx case
|
||||
template<typename _Tp, int _rows, int _cols> static inline
|
||||
void cv2eigen( const Matx<_Tp, _rows, _cols>& src,
|
||||
Eigen::Matrix<_Tp, Eigen::Dynamic, Eigen::Dynamic>& dst )
|
||||
{
|
||||
dst.resize(_rows, _cols);
|
||||
if( !(dst.Flags & Eigen::RowMajorBit) )
|
||||
{
|
||||
const Mat _dst(_cols, _rows, DataType<_Tp>::type,
|
||||
dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
|
||||
transpose(src, _dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
const Mat _dst(_rows, _cols, DataType<_Tp>::type,
|
||||
dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
|
||||
Mat(src).copyTo(_dst);
|
||||
}
|
||||
}
|
||||
|
||||
template<typename _Tp> static inline
|
||||
void cv2eigen( const Mat& src,
|
||||
Eigen::Matrix<_Tp, Eigen::Dynamic, 1>& dst )
|
||||
{
|
||||
CV_Assert(src.cols == 1);
|
||||
dst.resize(src.rows);
|
||||
|
||||
if( !(dst.Flags & Eigen::RowMajorBit) )
|
||||
{
|
||||
const Mat _dst(src.cols, src.rows, DataType<_Tp>::type,
|
||||
dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
|
||||
if( src.type() == _dst.type() )
|
||||
transpose(src, _dst);
|
||||
else
|
||||
Mat(src.t()).convertTo(_dst, _dst.type());
|
||||
}
|
||||
else
|
||||
{
|
||||
const Mat _dst(src.rows, src.cols, DataType<_Tp>::type,
|
||||
dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
|
||||
src.convertTo(_dst, _dst.type());
|
||||
}
|
||||
}
|
||||
|
||||
// Matx case
|
||||
template<typename _Tp, int _rows> static inline
|
||||
void cv2eigen( const Matx<_Tp, _rows, 1>& src,
|
||||
Eigen::Matrix<_Tp, Eigen::Dynamic, 1>& dst )
|
||||
{
|
||||
dst.resize(_rows);
|
||||
|
||||
if( !(dst.Flags & Eigen::RowMajorBit) )
|
||||
{
|
||||
const Mat _dst(1, _rows, DataType<_Tp>::type,
|
||||
dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
|
||||
transpose(src, _dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
const Mat _dst(_rows, 1, DataType<_Tp>::type,
|
||||
dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
|
||||
src.copyTo(_dst);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template<typename _Tp> static inline
|
||||
void cv2eigen( const Mat& src,
|
||||
Eigen::Matrix<_Tp, 1, Eigen::Dynamic>& dst )
|
||||
{
|
||||
CV_Assert(src.rows == 1);
|
||||
dst.resize(src.cols);
|
||||
if( !(dst.Flags & Eigen::RowMajorBit) )
|
||||
{
|
||||
const Mat _dst(src.cols, src.rows, DataType<_Tp>::type,
|
||||
dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
|
||||
if( src.type() == _dst.type() )
|
||||
transpose(src, _dst);
|
||||
else
|
||||
Mat(src.t()).convertTo(_dst, _dst.type());
|
||||
}
|
||||
else
|
||||
{
|
||||
const Mat _dst(src.rows, src.cols, DataType<_Tp>::type,
|
||||
dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
|
||||
src.convertTo(_dst, _dst.type());
|
||||
}
|
||||
}
|
||||
|
||||
//Matx
|
||||
template<typename _Tp, int _cols> static inline
|
||||
void cv2eigen( const Matx<_Tp, 1, _cols>& src,
|
||||
Eigen::Matrix<_Tp, 1, Eigen::Dynamic>& dst )
|
||||
{
|
||||
dst.resize(_cols);
|
||||
if( !(dst.Flags & Eigen::RowMajorBit) )
|
||||
{
|
||||
const Mat _dst(_cols, 1, DataType<_Tp>::type,
|
||||
dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
|
||||
transpose(src, _dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
const Mat _dst(1, _cols, DataType<_Tp>::type,
|
||||
dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
|
||||
Mat(src).copyTo(_dst);
|
||||
}
|
||||
}
|
||||
|
||||
//! @}
|
||||
|
||||
} // cv
|
||||
|
||||
#endif
|
195
3rdparty/include/opencv2/core/ippasync.hpp
vendored
Normal file
195
3rdparty/include/opencv2/core/ippasync.hpp
vendored
Normal file
@ -0,0 +1,195 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2015, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Copyright (C) 2015, Itseez Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_CORE_IPPASYNC_HPP__
|
||||
#define __OPENCV_CORE_IPPASYNC_HPP__
|
||||
|
||||
#ifdef HAVE_IPP_A
|
||||
|
||||
#include "opencv2/core.hpp"
|
||||
#include <ipp_async_op.h>
|
||||
#include <ipp_async_accel.h>
|
||||
|
||||
namespace cv
|
||||
{
|
||||
|
||||
namespace hpp
|
||||
{
|
||||
|
||||
/** @addtogroup core_ipp
|
||||
This section describes conversion between OpenCV and [Intel® IPP Asynchronous
|
||||
C/C++](http://software.intel.com/en-us/intel-ipp-preview) library. [Getting Started
|
||||
Guide](http://registrationcenter.intel.com/irc_nas/3727/ipp_async_get_started.htm) help you to
|
||||
install the library, configure header and library build paths.
|
||||
*/
|
||||
//! @{
|
||||
|
||||
//! convert OpenCV data type to hppDataType
|
||||
inline int toHppType(const int cvType)
|
||||
{
|
||||
int depth = CV_MAT_DEPTH(cvType);
|
||||
int hppType = depth == CV_8U ? HPP_DATA_TYPE_8U :
|
||||
depth == CV_16U ? HPP_DATA_TYPE_16U :
|
||||
depth == CV_16S ? HPP_DATA_TYPE_16S :
|
||||
depth == CV_32S ? HPP_DATA_TYPE_32S :
|
||||
depth == CV_32F ? HPP_DATA_TYPE_32F :
|
||||
depth == CV_64F ? HPP_DATA_TYPE_64F : -1;
|
||||
CV_Assert( hppType >= 0 );
|
||||
return hppType;
|
||||
}
|
||||
|
||||
//! convert hppDataType to OpenCV data type
|
||||
inline int toCvType(const int hppType)
|
||||
{
|
||||
int cvType = hppType == HPP_DATA_TYPE_8U ? CV_8U :
|
||||
hppType == HPP_DATA_TYPE_16U ? CV_16U :
|
||||
hppType == HPP_DATA_TYPE_16S ? CV_16S :
|
||||
hppType == HPP_DATA_TYPE_32S ? CV_32S :
|
||||
hppType == HPP_DATA_TYPE_32F ? CV_32F :
|
||||
hppType == HPP_DATA_TYPE_64F ? CV_64F : -1;
|
||||
CV_Assert( cvType >= 0 );
|
||||
return cvType;
|
||||
}
|
||||
|
||||
/** @brief Convert hppiMatrix to Mat.
|
||||
|
||||
This function allocates and initializes new matrix (if needed) that has the same size and type as
|
||||
input matrix. Supports CV_8U, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F.
|
||||
@param src input hppiMatrix.
|
||||
@param dst output matrix.
|
||||
@param accel accelerator instance (see hpp::getHpp for the list of acceleration framework types).
|
||||
@param cn number of channels.
|
||||
*/
|
||||
inline void copyHppToMat(hppiMatrix* src, Mat& dst, hppAccel accel, int cn)
|
||||
{
|
||||
hppDataType type;
|
||||
hpp32u width, height;
|
||||
hppStatus sts;
|
||||
|
||||
if (src == NULL)
|
||||
return dst.release();
|
||||
|
||||
sts = hppiInquireMatrix(src, &type, &width, &height);
|
||||
|
||||
CV_Assert( sts == HPP_STATUS_NO_ERROR);
|
||||
|
||||
int matType = CV_MAKETYPE(toCvType(type), cn);
|
||||
|
||||
CV_Assert(width%cn == 0);
|
||||
|
||||
width /= cn;
|
||||
|
||||
dst.create((int)height, (int)width, (int)matType);
|
||||
|
||||
size_t newSize = (size_t)(height*(hpp32u)(dst.step));
|
||||
|
||||
sts = hppiGetMatrixData(accel,src,(hpp32u)(dst.step),dst.data,&newSize);
|
||||
|
||||
CV_Assert( sts == HPP_STATUS_NO_ERROR);
|
||||
}
|
||||
|
||||
/** @brief Create Mat from hppiMatrix.
|
||||
|
||||
This function allocates and initializes the Mat that has the same size and type as input matrix.
|
||||
Supports CV_8U, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F.
|
||||
@param src input hppiMatrix.
|
||||
@param accel accelerator instance (see hpp::getHpp for the list of acceleration framework types).
|
||||
@param cn number of channels.
|
||||
@sa howToUseIPPAconversion, hpp::copyHppToMat, hpp::getHpp.
|
||||
*/
|
||||
inline Mat getMat(hppiMatrix* src, hppAccel accel, int cn)
|
||||
{
|
||||
Mat dst;
|
||||
copyHppToMat(src, dst, accel, cn);
|
||||
return dst;
|
||||
}
|
||||
|
||||
/** @brief Create hppiMatrix from Mat.
|
||||
|
||||
This function allocates and initializes the hppiMatrix that has the same size and type as input
|
||||
matrix, returns the hppiMatrix*.
|
||||
|
||||
If you want to use zero-copy for GPU you should to have 4KB aligned matrix data. See details
|
||||
[hppiCreateSharedMatrix](http://software.intel.com/ru-ru/node/501697).
|
||||
|
||||
Supports CV_8U, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F.
|
||||
|
||||
@note The hppiMatrix pointer to the image buffer in system memory refers to the src.data. Control
|
||||
the lifetime of the matrix and don't change its data, if there is no special need.
|
||||
@param src input matrix.
|
||||
@param accel accelerator instance. Supports type:
|
||||
- **HPP_ACCEL_TYPE_CPU** - accelerated by optimized CPU instructions.
|
||||
- **HPP_ACCEL_TYPE_GPU** - accelerated by GPU programmable units or fixed-function
|
||||
accelerators.
|
||||
- **HPP_ACCEL_TYPE_ANY** - any acceleration or no acceleration available.
|
||||
@sa howToUseIPPAconversion, hpp::getMat
|
||||
*/
|
||||
inline hppiMatrix* getHpp(const Mat& src, hppAccel accel)
|
||||
{
|
||||
int htype = toHppType(src.type());
|
||||
int cn = src.channels();
|
||||
|
||||
CV_Assert(src.data);
|
||||
hppAccelType accelType = hppQueryAccelType(accel);
|
||||
|
||||
if (accelType!=HPP_ACCEL_TYPE_CPU)
|
||||
{
|
||||
hpp32u pitch, size;
|
||||
hppQueryMatrixAllocParams(accel, src.cols*cn, src.rows, htype, &pitch, &size);
|
||||
if (pitch!=0 && size!=0)
|
||||
if ((int)(src.data)%4096==0 && pitch==(hpp32u)(src.step))
|
||||
{
|
||||
return hppiCreateSharedMatrix(htype, src.cols*cn, src.rows, src.data, pitch, size);
|
||||
}
|
||||
}
|
||||
|
||||
return hppiCreateMatrix(htype, src.cols*cn, src.rows, src.data, (hpp32s)(src.step));;
|
||||
}
|
||||
|
||||
//! @}
|
||||
}}
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
3398
3rdparty/include/opencv2/core/mat.hpp
vendored
Normal file
3398
3rdparty/include/opencv2/core/mat.hpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
3436
3rdparty/include/opencv2/core/mat.inl.hpp
vendored
Normal file
3436
3rdparty/include/opencv2/core/mat.inl.hpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1376
3rdparty/include/opencv2/core/matx.hpp
vendored
Normal file
1376
3rdparty/include/opencv2/core/matx.hpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
691
3rdparty/include/opencv2/core/ocl.hpp
vendored
Normal file
691
3rdparty/include/opencv2/core/ocl.hpp
vendored
Normal file
@ -0,0 +1,691 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the OpenCV Foundation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_OPENCL_HPP__
|
||||
#define __OPENCV_OPENCL_HPP__
|
||||
|
||||
#include "opencv2/core.hpp"
|
||||
|
||||
namespace cv { namespace ocl {
|
||||
|
||||
//! @addtogroup core_opencl
|
||||
//! @{
|
||||
|
||||
CV_EXPORTS_W bool haveOpenCL();
|
||||
CV_EXPORTS_W bool useOpenCL();
|
||||
CV_EXPORTS_W bool haveAmdBlas();
|
||||
CV_EXPORTS_W bool haveAmdFft();
|
||||
CV_EXPORTS_W void setUseOpenCL(bool flag);
|
||||
CV_EXPORTS_W void finish();
|
||||
|
||||
CV_EXPORTS bool haveSVM();
|
||||
|
||||
class CV_EXPORTS Context;
|
||||
class CV_EXPORTS Device;
|
||||
class CV_EXPORTS Kernel;
|
||||
class CV_EXPORTS Program;
|
||||
class CV_EXPORTS ProgramSource;
|
||||
class CV_EXPORTS Queue;
|
||||
class CV_EXPORTS PlatformInfo;
|
||||
class CV_EXPORTS Image2D;
|
||||
|
||||
class CV_EXPORTS Device
|
||||
{
|
||||
public:
|
||||
Device();
|
||||
explicit Device(void* d);
|
||||
Device(const Device& d);
|
||||
Device& operator = (const Device& d);
|
||||
~Device();
|
||||
|
||||
void set(void* d);
|
||||
|
||||
enum
|
||||
{
|
||||
TYPE_DEFAULT = (1 << 0),
|
||||
TYPE_CPU = (1 << 1),
|
||||
TYPE_GPU = (1 << 2),
|
||||
TYPE_ACCELERATOR = (1 << 3),
|
||||
TYPE_DGPU = TYPE_GPU + (1 << 16),
|
||||
TYPE_IGPU = TYPE_GPU + (1 << 17),
|
||||
TYPE_ALL = 0xFFFFFFFF
|
||||
};
|
||||
|
||||
String name() const;
|
||||
String extensions() const;
|
||||
String version() const;
|
||||
String vendorName() const;
|
||||
String OpenCL_C_Version() const;
|
||||
String OpenCLVersion() const;
|
||||
int deviceVersionMajor() const;
|
||||
int deviceVersionMinor() const;
|
||||
String driverVersion() const;
|
||||
void* ptr() const;
|
||||
|
||||
int type() const;
|
||||
|
||||
int addressBits() const;
|
||||
bool available() const;
|
||||
bool compilerAvailable() const;
|
||||
bool linkerAvailable() const;
|
||||
|
||||
enum
|
||||
{
|
||||
FP_DENORM=(1 << 0),
|
||||
FP_INF_NAN=(1 << 1),
|
||||
FP_ROUND_TO_NEAREST=(1 << 2),
|
||||
FP_ROUND_TO_ZERO=(1 << 3),
|
||||
FP_ROUND_TO_INF=(1 << 4),
|
||||
FP_FMA=(1 << 5),
|
||||
FP_SOFT_FLOAT=(1 << 6),
|
||||
FP_CORRECTLY_ROUNDED_DIVIDE_SQRT=(1 << 7)
|
||||
};
|
||||
int doubleFPConfig() const;
|
||||
int singleFPConfig() const;
|
||||
int halfFPConfig() const;
|
||||
|
||||
bool endianLittle() const;
|
||||
bool errorCorrectionSupport() const;
|
||||
|
||||
enum
|
||||
{
|
||||
EXEC_KERNEL=(1 << 0),
|
||||
EXEC_NATIVE_KERNEL=(1 << 1)
|
||||
};
|
||||
int executionCapabilities() const;
|
||||
|
||||
size_t globalMemCacheSize() const;
|
||||
|
||||
enum
|
||||
{
|
||||
NO_CACHE=0,
|
||||
READ_ONLY_CACHE=1,
|
||||
READ_WRITE_CACHE=2
|
||||
};
|
||||
int globalMemCacheType() const;
|
||||
int globalMemCacheLineSize() const;
|
||||
size_t globalMemSize() const;
|
||||
|
||||
size_t localMemSize() const;
|
||||
enum
|
||||
{
|
||||
NO_LOCAL_MEM=0,
|
||||
LOCAL_IS_LOCAL=1,
|
||||
LOCAL_IS_GLOBAL=2
|
||||
};
|
||||
int localMemType() const;
|
||||
bool hostUnifiedMemory() const;
|
||||
|
||||
bool imageSupport() const;
|
||||
|
||||
bool imageFromBufferSupport() const;
|
||||
uint imagePitchAlignment() const;
|
||||
uint imageBaseAddressAlignment() const;
|
||||
|
||||
size_t image2DMaxWidth() const;
|
||||
size_t image2DMaxHeight() const;
|
||||
|
||||
size_t image3DMaxWidth() const;
|
||||
size_t image3DMaxHeight() const;
|
||||
size_t image3DMaxDepth() const;
|
||||
|
||||
size_t imageMaxBufferSize() const;
|
||||
size_t imageMaxArraySize() const;
|
||||
|
||||
enum
|
||||
{
|
||||
UNKNOWN_VENDOR=0,
|
||||
VENDOR_AMD=1,
|
||||
VENDOR_INTEL=2,
|
||||
VENDOR_NVIDIA=3
|
||||
};
|
||||
int vendorID() const;
|
||||
// FIXIT
|
||||
// dev.isAMD() doesn't work for OpenCL CPU devices from AMD OpenCL platform.
|
||||
// This method should use platform name instead of vendor name.
|
||||
// After fix restore code in arithm.cpp: ocl_compare()
|
||||
inline bool isAMD() const { return vendorID() == VENDOR_AMD; }
|
||||
inline bool isIntel() const { return vendorID() == VENDOR_INTEL; }
|
||||
inline bool isNVidia() const { return vendorID() == VENDOR_NVIDIA; }
|
||||
|
||||
int maxClockFrequency() const;
|
||||
int maxComputeUnits() const;
|
||||
int maxConstantArgs() const;
|
||||
size_t maxConstantBufferSize() const;
|
||||
|
||||
size_t maxMemAllocSize() const;
|
||||
size_t maxParameterSize() const;
|
||||
|
||||
int maxReadImageArgs() const;
|
||||
int maxWriteImageArgs() const;
|
||||
int maxSamplers() const;
|
||||
|
||||
size_t maxWorkGroupSize() const;
|
||||
int maxWorkItemDims() const;
|
||||
void maxWorkItemSizes(size_t*) const;
|
||||
|
||||
int memBaseAddrAlign() const;
|
||||
|
||||
int nativeVectorWidthChar() const;
|
||||
int nativeVectorWidthShort() const;
|
||||
int nativeVectorWidthInt() const;
|
||||
int nativeVectorWidthLong() const;
|
||||
int nativeVectorWidthFloat() const;
|
||||
int nativeVectorWidthDouble() const;
|
||||
int nativeVectorWidthHalf() const;
|
||||
|
||||
int preferredVectorWidthChar() const;
|
||||
int preferredVectorWidthShort() const;
|
||||
int preferredVectorWidthInt() const;
|
||||
int preferredVectorWidthLong() const;
|
||||
int preferredVectorWidthFloat() const;
|
||||
int preferredVectorWidthDouble() const;
|
||||
int preferredVectorWidthHalf() const;
|
||||
|
||||
size_t printfBufferSize() const;
|
||||
size_t profilingTimerResolution() const;
|
||||
|
||||
static const Device& getDefault();
|
||||
|
||||
protected:
|
||||
struct Impl;
|
||||
Impl* p;
|
||||
};
|
||||
|
||||
|
||||
class CV_EXPORTS Context
|
||||
{
|
||||
public:
|
||||
Context();
|
||||
explicit Context(int dtype);
|
||||
~Context();
|
||||
Context(const Context& c);
|
||||
Context& operator = (const Context& c);
|
||||
|
||||
bool create();
|
||||
bool create(int dtype);
|
||||
size_t ndevices() const;
|
||||
const Device& device(size_t idx) const;
|
||||
Program getProg(const ProgramSource& prog,
|
||||
const String& buildopt, String& errmsg);
|
||||
|
||||
static Context& getDefault(bool initialize = true);
|
||||
void* ptr() const;
|
||||
|
||||
friend void initializeContextFromHandle(Context& ctx, void* platform, void* context, void* device);
|
||||
|
||||
bool useSVM() const;
|
||||
void setUseSVM(bool enabled);
|
||||
|
||||
struct Impl;
|
||||
Impl* p;
|
||||
};
|
||||
|
||||
class CV_EXPORTS Platform
|
||||
{
|
||||
public:
|
||||
Platform();
|
||||
~Platform();
|
||||
Platform(const Platform& p);
|
||||
Platform& operator = (const Platform& p);
|
||||
|
||||
void* ptr() const;
|
||||
static Platform& getDefault();
|
||||
|
||||
friend void initializeContextFromHandle(Context& ctx, void* platform, void* context, void* device);
|
||||
protected:
|
||||
struct Impl;
|
||||
Impl* p;
|
||||
};
|
||||
|
||||
// TODO Move to internal header
|
||||
void initializeContextFromHandle(Context& ctx, void* platform, void* context, void* device);
|
||||
|
||||
class CV_EXPORTS Queue
|
||||
{
|
||||
public:
|
||||
Queue();
|
||||
explicit Queue(const Context& c, const Device& d=Device());
|
||||
~Queue();
|
||||
Queue(const Queue& q);
|
||||
Queue& operator = (const Queue& q);
|
||||
|
||||
bool create(const Context& c=Context(), const Device& d=Device());
|
||||
void finish();
|
||||
void* ptr() const;
|
||||
static Queue& getDefault();
|
||||
|
||||
protected:
|
||||
struct Impl;
|
||||
Impl* p;
|
||||
};
|
||||
|
||||
|
||||
class CV_EXPORTS KernelArg
|
||||
{
|
||||
public:
|
||||
enum { LOCAL=1, READ_ONLY=2, WRITE_ONLY=4, READ_WRITE=6, CONSTANT=8, PTR_ONLY = 16, NO_SIZE=256 };
|
||||
KernelArg(int _flags, UMat* _m, int wscale=1, int iwscale=1, const void* _obj=0, size_t _sz=0);
|
||||
KernelArg();
|
||||
|
||||
static KernelArg Local() { return KernelArg(LOCAL, 0); }
|
||||
static KernelArg PtrWriteOnly(const UMat& m)
|
||||
{ return KernelArg(PTR_ONLY+WRITE_ONLY, (UMat*)&m); }
|
||||
static KernelArg PtrReadOnly(const UMat& m)
|
||||
{ return KernelArg(PTR_ONLY+READ_ONLY, (UMat*)&m); }
|
||||
static KernelArg PtrReadWrite(const UMat& m)
|
||||
{ return KernelArg(PTR_ONLY+READ_WRITE, (UMat*)&m); }
|
||||
static KernelArg ReadWrite(const UMat& m, int wscale=1, int iwscale=1)
|
||||
{ return KernelArg(READ_WRITE, (UMat*)&m, wscale, iwscale); }
|
||||
static KernelArg ReadWriteNoSize(const UMat& m, int wscale=1, int iwscale=1)
|
||||
{ return KernelArg(READ_WRITE+NO_SIZE, (UMat*)&m, wscale, iwscale); }
|
||||
static KernelArg ReadOnly(const UMat& m, int wscale=1, int iwscale=1)
|
||||
{ return KernelArg(READ_ONLY, (UMat*)&m, wscale, iwscale); }
|
||||
static KernelArg WriteOnly(const UMat& m, int wscale=1, int iwscale=1)
|
||||
{ return KernelArg(WRITE_ONLY, (UMat*)&m, wscale, iwscale); }
|
||||
static KernelArg ReadOnlyNoSize(const UMat& m, int wscale=1, int iwscale=1)
|
||||
{ return KernelArg(READ_ONLY+NO_SIZE, (UMat*)&m, wscale, iwscale); }
|
||||
static KernelArg WriteOnlyNoSize(const UMat& m, int wscale=1, int iwscale=1)
|
||||
{ return KernelArg(WRITE_ONLY+NO_SIZE, (UMat*)&m, wscale, iwscale); }
|
||||
static KernelArg Constant(const Mat& m);
|
||||
template<typename _Tp> static KernelArg Constant(const _Tp* arr, size_t n)
|
||||
{ return KernelArg(CONSTANT, 0, 1, 1, (void*)arr, n); }
|
||||
|
||||
int flags;
|
||||
UMat* m;
|
||||
const void* obj;
|
||||
size_t sz;
|
||||
int wscale, iwscale;
|
||||
};
|
||||
|
||||
|
||||
class CV_EXPORTS Kernel
|
||||
{
|
||||
public:
|
||||
Kernel();
|
||||
Kernel(const char* kname, const Program& prog);
|
||||
Kernel(const char* kname, const ProgramSource& prog,
|
||||
const String& buildopts = String(), String* errmsg=0);
|
||||
~Kernel();
|
||||
Kernel(const Kernel& k);
|
||||
Kernel& operator = (const Kernel& k);
|
||||
|
||||
bool empty() const;
|
||||
bool create(const char* kname, const Program& prog);
|
||||
bool create(const char* kname, const ProgramSource& prog,
|
||||
const String& buildopts, String* errmsg=0);
|
||||
|
||||
int set(int i, const void* value, size_t sz);
|
||||
int set(int i, const Image2D& image2D);
|
||||
int set(int i, const UMat& m);
|
||||
int set(int i, const KernelArg& arg);
|
||||
template<typename _Tp> int set(int i, const _Tp& value)
|
||||
{ return set(i, &value, sizeof(value)); }
|
||||
|
||||
template<typename _Tp0>
|
||||
Kernel& args(const _Tp0& a0)
|
||||
{
|
||||
set(0, a0); return *this;
|
||||
}
|
||||
|
||||
template<typename _Tp0, typename _Tp1>
|
||||
Kernel& args(const _Tp0& a0, const _Tp1& a1)
|
||||
{
|
||||
int i = set(0, a0); set(i, a1); return *this;
|
||||
}
|
||||
|
||||
template<typename _Tp0, typename _Tp1, typename _Tp2>
|
||||
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2)
|
||||
{
|
||||
int i = set(0, a0); i = set(i, a1); set(i, a2); return *this;
|
||||
}
|
||||
|
||||
template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3>
|
||||
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3)
|
||||
{
|
||||
int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); return *this;
|
||||
}
|
||||
|
||||
template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4>
|
||||
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2,
|
||||
const _Tp3& a3, const _Tp4& a4)
|
||||
{
|
||||
int i = set(0, a0); i = set(i, a1); i = set(i, a2);
|
||||
i = set(i, a3); set(i, a4); return *this;
|
||||
}
|
||||
|
||||
template<typename _Tp0, typename _Tp1, typename _Tp2,
|
||||
typename _Tp3, typename _Tp4, typename _Tp5>
|
||||
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2,
|
||||
const _Tp3& a3, const _Tp4& a4, const _Tp5& a5)
|
||||
{
|
||||
int i = set(0, a0); i = set(i, a1); i = set(i, a2);
|
||||
i = set(i, a3); i = set(i, a4); set(i, a5); return *this;
|
||||
}
|
||||
|
||||
template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
|
||||
typename _Tp4, typename _Tp5, typename _Tp6>
|
||||
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
|
||||
const _Tp4& a4, const _Tp5& a5, const _Tp6& a6)
|
||||
{
|
||||
int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3);
|
||||
i = set(i, a4); i = set(i, a5); set(i, a6); return *this;
|
||||
}
|
||||
|
||||
template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
|
||||
typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7>
|
||||
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
|
||||
const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7)
|
||||
{
|
||||
int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3);
|
||||
i = set(i, a4); i = set(i, a5); i = set(i, a6); set(i, a7); return *this;
|
||||
}
|
||||
|
||||
template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4,
|
||||
typename _Tp5, typename _Tp6, typename _Tp7, typename _Tp8>
|
||||
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
|
||||
const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
|
||||
const _Tp8& a8)
|
||||
{
|
||||
int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4);
|
||||
i = set(i, a5); i = set(i, a6); i = set(i, a7); set(i, a8); return *this;
|
||||
}
|
||||
|
||||
template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4,
|
||||
typename _Tp5, typename _Tp6, typename _Tp7, typename _Tp8, typename _Tp9>
|
||||
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
|
||||
const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
|
||||
const _Tp8& a8, const _Tp9& a9)
|
||||
{
|
||||
int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5);
|
||||
i = set(i, a6); i = set(i, a7); i = set(i, a8); set(i, a9); return *this;
|
||||
}
|
||||
|
||||
template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
|
||||
typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7,
|
||||
typename _Tp8, typename _Tp9, typename _Tp10>
|
||||
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
|
||||
const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
|
||||
const _Tp8& a8, const _Tp9& a9, const _Tp10& a10)
|
||||
{
|
||||
int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5);
|
||||
i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); set(i, a10); return *this;
|
||||
}
|
||||
|
||||
template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
|
||||
typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7,
|
||||
typename _Tp8, typename _Tp9, typename _Tp10, typename _Tp11>
|
||||
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
|
||||
const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
|
||||
const _Tp8& a8, const _Tp9& a9, const _Tp10& a10, const _Tp11& a11)
|
||||
{
|
||||
int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5);
|
||||
i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); i = set(i, a10); set(i, a11); return *this;
|
||||
}
|
||||
|
||||
template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
|
||||
typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7,
|
||||
typename _Tp8, typename _Tp9, typename _Tp10, typename _Tp11, typename _Tp12>
|
||||
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
|
||||
const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
|
||||
const _Tp8& a8, const _Tp9& a9, const _Tp10& a10, const _Tp11& a11,
|
||||
const _Tp12& a12)
|
||||
{
|
||||
int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5);
|
||||
i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); i = set(i, a10); i = set(i, a11);
|
||||
set(i, a12); return *this;
|
||||
}
|
||||
|
||||
template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
|
||||
typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7,
|
||||
typename _Tp8, typename _Tp9, typename _Tp10, typename _Tp11, typename _Tp12,
|
||||
typename _Tp13>
|
||||
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
|
||||
const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
|
||||
const _Tp8& a8, const _Tp9& a9, const _Tp10& a10, const _Tp11& a11,
|
||||
const _Tp12& a12, const _Tp13& a13)
|
||||
{
|
||||
int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5);
|
||||
i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); i = set(i, a10); i = set(i, a11);
|
||||
i = set(i, a12); set(i, a13); return *this;
|
||||
}
|
||||
|
||||
template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
|
||||
typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7,
|
||||
typename _Tp8, typename _Tp9, typename _Tp10, typename _Tp11, typename _Tp12,
|
||||
typename _Tp13, typename _Tp14>
|
||||
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
|
||||
const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
|
||||
const _Tp8& a8, const _Tp9& a9, const _Tp10& a10, const _Tp11& a11,
|
||||
const _Tp12& a12, const _Tp13& a13, const _Tp14& a14)
|
||||
{
|
||||
int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5);
|
||||
i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); i = set(i, a10); i = set(i, a11);
|
||||
i = set(i, a12); i = set(i, a13); set(i, a14); return *this;
|
||||
}
|
||||
|
||||
template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
|
||||
typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7,
|
||||
typename _Tp8, typename _Tp9, typename _Tp10, typename _Tp11, typename _Tp12,
|
||||
typename _Tp13, typename _Tp14, typename _Tp15>
|
||||
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
|
||||
const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
|
||||
const _Tp8& a8, const _Tp9& a9, const _Tp10& a10, const _Tp11& a11,
|
||||
const _Tp12& a12, const _Tp13& a13, const _Tp14& a14, const _Tp15& a15)
|
||||
{
|
||||
int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5);
|
||||
i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); i = set(i, a10); i = set(i, a11);
|
||||
i = set(i, a12); i = set(i, a13); i = set(i, a14); set(i, a15); return *this;
|
||||
}
|
||||
|
||||
bool run(int dims, size_t globalsize[],
|
||||
size_t localsize[], bool sync, const Queue& q=Queue());
|
||||
bool runTask(bool sync, const Queue& q=Queue());
|
||||
|
||||
size_t workGroupSize() const;
|
||||
size_t preferedWorkGroupSizeMultiple() const;
|
||||
bool compileWorkGroupSize(size_t wsz[]) const;
|
||||
size_t localMemSize() const;
|
||||
|
||||
void* ptr() const;
|
||||
struct Impl;
|
||||
|
||||
protected:
|
||||
Impl* p;
|
||||
};
|
||||
|
||||
class CV_EXPORTS Program
|
||||
{
|
||||
public:
|
||||
Program();
|
||||
Program(const ProgramSource& src,
|
||||
const String& buildflags, String& errmsg);
|
||||
explicit Program(const String& buf);
|
||||
Program(const Program& prog);
|
||||
|
||||
Program& operator = (const Program& prog);
|
||||
~Program();
|
||||
|
||||
bool create(const ProgramSource& src,
|
||||
const String& buildflags, String& errmsg);
|
||||
bool read(const String& buf, const String& buildflags);
|
||||
bool write(String& buf) const;
|
||||
|
||||
const ProgramSource& source() const;
|
||||
void* ptr() const;
|
||||
|
||||
String getPrefix() const;
|
||||
static String getPrefix(const String& buildflags);
|
||||
|
||||
protected:
|
||||
struct Impl;
|
||||
Impl* p;
|
||||
};
|
||||
|
||||
|
||||
class CV_EXPORTS ProgramSource
|
||||
{
|
||||
public:
|
||||
typedef uint64 hash_t;
|
||||
|
||||
ProgramSource();
|
||||
explicit ProgramSource(const String& prog);
|
||||
explicit ProgramSource(const char* prog);
|
||||
~ProgramSource();
|
||||
ProgramSource(const ProgramSource& prog);
|
||||
ProgramSource& operator = (const ProgramSource& prog);
|
||||
|
||||
const String& source() const;
|
||||
hash_t hash() const;
|
||||
|
||||
protected:
|
||||
struct Impl;
|
||||
Impl* p;
|
||||
};
|
||||
|
||||
class CV_EXPORTS PlatformInfo
|
||||
{
|
||||
public:
|
||||
PlatformInfo();
|
||||
explicit PlatformInfo(void* id);
|
||||
~PlatformInfo();
|
||||
|
||||
PlatformInfo(const PlatformInfo& i);
|
||||
PlatformInfo& operator =(const PlatformInfo& i);
|
||||
|
||||
String name() const;
|
||||
String vendor() const;
|
||||
String version() const;
|
||||
int deviceNumber() const;
|
||||
void getDevice(Device& device, int d) const;
|
||||
|
||||
protected:
|
||||
struct Impl;
|
||||
Impl* p;
|
||||
};
|
||||
|
||||
CV_EXPORTS const char* convertTypeStr(int sdepth, int ddepth, int cn, char* buf);
|
||||
CV_EXPORTS const char* typeToStr(int t);
|
||||
CV_EXPORTS const char* memopTypeToStr(int t);
|
||||
CV_EXPORTS const char* vecopTypeToStr(int t);
|
||||
CV_EXPORTS String kernelToStr(InputArray _kernel, int ddepth = -1, const char * name = NULL);
|
||||
CV_EXPORTS void getPlatfomsInfo(std::vector<PlatformInfo>& platform_info);
|
||||
|
||||
|
||||
enum OclVectorStrategy
|
||||
{
|
||||
// all matrices have its own vector width
|
||||
OCL_VECTOR_OWN = 0,
|
||||
// all matrices have maximal vector width among all matrices
|
||||
// (useful for cases when matrices have different data types)
|
||||
OCL_VECTOR_MAX = 1,
|
||||
|
||||
// default strategy
|
||||
OCL_VECTOR_DEFAULT = OCL_VECTOR_OWN
|
||||
};
|
||||
|
||||
CV_EXPORTS int predictOptimalVectorWidth(InputArray src1, InputArray src2 = noArray(), InputArray src3 = noArray(),
|
||||
InputArray src4 = noArray(), InputArray src5 = noArray(), InputArray src6 = noArray(),
|
||||
InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray(),
|
||||
OclVectorStrategy strat = OCL_VECTOR_DEFAULT);
|
||||
|
||||
CV_EXPORTS int checkOptimalVectorWidth(const int *vectorWidths,
|
||||
InputArray src1, InputArray src2 = noArray(), InputArray src3 = noArray(),
|
||||
InputArray src4 = noArray(), InputArray src5 = noArray(), InputArray src6 = noArray(),
|
||||
InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray(),
|
||||
OclVectorStrategy strat = OCL_VECTOR_DEFAULT);
|
||||
|
||||
// with OCL_VECTOR_MAX strategy
|
||||
CV_EXPORTS int predictOptimalVectorWidthMax(InputArray src1, InputArray src2 = noArray(), InputArray src3 = noArray(),
|
||||
InputArray src4 = noArray(), InputArray src5 = noArray(), InputArray src6 = noArray(),
|
||||
InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray());
|
||||
|
||||
CV_EXPORTS void buildOptionsAddMatrixDescription(String& buildOptions, const String& name, InputArray _m);
|
||||
|
||||
class CV_EXPORTS Image2D
|
||||
{
|
||||
public:
|
||||
Image2D();
|
||||
|
||||
// src: The UMat from which to get image properties and data
|
||||
// norm: Flag to enable the use of normalized channel data types
|
||||
// alias: Flag indicating that the image should alias the src UMat.
|
||||
// If true, changes to the image or src will be reflected in
|
||||
// both objects.
|
||||
explicit Image2D(const UMat &src, bool norm = false, bool alias = false);
|
||||
Image2D(const Image2D & i);
|
||||
~Image2D();
|
||||
|
||||
Image2D & operator = (const Image2D & i);
|
||||
|
||||
// Indicates if creating an aliased image should succeed. Depends on the
|
||||
// underlying platform and the dimensions of the UMat.
|
||||
static bool canCreateAlias(const UMat &u);
|
||||
|
||||
// Indicates if the image format is supported.
|
||||
static bool isFormatSupported(int depth, int cn, bool norm);
|
||||
|
||||
void* ptr() const;
|
||||
protected:
|
||||
struct Impl;
|
||||
Impl* p;
|
||||
};
|
||||
|
||||
|
||||
CV_EXPORTS MatAllocator* getOpenCLAllocator();
|
||||
|
||||
|
||||
#ifdef __OPENCV_BUILD
|
||||
namespace internal {
|
||||
|
||||
CV_EXPORTS bool isPerformanceCheckBypassed();
|
||||
#define OCL_PERFORMANCE_CHECK(condition) (cv::ocl::internal::isPerformanceCheckBypassed() || (condition))
|
||||
|
||||
CV_EXPORTS bool isCLBuffer(UMat& u);
|
||||
|
||||
} // namespace internal
|
||||
#endif
|
||||
|
||||
//! @}
|
||||
|
||||
}}
|
||||
|
||||
#endif
|
64
3rdparty/include/opencv2/core/ocl_genbase.hpp
vendored
Normal file
64
3rdparty/include/opencv2/core/ocl_genbase.hpp
vendored
Normal file
@ -0,0 +1,64 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the OpenCV Foundation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_OPENCL_GENBASE_HPP__
|
||||
#define __OPENCV_OPENCL_GENBASE_HPP__
|
||||
|
||||
namespace cv
|
||||
{
|
||||
namespace ocl
|
||||
{
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
struct ProgramEntry
|
||||
{
|
||||
const char* name;
|
||||
const char* programStr;
|
||||
const char* programHash;
|
||||
};
|
||||
|
||||
//! @endcond
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
684
3rdparty/include/opencv2/core/opengl.hpp
vendored
Normal file
684
3rdparty/include/opencv2/core/opengl.hpp
vendored
Normal file
@ -0,0 +1,684 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_CORE_OPENGL_HPP__
|
||||
#define __OPENCV_CORE_OPENGL_HPP__
|
||||
|
||||
#ifndef __cplusplus
|
||||
# error opengl.hpp header must be compiled as C++
|
||||
#endif
|
||||
|
||||
#include "opencv2/core.hpp"
|
||||
|
||||
namespace cv { namespace ogl {
|
||||
|
||||
/** @addtogroup core_opengl
|
||||
This section describes OpenGL interoperability.
|
||||
|
||||
To enable OpenGL support, configure OpenCV using CMake with WITH_OPENGL=ON . Currently OpenGL is
|
||||
supported only with WIN32, GTK and Qt backends on Windows and Linux (MacOS and Android are not
|
||||
supported). For GTK backend gtkglext-1.0 library is required.
|
||||
|
||||
To use OpenGL functionality you should first create OpenGL context (window or frame buffer). You can
|
||||
do this with namedWindow function or with other OpenGL toolkit (GLUT, for example).
|
||||
*/
|
||||
//! @{
|
||||
|
||||
/////////////////// OpenGL Objects ///////////////////
|
||||
|
||||
/** @brief Smart pointer for OpenGL buffer object with reference counting.
|
||||
|
||||
Buffer Objects are OpenGL objects that store an array of unformatted memory allocated by the OpenGL
|
||||
context. These can be used to store vertex data, pixel data retrieved from images or the
|
||||
framebuffer, and a variety of other things.
|
||||
|
||||
ogl::Buffer has interface similar with Mat interface and represents 2D array memory.
|
||||
|
||||
ogl::Buffer supports memory transfers between host and device and also can be mapped to CUDA memory.
|
||||
*/
|
||||
class CV_EXPORTS Buffer
|
||||
{
|
||||
public:
|
||||
/** @brief The target defines how you intend to use the buffer object.
|
||||
*/
|
||||
enum Target
|
||||
{
|
||||
ARRAY_BUFFER = 0x8892, //!< The buffer will be used as a source for vertex data
|
||||
ELEMENT_ARRAY_BUFFER = 0x8893, //!< The buffer will be used for indices (in glDrawElements, for example)
|
||||
PIXEL_PACK_BUFFER = 0x88EB, //!< The buffer will be used for reading from OpenGL textures
|
||||
PIXEL_UNPACK_BUFFER = 0x88EC //!< The buffer will be used for writing to OpenGL textures
|
||||
};
|
||||
|
||||
enum Access
|
||||
{
|
||||
READ_ONLY = 0x88B8,
|
||||
WRITE_ONLY = 0x88B9,
|
||||
READ_WRITE = 0x88BA
|
||||
};
|
||||
|
||||
/** @brief The constructors.
|
||||
|
||||
Creates empty ogl::Buffer object, creates ogl::Buffer object from existed buffer ( abufId
|
||||
parameter), allocates memory for ogl::Buffer object or copies from host/device memory.
|
||||
*/
|
||||
Buffer();
|
||||
|
||||
/** @overload
|
||||
@param arows Number of rows in a 2D array.
|
||||
@param acols Number of columns in a 2D array.
|
||||
@param atype Array type ( CV_8UC1, ..., CV_64FC4 ). See Mat for details.
|
||||
@param abufId Buffer object name.
|
||||
@param autoRelease Auto release mode (if true, release will be called in object's destructor).
|
||||
*/
|
||||
Buffer(int arows, int acols, int atype, unsigned int abufId, bool autoRelease = false);
|
||||
|
||||
/** @overload
|
||||
@param asize 2D array size.
|
||||
@param atype Array type ( CV_8UC1, ..., CV_64FC4 ). See Mat for details.
|
||||
@param abufId Buffer object name.
|
||||
@param autoRelease Auto release mode (if true, release will be called in object's destructor).
|
||||
*/
|
||||
Buffer(Size asize, int atype, unsigned int abufId, bool autoRelease = false);
|
||||
|
||||
/** @overload
|
||||
@param arows Number of rows in a 2D array.
|
||||
@param acols Number of columns in a 2D array.
|
||||
@param atype Array type ( CV_8UC1, ..., CV_64FC4 ). See Mat for details.
|
||||
@param target Buffer usage. See cv::ogl::Buffer::Target .
|
||||
@param autoRelease Auto release mode (if true, release will be called in object's destructor).
|
||||
*/
|
||||
Buffer(int arows, int acols, int atype, Target target = ARRAY_BUFFER, bool autoRelease = false);
|
||||
|
||||
/** @overload
|
||||
@param asize 2D array size.
|
||||
@param atype Array type ( CV_8UC1, ..., CV_64FC4 ). See Mat for details.
|
||||
@param target Buffer usage. See cv::ogl::Buffer::Target .
|
||||
@param autoRelease Auto release mode (if true, release will be called in object's destructor).
|
||||
*/
|
||||
Buffer(Size asize, int atype, Target target = ARRAY_BUFFER, bool autoRelease = false);
|
||||
|
||||
/** @overload
|
||||
@param arr Input array (host or device memory, it can be Mat , cuda::GpuMat or std::vector ).
|
||||
@param target Buffer usage. See cv::ogl::Buffer::Target .
|
||||
@param autoRelease Auto release mode (if true, release will be called in object's destructor).
|
||||
*/
|
||||
explicit Buffer(InputArray arr, Target target = ARRAY_BUFFER, bool autoRelease = false);
|
||||
|
||||
/** @brief Allocates memory for ogl::Buffer object.
|
||||
|
||||
@param arows Number of rows in a 2D array.
|
||||
@param acols Number of columns in a 2D array.
|
||||
@param atype Array type ( CV_8UC1, ..., CV_64FC4 ). See Mat for details.
|
||||
@param target Buffer usage. See cv::ogl::Buffer::Target .
|
||||
@param autoRelease Auto release mode (if true, release will be called in object's destructor).
|
||||
*/
|
||||
void create(int arows, int acols, int atype, Target target = ARRAY_BUFFER, bool autoRelease = false);
|
||||
|
||||
/** @overload
|
||||
@param asize 2D array size.
|
||||
@param atype Array type ( CV_8UC1, ..., CV_64FC4 ). See Mat for details.
|
||||
@param target Buffer usage. See cv::ogl::Buffer::Target .
|
||||
@param autoRelease Auto release mode (if true, release will be called in object's destructor).
|
||||
*/
|
||||
void create(Size asize, int atype, Target target = ARRAY_BUFFER, bool autoRelease = false);
|
||||
|
||||
/** @brief Decrements the reference counter and destroys the buffer object if needed.
|
||||
|
||||
The function will call setAutoRelease(true) .
|
||||
*/
|
||||
void release();
|
||||
|
||||
/** @brief Sets auto release mode.
|
||||
|
||||
The lifetime of the OpenGL object is tied to the lifetime of the context. If OpenGL context was
|
||||
bound to a window it could be released at any time (user can close a window). If object's destructor
|
||||
is called after destruction of the context it will cause an error. Thus ogl::Buffer doesn't destroy
|
||||
OpenGL object in destructor by default (all OpenGL resources will be released with OpenGL context).
|
||||
This function can force ogl::Buffer destructor to destroy OpenGL object.
|
||||
@param flag Auto release mode (if true, release will be called in object's destructor).
|
||||
*/
|
||||
void setAutoRelease(bool flag);
|
||||
|
||||
/** @brief Copies from host/device memory to OpenGL buffer.
|
||||
@param arr Input array (host or device memory, it can be Mat , cuda::GpuMat or std::vector ).
|
||||
@param target Buffer usage. See cv::ogl::Buffer::Target .
|
||||
@param autoRelease Auto release mode (if true, release will be called in object's destructor).
|
||||
*/
|
||||
void copyFrom(InputArray arr, Target target = ARRAY_BUFFER, bool autoRelease = false);
|
||||
|
||||
/** @overload */
|
||||
void copyFrom(InputArray arr, cuda::Stream& stream, Target target = ARRAY_BUFFER, bool autoRelease = false);
|
||||
|
||||
/** @brief Copies from OpenGL buffer to host/device memory or another OpenGL buffer object.
|
||||
|
||||
@param arr Destination array (host or device memory, can be Mat , cuda::GpuMat , std::vector or
|
||||
ogl::Buffer ).
|
||||
*/
|
||||
void copyTo(OutputArray arr) const;
|
||||
|
||||
/** @overload */
|
||||
void copyTo(OutputArray arr, cuda::Stream& stream) const;
|
||||
|
||||
/** @brief Creates a full copy of the buffer object and the underlying data.
|
||||
|
||||
@param target Buffer usage for destination buffer.
|
||||
@param autoRelease Auto release mode for destination buffer.
|
||||
*/
|
||||
Buffer clone(Target target = ARRAY_BUFFER, bool autoRelease = false) const;
|
||||
|
||||
/** @brief Binds OpenGL buffer to the specified buffer binding point.
|
||||
|
||||
@param target Binding point. See cv::ogl::Buffer::Target .
|
||||
*/
|
||||
void bind(Target target) const;
|
||||
|
||||
/** @brief Unbind any buffers from the specified binding point.
|
||||
|
||||
@param target Binding point. See cv::ogl::Buffer::Target .
|
||||
*/
|
||||
static void unbind(Target target);
|
||||
|
||||
/** @brief Maps OpenGL buffer to host memory.
|
||||
|
||||
mapHost maps to the client's address space the entire data store of the buffer object. The data can
|
||||
then be directly read and/or written relative to the returned pointer, depending on the specified
|
||||
access policy.
|
||||
|
||||
A mapped data store must be unmapped with ogl::Buffer::unmapHost before its buffer object is used.
|
||||
|
||||
This operation can lead to memory transfers between host and device.
|
||||
|
||||
Only one buffer object can be mapped at a time.
|
||||
@param access Access policy, indicating whether it will be possible to read from, write to, or both
|
||||
read from and write to the buffer object's mapped data store. The symbolic constant must be
|
||||
ogl::Buffer::READ_ONLY , ogl::Buffer::WRITE_ONLY or ogl::Buffer::READ_WRITE .
|
||||
*/
|
||||
Mat mapHost(Access access);
|
||||
|
||||
/** @brief Unmaps OpenGL buffer.
|
||||
*/
|
||||
void unmapHost();
|
||||
|
||||
//! map to device memory (blocking)
|
||||
cuda::GpuMat mapDevice();
|
||||
void unmapDevice();
|
||||
|
||||
/** @brief Maps OpenGL buffer to CUDA device memory.
|
||||
|
||||
This operatation doesn't copy data. Several buffer objects can be mapped to CUDA memory at a time.
|
||||
|
||||
A mapped data store must be unmapped with ogl::Buffer::unmapDevice before its buffer object is used.
|
||||
*/
|
||||
cuda::GpuMat mapDevice(cuda::Stream& stream);
|
||||
|
||||
/** @brief Unmaps OpenGL buffer.
|
||||
*/
|
||||
void unmapDevice(cuda::Stream& stream);
|
||||
|
||||
int rows() const;
|
||||
int cols() const;
|
||||
Size size() const;
|
||||
bool empty() const;
|
||||
|
||||
int type() const;
|
||||
int depth() const;
|
||||
int channels() const;
|
||||
int elemSize() const;
|
||||
int elemSize1() const;
|
||||
|
||||
//! get OpenGL opject id
|
||||
unsigned int bufId() const;
|
||||
|
||||
class Impl;
|
||||
|
||||
private:
|
||||
Ptr<Impl> impl_;
|
||||
int rows_;
|
||||
int cols_;
|
||||
int type_;
|
||||
};
|
||||
|
||||
/** @brief Smart pointer for OpenGL 2D texture memory with reference counting.
|
||||
*/
|
||||
class CV_EXPORTS Texture2D
|
||||
{
|
||||
public:
|
||||
/** @brief An Image Format describes the way that the images in Textures store their data.
|
||||
*/
|
||||
enum Format
|
||||
{
|
||||
NONE = 0,
|
||||
DEPTH_COMPONENT = 0x1902, //!< Depth
|
||||
RGB = 0x1907, //!< Red, Green, Blue
|
||||
RGBA = 0x1908 //!< Red, Green, Blue, Alpha
|
||||
};
|
||||
|
||||
/** @brief The constructors.
|
||||
|
||||
Creates empty ogl::Texture2D object, allocates memory for ogl::Texture2D object or copies from
|
||||
host/device memory.
|
||||
*/
|
||||
Texture2D();
|
||||
|
||||
/** @overload */
|
||||
Texture2D(int arows, int acols, Format aformat, unsigned int atexId, bool autoRelease = false);
|
||||
|
||||
/** @overload */
|
||||
Texture2D(Size asize, Format aformat, unsigned int atexId, bool autoRelease = false);
|
||||
|
||||
/** @overload
|
||||
@param arows Number of rows.
|
||||
@param acols Number of columns.
|
||||
@param aformat Image format. See cv::ogl::Texture2D::Format .
|
||||
@param autoRelease Auto release mode (if true, release will be called in object's destructor).
|
||||
*/
|
||||
Texture2D(int arows, int acols, Format aformat, bool autoRelease = false);
|
||||
|
||||
/** @overload
|
||||
@param asize 2D array size.
|
||||
@param aformat Image format. See cv::ogl::Texture2D::Format .
|
||||
@param autoRelease Auto release mode (if true, release will be called in object's destructor).
|
||||
*/
|
||||
Texture2D(Size asize, Format aformat, bool autoRelease = false);
|
||||
|
||||
/** @overload
|
||||
@param arr Input array (host or device memory, it can be Mat , cuda::GpuMat or ogl::Buffer ).
|
||||
@param autoRelease Auto release mode (if true, release will be called in object's destructor).
|
||||
*/
|
||||
explicit Texture2D(InputArray arr, bool autoRelease = false);
|
||||
|
||||
/** @brief Allocates memory for ogl::Texture2D object.
|
||||
|
||||
@param arows Number of rows.
|
||||
@param acols Number of columns.
|
||||
@param aformat Image format. See cv::ogl::Texture2D::Format .
|
||||
@param autoRelease Auto release mode (if true, release will be called in object's destructor).
|
||||
*/
|
||||
void create(int arows, int acols, Format aformat, bool autoRelease = false);
|
||||
/** @overload
|
||||
@param asize 2D array size.
|
||||
@param aformat Image format. See cv::ogl::Texture2D::Format .
|
||||
@param autoRelease Auto release mode (if true, release will be called in object's destructor).
|
||||
*/
|
||||
void create(Size asize, Format aformat, bool autoRelease = false);
|
||||
|
||||
/** @brief Decrements the reference counter and destroys the texture object if needed.
|
||||
|
||||
The function will call setAutoRelease(true) .
|
||||
*/
|
||||
void release();
|
||||
|
||||
/** @brief Sets auto release mode.
|
||||
|
||||
@param flag Auto release mode (if true, release will be called in object's destructor).
|
||||
|
||||
The lifetime of the OpenGL object is tied to the lifetime of the context. If OpenGL context was
|
||||
bound to a window it could be released at any time (user can close a window). If object's destructor
|
||||
is called after destruction of the context it will cause an error. Thus ogl::Texture2D doesn't
|
||||
destroy OpenGL object in destructor by default (all OpenGL resources will be released with OpenGL
|
||||
context). This function can force ogl::Texture2D destructor to destroy OpenGL object.
|
||||
*/
|
||||
void setAutoRelease(bool flag);
|
||||
|
||||
/** @brief Copies from host/device memory to OpenGL texture.
|
||||
|
||||
@param arr Input array (host or device memory, it can be Mat , cuda::GpuMat or ogl::Buffer ).
|
||||
@param autoRelease Auto release mode (if true, release will be called in object's destructor).
|
||||
*/
|
||||
void copyFrom(InputArray arr, bool autoRelease = false);
|
||||
|
||||
/** @brief Copies from OpenGL texture to host/device memory or another OpenGL texture object.
|
||||
|
||||
@param arr Destination array (host or device memory, can be Mat , cuda::GpuMat , ogl::Buffer or
|
||||
ogl::Texture2D ).
|
||||
@param ddepth Destination depth.
|
||||
@param autoRelease Auto release mode for destination buffer (if arr is OpenGL buffer or texture).
|
||||
*/
|
||||
void copyTo(OutputArray arr, int ddepth = CV_32F, bool autoRelease = false) const;
|
||||
|
||||
/** @brief Binds texture to current active texture unit for GL_TEXTURE_2D target.
|
||||
*/
|
||||
void bind() const;
|
||||
|
||||
int rows() const;
|
||||
int cols() const;
|
||||
Size size() const;
|
||||
bool empty() const;
|
||||
|
||||
Format format() const;
|
||||
|
||||
//! get OpenGL opject id
|
||||
unsigned int texId() const;
|
||||
|
||||
class Impl;
|
||||
|
||||
private:
|
||||
Ptr<Impl> impl_;
|
||||
int rows_;
|
||||
int cols_;
|
||||
Format format_;
|
||||
};
|
||||
|
||||
/** @brief Wrapper for OpenGL Client-Side Vertex arrays.
|
||||
|
||||
ogl::Arrays stores vertex data in ogl::Buffer objects.
|
||||
*/
|
||||
class CV_EXPORTS Arrays
|
||||
{
|
||||
public:
|
||||
/** @brief Default constructor
|
||||
*/
|
||||
Arrays();
|
||||
|
||||
/** @brief Sets an array of vertex coordinates.
|
||||
@param vertex array with vertex coordinates, can be both host and device memory.
|
||||
*/
|
||||
void setVertexArray(InputArray vertex);
|
||||
|
||||
/** @brief Resets vertex coordinates.
|
||||
*/
|
||||
void resetVertexArray();
|
||||
|
||||
/** @brief Sets an array of vertex colors.
|
||||
@param color array with vertex colors, can be both host and device memory.
|
||||
*/
|
||||
void setColorArray(InputArray color);
|
||||
|
||||
/** @brief Resets vertex colors.
|
||||
*/
|
||||
void resetColorArray();
|
||||
|
||||
/** @brief Sets an array of vertex normals.
|
||||
@param normal array with vertex normals, can be both host and device memory.
|
||||
*/
|
||||
void setNormalArray(InputArray normal);
|
||||
|
||||
/** @brief Resets vertex normals.
|
||||
*/
|
||||
void resetNormalArray();
|
||||
|
||||
/** @brief Sets an array of vertex texture coordinates.
|
||||
@param texCoord array with vertex texture coordinates, can be both host and device memory.
|
||||
*/
|
||||
void setTexCoordArray(InputArray texCoord);
|
||||
|
||||
/** @brief Resets vertex texture coordinates.
|
||||
*/
|
||||
void resetTexCoordArray();
|
||||
|
||||
/** @brief Releases all inner buffers.
|
||||
*/
|
||||
void release();
|
||||
|
||||
/** @brief Sets auto release mode all inner buffers.
|
||||
@param flag Auto release mode.
|
||||
*/
|
||||
void setAutoRelease(bool flag);
|
||||
|
||||
/** @brief Binds all vertex arrays.
|
||||
*/
|
||||
void bind() const;
|
||||
|
||||
/** @brief Returns the vertex count.
|
||||
*/
|
||||
int size() const;
|
||||
bool empty() const;
|
||||
|
||||
private:
|
||||
int size_;
|
||||
Buffer vertex_;
|
||||
Buffer color_;
|
||||
Buffer normal_;
|
||||
Buffer texCoord_;
|
||||
};
|
||||
|
||||
/////////////////// Render Functions ///////////////////
|
||||
|
||||
//! render mode
|
||||
enum RenderModes {
|
||||
POINTS = 0x0000,
|
||||
LINES = 0x0001,
|
||||
LINE_LOOP = 0x0002,
|
||||
LINE_STRIP = 0x0003,
|
||||
TRIANGLES = 0x0004,
|
||||
TRIANGLE_STRIP = 0x0005,
|
||||
TRIANGLE_FAN = 0x0006,
|
||||
QUADS = 0x0007,
|
||||
QUAD_STRIP = 0x0008,
|
||||
POLYGON = 0x0009
|
||||
};
|
||||
|
||||
/** @brief Render OpenGL texture or primitives.
|
||||
@param tex Texture to draw.
|
||||
@param wndRect Region of window, where to draw a texture (normalized coordinates).
|
||||
@param texRect Region of texture to draw (normalized coordinates).
|
||||
*/
|
||||
CV_EXPORTS void render(const Texture2D& tex,
|
||||
Rect_<double> wndRect = Rect_<double>(0.0, 0.0, 1.0, 1.0),
|
||||
Rect_<double> texRect = Rect_<double>(0.0, 0.0, 1.0, 1.0));
|
||||
|
||||
/** @overload
|
||||
@param arr Array of privitives vertices.
|
||||
@param mode Render mode. One of cv::ogl::RenderModes
|
||||
@param color Color for all vertices. Will be used if arr doesn't contain color array.
|
||||
*/
|
||||
CV_EXPORTS void render(const Arrays& arr, int mode = POINTS, Scalar color = Scalar::all(255));
|
||||
|
||||
/** @overload
|
||||
@param arr Array of privitives vertices.
|
||||
@param indices Array of vertices indices (host or device memory).
|
||||
@param mode Render mode. One of cv::ogl::RenderModes
|
||||
@param color Color for all vertices. Will be used if arr doesn't contain color array.
|
||||
*/
|
||||
CV_EXPORTS void render(const Arrays& arr, InputArray indices, int mode = POINTS, Scalar color = Scalar::all(255));
|
||||
|
||||
//! @} core_opengl
|
||||
|
||||
}} // namespace cv::ogl
|
||||
|
||||
namespace cv { namespace cuda {
|
||||
|
||||
//! @addtogroup cuda
|
||||
//! @{
|
||||
|
||||
/** @brief Sets a CUDA device and initializes it for the current thread with OpenGL interoperability.
|
||||
|
||||
This function should be explicitly called after OpenGL context creation and before any CUDA calls.
|
||||
@param device System index of a CUDA device starting with 0.
|
||||
@ingroup core_opengl
|
||||
*/
|
||||
CV_EXPORTS void setGlDevice(int device = 0);
|
||||
|
||||
//! @}
|
||||
|
||||
}}
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
|
||||
inline
|
||||
cv::ogl::Buffer::Buffer(int arows, int acols, int atype, Target target, bool autoRelease) : rows_(0), cols_(0), type_(0)
|
||||
{
|
||||
create(arows, acols, atype, target, autoRelease);
|
||||
}
|
||||
|
||||
inline
|
||||
cv::ogl::Buffer::Buffer(Size asize, int atype, Target target, bool autoRelease) : rows_(0), cols_(0), type_(0)
|
||||
{
|
||||
create(asize, atype, target, autoRelease);
|
||||
}
|
||||
|
||||
inline
|
||||
void cv::ogl::Buffer::create(Size asize, int atype, Target target, bool autoRelease)
|
||||
{
|
||||
create(asize.height, asize.width, atype, target, autoRelease);
|
||||
}
|
||||
|
||||
inline
|
||||
int cv::ogl::Buffer::rows() const
|
||||
{
|
||||
return rows_;
|
||||
}
|
||||
|
||||
inline
|
||||
int cv::ogl::Buffer::cols() const
|
||||
{
|
||||
return cols_;
|
||||
}
|
||||
|
||||
inline
|
||||
cv::Size cv::ogl::Buffer::size() const
|
||||
{
|
||||
return Size(cols_, rows_);
|
||||
}
|
||||
|
||||
inline
|
||||
bool cv::ogl::Buffer::empty() const
|
||||
{
|
||||
return rows_ == 0 || cols_ == 0;
|
||||
}
|
||||
|
||||
inline
|
||||
int cv::ogl::Buffer::type() const
|
||||
{
|
||||
return type_;
|
||||
}
|
||||
|
||||
inline
|
||||
int cv::ogl::Buffer::depth() const
|
||||
{
|
||||
return CV_MAT_DEPTH(type_);
|
||||
}
|
||||
|
||||
inline
|
||||
int cv::ogl::Buffer::channels() const
|
||||
{
|
||||
return CV_MAT_CN(type_);
|
||||
}
|
||||
|
||||
inline
|
||||
int cv::ogl::Buffer::elemSize() const
|
||||
{
|
||||
return CV_ELEM_SIZE(type_);
|
||||
}
|
||||
|
||||
inline
|
||||
int cv::ogl::Buffer::elemSize1() const
|
||||
{
|
||||
return CV_ELEM_SIZE1(type_);
|
||||
}
|
||||
|
||||
///////
|
||||
|
||||
inline
|
||||
cv::ogl::Texture2D::Texture2D(int arows, int acols, Format aformat, bool autoRelease) : rows_(0), cols_(0), format_(NONE)
|
||||
{
|
||||
create(arows, acols, aformat, autoRelease);
|
||||
}
|
||||
|
||||
inline
|
||||
cv::ogl::Texture2D::Texture2D(Size asize, Format aformat, bool autoRelease) : rows_(0), cols_(0), format_(NONE)
|
||||
{
|
||||
create(asize, aformat, autoRelease);
|
||||
}
|
||||
|
||||
inline
|
||||
void cv::ogl::Texture2D::create(Size asize, Format aformat, bool autoRelease)
|
||||
{
|
||||
create(asize.height, asize.width, aformat, autoRelease);
|
||||
}
|
||||
|
||||
inline
|
||||
int cv::ogl::Texture2D::rows() const
|
||||
{
|
||||
return rows_;
|
||||
}
|
||||
|
||||
inline
|
||||
int cv::ogl::Texture2D::cols() const
|
||||
{
|
||||
return cols_;
|
||||
}
|
||||
|
||||
inline
|
||||
cv::Size cv::ogl::Texture2D::size() const
|
||||
{
|
||||
return Size(cols_, rows_);
|
||||
}
|
||||
|
||||
inline
|
||||
bool cv::ogl::Texture2D::empty() const
|
||||
{
|
||||
return rows_ == 0 || cols_ == 0;
|
||||
}
|
||||
|
||||
inline
|
||||
cv::ogl::Texture2D::Format cv::ogl::Texture2D::format() const
|
||||
{
|
||||
return format_;
|
||||
}
|
||||
|
||||
///////
|
||||
|
||||
inline
|
||||
cv::ogl::Arrays::Arrays() : size_(0)
|
||||
{
|
||||
}
|
||||
|
||||
inline
|
||||
int cv::ogl::Arrays::size() const
|
||||
{
|
||||
return size_;
|
||||
}
|
||||
|
||||
inline
|
||||
bool cv::ogl::Arrays::empty() const
|
||||
{
|
||||
return size_ == 0;
|
||||
}
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif /* __OPENCV_CORE_OPENGL_HPP__ */
|
530
3rdparty/include/opencv2/core/operations.hpp
vendored
Normal file
530
3rdparty/include/opencv2/core/operations.hpp
vendored
Normal file
@ -0,0 +1,530 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Copyright (C) 2015, Itseez Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_CORE_OPERATIONS_HPP__
|
||||
#define __OPENCV_CORE_OPERATIONS_HPP__
|
||||
|
||||
#ifndef __cplusplus
|
||||
# error operations.hpp header must be compiled as C++
|
||||
#endif
|
||||
|
||||
#include <cstdio>
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv
|
||||
{
|
||||
|
||||
////////////////////////////// Matx methods depending on core API /////////////////////////////
|
||||
|
||||
namespace internal
|
||||
{
|
||||
|
||||
template<typename _Tp, int m> struct Matx_FastInvOp
|
||||
{
|
||||
bool operator()(const Matx<_Tp, m, m>& a, Matx<_Tp, m, m>& b, int method) const
|
||||
{
|
||||
Matx<_Tp, m, m> temp = a;
|
||||
|
||||
// assume that b is all 0's on input => make it a unity matrix
|
||||
for( int i = 0; i < m; i++ )
|
||||
b(i, i) = (_Tp)1;
|
||||
|
||||
if( method == DECOMP_CHOLESKY )
|
||||
return Cholesky(temp.val, m*sizeof(_Tp), m, b.val, m*sizeof(_Tp), m);
|
||||
|
||||
return LU(temp.val, m*sizeof(_Tp), m, b.val, m*sizeof(_Tp), m) != 0;
|
||||
}
|
||||
};
|
||||
|
||||
template<typename _Tp> struct Matx_FastInvOp<_Tp, 2>
|
||||
{
|
||||
bool operator()(const Matx<_Tp, 2, 2>& a, Matx<_Tp, 2, 2>& b, int) const
|
||||
{
|
||||
_Tp d = determinant(a);
|
||||
if( d == 0 )
|
||||
return false;
|
||||
d = 1/d;
|
||||
b(1,1) = a(0,0)*d;
|
||||
b(0,0) = a(1,1)*d;
|
||||
b(0,1) = -a(0,1)*d;
|
||||
b(1,0) = -a(1,0)*d;
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
template<typename _Tp> struct Matx_FastInvOp<_Tp, 3>
|
||||
{
|
||||
bool operator()(const Matx<_Tp, 3, 3>& a, Matx<_Tp, 3, 3>& b, int) const
|
||||
{
|
||||
_Tp d = (_Tp)determinant(a);
|
||||
if( d == 0 )
|
||||
return false;
|
||||
d = 1/d;
|
||||
b(0,0) = (a(1,1) * a(2,2) - a(1,2) * a(2,1)) * d;
|
||||
b(0,1) = (a(0,2) * a(2,1) - a(0,1) * a(2,2)) * d;
|
||||
b(0,2) = (a(0,1) * a(1,2) - a(0,2) * a(1,1)) * d;
|
||||
|
||||
b(1,0) = (a(1,2) * a(2,0) - a(1,0) * a(2,2)) * d;
|
||||
b(1,1) = (a(0,0) * a(2,2) - a(0,2) * a(2,0)) * d;
|
||||
b(1,2) = (a(0,2) * a(1,0) - a(0,0) * a(1,2)) * d;
|
||||
|
||||
b(2,0) = (a(1,0) * a(2,1) - a(1,1) * a(2,0)) * d;
|
||||
b(2,1) = (a(0,1) * a(2,0) - a(0,0) * a(2,1)) * d;
|
||||
b(2,2) = (a(0,0) * a(1,1) - a(0,1) * a(1,0)) * d;
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template<typename _Tp, int m, int n> struct Matx_FastSolveOp
|
||||
{
|
||||
bool operator()(const Matx<_Tp, m, m>& a, const Matx<_Tp, m, n>& b,
|
||||
Matx<_Tp, m, n>& x, int method) const
|
||||
{
|
||||
Matx<_Tp, m, m> temp = a;
|
||||
x = b;
|
||||
if( method == DECOMP_CHOLESKY )
|
||||
return Cholesky(temp.val, m*sizeof(_Tp), m, x.val, n*sizeof(_Tp), n);
|
||||
|
||||
return LU(temp.val, m*sizeof(_Tp), m, x.val, n*sizeof(_Tp), n) != 0;
|
||||
}
|
||||
};
|
||||
|
||||
template<typename _Tp> struct Matx_FastSolveOp<_Tp, 2, 1>
|
||||
{
|
||||
bool operator()(const Matx<_Tp, 2, 2>& a, const Matx<_Tp, 2, 1>& b,
|
||||
Matx<_Tp, 2, 1>& x, int) const
|
||||
{
|
||||
_Tp d = determinant(a);
|
||||
if( d == 0 )
|
||||
return false;
|
||||
d = 1/d;
|
||||
x(0) = (b(0)*a(1,1) - b(1)*a(0,1))*d;
|
||||
x(1) = (b(1)*a(0,0) - b(0)*a(1,0))*d;
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
template<typename _Tp> struct Matx_FastSolveOp<_Tp, 3, 1>
|
||||
{
|
||||
bool operator()(const Matx<_Tp, 3, 3>& a, const Matx<_Tp, 3, 1>& b,
|
||||
Matx<_Tp, 3, 1>& x, int) const
|
||||
{
|
||||
_Tp d = (_Tp)determinant(a);
|
||||
if( d == 0 )
|
||||
return false;
|
||||
d = 1/d;
|
||||
x(0) = d*(b(0)*(a(1,1)*a(2,2) - a(1,2)*a(2,1)) -
|
||||
a(0,1)*(b(1)*a(2,2) - a(1,2)*b(2)) +
|
||||
a(0,2)*(b(1)*a(2,1) - a(1,1)*b(2)));
|
||||
|
||||
x(1) = d*(a(0,0)*(b(1)*a(2,2) - a(1,2)*b(2)) -
|
||||
b(0)*(a(1,0)*a(2,2) - a(1,2)*a(2,0)) +
|
||||
a(0,2)*(a(1,0)*b(2) - b(1)*a(2,0)));
|
||||
|
||||
x(2) = d*(a(0,0)*(a(1,1)*b(2) - b(1)*a(2,1)) -
|
||||
a(0,1)*(a(1,0)*b(2) - b(1)*a(2,0)) +
|
||||
b(0)*(a(1,0)*a(2,1) - a(1,1)*a(2,0)));
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
} // internal
|
||||
|
||||
template<typename _Tp, int m, int n> inline
|
||||
Matx<_Tp,m,n> Matx<_Tp,m,n>::randu(_Tp a, _Tp b)
|
||||
{
|
||||
Matx<_Tp,m,n> M;
|
||||
cv::randu(M, Scalar(a), Scalar(b));
|
||||
return M;
|
||||
}
|
||||
|
||||
template<typename _Tp, int m, int n> inline
|
||||
Matx<_Tp,m,n> Matx<_Tp,m,n>::randn(_Tp a, _Tp b)
|
||||
{
|
||||
Matx<_Tp,m,n> M;
|
||||
cv::randn(M, Scalar(a), Scalar(b));
|
||||
return M;
|
||||
}
|
||||
|
||||
template<typename _Tp, int m, int n> inline
|
||||
Matx<_Tp, n, m> Matx<_Tp, m, n>::inv(int method, bool *p_is_ok /*= NULL*/) const
|
||||
{
|
||||
Matx<_Tp, n, m> b;
|
||||
bool ok;
|
||||
if( method == DECOMP_LU || method == DECOMP_CHOLESKY )
|
||||
ok = cv::internal::Matx_FastInvOp<_Tp, m>()(*this, b, method);
|
||||
else
|
||||
{
|
||||
Mat A(*this, false), B(b, false);
|
||||
ok = (invert(A, B, method) != 0);
|
||||
}
|
||||
if( NULL != p_is_ok ) { *p_is_ok = ok; }
|
||||
return ok ? b : Matx<_Tp, n, m>::zeros();
|
||||
}
|
||||
|
||||
template<typename _Tp, int m, int n> template<int l> inline
|
||||
Matx<_Tp, n, l> Matx<_Tp, m, n>::solve(const Matx<_Tp, m, l>& rhs, int method) const
|
||||
{
|
||||
Matx<_Tp, n, l> x;
|
||||
bool ok;
|
||||
if( method == DECOMP_LU || method == DECOMP_CHOLESKY )
|
||||
ok = cv::internal::Matx_FastSolveOp<_Tp, m, l>()(*this, rhs, x, method);
|
||||
else
|
||||
{
|
||||
Mat A(*this, false), B(rhs, false), X(x, false);
|
||||
ok = cv::solve(A, B, X, method);
|
||||
}
|
||||
|
||||
return ok ? x : Matx<_Tp, n, l>::zeros();
|
||||
}
|
||||
|
||||
|
||||
|
||||
////////////////////////// Augmenting algebraic & logical operations //////////////////////////
|
||||
|
||||
#define CV_MAT_AUG_OPERATOR1(op, cvop, A, B) \
|
||||
static inline A& operator op (A& a, const B& b) { cvop; return a; }
|
||||
|
||||
#define CV_MAT_AUG_OPERATOR(op, cvop, A, B) \
|
||||
CV_MAT_AUG_OPERATOR1(op, cvop, A, B) \
|
||||
CV_MAT_AUG_OPERATOR1(op, cvop, const A, B)
|
||||
|
||||
#define CV_MAT_AUG_OPERATOR_T(op, cvop, A, B) \
|
||||
template<typename _Tp> CV_MAT_AUG_OPERATOR1(op, cvop, A, B) \
|
||||
template<typename _Tp> CV_MAT_AUG_OPERATOR1(op, cvop, const A, B)
|
||||
|
||||
CV_MAT_AUG_OPERATOR (+=, cv::add(a,b,a), Mat, Mat)
|
||||
CV_MAT_AUG_OPERATOR (+=, cv::add(a,b,a), Mat, Scalar)
|
||||
CV_MAT_AUG_OPERATOR_T(+=, cv::add(a,b,a), Mat_<_Tp>, Mat)
|
||||
CV_MAT_AUG_OPERATOR_T(+=, cv::add(a,b,a), Mat_<_Tp>, Scalar)
|
||||
CV_MAT_AUG_OPERATOR_T(+=, cv::add(a,b,a), Mat_<_Tp>, Mat_<_Tp>)
|
||||
|
||||
CV_MAT_AUG_OPERATOR (-=, cv::subtract(a,b,a), Mat, Mat)
|
||||
CV_MAT_AUG_OPERATOR (-=, cv::subtract(a,b,a), Mat, Scalar)
|
||||
CV_MAT_AUG_OPERATOR_T(-=, cv::subtract(a,b,a), Mat_<_Tp>, Mat)
|
||||
CV_MAT_AUG_OPERATOR_T(-=, cv::subtract(a,b,a), Mat_<_Tp>, Scalar)
|
||||
CV_MAT_AUG_OPERATOR_T(-=, cv::subtract(a,b,a), Mat_<_Tp>, Mat_<_Tp>)
|
||||
|
||||
CV_MAT_AUG_OPERATOR (*=, cv::gemm(a, b, 1, Mat(), 0, a, 0), Mat, Mat)
|
||||
CV_MAT_AUG_OPERATOR_T(*=, cv::gemm(a, b, 1, Mat(), 0, a, 0), Mat_<_Tp>, Mat)
|
||||
CV_MAT_AUG_OPERATOR_T(*=, cv::gemm(a, b, 1, Mat(), 0, a, 0), Mat_<_Tp>, Mat_<_Tp>)
|
||||
CV_MAT_AUG_OPERATOR (*=, a.convertTo(a, -1, b), Mat, double)
|
||||
CV_MAT_AUG_OPERATOR_T(*=, a.convertTo(a, -1, b), Mat_<_Tp>, double)
|
||||
|
||||
CV_MAT_AUG_OPERATOR (/=, cv::divide(a,b,a), Mat, Mat)
|
||||
CV_MAT_AUG_OPERATOR_T(/=, cv::divide(a,b,a), Mat_<_Tp>, Mat)
|
||||
CV_MAT_AUG_OPERATOR_T(/=, cv::divide(a,b,a), Mat_<_Tp>, Mat_<_Tp>)
|
||||
CV_MAT_AUG_OPERATOR (/=, a.convertTo((Mat&)a, -1, 1./b), Mat, double)
|
||||
CV_MAT_AUG_OPERATOR_T(/=, a.convertTo((Mat&)a, -1, 1./b), Mat_<_Tp>, double)
|
||||
|
||||
CV_MAT_AUG_OPERATOR (&=, cv::bitwise_and(a,b,a), Mat, Mat)
|
||||
CV_MAT_AUG_OPERATOR (&=, cv::bitwise_and(a,b,a), Mat, Scalar)
|
||||
CV_MAT_AUG_OPERATOR_T(&=, cv::bitwise_and(a,b,a), Mat_<_Tp>, Mat)
|
||||
CV_MAT_AUG_OPERATOR_T(&=, cv::bitwise_and(a,b,a), Mat_<_Tp>, Scalar)
|
||||
CV_MAT_AUG_OPERATOR_T(&=, cv::bitwise_and(a,b,a), Mat_<_Tp>, Mat_<_Tp>)
|
||||
|
||||
CV_MAT_AUG_OPERATOR (|=, cv::bitwise_or(a,b,a), Mat, Mat)
|
||||
CV_MAT_AUG_OPERATOR (|=, cv::bitwise_or(a,b,a), Mat, Scalar)
|
||||
CV_MAT_AUG_OPERATOR_T(|=, cv::bitwise_or(a,b,a), Mat_<_Tp>, Mat)
|
||||
CV_MAT_AUG_OPERATOR_T(|=, cv::bitwise_or(a,b,a), Mat_<_Tp>, Scalar)
|
||||
CV_MAT_AUG_OPERATOR_T(|=, cv::bitwise_or(a,b,a), Mat_<_Tp>, Mat_<_Tp>)
|
||||
|
||||
CV_MAT_AUG_OPERATOR (^=, cv::bitwise_xor(a,b,a), Mat, Mat)
|
||||
CV_MAT_AUG_OPERATOR (^=, cv::bitwise_xor(a,b,a), Mat, Scalar)
|
||||
CV_MAT_AUG_OPERATOR_T(^=, cv::bitwise_xor(a,b,a), Mat_<_Tp>, Mat)
|
||||
CV_MAT_AUG_OPERATOR_T(^=, cv::bitwise_xor(a,b,a), Mat_<_Tp>, Scalar)
|
||||
CV_MAT_AUG_OPERATOR_T(^=, cv::bitwise_xor(a,b,a), Mat_<_Tp>, Mat_<_Tp>)
|
||||
|
||||
#undef CV_MAT_AUG_OPERATOR_T
|
||||
#undef CV_MAT_AUG_OPERATOR
|
||||
#undef CV_MAT_AUG_OPERATOR1
|
||||
|
||||
|
||||
|
||||
///////////////////////////////////////////// SVD /////////////////////////////////////////////
|
||||
|
||||
inline SVD::SVD() {}
|
||||
inline SVD::SVD( InputArray m, int flags ) { operator ()(m, flags); }
|
||||
inline void SVD::solveZ( InputArray m, OutputArray _dst )
|
||||
{
|
||||
Mat mtx = m.getMat();
|
||||
SVD svd(mtx, (mtx.rows >= mtx.cols ? 0 : SVD::FULL_UV));
|
||||
_dst.create(svd.vt.cols, 1, svd.vt.type());
|
||||
Mat dst = _dst.getMat();
|
||||
svd.vt.row(svd.vt.rows-1).reshape(1,svd.vt.cols).copyTo(dst);
|
||||
}
|
||||
|
||||
template<typename _Tp, int m, int n, int nm> inline void
|
||||
SVD::compute( const Matx<_Tp, m, n>& a, Matx<_Tp, nm, 1>& w, Matx<_Tp, m, nm>& u, Matx<_Tp, n, nm>& vt )
|
||||
{
|
||||
CV_StaticAssert( nm == MIN(m, n), "Invalid size of output vector.");
|
||||
Mat _a(a, false), _u(u, false), _w(w, false), _vt(vt, false);
|
||||
SVD::compute(_a, _w, _u, _vt);
|
||||
CV_Assert(_w.data == (uchar*)&w.val[0] && _u.data == (uchar*)&u.val[0] && _vt.data == (uchar*)&vt.val[0]);
|
||||
}
|
||||
|
||||
template<typename _Tp, int m, int n, int nm> inline void
|
||||
SVD::compute( const Matx<_Tp, m, n>& a, Matx<_Tp, nm, 1>& w )
|
||||
{
|
||||
CV_StaticAssert( nm == MIN(m, n), "Invalid size of output vector.");
|
||||
Mat _a(a, false), _w(w, false);
|
||||
SVD::compute(_a, _w);
|
||||
CV_Assert(_w.data == (uchar*)&w.val[0]);
|
||||
}
|
||||
|
||||
template<typename _Tp, int m, int n, int nm, int nb> inline void
|
||||
SVD::backSubst( const Matx<_Tp, nm, 1>& w, const Matx<_Tp, m, nm>& u,
|
||||
const Matx<_Tp, n, nm>& vt, const Matx<_Tp, m, nb>& rhs,
|
||||
Matx<_Tp, n, nb>& dst )
|
||||
{
|
||||
CV_StaticAssert( nm == MIN(m, n), "Invalid size of output vector.");
|
||||
Mat _u(u, false), _w(w, false), _vt(vt, false), _rhs(rhs, false), _dst(dst, false);
|
||||
SVD::backSubst(_w, _u, _vt, _rhs, _dst);
|
||||
CV_Assert(_dst.data == (uchar*)&dst.val[0]);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/////////////////////////////////// Multiply-with-Carry RNG ///////////////////////////////////
|
||||
|
||||
inline RNG::RNG() { state = 0xffffffff; }
|
||||
inline RNG::RNG(uint64 _state) { state = _state ? _state : 0xffffffff; }
|
||||
|
||||
inline RNG::operator uchar() { return (uchar)next(); }
|
||||
inline RNG::operator schar() { return (schar)next(); }
|
||||
inline RNG::operator ushort() { return (ushort)next(); }
|
||||
inline RNG::operator short() { return (short)next(); }
|
||||
inline RNG::operator int() { return (int)next(); }
|
||||
inline RNG::operator unsigned() { return next(); }
|
||||
inline RNG::operator float() { return next()*2.3283064365386962890625e-10f; }
|
||||
inline RNG::operator double() { unsigned t = next(); return (((uint64)t << 32) | next()) * 5.4210108624275221700372640043497e-20; }
|
||||
|
||||
inline unsigned RNG::operator ()(unsigned N) { return (unsigned)uniform(0,N); }
|
||||
inline unsigned RNG::operator ()() { return next(); }
|
||||
|
||||
inline int RNG::uniform(int a, int b) { return a == b ? a : (int)(next() % (b - a) + a); }
|
||||
inline float RNG::uniform(float a, float b) { return ((float)*this)*(b - a) + a; }
|
||||
inline double RNG::uniform(double a, double b) { return ((double)*this)*(b - a) + a; }
|
||||
|
||||
inline unsigned RNG::next()
|
||||
{
|
||||
state = (uint64)(unsigned)state* /*CV_RNG_COEFF*/ 4164903690U + (unsigned)(state >> 32);
|
||||
return (unsigned)state;
|
||||
}
|
||||
|
||||
//! returns the next unifomly-distributed random number of the specified type
|
||||
template<typename _Tp> static inline _Tp randu()
|
||||
{
|
||||
return (_Tp)theRNG();
|
||||
}
|
||||
|
||||
///////////////////////////////// Formatted string generation /////////////////////////////////
|
||||
|
||||
CV_EXPORTS String format( const char* fmt, ... );
|
||||
|
||||
///////////////////////////////// Formatted output of cv::Mat /////////////////////////////////
|
||||
|
||||
static inline
|
||||
Ptr<Formatted> format(InputArray mtx, int fmt)
|
||||
{
|
||||
return Formatter::get(fmt)->format(mtx.getMat());
|
||||
}
|
||||
|
||||
static inline
|
||||
int print(Ptr<Formatted> fmtd, FILE* stream = stdout)
|
||||
{
|
||||
int written = 0;
|
||||
fmtd->reset();
|
||||
for(const char* str = fmtd->next(); str; str = fmtd->next())
|
||||
written += fputs(str, stream);
|
||||
|
||||
return written;
|
||||
}
|
||||
|
||||
static inline
|
||||
int print(const Mat& mtx, FILE* stream = stdout)
|
||||
{
|
||||
return print(Formatter::get()->format(mtx), stream);
|
||||
}
|
||||
|
||||
static inline
|
||||
int print(const UMat& mtx, FILE* stream = stdout)
|
||||
{
|
||||
return print(Formatter::get()->format(mtx.getMat(ACCESS_READ)), stream);
|
||||
}
|
||||
|
||||
template<typename _Tp> static inline
|
||||
int print(const std::vector<Point_<_Tp> >& vec, FILE* stream = stdout)
|
||||
{
|
||||
return print(Formatter::get()->format(Mat(vec)), stream);
|
||||
}
|
||||
|
||||
template<typename _Tp> static inline
|
||||
int print(const std::vector<Point3_<_Tp> >& vec, FILE* stream = stdout)
|
||||
{
|
||||
return print(Formatter::get()->format(Mat(vec)), stream);
|
||||
}
|
||||
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
int print(const Matx<_Tp, m, n>& matx, FILE* stream = stdout)
|
||||
{
|
||||
return print(Formatter::get()->format(cv::Mat(matx)), stream);
|
||||
}
|
||||
|
||||
//! @endcond
|
||||
|
||||
/****************************************************************************************\
|
||||
* Auxiliary algorithms *
|
||||
\****************************************************************************************/
|
||||
|
||||
/** @brief Splits an element set into equivalency classes.
|
||||
|
||||
The generic function partition implements an \f$O(N^2)\f$ algorithm for splitting a set of \f$N\f$ elements
|
||||
into one or more equivalency classes, as described in
|
||||
<http://en.wikipedia.org/wiki/Disjoint-set_data_structure> . The function returns the number of
|
||||
equivalency classes.
|
||||
@param _vec Set of elements stored as a vector.
|
||||
@param labels Output vector of labels. It contains as many elements as vec. Each label labels[i] is
|
||||
a 0-based cluster index of `vec[i]`.
|
||||
@param predicate Equivalence predicate (pointer to a boolean function of two arguments or an
|
||||
instance of the class that has the method bool operator()(const _Tp& a, const _Tp& b) ). The
|
||||
predicate returns true when the elements are certainly in the same class, and returns false if they
|
||||
may or may not be in the same class.
|
||||
@ingroup core_cluster
|
||||
*/
|
||||
template<typename _Tp, class _EqPredicate> int
|
||||
partition( const std::vector<_Tp>& _vec, std::vector<int>& labels,
|
||||
_EqPredicate predicate=_EqPredicate())
|
||||
{
|
||||
int i, j, N = (int)_vec.size();
|
||||
const _Tp* vec = &_vec[0];
|
||||
|
||||
const int PARENT=0;
|
||||
const int RANK=1;
|
||||
|
||||
std::vector<int> _nodes(N*2);
|
||||
int (*nodes)[2] = (int(*)[2])&_nodes[0];
|
||||
|
||||
// The first O(N) pass: create N single-vertex trees
|
||||
for(i = 0; i < N; i++)
|
||||
{
|
||||
nodes[i][PARENT]=-1;
|
||||
nodes[i][RANK] = 0;
|
||||
}
|
||||
|
||||
// The main O(N^2) pass: merge connected components
|
||||
for( i = 0; i < N; i++ )
|
||||
{
|
||||
int root = i;
|
||||
|
||||
// find root
|
||||
while( nodes[root][PARENT] >= 0 )
|
||||
root = nodes[root][PARENT];
|
||||
|
||||
for( j = 0; j < N; j++ )
|
||||
{
|
||||
if( i == j || !predicate(vec[i], vec[j]))
|
||||
continue;
|
||||
int root2 = j;
|
||||
|
||||
while( nodes[root2][PARENT] >= 0 )
|
||||
root2 = nodes[root2][PARENT];
|
||||
|
||||
if( root2 != root )
|
||||
{
|
||||
// unite both trees
|
||||
int rank = nodes[root][RANK], rank2 = nodes[root2][RANK];
|
||||
if( rank > rank2 )
|
||||
nodes[root2][PARENT] = root;
|
||||
else
|
||||
{
|
||||
nodes[root][PARENT] = root2;
|
||||
nodes[root2][RANK] += rank == rank2;
|
||||
root = root2;
|
||||
}
|
||||
CV_Assert( nodes[root][PARENT] < 0 );
|
||||
|
||||
int k = j, parent;
|
||||
|
||||
// compress the path from node2 to root
|
||||
while( (parent = nodes[k][PARENT]) >= 0 )
|
||||
{
|
||||
nodes[k][PARENT] = root;
|
||||
k = parent;
|
||||
}
|
||||
|
||||
// compress the path from node to root
|
||||
k = i;
|
||||
while( (parent = nodes[k][PARENT]) >= 0 )
|
||||
{
|
||||
nodes[k][PARENT] = root;
|
||||
k = parent;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Final O(N) pass: enumerate classes
|
||||
labels.resize(N);
|
||||
int nclasses = 0;
|
||||
|
||||
for( i = 0; i < N; i++ )
|
||||
{
|
||||
int root = i;
|
||||
while( nodes[root][PARENT] >= 0 )
|
||||
root = nodes[root][PARENT];
|
||||
// re-use the rank as the class label
|
||||
if( nodes[root][RANK] >= 0 )
|
||||
nodes[root][RANK] = ~nclasses++;
|
||||
labels[i] = ~nodes[root][RANK];
|
||||
}
|
||||
|
||||
return nclasses;
|
||||
}
|
||||
|
||||
} // cv
|
||||
|
||||
#endif
|
302
3rdparty/include/opencv2/core/optim.hpp
vendored
Normal file
302
3rdparty/include/opencv2/core/optim.hpp
vendored
Normal file
@ -0,0 +1,302 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the OpenCV Foundation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_OPTIM_HPP__
|
||||
#define __OPENCV_OPTIM_HPP__
|
||||
|
||||
#include "opencv2/core.hpp"
|
||||
|
||||
namespace cv
|
||||
{
|
||||
|
||||
/** @addtogroup core_optim
|
||||
The algorithms in this section minimize or maximize function value within specified constraints or
|
||||
without any constraints.
|
||||
@{
|
||||
*/
|
||||
|
||||
/** @brief Basic interface for all solvers
|
||||
*/
|
||||
class CV_EXPORTS MinProblemSolver : public Algorithm
|
||||
{
|
||||
public:
|
||||
/** @brief Represents function being optimized
|
||||
*/
|
||||
class CV_EXPORTS Function
|
||||
{
|
||||
public:
|
||||
virtual ~Function() {}
|
||||
virtual int getDims() const = 0;
|
||||
virtual double getGradientEps() const;
|
||||
virtual double calc(const double* x) const = 0;
|
||||
virtual void getGradient(const double* x,double* grad);
|
||||
};
|
||||
|
||||
/** @brief Getter for the optimized function.
|
||||
|
||||
The optimized function is represented by Function interface, which requires derivatives to
|
||||
implement the sole method calc(double*) to evaluate the function.
|
||||
|
||||
@return Smart-pointer to an object that implements Function interface - it represents the
|
||||
function that is being optimized. It can be empty, if no function was given so far.
|
||||
*/
|
||||
virtual Ptr<Function> getFunction() const = 0;
|
||||
|
||||
/** @brief Setter for the optimized function.
|
||||
|
||||
*It should be called at least once before the call to* minimize(), as default value is not usable.
|
||||
|
||||
@param f The new function to optimize.
|
||||
*/
|
||||
virtual void setFunction(const Ptr<Function>& f) = 0;
|
||||
|
||||
/** @brief Getter for the previously set terminal criteria for this algorithm.
|
||||
|
||||
@return Deep copy of the terminal criteria used at the moment.
|
||||
*/
|
||||
virtual TermCriteria getTermCriteria() const = 0;
|
||||
|
||||
/** @brief Set terminal criteria for solver.
|
||||
|
||||
This method *is not necessary* to be called before the first call to minimize(), as the default
|
||||
value is sensible.
|
||||
|
||||
Algorithm stops when the number of function evaluations done exceeds termcrit.maxCount, when
|
||||
the function values at the vertices of simplex are within termcrit.epsilon range or simplex
|
||||
becomes so small that it can enclosed in a box with termcrit.epsilon sides, whatever comes
|
||||
first.
|
||||
@param termcrit Terminal criteria to be used, represented as cv::TermCriteria structure.
|
||||
*/
|
||||
virtual void setTermCriteria(const TermCriteria& termcrit) = 0;
|
||||
|
||||
/** @brief actually runs the algorithm and performs the minimization.
|
||||
|
||||
The sole input parameter determines the centroid of the starting simplex (roughly, it tells
|
||||
where to start), all the others (terminal criteria, initial step, function to be minimized) are
|
||||
supposed to be set via the setters before the call to this method or the default values (not
|
||||
always sensible) will be used.
|
||||
|
||||
@param x The initial point, that will become a centroid of an initial simplex. After the algorithm
|
||||
will terminate, it will be setted to the point where the algorithm stops, the point of possible
|
||||
minimum.
|
||||
@return The value of a function at the point found.
|
||||
*/
|
||||
virtual double minimize(InputOutputArray x) = 0;
|
||||
};
|
||||
|
||||
/** @brief This class is used to perform the non-linear non-constrained minimization of a function,
|
||||
|
||||
defined on an `n`-dimensional Euclidean space, using the **Nelder-Mead method**, also known as
|
||||
**downhill simplex method**. The basic idea about the method can be obtained from
|
||||
<http://en.wikipedia.org/wiki/Nelder-Mead_method>.
|
||||
|
||||
It should be noted, that this method, although deterministic, is rather a heuristic and therefore
|
||||
may converge to a local minima, not necessary a global one. It is iterative optimization technique,
|
||||
which at each step uses an information about the values of a function evaluated only at `n+1`
|
||||
points, arranged as a *simplex* in `n`-dimensional space (hence the second name of the method). At
|
||||
each step new point is chosen to evaluate function at, obtained value is compared with previous
|
||||
ones and based on this information simplex changes it's shape , slowly moving to the local minimum.
|
||||
Thus this method is using *only* function values to make decision, on contrary to, say, Nonlinear
|
||||
Conjugate Gradient method (which is also implemented in optim).
|
||||
|
||||
Algorithm stops when the number of function evaluations done exceeds termcrit.maxCount, when the
|
||||
function values at the vertices of simplex are within termcrit.epsilon range or simplex becomes so
|
||||
small that it can enclosed in a box with termcrit.epsilon sides, whatever comes first, for some
|
||||
defined by user positive integer termcrit.maxCount and positive non-integer termcrit.epsilon.
|
||||
|
||||
@note DownhillSolver is a derivative of the abstract interface
|
||||
cv::MinProblemSolver, which in turn is derived from the Algorithm interface and is used to
|
||||
encapsulate the functionality, common to all non-linear optimization algorithms in the optim
|
||||
module.
|
||||
|
||||
@note term criteria should meet following condition:
|
||||
@code
|
||||
termcrit.type == (TermCriteria::MAX_ITER + TermCriteria::EPS) && termcrit.epsilon > 0 && termcrit.maxCount > 0
|
||||
@endcode
|
||||
*/
|
||||
class CV_EXPORTS DownhillSolver : public MinProblemSolver
|
||||
{
|
||||
public:
|
||||
/** @brief Returns the initial step that will be used in downhill simplex algorithm.
|
||||
|
||||
@param step Initial step that will be used in algorithm. Note, that although corresponding setter
|
||||
accepts column-vectors as well as row-vectors, this method will return a row-vector.
|
||||
@see DownhillSolver::setInitStep
|
||||
*/
|
||||
virtual void getInitStep(OutputArray step) const=0;
|
||||
|
||||
/** @brief Sets the initial step that will be used in downhill simplex algorithm.
|
||||
|
||||
Step, together with initial point (givin in DownhillSolver::minimize) are two `n`-dimensional
|
||||
vectors that are used to determine the shape of initial simplex. Roughly said, initial point
|
||||
determines the position of a simplex (it will become simplex's centroid), while step determines the
|
||||
spread (size in each dimension) of a simplex. To be more precise, if \f$s,x_0\in\mathbb{R}^n\f$ are
|
||||
the initial step and initial point respectively, the vertices of a simplex will be:
|
||||
\f$v_0:=x_0-\frac{1}{2} s\f$ and \f$v_i:=x_0+s_i\f$ for \f$i=1,2,\dots,n\f$ where \f$s_i\f$ denotes
|
||||
projections of the initial step of *n*-th coordinate (the result of projection is treated to be
|
||||
vector given by \f$s_i:=e_i\cdot\left<e_i\cdot s\right>\f$, where \f$e_i\f$ form canonical basis)
|
||||
|
||||
@param step Initial step that will be used in algorithm. Roughly said, it determines the spread
|
||||
(size in each dimension) of an initial simplex.
|
||||
*/
|
||||
virtual void setInitStep(InputArray step)=0;
|
||||
|
||||
/** @brief This function returns the reference to the ready-to-use DownhillSolver object.
|
||||
|
||||
All the parameters are optional, so this procedure can be called even without parameters at
|
||||
all. In this case, the default values will be used. As default value for terminal criteria are
|
||||
the only sensible ones, MinProblemSolver::setFunction() and DownhillSolver::setInitStep()
|
||||
should be called upon the obtained object, if the respective parameters were not given to
|
||||
create(). Otherwise, the two ways (give parameters to createDownhillSolver() or miss them out
|
||||
and call the MinProblemSolver::setFunction() and DownhillSolver::setInitStep()) are absolutely
|
||||
equivalent (and will drop the same errors in the same way, should invalid input be detected).
|
||||
@param f Pointer to the function that will be minimized, similarly to the one you submit via
|
||||
MinProblemSolver::setFunction.
|
||||
@param initStep Initial step, that will be used to construct the initial simplex, similarly to the one
|
||||
you submit via MinProblemSolver::setInitStep.
|
||||
@param termcrit Terminal criteria to the algorithm, similarly to the one you submit via
|
||||
MinProblemSolver::setTermCriteria.
|
||||
*/
|
||||
static Ptr<DownhillSolver> create(const Ptr<MinProblemSolver::Function>& f=Ptr<MinProblemSolver::Function>(),
|
||||
InputArray initStep=Mat_<double>(1,1,0.0),
|
||||
TermCriteria termcrit=TermCriteria(TermCriteria::MAX_ITER+TermCriteria::EPS,5000,0.000001));
|
||||
};
|
||||
|
||||
/** @brief This class is used to perform the non-linear non-constrained minimization of a function
|
||||
with known gradient,
|
||||
|
||||
defined on an *n*-dimensional Euclidean space, using the **Nonlinear Conjugate Gradient method**.
|
||||
The implementation was done based on the beautifully clear explanatory article [An Introduction to
|
||||
the Conjugate Gradient Method Without the Agonizing
|
||||
Pain](http://www.cs.cmu.edu/~quake-papers/painless-conjugate-gradient.pdf) by Jonathan Richard
|
||||
Shewchuk. The method can be seen as an adaptation of a standard Conjugate Gradient method (see, for
|
||||
example <http://en.wikipedia.org/wiki/Conjugate_gradient_method>) for numerically solving the
|
||||
systems of linear equations.
|
||||
|
||||
It should be noted, that this method, although deterministic, is rather a heuristic method and
|
||||
therefore may converge to a local minima, not necessary a global one. What is even more disastrous,
|
||||
most of its behaviour is ruled by gradient, therefore it essentially cannot distinguish between
|
||||
local minima and maxima. Therefore, if it starts sufficiently near to the local maximum, it may
|
||||
converge to it. Another obvious restriction is that it should be possible to compute the gradient of
|
||||
a function at any point, thus it is preferable to have analytic expression for gradient and
|
||||
computational burden should be born by the user.
|
||||
|
||||
The latter responsibility is accompilished via the getGradient method of a
|
||||
MinProblemSolver::Function interface (which represents function being optimized). This method takes
|
||||
point a point in *n*-dimensional space (first argument represents the array of coordinates of that
|
||||
point) and comput its gradient (it should be stored in the second argument as an array).
|
||||
|
||||
@note class ConjGradSolver thus does not add any new methods to the basic MinProblemSolver interface.
|
||||
|
||||
@note term criteria should meet following condition:
|
||||
@code
|
||||
termcrit.type == (TermCriteria::MAX_ITER + TermCriteria::EPS) && termcrit.epsilon > 0 && termcrit.maxCount > 0
|
||||
// or
|
||||
termcrit.type == TermCriteria::MAX_ITER) && termcrit.maxCount > 0
|
||||
@endcode
|
||||
*/
|
||||
class CV_EXPORTS ConjGradSolver : public MinProblemSolver
|
||||
{
|
||||
public:
|
||||
/** @brief This function returns the reference to the ready-to-use ConjGradSolver object.
|
||||
|
||||
All the parameters are optional, so this procedure can be called even without parameters at
|
||||
all. In this case, the default values will be used. As default value for terminal criteria are
|
||||
the only sensible ones, MinProblemSolver::setFunction() should be called upon the obtained
|
||||
object, if the function was not given to create(). Otherwise, the two ways (submit it to
|
||||
create() or miss it out and call the MinProblemSolver::setFunction()) are absolutely equivalent
|
||||
(and will drop the same errors in the same way, should invalid input be detected).
|
||||
@param f Pointer to the function that will be minimized, similarly to the one you submit via
|
||||
MinProblemSolver::setFunction.
|
||||
@param termcrit Terminal criteria to the algorithm, similarly to the one you submit via
|
||||
MinProblemSolver::setTermCriteria.
|
||||
*/
|
||||
static Ptr<ConjGradSolver> create(const Ptr<MinProblemSolver::Function>& f=Ptr<ConjGradSolver::Function>(),
|
||||
TermCriteria termcrit=TermCriteria(TermCriteria::MAX_ITER+TermCriteria::EPS,5000,0.000001));
|
||||
};
|
||||
|
||||
//! return codes for cv::solveLP() function
|
||||
enum SolveLPResult
|
||||
{
|
||||
SOLVELP_UNBOUNDED = -2, //!< problem is unbounded (target function can achieve arbitrary high values)
|
||||
SOLVELP_UNFEASIBLE = -1, //!< problem is unfeasible (there are no points that satisfy all the constraints imposed)
|
||||
SOLVELP_SINGLE = 0, //!< there is only one maximum for target function
|
||||
SOLVELP_MULTI = 1 //!< there are multiple maxima for target function - the arbitrary one is returned
|
||||
};
|
||||
|
||||
/** @brief Solve given (non-integer) linear programming problem using the Simplex Algorithm (Simplex Method).
|
||||
|
||||
What we mean here by "linear programming problem" (or LP problem, for short) can be formulated as:
|
||||
|
||||
\f[\mbox{Maximize } c\cdot x\\
|
||||
\mbox{Subject to:}\\
|
||||
Ax\leq b\\
|
||||
x\geq 0\f]
|
||||
|
||||
Where \f$c\f$ is fixed `1`-by-`n` row-vector, \f$A\f$ is fixed `m`-by-`n` matrix, \f$b\f$ is fixed `m`-by-`1`
|
||||
column vector and \f$x\f$ is an arbitrary `n`-by-`1` column vector, which satisfies the constraints.
|
||||
|
||||
Simplex algorithm is one of many algorithms that are designed to handle this sort of problems
|
||||
efficiently. Although it is not optimal in theoretical sense (there exist algorithms that can solve
|
||||
any problem written as above in polynomial time, while simplex method degenerates to exponential
|
||||
time for some special cases), it is well-studied, easy to implement and is shown to work well for
|
||||
real-life purposes.
|
||||
|
||||
The particular implementation is taken almost verbatim from **Introduction to Algorithms, third
|
||||
edition** by T. H. Cormen, C. E. Leiserson, R. L. Rivest and Clifford Stein. In particular, the
|
||||
Bland's rule <http://en.wikipedia.org/wiki/Bland%27s_rule> is used to prevent cycling.
|
||||
|
||||
@param Func This row-vector corresponds to \f$c\f$ in the LP problem formulation (see above). It should
|
||||
contain 32- or 64-bit floating point numbers. As a convenience, column-vector may be also submitted,
|
||||
in the latter case it is understood to correspond to \f$c^T\f$.
|
||||
@param Constr `m`-by-`n+1` matrix, whose rightmost column corresponds to \f$b\f$ in formulation above
|
||||
and the remaining to \f$A\f$. It should containt 32- or 64-bit floating point numbers.
|
||||
@param z The solution will be returned here as a column-vector - it corresponds to \f$c\f$ in the
|
||||
formulation above. It will contain 64-bit floating point numbers.
|
||||
@return One of cv::SolveLPResult
|
||||
*/
|
||||
CV_EXPORTS_W int solveLP(const Mat& Func, const Mat& Constr, Mat& z);
|
||||
|
||||
//! @}
|
||||
|
||||
}// cv
|
||||
|
||||
#endif
|
1195
3rdparty/include/opencv2/core/persistence.hpp
vendored
Normal file
1195
3rdparty/include/opencv2/core/persistence.hpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
172
3rdparty/include/opencv2/core/private.cuda.hpp
vendored
Normal file
172
3rdparty/include/opencv2/core/private.cuda.hpp
vendored
Normal file
@ -0,0 +1,172 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_CORE_PRIVATE_CUDA_HPP__
|
||||
#define __OPENCV_CORE_PRIVATE_CUDA_HPP__
|
||||
|
||||
#ifndef __OPENCV_BUILD
|
||||
# error this is a private header which should not be used from outside of the OpenCV library
|
||||
#endif
|
||||
|
||||
#include "cvconfig.h"
|
||||
|
||||
#include "opencv2/core/cvdef.h"
|
||||
#include "opencv2/core/base.hpp"
|
||||
|
||||
#include "opencv2/core/cuda.hpp"
|
||||
|
||||
#ifdef HAVE_CUDA
|
||||
# include <cuda.h>
|
||||
# include <cuda_runtime.h>
|
||||
# include <npp.h>
|
||||
# include "opencv2/core/cuda_stream_accessor.hpp"
|
||||
# include "opencv2/core/cuda/common.hpp"
|
||||
|
||||
# define NPP_VERSION (NPP_VERSION_MAJOR * 1000 + NPP_VERSION_MINOR * 100 + NPP_VERSION_BUILD)
|
||||
|
||||
# define CUDART_MINIMUM_REQUIRED_VERSION 4020
|
||||
|
||||
# if (CUDART_VERSION < CUDART_MINIMUM_REQUIRED_VERSION)
|
||||
# error "Insufficient Cuda Runtime library version, please update it."
|
||||
# endif
|
||||
|
||||
# if defined(CUDA_ARCH_BIN_OR_PTX_10)
|
||||
# error "OpenCV CUDA module doesn't support NVIDIA compute capability 1.0"
|
||||
# endif
|
||||
#endif
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv { namespace cuda {
|
||||
CV_EXPORTS cv::String getNppErrorMessage(int code);
|
||||
CV_EXPORTS cv::String getCudaDriverApiErrorMessage(int code);
|
||||
|
||||
CV_EXPORTS GpuMat getInputMat(InputArray _src, Stream& stream);
|
||||
|
||||
CV_EXPORTS GpuMat getOutputMat(OutputArray _dst, int rows, int cols, int type, Stream& stream);
|
||||
static inline GpuMat getOutputMat(OutputArray _dst, Size size, int type, Stream& stream)
|
||||
{
|
||||
return getOutputMat(_dst, size.height, size.width, type, stream);
|
||||
}
|
||||
|
||||
CV_EXPORTS void syncOutput(const GpuMat& dst, OutputArray _dst, Stream& stream);
|
||||
}}
|
||||
|
||||
#ifndef HAVE_CUDA
|
||||
|
||||
static inline void throw_no_cuda() { CV_Error(cv::Error::GpuNotSupported, "The library is compiled without CUDA support"); }
|
||||
|
||||
#else // HAVE_CUDA
|
||||
|
||||
static inline void throw_no_cuda() { CV_Error(cv::Error::StsNotImplemented, "The called functionality is disabled for current build or platform"); }
|
||||
|
||||
namespace cv { namespace cuda
|
||||
{
|
||||
class CV_EXPORTS BufferPool
|
||||
{
|
||||
public:
|
||||
explicit BufferPool(Stream& stream);
|
||||
|
||||
GpuMat getBuffer(int rows, int cols, int type);
|
||||
GpuMat getBuffer(Size size, int type) { return getBuffer(size.height, size.width, type); }
|
||||
|
||||
GpuMat::Allocator* getAllocator() const { return allocator_; }
|
||||
|
||||
private:
|
||||
GpuMat::Allocator* allocator_;
|
||||
};
|
||||
|
||||
static inline void checkNppError(int code, const char* file, const int line, const char* func)
|
||||
{
|
||||
if (code < 0)
|
||||
cv::error(cv::Error::GpuApiCallError, getNppErrorMessage(code), func, file, line);
|
||||
}
|
||||
|
||||
static inline void checkCudaDriverApiError(int code, const char* file, const int line, const char* func)
|
||||
{
|
||||
if (code != CUDA_SUCCESS)
|
||||
cv::error(cv::Error::GpuApiCallError, getCudaDriverApiErrorMessage(code), func, file, line);
|
||||
}
|
||||
|
||||
template<int n> struct NPPTypeTraits;
|
||||
template<> struct NPPTypeTraits<CV_8U> { typedef Npp8u npp_type; };
|
||||
template<> struct NPPTypeTraits<CV_8S> { typedef Npp8s npp_type; };
|
||||
template<> struct NPPTypeTraits<CV_16U> { typedef Npp16u npp_type; };
|
||||
template<> struct NPPTypeTraits<CV_16S> { typedef Npp16s npp_type; };
|
||||
template<> struct NPPTypeTraits<CV_32S> { typedef Npp32s npp_type; };
|
||||
template<> struct NPPTypeTraits<CV_32F> { typedef Npp32f npp_type; };
|
||||
template<> struct NPPTypeTraits<CV_64F> { typedef Npp64f npp_type; };
|
||||
|
||||
class NppStreamHandler
|
||||
{
|
||||
public:
|
||||
inline explicit NppStreamHandler(Stream& newStream)
|
||||
{
|
||||
oldStream = nppGetStream();
|
||||
nppSetStream(StreamAccessor::getStream(newStream));
|
||||
}
|
||||
|
||||
inline explicit NppStreamHandler(cudaStream_t newStream)
|
||||
{
|
||||
oldStream = nppGetStream();
|
||||
nppSetStream(newStream);
|
||||
}
|
||||
|
||||
inline ~NppStreamHandler()
|
||||
{
|
||||
nppSetStream(oldStream);
|
||||
}
|
||||
|
||||
private:
|
||||
cudaStream_t oldStream;
|
||||
};
|
||||
}}
|
||||
|
||||
#define nppSafeCall(expr) cv::cuda::checkNppError(expr, __FILE__, __LINE__, CV_Func)
|
||||
#define cuSafeCall(expr) cv::cuda::checkCudaDriverApiError(expr, __FILE__, __LINE__, CV_Func)
|
||||
|
||||
#endif // HAVE_CUDA
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif // __OPENCV_CORE_CUDA_PRIVATE_HPP__
|
296
3rdparty/include/opencv2/core/private.hpp
vendored
Normal file
296
3rdparty/include/opencv2/core/private.hpp
vendored
Normal file
@ -0,0 +1,296 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_CORE_PRIVATE_HPP__
|
||||
#define __OPENCV_CORE_PRIVATE_HPP__
|
||||
|
||||
#ifndef __OPENCV_BUILD
|
||||
# error this is a private header which should not be used from outside of the OpenCV library
|
||||
#endif
|
||||
|
||||
#include "opencv2/core.hpp"
|
||||
#include "cvconfig.h"
|
||||
|
||||
#ifdef HAVE_EIGEN
|
||||
# if defined __GNUC__ && defined __APPLE__
|
||||
# pragma GCC diagnostic ignored "-Wshadow"
|
||||
# endif
|
||||
# include <Eigen/Core>
|
||||
# include "opencv2/core/eigen.hpp"
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_TBB
|
||||
# include "tbb/tbb_stddef.h"
|
||||
# if TBB_VERSION_MAJOR*100 + TBB_VERSION_MINOR >= 202
|
||||
# include "tbb/tbb.h"
|
||||
# include "tbb/task.h"
|
||||
# undef min
|
||||
# undef max
|
||||
# else
|
||||
# undef HAVE_TBB
|
||||
# endif
|
||||
#endif
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv
|
||||
{
|
||||
#ifdef HAVE_TBB
|
||||
|
||||
typedef tbb::blocked_range<int> BlockedRange;
|
||||
|
||||
template<typename Body> static inline
|
||||
void parallel_for( const BlockedRange& range, const Body& body )
|
||||
{
|
||||
tbb::parallel_for(range, body);
|
||||
}
|
||||
|
||||
typedef tbb::split Split;
|
||||
|
||||
template<typename Body> static inline
|
||||
void parallel_reduce( const BlockedRange& range, Body& body )
|
||||
{
|
||||
tbb::parallel_reduce(range, body);
|
||||
}
|
||||
|
||||
typedef tbb::concurrent_vector<Rect> ConcurrentRectVector;
|
||||
#else
|
||||
class BlockedRange
|
||||
{
|
||||
public:
|
||||
BlockedRange() : _begin(0), _end(0), _grainsize(0) {}
|
||||
BlockedRange(int b, int e, int g=1) : _begin(b), _end(e), _grainsize(g) {}
|
||||
int begin() const { return _begin; }
|
||||
int end() const { return _end; }
|
||||
int grainsize() const { return _grainsize; }
|
||||
|
||||
protected:
|
||||
int _begin, _end, _grainsize;
|
||||
};
|
||||
|
||||
template<typename Body> static inline
|
||||
void parallel_for( const BlockedRange& range, const Body& body )
|
||||
{
|
||||
body(range);
|
||||
}
|
||||
typedef std::vector<Rect> ConcurrentRectVector;
|
||||
|
||||
class Split {};
|
||||
|
||||
template<typename Body> static inline
|
||||
void parallel_reduce( const BlockedRange& range, Body& body )
|
||||
{
|
||||
body(range);
|
||||
}
|
||||
#endif
|
||||
|
||||
// Returns a static string if there is a parallel framework,
|
||||
// NULL otherwise.
|
||||
CV_EXPORTS const char* currentParallelFramework();
|
||||
} //namespace cv
|
||||
|
||||
/****************************************************************************************\
|
||||
* Common declarations *
|
||||
\****************************************************************************************/
|
||||
|
||||
/* the alignment of all the allocated buffers */
|
||||
#define CV_MALLOC_ALIGN 16
|
||||
|
||||
/* IEEE754 constants and macros */
|
||||
#define CV_TOGGLE_FLT(x) ((x)^((int)(x) < 0 ? 0x7fffffff : 0))
|
||||
#define CV_TOGGLE_DBL(x) ((x)^((int64)(x) < 0 ? CV_BIG_INT(0x7fffffffffffffff) : 0))
|
||||
|
||||
static inline void* cvAlignPtr( const void* ptr, int align = 32 )
|
||||
{
|
||||
CV_DbgAssert ( (align & (align-1)) == 0 );
|
||||
return (void*)( ((size_t)ptr + align - 1) & ~(size_t)(align-1) );
|
||||
}
|
||||
|
||||
static inline int cvAlign( int size, int align )
|
||||
{
|
||||
CV_DbgAssert( (align & (align-1)) == 0 && size < INT_MAX );
|
||||
return (size + align - 1) & -align;
|
||||
}
|
||||
|
||||
#ifdef IPL_DEPTH_8U
|
||||
static inline cv::Size cvGetMatSize( const CvMat* mat )
|
||||
{
|
||||
return cv::Size(mat->cols, mat->rows);
|
||||
}
|
||||
#endif
|
||||
|
||||
namespace cv
|
||||
{
|
||||
CV_EXPORTS void scalarToRawData(const cv::Scalar& s, void* buf, int type, int unroll_to = 0);
|
||||
}
|
||||
|
||||
// property implementation macros
|
||||
|
||||
#define CV_IMPL_PROPERTY_RO(type, name, member) \
|
||||
inline type get##name() const { return member; }
|
||||
|
||||
#define CV_HELP_IMPL_PROPERTY(r_type, w_type, name, member) \
|
||||
CV_IMPL_PROPERTY_RO(r_type, name, member) \
|
||||
inline void set##name(w_type val) { member = val; }
|
||||
|
||||
#define CV_HELP_WRAP_PROPERTY(r_type, w_type, name, internal_name, internal_obj) \
|
||||
r_type get##name() const { return internal_obj.get##internal_name(); } \
|
||||
void set##name(w_type val) { internal_obj.set##internal_name(val); }
|
||||
|
||||
#define CV_IMPL_PROPERTY(type, name, member) CV_HELP_IMPL_PROPERTY(type, type, name, member)
|
||||
#define CV_IMPL_PROPERTY_S(type, name, member) CV_HELP_IMPL_PROPERTY(type, const type &, name, member)
|
||||
|
||||
#define CV_WRAP_PROPERTY(type, name, internal_name, internal_obj) CV_HELP_WRAP_PROPERTY(type, type, name, internal_name, internal_obj)
|
||||
#define CV_WRAP_PROPERTY_S(type, name, internal_name, internal_obj) CV_HELP_WRAP_PROPERTY(type, const type &, name, internal_name, internal_obj)
|
||||
|
||||
#define CV_WRAP_SAME_PROPERTY(type, name, internal_obj) CV_WRAP_PROPERTY(type, name, name, internal_obj)
|
||||
#define CV_WRAP_SAME_PROPERTY_S(type, name, internal_obj) CV_WRAP_PROPERTY_S(type, name, name, internal_obj)
|
||||
|
||||
/****************************************************************************************\
|
||||
* Structures and macros for integration with IPP *
|
||||
\****************************************************************************************/
|
||||
|
||||
#ifdef HAVE_IPP
|
||||
# include "ipp.h"
|
||||
|
||||
# define IPP_VERSION_X100 (IPP_VERSION_MAJOR * 100 + IPP_VERSION_MINOR)
|
||||
|
||||
#define IPP_ALIGN 32 // required for AVX optimization
|
||||
|
||||
#define setIppErrorStatus() cv::ipp::setIppStatus(-1, CV_Func, __FILE__, __LINE__)
|
||||
|
||||
static inline IppiSize ippiSize(int width, int height)
|
||||
{
|
||||
IppiSize size = { width, height };
|
||||
return size;
|
||||
}
|
||||
|
||||
static inline IppiSize ippiSize(const cv::Size & _size)
|
||||
{
|
||||
IppiSize size = { _size.width, _size.height };
|
||||
return size;
|
||||
}
|
||||
|
||||
static inline IppiBorderType ippiGetBorderType(int borderTypeNI)
|
||||
{
|
||||
return borderTypeNI == cv::BORDER_CONSTANT ? ippBorderConst :
|
||||
borderTypeNI == cv::BORDER_WRAP ? ippBorderWrap :
|
||||
borderTypeNI == cv::BORDER_REPLICATE ? ippBorderRepl :
|
||||
borderTypeNI == cv::BORDER_REFLECT_101 ? ippBorderMirror :
|
||||
borderTypeNI == cv::BORDER_REFLECT ? ippBorderMirrorR : (IppiBorderType)-1;
|
||||
}
|
||||
|
||||
static inline IppDataType ippiGetDataType(int depth)
|
||||
{
|
||||
return depth == CV_8U ? ipp8u :
|
||||
depth == CV_8S ? ipp8s :
|
||||
depth == CV_16U ? ipp16u :
|
||||
depth == CV_16S ? ipp16s :
|
||||
depth == CV_32S ? ipp32s :
|
||||
depth == CV_32F ? ipp32f :
|
||||
depth == CV_64F ? ipp64f : (IppDataType)-1;
|
||||
}
|
||||
|
||||
#else
|
||||
# define IPP_VERSION_X100 0
|
||||
#endif
|
||||
|
||||
#define CV_IPP_CHECK_COND (cv::ipp::useIPP())
|
||||
#define CV_IPP_CHECK() if(CV_IPP_CHECK_COND)
|
||||
|
||||
#ifndef IPPI_CALL
|
||||
# define IPPI_CALL(func) CV_Assert((func) >= 0)
|
||||
#endif
|
||||
|
||||
/* IPP-compatible return codes */
|
||||
typedef enum CvStatus
|
||||
{
|
||||
CV_BADMEMBLOCK_ERR = -113,
|
||||
CV_INPLACE_NOT_SUPPORTED_ERR= -112,
|
||||
CV_UNMATCHED_ROI_ERR = -111,
|
||||
CV_NOTFOUND_ERR = -110,
|
||||
CV_BADCONVERGENCE_ERR = -109,
|
||||
|
||||
CV_BADDEPTH_ERR = -107,
|
||||
CV_BADROI_ERR = -106,
|
||||
CV_BADHEADER_ERR = -105,
|
||||
CV_UNMATCHED_FORMATS_ERR = -104,
|
||||
CV_UNSUPPORTED_COI_ERR = -103,
|
||||
CV_UNSUPPORTED_CHANNELS_ERR = -102,
|
||||
CV_UNSUPPORTED_DEPTH_ERR = -101,
|
||||
CV_UNSUPPORTED_FORMAT_ERR = -100,
|
||||
|
||||
CV_BADARG_ERR = -49, //ipp comp
|
||||
CV_NOTDEFINED_ERR = -48, //ipp comp
|
||||
|
||||
CV_BADCHANNELS_ERR = -47, //ipp comp
|
||||
CV_BADRANGE_ERR = -44, //ipp comp
|
||||
CV_BADSTEP_ERR = -29, //ipp comp
|
||||
|
||||
CV_BADFLAG_ERR = -12,
|
||||
CV_DIV_BY_ZERO_ERR = -11, //ipp comp
|
||||
CV_BADCOEF_ERR = -10,
|
||||
|
||||
CV_BADFACTOR_ERR = -7,
|
||||
CV_BADPOINT_ERR = -6,
|
||||
CV_BADSCALE_ERR = -4,
|
||||
CV_OUTOFMEM_ERR = -3,
|
||||
CV_NULLPTR_ERR = -2,
|
||||
CV_BADSIZE_ERR = -1,
|
||||
CV_NO_ERR = 0,
|
||||
CV_OK = CV_NO_ERR
|
||||
}
|
||||
CvStatus;
|
||||
|
||||
#ifdef HAVE_TEGRA_OPTIMIZATION
|
||||
namespace tegra {
|
||||
|
||||
CV_EXPORTS bool useTegra();
|
||||
CV_EXPORTS void setUseTegra(bool flag);
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif // __OPENCV_CORE_PRIVATE_HPP__
|
342
3rdparty/include/opencv2/core/ptr.inl.hpp
vendored
Normal file
342
3rdparty/include/opencv2/core/ptr.inl.hpp
vendored
Normal file
@ -0,0 +1,342 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2013, NVIDIA Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the copyright holders or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_CORE_PTR_INL_HPP__
|
||||
#define __OPENCV_CORE_PTR_INL_HPP__
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv {
|
||||
|
||||
template<typename Y>
|
||||
void DefaultDeleter<Y>::operator () (Y* p) const
|
||||
{
|
||||
delete p;
|
||||
}
|
||||
|
||||
namespace detail
|
||||
{
|
||||
|
||||
struct PtrOwner
|
||||
{
|
||||
PtrOwner() : refCount(1)
|
||||
{}
|
||||
|
||||
void incRef()
|
||||
{
|
||||
CV_XADD(&refCount, 1);
|
||||
}
|
||||
|
||||
void decRef()
|
||||
{
|
||||
if (CV_XADD(&refCount, -1) == 1) deleteSelf();
|
||||
}
|
||||
|
||||
protected:
|
||||
/* This doesn't really need to be virtual, since PtrOwner is never deleted
|
||||
directly, but it doesn't hurt and it helps avoid warnings. */
|
||||
virtual ~PtrOwner()
|
||||
{}
|
||||
|
||||
virtual void deleteSelf() = 0;
|
||||
|
||||
private:
|
||||
unsigned int refCount;
|
||||
|
||||
// noncopyable
|
||||
PtrOwner(const PtrOwner&);
|
||||
PtrOwner& operator = (const PtrOwner&);
|
||||
};
|
||||
|
||||
template<typename Y, typename D>
|
||||
struct PtrOwnerImpl : PtrOwner
|
||||
{
|
||||
PtrOwnerImpl(Y* p, D d) : owned(p), deleter(d)
|
||||
{}
|
||||
|
||||
void deleteSelf()
|
||||
{
|
||||
deleter(owned);
|
||||
delete this;
|
||||
}
|
||||
|
||||
private:
|
||||
Y* owned;
|
||||
D deleter;
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Ptr<T>::Ptr() : owner(NULL), stored(NULL)
|
||||
{}
|
||||
|
||||
template<typename T>
|
||||
template<typename Y>
|
||||
Ptr<T>::Ptr(Y* p)
|
||||
: owner(p
|
||||
? new detail::PtrOwnerImpl<Y, DefaultDeleter<Y> >(p, DefaultDeleter<Y>())
|
||||
: NULL),
|
||||
stored(p)
|
||||
{}
|
||||
|
||||
template<typename T>
|
||||
template<typename Y, typename D>
|
||||
Ptr<T>::Ptr(Y* p, D d)
|
||||
: owner(p
|
||||
? new detail::PtrOwnerImpl<Y, D>(p, d)
|
||||
: NULL),
|
||||
stored(p)
|
||||
{}
|
||||
|
||||
template<typename T>
|
||||
Ptr<T>::Ptr(const Ptr& o) : owner(o.owner), stored(o.stored)
|
||||
{
|
||||
if (owner) owner->incRef();
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
template<typename Y>
|
||||
Ptr<T>::Ptr(const Ptr<Y>& o) : owner(o.owner), stored(o.stored)
|
||||
{
|
||||
if (owner) owner->incRef();
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
template<typename Y>
|
||||
Ptr<T>::Ptr(const Ptr<Y>& o, T* p) : owner(o.owner), stored(p)
|
||||
{
|
||||
if (owner) owner->incRef();
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Ptr<T>::~Ptr()
|
||||
{
|
||||
release();
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Ptr<T>& Ptr<T>::operator = (const Ptr<T>& o)
|
||||
{
|
||||
Ptr(o).swap(*this);
|
||||
return *this;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
template<typename Y>
|
||||
Ptr<T>& Ptr<T>::operator = (const Ptr<Y>& o)
|
||||
{
|
||||
Ptr(o).swap(*this);
|
||||
return *this;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void Ptr<T>::release()
|
||||
{
|
||||
if (owner) owner->decRef();
|
||||
owner = NULL;
|
||||
stored = NULL;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
template<typename Y>
|
||||
void Ptr<T>::reset(Y* p)
|
||||
{
|
||||
Ptr(p).swap(*this);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
template<typename Y, typename D>
|
||||
void Ptr<T>::reset(Y* p, D d)
|
||||
{
|
||||
Ptr(p, d).swap(*this);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void Ptr<T>::swap(Ptr<T>& o)
|
||||
{
|
||||
std::swap(owner, o.owner);
|
||||
std::swap(stored, o.stored);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
T* Ptr<T>::get() const
|
||||
{
|
||||
return stored;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
typename detail::RefOrVoid<T>::type Ptr<T>::operator * () const
|
||||
{
|
||||
return *stored;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
T* Ptr<T>::operator -> () const
|
||||
{
|
||||
return stored;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Ptr<T>::operator T* () const
|
||||
{
|
||||
return stored;
|
||||
}
|
||||
|
||||
|
||||
template<typename T>
|
||||
bool Ptr<T>::empty() const
|
||||
{
|
||||
return !stored;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
template<typename Y>
|
||||
Ptr<Y> Ptr<T>::staticCast() const
|
||||
{
|
||||
return Ptr<Y>(*this, static_cast<Y*>(stored));
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
template<typename Y>
|
||||
Ptr<Y> Ptr<T>::constCast() const
|
||||
{
|
||||
return Ptr<Y>(*this, const_cast<Y*>(stored));
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
template<typename Y>
|
||||
Ptr<Y> Ptr<T>::dynamicCast() const
|
||||
{
|
||||
return Ptr<Y>(*this, dynamic_cast<Y*>(stored));
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void swap(Ptr<T>& ptr1, Ptr<T>& ptr2){
|
||||
ptr1.swap(ptr2);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
bool operator == (const Ptr<T>& ptr1, const Ptr<T>& ptr2)
|
||||
{
|
||||
return ptr1.get() == ptr2.get();
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
bool operator != (const Ptr<T>& ptr1, const Ptr<T>& ptr2)
|
||||
{
|
||||
return ptr1.get() != ptr2.get();
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Ptr<T> makePtr()
|
||||
{
|
||||
return Ptr<T>(new T());
|
||||
}
|
||||
|
||||
template<typename T, typename A1>
|
||||
Ptr<T> makePtr(const A1& a1)
|
||||
{
|
||||
return Ptr<T>(new T(a1));
|
||||
}
|
||||
|
||||
template<typename T, typename A1, typename A2>
|
||||
Ptr<T> makePtr(const A1& a1, const A2& a2)
|
||||
{
|
||||
return Ptr<T>(new T(a1, a2));
|
||||
}
|
||||
|
||||
template<typename T, typename A1, typename A2, typename A3>
|
||||
Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3)
|
||||
{
|
||||
return Ptr<T>(new T(a1, a2, a3));
|
||||
}
|
||||
|
||||
template<typename T, typename A1, typename A2, typename A3, typename A4>
|
||||
Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4)
|
||||
{
|
||||
return Ptr<T>(new T(a1, a2, a3, a4));
|
||||
}
|
||||
|
||||
template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5>
|
||||
Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5)
|
||||
{
|
||||
return Ptr<T>(new T(a1, a2, a3, a4, a5));
|
||||
}
|
||||
|
||||
template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5, typename A6>
|
||||
Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6)
|
||||
{
|
||||
return Ptr<T>(new T(a1, a2, a3, a4, a5, a6));
|
||||
}
|
||||
|
||||
template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5, typename A6, typename A7>
|
||||
Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7)
|
||||
{
|
||||
return Ptr<T>(new T(a1, a2, a3, a4, a5, a6, a7));
|
||||
}
|
||||
|
||||
template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5, typename A6, typename A7, typename A8>
|
||||
Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7, const A8& a8)
|
||||
{
|
||||
return Ptr<T>(new T(a1, a2, a3, a4, a5, a6, a7, a8));
|
||||
}
|
||||
|
||||
template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5, typename A6, typename A7, typename A8, typename A9>
|
||||
Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7, const A8& a8, const A9& a9)
|
||||
{
|
||||
return Ptr<T>(new T(a1, a2, a3, a4, a5, a6, a7, a8, a9));
|
||||
}
|
||||
|
||||
template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5, typename A6, typename A7, typename A8, typename A9, typename A10>
|
||||
Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7, const A8& a8, const A9& a9, const A10& a10)
|
||||
{
|
||||
return Ptr<T>(new T(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10));
|
||||
}
|
||||
|
||||
} // namespace cv
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif // __OPENCV_CORE_PTR_INL_HPP__
|
645
3rdparty/include/opencv2/core/sse_utils.hpp
vendored
Normal file
645
3rdparty/include/opencv2/core/sse_utils.hpp
vendored
Normal file
@ -0,0 +1,645 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2015, Itseez Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_CORE_SSE_UTILS_HPP__
|
||||
#define __OPENCV_CORE_SSE_UTILS_HPP__
|
||||
|
||||
#ifndef __cplusplus
|
||||
# error sse_utils.hpp header must be compiled as C++
|
||||
#endif
|
||||
|
||||
#if CV_SSE2
|
||||
|
||||
inline void _mm_deinterleave_epi8(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1)
|
||||
{
|
||||
__m128i layer1_chunk0 = _mm_unpacklo_epi8(v_r0, v_g0);
|
||||
__m128i layer1_chunk1 = _mm_unpackhi_epi8(v_r0, v_g0);
|
||||
__m128i layer1_chunk2 = _mm_unpacklo_epi8(v_r1, v_g1);
|
||||
__m128i layer1_chunk3 = _mm_unpackhi_epi8(v_r1, v_g1);
|
||||
|
||||
__m128i layer2_chunk0 = _mm_unpacklo_epi8(layer1_chunk0, layer1_chunk2);
|
||||
__m128i layer2_chunk1 = _mm_unpackhi_epi8(layer1_chunk0, layer1_chunk2);
|
||||
__m128i layer2_chunk2 = _mm_unpacklo_epi8(layer1_chunk1, layer1_chunk3);
|
||||
__m128i layer2_chunk3 = _mm_unpackhi_epi8(layer1_chunk1, layer1_chunk3);
|
||||
|
||||
__m128i layer3_chunk0 = _mm_unpacklo_epi8(layer2_chunk0, layer2_chunk2);
|
||||
__m128i layer3_chunk1 = _mm_unpackhi_epi8(layer2_chunk0, layer2_chunk2);
|
||||
__m128i layer3_chunk2 = _mm_unpacklo_epi8(layer2_chunk1, layer2_chunk3);
|
||||
__m128i layer3_chunk3 = _mm_unpackhi_epi8(layer2_chunk1, layer2_chunk3);
|
||||
|
||||
__m128i layer4_chunk0 = _mm_unpacklo_epi8(layer3_chunk0, layer3_chunk2);
|
||||
__m128i layer4_chunk1 = _mm_unpackhi_epi8(layer3_chunk0, layer3_chunk2);
|
||||
__m128i layer4_chunk2 = _mm_unpacklo_epi8(layer3_chunk1, layer3_chunk3);
|
||||
__m128i layer4_chunk3 = _mm_unpackhi_epi8(layer3_chunk1, layer3_chunk3);
|
||||
|
||||
v_r0 = _mm_unpacklo_epi8(layer4_chunk0, layer4_chunk2);
|
||||
v_r1 = _mm_unpackhi_epi8(layer4_chunk0, layer4_chunk2);
|
||||
v_g0 = _mm_unpacklo_epi8(layer4_chunk1, layer4_chunk3);
|
||||
v_g1 = _mm_unpackhi_epi8(layer4_chunk1, layer4_chunk3);
|
||||
}
|
||||
|
||||
inline void _mm_deinterleave_epi8(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0,
|
||||
__m128i & v_g1, __m128i & v_b0, __m128i & v_b1)
|
||||
{
|
||||
__m128i layer1_chunk0 = _mm_unpacklo_epi8(v_r0, v_g1);
|
||||
__m128i layer1_chunk1 = _mm_unpackhi_epi8(v_r0, v_g1);
|
||||
__m128i layer1_chunk2 = _mm_unpacklo_epi8(v_r1, v_b0);
|
||||
__m128i layer1_chunk3 = _mm_unpackhi_epi8(v_r1, v_b0);
|
||||
__m128i layer1_chunk4 = _mm_unpacklo_epi8(v_g0, v_b1);
|
||||
__m128i layer1_chunk5 = _mm_unpackhi_epi8(v_g0, v_b1);
|
||||
|
||||
__m128i layer2_chunk0 = _mm_unpacklo_epi8(layer1_chunk0, layer1_chunk3);
|
||||
__m128i layer2_chunk1 = _mm_unpackhi_epi8(layer1_chunk0, layer1_chunk3);
|
||||
__m128i layer2_chunk2 = _mm_unpacklo_epi8(layer1_chunk1, layer1_chunk4);
|
||||
__m128i layer2_chunk3 = _mm_unpackhi_epi8(layer1_chunk1, layer1_chunk4);
|
||||
__m128i layer2_chunk4 = _mm_unpacklo_epi8(layer1_chunk2, layer1_chunk5);
|
||||
__m128i layer2_chunk5 = _mm_unpackhi_epi8(layer1_chunk2, layer1_chunk5);
|
||||
|
||||
__m128i layer3_chunk0 = _mm_unpacklo_epi8(layer2_chunk0, layer2_chunk3);
|
||||
__m128i layer3_chunk1 = _mm_unpackhi_epi8(layer2_chunk0, layer2_chunk3);
|
||||
__m128i layer3_chunk2 = _mm_unpacklo_epi8(layer2_chunk1, layer2_chunk4);
|
||||
__m128i layer3_chunk3 = _mm_unpackhi_epi8(layer2_chunk1, layer2_chunk4);
|
||||
__m128i layer3_chunk4 = _mm_unpacklo_epi8(layer2_chunk2, layer2_chunk5);
|
||||
__m128i layer3_chunk5 = _mm_unpackhi_epi8(layer2_chunk2, layer2_chunk5);
|
||||
|
||||
__m128i layer4_chunk0 = _mm_unpacklo_epi8(layer3_chunk0, layer3_chunk3);
|
||||
__m128i layer4_chunk1 = _mm_unpackhi_epi8(layer3_chunk0, layer3_chunk3);
|
||||
__m128i layer4_chunk2 = _mm_unpacklo_epi8(layer3_chunk1, layer3_chunk4);
|
||||
__m128i layer4_chunk3 = _mm_unpackhi_epi8(layer3_chunk1, layer3_chunk4);
|
||||
__m128i layer4_chunk4 = _mm_unpacklo_epi8(layer3_chunk2, layer3_chunk5);
|
||||
__m128i layer4_chunk5 = _mm_unpackhi_epi8(layer3_chunk2, layer3_chunk5);
|
||||
|
||||
v_r0 = _mm_unpacklo_epi8(layer4_chunk0, layer4_chunk3);
|
||||
v_r1 = _mm_unpackhi_epi8(layer4_chunk0, layer4_chunk3);
|
||||
v_g0 = _mm_unpacklo_epi8(layer4_chunk1, layer4_chunk4);
|
||||
v_g1 = _mm_unpackhi_epi8(layer4_chunk1, layer4_chunk4);
|
||||
v_b0 = _mm_unpacklo_epi8(layer4_chunk2, layer4_chunk5);
|
||||
v_b1 = _mm_unpackhi_epi8(layer4_chunk2, layer4_chunk5);
|
||||
}
|
||||
|
||||
inline void _mm_deinterleave_epi8(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1,
|
||||
__m128i & v_b0, __m128i & v_b1, __m128i & v_a0, __m128i & v_a1)
|
||||
{
|
||||
__m128i layer1_chunk0 = _mm_unpacklo_epi8(v_r0, v_b0);
|
||||
__m128i layer1_chunk1 = _mm_unpackhi_epi8(v_r0, v_b0);
|
||||
__m128i layer1_chunk2 = _mm_unpacklo_epi8(v_r1, v_b1);
|
||||
__m128i layer1_chunk3 = _mm_unpackhi_epi8(v_r1, v_b1);
|
||||
__m128i layer1_chunk4 = _mm_unpacklo_epi8(v_g0, v_a0);
|
||||
__m128i layer1_chunk5 = _mm_unpackhi_epi8(v_g0, v_a0);
|
||||
__m128i layer1_chunk6 = _mm_unpacklo_epi8(v_g1, v_a1);
|
||||
__m128i layer1_chunk7 = _mm_unpackhi_epi8(v_g1, v_a1);
|
||||
|
||||
__m128i layer2_chunk0 = _mm_unpacklo_epi8(layer1_chunk0, layer1_chunk4);
|
||||
__m128i layer2_chunk1 = _mm_unpackhi_epi8(layer1_chunk0, layer1_chunk4);
|
||||
__m128i layer2_chunk2 = _mm_unpacklo_epi8(layer1_chunk1, layer1_chunk5);
|
||||
__m128i layer2_chunk3 = _mm_unpackhi_epi8(layer1_chunk1, layer1_chunk5);
|
||||
__m128i layer2_chunk4 = _mm_unpacklo_epi8(layer1_chunk2, layer1_chunk6);
|
||||
__m128i layer2_chunk5 = _mm_unpackhi_epi8(layer1_chunk2, layer1_chunk6);
|
||||
__m128i layer2_chunk6 = _mm_unpacklo_epi8(layer1_chunk3, layer1_chunk7);
|
||||
__m128i layer2_chunk7 = _mm_unpackhi_epi8(layer1_chunk3, layer1_chunk7);
|
||||
|
||||
__m128i layer3_chunk0 = _mm_unpacklo_epi8(layer2_chunk0, layer2_chunk4);
|
||||
__m128i layer3_chunk1 = _mm_unpackhi_epi8(layer2_chunk0, layer2_chunk4);
|
||||
__m128i layer3_chunk2 = _mm_unpacklo_epi8(layer2_chunk1, layer2_chunk5);
|
||||
__m128i layer3_chunk3 = _mm_unpackhi_epi8(layer2_chunk1, layer2_chunk5);
|
||||
__m128i layer3_chunk4 = _mm_unpacklo_epi8(layer2_chunk2, layer2_chunk6);
|
||||
__m128i layer3_chunk5 = _mm_unpackhi_epi8(layer2_chunk2, layer2_chunk6);
|
||||
__m128i layer3_chunk6 = _mm_unpacklo_epi8(layer2_chunk3, layer2_chunk7);
|
||||
__m128i layer3_chunk7 = _mm_unpackhi_epi8(layer2_chunk3, layer2_chunk7);
|
||||
|
||||
__m128i layer4_chunk0 = _mm_unpacklo_epi8(layer3_chunk0, layer3_chunk4);
|
||||
__m128i layer4_chunk1 = _mm_unpackhi_epi8(layer3_chunk0, layer3_chunk4);
|
||||
__m128i layer4_chunk2 = _mm_unpacklo_epi8(layer3_chunk1, layer3_chunk5);
|
||||
__m128i layer4_chunk3 = _mm_unpackhi_epi8(layer3_chunk1, layer3_chunk5);
|
||||
__m128i layer4_chunk4 = _mm_unpacklo_epi8(layer3_chunk2, layer3_chunk6);
|
||||
__m128i layer4_chunk5 = _mm_unpackhi_epi8(layer3_chunk2, layer3_chunk6);
|
||||
__m128i layer4_chunk6 = _mm_unpacklo_epi8(layer3_chunk3, layer3_chunk7);
|
||||
__m128i layer4_chunk7 = _mm_unpackhi_epi8(layer3_chunk3, layer3_chunk7);
|
||||
|
||||
v_r0 = _mm_unpacklo_epi8(layer4_chunk0, layer4_chunk4);
|
||||
v_r1 = _mm_unpackhi_epi8(layer4_chunk0, layer4_chunk4);
|
||||
v_g0 = _mm_unpacklo_epi8(layer4_chunk1, layer4_chunk5);
|
||||
v_g1 = _mm_unpackhi_epi8(layer4_chunk1, layer4_chunk5);
|
||||
v_b0 = _mm_unpacklo_epi8(layer4_chunk2, layer4_chunk6);
|
||||
v_b1 = _mm_unpackhi_epi8(layer4_chunk2, layer4_chunk6);
|
||||
v_a0 = _mm_unpacklo_epi8(layer4_chunk3, layer4_chunk7);
|
||||
v_a1 = _mm_unpackhi_epi8(layer4_chunk3, layer4_chunk7);
|
||||
}
|
||||
|
||||
inline void _mm_interleave_epi8(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1)
|
||||
{
|
||||
__m128i v_mask = _mm_set1_epi16(0x00ff);
|
||||
|
||||
__m128i layer4_chunk0 = _mm_packus_epi16(_mm_and_si128(v_r0, v_mask), _mm_and_si128(v_r1, v_mask));
|
||||
__m128i layer4_chunk2 = _mm_packus_epi16(_mm_srli_epi16(v_r0, 8), _mm_srli_epi16(v_r1, 8));
|
||||
__m128i layer4_chunk1 = _mm_packus_epi16(_mm_and_si128(v_g0, v_mask), _mm_and_si128(v_g1, v_mask));
|
||||
__m128i layer4_chunk3 = _mm_packus_epi16(_mm_srli_epi16(v_g0, 8), _mm_srli_epi16(v_g1, 8));
|
||||
|
||||
__m128i layer3_chunk0 = _mm_packus_epi16(_mm_and_si128(layer4_chunk0, v_mask), _mm_and_si128(layer4_chunk1, v_mask));
|
||||
__m128i layer3_chunk2 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk0, 8), _mm_srli_epi16(layer4_chunk1, 8));
|
||||
__m128i layer3_chunk1 = _mm_packus_epi16(_mm_and_si128(layer4_chunk2, v_mask), _mm_and_si128(layer4_chunk3, v_mask));
|
||||
__m128i layer3_chunk3 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk2, 8), _mm_srli_epi16(layer4_chunk3, 8));
|
||||
|
||||
__m128i layer2_chunk0 = _mm_packus_epi16(_mm_and_si128(layer3_chunk0, v_mask), _mm_and_si128(layer3_chunk1, v_mask));
|
||||
__m128i layer2_chunk2 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk0, 8), _mm_srli_epi16(layer3_chunk1, 8));
|
||||
__m128i layer2_chunk1 = _mm_packus_epi16(_mm_and_si128(layer3_chunk2, v_mask), _mm_and_si128(layer3_chunk3, v_mask));
|
||||
__m128i layer2_chunk3 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk2, 8), _mm_srli_epi16(layer3_chunk3, 8));
|
||||
|
||||
__m128i layer1_chunk0 = _mm_packus_epi16(_mm_and_si128(layer2_chunk0, v_mask), _mm_and_si128(layer2_chunk1, v_mask));
|
||||
__m128i layer1_chunk2 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk0, 8), _mm_srli_epi16(layer2_chunk1, 8));
|
||||
__m128i layer1_chunk1 = _mm_packus_epi16(_mm_and_si128(layer2_chunk2, v_mask), _mm_and_si128(layer2_chunk3, v_mask));
|
||||
__m128i layer1_chunk3 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk2, 8), _mm_srli_epi16(layer2_chunk3, 8));
|
||||
|
||||
v_r0 = _mm_packus_epi16(_mm_and_si128(layer1_chunk0, v_mask), _mm_and_si128(layer1_chunk1, v_mask));
|
||||
v_g0 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk0, 8), _mm_srli_epi16(layer1_chunk1, 8));
|
||||
v_r1 = _mm_packus_epi16(_mm_and_si128(layer1_chunk2, v_mask), _mm_and_si128(layer1_chunk3, v_mask));
|
||||
v_g1 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk2, 8), _mm_srli_epi16(layer1_chunk3, 8));
|
||||
}
|
||||
|
||||
inline void _mm_interleave_epi8(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0,
|
||||
__m128i & v_g1, __m128i & v_b0, __m128i & v_b1)
|
||||
{
|
||||
__m128i v_mask = _mm_set1_epi16(0x00ff);
|
||||
|
||||
__m128i layer4_chunk0 = _mm_packus_epi16(_mm_and_si128(v_r0, v_mask), _mm_and_si128(v_r1, v_mask));
|
||||
__m128i layer4_chunk3 = _mm_packus_epi16(_mm_srli_epi16(v_r0, 8), _mm_srli_epi16(v_r1, 8));
|
||||
__m128i layer4_chunk1 = _mm_packus_epi16(_mm_and_si128(v_g0, v_mask), _mm_and_si128(v_g1, v_mask));
|
||||
__m128i layer4_chunk4 = _mm_packus_epi16(_mm_srli_epi16(v_g0, 8), _mm_srli_epi16(v_g1, 8));
|
||||
__m128i layer4_chunk2 = _mm_packus_epi16(_mm_and_si128(v_b0, v_mask), _mm_and_si128(v_b1, v_mask));
|
||||
__m128i layer4_chunk5 = _mm_packus_epi16(_mm_srli_epi16(v_b0, 8), _mm_srli_epi16(v_b1, 8));
|
||||
|
||||
__m128i layer3_chunk0 = _mm_packus_epi16(_mm_and_si128(layer4_chunk0, v_mask), _mm_and_si128(layer4_chunk1, v_mask));
|
||||
__m128i layer3_chunk3 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk0, 8), _mm_srli_epi16(layer4_chunk1, 8));
|
||||
__m128i layer3_chunk1 = _mm_packus_epi16(_mm_and_si128(layer4_chunk2, v_mask), _mm_and_si128(layer4_chunk3, v_mask));
|
||||
__m128i layer3_chunk4 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk2, 8), _mm_srli_epi16(layer4_chunk3, 8));
|
||||
__m128i layer3_chunk2 = _mm_packus_epi16(_mm_and_si128(layer4_chunk4, v_mask), _mm_and_si128(layer4_chunk5, v_mask));
|
||||
__m128i layer3_chunk5 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk4, 8), _mm_srli_epi16(layer4_chunk5, 8));
|
||||
|
||||
__m128i layer2_chunk0 = _mm_packus_epi16(_mm_and_si128(layer3_chunk0, v_mask), _mm_and_si128(layer3_chunk1, v_mask));
|
||||
__m128i layer2_chunk3 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk0, 8), _mm_srli_epi16(layer3_chunk1, 8));
|
||||
__m128i layer2_chunk1 = _mm_packus_epi16(_mm_and_si128(layer3_chunk2, v_mask), _mm_and_si128(layer3_chunk3, v_mask));
|
||||
__m128i layer2_chunk4 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk2, 8), _mm_srli_epi16(layer3_chunk3, 8));
|
||||
__m128i layer2_chunk2 = _mm_packus_epi16(_mm_and_si128(layer3_chunk4, v_mask), _mm_and_si128(layer3_chunk5, v_mask));
|
||||
__m128i layer2_chunk5 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk4, 8), _mm_srli_epi16(layer3_chunk5, 8));
|
||||
|
||||
__m128i layer1_chunk0 = _mm_packus_epi16(_mm_and_si128(layer2_chunk0, v_mask), _mm_and_si128(layer2_chunk1, v_mask));
|
||||
__m128i layer1_chunk3 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk0, 8), _mm_srli_epi16(layer2_chunk1, 8));
|
||||
__m128i layer1_chunk1 = _mm_packus_epi16(_mm_and_si128(layer2_chunk2, v_mask), _mm_and_si128(layer2_chunk3, v_mask));
|
||||
__m128i layer1_chunk4 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk2, 8), _mm_srli_epi16(layer2_chunk3, 8));
|
||||
__m128i layer1_chunk2 = _mm_packus_epi16(_mm_and_si128(layer2_chunk4, v_mask), _mm_and_si128(layer2_chunk5, v_mask));
|
||||
__m128i layer1_chunk5 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk4, 8), _mm_srli_epi16(layer2_chunk5, 8));
|
||||
|
||||
v_r0 = _mm_packus_epi16(_mm_and_si128(layer1_chunk0, v_mask), _mm_and_si128(layer1_chunk1, v_mask));
|
||||
v_g1 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk0, 8), _mm_srli_epi16(layer1_chunk1, 8));
|
||||
v_r1 = _mm_packus_epi16(_mm_and_si128(layer1_chunk2, v_mask), _mm_and_si128(layer1_chunk3, v_mask));
|
||||
v_b0 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk2, 8), _mm_srli_epi16(layer1_chunk3, 8));
|
||||
v_g0 = _mm_packus_epi16(_mm_and_si128(layer1_chunk4, v_mask), _mm_and_si128(layer1_chunk5, v_mask));
|
||||
v_b1 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk4, 8), _mm_srli_epi16(layer1_chunk5, 8));
|
||||
}
|
||||
|
||||
inline void _mm_interleave_epi8(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1,
|
||||
__m128i & v_b0, __m128i & v_b1, __m128i & v_a0, __m128i & v_a1)
|
||||
{
|
||||
__m128i v_mask = _mm_set1_epi16(0x00ff);
|
||||
|
||||
__m128i layer4_chunk0 = _mm_packus_epi16(_mm_and_si128(v_r0, v_mask), _mm_and_si128(v_r1, v_mask));
|
||||
__m128i layer4_chunk4 = _mm_packus_epi16(_mm_srli_epi16(v_r0, 8), _mm_srli_epi16(v_r1, 8));
|
||||
__m128i layer4_chunk1 = _mm_packus_epi16(_mm_and_si128(v_g0, v_mask), _mm_and_si128(v_g1, v_mask));
|
||||
__m128i layer4_chunk5 = _mm_packus_epi16(_mm_srli_epi16(v_g0, 8), _mm_srli_epi16(v_g1, 8));
|
||||
__m128i layer4_chunk2 = _mm_packus_epi16(_mm_and_si128(v_b0, v_mask), _mm_and_si128(v_b1, v_mask));
|
||||
__m128i layer4_chunk6 = _mm_packus_epi16(_mm_srli_epi16(v_b0, 8), _mm_srli_epi16(v_b1, 8));
|
||||
__m128i layer4_chunk3 = _mm_packus_epi16(_mm_and_si128(v_a0, v_mask), _mm_and_si128(v_a1, v_mask));
|
||||
__m128i layer4_chunk7 = _mm_packus_epi16(_mm_srli_epi16(v_a0, 8), _mm_srli_epi16(v_a1, 8));
|
||||
|
||||
__m128i layer3_chunk0 = _mm_packus_epi16(_mm_and_si128(layer4_chunk0, v_mask), _mm_and_si128(layer4_chunk1, v_mask));
|
||||
__m128i layer3_chunk4 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk0, 8), _mm_srli_epi16(layer4_chunk1, 8));
|
||||
__m128i layer3_chunk1 = _mm_packus_epi16(_mm_and_si128(layer4_chunk2, v_mask), _mm_and_si128(layer4_chunk3, v_mask));
|
||||
__m128i layer3_chunk5 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk2, 8), _mm_srli_epi16(layer4_chunk3, 8));
|
||||
__m128i layer3_chunk2 = _mm_packus_epi16(_mm_and_si128(layer4_chunk4, v_mask), _mm_and_si128(layer4_chunk5, v_mask));
|
||||
__m128i layer3_chunk6 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk4, 8), _mm_srli_epi16(layer4_chunk5, 8));
|
||||
__m128i layer3_chunk3 = _mm_packus_epi16(_mm_and_si128(layer4_chunk6, v_mask), _mm_and_si128(layer4_chunk7, v_mask));
|
||||
__m128i layer3_chunk7 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk6, 8), _mm_srli_epi16(layer4_chunk7, 8));
|
||||
|
||||
__m128i layer2_chunk0 = _mm_packus_epi16(_mm_and_si128(layer3_chunk0, v_mask), _mm_and_si128(layer3_chunk1, v_mask));
|
||||
__m128i layer2_chunk4 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk0, 8), _mm_srli_epi16(layer3_chunk1, 8));
|
||||
__m128i layer2_chunk1 = _mm_packus_epi16(_mm_and_si128(layer3_chunk2, v_mask), _mm_and_si128(layer3_chunk3, v_mask));
|
||||
__m128i layer2_chunk5 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk2, 8), _mm_srli_epi16(layer3_chunk3, 8));
|
||||
__m128i layer2_chunk2 = _mm_packus_epi16(_mm_and_si128(layer3_chunk4, v_mask), _mm_and_si128(layer3_chunk5, v_mask));
|
||||
__m128i layer2_chunk6 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk4, 8), _mm_srli_epi16(layer3_chunk5, 8));
|
||||
__m128i layer2_chunk3 = _mm_packus_epi16(_mm_and_si128(layer3_chunk6, v_mask), _mm_and_si128(layer3_chunk7, v_mask));
|
||||
__m128i layer2_chunk7 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk6, 8), _mm_srli_epi16(layer3_chunk7, 8));
|
||||
|
||||
__m128i layer1_chunk0 = _mm_packus_epi16(_mm_and_si128(layer2_chunk0, v_mask), _mm_and_si128(layer2_chunk1, v_mask));
|
||||
__m128i layer1_chunk4 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk0, 8), _mm_srli_epi16(layer2_chunk1, 8));
|
||||
__m128i layer1_chunk1 = _mm_packus_epi16(_mm_and_si128(layer2_chunk2, v_mask), _mm_and_si128(layer2_chunk3, v_mask));
|
||||
__m128i layer1_chunk5 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk2, 8), _mm_srli_epi16(layer2_chunk3, 8));
|
||||
__m128i layer1_chunk2 = _mm_packus_epi16(_mm_and_si128(layer2_chunk4, v_mask), _mm_and_si128(layer2_chunk5, v_mask));
|
||||
__m128i layer1_chunk6 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk4, 8), _mm_srli_epi16(layer2_chunk5, 8));
|
||||
__m128i layer1_chunk3 = _mm_packus_epi16(_mm_and_si128(layer2_chunk6, v_mask), _mm_and_si128(layer2_chunk7, v_mask));
|
||||
__m128i layer1_chunk7 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk6, 8), _mm_srli_epi16(layer2_chunk7, 8));
|
||||
|
||||
v_r0 = _mm_packus_epi16(_mm_and_si128(layer1_chunk0, v_mask), _mm_and_si128(layer1_chunk1, v_mask));
|
||||
v_b0 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk0, 8), _mm_srli_epi16(layer1_chunk1, 8));
|
||||
v_r1 = _mm_packus_epi16(_mm_and_si128(layer1_chunk2, v_mask), _mm_and_si128(layer1_chunk3, v_mask));
|
||||
v_b1 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk2, 8), _mm_srli_epi16(layer1_chunk3, 8));
|
||||
v_g0 = _mm_packus_epi16(_mm_and_si128(layer1_chunk4, v_mask), _mm_and_si128(layer1_chunk5, v_mask));
|
||||
v_a0 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk4, 8), _mm_srli_epi16(layer1_chunk5, 8));
|
||||
v_g1 = _mm_packus_epi16(_mm_and_si128(layer1_chunk6, v_mask), _mm_and_si128(layer1_chunk7, v_mask));
|
||||
v_a1 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk6, 8), _mm_srli_epi16(layer1_chunk7, 8));
|
||||
}
|
||||
|
||||
inline void _mm_deinterleave_epi16(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1)
|
||||
{
|
||||
__m128i layer1_chunk0 = _mm_unpacklo_epi16(v_r0, v_g0);
|
||||
__m128i layer1_chunk1 = _mm_unpackhi_epi16(v_r0, v_g0);
|
||||
__m128i layer1_chunk2 = _mm_unpacklo_epi16(v_r1, v_g1);
|
||||
__m128i layer1_chunk3 = _mm_unpackhi_epi16(v_r1, v_g1);
|
||||
|
||||
__m128i layer2_chunk0 = _mm_unpacklo_epi16(layer1_chunk0, layer1_chunk2);
|
||||
__m128i layer2_chunk1 = _mm_unpackhi_epi16(layer1_chunk0, layer1_chunk2);
|
||||
__m128i layer2_chunk2 = _mm_unpacklo_epi16(layer1_chunk1, layer1_chunk3);
|
||||
__m128i layer2_chunk3 = _mm_unpackhi_epi16(layer1_chunk1, layer1_chunk3);
|
||||
|
||||
__m128i layer3_chunk0 = _mm_unpacklo_epi16(layer2_chunk0, layer2_chunk2);
|
||||
__m128i layer3_chunk1 = _mm_unpackhi_epi16(layer2_chunk0, layer2_chunk2);
|
||||
__m128i layer3_chunk2 = _mm_unpacklo_epi16(layer2_chunk1, layer2_chunk3);
|
||||
__m128i layer3_chunk3 = _mm_unpackhi_epi16(layer2_chunk1, layer2_chunk3);
|
||||
|
||||
v_r0 = _mm_unpacklo_epi16(layer3_chunk0, layer3_chunk2);
|
||||
v_r1 = _mm_unpackhi_epi16(layer3_chunk0, layer3_chunk2);
|
||||
v_g0 = _mm_unpacklo_epi16(layer3_chunk1, layer3_chunk3);
|
||||
v_g1 = _mm_unpackhi_epi16(layer3_chunk1, layer3_chunk3);
|
||||
}
|
||||
|
||||
inline void _mm_deinterleave_epi16(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0,
|
||||
__m128i & v_g1, __m128i & v_b0, __m128i & v_b1)
|
||||
{
|
||||
__m128i layer1_chunk0 = _mm_unpacklo_epi16(v_r0, v_g1);
|
||||
__m128i layer1_chunk1 = _mm_unpackhi_epi16(v_r0, v_g1);
|
||||
__m128i layer1_chunk2 = _mm_unpacklo_epi16(v_r1, v_b0);
|
||||
__m128i layer1_chunk3 = _mm_unpackhi_epi16(v_r1, v_b0);
|
||||
__m128i layer1_chunk4 = _mm_unpacklo_epi16(v_g0, v_b1);
|
||||
__m128i layer1_chunk5 = _mm_unpackhi_epi16(v_g0, v_b1);
|
||||
|
||||
__m128i layer2_chunk0 = _mm_unpacklo_epi16(layer1_chunk0, layer1_chunk3);
|
||||
__m128i layer2_chunk1 = _mm_unpackhi_epi16(layer1_chunk0, layer1_chunk3);
|
||||
__m128i layer2_chunk2 = _mm_unpacklo_epi16(layer1_chunk1, layer1_chunk4);
|
||||
__m128i layer2_chunk3 = _mm_unpackhi_epi16(layer1_chunk1, layer1_chunk4);
|
||||
__m128i layer2_chunk4 = _mm_unpacklo_epi16(layer1_chunk2, layer1_chunk5);
|
||||
__m128i layer2_chunk5 = _mm_unpackhi_epi16(layer1_chunk2, layer1_chunk5);
|
||||
|
||||
__m128i layer3_chunk0 = _mm_unpacklo_epi16(layer2_chunk0, layer2_chunk3);
|
||||
__m128i layer3_chunk1 = _mm_unpackhi_epi16(layer2_chunk0, layer2_chunk3);
|
||||
__m128i layer3_chunk2 = _mm_unpacklo_epi16(layer2_chunk1, layer2_chunk4);
|
||||
__m128i layer3_chunk3 = _mm_unpackhi_epi16(layer2_chunk1, layer2_chunk4);
|
||||
__m128i layer3_chunk4 = _mm_unpacklo_epi16(layer2_chunk2, layer2_chunk5);
|
||||
__m128i layer3_chunk5 = _mm_unpackhi_epi16(layer2_chunk2, layer2_chunk5);
|
||||
|
||||
v_r0 = _mm_unpacklo_epi16(layer3_chunk0, layer3_chunk3);
|
||||
v_r1 = _mm_unpackhi_epi16(layer3_chunk0, layer3_chunk3);
|
||||
v_g0 = _mm_unpacklo_epi16(layer3_chunk1, layer3_chunk4);
|
||||
v_g1 = _mm_unpackhi_epi16(layer3_chunk1, layer3_chunk4);
|
||||
v_b0 = _mm_unpacklo_epi16(layer3_chunk2, layer3_chunk5);
|
||||
v_b1 = _mm_unpackhi_epi16(layer3_chunk2, layer3_chunk5);
|
||||
}
|
||||
|
||||
inline void _mm_deinterleave_epi16(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1,
|
||||
__m128i & v_b0, __m128i & v_b1, __m128i & v_a0, __m128i & v_a1)
|
||||
{
|
||||
__m128i layer1_chunk0 = _mm_unpacklo_epi16(v_r0, v_b0);
|
||||
__m128i layer1_chunk1 = _mm_unpackhi_epi16(v_r0, v_b0);
|
||||
__m128i layer1_chunk2 = _mm_unpacklo_epi16(v_r1, v_b1);
|
||||
__m128i layer1_chunk3 = _mm_unpackhi_epi16(v_r1, v_b1);
|
||||
__m128i layer1_chunk4 = _mm_unpacklo_epi16(v_g0, v_a0);
|
||||
__m128i layer1_chunk5 = _mm_unpackhi_epi16(v_g0, v_a0);
|
||||
__m128i layer1_chunk6 = _mm_unpacklo_epi16(v_g1, v_a1);
|
||||
__m128i layer1_chunk7 = _mm_unpackhi_epi16(v_g1, v_a1);
|
||||
|
||||
__m128i layer2_chunk0 = _mm_unpacklo_epi16(layer1_chunk0, layer1_chunk4);
|
||||
__m128i layer2_chunk1 = _mm_unpackhi_epi16(layer1_chunk0, layer1_chunk4);
|
||||
__m128i layer2_chunk2 = _mm_unpacklo_epi16(layer1_chunk1, layer1_chunk5);
|
||||
__m128i layer2_chunk3 = _mm_unpackhi_epi16(layer1_chunk1, layer1_chunk5);
|
||||
__m128i layer2_chunk4 = _mm_unpacklo_epi16(layer1_chunk2, layer1_chunk6);
|
||||
__m128i layer2_chunk5 = _mm_unpackhi_epi16(layer1_chunk2, layer1_chunk6);
|
||||
__m128i layer2_chunk6 = _mm_unpacklo_epi16(layer1_chunk3, layer1_chunk7);
|
||||
__m128i layer2_chunk7 = _mm_unpackhi_epi16(layer1_chunk3, layer1_chunk7);
|
||||
|
||||
__m128i layer3_chunk0 = _mm_unpacklo_epi16(layer2_chunk0, layer2_chunk4);
|
||||
__m128i layer3_chunk1 = _mm_unpackhi_epi16(layer2_chunk0, layer2_chunk4);
|
||||
__m128i layer3_chunk2 = _mm_unpacklo_epi16(layer2_chunk1, layer2_chunk5);
|
||||
__m128i layer3_chunk3 = _mm_unpackhi_epi16(layer2_chunk1, layer2_chunk5);
|
||||
__m128i layer3_chunk4 = _mm_unpacklo_epi16(layer2_chunk2, layer2_chunk6);
|
||||
__m128i layer3_chunk5 = _mm_unpackhi_epi16(layer2_chunk2, layer2_chunk6);
|
||||
__m128i layer3_chunk6 = _mm_unpacklo_epi16(layer2_chunk3, layer2_chunk7);
|
||||
__m128i layer3_chunk7 = _mm_unpackhi_epi16(layer2_chunk3, layer2_chunk7);
|
||||
|
||||
v_r0 = _mm_unpacklo_epi16(layer3_chunk0, layer3_chunk4);
|
||||
v_r1 = _mm_unpackhi_epi16(layer3_chunk0, layer3_chunk4);
|
||||
v_g0 = _mm_unpacklo_epi16(layer3_chunk1, layer3_chunk5);
|
||||
v_g1 = _mm_unpackhi_epi16(layer3_chunk1, layer3_chunk5);
|
||||
v_b0 = _mm_unpacklo_epi16(layer3_chunk2, layer3_chunk6);
|
||||
v_b1 = _mm_unpackhi_epi16(layer3_chunk2, layer3_chunk6);
|
||||
v_a0 = _mm_unpacklo_epi16(layer3_chunk3, layer3_chunk7);
|
||||
v_a1 = _mm_unpackhi_epi16(layer3_chunk3, layer3_chunk7);
|
||||
}
|
||||
|
||||
#if CV_SSE4_1
|
||||
|
||||
inline void _mm_interleave_epi16(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1)
|
||||
{
|
||||
__m128i v_mask = _mm_set1_epi32(0x0000ffff);
|
||||
|
||||
__m128i layer3_chunk0 = _mm_packus_epi32(_mm_and_si128(v_r0, v_mask), _mm_and_si128(v_r1, v_mask));
|
||||
__m128i layer3_chunk2 = _mm_packus_epi32(_mm_srli_epi32(v_r0, 16), _mm_srli_epi32(v_r1, 16));
|
||||
__m128i layer3_chunk1 = _mm_packus_epi32(_mm_and_si128(v_g0, v_mask), _mm_and_si128(v_g1, v_mask));
|
||||
__m128i layer3_chunk3 = _mm_packus_epi32(_mm_srli_epi32(v_g0, 16), _mm_srli_epi32(v_g1, 16));
|
||||
|
||||
__m128i layer2_chunk0 = _mm_packus_epi32(_mm_and_si128(layer3_chunk0, v_mask), _mm_and_si128(layer3_chunk1, v_mask));
|
||||
__m128i layer2_chunk2 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk0, 16), _mm_srli_epi32(layer3_chunk1, 16));
|
||||
__m128i layer2_chunk1 = _mm_packus_epi32(_mm_and_si128(layer3_chunk2, v_mask), _mm_and_si128(layer3_chunk3, v_mask));
|
||||
__m128i layer2_chunk3 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk2, 16), _mm_srli_epi32(layer3_chunk3, 16));
|
||||
|
||||
__m128i layer1_chunk0 = _mm_packus_epi32(_mm_and_si128(layer2_chunk0, v_mask), _mm_and_si128(layer2_chunk1, v_mask));
|
||||
__m128i layer1_chunk2 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk0, 16), _mm_srli_epi32(layer2_chunk1, 16));
|
||||
__m128i layer1_chunk1 = _mm_packus_epi32(_mm_and_si128(layer2_chunk2, v_mask), _mm_and_si128(layer2_chunk3, v_mask));
|
||||
__m128i layer1_chunk3 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk2, 16), _mm_srli_epi32(layer2_chunk3, 16));
|
||||
|
||||
v_r0 = _mm_packus_epi32(_mm_and_si128(layer1_chunk0, v_mask), _mm_and_si128(layer1_chunk1, v_mask));
|
||||
v_g0 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk0, 16), _mm_srli_epi32(layer1_chunk1, 16));
|
||||
v_r1 = _mm_packus_epi32(_mm_and_si128(layer1_chunk2, v_mask), _mm_and_si128(layer1_chunk3, v_mask));
|
||||
v_g1 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk2, 16), _mm_srli_epi32(layer1_chunk3, 16));
|
||||
}
|
||||
|
||||
inline void _mm_interleave_epi16(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0,
|
||||
__m128i & v_g1, __m128i & v_b0, __m128i & v_b1)
|
||||
{
|
||||
__m128i v_mask = _mm_set1_epi32(0x0000ffff);
|
||||
|
||||
__m128i layer3_chunk0 = _mm_packus_epi32(_mm_and_si128(v_r0, v_mask), _mm_and_si128(v_r1, v_mask));
|
||||
__m128i layer3_chunk3 = _mm_packus_epi32(_mm_srli_epi32(v_r0, 16), _mm_srli_epi32(v_r1, 16));
|
||||
__m128i layer3_chunk1 = _mm_packus_epi32(_mm_and_si128(v_g0, v_mask), _mm_and_si128(v_g1, v_mask));
|
||||
__m128i layer3_chunk4 = _mm_packus_epi32(_mm_srli_epi32(v_g0, 16), _mm_srli_epi32(v_g1, 16));
|
||||
__m128i layer3_chunk2 = _mm_packus_epi32(_mm_and_si128(v_b0, v_mask), _mm_and_si128(v_b1, v_mask));
|
||||
__m128i layer3_chunk5 = _mm_packus_epi32(_mm_srli_epi32(v_b0, 16), _mm_srli_epi32(v_b1, 16));
|
||||
|
||||
__m128i layer2_chunk0 = _mm_packus_epi32(_mm_and_si128(layer3_chunk0, v_mask), _mm_and_si128(layer3_chunk1, v_mask));
|
||||
__m128i layer2_chunk3 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk0, 16), _mm_srli_epi32(layer3_chunk1, 16));
|
||||
__m128i layer2_chunk1 = _mm_packus_epi32(_mm_and_si128(layer3_chunk2, v_mask), _mm_and_si128(layer3_chunk3, v_mask));
|
||||
__m128i layer2_chunk4 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk2, 16), _mm_srli_epi32(layer3_chunk3, 16));
|
||||
__m128i layer2_chunk2 = _mm_packus_epi32(_mm_and_si128(layer3_chunk4, v_mask), _mm_and_si128(layer3_chunk5, v_mask));
|
||||
__m128i layer2_chunk5 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk4, 16), _mm_srli_epi32(layer3_chunk5, 16));
|
||||
|
||||
__m128i layer1_chunk0 = _mm_packus_epi32(_mm_and_si128(layer2_chunk0, v_mask), _mm_and_si128(layer2_chunk1, v_mask));
|
||||
__m128i layer1_chunk3 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk0, 16), _mm_srli_epi32(layer2_chunk1, 16));
|
||||
__m128i layer1_chunk1 = _mm_packus_epi32(_mm_and_si128(layer2_chunk2, v_mask), _mm_and_si128(layer2_chunk3, v_mask));
|
||||
__m128i layer1_chunk4 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk2, 16), _mm_srli_epi32(layer2_chunk3, 16));
|
||||
__m128i layer1_chunk2 = _mm_packus_epi32(_mm_and_si128(layer2_chunk4, v_mask), _mm_and_si128(layer2_chunk5, v_mask));
|
||||
__m128i layer1_chunk5 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk4, 16), _mm_srli_epi32(layer2_chunk5, 16));
|
||||
|
||||
v_r0 = _mm_packus_epi32(_mm_and_si128(layer1_chunk0, v_mask), _mm_and_si128(layer1_chunk1, v_mask));
|
||||
v_g1 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk0, 16), _mm_srli_epi32(layer1_chunk1, 16));
|
||||
v_r1 = _mm_packus_epi32(_mm_and_si128(layer1_chunk2, v_mask), _mm_and_si128(layer1_chunk3, v_mask));
|
||||
v_b0 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk2, 16), _mm_srli_epi32(layer1_chunk3, 16));
|
||||
v_g0 = _mm_packus_epi32(_mm_and_si128(layer1_chunk4, v_mask), _mm_and_si128(layer1_chunk5, v_mask));
|
||||
v_b1 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk4, 16), _mm_srli_epi32(layer1_chunk5, 16));
|
||||
}
|
||||
|
||||
inline void _mm_interleave_epi16(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1,
|
||||
__m128i & v_b0, __m128i & v_b1, __m128i & v_a0, __m128i & v_a1)
|
||||
{
|
||||
__m128i v_mask = _mm_set1_epi32(0x0000ffff);
|
||||
|
||||
__m128i layer3_chunk0 = _mm_packus_epi32(_mm_and_si128(v_r0, v_mask), _mm_and_si128(v_r1, v_mask));
|
||||
__m128i layer3_chunk4 = _mm_packus_epi32(_mm_srli_epi32(v_r0, 16), _mm_srli_epi32(v_r1, 16));
|
||||
__m128i layer3_chunk1 = _mm_packus_epi32(_mm_and_si128(v_g0, v_mask), _mm_and_si128(v_g1, v_mask));
|
||||
__m128i layer3_chunk5 = _mm_packus_epi32(_mm_srli_epi32(v_g0, 16), _mm_srli_epi32(v_g1, 16));
|
||||
__m128i layer3_chunk2 = _mm_packus_epi32(_mm_and_si128(v_b0, v_mask), _mm_and_si128(v_b1, v_mask));
|
||||
__m128i layer3_chunk6 = _mm_packus_epi32(_mm_srli_epi32(v_b0, 16), _mm_srli_epi32(v_b1, 16));
|
||||
__m128i layer3_chunk3 = _mm_packus_epi32(_mm_and_si128(v_a0, v_mask), _mm_and_si128(v_a1, v_mask));
|
||||
__m128i layer3_chunk7 = _mm_packus_epi32(_mm_srli_epi32(v_a0, 16), _mm_srli_epi32(v_a1, 16));
|
||||
|
||||
__m128i layer2_chunk0 = _mm_packus_epi32(_mm_and_si128(layer3_chunk0, v_mask), _mm_and_si128(layer3_chunk1, v_mask));
|
||||
__m128i layer2_chunk4 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk0, 16), _mm_srli_epi32(layer3_chunk1, 16));
|
||||
__m128i layer2_chunk1 = _mm_packus_epi32(_mm_and_si128(layer3_chunk2, v_mask), _mm_and_si128(layer3_chunk3, v_mask));
|
||||
__m128i layer2_chunk5 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk2, 16), _mm_srli_epi32(layer3_chunk3, 16));
|
||||
__m128i layer2_chunk2 = _mm_packus_epi32(_mm_and_si128(layer3_chunk4, v_mask), _mm_and_si128(layer3_chunk5, v_mask));
|
||||
__m128i layer2_chunk6 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk4, 16), _mm_srli_epi32(layer3_chunk5, 16));
|
||||
__m128i layer2_chunk3 = _mm_packus_epi32(_mm_and_si128(layer3_chunk6, v_mask), _mm_and_si128(layer3_chunk7, v_mask));
|
||||
__m128i layer2_chunk7 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk6, 16), _mm_srli_epi32(layer3_chunk7, 16));
|
||||
|
||||
__m128i layer1_chunk0 = _mm_packus_epi32(_mm_and_si128(layer2_chunk0, v_mask), _mm_and_si128(layer2_chunk1, v_mask));
|
||||
__m128i layer1_chunk4 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk0, 16), _mm_srli_epi32(layer2_chunk1, 16));
|
||||
__m128i layer1_chunk1 = _mm_packus_epi32(_mm_and_si128(layer2_chunk2, v_mask), _mm_and_si128(layer2_chunk3, v_mask));
|
||||
__m128i layer1_chunk5 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk2, 16), _mm_srli_epi32(layer2_chunk3, 16));
|
||||
__m128i layer1_chunk2 = _mm_packus_epi32(_mm_and_si128(layer2_chunk4, v_mask), _mm_and_si128(layer2_chunk5, v_mask));
|
||||
__m128i layer1_chunk6 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk4, 16), _mm_srli_epi32(layer2_chunk5, 16));
|
||||
__m128i layer1_chunk3 = _mm_packus_epi32(_mm_and_si128(layer2_chunk6, v_mask), _mm_and_si128(layer2_chunk7, v_mask));
|
||||
__m128i layer1_chunk7 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk6, 16), _mm_srli_epi32(layer2_chunk7, 16));
|
||||
|
||||
v_r0 = _mm_packus_epi32(_mm_and_si128(layer1_chunk0, v_mask), _mm_and_si128(layer1_chunk1, v_mask));
|
||||
v_b0 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk0, 16), _mm_srli_epi32(layer1_chunk1, 16));
|
||||
v_r1 = _mm_packus_epi32(_mm_and_si128(layer1_chunk2, v_mask), _mm_and_si128(layer1_chunk3, v_mask));
|
||||
v_b1 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk2, 16), _mm_srli_epi32(layer1_chunk3, 16));
|
||||
v_g0 = _mm_packus_epi32(_mm_and_si128(layer1_chunk4, v_mask), _mm_and_si128(layer1_chunk5, v_mask));
|
||||
v_a0 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk4, 16), _mm_srli_epi32(layer1_chunk5, 16));
|
||||
v_g1 = _mm_packus_epi32(_mm_and_si128(layer1_chunk6, v_mask), _mm_and_si128(layer1_chunk7, v_mask));
|
||||
v_a1 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk6, 16), _mm_srli_epi32(layer1_chunk7, 16));
|
||||
}
|
||||
|
||||
#endif // CV_SSE4_1
|
||||
|
||||
inline void _mm_deinterleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0, __m128 & v_g1)
|
||||
{
|
||||
__m128 layer1_chunk0 = _mm_unpacklo_ps(v_r0, v_g0);
|
||||
__m128 layer1_chunk1 = _mm_unpackhi_ps(v_r0, v_g0);
|
||||
__m128 layer1_chunk2 = _mm_unpacklo_ps(v_r1, v_g1);
|
||||
__m128 layer1_chunk3 = _mm_unpackhi_ps(v_r1, v_g1);
|
||||
|
||||
__m128 layer2_chunk0 = _mm_unpacklo_ps(layer1_chunk0, layer1_chunk2);
|
||||
__m128 layer2_chunk1 = _mm_unpackhi_ps(layer1_chunk0, layer1_chunk2);
|
||||
__m128 layer2_chunk2 = _mm_unpacklo_ps(layer1_chunk1, layer1_chunk3);
|
||||
__m128 layer2_chunk3 = _mm_unpackhi_ps(layer1_chunk1, layer1_chunk3);
|
||||
|
||||
v_r0 = _mm_unpacklo_ps(layer2_chunk0, layer2_chunk2);
|
||||
v_r1 = _mm_unpackhi_ps(layer2_chunk0, layer2_chunk2);
|
||||
v_g0 = _mm_unpacklo_ps(layer2_chunk1, layer2_chunk3);
|
||||
v_g1 = _mm_unpackhi_ps(layer2_chunk1, layer2_chunk3);
|
||||
}
|
||||
|
||||
inline void _mm_deinterleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0,
|
||||
__m128 & v_g1, __m128 & v_b0, __m128 & v_b1)
|
||||
{
|
||||
__m128 layer1_chunk0 = _mm_unpacklo_ps(v_r0, v_g1);
|
||||
__m128 layer1_chunk1 = _mm_unpackhi_ps(v_r0, v_g1);
|
||||
__m128 layer1_chunk2 = _mm_unpacklo_ps(v_r1, v_b0);
|
||||
__m128 layer1_chunk3 = _mm_unpackhi_ps(v_r1, v_b0);
|
||||
__m128 layer1_chunk4 = _mm_unpacklo_ps(v_g0, v_b1);
|
||||
__m128 layer1_chunk5 = _mm_unpackhi_ps(v_g0, v_b1);
|
||||
|
||||
__m128 layer2_chunk0 = _mm_unpacklo_ps(layer1_chunk0, layer1_chunk3);
|
||||
__m128 layer2_chunk1 = _mm_unpackhi_ps(layer1_chunk0, layer1_chunk3);
|
||||
__m128 layer2_chunk2 = _mm_unpacklo_ps(layer1_chunk1, layer1_chunk4);
|
||||
__m128 layer2_chunk3 = _mm_unpackhi_ps(layer1_chunk1, layer1_chunk4);
|
||||
__m128 layer2_chunk4 = _mm_unpacklo_ps(layer1_chunk2, layer1_chunk5);
|
||||
__m128 layer2_chunk5 = _mm_unpackhi_ps(layer1_chunk2, layer1_chunk5);
|
||||
|
||||
v_r0 = _mm_unpacklo_ps(layer2_chunk0, layer2_chunk3);
|
||||
v_r1 = _mm_unpackhi_ps(layer2_chunk0, layer2_chunk3);
|
||||
v_g0 = _mm_unpacklo_ps(layer2_chunk1, layer2_chunk4);
|
||||
v_g1 = _mm_unpackhi_ps(layer2_chunk1, layer2_chunk4);
|
||||
v_b0 = _mm_unpacklo_ps(layer2_chunk2, layer2_chunk5);
|
||||
v_b1 = _mm_unpackhi_ps(layer2_chunk2, layer2_chunk5);
|
||||
}
|
||||
|
||||
inline void _mm_deinterleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0, __m128 & v_g1,
|
||||
__m128 & v_b0, __m128 & v_b1, __m128 & v_a0, __m128 & v_a1)
|
||||
{
|
||||
__m128 layer1_chunk0 = _mm_unpacklo_ps(v_r0, v_b0);
|
||||
__m128 layer1_chunk1 = _mm_unpackhi_ps(v_r0, v_b0);
|
||||
__m128 layer1_chunk2 = _mm_unpacklo_ps(v_r1, v_b1);
|
||||
__m128 layer1_chunk3 = _mm_unpackhi_ps(v_r1, v_b1);
|
||||
__m128 layer1_chunk4 = _mm_unpacklo_ps(v_g0, v_a0);
|
||||
__m128 layer1_chunk5 = _mm_unpackhi_ps(v_g0, v_a0);
|
||||
__m128 layer1_chunk6 = _mm_unpacklo_ps(v_g1, v_a1);
|
||||
__m128 layer1_chunk7 = _mm_unpackhi_ps(v_g1, v_a1);
|
||||
|
||||
__m128 layer2_chunk0 = _mm_unpacklo_ps(layer1_chunk0, layer1_chunk4);
|
||||
__m128 layer2_chunk1 = _mm_unpackhi_ps(layer1_chunk0, layer1_chunk4);
|
||||
__m128 layer2_chunk2 = _mm_unpacklo_ps(layer1_chunk1, layer1_chunk5);
|
||||
__m128 layer2_chunk3 = _mm_unpackhi_ps(layer1_chunk1, layer1_chunk5);
|
||||
__m128 layer2_chunk4 = _mm_unpacklo_ps(layer1_chunk2, layer1_chunk6);
|
||||
__m128 layer2_chunk5 = _mm_unpackhi_ps(layer1_chunk2, layer1_chunk6);
|
||||
__m128 layer2_chunk6 = _mm_unpacklo_ps(layer1_chunk3, layer1_chunk7);
|
||||
__m128 layer2_chunk7 = _mm_unpackhi_ps(layer1_chunk3, layer1_chunk7);
|
||||
|
||||
v_r0 = _mm_unpacklo_ps(layer2_chunk0, layer2_chunk4);
|
||||
v_r1 = _mm_unpackhi_ps(layer2_chunk0, layer2_chunk4);
|
||||
v_g0 = _mm_unpacklo_ps(layer2_chunk1, layer2_chunk5);
|
||||
v_g1 = _mm_unpackhi_ps(layer2_chunk1, layer2_chunk5);
|
||||
v_b0 = _mm_unpacklo_ps(layer2_chunk2, layer2_chunk6);
|
||||
v_b1 = _mm_unpackhi_ps(layer2_chunk2, layer2_chunk6);
|
||||
v_a0 = _mm_unpacklo_ps(layer2_chunk3, layer2_chunk7);
|
||||
v_a1 = _mm_unpackhi_ps(layer2_chunk3, layer2_chunk7);
|
||||
}
|
||||
|
||||
inline void _mm_interleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0, __m128 & v_g1)
|
||||
{
|
||||
const int mask_lo = _MM_SHUFFLE(2, 0, 2, 0), mask_hi = _MM_SHUFFLE(3, 1, 3, 1);
|
||||
|
||||
__m128 layer2_chunk0 = _mm_shuffle_ps(v_r0, v_r1, mask_lo);
|
||||
__m128 layer2_chunk2 = _mm_shuffle_ps(v_r0, v_r1, mask_hi);
|
||||
__m128 layer2_chunk1 = _mm_shuffle_ps(v_g0, v_g1, mask_lo);
|
||||
__m128 layer2_chunk3 = _mm_shuffle_ps(v_g0, v_g1, mask_hi);
|
||||
|
||||
__m128 layer1_chunk0 = _mm_shuffle_ps(layer2_chunk0, layer2_chunk1, mask_lo);
|
||||
__m128 layer1_chunk2 = _mm_shuffle_ps(layer2_chunk0, layer2_chunk1, mask_hi);
|
||||
__m128 layer1_chunk1 = _mm_shuffle_ps(layer2_chunk2, layer2_chunk3, mask_lo);
|
||||
__m128 layer1_chunk3 = _mm_shuffle_ps(layer2_chunk2, layer2_chunk3, mask_hi);
|
||||
|
||||
v_r0 = _mm_shuffle_ps(layer1_chunk0, layer1_chunk1, mask_lo);
|
||||
v_g0 = _mm_shuffle_ps(layer1_chunk0, layer1_chunk1, mask_hi);
|
||||
v_r1 = _mm_shuffle_ps(layer1_chunk2, layer1_chunk3, mask_lo);
|
||||
v_g1 = _mm_shuffle_ps(layer1_chunk2, layer1_chunk3, mask_hi);
|
||||
}
|
||||
|
||||
inline void _mm_interleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0,
|
||||
__m128 & v_g1, __m128 & v_b0, __m128 & v_b1)
|
||||
{
|
||||
const int mask_lo = _MM_SHUFFLE(2, 0, 2, 0), mask_hi = _MM_SHUFFLE(3, 1, 3, 1);
|
||||
|
||||
__m128 layer2_chunk0 = _mm_shuffle_ps(v_r0, v_r1, mask_lo);
|
||||
__m128 layer2_chunk3 = _mm_shuffle_ps(v_r0, v_r1, mask_hi);
|
||||
__m128 layer2_chunk1 = _mm_shuffle_ps(v_g0, v_g1, mask_lo);
|
||||
__m128 layer2_chunk4 = _mm_shuffle_ps(v_g0, v_g1, mask_hi);
|
||||
__m128 layer2_chunk2 = _mm_shuffle_ps(v_b0, v_b1, mask_lo);
|
||||
__m128 layer2_chunk5 = _mm_shuffle_ps(v_b0, v_b1, mask_hi);
|
||||
|
||||
__m128 layer1_chunk0 = _mm_shuffle_ps(layer2_chunk0, layer2_chunk1, mask_lo);
|
||||
__m128 layer1_chunk3 = _mm_shuffle_ps(layer2_chunk0, layer2_chunk1, mask_hi);
|
||||
__m128 layer1_chunk1 = _mm_shuffle_ps(layer2_chunk2, layer2_chunk3, mask_lo);
|
||||
__m128 layer1_chunk4 = _mm_shuffle_ps(layer2_chunk2, layer2_chunk3, mask_hi);
|
||||
__m128 layer1_chunk2 = _mm_shuffle_ps(layer2_chunk4, layer2_chunk5, mask_lo);
|
||||
__m128 layer1_chunk5 = _mm_shuffle_ps(layer2_chunk4, layer2_chunk5, mask_hi);
|
||||
|
||||
v_r0 = _mm_shuffle_ps(layer1_chunk0, layer1_chunk1, mask_lo);
|
||||
v_g1 = _mm_shuffle_ps(layer1_chunk0, layer1_chunk1, mask_hi);
|
||||
v_r1 = _mm_shuffle_ps(layer1_chunk2, layer1_chunk3, mask_lo);
|
||||
v_b0 = _mm_shuffle_ps(layer1_chunk2, layer1_chunk3, mask_hi);
|
||||
v_g0 = _mm_shuffle_ps(layer1_chunk4, layer1_chunk5, mask_lo);
|
||||
v_b1 = _mm_shuffle_ps(layer1_chunk4, layer1_chunk5, mask_hi);
|
||||
}
|
||||
|
||||
inline void _mm_interleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0, __m128 & v_g1,
|
||||
__m128 & v_b0, __m128 & v_b1, __m128 & v_a0, __m128 & v_a1)
|
||||
{
|
||||
const int mask_lo = _MM_SHUFFLE(2, 0, 2, 0), mask_hi = _MM_SHUFFLE(3, 1, 3, 1);
|
||||
|
||||
__m128 layer2_chunk0 = _mm_shuffle_ps(v_r0, v_r1, mask_lo);
|
||||
__m128 layer2_chunk4 = _mm_shuffle_ps(v_r0, v_r1, mask_hi);
|
||||
__m128 layer2_chunk1 = _mm_shuffle_ps(v_g0, v_g1, mask_lo);
|
||||
__m128 layer2_chunk5 = _mm_shuffle_ps(v_g0, v_g1, mask_hi);
|
||||
__m128 layer2_chunk2 = _mm_shuffle_ps(v_b0, v_b1, mask_lo);
|
||||
__m128 layer2_chunk6 = _mm_shuffle_ps(v_b0, v_b1, mask_hi);
|
||||
__m128 layer2_chunk3 = _mm_shuffle_ps(v_a0, v_a1, mask_lo);
|
||||
__m128 layer2_chunk7 = _mm_shuffle_ps(v_a0, v_a1, mask_hi);
|
||||
|
||||
__m128 layer1_chunk0 = _mm_shuffle_ps(layer2_chunk0, layer2_chunk1, mask_lo);
|
||||
__m128 layer1_chunk4 = _mm_shuffle_ps(layer2_chunk0, layer2_chunk1, mask_hi);
|
||||
__m128 layer1_chunk1 = _mm_shuffle_ps(layer2_chunk2, layer2_chunk3, mask_lo);
|
||||
__m128 layer1_chunk5 = _mm_shuffle_ps(layer2_chunk2, layer2_chunk3, mask_hi);
|
||||
__m128 layer1_chunk2 = _mm_shuffle_ps(layer2_chunk4, layer2_chunk5, mask_lo);
|
||||
__m128 layer1_chunk6 = _mm_shuffle_ps(layer2_chunk4, layer2_chunk5, mask_hi);
|
||||
__m128 layer1_chunk3 = _mm_shuffle_ps(layer2_chunk6, layer2_chunk7, mask_lo);
|
||||
__m128 layer1_chunk7 = _mm_shuffle_ps(layer2_chunk6, layer2_chunk7, mask_hi);
|
||||
|
||||
v_r0 = _mm_shuffle_ps(layer1_chunk0, layer1_chunk1, mask_lo);
|
||||
v_b0 = _mm_shuffle_ps(layer1_chunk0, layer1_chunk1, mask_hi);
|
||||
v_r1 = _mm_shuffle_ps(layer1_chunk2, layer1_chunk3, mask_lo);
|
||||
v_b1 = _mm_shuffle_ps(layer1_chunk2, layer1_chunk3, mask_hi);
|
||||
v_g0 = _mm_shuffle_ps(layer1_chunk4, layer1_chunk5, mask_lo);
|
||||
v_a0 = _mm_shuffle_ps(layer1_chunk4, layer1_chunk5, mask_hi);
|
||||
v_g1 = _mm_shuffle_ps(layer1_chunk6, layer1_chunk7, mask_lo);
|
||||
v_a1 = _mm_shuffle_ps(layer1_chunk6, layer1_chunk7, mask_hi);
|
||||
}
|
||||
|
||||
#endif // CV_SSE2
|
||||
|
||||
#endif //__OPENCV_CORE_SSE_UTILS_HPP__
|
326
3rdparty/include/opencv2/core/traits.hpp
vendored
Normal file
326
3rdparty/include/opencv2/core/traits.hpp
vendored
Normal file
@ -0,0 +1,326 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_CORE_TRAITS_HPP__
|
||||
#define __OPENCV_CORE_TRAITS_HPP__
|
||||
|
||||
#include "opencv2/core/cvdef.h"
|
||||
|
||||
namespace cv
|
||||
{
|
||||
|
||||
//! @addtogroup core_basic
|
||||
//! @{
|
||||
|
||||
/** @brief Template "trait" class for OpenCV primitive data types.
|
||||
|
||||
A primitive OpenCV data type is one of unsigned char, bool, signed char, unsigned short, signed
|
||||
short, int, float, double, or a tuple of values of one of these types, where all the values in the
|
||||
tuple have the same type. Any primitive type from the list can be defined by an identifier in the
|
||||
form CV_\<bit-depth\>{U|S|F}C(\<number_of_channels\>), for example: uchar \~ CV_8UC1, 3-element
|
||||
floating-point tuple \~ CV_32FC3, and so on. A universal OpenCV structure that is able to store a
|
||||
single instance of such a primitive data type is Vec. Multiple instances of such a type can be
|
||||
stored in a std::vector, Mat, Mat_, SparseMat, SparseMat_, or any other container that is able to
|
||||
store Vec instances.
|
||||
|
||||
The DataType class is basically used to provide a description of such primitive data types without
|
||||
adding any fields or methods to the corresponding classes (and it is actually impossible to add
|
||||
anything to primitive C/C++ data types). This technique is known in C++ as class traits. It is not
|
||||
DataType itself that is used but its specialized versions, such as:
|
||||
@code
|
||||
template<> class DataType<uchar>
|
||||
{
|
||||
typedef uchar value_type;
|
||||
typedef int work_type;
|
||||
typedef uchar channel_type;
|
||||
enum { channel_type = CV_8U, channels = 1, fmt='u', type = CV_8U };
|
||||
};
|
||||
...
|
||||
template<typename _Tp> DataType<std::complex<_Tp> >
|
||||
{
|
||||
typedef std::complex<_Tp> value_type;
|
||||
typedef std::complex<_Tp> work_type;
|
||||
typedef _Tp channel_type;
|
||||
// DataDepth is another helper trait class
|
||||
enum { depth = DataDepth<_Tp>::value, channels=2,
|
||||
fmt=(channels-1)*256+DataDepth<_Tp>::fmt,
|
||||
type=CV_MAKETYPE(depth, channels) };
|
||||
};
|
||||
...
|
||||
@endcode
|
||||
The main purpose of this class is to convert compilation-time type information to an
|
||||
OpenCV-compatible data type identifier, for example:
|
||||
@code
|
||||
// allocates a 30x40 floating-point matrix
|
||||
Mat A(30, 40, DataType<float>::type);
|
||||
|
||||
Mat B = Mat_<std::complex<double> >(3, 3);
|
||||
// the statement below will print 6, 2 , that is depth == CV_64F, channels == 2
|
||||
cout << B.depth() << ", " << B.channels() << endl;
|
||||
@endcode
|
||||
So, such traits are used to tell OpenCV which data type you are working with, even if such a type is
|
||||
not native to OpenCV. For example, the matrix B initialization above is compiled because OpenCV
|
||||
defines the proper specialized template class DataType\<complex\<_Tp\> \> . This mechanism is also
|
||||
useful (and used in OpenCV this way) for generic algorithms implementations.
|
||||
*/
|
||||
template<typename _Tp> class DataType
|
||||
{
|
||||
public:
|
||||
typedef _Tp value_type;
|
||||
typedef value_type work_type;
|
||||
typedef value_type channel_type;
|
||||
typedef value_type vec_type;
|
||||
enum { generic_type = 1,
|
||||
depth = -1,
|
||||
channels = 1,
|
||||
fmt = 0,
|
||||
type = CV_MAKETYPE(depth, channels)
|
||||
};
|
||||
};
|
||||
|
||||
template<> class DataType<bool>
|
||||
{
|
||||
public:
|
||||
typedef bool value_type;
|
||||
typedef int work_type;
|
||||
typedef value_type channel_type;
|
||||
typedef value_type vec_type;
|
||||
enum { generic_type = 0,
|
||||
depth = CV_8U,
|
||||
channels = 1,
|
||||
fmt = (int)'u',
|
||||
type = CV_MAKETYPE(depth, channels)
|
||||
};
|
||||
};
|
||||
|
||||
template<> class DataType<uchar>
|
||||
{
|
||||
public:
|
||||
typedef uchar value_type;
|
||||
typedef int work_type;
|
||||
typedef value_type channel_type;
|
||||
typedef value_type vec_type;
|
||||
enum { generic_type = 0,
|
||||
depth = CV_8U,
|
||||
channels = 1,
|
||||
fmt = (int)'u',
|
||||
type = CV_MAKETYPE(depth, channels)
|
||||
};
|
||||
};
|
||||
|
||||
template<> class DataType<schar>
|
||||
{
|
||||
public:
|
||||
typedef schar value_type;
|
||||
typedef int work_type;
|
||||
typedef value_type channel_type;
|
||||
typedef value_type vec_type;
|
||||
enum { generic_type = 0,
|
||||
depth = CV_8S,
|
||||
channels = 1,
|
||||
fmt = (int)'c',
|
||||
type = CV_MAKETYPE(depth, channels)
|
||||
};
|
||||
};
|
||||
|
||||
template<> class DataType<char>
|
||||
{
|
||||
public:
|
||||
typedef schar value_type;
|
||||
typedef int work_type;
|
||||
typedef value_type channel_type;
|
||||
typedef value_type vec_type;
|
||||
enum { generic_type = 0,
|
||||
depth = CV_8S,
|
||||
channels = 1,
|
||||
fmt = (int)'c',
|
||||
type = CV_MAKETYPE(depth, channels)
|
||||
};
|
||||
};
|
||||
|
||||
template<> class DataType<ushort>
|
||||
{
|
||||
public:
|
||||
typedef ushort value_type;
|
||||
typedef int work_type;
|
||||
typedef value_type channel_type;
|
||||
typedef value_type vec_type;
|
||||
enum { generic_type = 0,
|
||||
depth = CV_16U,
|
||||
channels = 1,
|
||||
fmt = (int)'w',
|
||||
type = CV_MAKETYPE(depth, channels)
|
||||
};
|
||||
};
|
||||
|
||||
template<> class DataType<short>
|
||||
{
|
||||
public:
|
||||
typedef short value_type;
|
||||
typedef int work_type;
|
||||
typedef value_type channel_type;
|
||||
typedef value_type vec_type;
|
||||
enum { generic_type = 0,
|
||||
depth = CV_16S,
|
||||
channels = 1,
|
||||
fmt = (int)'s',
|
||||
type = CV_MAKETYPE(depth, channels)
|
||||
};
|
||||
};
|
||||
|
||||
template<> class DataType<int>
|
||||
{
|
||||
public:
|
||||
typedef int value_type;
|
||||
typedef value_type work_type;
|
||||
typedef value_type channel_type;
|
||||
typedef value_type vec_type;
|
||||
enum { generic_type = 0,
|
||||
depth = CV_32S,
|
||||
channels = 1,
|
||||
fmt = (int)'i',
|
||||
type = CV_MAKETYPE(depth, channels)
|
||||
};
|
||||
};
|
||||
|
||||
template<> class DataType<float>
|
||||
{
|
||||
public:
|
||||
typedef float value_type;
|
||||
typedef value_type work_type;
|
||||
typedef value_type channel_type;
|
||||
typedef value_type vec_type;
|
||||
enum { generic_type = 0,
|
||||
depth = CV_32F,
|
||||
channels = 1,
|
||||
fmt = (int)'f',
|
||||
type = CV_MAKETYPE(depth, channels)
|
||||
};
|
||||
};
|
||||
|
||||
template<> class DataType<double>
|
||||
{
|
||||
public:
|
||||
typedef double value_type;
|
||||
typedef value_type work_type;
|
||||
typedef value_type channel_type;
|
||||
typedef value_type vec_type;
|
||||
enum { generic_type = 0,
|
||||
depth = CV_64F,
|
||||
channels = 1,
|
||||
fmt = (int)'d',
|
||||
type = CV_MAKETYPE(depth, channels)
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
/** @brief A helper class for cv::DataType
|
||||
|
||||
The class is specialized for each fundamental numerical data type supported by OpenCV. It provides
|
||||
DataDepth<T>::value constant.
|
||||
*/
|
||||
template<typename _Tp> class DataDepth
|
||||
{
|
||||
public:
|
||||
enum
|
||||
{
|
||||
value = DataType<_Tp>::depth,
|
||||
fmt = DataType<_Tp>::fmt
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
|
||||
template<int _depth> class TypeDepth
|
||||
{
|
||||
enum { depth = CV_USRTYPE1 };
|
||||
typedef void value_type;
|
||||
};
|
||||
|
||||
template<> class TypeDepth<CV_8U>
|
||||
{
|
||||
enum { depth = CV_8U };
|
||||
typedef uchar value_type;
|
||||
};
|
||||
|
||||
template<> class TypeDepth<CV_8S>
|
||||
{
|
||||
enum { depth = CV_8S };
|
||||
typedef schar value_type;
|
||||
};
|
||||
|
||||
template<> class TypeDepth<CV_16U>
|
||||
{
|
||||
enum { depth = CV_16U };
|
||||
typedef ushort value_type;
|
||||
};
|
||||
|
||||
template<> class TypeDepth<CV_16S>
|
||||
{
|
||||
enum { depth = CV_16S };
|
||||
typedef short value_type;
|
||||
};
|
||||
|
||||
template<> class TypeDepth<CV_32S>
|
||||
{
|
||||
enum { depth = CV_32S };
|
||||
typedef int value_type;
|
||||
};
|
||||
|
||||
template<> class TypeDepth<CV_32F>
|
||||
{
|
||||
enum { depth = CV_32F };
|
||||
typedef float value_type;
|
||||
};
|
||||
|
||||
template<> class TypeDepth<CV_64F>
|
||||
{
|
||||
enum { depth = CV_64F };
|
||||
typedef double value_type;
|
||||
};
|
||||
|
||||
//! @}
|
||||
|
||||
} // cv
|
||||
|
||||
#endif // __OPENCV_CORE_TRAITS_HPP__
|
2228
3rdparty/include/opencv2/core/types.hpp
vendored
Normal file
2228
3rdparty/include/opencv2/core/types.hpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1834
3rdparty/include/opencv2/core/types_c.h
vendored
Normal file
1834
3rdparty/include/opencv2/core/types_c.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
889
3rdparty/include/opencv2/core/utility.hpp
vendored
Normal file
889
3rdparty/include/opencv2/core/utility.hpp
vendored
Normal file
@ -0,0 +1,889 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Copyright (C) 2015, Itseez Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_CORE_UTILITY_H__
|
||||
#define __OPENCV_CORE_UTILITY_H__
|
||||
|
||||
#ifndef __cplusplus
|
||||
# error utility.hpp header must be compiled as C++
|
||||
#endif
|
||||
|
||||
#include "opencv2/core.hpp"
|
||||
|
||||
namespace cv
|
||||
{
|
||||
|
||||
#ifdef CV_COLLECT_IMPL_DATA
|
||||
CV_EXPORTS void setImpl(int flags); // set implementation flags and reset storage arrays
|
||||
CV_EXPORTS void addImpl(int flag, const char* func = 0); // add implementation and function name to storage arrays
|
||||
// Get stored implementation flags and fucntions names arrays
|
||||
// Each implementation entry correspond to function name entry, so you can find which implementation was executed in which fucntion
|
||||
CV_EXPORTS int getImpl(std::vector<int> &impl, std::vector<String> &funName);
|
||||
|
||||
CV_EXPORTS bool useCollection(); // return implementation collection state
|
||||
CV_EXPORTS void setUseCollection(bool flag); // set implementation collection state
|
||||
|
||||
#define CV_IMPL_PLAIN 0x01 // native CPU OpenCV implementation
|
||||
#define CV_IMPL_OCL 0x02 // OpenCL implementation
|
||||
#define CV_IMPL_IPP 0x04 // IPP implementation
|
||||
#define CV_IMPL_MT 0x10 // multithreaded implementation
|
||||
|
||||
#define CV_IMPL_ADD(impl) \
|
||||
if(cv::useCollection()) \
|
||||
{ \
|
||||
cv::addImpl(impl, CV_Func); \
|
||||
}
|
||||
#else
|
||||
#define CV_IMPL_ADD(impl)
|
||||
#endif
|
||||
|
||||
//! @addtogroup core_utils
|
||||
//! @{
|
||||
|
||||
/** @brief Automatically Allocated Buffer Class
|
||||
|
||||
The class is used for temporary buffers in functions and methods.
|
||||
If a temporary buffer is usually small (a few K's of memory),
|
||||
but its size depends on the parameters, it makes sense to create a small
|
||||
fixed-size array on stack and use it if it's large enough. If the required buffer size
|
||||
is larger than the fixed size, another buffer of sufficient size is allocated dynamically
|
||||
and released after the processing. Therefore, in typical cases, when the buffer size is small,
|
||||
there is no overhead associated with malloc()/free().
|
||||
At the same time, there is no limit on the size of processed data.
|
||||
|
||||
This is what AutoBuffer does. The template takes 2 parameters - type of the buffer elements and
|
||||
the number of stack-allocated elements. Here is how the class is used:
|
||||
|
||||
\code
|
||||
void my_func(const cv::Mat& m)
|
||||
{
|
||||
cv::AutoBuffer<float> buf; // create automatic buffer containing 1000 floats
|
||||
|
||||
buf.allocate(m.rows); // if m.rows <= 1000, the pre-allocated buffer is used,
|
||||
// otherwise the buffer of "m.rows" floats will be allocated
|
||||
// dynamically and deallocated in cv::AutoBuffer destructor
|
||||
...
|
||||
}
|
||||
\endcode
|
||||
*/
|
||||
template<typename _Tp, size_t fixed_size = 1024/sizeof(_Tp)+8> class AutoBuffer
|
||||
{
|
||||
public:
|
||||
typedef _Tp value_type;
|
||||
|
||||
//! the default constructor
|
||||
AutoBuffer();
|
||||
//! constructor taking the real buffer size
|
||||
AutoBuffer(size_t _size);
|
||||
|
||||
//! the copy constructor
|
||||
AutoBuffer(const AutoBuffer<_Tp, fixed_size>& buf);
|
||||
//! the assignment operator
|
||||
AutoBuffer<_Tp, fixed_size>& operator = (const AutoBuffer<_Tp, fixed_size>& buf);
|
||||
|
||||
//! destructor. calls deallocate()
|
||||
~AutoBuffer();
|
||||
|
||||
//! allocates the new buffer of size _size. if the _size is small enough, stack-allocated buffer is used
|
||||
void allocate(size_t _size);
|
||||
//! deallocates the buffer if it was dynamically allocated
|
||||
void deallocate();
|
||||
//! resizes the buffer and preserves the content
|
||||
void resize(size_t _size);
|
||||
//! returns the current buffer size
|
||||
size_t size() const;
|
||||
//! returns pointer to the real buffer, stack-allocated or head-allocated
|
||||
operator _Tp* ();
|
||||
//! returns read-only pointer to the real buffer, stack-allocated or head-allocated
|
||||
operator const _Tp* () const;
|
||||
|
||||
protected:
|
||||
//! pointer to the real buffer, can point to buf if the buffer is small enough
|
||||
_Tp* ptr;
|
||||
//! size of the real buffer
|
||||
size_t sz;
|
||||
//! pre-allocated buffer. At least 1 element to confirm C++ standard reqirements
|
||||
_Tp buf[(fixed_size > 0) ? fixed_size : 1];
|
||||
};
|
||||
|
||||
/** @brief Sets/resets the break-on-error mode.
|
||||
|
||||
When the break-on-error mode is set, the default error handler issues a hardware exception, which
|
||||
can make debugging more convenient.
|
||||
|
||||
\return the previous state
|
||||
*/
|
||||
CV_EXPORTS bool setBreakOnError(bool flag);
|
||||
|
||||
extern "C" typedef int (*ErrorCallback)( int status, const char* func_name,
|
||||
const char* err_msg, const char* file_name,
|
||||
int line, void* userdata );
|
||||
|
||||
|
||||
/** @brief Sets the new error handler and the optional user data.
|
||||
|
||||
The function sets the new error handler, called from cv::error().
|
||||
|
||||
\param errCallback the new error handler. If NULL, the default error handler is used.
|
||||
\param userdata the optional user data pointer, passed to the callback.
|
||||
\param prevUserdata the optional output parameter where the previous user data pointer is stored
|
||||
|
||||
\return the previous error handler
|
||||
*/
|
||||
CV_EXPORTS ErrorCallback redirectError( ErrorCallback errCallback, void* userdata=0, void** prevUserdata=0);
|
||||
|
||||
/** @brief Returns a text string formatted using the printf-like expression.
|
||||
|
||||
The function acts like sprintf but forms and returns an STL string. It can be used to form an error
|
||||
message in the Exception constructor.
|
||||
@param fmt printf-compatible formatting specifiers.
|
||||
*/
|
||||
CV_EXPORTS String format( const char* fmt, ... );
|
||||
CV_EXPORTS String tempfile( const char* suffix = 0);
|
||||
CV_EXPORTS void glob(String pattern, std::vector<String>& result, bool recursive = false);
|
||||
|
||||
/** @brief OpenCV will try to set the number of threads for the next parallel region.
|
||||
|
||||
If threads == 0, OpenCV will disable threading optimizations and run all it's functions
|
||||
sequentially. Passing threads \< 0 will reset threads number to system default. This function must
|
||||
be called outside of parallel region.
|
||||
|
||||
OpenCV will try to run it's functions with specified threads number, but some behaviour differs from
|
||||
framework:
|
||||
- `TBB` – User-defined parallel constructions will run with the same threads number, if
|
||||
another does not specified. If late on user creates own scheduler, OpenCV will be use it.
|
||||
- `OpenMP` – No special defined behaviour.
|
||||
- `Concurrency` – If threads == 1, OpenCV will disable threading optimizations and run it's
|
||||
functions sequentially.
|
||||
- `GCD` – Supports only values \<= 0.
|
||||
- `C=` – No special defined behaviour.
|
||||
@param nthreads Number of threads used by OpenCV.
|
||||
@sa getNumThreads, getThreadNum
|
||||
*/
|
||||
CV_EXPORTS void setNumThreads(int nthreads);
|
||||
|
||||
/** @brief Returns the number of threads used by OpenCV for parallel regions.
|
||||
|
||||
Always returns 1 if OpenCV is built without threading support.
|
||||
|
||||
The exact meaning of return value depends on the threading framework used by OpenCV library:
|
||||
- `TBB` – The number of threads, that OpenCV will try to use for parallel regions. If there is
|
||||
any tbb::thread_scheduler_init in user code conflicting with OpenCV, then function returns
|
||||
default number of threads used by TBB library.
|
||||
- `OpenMP` – An upper bound on the number of threads that could be used to form a new team.
|
||||
- `Concurrency` – The number of threads, that OpenCV will try to use for parallel regions.
|
||||
- `GCD` – Unsupported; returns the GCD thread pool limit (512) for compatibility.
|
||||
- `C=` – The number of threads, that OpenCV will try to use for parallel regions, if before
|
||||
called setNumThreads with threads \> 0, otherwise returns the number of logical CPUs,
|
||||
available for the process.
|
||||
@sa setNumThreads, getThreadNum
|
||||
*/
|
||||
CV_EXPORTS int getNumThreads();
|
||||
|
||||
/** @brief Returns the index of the currently executed thread within the current parallel region. Always
|
||||
returns 0 if called outside of parallel region.
|
||||
|
||||
The exact meaning of return value depends on the threading framework used by OpenCV library:
|
||||
- `TBB` – Unsupported with current 4.1 TBB release. May be will be supported in future.
|
||||
- `OpenMP` – The thread number, within the current team, of the calling thread.
|
||||
- `Concurrency` – An ID for the virtual processor that the current context is executing on (0
|
||||
for master thread and unique number for others, but not necessary 1,2,3,...).
|
||||
- `GCD` – System calling thread's ID. Never returns 0 inside parallel region.
|
||||
- `C=` – The index of the current parallel task.
|
||||
@sa setNumThreads, getNumThreads
|
||||
*/
|
||||
CV_EXPORTS int getThreadNum();
|
||||
|
||||
/** @brief Returns full configuration time cmake output.
|
||||
|
||||
Returned value is raw cmake output including version control system revision, compiler version,
|
||||
compiler flags, enabled modules and third party libraries, etc. Output format depends on target
|
||||
architecture.
|
||||
*/
|
||||
CV_EXPORTS_W const String& getBuildInformation();
|
||||
|
||||
/** @brief Returns the number of ticks.
|
||||
|
||||
The function returns the number of ticks after the certain event (for example, when the machine was
|
||||
turned on). It can be used to initialize RNG or to measure a function execution time by reading the
|
||||
tick count before and after the function call. See also the tick frequency.
|
||||
*/
|
||||
CV_EXPORTS_W int64 getTickCount();
|
||||
|
||||
/** @brief Returns the number of ticks per second.
|
||||
|
||||
The function returns the number of ticks per second. That is, the following code computes the
|
||||
execution time in seconds:
|
||||
@code
|
||||
double t = (double)getTickCount();
|
||||
// do something ...
|
||||
t = ((double)getTickCount() - t)/getTickFrequency();
|
||||
@endcode
|
||||
*/
|
||||
CV_EXPORTS_W double getTickFrequency();
|
||||
|
||||
/** @brief Returns the number of CPU ticks.
|
||||
|
||||
The function returns the current number of CPU ticks on some architectures (such as x86, x64,
|
||||
PowerPC). On other platforms the function is equivalent to getTickCount. It can also be used for
|
||||
very accurate time measurements, as well as for RNG initialization. Note that in case of multi-CPU
|
||||
systems a thread, from which getCPUTickCount is called, can be suspended and resumed at another CPU
|
||||
with its own counter. So, theoretically (and practically) the subsequent calls to the function do
|
||||
not necessary return the monotonously increasing values. Also, since a modern CPU varies the CPU
|
||||
frequency depending on the load, the number of CPU clocks spent in some code cannot be directly
|
||||
converted to time units. Therefore, getTickCount is generally a preferable solution for measuring
|
||||
execution time.
|
||||
*/
|
||||
CV_EXPORTS_W int64 getCPUTickCount();
|
||||
|
||||
/** @brief Available CPU features.
|
||||
|
||||
remember to keep this list identical to the one in cvdef.h
|
||||
*/
|
||||
enum CpuFeatures {
|
||||
CPU_MMX = 1,
|
||||
CPU_SSE = 2,
|
||||
CPU_SSE2 = 3,
|
||||
CPU_SSE3 = 4,
|
||||
CPU_SSSE3 = 5,
|
||||
CPU_SSE4_1 = 6,
|
||||
CPU_SSE4_2 = 7,
|
||||
CPU_POPCNT = 8,
|
||||
|
||||
CPU_AVX = 10,
|
||||
CPU_AVX2 = 11,
|
||||
CPU_FMA3 = 12,
|
||||
|
||||
CPU_AVX_512F = 13,
|
||||
CPU_AVX_512BW = 14,
|
||||
CPU_AVX_512CD = 15,
|
||||
CPU_AVX_512DQ = 16,
|
||||
CPU_AVX_512ER = 17,
|
||||
CPU_AVX_512IFMA512 = 18,
|
||||
CPU_AVX_512PF = 19,
|
||||
CPU_AVX_512VBMI = 20,
|
||||
CPU_AVX_512VL = 21,
|
||||
|
||||
CPU_NEON = 100
|
||||
};
|
||||
|
||||
/** @brief Returns true if the specified feature is supported by the host hardware.
|
||||
|
||||
The function returns true if the host hardware supports the specified feature. When user calls
|
||||
setUseOptimized(false), the subsequent calls to checkHardwareSupport() will return false until
|
||||
setUseOptimized(true) is called. This way user can dynamically switch on and off the optimized code
|
||||
in OpenCV.
|
||||
@param feature The feature of interest, one of cv::CpuFeatures
|
||||
*/
|
||||
CV_EXPORTS_W bool checkHardwareSupport(int feature);
|
||||
|
||||
/** @brief Returns the number of logical CPUs available for the process.
|
||||
*/
|
||||
CV_EXPORTS_W int getNumberOfCPUs();
|
||||
|
||||
|
||||
/** @brief Aligns a pointer to the specified number of bytes.
|
||||
|
||||
The function returns the aligned pointer of the same type as the input pointer:
|
||||
\f[\texttt{(\_Tp*)(((size\_t)ptr + n-1) \& -n)}\f]
|
||||
@param ptr Aligned pointer.
|
||||
@param n Alignment size that must be a power of two.
|
||||
*/
|
||||
template<typename _Tp> static inline _Tp* alignPtr(_Tp* ptr, int n=(int)sizeof(_Tp))
|
||||
{
|
||||
return (_Tp*)(((size_t)ptr + n-1) & -n);
|
||||
}
|
||||
|
||||
/** @brief Aligns a buffer size to the specified number of bytes.
|
||||
|
||||
The function returns the minimum number that is greater or equal to sz and is divisible by n :
|
||||
\f[\texttt{(sz + n-1) \& -n}\f]
|
||||
@param sz Buffer size to align.
|
||||
@param n Alignment size that must be a power of two.
|
||||
*/
|
||||
static inline size_t alignSize(size_t sz, int n)
|
||||
{
|
||||
CV_DbgAssert((n & (n - 1)) == 0); // n is a power of 2
|
||||
return (sz + n-1) & -n;
|
||||
}
|
||||
|
||||
/** @brief Enables or disables the optimized code.
|
||||
|
||||
The function can be used to dynamically turn on and off optimized code (code that uses SSE2, AVX,
|
||||
and other instructions on the platforms that support it). It sets a global flag that is further
|
||||
checked by OpenCV functions. Since the flag is not checked in the inner OpenCV loops, it is only
|
||||
safe to call the function on the very top level in your application where you can be sure that no
|
||||
other OpenCV function is currently executed.
|
||||
|
||||
By default, the optimized code is enabled unless you disable it in CMake. The current status can be
|
||||
retrieved using useOptimized.
|
||||
@param onoff The boolean flag specifying whether the optimized code should be used (onoff=true)
|
||||
or not (onoff=false).
|
||||
*/
|
||||
CV_EXPORTS_W void setUseOptimized(bool onoff);
|
||||
|
||||
/** @brief Returns the status of optimized code usage.
|
||||
|
||||
The function returns true if the optimized code is enabled. Otherwise, it returns false.
|
||||
*/
|
||||
CV_EXPORTS_W bool useOptimized();
|
||||
|
||||
static inline size_t getElemSize(int type) { return CV_ELEM_SIZE(type); }
|
||||
|
||||
/////////////////////////////// Parallel Primitives //////////////////////////////////
|
||||
|
||||
/** @brief Base class for parallel data processors
|
||||
*/
|
||||
class CV_EXPORTS ParallelLoopBody
|
||||
{
|
||||
public:
|
||||
virtual ~ParallelLoopBody();
|
||||
virtual void operator() (const Range& range) const = 0;
|
||||
};
|
||||
|
||||
/** @brief Parallel data processor
|
||||
*/
|
||||
CV_EXPORTS void parallel_for_(const Range& range, const ParallelLoopBody& body, double nstripes=-1.);
|
||||
|
||||
/////////////////////////////// forEach method of cv::Mat ////////////////////////////
|
||||
template<typename _Tp, typename Functor> inline
|
||||
void Mat::forEach_impl(const Functor& operation) {
|
||||
if (false) {
|
||||
operation(*reinterpret_cast<_Tp*>(0), reinterpret_cast<int*>(NULL));
|
||||
// If your compiler fail in this line.
|
||||
// Please check that your functor signature is
|
||||
// (_Tp&, const int*) <- multidimential
|
||||
// or (_Tp&, void*) <- in case of you don't need current idx.
|
||||
}
|
||||
|
||||
CV_Assert(this->total() / this->size[this->dims - 1] <= INT_MAX);
|
||||
const int LINES = static_cast<int>(this->total() / this->size[this->dims - 1]);
|
||||
|
||||
class PixelOperationWrapper :public ParallelLoopBody
|
||||
{
|
||||
public:
|
||||
PixelOperationWrapper(Mat_<_Tp>* const frame, const Functor& _operation)
|
||||
: mat(frame), op(_operation) {};
|
||||
virtual ~PixelOperationWrapper(){};
|
||||
// ! Overloaded virtual operator
|
||||
// convert range call to row call.
|
||||
virtual void operator()(const Range &range) const {
|
||||
const int DIMS = mat->dims;
|
||||
const int COLS = mat->size[DIMS - 1];
|
||||
if (DIMS <= 2) {
|
||||
for (int row = range.start; row < range.end; ++row) {
|
||||
this->rowCall2(row, COLS);
|
||||
}
|
||||
} else {
|
||||
std::vector<int> idx(COLS); /// idx is modified in this->rowCall
|
||||
idx[DIMS - 2] = range.start - 1;
|
||||
|
||||
for (int line_num = range.start; line_num < range.end; ++line_num) {
|
||||
idx[DIMS - 2]++;
|
||||
for (int i = DIMS - 2; i >= 0; --i) {
|
||||
if (idx[i] >= mat->size[i]) {
|
||||
idx[i - 1] += idx[i] / mat->size[i];
|
||||
idx[i] %= mat->size[i];
|
||||
continue; // carry-over;
|
||||
}
|
||||
else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
this->rowCall(&idx[0], COLS, DIMS);
|
||||
}
|
||||
}
|
||||
};
|
||||
private:
|
||||
Mat_<_Tp>* const mat;
|
||||
const Functor op;
|
||||
// ! Call operator for each elements in this row.
|
||||
inline void rowCall(int* const idx, const int COLS, const int DIMS) const {
|
||||
int &col = idx[DIMS - 1];
|
||||
col = 0;
|
||||
_Tp* pixel = &(mat->template at<_Tp>(idx));
|
||||
|
||||
while (col < COLS) {
|
||||
op(*pixel, const_cast<const int*>(idx));
|
||||
pixel++; col++;
|
||||
}
|
||||
col = 0;
|
||||
}
|
||||
// ! Call operator for each elements in this row. 2d mat special version.
|
||||
inline void rowCall2(const int row, const int COLS) const {
|
||||
union Index{
|
||||
int body[2];
|
||||
operator const int*() const {
|
||||
return reinterpret_cast<const int*>(this);
|
||||
}
|
||||
int& operator[](const int i) {
|
||||
return body[i];
|
||||
}
|
||||
} idx = {{row, 0}};
|
||||
// Special union is needed to avoid
|
||||
// "error: array subscript is above array bounds [-Werror=array-bounds]"
|
||||
// when call the functor `op` such that access idx[3].
|
||||
|
||||
_Tp* pixel = &(mat->template at<_Tp>(idx));
|
||||
const _Tp* const pixel_end = pixel + COLS;
|
||||
while(pixel < pixel_end) {
|
||||
op(*pixel++, static_cast<const int*>(idx));
|
||||
idx[1]++;
|
||||
}
|
||||
};
|
||||
PixelOperationWrapper& operator=(const PixelOperationWrapper &) {
|
||||
CV_Assert(false);
|
||||
// We can not remove this implementation because Visual Studio warning C4822.
|
||||
return *this;
|
||||
};
|
||||
};
|
||||
|
||||
parallel_for_(cv::Range(0, LINES), PixelOperationWrapper(reinterpret_cast<Mat_<_Tp>*>(this), operation));
|
||||
}
|
||||
|
||||
/////////////////////////// Synchronization Primitives ///////////////////////////////
|
||||
|
||||
class CV_EXPORTS Mutex
|
||||
{
|
||||
public:
|
||||
Mutex();
|
||||
~Mutex();
|
||||
Mutex(const Mutex& m);
|
||||
Mutex& operator = (const Mutex& m);
|
||||
|
||||
void lock();
|
||||
bool trylock();
|
||||
void unlock();
|
||||
|
||||
struct Impl;
|
||||
protected:
|
||||
Impl* impl;
|
||||
};
|
||||
|
||||
class CV_EXPORTS AutoLock
|
||||
{
|
||||
public:
|
||||
AutoLock(Mutex& m) : mutex(&m) { mutex->lock(); }
|
||||
~AutoLock() { mutex->unlock(); }
|
||||
protected:
|
||||
Mutex* mutex;
|
||||
private:
|
||||
AutoLock(const AutoLock&);
|
||||
AutoLock& operator = (const AutoLock&);
|
||||
};
|
||||
|
||||
class CV_EXPORTS TLSDataContainer
|
||||
{
|
||||
private:
|
||||
int key_;
|
||||
protected:
|
||||
TLSDataContainer();
|
||||
virtual ~TLSDataContainer();
|
||||
public:
|
||||
virtual void* createDataInstance() const = 0;
|
||||
virtual void deleteDataInstance(void* data) const = 0;
|
||||
|
||||
void* getData() const;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
class TLSData : protected TLSDataContainer
|
||||
{
|
||||
public:
|
||||
inline TLSData() {}
|
||||
inline ~TLSData() {}
|
||||
inline T* get() const { return (T*)getData(); }
|
||||
private:
|
||||
virtual void* createDataInstance() const { return new T; }
|
||||
virtual void deleteDataInstance(void* data) const { delete (T*)data; }
|
||||
};
|
||||
|
||||
/** @brief Designed for command line parsing
|
||||
|
||||
The sample below demonstrates how to use CommandLineParser:
|
||||
@code
|
||||
CommandLineParser parser(argc, argv, keys);
|
||||
parser.about("Application name v1.0.0");
|
||||
|
||||
if (parser.has("help"))
|
||||
{
|
||||
parser.printMessage();
|
||||
return 0;
|
||||
}
|
||||
|
||||
int N = parser.get<int>("N");
|
||||
double fps = parser.get<double>("fps");
|
||||
String path = parser.get<String>("path");
|
||||
|
||||
use_time_stamp = parser.has("timestamp");
|
||||
|
||||
String img1 = parser.get<String>(0);
|
||||
String img2 = parser.get<String>(1);
|
||||
|
||||
int repeat = parser.get<int>(2);
|
||||
|
||||
if (!parser.check())
|
||||
{
|
||||
parser.printErrors();
|
||||
return 0;
|
||||
}
|
||||
@endcode
|
||||
|
||||
### Keys syntax
|
||||
|
||||
The keys parameter is a string containing several blocks, each one is enclosed in curley braces and
|
||||
describes one argument. Each argument contains three parts separated by the `|` symbol:
|
||||
|
||||
-# argument names is a space-separated list of option synonyms (to mark argument as positional, prefix it with the `@` symbol)
|
||||
-# default value will be used if the argument was not provided (can be empty)
|
||||
-# help message (can be empty)
|
||||
|
||||
For example:
|
||||
|
||||
@code{.cpp}
|
||||
const String keys =
|
||||
"{help h usage ? | | print this message }"
|
||||
"{@image1 | | image1 for compare }"
|
||||
"{@image2 | | image2 for compare }"
|
||||
"{@repeat |1 | number }"
|
||||
"{path |. | path to file }"
|
||||
"{fps | -1.0 | fps for output video }"
|
||||
"{N count |100 | count of objects }"
|
||||
"{ts timestamp | | use time stamp }"
|
||||
;
|
||||
}
|
||||
@endcode
|
||||
|
||||
### Usage
|
||||
|
||||
For the described keys:
|
||||
|
||||
@code{.sh}
|
||||
# Good call (3 positional parameters: image1, image2 and repeat; N is 200, ts is true)
|
||||
$ ./app -N=200 1.png 2.jpg 19 -ts
|
||||
|
||||
# Bad call
|
||||
$ ./app -fps=aaa
|
||||
ERRORS:
|
||||
Exception: can not convert: [aaa] to [double]
|
||||
@endcode
|
||||
*/
|
||||
class CV_EXPORTS CommandLineParser
|
||||
{
|
||||
public:
|
||||
|
||||
/** @brief Constructor
|
||||
|
||||
Initializes command line parser object
|
||||
|
||||
@param argc number of command line arguments (from main())
|
||||
@param argv array of command line arguments (from main())
|
||||
@param keys string describing acceptable command line parameters (see class description for syntax)
|
||||
*/
|
||||
CommandLineParser(int argc, const char* const argv[], const String& keys);
|
||||
|
||||
/** @brief Copy constructor */
|
||||
CommandLineParser(const CommandLineParser& parser);
|
||||
|
||||
/** @brief Assignment operator */
|
||||
CommandLineParser& operator = (const CommandLineParser& parser);
|
||||
|
||||
/** @brief Destructor */
|
||||
~CommandLineParser();
|
||||
|
||||
/** @brief Returns application path
|
||||
|
||||
This method returns the path to the executable from the command line (`argv[0]`).
|
||||
|
||||
For example, if the application has been started with such command:
|
||||
@code{.sh}
|
||||
$ ./bin/my-executable
|
||||
@endcode
|
||||
this method will return `./bin`.
|
||||
*/
|
||||
String getPathToApplication() const;
|
||||
|
||||
/** @brief Access arguments by name
|
||||
|
||||
Returns argument converted to selected type. If the argument is not known or can not be
|
||||
converted to selected type, the error flag is set (can be checked with @ref check).
|
||||
|
||||
For example, define:
|
||||
@code{.cpp}
|
||||
String keys = "{N count||}";
|
||||
@endcode
|
||||
|
||||
Call:
|
||||
@code{.sh}
|
||||
$ ./my-app -N=20
|
||||
# or
|
||||
$ ./my-app --count=20
|
||||
@endcode
|
||||
|
||||
Access:
|
||||
@code{.cpp}
|
||||
int N = parser.get<int>("N");
|
||||
@endcode
|
||||
|
||||
@param name name of the argument
|
||||
@param space_delete remove spaces from the left and right of the string
|
||||
@tparam T the argument will be converted to this type if possible
|
||||
|
||||
@note You can access positional arguments by their `@`-prefixed name:
|
||||
@code{.cpp}
|
||||
parser.get<String>("@image");
|
||||
@endcode
|
||||
*/
|
||||
template <typename T>
|
||||
T get(const String& name, bool space_delete = true) const
|
||||
{
|
||||
T val = T();
|
||||
getByName(name, space_delete, ParamType<T>::type, (void*)&val);
|
||||
return val;
|
||||
}
|
||||
|
||||
/** @brief Access positional arguments by index
|
||||
|
||||
Returns argument converted to selected type. Indexes are counted from zero.
|
||||
|
||||
For example, define:
|
||||
@code{.cpp}
|
||||
String keys = "{@arg1||}{@arg2||}"
|
||||
@endcode
|
||||
|
||||
Call:
|
||||
@code{.sh}
|
||||
./my-app abc qwe
|
||||
@endcode
|
||||
|
||||
Access arguments:
|
||||
@code{.cpp}
|
||||
String val_1 = parser.get<String>(0); // returns "abc", arg1
|
||||
String val_2 = parser.get<String>(1); // returns "qwe", arg2
|
||||
@endcode
|
||||
|
||||
@param index index of the argument
|
||||
@param space_delete remove spaces from the left and right of the string
|
||||
@tparam T the argument will be converted to this type if possible
|
||||
*/
|
||||
template <typename T>
|
||||
T get(int index, bool space_delete = true) const
|
||||
{
|
||||
T val = T();
|
||||
getByIndex(index, space_delete, ParamType<T>::type, (void*)&val);
|
||||
return val;
|
||||
}
|
||||
|
||||
/** @brief Check if field was provided in the command line
|
||||
|
||||
@param name argument name to check
|
||||
*/
|
||||
bool has(const String& name) const;
|
||||
|
||||
/** @brief Check for parsing errors
|
||||
|
||||
Returns true if error occured while accessing the parameters (bad conversion, missing arguments,
|
||||
etc.). Call @ref printErrors to print error messages list.
|
||||
*/
|
||||
bool check() const;
|
||||
|
||||
/** @brief Set the about message
|
||||
|
||||
The about message will be shown when @ref printMessage is called, right before arguments table.
|
||||
*/
|
||||
void about(const String& message);
|
||||
|
||||
/** @brief Print help message
|
||||
|
||||
This method will print standard help message containing the about message and arguments description.
|
||||
|
||||
@sa about
|
||||
*/
|
||||
void printMessage() const;
|
||||
|
||||
/** @brief Print list of errors occured
|
||||
|
||||
@sa check
|
||||
*/
|
||||
void printErrors() const;
|
||||
|
||||
protected:
|
||||
void getByName(const String& name, bool space_delete, int type, void* dst) const;
|
||||
void getByIndex(int index, bool space_delete, int type, void* dst) const;
|
||||
|
||||
struct Impl;
|
||||
Impl* impl;
|
||||
};
|
||||
|
||||
//! @} core_utils
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
/////////////////////////////// AutoBuffer implementation ////////////////////////////////////////
|
||||
|
||||
template<typename _Tp, size_t fixed_size> inline
|
||||
AutoBuffer<_Tp, fixed_size>::AutoBuffer()
|
||||
{
|
||||
ptr = buf;
|
||||
sz = fixed_size;
|
||||
}
|
||||
|
||||
template<typename _Tp, size_t fixed_size> inline
|
||||
AutoBuffer<_Tp, fixed_size>::AutoBuffer(size_t _size)
|
||||
{
|
||||
ptr = buf;
|
||||
sz = fixed_size;
|
||||
allocate(_size);
|
||||
}
|
||||
|
||||
template<typename _Tp, size_t fixed_size> inline
|
||||
AutoBuffer<_Tp, fixed_size>::AutoBuffer(const AutoBuffer<_Tp, fixed_size>& abuf )
|
||||
{
|
||||
ptr = buf;
|
||||
sz = fixed_size;
|
||||
allocate(abuf.size());
|
||||
for( size_t i = 0; i < sz; i++ )
|
||||
ptr[i] = abuf.ptr[i];
|
||||
}
|
||||
|
||||
template<typename _Tp, size_t fixed_size> inline AutoBuffer<_Tp, fixed_size>&
|
||||
AutoBuffer<_Tp, fixed_size>::operator = (const AutoBuffer<_Tp, fixed_size>& abuf)
|
||||
{
|
||||
if( this != &abuf )
|
||||
{
|
||||
deallocate();
|
||||
allocate(abuf.size());
|
||||
for( size_t i = 0; i < sz; i++ )
|
||||
ptr[i] = abuf.ptr[i];
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
template<typename _Tp, size_t fixed_size> inline
|
||||
AutoBuffer<_Tp, fixed_size>::~AutoBuffer()
|
||||
{ deallocate(); }
|
||||
|
||||
template<typename _Tp, size_t fixed_size> inline void
|
||||
AutoBuffer<_Tp, fixed_size>::allocate(size_t _size)
|
||||
{
|
||||
if(_size <= sz)
|
||||
{
|
||||
sz = _size;
|
||||
return;
|
||||
}
|
||||
deallocate();
|
||||
if(_size > fixed_size)
|
||||
{
|
||||
ptr = new _Tp[_size];
|
||||
sz = _size;
|
||||
}
|
||||
}
|
||||
|
||||
template<typename _Tp, size_t fixed_size> inline void
|
||||
AutoBuffer<_Tp, fixed_size>::deallocate()
|
||||
{
|
||||
if( ptr != buf )
|
||||
{
|
||||
delete[] ptr;
|
||||
ptr = buf;
|
||||
sz = fixed_size;
|
||||
}
|
||||
}
|
||||
|
||||
template<typename _Tp, size_t fixed_size> inline void
|
||||
AutoBuffer<_Tp, fixed_size>::resize(size_t _size)
|
||||
{
|
||||
if(_size <= sz)
|
||||
{
|
||||
sz = _size;
|
||||
return;
|
||||
}
|
||||
size_t i, prevsize = sz, minsize = MIN(prevsize, _size);
|
||||
_Tp* prevptr = ptr;
|
||||
|
||||
ptr = _size > fixed_size ? new _Tp[_size] : buf;
|
||||
sz = _size;
|
||||
|
||||
if( ptr != prevptr )
|
||||
for( i = 0; i < minsize; i++ )
|
||||
ptr[i] = prevptr[i];
|
||||
for( i = prevsize; i < _size; i++ )
|
||||
ptr[i] = _Tp();
|
||||
|
||||
if( prevptr != buf )
|
||||
delete[] prevptr;
|
||||
}
|
||||
|
||||
template<typename _Tp, size_t fixed_size> inline size_t
|
||||
AutoBuffer<_Tp, fixed_size>::size() const
|
||||
{ return sz; }
|
||||
|
||||
template<typename _Tp, size_t fixed_size> inline
|
||||
AutoBuffer<_Tp, fixed_size>::operator _Tp* ()
|
||||
{ return ptr; }
|
||||
|
||||
template<typename _Tp, size_t fixed_size> inline
|
||||
AutoBuffer<_Tp, fixed_size>::operator const _Tp* () const
|
||||
{ return ptr; }
|
||||
|
||||
#ifndef OPENCV_NOSTL
|
||||
template<> inline std::string CommandLineParser::get<std::string>(int index, bool space_delete) const
|
||||
{
|
||||
return get<String>(index, space_delete);
|
||||
}
|
||||
template<> inline std::string CommandLineParser::get<std::string>(const String& name, bool space_delete) const
|
||||
{
|
||||
return get<String>(name, space_delete);
|
||||
}
|
||||
#endif // OPENCV_NOSTL
|
||||
|
||||
//! @endcond
|
||||
|
||||
} //namespace cv
|
||||
|
||||
#ifndef DISABLE_OPENCV_24_COMPATIBILITY
|
||||
#include "opencv2/core/core_c.h"
|
||||
#endif
|
||||
|
||||
#endif //__OPENCV_CORE_UTILITY_H__
|
71
3rdparty/include/opencv2/core/version.hpp
vendored
Normal file
71
3rdparty/include/opencv2/core/version.hpp
vendored
Normal file
@ -0,0 +1,71 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// Intel License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright( C) 2000-2015, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2011-2013, NVIDIA Corporation, all rights reserved.
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Copyright (C) 2015, Itseez Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of Intel Corporation may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
//(including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort(including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
/*
|
||||
definition of the current version of OpenCV
|
||||
Usefull to test in user programs
|
||||
*/
|
||||
|
||||
#ifndef __OPENCV_VERSION_HPP__
|
||||
#define __OPENCV_VERSION_HPP__
|
||||
|
||||
#define CV_VERSION_MAJOR 3
|
||||
#define CV_VERSION_MINOR 0
|
||||
#define CV_VERSION_REVISION 0
|
||||
#define CV_VERSION_STATUS ""
|
||||
|
||||
#define CVAUX_STR_EXP(__A) #__A
|
||||
#define CVAUX_STR(__A) CVAUX_STR_EXP(__A)
|
||||
|
||||
#define CVAUX_STRW_EXP(__A) L#__A
|
||||
#define CVAUX_STRW(__A) CVAUX_STRW_EXP(__A)
|
||||
|
||||
#define CV_VERSION CVAUX_STR(CV_VERSION_MAJOR) "." CVAUX_STR(CV_VERSION_MINOR) "." CVAUX_STR(CV_VERSION_REVISION) CV_VERSION_STATUS
|
||||
|
||||
/* old style version constants*/
|
||||
#define CV_MAJOR_VERSION CV_VERSION_MAJOR
|
||||
#define CV_MINOR_VERSION CV_VERSION_MINOR
|
||||
#define CV_SUBMINOR_VERSION CV_VERSION_REVISION
|
||||
|
||||
#endif
|
603
3rdparty/include/opencv2/core/wimage.hpp
vendored
Normal file
603
3rdparty/include/opencv2/core/wimage.hpp
vendored
Normal file
@ -0,0 +1,603 @@
|
||||
/*M//////////////////////////////////////////////////////////////////////////////
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to
|
||||
// this license. If you do not agree to this license, do not download,
|
||||
// install, copy or use the software.
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2008, Google, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of Intel Corporation or contributors may not be used to endorse
|
||||
// or promote products derived from this software without specific
|
||||
// prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is"
|
||||
// and any express or implied warranties, including, but not limited to, the
|
||||
// implied warranties of merchantability and fitness for a particular purpose
|
||||
// are disclaimed. In no event shall the Intel Corporation or contributors be
|
||||
// liable for any direct, indirect, incidental, special, exemplary, or
|
||||
// consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
/////////////////////////////////////////////////////////////////////////////////
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_CORE_WIMAGE_HPP__
|
||||
#define __OPENCV_CORE_WIMAGE_HPP__
|
||||
|
||||
#include "opencv2/core/core_c.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
namespace cv {
|
||||
|
||||
//! @addtogroup core
|
||||
//! @{
|
||||
|
||||
template <typename T> class WImage;
|
||||
template <typename T> class WImageBuffer;
|
||||
template <typename T> class WImageView;
|
||||
|
||||
template<typename T, int C> class WImageC;
|
||||
template<typename T, int C> class WImageBufferC;
|
||||
template<typename T, int C> class WImageViewC;
|
||||
|
||||
// Commonly used typedefs.
|
||||
typedef WImage<uchar> WImage_b;
|
||||
typedef WImageView<uchar> WImageView_b;
|
||||
typedef WImageBuffer<uchar> WImageBuffer_b;
|
||||
|
||||
typedef WImageC<uchar, 1> WImage1_b;
|
||||
typedef WImageViewC<uchar, 1> WImageView1_b;
|
||||
typedef WImageBufferC<uchar, 1> WImageBuffer1_b;
|
||||
|
||||
typedef WImageC<uchar, 3> WImage3_b;
|
||||
typedef WImageViewC<uchar, 3> WImageView3_b;
|
||||
typedef WImageBufferC<uchar, 3> WImageBuffer3_b;
|
||||
|
||||
typedef WImage<float> WImage_f;
|
||||
typedef WImageView<float> WImageView_f;
|
||||
typedef WImageBuffer<float> WImageBuffer_f;
|
||||
|
||||
typedef WImageC<float, 1> WImage1_f;
|
||||
typedef WImageViewC<float, 1> WImageView1_f;
|
||||
typedef WImageBufferC<float, 1> WImageBuffer1_f;
|
||||
|
||||
typedef WImageC<float, 3> WImage3_f;
|
||||
typedef WImageViewC<float, 3> WImageView3_f;
|
||||
typedef WImageBufferC<float, 3> WImageBuffer3_f;
|
||||
|
||||
// There isn't a standard for signed and unsigned short so be more
|
||||
// explicit in the typename for these cases.
|
||||
typedef WImage<short> WImage_16s;
|
||||
typedef WImageView<short> WImageView_16s;
|
||||
typedef WImageBuffer<short> WImageBuffer_16s;
|
||||
|
||||
typedef WImageC<short, 1> WImage1_16s;
|
||||
typedef WImageViewC<short, 1> WImageView1_16s;
|
||||
typedef WImageBufferC<short, 1> WImageBuffer1_16s;
|
||||
|
||||
typedef WImageC<short, 3> WImage3_16s;
|
||||
typedef WImageViewC<short, 3> WImageView3_16s;
|
||||
typedef WImageBufferC<short, 3> WImageBuffer3_16s;
|
||||
|
||||
typedef WImage<ushort> WImage_16u;
|
||||
typedef WImageView<ushort> WImageView_16u;
|
||||
typedef WImageBuffer<ushort> WImageBuffer_16u;
|
||||
|
||||
typedef WImageC<ushort, 1> WImage1_16u;
|
||||
typedef WImageViewC<ushort, 1> WImageView1_16u;
|
||||
typedef WImageBufferC<ushort, 1> WImageBuffer1_16u;
|
||||
|
||||
typedef WImageC<ushort, 3> WImage3_16u;
|
||||
typedef WImageViewC<ushort, 3> WImageView3_16u;
|
||||
typedef WImageBufferC<ushort, 3> WImageBuffer3_16u;
|
||||
|
||||
/** @brief Image class which provides a thin layer around an IplImage.
|
||||
|
||||
The goals of the class design are:
|
||||
|
||||
-# All the data has explicit ownership to avoid memory leaks
|
||||
-# No hidden allocations or copies for performance.
|
||||
-# Easy access to OpenCV methods (which will access IPP if available)
|
||||
-# Can easily treat external data as an image
|
||||
-# Easy to create images which are subsets of other images
|
||||
-# Fast pixel access which can take advantage of number of channels if known at compile time.
|
||||
|
||||
The WImage class is the image class which provides the data accessors. The 'W' comes from the fact
|
||||
that it is also a wrapper around the popular but inconvenient IplImage class. A WImage can be
|
||||
constructed either using a WImageBuffer class which allocates and frees the data, or using a
|
||||
WImageView class which constructs a subimage or a view into external data. The view class does no
|
||||
memory management. Each class actually has two versions, one when the number of channels is known
|
||||
at compile time and one when it isn't. Using the one with the number of channels specified can
|
||||
provide some compile time optimizations by using the fact that the number of channels is a
|
||||
constant.
|
||||
|
||||
We use the convention (c,r) to refer to column c and row r with (0,0) being the upper left corner.
|
||||
This is similar to standard Euclidean coordinates with the first coordinate varying in the
|
||||
horizontal direction and the second coordinate varying in the vertical direction. Thus (c,r) is
|
||||
usually in the domain [0, width) X [0, height)
|
||||
|
||||
Example usage:
|
||||
@code
|
||||
WImageBuffer3_b im(5,7); // Make a 5X7 3 channel image of type uchar
|
||||
WImageView3_b sub_im(im, 2,2, 3,3); // 3X3 submatrix
|
||||
vector<float> vec(10, 3.0f);
|
||||
WImageView1_f user_im(&vec[0], 2, 5); // 2X5 image w/ supplied data
|
||||
|
||||
im.SetZero(); // same as cvSetZero(im.Ipl())
|
||||
*im(2, 3) = 15; // Modify the element at column 2, row 3
|
||||
MySetRand(&sub_im);
|
||||
|
||||
// Copy the second row into the first. This can be done with no memory
|
||||
// allocation and will use SSE if IPP is available.
|
||||
int w = im.Width();
|
||||
im.View(0,0, w,1).CopyFrom(im.View(0,1, w,1));
|
||||
|
||||
// Doesn't care about source of data since using WImage
|
||||
void MySetRand(WImage_b* im) { // Works with any number of channels
|
||||
for (int r = 0; r < im->Height(); ++r) {
|
||||
float* row = im->Row(r);
|
||||
for (int c = 0; c < im->Width(); ++c) {
|
||||
for (int ch = 0; ch < im->Channels(); ++ch, ++row) {
|
||||
*row = uchar(rand() & 255);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@endcode
|
||||
|
||||
Functions that are not part of the basic image allocation, viewing, and access should come from
|
||||
OpenCV, except some useful functions that are not part of OpenCV can be found in wimage_util.h
|
||||
*/
|
||||
template<typename T>
|
||||
class WImage
|
||||
{
|
||||
public:
|
||||
typedef T BaseType;
|
||||
|
||||
// WImage is an abstract class with no other virtual methods so make the
|
||||
// destructor virtual.
|
||||
virtual ~WImage() = 0;
|
||||
|
||||
// Accessors
|
||||
IplImage* Ipl() {return image_; }
|
||||
const IplImage* Ipl() const {return image_; }
|
||||
T* ImageData() { return reinterpret_cast<T*>(image_->imageData); }
|
||||
const T* ImageData() const {
|
||||
return reinterpret_cast<const T*>(image_->imageData);
|
||||
}
|
||||
|
||||
int Width() const {return image_->width; }
|
||||
int Height() const {return image_->height; }
|
||||
|
||||
// WidthStep is the number of bytes to go to the pixel with the next y coord
|
||||
int WidthStep() const {return image_->widthStep; }
|
||||
|
||||
int Channels() const {return image_->nChannels; }
|
||||
int ChannelSize() const {return sizeof(T); } // number of bytes per channel
|
||||
|
||||
// Number of bytes per pixel
|
||||
int PixelSize() const {return Channels() * ChannelSize(); }
|
||||
|
||||
// Return depth type (e.g. IPL_DEPTH_8U, IPL_DEPTH_32F) which is the number
|
||||
// of bits per channel and with the signed bit set.
|
||||
// This is known at compile time using specializations.
|
||||
int Depth() const;
|
||||
|
||||
inline const T* Row(int r) const {
|
||||
return reinterpret_cast<T*>(image_->imageData + r*image_->widthStep);
|
||||
}
|
||||
|
||||
inline T* Row(int r) {
|
||||
return reinterpret_cast<T*>(image_->imageData + r*image_->widthStep);
|
||||
}
|
||||
|
||||
// Pixel accessors which returns a pointer to the start of the channel
|
||||
inline T* operator() (int c, int r) {
|
||||
return reinterpret_cast<T*>(image_->imageData + r*image_->widthStep) +
|
||||
c*Channels();
|
||||
}
|
||||
|
||||
inline const T* operator() (int c, int r) const {
|
||||
return reinterpret_cast<T*>(image_->imageData + r*image_->widthStep) +
|
||||
c*Channels();
|
||||
}
|
||||
|
||||
// Copy the contents from another image which is just a convenience to cvCopy
|
||||
void CopyFrom(const WImage<T>& src) { cvCopy(src.Ipl(), image_); }
|
||||
|
||||
// Set contents to zero which is just a convenient to cvSetZero
|
||||
void SetZero() { cvSetZero(image_); }
|
||||
|
||||
// Construct a view into a region of this image
|
||||
WImageView<T> View(int c, int r, int width, int height);
|
||||
|
||||
protected:
|
||||
// Disallow copy and assignment
|
||||
WImage(const WImage&);
|
||||
void operator=(const WImage&);
|
||||
|
||||
explicit WImage(IplImage* img) : image_(img) {
|
||||
assert(!img || img->depth == Depth());
|
||||
}
|
||||
|
||||
void SetIpl(IplImage* image) {
|
||||
assert(!image || image->depth == Depth());
|
||||
image_ = image;
|
||||
}
|
||||
|
||||
IplImage* image_;
|
||||
};
|
||||
|
||||
|
||||
/** Image class when both the pixel type and number of channels
|
||||
are known at compile time. This wrapper will speed up some of the operations
|
||||
like accessing individual pixels using the () operator.
|
||||
*/
|
||||
template<typename T, int C>
|
||||
class WImageC : public WImage<T>
|
||||
{
|
||||
public:
|
||||
typedef typename WImage<T>::BaseType BaseType;
|
||||
enum { kChannels = C };
|
||||
|
||||
explicit WImageC(IplImage* img) : WImage<T>(img) {
|
||||
assert(!img || img->nChannels == Channels());
|
||||
}
|
||||
|
||||
// Construct a view into a region of this image
|
||||
WImageViewC<T, C> View(int c, int r, int width, int height);
|
||||
|
||||
// Copy the contents from another image which is just a convenience to cvCopy
|
||||
void CopyFrom(const WImageC<T, C>& src) {
|
||||
cvCopy(src.Ipl(), WImage<T>::image_);
|
||||
}
|
||||
|
||||
// WImageC is an abstract class with no other virtual methods so make the
|
||||
// destructor virtual.
|
||||
virtual ~WImageC() = 0;
|
||||
|
||||
int Channels() const {return C; }
|
||||
|
||||
protected:
|
||||
// Disallow copy and assignment
|
||||
WImageC(const WImageC&);
|
||||
void operator=(const WImageC&);
|
||||
|
||||
void SetIpl(IplImage* image) {
|
||||
assert(!image || image->depth == WImage<T>::Depth());
|
||||
WImage<T>::SetIpl(image);
|
||||
}
|
||||
};
|
||||
|
||||
/** Image class which owns the data, so it can be allocated and is always
|
||||
freed. It cannot be copied but can be explicity cloned.
|
||||
*/
|
||||
template<typename T>
|
||||
class WImageBuffer : public WImage<T>
|
||||
{
|
||||
public:
|
||||
typedef typename WImage<T>::BaseType BaseType;
|
||||
|
||||
// Default constructor which creates an object that can be
|
||||
WImageBuffer() : WImage<T>(0) {}
|
||||
|
||||
WImageBuffer(int width, int height, int nchannels) : WImage<T>(0) {
|
||||
Allocate(width, height, nchannels);
|
||||
}
|
||||
|
||||
// Constructor which takes ownership of a given IplImage so releases
|
||||
// the image on destruction.
|
||||
explicit WImageBuffer(IplImage* img) : WImage<T>(img) {}
|
||||
|
||||
// Allocate an image. Does nothing if current size is the same as
|
||||
// the new size.
|
||||
void Allocate(int width, int height, int nchannels);
|
||||
|
||||
// Set the data to point to an image, releasing the old data
|
||||
void SetIpl(IplImage* img) {
|
||||
ReleaseImage();
|
||||
WImage<T>::SetIpl(img);
|
||||
}
|
||||
|
||||
// Clone an image which reallocates the image if of a different dimension.
|
||||
void CloneFrom(const WImage<T>& src) {
|
||||
Allocate(src.Width(), src.Height(), src.Channels());
|
||||
CopyFrom(src);
|
||||
}
|
||||
|
||||
~WImageBuffer() {
|
||||
ReleaseImage();
|
||||
}
|
||||
|
||||
// Release the image if it isn't null.
|
||||
void ReleaseImage() {
|
||||
if (WImage<T>::image_) {
|
||||
IplImage* image = WImage<T>::image_;
|
||||
cvReleaseImage(&image);
|
||||
WImage<T>::SetIpl(0);
|
||||
}
|
||||
}
|
||||
|
||||
bool IsNull() const {return WImage<T>::image_ == NULL; }
|
||||
|
||||
private:
|
||||
// Disallow copy and assignment
|
||||
WImageBuffer(const WImageBuffer&);
|
||||
void operator=(const WImageBuffer&);
|
||||
};
|
||||
|
||||
/** Like a WImageBuffer class but when the number of channels is known at compile time.
|
||||
*/
|
||||
template<typename T, int C>
|
||||
class WImageBufferC : public WImageC<T, C>
|
||||
{
|
||||
public:
|
||||
typedef typename WImage<T>::BaseType BaseType;
|
||||
enum { kChannels = C };
|
||||
|
||||
// Default constructor which creates an object that can be
|
||||
WImageBufferC() : WImageC<T, C>(0) {}
|
||||
|
||||
WImageBufferC(int width, int height) : WImageC<T, C>(0) {
|
||||
Allocate(width, height);
|
||||
}
|
||||
|
||||
// Constructor which takes ownership of a given IplImage so releases
|
||||
// the image on destruction.
|
||||
explicit WImageBufferC(IplImage* img) : WImageC<T, C>(img) {}
|
||||
|
||||
// Allocate an image. Does nothing if current size is the same as
|
||||
// the new size.
|
||||
void Allocate(int width, int height);
|
||||
|
||||
// Set the data to point to an image, releasing the old data
|
||||
void SetIpl(IplImage* img) {
|
||||
ReleaseImage();
|
||||
WImageC<T, C>::SetIpl(img);
|
||||
}
|
||||
|
||||
// Clone an image which reallocates the image if of a different dimension.
|
||||
void CloneFrom(const WImageC<T, C>& src) {
|
||||
Allocate(src.Width(), src.Height());
|
||||
CopyFrom(src);
|
||||
}
|
||||
|
||||
~WImageBufferC() {
|
||||
ReleaseImage();
|
||||
}
|
||||
|
||||
// Release the image if it isn't null.
|
||||
void ReleaseImage() {
|
||||
if (WImage<T>::image_) {
|
||||
IplImage* image = WImage<T>::image_;
|
||||
cvReleaseImage(&image);
|
||||
WImageC<T, C>::SetIpl(0);
|
||||
}
|
||||
}
|
||||
|
||||
bool IsNull() const {return WImage<T>::image_ == NULL; }
|
||||
|
||||
private:
|
||||
// Disallow copy and assignment
|
||||
WImageBufferC(const WImageBufferC&);
|
||||
void operator=(const WImageBufferC&);
|
||||
};
|
||||
|
||||
/** View into an image class which allows treating a subimage as an image or treating external data
|
||||
as an image
|
||||
*/
|
||||
template<typename T> class WImageView : public WImage<T>
|
||||
{
|
||||
public:
|
||||
typedef typename WImage<T>::BaseType BaseType;
|
||||
|
||||
// Construct a subimage. No checks are done that the subimage lies
|
||||
// completely inside the original image.
|
||||
WImageView(WImage<T>* img, int c, int r, int width, int height);
|
||||
|
||||
// Refer to external data.
|
||||
// If not given width_step assumed to be same as width.
|
||||
WImageView(T* data, int width, int height, int channels, int width_step = -1);
|
||||
|
||||
// Refer to external data. This does NOT take ownership
|
||||
// of the supplied IplImage.
|
||||
WImageView(IplImage* img) : WImage<T>(img) {}
|
||||
|
||||
// Copy constructor
|
||||
WImageView(const WImage<T>& img) : WImage<T>(0) {
|
||||
header_ = *(img.Ipl());
|
||||
WImage<T>::SetIpl(&header_);
|
||||
}
|
||||
|
||||
WImageView& operator=(const WImage<T>& img) {
|
||||
header_ = *(img.Ipl());
|
||||
WImage<T>::SetIpl(&header_);
|
||||
return *this;
|
||||
}
|
||||
|
||||
protected:
|
||||
IplImage header_;
|
||||
};
|
||||
|
||||
|
||||
template<typename T, int C>
|
||||
class WImageViewC : public WImageC<T, C>
|
||||
{
|
||||
public:
|
||||
typedef typename WImage<T>::BaseType BaseType;
|
||||
enum { kChannels = C };
|
||||
|
||||
// Default constructor needed for vectors of views.
|
||||
WImageViewC();
|
||||
|
||||
virtual ~WImageViewC() {}
|
||||
|
||||
// Construct a subimage. No checks are done that the subimage lies
|
||||
// completely inside the original image.
|
||||
WImageViewC(WImageC<T, C>* img,
|
||||
int c, int r, int width, int height);
|
||||
|
||||
// Refer to external data
|
||||
WImageViewC(T* data, int width, int height, int width_step = -1);
|
||||
|
||||
// Refer to external data. This does NOT take ownership
|
||||
// of the supplied IplImage.
|
||||
WImageViewC(IplImage* img) : WImageC<T, C>(img) {}
|
||||
|
||||
// Copy constructor which does a shallow copy to allow multiple views
|
||||
// of same data. gcc-4.1.1 gets confused if both versions of
|
||||
// the constructor and assignment operator are not provided.
|
||||
WImageViewC(const WImageC<T, C>& img) : WImageC<T, C>(0) {
|
||||
header_ = *(img.Ipl());
|
||||
WImageC<T, C>::SetIpl(&header_);
|
||||
}
|
||||
WImageViewC(const WImageViewC<T, C>& img) : WImageC<T, C>(0) {
|
||||
header_ = *(img.Ipl());
|
||||
WImageC<T, C>::SetIpl(&header_);
|
||||
}
|
||||
|
||||
WImageViewC& operator=(const WImageC<T, C>& img) {
|
||||
header_ = *(img.Ipl());
|
||||
WImageC<T, C>::SetIpl(&header_);
|
||||
return *this;
|
||||
}
|
||||
WImageViewC& operator=(const WImageViewC<T, C>& img) {
|
||||
header_ = *(img.Ipl());
|
||||
WImageC<T, C>::SetIpl(&header_);
|
||||
return *this;
|
||||
}
|
||||
|
||||
protected:
|
||||
IplImage header_;
|
||||
};
|
||||
|
||||
|
||||
// Specializations for depth
|
||||
template<>
|
||||
inline int WImage<uchar>::Depth() const {return IPL_DEPTH_8U; }
|
||||
template<>
|
||||
inline int WImage<signed char>::Depth() const {return IPL_DEPTH_8S; }
|
||||
template<>
|
||||
inline int WImage<short>::Depth() const {return IPL_DEPTH_16S; }
|
||||
template<>
|
||||
inline int WImage<ushort>::Depth() const {return IPL_DEPTH_16U; }
|
||||
template<>
|
||||
inline int WImage<int>::Depth() const {return IPL_DEPTH_32S; }
|
||||
template<>
|
||||
inline int WImage<float>::Depth() const {return IPL_DEPTH_32F; }
|
||||
template<>
|
||||
inline int WImage<double>::Depth() const {return IPL_DEPTH_64F; }
|
||||
|
||||
template<typename T> inline WImage<T>::~WImage() {}
|
||||
template<typename T, int C> inline WImageC<T, C>::~WImageC() {}
|
||||
|
||||
template<typename T>
|
||||
inline void WImageBuffer<T>::Allocate(int width, int height, int nchannels)
|
||||
{
|
||||
if (IsNull() || WImage<T>::Width() != width ||
|
||||
WImage<T>::Height() != height || WImage<T>::Channels() != nchannels) {
|
||||
ReleaseImage();
|
||||
WImage<T>::image_ = cvCreateImage(cvSize(width, height),
|
||||
WImage<T>::Depth(), nchannels);
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, int C>
|
||||
inline void WImageBufferC<T, C>::Allocate(int width, int height)
|
||||
{
|
||||
if (IsNull() || WImage<T>::Width() != width || WImage<T>::Height() != height) {
|
||||
ReleaseImage();
|
||||
WImageC<T, C>::SetIpl(cvCreateImage(cvSize(width, height),WImage<T>::Depth(), C));
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
WImageView<T>::WImageView(WImage<T>* img, int c, int r, int width, int height)
|
||||
: WImage<T>(0)
|
||||
{
|
||||
header_ = *(img->Ipl());
|
||||
header_.imageData = reinterpret_cast<char*>((*img)(c, r));
|
||||
header_.width = width;
|
||||
header_.height = height;
|
||||
WImage<T>::SetIpl(&header_);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
WImageView<T>::WImageView(T* data, int width, int height, int nchannels, int width_step)
|
||||
: WImage<T>(0)
|
||||
{
|
||||
cvInitImageHeader(&header_, cvSize(width, height), WImage<T>::Depth(), nchannels);
|
||||
header_.imageData = reinterpret_cast<char*>(data);
|
||||
if (width_step > 0) {
|
||||
header_.widthStep = width_step;
|
||||
}
|
||||
WImage<T>::SetIpl(&header_);
|
||||
}
|
||||
|
||||
template<typename T, int C>
|
||||
WImageViewC<T, C>::WImageViewC(WImageC<T, C>* img, int c, int r, int width, int height)
|
||||
: WImageC<T, C>(0)
|
||||
{
|
||||
header_ = *(img->Ipl());
|
||||
header_.imageData = reinterpret_cast<char*>((*img)(c, r));
|
||||
header_.width = width;
|
||||
header_.height = height;
|
||||
WImageC<T, C>::SetIpl(&header_);
|
||||
}
|
||||
|
||||
template<typename T, int C>
|
||||
WImageViewC<T, C>::WImageViewC() : WImageC<T, C>(0) {
|
||||
cvInitImageHeader(&header_, cvSize(0, 0), WImage<T>::Depth(), C);
|
||||
header_.imageData = reinterpret_cast<char*>(0);
|
||||
WImageC<T, C>::SetIpl(&header_);
|
||||
}
|
||||
|
||||
template<typename T, int C>
|
||||
WImageViewC<T, C>::WImageViewC(T* data, int width, int height, int width_step)
|
||||
: WImageC<T, C>(0)
|
||||
{
|
||||
cvInitImageHeader(&header_, cvSize(width, height), WImage<T>::Depth(), C);
|
||||
header_.imageData = reinterpret_cast<char*>(data);
|
||||
if (width_step > 0) {
|
||||
header_.widthStep = width_step;
|
||||
}
|
||||
WImageC<T, C>::SetIpl(&header_);
|
||||
}
|
||||
|
||||
// Construct a view into a region of an image
|
||||
template<typename T>
|
||||
WImageView<T> WImage<T>::View(int c, int r, int width, int height) {
|
||||
return WImageView<T>(this, c, r, width, height);
|
||||
}
|
||||
|
||||
template<typename T, int C>
|
||||
WImageViewC<T, C> WImageC<T, C>::View(int c, int r, int width, int height) {
|
||||
return WImageViewC<T, C>(this, c, r, width, height);
|
||||
}
|
||||
|
||||
//! @} core
|
||||
|
||||
} // end of namespace
|
||||
|
||||
#endif // __cplusplus
|
||||
|
||||
#endif
|
Reference in New Issue
Block a user