Initial commit.

Final release of the project Anonymizer (2015).
Project settings for the Qt Creator (ver. 3.6).
This commit is contained in:
2016-01-25 18:17:34 +01:00
commit 22dbc25cce
479 changed files with 141991 additions and 0 deletions

522
3rdparty/include/opencv2/core/affine.hpp vendored Normal file
View File

@ -0,0 +1,522 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_CORE_AFFINE3_HPP__
#define __OPENCV_CORE_AFFINE3_HPP__
#ifdef __cplusplus
#include <opencv2/core.hpp>
namespace cv
{
//! @addtogroup core
//! @{
/** @brief Affine transform

Holds a 3D affine transform as an augmented 4x4 matrix (public member `matrix`).
The accessors below read and write the upper-left 3x3 block (linear / rotation part)
and the first three entries of the last column (translation).
*/
template<typename T>
class Affine3
{
public:
typedef T float_type;
typedef Matx<float_type, 3, 3> Mat3;
typedef Matx<float_type, 4, 4> Mat4;
typedef Vec<float_type, 3> Vec3;
//! Identity transform
Affine3();
//! Augmented affine matrix
Affine3(const Mat4& affine);
//! Rotation matrix
Affine3(const Mat3& R, const Vec3& t = Vec3::all(0));
//! Rodrigues vector
Affine3(const Vec3& rvec, const Vec3& t = Vec3::all(0));
//! Combines all constructors above. Supports 4x4, 4x3, 3x3, 1x3, 3x1 sizes of data matrix
explicit Affine3(const Mat& data, const Vec3& t = Vec3::all(0));
//! From a 16-element array
explicit Affine3(const float_type* vals);
//! Create identity transform
static Affine3 Identity();
//! Rotation matrix
void rotation(const Mat3& R);
//! Rodrigues vector
void rotation(const Vec3& rvec);
//! Combines rotation methods above. Supports 3x3, 1x3, 3x1 sizes of data matrix;
void rotation(const Mat& data);
//! Replaces the upper-left 3x3 block of the matrix
void linear(const Mat3& L);
//! Replaces the translation column of the matrix
void translation(const Vec3& t);
//! Same as linear(): returns the upper-left 3x3 block
Mat3 rotation() const;
//! Upper-left 3x3 block of the matrix
Mat3 linear() const;
//! Translation column of the matrix
Vec3 translation() const;
//! Rodrigues vector
Vec3 rvec() const;
//! Inverse of the full 4x4 matrix, computed with the given decomposition method
Affine3 inv(int method = cv::DECOMP_SVD) const;
//! a.rotate(R) is equivalent to Affine(R, 0) * a;
Affine3 rotate(const Mat3& R) const;
//! a.rotate(rvec) is equivalent to Affine(rvec, 0) * a;
Affine3 rotate(const Vec3& rvec) const;
//! a.translate(t) is equivalent to Affine(E, t) * a;
Affine3 translate(const Vec3& t) const;
//! a.concatenate(affine) is equivalent to affine * a;
Affine3 concatenate(const Affine3& affine) const;
//! Conversion to an Affine3 with another element type
template <typename Y> operator Affine3<Y>() const;
//! Explicit form of the conversion above
template <typename Y> Affine3<Y> cast() const;
//! Augmented 4x4 matrix holding the whole transform
Mat4 matrix;
#if defined EIGEN_WORLD_VERSION && defined EIGEN_GEOMETRY_MODULE_H
// Conversions from/to Eigen transforms; only declared when the Eigen geometry module is included first.
Affine3(const Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)>& affine);
Affine3(const Eigen::Transform<T, 3, Eigen::Affine>& affine);
operator Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)>() const;
operator Eigen::Transform<T, 3, Eigen::Affine>() const;
#endif
};
//! Composition of two transforms; the definition returns affine2.concatenate(affine1).
template<typename T> static
Affine3<T> operator*(const Affine3<T>& affine1, const Affine3<T>& affine2);
//! Applies the transform to a value exposing x, y and z members.
template<typename T, typename V> static
V operator*(const Affine3<T>& affine, const V& vector);
//! Single-precision affine transform
typedef Affine3<float> Affine3f;
//! Double-precision affine transform
typedef Affine3<double> Affine3d;
//! Applies the transform to a 3-element vector (linear part times vector, plus translation).
static Vec3f operator*(const Affine3f& affine, const Vec3f& vector);
//! Applies the transform to a 3-element vector (linear part times vector, plus translation).
static Vec3d operator*(const Affine3d& affine, const Vec3d& vector);
//! DataType traits for Affine3: the transform is described as 16 channels of its element type.
template<typename _Tp> class DataType< Affine3<_Tp> >
{
public:
typedef Affine3<_Tp> value_type;
typedef Affine3<typename DataType<_Tp>::work_type> work_type;
typedef _Tp channel_type;
enum { generic_type = 0,
depth = DataType<channel_type>::depth, // depth is taken from the element type
channels = 16, // one channel per entry of the 4x4 matrix
fmt = DataType<channel_type>::fmt + ((channels - 1) << 8),
type = CV_MAKETYPE(depth, channels)
};
typedef Vec<channel_type, channels> vec_type;
};
//! @} core
}
//! @cond IGNORED
///////////////////////////////////////////////////////////////////////////////////
// Implementation
// Default constructor: the transform starts out as the 4x4 identity matrix.
template<typename T> inline
cv::Affine3<T>::Affine3()
    : matrix(cv::Affine3<T>::Mat4::eye())
{
}
// Wraps an already assembled augmented 4x4 matrix; its values are copied unchanged.
template<typename T> inline
cv::Affine3<T>::Affine3(const Mat4& affine)
    : matrix(affine)
{
}
// Builds the transform from a 3x3 matrix R (upper-left block) and a translation t (last column).
template<typename T> inline
cv::Affine3<T>::Affine3(const Mat3& R, const Vec3& t)
{
    rotation(R);
    translation(t);

    // Homogeneous bottom row: 0 0 0 1.
    for (int idx = 12; idx < 15; ++idx)
        matrix.val[idx] = 0;
    matrix.val[15] = 1;
}
// Builds the transform from a Rodrigues rotation vector and a translation t (last column).
template<typename T> inline
cv::Affine3<T>::Affine3(const Vec3& _rvec, const Vec3& t)
{
    rotation(_rvec);
    translation(t);

    // Homogeneous bottom row: 0 0 0 1.
    for (int idx = 12; idx < 15; ++idx)
        matrix.val[idx] = 0;
    matrix.val[15] = 1;
}
// Builds the transform from a cv::Mat whose element type must match T.
//  - 4x4 (rows x cols): taken as the complete augmented matrix; t is ignored.
//  - 3x4: the left 3x3 block is the rotation, the last column the translation; t is ignored.
//  - anything else is forwarded to rotation(const Mat&) (3x3, 1x3 or 3x1) and combined with t.
// The homogeneous bottom row (0 0 0 1) is written for every layout except the full 4x4 one.
// Previously the 3x4 branch returned before the bottom row was set, leaving it at whatever
// the default-constructed matrix contained instead of 0 0 0 1.
template<typename T> inline
cv::Affine3<T>::Affine3(const cv::Mat& data, const Vec3& t)
{
    CV_Assert(data.type() == cv::DataType<T>::type);

    if (data.cols == 4 && data.rows == 4)
    {
        data.copyTo(matrix);
        return;
    }

    if (data.cols == 4 && data.rows == 3)
    {
        rotation(data(Rect(0, 0, 3, 3)));
        translation(data(Rect(3, 0, 1, 3)));
    }
    else
    {
        rotation(data);
        translation(t);
    }

    matrix.val[12] = matrix.val[13] = matrix.val[14] = 0;
    matrix.val[15] = 1;
}
// Builds the transform from a raw array; the pointer is handed straight to the Mat4 constructor.
template<typename T> inline
cv::Affine3<T>::Affine3(const float_type* vals)
    : matrix(vals)
{
}
// Returns a transform whose matrix is the 4x4 identity.
template<typename T> inline
cv::Affine3<T> cv::Affine3<T>::Identity()
{
    const Mat4 unit = Mat4::eye();
    return Affine3<T>(unit);
}
// Sets the rotation from a 3x3 matrix; this simply forwards to linear(R) without checking R.
template<typename T> inline
void cv::Affine3<T>::rotation(const Mat3& R)
{
    this->linear(R);
}
// Sets the rotation from a Rodrigues vector: the vector's length is the angle, its
// direction the axis. A vector shorter than DBL_EPSILON yields the identity rotation.
template<typename T> inline
void cv::Affine3<T>::rotation(const Vec3& _rvec)
{
    double rx = _rvec[0], ry = _rvec[1], rz = _rvec[2];
    const double theta = std::sqrt(rx*rx + ry*ry + rz*rz);

    if (theta < DBL_EPSILON)
    {
        rotation(Mat3::eye());
        return;
    }

    const double c = std::cos(theta);
    const double s = std::sin(theta);
    const double c1 = 1. - c;

    // theta is not below DBL_EPSILON here, so it is never zero when the reciprocal is taken.
    const double itheta = 1./theta;
    rx *= itheta; ry *= itheta; rz *= itheta;

    const double I[] = { 1, 0, 0, 0, 1, 0, 0, 0, 1 };
    const double rrt[] = { rx*rx, rx*ry, rx*rz, rx*ry, ry*ry, ry*rz, rx*rz, ry*rz, rz*rz };
    const double _r_x_[] = { 0, -rz, ry, rz, 0, -rx, -ry, rx, 0 };

    // R = cos(theta)*I + (1 - cos(theta))*r*rT + sin(theta)*[r_x]
    // where [r_x] is [0 -rz ry; rz 0 -rx; -ry rx 0]
    Mat3 R;
    for (int k = 0; k < 9; ++k)
        R.val[k] = static_cast<float_type>(c*I[k] + c1*rrt[k] + s*_r_x_[k]);

    rotation(R);
}
// Combines the rotation setters above. Supports 3x3, 1x3 and 3x1 sizes of the data matrix.
// The element type of data must match T. A 3x3 input is used as the rotation matrix; a
// 1x3 or 3x1 input is interpreted as a Rodrigues vector. Any other size fails the assertion.
template<typename T> inline
void cv::Affine3<T>::rotation(const cv::Mat& data)
{
    CV_Assert(data.type() == cv::DataType<T>::type);

    if (data.cols == 3 && data.rows == 3)
    {
        Mat3 R;
        data.copyTo(R);
        rotation(R);
    }
    else if ((data.cols == 3 && data.rows == 1) || (data.cols == 1 && data.rows == 3))
    {
        Vec3 _rvec;
        // Bring both the row and the column layout to 3 rows before copying into the vector.
        data.reshape(1, 3).copyTo(_rvec);
        rotation(_rvec);
    }
    else
        CV_Assert(!"Input matrix can be 3x3, 1x3 or 3x1");
}
// Copies L into the upper-left 3x3 block of the 4x4 matrix; all other entries are untouched.
template<typename T> inline
void cv::Affine3<T>::linear(const Mat3& L)
{
    for (int row = 0; row < 3; ++row)
        for (int col = 0; col < 3; ++col)
            matrix.val[4*row + col] = L.val[3*row + col];
}
// Writes t into the first three entries of the last column; all other entries are untouched.
template<typename T> inline
void cv::Affine3<T>::translation(const Vec3& t)
{
    for (int row = 0; row < 3; ++row)
        matrix.val[4*row + 3] = t[row];
}
// Returns the upper-left 3x3 block; identical to linear(), no orthogonality is enforced.
template<typename T> inline
typename cv::Affine3<T>::Mat3 cv::Affine3<T>::rotation() const
{
    return this->linear();
}
// Returns a copy of the upper-left 3x3 block of the 4x4 matrix.
template<typename T> inline
typename cv::Affine3<T>::Mat3 cv::Affine3<T>::linear() const
{
    typename cv::Affine3<T>::Mat3 block;
    for (int row = 0; row < 3; ++row)
        for (int col = 0; col < 3; ++col)
            block.val[3*row + col] = matrix.val[4*row + col];
    return block;
}
// Returns the first three entries of the last column of the 4x4 matrix.
template<typename T> inline
typename cv::Affine3<T>::Vec3 cv::Affine3<T>::translation() const
{
    const float_type tx = matrix.val[3];
    const float_type ty = matrix.val[7];
    const float_type tz = matrix.val[11];
    return Vec3(tx, ty, tz);
}
// Converts the 3x3 block returned by rotation() into a Rodrigues vector.
// All intermediate math is done in double; the result is converted to Vec3 on return.
template<typename T> inline
typename cv::Affine3<T>::Vec3 cv::Affine3<T>::rvec() const
{
cv::Vec3d w;
cv::Matx33d u, vt, R = rotation();
// Replace R by u*vt from its SVD, i.e. drop the singular values
// (presumably to get the closest pure rotation — TODO confirm).
cv::SVD::compute(R, w, u, vt, cv::SVD::FULL_UV + cv::SVD::MODIFY_A);
R = u * vt;
// Differences of the mirrored off-diagonal entries of R.
double rx = R.val[7] - R.val[5];
double ry = R.val[2] - R.val[6];
double rz = R.val[3] - R.val[1];
// s is half the length of (rx, ry, rz); c is (trace(R) - 1) / 2 clamped to [-1, 1].
double s = std::sqrt((rx*rx + ry*ry + rz*rz)*0.25);
double c = (R.val[0] + R.val[4] + R.val[8] - 1) * 0.5;
c = c > 1.0 ? 1.0 : c < -1.0 ? -1.0 : c;
double theta = acos(c);
if( s < 1e-5 )
{
// Off-diagonal differences vanish: either (almost) no rotation, or the c <= 0 case below.
if( c > 0 )
rx = ry = rz = 0;
else
{
// Rebuild the axis from the diagonal of R; the signs of ry and rz follow R.val[1] and R.val[2].
double t;
t = (R.val[0] + 1) * 0.5;
rx = std::sqrt(std::max(t, 0.0));
t = (R.val[4] + 1) * 0.5;
ry = std::sqrt(std::max(t, 0.0)) * (R.val[1] < 0 ? -1.0 : 1.0);
t = (R.val[8] + 1) * 0.5;
rz = std::sqrt(std::max(t, 0.0)) * (R.val[2] < 0 ? -1.0 : 1.0);
// When rx is the smallest component, the relative sign of ry and rz is checked against R.val[5].
if( fabs(rx) < fabs(ry) && fabs(rx) < fabs(rz) && (R.val[5] > 0) != (ry*rz > 0) )
rz = -rz;
// Rescale the axis so that its length equals theta.
theta /= std::sqrt(rx*rx + ry*ry + rz*rz);
rx *= theta;
ry *= theta;
rz *= theta;
}
}
else
{
// General case: scale (rx, ry, rz) by theta / (2*s).
double vth = 1/(2*s);
vth *= theta;
rx *= vth; ry *= vth; rz *= vth;
}
return cv::Vec3d(rx, ry, rz);
}
// Inverts the full 4x4 matrix with the requested decomposition method and wraps the result.
template<typename T> inline
cv::Affine3<T> cv::Affine3<T>::inv(int method) const
{
    const Mat4 inverse = matrix.inv(method);
    return inverse;
}
// Returns a new transform in which R has been applied on the left of both the linear
// part and the translation of this transform; *this is left unmodified.
template<typename T> inline
cv::Affine3<T> cv::Affine3<T>::rotate(const Mat3& R) const
{
    const Mat3 linearPart = linear();
    const Vec3 shift = translation();

    Mat4 out;
    for (int row = 0; row < 3; ++row)
    {
        // Upper-left block: row of R times column of the current linear part.
        for (int col = 0; col < 3; ++col)
        {
            float_type acc = 0;
            for (int k = 0; k < 3; ++k)
                acc += R(row, k) * linearPart(k, col);
            out(row, col) = acc;
        }
        // Last column: row of R times the current translation.
        out(row, 3) = R.row(row).dot(shift.t());
    }

    // Homogeneous bottom row: 0 0 0 1.
    out.val[12] = out.val[13] = out.val[14] = 0;
    out.val[15] = 1;
    return out;
}
// Rotates this transform by the rotation described by the Rodrigues vector _rvec.
// The rotation matrix is built with the transform's own element type T. The previous
// code built it through Affine3f, which rounded the matrix to float even for Affine3<double>.
template<typename T> inline
cv::Affine3<T> cv::Affine3<T>::rotate(const Vec3& _rvec) const
{
    return rotate(Affine3<T>(_rvec).rotation());
}
// Returns a copy of this transform with t added to its translation column.
template<typename T> inline
cv::Affine3<T> cv::Affine3<T>::translate(const Vec3& t) const
{
    Mat4 shifted = matrix;
    for (int row = 0; row < 3; ++row)
        shifted.val[4*row + 3] += t[row];
    return shifted;
}
// Applies `affine` after this transform: first its 3x3 block via rotate(), then its translation.
template<typename T> inline
cv::Affine3<T> cv::Affine3<T>::concatenate(const Affine3<T>& affine) const
{
    const Affine3<T> rotated = rotate(affine.rotation());
    return rotated.translate(affine.translation());
}
// Conversion to a transform with element type Y; the 4x4 matrix is converted element type-wise.
template<typename T> template <typename Y> inline
cv::Affine3<T>::operator Affine3<Y>() const
{
    const Affine3<Y> converted(matrix);
    return converted;
}
// Explicitly named conversion to a transform with element type Y.
template<typename T> template <typename Y> inline
cv::Affine3<Y> cv::Affine3<T>::cast() const
{
    const Affine3<Y> converted(matrix);
    return converted;
}
// Composition of two transforms: the result applies affine2 first and affine1 second.
template<typename T> inline
cv::Affine3<T> cv::operator*(const cv::Affine3<T>& affine1, const cv::Affine3<T>& affine2)
{
    const cv::Affine3<T> composed = affine2.concatenate(affine1);
    return composed;
}
// Applies the transform to a value with x, y and z members: each output coordinate is a
// row of the upper 3x4 part of the matrix applied to (x, y, z, 1).
template<typename T, typename V> inline
V cv::operator*(const cv::Affine3<T>& affine, const V& v)
{
    const typename Affine3<T>::Mat4& A = affine.matrix;

    V out;
    out.x = A.val[0] * v.x + A.val[1] * v.y + A.val[ 2] * v.z + A.val[ 3];
    out.y = A.val[4] * v.x + A.val[5] * v.y + A.val[ 6] * v.z + A.val[ 7];
    out.z = A.val[8] * v.x + A.val[9] * v.y + A.val[10] * v.z + A.val[11];
    return out;
}
// Applies a single-precision transform to a 3-element vector: each output element is a
// row of the upper 3x4 part of the matrix applied to (v[0], v[1], v[2], 1).
static inline
cv::Vec3f cv::operator*(const cv::Affine3f& affine, const cv::Vec3f& v)
{
    const cv::Matx44f& A = affine.matrix;

    cv::Vec3f out;
    for (int row = 0; row < 3; ++row)
    {
        const float* a = A.val + 4*row;
        out.val[row] = a[0] * v[0] + a[1] * v[1] + a[2] * v[2] + a[3];
    }
    return out;
}
// Applies a double-precision transform to a 3-element vector: each output element is a
// row of the upper 3x4 part of the matrix applied to (v[0], v[1], v[2], 1).
static inline
cv::Vec3d cv::operator*(const cv::Affine3d& affine, const cv::Vec3d& v)
{
    const cv::Matx44d& A = affine.matrix;

    cv::Vec3d out;
    for (int row = 0; row < 3; ++row)
    {
        const double* a = A.val + 4*row;
        out.val[row] = a[0] * v[0] + a[1] * v[1] + a[2] * v[2] + a[3];
    }
    return out;
}
#if defined EIGEN_WORLD_VERSION && defined EIGEN_GEOMETRY_MODULE_H
// Builds the transform from a row-major Eigen transform by copying its 4x4 matrix data.
template<typename T> inline
cv::Affine3<T>::Affine3(const Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)>& affine)
{
    // Wrap Eigen's storage in a Mat header (no copy), then copy it into our matrix.
    const cv::Mat eigenData(4, 4, cv::DataType<T>::type, affine.matrix().data());
    eigenData.copyTo(matrix);
}
// Builds the transform from an Eigen transform with default storage options: it is first
// converted to a row-major Eigen transform, whose 4x4 matrix data is then copied.
template<typename T> inline
cv::Affine3<T>::Affine3(const Eigen::Transform<T, 3, Eigen::Affine>& affine)
{
    Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)> rowMajor = affine;
    const cv::Mat eigenData(4, 4, cv::DataType<T>::type, rowMajor.matrix().data());
    eigenData.copyTo(matrix);
}
// Converts to a row-major Eigen transform by copying the 4x4 matrix into Eigen's storage.
template<typename T> inline
cv::Affine3<T>::operator Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)>() const
{
    Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)> result;

    // Mat header over Eigen's buffer (destination) and over our own matrix (source, not copied).
    cv::Mat dst(4, 4, cv::DataType<T>::type, result.matrix().data());
    const cv::Mat src(matrix, false);
    src.copyTo(dst);

    return result;
}
// Converts to an Eigen transform with default storage options by going through the
// row-major conversion operator and letting Eigen convert the result.
template<typename T> inline
cv::Affine3<T>::operator Eigen::Transform<T, 3, Eigen::Affine>() const
{
    typedef Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)> RowMajorTransform;
    return this->operator RowMajorTransform();
}
#endif /* defined EIGEN_WORLD_VERSION && defined EIGEN_GEOMETRY_MODULE_H */
//! @endcond
#endif /* __cplusplus */
#endif /* __OPENCV_CORE_AFFINE3_HPP__ */

745
3rdparty/include/opencv2/core/base.hpp vendored Normal file
View File

@ -0,0 +1,745 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2014, Itseez Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_CORE_BASE_HPP__
#define __OPENCV_CORE_BASE_HPP__
#ifndef __cplusplus
# error base.hpp header must be compiled as C++
#endif
#include <climits>
#include "opencv2/core/cvdef.h"
#include "opencv2/core/cvstd.hpp"
#include "opencv2/hal.hpp"
namespace cv
{
//! @addtogroup core_utils
//! @{
//! Namespace holding the error codes; zero means success and every failure code is negative.
namespace Error {
//! error codes
enum Code {
StsOk= 0, //!< everything is ok
StsBackTrace= -1, //!< pseudo error for back trace
StsError= -2, //!< unknown /unspecified error
StsInternal= -3, //!< internal error (bad state)
StsNoMem= -4, //!< insufficient memory
StsBadArg= -5, //!< function arg/param is bad
StsBadFunc= -6, //!< unsupported function
StsNoConv= -7, //!< iter. didn't converge
StsAutoTrace= -8, //!< tracing
HeaderIsNull= -9, //!< image header is NULL
BadImageSize= -10, //!< image size is invalid
BadOffset= -11, //!< offset is invalid
BadDataPtr= -12, //!<
BadStep= -13, //!<
BadModelOrChSeq= -14, //!<
BadNumChannels= -15, //!<
BadNumChannel1U= -16, //!<
BadDepth= -17, //!<
BadAlphaChannel= -18, //!<
BadOrder= -19, //!<
BadOrigin= -20, //!<
BadAlign= -21, //!<
BadCallBack= -22, //!<
BadTileSize= -23, //!<
BadCOI= -24, //!<
BadROISize= -25, //!<
MaskIsTiled= -26, //!<
StsNullPtr= -27, //!< null pointer
StsVecLengthErr= -28, //!< incorrect vector length
StsFilterStructContentErr= -29, //!< incorr. filter structure content
StsKernelStructContentErr= -30, //!< incorr. transform kernel content
StsFilterOffsetErr= -31, //!< incorrect filter offset value
// Note: the numbering jumps from -31 to -201 here.
StsBadSize= -201, //!< the input/output structure size is incorrect
StsDivByZero= -202, //!< division by zero
StsInplaceNotSupported= -203, //!< in-place operation is not supported
StsObjectNotFound= -204, //!< request can't be completed
StsUnmatchedFormats= -205, //!< formats of input/output arrays differ
StsBadFlag= -206, //!< flag is wrong or not supported
StsBadPoint= -207, //!< bad CvPoint
StsBadMask= -208, //!< bad format of mask (neither 8uC1 nor 8sC1)
StsUnmatchedSizes= -209, //!< sizes of input/output structures do not match
StsUnsupportedFormat= -210, //!< the data format/type is not supported by the function
StsOutOfRange= -211, //!< some of parameters are out of range
StsParseError= -212, //!< invalid syntax/structure of the parsed file
StsNotImplemented= -213, //!< the requested function/feature is not implemented
StsBadMemBlock= -214, //!< an allocated block has been corrupted
StsAssert= -215, //!< assertion failed
GpuNotSupported= -216,
GpuApiCallError= -217,
OpenGlNotSupported= -218,
OpenGlApiCallError= -219,
OpenCLApiCallError= -220,
OpenCLDoubleNotSupported= -221,
OpenCLInitError= -222,
OpenCLNoAMDBlasFft= -223
};
} //Error
//! @} core_utils
//! @addtogroup core_array
//! @{
//! matrix decomposition types
enum DecompTypes {
/** Gaussian elimination with the optimal pivot element chosen. */
DECOMP_LU = 0,
/** singular value decomposition (SVD) method; the system can be over-defined and/or the matrix
src1 can be singular */
DECOMP_SVD = 1,
/** eigenvalue decomposition; the matrix src1 must be symmetric */
DECOMP_EIG = 2,
/** Cholesky \f$LL^T\f$ factorization; the matrix src1 must be symmetric and
positive-definite */
DECOMP_CHOLESKY = 3,
/** QR factorization; the system can be over-defined and/or the matrix src1 can be singular */
DECOMP_QR = 4,
/** while all the previous flags are mutually exclusive, this flag can be used together with
any of the previous; it means that the normal equations
\f$\texttt{src1}^T\cdot\texttt{src1}\cdot\texttt{dst}=\texttt{src1}^T\texttt{src2}\f$ are
solved instead of the original system
\f$\texttt{src1}\cdot\texttt{dst}=\texttt{src2}\f$ */
DECOMP_NORMAL = 16
};
/** norm types
- For one array:
\f[norm = \forkthree{\|\texttt{src1}\|_{L_{\infty}} = \max _I | \texttt{src1} (I)|}{if \(\texttt{normType} = \texttt{NORM\_INF}\) }
{ \| \texttt{src1} \| _{L_1} = \sum _I | \texttt{src1} (I)|}{if \(\texttt{normType} = \texttt{NORM\_L1}\) }
{ \| \texttt{src1} \| _{L_2} = \sqrt{\sum_I \texttt{src1}(I)^2} }{if \(\texttt{normType} = \texttt{NORM\_L2}\) }\f]
- Absolute norm for two arrays
\f[norm = \forkthree{\|\texttt{src1}-\texttt{src2}\|_{L_{\infty}} = \max _I | \texttt{src1} (I) - \texttt{src2} (I)|}{if \(\texttt{normType} = \texttt{NORM\_INF}\) }
{ \| \texttt{src1} - \texttt{src2} \| _{L_1} = \sum _I | \texttt{src1} (I) - \texttt{src2} (I)|}{if \(\texttt{normType} = \texttt{NORM\_L1}\) }
{ \| \texttt{src1} - \texttt{src2} \| _{L_2} = \sqrt{\sum_I (\texttt{src1}(I) - \texttt{src2}(I))^2} }{if \(\texttt{normType} = \texttt{NORM\_L2}\) }\f]
- Relative norm for two arrays
\f[norm = \forkthree{\frac{\|\texttt{src1}-\texttt{src2}\|_{L_{\infty}} }{\|\texttt{src2}\|_{L_{\infty}} }}{if \(\texttt{normType} = \texttt{NORM\_RELATIVE\_INF}\) }
{ \frac{\|\texttt{src1}-\texttt{src2}\|_{L_1} }{\|\texttt{src2}\|_{L_1}} }{if \(\texttt{normType} = \texttt{NORM\_RELATIVE\_L1}\) }
{ \frac{\|\texttt{src1}-\texttt{src2}\|_{L_2} }{\|\texttt{src2}\|_{L_2}} }{if \(\texttt{normType} = \texttt{NORM\_RELATIVE\_L2}\) }\f]
*/
enum NormTypes { NORM_INF = 1,
NORM_L1 = 2,
NORM_L2 = 4,
NORM_L2SQR = 5, // presumably the squared L2 norm — TODO confirm
NORM_HAMMING = 6, // norm type reported by the Hamming functor in this header
NORM_HAMMING2 = 7,
NORM_TYPE_MASK = 7, // same value as NORM_HAMMING2; presumably masks the norm-type bits — TODO confirm
NORM_RELATIVE = 8, //!< flag
NORM_MINMAX = 32 //!< flag
};
//! comparison types
enum CmpTypes { CMP_EQ = 0, //!< src1 is equal to src2.
CMP_GT = 1, //!< src1 is greater than src2.
CMP_GE = 2, //!< src1 is greater than or equal to src2.
CMP_LT = 3, //!< src1 is less than src2.
CMP_LE = 4, //!< src1 is less than or equal to src2.
CMP_NE = 5 //!< src1 is not equal to src2.
};
//! generalized matrix multiplication flags; the values are distinct bits
enum GemmFlags { GEMM_1_T = 1, //!< transposes src1
GEMM_2_T = 2, //!< transposes src2
GEMM_3_T = 4 //!< transposes src3
};
//! transform flags; the DCT_* values are aliases of the corresponding DFT_* values
enum DftFlags {
/** performs an inverse 1D or 2D transform instead of the default forward
transform. */
DFT_INVERSE = 1,
/** scales the result: divide it by the number of array elements. Normally, it is
combined with DFT_INVERSE. */
DFT_SCALE = 2,
/** performs a forward or inverse transform of every individual row of the input
matrix; this flag enables you to transform multiple vectors simultaneously and can be used to
decrease the overhead (which is sometimes several times larger than the processing itself) to
perform 3D and higher-dimensional transformations and so forth.*/
DFT_ROWS = 4,
/** performs a forward transformation of 1D or 2D real array; the result,
though being a complex array, has complex-conjugate symmetry (*CCS*, see the function
description below for details), and such an array can be packed into a real array of the same
size as input, which is the fastest option and which is what the function does by default;
however, you may wish to get a full complex array (for simpler spectrum analysis, and so on) -
pass the flag to enable the function to produce a full-size complex output array. */
DFT_COMPLEX_OUTPUT = 16,
/** performs an inverse transformation of a 1D or 2D complex array; the
result is normally a complex array of the same size, however, if the input array has
conjugate-complex symmetry (for example, it is a result of forward transformation with
DFT_COMPLEX_OUTPUT flag), the output is a real array; while the function itself does not
check whether the input is symmetrical or not, you can pass the flag and then the function
will assume the symmetry and produce the real output array (note that when the input is packed
into a real array and inverse transformation is executed, the function treats the input as a
packed complex-conjugate symmetrical array, and the output will also be a real array). */
DFT_REAL_OUTPUT = 32,
/** performs an inverse 1D or 2D transform instead of the default forward transform. */
DCT_INVERSE = DFT_INVERSE,
/** performs a forward or inverse transform of every individual row of the input
matrix. This flag enables you to transform multiple vectors simultaneously and can be used to
decrease the overhead (which is sometimes several times larger than the processing itself) to
perform 3D and higher-dimensional transforms and so forth.*/
DCT_ROWS = DFT_ROWS
};
//! Various border types, image boundaries are denoted with `|`
//! @see borderInterpolate, copyMakeBorder
enum BorderTypes {
BORDER_CONSTANT = 0, //!< `iiiiii|abcdefgh|iiiiiii` with some specified `i`
BORDER_REPLICATE = 1, //!< `aaaaaa|abcdefgh|hhhhhhh`
BORDER_REFLECT = 2, //!< `fedcba|abcdefgh|hgfedcb`
BORDER_WRAP = 3, //!< `cdefgh|abcdefgh|abcdefg`
BORDER_REFLECT_101 = 4, //!< `gfedcb|abcdefgh|gfedcba`
BORDER_TRANSPARENT = 5, //!< `uvwxyz|abcdefgh|ijklmno`
BORDER_REFLECT101 = BORDER_REFLECT_101, //!< same as BORDER_REFLECT_101
BORDER_DEFAULT = BORDER_REFLECT_101, //!< same as BORDER_REFLECT_101
BORDER_ISOLATED = 16 //!< do not look outside of ROI
};
//! @} core_array
//! @addtogroup core_utils
//! @{
//! @cond IGNORED
//////////////// static assert /////////////////
// Two-step token pasting, so that macro arguments such as __LINE__ are expanded before
// they are concatenated.
#define CVAUX_CONCAT_EXP(a, b) a##b
#define CVAUX_CONCAT(a, b) CVAUX_CONCAT_EXP(a,b)
// CV_StaticAssert(condition, reason): compile-time assertion.
// First choice: the native static_assert, when the compiler is known to provide it.
#if defined(__clang__)
# ifndef __has_extension
# define __has_extension __has_feature /* compatibility, for older versions of clang */
# endif
# if __has_extension(cxx_static_assert)
# define CV_StaticAssert(condition, reason) static_assert((condition), reason " " #condition)
# endif
#elif defined(__GNUC__)
# if (defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L)
# define CV_StaticAssert(condition, reason) static_assert((condition), reason " " #condition)
# endif
#elif defined(_MSC_VER)
# if _MSC_VER >= 1600 /* MSVC 10 */
# define CV_StaticAssert(condition, reason) static_assert((condition), reason " " #condition)
# endif
#endif
// Fallbacks used when no native static_assert was selected above.
#ifndef CV_StaticAssert
// NOTE(review): this test requires major > 3 AND minor > 2, so e.g. GCC 5.0-5.2 would take
// the template fallback below — confirm whether that is intended.
# if defined(__GNUC__) && (__GNUC__ > 3) && (__GNUC_MINOR__ > 2)
// GCC: reference a function carrying the "error" attribute only when the condition is false.
# define CV_StaticAssert(condition, reason) ({ extern int __attribute__((error("CV_StaticAssert: " reason " " #condition))) CV_StaticAssert(); ((condition) ? 0 : CV_StaticAssert()); })
# else
// Portable fallback: CV_StaticAssert_failed is only defined for `true`, so taking its
// sizeof with a false condition is a compile error.
template <bool x> struct CV_StaticAssert_failed;
template <> struct CV_StaticAssert_failed<true> { enum { val = 1 }; };
template<int x> struct CV_StaticAssert_test {};
# define CV_StaticAssert(condition, reason)\
typedef cv::CV_StaticAssert_test< sizeof(cv::CV_StaticAssert_failed< static_cast<bool>(condition) >) > CVAUX_CONCAT(CV_StaticAssert_failed_at_, __LINE__)
# endif
#endif
// Suppress warning "-Wdeprecated-declarations" / C4996
// CV_DO_PRAGMA(x) emits a pragma from inside a macro; it expands to nothing on compilers
// that are neither MSVC nor GCC-compatible.
#if defined(_MSC_VER)
#define CV_DO_PRAGMA(x) __pragma(x)
#elif defined(__GNUC__)
#define CV_DO_PRAGMA(x) _Pragma (#x)
#else
#define CV_DO_PRAGMA(x)
#endif
// CV_SUPPRESS_DEPRECATED_START / CV_SUPPRESS_DEPRECATED_END bracket a region in which the
// deprecation warning is disabled; the previous warning state is pushed and then popped.
#ifdef _MSC_VER
#define CV_SUPPRESS_DEPRECATED_START \
CV_DO_PRAGMA(warning(push)) \
CV_DO_PRAGMA(warning(disable: 4996))
#define CV_SUPPRESS_DEPRECATED_END CV_DO_PRAGMA(warning(pop))
#elif defined (__clang__) || ((__GNUC__) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 5))))
#define CV_SUPPRESS_DEPRECATED_START \
CV_DO_PRAGMA(GCC diagnostic push) \
CV_DO_PRAGMA(GCC diagnostic ignored "-Wdeprecated-declarations")
#define CV_SUPPRESS_DEPRECATED_END CV_DO_PRAGMA(GCC diagnostic pop)
#else
// Other compilers (and GCC older than 4.6): both macros expand to nothing.
#define CV_SUPPRESS_DEPRECATED_START
#define CV_SUPPRESS_DEPRECATED_END
#endif
//! @endcond
/*! @brief Signals an error and raises the exception.
By default the function prints information about the error to stderr,
then it either stops if setBreakOnError() had been called before or raises the exception.
It is possible to alter the error processing by using redirectError().
@param _code - error code (Error::Code)
@param _err - error description
@param _func - function name. Available only when the compiler supports getting it
@param _file - source file name where the error has occurred
@param _line - line number in the source file where the error has occurred
@see CV_Error, CV_Error_, CV_ErrorNoReturn, CV_ErrorNoReturn_, CV_Assert, CV_DbgAssert
*/
CV_EXPORTS void error(int _code, const String& _err, const char* _func, const char* _file, int _line);
// On clang / Apple toolchains the "-Winvalid-noreturn" diagnostic is silenced around the
// function below and restored afterwards.
#ifdef __GNUC__
# if defined __clang__ || defined __APPLE__
# pragma GCC diagnostic push
# pragma GCC diagnostic ignored "-Winvalid-noreturn"
# endif
#endif
/** same as cv::error, but does not return

Forwards all arguments to cv::error() and is declared CV_NORETURN. The parameters have
the same meaning as for cv::error().
*/
CV_INLINE CV_NORETURN void errorNoReturn(int _code, const String& _err, const char* _func, const char* _file, int _line)
{
error(_code, _err, _func, _file, _line);
#ifdef __GNUC__
# if !defined __clang__ && !defined __APPLE__
// this suppresses this warning: "noreturn" function does return [enabled by default]
__builtin_trap();
// or use infinite loop: for (;;) {}
# endif
#endif
}
#ifdef __GNUC__
# if defined __clang__ || defined __APPLE__
# pragma GCC diagnostic pop
# endif
#endif
// CV_Func: name of the enclosing function as provided by the compiler, or an empty
// string when the compiler is neither GCC-compatible nor MSVC.
#if defined __GNUC__
#define CV_Func __func__
#elif defined _MSC_VER
#define CV_Func __FUNCTION__
#else
#define CV_Func ""
#endif
/** @brief Call the error handler.
Currently, the error handler prints the error code and the error message to the standard
error stream `stderr`. In the Debug configuration, it then provokes memory access violation, so that
the execution stack and all the parameters can be analyzed by the debugger. In the Release
configuration, the exception is thrown.
The macro passes the current function name, file and line to cv::error().
@param code one of Error::Code
@param msg error message
*/
#define CV_Error( code, msg ) cv::error( code, msg, CV_Func, __FILE__, __LINE__ )
/** @brief Call the error handler.
This macro can be used to construct an error message on the fly to include some dynamic information,
for example:
@code
// note the extra parentheses around the formatted text message
CV_Error_( CV_StsOutOfRange,
("the value at (%d, %d)=%g is out of range", badPt.x, badPt.y, badValue));
@endcode
@param code one of Error::Code
@param args printf-like formatted error message in parentheses
*/
#define CV_Error_( code, args ) cv::error( code, cv::format args, CV_Func, __FILE__, __LINE__ )
/** @brief Checks a condition at runtime and throws exception if it fails
The macros CV_Assert (and CV_DbgAssert(expr)) evaluate the specified expression. If it is 0, the macros
raise an error (see cv::error). The macro CV_Assert checks the condition in both Debug and Release
configurations while CV_DbgAssert is only retained in the Debug configuration.
The stringified expression is used as the error message, with code Error::StsAssert.
*/
#define CV_Assert( expr ) if(!!(expr)) ; else cv::error( cv::Error::StsAssert, #expr, CV_Func, __FILE__, __LINE__ )
/** same as CV_Error(code,msg), but does not return */
#define CV_ErrorNoReturn( code, msg ) cv::errorNoReturn( code, msg, CV_Func, __FILE__, __LINE__ )
/** same as CV_Error_(code,args), but does not return */
#define CV_ErrorNoReturn_( code, args ) cv::errorNoReturn( code, cv::format args, CV_Func, __FILE__, __LINE__ )
/** replaced with CV_Assert(expr) in Debug configuration (when _DEBUG is defined); expands to nothing otherwise */
#ifdef _DEBUG
# define CV_DbgAssert(expr) CV_Assert(expr)
#else
# define CV_DbgAssert(expr)
#endif
/*
* Hamming distance functor - counts the bit differences between two strings - useful for the Brief descriptor
* bit count of A XOR'ed with B
*/
struct CV_EXPORTS Hamming
{
enum { normType = NORM_HAMMING };
typedef unsigned char ValueType;
typedef int ResultType;
/** this will count the bits in a ^ b
@param a first byte array
@param b second byte array
@param size number of elements passed along with a and b (presumably bytes in each array — TODO confirm)
*/
ResultType operator()( const unsigned char* a, const unsigned char* b, int size ) const;
};
// HammingLUT is kept as an alias of Hamming.
typedef Hamming HammingLUT;
/////////////////////////////////// inline norms ////////////////////////////////////
// Absolute value helpers. The generic version forwards to std::abs; the overloads for
// the 8- and 16-bit integer types return int.
template<typename _Tp> inline _Tp cv_abs(_Tp x)
{
    return std::abs(x);
}
// Unsigned 8-bit: already non-negative, only widened to int.
inline int cv_abs(uchar x)
{
    return static_cast<int>(x);
}
// Signed 8-bit: widened to int before taking the absolute value.
inline int cv_abs(schar x)
{
    return std::abs(static_cast<int>(x));
}
// Unsigned 16-bit: already non-negative, only widened to int.
inline int cv_abs(ushort x)
{
    return static_cast<int>(x);
}
// Signed 16-bit: widened to int before taking the absolute value.
inline int cv_abs(short x)
{
    return std::abs(static_cast<int>(x));
}
// Sum of the squares of the n elements of a, accumulated in _AccTp.
// Returns 0 when n is not positive.
template<typename _Tp, typename _AccTp> static inline
_AccTp normL2Sqr(const _Tp* a, int n)
{
    _AccTp total = 0;
    int idx = 0;
#if CV_ENABLE_UNROLLED
    // Unrolled part: four elements per iteration.
    while( idx <= n - 4 )
    {
        _AccTp e0 = a[idx], e1 = a[idx+1], e2 = a[idx+2], e3 = a[idx+3];
        total += e0*e0 + e1*e1 + e2*e2 + e3*e3;
        idx += 4;
    }
#endif
    // Remaining elements (all of them when unrolling is disabled).
    while( idx < n )
    {
        _AccTp e = a[idx];
        total += e*e;
        ++idx;
    }
    return total;
}
//! Sum of cv_abs() of the n elements of a, accumulated in _AccTp.
template<typename _Tp, typename _AccTp> static inline
_AccTp normL1(const _Tp* a, int n)
{
    _AccTp total = 0;
    int k = 0;
#if CV_ENABLE_UNROLLED
    // four elements per iteration; each magnitude is cast to _AccTp before the partial sum
    while( k <= n - 4 )
    {
        total += (_AccTp)cv_abs(a[k]) + (_AccTp)cv_abs(a[k+1]) +
                 (_AccTp)cv_abs(a[k+2]) + (_AccTp)cv_abs(a[k+3]);
        k += 4;
    }
#endif
    // remaining elements (or all of them when unrolling is disabled)
    while( k < n )
    {
        total += cv_abs(a[k]);
        ++k;
    }
    return total;
}
//! Largest cv_abs() value among the n elements of a; 0 when n <= 0.
template<typename _Tp, typename _AccTp> static inline
_AccTp normInf(const _Tp* a, int n)
{
    _AccTp peak = 0;
    int k = 0;
    while( k < n )
    {
        const _AccTp magnitude = (_AccTp)cv_abs(a[k]);
        peak = std::max(peak, magnitude);
        ++k;
    }
    return peak;
}
//! Sum of squared element-wise differences between a and b over n elements.
//! Each difference is formed in the element type and then converted to _AccTp.
template<typename _Tp, typename _AccTp> static inline
_AccTp normL2Sqr(const _Tp* a, const _Tp* b, int n)
{
    _AccTp total = 0;
    int k = 0;
#if CV_ENABLE_UNROLLED
    // four differences per iteration; their squares are summed first, then added to the total
    while( k <= n - 4 )
    {
        _AccTp d0 = _AccTp(a[k] - b[k]);
        _AccTp d1 = _AccTp(a[k+1] - b[k+1]);
        _AccTp d2 = _AccTp(a[k+2] - b[k+2]);
        _AccTp d3 = _AccTp(a[k+3] - b[k+3]);
        total += d0*d0 + d1*d1 + d2*d2 + d3*d3;
        k += 4;
    }
#endif
    // remaining elements (or all of them when unrolling is disabled)
    while( k < n )
    {
        _AccTp d = _AccTp(a[k] - b[k]);
        total += d*d;
        ++k;
    }
    return total;
}
//! Non-template float overload: sum of squared differences between a and b over n elements.
static inline float normL2Sqr(const float* a, const float* b, int n)
{
    float total = 0.f;
    int k = 0;
    while( k < n )
    {
        const float diff = a[k] - b[k];
        total += diff*diff;
        ++k;
    }
    return total;
}
//! Sum of absolute element-wise differences between a and b over n elements.
//! Each difference is formed in the element type, converted to _AccTp, then passed to std::abs.
template<typename _Tp, typename _AccTp> static inline
_AccTp normL1(const _Tp* a, const _Tp* b, int n)
{
    _AccTp total = 0;
    int k = 0;
#if CV_ENABLE_UNROLLED
    // four differences per iteration; their magnitudes are summed first, then added to the total
    while( k <= n - 4 )
    {
        _AccTp d0 = _AccTp(a[k] - b[k]);
        _AccTp d1 = _AccTp(a[k+1] - b[k+1]);
        _AccTp d2 = _AccTp(a[k+2] - b[k+2]);
        _AccTp d3 = _AccTp(a[k+3] - b[k+3]);
        total += std::abs(d0) + std::abs(d1) + std::abs(d2) + std::abs(d3);
        k += 4;
    }
#endif
    // remaining elements (or all of them when unrolling is disabled)
    while( k < n )
    {
        _AccTp d = _AccTp(a[k] - b[k]);
        total += std::abs(d);
        ++k;
    }
    return total;
}
//! Non-template float overload: sum of absolute differences between a and b over n elements.
inline float normL1(const float* a, const float* b, int n)
{
    float total = 0.f;
    int k = 0;
    while( k < n )
    {
        total += std::abs(a[k] - b[k]);
        ++k;
    }
    return total;
}
//! Non-template 8-bit overload: sum of absolute differences between a and b over n elements.
//! The subtraction is carried out on the promoted (int) values, so it cannot wrap around.
inline int normL1(const uchar* a, const uchar* b, int n)
{
    int total = 0;
    int k = 0;
    while( k < n )
    {
        total += std::abs(a[k] - b[k]);
        ++k;
    }
    return total;
}
//! Largest absolute element-wise difference between a and b over n elements; 0 when n <= 0.
template<typename _Tp, typename _AccTp> static inline
_AccTp normInf(const _Tp* a, const _Tp* b, int n)
{
    _AccTp peak = 0;
    int k = 0;
    while( k < n )
    {
        _AccTp diff = a[k] - b[k];
        peak = std::max(peak, std::abs(diff));
        ++k;
    }
    return peak;
}
/** @brief Computes the cube root of an argument.
The function cubeRoot computes \f$\sqrt[3]{\texttt{val}}\f$. Negative arguments are handled correctly.
NaN and Inf are not handled. The accuracy approaches the maximum possible accuracy for
single-precision data.
@param val A function argument.
*/
CV_EXPORTS_W float cubeRoot(float val);
/** @brief Calculates the angle of a 2D vector in degrees.
The function fastAtan2 calculates the full-range angle of an input 2D vector. The angle is measured
in degrees and varies from 0 to 360 degrees. The accuracy is about 0.3 degrees.
@note The argument order is (y, x): the y-coordinate comes first.
@param y y-coordinate of the vector.
@param x x-coordinate of the vector.
*/
CV_EXPORTS_W float fastAtan2(float y, float x);
/** proxy for hal::LU (single precision).
NOTE(review): the meaning of A/astep/m/b/bstep/n and of the int result is defined by hal::LU,
which is not visible in this header - confirm there before relying on it. */
CV_EXPORTS int LU(float* A, size_t astep, int m, float* b, size_t bstep, int n);
/** proxy for hal::LU (double precision); same parameter list as the float overload */
CV_EXPORTS int LU(double* A, size_t astep, int m, double* b, size_t bstep, int n);
/** proxy for hal::Cholesky (single precision).
NOTE(review): the meaning of the parameters and of the bool result is defined by hal::Cholesky,
which is not visible in this header - confirm there before relying on it. */
CV_EXPORTS bool Cholesky(float* A, size_t astep, int m, float* b, size_t bstep, int n);
/** proxy for hal::Cholesky (double precision); same parameter list as the float overload */
CV_EXPORTS bool Cholesky(double* A, size_t astep, int m, double* b, size_t bstep, int n);
////////////////// forward declarations for important OpenCV types //////////////////
//! @cond IGNORED
// Forward declarations only: they let the rest of the headers mention these types by
// pointer/reference before the full definitions are seen.
// -- class templates
template<typename _Tp, int cn> class Vec;
template<typename _Tp, int m, int n> class Matx;
template<typename _Tp> class Complex;
template<typename _Tp> class Point_;
template<typename _Tp> class Point3_;
template<typename _Tp> class Size_;
template<typename _Tp> class Rect_;
template<typename _Tp> class Scalar_;
// -- non-template classes exported from the library (CV_EXPORTS)
class CV_EXPORTS RotatedRect;
class CV_EXPORTS Range;
class CV_EXPORTS TermCriteria;
class CV_EXPORTS KeyPoint;
class CV_EXPORTS DMatch;
class CV_EXPORTS RNG;
class CV_EXPORTS Mat;
class CV_EXPORTS MatExpr;
class CV_EXPORTS UMat;
class CV_EXPORTS SparseMat;
// MatND is just another name for Mat
typedef Mat MatND;
// -- typed matrix wrappers and the matrix iterator classes
template<typename _Tp> class Mat_;
template<typename _Tp> class SparseMat_;
class CV_EXPORTS MatConstIterator;
class CV_EXPORTS SparseMatIterator;
class CV_EXPORTS SparseMatConstIterator;
template<typename _Tp> class MatIterator_;
template<typename _Tp> class MatConstIterator_;
template<typename _Tp> class SparseMatIterator_;
template<typename _Tp> class SparseMatConstIterator_;
// Forward declarations of the cv::ogl classes
namespace ogl
{
class CV_EXPORTS Buffer;
class CV_EXPORTS Texture2D;
class CV_EXPORTS Arrays;
}
// Forward declarations of the cv::cuda classes
namespace cuda
{
class CV_EXPORTS GpuMat;
class CV_EXPORTS HostMem;
class CV_EXPORTS Stream;
class CV_EXPORTS Event;
}
// Forward declaration of the cv::cudev typed GpuMat template
namespace cudev
{
template <typename _Tp> class GpuMat_;
}
// Declarations of the cv::ipp status / enable-switch functions.
// NOTE(review): only the signatures are visible here; the exact semantics of the status code and
// of the error-location string are defined by the implementation - confirm there.
namespace ipp
{
// records a status value, optionally with the function name, file name and line it came from
CV_EXPORTS void setIppStatus(int status, const char * const funcname = NULL, const char * const filename = NULL,
int line = 0);
// returns the currently recorded status value
CV_EXPORTS int getIppStatus();
// returns the recorded error location as a String
CV_EXPORTS String getIppErrorLocation();
// getter / setter for the "use IPP" flag
CV_EXPORTS bool useIPP();
CV_EXPORTS void setUseIPP(bool flag);
} // ipp
//! @endcond
//! @} core_utils
//! @addtogroup core_utils_neon
//! @{
#if CV_NEON
//! Converts two floats to signed 32-bit integers after adding 0.5 carrying each lane's own sign.
//! NOTE(review): presumably vcvt_s32_f32 truncates toward zero, which would make this
//! round-half-away-from-zero - confirm against the NEON reference.
inline int32x2_t cv_vrnd_s32_f32(float32x2_t v)
{
    // lane constants: the sign-bit mask and the bit pattern of 0.5f
    static int32x2_t sign_mask = vdup_n_s32(1 << 31);
    static int32x2_t half_bits = vreinterpret_s32_f32(vdup_n_f32(0.5f));

    // OR the sign bit of every input lane onto 0.5f
    int32x2_t lane_sign = vand_s32(sign_mask, vreinterpret_s32_f32(v));
    int32x2_t signed_half = vorr_s32(half_bits, lane_sign);

    float32x2_t shifted = vadd_f32(v, vreinterpret_f32_s32(signed_half));
    return vcvt_s32_f32(shifted);
}
//! Four-lane variant: converts four floats to signed 32-bit integers after adding 0.5 carrying
//! each lane's own sign.
//! NOTE(review): presumably vcvtq_s32_f32 truncates toward zero - confirm against the NEON reference.
inline int32x4_t cv_vrndq_s32_f32(float32x4_t v)
{
    // lane constants: the sign-bit mask and the bit pattern of 0.5f
    static int32x4_t sign_mask = vdupq_n_s32(1 << 31);
    static int32x4_t half_bits = vreinterpretq_s32_f32(vdupq_n_f32(0.5f));

    // OR the sign bit of every input lane onto 0.5f
    int32x4_t lane_sign = vandq_s32(sign_mask, vreinterpretq_s32_f32(v));
    int32x4_t signed_half = vorrq_s32(half_bits, lane_sign);

    float32x4_t shifted = vaddq_f32(v, vreinterpretq_f32_s32(signed_half));
    return vcvtq_s32_f32(shifted);
}
//! Converts two floats to unsigned 32-bit integers after adding 0.5 to each lane.
inline uint32x2_t cv_vrnd_u32_f32(float32x2_t v)
{
    static float32x2_t half = vdup_n_f32(0.5f);
    float32x2_t shifted = vadd_f32(v, half);
    return vcvt_u32_f32(shifted);
}
//! Four-lane variant: converts four floats to unsigned 32-bit integers after adding 0.5 to each lane.
inline uint32x4_t cv_vrndq_u32_f32(float32x4_t v)
{
    static float32x4_t half = vdupq_n_f32(0.5f);
    float32x4_t shifted = vaddq_f32(v, half);
    return vcvtq_u32_f32(shifted);
}
//! Per-lane reciprocal of four floats: starts from the vrecpeq_f32 estimate and applies two
//! vrecpsq_f32-based refinement steps.
inline float32x4_t cv_vrecpq_f32(float32x4_t val)
{
    float32x4_t estimate = vrecpeq_f32(val);
    for( int step = 0; step < 2; ++step )
    {
        const float32x4_t correction = vrecpsq_f32(val, estimate);
        estimate = vmulq_f32(correction, estimate);
    }
    return estimate;
}
//! Per-lane reciprocal of two floats: starts from the vrecpe_f32 estimate and applies two
//! vrecps_f32-based refinement steps.
inline float32x2_t cv_vrecp_f32(float32x2_t val)
{
    float32x2_t estimate = vrecpe_f32(val);
    for( int step = 0; step < 2; ++step )
    {
        const float32x2_t correction = vrecps_f32(val, estimate);
        estimate = vmul_f32(correction, estimate);
    }
    return estimate;
}
//! Per-lane reciprocal square root of four floats: starts from the vrsqrteq_f32 estimate and
//! applies two vrsqrtsq_f32-based refinement steps.
inline float32x4_t cv_vrsqrtq_f32(float32x4_t val)
{
    float32x4_t estimate = vrsqrteq_f32(val);
    for( int step = 0; step < 2; ++step )
    {
        const float32x4_t squared = vmulq_f32(estimate, estimate);
        const float32x4_t correction = vrsqrtsq_f32(squared, val);
        estimate = vmulq_f32(correction, estimate);
    }
    return estimate;
}
//! Per-lane reciprocal square root of two floats: starts from the vrsqrte_f32 estimate and
//! applies two vrsqrts_f32-based refinement steps.
inline float32x2_t cv_vrsqrt_f32(float32x2_t val)
{
    float32x2_t estimate = vrsqrte_f32(val);
    for( int step = 0; step < 2; ++step )
    {
        const float32x2_t squared = vmul_f32(estimate, estimate);
        const float32x2_t correction = vrsqrts_f32(squared, val);
        estimate = vmul_f32(correction, estimate);
    }
    return estimate;
}
//! Per-lane square root of four floats, formed as the reciprocal of the reciprocal square root.
//! NOTE(review): the result for val == 0 depends on how the estimate instructions treat 0/inf -
//! confirm before relying on it.
inline float32x4_t cv_vsqrtq_f32(float32x4_t val)
{
    const float32x4_t inv_root = cv_vrsqrtq_f32(val);
    return cv_vrecpq_f32(inv_root);
}
//! Per-lane square root of two floats, formed as the reciprocal of the reciprocal square root.
//! NOTE(review): the result for val == 0 depends on how the estimate instructions treat 0/inf -
//! confirm before relying on it.
inline float32x2_t cv_vsqrt_f32(float32x2_t val)
{
    const float32x2_t inv_root = cv_vrsqrt_f32(val);
    return cv_vrecp_f32(inv_root);
}
#endif
//! @} core_utils_neon
} // cv
#include "sse_utils.hpp"
#endif //__OPENCV_CORE_BASE_HPP__

View File

@ -0,0 +1,31 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
//
// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved.
#ifndef __OPENCV_CORE_BUFFER_POOL_HPP__
#define __OPENCV_CORE_BUFFER_POOL_HPP__
namespace cv
{
//! @addtogroup core
//! @{
// Abstract interface for querying and limiting the memory reserved by a buffer pool.
// All operations are pure virtual; implementations are provided elsewhere.
class BufferPoolController
{
protected:
// Protected and non-virtual: a controller cannot be deleted through a pointer to this interface.
~BufferPoolController() { }
public:
// returns the currently reserved size (NOTE(review): presumably in bytes - confirm with implementations)
virtual size_t getReservedSize() const = 0;
// returns the configured upper limit for the reserved size
virtual size_t getMaxReservedSize() const = 0;
// sets the upper limit for the reserved size
virtual void setMaxReservedSize(size_t size) = 0;
// releases all buffers currently held in reserve
virtual void freeAllReservedBuffers() = 0;
};
//! @}
}
#endif // __OPENCV_CORE_BUFFER_POOL_HPP__

48
3rdparty/include/opencv2/core/core.hpp vendored Normal file
View File

@ -0,0 +1,48 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifdef __OPENCV_BUILD
#error this is a compatibility header which should not be used inside the OpenCV library
#endif
#include "opencv2/core.hpp"

3152
3rdparty/include/opencv2/core/core_c.h vendored Normal file

File diff suppressed because it is too large Load Diff

845
3rdparty/include/opencv2/core/cuda.hpp vendored Normal file
View File

@ -0,0 +1,845 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_CORE_CUDA_HPP__
#define __OPENCV_CORE_CUDA_HPP__
#ifndef __cplusplus
# error cuda.hpp header must be compiled as C++
#endif
#include "opencv2/core.hpp"
#include "opencv2/core/cuda_types.hpp"
/**
@defgroup cuda CUDA-accelerated Computer Vision
@{
@defgroup cudacore Core part
@{
@defgroup cudacore_init Initialization and Information
@defgroup cudacore_struct Data Structures
@}
@}
*/
namespace cv { namespace cuda {
//! @addtogroup cudacore_struct
//! @{
//===================================================================================
// GpuMat
//===================================================================================
/** @brief Base storage class for GPU memory with reference counting.
Its interface matches the Mat interface with the following limitations:
- no arbitrary dimensions support (only 2D)
- no functions that return references to their data (because references on GPU are not valid for
CPU)
- no expression templates technique support
Beware that the latter limitation may lead to overloaded matrix operators that cause memory
allocations. The GpuMat class is convertible to cuda::PtrStepSz and cuda::PtrStep so it can be
passed directly to the kernel.
@note In contrast with Mat, in most cases GpuMat::isContinuous() == false . This means that rows are
aligned to a size depending on the hardware. Single-row GpuMat is always a continuous matrix.
@note You are not recommended to leave static or global GpuMat variables allocated, that is, to rely
on its destructor. The destruction order of such variables and CUDA context is undefined. GPU memory
release function returns error if the CUDA context has been destroyed before.
@sa Mat
*/
class CV_EXPORTS GpuMat
{
public:
//! interface for custom GpuMat memory allocation
class CV_EXPORTS Allocator
{
public:
virtual ~Allocator() {}
// allocator must fill data, step and refcount fields
virtual bool allocate(GpuMat* mat, int rows, int cols, size_t elemSize) = 0;
virtual void free(GpuMat* mat) = 0;
};
//! default allocator
static Allocator* defaultAllocator();
//! replaces the default allocator
static void setDefaultAllocator(Allocator* allocator);
//! default constructor
explicit GpuMat(Allocator* allocator = defaultAllocator());
//! constructs GpuMat of the specified size and type
GpuMat(int rows, int cols, int type, Allocator* allocator = defaultAllocator());
GpuMat(Size size, int type, Allocator* allocator = defaultAllocator());
//! constructs GpuMat and fills it with the specified value s
GpuMat(int rows, int cols, int type, Scalar s, Allocator* allocator = defaultAllocator());
GpuMat(Size size, int type, Scalar s, Allocator* allocator = defaultAllocator());
//! copy constructor
GpuMat(const GpuMat& m);
//! constructor for GpuMat headers pointing to user-allocated data
GpuMat(int rows, int cols, int type, void* data, size_t step = Mat::AUTO_STEP);
GpuMat(Size size, int type, void* data, size_t step = Mat::AUTO_STEP);
//! creates a GpuMat header for a part of the bigger matrix
GpuMat(const GpuMat& m, Range rowRange, Range colRange);
GpuMat(const GpuMat& m, Rect roi);
//! builds GpuMat from host memory (Blocking call)
explicit GpuMat(InputArray arr, Allocator* allocator = defaultAllocator());
//! destructor - calls release()
~GpuMat();
//! assignment operators
GpuMat& operator =(const GpuMat& m);
//! allocates new GpuMat data unless the GpuMat already has specified size and type
void create(int rows, int cols, int type);
void create(Size size, int type);
//! decreases reference counter, deallocates the data when reference counter reaches 0
void release();
//! swaps with other smart pointer
void swap(GpuMat& mat);
//! uploads data to GpuMat (Blocking call)
void upload(InputArray arr);
//! uploads data to GpuMat (Non-Blocking call)
void upload(InputArray arr, Stream& stream);
//! downloads data from device to host memory (Blocking call)
void download(OutputArray dst) const;
//! downloads data from device to host memory (Non-Blocking call)
void download(OutputArray dst, Stream& stream) const;
//! returns deep copy of the GpuMat, i.e. the data is copied
GpuMat clone() const;
//! copies the GpuMat content to device memory (Blocking call)
void copyTo(OutputArray dst) const;
//! copies the GpuMat content to device memory (Non-Blocking call)
void copyTo(OutputArray dst, Stream& stream) const;
//! copies those GpuMat elements to dst that are marked with non-zero mask elements (Blocking call)
void copyTo(OutputArray dst, InputArray mask) const;
//! copies those GpuMat elements to dst that are marked with non-zero mask elements (Non-Blocking call)
void copyTo(OutputArray dst, InputArray mask, Stream& stream) const;
//! sets some of the GpuMat elements to s (Blocking call)
GpuMat& setTo(Scalar s);
//! sets some of the GpuMat elements to s (Non-Blocking call)
GpuMat& setTo(Scalar s, Stream& stream);
//! sets some of the GpuMat elements to s, according to the mask (Blocking call)
GpuMat& setTo(Scalar s, InputArray mask);
//! sets some of the GpuMat elements to s, according to the mask (Non-Blocking call)
GpuMat& setTo(Scalar s, InputArray mask, Stream& stream);
//! converts GpuMat to another datatype (Blocking call)
void convertTo(OutputArray dst, int rtype) const;
//! converts GpuMat to another datatype (Non-Blocking call)
void convertTo(OutputArray dst, int rtype, Stream& stream) const;
//! converts GpuMat to another datatype with scaling (Blocking call)
void convertTo(OutputArray dst, int rtype, double alpha, double beta = 0.0) const;
//! converts GpuMat to another datatype with scaling by alpha only (Non-Blocking call)
void convertTo(OutputArray dst, int rtype, double alpha, Stream& stream) const;
//! converts GpuMat to another datatype with scaling (Non-Blocking call)
void convertTo(OutputArray dst, int rtype, double alpha, double beta, Stream& stream) const;
// NOTE(review): undocumented in this header; presumably mirrors Mat::assignTo
// (type == -1 meaning "keep the current type") - confirm against the implementation.
void assignTo(GpuMat& m, int type=-1) const;
//! returns pointer to y-th row
uchar* ptr(int y = 0);
const uchar* ptr(int y = 0) const;
//! template version of the above method
template<typename _Tp> _Tp* ptr(int y = 0);
template<typename _Tp> const _Tp* ptr(int y = 0) const;
//! conversions to the typed PtrStepSz / PtrStep wrappers
template <typename _Tp> operator PtrStepSz<_Tp>() const;
template <typename _Tp> operator PtrStep<_Tp>() const;
//! returns a new GpuMat header for the specified row
GpuMat row(int y) const;
//! returns a new GpuMat header for the specified column
GpuMat col(int x) const;
//! ... for the specified row span
GpuMat rowRange(int startrow, int endrow) const;
GpuMat rowRange(Range r) const;
//! ... for the specified column span
GpuMat colRange(int startcol, int endcol) const;
GpuMat colRange(Range r) const;
//! extracts a rectangular sub-GpuMat (this is a generalized form of row, rowRange etc.)
GpuMat operator ()(Range rowRange, Range colRange) const;
GpuMat operator ()(Rect roi) const;
//! creates alternative GpuMat header for the same data, with different
//! number of channels and/or different number of rows
GpuMat reshape(int cn, int rows = 0) const;
//! locates GpuMat header within a parent GpuMat
void locateROI(Size& wholeSize, Point& ofs) const;
//! moves/resizes the current GpuMat ROI inside the parent GpuMat
GpuMat& adjustROI(int dtop, int dbottom, int dleft, int dright);
//! returns true iff the GpuMat data is continuous
//! (i.e. when there are no gaps between successive rows)
bool isContinuous() const;
//! returns element size in bytes
size_t elemSize() const;
//! returns the size of element channel in bytes
size_t elemSize1() const;
//! returns element type
int type() const;
//! returns element depth
int depth() const;
//! returns number of channels
int channels() const;
//! returns step/elemSize1()
size_t step1() const;
//! returns GpuMat size : width == number of columns, height == number of rows
Size size() const;
//! returns true if GpuMat data is NULL
bool empty() const;
/*! includes several bit-fields:
- the magic signature
- continuity flag
- depth
- number of channels
*/
int flags;
//! the number of rows and columns
int rows, cols;
//! a distance between successive rows in bytes; includes the gap if any
size_t step;
//! pointer to the data
uchar* data;
//! pointer to the reference counter;
//! when GpuMat points to user-allocated data, the pointer is NULL
int* refcount;
//! helper fields used in locateROI and adjustROI
uchar* datastart;
const uchar* dataend;
//! allocator
Allocator* allocator;
};
/** @brief Creates a continuous matrix.
@param rows Row count.
@param cols Column count.
@param type Type of the matrix.
@param arr Destination matrix. This parameter changes only if it has a proper type and area (
\f$\texttt{rows} \times \texttt{cols}\f$ ).
A matrix is called continuous if its elements are stored continuously, that is, without gaps at the
end of each row.
*/
CV_EXPORTS void createContinuous(int rows, int cols, int type, OutputArray arr);
/** @brief Ensures that the size of a matrix is big enough and the matrix has a proper type.
@param rows Minimum desired number of rows.
@param cols Minimum desired number of columns.
@param type Desired matrix type.
@param arr Destination matrix.
The function does not reallocate memory if the matrix has proper attributes already.
*/
CV_EXPORTS void ensureSizeIsEnough(int rows, int cols, int type, OutputArray arr);
//! BufferPool management (must be called before Stream creation)
//! enables or disables BufferPool usage
CV_EXPORTS void setBufferPoolUsage(bool on);
//! configures the BufferPool for the given device.
//! NOTE(review): the units of stackSize and the exact meaning of stackCount are not visible in
//! this header - confirm against the implementation.
CV_EXPORTS void setBufferPoolConfig(int deviceId, size_t stackSize, int stackCount);
//===================================================================================
// HostMem
//===================================================================================
/** @brief Class with reference counting wrapping special memory type allocation functions from CUDA.
Its interface is also Mat-like but with additional memory type parameters.
- **PAGE_LOCKED** sets a page locked memory type used commonly for fast and asynchronous
uploading/downloading data from/to GPU.
- **SHARED** specifies a zero copy memory allocation that enables mapping the host memory to GPU
address space, if supported.
- **WRITE_COMBINED** sets the write combined buffer that is not cached by CPU. Such buffers are
used to supply GPU with data when GPU only reads it. The advantage is a better CPU cache
utilization.
@note Allocation size of such memory types is usually limited. For more details, see *CUDA 2.2
Pinned Memory APIs* document or *CUDA C Programming Guide*.
*/
class CV_EXPORTS HostMem
{
public:
//! memory kinds described in the class documentation above; the values are distinct bits
enum AllocType { PAGE_LOCKED = 1, SHARED = 2, WRITE_COMBINED = 4 };
//! returns a MatAllocator for the given allocation type
static MatAllocator* getAllocator(AllocType alloc_type = PAGE_LOCKED);
//! default constructor; only records the allocation type
explicit HostMem(AllocType alloc_type = PAGE_LOCKED);
//! copy constructor
HostMem(const HostMem& m);
//! constructs HostMem of the specified size and type
HostMem(int rows, int cols, int type, AllocType alloc_type = PAGE_LOCKED);
HostMem(Size size, int type, AllocType alloc_type = PAGE_LOCKED);
//! creates from host memory, copying the data
explicit HostMem(InputArray arr, AllocType alloc_type = PAGE_LOCKED);
~HostMem();
//! assignment operator
HostMem& operator =(const HostMem& m);
//! swaps with other smart pointer
void swap(HostMem& b);
//! returns deep copy of the matrix, i.e. the data is copied
HostMem clone() const;
//! allocates new matrix data unless the matrix already has specified size and type.
void create(int rows, int cols, int type);
void create(Size size, int type);
//! creates alternative HostMem header for the same data, with different
//! number of channels and/or different number of rows
HostMem reshape(int cn, int rows = 0) const;
//! decrements reference counter and releases memory if needed.
void release();
//! returns matrix header with disabled reference counting for HostMem data.
Mat createMatHeader() const;
/** @brief Maps CPU memory to GPU address space and creates the cuda::GpuMat header without reference counting
for it.
This can be done only if memory was allocated with the SHARED flag and if it is supported by the
hardware. Laptops often share video and CPU memory, so address spaces can be mapped, which
eliminates an extra copy.
*/
GpuMat createGpuMatHeader() const;
// Please see cv::Mat for descriptions
bool isContinuous() const;
size_t elemSize() const;
size_t elemSize1() const;
int type() const;
int depth() const;
int channels() const;
size_t step1() const;
Size size() const;
bool empty() const;
// Please see cv::Mat for descriptions
int flags;
int rows, cols;
size_t step;
uchar* data;
int* refcount;
uchar* datastart;
const uchar* dataend;
// allocation type this HostMem was created with
AllocType alloc_type;
};
/** @brief Page-locks the memory of a matrix and maps it for the device(s).
@param m Input matrix.
*/
CV_EXPORTS void registerPageLocked(Mat& m);
/** @brief Unmaps the memory of a matrix and makes it pageable again.
Counterpart of registerPageLocked.
@param m Input matrix.
*/
CV_EXPORTS void unregisterPageLocked(Mat& m);
//===================================================================================
// Stream
//===================================================================================
/** @brief This class encapsulates a queue of asynchronous calls.
@note Currently, you may face problems if an operation is enqueued twice with different data. Some
functions use the constant GPU memory, and next call may update the memory before the previous one
has been finished. But calling different operations asynchronously is safe because each operation
has its own constant buffer. Memory copy/upload/download/set operations to the buffers you hold are
also safe.
*/
class CV_EXPORTS Stream
{
// Safe-bool idiom: the object converts to a pointer-to-member type (bool_type) instead of bool,
// so it can be tested in a condition without being usable as an integer.
typedef void (Stream::*bool_type)() const;
void this_type_does_not_support_comparisons() const {}
public:
//! signature of the host callbacks accepted by enqueueHostCallback
typedef void (*StreamCallback)(int status, void* userData);
//! creates a new asynchronous stream
Stream();
/** @brief Returns true if the current stream queue is finished. Otherwise, it returns false.
*/
bool queryIfComplete() const;
/** @brief Blocks the current CPU thread until all operations in the stream are complete.
*/
void waitForCompletion();
/** @brief Makes a compute stream wait on an event.
*/
void waitEvent(const Event& event);
/** @brief Adds a callback to be called on the host after all currently enqueued items in the stream have
completed.
@note Callbacks must not make any CUDA API calls. Callbacks must not perform any synchronization
that may depend on outstanding device work or other callbacks that are not mandated to run earlier.
Callbacks without a mandated order (in independent streams) execute in undefined order and may be
serialized.
*/
void enqueueHostCallback(StreamCallback callback, void* userData);
//! return Stream object for default CUDA stream
static Stream& Null();
//! returns true if stream object is not default (!= 0)
operator bool_type() const;
//! implementation class; declared here, defined elsewhere
class Impl;
private:
Ptr<Impl> impl_;
//! wraps an existing implementation object; private, reachable only through the friends below
Stream(const Ptr<Impl>& impl);
friend struct StreamAccessor;
friend class BufferPool;
friend class DefaultDeviceInitializer;
};
//! Event that can be recorded into a Stream, queried or waited for, and used to measure the time
//! elapsed between two events.
class CV_EXPORTS Event
{
public:
//! creation flags; the non-default values are distinct bits
enum CreateFlags
{
DEFAULT = 0x00, /**< Default event flag */
BLOCKING_SYNC = 0x01, /**< Event uses blocking synchronization */
DISABLE_TIMING = 0x02, /**< Event will not record timing data */
INTERPROCESS = 0x04 /**< Event is suitable for interprocess use. DisableTiming must be set */
};
//! creates an event with the given flags
explicit Event(CreateFlags flags = DEFAULT);
//! records an event (into the default stream unless another one is given)
void record(Stream& stream = Stream::Null());
//! queries an event's status
bool queryIfComplete() const;
//! waits for an event to complete
void waitForCompletion();
//! computes the elapsed time between events
//! (NOTE(review): the time unit is not stated in this header - confirm against the implementation)
static float elapsedTime(const Event& start, const Event& end);
//! implementation class; declared here, defined elsewhere
class Impl;
private:
Ptr<Impl> impl_;
friend struct EventAccessor;
};
//! @} cudacore_struct
//===================================================================================
// Initialization & Info
//===================================================================================
//! @addtogroup cudacore_init
//! @{
/** @brief Returns the number of installed CUDA-enabled devices.
Use this function before any other CUDA function calls. If OpenCV is compiled without CUDA support,
this function returns 0.
*/
CV_EXPORTS int getCudaEnabledDeviceCount();
/** @brief Sets a device and initializes it for the current thread.
@param device System index of a CUDA device starting with 0.
If the call of this function is omitted, a default device is initialized at the first CUDA usage.
*/
CV_EXPORTS void setDevice(int device);
/** @brief Returns the current device index set by cuda::setDevice or initialized by default.
*/
CV_EXPORTS int getDevice();
/** @brief Explicitly destroys and cleans up all resources associated with the current device in the current
process.
Any subsequent API call to this device will reinitialize the device.
*/
CV_EXPORTS void resetDevice();
/** @brief Enumeration providing CUDA computing features.
Each FEATURE_SET_COMPUTE_xy enumerator has the numeric value xy. The named features at the end are
aliases of the FEATURE_SET_COMPUTE_xy value they are assigned from.
*/
enum FeatureSet
{
FEATURE_SET_COMPUTE_10 = 10,
FEATURE_SET_COMPUTE_11 = 11,
FEATURE_SET_COMPUTE_12 = 12,
FEATURE_SET_COMPUTE_13 = 13,
FEATURE_SET_COMPUTE_20 = 20,
FEATURE_SET_COMPUTE_21 = 21,
FEATURE_SET_COMPUTE_30 = 30,
FEATURE_SET_COMPUTE_32 = 32,
FEATURE_SET_COMPUTE_35 = 35,
FEATURE_SET_COMPUTE_50 = 50,
// named features, expressed through the FEATURE_SET_COMPUTE_xy value they alias
GLOBAL_ATOMICS = FEATURE_SET_COMPUTE_11,
SHARED_ATOMICS = FEATURE_SET_COMPUTE_12,
NATIVE_DOUBLE = FEATURE_SET_COMPUTE_13,
WARP_SHUFFLE_FUNCTIONS = FEATURE_SET_COMPUTE_30,
DYNAMIC_PARALLELISM = FEATURE_SET_COMPUTE_35
};
//! checks whether current device supports the given feature
CV_EXPORTS bool deviceSupports(FeatureSet feature_set);
/** @brief Class providing a set of static methods to check what NVIDIA\* card architecture the CUDA module was
built for.
According to the CUDA C Programming Guide Version 3.2: "PTX code produced for some specific compute
capability can always be compiled to binary code of greater or equal compute capability".
*/
class CV_EXPORTS TargetArchs
{
public:
/** @brief The following method checks whether the module was built with the support of the given feature:
@param feature_set Features to be checked. See cuda::FeatureSet.
*/
static bool builtWith(FeatureSet feature_set);
/** @brief There is a set of methods to check whether the module contains intermediate (PTX) or binary CUDA
code for the given architecture(s):
@param major Major compute capability version.
@param minor Minor compute capability version.
All the has* methods below take the same (major, minor) pair. Going by their names, the
Ptx / Bin suffix selects the kind of code that is checked and EqualOrLess / EqualOrGreater
selects how versions are compared against (major, minor) - confirm details in the implementation.
*/
static bool has(int major, int minor);
static bool hasPtx(int major, int minor);
static bool hasBin(int major, int minor);
static bool hasEqualOrLessPtx(int major, int minor);
static bool hasEqualOrGreater(int major, int minor);
static bool hasEqualOrGreaterPtx(int major, int minor);
static bool hasEqualOrGreaterBin(int major, int minor);
};
/** @brief Class providing functionality for querying the specified GPU properties.
*/
class CV_EXPORTS DeviceInfo
{
public:
//! creates DeviceInfo object for the current GPU
DeviceInfo();
/** @brief The constructors.
@param device_id System index of the CUDA device starting with 0.
Constructs the DeviceInfo object for the specified device. If the device_id parameter is omitted, it
constructs an object for the current device.
*/
DeviceInfo(int device_id);
/** @brief Returns system index of the CUDA device starting with 0.
*/
int deviceID() const;
//! ASCII string identifying device
const char* name() const;
//! global memory available on device in bytes
size_t totalGlobalMem() const;
//! shared memory available per block in bytes
size_t sharedMemPerBlock() const;
//! 32-bit registers available per block
int regsPerBlock() const;
//! warp size in threads
int warpSize() const;
//! maximum pitch in bytes allowed by memory copies
size_t memPitch() const;
//! maximum number of threads per block
int maxThreadsPerBlock() const;
//! maximum size of each dimension of a block
Vec3i maxThreadsDim() const;
//! maximum size of each dimension of a grid
Vec3i maxGridSize() const;
//! clock frequency in kilohertz
int clockRate() const;
//! constant memory available on device in bytes
size_t totalConstMem() const;
//! major compute capability
int majorVersion() const;
//! minor compute capability
int minorVersion() const;
//! alignment requirement for textures
size_t textureAlignment() const;
//! pitch alignment requirement for texture references bound to pitched memory
size_t texturePitchAlignment() const;
//! number of multiprocessors on device
int multiProcessorCount() const;
//! specified whether there is a run time limit on kernels
bool kernelExecTimeoutEnabled() const;
//! device is integrated as opposed to discrete
bool integrated() const;
//! device can map host memory with cudaHostAlloc/cudaHostGetDevicePointer
bool canMapHostMemory() const;
enum ComputeMode
{
ComputeModeDefault, /**< default compute mode (Multiple threads can use cudaSetDevice with this device) */
ComputeModeExclusive, /**< compute-exclusive-thread mode (Only one thread in one process will be able to use cudaSetDevice with this device) */
ComputeModeProhibited, /**< compute-prohibited mode (No threads can use cudaSetDevice with this device) */
ComputeModeExclusiveProcess /**< compute-exclusive-process mode (Many threads in one process will be able to use cudaSetDevice with this device) */
};
//! compute mode
ComputeMode computeMode() const;
//! maximum 1D texture size
int maxTexture1D() const;
//! maximum 1D mipmapped texture size
int maxTexture1DMipmap() const;
//! maximum size for 1D textures bound to linear memory
int maxTexture1DLinear() const;
//! maximum 2D texture dimensions
Vec2i maxTexture2D() const;
//! maximum 2D mipmapped texture dimensions
Vec2i maxTexture2DMipmap() const;
//! maximum dimensions (width, height, pitch) for 2D textures bound to pitched memory
Vec3i maxTexture2DLinear() const;
//! maximum 2D texture dimensions if texture gather operations have to be performed
Vec2i maxTexture2DGather() const;
//! maximum 3D texture dimensions
Vec3i maxTexture3D() const;
//! maximum Cubemap texture dimensions
int maxTextureCubemap() const;
//! maximum 1D layered texture dimensions
Vec2i maxTexture1DLayered() const;
//! maximum 2D layered texture dimensions
Vec3i maxTexture2DLayered() const;
//! maximum Cubemap layered texture dimensions
Vec2i maxTextureCubemapLayered() const;
//! maximum 1D surface size
int maxSurface1D() const;
//! maximum 2D surface dimensions
Vec2i maxSurface2D() const;
//! maximum 3D surface dimensions
Vec3i maxSurface3D() const;
//! maximum 1D layered surface dimensions
Vec2i maxSurface1DLayered() const;
//! maximum 2D layered surface dimensions
Vec3i maxSurface2DLayered() const;
//! maximum Cubemap surface dimensions
int maxSurfaceCubemap() const;
//! maximum Cubemap layered surface dimensions
Vec2i maxSurfaceCubemapLayered() const;
//! alignment requirements for surfaces
size_t surfaceAlignment() const;
//! device can possibly execute multiple kernels concurrently
bool concurrentKernels() const;
//! device has ECC support enabled
bool ECCEnabled() const;
//! PCI bus ID of the device
int pciBusID() const;
//! PCI device ID of the device
int pciDeviceID() const;
//! PCI domain ID of the device
int pciDomainID() const;
//! true if device is a Tesla device using TCC driver, false otherwise
bool tccDriver() const;
//! number of asynchronous engines
int asyncEngineCount() const;
//! device shares a unified address space with the host
bool unifiedAddressing() const;
//! peak memory clock frequency in kilohertz
int memoryClockRate() const;
//! global memory bus width in bits
int memoryBusWidth() const;
//! size of L2 cache in bytes
int l2CacheSize() const;
//! maximum resident threads per multiprocessor
int maxThreadsPerMultiProcessor() const;
//! gets free and total device memory
void queryMemory(size_t& totalMemory, size_t& freeMemory) const;
size_t freeMemory() const;
size_t totalMemory() const;
/** @brief Provides information on CUDA feature support.
@param feature_set Features to be checked. See cuda::FeatureSet.
This function returns true if the device has the specified CUDA feature. Otherwise, it returns false
*/
bool supports(FeatureSet feature_set) const;
/** @brief Checks the CUDA module and device compatibility.
This function returns true if the CUDA module can be run on the specified device. Otherwise, it
returns false .
*/
bool isCompatible() const;
private:
int device_id_;
};
//! Declared here, defined out of line.
//! NOTE(review): presumably prints the properties of the CUDA device with index `device` - confirm against the implementation.
CV_EXPORTS void printCudaDeviceInfo(int device);
//! Declared here, defined out of line.
//! NOTE(review): presumably a condensed variant of printCudaDeviceInfo() - confirm against the implementation.
CV_EXPORTS void printShortCudaDeviceInfo(int device);
//! @} cudacore_init
}} // namespace cv { namespace cuda {
#include "opencv2/core/cuda.inl.hpp"
#endif /* __OPENCV_CORE_CUDA_HPP__ */

View File

@ -0,0 +1,621 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_CORE_CUDAINL_HPP__
#define __OPENCV_CORE_CUDAINL_HPP__
#include "opencv2/core/cuda.hpp"
//! @cond IGNORED
namespace cv { namespace cuda {
//===================================================================================
// GpuMat
//===================================================================================
// Default constructor: zeroed header with no buffer attached; only the allocator is recorded.
inline
GpuMat::GpuMat(Allocator* allocator_)
    : flags(0), rows(0), cols(0), step(0),
      data(0), refcount(0), datastart(0), dataend(0),
      allocator(allocator_)
{
}
// Constructs a rows_ x cols_ matrix of type_. The header starts out zeroed and
// create() is only invoked when both dimensions are strictly positive.
inline
GpuMat::GpuMat(int rows_, int cols_, int type_, Allocator* allocator_)
    : flags(0), rows(0), cols(0), step(0),
      data(0), refcount(0), datastart(0), dataend(0),
      allocator(allocator_)
{
    if (rows_ <= 0 || cols_ <= 0)
        return;

    create(rows_, cols_, type_);
}
// Size-based variant of the sizing constructor: create() is only invoked when
// both size_.height and size_.width are strictly positive.
inline
GpuMat::GpuMat(Size size_, int type_, Allocator* allocator_)
    : flags(0), rows(0), cols(0), step(0),
      data(0), refcount(0), datastart(0), dataend(0),
      allocator(allocator_)
{
    if (size_.height <= 0 || size_.width <= 0)
        return;

    create(size_.height, size_.width, type_);
}
// Constructs a rows_ x cols_ matrix of type_ and fills it with s_.
// Nothing is created or filled unless both dimensions are strictly positive.
inline
GpuMat::GpuMat(int rows_, int cols_, int type_, Scalar s_, Allocator* allocator_)
    : flags(0), rows(0), cols(0), step(0),
      data(0), refcount(0), datastart(0), dataend(0),
      allocator(allocator_)
{
    if (rows_ <= 0 || cols_ <= 0)
        return;

    create(rows_, cols_, type_);
    setTo(s_);
}
// Size-based variant of the fill constructor: creates the matrix and sets every
// element to s_, but only when both size_.height and size_.width are positive.
inline
GpuMat::GpuMat(Size size_, int type_, Scalar s_, Allocator* allocator_)
    : flags(0), rows(0), cols(0), step(0),
      data(0), refcount(0), datastart(0), dataend(0),
      allocator(allocator_)
{
    if (size_.height <= 0 || size_.width <= 0)
        return;

    create(size_.height, size_.width, type_);
    setTo(s_);
}
// Copy constructor: copies every header field of m (including the data pointers)
// and, when m carries a reference counter, increments it by one.
inline
GpuMat::GpuMat(const GpuMat& m)
    : flags(m.flags), rows(m.rows), cols(m.cols), step(m.step),
      data(m.data), refcount(m.refcount),
      datastart(m.datastart), dataend(m.dataend),
      allocator(m.allocator)
{
    if (refcount != 0)
    {
        CV_XADD(refcount, 1);
    }
}
// Constructs an empty header and then fills it by calling upload(arr).
inline
GpuMat::GpuMat(InputArray arr, Allocator* allocator_)
    : flags(0), rows(0), cols(0), step(0),
      data(0), refcount(0), datastart(0), dataend(0),
      allocator(allocator_)
{
    upload(arr);
}
// Destructor: all cleanup is delegated to release().
inline
GpuMat::~GpuMat()
{
    this->release();
}
// Copy assignment implemented as copy-and-swap; self-assignment is a no-op.
inline
GpuMat& GpuMat::operator =(const GpuMat& m)
{
    if (this == &m)
        return *this;

    // The copy takes a reference on m's buffer; after the swap its destructor
    // releases whatever this object held before.
    GpuMat replacement(m);
    swap(replacement);
    return *this;
}
// Size overload: forwards to create(rows, cols, type) with height as rows and width as cols.
inline
void GpuMat::create(Size size_, int type_)
{
    const int nrows = size_.height;
    const int ncols = size_.width;
    create(nrows, ncols, type_);
}
// Exchanges the complete header state of this matrix with b, field by field.
inline
void GpuMat::swap(GpuMat& b)
{
    // geometry and type flags
    std::swap(flags, b.flags);
    std::swap(rows, b.rows);
    std::swap(cols, b.cols);
    std::swap(step, b.step);

    // buffer pointers and their bounds
    std::swap(data, b.data);
    std::swap(datastart, b.datastart);
    std::swap(dataend, b.dataend);

    // ownership bookkeeping
    std::swap(refcount, b.refcount);
    std::swap(allocator, b.allocator);
}
// Returns a new matrix obtained by running copyTo() into a default-constructed GpuMat.
inline
GpuMat GpuMat::clone() const
{
    GpuMat duplicate;
    copyTo(duplicate);
    return duplicate;
}
// Masked copy without an explicit stream: forwards to the stream overload using Stream::Null().
inline
void GpuMat::copyTo(OutputArray dst, InputArray mask) const
{
    Stream& defaultStream = Stream::Null();
    copyTo(dst, mask, defaultStream);
}
// Fill without an explicit stream: forwards to the stream overload using Stream::Null().
inline
GpuMat& GpuMat::setTo(Scalar s)
{
    Stream& defaultStream = Stream::Null();
    return setTo(s, defaultStream);
}
// Masked fill without an explicit stream: forwards to the stream overload using Stream::Null().
inline
GpuMat& GpuMat::setTo(Scalar s, InputArray mask)
{
    Stream& defaultStream = Stream::Null();
    return setTo(s, mask, defaultStream);
}
// Type conversion without an explicit stream: forwards to the stream overload using Stream::Null().
inline
void GpuMat::convertTo(OutputArray dst, int rtype) const
{
    Stream& defaultStream = Stream::Null();
    convertTo(dst, rtype, defaultStream);
}
// Conversion with alpha/beta and no explicit stream: forwards to the
// (alpha, beta, stream) overload using Stream::Null().
inline
void GpuMat::convertTo(OutputArray dst, int rtype, double alpha, double beta) const
{
    Stream& defaultStream = Stream::Null();
    convertTo(dst, rtype, alpha, beta, defaultStream);
}
// Conversion with alpha only on the given stream: forwards to the full overload with beta = 0.0.
inline
void GpuMat::convertTo(OutputArray dst, int rtype, double alpha, Stream& stream) const
{
    const double noOffset = 0.0;
    convertTo(dst, rtype, alpha, noOffset, stream);
}
// Assigns this matrix to m. A negative _type means plain assignment (m = *this);
// any other value goes through convertTo(m, _type).
inline
void GpuMat::assignTo(GpuMat& m, int _type) const
{
    if (_type >= 0)
    {
        convertTo(m, _type);
        return;
    }

    m = *this;
}
// Returns the address of row y, i.e. data advanced by y * step bytes.
// The row index is range-checked by a debug assertion.
inline
uchar* GpuMat::ptr(int y)
{
    CV_DbgAssert( (unsigned)y < (unsigned)rows );

    uchar* const rowStart = data + step * y;
    return rowStart;
}
// Const overload: returns the address of row y, i.e. data advanced by y * step bytes.
// The row index is range-checked by a debug assertion.
inline
const uchar* GpuMat::ptr(int y) const
{
    CV_DbgAssert( (unsigned)y < (unsigned)rows );

    const uchar* const rowStart = data + step * y;
    return rowStart;
}
template<typename _Tp> inline
_Tp* GpuMat::ptr(int y)
{
return (_Tp*)ptr(y);
}
template<typename _Tp> inline
const _Tp* GpuMat::ptr(int y) const
{
return (const _Tp*)ptr(y);
}
// Conversion to PtrStepSz<T>: packs rows, cols, the data pointer cast to T*, and step.
template <class T> inline
GpuMat::operator PtrStepSz<T>() const
{
    T* const typedData = (T*)data;
    return PtrStepSz<T>(rows, cols, typedData, step);
}
// Conversion to PtrStep<T>: packs the data pointer cast to T* together with step.
template <class T> inline
GpuMat::operator PtrStep<T>() const
{
    T* const typedData = (T*)data;
    return PtrStep<T>(typedData, step);
}
// Returns a GpuMat built from this matrix restricted to rows [y, y+1) and all columns.
inline
GpuMat GpuMat::row(int y) const
{
    const Range singleRow(y, y+1);
    return GpuMat(*this, singleRow, Range::all());
}
// Returns a GpuMat built from this matrix restricted to all rows and columns [x, x+1).
inline
GpuMat GpuMat::col(int x) const
{
    const Range singleCol(x, x+1);
    return GpuMat(*this, Range::all(), singleCol);
}
// Returns a GpuMat built from this matrix restricted to Range(startrow, endrow) and all columns.
inline
GpuMat GpuMat::rowRange(int startrow, int endrow) const
{
    const Range rowSpan(startrow, endrow);
    return GpuMat(*this, rowSpan, Range::all());
}
// Range overload: returns a GpuMat built from this matrix restricted to rows r and all columns.
inline
GpuMat GpuMat::rowRange(Range r) const
{
    const Range everyCol = Range::all();
    return GpuMat(*this, r, everyCol);
}
// Returns a GpuMat built from this matrix restricted to all rows and Range(startcol, endcol).
inline
GpuMat GpuMat::colRange(int startcol, int endcol) const
{
    const Range colSpan(startcol, endcol);
    return GpuMat(*this, Range::all(), colSpan);
}
// Range overload: returns a GpuMat built from this matrix restricted to all rows and columns r.
inline
GpuMat GpuMat::colRange(Range r) const
{
    const Range everyRow = Range::all();
    return GpuMat(*this, everyRow, r);
}
// Sub-matrix selection by row and column ranges; delegates to the
// (GpuMat, Range, Range) constructor.
inline
GpuMat GpuMat::operator ()(Range rowRange_, Range colRange_) const
{
    const GpuMat& self = *this;
    return GpuMat(self, rowRange_, colRange_);
}
// Sub-matrix selection by rectangle; delegates to the (GpuMat, Rect) constructor.
inline
GpuMat GpuMat::operator ()(Rect roi) const
{
    const GpuMat& self = *this;
    return GpuMat(self, roi);
}
// True when Mat::CONTINUOUS_FLAG is set in flags.
inline
bool GpuMat::isContinuous() const
{
    const int continuousBit = flags & Mat::CONTINUOUS_FLAG;
    return continuousBit != 0;
}
// Element size as computed by CV_ELEM_SIZE from flags.
inline
size_t GpuMat::elemSize() const
{
    const size_t elementBytes = CV_ELEM_SIZE(flags);
    return elementBytes;
}
// Per-channel element size as computed by CV_ELEM_SIZE1 from flags.
inline
size_t GpuMat::elemSize1() const
{
    const size_t channelBytes = CV_ELEM_SIZE1(flags);
    return channelBytes;
}
// Matrix type extracted from flags with CV_MAT_TYPE.
inline
int GpuMat::type() const
{
    const int matType = CV_MAT_TYPE(flags);
    return matType;
}
// Depth extracted from flags with CV_MAT_DEPTH.
inline
int GpuMat::depth() const
{
    const int matDepth = CV_MAT_DEPTH(flags);
    return matDepth;
}
// Channel count extracted from flags with CV_MAT_CN.
inline
int GpuMat::channels() const
{
    const int channelCount = CV_MAT_CN(flags);
    return channelCount;
}
// Row step divided by elemSize1().
inline
size_t GpuMat::step1() const
{
    const size_t channelBytes = elemSize1();
    return step / channelBytes;
}
// Matrix dimensions as a Size; note the (cols, rows) argument order.
inline
Size GpuMat::size() const
{
    const Size dims(cols, rows);
    return dims;
}
// A matrix is empty when its data pointer is null.
inline
bool GpuMat::empty() const
{
    return !data;
}
// Convenience overload: runs the output-parameter createContinuous() on a fresh
// GpuMat and returns it.
static inline
GpuMat createContinuous(int rows, int cols, int type)
{
    GpuMat result;
    createContinuous(rows, cols, type, result);
    return result;
}
// Size overload: forwards to createContinuous(rows, cols, type, arr) with
// height as rows and width as cols.
static inline
void createContinuous(Size size, int type, OutputArray arr)
{
    const int nrows = size.height;
    const int ncols = size.width;
    createContinuous(nrows, ncols, type, arr);
}
// Convenience overload: runs createContinuous(size, type, arr) on a fresh
// GpuMat and returns it.
static inline
GpuMat createContinuous(Size size, int type)
{
    GpuMat result;
    createContinuous(size, type, result);
    return result;
}
// Size overload: forwards to ensureSizeIsEnough(rows, cols, type, arr) with
// height as rows and width as cols.
static inline
void ensureSizeIsEnough(Size size, int type, OutputArray arr)
{
    const int nrows = size.height;
    const int ncols = size.width;
    ensureSizeIsEnough(nrows, ncols, type, arr);
}
// Free-function swap for GpuMat; simply calls the member swap on a.
static inline
void swap(GpuMat& a, GpuMat& b)
{
    GpuMat& lhs = a;
    lhs.swap(b);
}
//===================================================================================
// HostMem
//===================================================================================
// Default constructor: zeroed header with no buffer attached; only the allocation type is recorded.
inline
HostMem::HostMem(AllocType alloc_type_)
    : flags(0), rows(0), cols(0), step(0),
      data(0), refcount(0), datastart(0), dataend(0),
      alloc_type(alloc_type_)
{
}
// Copy constructor: copies every header field of m (including the data pointers)
// and, when m carries a reference counter, increments it by one.
inline
HostMem::HostMem(const HostMem& m)
    : flags(m.flags), rows(m.rows), cols(m.cols), step(m.step),
      data(m.data), refcount(m.refcount),
      datastart(m.datastart), dataend(m.dataend),
      alloc_type(m.alloc_type)
{
    if (refcount != 0)
    {
        CV_XADD(refcount, 1);
    }
}
// Constructs a rows_ x cols_ buffer of type_. The header starts out zeroed and
// create() is only invoked when both dimensions are strictly positive.
inline
HostMem::HostMem(int rows_, int cols_, int type_, AllocType alloc_type_)
    : flags(0), rows(0), cols(0), step(0),
      data(0), refcount(0), datastart(0), dataend(0),
      alloc_type(alloc_type_)
{
    if (rows_ <= 0 || cols_ <= 0)
        return;

    create(rows_, cols_, type_);
}
// Size-based variant of the sizing constructor: create() is only invoked when
// both size_.height and size_.width are strictly positive.
inline
HostMem::HostMem(Size size_, int type_, AllocType alloc_type_)
    : flags(0), rows(0), cols(0), step(0),
      data(0), refcount(0), datastart(0), dataend(0),
      alloc_type(alloc_type_)
{
    if (size_.height <= 0 || size_.width <= 0)
        return;

    create(size_.height, size_.width, type_);
}
// Constructs an empty header, then copies arr's Mat contents into this object via Mat::copyTo.
inline
HostMem::HostMem(InputArray arr, AllocType alloc_type_)
    : flags(0), rows(0), cols(0), step(0),
      data(0), refcount(0), datastart(0), dataend(0),
      alloc_type(alloc_type_)
{
    arr.getMat().copyTo(*this);
}
// Destructor: all cleanup is delegated to release().
inline
HostMem::~HostMem()
{
    this->release();
}
// Copy assignment implemented as copy-and-swap; self-assignment is a no-op.
inline
HostMem& HostMem::operator =(const HostMem& m)
{
    if (this == &m)
        return *this;

    // The copy takes a reference on m's buffer; after the swap its destructor
    // releases whatever this object held before.
    HostMem replacement(m);
    swap(replacement);
    return *this;
}
// Exchanges the complete header state of this object with b, field by field.
inline
void HostMem::swap(HostMem& b)
{
    // geometry and type flags
    std::swap(flags, b.flags);
    std::swap(rows, b.rows);
    std::swap(cols, b.cols);
    std::swap(step, b.step);

    // buffer pointers and their bounds
    std::swap(data, b.data);
    std::swap(datastart, b.datastart);
    std::swap(dataend, b.dataend);

    // ownership bookkeeping
    std::swap(refcount, b.refcount);
    std::swap(alloc_type, b.alloc_type);
}
// Returns a new HostMem with the same size, type and allocation type, filled by
// copying from this object's Mat header.
inline
HostMem HostMem::clone() const
{
    HostMem duplicate(size(), type(), alloc_type);
    createMatHeader().copyTo(duplicate);
    return duplicate;
}
// Size overload: forwards to create(rows, cols, type) with height as rows and width as cols.
inline
void HostMem::create(Size size_, int type_)
{
    const int nrows = size_.height;
    const int ncols = size_.width;
    create(nrows, ncols, type_);
}
// Builds a Mat from this object's size, type, data pointer and step.
inline
Mat HostMem::createMatHeader() const
{
    const Size dims = size();
    const int matType = type();
    return Mat(dims, matType, data, step);
}
// True when Mat::CONTINUOUS_FLAG is set in flags.
inline
bool HostMem::isContinuous() const
{
    const int continuousBit = flags & Mat::CONTINUOUS_FLAG;
    return continuousBit != 0;
}
// Element size as computed by CV_ELEM_SIZE from flags.
inline
size_t HostMem::elemSize() const
{
    const size_t elementBytes = CV_ELEM_SIZE(flags);
    return elementBytes;
}
// Per-channel element size as computed by CV_ELEM_SIZE1 from flags.
inline
size_t HostMem::elemSize1() const
{
    const size_t channelBytes = CV_ELEM_SIZE1(flags);
    return channelBytes;
}
// Matrix type extracted from flags with CV_MAT_TYPE.
inline
int HostMem::type() const
{
    const int matType = CV_MAT_TYPE(flags);
    return matType;
}
// Depth extracted from flags with CV_MAT_DEPTH.
inline
int HostMem::depth() const
{
    const int matDepth = CV_MAT_DEPTH(flags);
    return matDepth;
}
// Channel count extracted from flags with CV_MAT_CN.
inline
int HostMem::channels() const
{
    const int channelCount = CV_MAT_CN(flags);
    return channelCount;
}
// Row step divided by elemSize1().
inline
size_t HostMem::step1() const
{
    const size_t channelBytes = elemSize1();
    return step / channelBytes;
}
// Dimensions as a Size; note the (cols, rows) argument order.
inline
Size HostMem::size() const
{
    const Size dims(cols, rows);
    return dims;
}
// The object is empty when its data pointer is null.
inline
bool HostMem::empty() const
{
    return !data;
}
// Free-function swap for HostMem; simply calls the member swap on a.
static inline
void swap(HostMem& a, HostMem& b)
{
    HostMem& lhs = a;
    lhs.swap(b);
}
//===================================================================================
// Stream
//===================================================================================
// Wraps an existing implementation object: the Ptr<Impl> is copied into impl_.
inline
Stream::Stream(const Ptr<Impl>& impl)
    : impl_(impl)
{}
//===================================================================================
// Initialization & Info
//===================================================================================
// True when either hasPtx() or hasBin() reports the given compute capability.
// hasBin() is only consulted when hasPtx() returns false.
inline
bool TargetArchs::has(int major, int minor)
{
    if (hasPtx(major, minor))
        return true;

    return hasBin(major, minor);
}
// True when either hasEqualOrGreaterPtx() or hasEqualOrGreaterBin() succeeds for
// the given compute capability. The Bin query only runs when the PTX query fails.
inline
bool TargetArchs::hasEqualOrGreater(int major, int minor)
{
    if (hasEqualOrGreaterPtx(major, minor))
        return true;

    return hasEqualOrGreaterBin(major, minor);
}
// Default constructor: binds the object to the device index returned by getDevice().
inline
DeviceInfo::DeviceInfo()
    : device_id_(getDevice())
{
}
// Binds the object to an explicit device index. The index must lie in
// [0, getCudaEnabledDeviceCount()); this is enforced with CV_Assert before
// the index is stored.
inline
DeviceInfo::DeviceInfo(int device_id)
{
    CV_Assert( device_id >= 0 && device_id < getCudaEnabledDeviceCount() );

    this->device_id_ = device_id;
}
inline
int DeviceInfo::deviceID() const
{
return device_id_;
}
// Returns the "free" value reported by queryMemory(); the "total" value is discarded.
inline
size_t DeviceInfo::freeMemory() const
{
    size_t totalBytes, freeBytes;
    queryMemory(totalBytes, freeBytes);
    return freeBytes;
}
// Returns the "total" value reported by queryMemory(); the "free" value is discarded.
inline
size_t DeviceInfo::totalMemory() const
{
    size_t totalBytes, freeBytes;
    queryMemory(totalBytes, freeBytes);
    return totalBytes;
}
// Encodes the device's compute capability as major * 10 + minor and reports
// whether it is at least the numeric value of feature_set.
inline
bool DeviceInfo::supports(FeatureSet feature_set) const
{
    const int computeCapability = majorVersion() * 10 + minorVersion();
    if (computeCapability >= feature_set)
        return true;
    return false;
}
}} // namespace cv { namespace cuda {
//===================================================================================
// Mat
//===================================================================================
namespace cv {
// Constructs a Mat from a GpuMat: starts from a zeroed header (size pointing at
// rows) and then lets m.download() fill this object.
inline
Mat::Mat(const cuda::GpuMat& m)
    : flags(0), dims(0), rows(0), cols(0),
      data(0), datastart(0), dataend(0), datalimit(0),
      allocator(0), u(0), size(&rows)
{
    m.download(*this);
}
}
//! @endcond
#endif // __OPENCV_CORE_CUDAINL_HPP__

View File

@ -0,0 +1,211 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_CUDA_DEVICE_BLOCK_HPP__
#define __OPENCV_CUDA_DEVICE_BLOCK_HPP__
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
// Collection of static device helpers in which the threads of one thread block
// cooperate: each thread handles the elements at its flattened thread index plus
// multiples of the block's total thread count.
struct Block
{
// Returns blockIdx.x (the y and z block indices are not taken into account).
static __device__ __forceinline__ unsigned int id()
{
return blockIdx.x;
}
// Total number of threads in the block: blockDim.x * blockDim.y * blockDim.z.
static __device__ __forceinline__ unsigned int stride()
{
return blockDim.x * blockDim.y * blockDim.z;
}
// Block-wide barrier (__syncthreads).
static __device__ __forceinline__ void sync()
{
__syncthreads();
}
// Linear index of the calling thread within its block, with x varying fastest, then y, then z.
static __device__ __forceinline__ int flattenedThreadId()
{
return threadIdx.z * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x;
}
// Assigns value to the elements of [beg, end) owned by this thread:
// beg + flattenedThreadId(), then every stride()-th element after it.
template<typename It, typename T>
static __device__ __forceinline__ void fill(It beg, It end, const T& value)
{
int STRIDE = stride();
It t = beg + flattenedThreadId();
for(; t < end; t += STRIDE)
*t = value;
}
// Writes an increasing sequence into [beg, end): the element at offset k from beg
// receives value + k. Each thread handles offsets tid, tid + STRIDE, ...
template<typename OutIt, typename T>
static __device__ __forceinline__ void yota(OutIt beg, OutIt end, T value)
{
int STRIDE = stride();
int tid = flattenedThreadId();
value += tid;
for(OutIt t = beg + tid; t < end; t += STRIDE, value += STRIDE)
*t = value;
}
// Copies [beg, end) to the range starting at out; each thread copies the
// elements at its own offsets (same partitioning as fill()).
template<typename InIt, typename OutIt>
static __device__ __forceinline__ void copy(InIt beg, InIt end, OutIt out)
{
int STRIDE = stride();
InIt t = beg + flattenedThreadId();
OutIt o = out + (t - beg);
for(; t < end; t += STRIDE, o += STRIDE)
*o = *t;
}
// Unary transform: writes op(*in) for every element of [beg, end) to the matching
// position of the range starting at out. (The name is spelled "transfrom" in the API.)
template<typename InIt, typename OutIt, class UnOp>
static __device__ __forceinline__ void transfrom(InIt beg, InIt end, OutIt out, UnOp op)
{
int STRIDE = stride();
InIt t = beg + flattenedThreadId();
OutIt o = out + (t - beg);
for(; t < end; t += STRIDE, o += STRIDE)
*o = op(*t);
}
// Binary transform: writes op(*in1, *in2) for every element of [beg1, end1) and the
// matching element of the range starting at beg2 to the range starting at out.
template<typename InIt1, typename InIt2, typename OutIt, class BinOp>
static __device__ __forceinline__ void transfrom(InIt1 beg1, InIt1 end1, InIt2 beg2, OutIt out, BinOp op)
{
int STRIDE = stride();
InIt1 t1 = beg1 + flattenedThreadId();
InIt2 t2 = beg2 + flattenedThreadId();
OutIt o = out + (t1 - beg1);
for(; t1 < end1; t1 += STRIDE, t2 += STRIDE, o += STRIDE)
*o = op(*t1, *t2);
}
// In-place tree reduction of buffer with op, for a block of CTA_SIZE threads.
// Each step folds buffer[tid + offset] into buffer[tid], halving offset every time.
// Steps with offset >= 64 are separated by __syncthreads(); the steps executed by
// tid < 32 have no barrier between them and go through the volatile buffer.
// This overload has no barrier after the last step.
// NOTE(review): the barrier-free tail presumably relies on the 32 threads executing in
// lockstep - confirm this holds on the targeted GPU architectures.
// NOTE(review): inside the tid < 32 branch every enabled step runs for all 32 threads,
// so e.g. with CTA_SIZE == 32 the index tid + 16 reaches 47 - confirm the buffer is sized for that.
template<int CTA_SIZE, typename T, class BinOp>
static __device__ __forceinline__ void reduce(volatile T* buffer, BinOp op)
{
int tid = flattenedThreadId();
T val = buffer[tid];
if (CTA_SIZE >= 1024) { if (tid < 512) buffer[tid] = val = op(val, buffer[tid + 512]); __syncthreads(); }
if (CTA_SIZE >= 512) { if (tid < 256) buffer[tid] = val = op(val, buffer[tid + 256]); __syncthreads(); }
if (CTA_SIZE >= 256) { if (tid < 128) buffer[tid] = val = op(val, buffer[tid + 128]); __syncthreads(); }
if (CTA_SIZE >= 128) { if (tid < 64) buffer[tid] = val = op(val, buffer[tid + 64]); __syncthreads(); }
if (tid < 32)
{
if (CTA_SIZE >= 64) { buffer[tid] = val = op(val, buffer[tid + 32]); }
if (CTA_SIZE >= 32) { buffer[tid] = val = op(val, buffer[tid + 16]); }
if (CTA_SIZE >= 16) { buffer[tid] = val = op(val, buffer[tid + 8]); }
if (CTA_SIZE >= 8) { buffer[tid] = val = op(val, buffer[tid + 4]); }
if (CTA_SIZE >= 4) { buffer[tid] = val = op(val, buffer[tid + 2]); }
if (CTA_SIZE >= 2) { buffer[tid] = val = op(val, buffer[tid + 1]); }
}
}
// Same reduction as above, but each thread first stores its own value init into
// buffer[tid] (followed by a barrier), and after a final barrier every thread
// returns buffer[0].
// NOTE(review): the same lockstep and buffer-size assumptions as the overload above apply - confirm.
template<int CTA_SIZE, typename T, class BinOp>
static __device__ __forceinline__ T reduce(volatile T* buffer, T init, BinOp op)
{
int tid = flattenedThreadId();
T val = buffer[tid] = init;
__syncthreads();
if (CTA_SIZE >= 1024) { if (tid < 512) buffer[tid] = val = op(val, buffer[tid + 512]); __syncthreads(); }
if (CTA_SIZE >= 512) { if (tid < 256) buffer[tid] = val = op(val, buffer[tid + 256]); __syncthreads(); }
if (CTA_SIZE >= 256) { if (tid < 128) buffer[tid] = val = op(val, buffer[tid + 128]); __syncthreads(); }
if (CTA_SIZE >= 128) { if (tid < 64) buffer[tid] = val = op(val, buffer[tid + 64]); __syncthreads(); }
if (tid < 32)
{
if (CTA_SIZE >= 64) { buffer[tid] = val = op(val, buffer[tid + 32]); }
if (CTA_SIZE >= 32) { buffer[tid] = val = op(val, buffer[tid + 16]); }
if (CTA_SIZE >= 16) { buffer[tid] = val = op(val, buffer[tid + 8]); }
if (CTA_SIZE >= 8) { buffer[tid] = val = op(val, buffer[tid + 4]); }
if (CTA_SIZE >= 4) { buffer[tid] = val = op(val, buffer[tid + 2]); }
if (CTA_SIZE >= 2) { buffer[tid] = val = op(val, buffer[tid + 1]); }
}
__syncthreads();
return buffer[0];
}
// In-place reduction of the first n elements of data with op, for arbitrary n.
// The folded result ends up in data[0].
template <typename T, class BinOp>
static __device__ __forceinline__ void reduce_n(T* data, unsigned int n, BinOp op)
{
int ftid = flattenedThreadId();
int sft = stride();
// More elements than threads: each thread first folds the elements at
// ftid + sft, ftid + 2*sft, ... into data[ftid], leaving sft partial results.
if (sft < n)
{
for (unsigned int i = sft + ftid; i < n; i += sft)
data[ftid] = op(data[ftid], data[i]);
__syncthreads();
n = sft;
}
// Repeatedly fold the upper part onto the lower part, mirrored: data[n - 1 - ftid]
// into data[ftid] for ftid < n/2. For odd n the middle element is left untouched
// and stays inside the next range of n - n/2 elements.
while (n > 1)
{
unsigned int half = n/2;
if (ftid < half)
data[ftid] = op(data[ftid], data[n - ftid - 1]);
__syncthreads();
n = n - half;
}
}
};
}}}
//! @endcond
#endif /* __OPENCV_CUDA_DEVICE_BLOCK_HPP__ */

View File

@ -0,0 +1,722 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_CUDA_BORDER_INTERPOLATE_HPP__
#define __OPENCV_CUDA_BORDER_INTERPOLATE_HPP__
#include "saturate_cast.hpp"
#include "vec_traits.hpp"
#include "vec_math.hpp"
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
//////////////////////////////////////////////////////////////
// BrdConstant
// Constant-border accessor for a single row: positions outside [0, width) yield
// the stored border value val; positions inside yield the element converted with
// saturate_cast<D>.
template <typename D> struct BrdRowConstant
{
    typedef D result_type;

    explicit __host__ __device__ __forceinline__ BrdRowConstant(int width_, const D& val_ = VecTraits<D>::all(0)) : width(width_), val(val_) {}

    // Checks only the lower bound (x >= 0).
    template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const
    {
        if (x < 0)
            return val;
        return saturate_cast<D>(data[x]);
    }

    // Checks only the upper bound (x < width).
    template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const
    {
        if (x >= width)
            return val;
        return saturate_cast<D>(data[x]);
    }

    // Checks both bounds.
    template <typename T> __device__ __forceinline__ D at(int x, const T* data) const
    {
        if (x < 0 || x >= width)
            return val;
        return saturate_cast<D>(data[x]);
    }

    int width;
    D val;
};
// Constant-border accessor for a single column: rows outside [0, height) yield the
// stored border value val; rows inside are read at byte offset y * step from data
// and converted with saturate_cast<D>.
template <typename D> struct BrdColConstant
{
    typedef D result_type;

    explicit __host__ __device__ __forceinline__ BrdColConstant(int height_, const D& val_ = VecTraits<D>::all(0)) : height(height_), val(val_) {}

    // Checks only the lower bound (y >= 0).
    template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const
    {
        if (y < 0)
            return val;
        const char* const rowBytes = (const char*)data + y * step;
        return saturate_cast<D>(*(const T*)rowBytes);
    }

    // Checks only the upper bound (y < height).
    template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const
    {
        if (y >= height)
            return val;
        const char* const rowBytes = (const char*)data + y * step;
        return saturate_cast<D>(*(const T*)rowBytes);
    }

    // Checks both bounds.
    template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const
    {
        if (y < 0 || y >= height)
            return val;
        const char* const rowBytes = (const char*)data + y * step;
        return saturate_cast<D>(*(const T*)rowBytes);
    }

    int height;
    D val;
};
// Constant-border accessor in two dimensions: coordinates outside
// [0, height) x [0, width) yield the stored border value val.
template <typename D> struct BrdConstant
{
    typedef D result_type;

    __host__ __device__ __forceinline__ BrdConstant(int height_, int width_, const D& val_ = VecTraits<D>::all(0)) : height(height_), width(width_), val(val_)
    {
    }

    // Raw-pointer access: row y starts y * step bytes after data.
    template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const
    {
        const bool inside = x >= 0 && x < width && y >= 0 && y < height;
        if (!inside)
            return val;

        const T* const row = (const T*)((const uchar*)data + y * step);
        return saturate_cast<D>(row[x]);
    }

    // Access through a 2D accessor object called as src(y, x).
    template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const
    {
        const bool inside = x >= 0 && x < width && y >= 0 && y < height;
        if (!inside)
            return val;

        return saturate_cast<D>(src(y, x));
    }

    int height;
    int width;
    D val;
};
//////////////////////////////////////////////////////////////
// BrdReplicate
// Replicate-border accessor for a single row: column indices are clamped to
// [0, last_col] before the element is read.
template <typename D> struct BrdRowReplicate
{
    typedef D result_type;

    explicit __host__ __device__ __forceinline__ BrdRowReplicate(int width) : last_col(width - 1) {}
    // The second argument is accepted and ignored.
    template <typename U> __host__ __device__ __forceinline__ BrdRowReplicate(int width, U) : last_col(width - 1) {}

    // Clamp from below only.
    __device__ __forceinline__ int idx_col_low(int x) const
    {
        const int clamped = ::max(x, 0);
        return clamped;
    }

    // Clamp from above only.
    __device__ __forceinline__ int idx_col_high(int x) const
    {
        const int clamped = ::min(x, last_col);
        return clamped;
    }

    // Clamp from above, then from below.
    __device__ __forceinline__ int idx_col(int x) const
    {
        const int capped = idx_col_high(x);
        return idx_col_low(capped);
    }

    template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const
    {
        const int col = idx_col_low(x);
        return saturate_cast<D>(data[col]);
    }

    template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const
    {
        const int col = idx_col_high(x);
        return saturate_cast<D>(data[col]);
    }

    template <typename T> __device__ __forceinline__ D at(int x, const T* data) const
    {
        const int col = idx_col(x);
        return saturate_cast<D>(data[col]);
    }

    int last_col;
};
// Replicate-border accessor for a single column: row indices are clamped to
// [0, last_row] before the element at byte offset row * step is read.
template <typename D> struct BrdColReplicate
{
    typedef D result_type;

    explicit __host__ __device__ __forceinline__ BrdColReplicate(int height) : last_row(height - 1) {}
    // The second argument is accepted and ignored.
    template <typename U> __host__ __device__ __forceinline__ BrdColReplicate(int height, U) : last_row(height - 1) {}

    // Clamp from below only.
    __device__ __forceinline__ int idx_row_low(int y) const
    {
        const int clamped = ::max(y, 0);
        return clamped;
    }

    // Clamp from above only.
    __device__ __forceinline__ int idx_row_high(int y) const
    {
        const int clamped = ::min(y, last_row);
        return clamped;
    }

    // Clamp from above, then from below.
    __device__ __forceinline__ int idx_row(int y) const
    {
        const int capped = idx_row_high(y);
        return idx_row_low(capped);
    }

    template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const
    {
        const char* const rowBytes = (const char*)data + idx_row_low(y) * step;
        return saturate_cast<D>(*(const T*)rowBytes);
    }

    template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const
    {
        const char* const rowBytes = (const char*)data + idx_row_high(y) * step;
        return saturate_cast<D>(*(const T*)rowBytes);
    }

    template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const
    {
        const char* const rowBytes = (const char*)data + idx_row(y) * step;
        return saturate_cast<D>(*(const T*)rowBytes);
    }

    int last_row;
};
// Replicate-border accessor in two dimensions: row indices are clamped to
// [0, last_row] and column indices to [0, last_col] before the element is read.
template <typename D> struct BrdReplicate
{
    typedef D result_type;

    __host__ __device__ __forceinline__ BrdReplicate(int height, int width) : last_row(height - 1), last_col(width - 1) {}
    // The third argument is accepted and ignored.
    template <typename U> __host__ __device__ __forceinline__ BrdReplicate(int height, int width, U) : last_row(height - 1), last_col(width - 1) {}

    // Row index: clamp from below only.
    __device__ __forceinline__ int idx_row_low(int y) const
    {
        const int clamped = ::max(y, 0);
        return clamped;
    }

    // Row index: clamp from above only.
    __device__ __forceinline__ int idx_row_high(int y) const
    {
        const int clamped = ::min(y, last_row);
        return clamped;
    }

    // Row index: clamp from above, then from below.
    __device__ __forceinline__ int idx_row(int y) const
    {
        const int capped = idx_row_high(y);
        return idx_row_low(capped);
    }

    // Column index: clamp from below only.
    __device__ __forceinline__ int idx_col_low(int x) const
    {
        const int clamped = ::max(x, 0);
        return clamped;
    }

    // Column index: clamp from above only.
    __device__ __forceinline__ int idx_col_high(int x) const
    {
        const int clamped = ::min(x, last_col);
        return clamped;
    }

    // Column index: clamp from above, then from below.
    __device__ __forceinline__ int idx_col(int x) const
    {
        const int capped = idx_col_high(x);
        return idx_col_low(capped);
    }

    // Raw-pointer access: the clamped row starts idx_row(y) * step bytes after data.
    template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const
    {
        const T* const row = (const T*)((const char*)data + idx_row(y) * step);
        return saturate_cast<D>(row[idx_col(x)]);
    }

    // Access through a 2D accessor object called as src(row, col) with clamped indices.
    template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const
    {
        return saturate_cast<D>(src(idx_row(y), idx_col(x)));
    }

    int last_row;
    int last_col;
};
//////////////////////////////////////////////////////////////
// BrdReflect101
// Reflect-101 border policy along a row: indices are mirrored about the edge
// element without repeating it (for example -1 maps to 1).
template <typename D> struct BrdRowReflect101
{
    typedef D result_type;

    // Only the index of the last valid column (width - 1) is kept.
    explicit __host__ __device__ __forceinline__ BrdRowReflect101(int width)
        : last_col(width - 1)
    {
    }

    // Overload with an extra argument that is accepted and ignored.
    template <typename U>
    __host__ __device__ __forceinline__ BrdRowReflect101(int width, U)
        : last_col(width - 1)
    {
    }

    // Mirror about column 0, then reduce modulo the row width.
    __device__ __forceinline__ int idx_col_low(int x) const
    {
        const int width = last_col + 1;
        return ::abs(x) % width;
    }

    // Mirror about the last column, then reduce modulo the row width.
    __device__ __forceinline__ int idx_col_high(int x) const
    {
        const int dist_to_last = ::abs(last_col - x);
        const int mirrored = ::abs(last_col - dist_to_last);
        return mirrored % (last_col + 1);
    }

    // Apply the high-side mapping first, then the low-side mapping.
    __device__ __forceinline__ int idx_col(int x) const
    {
        const int folded = idx_col_high(x);
        return idx_col_low(folded);
    }

    template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const
    {
        const int col = idx_col_low(x);
        return saturate_cast<D>(data[col]);
    }

    template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const
    {
        const int col = idx_col_high(x);
        return saturate_cast<D>(data[col]);
    }

    template <typename T> __device__ __forceinline__ D at(int x, const T* data) const
    {
        const int col = idx_col(x);
        return saturate_cast<D>(data[col]);
    }

    int last_col;
};
// Reflect-101 border policy along a column: row indices are mirrored about
// the edge row without repeating it (for example -1 maps to 1).
// D is the type returned by the readers; T is the element type stored in memory.
template <typename D> struct BrdColReflect101
{
    typedef D result_type;

    // Only the index of the last valid row (height - 1) is kept.
    explicit __host__ __device__ __forceinline__ BrdColReflect101(int height) : last_row(height - 1) {}

    // Overload with an extra argument that is accepted and ignored.
    template <typename U> __host__ __device__ __forceinline__ BrdColReflect101(int height, U) : last_row(height - 1) {}

    // Mirror about row 0, then reduce modulo the number of rows.
    __device__ __forceinline__ int idx_row_low(int y) const
    {
        return ::abs(y) % (last_row + 1);
    }

    // Mirror about the last row, then reduce modulo the number of rows.
    __device__ __forceinline__ int idx_row_high(int y) const
    {
        return ::abs(last_row - ::abs(last_row - y)) % (last_row + 1);
    }

    // Apply the high-side mapping first, then the low-side mapping.
    __device__ __forceinline__ int idx_row(int y) const
    {
        return idx_row_low(idx_row_high(y));
    }

    // The readers below load the element as T (the type `data` points to) and
    // only then convert to D. Loading through `const D*` would read the wrong
    // bytes whenever T and D differ. step is the row pitch in bytes.
    template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const
    {
        return saturate_cast<D>(*(const T*)((const char*)data + idx_row_low(y) * step));
    }

    template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const
    {
        return saturate_cast<D>(*(const T*)((const char*)data + idx_row_high(y) * step));
    }

    template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const
    {
        return saturate_cast<D>(*(const T*)((const char*)data + idx_row(y) * step));
    }

    int last_row;
};
// Reflect-101 border policy in two dimensions: row and column indices are
// mirrored about the edge element without repeating it (-1 maps to 1).
template <typename D> struct BrdReflect101
{
    typedef D result_type;

    // Stores the last valid row (height - 1) and column (width - 1).
    __host__ __device__ __forceinline__ BrdReflect101(int height, int width)
        : last_row(height - 1), last_col(width - 1)
    {
    }

    // Overload with an extra argument that is accepted and ignored.
    template <typename U>
    __host__ __device__ __forceinline__ BrdReflect101(int height, int width, U)
        : last_row(height - 1), last_col(width - 1)
    {
    }

    // --- row index mapping --------------------------------------------------
    __device__ __forceinline__ int idx_row_low(int y) const
    {
        const int height = last_row + 1;
        return ::abs(y) % height;
    }

    __device__ __forceinline__ int idx_row_high(int y) const
    {
        const int dist_to_last = ::abs(last_row - y);
        const int mirrored = ::abs(last_row - dist_to_last);
        return mirrored % (last_row + 1);
    }

    // High-side mapping first, then low-side mapping.
    __device__ __forceinline__ int idx_row(int y) const
    {
        const int folded = idx_row_high(y);
        return idx_row_low(folded);
    }

    // --- column index mapping -----------------------------------------------
    __device__ __forceinline__ int idx_col_low(int x) const
    {
        const int width = last_col + 1;
        return ::abs(x) % width;
    }

    __device__ __forceinline__ int idx_col_high(int x) const
    {
        const int dist_to_last = ::abs(last_col - x);
        const int mirrored = ::abs(last_col - dist_to_last);
        return mirrored % (last_col + 1);
    }

    // High-side mapping first, then low-side mapping.
    __device__ __forceinline__ int idx_col(int x) const
    {
        const int folded = idx_col_high(x);
        return idx_col_low(folded);
    }

    // Read element (y, x) from a pitched buffer; step is the row pitch in bytes.
    template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const
    {
        const T* const row =
            reinterpret_cast<const T*>(reinterpret_cast<const char*>(data) + idx_row(y) * step);
        return saturate_cast<D>(row[idx_col(x)]);
    }

    // Read element (y, x) through a 2-D accessor that is callable as src(row, col).
    template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const
    {
        const int row = idx_row(y);
        const int col = idx_col(x);
        return saturate_cast<D>(src(row, col));
    }

    int last_row;
    int last_col;
};
//////////////////////////////////////////////////////////////
// BrdReflect
// Reflect border policy along a row: indices are mirrored about the image
// edge and the edge element itself is repeated (for example -1 maps to 0).
template <typename D> struct BrdRowReflect
{
    typedef D result_type;

    // Only the index of the last valid column (width - 1) is kept.
    explicit __host__ __device__ __forceinline__ BrdRowReflect(int width)
        : last_col(width - 1)
    {
    }

    // Overload with an extra argument that is accepted and ignored.
    template <typename U>
    __host__ __device__ __forceinline__ BrdRowReflect(int width, U)
        : last_col(width - 1)
    {
    }

    // Mirror about the left edge; (x < 0) contributes 1 for negative x so
    // that column 0 is repeated. The result is reduced modulo the row width.
    __device__ __forceinline__ int idx_col_low(int x) const
    {
        const int mirrored = ::abs(x) - (x < 0);
        return mirrored % (last_col + 1);
    }

    // Mirror about the right edge; (x > last_col) contributes 1 past the end
    // so that the last column is repeated.
    __device__ __forceinline__ int idx_col_high(int x) const
    {
        const int dist_to_last = ::abs(last_col - x);
        const int mirrored = ::abs(last_col - dist_to_last + (x > last_col));
        return mirrored % (last_col + 1);
    }

    // Mirror about the left edge (without the modulo), then apply the
    // right-edge mapping to the result.
    __device__ __forceinline__ int idx_col(int x) const
    {
        const int left_mirrored = ::abs(x) - (x < 0);
        return idx_col_high(left_mirrored);
    }

    template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const
    {
        const int col = idx_col_low(x);
        return saturate_cast<D>(data[col]);
    }

    template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const
    {
        const int col = idx_col_high(x);
        return saturate_cast<D>(data[col]);
    }

    template <typename T> __device__ __forceinline__ D at(int x, const T* data) const
    {
        const int col = idx_col(x);
        return saturate_cast<D>(data[col]);
    }

    int last_col;
};
// Reflect border policy along a column: row indices are mirrored about the
// image edge and the edge row itself is repeated (for example -1 maps to 0).
// D is the type returned by the readers; T is the element type stored in memory.
template <typename D> struct BrdColReflect
{
    typedef D result_type;

    // Only the index of the last valid row (height - 1) is kept.
    explicit __host__ __device__ __forceinline__ BrdColReflect(int height) : last_row(height - 1) {}

    // Overload with an extra argument that is accepted and ignored.
    template <typename U> __host__ __device__ __forceinline__ BrdColReflect(int height, U) : last_row(height - 1) {}

    // Mirror about the top edge; (y < 0) contributes 1 for negative y so that
    // row 0 is repeated. The result is reduced modulo the number of rows.
    __device__ __forceinline__ int idx_row_low(int y) const
    {
        return (::abs(y) - (y < 0)) % (last_row + 1);
    }

    // Mirror about the bottom edge; (y > last_row) contributes 1 past the end
    // so that the last row is repeated.
    __device__ __forceinline__ int idx_row_high(int y) const
    {
        return ::abs(last_row - ::abs(last_row - y) + (y > last_row)) % (last_row + 1);
    }

    // Mirror about the top edge (without the modulo), then apply the
    // bottom-edge mapping to the result.
    __device__ __forceinline__ int idx_row(int y) const
    {
        return idx_row_high(::abs(y) - (y < 0));
    }

    // The readers below load the element as T (the type `data` points to) and
    // only then convert to D. Loading through `const D*` would read the wrong
    // bytes whenever T and D differ. step is the row pitch in bytes.
    template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const
    {
        return saturate_cast<D>(*(const T*)((const char*)data + idx_row_low(y) * step));
    }

    template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const
    {
        return saturate_cast<D>(*(const T*)((const char*)data + idx_row_high(y) * step));
    }

    template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const
    {
        return saturate_cast<D>(*(const T*)((const char*)data + idx_row(y) * step));
    }

    int last_row;
};
// Reflect border policy in two dimensions: indices are mirrored about the
// image edge and the edge element itself is repeated (-1 maps to 0).
template <typename D> struct BrdReflect
{
    typedef D result_type;

    // Stores the last valid row (height - 1) and column (width - 1).
    __host__ __device__ __forceinline__ BrdReflect(int height, int width)
        : last_row(height - 1), last_col(width - 1)
    {
    }

    // Overload with an extra argument that is accepted and ignored.
    template <typename U>
    __host__ __device__ __forceinline__ BrdReflect(int height, int width, U)
        : last_row(height - 1), last_col(width - 1)
    {
    }

    // --- row index mapping --------------------------------------------------
    // Mirror about the top edge, then reduce modulo the number of rows.
    __device__ __forceinline__ int idx_row_low(int y) const
    {
        const int mirrored = ::abs(y) - (y < 0);
        return mirrored % (last_row + 1);
    }

    // Mirror about the bottom edge. No abs() or modulo is applied here, so the
    // result can be negative; idx_row() passes it through idx_row_low().
    __device__ __forceinline__ int idx_row_high(int y) const
    {
        const int dist_to_last = ::abs(last_row - y);
        return last_row - dist_to_last + (y > last_row);
    }

    // Bottom-edge mapping first, then top-edge mapping.
    __device__ __forceinline__ int idx_row(int y) const
    {
        const int folded = idx_row_high(y);
        return idx_row_low(folded);
    }

    // --- column index mapping -----------------------------------------------
    // Mirror about the left edge, then reduce modulo the row width.
    __device__ __forceinline__ int idx_col_low(int x) const
    {
        const int mirrored = ::abs(x) - (x < 0);
        return mirrored % (last_col + 1);
    }

    // Mirror about the right edge; like idx_row_high(), the result is not
    // wrapped here.
    __device__ __forceinline__ int idx_col_high(int x) const
    {
        const int dist_to_last = ::abs(last_col - x);
        return last_col - dist_to_last + (x > last_col);
    }

    // Right-edge mapping first, then left-edge mapping.
    __device__ __forceinline__ int idx_col(int x) const
    {
        const int folded = idx_col_high(x);
        return idx_col_low(folded);
    }

    // Read element (y, x) from a pitched buffer; step is the row pitch in bytes.
    template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const
    {
        const T* const row =
            reinterpret_cast<const T*>(reinterpret_cast<const char*>(data) + idx_row(y) * step);
        return saturate_cast<D>(row[idx_col(x)]);
    }

    // Read element (y, x) through a 2-D accessor that is callable as src(row, col).
    template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const
    {
        const int row = idx_row(y);
        const int col = idx_col(x);
        return saturate_cast<D>(src(row, col));
    }

    int last_row;
    int last_col;
};
//////////////////////////////////////////////////////////////
// BrdWrap
// Wrap border policy along a row: column indices outside the row wrap around
// to the opposite side (for example -1 maps to width - 1).
template <typename D> struct BrdRowWrap
{
    typedef D result_type;

    explicit __host__ __device__ __forceinline__ BrdRowWrap(int width_)
        : width(width_)
    {
    }

    // Overload with an extra argument that is accepted and ignored.
    template <typename U>
    __host__ __device__ __forceinline__ BrdRowWrap(int width_, U)
        : width(width_)
    {
    }

    // Wrap negative indices into the row. The comparison results are used as
    // 0/1 multipliers, so both terms are always evaluated.
    __device__ __forceinline__ int idx_col_low(int x) const
    {
        const int shifted = x - ((x - width + 1) / width) * width;
        return (x >= 0) * x + (x < 0) * shifted;
    }

    // Wrap indices >= width into the row, using the same 0/1 multiplier form.
    __device__ __forceinline__ int idx_col_high(int x) const
    {
        const int remainder = x % width;
        return (x < width) * x + (x >= width) * remainder;
    }

    // Low-side wrap first, then high-side wrap.
    __device__ __forceinline__ int idx_col(int x) const
    {
        const int non_negative = idx_col_low(x);
        return idx_col_high(non_negative);
    }

    template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const
    {
        const int col = idx_col_low(x);
        return saturate_cast<D>(data[col]);
    }

    template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const
    {
        const int col = idx_col_high(x);
        return saturate_cast<D>(data[col]);
    }

    template <typename T> __device__ __forceinline__ D at(int x, const T* data) const
    {
        const int col = idx_col(x);
        return saturate_cast<D>(data[col]);
    }

    int width;
};
// Wrap border policy along a column: row indices outside the column wrap
// around to the opposite side (for example -1 maps to height - 1).
// D is the type returned by the readers; T is the element type stored in memory.
template <typename D> struct BrdColWrap
{
    typedef D result_type;

    explicit __host__ __device__ __forceinline__ BrdColWrap(int height_) : height(height_) {}

    // Overload with an extra argument that is accepted and ignored.
    template <typename U> __host__ __device__ __forceinline__ BrdColWrap(int height_, U) : height(height_) {}

    // Wrap negative indices into the column. The comparison results are used
    // as 0/1 multipliers, so both terms are always evaluated.
    __device__ __forceinline__ int idx_row_low(int y) const
    {
        return (y >= 0) * y + (y < 0) * (y - ((y - height + 1) / height) * height);
    }

    // Wrap indices >= height into the column, using the same multiplier form.
    __device__ __forceinline__ int idx_row_high(int y) const
    {
        return (y < height) * y + (y >= height) * (y % height);
    }

    // Low-side wrap first, then high-side wrap.
    __device__ __forceinline__ int idx_row(int y) const
    {
        return idx_row_high(idx_row_low(y));
    }

    // The readers below load the element as T (the type `data` points to) and
    // only then convert to D. Loading through `const D*` would read the wrong
    // bytes whenever T and D differ. step is the row pitch in bytes.
    template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const
    {
        return saturate_cast<D>(*(const T*)((const char*)data + idx_row_low(y) * step));
    }

    template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const
    {
        return saturate_cast<D>(*(const T*)((const char*)data + idx_row_high(y) * step));
    }

    template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const
    {
        return saturate_cast<D>(*(const T*)((const char*)data + idx_row(y) * step));
    }

    int height;
};
// Wrap border policy in two dimensions: row and column indices outside the
// image wrap around to the opposite side (-1 maps to the last row / column).
template <typename D> struct BrdWrap
{
    typedef D result_type;

    __host__ __device__ __forceinline__ BrdWrap(int height_, int width_)
        : height(height_), width(width_)
    {
    }

    // Overload with an extra argument that is accepted and ignored.
    template <typename U>
    __host__ __device__ __forceinline__ BrdWrap(int height_, int width_, U)
        : height(height_), width(width_)
    {
    }

    // --- row index wrapping -------------------------------------------------
    // The comparison results are used as 0/1 multipliers, so both terms are
    // always evaluated.
    __device__ __forceinline__ int idx_row_low(int y) const
    {
        const int shifted = y - ((y - height + 1) / height) * height;
        return (y >= 0) * y + (y < 0) * shifted;
    }

    __device__ __forceinline__ int idx_row_high(int y) const
    {
        const int remainder = y % height;
        return (y < height) * y + (y >= height) * remainder;
    }

    // Low-side wrap first, then high-side wrap.
    __device__ __forceinline__ int idx_row(int y) const
    {
        const int non_negative = idx_row_low(y);
        return idx_row_high(non_negative);
    }

    // --- column index wrapping ----------------------------------------------
    __device__ __forceinline__ int idx_col_low(int x) const
    {
        const int shifted = x - ((x - width + 1) / width) * width;
        return (x >= 0) * x + (x < 0) * shifted;
    }

    __device__ __forceinline__ int idx_col_high(int x) const
    {
        const int remainder = x % width;
        return (x < width) * x + (x >= width) * remainder;
    }

    // Low-side wrap first, then high-side wrap.
    __device__ __forceinline__ int idx_col(int x) const
    {
        const int non_negative = idx_col_low(x);
        return idx_col_high(non_negative);
    }

    // Read element (y, x) from a pitched buffer; step is the row pitch in bytes.
    template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const
    {
        const T* const row =
            reinterpret_cast<const T*>(reinterpret_cast<const char*>(data) + idx_row(y) * step);
        return saturate_cast<D>(row[idx_col(x)]);
    }

    // Read element (y, x) through a 2-D accessor that is callable as src(row, col).
    template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const
    {
        const int row = idx_row(y);
        const int col = idx_col(x);
        return saturate_cast<D>(src(row, col));
    }

    int height;
    int width;
};
//////////////////////////////////////////////////////////////
// BorderReader
// Pairs a 2-D accessor (Ptr2D) with a border policy (B). Calling the reader
// with (y, x) forwards to the policy's at(y, x, ptr) overload.
template <typename Ptr2D, typename B> struct BorderReader
{
    typedef typename B::result_type elem_type;
    typedef typename Ptr2D::index_type index_type;

    // Both the accessor and the policy are stored by value.
    __host__ __device__ __forceinline__ BorderReader(const Ptr2D& ptr_, const B& b_)
        : ptr(ptr_), b(b_)
    {
    }

    __device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const
    {
        return b.at(y, x, ptr);
    }

    Ptr2D ptr;
    B b;
};
// Under win32 there is a bug with templated types that are passed as kernel
// parameters; with this specialization everything works.
// The fields of BrdConstant<D> (height, width, val) are copied into the reader
// instead of storing the policy object itself.
template <typename Ptr2D, typename D> struct BorderReader< Ptr2D, BrdConstant<D> >
{
    typedef typename BrdConstant<D>::result_type elem_type;
    typedef typename Ptr2D::index_type index_type;

    __host__ __device__ __forceinline__ BorderReader(const Ptr2D& src_, const BrdConstant<D>& b)
        : src(src_), height(b.height), width(b.width), val(b.val)
    {
    }

    // Returns the source element when (y, x) lies inside the image and the
    // constant border value otherwise.
    __device__ __forceinline__ D operator ()(index_type y, index_type x) const
    {
        const bool inside = x >= 0 && x < width && y >= 0 && y < height;
        if (!inside)
            return val;
        return saturate_cast<D>(src(y, x));
    }

    Ptr2D src;
    int height;
    int width;
    D val;
};
}}} // namespace cv { namespace cuda { namespace cudev
//! @endcond
#endif // __OPENCV_CUDA_BORDER_INTERPOLATE_HPP__

View File

@ -0,0 +1,309 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_CUDA_COLOR_HPP__
#define __OPENCV_CUDA_COLOR_HPP__
#include "detail/color_detail.hpp"
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
// Each OPENCV_CUDA_IMPLEMENT_*_TRAITS(ColorSpace1_to_ColorSpace2, ...) macro implements
// template <typename T> class ColorSpace1_to_ColorSpace2_traits
// {
// typedef ... functor_type;
// static __host__ __device__ functor_type create_functor();
// };
//
// The macros themselves come from detail/color_detail.hpp. Each one is
// #undef'ed right after its last use, so it is not visible past this header.
// NOTE(review): the numeric arguments look like (source channels, destination
// channels, blue-channel index), with a green bit count for the 555/565 groups
// and an sRGB flag for the Lab/Luv groups - confirm in color_detail.hpp.

// bgr / bgra -> bgr, rgb, bgra, rgba
OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgr_to_rgb, 3, 3, 2)
OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgr_to_bgra, 3, 4, 0)
OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgr_to_rgba, 3, 4, 2)
OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgra_to_bgr, 4, 3, 0)
OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgra_to_rgb, 4, 3, 2)
OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgra_to_rgba, 4, 4, 2)
#undef OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS
// bgr / rgb / bgra / rgba -> bgr555, bgr565
OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(bgr_to_bgr555, 3, 0, 5)
OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(bgr_to_bgr565, 3, 0, 6)
OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(rgb_to_bgr555, 3, 2, 5)
OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(rgb_to_bgr565, 3, 2, 6)
OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(bgra_to_bgr555, 4, 0, 5)
OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(bgra_to_bgr565, 4, 0, 6)
OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(rgba_to_bgr555, 4, 2, 5)
OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(rgba_to_bgr565, 4, 2, 6)
#undef OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS
// bgr555 / bgr565 -> rgb, bgr, rgba, bgra
OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_rgb, 3, 2, 5)
OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_rgb, 3, 2, 6)
OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_bgr, 3, 0, 5)
OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_bgr, 3, 0, 6)
OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_rgba, 4, 2, 5)
OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_rgba, 4, 2, 6)
OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_bgra, 4, 0, 5)
OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_bgra, 4, 0, 6)
#undef OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS
// gray -> bgr, bgra
OPENCV_CUDA_IMPLEMENT_GRAY2RGB_TRAITS(gray_to_bgr, 3)
OPENCV_CUDA_IMPLEMENT_GRAY2RGB_TRAITS(gray_to_bgra, 4)
#undef OPENCV_CUDA_IMPLEMENT_GRAY2RGB_TRAITS
// gray -> bgr555, bgr565
OPENCV_CUDA_IMPLEMENT_GRAY2RGB5x5_TRAITS(gray_to_bgr555, 5)
OPENCV_CUDA_IMPLEMENT_GRAY2RGB5x5_TRAITS(gray_to_bgr565, 6)
#undef OPENCV_CUDA_IMPLEMENT_GRAY2RGB5x5_TRAITS
// bgr555 / bgr565 -> gray
OPENCV_CUDA_IMPLEMENT_RGB5x52GRAY_TRAITS(bgr555_to_gray, 5)
OPENCV_CUDA_IMPLEMENT_RGB5x52GRAY_TRAITS(bgr565_to_gray, 6)
#undef OPENCV_CUDA_IMPLEMENT_RGB5x52GRAY_TRAITS
// rgb / bgr / rgba / bgra -> gray
OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS(rgb_to_gray, 3, 2)
OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS(bgr_to_gray, 3, 0)
OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS(rgba_to_gray, 4, 2)
OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS(bgra_to_gray, 4, 0)
#undef OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS
// rgb / rgba / bgr / bgra -> yuv, yuv4
OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(rgb_to_yuv, 3, 3, 2)
OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(rgba_to_yuv, 4, 3, 2)
OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(rgb_to_yuv4, 3, 4, 2)
OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(rgba_to_yuv4, 4, 4, 2)
OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(bgr_to_yuv, 3, 3, 0)
OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(bgra_to_yuv, 4, 3, 0)
OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(bgr_to_yuv4, 3, 4, 0)
OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(bgra_to_yuv4, 4, 4, 0)
#undef OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS
// yuv / yuv4 -> rgb, rgba, bgr, bgra
OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_rgb, 3, 3, 2)
OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_rgba, 3, 4, 2)
OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_rgb, 4, 3, 2)
OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_rgba, 4, 4, 2)
OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_bgr, 3, 3, 0)
OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_bgra, 3, 4, 0)
OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_bgr, 4, 3, 0)
OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_bgra, 4, 4, 0)
#undef OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS
// rgb / rgba / bgr / bgra -> YCrCb, YCrCb4
OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(rgb_to_YCrCb, 3, 3, 2)
OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(rgba_to_YCrCb, 4, 3, 2)
OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(rgb_to_YCrCb4, 3, 4, 2)
OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(rgba_to_YCrCb4, 4, 4, 2)
OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(bgr_to_YCrCb, 3, 3, 0)
OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(bgra_to_YCrCb, 4, 3, 0)
OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(bgr_to_YCrCb4, 3, 4, 0)
OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(bgra_to_YCrCb4, 4, 4, 0)
#undef OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS
// YCrCb / YCrCb4 -> rgb, rgba, bgr, bgra
OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_rgb, 3, 3, 2)
OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_rgba, 3, 4, 2)
OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_rgb, 4, 3, 2)
OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_rgba, 4, 4, 2)
OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_bgr, 3, 3, 0)
OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_bgra, 3, 4, 0)
OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_bgr, 4, 3, 0)
OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_bgra, 4, 4, 0)
#undef OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS
// rgb / rgba / bgr / bgra -> xyz, xyz4
OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(rgb_to_xyz, 3, 3, 2)
OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(rgba_to_xyz, 4, 3, 2)
OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(rgb_to_xyz4, 3, 4, 2)
OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(rgba_to_xyz4, 4, 4, 2)
OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(bgr_to_xyz, 3, 3, 0)
OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(bgra_to_xyz, 4, 3, 0)
OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(bgr_to_xyz4, 3, 4, 0)
OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(bgra_to_xyz4, 4, 4, 0)
#undef OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS
// xyz / xyz4 -> rgb, rgba, bgr, bgra
OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_rgb, 3, 3, 2)
OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_rgb, 4, 3, 2)
OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_rgba, 3, 4, 2)
OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_rgba, 4, 4, 2)
OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_bgr, 3, 3, 0)
OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_bgr, 4, 3, 0)
OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_bgra, 3, 4, 0)
OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_bgra, 4, 4, 0)
#undef OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS
// rgb / rgba / bgr / bgra -> hsv, hsv4
OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(rgb_to_hsv, 3, 3, 2)
OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(rgba_to_hsv, 4, 3, 2)
OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(rgb_to_hsv4, 3, 4, 2)
OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(rgba_to_hsv4, 4, 4, 2)
OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(bgr_to_hsv, 3, 3, 0)
OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(bgra_to_hsv, 4, 3, 0)
OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(bgr_to_hsv4, 3, 4, 0)
OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(bgra_to_hsv4, 4, 4, 0)
#undef OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS
// hsv / hsv4 -> rgb, rgba, bgr, bgra
OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_rgb, 3, 3, 2)
OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_rgba, 3, 4, 2)
OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_rgb, 4, 3, 2)
OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_rgba, 4, 4, 2)
OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_bgr, 3, 3, 0)
OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_bgra, 3, 4, 0)
OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_bgr, 4, 3, 0)
OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_bgra, 4, 4, 0)
#undef OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS
// rgb / rgba / bgr / bgra -> hls, hls4
OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(rgb_to_hls, 3, 3, 2)
OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(rgba_to_hls, 4, 3, 2)
OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(rgb_to_hls4, 3, 4, 2)
OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(rgba_to_hls4, 4, 4, 2)
OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(bgr_to_hls, 3, 3, 0)
OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(bgra_to_hls, 4, 3, 0)
OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(bgr_to_hls4, 3, 4, 0)
OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(bgra_to_hls4, 4, 4, 0)
#undef OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS
// hls / hls4 -> rgb, rgba, bgr, bgra
OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls_to_rgb, 3, 3, 2)
OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls_to_rgba, 3, 4, 2)
OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_rgb, 4, 3, 2)
OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_rgba, 4, 4, 2)
OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls_to_bgr, 3, 3, 0)
OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls_to_bgra, 3, 4, 0)
OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_bgr, 4, 3, 0)
OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_bgra, 4, 4, 0)
#undef OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS
// rgb / bgr (flag true) and lrgb / lbgr (flag false) -> lab, lab4
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(rgb_to_lab, 3, 3, true, 2)
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(rgba_to_lab, 4, 3, true, 2)
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(rgb_to_lab4, 3, 4, true, 2)
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(rgba_to_lab4, 4, 4, true, 2)
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(bgr_to_lab, 3, 3, true, 0)
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(bgra_to_lab, 4, 3, true, 0)
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(bgr_to_lab4, 3, 4, true, 0)
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(bgra_to_lab4, 4, 4, true, 0)
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lrgb_to_lab, 3, 3, false, 2)
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lrgba_to_lab, 4, 3, false, 2)
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lrgb_to_lab4, 3, 4, false, 2)
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lrgba_to_lab4, 4, 4, false, 2)
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lbgr_to_lab, 3, 3, false, 0)
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lbgra_to_lab, 4, 3, false, 0)
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lbgr_to_lab4, 3, 4, false, 0)
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lbgra_to_lab4, 4, 4, false, 0)
#undef OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS
// lab / lab4 -> rgb / bgr (flag true) and lrgb / lbgr (flag false)
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_rgb, 3, 3, true, 2)
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_rgb, 4, 3, true, 2)
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_rgba, 3, 4, true, 2)
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_rgba, 4, 4, true, 2)
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_bgr, 3, 3, true, 0)
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_bgr, 4, 3, true, 0)
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_bgra, 3, 4, true, 0)
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_bgra, 4, 4, true, 0)
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_lrgb, 3, 3, false, 2)
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_lrgb, 4, 3, false, 2)
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_lrgba, 3, 4, false, 2)
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_lrgba, 4, 4, false, 2)
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_lbgr, 3, 3, false, 0)
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_lbgr, 4, 3, false, 0)
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_lbgra, 3, 4, false, 0)
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_lbgra, 4, 4, false, 0)
#undef OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS
// rgb / bgr (flag true) and lrgb / lbgr (flag false) -> luv, luv4
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(rgb_to_luv, 3, 3, true, 2)
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(rgba_to_luv, 4, 3, true, 2)
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(rgb_to_luv4, 3, 4, true, 2)
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(rgba_to_luv4, 4, 4, true, 2)
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(bgr_to_luv, 3, 3, true, 0)
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(bgra_to_luv, 4, 3, true, 0)
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(bgr_to_luv4, 3, 4, true, 0)
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(bgra_to_luv4, 4, 4, true, 0)
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lrgb_to_luv, 3, 3, false, 2)
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lrgba_to_luv, 4, 3, false, 2)
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lrgb_to_luv4, 3, 4, false, 2)
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lrgba_to_luv4, 4, 4, false, 2)
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lbgr_to_luv, 3, 3, false, 0)
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lbgra_to_luv, 4, 3, false, 0)
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lbgr_to_luv4, 3, 4, false, 0)
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lbgra_to_luv4, 4, 4, false, 0)
#undef OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS
// luv / luv4 -> rgb / bgr (flag true) and lrgb / lbgr (flag false)
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_rgb, 3, 3, true, 2)
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_rgb, 4, 3, true, 2)
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_rgba, 3, 4, true, 2)
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_rgba, 4, 4, true, 2)
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_bgr, 3, 3, true, 0)
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_bgr, 4, 3, true, 0)
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_bgra, 3, 4, true, 0)
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_bgra, 4, 4, true, 0)
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_lrgb, 3, 3, false, 2)
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_lrgb, 4, 3, false, 2)
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_lrgba, 3, 4, false, 2)
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_lrgba, 4, 4, false, 2)
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_lbgr, 3, 3, false, 0)
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_lbgr, 4, 3, false, 0)
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_lbgra, 3, 4, false, 0)
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_lbgra, 4, 4, false, 0)
#undef OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS
}}} // namespace cv { namespace cuda { namespace device
//! @endcond
#endif // __OPENCV_CUDA_COLOR_HPP__

View File

@ -0,0 +1,109 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_CUDA_COMMON_HPP__
#define __OPENCV_CUDA_COMMON_HPP__
#include <cuda_runtime.h>
#include "opencv2/core/cuda_types.hpp"
#include "opencv2/core/cvdef.h"
#include "opencv2/core/base.hpp"
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
// Single-precision pi. Defined only if the includer has not already provided
// CV_PI_F: derived from CV_PI by a cast to float when CV_PI is available,
// otherwise a float literal is used.
#ifndef CV_PI_F
#ifndef CV_PI
#define CV_PI_F 3.14159265f
#else
#define CV_PI_F ((float)CV_PI)
#endif
#endif
namespace cv { namespace cuda {
    // Reports a failed CUDA runtime call through cv::error with the code
    // cv::Error::GpuApiCallError and the text from cudaGetErrorString(err).
    // Does nothing when err is cudaSuccess. file / line / func identify the
    // call site and are forwarded to cv::error.
    static inline void checkCudaError(cudaError_t err, const char* file, const int line, const char* func)
    {
        if (err == cudaSuccess)
            return;

        cv::error(cv::Error::GpuApiCallError, cudaGetErrorString(err), func, file, line);
    }
}}
// Wraps an expression yielding cudaError_t and passes it, together with the
// current file, line and function (CV_Func), to cv::cuda::checkCudaError.
// Defined only if the includer has not already provided cudaSafeCall.
#ifndef cudaSafeCall
#define cudaSafeCall(expr) cv::cuda::checkCudaError(expr, __FILE__, __LINE__, CV_Func)
#endif
namespace cv { namespace cuda
{
    // True when the address held by ptr is a multiple of size (in bytes).
    // size must be non-zero.
    template <typename T> static inline bool isAligned(const T* ptr, size_t size)
    {
        const size_t address = reinterpret_cast<size_t>(ptr);
        const size_t remainder = address % size;
        return remainder == 0;
    }

    // True when step is a multiple of size. size must be non-zero.
    static inline bool isAligned(size_t step, size_t size)
    {
        const size_t remainder = step % size;
        return remainder == 0;
    }
}}
namespace cv { namespace cuda
{
namespace device
{
__host__ __device__ __forceinline__ int divUp(int total, int grain)
{
return (total + grain - 1) / grain;
}
template<class T> inline void bindTexture(const textureReference* tex, const PtrStepSz<T>& img)
{
cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
cudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) );
}
}
}}
//! @endcond
#endif // __OPENCV_CUDA_COMMON_HPP__

View File

@ -0,0 +1,113 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_CUDA_DATAMOV_UTILS_HPP__
#define __OPENCV_CUDA_DATAMOV_UTILS_HPP__
#include "common.hpp"
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
// ForceGlob<T>::Load(ptr, offset, val) reads the element at ptr + offset into val.
// Two implementations exist, selected by the compute capability being compiled for.
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 200
// For Fermi (__CUDA_ARCH__ >= 200) the memory space is detected automatically,
// so a plain indexed read is sufficient for every T.
template <typename T> struct ForceGlob
{
__device__ __forceinline__ static void Load(const T* ptr, int offset, T& val) { val = ptr[offset]; }
};
#else // __CUDA_ARCH__ >= 200
// Otherwise only the explicit specializations generated below exist; each one issues an
// inline PTX "ld.global" instruction on the address ptr + offset.
#if defined(_WIN64) || defined(__LP64__)
// 64-bit register modifier for inlined asm
#define OPENCV_CUDA_ASM_PTR "l"
#else
// 32-bit register modifier for inlined asm
#define OPENCV_CUDA_ASM_PTR "r"
#endif
template<class T> struct ForceGlob;
// Generates ForceGlob<base_type>: loads with "ld.global.<ptx_type>" and binds val as the
// asm output operand using the register constraint "=<reg_mod>".
#define OPENCV_CUDA_DEFINE_FORCE_GLOB(base_type, ptx_type, reg_mod) \
template <> struct ForceGlob<base_type> \
{ \
__device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \
{ \
asm("ld.global."#ptx_type" %0, [%1];" : "="#reg_mod(val) : OPENCV_CUDA_ASM_PTR(ptr + offset)); \
} \
};
// Variant used for the one-byte types below: the output operand is val reinterpreted as a
// uint and bound with the "=r" constraint.
// NOTE(review): this writes through a uint* alias of a one-byte object, presumably because
// inline PTX offers no 8-bit register constraint - confirm.
#define OPENCV_CUDA_DEFINE_FORCE_GLOB_B(base_type, ptx_type) \
template <> struct ForceGlob<base_type> \
{ \
__device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \
{ \
asm("ld.global."#ptx_type" %0, [%1];" : "=r"(*reinterpret_cast<uint*>(&val)) : OPENCV_CUDA_ASM_PTR(ptr + offset)); \
} \
};
// Specializations: (C++ type, PTX type suffix[, register constraint letter]).
OPENCV_CUDA_DEFINE_FORCE_GLOB_B(uchar, u8)
OPENCV_CUDA_DEFINE_FORCE_GLOB_B(schar, s8)
OPENCV_CUDA_DEFINE_FORCE_GLOB_B(char, b8)
OPENCV_CUDA_DEFINE_FORCE_GLOB (ushort, u16, h)
OPENCV_CUDA_DEFINE_FORCE_GLOB (short, s16, h)
OPENCV_CUDA_DEFINE_FORCE_GLOB (uint, u32, r)
OPENCV_CUDA_DEFINE_FORCE_GLOB (int, s32, r)
OPENCV_CUDA_DEFINE_FORCE_GLOB (float, f32, f)
OPENCV_CUDA_DEFINE_FORCE_GLOB (double, f64, d)
// The helper macros are local to this header and are removed again here.
#undef OPENCV_CUDA_DEFINE_FORCE_GLOB
#undef OPENCV_CUDA_DEFINE_FORCE_GLOB_B
#undef OPENCV_CUDA_ASM_PTR
#endif // __CUDA_ARCH__ >= 200
}}} // namespace cv { namespace cuda { namespace device
//! @endcond
#endif // __OPENCV_CUDA_DATAMOV_UTILS_HPP__

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,365 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_CUDA_REDUCE_DETAIL_HPP__
#define __OPENCV_CUDA_REDUCE_DETAIL_HPP__
#include <thrust/tuple.h>
#include "../warp.hpp"
#include "../warp_shuffle.hpp"
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
namespace reduce_detail
{
// Maps a pointer, volatile-pointer or reference type onto the type it refers to.
// Only these three forms are supported; the primary template is left undefined.
template <typename Elem> struct GetType;
template <typename Elem> struct GetType<Elem*>
{
    typedef Elem type;
};
template <typename Elem> struct GetType<volatile Elem*>
{
    typedef Elem type;
};
template <typename Elem> struct GetType<Elem&>
{
    typedef Elem type;
};
// Compile-time loop over tuple slots: For<I, N> processes slot I and then delegates the
// remaining slots to For<I + 1, N>; the For<N, N> specialization stops the recursion.
// The value tuples are taken by const reference but their elements are assigned to,
// so they are expected to hold references.
template <unsigned int I, unsigned int N>
struct For
{
private:
    typedef For<I + 1, N> Next;

public:
    // Stores element I of `val` at index `tid` of the I-th buffer in `smem`.
    template <class PointerTuple, class ValTuple>
    static __device__ void loadToSmem(const PointerTuple& smem, const ValTuple& val, unsigned int tid)
    {
        thrust::get<I>(smem)[tid] = thrust::get<I>(val);
        Next::loadToSmem(smem, val, tid);
    }

    // Reads index `tid` of the I-th buffer in `smem` back into element I of `val`.
    template <class PointerTuple, class ValTuple>
    static __device__ void loadFromSmem(const PointerTuple& smem, const ValTuple& val, unsigned int tid)
    {
        thrust::get<I>(val) = thrust::get<I>(smem)[tid];
        Next::loadFromSmem(smem, val, tid);
    }

    // Combines element I of `val` with the buffer entry at `tid + delta` using the I-th
    // operation, and writes the result to both `val` and the buffer entry at `tid`.
    template <class PointerTuple, class ValTuple, class OpTuple>
    static __device__ void merge(const PointerTuple& smem, const ValTuple& val, unsigned int tid, unsigned int delta, const OpTuple& op)
    {
        typedef typename thrust::tuple_element<I, PointerTuple>::type slot_pointer;
        typename GetType<slot_pointer>::type other = thrust::get<I>(smem)[tid + delta];
        thrust::get<I>(val) = thrust::get<I>(op)(thrust::get<I>(val), other);
        thrust::get<I>(smem)[tid] = thrust::get<I>(val);
        Next::merge(smem, val, tid, delta, op);
    }

    // Combines element I of `val` with the value obtained from shfl_down(val, delta, width)
    // using the I-th operation.
    template <class ValTuple, class OpTuple>
    static __device__ void mergeShfl(const ValTuple& val, unsigned int delta, unsigned int width, const OpTuple& op)
    {
        typedef typename thrust::tuple_element<I, ValTuple>::type slot_reference;
        typename GetType<slot_reference>::type other = shfl_down(thrust::get<I>(val), delta, width);
        thrust::get<I>(val) = thrust::get<I>(op)(thrust::get<I>(val), other);
        Next::mergeShfl(val, delta, width, op);
    }
};
// End of the compile-time loop: once the slot index reaches N, every operation is a no-op.
template <unsigned int N>
struct For<N, N>
{
    template <class PointerTuple, class ValTuple>
    static __device__ void loadToSmem(const PointerTuple&, const ValTuple&, unsigned int) {}

    template <class PointerTuple, class ValTuple>
    static __device__ void loadFromSmem(const PointerTuple&, const ValTuple&, unsigned int) {}

    template <class PointerTuple, class ValTuple, class OpTuple>
    static __device__ void merge(const PointerTuple&, const ValTuple&, unsigned int, unsigned int, const OpTuple&) {}

    template <class ValTuple, class OpTuple>
    static __device__ void mergeShfl(const ValTuple&, unsigned int, unsigned int, const OpTuple&) {}
};
// Stores `val` at index `tid` of the buffer `smem`.
template <typename T>
__device__ __forceinline__ void loadToSmem(volatile T* smem, T& val, unsigned int tid)
{
    volatile T& slot = smem[tid];
    slot = val;
}
// Reads index `tid` of the buffer `smem` into `val`.
template <typename T>
__device__ __forceinline__ void loadFromSmem(volatile T* smem, T& val, unsigned int tid)
{
    volatile T& slot = smem[tid];
    val = slot;
}
// Tuple overload: stores every element of `val` at index `tid` of the matching buffer in
// `smem`. The number of slots processed is the size of the pointer tuple.
template <typename P0, typename P1, typename P2, typename P3, typename P4,
          typename P5, typename P6, typename P7, typename P8, typename P9,
          typename R0, typename R1, typename R2, typename R3, typename R4,
          typename R5, typename R6, typename R7, typename R8, typename R9>
__device__ __forceinline__ void loadToSmem(const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
                                           const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
                                           unsigned int tid)
{
    typedef thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> pointer_tuple;
    For<0, thrust::tuple_size<pointer_tuple>::value>::loadToSmem(smem, val, tid);
}
// Tuple overload: reads index `tid` of every buffer in `smem` into the matching element of
// `val`. The number of slots processed is the size of the pointer tuple.
template <typename P0, typename P1, typename P2, typename P3, typename P4,
          typename P5, typename P6, typename P7, typename P8, typename P9,
          typename R0, typename R1, typename R2, typename R3, typename R4,
          typename R5, typename R6, typename R7, typename R8, typename R9>
__device__ __forceinline__ void loadFromSmem(const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
                                             const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
                                             unsigned int tid)
{
    typedef thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> pointer_tuple;
    For<0, thrust::tuple_size<pointer_tuple>::value>::loadFromSmem(smem, val, tid);
}
// Combines `val` with the buffer entry at `tid + delta` using `op`; the result is stored in
// `val` and written back to the buffer entry at `tid`.
template <typename T, class Op>
__device__ __forceinline__ void merge(volatile T* smem, T& val, unsigned int tid, unsigned int delta, const Op& op)
{
    T neighbour = smem[tid + delta];
    val = op(val, neighbour);
    smem[tid] = val;
}
// Combines `val` with the value returned by shfl_down(val, delta, width) using `op`.
template <typename T, class Op>
__device__ __forceinline__ void mergeShfl(T& val, unsigned int delta, unsigned int width, const Op& op)
{
    T shifted = shfl_down(val, delta, width);
    val = op(val, shifted);
}
// Tuple overload of merge: applies the slot-wise merge to every slot, pairing the I-th
// buffer, value and operation. The slot count is the size of the pointer tuple.
template <typename P0, typename P1, typename P2, typename P3, typename P4,
          typename P5, typename P6, typename P7, typename P8, typename P9,
          typename R0, typename R1, typename R2, typename R3, typename R4,
          typename R5, typename R6, typename R7, typename R8, typename R9,
          class Op0, class Op1, class Op2, class Op3, class Op4,
          class Op5, class Op6, class Op7, class Op8, class Op9>
__device__ __forceinline__ void merge(const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
                                      const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
                                      unsigned int tid,
                                      unsigned int delta,
                                      const thrust::tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>& op)
{
    typedef thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> pointer_tuple;
    For<0, thrust::tuple_size<pointer_tuple>::value>::merge(smem, val, tid, delta, op);
}
// Tuple overload of mergeShfl: applies the slot-wise shuffle merge to every slot, pairing
// the I-th value with the I-th operation. The slot count is the size of the value tuple.
template <typename R0, typename R1, typename R2, typename R3, typename R4,
          typename R5, typename R6, typename R7, typename R8, typename R9,
          class Op0, class Op1, class Op2, class Op3, class Op4,
          class Op5, class Op6, class Op7, class Op8, class Op9>
__device__ __forceinline__ void mergeShfl(const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
                                          unsigned int delta,
                                          unsigned int width,
                                          const thrust::tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>& op)
{
    typedef thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9> value_tuple;
    For<0, thrust::tuple_size<value_tuple>::value>::mergeShfl(val, delta, width, op);
}
// Tree reduction over N buffer slots using merge().
// Each thread first stores its value at smem[tid]. The strides 1024, 512, ..., 1 are then
// applied in turn: a thread with tid < stride merges slot tid + stride into slot tid and
// into its own val via op. Strides that are too large for N are skipped at compile time.
template <unsigned int N> struct Generic
{
// smem - buffer (or tuple of buffers) indexed by tid
// val  - this thread's value (or tuple of references); updated in place by each merge
// tid  - index of the calling thread within the reduction
// op   - binary operation (or tuple of operations) used to combine two values
template <typename Pointer, typename Reference, class Op>
static __device__ void reduce(Pointer smem, Reference val, unsigned int tid, Op op)
{
loadToSmem(smem, val, tid);
// The barrier after the initial store is skipped when N < 32.
// NOTE(review): presumably because such a reduction stays within one warp - confirm.
if (N >= 32)
__syncthreads();
// Strides 1024 down to 64: every step is followed by a block-wide barrier.
if (N >= 2048)
{
if (tid < 1024)
merge(smem, val, tid, 1024, op);
__syncthreads();
}
if (N >= 1024)
{
if (tid < 512)
merge(smem, val, tid, 512, op);
__syncthreads();
}
if (N >= 512)
{
if (tid < 256)
merge(smem, val, tid, 256, op);
__syncthreads();
}
if (N >= 256)
{
if (tid < 128)
merge(smem, val, tid, 128, op);
__syncthreads();
}
if (N >= 128)
{
if (tid < 64)
merge(smem, val, tid, 64, op);
__syncthreads();
}
// From stride 32 downwards no barrier is issued between the steps.
// NOTE(review): looks like this relies on warp-synchronous execution and on the buffer
// being accessed through a volatile pointer - confirm for the target architecture.
if (N >= 64)
{
if (tid < 32)
merge(smem, val, tid, 32, op);
}
// The last five steps run for every N and read slots up to index tid + 16.
// NOTE(review): for N < 32 this touches slots beyond N - 1 - confirm callers size the
// buffer accordingly.
if (tid < 16)
{
merge(smem, val, tid, 16, op);
merge(smem, val, tid, 8, op);
merge(smem, val, tid, 4, op);
merge(smem, val, tid, 2, op);
merge(smem, val, tid, 1, op);
}
}
};
// Compile-time unrolled sequence of merge steps with strides I, I/2, I/4, ..., 1.
// Each level performs one step and hands over to Unroll<I / 2, ...>; the Unroll<0, ...>
// specialization ends the sequence.
template <unsigned int I, typename Pointer, typename Reference, class Op>
struct Unroll
{
private:
    typedef Unroll<I / 2, Pointer, Reference, Op> Half;

public:
    // Shuffle variant: merges with stride I and width N, then continues with I / 2.
    static __device__ void loopShfl(Reference val, Op op, unsigned int N)
    {
        mergeShfl(val, I, N, op);
        Half::loopShfl(val, op, N);
    }

    // Buffer variant: merges slot tid + I into slot tid, then continues with I / 2.
    static __device__ void loop(Pointer smem, Reference val, unsigned int tid, Op op)
    {
        merge(smem, val, tid, I, op);
        Half::loop(smem, val, tid, op);
    }
};
// Stride 0 terminates the unrolled sequence: both variants do nothing.
template <typename Pointer, typename Reference, class Op>
struct Unroll<0, Pointer, Reference, Op>
{
    static __device__ void loopShfl(Reference, Op, unsigned int) {}

    static __device__ void loop(Pointer, Reference, unsigned int, Op) {}
};
// Reduction of N values without any block-wide barrier.
// Dispatcher selects this reductor for power-of-two N <= 32.
template <unsigned int N> struct WarpOptimized
{
// smem - buffer (or tuple of buffers) indexed by tid; unused on the shuffle path
// val  - this thread's value (or tuple of references); updated in place
// tid  - index of the calling thread; unused on the shuffle path
// op   - binary operation (or tuple of operations) used to combine two values
template <typename Pointer, typename Reference, class Op>
static __device__ void reduce(Pointer smem, Reference val, unsigned int tid, Op op)
{
#if __CUDA_ARCH__ >= 300
// Shuffle path: values are exchanged through mergeShfl with strides N/2, N/4, ..., 1
// and width N, so neither the buffer nor the thread index is needed.
(void) smem;
(void) tid;
Unroll<N / 2, Pointer, Reference, Op>::loopShfl(val, op, N);
#else
// Buffer path: store val, then the threads with tid < N / 2 apply the strides
// N/2, N/4, ..., 1 back to back.
// NOTE(review): no barrier separates the store from the merges - presumably this relies
// on warp-synchronous execution; confirm.
loadToSmem(smem, val, tid);
if (tid < N / 2)
Unroll<N / 2, Pointer, Reference, Op>::loop(smem, val, tid, op);
#endif
}
};
// Two-stage reduction of N values: every group of 32 threads is reduced first, then the
// M = N / 32 partial results are reduced by the threads with tid < 32.
// Dispatcher selects this reductor for power-of-two N in (32, 1024].
template <unsigned int N> struct GenericOptimized32
{
// Number of 32-thread partial results produced by the first stage.
enum { M = N / 32 };
// smem - buffer (or tuple of buffers) indexed by tid
// val  - this thread's value (or tuple of references); updated in place
// tid  - index of the calling thread within the reduction
// op   - binary operation (or tuple of operations) used to combine two values
template <typename Pointer, typename Reference, class Op>
static __device__ void reduce(Pointer smem, Reference val, unsigned int tid, Op op)
{
const unsigned int laneId = Warp::laneId();
#if __CUDA_ARCH__ >= 300
// Stage 1, shuffle path: reduce with strides 16 ... 1, then lane 0 stores the
// partial result at index tid / 32.
Unroll<16, Pointer, Reference, Op>::loopShfl(val, op, warpSize);
if (laneId == 0)
loadToSmem(smem, val, tid / 32);
#else
// Stage 1, buffer path: store every value, let lanes 0..15 apply strides 16 ... 1,
// then (after a barrier) lane 0 stores the partial result at index tid / 32.
loadToSmem(smem, val, tid);
if (laneId < 16)
Unroll<16, Pointer, Reference, Op>::loop(smem, val, tid, op);
__syncthreads();
if (laneId == 0)
loadToSmem(smem, val, tid / 32);
#endif
// Make the partial results visible to all threads before they are read back.
__syncthreads();
loadFromSmem(smem, val, tid);
// Stage 2: the threads with tid < 32 reduce the M partial results with strides M/2 ... 1.
if (tid < 32)
{
#if __CUDA_ARCH__ >= 300
Unroll<M / 2, Pointer, Reference, Op>::loopShfl(val, op, M);
#else
Unroll<M / 2, Pointer, Reference, Op>::loop(smem, val, tid, op);
#endif
}
}
};
// Compile-time type selection: StaticIf<Condition, Then, Else>::type is Then when
// Condition is true and Else otherwise.
template <bool Condition, class Then, class Else> struct StaticIf;
template <class Then, class Else> struct StaticIf<true, Then, Else>
{
    typedef Then type;
};
template <class Then, class Else> struct StaticIf<false, Then, Else>
{
    typedef Else type;
};
// IsPowerOf2<N>::value is non-zero exactly when N is a non-zero power of two,
// i.e. when N has a single bit set.
template <unsigned int N> struct IsPowerOf2
{
    enum { value = (N != 0) && ((N & (N - 1)) == 0) };
};
template <unsigned int N> struct Dispatcher
{
typedef typename StaticIf<
(N <= 32) && IsPowerOf2<N>::value,
WarpOptimized<N>,
typename StaticIf<
(N <= 1024) && IsPowerOf2<N>::value,
GenericOptimized32<N>,
Generic<N>
>::type
>::type reductor;
};
}
}}}
//! @endcond
#endif // __OPENCV_CUDA_REDUCE_DETAIL_HPP__

View File

@ -0,0 +1,502 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_CUDA_PRED_VAL_REDUCE_DETAIL_HPP__
#define __OPENCV_CUDA_PRED_VAL_REDUCE_DETAIL_HPP__
#include <thrust/tuple.h>
#include "../warp.hpp"
#include "../warp_shuffle.hpp"
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
namespace reduce_key_val_detail
{
// Maps a pointer, volatile-pointer or reference type onto the type it refers to.
// Only these three forms are supported; the primary template is left undefined.
template <typename Elem> struct GetType;
template <typename Elem> struct GetType<Elem*>
{
    typedef Elem type;
};
template <typename Elem> struct GetType<volatile Elem*>
{
    typedef Elem type;
};
template <typename Elem> struct GetType<Elem&>
{
    typedef Elem type;
};
// Compile-time loop over tuple slots: For<I, N> processes slot I and then delegates the
// remaining slots to For<I + 1, N>; the For<N, N> specialization stops the recursion.
// The reference tuples are taken by const reference but their elements are assigned to,
// so they are expected to hold references.
template <unsigned int I, unsigned int N>
struct For
{
private:
    typedef For<I + 1, N> Next;

public:
    // Stores element I of `data` at index `tid` of the I-th buffer in `smem`.
    template <class PointerTuple, class ReferenceTuple>
    static __device__ void loadToSmem(const PointerTuple& smem, const ReferenceTuple& data, unsigned int tid)
    {
        thrust::get<I>(smem)[tid] = thrust::get<I>(data);
        Next::loadToSmem(smem, data, tid);
    }

    // Reads index `tid` of the I-th buffer in `smem` back into element I of `data`.
    template <class PointerTuple, class ReferenceTuple>
    static __device__ void loadFromSmem(const PointerTuple& smem, const ReferenceTuple& data, unsigned int tid)
    {
        thrust::get<I>(data) = thrust::get<I>(smem)[tid];
        Next::loadFromSmem(smem, data, tid);
    }

    // Replaces element I of `val` with the result of shfl_down(val, delta, width).
    template <class ReferenceTuple>
    static __device__ void copyShfl(const ReferenceTuple& val, unsigned int delta, int width)
    {
        thrust::get<I>(val) = shfl_down(thrust::get<I>(val), delta, width);
        Next::copyShfl(val, delta, width);
    }

    // Copies the I-th buffer entry at `tid + delta` into element I of `val` and into the
    // buffer entry at `tid`.
    template <class PointerTuple, class ReferenceTuple>
    static __device__ void copy(const PointerTuple& svals, const ReferenceTuple& val, unsigned int tid, unsigned int delta)
    {
        thrust::get<I>(val) = thrust::get<I>(svals)[tid + delta];
        thrust::get<I>(svals)[tid] = thrust::get<I>(val);
        Next::copy(svals, val, tid, delta);
    }

    // Shuffle merge for slot I: fetches the candidate key via shfl_down; when the I-th
    // comparator accepts (candidate, current key), the key is replaced and the value is
    // replaced with the result of its own shfl_down.
    template <class KeyReferenceTuple, class ValReferenceTuple, class CmpTuple>
    static __device__ void mergeShfl(const KeyReferenceTuple& key, const ValReferenceTuple& val, const CmpTuple& cmp, unsigned int delta, int width)
    {
        typedef typename thrust::tuple_element<I, KeyReferenceTuple>::type key_reference;
        typename GetType<key_reference>::type candidate = shfl_down(thrust::get<I>(key), delta, width);
        if (thrust::get<I>(cmp)(candidate, thrust::get<I>(key)))
        {
            thrust::get<I>(key) = candidate;
            thrust::get<I>(val) = shfl_down(thrust::get<I>(val), delta, width);
        }
        Next::mergeShfl(key, val, cmp, delta, width);
    }

    // Buffer merge for slot I: reads the candidate key at `tid + delta`; when the I-th
    // comparator accepts (candidate, current key), key and value are both replaced by the
    // entries at `tid + delta` and written back to the entries at `tid`.
    template <class KeyPointerTuple, class KeyReferenceTuple, class ValPointerTuple, class ValReferenceTuple, class CmpTuple>
    static __device__ void merge(const KeyPointerTuple& skeys, const KeyReferenceTuple& key,
                                 const ValPointerTuple& svals, const ValReferenceTuple& val,
                                 const CmpTuple& cmp,
                                 unsigned int tid, unsigned int delta)
    {
        typedef typename thrust::tuple_element<I, KeyPointerTuple>::type key_pointer;
        typename GetType<key_pointer>::type candidate = thrust::get<I>(skeys)[tid + delta];
        if (thrust::get<I>(cmp)(candidate, thrust::get<I>(key)))
        {
            thrust::get<I>(key) = candidate;
            thrust::get<I>(skeys)[tid] = thrust::get<I>(key);
            thrust::get<I>(val) = thrust::get<I>(svals)[tid + delta];
            thrust::get<I>(svals)[tid] = thrust::get<I>(val);
        }
        Next::merge(skeys, key, svals, val, cmp, tid, delta);
    }
};
// End of the compile-time loop: once the slot index reaches N, every operation is a no-op.
template <unsigned int N>
struct For<N, N>
{
    template <class PointerTuple, class ReferenceTuple>
    static __device__ void loadToSmem(const PointerTuple&, const ReferenceTuple&, unsigned int) {}

    template <class PointerTuple, class ReferenceTuple>
    static __device__ void loadFromSmem(const PointerTuple&, const ReferenceTuple&, unsigned int) {}

    template <class ReferenceTuple>
    static __device__ void copyShfl(const ReferenceTuple&, unsigned int, int) {}

    template <class PointerTuple, class ReferenceTuple>
    static __device__ void copy(const PointerTuple&, const ReferenceTuple&, unsigned int, unsigned int) {}

    template <class KeyReferenceTuple, class ValReferenceTuple, class CmpTuple>
    static __device__ void mergeShfl(const KeyReferenceTuple&, const ValReferenceTuple&, const CmpTuple&, unsigned int, int) {}

    template <class KeyPointerTuple, class KeyReferenceTuple, class ValPointerTuple, class ValReferenceTuple, class CmpTuple>
    static __device__ void merge(const KeyPointerTuple&, const KeyReferenceTuple&,
                                 const ValPointerTuple&, const ValReferenceTuple&,
                                 const CmpTuple&,
                                 unsigned int, unsigned int) {}
};
//////////////////////////////////////////////////////
// loadToSmem
// Stores `data` at index `tid` of the buffer `smem`.
template <typename T>
__device__ __forceinline__ void loadToSmem(volatile T* smem, T& data, unsigned int tid)
{
    volatile T& slot = smem[tid];
    slot = data;
}
// Reads index `tid` of the buffer `smem` into `data`.
template <typename T>
__device__ __forceinline__ void loadFromSmem(volatile T* smem, T& data, unsigned int tid)
{
    volatile T& slot = smem[tid];
    data = slot;
}
// Tuple overload: stores every element of `data` at index `tid` of the matching buffer in
// `smem`. The number of slots processed is the size of the pointer tuple.
template <typename VP0, typename VP1, typename VP2, typename VP3, typename VP4,
          typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
          typename VR0, typename VR1, typename VR2, typename VR3, typename VR4,
          typename VR5, typename VR6, typename VR7, typename VR8, typename VR9>
__device__ __forceinline__ void loadToSmem(const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& smem,
                                           const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& data,
                                           unsigned int tid)
{
    typedef thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> pointer_tuple;
    For<0, thrust::tuple_size<pointer_tuple>::value>::loadToSmem(smem, data, tid);
}
// Tuple overload: reads index `tid` of every buffer in `smem` into the matching element of
// `data`. The number of slots processed is the size of the pointer tuple.
template <typename VP0, typename VP1, typename VP2, typename VP3, typename VP4,
          typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
          typename VR0, typename VR1, typename VR2, typename VR3, typename VR4,
          typename VR5, typename VR6, typename VR7, typename VR8, typename VR9>
__device__ __forceinline__ void loadFromSmem(const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& smem,
                                             const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& data,
                                             unsigned int tid)
{
    typedef thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> pointer_tuple;
    For<0, thrust::tuple_size<pointer_tuple>::value>::loadFromSmem(smem, data, tid);
}
//////////////////////////////////////////////////////
// copyVals
// Replaces `val` with the result of shfl_down(val, delta, width).
template <typename V>
__device__ __forceinline__ void copyValsShfl(V& val, unsigned int delta, int width)
{
    V shifted = shfl_down(val, delta, width);
    val = shifted;
}
// Copies the buffer entry at `tid + delta` into `val` and into the buffer entry at `tid`.
template <typename V>
__device__ __forceinline__ void copyVals(volatile V* svals, V& val, unsigned int tid, unsigned int delta)
{
    val = svals[tid + delta];
    svals[tid] = val;
}
// Tuple overload: replaces every element of `val` with the result of its own shfl_down.
// The number of slots processed is the size of the value tuple.
template <typename VR0, typename VR1, typename VR2, typename VR3, typename VR4,
          typename VR5, typename VR6, typename VR7, typename VR8, typename VR9>
__device__ __forceinline__ void copyValsShfl(const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
                                             unsigned int delta,
                                             int width)
{
    typedef thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9> value_tuple;
    For<0, thrust::tuple_size<value_tuple>::value>::copyShfl(val, delta, width);
}
// Tuple overload: for every slot, copies the buffer entry at `tid + delta` into the matching
// element of `val` and into the buffer entry at `tid`. The number of slots processed is the
// size of the pointer tuple.
template <typename VP0, typename VP1, typename VP2, typename VP3, typename VP4,
          typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
          typename VR0, typename VR1, typename VR2, typename VR3, typename VR4,
          typename VR5, typename VR6, typename VR7, typename VR8, typename VR9>
__device__ __forceinline__ void copyVals(const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
                                         const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
                                         unsigned int tid, unsigned int delta)
{
    typedef thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> pointer_tuple;
    For<0, thrust::tuple_size<pointer_tuple>::value>::copy(svals, val, tid, delta);
}
//////////////////////////////////////////////////////
// merge
// Shuffle merge of one key/value pair: fetches the candidate key via shfl_down and, when
// cmp(candidate, key) holds, adopts the candidate key and refreshes `val` via copyValsShfl.
template <typename K, typename V, class Cmp>
__device__ __forceinline__ void mergeShfl(K& key, V& val, const Cmp& cmp, unsigned int delta, int width)
{
    K candidate = shfl_down(key, delta, width);
    if (!cmp(candidate, key))
        return;

    key = candidate;
    copyValsShfl(val, delta, width);
}
// Buffer merge of one key/value pair: reads the candidate key at `tid + delta` and, when
// cmp(candidate, key) holds, adopts it (in `key` and at skeys[tid]) and copies the matching
// value via copyVals.
template <typename K, typename V, class Cmp>
__device__ __forceinline__ void merge(volatile K* skeys, K& key, volatile V* svals, V& val, const Cmp& cmp, unsigned int tid, unsigned int delta)
{
    K candidate = skeys[tid + delta];
    if (!cmp(candidate, key))
        return;

    key = candidate;
    skeys[tid] = key;
    copyVals(svals, val, tid, delta);
}
// Shuffle merge of a single key with a tuple of values: fetches the candidate key via
// shfl_down and, when cmp(candidate, key) holds, adopts the candidate key and refreshes
// every value via the tuple overload of copyValsShfl.
template <typename K,
          typename VR0, typename VR1, typename VR2, typename VR3, typename VR4,
          typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
          class Cmp>
__device__ __forceinline__ void mergeShfl(K& key,
                                          const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
                                          const Cmp& cmp,
                                          unsigned int delta, int width)
{
    K candidate = shfl_down(key, delta, width);
    if (!cmp(candidate, key))
        return;

    key = candidate;
    copyValsShfl(val, delta, width);
}
// Buffer merge of a single key with a tuple of values: reads the candidate key at
// `tid + delta` and, when cmp(candidate, key) holds, adopts it (in `key` and at skeys[tid])
// and copies every matching value via the tuple overload of copyVals.
template <typename K,
          typename VP0, typename VP1, typename VP2, typename VP3, typename VP4,
          typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
          typename VR0, typename VR1, typename VR2, typename VR3, typename VR4,
          typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
          class Cmp>
__device__ __forceinline__ void merge(volatile K* skeys, K& key,
                                      const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
                                      const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
                                      const Cmp& cmp, unsigned int tid, unsigned int delta)
{
    K candidate = skeys[tid + delta];
    if (!cmp(candidate, key))
        return;

    key = candidate;
    skeys[tid] = key;
    copyVals(svals, val, tid, delta);
}
// Shuffle merge for tuples of keys, values and comparators: slot I pairs the I-th key,
// value and comparator. The number of slots processed is the size of the key tuple.
template <typename KR0, typename KR1, typename KR2, typename KR3, typename KR4,
          typename KR5, typename KR6, typename KR7, typename KR8, typename KR9,
          typename VR0, typename VR1, typename VR2, typename VR3, typename VR4,
          typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
          class Cmp0, class Cmp1, class Cmp2, class Cmp3, class Cmp4,
          class Cmp5, class Cmp6, class Cmp7, class Cmp8, class Cmp9>
__device__ __forceinline__ void mergeShfl(const thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9>& key,
                                          const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
                                          const thrust::tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9>& cmp,
                                          unsigned int delta, int width)
{
    typedef thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9> key_tuple;
    For<0, thrust::tuple_size<key_tuple>::value>::mergeShfl(key, val, cmp, delta, width);
}
// Buffer merge for tuples of keys, values and comparators: slot I pairs the I-th key buffer,
// key, value buffer, value and comparator. The number of slots processed is the size of the
// value-pointer tuple.
template <typename KP0, typename KP1, typename KP2, typename KP3, typename KP4,
          typename KP5, typename KP6, typename KP7, typename KP8, typename KP9,
          typename KR0, typename KR1, typename KR2, typename KR3, typename KR4,
          typename KR5, typename KR6, typename KR7, typename KR8, typename KR9,
          typename VP0, typename VP1, typename VP2, typename VP3, typename VP4,
          typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
          typename VR0, typename VR1, typename VR2, typename VR3, typename VR4,
          typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
          class Cmp0, class Cmp1, class Cmp2, class Cmp3, class Cmp4,
          class Cmp5, class Cmp6, class Cmp7, class Cmp8, class Cmp9>
__device__ __forceinline__ void merge(const thrust::tuple<KP0, KP1, KP2, KP3, KP4, KP5, KP6, KP7, KP8, KP9>& skeys,
                                      const thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9>& key,
                                      const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
                                      const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
                                      const thrust::tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9>& cmp,
                                      unsigned int tid, unsigned int delta)
{
    typedef thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> value_pointer_tuple;
    For<0, thrust::tuple_size<value_pointer_tuple>::value>::merge(skeys, key, svals, val, cmp, tid, delta);
}
//////////////////////////////////////////////////////
// Generic
// Tree reduction of N key/value pairs using merge().
// Each thread first stores its key at skeys[tid] and its value at svals[tid]. The strides
// 1024, 512, ..., 1 are then applied in turn: a thread with tid < stride compares the key at
// tid + stride with its own key via cmp and, when cmp accepts it, adopts that key together
// with the matching value. Strides that are too large for N are skipped at compile time.
template <unsigned int N> struct Generic
{
    // skeys - key buffer (or tuple of buffers) indexed by tid
    // key   - this thread's key (or tuple of references); updated in place
    // svals - value buffer (or tuple of buffers) indexed by tid
    // val   - this thread's value (or tuple of references); updated in place
    // tid   - index of the calling thread within the reduction
    // cmp   - comparator (or tuple of comparators) called as cmp(candidate, current)
    template <class KP, class KR, class VP, class VR, class Cmp>
    static __device__ void reduce(KP skeys, KR key, VP svals, VR val, unsigned int tid, Cmp cmp)
    {
        loadToSmem(skeys, key, tid);
        // Values use the same loadToSmem overloads as keys, exactly as in WarpOptimized and
        // GenericOptimized32. The previously called loadValsToSmem is not defined in this
        // namespace, so instantiating Generic failed to compile.
        loadToSmem(svals, val, tid);

        // The barrier after the initial store is skipped when N < 32.
        // NOTE(review): presumably because such a reduction stays within one warp - confirm.
        if (N >= 32)
            __syncthreads();

        // Strides 1024 down to 64: every step is followed by a block-wide barrier.
        if (N >= 2048)
        {
            if (tid < 1024)
                merge(skeys, key, svals, val, cmp, tid, 1024);
            __syncthreads();
        }
        if (N >= 1024)
        {
            if (tid < 512)
                merge(skeys, key, svals, val, cmp, tid, 512);
            __syncthreads();
        }
        if (N >= 512)
        {
            if (tid < 256)
                merge(skeys, key, svals, val, cmp, tid, 256);
            __syncthreads();
        }
        if (N >= 256)
        {
            if (tid < 128)
                merge(skeys, key, svals, val, cmp, tid, 128);
            __syncthreads();
        }
        if (N >= 128)
        {
            if (tid < 64)
                merge(skeys, key, svals, val, cmp, tid, 64);
            __syncthreads();
        }

        // From stride 32 downwards no barrier is issued between the steps.
        // NOTE(review): looks like this relies on warp-synchronous execution and on the
        // buffers being accessed through volatile pointers - confirm for the target
        // architecture.
        if (N >= 64)
        {
            if (tid < 32)
                merge(skeys, key, svals, val, cmp, tid, 32);
        }

        // The last five steps run for every N and read entries up to index tid + 16.
        // NOTE(review): for N < 32 this touches entries beyond N - 1 - confirm callers size
        // the buffers accordingly.
        if (tid < 16)
        {
            merge(skeys, key, svals, val, cmp, tid, 16);
            merge(skeys, key, svals, val, cmp, tid, 8);
            merge(skeys, key, svals, val, cmp, tid, 4);
            merge(skeys, key, svals, val, cmp, tid, 2);
            merge(skeys, key, svals, val, cmp, tid, 1);
        }
    }
};
// Compile-time unrolled sequence of key/value merge steps with strides I, I/2, I/4, ..., 1.
// Each level performs one step and hands over to Unroll<I / 2, ...>; the Unroll<0, ...>
// specialization ends the sequence.
template <unsigned int I, class KP, class KR, class VP, class VR, class Cmp>
struct Unroll
{
private:
    typedef Unroll<I / 2, KP, KR, VP, VR, Cmp> Half;

public:
    // Shuffle variant: merges with stride I and width N, then continues with I / 2.
    static __device__ void loopShfl(KR key, VR val, Cmp cmp, unsigned int N)
    {
        mergeShfl(key, val, cmp, I, N);
        Half::loopShfl(key, val, cmp, N);
    }

    // Buffer variant: merges the entry at tid + I into the entry at tid, then continues
    // with I / 2.
    static __device__ void loop(KP skeys, KR key, VP svals, VR val, unsigned int tid, Cmp cmp)
    {
        merge(skeys, key, svals, val, cmp, tid, I);
        Half::loop(skeys, key, svals, val, tid, cmp);
    }
};
// Stride 0 terminates the unrolled sequence: both variants do nothing.
template <class KP, class KR, class VP, class VR, class Cmp>
struct Unroll<0, KP, KR, VP, VR, Cmp>
{
    static __device__ void loopShfl(KR, VR, Cmp, unsigned int) {}

    static __device__ void loop(KP, KR, VP, VR, unsigned int, Cmp) {}
};
// Key/value reduction of N pairs without any block-wide barrier.
// Dispatcher selects this reductor for power-of-two N <= 32.
template <unsigned int N> struct WarpOptimized
{
// skeys/svals - key and value buffers (or tuples of buffers) indexed by tid
// key/val     - this thread's key and value (or tuples of references); updated in place
// tid         - index of the calling thread within the reduction
// cmp         - comparator (or tuple of comparators) called as cmp(candidate, current)
template <class KP, class KR, class VP, class VR, class Cmp>
static __device__ void reduce(KP skeys, KR key, VP svals, VR val, unsigned int tid, Cmp cmp)
{
// The shuffle-based branch is compiled out by the literal "#if 0"; the buffer-based
// branch below is always the one that is built.
#if 0 // __CUDA_ARCH__ >= 300
(void) skeys;
(void) svals;
(void) tid;
Unroll<N / 2, KP, KR, VP, VR, Cmp>::loopShfl(key, val, cmp, N);
#else
// Store key and value, then the threads with tid < N / 2 apply the strides
// N/2, N/4, ..., 1 back to back.
// NOTE(review): no barrier separates the stores from the merges - presumably this relies
// on warp-synchronous execution; confirm.
loadToSmem(skeys, key, tid);
loadToSmem(svals, val, tid);
if (tid < N / 2)
Unroll<N / 2, KP, KR, VP, VR, Cmp>::loop(skeys, key, svals, val, tid, cmp);
#endif
}
};
// Two-stage key/value reduction of N pairs: every group of 32 threads is reduced first,
// then the M = N / 32 partial results are reduced by the threads with tid < 32.
// Dispatcher selects this reductor for power-of-two N in (32, 1024].
template <unsigned int N> struct GenericOptimized32
{
// Number of 32-thread partial results produced by the first stage.
enum { M = N / 32 };
// skeys/svals - key and value buffers (or tuples of buffers) indexed by tid
// key/val     - this thread's key and value (or tuples of references); updated in place
// tid         - index of the calling thread within the reduction
// cmp         - comparator (or tuple of comparators) called as cmp(candidate, current)
template <class KP, class KR, class VP, class VR, class Cmp>
static __device__ void reduce(KP skeys, KR key, VP svals, VR val, unsigned int tid, Cmp cmp)
{
const unsigned int laneId = Warp::laneId();
// The shuffle-based branches in this function are compiled out by the literal "#if 0";
// the buffer-based branches are always the ones that are built.
#if 0 // __CUDA_ARCH__ >= 300
Unroll<16, KP, KR, VP, VR, Cmp>::loopShfl(key, val, cmp, warpSize);
if (laneId == 0)
{
loadToSmem(skeys, key, tid / 32);
loadToSmem(svals, val, tid / 32);
}
#else
// Stage 1: store every key/value, let lanes 0..15 apply strides 16 ... 1, then (after a
// barrier) lane 0 stores the partial result at index tid / 32.
loadToSmem(skeys, key, tid);
loadToSmem(svals, val, tid);
if (laneId < 16)
Unroll<16, KP, KR, VP, VR, Cmp>::loop(skeys, key, svals, val, tid, cmp);
__syncthreads();
if (laneId == 0)
{
loadToSmem(skeys, key, tid / 32);
loadToSmem(svals, val, tid / 32);
}
#endif
// Make the partial results visible to all threads before they are read back.
// Only the key is reloaded here; on the built path the values are read from svals inside
// merge when a candidate key is accepted.
__syncthreads();
loadFromSmem(skeys, key, tid);
// Stage 2: the threads with tid < 32 reduce the M partial results with strides M/2 ... 1.
if (tid < 32)
{
#if 0 // __CUDA_ARCH__ >= 300
loadFromSmem(svals, val, tid);
Unroll<M / 2, KP, KR, VP, VR, Cmp>::loopShfl(key, val, cmp, M);
#else
Unroll<M / 2, KP, KR, VP, VR, Cmp>::loop(skeys, key, svals, val, tid, cmp);
#endif
}
}
};
// Compile-time type selection: StaticIf<Condition, Then, Else>::type is Then when
// Condition is true and Else otherwise.
template <bool Condition, class Then, class Else> struct StaticIf;
template <class Then, class Else> struct StaticIf<true, Then, Else>
{
    typedef Then type;
};
template <class Then, class Else> struct StaticIf<false, Then, Else>
{
    typedef Else type;
};
// IsPowerOf2<N>::value is non-zero exactly when N is a non-zero power of two,
// i.e. when N has a single bit set.
template <unsigned int N> struct IsPowerOf2
{
    enum { value = (N != 0) && ((N & (N - 1)) == 0) };
};
template <unsigned int N> struct Dispatcher
{
typedef typename StaticIf<
(N <= 32) && IsPowerOf2<N>::value,
WarpOptimized<N>,
typename StaticIf<
(N <= 1024) && IsPowerOf2<N>::value,
GenericOptimized32<N>,
Generic<N>
>::type
>::type reductor;
};
}
}}}
//! @endcond
#endif // __OPENCV_CUDA_PRED_VAL_REDUCE_DETAIL_HPP__

View File

@ -0,0 +1,399 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_CUDA_TRANSFORM_DETAIL_HPP__
#define __OPENCV_CUDA_TRANSFORM_DETAIL_HPP__
#include "../common.hpp"
#include "../vec_traits.hpp"
#include "../functional.hpp"
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
namespace transform_detail
{
//! Read Write Traits
template <typename T, typename D, int shift> struct UnaryReadWriteTraits
{
typedef typename TypeVec<T, shift>::vec_type read_type;
typedef typename TypeVec<D, shift>::vec_type write_type;
};
template <typename T1, typename T2, typename D, int shift> struct BinaryReadWriteTraits
{
typedef typename TypeVec<T1, shift>::vec_type read_type1;
typedef typename TypeVec<T2, shift>::vec_type read_type2;
typedef typename TypeVec<D, shift>::vec_type write_type;
};
//! Transform kernels
//! Applies a functor lane by lane to a `shift`-wide vector, honouring a
//! per-element mask. Only explicit specialisations are defined.
template <int shift> struct OpUnroller;

//! One-lane unroller: writes dst.x only when mask(y, x_shifted) is true;
//! otherwise dst is left untouched.
//! Note: op is taken by non-const reference here (the 3-, 4- and 8-wide
//! specialisations take it by const reference).
template <> struct OpUnroller<1>
{
    //! Unary form: out.x = op(in.x).
    template <typename Src, typename Dst, typename UnOp, typename Mask>
    static __device__ __forceinline__ void unroll(const Src& in, Dst& out, const Mask& mask, UnOp& op, int x0, int row)
    {
        if (!mask(row, x0))
            return;
        out.x = op(in.x);
    }

    //! Binary form: out.x = op(lhs.x, rhs.x).
    template <typename Src1, typename Src2, typename Dst, typename BinOp, typename Mask>
    static __device__ __forceinline__ void unroll(const Src1& lhs, const Src2& rhs, Dst& out, const Mask& mask, BinOp& op, int x0, int row)
    {
        if (!mask(row, x0))
            return;
        out.x = op(lhs.x, rhs.x);
    }
};
//! Two-lane unroller: lane i (.x, .y) is written only when
//! mask(y, x_shifted + i) is true, so masked-out lanes keep whatever dst held.
//! Note: op is taken by non-const reference here (the 3-, 4- and 8-wide
//! specialisations take it by const reference).
template <> struct OpUnroller<2>
{
    //! Unary form: out.<lane> = op(in.<lane>).
    template <typename Src, typename Dst, typename UnOp, typename Mask>
    static __device__ __forceinline__ void unroll(const Src& in, Dst& out, const Mask& mask, UnOp& op, int x0, int row)
    {
        if (mask(row, x0))     { out.x = op(in.x); }
        if (mask(row, x0 + 1)) { out.y = op(in.y); }
    }

    //! Binary form: out.<lane> = op(lhs.<lane>, rhs.<lane>).
    template <typename Src1, typename Src2, typename Dst, typename BinOp, typename Mask>
    static __device__ __forceinline__ void unroll(const Src1& lhs, const Src2& rhs, Dst& out, const Mask& mask, BinOp& op, int x0, int row)
    {
        if (mask(row, x0))     { out.x = op(lhs.x, rhs.x); }
        if (mask(row, x0 + 1)) { out.y = op(lhs.y, rhs.y); }
    }
};
//! Three-lane unroller: lane i (.x, .y, .z) is written only when
//! mask(y, x_shifted + i) is true, so masked-out lanes keep whatever dst held.
template <> struct OpUnroller<3>
{
    //! Unary form: out.<lane> = op(in.<lane>).
    template <typename Src, typename Dst, typename UnOp, typename Mask>
    static __device__ __forceinline__ void unroll(const Src& in, Dst& out, const Mask& mask, const UnOp& op, int x0, int row)
    {
        if (mask(row, x0))     { out.x = op(in.x); }
        if (mask(row, x0 + 1)) { out.y = op(in.y); }
        if (mask(row, x0 + 2)) { out.z = op(in.z); }
    }

    //! Binary form: out.<lane> = op(lhs.<lane>, rhs.<lane>).
    template <typename Src1, typename Src2, typename Dst, typename BinOp, typename Mask>
    static __device__ __forceinline__ void unroll(const Src1& lhs, const Src2& rhs, Dst& out, const Mask& mask, const BinOp& op, int x0, int row)
    {
        if (mask(row, x0))     { out.x = op(lhs.x, rhs.x); }
        if (mask(row, x0 + 1)) { out.y = op(lhs.y, rhs.y); }
        if (mask(row, x0 + 2)) { out.z = op(lhs.z, rhs.z); }
    }
};
//! Four-lane unroller: lane i (.x, .y, .z, .w) is written only when
//! mask(y, x_shifted + i) is true, so masked-out lanes keep whatever dst held.
template <> struct OpUnroller<4>
{
    //! Unary form: out.<lane> = op(in.<lane>).
    template <typename Src, typename Dst, typename UnOp, typename Mask>
    static __device__ __forceinline__ void unroll(const Src& in, Dst& out, const Mask& mask, const UnOp& op, int x0, int row)
    {
        if (mask(row, x0))     { out.x = op(in.x); }
        if (mask(row, x0 + 1)) { out.y = op(in.y); }
        if (mask(row, x0 + 2)) { out.z = op(in.z); }
        if (mask(row, x0 + 3)) { out.w = op(in.w); }
    }

    //! Binary form: out.<lane> = op(lhs.<lane>, rhs.<lane>).
    template <typename Src1, typename Src2, typename Dst, typename BinOp, typename Mask>
    static __device__ __forceinline__ void unroll(const Src1& lhs, const Src2& rhs, Dst& out, const Mask& mask, const BinOp& op, int x0, int row)
    {
        if (mask(row, x0))     { out.x = op(lhs.x, rhs.x); }
        if (mask(row, x0 + 1)) { out.y = op(lhs.y, rhs.y); }
        if (mask(row, x0 + 2)) { out.z = op(lhs.z, rhs.z); }
        if (mask(row, x0 + 3)) { out.w = op(lhs.w, rhs.w); }
    }
};
//! Eight-lane unroller: lane i (.a0 ... .a7) is written only when
//! mask(y, x_shifted + i) is true, so masked-out lanes keep whatever dst held.
template <> struct OpUnroller<8>
{
    //! Unary form: out.a<i> = op(in.a<i>).
    template <typename Src, typename Dst, typename UnOp, typename Mask>
    static __device__ __forceinline__ void unroll(const Src& in, Dst& out, const Mask& mask, const UnOp& op, int x0, int row)
    {
        if (mask(row, x0))     { out.a0 = op(in.a0); }
        if (mask(row, x0 + 1)) { out.a1 = op(in.a1); }
        if (mask(row, x0 + 2)) { out.a2 = op(in.a2); }
        if (mask(row, x0 + 3)) { out.a3 = op(in.a3); }
        if (mask(row, x0 + 4)) { out.a4 = op(in.a4); }
        if (mask(row, x0 + 5)) { out.a5 = op(in.a5); }
        if (mask(row, x0 + 6)) { out.a6 = op(in.a6); }
        if (mask(row, x0 + 7)) { out.a7 = op(in.a7); }
    }

    //! Binary form: out.a<i> = op(lhs.a<i>, rhs.a<i>).
    template <typename Src1, typename Src2, typename Dst, typename BinOp, typename Mask>
    static __device__ __forceinline__ void unroll(const Src1& lhs, const Src2& rhs, Dst& out, const Mask& mask, const BinOp& op, int x0, int row)
    {
        if (mask(row, x0))     { out.a0 = op(lhs.a0, rhs.a0); }
        if (mask(row, x0 + 1)) { out.a1 = op(lhs.a1, rhs.a1); }
        if (mask(row, x0 + 2)) { out.a2 = op(lhs.a2, rhs.a2); }
        if (mask(row, x0 + 3)) { out.a3 = op(lhs.a3, rhs.a3); }
        if (mask(row, x0 + 4)) { out.a4 = op(lhs.a4, rhs.a4); }
        if (mask(row, x0 + 5)) { out.a5 = op(lhs.a5, rhs.a5); }
        if (mask(row, x0 + 6)) { out.a6 = op(lhs.a6, rhs.a6); }
        if (mask(row, x0 + 7)) { out.a7 = op(lhs.a7, rhs.a7); }
    }
};
//! Unary transform kernel that handles ft::smart_shift consecutive elements per
//! thread. When the whole group lies inside the row it is read and written as
//! one vector (read_type / write_type); the group that crosses the end of the
//! row is processed element by element. Destination elements whose mask is
//! false are left unchanged.
template <typename T, typename D, typename UnOp, typename Mask>
static __global__ void transformSmart(const PtrStepSz<T> src_, PtrStep<D> dst_, const Mask mask, const UnOp op)
{
    typedef TransformFunctorTraits<UnOp> ft;
    typedef UnaryReadWriteTraits<T, D, ft::smart_shift> rw;
    typedef typename rw::read_type read_type;
    typedef typename rw::write_type write_type;

    const int vec_x = threadIdx.x + blockIdx.x * blockDim.x;
    const int row = threadIdx.y + blockIdx.y * blockDim.y;
    const int first_col = vec_x * ft::smart_shift;

    if (row >= src_.rows)
        return;

    const T* src_row = src_.ptr(row);
    D* dst_row = dst_.ptr(row);

    if (first_col + ft::smart_shift - 1 >= src_.cols)
    {
        // Incomplete group at the end of the row: scalar fallback.
        for (int col = first_col; col < src_.cols; ++col)
        {
            if (mask(row, col))
                dst_row[col] = op(src_row[col]);
        }
        return;
    }

    const read_type in_vec = ((const read_type*)src_row)[vec_x];
    // Start from the current destination so masked-out lanes are written back as-is.
    write_type out_vec = ((const write_type*)dst_row)[vec_x];
    OpUnroller<ft::smart_shift>::unroll(in_vec, out_vec, mask, op, first_col, row);
    ((write_type*)dst_row)[vec_x] = out_vec;
}
//! Unary transform kernel, one element per thread:
//! dst(y, x) = op(src(y, x)) for every in-range position whose mask is true.
template <typename T, typename D, typename UnOp, typename Mask>
__global__ static void transformSimple(const PtrStepSz<T> src, PtrStep<D> dst, const Mask mask, const UnOp op)
{
    const int col = blockDim.x * blockIdx.x + threadIdx.x;
    const int row = blockDim.y * blockIdx.y + threadIdx.y;

    // Out-of-range threads and masked-out elements do nothing; the mask is
    // consulted only for in-range coordinates.
    if (col >= src.cols || row >= src.rows || !mask(row, col))
        return;

    dst.ptr(row)[col] = op(src.ptr(row)[col]);
}
//! Binary transform kernel that handles ft::smart_shift consecutive elements
//! per thread. When the whole group lies inside the row both inputs and the
//! output are accessed as one vector each; the group that crosses the end of
//! the row is processed element by element. Destination elements whose mask is
//! false are left unchanged.
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static __global__ void transformSmart(const PtrStepSz<T1> src1_, const PtrStep<T2> src2_, PtrStep<D> dst_,
                                      const Mask mask, const BinOp op)
{
    typedef TransformFunctorTraits<BinOp> ft;
    typedef BinaryReadWriteTraits<T1, T2, D, ft::smart_shift> rw;
    typedef typename rw::read_type1 read_type1;
    typedef typename rw::read_type2 read_type2;
    typedef typename rw::write_type write_type;

    const int vec_x = threadIdx.x + blockIdx.x * blockDim.x;
    const int row = threadIdx.y + blockIdx.y * blockDim.y;
    const int first_col = vec_x * ft::smart_shift;

    if (row >= src1_.rows)
        return;

    const T1* lhs_row = src1_.ptr(row);
    const T2* rhs_row = src2_.ptr(row);
    D* dst_row = dst_.ptr(row);

    if (first_col + ft::smart_shift - 1 >= src1_.cols)
    {
        // Incomplete group at the end of the row: scalar fallback.
        for (int col = first_col; col < src1_.cols; ++col)
        {
            if (mask(row, col))
                dst_row[col] = op(lhs_row[col], rhs_row[col]);
        }
        return;
    }

    const read_type1 lhs_vec = ((const read_type1*)lhs_row)[vec_x];
    const read_type2 rhs_vec = ((const read_type2*)rhs_row)[vec_x];
    // Start from the current destination so masked-out lanes are written back as-is.
    write_type out_vec = ((const write_type*)dst_row)[vec_x];
    OpUnroller<ft::smart_shift>::unroll(lhs_vec, rhs_vec, out_vec, mask, op, first_col, row);
    ((write_type*)dst_row)[vec_x] = out_vec;
}
//! Binary transform kernel, one element per thread:
//! dst(y, x) = op(src1(y, x), src2(y, x)) for every in-range position whose
//! mask is true. The valid range is taken from src1.
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static __global__ void transformSimple(const PtrStepSz<T1> src1, const PtrStep<T2> src2, PtrStep<D> dst,
                                       const Mask mask, const BinOp op)
{
    const int col = blockDim.x * blockIdx.x + threadIdx.x;
    const int row = blockDim.y * blockIdx.y + threadIdx.y;

    // Out-of-range threads and masked-out elements do nothing; the mask is
    // consulted only for in-range coordinates.
    if (col >= src1.cols || row >= src1.rows || !mask(row, col))
        return;

    const T1 lhs = src1.ptr(row)[col];
    const T2 rhs = src2.ptr(row)[col];
    dst.ptr(row)[col] = op(lhs, rhs);
}
template <bool UseSmart> struct TransformDispatcher;
template<> struct TransformDispatcher<false>
{
template <typename T, typename D, typename UnOp, typename Mask>
static void call(PtrStepSz<T> src, PtrStepSz<D> dst, UnOp op, Mask mask, cudaStream_t stream)
{
typedef TransformFunctorTraits<UnOp> ft;
const dim3 threads(ft::simple_block_dim_x, ft::simple_block_dim_y, 1);
const dim3 grid(divUp(src.cols, threads.x), divUp(src.rows, threads.y), 1);
transformSimple<T, D><<<grid, threads, 0, stream>>>(src, dst, mask, op);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static void call(PtrStepSz<T1> src1, PtrStepSz<T2> src2, PtrStepSz<D> dst, BinOp op, Mask mask, cudaStream_t stream)
{
typedef TransformFunctorTraits<BinOp> ft;
const dim3 threads(ft::simple_block_dim_x, ft::simple_block_dim_y, 1);
const dim3 grid(divUp(src1.cols, threads.x), divUp(src1.rows, threads.y), 1);
transformSimple<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, mask, op);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
};
template<> struct TransformDispatcher<true>
{
template <typename T, typename D, typename UnOp, typename Mask>
static void call(PtrStepSz<T> src, PtrStepSz<D> dst, UnOp op, Mask mask, cudaStream_t stream)
{
typedef TransformFunctorTraits<UnOp> ft;
CV_StaticAssert(ft::smart_shift != 1, "");
if (!isAligned(src.data, ft::smart_shift * sizeof(T)) || !isAligned(src.step, ft::smart_shift * sizeof(T)) ||
!isAligned(dst.data, ft::smart_shift * sizeof(D)) || !isAligned(dst.step, ft::smart_shift * sizeof(D)))
{
TransformDispatcher<false>::call(src, dst, op, mask, stream);
return;
}
const dim3 threads(ft::smart_block_dim_x, ft::smart_block_dim_y, 1);
const dim3 grid(divUp(src.cols, threads.x * ft::smart_shift), divUp(src.rows, threads.y), 1);
transformSmart<T, D><<<grid, threads, 0, stream>>>(src, dst, mask, op);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static void call(PtrStepSz<T1> src1, PtrStepSz<T2> src2, PtrStepSz<D> dst, BinOp op, Mask mask, cudaStream_t stream)
{
typedef TransformFunctorTraits<BinOp> ft;
CV_StaticAssert(ft::smart_shift != 1, "");
if (!isAligned(src1.data, ft::smart_shift * sizeof(T1)) || !isAligned(src1.step, ft::smart_shift * sizeof(T1)) ||
!isAligned(src2.data, ft::smart_shift * sizeof(T2)) || !isAligned(src2.step, ft::smart_shift * sizeof(T2)) ||
!isAligned(dst.data, ft::smart_shift * sizeof(D)) || !isAligned(dst.step, ft::smart_shift * sizeof(D)))
{
TransformDispatcher<false>::call(src1, src2, dst, op, mask, stream);
return;
}
const dim3 threads(ft::smart_block_dim_x, ft::smart_block_dim_y, 1);
const dim3 grid(divUp(src1.cols, threads.x * ft::smart_shift), divUp(src1.rows, threads.y), 1);
transformSmart<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, mask, op);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
};
} // namespace transform_detail
}}} // namespace cv { namespace cuda { namespace device
//! @endcond
#endif // __OPENCV_CUDA_TRANSFORM_DETAIL_HPP__

View File

@ -0,0 +1,191 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_CUDA_TYPE_TRAITS_DETAIL_HPP__
#define __OPENCV_CUDA_TYPE_TRAITS_DETAIL_HPP__
#include "../common.hpp"
#include "../vec_traits.hpp"
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
namespace type_traits_detail
{
//! Compile-time type choice: Select<cond, T1, T2>::type is T1 when cond is true
//! (primary template) and T2 when cond is false (specialisation).
template <bool, typename T1, typename T2> struct Select
{
    typedef T1 type;
};
template <typename T1, typename T2> struct Select<false, T1, T2>
{
    typedef T2 type;
};
//! IsSignedIntergral<T>::value is 1 for the signed integer types listed below
//! and 0 for every other type. (The spelling "Intergral" is the established
//! name of this trait.)
template <typename T> struct IsSignedIntergral { enum { value = 0 }; };

// scalar types
template <> struct IsSignedIntergral<schar> { enum { value = 1 }; };
template <> struct IsSignedIntergral<short> { enum { value = 1 }; };
template <> struct IsSignedIntergral<int>   { enum { value = 1 }; };

// one-component vector types
template <> struct IsSignedIntergral<char1>  { enum { value = 1 }; };
template <> struct IsSignedIntergral<short1> { enum { value = 1 }; };
template <> struct IsSignedIntergral<int1>   { enum { value = 1 }; };
//! IsUnsignedIntegral<T>::value is 1 for the unsigned integer types listed
//! below and 0 for every other type.
template <typename T> struct IsUnsignedIntegral { enum { value = 0 }; };

// scalar types
template <> struct IsUnsignedIntegral<uchar>  { enum { value = 1 }; };
template <> struct IsUnsignedIntegral<ushort> { enum { value = 1 }; };
template <> struct IsUnsignedIntegral<uint>   { enum { value = 1 }; };

// one-component vector types
template <> struct IsUnsignedIntegral<uchar1>  { enum { value = 1 }; };
template <> struct IsUnsignedIntegral<ushort1> { enum { value = 1 }; };
template <> struct IsUnsignedIntegral<uint1>   { enum { value = 1 }; };
//! IsIntegral<T>::value is non-zero when T is reported by IsSignedIntergral or
//! IsUnsignedIntegral; char and bool are added explicitly.
template <typename T> struct IsIntegral
{
    enum { value = IsSignedIntergral<T>::value || IsUnsignedIntegral<T>::value };
};
template <> struct IsIntegral<bool> { enum { value = 1 }; };
template <> struct IsIntegral<char> { enum { value = 1 }; };
//! IsFloat<T>::value is 1 for float and double, 0 for every other type.
template <typename T> struct IsFloat
{
    enum { value = 0 };
};
template <> struct IsFloat<double>
{
    enum { value = 1 };
};
template <> struct IsFloat<float>
{
    enum { value = 1 };
};
//! IsVec<T>::value is 1 for the 1-, 2-, 3-, 4- and 8-component vector types of
//! uchar, char, ushort, short, uint, int, float and double; 0 for anything else.
template <typename T> struct IsVec { enum {value = 0}; };

// Emits the five specialisations (widths 1, 2, 3, 4, 8) for one element type.
#define OPENCV_CUDA_DETAIL_IS_VEC_INST(base) \
    template <> struct IsVec<base ## 1> { enum {value = 1}; }; \
    template <> struct IsVec<base ## 2> { enum {value = 1}; }; \
    template <> struct IsVec<base ## 3> { enum {value = 1}; }; \
    template <> struct IsVec<base ## 4> { enum {value = 1}; }; \
    template <> struct IsVec<base ## 8> { enum {value = 1}; };

OPENCV_CUDA_DETAIL_IS_VEC_INST(uchar)
OPENCV_CUDA_DETAIL_IS_VEC_INST(char)
OPENCV_CUDA_DETAIL_IS_VEC_INST(ushort)
OPENCV_CUDA_DETAIL_IS_VEC_INST(short)
OPENCV_CUDA_DETAIL_IS_VEC_INST(uint)
OPENCV_CUDA_DETAIL_IS_VEC_INST(int)
OPENCV_CUDA_DETAIL_IS_VEC_INST(float)
OPENCV_CUDA_DETAIL_IS_VEC_INST(double)

#undef OPENCV_CUDA_DETAIL_IS_VEC_INST
//! Maps a type to the form used for passing it as a parameter:
//!   U    -> const U&
//!   U&   -> U&      (references are kept as they are)
//!   void -> void    (no reference to void can be formed)
template <class U> struct AddParameterType
{
    typedef const U& type;
};
template <class U> struct AddParameterType<U&>
{
    typedef U& type;
};
template <> struct AddParameterType<void>
{
    typedef void type;
};
//! ReferenceTraits<U>::value tells whether U is an lvalue reference;
//! ::type is U with that reference removed.
template <class U> struct ReferenceTraits     { typedef U type; enum { value = false }; };
template <class U> struct ReferenceTraits<U&> { typedef U type; enum { value = true  }; };
//! PointerTraits<U>::value tells whether U is a pointer (U*) or a reference to
//! a pointer (U*&); ::type is the pointee type, or void for non-pointers.
template <class U> struct PointerTraits      { typedef void type; enum { value = false }; };
template <class U> struct PointerTraits<U*>  { typedef U type;    enum { value = true  }; };
template <class U> struct PointerTraits<U*&> { typedef U type;    enum { value = true  }; };
//! Strips a top-level const (from `const U`) or the const of a referenced
//! object (from `const U&`). ::value is 1 when a const was removed, 0 otherwise.
template <class U> struct UnConst           { enum { value = 0 }; typedef U type;  };
template <class U> struct UnConst<const U>  { enum { value = 1 }; typedef U type;  };
template <class U> struct UnConst<const U&> { enum { value = 1 }; typedef U& type; };
//! Strips a top-level volatile (from `volatile U`) or the volatile of a
//! referenced object (from `volatile U&`). ::value is 1 when a volatile was
//! removed, 0 otherwise.
template <class U> struct UnVolatile              { enum { value = 0 }; typedef U type;  };
template <class U> struct UnVolatile<volatile U>  { enum { value = 1 }; typedef U type;  };
template <class U> struct UnVolatile<volatile U&> { enum { value = 1 }; typedef U& type; };
} // namespace type_traits_detail
}}} // namespace cv { namespace cuda { namespace device
//! @endcond
#endif // __OPENCV_CUDA_TYPE_TRAITS_DETAIL_HPP__

View File

@ -0,0 +1,121 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_CUDA_VEC_DISTANCE_DETAIL_HPP__
#define __OPENCV_CUDA_VEC_DISTANCE_DETAIL_HPP__
#include "../datamov_utils.hpp"
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
namespace vec_distance_detail
{
//! Compile-time unrolled accumulation of a distance between a cached vector and
//! a vector fetched through ForceGlob. Each of the N recursion steps consumes
//! one cached element and one fetched element and advances the fetched index by
//! THREAD_DIM; the N == 0 specialisation below ends the recursion.
template <int THREAD_DIM, int N> struct UnrollVecDiffCached
{
    //! Bounds-checked variant: stops as soon as `ind` reaches `len`.
    template <typename Dist, typename T1, typename T2>
    static __device__ void calcCheck(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, int ind)
    {
        if (ind >= len)
            return;

        T1 cached = *vecCached;
        T2 fetched;
        ForceGlob<T2>::Load(vecGlob, ind, fetched);

        dist.reduceIter(cached, fetched);

        UnrollVecDiffCached<THREAD_DIM, N - 1>::calcCheck(vecCached + 1, vecGlob, len, dist, ind + THREAD_DIM);
    }

    //! Unchecked variant: always performs all N steps, moving vecGlob forward by
    //! THREAD_DIM elements per step.
    template <typename Dist, typename T1, typename T2>
    static __device__ void calcWithoutCheck(const T1* vecCached, const T2* vecGlob, Dist& dist)
    {
        T1 cached = *vecCached;
        T2 fetched;
        ForceGlob<T2>::Load(vecGlob, 0, fetched);

        dist.reduceIter(cached, fetched);

        UnrollVecDiffCached<THREAD_DIM, N - 1>::calcWithoutCheck(vecCached + 1, vecGlob + THREAD_DIM, dist);
    }
};

//! Recursion terminator: nothing left to accumulate.
template <int THREAD_DIM> struct UnrollVecDiffCached<THREAD_DIM, 0>
{
    template <typename Dist, typename T1, typename T2>
    static __device__ __forceinline__ void calcCheck(const T1*, const T2*, int, Dist&, int) {}

    template <typename Dist, typename T1, typename T2>
    static __device__ __forceinline__ void calcWithoutCheck(const T1*, const T2*, Dist&) {}
};
//! Front end for UnrollVecDiffCached with MAX_LEN / THREAD_DIM unrolled steps.
//! LEN_EQ_MAX_LEN selects the unchecked path (true) or the bounds-checked
//! path (false).
template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN> struct VecDiffCachedCalculator;

//! Bounds-checked path: the element index starts at `tid` and is compared
//! against `len` at every step.
template <int THREAD_DIM, int MAX_LEN> struct VecDiffCachedCalculator<THREAD_DIM, MAX_LEN, false>
{
    template <typename Dist, typename T1, typename T2>
    static __device__ __forceinline__ void calc(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, int tid)
    {
        typedef UnrollVecDiffCached<THREAD_DIM, MAX_LEN / THREAD_DIM> Unroller;
        Unroller::calcCheck(vecCached, vecGlob, len, dist, tid);
    }
};

//! Unchecked path: `len` is not consulted; vecGlob is offset by `tid` up front.
template <int THREAD_DIM, int MAX_LEN> struct VecDiffCachedCalculator<THREAD_DIM, MAX_LEN, true>
{
    template <typename Dist, typename T1, typename T2>
    static __device__ __forceinline__ void calc(const T1* vecCached, const T2* vecGlob, int /*len*/, Dist& dist, int tid)
    {
        typedef UnrollVecDiffCached<THREAD_DIM, MAX_LEN / THREAD_DIM> Unroller;
        Unroller::calcWithoutCheck(vecCached, vecGlob + tid, dist);
    }
};
} // namespace vec_distance_detail
}}} // namespace cv { namespace cuda { namespace device
//! @endcond
#endif // __OPENCV_CUDA_VEC_DISTANCE_DETAIL_HPP__

View File

@ -0,0 +1,88 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_CUDA_DYNAMIC_SMEM_HPP__
#define __OPENCV_CUDA_DYNAMIC_SMEM_HPP__
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
//! Typed view of the `extern __shared__` array: an object of this type converts
//! implicitly to T* (or const T*) pointing at the start of that array.
template<class T> struct DynamicSharedMem
{
    __device__ __forceinline__ operator T*()
    {
        return base();
    }
    __device__ __forceinline__ operator const T*() const
    {
        return base();
    }

private:
    //! Start of the extern shared array, reinterpreted as T.
    static __device__ __forceinline__ T* base()
    {
        extern __shared__ int __smem[];
        return (T*)__smem;
    }
};

// Specialisation for double: the backing array is declared as double rather
// than int, which avoids unaligned memory access compile errors.
template<> struct DynamicSharedMem<double>
{
    __device__ __forceinline__ operator double*()
    {
        return base();
    }
    __device__ __forceinline__ operator const double*() const
    {
        return base();
    }

private:
    //! Start of the extern shared array of doubles.
    static __device__ __forceinline__ double* base()
    {
        extern __shared__ double __smem_d[];
        return (double*)__smem_d;
    }
};
}}}
//! @endcond
#endif // __OPENCV_CUDA_DYNAMIC_SMEM_HPP__

View File

@ -0,0 +1,269 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CUDA_EMULATION_HPP_
#define OPENCV_CUDA_EMULATION_HPP_
#include "common.hpp"
#include "warp_reduce.hpp"
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
struct Emulation
{
static __device__ __forceinline__ int syncthreadsOr(int pred)
{
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 200)
// just a compilation stub
return 0;
#else
return __syncthreads_or(pred);
#endif
}
template<int CTA_SIZE>
static __forceinline__ __device__ int Ballot(int predicate)
{
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ >= 200)
return __ballot(predicate);
#else
__shared__ volatile int cta_buffer[CTA_SIZE];
int tid = threadIdx.x;
cta_buffer[tid] = predicate ? (1 << (tid & 31)) : 0;
return warp_reduce(cta_buffer);
#endif
}
// Atomic-style helpers grouped under the name "smem".
// NOTE(review): presumably meant for counters that live in shared memory - confirm with callers.
// When __CUDA_ARCH__ is defined and below 120, each operation is emulated with a
// write-then-re-read retry loop; otherwise the call is forwarded to the built-in atomic.
struct smem
{
// Mask selecting every bit of an unsigned int except the top five.
enum { TAG_MASK = (1U << ( (sizeof(unsigned int) << 3) - 5U)) - 1U };
// Increments the counter stored at `address`.
// Emulated branch: the thread writes (tag | (counter + 1)), where the tag is
// threadIdx.x shifted into the top five bits, and retries until the value read
// back equals the value it wrote. `val` is not used in this branch.
// Returns the masked counter minus one, i.e. the counter value this thread incremented from.
template<typename T>
static __device__ __forceinline__ T atomicInc(T* address, T val)
{
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
T count;
// Per-thread tag placed in the five most significant bits.
unsigned int tag = threadIdx.x << ( (sizeof(unsigned int) << 3) - 5U);
do
{
// Strip whatever tag is currently stored, then store our tag with the new count.
count = *address & TAG_MASK;
count = tag | (count + 1);
*address = count;
// Retry if the re-read shows a value other than the one just written.
} while (*address != count);
return (count & TAG_MASK) - 1;
#else
return ::atomicInc(address, val);
#endif
}
// Adds `val` to the counter stored at `address`.
// Emulated branch: same tag-and-retry scheme as atomicInc, adding `val` instead of 1.
// Returns the masked counter minus `val`, i.e. the counter value this thread added to.
template<typename T>
static __device__ __forceinline__ T atomicAdd(T* address, T val)
{
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
T count;
// Per-thread tag placed in the five most significant bits.
unsigned int tag = threadIdx.x << ( (sizeof(unsigned int) << 3) - 5U);
do
{
count = *address & TAG_MASK;
count = tag | (count + val);
*address = count;
// Retry if the re-read shows a value other than the one just written.
} while (*address != count);
return (count & TAG_MASK) - val;
#else
return ::atomicAdd(address, val);
#endif
}
// Stores min(*address, val) at `address`.
// Emulated branch: the minimum is computed once and written repeatedly until the
// value read back is not greater than it. Note that this branch returns the
// computed minimum, whereas the other branch returns whatever ::atomicMin returns.
template<typename T>
static __device__ __forceinline__ T atomicMin(T* address, T val)
{
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
T count = ::min(*address, val);
do
{
*address = count;
} while (*address > count);
return count;
#else
return ::atomicMin(address, val);
#endif
}
}; // struct smem
// Atomic add/min/max overloads grouped under the name "glob".
// int and unsigned int overloads forward directly to the built-in atomics.
// Floating-point overloads either forward to the built-in atomic or run a
// compare-and-swap loop on the value's bit pattern, depending on __CUDA_ARCH__;
// where neither is selected they are stubs that return zero.
struct glob
{
    // ---- atomicAdd ----

    static __device__ __forceinline__ int atomicAdd(int* address, int val)
    {
        return ::atomicAdd(address, val);
    }

    static __device__ __forceinline__ unsigned int atomicAdd(unsigned int* address, unsigned int val)
    {
        return ::atomicAdd(address, val);
    }

    // float: built-in atomic for __CUDA_ARCH__ >= 200, otherwise a CAS loop.
    static __device__ __forceinline__ float atomicAdd(float* address, float val)
    {
#if __CUDA_ARCH__ >= 200
        return ::atomicAdd(address, val);
#else
        int* const bits = reinterpret_cast<int*>(address);
        int observed = *bits;
        int expected;
        do
        {
            // Try to swap in (expected + val); on failure atomicCAS hands back
            // the value actually stored, which becomes the next expectation.
            expected = observed;
            observed = ::atomicCAS(bits, expected,
                                   __float_as_int(val + __int_as_float(expected)));
        }
        while (expected != observed);
        return __int_as_float(observed);
#endif
    }

    // double: CAS loop for __CUDA_ARCH__ >= 130, otherwise a stub returning 0.0.
    static __device__ __forceinline__ double atomicAdd(double* address, double val)
    {
#if __CUDA_ARCH__ >= 130
        unsigned long long int* const bits = reinterpret_cast<unsigned long long int*>(address);
        unsigned long long int observed = *bits;
        unsigned long long int expected;
        do
        {
            expected = observed;
            observed = ::atomicCAS(bits, expected,
                                   __double_as_longlong(val + __longlong_as_double(expected)));
        }
        while (expected != observed);
        return __longlong_as_double(observed);
#else
        (void) address;
        (void) val;
        return 0.0;
#endif
    }

    // ---- atomicMin ----

    static __device__ __forceinline__ int atomicMin(int* address, int val)
    {
        return ::atomicMin(address, val);
    }

    // float: CAS loop for __CUDA_ARCH__ >= 120, otherwise a stub returning 0.0f.
    static __device__ __forceinline__ float atomicMin(float* address, float val)
    {
#if __CUDA_ARCH__ >= 120
        int* const bits = reinterpret_cast<int*>(address);
        int observed = *bits;
        int expected;
        do
        {
            expected = observed;
            observed = ::atomicCAS(bits, expected,
                                   __float_as_int(::fminf(val, __int_as_float(expected))));
        }
        while (expected != observed);
        return __int_as_float(observed);
#else
        (void) address;
        (void) val;
        return 0.0f;
#endif
    }

    // double: CAS loop for __CUDA_ARCH__ >= 130, otherwise a stub returning 0.0.
    static __device__ __forceinline__ double atomicMin(double* address, double val)
    {
#if __CUDA_ARCH__ >= 130
        unsigned long long int* const bits = reinterpret_cast<unsigned long long int*>(address);
        unsigned long long int observed = *bits;
        unsigned long long int expected;
        do
        {
            expected = observed;
            observed = ::atomicCAS(bits, expected,
                                   __double_as_longlong(::fmin(val, __longlong_as_double(expected))));
        }
        while (expected != observed);
        return __longlong_as_double(observed);
#else
        (void) address;
        (void) val;
        return 0.0;
#endif
    }

    // ---- atomicMax ----

    static __device__ __forceinline__ int atomicMax(int* address, int val)
    {
        return ::atomicMax(address, val);
    }

    // float: CAS loop for __CUDA_ARCH__ >= 120, otherwise a stub returning 0.0f.
    static __device__ __forceinline__ float atomicMax(float* address, float val)
    {
#if __CUDA_ARCH__ >= 120
        int* const bits = reinterpret_cast<int*>(address);
        int observed = *bits;
        int expected;
        do
        {
            expected = observed;
            observed = ::atomicCAS(bits, expected,
                                   __float_as_int(::fmaxf(val, __int_as_float(expected))));
        }
        while (expected != observed);
        return __int_as_float(observed);
#else
        (void) address;
        (void) val;
        return 0.0f;
#endif
    }

    // double: CAS loop for __CUDA_ARCH__ >= 130, otherwise a stub returning 0.0.
    static __device__ __forceinline__ double atomicMax(double* address, double val)
    {
#if __CUDA_ARCH__ >= 130
        unsigned long long int* const bits = reinterpret_cast<unsigned long long int*>(address);
        unsigned long long int observed = *bits;
        unsigned long long int expected;
        do
        {
            expected = observed;
            observed = ::atomicCAS(bits, expected,
                                   __double_as_longlong(::fmax(val, __longlong_as_double(expected))));
        }
        while (expected != observed);
        return __longlong_as_double(observed);
#else
        (void) address;
        (void) val;
        return 0.0;
#endif
    }
};
}; //struct Emulation
}}} // namespace cv { namespace cuda { namespace device
//! @endcond
#endif /* OPENCV_CUDA_EMULATION_HPP_ */

View File

@ -0,0 +1,286 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_CUDA_FILTERS_HPP__
#define __OPENCV_CUDA_FILTERS_HPP__
#include "saturate_cast.hpp"
#include "vec_traits.hpp"
#include "vec_math.hpp"
#include "type_traits.hpp"
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
// Sampler that reads the single source element whose integer coordinates are
// obtained by converting (y, x) with __float2int_rz.
template <typename Ptr2D> struct PointFilter
{
    typedef typename Ptr2D::elem_type elem_type;
    typedef float index_type;

    // The two scale arguments are accepted but ignored by this filter.
    explicit __host__ __device__ __forceinline__ PointFilter(const Ptr2D& src_, float /*fx*/ = 0.f, float /*fy*/ = 0.f)
        : src(src_)
    {
    }

    __device__ __forceinline__ elem_type operator ()(float y, float x) const
    {
        const int row = __float2int_rz(y);
        const int col = __float2int_rz(x);
        return src(row, col);
    }

    Ptr2D src;
};
// Sampler that blends the four source elements surrounding (y, x), weighting
// each by the product of its horizontal and vertical distance terms
// (bilinear weights), and saturates the result back to elem_type.
template <typename Ptr2D> struct LinearFilter
{
    typedef typename Ptr2D::elem_type elem_type;
    typedef float index_type;

    // The two scale arguments are accepted but ignored by this filter.
    explicit __host__ __device__ __forceinline__ LinearFilter(const Ptr2D& src_, float /*fx*/ = 0.f, float /*fy*/ = 0.f)
        : src(src_)
    {
    }

    __device__ __forceinline__ elem_type operator ()(float y, float x) const
    {
        typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;

        work_type acc = VecTraits<work_type>::all(0);

        // Integer cell containing (y, x): floor for the near corner, +1 for the far one.
        const int left   = __float2int_rd(x);
        const int top    = __float2int_rd(y);
        const int right  = left + 1;
        const int bottom = top + 1;

        elem_type sample = src(top, left);
        acc = acc + sample * ((right - x) * (bottom - y));

        sample = src(top, right);
        acc = acc + sample * ((x - left) * (bottom - y));

        sample = src(bottom, left);
        acc = acc + sample * ((right - x) * (y - top));

        sample = src(bottom, right);
        acc = acc + sample * ((x - left) * (y - top));

        return saturate_cast<elem_type>(acc);
    }

    Ptr2D src;
};
// Sampler that forms a weighted average of the source elements within two
// units of (y, x), using bicubicCoeff() for the per-axis weights.
template <typename Ptr2D> struct CubicFilter
{
    typedef typename Ptr2D::elem_type elem_type;
    typedef float index_type;
    typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;

    // The two scale arguments are accepted but ignored by this filter.
    explicit __host__ __device__ __forceinline__ CubicFilter(const Ptr2D& src_, float /*fx*/ = 0.f, float /*fy*/ = 0.f)
        : src(src_)
    {
    }

    // Piecewise-cubic weight of a sample at signed distance x_:
    // one polynomial for |x_| <= 1, another for 1 < |x_| < 2, zero beyond.
    static __device__ __forceinline__ float bicubicCoeff(float x_)
    {
        const float dist = fabsf(x_);

        if (dist <= 1.0f)
            return dist * dist * (1.5f * dist - 2.5f) + 1.0f;

        if (dist < 2.0f)
            return dist * (dist * (-0.5f * dist + 2.5f) - 4.0f) + 2.0f;

        return 0.0f;
    }

    __device__ elem_type operator ()(float y, float x) const
    {
        // Integer-valued bounds of the support window, kept as floats.
        const float firstCol = ::ceilf(x - 2.0f);
        const float lastCol  = ::floorf(x + 2.0f);
        const float firstRow = ::ceilf(y - 2.0f);
        const float lastRow  = ::floorf(y + 2.0f);

        work_type weighted = VecTraits<work_type>::all(0);
        float weightTotal = 0.0f;

        for (float row = firstRow; row <= lastRow; row += 1.0f)
        {
            for (float col = firstCol; col <= lastCol; col += 1.0f)
            {
                const float w = bicubicCoeff(x - col) * bicubicCoeff(y - row);
                weighted = weighted + w * src(__float2int_rd(row), __float2int_rd(col));
                weightTotal += w;
            }
        }

        // A zero weight total yields an all-zero result instead of dividing.
        work_type res = (weightTotal == 0.0f) ? VecTraits<work_type>::all(0) : weighted / weightTotal;

        return saturate_cast<elem_type>(res);
    }

    Ptr2D src;
};
// for integer scaling
// Sampler that averages the whole source elements covered by the
// scale_x-by-scale_y window starting at (y * scale_y, x * scale_x).
// Every covered element gets the same weight 1 / (scale_x * scale_y).
template <typename Ptr2D> struct IntegerAreaFilter
{
    typedef typename Ptr2D::elem_type elem_type;
    typedef float index_type;

    explicit __host__ __device__ __forceinline__ IntegerAreaFilter(const Ptr2D& src_, float scale_x_, float scale_y_)
        : src(src_), scale_x(scale_x_), scale_y(scale_y_), scale(1.f / (scale_x * scale_y))
    {
    }

    __device__ __forceinline__ elem_type operator ()(float y, float x) const
    {
        typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;

        // Horizontal extent of the window, then the whole columns inside it.
        const float colStartF = x * scale_x;
        const float colEndF   = colStartF + scale_x;
        const int colStart = __float2int_ru(colStartF);
        const int colEnd   = __float2int_rd(colEndF);

        // Vertical extent of the window, then the whole rows inside it.
        const float rowStartF = y * scale_y;
        const float rowEndF   = rowStartF + scale_y;
        const int rowStart = __float2int_ru(rowStartF);
        const int rowEnd   = __float2int_rd(rowEndF);

        work_type acc = VecTraits<work_type>::all(0.f);

        for (int row = rowStart; row < rowEnd; ++row)
        {
            for (int col = colStart; col < colEnd; ++col)
            {
                acc = acc + src(row, col) * scale;
            }
        }

        return saturate_cast<elem_type>(acc);
    }

    Ptr2D src;
    float scale_x, scale_y, scale;
};
// Sampler that averages the source elements covered by the scale_x-by-scale_y
// window starting at (y * scale_y, x * scale_x). Elements fully inside the
// window get the full weight; elements on the window border are weighted by
// the fraction of them that the window overlaps.
// Ptr2D must expose `width` and `height` members: operator() reads src.width and src.height.
template <typename Ptr2D> struct AreaFilter
{
typedef typename Ptr2D::elem_type elem_type;
typedef float index_type;
// Stores the source accessor and both scale factors.
explicit __host__ __device__ __forceinline__ AreaFilter(const Ptr2D& src_, float scale_x_, float scale_y_)
: src(src_), scale_x(scale_x_), scale_y(scale_y_){}
__device__ __forceinline__ elem_type operator ()(float y, float x) const
{
// Horizontal window [fsx1, fsx2) and the whole columns [sx1, sx2) inside it.
float fsx1 = x * scale_x;
float fsx2 = fsx1 + scale_x;
int sx1 = __float2int_ru(fsx1);
int sx2 = __float2int_rd(fsx2);
// Vertical window [fsy1, fsy2) and the whole rows [sy1, sy2) inside it.
float fsy1 = y * scale_y;
float fsy2 = fsy1 + scale_y;
int sy1 = __float2int_ru(fsy1);
int sy2 = __float2int_rd(fsy2);
// Weight of one fully covered element: reciprocal of the window area, with each
// side limited to the distance remaining up to src.width / src.height.
float scale = 1.f / (fminf(scale_x, src.width - fsx1) * fminf(scale_y, src.height - fsy1));
typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;
work_type out = VecTraits<work_type>::all(0.f);
// Fully covered rows: whole elements, plus the partial left and right columns.
for (int dy = sy1; dy < sy2; ++dy)
{
for (int dx = sx1; dx < sx2; ++dx)
out = out + src(dy, dx) * scale;
// Partially covered column to the left of sx1.
if (sx1 > fsx1)
out = out + src(dy, (sx1 -1) ) * ((sx1 - fsx1) * scale);
// Partially covered column at sx2.
if (sx2 < fsx2)
out = out + src(dy, sx2) * ((fsx2 -sx2) * scale);
}
// Partially covered row above sy1.
if (sy1 > fsy1)
for (int dx = sx1; dx < sx2; ++dx)
out = out + src( (sy1 - 1) , dx) * ((sy1 -fsy1) * scale);
// Partially covered row at sy2.
if (sy2 < fsy2)
for (int dx = sx1; dx < sx2; ++dx)
out = out + src(sy2, dx) * ((fsy2 -sy2) * scale);
// Four corner elements, each weighted by both of its partial overlaps.
if ((sy1 > fsy1) && (sx1 > fsx1))
out = out + src( (sy1 - 1) , (sx1 - 1)) * ((sy1 -fsy1) * (sx1 -fsx1) * scale);
if ((sy1 > fsy1) && (sx2 < fsx2))
out = out + src( (sy1 - 1) , sx2) * ((sy1 -fsy1) * (fsx2 -sx2) * scale);
if ((sy2 < fsy2) && (sx2 < fsx2))
out = out + src(sy2, sx2) * ((fsy2 -sy2) * (fsx2 -sx2) * scale);
if ((sy2 < fsy2) && (sx1 > fsx1))
out = out + src(sy2, (sx1 - 1)) * ((fsy2 -sy2) * (sx1 -fsx1) * scale);
return saturate_cast<elem_type>(out);
}
Ptr2D src;
float scale_x, scale_y;
// NOTE(review): these two members are never initialized or read in this struct
// ("haight" looks like a typo for "height"); operator() uses src.width / src.height instead.
int width, haight;
};
}}} // namespace cv { namespace cuda { namespace device
//! @endcond
#endif // __OPENCV_CUDA_FILTERS_HPP__

View File

@ -0,0 +1,79 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_CUDA_DEVICE_FUNCATTRIB_HPP_
#define __OPENCV_CUDA_DEVICE_FUNCATTRIB_HPP_
#include <cstdio>
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
template<class Func>
void printFuncAttrib(Func& func)
{
cudaFuncAttributes attrs;
cudaFuncGetAttributes(&attrs, func);
printf("=== Function stats ===\n");
printf("Name: \n");
printf("sharedSizeBytes = %d\n", attrs.sharedSizeBytes);
printf("constSizeBytes = %d\n", attrs.constSizeBytes);
printf("localSizeBytes = %d\n", attrs.localSizeBytes);
printf("maxThreadsPerBlock = %d\n", attrs.maxThreadsPerBlock);
printf("numRegs = %d\n", attrs.numRegs);
printf("ptxVersion = %d\n", attrs.ptxVersion);
printf("binaryVersion = %d\n", attrs.binaryVersion);
printf("\n");
fflush(stdout);
}
}}} // namespace cv { namespace cuda { namespace device
//! @endcond
#endif /* __OPENCV_CUDA_DEVICE_FUNCATTRIB_HPP_ */

View File

@ -0,0 +1,797 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_CUDA_FUNCTIONAL_HPP__
#define __OPENCV_CUDA_FUNCTIONAL_HPP__
#include <functional>
#include "saturate_cast.hpp"
#include "vec_traits.hpp"
#include "type_traits.hpp"
#include "device_functions.h"
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
// Function Objects
// Base for unary functors: publishes the argument and result types.
// The typedefs are declared here directly rather than inherited from
// std::unary_function, which was deprecated in C++11 and removed in C++17.
template<typename Argument, typename Result> struct unary_function
{
    typedef Argument argument_type;
    typedef Result result_type;
};
// Base for binary functors: publishes both argument types and the result type.
// The typedefs are declared here directly rather than inherited from
// std::binary_function, which was deprecated in C++11 and removed in C++17.
template<typename Argument1, typename Argument2, typename Result> struct binary_function
{
    typedef Argument1 first_argument_type;
    typedef Argument2 second_argument_type;
    typedef Result result_type;
};
// Arithmetic Operations
// Binary functor: returns lhs + rhs.
template <typename T> struct plus : binary_function<T, T, T>
{
    __host__ __device__ __forceinline__ plus() {}
    __host__ __device__ __forceinline__ plus(const plus&) {}

    __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType lhs,
                                             typename TypeTraits<T>::ParameterType rhs) const
    {
        return lhs + rhs;
    }
};
// Binary functor: returns lhs - rhs.
template <typename T> struct minus : binary_function<T, T, T>
{
    __host__ __device__ __forceinline__ minus() {}
    __host__ __device__ __forceinline__ minus(const minus&) {}

    __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType lhs,
                                             typename TypeTraits<T>::ParameterType rhs) const
    {
        return lhs - rhs;
    }
};
// Binary functor: returns lhs * rhs.
template <typename T> struct multiplies : binary_function<T, T, T>
{
    __host__ __device__ __forceinline__ multiplies() {}
    __host__ __device__ __forceinline__ multiplies(const multiplies&) {}

    __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType lhs,
                                             typename TypeTraits<T>::ParameterType rhs) const
    {
        return lhs * rhs;
    }
};
// Binary functor: returns lhs / rhs.
template <typename T> struct divides : binary_function<T, T, T>
{
    __host__ __device__ __forceinline__ divides() {}
    __host__ __device__ __forceinline__ divides(const divides&) {}

    __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType lhs,
                                             typename TypeTraits<T>::ParameterType rhs) const
    {
        return lhs / rhs;
    }
};
// Binary functor: returns lhs % rhs.
template <typename T> struct modulus : binary_function<T, T, T>
{
    __host__ __device__ __forceinline__ modulus() {}
    __host__ __device__ __forceinline__ modulus(const modulus&) {}

    __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType lhs,
                                             typename TypeTraits<T>::ParameterType rhs) const
    {
        return lhs % rhs;
    }
};
// Unary functor: returns -value.
template <typename T> struct negate : unary_function<T, T>
{
    __host__ __device__ __forceinline__ negate() {}
    __host__ __device__ __forceinline__ negate(const negate&) {}

    __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType value) const
    {
        return -value;
    }
};
// Comparison Operations
// Binary predicate: returns lhs == rhs.
template <typename T> struct equal_to : binary_function<T, T, bool>
{
    __host__ __device__ __forceinline__ equal_to() {}
    __host__ __device__ __forceinline__ equal_to(const equal_to&) {}

    __device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType lhs,
                                                typename TypeTraits<T>::ParameterType rhs) const
    {
        return lhs == rhs;
    }
};
// Binary predicate: returns lhs != rhs.
template <typename T> struct not_equal_to : binary_function<T, T, bool>
{
    __host__ __device__ __forceinline__ not_equal_to() {}
    __host__ __device__ __forceinline__ not_equal_to(const not_equal_to&) {}

    __device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType lhs,
                                                typename TypeTraits<T>::ParameterType rhs) const
    {
        return lhs != rhs;
    }
};
// Binary predicate: returns lhs > rhs.
template <typename T> struct greater : binary_function<T, T, bool>
{
    __host__ __device__ __forceinline__ greater() {}
    __host__ __device__ __forceinline__ greater(const greater&) {}

    __device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType lhs,
                                                typename TypeTraits<T>::ParameterType rhs) const
    {
        return lhs > rhs;
    }
};
// Binary predicate: returns lhs < rhs.
template <typename T> struct less : binary_function<T, T, bool>
{
    __host__ __device__ __forceinline__ less() {}
    __host__ __device__ __forceinline__ less(const less&) {}

    __device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType lhs,
                                                typename TypeTraits<T>::ParameterType rhs) const
    {
        return lhs < rhs;
    }
};
// Binary predicate: returns lhs >= rhs.
template <typename T> struct greater_equal : binary_function<T, T, bool>
{
    __host__ __device__ __forceinline__ greater_equal() {}
    __host__ __device__ __forceinline__ greater_equal(const greater_equal&) {}

    __device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType lhs,
                                                typename TypeTraits<T>::ParameterType rhs) const
    {
        return lhs >= rhs;
    }
};
// Binary predicate: returns lhs <= rhs.
template <typename T> struct less_equal : binary_function<T, T, bool>
{
    __host__ __device__ __forceinline__ less_equal() {}
    __host__ __device__ __forceinline__ less_equal(const less_equal&) {}

    __device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType lhs,
                                                typename TypeTraits<T>::ParameterType rhs) const
    {
        return lhs <= rhs;
    }
};
// Logical Operations
// Binary predicate: returns lhs && rhs.
template <typename T> struct logical_and : binary_function<T, T, bool>
{
    __host__ __device__ __forceinline__ logical_and() {}
    __host__ __device__ __forceinline__ logical_and(const logical_and&) {}

    __device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType lhs,
                                                typename TypeTraits<T>::ParameterType rhs) const
    {
        return lhs && rhs;
    }
};
// Binary predicate: returns lhs || rhs.
template <typename T> struct logical_or : binary_function<T, T, bool>
{
    __host__ __device__ __forceinline__ logical_or() {}
    __host__ __device__ __forceinline__ logical_or(const logical_or&) {}

    __device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType lhs,
                                                typename TypeTraits<T>::ParameterType rhs) const
    {
        return lhs || rhs;
    }
};
// Unary predicate: returns !value.
template <typename T> struct logical_not : unary_function<T, bool>
{
    __host__ __device__ __forceinline__ logical_not() {}
    __host__ __device__ __forceinline__ logical_not(const logical_not&) {}

    __device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType value) const
    {
        return !value;
    }
};
// Bitwise Operations
// Binary functor: returns lhs & rhs.
template <typename T> struct bit_and : binary_function<T, T, T>
{
    __host__ __device__ __forceinline__ bit_and() {}
    __host__ __device__ __forceinline__ bit_and(const bit_and&) {}

    __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType lhs,
                                             typename TypeTraits<T>::ParameterType rhs) const
    {
        return lhs & rhs;
    }
};
// Binary functor: returns lhs | rhs.
template <typename T> struct bit_or : binary_function<T, T, T>
{
    __host__ __device__ __forceinline__ bit_or() {}
    __host__ __device__ __forceinline__ bit_or(const bit_or&) {}

    __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType lhs,
                                             typename TypeTraits<T>::ParameterType rhs) const
    {
        return lhs | rhs;
    }
};
// Binary functor: returns lhs ^ rhs.
template <typename T> struct bit_xor : binary_function<T, T, T>
{
    __host__ __device__ __forceinline__ bit_xor() {}
    __host__ __device__ __forceinline__ bit_xor(const bit_xor&) {}

    __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType lhs,
                                             typename TypeTraits<T>::ParameterType rhs) const
    {
        return lhs ^ rhs;
    }
};
// Unary functor: returns ~value.
template <typename T> struct bit_not : unary_function<T, T>
{
    __host__ __device__ __forceinline__ bit_not() {}
    __host__ __device__ __forceinline__ bit_not(const bit_not&) {}

    __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType value) const
    {
        return ~value;
    }
};
// Generalized Identity Operations
// Unary functor: returns its argument unchanged.
template <typename T> struct identity : unary_function<T, T>
{
    __host__ __device__ __forceinline__ identity() {}
    __host__ __device__ __forceinline__ identity(const identity&) {}

    __device__ __forceinline__ typename TypeTraits<T>::ParameterType operator()(typename TypeTraits<T>::ParameterType value) const
    {
        return value;
    }
};
// Binary functor: returns the first argument; the second one is not used.
template <typename T1, typename T2> struct project1st : binary_function<T1, T2, T1>
{
    __host__ __device__ __forceinline__ project1st() {}
    __host__ __device__ __forceinline__ project1st(const project1st&) {}

    __device__ __forceinline__ typename TypeTraits<T1>::ParameterType operator()(typename TypeTraits<T1>::ParameterType first,
                                                                                 typename TypeTraits<T2>::ParameterType second) const
    {
        return first;
    }
};
// Binary functor: returns the second argument; the first one is not used.
template <typename T1, typename T2> struct project2nd : binary_function<T1, T2, T2>
{
    __host__ __device__ __forceinline__ project2nd() {}
    __host__ __device__ __forceinline__ project2nd(const project2nd&) {}

    __device__ __forceinline__ typename TypeTraits<T2>::ParameterType operator()(typename TypeTraits<T1>::ParameterType first,
                                                                                 typename TypeTraits<T2>::ParameterType second) const
    {
        return second;
    }
};
// Min/Max Operations
// Generates an explicit specialization of the functor template `name` for
// `type`, whose operator() takes both arguments by value and returns
// op(lhs, rhs). The macro is #undef'ed once the specializations are emitted.
#define OPENCV_CUDA_IMPLEMENT_MINMAX(name, type, op) \
template <> struct name<type> : binary_function<type, type, type> \
{ \
__device__ __forceinline__ type operator()(type lhs, type rhs) const {return op(lhs, rhs);} \
__host__ __device__ __forceinline__ name() {}\
__host__ __device__ __forceinline__ name(const name&) {}\
};
// Binary functor: returns max(lhs, rhs) through an unqualified call to max.
template <typename T> struct maximum : binary_function<T, T, T>
{
    __host__ __device__ __forceinline__ maximum() {}
    __host__ __device__ __forceinline__ maximum(const maximum&) {}

    __device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType lhs,
                                            typename TypeTraits<T>::ParameterType rhs) const
    {
        return max(lhs, rhs);
    }
};

// Specializations: integer types use ::max, float and double use ::fmax.
OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, uchar, ::max)
OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, schar, ::max)
OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, char, ::max)
OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, ushort, ::max)
OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, short, ::max)
OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, int, ::max)
OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, uint, ::max)
OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, float, ::fmax)
OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, double, ::fmax)
// Binary functor: returns min(lhs, rhs) through an unqualified call to min.
template <typename T> struct minimum : binary_function<T, T, T>
{
    __host__ __device__ __forceinline__ minimum() {}
    __host__ __device__ __forceinline__ minimum(const minimum&) {}

    __device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType lhs,
                                            typename TypeTraits<T>::ParameterType rhs) const
    {
        return min(lhs, rhs);
    }
};

// Specializations: integer types use ::min, float and double use ::fmin.
OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, uchar, ::min)
OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, schar, ::min)
OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, char, ::min)
OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, ushort, ::min)
OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, short, ::min)
OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, int, ::min)
OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, uint, ::min)
OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, float, ::fmin)
OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, double, ::fmin)
#undef OPENCV_CUDA_IMPLEMENT_MINMAX
// Math functions
// Unary functor returning the absolute value of its argument.
// The primary template calls abs() unqualified; the specializations below
// pick an explicit implementation per built-in type.
template <typename T> struct abs_func : unary_function<T, T>
{
    __host__ __device__ __forceinline__ abs_func() {}
    __host__ __device__ __forceinline__ abs_func(const abs_func&) {}

    __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType value) const
    {
        return abs(value);
    }
};

// unsigned char: the value is returned as is.
template <> struct abs_func<unsigned char> : unary_function<unsigned char, unsigned char>
{
    __host__ __device__ __forceinline__ abs_func() {}
    __host__ __device__ __forceinline__ abs_func(const abs_func&) {}

    __device__ __forceinline__ unsigned char operator ()(unsigned char value) const
    {
        return value;
    }
};

// signed char: widened to int before calling ::abs.
template <> struct abs_func<signed char> : unary_function<signed char, signed char>
{
    __host__ __device__ __forceinline__ abs_func() {}
    __host__ __device__ __forceinline__ abs_func(const abs_func&) {}

    __device__ __forceinline__ signed char operator ()(signed char value) const
    {
        return ::abs(static_cast<int>(value));
    }
};

// char: widened to int before calling ::abs.
template <> struct abs_func<char> : unary_function<char, char>
{
    __host__ __device__ __forceinline__ abs_func() {}
    __host__ __device__ __forceinline__ abs_func(const abs_func&) {}

    __device__ __forceinline__ char operator ()(char value) const
    {
        return ::abs(static_cast<int>(value));
    }
};

// unsigned short: the value is returned as is.
template <> struct abs_func<unsigned short> : unary_function<unsigned short, unsigned short>
{
    __host__ __device__ __forceinline__ abs_func() {}
    __host__ __device__ __forceinline__ abs_func(const abs_func&) {}

    __device__ __forceinline__ unsigned short operator ()(unsigned short value) const
    {
        return value;
    }
};

// short: widened to int before calling ::abs.
template <> struct abs_func<short> : unary_function<short, short>
{
    __host__ __device__ __forceinline__ abs_func() {}
    __host__ __device__ __forceinline__ abs_func(const abs_func&) {}

    __device__ __forceinline__ short operator ()(short value) const
    {
        return ::abs(static_cast<int>(value));
    }
};

// unsigned int: the value is returned as is.
template <> struct abs_func<unsigned int> : unary_function<unsigned int, unsigned int>
{
    __host__ __device__ __forceinline__ abs_func() {}
    __host__ __device__ __forceinline__ abs_func(const abs_func&) {}

    __device__ __forceinline__ unsigned int operator ()(unsigned int value) const
    {
        return value;
    }
};

// int: ::abs.
template <> struct abs_func<int> : unary_function<int, int>
{
    __host__ __device__ __forceinline__ abs_func() {}
    __host__ __device__ __forceinline__ abs_func(const abs_func&) {}

    __device__ __forceinline__ int operator ()(int value) const
    {
        return ::abs(value);
    }
};

// float: ::fabsf.
template <> struct abs_func<float> : unary_function<float, float>
{
    __host__ __device__ __forceinline__ abs_func() {}
    __host__ __device__ __forceinline__ abs_func(const abs_func&) {}

    __device__ __forceinline__ float operator ()(float value) const
    {
        return ::fabsf(value);
    }
};

// double: ::fabs.
template <> struct abs_func<double> : unary_function<double, double>
{
    __host__ __device__ __forceinline__ abs_func() {}
    __host__ __device__ __forceinline__ abs_func(const abs_func&) {}

    __device__ __forceinline__ double operator ()(double value) const
    {
        return ::fabs(value);
    }
};
// Generates a unary functor template named <name>_func.
// The primary template returns float and calls the function whose name is
// `func` with an "f" suffix appended (e.g. ::sqrt -> ::sqrtf); the <double>
// specialization returns double and calls `func` itself.
#define OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(name, func) \
template <typename T> struct name ## _func : unary_function<T, float> \
{ \
__device__ __forceinline__ float operator ()(typename TypeTraits<T>::ParameterType v) const \
{ \
return func ## f(v); \
} \
__host__ __device__ __forceinline__ name ## _func() {} \
__host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \
}; \
template <> struct name ## _func<double> : unary_function<double, double> \
{ \
__device__ __forceinline__ double operator ()(double v) const \
{ \
return func(v); \
} \
__host__ __device__ __forceinline__ name ## _func() {} \
__host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \
};
// Generates a binary functor template named <name>_func.
// The primary template returns float and calls the function whose name is
// `func` with an "f" suffix appended (e.g. ::pow -> ::powf); the <double>
// specialization returns double and calls `func` itself.
#define OPENCV_CUDA_IMPLEMENT_BIN_FUNCTOR(name, func) \
template <typename T> struct name ## _func : binary_function<T, T, float> \
{ \
__device__ __forceinline__ float operator ()(typename TypeTraits<T>::ParameterType v1, typename TypeTraits<T>::ParameterType v2) const \
{ \
return func ## f(v1, v2); \
} \
__host__ __device__ __forceinline__ name ## _func() {} \
__host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \
}; \
template <> struct name ## _func<double> : binary_function<double, double, double> \
{ \
__device__ __forceinline__ double operator ()(double v1, double v2) const \
{ \
return func(v1, v2); \
} \
__host__ __device__ __forceinline__ name ## _func() {} \
__host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \
};
// Unary math functors: sqrt_func, exp_func, exp2_func, ... atanh_func.
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(sqrt, ::sqrt)
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(exp, ::exp)
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(exp2, ::exp2)
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(exp10, ::exp10)
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(log, ::log)
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(log2, ::log2)
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(log10, ::log10)
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(sin, ::sin)
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(cos, ::cos)
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(tan, ::tan)
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(asin, ::asin)
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(acos, ::acos)
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(atan, ::atan)
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(sinh, ::sinh)
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(cosh, ::cosh)
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(tanh, ::tanh)
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(asinh, ::asinh)
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(acosh, ::acosh)
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(atanh, ::atanh)
// Binary math functors: hypot_func, atan2_func, pow_func.
OPENCV_CUDA_IMPLEMENT_BIN_FUNCTOR(hypot, ::hypot)
OPENCV_CUDA_IMPLEMENT_BIN_FUNCTOR(atan2, ::atan2)
OPENCV_CUDA_IMPLEMENT_BIN_FUNCTOR(pow, ::pow)
// The generator macros are not needed past this point.
#undef OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR
// NOTE(review): no #define of the _NO_DOUBLE variant appears earlier in this
// header, so this #undef looks like a harmless leftover - confirm before removing.
#undef OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR_NO_DOUBLE
#undef OPENCV_CUDA_IMPLEMENT_BIN_FUNCTOR
// Squared-hypotenuse functor: yields src1*src1 + src2*src2 (no square root).
// NOTE(review): the base class advertises `float` as result_type while
// operator() is declared to return T - confirm which one callers rely on.
template<typename T> struct hypot_sqr_func : binary_function<T, T, float>
{
    // Stateless: both constructors are intentionally empty.
    __host__ __device__ __forceinline__ hypot_sqr_func() {}
    __host__ __device__ __forceinline__ hypot_sqr_func(const hypot_sqr_func&) {}

    __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType src1,
                                             typename TypeTraits<T>::ParameterType src2) const
    {
        return src1 * src1 + src2 * src2;
    }
};
// Saturate Cast Functor
// Functor wrapper around saturate_cast: converts a value of type T to type D
// by calling saturate_cast<D>.
template <typename T, typename D> struct saturate_cast_func : unary_function<T, D>
{
    // Stateless: both constructors are intentionally empty.
    __host__ __device__ __forceinline__ saturate_cast_func() {}
    __host__ __device__ __forceinline__ saturate_cast_func(const saturate_cast_func&) {}

    __device__ __forceinline__ D operator ()(typename TypeTraits<T>::ParameterType v) const
    {
        return saturate_cast<D>(v);
    }
};
// Threshold Functors
// Binary threshold: the result is the comparison (src > thresh), which is 0 or 1,
// multiplied by maxVal.
template <typename T> struct thresh_binary_func : unary_function<T, T>
{
    T thresh;   // comparison level
    T maxVal;   // factor applied when the comparison holds

    // The default constructor performs no explicit member initialization.
    __host__ __device__ __forceinline__ thresh_binary_func() {}

    __host__ __device__ __forceinline__ thresh_binary_func(T thresh_, T maxVal_)
        : thresh(thresh_), maxVal(maxVal_) {}

    __host__ __device__ __forceinline__ thresh_binary_func(const thresh_binary_func& other)
        : thresh(other.thresh), maxVal(other.maxVal) {}

    __device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType src) const
    {
        return (src > thresh) * maxVal;
    }
};
// Inverted binary threshold: the result is the comparison (src <= thresh),
// which is 0 or 1, multiplied by maxVal.
template <typename T> struct thresh_binary_inv_func : unary_function<T, T>
{
    T thresh;   // comparison level
    T maxVal;   // factor applied when the comparison holds

    // The default constructor performs no explicit member initialization.
    __host__ __device__ __forceinline__ thresh_binary_inv_func() {}

    __host__ __device__ __forceinline__ thresh_binary_inv_func(T thresh_, T maxVal_)
        : thresh(thresh_), maxVal(maxVal_) {}

    __host__ __device__ __forceinline__ thresh_binary_inv_func(const thresh_binary_inv_func& other)
        : thresh(other.thresh), maxVal(other.maxVal) {}

    __device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType src) const
    {
        return (src <= thresh) * maxVal;
    }
};
// Truncating threshold: the result is minimum<T>()(src, thresh).
template <typename T> struct thresh_trunc_func : unary_function<T, T>
{
    T thresh;   // upper bound applied to the input

    // The default constructor performs no explicit member initialization.
    __host__ __device__ __forceinline__ thresh_trunc_func() {}

    // maxVal_ is accepted but ignored; presumably it exists so every threshold
    // functor can be built with the same (thresh, maxVal) argument list.
    explicit __host__ __device__ __forceinline__ thresh_trunc_func(T thresh_, T maxVal_ = 0)
        : thresh(thresh_)
    {
        (void)maxVal_;
    }

    __host__ __device__ __forceinline__ thresh_trunc_func(const thresh_trunc_func& other)
        : thresh(other.thresh) {}

    __device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType src) const
    {
        return minimum<T>()(src, thresh);
    }
};
// To-zero threshold: the result is the comparison (src > thresh), which is
// 0 or 1, multiplied by src itself.
template <typename T> struct thresh_to_zero_func : unary_function<T, T>
{
    T thresh;   // comparison level

    // The default constructor performs no explicit member initialization.
    __host__ __device__ __forceinline__ thresh_to_zero_func() {}

    // maxVal_ is accepted but ignored; presumably it exists so every threshold
    // functor can be built with the same (thresh, maxVal) argument list.
    explicit __host__ __device__ __forceinline__ thresh_to_zero_func(T thresh_, T maxVal_ = 0)
        : thresh(thresh_)
    {
        (void)maxVal_;
    }

    __host__ __device__ __forceinline__ thresh_to_zero_func(const thresh_to_zero_func& other)
        : thresh(other.thresh) {}

    __device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType src) const
    {
        return (src > thresh) * src;
    }
};
// Inverted to-zero threshold: the result is the comparison (src <= thresh),
// which is 0 or 1, multiplied by src itself.
template <typename T> struct thresh_to_zero_inv_func : unary_function<T, T>
{
    T thresh;   // comparison level

    // The default constructor performs no explicit member initialization.
    __host__ __device__ __forceinline__ thresh_to_zero_inv_func() {}

    // maxVal_ is accepted but ignored; presumably it exists so every threshold
    // functor can be built with the same (thresh, maxVal) argument list.
    explicit __host__ __device__ __forceinline__ thresh_to_zero_inv_func(T thresh_, T maxVal_ = 0)
        : thresh(thresh_)
    {
        (void)maxVal_;
    }

    __host__ __device__ __forceinline__ thresh_to_zero_inv_func(const thresh_to_zero_inv_func& other)
        : thresh(other.thresh) {}

    __device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType src) const
    {
        return (src <= thresh) * src;
    }
};
// Function Object Adaptors
// Adaptor that stores a copy of a unary predicate and returns the logical
// negation of whatever the predicate returns.
template <typename Predicate> struct unary_negate : unary_function<typename Predicate::argument_type, bool>
{
    Predicate pred;   // wrapped predicate (held by value)

    __host__ __device__ __forceinline__ unary_negate() {}

    explicit __host__ __device__ __forceinline__ unary_negate(const Predicate& p)
        : pred(p) {}

    __host__ __device__ __forceinline__ unary_negate(const unary_negate& other)
        : pred(other.pred) {}

    __device__ __forceinline__ bool operator()(
        typename TypeTraits<typename Predicate::argument_type>::ParameterType x) const
    {
        return !pred(x);
    }
};
// Convenience factory: wraps `pred` in a unary_negate so the predicate type is
// deduced from the argument.
template <typename Predicate>
__host__ __device__ __forceinline__
unary_negate<Predicate> not1(const Predicate& pred)
{
    typedef unary_negate<Predicate> negated_type;
    return negated_type(pred);
}
// Adaptor that stores a copy of a binary predicate and returns the logical
// negation of whatever the predicate returns.
template <typename Predicate> struct binary_negate
    : binary_function<typename Predicate::first_argument_type,
                      typename Predicate::second_argument_type,
                      bool>
{
    Predicate pred;   // wrapped predicate (held by value)

    __host__ __device__ __forceinline__ binary_negate() {}

    explicit __host__ __device__ __forceinline__ binary_negate(const Predicate& p)
        : pred(p) {}

    __host__ __device__ __forceinline__ binary_negate(const binary_negate& other)
        : pred(other.pred) {}

    __device__ __forceinline__ bool operator()(
        typename TypeTraits<typename Predicate::first_argument_type>::ParameterType x,
        typename TypeTraits<typename Predicate::second_argument_type>::ParameterType y) const
    {
        return !pred(x, y);
    }
};
// Convenience factory: wraps `pred` in a binary_negate so the predicate type is
// deduced from the argument.
template <typename BinaryPredicate>
__host__ __device__ __forceinline__
binary_negate<BinaryPredicate> not2(const BinaryPredicate& pred)
{
    typedef binary_negate<BinaryPredicate> negated_type;
    return negated_type(pred);
}
// Adaptor that turns a binary functor into a unary one by fixing its first
// argument: calling it with `a` evaluates op(arg1, a).
template <typename Op> struct binder1st
    : unary_function<typename Op::second_argument_type, typename Op::result_type>
{
    Op op;                                    // wrapped binary functor (by value)
    typename Op::first_argument_type arg1;    // value bound to the first parameter

    __host__ __device__ __forceinline__ binder1st() {}

    __host__ __device__ __forceinline__ binder1st(const Op& op_, const typename Op::first_argument_type& arg1_)
        : op(op_), arg1(arg1_) {}

    __host__ __device__ __forceinline__ binder1st(const binder1st& other)
        : op(other.op), arg1(other.arg1) {}

    __device__ __forceinline__ typename Op::result_type operator ()(
        typename TypeTraits<typename Op::second_argument_type>::ParameterType a) const
    {
        return op(arg1, a);
    }
};
// Convenience factory for binder1st: converts `x` to Op::first_argument_type
// and binds it as the first argument of `op`.
template <typename Op, typename T>
__host__ __device__ __forceinline__
binder1st<Op> bind1st(const Op& op, const T& x)
{
    typedef typename Op::first_argument_type bound_type;
    return binder1st<Op>(op, bound_type(x));
}
// Adaptor that turns a binary functor into a unary one by fixing its second
// argument: calling it with `a` evaluates op(a, arg2).
template <typename Op> struct binder2nd
    : unary_function<typename Op::first_argument_type, typename Op::result_type>
{
    Op op;                                     // wrapped binary functor (by value)
    typename Op::second_argument_type arg2;    // value bound to the second parameter

    __host__ __device__ __forceinline__ binder2nd() {}

    __host__ __device__ __forceinline__ binder2nd(const Op& op_, const typename Op::second_argument_type& arg2_)
        : op(op_), arg2(arg2_) {}

    __host__ __device__ __forceinline__ binder2nd(const binder2nd& other)
        : op(other.op), arg2(other.arg2) {}

    __device__ __forceinline__ typename Op::result_type operator ()(
        typename TypeTraits<typename Op::first_argument_type>::ParameterType a) const
    {
        return op(a, arg2);
    }
};
// Convenience factory for binder2nd: converts `x` to Op::second_argument_type
// and binds it as the second argument of `op`.
template <typename Op, typename T>
__host__ __device__ __forceinline__
binder2nd<Op> bind2nd(const Op& op, const T& x)
{
    typedef typename Op::second_argument_type bound_type;
    return binder2nd<Op>(op, bound_type(x));
}
// Functor Traits
// Compile-time trait: `value` is non-zero when an object of type F can be
// passed to a parameter of type unary_function<T, D> for some T and D
// (overload resolution picks the templated `check`), and zero when only the
// variadic fallback matches. Nothing is evaluated; only sizeof is taken.
template <typename F> struct IsUnaryFunction
{
    typedef char Yes;
    struct No { Yes a[2]; };    // guaranteed to differ in size from Yes

    static F makeF();           // declared only; supplies an F for the sizeof expression

    template <typename T, typename D> static Yes check(unary_function<T, D>);
    static No check(...);

    enum { value = (sizeof(Yes) == sizeof(check(makeF()))) };
};
// Compile-time trait: `value` is non-zero when an object of type F can be
// passed to a parameter of type binary_function<T1, T2, D> for some T1, T2
// and D (overload resolution picks the templated `check`), and zero when only
// the variadic fallback matches. Nothing is evaluated; only sizeof is taken.
template <typename F> struct IsBinaryFunction
{
    typedef char Yes;
    struct No { Yes a[2]; };    // guaranteed to differ in size from Yes

    static F makeF();           // declared only; supplies an F for the sizeof expression

    template <typename T1, typename T2, typename D> static Yes check(binary_function<T1, T2, D>);
    static No check(...);

    enum { value = (sizeof(Yes) == sizeof(check(makeF()))) };
};
namespace functional_detail
{
// Default shift for unary transforms, chosen from the destination element size
// only: 1-byte destinations get 4, 2-byte destinations get 2, everything else 1.
// The source element size is carried as a parameter but never affects the result.
template <size_t src_elem_size, size_t dst_elem_size>
struct UnOpShift
{
    enum { shift = 1 };
};

template <size_t src_elem_size>
struct UnOpShift<src_elem_size, 1>
{
    enum { shift = 4 };
};

template <size_t src_elem_size>
struct UnOpShift<src_elem_size, 2>
{
    enum { shift = 2 };
};

// Type-based front end: maps (T, D) to UnOpShift via sizeof.
template <typename T, typename D>
struct DefaultUnaryShift
{
    enum { shift = UnOpShift<sizeof(T), sizeof(D)>::shift };
};
// Default shift for binary transforms, chosen from the destination element size
// only: 1-byte destinations get 4, 2-byte destinations get 2, everything else 1.
// The two source element sizes are carried as parameters but never affect the result.
template <size_t src_elem_size1, size_t src_elem_size2, size_t dst_elem_size>
struct BinOpShift
{
    enum { shift = 1 };
};

template <size_t src_elem_size1, size_t src_elem_size2>
struct BinOpShift<src_elem_size1, src_elem_size2, 1>
{
    enum { shift = 4 };
};

template <size_t src_elem_size1, size_t src_elem_size2>
struct BinOpShift<src_elem_size1, src_elem_size2, 2>
{
    enum { shift = 2 };
};

// Type-based front end: maps (T1, T2, D) to BinOpShift via sizeof.
template <typename T1, typename T2, typename D>
struct DefaultBinaryShift
{
    enum { shift = BinOpShift<sizeof(T1), sizeof(T2), sizeof(D)>::shift };
};
// Picks the default shift for a functor type. The boolean parameter defaults to
// IsUnaryFunction<Func>::value: `true` routes to DefaultUnaryShift, `false`
// routes to DefaultBinaryShift (i.e. anything not detected as unary is treated
// as binary and must expose first/second_argument_type).
template <typename Func, bool unary = IsUnaryFunction<Func>::value>
struct ShiftDispatcher;

template <typename Func>
struct ShiftDispatcher<Func, true>
{
    enum
    {
        shift = DefaultUnaryShift<typename Func::argument_type,
                                  typename Func::result_type>::shift
    };
};

template <typename Func>
struct ShiftDispatcher<Func, false>
{
    enum
    {
        shift = DefaultBinaryShift<typename Func::first_argument_type,
                                   typename Func::second_argument_type,
                                   typename Func::result_type>::shift
    };
};
}
// Public entry point for the default shift of a functor type; simply re-exports
// functional_detail::ShiftDispatcher<Func>::shift.
template <typename Func>
struct DefaultTransformShift
{
    enum
    {
        shift = functional_detail::ShiftDispatcher<Func>::shift
    };
};
// Default tuning constants associated with a functor type: 16x16 block
// dimensions for both the "simple" and "smart" variants, plus a smart_shift
// taken from DefaultTransformShift<Func>.
// NOTE(review): how the transform kernels consume these values is not visible
// in this part of the file.
template <typename Func>
struct DefaultTransformFunctorTraits
{
    enum { simple_block_dim_x = 16 };
    enum { simple_block_dim_y = 16 };

    enum { smart_block_dim_x = 16 };
    enum { smart_block_dim_y = 16 };

    enum { smart_shift = DefaultTransformShift<Func>::shift };
};
// Customization point: inherits every default unchanged; specialize it for a
// functor type to override individual constants.
template <typename Func>
struct TransformFunctorTraits : DefaultTransformFunctorTraits<Func>
{
};
// Opens an explicit specialization of TransformFunctorTraits for `type` that
// inherits from DefaultTransformFunctorTraits< type >. The expansion stops
// right after the base-class list, so the user must follow the invocation
// with the specialization body, i.e. `{ ... };`.
#define OPENCV_CUDA_TRANSFORM_FUNCTOR_TRAITS(type) \
template <> struct TransformFunctorTraits< type > : DefaultTransformFunctorTraits< type >
}}} // namespace cv { namespace cuda { namespace cudev
//! @endcond
#endif // __OPENCV_CUDA_FUNCTIONAL_HPP__

View File

@ -0,0 +1,128 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_CUDA_LIMITS_HPP__
#define __OPENCV_CUDA_LIMITS_HPP__
#include <limits.h>
#include <float.h>
#include "common.hpp"
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
// Primary template is declared only; every supported type has an explicit
// specialization, so using an unsupported type is a compile-time error.
template <typename T> struct numeric_limits;
// Device-side limits for bool: min() is false, max() is true.
template <> struct numeric_limits<bool>
{
    static const bool is_signed = false;

    __device__ __forceinline__ static bool min()
    {
        return false;
    }
    __device__ __forceinline__ static bool max()
    {
        return true;
    }
};
// Device-side limits for signed char, taken from <limits.h>.
template <> struct numeric_limits<signed char>
{
    static const bool is_signed = true;

    __device__ __forceinline__ static signed char min()
    {
        return SCHAR_MIN;
    }
    __device__ __forceinline__ static signed char max()
    {
        return SCHAR_MAX;
    }
};
// Device-side limits for unsigned char: [0, UCHAR_MAX].
template <> struct numeric_limits<unsigned char>
{
    static const bool is_signed = false;

    __device__ __forceinline__ static unsigned char min()
    {
        return 0;
    }
    __device__ __forceinline__ static unsigned char max()
    {
        return UCHAR_MAX;
    }
};
// Device-side limits for short, taken from <limits.h>.
template <> struct numeric_limits<short>
{
    static const bool is_signed = true;

    __device__ __forceinline__ static short min()
    {
        return SHRT_MIN;
    }
    __device__ __forceinline__ static short max()
    {
        return SHRT_MAX;
    }
};
// Device-side limits for unsigned short: [0, USHRT_MAX].
template <> struct numeric_limits<unsigned short>
{
    static const bool is_signed = false;

    __device__ __forceinline__ static unsigned short min()
    {
        return 0;
    }
    __device__ __forceinline__ static unsigned short max()
    {
        return USHRT_MAX;
    }
};
// Device-side limits for int, taken from <limits.h>.
template <> struct numeric_limits<int>
{
    static const bool is_signed = true;

    __device__ __forceinline__ static int min()
    {
        return INT_MIN;
    }
    __device__ __forceinline__ static int max()
    {
        return INT_MAX;
    }
};
// Device-side limits for unsigned int: [0, UINT_MAX].
template <> struct numeric_limits<unsigned int>
{
    static const bool is_signed = false;

    __device__ __forceinline__ static unsigned int min()
    {
        return 0;
    }
    __device__ __forceinline__ static unsigned int max()
    {
        return UINT_MAX;
    }
};
// Device-side limits for float, taken from <float.h>.
// Note that min() returns FLT_MIN (the smallest positive normalized value),
// not the most negative finite value.
template <> struct numeric_limits<float>
{
    static const bool is_signed = true;

    __device__ __forceinline__ static float min()
    {
        return FLT_MIN;
    }
    __device__ __forceinline__ static float max()
    {
        return FLT_MAX;
    }
    __device__ __forceinline__ static float epsilon()
    {
        return FLT_EPSILON;
    }
};
// Device-side limits for double, taken from <float.h>.
// Note that min() returns DBL_MIN (the smallest positive normalized value),
// not the most negative finite value.
template <> struct numeric_limits<double>
{
    static const bool is_signed = true;

    __device__ __forceinline__ static double min()
    {
        return DBL_MIN;
    }
    __device__ __forceinline__ static double max()
    {
        return DBL_MAX;
    }
    __device__ __forceinline__ static double epsilon()
    {
        return DBL_EPSILON;
    }
};
}}} // namespace cv { namespace cuda { namespace device {
//! @endcond
#endif // __OPENCV_CUDA_LIMITS_HPP__

View File

@ -0,0 +1,205 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_CUDA_REDUCE_HPP__
#define __OPENCV_CUDA_REDUCE_HPP__
#include <thrust/tuple.h>
#include "detail/reduce.hpp"
#include "detail/reduce_key_val.hpp"
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
// Single-value reduction front end. Forwards to the reductor selected by
// reduce_detail::Dispatcher<N>.
//   smem - volatile scratch buffer (presumably shared memory - confirm with callers)
//   val  - this thread's value, passed by reference
//   tid  - thread index
//   op   - binary operation handed through to the reductor
template <int N, typename T, class Op>
__device__ __forceinline__ void reduce(volatile T* smem, T& val, unsigned int tid, const Op& op)
{
    typedef typename reduce_detail::Dispatcher<N>::reductor reductor_type;

    reductor_type::template reduce<volatile T*, T&, const Op&>(smem, val, tid, op);
}
// Tuple reduction front end: the same dispatch as the single-value overload,
// but the scratch pointers, values and operations each arrive packed in a
// thrust::tuple of up to ten elements. All three tuples are forwarded by
// const reference to the reductor selected by reduce_detail::Dispatcher<N>.
template <int N,
          typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9,
          typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9,
          class Op0, class Op1, class Op2, class Op3, class Op4, class Op5, class Op6, class Op7, class Op8, class Op9>
__device__ __forceinline__ void reduce(const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
                                       const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
                                       unsigned int tid,
                                       const thrust::tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>& op)
{
    typedef thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> pointer_tuple;
    typedef thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9> value_tuple;
    typedef thrust::tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9> op_tuple;
    typedef typename reduce_detail::Dispatcher<N>::reductor reductor_type;

    reductor_type::template reduce<const pointer_tuple&, const value_tuple&, const op_tuple&>(smem, val, tid, op);
}
// Key/value reduction front end (one key, one value). Forwards to the reductor
// selected by reduce_key_val_detail::Dispatcher<N>.
//   skeys/svals - volatile scratch buffers for keys and values
//                 (presumably shared memory - confirm with callers)
//   key/val     - this thread's key and value, passed by reference
//   tid         - thread index
//   cmp         - comparison handed through to the reductor
template <unsigned int N, typename K, typename V, class Cmp>
__device__ __forceinline__ void reduceKeyVal(volatile K* skeys, K& key, volatile V* svals, V& val, unsigned int tid, const Cmp& cmp)
{
    typedef typename reduce_key_val_detail::Dispatcher<N>::reductor reductor_type;

    reductor_type::template reduce<volatile K*, K&, volatile V*, V&, const Cmp&>(skeys, key, svals, val, tid, cmp);
}
// Key/value reduction front end with a single key and a tuple of values: the
// value scratch pointers and the per-thread values each arrive packed in a
// thrust::tuple of up to ten elements. Everything is forwarded to the reductor
// selected by reduce_key_val_detail::Dispatcher<N>.
template <unsigned int N,
          typename K,
          typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
          typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
          class Cmp>
__device__ __forceinline__ void reduceKeyVal(volatile K* skeys, K& key,
                                             const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
                                             const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
                                             unsigned int tid, const Cmp& cmp)
{
    typedef thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> value_pointer_tuple;
    typedef thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9> value_tuple;
    typedef typename reduce_key_val_detail::Dispatcher<N>::reductor reductor_type;

    reductor_type::template reduce<volatile K*, K&,
                                   const value_pointer_tuple&,
                                   const value_tuple&,
                                   const Cmp&>(skeys, key, svals, val, tid, cmp);
}
// Key/value reduction front end where keys, values and comparators are all
// tuples: key scratch pointers, per-thread keys, value scratch pointers,
// per-thread values and comparison functors each arrive packed in a
// thrust::tuple of up to ten elements. Everything is forwarded by const
// reference to the reductor selected by reduce_key_val_detail::Dispatcher<N>.
template <unsigned int N,
          typename KP0, typename KP1, typename KP2, typename KP3, typename KP4, typename KP5, typename KP6, typename KP7, typename KP8, typename KP9,
          typename KR0, typename KR1, typename KR2, typename KR3, typename KR4, typename KR5, typename KR6, typename KR7, typename KR8, typename KR9,
          typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
          typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
          class Cmp0, class Cmp1, class Cmp2, class Cmp3, class Cmp4, class Cmp5, class Cmp6, class Cmp7, class Cmp8, class Cmp9>
__device__ __forceinline__ void reduceKeyVal(const thrust::tuple<KP0, KP1, KP2, KP3, KP4, KP5, KP6, KP7, KP8, KP9>& skeys,
                                             const thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9>& key,
                                             const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
                                             const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
                                             unsigned int tid,
                                             const thrust::tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9>& cmp)
{
    typedef thrust::tuple<KP0, KP1, KP2, KP3, KP4, KP5, KP6, KP7, KP8, KP9> key_pointer_tuple;
    typedef thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9> key_tuple;
    typedef thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> value_pointer_tuple;
    typedef thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9> value_tuple;
    typedef thrust::tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9> cmp_tuple;
    typedef typename reduce_key_val_detail::Dispatcher<N>::reductor reductor_type;

    reductor_type::template reduce<const key_pointer_tuple&,
                                   const key_tuple&,
                                   const value_pointer_tuple&,
                                   const value_tuple&,
                                   const cmp_tuple&>(skeys, key, svals, val, tid, cmp);
}
// smem_tuple
// smem_tuple(p0, ..., pk): packs one to ten raw pointers into a thrust::tuple
// after adding a `volatile` qualifier to each pointee type. The resulting tuple
// has the shape expected by the tuple overloads of reduce()/reduceKeyVal().
template <typename T0>
__device__ __forceinline__
thrust::tuple<volatile T0*>
smem_tuple(T0* t0)
{
    return thrust::make_tuple(static_cast<volatile T0*>(t0));
}

template <typename T0, typename T1>
__device__ __forceinline__
thrust::tuple<volatile T0*, volatile T1*>
smem_tuple(T0* t0, T1* t1)
{
    return thrust::make_tuple(static_cast<volatile T0*>(t0),
                              static_cast<volatile T1*>(t1));
}

template <typename T0, typename T1, typename T2>
__device__ __forceinline__
thrust::tuple<volatile T0*, volatile T1*, volatile T2*>
smem_tuple(T0* t0, T1* t1, T2* t2)
{
    return thrust::make_tuple(static_cast<volatile T0*>(t0),
                              static_cast<volatile T1*>(t1),
                              static_cast<volatile T2*>(t2));
}

template <typename T0, typename T1, typename T2, typename T3>
__device__ __forceinline__
thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*>
smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3)
{
    return thrust::make_tuple(static_cast<volatile T0*>(t0),
                              static_cast<volatile T1*>(t1),
                              static_cast<volatile T2*>(t2),
                              static_cast<volatile T3*>(t3));
}

template <typename T0, typename T1, typename T2, typename T3, typename T4>
__device__ __forceinline__
thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*>
smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4)
{
    return thrust::make_tuple(static_cast<volatile T0*>(t0),
                              static_cast<volatile T1*>(t1),
                              static_cast<volatile T2*>(t2),
                              static_cast<volatile T3*>(t3),
                              static_cast<volatile T4*>(t4));
}

template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5>
__device__ __forceinline__
thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*>
smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5)
{
    return thrust::make_tuple(static_cast<volatile T0*>(t0),
                              static_cast<volatile T1*>(t1),
                              static_cast<volatile T2*>(t2),
                              static_cast<volatile T3*>(t3),
                              static_cast<volatile T4*>(t4),
                              static_cast<volatile T5*>(t5));
}

template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6>
__device__ __forceinline__
thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*, volatile T6*>
smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6)
{
    return thrust::make_tuple(static_cast<volatile T0*>(t0),
                              static_cast<volatile T1*>(t1),
                              static_cast<volatile T2*>(t2),
                              static_cast<volatile T3*>(t3),
                              static_cast<volatile T4*>(t4),
                              static_cast<volatile T5*>(t5),
                              static_cast<volatile T6*>(t6));
}

template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7>
__device__ __forceinline__
thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*, volatile T6*, volatile T7*>
smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6, T7* t7)
{
    return thrust::make_tuple(static_cast<volatile T0*>(t0),
                              static_cast<volatile T1*>(t1),
                              static_cast<volatile T2*>(t2),
                              static_cast<volatile T3*>(t3),
                              static_cast<volatile T4*>(t4),
                              static_cast<volatile T5*>(t5),
                              static_cast<volatile T6*>(t6),
                              static_cast<volatile T7*>(t7));
}

template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8>
__device__ __forceinline__
thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*, volatile T6*, volatile T7*, volatile T8*>
smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6, T7* t7, T8* t8)
{
    return thrust::make_tuple(static_cast<volatile T0*>(t0),
                              static_cast<volatile T1*>(t1),
                              static_cast<volatile T2*>(t2),
                              static_cast<volatile T3*>(t3),
                              static_cast<volatile T4*>(t4),
                              static_cast<volatile T5*>(t5),
                              static_cast<volatile T6*>(t6),
                              static_cast<volatile T7*>(t7),
                              static_cast<volatile T8*>(t8));
}

template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8, typename T9>
__device__ __forceinline__
thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*, volatile T6*, volatile T7*, volatile T8*, volatile T9*>
smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6, T7* t7, T8* t8, T9* t9)
{
    return thrust::make_tuple(static_cast<volatile T0*>(t0),
                              static_cast<volatile T1*>(t1),
                              static_cast<volatile T2*>(t2),
                              static_cast<volatile T3*>(t3),
                              static_cast<volatile T4*>(t4),
                              static_cast<volatile T5*>(t5),
                              static_cast<volatile T6*>(t6),
                              static_cast<volatile T7*>(t7),
                              static_cast<volatile T8*>(t8),
                              static_cast<volatile T9*>(t9));
}
}}}
//! @endcond
#endif // __OPENCV_CUDA_REDUCE_HPP__

View File

@ -0,0 +1,292 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_CUDA_SATURATE_CAST_HPP__
#define __OPENCV_CUDA_SATURATE_CAST_HPP__
#include "common.hpp"
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
// Generic saturate_cast overloads, one per supported source type. These are
// the fallbacks: they perform a plain conversion `_Tp(v)` with no clamping.
// Destination/source pairs that need clamping are handled by the explicit
// specializations that follow in this file.
template<typename _Tp>
__device__ __forceinline__ _Tp saturate_cast(uchar v)
{
    return _Tp(v);
}
template<typename _Tp>
__device__ __forceinline__ _Tp saturate_cast(schar v)
{
    return _Tp(v);
}
template<typename _Tp>
__device__ __forceinline__ _Tp saturate_cast(ushort v)
{
    return _Tp(v);
}
template<typename _Tp>
__device__ __forceinline__ _Tp saturate_cast(short v)
{
    return _Tp(v);
}
template<typename _Tp>
__device__ __forceinline__ _Tp saturate_cast(uint v)
{
    return _Tp(v);
}
template<typename _Tp>
__device__ __forceinline__ _Tp saturate_cast(int v)
{
    return _Tp(v);
}
template<typename _Tp>
__device__ __forceinline__ _Tp saturate_cast(float v)
{
    return _Tp(v);
}
template<typename _Tp>
__device__ __forceinline__ _Tp saturate_cast(double v)
{
    return _Tp(v);
}
// saturate_cast<uchar> specializations.
// Each conversion is a single PTX `cvt` instruction with the `.sat` modifier;
// floating-point sources additionally use the `.rni` rounding modifier. The
// result is produced in a 32-bit register and narrowed on return.
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(schar v)
{
    uint res = 0;
    int vi = v; // copy into an int so the value can be bound to the "r" operand
    asm("cvt.sat.u8.s8 %0, %1;" : "=r"(res) : "r"(vi));
    return res;
}
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(short v)
{
    uint res = 0;
    asm("cvt.sat.u8.s16 %0, %1;" : "=r"(res) : "h"(v));
    return res;
}
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(ushort v)
{
    uint res = 0;
    asm("cvt.sat.u8.u16 %0, %1;" : "=r"(res) : "h"(v));
    return res;
}
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(int v)
{
    uint res = 0;
    asm("cvt.sat.u8.s32 %0, %1;" : "=r"(res) : "r"(v));
    return res;
}
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(uint v)
{
    uint res = 0;
    asm("cvt.sat.u8.u32 %0, %1;" : "=r"(res) : "r"(v));
    return res;
}
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(float v)
{
    uint res = 0;
    asm("cvt.rni.sat.u8.f32 %0, %1;" : "=r"(res) : "f"(v));
    return res;
}
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(double v)
{
    // Test `defined` first so the condition is well-formed when __CUDA_ARCH__
    // is not defined; this matches the int/uint double specializations.
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
    uint res = 0;
    asm("cvt.rni.sat.u8.f64 %0, %1;" : "=r"(res) : "d"(v));
    return res;
#else
    // No f64 path selected: go through the float specialization instead.
    return saturate_cast<uchar>((float)v);
#endif
}
// saturate_cast<schar> specializations.
// Each conversion is a single PTX `cvt` instruction with the `.sat` modifier;
// floating-point sources additionally use the `.rni` rounding modifier. The
// result is produced in a 32-bit register and narrowed on return.
template<> __device__ __forceinline__ schar saturate_cast<schar>(uchar v)
{
    uint res = 0;
    uint vi = v; // copy into a uint so the value can be bound to the "r" operand
    asm("cvt.sat.s8.u8 %0, %1;" : "=r"(res) : "r"(vi));
    return res;
}
template<> __device__ __forceinline__ schar saturate_cast<schar>(short v)
{
    uint res = 0;
    asm("cvt.sat.s8.s16 %0, %1;" : "=r"(res) : "h"(v));
    return res;
}
template<> __device__ __forceinline__ schar saturate_cast<schar>(ushort v)
{
    uint res = 0;
    asm("cvt.sat.s8.u16 %0, %1;" : "=r"(res) : "h"(v));
    return res;
}
template<> __device__ __forceinline__ schar saturate_cast<schar>(int v)
{
    uint res = 0;
    asm("cvt.sat.s8.s32 %0, %1;" : "=r"(res) : "r"(v));
    return res;
}
template<> __device__ __forceinline__ schar saturate_cast<schar>(uint v)
{
    uint res = 0;
    asm("cvt.sat.s8.u32 %0, %1;" : "=r"(res) : "r"(v));
    return res;
}
template<> __device__ __forceinline__ schar saturate_cast<schar>(float v)
{
    uint res = 0;
    asm("cvt.rni.sat.s8.f32 %0, %1;" : "=r"(res) : "f"(v));
    return res;
}
template<> __device__ __forceinline__ schar saturate_cast<schar>(double v)
{
    // Test `defined` first so the condition is well-formed when __CUDA_ARCH__
    // is not defined; this matches the int/uint double specializations.
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
    uint res = 0;
    asm("cvt.rni.sat.s8.f64 %0, %1;" : "=r"(res) : "d"(v));
    return res;
#else
    // No f64 path selected: go through the float specialization instead.
    return saturate_cast<schar>((float)v);
#endif
}
// saturate_cast<ushort> specializations.
// Each conversion is a single PTX `cvt` instruction with the `.sat` modifier;
// floating-point sources additionally use the `.rni` rounding modifier. The
// result is written directly to a 16-bit ("h") operand.
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(schar v)
{
    ushort res = 0;
    int vi = v; // copy into an int so the value can be bound to the "r" operand
    asm("cvt.sat.u16.s8 %0, %1;" : "=h"(res) : "r"(vi));
    return res;
}
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(short v)
{
    ushort res = 0;
    asm("cvt.sat.u16.s16 %0, %1;" : "=h"(res) : "h"(v));
    return res;
}
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(int v)
{
    ushort res = 0;
    asm("cvt.sat.u16.s32 %0, %1;" : "=h"(res) : "r"(v));
    return res;
}
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(uint v)
{
    ushort res = 0;
    asm("cvt.sat.u16.u32 %0, %1;" : "=h"(res) : "r"(v));
    return res;
}
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(float v)
{
    ushort res = 0;
    asm("cvt.rni.sat.u16.f32 %0, %1;" : "=h"(res) : "f"(v));
    return res;
}
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(double v)
{
    // Test `defined` first so the condition is well-formed when __CUDA_ARCH__
    // is not defined; this matches the int/uint double specializations.
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
    ushort res = 0;
    asm("cvt.rni.sat.u16.f64 %0, %1;" : "=h"(res) : "d"(v));
    return res;
#else
    // No f64 path selected: go through the float specialization instead.
    return saturate_cast<ushort>((float)v);
#endif
}
// saturate_cast<short> specializations.
// Each conversion is a single PTX `cvt` instruction with the `.sat` modifier;
// floating-point sources additionally use the `.rni` rounding modifier. The
// result is written directly to a 16-bit ("h") operand.
template<> __device__ __forceinline__ short saturate_cast<short>(ushort v)
{
    short res = 0;
    asm("cvt.sat.s16.u16 %0, %1;" : "=h"(res) : "h"(v));
    return res;
}
template<> __device__ __forceinline__ short saturate_cast<short>(int v)
{
    short res = 0;
    asm("cvt.sat.s16.s32 %0, %1;" : "=h"(res) : "r"(v));
    return res;
}
template<> __device__ __forceinline__ short saturate_cast<short>(uint v)
{
    short res = 0;
    asm("cvt.sat.s16.u32 %0, %1;" : "=h"(res) : "r"(v));
    return res;
}
template<> __device__ __forceinline__ short saturate_cast<short>(float v)
{
    short res = 0;
    asm("cvt.rni.sat.s16.f32 %0, %1;" : "=h"(res) : "f"(v));
    return res;
}
template<> __device__ __forceinline__ short saturate_cast<short>(double v)
{
    // Test `defined` first so the condition is well-formed when __CUDA_ARCH__
    // is not defined; this matches the int/uint double specializations.
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
    short res = 0;
    asm("cvt.rni.sat.s16.f64 %0, %1;" : "=h"(res) : "d"(v));
    return res;
#else
    // No f64 path selected: go through the float specialization instead.
    return saturate_cast<short>((float)v);
#endif
}
// Saturating uint -> int conversion, done by the PTX instruction cvt.sat.s32.u32.
template<> __device__ __forceinline__ int saturate_cast<int>(uint v)
{
    int clamped = 0;
    asm("cvt.sat.s32.u32 %0, %1;" : "=r"(clamped) : "r"(v));
    return clamped;
}
// float -> int conversion delegated to the CUDA intrinsic __float2int_rn.
template<> __device__ __forceinline__ int saturate_cast<int>(float v)
{
    const int rounded = __float2int_rn(v);
    return rounded;
}
// double -> int conversion.
// Uses the intrinsic __double2int_rn when __CUDA_ARCH__ >= 130; otherwise the
// value is narrowed to float and handled by the float specialisation.
template<> __device__ __forceinline__ int saturate_cast<int>(double v)
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
    const int rounded = __double2int_rn(v);
    return rounded;
#else
    const float narrowed = (float)v;
    return saturate_cast<int>(narrowed);
#endif
}
// Saturating schar -> uint conversion, done by the PTX instruction cvt.sat.u32.s8.
template<> __device__ __forceinline__ uint saturate_cast<uint>(schar v)
{
    // The asm input uses a 32-bit register constraint ("r"), so widen the 8-bit value first.
    int widened = v;
    uint clamped = 0;
    asm("cvt.sat.u32.s8 %0, %1;" : "=r"(clamped) : "r"(widened));
    return clamped;
}
// Saturating short -> uint conversion, done by the PTX instruction cvt.sat.u32.s16.
template<> __device__ __forceinline__ uint saturate_cast<uint>(short v)
{
    uint clamped = 0;
    asm("cvt.sat.u32.s16 %0, %1;" : "=r"(clamped) : "h"(v));
    return clamped;
}
// Saturating int -> uint conversion, done by the PTX instruction cvt.sat.u32.s32.
template<> __device__ __forceinline__ uint saturate_cast<uint>(int v)
{
    uint clamped = 0;
    asm("cvt.sat.u32.s32 %0, %1;" : "=r"(clamped) : "r"(v));
    return clamped;
}
// float -> uint conversion delegated to the CUDA intrinsic __float2uint_rn.
template<> __device__ __forceinline__ uint saturate_cast<uint>(float v)
{
    const uint rounded = __float2uint_rn(v);
    return rounded;
}
// double -> uint conversion.
// Uses the intrinsic __double2uint_rn when __CUDA_ARCH__ >= 130; otherwise the
// value is narrowed to float and handled by the float specialisation.
template<> __device__ __forceinline__ uint saturate_cast<uint>(double v)
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
    const uint rounded = __double2uint_rn(v);
    return rounded;
#else
    const float narrowed = (float)v;
    return saturate_cast<uint>(narrowed);
#endif
}
}}}
//! @endcond
#endif /* __OPENCV_CUDA_SATURATE_CAST_HPP__ */

View File

@ -0,0 +1,258 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_CUDA_SCAN_HPP__
#define __OPENCV_CUDA_SCAN_HPP__
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/utility.hpp"
#include "opencv2/core/cuda/warp.hpp"
#include "opencv2/core/cuda/warp_shuffle.hpp"
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
//! Selects the scan flavour: INCLUSIVE results contain the calling element's own
//! value, EXCLUSIVE results contain only the elements that precede it.
enum ScanKind { EXCLUSIVE = 0, INCLUSIVE = 1 };
//! In-place scan of one 32-element segment of @p ptr, with a lane guard on every step.
//! Kind selects inclusive or exclusive output, T is the element type and F is a
//! default-constructible binary functor used to combine two elements.
template <ScanKind Kind, typename T, typename F> struct WarpScan
{
__device__ __forceinline__ WarpScan() {}
__device__ __forceinline__ WarpScan(const WarpScan& other) { (void)other; }
//! Combines ptr[idx] with the elements 1, 2, 4, 8 and 16 slots before it and returns
//! this element's scan result.
//! NOTE(review): there is no barrier between the steps; this appears to rely on the
//! threads of a warp executing in lock-step with @p ptr being volatile -- confirm for
//! the target architecture.
__device__ __forceinline__ T operator()( volatile T *ptr , const unsigned int idx)
{
// position of this element inside its 32-element segment
const unsigned int lane = idx & 31;
F op;
// each step is skipped when the partner element would fall before the segment start
if ( lane >= 1) ptr [idx ] = op(ptr [idx - 1], ptr [idx]);
if ( lane >= 2) ptr [idx ] = op(ptr [idx - 2], ptr [idx]);
if ( lane >= 4) ptr [idx ] = op(ptr [idx - 4], ptr [idx]);
if ( lane >= 8) ptr [idx ] = op(ptr [idx - 8], ptr [idx]);
if ( lane >= 16) ptr [idx ] = op(ptr [idx - 16], ptr [idx]);
if( Kind == INCLUSIVE )
return ptr [idx];
else
// exclusive result: the previous element's inclusive value, or 0 for the first lane
return (lane > 0) ? ptr [idx - 1] : 0;
}
//! Maps a thread id to its element index; this layout uses no padding, so it is the identity.
__device__ __forceinline__ unsigned int index(const unsigned int tid)
{
return tid;
}
//! Nothing to initialise for this layout.
__device__ __forceinline__ void init(volatile T *ptr){}
//! Offset BlockScan adds to the warp number when it stores and reads per-warp totals.
static const int warp_offset = 0;
//! Scanner BlockScan uses to combine the per-warp totals (always inclusive).
typedef WarpScan<INCLUSIVE, T, F> merge;
};
//! In-place scan of one warp's segment of @p ptr without any lane comparisons.
//! Every step reads up to 16 slots before idx unconditionally, so the layout produced
//! by index() reserves 16 extra slots in front of each warp's 32 data slots.
//! NOTE(review): those extra slots presumably have to hold the identity (0) before the
//! scan runs; this struct only zeroes ptr[threadIdx.x] in init() -- confirm with callers.
template <ScanKind Kind , typename T, typename F> struct WarpScanNoComp
{
__device__ __forceinline__ WarpScanNoComp() {}
__device__ __forceinline__ WarpScanNoComp(const WarpScanNoComp& other) { (void)other; }
//! Combines ptr[idx] with the elements 1, 2, 4, 8 and 16 slots before it and returns
//! this element's scan result.
//! NOTE(review): there is no barrier between the steps; this appears to rely on
//! lock-step warp execution with @p ptr being volatile -- confirm.
__device__ __forceinline__ T operator()( volatile T *ptr , const unsigned int idx)
{
// lane comes from the thread index here, not from idx (idx includes the padding offset)
const unsigned int lane = threadIdx.x & 31;
F op;
ptr [idx ] = op(ptr [idx - 1], ptr [idx]);
ptr [idx ] = op(ptr [idx - 2], ptr [idx]);
ptr [idx ] = op(ptr [idx - 4], ptr [idx]);
ptr [idx ] = op(ptr [idx - 8], ptr [idx]);
ptr [idx ] = op(ptr [idx - 16], ptr [idx]);
if( Kind == INCLUSIVE )
return ptr [idx];
else
// exclusive result: the previous element's inclusive value, or 0 for the first lane
return (lane > 0) ? ptr [idx - 1] : 0;
}
//! Maps a thread id to its padded element index:
//! warp number * warp_smem_stride + 16 slots of leading padding + lane.
__device__ __forceinline__ unsigned int index(const unsigned int tid)
{
return (tid >> warp_log) * warp_smem_stride + 16 + (tid & warp_mask);
}
//! Writes 0 to the slot addressed directly by the calling thread's threadIdx.x.
__device__ __forceinline__ void init(volatile T *ptr)
{
ptr[threadIdx.x] = 0;
}
//! Slots per warp: 32 data + 16 padding + 1 extra.
static const int warp_smem_stride = 32 + 16 + 1;
//! Offset BlockScan adds to the warp number when it stores and reads per-warp totals.
static const int warp_offset = 16;
static const int warp_log = 5;
static const int warp_mask = 31;
//! Scanner BlockScan uses to combine the per-warp totals (always inclusive).
typedef WarpScanNoComp<INCLUSIVE, T, F> merge;
};
//! Block-wide scan built from a warp-level scanner Sc (e.g. WarpScan or WarpScanNoComp).
//! Kind selects inclusive or exclusive output and T is the element type.
//! NOTE(review): F is not referenced in this struct; the per-warp totals are folded in
//! with operator+ below, so results only match Sc's functor when that functor is addition.
template <ScanKind Kind , typename T, typename Sc, typename F> struct BlockScan
{
__device__ __forceinline__ BlockScan() {}
__device__ __forceinline__ BlockScan(const BlockScan& other) { (void)other; }
//! Scans @p ptr across the block, writes each thread's result back to ptr[idx] and
//! returns it. Contains __syncthreads() calls, so every thread of the block must call it.
__device__ __forceinline__ T operator()(volatile T *ptr)
{
const unsigned int tid = threadIdx.x;
const unsigned int lane = tid & warp_mask;
const unsigned int warp = tid >> warp_log;
Sc scan;
typename Sc::merge merge_scan;
const unsigned int idx = scan.index(tid);
// step 1: every warp scans its own segment in place
T val = scan(ptr, idx);
__syncthreads ();
// step 2: the first warp lets the scanner (re)initialise the area used for warp totals
if( warp == 0)
scan.init(ptr);
__syncthreads ();
// step 3: the last lane of each warp publishes the warp total; for an exclusive scan
// val lacks this lane's own element, so the in-place inclusive value ptr[idx] is used
if( lane == 31 )
ptr [scan.warp_offset + warp ] = (Kind == INCLUSIVE) ? val : ptr [idx];
__syncthreads ();
// step 4: the first warp runs an inclusive scan over the published warp totals
if( warp == 0 )
merge_scan(ptr, idx);
__syncthreads();
// step 5: every warp except the first adds the combined total of all preceding warps
if ( warp > 0)
val = ptr [scan.warp_offset + warp - 1] + val;
__syncthreads ();
// step 6: store the final per-thread result back into the buffer
ptr[idx] = val;
__syncthreads ();
return val ;
}
static const int warp_log = 5;
static const int warp_mask = 31;
};
//! Inclusive warp-level prefix sum of @p idata; returns the calling thread's running total.
//! When __CUDA_ARCH__ >= 300 the values are exchanged with shfl_up and @p s_Data / @p tid
//! are not used. Otherwise @p s_Data is used as scratch space.
//! NOTE(review): in the scratch path the highest slot written is 2 * tid - lane + WARP_SIZE,
//! so s_Data presumably needs about twice as many elements as there are scanning threads -- confirm.
template <typename T>
__device__ T warpScanInclusive(T idata, volatile T* s_Data, unsigned int tid)
{
#if __CUDA_ARCH__ >= 300
const unsigned int laneId = cv::cuda::device::Warp::laneId();
// scan using warp shuffle functions: at distance i = 1, 2, 4, ... each lane fetches the
// running total held i lanes below it and adds it when such a lane exists
#pragma unroll
for (int i = 1; i <= (OPENCV_CUDA_WARP_SIZE / 2); i *= 2)
{
const T n = cv::cuda::device::shfl_up(idata, i);
if (laneId >= i)
idata += n;
}
return idata;
#else
// each warp owns 2 * WARP_SIZE consecutive slots: the first half is zeroed so the
// unguarded reads at pos - 1 ... pos - 16 below see 0, the second half holds the data
unsigned int pos = 2 * tid - (tid & (OPENCV_CUDA_WARP_SIZE - 1));
s_Data[pos] = 0;
pos += OPENCV_CUDA_WARP_SIZE;
s_Data[pos] = idata;
// NOTE(review): no barrier between these steps; appears to rely on lock-step warp
// execution with s_Data being volatile -- confirm for the target architecture
s_Data[pos] += s_Data[pos - 1];
s_Data[pos] += s_Data[pos - 2];
s_Data[pos] += s_Data[pos - 4];
s_Data[pos] += s_Data[pos - 8];
s_Data[pos] += s_Data[pos - 16];
return s_Data[pos];
#endif
}
//! Exclusive warp-level prefix sum: the inclusive result with the caller's own
//! contribution subtracted again.
template <typename T>
__device__ __forceinline__ T warpScanExclusive(T idata, volatile T* s_Data, unsigned int tid)
{
    const T inclusive = warpScanInclusive(idata, s_Data, tid);
    return inclusive - idata;
}
//! Inclusive block-level prefix sum of @p idata over tiNumScanThreads threads.
//! With at most one warp of threads this is just warpScanInclusive(). Otherwise each warp
//! is scanned on its own, the per-warp totals are scanned exclusively by the first
//! threads of the block, and every thread adds the total of the warps before its own.
//! The multi-warp path contains __syncthreads() calls, so all threads of the block must
//! reach this function. @p s_Data is scratch space shared by the participating threads.
template <int tiNumScanThreads, typename T>
__device__ T blockScanInclusive(T idata, volatile T* s_Data, unsigned int tid)
{
if (tiNumScanThreads > OPENCV_CUDA_WARP_SIZE)
{
//Bottom-level inclusive warp scan
T warpResult = warpScanInclusive(idata, s_Data, tid);
//Save top elements of each warp for exclusive warp scan
//sync to wait for warp scans to complete (because s_Data is being overwritten)
__syncthreads();
// the last lane of each warp holds that warp's total; store it at the warp's index
if ((tid & (OPENCV_CUDA_WARP_SIZE - 1)) == (OPENCV_CUDA_WARP_SIZE - 1))
{
s_Data[tid >> OPENCV_CUDA_LOG_WARP_SIZE] = warpResult;
}
//wait for warp scans to complete
__syncthreads();
// one thread per warp total: threads 0 .. (number of warps - 1)
if (tid < (tiNumScanThreads / OPENCV_CUDA_WARP_SIZE) )
{
//grab top warp elements
T val = s_Data[tid];
//calculate exclusive scan and write back to shared memory
s_Data[tid] = warpScanExclusive(val, s_Data, tid);
}
//return updated warp scans with exclusive scan results
__syncthreads();
// own warp-local result plus the sum of all preceding warps
return warpResult + s_Data[tid >> OPENCV_CUDA_LOG_WARP_SIZE];
}
else
{
return warpScanInclusive(idata, s_Data, tid);
}
}
}}}
//! @endcond
#endif // __OPENCV_CUDA_SCAN_HPP__

View File

@ -0,0 +1,869 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
/*
* Copyright (c) 2013 NVIDIA Corporation. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* Neither the name of NVIDIA Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __OPENCV_CUDA_SIMD_FUNCTIONS_HPP__
#define __OPENCV_CUDA_SIMD_FUNCTIONS_HPP__
#include "common.hpp"
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
// 2-lane operations: each 32-bit word is treated as two unsigned 16-bit halfwords
// Per-halfword unsigned saturating add: each 16-bit lane of the result is
// min(a_lane + b_lane, 0xffff).
//
// The sm_30+ and sm_20+ paths use the ".sat" form of the PTX video instructions.
// The portable fallback used to wrap modulo 2^16 instead, so the result depended
// on the target architecture; it now saturates like the other two paths.
static __device__ __forceinline__ unsigned int vadd2(unsigned int a, unsigned int b)
{
    unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
    asm("vadd2.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#elif __CUDA_ARCH__ >= 200
    asm("vadd.u32.u32.u32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    asm("vadd.u32.u32.u32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
    // add each halfword in its own 32-bit temporary so the carry-out stays visible
    unsigned int lo = (a & 0x0000ffff) + (b & 0x0000ffff);
    unsigned int hi = (a >> 16) + (b >> 16);
    if (lo > 0x0000ffff) lo = 0x0000ffff; // clamp low lane
    if (hi > 0x0000ffff) hi = 0x0000ffff; // clamp high lane
    r = (hi << 16) | lo;
#endif
    return r;
}
// Per-halfword unsigned saturating subtract: each 16-bit lane of the result is
// max(a_lane - b_lane, 0).
//
// The sm_30+ and sm_20+ paths use the ".sat" form of the PTX video instructions.
// The portable fallback used to wrap modulo 2^16 instead, so the result depended
// on the target architecture; it now saturates like the other two paths.
static __device__ __forceinline__ unsigned int vsub2(unsigned int a, unsigned int b)
{
    unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
    asm("vsub2.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#elif __CUDA_ARCH__ >= 200
    asm("vsub.u32.u32.u32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    asm("vsub.u32.u32.u32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
    const unsigned int a_lo = a & 0x0000ffff;
    const unsigned int b_lo = b & 0x0000ffff;
    const unsigned int a_hi = a >> 16;
    const unsigned int b_hi = b >> 16;
    // a lane that would go negative is clamped to zero
    const unsigned int lo = (a_lo > b_lo) ? (a_lo - b_lo) : 0;
    const unsigned int hi = (a_hi > b_hi) ? (a_hi - b_hi) : 0;
    r = (hi << 16) | lo;
#endif
    return r;
}
// Per-halfword unsigned absolute difference: each 16-bit lane holds |a_lane - b_lane|.
static __device__ __forceinline__ unsigned int vabsdiff2(unsigned int a, unsigned int b)
{
    unsigned int result = 0;
#if __CUDA_ARCH__ >= 300
    asm("vabsdiff2.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
#elif __CUDA_ARCH__ >= 200
    asm("vabsdiff.u32.u32.u32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
    asm("vabsdiff.u32.u32.u32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
#else
    // split both operands into their low and high halfwords (kept in place)
    const unsigned int a_low = a & 0x0000ffff;
    const unsigned int b_low = b & 0x0000ffff;
    const unsigned int a_high = a & 0xffff0000;
    const unsigned int b_high = b & 0xffff0000;
    // lane-wise maximum and minimum, recombined into full words
    const unsigned int larger = ::max(b_low, a_low) | ::max(b_high, a_high);
    const unsigned int smaller = ::min(b_low, a_low) | ::min(b_high, a_high);
    // |a - b| = max(a, b) - min(a, b); no lane can borrow because larger >= smaller per lane
    result = larger - smaller;
#endif
    return result;
}
// Per-halfword unsigned average rounded down: each 16-bit lane holds (a_lane + b_lane) / 2.
static __device__ __forceinline__ unsigned int vavg2(unsigned int a, unsigned int b)
{
    // HAKMEM #23: a + b = 2 * (a & b) + (a ^ b), hence
    // (a + b) / 2 = (a & b) + ((a ^ b) >> 1)
    const unsigned int common_bits = a & b;
    // clearing bit 0 of each halfword keeps the shift from leaking into the lane below
    const unsigned int half_of_differing = ((a ^ b) & 0xfffefffe) >> 1;
    return common_bits + half_of_differing;
}
// Per-halfword unsigned average rounded up: each 16-bit lane holds (a_lane + b_lane + 1) / 2.
static __device__ __forceinline__ unsigned int vavrg2(unsigned int a, unsigned int b)
{
    unsigned int rounded_avg = 0;
#if __CUDA_ARCH__ >= 300
    asm("vavrg2.u32.u32.u32 %0, %1, %2, %3;" : "=r"(rounded_avg) : "r"(a), "r"(b), "r"(rounded_avg));
#else
    // HAKMEM #23: a + b = 2 * (a | b) - (a ^ b), hence
    // (a + b + 1) / 2 = (a | b) - ((a ^ b) >> 1)
    // clearing bit 0 of each halfword keeps the shift from leaking into the lane below
    const unsigned int half_of_differing = ((a ^ b) & 0xfffefffe) >> 1;
    rounded_avg = (a | b) - half_of_differing;
#endif
    return rounded_avg;
}
// Per-halfword equality test: each 16-bit lane of the result is 1 if the lanes of
// a and b are equal, else 0.
static __device__ __forceinline__ unsigned int vseteq2(unsigned int a, unsigned int b)
{
    unsigned int flags = 0;
#if __CUDA_ARCH__ >= 300
    asm("vset2.u32.u32.eq %0, %1, %2, %3;" : "=r"(flags) : "r"(a), "r"(b), "r"(flags));
#else
    // inspired by Alan Mycroft's null-byte detection algorithm:
    // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
    const unsigned int diff = a ^ b;                    // lane is 0x0000 where a == b
    const unsigned int guarded = diff | 0x80008000;     // force msbs on so the subtraction cannot borrow across lanes
    const unsigned int msb_was_clear = diff ^ guarded;  // msb = 1 where the lane of diff was < 0x8000
    const unsigned int decremented = guarded - 0x00010001; // msb = 0 only where the lane of diff was 0x0000 or 0x8000
    // both conditions together leave the msb set only for lanes equal to 0x0000
    flags = (msb_was_clear & ~decremented) >> 15;
#endif
    return flags;
}
// Per-halfword equality mask: each 16-bit lane of the result is 0xffff if the lanes
// of a and b are equal, else 0x0000.
static __device__ __forceinline__ unsigned int vcmpeq2(unsigned int a, unsigned int b)
{
    unsigned int mask;
#if __CUDA_ARCH__ >= 300
    const unsigned int flags = vseteq2(a, b);
    // (flags << 16) - flags stretches each 0/1 lane flag into 0x0000/0xffff
    mask = (flags << 16) - flags;
#else
    // inspired by Alan Mycroft's null-byte detection algorithm:
    // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
    const unsigned int diff = a ^ b;                 // lane is 0x0000 where a == b
    const unsigned int guarded = diff | 0x80008000;  // force msbs on to catch the borrow
    // msb = 1 only where the lane of diff was 0x0000
    const unsigned int equal_msb = (diff ^ guarded) & ~(guarded - 0x00010001);
    // spread each msb over its whole lane: 0x8000 -> 0x7fff -> 0xffff
    mask = equal_msb | (equal_msb - (equal_msb >> 15));
#endif
    return mask;
}
// Per-halfword unsigned a >= b test: each 16-bit lane of the result is 1 or 0.
static __device__ __forceinline__ unsigned int vsetge2(unsigned int a, unsigned int b)
{
    unsigned int flags = 0;
#if __CUDA_ARCH__ >= 300
    asm("vset2.u32.u32.ge %0, %1, %2, %3;" : "=r"(flags) : "r"(a), "r"(b), "r"(flags));
#else
    asm("not.b32 %0, %0;" : "+r"(b));
    // (a + ~b + 1) / 2 = (a - b) / 2 per lane; its msb is the carry-out of a - b
    const unsigned int carry_msb = vavrg2(a, b) & 0x80008000;
    flags = carry_msb >> 15; // convert to bool
#endif
    return flags;
}
// Per-halfword unsigned a >= b mask: each 16-bit lane of the result is 0xffff or 0x0000.
static __device__ __forceinline__ unsigned int vcmpge2(unsigned int a, unsigned int b)
{
    unsigned int mask;
#if __CUDA_ARCH__ >= 300
    const unsigned int flags = vsetge2(a, b);
    // stretch each 0/1 lane flag into 0x0000/0xffff
    mask = (flags << 16) - flags;
#else
    asm("not.b32 %0, %0;" : "+r"(b));
    // (a + ~b + 1) / 2 = (a - b) / 2 per lane; its msb is the carry-out of a - b
    const unsigned int carry_msb = vavrg2(a, b) & 0x80008000;
    // spread each msb over its whole lane
    mask = carry_msb | (carry_msb - (carry_msb >> 15));
#endif
    return mask;
}
// Per-halfword unsigned a > b test: each 16-bit lane of the result is 1 or 0.
static __device__ __forceinline__ unsigned int vsetgt2(unsigned int a, unsigned int b)
{
    unsigned int flags = 0;
#if __CUDA_ARCH__ >= 300
    asm("vset2.u32.u32.gt %0, %1, %2, %3;" : "=r"(flags) : "r"(a), "r"(b), "r"(flags));
#else
    asm("not.b32 %0, %0;" : "+r"(b));
    // (a + ~b) / 2 = (a - b) / 2 [rounded down] per lane; its msb is the carry-out
    const unsigned int carry_msb = vavg2(a, b) & 0x80008000;
    flags = carry_msb >> 15; // convert to bool
#endif
    return flags;
}
// Per-halfword unsigned a > b mask: each 16-bit lane of the result is 0xffff or 0x0000.
static __device__ __forceinline__ unsigned int vcmpgt2(unsigned int a, unsigned int b)
{
    unsigned int mask;
#if __CUDA_ARCH__ >= 300
    const unsigned int flags = vsetgt2(a, b);
    // stretch each 0/1 lane flag into 0x0000/0xffff
    mask = (flags << 16) - flags;
#else
    asm("not.b32 %0, %0;" : "+r"(b));
    // (a + ~b) / 2 = (a - b) / 2 [rounded down] per lane; its msb is the carry-out
    const unsigned int carry_msb = vavg2(a, b) & 0x80008000;
    // spread each msb over its whole lane
    mask = carry_msb | (carry_msb - (carry_msb >> 15));
#endif
    return mask;
}
// Per-halfword unsigned a <= b test: each 16-bit lane of the result is 1 or 0.
static __device__ __forceinline__ unsigned int vsetle2(unsigned int a, unsigned int b)
{
    unsigned int flags = 0;
#if __CUDA_ARCH__ >= 300
    asm("vset2.u32.u32.le %0, %1, %2, %3;" : "=r"(flags) : "r"(a), "r"(b), "r"(flags));
#else
    asm("not.b32 %0, %0;" : "+r"(a));
    // (b + ~a + 1) / 2 = (b - a) / 2 per lane; its msb is the carry-out of b - a
    const unsigned int carry_msb = vavrg2(a, b) & 0x80008000;
    flags = carry_msb >> 15; // convert to bool
#endif
    return flags;
}
// Per-halfword unsigned a <= b mask: each 16-bit lane of the result is 0xffff or 0x0000.
static __device__ __forceinline__ unsigned int vcmple2(unsigned int a, unsigned int b)
{
    unsigned int mask;
#if __CUDA_ARCH__ >= 300
    const unsigned int flags = vsetle2(a, b);
    // stretch each 0/1 lane flag into 0x0000/0xffff
    mask = (flags << 16) - flags;
#else
    asm("not.b32 %0, %0;" : "+r"(a));
    // (b + ~a + 1) / 2 = (b - a) / 2 per lane; its msb is the carry-out of b - a
    const unsigned int carry_msb = vavrg2(a, b) & 0x80008000;
    // spread each msb over its whole lane
    mask = carry_msb | (carry_msb - (carry_msb >> 15));
#endif
    return mask;
}
// Per-halfword unsigned a < b test: each 16-bit lane of the result is 1 or 0.
static __device__ __forceinline__ unsigned int vsetlt2(unsigned int a, unsigned int b)
{
    unsigned int flags = 0;
#if __CUDA_ARCH__ >= 300
    asm("vset2.u32.u32.lt %0, %1, %2, %3;" : "=r"(flags) : "r"(a), "r"(b), "r"(flags));
#else
    asm("not.b32 %0, %0;" : "+r"(a));
    // (b + ~a) / 2 = (b - a) / 2 [rounded down] per lane; its msb is the carry-out
    const unsigned int carry_msb = vavg2(a, b) & 0x80008000;
    flags = carry_msb >> 15; // convert to bool
#endif
    return flags;
}
// Per-halfword unsigned a < b mask: each 16-bit lane of the result is 0xffff or 0x0000.
static __device__ __forceinline__ unsigned int vcmplt2(unsigned int a, unsigned int b)
{
    unsigned int mask;
#if __CUDA_ARCH__ >= 300
    const unsigned int flags = vsetlt2(a, b);
    // stretch each 0/1 lane flag into 0x0000/0xffff
    mask = (flags << 16) - flags;
#else
    asm("not.b32 %0, %0;" : "+r"(a));
    // (b + ~a) / 2 = (b - a) / 2 [rounded down] per lane; its msb is the carry-out
    const unsigned int carry_msb = vavg2(a, b) & 0x80008000;
    // spread each msb over its whole lane
    mask = carry_msb | (carry_msb - (carry_msb >> 15));
#endif
    return mask;
}
// Per-halfword inequality test: each 16-bit lane of the result is 1 if the lanes of
// a and b differ, else 0.
static __device__ __forceinline__ unsigned int vsetne2(unsigned int a, unsigned int b)
{
    unsigned int flags = 0;
#if __CUDA_ARCH__ >= 300
    asm("vset2.u32.u32.ne %0, %1, %2, %3;" : "=r"(flags) : "r"(a), "r"(b), "r"(flags));
#else
    // inspired by Alan Mycroft's null-byte detection algorithm:
    // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
    const unsigned int diff = a ^ b; // lane is 0x0000 where a == b
    // msbs forced on, then decremented: msb = 0 only where the lane of diff was 0x0000 or 0x8000
    const unsigned int decremented = (diff | 0x80008000) - 0x00010001;
    // OR-ing diff back in restores the msb for the 0x8000 case, leaving it clear only for 0x0000
    const unsigned int differs_msb = (diff | decremented) & 0x80008000;
    flags = differs_msb >> 15; // convert to bool
#endif
    return flags;
}
// Per-halfword inequality mask: each 16-bit lane of the result is 0xffff if the lanes
// of a and b differ, else 0x0000.
static __device__ __forceinline__ unsigned int vcmpne2(unsigned int a, unsigned int b)
{
    unsigned int mask;
#if __CUDA_ARCH__ >= 300
    const unsigned int flags = vsetne2(a, b);
    // stretch each 0/1 lane flag into 0x0000/0xffff
    mask = (flags << 16) - flags;
#else
    // inspired by Alan Mycroft's null-byte detection algorithm:
    // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
    const unsigned int diff = a ^ b; // lane is 0x0000 where a == b
    // msbs forced on, then decremented: msb = 0 only where the lane of diff was 0x0000 or 0x8000
    const unsigned int decremented = (diff | 0x80008000) - 0x00010001;
    // msb = 1 where the lane of diff was not 0x0000
    const unsigned int differs_msb = (diff | decremented) & 0x80008000;
    // spread each msb over its whole lane
    mask = differs_msb | (differs_msb - (differs_msb >> 15));
#endif
    return mask;
}
// Per-halfword unsigned maximum: each 16-bit lane holds max(a_lane, b_lane).
static __device__ __forceinline__ unsigned int vmax2(unsigned int a, unsigned int b)
{
    unsigned int result = 0;
#if __CUDA_ARCH__ >= 300
    asm("vmax2.u32.u32.u32 %0, %1, %2, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
#elif __CUDA_ARCH__ >= 200
    asm("vmax.u32.u32.u32 %0.h0, %1.h0, %2.h0, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
    asm("vmax.u32.u32.u32 %0.h1, %1.h1, %2.h1, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
#else
    // compare the halfwords in place; the masked-off half is zero on both sides
    const unsigned int low_max = ::max(a & 0x0000ffff, b & 0x0000ffff);
    const unsigned int high_max = ::max(a & 0xffff0000, b & 0xffff0000);
    result = low_max | high_max; // combine halfword maximums
#endif
    return result;
}
// Per-halfword unsigned minimum: each 16-bit lane holds min(a_lane, b_lane).
static __device__ __forceinline__ unsigned int vmin2(unsigned int a, unsigned int b)
{
    unsigned int result = 0;
#if __CUDA_ARCH__ >= 300
    asm("vmin2.u32.u32.u32 %0, %1, %2, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
#elif __CUDA_ARCH__ >= 200
    asm("vmin.u32.u32.u32 %0.h0, %1.h0, %2.h0, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
    asm("vmin.u32.u32.u32 %0.h1, %1.h1, %2.h1, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
#else
    // compare the halfwords in place; the masked-off half is zero on both sides
    const unsigned int low_min = ::min(a & 0x0000ffff, b & 0x0000ffff);
    const unsigned int high_min = ::min(a & 0xffff0000, b & 0xffff0000);
    result = low_min | high_min; // combine halfword minimums
#endif
    return result;
}
// 4-lane operations: each 32-bit word is treated as four unsigned 8-bit bytes
// Per-byte unsigned saturating add: each 8-bit lane of the result is
// min(a_lane + b_lane, 0xff).
//
// The sm_30+ and sm_20+ paths use the ".sat" form of the PTX video instructions.
// The portable fallback used to wrap modulo 2^8 instead, so the result depended
// on the target architecture; it now saturates like the other two paths.
static __device__ __forceinline__ unsigned int vadd4(unsigned int a, unsigned int b)
{
    unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
    asm("vadd4.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#elif __CUDA_ARCH__ >= 200
    asm("vadd.u32.u32.u32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    asm("vadd.u32.u32.u32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    asm("vadd.u32.u32.u32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    asm("vadd.u32.u32.u32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
    unsigned int s, t;
    // step 1: per-byte sum modulo 256
    s = a ^ b;          // sum bits
    r = a & 0x7f7f7f7f; // clear msbs
    t = b & 0x7f7f7f7f; // clear msbs
    s = s & 0x80808080; // msb sum bits
    r = r + t;          // add without msbs, record carry-out in msbs
    r = r ^ s;          // sum of msb sum and carry-in bits, w/o carry-out
    // step 2: per-byte carry-out. floor((a + b) / 2) = (a & b) + ((a ^ b) >> 1) fits in
    // 8 bits per lane, and its msb equals bit 8 of the full 9-bit sum.
    t = (a & b) + (((a ^ b) & 0xfefefefe) >> 1);
    t = t & 0x80808080; // msb = 1 where the byte sum overflowed
    s = t >> 7;         // 0x01 in overflowed lanes
    s = t - s;          // 0x7f in overflowed lanes
    // step 3: force overflowed lanes to 0xff
    r = r | t | s;
#endif /* __CUDA_ARCH__ >= 300 */
    return r;
}
// Per-byte unsigned saturating subtract: each 8-bit lane of the result is
// max(a_lane - b_lane, 0).
//
// The sm_30+ and sm_20+ paths use the ".sat" form of the PTX video instructions.
// The portable fallback used to wrap modulo 2^8 instead, so the result depended
// on the target architecture; it now saturates like the other two paths.
static __device__ __forceinline__ unsigned int vsub4(unsigned int a, unsigned int b)
{
    unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
    asm("vsub4.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#elif __CUDA_ARCH__ >= 200
    asm("vsub.u32.u32.u32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    asm("vsub.u32.u32.u32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    asm("vsub.u32.u32.u32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    asm("vsub.u32.u32.u32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
    unsigned int s, t;
    const unsigned int not_b = ~b;
    // step 1: per-byte difference modulo 256
    s = a ^ not_b;      // inverted sum bits
    r = a | 0x80808080; // set msbs
    t = b & 0x7f7f7f7f; // clear msbs
    s = s & 0x80808080; // inverted msb sum bits
    r = r - t;          // subtract w/o msbs, record inverted borrows in msb
    r = r ^ s;          // combine inverted msb sum bits and borrows
    // step 2: per-byte "a >= b". (a + ~b + 1) / 2 = (a | ~b) - ((a ^ ~b) >> 1) fits in
    // 8 bits per lane, and its msb is set exactly when a_lane >= b_lane.
    t = (a | not_b) - (((a ^ not_b) & 0xfefefefe) >> 1);
    t = t & 0x80808080; // msb = 1 where no borrow occurred
    s = t >> 7;         // 0x01 in those lanes
    s = t - s;          // 0x7f in those lanes
    // step 3: keep lanes without borrow, clamp the others to 0x00
    r = r & (t | s);
#endif
    return r;
}
// Per-byte unsigned average rounded down: each 8-bit lane holds (a_lane + b_lane) / 2.
static __device__ __forceinline__ unsigned int vavg4(unsigned int a, unsigned int b)
{
    // HAKMEM #23: a + b = 2 * (a & b) + (a ^ b), hence
    // (a + b) / 2 = (a & b) + ((a ^ b) >> 1)
    const unsigned int common_bits = a & b;
    // clearing bit 0 of each byte keeps the shift from leaking into the lane below
    const unsigned int half_of_differing = ((a ^ b) & 0xfefefefe) >> 1;
    return common_bits + half_of_differing;
}
// Per-byte unsigned average rounded up: each 8-bit lane holds (a_lane + b_lane + 1) / 2.
static __device__ __forceinline__ unsigned int vavrg4(unsigned int a, unsigned int b)
{
    unsigned int rounded_avg = 0;
#if __CUDA_ARCH__ >= 300
    asm("vavrg4.u32.u32.u32 %0, %1, %2, %3;" : "=r"(rounded_avg) : "r"(a), "r"(b), "r"(rounded_avg));
#else
    // HAKMEM #23: a + b = 2 * (a | b) - (a ^ b), hence
    // (a + b + 1) / 2 = (a | b) - ((a ^ b) >> 1)
    // clearing bit 0 of each byte keeps the shift from leaking into the lane below
    const unsigned int half_of_differing = ((a ^ b) & 0xfefefefe) >> 1;
    rounded_avg = (a | b) - half_of_differing;
#endif
    return rounded_avg;
}
// Per-byte equality test: each 8-bit lane of the result is 1 if the lanes of
// a and b are equal, else 0.
static __device__ __forceinline__ unsigned int vseteq4(unsigned int a, unsigned int b)
{
    unsigned int flags = 0;
#if __CUDA_ARCH__ >= 300
    asm("vset4.u32.u32.eq %0, %1, %2, %3;" : "=r"(flags) : "r"(a), "r"(b), "r"(flags));
#else
    // inspired by Alan Mycroft's null-byte detection algorithm:
    // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
    const unsigned int diff = a ^ b;                    // lane is 0x00 where a == b
    const unsigned int guarded = diff | 0x80808080;     // force msbs on so the subtraction cannot borrow across lanes
    const unsigned int msb_was_clear = diff ^ guarded;  // msb = 1 where the lane of diff was < 0x80
    const unsigned int decremented = guarded - 0x01010101; // msb = 0 only where the lane of diff was 0x00 or 0x80
    // both conditions together leave the msb set only for lanes equal to 0x00
    flags = (msb_was_clear & ~decremented) >> 7;
#endif
    return flags;
}
// Per-byte equality mask: each 8-bit lane of the result is 0xff if the lanes of
// a and b are equal, else 0x00.
static __device__ __forceinline__ unsigned int vcmpeq4(unsigned int a, unsigned int b)
{
    unsigned int mask;
#if __CUDA_ARCH__ >= 300
    const unsigned int flags = vseteq4(a, b);
    // (flags << 8) - flags stretches each 0/1 lane flag into 0x00/0xff
    mask = (flags << 8) - flags;
#else
    // inspired by Alan Mycroft's null-byte detection algorithm:
    // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
    const unsigned int diff = a ^ b;                 // lane is 0x00 where a == b
    const unsigned int guarded = diff | 0x80808080;  // force msbs on to catch the borrow
    // msb = 1 only where the lane of diff was 0x00
    const unsigned int equal_msb = (diff ^ guarded) & ~(guarded - 0x01010101);
    // spread each msb over its whole lane: 0x80 -> 0x7f -> 0xff
    mask = (equal_msb - (equal_msb >> 7)) | equal_msb;
#endif
    return mask;
}
// Per-byte unsigned a <= b test: each 8-bit lane of the result is 1 or 0.
static __device__ __forceinline__ unsigned int vsetle4(unsigned int a, unsigned int b)
{
    unsigned int flags = 0;
#if __CUDA_ARCH__ >= 300
    asm("vset4.u32.u32.le %0, %1, %2, %3;" : "=r"(flags) : "r"(a), "r"(b), "r"(flags));
#else
    asm("not.b32 %0, %0;" : "+r"(a));
    // (b + ~a + 1) / 2 = (b - a) / 2 per lane; its msb is the carry-out of b - a
    const unsigned int carry_msb = vavrg4(a, b) & 0x80808080;
    flags = carry_msb >> 7; // convert to bool
#endif
    return flags;
}
// Per-byte unsigned a <= b mask: each 8-bit lane of the result is 0xff or 0x00.
static __device__ __forceinline__ unsigned int vcmple4(unsigned int a, unsigned int b)
{
    unsigned int mask;
#if __CUDA_ARCH__ >= 300
    const unsigned int flags = vsetle4(a, b);
    // stretch each 0/1 lane flag into 0x00/0xff
    mask = (flags << 8) - flags;
#else
    asm("not.b32 %0, %0;" : "+r"(a));
    // (b + ~a + 1) / 2 = (b - a) / 2 per lane; its msb is the carry-out of b - a
    const unsigned int carry_msb = vavrg4(a, b) & 0x80808080;
    // spread each msb over its whole lane
    mask = carry_msb | (carry_msb - (carry_msb >> 7));
#endif
    return mask;
}
// Per-byte unsigned a < b test: each 8-bit lane of the result is 1 or 0.
static __device__ __forceinline__ unsigned int vsetlt4(unsigned int a, unsigned int b)
{
    unsigned int flags = 0;
#if __CUDA_ARCH__ >= 300
    asm("vset4.u32.u32.lt %0, %1, %2, %3;" : "=r"(flags) : "r"(a), "r"(b), "r"(flags));
#else
    asm("not.b32 %0, %0;" : "+r"(a));
    // (b + ~a) / 2 = (b - a) / 2 [rounded down] per lane; its msb is the carry-out
    const unsigned int carry_msb = vavg4(a, b) & 0x80808080;
    flags = carry_msb >> 7; // convert to bool
#endif
    return flags;
}
// Per-byte unsigned a < b mask: each 8-bit lane of the result is 0xff or 0x00.
static __device__ __forceinline__ unsigned int vcmplt4(unsigned int a, unsigned int b)
{
    unsigned int mask;
#if __CUDA_ARCH__ >= 300
    const unsigned int flags = vsetlt4(a, b);
    // stretch each 0/1 lane flag into 0x00/0xff
    mask = (flags << 8) - flags;
#else
    asm("not.b32 %0, %0;" : "+r"(a));
    // (b + ~a) / 2 = (b - a) / 2 [rounded down] per lane; its msb is the carry-out
    const unsigned int carry_msb = vavg4(a, b) & 0x80808080;
    // spread each msb over its whole lane
    mask = carry_msb | (carry_msb - (carry_msb >> 7));
#endif
    return mask;
}
// Per-byte unsigned a >= b test: each 8-bit lane of the result is 1 or 0.
static __device__ __forceinline__ unsigned int vsetge4(unsigned int a, unsigned int b)
{
    unsigned int flags = 0;
#if __CUDA_ARCH__ >= 300
    asm("vset4.u32.u32.ge %0, %1, %2, %3;" : "=r"(flags) : "r"(a), "r"(b), "r"(flags));
#else
    asm("not.b32 %0, %0;" : "+r"(b));
    // (a + ~b + 1) / 2 = (a - b) / 2 per lane; its msb is the carry-out of a - b
    const unsigned int carry_msb = vavrg4(a, b) & 0x80808080;
    flags = carry_msb >> 7; // convert to bool
#endif
    return flags;
}
// Per-byte unsigned a >= b mask: each 8-bit lane of the result is 0xff or 0x00.
static __device__ __forceinline__ unsigned int vcmpge4(unsigned int a, unsigned int b)
{
    unsigned int mask;
#if __CUDA_ARCH__ >= 300
    const unsigned int flags = vsetge4(a, b);
    // stretch each 0/1 lane flag into 0x00/0xff
    mask = (flags << 8) - flags;
#else
    asm("not.b32 %0,%0;" : "+r"(b));
    // (a + ~b + 1) / 2 = (a - b) / 2 per lane; its msb is the carry-out of a - b
    const unsigned int carry_msb = vavrg4(a, b) & 0x80808080;
    // spread each msb over its whole lane
    mask = (carry_msb - (carry_msb >> 7)) | carry_msb;
#endif
    return mask;
}
// Per-byte unsigned a > b test: each 8-bit lane of the result is 1 or 0.
static __device__ __forceinline__ unsigned int vsetgt4(unsigned int a, unsigned int b)
{
    unsigned int flags = 0;
#if __CUDA_ARCH__ >= 300
    asm("vset4.u32.u32.gt %0, %1, %2, %3;" : "=r"(flags) : "r"(a), "r"(b), "r"(flags));
#else
    asm("not.b32 %0, %0;" : "+r"(b));
    // (a + ~b) / 2 = (a - b) / 2 [rounded down] per lane; its msb is the carry-out
    const unsigned int carry_msb = vavg4(a, b) & 0x80808080;
    flags = carry_msb >> 7; // convert to bool
#endif
    return flags;
}
// Per-byte unsigned a > b mask: each 8-bit lane of the result is 0xff or 0x00.
static __device__ __forceinline__ unsigned int vcmpgt4(unsigned int a, unsigned int b)
{
    unsigned int mask;
#if __CUDA_ARCH__ >= 300
    const unsigned int flags = vsetgt4(a, b);
    // stretch each 0/1 lane flag into 0x00/0xff
    mask = (flags << 8) - flags;
#else
    asm("not.b32 %0, %0;" : "+r"(b));
    // (a + ~b) / 2 = (a - b) / 2 [rounded down] per lane; its msb is the carry-out
    const unsigned int carry_msb = vavg4(a, b) & 0x80808080;
    // spread each msb over its whole lane
    mask = carry_msb | (carry_msb - (carry_msb >> 7));
#endif
    return mask;
}
// Per-byte inequality test: each 8-bit lane of the result is 1 if the lanes of
// a and b differ, else 0.
static __device__ __forceinline__ unsigned int vsetne4(unsigned int a, unsigned int b)
{
    unsigned int flags = 0;
#if __CUDA_ARCH__ >= 300
    asm("vset4.u32.u32.ne %0, %1, %2, %3;" : "=r"(flags) : "r"(a), "r"(b), "r"(flags));
#else
    // inspired by Alan Mycroft's null-byte detection algorithm:
    // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
    const unsigned int diff = a ^ b; // lane is 0x00 where a == b
    // msbs forced on, then decremented: msb = 0 only where the lane of diff was 0x00 or 0x80
    const unsigned int decremented = (diff | 0x80808080) - 0x01010101;
    // OR-ing diff back in restores the msb for the 0x80 case, leaving it clear only for 0x00
    const unsigned int differs_msb = (diff | decremented) & 0x80808080;
    flags = differs_msb >> 7; // convert to bool
#endif
    return flags;
}
// Per-byte inequality mask: each 8-bit lane of the result is 0xff if the lanes of
// a and b differ, else 0x00.
static __device__ __forceinline__ unsigned int vcmpne4(unsigned int a, unsigned int b)
{
    unsigned int mask;
#if __CUDA_ARCH__ >= 300
    const unsigned int flags = vsetne4(a, b);
    // stretch each 0/1 lane flag into 0x00/0xff
    mask = (flags << 8) - flags;
#else
    // inspired by Alan Mycroft's null-byte detection algorithm:
    // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
    const unsigned int diff = a ^ b; // lane is 0x00 where a == b
    // msbs forced on, then decremented: msb = 0 only where the lane of diff was 0x00 or 0x80
    const unsigned int decremented = (diff | 0x80808080) - 0x01010101;
    // msb = 1 where the lane of diff was not 0x00
    const unsigned int differs_msb = (diff | decremented) & 0x80808080;
    // spread each msb over its whole lane
    mask = differs_msb | (differs_msb - (differs_msb >> 7));
#endif
    return mask;
}
// Per-byte unsigned absolute difference: each 8-bit lane holds |a_lane - b_lane|.
static __device__ __forceinline__ unsigned int vabsdiff4(unsigned int a, unsigned int b)
{
    unsigned int result = 0;
#if __CUDA_ARCH__ >= 300
    asm("vabsdiff4.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
#elif __CUDA_ARCH__ >= 200
    asm("vabsdiff.u32.u32.u32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
    asm("vabsdiff.u32.u32.u32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
    asm("vabsdiff.u32.u32.u32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
    asm("vabsdiff.u32.u32.u32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
#else
    const unsigned int a_ge_b = vcmpge4(a, b);  // lane mask: 0xff where a >= b
    const unsigned int differing = a ^ b;
    // XOR-select: b ^ (a ^ b) yields a, b ^ 0 yields b
    const unsigned int larger = (differing & a_ge_b) ^ b; // a where a >= b, else b => max(a,b)
    const unsigned int smaller = larger ^ differing;      // the other operand => min(a,b)
    // |a - b| = max(a,b) - min(a,b); no lane can borrow because larger >= smaller per lane
    result = larger - smaller;
#endif
    return result;
}
// Byte-wise unsigned maximum of two words viewed as four packed 8-bit lanes.
static __device__ __forceinline__ unsigned int vmax4(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
// One packed instruction handles all four lanes.
asm("vmax4.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#elif __CUDA_ARCH__ >= 200
// One instruction per byte lane (.b0 .. .b3); each takes the running r as its
// last operand, so the four results end up in the same register.
asm("vmax.u32.u32.u32 %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vmax.u32.u32.u32 %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vmax.u32.u32.u32 %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vmax.u32.u32.u32 %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
// Software path: mask-select per byte.
unsigned int s;
s = vcmpge4(a, b); // mask = 0xff if a >= b
r = a & s; // keep a's byte where a >= b
s = b & ~s; // keep b's byte where a < b
r = r | s; // combine byte selections
#endif
return r; // byte-wise unsigned maximum
}
// Byte-wise unsigned minimum of two words viewed as four packed 8-bit lanes.
static __device__ __forceinline__ unsigned int vmin4(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
// One packed instruction handles all four lanes.
asm("vmin4.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#elif __CUDA_ARCH__ >= 200
// One instruction per byte lane (.b0 .. .b3); each takes the running r as its
// last operand, so the four results end up in the same register.
asm("vmin.u32.u32.u32 %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vmin.u32.u32.u32 %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vmin.u32.u32.u32 %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vmin.u32.u32.u32 %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
// Software path: mask-select per byte. Note the swapped arguments to vcmpge4.
unsigned int s;
s = vcmpge4(b, a); // mask = 0xff if b >= a
r = a & s; // keep a's byte where a <= b
s = b & ~s; // keep b's byte where b < a
r = r | s; // combine byte selections
#endif
return r;
}
}}}
//! @endcond
#endif // __OPENCV_CUDA_SIMD_FUNCTIONS_HPP__

View File

@ -0,0 +1,75 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_CUDA_TRANSFORM_HPP__
#define __OPENCV_CUDA_TRANSFORM_HPP__
#include "common.hpp"
#include "utility.hpp"
#include "detail/transform_detail.hpp"
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
// Applies the unary functor `op` from `src` to `dst` under `mask` on `stream`
// by forwarding to transform_detail::TransformDispatcher.
//
// The dispatcher's boolean argument is true only when both source and
// destination are single-channel and the functor's smart_shift trait is not 1
// (presumably this selects the vectorised code path -- see transform_detail).
template <typename T, typename D, typename UnOp, typename Mask>
static inline void transform(PtrStepSz<T> src, PtrStepSz<D> dst, UnOp op, const Mask& mask, cudaStream_t stream)
{
    const bool single_channel = VecTraits<T>::cn == 1 && VecTraits<D>::cn == 1;
    const bool dispatch_flag = single_channel && TransformFunctorTraits<UnOp>::smart_shift != 1;

    typedef transform_detail::TransformDispatcher<dispatch_flag> Dispatcher;
    Dispatcher::call(src, dst, op, mask, stream);
}
// Applies the binary functor `op` to `src1` and `src2`, writing to `dst` under
// `mask` on `stream`, by forwarding to transform_detail::TransformDispatcher.
//
// The dispatcher's boolean argument is true only when both sources and the
// destination are single-channel and the functor's smart_shift trait is not 1
// (presumably this selects the vectorised code path -- see transform_detail).
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static inline void transform(PtrStepSz<T1> src1, PtrStepSz<T2> src2, PtrStepSz<D> dst, BinOp op, const Mask& mask, cudaStream_t stream)
{
    const bool single_channel =
        VecTraits<T1>::cn == 1 && VecTraits<T2>::cn == 1 && VecTraits<D>::cn == 1;
    const bool dispatch_flag = single_channel && TransformFunctorTraits<BinOp>::smart_shift != 1;

    typedef transform_detail::TransformDispatcher<dispatch_flag> Dispatcher;
    Dispatcher::call(src1, src2, dst, op, mask, stream);
}
}}}
//! @endcond
#endif // __OPENCV_CUDA_TRANSFORM_HPP__

View File

@ -0,0 +1,90 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_CUDA_TYPE_TRAITS_HPP__
#define __OPENCV_CUDA_TYPE_TRAITS_HPP__
#include "detail/type_traits_detail.hpp"
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
// Compile-time predicate: `value` is non-zero when any of the following
// type_traits_detail helpers reports true for T:
//   - IsIntegral<T>
//   - IsFloat<T>
//   - PointerTraits applied to ReferenceTraits<T>::type
// TypeTraits::ParameterType below uses it to decide whether T is used as-is.
template <typename T> struct IsSimpleParameter
{
enum {value = type_traits_detail::IsIntegral<T>::value || type_traits_detail::IsFloat<T>::value ||
type_traits_detail::PointerTraits<typename type_traits_detail::ReferenceTraits<T>::type>::value};
};
// Bundle of compile-time facts about T, each forwarded from a helper in
// type_traits_detail (defined in detail/type_traits_detail.hpp).
template <typename T> struct TypeTraits
{
// Derived types: T with const removed, with volatile removed, and with both removed.
typedef typename type_traits_detail::UnConst<T>::type NonConstType;
typedef typename type_traits_detail::UnVolatile<T>::type NonVolatileType;
typedef typename type_traits_detail::UnVolatile<typename type_traits_detail::UnConst<T>::type>::type UnqualifiedType;
// Result types of PointerTraits / ReferenceTraits for the (unqualified) type.
typedef typename type_traits_detail::PointerTraits<UnqualifiedType>::type PointeeType;
typedef typename type_traits_detail::ReferenceTraits<T>::type ReferredType;
// Qualifier flags are taken from the qualified T itself.
enum { isConst = type_traits_detail::UnConst<T>::value };
enum { isVolatile = type_traits_detail::UnVolatile<T>::value };
// Category flags are evaluated on UnqualifiedType.
enum { isReference = type_traits_detail::ReferenceTraits<UnqualifiedType>::value };
// The pointer test is applied to ReferenceTraits<UnqualifiedType>::type.
enum { isPointer = type_traits_detail::PointerTraits<typename type_traits_detail::ReferenceTraits<UnqualifiedType>::type>::value };
enum { isUnsignedInt = type_traits_detail::IsUnsignedIntegral<UnqualifiedType>::value };
// "IsSignedIntergral" is the helper's spelling as referenced here; it must match its declaration.
enum { isSignedInt = type_traits_detail::IsSignedIntergral<UnqualifiedType>::value };
enum { isIntegral = type_traits_detail::IsIntegral<UnqualifiedType>::value };
enum { isFloat = type_traits_detail::IsFloat<UnqualifiedType>::value };
enum { isArith = isIntegral || isFloat };
enum { isVec = type_traits_detail::IsVec<UnqualifiedType>::value };
// Chosen through Select<> between T itself and AddParameterType<T>::type,
// keyed on IsSimpleParameter of the unqualified type (presumably T when the
// flag is true -- confirm against Select's definition).
typedef typename type_traits_detail::Select<IsSimpleParameter<UnqualifiedType>::value,
T, typename type_traits_detail::AddParameterType<T>::type>::type ParameterType;
};
}}}
//! @endcond
#endif // __OPENCV_CUDA_TYPE_TRAITS_HPP__

View File

@ -0,0 +1,221 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_CUDA_UTILITY_HPP__
#define __OPENCV_CUDA_UTILITY_HPP__
#include "saturate_cast.hpp"
#include "datamov_utils.hpp"
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
// Warp geometry: log2 of the warp size and the warp size itself (1 << 5 == 32).
#define OPENCV_CUDA_LOG_WARP_SIZE (5)
#define OPENCV_CUDA_WARP_SIZE (1 << OPENCV_CUDA_LOG_WARP_SIZE)
// Memory bank count and its log2. The expression reads __CUDA_ARCH__, so it is
// evaluated wherever the macro is expanded.
#define OPENCV_CUDA_LOG_MEM_BANKS ((__CUDA_ARCH__ >= 200) ? 5 : 4) // 32 banks on fermi, 16 on tesla
#define OPENCV_CUDA_MEM_BANKS (1 << OPENCV_CUDA_LOG_MEM_BANKS)
///////////////////////////////////////////////////////////////////////////////
// swap
// Exchanges the values of `a` and `b` through one temporary copy.
// Callable from both host and device code.
template <typename T> void __device__ __host__ __forceinline__ swap(T& a, T& b)
{
    const T previous_a = a; // keep a's value before it is overwritten
    a = b;
    b = previous_a;
}
///////////////////////////////////////////////////////////////////////////////
// Mask Reader
// Mask reader backed by a single PtrStepb: operator()(y, x) is true where the
// mask byte at row y, column x is non-zero.
struct SingleMask
{
// Wraps an existing mask; explicit, so a PtrStepb is never converted silently.
explicit __host__ __device__ __forceinline__ SingleMask(PtrStepb mask_) : mask(mask_) {}
// Copying duplicates only the PtrStepb handle.
__host__ __device__ __forceinline__ SingleMask(const SingleMask& mask_): mask(mask_.mask){}
// Returns whether the element at (y, x) is selected by the mask.
__device__ __forceinline__ bool operator()(int y, int x) const
{
return mask.ptr(y)[x] != 0;
}
PtrStepb mask;
};
// Mask reader for a single PtrStepb mask addressed with a channel-expanded
// column index: the x passed to operator() is divided by `channels` before
// the lookup, so `channels` consecutive x values share one mask byte.
struct SingleMaskChannels
{
__host__ __device__ __forceinline__ SingleMaskChannels(PtrStepb mask_, int channels_)
: mask(mask_), channels(channels_) {}
// Copying duplicates the PtrStepb handle and the channel count.
__host__ __device__ __forceinline__ SingleMaskChannels(const SingleMaskChannels& mask_)
:mask(mask_.mask), channels(mask_.channels){}
// Returns whether the mask byte at row y, column x / channels (integer division) is non-zero.
__device__ __forceinline__ bool operator()(int y, int x) const
{
return mask.ptr(y)[x / channels] != 0;
}
PtrStepb mask;
int channels;
};
// Mask reader over an array of PtrStepb masks. One mask at a time is "current"
// (curMask); it is chosen with next() or setMask() and queried with operator().
struct MaskCollection
{
// Stores the array pointer only; curMask is not set here, so next() or
// setMask() has to be called before the current mask is meaningful.
explicit __host__ __device__ __forceinline__ MaskCollection(PtrStepb* maskCollection_)
: maskCollection(maskCollection_) {}
// NOTE(review): unlike the constructor above, the copy constructor is declared __device__ only.
__device__ __forceinline__ MaskCollection(const MaskCollection& masks_)
: maskCollection(masks_.maskCollection), curMask(masks_.curMask){}
// Makes the mask currently pointed to the current one, then advances the pointer by one.
__device__ __forceinline__ void next()
{
curMask = *maskCollection++;
}
// Makes the z-th mask, counted from the current pointer position, the current one.
__device__ __forceinline__ void setMask(int z)
{
curMask = maskCollection[z];
}
// True when the current mask has a null data pointer (treated as "no mask"),
// otherwise true when the byte at (y, x) is non-zero. The byte is fetched
// through ForceGlob<uchar>::Load; the comma expression performs the load and
// then yields the comparison.
__device__ __forceinline__ bool operator()(int y, int x) const
{
uchar val;
return curMask.data == 0 || (ForceGlob<uchar>::Load(curMask.ptr(y), x, val), (val != 0));
}
const PtrStepb* maskCollection;
PtrStepb curMask;
};
// Stateless stand-in for the mask readers above: every query returns true and
// the mask-selection calls do nothing. It offers the same next()/setMask()/
// operator() member names, plus three-argument and static check() variants.
struct WithOutMask
{
__host__ __device__ __forceinline__ WithOutMask(){}
__host__ __device__ __forceinline__ WithOutMask(const WithOutMask&){}
// No-op counterpart of MaskCollection::next().
__device__ __forceinline__ void next() const
{
}
// No-op counterpart of MaskCollection::setMask().
__device__ __forceinline__ void setMask(int) const
{
}
// Every position is accepted.
__device__ __forceinline__ bool operator()(int, int) const
{
return true;
}
// Three-index form; every position is accepted.
__device__ __forceinline__ bool operator()(int, int, int) const
{
return true;
}
// Static form usable without an instance; every position is accepted.
static __device__ __forceinline__ bool check(int, int)
{
return true;
}
// Static three-index form; every position is accepted.
static __device__ __forceinline__ bool check(int, int, int)
{
return true;
}
};
///////////////////////////////////////////////////////////////////////////////
// Solve linear system
// Solves the 2x2 linear system A * x = b by Cramer's rule.
//
// The determinant is computed in T; the reciprocal and the scaling are done in
// double and each component is converted back with saturate_cast<T>.
// Returns false, leaving x untouched, when the determinant compares equal to
// zero; returns true after writing x[0] and x[1] otherwise.
template <typename T> __device__ __forceinline__ bool solve2x2(const T A[2][2], const T b[2], T x[2])
{
    const T det = A[0][0] * A[1][1] - A[1][0] * A[0][1];

    // Singular system: nothing to solve.
    if (det == 0)
        return false;

    const double invdet = 1.0 / det;

    x[0] = saturate_cast<T>(invdet * (b[0] * A[1][1] - b[1] * A[0][1]));
    x[1] = saturate_cast<T>(invdet * (A[0][0] * b[1] - A[1][0] * b[0]));

    return true;
}
// Solves the 3x3 linear system A * x = b by Cramer's rule.
//
// The determinant is expanded along the first row in T. Each numerator is the
// same expansion with one column of A replaced by b; the scaling by 1/det is
// done in double and converted back with saturate_cast<T>.
// Returns false, leaving x untouched, when the determinant compares equal to
// zero; returns true after writing x[0..2] otherwise.
template <typename T> __device__ __forceinline__ bool solve3x3(const T A[3][3], const T b[3], T x[3])
{
    const T det = A[0][0] * (A[1][1] * A[2][2] - A[1][2] * A[2][1])
                - A[0][1] * (A[1][0] * A[2][2] - A[1][2] * A[2][0])
                + A[0][2] * (A[1][0] * A[2][1] - A[1][1] * A[2][0]);

    // Singular system: nothing to solve.
    if (det == 0)
        return false;

    const double invdet = 1.0 / det;

    // Column 0 of A replaced by b.
    x[0] = saturate_cast<T>(invdet *
        (b[0] * (A[1][1] * A[2][2] - A[1][2] * A[2][1]) -
         A[0][1] * (b[1] * A[2][2] - A[1][2] * b[2] ) +
         A[0][2] * (b[1] * A[2][1] - A[1][1] * b[2] )));

    // Column 1 of A replaced by b.
    x[1] = saturate_cast<T>(invdet *
        (A[0][0] * (b[1] * A[2][2] - A[1][2] * b[2] ) -
         b[0] * (A[1][0] * A[2][2] - A[1][2] * A[2][0]) +
         A[0][2] * (A[1][0] * b[2] - b[1] * A[2][0])));

    // Column 2 of A replaced by b.
    x[2] = saturate_cast<T>(invdet *
        (A[0][0] * (A[1][1] * b[2] - b[1] * A[2][1]) -
         A[0][1] * (A[1][0] * b[2] - b[1] * A[2][0]) +
         b[0] * (A[1][0] * A[2][1] - A[1][1] * A[2][0])));

    return true;
}
}}} // namespace cv { namespace cuda { namespace device
//! @endcond
#endif // __OPENCV_CUDA_UTILITY_HPP__

View File

@ -0,0 +1,232 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_CUDA_VEC_DISTANCE_HPP__
#define __OPENCV_CUDA_VEC_DISTANCE_HPP__
#include "reduce.hpp"
#include "functional.hpp"
#include "detail/vec_distance_detail.hpp"
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
// L1 (sum of absolute differences) accumulator used for every T that has no
// dedicated specialization. T is not referenced in the body: operands are
// taken as int and the running sum is an int.
template <typename T> struct L1Dist
{
typedef int value_type;
typedef int result_type;
__device__ __forceinline__ L1Dist() : mySum(0) {}
// Folds one pair of values into the running sum using the __sad intrinsic
// (sum of absolute difference with mySum as the accumulator operand).
__device__ __forceinline__ void reduceIter(int val1, int val2)
{
mySum = __sad(val1, val2, mySum);
}
// Hands this thread's sum to reduce<THREAD_DIM>() with plus<int> as the
// combining functor; smem and tid are forwarded unchanged.
template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(int* smem, int tid)
{
reduce<THREAD_DIM>(smem, mySum, tid, plus<int>());
}
// The accumulated distance.
__device__ __forceinline__ operator int() const
{
return mySum;
}
int mySum;
};
// L1 accumulator specialised for float: operands and the running sum are float.
template <> struct L1Dist<float>
{
typedef float value_type;
typedef float result_type;
__device__ __forceinline__ L1Dist() : mySum(0.0f) {}
// Adds |val1 - val2| to the running sum.
__device__ __forceinline__ void reduceIter(float val1, float val2)
{
mySum += ::fabs(val1 - val2);
}
// Hands this thread's sum to reduce<THREAD_DIM>() with plus<float> as the
// combining functor; smem and tid are forwarded unchanged.
template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(float* smem, int tid)
{
reduce<THREAD_DIM>(smem, mySum, tid, plus<float>());
}
// The accumulated distance.
__device__ __forceinline__ operator float() const
{
return mySum;
}
float mySum;
};
// L2 (Euclidean) accumulator: sums squared differences in float and applies
// the square root only when the result is read.
struct L2Dist
{
typedef float value_type;
typedef float result_type;
__device__ __forceinline__ L2Dist() : mySum(0.0f) {}
// Adds (val1 - val2)^2 to the running sum.
__device__ __forceinline__ void reduceIter(float val1, float val2)
{
float reg = val1 - val2;
mySum += reg * reg;
}
// Hands this thread's sum of squares to reduce<THREAD_DIM>() with plus<float>
// as the combining functor; smem and tid are forwarded unchanged.
template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(float* smem, int tid)
{
reduce<THREAD_DIM>(smem, mySum, tid, plus<float>());
}
// The distance: square root of the accumulated sum of squares.
__device__ __forceinline__ operator float() const
{
return sqrtf(mySum);
}
float mySum;
};
// Hamming accumulator: counts the bit positions in which the operands differ.
struct HammingDist
{
typedef int value_type;
typedef int result_type;
__device__ __forceinline__ HammingDist() : mySum(0) {}
// Adds the population count of val1 XOR val2 to the running sum.
__device__ __forceinline__ void reduceIter(int val1, int val2)
{
mySum += __popc(val1 ^ val2);
}
// Hands this thread's count to reduce<THREAD_DIM>() with plus<int> as the
// combining functor; smem and tid are forwarded unchanged.
template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(int* smem, int tid)
{
reduce<THREAD_DIM>(smem, mySum, tid, plus<int>());
}
// The accumulated distance.
__device__ __forceinline__ operator int() const
{
return mySum;
}
int mySum;
};
// Calculates the distance between two vectors, both read through ForceGlob<>::Load.
//
// Thread `tid` visits elements tid, tid + THREAD_DIM, tid + 2 * THREAD_DIM, ...
// below `len` and feeds each pair to dist.reduceIter(). Afterwards
// dist.reduceAll<THREAD_DIM>() is called with the scratch buffer `smem` to
// combine the per-thread partial results.
//
// vec1, vec2 - the two input vectors (at least `len` elements each)
// len        - number of elements to compare
// dist       - distance accumulator (e.g. L1Dist, L2Dist, HammingDist)
// smem       - scratch buffer forwarded to dist.reduceAll()
// tid        - index of the calling thread, used as the first element index
template <int THREAD_DIM, typename Dist, typename T1, typename T2>
__device__ void calcVecDiffGlobal(const T1* vec1, const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid)
{
    for (int i = tid; i < len; i += THREAD_DIM)
    {
        T1 val1;
        ForceGlob<T1>::Load(vec1, i, val1);

        T2 val2;
        ForceGlob<T2>::Load(vec2, i, val2);

        dist.reduceIter(val1, val2);
    }

    // Dist is a dependent type, so the member template call needs the
    // `template` disambiguator; without it the `<` is parsed as less-than.
    dist.template reduceAll<THREAD_DIM>(smem, tid);
}
// Calculates the distance between two vectors where the first vector is cached
// in register or shared memory and the second vector is in global memory.
//
// The per-element accumulation is delegated to
// vec_distance_detail::VecDiffCachedCalculator<THREAD_DIM, MAX_LEN, LEN_EQ_MAX_LEN>::calc();
// afterwards dist.reduceAll<THREAD_DIM>() is called with the scratch buffer
// `smem` to combine the per-thread partial results.
template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN, typename Dist, typename T1, typename T2>
__device__ __forceinline__ void calcVecDiffCached(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, typename Dist::result_type* smem, int tid)
{
    vec_distance_detail::VecDiffCachedCalculator<THREAD_DIM, MAX_LEN, LEN_EQ_MAX_LEN>::calc(vecCached, vecGlob, len, dist, tid);

    // Dist is a dependent type, so the member template call needs the
    // `template` disambiguator; without it the `<` is parsed as less-than.
    dist.template reduceAll<THREAD_DIM>(smem, tid);
}
// Distance calculator for the case where neither vector is cached: it only
// remembers the first vector's pointer and defers to calcVecDiffGlobal().
template <int THREAD_DIM, typename T1> struct VecDiffGlobal
{
    // Only the first argument is used. The remaining unnamed, defaulted
    // parameters are ignored (presumably they exist so this constructor can be
    // called with the same argument list as VecDiffCachedRegister's -- confirm).
    explicit __device__ __forceinline__ VecDiffGlobal(const T1* vec1_, int = 0, void* = 0, int = 0, int = 0)
        : vec1(vec1_)
    {
    }

    // Accumulates the distance between the stored vector and `vec2` into `dist`.
    template <typename T2, typename Dist>
    __device__ __forceinline__ void calc(const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid) const
    {
        calcVecDiffGlobal<THREAD_DIM>(vec1, vec2, len, dist, smem, tid);
    }

    const T1* vec1;
};
// calc distance between two vectors, first vector is cached in register memory, second vector is in global memory
// Each thread keeps its own strided share of the first vector in the member
// array vec1Vals (MAX_LEN / THREAD_DIM elements of type U).
template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN, typename U> struct VecDiffCachedRegister
{
// Stages vec1 through `smem`, then copies this thread's share (indices tid,
// tid + THREAD_DIM, ... below MAX_LEN) from smem into vec1Vals.
template <typename T1> __device__ __forceinline__ VecDiffCachedRegister(const T1* vec1, int len, U* smem, int glob_tid, int tid)
{
// Each thread with glob_tid < len publishes one element of vec1 to smem.
if (glob_tid < len)
smem[glob_tid] = vec1[glob_tid];
// Barrier: all smem writes must be complete before any thread reads them back.
__syncthreads();
U* vec1ValsPtr = vec1Vals;
// NOTE(review): the loop bound below is MAX_LEN, not len, so smem entries at
// or beyond len are read as-is -- confirm callers size smem accordingly.
#pragma unroll
for (int i = tid; i < MAX_LEN; i += THREAD_DIM)
*vec1ValsPtr++ = smem[i];
// Barrier before returning; presumably so smem can be reused afterwards
// (calc() below also receives an smem argument) -- confirm.
__syncthreads();
}
// Accumulates the distance between the cached vector and `vec2` into `dist`
// by forwarding to calcVecDiffCached().
template <typename T2, typename Dist>
__device__ __forceinline__ void calc(const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid) const
{
calcVecDiffCached<THREAD_DIM, MAX_LEN, LEN_EQ_MAX_LEN>(vec1Vals, vec2, len, dist, smem, tid);
}
U vec1Vals[MAX_LEN / THREAD_DIM];
};
}}} // namespace cv { namespace cuda { namespace device
//! @endcond
#endif // __OPENCV_CUDA_VEC_DISTANCE_HPP__

View File

@ -0,0 +1,930 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_CUDA_VECMATH_HPP__
#define __OPENCV_CUDA_VECMATH_HPP__
#include "vec_traits.hpp"
#include "saturate_cast.hpp"
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
// saturate_cast
// Implementation helpers for the vector saturate_cast overloads that follow.
namespace vec_math_detail
{
// Converts a source vector to VecD component by component. The primary
// template is only declared; one specialization exists per channel count
// (1 to 4) and reads exactly that many components (x, y, z, w) of the source.
template <int cn, typename VecD> struct SatCastHelper;
// 1 channel: converts v.x.
template <typename VecD> struct SatCastHelper<1, VecD>
{
template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
{
typedef typename VecTraits<VecD>::elem_type D;
return VecTraits<VecD>::make(saturate_cast<D>(v.x));
}
};
// 2 channels: converts v.x and v.y.
template <typename VecD> struct SatCastHelper<2, VecD>
{
template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
{
typedef typename VecTraits<VecD>::elem_type D;
return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y));
}
};
// 3 channels: converts v.x, v.y and v.z.
template <typename VecD> struct SatCastHelper<3, VecD>
{
template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
{
typedef typename VecTraits<VecD>::elem_type D;
return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y), saturate_cast<D>(v.z));
}
};
// 4 channels: converts v.x, v.y, v.z and v.w.
template <typename VecD> struct SatCastHelper<4, VecD>
{
template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
{
typedef typename VecTraits<VecD>::elem_type D;
return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y), saturate_cast<D>(v.z), saturate_cast<D>(v.w));
}
};
// Entry point: selects the specialization from the DESTINATION type's channel
// count (VecTraits<VecD>::cn); the source type is deduced from the argument.
template <typename VecD, typename VecS> static __device__ __forceinline__ VecD saturate_cast_helper(const VecS& v)
{
return SatCastHelper<VecTraits<VecD>::cn, VecD>::cast(v);
}
}
// saturate_cast<T>(vector) overloads, one per source vector type: eight element
// types (uchar, char, ushort, short, uint, int, float, double) times channel
// counts 1 to 4. Each forwards to vec_math_detail::saturate_cast_helper<T>,
// which converts component by component into the destination vector type T.
// 1-channel sources
template<typename T> static __device__ __forceinline__ T saturate_cast(const uchar1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const char1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const ushort1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const short1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const uint1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const int1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const float1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const double1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
// 2-channel sources
template<typename T> static __device__ __forceinline__ T saturate_cast(const uchar2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const char2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const ushort2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const short2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const uint2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const int2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const float2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const double2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
// 3-channel sources
template<typename T> static __device__ __forceinline__ T saturate_cast(const uchar3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const char3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const ushort3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const short3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const uint3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const int3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const float3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const double3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
// 4-channel sources
template<typename T> static __device__ __forceinline__ T saturate_cast(const uchar4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const char4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const ushort4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const short4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const uint4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const int4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const float4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const double4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
// unary operators
// CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(op, input_type, output_type) defines
// `operator op` for input_type1 .. input_type4. Each overload applies `op` to
// every component and builds the matching output_typeN via VecTraits<>::make.
// The macro is #undef'd once the instantiations below are done.
#define CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(op, input_type, output_type) \
__device__ __forceinline__ output_type ## 1 operator op(const input_type ## 1 & a) \
{ \
return VecTraits<output_type ## 1>::make(op (a.x)); \
} \
__device__ __forceinline__ output_type ## 2 operator op(const input_type ## 2 & a) \
{ \
return VecTraits<output_type ## 2>::make(op (a.x), op (a.y)); \
} \
__device__ __forceinline__ output_type ## 3 operator op(const input_type ## 3 & a) \
{ \
return VecTraits<output_type ## 3>::make(op (a.x), op (a.y), op (a.z)); \
} \
__device__ __forceinline__ output_type ## 4 operator op(const input_type ## 4 & a) \
{ \
return VecTraits<output_type ## 4>::make(op (a.x), op (a.y), op (a.z), op (a.w)); \
}
// Negation: signed integer and floating-point vectors only; result has the input's type.
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, char, char)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, short, short)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, int, int)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, double, double)
// Logical not: every element type; the result is always a uchar vector.
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, char, uchar)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, ushort, uchar)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, short, uchar)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, int, uchar)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, uint, uchar)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, float, uchar)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, double, uchar)
// Bitwise complement: integer vectors only; result has the input's type.
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, char, char)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, ushort, ushort)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, short, short)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, int, int)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, uint, uint)
#undef CV_CUDEV_IMPLEMENT_VEC_UNARY_OP
// unary functions
// Generator for component-wise unary functions.
//
//   func_name - name of the generated overload set (e.g. abs, sqrt)
//   fn        - scalar function applied to each component; may be left empty,
//               in which case each component is passed through as `(v.x)` etc.
//   src_t     - element-type prefix of the argument vectors (src_t1 .. src_t4)
//   dst_t     - element-type prefix of the result vectors   (dst_t1 .. dst_t4)
//
// One overload is emitted per vector width (1 to 4 components); the per-component
// results are packed with VecTraits<dst_tN>::make.
#define CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(func_name, fn, src_t, dst_t) \
    __device__ __forceinline__ dst_t ## 1 func_name(const src_t ## 1 & v) \
    { \
        return VecTraits<dst_t ## 1>::make(fn (v.x)); \
    } \
    __device__ __forceinline__ dst_t ## 2 func_name(const src_t ## 2 & v) \
    { \
        return VecTraits<dst_t ## 2>::make(fn (v.x), fn (v.y)); \
    } \
    __device__ __forceinline__ dst_t ## 3 func_name(const src_t ## 3 & v) \
    { \
        return VecTraits<dst_t ## 3>::make(fn (v.x), fn (v.y), fn (v.z)); \
    } \
    __device__ __forceinline__ dst_t ## 4 func_name(const src_t ## 4 & v) \
    { \
        return VecTraits<dst_t ## 4>::make(fn (v.x), fn (v.y), fn (v.z), fn (v.w)); \
    }
// Instantiations are listed as
// (generated name, scalar function, input element type, output element type).

// abs: the element type is preserved. For the unsigned element types the scalar
// function argument is intentionally empty (the ::abs is commented out), so the
// generated body just forwards each component unchanged.
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, /*::abs*/, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::abs, char, char)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, /*::abs*/, ushort, ushort)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::abs, short, short)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::abs, int, int)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, /*::abs*/, uint, uint)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::fabsf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::fabs, double, double)
// Every family below follows the same pattern: integer and float inputs call
// the single-precision (`...f`) function and yield float vectors, while double
// inputs call the double-precision function and yield double vectors.

// sqrt
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrt, double, double)
// exp
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::exp, double, double)
// exp2
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2, double, double)
// exp10
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10, double, double)
// log
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::log, double, double)
// log2
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2, double, double)
// log10
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10, double, double)
// sin
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sin, double, double)
// cos
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cos, double, double)
// tan
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tan, double, double)
// asin
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asin, double, double)
// acos
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acos, double, double)
// atan
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atan, double, double)
// sinh
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinh, double, double)
// cosh
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::cosh, double, double)
// tanh
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanh, double, double)
// asinh
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinh, double, double)
// acosh
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acosh, double, double)
// atanh
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanh, double, double)
// The generator macro is local to this section; remove it so it does not leak
// to code that includes this header.
#undef CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC
// binary operators (vec & vec)
// Generator for component-wise binary operators taking two vectors of the
// same type.
//
//   oper   - the operator token to overload (e.g. +, ==, &&, ^)
//   src_t  - element-type prefix of both operand vectors (src_t1 .. src_t4)
//   dst_t  - element-type prefix of the result vectors   (dst_t1 .. dst_t4)
//
// One overload is emitted per vector width (1 to 4 components). Each overload
// combines matching components with `oper` and packs the results with
// VecTraits<dst_tN>::make.
#define CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(oper, src_t, dst_t) \
    __device__ __forceinline__ dst_t ## 1 operator oper(const src_t ## 1 & lhs, const src_t ## 1 & rhs) \
    { \
        return VecTraits<dst_t ## 1>::make(lhs.x oper rhs.x); \
    } \
    __device__ __forceinline__ dst_t ## 2 operator oper(const src_t ## 2 & lhs, const src_t ## 2 & rhs) \
    { \
        return VecTraits<dst_t ## 2>::make(lhs.x oper rhs.x, lhs.y oper rhs.y); \
    } \
    __device__ __forceinline__ dst_t ## 3 operator oper(const src_t ## 3 & lhs, const src_t ## 3 & rhs) \
    { \
        return VecTraits<dst_t ## 3>::make(lhs.x oper rhs.x, lhs.y oper rhs.y, lhs.z oper rhs.z); \
    } \
    __device__ __forceinline__ dst_t ## 4 operator oper(const src_t ## 4 & lhs, const src_t ## 4 & rhs) \
    { \
        return VecTraits<dst_t ## 4>::make(lhs.x oper rhs.x, lhs.y oper rhs.y, lhs.z oper rhs.z, lhs.w oper rhs.w); \
    }
// Instantiations are listed as (operator, input element type, output element type).

// Arithmetic (+, -, *, /): 8- and 16-bit integer inputs produce int vectors;
// int, uint, float and double inputs keep their element type.
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, uchar, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, char, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, ushort, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, short, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, int, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, uint, uint)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, float, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, double, double)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, uchar, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, char, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, ushort, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, short, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, int, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, uint, uint)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, float, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, double, double)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, uchar, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, char, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, ushort, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, short, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, int, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, uint, uint)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, float, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, double, double)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, uchar, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, char, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, ushort, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, short, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, int, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, uint, uint)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, float, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, double, double)
// Comparisons (==, !=, >, <, >=, <=): instantiated for every element type;
// the per-component result is stored in a uchar vector of the same width.
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, char, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, ushort, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, short, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, int, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, uint, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, float, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, double, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, char, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, ushort, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, short, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, int, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, uint, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, float, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, double, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, char, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, ushort, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, short, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, int, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, uint, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, float, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, double, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, char, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, ushort, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, short, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, int, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, uint, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, float, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, double, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, char, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, ushort, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, short, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, int, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, uint, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, float, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, double, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, char, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, ushort, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, short, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, int, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, uint, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, float, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, double, uchar)
// Logical AND / OR: instantiated for every element type; the result is a
// uchar vector. As with any overloaded && / ||, both vector operands are
// fully evaluated before the operator function is called.
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, char, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, ushort, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, short, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, int, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, uint, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, float, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, double, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, char, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, ushort, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, short, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, int, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, uint, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, float, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, double, uchar)
// Bitwise (&, |, ^): integer element types only; the element type is preserved.
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, char, char)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, ushort, ushort)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, short, short)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, int, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, uint, uint)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, char, char)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, ushort, ushort)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, short, short)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, int, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, uint, uint)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, char, char)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, ushort, ushort)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, short, short)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, int, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, uint, uint)
// The generator macro is local to this section; remove it so it does not leak
// to code that includes this header.
#undef CV_CUDEV_IMPLEMENT_VEC_BINARY_OP
// binary operators (vec & scalar)
// Generator for component-wise binary operators mixing a vector and a scalar.
//
//   oper   - the operator token to overload (e.g. +, ==, &&, ^)
//   src_t  - element-type prefix of the vector operand (src_t1 .. src_t4)
//   scl_t  - type of the scalar operand (taken by value)
//   dst_t  - element-type prefix of the result vectors (dst_t1 .. dst_t4)
//
// For every vector width (1 to 4 components) two overloads are emitted, one
// per operand order: (vector oper scalar) and (scalar oper vector). Each
// component is combined with the scalar in the written operand order and the
// results are packed with VecTraits<dst_tN>::make.
#define CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(oper, src_t, scl_t, dst_t) \
    __device__ __forceinline__ dst_t ## 1 operator oper(const src_t ## 1 & vec, scl_t val) \
    { \
        return VecTraits<dst_t ## 1>::make(vec.x oper val); \
    } \
    __device__ __forceinline__ dst_t ## 1 operator oper(scl_t val, const src_t ## 1 & vec) \
    { \
        return VecTraits<dst_t ## 1>::make(val oper vec.x); \
    } \
    __device__ __forceinline__ dst_t ## 2 operator oper(const src_t ## 2 & vec, scl_t val) \
    { \
        return VecTraits<dst_t ## 2>::make(vec.x oper val, vec.y oper val); \
    } \
    __device__ __forceinline__ dst_t ## 2 operator oper(scl_t val, const src_t ## 2 & vec) \
    { \
        return VecTraits<dst_t ## 2>::make(val oper vec.x, val oper vec.y); \
    } \
    __device__ __forceinline__ dst_t ## 3 operator oper(const src_t ## 3 & vec, scl_t val) \
    { \
        return VecTraits<dst_t ## 3>::make(vec.x oper val, vec.y oper val, vec.z oper val); \
    } \
    __device__ __forceinline__ dst_t ## 3 operator oper(scl_t val, const src_t ## 3 & vec) \
    { \
        return VecTraits<dst_t ## 3>::make(val oper vec.x, val oper vec.y, val oper vec.z); \
    } \
    __device__ __forceinline__ dst_t ## 4 operator oper(const src_t ## 4 & vec, scl_t val) \
    { \
        return VecTraits<dst_t ## 4>::make(vec.x oper val, vec.y oper val, vec.z oper val, vec.w oper val); \
    } \
    __device__ __forceinline__ dst_t ## 4 operator oper(scl_t val, const src_t ## 4 & vec) \
    { \
        return VecTraits<dst_t ## 4>::make(val oper vec.x, val oper vec.y, val oper vec.z, val oper vec.w); \
    }
// Instantiations are listed as
// (operator, vector element type, scalar type, output element type).

// Arithmetic (+, -, *, /): the output element type follows the scalar type.
// 8/16-bit and int vectors accept int, float and double scalars; uint vectors
// accept uint, float and double; float vectors accept float and double; double
// vectors accept double only.
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uchar, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uchar, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uchar, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, char, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, char, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, char, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, ushort, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, ushort, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, ushort, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, short, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, short, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, short, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, int, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, int, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, int, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uint, uint, uint)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uint, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uint, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, float, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, float, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, double, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uchar, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uchar, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uchar, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, char, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, char, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, char, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, ushort, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, ushort, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, ushort, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, short, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, short, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, short, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, int, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, int, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, int, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uint, uint, uint)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uint, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uint, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, float, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, float, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, double, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uchar, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uchar, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uchar, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, char, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, char, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, char, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, ushort, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, ushort, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, ushort, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, short, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, short, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, short, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, int, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, int, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, int, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uint, uint, uint)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uint, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uint, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, float, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, float, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, double, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uchar, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uchar, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uchar, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, char, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, char, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, char, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, ushort, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, ushort, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, ushort, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, short, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, short, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, short, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, int, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, int, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, int, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uint, uint, uint)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uint, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uint, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, float, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, float, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, double, double, double)
// Comparisons (==, !=, >, <, >=, <=): the scalar type matches the vector's
// element type; the result is always a uchar vector.
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, char, char, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, ushort, ushort, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, short, short, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, int, int, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, uint, uint, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, float, float, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, double, double, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, char, char, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, ushort, ushort, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, short, short, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, int, int, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, uint, uint, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, float, float, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, double, double, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, char, char, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, ushort, ushort, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, short, short, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, int, int, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, uint, uint, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, float, float, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, double, double, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, char, char, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, ushort, ushort, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, short, short, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, int, int, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, uint, uint, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, float, float, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, double, double, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, char, char, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, ushort, ushort, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, short, short, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, int, int, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, uint, uint, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, float, float, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, double, double, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, char, char, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, ushort, ushort, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, short, short, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, int, int, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, uint, uint, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, float, float, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, double, double, uchar)
// Logical AND / OR: the scalar type matches the vector's element type; the
// result is a uchar vector. As with any overloaded && / ||, both operands are
// fully evaluated before the operator function is called.
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, char, char, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, ushort, ushort, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, short, short, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, int, int, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, uint, uint, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, float, float, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, double, double, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, char, char, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, ushort, ushort, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, short, short, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, int, int, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, uint, uint, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, float, float, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, double, double, uchar)
// Bitwise (&, |, ^): integer element types only; vector, scalar and result all
// share the same element type.
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, char, char, char)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, ushort, ushort, ushort)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, short, short, short)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, int, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, uint, uint, uint)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, char, char, char)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, ushort, ushort, ushort)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, short, short, short)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, int, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, uint, uint, uint)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, char, char, char)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, ushort, ushort, ushort)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, short, short, short)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, int, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, uint, uint, uint)
// The generator macro is local to this section; remove it so it does not leak
// to code that includes this header.
#undef CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP
// binary functions (vec & vec)
// Generator for component-wise binary functions taking two vectors of the
// same type.
//
//   func_name - name of the generated overload set (e.g. max, hypot)
//   fn        - two-argument scalar function applied to matching components
//   src_t     - element-type prefix of both operand vectors (src_t1 .. src_t4)
//   dst_t     - element-type prefix of the result vectors   (dst_t1 .. dst_t4)
//
// One overload is emitted per vector width (1 to 4 components); the
// per-component results are packed with VecTraits<dst_tN>::make.
#define CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(func_name, fn, src_t, dst_t) \
    __device__ __forceinline__ dst_t ## 1 func_name(const src_t ## 1 & lhs, const src_t ## 1 & rhs) \
    { \
        return VecTraits<dst_t ## 1>::make(fn (lhs.x, rhs.x)); \
    } \
    __device__ __forceinline__ dst_t ## 2 func_name(const src_t ## 2 & lhs, const src_t ## 2 & rhs) \
    { \
        return VecTraits<dst_t ## 2>::make(fn (lhs.x, rhs.x), fn (lhs.y, rhs.y)); \
    } \
    __device__ __forceinline__ dst_t ## 3 func_name(const src_t ## 3 & lhs, const src_t ## 3 & rhs) \
    { \
        return VecTraits<dst_t ## 3>::make(fn (lhs.x, rhs.x), fn (lhs.y, rhs.y), fn (lhs.z, rhs.z)); \
    } \
    __device__ __forceinline__ dst_t ## 4 func_name(const src_t ## 4 & lhs, const src_t ## 4 & rhs) \
    { \
        return VecTraits<dst_t ## 4>::make(fn (lhs.x, rhs.x), fn (lhs.y, rhs.y), fn (lhs.z, rhs.z), fn (lhs.w, rhs.w)); \
    }
// Instantiations are listed as
// (generated name, scalar function, input element type, output element type).

// max / min: the element type is preserved. Integer types call ::max / ::min;
// float and double call the fmax / fmin family instead.
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, char, char)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, ushort, ushort)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, short, short)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, uint, uint)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, int, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::fmaxf, float, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::fmax, double, double)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, char, char)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, ushort, ushort)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, short, short)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, uint, uint)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, int, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::fminf, float, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::fmin, double, double)
// hypot / atan2: integer and float inputs call the single-precision function
// and yield float vectors; double inputs call the double-precision function
// and yield double vectors.
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, char, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, short, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, int, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, float, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypot, double, double)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, char, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, short, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, uint, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, int, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, float, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2, double, double)
// The generator macro is local to this section; remove it so it does not leak
// to code that includes this header.
#undef CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC
// binary functions (vec & scalar)
#define CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(func_name, func, input_type, scalar_type, output_type) \
__device__ __forceinline__ output_type ## 1 func_name(const input_type ## 1 & a, scalar_type s) \
{ \
return VecTraits<output_type ## 1>::make(func ((output_type) a.x, (output_type) s)); \
} \
__device__ __forceinline__ output_type ## 1 func_name(scalar_type s, const input_type ## 1 & b) \
{ \
return VecTraits<output_type ## 1>::make(func ((output_type) s, (output_type) b.x)); \
} \
__device__ __forceinline__ output_type ## 2 func_name(const input_type ## 2 & a, scalar_type s) \
{ \
return VecTraits<output_type ## 2>::make(func ((output_type) a.x, (output_type) s), func ((output_type) a.y, (output_type) s)); \
} \
__device__ __forceinline__ output_type ## 2 func_name(scalar_type s, const input_type ## 2 & b) \
{ \
return VecTraits<output_type ## 2>::make(func ((output_type) s, (output_type) b.x), func ((output_type) s, (output_type) b.y)); \
} \
__device__ __forceinline__ output_type ## 3 func_name(const input_type ## 3 & a, scalar_type s) \
{ \
return VecTraits<output_type ## 3>::make(func ((output_type) a.x, (output_type) s), func ((output_type) a.y, (output_type) s), func ((output_type) a.z, (output_type) s)); \
} \
__device__ __forceinline__ output_type ## 3 func_name(scalar_type s, const input_type ## 3 & b) \
{ \
return VecTraits<output_type ## 3>::make(func ((output_type) s, (output_type) b.x), func ((output_type) s, (output_type) b.y), func ((output_type) s, (output_type) b.z)); \
} \
__device__ __forceinline__ output_type ## 4 func_name(const input_type ## 4 & a, scalar_type s) \
{ \
return VecTraits<output_type ## 4>::make(func ((output_type) a.x, (output_type) s), func ((output_type) a.y, (output_type) s), func ((output_type) a.z, (output_type) s), func ((output_type) a.w, (output_type) s)); \
} \
__device__ __forceinline__ output_type ## 4 func_name(scalar_type s, const input_type ## 4 & b) \
{ \
return VecTraits<output_type ## 4>::make(func ((output_type) s, (output_type) b.x), func ((output_type) s, (output_type) b.y), func ((output_type) s, (output_type) b.z), func ((output_type) s, (output_type) b.w)); \
}
// max(vec, scalar) / max(scalar, vec).
// Each integer element type gets three variants: a same-type scalar (::max,
// result in that type), a float scalar (::fmaxf, float result) and a double
// scalar (::fmax, double result). float vectors get the float and double
// variants, double vectors only the double one.
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, uchar, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, uchar, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, char, char, char)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, char, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, char, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, ushort, ushort, ushort)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, ushort, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, ushort, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, short, short, short)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, short, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, short, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, uint, uint, uint)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, uint, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, uint, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, int, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, int, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, int, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, float, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, float, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, double, double, double)
// min(vec, scalar) / min(scalar, vec): same layout as max, using ::min, ::fminf
// and ::fmin.
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, uchar, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, uchar, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, char, char, char)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, char, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, char, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, ushort, ushort, ushort)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, ushort, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, ushort, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, short, short, short)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, short, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, short, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, uint, uint, uint)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, uint, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, uint, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, int, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, int, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, int, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, float, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, float, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, double, double, double)
// hypot(vec, scalar) / hypot(scalar, vec): only floating-point scalars are
// offered -- a float scalar uses ::hypotf (float result), a double scalar uses
// ::hypot (double result). double vectors get only the double variant.
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, uchar, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, uchar, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, char, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, char, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, ushort, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, ushort, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, short, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, short, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, uint, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, uint, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, int, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, int, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, float, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, float, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, double, double, double)
// atan2(vec, scalar) / atan2(scalar, vec): same layout as hypot, using ::atan2f
// and ::atan2. The generated overloads pass their operands to the function in
// the order they were given, so the two forms are not interchangeable.
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, uchar, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, uchar, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, char, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, char, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, ushort, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, ushort, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, short, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, short, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, uint, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, uint, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, int, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, int, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, float, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, float, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, double, double, double)
// The generator macro is only needed for the instantiations above.
#undef CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC
}}} // namespace cv { namespace cuda { namespace device
//! @endcond
#endif // __OPENCV_CUDA_VECMATH_HPP__

View File

@ -0,0 +1,288 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_CUDA_VEC_TRAITS_HPP__
#define __OPENCV_CUDA_VEC_TRAITS_HPP__
#include "common.hpp"
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
// Maps a type T and a channel count N to a vector type, exposed by the
// specializations as the nested typedef vec_type. The primary template is only
// declared; every usable (T, N) pair is an explicit specialization further down
// in this header.
template<typename T, int N> struct TypeVec;
// Eight uchar components (a0..a7) in one aggregate, declared with __align__(8).
struct __align__(8) uchar8
{
    uchar a0, a1, a2, a3;
    uchar a4, a5, a6, a7;
};

// Builds a uchar8 from its eight components, given in field order a0..a7.
static __host__ __device__ __forceinline__ uchar8 make_uchar8(uchar a0, uchar a1, uchar a2, uchar a3, uchar a4, uchar a5, uchar a6, uchar a7)
{
    uchar8 packed;
    packed.a0 = a0; packed.a1 = a1; packed.a2 = a2; packed.a3 = a3;
    packed.a4 = a4; packed.a5 = a5; packed.a6 = a6; packed.a7 = a7;
    return packed;
}
// Eight schar components (a0..a7) in one aggregate, declared with __align__(8).
// The type is named char8, but its fields are schar.
struct __align__(8) char8
{
    schar a0, a1, a2, a3;
    schar a4, a5, a6, a7;
};

// Builds a char8 from its eight components, given in field order a0..a7.
static __host__ __device__ __forceinline__ char8 make_char8(schar a0, schar a1, schar a2, schar a3, schar a4, schar a5, schar a6, schar a7)
{
    char8 packed;
    packed.a0 = a0; packed.a1 = a1; packed.a2 = a2; packed.a3 = a3;
    packed.a4 = a4; packed.a5 = a5; packed.a6 = a6; packed.a7 = a7;
    return packed;
}
// Eight ushort components (a0..a7) in one aggregate, declared with __align__(16).
struct __align__(16) ushort8
{
    ushort a0, a1, a2, a3;
    ushort a4, a5, a6, a7;
};

// Builds a ushort8 from its eight components, given in field order a0..a7.
static __host__ __device__ __forceinline__ ushort8 make_ushort8(ushort a0, ushort a1, ushort a2, ushort a3, ushort a4, ushort a5, ushort a6, ushort a7)
{
    ushort8 packed;
    packed.a0 = a0; packed.a1 = a1; packed.a2 = a2; packed.a3 = a3;
    packed.a4 = a4; packed.a5 = a5; packed.a6 = a6; packed.a7 = a7;
    return packed;
}
// Eight short components (a0..a7) in one aggregate, declared with __align__(16).
struct __align__(16) short8
{
    short a0, a1, a2, a3;
    short a4, a5, a6, a7;
};

// Builds a short8 from its eight components, given in field order a0..a7.
static __host__ __device__ __forceinline__ short8 make_short8(short a0, short a1, short a2, short a3, short a4, short a5, short a6, short a7)
{
    short8 packed;
    packed.a0 = a0; packed.a1 = a1; packed.a2 = a2; packed.a3 = a3;
    packed.a4 = a4; packed.a5 = a5; packed.a6 = a6; packed.a7 = a7;
    return packed;
}
// Eight uint components (a0..a7) in one aggregate, declared with __align__(32).
// Note: here "8" is the channel count, not a bit width.
struct __align__(32) uint8
{
    uint a0, a1, a2, a3;
    uint a4, a5, a6, a7;
};

// Builds a uint8 from its eight components, given in field order a0..a7.
static __host__ __device__ __forceinline__ uint8 make_uint8(uint a0, uint a1, uint a2, uint a3, uint a4, uint a5, uint a6, uint a7)
{
    uint8 packed;
    packed.a0 = a0; packed.a1 = a1; packed.a2 = a2; packed.a3 = a3;
    packed.a4 = a4; packed.a5 = a5; packed.a6 = a6; packed.a7 = a7;
    return packed;
}
// Eight int components (a0..a7) in one aggregate, declared with __align__(32).
// Note: here "8" is the channel count, not a bit width.
struct __align__(32) int8
{
    int a0, a1, a2, a3;
    int a4, a5, a6, a7;
};

// Builds an int8 from its eight components, given in field order a0..a7.
static __host__ __device__ __forceinline__ int8 make_int8(int a0, int a1, int a2, int a3, int a4, int a5, int a6, int a7)
{
    int8 packed;
    packed.a0 = a0; packed.a1 = a1; packed.a2 = a2; packed.a3 = a3;
    packed.a4 = a4; packed.a5 = a5; packed.a6 = a6; packed.a7 = a7;
    return packed;
}
// Eight float components (a0..a7) in one aggregate, declared with __align__(32).
struct __align__(32) float8
{
    float a0, a1, a2, a3;
    float a4, a5, a6, a7;
};

// Builds a float8 from its eight components, given in field order a0..a7.
static __host__ __device__ __forceinline__ float8 make_float8(float a0, float a1, float a2, float a3, float a4, float a5, float a6, float a7)
{
    float8 packed;
    packed.a0 = a0; packed.a1 = a1; packed.a2 = a2; packed.a3 = a3;
    packed.a4 = a4; packed.a5 = a5; packed.a6 = a6; packed.a7 = a7;
    return packed;
}
// Eight double components (a0..a7) in one aggregate. Unlike the other
// eight-channel types in this header it carries no explicit __align__ specifier.
struct double8
{
    double a0, a1, a2, a3;
    double a4, a5, a6, a7;
};

// Builds a double8 from its eight components, given in field order a0..a7.
static __host__ __device__ __forceinline__ double8 make_double8(double a0, double a1, double a2, double a3, double a4, double a5, double a6, double a7)
{
    double8 packed;
    packed.a0 = a0; packed.a1 = a1; packed.a2 = a2; packed.a3 = a3;
    packed.a4 = a4; packed.a5 = a5; packed.a6 = a6; packed.a7 = a7;
    return packed;
}
// OPENCV_CUDA_IMPLEMENT_TYPE_VEC(type) emits the TypeVec specializations for one
// element type. For each channel count N in {1, 2, 3, 4, 8} it defines two
// entries that both resolve to the N-channel vector type:
//   TypeVec<type, N>       keyed by the scalar element type
//   TypeVec<type ## N, N>  keyed by the vector type itself
// The single exception is TypeVec<type, 1>, whose vec_type is the plain scalar
// `type` rather than `type ## 1`.
#define OPENCV_CUDA_IMPLEMENT_TYPE_VEC(type) \
template<> struct TypeVec<type, 1> { typedef type vec_type; }; \
template<> struct TypeVec<type ## 1, 1> { typedef type ## 1 vec_type; }; \
template<> struct TypeVec<type, 2> { typedef type ## 2 vec_type; }; \
template<> struct TypeVec<type ## 2, 2> { typedef type ## 2 vec_type; }; \
template<> struct TypeVec<type, 3> { typedef type ## 3 vec_type; }; \
template<> struct TypeVec<type ## 3, 3> { typedef type ## 3 vec_type; }; \
template<> struct TypeVec<type, 4> { typedef type ## 4 vec_type; }; \
template<> struct TypeVec<type ## 4, 4> { typedef type ## 4 vec_type; }; \
template<> struct TypeVec<type, 8> { typedef type ## 8 vec_type; }; \
template<> struct TypeVec<type ## 8, 8> { typedef type ## 8 vec_type; };
// Instantiate the mapping for every element type that has a typeN vector family.
OPENCV_CUDA_IMPLEMENT_TYPE_VEC(uchar)
OPENCV_CUDA_IMPLEMENT_TYPE_VEC(char)
OPENCV_CUDA_IMPLEMENT_TYPE_VEC(ushort)
OPENCV_CUDA_IMPLEMENT_TYPE_VEC(short)
OPENCV_CUDA_IMPLEMENT_TYPE_VEC(int)
OPENCV_CUDA_IMPLEMENT_TYPE_VEC(uint)
OPENCV_CUDA_IMPLEMENT_TYPE_VEC(float)
OPENCV_CUDA_IMPLEMENT_TYPE_VEC(double)
// The generator macro is only needed for the instantiations above.
#undef OPENCV_CUDA_IMPLEMENT_TYPE_VEC
// schar elements: the single-channel case stays a plain schar, the
// multi-channel cases map onto the charN vector types.
template<> struct TypeVec<schar, 1>
{
    typedef schar vec_type;
};
template<> struct TypeVec<schar, 2>
{
    typedef char2 vec_type;
};
template<> struct TypeVec<schar, 3>
{
    typedef char3 vec_type;
};
template<> struct TypeVec<schar, 4>
{
    typedef char4 vec_type;
};
template<> struct TypeVec<schar, 8>
{
    typedef char8 vec_type;
};

// bool elements map onto the uchar family (plain uchar for one channel,
// ucharN otherwise).
template<> struct TypeVec<bool, 1>
{
    typedef uchar vec_type;
};
template<> struct TypeVec<bool, 2>
{
    typedef uchar2 vec_type;
};
template<> struct TypeVec<bool, 3>
{
    typedef uchar3 vec_type;
};
template<> struct TypeVec<bool, 4>
{
    typedef uchar4 vec_type;
};
template<> struct TypeVec<bool, 8>
{
    typedef uchar8 vec_type;
};
// Per-type traits for scalar and vector types. The specializations in this
// header provide elem_type (element type), cn (channel count), all() (broadcast
// one value to every channel) and make() (build from components or from a
// pointer to components). The primary template is only declared.
template<typename T> struct VecTraits;
// OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(type) emits VecTraits specializations for the
// scalar `type` and for its vector types type1, type2, type3, type4 and type8.
// Every specialization provides:
//   elem_type       - always the scalar `type`
//   cn              - channel count (1 for the scalar and type1, else 2/3/4/8)
//   all(v)          - value with every channel set to v
//   make(x, ...)    - value built from cn explicit components
//   make(const type* v) - value built from the first cn elements of v
// The scalar specialization returns its argument unchanged; the vector ones
// forward to the matching make_<type><N> factory.
#define OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(type) \
template<> struct VecTraits<type> \
{ \
typedef type elem_type; \
enum {cn=1}; \
static __device__ __host__ __forceinline__ type all(type v) {return v;} \
static __device__ __host__ __forceinline__ type make(type x) {return x;} \
static __device__ __host__ __forceinline__ type make(const type* v) {return *v;} \
}; \
template<> struct VecTraits<type ## 1> \
{ \
typedef type elem_type; \
enum {cn=1}; \
static __device__ __host__ __forceinline__ type ## 1 all(type v) {return make_ ## type ## 1(v);} \
static __device__ __host__ __forceinline__ type ## 1 make(type x) {return make_ ## type ## 1(x);} \
static __device__ __host__ __forceinline__ type ## 1 make(const type* v) {return make_ ## type ## 1(*v);} \
}; \
template<> struct VecTraits<type ## 2> \
{ \
typedef type elem_type; \
enum {cn=2}; \
static __device__ __host__ __forceinline__ type ## 2 all(type v) {return make_ ## type ## 2(v, v);} \
static __device__ __host__ __forceinline__ type ## 2 make(type x, type y) {return make_ ## type ## 2(x, y);} \
static __device__ __host__ __forceinline__ type ## 2 make(const type* v) {return make_ ## type ## 2(v[0], v[1]);} \
}; \
template<> struct VecTraits<type ## 3> \
{ \
typedef type elem_type; \
enum {cn=3}; \
static __device__ __host__ __forceinline__ type ## 3 all(type v) {return make_ ## type ## 3(v, v, v);} \
static __device__ __host__ __forceinline__ type ## 3 make(type x, type y, type z) {return make_ ## type ## 3(x, y, z);} \
static __device__ __host__ __forceinline__ type ## 3 make(const type* v) {return make_ ## type ## 3(v[0], v[1], v[2]);} \
}; \
template<> struct VecTraits<type ## 4> \
{ \
typedef type elem_type; \
enum {cn=4}; \
static __device__ __host__ __forceinline__ type ## 4 all(type v) {return make_ ## type ## 4(v, v, v, v);} \
static __device__ __host__ __forceinline__ type ## 4 make(type x, type y, type z, type w) {return make_ ## type ## 4(x, y, z, w);} \
static __device__ __host__ __forceinline__ type ## 4 make(const type* v) {return make_ ## type ## 4(v[0], v[1], v[2], v[3]);} \
}; \
template<> struct VecTraits<type ## 8> \
{ \
typedef type elem_type; \
enum {cn=8}; \
static __device__ __host__ __forceinline__ type ## 8 all(type v) {return make_ ## type ## 8(v, v, v, v, v, v, v, v);} \
static __device__ __host__ __forceinline__ type ## 8 make(type a0, type a1, type a2, type a3, type a4, type a5, type a6, type a7) {return make_ ## type ## 8(a0, a1, a2, a3, a4, a5, a6, a7);} \
static __device__ __host__ __forceinline__ type ## 8 make(const type* v) {return make_ ## type ## 8(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);} \
};
// char is deliberately absent from this list: the char, schar and charN traits
// are written out by hand after the #undef because the charN types use schar as
// their element type.
OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(uchar)
OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(ushort)
OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(short)
OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(int)
OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(uint)
OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(float)
OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(double)
// The generator macro is only needed for the instantiations above.
#undef OPENCV_CUDA_IMPLEMENT_VEC_TRAITS
// Traits for the plain scalar char: one channel, element type char.
template<> struct VecTraits<char>
{
    typedef char elem_type;
    enum { cn = 1 };

    // A scalar "built" from one component is that component.
    static __device__ __host__ __forceinline__ char make(char x)
    {
        return x;
    }
    // Reads the single component that x points to.
    static __device__ __host__ __forceinline__ char make(const char* x)
    {
        return x[0];
    }
    // Broadcasting to a single channel is the identity.
    static __device__ __host__ __forceinline__ char all(char v)
    {
        return make(v);
    }
};
// Traits for the plain scalar schar: one channel, element type schar.
template<> struct VecTraits<schar>
{
    typedef schar elem_type;
    enum { cn = 1 };

    // A scalar "built" from one component is that component.
    static __device__ __host__ __forceinline__ schar make(schar x)
    {
        return x;
    }
    // Reads the single component that x points to.
    static __device__ __host__ __forceinline__ schar make(const schar* x)
    {
        return x[0];
    }
    // Broadcasting to a single channel is the identity.
    static __device__ __host__ __forceinline__ schar all(schar v)
    {
        return make(v);
    }
};
// Traits for char1: one channel whose element type is schar.
template<> struct VecTraits<char1>
{
    typedef schar elem_type;
    enum { cn = 1 };

    // Builds a char1 from its single component.
    static __device__ __host__ __forceinline__ char1 make(schar x)
    {
        return make_char1(x);
    }
    // Builds a char1 from the first element of v.
    static __device__ __host__ __forceinline__ char1 make(const schar* v)
    {
        return make_char1(*v);
    }
    // Sets the only channel to v.
    static __device__ __host__ __forceinline__ char1 all(schar v)
    {
        return make(v);
    }
};
// Traits for char2: two channels whose element type is schar.
template<> struct VecTraits<char2>
{
    typedef schar elem_type;
    enum { cn = 2 };

    // Builds a char2 from explicit components.
    static __device__ __host__ __forceinline__ char2 make(schar x, schar y)
    {
        return make_char2(x, y);
    }
    // Builds a char2 from the first two elements of v.
    static __device__ __host__ __forceinline__ char2 make(const schar* v)
    {
        return make_char2(v[0], v[1]);
    }
    // Sets both channels to v.
    static __device__ __host__ __forceinline__ char2 all(schar v)
    {
        return make(v, v);
    }
};
// Traits for char3: three channels whose element type is schar.
template<> struct VecTraits<char3>
{
    typedef schar elem_type;
    enum { cn = 3 };

    // Builds a char3 from explicit components.
    static __device__ __host__ __forceinline__ char3 make(schar x, schar y, schar z)
    {
        return make_char3(x, y, z);
    }
    // Builds a char3 from the first three elements of v.
    static __device__ __host__ __forceinline__ char3 make(const schar* v)
    {
        return make_char3(v[0], v[1], v[2]);
    }
    // Sets all three channels to v.
    static __device__ __host__ __forceinline__ char3 all(schar v)
    {
        return make(v, v, v);
    }
};
// Traits for char4: four channels whose element type is schar.
template<> struct VecTraits<char4>
{
    typedef schar elem_type;
    enum { cn = 4 };

    // Builds a char4 from explicit components.
    static __device__ __host__ __forceinline__ char4 make(schar x, schar y, schar z, schar w)
    {
        return make_char4(x, y, z, w);
    }
    // Builds a char4 from the first four elements of v.
    static __device__ __host__ __forceinline__ char4 make(const schar* v)
    {
        return make_char4(v[0], v[1], v[2], v[3]);
    }
    // Sets all four channels to v.
    static __device__ __host__ __forceinline__ char4 all(schar v)
    {
        return make(v, v, v, v);
    }
};
// Traits for char8: eight channels whose element type is schar.
template<> struct VecTraits<char8>
{
    typedef schar elem_type;
    enum { cn = 8 };

    // Builds a char8 from explicit components, in field order a0..a7.
    static __device__ __host__ __forceinline__ char8 make(schar a0, schar a1, schar a2, schar a3, schar a4, schar a5, schar a6, schar a7)
    {
        return make_char8(a0, a1, a2, a3, a4, a5, a6, a7);
    }
    // Builds a char8 from the first eight elements of v.
    static __device__ __host__ __forceinline__ char8 make(const schar* v)
    {
        return make_char8(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);
    }
    // Sets all eight channels to v.
    static __device__ __host__ __forceinline__ char8 all(schar v)
    {
        return make(v, v, v, v, v, v, v, v);
    }
};
}}} // namespace cv { namespace cuda { namespace device
//! @endcond
#endif // __OPENCV_CUDA_VEC_TRAITS_HPP__

View File

@ -0,0 +1,139 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_CUDA_DEVICE_WARP_HPP__
#define __OPENCV_CUDA_DEVICE_WARP_HPP__
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
/**
 * Helpers meant to be called by the threads of a warp together: each calling
 * thread starts at its own lane offset and advances by STRIDE (= WARP_SIZE), so
 * the lanes jointly cover a range [beg, end).
 *
 * Iterator arguments must support `it + n`, `it += n`, `it < end` and
 * dereference (plain pointers qualify).
 */
struct Warp
{
    enum
    {
        LOG_WARP_SIZE = 5,
        WARP_SIZE     = 1 << LOG_WARP_SIZE,
        STRIDE        = WARP_SIZE
    };

    /** \brief Returns the warp lane ID of the calling thread. */
    static __device__ __forceinline__ unsigned int laneId()
    {
        unsigned int ret;
        asm("mov.u32 %0, %laneid;" : "=r"(ret) );
        return ret;
    }

    /** \brief Assigns value to the elements beg + laneId() + k * STRIDE that lie before end. */
    template<typename It, typename T>
    static __device__ __forceinline__ void fill(It beg, It end, const T& value)
    {
        for(It t = beg + laneId(); t < end; t += STRIDE)
            *t = value;
    }

    /**
     * \brief Copies [beg, end) to the range starting at out.
     *
     * The calling lane copies elements laneId(), laneId() + STRIDE, ... of the
     * input to the same positions of the output. The output iterator is offset
     * by the lane id just like the input one (as fill() and yota() do for their
     * destination); previously only the input was offset, so every lane wrote to
     * out[0], out[STRIDE], ... and the remaining output slots were never written.
     *
     * \return the calling lane's output iterator after its last step
     *         (out + laneId() + k * STRIDE, k = number of elements it copied).
     */
    template<typename InIt, typename OutIt>
    static __device__ __forceinline__ OutIt copy(InIt beg, InIt end, OutIt out)
    {
        const unsigned int lane = laneId();
        out += lane; // keep the destination aligned with the per-lane source position
        for(InIt t = beg + lane; t < end; t += STRIDE, out += STRIDE)
            *out = *t;
        return out;
    }

    /**
     * \brief Writes op(x) for every x in [beg, end) to the matching position of out.
     *
     * Work split and return value are the same as for copy(); the output
     * iterator is offset by the lane id for the same reason.
     */
    template<typename InIt, typename OutIt, class UnOp>
    static __device__ __forceinline__ OutIt transform(InIt beg, InIt end, OutIt out, UnOp op)
    {
        const unsigned int lane = laneId();
        out += lane; // keep the destination aligned with the per-lane source position
        for(InIt t = beg + lane; t < end; t += STRIDE, out += STRIDE)
            *out = op(*t);
        return out;
    }

    /**
     * \brief Writes op(a, b) for paired elements of [beg1, end1) and the range
     *        starting at beg2 to the matching position of out.
     *
     * The second input must provide at least as many elements as the first.
     * Work split and return value are the same as for copy(); the output
     * iterator is offset by the lane id for the same reason.
     */
    template<typename InIt1, typename InIt2, typename OutIt, class BinOp>
    static __device__ __forceinline__ OutIt transform(InIt1 beg1, InIt1 end1, InIt2 beg2, OutIt out, BinOp op)
    {
        unsigned int lane = laneId();
        InIt1 t1 = beg1 + lane;
        InIt2 t2 = beg2 + lane;
        out += lane; // keep the destination aligned with the per-lane source positions
        for(; t1 < end1; t1 += STRIDE, t2 += STRIDE, out += STRIDE)
            *out = op(*t1, *t2);
        return out;
    }

    /**
     * \brief Folds ptr[0..31] with op and returns ptr[0].
     *
     * Lanes 0..15 combine ptr[lane] with ptr[lane + 16], then + 8, + 4, + 2 and
     * + 1, storing each intermediate result back to ptr[lane]; ptr must therefore
     * have at least 32 accessible elements and is overwritten. Every calling
     * thread (including lanes 16..31) returns *ptr.
     *
     * NOTE(review): there is no explicit synchronization between the steps; the
     * code relies on the volatile accesses and on the lanes of the warp executing
     * in lockstep -- confirm this holds on the target architecture.
     */
    template <class T, class BinOp>
    static __device__ __forceinline__ T reduce(volatile T *ptr, BinOp op)
    {
        const unsigned int lane = laneId();
        if (lane < 16)
        {
            T partial = ptr[lane];
            ptr[lane] = partial = op(partial, ptr[lane + 16]);
            ptr[lane] = partial = op(partial, ptr[lane + 8]);
            ptr[lane] = partial = op(partial, ptr[lane + 4]);
            ptr[lane] = partial = op(partial, ptr[lane + 2]);
            ptr[lane] = partial = op(partial, ptr[lane + 1]);
        }
        return *ptr;
    }

    /**
     * \brief Fills [beg, end) with consecutive values: element i receives value + i.
     *
     * The calling lane starts at value + laneId() and adds STRIDE per step, in
     * sync with its position in the output range.
     */
    template<typename OutIt, typename T>
    static __device__ __forceinline__ void yota(OutIt beg, OutIt end, T value)
    {
        unsigned int lane = laneId();
        value += lane;
        for(OutIt t = beg + lane; t < end; t += STRIDE, value += STRIDE)
            *t = value;
    }
};
}}} // namespace cv { namespace cuda { namespace device
//! @endcond
#endif /* __OPENCV_CUDA_DEVICE_WARP_HPP__ */

View File

@ -0,0 +1,76 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CUDA_WARP_REDUCE_HPP__
#define OPENCV_CUDA_WARP_REDUCE_HPP__
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
/**
 * Adds up, with operator+, the elements of ptr that belong to the calling
 * thread's warp and returns the value stored at the warp's first slot.
 *
 * Threads whose lane index is below 16 fold ptr[tid + 16], + 8, + 4, + 2 and + 1
 * into ptr[tid], writing every intermediate sum back; ptr is overwritten and
 * must be accessible up to index tid + 16 for those threads.
 *
 * NOTE(review): no explicit synchronization separates the steps; the code relies
 * on the volatile accesses and on lockstep execution of the warp -- confirm for
 * the target architecture.
 *
 * @param ptr  array holding one value per thread, indexed by tid
 * @param tid  index of the calling thread's slot in ptr (defaults to threadIdx.x)
 * @return     ptr[tid - lane], i.e. the slot of the warp's lane 0
 */
template <class T>
__device__ __forceinline__ T warp_reduce(volatile T *ptr , const unsigned int tid = threadIdx.x)
{
    // position of the thread inside its warp (0..31)
    const unsigned int laneIdx = tid & 31;

    if (laneIdx < 16)
    {
        T acc = ptr[tid];

        acc = acc + ptr[tid + 16];
        ptr[tid] = acc;

        acc = acc + ptr[tid + 8];
        ptr[tid] = acc;

        acc = acc + ptr[tid + 4];
        ptr[tid] = acc;

        acc = acc + ptr[tid + 2];
        ptr[tid] = acc;

        acc = acc + ptr[tid + 1];
        ptr[tid] = acc;
    }

    // the total is read from the slot owned by lane 0 of this warp
    return ptr[tid - laneIdx];
}
}}} // namespace cv { namespace cuda { namespace device
//! @endcond
#endif /* OPENCV_CUDA_WARP_REDUCE_HPP__ */

View File

@ -0,0 +1,153 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_CUDA_WARP_SHUFFLE_HPP__
#define __OPENCV_CUDA_WARP_SHUFFLE_HPP__
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
/**
 * Generic wrapper around the CUDA warp shuffle intrinsic: forwards val, srcLane
 * and width to __shfl (or to __shfl_sync on CUDA 9 and later).
 *
 * When compiled for __CUDA_ARCH__ < 300 (or in the host pass) no shuffle is
 * performed and a value-initialized T is returned.
 *
 * NOTE(review): the CUDA 9+ path passes the full-warp mask 0xFFFFFFFF, i.e. it
 * assumes all 32 lanes of the warp execute the call -- confirm for callers that
 * shuffle inside divergent code.
 */
template <typename T>
__device__ __forceinline__ T shfl(T val, int srcLane, int width = warpSize)
{
#if __CUDA_ARCH__ >= 300
#if defined(__CUDACC_VER_MAJOR__) && __CUDACC_VER_MAJOR__ >= 9
    // the mask-less __shfl is deprecated since CUDA 9 and unavailable for sm_70+
    return __shfl_sync(0xFFFFFFFFU, val, srcLane, width);
#else
    return __shfl(val, srcLane, width);
#endif
#else
    return T();
#endif
}
/**
 * unsigned int overload of shfl: the value is shuffled as an int and cast back.
 *
 * Uses __shfl_sync with a full-warp mask on CUDA 9 and later, the legacy __shfl
 * otherwise. Returns 0 without shuffling when compiled for __CUDA_ARCH__ < 300.
 *
 * NOTE(review): the full mask assumes all 32 lanes execute the call -- confirm.
 */
__device__ __forceinline__ unsigned int shfl(unsigned int val, int srcLane, int width = warpSize)
{
#if __CUDA_ARCH__ >= 300
#if defined(__CUDACC_VER_MAJOR__) && __CUDACC_VER_MAJOR__ >= 9
    // the mask-less __shfl is deprecated since CUDA 9 and unavailable for sm_70+
    return (unsigned int) __shfl_sync(0xFFFFFFFFU, (int) val, srcLane, width);
#else
    return (unsigned int) __shfl((int) val, srcLane, width);
#endif
#else
    return 0;
#endif
}
/**
 * double overload of shfl: the value is split into its low and high 32-bit
 * halves, each half is shuffled separately, and the halves are recombined.
 *
 * Uses __shfl_sync with a full-warp mask on CUDA 9 and later, the legacy __shfl
 * otherwise. Returns 0.0 without shuffling when compiled for __CUDA_ARCH__ < 300.
 *
 * NOTE(review): the full mask assumes all 32 lanes execute the call -- confirm.
 */
__device__ __forceinline__ double shfl(double val, int srcLane, int width = warpSize)
{
#if __CUDA_ARCH__ >= 300
    int lo = __double2loint(val);
    int hi = __double2hiint(val);
#if defined(__CUDACC_VER_MAJOR__) && __CUDACC_VER_MAJOR__ >= 9
    // the mask-less __shfl is deprecated since CUDA 9 and unavailable for sm_70+
    lo = __shfl_sync(0xFFFFFFFFU, lo, srcLane, width);
    hi = __shfl_sync(0xFFFFFFFFU, hi, srcLane, width);
#else
    lo = __shfl(lo, srcLane, width);
    hi = __shfl(hi, srcLane, width);
#endif
    return __hiloint2double(hi, lo);
#else
    return 0.0;
#endif
}
/**
 * Generic wrapper around the CUDA shuffle-down intrinsic: forwards val, delta
 * and width to __shfl_down (or to __shfl_down_sync on CUDA 9 and later).
 *
 * When compiled for __CUDA_ARCH__ < 300 (or in the host pass) no shuffle is
 * performed and a value-initialized T is returned.
 *
 * NOTE(review): the CUDA 9+ path passes the full-warp mask 0xFFFFFFFF, i.e. it
 * assumes all 32 lanes of the warp execute the call -- confirm for callers that
 * shuffle inside divergent code.
 */
template <typename T>
__device__ __forceinline__ T shfl_down(T val, unsigned int delta, int width = warpSize)
{
#if __CUDA_ARCH__ >= 300
#if defined(__CUDACC_VER_MAJOR__) && __CUDACC_VER_MAJOR__ >= 9
    // the mask-less __shfl_down is deprecated since CUDA 9 and unavailable for sm_70+
    return __shfl_down_sync(0xFFFFFFFFU, val, delta, width);
#else
    return __shfl_down(val, delta, width);
#endif
#else
    return T();
#endif
}
/**
 * unsigned int overload of shfl_down: the value is shuffled as an int and cast
 * back.
 *
 * Uses __shfl_down_sync with a full-warp mask on CUDA 9 and later, the legacy
 * __shfl_down otherwise. Returns 0 without shuffling when compiled for
 * __CUDA_ARCH__ < 300.
 *
 * NOTE(review): the full mask assumes all 32 lanes execute the call -- confirm.
 */
__device__ __forceinline__ unsigned int shfl_down(unsigned int val, unsigned int delta, int width = warpSize)
{
#if __CUDA_ARCH__ >= 300
#if defined(__CUDACC_VER_MAJOR__) && __CUDACC_VER_MAJOR__ >= 9
    // the mask-less __shfl_down is deprecated since CUDA 9 and unavailable for sm_70+
    return (unsigned int) __shfl_down_sync(0xFFFFFFFFU, (int) val, delta, width);
#else
    return (unsigned int) __shfl_down((int) val, delta, width);
#endif
#else
    return 0;
#endif
}
/**
 * double overload of shfl_down: the value is split into its low and high 32-bit
 * halves, each half is shuffled separately, and the halves are recombined.
 *
 * Uses __shfl_down_sync with a full-warp mask on CUDA 9 and later, the legacy
 * __shfl_down otherwise. Returns 0.0 without shuffling when compiled for
 * __CUDA_ARCH__ < 300.
 *
 * NOTE(review): the full mask assumes all 32 lanes execute the call -- confirm.
 */
__device__ __forceinline__ double shfl_down(double val, unsigned int delta, int width = warpSize)
{
#if __CUDA_ARCH__ >= 300
    int lo = __double2loint(val);
    int hi = __double2hiint(val);
#if defined(__CUDACC_VER_MAJOR__) && __CUDACC_VER_MAJOR__ >= 9
    // the mask-less __shfl_down is deprecated since CUDA 9 and unavailable for sm_70+
    lo = __shfl_down_sync(0xFFFFFFFFU, lo, delta, width);
    hi = __shfl_down_sync(0xFFFFFFFFU, hi, delta, width);
#else
    lo = __shfl_down(lo, delta, width);
    hi = __shfl_down(hi, delta, width);
#endif
    return __hiloint2double(hi, lo);
#else
    return 0.0;
#endif
}
/**
 * Generic wrapper around the CUDA shuffle-up intrinsic: forwards val, delta and
 * width to __shfl_up (or to __shfl_up_sync on CUDA 9 and later).
 *
 * When compiled for __CUDA_ARCH__ < 300 (or in the host pass) no shuffle is
 * performed and a value-initialized T is returned.
 *
 * NOTE(review): the CUDA 9+ path passes the full-warp mask 0xFFFFFFFF, i.e. it
 * assumes all 32 lanes of the warp execute the call -- confirm for callers that
 * shuffle inside divergent code.
 */
template <typename T>
__device__ __forceinline__ T shfl_up(T val, unsigned int delta, int width = warpSize)
{
#if __CUDA_ARCH__ >= 300
#if defined(__CUDACC_VER_MAJOR__) && __CUDACC_VER_MAJOR__ >= 9
    // the mask-less __shfl_up is deprecated since CUDA 9 and unavailable for sm_70+
    return __shfl_up_sync(0xFFFFFFFFU, val, delta, width);
#else
    return __shfl_up(val, delta, width);
#endif
#else
    return T();
#endif
}
/**
 * unsigned int overload of shfl_up: the value is shuffled as an int and cast
 * back.
 *
 * Uses __shfl_up_sync with a full-warp mask on CUDA 9 and later, the legacy
 * __shfl_up otherwise. Returns 0 without shuffling when compiled for
 * __CUDA_ARCH__ < 300.
 *
 * NOTE(review): the full mask assumes all 32 lanes execute the call -- confirm.
 */
__device__ __forceinline__ unsigned int shfl_up(unsigned int val, unsigned int delta, int width = warpSize)
{
#if __CUDA_ARCH__ >= 300
#if defined(__CUDACC_VER_MAJOR__) && __CUDACC_VER_MAJOR__ >= 9
    // the mask-less __shfl_up is deprecated since CUDA 9 and unavailable for sm_70+
    return (unsigned int) __shfl_up_sync(0xFFFFFFFFU, (int) val, delta, width);
#else
    return (unsigned int) __shfl_up((int) val, delta, width);
#endif
#else
    return 0;
#endif
}
/**
 * double overload of shfl_up: the value is split into its low and high 32-bit
 * halves, each half is shuffled separately, and the halves are recombined.
 *
 * Uses __shfl_up_sync with a full-warp mask on CUDA 9 and later, the legacy
 * __shfl_up otherwise. Returns 0.0 without shuffling when compiled for
 * __CUDA_ARCH__ < 300.
 *
 * NOTE(review): the full mask assumes all 32 lanes execute the call -- confirm.
 */
__device__ __forceinline__ double shfl_up(double val, unsigned int delta, int width = warpSize)
{
#if __CUDA_ARCH__ >= 300
    int lo = __double2loint(val);
    int hi = __double2hiint(val);
#if defined(__CUDACC_VER_MAJOR__) && __CUDACC_VER_MAJOR__ >= 9
    // the mask-less __shfl_up is deprecated since CUDA 9 and unavailable for sm_70+
    lo = __shfl_up_sync(0xFFFFFFFFU, lo, delta, width);
    hi = __shfl_up_sync(0xFFFFFFFFU, hi, delta, width);
#else
    lo = __shfl_up(lo, delta, width);
    hi = __shfl_up(hi, delta, width);
#endif
    return __hiloint2double(hi, lo);
#else
    return 0.0;
#endif
}
}}}
//! @endcond
#endif // __OPENCV_CUDA_WARP_SHUFFLE_HPP__

View File

@ -0,0 +1,87 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_CORE_CUDA_STREAM_ACCESSOR_HPP__
#define __OPENCV_CORE_CUDA_STREAM_ACCESSOR_HPP__
#ifndef __cplusplus
# error cuda_stream_accessor.hpp header must be compiled as C++
#endif
/** @file cuda_stream_accessor.hpp
* This is the only header file that depends on the CUDA Runtime API. All other headers are independent of it.
*/
#include <cuda_runtime.h>
#include "opencv2/core/cvdef.h"
namespace cv
{
namespace cuda
{
//! @addtogroup cudacore_struct
//! @{
class Stream;
class Event;
/** @brief Class that enables getting cudaStream_t from cuda::Stream
*
* Only declares the accessor; the definition is not part of this header.
*/
struct StreamAccessor
{
//! Returns the cudaStream_t that corresponds to @p stream.
CV_EXPORTS static cudaStream_t getStream(const Stream& stream);
};
/** @brief Class that enables getting cudaEvent_t from cuda::Event
*
* Only declares the accessor; the definition is not part of this header.
*/
struct EventAccessor
{
//! Returns the cudaEvent_t that corresponds to @p event.
CV_EXPORTS static cudaEvent_t getEvent(const Event& event);
};
//! @}
}
}
#endif /* __OPENCV_CORE_CUDA_STREAM_ACCESSOR_HPP__ */

View File

@ -0,0 +1,135 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_CORE_CUDA_TYPES_HPP__
#define __OPENCV_CORE_CUDA_TYPES_HPP__
#ifndef __cplusplus
# error cuda_types.hpp header must be compiled as C++
#endif
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
// Qualifier placed on the member functions below: when __CUDACC__ is defined
// it expands to __host__ __device__ __forceinline__; otherwise it expands to
// nothing, so the same declarations compile as plain C++.
#ifdef __CUDACC__
#define __CV_CUDA_HOST_DEVICE__ __host__ __device__ __forceinline__
#else
#define __CV_CUDA_HOST_DEVICE__
#endif
namespace cv
{
namespace cuda
{
// Simple lightweight structures that encapsulate information about an image on the device.
// They are intended to be passed to nvcc-compiled code. GpuMat depends on headers that nvcc can't compile.
// Thin wrapper around a raw T* pointer; base class of the Ptr* structures below.
// The struct has no destructor, so it never releases the memory it points to.
template <typename T> struct DevPtr
{
typedef T elem_type;
typedef int index_type;
// Size of one element in bytes, as a compile-time constant.
enum { elem_size = sizeof(elem_type) };
// The wrapped pointer (null after default construction).
T* data;
__CV_CUDA_HOST_DEVICE__ DevPtr() : data(0) {}
__CV_CUDA_HOST_DEVICE__ DevPtr(T* data_) : data(data_) {}
// Returns elem_size, i.e. sizeof(T).
__CV_CUDA_HOST_DEVICE__ size_t elemSize() const { return elem_size; }
// Implicit conversions back to the raw pointer.
__CV_CUDA_HOST_DEVICE__ operator T*() { return data; }
__CV_CUDA_HOST_DEVICE__ operator const T*() const { return data; }
};
// DevPtr plus a size field for one-dimensional data.
template <typename T> struct PtrSz : public DevPtr<T>
{
__CV_CUDA_HOST_DEVICE__ PtrSz() : size(0) {}
__CV_CUDA_HOST_DEVICE__ PtrSz(T* data_, size_t size_) : DevPtr<T>(data_), size(size_) {}
// NOTE(review): presumably a count of elements rather than bytes; nothing in
// this struct uses the field, so confirm against callers.
size_t size;
};
// DevPtr plus a row step, giving (row, column) addressing over the wrapped pointer.
template <typename T> struct PtrStep : public DevPtr<T>
{
__CV_CUDA_HOST_DEVICE__ PtrStep() : step(0) {}
__CV_CUDA_HOST_DEVICE__ PtrStep(T* data_, size_t step_) : DevPtr<T>(data_), step(step_) {}
// Distance between the starts of consecutive rows, in bytes: ptr() adds
// y * step to a char* view of data.
size_t step;
// Pointer to the first element of row y. No bounds checking is done.
__CV_CUDA_HOST_DEVICE__ T* ptr(int y = 0) { return ( T*)( ( char*)DevPtr<T>::data + y * step); }
__CV_CUDA_HOST_DEVICE__ const T* ptr(int y = 0) const { return (const T*)( (const char*)DevPtr<T>::data + y * step); }
// Element at row y, column x (row first, then column).
__CV_CUDA_HOST_DEVICE__ T& operator ()(int y, int x) { return ptr(y)[x]; }
__CV_CUDA_HOST_DEVICE__ const T& operator ()(int y, int x) const { return ptr(y)[x]; }
};
// PtrStep plus the image dimensions (rows and cols).
template <typename T> struct PtrStepSz : public PtrStep<T>
{
__CV_CUDA_HOST_DEVICE__ PtrStepSz() : cols(0), rows(0) {}
// Note the argument order: rows first, then cols, then the pointer and step.
__CV_CUDA_HOST_DEVICE__ PtrStepSz(int rows_, int cols_, T* data_, size_t step_)
: PtrStep<T>(data_, step_), cols(cols_), rows(rows_) {}
// Explicit conversion from a PtrStepSz of another element type. The pointer is
// reinterpreted with a C-style cast; step, cols and rows are copied unchanged
// and no element data is converted.
// NOTE(review): unlike the other constructors this one is not marked
// __CV_CUDA_HOST_DEVICE__ - confirm whether that is intentional.
template <typename U>
explicit PtrStepSz(const PtrStepSz<U>& d) : PtrStep<T>((T*)d.data, d.step), cols(d.cols), rows(d.rows){}
int cols;
int rows;
};
// Short aliases for the common element types; suffix b = unsigned char,
// f = float, i = int.
typedef PtrStepSz<unsigned char> PtrStepSzb;
typedef PtrStepSz<float> PtrStepSzf;
typedef PtrStepSz<int> PtrStepSzi;
typedef PtrStep<unsigned char> PtrStepb;
typedef PtrStep<float> PtrStepf;
typedef PtrStep<int> PtrStepi;
}
}
//! @endcond
#endif /* __OPENCV_CORE_CUDA_TYPES_HPP__ */

231
3rdparty/include/opencv2/core/cvdef.h vendored Normal file
View File

@ -0,0 +1,231 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2015, Itseez Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_CORE_CVDEF_H__
#define __OPENCV_CORE_CVDEF_H__
#if !defined _CRT_SECURE_NO_DEPRECATE && defined _MSC_VER && _MSC_VER > 1300
# define _CRT_SECURE_NO_DEPRECATE /* to avoid multiple Visual Studio warnings */
#endif
// undef problematic defines sometimes defined by system headers (windows.h in particular)
#undef small
#undef min
#undef max
#undef abs
#undef Complex
#include "opencv2/hal/defs.h"
#ifdef __OPENCV_BUILD
# define DISABLE_OPENCV_24_COMPATIBILITY
#endif
// CV_EXPORTS marks symbols that must be visible outside the library:
//  - Windows-like targets with CVAPI_EXPORTS defined: __declspec(dllexport)
//  - GCC-compatible compilers, version 4 or later: default symbol visibility
//  - anything else: expands to nothing
#if (defined WIN32 || defined _WIN32 || defined WINCE || defined __CYGWIN__) && defined CVAPI_EXPORTS
# define CV_EXPORTS __declspec(dllexport)
#elif defined __GNUC__ && __GNUC__ >= 4
# define CV_EXPORTS __attribute__ ((visibility ("default")))
#else
# define CV_EXPORTS
#endif
// CV_EXTERN_C expands to extern "C" when compiling as C++ and to nothing in C.
// An existing definition of the macro is left untouched.
#ifndef CV_EXTERN_C
# ifdef __cplusplus
# define CV_EXTERN_C extern "C"
# else
# define CV_EXTERN_C
# endif
#endif
/* special informative macros for wrapper generators.
   For the compiler, every CV_EXPORTS_* variant is just CV_EXPORTS (the synonym
   argument is discarded) and the remaining markers expand to nothing. */
#define CV_EXPORTS_W CV_EXPORTS
#define CV_EXPORTS_W_SIMPLE CV_EXPORTS
#define CV_EXPORTS_AS(synonym) CV_EXPORTS
#define CV_EXPORTS_W_MAP CV_EXPORTS
#define CV_IN_OUT
#define CV_OUT
#define CV_PROP
#define CV_PROP_RW
#define CV_WRAP
#define CV_WRAP_AS(synonym)
/****************************************************************************************\
* Matrix type (Mat) *
\****************************************************************************************/
/* A type code packs the depth into the low CV_CN_SHIFT bits and
   (channel count - 1) into the bits above them. */
#define CV_CN_MAX 512
#define CV_CN_SHIFT 3
/* Number of distinct depth codes that fit in CV_CN_SHIFT bits (8). */
#define CV_DEPTH_MAX (1 << CV_CN_SHIFT)
/* Depth codes; the per-depth byte sizes are encoded in CV_ELEM_SIZE1. */
#define CV_8U 0
#define CV_8S 1
#define CV_16U 2
#define CV_16S 3
#define CV_32S 4
#define CV_32F 5
#define CV_64F 6
#define CV_USRTYPE1 7
/* Extracts the depth code from a type or flags value. */
#define CV_MAT_DEPTH_MASK (CV_DEPTH_MAX - 1)
#define CV_MAT_DEPTH(flags) ((flags) & CV_MAT_DEPTH_MASK)
/* Builds a type code from a depth and a channel count cn (cn >= 1). */
#define CV_MAKETYPE(depth,cn) (CV_MAT_DEPTH(depth) + (((cn)-1) << CV_CN_SHIFT))
#define CV_MAKE_TYPE CV_MAKETYPE
/* Predefined type codes. CV_<depth>C<k> is CV_MAKETYPE(<depth>, k) for k = 1..4;
   CV_<depth>C(n) builds the code for an arbitrary channel count n. */
#define CV_8UC1 CV_MAKETYPE(CV_8U,1)
#define CV_8UC2 CV_MAKETYPE(CV_8U,2)
#define CV_8UC3 CV_MAKETYPE(CV_8U,3)
#define CV_8UC4 CV_MAKETYPE(CV_8U,4)
#define CV_8UC(n) CV_MAKETYPE(CV_8U,(n))
#define CV_8SC1 CV_MAKETYPE(CV_8S,1)
#define CV_8SC2 CV_MAKETYPE(CV_8S,2)
#define CV_8SC3 CV_MAKETYPE(CV_8S,3)
#define CV_8SC4 CV_MAKETYPE(CV_8S,4)
#define CV_8SC(n) CV_MAKETYPE(CV_8S,(n))
#define CV_16UC1 CV_MAKETYPE(CV_16U,1)
#define CV_16UC2 CV_MAKETYPE(CV_16U,2)
#define CV_16UC3 CV_MAKETYPE(CV_16U,3)
#define CV_16UC4 CV_MAKETYPE(CV_16U,4)
#define CV_16UC(n) CV_MAKETYPE(CV_16U,(n))
#define CV_16SC1 CV_MAKETYPE(CV_16S,1)
#define CV_16SC2 CV_MAKETYPE(CV_16S,2)
#define CV_16SC3 CV_MAKETYPE(CV_16S,3)
#define CV_16SC4 CV_MAKETYPE(CV_16S,4)
#define CV_16SC(n) CV_MAKETYPE(CV_16S,(n))
#define CV_32SC1 CV_MAKETYPE(CV_32S,1)
#define CV_32SC2 CV_MAKETYPE(CV_32S,2)
#define CV_32SC3 CV_MAKETYPE(CV_32S,3)
#define CV_32SC4 CV_MAKETYPE(CV_32S,4)
#define CV_32SC(n) CV_MAKETYPE(CV_32S,(n))
#define CV_32FC1 CV_MAKETYPE(CV_32F,1)
#define CV_32FC2 CV_MAKETYPE(CV_32F,2)
#define CV_32FC3 CV_MAKETYPE(CV_32F,3)
#define CV_32FC4 CV_MAKETYPE(CV_32F,4)
#define CV_32FC(n) CV_MAKETYPE(CV_32F,(n))
#define CV_64FC1 CV_MAKETYPE(CV_64F,1)
#define CV_64FC2 CV_MAKETYPE(CV_64F,2)
#define CV_64FC3 CV_MAKETYPE(CV_64F,3)
#define CV_64FC4 CV_MAKETYPE(CV_64F,4)
#define CV_64FC(n) CV_MAKETYPE(CV_64F,(n))
/* Channel field: the bits above the depth. CV_MAT_CN returns the channel
   count, i.e. the stored (count - 1) value plus one. */
#define CV_MAT_CN_MASK ((CV_CN_MAX - 1) << CV_CN_SHIFT)
#define CV_MAT_CN(flags) ((((flags) & CV_MAT_CN_MASK) >> CV_CN_SHIFT) + 1)
/* Full type code (depth and channel fields together). */
#define CV_MAT_TYPE_MASK (CV_DEPTH_MAX*CV_CN_MAX - 1)
#define CV_MAT_TYPE(flags) ((flags) & CV_MAT_TYPE_MASK)
/* Continuity flag: bit 14 of the flags word. CV_IS_MAT_CONT yields the masked
   bit (zero or CV_MAT_CONT_FLAG), not a normalized 0/1. */
#define CV_MAT_CONT_FLAG_SHIFT 14
#define CV_MAT_CONT_FLAG (1 << CV_MAT_CONT_FLAG_SHIFT)
#define CV_IS_MAT_CONT(flags) ((flags) & CV_MAT_CONT_FLAG)
#define CV_IS_CONT_MAT CV_IS_MAT_CONT
/* Sub-matrix flag: bit 15 of the flags word. */
#define CV_SUBMAT_FLAG_SHIFT 15
#define CV_SUBMAT_FLAG (1 << CV_SUBMAT_FLAG_SHIFT)
/* Yields the masked sub-matrix bit (zero or CV_SUBMAT_FLAG).
   This tests CV_SUBMAT_FLAG, the flag defined on the line above; the macro
   previously named CV_MAT_SUBMAT_FLAG, which this header does not define, so
   any use of CV_IS_SUBMAT failed to compile. */
#define CV_IS_SUBMAT(flags) ((flags) & CV_SUBMAT_FLAG)
/* Size of each channel item,
0x8442211 = 1000 0100 0100 0010 0010 0001 0001 ~ array of sizeof(arr_type_elem).
Nibble i (counting from the least significant) holds the byte size for depth
code i; sizeof(size_t) << 28 supplies the nibble for depth 7 (CV_USRTYPE1). */
#define CV_ELEM_SIZE1(type) \
((((sizeof(size_t)<<28)|0x8442211) >> CV_MAT_DEPTH(type)*4) & 15)
/* 0x3a50 = 11 10 10 01 01 00 00 ~ array of log2(sizeof(arr_type_elem)).
Each 2-bit field holds log2 of the channel size for one depth code; the element
size is the channel count shifted left by that amount. The
(sizeof(size_t)/4+1)*16384 term fills the field for depth 7. */
#define CV_ELEM_SIZE(type) \
(CV_MAT_CN(type) << ((((sizeof(size_t)/4+1)*16384|0x3a50) >> CV_MAT_DEPTH(type)*2) & 3))
/* Generic minimum / maximum macros, defined only if not already provided.
   Each argument appears twice in the expansion, so arguments with side
   effects (e.g. i++) are evaluated more than once. */
#ifndef MIN
# define MIN(a,b) ((a) > (b) ? (b) : (a))
#endif
#ifndef MAX
# define MAX(a,b) ((a) < (b) ? (b) : (a))
#endif
/****************************************************************************************\
* exchange-add operation for atomic operations on reference counters *
\****************************************************************************************/
// CV_XADD(addr, delta): adds delta to the int at addr and yields the value
// that was stored there before the addition. The implementation is selected
// per compiler; every branch except the last uses an atomic primitive.
#if defined __INTEL_COMPILER && !(defined WIN32 || defined _WIN32)
// atomic increment on the linux version of the Intel(tm) compiler
# define CV_XADD(addr, delta) (int)_InterlockedExchangeAdd(const_cast<void*>(reinterpret_cast<volatile void*>(addr)), delta)
#elif defined __GNUC__
# if defined __clang__ && __clang_major__ >= 3 && !defined __ANDROID__ && !defined __EMSCRIPTEN__ && !defined(__CUDACC__)
# ifdef __ATOMIC_ACQ_REL
# define CV_XADD(addr, delta) __c11_atomic_fetch_add((_Atomic(int)*)(addr), delta, __ATOMIC_ACQ_REL)
# else
// NOTE(review): the literal 4 is presumably the numeric value of
// __ATOMIC_ACQ_REL for compilers that do not predefine the macro - confirm.
# define CV_XADD(addr, delta) __atomic_fetch_add((_Atomic(int)*)(addr), delta, 4)
# endif
# else
# if defined __ATOMIC_ACQ_REL && !defined __clang__
// version for gcc >= 4.7
# define CV_XADD(addr, delta) (int)__atomic_fetch_add((unsigned*)(addr), (unsigned)(delta), __ATOMIC_ACQ_REL)
# else
# define CV_XADD(addr, delta) (int)__sync_fetch_and_add((unsigned*)(addr), (unsigned)(delta))
# endif
# endif
#elif defined _MSC_VER && !defined RC_INVOKED
# include <intrin.h>
# define CV_XADD(addr, delta) (int)_InterlockedExchangeAdd((long volatile*)addr, delta)
#else
// Fallback for unknown compilers: a plain read-modify-write with no atomicity.
// The return type must be spelled out; a function definition without one is
// ill-formed in C++.
CV_INLINE int CV_XADD(int* addr, int delta) { int tmp = *addr; *addr += delta; return tmp; }
#endif
/****************************************************************************************\
* CV_NORETURN attribute *
\****************************************************************************************/
// CV_NORETURN marks a function as never returning, using the compiler's own
// attribute syntax: __attribute__((__noreturn__)) for GCC-compatible compilers,
// __declspec(noreturn) for MSVC 1300 and later, and nothing elsewhere.
// An existing definition of the macro is left untouched.
#ifndef CV_NORETURN
# if defined(__GNUC__)
# define CV_NORETURN __attribute__((__noreturn__))
# elif defined(_MSC_VER) && (_MSC_VER >= 1300)
# define CV_NORETURN __declspec(noreturn)
# else
# define CV_NORETURN /* nothing by default */
# endif
#endif
#endif // __OPENCV_CORE_CVDEF_H__

1059
3rdparty/include/opencv2/core/cvstd.hpp vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,267 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_CORE_CVSTDINL_HPP__
#define __OPENCV_CORE_CVSTDINL_HPP__
#ifndef OPENCV_NOSTL
# include <complex>
# include <ostream>
#endif
//! @cond IGNORED
namespace cv
{
#ifndef OPENCV_NOSTL
// DataType specialization for std::complex<_Tp>: a complex number is described
// as a 2-channel element whose channel type is _Tp.
template<typename _Tp> class DataType< std::complex<_Tp> >
{
public:
typedef std::complex<_Tp> value_type;
typedef value_type work_type;
typedef _Tp channel_type;
// depth and the base of fmt are taken from DataType<channel_type>; the channel
// count is folded into fmt as (channels - 1) << 8, and type is the type code
// for (depth, 2 channels).
enum { generic_type = 0,
depth = DataType<channel_type>::depth,
channels = 2,
fmt = DataType<channel_type>::fmt + ((channels - 1) << 8),
type = CV_MAKETYPE(depth, channels) };
typedef Vec<channel_type, channels> vec_type;
};
// Constructs a String holding a copy of the characters of a std::string.
// An empty source copies nothing and leaves cstr_ null and len_ zero.
inline
String::String(const std::string& str)
    : cstr_(0), len_(0)
{
    if (str.empty())
        return;

    const size_t byteCount = str.size();
    // allocate() is declared elsewhere; its result is used as the copy destination.
    memcpy(allocate(byteCount), str.c_str(), byteCount);
}
// Constructs a String from a sub-range of a std::string.
//   str - source string
//   pos - index of the first character to copy; a value past the end is
//         clamped to str.size()
//   len - maximum number of characters to copy; clamped to what remains
//         after pos
// If the resulting range is empty, nothing is copied and the String keeps
// cstr_ null and len_ zero.
inline
String::String(const std::string& str, size_t pos, size_t len)
    : cstr_(0), len_(0)
{
    size_t strlen = str.size();
    // Clamp the start position to the end of the source. This has to be min:
    // with max, pos was always pushed to at least str.size(), so an in-range
    // request produced an empty string and an out-of-range one made
    // strlen - pos wrap around, letting memcpy read outside the source buffer.
    pos = min(pos, strlen);
    len = min(strlen - pos, len);
    if (!len) return;
    memcpy(allocate(len), str.c_str() + pos, len);
}
// Assigns the characters of a std::string. The current contents are released
// first; an empty source then copies nothing.
inline
String& String::operator = (const std::string& str)
{
    deallocate();
    if (str.empty())
        return *this;

    const size_t byteCount = str.size();
    memcpy(allocate(byteCount), str.c_str(), byteCount);
    return *this;
}
// Appends a std::string by building the concatenation with operator+ and
// assigning the result back to this object.
inline
String& String::operator += (const std::string& str)
{
*this = *this + str;
return *this;
}
// Converts to std::string by copying len_ characters starting at cstr_.
inline
String::operator std::string() const
{
    // The std::string-based constructors in this file leave cstr_ null for an
    // empty String (cstr_(0), len_(0)). Return an empty std::string in that
    // case rather than passing a null pointer to the (pointer, length)
    // constructor.
    return cstr_ ? std::string(cstr_, len_) : std::string();
}
// Concatenation: cv::String followed by std::string.
// The result buffer is sized for both parts, then each part is copied into place.
inline
String operator + (const String& lhs, const std::string& rhs)
{
    const size_t leftLen = lhs.len_;
    const size_t rightLen = rhs.size();

    String joined;
    joined.allocate(leftLen + rightLen);
    memcpy(joined.cstr_, lhs.cstr_, leftLen);
    memcpy(joined.cstr_ + leftLen, rhs.c_str(), rightLen);
    return joined;
}
// Concatenation: std::string followed by cv::String.
// The result buffer is sized for both parts, then each part is copied into place.
inline
String operator + (const std::string& lhs, const String& rhs)
{
    const size_t leftLen = lhs.size();
    const size_t rightLen = rhs.len_;

    String joined;
    joined.allocate(leftLen + rightLen);
    memcpy(joined.cstr_, lhs.c_str(), leftLen);
    memcpy(joined.cstr_ + leftLen, rhs.cstr_, rightLen);
    return joined;
}
// Reads this node into a cv::String via read() and returns it converted to
// std::string.
inline
FileNode::operator std::string() const
{
String value;
// The same (empty) String is passed as both the output and the default value.
read(*this, value, value);
return value;
}
// Specialization of FileNode extraction for std::string: reads into a
// temporary cv::String via read() and assigns it to value.
template<> inline
void operator >> (const FileNode& n, std::string& value)
{
String val;
// The same (empty) String is passed as both the output and the default value.
read(n, val, val);
value = val;
}
// Specialization of FileStorage insertion for std::string: converts the value
// to cv::String and forwards to the cv::String insertion operator.
template<> inline
FileStorage& operator << (FileStorage& fs, const std::string& value)
{
return fs << cv::String(value);
}
// Streams a cv::String to a std::ostream. The text is written through c_str(),
// i.e. as a NUL-terminated C string, so output stops at the first NUL character.
static inline
std::ostream& operator << (std::ostream& os, const String& str)
{
return os << str.c_str();
}
// Streams a Formatted object: it is reset, then every chunk returned by next()
// is written to the stream until next() returns a null pointer.
static inline
std::ostream& operator << (std::ostream& out, Ptr<Formatted> fmtd)
{
    fmtd->reset();
    const char* chunk = fmtd->next();
    while (chunk)
    {
        out << chunk;
        chunk = fmtd->next();
    }
    return out;
}
// Streams a Mat using the formatter returned by Formatter::get().
static inline
std::ostream& operator << (std::ostream& out, const Mat& mtx)
{
    out << Formatter::get()->format(mtx);
    return out;
}
// Streams a vector of 2D points: the vector is wrapped in a Mat and printed
// with the formatter returned by Formatter::get().
template<typename _Tp> static inline
std::ostream& operator << (std::ostream& out, const std::vector<Point_<_Tp> >& vec)
{
    out << Formatter::get()->format(Mat(vec));
    return out;
}
// Streams a vector of 3D points: the vector is wrapped in a Mat and printed
// with the formatter returned by Formatter::get().
template<typename _Tp> static inline
std::ostream& operator << (std::ostream& out, const std::vector<Point3_<_Tp> >& vec)
{
    out << Formatter::get()->format(Mat(vec));
    return out;
}
// Streams a Matx: it is converted to a Mat and printed with the formatter
// returned by Formatter::get().
template<typename _Tp, int m, int n> static inline
std::ostream& operator << (std::ostream& out, const Matx<_Tp, m, n>& matx)
{
    out << Formatter::get()->format(Mat(matx));
    return out;
}
// Streams a 2D point as "[x, y]".
template<typename _Tp> static inline
std::ostream& operator << (std::ostream& out, const Point_<_Tp>& p)
{
    return out << "[" << p.x << ", " << p.y << "]";
}
// Streams a 3D point as "[x, y, z]".
template<typename _Tp> static inline
std::ostream& operator << (std::ostream& out, const Point3_<_Tp>& p)
{
    return out << "[" << p.x << ", " << p.y << ", " << p.z << "]";
}
// Streams a Vec as "[v0, v1, ..., vn-1]".
// Elements whose depth code is below CV_32F are cast to int before printing
// (presumably so that char-sized elements print as numbers rather than
// characters); all other elements are streamed as they are.
template<typename _Tp, int n> static inline
std::ostream& operator << (std::ostream& out, const Vec<_Tp, n>& vec)
{
out << "[";
// The condition below is a compile-time constant, so MSVC warning 4127 is
// suppressed around it.
#ifdef _MSC_VER
#pragma warning( push )
#pragma warning( disable: 4127 )
#endif
if(Vec<_Tp, n>::depth < CV_32F)
#ifdef _MSC_VER
#pragma warning( pop )
#endif
{
// All elements but the last are followed by ", "; the last closes the bracket.
for (int i = 0; i < n - 1; ++i) {
out << (int)vec[i] << ", ";
}
out << (int)vec[n-1] << "]";
}
else
{
for (int i = 0; i < n - 1; ++i) {
out << vec[i] << ", ";
}
out << vec[n-1] << "]";
}
return out;
}
// Streams a size as "[width x height]".
template<typename _Tp> static inline
std::ostream& operator << (std::ostream& out, const Size_<_Tp>& size)
{
    out << "[" << size.width << " x " << size.height << "]";
    return out;
}
// Streams a rectangle as "[width x height from (x, y)]".
template<typename _Tp> static inline
std::ostream& operator << (std::ostream& out, const Rect_<_Tp>& rect)
{
    out << "[" << rect.width << " x " << rect.height;
    out << " from (" << rect.x << ", " << rect.y << ")]";
    return out;
}
#endif // OPENCV_NOSTL
} // cv
//! @endcond
#endif // __OPENCV_CORE_CVSTDINL_HPP__

View File

@ -0,0 +1,105 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the copyright holders or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_CORE_DIRECTX_HPP__
#define __OPENCV_CORE_DIRECTX_HPP__
#include "mat.hpp"
#include "ocl.hpp"
// Forward declarations of the DirectX interface types used in the signatures
// below, so this header can be parsed without the DirectX headers. Each group
// is skipped when the corresponding macro (presumably the include guard of the
// matching DirectX header) is already defined.
#if !defined(__d3d11_h__)
struct ID3D11Device;
struct ID3D11Texture2D;
#endif
#if !defined(__d3d10_h__)
struct ID3D10Device;
struct ID3D10Texture2D;
#endif
#if !defined(_D3D9_H_)
struct IDirect3DDevice9;
struct IDirect3DDevice9Ex;
struct IDirect3DSurface9;
#endif
namespace cv { namespace directx {
namespace ocl {
using namespace cv::ocl;
//! @addtogroup core_directx
//! @{
// TODO static functions in the Context class
// Each function takes a DirectX device pointer and returns a reference to a
// cv::ocl::Context. Only the declarations are visible in this header.
CV_EXPORTS Context& initializeContextFromD3D11Device(ID3D11Device* pD3D11Device);
CV_EXPORTS Context& initializeContextFromD3D10Device(ID3D10Device* pD3D10Device);
CV_EXPORTS Context& initializeContextFromDirect3DDevice9Ex(IDirect3DDevice9Ex* pDirect3DDevice9Ex);
CV_EXPORTS Context& initializeContextFromDirect3DDevice9(IDirect3DDevice9* pDirect3DDevice9);
//! @}
} // namespace cv::directx::ocl
//! @addtogroup core_directx
//! @{
// Conversions between OpenCV arrays and DirectX resources. The convertTo*
// functions take the OpenCV array as input; the convertFrom* functions take it
// as output. The Direct3D 9 variants accept an optional shared handle that
// defaults to NULL.
CV_EXPORTS void convertToD3D11Texture2D(InputArray src, ID3D11Texture2D* pD3D11Texture2D);
CV_EXPORTS void convertFromD3D11Texture2D(ID3D11Texture2D* pD3D11Texture2D, OutputArray dst);
CV_EXPORTS void convertToD3D10Texture2D(InputArray src, ID3D10Texture2D* pD3D10Texture2D);
CV_EXPORTS void convertFromD3D10Texture2D(ID3D10Texture2D* pD3D10Texture2D, OutputArray dst);
CV_EXPORTS void convertToDirect3DSurface9(InputArray src, IDirect3DSurface9* pDirect3DSurface9, void* surfaceSharedHandle = NULL);
CV_EXPORTS void convertFromDirect3DSurface9(IDirect3DSurface9* pDirect3DSurface9, OutputArray dst, void* surfaceSharedHandle = NULL);
// Get OpenCV type from DirectX type, return -1 if there is no equivalent
CV_EXPORTS int getTypeFromDXGI_FORMAT(const int iDXGI_FORMAT); // enum DXGI_FORMAT for D3D10/D3D11
// Get OpenCV type from DirectX type, return -1 if there is no equivalent
CV_EXPORTS int getTypeFromD3DFORMAT(const int iD3DFORMAT); // enum D3DFORMAT for D3D9
//! @}
} } // namespace cv::directx
#endif // __OPENCV_CORE_DIRECTX_HPP__

280
3rdparty/include/opencv2/core/eigen.hpp vendored Normal file
View File

@ -0,0 +1,280 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_CORE_EIGEN_HPP__
#define __OPENCV_CORE_EIGEN_HPP__
#include "opencv2/core.hpp"
#if defined _MSC_VER && _MSC_VER >= 1200
#pragma warning( disable: 4714 ) //__forceinline is not inlined
#pragma warning( disable: 4127 ) //conditional expression is constant
#pragma warning( disable: 4244 ) //conversion from '__int64' to 'int', possible loss of data
#endif
namespace cv
{
//! @addtogroup core_eigen
//! @{
// Copies an Eigen matrix into a cv::Mat.
// A Mat is constructed over src.data() using Eigen's stride. For row-major
// storage it is copied to dst as is; for column-major storage the buffer is
// viewed as a (cols x rows) matrix and transposed into dst.
template<typename _Tp, int _rows, int _cols, int _options, int _maxRows, int _maxCols> static inline
void eigen2cv( const Eigen::Matrix<_Tp, _rows, _cols, _options, _maxRows, _maxCols>& src, Mat& dst )
{
    const bool isRowMajor = (src.Flags & Eigen::RowMajorBit) != 0;
    if( isRowMajor )
    {
        Mat wrapped(src.rows(), src.cols(), DataType<_Tp>::type,
                    (void*)src.data(), src.stride()*sizeof(_Tp));
        wrapped.copyTo(dst);
        return;
    }

    Mat wrappedT(src.cols(), src.rows(), DataType<_Tp>::type,
                 (void*)src.data(), src.stride()*sizeof(_Tp));
    transpose(wrappedT, dst);
}
// Matx case
// Copies a fixed-size Eigen matrix into a cv::Matx of the same dimensions.
// Row-major data is read directly; column-major data is read as a
// (_cols x _rows) Matx and transposed.
template<typename _Tp, int _rows, int _cols, int _options, int _maxRows, int _maxCols> static inline
void eigen2cv( const Eigen::Matrix<_Tp, _rows, _cols, _options, _maxRows, _maxCols>& src,
               Matx<_Tp, _rows, _cols>& dst )
{
    if( src.Flags & Eigen::RowMajorBit )
        dst = Matx<_Tp, _rows, _cols>(static_cast<const _Tp*>(src.data()));
    else
        dst = Matx<_Tp, _cols, _rows>(static_cast<const _Tp*>(src.data())).t();
}
// Copies a cv::Mat into a fixed-size Eigen matrix, converting the element type
// to _Tp where needed. The Mat dimensions are only checked with CV_DbgAssert.
template<typename _Tp, int _rows, int _cols, int _options, int _maxRows, int _maxCols> static inline
void cv2eigen( const Mat& src,
Eigen::Matrix<_Tp, _rows, _cols, _options, _maxRows, _maxCols>& dst )
{
CV_DbgAssert(src.rows == _rows && src.cols == _cols);
if( !(dst.Flags & Eigen::RowMajorBit) )
{
// Column-major destination: view Eigen's buffer as a (cols x rows) Mat and
// fill it with the transpose of src.
const Mat _dst(src.cols, src.rows, DataType<_Tp>::type,
dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
if( src.type() == _dst.type() )
transpose(src, _dst);
else if( src.cols == src.rows )
{
// Square matrix of another type: convert into the buffer, then transpose
// it in place.
src.convertTo(_dst, _dst.type());
transpose(_dst, _dst);
}
else
// Non-square matrix of another type: transpose into a temporary first,
// then convert into the buffer.
Mat(src.t()).convertTo(_dst, _dst.type());
}
else
{
// Row-major destination: same layout as Mat, so a conversion is enough.
const Mat _dst(src.rows, src.cols, DataType<_Tp>::type,
dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
src.convertTo(_dst, _dst.type());
}
}
// Matx case
template<typename _Tp, int _rows, int _cols, int _options, int _maxRows, int _maxCols> static inline
void cv2eigen( const Matx<_Tp, _rows, _cols>& src,
Eigen::Matrix<_Tp, _rows, _cols, _options, _maxRows, _maxCols>& dst )
{
if( !(dst.Flags & Eigen::RowMajorBit) )
{
const Mat _dst(_cols, _rows, DataType<_Tp>::type,
dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
transpose(src, _dst);
}
else
{
const Mat _dst(_rows, _cols, DataType<_Tp>::type,
dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
Mat(src).copyTo(_dst);
}
}
// Copies a cv::Mat into a dynamically sized Eigen matrix, converting the
// element type to _Tp where needed. dst is resized to src's dimensions first.
template<typename _Tp> static inline
void cv2eigen( const Mat& src,
Eigen::Matrix<_Tp, Eigen::Dynamic, Eigen::Dynamic>& dst )
{
dst.resize(src.rows, src.cols);
if( !(dst.Flags & Eigen::RowMajorBit) )
{
// Column-major destination: view Eigen's buffer as a (cols x rows) Mat and
// fill it with the transpose of src.
const Mat _dst(src.cols, src.rows, DataType<_Tp>::type,
dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
if( src.type() == _dst.type() )
transpose(src, _dst);
else if( src.cols == src.rows )
{
// Square matrix of another type: convert into the buffer, then transpose
// it in place.
src.convertTo(_dst, _dst.type());
transpose(_dst, _dst);
}
else
// Non-square matrix of another type: transpose into a temporary first,
// then convert into the buffer.
Mat(src.t()).convertTo(_dst, _dst.type());
}
else
{
// Row-major destination: same layout as Mat, so a conversion is enough.
const Mat _dst(src.rows, src.cols, DataType<_Tp>::type,
dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
src.convertTo(_dst, _dst.type());
}
}
// Matx case
template<typename _Tp, int _rows, int _cols> static inline
void cv2eigen( const Matx<_Tp, _rows, _cols>& src,
Eigen::Matrix<_Tp, Eigen::Dynamic, Eigen::Dynamic>& dst )
{
dst.resize(_rows, _cols);
if( !(dst.Flags & Eigen::RowMajorBit) )
{
const Mat _dst(_cols, _rows, DataType<_Tp>::type,
dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
transpose(src, _dst);
}
else
{
const Mat _dst(_rows, _cols, DataType<_Tp>::type,
dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
Mat(src).copyTo(_dst);
}
}
template<typename _Tp> static inline
void cv2eigen( const Mat& src,
Eigen::Matrix<_Tp, Eigen::Dynamic, 1>& dst )
{
CV_Assert(src.cols == 1);
dst.resize(src.rows);
if( !(dst.Flags & Eigen::RowMajorBit) )
{
const Mat _dst(src.cols, src.rows, DataType<_Tp>::type,
dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
if( src.type() == _dst.type() )
transpose(src, _dst);
else
Mat(src.t()).convertTo(_dst, _dst.type());
}
else
{
const Mat _dst(src.rows, src.cols, DataType<_Tp>::type,
dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
src.convertTo(_dst, _dst.type());
}
}
// Matx case
template<typename _Tp, int _rows> static inline
void cv2eigen( const Matx<_Tp, _rows, 1>& src,
Eigen::Matrix<_Tp, Eigen::Dynamic, 1>& dst )
{
dst.resize(_rows);
if( !(dst.Flags & Eigen::RowMajorBit) )
{
const Mat _dst(1, _rows, DataType<_Tp>::type,
dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
transpose(src, _dst);
}
else
{
const Mat _dst(_rows, 1, DataType<_Tp>::type,
dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
src.copyTo(_dst);
}
}
template<typename _Tp> static inline
void cv2eigen( const Mat& src,
Eigen::Matrix<_Tp, 1, Eigen::Dynamic>& dst )
{
CV_Assert(src.rows == 1);
dst.resize(src.cols);
if( !(dst.Flags & Eigen::RowMajorBit) )
{
const Mat _dst(src.cols, src.rows, DataType<_Tp>::type,
dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
if( src.type() == _dst.type() )
transpose(src, _dst);
else
Mat(src.t()).convertTo(_dst, _dst.type());
}
else
{
const Mat _dst(src.rows, src.cols, DataType<_Tp>::type,
dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
src.convertTo(_dst, _dst.type());
}
}
//Matx
template<typename _Tp, int _cols> static inline
void cv2eigen( const Matx<_Tp, 1, _cols>& src,
Eigen::Matrix<_Tp, 1, Eigen::Dynamic>& dst )
{
dst.resize(_cols);
if( !(dst.Flags & Eigen::RowMajorBit) )
{
const Mat _dst(_cols, 1, DataType<_Tp>::type,
dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
transpose(src, _dst);
}
else
{
const Mat _dst(1, _cols, DataType<_Tp>::type,
dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
Mat(src).copyTo(_dst);
}
}
//! @}
} // cv
#endif

View File

@ -0,0 +1,195 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2015, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2015, Itseez Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_CORE_IPPASYNC_HPP__
#define __OPENCV_CORE_IPPASYNC_HPP__
#ifdef HAVE_IPP_A
#include "opencv2/core.hpp"
#include <ipp_async_op.h>
#include <ipp_async_accel.h>
namespace cv
{
namespace hpp
{
/** @addtogroup core_ipp
This section describes conversion between OpenCV and the [Intel&reg; IPP Asynchronous
C/C++](http://software.intel.com/en-us/intel-ipp-preview) library. The [Getting Started
Guide](http://registrationcenter.intel.com/irc_nas/3727/ipp_async_get_started.htm) helps you
install the library and configure the header and library build paths.
*/
//! @{
//! Maps the depth of an OpenCV type to the matching HPP_DATA_TYPE_* value.
//! Only 8U, 16U, 16S, 32S, 32F and 64F have a counterpart; any other depth trips the assertion.
inline int toHppType(const int cvType)
{
    int hppType = -1;
    switch (CV_MAT_DEPTH(cvType))
    {
    case CV_8U:  hppType = HPP_DATA_TYPE_8U;  break;
    case CV_16U: hppType = HPP_DATA_TYPE_16U; break;
    case CV_16S: hppType = HPP_DATA_TYPE_16S; break;
    case CV_32S: hppType = HPP_DATA_TYPE_32S; break;
    case CV_32F: hppType = HPP_DATA_TYPE_32F; break;
    case CV_64F: hppType = HPP_DATA_TYPE_64F; break;
    default:     break; // unsupported depth: leave -1 so the assertion below fires
    }
    CV_Assert( hppType >= 0 );
    return hppType;
}
//! Maps an HPP_DATA_TYPE_* value to the matching OpenCV depth constant.
//! Only the 8U, 16U, 16S, 32S, 32F and 64F values are recognised; anything else trips the assertion.
inline int toCvType(const int hppType)
{
    int cvType = -1;
    if (hppType == HPP_DATA_TYPE_8U)
        cvType = CV_8U;
    else if (hppType == HPP_DATA_TYPE_16U)
        cvType = CV_16U;
    else if (hppType == HPP_DATA_TYPE_16S)
        cvType = CV_16S;
    else if (hppType == HPP_DATA_TYPE_32S)
        cvType = CV_32S;
    else if (hppType == HPP_DATA_TYPE_32F)
        cvType = CV_32F;
    else if (hppType == HPP_DATA_TYPE_64F)
        cvType = CV_64F;
    CV_Assert( cvType >= 0 );
    return cvType;
}
/** @brief Convert hppiMatrix to Mat.
This function allocates and initializes a new matrix (if needed) that has the same size and type as
the input matrix. Supports CV_8U, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F.
If src is NULL, dst is released and the function returns.
@param src input hppiMatrix.
@param dst output matrix.
@param accel accelerator instance (see hpp::getHpp for the list of acceleration framework types).
@param cn number of channels; must be positive and must divide the hppiMatrix width evenly.
*/
inline void copyHppToMat(hppiMatrix* src, Mat& dst, hppAccel accel, int cn)
{
    hppDataType type;
    hpp32u width, height;
    hppStatus sts;
    if (src == NULL)
        return dst.release();
    // cn is used as a divisor below; reject zero/negative values instead of dividing by them.
    CV_Assert(cn > 0);
    sts = hppiInquireMatrix(src, &type, &width, &height);
    CV_Assert( sts == HPP_STATUS_NO_ERROR);
    int matType = CV_MAKETYPE(toCvType(type), cn);
    // The hppiMatrix width counts every channel separately; convert it to pixels.
    CV_Assert(width%cn == 0);
    width /= cn;
    dst.create((int)height, (int)width, (int)matType);
    // Multiply in size_t so that height * step cannot wrap in the narrower hpp32u type
    // (presumably 32-bit, going by its name).
    size_t newSize = (size_t)height*(size_t)(hpp32u)(dst.step);
    sts = hppiGetMatrixData(accel,src,(hpp32u)(dst.step),dst.data,&newSize);
    CV_Assert( sts == HPP_STATUS_NO_ERROR);
}
/** @brief Create Mat from hppiMatrix.
Returns a newly created Mat filled by hpp::copyHppToMat, i.e. with the same size and type as the
input matrix. Supports CV_8U, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F.
@param src input hppiMatrix.
@param accel accelerator instance (see hpp::getHpp for the list of acceleration framework types).
@param cn number of channels.
@sa howToUseIPPAconversion, hpp::copyHppToMat, hpp::getHpp.
*/
inline Mat getMat(hppiMatrix* src, hppAccel accel, int cn)
{
    Mat converted;
    copyHppToMat(src, converted, accel, cn);
    return converted;
}
/** @brief Create hppiMatrix from Mat.
This function allocates and initializes an hppiMatrix that has the same size and type as the input
matrix, and returns the hppiMatrix*.
If you want to use zero-copy for GPU, the matrix data must be 4KB aligned. For details see
[hppiCreateSharedMatrix](http://software.intel.com/ru-ru/node/501697).
Supports CV_8U, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F.
@note The hppiMatrix pointer to the image buffer in system memory refers to src.data. Control
the lifetime of the matrix and don't change its data unless there is a special need.
@param src input matrix; src.data must not be NULL.
@param accel accelerator instance. Supports type:
- **HPP_ACCEL_TYPE_CPU** - accelerated by optimized CPU instructions.
- **HPP_ACCEL_TYPE_GPU** - accelerated by GPU programmable units or fixed-function
accelerators.
- **HPP_ACCEL_TYPE_ANY** - any acceleration or no acceleration available.
@sa howToUseIPPAconversion, hpp::getMat
*/
inline hppiMatrix* getHpp(const Mat& src, hppAccel accel)
{
    int htype = toHppType(src.type());
    int cn = src.channels();
    CV_Assert(src.data);
    hppAccelType accelType = hppQueryAccelType(accel);
    if (accelType!=HPP_ACCEL_TYPE_CPU)
    {
        // Zero-initialised so that a query which writes nothing falls through to the
        // non-shared path below instead of reading indeterminate values.
        hpp32u pitch = 0, size = 0;
        hppQueryMatrixAllocParams(accel, src.cols*cn, src.rows, htype, &pitch, &size);
        if (pitch!=0 && size!=0)
        {
            // Share the buffer only when it is 4KB aligned and its row step matches the
            // pitch reported by the query. The address is cast to size_t rather than int:
            // a pointer does not fit in int on 64-bit targets.
            if ((size_t)(src.data)%4096==0 && pitch==(hpp32u)(src.step))
            {
                return hppiCreateSharedMatrix(htype, src.cols*cn, src.rows, src.data, pitch, size);
            }
        }
    }
    return hppiCreateMatrix(htype, src.cols*cn, src.rows, src.data, (hpp32s)(src.step));
}
//! @}
}}
#endif
#endif

3398
3rdparty/include/opencv2/core/mat.hpp vendored Normal file

File diff suppressed because it is too large Load Diff

3436
3rdparty/include/opencv2/core/mat.inl.hpp vendored Normal file

File diff suppressed because it is too large Load Diff

1376
3rdparty/include/opencv2/core/matx.hpp vendored Normal file

File diff suppressed because it is too large Load Diff

691
3rdparty/include/opencv2/core/ocl.hpp vendored Normal file
View File

@ -0,0 +1,691 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the OpenCV Foundation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_OPENCL_HPP__
#define __OPENCV_OPENCL_HPP__
#include "opencv2/core.hpp"
namespace cv { namespace ocl {
//! @addtogroup core_opencl
//! @{
// Library-wide OpenCL queries and switches. Only the declarations are visible in this header.
// NOTE(review): the meanings below are read off the names — confirm against the implementation.
// Presumably: is OpenCL available at all / is it currently enabled for use.
CV_EXPORTS_W bool haveOpenCL();
CV_EXPORTS_W bool useOpenCL();
// Presumably: availability of the AMD BLAS / AMD FFT OpenCL libraries.
CV_EXPORTS_W bool haveAmdBlas();
CV_EXPORTS_W bool haveAmdFft();
CV_EXPORTS_W void setUseOpenCL(bool flag);
CV_EXPORTS_W void finish();
// Not exported to the wrappers (CV_EXPORTS rather than CV_EXPORTS_W).
// Presumably reports shared-virtual-memory support — confirm.
CV_EXPORTS bool haveSVM();
// Forward declarations of wrapper classes that are defined further down in this header.
class CV_EXPORTS Context;
class CV_EXPORTS Device;
class CV_EXPORTS Kernel;
class CV_EXPORTS Program;
class CV_EXPORTS ProgramSource;
class CV_EXPORTS Queue;
class CV_EXPORTS PlatformInfo;
class CV_EXPORTS Image2D;
// Describes one OpenCL device. Only declarations are visible in this header: the object's
// state lives behind the opaque Impl pointer `p`, and every accessor is implemented elsewhere.
// NOTE(review): the accessor names suggest they mirror the OpenCL device-info queries
// (clGetDeviceInfo) — confirm against the implementation before relying on units or ranges.
class CV_EXPORTS Device
{
public:
Device();
// Constructs from / rebinds to a raw handle. NOTE(review): `d` is presumably a cl_device_id — confirm.
explicit Device(void* d);
Device(const Device& d);
Device& operator = (const Device& d);
~Device();
void set(void* d);
// Device type bit flags. TYPE_DGPU and TYPE_IGPU are TYPE_GPU plus one extra marker bit
// (bit 16 and bit 17 respectively); presumably discrete vs. integrated GPU — confirm.
// TYPE_ALL has all 32 bits set.
enum
{
TYPE_DEFAULT = (1 << 0),
TYPE_CPU = (1 << 1),
TYPE_GPU = (1 << 2),
TYPE_ACCELERATOR = (1 << 3),
TYPE_DGPU = TYPE_GPU + (1 << 16),
TYPE_IGPU = TYPE_GPU + (1 << 17),
TYPE_ALL = 0xFFFFFFFF
};
// Identification and version strings.
String name() const;
String extensions() const;
String version() const;
String vendorName() const;
String OpenCL_C_Version() const;
String OpenCLVersion() const;
int deviceVersionMajor() const;
int deviceVersionMinor() const;
String driverVersion() const;
// Raw handle accessor (counterpart of the void* constructor / set()).
void* ptr() const;
// Presumably a combination of the TYPE_* flags above — confirm.
int type() const;
int addressBits() const;
bool available() const;
bool compilerAvailable() const;
bool linkerAvailable() const;
// Floating-point capability bit flags; presumably the bits reported by
// doubleFPConfig() / singleFPConfig() / halfFPConfig() — confirm.
enum
{
FP_DENORM=(1 << 0),
FP_INF_NAN=(1 << 1),
FP_ROUND_TO_NEAREST=(1 << 2),
FP_ROUND_TO_ZERO=(1 << 3),
FP_ROUND_TO_INF=(1 << 4),
FP_FMA=(1 << 5),
FP_SOFT_FLOAT=(1 << 6),
FP_CORRECTLY_ROUNDED_DIVIDE_SQRT=(1 << 7)
};
int doubleFPConfig() const;
int singleFPConfig() const;
int halfFPConfig() const;
bool endianLittle() const;
bool errorCorrectionSupport() const;
// Execution capability bit flags; presumably the bits reported by executionCapabilities() — confirm.
enum
{
EXEC_KERNEL=(1 << 0),
EXEC_NATIVE_KERNEL=(1 << 1)
};
int executionCapabilities() const;
size_t globalMemCacheSize() const;
// Global memory cache kinds; presumably the values reported by globalMemCacheType() — confirm.
enum
{
NO_CACHE=0,
READ_ONLY_CACHE=1,
READ_WRITE_CACHE=2
};
int globalMemCacheType() const;
int globalMemCacheLineSize() const;
size_t globalMemSize() const;
size_t localMemSize() const;
// Local memory kinds; presumably the values reported by localMemType() — confirm.
enum
{
NO_LOCAL_MEM=0,
LOCAL_IS_LOCAL=1,
LOCAL_IS_GLOBAL=2
};
int localMemType() const;
bool hostUnifiedMemory() const;
// Image support and image size limits.
bool imageSupport() const;
bool imageFromBufferSupport() const;
uint imagePitchAlignment() const;
uint imageBaseAddressAlignment() const;
size_t image2DMaxWidth() const;
size_t image2DMaxHeight() const;
size_t image3DMaxWidth() const;
size_t image3DMaxHeight() const;
size_t image3DMaxDepth() const;
size_t imageMaxBufferSize() const;
size_t imageMaxArraySize() const;
// Vendor identifiers compared against vendorID() by the is*() helpers below.
enum
{
UNKNOWN_VENDOR=0,
VENDOR_AMD=1,
VENDOR_INTEL=2,
VENDOR_NVIDIA=3
};
int vendorID() const;
// FIXIT
// dev.isAMD() doesn't work for OpenCL CPU devices from AMD OpenCL platform.
// This method should use platform name instead of vendor name.
// After fix restore code in arithm.cpp: ocl_compare()
// Convenience checks: each compares vendorID() with one VENDOR_* constant.
inline bool isAMD() const { return vendorID() == VENDOR_AMD; }
inline bool isIntel() const { return vendorID() == VENDOR_INTEL; }
inline bool isNVidia() const { return vendorID() == VENDOR_NVIDIA; }
// Compute and argument limits.
int maxClockFrequency() const;
int maxComputeUnits() const;
int maxConstantArgs() const;
size_t maxConstantBufferSize() const;
size_t maxMemAllocSize() const;
size_t maxParameterSize() const;
int maxReadImageArgs() const;
int maxWriteImageArgs() const;
int maxSamplers() const;
size_t maxWorkGroupSize() const;
int maxWorkItemDims() const;
// Writes into a caller-provided array. NOTE(review): the required array length is not
// stated here; presumably maxWorkItemDims() entries — confirm.
void maxWorkItemSizes(size_t*) const;
int memBaseAddrAlign() const;
// Native and preferred vector widths per element type.
int nativeVectorWidthChar() const;
int nativeVectorWidthShort() const;
int nativeVectorWidthInt() const;
int nativeVectorWidthLong() const;
int nativeVectorWidthFloat() const;
int nativeVectorWidthDouble() const;
int nativeVectorWidthHalf() const;
int preferredVectorWidthChar() const;
int preferredVectorWidthShort() const;
int preferredVectorWidthInt() const;
int preferredVectorWidthLong() const;
int preferredVectorWidthFloat() const;
int preferredVectorWidthDouble() const;
int preferredVectorWidthHalf() const;
size_t printfBufferSize() const;
size_t profilingTimerResolution() const;
// Returns a reference to a library-managed default device (lifetime defined by the implementation).
static const Device& getDefault();
protected:
// Opaque implementation; defined outside this header.
struct Impl;
Impl* p;
};
// Wrapper around an OpenCL context. Only declarations are visible here; state lives behind
// the opaque Impl pointer `p`. Note that, unlike Device, both Impl and `p` are public in
// this class (there is no `protected:` section).
class CV_EXPORTS Context
{
public:
Context();
// NOTE(review): `dtype` is presumably a combination of Device::TYPE_* flags — confirm.
explicit Context(int dtype);
~Context();
Context(const Context& c);
Context& operator = (const Context& c);
bool create();
bool create(int dtype);
// Number of devices in the context and access to the idx-th one.
size_t ndevices() const;
const Device& device(size_t idx) const;
// Returns a Program for the given source and build options; `errmsg` is an output
// parameter (non-const reference). NOTE(review): presumably receives the build log — confirm.
Program getProg(const ProgramSource& prog,
const String& buildopt, String& errmsg);
// Returns a reference to a library-managed default context.
// NOTE(review): `initialize` presumably controls lazy creation — confirm.
static Context& getDefault(bool initialize = true);
// Raw handle accessor.
void* ptr() const;
// Granted access to the internals so the context can be set up from existing raw handles.
friend void initializeContextFromHandle(Context& ctx, void* platform, void* context, void* device);
bool useSVM() const;
void setUseSVM(bool enabled);
// Opaque implementation; defined outside this header.
struct Impl;
Impl* p;
};
// Wrapper around an OpenCL platform handle. Only declarations are visible here; state lives
// behind the opaque Impl pointer `p`.
class CV_EXPORTS Platform
{
public:
Platform();
~Platform();
Platform(const Platform& p);
Platform& operator = (const Platform& p);
// Raw handle accessor.
void* ptr() const;
// Returns a reference to a library-managed default platform.
static Platform& getDefault();
// Granted access to the internals so the platform can be set up from existing raw handles.
friend void initializeContextFromHandle(Context& ctx, void* platform, void* context, void* device);
protected:
// Opaque implementation; defined outside this header.
struct Impl;
Impl* p;
};
// TODO Move to internal header
// Namespace-scope declaration of the function that Context and Platform declare as a friend.
// Takes the target Context plus three untyped handles.
// NOTE(review): the handles are presumably cl_platform_id / cl_context / cl_device_id — confirm.
void initializeContextFromHandle(Context& ctx, void* platform, void* context, void* device);
// Wrapper around an OpenCL command queue. Only declarations are visible here; state lives
// behind the opaque Impl pointer `p`.
class CV_EXPORTS Queue
{
public:
Queue();
// Both the constructor and create() default the device argument to a default-constructed Device.
explicit Queue(const Context& c, const Device& d=Device());
~Queue();
Queue(const Queue& q);
Queue& operator = (const Queue& q);
bool create(const Context& c=Context(), const Device& d=Device());
// NOTE(review): presumably blocks until queued work has completed — confirm.
void finish();
// Raw handle accessor.
void* ptr() const;
// Returns a reference to a library-managed default queue.
static Queue& getDefault();
protected:
// Opaque implementation; defined outside this header.
struct Impl;
Impl* p;
};
// Plain description of one kernel argument: a set of flags plus either a UMat or a raw
// (pointer, size) pair. The static factories below only fill in the flag combination;
// the two out-of-line constructors and Constant(const Mat&) are defined elsewhere.
class CV_EXPORTS KernelArg
{
public:
    // Flag values. READ_WRITE equals READ_ONLY | WRITE_ONLY.
    enum
    {
        LOCAL      = 1,
        READ_ONLY  = 2,
        WRITE_ONLY = 4,
        READ_WRITE = 6,
        CONSTANT   = 8,
        PTR_ONLY   = 16,
        NO_SIZE    = 256
    };

    KernelArg(int _flags, UMat* _m, int wscale=1, int iwscale=1, const void* _obj=0, size_t _sz=0);
    KernelArg();

    // Argument with the LOCAL flag and no matrix.
    static KernelArg Local()
    {
        return KernelArg(LOCAL, 0);
    }

    // PTR_ONLY variants: the matrix combined with an access mode.
    static KernelArg PtrWriteOnly(const UMat& m)
    {
        return KernelArg(PTR_ONLY | WRITE_ONLY, const_cast<UMat*>(&m));
    }
    static KernelArg PtrReadOnly(const UMat& m)
    {
        return KernelArg(PTR_ONLY | READ_ONLY, const_cast<UMat*>(&m));
    }
    static KernelArg PtrReadWrite(const UMat& m)
    {
        return KernelArg(PTR_ONLY | READ_WRITE, const_cast<UMat*>(&m));
    }

    // Matrix arguments with an access mode and the two scale factors; the *NoSize
    // variants additionally set NO_SIZE.
    static KernelArg ReadWrite(const UMat& m, int wscale=1, int iwscale=1)
    {
        return KernelArg(READ_WRITE, const_cast<UMat*>(&m), wscale, iwscale);
    }
    static KernelArg ReadWriteNoSize(const UMat& m, int wscale=1, int iwscale=1)
    {
        return KernelArg(READ_WRITE | NO_SIZE, const_cast<UMat*>(&m), wscale, iwscale);
    }
    static KernelArg ReadOnly(const UMat& m, int wscale=1, int iwscale=1)
    {
        return KernelArg(READ_ONLY, const_cast<UMat*>(&m), wscale, iwscale);
    }
    static KernelArg WriteOnly(const UMat& m, int wscale=1, int iwscale=1)
    {
        return KernelArg(WRITE_ONLY, const_cast<UMat*>(&m), wscale, iwscale);
    }
    static KernelArg ReadOnlyNoSize(const UMat& m, int wscale=1, int iwscale=1)
    {
        return KernelArg(READ_ONLY | NO_SIZE, const_cast<UMat*>(&m), wscale, iwscale);
    }
    static KernelArg WriteOnlyNoSize(const UMat& m, int wscale=1, int iwscale=1)
    {
        return KernelArg(WRITE_ONLY | NO_SIZE, const_cast<UMat*>(&m), wscale, iwscale);
    }

    static KernelArg Constant(const Mat& m);

    // CONSTANT argument over a raw array: no matrix, scales of 1, `arr` as the object
    // pointer and `n` as the size value.
    template<typename _Tp> static KernelArg Constant(const _Tp* arr, size_t n)
    {
        return KernelArg(CONSTANT, 0, 1, 1, (void*)arr, n);
    }

    int flags;
    UMat* m;
    const void* obj;
    size_t sz;
    int wscale, iwscale;
};
// Wrapper around an OpenCL kernel. State lives behind the opaque Impl pointer `p`.
// Only the template set() and the args() overloads are defined here: each args() overload
// passes the first argument to set() at index 0 and passes every following argument at the
// index returned by the previous set() call.
class CV_EXPORTS Kernel
{
public:
    Kernel();
    Kernel(const char* kname, const Program& prog);
    Kernel(const char* kname, const ProgramSource& prog,
           const String& buildopts = String(), String* errmsg=0);
    ~Kernel();
    Kernel(const Kernel& k);
    Kernel& operator = (const Kernel& k);

    bool empty() const;
    bool create(const char* kname, const Program& prog);
    bool create(const char* kname, const ProgramSource& prog,
                const String& buildopts, String* errmsg=0);

    int set(int i, const void* value, size_t sz);
    int set(int i, const Image2D& image2D);
    int set(int i, const UMat& m);
    int set(int i, const KernelArg& arg);
    // Generic value argument: forwards the object's address and size to the raw overload.
    template<typename _Tp> int set(int i, const _Tp& value)
    {
        return set(i, &value, sizeof(value));
    }

    template<typename _Tp0>
    Kernel& args(const _Tp0& a0)
    {
        set(0, a0);
        return *this;
    }

    template<typename _Tp0, typename _Tp1>
    Kernel& args(const _Tp0& a0, const _Tp1& a1)
    {
        int idx = set(0, a0);
        set(idx, a1);
        return *this;
    }

    template<typename _Tp0, typename _Tp1, typename _Tp2>
    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2)
    {
        int idx = set(0, a0); idx = set(idx, a1);
        set(idx, a2);
        return *this;
    }

    template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3>
    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3)
    {
        int idx = set(0, a0); idx = set(idx, a1); idx = set(idx, a2);
        set(idx, a3);
        return *this;
    }

    template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
             typename _Tp4>
    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
                 const _Tp4& a4)
    {
        int idx = set(0, a0); idx = set(idx, a1); idx = set(idx, a2); idx = set(idx, a3);
        set(idx, a4);
        return *this;
    }

    template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
             typename _Tp4, typename _Tp5>
    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
                 const _Tp4& a4, const _Tp5& a5)
    {
        int idx = set(0, a0); idx = set(idx, a1); idx = set(idx, a2); idx = set(idx, a3);
        idx = set(idx, a4);
        set(idx, a5);
        return *this;
    }

    template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
             typename _Tp4, typename _Tp5, typename _Tp6>
    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
                 const _Tp4& a4, const _Tp5& a5, const _Tp6& a6)
    {
        int idx = set(0, a0); idx = set(idx, a1); idx = set(idx, a2); idx = set(idx, a3);
        idx = set(idx, a4); idx = set(idx, a5);
        set(idx, a6);
        return *this;
    }

    template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
             typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7>
    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
                 const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7)
    {
        int idx = set(0, a0); idx = set(idx, a1); idx = set(idx, a2); idx = set(idx, a3);
        idx = set(idx, a4); idx = set(idx, a5); idx = set(idx, a6);
        set(idx, a7);
        return *this;
    }

    template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
             typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7,
             typename _Tp8>
    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
                 const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
                 const _Tp8& a8)
    {
        int idx = set(0, a0); idx = set(idx, a1); idx = set(idx, a2); idx = set(idx, a3);
        idx = set(idx, a4); idx = set(idx, a5); idx = set(idx, a6); idx = set(idx, a7);
        set(idx, a8);
        return *this;
    }

    template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
             typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7,
             typename _Tp8, typename _Tp9>
    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
                 const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
                 const _Tp8& a8, const _Tp9& a9)
    {
        int idx = set(0, a0); idx = set(idx, a1); idx = set(idx, a2); idx = set(idx, a3);
        idx = set(idx, a4); idx = set(idx, a5); idx = set(idx, a6); idx = set(idx, a7);
        idx = set(idx, a8);
        set(idx, a9);
        return *this;
    }

    template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
             typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7,
             typename _Tp8, typename _Tp9, typename _Tp10>
    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
                 const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
                 const _Tp8& a8, const _Tp9& a9, const _Tp10& a10)
    {
        int idx = set(0, a0); idx = set(idx, a1); idx = set(idx, a2); idx = set(idx, a3);
        idx = set(idx, a4); idx = set(idx, a5); idx = set(idx, a6); idx = set(idx, a7);
        idx = set(idx, a8); idx = set(idx, a9);
        set(idx, a10);
        return *this;
    }

    template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
             typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7,
             typename _Tp8, typename _Tp9, typename _Tp10, typename _Tp11>
    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
                 const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
                 const _Tp8& a8, const _Tp9& a9, const _Tp10& a10, const _Tp11& a11)
    {
        int idx = set(0, a0); idx = set(idx, a1); idx = set(idx, a2); idx = set(idx, a3);
        idx = set(idx, a4); idx = set(idx, a5); idx = set(idx, a6); idx = set(idx, a7);
        idx = set(idx, a8); idx = set(idx, a9); idx = set(idx, a10);
        set(idx, a11);
        return *this;
    }

    template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
             typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7,
             typename _Tp8, typename _Tp9, typename _Tp10, typename _Tp11,
             typename _Tp12>
    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
                 const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
                 const _Tp8& a8, const _Tp9& a9, const _Tp10& a10, const _Tp11& a11,
                 const _Tp12& a12)
    {
        int idx = set(0, a0); idx = set(idx, a1); idx = set(idx, a2); idx = set(idx, a3);
        idx = set(idx, a4); idx = set(idx, a5); idx = set(idx, a6); idx = set(idx, a7);
        idx = set(idx, a8); idx = set(idx, a9); idx = set(idx, a10); idx = set(idx, a11);
        set(idx, a12);
        return *this;
    }

    template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
             typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7,
             typename _Tp8, typename _Tp9, typename _Tp10, typename _Tp11,
             typename _Tp12, typename _Tp13>
    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
                 const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
                 const _Tp8& a8, const _Tp9& a9, const _Tp10& a10, const _Tp11& a11,
                 const _Tp12& a12, const _Tp13& a13)
    {
        int idx = set(0, a0); idx = set(idx, a1); idx = set(idx, a2); idx = set(idx, a3);
        idx = set(idx, a4); idx = set(idx, a5); idx = set(idx, a6); idx = set(idx, a7);
        idx = set(idx, a8); idx = set(idx, a9); idx = set(idx, a10); idx = set(idx, a11);
        idx = set(idx, a12);
        set(idx, a13);
        return *this;
    }

    template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
             typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7,
             typename _Tp8, typename _Tp9, typename _Tp10, typename _Tp11,
             typename _Tp12, typename _Tp13, typename _Tp14>
    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
                 const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
                 const _Tp8& a8, const _Tp9& a9, const _Tp10& a10, const _Tp11& a11,
                 const _Tp12& a12, const _Tp13& a13, const _Tp14& a14)
    {
        int idx = set(0, a0); idx = set(idx, a1); idx = set(idx, a2); idx = set(idx, a3);
        idx = set(idx, a4); idx = set(idx, a5); idx = set(idx, a6); idx = set(idx, a7);
        idx = set(idx, a8); idx = set(idx, a9); idx = set(idx, a10); idx = set(idx, a11);
        idx = set(idx, a12); idx = set(idx, a13);
        set(idx, a14);
        return *this;
    }

    template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
             typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7,
             typename _Tp8, typename _Tp9, typename _Tp10, typename _Tp11,
             typename _Tp12, typename _Tp13, typename _Tp14, typename _Tp15>
    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
                 const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
                 const _Tp8& a8, const _Tp9& a9, const _Tp10& a10, const _Tp11& a11,
                 const _Tp12& a12, const _Tp13& a13, const _Tp14& a14, const _Tp15& a15)
    {
        int idx = set(0, a0); idx = set(idx, a1); idx = set(idx, a2); idx = set(idx, a3);
        idx = set(idx, a4); idx = set(idx, a5); idx = set(idx, a6); idx = set(idx, a7);
        idx = set(idx, a8); idx = set(idx, a9); idx = set(idx, a10); idx = set(idx, a11);
        idx = set(idx, a12); idx = set(idx, a13); idx = set(idx, a14);
        set(idx, a15);
        return *this;
    }

    bool run(int dims, size_t globalsize[],
             size_t localsize[], bool sync, const Queue& q=Queue());
    bool runTask(bool sync, const Queue& q=Queue());

    size_t workGroupSize() const;
    size_t preferedWorkGroupSizeMultiple() const;
    bool compileWorkGroupSize(size_t wsz[]) const;
    size_t localMemSize() const;

    void* ptr() const;
    // Opaque implementation; declared public, while the pointer to it is protected.
    struct Impl;
protected:
    Impl* p;
};
// Wrapper around an OpenCL program. Only declarations are visible here; state lives behind
// the opaque Impl pointer `p`.
class CV_EXPORTS Program
{
public:
Program();
// `errmsg` is an output parameter (non-const reference).
// NOTE(review): presumably receives the build log on failure — confirm.
Program(const ProgramSource& src,
const String& buildflags, String& errmsg);
// NOTE(review): `buf` is presumably a serialized program as produced by write() — confirm.
explicit Program(const String& buf);
Program(const Program& prog);
Program& operator = (const Program& prog);
~Program();
bool create(const ProgramSource& src,
const String& buildflags, String& errmsg);
// read() takes a buffer plus build flags; write() fills `buf` (non-const reference).
bool read(const String& buf, const String& buildflags);
bool write(String& buf) const;
const ProgramSource& source() const;
// Raw handle accessor.
void* ptr() const;
// NOTE(review): the meaning of the "prefix" is not visible in this header — confirm.
String getPrefix() const;
static String getPrefix(const String& buildflags);
protected:
// Opaque implementation; defined outside this header.
struct Impl;
Impl* p;
};
// Holds the source text of an OpenCL program together with a hash value.
// Only declarations are visible here; state lives behind the opaque Impl pointer `p`.
class CV_EXPORTS ProgramSource
{
public:
// Type returned by hash(): a 64-bit unsigned integer.
typedef uint64 hash_t;
ProgramSource();
explicit ProgramSource(const String& prog);
explicit ProgramSource(const char* prog);
~ProgramSource();
ProgramSource(const ProgramSource& prog);
ProgramSource& operator = (const ProgramSource& prog);
const String& source() const;
// NOTE(review): presumably a hash of the source text — confirm how it is computed.
hash_t hash() const;
protected:
// Opaque implementation; defined outside this header.
struct Impl;
Impl* p;
};
// Read-only description of one OpenCL platform. Only declarations are visible here; state
// lives behind the opaque Impl pointer `p`.
class CV_EXPORTS PlatformInfo
{
public:
PlatformInfo();
// NOTE(review): `id` is presumably a cl_platform_id — confirm.
explicit PlatformInfo(void* id);
~PlatformInfo();
PlatformInfo(const PlatformInfo& i);
PlatformInfo& operator =(const PlatformInfo& i);
String name() const;
String vendor() const;
String version() const;
// NOTE(review): presumably the number of devices on the platform, with getDevice()
// filling `device` with the d-th one — confirm the valid range of `d`.
int deviceNumber() const;
void getDevice(Device& device, int d) const;
protected:
// Opaque implementation; defined outside this header.
struct Impl;
Impl* p;
};
// String helpers, declared only. NOTE(review): presumably used when composing OpenCL kernel
// build options (type names, conversion-function names) — confirm against the implementation.
// convertTypeStr() takes a caller-provided buffer `buf`; its required size is not stated here.
CV_EXPORTS const char* convertTypeStr(int sdepth, int ddepth, int cn, char* buf);
CV_EXPORTS const char* typeToStr(int t);
CV_EXPORTS const char* memopTypeToStr(int t);
CV_EXPORTS const char* vecopTypeToStr(int t);
CV_EXPORTS String kernelToStr(InputArray _kernel, int ddepth = -1, const char * name = NULL);
// Fills `platform_info` (output parameter). The name is spelled "Platfoms" in the public API.
CV_EXPORTS void getPlatfomsInfo(std::vector<PlatformInfo>& platform_info);
// Strategy selector accepted by predictOptimalVectorWidth() and checkOptimalVectorWidth().
enum OclVectorStrategy
{
// each matrix has its own vector width
OCL_VECTOR_OWN = 0,
// all matrices use the maximal vector width found among them
// (useful for cases when matrices have different data types)
OCL_VECTOR_MAX = 1,
// default strategy (same value as OCL_VECTOR_OWN)
OCL_VECTOR_DEFAULT = OCL_VECTOR_OWN
};
// Vector-width helpers, declared only. Each accepts up to nine arrays (all but the first
// default to noArray()) and, where present, a strategy defaulting to OCL_VECTOR_DEFAULT.
// NOTE(review): how the returned width is derived is not visible in this header — confirm.
CV_EXPORTS int predictOptimalVectorWidth(InputArray src1, InputArray src2 = noArray(), InputArray src3 = noArray(),
InputArray src4 = noArray(), InputArray src5 = noArray(), InputArray src6 = noArray(),
InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray(),
OclVectorStrategy strat = OCL_VECTOR_DEFAULT);
// Same parameter list plus a caller-supplied `vectorWidths` array.
// NOTE(review): the expected length and indexing of `vectorWidths` are not stated here — confirm.
CV_EXPORTS int checkOptimalVectorWidth(const int *vectorWidths,
InputArray src1, InputArray src2 = noArray(), InputArray src3 = noArray(),
InputArray src4 = noArray(), InputArray src5 = noArray(), InputArray src6 = noArray(),
InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray(),
OclVectorStrategy strat = OCL_VECTOR_DEFAULT);
// with OCL_VECTOR_MAX strategy
CV_EXPORTS int predictOptimalVectorWidthMax(InputArray src1, InputArray src2 = noArray(), InputArray src3 = noArray(),
InputArray src4 = noArray(), InputArray src5 = noArray(), InputArray src6 = noArray(),
InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray());
// Modifies `buildOptions` in place (non-const reference) using `name` and the array `_m`.
// NOTE(review): the exact text that is added is defined by the implementation — confirm.
CV_EXPORTS void buildOptionsAddMatrixDescription(String& buildOptions, const String& name, InputArray _m);
// Wrapper around an OpenCL 2D image created from a UMat. Only declarations are visible here;
// state lives behind the opaque Impl pointer `p`.
class CV_EXPORTS Image2D
{
public:
Image2D();
// src: The UMat from which to get image properties and data
// norm: Flag to enable the use of normalized channel data types
// alias: Flag indicating that the image should alias the src UMat.
// If true, changes to the image or src will be reflected in
// both objects.
explicit Image2D(const UMat &src, bool norm = false, bool alias = false);
Image2D(const Image2D & i);
~Image2D();
Image2D & operator = (const Image2D & i);
// Indicates if creating an aliased image should succeed. Depends on the
// underlying platform and the dimensions of the UMat.
static bool canCreateAlias(const UMat &u);
// Indicates if the image format is supported.
static bool isFormatSupported(int depth, int cn, bool norm);
// Raw handle accessor.
void* ptr() const;
protected:
// Opaque implementation; defined outside this header.
struct Impl;
Impl* p;
};
// Returns a pointer to a MatAllocator. NOTE(review): presumably the allocator that backs
// OpenCL-resident matrices; ownership of the returned pointer is not stated here — confirm.
CV_EXPORTS MatAllocator* getOpenCLAllocator();
// The declarations below are compiled only when building OpenCV itself.
#ifdef __OPENCV_BUILD
namespace internal {
CV_EXPORTS bool isPerformanceCheckBypassed();
// Evaluates to true when the bypass is active; otherwise to the value of `condition`.
// `condition` is not evaluated when the bypass is active (short-circuit ||).
#define OCL_PERFORMANCE_CHECK(condition) (cv::ocl::internal::isPerformanceCheckBypassed() || (condition))
CV_EXPORTS bool isCLBuffer(UMat& u);
} // namespace internal
#endif
//! @}
}}
#endif

View File

@ -0,0 +1,64 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the OpenCV Foundation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_OPENCL_GENBASE_HPP__
#define __OPENCV_OPENCL_GENBASE_HPP__
namespace cv
{
namespace ocl
{
//! @cond IGNORED
// Plain record of three C-string fields describing one program entry.
struct ProgramEntry
{
// Entry name.
const char* name;
// NOTE(review): presumably the OpenCL program source text -- confirm.
const char* programStr;
// NOTE(review): presumably a hash identifying programStr -- confirm.
const char* programHash;
};
//! @endcond
}
}
#endif

684
3rdparty/include/opencv2/core/opengl.hpp vendored Normal file
View File

@ -0,0 +1,684 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_CORE_OPENGL_HPP__
#define __OPENCV_CORE_OPENGL_HPP__
#ifndef __cplusplus
# error opengl.hpp header must be compiled as C++
#endif
#include "opencv2/core.hpp"
namespace cv { namespace ogl {
/** @addtogroup core_opengl
This section describes OpenGL interoperability.
To enable OpenGL support, configure OpenCV using CMake with WITH_OPENGL=ON . Currently OpenGL is
supported only with WIN32, GTK and Qt backends on Windows and Linux (MacOS and Android are not
supported). For GTK backend gtkglext-1.0 library is required.
To use OpenGL functionality you should first create an OpenGL context (window or frame buffer). You can
do this with the namedWindow function or with another OpenGL toolkit (GLUT, for example).
*/
//! @{
/////////////////// OpenGL Objects ///////////////////
/** @brief Smart pointer for OpenGL buffer object with reference counting.
Buffer Objects are OpenGL objects that store an array of unformatted memory allocated by the OpenGL
context. These can be used to store vertex data, pixel data retrieved from images or the
framebuffer, and a variety of other things.
ogl::Buffer has an interface similar to the Mat interface and represents 2D array memory.
ogl::Buffer supports memory transfers between host and device and can also be mapped to CUDA memory.
*/
class CV_EXPORTS Buffer
{
public:
/** @brief The target defines how you intend to use the buffer object.
*/
enum Target
{
ARRAY_BUFFER = 0x8892, //!< The buffer will be used as a source for vertex data
ELEMENT_ARRAY_BUFFER = 0x8893, //!< The buffer will be used for indices (in glDrawElements, for example)
PIXEL_PACK_BUFFER = 0x88EB, //!< The buffer will be used for reading from OpenGL textures
PIXEL_UNPACK_BUFFER = 0x88EC //!< The buffer will be used for writing to OpenGL textures
};
/** @brief Access policy accepted by mapHost.
*/
enum Access
{
READ_ONLY = 0x88B8, //!< The mapped data store will only be read
WRITE_ONLY = 0x88B9, //!< The mapped data store will only be written
READ_WRITE = 0x88BA //!< The mapped data store will be both read and written
};
/** @brief The constructors.
Creates an empty ogl::Buffer object, creates an ogl::Buffer object from an existing buffer ( abufId
parameter), allocates memory for an ogl::Buffer object or copies from host/device memory.
*/
Buffer();
/** @overload
@param arows Number of rows in a 2D array.
@param acols Number of columns in a 2D array.
@param atype Array type ( CV_8UC1, ..., CV_64FC4 ). See Mat for details.
@param abufId Buffer object name.
@param autoRelease Auto release mode (if true, release will be called in object's destructor).
*/
Buffer(int arows, int acols, int atype, unsigned int abufId, bool autoRelease = false);
/** @overload
@param asize 2D array size.
@param atype Array type ( CV_8UC1, ..., CV_64FC4 ). See Mat for details.
@param abufId Buffer object name.
@param autoRelease Auto release mode (if true, release will be called in object's destructor).
*/
Buffer(Size asize, int atype, unsigned int abufId, bool autoRelease = false);
/** @overload
@param arows Number of rows in a 2D array.
@param acols Number of columns in a 2D array.
@param atype Array type ( CV_8UC1, ..., CV_64FC4 ). See Mat for details.
@param target Buffer usage. See cv::ogl::Buffer::Target .
@param autoRelease Auto release mode (if true, release will be called in object's destructor).
*/
Buffer(int arows, int acols, int atype, Target target = ARRAY_BUFFER, bool autoRelease = false);
/** @overload
@param asize 2D array size.
@param atype Array type ( CV_8UC1, ..., CV_64FC4 ). See Mat for details.
@param target Buffer usage. See cv::ogl::Buffer::Target .
@param autoRelease Auto release mode (if true, release will be called in object's destructor).
*/
Buffer(Size asize, int atype, Target target = ARRAY_BUFFER, bool autoRelease = false);
/** @overload
@param arr Input array (host or device memory, it can be Mat , cuda::GpuMat or std::vector ).
@param target Buffer usage. See cv::ogl::Buffer::Target .
@param autoRelease Auto release mode (if true, release will be called in object's destructor).
*/
explicit Buffer(InputArray arr, Target target = ARRAY_BUFFER, bool autoRelease = false);
/** @brief Allocates memory for ogl::Buffer object.
@param arows Number of rows in a 2D array.
@param acols Number of columns in a 2D array.
@param atype Array type ( CV_8UC1, ..., CV_64FC4 ). See Mat for details.
@param target Buffer usage. See cv::ogl::Buffer::Target .
@param autoRelease Auto release mode (if true, release will be called in object's destructor).
*/
void create(int arows, int acols, int atype, Target target = ARRAY_BUFFER, bool autoRelease = false);
/** @overload
@param asize 2D array size.
@param atype Array type ( CV_8UC1, ..., CV_64FC4 ). See Mat for details.
@param target Buffer usage. See cv::ogl::Buffer::Target .
@param autoRelease Auto release mode (if true, release will be called in object's destructor).
*/
void create(Size asize, int atype, Target target = ARRAY_BUFFER, bool autoRelease = false);
/** @brief Decrements the reference counter and destroys the buffer object if needed.
The function will call setAutoRelease(true) .
*/
void release();
/** @brief Sets auto release mode.
The lifetime of the OpenGL object is tied to the lifetime of the context. If the OpenGL context was
bound to a window it could be released at any time (the user can close a window). If the object's
destructor is called after destruction of the context it will cause an error. Thus ogl::Buffer doesn't
destroy the OpenGL object in its destructor by default (all OpenGL resources will be released with the
OpenGL context). This function can force the ogl::Buffer destructor to destroy the OpenGL object.
@param flag Auto release mode (if true, release will be called in object's destructor).
*/
void setAutoRelease(bool flag);
/** @brief Copies from host/device memory to OpenGL buffer.
@param arr Input array (host or device memory, it can be Mat , cuda::GpuMat or std::vector ).
@param target Buffer usage. See cv::ogl::Buffer::Target .
@param autoRelease Auto release mode (if true, release will be called in object's destructor).
*/
void copyFrom(InputArray arr, Target target = ARRAY_BUFFER, bool autoRelease = false);
/** @overload */
void copyFrom(InputArray arr, cuda::Stream& stream, Target target = ARRAY_BUFFER, bool autoRelease = false);
/** @brief Copies from OpenGL buffer to host/device memory or another OpenGL buffer object.
@param arr Destination array (host or device memory, can be Mat , cuda::GpuMat , std::vector or
ogl::Buffer ).
*/
void copyTo(OutputArray arr) const;
/** @overload */
void copyTo(OutputArray arr, cuda::Stream& stream) const;
/** @brief Creates a full copy of the buffer object and the underlying data.
@param target Buffer usage for destination buffer.
@param autoRelease Auto release mode for destination buffer.
*/
Buffer clone(Target target = ARRAY_BUFFER, bool autoRelease = false) const;
/** @brief Binds OpenGL buffer to the specified buffer binding point.
@param target Binding point. See cv::ogl::Buffer::Target .
*/
void bind(Target target) const;
/** @brief Unbind any buffers from the specified binding point.
@param target Binding point. See cv::ogl::Buffer::Target .
*/
static void unbind(Target target);
/** @brief Maps OpenGL buffer to host memory.
mapHost maps to the client's address space the entire data store of the buffer object. The data can
then be directly read and/or written relative to the returned pointer, depending on the specified
access policy.
A mapped data store must be unmapped with ogl::Buffer::unmapHost before its buffer object is used.
This operation can lead to memory transfers between host and device.
Only one buffer object can be mapped at a time.
@param access Access policy, indicating whether it will be possible to read from, write to, or both
read from and write to the buffer object's mapped data store. The symbolic constant must be
ogl::Buffer::READ_ONLY , ogl::Buffer::WRITE_ONLY or ogl::Buffer::READ_WRITE .
*/
Mat mapHost(Access access);
/** @brief Unmaps OpenGL buffer.
*/
void unmapHost();
//! map to device memory (blocking)
cuda::GpuMat mapDevice();
//! counterpart of the mapDevice() overload that takes no stream
void unmapDevice();
/** @brief Maps OpenGL buffer to CUDA device memory.
This operation doesn't copy data. Several buffer objects can be mapped to CUDA memory at a time.
A mapped data store must be unmapped with ogl::Buffer::unmapDevice before its buffer object is used.
*/
cuda::GpuMat mapDevice(cuda::Stream& stream);
/** @brief Unmaps OpenGL buffer.
*/
void unmapDevice(cuda::Stream& stream);
//! number of rows of the 2D array
int rows() const;
//! number of columns of the 2D array
int cols() const;
//! 2D array size as Size(cols, rows)
Size size() const;
//! true if the number of rows or the number of columns is zero
bool empty() const;
//! array type code (see the atype constructor parameter)
int type() const;
//! depth part of the type code (CV_MAT_DEPTH of the type)
int depth() const;
//! channel count of the type code (CV_MAT_CN of the type)
int channels() const;
//! CV_ELEM_SIZE of the type
int elemSize() const;
//! CV_ELEM_SIZE1 of the type
int elemSize1() const;
//! get OpenGL object id
unsigned int bufId() const;
//! implementation class; only declared in this header
class Impl;
private:
Ptr<Impl> impl_; // reference-counted handle to the implementation object
int rows_; // value returned by rows()
int cols_; // value returned by cols()
int type_; // value returned by type()
};
/** @brief Smart pointer for OpenGL 2D texture memory with reference counting.
*/
class CV_EXPORTS Texture2D
{
public:
/** @brief An Image Format describes the way that the images in Textures store their data.
*/
enum Format
{
NONE = 0,
DEPTH_COMPONENT = 0x1902, //!< Depth
RGB = 0x1907, //!< Red, Green, Blue
RGBA = 0x1908 //!< Red, Green, Blue, Alpha
};
/** @brief The constructors.
Creates an empty ogl::Texture2D object, allocates memory for an ogl::Texture2D object or copies from
host/device memory.
*/
Texture2D();
/** @overload
@param arows Number of rows.
@param acols Number of columns.
@param aformat Image format. See cv::ogl::Texture2D::Format .
@param atexId Texture object id (presumably the name of an already existing OpenGL texture,
by analogy with the abufId parameter of ogl::Buffer -- confirm).
@param autoRelease Auto release mode (if true, release will be called in object's destructor).
*/
Texture2D(int arows, int acols, Format aformat, unsigned int atexId, bool autoRelease = false);
/** @overload
@param asize 2D array size.
@param aformat Image format. See cv::ogl::Texture2D::Format .
@param atexId Texture object id (presumably the name of an already existing OpenGL texture,
by analogy with the abufId parameter of ogl::Buffer -- confirm).
@param autoRelease Auto release mode (if true, release will be called in object's destructor).
*/
Texture2D(Size asize, Format aformat, unsigned int atexId, bool autoRelease = false);
/** @overload
@param arows Number of rows.
@param acols Number of columns.
@param aformat Image format. See cv::ogl::Texture2D::Format .
@param autoRelease Auto release mode (if true, release will be called in object's destructor).
*/
Texture2D(int arows, int acols, Format aformat, bool autoRelease = false);
/** @overload
@param asize 2D array size.
@param aformat Image format. See cv::ogl::Texture2D::Format .
@param autoRelease Auto release mode (if true, release will be called in object's destructor).
*/
Texture2D(Size asize, Format aformat, bool autoRelease = false);
/** @overload
@param arr Input array (host or device memory, it can be Mat , cuda::GpuMat or ogl::Buffer ).
@param autoRelease Auto release mode (if true, release will be called in object's destructor).
*/
explicit Texture2D(InputArray arr, bool autoRelease = false);
/** @brief Allocates memory for ogl::Texture2D object.
@param arows Number of rows.
@param acols Number of columns.
@param aformat Image format. See cv::ogl::Texture2D::Format .
@param autoRelease Auto release mode (if true, release will be called in object's destructor).
*/
void create(int arows, int acols, Format aformat, bool autoRelease = false);
/** @overload
@param asize 2D array size.
@param aformat Image format. See cv::ogl::Texture2D::Format .
@param autoRelease Auto release mode (if true, release will be called in object's destructor).
*/
void create(Size asize, Format aformat, bool autoRelease = false);
/** @brief Decrements the reference counter and destroys the texture object if needed.
The function will call setAutoRelease(true) .
*/
void release();
/** @brief Sets auto release mode.
@param flag Auto release mode (if true, release will be called in object's destructor).
The lifetime of the OpenGL object is tied to the lifetime of the context. If the OpenGL context was
bound to a window it could be released at any time (the user can close a window). If the object's
destructor is called after destruction of the context it will cause an error. Thus ogl::Texture2D
doesn't destroy the OpenGL object in its destructor by default (all OpenGL resources will be released
with the OpenGL context). This function can force the ogl::Texture2D destructor to destroy the OpenGL
object.
*/
void setAutoRelease(bool flag);
/** @brief Copies from host/device memory to OpenGL texture.
@param arr Input array (host or device memory, it can be Mat , cuda::GpuMat or ogl::Buffer ).
@param autoRelease Auto release mode (if true, release will be called in object's destructor).
*/
void copyFrom(InputArray arr, bool autoRelease = false);
/** @brief Copies from OpenGL texture to host/device memory or another OpenGL texture object.
@param arr Destination array (host or device memory, can be Mat , cuda::GpuMat , ogl::Buffer or
ogl::Texture2D ).
@param ddepth Destination depth.
@param autoRelease Auto release mode for destination buffer (if arr is OpenGL buffer or texture).
*/
void copyTo(OutputArray arr, int ddepth = CV_32F, bool autoRelease = false) const;
/** @brief Binds texture to current active texture unit for GL_TEXTURE_2D target.
*/
void bind() const;
//! number of rows
int rows() const;
//! number of columns
int cols() const;
//! texture size as Size(cols, rows)
Size size() const;
//! true if the number of rows or the number of columns is zero
bool empty() const;
//! image format of the texture
Format format() const;
//! get OpenGL object id
unsigned int texId() const;
//! implementation class; only declared in this header
class Impl;
private:
Ptr<Impl> impl_; // reference-counted handle to the implementation object
int rows_; // value returned by rows()
int cols_; // value returned by cols()
Format format_; // value returned by format()
};
/** @brief Wrapper for OpenGL Client-Side Vertex arrays.
ogl::Arrays stores vertex data in ogl::Buffer objects.
*/
class CV_EXPORTS Arrays
{
public:
/** @brief Default constructor. The vertex count starts at zero.
*/
Arrays();
/** @brief Sets an array of vertex coordinates.
@param vertex array with vertex coordinates, can be both host and device memory.
*/
void setVertexArray(InputArray vertex);
/** @brief Resets vertex coordinates.
*/
void resetVertexArray();
/** @brief Sets an array of vertex colors.
@param color array with vertex colors, can be both host and device memory.
*/
void setColorArray(InputArray color);
/** @brief Resets vertex colors.
*/
void resetColorArray();
/** @brief Sets an array of vertex normals.
@param normal array with vertex normals, can be both host and device memory.
*/
void setNormalArray(InputArray normal);
/** @brief Resets vertex normals.
*/
void resetNormalArray();
/** @brief Sets an array of vertex texture coordinates.
@param texCoord array with vertex texture coordinates, can be both host and device memory.
*/
void setTexCoordArray(InputArray texCoord);
/** @brief Resets vertex texture coordinates.
*/
void resetTexCoordArray();
/** @brief Releases all inner buffers.
*/
void release();
/** @brief Sets auto release mode for all inner buffers.
@param flag Auto release mode.
*/
void setAutoRelease(bool flag);
/** @brief Binds all vertex arrays.
*/
void bind() const;
/** @brief Returns the vertex count.
*/
int size() const;
/** @brief Returns true if the vertex count is zero.
*/
bool empty() const;
private:
int size_; // vertex count returned by size()
Buffer vertex_; // buffer for vertex coordinates
Buffer color_; // buffer for vertex colors
Buffer normal_; // buffer for vertex normals
Buffer texCoord_; // buffer for vertex texture coordinates
};
/////////////////// Render Functions ///////////////////
//! Render mode accepted by the `mode` parameter of cv::ogl::render.
//! NOTE(review): the numeric values presumably mirror the OpenGL primitive-type
//! constants (GL_POINTS ... GL_POLYGON) -- confirm against the OpenGL headers.
enum RenderModes {
POINTS = 0x0000,
LINES = 0x0001,
LINE_LOOP = 0x0002,
LINE_STRIP = 0x0003,
TRIANGLES = 0x0004,
TRIANGLE_STRIP = 0x0005,
TRIANGLE_FAN = 0x0006,
QUADS = 0x0007,
QUAD_STRIP = 0x0008,
POLYGON = 0x0009
};
/** @brief Render OpenGL texture or primitives.
@param tex Texture to draw.
@param wndRect Region of window, where to draw a texture (normalized coordinates).
@param texRect Region of texture to draw (normalized coordinates).
*/
CV_EXPORTS void render(const Texture2D& tex,
Rect_<double> wndRect = Rect_<double>(0.0, 0.0, 1.0, 1.0),
Rect_<double> texRect = Rect_<double>(0.0, 0.0, 1.0, 1.0));
/** @overload
@param arr Array of primitive vertices.
@param mode Render mode. One of cv::ogl::RenderModes
@param color Color for all vertices. Will be used if arr doesn't contain color array.
*/
CV_EXPORTS void render(const Arrays& arr, int mode = POINTS, Scalar color = Scalar::all(255));
/** @overload
@param arr Array of primitive vertices.
@param indices Array of vertices indices (host or device memory).
@param mode Render mode. One of cv::ogl::RenderModes
@param color Color for all vertices. Will be used if arr doesn't contain color array.
*/
CV_EXPORTS void render(const Arrays& arr, InputArray indices, int mode = POINTS, Scalar color = Scalar::all(255));
//! @} core_opengl
}} // namespace cv::ogl
namespace cv { namespace cuda {
//! @addtogroup cuda
//! @{
/** @brief Sets a CUDA device and initializes it for the current thread with OpenGL interoperability.
This function should be explicitly called after OpenGL context creation and before any CUDA calls.
@param device System index of a CUDA device starting with 0.
@ingroup core_opengl
*/
CV_EXPORTS void setGlDevice(int device = 0);
//! @}
}}
//! @cond IGNORED
////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////
// Allocating constructor (rows/cols form): starts from zeroed rows_/cols_/type_
// and delegates the actual setup to create().
inline
cv::ogl::Buffer::Buffer(int arows, int acols, int atype, Target target, bool autoRelease) : rows_(0), cols_(0), type_(0)
{
create(arows, acols, atype, target, autoRelease);
}
// Allocating constructor (Size form): starts from zeroed rows_/cols_/type_
// and delegates the actual setup to create().
inline
cv::ogl::Buffer::Buffer(Size asize, int atype, Target target, bool autoRelease) : rows_(0), cols_(0), type_(0)
{
create(asize, atype, target, autoRelease);
}
// Size overload of create(): forwards to the (rows, cols) overload,
// using asize.height as the row count and asize.width as the column count.
inline
void cv::ogl::Buffer::create(Size asize, int atype, Target target, bool autoRelease)
{
create(asize.height, asize.width, atype, target, autoRelease);
}
// Returns the stored row count.
inline
int cv::ogl::Buffer::rows() const
{
return rows_;
}
// Returns the stored column count.
inline
int cv::ogl::Buffer::cols() const
{
return cols_;
}
// Returns the dimensions as a Size; note the argument order is (cols_, rows_).
inline
cv::Size cv::ogl::Buffer::size() const
{
return Size(cols_, rows_);
}
// A buffer is empty when either of its dimensions is zero.
inline
bool cv::ogl::Buffer::empty() const
{
return rows_ == 0 || cols_ == 0;
}
// Returns the stored array type code.
inline
int cv::ogl::Buffer::type() const
{
return type_;
}
// Extracts the depth component from the stored type code via CV_MAT_DEPTH.
inline
int cv::ogl::Buffer::depth() const
{
return CV_MAT_DEPTH(type_);
}
// Extracts the channel count from the stored type code via CV_MAT_CN.
inline
int cv::ogl::Buffer::channels() const
{
return CV_MAT_CN(type_);
}
// Returns CV_ELEM_SIZE of the stored type code
// (presumably the byte size of one full element, all channels -- confirm).
inline
int cv::ogl::Buffer::elemSize() const
{
return CV_ELEM_SIZE(type_);
}
// Returns CV_ELEM_SIZE1 of the stored type code
// (presumably the byte size of a single channel value -- confirm).
inline
int cv::ogl::Buffer::elemSize1() const
{
return CV_ELEM_SIZE1(type_);
}
///////
// Allocating constructor (rows/cols form): starts from zeroed dimensions and
// format NONE, then delegates the actual setup to create().
inline
cv::ogl::Texture2D::Texture2D(int arows, int acols, Format aformat, bool autoRelease) : rows_(0), cols_(0), format_(NONE)
{
create(arows, acols, aformat, autoRelease);
}
// Allocating constructor (Size form): starts from zeroed dimensions and
// format NONE, then delegates the actual setup to create().
inline
cv::ogl::Texture2D::Texture2D(Size asize, Format aformat, bool autoRelease) : rows_(0), cols_(0), format_(NONE)
{
create(asize, aformat, autoRelease);
}
// Size overload of create(): forwards to the (rows, cols) overload,
// using asize.height as the row count and asize.width as the column count.
inline
void cv::ogl::Texture2D::create(Size asize, Format aformat, bool autoRelease)
{
create(asize.height, asize.width, aformat, autoRelease);
}
// Returns the stored row count.
inline
int cv::ogl::Texture2D::rows() const
{
return rows_;
}
// Returns the stored column count.
inline
int cv::ogl::Texture2D::cols() const
{
return cols_;
}
// Returns the dimensions as a Size; note the argument order is (cols_, rows_).
inline
cv::Size cv::ogl::Texture2D::size() const
{
return Size(cols_, rows_);
}
// A texture is empty when either of its dimensions is zero.
inline
bool cv::ogl::Texture2D::empty() const
{
return rows_ == 0 || cols_ == 0;
}
// Returns the stored image format.
inline
cv::ogl::Texture2D::Format cv::ogl::Texture2D::format() const
{
return format_;
}
///////
// Default constructor: the vertex count starts at zero; the Buffer members
// are default-constructed.
inline
cv::ogl::Arrays::Arrays() : size_(0)
{
}
// Returns the stored vertex count.
inline
int cv::ogl::Arrays::size() const
{
return size_;
}
// True when the stored vertex count is zero.
inline
bool cv::ogl::Arrays::empty() const
{
return size_ == 0;
}
//! @endcond
#endif /* __OPENCV_CORE_OPENGL_HPP__ */

View File

@ -0,0 +1,530 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2015, Itseez Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_CORE_OPERATIONS_HPP__
#define __OPENCV_CORE_OPERATIONS_HPP__
#ifndef __cplusplus
# error operations.hpp header must be compiled as C++
#endif
#include <cstdio>
//! @cond IGNORED
namespace cv
{
////////////////////////////// Matx methods depending on core API /////////////////////////////
namespace internal
{
template<typename _Tp, int m> struct Matx_FastInvOp
{
bool operator()(const Matx<_Tp, m, m>& a, Matx<_Tp, m, m>& b, int method) const
{
Matx<_Tp, m, m> temp = a;
// assume that b is all 0's on input => make it a unity matrix
for( int i = 0; i < m; i++ )
b(i, i) = (_Tp)1;
if( method == DECOMP_CHOLESKY )
return Cholesky(temp.val, m*sizeof(_Tp), m, b.val, m*sizeof(_Tp), m);
return LU(temp.val, m*sizeof(_Tp), m, b.val, m*sizeof(_Tp), m) != 0;
}
};
// 2 x 2 specialization: closed-form inverse (swap the diagonal, negate the
// off-diagonal, scale by 1/det). Returns false and leaves b untouched when the
// determinant compares equal to zero. The method argument is ignored.
template<typename _Tp> struct Matx_FastInvOp<_Tp, 2>
{
    bool operator()(const Matx<_Tp, 2, 2>& a, Matx<_Tp, 2, 2>& b, int) const
    {
        const _Tp det = determinant(a);
        if( det == 0 )
            return false;

        const _Tp invDet = 1/det;
        // The write order is kept exactly as in the original implementation.
        b(1,1) = a(0,0)*invDet;
        b(0,0) = a(1,1)*invDet;
        b(0,1) = -a(0,1)*invDet;
        b(1,0) = -a(1,0)*invDet;
        return true;
    }
};
// 3 x 3 specialization: closed-form inverse built from the 2 x 2 cofactors of a,
// each scaled by 1/det. Returns false and leaves b untouched when the
// determinant compares equal to zero. The method argument is ignored.
template<typename _Tp> struct Matx_FastInvOp<_Tp, 3>
{
    bool operator()(const Matx<_Tp, 3, 3>& a, Matx<_Tp, 3, 3>& b, int) const
    {
        const _Tp det = (_Tp)determinant(a);
        if( det == 0 )
            return false;

        const _Tp invDet = 1/det;

        // first row of the result
        b(0,0) = (a(1,1) * a(2,2) - a(1,2) * a(2,1)) * invDet;
        b(0,1) = (a(0,2) * a(2,1) - a(0,1) * a(2,2)) * invDet;
        b(0,2) = (a(0,1) * a(1,2) - a(0,2) * a(1,1)) * invDet;

        // second row of the result
        b(1,0) = (a(1,2) * a(2,0) - a(1,0) * a(2,2)) * invDet;
        b(1,1) = (a(0,0) * a(2,2) - a(0,2) * a(2,0)) * invDet;
        b(1,2) = (a(0,2) * a(1,0) - a(0,0) * a(1,2)) * invDet;

        // third row of the result
        b(2,0) = (a(1,0) * a(2,1) - a(1,1) * a(2,0)) * invDet;
        b(2,1) = (a(0,1) * a(2,0) - a(0,0) * a(2,1)) * invDet;
        b(2,2) = (a(0,0) * a(1,1) - a(0,1) * a(1,0)) * invDet;
        return true;
    }
};
template<typename _Tp, int m, int n> struct Matx_FastSolveOp
{
bool operator()(const Matx<_Tp, m, m>& a, const Matx<_Tp, m, n>& b,
Matx<_Tp, m, n>& x, int method) const
{
Matx<_Tp, m, m> temp = a;
x = b;
if( method == DECOMP_CHOLESKY )
return Cholesky(temp.val, m*sizeof(_Tp), m, x.val, n*sizeof(_Tp), n);
return LU(temp.val, m*sizeof(_Tp), m, x.val, n*sizeof(_Tp), n) != 0;
}
};
// 2 x 2 system with a single right-hand side column, solved in closed form
// (determinant ratios). Returns false and leaves x untouched when the
// determinant compares equal to zero. The method argument is ignored.
template<typename _Tp> struct Matx_FastSolveOp<_Tp, 2, 1>
{
    bool operator()(const Matx<_Tp, 2, 2>& a, const Matx<_Tp, 2, 1>& b,
                    Matx<_Tp, 2, 1>& x, int) const
    {
        const _Tp det = determinant(a);
        if( det == 0 )
            return false;

        const _Tp invDet = 1/det;
        x(0) = (b(0)*a(1,1) - b(1)*a(0,1))*invDet;
        x(1) = (b(1)*a(0,0) - b(0)*a(1,0))*invDet;
        return true;
    }
};
// 3 x 3 system with a single right-hand side column, solved in closed form:
// each component of x is the determinant of a with one column replaced by b,
// scaled by 1/det(a). Returns false and leaves x untouched when det(a)
// compares equal to zero. The method argument is ignored.
template<typename _Tp> struct Matx_FastSolveOp<_Tp, 3, 1>
{
    bool operator()(const Matx<_Tp, 3, 3>& a, const Matx<_Tp, 3, 1>& b,
                    Matx<_Tp, 3, 1>& x, int) const
    {
        const _Tp det = (_Tp)determinant(a);
        if( det == 0 )
            return false;

        const _Tp invDet = 1/det;

        // b substituted for column 0
        x(0) = invDet*(b(0)*(a(1,1)*a(2,2) - a(1,2)*a(2,1)) -
                       a(0,1)*(b(1)*a(2,2) - a(1,2)*b(2)) +
                       a(0,2)*(b(1)*a(2,1) - a(1,1)*b(2)));

        // b substituted for column 1
        x(1) = invDet*(a(0,0)*(b(1)*a(2,2) - a(1,2)*b(2)) -
                       b(0)*(a(1,0)*a(2,2) - a(1,2)*a(2,0)) +
                       a(0,2)*(a(1,0)*b(2) - b(1)*a(2,0)));

        // b substituted for column 2
        x(2) = invDet*(a(0,0)*(a(1,1)*b(2) - b(1)*a(2,1)) -
                       a(0,1)*(a(1,0)*b(2) - b(1)*a(2,0)) +
                       b(0)*(a(1,0)*a(2,1) - a(1,1)*a(2,0)));
        return true;
    }
};
} // internal
// Returns an m x n matrix filled by cv::randu; a and b are wrapped in Scalar
// and forwarded as its second and third arguments
// (presumably the bounds of the uniform range -- confirm against cv::randu).
template<typename _Tp, int m, int n> inline
Matx<_Tp,m,n> Matx<_Tp,m,n>::randu(_Tp a, _Tp b)
{
    Matx<_Tp,m,n> sample;
    cv::randu(sample,
              Scalar(a),
              Scalar(b));
    return sample;
}
// Returns an m x n matrix filled by cv::randn; a and b are wrapped in Scalar
// and forwarded as its second and third arguments
// (presumably mean and standard deviation -- confirm against cv::randn).
template<typename _Tp, int m, int n> inline
Matx<_Tp,m,n> Matx<_Tp,m,n>::randn(_Tp a, _Tp b)
{
    Matx<_Tp,m,n> sample;
    cv::randn(sample,
              Scalar(a),
              Scalar(b));
    return sample;
}
// Inverts the matrix with the requested decomposition method.
// DECOMP_LU and DECOMP_CHOLESKY go through the fixed-size
// cv::internal::Matx_FastInvOp functor; every other method wraps the operands
// in Mat headers and calls cv::invert. When p_is_ok is non-null it receives
// the success flag. On failure an all-zero matrix is returned.
template<typename _Tp, int m, int n> inline
Matx<_Tp, n, m> Matx<_Tp, m, n>::inv(int method, bool *p_is_ok /*= NULL*/) const
{
    Matx<_Tp, n, m> inverse;
    bool succeeded;

    const bool useFixedSizePath = (method == DECOMP_LU || method == DECOMP_CHOLESKY);
    if( !useFixedSizePath )
    {
        Mat A(*this, false), B(inverse, false);
        succeeded = (invert(A, B, method) != 0);
    }
    else
    {
        succeeded = cv::internal::Matx_FastInvOp<_Tp, m>()(*this, inverse, method);
    }

    if( p_is_ok != NULL )
        *p_is_ok = succeeded;

    if( !succeeded )
        return Matx<_Tp, n, m>::zeros();
    return inverse;
}
// Solves (*this) * x = rhs with the requested decomposition method.
// DECOMP_LU and DECOMP_CHOLESKY go through the fixed-size
// cv::internal::Matx_FastSolveOp functor; every other method wraps the
// operands in Mat headers and calls cv::solve. On failure an all-zero matrix
// is returned.
template<typename _Tp, int m, int n> template<int l> inline
Matx<_Tp, n, l> Matx<_Tp, m, n>::solve(const Matx<_Tp, m, l>& rhs, int method) const
{
    Matx<_Tp, n, l> solution;
    bool succeeded;

    const bool useFixedSizePath = (method == DECOMP_LU || method == DECOMP_CHOLESKY);
    if( !useFixedSizePath )
    {
        Mat A(*this, false), B(rhs, false), X(solution, false);
        succeeded = cv::solve(A, B, X, method);
    }
    else
    {
        succeeded = cv::internal::Matx_FastSolveOp<_Tp, m, l>()(*this, rhs, solution, method);
    }

    if( !succeeded )
        return Matx<_Tp, n, l>::zeros();
    return solution;
}
////////////////////////// Augmenting algebraic & logical operations //////////////////////////
// Generator macros for the compound-assignment operators on Mat / Mat_<_Tp>.
// CV_MAT_AUG_OPERATOR1 emits one `static inline A& operator op (A& a, const B& b)`
// whose body executes `cvop` and returns a.
#define CV_MAT_AUG_OPERATOR1(op, cvop, A, B) \
static inline A& operator op (A& a, const B& b) { cvop; return a; }
// CV_MAT_AUG_OPERATOR emits the operator twice: once for A and once for const A.
#define CV_MAT_AUG_OPERATOR(op, cvop, A, B) \
CV_MAT_AUG_OPERATOR1(op, cvop, A, B) \
CV_MAT_AUG_OPERATOR1(op, cvop, const A, B)
// CV_MAT_AUG_OPERATOR_T does the same, prefixing each operator with
// template<typename _Tp> so that A / B may mention Mat_<_Tp>.
#define CV_MAT_AUG_OPERATOR_T(op, cvop, A, B) \
template<typename _Tp> CV_MAT_AUG_OPERATOR1(op, cvop, A, B) \
template<typename _Tp> CV_MAT_AUG_OPERATOR1(op, cvop, const A, B)
// += : cv::add with a as both input and output
CV_MAT_AUG_OPERATOR (+=, cv::add(a,b,a), Mat, Mat)
CV_MAT_AUG_OPERATOR (+=, cv::add(a,b,a), Mat, Scalar)
CV_MAT_AUG_OPERATOR_T(+=, cv::add(a,b,a), Mat_<_Tp>, Mat)
CV_MAT_AUG_OPERATOR_T(+=, cv::add(a,b,a), Mat_<_Tp>, Scalar)
CV_MAT_AUG_OPERATOR_T(+=, cv::add(a,b,a), Mat_<_Tp>, Mat_<_Tp>)
// -= : cv::subtract with a as both input and output
CV_MAT_AUG_OPERATOR (-=, cv::subtract(a,b,a), Mat, Mat)
CV_MAT_AUG_OPERATOR (-=, cv::subtract(a,b,a), Mat, Scalar)
CV_MAT_AUG_OPERATOR_T(-=, cv::subtract(a,b,a), Mat_<_Tp>, Mat)
CV_MAT_AUG_OPERATOR_T(-=, cv::subtract(a,b,a), Mat_<_Tp>, Scalar)
CV_MAT_AUG_OPERATOR_T(-=, cv::subtract(a,b,a), Mat_<_Tp>, Mat_<_Tp>)
// *= with a matrix operand goes through cv::gemm (i.e. not cv::multiply);
// *= with a double goes through convertTo with b as the scale argument
CV_MAT_AUG_OPERATOR (*=, cv::gemm(a, b, 1, Mat(), 0, a, 0), Mat, Mat)
CV_MAT_AUG_OPERATOR_T(*=, cv::gemm(a, b, 1, Mat(), 0, a, 0), Mat_<_Tp>, Mat)
CV_MAT_AUG_OPERATOR_T(*=, cv::gemm(a, b, 1, Mat(), 0, a, 0), Mat_<_Tp>, Mat_<_Tp>)
CV_MAT_AUG_OPERATOR (*=, a.convertTo(a, -1, b), Mat, double)
CV_MAT_AUG_OPERATOR_T(*=, a.convertTo(a, -1, b), Mat_<_Tp>, double)
// /= with a matrix operand goes through cv::divide;
// /= with a double goes through convertTo with 1./b as the scale argument
CV_MAT_AUG_OPERATOR (/=, cv::divide(a,b,a), Mat, Mat)
CV_MAT_AUG_OPERATOR_T(/=, cv::divide(a,b,a), Mat_<_Tp>, Mat)
CV_MAT_AUG_OPERATOR_T(/=, cv::divide(a,b,a), Mat_<_Tp>, Mat_<_Tp>)
CV_MAT_AUG_OPERATOR (/=, a.convertTo((Mat&)a, -1, 1./b), Mat, double)
CV_MAT_AUG_OPERATOR_T(/=, a.convertTo((Mat&)a, -1, 1./b), Mat_<_Tp>, double)
// &= : cv::bitwise_and with a as both input and output
CV_MAT_AUG_OPERATOR (&=, cv::bitwise_and(a,b,a), Mat, Mat)
CV_MAT_AUG_OPERATOR (&=, cv::bitwise_and(a,b,a), Mat, Scalar)
CV_MAT_AUG_OPERATOR_T(&=, cv::bitwise_and(a,b,a), Mat_<_Tp>, Mat)
CV_MAT_AUG_OPERATOR_T(&=, cv::bitwise_and(a,b,a), Mat_<_Tp>, Scalar)
CV_MAT_AUG_OPERATOR_T(&=, cv::bitwise_and(a,b,a), Mat_<_Tp>, Mat_<_Tp>)
// |= : cv::bitwise_or with a as both input and output
CV_MAT_AUG_OPERATOR (|=, cv::bitwise_or(a,b,a), Mat, Mat)
CV_MAT_AUG_OPERATOR (|=, cv::bitwise_or(a,b,a), Mat, Scalar)
CV_MAT_AUG_OPERATOR_T(|=, cv::bitwise_or(a,b,a), Mat_<_Tp>, Mat)
CV_MAT_AUG_OPERATOR_T(|=, cv::bitwise_or(a,b,a), Mat_<_Tp>, Scalar)
CV_MAT_AUG_OPERATOR_T(|=, cv::bitwise_or(a,b,a), Mat_<_Tp>, Mat_<_Tp>)
// ^= : cv::bitwise_xor with a as both input and output
CV_MAT_AUG_OPERATOR (^=, cv::bitwise_xor(a,b,a), Mat, Mat)
CV_MAT_AUG_OPERATOR (^=, cv::bitwise_xor(a,b,a), Mat, Scalar)
CV_MAT_AUG_OPERATOR_T(^=, cv::bitwise_xor(a,b,a), Mat_<_Tp>, Mat)
CV_MAT_AUG_OPERATOR_T(^=, cv::bitwise_xor(a,b,a), Mat_<_Tp>, Scalar)
CV_MAT_AUG_OPERATOR_T(^=, cv::bitwise_xor(a,b,a), Mat_<_Tp>, Mat_<_Tp>)
// The generator macros are local to this header section.
#undef CV_MAT_AUG_OPERATOR_T
#undef CV_MAT_AUG_OPERATOR
#undef CV_MAT_AUG_OPERATOR1
///////////////////////////////////////////// SVD /////////////////////////////////////////////
// Default constructor: performs no decomposition.
inline SVD::SVD() {}
// Constructs the object and immediately runs operator()(m, flags) on it.
inline SVD::SVD( InputArray m, int flags ) { operator ()(m, flags); }
// Decomposes m with SVD and writes the last row of vt, reshaped into a
// single-channel column with vt.cols rows, into _dst. FULL_UV is requested
// only when m has fewer rows than columns.
inline void SVD::solveZ( InputArray m, OutputArray _dst )
{
    Mat input = m.getMat();

    const int svdFlags = input.rows < input.cols ? SVD::FULL_UV : 0;
    SVD decomposition(input, svdFlags);

    const int count = decomposition.vt.cols;
    _dst.create(count, 1, decomposition.vt.type());
    Mat output = _dst.getMat();

    const int lastRow = decomposition.vt.rows - 1;
    decomposition.vt.row(lastRow).reshape(1, count).copyTo(output);
}
template<typename _Tp, int m, int n, int nm> inline void
SVD::compute( const Matx<_Tp, m, n>& a, Matx<_Tp, nm, 1>& w, Matx<_Tp, m, nm>& u, Matx<_Tp, n, nm>& vt )
{
CV_StaticAssert( nm == MIN(m, n), "Invalid size of output vector.");
Mat _a(a, false), _u(u, false), _w(w, false), _vt(vt, false);
SVD::compute(_a, _w, _u, _vt);
CV_Assert(_w.data == (uchar*)&w.val[0] && _u.data == (uchar*)&u.val[0] && _vt.data == (uchar*)&vt.val[0]);
}
template<typename _Tp, int m, int n, int nm> inline void
SVD::compute( const Matx<_Tp, m, n>& a, Matx<_Tp, nm, 1>& w )
{
CV_StaticAssert( nm == MIN(m, n), "Invalid size of output vector.");
Mat _a(a, false), _w(w, false);
SVD::compute(_a, _w);
CV_Assert(_w.data == (uchar*)&w.val[0]);
}
template<typename _Tp, int m, int n, int nm, int nb> inline void
SVD::backSubst( const Matx<_Tp, nm, 1>& w, const Matx<_Tp, m, nm>& u,
const Matx<_Tp, n, nm>& vt, const Matx<_Tp, m, nb>& rhs,
Matx<_Tp, n, nb>& dst )
{
CV_StaticAssert( nm == MIN(m, n), "Invalid size of output vector.");
Mat _u(u, false), _w(w, false), _vt(vt, false), _rhs(rhs, false), _dst(dst, false);
SVD::backSubst(_w, _u, _vt, _rhs, _dst);
CV_Assert(_dst.data == (uchar*)&dst.val[0]);
}
/////////////////////////////////// Multiply-with-Carry RNG ///////////////////////////////////
//! Initializes the generator state to 0xffffffff.
inline RNG::RNG()
{
    state = 0xffffffff;
}

//! Initializes the generator state to _state; a zero value is replaced by 0xffffffff.
inline RNG::RNG(uint64 _state)
{
    if( _state == 0 )
        _state = 0xffffffff;
    state = _state;
}
// Conversion operators. Each one advances the generator through next(); the integer variants
// simply cast the 32-bit result to the requested type.
inline RNG::operator uchar()
{
    return (uchar)next();
}

inline RNG::operator schar()
{
    return (schar)next();
}

inline RNG::operator ushort()
{
    return (ushort)next();
}

inline RNG::operator short()
{
    return (short)next();
}

inline RNG::operator int()
{
    return (int)next();
}

inline RNG::operator unsigned()
{
    return next();
}

//! Scales one 32-bit draw by a constant of about 2^-32.
inline RNG::operator float()
{
    return next()*2.3283064365386962890625e-10f;
}

//! Combines two consecutive 32-bit draws (the first becomes the high half) and scales the
//! 64-bit value by a constant of about 2^-64.
inline RNG::operator double()
{
    const uint64 hi = (uint64)next() << 32;
    const uint64 lo = next();
    return (hi | lo) * 5.4210108624275221700372640043497e-20;
}
//! Returns uniform(0,N) cast to unsigned.
inline unsigned RNG::operator ()(unsigned N)
{
    const int r = uniform(0,N);
    return (unsigned)r;
}

//! Returns the next raw 32-bit value.
inline unsigned RNG::operator ()()
{
    return next();
}
//! Integer variant: returns a when a == b (without advancing the generator), otherwise
//! next() % (b - a) + a.
inline int RNG::uniform(int a, int b)
{
    if( a == b )
        return a;
    return (int)(next() % (b - a) + a);
}

//! Float variant: one float draw mapped linearly onto the range starting at a with width b - a.
inline float RNG::uniform(float a, float b)
{
    const float u = (float)*this;
    return u*(b - a) + a;
}

//! Double variant: one double draw mapped linearly onto the range starting at a with width b - a.
inline double RNG::uniform(double a, double b)
{
    const double u = (double)*this;
    return u*(b - a) + a;
}
//! Advances the multiply-with-carry generator: the low 32 bits of the state are multiplied by
//! the coefficient 4164903690 and the high 32 bits are added as the carry. The low 32 bits of
//! the new state are returned.
inline unsigned RNG::next()
{
    const unsigned low = (unsigned)state;
    const unsigned carry = (unsigned)(state >> 32);
    state = (uint64)low * /*CV_RNG_COEFF*/ 4164903690U + carry;
    return (unsigned)state;
}
//! Returns the next uniformly-distributed random number of the specified type.
//! The value is obtained by casting the object returned by theRNG() to _Tp.
template<typename _Tp> static inline _Tp randu()
{
return (_Tp)theRNG();
}
///////////////////////////////// Formatted string generation /////////////////////////////////
//! Builds a String from a format string and a variable argument list (declared here, defined
//! elsewhere). NOTE(review): presumably printf-style formatting - confirm against the definition.
CV_EXPORTS String format( const char* fmt, ... );
///////////////////////////////// Formatted output of cv::Mat /////////////////////////////////
//! Formats the matrix behind mtx with the formatter selected by fmt and returns the resulting
//! Formatted object.
static inline
Ptr<Formatted> format(InputArray mtx, int fmt)
{
    const Mat m = mtx.getMat();
    return Formatter::get(fmt)->format(m);
}
/** Writes every chunk produced by fmtd to stream.

The formatted object is reset first, then next() is called until it returns a null pointer.
@return The sum of the values returned by fputs for each chunk.
NOTE(review): fputs reports success as a non-negative value and failure as EOF, not as a
character count, so this total is not the number of characters written.
*/
static inline
int print(Ptr<Formatted> fmtd, FILE* stream = stdout)
{
    fmtd->reset();

    int written = 0;
    const char* str = fmtd->next();
    while( str )
    {
        written += fputs(str, stream);
        str = fmtd->next();
    }
    return written;
}
static inline
int print(const Mat& mtx, FILE* stream = stdout)
{
return print(Formatter::get()->format(mtx), stream);
}
static inline
int print(const UMat& mtx, FILE* stream = stdout)
{
return print(Formatter::get()->format(mtx.getMat(ACCESS_READ)), stream);
}
template<typename _Tp> static inline
int print(const std::vector<Point_<_Tp> >& vec, FILE* stream = stdout)
{
return print(Formatter::get()->format(Mat(vec)), stream);
}
template<typename _Tp> static inline
int print(const std::vector<Point3_<_Tp> >& vec, FILE* stream = stdout)
{
return print(Formatter::get()->format(Mat(vec)), stream);
}
template<typename _Tp, int m, int n> static inline
int print(const Matx<_Tp, m, n>& matx, FILE* stream = stdout)
{
return print(Formatter::get()->format(cv::Mat(matx)), stream);
}
//! @endcond
/****************************************************************************************\
* Auxiliary algorithms *
\****************************************************************************************/
/** @brief Splits an element set into equivalency classes.

The generic function partition implements an \f$O(N^2)\f$ algorithm for splitting a set of \f$N\f$ elements
into one or more equivalency classes, as described in
<http://en.wikipedia.org/wiki/Disjoint-set_data_structure> . The function returns the number of
equivalency classes.

@param _vec Set of elements stored as a vector. It may be empty.
@param labels Output vector of labels. It is resized to hold as many elements as _vec. Each label
labels[i] is a 0-based cluster index of `_vec[i]`.
@param predicate Equivalence predicate (pointer to a boolean function of two arguments or an
instance of the class that has the method bool operator()(const _Tp& a, const _Tp& b) ). The
predicate returns true when the elements are certainly in the same class, and returns false if they
may or may not be in the same class.
@return The number of equivalency classes; 0 when _vec is empty (labels is then emptied as well).
@ingroup core_cluster
*/
template<typename _Tp, class _EqPredicate> int
partition( const std::vector<_Tp>& _vec, std::vector<int>& labels,
           _EqPredicate predicate=_EqPredicate())
{
    int i, j, N = (int)_vec.size();

    // Nothing to partition. Return before &_vec[0] / &_nodes[0] are evaluated below:
    // indexing element 0 of an empty vector is undefined behaviour.
    if( N == 0 )
    {
        labels.clear();
        return 0;
    }

    const _Tp* vec = &_vec[0];

    const int PARENT=0;
    const int RANK=1;

    // Each node is a {parent index, rank} pair stored in a flat buffer.
    std::vector<int> _nodes(N*2);
    int (*nodes)[2] = (int(*)[2])&_nodes[0];

    // The first O(N) pass: create N single-vertex trees
    for(i = 0; i < N; i++)
    {
        nodes[i][PARENT]=-1;
        nodes[i][RANK] = 0;
    }

    // The main O(N^2) pass: merge connected components
    for( i = 0; i < N; i++ )
    {
        int root = i;

        // find root
        while( nodes[root][PARENT] >= 0 )
            root = nodes[root][PARENT];

        for( j = 0; j < N; j++ )
        {
            if( i == j || !predicate(vec[i], vec[j]))
                continue;
            int root2 = j;

            while( nodes[root2][PARENT] >= 0 )
                root2 = nodes[root2][PARENT];

            if( root2 != root )
            {
                // unite both trees: the tree of lower rank is attached to the other one
                int rank = nodes[root][RANK], rank2 = nodes[root2][RANK];
                if( rank > rank2 )
                    nodes[root2][PARENT] = root;
                else
                {
                    nodes[root][PARENT] = root2;
                    nodes[root2][RANK] += rank == rank2;
                    root = root2;
                }
                CV_Assert( nodes[root][PARENT] < 0 );

                int k = j, parent;

                // compress the path from node2 to root
                while( (parent = nodes[k][PARENT]) >= 0 )
                {
                    nodes[k][PARENT] = root;
                    k = parent;
                }

                // compress the path from node to root
                k = i;
                while( (parent = nodes[k][PARENT]) >= 0 )
                {
                    nodes[k][PARENT] = root;
                    k = parent;
                }
            }
        }
    }

    // Final O(N) pass: enumerate classes
    labels.resize(N);
    int nclasses = 0;

    for( i = 0; i < N; i++ )
    {
        int root = i;
        while( nodes[root][PARENT] >= 0 )
            root = nodes[root][PARENT];
        // re-use the rank as the class label: ranks are non-negative, so the bitwise
        // complement of the class index (always negative) marks a root as already numbered
        if( nodes[root][RANK] >= 0 )
            nodes[root][RANK] = ~nclasses++;
        labels[i] = ~nodes[root][RANK];
    }

    return nclasses;
}
} // cv
#endif

302
3rdparty/include/opencv2/core/optim.hpp vendored Normal file
View File

@ -0,0 +1,302 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the OpenCV Foundation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_OPTIM_HPP__
#define __OPENCV_OPTIM_HPP__
#include "opencv2/core.hpp"
namespace cv
{
/** @addtogroup core_optim
The algorithms in this section minimize or maximize function value within specified constraints or
without any constraints.
@{
*/
/** @brief Basic interface for all solvers.

Declares the pure-virtual accessors for the function being optimized and for the termination
criteria, plus minimize(), which runs the solver.
*/
class CV_EXPORTS MinProblemSolver : public Algorithm
{
public:
/** @brief Represents the function being optimized.

getDims() and calc() are pure virtual and must be provided by every implementation;
getGradientEps() and getGradient() are declared without `= 0`.
*/
class CV_EXPORTS Function
{
public:
virtual ~Function() {}
//! NOTE(review): presumably the number of coordinates calc() reads from x - confirm.
virtual int getDims() const = 0;
//! NOTE(review): presumably the step used for a numerical gradient estimate - confirm
//! against the implementation.
virtual double getGradientEps() const;
//! Evaluates the function at the point x.
virtual double calc(const double* x) const = 0;
//! Computes the gradient at the point x and stores it in grad.
virtual void getGradient(const double* x,double* grad);
};
/** @brief Getter for the optimized function.
The optimized function is represented by the Function interface, which requires derived classes
to implement the method calc(const double*) that evaluates the function.
@return Smart pointer to an object that implements the Function interface - it represents the
function that is being optimized. It can be empty if no function has been given so far.
*/
virtual Ptr<Function> getFunction() const = 0;
/** @brief Setter for the optimized function.
*It should be called at least once before the call to* minimize(), as the default value is not usable.
@param f The new function to optimize.
*/
virtual void setFunction(const Ptr<Function>& f) = 0;
/** @brief Getter for the previously set terminal criteria for this algorithm.
@return Deep copy of the terminal criteria used at the moment.
*/
virtual TermCriteria getTermCriteria() const = 0;
/** @brief Sets the terminal criteria for the solver.
This method *does not have to* be called before the first call to minimize(), as the default
value is sensible.
The algorithm stops when the number of function evaluations done exceeds termcrit.maxCount, when
the function values at the vertices of the simplex are within termcrit.epsilon range, or when the
simplex becomes so small that it can be enclosed in a box with termcrit.epsilon sides, whichever
comes first.
@param termcrit Terminal criteria to be used, represented as a cv::TermCriteria structure.
*/
virtual void setTermCriteria(const TermCriteria& termcrit) = 0;
/** @brief Actually runs the algorithm and performs the minimization.
The sole input parameter determines the centroid of the starting simplex (roughly, it tells
where to start); all the others (terminal criteria, initial step, function to be minimized) are
supposed to be set via the setters before the call to this method, or the default values (not
always sensible) will be used.
@param x The initial point, which will become the centroid of the initial simplex. After the
algorithm terminates, it is set to the point where the algorithm stopped - the point of a possible
minimum.
@return The value of the function at the point found.
*/
virtual double minimize(InputOutputArray x) = 0;
};
/** @brief This class is used to perform the non-linear non-constrained minimization of a function,
defined on an `n`-dimensional Euclidean space, using the **Nelder-Mead method**, also known as
**downhill simplex method**. The basic idea about the method can be obtained from
<http://en.wikipedia.org/wiki/Nelder-Mead_method>.
It should be noted that this method, although deterministic, is rather a heuristic and therefore
may converge to a local minimum, not necessarily a global one. It is an iterative optimization
technique, which at each step uses information about the values of the function evaluated only at
`n+1` points, arranged as a *simplex* in `n`-dimensional space (hence the second name of the
method). At each step a new point is chosen to evaluate the function at, the obtained value is
compared with the previous ones and, based on this information, the simplex changes its shape,
slowly moving to the local minimum. Thus this method uses *only* function values to make decisions,
in contrast to, say, the Nonlinear Conjugate Gradient method (which is also implemented in optim).
The algorithm stops when the number of function evaluations done exceeds termcrit.maxCount, when
the function values at the vertices of the simplex are within termcrit.epsilon range, or when the
simplex becomes so small that it can be enclosed in a box with termcrit.epsilon sides, whichever
comes first, for some user-defined positive integer termcrit.maxCount and positive real
termcrit.epsilon.
@note DownhillSolver is derived from the abstract interface
cv::MinProblemSolver, which in turn is derived from the Algorithm interface and is used to
encapsulate the functionality common to all non-linear optimization algorithms in the optim
module.
@note term criteria should meet the following condition:
@code
termcrit.type == (TermCriteria::MAX_ITER + TermCriteria::EPS) && termcrit.epsilon > 0 && termcrit.maxCount > 0
@endcode
*/
class CV_EXPORTS DownhillSolver : public MinProblemSolver
{
public:
/** @brief Returns the initial step that will be used in the downhill simplex algorithm.
@param step Initial step that will be used in the algorithm. Note that although the corresponding
setter accepts column-vectors as well as row-vectors, this method will return a row-vector.
@see DownhillSolver::setInitStep
*/
virtual void getInitStep(OutputArray step) const=0;
/** @brief Sets the initial step that will be used in the downhill simplex algorithm.
The step, together with the initial point (given in DownhillSolver::minimize), are two
`n`-dimensional vectors that are used to determine the shape of the initial simplex. Roughly said,
the initial point determines the position of the simplex (it will become the simplex's centroid),
while the step determines the spread (size in each dimension) of the simplex. To be more precise,
if \f$s,x_0\in\mathbb{R}^n\f$ are the initial step and initial point respectively, the vertices of
the simplex will be:
\f$v_0:=x_0-\frac{1}{2} s\f$ and \f$v_i:=x_0+s_i\f$ for \f$i=1,2,\dots,n\f$ where \f$s_i\f$ denotes
the projection of the initial step onto the *i*-th coordinate (the result of the projection is
taken to be the vector given by \f$s_i:=e_i\cdot\left<e_i\cdot s\right>\f$, where the \f$e_i\f$
form the canonical basis)
@param step Initial step that will be used in the algorithm. Roughly said, it determines the spread
(size in each dimension) of the initial simplex.
*/
virtual void setInitStep(InputArray step)=0;
/** @brief This function returns a smart pointer to a ready-to-use DownhillSolver object.
All the parameters are optional, so this procedure can be called even without parameters at
all. In this case, the default values will be used. As only the default value for the terminal
criteria is sensible, MinProblemSolver::setFunction() and DownhillSolver::setInitStep()
should be called upon the obtained object if the respective parameters were not given to
create(). Otherwise, the two ways (give the parameters to create(), or leave them out
and call MinProblemSolver::setFunction() and DownhillSolver::setInitStep()) are absolutely
equivalent (and will raise the same errors in the same way, should invalid input be detected).
@param f Pointer to the function that will be minimized, similarly to the one you submit via
MinProblemSolver::setFunction.
@param initStep Initial step that will be used to construct the initial simplex, similarly to the
one you submit via DownhillSolver::setInitStep.
@param termcrit Terminal criteria for the algorithm, similarly to the one you submit via
MinProblemSolver::setTermCriteria.
*/
static Ptr<DownhillSolver> create(const Ptr<MinProblemSolver::Function>& f=Ptr<MinProblemSolver::Function>(),
InputArray initStep=Mat_<double>(1,1,0.0),
TermCriteria termcrit=TermCriteria(TermCriteria::MAX_ITER+TermCriteria::EPS,5000,0.000001));
};
/** @brief This class is used to perform the non-linear non-constrained minimization of a function
with known gradient,
defined on an *n*-dimensional Euclidean space, using the **Nonlinear Conjugate Gradient method**.
The implementation was done based on the beautifully clear explanatory article [An Introduction to
the Conjugate Gradient Method Without the Agonizing
Pain](http://www.cs.cmu.edu/~quake-papers/painless-conjugate-gradient.pdf) by Jonathan Richard
Shewchuk. The method can be seen as an adaptation of a standard Conjugate Gradient method (see, for
example <http://en.wikipedia.org/wiki/Conjugate_gradient_method>) for numerically solving the
systems of linear equations.
It should be noted that this method, although deterministic, is rather a heuristic method and
therefore may converge to a local minimum, not necessarily a global one. What is even more
disastrous, most of its behaviour is ruled by the gradient, therefore it essentially cannot
distinguish between local minima and maxima. Therefore, if it starts sufficiently near to a local
maximum, it may converge to it. Another obvious restriction is that it should be possible to
compute the gradient of the function at any point, thus it is preferable to have an analytic
expression for the gradient, and the computational burden should be borne by the user.
The latter responsibility is accomplished via the getGradient method of the
MinProblemSolver::Function interface (which represents the function being optimized). This method
takes a point in *n*-dimensional space (the first argument represents the array of coordinates of
that point) and computes its gradient (it should be stored in the second argument as an array).
@note class ConjGradSolver thus does not add any new methods to the basic MinProblemSolver interface.
@note term criteria should meet the following condition:
@code
termcrit.type == (TermCriteria::MAX_ITER + TermCriteria::EPS) && termcrit.epsilon > 0 && termcrit.maxCount > 0
// or
termcrit.type == TermCriteria::MAX_ITER && termcrit.maxCount > 0
@endcode
*/
class CV_EXPORTS ConjGradSolver : public MinProblemSolver
{
public:
/** @brief This function returns a smart pointer to a ready-to-use ConjGradSolver object.
All the parameters are optional, so this procedure can be called even without parameters at
all. In this case, the default values will be used. As only the default value for the terminal
criteria is sensible, MinProblemSolver::setFunction() should be called upon the obtained
object if the function was not given to create(). Otherwise, the two ways (submit it to
create(), or leave it out and call MinProblemSolver::setFunction()) are absolutely equivalent
(and will raise the same errors in the same way, should invalid input be detected).
@param f Pointer to the function that will be minimized, similarly to the one you submit via
MinProblemSolver::setFunction.
@param termcrit Terminal criteria for the algorithm, similarly to the one you submit via
MinProblemSolver::setTermCriteria.
*/
static Ptr<ConjGradSolver> create(const Ptr<MinProblemSolver::Function>& f=Ptr<ConjGradSolver::Function>(),
TermCriteria termcrit=TermCriteria(TermCriteria::MAX_ITER+TermCriteria::EPS,5000,0.000001));
};
//! Return codes for the cv::solveLP() function. Negative values mean that no solution was
//! produced; non-negative values describe the maximum that was found.
enum SolveLPResult
{
SOLVELP_UNBOUNDED = -2, //!< problem is unbounded (the target function can achieve arbitrarily high values)
SOLVELP_UNFEASIBLE = -1, //!< problem is unfeasible (there are no points that satisfy all the constraints imposed)
SOLVELP_SINGLE = 0, //!< there is only one maximum for the target function
SOLVELP_MULTI = 1 //!< there are multiple maxima for the target function - an arbitrary one is returned
};
/** @brief Solves the given (non-integer) linear programming problem using the Simplex Algorithm (Simplex Method).
What we mean here by "linear programming problem" (or LP problem, for short) can be formulated as:
\f[\mbox{Maximize } c\cdot x\\
\mbox{Subject to:}\\
Ax\leq b\\
x\geq 0\f]
Where \f$c\f$ is a fixed `1`-by-`n` row-vector, \f$A\f$ is a fixed `m`-by-`n` matrix, \f$b\f$ is a fixed `m`-by-`1`
column vector and \f$x\f$ is an arbitrary `n`-by-`1` column vector which satisfies the constraints.
The simplex algorithm is one of many algorithms that are designed to handle this sort of problem
efficiently. Although it is not optimal in the theoretical sense (there exist algorithms that can
solve any problem written as above in polynomial time, while the simplex method degenerates to
exponential time for some special cases), it is well-studied, easy to implement and is shown to
work well for real-life purposes.
The particular implementation is taken almost verbatim from **Introduction to Algorithms, third
edition** by T. H. Cormen, C. E. Leiserson, R. L. Rivest and Clifford Stein. In particular,
Bland's rule <http://en.wikipedia.org/wiki/Bland%27s_rule> is used to prevent cycling.
@param Func This row-vector corresponds to \f$c\f$ in the LP problem formulation (see above). It should
contain 32- or 64-bit floating point numbers. As a convenience, a column-vector may also be submitted,
in which case it is understood to correspond to \f$c^T\f$.
@param Constr `m`-by-`n+1` matrix, whose rightmost column corresponds to \f$b\f$ in the formulation above
and the remaining columns to \f$A\f$. It should contain 32- or 64-bit floating point numbers.
@param z The solution will be returned here as a column-vector - it corresponds to \f$x\f$ in the
formulation above. It will contain 64-bit floating point numbers.
@return One of cv::SolveLPResult
*/
CV_EXPORTS_W int solveLP(const Mat& Func, const Mat& Constr, Mat& z);
//! @}
}// cv
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,172 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_CORE_PRIVATE_CUDA_HPP__
#define __OPENCV_CORE_PRIVATE_CUDA_HPP__
#ifndef __OPENCV_BUILD
# error this is a private header which should not be used from outside of the OpenCV library
#endif
#include "cvconfig.h"
#include "opencv2/core/cvdef.h"
#include "opencv2/core/base.hpp"
#include "opencv2/core/cuda.hpp"
// Everything in this conditional is only seen when the build is configured with CUDA.
#ifdef HAVE_CUDA
# include <cuda.h>
# include <cuda_runtime.h>
# include <npp.h>
# include "opencv2/core/cuda_stream_accessor.hpp"
# include "opencv2/core/cuda/common.hpp"
// Single comparable number for the NPP version: major*1000 + minor*100 + build.
# define NPP_VERSION (NPP_VERSION_MAJOR * 1000 + NPP_VERSION_MINOR * 100 + NPP_VERSION_BUILD)
// Oldest CUDA runtime accepted, compared against CUDART_VERSION below.
// NOTE(review): 4020 presumably encodes CUDA 4.2 - confirm against the CUDART_VERSION scheme.
# define CUDART_MINIMUM_REQUIRED_VERSION 4020
# if (CUDART_VERSION < CUDART_MINIMUM_REQUIRED_VERSION)
# error "Insufficient Cuda Runtime library version, please update it."
# endif
// Builds targeting compute capability 1.0 are rejected outright.
# if defined(CUDA_ARCH_BIN_OR_PTX_10)
# error "OpenCV CUDA module doesn't support NVIDIA compute capability 1.0"
# endif
#endif
//! @cond IGNORED
namespace cv { namespace cuda {
// Helpers declared here and defined elsewhere in the library.
//! Returns the message text for an NPP status code.
CV_EXPORTS cv::String getNppErrorMessage(int code);
//! Returns the message text for a CUDA driver API status code.
CV_EXPORTS cv::String getCudaDriverApiErrorMessage(int code);
//! Returns a GpuMat for the input array _src; stream is the stream to operate on.
//! NOTE(review): whether this uploads host data asynchronously is not visible here - confirm.
CV_EXPORTS GpuMat getInputMat(InputArray _src, Stream& stream);
//! Returns a GpuMat of the requested rows, cols and type associated with the output array _dst.
//! NOTE(review): presumably paired with syncOutput() after the result is computed - confirm.
CV_EXPORTS GpuMat getOutputMat(OutputArray _dst, int rows, int cols, int type, Stream& stream);
//! Convenience overload: forwards size.height as rows and size.width as cols to the
//! (rows, cols) variant of getOutputMat.
static inline GpuMat getOutputMat(OutputArray _dst, Size size, int type, Stream& stream)
{
    const int rows = size.height;
    const int cols = size.width;
    return getOutputMat(_dst, rows, cols, type, stream);
}
//! Declared here, defined elsewhere.
//! NOTE(review): presumably transfers the GpuMat dst back into the output array _dst on the
//! given stream - confirm against the definition.
CV_EXPORTS void syncOutput(const GpuMat& dst, OutputArray _dst, Stream& stream);
}}
#ifndef HAVE_CUDA
//! Variant compiled when HAVE_CUDA is not defined: raises cv::Error::GpuNotSupported.
static inline void throw_no_cuda() { CV_Error(cv::Error::GpuNotSupported, "The library is compiled without CUDA support"); }
#else // HAVE_CUDA
//! Variant compiled when HAVE_CUDA is defined: raises cv::Error::StsNotImplemented, for
//! functionality that is unavailable in the current build or on the current platform.
static inline void throw_no_cuda() { CV_Error(cv::Error::StsNotImplemented, "The called functionality is disabled for current build or platform"); }
namespace cv { namespace cuda
{
//! Hands out GpuMat buffers for a given Stream. Holds a non-owning-looking raw pointer to a
//! GpuMat::Allocator (NOTE(review): ownership of allocator_ is not visible here - confirm).
class CV_EXPORTS BufferPool
{
public:
    //! Builds the pool for the given stream (defined elsewhere).
    explicit BufferPool(Stream& stream);

    //! Returns a buffer of the requested rows, cols and type (defined elsewhere).
    GpuMat getBuffer(int rows, int cols, int type);

    //! Size-based overload: forwards size.height as rows and size.width as cols.
    GpuMat getBuffer(Size size, int type)
    {
        const int rows = size.height;
        const int cols = size.width;
        return getBuffer(rows, cols, type);
    }

    //! Returns the stored allocator pointer.
    GpuMat::Allocator* getAllocator() const
    {
        return allocator_;
    }

private:
    GpuMat::Allocator* allocator_;
};
//! Reports an NPP failure through cv::error with GpuApiCallError, using the message from
//! getNppErrorMessage and the supplied source location. Only negative codes are treated as
//! failures; zero and positive codes return silently.
static inline void checkNppError(int code, const char* file, const int line, const char* func)
{
    if (code >= 0)
        return;

    cv::error(cv::Error::GpuApiCallError, getNppErrorMessage(code), func, file, line);
}
//! Reports a CUDA driver API failure through cv::error with GpuApiCallError, using the message
//! from getCudaDriverApiErrorMessage and the supplied source location. Any code other than
//! CUDA_SUCCESS is treated as a failure.
static inline void checkCudaDriverApiError(int code, const char* file, const int line, const char* func)
{
    if (code == CUDA_SUCCESS)
        return;

    cv::error(cv::Error::GpuApiCallError, getCudaDriverApiErrorMessage(code), func, file, line);
}
//! Maps an OpenCV depth constant (CV_8U, CV_16S, ...) to the corresponding NPP element type,
//! exposed as the nested typedef npp_type. The primary template is only declared, so using a
//! depth without a specialization below is a compile-time error.
template<int n> struct NPPTypeTraits;
template<> struct NPPTypeTraits<CV_8U> { typedef Npp8u npp_type; };
template<> struct NPPTypeTraits<CV_8S> { typedef Npp8s npp_type; };
template<> struct NPPTypeTraits<CV_16U> { typedef Npp16u npp_type; };
template<> struct NPPTypeTraits<CV_16S> { typedef Npp16s npp_type; };
template<> struct NPPTypeTraits<CV_32S> { typedef Npp32s npp_type; };
template<> struct NPPTypeTraits<CV_32F> { typedef Npp32f npp_type; };
template<> struct NPPTypeTraits<CV_64F> { typedef Npp64f npp_type; };
//! Scoped switch of the current NPP stream: the constructor remembers the stream reported by
//! nppGetStream() and installs the new one; the destructor re-installs the remembered stream.
class NppStreamHandler
{
public:
    //! Installs the CUDA stream obtained from newStream through StreamAccessor::getStream.
    explicit NppStreamHandler(Stream& newStream)
        : oldStream(nppGetStream())
    {
        nppSetStream(StreamAccessor::getStream(newStream));
    }

    //! Installs a raw CUDA stream.
    explicit NppStreamHandler(cudaStream_t newStream)
        : oldStream(nppGetStream())
    {
        nppSetStream(newStream);
    }

    //! Puts back the stream that was current at construction time.
    ~NppStreamHandler()
    {
        nppSetStream(oldStream);
    }

private:
    cudaStream_t oldStream;
};
}}
// Wrap an NPP / CUDA driver API call so that its status code is checked together with the
// file, line and function of the call site.
#define nppSafeCall(expr) cv::cuda::checkNppError(expr, __FILE__, __LINE__, CV_Func)
#define cuSafeCall(expr) cv::cuda::checkCudaDriverApiError(expr, __FILE__, __LINE__, CV_Func)
#endif // HAVE_CUDA
//! @endcond
#endif // __OPENCV_CORE_PRIVATE_CUDA_HPP__

View File

@ -0,0 +1,296 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_CORE_PRIVATE_HPP__
#define __OPENCV_CORE_PRIVATE_HPP__
#ifndef __OPENCV_BUILD
# error this is a private header which should not be used from outside of the OpenCV library
#endif
#include "opencv2/core.hpp"
#include "cvconfig.h"
#ifdef HAVE_EIGEN
# if defined __GNUC__ && defined __APPLE__
# pragma GCC diagnostic ignored "-Wshadow"
# endif
# include <Eigen/Core>
# include "opencv2/core/eigen.hpp"
#endif
#ifdef HAVE_TBB
# include "tbb/tbb_stddef.h"
# if TBB_VERSION_MAJOR*100 + TBB_VERSION_MINOR >= 202
# include "tbb/tbb.h"
# include "tbb/task.h"
# undef min
# undef max
# else
# undef HAVE_TBB
# endif
#endif
//! @cond IGNORED
namespace cv
{
#ifdef HAVE_TBB
// TBB-backed definitions: the cv-level names are aliases of / thin forwarders to their
// tbb counterparts.
typedef tbb::blocked_range<int> BlockedRange;
//! Forwards to tbb::parallel_for.
template<typename Body> static inline
void parallel_for( const BlockedRange& range, const Body& body )
{
tbb::parallel_for(range, body);
}
typedef tbb::split Split;
//! Forwards to tbb::parallel_reduce.
template<typename Body> static inline
void parallel_reduce( const BlockedRange& range, Body& body )
{
tbb::parallel_reduce(range, body);
}
typedef tbb::concurrent_vector<Rect> ConcurrentRectVector;
#else
//! Stand-in for the TBB range type when TBB is not available: a plain holder for a begin
//! index, an end index and a grain size.
class BlockedRange
{
public:
    //! Empty range: begin, end and grain size are all zero.
    BlockedRange()
        : _begin(0), _end(0), _grainsize(0)
    {
    }

    //! Range from b to e with grain size g (1 when omitted).
    BlockedRange(int b, int e, int g=1)
        : _begin(b), _end(e), _grainsize(g)
    {
    }

    int begin() const
    {
        return _begin;
    }

    int end() const
    {
        return _end;
    }

    int grainsize() const
    {
        return _grainsize;
    }

protected:
    int _begin;
    int _end;
    int _grainsize;
};
//! Serial fallback: invokes body once on the whole range in the calling thread.
template<typename Body> static inline
void parallel_for( const BlockedRange& range, const Body& body )
{
body(range);
}
// Without TBB a plain std::vector stands in for the concurrent container.
typedef std::vector<Rect> ConcurrentRectVector;
// Empty tag type standing in for tbb::split.
class Split {};
//! Serial fallback: invokes body once on the whole range in the calling thread.
template<typename Body> static inline
void parallel_reduce( const BlockedRange& range, Body& body )
{
body(range);
}
#endif
// Returns a static string (the caller must not free it) if there is a parallel framework,
// NULL otherwise. Declared here, defined elsewhere.
CV_EXPORTS const char* currentParallelFramework();
} //namespace cv
/****************************************************************************************\
* Common declarations *
\****************************************************************************************/
/* the alignment of all the allocated buffers */
#define CV_MALLOC_ALIGN 16
/* IEEE754 constants and macros */
// CV_TOGGLE_FLT(x): x is an integer expression. When (int)(x) is negative the
// low 31 bits are inverted (XOR with 0x7fffffff) and the top bit is kept;
// otherwise x is returned unchanged.
// NOTE(review): presumably applied to the raw bit pattern of a float so that
// the results order as signed integers the same way the floats do -- confirm
// at the call sites.
#define CV_TOGGLE_FLT(x) ((x)^((int)(x) < 0 ? 0x7fffffff : 0))
// CV_TOGGLE_DBL(x): 64-bit counterpart; inverts the low 63 bits when
// (int64)(x) is negative.
#define CV_TOGGLE_DBL(x) ((x)^((int64)(x) < 0 ? CV_BIG_INT(0x7fffffffffffffff) : 0))
// Rounds the address `ptr` up to the next multiple of `align` (32 when
// omitted). `align` must be a power of two; this is checked by CV_DbgAssert.
// An address that is already a multiple of `align` is returned unchanged.
static inline void* cvAlignPtr( const void* ptr, int align = 32 )
{
    CV_DbgAssert ( (align & (align-1)) == 0 );

    const size_t lowBits = (size_t)(align-1);         // bits cleared by the rounding
    const size_t bumped  = (size_t)ptr + align - 1;   // push past the next boundary
    return (void*)( bumped & ~lowBits );
}
// Rounds `size` up to the next multiple of `align`. CV_DbgAssert checks that
// `align` is a power of two and that `size` is below INT_MAX.
static inline int cvAlign( int size, int align )
{
    CV_DbgAssert( (align & (align-1)) == 0 && size < INT_MAX );

    const int bumped = size + align - 1;
    // For a power-of-two align, -align has every bit set except the low ones.
    return bumped & -align;
}
#ifdef IPL_DEPTH_8U
// Builds a cv::Size from a CvMat header: width = mat->cols, height = mat->rows.
// `mat` is dereferenced without a null check.
static inline cv::Size cvGetMatSize( const CvMat* mat )
{
    const int width  = mat->cols;
    const int height = mat->rows;
    return cv::Size(width, height);
}
#endif
namespace cv
{
CV_EXPORTS void scalarToRawData(const cv::Scalar& s, void* buf, int type, int unroll_to = 0);
}
// Property implementation macros.
// Each macro expands to accessor member functions named get<name> / set<name>
// (the getter is const-qualified, so they are meant to be used inside a class
// body).
//
// CV_IMPL_PROPERTY_RO: getter only; returns `member` as `type`.
#define CV_IMPL_PROPERTY_RO(type, name, member) \
inline type get##name() const { return member; }
// CV_HELP_IMPL_PROPERTY: getter returning `r_type` plus a setter that takes
// `w_type` and assigns it to `member`. Building block for the two macros below.
#define CV_HELP_IMPL_PROPERTY(r_type, w_type, name, member) \
CV_IMPL_PROPERTY_RO(r_type, name, member) \
inline void set##name(w_type val) { member = val; }
// CV_HELP_WRAP_PROPERTY: getter/setter pair that forwards to
// internal_obj.get<internal_name>() / internal_obj.set<internal_name>(val)
// instead of touching a data member directly.
#define CV_HELP_WRAP_PROPERTY(r_type, w_type, name, internal_name, internal_obj) \
r_type get##name() const { return internal_obj.get##internal_name(); } \
void set##name(w_type val) { internal_obj.set##internal_name(val); }
// CV_IMPL_PROPERTY: read/write property, setter takes `type` by value.
#define CV_IMPL_PROPERTY(type, name, member) CV_HELP_IMPL_PROPERTY(type, type, name, member)
// CV_IMPL_PROPERTY_S: same, but the setter takes `const type &`.
#define CV_IMPL_PROPERTY_S(type, name, member) CV_HELP_IMPL_PROPERTY(type, const type &, name, member)
// CV_WRAP_PROPERTY: forwarding property, setter takes `type` by value.
#define CV_WRAP_PROPERTY(type, name, internal_name, internal_obj) CV_HELP_WRAP_PROPERTY(type, type, name, internal_name, internal_obj)
// CV_WRAP_PROPERTY_S: forwarding property, setter takes `const type &`.
#define CV_WRAP_PROPERTY_S(type, name, internal_name, internal_obj) CV_HELP_WRAP_PROPERTY(type, const type &, name, internal_name, internal_obj)
// CV_WRAP_SAME_PROPERTY / CV_WRAP_SAME_PROPERTY_S: forwarding property whose
// name on the wrapped object is the same as the exposed name.
#define CV_WRAP_SAME_PROPERTY(type, name, internal_obj) CV_WRAP_PROPERTY(type, name, name, internal_obj)
#define CV_WRAP_SAME_PROPERTY_S(type, name, internal_obj) CV_WRAP_PROPERTY_S(type, name, name, internal_obj)
/****************************************************************************************\
* Structures and macros for integration with IPP *
\****************************************************************************************/
#ifdef HAVE_IPP
# include "ipp.h"
# define IPP_VERSION_X100 (IPP_VERSION_MAJOR * 100 + IPP_VERSION_MINOR)
#define IPP_ALIGN 32 // required for AVX optimization
#define setIppErrorStatus() cv::ipp::setIppStatus(-1, CV_Func, __FILE__, __LINE__)
// Packs a width/height pair into an IppiSize (aggregate-initialised in that
// order).
static inline IppiSize ippiSize(int width, int height)
{
    const IppiSize roi = { width, height };
    return roi;
}
// Converts a cv::Size into an IppiSize, copying width first and height second.
static inline IppiSize ippiSize(const cv::Size & _size)
{
    const IppiSize roi = { _size.width, _size.height };
    return roi;
}
// Maps an OpenCV border mode to the corresponding IppiBorderType.
// Modes without a mapping below yield (IppiBorderType)-1, which callers must
// treat as "unsupported".
static inline IppiBorderType ippiGetBorderType(int borderTypeNI)
{
    if (borderTypeNI == cv::BORDER_CONSTANT)
        return ippBorderConst;
    if (borderTypeNI == cv::BORDER_WRAP)
        return ippBorderWrap;
    if (borderTypeNI == cv::BORDER_REPLICATE)
        return ippBorderRepl;
    if (borderTypeNI == cv::BORDER_REFLECT_101)
        return ippBorderMirror;
    if (borderTypeNI == cv::BORDER_REFLECT)
        return ippBorderMirrorR;

    return (IppiBorderType)-1;
}
// Maps an OpenCV depth constant (CV_8U ... CV_64F) to the matching
// IppDataType. Any other depth yields (IppDataType)-1.
static inline IppDataType ippiGetDataType(int depth)
{
    if (depth == CV_8U)
        return ipp8u;
    if (depth == CV_8S)
        return ipp8s;
    if (depth == CV_16U)
        return ipp16u;
    if (depth == CV_16S)
        return ipp16s;
    if (depth == CV_32S)
        return ipp32s;
    if (depth == CV_32F)
        return ipp32f;
    if (depth == CV_64F)
        return ipp64f;

    return (IppDataType)-1;
}
#else
# define IPP_VERSION_X100 0
#endif
#define CV_IPP_CHECK_COND (cv::ipp::useIPP())
#define CV_IPP_CHECK() if(CV_IPP_CHECK_COND)
#ifndef IPPI_CALL
# define IPPI_CALL(func) CV_Assert((func) >= 0)
#endif
/* IPP-compatible return codes.
   Zero means success, every failure code is negative. Entries tagged
   "ipp comp" carried that tag in the original header (presumably: the value
   matches the corresponding IPP status code). */
typedef enum CvStatus
{
    /* -113 .. -100 */
    CV_BADMEMBLOCK_ERR           = -113,
    CV_INPLACE_NOT_SUPPORTED_ERR = -112,
    CV_UNMATCHED_ROI_ERR         = -111,
    CV_NOTFOUND_ERR              = -110,
    CV_BADCONVERGENCE_ERR        = -109,
    CV_BADDEPTH_ERR              = -107,
    CV_BADROI_ERR                = -106,
    CV_BADHEADER_ERR             = -105,
    CV_UNMATCHED_FORMATS_ERR     = -104,
    CV_UNSUPPORTED_COI_ERR       = -103,
    CV_UNSUPPORTED_CHANNELS_ERR  = -102,
    CV_UNSUPPORTED_DEPTH_ERR     = -101,
    CV_UNSUPPORTED_FORMAT_ERR    = -100,

    /* -49 .. -1 */
    CV_BADARG_ERR                = -49,  // ipp comp
    CV_NOTDEFINED_ERR            = -48,  // ipp comp
    CV_BADCHANNELS_ERR           = -47,  // ipp comp
    CV_BADRANGE_ERR              = -44,  // ipp comp
    CV_BADSTEP_ERR               = -29,  // ipp comp
    CV_BADFLAG_ERR               = -12,
    CV_DIV_BY_ZERO_ERR           = -11,  // ipp comp
    CV_BADCOEF_ERR               = -10,
    CV_BADFACTOR_ERR             = -7,
    CV_BADPOINT_ERR              = -6,
    CV_BADSCALE_ERR              = -4,
    CV_OUTOFMEM_ERR              = -3,
    CV_NULLPTR_ERR               = -2,
    CV_BADSIZE_ERR               = -1,

    /* success */
    CV_NO_ERR                    = 0,
    CV_OK                        = CV_NO_ERR
} CvStatus;
#ifdef HAVE_TEGRA_OPTIMIZATION
namespace tegra {
CV_EXPORTS bool useTegra();
CV_EXPORTS void setUseTegra(bool flag);
}
#endif
//! @endcond
#endif // __OPENCV_CORE_PRIVATE_HPP__

View File

@ -0,0 +1,342 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the copyright holders or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_CORE_PTR_INL_HPP__
#define __OPENCV_CORE_PTR_INL_HPP__
#include <algorithm>
//! @cond IGNORED
namespace cv {
// Default disposal policy: the pointee is released with a plain `delete`.
template<typename Y>
void DefaultDeleter<Y>::operator () (Y* p) const
{
    delete p;
}
namespace detail
{
// Reference-counting base of the control block shared by Ptr instances.
// The counter starts at one; the derived class decides how to destroy the
// owned object and the control block itself (deleteSelf).
struct PtrOwner
{
    PtrOwner()
        : refCount(1)
    {
    }

    // Adds one reference through CV_XADD.
    void incRef()
    {
        CV_XADD(&refCount, 1);
    }

    // Drops one reference through CV_XADD; when CV_XADD reports 1 the
    // control block asks the derived class to destroy itself.
    void decRef()
    {
        const bool lastReference = (CV_XADD(&refCount, -1) == 1);
        if (lastReference)
            deleteSelf();
    }

protected:
    /* Strictly speaking the destructor need not be virtual, because a
       PtrOwner is never deleted directly; keeping it virtual is harmless
       and silences compiler warnings. */
    virtual ~PtrOwner()
    {
    }

    virtual void deleteSelf() = 0;

private:
    unsigned int refCount;

    // noncopyable: copy construction and assignment are private
    PtrOwner(const PtrOwner&);
    PtrOwner& operator = (const PtrOwner&);
};
// Concrete control block: remembers the owned pointer together with the
// deleter that must be applied to it.
template<typename Y, typename D>
struct PtrOwnerImpl : PtrOwner
{
    PtrOwnerImpl(Y* p, D d)
        : owned(p),
          deleter(d)
    {
    }

    // Invoked by PtrOwner::decRef(): first hands the owned pointer to the
    // deleter, then destroys this control block.
    void deleteSelf()
    {
        deleter(owned);
        delete this;
    }

private:
    Y* owned;
    D deleter;
};
}
// Default constructor: an empty Ptr with neither a control block nor a
// stored pointer.
template<typename T>
Ptr<T>::Ptr()
    : owner(NULL),
      stored(NULL)
{
}
// Takes ownership of p and stores it. A non-null p gets a new control block
// that will dispose of it through DefaultDeleter<Y>; a null p gets no
// control block at all.
template<typename T>
template<typename Y>
Ptr<T>::Ptr(Y* p)
    : owner(p != NULL
                ? new detail::PtrOwnerImpl<Y, DefaultDeleter<Y> >(p, DefaultDeleter<Y>())
                : NULL),
      stored(p)
{
}
// Takes ownership of p and stores it; a non-null p will eventually be passed
// to a copy of the deleter d. A null p gets no control block, so d is never
// invoked in that case.
template<typename T>
template<typename Y, typename D>
Ptr<T>::Ptr(Y* p, D d)
    : owner(p != NULL
                ? new detail::PtrOwnerImpl<Y, D>(p, d)
                : NULL),
      stored(p)
{
}
// Copy constructor: shares o's control block (adding a reference when there
// is one) and copies its stored pointer.
template<typename T>
Ptr<T>::Ptr(const Ptr& o)
    : owner(o.owner),
      stored(o.stored)
{
    if (owner != NULL)
    {
        owner->incRef();
    }
}
// Converting copy constructor: like the copy constructor, but from a Ptr<Y>
// whose stored Y* converts implicitly to T*.
template<typename T>
template<typename Y>
Ptr<T>::Ptr(const Ptr<Y>& o)
    : owner(o.owner),
      stored(o.stored)
{
    if (owner != NULL)
    {
        owner->incRef();
    }
}
// Aliasing constructor: shares o's control block (adding a reference when
// there is one) but stores the unrelated pointer p. p itself is never
// passed to a deleter by this Ptr.
template<typename T>
template<typename Y>
Ptr<T>::Ptr(const Ptr<Y>& o, T* p)
    : owner(o.owner),
      stored(p)
{
    if (owner != NULL)
    {
        owner->incRef();
    }
}
// Destructor: gives up this Ptr's reference via release().
template<typename T>
Ptr<T>::~Ptr()
{
    release();
}
// Copy assignment via copy-and-swap: a copy of o is made first, then swapped
// into *this; the previous contents are released when the copy goes out of
// scope. Self-assignment is therefore harmless.
template<typename T>
Ptr<T>& Ptr<T>::operator = (const Ptr<T>& o)
{
    Ptr replacement(o);
    replacement.swap(*this);
    return *this;
}
// Converting assignment from Ptr<Y>, implemented as copy-and-swap through
// the converting copy constructor.
template<typename T>
template<typename Y>
Ptr<T>& Ptr<T>::operator = (const Ptr<Y>& o)
{
    Ptr replacement(o);
    replacement.swap(*this);
    return *this;
}
// Gives up this Ptr's reference (if it has a control block) and leaves the
// Ptr empty.
template<typename T>
void Ptr<T>::release()
{
    if (owner != NULL)
    {
        owner->decRef();
    }

    stored = NULL;
    owner = NULL;
}
// Replaces the managed object with p (owned through the default deleter).
// The previous contents are released when the temporary holder is destroyed.
template<typename T>
template<typename Y>
void Ptr<T>::reset(Y* p)
{
    Ptr fresh(p);
    fresh.swap(*this);
}
// Replaces the managed object with p, to be disposed of through deleter d.
// The previous contents are released when the temporary holder is destroyed.
template<typename T>
template<typename Y, typename D>
void Ptr<T>::reset(Y* p, D d)
{
    Ptr fresh(p, d);
    fresh.swap(*this);
}
// Exchanges both the stored pointer and the control block with o; no
// reference counts change.
template<typename T>
void Ptr<T>::swap(Ptr<T>& o)
{
    std::swap(stored, o.stored);
    std::swap(owner, o.owner);
}
// Returns the stored raw pointer (NULL for an empty Ptr); ownership is not
// transferred.
template<typename T>
T* Ptr<T>::get() const
{
    return stored;
}
// Dereferences the stored pointer. No null check is performed, so this must
// not be used on an empty Ptr.
template<typename T>
typename detail::RefOrVoid<T>::type Ptr<T>::operator * () const
{
    return *stored;
}
// Member access through the stored pointer; no null check is performed.
template<typename T>
T* Ptr<T>::operator -> () const
{
    return stored;
}
// Implicit conversion to the stored raw pointer (same value as get()).
template<typename T>
Ptr<T>::operator T* () const
{
    return stored;
}
// True when the stored pointer is null. Only `stored` is inspected; the
// control block is not consulted.
template<typename T>
bool Ptr<T>::empty() const
{
    return stored == NULL;
}
// Returns a Ptr<Y> sharing ownership with *this whose stored pointer is
// static_cast<Y*>(stored).
template<typename T>
template<typename Y>
Ptr<Y> Ptr<T>::staticCast() const
{
    Y* const converted = static_cast<Y*>(stored);
    return Ptr<Y>(*this, converted);
}
// Returns a Ptr<Y> sharing ownership with *this whose stored pointer is
// const_cast<Y*>(stored).
template<typename T>
template<typename Y>
Ptr<Y> Ptr<T>::constCast() const
{
    Y* const converted = const_cast<Y*>(stored);
    return Ptr<Y>(*this, converted);
}
// Returns a Ptr<Y> that shares ownership with *this and stores
// dynamic_cast<Y*>(stored).
//
// When the cast fails (or nothing is stored) a default-constructed, truly
// empty Ptr<Y> is returned. Handing the null result to the aliasing
// constructor instead would produce a Ptr that reports empty() yet still
// holds a reference on the control block, silently extending the lifetime of
// the owned object. Returning an empty pointer on failure matches what
// std::dynamic_pointer_cast does.
template<typename T>
template<typename Y>
Ptr<Y> Ptr<T>::dynamicCast() const
{
    Y* const converted = dynamic_cast<Y*>(stored);
    if (!converted)
        return Ptr<Y>();
    return Ptr<Y>(*this, converted);
}
// Free-function swap for Ptr; forwards to the member swap.
template<typename T>
void swap(Ptr<T>& ptr1, Ptr<T>& ptr2)
{
    ptr1.swap(ptr2);
}
// Two Ptrs compare equal when their stored raw pointers are equal; the
// control blocks are not compared.
template<typename T>
bool operator == (const Ptr<T>& ptr1, const Ptr<T>& ptr2)
{
    T* const lhs = ptr1.get();
    T* const rhs = ptr2.get();
    return lhs == rhs;
}
// Two Ptrs compare unequal when their stored raw pointers differ; the
// control blocks are not compared.
template<typename T>
bool operator != (const Ptr<T>& ptr1, const Ptr<T>& ptr2)
{
    T* const lhs = ptr1.get();
    T* const rhs = ptr2.get();
    return lhs != rhs;
}
// Creates a value-initialised T with `new` and wraps it in a Ptr<T>.
template<typename T>
Ptr<T> makePtr()
{
    T* const created = new T();
    return Ptr<T>(created);
}
// Creates a T from one constructor argument with `new` and wraps it in a
// Ptr<T>. The argument is taken by const reference.
template<typename T, typename A1>
Ptr<T> makePtr(const A1& a1)
{
    T* const created = new T(a1);
    return Ptr<T>(created);
}
// Creates a T from two constructor arguments with `new` and wraps it in a
// Ptr<T>. Arguments are taken by const reference.
template<typename T, typename A1, typename A2>
Ptr<T> makePtr(const A1& a1, const A2& a2)
{
    T* const created = new T(a1, a2);
    return Ptr<T>(created);
}
// Creates a T from three constructor arguments with `new` and wraps it in a
// Ptr<T>. Arguments are taken by const reference.
template<typename T, typename A1, typename A2, typename A3>
Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3)
{
    T* const created = new T(a1, a2, a3);
    return Ptr<T>(created);
}
// Creates a T from four constructor arguments with `new` and wraps it in a
// Ptr<T>. Arguments are taken by const reference.
template<typename T, typename A1, typename A2, typename A3, typename A4>
Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4)
{
    T* const created = new T(a1, a2, a3, a4);
    return Ptr<T>(created);
}
// Creates a T from five constructor arguments with `new` and wraps it in a
// Ptr<T>. Arguments are taken by const reference.
template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5>
Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5)
{
    T* const created = new T(a1, a2, a3, a4, a5);
    return Ptr<T>(created);
}
// Creates a T from six constructor arguments with `new` and wraps it in a
// Ptr<T>. Arguments are taken by const reference.
template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5, typename A6>
Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6)
{
    T* const created = new T(a1, a2, a3, a4, a5, a6);
    return Ptr<T>(created);
}
// Creates a T from seven constructor arguments with `new` and wraps it in a
// Ptr<T>. Arguments are taken by const reference.
template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5, typename A6, typename A7>
Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7)
{
    T* const created = new T(a1, a2, a3, a4, a5, a6, a7);
    return Ptr<T>(created);
}
// Creates a T from eight constructor arguments with `new` and wraps it in a
// Ptr<T>. Arguments are taken by const reference.
template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5, typename A6, typename A7, typename A8>
Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7, const A8& a8)
{
    T* const created = new T(a1, a2, a3, a4, a5, a6, a7, a8);
    return Ptr<T>(created);
}
// Creates a T from nine constructor arguments with `new` and wraps it in a
// Ptr<T>. Arguments are taken by const reference.
template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5, typename A6, typename A7, typename A8, typename A9>
Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7, const A8& a8, const A9& a9)
{
    T* const created = new T(a1, a2, a3, a4, a5, a6, a7, a8, a9);
    return Ptr<T>(created);
}
// Creates a T from ten constructor arguments with `new` and wraps it in a
// Ptr<T>. Arguments are taken by const reference.
template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5, typename A6, typename A7, typename A8, typename A9, typename A10>
Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7, const A8& a8, const A9& a9, const A10& a10)
{
    T* const created = new T(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10);
    return Ptr<T>(created);
}
} // namespace cv
//! @endcond
#endif // __OPENCV_CORE_PTR_INL_HPP__

View File

@ -0,0 +1,645 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2015, Itseez Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_CORE_SSE_UTILS_HPP__
#define __OPENCV_CORE_SSE_UTILS_HPP__
#ifndef __cplusplus
# error sse_utils.hpp header must be compiled as C++
#endif
#if CV_SSE2
// De-interleaves 64 bytes of two-channel 8-bit data, in place.
//
// On entry the four registers, read in the order v_r0, v_r1, v_g0, v_g1, form
// one sequence of 64 bytes. On exit v_r0:v_r1 hold the 32 bytes that were at
// even positions of that sequence (in order) and v_g0:v_g1 the 32 bytes that
// were at odd positions.
//
// Each "layer" below is one perfect shuffle of the 64-byte sequence: register
// i is unpacked (lo, then hi) against register i+2. Five such shuffles in a
// row produce the permutation described above.
inline void _mm_deinterleave_epi8(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1)
{
// shuffle 1
__m128i layer1_chunk0 = _mm_unpacklo_epi8(v_r0, v_g0);
__m128i layer1_chunk1 = _mm_unpackhi_epi8(v_r0, v_g0);
__m128i layer1_chunk2 = _mm_unpacklo_epi8(v_r1, v_g1);
__m128i layer1_chunk3 = _mm_unpackhi_epi8(v_r1, v_g1);
// shuffle 2
__m128i layer2_chunk0 = _mm_unpacklo_epi8(layer1_chunk0, layer1_chunk2);
__m128i layer2_chunk1 = _mm_unpackhi_epi8(layer1_chunk0, layer1_chunk2);
__m128i layer2_chunk2 = _mm_unpacklo_epi8(layer1_chunk1, layer1_chunk3);
__m128i layer2_chunk3 = _mm_unpackhi_epi8(layer1_chunk1, layer1_chunk3);
// shuffle 3
__m128i layer3_chunk0 = _mm_unpacklo_epi8(layer2_chunk0, layer2_chunk2);
__m128i layer3_chunk1 = _mm_unpackhi_epi8(layer2_chunk0, layer2_chunk2);
__m128i layer3_chunk2 = _mm_unpacklo_epi8(layer2_chunk1, layer2_chunk3);
__m128i layer3_chunk3 = _mm_unpackhi_epi8(layer2_chunk1, layer2_chunk3);
// shuffle 4
__m128i layer4_chunk0 = _mm_unpacklo_epi8(layer3_chunk0, layer3_chunk2);
__m128i layer4_chunk1 = _mm_unpackhi_epi8(layer3_chunk0, layer3_chunk2);
__m128i layer4_chunk2 = _mm_unpacklo_epi8(layer3_chunk1, layer3_chunk3);
__m128i layer4_chunk3 = _mm_unpackhi_epi8(layer3_chunk1, layer3_chunk3);
// shuffle 5: results are written straight back into the arguments
v_r0 = _mm_unpacklo_epi8(layer4_chunk0, layer4_chunk2);
v_r1 = _mm_unpackhi_epi8(layer4_chunk0, layer4_chunk2);
v_g0 = _mm_unpacklo_epi8(layer4_chunk1, layer4_chunk3);
v_g1 = _mm_unpackhi_epi8(layer4_chunk1, layer4_chunk3);
}
// De-interleaves 96 bytes of three-channel 8-bit data, in place.
//
// On entry the six registers, read in the order v_r0, v_r1, v_g0, v_g1, v_b0,
// v_b1, form one sequence of 96 bytes. On exit v_r0:v_r1 hold the bytes that
// were at positions 3k, v_g0:v_g1 those at positions 3k+1 and v_b0:v_b1 those
// at positions 3k+2 (k = 0..31, in order).
//
// Each "layer" is one perfect shuffle of the 96-byte sequence: register i is
// unpacked (lo, then hi) against register i+3 -- which is why the first layer
// pairs v_r0 with v_g1, v_r1 with v_b0 and v_g0 with v_b1. Five shuffles in a
// row produce the permutation described above.
inline void _mm_deinterleave_epi8(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0,
__m128i & v_g1, __m128i & v_b0, __m128i & v_b1)
{
// shuffle 1
__m128i layer1_chunk0 = _mm_unpacklo_epi8(v_r0, v_g1);
__m128i layer1_chunk1 = _mm_unpackhi_epi8(v_r0, v_g1);
__m128i layer1_chunk2 = _mm_unpacklo_epi8(v_r1, v_b0);
__m128i layer1_chunk3 = _mm_unpackhi_epi8(v_r1, v_b0);
__m128i layer1_chunk4 = _mm_unpacklo_epi8(v_g0, v_b1);
__m128i layer1_chunk5 = _mm_unpackhi_epi8(v_g0, v_b1);
// shuffle 2
__m128i layer2_chunk0 = _mm_unpacklo_epi8(layer1_chunk0, layer1_chunk3);
__m128i layer2_chunk1 = _mm_unpackhi_epi8(layer1_chunk0, layer1_chunk3);
__m128i layer2_chunk2 = _mm_unpacklo_epi8(layer1_chunk1, layer1_chunk4);
__m128i layer2_chunk3 = _mm_unpackhi_epi8(layer1_chunk1, layer1_chunk4);
__m128i layer2_chunk4 = _mm_unpacklo_epi8(layer1_chunk2, layer1_chunk5);
__m128i layer2_chunk5 = _mm_unpackhi_epi8(layer1_chunk2, layer1_chunk5);
// shuffle 3
__m128i layer3_chunk0 = _mm_unpacklo_epi8(layer2_chunk0, layer2_chunk3);
__m128i layer3_chunk1 = _mm_unpackhi_epi8(layer2_chunk0, layer2_chunk3);
__m128i layer3_chunk2 = _mm_unpacklo_epi8(layer2_chunk1, layer2_chunk4);
__m128i layer3_chunk3 = _mm_unpackhi_epi8(layer2_chunk1, layer2_chunk4);
__m128i layer3_chunk4 = _mm_unpacklo_epi8(layer2_chunk2, layer2_chunk5);
__m128i layer3_chunk5 = _mm_unpackhi_epi8(layer2_chunk2, layer2_chunk5);
// shuffle 4
__m128i layer4_chunk0 = _mm_unpacklo_epi8(layer3_chunk0, layer3_chunk3);
__m128i layer4_chunk1 = _mm_unpackhi_epi8(layer3_chunk0, layer3_chunk3);
__m128i layer4_chunk2 = _mm_unpacklo_epi8(layer3_chunk1, layer3_chunk4);
__m128i layer4_chunk3 = _mm_unpackhi_epi8(layer3_chunk1, layer3_chunk4);
__m128i layer4_chunk4 = _mm_unpacklo_epi8(layer3_chunk2, layer3_chunk5);
__m128i layer4_chunk5 = _mm_unpackhi_epi8(layer3_chunk2, layer3_chunk5);
// shuffle 5: results are written straight back into the arguments
v_r0 = _mm_unpacklo_epi8(layer4_chunk0, layer4_chunk3);
v_r1 = _mm_unpackhi_epi8(layer4_chunk0, layer4_chunk3);
v_g0 = _mm_unpacklo_epi8(layer4_chunk1, layer4_chunk4);
v_g1 = _mm_unpackhi_epi8(layer4_chunk1, layer4_chunk4);
v_b0 = _mm_unpacklo_epi8(layer4_chunk2, layer4_chunk5);
v_b1 = _mm_unpackhi_epi8(layer4_chunk2, layer4_chunk5);
}
// De-interleaves 128 bytes of four-channel 8-bit data, in place.
//
// On entry the eight registers, read in the order v_r0, v_r1, v_g0, v_g1,
// v_b0, v_b1, v_a0, v_a1, form one sequence of 128 bytes. On exit v_r0:v_r1
// hold the bytes that were at positions 4k, v_g0:v_g1 those at 4k+1,
// v_b0:v_b1 those at 4k+2 and v_a0:v_a1 those at 4k+3 (k = 0..31, in order).
//
// Each "layer" is one perfect shuffle of the 128-byte sequence: register i is
// unpacked (lo, then hi) against register i+4 -- hence the first layer pairs
// v_r* with v_b* and v_g* with v_a*. Five shuffles in a row produce the
// permutation described above.
inline void _mm_deinterleave_epi8(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1,
__m128i & v_b0, __m128i & v_b1, __m128i & v_a0, __m128i & v_a1)
{
// shuffle 1
__m128i layer1_chunk0 = _mm_unpacklo_epi8(v_r0, v_b0);
__m128i layer1_chunk1 = _mm_unpackhi_epi8(v_r0, v_b0);
__m128i layer1_chunk2 = _mm_unpacklo_epi8(v_r1, v_b1);
__m128i layer1_chunk3 = _mm_unpackhi_epi8(v_r1, v_b1);
__m128i layer1_chunk4 = _mm_unpacklo_epi8(v_g0, v_a0);
__m128i layer1_chunk5 = _mm_unpackhi_epi8(v_g0, v_a0);
__m128i layer1_chunk6 = _mm_unpacklo_epi8(v_g1, v_a1);
__m128i layer1_chunk7 = _mm_unpackhi_epi8(v_g1, v_a1);
// shuffle 2
__m128i layer2_chunk0 = _mm_unpacklo_epi8(layer1_chunk0, layer1_chunk4);
__m128i layer2_chunk1 = _mm_unpackhi_epi8(layer1_chunk0, layer1_chunk4);
__m128i layer2_chunk2 = _mm_unpacklo_epi8(layer1_chunk1, layer1_chunk5);
__m128i layer2_chunk3 = _mm_unpackhi_epi8(layer1_chunk1, layer1_chunk5);
__m128i layer2_chunk4 = _mm_unpacklo_epi8(layer1_chunk2, layer1_chunk6);
__m128i layer2_chunk5 = _mm_unpackhi_epi8(layer1_chunk2, layer1_chunk6);
__m128i layer2_chunk6 = _mm_unpacklo_epi8(layer1_chunk3, layer1_chunk7);
__m128i layer2_chunk7 = _mm_unpackhi_epi8(layer1_chunk3, layer1_chunk7);
// shuffle 3
__m128i layer3_chunk0 = _mm_unpacklo_epi8(layer2_chunk0, layer2_chunk4);
__m128i layer3_chunk1 = _mm_unpackhi_epi8(layer2_chunk0, layer2_chunk4);
__m128i layer3_chunk2 = _mm_unpacklo_epi8(layer2_chunk1, layer2_chunk5);
__m128i layer3_chunk3 = _mm_unpackhi_epi8(layer2_chunk1, layer2_chunk5);
__m128i layer3_chunk4 = _mm_unpacklo_epi8(layer2_chunk2, layer2_chunk6);
__m128i layer3_chunk5 = _mm_unpackhi_epi8(layer2_chunk2, layer2_chunk6);
__m128i layer3_chunk6 = _mm_unpacklo_epi8(layer2_chunk3, layer2_chunk7);
__m128i layer3_chunk7 = _mm_unpackhi_epi8(layer2_chunk3, layer2_chunk7);
// shuffle 4
__m128i layer4_chunk0 = _mm_unpacklo_epi8(layer3_chunk0, layer3_chunk4);
__m128i layer4_chunk1 = _mm_unpackhi_epi8(layer3_chunk0, layer3_chunk4);
__m128i layer4_chunk2 = _mm_unpacklo_epi8(layer3_chunk1, layer3_chunk5);
__m128i layer4_chunk3 = _mm_unpackhi_epi8(layer3_chunk1, layer3_chunk5);
__m128i layer4_chunk4 = _mm_unpacklo_epi8(layer3_chunk2, layer3_chunk6);
__m128i layer4_chunk5 = _mm_unpackhi_epi8(layer3_chunk2, layer3_chunk6);
__m128i layer4_chunk6 = _mm_unpacklo_epi8(layer3_chunk3, layer3_chunk7);
__m128i layer4_chunk7 = _mm_unpackhi_epi8(layer3_chunk3, layer3_chunk7);
// shuffle 5: results are written straight back into the arguments
v_r0 = _mm_unpacklo_epi8(layer4_chunk0, layer4_chunk4);
v_r1 = _mm_unpackhi_epi8(layer4_chunk0, layer4_chunk4);
v_g0 = _mm_unpacklo_epi8(layer4_chunk1, layer4_chunk5);
v_g1 = _mm_unpackhi_epi8(layer4_chunk1, layer4_chunk5);
v_b0 = _mm_unpacklo_epi8(layer4_chunk2, layer4_chunk6);
v_b1 = _mm_unpackhi_epi8(layer4_chunk2, layer4_chunk6);
v_a0 = _mm_unpacklo_epi8(layer4_chunk3, layer4_chunk7);
v_a1 = _mm_unpackhi_epi8(layer4_chunk3, layer4_chunk7);
}
// Interleaves two 32-byte planes into 64 bytes of two-channel data, in place.
// This is the inverse of the four-register _mm_deinterleave_epi8.
//
// On entry v_r0:v_r1 hold plane 0 and v_g0:v_g1 hold plane 1. On exit the
// registers, read in the order v_r0, v_r1, v_g0, v_g1, form a 64-byte
// sequence in which byte 2k comes from plane 0 and byte 2k+1 from plane 1
// (k = 0..31).
//
// Each layer is one inverse perfect shuffle of the register sequence: for a
// pair of adjacent registers, "and with 0x00ff + pack" gathers the bytes at
// even positions and "shift right by 8 + pack" gathers the bytes at odd
// positions. Every 16-bit lane fed to _mm_packus_epi16 is already in 0..255,
// so its unsigned saturation never alters a value. Layers are numbered 4 down
// to 1, mirroring the de-interleave network; five steps are applied in total.
inline void _mm_interleave_epi8(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1)
{
// selects the low byte of every 16-bit lane
__m128i v_mask = _mm_set1_epi16(0x00ff);
// step 1
__m128i layer4_chunk0 = _mm_packus_epi16(_mm_and_si128(v_r0, v_mask), _mm_and_si128(v_r1, v_mask));
__m128i layer4_chunk2 = _mm_packus_epi16(_mm_srli_epi16(v_r0, 8), _mm_srli_epi16(v_r1, 8));
__m128i layer4_chunk1 = _mm_packus_epi16(_mm_and_si128(v_g0, v_mask), _mm_and_si128(v_g1, v_mask));
__m128i layer4_chunk3 = _mm_packus_epi16(_mm_srli_epi16(v_g0, 8), _mm_srli_epi16(v_g1, 8));
// step 2
__m128i layer3_chunk0 = _mm_packus_epi16(_mm_and_si128(layer4_chunk0, v_mask), _mm_and_si128(layer4_chunk1, v_mask));
__m128i layer3_chunk2 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk0, 8), _mm_srli_epi16(layer4_chunk1, 8));
__m128i layer3_chunk1 = _mm_packus_epi16(_mm_and_si128(layer4_chunk2, v_mask), _mm_and_si128(layer4_chunk3, v_mask));
__m128i layer3_chunk3 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk2, 8), _mm_srli_epi16(layer4_chunk3, 8));
// step 3
__m128i layer2_chunk0 = _mm_packus_epi16(_mm_and_si128(layer3_chunk0, v_mask), _mm_and_si128(layer3_chunk1, v_mask));
__m128i layer2_chunk2 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk0, 8), _mm_srli_epi16(layer3_chunk1, 8));
__m128i layer2_chunk1 = _mm_packus_epi16(_mm_and_si128(layer3_chunk2, v_mask), _mm_and_si128(layer3_chunk3, v_mask));
__m128i layer2_chunk3 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk2, 8), _mm_srli_epi16(layer3_chunk3, 8));
// step 4
__m128i layer1_chunk0 = _mm_packus_epi16(_mm_and_si128(layer2_chunk0, v_mask), _mm_and_si128(layer2_chunk1, v_mask));
__m128i layer1_chunk2 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk0, 8), _mm_srli_epi16(layer2_chunk1, 8));
__m128i layer1_chunk1 = _mm_packus_epi16(_mm_and_si128(layer2_chunk2, v_mask), _mm_and_si128(layer2_chunk3, v_mask));
__m128i layer1_chunk3 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk2, 8), _mm_srli_epi16(layer2_chunk3, 8));
// step 5: results are written straight back into the arguments
v_r0 = _mm_packus_epi16(_mm_and_si128(layer1_chunk0, v_mask), _mm_and_si128(layer1_chunk1, v_mask));
v_g0 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk0, 8), _mm_srli_epi16(layer1_chunk1, 8));
v_r1 = _mm_packus_epi16(_mm_and_si128(layer1_chunk2, v_mask), _mm_and_si128(layer1_chunk3, v_mask));
v_g1 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk2, 8), _mm_srli_epi16(layer1_chunk3, 8));
}
// Interleaves three 32-byte planes into 96 bytes of three-channel data, in
// place. This is the inverse of the six-register _mm_deinterleave_epi8.
//
// On entry v_r0:v_r1, v_g0:v_g1 and v_b0:v_b1 hold planes 0, 1 and 2. On exit
// the registers, read in the order v_r0, v_r1, v_g0, v_g1, v_b0, v_b1, form a
// 96-byte sequence in which byte 3k+c comes from plane c (k = 0..31).
//
// Each layer is one inverse perfect shuffle of the six-register sequence:
// chunks 0..2 receive the bytes at even positions of consecutive register
// pairs (mask + pack), chunks 3..5 the bytes at odd positions (shift + pack).
// Every 16-bit lane fed to _mm_packus_epi16 is already in 0..255, so its
// saturation never alters a value. Five steps are applied in total.
inline void _mm_interleave_epi8(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0,
__m128i & v_g1, __m128i & v_b0, __m128i & v_b1)
{
// selects the low byte of every 16-bit lane
__m128i v_mask = _mm_set1_epi16(0x00ff);
// step 1
__m128i layer4_chunk0 = _mm_packus_epi16(_mm_and_si128(v_r0, v_mask), _mm_and_si128(v_r1, v_mask));
__m128i layer4_chunk3 = _mm_packus_epi16(_mm_srli_epi16(v_r0, 8), _mm_srli_epi16(v_r1, 8));
__m128i layer4_chunk1 = _mm_packus_epi16(_mm_and_si128(v_g0, v_mask), _mm_and_si128(v_g1, v_mask));
__m128i layer4_chunk4 = _mm_packus_epi16(_mm_srli_epi16(v_g0, 8), _mm_srli_epi16(v_g1, 8));
__m128i layer4_chunk2 = _mm_packus_epi16(_mm_and_si128(v_b0, v_mask), _mm_and_si128(v_b1, v_mask));
__m128i layer4_chunk5 = _mm_packus_epi16(_mm_srli_epi16(v_b0, 8), _mm_srli_epi16(v_b1, 8));
// step 2
__m128i layer3_chunk0 = _mm_packus_epi16(_mm_and_si128(layer4_chunk0, v_mask), _mm_and_si128(layer4_chunk1, v_mask));
__m128i layer3_chunk3 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk0, 8), _mm_srli_epi16(layer4_chunk1, 8));
__m128i layer3_chunk1 = _mm_packus_epi16(_mm_and_si128(layer4_chunk2, v_mask), _mm_and_si128(layer4_chunk3, v_mask));
__m128i layer3_chunk4 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk2, 8), _mm_srli_epi16(layer4_chunk3, 8));
__m128i layer3_chunk2 = _mm_packus_epi16(_mm_and_si128(layer4_chunk4, v_mask), _mm_and_si128(layer4_chunk5, v_mask));
__m128i layer3_chunk5 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk4, 8), _mm_srli_epi16(layer4_chunk5, 8));
// step 3
__m128i layer2_chunk0 = _mm_packus_epi16(_mm_and_si128(layer3_chunk0, v_mask), _mm_and_si128(layer3_chunk1, v_mask));
__m128i layer2_chunk3 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk0, 8), _mm_srli_epi16(layer3_chunk1, 8));
__m128i layer2_chunk1 = _mm_packus_epi16(_mm_and_si128(layer3_chunk2, v_mask), _mm_and_si128(layer3_chunk3, v_mask));
__m128i layer2_chunk4 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk2, 8), _mm_srli_epi16(layer3_chunk3, 8));
__m128i layer2_chunk2 = _mm_packus_epi16(_mm_and_si128(layer3_chunk4, v_mask), _mm_and_si128(layer3_chunk5, v_mask));
__m128i layer2_chunk5 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk4, 8), _mm_srli_epi16(layer3_chunk5, 8));
// step 4
__m128i layer1_chunk0 = _mm_packus_epi16(_mm_and_si128(layer2_chunk0, v_mask), _mm_and_si128(layer2_chunk1, v_mask));
__m128i layer1_chunk3 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk0, 8), _mm_srli_epi16(layer2_chunk1, 8));
__m128i layer1_chunk1 = _mm_packus_epi16(_mm_and_si128(layer2_chunk2, v_mask), _mm_and_si128(layer2_chunk3, v_mask));
__m128i layer1_chunk4 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk2, 8), _mm_srli_epi16(layer2_chunk3, 8));
__m128i layer1_chunk2 = _mm_packus_epi16(_mm_and_si128(layer2_chunk4, v_mask), _mm_and_si128(layer2_chunk5, v_mask));
__m128i layer1_chunk5 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk4, 8), _mm_srli_epi16(layer2_chunk5, 8));
// step 5: results are written straight back into the arguments
v_r0 = _mm_packus_epi16(_mm_and_si128(layer1_chunk0, v_mask), _mm_and_si128(layer1_chunk1, v_mask));
v_g1 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk0, 8), _mm_srli_epi16(layer1_chunk1, 8));
v_r1 = _mm_packus_epi16(_mm_and_si128(layer1_chunk2, v_mask), _mm_and_si128(layer1_chunk3, v_mask));
v_b0 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk2, 8), _mm_srli_epi16(layer1_chunk3, 8));
v_g0 = _mm_packus_epi16(_mm_and_si128(layer1_chunk4, v_mask), _mm_and_si128(layer1_chunk5, v_mask));
v_b1 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk4, 8), _mm_srli_epi16(layer1_chunk5, 8));
}
// Interleaves four 32-byte planes into 128 bytes of four-channel data, in
// place. This is the inverse of the eight-register _mm_deinterleave_epi8.
//
// On entry v_r0:v_r1, v_g0:v_g1, v_b0:v_b1 and v_a0:v_a1 hold planes 0..3.
// On exit the registers, read in the order v_r0, v_r1, v_g0, v_g1, v_b0,
// v_b1, v_a0, v_a1, form a 128-byte sequence in which byte 4k+c comes from
// plane c (k = 0..31).
//
// Each layer is one inverse perfect shuffle of the eight-register sequence:
// chunks 0..3 receive the bytes at even positions of consecutive register
// pairs (mask + pack), chunks 4..7 the bytes at odd positions (shift + pack).
// Every 16-bit lane fed to _mm_packus_epi16 is already in 0..255, so its
// saturation never alters a value. Five steps are applied in total.
inline void _mm_interleave_epi8(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1,
__m128i & v_b0, __m128i & v_b1, __m128i & v_a0, __m128i & v_a1)
{
// selects the low byte of every 16-bit lane
__m128i v_mask = _mm_set1_epi16(0x00ff);
// step 1
__m128i layer4_chunk0 = _mm_packus_epi16(_mm_and_si128(v_r0, v_mask), _mm_and_si128(v_r1, v_mask));
__m128i layer4_chunk4 = _mm_packus_epi16(_mm_srli_epi16(v_r0, 8), _mm_srli_epi16(v_r1, 8));
__m128i layer4_chunk1 = _mm_packus_epi16(_mm_and_si128(v_g0, v_mask), _mm_and_si128(v_g1, v_mask));
__m128i layer4_chunk5 = _mm_packus_epi16(_mm_srli_epi16(v_g0, 8), _mm_srli_epi16(v_g1, 8));
__m128i layer4_chunk2 = _mm_packus_epi16(_mm_and_si128(v_b0, v_mask), _mm_and_si128(v_b1, v_mask));
__m128i layer4_chunk6 = _mm_packus_epi16(_mm_srli_epi16(v_b0, 8), _mm_srli_epi16(v_b1, 8));
__m128i layer4_chunk3 = _mm_packus_epi16(_mm_and_si128(v_a0, v_mask), _mm_and_si128(v_a1, v_mask));
__m128i layer4_chunk7 = _mm_packus_epi16(_mm_srli_epi16(v_a0, 8), _mm_srli_epi16(v_a1, 8));
// step 2
__m128i layer3_chunk0 = _mm_packus_epi16(_mm_and_si128(layer4_chunk0, v_mask), _mm_and_si128(layer4_chunk1, v_mask));
__m128i layer3_chunk4 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk0, 8), _mm_srli_epi16(layer4_chunk1, 8));
__m128i layer3_chunk1 = _mm_packus_epi16(_mm_and_si128(layer4_chunk2, v_mask), _mm_and_si128(layer4_chunk3, v_mask));
__m128i layer3_chunk5 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk2, 8), _mm_srli_epi16(layer4_chunk3, 8));
__m128i layer3_chunk2 = _mm_packus_epi16(_mm_and_si128(layer4_chunk4, v_mask), _mm_and_si128(layer4_chunk5, v_mask));
__m128i layer3_chunk6 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk4, 8), _mm_srli_epi16(layer4_chunk5, 8));
__m128i layer3_chunk3 = _mm_packus_epi16(_mm_and_si128(layer4_chunk6, v_mask), _mm_and_si128(layer4_chunk7, v_mask));
__m128i layer3_chunk7 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk6, 8), _mm_srli_epi16(layer4_chunk7, 8));
// step 3
__m128i layer2_chunk0 = _mm_packus_epi16(_mm_and_si128(layer3_chunk0, v_mask), _mm_and_si128(layer3_chunk1, v_mask));
__m128i layer2_chunk4 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk0, 8), _mm_srli_epi16(layer3_chunk1, 8));
__m128i layer2_chunk1 = _mm_packus_epi16(_mm_and_si128(layer3_chunk2, v_mask), _mm_and_si128(layer3_chunk3, v_mask));
__m128i layer2_chunk5 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk2, 8), _mm_srli_epi16(layer3_chunk3, 8));
__m128i layer2_chunk2 = _mm_packus_epi16(_mm_and_si128(layer3_chunk4, v_mask), _mm_and_si128(layer3_chunk5, v_mask));
__m128i layer2_chunk6 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk4, 8), _mm_srli_epi16(layer3_chunk5, 8));
__m128i layer2_chunk3 = _mm_packus_epi16(_mm_and_si128(layer3_chunk6, v_mask), _mm_and_si128(layer3_chunk7, v_mask));
__m128i layer2_chunk7 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk6, 8), _mm_srli_epi16(layer3_chunk7, 8));
// step 4
__m128i layer1_chunk0 = _mm_packus_epi16(_mm_and_si128(layer2_chunk0, v_mask), _mm_and_si128(layer2_chunk1, v_mask));
__m128i layer1_chunk4 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk0, 8), _mm_srli_epi16(layer2_chunk1, 8));
__m128i layer1_chunk1 = _mm_packus_epi16(_mm_and_si128(layer2_chunk2, v_mask), _mm_and_si128(layer2_chunk3, v_mask));
__m128i layer1_chunk5 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk2, 8), _mm_srli_epi16(layer2_chunk3, 8));
__m128i layer1_chunk2 = _mm_packus_epi16(_mm_and_si128(layer2_chunk4, v_mask), _mm_and_si128(layer2_chunk5, v_mask));
__m128i layer1_chunk6 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk4, 8), _mm_srli_epi16(layer2_chunk5, 8));
__m128i layer1_chunk3 = _mm_packus_epi16(_mm_and_si128(layer2_chunk6, v_mask), _mm_and_si128(layer2_chunk7, v_mask));
__m128i layer1_chunk7 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk6, 8), _mm_srli_epi16(layer2_chunk7, 8));
// step 5: results are written straight back into the arguments
v_r0 = _mm_packus_epi16(_mm_and_si128(layer1_chunk0, v_mask), _mm_and_si128(layer1_chunk1, v_mask));
v_b0 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk0, 8), _mm_srli_epi16(layer1_chunk1, 8));
v_r1 = _mm_packus_epi16(_mm_and_si128(layer1_chunk2, v_mask), _mm_and_si128(layer1_chunk3, v_mask));
v_b1 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk2, 8), _mm_srli_epi16(layer1_chunk3, 8));
v_g0 = _mm_packus_epi16(_mm_and_si128(layer1_chunk4, v_mask), _mm_and_si128(layer1_chunk5, v_mask));
v_a0 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk4, 8), _mm_srli_epi16(layer1_chunk5, 8));
v_g1 = _mm_packus_epi16(_mm_and_si128(layer1_chunk6, v_mask), _mm_and_si128(layer1_chunk7, v_mask));
v_a1 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk6, 8), _mm_srli_epi16(layer1_chunk7, 8));
}
// De-interleaves 32 16-bit elements of two-channel data, in place.
//
// On entry the four registers, read in the order v_r0, v_r1, v_g0, v_g1, form
// one sequence of 32 16-bit elements. On exit v_r0:v_r1 hold the 16 elements
// that were at even positions (in order) and v_g0:v_g1 the 16 elements that
// were at odd positions.
//
// Each layer is one perfect shuffle of the 32-element sequence (register i
// unpacked against register i+2); with 16-bit elements four shuffles are
// enough, one fewer than in the 8-bit variant.
inline void _mm_deinterleave_epi16(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1)
{
// shuffle 1
__m128i layer1_chunk0 = _mm_unpacklo_epi16(v_r0, v_g0);
__m128i layer1_chunk1 = _mm_unpackhi_epi16(v_r0, v_g0);
__m128i layer1_chunk2 = _mm_unpacklo_epi16(v_r1, v_g1);
__m128i layer1_chunk3 = _mm_unpackhi_epi16(v_r1, v_g1);
// shuffle 2
__m128i layer2_chunk0 = _mm_unpacklo_epi16(layer1_chunk0, layer1_chunk2);
__m128i layer2_chunk1 = _mm_unpackhi_epi16(layer1_chunk0, layer1_chunk2);
__m128i layer2_chunk2 = _mm_unpacklo_epi16(layer1_chunk1, layer1_chunk3);
__m128i layer2_chunk3 = _mm_unpackhi_epi16(layer1_chunk1, layer1_chunk3);
// shuffle 3
__m128i layer3_chunk0 = _mm_unpacklo_epi16(layer2_chunk0, layer2_chunk2);
__m128i layer3_chunk1 = _mm_unpackhi_epi16(layer2_chunk0, layer2_chunk2);
__m128i layer3_chunk2 = _mm_unpacklo_epi16(layer2_chunk1, layer2_chunk3);
__m128i layer3_chunk3 = _mm_unpackhi_epi16(layer2_chunk1, layer2_chunk3);
// shuffle 4: results are written straight back into the arguments
v_r0 = _mm_unpacklo_epi16(layer3_chunk0, layer3_chunk2);
v_r1 = _mm_unpackhi_epi16(layer3_chunk0, layer3_chunk2);
v_g0 = _mm_unpacklo_epi16(layer3_chunk1, layer3_chunk3);
v_g1 = _mm_unpackhi_epi16(layer3_chunk1, layer3_chunk3);
}
// De-interleaves 48 packed 16-bit values of 3-channel data, in place.
// On entry the six registers, taken in the order v_r0, v_r1, v_g0, v_g1, v_b0, v_b1, hold one
// interleaved run (c0 c1 c2 c0 c1 c2 ...). On return v_r0:v_r1 hold the 16 values of the first
// channel, v_g0:v_g1 those of the second and v_b0:v_b1 those of the third.
// Every round pairs register i with register i + 3 and riffles the pair with
// _mm_unpacklo/hi_epi16; applying that same riffle four times yields the planar order.
inline void _mm_deinterleave_epi16(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0,
__m128i & v_g1, __m128i & v_b0, __m128i & v_b1)
{
// Round 1: (v_r0, v_g1), (v_r1, v_b0), (v_g0, v_b1).
__m128i layer1_chunk0 = _mm_unpacklo_epi16(v_r0, v_g1);
__m128i layer1_chunk1 = _mm_unpackhi_epi16(v_r0, v_g1);
__m128i layer1_chunk2 = _mm_unpacklo_epi16(v_r1, v_b0);
__m128i layer1_chunk3 = _mm_unpackhi_epi16(v_r1, v_b0);
__m128i layer1_chunk4 = _mm_unpacklo_epi16(v_g0, v_b1);
__m128i layer1_chunk5 = _mm_unpackhi_epi16(v_g0, v_b1);
// Round 2: chunk i is paired with chunk i + 3.
__m128i layer2_chunk0 = _mm_unpacklo_epi16(layer1_chunk0, layer1_chunk3);
__m128i layer2_chunk1 = _mm_unpackhi_epi16(layer1_chunk0, layer1_chunk3);
__m128i layer2_chunk2 = _mm_unpacklo_epi16(layer1_chunk1, layer1_chunk4);
__m128i layer2_chunk3 = _mm_unpackhi_epi16(layer1_chunk1, layer1_chunk4);
__m128i layer2_chunk4 = _mm_unpacklo_epi16(layer1_chunk2, layer1_chunk5);
__m128i layer2_chunk5 = _mm_unpackhi_epi16(layer1_chunk2, layer1_chunk5);
// Round 3: same pairing.
__m128i layer3_chunk0 = _mm_unpacklo_epi16(layer2_chunk0, layer2_chunk3);
__m128i layer3_chunk1 = _mm_unpackhi_epi16(layer2_chunk0, layer2_chunk3);
__m128i layer3_chunk2 = _mm_unpacklo_epi16(layer2_chunk1, layer2_chunk4);
__m128i layer3_chunk3 = _mm_unpackhi_epi16(layer2_chunk1, layer2_chunk4);
__m128i layer3_chunk4 = _mm_unpacklo_epi16(layer2_chunk2, layer2_chunk5);
__m128i layer3_chunk5 = _mm_unpackhi_epi16(layer2_chunk2, layer2_chunk5);
// Round 4: the planar result overwrites the input registers.
v_r0 = _mm_unpacklo_epi16(layer3_chunk0, layer3_chunk3);
v_r1 = _mm_unpackhi_epi16(layer3_chunk0, layer3_chunk3);
v_g0 = _mm_unpacklo_epi16(layer3_chunk1, layer3_chunk4);
v_g1 = _mm_unpackhi_epi16(layer3_chunk1, layer3_chunk4);
v_b0 = _mm_unpacklo_epi16(layer3_chunk2, layer3_chunk5);
v_b1 = _mm_unpackhi_epi16(layer3_chunk2, layer3_chunk5);
}
// De-interleaves 64 packed 16-bit values of 4-channel data, in place.
// On entry the eight registers, taken in the order v_r0, v_r1, v_g0, v_g1, v_b0, v_b1, v_a0, v_a1,
// hold one interleaved run (c0 c1 c2 c3 c0 c1 c2 c3 ...). On return each register pair
// (v_r0:v_r1, v_g0:v_g1, v_b0:v_b1, v_a0:v_a1) holds the 16 values of one channel.
// Every round pairs register i with register i + 4 and riffles the pair with
// _mm_unpacklo/hi_epi16; applying that same riffle four times yields the planar order.
inline void _mm_deinterleave_epi16(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1,
__m128i & v_b0, __m128i & v_b1, __m128i & v_a0, __m128i & v_a1)
{
// Round 1: (v_r0, v_b0), (v_r1, v_b1), (v_g0, v_a0), (v_g1, v_a1).
__m128i layer1_chunk0 = _mm_unpacklo_epi16(v_r0, v_b0);
__m128i layer1_chunk1 = _mm_unpackhi_epi16(v_r0, v_b0);
__m128i layer1_chunk2 = _mm_unpacklo_epi16(v_r1, v_b1);
__m128i layer1_chunk3 = _mm_unpackhi_epi16(v_r1, v_b1);
__m128i layer1_chunk4 = _mm_unpacklo_epi16(v_g0, v_a0);
__m128i layer1_chunk5 = _mm_unpackhi_epi16(v_g0, v_a0);
__m128i layer1_chunk6 = _mm_unpacklo_epi16(v_g1, v_a1);
__m128i layer1_chunk7 = _mm_unpackhi_epi16(v_g1, v_a1);
// Round 2: chunk i is paired with chunk i + 4.
__m128i layer2_chunk0 = _mm_unpacklo_epi16(layer1_chunk0, layer1_chunk4);
__m128i layer2_chunk1 = _mm_unpackhi_epi16(layer1_chunk0, layer1_chunk4);
__m128i layer2_chunk2 = _mm_unpacklo_epi16(layer1_chunk1, layer1_chunk5);
__m128i layer2_chunk3 = _mm_unpackhi_epi16(layer1_chunk1, layer1_chunk5);
__m128i layer2_chunk4 = _mm_unpacklo_epi16(layer1_chunk2, layer1_chunk6);
__m128i layer2_chunk5 = _mm_unpackhi_epi16(layer1_chunk2, layer1_chunk6);
__m128i layer2_chunk6 = _mm_unpacklo_epi16(layer1_chunk3, layer1_chunk7);
__m128i layer2_chunk7 = _mm_unpackhi_epi16(layer1_chunk3, layer1_chunk7);
// Round 3: same pairing.
__m128i layer3_chunk0 = _mm_unpacklo_epi16(layer2_chunk0, layer2_chunk4);
__m128i layer3_chunk1 = _mm_unpackhi_epi16(layer2_chunk0, layer2_chunk4);
__m128i layer3_chunk2 = _mm_unpacklo_epi16(layer2_chunk1, layer2_chunk5);
__m128i layer3_chunk3 = _mm_unpackhi_epi16(layer2_chunk1, layer2_chunk5);
__m128i layer3_chunk4 = _mm_unpacklo_epi16(layer2_chunk2, layer2_chunk6);
__m128i layer3_chunk5 = _mm_unpackhi_epi16(layer2_chunk2, layer2_chunk6);
__m128i layer3_chunk6 = _mm_unpacklo_epi16(layer2_chunk3, layer2_chunk7);
__m128i layer3_chunk7 = _mm_unpackhi_epi16(layer2_chunk3, layer2_chunk7);
// Round 4: the planar result overwrites the input registers.
v_r0 = _mm_unpacklo_epi16(layer3_chunk0, layer3_chunk4);
v_r1 = _mm_unpackhi_epi16(layer3_chunk0, layer3_chunk4);
v_g0 = _mm_unpacklo_epi16(layer3_chunk1, layer3_chunk5);
v_g1 = _mm_unpackhi_epi16(layer3_chunk1, layer3_chunk5);
v_b0 = _mm_unpacklo_epi16(layer3_chunk2, layer3_chunk6);
v_b1 = _mm_unpackhi_epi16(layer3_chunk2, layer3_chunk6);
v_a0 = _mm_unpacklo_epi16(layer3_chunk3, layer3_chunk7);
v_a1 = _mm_unpackhi_epi16(layer3_chunk3, layer3_chunk7);
}
#if CV_SSE4_1
// Interleaves two planes of 16 packed 16-bit values into 2-channel order, in place
// (the inverse of the 4-register _mm_deinterleave_epi16).
// On entry v_r0:v_r1 hold the first channel and v_g0:v_g1 the second; on return the registers,
// read in the order v_r0, v_r1, v_g0, v_g1, hold c0 c1 c0 c1 ...
// v_mask keeps the low 16 bits of each 32-bit lane, so the "and" operands select the
// even-indexed 16-bit elements of a register pair and the "srli 16" operands the odd-indexed
// ones. Both leave values <= 0xFFFF, so the unsigned-saturating _mm_packus_epi32 never clamps.
// That even/odd split is applied four times.
inline void _mm_interleave_epi16(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1)
{
__m128i v_mask = _mm_set1_epi32(0x0000ffff);
// Round 1: even elements go to chunks 0..1, odd elements to chunks 2..3.
__m128i layer3_chunk0 = _mm_packus_epi32(_mm_and_si128(v_r0, v_mask), _mm_and_si128(v_r1, v_mask));
__m128i layer3_chunk2 = _mm_packus_epi32(_mm_srli_epi32(v_r0, 16), _mm_srli_epi32(v_r1, 16));
__m128i layer3_chunk1 = _mm_packus_epi32(_mm_and_si128(v_g0, v_mask), _mm_and_si128(v_g1, v_mask));
__m128i layer3_chunk3 = _mm_packus_epi32(_mm_srli_epi32(v_g0, 16), _mm_srli_epi32(v_g1, 16));
// Round 2
__m128i layer2_chunk0 = _mm_packus_epi32(_mm_and_si128(layer3_chunk0, v_mask), _mm_and_si128(layer3_chunk1, v_mask));
__m128i layer2_chunk2 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk0, 16), _mm_srli_epi32(layer3_chunk1, 16));
__m128i layer2_chunk1 = _mm_packus_epi32(_mm_and_si128(layer3_chunk2, v_mask), _mm_and_si128(layer3_chunk3, v_mask));
__m128i layer2_chunk3 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk2, 16), _mm_srli_epi32(layer3_chunk3, 16));
// Round 3
__m128i layer1_chunk0 = _mm_packus_epi32(_mm_and_si128(layer2_chunk0, v_mask), _mm_and_si128(layer2_chunk1, v_mask));
__m128i layer1_chunk2 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk0, 16), _mm_srli_epi32(layer2_chunk1, 16));
__m128i layer1_chunk1 = _mm_packus_epi32(_mm_and_si128(layer2_chunk2, v_mask), _mm_and_si128(layer2_chunk3, v_mask));
__m128i layer1_chunk3 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk2, 16), _mm_srli_epi32(layer2_chunk3, 16));
// Round 4: the interleaved result overwrites the input registers.
v_r0 = _mm_packus_epi32(_mm_and_si128(layer1_chunk0, v_mask), _mm_and_si128(layer1_chunk1, v_mask));
v_g0 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk0, 16), _mm_srli_epi32(layer1_chunk1, 16));
v_r1 = _mm_packus_epi32(_mm_and_si128(layer1_chunk2, v_mask), _mm_and_si128(layer1_chunk3, v_mask));
v_g1 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk2, 16), _mm_srli_epi32(layer1_chunk3, 16));
}
// Interleaves three planes of 16 packed 16-bit values into 3-channel order, in place
// (the inverse of the 6-register _mm_deinterleave_epi16).
// On entry v_r0:v_r1, v_g0:v_g1 and v_b0:v_b1 each hold one channel; on return the registers,
// read in the order v_r0, v_r1, v_g0, v_g1, v_b0, v_b1, hold c0 c1 c2 c0 c1 c2 ...
// v_mask keeps the low 16 bits of each 32-bit lane, so the "and" operands select the
// even-indexed 16-bit elements of a register pair and the "srli 16" operands the odd-indexed
// ones. Both leave values <= 0xFFFF, so the unsigned-saturating _mm_packus_epi32 never clamps.
// That even/odd split is applied four times.
inline void _mm_interleave_epi16(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0,
__m128i & v_g1, __m128i & v_b0, __m128i & v_b1)
{
__m128i v_mask = _mm_set1_epi32(0x0000ffff);
// Round 1: even elements go to chunks 0..2, odd elements to chunks 3..5.
__m128i layer3_chunk0 = _mm_packus_epi32(_mm_and_si128(v_r0, v_mask), _mm_and_si128(v_r1, v_mask));
__m128i layer3_chunk3 = _mm_packus_epi32(_mm_srli_epi32(v_r0, 16), _mm_srli_epi32(v_r1, 16));
__m128i layer3_chunk1 = _mm_packus_epi32(_mm_and_si128(v_g0, v_mask), _mm_and_si128(v_g1, v_mask));
__m128i layer3_chunk4 = _mm_packus_epi32(_mm_srli_epi32(v_g0, 16), _mm_srli_epi32(v_g1, 16));
__m128i layer3_chunk2 = _mm_packus_epi32(_mm_and_si128(v_b0, v_mask), _mm_and_si128(v_b1, v_mask));
__m128i layer3_chunk5 = _mm_packus_epi32(_mm_srli_epi32(v_b0, 16), _mm_srli_epi32(v_b1, 16));
// Round 2
__m128i layer2_chunk0 = _mm_packus_epi32(_mm_and_si128(layer3_chunk0, v_mask), _mm_and_si128(layer3_chunk1, v_mask));
__m128i layer2_chunk3 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk0, 16), _mm_srli_epi32(layer3_chunk1, 16));
__m128i layer2_chunk1 = _mm_packus_epi32(_mm_and_si128(layer3_chunk2, v_mask), _mm_and_si128(layer3_chunk3, v_mask));
__m128i layer2_chunk4 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk2, 16), _mm_srli_epi32(layer3_chunk3, 16));
__m128i layer2_chunk2 = _mm_packus_epi32(_mm_and_si128(layer3_chunk4, v_mask), _mm_and_si128(layer3_chunk5, v_mask));
__m128i layer2_chunk5 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk4, 16), _mm_srli_epi32(layer3_chunk5, 16));
// Round 3
__m128i layer1_chunk0 = _mm_packus_epi32(_mm_and_si128(layer2_chunk0, v_mask), _mm_and_si128(layer2_chunk1, v_mask));
__m128i layer1_chunk3 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk0, 16), _mm_srli_epi32(layer2_chunk1, 16));
__m128i layer1_chunk1 = _mm_packus_epi32(_mm_and_si128(layer2_chunk2, v_mask), _mm_and_si128(layer2_chunk3, v_mask));
__m128i layer1_chunk4 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk2, 16), _mm_srli_epi32(layer2_chunk3, 16));
__m128i layer1_chunk2 = _mm_packus_epi32(_mm_and_si128(layer2_chunk4, v_mask), _mm_and_si128(layer2_chunk5, v_mask));
__m128i layer1_chunk5 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk4, 16), _mm_srli_epi32(layer2_chunk5, 16));
// Round 4: the interleaved result overwrites the input registers.
v_r0 = _mm_packus_epi32(_mm_and_si128(layer1_chunk0, v_mask), _mm_and_si128(layer1_chunk1, v_mask));
v_g1 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk0, 16), _mm_srli_epi32(layer1_chunk1, 16));
v_r1 = _mm_packus_epi32(_mm_and_si128(layer1_chunk2, v_mask), _mm_and_si128(layer1_chunk3, v_mask));
v_b0 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk2, 16), _mm_srli_epi32(layer1_chunk3, 16));
v_g0 = _mm_packus_epi32(_mm_and_si128(layer1_chunk4, v_mask), _mm_and_si128(layer1_chunk5, v_mask));
v_b1 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk4, 16), _mm_srli_epi32(layer1_chunk5, 16));
}
// Interleaves four planes of 16 packed 16-bit values into 4-channel order, in place
// (the inverse of the 8-register _mm_deinterleave_epi16).
// On entry v_r0:v_r1, v_g0:v_g1, v_b0:v_b1 and v_a0:v_a1 each hold one channel; on return the
// registers, read in the order v_r0, v_r1, v_g0, v_g1, v_b0, v_b1, v_a0, v_a1, hold
// c0 c1 c2 c3 c0 c1 c2 c3 ...
// v_mask keeps the low 16 bits of each 32-bit lane, so the "and" operands select the
// even-indexed 16-bit elements of a register pair and the "srli 16" operands the odd-indexed
// ones. Both leave values <= 0xFFFF, so the unsigned-saturating _mm_packus_epi32 never clamps.
// That even/odd split is applied four times.
inline void _mm_interleave_epi16(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1,
__m128i & v_b0, __m128i & v_b1, __m128i & v_a0, __m128i & v_a1)
{
__m128i v_mask = _mm_set1_epi32(0x0000ffff);
// Round 1: even elements go to chunks 0..3, odd elements to chunks 4..7.
__m128i layer3_chunk0 = _mm_packus_epi32(_mm_and_si128(v_r0, v_mask), _mm_and_si128(v_r1, v_mask));
__m128i layer3_chunk4 = _mm_packus_epi32(_mm_srli_epi32(v_r0, 16), _mm_srli_epi32(v_r1, 16));
__m128i layer3_chunk1 = _mm_packus_epi32(_mm_and_si128(v_g0, v_mask), _mm_and_si128(v_g1, v_mask));
__m128i layer3_chunk5 = _mm_packus_epi32(_mm_srli_epi32(v_g0, 16), _mm_srli_epi32(v_g1, 16));
__m128i layer3_chunk2 = _mm_packus_epi32(_mm_and_si128(v_b0, v_mask), _mm_and_si128(v_b1, v_mask));
__m128i layer3_chunk6 = _mm_packus_epi32(_mm_srli_epi32(v_b0, 16), _mm_srli_epi32(v_b1, 16));
__m128i layer3_chunk3 = _mm_packus_epi32(_mm_and_si128(v_a0, v_mask), _mm_and_si128(v_a1, v_mask));
__m128i layer3_chunk7 = _mm_packus_epi32(_mm_srli_epi32(v_a0, 16), _mm_srli_epi32(v_a1, 16));
// Round 2
__m128i layer2_chunk0 = _mm_packus_epi32(_mm_and_si128(layer3_chunk0, v_mask), _mm_and_si128(layer3_chunk1, v_mask));
__m128i layer2_chunk4 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk0, 16), _mm_srli_epi32(layer3_chunk1, 16));
__m128i layer2_chunk1 = _mm_packus_epi32(_mm_and_si128(layer3_chunk2, v_mask), _mm_and_si128(layer3_chunk3, v_mask));
__m128i layer2_chunk5 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk2, 16), _mm_srli_epi32(layer3_chunk3, 16));
__m128i layer2_chunk2 = _mm_packus_epi32(_mm_and_si128(layer3_chunk4, v_mask), _mm_and_si128(layer3_chunk5, v_mask));
__m128i layer2_chunk6 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk4, 16), _mm_srli_epi32(layer3_chunk5, 16));
__m128i layer2_chunk3 = _mm_packus_epi32(_mm_and_si128(layer3_chunk6, v_mask), _mm_and_si128(layer3_chunk7, v_mask));
__m128i layer2_chunk7 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk6, 16), _mm_srli_epi32(layer3_chunk7, 16));
// Round 3
__m128i layer1_chunk0 = _mm_packus_epi32(_mm_and_si128(layer2_chunk0, v_mask), _mm_and_si128(layer2_chunk1, v_mask));
__m128i layer1_chunk4 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk0, 16), _mm_srli_epi32(layer2_chunk1, 16));
__m128i layer1_chunk1 = _mm_packus_epi32(_mm_and_si128(layer2_chunk2, v_mask), _mm_and_si128(layer2_chunk3, v_mask));
__m128i layer1_chunk5 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk2, 16), _mm_srli_epi32(layer2_chunk3, 16));
__m128i layer1_chunk2 = _mm_packus_epi32(_mm_and_si128(layer2_chunk4, v_mask), _mm_and_si128(layer2_chunk5, v_mask));
__m128i layer1_chunk6 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk4, 16), _mm_srli_epi32(layer2_chunk5, 16));
__m128i layer1_chunk3 = _mm_packus_epi32(_mm_and_si128(layer2_chunk6, v_mask), _mm_and_si128(layer2_chunk7, v_mask));
__m128i layer1_chunk7 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk6, 16), _mm_srli_epi32(layer2_chunk7, 16));
// Round 4: the interleaved result overwrites the input registers.
v_r0 = _mm_packus_epi32(_mm_and_si128(layer1_chunk0, v_mask), _mm_and_si128(layer1_chunk1, v_mask));
v_b0 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk0, 16), _mm_srli_epi32(layer1_chunk1, 16));
v_r1 = _mm_packus_epi32(_mm_and_si128(layer1_chunk2, v_mask), _mm_and_si128(layer1_chunk3, v_mask));
v_b1 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk2, 16), _mm_srli_epi32(layer1_chunk3, 16));
v_g0 = _mm_packus_epi32(_mm_and_si128(layer1_chunk4, v_mask), _mm_and_si128(layer1_chunk5, v_mask));
v_a0 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk4, 16), _mm_srli_epi32(layer1_chunk5, 16));
v_g1 = _mm_packus_epi32(_mm_and_si128(layer1_chunk6, v_mask), _mm_and_si128(layer1_chunk7, v_mask));
v_a1 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk6, 16), _mm_srli_epi32(layer1_chunk7, 16));
}
#endif // CV_SSE4_1
// Splits 16 packed floats of 2-channel interleaved data into two planes, in place.
// Input : v_r0, v_r1, v_g0, v_g1 (in that order) hold c0 c1 c0 c1 ... (4 floats per register).
// Output: v_r0:v_r1 hold the 8 values of the first channel, v_g0:v_g1 those of the second.
// The same riffle (register i against register i + 2) is applied three times.
inline void _mm_deinterleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0, __m128 & v_g1)
{
    // Round 1
    const __m128 a0 = _mm_unpacklo_ps(v_r0, v_g0);
    const __m128 a1 = _mm_unpackhi_ps(v_r0, v_g0);
    const __m128 a2 = _mm_unpacklo_ps(v_r1, v_g1);
    const __m128 a3 = _mm_unpackhi_ps(v_r1, v_g1);

    // Round 2
    const __m128 b0 = _mm_unpacklo_ps(a0, a2);
    const __m128 b1 = _mm_unpackhi_ps(a0, a2);
    const __m128 b2 = _mm_unpacklo_ps(a1, a3);
    const __m128 b3 = _mm_unpackhi_ps(a1, a3);

    // Round 3: write the planar result back over the inputs
    v_r0 = _mm_unpacklo_ps(b0, b2);
    v_r1 = _mm_unpackhi_ps(b0, b2);
    v_g0 = _mm_unpacklo_ps(b1, b3);
    v_g1 = _mm_unpackhi_ps(b1, b3);
}
// De-interleaves 24 packed floats of 3-channel data, in place.
// On entry the six registers, taken in the order v_r0, v_r1, v_g0, v_g1, v_b0, v_b1, hold one
// interleaved run (c0 c1 c2 c0 c1 c2 ...). On return v_r0:v_r1 hold the 8 values of the first
// channel, v_g0:v_g1 those of the second and v_b0:v_b1 those of the third.
// Every round pairs register i with register i + 3 and riffles the pair with
// _mm_unpacklo/hi_ps; applying that same riffle three times yields the planar order.
inline void _mm_deinterleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0,
__m128 & v_g1, __m128 & v_b0, __m128 & v_b1)
{
// Round 1: (v_r0, v_g1), (v_r1, v_b0), (v_g0, v_b1).
__m128 layer1_chunk0 = _mm_unpacklo_ps(v_r0, v_g1);
__m128 layer1_chunk1 = _mm_unpackhi_ps(v_r0, v_g1);
__m128 layer1_chunk2 = _mm_unpacklo_ps(v_r1, v_b0);
__m128 layer1_chunk3 = _mm_unpackhi_ps(v_r1, v_b0);
__m128 layer1_chunk4 = _mm_unpacklo_ps(v_g0, v_b1);
__m128 layer1_chunk5 = _mm_unpackhi_ps(v_g0, v_b1);
// Round 2: chunk i is paired with chunk i + 3.
__m128 layer2_chunk0 = _mm_unpacklo_ps(layer1_chunk0, layer1_chunk3);
__m128 layer2_chunk1 = _mm_unpackhi_ps(layer1_chunk0, layer1_chunk3);
__m128 layer2_chunk2 = _mm_unpacklo_ps(layer1_chunk1, layer1_chunk4);
__m128 layer2_chunk3 = _mm_unpackhi_ps(layer1_chunk1, layer1_chunk4);
__m128 layer2_chunk4 = _mm_unpacklo_ps(layer1_chunk2, layer1_chunk5);
__m128 layer2_chunk5 = _mm_unpackhi_ps(layer1_chunk2, layer1_chunk5);
// Round 3: the planar result overwrites the input registers.
v_r0 = _mm_unpacklo_ps(layer2_chunk0, layer2_chunk3);
v_r1 = _mm_unpackhi_ps(layer2_chunk0, layer2_chunk3);
v_g0 = _mm_unpacklo_ps(layer2_chunk1, layer2_chunk4);
v_g1 = _mm_unpackhi_ps(layer2_chunk1, layer2_chunk4);
v_b0 = _mm_unpacklo_ps(layer2_chunk2, layer2_chunk5);
v_b1 = _mm_unpackhi_ps(layer2_chunk2, layer2_chunk5);
}
// De-interleaves 32 packed floats of 4-channel data, in place.
// On entry the eight registers, taken in the order v_r0, v_r1, v_g0, v_g1, v_b0, v_b1, v_a0, v_a1,
// hold one interleaved run (c0 c1 c2 c3 c0 c1 c2 c3 ...). On return each register pair
// (v_r0:v_r1, v_g0:v_g1, v_b0:v_b1, v_a0:v_a1) holds the 8 values of one channel.
// Every round pairs register i with register i + 4 and riffles the pair with
// _mm_unpacklo/hi_ps; applying that same riffle three times yields the planar order.
inline void _mm_deinterleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0, __m128 & v_g1,
__m128 & v_b0, __m128 & v_b1, __m128 & v_a0, __m128 & v_a1)
{
// Round 1: (v_r0, v_b0), (v_r1, v_b1), (v_g0, v_a0), (v_g1, v_a1).
__m128 layer1_chunk0 = _mm_unpacklo_ps(v_r0, v_b0);
__m128 layer1_chunk1 = _mm_unpackhi_ps(v_r0, v_b0);
__m128 layer1_chunk2 = _mm_unpacklo_ps(v_r1, v_b1);
__m128 layer1_chunk3 = _mm_unpackhi_ps(v_r1, v_b1);
__m128 layer1_chunk4 = _mm_unpacklo_ps(v_g0, v_a0);
__m128 layer1_chunk5 = _mm_unpackhi_ps(v_g0, v_a0);
__m128 layer1_chunk6 = _mm_unpacklo_ps(v_g1, v_a1);
__m128 layer1_chunk7 = _mm_unpackhi_ps(v_g1, v_a1);
// Round 2: chunk i is paired with chunk i + 4.
__m128 layer2_chunk0 = _mm_unpacklo_ps(layer1_chunk0, layer1_chunk4);
__m128 layer2_chunk1 = _mm_unpackhi_ps(layer1_chunk0, layer1_chunk4);
__m128 layer2_chunk2 = _mm_unpacklo_ps(layer1_chunk1, layer1_chunk5);
__m128 layer2_chunk3 = _mm_unpackhi_ps(layer1_chunk1, layer1_chunk5);
__m128 layer2_chunk4 = _mm_unpacklo_ps(layer1_chunk2, layer1_chunk6);
__m128 layer2_chunk5 = _mm_unpackhi_ps(layer1_chunk2, layer1_chunk6);
__m128 layer2_chunk6 = _mm_unpacklo_ps(layer1_chunk3, layer1_chunk7);
__m128 layer2_chunk7 = _mm_unpackhi_ps(layer1_chunk3, layer1_chunk7);
// Round 3: the planar result overwrites the input registers.
v_r0 = _mm_unpacklo_ps(layer2_chunk0, layer2_chunk4);
v_r1 = _mm_unpackhi_ps(layer2_chunk0, layer2_chunk4);
v_g0 = _mm_unpacklo_ps(layer2_chunk1, layer2_chunk5);
v_g1 = _mm_unpackhi_ps(layer2_chunk1, layer2_chunk5);
v_b0 = _mm_unpacklo_ps(layer2_chunk2, layer2_chunk6);
v_b1 = _mm_unpackhi_ps(layer2_chunk2, layer2_chunk6);
v_a0 = _mm_unpacklo_ps(layer2_chunk3, layer2_chunk7);
v_a1 = _mm_unpackhi_ps(layer2_chunk3, layer2_chunk7);
}
// Interleaves two planes of 8 packed floats into 2-channel order, in place.
// Input : v_r0:v_r1 hold the first channel, v_g0:v_g1 the second.
// Output: v_r0, v_r1, v_g0, v_g1 (in that order) hold c0 c1 c0 c1 ...
// Each round splits every register pair into its even-indexed and odd-indexed elements;
// three such rounds produce the interleaved order.
inline void _mm_interleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0, __m128 & v_g1)
{
    const int pick_even = _MM_SHUFFLE(2, 0, 2, 0); // lanes 0 and 2 of each operand
    const int pick_odd = _MM_SHUFFLE(3, 1, 3, 1);  // lanes 1 and 3 of each operand

    // Round 1
    const __m128 s0 = _mm_shuffle_ps(v_r0, v_r1, pick_even);
    const __m128 s1 = _mm_shuffle_ps(v_g0, v_g1, pick_even);
    const __m128 s2 = _mm_shuffle_ps(v_r0, v_r1, pick_odd);
    const __m128 s3 = _mm_shuffle_ps(v_g0, v_g1, pick_odd);

    // Round 2
    const __m128 t0 = _mm_shuffle_ps(s0, s1, pick_even);
    const __m128 t1 = _mm_shuffle_ps(s2, s3, pick_even);
    const __m128 t2 = _mm_shuffle_ps(s0, s1, pick_odd);
    const __m128 t3 = _mm_shuffle_ps(s2, s3, pick_odd);

    // Round 3: write the interleaved result back over the inputs
    v_r0 = _mm_shuffle_ps(t0, t1, pick_even);
    v_r1 = _mm_shuffle_ps(t2, t3, pick_even);
    v_g0 = _mm_shuffle_ps(t0, t1, pick_odd);
    v_g1 = _mm_shuffle_ps(t2, t3, pick_odd);
}
// Interleaves three planes of 8 packed floats into 3-channel order, in place
// (the inverse of the 6-register _mm_deinterleave_ps).
// On entry v_r0:v_r1, v_g0:v_g1 and v_b0:v_b1 each hold one channel; on return the registers,
// read in the order v_r0, v_r1, v_g0, v_g1, v_b0, v_b1, hold c0 c1 c2 c0 c1 c2 ...
// mask_lo picks lanes 0 and 2 of each shuffle operand (the even-indexed elements of the
// register pair), mask_hi picks lanes 1 and 3 (the odd-indexed ones). That even/odd split is
// applied three times.
inline void _mm_interleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0,
__m128 & v_g1, __m128 & v_b0, __m128 & v_b1)
{
const int mask_lo = _MM_SHUFFLE(2, 0, 2, 0), mask_hi = _MM_SHUFFLE(3, 1, 3, 1);
// Round 1: even elements go to chunks 0..2, odd elements to chunks 3..5.
__m128 layer2_chunk0 = _mm_shuffle_ps(v_r0, v_r1, mask_lo);
__m128 layer2_chunk3 = _mm_shuffle_ps(v_r0, v_r1, mask_hi);
__m128 layer2_chunk1 = _mm_shuffle_ps(v_g0, v_g1, mask_lo);
__m128 layer2_chunk4 = _mm_shuffle_ps(v_g0, v_g1, mask_hi);
__m128 layer2_chunk2 = _mm_shuffle_ps(v_b0, v_b1, mask_lo);
__m128 layer2_chunk5 = _mm_shuffle_ps(v_b0, v_b1, mask_hi);
// Round 2
__m128 layer1_chunk0 = _mm_shuffle_ps(layer2_chunk0, layer2_chunk1, mask_lo);
__m128 layer1_chunk3 = _mm_shuffle_ps(layer2_chunk0, layer2_chunk1, mask_hi);
__m128 layer1_chunk1 = _mm_shuffle_ps(layer2_chunk2, layer2_chunk3, mask_lo);
__m128 layer1_chunk4 = _mm_shuffle_ps(layer2_chunk2, layer2_chunk3, mask_hi);
__m128 layer1_chunk2 = _mm_shuffle_ps(layer2_chunk4, layer2_chunk5, mask_lo);
__m128 layer1_chunk5 = _mm_shuffle_ps(layer2_chunk4, layer2_chunk5, mask_hi);
// Round 3: the interleaved result overwrites the input registers.
v_r0 = _mm_shuffle_ps(layer1_chunk0, layer1_chunk1, mask_lo);
v_g1 = _mm_shuffle_ps(layer1_chunk0, layer1_chunk1, mask_hi);
v_r1 = _mm_shuffle_ps(layer1_chunk2, layer1_chunk3, mask_lo);
v_b0 = _mm_shuffle_ps(layer1_chunk2, layer1_chunk3, mask_hi);
v_g0 = _mm_shuffle_ps(layer1_chunk4, layer1_chunk5, mask_lo);
v_b1 = _mm_shuffle_ps(layer1_chunk4, layer1_chunk5, mask_hi);
}
// Interleaves four planes of 8 packed floats into 4-channel order, in place
// (the inverse of the 8-register _mm_deinterleave_ps).
// On entry v_r0:v_r1, v_g0:v_g1, v_b0:v_b1 and v_a0:v_a1 each hold one channel; on return the
// registers, read in the order v_r0, v_r1, v_g0, v_g1, v_b0, v_b1, v_a0, v_a1, hold
// c0 c1 c2 c3 c0 c1 c2 c3 ...
// mask_lo picks lanes 0 and 2 of each shuffle operand (the even-indexed elements of the
// register pair), mask_hi picks lanes 1 and 3 (the odd-indexed ones). That even/odd split is
// applied three times.
inline void _mm_interleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0, __m128 & v_g1,
__m128 & v_b0, __m128 & v_b1, __m128 & v_a0, __m128 & v_a1)
{
const int mask_lo = _MM_SHUFFLE(2, 0, 2, 0), mask_hi = _MM_SHUFFLE(3, 1, 3, 1);
// Round 1: even elements go to chunks 0..3, odd elements to chunks 4..7.
__m128 layer2_chunk0 = _mm_shuffle_ps(v_r0, v_r1, mask_lo);
__m128 layer2_chunk4 = _mm_shuffle_ps(v_r0, v_r1, mask_hi);
__m128 layer2_chunk1 = _mm_shuffle_ps(v_g0, v_g1, mask_lo);
__m128 layer2_chunk5 = _mm_shuffle_ps(v_g0, v_g1, mask_hi);
__m128 layer2_chunk2 = _mm_shuffle_ps(v_b0, v_b1, mask_lo);
__m128 layer2_chunk6 = _mm_shuffle_ps(v_b0, v_b1, mask_hi);
__m128 layer2_chunk3 = _mm_shuffle_ps(v_a0, v_a1, mask_lo);
__m128 layer2_chunk7 = _mm_shuffle_ps(v_a0, v_a1, mask_hi);
// Round 2
__m128 layer1_chunk0 = _mm_shuffle_ps(layer2_chunk0, layer2_chunk1, mask_lo);
__m128 layer1_chunk4 = _mm_shuffle_ps(layer2_chunk0, layer2_chunk1, mask_hi);
__m128 layer1_chunk1 = _mm_shuffle_ps(layer2_chunk2, layer2_chunk3, mask_lo);
__m128 layer1_chunk5 = _mm_shuffle_ps(layer2_chunk2, layer2_chunk3, mask_hi);
__m128 layer1_chunk2 = _mm_shuffle_ps(layer2_chunk4, layer2_chunk5, mask_lo);
__m128 layer1_chunk6 = _mm_shuffle_ps(layer2_chunk4, layer2_chunk5, mask_hi);
__m128 layer1_chunk3 = _mm_shuffle_ps(layer2_chunk6, layer2_chunk7, mask_lo);
__m128 layer1_chunk7 = _mm_shuffle_ps(layer2_chunk6, layer2_chunk7, mask_hi);
// Round 3: the interleaved result overwrites the input registers.
v_r0 = _mm_shuffle_ps(layer1_chunk0, layer1_chunk1, mask_lo);
v_b0 = _mm_shuffle_ps(layer1_chunk0, layer1_chunk1, mask_hi);
v_r1 = _mm_shuffle_ps(layer1_chunk2, layer1_chunk3, mask_lo);
v_b1 = _mm_shuffle_ps(layer1_chunk2, layer1_chunk3, mask_hi);
v_g0 = _mm_shuffle_ps(layer1_chunk4, layer1_chunk5, mask_lo);
v_a0 = _mm_shuffle_ps(layer1_chunk4, layer1_chunk5, mask_hi);
v_g1 = _mm_shuffle_ps(layer1_chunk6, layer1_chunk7, mask_lo);
v_a1 = _mm_shuffle_ps(layer1_chunk6, layer1_chunk7, mask_hi);
}
#endif // CV_SSE2
#endif //__OPENCV_CORE_SSE_UTILS_HPP__

326
3rdparty/include/opencv2/core/traits.hpp vendored Normal file
View File

@ -0,0 +1,326 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_CORE_TRAITS_HPP__
#define __OPENCV_CORE_TRAITS_HPP__
#include "opencv2/core/cvdef.h"
namespace cv
{
//! @addtogroup core_basic
//! @{
/** @brief Template "trait" class for OpenCV primitive data types.
A primitive OpenCV data type is one of unsigned char, bool, signed char, unsigned short, signed
short, int, float, double, or a tuple of values of one of these types, where all the values in the
tuple have the same type. Any primitive type from the list can be defined by an identifier in the
form CV_\<bit-depth\>{U|S|F}C(\<number_of_channels\>), for example: uchar \~ CV_8UC1, 3-element
floating-point tuple \~ CV_32FC3, and so on. A universal OpenCV structure that is able to store a
single instance of such a primitive data type is Vec. Multiple instances of such a type can be
stored in a std::vector, Mat, Mat_, SparseMat, SparseMat_, or any other container that is able to
store Vec instances.
The DataType class is basically used to provide a description of such primitive data types without
adding any fields or methods to the corresponding classes (and it is actually impossible to add
anything to primitive C/C++ data types). This technique is known in C++ as class traits. It is not
DataType itself that is used but its specialized versions, such as:
@code
template<> class DataType<uchar>
{
typedef uchar value_type;
typedef int work_type;
typedef uchar channel_type;
enum { depth = CV_8U, channels = 1, fmt='u', type = CV_8U };
};
...
template<typename _Tp> class DataType<std::complex<_Tp> >
{
typedef std::complex<_Tp> value_type;
typedef std::complex<_Tp> work_type;
typedef _Tp channel_type;
// DataDepth is another helper trait class
enum { depth = DataDepth<_Tp>::value, channels=2,
fmt=(channels-1)*256+DataDepth<_Tp>::fmt,
type=CV_MAKETYPE(depth, channels) };
};
...
@endcode
The main purpose of this class is to convert compilation-time type information to an
OpenCV-compatible data type identifier, for example:
@code
// allocates a 30x40 floating-point matrix
Mat A(30, 40, DataType<float>::type);
Mat B = Mat_<std::complex<double> >(3, 3);
// the statement below will print 6, 2 , that is depth == CV_64F, channels == 2
cout << B.depth() << ", " << B.channels() << endl;
@endcode
So, such traits are used to tell OpenCV which data type you are working with, even if such a type is
not native to OpenCV. For example, the matrix B initialization above is compiled because OpenCV
defines the proper specialized template class DataType\<complex\<_Tp\> \> . This mechanism is also
useful (and used in OpenCV this way) for generic algorithms implementations.
*/
// Primary (fallback) DataType: used for every type that has no dedicated specialization.
// All four typedefs alias _Tp itself; generic_type is 1 and depth is -1 here.
template<typename _Tp> class DataType
{
public:
    typedef _Tp value_type;
    typedef _Tp work_type;
    typedef _Tp channel_type;
    typedef _Tp vec_type;

    enum
    {
        generic_type = 1,
        depth        = -1,
        channels     = 1,
        fmt          = 0,
        type         = CV_MAKETYPE(depth, channels)
    };
};
// DataType for bool: described as one CV_8U channel, format code 'u', with int as work_type.
template<> class DataType<bool>
{
public:
    typedef bool value_type;
    typedef int  work_type;
    typedef bool channel_type;
    typedef bool vec_type;

    enum
    {
        generic_type = 0,
        depth        = CV_8U,
        channels     = 1,
        fmt          = static_cast<int>('u'),
        type         = CV_MAKETYPE(depth, channels)
    };
};
// DataType for uchar: one CV_8U channel, format code 'u', with int as work_type.
template<> class DataType<uchar>
{
public:
    typedef uchar value_type;
    typedef int   work_type;
    typedef uchar channel_type;
    typedef uchar vec_type;

    enum
    {
        generic_type = 0,
        depth        = CV_8U,
        channels     = 1,
        fmt          = static_cast<int>('u'),
        type         = CV_MAKETYPE(depth, channels)
    };
};
// DataType for schar: one CV_8S channel, format code 'c', with int as work_type.
template<> class DataType<schar>
{
public:
    typedef schar value_type;
    typedef int   work_type;
    typedef schar channel_type;
    typedef schar vec_type;

    enum
    {
        generic_type = 0,
        depth        = CV_8S,
        channels     = 1,
        fmt          = static_cast<int>('c'),
        type         = CV_MAKETYPE(depth, channels)
    };
};
// DataType for plain char: one CV_8S channel, format code 'c', with int as work_type.
// Note that value_type (and the aliases derived from it) is schar, not char.
template<> class DataType<char>
{
public:
    typedef schar value_type;
    typedef int   work_type;
    typedef schar channel_type;
    typedef schar vec_type;

    enum
    {
        generic_type = 0,
        depth        = CV_8S,
        channels     = 1,
        fmt          = static_cast<int>('c'),
        type         = CV_MAKETYPE(depth, channels)
    };
};
// DataType for ushort: one CV_16U channel, format code 'w', with int as work_type.
template<> class DataType<ushort>
{
public:
    typedef ushort value_type;
    typedef int    work_type;
    typedef ushort channel_type;
    typedef ushort vec_type;

    enum
    {
        generic_type = 0,
        depth        = CV_16U,
        channels     = 1,
        fmt          = static_cast<int>('w'),
        type         = CV_MAKETYPE(depth, channels)
    };
};
// DataType for short: one CV_16S channel, format code 's', with int as work_type.
template<> class DataType<short>
{
public:
    typedef short value_type;
    typedef int   work_type;
    typedef short channel_type;
    typedef short vec_type;

    enum
    {
        generic_type = 0,
        depth        = CV_16S,
        channels     = 1,
        fmt          = static_cast<int>('s'),
        type         = CV_MAKETYPE(depth, channels)
    };
};
// DataType for int: one CV_32S channel, format code 'i'; work_type is int itself.
template<> class DataType<int>
{
public:
    typedef int value_type;
    typedef int work_type;
    typedef int channel_type;
    typedef int vec_type;

    enum
    {
        generic_type = 0,
        depth        = CV_32S,
        channels     = 1,
        fmt          = static_cast<int>('i'),
        type         = CV_MAKETYPE(depth, channels)
    };
};
// DataType for float: one CV_32F channel, format code 'f'; work_type is float itself.
template<> class DataType<float>
{
public:
    typedef float value_type;
    typedef float work_type;
    typedef float channel_type;
    typedef float vec_type;

    enum
    {
        generic_type = 0,
        depth        = CV_32F,
        channels     = 1,
        fmt          = static_cast<int>('f'),
        type         = CV_MAKETYPE(depth, channels)
    };
};
// DataType for double: one CV_64F channel, format code 'd'; work_type is double itself.
template<> class DataType<double>
{
public:
    typedef double value_type;
    typedef double work_type;
    typedef double channel_type;
    typedef double vec_type;

    enum
    {
        generic_type = 0,
        depth        = CV_64F,
        channels     = 1,
        fmt          = static_cast<int>('d'),
        type         = CV_MAKETYPE(depth, channels)
    };
};
/** @brief A helper class for cv::DataType
The class is specialized for each fundamental numerical data type supported by OpenCV. It provides
DataDepth<T>::value constant.
*/
// Re-exports DataType<_Tp>::depth as `value` and DataType<_Tp>::fmt as `fmt`.
template<typename _Tp> class DataDepth
{
public:
    enum { value = DataType<_Tp>::depth, fmt = DataType<_Tp>::fmt };
};
// Compile-time map from an OpenCV depth constant to its element type.
// This primary template is the fallback for depths without a specialization:
// depth is CV_USRTYPE1 and value_type is void.
template<int _depth> class TypeDepth
{
public: // without this the members are private (class default) and cannot be used by any client
    enum { depth = CV_USRTYPE1 };
    typedef void value_type;
};
// TypeDepth for CV_8U: element type is uchar.
template<> class TypeDepth<CV_8U>
{
public: // without this the members are private (class default) and cannot be used by any client
    enum { depth = CV_8U };
    typedef uchar value_type;
};
// TypeDepth for CV_8S: element type is schar.
template<> class TypeDepth<CV_8S>
{
public: // without this the members are private (class default) and cannot be used by any client
    enum { depth = CV_8S };
    typedef schar value_type;
};
// TypeDepth for CV_16U: element type is ushort.
template<> class TypeDepth<CV_16U>
{
public: // without this the members are private (class default) and cannot be used by any client
    enum { depth = CV_16U };
    typedef ushort value_type;
};
// TypeDepth for CV_16S: element type is short.
template<> class TypeDepth<CV_16S>
{
public: // without this the members are private (class default) and cannot be used by any client
    enum { depth = CV_16S };
    typedef short value_type;
};
// TypeDepth for CV_32S: element type is int.
template<> class TypeDepth<CV_32S>
{
public: // without this the members are private (class default) and cannot be used by any client
    enum { depth = CV_32S };
    typedef int value_type;
};
// TypeDepth for CV_32F: element type is float.
template<> class TypeDepth<CV_32F>
{
public: // without this the members are private (class default) and cannot be used by any client
    enum { depth = CV_32F };
    typedef float value_type;
};
// TypeDepth for CV_64F: element type is double.
template<> class TypeDepth<CV_64F>
{
public: // without this the members are private (class default) and cannot be used by any client
    enum { depth = CV_64F };
    typedef double value_type;
};
//! @}
} // cv
#endif // __OPENCV_CORE_TRAITS_HPP__

2228
3rdparty/include/opencv2/core/types.hpp vendored Normal file

File diff suppressed because it is too large Load Diff

1834
3rdparty/include/opencv2/core/types_c.h vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,889 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2015, Itseez Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_CORE_UTILITY_H__
#define __OPENCV_CORE_UTILITY_H__
#ifndef __cplusplus
# error utility.hpp header must be compiled as C++
#endif
#include "opencv2/core.hpp"
namespace cv
{
// Implementation-usage collection API. Everything below is only declared when
// CV_COLLECT_IMPL_DATA is defined; otherwise CV_IMPL_ADD expands to nothing.
#ifdef CV_COLLECT_IMPL_DATA
CV_EXPORTS void setImpl(int flags); // set implementation flags and reset storage arrays
CV_EXPORTS void addImpl(int flag, const char* func = 0); // add implementation and function name to storage arrays
// Get the stored implementation flags and function names arrays.
// Each implementation entry corresponds to a function name entry, so you can find which implementation was executed in which function.
CV_EXPORTS int getImpl(std::vector<int> &impl, std::vector<String> &funName);
CV_EXPORTS bool useCollection(); // return implementation collection state
CV_EXPORTS void setUseCollection(bool flag); // set implementation collection state
// Implementation flag values accepted by setImpl()/addImpl().
#define CV_IMPL_PLAIN 0x01 // native CPU OpenCV implementation
#define CV_IMPL_OCL 0x02 // OpenCL implementation
#define CV_IMPL_IPP 0x04 // IPP implementation
#define CV_IMPL_MT 0x10 // multithreaded implementation
// Records `impl` together with CV_Func through cv::addImpl() when collection is enabled.
// NOTE(review): this expands to a bare `if (...) { ... }` block (no do/while(0) wrapper), so
// using it as the body of an unbraced if that has an else changes which `if` the else binds to.
#define CV_IMPL_ADD(impl) \
if(cv::useCollection()) \
{ \
cv::addImpl(impl, CV_Func); \
}
#else
#define CV_IMPL_ADD(impl)
#endif
//! @addtogroup core_utils
//! @{
/** @brief Automatically Allocated Buffer Class
The class is used for temporary buffers in functions and methods.
If a temporary buffer is usually small (a few K's of memory),
but its size depends on the parameters, it makes sense to create a small
fixed-size array on stack and use it if it's large enough. If the required buffer size
is larger than the fixed size, another buffer of sufficient size is allocated dynamically
and released after the processing. Therefore, in typical cases, when the buffer size is small,
there is no overhead associated with malloc()/free().
At the same time, there is no limit on the size of processed data.
This is what AutoBuffer does. The template takes 2 parameters - type of the buffer elements and
the number of stack-allocated elements (1024/sizeof(_Tp)+8 by default). Here is how the class is used:
\code
void my_func(const cv::Mat& m)
{
cv::AutoBuffer<float> buf; // create automatic buffer with the default stack capacity,
// i.e. 1024/sizeof(float)+8 floats
buf.allocate(m.rows); // if m.rows fits into the stack capacity, the pre-allocated buffer is used,
// otherwise the buffer of "m.rows" floats will be allocated
// dynamically and deallocated in cv::AutoBuffer destructor
...
}
\endcode
*/
template<typename _Tp, size_t fixed_size = 1024/sizeof(_Tp)+8> class AutoBuffer
{
public:
typedef _Tp value_type;
//! the default constructor; the buffer initially refers to the in-object storage
AutoBuffer();
//! constructor taking the real buffer size
AutoBuffer(size_t _size);
//! the copy constructor; allocates buf.size() elements and copies them one by one
AutoBuffer(const AutoBuffer<_Tp, fixed_size>& buf);
//! the assignment operator; self-assignment is a no-op
AutoBuffer<_Tp, fixed_size>& operator = (const AutoBuffer<_Tp, fixed_size>& buf);
//! destructor. calls deallocate()
~AutoBuffer();
//! allocates the new buffer of size _size. if the _size is small enough, stack-allocated buffer is used
void allocate(size_t _size);
//! deallocates the buffer if it was dynamically allocated
void deallocate();
//! resizes the buffer and preserves the content; newly added elements are set to _Tp()
void resize(size_t _size);
//! returns the current buffer size (number of elements)
size_t size() const;
//! returns pointer to the real buffer, stack-allocated or heap-allocated
operator _Tp* ();
//! returns read-only pointer to the real buffer, stack-allocated or heap-allocated
operator const _Tp* () const;
protected:
//! pointer to the real buffer, can point to buf if the buffer is small enough
_Tp* ptr;
//! size of the real buffer
size_t sz;
//! pre-allocated buffer. At least 1 element to conform to C++ standard requirements
_Tp buf[(fixed_size > 0) ? fixed_size : 1];
};
/** @brief Sets/resets the break-on-error mode.
When the break-on-error mode is set, the default error handler issues a hardware exception, which
can make debugging more convenient.
\param flag true to set the break-on-error mode, false to reset it
\return the previous state
*/
CV_EXPORTS bool setBreakOnError(bool flag);
/** Signature of a user-supplied error handler that can be installed with redirectError().
The function pointer type is declared with C language linkage.
*/
extern "C" typedef int (*ErrorCallback)( int status, const char* func_name,
const char* err_msg, const char* file_name,
int line, void* userdata );
/** @brief Sets the new error handler and the optional user data.
The function sets the new error handler, called from cv::error().
\param errCallback the new error handler. If NULL, the default error handler is used.
\param userdata the optional user data pointer, passed to the callback.
\param prevUserdata the optional output parameter where the previous user data pointer is stored
\return the previous error handler
*/
CV_EXPORTS ErrorCallback redirectError( ErrorCallback errCallback, void* userdata=0, void** prevUserdata=0);
/** @brief Returns a text string formatted using the printf-like expression.
The function acts like sprintf but forms and returns an STL-like string. It can be used to form an
error message in the Exception constructor.
@param fmt printf-compatible formatting specifiers.
*/
CV_EXPORTS String format( const char* fmt, ... );
// NOTE(review): undocumented in this header; presumably returns a name for a temporary file that
// ends with `suffix` -- confirm against the implementation.
CV_EXPORTS String tempfile( const char* suffix = 0);
// NOTE(review): undocumented in this header; presumably fills `result` with the paths matching
// `pattern`, descending into sub-directories when `recursive` is true -- confirm against the implementation.
CV_EXPORTS void glob(String pattern, std::vector<String>& result, bool recursive = false);
/** @brief OpenCV will try to set the number of threads for the next parallel region.
If nthreads == 0, OpenCV will disable threading optimizations and run all its functions
sequentially. Passing nthreads \< 0 will reset the number of threads to the system default. This
function must be called outside of a parallel region.
OpenCV will try to run its functions with the specified number of threads, but some behaviour
differs between frameworks:
- `TBB` User-defined parallel constructions will run with the same number of threads, unless
another one is specified. If later on the user creates their own scheduler, OpenCV will use it.
- `OpenMP` No special defined behaviour.
- `Concurrency` If nthreads == 1, OpenCV will disable threading optimizations and run its
functions sequentially.
- `GCD` Supports only values \<= 0.
- `C=` No special defined behaviour.
@param nthreads Number of threads used by OpenCV.
@sa getNumThreads, getThreadNum
*/
CV_EXPORTS void setNumThreads(int nthreads);
/** @brief Returns the number of threads used by OpenCV for parallel regions.
Always returns 1 if OpenCV is built without threading support.
The exact meaning of the return value depends on the threading framework used by the OpenCV library:
- `TBB` The number of threads that OpenCV will try to use for parallel regions. If there is
any tbb::thread_scheduler_init in user code conflicting with OpenCV, then the function returns
the default number of threads used by the TBB library.
- `OpenMP` An upper bound on the number of threads that could be used to form a new team.
- `Concurrency` The number of threads that OpenCV will try to use for parallel regions.
- `GCD` Unsupported; returns the GCD thread pool limit (512) for compatibility.
- `C=` The number of threads that OpenCV will try to use for parallel regions, if setNumThreads
was called before with nthreads \> 0; otherwise returns the number of logical CPUs
available for the process.
@sa setNumThreads, getThreadNum
*/
CV_EXPORTS int getNumThreads();
/** @brief Returns the index of the currently executed thread within the current parallel region. Always
returns 0 if called outside of a parallel region.
The exact meaning of the return value depends on the threading framework used by the OpenCV library:
- `TBB` Unsupported with the current 4.1 TBB release. May be supported in the future.
- `OpenMP` The thread number, within the current team, of the calling thread.
- `Concurrency` An ID for the virtual processor that the current context is executing on (0
for the master thread and a unique number for the others, but not necessarily 1,2,3,...).
- `GCD` System calling thread's ID. Never returns 0 inside a parallel region.
- `C=` The index of the current parallel task.
@sa setNumThreads, getNumThreads
*/
CV_EXPORTS int getThreadNum();
/** @brief Returns full configuration time cmake output.
Returned value is raw cmake output including version control system revision, compiler version,
compiler flags, enabled modules and third party libraries, etc. Output format depends on target
architecture.
*/
CV_EXPORTS_W const String& getBuildInformation();
/** @brief Returns the number of ticks.
The function returns the number of ticks after a certain event (for example, when the machine was
turned on). It can be used to initialize RNG or to measure a function execution time by reading the
tick count before and after the function call. See also the tick frequency (getTickFrequency).
*/
CV_EXPORTS_W int64 getTickCount();
/** @brief Returns the number of ticks per second.
The function returns the number of ticks per second. That is, the following code computes the
execution time in seconds:
@code
double t = (double)getTickCount();
// do something ...
t = ((double)getTickCount() - t)/getTickFrequency();
@endcode
*/
CV_EXPORTS_W double getTickFrequency();
/** @brief Returns the number of CPU ticks.
The function returns the current number of CPU ticks on some architectures (such as x86, x64,
PowerPC). On other platforms the function is equivalent to getTickCount. It can also be used for
very accurate time measurements, as well as for RNG initialization. Note that in case of multi-CPU
systems a thread, from which getCPUTickCount is called, can be suspended and resumed at another CPU
with its own counter. So, theoretically (and practically) the subsequent calls to the function do
not necessarily return monotonically increasing values. Also, since a modern CPU varies the CPU
frequency depending on the load, the number of CPU clocks spent in some code cannot be directly
converted to time units. Therefore, getTickCount is generally a preferable solution for measuring
execution time.
*/
CV_EXPORTS_W int64 getCPUTickCount();
/** @brief Available CPU features.
Remember to keep this list identical to the one in cvdef.h.
The values are feature identifiers passed to checkHardwareSupport(); they are not bit flags.
*/
enum CpuFeatures {
CPU_MMX = 1,
CPU_SSE = 2,
CPU_SSE2 = 3,
CPU_SSE3 = 4,
CPU_SSSE3 = 5,
CPU_SSE4_1 = 6,
CPU_SSE4_2 = 7,
CPU_POPCNT = 8,
// value 9 is not assigned
CPU_AVX = 10,
CPU_AVX2 = 11,
CPU_FMA3 = 12,
// AVX-512 subsets
CPU_AVX_512F = 13,
CPU_AVX_512BW = 14,
CPU_AVX_512CD = 15,
CPU_AVX_512DQ = 16,
CPU_AVX_512ER = 17,
CPU_AVX_512IFMA512 = 18,
CPU_AVX_512PF = 19,
CPU_AVX_512VBMI = 20,
CPU_AVX_512VL = 21,
// NEON is numbered separately from the block above
CPU_NEON = 100
};
/** @brief Returns true if the specified feature is supported by the host hardware.
The function returns true if the host hardware supports the specified feature. When the user calls
setUseOptimized(false), the subsequent calls to checkHardwareSupport() will return false until
setUseOptimized(true) is called. This way the user can dynamically switch the optimized code
in OpenCV on and off.
@param feature The feature of interest, one of cv::CpuFeatures
@return true if the feature is supported (and optimized code is enabled), false otherwise
*/
CV_EXPORTS_W bool checkHardwareSupport(int feature);
/** @brief Returns the number of logical CPUs available for the process.
*/
CV_EXPORTS_W int getNumberOfCPUs();
/** @brief Aligns a pointer to the specified number of bytes.
The function rounds the address up to the nearest multiple of n and returns it as a pointer of the
same type as the input pointer:
\f[\texttt{(\_Tp*)(((size\_t)ptr + n-1) \& -n)}\f]
@param ptr Pointer to align.
@param n Alignment size that must be a power of two (defaults to sizeof(_Tp)).
*/
template<typename _Tp> static inline _Tp* alignPtr(_Tp* ptr, int n=(int)sizeof(_Tp))
{
    // -n has all bits set except the low log2(n) ones, so the AND clears the remainder
    const size_t address = (size_t)ptr;
    const size_t rounded = (address + n - 1) & -n;
    return (_Tp*)rounded;
}
/** @brief Aligns a buffer size to the specified number of bytes.
The function returns the smallest number that is greater than or equal to sz and is divisible by n :
\f[\texttt{(sz + n-1) \& -n}\f]
@param sz Buffer size to align.
@param n Alignment size that must be a power of two (checked with CV_DbgAssert).
*/
static inline size_t alignSize(size_t sz, int n)
{
    CV_DbgAssert((n & (n - 1)) == 0); // n is a power of 2
    // -n converted to size_t is the mask that clears the low log2(n) bits
    const size_t mask = (size_t)-n;
    const size_t bumped = sz + n - 1;
    return bumped & mask;
}
/** @brief Enables or disables the optimized code.
The function can be used to dynamically turn on and off optimized code (code that uses SSE2, AVX,
and other instructions on the platforms that support it). It sets a global flag that is further
checked by OpenCV functions. Since the flag is not checked in the inner OpenCV loops, it is only
safe to call the function on the very top level in your application where you can be sure that no
other OpenCV function is currently executed.
By default, the optimized code is enabled unless you disable it in CMake. The current status can be
retrieved using useOptimized.
@param onoff The boolean flag specifying whether the optimized code should be used (onoff=true)
or not (onoff=false).
@sa useOptimized, checkHardwareSupport
*/
CV_EXPORTS_W void setUseOptimized(bool onoff);
/** @brief Returns the status of optimized code usage.
The function returns true if the optimized code is enabled. Otherwise, it returns false.
@sa setUseOptimized
*/
CV_EXPORTS_W bool useOptimized();
//! function form of the CV_ELEM_SIZE macro: returns CV_ELEM_SIZE(type) for the given matrix type code
static inline size_t getElemSize(int type)
{
    return CV_ELEM_SIZE(type);
}
/////////////////////////////// Parallel Primitives //////////////////////////////////
/** @brief Base class for parallel data processors
Derived classes implement operator() to process the given range; the object is then passed to
parallel_for_(). The call operator is const, so an implementation cannot modify its own
non-mutable members.
*/
class CV_EXPORTS ParallelLoopBody
{
public:
virtual ~ParallelLoopBody();
//! processes the elements described by `range`
virtual void operator() (const Range& range) const = 0;
};
/** @brief Parallel data processor
@param range the whole range of indices to process
@param body the processor whose operator() does the work
(NOTE(review): presumably invoked on sub-ranges of `range`, possibly concurrently -- confirm against the implementation)
@param nstripes NOTE(review): meaning not visible in this header; defaults to -1 -- confirm against the implementation
*/
CV_EXPORTS void parallel_for_(const Range& range, const ParallelLoopBody& body, double nstripes=-1.);
/////////////////////////////// forEach method of cv::Mat ////////////////////////////
/** Applies `operation` to every element of the matrix.
The matrix is processed line by line, where a line is a run of elements along the last dimension;
the lines are distributed with parallel_for_().
@tparam _Tp element type used to access the matrix data
@tparam Functor callable with the signature (_Tp&, const int*) or (_Tp&, void*)
@param operation the functor; it receives a reference to the element and a pointer to the
element's index (one int per dimension). A copy of the functor is stored in the loop body.
*/
template<typename _Tp, typename Functor> inline
void Mat::forEach_impl(const Functor& operation) {
    if (false) {
        operation(*reinterpret_cast<_Tp*>(0), reinterpret_cast<int*>(NULL));
        // If your compiler fail in this line.
        // Please check that your functor signature is
        //     (_Tp&, const int*)   <- multidimential
        //  or (_Tp&, void*)        <- in case of you don't need current idx.
    }

    // Nothing to visit. Returning here also keeps the divisions below from
    // dividing by a zero-sized last dimension.
    if (this->total() == 0)
        return;

    CV_Assert(this->total() / this->size[this->dims - 1] <= INT_MAX);
    const int LINES = static_cast<int>(this->total() / this->size[this->dims - 1]);

    class PixelOperationWrapper :public ParallelLoopBody
    {
    public:
        PixelOperationWrapper(Mat_<_Tp>* const frame, const Functor& _operation)
            : mat(frame), op(_operation) {};
        virtual ~PixelOperationWrapper(){};
        // ! Overloaded virtual operator
        // convert range call to row call.
        virtual void operator()(const Range &range) const {
            const int DIMS = mat->dims;
            const int COLS = mat->size[DIMS - 1];
            if (DIMS <= 2) {
                for (int row = range.start; row < range.end; ++row) {
                    this->rowCall2(row, COLS);
                }
            } else {
                // One index per dimension. The vector must be sized by DIMS, not COLS:
                // with COLS < DIMS the accesses idx[DIMS - 2] / idx[DIMS - 1] would be out of bounds.
                std::vector<int> idx(DIMS); /// idx is modified in this->rowCall
                idx[DIMS - 2] = range.start - 1;

                for (int line_num = range.start; line_num < range.end; ++line_num) {
                    idx[DIMS - 2]++;
                    // propagate the overflow of each dimension into the next more significant one
                    for (int i = DIMS - 2; i >= 0; --i) {
                        if (idx[i] >= mat->size[i]) {
                            idx[i - 1] += idx[i] / mat->size[i];
                            idx[i] %= mat->size[i];
                            continue; // carry-over;
                        }
                        else {
                            break;
                        }
                    }
                    this->rowCall(&idx[0], COLS, DIMS);
                }
            }
        };
    private:
        Mat_<_Tp>* const mat;
        const Functor op;
        // ! Call operator for each elements in this row.
        inline void rowCall(int* const idx, const int COLS, const int DIMS) const {
            // the last index is advanced in place while walking the line and reset afterwards
            int &col = idx[DIMS - 1];
            col = 0;
            _Tp* pixel = &(mat->template at<_Tp>(idx));

            while (col < COLS) {
                op(*pixel, const_cast<const int*>(idx));
                pixel++; col++;
            }
            col = 0;
        }
        // ! Call operator for each elements in this row. 2d mat special version.
        inline void rowCall2(const int row, const int COLS) const {
            union Index{
                int body[2];
                operator const int*() const {
                    return reinterpret_cast<const int*>(this);
                }
                int& operator[](const int i) {
                    return body[i];
                }
            } idx = {{row, 0}};
            // Special union is needed to avoid
            // "error: array subscript is above array bounds [-Werror=array-bounds]"
            // when call the functor `op` such that access idx[3].

            _Tp* pixel = &(mat->template at<_Tp>(idx));
            const _Tp* const pixel_end = pixel + COLS;
            while(pixel < pixel_end) {
                op(*pixel++, static_cast<const int*>(idx));
                idx[1]++;
            }
        };
        PixelOperationWrapper& operator=(const PixelOperationWrapper &) {
            CV_Assert(false);
            // We can not remove this implementation because Visual Studio warning C4822.
            return *this;
        };
    };

    parallel_for_(cv::Range(0, LINES), PixelOperationWrapper(reinterpret_cast<Mat_<_Tp>*>(this), operation));
}
/////////////////////////// Synchronization Primitives ///////////////////////////////
/** Mutex with lock / trylock / unlock operations; the state lives in an opaque Impl object.
NOTE(review): the copy constructor and assignment are defined out of line; presumably copies
refer to the same underlying Impl -- confirm against the implementation.
*/
class CV_EXPORTS Mutex
{
public:
Mutex();
~Mutex();
Mutex(const Mutex& m);
Mutex& operator = (const Mutex& m);
//! acquires the mutex
void lock();
//! tries to acquire the mutex; the result tells whether that succeeded
//! (NOTE(review): presumably non-blocking -- confirm against the implementation)
bool trylock();
//! releases the mutex
void unlock();
//! opaque implementation type, defined elsewhere
struct Impl;
protected:
Impl* impl;
};
/** Scoped lock: locks the given Mutex on construction and unlocks it on destruction.
The object stores only a pointer, so the Mutex must outlive it.
*/
class CV_EXPORTS AutoLock
{
public:
    //! locks m and remembers it for the destructor
    AutoLock(Mutex& m) : mutex(&m)
    {
        m.lock();
    }
    //! unlocks the mutex locked by the constructor
    ~AutoLock()
    {
        mutex->unlock();
    }
protected:
    Mutex* mutex;
private:
    // copying is disabled: both operations are private and only declared here
    AutoLock(const AutoLock&);
    AutoLock& operator = (const AutoLock&);
};
/** Abstract base for containers of type-erased data instances (see TLSData for the typed wrapper).
Derived classes supply the creation and destruction of one instance.
NOTE(review): judging by the name, getData() presumably returns the calling thread's instance
and creates it on first use -- confirm against the implementation.
*/
class CV_EXPORTS TLSDataContainer
{
private:
// NOTE(review): presumably the slot identifier of this container -- confirm against the implementation
int key_;
protected:
TLSDataContainer();
virtual ~TLSDataContainer();
public:
//! creates one data instance and returns it as an untyped pointer
virtual void* createDataInstance() const = 0;
//! destroys an instance previously returned by createDataInstance()
virtual void deleteDataInstance(void* data) const = 0;
//! returns the untyped pointer to the data instance
void* getData() const;
};
/** Typed wrapper over TLSDataContainer: the stored instances are objects of type T
created with `new T` and destroyed with `delete`.
*/
template <typename T>
class TLSData : protected TLSDataContainer
{
public:
    inline TLSData() {}
    inline ~TLSData() {}
    //! returns the pointer provided by getData(), viewed as T*
    inline T* get() const
    {
        void* const raw = getData();
        return (T*)raw;
    }
private:
    //! allocates a default-initialized T
    virtual void* createDataInstance() const
    {
        T* const instance = new T;
        return instance;
    }
    //! destroys an instance created by createDataInstance()
    virtual void deleteDataInstance(void* data) const
    {
        T* const instance = (T*)data;
        delete instance;
    }
};
/** @brief Designed for command line parsing
The sample below demonstrates how to use CommandLineParser:
@code
CommandLineParser parser(argc, argv, keys);
parser.about("Application name v1.0.0");
if (parser.has("help"))
{
parser.printMessage();
return 0;
}
int N = parser.get<int>("N");
double fps = parser.get<double>("fps");
String path = parser.get<String>("path");
use_time_stamp = parser.has("timestamp");
String img1 = parser.get<String>(0);
String img2 = parser.get<String>(1);
int repeat = parser.get<int>(2);
if (!parser.check())
{
parser.printErrors();
return 0;
}
@endcode
### Keys syntax
The keys parameter is a string containing several blocks, each one is enclosed in curly braces and
describes one argument. Each argument contains three parts separated by the `|` symbol:
-# argument names is a space-separated list of option synonyms (to mark argument as positional, prefix it with the `@` symbol)
-# default value will be used if the argument was not provided (can be empty)
-# help message (can be empty)
For example:
@code{.cpp}
const String keys =
"{help h usage ? | | print this message }"
"{@image1 | | image1 for compare }"
"{@image2 | | image2 for compare }"
"{@repeat |1 | number }"
"{path |. | path to file }"
"{fps | -1.0 | fps for output video }"
"{N count |100 | count of objects }"
"{ts timestamp | | use time stamp }"
;
}
@endcode
### Usage
For the described keys:
@code{.sh}
# Good call (3 positional parameters: image1, image2 and repeat; N is 200, ts is true)
$ ./app -N=200 1.png 2.jpg 19 -ts
# Bad call
$ ./app -fps=aaa
ERRORS:
Exception: can not convert: [aaa] to [double]
@endcode
*/
class CV_EXPORTS CommandLineParser
{
public:
/** @brief Constructor
Initializes command line parser object
@param argc number of command line arguments (from main())
@param argv array of command line arguments (from main())
@param keys string describing acceptable command line parameters (see class description for syntax)
*/
CommandLineParser(int argc, const char* const argv[], const String& keys);
/** @brief Copy constructor */
CommandLineParser(const CommandLineParser& parser);
/** @brief Assignment operator */
CommandLineParser& operator = (const CommandLineParser& parser);
/** @brief Destructor */
~CommandLineParser();
/** @brief Returns application path
This method returns the path to the executable from the command line (`argv[0]`).
For example, if the application has been started with such command:
@code{.sh}
$ ./bin/my-executable
@endcode
this method will return `./bin`.
*/
String getPathToApplication() const;
/** @brief Access arguments by name
Returns argument converted to selected type. If the argument is not known or can not be
converted to selected type, the error flag is set (can be checked with @ref check).
For example, define:
@code{.cpp}
String keys = "{N count||}";
@endcode
Call:
@code{.sh}
$ ./my-app -N=20
# or
$ ./my-app --count=20
@endcode
Access:
@code{.cpp}
int N = parser.get<int>("N");
@endcode
@param name name of the argument
@param space_delete remove spaces from the left and right of the string
@tparam T the argument will be converted to this type if possible
@return the converted value; the result starts out as a value-initialized T (`T()`) and is
filled in by getByName
@note You can access positional arguments by their `@`-prefixed name:
@code{.cpp}
parser.get<String>("@image");
@endcode
*/
template <typename T>
T get(const String& name, bool space_delete = true) const
{
T val = T();
getByName(name, space_delete, ParamType<T>::type, (void*)&val);
return val;
}
/** @brief Access positional arguments by index
Returns argument converted to selected type. Indexes are counted from zero.
For example, define:
@code{.cpp}
String keys = "{@arg1||}{@arg2||}"
@endcode
Call:
@code{.sh}
./my-app abc qwe
@endcode
Access arguments:
@code{.cpp}
String val_1 = parser.get<String>(0); // returns "abc", arg1
String val_2 = parser.get<String>(1); // returns "qwe", arg2
@endcode
@param index index of the argument
@param space_delete remove spaces from the left and right of the string
@tparam T the argument will be converted to this type if possible
@return the converted value; the result starts out as a value-initialized T (`T()`) and is
filled in by getByIndex
*/
template <typename T>
T get(int index, bool space_delete = true) const
{
T val = T();
getByIndex(index, space_delete, ParamType<T>::type, (void*)&val);
return val;
}
/** @brief Check if field was provided in the command line
@param name argument name to check
*/
bool has(const String& name) const;
/** @brief Check for parsing errors
Returns false if an error occurred while accessing the parameters (bad conversion, missing
arguments, etc.); typical usage is `if (!parser.check()) parser.printErrors();`.
Call @ref printErrors to print error messages list.
*/
bool check() const;
/** @brief Set the about message
The about message will be shown when @ref printMessage is called, right before arguments table.
*/
void about(const String& message);
/** @brief Print help message
This method will print standard help message containing the about message and arguments description.
@sa about
*/
void printMessage() const;
/** @brief Print list of errors occurred
@sa check
*/
void printErrors() const;
protected:
//! type-erased back ends of get(): `type` is ParamType<T>::type and `dst` points to the T to fill
void getByName(const String& name, bool space_delete, int type, void* dst) const;
void getByIndex(int index, bool space_delete, int type, void* dst) const;
//! opaque implementation type, defined elsewhere
struct Impl;
Impl* impl;
};
//! @} core_utils
//! @cond IGNORED
/////////////////////////////// AutoBuffer implementation ////////////////////////////////////////
//! default constructor: the buffer refers to the in-object storage and reports its full capacity
template<typename _Tp, size_t fixed_size> inline
AutoBuffer<_Tp, fixed_size>::AutoBuffer()
    : ptr(buf), sz(fixed_size)
{
}
//! sized constructor: starts from the in-object storage, then lets allocate() pick the real buffer
template<typename _Tp, size_t fixed_size> inline
AutoBuffer<_Tp, fixed_size>::AutoBuffer(size_t _size)
    : ptr(buf), sz(fixed_size)
{
    allocate(_size);
}
//! copy constructor: allocates abuf.size() elements and copies them element by element
template<typename _Tp, size_t fixed_size> inline
AutoBuffer<_Tp, fixed_size>::AutoBuffer(const AutoBuffer<_Tp, fixed_size>& abuf )
    : ptr(buf), sz(fixed_size)
{
    allocate(abuf.size());

    const _Tp* const src = abuf.ptr;
    for( size_t k = 0; k != sz; ++k )
        ptr[k] = src[k];
}
//! assignment: releases the current dynamic buffer (if any), re-allocates for abuf.size()
//! elements and copies them; self-assignment leaves the object untouched
template<typename _Tp, size_t fixed_size> inline AutoBuffer<_Tp, fixed_size>&
AutoBuffer<_Tp, fixed_size>::operator = (const AutoBuffer<_Tp, fixed_size>& abuf)
{
    if( this == &abuf )
        return *this;

    deallocate();
    allocate(abuf.size());

    const _Tp* const src = abuf.ptr;
    for( size_t k = 0; k != sz; ++k )
        ptr[k] = src[k];

    return *this;
}
//! destructor: releases the dynamically allocated buffer, if there is one
template<typename _Tp, size_t fixed_size> inline
AutoBuffer<_Tp, fixed_size>::~AutoBuffer()
{
    deallocate();
}
/** Makes the buffer hold exactly _size elements; the previous content is not preserved.
- If _size does not exceed the current size, the existing storage is kept and only the
  reported size shrinks.
- Otherwise any dynamic buffer is released and either the in-object storage
  (_size <= fixed_size) or a freshly allocated array (_size > fixed_size) is used.
In every case size() returns _size afterwards.
@param _size requested number of elements
*/
template<typename _Tp, size_t fixed_size> inline void
AutoBuffer<_Tp, fixed_size>::allocate(size_t _size)
{
    if(_size <= sz)
    {
        sz = _size;
        return;
    }
    deallocate();
    if(_size > fixed_size)
    {
        ptr = new _Tp[_size];
    }
    // Record the size on both paths. Previously a request that was larger than the current
    // (shrunk) size but still fit into the in-object storage left sz at its stale value.
    // Done after the allocation so that a throwing `new` leaves ptr/sz consistent.
    sz = _size;
}
//! releases the dynamically allocated buffer (if any) and falls back to the in-object storage;
//! does nothing when the in-object storage is already in use
template<typename _Tp, size_t fixed_size> inline void
AutoBuffer<_Tp, fixed_size>::deallocate()
{
    if( ptr == buf )
        return;

    delete[] ptr;
    ptr = buf;
    sz = fixed_size;
}
/** Changes the number of elements to _size while keeping the existing content.
Shrinking only lowers the reported size. Growing moves the old elements into the new storage
when the storage changes, sets the added elements to _Tp() and releases the old dynamic buffer.
@param _size requested number of elements
*/
template<typename _Tp, size_t fixed_size> inline void
AutoBuffer<_Tp, fixed_size>::resize(size_t _size)
{
    if(_size <= sz)
    {
        sz = _size;
        return;
    }

    // From here on _size > sz, so all of the old elements are kept.
    const size_t oldCount = sz;
    _Tp* const oldData = ptr;

    ptr = _size > fixed_size ? new _Tp[_size] : buf;
    sz = _size;

    if( ptr != oldData )
    {
        for( size_t k = 0; k < oldCount; k++ )
            ptr[k] = oldData[k];
    }
    for( size_t k = oldCount; k < _size; k++ )
        ptr[k] = _Tp();

    if( oldData != buf )
        delete[] oldData;
}
//! returns the current number of elements
template<typename _Tp, size_t fixed_size> inline size_t
AutoBuffer<_Tp, fixed_size>::size() const
{
    return sz;
}
//! implicit conversion to a pointer to the first element of the buffer in use
template<typename _Tp, size_t fixed_size> inline
AutoBuffer<_Tp, fixed_size>::operator _Tp* ()
{
    return ptr;
}
//! implicit conversion to a read-only pointer to the first element of the buffer in use
template<typename _Tp, size_t fixed_size> inline
AutoBuffer<_Tp, fixed_size>::operator const _Tp* () const
{
    return ptr;
}
#ifndef OPENCV_NOSTL
//! std::string flavour of the positional accessor: reads the value as cv::String and converts it
template<> inline std::string CommandLineParser::get<std::string>(int index, bool space_delete) const
{
    const String value = get<String>(index, space_delete);
    return value;
}
//! std::string flavour of the by-name accessor: reads the value as cv::String and converts it
template<> inline std::string CommandLineParser::get<std::string>(const String& name, bool space_delete) const
{
    const String value = get<String>(name, space_delete);
    return value;
}
#endif // OPENCV_NOSTL
//! @endcond
} //namespace cv
#ifndef DISABLE_OPENCV_24_COMPATIBILITY
#include "opencv2/core/core_c.h"
#endif
#endif //__OPENCV_CORE_UTILITY_H__

View File

@ -0,0 +1,71 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// Intel License Agreement
// For Open Source Computer Vision Library
//
// Copyright( C) 2000-2015, Intel Corporation, all rights reserved.
// Copyright (C) 2011-2013, NVIDIA Corporation, all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2015, Itseez Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
//(including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort(including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
/*
definition of the current version of OpenCV
Usefull to test in user programs
*/
#ifndef __OPENCV_VERSION_HPP__
#define __OPENCV_VERSION_HPP__
#define CV_VERSION_MAJOR 3
#define CV_VERSION_MINOR 0
#define CV_VERSION_REVISION 0
#define CV_VERSION_STATUS ""
#define CVAUX_STR_EXP(__A) #__A
#define CVAUX_STR(__A) CVAUX_STR_EXP(__A)
#define CVAUX_STRW_EXP(__A) L#__A
#define CVAUX_STRW(__A) CVAUX_STRW_EXP(__A)
#define CV_VERSION CVAUX_STR(CV_VERSION_MAJOR) "." CVAUX_STR(CV_VERSION_MINOR) "." CVAUX_STR(CV_VERSION_REVISION) CV_VERSION_STATUS
/* old style version constants*/
#define CV_MAJOR_VERSION CV_VERSION_MAJOR
#define CV_MINOR_VERSION CV_VERSION_MINOR
#define CV_SUBMINOR_VERSION CV_VERSION_REVISION
#endif

603
3rdparty/include/opencv2/core/wimage.hpp vendored Normal file
View File

@ -0,0 +1,603 @@
/*M//////////////////////////////////////////////////////////////////////////////
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to
// this license. If you do not agree to this license, do not download,
// install, copy or use the software.
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2008, Google, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of Intel Corporation or contributors may not be used to endorse
// or promote products derived from this software without specific
// prior written permission.
//
// This software is provided by the copyright holders and contributors "as is"
// and any express or implied warranties, including, but not limited to, the
// implied warranties of merchantability and fitness for a particular purpose
// are disclaimed. In no event shall the Intel Corporation or contributors be
// liable for any direct, indirect, incidental, special, exemplary, or
// consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
/////////////////////////////////////////////////////////////////////////////////
//M*/
#ifndef __OPENCV_CORE_WIMAGE_HPP__
#define __OPENCV_CORE_WIMAGE_HPP__
#include "opencv2/core/core_c.h"
#ifdef __cplusplus
namespace cv {
//! @addtogroup core
//! @{
// Forward declarations. The classes without the C suffix take the number of channels at run time;
// the ...C<T, C> classes take it as the template parameter C.
template <typename T> class WImage;
template <typename T> class WImageBuffer;
template <typename T> class WImageView;
template<typename T, int C> class WImageC;
template<typename T, int C> class WImageBufferC;
template<typename T, int C> class WImageViewC;
// Commonly used typedefs.
// Naming scheme: an optional digit after the class name is the channel count (1 or 3), and the
// suffix names the element type: _b = uchar, _f = float, _16s = short, _16u = ushort.
typedef WImage<uchar> WImage_b;
typedef WImageView<uchar> WImageView_b;
typedef WImageBuffer<uchar> WImageBuffer_b;
typedef WImageC<uchar, 1> WImage1_b;
typedef WImageViewC<uchar, 1> WImageView1_b;
typedef WImageBufferC<uchar, 1> WImageBuffer1_b;
typedef WImageC<uchar, 3> WImage3_b;
typedef WImageViewC<uchar, 3> WImageView3_b;
typedef WImageBufferC<uchar, 3> WImageBuffer3_b;
typedef WImage<float> WImage_f;
typedef WImageView<float> WImageView_f;
typedef WImageBuffer<float> WImageBuffer_f;
typedef WImageC<float, 1> WImage1_f;
typedef WImageViewC<float, 1> WImageView1_f;
typedef WImageBufferC<float, 1> WImageBuffer1_f;
typedef WImageC<float, 3> WImage3_f;
typedef WImageViewC<float, 3> WImageView3_f;
typedef WImageBufferC<float, 3> WImageBuffer3_f;
// There isn't a standard for signed and unsigned short so be more
// explicit in the typename for these cases.
typedef WImage<short> WImage_16s;
typedef WImageView<short> WImageView_16s;
typedef WImageBuffer<short> WImageBuffer_16s;
typedef WImageC<short, 1> WImage1_16s;
typedef WImageViewC<short, 1> WImageView1_16s;
typedef WImageBufferC<short, 1> WImageBuffer1_16s;
typedef WImageC<short, 3> WImage3_16s;
typedef WImageViewC<short, 3> WImageView3_16s;
typedef WImageBufferC<short, 3> WImageBuffer3_16s;
typedef WImage<ushort> WImage_16u;
typedef WImageView<ushort> WImageView_16u;
typedef WImageBuffer<ushort> WImageBuffer_16u;
typedef WImageC<ushort, 1> WImage1_16u;
typedef WImageViewC<ushort, 1> WImageView1_16u;
typedef WImageBufferC<ushort, 1> WImageBuffer1_16u;
typedef WImageC<ushort, 3> WImage3_16u;
typedef WImageViewC<ushort, 3> WImageView3_16u;
typedef WImageBufferC<ushort, 3> WImageBuffer3_16u;
/** @brief Image class which provides a thin layer around an IplImage.
The goals of the class design are:
-# All the data has explicit ownership to avoid memory leaks
-# No hidden allocations or copies for performance.
-# Easy access to OpenCV methods (which will access IPP if available)
-# Can easily treat external data as an image
-# Easy to create images which are subsets of other images
-# Fast pixel access which can take advantage of number of channels if known at compile time.
The WImage class is the image class which provides the data accessors. The 'W' comes from the fact
that it is also a wrapper around the popular but inconvenient IplImage class. A WImage can be
constructed either using a WImageBuffer class which allocates and frees the data, or using a
WImageView class which constructs a subimage or a view into external data. The view class does no
memory management. Each class actually has two versions, one when the number of channels is known
at compile time and one when it isn't. Using the one with the number of channels specified can
provide some compile time optimizations by using the fact that the number of channels is a
constant.
We use the convention (c,r) to refer to column c and row r with (0,0) being the upper left corner.
This is similar to standard Euclidean coordinates with the first coordinate varying in the
horizontal direction and the second coordinate varying in the vertical direction. Thus (c,r) is
usually in the domain [0, width) X [0, height)
Example usage:
@code
WImageBuffer3_b im(5,7); // Make a 5X7 3 channel image of type uchar
WImageView3_b sub_im(&im, 2,2, 3,3); // 3X3 submatrix
vector<float> vec(10, 3.0f);
WImageView1_f user_im(&vec[0], 2, 5); // 2X5 image w/ supplied data
im.SetZero(); // same as cvSetZero(im.Ipl())
*im(2, 3) = 15; // Modify the element at column 2, row 3
MySetRand(&sub_im);
// Copy the second row into the first. This can be done with no memory
// allocation and will use SSE if IPP is available.
int w = im.Width();
im.View(0,0, w,1).CopyFrom(im.View(0,1, w,1));
// Doesn't care about source of data since using WImage
void MySetRand(WImage_b* im) { // Works with any number of channels
for (int r = 0; r < im->Height(); ++r) {
uchar* row = im->Row(r);
for (int c = 0; c < im->Width(); ++c) {
for (int ch = 0; ch < im->Channels(); ++ch, ++row) {
*row = uchar(rand() & 255);
}
}
}
}
@endcode
Functions that are not part of the basic image allocation, viewing, and access should come from
OpenCV, except some useful functions that are not part of OpenCV can be found in wimage_util.h
*/
template<typename T>
class WImage
{
public:
    typedef T BaseType;

    // The class is abstract: its only virtual member is this pure virtual
    // destructor (a definition is still supplied out of class).
    virtual ~WImage() = 0;

    // ---- Wrapped header and raw buffer --------------------------------
    IplImage* Ipl() { return image_; }
    const IplImage* Ipl() const { return image_; }

    T* ImageData() {
        return reinterpret_cast<T*>(image_->imageData);
    }
    const T* ImageData() const {
        return reinterpret_cast<const T*>(image_->imageData);
    }

    // ---- Geometry, read straight from the IplImage header --------------
    int Width() const { return image_->width; }
    int Height() const { return image_->height; }
    // Byte distance between the starts of two consecutive rows.
    int WidthStep() const { return image_->widthStep; }
    int Channels() const { return image_->nChannels; }
    // Bytes occupied by a single channel value.
    int ChannelSize() const { return static_cast<int>(sizeof(T)); }
    // Bytes occupied by a whole pixel (all channels).
    int PixelSize() const { return Channels() * ChannelSize(); }

    // IPL depth constant (e.g. IPL_DEPTH_8U, IPL_DEPTH_32F) matching T.
    // Only declared here; the value comes from per-type specializations.
    int Depth() const;

    // ---- Row and pixel access -----------------------------------------
    // Pointer to the first channel of the first pixel in row r.
    inline const T* Row(int r) const {
        char* row_start = image_->imageData + r*image_->widthStep;
        return reinterpret_cast<T*>(row_start);
    }
    inline T* Row(int r) {
        char* row_start = image_->imageData + r*image_->widthStep;
        return reinterpret_cast<T*>(row_start);
    }

    // Pointer to the first channel of the pixel at column c, row r.
    inline T* operator() (int c, int r) {
        return Row(r) + c*Channels();
    }
    inline const T* operator() (int c, int r) const {
        return Row(r) + c*Channels();
    }

    // ---- Thin forwards to the OpenCV C API -----------------------------
    // Copy the pixels of src into this image (forwards to cvCopy).
    void CopyFrom(const WImage<T>& src) { cvCopy(src.Ipl(), image_); }
    // Clear the image (forwards to cvSetZero).
    void SetZero() { cvSetZero(image_); }

    // Build a view onto the width x height region whose corner is (c, r).
    WImageView<T> View(int c, int r, int width, int height);

protected:
    // Wrap an existing header; a null pointer is accepted.
    explicit WImage(IplImage* img) : image_(img) {
        assert(!img || img->depth == Depth());
    }

    // Copying is disallowed: both members are declared but never defined.
    WImage(const WImage&);
    void operator=(const WImage&);

    // Re-point the wrapper at another header (or at null).
    void SetIpl(IplImage* image) {
        assert(!image || image->depth == Depth());
        image_ = image;
    }

    IplImage* image_;
};
/** Image class when both the pixel type and number of channels
are known at compile time. Channels() returns the template constant C instead
of reading the channel count from the image header.
*/
template<typename T, int C>
class WImageC : public WImage<T>
{
public:
    typedef typename WImage<T>::BaseType BaseType;
    enum { kChannels = C };

    // Wrap an existing header. A null pointer is accepted; otherwise the
    // header must describe exactly C channels (checked in debug builds).
    explicit WImageC(IplImage* img) : WImage<T>(img) {
        assert(!img || img->nChannels == Channels());
    }

    // Construct a view into a region of this image
    WImageViewC<T, C> View(int c, int r, int width, int height);

    // Copy the contents from another image which is just a convenience to cvCopy
    void CopyFrom(const WImageC<T, C>& src) {
        cvCopy(src.Ipl(), WImage<T>::image_);
    }

    // WImageC is an abstract class with no other virtual methods so make the
    // destructor virtual.
    virtual ~WImageC() = 0;

    // Compile-time channel count (hides the run-time WImage<T>::Channels()).
    int Channels() const {return C; }

protected:
    // Disallow copy and assignment
    WImageC(const WImageC&);
    void operator=(const WImageC&);

    // Re-point the wrapper at another header (or at null).
    // The constructor guarantees that a non-null header has C channels, so
    // the same invariant is enforced here; the depth check is repeated by
    // WImage<T>::SetIpl as well.
    void SetIpl(IplImage* image) {
        assert(!image || image->depth == WImage<T>::Depth());
        assert(!image || image->nChannels == C);
        WImage<T>::SetIpl(image);
    }
};
/** Image class which owns the data, so it can be allocated and is always
freed. It cannot be copied but can be explicitly cloned.
*/
template<typename T>
class WImageBuffer : public WImage<T>
{
public:
    typedef typename WImage<T>::BaseType BaseType;

    // Default constructor which creates an empty (null) buffer that can be
    // filled in later through Allocate(), SetIpl() or CloneFrom().
    WImageBuffer() : WImage<T>(0) {}

    // Create a buffer and immediately allocate an image of the given size.
    WImageBuffer(int width, int height, int nchannels) : WImage<T>(0) {
        Allocate(width, height, nchannels);
    }

    // Constructor which takes ownership of a given IplImage so releases
    // the image on destruction.
    explicit WImageBuffer(IplImage* img) : WImage<T>(img) {}

    // Allocate an image. Does nothing if current size is the same as
    // the new size.
    void Allocate(int width, int height, int nchannels);

    // Set the data to point to an image, releasing the old data
    void SetIpl(IplImage* img) {
        ReleaseImage();
        WImage<T>::SetIpl(img);
    }

    // Clone an image which reallocates the image if of a different dimension.
    void CloneFrom(const WImage<T>& src) {
        Allocate(src.Width(), src.Height(), src.Channels());
        // CopyFrom lives in the dependent base WImage<T>; it must be
        // qualified, otherwise it is not found when this template is
        // instantiated.
        WImage<T>::CopyFrom(src);
    }

    ~WImageBuffer() {
        ReleaseImage();
    }

    // Release the image if it isn't null.
    void ReleaseImage() {
        if (WImage<T>::image_) {
            // Release through a local copy of the pointer, then reset the
            // member via SetIpl(0).
            IplImage* image = WImage<T>::image_;
            cvReleaseImage(&image);
            WImage<T>::SetIpl(0);
        }
    }

    // True when no image is currently held.
    bool IsNull() const {return WImage<T>::image_ == NULL; }

private:
    // Disallow copy and assignment
    WImageBuffer(const WImageBuffer&);
    void operator=(const WImageBuffer&);
};
/** Like a WImageBuffer class but when the number of channels is known at compile time.
*/
template<typename T, int C>
class WImageBufferC : public WImageC<T, C>
{
public:
    typedef typename WImage<T>::BaseType BaseType;
    enum { kChannels = C };

    // Default constructor which creates an empty (null) buffer that can be
    // filled in later through Allocate(), SetIpl() or CloneFrom().
    WImageBufferC() : WImageC<T, C>(0) {}

    // Create a buffer and immediately allocate a C-channel image of the
    // given size.
    WImageBufferC(int width, int height) : WImageC<T, C>(0) {
        Allocate(width, height);
    }

    // Constructor which takes ownership of a given IplImage so releases
    // the image on destruction.
    explicit WImageBufferC(IplImage* img) : WImageC<T, C>(img) {}

    // Allocate an image. Does nothing if current size is the same as
    // the new size.
    void Allocate(int width, int height);

    // Set the data to point to an image, releasing the old data
    void SetIpl(IplImage* img) {
        ReleaseImage();
        WImageC<T, C>::SetIpl(img);
    }

    // Clone an image which reallocates the image if of a different dimension.
    void CloneFrom(const WImageC<T, C>& src) {
        Allocate(src.Width(), src.Height());
        // CopyFrom lives in the dependent base WImageC<T, C>; it must be
        // qualified, otherwise it is not found when this template is
        // instantiated.
        WImageC<T, C>::CopyFrom(src);
    }

    ~WImageBufferC() {
        ReleaseImage();
    }

    // Release the image if it isn't null.
    void ReleaseImage() {
        if (WImage<T>::image_) {
            // Release through a local copy of the pointer, then reset the
            // member via SetIpl(0).
            IplImage* image = WImage<T>::image_;
            cvReleaseImage(&image);
            WImageC<T, C>::SetIpl(0);
        }
    }

    // True when no image is currently held.
    bool IsNull() const {return WImage<T>::image_ == NULL; }

private:
    // Disallow copy and assignment
    WImageBufferC(const WImageBufferC&);
    void operator=(const WImageBufferC&);
};
/** View into an image class which allows treating a subimage as an image or treating external data
as an image. A view never releases the pixels it refers to.
*/
template<typename T> class WImageView : public WImage<T>
{
public:
    typedef typename WImage<T>::BaseType BaseType;

    // Construct a subimage. No checks are done that the subimage lies
    // completely inside the original image.
    WImageView(WImage<T>* img, int c, int r, int width, int height);

    // Refer to external data.
    // When width_step is not positive, the row step computed by
    // cvInitImageHeader for the given size is kept.
    // NOTE(review): that computed step may include row padding; confirm it
    // matches the layout of tightly packed external data.
    WImageView(T* data, int width, int height, int channels, int width_step = -1);

    // Refer to external data. This does NOT take ownership
    // of the supplied IplImage.
    WImageView(IplImage* img) : WImage<T>(img) {}

    // Shallow copy from any WImage<T>: the header is copied into this view
    // and the view points at its own copy, sharing the pixel data.
    WImageView(const WImage<T>& img) : WImage<T>(0) {
        header_ = *(img.Ipl());
        WImage<T>::SetIpl(&header_);
    }

    // Same-type copy constructor. Without it the implicitly generated one
    // would be selected for WImageView arguments; that one relies on the
    // undefined WImage copy constructor and would leave image_ pointing at
    // the source's header_ instead of this object's.
    WImageView(const WImageView<T>& img) : WImage<T>(0) {
        header_ = *(img.Ipl());
        WImage<T>::SetIpl(&header_);
    }

    // Shallow assignment from any WImage<T>.
    WImageView& operator=(const WImage<T>& img) {
        header_ = *(img.Ipl());
        WImage<T>::SetIpl(&header_);
        return *this;
    }

    // Same-type assignment, provided for the same reason as the same-type
    // copy constructor.
    WImageView& operator=(const WImageView<T>& img) {
        header_ = *(img.Ipl());
        WImage<T>::SetIpl(&header_);
        return *this;
    }

protected:
    // Header owned by the view; image_ points here unless the view was
    // built directly from an external IplImage*.
    IplImage header_;
};
/** View class for images whose channel count C is fixed at compile time. */
template<typename T, int C>
class WImageViewC : public WImageC<T, C>
{
public:
    typedef typename WImage<T>::BaseType BaseType;
    enum { kChannels = C };

    // Default constructor, required to store views in vectors.
    WImageViewC();

    virtual ~WImageViewC() {}

    // Sub-image of img whose corner is (c, r). The region is not checked
    // against the bounds of the original image.
    WImageViewC(WImageC<T, C>* img,
                int c, int r, int width, int height);

    // View onto externally supplied pixel data.
    WImageViewC(T* data, int width, int height, int width_step = -1);

    // View onto an external IplImage; ownership stays with the caller.
    WImageViewC(IplImage* img) : WImageC<T, C>(img) {}

    // Shallow copies, so that several views may share the same pixels.
    // Both the WImageC and the WImageViewC flavours of the constructor and
    // of the assignment operator are spelled out (gcc-4.1.1 gets confused
    // otherwise).
    WImageViewC(const WImageC<T, C>& img) : WImageC<T, C>(0) {
        ShareHeaderOf(img);
    }
    WImageViewC(const WImageViewC<T, C>& img) : WImageC<T, C>(0) {
        ShareHeaderOf(img);
    }
    WImageViewC& operator=(const WImageC<T, C>& img) {
        ShareHeaderOf(img);
        return *this;
    }
    WImageViewC& operator=(const WImageViewC<T, C>& img) {
        ShareHeaderOf(img);
        return *this;
    }

private:
    // Copy the header of src into header_ and make this view use the copy.
    void ShareHeaderOf(const WImageC<T, C>& src) {
        header_ = *(src.Ipl());
        WImageC<T, C>::SetIpl(&header_);
    }

protected:
    IplImage header_;
};
// Depth() specializations: one per supported channel type, each returning
// the IPL depth constant that corresponds to that type.
template<>
inline int WImage<uchar>::Depth() const
{
    return IPL_DEPTH_8U;
}

template<>
inline int WImage<signed char>::Depth() const
{
    return IPL_DEPTH_8S;
}

template<>
inline int WImage<short>::Depth() const
{
    return IPL_DEPTH_16S;
}

template<>
inline int WImage<ushort>::Depth() const
{
    return IPL_DEPTH_16U;
}

template<>
inline int WImage<int>::Depth() const
{
    return IPL_DEPTH_32S;
}

template<>
inline int WImage<float>::Depth() const
{
    return IPL_DEPTH_32F;
}

template<>
inline int WImage<double>::Depth() const
{
    return IPL_DEPTH_64F;
}
// Definitions for the pure virtual destructors declared in the classes;
// both are intentionally empty.
template<typename T>
inline WImage<T>::~WImage()
{
}

template<typename T, int C>
inline WImageC<T, C>::~WImageC()
{
}
// Make the buffer hold a width x height image with nchannels channels.
// An existing image of exactly that shape is kept; otherwise the current
// image (if any) is released and a new one is created.
template<typename T>
inline void WImageBuffer<T>::Allocate(int width, int height, int nchannels)
{
    const bool already_matching = !IsNull() &&
                                  WImage<T>::Width() == width &&
                                  WImage<T>::Height() == height &&
                                  WImage<T>::Channels() == nchannels;
    if (already_matching) {
        return;
    }

    ReleaseImage();
    WImage<T>::image_ = cvCreateImage(cvSize(width, height),
                                      WImage<T>::Depth(), nchannels);
}
// Make the buffer hold a width x height image with C channels.
// An existing image of exactly that size is kept; otherwise the current
// image (if any) is released and a new one is created.
template<typename T, int C>
inline void WImageBufferC<T, C>::Allocate(int width, int height)
{
    const bool already_matching = !IsNull() &&
                                  WImage<T>::Width() == width &&
                                  WImage<T>::Height() == height;
    if (already_matching) {
        return;
    }

    ReleaseImage();
    WImageC<T, C>::SetIpl(
        cvCreateImage(cvSize(width, height), WImage<T>::Depth(), C));
}
// Sub-image constructor: the view starts at pixel (c, r) of img and spans
// width x height pixels. No bounds checking is performed.
template<typename T>
WImageView<T>::WImageView(WImage<T>* img, int c, int r, int width, int height)
    : WImage<T>(0)
{
    // Address of the region's first pixel inside the parent image.
    T* const first_pixel = (*img)(c, r);

    // Start from a copy of the parent's header, then override the data
    // pointer and the dimensions.
    header_ = *(img->Ipl());
    header_.width = width;
    header_.height = height;
    header_.imageData = reinterpret_cast<char*>(first_pixel);

    WImage<T>::SetIpl(&header_);
}
// External-data constructor: initialises the view's own header for the given
// size and channel count and points it at data. A positive width_step
// overrides the row step set by cvInitImageHeader.
template<typename T>
WImageView<T>::WImageView(T* data, int width, int height, int nchannels, int width_step)
    : WImage<T>(0)
{
    cvInitImageHeader(&header_, cvSize(width, height), WImage<T>::Depth(), nchannels);

    if (width_step > 0) {
        header_.widthStep = width_step;
    }
    header_.imageData = reinterpret_cast<char*>(data);

    WImage<T>::SetIpl(&header_);
}
// Sub-image constructor: the view starts at pixel (c, r) of img and spans
// width x height pixels. No bounds checking is performed.
template<typename T, int C>
WImageViewC<T, C>::WImageViewC(WImageC<T, C>* img, int c, int r, int width, int height)
    : WImageC<T, C>(0)
{
    // Address of the region's first pixel inside the parent image.
    T* const first_pixel = (*img)(c, r);

    // Start from a copy of the parent's header, then override the data
    // pointer and the dimensions.
    header_ = *(img->Ipl());
    header_.width = width;
    header_.height = height;
    header_.imageData = reinterpret_cast<char*>(first_pixel);

    WImageC<T, C>::SetIpl(&header_);
}
// Default constructor: a 0x0 view with C channels and no pixel data.
template<typename T, int C>
WImageViewC<T, C>::WImageViewC()
    : WImageC<T, C>(0)
{
    cvInitImageHeader(&header_, cvSize(0, 0), WImage<T>::Depth(), C);
    // No pixels are attached to an empty view.
    header_.imageData = reinterpret_cast<char*>(0);

    WImageC<T, C>::SetIpl(&header_);
}
// External-data constructor: initialises the view's own header for a
// width x height image with C channels and points it at data. A positive
// width_step overrides the row step set by cvInitImageHeader.
template<typename T, int C>
WImageViewC<T, C>::WImageViewC(T* data, int width, int height, int width_step)
    : WImageC<T, C>(0)
{
    cvInitImageHeader(&header_, cvSize(width, height), WImage<T>::Depth(), C);

    if (width_step > 0) {
        header_.widthStep = width_step;
    }
    header_.imageData = reinterpret_cast<char*>(data);

    WImageC<T, C>::SetIpl(&header_);
}
// Construct a view into a region of an image.
// The returned WImageView<T> is built from this image with (c, r) as the
// corner of the region and width x height as its size; the sub-image
// constructor it calls does no bounds checking.
// The view is returned as a temporary on purpose: WImage's copy constructor
// is only declared, so this statement should not be turned into a named
// local followed by a return.
template<typename T>
WImageView<T> WImage<T>::View(int c, int r, int width, int height) {
return WImageView<T>(this, c, r, width, height);
}
// Construct a view into a region of a fixed-channel image.
// The returned WImageViewC<T, C> is built from this image with (c, r) as the
// corner of the region and width x height as its size; the sub-image
// constructor it calls does no bounds checking.
template<typename T, int C>
WImageViewC<T, C> WImageC<T, C>::View(int c, int r, int width, int height) {
return WImageViewC<T, C>(this, c, r, width, height);
}
//! @} core
} // end of namespace
#endif // __cplusplus
#endif