Initial commit.
Final release of the project Anonymizer (2015). Project settings for the Qt Creator (ver. 3.6).
This commit is contained in:
		
							
								
								
									
										211
									
								
								3rdparty/include/opencv2/core/cuda/block.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										211
									
								
								3rdparty/include/opencv2/core/cuda/block.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,211 @@ | ||||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | ||||
| // | ||||
| //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | ||||
| // | ||||
| //  By downloading, copying, installing or using the software you agree to this license. | ||||
| //  If you do not agree to this license, do not download, install, | ||||
| //  copy or use the software. | ||||
| // | ||||
| // | ||||
| //                           License Agreement | ||||
| //                For Open Source Computer Vision Library | ||||
| // | ||||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | ||||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | ||||
| // Third party copyrights are property of their respective owners. | ||||
| // | ||||
| // Redistribution and use in source and binary forms, with or without modification, | ||||
| // are permitted provided that the following conditions are met: | ||||
| // | ||||
| //   * Redistribution's of source code must retain the above copyright notice, | ||||
| //     this list of conditions and the following disclaimer. | ||||
| // | ||||
| //   * Redistribution's in binary form must reproduce the above copyright notice, | ||||
| //     this list of conditions and the following disclaimer in the documentation | ||||
| //     and/or other materials provided with the distribution. | ||||
| // | ||||
| //   * The name of the copyright holders may not be used to endorse or promote products | ||||
| //     derived from this software without specific prior written permission. | ||||
| // | ||||
| // This software is provided by the copyright holders and contributors "as is" and | ||||
| // any express or implied warranties, including, but not limited to, the implied | ||||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | ||||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | ||||
| // indirect, incidental, special, exemplary, or consequential damages | ||||
| // (including, but not limited to, procurement of substitute goods or services; | ||||
| // loss of use, data, or profits; or business interruption) however caused | ||||
| // and on any theory of liability, whether in contract, strict liability, | ||||
| // or tort (including negligence or otherwise) arising in any way out of | ||||
| // the use of this software, even if advised of the possibility of such damage. | ||||
| // | ||||
| //M*/ | ||||
|  | ||||
| #ifndef __OPENCV_CUDA_DEVICE_BLOCK_HPP__ | ||||
| #define __OPENCV_CUDA_DEVICE_BLOCK_HPP__ | ||||
|  | ||||
| /** @file | ||||
|  * @deprecated Use @ref cudev instead. | ||||
|  */ | ||||
|  | ||||
| //! @cond IGNORED | ||||
|  | ||||
| namespace cv { namespace cuda { namespace device | ||||
| { | ||||
    // Block-scope cooperative utilities for CUDA device code.
    // All members are static __device__ helpers meant to be executed by every
    // thread of the launching thread block; the element-wise loops
    // (fill / yota / copy / transfrom) partition [beg, end) across the block's
    // flattened thread ids with a block-sized stride.
    struct Block
    {
        // Linear id of this thread block (x dimension only — assumes a 1-D
        // grid; TODO confirm callers never launch multi-dimensional grids).
        static __device__ __forceinline__ unsigned int id()
        {
            return blockIdx.x;
        }

        // Total number of threads in the (possibly 3-D) block; used as the
        // loop stride by the cooperative range operations below.
        static __device__ __forceinline__ unsigned int stride()
        {
            return blockDim.x * blockDim.y * blockDim.z;
        }

        // Block-wide barrier (plain __syncthreads wrapper).
        static __device__ __forceinline__ void sync()
        {
            __syncthreads();
        }

        // Flattened 3-D thread index in [0, stride()).
        static __device__ __forceinline__ int flattenedThreadId()
        {
            return threadIdx.z * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x;
        }

        // Cooperatively fills [beg, end) with `value`; each thread writes a
        // block-strided subset. No barrier is issued — call sync() afterwards
        // if other threads will read the filled range.
        template<typename It, typename T>
        static __device__ __forceinline__ void fill(It beg, It end, const T& value)
        {
            int STRIDE = stride();
            It t = beg + flattenedThreadId();

            for(; t < end; t += STRIDE)
                *t = value;
        }

        // Cooperative iota: writes value, value+1, value+2, ... into [beg, end).
        // NOTE(review): "yota" is a historical misspelling of "iota"; the name
        // is public API and is kept for source compatibility.
        template<typename OutIt, typename T>
        static __device__ __forceinline__ void yota(OutIt beg, OutIt end, T value)
        {
            int STRIDE = stride();
            int tid = flattenedThreadId();
            value += tid;

            for(OutIt t = beg + tid; t < end; t += STRIDE, value += STRIDE)
                *t = value;
        }

        // Cooperative copy of [beg, end) to the range starting at out.
        // Input and output strides match, so each thread copies the same
        // offsets it would fill(). No barrier is issued.
        template<typename InIt, typename OutIt>
        static __device__ __forceinline__ void copy(InIt beg, InIt end, OutIt out)
        {
            int STRIDE = stride();
            InIt  t = beg + flattenedThreadId();
            OutIt o = out + (t - beg);

            for(; t < end; t += STRIDE, o += STRIDE)
                *o = *t;
        }

        // Cooperative unary transform: *o = op(*t) over [beg, end).
        // NOTE(review): "transfrom" is a historical misspelling of "transform";
        // the name is public API and is kept for source compatibility.
        template<typename InIt, typename OutIt, class UnOp>
        static __device__ __forceinline__ void transfrom(InIt beg, InIt end, OutIt out, UnOp op)
        {
            int STRIDE = stride();
            InIt  t = beg + flattenedThreadId();
            OutIt o = out + (t - beg);

            for(; t < end; t += STRIDE, o += STRIDE)
                *o = op(*t);
        }

        // Cooperative binary transform: *o = op(*t1, *t2) over [beg1, end1)
        // paired element-wise with the range starting at beg2.
        template<typename InIt1, typename InIt2, typename OutIt, class BinOp>
        static __device__ __forceinline__ void transfrom(InIt1 beg1, InIt1 end1, InIt2 beg2, OutIt out, BinOp op)
        {
            int STRIDE = stride();
            InIt1 t1 = beg1 + flattenedThreadId();
            InIt2 t2 = beg2 + flattenedThreadId();
            OutIt o  = out + (t1 - beg1);

            for(; t1 < end1; t1 += STRIDE, t2 += STRIDE, o += STRIDE)
                *o = op(*t1, *t2);
        }

        // In-place block tree reduction over buffer[0..CTA_SIZE); the result
        // lands in buffer[0]. CTA_SIZE is a compile-time power of two (the
        // unrolled halving chain only handles 1024, 512, ..., 2) and must equal
        // the number of participating threads. The caller is expected to have
        // populated buffer[tid] and synchronized before calling — TODO confirm
        // at call sites.
        // NOTE(review): the tid < 32 tail has no barriers and relies on
        // implicit warp-synchronous execution (buffer is volatile to keep the
        // loads/stores ordered); under Volta+ independent thread scheduling
        // this idiom needs __syncwarp() between steps — verify target archs.
        template<int CTA_SIZE, typename T, class BinOp>
        static __device__ __forceinline__ void reduce(volatile T* buffer, BinOp op)
        {
            int tid = flattenedThreadId();
            T val =  buffer[tid];

            if (CTA_SIZE >= 1024) { if (tid < 512) buffer[tid] = val = op(val, buffer[tid + 512]); __syncthreads(); }
            if (CTA_SIZE >=  512) { if (tid < 256) buffer[tid] = val = op(val, buffer[tid + 256]); __syncthreads(); }
            if (CTA_SIZE >=  256) { if (tid < 128) buffer[tid] = val = op(val, buffer[tid + 128]); __syncthreads(); }
            if (CTA_SIZE >=  128) { if (tid <  64) buffer[tid] = val = op(val, buffer[tid +  64]); __syncthreads(); }

            if (tid < 32)
            {
                if (CTA_SIZE >=   64) { buffer[tid] = val = op(val, buffer[tid +  32]); }
                if (CTA_SIZE >=   32) { buffer[tid] = val = op(val, buffer[tid +  16]); }
                if (CTA_SIZE >=   16) { buffer[tid] = val = op(val, buffer[tid +   8]); }
                if (CTA_SIZE >=    8) { buffer[tid] = val = op(val, buffer[tid +   4]); }
                if (CTA_SIZE >=    4) { buffer[tid] = val = op(val, buffer[tid +   2]); }
                if (CTA_SIZE >=    2) { buffer[tid] = val = op(val, buffer[tid +   1]); }
            }
        }

        // Same tree reduction, but each thread first stores its own `init`
        // contribution into buffer[tid]; a trailing barrier makes the returned
        // buffer[0] value safe to read by every thread. Same CTA_SIZE /
        // warp-synchronous caveats as the overload above.
        template<int CTA_SIZE, typename T, class BinOp>
        static __device__ __forceinline__ T reduce(volatile T* buffer, T init, BinOp op)
        {
            int tid = flattenedThreadId();
            T val =  buffer[tid] = init;
            __syncthreads();

            if (CTA_SIZE >= 1024) { if (tid < 512) buffer[tid] = val = op(val, buffer[tid + 512]); __syncthreads(); }
            if (CTA_SIZE >=  512) { if (tid < 256) buffer[tid] = val = op(val, buffer[tid + 256]); __syncthreads(); }
            if (CTA_SIZE >=  256) { if (tid < 128) buffer[tid] = val = op(val, buffer[tid + 128]); __syncthreads(); }
            if (CTA_SIZE >=  128) { if (tid <  64) buffer[tid] = val = op(val, buffer[tid +  64]); __syncthreads(); }

            if (tid < 32)
            {
                if (CTA_SIZE >=   64) { buffer[tid] = val = op(val, buffer[tid +  32]); }
                if (CTA_SIZE >=   32) { buffer[tid] = val = op(val, buffer[tid +  16]); }
                if (CTA_SIZE >=   16) { buffer[tid] = val = op(val, buffer[tid +   8]); }
                if (CTA_SIZE >=    8) { buffer[tid] = val = op(val, buffer[tid +   4]); }
                if (CTA_SIZE >=    4) { buffer[tid] = val = op(val, buffer[tid +   2]); }
                if (CTA_SIZE >=    2) { buffer[tid] = val = op(val, buffer[tid +   1]); }
            }
            __syncthreads();
            return buffer[0];
        }

        // Runtime-sized reduction of data[0..n) into data[0].
        // Phase 1: if n exceeds the block size, each thread folds its strided
        // tail elements into data[ftid], shrinking the active range to sft.
        // Phase 2: symmetric halving — element ftid absorbs element
        // n - ftid - 1 until one element remains. Barriers separate rounds.
        template <typename T, class BinOp>
        static __device__ __forceinline__ void reduce_n(T* data, unsigned int n, BinOp op)
        {
            int ftid = flattenedThreadId();
            int sft = stride();

            if (sft < n)
            {
                for (unsigned int i = sft + ftid; i < n; i += sft)
                    data[ftid] = op(data[ftid], data[i]);

                __syncthreads();

                n = sft;
            }

            while (n > 1)
            {
                unsigned int half = n/2;

                if (ftid < half)
                    data[ftid] = op(data[ftid], data[n - ftid - 1]);

                __syncthreads();

                n = n - half;
            }
        }
    };
| }}} | ||||
|  | ||||
| //! @endcond | ||||
|  | ||||
| #endif /* __OPENCV_CUDA_DEVICE_BLOCK_HPP__ */ | ||||
							
								
								
									
										722
									
								
								3rdparty/include/opencv2/core/cuda/border_interpolate.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										722
									
								
								3rdparty/include/opencv2/core/cuda/border_interpolate.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,722 @@ | ||||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | ||||
| // | ||||
| //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | ||||
| // | ||||
| //  By downloading, copying, installing or using the software you agree to this license. | ||||
| //  If you do not agree to this license, do not download, install, | ||||
| //  copy or use the software. | ||||
| // | ||||
| // | ||||
| //                           License Agreement | ||||
| //                For Open Source Computer Vision Library | ||||
| // | ||||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | ||||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | ||||
| // Third party copyrights are property of their respective owners. | ||||
| // | ||||
| // Redistribution and use in source and binary forms, with or without modification, | ||||
| // are permitted provided that the following conditions are met: | ||||
| // | ||||
| //   * Redistribution's of source code must retain the above copyright notice, | ||||
| //     this list of conditions and the following disclaimer. | ||||
| // | ||||
| //   * Redistribution's in binary form must reproduce the above copyright notice, | ||||
| //     this list of conditions and the following disclaimer in the documentation | ||||
| //     and/or other materials provided with the distribution. | ||||
| // | ||||
| //   * The name of the copyright holders may not be used to endorse or promote products | ||||
| //     derived from this software without specific prior written permission. | ||||
| // | ||||
| // This software is provided by the copyright holders and contributors "as is" and | ||||
| // any express or implied warranties, including, but not limited to, the implied | ||||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | ||||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | ||||
| // indirect, incidental, special, exemplary, or consequential damages | ||||
| // (including, but not limited to, procurement of substitute goods or services; | ||||
| // loss of use, data, or profits; or business interruption) however caused | ||||
| // and on any theory of liability, whether in contract, strict liability, | ||||
| // or tort (including negligence or otherwise) arising in any way out of | ||||
| // the use of this software, even if advised of the possibility of such damage. | ||||
| // | ||||
| //M*/ | ||||
|  | ||||
| #ifndef __OPENCV_CUDA_BORDER_INTERPOLATE_HPP__ | ||||
| #define __OPENCV_CUDA_BORDER_INTERPOLATE_HPP__ | ||||
|  | ||||
| #include "saturate_cast.hpp" | ||||
| #include "vec_traits.hpp" | ||||
| #include "vec_math.hpp" | ||||
|  | ||||
| /** @file | ||||
|  * @deprecated Use @ref cudev instead. | ||||
|  */ | ||||
|  | ||||
| //! @cond IGNORED | ||||
|  | ||||
| namespace cv { namespace cuda { namespace device | ||||
| { | ||||
|     ////////////////////////////////////////////////////////////// | ||||
|     // BrdConstant | ||||
|  | ||||
|     template <typename D> struct BrdRowConstant | ||||
|     { | ||||
|         typedef D result_type; | ||||
|  | ||||
|         explicit __host__ __device__ __forceinline__ BrdRowConstant(int width_, const D& val_ = VecTraits<D>::all(0)) : width(width_), val(val_) {} | ||||
|  | ||||
|         template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const | ||||
|         { | ||||
|             return x >= 0 ? saturate_cast<D>(data[x]) : val; | ||||
|         } | ||||
|  | ||||
|         template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const | ||||
|         { | ||||
|             return x < width ? saturate_cast<D>(data[x]) : val; | ||||
|         } | ||||
|  | ||||
|         template <typename T> __device__ __forceinline__ D at(int x, const T* data) const | ||||
|         { | ||||
|             return (x >= 0 && x < width) ? saturate_cast<D>(data[x]) : val; | ||||
|         } | ||||
|  | ||||
|         int width; | ||||
|         D val; | ||||
|     }; | ||||
|  | ||||
|     template <typename D> struct BrdColConstant | ||||
|     { | ||||
|         typedef D result_type; | ||||
|  | ||||
|         explicit __host__ __device__ __forceinline__ BrdColConstant(int height_, const D& val_ = VecTraits<D>::all(0)) : height(height_), val(val_) {} | ||||
|  | ||||
|         template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const | ||||
|         { | ||||
|             return y >= 0 ? saturate_cast<D>(*(const T*)((const char*)data + y * step)) : val; | ||||
|         } | ||||
|  | ||||
|         template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const | ||||
|         { | ||||
|             return y < height ? saturate_cast<D>(*(const T*)((const char*)data + y * step)) : val; | ||||
|         } | ||||
|  | ||||
|         template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const | ||||
|         { | ||||
|             return (y >= 0 && y < height) ? saturate_cast<D>(*(const T*)((const char*)data + y * step)) : val; | ||||
|         } | ||||
|  | ||||
|         int height; | ||||
|         D val; | ||||
|     }; | ||||
|  | ||||
|     template <typename D> struct BrdConstant | ||||
|     { | ||||
|         typedef D result_type; | ||||
|  | ||||
|         __host__ __device__ __forceinline__ BrdConstant(int height_, int width_, const D& val_ = VecTraits<D>::all(0)) : height(height_), width(width_), val(val_) | ||||
|         { | ||||
|         } | ||||
|  | ||||
|         template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const | ||||
|         { | ||||
|             return (x >= 0 && x < width && y >= 0 && y < height) ? saturate_cast<D>(((const T*)((const uchar*)data + y * step))[x]) : val; | ||||
|         } | ||||
|  | ||||
|         template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const | ||||
|         { | ||||
|             return (x >= 0 && x < width && y >= 0 && y < height) ? saturate_cast<D>(src(y, x)) : val; | ||||
|         } | ||||
|  | ||||
|         int height; | ||||
|         int width; | ||||
|         D val; | ||||
|     }; | ||||
|  | ||||
|     ////////////////////////////////////////////////////////////// | ||||
|     // BrdReplicate | ||||
|  | ||||
|     template <typename D> struct BrdRowReplicate | ||||
|     { | ||||
|         typedef D result_type; | ||||
|  | ||||
|         explicit __host__ __device__ __forceinline__ BrdRowReplicate(int width) : last_col(width - 1) {} | ||||
|         template <typename U> __host__ __device__ __forceinline__ BrdRowReplicate(int width, U) : last_col(width - 1) {} | ||||
|  | ||||
|         __device__ __forceinline__ int idx_col_low(int x) const | ||||
|         { | ||||
|             return ::max(x, 0); | ||||
|         } | ||||
|  | ||||
|         __device__ __forceinline__ int idx_col_high(int x) const | ||||
|         { | ||||
|             return ::min(x, last_col); | ||||
|         } | ||||
|  | ||||
|         __device__ __forceinline__ int idx_col(int x) const | ||||
|         { | ||||
|             return idx_col_low(idx_col_high(x)); | ||||
|         } | ||||
|  | ||||
|         template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const | ||||
|         { | ||||
|             return saturate_cast<D>(data[idx_col_low(x)]); | ||||
|         } | ||||
|  | ||||
|         template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const | ||||
|         { | ||||
|             return saturate_cast<D>(data[idx_col_high(x)]); | ||||
|         } | ||||
|  | ||||
|         template <typename T> __device__ __forceinline__ D at(int x, const T* data) const | ||||
|         { | ||||
|             return saturate_cast<D>(data[idx_col(x)]); | ||||
|         } | ||||
|  | ||||
|         int last_col; | ||||
|     }; | ||||
|  | ||||
|     template <typename D> struct BrdColReplicate | ||||
|     { | ||||
|         typedef D result_type; | ||||
|  | ||||
|         explicit __host__ __device__ __forceinline__ BrdColReplicate(int height) : last_row(height - 1) {} | ||||
|         template <typename U> __host__ __device__ __forceinline__ BrdColReplicate(int height, U) : last_row(height - 1) {} | ||||
|  | ||||
|         __device__ __forceinline__ int idx_row_low(int y) const | ||||
|         { | ||||
|             return ::max(y, 0); | ||||
|         } | ||||
|  | ||||
|         __device__ __forceinline__ int idx_row_high(int y) const | ||||
|         { | ||||
|             return ::min(y, last_row); | ||||
|         } | ||||
|  | ||||
|         __device__ __forceinline__ int idx_row(int y) const | ||||
|         { | ||||
|             return idx_row_low(idx_row_high(y)); | ||||
|         } | ||||
|  | ||||
|         template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const | ||||
|         { | ||||
|             return saturate_cast<D>(*(const T*)((const char*)data + idx_row_low(y) * step)); | ||||
|         } | ||||
|  | ||||
|         template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const | ||||
|         { | ||||
|             return saturate_cast<D>(*(const T*)((const char*)data + idx_row_high(y) * step)); | ||||
|         } | ||||
|  | ||||
|         template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const | ||||
|         { | ||||
|             return saturate_cast<D>(*(const T*)((const char*)data + idx_row(y) * step)); | ||||
|         } | ||||
|  | ||||
|         int last_row; | ||||
|     }; | ||||
|  | ||||
|     template <typename D> struct BrdReplicate | ||||
|     { | ||||
|         typedef D result_type; | ||||
|  | ||||
|         __host__ __device__ __forceinline__ BrdReplicate(int height, int width) : last_row(height - 1), last_col(width - 1) {} | ||||
|         template <typename U> __host__ __device__ __forceinline__ BrdReplicate(int height, int width, U) : last_row(height - 1), last_col(width - 1) {} | ||||
|  | ||||
|         __device__ __forceinline__ int idx_row_low(int y) const | ||||
|         { | ||||
|             return ::max(y, 0); | ||||
|         } | ||||
|  | ||||
|         __device__ __forceinline__ int idx_row_high(int y) const | ||||
|         { | ||||
|             return ::min(y, last_row); | ||||
|         } | ||||
|  | ||||
|         __device__ __forceinline__ int idx_row(int y) const | ||||
|         { | ||||
|             return idx_row_low(idx_row_high(y)); | ||||
|         } | ||||
|  | ||||
|         __device__ __forceinline__ int idx_col_low(int x) const | ||||
|         { | ||||
|             return ::max(x, 0); | ||||
|         } | ||||
|  | ||||
|         __device__ __forceinline__ int idx_col_high(int x) const | ||||
|         { | ||||
|             return ::min(x, last_col); | ||||
|         } | ||||
|  | ||||
|         __device__ __forceinline__ int idx_col(int x) const | ||||
|         { | ||||
|             return idx_col_low(idx_col_high(x)); | ||||
|         } | ||||
|  | ||||
|         template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const | ||||
|         { | ||||
|             return saturate_cast<D>(((const T*)((const char*)data + idx_row(y) * step))[idx_col(x)]); | ||||
|         } | ||||
|  | ||||
|         template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const | ||||
|         { | ||||
|             return saturate_cast<D>(src(idx_row(y), idx_col(x))); | ||||
|         } | ||||
|  | ||||
|         int last_row; | ||||
|         int last_col; | ||||
|     }; | ||||
|  | ||||
|     ////////////////////////////////////////////////////////////// | ||||
|     // BrdReflect101 | ||||
|  | ||||
|     template <typename D> struct BrdRowReflect101 | ||||
|     { | ||||
|         typedef D result_type; | ||||
|  | ||||
|         explicit __host__ __device__ __forceinline__ BrdRowReflect101(int width) : last_col(width - 1) {} | ||||
|         template <typename U> __host__ __device__ __forceinline__ BrdRowReflect101(int width, U) : last_col(width - 1) {} | ||||
|  | ||||
|         __device__ __forceinline__ int idx_col_low(int x) const | ||||
|         { | ||||
|             return ::abs(x) % (last_col + 1); | ||||
|         } | ||||
|  | ||||
|         __device__ __forceinline__ int idx_col_high(int x) const | ||||
|         { | ||||
|             return ::abs(last_col - ::abs(last_col - x)) % (last_col + 1); | ||||
|         } | ||||
|  | ||||
|         __device__ __forceinline__ int idx_col(int x) const | ||||
|         { | ||||
|             return idx_col_low(idx_col_high(x)); | ||||
|         } | ||||
|  | ||||
|         template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const | ||||
|         { | ||||
|             return saturate_cast<D>(data[idx_col_low(x)]); | ||||
|         } | ||||
|  | ||||
|         template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const | ||||
|         { | ||||
|             return saturate_cast<D>(data[idx_col_high(x)]); | ||||
|         } | ||||
|  | ||||
|         template <typename T> __device__ __forceinline__ D at(int x, const T* data) const | ||||
|         { | ||||
|             return saturate_cast<D>(data[idx_col(x)]); | ||||
|         } | ||||
|  | ||||
|         int last_col; | ||||
|     }; | ||||
|  | ||||
|     template <typename D> struct BrdColReflect101 | ||||
|     { | ||||
|         typedef D result_type; | ||||
|  | ||||
|         explicit __host__ __device__ __forceinline__ BrdColReflect101(int height) : last_row(height - 1) {} | ||||
|         template <typename U> __host__ __device__ __forceinline__ BrdColReflect101(int height, U) : last_row(height - 1) {} | ||||
|  | ||||
|         __device__ __forceinline__ int idx_row_low(int y) const | ||||
|         { | ||||
|             return ::abs(y) % (last_row + 1); | ||||
|         } | ||||
|  | ||||
|         __device__ __forceinline__ int idx_row_high(int y) const | ||||
|         { | ||||
|             return ::abs(last_row - ::abs(last_row - y)) % (last_row + 1); | ||||
|         } | ||||
|  | ||||
|         __device__ __forceinline__ int idx_row(int y) const | ||||
|         { | ||||
|             return idx_row_low(idx_row_high(y)); | ||||
|         } | ||||
|  | ||||
|         template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const | ||||
|         { | ||||
|             return saturate_cast<D>(*(const D*)((const char*)data + idx_row_low(y) * step)); | ||||
|         } | ||||
|  | ||||
|         template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const | ||||
|         { | ||||
|             return saturate_cast<D>(*(const D*)((const char*)data + idx_row_high(y) * step)); | ||||
|         } | ||||
|  | ||||
|         template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const | ||||
|         { | ||||
|             return saturate_cast<D>(*(const D*)((const char*)data + idx_row(y) * step)); | ||||
|         } | ||||
|  | ||||
|         int last_row; | ||||
|     }; | ||||
|  | ||||
|     template <typename D> struct BrdReflect101 | ||||
|     { | ||||
|         typedef D result_type; | ||||
|  | ||||
|         __host__ __device__ __forceinline__ BrdReflect101(int height, int width) : last_row(height - 1), last_col(width - 1) {} | ||||
|         template <typename U> __host__ __device__ __forceinline__ BrdReflect101(int height, int width, U) : last_row(height - 1), last_col(width - 1) {} | ||||
|  | ||||
|         __device__ __forceinline__ int idx_row_low(int y) const | ||||
|         { | ||||
|             return ::abs(y) % (last_row + 1); | ||||
|         } | ||||
|  | ||||
|         __device__ __forceinline__ int idx_row_high(int y) const | ||||
|         { | ||||
|             return ::abs(last_row - ::abs(last_row - y)) % (last_row + 1); | ||||
|         } | ||||
|  | ||||
|         __device__ __forceinline__ int idx_row(int y) const | ||||
|         { | ||||
|             return idx_row_low(idx_row_high(y)); | ||||
|         } | ||||
|  | ||||
|         __device__ __forceinline__ int idx_col_low(int x) const | ||||
|         { | ||||
|             return ::abs(x) % (last_col + 1); | ||||
|         } | ||||
|  | ||||
|         __device__ __forceinline__ int idx_col_high(int x) const | ||||
|         { | ||||
|             return ::abs(last_col - ::abs(last_col - x)) % (last_col + 1); | ||||
|         } | ||||
|  | ||||
|         __device__ __forceinline__ int idx_col(int x) const | ||||
|         { | ||||
|             return idx_col_low(idx_col_high(x)); | ||||
|         } | ||||
|  | ||||
|         template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const | ||||
|         { | ||||
|             return saturate_cast<D>(((const T*)((const char*)data + idx_row(y) * step))[idx_col(x)]); | ||||
|         } | ||||
|  | ||||
|         template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const | ||||
|         { | ||||
|             return saturate_cast<D>(src(idx_row(y), idx_col(x))); | ||||
|         } | ||||
|  | ||||
|         int last_row; | ||||
|         int last_col; | ||||
|     }; | ||||
|  | ||||
|     ////////////////////////////////////////////////////////////// | ||||
|     // BrdReflect | ||||
|  | ||||
|     template <typename D> struct BrdRowReflect | ||||
|     { | ||||
|         typedef D result_type; | ||||
|  | ||||
|         explicit __host__ __device__ __forceinline__ BrdRowReflect(int width) : last_col(width - 1) {} | ||||
|         template <typename U> __host__ __device__ __forceinline__ BrdRowReflect(int width, U) : last_col(width - 1) {} | ||||
|  | ||||
|         __device__ __forceinline__ int idx_col_low(int x) const | ||||
|         { | ||||
|             return (::abs(x) - (x < 0)) % (last_col + 1); | ||||
|         } | ||||
|  | ||||
|         __device__ __forceinline__ int idx_col_high(int x) const | ||||
|         { | ||||
|             return ::abs(last_col - ::abs(last_col - x) + (x > last_col)) % (last_col + 1); | ||||
|         } | ||||
|  | ||||
|         __device__ __forceinline__ int idx_col(int x) const | ||||
|         { | ||||
|             return idx_col_high(::abs(x) - (x < 0)); | ||||
|         } | ||||
|  | ||||
|         template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const | ||||
|         { | ||||
|             return saturate_cast<D>(data[idx_col_low(x)]); | ||||
|         } | ||||
|  | ||||
|         template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const | ||||
|         { | ||||
|             return saturate_cast<D>(data[idx_col_high(x)]); | ||||
|         } | ||||
|  | ||||
|         template <typename T> __device__ __forceinline__ D at(int x, const T* data) const | ||||
|         { | ||||
|             return saturate_cast<D>(data[idx_col(x)]); | ||||
|         } | ||||
|  | ||||
|         int last_col; | ||||
|     }; | ||||
|  | ||||
    // Vertical (row-index) border policy: reflection that repeats the edge pixel
    // (y = -1 maps to row 0, y = last_row + 1 maps back toward last_row), i.e.
    // OpenCV's BORDER_REFLECT pattern along the y axis.
    // D is the result type returned by the at_* accessors.
    template <typename D> struct BrdColReflect
    {
        typedef D result_type;

        // height: number of rows in the image; only last_row = height - 1 is stored.
        explicit __host__ __device__ __forceinline__ BrdColReflect(int height) : last_row(height - 1) {}
        // The unnamed U argument is ignored; presumably it keeps the constructor
        // signature uniform with border policies that take a fill value — TODO confirm.
        template <typename U> __host__ __device__ __forceinline__ BrdColReflect(int height, U) : last_row(height - 1) {}

        // Reflect a possibly-negative row index: ::abs(y) - (y < 0) maps -1 -> 0,
        // -2 -> 1, ...; the % (last_row + 1) wraps indices that overshoot after
        // reflection.  NOTE(review): the valid input range of y (how far past the
        // border callers may reach) is not established by this header.
        __device__ __forceinline__ int idx_row_low(int y) const
        {
            return (::abs(y) - (y < 0)) % (last_row + 1);
        }

        // Reflect a row index that may exceed last_row back into range; the
        // (y > last_row) term shifts by one so the edge row is repeated.
        __device__ __forceinline__ int idx_row_high(int y) const
        {
            return ::abs(last_row - ::abs(last_row - y) + (y > last_row)) % (last_row + 1);
        }

        // Handle both under- and overflow: fold negatives first, then reflect high.
        __device__ __forceinline__ int idx_row(int y) const
        {
            return idx_row_high(::abs(y) - (y < 0));
        }

        // step is the row pitch in bytes.  NOTE(review): the selected row is
        // reinterpreted as D (not T); callers apparently instantiate these with
        // matching layouts — confirm against the filter code that uses this.
        template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const
        {
            return saturate_cast<D>(*(const D*)((const char*)data + idx_row_low(y) * step));
        }

        template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const
        {
            return saturate_cast<D>(*(const D*)((const char*)data + idx_row_high(y) * step));
        }

        template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const
        {
            return saturate_cast<D>(*(const D*)((const char*)data + idx_row(y) * step));
        }

        // Index of the last valid row (height - 1).
        int last_row;
    };
|  | ||||
    // 2-D border policy: edge-repeating reflection (BORDER_REFLECT pattern) in
    // both the row (y) and column (x) directions.
    // D is the result type returned by the at() accessors.
    template <typename D> struct BrdReflect
    {
        typedef D result_type;

        __host__ __device__ __forceinline__ BrdReflect(int height, int width) : last_row(height - 1), last_col(width - 1) {}
        // The unnamed U argument is ignored; presumably it keeps the constructor
        // signature uniform with border policies that take a fill value — TODO confirm.
        template <typename U> __host__ __device__ __forceinline__ BrdReflect(int height, int width, U) : last_row(height - 1), last_col(width - 1) {}

        // Reflect a possibly-negative row index (-1 -> 0, -2 -> 1, ...) and wrap
        // overshoot with % (last_row + 1).
        __device__ __forceinline__ int idx_row_low(int y) const
        {
            return (::abs(y) - (y < 0)) % (last_row + 1);
        }

        // Reflect a too-large row index back toward last_row.  Unlike the 1-D
        // policies, the ::abs and % are deliberately commented out upstream:
        // idx_row() composes idx_row_low(idx_row_high(y)), so negative
        // intermediate values are folded there instead.  NOTE(review): this makes
        // idx_row_high alone valid only for y not far below 0 — confirm callers.
        __device__ __forceinline__ int idx_row_high(int y) const
        {
            return /*::abs*/(last_row - ::abs(last_row - y) + (y > last_row)) /*% (last_row + 1)*/;
        }

        __device__ __forceinline__ int idx_row(int y) const
        {
            return idx_row_low(idx_row_high(y));
        }

        // Column-direction analogues of the row helpers above.
        __device__ __forceinline__ int idx_col_low(int x) const
        {
            return (::abs(x) - (x < 0)) % (last_col + 1);
        }

        __device__ __forceinline__ int idx_col_high(int x) const
        {
            return (last_col - ::abs(last_col - x) + (x > last_col));
        }

        __device__ __forceinline__ int idx_col(int x) const
        {
            return idx_col_low(idx_col_high(x));
        }

        // step is the row pitch in bytes; the row base is advanced by whole rows
        // and then indexed by the reflected column.
        template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const
        {
            return saturate_cast<D>(((const T*)((const char*)data + idx_row(y) * step))[idx_col(x)]);
        }

        // Overload for pitched/texture-like sources exposing operator()(y, x).
        template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const
        {
            return saturate_cast<D>(src(idx_row(y), idx_col(x)));
        }

        int last_row;
        int last_col;
    };
|  | ||||
|     ////////////////////////////////////////////////////////////// | ||||
|     // BrdWrap | ||||
|  | ||||
|     template <typename D> struct BrdRowWrap | ||||
|     { | ||||
|         typedef D result_type; | ||||
|  | ||||
|         explicit __host__ __device__ __forceinline__ BrdRowWrap(int width_) : width(width_) {} | ||||
|         template <typename U> __host__ __device__ __forceinline__ BrdRowWrap(int width_, U) : width(width_) {} | ||||
|  | ||||
|         __device__ __forceinline__ int idx_col_low(int x) const | ||||
|         { | ||||
|             return (x >= 0) * x + (x < 0) * (x - ((x - width + 1) / width) * width); | ||||
|         } | ||||
|  | ||||
|         __device__ __forceinline__ int idx_col_high(int x) const | ||||
|         { | ||||
|             return (x < width) * x + (x >= width) * (x % width); | ||||
|         } | ||||
|  | ||||
|         __device__ __forceinline__ int idx_col(int x) const | ||||
|         { | ||||
|             return idx_col_high(idx_col_low(x)); | ||||
|         } | ||||
|  | ||||
|         template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const | ||||
|         { | ||||
|             return saturate_cast<D>(data[idx_col_low(x)]); | ||||
|         } | ||||
|  | ||||
|         template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const | ||||
|         { | ||||
|             return saturate_cast<D>(data[idx_col_high(x)]); | ||||
|         } | ||||
|  | ||||
|         template <typename T> __device__ __forceinline__ D at(int x, const T* data) const | ||||
|         { | ||||
|             return saturate_cast<D>(data[idx_col(x)]); | ||||
|         } | ||||
|  | ||||
|         int width; | ||||
|     }; | ||||
|  | ||||
|     template <typename D> struct BrdColWrap | ||||
|     { | ||||
|         typedef D result_type; | ||||
|  | ||||
|         explicit __host__ __device__ __forceinline__ BrdColWrap(int height_) : height(height_) {} | ||||
|         template <typename U> __host__ __device__ __forceinline__ BrdColWrap(int height_, U) : height(height_) {} | ||||
|  | ||||
|         __device__ __forceinline__ int idx_row_low(int y) const | ||||
|         { | ||||
|             return (y >= 0) * y + (y < 0) * (y - ((y - height + 1) / height) * height); | ||||
|         } | ||||
|  | ||||
|         __device__ __forceinline__ int idx_row_high(int y) const | ||||
|         { | ||||
|             return (y < height) * y + (y >= height) * (y % height); | ||||
|         } | ||||
|  | ||||
|         __device__ __forceinline__ int idx_row(int y) const | ||||
|         { | ||||
|             return idx_row_high(idx_row_low(y)); | ||||
|         } | ||||
|  | ||||
|         template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const | ||||
|         { | ||||
|             return saturate_cast<D>(*(const D*)((const char*)data + idx_row_low(y) * step)); | ||||
|         } | ||||
|  | ||||
|         template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const | ||||
|         { | ||||
|             return saturate_cast<D>(*(const D*)((const char*)data + idx_row_high(y) * step)); | ||||
|         } | ||||
|  | ||||
|         template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const | ||||
|         { | ||||
|             return saturate_cast<D>(*(const D*)((const char*)data + idx_row(y) * step)); | ||||
|         } | ||||
|  | ||||
|         int height; | ||||
|     }; | ||||
|  | ||||
|     template <typename D> struct BrdWrap | ||||
|     { | ||||
|         typedef D result_type; | ||||
|  | ||||
|         __host__ __device__ __forceinline__ BrdWrap(int height_, int width_) : | ||||
|             height(height_), width(width_) | ||||
|         { | ||||
|         } | ||||
|         template <typename U> | ||||
|         __host__ __device__ __forceinline__ BrdWrap(int height_, int width_, U) : | ||||
|             height(height_), width(width_) | ||||
|         { | ||||
|         } | ||||
|  | ||||
|         __device__ __forceinline__ int idx_row_low(int y) const | ||||
|         { | ||||
|             return (y >= 0) * y + (y < 0) * (y - ((y - height + 1) / height) * height); | ||||
|         } | ||||
|  | ||||
|         __device__ __forceinline__ int idx_row_high(int y) const | ||||
|         { | ||||
|             return (y < height) * y + (y >= height) * (y % height); | ||||
|         } | ||||
|  | ||||
|         __device__ __forceinline__ int idx_row(int y) const | ||||
|         { | ||||
|             return idx_row_high(idx_row_low(y)); | ||||
|         } | ||||
|  | ||||
|         __device__ __forceinline__ int idx_col_low(int x) const | ||||
|         { | ||||
|             return (x >= 0) * x + (x < 0) * (x - ((x - width + 1) / width) * width); | ||||
|         } | ||||
|  | ||||
|         __device__ __forceinline__ int idx_col_high(int x) const | ||||
|         { | ||||
|             return (x < width) * x + (x >= width) * (x % width); | ||||
|         } | ||||
|  | ||||
|         __device__ __forceinline__ int idx_col(int x) const | ||||
|         { | ||||
|             return idx_col_high(idx_col_low(x)); | ||||
|         } | ||||
|  | ||||
|         template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const | ||||
|         { | ||||
|             return saturate_cast<D>(((const T*)((const char*)data + idx_row(y) * step))[idx_col(x)]); | ||||
|         } | ||||
|  | ||||
|         template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const | ||||
|         { | ||||
|             return saturate_cast<D>(src(idx_row(y), idx_col(x))); | ||||
|         } | ||||
|  | ||||
|         int height; | ||||
|         int width; | ||||
|     }; | ||||
|  | ||||
|     ////////////////////////////////////////////////////////////// | ||||
|     // BorderReader | ||||
|  | ||||
|     template <typename Ptr2D, typename B> struct BorderReader | ||||
|     { | ||||
|         typedef typename B::result_type elem_type; | ||||
|         typedef typename Ptr2D::index_type index_type; | ||||
|  | ||||
|         __host__ __device__ __forceinline__ BorderReader(const Ptr2D& ptr_, const B& b_) : ptr(ptr_), b(b_) {} | ||||
|  | ||||
|         __device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const | ||||
|         { | ||||
|             return b.at(y, x, ptr); | ||||
|         } | ||||
|  | ||||
|         Ptr2D ptr; | ||||
|         B b; | ||||
|     }; | ||||
|  | ||||
|     // under win32 there is some bug with templated types that passed as kernel parameters | ||||
|     // with this specialization all works fine | ||||
|     template <typename Ptr2D, typename D> struct BorderReader< Ptr2D, BrdConstant<D> > | ||||
|     { | ||||
|         typedef typename BrdConstant<D>::result_type elem_type; | ||||
|         typedef typename Ptr2D::index_type index_type; | ||||
|  | ||||
|         __host__ __device__ __forceinline__ BorderReader(const Ptr2D& src_, const BrdConstant<D>& b) : | ||||
|             src(src_), height(b.height), width(b.width), val(b.val) | ||||
|         { | ||||
|         } | ||||
|  | ||||
|         __device__ __forceinline__ D operator ()(index_type y, index_type x) const | ||||
|         { | ||||
|             return (x >= 0 && x < width && y >= 0 && y < height) ? saturate_cast<D>(src(y, x)) : val; | ||||
|         } | ||||
|  | ||||
|         Ptr2D src; | ||||
|         int height; | ||||
|         int width; | ||||
|         D val; | ||||
|     }; | ||||
| }}} // namespace cv { namespace cuda { namespace cudev | ||||
|  | ||||
| //! @endcond | ||||
|  | ||||
| #endif // __OPENCV_CUDA_BORDER_INTERPOLATE_HPP__ | ||||
							
								
								
									
										309
									
								
								3rdparty/include/opencv2/core/cuda/color.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										309
									
								
								3rdparty/include/opencv2/core/cuda/color.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,309 @@ | ||||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | ||||
| // | ||||
| //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | ||||
| // | ||||
| //  By downloading, copying, installing or using the software you agree to this license. | ||||
| //  If you do not agree to this license, do not download, install, | ||||
| //  copy or use the software. | ||||
| // | ||||
| // | ||||
| //                           License Agreement | ||||
| //                For Open Source Computer Vision Library | ||||
| // | ||||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | ||||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | ||||
| // Third party copyrights are property of their respective owners. | ||||
| // | ||||
| // Redistribution and use in source and binary forms, with or without modification, | ||||
| // are permitted provided that the following conditions are met: | ||||
| // | ||||
| //   * Redistribution's of source code must retain the above copyright notice, | ||||
| //     this list of conditions and the following disclaimer. | ||||
| // | ||||
| //   * Redistribution's in binary form must reproduce the above copyright notice, | ||||
| //     this list of conditions and the following disclaimer in the documentation | ||||
| //     and/or other materials provided with the distribution. | ||||
| // | ||||
| //   * The name of the copyright holders may not be used to endorse or promote products | ||||
| //     derived from this software without specific prior written permission. | ||||
| // | ||||
| // This software is provided by the copyright holders and contributors "as is" and | ||||
| // any express or implied warranties, including, but not limited to, the implied | ||||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | ||||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | ||||
| // indirect, incidental, special, exemplary, or consequential damages | ||||
| // (including, but not limited to, procurement of substitute goods or services; | ||||
| // loss of use, data, or profits; or business interruption) however caused | ||||
| // and on any theory of liability, whether in contract, strict liability, | ||||
| // or tort (including negligence or otherwise) arising in any way out of | ||||
| // the use of this software, even if advised of the possibility of such damage. | ||||
| // | ||||
| //M*/ | ||||
|  | ||||
| #ifndef __OPENCV_CUDA_COLOR_HPP__ | ||||
| #define __OPENCV_CUDA_COLOR_HPP__ | ||||
|  | ||||
| #include "detail/color_detail.hpp" | ||||
|  | ||||
| /** @file | ||||
|  * @deprecated Use @ref cudev instead. | ||||
|  */ | ||||
|  | ||||
| //! @cond IGNORED | ||||
|  | ||||
namespace cv { namespace cuda { namespace device
{
    // Each OPENCV_CUDA_IMPLEMENT_*_TRAITS(ColorSpace1_to_ColorSpace2, ...) macro implements
    // template <typename T> class ColorSpace1_to_ColorSpace2_traits
    // {
    //     typedef ... functor_type;
    //     static __host__ __device__ functor_type create_functor();
    // };
    //
    // The macros are defined in detail/color_detail.hpp and each group is
    // #undef'd after use so they do not leak out of this header.

    OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgr_to_rgb, 3, 3, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgr_to_bgra, 3, 4, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgr_to_rgba, 3, 4, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgra_to_bgr, 4, 3, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgra_to_rgb, 4, 3, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgra_to_rgba, 4, 4, 2)

    #undef OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS

    OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(bgr_to_bgr555, 3, 0, 5)
    OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(bgr_to_bgr565, 3, 0, 6)
    OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(rgb_to_bgr555, 3, 2, 5)
    OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(rgb_to_bgr565, 3, 2, 6)
    OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(bgra_to_bgr555, 4, 0, 5)
    OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(bgra_to_bgr565, 4, 0, 6)
    OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(rgba_to_bgr555, 4, 2, 5)
    OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(rgba_to_bgr565, 4, 2, 6)

    #undef OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS

    OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_rgb, 3, 2, 5)
    OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_rgb, 3, 2, 6)
    OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_bgr, 3, 0, 5)
    OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_bgr, 3, 0, 6)
    OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_rgba, 4, 2, 5)
    OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_rgba, 4, 2, 6)
    OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_bgra, 4, 0, 5)
    OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_bgra, 4, 0, 6)

    #undef OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS

    OPENCV_CUDA_IMPLEMENT_GRAY2RGB_TRAITS(gray_to_bgr, 3)
    OPENCV_CUDA_IMPLEMENT_GRAY2RGB_TRAITS(gray_to_bgra, 4)

    #undef OPENCV_CUDA_IMPLEMENT_GRAY2RGB_TRAITS

    OPENCV_CUDA_IMPLEMENT_GRAY2RGB5x5_TRAITS(gray_to_bgr555, 5)
    OPENCV_CUDA_IMPLEMENT_GRAY2RGB5x5_TRAITS(gray_to_bgr565, 6)

    #undef OPENCV_CUDA_IMPLEMENT_GRAY2RGB5x5_TRAITS

    OPENCV_CUDA_IMPLEMENT_RGB5x52GRAY_TRAITS(bgr555_to_gray, 5)
    OPENCV_CUDA_IMPLEMENT_RGB5x52GRAY_TRAITS(bgr565_to_gray, 6)

    #undef OPENCV_CUDA_IMPLEMENT_RGB5x52GRAY_TRAITS

    OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS(rgb_to_gray, 3, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS(bgr_to_gray, 3, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS(rgba_to_gray, 4, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS(bgra_to_gray, 4, 0)

    #undef OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS

    OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(rgb_to_yuv, 3, 3, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(rgba_to_yuv, 4, 3, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(rgb_to_yuv4, 3, 4, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(rgba_to_yuv4, 4, 4, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(bgr_to_yuv, 3, 3, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(bgra_to_yuv, 4, 3, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(bgr_to_yuv4, 3, 4, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(bgra_to_yuv4, 4, 4, 0)

    #undef OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS

    OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_rgb, 3, 3, 2)
    OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_rgba, 3, 4, 2)
    OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_rgb, 4, 3, 2)
    OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_rgba, 4, 4, 2)
    OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_bgr, 3, 3, 0)
    OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_bgra, 3, 4, 0)
    OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_bgr, 4, 3, 0)
    OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_bgra, 4, 4, 0)

    #undef OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS

    OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(rgb_to_YCrCb, 3, 3, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(rgba_to_YCrCb, 4, 3, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(rgb_to_YCrCb4, 3, 4, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(rgba_to_YCrCb4, 4, 4, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(bgr_to_YCrCb, 3, 3, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(bgra_to_YCrCb, 4, 3, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(bgr_to_YCrCb4, 3, 4, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(bgra_to_YCrCb4, 4, 4, 0)

    #undef OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS

    OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_rgb, 3, 3, 2)
    OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_rgba, 3, 4, 2)
    OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_rgb, 4, 3, 2)
    OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_rgba, 4, 4, 2)
    OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_bgr, 3, 3, 0)
    OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_bgra, 3, 4, 0)
    OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_bgr, 4, 3, 0)
    OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_bgra, 4, 4, 0)

    #undef OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS

    OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(rgb_to_xyz, 3, 3, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(rgba_to_xyz, 4, 3, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(rgb_to_xyz4, 3, 4, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(rgba_to_xyz4, 4, 4, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(bgr_to_xyz, 3, 3, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(bgra_to_xyz, 4, 3, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(bgr_to_xyz4, 3, 4, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(bgra_to_xyz4, 4, 4, 0)

    #undef OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS

    OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_rgb, 3, 3, 2)
    OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_rgb, 4, 3, 2)
    OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_rgba, 3, 4, 2)
    OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_rgba, 4, 4, 2)
    OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_bgr, 3, 3, 0)
    OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_bgr, 4, 3, 0)
    OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_bgra, 3, 4, 0)
    OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_bgra, 4, 4, 0)

    #undef OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS

    OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(rgb_to_hsv, 3, 3, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(rgba_to_hsv, 4, 3, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(rgb_to_hsv4, 3, 4, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(rgba_to_hsv4, 4, 4, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(bgr_to_hsv, 3, 3, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(bgra_to_hsv, 4, 3, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(bgr_to_hsv4, 3, 4, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(bgra_to_hsv4, 4, 4, 0)

    #undef OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS

    OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_rgb, 3, 3, 2)
    OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_rgba, 3, 4, 2)
    OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_rgb, 4, 3, 2)
    OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_rgba, 4, 4, 2)
    OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_bgr, 3, 3, 0)
    OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_bgra, 3, 4, 0)
    OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_bgr, 4, 3, 0)
    OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_bgra, 4, 4, 0)

    #undef OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS

    OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(rgb_to_hls, 3, 3, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(rgba_to_hls, 4, 3, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(rgb_to_hls4, 3, 4, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(rgba_to_hls4, 4, 4, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(bgr_to_hls, 3, 3, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(bgra_to_hls, 4, 3, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(bgr_to_hls4, 3, 4, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(bgra_to_hls4, 4, 4, 0)

    #undef OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS

    OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls_to_rgb, 3, 3, 2)
    OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls_to_rgba, 3, 4, 2)
    OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_rgb, 4, 3, 2)
    OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_rgba, 4, 4, 2)
    OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls_to_bgr, 3, 3, 0)
    OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls_to_bgra, 3, 4, 0)
    OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_bgr, 4, 3, 0)
    OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_bgra, 4, 4, 0)

    #undef OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS

    // The boolean parameter in the Lab/Luv groups selects sRGB (true) versus
    // linear-RGB (false) variants, per the l-prefixed names.
    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(rgb_to_lab, 3, 3, true, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(rgba_to_lab, 4, 3, true, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(rgb_to_lab4, 3, 4, true, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(rgba_to_lab4, 4, 4, true, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(bgr_to_lab, 3, 3, true, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(bgra_to_lab, 4, 3, true, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(bgr_to_lab4, 3, 4, true, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(bgra_to_lab4, 4, 4, true, 0)

    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lrgb_to_lab, 3, 3, false, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lrgba_to_lab, 4, 3, false, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lrgb_to_lab4, 3, 4, false, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lrgba_to_lab4, 4, 4, false, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lbgr_to_lab, 3, 3, false, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lbgra_to_lab, 4, 3, false, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lbgr_to_lab4, 3, 4, false, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lbgra_to_lab4, 4, 4, false, 0)

    #undef OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS

    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_rgb, 3, 3, true, 2)
    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_rgb, 4, 3, true, 2)
    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_rgba, 3, 4, true, 2)
    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_rgba, 4, 4, true, 2)
    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_bgr, 3, 3, true, 0)
    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_bgr, 4, 3, true, 0)
    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_bgra, 3, 4, true, 0)
    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_bgra, 4, 4, true, 0)

    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_lrgb, 3, 3, false, 2)
    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_lrgb, 4, 3, false, 2)
    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_lrgba, 3, 4, false, 2)
    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_lrgba, 4, 4, false, 2)
    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_lbgr, 3, 3, false, 0)
    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_lbgr, 4, 3, false, 0)
    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_lbgra, 3, 4, false, 0)
    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_lbgra, 4, 4, false, 0)

    #undef OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS

    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(rgb_to_luv, 3, 3, true, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(rgba_to_luv, 4, 3, true, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(rgb_to_luv4, 3, 4, true, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(rgba_to_luv4, 4, 4, true, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(bgr_to_luv, 3, 3, true, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(bgra_to_luv, 4, 3, true, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(bgr_to_luv4, 3, 4, true, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(bgra_to_luv4, 4, 4, true, 0)

    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lrgb_to_luv, 3, 3, false, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lrgba_to_luv, 4, 3, false, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lrgb_to_luv4, 3, 4, false, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lrgba_to_luv4, 4, 4, false, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lbgr_to_luv, 3, 3, false, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lbgra_to_luv, 4, 3, false, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lbgr_to_luv4, 3, 4, false, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lbgra_to_luv4, 4, 4, false, 0)

    #undef OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS

    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_rgb, 3, 3, true, 2)
    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_rgb, 4, 3, true, 2)
    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_rgba, 3, 4, true, 2)
    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_rgba, 4, 4, true, 2)
    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_bgr, 3, 3, true, 0)
    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_bgr, 4, 3, true, 0)
    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_bgra, 3, 4, true, 0)
    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_bgra, 4, 4, true, 0)

    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_lrgb, 3, 3, false, 2)
    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_lrgb, 4, 3, false, 2)
    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_lrgba, 3, 4, false, 2)
    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_lrgba, 4, 4, false, 2)
    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_lbgr, 3, 3, false, 0)
    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_lbgr, 4, 3, false, 0)
    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_lbgra, 3, 4, false, 0)
    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_lbgra, 4, 4, false, 0)

    #undef OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS
}}} // namespace cv { namespace cuda { namespace device
|  | ||||
| //! @endcond | ||||
|  | ||||
#endif // __OPENCV_CUDA_COLOR_HPP__
							
								
								
									
										109
									
								
								3rdparty/include/opencv2/core/cuda/common.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										109
									
								
								3rdparty/include/opencv2/core/cuda/common.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,109 @@ | ||||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | ||||
| // | ||||
| //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | ||||
| // | ||||
| //  By downloading, copying, installing or using the software you agree to this license. | ||||
| //  If you do not agree to this license, do not download, install, | ||||
| //  copy or use the software. | ||||
| // | ||||
| // | ||||
| //                           License Agreement | ||||
| //                For Open Source Computer Vision Library | ||||
| // | ||||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | ||||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | ||||
| // Third party copyrights are property of their respective owners. | ||||
| // | ||||
| // Redistribution and use in source and binary forms, with or without modification, | ||||
| // are permitted provided that the following conditions are met: | ||||
| // | ||||
| //   * Redistribution's of source code must retain the above copyright notice, | ||||
| //     this list of conditions and the following disclaimer. | ||||
| // | ||||
| //   * Redistribution's in binary form must reproduce the above copyright notice, | ||||
| //     this list of conditions and the following disclaimer in the documentation | ||||
| //     and/or other materials provided with the distribution. | ||||
| // | ||||
| //   * The name of the copyright holders may not be used to endorse or promote products | ||||
| //     derived from this software without specific prior written permission. | ||||
| // | ||||
| // This software is provided by the copyright holders and contributors "as is" and | ||||
| // any express or implied warranties, including, but not limited to, the implied | ||||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | ||||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | ||||
| // indirect, incidental, special, exemplary, or consequential damages | ||||
| // (including, but not limited to, procurement of substitute goods or services; | ||||
| // loss of use, data, or profits; or business interruption) however caused | ||||
| // and on any theory of liability, whether in contract, strict liability, | ||||
| // or tort (including negligence or otherwise) arising in any way out of | ||||
| // the use of this software, even if advised of the possibility of such damage. | ||||
| // | ||||
| //M*/ | ||||
|  | ||||
| #ifndef __OPENCV_CUDA_COMMON_HPP__ | ||||
| #define __OPENCV_CUDA_COMMON_HPP__ | ||||
|  | ||||
| #include <cuda_runtime.h> | ||||
| #include "opencv2/core/cuda_types.hpp" | ||||
| #include "opencv2/core/cvdef.h" | ||||
| #include "opencv2/core/base.hpp" | ||||
|  | ||||
| /** @file | ||||
|  * @deprecated Use @ref cudev instead. | ||||
|  */ | ||||
|  | ||||
| //! @cond IGNORED | ||||
|  | ||||
#ifndef CV_PI_F
    #ifndef CV_PI
        // CV_PI is unavailable (core headers not seen yet): use a float literal.
        #define CV_PI_F 3.14159265f
    #else
        // Reuse the double-precision CV_PI from the core headers, narrowed to float.
        #define CV_PI_F ((float)CV_PI)
    #endif
#endif
|  | ||||
namespace cv { namespace cuda {
    // Translates a failed CUDA runtime status into an OpenCV exception.
    // Does nothing when err == cudaSuccess; otherwise raises
    // cv::Error::GpuApiCallError carrying cudaGetErrorString(err) together
    // with the call site (func/file/line) supplied by the cudaSafeCall macro.
    static inline void checkCudaError(cudaError_t err, const char* file, const int line, const char* func)
    {
        if (err == cudaSuccess)
            return;

        cv::error(cv::Error::GpuApiCallError, cudaGetErrorString(err), func, file, line);
    }
}}
|  | ||||
#ifndef cudaSafeCall
    // Wraps a CUDA runtime call: forwards its cudaError_t result to
    // cv::cuda::checkCudaError together with the call site, so failures
    // surface as OpenCV exceptions instead of being silently dropped.
    #define cudaSafeCall(expr)  cv::cuda::checkCudaError(expr, __FILE__, __LINE__, CV_Func)
#endif
|  | ||||
namespace cv { namespace cuda
{
    //! Reports whether a pointer address is an exact multiple of `size` bytes.
    template <typename T> static inline bool isAligned(const T* ptr, size_t size)
    {
        const size_t address = reinterpret_cast<size_t>(ptr);
        return (address % size) == 0;
    }

    //! Reports whether a row stride (in bytes) is an exact multiple of `size` bytes.
    static inline bool isAligned(size_t step, size_t size)
    {
        return (step % size) == 0;
    }
}}
|  | ||||
namespace cv { namespace cuda
{
    namespace device
    {
        // Integer ceiling division: smallest k with k * grain >= total.
        // Used to size CUDA grids. Assumes grain > 0 and that
        // total + grain - 1 does not overflow int.
        __host__ __device__ __forceinline__ int divUp(int total, int grain)
        {
            return (total + grain - 1) / grain;
        }

        // Binds a 2D device image to a texture reference via the legacy
        // texture-reference API. The channel format is derived from T and
        // img.step supplies the row pitch in bytes.
        // NOTE(review): cudaBindTexture2D belongs to the deprecated
        // texture-reference API (removed in later CUDA toolkits in favor of
        // texture objects) -- confirm the target toolkit before reuse.
        template<class T> inline void bindTexture(const textureReference* tex, const PtrStepSz<T>& img)
        {
            cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
            cudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) );
        }
    }
}}
|  | ||||
| //! @endcond | ||||
|  | ||||
| #endif // __OPENCV_CUDA_COMMON_HPP__ | ||||
							
								
								
									
										113
									
								
								3rdparty/include/opencv2/core/cuda/datamov_utils.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										113
									
								
								3rdparty/include/opencv2/core/cuda/datamov_utils.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,113 @@ | ||||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | ||||
| // | ||||
| //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | ||||
| // | ||||
| //  By downloading, copying, installing or using the software you agree to this license. | ||||
| //  If you do not agree to this license, do not download, install, | ||||
| //  copy or use the software. | ||||
| // | ||||
| // | ||||
| //                           License Agreement | ||||
| //                For Open Source Computer Vision Library | ||||
| // | ||||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | ||||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | ||||
| // Third party copyrights are property of their respective owners. | ||||
| // | ||||
| // Redistribution and use in source and binary forms, with or without modification, | ||||
| // are permitted provided that the following conditions are met: | ||||
| // | ||||
| //   * Redistribution's of source code must retain the above copyright notice, | ||||
| //     this list of conditions and the following disclaimer. | ||||
| // | ||||
| //   * Redistribution's in binary form must reproduce the above copyright notice, | ||||
| //     this list of conditions and the following disclaimer in the documentation | ||||
| //     and/or other materials provided with the distribution. | ||||
| // | ||||
| //   * The name of the copyright holders may not be used to endorse or promote products | ||||
| //     derived from this software without specific prior written permission. | ||||
| // | ||||
| // This software is provided by the copyright holders and contributors "as is" and | ||||
| // any express or implied warranties, including, but not limited to, the implied | ||||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | ||||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | ||||
| // indirect, incidental, special, exemplary, or consequential damages | ||||
| // (including, but not limited to, procurement of substitute goods or services; | ||||
| // loss of use, data, or profits; or business interruption) however caused | ||||
| // and on any theory of liability, whether in contract, strict liability, | ||||
| // or tort (including negligence or otherwise) arising in any way out of | ||||
| // the use of this software, even if advised of the possibility of such damage. | ||||
| // | ||||
| //M*/ | ||||
|  | ||||
| #ifndef __OPENCV_CUDA_DATAMOV_UTILS_HPP__ | ||||
| #define __OPENCV_CUDA_DATAMOV_UTILS_HPP__ | ||||
|  | ||||
| #include "common.hpp" | ||||
|  | ||||
| /** @file | ||||
|  * @deprecated Use @ref cudev instead. | ||||
|  */ | ||||
|  | ||||
| //! @cond IGNORED | ||||
|  | ||||
namespace cv { namespace cuda { namespace device
{
    // ForceGlob<T>::Load(ptr, offset, val) reads val = ptr[offset] while
    // guaranteeing the load is serviced from global memory.
    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 200

        // for Fermi memory space is detected automatically
        template <typename T> struct ForceGlob
        {
            __device__ __forceinline__ static void Load(const T* ptr, int offset, T& val)  { val = ptr[offset];  }
        };

    #else // __CUDA_ARCH__ >= 200

        // Pre-Fermi (sm_1x): emit an explicit "ld.global" via inline PTX.
        // The pointer operand's register class depends on the pointer width.
        #if defined(_WIN64) || defined(__LP64__)
            // 64-bit register modifier for inlined asm
            #define OPENCV_CUDA_ASM_PTR "l"
        #else
            // 32-bit register modifier for inlined asm
            #define OPENCV_CUDA_ASM_PTR "r"
        #endif

        template<class T> struct ForceGlob;

        // Generates ForceGlob<base_type>::Load as "ld.global.<ptx_type>" with
        // destination register class reg_mod (h = 16-bit, r = 32-bit,
        // f = float, d = double).
        #define OPENCV_CUDA_DEFINE_FORCE_GLOB(base_type, ptx_type, reg_mod) \
            template <> struct ForceGlob<base_type> \
            { \
                __device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \
                { \
                    asm("ld.global."#ptx_type" %0, [%1];" : "="#reg_mod(val) : OPENCV_CUDA_ASM_PTR(ptr + offset)); \
                } \
            };

        // Byte-sized variant: PTX has no 8-bit register class, so the result
        // goes through a 32-bit register written via a uint alias of val.
        // NOTE(review): the "=r" constraint stores 4 bytes through
        // reinterpret_cast<uint*>(&val) although val is 1 byte wide --
        // relies on surrounding storage being writable; confirm intent.
        #define OPENCV_CUDA_DEFINE_FORCE_GLOB_B(base_type, ptx_type) \
            template <> struct ForceGlob<base_type> \
            { \
                __device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \
                { \
                    asm("ld.global."#ptx_type" %0, [%1];" : "=r"(*reinterpret_cast<uint*>(&val)) : OPENCV_CUDA_ASM_PTR(ptr + offset)); \
                } \
            };

            OPENCV_CUDA_DEFINE_FORCE_GLOB_B(uchar,  u8)
            OPENCV_CUDA_DEFINE_FORCE_GLOB_B(schar,  s8)
            OPENCV_CUDA_DEFINE_FORCE_GLOB_B(char,   b8)
            OPENCV_CUDA_DEFINE_FORCE_GLOB  (ushort, u16, h)
            OPENCV_CUDA_DEFINE_FORCE_GLOB  (short,  s16, h)
            OPENCV_CUDA_DEFINE_FORCE_GLOB  (uint,   u32, r)
            OPENCV_CUDA_DEFINE_FORCE_GLOB  (int,    s32, r)
            OPENCV_CUDA_DEFINE_FORCE_GLOB  (float,  f32, f)
            OPENCV_CUDA_DEFINE_FORCE_GLOB  (double, f64, d)

        #undef OPENCV_CUDA_DEFINE_FORCE_GLOB
        #undef OPENCV_CUDA_DEFINE_FORCE_GLOB_B
        #undef OPENCV_CUDA_ASM_PTR

    #endif // __CUDA_ARCH__ >= 200
}}} // namespace cv { namespace cuda { namespace device
|  | ||||
| //! @endcond | ||||
|  | ||||
| #endif // __OPENCV_CUDA_DATAMOV_UTILS_HPP__ | ||||
							
								
								
									
										1980
									
								
								3rdparty/include/opencv2/core/cuda/detail/color_detail.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										1980
									
								
								3rdparty/include/opencv2/core/cuda/detail/color_detail.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							
							
								
								
									
										365
									
								
								3rdparty/include/opencv2/core/cuda/detail/reduce.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										365
									
								
								3rdparty/include/opencv2/core/cuda/detail/reduce.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,365 @@ | ||||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | ||||
| // | ||||
| //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | ||||
| // | ||||
| //  By downloading, copying, installing or using the software you agree to this license. | ||||
| //  If you do not agree to this license, do not download, install, | ||||
| //  copy or use the software. | ||||
| // | ||||
| // | ||||
| //                           License Agreement | ||||
| //                For Open Source Computer Vision Library | ||||
| // | ||||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | ||||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | ||||
| // Third party copyrights are property of their respective owners. | ||||
| // | ||||
| // Redistribution and use in source and binary forms, with or without modification, | ||||
| // are permitted provided that the following conditions are met: | ||||
| // | ||||
| //   * Redistribution's of source code must retain the above copyright notice, | ||||
| //     this list of conditions and the following disclaimer. | ||||
| // | ||||
| //   * Redistribution's in binary form must reproduce the above copyright notice, | ||||
| //     this list of conditions and the following disclaimer in the documentation | ||||
| //     and/or other materials provided with the distribution. | ||||
| // | ||||
| //   * The name of the copyright holders may not be used to endorse or promote products | ||||
| //     derived from this software without specific prior written permission. | ||||
| // | ||||
| // This software is provided by the copyright holders and contributors "as is" and | ||||
| // any express or implied warranties, including, but not limited to, the implied | ||||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | ||||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | ||||
| // indirect, incidental, special, exemplary, or consequential damages | ||||
| // (including, but not limited to, procurement of substitute goods or services; | ||||
| // loss of use, data, or profits; or business interruption) however caused | ||||
| // and on any theory of liability, whether in contract, strict liability, | ||||
| // or tort (including negligence or otherwise) arising in any way out of | ||||
| // the use of this software, even if advised of the possibility of such damage. | ||||
| // | ||||
| //M*/ | ||||
|  | ||||
| #ifndef __OPENCV_CUDA_REDUCE_DETAIL_HPP__ | ||||
| #define __OPENCV_CUDA_REDUCE_DETAIL_HPP__ | ||||
|  | ||||
| #include <thrust/tuple.h> | ||||
| #include "../warp.hpp" | ||||
| #include "../warp_shuffle.hpp" | ||||
|  | ||||
| //! @cond IGNORED | ||||
|  | ||||
| namespace cv { namespace cuda { namespace device | ||||
| { | ||||
|     namespace reduce_detail | ||||
|     { | ||||
|         template <typename T> struct GetType; | ||||
|         template <typename T> struct GetType<T*> | ||||
|         { | ||||
|             typedef T type; | ||||
|         }; | ||||
|         template <typename T> struct GetType<volatile T*> | ||||
|         { | ||||
|             typedef T type; | ||||
|         }; | ||||
|         template <typename T> struct GetType<T&> | ||||
|         { | ||||
|             typedef T type; | ||||
|         }; | ||||
|  | ||||
        // Compile-time recursion over tuple elements [I, N): applies the same
        // per-element operation to each member of a tuple of shared-memory
        // pointers / values / operators, recursing on I + 1 until the
        // For<N, N> terminator below is reached.
        template <unsigned int I, unsigned int N>
        struct For
        {
            // Stores element I of `val` into slot `tid` of the I-th smem array.
            template <class PointerTuple, class ValTuple>
            static __device__ void loadToSmem(const PointerTuple& smem, const ValTuple& val, unsigned int tid)
            {
                thrust::get<I>(smem)[tid] = thrust::get<I>(val);

                For<I + 1, N>::loadToSmem(smem, val, tid);
            }
            // Reloads element I of `val` from slot `tid` of the I-th smem array.
            template <class PointerTuple, class ValTuple>
            static __device__ void loadFromSmem(const PointerTuple& smem, const ValTuple& val, unsigned int tid)
            {
                thrust::get<I>(val) = thrust::get<I>(smem)[tid];

                For<I + 1, N>::loadFromSmem(smem, val, tid);
            }

            // One reduction step for element I: combine the value `delta`
            // slots away with this thread's value via op, and publish the
            // result back to both smem and val.
            template <class PointerTuple, class ValTuple, class OpTuple>
            static __device__ void merge(const PointerTuple& smem, const ValTuple& val, unsigned int tid, unsigned int delta, const OpTuple& op)
            {
                typename GetType<typename thrust::tuple_element<I, PointerTuple>::type>::type reg = thrust::get<I>(smem)[tid + delta];
                thrust::get<I>(smem)[tid] = thrust::get<I>(val) = thrust::get<I>(op)(thrust::get<I>(val), reg);

                For<I + 1, N>::merge(smem, val, tid, delta, op);
            }
            // Register-only variant of merge: pulls the partner value from the
            // lane `delta` positions down via a warp shuffle (shfl_down from
            // ../warp_shuffle.hpp) instead of shared memory.
            template <class ValTuple, class OpTuple>
            static __device__ void mergeShfl(const ValTuple& val, unsigned int delta, unsigned int width, const OpTuple& op)
            {
                typename GetType<typename thrust::tuple_element<I, ValTuple>::type>::type reg = shfl_down(thrust::get<I>(val), delta, width);
                thrust::get<I>(val) = thrust::get<I>(op)(thrust::get<I>(val), reg);

                For<I + 1, N>::mergeShfl(val, delta, width, op);
            }
        };
        // Recursion terminator: all tuple elements in [0, N) have been
        // processed, so every operation is a no-op.
        template <unsigned int N>
        struct For<N, N>
        {
            template <class PointerTuple, class ValTuple>
            static __device__ void loadToSmem(const PointerTuple&, const ValTuple&, unsigned int)
            {
            }
            template <class PointerTuple, class ValTuple>
            static __device__ void loadFromSmem(const PointerTuple&, const ValTuple&, unsigned int)
            {
            }

            template <class PointerTuple, class ValTuple, class OpTuple>
            static __device__ void merge(const PointerTuple&, const ValTuple&, unsigned int, unsigned int, const OpTuple&)
            {
            }
            template <class ValTuple, class OpTuple>
            static __device__ void mergeShfl(const ValTuple&, unsigned int, unsigned int, const OpTuple&)
            {
            }
        };
|  | ||||
        // Scalar: store this thread's value into its shared-memory slot.
        template <typename T>
        __device__ __forceinline__ void loadToSmem(volatile T* smem, T& val, unsigned int tid)
        {
            smem[tid] = val;
        }
        // Scalar: reload this thread's value from its shared-memory slot.
        template <typename T>
        __device__ __forceinline__ void loadFromSmem(volatile T* smem, T& val, unsigned int tid)
        {
            val = smem[tid];
        }
        // Tuple overload: element-wise store of up to 10 values into their
        // respective smem arrays (thrust::tuple's fixed arity in this
        // C++03-era code; unused slots are thrust::null_type).
        template <typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9,
                  typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9>
        __device__ __forceinline__ void loadToSmem(const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
                                                       const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
                                                       unsigned int tid)
        {
            For<0, thrust::tuple_size<thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> >::value>::loadToSmem(smem, val, tid);
        }
        // Tuple overload: element-wise reload of up to 10 values.
        template <typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9,
                  typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9>
        __device__ __forceinline__ void loadFromSmem(const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
                                                         const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
                                                         unsigned int tid)
        {
            For<0, thrust::tuple_size<thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> >::value>::loadFromSmem(smem, val, tid);
        }
|  | ||||
        // Scalar reduction step: combine this thread's value with the one
        // `delta` slots away and publish the result to both smem and val.
        // Caller is responsible for any synchronization between steps.
        template <typename T, class Op>
        __device__ __forceinline__ void merge(volatile T* smem, T& val, unsigned int tid, unsigned int delta, const Op& op)
        {
            T reg = smem[tid + delta];
            smem[tid] = val = op(val, reg);
        }
        // Scalar register-only step: pull the partner value from the lane
        // `delta` positions down with a warp shuffle (shfl_down from
        // ../warp_shuffle.hpp); callers guard this path with
        // __CUDA_ARCH__ >= 300.
        template <typename T, class Op>
        __device__ __forceinline__ void mergeShfl(T& val, unsigned int delta, unsigned int width, const Op& op)
        {
            T reg = shfl_down(val, delta, width);
            val = op(val, reg);
        }
        // Tuple overload: apply the matching Op to each tuple element.
        template <typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9,
                  typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9,
                  class Op0, class Op1, class Op2, class Op3, class Op4, class Op5, class Op6, class Op7, class Op8, class Op9>
        __device__ __forceinline__ void merge(const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
                                              const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
                                              unsigned int tid,
                                              unsigned int delta,
                                              const thrust::tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>& op)
        {
            For<0, thrust::tuple_size<thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> >::value>::merge(smem, val, tid, delta, op);
        }
        // Tuple overload of the shuffle-based step.
        template <typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9,
                  class Op0, class Op1, class Op2, class Op3, class Op4, class Op5, class Op6, class Op7, class Op8, class Op9>
        __device__ __forceinline__ void mergeShfl(const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
                                                  unsigned int delta,
                                                  unsigned int width,
                                                  const thrust::tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>& op)
        {
            For<0, thrust::tuple_size<thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9> >::value>::mergeShfl(val, delta, width, op);
        }
|  | ||||
        // Fallback shared-memory tree reduction for a block of N threads.
        // `smem` must provide at least N slots; callers conventionally pass a
        // volatile pointer (or tuple of them) so the unsynchronized last-warp
        // steps below observe fresh values.
        template <unsigned int N> struct Generic
        {
            template <typename Pointer, typename Reference, class Op>
            static __device__ void reduce(Pointer smem, Reference val, unsigned int tid, Op op)
            {
                loadToSmem(smem, val, tid);
                if (N >= 32)
                    __syncthreads();

                // Halve the active range each step; a block-wide barrier
                // follows every step that spans more than one warp.
                if (N >= 2048)
                {
                    if (tid < 1024)
                        merge(smem, val, tid, 1024, op);

                    __syncthreads();
                }
                if (N >= 1024)
                {
                    if (tid < 512)
                        merge(smem, val, tid, 512, op);

                    __syncthreads();
                }
                if (N >= 512)
                {
                    if (tid < 256)
                        merge(smem, val, tid, 256, op);

                    __syncthreads();
                }
                if (N >= 256)
                {
                    if (tid < 128)
                        merge(smem, val, tid, 128, op);

                    __syncthreads();
                }
                if (N >= 128)
                {
                    if (tid < 64)
                        merge(smem, val, tid, 64, op);

                    __syncthreads();
                }
                if (N >= 64)
                {
                    if (tid < 32)
                        merge(smem, val, tid, 32, op);
                }

                // Remaining steps stay inside one warp, so no __syncthreads().
                // NOTE(review): this relies on pre-Volta implicit warp
                // synchrony plus volatile smem reads; on independent-
                // thread-scheduling GPUs the pattern needs __syncwarp() --
                // confirm target architectures.
                if (tid < 16)
                {
                    merge(smem, val, tid, 16, op);
                    merge(smem, val, tid, 8, op);
                    merge(smem, val, tid, 4, op);
                    merge(smem, val, tid, 2, op);
                    merge(smem, val, tid, 1, op);
                }
            }
        };
|  | ||||
        // Compile-time unrolled halving loop: performs merge steps with
        // offsets I, I/2, I/4, ..., 1, terminating at the I == 0
        // specialization below.
        template <unsigned int I, typename Pointer, typename Reference, class Op>
        struct Unroll
        {
            // Shuffle-based step sequence (register-only, width N).
            static __device__ void loopShfl(Reference val, Op op, unsigned int N)
            {
                mergeShfl(val, I, N, op);
                Unroll<I / 2, Pointer, Reference, Op>::loopShfl(val, op, N);
            }
            // Shared-memory step sequence.
            static __device__ void loop(Pointer smem, Reference val, unsigned int tid, Op op)
            {
                merge(smem, val, tid, I, op);
                Unroll<I / 2, Pointer, Reference, Op>::loop(smem, val, tid, op);
            }
        };
        // Terminator: offset reached zero, nothing left to merge.
        template <typename Pointer, typename Reference, class Op>
        struct Unroll<0, Pointer, Reference, Op>
        {
            static __device__ void loopShfl(Reference, Op, unsigned int)
            {
            }
            static __device__ void loop(Pointer, Reference, unsigned int, Op)
            {
            }
        };
|  | ||||
        // Reduction strategy for N <= 32 threads (selected by Dispatcher only
        // when N is a power of two): the whole reduction fits in one warp.
        template <unsigned int N> struct WarpOptimized
        {
            template <typename Pointer, typename Reference, class Op>
            static __device__ void reduce(Pointer smem, Reference val, unsigned int tid, Op op)
            {
            #if __CUDA_ARCH__ >= 300
                // Kepler+: reduce entirely in registers via warp shuffles;
                // shared memory is never touched.
                (void) smem;
                (void) tid;

                Unroll<N / 2, Pointer, Reference, Op>::loopShfl(val, op, N);
            #else
                // Older architectures: shared-memory halving without barriers
                // (all participants are within a single warp).
                loadToSmem(smem, val, tid);

                if (tid < N / 2)
                    Unroll<N / 2, Pointer, Reference, Op>::loop(smem, val, tid, op);
            #endif
            }
        };
|  | ||||
        // Two-phase reduction for power-of-two N in (32, 1024]: each warp
        // first reduces its own 32 values, the warp leaders publish M = N/32
        // partial results into smem slots [0, M), and the first warp then
        // reduces those partials.
        template <unsigned int N> struct GenericOptimized32
        {
            enum { M = N / 32 };  // number of warps == number of partial results

            template <typename Pointer, typename Reference, class Op>
            static __device__ void reduce(Pointer smem, Reference val, unsigned int tid, Op op)
            {
                const unsigned int laneId = Warp::laneId();

            #if __CUDA_ARCH__ >= 300
                // Kepler+: intra-warp phase in registers; lane 0 of each warp
                // stores its warp's partial result into smem slot tid / 32.
                Unroll<16, Pointer, Reference, Op>::loopShfl(val, op, warpSize);

                if (laneId == 0)
                    loadToSmem(smem, val, tid / 32);
            #else
                // Pre-Kepler: intra-warp phase through shared memory (no
                // barrier needed within a warp), then publish the partial.
                loadToSmem(smem, val, tid);

                if (laneId < 16)
                    Unroll<16, Pointer, Reference, Op>::loop(smem, val, tid, op);

                __syncthreads();

                if (laneId == 0)
                    loadToSmem(smem, val, tid / 32);
            #endif

                __syncthreads();

                // Every thread reloads; only the first warp reduces the M
                // partials below. NOTE(review): threads with tid >= M reload
                // slots beyond the partials -- their val appears unused by
                // callers (result read at tid == 0); confirm.
                loadFromSmem(smem, val, tid);

                if (tid < 32)
                {
                #if __CUDA_ARCH__ >= 300
                    Unroll<M / 2, Pointer, Reference, Op>::loopShfl(val, op, M);
                #else
                    Unroll<M / 2, Pointer, Reference, Op>::loop(smem, val, tid, op);
                #endif
                }
            }
        };
|  | ||||
|         template <bool val, class T1, class T2> struct StaticIf; | ||||
|         template <class T1, class T2> struct StaticIf<true, T1, T2> | ||||
|         { | ||||
|             typedef T1 type; | ||||
|         }; | ||||
|         template <class T1, class T2> struct StaticIf<false, T1, T2> | ||||
|         { | ||||
|             typedef T2 type; | ||||
|         }; | ||||
|  | ||||
|         template <unsigned int N> struct IsPowerOf2 | ||||
|         { | ||||
|             enum { value = ((N != 0) && !(N & (N - 1))) }; | ||||
|         }; | ||||
|  | ||||
        // Chooses the reduction strategy for a block of N threads:
        //   - N a power of two and <= 32   -> WarpOptimized<N>
        //   - N a power of two and <= 1024 -> GenericOptimized32<N>
        //   - anything else                -> Generic<N>
        template <unsigned int N> struct Dispatcher
        {
            typedef typename StaticIf<
                (N <= 32) && IsPowerOf2<N>::value,
                WarpOptimized<N>,
                typename StaticIf<
                    (N <= 1024) && IsPowerOf2<N>::value,
                    GenericOptimized32<N>,
                    Generic<N>
                >::type
            >::type reductor;
        };
|     } | ||||
| }}} | ||||
|  | ||||
| //! @endcond | ||||
|  | ||||
| #endif // __OPENCV_CUDA_REDUCE_DETAIL_HPP__ | ||||
							
								
								
									
										502
									
								
								3rdparty/include/opencv2/core/cuda/detail/reduce_key_val.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										502
									
								
								3rdparty/include/opencv2/core/cuda/detail/reduce_key_val.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,502 @@ | ||||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | ||||
| // | ||||
| //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | ||||
| // | ||||
| //  By downloading, copying, installing or using the software you agree to this license. | ||||
| //  If you do not agree to this license, do not download, install, | ||||
| //  copy or use the software. | ||||
| // | ||||
| // | ||||
| //                           License Agreement | ||||
| //                For Open Source Computer Vision Library | ||||
| // | ||||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | ||||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | ||||
| // Third party copyrights are property of their respective owners. | ||||
| // | ||||
| // Redistribution and use in source and binary forms, with or without modification, | ||||
| // are permitted provided that the following conditions are met: | ||||
| // | ||||
| //   * Redistribution's of source code must retain the above copyright notice, | ||||
| //     this list of conditions and the following disclaimer. | ||||
| // | ||||
| //   * Redistribution's in binary form must reproduce the above copyright notice, | ||||
| //     this list of conditions and the following disclaimer in the documentation | ||||
| //     and/or other materials provided with the distribution. | ||||
| // | ||||
| //   * The name of the copyright holders may not be used to endorse or promote products | ||||
| //     derived from this software without specific prior written permission. | ||||
| // | ||||
| // This software is provided by the copyright holders and contributors "as is" and | ||||
| // any express or implied warranties, including, but not limited to, the implied | ||||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | ||||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | ||||
| // indirect, incidental, special, exemplary, or consequential damages | ||||
| // (including, but not limited to, procurement of substitute goods or services; | ||||
| // loss of use, data, or profits; or business interruption) however caused | ||||
| // and on any theory of liability, whether in contract, strict liability, | ||||
| // or tort (including negligence or otherwise) arising in any way out of | ||||
| // the use of this software, even if advised of the possibility of such damage. | ||||
| // | ||||
| //M*/ | ||||
|  | ||||
| #ifndef __OPENCV_CUDA_PRED_VAL_REDUCE_DETAIL_HPP__ | ||||
| #define __OPENCV_CUDA_PRED_VAL_REDUCE_DETAIL_HPP__ | ||||
|  | ||||
| #include <thrust/tuple.h> | ||||
| #include "../warp.hpp" | ||||
| #include "../warp_shuffle.hpp" | ||||
|  | ||||
| //! @cond IGNORED | ||||
|  | ||||
| namespace cv { namespace cuda { namespace device | ||||
| { | ||||
|     namespace reduce_key_val_detail | ||||
|     { | ||||
|         template <typename T> struct GetType; | ||||
|         template <typename T> struct GetType<T*> | ||||
|         { | ||||
|             typedef T type; | ||||
|         }; | ||||
|         template <typename T> struct GetType<volatile T*> | ||||
|         { | ||||
|             typedef T type; | ||||
|         }; | ||||
|         template <typename T> struct GetType<T&> | ||||
|         { | ||||
|             typedef T type; | ||||
|         }; | ||||
|  | ||||
        // Compile-time recursion over tuple elements [I, N): each static
        // method applies one element-wise operation at index I, then
        // recurses at I + 1. The For<N, N> specialization terminates the
        // recursion.
        template <unsigned int I, unsigned int N>
        struct For
        {
            // Store element I of 'data' into slot 'tid' of the I-th array
            // held in the 'smem' pointer tuple.
            template <class PointerTuple, class ReferenceTuple>
            static __device__ void loadToSmem(const PointerTuple& smem, const ReferenceTuple& data, unsigned int tid)
            {
                thrust::get<I>(smem)[tid] = thrust::get<I>(data);

                For<I + 1, N>::loadToSmem(smem, data, tid);
            }
            // Inverse of loadToSmem: read element I back from slot 'tid'.
            template <class PointerTuple, class ReferenceTuple>
            static __device__ void loadFromSmem(const PointerTuple& smem, const ReferenceTuple& data, unsigned int tid)
            {
                thrust::get<I>(data) = thrust::get<I>(smem)[tid];

                For<I + 1, N>::loadFromSmem(smem, data, tid);
            }

            // Replace each element of 'val' with the copy held by the lane
            // 'delta' positions away (via the shfl_down wrapper).
            template <class ReferenceTuple>
            static __device__ void copyShfl(const ReferenceTuple& val, unsigned int delta, int width)
            {
                thrust::get<I>(val) = shfl_down(thrust::get<I>(val), delta, width);

                For<I + 1, N>::copyShfl(val, delta, width);
            }
            // Copy element I from slot tid + delta into both slot tid and
            // the local copy 'val'.
            template <class PointerTuple, class ReferenceTuple>
            static __device__ void copy(const PointerTuple& svals, const ReferenceTuple& val, unsigned int tid, unsigned int delta)
            {
                thrust::get<I>(svals)[tid] = thrust::get<I>(val) = thrust::get<I>(svals)[tid + delta];

                For<I + 1, N>::copy(svals, val, tid, delta);
            }

            // Shuffle-based merge for element I: fetch the key from the lane
            // 'delta' positions away and, when it wins under cmp, adopt that
            // lane's key and (lazily, only then) its value.
            template <class KeyReferenceTuple, class ValReferenceTuple, class CmpTuple>
            static __device__ void mergeShfl(const KeyReferenceTuple& key, const ValReferenceTuple& val, const CmpTuple& cmp, unsigned int delta, int width)
            {
                typename GetType<typename thrust::tuple_element<I, KeyReferenceTuple>::type>::type reg = shfl_down(thrust::get<I>(key), delta, width);

                if (thrust::get<I>(cmp)(reg, thrust::get<I>(key)))
                {
                    thrust::get<I>(key) = reg;
                    thrust::get<I>(val) = shfl_down(thrust::get<I>(val), delta, width);
                }

                For<I + 1, N>::mergeShfl(key, val, cmp, delta, width);
            }
            // Shared-memory merge for element I: compare the key in slot
            // tid + delta against the local key; the winner is written back
            // to slot tid and kept locally, together with its value.
            template <class KeyPointerTuple, class KeyReferenceTuple, class ValPointerTuple, class ValReferenceTuple, class CmpTuple>
            static __device__ void merge(const KeyPointerTuple& skeys, const KeyReferenceTuple& key,
                                         const ValPointerTuple& svals, const ValReferenceTuple& val,
                                         const CmpTuple& cmp,
                                         unsigned int tid, unsigned int delta)
            {
                typename GetType<typename thrust::tuple_element<I, KeyPointerTuple>::type>::type reg = thrust::get<I>(skeys)[tid + delta];

                if (thrust::get<I>(cmp)(reg, thrust::get<I>(key)))
                {
                    thrust::get<I>(skeys)[tid] = thrust::get<I>(key) = reg;
                    thrust::get<I>(svals)[tid] = thrust::get<I>(val) = thrust::get<I>(svals)[tid + delta];
                }

                For<I + 1, N>::merge(skeys, key, svals, val, cmp, tid, delta);
            }
        };
|         template <unsigned int N> | ||||
|         struct For<N, N> | ||||
|         { | ||||
|             template <class PointerTuple, class ReferenceTuple> | ||||
|             static __device__ void loadToSmem(const PointerTuple&, const ReferenceTuple&, unsigned int) | ||||
|             { | ||||
|             } | ||||
|             template <class PointerTuple, class ReferenceTuple> | ||||
|             static __device__ void loadFromSmem(const PointerTuple&, const ReferenceTuple&, unsigned int) | ||||
|             { | ||||
|             } | ||||
|  | ||||
|             template <class ReferenceTuple> | ||||
|             static __device__ void copyShfl(const ReferenceTuple&, unsigned int, int) | ||||
|             { | ||||
|             } | ||||
|             template <class PointerTuple, class ReferenceTuple> | ||||
|             static __device__ void copy(const PointerTuple&, const ReferenceTuple&, unsigned int, unsigned int) | ||||
|             { | ||||
|             } | ||||
|  | ||||
|             template <class KeyReferenceTuple, class ValReferenceTuple, class CmpTuple> | ||||
|             static __device__ void mergeShfl(const KeyReferenceTuple&, const ValReferenceTuple&, const CmpTuple&, unsigned int, int) | ||||
|             { | ||||
|             } | ||||
|             template <class KeyPointerTuple, class KeyReferenceTuple, class ValPointerTuple, class ValReferenceTuple, class CmpTuple> | ||||
|             static __device__ void merge(const KeyPointerTuple&, const KeyReferenceTuple&, | ||||
|                                          const ValPointerTuple&, const ValReferenceTuple&, | ||||
|                                          const CmpTuple&, | ||||
|                                          unsigned int, unsigned int) | ||||
|             { | ||||
|             } | ||||
|         }; | ||||
|  | ||||
|         ////////////////////////////////////////////////////// | ||||
|         // loadToSmem | ||||
|  | ||||
        // Scalar overloads: one value, one slot per thread in the (presumably
        // shared-memory — named smem) scratch array.
        template <typename T>
        __device__ __forceinline__ void loadToSmem(volatile T* smem, T& data, unsigned int tid)
        {
            smem[tid] = data;
        }
        template <typename T>
        __device__ __forceinline__ void loadFromSmem(volatile T* smem, T& data, unsigned int tid)
        {
            data = smem[tid];
        }
        // Tuple overloads: forward to For<0, size>, which stores/loads each
        // of the (up to 10) tuple elements into its own scratch array.
        template <typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
                  typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9>
        __device__ __forceinline__ void loadToSmem(const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& smem,
                                                   const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& data,
                                                   unsigned int tid)
        {
            For<0, thrust::tuple_size<thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> >::value>::loadToSmem(smem, data, tid);
        }
        template <typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
                  typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9>
        __device__ __forceinline__ void loadFromSmem(const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& smem,
                                                     const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& data,
                                                     unsigned int tid)
        {
            For<0, thrust::tuple_size<thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> >::value>::loadFromSmem(smem, data, tid);
        }
|  | ||||
|         ////////////////////////////////////////////////////// | ||||
|         // copyVals | ||||
|  | ||||
        // Scalar: replace 'val' with the copy held by the lane 'delta'
        // positions away (via the shfl_down wrapper).
        template <typename V>
        __device__ __forceinline__ void copyValsShfl(V& val, unsigned int delta, int width)
        {
            val = shfl_down(val, delta, width);
        }
        // Scalar: copy slot tid + delta into both slot tid and 'val'.
        template <typename V>
        __device__ __forceinline__ void copyVals(volatile V* svals, V& val, unsigned int tid, unsigned int delta)
        {
            svals[tid] = val = svals[tid + delta];
        }
        // Tuple overloads: apply the scalar operation element-wise via For<>.
        template <typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9>
        __device__ __forceinline__ void copyValsShfl(const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
                                                     unsigned int delta,
                                                     int width)
        {
            For<0, thrust::tuple_size<thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9> >::value>::copyShfl(val, delta, width);
        }
        template <typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
                  typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9>
        __device__ __forceinline__ void copyVals(const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
                                                 const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
                                                 unsigned int tid, unsigned int delta)
        {
            For<0, thrust::tuple_size<thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> >::value>::copy(svals, val, tid, delta);
        }
|  | ||||
|         ////////////////////////////////////////////////////// | ||||
|         // merge | ||||
|  | ||||
        // Scalar key, scalar value — shuffle-based merge: fetch the key held
        // 'delta' lanes away; when cmp says it wins, adopt that lane's key
        // and value. The value shuffle only executes for winning lanes.
        template <typename K, typename V, class Cmp>
        __device__ __forceinline__ void mergeShfl(K& key, V& val, const Cmp& cmp, unsigned int delta, int width)
        {
            K reg = shfl_down(key, delta, width);

            if (cmp(reg, key))
            {
                key = reg;
                copyValsShfl(val, delta, width);
            }
        }
        // Scalar key, scalar value — shared-memory merge: compare the key in
        // slot tid + delta against the local key; the winner is written back
        // to slot tid and kept locally, and the matching value slot follows.
        template <typename K, typename V, class Cmp>
        __device__ __forceinline__ void merge(volatile K* skeys, K& key, volatile V* svals, V& val, const Cmp& cmp, unsigned int tid, unsigned int delta)
        {
            K reg = skeys[tid + delta];

            if (cmp(reg, key))
            {
                skeys[tid] = key = reg;
                copyVals(svals, val, tid, delta);
            }
        }
        // Scalar key, tuple of values (shuffle path).
        template <typename K,
                  typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
                  class Cmp>
        __device__ __forceinline__ void mergeShfl(K& key,
                                                  const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
                                                  const Cmp& cmp,
                                                  unsigned int delta, int width)
        {
            K reg = shfl_down(key, delta, width);

            if (cmp(reg, key))
            {
                key = reg;
                copyValsShfl(val, delta, width);
            }
        }
        // Scalar key, tuple of values (shared-memory path).
        template <typename K,
                  typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
                  typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
                  class Cmp>
        __device__ __forceinline__ void merge(volatile K* skeys, K& key,
                                              const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
                                              const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
                                              const Cmp& cmp, unsigned int tid, unsigned int delta)
        {
            K reg = skeys[tid + delta];

            if (cmp(reg, key))
            {
                skeys[tid] = key = reg;
                copyVals(svals, val, tid, delta);
            }
        }
        // Tuple of keys, tuple of values, one comparator per key
        // (shuffle path): each key element merges independently via For<>.
        template <typename KR0, typename KR1, typename KR2, typename KR3, typename KR4, typename KR5, typename KR6, typename KR7, typename KR8, typename KR9,
                  typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
                  class Cmp0, class Cmp1, class Cmp2, class Cmp3, class Cmp4, class Cmp5, class Cmp6, class Cmp7, class Cmp8, class Cmp9>
        __device__ __forceinline__ void mergeShfl(const thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9>& key,
                                                  const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
                                                  const thrust::tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9>& cmp,
                                                  unsigned int delta, int width)
        {
            For<0, thrust::tuple_size<thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9> >::value>::mergeShfl(key, val, cmp, delta, width);
        }
        // Tuple of keys, tuple of values, one comparator per key
        // (shared-memory path).
        template <typename KP0, typename KP1, typename KP2, typename KP3, typename KP4, typename KP5, typename KP6, typename KP7, typename KP8, typename KP9,
                  typename KR0, typename KR1, typename KR2, typename KR3, typename KR4, typename KR5, typename KR6, typename KR7, typename KR8, typename KR9,
                  typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
                  typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
                  class Cmp0, class Cmp1, class Cmp2, class Cmp3, class Cmp4, class Cmp5, class Cmp6, class Cmp7, class Cmp8, class Cmp9>
        __device__ __forceinline__ void merge(const thrust::tuple<KP0, KP1, KP2, KP3, KP4, KP5, KP6, KP7, KP8, KP9>& skeys,
                                              const thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9>& key,
                                              const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
                                              const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
                                              const thrust::tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9>& cmp,
                                              unsigned int tid, unsigned int delta)
        {
            For<0, thrust::tuple_size<thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> >::value>::merge(skeys, key, svals, val, cmp, tid, delta);
        }
|  | ||||
|         ////////////////////////////////////////////////////// | ||||
|         // Generic | ||||
|  | ||||
|         template <unsigned int N> struct Generic | ||||
|         { | ||||
|             template <class KP, class KR, class VP, class VR, class Cmp> | ||||
|             static __device__ void reduce(KP skeys, KR key, VP svals, VR val, unsigned int tid, Cmp cmp) | ||||
|             { | ||||
|                 loadToSmem(skeys, key, tid); | ||||
|                 loadValsToSmem(svals, val, tid); | ||||
|                 if (N >= 32) | ||||
|                     __syncthreads(); | ||||
|  | ||||
|                 if (N >= 2048) | ||||
|                 { | ||||
|                     if (tid < 1024) | ||||
|                         merge(skeys, key, svals, val, cmp, tid, 1024); | ||||
|  | ||||
|                     __syncthreads(); | ||||
|                 } | ||||
|                 if (N >= 1024) | ||||
|                 { | ||||
|                     if (tid < 512) | ||||
|                         merge(skeys, key, svals, val, cmp, tid, 512); | ||||
|  | ||||
|                     __syncthreads(); | ||||
|                 } | ||||
|                 if (N >= 512) | ||||
|                 { | ||||
|                     if (tid < 256) | ||||
|                         merge(skeys, key, svals, val, cmp, tid, 256); | ||||
|  | ||||
|                     __syncthreads(); | ||||
|                 } | ||||
|                 if (N >= 256) | ||||
|                 { | ||||
|                     if (tid < 128) | ||||
|                         merge(skeys, key, svals, val, cmp, tid, 128); | ||||
|  | ||||
|                     __syncthreads(); | ||||
|                 } | ||||
|                 if (N >= 128) | ||||
|                 { | ||||
|                     if (tid < 64) | ||||
|                         merge(skeys, key, svals, val, cmp, tid, 64); | ||||
|  | ||||
|                     __syncthreads(); | ||||
|                 } | ||||
|                 if (N >= 64) | ||||
|                 { | ||||
|                     if (tid < 32) | ||||
|                         merge(skeys, key, svals, val, cmp, tid, 32); | ||||
|                 } | ||||
|  | ||||
|                 if (tid < 16) | ||||
|                 { | ||||
|                     merge(skeys, key, svals, val, cmp, tid, 16); | ||||
|                     merge(skeys, key, svals, val, cmp, tid, 8); | ||||
|                     merge(skeys, key, svals, val, cmp, tid, 4); | ||||
|                     merge(skeys, key, svals, val, cmp, tid, 2); | ||||
|                     merge(skeys, key, svals, val, cmp, tid, 1); | ||||
|                 } | ||||
|             } | ||||
|         }; | ||||
|  | ||||
        // Compile-time unrolled merge ladder: performs merges with strides
        // I, I/2, ..., 1 (integer halving), terminated by the Unroll<0>
        // specialization below.
        template <unsigned int I, class KP, class KR, class VP, class VR, class Cmp>
        struct Unroll
        {
            // Shuffle-based ladder (no shared-memory traffic).
            static __device__ void loopShfl(KR key, VR val, Cmp cmp, unsigned int N)
            {
                mergeShfl(key, val, cmp, I, N);
                Unroll<I / 2, KP, KR, VP, VR, Cmp>::loopShfl(key, val, cmp, N);
            }
            // Shared-memory ladder.
            static __device__ void loop(KP skeys, KR key, VP svals, VR val, unsigned int tid, Cmp cmp)
            {
                merge(skeys, key, svals, val, cmp, tid, I);
                Unroll<I / 2, KP, KR, VP, VR, Cmp>::loop(skeys, key, svals, val, tid, cmp);
            }
        };
|         template <class KP, class KR, class VP, class VR, class Cmp> | ||||
|         struct Unroll<0, KP, KR, VP, VR, Cmp> | ||||
|         { | ||||
|             static __device__ void loopShfl(KR, VR, Cmp, unsigned int) | ||||
|             { | ||||
|             } | ||||
|             static __device__ void loop(KP, KR, VP, VR, unsigned int, Cmp) | ||||
|             { | ||||
|             } | ||||
|         }; | ||||
|  | ||||
        // Reduction for N <= warp size (N a power of two): all participating
        // threads belong to one warp, so no __syncthreads() appears between
        // merge steps.
        template <unsigned int N> struct WarpOptimized
        {
            template <class KP, class KR, class VP, class VR, class Cmp>
            static __device__ void reduce(KP skeys, KR key, VP svals, VR val, unsigned int tid, Cmp cmp)
            {
            // Shuffle path deliberately disabled in this vendored copy
            // (the condition was __CUDA_ARCH__ >= 300); the shared-memory
            // path below is always taken.
            #if 0 // __CUDA_ARCH__ >= 300
                (void) skeys;
                (void) svals;
                (void) tid;

                Unroll<N / 2, KP, KR, VP, VR, Cmp>::loopShfl(key, val, cmp, N);
            #else
                loadToSmem(skeys, key, tid);
                loadToSmem(svals, val, tid);

                // Only the lower half of threads merges; each subsequent
                // stride inside Unroll halves the span until it reaches 1.
                if (tid < N / 2)
                    Unroll<N / 2, KP, KR, VP, VR, Cmp>::loop(skeys, key, svals, val, tid, cmp);
            #endif
            }
        };
|  | ||||
        // Two-stage reduction for power-of-two N in (32, 1024]: first each
        // warp reduces internally, then lane 0 of each warp publishes its
        // result and the first M = N/32 slots are reduced by the first warp.
        template <unsigned int N> struct GenericOptimized32
        {
            enum { M = N / 32 };  // number of warps = number of partial results

            template <class KP, class KR, class VP, class VR, class Cmp>
            static __device__ void reduce(KP skeys, KR key, VP svals, VR val, unsigned int tid, Cmp cmp)
            {
                const unsigned int laneId = Warp::laneId();

            // Shuffle path deliberately disabled in this vendored copy
            // (the condition was __CUDA_ARCH__ >= 300).
            #if 0 // __CUDA_ARCH__ >= 300
                Unroll<16, KP, KR, VP, VR, Cmp>::loopShfl(key, val, cmp, warpSize);

                if (laneId == 0)
                {
                    loadToSmem(skeys, key, tid / 32);
                    loadToSmem(svals, val, tid / 32);
                }
            #else
                // Stage 1: intra-warp reduction through the scratch slots.
                loadToSmem(skeys, key, tid);
                loadToSmem(svals, val, tid);

                if (laneId < 16)
                    Unroll<16, KP, KR, VP, VR, Cmp>::loop(skeys, key, svals, val, tid, cmp);

                __syncthreads();

                // Lane 0 of each warp republishes its warp's winner into
                // slot tid / 32, packing the M partials at the front.
                if (laneId == 0)
                {
                    loadToSmem(skeys, key, tid / 32);
                    loadToSmem(svals, val, tid / 32);
                }
            #endif

                __syncthreads();

                loadFromSmem(skeys, key, tid);

                // Stage 2: the first warp reduces the M packed partials.
                if (tid < 32)
                {
                #if 0 // __CUDA_ARCH__ >= 300
                    loadFromSmem(svals, val, tid);

                    Unroll<M / 2, KP, KR, VP, VR, Cmp>::loopShfl(key, val, cmp, M);
                #else
                    Unroll<M / 2, KP, KR, VP, VR, Cmp>::loop(skeys, key, svals, val, tid, cmp);
                #endif
                }
            }
        };
|  | ||||
|         template <bool val, class T1, class T2> struct StaticIf; | ||||
|         template <class T1, class T2> struct StaticIf<true, T1, T2> | ||||
|         { | ||||
|             typedef T1 type; | ||||
|         }; | ||||
|         template <class T1, class T2> struct StaticIf<false, T1, T2> | ||||
|         { | ||||
|             typedef T2 type; | ||||
|         }; | ||||
|  | ||||
|         template <unsigned int N> struct IsPowerOf2 | ||||
|         { | ||||
|             enum { value = ((N != 0) && !(N & (N - 1))) }; | ||||
|         }; | ||||
|  | ||||
        // Selects the reduction implementation for a block of N threads:
        //   - WarpOptimized<N>      : N <= 32 and a power of two
        //   - GenericOptimized32<N> : 32 < N <= 1024 and a power of two
        //   - Generic<N>            : everything else
        template <unsigned int N> struct Dispatcher
        {
            typedef typename StaticIf<
                (N <= 32) && IsPowerOf2<N>::value,
                WarpOptimized<N>,
                typename StaticIf<
                    (N <= 1024) && IsPowerOf2<N>::value,
                    GenericOptimized32<N>,
                    Generic<N>
                >::type
            >::type reductor;
        };
|     } | ||||
| }}} | ||||
|  | ||||
| //! @endcond | ||||
|  | ||||
| #endif // __OPENCV_CUDA_PRED_VAL_REDUCE_DETAIL_HPP__ | ||||
							
								
								
									
										399
									
								
								3rdparty/include/opencv2/core/cuda/detail/transform_detail.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										399
									
								
								3rdparty/include/opencv2/core/cuda/detail/transform_detail.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,399 @@ | ||||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | ||||
| // | ||||
| //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | ||||
| // | ||||
| //  By downloading, copying, installing or using the software you agree to this license. | ||||
| //  If you do not agree to this license, do not download, install, | ||||
| //  copy or use the software. | ||||
| // | ||||
| // | ||||
| //                           License Agreement | ||||
| //                For Open Source Computer Vision Library | ||||
| // | ||||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | ||||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | ||||
| // Third party copyrights are property of their respective owners. | ||||
| // | ||||
| // Redistribution and use in source and binary forms, with or without modification, | ||||
| // are permitted provided that the following conditions are met: | ||||
| // | ||||
| //   * Redistribution's of source code must retain the above copyright notice, | ||||
| //     this list of conditions and the following disclaimer. | ||||
| // | ||||
| //   * Redistribution's in binary form must reproduce the above copyright notice, | ||||
| //     this list of conditions and the following disclaimer in the documentation | ||||
| //     and/or other materials provided with the distribution. | ||||
| // | ||||
| //   * The name of the copyright holders may not be used to endorse or promote products | ||||
| //     derived from this software without specific prior written permission. | ||||
| // | ||||
| // This software is provided by the copyright holders and contributors "as is" and | ||||
| // any express or implied warranties, including, but not limited to, the implied | ||||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | ||||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | ||||
| // indirect, incidental, special, exemplary, or consequential damages | ||||
| // (including, but not limited to, procurement of substitute goods or services; | ||||
| // loss of use, data, or profits; or business interruption) however caused | ||||
| // and on any theory of liability, whether in contract, strict liability, | ||||
| // or tort (including negligence or otherwise) arising in any way out of | ||||
| // the use of this software, even if advised of the possibility of such damage. | ||||
| // | ||||
| //M*/ | ||||
|  | ||||
| #ifndef __OPENCV_CUDA_TRANSFORM_DETAIL_HPP__ | ||||
| #define __OPENCV_CUDA_TRANSFORM_DETAIL_HPP__ | ||||
|  | ||||
| #include "../common.hpp" | ||||
| #include "../vec_traits.hpp" | ||||
| #include "../functional.hpp" | ||||
|  | ||||
| //! @cond IGNORED | ||||
|  | ||||
| namespace cv { namespace cuda { namespace device | ||||
| { | ||||
|     namespace transform_detail | ||||
|     { | ||||
|         //! Read Write Traits | ||||
|  | ||||
        // Vectorized access types for unary transforms: with a shift of S,
        // S consecutive elements are read/written as a single
        // TypeVec<T, S>::vec_type load/store (TypeVec comes from
        // ../vec_traits.hpp).
        template <typename T, typename D, int shift> struct UnaryReadWriteTraits
        {
            typedef typename TypeVec<T, shift>::vec_type read_type;
            typedef typename TypeVec<D, shift>::vec_type write_type;
        };

        // Same idea for binary transforms: two source element types and one
        // destination type, each widened to shift-element vectors.
        template <typename T1, typename T2, typename D, int shift> struct BinaryReadWriteTraits
        {
            typedef typename TypeVec<T1, shift>::vec_type read_type1;
            typedef typename TypeVec<T2, shift>::vec_type read_type2;
            typedef typename TypeVec<D, shift>::vec_type write_type;
        };
|  | ||||
|         //! Transform kernels | ||||
|  | ||||
        // OpUnroller<shift> applies 'op' to the 'shift' lanes (.x, .y, .z,
        // .w) of one vectorized load, guarding each lane with the per-pixel
        // mask evaluated at (y, x_shifted + lane).
        template <int shift> struct OpUnroller;
        template <> struct OpUnroller<1>
        {
            // Unary: dst.x = op(src.x) when the mask admits the pixel.
            template <typename T, typename D, typename UnOp, typename Mask>
            static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)
            {
                if (mask(y, x_shifted))
                    dst.x = op(src.x);
            }

            // Binary: dst.x = op(src1.x, src2.x) when the mask admits it.
            template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
            static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)
            {
                if (mask(y, x_shifted))
                    dst.x = op(src1.x, src2.x);
            }
        };
        // Two-lane unroll: processes .x and .y, each under its own mask
        // check at consecutive x positions.
        // NOTE(review): <1> and <2> take 'op' by non-const reference while
        // <3> and <4> take it by const reference — confirm whether the
        // inconsistency is intentional before unifying.
        template <> struct OpUnroller<2>
        {
            template <typename T, typename D, typename UnOp, typename Mask>
            static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)
            {
                if (mask(y, x_shifted))
                    dst.x = op(src.x);
                if (mask(y, x_shifted + 1))
                    dst.y = op(src.y);
            }

            template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
            static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)
            {
                if (mask(y, x_shifted))
                    dst.x = op(src1.x, src2.x);
                if (mask(y, x_shifted + 1))
                    dst.y = op(src1.y, src2.y);
            }
        };
        // Three-lane unroll: processes .x, .y and .z, each under its own
        // mask check at consecutive x positions.
        template <> struct OpUnroller<3>
        {
            template <typename T, typename D, typename UnOp, typename Mask>
            static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, const UnOp& op, int x_shifted, int y)
            {
                if (mask(y, x_shifted))
                    dst.x = op(src.x);
                if (mask(y, x_shifted + 1))
                    dst.y = op(src.y);
                if (mask(y, x_shifted + 2))
                    dst.z = op(src.z);
            }

            template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
            static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y)
            {
                if (mask(y, x_shifted))
                    dst.x = op(src1.x, src2.x);
                if (mask(y, x_shifted + 1))
                    dst.y = op(src1.y, src2.y);
                if (mask(y, x_shifted + 2))
                    dst.z = op(src1.z, src2.z);
            }
        };
|         template <> struct OpUnroller<4> | ||||
|         { | ||||
|             template <typename T, typename D, typename UnOp, typename Mask> | ||||
|             static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, const UnOp& op, int x_shifted, int y) | ||||
|             { | ||||
|                 if (mask(y, x_shifted)) | ||||
|                     dst.x = op(src.x); | ||||
|                 if (mask(y, x_shifted + 1)) | ||||
|                     dst.y = op(src.y); | ||||
|                 if (mask(y, x_shifted + 2)) | ||||
|                     dst.z = op(src.z); | ||||
|                 if (mask(y, x_shifted + 3)) | ||||
|                     dst.w = op(src.w); | ||||
|             } | ||||
|  | ||||
|             template <typename T1, typename T2, typename D, typename BinOp, typename Mask> | ||||
|             static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y) | ||||
|             { | ||||
|                 if (mask(y, x_shifted)) | ||||
|                     dst.x = op(src1.x, src2.x); | ||||
|                 if (mask(y, x_shifted + 1)) | ||||
|                     dst.y = op(src1.y, src2.y); | ||||
|                 if (mask(y, x_shifted + 2)) | ||||
|                     dst.z = op(src1.z, src2.z); | ||||
|                 if (mask(y, x_shifted + 3)) | ||||
|                     dst.w = op(src1.w, src2.w); | ||||
|             } | ||||
|         }; | ||||
|         template <> struct OpUnroller<8> | ||||
|         { | ||||
|             template <typename T, typename D, typename UnOp, typename Mask> | ||||
|             static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, const UnOp& op, int x_shifted, int y) | ||||
|             { | ||||
|                 if (mask(y, x_shifted)) | ||||
|                     dst.a0 = op(src.a0); | ||||
|                 if (mask(y, x_shifted + 1)) | ||||
|                     dst.a1 = op(src.a1); | ||||
|                 if (mask(y, x_shifted + 2)) | ||||
|                     dst.a2 = op(src.a2); | ||||
|                 if (mask(y, x_shifted + 3)) | ||||
|                     dst.a3 = op(src.a3); | ||||
|                 if (mask(y, x_shifted + 4)) | ||||
|                     dst.a4 = op(src.a4); | ||||
|                 if (mask(y, x_shifted + 5)) | ||||
|                     dst.a5 = op(src.a5); | ||||
|                 if (mask(y, x_shifted + 6)) | ||||
|                     dst.a6 = op(src.a6); | ||||
|                 if (mask(y, x_shifted + 7)) | ||||
|                     dst.a7 = op(src.a7); | ||||
|             } | ||||
|  | ||||
|             template <typename T1, typename T2, typename D, typename BinOp, typename Mask> | ||||
|             static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y) | ||||
|             { | ||||
|                 if (mask(y, x_shifted)) | ||||
|                     dst.a0 = op(src1.a0, src2.a0); | ||||
|                 if (mask(y, x_shifted + 1)) | ||||
|                     dst.a1 = op(src1.a1, src2.a1); | ||||
|                 if (mask(y, x_shifted + 2)) | ||||
|                     dst.a2 = op(src1.a2, src2.a2); | ||||
|                 if (mask(y, x_shifted + 3)) | ||||
|                     dst.a3 = op(src1.a3, src2.a3); | ||||
|                 if (mask(y, x_shifted + 4)) | ||||
|                     dst.a4 = op(src1.a4, src2.a4); | ||||
|                 if (mask(y, x_shifted + 5)) | ||||
|                     dst.a5 = op(src1.a5, src2.a5); | ||||
|                 if (mask(y, x_shifted + 6)) | ||||
|                     dst.a6 = op(src1.a6, src2.a6); | ||||
|                 if (mask(y, x_shifted + 7)) | ||||
|                     dst.a7 = op(src1.a7, src2.a7); | ||||
|             } | ||||
|         }; | ||||
|  | ||||
|         template <typename T, typename D, typename UnOp, typename Mask> | ||||
|         static __global__ void transformSmart(const PtrStepSz<T> src_, PtrStep<D> dst_, const Mask mask, const UnOp op) | ||||
|         { | ||||
|             typedef TransformFunctorTraits<UnOp> ft; | ||||
|             typedef typename UnaryReadWriteTraits<T, D, ft::smart_shift>::read_type read_type; | ||||
|             typedef typename UnaryReadWriteTraits<T, D, ft::smart_shift>::write_type write_type; | ||||
|  | ||||
|             const int x = threadIdx.x + blockIdx.x * blockDim.x; | ||||
|             const int y = threadIdx.y + blockIdx.y * blockDim.y; | ||||
|             const int x_shifted = x * ft::smart_shift; | ||||
|  | ||||
|             if (y < src_.rows) | ||||
|             { | ||||
|                 const T* src = src_.ptr(y); | ||||
|                 D* dst = dst_.ptr(y); | ||||
|  | ||||
|                 if (x_shifted + ft::smart_shift - 1 < src_.cols) | ||||
|                 { | ||||
|                     const read_type src_n_el = ((const read_type*)src)[x]; | ||||
|                     write_type dst_n_el = ((const write_type*)dst)[x]; | ||||
|  | ||||
|                     OpUnroller<ft::smart_shift>::unroll(src_n_el, dst_n_el, mask, op, x_shifted, y); | ||||
|  | ||||
|                     ((write_type*)dst)[x] = dst_n_el; | ||||
|                 } | ||||
|                 else | ||||
|                 { | ||||
|                     for (int real_x = x_shifted; real_x < src_.cols; ++real_x) | ||||
|                     { | ||||
|                         if (mask(y, real_x)) | ||||
|                             dst[real_x] = op(src[real_x]); | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         template <typename T, typename D, typename UnOp, typename Mask> | ||||
|         __global__ static void transformSimple(const PtrStepSz<T> src, PtrStep<D> dst, const Mask mask, const UnOp op) | ||||
|         { | ||||
|             const int x = blockDim.x * blockIdx.x + threadIdx.x; | ||||
|             const int y = blockDim.y * blockIdx.y + threadIdx.y; | ||||
|  | ||||
|             if (x < src.cols && y < src.rows && mask(y, x)) | ||||
|             { | ||||
|                 dst.ptr(y)[x] = op(src.ptr(y)[x]); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         template <typename T1, typename T2, typename D, typename BinOp, typename Mask> | ||||
|         static __global__ void transformSmart(const PtrStepSz<T1> src1_, const PtrStep<T2> src2_, PtrStep<D> dst_, | ||||
|             const Mask mask, const BinOp op) | ||||
|         { | ||||
|             typedef TransformFunctorTraits<BinOp> ft; | ||||
|             typedef typename BinaryReadWriteTraits<T1, T2, D, ft::smart_shift>::read_type1 read_type1; | ||||
|             typedef typename BinaryReadWriteTraits<T1, T2, D, ft::smart_shift>::read_type2 read_type2; | ||||
|             typedef typename BinaryReadWriteTraits<T1, T2, D, ft::smart_shift>::write_type write_type; | ||||
|  | ||||
|             const int x = threadIdx.x + blockIdx.x * blockDim.x; | ||||
|             const int y = threadIdx.y + blockIdx.y * blockDim.y; | ||||
|             const int x_shifted = x * ft::smart_shift; | ||||
|  | ||||
|             if (y < src1_.rows) | ||||
|             { | ||||
|                 const T1* src1 = src1_.ptr(y); | ||||
|                 const T2* src2 = src2_.ptr(y); | ||||
|                 D* dst = dst_.ptr(y); | ||||
|  | ||||
|                 if (x_shifted + ft::smart_shift - 1 < src1_.cols) | ||||
|                 { | ||||
|                     const read_type1 src1_n_el = ((const read_type1*)src1)[x]; | ||||
|                     const read_type2 src2_n_el = ((const read_type2*)src2)[x]; | ||||
|                     write_type dst_n_el = ((const write_type*)dst)[x]; | ||||
|  | ||||
|                     OpUnroller<ft::smart_shift>::unroll(src1_n_el, src2_n_el, dst_n_el, mask, op, x_shifted, y); | ||||
|  | ||||
|                     ((write_type*)dst)[x] = dst_n_el; | ||||
|                 } | ||||
|                 else | ||||
|                 { | ||||
|                     for (int real_x = x_shifted; real_x < src1_.cols; ++real_x) | ||||
|                     { | ||||
|                         if (mask(y, real_x)) | ||||
|                             dst[real_x] = op(src1[real_x], src2[real_x]); | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         template <typename T1, typename T2, typename D, typename BinOp, typename Mask> | ||||
|         static __global__ void transformSimple(const PtrStepSz<T1> src1, const PtrStep<T2> src2, PtrStep<D> dst, | ||||
|             const Mask mask, const BinOp op) | ||||
|         { | ||||
|             const int x = blockDim.x * blockIdx.x + threadIdx.x; | ||||
|             const int y = blockDim.y * blockIdx.y + threadIdx.y; | ||||
|  | ||||
|             if (x < src1.cols && y < src1.rows && mask(y, x)) | ||||
|             { | ||||
|                 const T1 src1_data = src1.ptr(y)[x]; | ||||
|                 const T2 src2_data = src2.ptr(y)[x]; | ||||
|                 dst.ptr(y)[x] = op(src1_data, src2_data); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         template <bool UseSmart> struct TransformDispatcher; | ||||
|         template<> struct TransformDispatcher<false> | ||||
|         { | ||||
|             template <typename T, typename D, typename UnOp, typename Mask> | ||||
|             static void call(PtrStepSz<T> src, PtrStepSz<D> dst, UnOp op, Mask mask, cudaStream_t stream) | ||||
|             { | ||||
|                 typedef TransformFunctorTraits<UnOp> ft; | ||||
|  | ||||
|                 const dim3 threads(ft::simple_block_dim_x, ft::simple_block_dim_y, 1); | ||||
|                 const dim3 grid(divUp(src.cols, threads.x), divUp(src.rows, threads.y), 1); | ||||
|  | ||||
|                 transformSimple<T, D><<<grid, threads, 0, stream>>>(src, dst, mask, op); | ||||
|                 cudaSafeCall( cudaGetLastError() ); | ||||
|  | ||||
|                 if (stream == 0) | ||||
|                     cudaSafeCall( cudaDeviceSynchronize() ); | ||||
|             } | ||||
|  | ||||
|             template <typename T1, typename T2, typename D, typename BinOp, typename Mask> | ||||
|             static void call(PtrStepSz<T1> src1, PtrStepSz<T2> src2, PtrStepSz<D> dst, BinOp op, Mask mask, cudaStream_t stream) | ||||
|             { | ||||
|                 typedef TransformFunctorTraits<BinOp> ft; | ||||
|  | ||||
|                 const dim3 threads(ft::simple_block_dim_x, ft::simple_block_dim_y, 1); | ||||
|                 const dim3 grid(divUp(src1.cols, threads.x), divUp(src1.rows, threads.y), 1); | ||||
|  | ||||
|                 transformSimple<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, mask, op); | ||||
|                 cudaSafeCall( cudaGetLastError() ); | ||||
|  | ||||
|                 if (stream == 0) | ||||
|                     cudaSafeCall( cudaDeviceSynchronize() ); | ||||
|             } | ||||
|         }; | ||||
|         template<> struct TransformDispatcher<true> | ||||
|         { | ||||
|             template <typename T, typename D, typename UnOp, typename Mask> | ||||
|             static void call(PtrStepSz<T> src, PtrStepSz<D> dst, UnOp op, Mask mask, cudaStream_t stream) | ||||
|             { | ||||
|                 typedef TransformFunctorTraits<UnOp> ft; | ||||
|  | ||||
|                 CV_StaticAssert(ft::smart_shift != 1, ""); | ||||
|  | ||||
|                 if (!isAligned(src.data, ft::smart_shift * sizeof(T)) || !isAligned(src.step, ft::smart_shift * sizeof(T)) || | ||||
|                     !isAligned(dst.data, ft::smart_shift * sizeof(D)) || !isAligned(dst.step, ft::smart_shift * sizeof(D))) | ||||
|                 { | ||||
|                     TransformDispatcher<false>::call(src, dst, op, mask, stream); | ||||
|                     return; | ||||
|                 } | ||||
|  | ||||
|                 const dim3 threads(ft::smart_block_dim_x, ft::smart_block_dim_y, 1); | ||||
|                 const dim3 grid(divUp(src.cols, threads.x * ft::smart_shift), divUp(src.rows, threads.y), 1); | ||||
|  | ||||
|                 transformSmart<T, D><<<grid, threads, 0, stream>>>(src, dst, mask, op); | ||||
|                 cudaSafeCall( cudaGetLastError() ); | ||||
|  | ||||
|                 if (stream == 0) | ||||
|                     cudaSafeCall( cudaDeviceSynchronize() ); | ||||
|             } | ||||
|  | ||||
|             template <typename T1, typename T2, typename D, typename BinOp, typename Mask> | ||||
|             static void call(PtrStepSz<T1> src1, PtrStepSz<T2> src2, PtrStepSz<D> dst, BinOp op, Mask mask, cudaStream_t stream) | ||||
|             { | ||||
|                 typedef TransformFunctorTraits<BinOp> ft; | ||||
|  | ||||
|                 CV_StaticAssert(ft::smart_shift != 1, ""); | ||||
|  | ||||
|                 if (!isAligned(src1.data, ft::smart_shift * sizeof(T1)) || !isAligned(src1.step, ft::smart_shift * sizeof(T1)) || | ||||
|                     !isAligned(src2.data, ft::smart_shift * sizeof(T2)) || !isAligned(src2.step, ft::smart_shift * sizeof(T2)) || | ||||
|                     !isAligned(dst.data, ft::smart_shift * sizeof(D)) || !isAligned(dst.step, ft::smart_shift * sizeof(D))) | ||||
|                 { | ||||
|                     TransformDispatcher<false>::call(src1, src2, dst, op, mask, stream); | ||||
|                     return; | ||||
|                 } | ||||
|  | ||||
|                 const dim3 threads(ft::smart_block_dim_x, ft::smart_block_dim_y, 1); | ||||
|                 const dim3 grid(divUp(src1.cols, threads.x * ft::smart_shift), divUp(src1.rows, threads.y), 1); | ||||
|  | ||||
|                 transformSmart<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, mask, op); | ||||
|                 cudaSafeCall( cudaGetLastError() ); | ||||
|  | ||||
|                 if (stream == 0) | ||||
|                     cudaSafeCall( cudaDeviceSynchronize() ); | ||||
|             } | ||||
|         }; | ||||
|     } // namespace transform_detail | ||||
}}} // namespace cv { namespace cuda { namespace device
|  | ||||
| //! @endcond | ||||
|  | ||||
| #endif // __OPENCV_CUDA_TRANSFORM_DETAIL_HPP__ | ||||
							
								
								
									
										191
									
								
								3rdparty/include/opencv2/core/cuda/detail/type_traits_detail.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										191
									
								
								3rdparty/include/opencv2/core/cuda/detail/type_traits_detail.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,191 @@ | ||||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | ||||
| // | ||||
| //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | ||||
| // | ||||
| //  By downloading, copying, installing or using the software you agree to this license. | ||||
| //  If you do not agree to this license, do not download, install, | ||||
| //  copy or use the software. | ||||
| // | ||||
| // | ||||
| //                           License Agreement | ||||
| //                For Open Source Computer Vision Library | ||||
| // | ||||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | ||||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | ||||
| // Third party copyrights are property of their respective owners. | ||||
| // | ||||
| // Redistribution and use in source and binary forms, with or without modification, | ||||
| // are permitted provided that the following conditions are met: | ||||
| // | ||||
| //   * Redistribution's of source code must retain the above copyright notice, | ||||
| //     this list of conditions and the following disclaimer. | ||||
| // | ||||
| //   * Redistribution's in binary form must reproduce the above copyright notice, | ||||
| //     this list of conditions and the following disclaimer in the documentation | ||||
| //     and/or other materials provided with the distribution. | ||||
| // | ||||
| //   * The name of the copyright holders may not be used to endorse or promote products | ||||
| //     derived from this software without specific prior written permission. | ||||
| // | ||||
| // This software is provided by the copyright holders and contributors "as is" and | ||||
| // any express or implied warranties, including, but not limited to, the implied | ||||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | ||||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | ||||
| // indirect, incidental, special, exemplary, or consequential damages | ||||
| // (including, but not limited to, procurement of substitute goods or services; | ||||
| // loss of use, data, or profits; or business interruption) however caused | ||||
| // and on any theory of liability, whether in contract, strict liability, | ||||
| // or tort (including negligence or otherwise) arising in any way out of | ||||
| // the use of this software, even if advised of the possibility of such damage. | ||||
| // | ||||
| //M*/ | ||||
|  | ||||
| #ifndef __OPENCV_CUDA_TYPE_TRAITS_DETAIL_HPP__ | ||||
| #define __OPENCV_CUDA_TYPE_TRAITS_DETAIL_HPP__ | ||||
|  | ||||
| #include "../common.hpp" | ||||
| #include "../vec_traits.hpp" | ||||
|  | ||||
| //! @cond IGNORED | ||||
|  | ||||
| namespace cv { namespace cuda { namespace device | ||||
| { | ||||
|     namespace type_traits_detail | ||||
|     { | ||||
|         template <bool, typename T1, typename T2> struct Select { typedef T1 type; }; | ||||
|         template <typename T1, typename T2> struct Select<false, T1, T2> { typedef T2 type; }; | ||||
|  | ||||
        // Trait: value == 1 for signed integral scalars (schar, short, int) and
        // their 1-component CUDA vector wrappers (char1, short1, int1), else 0.
        // NOTE: the name keeps the historical "Intergral" misspelling; renaming
        // it would break callers elsewhere in the library.
        template <typename T> struct IsSignedIntergral { enum {value = 0}; };
        template <> struct IsSignedIntergral<schar> { enum {value = 1}; };
        template <> struct IsSignedIntergral<char1> { enum {value = 1}; };
        template <> struct IsSignedIntergral<short> { enum {value = 1}; };
        template <> struct IsSignedIntergral<short1> { enum {value = 1}; };
        template <> struct IsSignedIntergral<int> { enum {value = 1}; };
        template <> struct IsSignedIntergral<int1> { enum {value = 1}; };

        // Trait: value == 1 for unsigned integral scalars and their 1-component
        // CUDA vector wrappers.
        template <typename T> struct IsUnsignedIntegral { enum {value = 0}; };
        template <> struct IsUnsignedIntegral<uchar> { enum {value = 1}; };
        template <> struct IsUnsignedIntegral<uchar1> { enum {value = 1}; };
        template <> struct IsUnsignedIntegral<ushort> { enum {value = 1}; };
        template <> struct IsUnsignedIntegral<ushort1> { enum {value = 1}; };
        template <> struct IsUnsignedIntegral<uint> { enum {value = 1}; };
        template <> struct IsUnsignedIntegral<uint1> { enum {value = 1}; };

        // Trait: integral == signed-or-unsigned integral; plain 'char' and 'bool'
        // are additionally counted as integral via explicit specializations.
        template <typename T> struct IsIntegral { enum {value = IsSignedIntergral<T>::value || IsUnsignedIntegral<T>::value}; };
        template <> struct IsIntegral<char> { enum {value = 1}; };
        template <> struct IsIntegral<bool> { enum {value = 1}; };

        // Trait: value == 1 only for the float and double scalars.
        template <typename T> struct IsFloat { enum {value = 0}; };
        template <> struct IsFloat<float> { enum {value = 1}; };
        template <> struct IsFloat<double> { enum {value = 1}; };
|  | ||||
        // Trait: value == 1 for the 1/2/3/4/8-component vector types of uchar,
        // char, ushort, short, uint, int, float and double; 0 for everything else.
        // NOTE(review): the 8-component variants (uchar8, int8, ...) are not CUDA
        // built-ins -- presumably declared by ../vec_traits.hpp included above; confirm.
        template <typename T> struct IsVec { enum {value = 0}; };
        template <> struct IsVec<uchar1> { enum {value = 1}; };
        template <> struct IsVec<uchar2> { enum {value = 1}; };
        template <> struct IsVec<uchar3> { enum {value = 1}; };
        template <> struct IsVec<uchar4> { enum {value = 1}; };
        template <> struct IsVec<uchar8> { enum {value = 1}; };
        template <> struct IsVec<char1> { enum {value = 1}; };
        template <> struct IsVec<char2> { enum {value = 1}; };
        template <> struct IsVec<char3> { enum {value = 1}; };
        template <> struct IsVec<char4> { enum {value = 1}; };
        template <> struct IsVec<char8> { enum {value = 1}; };
        template <> struct IsVec<ushort1> { enum {value = 1}; };
        template <> struct IsVec<ushort2> { enum {value = 1}; };
        template <> struct IsVec<ushort3> { enum {value = 1}; };
        template <> struct IsVec<ushort4> { enum {value = 1}; };
        template <> struct IsVec<ushort8> { enum {value = 1}; };
        template <> struct IsVec<short1> { enum {value = 1}; };
        template <> struct IsVec<short2> { enum {value = 1}; };
        template <> struct IsVec<short3> { enum {value = 1}; };
        template <> struct IsVec<short4> { enum {value = 1}; };
        template <> struct IsVec<short8> { enum {value = 1}; };
        template <> struct IsVec<uint1> { enum {value = 1}; };
        template <> struct IsVec<uint2> { enum {value = 1}; };
        template <> struct IsVec<uint3> { enum {value = 1}; };
        template <> struct IsVec<uint4> { enum {value = 1}; };
        template <> struct IsVec<uint8> { enum {value = 1}; };
        template <> struct IsVec<int1> { enum {value = 1}; };
        template <> struct IsVec<int2> { enum {value = 1}; };
        template <> struct IsVec<int3> { enum {value = 1}; };
        template <> struct IsVec<int4> { enum {value = 1}; };
        template <> struct IsVec<int8> { enum {value = 1}; };
        template <> struct IsVec<float1> { enum {value = 1}; };
        template <> struct IsVec<float2> { enum {value = 1}; };
        template <> struct IsVec<float3> { enum {value = 1}; };
        template <> struct IsVec<float4> { enum {value = 1}; };
        template <> struct IsVec<float8> { enum {value = 1}; };
        template <> struct IsVec<double1> { enum {value = 1}; };
        template <> struct IsVec<double2> { enum {value = 1}; };
        template <> struct IsVec<double3> { enum {value = 1}; };
        template <> struct IsVec<double4> { enum {value = 1}; };
        template <> struct IsVec<double8> { enum {value = 1}; };
|  | ||||
        // Metafunction giving a suitable parameter-passing type for U:
        // const reference by default, plain reference for reference types,
        // and void maps to void (so it can be used in generic signatures).
        template <class U> struct AddParameterType { typedef const U& type; };
        template <class U> struct AddParameterType<U&> { typedef U& type; };
        template <> struct AddParameterType<void> { typedef void type; };
|  | ||||
        // Detects (lvalue-)reference types: value is true for U&, false otherwise;
        // 'type' is U with any reference stripped.
        template <class U> struct ReferenceTraits
        {
            enum { value = false };
            typedef U type;
        };
        template <class U> struct ReferenceTraits<U&>
        {
            enum { value = true };
            typedef U type;
        };
|  | ||||
        // Detects pointer types (and references to pointers): value is true for
        // U* and U*&, with 'type' the pointee U. For non-pointers 'type' is void.
        template <class U> struct PointerTraits
        {
            enum { value = false };
            typedef void type;
        };
        template <class U> struct PointerTraits<U*>
        {
            enum { value = true };
            typedef U type;
        };
        template <class U> struct PointerTraits<U*&>
        {
            enum { value = true };
            typedef U type;
        };
|  | ||||
        // Strips top-level const: 'type' is U without const, 'value' is 1 if a
        // const was removed. The const-reference case preserves the reference.
        template <class U> struct UnConst
        {
            typedef U type;
            enum { value = 0 };
        };
        template <class U> struct UnConst<const U>
        {
            typedef U type;
            enum { value = 1 };
        };
        template <class U> struct UnConst<const U&>
        {
            typedef U& type;
            enum { value = 1 };
        };
|  | ||||
        // Strips top-level volatile: 'type' is U without volatile, 'value' is 1 if
        // a volatile was removed. The volatile-reference case preserves the reference.
        template <class U> struct UnVolatile
        {
            typedef U type;
            enum { value = 0 };
        };
        template <class U> struct UnVolatile<volatile U>
        {
            typedef U type;
            enum { value = 1 };
        };
        template <class U> struct UnVolatile<volatile U&>
        {
            typedef U& type;
            enum { value = 1 };
        };
|     } // namespace type_traits_detail | ||||
}}} // namespace cv { namespace cuda { namespace device
|  | ||||
| //! @endcond | ||||
|  | ||||
| #endif // __OPENCV_CUDA_TYPE_TRAITS_DETAIL_HPP__ | ||||
							
								
								
									
										121
									
								
								3rdparty/include/opencv2/core/cuda/detail/vec_distance_detail.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										121
									
								
								3rdparty/include/opencv2/core/cuda/detail/vec_distance_detail.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,121 @@ | ||||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | ||||
| // | ||||
| //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | ||||
| // | ||||
| //  By downloading, copying, installing or using the software you agree to this license. | ||||
| //  If you do not agree to this license, do not download, install, | ||||
| //  copy or use the software. | ||||
| // | ||||
| // | ||||
| //                           License Agreement | ||||
| //                For Open Source Computer Vision Library | ||||
| // | ||||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | ||||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | ||||
| // Third party copyrights are property of their respective owners. | ||||
| // | ||||
| // Redistribution and use in source and binary forms, with or without modification, | ||||
| // are permitted provided that the following conditions are met: | ||||
| // | ||||
| //   * Redistribution's of source code must retain the above copyright notice, | ||||
| //     this list of conditions and the following disclaimer. | ||||
| // | ||||
| //   * Redistribution's in binary form must reproduce the above copyright notice, | ||||
| //     this list of conditions and the following disclaimer in the documentation | ||||
| //     and/or other materials provided with the distribution. | ||||
| // | ||||
| //   * The name of the copyright holders may not be used to endorse or promote products | ||||
| //     derived from this software without specific prior written permission. | ||||
| // | ||||
| // This software is provided by the copyright holders and contributors "as is" and | ||||
| // any express or implied warranties, including, but not limited to, the implied | ||||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | ||||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | ||||
| // indirect, incidental, special, exemplary, or consequential damages | ||||
| // (including, but not limited to, procurement of substitute goods or services; | ||||
| // loss of use, data, or profits; or business interruption) however caused | ||||
| // and on any theory of liability, whether in contract, strict liability, | ||||
| // or tort (including negligence or otherwise) arising in any way out of | ||||
| // the use of this software, even if advised of the possibility of such damage. | ||||
| // | ||||
| //M*/ | ||||
|  | ||||
| #ifndef __OPENCV_CUDA_VEC_DISTANCE_DETAIL_HPP__ | ||||
| #define __OPENCV_CUDA_VEC_DISTANCE_DETAIL_HPP__ | ||||
|  | ||||
| #include "../datamov_utils.hpp" | ||||
|  | ||||
| //! @cond IGNORED | ||||
|  | ||||
| namespace cv { namespace cuda { namespace device | ||||
| { | ||||
|     namespace vec_distance_detail | ||||
|     { | ||||
|         template <int THREAD_DIM, int N> struct UnrollVecDiffCached | ||||
|         { | ||||
|             template <typename Dist, typename T1, typename T2> | ||||
|             static __device__ void calcCheck(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, int ind) | ||||
|             { | ||||
|                 if (ind < len) | ||||
|                 { | ||||
|                     T1 val1 = *vecCached++; | ||||
|  | ||||
|                     T2 val2; | ||||
|                     ForceGlob<T2>::Load(vecGlob, ind, val2); | ||||
|  | ||||
|                     dist.reduceIter(val1, val2); | ||||
|  | ||||
|                     UnrollVecDiffCached<THREAD_DIM, N - 1>::calcCheck(vecCached, vecGlob, len, dist, ind + THREAD_DIM); | ||||
|                 } | ||||
|             } | ||||
|  | ||||
|             template <typename Dist, typename T1, typename T2> | ||||
|             static __device__ void calcWithoutCheck(const T1* vecCached, const T2* vecGlob, Dist& dist) | ||||
|             { | ||||
|                 T1 val1 = *vecCached++; | ||||
|  | ||||
|                 T2 val2; | ||||
|                 ForceGlob<T2>::Load(vecGlob, 0, val2); | ||||
|                 vecGlob += THREAD_DIM; | ||||
|  | ||||
|                 dist.reduceIter(val1, val2); | ||||
|  | ||||
|                 UnrollVecDiffCached<THREAD_DIM, N - 1>::calcWithoutCheck(vecCached, vecGlob, dist); | ||||
|             } | ||||
|         }; | ||||
        // Recursion terminator (N == 0): nothing left to accumulate.
        template <int THREAD_DIM> struct UnrollVecDiffCached<THREAD_DIM, 0>
        {
            template <typename Dist, typename T1, typename T2>
            static __device__ __forceinline__ void calcCheck(const T1*, const T2*, int, Dist&, int)
            {
            }

            template <typename Dist, typename T1, typename T2>
            static __device__ __forceinline__ void calcWithoutCheck(const T1*, const T2*, Dist&)
            {
            }
        };
|  | ||||
|         template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN> struct VecDiffCachedCalculator; | ||||
|         template <int THREAD_DIM, int MAX_LEN> struct VecDiffCachedCalculator<THREAD_DIM, MAX_LEN, false> | ||||
|         { | ||||
|             template <typename Dist, typename T1, typename T2> | ||||
|             static __device__ __forceinline__ void calc(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, int tid) | ||||
|             { | ||||
|                 UnrollVecDiffCached<THREAD_DIM, MAX_LEN / THREAD_DIM>::calcCheck(vecCached, vecGlob, len, dist, tid); | ||||
|             } | ||||
|         }; | ||||
|         template <int THREAD_DIM, int MAX_LEN> struct VecDiffCachedCalculator<THREAD_DIM, MAX_LEN, true> | ||||
|         { | ||||
|             template <typename Dist, typename T1, typename T2> | ||||
|             static __device__ __forceinline__ void calc(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, int tid) | ||||
|             { | ||||
|                 UnrollVecDiffCached<THREAD_DIM, MAX_LEN / THREAD_DIM>::calcWithoutCheck(vecCached, vecGlob + tid, dist); | ||||
|             } | ||||
|         }; | ||||
|     } // namespace vec_distance_detail | ||||
| }}} // namespace cv { namespace cuda { namespace cudev | ||||
|  | ||||
| //! @endcond | ||||
|  | ||||
| #endif // __OPENCV_CUDA_VEC_DISTANCE_DETAIL_HPP__ | ||||
							
								
								
									
										88
									
								
								3rdparty/include/opencv2/core/cuda/dynamic_smem.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										88
									
								
								3rdparty/include/opencv2/core/cuda/dynamic_smem.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,88 @@ | ||||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | ||||
| // | ||||
| //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | ||||
| // | ||||
| //  By downloading, copying, installing or using the software you agree to this license. | ||||
| //  If you do not agree to this license, do not download, install, | ||||
| //  copy or use the software. | ||||
| // | ||||
| // | ||||
| //                           License Agreement | ||||
| //                For Open Source Computer Vision Library | ||||
| // | ||||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | ||||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | ||||
| // Third party copyrights are property of their respective owners. | ||||
| // | ||||
| // Redistribution and use in source and binary forms, with or without modification, | ||||
| // are permitted provided that the following conditions are met: | ||||
| // | ||||
| //   * Redistribution's of source code must retain the above copyright notice, | ||||
| //     this list of conditions and the following disclaimer. | ||||
| // | ||||
| //   * Redistribution's in binary form must reproduce the above copyright notice, | ||||
| //     this list of conditions and the following disclaimer in the documentation | ||||
| //     and/or other materials provided with the distribution. | ||||
| // | ||||
| //   * The name of the copyright holders may not be used to endorse or promote products | ||||
| //     derived from this software without specific prior written permission. | ||||
| // | ||||
| // This software is provided by the copyright holders and contributors "as is" and | ||||
| // any express or implied warranties, including, but not limited to, the implied | ||||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | ||||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | ||||
| // indirect, incidental, special, exemplary, or consequential damages | ||||
| // (including, but not limited to, procurement of substitute goods or services; | ||||
| // loss of use, data, or profits; or business interruption) however caused | ||||
| // and on any theory of liability, whether in contract, strict liability, | ||||
| // or tort (including negligence or otherwise) arising in any way out of | ||||
| // the use of this software, even if advised of the possibility of such damage. | ||||
| // | ||||
| //M*/ | ||||
|  | ||||
| #ifndef __OPENCV_CUDA_DYNAMIC_SMEM_HPP__ | ||||
| #define __OPENCV_CUDA_DYNAMIC_SMEM_HPP__ | ||||
|  | ||||
| /** @file | ||||
|  * @deprecated Use @ref cudev instead. | ||||
|  */ | ||||
|  | ||||
| //! @cond IGNORED | ||||
|  | ||||
| namespace cv { namespace cuda { namespace device | ||||
| { | ||||
|     template<class T> struct DynamicSharedMem | ||||
|     { | ||||
|         __device__ __forceinline__ operator T*() | ||||
|         { | ||||
|             extern __shared__ int __smem[]; | ||||
|             return (T*)__smem; | ||||
|         } | ||||
|  | ||||
|         __device__ __forceinline__ operator const T*() const | ||||
|         { | ||||
|             extern __shared__ int __smem[]; | ||||
|             return (T*)__smem; | ||||
|         } | ||||
|     }; | ||||
|  | ||||
|     // specialize for double to avoid unaligned memory access compile errors | ||||
|     template<> struct DynamicSharedMem<double> | ||||
|     { | ||||
|         __device__ __forceinline__ operator double*() | ||||
|         { | ||||
|             extern __shared__ double __smem_d[]; | ||||
|             return (double*)__smem_d; | ||||
|         } | ||||
|  | ||||
|         __device__ __forceinline__ operator const double*() const | ||||
|         { | ||||
|             extern __shared__ double __smem_d[]; | ||||
|             return (double*)__smem_d; | ||||
|         } | ||||
|     }; | ||||
| }}} | ||||
|  | ||||
| //! @endcond | ||||
|  | ||||
| #endif // __OPENCV_CUDA_DYNAMIC_SMEM_HPP__ | ||||
							
								
								
									
										269
									
								
								3rdparty/include/opencv2/core/cuda/emulation.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										269
									
								
								3rdparty/include/opencv2/core/cuda/emulation.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,269 @@ | ||||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | ||||
| // | ||||
| //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | ||||
| // | ||||
| //  By downloading, copying, installing or using the software you agree to this license. | ||||
| //  If you do not agree to this license, do not download, install, | ||||
| //  copy or use the software. | ||||
| // | ||||
| // | ||||
| //                           License Agreement | ||||
| //                For Open Source Computer Vision Library | ||||
| // | ||||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | ||||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | ||||
| // Third party copyrights are property of their respective owners. | ||||
| // | ||||
| // Redistribution and use in source and binary forms, with or without modification, | ||||
| // are permitted provided that the following conditions are met: | ||||
| // | ||||
| //   * Redistribution's of source code must retain the above copyright notice, | ||||
| //     this list of conditions and the following disclaimer. | ||||
| // | ||||
| //   * Redistribution's in binary form must reproduce the above copyright notice, | ||||
| //     this list of conditions and the following disclaimer in the documentation | ||||
| //     and/or other materials provided with the distribution. | ||||
| // | ||||
| //   * The name of the copyright holders may not be used to endorse or promote products | ||||
| //     derived from this software without specific prior written permission. | ||||
| // | ||||
| // This software is provided by the copyright holders and contributors "as is" and | ||||
| // any express or implied warranties, including, but not limited to, the implied | ||||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | ||||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | ||||
| // indirect, incidental, special, exemplary, or consequential damages | ||||
| // (including, but not limited to, procurement of substitute goods or services; | ||||
| // loss of use, data, or profits; or business interruption) however caused | ||||
| // and on any theory of liability, whether in contract, strict liability, | ||||
| // or tort (including negligence or otherwise) arising in any way out of | ||||
| // the use of this software, even if advised of the possibility of such damage. | ||||
| // | ||||
| //M*/ | ||||
|  | ||||
| #ifndef OPENCV_CUDA_EMULATION_HPP_ | ||||
| #define OPENCV_CUDA_EMULATION_HPP_ | ||||
|  | ||||
| #include "common.hpp" | ||||
| #include "warp_reduce.hpp" | ||||
|  | ||||
| /** @file | ||||
|  * @deprecated Use @ref cudev instead. | ||||
|  */ | ||||
|  | ||||
| //! @cond IGNORED | ||||
|  | ||||
| namespace cv { namespace cuda { namespace device | ||||
| { | ||||
    // Software emulation of intrinsics/atomics that are missing on older
    // compute capabilities. Each member compiles to the native instruction
    // when the target architecture supports it and to a fallback otherwise.
    struct Emulation
    {

        // __syncthreads_or wrapper: non-zero when pred is non-zero for any
        // thread of the block. The intrinsic needs SM 2.0; older targets get
        // a stub that always returns 0 so the code still compiles.
        static __device__ __forceinline__ int syncthreadsOr(int pred)
        {
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 200)
                // just a compilation stub for pre-SM 2.0 targets
                return 0;
#else
                return __syncthreads_or(pred);
#endif
        }

        // Warp ballot: builds a bit mask of lanes with a non-zero predicate.
        // Native __ballot on SM 2.0+; otherwise each thread stores its lane
        // bit into shared memory and the mask is OR-combined by warp_reduce.
        template<int CTA_SIZE>
        static __forceinline__ __device__ int Ballot(int predicate)
        {
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ >= 200)
            return __ballot(predicate);
#else
            __shared__ volatile int cta_buffer[CTA_SIZE];

            int tid = threadIdx.x;
            cta_buffer[tid] = predicate ? (1 << (tid & 31)) : 0;
            return warp_reduce(cta_buffer);
#endif
        }

        // Shared-memory atomics. Devices below SM 1.2 lack shared-memory
        // atomic instructions; the fallbacks tag the stored value with the
        // thread index in the top 5 bits and spin until this thread's write
        // survives, serializing conflicting updates.
        struct smem
        {
            // Low bits hold the value; the top 5 bits hold the writer tag.
            enum { TAG_MASK = (1U << ( (sizeof(unsigned int) << 3) - 5U)) - 1U };

            // atomicInc emulation. NOTE(review): unlike ::atomicInc, the
            // pre-SM 1.2 fallback ignores the wrap-around limit val.
            template<typename T>
            static __device__ __forceinline__ T atomicInc(T* address, T val)
            {
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
                T count;
                unsigned int tag = threadIdx.x << ( (sizeof(unsigned int) << 3) - 5U);
                do
                {
                    count = *address & TAG_MASK;
                    count = tag | (count + 1);
                    *address = count;
                } while (*address != count);

                return (count & TAG_MASK) - 1;
#else
                return ::atomicInc(address, val);
#endif
            }

            // atomicAdd emulation using the same tag-and-retry scheme.
            template<typename T>
            static __device__ __forceinline__ T atomicAdd(T* address, T val)
            {
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
                T count;
                unsigned int tag = threadIdx.x << ( (sizeof(unsigned int) << 3) - 5U);
                do
                {
                    count = *address & TAG_MASK;
                    count = tag | (count + val);
                    *address = count;
                } while (*address != count);

                return (count & TAG_MASK) - val;
#else
                return ::atomicAdd(address, val);
#endif
            }

            // atomicMin emulation: repeatedly writes the candidate minimum
            // until *address no longer exceeds it. Returns the computed
            // minimum (not the old value, unlike ::atomicMin).
            template<typename T>
            static __device__ __forceinline__ T atomicMin(T* address, T val)
            {
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
                T count = ::min(*address, val);
                do
                {
                    *address = count;
                } while (*address > count);

                return count;
#else
                return ::atomicMin(address, val);
#endif
            }
        }; // struct smem

        // Global-memory atomics: native where the hardware supports them,
        // atomicCAS-based loops for float/double min/max and for pre-Fermi
        // float add / pre-SM 1.3 double operations.
        struct glob
        {
            static __device__ __forceinline__ int atomicAdd(int* address, int val)
            {
                return ::atomicAdd(address, val);
            }
            static __device__ __forceinline__ unsigned int atomicAdd(unsigned int* address, unsigned int val)
            {
                return ::atomicAdd(address, val);
            }
            // float add: native on SM 2.0+, otherwise a CAS loop that
            // reinterprets the bits through int. Returns the old value.
            static __device__ __forceinline__ float atomicAdd(float* address, float val)
            {
            #if __CUDA_ARCH__ >= 200
                return ::atomicAdd(address, val);
            #else
                int* address_as_i = (int*) address;
                int old = *address_as_i, assumed;
                do {
                    assumed = old;
                    old = ::atomicCAS(address_as_i, assumed,
                        __float_as_int(val + __int_as_float(assumed)));
                } while (assumed != old);
                return __int_as_float(old);
            #endif
            }
            // double add via 64-bit CAS (needs SM 1.3 for double support);
            // earlier targets get a 0.0 stub.
            static __device__ __forceinline__ double atomicAdd(double* address, double val)
            {
            #if __CUDA_ARCH__ >= 130
                unsigned long long int* address_as_ull = (unsigned long long int*) address;
                unsigned long long int old = *address_as_ull, assumed;
                do {
                    assumed = old;
                    old = ::atomicCAS(address_as_ull, assumed,
                        __double_as_longlong(val + __longlong_as_double(assumed)));
                } while (assumed != old);
                return __longlong_as_double(old);
            #else
                (void) address;
                (void) val;
                return 0.0;
            #endif
            }

            static __device__ __forceinline__ int atomicMin(int* address, int val)
            {
                return ::atomicMin(address, val);
            }
            // float min via 32-bit CAS (needs SM 1.2 for global atomicCAS);
            // earlier targets get a 0.0f stub.
            static __device__ __forceinline__ float atomicMin(float* address, float val)
            {
            #if __CUDA_ARCH__ >= 120
                int* address_as_i = (int*) address;
                int old = *address_as_i, assumed;
                do {
                    assumed = old;
                    old = ::atomicCAS(address_as_i, assumed,
                        __float_as_int(::fminf(val, __int_as_float(assumed))));
                } while (assumed != old);
                return __int_as_float(old);
            #else
                (void) address;
                (void) val;
                return 0.0f;
            #endif
            }
            // double min via 64-bit CAS (needs SM 1.3); stub otherwise.
            static __device__ __forceinline__ double atomicMin(double* address, double val)
            {
            #if __CUDA_ARCH__ >= 130
                unsigned long long int* address_as_ull = (unsigned long long int*) address;
                unsigned long long int old = *address_as_ull, assumed;
                do {
                    assumed = old;
                    old = ::atomicCAS(address_as_ull, assumed,
                        __double_as_longlong(::fmin(val, __longlong_as_double(assumed))));
                } while (assumed != old);
                return __longlong_as_double(old);
            #else
                (void) address;
                (void) val;
                return 0.0;
            #endif
            }

            static __device__ __forceinline__ int atomicMax(int* address, int val)
            {
                return ::atomicMax(address, val);
            }
            // float max via 32-bit CAS (needs SM 1.2); stub otherwise.
            static __device__ __forceinline__ float atomicMax(float* address, float val)
            {
            #if __CUDA_ARCH__ >= 120
                int* address_as_i = (int*) address;
                int old = *address_as_i, assumed;
                do {
                    assumed = old;
                    old = ::atomicCAS(address_as_i, assumed,
                        __float_as_int(::fmaxf(val, __int_as_float(assumed))));
                } while (assumed != old);
                return __int_as_float(old);
            #else
                (void) address;
                (void) val;
                return 0.0f;
            #endif
            }
            // double max via 64-bit CAS (needs SM 1.3); stub otherwise.
            static __device__ __forceinline__ double atomicMax(double* address, double val)
            {
            #if __CUDA_ARCH__ >= 130
                unsigned long long int* address_as_ull = (unsigned long long int*) address;
                unsigned long long int old = *address_as_ull, assumed;
                do {
                    assumed = old;
                    old = ::atomicCAS(address_as_ull, assumed,
                        __double_as_longlong(::fmax(val, __longlong_as_double(assumed))));
                } while (assumed != old);
                return __longlong_as_double(old);
            #else
                (void) address;
                (void) val;
                return 0.0;
            #endif
            }
        };
    }; //struct Emulation
| }}} // namespace cv { namespace cuda { namespace cudev | ||||
|  | ||||
| //! @endcond | ||||
|  | ||||
| #endif /* OPENCV_CUDA_EMULATION_HPP_ */ | ||||
							
								
								
									
										286
									
								
								3rdparty/include/opencv2/core/cuda/filters.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										286
									
								
								3rdparty/include/opencv2/core/cuda/filters.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,286 @@ | ||||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | ||||
| // | ||||
| //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | ||||
| // | ||||
| //  By downloading, copying, installing or using the software you agree to this license. | ||||
| //  If you do not agree to this license, do not download, install, | ||||
| //  copy or use the software. | ||||
| // | ||||
| // | ||||
| //                           License Agreement | ||||
| //                For Open Source Computer Vision Library | ||||
| // | ||||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | ||||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | ||||
| // Third party copyrights are property of their respective owners. | ||||
| // | ||||
| // Redistribution and use in source and binary forms, with or without modification, | ||||
| // are permitted provided that the following conditions are met: | ||||
| // | ||||
| //   * Redistribution's of source code must retain the above copyright notice, | ||||
| //     this list of conditions and the following disclaimer. | ||||
| // | ||||
| //   * Redistribution's in binary form must reproduce the above copyright notice, | ||||
| //     this list of conditions and the following disclaimer in the documentation | ||||
| //     and/or other materials provided with the distribution. | ||||
| // | ||||
| //   * The name of the copyright holders may not be used to endorse or promote products | ||||
| //     derived from this software without specific prior written permission. | ||||
| // | ||||
| // This software is provided by the copyright holders and contributors "as is" and | ||||
| // any express or implied warranties, including, but not limited to, the implied | ||||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | ||||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | ||||
| // indirect, incidental, special, exemplary, or consequential damages | ||||
| // (including, but not limited to, procurement of substitute goods or services; | ||||
| // loss of use, data, or profits; or business interruption) however caused | ||||
| // and on any theory of liability, whether in contract, strict liability, | ||||
| // or tort (including negligence or otherwise) arising in any way out of | ||||
| // the use of this software, even if advised of the possibility of such damage. | ||||
| // | ||||
| //M*/ | ||||
|  | ||||
| #ifndef __OPENCV_CUDA_FILTERS_HPP__ | ||||
| #define __OPENCV_CUDA_FILTERS_HPP__ | ||||
|  | ||||
| #include "saturate_cast.hpp" | ||||
| #include "vec_traits.hpp" | ||||
| #include "vec_math.hpp" | ||||
| #include "type_traits.hpp" | ||||
|  | ||||
| /** @file | ||||
|  * @deprecated Use @ref cudev instead. | ||||
|  */ | ||||
|  | ||||
| //! @cond IGNORED | ||||
|  | ||||
| namespace cv { namespace cuda { namespace device | ||||
| { | ||||
|     template <typename Ptr2D> struct PointFilter | ||||
|     { | ||||
|         typedef typename Ptr2D::elem_type elem_type; | ||||
|         typedef float index_type; | ||||
|  | ||||
|         explicit __host__ __device__ __forceinline__ PointFilter(const Ptr2D& src_, float fx = 0.f, float fy = 0.f) | ||||
|         : src(src_) | ||||
|         { | ||||
|             (void)fx; | ||||
|             (void)fy; | ||||
|         } | ||||
|  | ||||
|         __device__ __forceinline__ elem_type operator ()(float y, float x) const | ||||
|         { | ||||
|             return src(__float2int_rz(y), __float2int_rz(x)); | ||||
|         } | ||||
|  | ||||
|         Ptr2D src; | ||||
|     }; | ||||
|  | ||||
|     template <typename Ptr2D> struct LinearFilter | ||||
|     { | ||||
|         typedef typename Ptr2D::elem_type elem_type; | ||||
|         typedef float index_type; | ||||
|  | ||||
|         explicit __host__ __device__ __forceinline__ LinearFilter(const Ptr2D& src_, float fx = 0.f, float fy = 0.f) | ||||
|         : src(src_) | ||||
|         { | ||||
|             (void)fx; | ||||
|             (void)fy; | ||||
|         } | ||||
|         __device__ __forceinline__ elem_type operator ()(float y, float x) const | ||||
|         { | ||||
|             typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type; | ||||
|  | ||||
|             work_type out = VecTraits<work_type>::all(0); | ||||
|  | ||||
|             const int x1 = __float2int_rd(x); | ||||
|             const int y1 = __float2int_rd(y); | ||||
|             const int x2 = x1 + 1; | ||||
|             const int y2 = y1 + 1; | ||||
|  | ||||
|             elem_type src_reg = src(y1, x1); | ||||
|             out = out + src_reg * ((x2 - x) * (y2 - y)); | ||||
|  | ||||
|             src_reg = src(y1, x2); | ||||
|             out = out + src_reg * ((x - x1) * (y2 - y)); | ||||
|  | ||||
|             src_reg = src(y2, x1); | ||||
|             out = out + src_reg * ((x2 - x) * (y - y1)); | ||||
|  | ||||
|             src_reg = src(y2, x2); | ||||
|             out = out + src_reg * ((x - x1) * (y - y1)); | ||||
|  | ||||
|             return saturate_cast<elem_type>(out); | ||||
|         } | ||||
|  | ||||
|         Ptr2D src; | ||||
|     }; | ||||
|  | ||||
    // Bicubic interpolation filter: convolves a 4x4 source neighborhood with
    // a separable cubic kernel and renormalizes by the accumulated weight.
    template <typename Ptr2D> struct CubicFilter
    {
        typedef typename Ptr2D::elem_type elem_type;
        typedef float index_type;
        typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;

        // fx/fy are unused; present only to match the shared filter interface.
        explicit __host__ __device__ __forceinline__ CubicFilter(const Ptr2D& src_, float fx = 0.f, float fy = 0.f)
        : src(src_)
        {
            (void)fx;
            (void)fy;
        }

        // Cubic convolution weight for a sample at distance x_ from the
        // target (coefficients match the a = -0.5 cubic kernel); zero
        // outside |x_| >= 2.
        static __device__ __forceinline__ float bicubicCoeff(float x_)
        {
            float x = fabsf(x_);
            if (x <= 1.0f)
            {
                return x * x * (1.5f * x - 2.5f) + 1.0f;
            }
            else if (x < 2.0f)
            {
                return x * (x * (-0.5f * x + 2.5f) - 4.0f) + 2.0f;
            }
            else
            {
                return 0.0f;
            }
        }

        __device__ elem_type operator ()(float y, float x) const
        {
            // integer-valued bounds of the 4x4 support window around (y, x)
            const float xmin = ::ceilf(x - 2.0f);
            const float xmax = ::floorf(x + 2.0f);

            const float ymin = ::ceilf(y - 2.0f);
            const float ymax = ::floorf(y + 2.0f);

            work_type sum = VecTraits<work_type>::all(0);
            float wsum = 0.0f;

            for (float cy = ymin; cy <= ymax; cy += 1.0f)
            {
                for (float cx = xmin; cx <= xmax; cx += 1.0f)
                {
                    // separable 2D weight for this sample
                    const float w = bicubicCoeff(x - cx) * bicubicCoeff(y - cy);
                    sum = sum + w * src(__float2int_rd(cy), __float2int_rd(cx));
                    wsum += w;
                }
            }

            // renormalize; guard against a zero total weight
            work_type res = (!wsum)? VecTraits<work_type>::all(0) : sum / wsum;

            return saturate_cast<elem_type>(res);
        }

        Ptr2D src;
    };
|     // for integer scaling | ||||
|     template <typename Ptr2D> struct IntegerAreaFilter | ||||
|     { | ||||
|         typedef typename Ptr2D::elem_type elem_type; | ||||
|         typedef float index_type; | ||||
|  | ||||
|         explicit __host__ __device__ __forceinline__ IntegerAreaFilter(const Ptr2D& src_, float scale_x_, float scale_y_) | ||||
|             : src(src_), scale_x(scale_x_), scale_y(scale_y_), scale(1.f / (scale_x * scale_y)) {} | ||||
|  | ||||
|         __device__ __forceinline__ elem_type operator ()(float y, float x) const | ||||
|         { | ||||
|             float fsx1 = x * scale_x; | ||||
|             float fsx2 = fsx1 + scale_x; | ||||
|  | ||||
|             int sx1 = __float2int_ru(fsx1); | ||||
|             int sx2 = __float2int_rd(fsx2); | ||||
|  | ||||
|             float fsy1 = y * scale_y; | ||||
|             float fsy2 = fsy1 + scale_y; | ||||
|  | ||||
|             int sy1 = __float2int_ru(fsy1); | ||||
|             int sy2 = __float2int_rd(fsy2); | ||||
|  | ||||
|             typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type; | ||||
|             work_type out = VecTraits<work_type>::all(0.f); | ||||
|  | ||||
|             for(int dy = sy1; dy < sy2; ++dy) | ||||
|                 for(int dx = sx1; dx < sx2; ++dx) | ||||
|                 { | ||||
|                     out = out + src(dy, dx) * scale; | ||||
|                 } | ||||
|  | ||||
|             return saturate_cast<elem_type>(out); | ||||
|         } | ||||
|  | ||||
|         Ptr2D src; | ||||
|         float scale_x, scale_y ,scale; | ||||
|     }; | ||||
|  | ||||
    // Area downsampling for arbitrary (non-integral) scale factors: sums the
    // fully-covered interior pixels and then adds the partially-covered edge
    // rows/columns and corner pixels, each weighted by its fractional
    // coverage.
    template <typename Ptr2D> struct AreaFilter
    {
        typedef typename Ptr2D::elem_type elem_type;
        typedef float index_type;

        explicit __host__ __device__ __forceinline__ AreaFilter(const Ptr2D& src_, float scale_x_, float scale_y_)
            : src(src_), scale_x(scale_x_), scale_y(scale_y_){}

        __device__ __forceinline__ elem_type operator ()(float y, float x) const
        {
            // real-valued horizontal extent of the source window
            float fsx1 = x * scale_x;
            float fsx2 = fsx1 + scale_x;

            // interior (fully covered) column range
            int sx1 = __float2int_ru(fsx1);
            int sx2 = __float2int_rd(fsx2);

            // real-valued vertical extent of the source window
            float fsy1 = y * scale_y;
            float fsy2 = fsy1 + scale_y;

            // interior (fully covered) row range
            int sy1 = __float2int_ru(fsy1);
            int sy2 = __float2int_rd(fsy2);

            // normalization by the window area clamped to the image extent
            // (src.width/src.height come from the Ptr2D wrapper)
            float scale = 1.f / (fminf(scale_x, src.width - fsx1) * fminf(scale_y, src.height - fsy1));

            typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;
            work_type out = VecTraits<work_type>::all(0.f);

            for (int dy = sy1; dy < sy2; ++dy)
            {
                // fully covered interior pixels of this row
                for (int dx = sx1; dx < sx2; ++dx)
                    out = out + src(dy, dx) * scale;

                // partially covered left column of this row
                if (sx1 > fsx1)
                    out = out + src(dy, (sx1 -1) ) * ((sx1 - fsx1) * scale);

                // partially covered right column of this row
                if (sx2 < fsx2)
                    out = out + src(dy, sx2) * ((fsx2 -sx2) * scale);
            }

            // partially covered top row
            if (sy1 > fsy1)
                for (int dx = sx1; dx < sx2; ++dx)
                    out = out + src( (sy1 - 1) , dx) * ((sy1 -fsy1) * scale);

            // partially covered bottom row
            if (sy2 < fsy2)
                for (int dx = sx1; dx < sx2; ++dx)
                    out = out + src(sy2, dx) * ((fsy2 -sy2) * scale);

            // the four partially covered corner pixels
            if ((sy1 > fsy1) &&  (sx1 > fsx1))
                out = out + src( (sy1 - 1) , (sx1 - 1)) * ((sy1 -fsy1) * (sx1 -fsx1) * scale);

            if ((sy1 > fsy1) &&  (sx2 < fsx2))
                out = out + src( (sy1 - 1) , sx2) * ((sy1 -fsy1) * (fsx2 -sx2) * scale);

            if ((sy2 < fsy2) &&  (sx2 < fsx2))
                out = out + src(sy2, sx2) * ((fsy2 -sy2) * (fsx2 -sx2) * scale);

            if ((sy2 < fsy2) &&  (sx1 > fsx1))
                out = out + src(sy2, (sx1 - 1)) * ((fsy2 -sy2) * (sx1 -fsx1) * scale);

            return saturate_cast<elem_type>(out);
        }

        Ptr2D src;
        float scale_x, scale_y;
        // NOTE(review): these members are never referenced in this struct;
        // 'haight' looks like a typo for 'height'. Left untouched since
        // renaming a public member would change the struct's interface.
        int width, haight;
    };
| }}} // namespace cv { namespace cuda { namespace cudev | ||||
|  | ||||
| //! @endcond | ||||
|  | ||||
| #endif // __OPENCV_CUDA_FILTERS_HPP__ | ||||
							
								
								
									
										79
									
								
								3rdparty/include/opencv2/core/cuda/funcattrib.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										79
									
								
								3rdparty/include/opencv2/core/cuda/funcattrib.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,79 @@ | ||||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | ||||
| // | ||||
| //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | ||||
| // | ||||
| //  By downloading, copying, installing or using the software you agree to this license. | ||||
| //  If you do not agree to this license, do not download, install, | ||||
| //  copy or use the software. | ||||
| // | ||||
| // | ||||
| //                           License Agreement | ||||
| //                For Open Source Computer Vision Library | ||||
| // | ||||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | ||||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | ||||
| // Third party copyrights are property of their respective owners. | ||||
| // | ||||
| // Redistribution and use in source and binary forms, with or without modification, | ||||
| // are permitted provided that the following conditions are met: | ||||
| // | ||||
| //   * Redistribution's of source code must retain the above copyright notice, | ||||
| //     this list of conditions and the following disclaimer. | ||||
| // | ||||
| //   * Redistribution's in binary form must reproduce the above copyright notice, | ||||
| //     this list of conditions and the following disclaimer in the documentation | ||||
| //     and/or other materials provided with the distribution. | ||||
| // | ||||
| //   * The name of the copyright holders may not be used to endorse or promote products | ||||
| //     derived from this software without specific prior written permission. | ||||
| // | ||||
| // This software is provided by the copyright holders and contributors "as is" and | ||||
| // any express or implied warranties, including, but not limited to, the implied | ||||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | ||||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | ||||
| // indirect, incidental, special, exemplary, or consequential damages | ||||
| // (including, but not limited to, procurement of substitute goods or services; | ||||
| // loss of use, data, or profits; or business interruption) however caused | ||||
| // and on any theory of liability, whether in contract, strict liability, | ||||
| // or tort (including negligence or otherwise) arising in any way out of | ||||
| // the use of this software, even if advised of the possibility of such damage. | ||||
| // | ||||
| //M*/ | ||||
|  | ||||
| #ifndef __OPENCV_CUDA_DEVICE_FUNCATTRIB_HPP_ | ||||
| #define __OPENCV_CUDA_DEVICE_FUNCATTRIB_HPP_ | ||||
|  | ||||
| #include <cstdio> | ||||
|  | ||||
| /** @file | ||||
|  * @deprecated Use @ref cudev instead. | ||||
|  */ | ||||
|  | ||||
| //! @cond IGNORED | ||||
|  | ||||
| namespace cv { namespace cuda { namespace device | ||||
| { | ||||
|     template<class Func> | ||||
|     void printFuncAttrib(Func& func) | ||||
|     { | ||||
|  | ||||
|         cudaFuncAttributes attrs; | ||||
|         cudaFuncGetAttributes(&attrs, func); | ||||
|  | ||||
|         printf("=== Function stats ===\n"); | ||||
|         printf("Name: \n"); | ||||
|         printf("sharedSizeBytes    = %d\n", attrs.sharedSizeBytes); | ||||
|         printf("constSizeBytes     = %d\n", attrs.constSizeBytes); | ||||
|         printf("localSizeBytes     = %d\n", attrs.localSizeBytes); | ||||
|         printf("maxThreadsPerBlock = %d\n", attrs.maxThreadsPerBlock); | ||||
|         printf("numRegs            = %d\n", attrs.numRegs); | ||||
|         printf("ptxVersion         = %d\n", attrs.ptxVersion); | ||||
|         printf("binaryVersion      = %d\n", attrs.binaryVersion); | ||||
|         printf("\n"); | ||||
|         fflush(stdout); | ||||
|     } | ||||
}}} // namespace cv { namespace cuda { namespace device
|  | ||||
| //! @endcond | ||||
|  | ||||
| #endif  /* __OPENCV_CUDA_DEVICE_FUNCATTRIB_HPP_ */ | ||||
							
								
								
									
										797
									
								
								3rdparty/include/opencv2/core/cuda/functional.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										797
									
								
								3rdparty/include/opencv2/core/cuda/functional.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,797 @@ | ||||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | ||||
| // | ||||
| //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | ||||
| // | ||||
| //  By downloading, copying, installing or using the software you agree to this license. | ||||
| //  If you do not agree to this license, do not download, install, | ||||
| //  copy or use the software. | ||||
| // | ||||
| // | ||||
| //                           License Agreement | ||||
| //                For Open Source Computer Vision Library | ||||
| // | ||||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | ||||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | ||||
| // Third party copyrights are property of their respective owners. | ||||
| // | ||||
| // Redistribution and use in source and binary forms, with or without modification, | ||||
| // are permitted provided that the following conditions are met: | ||||
| // | ||||
| //   * Redistribution's of source code must retain the above copyright notice, | ||||
| //     this list of conditions and the following disclaimer. | ||||
| // | ||||
| //   * Redistribution's in binary form must reproduce the above copyright notice, | ||||
| //     this list of conditions and the following disclaimer in the documentation | ||||
| //     and/or other materials provided with the distribution. | ||||
| // | ||||
| //   * The name of the copyright holders may not be used to endorse or promote products | ||||
| //     derived from this software without specific prior written permission. | ||||
| // | ||||
| // This software is provided by the copyright holders and contributors "as is" and | ||||
| // any express or implied warranties, including, but not limited to, the implied | ||||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | ||||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | ||||
| // indirect, incidental, special, exemplary, or consequential damages | ||||
| // (including, but not limited to, procurement of substitute goods or services; | ||||
| // loss of use, data, or profits; or business interruption) however caused | ||||
| // and on any theory of liability, whether in contract, strict liability, | ||||
| // or tort (including negligence or otherwise) arising in any way out of | ||||
| // the use of this software, even if advised of the possibility of such damage. | ||||
| // | ||||
| //M*/ | ||||
|  | ||||
| #ifndef __OPENCV_CUDA_FUNCTIONAL_HPP__ | ||||
| #define __OPENCV_CUDA_FUNCTIONAL_HPP__ | ||||
|  | ||||
| #include <functional> | ||||
| #include "saturate_cast.hpp" | ||||
| #include "vec_traits.hpp" | ||||
| #include "type_traits.hpp" | ||||
| #include "device_functions.h" | ||||
|  | ||||
| /** @file | ||||
|  * @deprecated Use @ref cudev instead. | ||||
|  */ | ||||
|  | ||||
| //! @cond IGNORED | ||||
|  | ||||
| namespace cv { namespace cuda { namespace device | ||||
| { | ||||
    // Function Objects
    // Thin wrappers that only inherit the nested typedefs (argument_type,
    // result_type) from the std bases. NOTE(review): std::unary_function /
    // std::binary_function were deprecated in C++11 and removed in C++17;
    // this vendored 2015-era header predates that removal.
    template<typename Argument, typename Result> struct unary_function : public std::unary_function<Argument, Result> {};
    template<typename Argument1, typename Argument2, typename Result> struct binary_function : public std::binary_function<Argument1, Argument2, Result> {};
|  | ||||
    // Arithmetic Operations
    //
    // Each functor takes its operands as TypeTraits<T>::ParameterType (the
    // by-value / by-const-ref choice made in type_traits.hpp, included above)
    // and provides trivial __host__ __device__ default and copy constructors
    // so instances can be constructed and passed in both host and device code.
    template <typename T> struct plus : binary_function<T, T, T>
    {
        // Returns a + b.
        __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
                                                 typename TypeTraits<T>::ParameterType b) const
        {
            return a + b;
        }
        __host__ __device__ __forceinline__ plus() {}
        __host__ __device__ __forceinline__ plus(const plus&) {}
    };

    template <typename T> struct minus : binary_function<T, T, T>
    {
        // Returns a - b.
        __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
                                                 typename TypeTraits<T>::ParameterType b) const
        {
            return a - b;
        }
        __host__ __device__ __forceinline__ minus() {}
        __host__ __device__ __forceinline__ minus(const minus&) {}
    };

    template <typename T> struct multiplies : binary_function<T, T, T>
    {
        // Returns a * b.
        __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
                                                 typename TypeTraits<T>::ParameterType b) const
        {
            return a * b;
        }
        __host__ __device__ __forceinline__ multiplies() {}
        __host__ __device__ __forceinline__ multiplies(const multiplies&) {}
    };

    template <typename T> struct divides : binary_function<T, T, T>
    {
        // Returns a / b (no divide-by-zero guard; caller's responsibility).
        __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
                                                 typename TypeTraits<T>::ParameterType b) const
        {
            return a / b;
        }
        __host__ __device__ __forceinline__ divides() {}
        __host__ __device__ __forceinline__ divides(const divides&) {}
    };

    template <typename T> struct modulus : binary_function<T, T, T>
    {
        // Returns a % b (integral T only; no zero guard).
        __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
                                                 typename TypeTraits<T>::ParameterType b) const
        {
            return a % b;
        }
        __host__ __device__ __forceinline__ modulus() {}
        __host__ __device__ __forceinline__ modulus(const modulus&) {}
    };

    template <typename T> struct negate : unary_function<T, T>
    {
        // Returns -a.
        __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a) const
        {
            return -a;
        }
        __host__ __device__ __forceinline__ negate() {}
        __host__ __device__ __forceinline__ negate(const negate&) {}
    };
|  | ||||
    // Comparison Operations
    // Each returns bool; operands are passed as TypeTraits<T>::ParameterType.
    template <typename T> struct equal_to : binary_function<T, T, bool>
    {
        // a == b
        __device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
                                                    typename TypeTraits<T>::ParameterType b) const
        {
            return a == b;
        }
        __host__ __device__ __forceinline__ equal_to() {}
        __host__ __device__ __forceinline__ equal_to(const equal_to&) {}
    };

    template <typename T> struct not_equal_to : binary_function<T, T, bool>
    {
        // a != b
        __device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
                                                    typename TypeTraits<T>::ParameterType b) const
        {
            return a != b;
        }
        __host__ __device__ __forceinline__ not_equal_to() {}
        __host__ __device__ __forceinline__ not_equal_to(const not_equal_to&) {}
    };

    template <typename T> struct greater : binary_function<T, T, bool>
    {
        // a > b
        __device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
                                                    typename TypeTraits<T>::ParameterType b) const
        {
            return a > b;
        }
        __host__ __device__ __forceinline__ greater() {}
        __host__ __device__ __forceinline__ greater(const greater&) {}
    };

    template <typename T> struct less : binary_function<T, T, bool>
    {
        // a < b
        __device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
                                                    typename TypeTraits<T>::ParameterType b) const
        {
            return a < b;
        }
        __host__ __device__ __forceinline__ less() {}
        __host__ __device__ __forceinline__ less(const less&) {}
    };

    template <typename T> struct greater_equal : binary_function<T, T, bool>
    {
        // a >= b
        __device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
                                                    typename TypeTraits<T>::ParameterType b) const
        {
            return a >= b;
        }
        __host__ __device__ __forceinline__ greater_equal() {}
        __host__ __device__ __forceinline__ greater_equal(const greater_equal&) {}
    };

    template <typename T> struct less_equal : binary_function<T, T, bool>
    {
        // a <= b
        __device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
                                                    typename TypeTraits<T>::ParameterType b) const
        {
            return a <= b;
        }
        __host__ __device__ __forceinline__ less_equal() {}
        __host__ __device__ __forceinline__ less_equal(const less_equal&) {}
    };
|  | ||||
    // Logical Operations
    // Boolean combinators; note these use C++'s && / || / ! on the operand
    // values themselves (no short-circuit concern: operands are already evaluated).
    template <typename T> struct logical_and : binary_function<T, T, bool>
    {
        // a && b
        __device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
                                                    typename TypeTraits<T>::ParameterType b) const
        {
            return a && b;
        }
        __host__ __device__ __forceinline__ logical_and() {}
        __host__ __device__ __forceinline__ logical_and(const logical_and&) {}
    };

    template <typename T> struct logical_or : binary_function<T, T, bool>
    {
        // a || b
        __device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
                                                    typename TypeTraits<T>::ParameterType b) const
        {
            return a || b;
        }
        __host__ __device__ __forceinline__ logical_or() {}
        __host__ __device__ __forceinline__ logical_or(const logical_or&) {}
    };

    template <typename T> struct logical_not : unary_function<T, bool>
    {
        // !a
        __device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a) const
        {
            return !a;
        }
        __host__ __device__ __forceinline__ logical_not() {}
        __host__ __device__ __forceinline__ logical_not(const logical_not&) {}
    };
|  | ||||
    // Bitwise Operations
    // Integral-type bit combinators; result type is T (not bool).
    template <typename T> struct bit_and : binary_function<T, T, T>
    {
        // a & b
        __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
                                                 typename TypeTraits<T>::ParameterType b) const
        {
            return a & b;
        }
        __host__ __device__ __forceinline__ bit_and() {}
        __host__ __device__ __forceinline__ bit_and(const bit_and&) {}
    };

    template <typename T> struct bit_or : binary_function<T, T, T>
    {
        // a | b
        __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
                                                 typename TypeTraits<T>::ParameterType b) const
        {
            return a | b;
        }
        __host__ __device__ __forceinline__ bit_or() {}
        __host__ __device__ __forceinline__ bit_or(const bit_or&) {}
    };

    template <typename T> struct bit_xor : binary_function<T, T, T>
    {
        // a ^ b
        __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
                                                 typename TypeTraits<T>::ParameterType b) const
        {
            return a ^ b;
        }
        __host__ __device__ __forceinline__ bit_xor() {}
        __host__ __device__ __forceinline__ bit_xor(const bit_xor&) {}
    };

    template <typename T> struct bit_not : unary_function<T, T>
    {
        // ~v (bitwise complement)
        __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType v) const
        {
            return ~v;
        }
        __host__ __device__ __forceinline__ bit_not() {}
        __host__ __device__ __forceinline__ bit_not(const bit_not&) {}
    };
|  | ||||
    // Generalized Identity Operations
    // identity returns its argument unchanged; note the return type is
    // TypeTraits<T>::ParameterType (i.e. possibly a const reference), not T.
    template <typename T> struct identity : unary_function<T, T>
    {
        __device__ __forceinline__ typename TypeTraits<T>::ParameterType operator()(typename TypeTraits<T>::ParameterType x) const
        {
            return x;
        }
        __host__ __device__ __forceinline__ identity() {}
        __host__ __device__ __forceinline__ identity(const identity&) {}
    };

    // project1st selects the first of two operands, ignoring the second.
    template <typename T1, typename T2> struct project1st : binary_function<T1, T2, T1>
    {
        __device__ __forceinline__ typename TypeTraits<T1>::ParameterType operator()(typename TypeTraits<T1>::ParameterType lhs, typename TypeTraits<T2>::ParameterType rhs) const
        {
            return lhs;
        }
        __host__ __device__ __forceinline__ project1st() {}
        __host__ __device__ __forceinline__ project1st(const project1st&) {}
    };

    // project2nd selects the second of two operands, ignoring the first.
    template <typename T1, typename T2> struct project2nd : binary_function<T1, T2, T2>
    {
        __device__ __forceinline__ typename TypeTraits<T2>::ParameterType operator()(typename TypeTraits<T1>::ParameterType lhs, typename TypeTraits<T2>::ParameterType rhs) const
        {
            return rhs;
        }
        __host__ __device__ __forceinline__ project2nd() {}
        __host__ __device__ __forceinline__ project2nd(const project2nd&) {}
    };
|  | ||||
|     // Min/Max Operations | ||||
|  | ||||
| #define OPENCV_CUDA_IMPLEMENT_MINMAX(name, type, op) \ | ||||
|     template <> struct name<type> : binary_function<type, type, type> \ | ||||
|     { \ | ||||
|         __device__ __forceinline__ type operator()(type lhs, type rhs) const {return op(lhs, rhs);} \ | ||||
|         __host__ __device__ __forceinline__ name() {}\ | ||||
|         __host__ __device__ __forceinline__ name(const name&) {}\ | ||||
|     }; | ||||
|  | ||||
|     template <typename T> struct maximum : binary_function<T, T, T> | ||||
|     { | ||||
|         __device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType lhs, typename TypeTraits<T>::ParameterType rhs) const | ||||
|         { | ||||
|             return max(lhs, rhs); | ||||
|         } | ||||
|         __host__ __device__ __forceinline__ maximum() {} | ||||
|         __host__ __device__ __forceinline__ maximum(const maximum&) {} | ||||
|     }; | ||||
|  | ||||
|     OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, uchar, ::max) | ||||
|     OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, schar, ::max) | ||||
|     OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, char, ::max) | ||||
|     OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, ushort, ::max) | ||||
|     OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, short, ::max) | ||||
|     OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, int, ::max) | ||||
|     OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, uint, ::max) | ||||
|     OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, float, ::fmax) | ||||
|     OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, double, ::fmax) | ||||
|  | ||||
|     template <typename T> struct minimum : binary_function<T, T, T> | ||||
|     { | ||||
|         __device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType lhs, typename TypeTraits<T>::ParameterType rhs) const | ||||
|         { | ||||
|             return min(lhs, rhs); | ||||
|         } | ||||
|         __host__ __device__ __forceinline__ minimum() {} | ||||
|         __host__ __device__ __forceinline__ minimum(const minimum&) {} | ||||
|     }; | ||||
|  | ||||
|     OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, uchar, ::min) | ||||
|     OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, schar, ::min) | ||||
|     OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, char, ::min) | ||||
|     OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, ushort, ::min) | ||||
|     OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, short, ::min) | ||||
|     OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, int, ::min) | ||||
|     OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, uint, ::min) | ||||
|     OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, float, ::fmin) | ||||
|     OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, double, ::fmin) | ||||
|  | ||||
| #undef OPENCV_CUDA_IMPLEMENT_MINMAX | ||||
|  | ||||
    // Math functions

    // Absolute-value functor. The generic template calls an unqualified
    // abs(x); the specializations below route each primitive type to the
    // appropriate overload (and are identity for unsigned types).
    template <typename T> struct abs_func : unary_function<T, T>
    {
        __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType x) const
        {
            return abs(x);
        }

        __host__ __device__ __forceinline__ abs_func() {}
        __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
    };
    // unsigned char is already non-negative: identity.
    template <> struct abs_func<unsigned char> : unary_function<unsigned char, unsigned char>
    {
        __device__ __forceinline__ unsigned char operator ()(unsigned char x) const
        {
            return x;
        }

        __host__ __device__ __forceinline__ abs_func() {}
        __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
    };
    // Narrow signed types widen to int for ::abs, then narrow back on return.
    template <> struct abs_func<signed char> : unary_function<signed char, signed char>
    {
        __device__ __forceinline__ signed char operator ()(signed char x) const
        {
            return ::abs((int)x);
        }

        __host__ __device__ __forceinline__ abs_func() {}
        __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
    };
    template <> struct abs_func<char> : unary_function<char, char>
    {
        __device__ __forceinline__ char operator ()(char x) const
        {
            return ::abs((int)x);
        }

        __host__ __device__ __forceinline__ abs_func() {}
        __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
    };
    // unsigned short: identity.
    template <> struct abs_func<unsigned short> : unary_function<unsigned short, unsigned short>
    {
        __device__ __forceinline__ unsigned short operator ()(unsigned short x) const
        {
            return x;
        }

        __host__ __device__ __forceinline__ abs_func() {}
        __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
    };
    template <> struct abs_func<short> : unary_function<short, short>
    {
        __device__ __forceinline__ short operator ()(short x) const
        {
            return ::abs((int)x);
        }

        __host__ __device__ __forceinline__ abs_func() {}
        __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
    };
    // unsigned int: identity.
    template <> struct abs_func<unsigned int> : unary_function<unsigned int, unsigned int>
    {
        __device__ __forceinline__ unsigned int operator ()(unsigned int x) const
        {
            return x;
        }

        __host__ __device__ __forceinline__ abs_func() {}
        __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
    };
    template <> struct abs_func<int> : unary_function<int, int>
    {
        __device__ __forceinline__ int operator ()(int x) const
        {
            return ::abs(x);
        }

        __host__ __device__ __forceinline__ abs_func() {}
        __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
    };
    // Floating point uses the dedicated fabs overloads (no int conversion).
    template <> struct abs_func<float> : unary_function<float, float>
    {
        __device__ __forceinline__ float operator ()(float x) const
        {
            return ::fabsf(x);
        }

        __host__ __device__ __forceinline__ abs_func() {}
        __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
    };
    template <> struct abs_func<double> : unary_function<double, double>
    {
        __device__ __forceinline__ double operator ()(double x) const
        {
            return ::fabs(x);
        }

        __host__ __device__ __forceinline__ abs_func() {}
        __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
    };
|  | ||||
// Generates a unary math functor "name_func": the generic template computes
// in float via the single-precision ::funcf overload; a <double>
// specialization preserves full precision via ::func.
#define OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(name, func) \
    template <typename T> struct name ## _func : unary_function<T, float> \
    { \
        __device__ __forceinline__ float operator ()(typename TypeTraits<T>::ParameterType v) const \
        { \
            return func ## f(v); \
        } \
        __host__ __device__ __forceinline__ name ## _func() {} \
        __host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \
    }; \
    template <> struct name ## _func<double> : unary_function<double, double> \
    { \
        __device__ __forceinline__ double operator ()(double v) const \
        { \
            return func(v); \
        } \
        __host__ __device__ __forceinline__ name ## _func() {} \
        __host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \
    };

// Same pattern for two-argument math functions (hypot, atan2, pow).
#define OPENCV_CUDA_IMPLEMENT_BIN_FUNCTOR(name, func) \
    template <typename T> struct name ## _func : binary_function<T, T, float> \
    { \
        __device__ __forceinline__ float operator ()(typename TypeTraits<T>::ParameterType v1, typename TypeTraits<T>::ParameterType v2) const \
        { \
            return func ## f(v1, v2); \
        } \
        __host__ __device__ __forceinline__ name ## _func() {} \
        __host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \
    }; \
    template <> struct name ## _func<double> : binary_function<double, double, double> \
    { \
        __device__ __forceinline__ double operator ()(double v1, double v2) const \
        { \
            return func(v1, v2); \
        } \
        __host__ __device__ __forceinline__ name ## _func() {} \
        __host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \
    };

    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(sqrt, ::sqrt)
    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(exp, ::exp)
    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(exp2, ::exp2)
    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(exp10, ::exp10)
    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(log, ::log)
    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(log2, ::log2)
    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(log10, ::log10)
    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(sin, ::sin)
    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(cos, ::cos)
    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(tan, ::tan)
    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(asin, ::asin)
    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(acos, ::acos)
    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(atan, ::atan)
    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(sinh, ::sinh)
    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(cosh, ::cosh)
    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(tanh, ::tanh)
    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(asinh, ::asinh)
    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(acosh, ::acosh)
    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(atanh, ::atanh)

    OPENCV_CUDA_IMPLEMENT_BIN_FUNCTOR(hypot, ::hypot)
    OPENCV_CUDA_IMPLEMENT_BIN_FUNCTOR(atan2, ::atan2)
    OPENCV_CUDA_IMPLEMENT_BIN_FUNCTOR(pow, ::pow)

    #undef OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR
    // NOTE(review): ..._UN_FUNCTOR_NO_DOUBLE is never defined in this file;
    // #undef of an undefined macro is a well-defined no-op and is kept for
    // parity with upstream.
    #undef OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR_NO_DOUBLE
    #undef OPENCV_CUDA_IMPLEMENT_BIN_FUNCTOR
|  | ||||
    // Squared 2-vector magnitude: src1*src1 + src2*src2 (no sqrt taken).
    // NOTE(review): declared as binary_function<T, T, float> but operator()
    // returns T, so the inherited result_type typedef and the real return
    // type disagree for non-float T; left as-is to match existing callers.
    template<typename T> struct hypot_sqr_func : binary_function<T, T, float>
    {
        __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType src1, typename TypeTraits<T>::ParameterType src2) const
        {
            return src1 * src1 + src2 * src2;
        }
        __host__ __device__ __forceinline__ hypot_sqr_func() {}
        __host__ __device__ __forceinline__ hypot_sqr_func(const hypot_sqr_func&) {}
    };
|  | ||||
    // Saturate Cast Functor
    // Converts T -> D with clamping to D's representable range via
    // saturate_cast (from saturate_cast.hpp, included above).
    template <typename T, typename D> struct saturate_cast_func : unary_function<T, D>
    {
        __device__ __forceinline__ D operator ()(typename TypeTraits<T>::ParameterType v) const
        {
            return saturate_cast<D>(v);
        }
        __host__ __device__ __forceinline__ saturate_cast_func() {}
        __host__ __device__ __forceinline__ saturate_cast_func(const saturate_cast_func&) {}
    };
|  | ||||
    // Threshold Functors
    // thresh_binary: dst = maxVal if src > thresh, else 0
    // (computed branchlessly: (src > thresh) yields 0/1, multiplied by maxVal).
    template <typename T> struct thresh_binary_func : unary_function<T, T>
    {
        // thresh_: threshold value; maxVal_: value produced when src > thresh.
        __host__ __device__ __forceinline__ thresh_binary_func(T thresh_, T maxVal_) : thresh(thresh_), maxVal(maxVal_) {}

        __device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType src) const
        {
            return (src > thresh) * maxVal;
        }

        // NOTE(review): default construction leaves thresh/maxVal uninitialized.
        __host__ __device__ __forceinline__ thresh_binary_func() {}
        __host__ __device__ __forceinline__ thresh_binary_func(const thresh_binary_func& other)
            : thresh(other.thresh), maxVal(other.maxVal) {}

        T thresh;
        T maxVal;
    };
|  | ||||
|     template <typename T> struct thresh_binary_inv_func : unary_function<T, T> | ||||
|     { | ||||
|         __host__ __device__ __forceinline__ thresh_binary_inv_func(T thresh_, T maxVal_) : thresh(thresh_), maxVal(maxVal_) {} | ||||
|  | ||||
|         __device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType src) const | ||||
|         { | ||||
|             return (src <= thresh) * maxVal; | ||||
|         } | ||||
|  | ||||
|         __host__ __device__ __forceinline__ thresh_binary_inv_func() {} | ||||
|         __host__ __device__ __forceinline__ thresh_binary_inv_func(const thresh_binary_inv_func& other) | ||||
|             : thresh(other.thresh), maxVal(other.maxVal) {} | ||||
|  | ||||
|         T thresh; | ||||
|         T maxVal; | ||||
|     }; | ||||
|  | ||||
|     template <typename T> struct thresh_trunc_func : unary_function<T, T> | ||||
|     { | ||||
|         explicit __host__ __device__ __forceinline__ thresh_trunc_func(T thresh_, T maxVal_ = 0) : thresh(thresh_) {(void)maxVal_;} | ||||
|  | ||||
|         __device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType src) const | ||||
|         { | ||||
|             return minimum<T>()(src, thresh); | ||||
|         } | ||||
|  | ||||
|         __host__ __device__ __forceinline__ thresh_trunc_func() {} | ||||
|         __host__ __device__ __forceinline__ thresh_trunc_func(const thresh_trunc_func& other) | ||||
|             : thresh(other.thresh) {} | ||||
|  | ||||
|         T thresh; | ||||
|     }; | ||||
|  | ||||
|     template <typename T> struct thresh_to_zero_func : unary_function<T, T> | ||||
|     { | ||||
|         explicit __host__ __device__ __forceinline__ thresh_to_zero_func(T thresh_, T maxVal_ = 0) : thresh(thresh_) {(void)maxVal_;} | ||||
|  | ||||
|         __device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType src) const | ||||
|         { | ||||
|             return (src > thresh) * src; | ||||
|         } | ||||
|  | ||||
|         __host__ __device__ __forceinline__ thresh_to_zero_func() {} | ||||
|        __host__  __device__ __forceinline__ thresh_to_zero_func(const thresh_to_zero_func& other) | ||||
|             : thresh(other.thresh) {} | ||||
|  | ||||
|         T thresh; | ||||
|     }; | ||||
|  | ||||
|     template <typename T> struct thresh_to_zero_inv_func : unary_function<T, T> | ||||
|     { | ||||
|         explicit __host__ __device__ __forceinline__ thresh_to_zero_inv_func(T thresh_, T maxVal_ = 0) : thresh(thresh_) {(void)maxVal_;} | ||||
|  | ||||
|         __device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType src) const | ||||
|         { | ||||
|             return (src <= thresh) * src; | ||||
|         } | ||||
|  | ||||
|         __host__ __device__ __forceinline__ thresh_to_zero_inv_func() {} | ||||
|         __host__ __device__ __forceinline__ thresh_to_zero_inv_func(const thresh_to_zero_inv_func& other) | ||||
|             : thresh(other.thresh) {} | ||||
|  | ||||
|         T thresh; | ||||
|     }; | ||||
|  | ||||
|     // Function Object Adaptors | ||||
|     template <typename Predicate> struct unary_negate : unary_function<typename Predicate::argument_type, bool> | ||||
|     { | ||||
|       explicit __host__ __device__ __forceinline__ unary_negate(const Predicate& p) : pred(p) {} | ||||
|  | ||||
|       __device__ __forceinline__ bool operator()(typename TypeTraits<typename Predicate::argument_type>::ParameterType x) const | ||||
|       { | ||||
|           return !pred(x); | ||||
|       } | ||||
|  | ||||
|       __host__ __device__ __forceinline__ unary_negate() {} | ||||
|       __host__ __device__ __forceinline__ unary_negate(const unary_negate& other) : pred(other.pred) {} | ||||
|  | ||||
|       Predicate pred; | ||||
|     }; | ||||
|  | ||||
|     template <typename Predicate> __host__ __device__ __forceinline__ unary_negate<Predicate> not1(const Predicate& pred) | ||||
|     { | ||||
|         return unary_negate<Predicate>(pred); | ||||
|     } | ||||
|  | ||||
|     template <typename Predicate> struct binary_negate : binary_function<typename Predicate::first_argument_type, typename Predicate::second_argument_type, bool> | ||||
|     { | ||||
|         explicit __host__ __device__ __forceinline__ binary_negate(const Predicate& p) : pred(p) {} | ||||
|  | ||||
|         __device__ __forceinline__ bool operator()(typename TypeTraits<typename Predicate::first_argument_type>::ParameterType x, | ||||
|                                                    typename TypeTraits<typename Predicate::second_argument_type>::ParameterType y) const | ||||
|         { | ||||
|             return !pred(x,y); | ||||
|         } | ||||
|  | ||||
|         __host__ __device__ __forceinline__ binary_negate() {} | ||||
|         __host__ __device__ __forceinline__ binary_negate(const binary_negate& other) : pred(other.pred) {} | ||||
|  | ||||
|         Predicate pred; | ||||
|     }; | ||||
|  | ||||
|     template <typename BinaryPredicate> __host__ __device__ __forceinline__ binary_negate<BinaryPredicate> not2(const BinaryPredicate& pred) | ||||
|     { | ||||
|         return binary_negate<BinaryPredicate>(pred); | ||||
|     } | ||||
|  | ||||
|     template <typename Op> struct binder1st : unary_function<typename Op::second_argument_type, typename Op::result_type> | ||||
|     { | ||||
|         __host__ __device__ __forceinline__ binder1st(const Op& op_, const typename Op::first_argument_type& arg1_) : op(op_), arg1(arg1_) {} | ||||
|  | ||||
|         __device__ __forceinline__ typename Op::result_type operator ()(typename TypeTraits<typename Op::second_argument_type>::ParameterType a) const | ||||
|         { | ||||
|             return op(arg1, a); | ||||
|         } | ||||
|  | ||||
|         __host__ __device__ __forceinline__ binder1st() {} | ||||
|         __host__ __device__ __forceinline__ binder1st(const binder1st& other) : op(other.op), arg1(other.arg1) {} | ||||
|  | ||||
|         Op op; | ||||
|         typename Op::first_argument_type arg1; | ||||
|     }; | ||||
|  | ||||
|     template <typename Op, typename T> __host__ __device__ __forceinline__ binder1st<Op> bind1st(const Op& op, const T& x) | ||||
|     { | ||||
|         return binder1st<Op>(op, typename Op::first_argument_type(x)); | ||||
|     } | ||||
|  | ||||
|     template <typename Op> struct binder2nd : unary_function<typename Op::first_argument_type, typename Op::result_type> | ||||
|     { | ||||
|         __host__ __device__ __forceinline__ binder2nd(const Op& op_, const typename Op::second_argument_type& arg2_) : op(op_), arg2(arg2_) {} | ||||
|  | ||||
|         __forceinline__ __device__ typename Op::result_type operator ()(typename TypeTraits<typename Op::first_argument_type>::ParameterType a) const | ||||
|         { | ||||
|             return op(a, arg2); | ||||
|         } | ||||
|  | ||||
|         __host__ __device__ __forceinline__ binder2nd() {} | ||||
|         __host__ __device__ __forceinline__ binder2nd(const binder2nd& other) : op(other.op), arg2(other.arg2) {} | ||||
|  | ||||
|         Op op; | ||||
|         typename Op::second_argument_type arg2; | ||||
|     }; | ||||
|  | ||||
|     template <typename Op, typename T> __host__ __device__ __forceinline__ binder2nd<Op> bind2nd(const Op& op, const T& x) | ||||
|     { | ||||
|         return binder2nd<Op>(op, typename Op::second_argument_type(x)); | ||||
|     } | ||||
|  | ||||
|     // Functor Traits | ||||
    // Compile-time trait: value is true when F is (convertible to) some
    // unary_function<T, D> instantiation. Uses the classic pre-C++11
    // sizeof-based SFINAE overload trick.
    template <typename F> struct IsUnaryFunction
    {
        typedef char Yes;        // sizeof(Yes) == 1
        struct No {Yes a[2];};   // sizeof(No) == 2, distinguishable from Yes

        // Preferred overload when F converts to a unary_function<T, D>.
        template <typename T, typename D> static Yes check(unary_function<T, D>);
        static No check(...);    // fallback for everything else

        // Never defined: only used inside the unevaluated sizeof() expression.
        static F makeF();

        enum { value = (sizeof(check(makeF())) == sizeof(Yes)) };
    };
|  | ||||
    // Compile-time trait: value is true when F is (convertible to) some
    // binary_function<T1, T2, D> instantiation. Same sizeof-based SFINAE
    // technique as IsUnaryFunction above.
    template <typename F> struct IsBinaryFunction
    {
        typedef char Yes;        // sizeof(Yes) == 1
        struct No {Yes a[2];};   // sizeof(No) == 2, distinguishable from Yes

        // Preferred overload when F converts to a binary_function<T1, T2, D>.
        template <typename T1, typename T2, typename D> static Yes check(binary_function<T1, T2, D>);
        static No check(...);    // fallback for everything else

        // Never defined: only used inside the unevaluated sizeof() expression.
        static F makeF();

        enum { value = (sizeof(check(makeF())) == sizeof(Yes)) };
    };
|  | ||||
    namespace functional_detail
    {
        // Default "shift" for the transform framework, selected by element
        // size: 4 when the destination element is 1 byte, 2 when it is
        // 2 bytes, 1 otherwise (presumably the number of elements each thread
        // processes per iteration -- confirm against the transform kernels).
        template <size_t src_elem_size, size_t dst_elem_size> struct UnOpShift { enum { shift = 1 }; };
        template <size_t src_elem_size> struct UnOpShift<src_elem_size, 1> { enum { shift = 4 }; };
        template <size_t src_elem_size> struct UnOpShift<src_elem_size, 2> { enum { shift = 2 }; };

        // Shift for a unary op with source type T and destination type D.
        template <typename T, typename D> struct DefaultUnaryShift
        {
            enum { shift = UnOpShift<sizeof(T), sizeof(D)>::shift };
        };

        // Same size-keyed policy for binary operations; only the destination
        // element size participates in the specializations.
        template <size_t src_elem_size1, size_t src_elem_size2, size_t dst_elem_size> struct BinOpShift { enum { shift = 1 }; };
        template <size_t src_elem_size1, size_t src_elem_size2> struct BinOpShift<src_elem_size1, src_elem_size2, 1> { enum { shift = 4 }; };
        template <size_t src_elem_size1, size_t src_elem_size2> struct BinOpShift<src_elem_size1, src_elem_size2, 2> { enum { shift = 2 }; };

        // Shift for a binary op with source types T1, T2 and destination D.
        template <typename T1, typename T2, typename D> struct DefaultBinaryShift
        {
            enum { shift = BinOpShift<sizeof(T1), sizeof(T2), sizeof(D)>::shift };
        };

        // Dispatches to the unary or binary policy based on the functor's
        // arity, detected via the IsUnaryFunction trait above.
        template <typename Func, bool unary = IsUnaryFunction<Func>::value> struct ShiftDispatcher;
        template <typename Func> struct ShiftDispatcher<Func, true>
        {
            enum { shift = DefaultUnaryShift<typename Func::argument_type, typename Func::result_type>::shift };
        };
        template <typename Func> struct ShiftDispatcher<Func, false>
        {
            enum { shift = DefaultBinaryShift<typename Func::first_argument_type, typename Func::second_argument_type, typename Func::result_type>::shift };
        };
    }
|  | ||||
    // Default shift for Func, resolved through the arity dispatcher above.
    template <typename Func> struct DefaultTransformShift
    {
        enum { shift = functional_detail::ShiftDispatcher<Func>::shift };
    };

    // Default launch-configuration traits consumed by the transform framework:
    // block dimensions for the "simple" and "smart" kernels plus the smart
    // kernel's per-thread shift.
    template <typename Func> struct DefaultTransformFunctorTraits
    {
        enum { simple_block_dim_x = 16 };
        enum { simple_block_dim_y = 16 };

        enum { smart_block_dim_x = 16 };
        enum { smart_block_dim_y = 16 };
        enum { smart_shift = DefaultTransformShift<Func>::shift };
    };

    // Specialize this (directly or via the macro below) to tune the transform
    // launch configuration for a particular functor type.
    template <typename Func> struct TransformFunctorTraits : DefaultTransformFunctorTraits<Func> {};

// Opens a TransformFunctorTraits specialization for `type`; the user supplies
// the body (overriding enums) after the macro invocation.
#define OPENCV_CUDA_TRANSFORM_FUNCTOR_TRAITS(type) \
    template <> struct TransformFunctorTraits< type > : DefaultTransformFunctorTraits< type >
}}} // namespace cv { namespace cuda { namespace device
|  | ||||
| //! @endcond | ||||
|  | ||||
| #endif // __OPENCV_CUDA_FUNCTIONAL_HPP__ | ||||
							
								
								
									
										128
									
								
								3rdparty/include/opencv2/core/cuda/limits.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										128
									
								
								3rdparty/include/opencv2/core/cuda/limits.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,128 @@ | ||||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | ||||
| // | ||||
| //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | ||||
| // | ||||
| //  By downloading, copying, installing or using the software you agree to this license. | ||||
| //  If you do not agree to this license, do not download, install, | ||||
| //  copy or use the software. | ||||
| // | ||||
| // | ||||
| //                           License Agreement | ||||
| //                For Open Source Computer Vision Library | ||||
| // | ||||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | ||||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | ||||
| // Third party copyrights are property of their respective owners. | ||||
| // | ||||
| // Redistribution and use in source and binary forms, with or without modification, | ||||
| // are permitted provided that the following conditions are met: | ||||
| // | ||||
| //   * Redistribution's of source code must retain the above copyright notice, | ||||
| //     this list of conditions and the following disclaimer. | ||||
| // | ||||
| //   * Redistribution's in binary form must reproduce the above copyright notice, | ||||
| //     this list of conditions and the following disclaimer in the documentation | ||||
| //     and/or other materials provided with the distribution. | ||||
| // | ||||
| //   * The name of the copyright holders may not be used to endorse or promote products | ||||
| //     derived from this software without specific prior written permission. | ||||
| // | ||||
| // This software is provided by the copyright holders and contributors "as is" and | ||||
| // any express or implied warranties, including, but not limited to, the implied | ||||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | ||||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | ||||
| // indirect, incidental, special, exemplary, or consequential damages | ||||
| // (including, but not limited to, procurement of substitute goods or services; | ||||
| // loss of use, data, or profits; or business interruption) however caused | ||||
| // and on any theory of liability, whether in contract, strict liability, | ||||
| // or tort (including negligence or otherwise) arising in any way out of | ||||
| // the use of this software, even if advised of the possibility of such damage. | ||||
| // | ||||
| //M*/ | ||||
|  | ||||
| #ifndef __OPENCV_CUDA_LIMITS_HPP__ | ||||
| #define __OPENCV_CUDA_LIMITS_HPP__ | ||||
|  | ||||
| #include <limits.h> | ||||
| #include <float.h> | ||||
| #include "common.hpp" | ||||
|  | ||||
| /** @file | ||||
|  * @deprecated Use @ref cudev instead. | ||||
|  */ | ||||
|  | ||||
| //! @cond IGNORED | ||||
|  | ||||
| namespace cv { namespace cuda { namespace device | ||||
| { | ||||
// Minimal device-side replacement for std::numeric_limits, providing only the
// members OpenCV CUDA kernels use: static min()/max() (plus epsilon() for the
// floating-point types) and the is_signed flag. As with std::numeric_limits,
// min() of a floating-point type is the smallest positive normalized value
// (FLT_MIN / DBL_MIN), not the most negative representable value.
template <class T> struct numeric_limits;

template <> struct numeric_limits<bool>
{
    __device__ __forceinline__ static bool min() { return false; }
    __device__ __forceinline__ static bool max() { return true;  }
    static const bool is_signed = false;
};

template <> struct numeric_limits<signed char>
{
    __device__ __forceinline__ static signed char min() { return SCHAR_MIN; }
    __device__ __forceinline__ static signed char max() { return SCHAR_MAX; }
    static const bool is_signed = true;
};

template <> struct numeric_limits<unsigned char>
{
    __device__ __forceinline__ static unsigned char min() { return 0; }
    __device__ __forceinline__ static unsigned char max() { return UCHAR_MAX; }
    static const bool is_signed = false;
};

template <> struct numeric_limits<short>
{
    __device__ __forceinline__ static short min() { return SHRT_MIN; }
    __device__ __forceinline__ static short max() { return SHRT_MAX; }
    static const bool is_signed = true;
};

template <> struct numeric_limits<unsigned short>
{
    __device__ __forceinline__ static unsigned short min() { return 0; }
    __device__ __forceinline__ static unsigned short max() { return USHRT_MAX; }
    static const bool is_signed = false;
};

template <> struct numeric_limits<int>
{
    __device__ __forceinline__ static int min() { return INT_MIN; }
    __device__ __forceinline__ static int max() { return INT_MAX; }
    static const bool is_signed = true;
};

template <> struct numeric_limits<unsigned int>
{
    __device__ __forceinline__ static unsigned int min() { return 0; }
    __device__ __forceinline__ static unsigned int max() { return UINT_MAX; }
    static const bool is_signed = false;
};

template <> struct numeric_limits<float>
{
    // min() is FLT_MIN: smallest positive normalized float, not -FLT_MAX.
    __device__ __forceinline__ static float min() { return FLT_MIN; }
    __device__ __forceinline__ static float max() { return FLT_MAX; }
    __device__ __forceinline__ static float epsilon() { return FLT_EPSILON; }
    static const bool is_signed = true;
};

template <> struct numeric_limits<double>
{
    // min() is DBL_MIN: smallest positive normalized double, not -DBL_MAX.
    __device__ __forceinline__ static double min() { return DBL_MIN; }
    __device__ __forceinline__ static double max() { return DBL_MAX; }
    __device__ __forceinline__ static double epsilon() { return DBL_EPSILON; }
    static const bool is_signed = true;
};
}}} // namespace cv { namespace cuda { namespace device
|  | ||||
| //! @endcond | ||||
|  | ||||
| #endif // __OPENCV_CUDA_LIMITS_HPP__ | ||||
							
								
								
									
										205
									
								
								3rdparty/include/opencv2/core/cuda/reduce.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										205
									
								
								3rdparty/include/opencv2/core/cuda/reduce.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,205 @@ | ||||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | ||||
| // | ||||
| //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | ||||
| // | ||||
| //  By downloading, copying, installing or using the software you agree to this license. | ||||
| //  If you do not agree to this license, do not download, install, | ||||
| //  copy or use the software. | ||||
| // | ||||
| // | ||||
| //                           License Agreement | ||||
| //                For Open Source Computer Vision Library | ||||
| // | ||||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | ||||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | ||||
| // Third party copyrights are property of their respective owners. | ||||
| // | ||||
| // Redistribution and use in source and binary forms, with or without modification, | ||||
| // are permitted provided that the following conditions are met: | ||||
| // | ||||
| //   * Redistribution's of source code must retain the above copyright notice, | ||||
| //     this list of conditions and the following disclaimer. | ||||
| // | ||||
| //   * Redistribution's in binary form must reproduce the above copyright notice, | ||||
| //     this list of conditions and the following disclaimer in the documentation | ||||
| //     and/or other materials provided with the distribution. | ||||
| // | ||||
| //   * The name of the copyright holders may not be used to endorse or promote products | ||||
| //     derived from this software without specific prior written permission. | ||||
| // | ||||
| // This software is provided by the copyright holders and contributors "as is" and | ||||
| // any express or implied warranties, including, but not limited to, the implied | ||||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | ||||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | ||||
| // indirect, incidental, special, exemplary, or consequential damages | ||||
| // (including, but not limited to, procurement of substitute goods or services; | ||||
| // loss of use, data, or profits; or business interruption) however caused | ||||
| // and on any theory of liability, whether in contract, strict liability, | ||||
| // or tort (including negligence or otherwise) arising in any way out of | ||||
| // the use of this software, even if advised of the possibility of such damage. | ||||
| // | ||||
| //M*/ | ||||
|  | ||||
| #ifndef __OPENCV_CUDA_REDUCE_HPP__ | ||||
| #define __OPENCV_CUDA_REDUCE_HPP__ | ||||
|  | ||||
| #include <thrust/tuple.h> | ||||
| #include "detail/reduce.hpp" | ||||
| #include "detail/reduce_key_val.hpp" | ||||
|  | ||||
| /** @file | ||||
|  * @deprecated Use @ref cudev instead. | ||||
|  */ | ||||
|  | ||||
| //! @cond IGNORED | ||||
|  | ||||
| namespace cv { namespace cuda { namespace device | ||||
| { | ||||
    // Block-level reduction of one value per thread.
    //   smem : shared-memory scratch (at least N elements of T)
    //   val  : this thread's contribution; also receives the reduction result
    //          (NOTE(review): which threads hold the final value is decided by
    //          the detail implementation -- confirm before relying on it)
    //   tid  : this thread's index among the N participating threads
    //   op   : binary reduction operator
    template <int N, typename T, class Op>
    __device__ __forceinline__ void reduce(volatile T* smem, T& val, unsigned int tid, const Op& op)
    {
        reduce_detail::Dispatcher<N>::reductor::template reduce<volatile T*, T&, const Op&>(smem, val, tid, op);
    }
    // Tuple overload: reduces up to 10 values per thread in a single pass.
    // smem, val and op are element-wise parallel thrust tuples of shared-memory
    // pointers, per-thread values and reduction operators.
    template <int N,
              typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9,
              typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9,
              class Op0, class Op1, class Op2, class Op3, class Op4, class Op5, class Op6, class Op7, class Op8, class Op9>
    __device__ __forceinline__ void reduce(const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
                                           const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
                                           unsigned int tid,
                                           const thrust::tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>& op)
    {
        reduce_detail::Dispatcher<N>::reductor::template reduce<
                const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>&,
                const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>&,
                const thrust::tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>&>(smem, val, tid, op);
    }
|  | ||||
    // Block-level key/value reduction: keys are compared with cmp and the
    // associated values travel with the winning keys. skeys/svals are
    // shared-memory scratch (at least N elements each); key/val are this
    // thread's contributions and receive the result, as dictated by the
    // detail implementation.
    template <unsigned int N, typename K, typename V, class Cmp>
    __device__ __forceinline__ void reduceKeyVal(volatile K* skeys, K& key, volatile V* svals, V& val, unsigned int tid, const Cmp& cmp)
    {
        reduce_key_val_detail::Dispatcher<N>::reductor::template reduce<volatile K*, K&, volatile V*, V&, const Cmp&>(skeys, key, svals, val, tid, cmp);
    }
    // Overload: single key, up to 10 values per thread carried as parallel
    // thrust tuples (svals = shared-memory pointers, val = per-thread values).
    template <unsigned int N,
              typename K,
              typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
              typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
              class Cmp>
    __device__ __forceinline__ void reduceKeyVal(volatile K* skeys, K& key,
                                                 const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
                                                 const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
                                                 unsigned int tid, const Cmp& cmp)
    {
        reduce_key_val_detail::Dispatcher<N>::reductor::template reduce<volatile K*, K&,
                const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>&,
                const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>&,
                const Cmp&>(skeys, key, svals, val, tid, cmp);
    }
    // Overload: up to 10 keys and 10 values per thread, each with its own
    // comparator, all passed as element-wise parallel thrust tuples.
    template <unsigned int N,
              typename KP0, typename KP1, typename KP2, typename KP3, typename KP4, typename KP5, typename KP6, typename KP7, typename KP8, typename KP9,
              typename KR0, typename KR1, typename KR2, typename KR3, typename KR4, typename KR5, typename KR6, typename KR7, typename KR8, typename KR9,
              typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
              typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
              class Cmp0, class Cmp1, class Cmp2, class Cmp3, class Cmp4, class Cmp5, class Cmp6, class Cmp7, class Cmp8, class Cmp9>
    __device__ __forceinline__ void reduceKeyVal(const thrust::tuple<KP0, KP1, KP2, KP3, KP4, KP5, KP6, KP7, KP8, KP9>& skeys,
                                                 const thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9>& key,
                                                 const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
                                                 const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
                                                 unsigned int tid,
                                                 const thrust::tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9>& cmp)
    {
        reduce_key_val_detail::Dispatcher<N>::reductor::template reduce<
                const thrust::tuple<KP0, KP1, KP2, KP3, KP4, KP5, KP6, KP7, KP8, KP9>&,
                const thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9>&,
                const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>&,
                const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>&,
                const thrust::tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9>&
                >(skeys, key, svals, val, tid, cmp);
    }
|  | ||||
    // smem_tuple: helpers that bundle 1..10 shared-memory pointers into a
    // thrust::tuple of volatile pointers, in the form expected by the tuple
    // overloads of reduce()/reduceKeyVal() above. The volatile qualification
    // is applied here so callers can pass plain shared-memory pointers.

    template <typename T0>
    __device__ __forceinline__
    thrust::tuple<volatile T0*>
    smem_tuple(T0* t0)
    {
        return thrust::make_tuple((volatile T0*) t0);
    }

    template <typename T0, typename T1>
    __device__ __forceinline__
    thrust::tuple<volatile T0*, volatile T1*>
    smem_tuple(T0* t0, T1* t1)
    {
        return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1);
    }

    template <typename T0, typename T1, typename T2>
    __device__ __forceinline__
    thrust::tuple<volatile T0*, volatile T1*, volatile T2*>
    smem_tuple(T0* t0, T1* t1, T2* t2)
    {
        return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2);
    }

    template <typename T0, typename T1, typename T2, typename T3>
    __device__ __forceinline__
    thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*>
    smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3)
    {
        return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3);
    }

    template <typename T0, typename T1, typename T2, typename T3, typename T4>
    __device__ __forceinline__
    thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*>
    smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4)
    {
        return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4);
    }

    template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5>
    __device__ __forceinline__
    thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*>
    smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5)
    {
        return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5);
    }

    template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6>
    __device__ __forceinline__
    thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*, volatile T6*>
    smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6)
    {
        return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5, (volatile T6*) t6);
    }

    template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7>
    __device__ __forceinline__
    thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*, volatile T6*, volatile T7*>
    smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6, T7* t7)
    {
        return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5, (volatile T6*) t6, (volatile T7*) t7);
    }

    template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8>
    __device__ __forceinline__
    thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*, volatile T6*, volatile T7*, volatile T8*>
    smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6, T7* t7, T8* t8)
    {
        return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5, (volatile T6*) t6, (volatile T7*) t7, (volatile T8*) t8);
    }

    template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8, typename T9>
    __device__ __forceinline__
    thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*, volatile T6*, volatile T7*, volatile T8*, volatile T9*>
    smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6, T7* t7, T8* t8, T9* t9)
    {
        return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5, (volatile T6*) t6, (volatile T7*) t7, (volatile T8*) t8, (volatile T9*) t9);
    }
| }}} | ||||
|  | ||||
| //! @endcond | ||||
|  | ||||
| #endif // __OPENCV_CUDA_UTILITY_HPP__ | ||||
							
								
								
									
										292
									
								
								3rdparty/include/opencv2/core/cuda/saturate_cast.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										292
									
								
								3rdparty/include/opencv2/core/cuda/saturate_cast.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,292 @@ | ||||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | ||||
| // | ||||
| //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | ||||
| // | ||||
| //  By downloading, copying, installing or using the software you agree to this license. | ||||
| //  If you do not agree to this license, do not download, install, | ||||
| //  copy or use the software. | ||||
| // | ||||
| // | ||||
| //                           License Agreement | ||||
| //                For Open Source Computer Vision Library | ||||
| // | ||||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | ||||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | ||||
| // Third party copyrights are property of their respective owners. | ||||
| // | ||||
| // Redistribution and use in source and binary forms, with or without modification, | ||||
| // are permitted provided that the following conditions are met: | ||||
| // | ||||
| //   * Redistribution's of source code must retain the above copyright notice, | ||||
| //     this list of conditions and the following disclaimer. | ||||
| // | ||||
| //   * Redistribution's in binary form must reproduce the above copyright notice, | ||||
| //     this list of conditions and the following disclaimer in the documentation | ||||
| //     and/or other materials provided with the distribution. | ||||
| // | ||||
| //   * The name of the copyright holders may not be used to endorse or promote products | ||||
| //     derived from this software without specific prior written permission. | ||||
| // | ||||
| // This software is provided by the copyright holders and contributors "as is" and | ||||
| // any express or implied warranties, including, but not limited to, the implied | ||||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | ||||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | ||||
| // indirect, incidental, special, exemplary, or consequential damages | ||||
| // (including, but not limited to, procurement of substitute goods or services; | ||||
| // loss of use, data, or profits; or business interruption) however caused | ||||
| // and on any theory of liability, whether in contract, strict liability, | ||||
| // or tort (including negligence or otherwise) arising in any way out of | ||||
| // the use of this software, even if advised of the possibility of such damage. | ||||
| // | ||||
| //M*/ | ||||
|  | ||||
| #ifndef __OPENCV_CUDA_SATURATE_CAST_HPP__ | ||||
| #define __OPENCV_CUDA_SATURATE_CAST_HPP__ | ||||
|  | ||||
| #include "common.hpp" | ||||
|  | ||||
| /** @file | ||||
|  * @deprecated Use @ref cudev instead. | ||||
|  */ | ||||
|  | ||||
| //! @cond IGNORED | ||||
|  | ||||
namespace cv { namespace cuda { namespace device
{
    // Device-side saturate_cast. The generic templates below perform a plain
    // conversion with no clamping; the specializations that follow clamp
    // narrowing conversions into the destination range using PTX "cvt"
    // instructions with the ".sat" (saturate) modifier. Floating-point inputs
    // are rounded to nearest-even via the ".rni" modifier.
    template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(uchar v) { return _Tp(v); }
    template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(schar v) { return _Tp(v); }
    template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(ushort v) { return _Tp(v); }
    template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(short v) { return _Tp(v); }
    template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(uint v) { return _Tp(v); }
    template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(int v) { return _Tp(v); }
    template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(float v) { return _Tp(v); }
    template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(double v) { return _Tp(v); }

    // ---- to uchar: clamp to the unsigned 8-bit range ----
    // A 32-bit uint intermediate is used because the "=r" asm constraint
    // names a 32-bit register; cvt.sat already clamps the value into u8
    // range, so the implicit narrowing on "return res;" loses nothing.
    template<> __device__ __forceinline__ uchar saturate_cast<uchar>(schar v)
    {
        uint res = 0;
        int vi = v;  // widen to a 32-bit value for the "r" input constraint
        asm("cvt.sat.u8.s8 %0, %1;" : "=r"(res) : "r"(vi));
        return res;
    }
    template<> __device__ __forceinline__ uchar saturate_cast<uchar>(short v)
    {
        uint res = 0;
        asm("cvt.sat.u8.s16 %0, %1;" : "=r"(res) : "h"(v));
        return res;
    }
    template<> __device__ __forceinline__ uchar saturate_cast<uchar>(ushort v)
    {
        uint res = 0;
        asm("cvt.sat.u8.u16 %0, %1;" : "=r"(res) : "h"(v));
        return res;
    }
    template<> __device__ __forceinline__ uchar saturate_cast<uchar>(int v)
    {
        uint res = 0;
        asm("cvt.sat.u8.s32 %0, %1;" : "=r"(res) : "r"(v));
        return res;
    }
    template<> __device__ __forceinline__ uchar saturate_cast<uchar>(uint v)
    {
        uint res = 0;
        asm("cvt.sat.u8.u32 %0, %1;" : "=r"(res) : "r"(v));
        return res;
    }
    template<> __device__ __forceinline__ uchar saturate_cast<uchar>(float v)
    {
        // .rni = round to nearest even, then saturate to [0, 255].
        uint res = 0;
        asm("cvt.rni.sat.u8.f32 %0, %1;" : "=r"(res) : "f"(v));
        return res;
    }
    template<> __device__ __forceinline__ uchar saturate_cast<uchar>(double v)
    {
    // Double-precision path needs compute capability 1.3+; older devices
    // fall back through the float specialization.
    // NOTE(review): unlike the int/uint variants near the end of this file,
    // this guard omits "defined __CUDA_ARCH__"; an undefined macro evaluates
    // to 0 in #if, so behavior is the same — kept as-is for byte fidelity.
    #if __CUDA_ARCH__ >= 130
        uint res = 0;
        asm("cvt.rni.sat.u8.f64 %0, %1;" : "=r"(res) : "d"(v));
        return res;
    #else
        return saturate_cast<uchar>((float)v);
    #endif
    }

    // ---- to schar: clamp to the signed 8-bit range ----
    // Same register-width trick as the uchar group: the s8 result lives in
    // the low byte of a 32-bit register and is narrowed on return.
    template<> __device__ __forceinline__ schar saturate_cast<schar>(uchar v)
    {
        uint res = 0;
        uint vi = v;  // widen to a 32-bit value for the "r" input constraint
        asm("cvt.sat.s8.u8 %0, %1;" : "=r"(res) : "r"(vi));
        return res;
    }
    template<> __device__ __forceinline__ schar saturate_cast<schar>(short v)
    {
        uint res = 0;
        asm("cvt.sat.s8.s16 %0, %1;" : "=r"(res) : "h"(v));
        return res;
    }
    template<> __device__ __forceinline__ schar saturate_cast<schar>(ushort v)
    {
        uint res = 0;
        asm("cvt.sat.s8.u16 %0, %1;" : "=r"(res) : "h"(v));
        return res;
    }
    template<> __device__ __forceinline__ schar saturate_cast<schar>(int v)
    {
        uint res = 0;
        asm("cvt.sat.s8.s32 %0, %1;" : "=r"(res) : "r"(v));
        return res;
    }
    template<> __device__ __forceinline__ schar saturate_cast<schar>(uint v)
    {
        uint res = 0;
        asm("cvt.sat.s8.u32 %0, %1;" : "=r"(res) : "r"(v));
        return res;
    }
    template<> __device__ __forceinline__ schar saturate_cast<schar>(float v)
    {
        uint res = 0;
        asm("cvt.rni.sat.s8.f32 %0, %1;" : "=r"(res) : "f"(v));
        return res;
    }
    template<> __device__ __forceinline__ schar saturate_cast<schar>(double v)
    {
    #if __CUDA_ARCH__ >= 130
        uint res = 0;
        asm("cvt.rni.sat.s8.f64 %0, %1;" : "=r"(res) : "d"(v));
        return res;
    #else
        return saturate_cast<schar>((float)v);
    #endif
    }

    // ---- to ushort: clamp to the unsigned 16-bit range ----
    // Here the result register is 16-bit ("=h"), so no narrowing on return.
    template<> __device__ __forceinline__ ushort saturate_cast<ushort>(schar v)
    {
        ushort res = 0;
        int vi = v;  // widen to a 32-bit value for the "r" input constraint
        asm("cvt.sat.u16.s8 %0, %1;" : "=h"(res) : "r"(vi));
        return res;
    }
    template<> __device__ __forceinline__ ushort saturate_cast<ushort>(short v)
    {
        ushort res = 0;
        asm("cvt.sat.u16.s16 %0, %1;" : "=h"(res) : "h"(v));
        return res;
    }
    template<> __device__ __forceinline__ ushort saturate_cast<ushort>(int v)
    {
        ushort res = 0;
        asm("cvt.sat.u16.s32 %0, %1;" : "=h"(res) : "r"(v));
        return res;
    }
    template<> __device__ __forceinline__ ushort saturate_cast<ushort>(uint v)
    {
        ushort res = 0;
        asm("cvt.sat.u16.u32 %0, %1;" : "=h"(res) : "r"(v));
        return res;
    }
    template<> __device__ __forceinline__ ushort saturate_cast<ushort>(float v)
    {
        ushort res = 0;
        asm("cvt.rni.sat.u16.f32 %0, %1;" : "=h"(res) : "f"(v));
        return res;
    }
    template<> __device__ __forceinline__ ushort saturate_cast<ushort>(double v)
    {
    #if __CUDA_ARCH__ >= 130
        ushort res = 0;
        asm("cvt.rni.sat.u16.f64 %0, %1;" : "=h"(res) : "d"(v));
        return res;
    #else
        return saturate_cast<ushort>((float)v);
    #endif
    }

    // ---- to short: clamp to the signed 16-bit range ----
    template<> __device__ __forceinline__ short saturate_cast<short>(ushort v)
    {
        short res = 0;
        asm("cvt.sat.s16.u16 %0, %1;" : "=h"(res) : "h"(v));
        return res;
    }
    template<> __device__ __forceinline__ short saturate_cast<short>(int v)
    {
        short res = 0;
        asm("cvt.sat.s16.s32 %0, %1;" : "=h"(res) : "r"(v));
        return res;
    }
    template<> __device__ __forceinline__ short saturate_cast<short>(uint v)
    {
        short res = 0;
        asm("cvt.sat.s16.u32 %0, %1;" : "=h"(res) : "r"(v));
        return res;
    }
    template<> __device__ __forceinline__ short saturate_cast<short>(float v)
    {
        short res = 0;
        asm("cvt.rni.sat.s16.f32 %0, %1;" : "=h"(res) : "f"(v));
        return res;
    }
    template<> __device__ __forceinline__ short saturate_cast<short>(double v)
    {
    #if __CUDA_ARCH__ >= 130
        short res = 0;
        asm("cvt.rni.sat.s16.f64 %0, %1;" : "=h"(res) : "d"(v));
        return res;
    #else
        return saturate_cast<short>((float)v);
    #endif
    }

    // ---- to int: clamp to the signed 32-bit range ----
    // Float/double paths use the CUDA round-to-nearest conversion intrinsics
    // instead of inline asm.
    template<> __device__ __forceinline__ int saturate_cast<int>(uint v)
    {
        int res = 0;
        asm("cvt.sat.s32.u32 %0, %1;" : "=r"(res) : "r"(v));
        return res;
    }
    template<> __device__ __forceinline__ int saturate_cast<int>(float v)
    {
        return __float2int_rn(v);
    }
    template<> __device__ __forceinline__ int saturate_cast<int>(double v)
    {
    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
        return __double2int_rn(v);
    #else
        return saturate_cast<int>((float)v);
    #endif
    }

    // ---- to uint: clamp to the unsigned 32-bit range (negatives -> 0) ----
    template<> __device__ __forceinline__ uint saturate_cast<uint>(schar v)
    {
        uint res = 0;
        int vi = v;  // widen to a 32-bit value for the "r" input constraint
        asm("cvt.sat.u32.s8 %0, %1;" : "=r"(res) : "r"(vi));
        return res;
    }
    template<> __device__ __forceinline__ uint saturate_cast<uint>(short v)
    {
        uint res = 0;
        asm("cvt.sat.u32.s16 %0, %1;" : "=r"(res) : "h"(v));
        return res;
    }
    template<> __device__ __forceinline__ uint saturate_cast<uint>(int v)
    {
        uint res = 0;
        asm("cvt.sat.u32.s32 %0, %1;" : "=r"(res) : "r"(v));
        return res;
    }
    template<> __device__ __forceinline__ uint saturate_cast<uint>(float v)
    {
        return __float2uint_rn(v);
    }
    template<> __device__ __forceinline__ uint saturate_cast<uint>(double v)
    {
    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
        return __double2uint_rn(v);
    #else
        return saturate_cast<uint>((float)v);
    #endif
    }
}}}
|  | ||||
| //! @endcond | ||||
|  | ||||
| #endif /* __OPENCV_CUDA_SATURATE_CAST_HPP__ */ | ||||
							
								
								
									
										258
									
								
								3rdparty/include/opencv2/core/cuda/scan.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										258
									
								
								3rdparty/include/opencv2/core/cuda/scan.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,258 @@ | ||||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | ||||
| // | ||||
| //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | ||||
| // | ||||
| //  By downloading, copying, installing or using the software you agree to this license. | ||||
| //  If you do not agree to this license, do not download, install, | ||||
| //  copy or use the software. | ||||
| // | ||||
| // | ||||
| //                           License Agreement | ||||
| //                For Open Source Computer Vision Library | ||||
| // | ||||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | ||||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | ||||
| // Third party copyrights are property of their respective owners. | ||||
| // | ||||
| // Redistribution and use in source and binary forms, with or without modification, | ||||
| // are permitted provided that the following conditions are met: | ||||
| // | ||||
| //   * Redistribution's of source code must retain the above copyright notice, | ||||
| //     this list of conditions and the following disclaimer. | ||||
| // | ||||
| //   * Redistribution's in binary form must reproduce the above copyright notice, | ||||
| //     this list of conditions and the following disclaimer in the documentation | ||||
| //     and/or other materials provided with the distribution. | ||||
| // | ||||
| //   * The name of the copyright holders may not be used to endorse or promote products | ||||
| //     derived from this software without specific prior written permission. | ||||
| // | ||||
| // This software is provided by the copyright holders and contributors "as is" and | ||||
| // any express or implied warranties, including, but not limited to, the implied | ||||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | ||||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | ||||
| // indirect, incidental, special, exemplary, or consequential damages | ||||
| // (including, but not limited to, procurement of substitute goods or services; | ||||
| // loss of use, data, or profits; or business interruption) however caused | ||||
| // and on any theory of liability, whether in contract, strict liability, | ||||
| // or tort (including negligence or otherwise) arising in any way out of | ||||
| // the use of this software, even if advised of the possibility of such damage. | ||||
| // | ||||
| //M*/ | ||||
|  | ||||
| #ifndef __OPENCV_CUDA_SCAN_HPP__ | ||||
| #define __OPENCV_CUDA_SCAN_HPP__ | ||||
|  | ||||
| #include "opencv2/core/cuda/common.hpp" | ||||
| #include "opencv2/core/cuda/utility.hpp" | ||||
| #include "opencv2/core/cuda/warp.hpp" | ||||
| #include "opencv2/core/cuda/warp_shuffle.hpp" | ||||
|  | ||||
| /** @file | ||||
|  * @deprecated Use @ref cudev instead. | ||||
|  */ | ||||
|  | ||||
| //! @cond IGNORED | ||||
|  | ||||
namespace cv { namespace cuda { namespace device
{
    // Kind of prefix sum produced by the scan primitives below: EXCLUSIVE
    // excludes the calling thread's own element, INCLUSIVE includes it.
    enum ScanKind { EXCLUSIVE = 0,  INCLUSIVE = 1 };

    // In-place, doubling-offset scan over one warp's stretch of shared
    // memory. Reads below the warp's lower boundary are prevented by the
    // "lane >= k" guards, so no padding is required.
    // NOTE(review): the steps have no barriers between them, so this relies
    // on implicit warp-synchronous execution (pre-Volta behavior) and on
    // ptr being volatile so each step re-reads shared memory — confirm the
    // intended target architectures before reuse.
    template <ScanKind Kind, typename T, typename F> struct WarpScan
    {
        __device__ __forceinline__ WarpScan() {}
        __device__ __forceinline__ WarpScan(const WarpScan& other) { (void)other; }

        // Scan the 32 elements of the calling thread's warp; idx is the
        // shared-memory slot of this thread's element. Returns the thread's
        // inclusive or exclusive prefix depending on Kind (exclusive lane 0
        // returns 0).
        __device__ __forceinline__ T operator()( volatile T *ptr , const unsigned int idx)
        {
            const unsigned int lane = idx & 31;
            F op;

            if ( lane >=  1) ptr [idx ] = op(ptr [idx -  1], ptr [idx]);
            if ( lane >=  2) ptr [idx ] = op(ptr [idx -  2], ptr [idx]);
            if ( lane >=  4) ptr [idx ] = op(ptr [idx -  4], ptr [idx]);
            if ( lane >=  8) ptr [idx ] = op(ptr [idx -  8], ptr [idx]);
            if ( lane >= 16) ptr [idx ] = op(ptr [idx - 16], ptr [idx]);

            if( Kind == INCLUSIVE )
                return ptr [idx];
            else
                return (lane > 0) ? ptr [idx - 1] : 0;
        }

        // Identity mapping: thread tid owns shared slot tid (no padding).
        __device__ __forceinline__ unsigned int index(const unsigned int tid)
        {
            return tid;
        }

        // No padding to initialize for this layout.
        __device__ __forceinline__ void init(volatile T *ptr){}

        static const int warp_offset      = 0;

        typedef WarpScan<INCLUSIVE, T, F>  merge;
    };

    // Variant without the per-step lane comparisons: index() places each
    // warp's data 16 slots into a padded row (warp_smem_stride = 32+16+1),
    // so the unguarded idx-1..idx-16 reads land in the pad instead of the
    // previous warp's data.
    // NOTE(review): lane is computed from threadIdx.x here (not from idx as
    // in WarpScan) and the pad is zeroed by init(), which BlockScan calls
    // only from warp 0 AFTER the first scan pass — the exact initialization
    // contract depends on the caller; verify against call sites.
    template <ScanKind Kind , typename T, typename F> struct WarpScanNoComp
    {
        __device__ __forceinline__ WarpScanNoComp() {}
        __device__ __forceinline__ WarpScanNoComp(const WarpScanNoComp& other) { (void)other; }

        __device__ __forceinline__ T operator()( volatile T *ptr , const unsigned int idx)
        {
            const unsigned int lane = threadIdx.x & 31;
            F op;

            ptr [idx ] = op(ptr [idx -  1], ptr [idx]);
            ptr [idx ] = op(ptr [idx -  2], ptr [idx]);
            ptr [idx ] = op(ptr [idx -  4], ptr [idx]);
            ptr [idx ] = op(ptr [idx -  8], ptr [idx]);
            ptr [idx ] = op(ptr [idx - 16], ptr [idx]);

            if( Kind == INCLUSIVE )
                return ptr [idx];
            else
                return (lane > 0) ? ptr [idx - 1] : 0;
        }

        // Map thread tid into its warp's padded row: row base + 16-slot pad
        // + lane within the warp.
        __device__ __forceinline__ unsigned int index(const unsigned int tid)
        {
            return (tid >> warp_log) * warp_smem_stride + 16 + (tid & warp_mask);
        }

        // Zero the slot owned by this thread (used to prime pad/partial
        // areas; see NOTE above about when callers invoke this).
        __device__ __forceinline__ void init(volatile T *ptr)
        {
            ptr[threadIdx.x] = 0;
        }

        static const int warp_smem_stride = 32 + 16 + 1;  // 16-pad + 32 data + 1
        static const int warp_offset      = 16;
        static const int warp_log         = 5;
        static const int warp_mask        = 31;

        typedef WarpScanNoComp<INCLUSIVE, T, F> merge;
    };

    // Block-wide scan built from a per-warp scan policy Sc: (1) scan each
    // warp, (2) store each warp's total, (3) scan the warp totals with
    // Sc::merge, (4) add each warp's base offset back to its elements.
    template <ScanKind Kind , typename T, typename Sc, typename F> struct BlockScan
    {
        __device__ __forceinline__ BlockScan() {}
        __device__ __forceinline__ BlockScan(const BlockScan& other) { (void)other; }

        __device__ __forceinline__ T operator()(volatile T *ptr)
        {
            const unsigned int tid  = threadIdx.x;
            const unsigned int lane = tid & warp_mask;
            const unsigned int warp = tid >> warp_log;

            Sc scan;
            typename Sc::merge merge_scan;
            const unsigned int idx = scan.index(tid);

            // Stage 1: independent scan inside each warp.
            T val = scan(ptr, idx);
            __syncthreads ();

            if( warp == 0)
                scan.init(ptr);
            __syncthreads ();

            // Stage 2: last lane of each warp publishes the warp's total
            // at warp_offset + warp.
            if( lane == 31 )
                ptr [scan.warp_offset + warp ] = (Kind == INCLUSIVE) ? val : ptr [idx];
            __syncthreads ();

            // Stage 3: warp 0 scans the per-warp totals.
            if( warp == 0 )
                merge_scan(ptr, idx);
            __syncthreads();

            // Stage 4: add the preceding warps' combined total to this
            // thread's value.
            // NOTE(review): this merge uses "+" rather than the functor F,
            // so the block scan is only correct for additive operators —
            // confirm no caller instantiates it with a non-add functor.
            if ( warp > 0)
                val = ptr [scan.warp_offset + warp - 1] + val;
            __syncthreads ();

            ptr[idx] = val;
            __syncthreads ();

            return val ;
        }

        static const int warp_log  = 5;
        static const int warp_mask = 31;
    };

    // Inclusive scan of idata across the calling thread's warp.
    // SM30+: register-only path using warp shuffles (s_Data unused).
    // Older devices: shared-memory path where each warp owns a 2*warp-size
    // stretch of s_Data — lower half zeroed as padding, upper half data —
    // so the unguarded pos-1..pos-16 reads hit zeros.
    // NOTE(review): shfl_up here presumably wraps the legacy mask-less
    // __shfl_up, which was removed for Volta+; the shared-memory path also
    // assumes warp-synchronous execution (no barriers) — confirm target
    // architectures.
    template <typename T>
    __device__ T warpScanInclusive(T idata, volatile T* s_Data, unsigned int tid)
    {
    #if __CUDA_ARCH__ >= 300
        const unsigned int laneId = cv::cuda::device::Warp::laneId();

        // scan using warp shuffle instructions
        #pragma unroll
        for (int i = 1; i <= (OPENCV_CUDA_WARP_SIZE / 2); i *= 2)
        {
            const T n = cv::cuda::device::shfl_up(idata, i);
            if (laneId >= i)
                  idata += n;
        }

        return idata;
    #else
        // pos = warp_base*2 + lane; the first write zeroes the pad slot,
        // the second (after += WARP_SIZE) stores this thread's element.
        unsigned int pos = 2 * tid - (tid & (OPENCV_CUDA_WARP_SIZE - 1));
        s_Data[pos] = 0;
        pos += OPENCV_CUDA_WARP_SIZE;
        s_Data[pos] = idata;

        s_Data[pos] += s_Data[pos - 1];
        s_Data[pos] += s_Data[pos - 2];
        s_Data[pos] += s_Data[pos - 4];
        s_Data[pos] += s_Data[pos - 8];
        s_Data[pos] += s_Data[pos - 16];

        return s_Data[pos];
    #endif
    }

    // Exclusive warp scan: inclusive result minus the thread's own element.
    template <typename T>
    __device__ __forceinline__ T warpScanExclusive(T idata, volatile T* s_Data, unsigned int tid)
    {
        return warpScanInclusive(idata, s_Data, tid) - idata;
    }

    // Inclusive scan across a block of tiNumScanThreads threads
    // (compile-time constant; presumably a multiple of the warp size —
    // confirm at call sites). Falls back to a single warp scan when the
    // block is no larger than one warp.
    template <int tiNumScanThreads, typename T>
    __device__ T blockScanInclusive(T idata, volatile T* s_Data, unsigned int tid)
    {
        if (tiNumScanThreads > OPENCV_CUDA_WARP_SIZE)
        {
            //Bottom-level inclusive warp scan
            T warpResult = warpScanInclusive(idata, s_Data, tid);

            //Save top elements of each warp for exclusive warp scan
            //sync to wait for warp scans to complete (because s_Data is being overwritten)
            __syncthreads();
            if ((tid & (OPENCV_CUDA_WARP_SIZE - 1)) == (OPENCV_CUDA_WARP_SIZE - 1))
            {
                s_Data[tid >> OPENCV_CUDA_LOG_WARP_SIZE] = warpResult;
            }

            //wait for warp scans to complete
            __syncthreads();

            if (tid < (tiNumScanThreads / OPENCV_CUDA_WARP_SIZE) )
            {
                //grab top warp elements
                T val = s_Data[tid];
                //calculate exclusive scan and write back to shared memory
                s_Data[tid] = warpScanExclusive(val, s_Data, tid);
            }

            //return updated warp scans with exclusive scan results
            __syncthreads();

            return warpResult + s_Data[tid >> OPENCV_CUDA_LOG_WARP_SIZE];
        }
        else
        {
            return warpScanInclusive(idata, s_Data, tid);
        }
    }
}}}
|  | ||||
| //! @endcond | ||||
|  | ||||
| #endif // __OPENCV_CUDA_SCAN_HPP__ | ||||
							
								
								
									
										869
									
								
								3rdparty/include/opencv2/core/cuda/simd_functions.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										869
									
								
								3rdparty/include/opencv2/core/cuda/simd_functions.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,869 @@ | ||||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | ||||
| // | ||||
| //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | ||||
| // | ||||
| //  By downloading, copying, installing or using the software you agree to this license. | ||||
| //  If you do not agree to this license, do not download, install, | ||||
| //  copy or use the software. | ||||
| // | ||||
| // | ||||
| //                           License Agreement | ||||
| //                For Open Source Computer Vision Library | ||||
| // | ||||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | ||||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | ||||
| // Third party copyrights are property of their respective owners. | ||||
| // | ||||
| // Redistribution and use in source and binary forms, with or without modification, | ||||
| // are permitted provided that the following conditions are met: | ||||
| // | ||||
| //   * Redistribution's of source code must retain the above copyright notice, | ||||
| //     this list of conditions and the following disclaimer. | ||||
| // | ||||
| //   * Redistribution's in binary form must reproduce the above copyright notice, | ||||
| //     this list of conditions and the following disclaimer in the documentation | ||||
| //     and/or other materials provided with the distribution. | ||||
| // | ||||
| //   * The name of the copyright holders may not be used to endorse or promote products | ||||
| //     derived from this software without specific prior written permission. | ||||
| // | ||||
| // This software is provided by the copyright holders and contributors "as is" and | ||||
| // any express or implied warranties, including, but not limited to, the implied | ||||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | ||||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | ||||
| // indirect, incidental, special, exemplary, or consequential damages | ||||
| // (including, but not limited to, procurement of substitute goods or services; | ||||
| // loss of use, data, or profits; or business interruption) however caused | ||||
| // and on any theory of liability, whether in contract, strict liability, | ||||
| // or tort (including negligence or otherwise) arising in any way out of | ||||
| // the use of this software, even if advised of the possibility of such damage. | ||||
| // | ||||
| //M*/ | ||||
|  | ||||
| /* | ||||
|  * Copyright (c) 2013 NVIDIA Corporation. All rights reserved. | ||||
|  * | ||||
|  * Redistribution and use in source and binary forms, with or without | ||||
|  * modification, are permitted provided that the following conditions are met: | ||||
|  * | ||||
|  *   Redistributions of source code must retain the above copyright notice, | ||||
|  *   this list of conditions and the following disclaimer. | ||||
|  * | ||||
|  *   Redistributions in binary form must reproduce the above copyright notice, | ||||
|  *   this list of conditions and the following disclaimer in the documentation | ||||
|  *   and/or other materials provided with the distribution. | ||||
|  * | ||||
|  *   Neither the name of NVIDIA Corporation nor the names of its contributors | ||||
|  *   may be used to endorse or promote products derived from this software | ||||
|  *   without specific prior written permission. | ||||
|  * | ||||
|  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | ||||
|  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||||
|  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||||
|  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE | ||||
|  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||||
|  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | ||||
|  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | ||||
|  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | ||||
|  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | ||||
|  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | ||||
|  * POSSIBILITY OF SUCH DAMAGE. | ||||
|  */ | ||||
|  | ||||
| #ifndef __OPENCV_CUDA_SIMD_FUNCTIONS_HPP__ | ||||
| #define __OPENCV_CUDA_SIMD_FUNCTIONS_HPP__ | ||||
|  | ||||
| #include "common.hpp" | ||||
|  | ||||
| /** @file | ||||
|  * @deprecated Use @ref cudev instead. | ||||
|  */ | ||||
|  | ||||
| //! @cond IGNORED | ||||
|  | ||||
| namespace cv { namespace cuda { namespace device | ||||
| { | ||||
|     // 2 | ||||
|  | ||||
    // Per-halfword addition: treats each 32-bit operand as two independent
    // 16-bit unsigned lanes and adds them lane-wise.
    // SM30+ uses the vadd2 SIMD video instruction; SM20+ uses two scalar
    // vadd ops on the .h0/.h1 halves; older devices emulate by isolating
    // the carry out of the low halfword and subtracting it from the high.
    // NOTE(review): the PTX paths request .sat (lane-saturating) while the
    // emulation wraps modulo 2^16 per lane — results differ on overflow;
    // confirm which behavior callers rely on.
    static __device__ __forceinline__ unsigned int vadd2(unsigned int a, unsigned int b)
    {
        unsigned int r = 0;

    #if __CUDA_ARCH__ >= 300
        asm("vadd2.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    #elif __CUDA_ARCH__ >= 200
        asm("vadd.u32.u32.u32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
        asm("vadd.u32.u32.u32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    #else
        unsigned int s;
        s = a ^ b;          // sum bits
        r = a + b;          // actual sum
        s = s ^ r;          // determine carry-ins for each bit position
        s = s & 0x00010000; // carry-in to high word (= carry-out from low word)
        r = r - s;          // subtract out carry-out from low word
    #endif

        return r;
    }
|  | ||||
    // Per-halfword subtraction: treats each 32-bit operand as two
    // independent 16-bit unsigned lanes and subtracts them lane-wise.
    // Same three-tier structure as vadd2; the emulation isolates the borrow
    // from the low halfword and adds it back into the high halfword.
    // NOTE(review): as with vadd2, the PTX paths request .sat while the
    // emulation wraps per lane — results differ on underflow.
    static __device__ __forceinline__ unsigned int vsub2(unsigned int a, unsigned int b)
    {
        unsigned int r = 0;

    #if __CUDA_ARCH__ >= 300
        asm("vsub2.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    #elif __CUDA_ARCH__ >= 200
        asm("vsub.u32.u32.u32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
        asm("vsub.u32.u32.u32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    #else
        unsigned int s;
        s = a ^ b;          // difference bits
        r = a - b;          // actual difference
        s = s ^ r;          // determine borrow-ins for each bit position
        s = s & 0x00010000; // borrow to high word
        r = r + s;          // compensate for borrow from low word
    #endif

        return r;
    }
|  | ||||
    // Per-halfword absolute difference |a.hN - b.hN| on 2x16-bit unsigned lanes.
    static __device__ __forceinline__ unsigned int vabsdiff2(unsigned int a, unsigned int b)
    {
        unsigned int r = 0;

    #if __CUDA_ARCH__ >= 300
        asm("vabsdiff2.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    #elif __CUDA_ARCH__ >= 200
        asm("vabsdiff.u32.u32.u32.sat %0.b0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
        asm("vabsdiff.u32.u32.u32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    #else
        unsigned int s, t, u, v;
        s = a & 0x0000ffff; // extract low halfword
        r = b & 0x0000ffff; // extract low halfword
        u = ::max(r, s);    // maximum of low halfwords
        v = ::min(r, s);    // minimum of low halfwords
        s = a & 0xffff0000; // extract high halfword (kept in place; low bits are zero)
        r = b & 0xffff0000; // extract high halfword (in-place compare is still valid)
        t = ::max(r, s);    // maximum of high halfwords
        s = ::min(r, s);    // minimum of high halfwords
        r = u | t;          // maximum of both halfwords
        s = v | s;          // minimum of both halfwords
        r = r - s;          // |a - b| = max(a,b) - min(a,b); no borrow crosses lanes
    #endif

        return r;
    }
|  | ||||
|     static __device__ __forceinline__ unsigned int vavg2(unsigned int a, unsigned int b) | ||||
|     { | ||||
|         unsigned int r, s; | ||||
|  | ||||
|         // HAKMEM #23: a + b = 2 * (a & b) + (a ^ b) ==> | ||||
|         // (a + b) / 2 = (a & b) + ((a ^ b) >> 1) | ||||
|         s = a ^ b; | ||||
|         r = a & b; | ||||
|         s = s & 0xfffefffe; // ensure shift doesn't cross halfword boundaries | ||||
|         s = s >> 1; | ||||
|         s = r + s; | ||||
|  | ||||
|         return s; | ||||
|     } | ||||
|  | ||||
    // Per-halfword average rounded up: (a.hN + b.hN + 1) / 2 for each 16-bit lane.
    static __device__ __forceinline__ unsigned int vavrg2(unsigned int a, unsigned int b)
    {
        unsigned int r = 0;

    #if __CUDA_ARCH__ >= 300
        asm("vavrg2.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    #else
        // HAKMEM #23: a + b = 2 * (a | b) - (a ^ b) ==>
        // (a + b + 1) / 2 = (a | b) - ((a ^ b) >> 1)
        unsigned int s;
        s = a ^ b;
        r = a | b;
        s = s & 0xfffefffe; // ensure shift doesn't cross half-word boundaries
        s = s >> 1;
        r = r - s;
    #endif

        return r;
    }
|  | ||||
    // Per-halfword equality test: each 16-bit result lane is 1 if a.hN == b.hN, else 0.
    static __device__ __forceinline__ unsigned int vseteq2(unsigned int a, unsigned int b)
    {
        unsigned int r = 0;

    #if __CUDA_ARCH__ >= 300
        asm("vset2.u32.u32.eq %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    #else
        // inspired by Alan Mycroft's null-byte detection algorithm:
        // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
        unsigned int c;
        r = a ^ b;          // 0x0000 if a == b
        c = r | 0x80008000; // set msbs, to catch carry out
        r = r ^ c;          // extract msbs, msb = 1 if r < 0x8000
        c = c - 0x00010001; // msb = 0, if r was 0x0000 or 0x8000
        c = r & ~c;         // msb = 1, if r was 0x0000
        r = c >> 15;        // convert to bool
    #endif

        return r;
    }
|  | ||||
    // Per-halfword equality mask: each 16-bit result lane is 0xffff if a.hN == b.hN,
    // else 0x0000.
    static __device__ __forceinline__ unsigned int vcmpeq2(unsigned int a, unsigned int b)
    {
        unsigned int r, c;

    #if __CUDA_ARCH__ >= 300
        r = vseteq2(a, b);
        c = r << 16;        // convert bool
        r = c - r;          //  into mask
    #else
        // inspired by Alan Mycroft's null-byte detection algorithm:
        // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
        r = a ^ b;          // 0x0000 if a == b
        c = r | 0x80008000; // set msbs, to catch carry out
        r = r ^ c;          // extract msbs, msb = 1 if r < 0x8000
        c = c - 0x00010001; // msb = 0, if r was 0x0000 or 0x8000
        c = r & ~c;         // msb = 1, if r was 0x0000
        r = c >> 15;        // convert
        r = c - r;          //  msbs to
        r = c | r;          //   mask
    #endif

        return r;
    }
|  | ||||
    // Per-halfword unsigned compare a >= b: each 16-bit result lane is 1 or 0.
    static __device__ __forceinline__ unsigned int vsetge2(unsigned int a, unsigned int b)
    {
        unsigned int r = 0;

    #if __CUDA_ARCH__ >= 300
        asm("vset2.u32.u32.ge %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    #else
        unsigned int c;
        // inverts only the local by-value copy of b
        asm("not.b32 %0, %0;" : "+r"(b));
        c = vavrg2(a, b);   // (a + ~b + 1) / 2 = (a - b) / 2
        c = c & 0x80008000; // msb = carry-outs
        r = c >> 15;        // convert to bool
    #endif

        return r;
    }
|  | ||||
    // Per-halfword unsigned compare a >= b: each 16-bit result lane is 0xffff or 0x0000.
    static __device__ __forceinline__ unsigned int vcmpge2(unsigned int a, unsigned int b)
    {
        unsigned int r, c;

    #if __CUDA_ARCH__ >= 300
        r = vsetge2(a, b);
        c = r << 16;        // convert bool
        r = c - r;          //  into mask
    #else
        asm("not.b32 %0, %0;" : "+r"(b));
        c = vavrg2(a, b);   // (a + ~b + 1) / 2 = (a - b) / 2
        c = c & 0x80008000; // msb = carry-outs
        r = c >> 15;        // convert
        r = c - r;          //  msbs to
        r = c | r;          //   mask
    #endif

        return r;
    }
|  | ||||
    // Per-halfword unsigned compare a > b: each 16-bit result lane is 1 or 0.
    static __device__ __forceinline__ unsigned int vsetgt2(unsigned int a, unsigned int b)
    {
        unsigned int r = 0;

    #if __CUDA_ARCH__ >= 300
        asm("vset2.u32.u32.gt %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    #else
        unsigned int c;
        asm("not.b32 %0, %0;" : "+r"(b));
        c = vavg2(a, b);    // (a + ~b) / 2 = (a - b) / 2 [rounded down]
        c = c & 0x80008000; // msbs = carry-outs
        r = c >> 15;        // convert to bool
    #endif

        return r;
    }
|  | ||||
    // Per-halfword unsigned compare a > b: each 16-bit result lane is 0xffff or 0x0000.
    static __device__ __forceinline__ unsigned int vcmpgt2(unsigned int a, unsigned int b)
    {
        unsigned int r, c;

    #if __CUDA_ARCH__ >= 300
        r = vsetgt2(a, b);
        c = r << 16;        // convert bool
        r = c - r;          //  into mask
    #else
        asm("not.b32 %0, %0;" : "+r"(b));
        c = vavg2(a, b);    // (a + ~b) / 2 = (a - b) / 2 [rounded down]
        c = c & 0x80008000; // msbs = carry-outs
        r = c >> 15;        // convert
        r = c - r;          //  msbs to
        r = c | r;          //   mask
    #endif

        return r;
    }
|  | ||||
    // Per-halfword unsigned compare a <= b: each 16-bit result lane is 1 or 0.
    static __device__ __forceinline__ unsigned int vsetle2(unsigned int a, unsigned int b)
    {
        unsigned int r = 0;

    #if __CUDA_ARCH__ >= 300
        asm("vset2.u32.u32.le %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    #else
        unsigned int c;
        asm("not.b32 %0, %0;" : "+r"(a));
        c = vavrg2(a, b);   // (b + ~a + 1) / 2 = (b - a) / 2
        c = c & 0x80008000; // msb = carry-outs
        r = c >> 15;        // convert to bool
    #endif

        return r;
    }
|  | ||||
    // Per-halfword unsigned compare a <= b: each 16-bit result lane is 0xffff or 0x0000.
    static __device__ __forceinline__ unsigned int vcmple2(unsigned int a, unsigned int b)
    {
        unsigned int r, c;

    #if __CUDA_ARCH__ >= 300
        r = vsetle2(a, b);
        c = r << 16;        // convert bool
        r = c - r;          //  into mask
    #else
        asm("not.b32 %0, %0;" : "+r"(a));
        c = vavrg2(a, b);   // (b + ~a + 1) / 2 = (b - a) / 2
        c = c & 0x80008000; // msb = carry-outs
        r = c >> 15;        // convert
        r = c - r;          //  msbs to
        r = c | r;          //   mask
    #endif

        return r;
    }
|  | ||||
    // Per-halfword unsigned compare a < b: each 16-bit result lane is 1 or 0.
    static __device__ __forceinline__ unsigned int vsetlt2(unsigned int a, unsigned int b)
    {
        unsigned int r = 0;

    #if __CUDA_ARCH__ >= 300
        asm("vset2.u32.u32.lt %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    #else
        unsigned int c;
        asm("not.b32 %0, %0;" : "+r"(a));
        c = vavg2(a, b);    // (b + ~a) / 2 = (b - a) / 2 [rounded down]
        c = c & 0x80008000; // msb = carry-outs
        r = c >> 15;        // convert to bool
    #endif

        return r;
    }
|  | ||||
    // Per-halfword unsigned compare a < b: each 16-bit result lane is 0xffff or 0x0000.
    static __device__ __forceinline__ unsigned int vcmplt2(unsigned int a, unsigned int b)
    {
        unsigned int r, c;

    #if __CUDA_ARCH__ >= 300
        r = vsetlt2(a, b);
        c = r << 16;        // convert bool
        r = c - r;          //  into mask
    #else
        asm("not.b32 %0, %0;" : "+r"(a));
        c = vavg2(a, b);    // (b + ~a) / 2 = (b - a) / 2 [rounded down]
        c = c & 0x80008000; // msb = carry-outs
        r = c >> 15;        // convert
        r = c - r;          //  msbs to
        r = c | r;          //   mask
    #endif

        return r;
    }
|  | ||||
    // Per-halfword inequality test: each 16-bit result lane is 1 if a.hN != b.hN, else 0.
    static __device__ __forceinline__ unsigned int vsetne2(unsigned int a, unsigned int b)
    {
        unsigned int r = 0;

    #if __CUDA_ARCH__ >= 300
        asm ("vset2.u32.u32.ne %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    #else
        // inspired by Alan Mycroft's null-byte detection algorithm:
        // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
        unsigned int c;
        r = a ^ b;          // 0x0000 if a == b
        c = r | 0x80008000; // set msbs, to catch carry out
        c = c - 0x00010001; // msb = 0, if r was 0x0000 or 0x8000
        c = r | c;          // msb = 1, if r was not 0x0000
        c = c & 0x80008000; // extract msbs
        r = c >> 15;        // convert to bool
    #endif

        return r;
    }
|  | ||||
    // Per-halfword inequality mask: each 16-bit result lane is 0xffff if a.hN != b.hN,
    // else 0x0000.
    static __device__ __forceinline__ unsigned int vcmpne2(unsigned int a, unsigned int b)
    {
        unsigned int r, c;

    #if __CUDA_ARCH__ >= 300
        r = vsetne2(a, b);
        c = r << 16;        // convert bool
        r = c - r;          //  into mask
    #else
        // inspired by Alan Mycroft's null-byte detection algorithm:
        // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
        r = a ^ b;          // 0x0000 if a == b
        c = r | 0x80008000; // set msbs, to catch carry out
        c = c - 0x00010001; // msb = 0, if r was 0x0000 or 0x8000
        c = r | c;          // msb = 1, if r was not 0x0000
        c = c & 0x80008000; // extract msbs
        r = c >> 15;        // convert
        r = c - r;          //  msbs to
        r = c | r;          //   mask
    #endif

        return r;
    }
|  | ||||
    // Per-halfword unsigned maximum: r.hN = max(a.hN, b.hN) for each 16-bit lane.
    static __device__ __forceinline__ unsigned int vmax2(unsigned int a, unsigned int b)
    {
        unsigned int r = 0;

    #if __CUDA_ARCH__ >= 300
        asm("vmax2.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    #elif __CUDA_ARCH__ >= 200
        asm("vmax.u32.u32.u32 %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
        asm("vmax.u32.u32.u32 %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    #else
        unsigned int s, t, u;
        r = a & 0x0000ffff; // extract low halfword
        s = b & 0x0000ffff; // extract low halfword
        t = ::max(r, s);    // maximum of low halfwords
        r = a & 0xffff0000; // extract high halfword (kept in place; low bits zero)
        s = b & 0xffff0000; // extract high halfword (in-place compare is still valid)
        u = ::max(r, s);    // maximum of high halfwords
        r = t | u;          // combine halfword maximums
    #endif

        return r;
    }
|  | ||||
    // Per-halfword unsigned minimum: r.hN = min(a.hN, b.hN) for each 16-bit lane.
    static __device__ __forceinline__ unsigned int vmin2(unsigned int a, unsigned int b)
    {
        unsigned int r = 0;

    #if __CUDA_ARCH__ >= 300
        asm("vmin2.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    #elif __CUDA_ARCH__ >= 200
        asm("vmin.u32.u32.u32 %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
        asm("vmin.u32.u32.u32 %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    #else
        unsigned int s, t, u;
        r = a & 0x0000ffff; // extract low halfword
        s = b & 0x0000ffff; // extract low halfword
        t = ::min(r, s);    // minimum of low halfwords
        r = a & 0xffff0000; // extract high halfword (kept in place; low bits zero)
        s = b & 0xffff0000; // extract high halfword (in-place compare is still valid)
        u = ::min(r, s);    // minimum of high halfwords
        r = t | u;          // combine halfword minimums
    #endif

        return r;
    }
|  | ||||
|     // 4 | ||||
|  | ||||
    // Per-byte addition of two uint32s treated as 4x8-bit unsigned lanes.
    // NOTE(review): the SM>=20 asm paths saturate (.sat) while the generic
    // fallback wraps modulo 2^8 per lane -- confirm callers tolerate this.
    static __device__ __forceinline__ unsigned int vadd4(unsigned int a, unsigned int b)
    {
        unsigned int r = 0;

    #if __CUDA_ARCH__ >= 300
        asm("vadd4.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    #elif __CUDA_ARCH__ >= 200
        asm("vadd.u32.u32.u32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
        asm("vadd.u32.u32.u32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
        asm("vadd.u32.u32.u32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
        asm("vadd.u32.u32.u32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    #else
        unsigned int s, t;
        s = a ^ b;          // sum bits
        r = a & 0x7f7f7f7f; // clear msbs
        t = b & 0x7f7f7f7f; // clear msbs
        s = s & 0x80808080; // msb sum bits
        r = r + t;          // add without msbs, record carry-out in msbs
        r = r ^ s;          // sum of msb sum and carry-in bits, w/o carry-out
    #endif /* __CUDA_ARCH__ >= 300 */

        return r;
    }
|  | ||||
    // Per-byte subtraction (a - b) on 4x8-bit unsigned lanes.
    // NOTE(review): the SM>=20 asm paths clamp at 0 (.sat) while the generic
    // fallback wraps modulo 2^8 per lane -- confirm callers tolerate this.
    static __device__ __forceinline__ unsigned int vsub4(unsigned int a, unsigned int b)
    {
        unsigned int r = 0;

    #if __CUDA_ARCH__ >= 300
        asm("vsub4.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    #elif __CUDA_ARCH__ >= 200
        asm("vsub.u32.u32.u32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
        asm("vsub.u32.u32.u32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
        asm("vsub.u32.u32.u32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
        asm("vsub.u32.u32.u32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    #else
        unsigned int s, t;
        s = a ^ ~b;         // inverted sum bits
        r = a | 0x80808080; // set msbs
        t = b & 0x7f7f7f7f; // clear msbs
        s = s & 0x80808080; // inverted msb sum bits
        r = r - t;          // subtract w/o msbs, record inverted borrows in msb
        r = r ^ s;          // combine inverted msb sum bits and borrows
    #endif

        return r;
    }
|  | ||||
|     static __device__ __forceinline__ unsigned int vavg4(unsigned int a, unsigned int b) | ||||
|     { | ||||
|         unsigned int r, s; | ||||
|  | ||||
|         // HAKMEM #23: a + b = 2 * (a & b) + (a ^ b) ==> | ||||
|         // (a + b) / 2 = (a & b) + ((a ^ b) >> 1) | ||||
|         s = a ^ b; | ||||
|         r = a & b; | ||||
|         s = s & 0xfefefefe; // ensure following shift doesn't cross byte boundaries | ||||
|         s = s >> 1; | ||||
|         s = r + s; | ||||
|  | ||||
|         return s; | ||||
|     } | ||||
|  | ||||
    // Per-byte average rounded up: (a.bN + b.bN + 1) / 2 for each 8-bit lane.
    static __device__ __forceinline__ unsigned int vavrg4(unsigned int a, unsigned int b)
    {
        unsigned int r = 0;

    #if __CUDA_ARCH__ >= 300
        asm("vavrg4.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    #else
        // HAKMEM #23: a + b = 2 * (a | b) - (a ^ b) ==>
        // (a + b + 1) / 2 = (a | b) - ((a ^ b) >> 1)
        unsigned int c;
        c = a ^ b;
        r = a | b;
        c = c & 0xfefefefe; // ensure following shift doesn't cross byte boundaries
        c = c >> 1;
        r = r - c;
    #endif

        return r;
    }
|  | ||||
    // Per-byte equality test: each 8-bit result lane is 1 if a.bN == b.bN, else 0.
    static __device__ __forceinline__ unsigned int vseteq4(unsigned int a, unsigned int b)
    {
        unsigned int r = 0;

    #if __CUDA_ARCH__ >= 300
        asm("vset4.u32.u32.eq %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    #else
        // inspired by Alan Mycroft's null-byte detection algorithm:
        // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
        unsigned int c;
        r = a ^ b;          // 0x00 if a == b
        c = r | 0x80808080; // set msbs, to catch carry out
        r = r ^ c;          // extract msbs, msb = 1 if r < 0x80
        c = c - 0x01010101; // msb = 0, if r was 0x00 or 0x80
        c = r & ~c;         // msb = 1, if r was 0x00
        r = c >> 7;         // convert to bool
    #endif

        return r;
    }
|  | ||||
    // Per-byte equality mask: each 8-bit result lane is 0xff if a.bN == b.bN, else 0x00.
    static __device__ __forceinline__ unsigned int vcmpeq4(unsigned int a, unsigned int b)
    {
        unsigned int r, t;

    #if __CUDA_ARCH__ >= 300
        r = vseteq4(a, b);
        t = r << 8;         // convert bool
        r = t - r;          //  to mask
    #else
        // inspired by Alan Mycroft's null-byte detection algorithm:
        // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
        t = a ^ b;          // 0x00 if a == b
        r = t | 0x80808080; // set msbs, to catch carry out
        t = t ^ r;          // extract msbs, msb = 1 if t < 0x80
        r = r - 0x01010101; // msb = 0, if t was 0x00 or 0x80
        r = t & ~r;         // msb = 1, if t was 0x00
        t = r >> 7;         // build mask
        t = r - t;          //  from
        r = t | r;          //   msbs
    #endif

        return r;
    }
|  | ||||
    // Per-byte unsigned compare a <= b: each 8-bit result lane is 1 or 0.
    static __device__ __forceinline__ unsigned int vsetle4(unsigned int a, unsigned int b)
    {
        unsigned int r = 0;

    #if __CUDA_ARCH__ >= 300
        asm("vset4.u32.u32.le %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    #else
        unsigned int c;
        // inverts only the local by-value copy of a
        asm("not.b32 %0, %0;" : "+r"(a));
        c = vavrg4(a, b);   // (b + ~a + 1) / 2 = (b - a) / 2
        c = c & 0x80808080; // msb = carry-outs
        r = c >> 7;         // convert to bool
    #endif

        return r;
    }
|  | ||||
    // Per-byte unsigned compare a <= b: each 8-bit result lane is 0xff or 0x00.
    static __device__ __forceinline__ unsigned int vcmple4(unsigned int a, unsigned int b)
    {
        unsigned int r, c;

    #if __CUDA_ARCH__ >= 300
        r = vsetle4(a, b);
        c = r << 8;         // convert bool
        r = c - r;          //  to mask
    #else
        asm("not.b32 %0, %0;" : "+r"(a));
        c = vavrg4(a, b);   // (b + ~a + 1) / 2 = (b - a) / 2
        c = c & 0x80808080; // msbs = carry-outs
        r = c >> 7;         // convert
        r = c - r;          //  msbs to
        r = c | r;          //   mask
    #endif

        return r;
    }
|  | ||||
    // Per-byte unsigned compare a < b: each 8-bit result lane is 1 or 0.
    static __device__ __forceinline__ unsigned int vsetlt4(unsigned int a, unsigned int b)
    {
        unsigned int r = 0;

    #if __CUDA_ARCH__ >= 300
        asm("vset4.u32.u32.lt %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    #else
        unsigned int c;
        asm("not.b32 %0, %0;" : "+r"(a));
        c = vavg4(a, b);    // (b + ~a) / 2 = (b - a) / 2 [rounded down]
        c = c & 0x80808080; // msb = carry-outs
        r = c >> 7;         // convert to bool
    #endif

        return r;
    }
|  | ||||
    // Per-byte unsigned compare a < b: each 8-bit result lane is 0xff or 0x00.
    static __device__ __forceinline__ unsigned int vcmplt4(unsigned int a, unsigned int b)
    {
        unsigned int r, c;

    #if __CUDA_ARCH__ >= 300
        r = vsetlt4(a, b);
        c = r << 8;         // convert bool
        r = c - r;          //  to mask
    #else
        asm("not.b32 %0, %0;" : "+r"(a));
        c = vavg4(a, b);    // (b + ~a) / 2 = (b - a) / 2 [rounded down]
        c = c & 0x80808080; // msbs = carry-outs
        r = c >> 7;         // convert
        r = c - r;          //  msbs to
        r = c | r;          //   mask
    #endif

        return r;
    }
|  | ||||
    // Per-byte unsigned compare a >= b: each 8-bit result lane is 1 or 0.
    static __device__ __forceinline__ unsigned int vsetge4(unsigned int a, unsigned int b)
    {
        unsigned int r = 0;

    #if __CUDA_ARCH__ >= 300
        asm("vset4.u32.u32.ge %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    #else
        unsigned int c;
        asm("not.b32 %0, %0;" : "+r"(b));
        c = vavrg4(a, b);   // (a + ~b + 1) / 2 = (a - b) / 2
        c = c & 0x80808080; // msb = carry-outs
        r = c >> 7;         // convert to bool
    #endif

        return r;
    }
|  | ||||
    // Per-byte unsigned compare a >= b: each 8-bit result lane is 0xff or 0x00.
    static __device__ __forceinline__ unsigned int vcmpge4(unsigned int a, unsigned int b)
    {
        unsigned int r, s;

    #if __CUDA_ARCH__ >= 300
        r = vsetge4(a, b);
        s = r << 8;         // convert bool
        r = s - r;          //  to mask
    #else
        asm ("not.b32 %0,%0;" : "+r"(b));
        r = vavrg4 (a, b);  // (a + ~b + 1) / 2 = (a - b) / 2
        r = r & 0x80808080; // msb = carry-outs
        s = r >> 7;         // build mask
        s = r - s;          //  from
        r = s | r;          //   msbs
    #endif

        return r;
    }
|  | ||||
    // Per-byte unsigned compare a > b: each 8-bit result lane is 1 or 0.
    static __device__ __forceinline__ unsigned int vsetgt4(unsigned int a, unsigned int b)
    {
        unsigned int r = 0;

    #if __CUDA_ARCH__ >= 300
        asm("vset4.u32.u32.gt %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    #else
        unsigned int c;
        asm("not.b32 %0, %0;" : "+r"(b));
        c = vavg4(a, b);    // (a + ~b) / 2 = (a - b) / 2 [rounded down]
        c = c & 0x80808080; // msb = carry-outs
        r = c >> 7;         // convert to bool
    #endif

        return r;
    }
|  | ||||
    // Per-byte unsigned compare a > b: each 8-bit result lane is 0xff or 0x00.
    static __device__ __forceinline__ unsigned int vcmpgt4(unsigned int a, unsigned int b)
    {
        unsigned int r, c;

    #if __CUDA_ARCH__ >= 300
        r = vsetgt4(a, b);
        c = r << 8;         // convert bool
        r = c - r;          //  to mask
    #else
        asm("not.b32 %0, %0;" : "+r"(b));
        c = vavg4(a, b);    // (a + ~b) / 2 = (a - b) / 2 [rounded down]
        c = c & 0x80808080; // msb = carry-outs
        r = c >> 7;         // convert
        r = c - r;          //  msbs to
        r = c | r;          //   mask
    #endif

        return r;
    }
|  | ||||
    // Per-byte inequality test: each 8-bit result lane is 1 if a.bN != b.bN, else 0.
    static __device__ __forceinline__ unsigned int vsetne4(unsigned int a, unsigned int b)
    {
        unsigned int r = 0;

    #if __CUDA_ARCH__ >= 300
        asm("vset4.u32.u32.ne %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    #else
        // inspired by Alan Mycroft's null-byte detection algorithm:
        // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
        unsigned int c;
        r = a ^ b;          // 0x00 if a == b
        c = r | 0x80808080; // set msbs, to catch carry out
        c = c - 0x01010101; // msb = 0, if r was 0x00 or 0x80
        c = r | c;          // msb = 1, if r was not 0x00
        c = c & 0x80808080; // extract msbs
        r = c >> 7;         // convert to bool
    #endif

        return r;
    }
|  | ||||
    // Per-byte inequality mask: each 8-bit result lane is 0xff if a.bN != b.bN, else 0x00.
    static __device__ __forceinline__ unsigned int vcmpne4(unsigned int a, unsigned int b)
    {
        unsigned int r, c;

    #if __CUDA_ARCH__ >= 300
        r = vsetne4(a, b);
        c = r << 8;         // convert bool
        r = c - r;          //  to mask
    #else
        // inspired by Alan Mycroft's null-byte detection algorithm:
        // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
        r = a ^ b;          // 0x00 if a == b
        c = r | 0x80808080; // set msbs, to catch carry out
        c = c - 0x01010101; // msb = 0, if r was 0x00 or 0x80
        c = r | c;          // msb = 1, if r was not 0x00
        c = c & 0x80808080; // extract msbs
        r = c >> 7;         // convert
        r = c - r;          //  msbs to
        r = c | r;          //   mask
    #endif

        return r;
    }
|  | ||||
    // Per-byte absolute difference |a.bN - b.bN| on 4x8-bit unsigned lanes.
    static __device__ __forceinline__ unsigned int vabsdiff4(unsigned int a, unsigned int b)
    {
        unsigned int r = 0;

    #if __CUDA_ARCH__ >= 300
        asm("vabsdiff4.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    #elif __CUDA_ARCH__ >= 200
        asm("vabsdiff.u32.u32.u32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
        asm("vabsdiff.u32.u32.u32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
        asm("vabsdiff.u32.u32.u32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
        asm("vabsdiff.u32.u32.u32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    #else
        unsigned int s;
        s = vcmpge4(a, b);  // mask = 0xff if a >= b
        r = a ^ b;          //
        s = (r &  s) ^ b;   // select a when a >= b, else select b => max(a,b)
        r = s ^ r;          // select a when b >= a, else select b => min(a,b)
        r = s - r;          // |a - b| = max(a,b) - min(a,b);
    #endif

        return r;
    }
|  | ||||
    // Per-byte unsigned maximum: r.bN = max(a.bN, b.bN) for each 8-bit lane.
    static __device__ __forceinline__ unsigned int vmax4(unsigned int a, unsigned int b)
    {
        unsigned int r = 0;

    #if __CUDA_ARCH__ >= 300
        asm("vmax4.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    #elif __CUDA_ARCH__ >= 200
        asm("vmax.u32.u32.u32 %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
        asm("vmax.u32.u32.u32 %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
        asm("vmax.u32.u32.u32 %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
        asm("vmax.u32.u32.u32 %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    #else
        unsigned int s;
        s = vcmpge4(a, b);  // mask = 0xff if a >= b
        r = a & s;          // select a when a >= b
        s = b & ~s;         // select b when a < b
        r = r | s;          // combine byte selections
    #endif

        return r;           // byte-wise unsigned maximum
    }
|  | ||||
    // Per-byte unsigned minimum: r.bN = min(a.bN, b.bN) for each 8-bit lane.
    static __device__ __forceinline__ unsigned int vmin4(unsigned int a, unsigned int b)
    {
        unsigned int r = 0;

    #if __CUDA_ARCH__ >= 300
        asm("vmin4.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    #elif __CUDA_ARCH__ >= 200
        asm("vmin.u32.u32.u32 %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
        asm("vmin.u32.u32.u32 %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
        asm("vmin.u32.u32.u32 %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
        asm("vmin.u32.u32.u32 %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    #else
        unsigned int s;
        s = vcmpge4(b, a);  // mask = 0xff if b >= a
        r = a & s;          // select a when b >= a
        s = b & ~s;         // select b when b < a
        r = r | s;          // combine byte selections
    #endif

        return r;
    }
| }}} | ||||
|  | ||||
| //! @endcond | ||||
|  | ||||
| #endif // __OPENCV_CUDA_SIMD_FUNCTIONS_HPP__ | ||||
							
								
								
									
										75
									
								
								3rdparty/include/opencv2/core/cuda/transform.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										75
									
								
								3rdparty/include/opencv2/core/cuda/transform.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,75 @@ | ||||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | ||||
| // | ||||
| //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | ||||
| // | ||||
| //  By downloading, copying, installing or using the software you agree to this license. | ||||
| //  If you do not agree to this license, do not download, install, | ||||
| //  copy or use the software. | ||||
| // | ||||
| // | ||||
| //                           License Agreement | ||||
| //                For Open Source Computer Vision Library | ||||
| // | ||||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | ||||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | ||||
| // Third party copyrights are property of their respective owners. | ||||
| // | ||||
| // Redistribution and use in source and binary forms, with or without modification, | ||||
| // are permitted provided that the following conditions are met: | ||||
| // | ||||
| //   * Redistribution's of source code must retain the above copyright notice, | ||||
| //     this list of conditions and the following disclaimer. | ||||
| // | ||||
| //   * Redistribution's in binary form must reproduce the above copyright notice, | ||||
| //     this list of conditions and the following disclaimer in the documentation | ||||
| //     and/or other materials provided with the distribution. | ||||
| // | ||||
| //   * The name of the copyright holders may not be used to endorse or promote products | ||||
| //     derived from this software without specific prior written permission. | ||||
| // | ||||
| // This software is provided by the copyright holders and contributors "as is" and | ||||
| // any express or implied warranties, including, but not limited to, the implied | ||||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | ||||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | ||||
| // indirect, incidental, special, exemplary, or consequential damages | ||||
| // (including, but not limited to, procurement of substitute goods or services; | ||||
| // loss of use, data, or profits; or business interruption) however caused | ||||
| // and on any theory of liability, whether in contract, strict liability, | ||||
| // or tort (including negligence or otherwise) arising in any way out of | ||||
| // the use of this software, even if advised of the possibility of such damage. | ||||
| // | ||||
| //M*/ | ||||
|  | ||||
| #ifndef __OPENCV_CUDA_TRANSFORM_HPP__ | ||||
| #define __OPENCV_CUDA_TRANSFORM_HPP__ | ||||
|  | ||||
| #include "common.hpp" | ||||
| #include "utility.hpp" | ||||
| #include "detail/transform_detail.hpp" | ||||
|  | ||||
| /** @file | ||||
|  * @deprecated Use @ref cudev instead. | ||||
|  */ | ||||
|  | ||||
| //! @cond IGNORED | ||||
|  | ||||
| namespace cv { namespace cuda { namespace device | ||||
| { | ||||
|     template <typename T, typename D, typename UnOp, typename Mask> | ||||
|     static inline void transform(PtrStepSz<T> src, PtrStepSz<D> dst, UnOp op, const Mask& mask, cudaStream_t stream) | ||||
|     { | ||||
|         typedef TransformFunctorTraits<UnOp> ft; | ||||
|         transform_detail::TransformDispatcher<VecTraits<T>::cn == 1 && VecTraits<D>::cn == 1 && ft::smart_shift != 1>::call(src, dst, op, mask, stream); | ||||
|     } | ||||
|  | ||||
|     template <typename T1, typename T2, typename D, typename BinOp, typename Mask> | ||||
|     static inline void transform(PtrStepSz<T1> src1, PtrStepSz<T2> src2, PtrStepSz<D> dst, BinOp op, const Mask& mask, cudaStream_t stream) | ||||
|     { | ||||
|         typedef TransformFunctorTraits<BinOp> ft; | ||||
|         transform_detail::TransformDispatcher<VecTraits<T1>::cn == 1 && VecTraits<T2>::cn == 1 && VecTraits<D>::cn == 1 && ft::smart_shift != 1>::call(src1, src2, dst, op, mask, stream); | ||||
|     } | ||||
| }}} | ||||
|  | ||||
| //! @endcond | ||||
|  | ||||
| #endif // __OPENCV_CUDA_TRANSFORM_HPP__ | ||||
							
								
								
									
										90
									
								
								3rdparty/include/opencv2/core/cuda/type_traits.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										90
									
								
								3rdparty/include/opencv2/core/cuda/type_traits.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,90 @@ | ||||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | ||||
| // | ||||
| //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | ||||
| // | ||||
| //  By downloading, copying, installing or using the software you agree to this license. | ||||
| //  If you do not agree to this license, do not download, install, | ||||
| //  copy or use the software. | ||||
| // | ||||
| // | ||||
| //                           License Agreement | ||||
| //                For Open Source Computer Vision Library | ||||
| // | ||||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | ||||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | ||||
| // Third party copyrights are property of their respective owners. | ||||
| // | ||||
| // Redistribution and use in source and binary forms, with or without modification, | ||||
| // are permitted provided that the following conditions are met: | ||||
| // | ||||
| //   * Redistribution's of source code must retain the above copyright notice, | ||||
| //     this list of conditions and the following disclaimer. | ||||
| // | ||||
| //   * Redistribution's in binary form must reproduce the above copyright notice, | ||||
| //     this list of conditions and the following disclaimer in the documentation | ||||
| //     and/or other materials provided with the distribution. | ||||
| // | ||||
| //   * The name of the copyright holders may not be used to endorse or promote products | ||||
| //     derived from this software without specific prior written permission. | ||||
| // | ||||
| // This software is provided by the copyright holders and contributors "as is" and | ||||
| // any express or implied warranties, including, but not limited to, the implied | ||||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | ||||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | ||||
| // indirect, incidental, special, exemplary, or consequential damages | ||||
| // (including, but not limited to, procurement of substitute goods or services; | ||||
| // loss of use, data, or profits; or business interruption) however caused | ||||
| // and on any theory of liability, whether in contract, strict liability, | ||||
| // or tort (including negligence or otherwise) arising in any way out of | ||||
| // the use of this software, even if advised of the possibility of such damage. | ||||
| // | ||||
| //M*/ | ||||
|  | ||||
| #ifndef __OPENCV_CUDA_TYPE_TRAITS_HPP__ | ||||
| #define __OPENCV_CUDA_TYPE_TRAITS_HPP__ | ||||
|  | ||||
| #include "detail/type_traits_detail.hpp" | ||||
|  | ||||
| /** @file | ||||
|  * @deprecated Use @ref cudev instead. | ||||
|  */ | ||||
|  | ||||
| //! @cond IGNORED | ||||
|  | ||||
| namespace cv { namespace cuda { namespace device | ||||
| { | ||||
    // Compile-time predicate: true when T is cheap to pass by value — an
    // integral or floating-point scalar, or a pointer (after stripping any
    // reference). Used when choosing TypeTraits<T>::ParameterType.
    template <typename T> struct IsSimpleParameter
    {
        enum {value = type_traits_detail::IsIntegral<T>::value || type_traits_detail::IsFloat<T>::value ||
            type_traits_detail::PointerTraits<typename type_traits_detail::ReferenceTraits<T>::type>::value};
    };
|  | ||||
    // Compile-time introspection of T: stripped variants of the type,
    // category flags, and the preferred parameter-passing type.
    template <typename T> struct TypeTraits
    {
        // T with const / volatile / both qualifiers removed, the pointee of a
        // pointer type, and T with any reference removed, respectively.
        typedef typename type_traits_detail::UnConst<T>::type                                                NonConstType;
        typedef typename type_traits_detail::UnVolatile<T>::type                                             NonVolatileType;
        typedef typename type_traits_detail::UnVolatile<typename type_traits_detail::UnConst<T>::type>::type UnqualifiedType;
        typedef typename type_traits_detail::PointerTraits<UnqualifiedType>::type                            PointeeType;
        typedef typename type_traits_detail::ReferenceTraits<T>::type                                        ReferredType;

        // Qualifier flags, as reported by the detail traits (nonzero when the
        // corresponding qualifier is present on T).
        enum { isConst          = type_traits_detail::UnConst<T>::value };
        enum { isVolatile       = type_traits_detail::UnVolatile<T>::value };

        enum { isReference      = type_traits_detail::ReferenceTraits<UnqualifiedType>::value };
        enum { isPointer        = type_traits_detail::PointerTraits<typename type_traits_detail::ReferenceTraits<UnqualifiedType>::type>::value };

        // Scalar/vector category flags, computed on the cv-stripped type.
        // Note: "IsSignedIntergral" [sic] matches the spelling declared in
        // type_traits_detail.hpp — do not "fix" it here.
        enum { isUnsignedInt    = type_traits_detail::IsUnsignedIntegral<UnqualifiedType>::value };
        enum { isSignedInt      = type_traits_detail::IsSignedIntergral<UnqualifiedType>::value };
        enum { isIntegral       = type_traits_detail::IsIntegral<UnqualifiedType>::value };
        enum { isFloat          = type_traits_detail::IsFloat<UnqualifiedType>::value };
        enum { isArith          = isIntegral || isFloat };
        enum { isVec            = type_traits_detail::IsVec<UnqualifiedType>::value };

        // How to pass T as a function parameter: by value for simple types,
        // otherwise via AddParameterType (presumably a const reference — see
        // type_traits_detail.hpp).
        typedef typename type_traits_detail::Select<IsSimpleParameter<UnqualifiedType>::value,
            T, typename type_traits_detail::AddParameterType<T>::type>::type ParameterType;
    };
| }}} | ||||
|  | ||||
| //! @endcond | ||||
|  | ||||
| #endif // __OPENCV_CUDA_TYPE_TRAITS_HPP__ | ||||
							
								
								
									
										221
									
								
								3rdparty/include/opencv2/core/cuda/utility.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										221
									
								
								3rdparty/include/opencv2/core/cuda/utility.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,221 @@ | ||||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | ||||
| // | ||||
| //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | ||||
| // | ||||
| //  By downloading, copying, installing or using the software you agree to this license. | ||||
| //  If you do not agree to this license, do not download, install, | ||||
| //  copy or use the software. | ||||
| // | ||||
| // | ||||
| //                           License Agreement | ||||
| //                For Open Source Computer Vision Library | ||||
| // | ||||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | ||||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | ||||
| // Third party copyrights are property of their respective owners. | ||||
| // | ||||
| // Redistribution and use in source and binary forms, with or without modification, | ||||
| // are permitted provided that the following conditions are met: | ||||
| // | ||||
| //   * Redistribution's of source code must retain the above copyright notice, | ||||
| //     this list of conditions and the following disclaimer. | ||||
| // | ||||
| //   * Redistribution's in binary form must reproduce the above copyright notice, | ||||
| //     this list of conditions and the following disclaimer in the documentation | ||||
| //     and/or other materials provided with the distribution. | ||||
| // | ||||
| //   * The name of the copyright holders may not be used to endorse or promote products | ||||
| //     derived from this software without specific prior written permission. | ||||
| // | ||||
| // This software is provided by the copyright holders and contributors "as is" and | ||||
| // any express or implied warranties, including, but not limited to, the implied | ||||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | ||||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | ||||
| // indirect, incidental, special, exemplary, or consequential damages | ||||
| // (including, but not limited to, procurement of substitute goods or services; | ||||
| // loss of use, data, or profits; or business interruption) however caused | ||||
| // and on any theory of liability, whether in contract, strict liability, | ||||
| // or tort (including negligence or otherwise) arising in any way out of | ||||
| // the use of this software, even if advised of the possibility of such damage. | ||||
| // | ||||
| //M*/ | ||||
|  | ||||
| #ifndef __OPENCV_CUDA_UTILITY_HPP__ | ||||
| #define __OPENCV_CUDA_UTILITY_HPP__ | ||||
|  | ||||
| #include "saturate_cast.hpp" | ||||
| #include "datamov_utils.hpp" | ||||
|  | ||||
| /** @file | ||||
|  * @deprecated Use @ref cudev instead. | ||||
|  */ | ||||
|  | ||||
| //! @cond IGNORED | ||||
|  | ||||
| namespace cv { namespace cuda { namespace device | ||||
| { | ||||
    // Warp geometry: warps are 1 << 5 = 32 threads wide.
    #define OPENCV_CUDA_LOG_WARP_SIZE        (5)
    #define OPENCV_CUDA_WARP_SIZE            (1 << OPENCV_CUDA_LOG_WARP_SIZE)
    // Shared-memory bank count depends on the architecture being compiled for.
    #define OPENCV_CUDA_LOG_MEM_BANKS        ((__CUDA_ARCH__ >= 200) ? 5 : 4) // 32 banks on fermi, 16 on tesla
    #define OPENCV_CUDA_MEM_BANKS            (1 << OPENCV_CUDA_LOG_MEM_BANKS)
|  | ||||
|     /////////////////////////////////////////////////////////////////////////////// | ||||
|     // swap | ||||
|  | ||||
|     template <typename T> void __device__ __host__ __forceinline__ swap(T& a, T& b) | ||||
|     { | ||||
|         const T temp = a; | ||||
|         a = b; | ||||
|         b = temp; | ||||
|     } | ||||
|  | ||||
|     /////////////////////////////////////////////////////////////////////////////// | ||||
|     // Mask Reader | ||||
|  | ||||
|     struct SingleMask | ||||
|     { | ||||
|         explicit __host__ __device__ __forceinline__ SingleMask(PtrStepb mask_) : mask(mask_) {} | ||||
|         __host__ __device__ __forceinline__ SingleMask(const SingleMask& mask_): mask(mask_.mask){} | ||||
|  | ||||
|         __device__ __forceinline__ bool operator()(int y, int x) const | ||||
|         { | ||||
|             return mask.ptr(y)[x] != 0; | ||||
|         } | ||||
|  | ||||
|         PtrStepb mask; | ||||
|     }; | ||||
|  | ||||
|     struct SingleMaskChannels | ||||
|     { | ||||
|         __host__ __device__ __forceinline__ SingleMaskChannels(PtrStepb mask_, int channels_) | ||||
|         : mask(mask_), channels(channels_) {} | ||||
|         __host__ __device__ __forceinline__ SingleMaskChannels(const SingleMaskChannels& mask_) | ||||
|             :mask(mask_.mask), channels(mask_.channels){} | ||||
|  | ||||
|         __device__ __forceinline__ bool operator()(int y, int x) const | ||||
|         { | ||||
|             return mask.ptr(y)[x / channels] != 0; | ||||
|         } | ||||
|  | ||||
|         PtrStepb mask; | ||||
|         int channels; | ||||
|     }; | ||||
|  | ||||
|     struct MaskCollection | ||||
|     { | ||||
|         explicit __host__ __device__ __forceinline__ MaskCollection(PtrStepb* maskCollection_) | ||||
|             : maskCollection(maskCollection_) {} | ||||
|  | ||||
|         __device__ __forceinline__ MaskCollection(const MaskCollection& masks_) | ||||
|             : maskCollection(masks_.maskCollection), curMask(masks_.curMask){} | ||||
|  | ||||
|         __device__ __forceinline__ void next() | ||||
|         { | ||||
|             curMask = *maskCollection++; | ||||
|         } | ||||
|         __device__ __forceinline__ void setMask(int z) | ||||
|         { | ||||
|             curMask = maskCollection[z]; | ||||
|         } | ||||
|  | ||||
|         __device__ __forceinline__ bool operator()(int y, int x) const | ||||
|         { | ||||
|             uchar val; | ||||
|             return curMask.data == 0 || (ForceGlob<uchar>::Load(curMask.ptr(y), x, val), (val != 0)); | ||||
|         } | ||||
|  | ||||
|         const PtrStepb* maskCollection; | ||||
|         PtrStepb curMask; | ||||
|     }; | ||||
|  | ||||
|     struct WithOutMask | ||||
|     { | ||||
|         __host__ __device__ __forceinline__ WithOutMask(){} | ||||
|         __host__ __device__ __forceinline__ WithOutMask(const WithOutMask&){} | ||||
|  | ||||
|         __device__ __forceinline__ void next() const | ||||
|         { | ||||
|         } | ||||
|         __device__ __forceinline__ void setMask(int) const | ||||
|         { | ||||
|         } | ||||
|  | ||||
|         __device__ __forceinline__ bool operator()(int, int) const | ||||
|         { | ||||
|             return true; | ||||
|         } | ||||
|  | ||||
|         __device__ __forceinline__ bool operator()(int, int, int) const | ||||
|         { | ||||
|             return true; | ||||
|         } | ||||
|  | ||||
|         static __device__ __forceinline__ bool check(int, int) | ||||
|         { | ||||
|             return true; | ||||
|         } | ||||
|  | ||||
|         static __device__ __forceinline__ bool check(int, int, int) | ||||
|         { | ||||
|             return true; | ||||
|         } | ||||
|     }; | ||||
|  | ||||
|     /////////////////////////////////////////////////////////////////////////////// | ||||
|     // Solve linear system | ||||
|  | ||||
|     // solve 2x2 linear system Ax=b | ||||
|     template <typename T> __device__ __forceinline__ bool solve2x2(const T A[2][2], const T b[2], T x[2]) | ||||
|     { | ||||
|         T det = A[0][0] * A[1][1] - A[1][0] * A[0][1]; | ||||
|  | ||||
|         if (det != 0) | ||||
|         { | ||||
|             double invdet = 1.0 / det; | ||||
|  | ||||
|             x[0] = saturate_cast<T>(invdet * (b[0] * A[1][1] - b[1] * A[0][1])); | ||||
|  | ||||
|             x[1] = saturate_cast<T>(invdet * (A[0][0] * b[1] - A[1][0] * b[0])); | ||||
|  | ||||
|             return true; | ||||
|         } | ||||
|  | ||||
|         return false; | ||||
|     } | ||||
|  | ||||
|     // solve 3x3 linear system Ax=b | ||||
|     template <typename T> __device__ __forceinline__ bool solve3x3(const T A[3][3], const T b[3], T x[3]) | ||||
|     { | ||||
|         T det = A[0][0] * (A[1][1] * A[2][2] - A[1][2] * A[2][1]) | ||||
|               - A[0][1] * (A[1][0] * A[2][2] - A[1][2] * A[2][0]) | ||||
|               + A[0][2] * (A[1][0] * A[2][1] - A[1][1] * A[2][0]); | ||||
|  | ||||
|         if (det != 0) | ||||
|         { | ||||
|             double invdet = 1.0 / det; | ||||
|  | ||||
|             x[0] = saturate_cast<T>(invdet * | ||||
|                 (b[0]    * (A[1][1] * A[2][2] - A[1][2] * A[2][1]) - | ||||
|                  A[0][1] * (b[1]    * A[2][2] - A[1][2] * b[2]   ) + | ||||
|                  A[0][2] * (b[1]    * A[2][1] - A[1][1] * b[2]   ))); | ||||
|  | ||||
|             x[1] = saturate_cast<T>(invdet * | ||||
|                 (A[0][0] * (b[1]    * A[2][2] - A[1][2] * b[2]   ) - | ||||
|                  b[0]    * (A[1][0] * A[2][2] - A[1][2] * A[2][0]) + | ||||
|                  A[0][2] * (A[1][0] * b[2]    - b[1]    * A[2][0]))); | ||||
|  | ||||
|             x[2] = saturate_cast<T>(invdet * | ||||
|                 (A[0][0] * (A[1][1] * b[2]    - b[1]    * A[2][1]) - | ||||
|                  A[0][1] * (A[1][0] * b[2]    - b[1]    * A[2][0]) + | ||||
|                  b[0]    * (A[1][0] * A[2][1] - A[1][1] * A[2][0]))); | ||||
|  | ||||
|             return true; | ||||
|         } | ||||
|  | ||||
|         return false; | ||||
|     } | ||||
| }}} // namespace cv { namespace cuda { namespace cudev | ||||
|  | ||||
| //! @endcond | ||||
|  | ||||
| #endif // __OPENCV_CUDA_UTILITY_HPP__ | ||||
							
								
								
									
										232
									
								
								3rdparty/include/opencv2/core/cuda/vec_distance.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										232
									
								
								3rdparty/include/opencv2/core/cuda/vec_distance.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,232 @@ | ||||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | ||||
| // | ||||
| //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | ||||
| // | ||||
| //  By downloading, copying, installing or using the software you agree to this license. | ||||
| //  If you do not agree to this license, do not download, install, | ||||
| //  copy or use the software. | ||||
| // | ||||
| // | ||||
| //                           License Agreement | ||||
| //                For Open Source Computer Vision Library | ||||
| // | ||||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | ||||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | ||||
| // Third party copyrights are property of their respective owners. | ||||
| // | ||||
| // Redistribution and use in source and binary forms, with or without modification, | ||||
| // are permitted provided that the following conditions are met: | ||||
| // | ||||
| //   * Redistribution's of source code must retain the above copyright notice, | ||||
| //     this list of conditions and the following disclaimer. | ||||
| // | ||||
| //   * Redistribution's in binary form must reproduce the above copyright notice, | ||||
| //     this list of conditions and the following disclaimer in the documentation | ||||
| //     and/or other materials provided with the distribution. | ||||
| // | ||||
| //   * The name of the copyright holders may not be used to endorse or promote products | ||||
| //     derived from this software without specific prior written permission. | ||||
| // | ||||
| // This software is provided by the copyright holders and contributors "as is" and | ||||
| // any express or implied warranties, including, but not limited to, the implied | ||||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | ||||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | ||||
| // indirect, incidental, special, exemplary, or consequential damages | ||||
| // (including, but not limited to, procurement of substitute goods or services; | ||||
| // loss of use, data, or profits; or business interruption) however caused | ||||
| // and on any theory of liability, whether in contract, strict liability, | ||||
| // or tort (including negligence or otherwise) arising in any way out of | ||||
| // the use of this software, even if advised of the possibility of such damage. | ||||
| // | ||||
| //M*/ | ||||
|  | ||||
| #ifndef __OPENCV_CUDA_VEC_DISTANCE_HPP__ | ||||
| #define __OPENCV_CUDA_VEC_DISTANCE_HPP__ | ||||
|  | ||||
| #include "reduce.hpp" | ||||
| #include "functional.hpp" | ||||
| #include "detail/vec_distance_detail.hpp" | ||||
|  | ||||
| /** @file | ||||
|  * @deprecated Use @ref cudev instead. | ||||
|  */ | ||||
|  | ||||
| //! @cond IGNORED | ||||
|  | ||||
| namespace cv { namespace cuda { namespace device | ||||
| { | ||||
|     template <typename T> struct L1Dist | ||||
|     { | ||||
|         typedef int value_type; | ||||
|         typedef int result_type; | ||||
|  | ||||
|         __device__ __forceinline__ L1Dist() : mySum(0) {} | ||||
|  | ||||
|         __device__ __forceinline__ void reduceIter(int val1, int val2) | ||||
|         { | ||||
|             mySum = __sad(val1, val2, mySum); | ||||
|         } | ||||
|  | ||||
|         template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(int* smem, int tid) | ||||
|         { | ||||
|             reduce<THREAD_DIM>(smem, mySum, tid, plus<int>()); | ||||
|         } | ||||
|  | ||||
|         __device__ __forceinline__ operator int() const | ||||
|         { | ||||
|             return mySum; | ||||
|         } | ||||
|  | ||||
|         int mySum; | ||||
|     }; | ||||
|     template <> struct L1Dist<float> | ||||
|     { | ||||
|         typedef float value_type; | ||||
|         typedef float result_type; | ||||
|  | ||||
|         __device__ __forceinline__ L1Dist() : mySum(0.0f) {} | ||||
|  | ||||
|         __device__ __forceinline__ void reduceIter(float val1, float val2) | ||||
|         { | ||||
|             mySum += ::fabs(val1 - val2); | ||||
|         } | ||||
|  | ||||
|         template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(float* smem, int tid) | ||||
|         { | ||||
|             reduce<THREAD_DIM>(smem, mySum, tid, plus<float>()); | ||||
|         } | ||||
|  | ||||
|         __device__ __forceinline__ operator float() const | ||||
|         { | ||||
|             return mySum; | ||||
|         } | ||||
|  | ||||
|         float mySum; | ||||
|     }; | ||||
|  | ||||
|     struct L2Dist | ||||
|     { | ||||
|         typedef float value_type; | ||||
|         typedef float result_type; | ||||
|  | ||||
|         __device__ __forceinline__ L2Dist() : mySum(0.0f) {} | ||||
|  | ||||
|         __device__ __forceinline__ void reduceIter(float val1, float val2) | ||||
|         { | ||||
|             float reg = val1 - val2; | ||||
|             mySum += reg * reg; | ||||
|         } | ||||
|  | ||||
|         template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(float* smem, int tid) | ||||
|         { | ||||
|             reduce<THREAD_DIM>(smem, mySum, tid, plus<float>()); | ||||
|         } | ||||
|  | ||||
|         __device__ __forceinline__ operator float() const | ||||
|         { | ||||
|             return sqrtf(mySum); | ||||
|         } | ||||
|  | ||||
|         float mySum; | ||||
|     }; | ||||
|  | ||||
|     struct HammingDist | ||||
|     { | ||||
|         typedef int value_type; | ||||
|         typedef int result_type; | ||||
|  | ||||
|         __device__ __forceinline__ HammingDist() : mySum(0) {} | ||||
|  | ||||
|         __device__ __forceinline__ void reduceIter(int val1, int val2) | ||||
|         { | ||||
|             mySum += __popc(val1 ^ val2); | ||||
|         } | ||||
|  | ||||
|         template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(int* smem, int tid) | ||||
|         { | ||||
|             reduce<THREAD_DIM>(smem, mySum, tid, plus<int>()); | ||||
|         } | ||||
|  | ||||
|         __device__ __forceinline__ operator int() const | ||||
|         { | ||||
|             return mySum; | ||||
|         } | ||||
|  | ||||
|         int mySum; | ||||
|     }; | ||||
|  | ||||
|     // calc distance between two vectors in global memory | ||||
|     template <int THREAD_DIM, typename Dist, typename T1, typename T2> | ||||
|     __device__ void calcVecDiffGlobal(const T1* vec1, const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid) | ||||
|     { | ||||
|         for (int i = tid; i < len; i += THREAD_DIM) | ||||
|         { | ||||
|             T1 val1; | ||||
|             ForceGlob<T1>::Load(vec1, i, val1); | ||||
|  | ||||
|             T2 val2; | ||||
|             ForceGlob<T2>::Load(vec2, i, val2); | ||||
|  | ||||
|             dist.reduceIter(val1, val2); | ||||
|         } | ||||
|  | ||||
|         dist.reduceAll<THREAD_DIM>(smem, tid); | ||||
|     } | ||||
|  | ||||
|     // calc distance between two vectors, first vector is cached in register or shared memory, second vector is in global memory | ||||
    // Accumulate the distance between a cached vector and one in global
    // memory, then combine the per-thread partials via smem.
    template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN, typename Dist, typename T1, typename T2>
    __device__ __forceinline__ void calcVecDiffCached(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, typename Dist::result_type* smem, int tid)
    {
        // Per-thread accumulation is delegated to the detail dispatcher,
        // presumably specialized on LEN_EQ_MAX_LEN for full unrolling — see
        // vec_distance_detail.hpp.
        vec_distance_detail::VecDiffCachedCalculator<THREAD_DIM, MAX_LEN, LEN_EQ_MAX_LEN>::calc(vecCached, vecGlob, len, dist, tid);

        // Combine the partial results from all THREAD_DIM threads.
        dist.reduceAll<THREAD_DIM>(smem, tid);
    }
|  | ||||
|     // calc distance between two vectors in global memory | ||||
|     template <int THREAD_DIM, typename T1> struct VecDiffGlobal | ||||
|     { | ||||
|         explicit __device__ __forceinline__ VecDiffGlobal(const T1* vec1_, int = 0, void* = 0, int = 0, int = 0) | ||||
|         { | ||||
|             vec1 = vec1_; | ||||
|         } | ||||
|  | ||||
|         template <typename T2, typename Dist> | ||||
|         __device__ __forceinline__ void calc(const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid) const | ||||
|         { | ||||
|             calcVecDiffGlobal<THREAD_DIM>(vec1, vec2, len, dist, smem, tid); | ||||
|         } | ||||
|  | ||||
|         const T1* vec1; | ||||
|     }; | ||||
|  | ||||
|     // calc distance between two vectors, first vector is cached in register memory, second vector is in global memory | ||||
    template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN, typename U> struct VecDiffCachedRegister
    {
        // Stage vec1 into shared memory (one element per thread with
        // glob_tid < len), then copy this thread's strided slice into the
        // per-thread array vec1Vals (named "Register"; actual placement is
        // up to the compiler). Both __syncthreads() calls must be reached by
        // every thread of the block — do not construct this under divergent
        // control flow.
        // NOTE(review): when len < MAX_LEN the tail of smem is read without
        // being written here — presumably callers guarantee len == MAX_LEN
        // (cf. LEN_EQ_MAX_LEN) or tolerate the stale values; confirm.
        template <typename T1> __device__ __forceinline__ VecDiffCachedRegister(const T1* vec1, int len, U* smem, int glob_tid, int tid)
        {
            if (glob_tid < len)
                smem[glob_tid] = vec1[glob_tid];
            __syncthreads();

            U* vec1ValsPtr = vec1Vals;

            #pragma unroll
            for (int i = tid; i < MAX_LEN; i += THREAD_DIM)
                *vec1ValsPtr++ = smem[i];

            __syncthreads();
        }

        // Accumulate dist between the cached copy and vec2, then reduce
        // across THREAD_DIM threads via smem.
        template <typename T2, typename Dist>
        __device__ __forceinline__ void calc(const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid) const
        {
            calcVecDiffCached<THREAD_DIM, MAX_LEN, LEN_EQ_MAX_LEN>(vec1Vals, vec2, len, dist, smem, tid);
        }

        // Per-thread slice of the cached vector (MAX_LEN must be divisible
        // by THREAD_DIM for full coverage).
        U vec1Vals[MAX_LEN / THREAD_DIM];
    };
| }}} // namespace cv { namespace cuda { namespace cudev | ||||
|  | ||||
| //! @endcond | ||||
|  | ||||
| #endif // __OPENCV_CUDA_VEC_DISTANCE_HPP__ | ||||
							
								
								
									
										930
									
								
								3rdparty/include/opencv2/core/cuda/vec_math.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										930
									
								
								3rdparty/include/opencv2/core/cuda/vec_math.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,930 @@ | ||||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | ||||
| // | ||||
| //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | ||||
| // | ||||
| //  By downloading, copying, installing or using the software you agree to this license. | ||||
| //  If you do not agree to this license, do not download, install, | ||||
| //  copy or use the software. | ||||
| // | ||||
| // | ||||
| //                           License Agreement | ||||
| //                For Open Source Computer Vision Library | ||||
| // | ||||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | ||||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | ||||
| // Third party copyrights are property of their respective owners. | ||||
| // | ||||
| // Redistribution and use in source and binary forms, with or without modification, | ||||
| // are permitted provided that the following conditions are met: | ||||
| // | ||||
| //   * Redistribution's of source code must retain the above copyright notice, | ||||
| //     this list of conditions and the following disclaimer. | ||||
| // | ||||
| //   * Redistribution's in binary form must reproduce the above copyright notice, | ||||
| //     this list of conditions and the following disclaimer in the documentation | ||||
| //     and/or other materials provided with the distribution. | ||||
| // | ||||
| //   * The name of the copyright holders may not be used to endorse or promote products | ||||
| //     derived from this software without specific prior written permission. | ||||
| // | ||||
| // This software is provided by the copyright holders and contributors "as is" and | ||||
| // any express or implied warranties, including, but not limited to, the implied | ||||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | ||||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | ||||
| // indirect, incidental, special, exemplary, or consequential damages | ||||
| // (including, but not limited to, procurement of substitute goods or services; | ||||
| // loss of use, data, or profits; or business interruption) however caused | ||||
| // and on any theory of liability, whether in contract, strict liability, | ||||
| // or tort (including negligence or otherwise) arising in any way out of | ||||
| // the use of this software, even if advised of the possibility of such damage. | ||||
| // | ||||
| //M*/ | ||||
|  | ||||
| #ifndef __OPENCV_CUDA_VECMATH_HPP__ | ||||
| #define __OPENCV_CUDA_VECMATH_HPP__ | ||||
|  | ||||
| #include "vec_traits.hpp" | ||||
| #include "saturate_cast.hpp" | ||||
|  | ||||
| /** @file | ||||
|  * @deprecated Use @ref cudev instead. | ||||
|  */ | ||||
|  | ||||
| //! @cond IGNORED | ||||
|  | ||||
| namespace cv { namespace cuda { namespace device | ||||
| { | ||||
|  | ||||
| // saturate_cast | ||||
|  | ||||
namespace vec_math_detail
{
    // Channel-count-dispatched helper: SatCastHelper<cn, VecD>::cast applies
    // saturate_cast to every component of a source vector, producing a VecD.
    // Only specializations for cn = 1..4 exist; other channel counts fail to
    // compile against this primary declaration.
    template <int cn, typename VecD> struct SatCastHelper;
    template <typename VecD> struct SatCastHelper<1, VecD>
    {
        template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
        {
            typedef typename VecTraits<VecD>::elem_type D;
            return VecTraits<VecD>::make(saturate_cast<D>(v.x));
        }
    };
    template <typename VecD> struct SatCastHelper<2, VecD>
    {
        template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
        {
            typedef typename VecTraits<VecD>::elem_type D;
            return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y));
        }
    };
    template <typename VecD> struct SatCastHelper<3, VecD>
    {
        template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
        {
            typedef typename VecTraits<VecD>::elem_type D;
            return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y), saturate_cast<D>(v.z));
        }
    };
    template <typename VecD> struct SatCastHelper<4, VecD>
    {
        template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
        {
            typedef typename VecTraits<VecD>::elem_type D;
            return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y), saturate_cast<D>(v.z), saturate_cast<D>(v.w));
        }
    };

    // Entry point used by the public saturate_cast overloads below; picks the
    // specialization matching the destination type's channel count.
    template <typename VecD, typename VecS> static __device__ __forceinline__ VecD saturate_cast_helper(const VecS& v)
    {
        return SatCastHelper<VecTraits<VecD>::cn, VecD>::cast(v);
    }
}
|  | ||||
// Component-wise saturate_cast overloads for every 1..4-channel CUDA built-in
// vector type. The destination vector type T is given explicitly, e.g.
// saturate_cast<uchar3>(f3); the channel counts of T and the source must
// match (the arity of VecTraits<T>::make enforces this at compile time).

// 1-channel sources
template<typename T> static __device__ __forceinline__ T saturate_cast(const uchar1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const char1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const ushort1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const short1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const uint1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const int1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const float1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const double1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}

// 2-channel sources
template<typename T> static __device__ __forceinline__ T saturate_cast(const uchar2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const char2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const ushort2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const short2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const uint2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const int2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const float2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const double2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}

// 3-channel sources
template<typename T> static __device__ __forceinline__ T saturate_cast(const uchar3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const char3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const ushort3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const short3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const uint3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const int3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const float3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const double3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}

// 4-channel sources
template<typename T> static __device__ __forceinline__ T saturate_cast(const uchar4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const char4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const ushort4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const short4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const uint4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const int4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const float4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const double4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|  | ||||
| // unary operators | ||||
|  | ||||
// Expands component-wise implementations of a unary operator `op` for the
// 1..4-channel forms of input_type, each returning the matching output_type
// vector (e.g. operator- on char3 -> char3, operator! on int2 -> uchar2).
// `op` is applied per component via token pasting of the channel suffix.
// (No comments inside the macro body: a // comment would swallow the
// line-continuation backslash.)
#define CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(op, input_type, output_type) \
    __device__ __forceinline__ output_type ## 1 operator op(const input_type ## 1 & a) \
    { \
        return VecTraits<output_type ## 1>::make(op (a.x)); \
    } \
    __device__ __forceinline__ output_type ## 2 operator op(const input_type ## 2 & a) \
    { \
        return VecTraits<output_type ## 2>::make(op (a.x), op (a.y)); \
    } \
    __device__ __forceinline__ output_type ## 3 operator op(const input_type ## 3 & a) \
    { \
        return VecTraits<output_type ## 3>::make(op (a.x), op (a.y), op (a.z)); \
    } \
    __device__ __forceinline__ output_type ## 4 operator op(const input_type ## 4 & a) \
    { \
        return VecTraits<output_type ## 4>::make(op (a.x), op (a.y), op (a.z), op (a.w)); \
    }
|  | ||||
// operator- : signed integer and floating-point element types only (negating
// an unsigned vector is deliberately not provided).
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, char, char)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, short, short)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, int, int)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, double, double)

// operator! : logical negation; every element type yields a uchar mask vector.
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, char, uchar)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, ushort, uchar)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, short, uchar)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, int, uchar)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, uint, uchar)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, float, uchar)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, double, uchar)

// operator~ : bitwise complement; integer element types only, type-preserving.
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, char, char)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, ushort, ushort)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, short, short)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, int, int)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, uint, uint)

#undef CV_CUDEV_IMPLEMENT_VEC_UNARY_OP
|  | ||||
| // unary functions | ||||
|  | ||||
// Expands component-wise math functions named func_name for the 1..4-channel
// forms of input_type, applying `func` per component and returning the
// matching output_type vector. Passing an empty comment (/*...*/) as `func`
// makes the function the identity mapping -- used below for abs on unsigned
// types. (No comments inside the macro body: a // comment would swallow the
// line-continuation backslash.)
#define CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(func_name, func, input_type, output_type) \
    __device__ __forceinline__ output_type ## 1 func_name(const input_type ## 1 & a) \
    { \
        return VecTraits<output_type ## 1>::make(func (a.x)); \
    } \
    __device__ __forceinline__ output_type ## 2 func_name(const input_type ## 2 & a) \
    { \
        return VecTraits<output_type ## 2>::make(func (a.x), func (a.y)); \
    } \
    __device__ __forceinline__ output_type ## 3 func_name(const input_type ## 3 & a) \
    { \
        return VecTraits<output_type ## 3>::make(func (a.x), func (a.y), func (a.z)); \
    } \
    __device__ __forceinline__ output_type ## 4 func_name(const input_type ## 4 & a) \
    { \
        return VecTraits<output_type ## 4>::make(func (a.x), func (a.y), func (a.z), func (a.w)); \
    }
|  | ||||
// Component-wise math functions. For every function except abs: integer and
// float inputs produce float vectors (computed with the single-precision
// CUDA variants such as ::sqrtf), while double inputs stay double. exp10
// uses CUDA's ::exp10f / ::exp10, which are not part of standard C++.

// abs: identity for unsigned types (the functor slot is an empty comment,
// so the component is passed through unchanged); type-preserving otherwise.
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, /*::abs*/, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::abs, char, char)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, /*::abs*/, ushort, ushort)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::abs, short, short)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::abs, int, int)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, /*::abs*/, uint, uint)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::fabsf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::fabs, double, double)

CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrt, double, double)

CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::exp, double, double)

CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2, double, double)

// exp10: CUDA-specific; no standard C++ equivalent.
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10, double, double)

CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::log, double, double)

CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2, double, double)

CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10, double, double)

CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sin, double, double)

CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cos, double, double)

CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tan, double, double)

CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asin, double, double)

CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acos, double, double)

CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atan, double, double)

CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinh, double, double)

CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::cosh, double, double)

CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanh, double, double)

CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinh, double, double)

CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acosh, double, double)

CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanh, double, double)

#undef CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC
|  | ||||
| // binary operators (vec & vec) | ||||
|  | ||||
// Expands component-wise implementations of a binary operator `op` for the
// 1..4-channel forms of input_type (both operands of the same vector type),
// each returning the matching output_type vector. Per-component results go
// through VecTraits<...>::make, so e.g. comparison operators yield 0/1
// element masks. (No comments inside the macro body: a // comment would
// swallow the line-continuation backslash.)
#define CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(op, input_type, output_type) \
    __device__ __forceinline__ output_type ## 1 operator op(const input_type ## 1 & a, const input_type ## 1 & b) \
    { \
        return VecTraits<output_type ## 1>::make(a.x op b.x); \
    } \
    __device__ __forceinline__ output_type ## 2 operator op(const input_type ## 2 & a, const input_type ## 2 & b) \
    { \
        return VecTraits<output_type ## 2>::make(a.x op b.x, a.y op b.y); \
    } \
    __device__ __forceinline__ output_type ## 3 operator op(const input_type ## 3 & a, const input_type ## 3 & b) \
    { \
        return VecTraits<output_type ## 3>::make(a.x op b.x, a.y op b.y, a.z op b.z); \
    } \
    __device__ __forceinline__ output_type ## 4 operator op(const input_type ## 4 & a, const input_type ## 4 & b) \
    { \
        return VecTraits<output_type ## 4>::make(a.x op b.x, a.y op b.y, a.z op b.z, a.w op b.w); \
    }
|  | ||||
// Arithmetic (+, -, *, /): small integer element types (uchar..short) widen
// to int vectors, mirroring C's integer promotions; uint, float and double
// are type-preserving. Note that / on integer element types is integer
// division per component.
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, uchar, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, char, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, ushort, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, short, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, int, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, uint, uint)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, float, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, double, double)

CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, uchar, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, char, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, ushort, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, short, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, int, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, uint, uint)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, float, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, double, double)

CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, uchar, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, char, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, ushort, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, short, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, int, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, uint, uint)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, float, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, double, double)

CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, uchar, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, char, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, ushort, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, short, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, int, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, uint, uint)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, float, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, double, double)

// Comparisons (==, !=, >, <, >=, <=) and logical && / ||: every element type
// yields a uchar mask vector with 0 or 1 per component.
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, char, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, ushort, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, short, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, int, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, uint, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, float, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, double, uchar)

CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, char, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, ushort, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, short, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, int, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, uint, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, float, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, double, uchar)

CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, char, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, ushort, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, short, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, int, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, uint, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, float, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, double, uchar)

CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, char, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, ushort, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, short, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, int, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, uint, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, float, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, double, uchar)

CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, char, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, ushort, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, short, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, int, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, uint, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, float, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, double, uchar)

CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, char, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, ushort, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, short, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, int, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, uint, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, float, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, double, uchar)

CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, char, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, ushort, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, short, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, int, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, uint, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, float, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, double, uchar)

CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, char, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, ushort, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, short, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, int, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, uint, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, float, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, double, uchar)

// Bitwise (&, |, ^): integer element types only, type-preserving.
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, char, char)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, ushort, ushort)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, short, short)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, int, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, uint, uint)

CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, char, char)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, ushort, ushort)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, short, short)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, int, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, uint, uint)

CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, char, char)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, ushort, ushort)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, short, short)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, int, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, uint, uint)

#undef CV_CUDEV_IMPLEMENT_VEC_BINARY_OP
|  | ||||
| // binary operators (vec & scalar) | ||||
|  | ||||
// Generator macro: for one combination of (operator, element type, scalar
// type, result element type) it emits device-side overloads of `op` for the
// vector widths 1..4, in both argument orders — (vecN op scalar) and
// (scalar op vecN). Each overload applies `op` between the scalar and every
// component (.x, .y, .z, .w as the width allows) and packs the results into
// an output_typeN through VecTraits<output_typeN>::make.
// NOTE: the backslash continuations mean no comments may appear inside the
// macro body itself.
#define CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(op, input_type, scalar_type, output_type) \
    __device__ __forceinline__ output_type ## 1 operator op(const input_type ## 1 & a, scalar_type s) \
    { \
        return VecTraits<output_type ## 1>::make(a.x op s); \
    } \
    __device__ __forceinline__ output_type ## 1 operator op(scalar_type s, const input_type ## 1 & b) \
    { \
        return VecTraits<output_type ## 1>::make(s op b.x); \
    } \
    __device__ __forceinline__ output_type ## 2 operator op(const input_type ## 2 & a, scalar_type s) \
    { \
        return VecTraits<output_type ## 2>::make(a.x op s, a.y op s); \
    } \
    __device__ __forceinline__ output_type ## 2 operator op(scalar_type s, const input_type ## 2 & b) \
    { \
        return VecTraits<output_type ## 2>::make(s op b.x, s op b.y); \
    } \
    __device__ __forceinline__ output_type ## 3 operator op(const input_type ## 3 & a, scalar_type s) \
    { \
        return VecTraits<output_type ## 3>::make(a.x op s, a.y op s, a.z op s); \
    } \
    __device__ __forceinline__ output_type ## 3 operator op(scalar_type s, const input_type ## 3 & b) \
    { \
        return VecTraits<output_type ## 3>::make(s op b.x, s op b.y, s op b.z); \
    } \
    __device__ __forceinline__ output_type ## 4 operator op(const input_type ## 4 & a, scalar_type s) \
    { \
        return VecTraits<output_type ## 4>::make(a.x op s, a.y op s, a.z op s, a.w op s); \
    } \
    __device__ __forceinline__ output_type ## 4 operator op(scalar_type s, const input_type ## 4 & b) \
    { \
        return VecTraits<output_type ## 4>::make(s op b.x, s op b.y, s op b.z, s op b.w); \
    }
|  | ||||
// Arithmetic vec/scalar operators. The instantiation table encodes the
// promotion rules: integer vectors paired with an int scalar yield an int
// vector (uint stays uint), and any vector paired with a float/double scalar
// yields a float/double vector.
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uchar, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uchar, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uchar, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, char, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, char, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, char, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, ushort, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, ushort, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, ushort, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, short, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, short, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, short, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, int, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, int, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, int, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uint, uint, uint)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uint, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uint, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, float, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, float, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, double, double, double)

CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uchar, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uchar, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uchar, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, char, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, char, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, char, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, ushort, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, ushort, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, ushort, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, short, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, short, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, short, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, int, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, int, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, int, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uint, uint, uint)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uint, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uint, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, float, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, float, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, double, double, double)

CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uchar, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uchar, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uchar, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, char, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, char, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, char, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, ushort, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, ushort, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, ushort, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, short, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, short, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, short, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, int, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, int, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, int, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uint, uint, uint)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uint, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uint, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, float, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, float, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, double, double, double)

CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uchar, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uchar, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uchar, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, char, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, char, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, char, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, ushort, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, ushort, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, ushort, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, short, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, short, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, short, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, int, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, int, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, int, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uint, uint, uint)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uint, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uint, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, float, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, float, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, double, double, double)

// Comparison vec/scalar operators: the scalar matches the element type and
// the result element type is always uchar.
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, char, char, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, ushort, ushort, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, short, short, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, int, int, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, uint, uint, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, float, float, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, double, double, uchar)

CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, char, char, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, ushort, ushort, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, short, short, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, int, int, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, uint, uint, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, float, float, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, double, double, uchar)

CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, char, char, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, ushort, ushort, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, short, short, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, int, int, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, uint, uint, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, float, float, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, double, double, uchar)

CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, char, char, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, ushort, ushort, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, short, short, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, int, int, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, uint, uint, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, float, float, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, double, double, uchar)

CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, char, char, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, ushort, ushort, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, short, short, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, int, int, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, uint, uint, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, float, float, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, double, double, uchar)

CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, char, char, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, ushort, ushort, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, short, short, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, int, int, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, uint, uint, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, float, float, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, double, double, uchar)

// Logical vec/scalar operators: uchar result element type, like comparisons.
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, char, char, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, ushort, ushort, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, short, short, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, int, int, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, uint, uint, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, float, float, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, double, double, uchar)

CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, char, char, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, ushort, ushort, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, short, short, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, int, int, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, uint, uint, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, float, float, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, double, double, uchar)

// Bitwise vec/scalar operators: integral element types only, result keeps
// the input element type.
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, char, char, char)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, ushort, ushort, ushort)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, short, short, short)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, int, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, uint, uint, uint)

CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, char, char, char)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, ushort, ushort, ushort)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, short, short, short)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, int, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, uint, uint, uint)

CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, char, char, char)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, ushort, ushort, ushort)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, short, short, short)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, int, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, uint, uint, uint)

// Generator is local to this header; retire it after the last instantiation.
#undef CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP
|  | ||||
| // binary function (vec & vec) | ||||
|  | ||||
// Generator macro: emits a component-wise binary function `func_name`
// (e.g. max, hypot) for the vector widths 1..4 of `input_type`. Each
// overload forwards the matching component pairs of `a` and `b` to the
// scalar function `func` and packs the results into an output_typeN via
// VecTraits<output_typeN>::make. The backslash continuations mean no
// comments may appear inside the macro body itself.
#define CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(func_name, func, input_type, output_type) \
    __device__ __forceinline__ output_type ## 1 func_name(const input_type ## 1 & a, const input_type ## 1 & b) \
    { \
        return VecTraits<output_type ## 1>::make(func (a.x, b.x)); \
    } \
    __device__ __forceinline__ output_type ## 2 func_name(const input_type ## 2 & a, const input_type ## 2 & b) \
    { \
        return VecTraits<output_type ## 2>::make(func (a.x, b.x), func (a.y, b.y)); \
    } \
    __device__ __forceinline__ output_type ## 3 func_name(const input_type ## 3 & a, const input_type ## 3 & b) \
    { \
        return VecTraits<output_type ## 3>::make(func (a.x, b.x), func (a.y, b.y), func (a.z, b.z)); \
    } \
    __device__ __forceinline__ output_type ## 4 func_name(const input_type ## 4 & a, const input_type ## 4 & b) \
    { \
        return VecTraits<output_type ## 4>::make(func (a.x, b.x), func (a.y, b.y), func (a.z, b.z), func (a.w, b.w)); \
    }
|  | ||||
// max/min: integral element types dispatch to the integer ::max/::min
// overloads; float uses ::fmaxf/::fminf and double uses ::fmax/::fmin. The
// element type is preserved.
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, char, char)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, ushort, ushort)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, short, short)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, uint, uint)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, int, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::fmaxf, float, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::fmax, double, double)

CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, char, char)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, ushort, ushort)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, short, short)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, uint, uint)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, int, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::fminf, float, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::fmin, double, double)

// hypot/atan2: all non-double element types go through the single-precision
// CUDA math functions and produce float vectors; double stays in double
// precision.
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, char, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, short, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, int, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, float, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypot, double, double)

CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, char, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, short, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, uint, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, int, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, float, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2, double, double)

// Generator is local to this header; retire it after the last instantiation.
#undef CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC
|  | ||||
| // binary function (vec & scalar) | ||||
|  | ||||
// Generator macro: emits the mixed vec/scalar overloads of `func_name` for
// the vector widths 1..4 of `input_type`, in both argument orders —
// (vecN, scalar) and (scalar, vecN). Each component and the scalar are
// explicitly cast to output_type before `func` is invoked, so the scalar
// function is always called on arguments of the result element type; the
// results are packed into an output_typeN via VecTraits<output_typeN>::make.
// The backslash continuations mean no comments may appear inside the macro
// body itself.
#define CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(func_name, func, input_type, scalar_type, output_type) \
    __device__ __forceinline__ output_type ## 1 func_name(const input_type ## 1 & a, scalar_type s) \
    { \
        return VecTraits<output_type ## 1>::make(func ((output_type) a.x, (output_type) s)); \
    } \
    __device__ __forceinline__ output_type ## 1 func_name(scalar_type s, const input_type ## 1 & b) \
    { \
        return VecTraits<output_type ## 1>::make(func ((output_type) s, (output_type) b.x)); \
    } \
    __device__ __forceinline__ output_type ## 2 func_name(const input_type ## 2 & a, scalar_type s) \
    { \
        return VecTraits<output_type ## 2>::make(func ((output_type) a.x, (output_type) s), func ((output_type) a.y, (output_type) s)); \
    } \
    __device__ __forceinline__ output_type ## 2 func_name(scalar_type s, const input_type ## 2 & b) \
    { \
        return VecTraits<output_type ## 2>::make(func ((output_type) s, (output_type) b.x), func ((output_type) s, (output_type) b.y)); \
    } \
    __device__ __forceinline__ output_type ## 3 func_name(const input_type ## 3 & a, scalar_type s) \
    { \
        return VecTraits<output_type ## 3>::make(func ((output_type) a.x, (output_type) s), func ((output_type) a.y, (output_type) s), func ((output_type) a.z, (output_type) s)); \
    } \
    __device__ __forceinline__ output_type ## 3 func_name(scalar_type s, const input_type ## 3 & b) \
    { \
        return VecTraits<output_type ## 3>::make(func ((output_type) s, (output_type) b.x), func ((output_type) s, (output_type) b.y), func ((output_type) s, (output_type) b.z)); \
    } \
    __device__ __forceinline__ output_type ## 4 func_name(const input_type ## 4 & a, scalar_type s) \
    { \
        return VecTraits<output_type ## 4>::make(func ((output_type) a.x, (output_type) s), func ((output_type) a.y, (output_type) s), func ((output_type) a.z, (output_type) s), func ((output_type) a.w, (output_type) s)); \
    } \
    __device__ __forceinline__ output_type ## 4 func_name(scalar_type s, const input_type ## 4 & b) \
    { \
        return VecTraits<output_type ## 4>::make(func ((output_type) s, (output_type) b.x), func ((output_type) s, (output_type) b.y), func ((output_type) s, (output_type) b.z), func ((output_type) s, (output_type) b.w)); \
    }
|  | ||||
// max/min against a scalar: a same-type scalar keeps the integral element
// type (integer ::max/::min); a float or double scalar promotes the whole
// result to float/double via ::fmaxf/::fminf or ::fmax/::fmin.
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, uchar, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, uchar, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, char, char, char)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, char, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, char, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, ushort, ushort, ushort)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, ushort, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, ushort, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, short, short, short)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, short, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, short, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, uint, uint, uint)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, uint, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, uint, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, int, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, int, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, int, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, float, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, float, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, double, double, double)

CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, uchar, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, uchar, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, char, char, char)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, char, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, char, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, ushort, ushort, ushort)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, ushort, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, ushort, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, short, short, short)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, short, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, short, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, uint, uint, uint)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, uint, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, uint, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, int, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, int, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, int, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, float, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, float, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, double, double, double)

// hypot/atan2 against a scalar: a float scalar gives a float result via the
// single-precision math functions; a double scalar promotes to double.
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, uchar, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, uchar, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, char, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, char, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, ushort, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, ushort, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, short, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, short, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, uint, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, uint, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, int, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, int, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, float, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, float, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, double, double, double)

CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, uchar, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, uchar, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, char, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, char, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, ushort, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, ushort, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, short, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, short, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, uint, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, uint, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, int, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, int, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, float, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, float, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, double, double, double)

// Generator is local to this header; retire it after the last instantiation.
#undef CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC
|  | ||||
| }}} // namespace cv { namespace cuda { namespace device | ||||
|  | ||||
| //! @endcond | ||||
|  | ||||
| #endif // __OPENCV_CUDA_VECMATH_HPP__ | ||||
							
								
								
									
										288
									
								
								3rdparty/include/opencv2/core/cuda/vec_traits.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										288
									
								
								3rdparty/include/opencv2/core/cuda/vec_traits.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,288 @@ | ||||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | ||||
| // | ||||
| //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | ||||
| // | ||||
| //  By downloading, copying, installing or using the software you agree to this license. | ||||
| //  If you do not agree to this license, do not download, install, | ||||
| //  copy or use the software. | ||||
| // | ||||
| // | ||||
| //                           License Agreement | ||||
| //                For Open Source Computer Vision Library | ||||
| // | ||||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | ||||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | ||||
| // Third party copyrights are property of their respective owners. | ||||
| // | ||||
| // Redistribution and use in source and binary forms, with or without modification, | ||||
| // are permitted provided that the following conditions are met: | ||||
| // | ||||
| //   * Redistribution's of source code must retain the above copyright notice, | ||||
| //     this list of conditions and the following disclaimer. | ||||
| // | ||||
| //   * Redistribution's in binary form must reproduce the above copyright notice, | ||||
| //     this list of conditions and the following disclaimer in the documentation | ||||
| //     and/or other materials provided with the distribution. | ||||
| // | ||||
| //   * The name of the copyright holders may not be used to endorse or promote products | ||||
| //     derived from this software without specific prior written permission. | ||||
| // | ||||
| // This software is provided by the copyright holders and contributors "as is" and | ||||
| // any express or implied warranties, including, but not limited to, the implied | ||||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | ||||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | ||||
| // indirect, incidental, special, exemplary, or consequential damages | ||||
| // (including, but not limited to, procurement of substitute goods or services; | ||||
| // loss of use, data, or profits; or business interruption) however caused | ||||
| // and on any theory of liability, whether in contract, strict liability, | ||||
| // or tort (including negligence or otherwise) arising in any way out of | ||||
| // the use of this software, even if advised of the possibility of such damage. | ||||
| // | ||||
| //M*/ | ||||
|  | ||||
| #ifndef __OPENCV_CUDA_VEC_TRAITS_HPP__ | ||||
| #define __OPENCV_CUDA_VEC_TRAITS_HPP__ | ||||
|  | ||||
| #include "common.hpp" | ||||
|  | ||||
| /** @file | ||||
|  * @deprecated Use @ref cudev instead. | ||||
|  */ | ||||
|  | ||||
| //! @cond IGNORED | ||||
|  | ||||
| namespace cv { namespace cuda { namespace device | ||||
| { | ||||
    // Maps an element type T and a channel count N to a vector type.
    // Forward declaration only here; presumably specialized later in this
    // header — confirm against the rest of the file.
    template<typename T, int N> struct TypeVec;
|  | ||||
|     struct __align__(8) uchar8 | ||||
|     { | ||||
|         uchar a0, a1, a2, a3, a4, a5, a6, a7; | ||||
|     }; | ||||
|     static __host__ __device__ __forceinline__ uchar8 make_uchar8(uchar a0, uchar a1, uchar a2, uchar a3, uchar a4, uchar a5, uchar a6, uchar a7) | ||||
|     { | ||||
|         uchar8 val = {a0, a1, a2, a3, a4, a5, a6, a7}; | ||||
|         return val; | ||||
|     } | ||||
|     struct __align__(8) char8 | ||||
|     { | ||||
|         schar a0, a1, a2, a3, a4, a5, a6, a7; | ||||
|     }; | ||||
|     static __host__ __device__ __forceinline__ char8 make_char8(schar a0, schar a1, schar a2, schar a3, schar a4, schar a5, schar a6, schar a7) | ||||
|     { | ||||
|         char8 val = {a0, a1, a2, a3, a4, a5, a6, a7}; | ||||
|         return val; | ||||
|     } | ||||
|     struct __align__(16) ushort8 | ||||
|     { | ||||
|         ushort a0, a1, a2, a3, a4, a5, a6, a7; | ||||
|     }; | ||||
|     static __host__ __device__ __forceinline__ ushort8 make_ushort8(ushort a0, ushort a1, ushort a2, ushort a3, ushort a4, ushort a5, ushort a6, ushort a7) | ||||
|     { | ||||
|         ushort8 val = {a0, a1, a2, a3, a4, a5, a6, a7}; | ||||
|         return val; | ||||
|     } | ||||
|     struct __align__(16) short8 | ||||
|     { | ||||
|         short a0, a1, a2, a3, a4, a5, a6, a7; | ||||
|     }; | ||||
|     static __host__ __device__ __forceinline__ short8 make_short8(short a0, short a1, short a2, short a3, short a4, short a5, short a6, short a7) | ||||
|     { | ||||
|         short8 val = {a0, a1, a2, a3, a4, a5, a6, a7}; | ||||
|         return val; | ||||
|     } | ||||
|     struct __align__(32) uint8 | ||||
|     { | ||||
|         uint a0, a1, a2, a3, a4, a5, a6, a7; | ||||
|     }; | ||||
|     static __host__ __device__ __forceinline__ uint8 make_uint8(uint a0, uint a1, uint a2, uint a3, uint a4, uint a5, uint a6, uint a7) | ||||
|     { | ||||
|         uint8 val = {a0, a1, a2, a3, a4, a5, a6, a7}; | ||||
|         return val; | ||||
|     } | ||||
|     struct __align__(32) int8 | ||||
|     { | ||||
|         int a0, a1, a2, a3, a4, a5, a6, a7; | ||||
|     }; | ||||
|     static __host__ __device__ __forceinline__ int8 make_int8(int a0, int a1, int a2, int a3, int a4, int a5, int a6, int a7) | ||||
|     { | ||||
|         int8 val = {a0, a1, a2, a3, a4, a5, a6, a7}; | ||||
|         return val; | ||||
|     } | ||||
|     struct __align__(32) float8 | ||||
|     { | ||||
|         float a0, a1, a2, a3, a4, a5, a6, a7; | ||||
|     }; | ||||
|     static __host__ __device__ __forceinline__ float8 make_float8(float a0, float a1, float a2, float a3, float a4, float a5, float a6, float a7) | ||||
|     { | ||||
|         float8 val = {a0, a1, a2, a3, a4, a5, a6, a7}; | ||||
|         return val; | ||||
|     } | ||||
|     struct double8 | ||||
|     { | ||||
|         double a0, a1, a2, a3, a4, a5, a6, a7; | ||||
|     }; | ||||
|     static __host__ __device__ __forceinline__ double8 make_double8(double a0, double a1, double a2, double a3, double a4, double a5, double a6, double a7) | ||||
|     { | ||||
|         double8 val = {a0, a1, a2, a3, a4, a5, a6, a7}; | ||||
|         return val; | ||||
|     } | ||||
|  | ||||
// Generates all TypeVec specializations for a scalar `type`: both the scalar
// itself and its vector forms (type1..type4 built-ins, plus the type8 structs
// defined above) map to the vector type with the requested channel count.
// Token pasting (type ## N) forms the CUDA vector-type names, so comments
// cannot be placed on the continuation lines below.
#define OPENCV_CUDA_IMPLEMENT_TYPE_VEC(type) \
    template<> struct TypeVec<type, 1> { typedef type vec_type; }; \
    template<> struct TypeVec<type ## 1, 1> { typedef type ## 1 vec_type; }; \
    template<> struct TypeVec<type, 2> { typedef type ## 2 vec_type; }; \
    template<> struct TypeVec<type ## 2, 2> { typedef type ## 2 vec_type; }; \
    template<> struct TypeVec<type, 3> { typedef type ## 3 vec_type; }; \
    template<> struct TypeVec<type ## 3, 3> { typedef type ## 3 vec_type; }; \
    template<> struct TypeVec<type, 4> { typedef type ## 4 vec_type; }; \
    template<> struct TypeVec<type ## 4, 4> { typedef type ## 4 vec_type; }; \
    template<> struct TypeVec<type, 8> { typedef type ## 8 vec_type; }; \
    template<> struct TypeVec<type ## 8, 8> { typedef type ## 8 vec_type; };

    // Instantiate for every scalar type that has CUDA vector counterparts.
    // (schar and bool are handled by the hand-written specializations below.)
    OPENCV_CUDA_IMPLEMENT_TYPE_VEC(uchar)
    OPENCV_CUDA_IMPLEMENT_TYPE_VEC(char)
    OPENCV_CUDA_IMPLEMENT_TYPE_VEC(ushort)
    OPENCV_CUDA_IMPLEMENT_TYPE_VEC(short)
    OPENCV_CUDA_IMPLEMENT_TYPE_VEC(int)
    OPENCV_CUDA_IMPLEMENT_TYPE_VEC(uint)
    OPENCV_CUDA_IMPLEMENT_TYPE_VEC(float)
    OPENCV_CUDA_IMPLEMENT_TYPE_VEC(double)

    // The helper macro is only needed for the instantiations above.
    #undef OPENCV_CUDA_IMPLEMENT_TYPE_VEC
|  | ||||
    // `schar` specializations: the macro above covers plain `char`, but
    // `signed char` is a distinct type in C++, so it gets its own mappings
    // onto the charN vector types (and the custom char8 for N == 8).
    template<> struct TypeVec<schar, 1> { typedef schar vec_type; };
    template<> struct TypeVec<schar, 2> { typedef char2 vec_type; };
    template<> struct TypeVec<schar, 3> { typedef char3 vec_type; };
    template<> struct TypeVec<schar, 4> { typedef char4 vec_type; };
    template<> struct TypeVec<schar, 8> { typedef char8 vec_type; };

    // `bool` has no CUDA vector types; map it onto the uchar-based vectors.
    template<> struct TypeVec<bool, 1> { typedef uchar vec_type; };
    template<> struct TypeVec<bool, 2> { typedef uchar2 vec_type; };
    template<> struct TypeVec<bool, 3> { typedef uchar3 vec_type; };
    template<> struct TypeVec<bool, 4> { typedef uchar4 vec_type; };
    template<> struct TypeVec<bool, 8> { typedef uchar8 vec_type; };
|  | ||||
    // Primary template: per-vector-type traits. Each specialization provides:
    //   elem_type - the scalar element type;
    //   cn        - the channel count (as an enum constant);
    //   all(v)    - a vector with every channel set to v;
    //   make(...) - construction from individual channels or from a pointer
    //               to cn consecutive scalars.
    template<typename T> struct VecTraits;

// Generates the VecTraits specializations for a scalar `type` and its
// 1/2/3/4/8-channel vector forms. Token pasting (type ## N and
// make_ ## type ## N) builds the type and factory names, so comments cannot
// be placed on the continuation lines below.
#define OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(type) \
    template<> struct VecTraits<type> \
    { \
        typedef type elem_type; \
        enum {cn=1}; \
        static __device__ __host__ __forceinline__ type all(type v) {return v;} \
        static __device__ __host__ __forceinline__ type make(type x) {return x;} \
        static __device__ __host__ __forceinline__ type make(const type* v) {return *v;} \
    }; \
    template<> struct VecTraits<type ## 1> \
    { \
        typedef type elem_type; \
        enum {cn=1}; \
        static __device__ __host__ __forceinline__ type ## 1 all(type v) {return make_ ## type ## 1(v);} \
        static __device__ __host__ __forceinline__ type ## 1 make(type x) {return make_ ## type ## 1(x);} \
        static __device__ __host__ __forceinline__ type ## 1 make(const type* v) {return make_ ## type ## 1(*v);} \
    }; \
    template<> struct VecTraits<type ## 2> \
    { \
        typedef type elem_type; \
        enum {cn=2}; \
        static __device__ __host__ __forceinline__ type ## 2 all(type v) {return make_ ## type ## 2(v, v);} \
        static __device__ __host__ __forceinline__ type ## 2 make(type x, type y) {return make_ ## type ## 2(x, y);} \
        static __device__ __host__ __forceinline__ type ## 2 make(const type* v) {return make_ ## type ## 2(v[0], v[1]);} \
    }; \
    template<> struct VecTraits<type ## 3> \
    { \
        typedef type elem_type; \
        enum {cn=3}; \
        static __device__ __host__ __forceinline__ type ## 3 all(type v) {return make_ ## type ## 3(v, v, v);} \
        static __device__ __host__ __forceinline__ type ## 3 make(type x, type y, type z) {return make_ ## type ## 3(x, y, z);} \
        static __device__ __host__ __forceinline__ type ## 3 make(const type* v) {return make_ ## type ## 3(v[0], v[1], v[2]);} \
    }; \
    template<> struct VecTraits<type ## 4> \
    { \
        typedef type elem_type; \
        enum {cn=4}; \
        static __device__ __host__ __forceinline__ type ## 4 all(type v) {return make_ ## type ## 4(v, v, v, v);} \
        static __device__ __host__ __forceinline__ type ## 4 make(type x, type y, type z, type w) {return make_ ## type ## 4(x, y, z, w);} \
        static __device__ __host__ __forceinline__ type ## 4 make(const type* v) {return make_ ## type ## 4(v[0], v[1], v[2], v[3]);} \
    }; \
    template<> struct VecTraits<type ## 8> \
    { \
        typedef type elem_type; \
        enum {cn=8}; \
        static __device__ __host__ __forceinline__ type ## 8 all(type v) {return make_ ## type ## 8(v, v, v, v, v, v, v, v);} \
        static __device__ __host__ __forceinline__ type ## 8 make(type a0, type a1, type a2, type a3, type a4, type a5, type a6, type a7) {return make_ ## type ## 8(a0, a1, a2, a3, a4, a5, a6, a7);} \
        static __device__ __host__ __forceinline__ type ## 8 make(const type* v) {return make_ ## type ## 8(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);} \
    };

    // Instantiate for the scalar types with make_<type>N factories.
    // (char/schar are handled by the hand-written specializations below.)
    OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(uchar)
    OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(ushort)
    OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(short)
    OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(int)
    OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(uint)
    OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(float)
    OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(double)

    // The helper macro is only needed for the instantiations above.
    #undef OPENCV_CUDA_IMPLEMENT_VEC_TRAITS
|  | ||||
    // Hand-written specializations for the char family. `char` and
    // `signed char` are distinct C++ types (and plain char's signedness is
    // implementation-defined), so both need scalar specializations; the
    // charN vector types use schar as their element type.
    template<> struct VecTraits<char>
    {
        typedef char elem_type;
        enum {cn=1};
        static __device__ __host__ __forceinline__ char all(char v) {return v;}
        static __device__ __host__ __forceinline__ char make(char x) {return x;}
        static __device__ __host__ __forceinline__ char make(const char* x) {return *x;}
    };
    template<> struct VecTraits<schar>
    {
        typedef schar elem_type;
        enum {cn=1};
        static __device__ __host__ __forceinline__ schar all(schar v) {return v;}
        static __device__ __host__ __forceinline__ schar make(schar x) {return x;}
        static __device__ __host__ __forceinline__ schar make(const schar* x) {return *x;}
    };
    template<> struct VecTraits<char1>
    {
        typedef schar elem_type;
        enum {cn=1};
        static __device__ __host__ __forceinline__ char1 all(schar v) {return make_char1(v);}
        static __device__ __host__ __forceinline__ char1 make(schar x) {return make_char1(x);}
        static __device__ __host__ __forceinline__ char1 make(const schar* v) {return make_char1(v[0]);}
    };
    template<> struct VecTraits<char2>
    {
        typedef schar elem_type;
        enum {cn=2};
        static __device__ __host__ __forceinline__ char2 all(schar v) {return make_char2(v, v);}
        static __device__ __host__ __forceinline__ char2 make(schar x, schar y) {return make_char2(x, y);}
        static __device__ __host__ __forceinline__ char2 make(const schar* v) {return make_char2(v[0], v[1]);}
    };
    template<> struct VecTraits<char3>
    {
        typedef schar elem_type;
        enum {cn=3};
        static __device__ __host__ __forceinline__ char3 all(schar v) {return make_char3(v, v, v);}
        static __device__ __host__ __forceinline__ char3 make(schar x, schar y, schar z) {return make_char3(x, y, z);}
        static __device__ __host__ __forceinline__ char3 make(const schar* v) {return make_char3(v[0], v[1], v[2]);}
    };
    template<> struct VecTraits<char4>
    {
        typedef schar elem_type;
        enum {cn=4};
        static __device__ __host__ __forceinline__ char4 all(schar v) {return make_char4(v, v, v, v);}
        static __device__ __host__ __forceinline__ char4 make(schar x, schar y, schar z, schar w) {return make_char4(x, y, z, w);}
        static __device__ __host__ __forceinline__ char4 make(const schar* v) {return make_char4(v[0], v[1], v[2], v[3]);}
    };
    template<> struct VecTraits<char8>
    {
        typedef schar elem_type;
        enum {cn=8};
        static __device__ __host__ __forceinline__ char8 all(schar v) {return make_char8(v, v, v, v, v, v, v, v);}
        static __device__ __host__ __forceinline__ char8 make(schar a0, schar a1, schar a2, schar a3, schar a4, schar a5, schar a6, schar a7) {return make_char8(a0, a1, a2, a3, a4, a5, a6, a7);}
        static __device__ __host__ __forceinline__ char8 make(const schar* v) {return make_char8(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);}
    };
| }}} // namespace cv { namespace cuda { namespace cudev | ||||
|  | ||||
| //! @endcond | ||||
|  | ||||
| #endif // __OPENCV_CUDA_VEC_TRAITS_HPP__ | ||||
							
								
								
									
										139
									
								
								3rdparty/include/opencv2/core/cuda/warp.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										139
									
								
								3rdparty/include/opencv2/core/cuda/warp.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,139 @@ | ||||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | ||||
| // | ||||
| //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | ||||
| // | ||||
| //  By downloading, copying, installing or using the software you agree to this license. | ||||
| //  If you do not agree to this license, do not download, install, | ||||
| //  copy or use the software. | ||||
| // | ||||
| // | ||||
| //                           License Agreement | ||||
| //                For Open Source Computer Vision Library | ||||
| // | ||||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | ||||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | ||||
| // Third party copyrights are property of their respective owners. | ||||
| // | ||||
| // Redistribution and use in source and binary forms, with or without modification, | ||||
| // are permitted provided that the following conditions are met: | ||||
| // | ||||
| //   * Redistribution's of source code must retain the above copyright notice, | ||||
| //     this list of conditions and the following disclaimer. | ||||
| // | ||||
| //   * Redistribution's in binary form must reproduce the above copyright notice, | ||||
| //     this list of conditions and the following disclaimer in the documentation | ||||
| //     and/or other materials provided with the distribution. | ||||
| // | ||||
| //   * The name of the copyright holders may not be used to endorse or promote products | ||||
| //     derived from this software without specific prior written permission. | ||||
| // | ||||
| // This software is provided by the copyright holders and contributors "as is" and | ||||
| // any express or implied warranties, including, but not limited to, the implied | ||||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | ||||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | ||||
| // indirect, incidental, special, exemplary, or consequential damages | ||||
| // (including, but not limited to, procurement of substitute goods or services; | ||||
| // loss of use, data, or profits; or business interruption) however caused | ||||
| // and on any theory of liability, whether in contract, strict liability, | ||||
| // or tort (including negligence or otherwise) arising in any way out of | ||||
| // the use of this software, even if advised of the possibility of such damage. | ||||
| // | ||||
| //M*/ | ||||
|  | ||||
| #ifndef __OPENCV_CUDA_DEVICE_WARP_HPP__ | ||||
| #define __OPENCV_CUDA_DEVICE_WARP_HPP__ | ||||
|  | ||||
| /** @file | ||||
|  * @deprecated Use @ref cudev instead. | ||||
|  */ | ||||
|  | ||||
| //! @cond IGNORED | ||||
|  | ||||
| namespace cv { namespace cuda { namespace device | ||||
| { | ||||
    // Warp-level cooperative algorithms (fill/copy/transform/reduce/iota).
    // All methods assume they are invoked by every lane of a warp; each lane
    // processes elements lane, lane+STRIDE, lane+2*STRIDE, ...
    struct Warp
    {
        enum
        {
            LOG_WARP_SIZE = 5,
            WARP_SIZE     = 1 << LOG_WARP_SIZE, // 32 threads per warp
            STRIDE        = WARP_SIZE           // stride used by the cooperative loops below
        };

        /** \brief Returns the warp lane ID of the calling thread. */
        static __device__ __forceinline__ unsigned int laneId()
        {
            unsigned int ret;
            // Read the PTX special register %laneid (0..31).
            asm("mov.u32 %0, %laneid;" : "=r"(ret) );
            return ret;
        }

        // Warp-cooperative fill: the warp's lanes together write `value` to
        // every element of [beg, end).
        template<typename It, typename T>
        static __device__ __forceinline__ void fill(It beg, It end, const T& value)
        {
            for(It t = beg + laneId(); t < end; t += STRIDE)
                *t = value;
        }

        // Warp-cooperative copy of [beg, end) to `out`. Returns each lane's
        // final (advanced) output iterator, not a common one-past-the-end.
        template<typename InIt, typename OutIt>
        static __device__ __forceinline__ OutIt copy(InIt beg, InIt end, OutIt out)
        {
            for(InIt t = beg + laneId(); t < end; t += STRIDE, out += STRIDE)
                *out = *t;
            return out;
        }

        // Warp-cooperative unary transform: out[i] = op(in[i]) over [beg, end).
        template<typename InIt, typename OutIt, class UnOp>
        static __device__ __forceinline__ OutIt transform(InIt beg, InIt end, OutIt out, UnOp op)
        {
            for(InIt t = beg + laneId(); t < end; t += STRIDE, out += STRIDE)
                *out = op(*t);
            return out;
        }

        // Warp-cooperative binary transform: out[i] = op(in1[i], in2[i]).
        template<typename InIt1, typename InIt2, typename OutIt, class BinOp>
        static __device__ __forceinline__ OutIt transform(InIt1 beg1, InIt1 end1, InIt2 beg2, OutIt out, BinOp op)
        {
            unsigned int lane = laneId();

            InIt1 t1 = beg1 + lane;
            InIt2 t2 = beg2 + lane;
            for(; t1 < end1; t1 += STRIDE, t2 += STRIDE, out += STRIDE)
                *out = op(*t1, *t2);
            return out;
        }

        // Tree reduction of 32 elements at `ptr` with binary op `op`
        // (32 -> 16 -> 8 -> 4 -> 2 -> 1); the result lands in ptr[0].
        // NOTE(review): relies on implicit warp-synchronous execution and a
        // volatile pointer (pre-Volta SIMT); under Volta+ independent thread
        // scheduling this pattern needs __syncwarp() between steps — confirm
        // the target architectures before reusing.
        // Presumably all lanes pass the same warp-base `ptr` — verify callers.
        template <class T, class BinOp>
        static __device__ __forceinline__ T reduce(volatile T *ptr, BinOp op)
        {
            const unsigned int lane = laneId();

            if (lane < 16)
            {
                T partial = ptr[lane];

                ptr[lane] = partial = op(partial, ptr[lane + 16]);
                ptr[lane] = partial = op(partial, ptr[lane + 8]);
                ptr[lane] = partial = op(partial, ptr[lane + 4]);
                ptr[lane] = partial = op(partial, ptr[lane + 2]);
                ptr[lane] = partial = op(partial, ptr[lane + 1]);
            }

            return *ptr;
        }

        // Warp-cooperative iota (name "yota" kept for API compatibility):
        // writes value, value+1, value+2, ... into [beg, end).
        template<typename OutIt, typename T>
        static __device__ __forceinline__ void yota(OutIt beg, OutIt end, T value)
        {
            unsigned int lane = laneId();
            value += lane;

            for(OutIt t = beg + lane; t < end; t += STRIDE, value += STRIDE)
                *t = value;
        }
    };
| }}} // namespace cv { namespace cuda { namespace cudev | ||||
|  | ||||
| //! @endcond | ||||
|  | ||||
| #endif /* __OPENCV_CUDA_DEVICE_WARP_HPP__ */ | ||||
							
								
								
									
										76
									
								
								3rdparty/include/opencv2/core/cuda/warp_reduce.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										76
									
								
								3rdparty/include/opencv2/core/cuda/warp_reduce.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,76 @@ | ||||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | ||||
| // | ||||
| //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | ||||
| // | ||||
| //  By downloading, copying, installing or using the software you agree to this license. | ||||
| //  If you do not agree to this license, do not download, install, | ||||
| //  copy or use the software. | ||||
| // | ||||
| // | ||||
| //                           License Agreement | ||||
| //                For Open Source Computer Vision Library | ||||
| // | ||||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | ||||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | ||||
| // Third party copyrights are property of their respective owners. | ||||
| // | ||||
| // Redistribution and use in source and binary forms, with or without modification, | ||||
| // are permitted provided that the following conditions are met: | ||||
| // | ||||
| //   * Redistribution's of source code must retain the above copyright notice, | ||||
| //     this list of conditions and the following disclaimer. | ||||
| // | ||||
| //   * Redistribution's in binary form must reproduce the above copyright notice, | ||||
| //     this list of conditions and the following disclaimer in the documentation | ||||
| //     and/or other materials provided with the distribution. | ||||
| // | ||||
| //   * The name of the copyright holders may not be used to endorse or promote products | ||||
| //     derived from this software without specific prior written permission. | ||||
| // | ||||
| // This software is provided by the copyright holders and contributors "as is" and | ||||
| // any express or implied warranties, including, but not limited to, the implied | ||||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | ||||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | ||||
| // indirect, incidental, special, exemplary, or consequential damages | ||||
| // (including, but not limited to, procurement of substitute goods or services; | ||||
| // loss of use, data, or profits; or business interruption) however caused | ||||
| // and on any theory of liability, whether in contract, strict liability, | ||||
| // or tort (including negligence or otherwise) arising in any way out of | ||||
| // the use of this software, even if advised of the possibility of such damage. | ||||
| // | ||||
| //M*/ | ||||
|  | ||||
| #ifndef OPENCV_CUDA_WARP_REDUCE_HPP__ | ||||
| #define OPENCV_CUDA_WARP_REDUCE_HPP__ | ||||
|  | ||||
| /** @file | ||||
|  * @deprecated Use @ref cudev instead. | ||||
|  */ | ||||
|  | ||||
| //! @cond IGNORED | ||||
|  | ||||
| namespace cv { namespace cuda { namespace device | ||||
| { | ||||
    // Intra-warp sum reduction over the 32 consecutive elements of `ptr`
    // starting at the calling warp's segment base (tid - lane). `tid` is the
    // caller's linear thread index (defaults to threadIdx.x).
    // Returns the warp's total, read from the segment base slot.
    // NOTE(review): relies on implicit warp-synchronous execution and the
    // volatile pointer (pre-Volta SIMT); on Volta+ independent thread
    // scheduling this needs __syncwarp() between steps — confirm targets.
    template <class T>
    __device__ __forceinline__ T warp_reduce(volatile T *ptr , const unsigned int tid = threadIdx.x)
    {
        const unsigned int lane = tid & 31; // index of thread in warp (0..31)

        if (lane < 16)
        {
            T partial = ptr[tid];

            // Halve the active range each step: 32 -> 16 -> 8 -> 4 -> 2 -> 1.
            ptr[tid] = partial = partial + ptr[tid + 16];
            ptr[tid] = partial = partial + ptr[tid + 8];
            ptr[tid] = partial = partial + ptr[tid + 4];
            ptr[tid] = partial = partial + ptr[tid + 2];
            ptr[tid] = partial = partial + ptr[tid + 1];
        }

        return ptr[tid - lane];
    }
| }}} // namespace cv { namespace cuda { namespace cudev { | ||||
|  | ||||
| //! @endcond | ||||
|  | ||||
| #endif /* OPENCV_CUDA_WARP_REDUCE_HPP__ */ | ||||
							
								
								
									
										153
									
								
								3rdparty/include/opencv2/core/cuda/warp_shuffle.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										153
									
								
								3rdparty/include/opencv2/core/cuda/warp_shuffle.hpp
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,153 @@ | ||||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | ||||
| // | ||||
| //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | ||||
| // | ||||
| //  By downloading, copying, installing or using the software you agree to this license. | ||||
| //  If you do not agree to this license, do not download, install, | ||||
| //  copy or use the software. | ||||
| // | ||||
| // | ||||
| //                           License Agreement | ||||
| //                For Open Source Computer Vision Library | ||||
| // | ||||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | ||||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | ||||
| // Third party copyrights are property of their respective owners. | ||||
| // | ||||
| // Redistribution and use in source and binary forms, with or without modification, | ||||
| // are permitted provided that the following conditions are met: | ||||
| // | ||||
| //   * Redistribution's of source code must retain the above copyright notice, | ||||
| //     this list of conditions and the following disclaimer. | ||||
| // | ||||
| //   * Redistribution's in binary form must reproduce the above copyright notice, | ||||
| //     this list of conditions and the following disclaimer in the documentation | ||||
| //     and/or other materials provided with the distribution. | ||||
| // | ||||
| //   * The name of the copyright holders may not be used to endorse or promote products | ||||
| //     derived from this software without specific prior written permission. | ||||
| // | ||||
| // This software is provided by the copyright holders and contributors "as is" and | ||||
| // any express or implied warranties, including, but not limited to, the implied | ||||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | ||||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | ||||
| // indirect, incidental, special, exemplary, or consequential damages | ||||
| // (including, but not limited to, procurement of substitute goods or services; | ||||
| // loss of use, data, or profits; or business interruption) however caused | ||||
| // and on any theory of liability, whether in contract, strict liability, | ||||
| // or tort (including negligence or otherwise) arising in any way out of | ||||
| // the use of this software, even if advised of the possibility of such damage. | ||||
| // | ||||
| //M*/ | ||||
|  | ||||
| #ifndef __OPENCV_CUDA_WARP_SHUFFLE_HPP__ | ||||
| #define __OPENCV_CUDA_WARP_SHUFFLE_HPP__ | ||||
|  | ||||
| /** @file | ||||
|  * @deprecated Use @ref cudev instead. | ||||
|  */ | ||||
|  | ||||
| //! @cond IGNORED | ||||
|  | ||||
| namespace cv { namespace cuda { namespace device | ||||
| { | ||||
|     template <typename T> | ||||
|     __device__ __forceinline__ T shfl(T val, int srcLane, int width = warpSize) | ||||
|     { | ||||
|     #if __CUDA_ARCH__ >= 300 | ||||
|         return __shfl(val, srcLane, width); | ||||
|     #else | ||||
|         return T(); | ||||
|     #endif | ||||
|     } | ||||
|     __device__ __forceinline__ unsigned int shfl(unsigned int val, int srcLane, int width = warpSize) | ||||
|     { | ||||
|     #if __CUDA_ARCH__ >= 300 | ||||
|         return (unsigned int) __shfl((int) val, srcLane, width); | ||||
|     #else | ||||
|         return 0; | ||||
|     #endif | ||||
|     } | ||||
|     __device__ __forceinline__ double shfl(double val, int srcLane, int width = warpSize) | ||||
|     { | ||||
|     #if __CUDA_ARCH__ >= 300 | ||||
|         int lo = __double2loint(val); | ||||
|         int hi = __double2hiint(val); | ||||
|  | ||||
|         lo = __shfl(lo, srcLane, width); | ||||
|         hi = __shfl(hi, srcLane, width); | ||||
|  | ||||
|         return __hiloint2double(hi, lo); | ||||
|     #else | ||||
|         return 0.0; | ||||
|     #endif | ||||
|     } | ||||
|  | ||||
|     template <typename T> | ||||
|     __device__ __forceinline__ T shfl_down(T val, unsigned int delta, int width = warpSize) | ||||
|     { | ||||
|     #if __CUDA_ARCH__ >= 300 | ||||
|         return __shfl_down(val, delta, width); | ||||
|     #else | ||||
|         return T(); | ||||
|     #endif | ||||
|     } | ||||
|     __device__ __forceinline__ unsigned int shfl_down(unsigned int val, unsigned int delta, int width = warpSize) | ||||
|     { | ||||
|     #if __CUDA_ARCH__ >= 300 | ||||
|         return (unsigned int) __shfl_down((int) val, delta, width); | ||||
|     #else | ||||
|         return 0; | ||||
|     #endif | ||||
|     } | ||||
|     __device__ __forceinline__ double shfl_down(double val, unsigned int delta, int width = warpSize) | ||||
|     { | ||||
|     #if __CUDA_ARCH__ >= 300 | ||||
|         int lo = __double2loint(val); | ||||
|         int hi = __double2hiint(val); | ||||
|  | ||||
|         lo = __shfl_down(lo, delta, width); | ||||
|         hi = __shfl_down(hi, delta, width); | ||||
|  | ||||
|         return __hiloint2double(hi, lo); | ||||
|     #else | ||||
|         return 0.0; | ||||
|     #endif | ||||
|     } | ||||
|  | ||||
|     template <typename T> | ||||
|     __device__ __forceinline__ T shfl_up(T val, unsigned int delta, int width = warpSize) | ||||
|     { | ||||
|     #if __CUDA_ARCH__ >= 300 | ||||
|         return __shfl_up(val, delta, width); | ||||
|     #else | ||||
|         return T(); | ||||
|     #endif | ||||
|     } | ||||
|     __device__ __forceinline__ unsigned int shfl_up(unsigned int val, unsigned int delta, int width = warpSize) | ||||
|     { | ||||
|     #if __CUDA_ARCH__ >= 300 | ||||
|         return (unsigned int) __shfl_up((int) val, delta, width); | ||||
|     #else | ||||
|         return 0; | ||||
|     #endif | ||||
|     } | ||||
|     __device__ __forceinline__ double shfl_up(double val, unsigned int delta, int width = warpSize) | ||||
|     { | ||||
|     #if __CUDA_ARCH__ >= 300 | ||||
|         int lo = __double2loint(val); | ||||
|         int hi = __double2hiint(val); | ||||
|  | ||||
|         lo = __shfl_up(lo, delta, width); | ||||
|         hi = __shfl_up(hi, delta, width); | ||||
|  | ||||
|         return __hiloint2double(hi, lo); | ||||
|     #else | ||||
|         return 0.0; | ||||
|     #endif | ||||
|     } | ||||
| }}} | ||||
|  | ||||
| //! @endcond | ||||
|  | ||||
| #endif // __OPENCV_CUDA_WARP_SHUFFLE_HPP__ | ||||
		Reference in New Issue
	
	Block a user