@@ -68,9 +68,23 @@ namespace cv { namespace cuda { namespace device
6868 }
6969 }
7070
// Remap kernel for relative maps: the values read from mapx/mapy are treated as
// offsets that are added to the destination pixel's own (x, y) position before
// the source is sampled.
//
//   src  - callable source reader, invoked as src(y, x) with float coordinates
//   mapx - per-pixel horizontal offsets, indexed like dst
//   mapy - per-pixel vertical offsets, indexed like dst
//   dst  - output image; each thread writes at most one element
//
// Uses a 2D launch: the x/y thread coordinates select the destination column/row.
template <typename Ptr2D, typename T>
__global__ void remap_relative(const Ptr2D src, const PtrStepf mapx, const PtrStepf mapy, PtrStepSz<T> dst)
{
    const int col = blockIdx.x * blockDim.x + threadIdx.x;
    const int row = blockIdx.y * blockDim.y + threadIdx.y;

    // Threads whose coordinates fall outside the destination have nothing to do.
    if (col >= dst.cols || row >= dst.rows)
        return;

    // Absolute sampling position = own position + stored offset.
    const float srcX = col + mapx.ptr(row)[col];
    const float srcY = row + mapy.ptr(row)[col];

    dst.ptr(row)[col] = saturate_cast<T>(src(srcY, srcX));
}
84+
7185 template <template <typename > class Filter , template <typename > class B , typename T> struct RemapDispatcherStream
7286 {
73- static void call (PtrStepSz<T> src, PtrStepSzf mapx, PtrStepSzf mapy, PtrStepSz<T> dst, const float * borderValue, cudaStream_t stream, bool )
87+ static void call (PtrStepSz<T> src, PtrStepSzf mapx, PtrStepSzf mapy, PtrStepSz<T> dst, const float * borderValue, cudaStream_t stream, bool , bool isRelative )
7488 {
7589 typedef typename TypeVec<float , VecTraits<T>::cn>::vec_type work_type;
7690
@@ -81,14 +95,17 @@ namespace cv { namespace cuda { namespace device
8195 BorderReader<PtrStep<T>, B<work_type>> brdSrc (src, brd);
8296 Filter<BorderReader<PtrStep<T>, B<work_type>>> filter_src (brdSrc);
8397
84- remap<<<grid, block, 0 , stream>>> (filter_src, mapx, mapy, dst);
98+ if (isRelative)
99+ remap_relative<<<grid, block, 0 , stream>>> (filter_src, mapx, mapy, dst);
100+ else
101+ remap<<<grid, block, 0 , stream>>> (filter_src, mapx, mapy, dst);
85102 cudaSafeCall ( cudaGetLastError () );
86103 }
87104 };
88105
89106 template <template <typename > class Filter , template <typename > class B , typename T> struct RemapDispatcherNonStream
90107 {
91- static void call (PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy, PtrStepSz<T> dst, const float * borderValue, bool )
108+ static void call (PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy, PtrStepSz<T> dst, const float * borderValue, bool , bool isRelative )
92109 {
93110 CV_UNUSED (srcWhole);
94111 CV_UNUSED (xoff);
@@ -102,7 +119,10 @@ namespace cv { namespace cuda { namespace device
102119 BorderReader<PtrStep<T>, B<work_type>> brdSrc (src, brd);
103120 Filter<BorderReader<PtrStep<T>, B<work_type>>> filter_src (brdSrc);
104121
105- remap<<<grid, block>>> (filter_src, mapx, mapy, dst);
122+ if (isRelative)
123+ remap_relative<<<grid, block>>> (filter_src, mapx, mapy, dst);
124+ else
125+ remap<<<grid, block>>> (filter_src, mapx, mapy, dst);
106126 cudaSafeCall ( cudaGetLastError () );
107127
108128 cudaSafeCall ( cudaDeviceSynchronize () );
@@ -112,7 +132,7 @@ namespace cv { namespace cuda { namespace device
112132 template <template <typename > class Filter , template <typename > class B , typename T> struct RemapDispatcherNonStreamTex
113133 {
114134 static void call (PtrStepSz< T > src, PtrStepSz< T > srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy,
115- PtrStepSz< T > dst, const float * borderValue, bool cc20)
135+ PtrStepSz< T > dst, const float * borderValue, bool cc20, bool isRelative )
116136 {
117137 typedef typename TypeVec<float , VecTraits< T >::cn>::vec_type work_type;
118138 dim3 block (32 , cc20 ? 8 : 4 );
@@ -123,15 +143,21 @@ namespace cv { namespace cuda { namespace device
123143 B<work_type> brd (src.rows , src.cols , VecTraits<work_type>::make (borderValue));
124144 BorderReader<cudev::TexturePtr<T>, B<work_type>> brdSrc (texSrcWhole, brd);
125145 Filter<BorderReader<cudev::TexturePtr<T>, B<work_type>>> filter_src (brdSrc);
126- remap<<<grid, block>>> (filter_src, mapx, mapy, dst);
146+ if (isRelative)
147+ remap_relative<<<grid, block>>> (filter_src, mapx, mapy, dst);
148+ else
149+ remap<<<grid, block>>> (filter_src, mapx, mapy, dst);
127150
128151 }
129152 else {
130153 cudev::TextureOff<T> texSrcWhole (srcWhole, yoff, xoff);
131154 B<work_type> brd (src.rows , src.cols , VecTraits<work_type>::make (borderValue));
132155 BorderReader<cudev::TextureOffPtr<T>, B<work_type>> brdSrc (texSrcWhole, brd);
133156 Filter<BorderReader<cudev::TextureOffPtr<T>, B<work_type>>> filter_src (brdSrc);
134- remap<<<grid, block >>> (filter_src, mapx, mapy, dst);
157+ if (isRelative)
158+ remap_relative<<<grid, block>>> (filter_src, mapx, mapy, dst);
159+ else
160+ remap<<<grid, block >>> (filter_src, mapx, mapy, dst);
135161 }
136162
137163 cudaSafeCall ( cudaGetLastError () );
@@ -142,23 +168,29 @@ namespace cv { namespace cuda { namespace device
142168 template <template <typename > class Filter , typename T> struct RemapDispatcherNonStreamTex <Filter, BrdReplicate, T>
143169 {
144170 static void call (PtrStepSz< T > src, PtrStepSz< T > srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy,
145- PtrStepSz< T > dst, const float *, bool )
171+ PtrStepSz< T > dst, const float *, bool , bool isRelative )
146172 {
147173 dim3 block (32 , 8 );
148174 dim3 grid (divUp (dst.cols , block.x ), divUp (dst.rows , block.y ));
149175 if (srcWhole.cols == src.cols && srcWhole.rows == src.rows )
150176 {
151177 cudev::Texture<T> texSrcWhole (srcWhole);
152178 Filter<cudev::TexturePtr<T>> filter_src (texSrcWhole);
153- remap<<<grid, block>>> (filter_src, mapx, mapy, dst);
179+ if (isRelative)
180+ remap_relative<<<grid, block>>> (filter_src, mapx, mapy, dst);
181+ else
182+ remap<<<grid, block>>> (filter_src, mapx, mapy, dst);
154183 }
155184 else
156185 {
157186 cudev::TextureOff<T> texSrcWhole (srcWhole, yoff, xoff);
158187 BrdReplicate<T> brd (src.rows , src.cols );
159188 BorderReader<cudev::TextureOffPtr<T>, BrdReplicate<T>> brdSrc (texSrcWhole, brd);
160189 Filter<BorderReader<cudev::TextureOffPtr<T>, BrdReplicate<T>>> filter_src (brdSrc);
161- remap<<<grid, block>>> (filter_src, mapx, mapy, dst);
190+ if (isRelative)
191+ remap_relative<<<grid, block>>> (filter_src, mapx, mapy, dst);
192+ else
193+ remap<<<grid, block>>> (filter_src, mapx, mapy, dst);
162194 }
163195 cudaSafeCall ( cudaGetLastError () );
164196 cudaSafeCall ( cudaDeviceSynchronize () );
@@ -203,20 +235,20 @@ namespace cv { namespace cuda { namespace device
// Top-level dispatcher: picks the stream or non-stream implementation depending
// on whether a non-zero stream handle was supplied, forwarding all arguments
// (including the isRelative flag) unchanged.
template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcher
{
    static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy,
                     PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative)
    {
        // A non-zero stream goes to the stream-aware path, which does not take
        // srcWhole / xoff / yoff.
        if (stream != 0)
        {
            RemapDispatcherStream<Filter, B, T>::call(src, mapx, mapy, dst, borderValue, stream, cc20, isRelative);
            return;
        }

        // stream == 0: use the non-stream path, which also receives the whole
        // source image and the offsets into it.
        RemapDispatcherNonStream<Filter, B, T>::call(src, srcWhole, xoff, yoff, mapx, mapy, dst, borderValue, cc20, isRelative);
    }
};
214246
215247 template <typename T> void remap_gpu (PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap,
216- PtrStepSzb dst, int interpolation, int borderMode, const float * borderValue, cudaStream_t stream, bool cc20)
248+ PtrStepSzb dst, int interpolation, int borderMode, const float * borderValue, cudaStream_t stream, bool cc20, bool isRelative )
217249 {
218250 typedef void (*caller_t )(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap,
219- PtrStepSz<T> dst, const float * borderValue, cudaStream_t stream, bool cc20);
251+ PtrStepSz<T> dst, const float * borderValue, cudaStream_t stream, bool cc20, bool isRelative );
220252
221253 static const caller_t callers[3 ][5 ] =
222254 {
@@ -244,24 +276,24 @@ namespace cv { namespace cuda { namespace device
244276 };
245277
246278 callers[interpolation][borderMode](static_cast <PtrStepSz<T>>(src), static_cast <PtrStepSz<T>>(srcWhole), xoff, yoff, xmap, ymap,
247- static_cast <PtrStepSz<T>>(dst), borderValue, stream, cc20);
279+ static_cast <PtrStepSz<T>>(dst), borderValue, stream, cc20, isRelative );
248280 }
249281
// Explicit instantiations of remap_gpu for every supported element type
// (1-, 3- and 4-channel variants of uchar, ushort, short and float).
// The macro only exists to avoid repeating the long parameter list.
#define OPENCV_CUDA_REMAP_INSTANTIATE(T) \
    template void remap_gpu<T>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, \
                               PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, \
                               cudaStream_t stream, bool cc20, bool isRelative);

OPENCV_CUDA_REMAP_INSTANTIATE(uchar)
OPENCV_CUDA_REMAP_INSTANTIATE(uchar3)
OPENCV_CUDA_REMAP_INSTANTIATE(uchar4)

OPENCV_CUDA_REMAP_INSTANTIATE(ushort)
OPENCV_CUDA_REMAP_INSTANTIATE(ushort3)
OPENCV_CUDA_REMAP_INSTANTIATE(ushort4)

OPENCV_CUDA_REMAP_INSTANTIATE(short)
OPENCV_CUDA_REMAP_INSTANTIATE(short3)
OPENCV_CUDA_REMAP_INSTANTIATE(short4)

OPENCV_CUDA_REMAP_INSTANTIATE(float)
OPENCV_CUDA_REMAP_INSTANTIATE(float3)
OPENCV_CUDA_REMAP_INSTANTIATE(float4)

#undef OPENCV_CUDA_REMAP_INSTANTIATE
265297 } // namespace imgproc
266298}}} // namespace cv { namespace cuda { namespace cudev
267299
0 commit comments