@@ -134,17 +134,15 @@ Base.collect(X::AnyGPUArray) = collect_to_cpu(X)
134134
135135# memory copying
136136
137+ # This code expects the GPU array type to provide linear `copyto!` methods (i.e., ones
138+ # accepting integer offsets and a length) for copies from and to CPU arrays, and between GPU arrays.
139+
# In-place copy between two GPU vectors.
#
# Unlike `copyto!`, this requires `dst` and `src` to have identical axes and throws an
# `ArgumentError` otherwise. The transfer itself is delegated to `copyto!(dst, src)`,
# whose result is returned.
137140function Base. copy! (dst:: AbstractGPUVector , src:: AbstractGPUVector )
138141 axes (dst) == axes (src) || throw (ArgumentError (
139142 " arrays must have the same axes for `copy!`. consider using `copyto!` instead" ))
140143 copyto! (dst, src)
141144end
142145
143- # # basic linear copies of identically-typed memory
144-
145- # expects the GPU array type to have linear `copyto!` methods (i.e. accepting an integer
146- # offset and length) from and to CPU arrays and between GPU arrays.
147-
148146for (D, S) in ((AnyGPUArray, Array),
149147 (Array, AnyGPUArray),
150148 (AnyGPUArray, AnyGPUArray))
@@ -156,18 +154,6 @@ for (D, S) in ((AnyGPUArray, Array),
156154 copyto! (dest, drange, src, srange)
157155 end
158156
159- function Base. copyto! (dest:: $D , d_range:: CartesianIndices{1} ,
160- src:: $S , s_range:: CartesianIndices{1} )
161- len = length (d_range)
162- if length (s_range) != len
163- throw (ArgumentError (" Copy range needs same length. Found: dest: $len , src: $(length (s_range)) " ))
164- end
165- len == 0 && return dest
166- d_offset = first (d_range)[1 ]
167- s_offset = first (s_range)[1 ]
168- copyto! (dest, d_offset, src, s_offset, len)
169- end
170-
171157 Base. copyto! (dest:: $D , src:: $S ) = copyto! (dest, 1 , src, 1 , length (src))
172158 end
173159end
@@ -260,6 +246,13 @@ function Base.copyto!(dest::AnyGPUArray{<:Any, N}, destcrange::CartesianIndices{
260246 len = length (destcrange)
261247 len == 0 && return dest
262248
249+ # linear copy if we can
250+ if N == 1
251+ d_offset = first (destcrange)[1 ]
252+ s_offset = first (srccrange)[1 ]
253+ return copyto! (dest, d_offset, src, s_offset, len)
254+ end
255+
263256 dest_offsets = first (destcrange) - oneunit (CartesianIndex{N})
264257 src_offsets = first (srccrange) - oneunit (CartesianIndex{N})
265258 gpu_call (cartesian_copy_kernel!,
@@ -275,6 +268,15 @@ for (dstTyp, srcTyp) in (AbstractGPUArray=>Array, Array=>AbstractGPUArray)
275268 if size (dstrange) != size (srcrange)
276269 throw (ArgumentError (" source and destination must have same size (got $(size (srcrange)) and $(size (dstrange)) )" ))
277270 end
271+ len = length (dstrange)
# Nothing to copy for an empty range: return the destination array. The destination is
# named `dst` in the rest of this body (see the linear `copyto!` call below); `dest` is
# not bound anywhere in the visible code and would raise `UndefVarError` here.
# NOTE(review): confirm against the method signature, which is outside this hunk.
272+ len == 0 && return dst
273+
274+ # linear copy if we can
# For one-dimensional ranges the first index of each range is used as an integer offset
# and the copy is handed to the linear `copyto!(dst, offset, src, offset, len)` method.
275+ if N == 1
276+ d_offset = first (dstrange)[1 ]
277+ s_offset = first (srcrange)[1 ]
278+ return copyto! (dst, d_offset, src, s_offset, len)
279+ end
278280
279281 # figure out how many dimensions of the Cartesian ranges map onto contiguous memory
280282 # in both source and destination. we will copy these one by one as linear ranges.
0 commit comments