@@ -6,6 +6,7 @@ immutable Butterfly{T} <: Factorization{T}
66 temp1:: Vector{T}
77 temp2:: Vector{T}
88 temp3:: Vector{T}
9+ temp4:: Vector{T}
910end
1011
1112function size (B:: Butterfly , dim:: Integer )
@@ -105,7 +106,7 @@ function Butterfly{T}(A::AbstractMatrix{T}, L::Int; isorthogonal::Bool = false,
105106
106107 kk = sumkmax (indices)
107108
108- Butterfly (columns, factors, permutations, indices, zeros (T, kk), zeros (T, kk), zeros (T, kk))
109+ Butterfly (columns, factors, permutations, indices, zeros (T, kk), zeros (T, kk), zeros (T, kk), zeros (T, kk) )
109110end
110111
111112function sumkmax (indices:: Vector{Vector{Int}} )
@@ -150,11 +151,30 @@ function rowperm!(fwd::Bool, x::StridedVecOrMat, p::Vector{Int}, jstart::Int)
150151 x
151152end
152153
154+ function rowperm! (fwd:: Bool , y:: StridedVector , x:: StridedVector , p:: Vector{Int} , jstart:: Int )
155+ n = length (p)
156+ jshift = jstart- 1
157+ @inbounds if (fwd)
158+ @simd for i = 1 : n
159+ y[jshift+ i] = x[jshift+ p[i]]
160+ end
161+ else
162+ @simd for i = 1 : n
163+ y[jshift+ p[i]] = x[jshift+ i]
164+ end
165+ end
166+ y
167+ end
168+
153169# # ColumnPermutation
154170A_mul_B! (A:: ColPerm , B:: StridedVecOrMat , jstart:: Int ) = rowperm! (false , B, A. p, jstart)
155171At_mul_B! (A:: ColPerm , B:: StridedVecOrMat , jstart:: Int ) = rowperm! (true , B, A. p, jstart)
156172Ac_mul_B! (A:: ColPerm , B:: StridedVecOrMat , jstart:: Int ) = At_mul_B! (A, B, jstart)
157173
174+ A_mul_B! (y:: StridedVector , A:: ColPerm , x:: StridedVector , jstart:: Int ) = rowperm! (false , y, x, A. p, jstart)
175+ At_mul_B! (y:: StridedVector , A:: ColPerm , x:: StridedVector , jstart:: Int ) = rowperm! (true , y, x, A. p, jstart)
176+ Ac_mul_B! (y:: StridedVector , A:: ColPerm , x:: StridedVector , jstart:: Int ) = At_mul_B! (y, x, A, jstart)
177+
158178# Fast A_mul_B!, At_mul_B!, and Ac_mul_B! for an ID. These overwrite the output.
159179
160180function A_mul_B! {T} (y:: AbstractVecOrMat{T} , A:: IDPackedV{T} , P:: ColumnPermutation , x:: AbstractVecOrMat{T} , istart:: Int , jstart:: Int )
@@ -166,6 +186,14 @@ function A_mul_B!{T}(y::AbstractVecOrMat{T}, A::IDPackedV{T}, P::ColumnPermutati
166186 y
167187end
168188
189+ function A_mul_B! {T} (y:: AbstractVector{T} , A:: IDPackedV{T} , P:: ColumnPermutation , x:: AbstractVector{T} , temp:: AbstractVector{T} , istart:: Int , jstart:: Int )
190+ k, n = size (A)
191+ At_mul_B! (temp, P, x, jstart)
192+ copy! (y, istart, temp, jstart, k)
193+ A_mul_B! (y, A. T, temp, istart, jstart+ k)
194+ y
195+ end
196+
169197for f! in (:At_mul_B! , :Ac_mul_B! )
170198 @eval begin
171199 function $f! {T} (y:: AbstractVecOrMat{T} , A:: IDPackedV{T} , P:: ColumnPermutation , x:: AbstractVecOrMat{T} , istart:: Int , jstart:: Int )
@@ -175,6 +203,14 @@ for f! in (:At_mul_B!, :Ac_mul_B!)
175203 A_mul_B! (P, y, istart)
176204 y
177205 end
206+
207+ function $f! {T} (y:: AbstractVector{T} , A:: IDPackedV{T} , P:: ColumnPermutation , x:: AbstractVector{T} , temp:: AbstractVector{T} , istart:: Int , jstart:: Int )
208+ k, n = size (A)
209+ copy! (temp, istart, x, jstart, k)
210+ $ f! (temp, A. T, x, istart+ k, jstart)
211+ A_mul_B! (y, P, temp, istart)
212+ y
213+ end
178214 end
179215end
180216
@@ -194,6 +230,7 @@ function A_mul_B_col_J!{T}(u::VecOrMat{T}, B::Butterfly{T}, b::VecOrMat{T}, J::I
194230
195231 temp1 = B. temp1
196232 temp2 = B. temp2
233+ temp3 = B. temp3
197234 fill! (temp1, zero (T))
198235 fill! (temp2, zero (T))
199236
@@ -217,7 +254,7 @@ function A_mul_B_col_J!{T}(u::VecOrMat{T}, B::Butterfly{T}, b::VecOrMat{T}, J::I
217254 for i = 1 : ii
218255 shft = 2 jj* div (ctr,2 jj)
219256 for j = 1 : jj
220- A_mul_B! (temp2, factors[j+ ctr], permutations[j+ ctr], temp1, indsout[j+ ctr], indsin[2 j+ shft- 1 ])
257+ A_mul_B! (temp2, factors[j+ ctr], permutations[j+ ctr], temp1, temp3, indsout[j+ ctr], indsin[2 j+ shft- 1 ])
221258 end
222259 ctr += jj
223260 end
@@ -252,6 +289,7 @@ for f! in (:At_mul_B!,:Ac_mul_B!)
252289 temp1 = B. temp1
253290 temp2 = B. temp2
254291 temp3 = B. temp3
292+ temp4 = B. temp4
255293 fill! (temp1, zero (T))
256294 fill! (temp2, zero (T))
257295 fill! (temp3, zero (T))
@@ -274,8 +312,9 @@ for f! in (:At_mul_B!,:Ac_mul_B!)
274312 ctr = 0
275313 for i = 1 : ii
276314 shft = 2 jj* div (ctr,2 jj)
315+ fill! (temp4, zero (T))
277316 for j = 1 : jj
278- $ f! (temp3, factors[j+ ctr], permutations[j+ ctr], temp1, indsout[2 j+ shft- 1 ], indsin[j+ ctr])
317+ $ f! (temp3, factors[j+ ctr], permutations[j+ ctr], temp1, temp4, indsout[2 j+ shft- 1 ], indsin[j+ ctr])
279318 addtemp3totemp2! (temp2, temp3, indsout[2 j+ shft- 1 ], indsout[2 j+ shft+ 1 ]- 1 )
280319 end
281320 ctr += jj
0 commit comments