Skip to content

Commit a64fc46

Browse files
committed
fix some issues in stats - reduce allocations
1 parent 8b5fbde commit a64fc46

File tree

2 files changed

+55
-6
lines changed

2 files changed

+55
-6
lines changed

src/stat/non_hp_stat.jl

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,6 @@ function stat_maximum(f::typeof(identity), x::AbstractArray{T,1}; lo=1, hi=lengt
141141
Base.mapreduce_impl(_dmiss, max, x, lo, hi)
142142
end
143143
function stat_maximum(f::F, x::AbstractArray{T,1}; lo=1, hi=length(x)) where {F,T}
144-
all(ismissing, view(x, lo:hi)) && return missing
145144
Base.mapreduce_impl(f, _stat_max_fun, x, lo, hi)
146145
end
147146
stat_maximum(x::AbstractArray{T,1}; lo=1, hi=length(x)) where {T} = stat_maximum(identity, x; lo=lo, hi=hi)
@@ -166,7 +165,6 @@ function stat_minimum(f::typeof(identity), x::AbstractArray{T,1}; lo=1, hi=lengt
166165
Base.mapreduce_impl(_dmiss, min, x, lo, hi)
167166
end
168167
function stat_minimum(f::F, x::AbstractArray{T,1}; lo=1, hi=length(x)) where {F,T}
169-
all(ismissing, view(x, lo:hi)) && return missing
170168
Base.mapreduce_impl(f, _stat_min_fun, x, lo, hi)
171169
end
172170
stat_minimum(x::AbstractArray{T,1}; lo=1, hi=length(x)) where {T} = stat_minimum(identity, x; lo=lo, hi=hi)
@@ -180,9 +178,7 @@ stat_findmin(x::AbstractArray{T,1}) where {T} = stat_findmin(identity, x)
180178

181179

182180
function stat_sum(f, x::AbstractArray{T,1}; lo=1, hi=length(x)) where {T<:Union{Missing,INTEGERS,FLOATS}}
183-
all(ismissing, view(x, lo:hi)) && return f(first(x))
184-
_dmiss(y) = ifelse(ismissing(f(y)), zero(T), f(y))
185-
Base.mapreduce_impl(_dmiss, _stat_add_sum, x, lo, hi)
181+
Base.mapreduce_impl(f, _stat_add_sum, x, lo, hi)
186182
end
187183
stat_sum(x::AbstractArray{T,1}; lo=1, hi=length(x)) where {T<:Union{Missing,INTEGERS,FLOATS}} = stat_sum(identity, x; lo=lo, hi=hi)
188184

@@ -300,7 +296,7 @@ stat_wmean(x::AbstractVector{T}, w::AbstractArray{S,1}) where {T} where {S} = st
300296
_abs2_var_barrier(x,y,f::F) where F = abs2(f(x)-y)
301297
_meanval_var_barrier(n, sval)::Union{Missing, Float64} = n == 0 ? missing : sval / n
302298
function stat_var(f, x::AbstractArray{T,1}, dof=true)::Union{Float64,Missing} where {T<:Union{Missing,INTEGERS,FLOATS}}
303-
all(ismissing, x) && return missing
299+
# all(ismissing, x) && return missing
304300
# any(ISNAN, x) && return convert(eltype(x), NaN)
305301
# meanval = stat_mean(f, x)
306302
# n = mapreduce(!ismissing∘f, +, x)
@@ -345,6 +341,7 @@ function stat_median(v::AbstractArray{T,1}) where {T}
345341
end
346342
end
347343

344+
# TODO: in Julia 1.9+, partialsort! allocates, which is costly when stat_median! needs to be called many times
348345
function stat_median!(v::AbstractArray{T,1}) where {T}
349346
isempty(v) && throw(ArgumentError("median of an empty array is undefined, $(repr(v))"))
350347
all(ismissing, v) && return missing

test/stats.jl

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,4 +163,56 @@ end
163163
@test isequal(IMD.cumprod(x4, missings = :skip), [missing,missing,missing,2])
164164
@test isequal(IMD.cumprod(x5, missings = :skip), [missing,missing,-9.0,-18.0])
165165
@test isequal(IMD.cumprod(x6, missings = :skip), [missing,missing, missing, missing])
166+
end
167+
@testset "IMD.sum & IMD.mean & IMD.var" begin
168+
x = Union{Missing, Int32}[missing, missing, missing, missing]
169+
@test isequal(IMD.sum(x), missing)
170+
@test IMD.sum(y->ismissing(y) ? 1 : y, x) == 4
171+
push!(x, 1)
172+
@test IMD.sum(x) == 1
173+
@test IMD.sum(y->ismissing(y) ? 1 : y, x) == 5
174+
175+
@test IMD.mean(x) == 1
176+
@test ismissing(IMD.mean(y->isequal(y,1) ? missing : y, x) )
177+
@test IMD.mean(y->ismissing(y) ? 1 : y, x) == 1
178+
179+
@test isequal(IMD.var(x),missing)
180+
@test isequal(IMD.var(x, false), 0.0)
181+
182+
@test isequal(IMD.var(y->ismissing(y) ? 1 : y, x), 0.0)
183+
@test isequal(IMD.var(y->ismissing(y) ? 1 : y, x, false), 0.0)
184+
185+
x = [true, false, true, missing]
186+
@test IMD.sum(x) == 2
187+
@test IMD.sum(y->isequal(y, true) ? 100 : y, x) == 200
188+
189+
for i in 1:10
190+
x=rand(1:10000, 100)
191+
@test IMD.sum(x) == sum(x)
192+
x = allowmissing(x)
193+
x[50] = missing
194+
@test IMD.sum(y->ismissing(y) ? 0 : y, x) == sum(y->ismissing(y) ? 0 : y, x)
195+
end
196+
197+
x = rand(10)
198+
n_a = [@allocated IMD.sum(x) for _ in 1:10]
199+
@test n_a[end] <= 16
200+
201+
x = Union{Int32, Missing}[1,2,missing, 4]
202+
n_a = [@allocated IMD.sum(x) for _ in 1:10]
203+
@test n_a[end] == 0
204+
205+
n_a = [@allocated IMD.sum(y->ismissing(y) ? 0 : y, x) for _ in 1:10]
206+
@test n_a[end] <= 16
207+
208+
x = rand(10)
209+
n_a = [@allocated IMD.mean(x) for _ in 1:10]
210+
@test n_a[end] <= 16
211+
212+
x = Union{Int32, Missing}[1,2,missing, 4]
213+
n_a = [@allocated IMD.mean(x) for _ in 1:10]
214+
@test n_a[end] <= 16
215+
216+
n_a = [@allocated IMD.mean(y->ismissing(y) ? 0 : y, x) for _ in 1:10]
217+
@test n_a[end] <= 16
166218
end

0 commit comments

Comments
 (0)