Skip to content

Commit eafefa1

Browse files
committed
fix #67 - topk can now return indices
1 parent dfe93e8 commit eafefa1

File tree

3 files changed

+249
-143
lines changed

3 files changed

+249
-143
lines changed

docs/src/man/missing.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ The following functions are also exported by InMemoryDatasets:
115115
* `lag!` : Replace its input with its lag-k values
116116
* `lead` : Create a lead-k of the provided vector
117117
* `lead!` : Replace its input with its lead-k values
118-
* `topk` : Return top(bottom) k values of a vector. It ignores `missing` values, unless all values are `missing` which it returns `[missing]`.
118+
* `topk` : Return the top (or bottom) k values of a vector. It ignores `missing` values, unless all values are `missing`, in which case it returns `[missing]`. It can also return the indices of the top (or bottom) k values.
119119

120120
and the following functions are not exported but are available via `dot` notation:
121121

src/abstractdataset/dscol.jl

Lines changed: 46 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# internal function for easy accessing a view of a column
2-
__!(col1::DatasetColumn) = col1.val
3-
__!(col1::SubDatasetColumn) = view(col1.val, col1.selected_index)
4-
const SubOrDSCol = Union{SubDatasetColumn, DatasetColumn}
2+
__!(col1::DatasetColumn) = col1.val
3+
__!(col1::SubDatasetColumn) = view(col1.val, col1.selected_index)
4+
const SubOrDSCol = Union{SubDatasetColumn,DatasetColumn}
55

66
# we treat DatasetColumn as a one-column data set, so we need to manage everything ourselves
77
# we don't encourage people to use ds[!, 1] syntax, manipulating a column of a data set should happen in modify/!
@@ -76,71 +76,72 @@ Base.:(*)(col2::SubOrDSCol, col1::SubOrDSCol) = *(__!(col2), __!(col1))
7676
Base.:(+)(col2::SubOrDSCol, col1::SubOrDSCol) = +(__!(col2), __!(col1))
7777
Base.:(/)(col2::SubOrDSCol, col1::SubOrDSCol) = /(__!(col2), __!(col1))
7878
Base.:(-)(col2::SubOrDSCol, col1::SubOrDSCol) = -(__!(col2), __!(col1))
79-
function Base.convert(::Type{T}, x::T) where T<:DatasetColumn
79+
function Base.convert(::Type{T}, x::T) where {T<:DatasetColumn}
8080
x
8181
end
82-
function Base.convert(::Type{T}, x::T) where T<:SubDatasetColumn
82+
function Base.convert(::Type{T}, x::T) where {T<:SubDatasetColumn}
8383
x
8484
end
8585

8686
# threads is on for SubOrDSCol since it naturally shouldn't be used for unfavourable situations
87-
Base.maximum(f, col::SubOrDSCol; threads = true) = maximum(f, __!(col), threads = threads)
88-
Base.maximum(col::SubOrDSCol; threads = true) = maximum(identity, __!(col), threads = threads)
89-
Base.minimum(f, col::SubOrDSCol; threads = true) = minimum(f, __!(col), threads = threads)
90-
Base.minimum(col::SubOrDSCol; threads = true) = minimum(identity, __!(col), threads = threads)
91-
Base.sum(f, col::SubOrDSCol; threads = true) = sum(f, __!(col), threads = threads)
92-
Base.sum(col::SubOrDSCol; threads = true) = sum(identity, __!(col), threads = threads)
93-
Statistics.mean(f, col::SubOrDSCol) = mean(f, __!(col))
94-
Statistics.mean(col::SubOrDSCol) = mean(identity, __!(col))
95-
Statistics.var(f, col::SubOrDSCol, dof = true) = var(f, __!(col), dof)
96-
Statistics.var(col::SubOrDSCol, dof = true) = var(identity, __!(col), dof)
97-
Statistics.std(f, col::SubOrDSCol, dof = true) = std(f, __!(col), dof)
98-
Statistics.std(col::SubOrDSCol, dof = true) = std(identity, __!(col), dof)
99-
Statistics.median(col::SubOrDSCol) = median(__!(col))
100-
function Statistics.median!(col::SubOrDSCol)
87+
Base.maximum(f, col::SubOrDSCol; threads=true) = maximum(f, __!(col), threads=threads)
88+
Base.maximum(col::SubOrDSCol; threads=true) = maximum(identity, __!(col), threads=threads)
89+
Base.minimum(f, col::SubOrDSCol; threads=true) = minimum(f, __!(col), threads=threads)
90+
Base.minimum(col::SubOrDSCol; threads=true) = minimum(identity, __!(col), threads=threads)
91+
Base.sum(f, col::SubOrDSCol; threads=true) = sum(f, __!(col), threads=threads)
92+
Base.sum(col::SubOrDSCol; threads=true) = sum(identity, __!(col), threads=threads)
93+
mean(f, col::SubOrDSCol) = mean(f, __!(col))
94+
mean(col::SubOrDSCol) = mean(identity, __!(col))
95+
var(f, col::SubOrDSCol, dof=true) = var(f, __!(col), dof)
96+
var(col::SubOrDSCol, dof=true) = var(identity, __!(col), dof)
97+
std(f, col::SubOrDSCol, dof=true) = std(f, __!(col), dof)
98+
std(col::SubOrDSCol, dof=true) = std(identity, __!(col), dof)
99+
median(col::SubOrDSCol) = median(__!(col))
100+
function median!(col::SubOrDSCol)
101101
median!(__!(col))
102102
col.col ∈ index(parent(col.ds)).sortedcols && _reset_grouping_info!(parent(col.ds))
103103
_modified(_attributes(parent(col.ds)))
104104
col
105105
end
106-
Base.extrema(f, col::SubOrDSCol; threads = true) = extrema(f, __!(col), threads = threads)
107-
Base.extrema(col::SubOrDSCol; threads = true) = extrema(identity, __!(col), threads = threads)
108-
Base.argmax(col::SubOrDSCol; by = identity) = argmax(__!(col), by = by)
109-
Base.argmin(col::SubOrDSCol; by = identity) = argmin(__!(col), by = by)
106+
Base.extrema(f, col::SubOrDSCol; threads=true) = extrema(f, __!(col), threads=threads)
107+
Base.extrema(col::SubOrDSCol; threads=true) = extrema(identity, __!(col), threads=threads)
108+
Base.argmax(col::SubOrDSCol; by=identity) = argmax(__!(col), by=by)
109+
Base.argmin(col::SubOrDSCol; by=identity) = argmin(__!(col), by=by)
110110
Base.findmax(f, col::SubOrDSCol) = findmax(f, __!(col))
111111
Base.findmax(col::SubOrDSCol) = findmax(identity, __!(col))
112112
Base.findmin(f, col::SubOrDSCol) = findmin(f, __!(col))
113113
Base.findmin(col::SubOrDSCol) = findmin(identity, __!(col))
114-
Base.cumsum(col::SubOrDSCol; missings = :ignore) = cumsum(__!(col), missings = missings)
115-
Base.cumprod(col::SubOrDSCol; missings = :ignore) = cumprod(__!(col), missings = missings)
116-
cummin(col::SubOrDSCol; missings = :ignore) = cummin(__!(col), missings = missings)
117-
cummax(col::SubOrDSCol; missings = :ignore) = cummax(__!(col), missings = missings)
114+
Base.cumsum(col::SubOrDSCol; missings=:ignore) = cumsum(__!(col), missings=missings)
115+
Base.cumprod(col::SubOrDSCol; missings=:ignore) = cumprod(__!(col), missings=missings)
116+
cummin(col::SubOrDSCol; missings=:ignore) = cummin(__!(col), missings=missings)
117+
cummax(col::SubOrDSCol; missings=:ignore) = cummax(__!(col), missings=missings)
118118

119-
lag(col::SubOrDSCol; default = missing) = lag(__!(col), default = default)
120-
lag(col::SubOrDSCol, k; default = missing) = lag(__!(col), k, default = default)
121-
lead(col::SubOrDSCol; default = missing) = lead(__!(col), default = default)
122-
lead(col::SubOrDSCol, k; default = missing) = lead(__!(col), k, default = default)
119+
topk(col::SubOrDSCol, k; rev=false, output_indices = false) = topk(__!(col), k, rev=rev, output_indices = output_indices)
120+
lag(col::SubOrDSCol; default=missing) = lag(__!(col), default=default)
121+
lag(col::SubOrDSCol, k; default=missing) = lag(__!(col), k, default=default)
122+
lead(col::SubOrDSCol; default=missing) = lead(__!(col), default=default)
123+
lead(col::SubOrDSCol, k; default=missing) = lead(__!(col), k, default=default)
123124

124-
function lag!(col::SubOrDSCol; default = missing)
125-
lag!(__!(col), default = default)
125+
function lag!(col::SubOrDSCol; default=missing)
126+
lag!(__!(col), default=default)
126127
_modified(_attributes(parent(col.ds)))
127128
col.col ∈ index(parent(col.ds)).sortedcols && _reset_grouping_info!(parent(col.ds))
128129
col
129130
end
130-
function lag!(col::SubOrDSCol, k; default = missing)
131-
lag!(__!(col), k, default = default)
131+
function lag!(col::SubOrDSCol, k; default=missing)
132+
lag!(__!(col), k, default=default)
132133
_modified(_attributes(parent(col.ds)))
133134
col.col ∈ index(parent(col.ds)).sortedcols && _reset_grouping_info!(parent(col.ds))
134135
col
135136
end
136-
function lead!(col::SubOrDSCol; default = missing)
137-
lead!(__!(col), default = default)
137+
function lead!(col::SubOrDSCol; default=missing)
138+
lead!(__!(col), default=default)
138139
_modified(_attributes(parent(col.ds)))
139140
col.col ∈ index(parent(col.ds)).sortedcols && _reset_grouping_info!(parent(col.ds))
140141
col
141142
end
142-
function lead!(col::SubOrDSCol, k; default = missing)
143-
lead!(__!(col), k, default = default)
143+
function lead!(col::SubOrDSCol, k; default=missing)
144+
lead!(__!(col), k, default=default)
144145
_modified(_attributes(parent(col.ds)))
145146
col.col ∈ index(parent(col.ds)).sortedcols && _reset_grouping_info!(parent(col.ds))
146147
col
@@ -150,15 +151,15 @@ end
150151

151152
Base.Sort.defalg(col::SubOrDSCol) = Base.Sort.defalg(__!(col))
152153
function Base.sort!(col::SubOrDSCol; alg::Base.Sort.Algorithm=Base.Sort.defalg(col), lt=isless, by=identity, rev::Bool=false, order::Base.Order.Ordering=Base.Order.Forward)
153-
sort!(__!(col), alg = alg, lt = lt, by = by, rev = rev, order = order)
154-
_modified(_attributes(parent(col.ds)))
155-
col.col ∈ index(parent(col.ds)).sortedcols && _reset_grouping_info!(parent(col.ds))
156-
col
154+
sort!(__!(col), alg=alg, lt=lt, by=by, rev=rev, order=order)
155+
_modified(_attributes(parent(col.ds)))
156+
col.col ∈ index(parent(col.ds)).sortedcols && _reset_grouping_info!(parent(col.ds))
157+
col
157158
end
158159
function Base.sort(col::SubOrDSCol; alg::Base.Sort.Algorithm=Base.Sort.defalg(col), lt=isless, by=identity, rev::Bool=false, order::Base.Order.Ordering=Base.Order.Forward)
159-
sort(__!(col), alg = alg, lt = lt, by = by, rev = rev, order = order)
160+
sort(__!(col), alg=alg, lt=lt, by=by, rev=rev, order=order)
160161
end
161162

162163
function Base.sortperm(col::SubOrDSCol; alg::Base.Sort.Algorithm=Base.Sort.DEFAULT_UNSTABLE, lt=isless, by=identity, rev::Bool=false, order::Base.Order.Ordering=Base.Order.Forward)
163-
sortperm(__!(col), alg = alg, lt = lt, by = by, rev = rev, order = order)
164+
sortperm(__!(col), alg=alg, lt=lt, by=by, rev=rev, order=order)
164165
end

0 commit comments

Comments
 (0)