Skip to content

Commit 755501b

Browse files
committed
bug and type fix
1 parent 8343012 commit 755501b

File tree

4 files changed

+12
-4
lines changed

4 files changed

+12
-4
lines changed

docs/src/man/filter.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -262,7 +262,7 @@ julia> mask(ds, [isodd, ==(2)], 2:3, missings = missing) # using a vector of fun
262262

263263
Filtering a data set based on another data set should be done via `contains`, `semijoin`, `semijoin!`, `antijoin`, and `antijoin!` functions. These functions are discussed in the section about joining data sets, and here we just provide some examples about how to use them for filtering a data set.
264264

265-
Additionally, these functions can be used in situations when a data set needed to be filter when a column's values belong to a set of values. In this case, a temporary data set can be formed based on given values and then one of the aforementioned functions can be used.
265+
Additionally, these functions can be used in situations where a data set needs to be filtered based on a set of values. In these cases, a temporary data set can be formed from the given values and then one of the aforementioned functions can be used.
266266

267267
### Examples
268268

docs/src/man/tutorial.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ julia> select(flights, :DepTime, :ArrTime, :Flight_Number)
8080
467382 rows omitted
8181
```
8282

83-
`InMemoryDatasets` provide different ways for referring to multiple columns. For instance, let's select all columns between `:FlightDate` and `:IATA` as well as all columns containing "Taxi" or "Delay" in their names. `Between` selects columns between two specified extremes, and regular expressions can be used to select columns with specified patterns.
83+
`InMemoryDatasets` provides different ways for referring to multiple columns. For instance, let's select all columns between `:FlightDate` and `:IATA` as well as all columns containing "Taxi" or "Delay" in their names. `Between` selects columns between two specified extremes, and regular expressions can be used to select columns with specified patterns.
8484

8585

8686
```julia

src/sort/groupby.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,7 @@ function combine(gds::Union{GroupBy, GatherBy}, @nospecialize(args...); dropgrou
207207
else
208208
if !(ms[i].first isa Tuple)
209209
if old_x !== ms[i].first
210-
if haskey(index(gds.parent), ms[i].first)
210+
if !(ms[i].second.first isa Expr) && haskey(index(gds.parent), ms[i].first)
211211
curr_x = _threaded_permute_for_groupby(_columns(gds.parent)[index(gds.parent)[ms[i].first]], a[1])
212212
old_x = ms[i].first
213213
else
@@ -228,7 +228,7 @@ function combine(gds::Union{GroupBy, GatherBy}, @nospecialize(args...); dropgrou
228228
if ms[i].first isa Tuple
229229
_combine_f_barrier_tuple(ntuple(j-> _threaded_permute_for_groupby(_columns(gds.parent)[index(gds.parent)[ms[i].first[j]]], a[1]), length(ms[i].first)), newds, ms[i].first, ms[i].second.first, ms[i].second.second, newds_lookup, starts, ngroups, new_lengths, total_lengths)
230230
else
231-
_combine_f_barrier(haskey(index(gds.parent), ms[i].first) ? curr_x : view(_columns(gds.parent)[1], a[1]), newds, ms[i].first, ms[i].second.first, ms[i].second.second, newds_lookup, starts, ngroups, new_lengths, total_lengths)
231+
_combine_f_barrier(!(ms[i].second.first isa Expr) && haskey(index(gds.parent), ms[i].first) ? curr_x : view(_columns(gds.parent)[1], a[1]), newds, ms[i].first, ms[i].second.first, ms[i].second.second, newds_lookup, starts, ngroups, new_lengths, total_lengths)
232232
end
233233
end
234234
if !haskey(index(newds), ms[i].second.second)

test/grouping.jl

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -368,4 +368,12 @@ end
368368
@test all(byrow(compare(c1, Dataset(x2=[0, missing], maximum_y=[600.0, 200]), mapformats=true), all))
369369
@test all(byrow(compare(c2, Dataset(x2=[0, 0, missing], maximum_y=[100, 600.0, 200]), mapformats=true), all))
370370
@test all(byrow(compare(c3, Dataset(x2=[0,missing], function_x1_y=[(2, 100.0), (2, 200.0)]), mapformats=true), all))
371+
ds = Dataset(x1 = [1,2,1,2,2,1], x2=[1.0,missing,1.1,1.1,1.1,1.1],y=100:100:600.0)
372+
@test byrow(compare(combine(gatherby(ds, 1), :x2=>[sum, maximum],2:3=>byrow(+)=>:row), Dataset(x1=[1,2], sum_x2=[3.2,2.2], maximum_x2=[1.1,1.1], row = [4.3, 3.3] ), eq = isapprox), all)|>all
373+
@test byrow(compare(combine(gatherby(ds, 1), :x2=>[x->sum(x), maximum],2:3=>byrow(+)=>:row), Dataset(x1=[1,2], function_x2=[3.2,2.2], maximum_x2=[1.1,1.1], row = [4.3, 3.3] ), eq = isapprox), all)|>all
374+
@test byrow(compare(combine(groupby(ds, 1), :x2=>[sum, maximum],2:3=>byrow(+)=>:row), Dataset(x1=[1,2], sum_x2=[3.2,2.2], maximum_x2=[1.1,1.1], row = [4.3, 3.3] ), eq = isapprox), all)|>all
375+
376+
@test byrow(compare(combine(gatherby(view(ds, :, [2,1]), :x1), :x2=>[sum, maximum],2:3=>byrow(+)=>:row), Dataset(x1=[1,2], sum_x2=[3.2,2.2], maximum_x2=[1.1,1.1], row = [4.3, 3.3] ), eq = isapprox), all)|>all
377+
@test byrow(compare(combine(gatherby(view(ds, :, [2,1]), :x1), :x2=>[x->sum(x), maximum],2:3=>byrow(+)=>:row), Dataset(x1=[1,2], function_x2=[3.2,2.2], maximum_x2=[1.1,1.1], row = [4.3, 3.3] ), eq = isapprox), all)|>all
378+
@test byrow(compare(combine(groupby(view(ds, :, [2,1]), :x1), :x2=>[sum, maximum],2:3=>byrow(+)=>:row), Dataset(x1=[1,2], sum_x2=[3.2,2.2], maximum_x2=[1.1,1.1], row = [4.3, 3.3] ), eq = isapprox), all)|>all
371379
end

0 commit comments

Comments
 (0)