bug and type fix

sl-solution · sl-solution · commit 755501b0edd3 · 2021-12-12T20:09:38.000+13:00
diff --git a/docs/src/man/filter.md b/docs/src/man/filter.md
@@ -262,7 +262,7 @@ julia> mask(ds, [isodd, ==(2)], 2:3, missings = missing) # using a vector of fun
 
 Filtering a data set based on another data set should be done via `contains`, `semijoin`, `semijoin!`, `antijoin`, and `antijoin!` functions. These functions are discussed in the section about joining data sets, and here we just provide some examples about how to use them for filtering a data set.
 
-Additionally, these functions can be used in situations when a data set needed to be filter when a column's values belong to a set of values. In this case, a temporary data set can be formed based on given values and then one of the aforementioned functions can be used.
+Additionally, these functions can be used in situations where a data set needs to be filtered based on a set of values. In these cases, a temporary data set can be formed from the given values and then one of the aforementioned functions can be used.
 
 ### Examples
 
diff --git a/docs/src/man/tutorial.md b/docs/src/man/tutorial.md
@@ -80,7 +80,7 @@ julia> select(flights, :DepTime, :ArrTime, :Flight_Number)
                          467382 rows omitted
 ```
 
-`InMemoryDatasets` provide different ways for referring to multiple columns. For instance, let's select all columns between `:FlightDate` and `:IATA` as well as all columns containing "Taxi" or "Delay" in their names. `Between` selects columns between two specified extremes, and regular expressions can be used to select columns with specified patterns.
+`InMemoryDatasets` provides different ways for referring to multiple columns. For instance, let's select all columns between `:FlightDate` and `:IATA` as well as all columns containing "Taxi" or "Delay" in their names. `Between` selects columns between two specified extremes, and regular expressions can be used to select columns with specified patterns.
 
 
 ```julia
diff --git a/src/sort/groupby.jl b/src/sort/groupby.jl
@@ -207,7 +207,7 @@ function combine(gds::Union{GroupBy, GatherBy}, @nospecialize(args...); dropgrou
 		else
 			if !(ms[i].first isa Tuple)
 				if old_x !== ms[i].first
-					if haskey(index(gds.parent), ms[i].first)
+					if !(ms[i].second.first isa Expr) && haskey(index(gds.parent), ms[i].first)
 						curr_x = _threaded_permute_for_groupby(_columns(gds.parent)[index(gds.parent)[ms[i].first]], a[1])
 						old_x = ms[i].first
 					else
@@ -228,7 +228,7 @@ function combine(gds::Union{GroupBy, GatherBy}, @nospecialize(args...); dropgrou
 			if ms[i].first isa Tuple
 				_combine_f_barrier_tuple(ntuple(j-> _threaded_permute_for_groupby(_columns(gds.parent)[index(gds.parent)[ms[i].first[j]]], a[1]), length(ms[i].first)), newds, ms[i].first, ms[i].second.first, ms[i].second.second, newds_lookup, starts, ngroups, new_lengths, total_lengths)
 			else
-				_combine_f_barrier(haskey(index(gds.parent), ms[i].first) ? curr_x : view(_columns(gds.parent)[1], a[1]), newds, ms[i].first, ms[i].second.first, ms[i].second.second, newds_lookup, starts, ngroups, new_lengths, total_lengths)
+				_combine_f_barrier(!(ms[i].second.first isa Expr) && haskey(index(gds.parent), ms[i].first) ? curr_x : view(_columns(gds.parent)[1], a[1]), newds, ms[i].first, ms[i].second.first, ms[i].second.second, newds_lookup, starts, ngroups, new_lengths, total_lengths)
 			end
 		end
 		if !haskey(index(newds), ms[i].second.second)
diff --git a/test/grouping.jl b/test/grouping.jl
@@ -368,4 +368,12 @@ end
     @test all(byrow(compare(c1, Dataset(x2=[0, missing], maximum_y=[600.0, 200]), mapformats=true), all))
     @test all(byrow(compare(c2, Dataset(x2=[0, 0, missing], maximum_y=[100, 600.0, 200]), mapformats=true), all))
     @test all(byrow(compare(c3, Dataset(x2=[0,missing], function_x1_y=[(2, 100.0), (2, 200.0)]), mapformats=true), all))
+    ds = Dataset(x1 = [1,2,1,2,2,1], x2=[1.0,missing,1.1,1.1,1.1,1.1],y=100:100:600.0)
+    @test byrow(compare(combine(gatherby(ds, 1), :x2=>[sum, maximum],2:3=>byrow(+)=>:row), Dataset(x1=[1,2], sum_x2=[3.2,2.2], maximum_x2=[1.1,1.1], row = [4.3, 3.3] ), eq = isapprox), all)|>all
+    @test byrow(compare(combine(gatherby(ds, 1), :x2=>[x->sum(x), maximum],2:3=>byrow(+)=>:row), Dataset(x1=[1,2], function_x2=[3.2,2.2], maximum_x2=[1.1,1.1], row = [4.3, 3.3] ), eq = isapprox), all)|>all
+    @test byrow(compare(combine(groupby(ds, 1), :x2=>[sum, maximum],2:3=>byrow(+)=>:row), Dataset(x1=[1,2], sum_x2=[3.2,2.2], maximum_x2=[1.1,1.1], row = [4.3, 3.3] ), eq = isapprox), all)|>all
+
+    @test byrow(compare(combine(gatherby(view(ds, :, [2,1]), :x1), :x2=>[sum, maximum],2:3=>byrow(+)=>:row), Dataset(x1=[1,2], sum_x2=[3.2,2.2], maximum_x2=[1.1,1.1], row = [4.3, 3.3] ), eq = isapprox), all)|>all
+    @test byrow(compare(combine(gatherby(view(ds, :, [2,1]), :x1), :x2=>[x->sum(x), maximum],2:3=>byrow(+)=>:row), Dataset(x1=[1,2], function_x2=[3.2,2.2], maximum_x2=[1.1,1.1], row = [4.3, 3.3] ), eq = isapprox), all)|>all
+    @test byrow(compare(combine(groupby(view(ds, :, [2,1]), :x1), :x2=>[sum, maximum],2:3=>byrow(+)=>:row), Dataset(x1=[1,2], sum_x2=[3.2,2.2], maximum_x2=[1.1,1.1], row = [4.3, 3.3] ), eq = isapprox), all)|>all
 end