Skip to content

Commit 0107290

Browse files
committed
improve the performance
1 parent d12ded7 commit 0107290

File tree

1 file changed

+14
-3
lines changed

1 file changed

+14
-3
lines changed

src/dataset/transpose.jl

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -399,11 +399,20 @@ function update_outputmat!(outputmat, x, starts, perms, ids, n_row_names, _is_ce
399399
end
400400
end
401401

402+
"""
    _preallocate_outputmat!(res, n1, n2, fillval, threads, ::Val{CT}) where CT

Populate slots `1:n2` of `res` with newly allocated columns and return `res`.

Each column is obtained from `_our_vect_alloc(CT, n1)` and then overwritten with
`fillval` via `fill!`. The loop over the columns is wrapped in `@_threadsfor threads`.
"""
function _preallocate_outputmat!(res, n1, n2, fillval, threads, ::Val{CT}) where CT
    # NOTE(review): presumably `@_threadsfor` runs the loop multithreaded only when
    # `threads` is true - confirm against the macro definition.
    @_threadsfor threads for col in 1:n2
        # NOTE(review): `_our_vect_alloc(CT, n1)` presumably returns a vector with
        # element type `CT` and length `n1` - confirm against its definition.
        column = _our_vect_alloc(CT, n1)
        fill!(column, fillval)
        res[col] = column
    end
    return res
end
408+
402409
function _fill_outputmat_withoutid(T, in_cols, ds, starts, perms, new_col_names, row_names_length, threads; default_fill = missing)
403410

404411
@assert _check_allocation_limit(nonmissingtype(T), row_names_length*_ngroups(ds), length(new_col_names)) < 1.0 "The output data frame is huge and there is not enough resource to allocate it."
405412
CT = promote_type(T, typeof(default_fill))
406-
outputmat = [fill!(_our_vect_alloc(CT, row_names_length*_ngroups(ds)), default_fill) for _ in 1:length(new_col_names)]
413+
# outputmat = [__fill!(_our_vect_alloc(CT, row_names_length*_ngroups(ds)), default_fill) for _ in 1:length(new_col_names)]
414+
outputmat = Vector{Vector{CT}}(undef, length(new_col_names))
415+
_preallocate_outputmat!(outputmat, row_names_length*_ngroups(ds), length(new_col_names), default_fill, threads, Val(CT) )
407416
update_outputmat!(outputmat, in_cols, starts, perms, row_names_length, threads)
408417

409418
outputmat
@@ -413,7 +422,9 @@ function _fill_outputmat_withid(T, in_cols, ds, starts, perms, ids, new_col_name
413422

414423
@assert _check_allocation_limit(nonmissingtype(T), row_names_length*_ngroups(ds), length(new_col_names)) < 1.0 "The output data frame is huge and there is not enough resource to allocate it."
415424
CT = promote_type(T, typeof(default_fill))
416-
outputmat = [fill!(_our_vect_alloc(CT, row_names_length*_ngroups(ds)), default_fill) for _ in 1:length(new_col_names)]
425+
# outputmat = [fill!(_our_vect_alloc(CT, row_names_length*_ngroups(ds)), default_fill) for _ in 1:length(new_col_names)]
426+
outputmat = Vector{Vector{CT}}(undef, length(new_col_names))
427+
_preallocate_outputmat!(outputmat, row_names_length*_ngroups(ds), length(new_col_names), default_fill, threads, Val(CT) )
417428

418429
_is_cell_filled = zeros(Bool, row_names_length*_ngroups(ds), length(new_col_names))
419430

@@ -544,7 +555,7 @@ function ds_transpose(ds::Union{Dataset, GroupBy, GatherBy}, cols::Union{Tuple,
544555
end
545556
outds2 = Dataset(outputmat, new_col_names, copycols = false)
546557

547-
for j in 1:ncol(outds2)
558+
for j in 1:ncol(outds2)
548559
push!(_columns(outds), _columns(outds2)[j])
549560
end
550561
merge!(index(outds), index(outds2), makeunique = true)

0 commit comments

Comments
 (0)