Skip to content

Commit 7f6cc47

Browse files
committed
usehash for update/!
1 parent 201689a commit 7f6cc47

File tree

3 files changed

+171
-1
lines changed

3 files changed

+171
-1
lines changed

src/join/join_dict.jl

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -384,3 +384,35 @@ function _in_use_Set(ldata, rdata, _fl, _fr)
384384
end
385385
res
386386
end
387+
388+
389+
# Hash-based path for updating `dsl` from `dsr` when the join uses a single key
# column on each side.
#
# A dictionary is built from the key column of `dsr` together with the key
# column of `dsl` (see `_create_dictionary_for_join`), `ranges` is filled by
# `_fill_ranges_for_dict_join!`, and then every column of `dsr` listed in
# `right_cols` that has a same-named column in `dsl` is passed, together with
# that left column, to `_update_left_with_right!`.
#
# Arguments
# - `dsl`, `dsr`: left and right data sets; columns of `dsl` are the ones
#   handed to `_update_left_with_right!` as the update target.
# - `ranges`: preallocated vector that `_fill_ranges_for_dict_join!` fills.
# - `onleft`, `onright`: key column indices; only the first element of each is
#   used here.
# - `right_cols`: column indices of `dsr` considered as update sources.
# - `Val(T)`: forwarded unchanged to `_create_dictionary_for_join`.
#
# Keywords
# - `allowmissing`, `mode`: forwarded unchanged to `_update_left_with_right!`.
# - `mapformats`: two flags (left, right); when a flag is true the key values
#   of that side go through the column's format (`getformat`) before
#   `_date_value` is applied, otherwise through `identity`.
# - `stable`, `alg`: accepted but not used on this path.
#
# Returns `(true, dsl)` when the update was carried out, or
# `(false, Dataset())` when `_create_dictionary_for_join` reports `fallback`,
# so the caller can switch to another strategy.
function _update!_dict(dsl, dsr, ranges, onleft, onright, right_cols, ::Val{T}; allowmissing = true, mode = :all, mapformats = [true, true], stable = false, alg = HeapSort) where T
    lkey = onleft[1]
    rkey = onright[1]

    # Key transformations: raw value by default, formatted value when requested.
    _fl = _date_value ∘ identity
    _fr = _date_value ∘ identity
    if mapformats[1]
        _fl = _date_value ∘ getformat(dsl, lkey)
    end
    if mapformats[2]
        _fr = _date_value ∘ getformat(dsr, rkey)
    end

    lkeycol = _columns(dsl)[lkey]
    rkeycol = _columns(dsr)[rkey]

    dict, maxprob, sz, fallback, type = _create_dictionary_for_join(_fr, rkeycol, _fl, lkeycol, Val(T))
    # key is not unique, fall back to sort
    if fallback
        return (false, Dataset())
    end

    _fill_ranges_for_dict_join!(ranges, dict, maxprob, _fl, _fr, lkeycol, rkeycol, sz, type)

    left_lookup = index(dsl).lookup
    right_names = _names(dsr)
    for rcol in right_cols
        colname = right_names[rcol]
        # Only columns that exist under the same name in `dsl` can be updated.
        haskey(left_lookup, colname) || continue
        lcol = index(dsl)[colname]
        TL = nonmissingtype(eltype(_columns(dsl)[lcol]))
        TR = nonmissingtype(eltype(_columns(dsr)[rcol]))
        # Skip the column when right values would not fit the left element type.
        if promote_type(TR, TL) <: TL
            _update_left_with_right!(_columns(dsl)[lcol], _columns(dsr)[rcol], ranges, allowmissing, mode)
        end
    end
    _modified(_attributes(dsl))
    return (true, dsl)
end

src/join/update.jl

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,19 @@ function _update_left_with_right!(x, y, ranges, allowmissing, mode)
2020
end
2121
end
2222

23-
function _update!(dsl::Dataset, dsr::AbstractDataset, ::Val{T}; onleft, onright, check = true, allowmissing = true, mode = :all, mapformats = [true, true], stable = false, alg = HeapSort, accelerate = false) where T
23+
function _update!(dsl::Dataset, dsr::AbstractDataset, ::Val{T}; onleft, onright, check = true, allowmissing = true, mode = :all, mapformats = [true, true], stable = false, alg = HeapSort, accelerate = false, usehash = true) where T
2424
isempty(dsl) && return dsl
2525
oncols_left = index(dsl)[onleft]
2626
oncols_right = index(dsr)[onright]
2727
right_cols = setdiff(1:length(index(dsr)), oncols_right)
2828

2929
ranges = Vector{UnitRange{T}}(undef, nrow(dsl))
30+
if usehash && length(oncols_left) == 1 && nrow(dsr)>1
31+
success, result = _update!_dict(dsl, dsr, ranges, oncols_left, oncols_right, right_cols, Val(T); mapformats = mapformats, allowmissing = allowmissing, mode = mode)
32+
if success
33+
return result
34+
end
35+
end
3036
idx, uniquemode = _find_permute_and_fill_range_for_join!(ranges, dsr, dsl, oncols_right, oncols_left, stable, alg, mapformats, accelerate)
3137

3238
for j in 1:length(oncols_left)

test/join.jl

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1316,4 +1316,136 @@ end
13161316
@test up1_v == up1_t
13171317
@test up1_a == up1_t
13181318

1319+
1320+
main = Dataset(group = ["G1", "G1", "G1", "G1", "G2", "G2", "G2"],
1321+
id = [ 1 , 1 , 2 , 2 , 1 , 1 , 2 ],
1322+
x1 = [1.2, 2.3,missing, 2.3, 1.3, 2.1 , 0.0 ],
1323+
x2 = [ 5 , 4 , 4 , 2 , 1 ,missing, 2 ])
1324+
transaction = Dataset(group = ["G1", "G2"], id = [2, 1],
1325+
x1 = [2.5, missing], x2 = [missing, 3])
1326+
@test update(main, transaction, on = :group) == Dataset(group = ["G1", "G1", "G1", "G1", "G2", "G2", "G2"],
1327+
id = [ 2 , 2 , 2 , 2 , 1 , 1 , 1 ],
1328+
x1 = [2.5, 2.5,2.5,2.5, 1.3, 2.1 , 0.0 ],
1329+
x2 = [ 5 , 4 , 4 , 2 ,3,3,3 ])
1330+
@test update(main, transaction, on = :group, mode = :missing) == Dataset(group = ["G1", "G1", "G1", "G1", "G2", "G2", "G2"],
1331+
id = [ 1 , 1 , 2 , 2 , 1 , 1 , 2 ],
1332+
x1 = [1.2, 2.3,2.5, 2.3, 1.3, 2.1 , 0.0 ],
1333+
x2 = [ 5 , 4 , 4 , 2 , 1 ,3, 2 ])
1334+
@test update(main, transaction, on = :group, allowmissing = true, mode = :all) == Dataset(group = ["G1", "G1", "G1", "G1", "G2", "G2", "G2"],
1335+
id = [ 2,2,2,2,1,1,1],
1336+
x1 = [2.5,2.5,2.5,2.5, missing, missing, missing],
1337+
x2 = [ missing, missing, missing, missing, 3,3,3])
1338+
@test update(main, transaction, on = :group) == update(main, view(transaction, [2,1], :), on = :group)
1339+
@test update(main, transaction, on = :group, mode = :missing) == update(main, view(transaction, [2,1], :), on = :group, mode = :missing)
1340+
@test update(main, transaction, on = :group, allowmissing = true, mode = :all) == update(main, view(transaction, [2,1], :), on = :group, allowmissing = true, mode = :all)
1341+
1342+
update!(main, transaction, on = :group, mode = :missing)
1343+
@test main == Dataset(group = ["G1", "G1", "G1", "G1", "G2", "G2", "G2"],
1344+
id = [ 1 , 1 , 2 , 2 , 1 , 1 , 2 ],
1345+
x1 = [1.2, 2.3,2.5, 2.3, 1.3, 2.1 , 0.0 ],
1346+
x2 = [ 5 , 4 , 4 , 2 , 1 ,3, 2 ])
1347+
update!(main, transaction, on = :group, allowmissing = true, mode = :all)
1348+
@test main == Dataset(group = ["G1", "G1", "G1", "G1", "G2", "G2", "G2"],
1349+
id = [ 2,2,2,2,1,1,1],
1350+
x1 = [2.5,2.5,2.5,2.5, missing, missing, missing],
1351+
x2 = [ missing, missing, missing, missing, 3,3,3])
1352+
1353+
main = Dataset(group = [3,3,3,3,1,1,1],
1354+
id = [ 1 , 1 , 2 , 2 , 1 , 1 , 2 ],
1355+
x1 = [1.2, 2.3,missing, 2.3, 1.3, 2.1 , 0.0 ],
1356+
x2 = [ 5 , 4 , 4 , 2 , 1 ,missing, 2 ])
1357+
transaction = Dataset(group = [3,1], id = [2, 1],
1358+
x1 = [2.5, missing], x2 = [missing, 3])
1359+
@test update(main, transaction, on = :group) == Dataset(group = [3,3,3,3,1,1,1],
1360+
id = [ 2 , 2 , 2 , 2 , 1 , 1 , 1 ],
1361+
x1 = [2.5, 2.5,2.5,2.5, 1.3, 2.1 , 0.0 ],
1362+
x2 = [ 5 , 4 , 4 , 2 ,3,3,3 ])
1363+
@test update(main, transaction, on = :group, mode = :missing) == Dataset(group = [3,3,3,3,1,1,1],
1364+
id = [ 1 , 1 , 2 , 2 , 1 , 1 , 2 ],
1365+
x1 = [1.2, 2.3,2.5, 2.3, 1.3, 2.1 , 0.0 ],
1366+
x2 = [ 5 , 4 , 4 , 2 , 1 ,3, 2 ])
1367+
@test update(main, transaction, on = :group, allowmissing = true, mode = :all) == Dataset(group = [3,3,3,3,1,1,1],
1368+
id = [ 2,2,2,2,1,1,1],
1369+
x1 = [2.5,2.5,2.5,2.5, missing, missing, missing],
1370+
x2 = [ missing, missing, missing, missing, 3,3,3])
1371+
@test update(main, transaction, on = :group) == update(main, view(transaction, [2,1], :), on = :group)
1372+
@test update(main, transaction, on = :group, mode = :missing) == update(main, view(transaction, [2,1], :), on = :group, mode = :missing)
1373+
@test update(main, transaction, on = :group, allowmissing = true, mode = :all) == update(main, view(transaction, [2,1], :), on = :group, allowmissing = true, mode = :all)
1374+
1375+
update!(main, transaction, on = :group, mode = :missing)
1376+
@test main == Dataset(group = [3,3,3,3,1,1,1],
1377+
id = [ 1 , 1 , 2 , 2 , 1 , 1 , 2 ],
1378+
x1 = [1.2, 2.3,2.5, 2.3, 1.3, 2.1 , 0.0 ],
1379+
x2 = [ 5 , 4 , 4 , 2 , 1 ,3, 2 ])
1380+
update!(main, transaction, on = :group, allowmissing = true, mode = :all)
1381+
@test main == Dataset(group = [3,3,3,3,1,1,1],
1382+
id = [ 2,2,2,2,1,1,1],
1383+
x1 = [2.5,2.5,2.5,2.5, missing, missing, missing],
1384+
x2 = [ missing, missing, missing, missing, 3,3,3])
1385+
1386+
main = Dataset(group = [3,3,3,3,1,1,1],
1387+
id = [ 1 , 1 , 2 , 2 , 1 , 1 , 2 ],
1388+
x1 = [1.2, 2.3,missing, 2.3, 1.3, 2.1 , 0.0 ],
1389+
x2 = [ 5 , 4 , 4 , 2 , 1 ,missing, 2 ])
1390+
transaction = Dataset(group = PooledArray([3,1]), id = [2, 1],
1391+
x1 = [2.5, missing], x2 = [missing, 3])
1392+
@test update(main, transaction, on = :group) == Dataset(group = [3,3,3,3,1,1,1],
1393+
id = [ 2 , 2 , 2 , 2 , 1 , 1 , 1 ],
1394+
x1 = [2.5, 2.5,2.5,2.5, 1.3, 2.1 , 0.0 ],
1395+
x2 = [ 5 , 4 , 4 , 2 ,3,3,3 ])
1396+
@test update(main, transaction, on = :group, mode = :missing) == Dataset(group = [3,3,3,3,1,1,1],
1397+
id = [ 1 , 1 , 2 , 2 , 1 , 1 , 2 ],
1398+
x1 = [1.2, 2.3,2.5, 2.3, 1.3, 2.1 , 0.0 ],
1399+
x2 = [ 5 , 4 , 4 , 2 , 1 ,3, 2 ])
1400+
@test update(main, transaction, on = :group, allowmissing = true, mode = :all) == Dataset(group = [3,3,3,3,1,1,1],
1401+
id = [ 2,2,2,2,1,1,1],
1402+
x1 = [2.5,2.5,2.5,2.5, missing, missing, missing],
1403+
x2 = [ missing, missing, missing, missing, 3,3,3])
1404+
@test update(main, transaction, on = :group) == update(main, view(transaction, [2,1], :), on = :group)
1405+
@test update(main, transaction, on = :group, mode = :missing) == update(main, view(transaction, [2,1], :), on = :group, mode = :missing)
1406+
@test update(main, transaction, on = :group, allowmissing = true, mode = :all) == update(main, view(transaction, [2,1], :), on = :group, allowmissing = true, mode = :all)
1407+
1408+
update!(main, transaction, on = :group, mode = :missing)
1409+
@test main == Dataset(group = [3,3,3,3,1,1,1],
1410+
id = [ 1 , 1 , 2 , 2 , 1 , 1 , 2 ],
1411+
x1 = [1.2, 2.3,2.5, 2.3, 1.3, 2.1 , 0.0 ],
1412+
x2 = [ 5 , 4 , 4 , 2 , 1 ,3, 2 ])
1413+
update!(main, transaction, on = :group, allowmissing = true, mode = :all)
1414+
@test main == Dataset(group = [3,3,3,3,1,1,1],
1415+
id = [ 2,2,2,2,1,1,1],
1416+
x1 = [2.5,2.5,2.5,2.5, missing, missing, missing],
1417+
x2 = [ missing, missing, missing, missing, 3,3,3])
1418+
1419+
main = Dataset(group = [3,3,3,3,1,1,1],
1420+
id = [ 1 , 1 , 2 , 2 , 1 , 1 , 2 ],
1421+
x1 = [1.2, 2.3,missing, 2.3, 1.3, 2.1 , 0.0 ],
1422+
x2 = [ 5 , 4 , 4 , 2 , 1 ,missing, 2 ])
1423+
transaction = Dataset(group = [3.0,1.0], id = [2, 1],
1424+
x1 = [2.5, missing], x2 = [missing, 3])
1425+
@test update(main, transaction, on = :group) == Dataset(group = [3,3,3,3,1,1,1],
1426+
id = [ 2 , 2 , 2 , 2 , 1 , 1 , 1 ],
1427+
x1 = [2.5, 2.5,2.5,2.5, 1.3, 2.1 , 0.0 ],
1428+
x2 = [ 5 , 4 , 4 , 2 ,3,3,3 ])
1429+
@test update(main, transaction, on = :group, mode = :missing) == Dataset(group = [3,3,3,3,1,1,1],
1430+
id = [ 1 , 1 , 2 , 2 , 1 , 1 , 2 ],
1431+
x1 = [1.2, 2.3,2.5, 2.3, 1.3, 2.1 , 0.0 ],
1432+
x2 = [ 5 , 4 , 4 , 2 , 1 ,3, 2 ])
1433+
@test update(main, transaction, on = :group, allowmissing = true, mode = :all) == Dataset(group = [3,3,3,3,1,1,1],
1434+
id = [ 2,2,2,2,1,1,1],
1435+
x1 = [2.5,2.5,2.5,2.5, missing, missing, missing],
1436+
x2 = [ missing, missing, missing, missing, 3,3,3])
1437+
@test update(main, transaction, on = :group) == update(main, view(transaction, [2,1], :), on = :group)
1438+
@test update(main, transaction, on = :group, mode = :missing) == update(main, view(transaction, [2,1], :), on = :group, mode = :missing)
1439+
@test update(main, transaction, on = :group, allowmissing = true, mode = :all) == update(main, view(transaction, [2,1], :), on = :group, allowmissing = true, mode = :all)
1440+
1441+
update!(main, transaction, on = :group, mode = :missing)
1442+
@test main == Dataset(group = [3,3,3,3,1,1,1],
1443+
id = [ 1 , 1 , 2 , 2 , 1 , 1 , 2 ],
1444+
x1 = [1.2, 2.3,2.5, 2.3, 1.3, 2.1 , 0.0 ],
1445+
x2 = [ 5 , 4 , 4 , 2 , 1 ,3, 2 ])
1446+
update!(main, transaction, on = :group, allowmissing = true, mode = :all)
1447+
@test main == Dataset(group = [3,3,3,3,1,1,1],
1448+
id = [ 2,2,2,2,1,1,1],
1449+
x1 = [2.5,2.5,2.5,2.5, missing, missing, missing],
1450+
x2 = [ missing, missing, missing, missing, 3,3,3])
13191451
end

0 commit comments

Comments
 (0)