Skip to content

Commit fe5aabd

Browse files
authored
Sequence hashing: Bugfix and slight simplification (#229)
* Sequence hashing: Bugfix and slight simplification Slightly simplify hashing code and fix a minor bug: * Replace manual bitrotate code with Base.bitrotate (from Julia 1.5) * Fix an error from a typo in the murmur2 function * Slightly simplify the code * Improve test coverage
1 parent 26e05f7 commit fe5aabd

File tree

2 files changed

+17
-17
lines changed

2 files changed

+17
-17
lines changed

src/longsequences/hash.jl

Lines changed: 13 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,6 @@
1818
const c1 = 0x87c37b91114253d5
1919
const c2 = 0x4cf5ad432745937f
2020

21-
@inline function rotl64(x::UInt64, r)
22-
return (x << (r & 63)) | (x >>> (-r & 63))
23-
end
24-
2521
@inline function fmix64(k::UInt64)
2622
k = k ⊻ k >> 33
2723
k *= 0xff51afd7ed558ccd
@@ -33,28 +29,28 @@ end
3329

3430
@inline function murmur1(h1, k1)
3531
k1 *= c1
36-
k1 = rotl64(k1, 31)
32+
k1 = bitrotate(k1, 31)
3733
k1 *= c2
3834
h1 = h1 ⊻ k1
3935
return (h1, k1)
4036
end
4137

42-
@inline function murmur2(h1, h2, k2)
38+
@inline function murmur2(h2, k2)
4339
k2 *= c2
44-
k2 = rotl64(k2, 33)
40+
k2 = bitrotate(k2, 33)
4541
k2 *= c1
46-
h2 = h1 ⊻ k2
42+
h2 = h2 ⊻ k2
4743
return (h2, k2)
4844
end
4945

5046
@inline function murmur(h1, h2, k1, k2)
5147
h1, k1 = murmur1(h1, k1)
52-
h1 = rotl64(h1, 27)
48+
h1 = bitrotate(h1, 27)
5349
h1 += h2
5450
h1 = h1 * 5 + 0x52dce729
5551

56-
h2, k2 = murmur2(h1, h2, k2)
57-
h2 = rotl64(h2, 31)
52+
h2, k2 = murmur2(h2, k2)
53+
h2 = bitrotate(h2, 31)
5854
h2 += h1
5955
h2 = h2 * 5 + 0x38495ab5
6056

@@ -71,6 +67,8 @@ function finalize(h1, h2, len)
7167
h1 += h2
7268
h2 += h1
7369

70+
# Ref. implementation returns (h1, h2) for 128 bits, but we truncate to 64.
71+
# last needless modification of h2 is optimised away by the compiler
7472
return h1
7573
end
7674

@@ -90,7 +88,7 @@ function tail(::Type{<:LongSequence}, data, next, stop, h1, h2)
9088
end
9189

9290
h1, k1 = murmur1(h1, k1)
93-
h2, k2 = murmur2(h1, h2, k2)
91+
h2, k2 = murmur2(h2, k2)
9492
return (h1, h2)
9593
end
9694

@@ -158,17 +156,15 @@ function tail(::Type{<:LongSubSeq}, data, next, stop, h1, h2)
158156
end
159157

160158
h1, k1 = murmur1(h1, k1)
161-
h2, k2 = murmur2(h1, h2, k2)
159+
h2, k2 = murmur2(h2, k2)
162160

163161
return (h1, h2)
164162
end
165163

166164
function Base.hash(seq::SeqOrView, seed::UInt64)
167-
# Mix sequence length so that dna"A" and dna"AA"
168-
# return different hash values.
169-
h1::UInt64 = h2::UInt64 = hash(length(seq), seed)
165+
h1, h2 = UInt64(0), seed
170166
next = bitindex(seq, 1)
171-
stop = bitindex(seq, lastindex(seq) + 1)
167+
stop = bitindex(seq, (lastindex(seq) + 1) % UInt)
172168
data = seq.data
173169

174170
h1, h2, next = body(typeof(seq), next, stop, data, h1, h2)

test/longsequences/hashing.jl

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,4 +49,8 @@
4949
# Test hash of longer view to engage some inner loops
5050
seq = randdnaseq(250)
5151
@test hash(seq[33:201]) == hash(view(seq, 33:201))
52+
@test hash(seq[23:201]) == hash(view(seq, 23:201))
53+
@test hash(seq[37:249]) == hash(view(seq, 37:249))
54+
@test hash(seq[50:250]) == hash(view(seq, 50:250))
55+
@test hash(seq[10:232]) == hash(view(seq, 10:232))
5256
end

0 commit comments

Comments
 (0)