@@ -212,10 +212,7 @@ Computes the Jaccard similarity between sets ``A`` and ``B``, which is defined a
212212``\text{Jaccard}(A,B) = \frac{\left|A \cap B\right|}{\left|A \cup B\right|}``
213213
214214# Arguments
215- - `A::Set`, `B::Set`: the two sets with which to compute Jaccard similarity.
216-
217- # Returns
218- `Float64`: the Jaccard similarity between sets `A` and `B`, which is between `0` and `1`.
215+ - `A::Set`, `B::Set`: two sets whose Jaccard similarity we would like to compute.
219216
220217# Examples
221218```jldoctest; setup = :(using LSHFunctions)
243240@doc raw """
244241 function jaccard(x::BitArray{1}, y::BitArray{1})
245242
246- Computes the Jaccard similarity between a pair of binary vectors. Here, Jaccard similarity is defined as
243+ Computes the Jaccard similarity between a pair of binary vectors:
247244
248- ``J(x, y) = \\frac{\\sum_{i} \\min{(x_i,y_i)}}{\\sum_{i} \\max{(x_i,y_i)}}``
245+ ``J(x, y) = \frac{\sum_{i} \min{(x_i,y_i)}}{\sum_{i} \max{(x_i,y_i)}}``
249246
250247# Arguments
251248- `x::BitArray{1}`, `y::BitArray{1}`: two binary vectors, in the form of `BitArray`s.
275272@doc raw """
276273 function jaccard(x::AbstractVector{<:Real}, y::AbstractVector{<:Real})
277274
278- Computes the Jaccard similarity between a pair of vectors of real numbers. Here, Jaccard similarity is defined as
275+ Computes the Jaccard similarity between a pair of vectors of real numbers:
279276
280- ``J(x, y) = \\frac{\\sum_{i} \\min{(x_i,y_i)}}{\\sum_{i} \\max{(x_i,y_i)}}``
277+ ``J(x, y) = \frac{\sum_{i} \min{(x_i,y_i)}}{\sum_{i} \max{(x_i,y_i)}}``
281278
282279# Arguments
283280- `x::AbstractVector{<:Real}`, `y::AbstractVector{<:Real}`: a pair of vectors containing real numbers (subtypes of `Real`).
@@ -292,7 +289,8 @@ julia> jaccard(x,y)
2922890.5
293290```
294291"""
295- function jaccard (x:: AbstractVector{T} , y:: AbstractVector{<:Real} ) where {T <: Real }
292+ function jaccard (x:: AbstractVector{T} ,
293+ y:: AbstractVector ) :: Float64 where {T <: Real }
296294 if length (x) != length (y)
297295 DimensionMismatch (" dimensions must match" ) |> throw
298296 end
@@ -315,15 +313,64 @@ function jaccard(x::AbstractVector{T}, y::AbstractVector{<:Real}) where {T <: Re
315313 if union == T (0 )
316314 # Use the convention that if x and y are full of zeros, their Jaccard
317315 # similarity is zero.
318- T (union )
316+ Float64 ( 0 )
319317 else
320- T (intersection / union)
318+ Float64 (intersection / union)
321319 end
322320end
323321
# Argument-order shim for mixed inputs: when the integer vector is given first
# and the floating-point vector second, forward to `jaccard(y, x)` so that the
# floating-point vector is passed as the first argument.
function jaccard(x::AbstractVector{<:Integer}, y::AbstractVector{<:AbstractFloat})
    return jaccard(y, x)
end
326324
@doc raw"""
    function jaccard(A::Set{<:K},
                     B::Set{<:K},
                     weights::Dict{K,V}) where {K,V<:Real}

Computes the weighted Jaccard similarity between two sets:

``J(A, B) = \frac{\sum_{x\in A\cap B} w_x}{\sum_{y\in A\cup B} w_y}``

# Arguments
- `A::Set`, `B::Set`: two sets whose Jaccard similarity we would like to compute.
- `weights::Dict`: a dictionary mapping symbols in the sets `A` and `B` to numerical weights. These weights must be non-negative.

# Returns
`Float64`: the weighted Jaccard similarity between `A` and `B`. By convention the
result is `0.0` when the total weight of `A ∪ B` is zero (in particular when both
sets are empty). Disjoint sets have similarity `0.0`.

# Throws
- `ErrorException`: if any element of `A ∪ B` has a negative weight.
- `KeyError`: if any element of `A ∪ B` has no entry in `weights`.

# Examples
```jldoctest; setup = :(using LSHFunctions)
julia> A = Set(["a", "b", "c"]);

julia> B = Set(["b", "c", "d"]);

julia> W = Dict("a" => 0.5, "b" => 2.0, "c" => 1.0, "d" => 1.5);

julia> jaccard(A,B,W)
0.6
```
"""
function jaccard(A::Set{<:K},
                 B::Set{<:K},
                 weights::Dict{K,V})::Float64 where {K,V<:Real}

    union_weight = zero(V)
    intersection_weight = zero(V)

    # Single pass over the union: every element contributes to the denominator,
    # and elements present in both sets also contribute to the numerator.
    # Accumulating by hand (rather than calling `sum` on a generator over
    # `A ∩ B`) keeps the function well-defined when the intersection is empty.
    for el in A ∪ B
        w = weights[el]
        if w < 0
            ErrorException("weights must be non-negative") |> throw
        end
        union_weight += w
        if el in A && el in B
            intersection_weight += w
        end
    end

    # By convention, if the union carries no weight (e.g. A = B = ∅), the
    # Jaccard similarity is zero. This also avoids a 0/0 division.
    if union_weight == zero(V)
        return Float64(0)
    else
        return Float64(intersection_weight / union_weight)
    end
end
373+
327374#= ===================
328375Inner product and norms
329376====================#
0 commit comments