Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
15d0cbc
rem mutable + add Ref(x)
jaksle Feb 6, 2024
cd7723e
broadcast
jaksle Feb 7, 2024
eee2128
Update interp.jl
jaksle Feb 7, 2024
976c630
interface tests
jaksle Feb 7, 2024
e00c4cc
Update interp.jl
jaksle Feb 7, 2024
6ba8bad
comments
jaksle Feb 7, 2024
a06b49a
Update interp.jl
jaksle Feb 7, 2024
9fd648a
Update Project.toml
jaksle Feb 7, 2024
b30a950
Update interp.jl
jaksle Feb 7, 2024
9974b8c
tests update
jaksle Feb 7, 2024
eac3b90
Update interp.jl
jaksle Feb 7, 2024
d4b7976
Merge branch 'interface-fix' of https://github.com/jaksle/KernelDensi…
jaksle Feb 7, 2024
d3ade85
trying Compat for eachslice in Julia 1.0
jaksle Feb 7, 2024
8d44f99
remove Compat
jaksle Feb 7, 2024
84eaa96
adding Compat
jaksle Mar 11, 2024
60db7b0
Compat ver
jaksle Mar 11, 2024
8b0126c
rem Compat, eachslice to maplices + reshape
jaksle Mar 11, 2024
8cd5a1c
alternative to ;;;
jaksle Mar 11, 2024
3e94a5f
init
jaksle Nov 1, 2025
53c04b3
feature_computation
jaksle Nov 2, 2025
fcbeb72
gitignore
jaksle Nov 2, 2025
dc0f6be
Merge branch 'interface-fix' of https://github.com/jaksle/KernelDensi…
jaksle Nov 3, 2025
d04a4a2
Update exampleUse.jl
jaksle Nov 3, 2025
b8e9783
rem interp
jaksle Nov 3, 2025
fec20e3
old interp
jaksle Nov 3, 2025
635c21e
Update KernelDensity.jl
jaksle Nov 3, 2025
5b53859
Update Project.toml
jaksle Nov 3, 2025
c214556
rand + kernel_dist
jaksle Nov 3, 2025
94317fa
ver compat
jaksle Nov 11, 2025
6228640
Update bandwidth_selection.jl
jaksle Nov 11, 2025
258e5f7
Update bandwidth_selection.jl
jaksle Nov 11, 2025
b984532
const field new implementation
jaksle Nov 11, 2025
48a20a8
change to isassigned
jaksle Nov 11, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,20 +1,22 @@
name = "KernelDensity"
uuid = "5ab0869b-81aa-558d-bb23-cbf5423bbe9b"
version = "0.6.8"
authors = ["Simon Byrne and various contributors"]
version = "0.6.10"

[deps]
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
FFTW = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341"
Interpolations = "a98d9a8b-a2ab-59e6-89dd-64a1c18fca59"
IrrationalConstants = "92d709cd-6900-40b7-9082-c6be49f344b6"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"

[compat]
Distributions = "0.23, 0.24, 0.25"
DocStringExtensions = "0.8, 0.9"
FFTW = "1"
Interpolations = "0.9, 0.10, 0.11, 0.12, 0.13, 0.14, 0.15, 0.16"
Interpolations = "0.9, 0.10, 0.11, 0.12, 0.13, 0.14, 0.15"
IrrationalConstants = "0.2.4"
StatsBase = "0.33, 0.34"
julia = "1"

Expand Down
17 changes: 14 additions & 3 deletions src/KernelDensity.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,29 @@ using StatsBase
using Distributions
using Interpolations

import Distributions: twoπ, pdf
import IrrationalConstants: twoπ
import Base: getproperty, propertynames

import Distributions: pdf
import Distributions: ncomponents, component, probs

import FFTW: rfft, irfft

export DiscretisedPDF, KernelEstimate, BandwidthMethod, Silverman, LSCV
export kernel_estimate, precompute!

export kde, kde_lscv, UnivariateKDE, BivariateKDE, InterpKDE, pdf

abstract type AbstractKDE end

# define broadcasting behavior
Base.Broadcast.broadcastable(x::AbstractKDE) = Ref(x)

include("univariate.jl")
include("bivariate.jl")
include("interp.jl")

end # module
include("initialisation.jl")
include("bandwidth_selection.jl")
include("feature_computation.jl")

end
84 changes: 84 additions & 0 deletions src/bandwidth_selection.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@


# Silverman's rule of thumb for KDE bandwidth selection
function get_bandwidth(::Silverman, data::AbstractVector{<:Real}, kernelType = Normal, prior = DiscreteUniform(1,length(data)), alpha::Float64 = 0.9)
# Determine length of data
ndata = length(data)
ndata <= 1 && return alpha

# Calculate width using variance and IQR
var_width = std(data)
q25, q75 = quantile(data, (0.25, 0.75))
quantile_width = (q75 - q25) / 1.34

# Deal with edge cases with 0 IQR or variance
width = min(var_width, quantile_width)
if width == 0.0
if var_width == 0.0
width = 1.0
else
width = var_width
end
end

# Set bandwidth using Silverman's rule of thumb
return alpha * width * ndata^(-0.2), nothing
end

struct LSCV <: BandwidthMethod # default args in constructor for backwards comp
nPoints::Int
initBandwidth::Float64
boundary::Tuple{Float64,Float64}
LSCV(nPoints = 2048, initBandwidth = NaN, boundary = (NaN,NaN)) = new(nPoints, initBandwidth, boundary)
end

# Select bandwidth using least-squares cross validation, from:
# Density Estimation for Statistics and Data Analysis
# B. W. Silverman (1986)
# sections 3.4.3 (pp. 48-52) and 3.5 (pp. 61-66)
function get_bandwidth(lscv::LSCV, data::AbstractVector{<:Real}, kernelType = Normal, prior = DiscreteUniform(1,length(data)))
K = lscv.nPoints
initBandwidth = isnan(lscv.initBandwidth) ? get_bandwidth(Silverman(), data)[1] : lscv.initBandwidth
boundaryLow, boundaryHigh = lscv.boundary
if isnan(boundaryLow) || isnan(boundaryHigh)
lo, hi = extrema(data)
boundaryLow = isnan(boundaryLow) ? lo - 4.0*initBandwidth : boundaryLow
boundaryHigh = isnan(boundaryHigh) ? hi + 4.0*initBandwidth : boundaryHigh
end

ndata = length(data)

midpoints = range(boundaryLow, boundaryHigh, K)

initDen = tabulate(data, midpoints, prior).values

# the ft here is K/ba*sqrt(2pi) * u(s), it is K times the Yl in Silverman's book
ft = rfft(initDen)

ft2 = abs2.(ft)
c = -twoπ/(step(midpoints)*K)
hlb, hub = 0.25*initBandwidth, 1.5*initBandwidth

optimalBandwidth = optimize(hlb, hub) do h
dist = kernel_dist(kernelType, h)
ψ = 0.0
for j in 1:length(ft2)-1
ks = real(cf(dist, j*c))
ψ += ft2[j+1]*(ks-2.0)*ks
end
ψ*step(midpoints)/K + pdf(dist,0.0)/ndata
end

dist = kernel_dist(kernelType, optimalBandwidth)
for j in 0:length(ft)-1
ft[j+1] *= cf(dist, j*c)
end

convolved = irfft(ft, K)

# fix rounding error.
convolved .= max.(0., convolved)

# Invert the Fourier transform to get the KDE
optimalBandwidth, DiscretisedPDF(convolved, midpoints)
end
48 changes: 48 additions & 0 deletions src/exampleUse.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@

using Plots

using Distributions
using .KernelDensity

##

points = [1. 2 3 4 5]
cat = Categorical([1/5,1/5,1/5,1/5,1/5])

ds = DiscretisedPDF(fill(1/20,20),LinRange(0,1,20))

k = KernelEstimate(points,cat, Normal(0,1),1.0, nothing)
ncomponents(k1)
component(k1,3)
probs(k1)

##

X = randn(10^3)

k1 = kernel_estimate(X, 0.2, Normal)

k2 = kernel_estimate(X, Silverman(), Epanechnikov)

k3 = kernel_estimate(X, LSCV(), Epanechnikov)

precompute!(k2,2048,(-5,5))

pdf(k2, 1)
pdf.(k2, [1, 2, 3])
pdf(k2, 1, :precomputed)
pdf.(k2, [1, 2, 3], :precomputed)

cdf(k2, 1)
mean(k2), var(k2)
quantile(k2, 0.9)

rand(k2)

kde = k2.precomputedPDF[] # simpler access can be added
plot(kde.xs,kde.values,label = "precomputed")

xs = LinRange(-4,4,100)
plot!(xs,pdf.(k2, xs),label = "naive")


97 changes: 97 additions & 0 deletions src/feature_computation.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@

# 1D pdf precomputation

function precompute!(ke::UnivariateKernelEstimate, nPoints::Integer = 2048,
boundary::Tuple{<:Real,<:Real} =((lo, hi) = extrema(ke.data); (lo - 4.0*ke.bandwidth, hi + 4.0*ke.bandwidth)))

# find the element type of range in ke.precomputedPDF
T = eltype(typeof(ke).parameters[5].parameters[2])
midpoints = range(T(boundary[1]), T(boundary[2]), Int(nPoints))
ke.precomputedPDF[] = conv(tabulate(vec(ke.data), midpoints, ke.prior), ke.kernel)
end

function tabulate(data::AbstractVector{<:Real}, midpoints::AbstractRange, prior::UnivariateDistribution{Discrete})
npoints = length(midpoints)
s = step(midpoints)

# Set up a grid for discretized data
grid = zeros(Float64, npoints)
ainc = 1.0 / (s*s)

# weighted discretization (cf. Jones and Lotwick)
for (i,x) in enumerate(data)
k = searchsortedfirst(midpoints,x)
j = k-1
if 1 <= j <= npoints-1
grid[j] += (midpoints[k]-x)*ainc*pdf(prior,i)
grid[k] += (x-midpoints[j])*ainc*pdf(prior,i)
end
end

return DiscretisedPDF(grid, midpoints)
end

function conv(den::DiscretisedPDF{1, R, T}, kernel::UnivariateDistribution) where {T<:Real,R<:AbstractRange}
# Transform to Fourier basis
K = length(den.values)
ft = rfft(den.values)

# Convolve fft with characteristic function of kernel
# empirical cf
# = \sum_{n=1}^N e^{i*t*X_n} / N
# = \sum_{k=0}^K e^{i*t*(a+k*s)} N_k / N
# = e^{i*t*a} \sum_{k=0}^K e^{-2pi*i*k*(-t*s*K/2pi)/K} N_k / N
# = A * fft(N_k/N)[-t*s*K/2pi + 1]
c = -twoπ/(step(den.xs)*K)
for j in 0:length(ft)-1
ft[j+1] *= cf(kernel,j*c)
end

# Invert the Fourier transform to get the KDE
convolved = irfft(ft, K)

# fix rounding error.
convolved .= max.(0., convolved)

DiscretisedPDF(convolved, den.xs)
end

# pdf methods

# pdf(ke, x) is implemented in Distributions.jl

function pdf(ke::UnivariateKernelEstimate, x::Real, method::Symbol)
if method == :precomputed
den = ke.precomputedPDF[]
den === nothing && error("PDF must be first precomputed.")
itp_u = interpolate(den.values, BSpline(Quadratic(Line(OnGrid()))))
itp = scale(itp_u, den.xs)
etp = extrapolate(itp, 0.)
return etp.itp(x)
elseif method == :naive
return pdf(ke, x)
else
error("Method not available.")
end
end

# custom broadcast prepares for interpolation only once for all xs
function Base.Broadcast.broadcasted(::typeof(pdf), ke::UnivariateKernelEstimate, xs, method::Symbol)
if method == :precomputed
isassigned(ke.precomputedPDF) || error("PDF must be first precomputed.")
den = ke.precomputedPDF[]
itp_u = interpolate(den.values, BSpline(Quadratic(Line(OnGrid()))))
itp = scale(itp_u, den.xs)
etp = extrapolate(itp, 0.)
return etp.itp.(xs)
elseif method == :naive
return pdf.(ke, x)
else
error("Method not available.")
end
end


# it is possible to add cdf(ke, :precomputed)

# it is possibble to add cf(ke, t)
95 changes: 95 additions & 0 deletions src/initialisation.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@


struct DiscretisedPDF{N,R,T}
values::Array{T,N}
labels::NTuple{N,R}
DiscretisedPDF(values::Array{T,N}, labels::NTuple{N,R}) where {N,T<:Real,R<:AbstractRange} = new{N,R,T}(values, labels)
end

DiscretisedPDF(values::Vector, label::AbstractRange) = DiscretisedPDF(values, (label,))

# provides d.xs, d.ys, d.zs for convenience
function Base.getproperty(d::DiscretisedPDF, prop::Symbol)
if prop == :xs
return d.labels[1]
elseif prop == :ys
return d.labels[2]
elseif prop == :zs
return d.labels[3]
else getfield(d, prop)
end
end
Base.propertynames(::DiscretisedPDF) = (:values, :labels, :xs, :ys, :zs)

# note: in v1.8+ 4 first fields can be made constant using const
struct KernelEstimate{VF<:VariateForm, VS<:ValueSupport, KernelType<:Distribution, PriorDist<:UnivariateDistribution{Discrete}, PDF<:DiscretisedPDF} <: AbstractMixtureModel{VF, VS, KernelType}
data::Matrix{Float64}
prior::PriorDist
kernel::KernelType
bandwidth::Float64
precomputedPDF::Base.RefValue{PDF} # the value here is mutable

# these constructors guarantee type agreement
function KernelEstimate(data::Matrix{Float64}, prior::PriorDist, kernel::KernelType, bandwidth::Float64, precomputedPDF::PDF) where {
PriorDist<:UnivariateDistribution{Discrete},
KernelType <: Distribution,
PDF <: DiscretisedPDF}
VF, VS = supertype(KernelType).parameters
new{VF,VS,KernelType,PriorDist,PDF}(data, prior, kernel, bandwidth, Ref(precomputedPDF))
end
function KernelEstimate(data::Matrix{Float64}, prior::PriorDist, kernel::KernelType, bandwidth::Float64, ::Nothing) where {
PriorDist<:UnivariateDistribution{Discrete},
KernelType <: Distribution}
VF, VS = supertype(KernelType).parameters

# the default PDF type is based on Float64 and Int
R = Base.return_types(range,(Float64,Float64,Int))[1]
PDF = DiscretisedPDF{size(data)[1],R,Float64}
new{VF,VS,KernelType,PriorDist,PDF}(data, prior, kernel, bandwidth, Ref{PDF}())
end

end

UnivariateKernelEstimate{VS, K, P, PDF} = KernelEstimate{Univariate, VS, K, P, PDF}
MultivariateKernelEstimate{VS, K, P, PDF} = KernelEstimate{Multivariate, VS, K, P, PDF}

# KernelEstimate is a scalar
Base.broadcastable(ke::KernelEstimate) = Ref(ke)

# It is possible to add linear transformations a*ke + b

abstract type BandwidthMethod end

# default algorithm
struct Silverman<:BandwidthMethod end

# implementing common interface of AbstractMixtureModel
ncomponents(ke::KernelEstimate) = size(ke.data)[2]
component(ke::UnivariateKernelEstimate, k) = ke.kernel + ke.data[1,k]
component(ke::MultivariateKernelEstimate, k) = ke.kernel + ke.data[:,k]
probs(ke::KernelEstimate) = probs(ke.prior)


# creating KernelEstimate instance

# make kernel density given bandwidth
function kernel_estimate(data::Vector{<:Real}, bandwidth::Real, kernelType = Normal, prior::UnivariateDistribution{Discrete} = DiscreteUniform(1,length(data)))
kernel = kernel_dist(kernelType, bandwidth)
KernelEstimate(reshape(data, 1, length(data)), prior, kernel, Float64(bandwidth), nothing)
end

# find bandwidth, then make kernel density
function kernel_estimate(data::Vector{<:Real}, method::BandwidthMethod = Silverman(), kernelType = Normal, prior::UnivariateDistribution{Discrete} = DiscreteUniform(1,length(data)))
bandwidth, kde = get_bandwidth(method, data, kernelType, prior)
kernel = kernel_dist(kernelType, bandwidth)
KernelEstimate(reshape(data,1,length(data)), prior, kernel, bandwidth, kde)
end

# Can add kernel_estimate which takes prior as a vector.

# construct kernel from bandwidth
kernel_dist(::Type{Normal}, bandwidth::Real) = Normal(0.0, bandwidth)
kernel_dist(::Type{Uniform}, bandwidth::Real) = Uniform(-√3*bandwidth, √3*bandwidth)

const LocationScale = Union{Laplace, Logistic, SymTriangularDist, Cosine, Epanechnikov}
kernel_dist(::Type{D},w::Real) where {D<:LocationScale} = D(0.0, w/std(D(0.0,1.0)))
Loading
Loading