From 15d0cbc7868a9f06b46fea6dfe63efeb7f25bdca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20=C5=9Al=C4=99zak?= <128084860+jaksle@users.noreply.github.com> Date: Tue, 6 Feb 2024 18:41:17 +0100 Subject: [PATCH 01/31] rem mutable + add Ref(x) --- .gitignore | 4 +++- src/KernelDensity.jl | 2 ++ src/interp.jl | 3 +-- src/univariate.jl | 2 +- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index ae3a70e3..291b930a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ examples/.ipynb_checkpoints/* -Manifest.toml \ No newline at end of file +Manifest.toml +.vscode/settings.json +tmp.jl diff --git a/src/KernelDensity.jl b/src/KernelDensity.jl index 722e118d..b6201c8a 100644 --- a/src/KernelDensity.jl +++ b/src/KernelDensity.jl @@ -12,6 +12,8 @@ export kde, kde_lscv, UnivariateKDE, BivariateKDE, InterpKDE, pdf abstract type AbstractKDE end +Base.broadcastable(x::AbstractKDE) = Ref(x) + include("univariate.jl") include("bivariate.jl") include("interp.jl") diff --git a/src/interp.jl b/src/interp.jl index 05e648e6..4503e43d 100644 --- a/src/interp.jl +++ b/src/interp.jl @@ -1,12 +1,11 @@ import Interpolations: interpolate, scale -mutable struct InterpKDE{K,I} <: AbstractKDE +struct InterpKDE{K,I} <: AbstractKDE kde::K itp::I InterpKDE{K,I}(kde::K, itp::I) where {K,I} = new{K,I}(kde, itp) end - function InterpKDE(kde::UnivariateKDE, opts...) itp_u = interpolate(kde.density, opts...) itp = scale(itp_u, kde.x) diff --git a/src/univariate.jl b/src/univariate.jl index 5f71a809..993c4339 100644 --- a/src/univariate.jl +++ b/src/univariate.jl @@ -13,7 +13,7 @@ sum(density) * step(x) ≈ 1 $(FIELDS) """ -mutable struct UnivariateKDE{R<:AbstractRange} <: AbstractKDE +struct UnivariateKDE{R<:AbstractRange} <: AbstractKDE "Gridpoints for evaluating the density." x::R "Kernel density at corresponding gridpoints `x`." From cd7723ea3a238b4153781f076f3b63cc1303a263 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20=C5=9Al=C4=99zak?= <128084860+jaksle@users.noreply.github.com> Date: Wed, 7 Feb 2024 01:09:09 +0100 Subject: [PATCH 02/31] broadcast --- .gitignore | 1 + Project.toml | 5 +++++ src/interp.jl | 5 +++-- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 291b930a..d66bf271 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ examples/.ipynb_checkpoints/* Manifest.toml .vscode/settings.json tmp.jl +Project.toml diff --git a/Project.toml b/Project.toml index e0df42fe..7a9ff6df 100644 --- a/Project.toml +++ b/Project.toml @@ -4,10 +4,15 @@ authors = ["Simon Byrne and various contributors"] version = "0.6.8" [deps] +BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" FFTW = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341" Interpolations = "a98d9a8b-a2ab-59e6-89dd-64a1c18fca59" +Loess = "4345ca2d-374a-55d4-8d30-97f9976e7612" +LsqFit = "2fda8390-95c7-5789-9bda-21331edee243" +Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" +Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" [compat] diff --git a/src/interp.jl b/src/interp.jl index 4503e43d..cb125f20 100644 --- a/src/interp.jl +++ b/src/interp.jl @@ -23,9 +23,10 @@ function InterpKDE(kde::BivariateKDE, opts...) end InterpKDE(kde::BivariateKDE) = InterpKDE(kde::BivariateKDE, BSpline(Quadratic(Line(OnGrid())))) +pdf(k::UnivariateKDE,x) = pdf(InterpKDE(k),x) pdf(ik::InterpKDE,x::Real...) = ik.itp(x...) pdf(ik::InterpKDE,xs::AbstractVector) = [ik.itp(x) for x in xs] -pdf(ik::InterpKDE,xs::AbstractVector,ys::AbstractVector) = [ik.itp(x,y) for x in xs, y in ys] +Base.broadcasted(::typeof(pdf),k::UnivariateKDE,xs) = InterpKDE(k).itp.(xs) -pdf(k::UnivariateKDE,x) = pdf(InterpKDE(k),x) pdf(k::BivariateKDE,x,y) = pdf(InterpKDE(k),x,y) +pdf(ik::InterpKDE,xs::AbstractVector,ys::AbstractVector) = [ik.itp(x,y) for x in xs, y in ys] From eee2128baa87705113c893fb3d1a4b5360581e08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20=C5=9Al=C4=99zak?= <128084860+jaksle@users.noreply.github.com> Date: Wed, 7 Feb 2024 01:39:55 +0100 Subject: [PATCH 03/31] Update interp.jl --- src/interp.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/interp.jl b/src/interp.jl index cb125f20..435a9acc 100644 --- a/src/interp.jl +++ b/src/interp.jl @@ -26,7 +26,7 @@ InterpKDE(kde::BivariateKDE) = InterpKDE(kde::BivariateKDE, BSpline(Quadratic(Li pdf(k::UnivariateKDE,x) = pdf(InterpKDE(k),x) pdf(ik::InterpKDE,x::Real...) = ik.itp(x...) pdf(ik::InterpKDE,xs::AbstractVector) = [ik.itp(x) for x in xs] -Base.broadcasted(::typeof(pdf),k::UnivariateKDE,xs) = InterpKDE(k).itp.(xs) +Base.broadcast(::typeof(pdf),k::UnivariateKDE,xs) = InterpKDE(k).itp.(xs) pdf(k::BivariateKDE,x,y) = pdf(InterpKDE(k),x,y) pdf(ik::InterpKDE,xs::AbstractVector,ys::AbstractVector) = [ik.itp(x,y) for x in xs, y in ys] From 976c63088369cdc97fd249989725a0cf403fa2ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20=C5=9Al=C4=99zak?= <128084860+jaksle@users.noreply.github.com> Date: Wed, 7 Feb 2024 14:53:33 +0100 Subject: [PATCH 04/31] interface tests --- src/interp.jl | 17 ++++++++++++----- src/univariate.jl | 2 +- test/interp.jl | 16 ++++++++++++++++ 3 files changed, 29 insertions(+), 6 deletions(-) diff --git a/src/interp.jl b/src/interp.jl index cb125f20..e8e1afc9 100644 --- a/src/interp.jl +++ b/src/interp.jl @@ -1,6 +1,6 @@ import Interpolations: interpolate, scale -struct InterpKDE{K,I} <: AbstractKDE +mutable struct InterpKDE{K,I} <: AbstractKDE kde::K itp::I InterpKDE{K,I}(kde::K, itp::I) where {K,I} = new{K,I}(kde, itp) @@ -23,10 +23,17 @@ function InterpKDE(kde::BivariateKDE, opts...) end InterpKDE(kde::BivariateKDE) = InterpKDE(kde::BivariateKDE, BSpline(Quadratic(Line(OnGrid())))) -pdf(k::UnivariateKDE,x) = pdf(InterpKDE(k),x) pdf(ik::InterpKDE,x::Real...) = ik.itp(x...) -pdf(ik::InterpKDE,xs::AbstractVector) = [ik.itp(x) for x in xs] -Base.broadcasted(::typeof(pdf),k::UnivariateKDE,xs) = InterpKDE(k).itp.(xs) + +# interface implementation +# it should be consistent with Distributions.pdf + +pdf(k::UnivariateKDE,x) = pdf(InterpKDE(k),x) +Base.broadcasted(::typeof(pdf),k::UnivariateKDE,xs) = Base.broadcasted(InterpKDE(k).itp, xs) +pdf(ik::InterpKDE,xs::AbstractVector) = pdf.(ik, xs) pdf(k::BivariateKDE,x,y) = pdf(InterpKDE(k),x,y) -pdf(ik::InterpKDE,xs::AbstractVector,ys::AbstractVector) = [ik.itp(x,y) for x in xs, y in ys] +pdf(ik::InterpKDE,xs::AbstractVector,ys::AbstractVector) = ik.itp.(xs,ys') +pdf(k::BivariateKDE, M) = pdf(InterpKDE(k),M) +pdf(ik::InterpKDE, M::AbstractArray{<:Real, 1}) = ik.itp(M[1],M[2]) +pdf(ik::InterpKDE, M::AbstractArray{<:Real, N}) where N = pdf.(ik,eachslice(M, dims=ntuple(i->i+1, N-1)) ) diff --git a/src/univariate.jl b/src/univariate.jl index 993c4339..5f71a809 100644 --- a/src/univariate.jl +++ b/src/univariate.jl @@ -13,7 +13,7 @@ sum(density) * step(x) ≈ 1 $(FIELDS) """ -struct UnivariateKDE{R<:AbstractRange} <: AbstractKDE +mutable struct UnivariateKDE{R<:AbstractRange} <: AbstractKDE "Gridpoints for evaluating the density." x::R "Kernel density at corresponding gridpoints `x`." diff --git a/test/interp.jl b/test/interp.jl index ed93d914..1a917145 100644 --- a/test/interp.jl +++ b/test/interp.jl @@ -23,3 +23,19 @@ k = kde(([0.0],[0.0]), bandwidth=(1.0, 1.0)) @test pdf(k, +10.0, 0.0) ≤ pdf(MvNormal(2, 1.0), [+10.0, 0.0]) @test pdf(k, 0.0, -10.0) ≤ pdf(MvNormal(2, 1.0), [0.0, -10.0]) @test pdf(k, 0.0, +10.0) ≤ pdf(MvNormal(2, 1.0), [0.0, +10.0]) + +@testset "pdf method interface" begin + k = kde([-1., 1.]) + ik = InterpKDE(k) + + @test pdf.(k, [0., 1.]) ≈ [pdf(k, 0.), pdf(k, 1.)] ≈ pdf.(k,[0., 1.]) ≈ [pdf(ik, 0.), pdf(ik, 1.)] + @test all( pdf.(k, (0., 1.)) .≈ (pdf(k, 0.), pdf(k, 1.)) ) + @test pdf.(k, [0. 1.; 2. -1.]) ≈ [pdf(k, 0.) pdf(k, 1.); pdf(k, 2.) pdf(k, -1.)] + + k2d = kde(([-1., 1.], [0., 1.])) + ik2d = InterpKDE(k2d) + + @test pdf(k2d, [0.5, 0.1]) ≈ pdf(k2d, 0.5, 0.1) ≈ pdf(ik2d, 0.5, 0.1) + @test pdf(k2d, [0.5 1.; 0.1 1.]) ≈ [pdf(ik2d, 0.5, 0.1), pdf(ik2d, 1., 1.)] + @test pdf(k2d, [0.5; 1. ;;; 0.1; 1.]) ≈ [pdf(ik2d, 0.5, 1.) pdf(ik2d, 0.1, 1.)] +end \ No newline at end of file From e00c4ccd5c48ad3a5ffc7e26e9d5a1a1b9202882 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20=C5=9Al=C4=99zak?= <128084860+jaksle@users.noreply.github.com> Date: Wed, 7 Feb 2024 16:12:44 +0100 Subject: [PATCH 05/31] Update interp.jl --- src/interp.jl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/interp.jl b/src/interp.jl index e8e1afc9..b294827f 100644 --- a/src/interp.jl +++ b/src/interp.jl @@ -30,10 +30,9 @@ pdf(ik::InterpKDE,x::Real...) = ik.itp(x...) pdf(k::UnivariateKDE,x) = pdf(InterpKDE(k),x) Base.broadcasted(::typeof(pdf),k::UnivariateKDE,xs) = Base.broadcasted(InterpKDE(k).itp, xs) -pdf(ik::InterpKDE,xs::AbstractVector) = pdf.(ik, xs) pdf(k::BivariateKDE,x,y) = pdf(InterpKDE(k),x,y) pdf(ik::InterpKDE,xs::AbstractVector,ys::AbstractVector) = ik.itp.(xs,ys') pdf(k::BivariateKDE, M) = pdf(InterpKDE(k),M) -pdf(ik::InterpKDE, M::AbstractArray{<:Real, 1}) = ik.itp(M[1],M[2]) +pdf(ik::InterpKDE, V::AbstractVector) = ik.itp(V[1],V[2]) pdf(ik::InterpKDE, M::AbstractArray{<:Real, N}) where N = pdf.(ik,eachslice(M, dims=ntuple(i->i+1, N-1)) ) From 6ba8bad9e553e27a6ffca65462c9f6240427d2b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20=C5=9Al=C4=99zak?= <128084860+jaksle@users.noreply.github.com> Date: Wed, 7 Feb 2024 18:06:20 +0100 Subject: [PATCH 06/31] comments --- src/interp.jl | 8 ++++++-- test/interp.jl | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/interp.jl b/src/interp.jl index b294827f..ed5c0aa6 100644 --- a/src/interp.jl +++ b/src/interp.jl @@ -25,14 +25,18 @@ InterpKDE(kde::BivariateKDE) = InterpKDE(kde::BivariateKDE, BSpline(Quadratic(Li pdf(ik::InterpKDE,x::Real...) = ik.itp(x...) -# interface implementation +# pdf interface implementation # it should be consistent with Distributions.pdf +# 1 dimension pdf(k::UnivariateKDE,x) = pdf(InterpKDE(k),x) Base.broadcasted(::typeof(pdf),k::UnivariateKDE,xs) = Base.broadcasted(InterpKDE(k).itp, xs) +# 2 dimensions pdf(k::BivariateKDE,x,y) = pdf(InterpKDE(k),x,y) pdf(ik::InterpKDE,xs::AbstractVector,ys::AbstractVector) = ik.itp.(xs,ys') pdf(k::BivariateKDE, M) = pdf(InterpKDE(k),M) -pdf(ik::InterpKDE, V::AbstractVector) = ik.itp(V[1],V[2]) pdf(ik::InterpKDE, M::AbstractArray{<:Real, N}) where N = pdf.(ik,eachslice(M, dims=ntuple(i->i+1, N-1)) ) + +# any dimension +pdf(ik::InterpKDE, V::AbstractVector) = ik.itp(V...) diff --git a/test/interp.jl b/test/interp.jl index 1a917145..d2f7c4a6 100644 --- a/test/interp.jl +++ b/test/interp.jl @@ -35,7 +35,7 @@ k = kde(([0.0],[0.0]), bandwidth=(1.0, 1.0)) k2d = kde(([-1., 1.], [0., 1.])) ik2d = InterpKDE(k2d) - @test pdf(k2d, [0.5, 0.1]) ≈ pdf(k2d, 0.5, 0.1) ≈ pdf(ik2d, 0.5, 0.1) + @test pdf(k2d, [0.5, 0.1]) ≈ pdf(k2d, [0.5; 0.1]) ≈ pdf(k2d, 0.5, 0.1) ≈ pdf(ik2d, 0.5, 0.1) @test pdf(k2d, [0.5 1.; 0.1 1.]) ≈ [pdf(ik2d, 0.5, 0.1), pdf(ik2d, 1., 1.)] @test pdf(k2d, [0.5; 1. ;;; 0.1; 1.]) ≈ [pdf(ik2d, 0.5, 1.) pdf(ik2d, 0.1, 1.)] end \ No newline at end of file From a06b49a71ba0343f063c5d14b07b48924546a3fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20=C5=9Al=C4=99zak?= <128084860+jaksle@users.noreply.github.com> Date: Wed, 7 Feb 2024 18:16:55 +0100 Subject: [PATCH 07/31] Update interp.jl --- src/interp.jl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/interp.jl b/src/interp.jl index ed5c0aa6..b92525b4 100644 --- a/src/interp.jl +++ b/src/interp.jl @@ -23,11 +23,15 @@ function InterpKDE(kde::BivariateKDE, opts...) end InterpKDE(kde::BivariateKDE) = InterpKDE(kde::BivariateKDE, BSpline(Quadratic(Line(OnGrid())))) -pdf(ik::InterpKDE,x::Real...) = ik.itp(x...) # pdf interface implementation # it should be consistent with Distributions.pdf +# any dimension +pdf(ik::InterpKDE,x::Real...) = ik.itp(x...) +pdf(ik::InterpKDE, V::AbstractVector) = ik.itp(V...) +pdf(ik::InterpKDE, M::AbstractArray{<:Real, N}) where N = pdf.(ik,eachslice(M, dims=ntuple(i->i+1, N-1)) ) + # 1 dimension pdf(k::UnivariateKDE,x) = pdf(InterpKDE(k),x) Base.broadcasted(::typeof(pdf),k::UnivariateKDE,xs) = Base.broadcasted(InterpKDE(k).itp, xs) @@ -36,7 +40,3 @@ Base.broadcasted(::typeof(pdf),k::UnivariateKDE,xs) = Base.broadcasted(InterpKDE pdf(k::BivariateKDE,x,y) = pdf(InterpKDE(k),x,y) pdf(ik::InterpKDE,xs::AbstractVector,ys::AbstractVector) = ik.itp.(xs,ys') pdf(k::BivariateKDE, M) = pdf(InterpKDE(k),M) -pdf(ik::InterpKDE, M::AbstractArray{<:Real, N}) where N = pdf.(ik,eachslice(M, dims=ntuple(i->i+1, N-1)) ) - -# any dimension -pdf(ik::InterpKDE, V::AbstractVector) = ik.itp(V...) From 9fd648a0462e69c99f6a6ba607651b457ec429b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20=C5=9Al=C4=99zak?= <128084860+jaksle@users.noreply.github.com> Date: Wed, 7 Feb 2024 19:33:13 +0100 Subject: [PATCH 08/31] Update Project.toml --- Project.toml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/Project.toml b/Project.toml index 7a9ff6df..e0df42fe 100644 --- a/Project.toml +++ b/Project.toml @@ -4,15 +4,10 @@ authors = ["Simon Byrne and various contributors"] version = "0.6.8" [deps] -BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" FFTW = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341" Interpolations = "a98d9a8b-a2ab-59e6-89dd-64a1c18fca59" -Loess = "4345ca2d-374a-55d4-8d30-97f9976e7612" -LsqFit = "2fda8390-95c7-5789-9bda-21331edee243" -Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" -Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" [compat] From b30a95025b9c1bc5f3536d54b77685b75f034a9c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20=C5=9Al=C4=99zak?= <128084860+jaksle@users.noreply.github.com> Date: Wed, 7 Feb 2024 19:43:18 +0100 Subject: [PATCH 09/31] Update interp.jl --- test/interp.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/interp.jl b/test/interp.jl index d2f7c4a6..cbdeb7c5 100644 --- a/test/interp.jl +++ b/test/interp.jl @@ -13,7 +13,7 @@ k = kde((X,Y)) # Try to evaluate the KDE outside the interpolation domain # The KDE is allowed to be zero, but it should not be greater than the exact solution k = kde([0.0], bandwidth=1.0) -@test pdf(k, k.x) ≈ k.density +@test pdf.(k, k.x) ≈ k.density @test pdf(k, -10.0) ≤ pdf(Normal(), -10.0) @test pdf(k, +10.0) ≤ pdf(Normal(), +10.0) From 9974b8c935156c1d622284d1b8012f4bb3b7a035 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20=C5=9Al=C4=99zak?= <128084860+jaksle@users.noreply.github.com> Date: Wed, 7 Feb 2024 19:47:56 +0100 Subject: [PATCH 10/31] tests update --- test/interp.jl | 48 +++++++++++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/test/interp.jl b/test/interp.jl index cbdeb7c5..3b02de07 100644 --- a/test/interp.jl +++ b/test/interp.jl @@ -1,28 +1,30 @@ using Test using KernelDensity -X = randn(100) -Y = randn(100) - -k = kde(X) -@test pdf(k, k.x) ≈ k.density - -k = kde((X,Y)) -@test pdf(k, k.x, k.y) ≈ k.density - -# Try to evaluate the KDE outside the interpolation domain -# The KDE is allowed to be zero, but it should not be greater than the exact solution -k = kde([0.0], bandwidth=1.0) -@test pdf.(k, k.x) ≈ k.density -@test pdf(k, -10.0) ≤ pdf(Normal(), -10.0) -@test pdf(k, +10.0) ≤ pdf(Normal(), +10.0) - -k = kde(([0.0],[0.0]), bandwidth=(1.0, 1.0)) -@test pdf(k, k.x, k.y) ≈ k.density -@test pdf(k, -10.0, 0.0) ≤ pdf(MvNormal(2, 1.0), [-10.0, 0.0]) -@test pdf(k, +10.0, 0.0) ≤ pdf(MvNormal(2, 1.0), [+10.0, 0.0]) -@test pdf(k, 0.0, -10.0) ≤ pdf(MvNormal(2, 1.0), [0.0, -10.0]) -@test pdf(k, 0.0, +10.0) ≤ pdf(MvNormal(2, 1.0), [0.0, +10.0]) +@testset "interpolation computation" begin + X = randn(100) + Y = randn(100) + + k = kde(X) + @test pdf.(k, k.x) ≈ k.density + + k = kde((X,Y)) + @test pdf(k, k.x, k.y) ≈ k.density + + # Try to evaluate the KDE outside the interpolation domain + # The KDE is allowed to be zero, but it should not be greater than the exact solution + k = kde([0.0], bandwidth=1.0) + @test pdf.(k, k.x) ≈ k.density + @test pdf(k, -10.0) ≤ pdf(Normal(), -10.0) + @test pdf(k, +10.0) ≤ pdf(Normal(), +10.0) + + k = kde(([0.0],[0.0]), bandwidth=(1.0, 1.0)) + @test pdf(k, k.x, k.y) ≈ k.density + @test pdf(k, -10.0, 0.0) ≤ pdf(MvNormal(2, 1.0), [-10.0, 0.0]) + @test pdf(k, +10.0, 0.0) ≤ pdf(MvNormal(2, 1.0), [+10.0, 0.0]) + @test pdf(k, 0.0, -10.0) ≤ pdf(MvNormal(2, 1.0), [0.0, -10.0]) + @test pdf(k, 0.0, +10.0) ≤ pdf(MvNormal(2, 1.0), [0.0, +10.0]) +end @testset "pdf method interface" begin k = kde([-1., 1.]) @@ -38,4 +40,4 @@ k = kde(([0.0],[0.0]), bandwidth=(1.0, 1.0)) @test pdf(k2d, [0.5, 0.1]) ≈ pdf(k2d, [0.5; 0.1]) ≈ pdf(k2d, 0.5, 0.1) ≈ pdf(ik2d, 0.5, 0.1) @test pdf(k2d, [0.5 1.; 0.1 1.]) ≈ [pdf(ik2d, 0.5, 0.1), pdf(ik2d, 1., 1.)] @test pdf(k2d, [0.5; 1. ;;; 0.1; 1.]) ≈ [pdf(ik2d, 0.5, 1.) pdf(ik2d, 0.1, 1.)] -end \ No newline at end of file +end From eac3b90ca3840977407d3527d8d25980eb323074 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20=C5=9Al=C4=99zak?= <128084860+jaksle@users.noreply.github.com> Date: Wed, 7 Feb 2024 20:02:14 +0100 Subject: [PATCH 11/31] Update interp.jl --- src/interp.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/interp.jl b/src/interp.jl index b92525b4..3f2e726a 100644 --- a/src/interp.jl +++ b/src/interp.jl @@ -34,7 +34,7 @@ pdf(ik::InterpKDE, M::AbstractArray{<:Real, N}) where N = pdf.(ik,eachslice(M, d # 1 dimension pdf(k::UnivariateKDE,x) = pdf(InterpKDE(k),x) -Base.broadcasted(::typeof(pdf),k::UnivariateKDE,xs) = Base.broadcasted(InterpKDE(k).itp, xs) +Base.Broadcast.broadcasted(::typeof(pdf),k::UnivariateKDE,xs) = Base.Broadcast.broadcasted(InterpKDE(k).itp, xs) # 2 dimensions pdf(k::BivariateKDE,x,y) = pdf(InterpKDE(k),x,y) From d3ade85b48fa4ac7a7c959ea86a49535f13e0ea6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20=C5=9Al=C4=99zak?= <128084860+jaksle@users.noreply.github.com> Date: Wed, 7 Feb 2024 23:49:47 +0100 Subject: [PATCH 12/31] trying Compat for eachslice in Julia 1.0 --- Project.toml | 1 + src/KernelDensity.jl | 1 + src/interp.jl | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index e0df42fe..e9ed4a9d 100644 --- a/Project.toml +++ b/Project.toml @@ -11,6 +11,7 @@ Interpolations = "a98d9a8b-a2ab-59e6-89dd-64a1c18fca59" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" [compat] +Compat = "2.2.0" Distributions = "0.23, 0.24, 0.25" DocStringExtensions = "0.8, 0.9" FFTW = "1" diff --git a/src/KernelDensity.jl b/src/KernelDensity.jl index b6201c8a..61cde468 100644 --- a/src/KernelDensity.jl +++ b/src/KernelDensity.jl @@ -4,6 +4,7 @@ using DocStringExtensions: TYPEDEF, FIELDS using StatsBase using Distributions using Interpolations +using Compat import Distributions: twoπ, pdf import FFTW: rfft, irfft diff --git a/src/interp.jl b/src/interp.jl index 3f2e726a..a404ec7d 100644 --- a/src/interp.jl +++ b/src/interp.jl @@ -30,7 +30,7 @@ InterpKDE(kde::BivariateKDE) = InterpKDE(kde::BivariateKDE, BSpline(Quadratic(Li # any dimension pdf(ik::InterpKDE,x::Real...) = ik.itp(x...) pdf(ik::InterpKDE, V::AbstractVector) = ik.itp(V...) -pdf(ik::InterpKDE, M::AbstractArray{<:Real, N}) where N = pdf.(ik,eachslice(M, dims=ntuple(i->i+1, N-1)) ) +@compat pdf(ik::InterpKDE, M::AbstractArray{<:Real, N}) where N = pdf.(ik,eachslice(M, dims=ntuple(i->i+1, N-1)) ) # 1 dimension pdf(k::UnivariateKDE,x) = pdf(InterpKDE(k),x) From 8d44f99d30d570338b1bd9948a249f69a952076d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20=C5=9Al=C4=99zak?= <128084860+jaksle@users.noreply.github.com> Date: Thu, 8 Feb 2024 00:02:56 +0100 Subject: [PATCH 13/31] remove Compat --- Project.toml | 1 - src/KernelDensity.jl | 1 - src/interp.jl | 2 +- 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/Project.toml b/Project.toml index e9ed4a9d..e0df42fe 100644 --- a/Project.toml +++ b/Project.toml @@ -11,7 +11,6 @@ Interpolations = "a98d9a8b-a2ab-59e6-89dd-64a1c18fca59" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" [compat] -Compat = "2.2.0" Distributions = "0.23, 0.24, 0.25" DocStringExtensions = "0.8, 0.9" FFTW = "1" diff --git a/src/KernelDensity.jl b/src/KernelDensity.jl index 61cde468..b6201c8a 100644 --- a/src/KernelDensity.jl +++ b/src/KernelDensity.jl @@ -4,7 +4,6 @@ using DocStringExtensions: TYPEDEF, FIELDS using StatsBase using Distributions using Interpolations -using Compat import Distributions: twoπ, pdf import FFTW: rfft, irfft diff --git a/src/interp.jl b/src/interp.jl index a404ec7d..3f2e726a 100644 --- a/src/interp.jl +++ b/src/interp.jl @@ -30,7 +30,7 @@ InterpKDE(kde::BivariateKDE) = InterpKDE(kde::BivariateKDE, BSpline(Quadratic(Li # any dimension pdf(ik::InterpKDE,x::Real...) = ik.itp(x...) pdf(ik::InterpKDE, V::AbstractVector) = ik.itp(V...) -@compat pdf(ik::InterpKDE, M::AbstractArray{<:Real, N}) where N = pdf.(ik,eachslice(M, dims=ntuple(i->i+1, N-1)) ) +pdf(ik::InterpKDE, M::AbstractArray{<:Real, N}) where N = pdf.(ik,eachslice(M, dims=ntuple(i->i+1, N-1)) ) # 1 dimension pdf(k::UnivariateKDE,x) = pdf(InterpKDE(k),x) From 84eaa9689b7272e5da9ccbab765dae88bd7bd3eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20=C5=9Al=C4=99zak?= <128084860+jaksle@users.noreply.github.com> Date: Mon, 11 Mar 2024 19:33:52 +0100 Subject: [PATCH 14/31] adding Compat --- Project.toml | 2 ++ src/interp.jl | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index e0df42fe..e3bd9594 100644 --- a/Project.toml +++ b/Project.toml @@ -4,6 +4,7 @@ authors = ["Simon Byrne and various contributors"] version = "0.6.8" [deps] +Compat = "34da2185-b29b-5c13-b0c7-acf172513d20" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" FFTW = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341" @@ -17,6 +18,7 @@ FFTW = "1" Interpolations = "0.9, 0.10, 0.11, 0.12, 0.13, 0.14, 0.15" StatsBase = "0.33, 0.34" julia = "1" +Compat = "3.22, 4" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/src/interp.jl b/src/interp.jl index 3f2e726a..ea56df1b 100644 --- a/src/interp.jl +++ b/src/interp.jl @@ -1,5 +1,7 @@ +import Compat: @compat import Interpolations: interpolate, scale + mutable struct InterpKDE{K,I} <: AbstractKDE kde::K itp::I @@ -30,7 +32,7 @@ InterpKDE(kde::BivariateKDE) = InterpKDE(kde::BivariateKDE, BSpline(Quadratic(Li # any dimension pdf(ik::InterpKDE,x::Real...) = ik.itp(x...) pdf(ik::InterpKDE, V::AbstractVector) = ik.itp(V...) -pdf(ik::InterpKDE, M::AbstractArray{<:Real, N}) where N = pdf.(ik,eachslice(M, dims=ntuple(i->i+1, N-1)) ) +pdf(ik::InterpKDE, M::AbstractArray{<:Real, N}) where N = @compat pdf.(ik,eachslice(M, dims=ntuple(i->i+1, N-1)) ) # 1 dimension pdf(k::UnivariateKDE,x) = pdf(InterpKDE(k),x) From 60db7b0a93687f9a586d7086883383623b118601 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20=C5=9Al=C4=99zak?= <128084860+jaksle@users.noreply.github.com> Date: Mon, 11 Mar 2024 19:58:20 +0100 Subject: [PATCH 15/31] Compat ver --- Project.toml | 2 +- src/interp.jl | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Project.toml b/Project.toml index e3bd9594..c43890d0 100644 --- a/Project.toml +++ b/Project.toml @@ -18,7 +18,7 @@ FFTW = "1" Interpolations = "0.9, 0.10, 0.11, 0.12, 0.13, 0.14, 0.15" StatsBase = "0.33, 0.34" julia = "1" -Compat = "3.22, 4" +Compat = "2.2.0, 4" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/src/interp.jl b/src/interp.jl index ea56df1b..c4498242 100644 --- a/src/interp.jl +++ b/src/interp.jl @@ -1,4 +1,4 @@ -import Compat: @compat +import Compat: eachslice import Interpolations: interpolate, scale @@ -32,7 +32,7 @@ InterpKDE(kde::BivariateKDE) = InterpKDE(kde::BivariateKDE, BSpline(Quadratic(Li # any dimension pdf(ik::InterpKDE,x::Real...) = ik.itp(x...) pdf(ik::InterpKDE, V::AbstractVector) = ik.itp(V...) -pdf(ik::InterpKDE, M::AbstractArray{<:Real, N}) where N = @compat pdf.(ik,eachslice(M, dims=ntuple(i->i+1, N-1)) ) +pdf(ik::InterpKDE, M::AbstractArray{<:Real, N}) where N = pdf.(ik,eachslice(M, dims=ntuple(i->i+1, N-1)) ) # 1 dimension pdf(k::UnivariateKDE,x) = pdf(InterpKDE(k),x) From 8b0126c67426cf1ed8761e0edf8730032d717e00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20=C5=9Al=C4=99zak?= <128084860+jaksle@users.noreply.github.com> Date: Mon, 11 Mar 2024 23:19:52 +0100 Subject: [PATCH 16/31] rem Compat, eachslice to maplices + reshape --- Project.toml | 2 -- src/interp.jl | 3 +-- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/Project.toml b/Project.toml index c43890d0..e0df42fe 100644 --- a/Project.toml +++ b/Project.toml @@ -4,7 +4,6 @@ authors = ["Simon Byrne and various contributors"] version = "0.6.8" [deps] -Compat = "34da2185-b29b-5c13-b0c7-acf172513d20" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" FFTW = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341" @@ -18,7 +17,6 @@ FFTW = "1" Interpolations = "0.9, 0.10, 0.11, 0.12, 0.13, 0.14, 0.15" StatsBase = "0.33, 0.34" julia = "1" -Compat = "2.2.0, 4" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/src/interp.jl b/src/interp.jl index c4498242..748dbc90 100644 --- a/src/interp.jl +++ b/src/interp.jl @@ -1,4 +1,3 @@ -import Compat: eachslice import Interpolations: interpolate, scale @@ -32,7 +31,7 @@ InterpKDE(kde::BivariateKDE) = InterpKDE(kde::BivariateKDE, BSpline(Quadratic(Li # any dimension pdf(ik::InterpKDE,x::Real...) = ik.itp(x...) pdf(ik::InterpKDE, V::AbstractVector) = ik.itp(V...) -pdf(ik::InterpKDE, M::AbstractArray{<:Real, N}) where N = pdf.(ik,eachslice(M, dims=ntuple(i->i+1, N-1)) ) +pdf(ik::InterpKDE, M::AbstractArray{<:Real, N}) where N = reshape(mapslices(v->pdf(ik, v), M, dims = 1), size(M)[2:end]) # 1 dimension pdf(k::UnivariateKDE,x) = pdf(InterpKDE(k),x) From 8cd5a1c6c1ae1bae7620948319900d92a79f556a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20=C5=9Al=C4=99zak?= <128084860+jaksle@users.noreply.github.com> Date: Mon, 11 Mar 2024 23:36:05 +0100 Subject: [PATCH 17/31] alternative to ;;; --- test/interp.jl | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/test/interp.jl b/test/interp.jl index 3b02de07..b2ea5411 100644 --- a/test/interp.jl +++ b/test/interp.jl @@ -39,5 +39,12 @@ end @test pdf(k2d, [0.5, 0.1]) ≈ pdf(k2d, [0.5; 0.1]) ≈ pdf(k2d, 0.5, 0.1) ≈ pdf(ik2d, 0.5, 0.1) @test pdf(k2d, [0.5 1.; 0.1 1.]) ≈ [pdf(ik2d, 0.5, 0.1), pdf(ik2d, 1., 1.)] - @test pdf(k2d, [0.5; 1. ;;; 0.1; 1.]) ≈ [pdf(ik2d, 0.5, 1.) pdf(ik2d, 0.1, 1.)] + @static if VERSION >= v"1.1" + @test pdf(k2d, [0.5; 1. ;;; 0.1; 1.]) ≈ [pdf(ik2d, 0.5, 1.) pdf(ik2d, 0.1, 1.)] + else + M = zeros(2, 1, 2) + M[:,1,1] .= [0.5, 1] + M[:,1,2] .= [0.1, 1] + @test pdf(k2d, M) ≈ [pdf(ik2d, 0.5, 1.) pdf(ik2d, 0.1, 1.)] + end end From 3e94a5f6d871c491c3ee3371afacab3c7fbf69c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20=C5=9Al=C4=99zak?= <128084860+jaksle@users.noreply.github.com> Date: Sat, 1 Nov 2025 20:37:27 +0100 Subject: [PATCH 18/31] init --- Project.toml | 4 +- src/KernelDensity.jl | 15 +++- src/bandwidth_selection.jl | 74 +++++++++++++++++++ src/initialisation.jl | 141 +++++++++++++++++++++++++++++++++++++ src/pull.md | 15 ++++ src/test.jl | 39 ++++++++++ 6 files changed, 285 insertions(+), 3 deletions(-) create mode 100644 src/bandwidth_selection.jl create mode 100644 src/initialisation.jl create mode 100644 src/pull.md create mode 100644 src/test.jl diff --git a/Project.toml b/Project.toml index e0df42fe..1db450df 100644 --- a/Project.toml +++ b/Project.toml @@ -1,13 +1,14 @@ name = "KernelDensity" uuid = "5ab0869b-81aa-558d-bb23-cbf5423bbe9b" -authors = ["Simon Byrne and various contributors"] version = "0.6.8" +authors = ["Simon Byrne and various contributors"] [deps] Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" FFTW = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341" Interpolations = "a98d9a8b-a2ab-59e6-89dd-64a1c18fca59" +IrrationalConstants = "92d709cd-6900-40b7-9082-c6be49f344b6" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" [compat] @@ -15,6 +16,7 @@ Distributions = "0.23, 0.24, 0.25" DocStringExtensions = "0.8, 0.9" FFTW = "1" Interpolations = "0.9, 0.10, 0.11, 0.12, 0.13, 0.14, 0.15" +IrrationalConstants = "0.2.6" StatsBase = "0.33, 0.34" julia = "1" diff --git a/src/KernelDensity.jl b/src/KernelDensity.jl index b6201c8a..fab82556 100644 --- a/src/KernelDensity.jl +++ b/src/KernelDensity.jl @@ -5,9 +5,17 @@ using StatsBase using Distributions using Interpolations -import Distributions: twoπ, pdf +import IrrationalConstants: twoπ +import Base: getproperty, propertynames + +import Distributions: pdf +import Distributions: ncomponents, component, probs + import FFTW: rfft, irfft +export DiscretisedPDF, KernelEstimate, BandwidthMethod, Silverman, LSCV +export kernel_estimate, precompute! + export kde, kde_lscv, UnivariateKDE, BivariateKDE, InterpKDE, pdf abstract type AbstractKDE end @@ -18,4 +26,7 @@ include("univariate.jl") include("bivariate.jl") include("interp.jl") -end # module +include("initialisation.jl") +include("bandwidth_selection.jl") + +end diff --git a/src/bandwidth_selection.jl b/src/bandwidth_selection.jl new file mode 100644 index 00000000..03eb731d --- /dev/null +++ b/src/bandwidth_selection.jl @@ -0,0 +1,74 @@ + + +# Silverman's rule of thumb for KDE bandwidth selection +function get_bandwidth(::Silverman, data::AbstractVector{<:Real}, kernelType = Normal, prior = DiscreteUniform(1,length(data)), alpha::Float64 = 0.9) + # Determine length of data + ndata = length(data) + ndata <= 1 && return alpha + + # Calculate width using variance and IQR + var_width = std(data) + q25, q75 = quantile(data, (0.25, 0.75)) + quantile_width = (q75 - q25) / 1.34 + + # Deal with edge cases with 0 IQR or variance + width = min(var_width, quantile_width) + if width == 0.0 + if var_width == 0.0 + width = 1.0 + else + width = var_width + end + end + + # Set bandwidth using Silverman's rule of thumb + return alpha * width * ndata^(-0.2), nothing +end + +@kwdef struct LSCV <: BandwidthMethod + nPoints::Int = 2048 + initBandwidth::Float64 = NaN +end + +# Select bandwidth using least-squares cross validation, from: +# Density Estimation for Statistics and Data Analysis +# B. W. Silverman (1986) +# sections 3.4.3 (pp. 48-52) and 3.5 (pp. 61-66) +function get_bandwidth(lscv::LSCV, data::AbstractVector{<:Real}, kernelType = Normal, prior = DiscreteUniform(1,length(data))) + K = lscv.nPoints + initBandwidth::Float64 = isnan(lscv.initBandwidth) ? get_bandwidth(Silverman(), data)[1] : lscv.initBandwidth + ndata = length(data) + lo, hi = extrema(data) + midpoints = range(lo - 4.0*initBandwidth, hi + 4.0*initBandwidth, K) + initDen = tabulate(data, midpoints, prior).values + + # the ft here is K/ba*sqrt(2pi) * u(s), it is K times the Yl in Silverman's book + ft = rfft(initDen) + + ft2 = abs2.(ft) + c = -twoπ/(step(midpoints)*K) + hlb, hub = 0.25*initBandwidth, 1.5*initBandwidth + + optimalBandwidth = optimize(hlb, hub) do h + dist = kernel_dist(kernelType, h) + ψ = 0.0 + for j in 1:length(ft2)-1 + ks = real(cf(dist, j*c)) + ψ += ft2[j+1]*(ks-2.0)*ks + end + ψ*step(midpoints)/K + pdf(dist,0.0)/ndata + end + + dist = kernel_dist(kernelType, optimalBandwidth) + for j in 0:length(ft)-1 + ft[j+1] *= cf(dist, j*c) + end + + convolved = irfft(ft, K) + + # fix rounding error. + convolved .= max.(0., convolved) + + # Invert the Fourier transform to get the KDE + optimalBandwidth, DiscretisedPDF(convolved, midpoints) +end \ No newline at end of file diff --git a/src/initialisation.jl b/src/initialisation.jl new file mode 100644 index 00000000..309d5c6a --- /dev/null +++ b/src/initialisation.jl @@ -0,0 +1,141 @@ + + +struct DiscretisedPDF{N,R,T} + values::Array{T,N} + labels::NTuple{N,R} + DiscretisedPDF(values::Array{T,N}, labels::NTuple{N,R}) where {N,T<:Real,R<:AbstractRange} = new{N,R,T}(values, labels) +end + +DiscretisedPDF(values::Vector, label::AbstractRange) = DiscretisedPDF(values, (label,)) + +# provides d.xs, d.ys, d.zs for convenience +function Base.getproperty(d::DiscretisedPDF, prop::Symbol) + if prop == :xs + return d.labels[1] + elseif prop == :ys + return d.labels[2] + elseif prop == :zs + return d.labels[3] + else getfield(d, prop) + end +end +Base.propertynames(::DiscretisedPDF) = (:values, :labels, :xs, :ys, :zs) + + +mutable struct KernelEstimate{VF<:VariateForm, VS<:ValueSupport, KernelType<:Distribution, PriorDist<:UnivariateDistribution{Discrete}, PDF<:DiscretisedPDF} <: AbstractMixtureModel{VF, VS, KernelType} + const data::Matrix{Float64} + const prior::PriorDist + const kernel::KernelType + const bandwidth::Float64 + precomputedPDF::Union{Nothing, PDF} + + # these guarantee type agreement and nothing more + function KernelEstimate(data::Matrix{Float64}, prior::PriorDist, kernel::KernelType, bandwidth::Float64, precomputedPDF::PDF) where { + PriorDist<:UnivariateDistribution{Discrete}, + KernelType <: Distribution, + PDF <: DiscretisedPDF} + VF, VS = supertype(KernelType).parameters + new{VF,VS,KernelType,PriorDist,PDF}(data, prior, kernel, bandwidth, precomputedPDF) + end + function KernelEstimate(data::Matrix{Float64}, prior::PriorDist, kernel::KernelType, bandwidth::Float64, ::Nothing) where { + PriorDist<:UnivariateDistribution{Discrete}, + KernelType <: Distribution} + VF, VS = supertype(KernelType).parameters + R = Base.return_types(range,(Float64,Float64,Int))[1] + PDF = DiscretisedPDF{size(data)[1],R,eltype(data)} + new{VF,VS,KernelType,PriorDist,PDF}(data, prior, kernel, bandwidth, nothing) + end + +end + +UnivariateKernelEstimate{VS, K, P, PDF} = KernelEstimate{Univariate, VS, K, P, PDF} +MultivariateKernelEstimate{VS, K, P, PDF} = KernelEstimate{Multivariate, VS, K, P, PDF} + +abstract type BandwidthMethod end + +# default algorithm +struct Silverman<:BandwidthMethod end + +# implementing common interface of AbstractMixtureModel +ncomponents(ke::KernelEstimate) = size(ke.data)[2] +component(ke::UnivariateKernelEstimate, k) = ke.kernel - ke.data[1,k] +component(ke::MultivariateKernelEstimate, k) = ke.kernel - ke.data[:,k] +probs(ke::KernelEstimate) = probs(ke.prior) + + +# creating KernelEstimate instance + +# make kernel density given bandwidth +function kernel_estimate(data::Vector{<:Real}, bandwidth::Real, kernelType = Normal, prior::UnivariateDistribution{Discrete} = DiscreteUniform(1,length(data))) + kernel = kernel_dist(kernelType, bandwidth) + KernelEstimate(reshape(data, 1, length(data)), prior, kernel, Float64(bandwidth), nothing) +end + +# find bandwidth, then make kernel density +function kernel_estimate(data::Vector{<:Real}, method::BandwidthMethod = Silverman(), kernelType = Normal, prior::UnivariateDistribution{Discrete} = DiscreteUniform(1,length(data))) + bandwidth, kde = get_bandwidth(method, data, kernelType, prior) + kernel = kernel_dist(kernelType, bandwidth) + KernelEstimate(reshape(data,1,length(data)), prior, kernel, bandwidth, kde) +end + + +# construct kernel from bandwidth +kernel_dist(::Type{Normal}, bandwidth::Real) = Normal(0.0, bandwidth) +kernel_dist(::Type{Uniform}, bandwidth::Real) = Uniform(-√3*bandwidth, √3*bandwidth) + +const LocationScale = Union{Laplace, Logistic, SymTriangularDist, Cosine, Epanechnikov} +kernel_dist(::Type{D},w::Real) where {D<:LocationScale} = D(0.0, w/std(D(0.0,1.0))) + +# 1D precomputation + +function precompute!(ke::UnivariateKernelEstimate, nPoints::Int = 2048) + lo, hi = extrema(ke.data) + midpoints = range(lo - 4.0*ke.bandwidth, hi + 4.0*ke.bandwidth, nPoints) + ke.precomputedPDF = conv(tabulate(vec(ke.data), midpoints, ke.prior), ke.kernel) +end + +function tabulate(data::AbstractVector{<:Real}, midpoints::AbstractRange, prior::UnivariateDistribution{Discrete}) + npoints = length(midpoints) + s = step(midpoints) + + # Set up a grid for discretized data + grid = zeros(Float64, npoints) + ainc = 1.0 / (s*s) + + # weighted discretization (cf. Jones and Lotwick) + for (i,x) in enumerate(data) + k = searchsortedfirst(midpoints,x) + j = k-1 + if 1 <= j <= npoints-1 + grid[j] += (midpoints[k]-x)*ainc*pdf(prior,i) + grid[k] += (x-midpoints[j])*ainc*pdf(prior,i) + end + end + + return DiscretisedPDF(grid, midpoints) +end + +function conv(den::DiscretisedPDF{1, R, T}, kernel::UnivariateDistribution) where {T<:Real,R<:AbstractRange} + # Transform to Fourier basis + K = length(den.values) + ft = rfft(den.values) + + # Convolve fft with characteristic function of kernel + # empirical cf + # = \sum_{n=1}^N e^{i*t*X_n} / N + # = \sum_{k=0}^K e^{i*t*(a+k*s)} N_k / N + # = e^{i*t*a} \sum_{k=0}^K e^{-2pi*i*k*(-t*s*K/2pi)/K} N_k / N + # = A * fft(N_k/N)[-t*s*K/2pi + 1] + c = -twoπ/(step(den.xs)*K) + for j in 0:length(ft)-1 + ft[j+1] *= cf(kernel,j*c) + end + + # Invert the Fourier transform to get the KDE + convolved = irfft(ft, K) + + # fix rounding error. + convolved .= max.(0., convolved) + + DiscretisedPDF(convolved, den.xs) +end \ No newline at end of file diff --git a/src/pull.md b/src/pull.md new file mode 100644 index 00000000..de7cb52f --- /dev/null +++ b/src/pull.md @@ -0,0 +1,15 @@ +The current capabilities of `KernelDensity` are quite limited, especially if we compare them to what is avaiable e.g. in SciPy, and this supposed to be is the main source of kernel estimation tools in Julia statistical ecosystem. The current interface and type system were made for calculating discretised pdf using Fourier transform and not for much else. This is a proposition of completely new type system and interface. + +The main principles of this proposition are: +- The new `KernelEstimate` datatype contains full information about the kernel fit, so a statistician can infer all they possibly want from it. +- `KernelEstimate` is a subtype of `MixtureDistribution` so it fits well the rest of `JuliaStats`, can be much more easily used in other tasks and has a lot of useful default methods already implemented in `Distributions`. +- This type system can be used for kernel estimation in any dimension. +- The discretised values of the pdf are distinct from `KernelEstimate`. However, they can be stored in it and if so, getting pdf from interpolation of it becomes avaiable. If it is not present, different methods for getting pdf can be used (e.g. from the naive sum formula, which for small samples is completely reasonable). It can be also added using `precompute!`. + +The current approach does not allow for adaptive kde. You can allow for it with the same design, but for the sake of efficiency and code simplicity it might be better to add something like `AdaptiveKernelEstimate` if such a need arises. + +I've curretly implemented methods only for 1D. Translating 2D case should be easy, but it might be best to forget about it and implement any dimension from the get go. But this is better to do that after getting some feedback. + +Old interface may, or may not stay for backwards compability. + +As it stands this proposition does not add any new numerical methods, but even simply linking it to `Distributions` adds a lot of capabilities to this library, which for example Scipy or Sklearn do not have, making it I hope quite more attractive. \ No newline at end of file diff --git a/src/test.jl b/src/test.jl new file mode 100644 index 00000000..7515ea57 --- /dev/null +++ b/src/test.jl @@ -0,0 +1,39 @@ +# use scale instead of std +# LocationScale ? +# you can use support to detect +# argument for boundary? +# give any kernel? +# fix cosine distribution + +# add precompute! + +using Distributions +using Plots +using .KernelDensity + +# + +points = [1. 2 3 4 5] +cat = Categorical([1/5,1/5,1/5,1/5,1/5]) + +ds = DiscretisedPDF(fill(1/20,20),LinRange(0,1,20)) + +k = KernelEstimate(points,cat, Normal(0,1),1.0, nothing) + +X = randn(10^5) + +k1 = kernel_estimate(X,0.2,Normal) + +k2 = kernel_estimate(X,Silverman(),Epanechnikov) + +precompute!(k2) + + +kde = k1.precomputedPDF +plot(kde.xs,kde.values) + +KernelEstimate(reshape(X,1,length(X)), cat, kernel_dist(Normal,1.0), 1.0) + +ncompoments(k) +component(k,3) +probs(k) \ No newline at end of file From 53c04b3c63672a63a3559e01899131bcbabd0b04 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20=C5=9Al=C4=99zak?= <128084860+jaksle@users.noreply.github.com> Date: Mon, 3 Nov 2025 00:43:22 +0100 Subject: [PATCH 19/31] feature_computation --- src/KernelDensity.jl | 1 + src/bandwidth_selection.jl | 15 ++++-- src/{test.jl => exampleUse.jl} | 34 ++++++++---- src/feature_computation.jl | 97 ++++++++++++++++++++++++++++++++++ src/initialisation.jl | 66 ++++------------------- src/pull.md | 15 ------ 6 files changed, 143 insertions(+), 85 deletions(-) rename src/{test.jl => exampleUse.jl} (53%) create mode 100644 src/feature_computation.jl delete mode 100644 src/pull.md diff --git a/src/KernelDensity.jl b/src/KernelDensity.jl index fab82556..6b5472ea 100644 --- a/src/KernelDensity.jl +++ b/src/KernelDensity.jl @@ -28,5 +28,6 @@ include("interp.jl") include("initialisation.jl") include("bandwidth_selection.jl") +include("feature_computation.jl") end diff --git a/src/bandwidth_selection.jl b/src/bandwidth_selection.jl index 03eb731d..3c3c5aa2 100644 --- a/src/bandwidth_selection.jl +++ b/src/bandwidth_selection.jl @@ -28,6 +28,7 @@ end @kwdef struct LSCV <: BandwidthMethod nPoints::Int = 2048 initBandwidth::Float64 = NaN + boundary::Tuple{Float64,Float64} = (NaN,NaN) end # Select bandwidth using least-squares cross validation, from: @@ -36,10 +37,18 @@ end # sections 3.4.3 (pp. 48-52) and 3.5 (pp. 61-66) function get_bandwidth(lscv::LSCV, data::AbstractVector{<:Real}, kernelType = Normal, prior = DiscreteUniform(1,length(data))) K = lscv.nPoints - initBandwidth::Float64 = isnan(lscv.initBandwidth) ? get_bandwidth(Silverman(), data)[1] : lscv.initBandwidth + initBandwidth = isnan(lscv.initBandwidth) ? get_bandwidth(Silverman(), data)[1] : lscv.initBandwidth + boundaryLow, boundaryHigh = lscv.boundary + if isnan(boundaryLow) || isnan(boundaryHigh) + lo, hi = extrema(data) + boundaryLow = isnan(boundaryLow) ? lo - 4.0*initBandwidth : boundaryLow + boundaryHigh = isnan(boundaryHigh) ? hi + 4.0*initBandwidth : boundaryHigh + end + ndata = length(data) - lo, hi = extrema(data) - midpoints = range(lo - 4.0*initBandwidth, hi + 4.0*initBandwidth, K) + + midpoints = range(boundaryLow, boundaryHigh, K) + initDen = tabulate(data, midpoints, prior).values # the ft here is K/ba*sqrt(2pi) * u(s), it is K times the Yl in Silverman's book diff --git a/src/test.jl b/src/exampleUse.jl similarity index 53% rename from src/test.jl rename to src/exampleUse.jl index 7515ea57..ff35d8cb 100644 --- a/src/test.jl +++ b/src/exampleUse.jl @@ -5,13 +5,12 @@ # give any kernel? # fix cosine distribution -# add precompute! +using Plots using Distributions -using Plots using .KernelDensity -# +## points = [1. 2 3 4 5] cat = Categorical([1/5,1/5,1/5,1/5,1/5]) @@ -19,21 +18,34 @@ cat = Categorical([1/5,1/5,1/5,1/5,1/5]) ds = DiscretisedPDF(fill(1/20,20),LinRange(0,1,20)) k = KernelEstimate(points,cat, Normal(0,1),1.0, nothing) +ncomponents(k1) +component(k1,3) +probs(k1) + +## X = randn(10^5) -k1 = kernel_estimate(X,0.2,Normal) +k1 = kernel_estimate(X, 0.2, Normal) + +k2 = kernel_estimate(X, Silverman(), Epanechnikov) + +k3 = kernel_estimate(X, LSCV(), Epanechnikov) + + +precompute!(k2,2048,(-5,5)) -k2 = kernel_estimate(X,Silverman(),Epanechnikov) +pdf(k2, 1) +pdf.(k2, [1, 2, 3]) +pdf(k2, 1, :precomputed) +pdf.(k2, [1, 2, 3], :precomputed) -precompute!(k2) +cdf(k2, 1) +mean(k2), var(k2) +quantile(k2, 0.9) -kde = k1.precomputedPDF +kde = k2.precomputedPDF plot(kde.xs,kde.values) -KernelEstimate(reshape(X,1,length(X)), cat, kernel_dist(Normal,1.0), 1.0) -ncompoments(k) -component(k,3) -probs(k) \ No newline at end of file diff --git a/src/feature_computation.jl b/src/feature_computation.jl new file mode 100644 index 00000000..bdaccbc8 --- /dev/null +++ b/src/feature_computation.jl @@ -0,0 +1,97 @@ + +# 1D pdf precomputation + +function precompute!(ke::UnivariateKernelEstimate, nPoints::Integer = 2048, + boundary::Tuple{<:Real,<:Real} =((lo, hi) = extrema(ke.data); (lo - 4.0*ke.bandwidth, hi + 4.0*ke.bandwidth))) + + # find the element type of range in ke.precomputedPDF + T = eltype(typeof(ke).parameters[5].parameters[2]) + midpoints = range(T(boundary[1]), T(boundary[2]), Int(nPoints)) + ke.precomputedPDF = conv(tabulate(vec(ke.data), midpoints, ke.prior), ke.kernel) +end + +function tabulate(data::AbstractVector{<:Real}, midpoints::AbstractRange, prior::UnivariateDistribution{Discrete}) + npoints = length(midpoints) + s = step(midpoints) + + # Set up a grid for discretized data + grid = zeros(Float64, npoints) + ainc = 1.0 / (s*s) + + # weighted discretization (cf. Jones and Lotwick) + for (i,x) in enumerate(data) + k = searchsortedfirst(midpoints,x) + j = k-1 + if 1 <= j <= npoints-1 + grid[j] += (midpoints[k]-x)*ainc*pdf(prior,i) + grid[k] += (x-midpoints[j])*ainc*pdf(prior,i) + end + end + + return DiscretisedPDF(grid, midpoints) +end + +function conv(den::DiscretisedPDF{1, R, T}, kernel::UnivariateDistribution) where {T<:Real,R<:AbstractRange} + # Transform to Fourier basis + K = length(den.values) + ft = rfft(den.values) + + # Convolve fft with characteristic function of kernel + # empirical cf + # = \sum_{n=1}^N e^{i*t*X_n} / N + # = \sum_{k=0}^K e^{i*t*(a+k*s)} N_k / N + # = e^{i*t*a} \sum_{k=0}^K e^{-2pi*i*k*(-t*s*K/2pi)/K} N_k / N + # = A * fft(N_k/N)[-t*s*K/2pi + 1] + c = -twoπ/(step(den.xs)*K) + for j in 0:length(ft)-1 + ft[j+1] *= cf(kernel,j*c) + end + + # Invert the Fourier transform to get the KDE + convolved = irfft(ft, K) + + # fix rounding error. + convolved .= max.(0., convolved) + + DiscretisedPDF(convolved, den.xs) +end + +# pdf methods + +# pdf(ke, x) is implemented in Distributions.jl + +function pdf(ke::UnivariateKernelEstimate, x::Real, method::Symbol) + if method == :precomputed + den = ke.precomputedPDF + den === nothing || error("PDF must be first precomputed.") + itp_u = interpolate(den.values, BSpline(Quadratic(Line(OnGrid())))) + itp = scale(itp_u, den.xs) + etp = extrapolate(itp, 0.) + return etp.itp(x) + elseif method == :naive + return pdf(ke, x) + else + error("Method not available.") + end +end + +# custom broadcast prepares for interpolation only once for all xs +function Base.Broadcast.broadcasted(::typeof(pdf), ke::UnivariateKernelEstimate, xs, method::Symbol) + if method == :precomputed + den = ke.precomputedPDF + den === nothing || error("PDF must be first precomputed.") + itp_u = interpolate(den.values, BSpline(Quadratic(Line(OnGrid())))) + itp = scale(itp_u, den.xs) + etp = extrapolate(itp, 0.) + return etp.itp.(xs) + elseif method == :naive + return pdf.(ke, x) + else + error("Method not available.") + end +end + + +# it is possible to add cdf(ke, :precomputed) + +# it is possibble to add cf(ke, t) \ No newline at end of file diff --git a/src/initialisation.jl b/src/initialisation.jl index 309d5c6a..1f9a4cda 100644 --- a/src/initialisation.jl +++ b/src/initialisation.jl @@ -29,7 +29,7 @@ mutable struct KernelEstimate{VF<:VariateForm, VS<:ValueSupport, KernelType<:Dis const bandwidth::Float64 precomputedPDF::Union{Nothing, PDF} - # these guarantee type agreement and nothing more + # these constructors guarantee type agreement function KernelEstimate(data::Matrix{Float64}, prior::PriorDist, kernel::KernelType, bandwidth::Float64, precomputedPDF::PDF) where { PriorDist<:UnivariateDistribution{Discrete}, KernelType <: Distribution, @@ -41,8 +41,10 @@ mutable struct KernelEstimate{VF<:VariateForm, VS<:ValueSupport, KernelType<:Dis PriorDist<:UnivariateDistribution{Discrete}, KernelType <: Distribution} VF, VS = supertype(KernelType).parameters + + # the default PDF type is based on Float64 and Int R = Base.return_types(range,(Float64,Float64,Int))[1] - PDF = DiscretisedPDF{size(data)[1],R,eltype(data)} + PDF = DiscretisedPDF{size(data)[1],R,Float64} new{VF,VS,KernelType,PriorDist,PDF}(data, prior, kernel, bandwidth, nothing) end @@ -51,6 +53,11 @@ end UnivariateKernelEstimate{VS, K, P, PDF} = KernelEstimate{Univariate, VS, K, P, PDF} MultivariateKernelEstimate{VS, K, P, PDF} = KernelEstimate{Multivariate, VS, K, P, PDF} +# KernelEstimate is a scalar +Base.broadcastable(ke::KernelEstimate) = Ref(ke) + +# It is possible to add linear transformations a*ke + b + abstract type BandwidthMethod end # default algorithm @@ -78,6 +85,7 @@ function kernel_estimate(data::Vector{<:Real}, method::BandwidthMethod = Silverm KernelEstimate(reshape(data,1,length(data)), prior, kernel, bandwidth, kde) end +# Can add kernel_estimate which takes prior as a vector. # construct kernel from bandwidth kernel_dist(::Type{Normal}, bandwidth::Real) = Normal(0.0, bandwidth) @@ -85,57 +93,3 @@ kernel_dist(::Type{Uniform}, bandwidth::Real) = Uniform(-√3*bandwidth, √3*ba const LocationScale = Union{Laplace, Logistic, SymTriangularDist, Cosine, Epanechnikov} kernel_dist(::Type{D},w::Real) where {D<:LocationScale} = D(0.0, w/std(D(0.0,1.0))) - -# 1D precomputation - -function precompute!(ke::UnivariateKernelEstimate, nPoints::Int = 2048) - lo, hi = extrema(ke.data) - midpoints = range(lo - 4.0*ke.bandwidth, hi + 4.0*ke.bandwidth, nPoints) - ke.precomputedPDF = conv(tabulate(vec(ke.data), midpoints, ke.prior), ke.kernel) -end - -function tabulate(data::AbstractVector{<:Real}, midpoints::AbstractRange, prior::UnivariateDistribution{Discrete}) - npoints = length(midpoints) - s = step(midpoints) - - # Set up a grid for discretized data - grid = zeros(Float64, npoints) - ainc = 1.0 / (s*s) - - # weighted discretization (cf. Jones and Lotwick) - for (i,x) in enumerate(data) - k = searchsortedfirst(midpoints,x) - j = k-1 - if 1 <= j <= npoints-1 - grid[j] += (midpoints[k]-x)*ainc*pdf(prior,i) - grid[k] += (x-midpoints[j])*ainc*pdf(prior,i) - end - end - - return DiscretisedPDF(grid, midpoints) -end - -function conv(den::DiscretisedPDF{1, R, T}, kernel::UnivariateDistribution) where {T<:Real,R<:AbstractRange} - # Transform to Fourier basis - K = length(den.values) - ft = rfft(den.values) - - # Convolve fft with characteristic function of kernel - # empirical cf - # = \sum_{n=1}^N e^{i*t*X_n} / N - # = \sum_{k=0}^K e^{i*t*(a+k*s)} N_k / N - # = e^{i*t*a} \sum_{k=0}^K e^{-2pi*i*k*(-t*s*K/2pi)/K} N_k / N - # = A * fft(N_k/N)[-t*s*K/2pi + 1] - c = -twoπ/(step(den.xs)*K) - for j in 0:length(ft)-1 - ft[j+1] *= cf(kernel,j*c) - end - - # Invert the Fourier transform to get the KDE - convolved = irfft(ft, K) - - # fix rounding error. - convolved .= max.(0., convolved) - - DiscretisedPDF(convolved, den.xs) -end \ No newline at end of file diff --git a/src/pull.md b/src/pull.md deleted file mode 100644 index de7cb52f..00000000 --- a/src/pull.md +++ /dev/null @@ -1,15 +0,0 @@ -The current capabilities of `KernelDensity` are quite limited, especially if we compare them to what is avaiable e.g. in SciPy, and this supposed to be is the main source of kernel estimation tools in Julia statistical ecosystem. The current interface and type system were made for calculating discretised pdf using Fourier transform and not for much else. This is a proposition of completely new type system and interface. - -The main principles of this proposition are: -- The new `KernelEstimate` datatype contains full information about the kernel fit, so a statistician can infer all they possibly want from it. -- `KernelEstimate` is a subtype of `MixtureDistribution` so it fits well the rest of `JuliaStats`, can be much more easily used in other tasks and has a lot of useful default methods already implemented in `Distributions`. -- This type system can be used for kernel estimation in any dimension. -- The discretised values of the pdf are distinct from `KernelEstimate`. However, they can be stored in it and if so, getting pdf from interpolation of it becomes avaiable. If it is not present, different methods for getting pdf can be used (e.g. from the naive sum formula, which for small samples is completely reasonable). It can be also added using `precompute!`. - -The current approach does not allow for adaptive kde. You can allow for it with the same design, but for the sake of efficiency and code simplicity it might be better to add something like `AdaptiveKernelEstimate` if such a need arises. - -I've curretly implemented methods only for 1D. Translating 2D case should be easy, but it might be best to forget about it and implement any dimension from the get go. But this is better to do that after getting some feedback. - -Old interface may, or may not stay for backwards compability. - -As it stands this proposition does not add any new numerical methods, but even simply linking it to `Distributions` adds a lot of capabilities to this library, which for example Scipy or Sklearn do not have, making it I hope quite more attractive. \ No newline at end of file From fcbeb72d82e86c29d9ac5becbab0066db23a1943 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20=C5=9Al=C4=99zak?= <128084860+jaksle@users.noreply.github.com> Date: Mon, 3 Nov 2025 00:56:22 +0100 Subject: [PATCH 20/31] gitignore --- .gitignore | 1 + src/exampleUse.jl | 7 ------- src/feature_computation.jl | 4 ++-- src/initialisation.jl | 4 ++-- 4 files changed, 5 insertions(+), 11 deletions(-) diff --git a/.gitignore b/.gitignore index d66bf271..8b74cf32 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ Manifest.toml .vscode/settings.json tmp.jl Project.toml +interp.jl \ No newline at end of file diff --git a/src/exampleUse.jl b/src/exampleUse.jl index ff35d8cb..97ee5cce 100644 --- a/src/exampleUse.jl +++ b/src/exampleUse.jl @@ -1,9 +1,3 @@ -# use scale instead of std -# LocationScale ? -# you can use support to detect -# argument for boundary? -# give any kernel? -# fix cosine distribution using Plots @@ -32,7 +26,6 @@ k2 = kernel_estimate(X, Silverman(), Epanechnikov) k3 = kernel_estimate(X, LSCV(), Epanechnikov) - precompute!(k2,2048,(-5,5)) pdf(k2, 1) diff --git a/src/feature_computation.jl b/src/feature_computation.jl index bdaccbc8..8a7fb804 100644 --- a/src/feature_computation.jl +++ b/src/feature_computation.jl @@ -63,7 +63,7 @@ end function pdf(ke::UnivariateKernelEstimate, x::Real, method::Symbol) if method == :precomputed den = ke.precomputedPDF - den === nothing || error("PDF must be first precomputed.") + den === nothing && error("PDF must be first precomputed.") itp_u = interpolate(den.values, BSpline(Quadratic(Line(OnGrid())))) itp = scale(itp_u, den.xs) etp = extrapolate(itp, 0.) @@ -79,7 +79,7 @@ end function Base.Broadcast.broadcasted(::typeof(pdf), ke::UnivariateKernelEstimate, xs, method::Symbol) if method == :precomputed den = ke.precomputedPDF - den === nothing || error("PDF must be first precomputed.") + den === nothing && error("PDF must be first precomputed.") itp_u = interpolate(den.values, BSpline(Quadratic(Line(OnGrid())))) itp = scale(itp_u, den.xs) etp = extrapolate(itp, 0.) diff --git a/src/initialisation.jl b/src/initialisation.jl index 1f9a4cda..58c9f55f 100644 --- a/src/initialisation.jl +++ b/src/initialisation.jl @@ -65,8 +65,8 @@ struct Silverman<:BandwidthMethod end # implementing common interface of AbstractMixtureModel ncomponents(ke::KernelEstimate) = size(ke.data)[2] -component(ke::UnivariateKernelEstimate, k) = ke.kernel - ke.data[1,k] -component(ke::MultivariateKernelEstimate, k) = ke.kernel - ke.data[:,k] +component(ke::UnivariateKernelEstimate, k) = ke.kernel + ke.data[1,k] +component(ke::MultivariateKernelEstimate, k) = ke.kernel + ke.data[:,k] probs(ke::KernelEstimate) = probs(ke.prior) From d04a4a2362b6f694e42808cc94315049a293c358 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20=C5=9Al=C4=99zak?= <128084860+jaksle@users.noreply.github.com> Date: Mon, 3 Nov 2025 01:08:37 +0100 Subject: [PATCH 21/31] Update exampleUse.jl --- src/exampleUse.jl | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/exampleUse.jl b/src/exampleUse.jl index 97ee5cce..bc15dee3 100644 --- a/src/exampleUse.jl +++ b/src/exampleUse.jl @@ -18,7 +18,7 @@ probs(k1) ## -X = randn(10^5) +X = randn(10^3) k1 = kernel_estimate(X, 0.2, Normal) @@ -39,6 +39,9 @@ quantile(k2, 0.9) kde = k2.precomputedPDF -plot(kde.xs,kde.values) +plot(kde.xs,kde.values,label = "precomputed") + +xs = LinRange(-4,4,100) +plot!(xs,pdf.(k2, xs),label = "naive") From b8e978349a4b71f46f7e41ec46e29961f0496208 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20=C5=9Al=C4=99zak?= <128084860+jaksle@users.noreply.github.com> Date: Mon, 3 Nov 2025 01:13:36 +0100 Subject: [PATCH 22/31] rem interp --- src/interp.jl | 43 ------------------------------------------- test/interp.jl | 50 -------------------------------------------------- 2 files changed, 93 deletions(-) delete mode 100644 src/interp.jl delete mode 100644 test/interp.jl diff --git a/src/interp.jl b/src/interp.jl deleted file mode 100644 index 748dbc90..00000000 --- a/src/interp.jl +++ /dev/null @@ -1,43 +0,0 @@ -import Interpolations: interpolate, scale - - -mutable struct InterpKDE{K,I} <: AbstractKDE - kde::K - itp::I - InterpKDE{K,I}(kde::K, itp::I) where {K,I} = new{K,I}(kde, itp) -end - -function InterpKDE(kde::UnivariateKDE, opts...) - itp_u = interpolate(kde.density, opts...) - itp = scale(itp_u, kde.x) - etp = extrapolate(itp, zero(eltype(kde.density))) - InterpKDE{typeof(kde),typeof(etp)}(kde, etp) -end -InterpKDE(kde::UnivariateKDE) = InterpKDE(kde, BSpline(Quadratic(Line(OnGrid())))) - - -function InterpKDE(kde::BivariateKDE, opts...) - itp_u = interpolate(kde.density,opts...) - itp = scale(itp_u, kde.x, kde.y) - etp = extrapolate(itp, zero(eltype(kde.density))) - InterpKDE{typeof(kde),typeof(etp)}(kde, etp) -end -InterpKDE(kde::BivariateKDE) = InterpKDE(kde::BivariateKDE, BSpline(Quadratic(Line(OnGrid())))) - - -# pdf interface implementation -# it should be consistent with Distributions.pdf - -# any dimension -pdf(ik::InterpKDE,x::Real...) = ik.itp(x...) -pdf(ik::InterpKDE, V::AbstractVector) = ik.itp(V...) -pdf(ik::InterpKDE, M::AbstractArray{<:Real, N}) where N = reshape(mapslices(v->pdf(ik, v), M, dims = 1), size(M)[2:end]) - -# 1 dimension -pdf(k::UnivariateKDE,x) = pdf(InterpKDE(k),x) -Base.Broadcast.broadcasted(::typeof(pdf),k::UnivariateKDE,xs) = Base.Broadcast.broadcasted(InterpKDE(k).itp, xs) - -# 2 dimensions -pdf(k::BivariateKDE,x,y) = pdf(InterpKDE(k),x,y) -pdf(ik::InterpKDE,xs::AbstractVector,ys::AbstractVector) = ik.itp.(xs,ys') -pdf(k::BivariateKDE, M) = pdf(InterpKDE(k),M) diff --git a/test/interp.jl b/test/interp.jl deleted file mode 100644 index b2ea5411..00000000 --- a/test/interp.jl +++ /dev/null @@ -1,50 +0,0 @@ -using Test -using KernelDensity - -@testset "interpolation computation" begin - X = randn(100) - Y = randn(100) - - k = kde(X) - @test pdf.(k, k.x) ≈ k.density - - k = kde((X,Y)) - @test pdf(k, k.x, k.y) ≈ k.density - - # Try to evaluate the KDE outside the interpolation domain - # The KDE is allowed to be zero, but it should not be greater than the exact solution - k = kde([0.0], bandwidth=1.0) - @test pdf.(k, k.x) ≈ k.density - @test pdf(k, -10.0) ≤ pdf(Normal(), -10.0) - @test pdf(k, +10.0) ≤ pdf(Normal(), +10.0) - - k = kde(([0.0],[0.0]), bandwidth=(1.0, 1.0)) - @test pdf(k, k.x, k.y) ≈ k.density - @test pdf(k, -10.0, 0.0) ≤ pdf(MvNormal(2, 1.0), [-10.0, 0.0]) - @test pdf(k, +10.0, 0.0) ≤ pdf(MvNormal(2, 1.0), [+10.0, 0.0]) - @test pdf(k, 0.0, -10.0) ≤ pdf(MvNormal(2, 1.0), [0.0, -10.0]) - @test pdf(k, 0.0, +10.0) ≤ pdf(MvNormal(2, 1.0), [0.0, +10.0]) -end - -@testset "pdf method interface" begin - k = kde([-1., 1.]) - ik = InterpKDE(k) - - @test pdf.(k, [0., 1.]) ≈ [pdf(k, 0.), pdf(k, 1.)] ≈ pdf.(k,[0., 1.]) ≈ [pdf(ik, 0.), pdf(ik, 1.)] - @test all( pdf.(k, (0., 1.)) .≈ (pdf(k, 0.), pdf(k, 1.)) ) - @test pdf.(k, [0. 1.; 2. -1.]) ≈ [pdf(k, 0.) pdf(k, 1.); pdf(k, 2.) pdf(k, -1.)] - - k2d = kde(([-1., 1.], [0., 1.])) - ik2d = InterpKDE(k2d) - - @test pdf(k2d, [0.5, 0.1]) ≈ pdf(k2d, [0.5; 0.1]) ≈ pdf(k2d, 0.5, 0.1) ≈ pdf(ik2d, 0.5, 0.1) - @test pdf(k2d, [0.5 1.; 0.1 1.]) ≈ [pdf(ik2d, 0.5, 0.1), pdf(ik2d, 1., 1.)] - @static if VERSION >= v"1.1" - @test pdf(k2d, [0.5; 1. ;;; 0.1; 1.]) ≈ [pdf(ik2d, 0.5, 1.) pdf(ik2d, 0.1, 1.)] - else - M = zeros(2, 1, 2) - M[:,1,1] .= [0.5, 1] - M[:,1,2] .= [0.1, 1] - @test pdf(k2d, M) ≈ [pdf(ik2d, 0.5, 1.) pdf(ik2d, 0.1, 1.)] - end -end From fec20e33a615dfb0099ad0b538fbd398ca1a03bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20=C5=9Al=C4=99zak?= <128084860+jaksle@users.noreply.github.com> Date: Mon, 3 Nov 2025 01:15:11 +0100 Subject: [PATCH 23/31] old interp --- src/interp.jl | 32 ++++++++++++++++++++++++++++++++ test/interp.jl | 25 +++++++++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100644 src/interp.jl create mode 100644 test/interp.jl diff --git a/src/interp.jl b/src/interp.jl new file mode 100644 index 00000000..05e648e6 --- /dev/null +++ b/src/interp.jl @@ -0,0 +1,32 @@ +import Interpolations: interpolate, scale + +mutable struct InterpKDE{K,I} <: AbstractKDE + kde::K + itp::I + InterpKDE{K,I}(kde::K, itp::I) where {K,I} = new{K,I}(kde, itp) +end + + +function InterpKDE(kde::UnivariateKDE, opts...) + itp_u = interpolate(kde.density, opts...) + itp = scale(itp_u, kde.x) + etp = extrapolate(itp, zero(eltype(kde.density))) + InterpKDE{typeof(kde),typeof(etp)}(kde, etp) +end +InterpKDE(kde::UnivariateKDE) = InterpKDE(kde, BSpline(Quadratic(Line(OnGrid())))) + + +function InterpKDE(kde::BivariateKDE, opts...) + itp_u = interpolate(kde.density,opts...) + itp = scale(itp_u, kde.x, kde.y) + etp = extrapolate(itp, zero(eltype(kde.density))) + InterpKDE{typeof(kde),typeof(etp)}(kde, etp) +end +InterpKDE(kde::BivariateKDE) = InterpKDE(kde::BivariateKDE, BSpline(Quadratic(Line(OnGrid())))) + +pdf(ik::InterpKDE,x::Real...) = ik.itp(x...) +pdf(ik::InterpKDE,xs::AbstractVector) = [ik.itp(x) for x in xs] +pdf(ik::InterpKDE,xs::AbstractVector,ys::AbstractVector) = [ik.itp(x,y) for x in xs, y in ys] + +pdf(k::UnivariateKDE,x) = pdf(InterpKDE(k),x) +pdf(k::BivariateKDE,x,y) = pdf(InterpKDE(k),x,y) diff --git a/test/interp.jl b/test/interp.jl new file mode 100644 index 00000000..ed93d914 --- /dev/null +++ b/test/interp.jl @@ -0,0 +1,25 @@ +using Test +using KernelDensity + +X = randn(100) +Y = randn(100) + +k = kde(X) +@test pdf(k, k.x) ≈ k.density + +k = kde((X,Y)) +@test pdf(k, k.x, k.y) ≈ k.density + +# Try to evaluate the KDE outside the interpolation domain +# The KDE is allowed to be zero, but it should not be greater than the exact solution +k = kde([0.0], bandwidth=1.0) +@test pdf(k, k.x) ≈ k.density +@test pdf(k, -10.0) ≤ pdf(Normal(), -10.0) +@test pdf(k, +10.0) ≤ pdf(Normal(), +10.0) + +k = kde(([0.0],[0.0]), bandwidth=(1.0, 1.0)) +@test pdf(k, k.x, k.y) ≈ k.density +@test pdf(k, -10.0, 0.0) ≤ pdf(MvNormal(2, 1.0), [-10.0, 0.0]) +@test pdf(k, +10.0, 0.0) ≤ pdf(MvNormal(2, 1.0), [+10.0, 0.0]) +@test pdf(k, 0.0, -10.0) ≤ pdf(MvNormal(2, 1.0), [0.0, -10.0]) +@test pdf(k, 0.0, +10.0) ≤ pdf(MvNormal(2, 1.0), [0.0, +10.0]) From 635c21eb06fde5d9a51e47e3d5164ebfd59a55c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20=C5=9Al=C4=99zak?= <128084860+jaksle@users.noreply.github.com> Date: Mon, 3 Nov 2025 01:17:59 +0100 Subject: [PATCH 24/31] Update KernelDensity.jl --- src/KernelDensity.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/KernelDensity.jl b/src/KernelDensity.jl index 6b5472ea..af4c84b3 100644 --- a/src/KernelDensity.jl +++ b/src/KernelDensity.jl @@ -20,7 +20,7 @@ export kde, kde_lscv, UnivariateKDE, BivariateKDE, InterpKDE, pdf abstract type AbstractKDE end -Base.broadcastable(x::AbstractKDE) = Ref(x) +Base.Broadcast.broadcastable(x::AbstractKDE) = Ref(x) include("univariate.jl") include("bivariate.jl") From 5b538596d2d1002cc221ae02ff2353b25efe0764 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20=C5=9Al=C4=99zak?= <128084860+jaksle@users.noreply.github.com> Date: Mon, 3 Nov 2025 12:15:55 +0100 Subject: [PATCH 25/31] Update Project.toml --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 1db450df..c431d83c 100644 --- a/Project.toml +++ b/Project.toml @@ -16,7 +16,7 @@ Distributions = "0.23, 0.24, 0.25" DocStringExtensions = "0.8, 0.9" FFTW = "1" Interpolations = "0.9, 0.10, 0.11, 0.12, 0.13, 0.14, 0.15" -IrrationalConstants = "0.2.6" +IrrationalConstants = "0.2.4" StatsBase = "0.33, 0.34" julia = "1" From c21455649dbe1f54e16c1f13a5d890f346f4e3ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20=C5=9Al=C4=99zak?= <128084860+jaksle@users.noreply.github.com> Date: Mon, 3 Nov 2025 12:59:44 +0100 Subject: [PATCH 26/31] rand + kernel_dist --- src/exampleUse.jl | 1 + src/univariate.jl | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/exampleUse.jl b/src/exampleUse.jl index bc15dee3..8f93f2b4 100644 --- a/src/exampleUse.jl +++ b/src/exampleUse.jl @@ -37,6 +37,7 @@ cdf(k2, 1) mean(k2), var(k2) quantile(k2, 0.9) +rand(k2) kde = k2.precomputedPDF plot(kde.xs,kde.values,label = "precomputed") diff --git a/src/univariate.jl b/src/univariate.jl index 5f71a809..9d835f4a 100644 --- a/src/univariate.jl +++ b/src/univariate.jl @@ -21,11 +21,11 @@ mutable struct UnivariateKDE{R<:AbstractRange} <: AbstractKDE end # construct kernel from bandwidth -kernel_dist(::Type{Normal},w::Real) = Normal(0.0,w) -kernel_dist(::Type{Uniform},w::Real) = (s = 1.7320508075688772*w; Uniform(-s,s)) +#kernel_dist(::Type{Normal},w::Real) = Normal(0.0,w) +#kernel_dist(::Type{Uniform},w::Real) = (s = 1.7320508075688772*w; Uniform(-s,s)) -const LocationScale = Union{Laplace,Logistic,SymTriangularDist} -kernel_dist(::Type{D},w::Real) where {D} = (s = w/std(D(0.0,1.0)); D(0.0,s)) +#const LocationScale = Union{Laplace,Logistic,SymTriangularDist} +#kernel_dist(::Type{D},w::Real) where {D} = (s = w/std(D(0.0,1.0)); D(0.0,s)) # Silverman's rule of thumb for KDE bandwidth selection From 94317fae5b899fe683f0f087856758c07fba11c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20=C5=9Al=C4=99zak?= <128084860+jaksle@users.noreply.github.com> Date: Tue, 11 Nov 2025 15:41:44 +0100 Subject: [PATCH 27/31] ver compat --- src/initialisation.jl | 32 +++++--------------------------- src/ke-1.6.jl | 28 ++++++++++++++++++++++++++++ src/ke-1.8.jl | 28 ++++++++++++++++++++++++++++ 3 files changed, 61 insertions(+), 27 deletions(-) create mode 100644 src/ke-1.6.jl create mode 100644 src/ke-1.8.jl diff --git a/src/initialisation.jl b/src/initialisation.jl index 58c9f55f..2c1f8824 100644 --- a/src/initialisation.jl +++ b/src/initialisation.jl @@ -21,35 +21,13 @@ function Base.getproperty(d::DiscretisedPDF, prop::Symbol) end Base.propertynames(::DiscretisedPDF) = (:values, :labels, :xs, :ys, :zs) - -mutable struct KernelEstimate{VF<:VariateForm, VS<:ValueSupport, KernelType<:Distribution, PriorDist<:UnivariateDistribution{Discrete}, PDF<:DiscretisedPDF} <: AbstractMixtureModel{VF, VS, KernelType} - const data::Matrix{Float64} - const prior::PriorDist - const kernel::KernelType - const bandwidth::Float64 - precomputedPDF::Union{Nothing, PDF} - - # these constructors guarantee type agreement - function KernelEstimate(data::Matrix{Float64}, prior::PriorDist, kernel::KernelType, bandwidth::Float64, precomputedPDF::PDF) where { - PriorDist<:UnivariateDistribution{Discrete}, - KernelType <: Distribution, - PDF <: DiscretisedPDF} - VF, VS = supertype(KernelType).parameters - new{VF,VS,KernelType,PriorDist,PDF}(data, prior, kernel, bandwidth, precomputedPDF) - end - function KernelEstimate(data::Matrix{Float64}, prior::PriorDist, kernel::KernelType, bandwidth::Float64, ::Nothing) where { - PriorDist<:UnivariateDistribution{Discrete}, - KernelType <: Distribution} - VF, VS = supertype(KernelType).parameters - - # the default PDF type is based on Float64 and Int - R = Base.return_types(range,(Float64,Float64,Int))[1] - PDF = DiscretisedPDF{size(data)[1],R,Float64} - new{VF,VS,KernelType,PriorDist,PDF}(data, prior, kernel, bandwidth, nothing) - end - +if VERSION >= v"1.8" # for backwards compatibility no const fields in < v1.8 + include("ke-1.8.jl") +else + include("ke-1.6.jl") end + UnivariateKernelEstimate{VS, K, P, PDF} = KernelEstimate{Univariate, VS, K, P, PDF} MultivariateKernelEstimate{VS, K, P, PDF} = KernelEstimate{Multivariate, VS, K, P, PDF} diff --git a/src/ke-1.6.jl b/src/ke-1.6.jl new file mode 100644 index 00000000..d2c753a4 --- /dev/null +++ b/src/ke-1.6.jl @@ -0,0 +1,28 @@ + +mutable struct KernelEstimate{VF<:VariateForm, VS<:ValueSupport, KernelType<:Distribution, PriorDist<:UnivariateDistribution{Discrete}, PDF<:DiscretisedPDF} <: AbstractMixtureModel{VF, VS, KernelType} + data::Matrix{Float64} + prior::PriorDist + kernel::KernelType + bandwidth::Float64 + precomputedPDF::Union{Nothing, PDF} + + # these constructors guarantee type agreement + function KernelEstimate(data::Matrix{Float64}, prior::PriorDist, kernel::KernelType, bandwidth::Float64, precomputedPDF::PDF) where { + PriorDist<:UnivariateDistribution{Discrete}, + KernelType <: Distribution, + PDF <: DiscretisedPDF} + VF, VS = supertype(KernelType).parameters + new{VF,VS,KernelType,PriorDist,PDF}(data, prior, kernel, bandwidth, precomputedPDF) + end + function KernelEstimate(data::Matrix{Float64}, prior::PriorDist, kernel::KernelType, bandwidth::Float64, ::Nothing) where { + PriorDist<:UnivariateDistribution{Discrete}, + KernelType <: Distribution} + VF, VS = supertype(KernelType).parameters + + # the default PDF type is based on Float64 and Int + R = Base.return_types(range,(Float64,Float64,Int))[1] + PDF = DiscretisedPDF{size(data)[1],R,Float64} + new{VF,VS,KernelType,PriorDist,PDF}(data, prior, kernel, bandwidth, nothing) + end + +end \ No newline at end of file diff --git a/src/ke-1.8.jl b/src/ke-1.8.jl new file mode 100644 index 00000000..0a9ee085 --- /dev/null +++ b/src/ke-1.8.jl @@ -0,0 +1,28 @@ + +mutable struct KernelEstimate{VF<:VariateForm, VS<:ValueSupport, KernelType<:Distribution, PriorDist<:UnivariateDistribution{Discrete}, PDF<:DiscretisedPDF} <: AbstractMixtureModel{VF, VS, KernelType} + const data::Matrix{Float64} + const prior::PriorDist + const kernel::KernelType + const bandwidth::Float64 + precomputedPDF::Union{Nothing, PDF} + + # these constructors guarantee type agreement + function KernelEstimate(data::Matrix{Float64}, prior::PriorDist, kernel::KernelType, bandwidth::Float64, precomputedPDF::PDF) where { + PriorDist<:UnivariateDistribution{Discrete}, + KernelType <: Distribution, + PDF <: DiscretisedPDF} + VF, VS = supertype(KernelType).parameters + new{VF,VS,KernelType,PriorDist,PDF}(data, prior, kernel, bandwidth, precomputedPDF) + end + function KernelEstimate(data::Matrix{Float64}, prior::PriorDist, kernel::KernelType, bandwidth::Float64, ::Nothing) where { + PriorDist<:UnivariateDistribution{Discrete}, + KernelType <: Distribution} + VF, VS = supertype(KernelType).parameters + + # the default PDF type is based on Float64 and Int + R = Base.return_types(range,(Float64,Float64,Int))[1] + PDF = DiscretisedPDF{size(data)[1],R,Float64} + new{VF,VS,KernelType,PriorDist,PDF}(data, prior, kernel, bandwidth, nothing) + end + +end \ No newline at end of file From 62286401599fc2d0e23de5e12e130c04ecbd9694 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20=C5=9Al=C4=99zak?= <128084860+jaksle@users.noreply.github.com> Date: Tue, 11 Nov 2025 15:49:37 +0100 Subject: [PATCH 28/31] Update bandwidth_selection.jl --- src/bandwidth_selection.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bandwidth_selection.jl b/src/bandwidth_selection.jl index 3c3c5aa2..27671704 100644 --- a/src/bandwidth_selection.jl +++ b/src/bandwidth_selection.jl @@ -25,7 +25,7 @@ function get_bandwidth(::Silverman, data::AbstractVector{<:Real}, kernelType = N return alpha * width * ndata^(-0.2), nothing end -@kwdef struct LSCV <: BandwidthMethod +Base.@kwdef struct LSCV <: BandwidthMethod nPoints::Int = 2048 initBandwidth::Float64 = NaN boundary::Tuple{Float64,Float64} = (NaN,NaN) From 258e5f77c51d0cf9406f9e64aa02d96c957e06d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20=C5=9Al=C4=99zak?= <128084860+jaksle@users.noreply.github.com> Date: Tue, 11 Nov 2025 15:57:47 +0100 Subject: [PATCH 29/31] Update bandwidth_selection.jl --- src/bandwidth_selection.jl | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/bandwidth_selection.jl b/src/bandwidth_selection.jl index 27671704..9f11e179 100644 --- a/src/bandwidth_selection.jl +++ b/src/bandwidth_selection.jl @@ -25,10 +25,11 @@ function get_bandwidth(::Silverman, data::AbstractVector{<:Real}, kernelType = N return alpha * width * ndata^(-0.2), nothing end -Base.@kwdef struct LSCV <: BandwidthMethod - nPoints::Int = 2048 - initBandwidth::Float64 = NaN - boundary::Tuple{Float64,Float64} = (NaN,NaN) +struct LSCV <: BandwidthMethod # default args in constructor for backwards comp + nPoints::Int + initBandwidth::Float64 + boundary::Tuple{Float64,Float64} + LSCV(nPoints = 2048, initBandwidth = NaN, boundary = (NaN,NaN)) = new(nPoints, initBandwidth, boundary) end # Select bandwidth using least-squares cross validation, from: From b984532d40c74a619be8b2565dcbc5ecc682a0fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20=C5=9Al=C4=99zak?= <128084860+jaksle@users.noreply.github.com> Date: Tue, 11 Nov 2025 16:26:05 +0100 Subject: [PATCH 30/31] const field new implementation --- src/exampleUse.jl | 2 +- src/feature_computation.jl | 6 +++--- src/initialisation.jl | 32 +++++++++++++++++++++++++++----- src/ke-1.6.jl | 28 ---------------------------- src/ke-1.8.jl | 28 ---------------------------- 5 files changed, 31 insertions(+), 65 deletions(-) delete mode 100644 src/ke-1.6.jl delete mode 100644 src/ke-1.8.jl diff --git a/src/exampleUse.jl b/src/exampleUse.jl index 8f93f2b4..8c05c1f3 100644 --- a/src/exampleUse.jl +++ b/src/exampleUse.jl @@ -39,7 +39,7 @@ quantile(k2, 0.9) rand(k2) -kde = k2.precomputedPDF +kde = k2.precomputedPDF[] # simpler access can be added plot(kde.xs,kde.values,label = "precomputed") xs = LinRange(-4,4,100) diff --git a/src/feature_computation.jl b/src/feature_computation.jl index 8a7fb804..0f1fa10b 100644 --- a/src/feature_computation.jl +++ b/src/feature_computation.jl @@ -7,7 +7,7 @@ function precompute!(ke::UnivariateKernelEstimate, nPoints::Integer = 2048, # find the element type of range in ke.precomputedPDF T = eltype(typeof(ke).parameters[5].parameters[2]) midpoints = range(T(boundary[1]), T(boundary[2]), Int(nPoints)) - ke.precomputedPDF = conv(tabulate(vec(ke.data), midpoints, ke.prior), ke.kernel) + ke.precomputedPDF[] = conv(tabulate(vec(ke.data), midpoints, ke.prior), ke.kernel) end function tabulate(data::AbstractVector{<:Real}, midpoints::AbstractRange, prior::UnivariateDistribution{Discrete}) @@ -62,7 +62,7 @@ end function pdf(ke::UnivariateKernelEstimate, x::Real, method::Symbol) if method == :precomputed - den = ke.precomputedPDF + den = ke.precomputedPDF[] den === nothing && error("PDF must be first precomputed.") itp_u = interpolate(den.values, BSpline(Quadratic(Line(OnGrid())))) itp = scale(itp_u, den.xs) @@ -78,7 +78,7 @@ end # custom broadcast prepares for interpolation only once for all xs function Base.Broadcast.broadcasted(::typeof(pdf), ke::UnivariateKernelEstimate, xs, method::Symbol) if method == :precomputed - den = ke.precomputedPDF + den = ke.precomputedPDF[] den === nothing && error("PDF must be first precomputed.") itp_u = interpolate(den.values, BSpline(Quadratic(Line(OnGrid())))) itp = scale(itp_u, den.xs) diff --git a/src/initialisation.jl b/src/initialisation.jl index 2c1f8824..48ddd772 100644 --- a/src/initialisation.jl +++ b/src/initialisation.jl @@ -21,12 +21,34 @@ function Base.getproperty(d::DiscretisedPDF, prop::Symbol) end Base.propertynames(::DiscretisedPDF) = (:values, :labels, :xs, :ys, :zs) -if VERSION >= v"1.8" # for backwards compatibility no const fields in < v1.8 - include("ke-1.8.jl") -else - include("ke-1.6.jl") -end +# note: in v1.8+ 4 first fields can be made constant using const +struct KernelEstimate{VF<:VariateForm, VS<:ValueSupport, KernelType<:Distribution, PriorDist<:UnivariateDistribution{Discrete}, PDF<:DiscretisedPDF} <: AbstractMixtureModel{VF, VS, KernelType} + data::Matrix{Float64} + prior::PriorDist + kernel::KernelType + bandwidth::Float64 + precomputedPDF::Base.RefValue{PDF} # the value it is mutable + + # these constructors guarantee type agreement + function KernelEstimate(data::Matrix{Float64}, prior::PriorDist, kernel::KernelType, bandwidth::Float64, precomputedPDF::PDF) where { + PriorDist<:UnivariateDistribution{Discrete}, + KernelType <: Distribution, + PDF <: DiscretisedPDF} + VF, VS = supertype(KernelType).parameters + new{VF,VS,KernelType,PriorDist,PDF}(data, prior, kernel, bandwidth, Ref(precomputedPDF)) + end + function KernelEstimate(data::Matrix{Float64}, prior::PriorDist, kernel::KernelType, bandwidth::Float64, ::Nothing) where { + PriorDist<:UnivariateDistribution{Discrete}, + KernelType <: Distribution} + VF, VS = supertype(KernelType).parameters + + # the default PDF type is based on Float64 and Int + R = Base.return_types(range,(Float64,Float64,Int))[1] + PDF = DiscretisedPDF{size(data)[1],R,Float64} + new{VF,VS,KernelType,PriorDist,PDF}(data, prior, kernel, bandwidth, Ref{PDF}()) + end +end UnivariateKernelEstimate{VS, K, P, PDF} = KernelEstimate{Univariate, VS, K, P, PDF} MultivariateKernelEstimate{VS, K, P, PDF} = KernelEstimate{Multivariate, VS, K, P, PDF} diff --git a/src/ke-1.6.jl b/src/ke-1.6.jl deleted file mode 100644 index d2c753a4..00000000 --- a/src/ke-1.6.jl +++ /dev/null @@ -1,28 +0,0 @@ - -mutable struct KernelEstimate{VF<:VariateForm, VS<:ValueSupport, KernelType<:Distribution, PriorDist<:UnivariateDistribution{Discrete}, PDF<:DiscretisedPDF} <: AbstractMixtureModel{VF, VS, KernelType} - data::Matrix{Float64} - prior::PriorDist - kernel::KernelType - bandwidth::Float64 - precomputedPDF::Union{Nothing, PDF} - - # these constructors guarantee type agreement - function KernelEstimate(data::Matrix{Float64}, prior::PriorDist, kernel::KernelType, bandwidth::Float64, precomputedPDF::PDF) where { - PriorDist<:UnivariateDistribution{Discrete}, - KernelType <: Distribution, - PDF <: DiscretisedPDF} - VF, VS = supertype(KernelType).parameters - new{VF,VS,KernelType,PriorDist,PDF}(data, prior, kernel, bandwidth, precomputedPDF) - end - function KernelEstimate(data::Matrix{Float64}, prior::PriorDist, kernel::KernelType, bandwidth::Float64, ::Nothing) where { - PriorDist<:UnivariateDistribution{Discrete}, - KernelType <: Distribution} - VF, VS = supertype(KernelType).parameters - - # the default PDF type is based on Float64 and Int - R = Base.return_types(range,(Float64,Float64,Int))[1] - PDF = DiscretisedPDF{size(data)[1],R,Float64} - new{VF,VS,KernelType,PriorDist,PDF}(data, prior, kernel, bandwidth, nothing) - end - -end \ No newline at end of file diff --git a/src/ke-1.8.jl b/src/ke-1.8.jl deleted file mode 100644 index 0a9ee085..00000000 --- a/src/ke-1.8.jl +++ /dev/null @@ -1,28 +0,0 @@ - -mutable struct KernelEstimate{VF<:VariateForm, VS<:ValueSupport, KernelType<:Distribution, PriorDist<:UnivariateDistribution{Discrete}, PDF<:DiscretisedPDF} <: AbstractMixtureModel{VF, VS, KernelType} - const data::Matrix{Float64} - const prior::PriorDist - const kernel::KernelType - const bandwidth::Float64 - precomputedPDF::Union{Nothing, PDF} - - # these constructors guarantee type agreement - function KernelEstimate(data::Matrix{Float64}, prior::PriorDist, kernel::KernelType, bandwidth::Float64, precomputedPDF::PDF) where { - PriorDist<:UnivariateDistribution{Discrete}, - KernelType <: Distribution, - PDF <: DiscretisedPDF} - VF, VS = supertype(KernelType).parameters - new{VF,VS,KernelType,PriorDist,PDF}(data, prior, kernel, bandwidth, precomputedPDF) - end - function KernelEstimate(data::Matrix{Float64}, prior::PriorDist, kernel::KernelType, bandwidth::Float64, ::Nothing) where { - PriorDist<:UnivariateDistribution{Discrete}, - KernelType <: Distribution} - VF, VS = supertype(KernelType).parameters - - # the default PDF type is based on Float64 and Int - R = Base.return_types(range,(Float64,Float64,Int))[1] - PDF = DiscretisedPDF{size(data)[1],R,Float64} - new{VF,VS,KernelType,PriorDist,PDF}(data, prior, kernel, bandwidth, nothing) - end - -end \ No newline at end of file From 48a20a82e807a1658d6147322f2252e678fa331a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20=C5=9Al=C4=99zak?= <128084860+jaksle@users.noreply.github.com> Date: Tue, 11 Nov 2025 16:59:08 +0100 Subject: [PATCH 31/31] change to isassigned --- src/feature_computation.jl | 2 +- src/initialisation.jl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/feature_computation.jl b/src/feature_computation.jl index 0f1fa10b..92c71f38 100644 --- a/src/feature_computation.jl +++ b/src/feature_computation.jl @@ -78,8 +78,8 @@ end # custom broadcast prepares for interpolation only once for all xs function Base.Broadcast.broadcasted(::typeof(pdf), ke::UnivariateKernelEstimate, xs, method::Symbol) if method == :precomputed + isassigned(ke.precomputedPDF) || error("PDF must be first precomputed.") den = ke.precomputedPDF[] - den === nothing && error("PDF must be first precomputed.") itp_u = interpolate(den.values, BSpline(Quadratic(Line(OnGrid())))) itp = scale(itp_u, den.xs) etp = extrapolate(itp, 0.) diff --git a/src/initialisation.jl b/src/initialisation.jl index 48ddd772..8cd4693e 100644 --- a/src/initialisation.jl +++ b/src/initialisation.jl @@ -27,7 +27,7 @@ struct KernelEstimate{VF<:VariateForm, VS<:ValueSupport, KernelType<:Distributio prior::PriorDist kernel::KernelType bandwidth::Float64 - precomputedPDF::Base.RefValue{PDF} # the value it is mutable + precomputedPDF::Base.RefValue{PDF} # the value here is mutable # these constructors guarantee type agreement function KernelEstimate(data::Matrix{Float64}, prior::PriorDist, kernel::KernelType, bandwidth::Float64, precomputedPDF::PDF) where {