
Commit 41ddbad

Merge pull request #133 from dseuss/feature-custom-meanfn
Feature custom meanfn
2 parents b4c4924 + d11aa12 commit 41ddbad

File tree

5 files changed: +375 -730 lines changed


src/qinfer/clustering.py

Lines changed: 14 additions & 14 deletions
@@ -5,7 +5,7 @@
 ##
 # © 2013 Chris Ferrie (csferrie@gmail.com) and
 # Christopher E. Granade (cgranade@gmail.com)
-#
+#
 # This file is a part of the Qinfer project.
 # Licensed under the AGPL version 3.
 ##
@@ -46,7 +46,7 @@
 import numpy as np
 import scipy.linalg as la
 
-from qinfer.utils import outer_product, particle_meanfn, particle_covariance_mtx
+from qinfer.utils import outer_product
 from qinfer._exceptions import ResamplerWarning
 import qinfer.metrics as metrics
 
@@ -85,20 +85,20 @@ def particle_clusters(
     of all particles in that cluster. That is, particle ``i`` is in the cluster
     if ``cluster_particles[i] == True``.
     """
-
-
+
+
     if weighted == True and particle_weights is None:
         raise ValueError("Weights must be specified for weighted clustering.")
-
-
-    # Allocate new arrays to hold the weights and locations.
+
+
+    # Allocate new arrays to hold the weights and locations.
     new_weights = np.empty(particle_weights.shape)
     new_locs = np.empty(particle_locations.shape)
-
+
     # Calculate and possibly reweight the metric.
     if weighted:
         M = sklearn.metrics.pairwise.pairwise_distances(particle_locations, metric=metric)
         M = metrics.weighted_pairwise_distances(M, particle_weights, w_pow=w_pow)
-
+
     # Create and run a SciKit-Learn DBSCAN clusterer.
     clusterer = sklearn.cluster.DBSCAN(
         min_samples=min_particles,
@@ -113,33 +113,33 @@ def particle_clusters(
         metric=metric
     )
     cluster_labels = clusterer.fit_predict(particle_locations)
-
+
     # Find out how many clusters were identified.
     # Cluster counting logic from:
     # [http://scikit-learn.org/stable/auto_examples/cluster/plot_dbscan.html].
     is_noise = -1 in cluster_labels
     n_clusters = len(set(cluster_labels)) - (1 if is_noise else 0)
-
+
     # If more than 10% of the particles were labeled as NOISE,
     # warn.
     n_noise = np.sum(cluster_labels == -1)
     if n_noise / particle_weights.shape[0] >= 0.1:
         warnings.warn("More than 10% of the particles were classified as NOISE. Consider increasing the neighborhood size ``eps``.", ResamplerWarning)
-
+
     # Print debugging info.
     if not quiet:
         print("[Clustering] DBSCAN identified {} cluster{}. "\
             "{} particles identified as NOISE.".format(
                 n_clusters, "s" if n_clusters > 1 else "", n_noise
         ))
-
+
     # Loop over clusters, calling the secondary resampler for each.
     # The loop should include -1 if noise was found.
     for idx_cluster in range(-1 if is_noise else 0, n_clusters):
         # Grab a boolean array identifying the particles in a particular
         # cluster.
         this_cluster = cluster_labels == idx_cluster
-
+
         yield idx_cluster, this_cluster
-
+
 
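The hunk above keeps the existing control flow intact: DBSCAN marks noise particles with the label -1, the cluster count excludes that label, and a ResamplerWarning is raised when more than 10% of the particles end up as noise. A minimal standalone sketch of that counting logic (the particle cloud and the eps/min_samples values below are illustrative, not taken from this commit):

import numpy as np
import sklearn.cluster

# Illustrative particle cloud: two well-separated blobs in a 2-parameter model space.
rng = np.random.RandomState(0)
particle_locations = np.concatenate([
    rng.normal(0.0, 0.01, size=(500, 2)),
    rng.normal(0.5, 0.01, size=(500, 2))
])

# Mirror the hunk: run DBSCAN, then count clusters excluding the NOISE label (-1).
clusterer = sklearn.cluster.DBSCAN(min_samples=5, eps=0.05, metric='euclidean')
cluster_labels = clusterer.fit_predict(particle_locations)

is_noise = -1 in cluster_labels
n_clusters = len(set(cluster_labels)) - (1 if is_noise else 0)
n_noise = np.sum(cluster_labels == -1)
print(n_clusters, n_noise)  # expect 2 clusters and little or no noise
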
src/qinfer/distributions.py

Lines changed: 72 additions & 13 deletions
@@ -45,6 +45,7 @@
 from qinfer import utils as u
 from qinfer.metrics import rescaled_distance_mtx
 from qinfer.clustering import particle_clusters
+from qinfer._exceptions import ApproximationWarning
 
 import warnings
 
@@ -321,6 +322,70 @@ def sample(self, n=1):
         ), len(cumsum_weights) - 1)]
 
     ## MOMENT FUNCTIONS ##
+    @staticmethod
+    def particle_mean(weights, locations):
+        r"""
+        Returns the arithmetic mean of the `locations` weighted by `weights`.
+
+        :param numpy.ndarray weights: Weights of each particle in array of
+            shape ``(n_particles,)``.
+        :param numpy.ndarray locations: Locations of each particle in array
+            of shape ``(n_particles, n_modelparams)``.
+        :rtype: :class:`numpy.ndarray`, shape ``(n_modelparams,)``.
+        :returns: An array containing the mean.
+        """
+        return np.dot(weights, locations)
+
+    @classmethod
+    def particle_covariance_mtx(cls, weights, locations):
+        """
+        Returns an estimate of the covariance of a distribution
+        represented by a given set of SMC particles.
+
+        :param weights: An array of shape ``(n_particles,)`` containing
+            the weights of each particle.
+        :param locations: An array of shape ``(n_particles, n_modelparams)``
+            containing the locations of each particle.
+        :rtype: :class:`numpy.ndarray`, shape
+            ``(n_modelparams, n_modelparams)``.
+        :returns: An array containing the estimated covariance matrix.
+        """
+        # Find the mean model vector, shape (n_modelparams, ).
+        mu = cls.particle_mean(weights, locations)
+
+        # Transpose the particle locations to have shape
+        # (n_modelparams, n_particles).
+        xs = locations.transpose([1, 0])
+        # Give a shorter name to the particle weights, shape (n_particles, ).
+        ws = weights
+
+        cov = (
+            # This sum is a reduction over the particle index, so that it
+            # represents an expectation value over the outer product $x . x^T$.
+            #
+            # All three factors have the particle index as their rightmost
+            # index. Using the Einstein summation convention (ESC),
+            # we can reduce over the particle index easily while leaving
+            # the model parameter index to vary between the two factors
+            # of xs.
+            #
+            # This corresponds to evaluating A_{m,n} = w_{i} x_{m,i} x_{n,i}
+            # using the ESC, where A_{m,n} is the temporary array created.
+            np.einsum('i,mi,ni', ws, xs, xs)
+            # We finish by subtracting from the above expectation value
+            # the outer product $mu . mu^T$.
+            - np.dot(mu[..., np.newaxis], mu[np.newaxis, ...])
+        )
+
+        # The SMC approximation is not guaranteed to produce a
+        # positive-semidefinite covariance matrix. If a negative eigenvalue
+        # is produced, we should warn the caller of this.
+        assert np.all(np.isfinite(cov))
+        if not np.all(la.eig(cov)[0] >= 0):
+            warnings.warn('Numerical error in covariance estimation causing positive semidefinite violation.', ApproximationWarning)
+
+        return cov
 
     def est_mean(self):
         """
@@ -329,13 +394,8 @@ def est_mean(self):
         :rtype: :class:`numpy.ndarray`, shape ``(n_mps,)``.
         :returns: An array containing an estimate of the mean model vector.
         """
-        return np.sum(
-            # We need the particle index to be the rightmost index, so that
-            # the two arrays align on the particle index as opposed to the
-            # modelparam index.
-            self.particle_weights * self.particle_locations.transpose([1, 0]),
-            axis=1
-        )
+        return self.particle_mean(self.particle_weights,
+                                  self.particle_locations)
 
     def est_meanfn(self, fn):
         """
@@ -372,9 +432,8 @@ def est_covariance_mtx(self, corr=False):
         :returns: An array containing the estimated covariance matrix.
         """
 
-        cov = u.particle_covariance_mtx(
-            self.particle_weights,
-            self.particle_locations)
+        cov = self.particle_covariance_mtx(self.particle_weights,
+                                           self.particle_locations)
 
         if corr:
             dstd = np.sqrt(np.diag(cov))
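Both refactored estimators are behavior-preserving: np.dot(weights, locations) is the same contraction as the old sum over the transposed locations, and the einsum in particle_covariance_mtx accumulates the weighted second moment before the outer product of the mean is subtracted. A quick numerical check of both identities (a sketch with made-up weights and locations; only NumPy is assumed):

import numpy as np

rng = np.random.RandomState(42)
n_particles, n_modelparams = 1000, 3
locations = rng.normal(size=(n_particles, n_modelparams))
weights = rng.rand(n_particles)
weights /= weights.sum()              # SMC weights are normalized

# Mean: the old est_mean expression vs. the new particle_mean contraction.
old_mean = np.sum(weights * locations.transpose([1, 0]), axis=1)
new_mean = np.dot(weights, locations)
assert np.allclose(old_mean, new_mean)

# Covariance: the einsum-based estimate vs. NumPy's weighted covariance.
xs = locations.transpose([1, 0])      # shape (n_modelparams, n_particles)
cov = (
    np.einsum('i,mi,ni', weights, xs, xs)
    - np.dot(new_mean[..., np.newaxis], new_mean[np.newaxis, ...])
)
cov_np = np.cov(locations, rowvar=False, aweights=weights, ddof=0)
assert np.allclose(cov, cov_np)
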
@@ -448,8 +507,8 @@ def est_cluster_moments(self, cluster_opts=None):
             yield (
                 cluster_label,
                 sum(w), # The zeroth moment is very useful here!
-                u.particle_meanfn(w, l, lambda x: x),
-                u.particle_covariance_mtx(w, l)
+                self.particle_mean(w, l),
+                self.particle_covariance_mtx(w, l)
             )
 
     def est_cluster_covs(self, cluster_opts=None):
@@ -467,7 +526,7 @@ def est_cluster_covs(self, cluster_opts=None):
         ws = cluster_moments['weight'][:, np.newaxis, np.newaxis]
 
         within_cluster_var = np.sum(ws * cluster_moments['cov'], axis=0)
-        between_cluster_var = u.particle_covariance_mtx(
+        between_cluster_var = self.particle_covariance_mtx(
             # Treat the cluster means as a new very small particle cloud.
             cluster_moments['weight'], cluster_moments['mean']
         )
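
The est_cluster_covs hunk combines the two terms of the law of total covariance: a weighted average of the per-cluster covariances (within) plus the covariance of the cluster means treated as a small weighted particle cloud (between). A sketch of that decomposition (the clusters, weights, and helper functions below are made up for illustration and are not part of the qinfer API):

import numpy as np

rng = np.random.RandomState(1)
# Two illustrative clusters of equally weighted particles in a 2-parameter space.
locations = np.concatenate([
    rng.normal(0.0, 0.1, size=(300, 2)),
    rng.normal(1.0, 0.2, size=(700, 2))
])
weights = np.ones(len(locations)) / len(locations)
clusters = [slice(0, 300), slice(300, 1000)]

def w_mean(w, x):
    return np.dot(w, x)

def w_cov(w, x):
    mu = w_mean(w, x)
    return np.einsum('i,im,in', w, x, x) - np.outer(mu, mu)

# Per-cluster zeroth, first, and second moments, as in est_cluster_moments.
cluster_w = np.array([weights[s].sum() for s in clusters])
cluster_mu = np.array([w_mean(weights[s] / weights[s].sum(), locations[s]) for s in clusters])
cluster_cov = np.array([w_cov(weights[s] / weights[s].sum(), locations[s]) for s in clusters])

# Within- plus between-cluster covariance recovers the full-cloud covariance.
within = np.sum(cluster_w[:, np.newaxis, np.newaxis] * cluster_cov, axis=0)
between = w_cov(cluster_w, cluster_mu)
assert np.allclose(within + between, w_cov(weights, locations))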

0 commit comments
