Source code for clustering.gauss
import logging
import numpy as np
[docs]class GaussFull:
"""
Class to model a speaker by a gaussian with full covariance
"""
def __init__(self, name, dim):
self.logger = logging.getLogger(__name__)
self.name = name
self.count = 0
self.dim = dim
self.stat1 = np.zeros(dim)
self.stat2 = np.zeros((dim, dim))
self.cov_log_det = 0;
self.mu = None
self.mu_dot = np.NAN
self.cov = None
self.partial_bic = np.NaN
[docs] def add(self, features):
"""
Accumulate statistics for *features*
:param features: numpy.ndarray
"""
self.count += features.shape[0] # add the number of features
self.stat1 += features.sum(axis=0)
self.stat2 += np.dot(features.T, features)
def _cov_log_det(self):
"""
Compute the log det of the covariance matrix
:return: float
"""
s, d = np.linalg.slogdet(self.cov)
return d
# cov_chol, lower = scipy.linalg.cho_factor(self.cov)
# return 2.0 * numpy.sum(numpy.log(numpy.diagonal(cov_chol)))
[docs] def compute(self):
"""
Compute the mean and covariance according the statistique, the log det
of the covariance and the partial BIC :math:`PBIC`.
:math:`PBIC_{x} = \\frac{n_x}{2} \\log|\\Sigma_x|`
"""
self.mu = self.stat1 / self.count
tmp = self.mu[:, np.newaxis]
self.mu_dot = np.dot(tmp, tmp.T)
self.cov = self.stat2 / self.count - self.mu_dot
self.cov_log_det = self._cov_log_det()
self.partial_bic = self.cov_log_det * 0.5 * self.count
@classmethod
[docs] def merge(cls, m1, m2):
"""
Merge two models *m1* and *m2*. Compute the new mean (*mu*),
covariance (*cov*) and PBIC *partial_bic*
:param m1: a GaussFull object
:param m2: a GaussFull object
:return: a GaussFull object
"""
m = GaussFull(m1.name, m1.dim)
m.count = m1.count + m2.count
m.stat1 = m1.stat1 + m2.stat1
m.stat2 = m1.stat2 + m2.stat2
m.mu = m.stat1 / m.count
tmp = m.mu[:, np.newaxis]
m.cov = m.stat2 / m.count - np.dot(tmp, tmp.T)
m.cov_log_det = m._cov_log_det()
m.partial_bic = m.cov_log_det * 0.5 * m.count
return m
@classmethod
[docs] def merge_partial_bic(cls, m1, m2):
"""
Merge statistic accumulators of two a GaussFull objects, compute
the new log det.
:param m1: a GaussFull object
:param m2: a GaussFull object
:return: the log det
"""
try:
count = m1.count + m2.count
mu = ((m1.stat1 + m2.stat1) / count)[:, np.newaxis]
cov = (m1.stat2 + m2.stat2) / count - np.dot(mu, mu.T)
s, d = np.linalg.slogdet(cov)
# cov_chol, lower = scipy.linalg.cho_factor(cov, overwrite_a=True, check_finite=False)
# d = 2.0 * numpy.sum(numpy.log(numpy.diagonal(cov_chol)))
d *= 0.5 * count
return d
except:
logging.warning('Det problem set to NaN ', m1.name, m2.nname)
return np.nan
@classmethod
[docs] def cst_bic(cls, dim, alpha):
"""
Compute the BIC constant:
:math:`cst = \\frac{1}{2} \\alpha \\left(d + \\frac{d(d+1)}{2}\\right)`
where :math:`d`is the feature dimension (*dim*)
and :math:`alpha` a threshold (*alpha*)
:param dim: the feature dimension
:param alpha: the threshold
:return: the constant
"""
return 0.5 * alpha * (dim + (0.5 * ((dim + 1) * dim)))