Source code for clustering.gauss

import logging
import numpy as np


class GaussFull:
    """
    Model a speaker by a single Gaussian with a full covariance matrix.

    The model accumulates zeroth, first and second order statistics
    (*count*, *stat1*, *stat2*) through :meth:`add`; :meth:`compute` then
    derives the mean (*mu*), the covariance (*cov*), its log determinant
    (*cov_log_det*) and the partial BIC (*partial_bic*).
    """

    def __init__(self, name, dim):
        """
        Create an empty model.

        :param name: the name of the model (e.g. the speaker label)
        :param dim: the feature dimension
        """
        self.logger = logging.getLogger(__name__)
        self.name = name
        self.dim = dim
        # Sufficient statistics, filled in by add().
        self.count = 0
        self.stat1 = np.zeros(dim)
        self.stat2 = np.zeros((dim, dim))
        # Derived quantities, filled in by compute().
        self.cov_log_det = 0
        self.mu = None
        # np.nan is the only spelling that exists in every NumPy release;
        # the np.NAN / np.NaN aliases were removed in NumPy 2.0.
        self.mu_dot = np.nan
        self.cov = None
        self.partial_bic = np.nan

    def add(self, features):
        """
        Accumulate statistics for *features*.

        :param features: numpy.ndarray with one feature vector per row,
            i.e. of shape (number of vectors, *dim*)
        """
        self.count += features.shape[0]  # number of feature vectors
        self.stat1 += features.sum(axis=0)
        self.stat2 += np.dot(features.T, features)

    def _cov_log_det(self):
        """
        Compute the log det of the covariance matrix.

        Only the log-magnitude returned by ``numpy.linalg.slogdet`` is
        used; the sign of the determinant is ignored.

        :return: float
        """
        _sign, log_det = np.linalg.slogdet(self.cov)
        return log_det

    def compute(self):
        r"""
        Compute the mean and covariance from the accumulated statistics,
        the log det of the covariance and the partial BIC :math:`PBIC`.

        :math:`PBIC_{x} = \frac{n_x}{2} \log|\Sigma_x|`
        """
        self.mu = self.stat1 / self.count
        mu_col = self.mu[:, np.newaxis]
        # Outer product mu * mu^T, subtracted from the second-order moment
        # to obtain the covariance.
        self.mu_dot = np.dot(mu_col, mu_col.T)
        self.cov = self.stat2 / self.count - self.mu_dot
        self.cov_log_det = self._cov_log_det()
        self.partial_bic = self.cov_log_det * 0.5 * self.count

    @classmethod
    def merge(cls, m1, m2):
        """
        Merge two models *m1* and *m2*.

        The statistic accumulators are summed, then the mean (*mu*),
        covariance (*cov*), log det and PBIC (*partial_bic*) of the merged
        model are computed. The merged model takes the name and the
        dimension of *m1*; *m1* and *m2* are left untouched.

        :param m1: a GaussFull object
        :param m2: a GaussFull object
        :return: a GaussFull object
        """
        merged = cls(m1.name, m1.dim)
        merged.count = m1.count + m2.count
        merged.stat1 = m1.stat1 + m2.stat1
        merged.stat2 = m1.stat2 + m2.stat2
        # Same arithmetic as a freshly accumulated model; this also fills
        # in mu_dot so merged models match computed ones.
        merged.compute()
        return merged

    @classmethod
    def merge_partial_bic(cls, m1, m2):
        """
        Merge the statistic accumulators of two GaussFull objects and
        compute the partial BIC of the merged model, without building it.

        :param m1: a GaussFull object
        :param m2: a GaussFull object
        :return: the log det of the merged covariance multiplied by half
            the merged count, or NaN if the computation raised a numerical
            error
        """
        try:
            count = m1.count + m2.count
            mu_col = ((m1.stat1 + m2.stat1) / count)[:, np.newaxis]
            cov = (m1.stat2 + m2.stat2) / count - np.dot(mu_col, mu_col.T)
            _sign, log_det = np.linalg.slogdet(cov)
        except (ArithmeticError, np.linalg.LinAlgError):
            # Best effort: a numerical failure is reported and mapped to
            # NaN instead of aborting the caller.
            logging.getLogger(__name__).warning(
                'Det problem set to NaN %s %s', m1.name, m2.name)
            return np.nan
        return log_det * (0.5 * count)

    @classmethod
    def cst_bic(cls, dim, alpha):
        r"""
        Compute the BIC constant:

        :math:`cst = \frac{1}{2} \alpha \left(d + \frac{d(d+1)}{2}\right)`

        where :math:`d` is the feature dimension (*dim*) and
        :math:`\alpha` a threshold (*alpha*).

        :param dim: the feature dimension
        :param alpha: the threshold
        :return: the constant
        """
        return 0.5 * alpha * (dim + (0.5 * ((dim + 1) * dim)))