Source code for segmentation

__author__ = 'meignier'

import logging

from s4d.diar import Diar
import numpy as np
import pandas as pd
from s4d.clustering.hac_bic import GaussFull
from s4d.clustering.hac_utils import bic_square_root
import copy
import scipy.signal


def sanity_check(cep, show, cluster='init'):
    """
    Removes runs of identical MFCC frames of *cep* and returns a diarization
    covering the remaining frames.

    :param cep: numpy.ndarray containing MFCC
    :param show: name of the show
    :param cluster: the cluster name given to the segments
    :return: a Diar object
    """
    table = Diar()
    # 1- diff on cep[i] - cep[i+1]
    # 2- sum of the n MFCC
    # 3- take equal values, give a boolean array
    b = np.sum(np.diff(cep, axis=0), axis=1) == 0
    # make a xor on the boolean array, true index+1 corresponds to a boundary
    bits = b[:-1] ^ b[1:]
    # convert true values into a list of feature indexes
    # append 0 at the beginning of the list, append the last index to the list
    idx = [0] + (np.arange(len(bits))[bits] + 1).tolist() + [cep.shape[0]]
    # for each pair of indexes (idx[i] and idx[i+1]), create a segment
    for i in range(0, len(idx) - 1, 2):
        table.append(show=show, start=idx[i], stop=idx[i + 1], cluster=cluster)

    return table
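

# Sketch (not part of the original module): the diff/xor trick above on a
# tiny synthetic input. Frames 2, 3 and 4 are identical, so the boolean array
# flips at indexes 2 and 4 and sanity_check returns the segments [0, 2) and
# [4, 7) labelled 'init'.
def _sanity_check_demo():
    cep = np.array([[0., 1., 2.],
                    [1., 2., 3.],
                    [5., 5., 5.],   # start of the run of identical frames
                    [5., 5., 5.],
                    [5., 5., 5.],
                    [2., 3., 4.],
                    [3., 4., 5.]])
    return sanity_check(cep, show='demo')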


def init_seg(cep, show='empty', cluster='init'):
    """
    Return an initial segmentation composed of one segment covering all the
    features in *cep*, from the first to the last one.

    :param cep: numpy.ndarray containing MFCC
    :param show: name of the show
    :param cluster: the cluster name given to the segment
    :return: a Diar object
    """
    length = cep.shape[0]
    table_out = Diar()
    table_out.append(show=show, start=0, stop=length, cluster=cluster)
    return table_out


def adjust(cep, diarization):
    """
    Moves the borders of the segments of *diarization* into the lowest energy
    regions and splits segments longer than 30 seconds.

    :todo: change numpy.convolve to the pandas version

    :param cep: a numpy.ndarray containing MFCC
    :param diarization: a Diar object
    :return: a Diar object
    """
    energy_index = 0
    # smooth the energy channel with a 100-frame moving average
    box = np.ones(100) / 100
    smooth = np.convolve(cep[:, energy_index], box, mode='same')
    adj_table = _adjust(smooth, diarization)
    return _split_e(smooth, adj_table, 30 * 100)
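

# Sketch for the :todo: above (an assumption, not the author's implementation):
# the same 100-frame sliding mean of the energy channel written with pandas.
# With min_periods=1 the borders are averaged over the available frames only,
# so the result is close to, but not bit-identical to, np.convolve(mode='same').
def _smooth_energy_pandas(cep, energy_index=0, win=100):
    return (pd.Series(cep[:, energy_index])
              .rolling(window=win, center=True, min_periods=1)
              .mean()
              .values)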


def _adjust(smooth, diarization, window_size=25):
    """
    The segment boundaries of *diarization* are moved slightly: segment start
    and segment stop are relocated into low energy regions.

    :param smooth: sliding mean of the energy (numpy.ndarray)
    :param diarization: the Diar object to adjust
    :param window_size: the half size of the zone in which to look for the
        minimum energy around a border
    :return: a Diar object
    """
    diarization_out = copy.deepcopy(diarization)
    diarization_out.sort(['start'])
    prev = diarization_out[0]
    for i in range(1, len(diarization_out)):
        cur = diarization_out[i]
        start = cur['start']
        p = np.argmin(smooth[start - window_size:start + window_size])
        l1 = p + start - window_size - prev['start']
        l2 = prev['stop'] - p + start - window_size
        if l1 > 500 and l2 > 500:
            prev['stop'] = p + start - window_size
            cur['start'] = p + start - window_size
        prev = cur
    return diarization_out


def _split_e(smooth, diarization, split_size):
    """
    Long segments of *diarization* are cut recursively at their points of
    lowest energy in order to yield segments shorter than *split_size* frames.

    :param smooth: sliding mean of the energy (numpy.ndarray)
    :param diarization: a Diar object
    :param split_size: maximum size of a segment, in frames
    :return: a Diar object
    """
    diarization_out = Diar()
    for segment in diarization:
        _split_seg(smooth, segment, 250, split_size, diarization_out.segments)
    return diarization_out


def _split_seg(smooth, segment, min_seg_size, split_size, lst):
    """
    *segment*, a long segment, is cut recursively at its points of lowest
    energy in order to yield segments shorter than *split_size* frames. The
    cut point is searched at least *min_seg_size* frames away from the
    segment borders; the resulting segments are appended to *lst*.

    :param smooth: sliding mean of the energy (numpy.ndarray)
    :param segment: a segment
    :param min_seg_size: minimum size of a segment, in frames
    :param split_size: maximum size of a segment, in frames
    :param lst: the new segments are added to this list
    """
    stop = segment['stop'] - min_seg_size
    start = segment['start'] + min_seg_size
    l = segment['stop'] - segment['start']
    if l > split_size:
        m = start + np.argmin(smooth[start:stop])
        row_left = copy.deepcopy(segment)
        row_left['stop'] = m
        row_right = copy.deepcopy(segment)
        row_right['start'] = m
        _split_seg(smooth, row_left, min_seg_size, split_size, lst)
        _split_seg(smooth, row_right, min_seg_size, split_size, lst)
    else:
        lst.append(copy.deepcopy(segment))
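

# Sketch (assumption, not in the original module): _split_e on a synthetic
# smoothed-energy curve with its minimum at frame 3000. A single 6000-frame
# segment exceeds the 30 * 100 frame split size used by adjust(), so it is
# cut once at the energy dip, yielding [0, 3000) and [3000, 6000).
def _split_demo():
    smooth = np.abs(np.arange(6000) - 3000).astype(float)  # minimum at 3000
    diar = init_seg(np.empty((6000, 1)), show='demo')       # one 6000-frame segment
    return _split_e(smooth, diar, 30 * 100)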


def div_gauss(cep, show='empty', win=250, shift=0):
    """
    Segmentation based on Gaussian divergence.

    The segmentation detects the instantaneous change points corresponding to
    segment boundaries. The proposed algorithm is based on the detection of
    local maxima. It detects the change points through a Gaussian divergence
    (see equation below), computed using Gaussians with diagonal covariance
    matrices. The left and right Gaussians are estimated over a five-second
    window sliding along the whole signal (2.5 seconds for each Gaussian,
    given *win* =250 features). A change point, i.e. a segment boundary, is
    present in the middle of the window when the Gaussian divergence score
    reaches a local maximum.

    :math:`GD(s_l,s_r)=(\\mu_r-\\mu_l)^t\\Sigma_l^{-1/2}\\Sigma_r^{-1/2}(\\mu_r-\\mu_l)`

    where :math:`s_l` is the left segment modeled by the mean :math:`\\mu_l`
    and the diagonal covariance matrix :math:`\\Sigma_l`, and :math:`s_r` is
    the right segment modeled by the mean :math:`\\mu_r` and the diagonal
    covariance matrix :math:`\\Sigma_r`.

    :param cep: numpy array of frames
    :param show: name of the show
    :param win: window size in number of frames
    :param shift: index offset added to the segment boundaries
    :return: a Diar object (s4d annotation)
    """
    length = cep.shape[0]
    # start and stop of the rolling window A
    start_a = win - 1  # end of the NaN region
    stop_a = length - win
    # start and stop of the rolling window B
    start_b = win + win - 1  # end of the NaN region + delay
    stop_b = length

    # put features in a pandas DataFrame
    df = pd.DataFrame(cep)
    # compute rolling mean and std in the window of size win, get numpy arrays
    # mean and std have NaN at the beginning and the end of the output array
    # mean = pd.rolling_mean(df, win).values
    # std = pd.rolling_std(df, win).values
    r = df.rolling(window=win, center=False)
    mean = r.mean().values
    std = r.std().values

    # compute GD scores using the 2 windows A and B
    dist = (np.square(mean[start_a:stop_a, :] - mean[start_b:stop_b, :]) / (
        std[start_a:stop_a, :] * std[start_b:stop_b, :])).sum(axis=1)

    # replace missing values to match the cep size
    dist_pad = np.lib.pad(dist, (win - 1, win), 'constant',
                          constant_values=(dist[0], dist[-1]))

    # find local maxima at +/- win frames
    borders = scipy.signal.argrelmax(dist_pad, order=win)[0].tolist()
    # append the first and last indexes
    borders = [0] + borders + [length]
    diarization_out = Diar()
    spk = 0
    for i in range(0, len(borders) - 1):
        diarization_out.append(show=show, start=shift + borders[i],
                               stop=shift + borders[i + 1],
                               cluster='S' + str(spk))
        spk += 1
    return diarization_out
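

# Illustration of the GD equation above (a sketch, not part of the original
# module): the divergence between one fixed left window and one fixed right
# window, with diagonal covariances represented by per-dimension standard
# deviations. div_gauss computes the same score at every position through
# rolling statistics; this helper assumes non-degenerate windows (std > 0).
def _gauss_divergence(left, right):
    mu_l, mu_r = left.mean(axis=0), right.mean(axis=0)
    std_l, std_r = left.std(axis=0), right.std(axis=0)
    diff = mu_r - mu_l
    return float(np.sum(diff * diff / (std_l * std_r)))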


def segmentation(cep, diarization, win_size=250):
    """
    Applies :func:`div_gauss` to every segment of *diarization* longer than
    2 * *win_size* frames and renames the resulting clusters 'S0', 'S1', ...

    :param cep: numpy.ndarray containing MFCC
    :param diarization: a Diar object
    :param win_size: the window size, in frames, passed to div_gauss
    :return: a Diar object
    """
    diarization_out = Diar()
    for segment in diarization:
        l = segment.duration()
        # logging.info('start: %d stop: %d len: %d', segment['start'], segment['stop'], l)
        if l > 2 * win_size:
            cep_seg = segment.seg_features(cep)
            tmp = div_gauss(cep_seg, show=segment['show'], win=win_size,
                            shift=segment['start'])
            diarization_out.append_diar(tmp)
        else:
            diarization_out.append_seg(segment)
    i = 0
    for segment in diarization_out:
        segment['cluster'] = 'S' + str(i)
        i += 1
    return diarization_out


def bic_linear(cep, diarization, alpha, sr=False):
    """
    This segmentation over the signal fuses consecutive segments of the same
    speaker from the start to the end of the record. The measure employs the
    :math:`\\Delta BIC` based on the Bayesian Information Criterion, using
    full covariance Gaussians (see :class:`gauss.GaussFull`), as defined in
    the equations below.

    :math:`\\Delta BIC_{i,j} = PBIC_{i+j} - PBIC_{i} - PBIC_{j} - P`

    :math:`PBIC_{x} = \\frac{n_x}{2} \\log|\\Sigma_x|`

    :math:`cst = \\frac{1}{2} \\alpha \\left(d + \\frac{d(d+1)}{2}\\right)`

    :math:`P = cst \\times \\log(n_i+n_j)`

    where :math:`|\\Sigma_i|`, :math:`|\\Sigma_j|` and :math:`|\\Sigma|` are
    the determinants of the Gaussians associated to the left segment
    :math:`i`, the right segment :math:`j` and the merged segment
    :math:`i+j`. :math:`\\alpha` is a parameter to set up. The penalty factor
    :math:`P` depends on :math:`d`, the dimension of the cep, as well as on
    :math:`n_i` and :math:`n_j`, referring to the total lengths of the left
    segment :math:`i` and the right segment :math:`j` respectively.

    If *sr* is True, the BIC distance is replaced by the square root BIC
    (see :py:func:`clustering.hac_utils.bic_square_root`).

    :param cep: numpy.ndarray containing MFCC
    :param diarization: a Diar object
    :param alpha: the threshold
    :param sr: boolean
    :return: a Diar object
    """
    # logger = logging.getLogger(__name__)
    diarization_out = copy.deepcopy(diarization)
    diarization_out.sort(['show', 'start'])

    dim = cep.shape[1]
    cst = GaussFull.cst_bic(dim, alpha)

    if len(diarization) <= 1:
        return diarization_out

    segment1 = diarization_out[0]
    features1 = segment1.seg_features(cep)
    model1 = GaussFull(segment1['cluster'], dim)
    model1.add(features1)
    model1.compute()

    i = 1
    while i < len(diarization_out):
        segment2 = diarization_out[i]
        if segment2['start'] > segment1['stop'] + 1:
            # logging.warning('there is a hole between segments')
            i += 1
            segment1 = segment2
            continue
        features2 = segment2.seg_features(cep)
        model2 = GaussFull(segment2['cluster'], dim)
        model2.add(features2)
        model2.compute()

        model12 = GaussFull.merge(model1, model2)
        p = cst * np.log(model1.count + model2.count)
        if sr:
            p = bic_square_root(model1.count, model2.count, alpha, dim)
        delta_bic = model12.partial_bic - model1.partial_bic \
            - model2.partial_bic - p
        # print(i, delta_bic, p)
        if delta_bic < 0.0:
            logging.debug('linear remove %s %s: %i/%i %f', model1.name,
                          model2.name, i, len(diarization_out), delta_bic)
            segment1['stop'] = segment2['stop']
            model1 = model12
            del diarization_out[i]
        else:
            logging.debug('linear next %s %s: %i/%i %f', model1.name,
                          model2.name, i, len(diarization_out), delta_bic)
            segment1 = segment2
            model1 = model2
            i += 1
    return diarization_out
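

# End-to-end usage sketch (an assumption about typical usage, not taken from
# the original module): chaining the functions above on an MFCC array `cep`
# whose first column holds the energy. Feature extraction is out of scope
# here and depends on the front-end used; alpha=2.0 is only an illustrative
# threshold.
def _pipeline_sketch(cep, show, alpha=2.0):
    diar = init_seg(cep, show)          # one segment covering the whole show
    diar = segmentation(cep, diar)      # Gaussian divergence boundaries
    diar = bic_linear(cep, diar, alpha) # fuse consecutive similar segments
    diar = adjust(cep, diar)            # move borders into low energy regions
    return diar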