Module `koma.clustering`

Clustering analysis module

All functions related to the clustering of poles for automatic OMA.

Expand source code

"""
##############################################
Clustering analysis module
##############################################
All functions related to the clustering of poles for automatic OMA.
"""

import numpy as np
# import hdbscan
from sklearn.cluster import HDBSCAN
from . import modal


def crossdiff(arr, relative=False, allow_negatives=False):
    """
    Establish cross difference matrix used for clustering analysis.

    Arguments
    ---------------------------
    arr : double
        array to provide cross difference of (n_points long)
    relative : False, optional
        output relative difference or absolute difference
    allow_negatives : False, optional
        whether or not to allow negative difference

    Returns
    ---------------------------
    diff : double
        cross-difference matrix (n_points-by-n_points)

    References
    ---------------------------
    Kvåle and Øiseth :cite:`Kvale2020`
    """

    arr1, arr2 = np.meshgrid(arr, arr)

    if relative:
        scaling = arr1
    else:
        scaling = arr1*0+1.0

    if allow_negatives:
        diff = np.minimum(np.real((arr1-arr2)/scaling), np.real((arr1+arr2)/scaling)) + np.minimum(np.imag((arr1-arr2)/scaling), np.imag((arr1+arr2)/scaling))*1j
    else:
        diff = (arr1-arr2)/scaling


    # Remove imaginary 0 if input is float (real)
    if ~np.any(np.iscomplex(arr)):
        diff = np.real(diff).astype('float')

    return diff


def establish_tot_diff(lambd, phi, order, boolean_stops='default', scaling=None, normalize_distances=False, ensure_symmetric=True):
    """
    Establish total difference matrix based on input modes (from find_stable_modes).
        
    Arguments
    ---------------------------
    lambd : double
        array with complex-valued eigenvalues deemed stable
    phi : double
        2d array with complex-valued eigenvectors (stacked as columns), each column corresponds to a mode
    orders : int
        corresponding order for each stable mode
    boolean_stops : None, optional
        boolean stops to remove problematic poles (refers to difference matrices), e.g., to avoid same-order poles to appear
        in the same cluster the standard value {'order': [1, np.inf]} could be used (enforced by setting to 'avoid_same_order')
    scaling : {'mac': 1.0, 'lambda_real': 1.0, 'lambda_imag': 1.0}, optional
        scaling of predefined available variables used in total difference (available
        variables: 'mac', 'lambda_real', 'lambda_imag', 'omega_d', 'omega_n', 'order', 'xi')
    normalize_distances : True
        whether or not to normalize the distances (ensures compatibility between mac and other parameters)
    ensure_symmetric : True
        ensure that output is numerically symmetric

    Returns
    ---------------------------
    tot_diff : double
        cross-difference matrix (n_points-by-n_points)

    References
    ---------------------------
    Kvåle and Øiseth :cite:`Kvale2020`
    """
    
    # Establish dictionary with available difference variables
    diff_vars = dict()
    
    if type(boolean_stops)==str and boolean_stops == 'avoid_same_order':
        boolean_stops = {'order': [1, np.inf]}
        diff_vars['order'] = np.abs(crossdiff(order, relative=False))   #generates != integers?
                    
    elif boolean_stops is None:
         boolean_stops = {}  
    
    if scaling is None:
        scaling = {'mac': 1.0, 'lambda_real': 1.0, 'lambda_imag': 1.0}

    omega_n = np.abs(lambd)
    omega_d = np.abs(np.imag(lambd))
    xi = -np.real(lambd)/np.abs(lambd)


    if 'mac' in scaling:
        diff_vars['mac'] = np.abs(1.0 - modal.xmacmat(phi))
    
    if ('lambda_real' in scaling) or ('lambda_imag' in scaling):
        xlambda_diff = crossdiff(lambd, relative=True, allow_negatives=True)
        diff_vars['lambda_real']  = np.abs(np.real(xlambda_diff))
        diff_vars['lambda_imag']  = np.abs(np.imag(xlambda_diff))
        
    if 'omega_n' in scaling:
        diff_vars['omega_n'] = np.abs(crossdiff(omega_n, relative=True))
    
    if 'omega_d' in scaling:
        diff_vars['omega_d'] = np.abs(crossdiff(omega_d, relative=True))
        
    if 'order' in scaling:
        diff_vars['order'] = np.abs(crossdiff(order, relative=False))   #generates != integers?
        
    if 'xi' in scaling:
        diff_vars['xi']  = np.abs(crossdiff(xi, relative=True))

    # Normalize distances
    if normalize_distances:
        for key in scaling:
            diff_vars[key] = diff_vars[key]/np.max(np.abs(diff_vars[key])[:])

    # Establish boolean hard stop differences
    boolean_stop_diff = np.zeros([lambd.shape[0], lambd.shape[0]])
    
    for key in boolean_stops.keys():
        stops = boolean_stops[key]
        invalid_ix = np.logical_or((diff_vars[key]<stops[0]), (diff_vars[key]>stops[1]))
        boolean_stop_diff[invalid_ix] = np.inf
     
    # Establish total difference
    tot_diff = np.zeros([lambd.shape[0], lambd.shape[0]])

    for var in scaling:
        tot_diff += boolean_stop_diff + (diff_vars[var]*scaling[var])**2

    tot_diff = np.sqrt(tot_diff) + boolean_stop_diff

    if ensure_symmetric:
        tot_diff = (tot_diff + tot_diff.T)/2

    return tot_diff


class PoleClusterer:
    """
    Object to create pole clusters.
    
    Arguments
    ---------------------------
    lambd : double
        array with complex-valued eigenvalues deemed stable
    phi : double
        2d array with complex-valued eigenvectors (stacked as columns), each column corresponds to a mode
    orders : int
        corresponding order for each stable mode
    min_samples : 20, optional
        number of points in neighbourhood for point to be 
        considered core point (larger value => more conservative clustering)
    min_cluster_size : 20, optional
        when min_cluster_size points fall out of cluster it is not a split,
        it is merely points falling out of the cluster
    alpha : 1.0, optional
        distance scaling parameter, implies conservativism of clustering (higher => fewer points)
    boolean_stops : None, optional
        boolean stops to remove problematic poles (refers to difference matrices), e.g., to avoid same-order poles to appear
        in the same cluster the standard value {'order': [1, np.inf]} could be used (enforced by setting to 'avoid_same_order')
    scaling : {'mac': 1.0, 'lambda_real': 1.0, 'lambda_imag': 1.0}, optional
        scaling of predefined available variables used in total difference (available
        variables: 'mac', 'lambda_real', 'lambda_imag', 'omega_d', 'omega_n', 'order', 'xi')
    normalize_distances : False
        whether or not to normalize the distances (ensures compatibility between mac and other parameters)

    References
    ---------------------------
    Kvåle and Øiseth :cite:`Kvale2020`
    """


    def __init__(self, lambd, phi, order, min_samples=20, min_cluster_size=20, 
                 alpha=1.0, boolean_stops=None, scaling=None, normalize_distances=False,
                 run_clustering=True):
        
        self.boolean_stops = boolean_stops
        
        if scaling is None:
            self.scaling = {'mac': 1.0, 'lambda_real': 1.0, 'lambda_imag': 1.0}
        else:
            self.scaling = scaling

        self.normalize_distances = normalize_distances
        self.hdbscan_clusterer = HDBSCAN(metric='precomputed', 
                                                 min_samples=min_samples, 
                                                 min_cluster_size=min_cluster_size, 
                                                 alpha=alpha)
        self.lambd = lambd
        self.phi = phi
        self.order = order
        
        if run_clustering:
            self.cluster()


    def cluster(self):
        """
        Create tot_diff matrix and HDBSCAN cluster object from input data.
        """

        self.tot_diff = establish_tot_diff(self.lambd, self.phi, self.order, 
                                           boolean_stops=self.boolean_stops, scaling=self.scaling,
                                           normalize_distances=self.normalize_distances,
                                           ensure_symmetric=True)
        self.hdbscan_clusterer.fit(self.tot_diff)


    def postprocess(self, prob_threshold=0.0, normalize_and_maxreal=True):
        """
        Postprocess cluster object (sort and restrict).

        Arguments
        ---------------------------
        prob_threshold : 0.0, optional
            threshold value for probability of point belonging 
            to its determined cluster
        normalize_and_maxreal : True, optional
            whether or not to normalize each mode shape and maximize its real value 
            (rotate all components equally much in complex plane)

        Returns
        ---------------------------
        lambd_used : double
            sorted/remaining eigenvalues after restrictions/sort
        phi_used : double
            sorted/remaining eigenvectors after restrictions/sort
        order_stab_used : double
            corresponding orders
        group_ix : int
            indices (sorted based on damped natural freq.) of modes
        all_single_ix : double 
            index corresponding to input data
        probs : double
            probabilities of all points in all clusters
    
        """

        omega_d = np.abs(np.imag(self.lambd))

        if normalize_and_maxreal:
            phi0,__ = modal.normalize_phi(modal.maxreal(self.phi))
        else:
            phi0 = self.phi*1.0   
    

        # Establish all labels
        labels_all = self.hdbscan_clusterer.labels_

        # Align modes
        for label in np.unique(labels_all):
            phi0[:, labels_all==label] = modal.align_modes(phi0[:, labels_all==label])  #also align modes
            
        # Remove noise (label <0)
        keep_ix_temp = np.where(labels_all>=0)[0]

        # Apply probability threshold
        probs_temp = self.hdbscan_clusterer.probabilities_[keep_ix_temp]
        keep_ix = keep_ix_temp[probs_temp>=prob_threshold]
        probs_unsorted = self.hdbscan_clusterer.probabilities_[keep_ix]

        # Retain only "kept" indices from arrays
        labels_unsorted = labels_all[keep_ix]
        if len(labels_unsorted) == 0:
            return [], [], [], [], [], []
        
        n_labels = max(labels_unsorted)+1
            
        # Sort of cluster groups based on mean frequency
        wmean = [np.mean(omega_d[keep_ix][labels_unsorted==label]) for label in range(0, max(labels_unsorted)+1)]
        sort_ix = np.argsort(wmean)

        # Rearrange labels and probs (sorted based on frequency)
        labels = np.array([np.where(sort_ix==label)[0][0] for label in labels_unsorted]).flatten()
        probs = np.hstack([probs_unsorted[labels==label] for label in range(0, n_labels)])

        # Remove double results (more poles at same order within same cluster)
        keep_single_ix = [None]*n_labels
        for label in range(0, n_labels):           
            relevant_orders = self.order[keep_ix][labels==label]
            unique_relevant_orders = np.unique(relevant_orders)
            groups = [np.where(o==relevant_orders)[0] for o in unique_relevant_orders]
            keep_best_ix = []
            these_ix = np.where(labels==label)[0]
            
            for group in groups:
                keep_best_ix.append(these_ix[group[np.argmax(probs[labels==label][group])]])
            
            keep_single_ix[label] = np.array(keep_best_ix)

        all_single_ix = np.hstack(keep_single_ix)
        group_ixs = labels[all_single_ix]
        probs = probs[all_single_ix]
        order_stab_used = self.order[keep_ix][all_single_ix]
        lambd_used = self.lambd[keep_ix][all_single_ix]
        phi_used = phi0[:, keep_ix][:, all_single_ix]

        return lambd_used, phi_used, order_stab_used, group_ixs, all_single_ix, probs


def group_clusters(lambd_used, phi_used, order_stab_used, group_ixs, all_single_ixs, probs, return_lambda=False):
    '''
    Group the output of PoleClusterer.postprocess()

    Arguments
    ---------------------------
    lambd_used : double
        sorted/remaining eigenvalues after restrictions/sort
        to its determined cluster
    phi_used : double
        sorted/remaining eigenvectors after restrictions/sort
    order_stab_used : double
        corresponding orders
    group_ixs : int
        indices (sorted based on damped natural freq.) of modes
    all_single_ixs : double 
        index corresponding to input data
    probs : double
        probabilities of all points in all clusters
    return_lambda : boolean
        whether or not to return as lambda instead of xi, omega

    Returns
    ---------------------------
    xi_cluster : double
        list of arrays with xi grouped
    omega_n_cluster : double
        list of arrays with omega_n grouped
    phi_cluster : double
        list of arrays with phi grouped
    order_cluster : double
        list of arrays with orders grouped
    probs_cluster : double
        list of arrays with probs grouped
    ixs_cluster : double
        list of arrays with ixs corresponding to each cluster
    (lambd_cluster) : double, complex
        grouped clusters of lambd, only returned if return_lambda is True (returned in stead of xi_cluster and omega_n_cluster)

    '''  

    n_groups = len(np.unique(group_ixs))
    xi_cluster = [None]*n_groups
    omega_n_cluster = [None]*n_groups
    lambd_cluster = [None]*n_groups
    phi_cluster = [None]*n_groups
    order_cluster = [None]*n_groups
    probs_cluster = [None]*n_groups
    ixs_cluster = [None]*n_groups
    
    for group_ix in range(n_groups):
        this_ix = group_ixs==group_ix
        lambd_cluster[group_ix] = lambd_used[this_ix]
        xi_cluster[group_ix] = -np.real(lambd_used[this_ix])/np.abs(lambd_used[this_ix])
        omega_n_cluster[group_ix] = np.abs(lambd_used[this_ix])
        phi_cluster[group_ix] = phi_used[:, this_ix]
        order_cluster[group_ix] = order_stab_used[this_ix]
        probs_cluster[group_ix] = probs[this_ix]
        ixs_cluster[group_ix] = all_single_ixs[this_ix]
    
    if return_lambda:
        return lambd_cluster, phi_cluster, order_cluster, probs_cluster, ixs_cluster
    else:
        return xi_cluster, omega_n_cluster, phi_cluster, order_cluster, probs_cluster, ixs_cluster


def group_array(arr, group_ixs, axis=0):
    '''
    Group a single output array of PoleClusterer.postprocess() based on group indices.

    Arguments
    ---------------------------
    arr : double
        array corresponding
    group_ixs : int
        indices (sorted based on damped natural freq.) of modes

    Returns
    ---------------------------
    arr_grouped : double
        grouped array
    '''  

    n_groups = len(np.unique(group_ixs))
    arr_grouped = [None]*n_groups

    for group_ix in range(n_groups):
        this_ix = np.where(group_ixs==group_ix)[0]
        arr_grouped[group_ix] = np.take(arr, this_ix, axis=axis)

    return arr_grouped

Functions

def crossdiff(arr, relative=False, allow_negatives=False)

Establish cross difference matrix used for clustering analysis.

Arguments

arr : double: array to provide cross difference of (n_points long)
relative : False, optional: output relative difference or absolute difference
allow_negatives : False, optional: whether or not to allow negative difference

Returns

diff : double: cross-difference matrix (n_points-by-n_points)

References

Kvåle and Øiseth :cite:Kvale2020

Expand source code

def crossdiff(arr, relative=False, allow_negatives=False):
    """
    Establish cross difference matrix used for clustering analysis.

    Arguments
    ---------------------------
    arr : double
        array to provide cross difference of (n_points long)
    relative : False, optional
        output relative difference or absolute difference
    allow_negatives : False, optional
        whether or not to allow negative difference

    Returns
    ---------------------------
    diff : double
        cross-difference matrix (n_points-by-n_points)

    References
    ---------------------------
    Kvåle and Øiseth :cite:`Kvale2020`
    """

    arr1, arr2 = np.meshgrid(arr, arr)

    if relative:
        scaling = arr1
    else:
        scaling = arr1*0+1.0

    if allow_negatives:
        diff = np.minimum(np.real((arr1-arr2)/scaling), np.real((arr1+arr2)/scaling)) + np.minimum(np.imag((arr1-arr2)/scaling), np.imag((arr1+arr2)/scaling))*1j
    else:
        diff = (arr1-arr2)/scaling


    # Remove imaginary 0 if input is float (real)
    if ~np.any(np.iscomplex(arr)):
        diff = np.real(diff).astype('float')

    return diff

def establish_tot_diff(lambd, phi, order, boolean_stops='default', scaling=None, normalize_distances=False, ensure_symmetric=True)

Establish total difference matrix based on input modes (from find_stable_modes).

Arguments

lambd : double: array with complex-valued eigenvalues deemed stable
phi : double: 2d array with complex-valued eigenvectors (stacked as columns), each column corresponds to a mode
orders : int: corresponding order for each stable mode
boolean_stops : None, optional: boolean stops to remove problematic poles (refers to difference matrices), e.g., to avoid same-order poles to appear in the same cluster the standard value {'order': [1, np.inf]} could be used (enforced by setting to 'avoid_same_order')
scaling : {'mac': 1.0, 'lambda_real': 1.0, 'lambda_imag': 1.0}, optional: scaling of predefined available variables used in total difference (available variables: 'mac', 'lambda_real', 'lambda_imag', 'omega_d', 'omega_n', 'order', 'xi')
normalize_distances : True: whether or not to normalize the distances (ensures compatibility between mac and other parameters)
ensure_symmetric : True: ensure that output is numerically symmetric

Returns

tot_diff : double: cross-difference matrix (n_points-by-n_points)

References

Kvåle and Øiseth :cite:Kvale2020

Expand source code

def establish_tot_diff(lambd, phi, order, boolean_stops='default', scaling=None, normalize_distances=False, ensure_symmetric=True):
    """
    Establish total difference matrix based on input modes (from find_stable_modes).
        
    Arguments
    ---------------------------
    lambd : double
        array with complex-valued eigenvalues deemed stable
    phi : double
        2d array with complex-valued eigenvectors (stacked as columns), each column corresponds to a mode
    orders : int
        corresponding order for each stable mode
    boolean_stops : None, optional
        boolean stops to remove problematic poles (refers to difference matrices), e.g., to avoid same-order poles to appear
        in the same cluster the standard value {'order': [1, np.inf]} could be used (enforced by setting to 'avoid_same_order')
    scaling : {'mac': 1.0, 'lambda_real': 1.0, 'lambda_imag': 1.0}, optional
        scaling of predefined available variables used in total difference (available
        variables: 'mac', 'lambda_real', 'lambda_imag', 'omega_d', 'omega_n', 'order', 'xi')
    normalize_distances : True
        whether or not to normalize the distances (ensures compatibility between mac and other parameters)
    ensure_symmetric : True
        ensure that output is numerically symmetric

    Returns
    ---------------------------
    tot_diff : double
        cross-difference matrix (n_points-by-n_points)

    References
    ---------------------------
    Kvåle and Øiseth :cite:`Kvale2020`
    """
    
    # Establish dictionary with available difference variables
    diff_vars = dict()
    
    if type(boolean_stops)==str and boolean_stops == 'avoid_same_order':
        boolean_stops = {'order': [1, np.inf]}
        diff_vars['order'] = np.abs(crossdiff(order, relative=False))   #generates != integers?
                    
    elif boolean_stops is None:
         boolean_stops = {}  
    
    if scaling is None:
        scaling = {'mac': 1.0, 'lambda_real': 1.0, 'lambda_imag': 1.0}

    omega_n = np.abs(lambd)
    omega_d = np.abs(np.imag(lambd))
    xi = -np.real(lambd)/np.abs(lambd)


    if 'mac' in scaling:
        diff_vars['mac'] = np.abs(1.0 - modal.xmacmat(phi))
    
    if ('lambda_real' in scaling) or ('lambda_imag' in scaling):
        xlambda_diff = crossdiff(lambd, relative=True, allow_negatives=True)
        diff_vars['lambda_real']  = np.abs(np.real(xlambda_diff))
        diff_vars['lambda_imag']  = np.abs(np.imag(xlambda_diff))
        
    if 'omega_n' in scaling:
        diff_vars['omega_n'] = np.abs(crossdiff(omega_n, relative=True))
    
    if 'omega_d' in scaling:
        diff_vars['omega_d'] = np.abs(crossdiff(omega_d, relative=True))
        
    if 'order' in scaling:
        diff_vars['order'] = np.abs(crossdiff(order, relative=False))   #generates != integers?
        
    if 'xi' in scaling:
        diff_vars['xi']  = np.abs(crossdiff(xi, relative=True))

    # Normalize distances
    if normalize_distances:
        for key in scaling:
            diff_vars[key] = diff_vars[key]/np.max(np.abs(diff_vars[key])[:])

    # Establish boolean hard stop differences
    boolean_stop_diff = np.zeros([lambd.shape[0], lambd.shape[0]])
    
    for key in boolean_stops.keys():
        stops = boolean_stops[key]
        invalid_ix = np.logical_or((diff_vars[key]<stops[0]), (diff_vars[key]>stops[1]))
        boolean_stop_diff[invalid_ix] = np.inf
     
    # Establish total difference
    tot_diff = np.zeros([lambd.shape[0], lambd.shape[0]])

    for var in scaling:
        tot_diff += boolean_stop_diff + (diff_vars[var]*scaling[var])**2

    tot_diff = np.sqrt(tot_diff) + boolean_stop_diff

    if ensure_symmetric:
        tot_diff = (tot_diff + tot_diff.T)/2

    return tot_diff

def group_array(arr, group_ixs, axis=0)

Group a single output array of PoleClusterer.postprocess() based on group indices.

Arguments

arr : double: array corresponding
group_ixs : int: indices (sorted based on damped natural freq.) of modes

Returns

arr_grouped : double: grouped array

Expand source code

def group_array(arr, group_ixs, axis=0):
    '''
    Group a single output array of PoleClusterer.postprocess() based on group indices.

    Arguments
    ---------------------------
    arr : double
        array corresponding
    group_ixs : int
        indices (sorted based on damped natural freq.) of modes

    Returns
    ---------------------------
    arr_grouped : double
        grouped array
    '''  

    n_groups = len(np.unique(group_ixs))
    arr_grouped = [None]*n_groups

    for group_ix in range(n_groups):
        this_ix = np.where(group_ixs==group_ix)[0]
        arr_grouped[group_ix] = np.take(arr, this_ix, axis=axis)

    return arr_grouped

def group_clusters(lambd_used, phi_used, order_stab_used, group_ixs, all_single_ixs, probs, return_lambda=False)

Group the output of PoleClusterer.postprocess()

Arguments

lambd_used : double: sorted/remaining eigenvalues after restrictions/sort to its determined cluster
phi_used : double: sorted/remaining eigenvectors after restrictions/sort
order_stab_used : double: corresponding orders
group_ixs : int: indices (sorted based on damped natural freq.) of modes
all_single_ixs : double: index corresponding to input data
probs : double: probabilities of all points in all clusters
return_lambda : boolean: whether or not to return as lambda instead of xi, omega

Returns

xi_cluster : double: list of arrays with xi grouped
omega_n_cluster : double: list of arrays with omega_n grouped
phi_cluster : double: list of arrays with phi grouped
order_cluster : double: list of arrays with orders grouped
probs_cluster : double: list of arrays with probs grouped
ixs_cluster : double: list of arrays with ixs corresponding to each cluster

(lambd_cluster) : double, complex grouped clusters of lambd, only returned if return_lambda is True (returned in stead of xi_cluster and omega_n_cluster)

Expand source code

def group_clusters(lambd_used, phi_used, order_stab_used, group_ixs, all_single_ixs, probs, return_lambda=False):
    '''
    Group the output of PoleClusterer.postprocess()

    Arguments
    ---------------------------
    lambd_used : double
        sorted/remaining eigenvalues after restrictions/sort
        to its determined cluster
    phi_used : double
        sorted/remaining eigenvectors after restrictions/sort
    order_stab_used : double
        corresponding orders
    group_ixs : int
        indices (sorted based on damped natural freq.) of modes
    all_single_ixs : double 
        index corresponding to input data
    probs : double
        probabilities of all points in all clusters
    return_lambda : boolean
        whether or not to return as lambda instead of xi, omega

    Returns
    ---------------------------
    xi_cluster : double
        list of arrays with xi grouped
    omega_n_cluster : double
        list of arrays with omega_n grouped
    phi_cluster : double
        list of arrays with phi grouped
    order_cluster : double
        list of arrays with orders grouped
    probs_cluster : double
        list of arrays with probs grouped
    ixs_cluster : double
        list of arrays with ixs corresponding to each cluster
    (lambd_cluster) : double, complex
        grouped clusters of lambd, only returned if return_lambda is True (returned in stead of xi_cluster and omega_n_cluster)

    '''  

    n_groups = len(np.unique(group_ixs))
    xi_cluster = [None]*n_groups
    omega_n_cluster = [None]*n_groups
    lambd_cluster = [None]*n_groups
    phi_cluster = [None]*n_groups
    order_cluster = [None]*n_groups
    probs_cluster = [None]*n_groups
    ixs_cluster = [None]*n_groups
    
    for group_ix in range(n_groups):
        this_ix = group_ixs==group_ix
        lambd_cluster[group_ix] = lambd_used[this_ix]
        xi_cluster[group_ix] = -np.real(lambd_used[this_ix])/np.abs(lambd_used[this_ix])
        omega_n_cluster[group_ix] = np.abs(lambd_used[this_ix])
        phi_cluster[group_ix] = phi_used[:, this_ix]
        order_cluster[group_ix] = order_stab_used[this_ix]
        probs_cluster[group_ix] = probs[this_ix]
        ixs_cluster[group_ix] = all_single_ixs[this_ix]
    
    if return_lambda:
        return lambd_cluster, phi_cluster, order_cluster, probs_cluster, ixs_cluster
    else:
        return xi_cluster, omega_n_cluster, phi_cluster, order_cluster, probs_cluster, ixs_cluster

Classes

class PoleClusterer (lambd, phi, order, min_samples=20, min_cluster_size=20, alpha=1.0, boolean_stops=None, scaling=None, normalize_distances=False, run_clustering=True)

Object to create pole clusters.

Arguments

lambd : double: array with complex-valued eigenvalues deemed stable
phi : double: 2d array with complex-valued eigenvectors (stacked as columns), each column corresponds to a mode
orders : int: corresponding order for each stable mode
min_samples : 20, optional: number of points in neighbourhood for point to be considered core point (larger value => more conservative clustering)
min_cluster_size : 20, optional: when min_cluster_size points fall out of cluster it is not a split, it is merely points falling out of the cluster
alpha : 1.0, optional: distance scaling parameter, implies conservativism of clustering (higher => fewer points)
boolean_stops : None, optional: boolean stops to remove problematic poles (refers to difference matrices), e.g., to avoid same-order poles to appear in the same cluster the standard value {'order': [1, np.inf]} could be used (enforced by setting to 'avoid_same_order')
scaling : {'mac': 1.0, 'lambda_real': 1.0, 'lambda_imag': 1.0}, optional: scaling of predefined available variables used in total difference (available variables: 'mac', 'lambda_real', 'lambda_imag', 'omega_d', 'omega_n', 'order', 'xi')
normalize_distances : False: whether or not to normalize the distances (ensures compatibility between mac and other parameters)

References

Kvåle and Øiseth :cite:Kvale2020

Expand source code

class PoleClusterer:
    """
    Object to create pole clusters.
    
    Arguments
    ---------------------------
    lambd : double
        array with complex-valued eigenvalues deemed stable
    phi : double
        2d array with complex-valued eigenvectors (stacked as columns), each column corresponds to a mode
    orders : int
        corresponding order for each stable mode
    min_samples : 20, optional
        number of points in neighbourhood for point to be 
        considered core point (larger value => more conservative clustering)
    min_cluster_size : 20, optional
        when min_cluster_size points fall out of cluster it is not a split,
        it is merely points falling out of the cluster
    alpha : 1.0, optional
        distance scaling parameter, implies conservativism of clustering (higher => fewer points)
    boolean_stops : None, optional
        boolean stops to remove problematic poles (refers to difference matrices), e.g., to avoid same-order poles to appear
        in the same cluster the standard value {'order': [1, np.inf]} could be used (enforced by setting to 'avoid_same_order')
    scaling : {'mac': 1.0, 'lambda_real': 1.0, 'lambda_imag': 1.0}, optional
        scaling of predefined available variables used in total difference (available
        variables: 'mac', 'lambda_real', 'lambda_imag', 'omega_d', 'omega_n', 'order', 'xi')
    normalize_distances : False
        whether or not to normalize the distances (ensures compatibility between mac and other parameters)

    References
    ---------------------------
    Kvåle and Øiseth :cite:`Kvale2020`
    """


    def __init__(self, lambd, phi, order, min_samples=20, min_cluster_size=20, 
                 alpha=1.0, boolean_stops=None, scaling=None, normalize_distances=False,
                 run_clustering=True):
        
        self.boolean_stops = boolean_stops
        
        if scaling is None:
            self.scaling = {'mac': 1.0, 'lambda_real': 1.0, 'lambda_imag': 1.0}
        else:
            self.scaling = scaling

        self.normalize_distances = normalize_distances
        self.hdbscan_clusterer = HDBSCAN(metric='precomputed', 
                                                 min_samples=min_samples, 
                                                 min_cluster_size=min_cluster_size, 
                                                 alpha=alpha)
        self.lambd = lambd
        self.phi = phi
        self.order = order
        
        if run_clustering:
            self.cluster()


    def cluster(self):
        """
        Create tot_diff matrix and HDBSCAN cluster object from input data.
        """

        self.tot_diff = establish_tot_diff(self.lambd, self.phi, self.order, 
                                           boolean_stops=self.boolean_stops, scaling=self.scaling,
                                           normalize_distances=self.normalize_distances,
                                           ensure_symmetric=True)
        self.hdbscan_clusterer.fit(self.tot_diff)


    def postprocess(self, prob_threshold=0.0, normalize_and_maxreal=True):
        """
        Postprocess cluster object (sort and restrict).

        Arguments
        ---------------------------
        prob_threshold : 0.0, optional
            threshold value for probability of point belonging 
            to its determined cluster
        normalize_and_maxreal : True, optional
            whether or not to normalize each mode shape and maximize its real value 
            (rotate all components equally much in complex plane)

        Returns
        ---------------------------
        lambd_used : double
            sorted/remaining eigenvalues after restrictions/sort
        phi_used : double
            sorted/remaining eigenvectors after restrictions/sort
        order_stab_used : double
            corresponding orders
        group_ix : int
            indices (sorted based on damped natural freq.) of modes
        all_single_ix : double 
            index corresponding to input data
        probs : double
            probabilities of all points in all clusters
    
        """

        omega_d = np.abs(np.imag(self.lambd))

        if normalize_and_maxreal:
            phi0,__ = modal.normalize_phi(modal.maxreal(self.phi))
        else:
            phi0 = self.phi*1.0   
    

        # Establish all labels
        labels_all = self.hdbscan_clusterer.labels_

        # Align modes
        for label in np.unique(labels_all):
            phi0[:, labels_all==label] = modal.align_modes(phi0[:, labels_all==label])  #also align modes
            
        # Remove noise (label <0)
        keep_ix_temp = np.where(labels_all>=0)[0]

        # Apply probability threshold
        probs_temp = self.hdbscan_clusterer.probabilities_[keep_ix_temp]
        keep_ix = keep_ix_temp[probs_temp>=prob_threshold]
        probs_unsorted = self.hdbscan_clusterer.probabilities_[keep_ix]

        # Retain only "kept" indices from arrays
        labels_unsorted = labels_all[keep_ix]
        if len(labels_unsorted) == 0:
            return [], [], [], [], [], []
        
        n_labels = max(labels_unsorted)+1
            
        # Sort of cluster groups based on mean frequency
        wmean = [np.mean(omega_d[keep_ix][labels_unsorted==label]) for label in range(0, max(labels_unsorted)+1)]
        sort_ix = np.argsort(wmean)

        # Rearrange labels and probs (sorted based on frequency)
        labels = np.array([np.where(sort_ix==label)[0][0] for label in labels_unsorted]).flatten()
        probs = np.hstack([probs_unsorted[labels==label] for label in range(0, n_labels)])

        # Remove double results (more poles at same order within same cluster)
        keep_single_ix = [None]*n_labels
        for label in range(0, n_labels):           
            relevant_orders = self.order[keep_ix][labels==label]
            unique_relevant_orders = np.unique(relevant_orders)
            groups = [np.where(o==relevant_orders)[0] for o in unique_relevant_orders]
            keep_best_ix = []
            these_ix = np.where(labels==label)[0]
            
            for group in groups:
                keep_best_ix.append(these_ix[group[np.argmax(probs[labels==label][group])]])
            
            keep_single_ix[label] = np.array(keep_best_ix)

        all_single_ix = np.hstack(keep_single_ix)
        group_ixs = labels[all_single_ix]
        probs = probs[all_single_ix]
        order_stab_used = self.order[keep_ix][all_single_ix]
        lambd_used = self.lambd[keep_ix][all_single_ix]
        phi_used = phi0[:, keep_ix][:, all_single_ix]

        return lambd_used, phi_used, order_stab_used, group_ixs, all_single_ix, probs

Methods

def cluster(self)

Create tot_diff matrix and HDBSCAN cluster object from input data.

Expand source code

def cluster(self):
    """
    Create tot_diff matrix and HDBSCAN cluster object from input data.
    """

    self.tot_diff = establish_tot_diff(self.lambd, self.phi, self.order, 
                                       boolean_stops=self.boolean_stops, scaling=self.scaling,
                                       normalize_distances=self.normalize_distances,
                                       ensure_symmetric=True)
    self.hdbscan_clusterer.fit(self.tot_diff)

def postprocess(self, prob_threshold=0.0, normalize_and_maxreal=True)

Postprocess cluster object (sort and restrict).

Arguments

prob_threshold : 0.0, optional: threshold value for probability of point belonging to its determined cluster
normalize_and_maxreal : True, optional: whether or not to normalize each mode shape and maximize its real value (rotate all components equally much in complex plane)

Returns

lambd_used : double: sorted/remaining eigenvalues after restrictions/sort
phi_used : double: sorted/remaining eigenvectors after restrictions/sort
order_stab_used : double: corresponding orders
group_ix : int: indices (sorted based on damped natural freq.) of modes
all_single_ix : double: index corresponding to input data
probs : double: probabilities of all points in all clusters

Expand source code

def postprocess(self, prob_threshold=0.0, normalize_and_maxreal=True):
    """
    Postprocess cluster object (sort and restrict).

    Arguments
    ---------------------------
    prob_threshold : 0.0, optional
        threshold value for probability of point belonging 
        to its determined cluster
    normalize_and_maxreal : True, optional
        whether or not to normalize each mode shape and maximize its real value 
        (rotate all components equally much in complex plane)

    Returns
    ---------------------------
    lambd_used : double
        sorted/remaining eigenvalues after restrictions/sort
    phi_used : double
        sorted/remaining eigenvectors after restrictions/sort
    order_stab_used : double
        corresponding orders
    group_ix : int
        indices (sorted based on damped natural freq.) of modes
    all_single_ix : double 
        index corresponding to input data
    probs : double
        probabilities of all points in all clusters

    """

    omega_d = np.abs(np.imag(self.lambd))

    if normalize_and_maxreal:
        phi0,__ = modal.normalize_phi(modal.maxreal(self.phi))
    else:
        phi0 = self.phi*1.0   


    # Establish all labels
    labels_all = self.hdbscan_clusterer.labels_

    # Align modes
    for label in np.unique(labels_all):
        phi0[:, labels_all==label] = modal.align_modes(phi0[:, labels_all==label])  #also align modes
        
    # Remove noise (label <0)
    keep_ix_temp = np.where(labels_all>=0)[0]

    # Apply probability threshold
    probs_temp = self.hdbscan_clusterer.probabilities_[keep_ix_temp]
    keep_ix = keep_ix_temp[probs_temp>=prob_threshold]
    probs_unsorted = self.hdbscan_clusterer.probabilities_[keep_ix]

    # Retain only "kept" indices from arrays
    labels_unsorted = labels_all[keep_ix]
    if len(labels_unsorted) == 0:
        return [], [], [], [], [], []
    
    n_labels = max(labels_unsorted)+1
        
    # Sort of cluster groups based on mean frequency
    wmean = [np.mean(omega_d[keep_ix][labels_unsorted==label]) for label in range(0, max(labels_unsorted)+1)]
    sort_ix = np.argsort(wmean)

    # Rearrange labels and probs (sorted based on frequency)
    labels = np.array([np.where(sort_ix==label)[0][0] for label in labels_unsorted]).flatten()
    probs = np.hstack([probs_unsorted[labels==label] for label in range(0, n_labels)])

    # Remove double results (more poles at same order within same cluster)
    keep_single_ix = [None]*n_labels
    for label in range(0, n_labels):           
        relevant_orders = self.order[keep_ix][labels==label]
        unique_relevant_orders = np.unique(relevant_orders)
        groups = [np.where(o==relevant_orders)[0] for o in unique_relevant_orders]
        keep_best_ix = []
        these_ix = np.where(labels==label)[0]
        
        for group in groups:
            keep_best_ix.append(these_ix[group[np.argmax(probs[labels==label][group])]])
        
        keep_single_ix[label] = np.array(keep_best_ix)

    all_single_ix = np.hstack(keep_single_ix)
    group_ixs = labels[all_single_ix]
    probs = probs[all_single_ix]
    order_stab_used = self.order[keep_ix][all_single_ix]
    lambd_used = self.lambd[keep_ix][all_single_ix]
    phi_used = phi0[:, keep_ix][:, all_single_ix]

    return lambd_used, phi_used, order_stab_used, group_ixs, all_single_ix, probs