Source code for brainspace.gradient.embedding

"""
Embedding approaches.
"""

# Author: Oualid Benkarim <oualid.benkarim@mcgill.ca>
# License: BSD 3 clause


import warnings
from abc import ABCMeta, abstractmethod

import numpy as np
from scipy import sparse as ssp
from scipy.sparse.linalg import eigsh
from scipy.sparse.csgraph import laplacian
from scipy.sparse.csgraph import connected_components

from sklearn.utils import check_random_state
from sklearn.base import BaseEstimator
from sklearn.decomposition import PCA

from .utils import is_symmetric, make_symmetric


def _graph_is_connected(graph):
    return connected_components(graph)[0] == 1


[docs]def diffusion_mapping(adj, n_components=10, alpha=0.5, diffusion_time=0,
                      random_state=None):
    """Compute diffusion map of affinity matrix.

    Parameters
    ----------
    adj : ndarray or sparse matrix, shape = (n, n)
        Affinity matrix.
    n_components : int or None, optional
        Number of eigenvectors. If None, selection of `n_components` is based
        on 95% drop-off in eigenvalues. When `n_components` is None,
        the maximum number of eigenvectors is restricted to
        ``n_components <= sqrt(n)``. Default is 10.
    alpha : float, optional
        Anisotropic diffusion parameter, ``0 <= alpha <= 1``. Default is 0.5.
    diffusion_time : int, optional
        Diffusion time or scale. If ``diffusion_time == 0`` use multi-scale
        diffusion maps. Default is 0.
    random_state : int or None, optional
        Random state. Default is None.

    Returns
    -------
    v : ndarray, shape (n, n_components)
        Eigenvectors of the affinity matrix in same order.
    w : ndarray, shape (n_components,)
        Eigenvalues of the affinity matrix in descending order.

    References
    ----------
    * Coifman, R.R.; S. Lafon. (2006). "Diffusion maps". Applied and
      Computational Harmonic Analysis 21: 5-30. doi:10.1016/j.acha.2006.04.006
    * Joseph W.R., Peter E.F., Ann B.L., Chad M.S. Accurate parameter
      estimation for star formation history in galaxies using SDSS spectra.
    """

    rs = check_random_state(random_state)
    use_sparse = ssp.issparse(adj)

    # Make symmetric
    if not is_symmetric(adj, tol=1E-10):
        warnings.warn('Affinity is not symmetric. Making symmetric.')
        adj = make_symmetric(adj, check=False, copy=True, sparse_format='coo')
    else:  # Copy anyways because we will be working on the matrix
        adj = adj.tocoo(copy=True) if use_sparse else adj.copy()

    # Check connected
    if not _graph_is_connected(adj):
        warnings.warn('Graph is not fully connected.')

    ###########################################################
    # Step 2
    ###########################################################
    # When α=0, you get back the diffusion map based on the random walk-style
    # diffusion operator (and Laplacian Eigenmaps). For α=1, the diffusion
    # operator approximates the Laplace-Beltrami operator and for α=0.5,
    # you get Fokker-Planck diffusion. The anisotropic diffusion
    # parameter: \alpha \in \[0, 1\]
    # W(α) = D^{−1/\alpha} W D^{−1/\alpha}
    if alpha > 0:
        if use_sparse:
            d = np.power(adj.sum(axis=1).A1, -alpha)
            adj.data *= d[adj.row]
            adj.data *= d[adj.col]
        else:
            d = adj.sum(axis=1, keepdims=True)
            d = np.power(d, -alpha)
            adj *= d.T
            adj *= d

    ###########################################################
    # Step 3
    ###########################################################
    # Diffusion operator
    # P(α) = D(α)^{−1}W(α)
    if use_sparse:
        d_alpha = np.power(adj.sum(axis=1).A1, -1)
        adj.data *= d_alpha[adj.row]
    else:
        adj *= np.power(adj.sum(axis=1, keepdims=True), -1)

    ###########################################################
    # Step 4
    ###########################################################
    if n_components is None:
        n_components = max(2, int(np.sqrt(adj.shape[0])))
        auto_n_comp = True
    else:
        auto_n_comp = False

    # For repeatability of results
    v0 = rs.uniform(-1, 1, adj.shape[0])

    # Find largest eigenvalues and eigenvectors
    w, v = eigsh(adj, k=n_components + 1, which='LM', tol=0, v0=v0)

    # Sort descending
    w, v = w[::-1], v[:, ::-1]

    ###########################################################
    # Step 5
    ###########################################################
    # Force first eigenvector to be all ones.
    v /= v[:, [0]]

    # Largest eigenvalue should be equal to one too
    w /= w[0]

    # Discard first (largest) eigenvalue and eigenvector
    w, v = w[1:], v[:, 1:]

    if diffusion_time <= 0:
        # use multi-scale diffusion map, ref [4]
        # considers all scales: t=1,2,3,...
        w /= (1 - w)
    else:
        # Raise eigenvalues to the power of diffusion time
        w **= diffusion_time

    if auto_n_comp:
        # Choose n_comp to coincide with a 95 % drop-off
        # in the eigenvalue multipliers, ref [4]
        lambda_ratio = w / w[0]

        # If all eigenvalues larger than 0.05, select all
        # (i.e., sqrt(adj.shape[0]))
        threshold = max(0.05, lambda_ratio[-1])
        n_components = np.argmin(lambda_ratio > threshold)

        w = w[:n_components]
        v = v[:, :n_components]

    # Rescale eigenvectors with eigenvalues
    v *= w[None, :]

    # Consistent sign (s.t. largest value of element eigenvector is pos)
    v *= np.sign(v[np.abs(v).argmax(axis=0), range(v.shape[1])])
    return v, w


[docs]def laplacian_eigenmaps(adj, n_components=10, norm_laplacian=True,
                        random_state=None):
    """Compute embedding using Laplacian eigenmaps.

    Adapted from Scikit-learn to also provide eigenvalues.

    Parameters
    ----------
    adj : 2D ndarray or sparse matrix
        Affinity matrix.
    n_components : int, optional
        Number of eigenvectors. Default is 10.
    norm_laplacian : bool, optional
        If True use normalized Laplacian. Default is True.
    random_state : int or None, optional
        Random state. Default is None.

    Returns
    -------
    v : 2D ndarray, shape (n, n_components)
        Eigenvectors of the affinity matrix in same order. Where `n` is
        the number of rows of the affinity matrix.
    w : 1D ndarray, shape (n_components,)
        Eigenvalues of the affinity matrix in ascending order.

    References
    ----------
    * Belkin, M. and Niyogi, P. (2003). Laplacian Eigenmaps for
      dimensionality reduction and data representation.
      Neural Computation 15(6): 1373-96. doi:10.1162/089976603321780317

    """

    rs = check_random_state(random_state)

    # Make symmetric
    if not is_symmetric(adj, tol=1E-10):
        warnings.warn('Affinity is not symmetric. Making symmetric.')
        adj = make_symmetric(adj, check=False)

    # Check connected
    if not _graph_is_connected(adj):
        warnings.warn('Graph is not fully connected.')

    lap, dd = laplacian(adj, normed=norm_laplacian, return_diag=True)
    if norm_laplacian:
        if ssp.issparse(lap):
            lap.setdiag(1)
        else:
            np.fill_diagonal(lap, 1)

    lap *= -1
    v0 = rs.uniform(-1, 1, lap.shape[0])
    w, v = eigsh(lap, k=n_components + 1, sigma=1, which='LM', tol=0, v0=v0)

    # Sort descending and change sign of eigenvalues
    w, v = -w[::-1], v[:, ::-1]

    if norm_laplacian:
        v /= dd[:, None]

    # Drop smallest
    w, v = w[1:], v[:, 1:]

    # Consistent sign (s.t. largest value of element eigenvector is pos)
    v *= np.sign(v[np.abs(v).argmax(axis=0), range(v.shape[1])])
    return v, w


[docs]class Embedding(BaseEstimator, metaclass=ABCMeta):
    """Base class for embedding approaches.

    Defines fit_transform method.

    """
[docs]    def __init__(self, n_components=2):
        self.n_components = n_components
        self.maps_ = None
        self.lambdas_ = None

[docs]    @abstractmethod
    def fit(self, x):
        pass

[docs]    def fit_transform(self, x):
        """Compute embedding for `x`.

        Parameters
        ----------
        x : ndarray, shape = (n, n)
            Input matrix.

        Returns
        -------
        embedding : ndarray, shape(n, n_components)
            Embedded data.

        """

        return self.fit(x).maps_


[docs]class DiffusionMaps(Embedding):
    """Diffusion maps.

    Parameters
    ----------
    n_components : int or None, optional
        Number of eigenvectors. Default is 10.
    alpha : float, optional
        Anisotropic diffusion parameter, ``0 <= alpha <= 1``. Default is 0.5.
    diffusion_time : int, optional
        Diffusion time or scale.  If ``diffusion_time == 0`` use multi-scale
        diffusion maps. Default is 0.
    random_state : int or None, optional
        Random state. Default is None.

    Attributes
    ----------
    lambdas_ : 1D ndarray, shape (n_components,)
        Eigenvalues of the affinity matrix in descending order.
    maps_ : 2D ndarray, shape (n, n_components)
        Eigenvectors of the affinity matrix in same order. Where `n` is
        the number of rows of the affinity matrix.

    See Also
    --------
    :class:`.LaplacianEigenmaps`
    :class:`.PCAMaps`

    References
    ----------
    * Coifman, R.R.; S. Lafon. (2006). "Diffusion maps". Applied and
      Computational Harmonic Analysis 21: 5-30. doi:10.1016/j.acha.2006.04.006
    * Joseph W.R., Peter E.F., Ann B.L., Chad M.S. Accurate parameter
      estimation for star formation history in galaxies using SDSS spectra.

    """

[docs]    def __init__(self, n_components=10, alpha=0.5, diffusion_time=0,
                 random_state=None):
        super().__init__(n_components=n_components)
        self.alpha = alpha
        self.diffusion_time = diffusion_time
        self.random_state = random_state

[docs]    def fit(self, affinity):
        """ Compute the diffusion maps.

        Parameters
        ----------
        affinity : ndarray or sparse matrix, shape = (n, n)
            Affinity matrix.

        Returns
        -------
        self : object
            Returns self.

        """

        self.maps_, self.lambdas_ = \
            diffusion_mapping(affinity, n_components=self.n_components,
                              alpha=self.alpha,
                              diffusion_time=self.diffusion_time,
                              random_state=self.random_state)

        return self


[docs]class LaplacianEigenmaps(Embedding):
    """Laplacian eigenmaps.

    Parameters
    ----------
    n_components : int or None, optional
        Number of eigenvectors. Default is 10.
    norm_laplacian : bool, optional
        If True, use normalized Laplacian. Default is True.
    random_state : int or None, optional
        Random state. Default is None.

    Attributes
    ----------
    lambdas_ : ndarray, shape (n_components,)
        Eigenvalues of the affinity matrix in ascending order.
    maps_ : ndarray, shape (n, n_components)
        Eigenvectors of the affinity matrix in same order. Where `n` is
        the number of rows of the affinity matrix.

    See Also
    --------
    :class:`.DiffusionMaps`
    :class:`.PCAMaps`

    """

[docs]    def __init__(self, n_components=10, norm_laplacian=True, random_state=None):
        super().__init__(n_components=n_components)
        self.norm_laplacian = norm_laplacian
        self.random_state = random_state

[docs]    def fit(self, affinity):
        """ Compute the Laplacian maps.

        Parameters
        ----------
        affinity : ndarray or sparse matrix, shape = (n, n)
            Affinity matrix.

        Returns
        -------
        self : object
            Returns self.

        """

        self.maps_, self.lambdas_ = \
            laplacian_eigenmaps(affinity, n_components=self.n_components,
                                norm_laplacian=self.norm_laplacian,
                                random_state=self.random_state)

        return self


[docs]class PCAMaps(Embedding):
    """Principal component analysis.

    Parameters
    ----------
    n_components : int or None, optional
        Number of principal components. Default is 10.
    random_state : int, RandomState instance or None, optional
         Random state. Default is None.

    Attributes
    ----------
    lambdas_ :ndarray, shape (n_components,)
        Explained variance for first principal components in descending order.
    maps_ : ndarray, shape (n_samples, n_components)
        Projection of input data onto the principal components.

    See Also
    --------
    :class:`.DiffusionMaps`
    :class:`.LaplacianEigenmaps`

    """
[docs]    def __init__(self, n_components=10, random_state=None):
        super().__init__(n_components=n_components)
        self.random_state = random_state

[docs]    def fit(self, x):
        """ Compute PCA.

        Parameters
        ----------
        x : ndarray, shape(n_samples, n_feat)
            Input matrix.

        Returns
        -------
        self : object
            Returns self.

        """

        pca = PCA(n_components=self.n_components,
                  random_state=self.random_state)
        self.maps_ = pca.fit_transform(x)
        self.lambdas_ = pca.explained_variance_

        return self