Source code for menpo.math.decomposition

from __future__ import division
import numpy as np
from .linalg import dot_inplace_right


def eigenvalue_decomposition(S, eps=10**-10):
    r"""
    Eigenvalue decomposition of a given covariance (or scatter) matrix.

    Parameters
    ----------
    S : ``(N, N)`` `ndarray`
        Covariance/Scatter matrix
    eps : `float`, optional
        Small value to be used for the tolerance limit computation. The
        final limit is computed as::

            limit = np.max(np.abs(eigenvalues)) * eps

    Returns
    -------
    pos_eigenvectors : ``(N, p)`` `ndarray`
        The matrix with the eigenvectors corresponding to positive
        eigenvalues.
    pos_eigenvalues : ``(p,)`` `ndarray`
        The array of positive eigenvalues.
    """
    # compute eigenvalue decomposition
    eigenvalues, eigenvectors = np.linalg.eigh(S)

    # sort eigenvalues from largest to smallest
    index = np.argsort(eigenvalues)[::-1]
    eigenvalues = eigenvalues[index]
    eigenvectors = eigenvectors[:, index]

    # set tolerance limit
    limit = np.max(np.abs(eigenvalues)) * eps

    # select positive eigenvalues
    pos_index = eigenvalues > 0.0
    pos_eigenvalues = eigenvalues[pos_index]
    pos_eigenvectors = eigenvectors[:, pos_index]

    # check they are within the expected tolerance
    index = pos_eigenvalues > limit
    pos_eigenvalues = pos_eigenvalues[index]
    pos_eigenvectors = pos_eigenvectors[:, index]

    return pos_eigenvectors, pos_eigenvalues
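
A minimal usage sketch (not part of the module; the random scatter matrix
below is purely illustrative)::

    import numpy as np

    # build a small symmetric positive semi-definite scatter matrix
    A = np.random.randn(6, 4)
    S = A.T.dot(A)

    eigenvectors, eigenvalues = eigenvalue_decomposition(S)

    # eigenvalues come back sorted from largest to smallest, all positive
    assert np.all(eigenvalues > 0)
    assert np.all(np.diff(eigenvalues) <= 0)

    # each column of eigenvectors satisfies S v = lambda v
    v = eigenvectors[:, 0]
    np.testing.assert_allclose(S.dot(v), eigenvalues[0] * v)
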
def principal_component_decomposition(X, whiten=False, centre=True,
                                      bias=False, inplace=False):
    r"""
    Apply Principal Component Analysis (PCA) on the data matrix `X`.

    In the case where the data matrix is very large, it is advisable to set
    ``inplace = True``. However, note this destructively edits the data
    matrix by subtracting the mean inplace.

    Parameters
    ----------
    X : ``(n_samples, n_features)`` `ndarray`
        Training data.
    whiten : `bool`, optional
        Whether to scale the eigenvectors by the inverse square root of
        their eigenvalues, so that data projected onto them has unit
        variance along each component.
    centre : `bool`, optional
        Whether to centre the data matrix. If ``False``, a zero vector is
        subtracted (the data is assumed to be centred already).
    bias : `bool`, optional
        Whether to use a biased estimate of the covariance, normalising by
        ``n_samples``. If ``False``, the unbiased normaliser
        ``n_samples - 1`` is used.
    inplace : `bool`, optional
        Whether to do the mean subtraction inplace or not. This is crucial
        if the data matrix is greater than half the available memory size.

    Returns
    -------
    eigenvectors : ``(n_components, n_features)`` `ndarray`
        The eigenvectors of the data matrix.
    eigenvalues : ``(n_components,)`` `ndarray`
        The positive eigenvalues from the data matrix.
    mean_vector : ``(n_features,)`` `ndarray`
        The mean that was subtracted from the dataset.
    """
    n_samples, n_features = X.shape

    if bias:
        N = n_samples
    else:
        N = n_samples - 1.0

    if centre:
        # centre data
        mean_vector = np.mean(X, axis=0)
    else:
        mean_vector = np.zeros(n_features)

    # This is required if the data matrix is very large!
    if inplace:
        X -= mean_vector
    else:
        X = X - mean_vector

    if n_features < n_samples:
        # compute covariance matrix
        # S: n_features x n_features
        S = np.dot(X.T, X) / N
        # S should be perfectly symmetrical, but numerical error can creep
        # in. Enforce symmetry here to avoid creating complex eigenvectors
        # from the eigendecomposition
        S = (S + S.T) / 2.0

        # perform eigenvalue decomposition
        # eigenvectors: n_features x n_components
        # eigenvalues: n_components
        eigenvectors, eigenvalues = eigenvalue_decomposition(S)

        if whiten:
            # whiten eigenvectors: scale each by 1 / sqrt(eigenvalue)
            eigenvectors *= np.sqrt(1.0 / eigenvalues)

        # transpose eigenvectors
        # eigenvectors: n_components x n_features
        eigenvectors = eigenvectors.T
    else:
        # n_features >= n_samples
        # compute the small covariance matrix
        # S: n_samples x n_samples
        S = np.dot(X, X.T) / N
        # S should be perfectly symmetrical, but numerical error can creep
        # in. Enforce symmetry here to avoid creating complex eigenvectors
        # from the eigendecomposition
        S = (S + S.T) / 2.0

        # perform eigenvalue decomposition
        # eigenvectors_s: n_samples x n_components
        # eigenvalues: n_components
        eigenvectors_s, eigenvalues = eigenvalue_decomposition(S)

        # compute the scaling for the final eigenvectors. A data-space
        # eigenvector is X.T.dot(u) for a sample-space eigenvector u, and
        # has norm sqrt(N * eigenvalue), so 1 / sqrt(N * eigenvalue)
        # unit-normalises it. Whitening applies a further
        # 1 / sqrt(eigenvalue), matching the n_features < n_samples branch.
        if whiten:
            w = 1.0 / (np.sqrt(N) * eigenvalues)
        else:
            w = np.sqrt(1.0 / (N * eigenvalues))

        # compute final eigenvectors
        # eigenvectors: n_components x n_features
        dot = dot_inplace_right if inplace else np.dot
        eigenvectors = dot(eigenvectors_s.T, X)
        eigenvectors *= w[:, None]

    return eigenvectors, eigenvalues, mean_vector
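
A minimal usage sketch of the full decomposition (again, not part of the
module; the synthetic data and shapes are purely illustrative)::

    import numpy as np

    n_samples, n_features = 50, 10
    X = np.random.randn(n_samples, n_features)

    eigenvectors, eigenvalues, mean_vector = \
        principal_component_decomposition(X)

    # eigenvectors: (n_components, n_features)
    # eigenvalues: (n_components,)
    # mean_vector: (n_features,)
    # without whitening, each component (row) has unit norm
    np.testing.assert_allclose(np.linalg.norm(eigenvectors, axis=1), 1.0)

    # with whiten=True, projections of the centred data onto the
    # components have unit (unbiased) variance
    W, _, mean_vector = principal_component_decomposition(X, whiten=True)
    Y = (X - mean_vector).dot(W.T)
    np.testing.assert_allclose(Y.var(axis=0, ddof=1), 1.0)
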