from functools import partial
import numpy as np
from scipy.sparse import bsr_matrix
from menpo.base import name_of_callable
from menpo.math import as_matrix
from menpo.shape import UndirectedGraph
from menpo.visualize import print_progress, bytes_str, print_dynamic
def _covariance_matrix_inverse(cov_mat, n_components):
    if n_components is None:
        return np.linalg.inv(cov_mat)
    else:
        try:
            # Truncated SVD inverse: keep only the n_components largest
            # singular values, i.e. U_k diag(1 / s_k) Vt_k, which for a
            # symmetric covariance matrix is the rank-truncated inverse.
            u, s, vt = np.linalg.svd(cov_mat)
            u = u[:, :n_components]
            s = s[:n_components]
            vt = vt[:n_components, :]
            return u.dot(np.diag(1 / s)).dot(vt)
        except np.linalg.LinAlgError:
            return np.linalg.inv(cov_mat)
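# Illustrative sanity check (a sketch, not part of the original API): for a
# well-conditioned covariance matrix, the truncated-SVD inverse with all
# components kept coincides with np.linalg.inv.
def _demo_covariance_matrix_inverse():
    rng = np.random.RandomState(0)
    cov = np.cov(rng.randn(100, 4), rowvar=0)
    q_inv = _covariance_matrix_inverse(cov, n_components=None)
    q_svd = _covariance_matrix_inverse(cov, n_components=4)
    assert np.allclose(q_inv, q_svd)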
def _create_sparse_precision(
X,
graph,
n_features,
n_features_per_vertex,
mode="concatenation",
dtype=np.float32,
n_components=None,
bias=0,
return_covariances=False,
verbose=False,
):
# check mode argument
    if mode not in ["concatenation", "subtraction"]:
        raise ValueError(
            "mode must be either 'concatenation' "
            "or 'subtraction'; {} was given.".format(mode)
        )
# Initialize arrays
all_blocks = np.zeros(
(graph.n_edges * 4, n_features_per_vertex, n_features_per_vertex), dtype=dtype
)
if return_covariances:
if mode == "concatenation":
cov_shape = (
graph.n_edges,
2 * n_features_per_vertex,
2 * n_features_per_vertex,
)
else:
cov_shape = (graph.n_edges, n_features_per_vertex, n_features_per_vertex)
all_covariances = np.zeros(cov_shape, dtype=dtype)
    columns = np.zeros(graph.n_edges * 4, dtype=int)
    rows = np.zeros(graph.n_edges * 4, dtype=int)
# Print information if asked
if verbose:
edges = print_progress(
range(graph.n_edges),
n_items=graph.n_edges,
prefix="Precision per edge",
end_with_newline=False,
)
else:
edges = range(graph.n_edges)
# Compute covariance matrix for each edge, invert it and store it
count = -1
for e in edges:
# edge vertices
v1 = graph.edges[e, 0]
v2 = graph.edges[e, 1]
# find indices in data matrix
v1_from = v1 * n_features_per_vertex
v1_to = (v1 + 1) * n_features_per_vertex
v2_from = v2 * n_features_per_vertex
v2_to = (v2 + 1) * n_features_per_vertex
# data concatenation
if mode == "concatenation":
edge_data = X[:, list(range(v1_from, v1_to)) + list(range(v2_from, v2_to))]
else:
edge_data = X[:, v1_from:v1_to] - X[:, v2_from:v2_to]
# compute covariance matrix
covmat = np.cov(edge_data, rowvar=0, bias=bias)
if return_covariances:
all_covariances[e] = covmat
# invert it
covmat = _covariance_matrix_inverse(covmat, n_components)
# store it
if mode == "concatenation":
# v1, v1
count += 1
all_blocks[count] = covmat[:n_features_per_vertex, :n_features_per_vertex]
rows[count] = v1
columns[count] = v1
# v2, v2
count += 1
            all_blocks[count] = covmat[n_features_per_vertex:, n_features_per_vertex:]
rows[count] = v2
columns[count] = v2
# v1, v2
count += 1
            all_blocks[count] = covmat[:n_features_per_vertex, n_features_per_vertex:]
rows[count] = v1
columns[count] = v2
# v2, v1
count += 1
            all_blocks[count] = covmat[n_features_per_vertex:, :n_features_per_vertex]
rows[count] = v2
columns[count] = v1
else:
# v1, v1
count += 1
all_blocks[count] = covmat
rows[count] = v1
columns[count] = v1
# v2, v2
count += 1
all_blocks[count] = covmat
rows[count] = v2
columns[count] = v2
# v1, v2
count += 1
all_blocks[count] = -covmat
rows[count] = v1
columns[count] = v2
# v2, v1
count += 1
all_blocks[count] = -covmat
rows[count] = v2
columns[count] = v1
# sort rows, columns and all_blocks
rows_arg_sort = rows.argsort()
columns = columns[rows_arg_sort]
all_blocks = all_blocks[rows_arg_sort]
rows = rows[rows_arg_sort]
# create indptr
n_rows = graph.n_vertices
    indptr = np.zeros(n_rows + 1, dtype=int)
for i in range(n_rows):
(inds,) = np.where(rows == i)
if inds.size == 0:
indptr[i + 1] = indptr[i]
else:
indptr[i] = inds[0]
indptr[i + 1] = inds[-1] + 1
# create block sparse matrix
if return_covariances:
return (
bsr_matrix(
(all_blocks, columns, indptr),
shape=(n_features, n_features),
dtype=dtype,
),
all_covariances,
)
else:
return bsr_matrix(
(all_blocks, columns, indptr), shape=(n_features, n_features), dtype=dtype
)
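# Note on the assembly above (an illustrative sketch, not part of the
# original API): when a vertex participates in several edges, its diagonal
# block is appended once per edge with identical (row, column) coordinates.
# scipy's BSR format sums such duplicate blocks when the matrix is densified
# or multiplied, mirroring the += accumulation of the dense constructor.
def _demo_bsr_duplicate_blocks_sum():
    blocks = np.array([np.eye(2), np.eye(2)])  # two blocks, both at (0, 0)
    indices = np.array([0, 0])
    indptr = np.array([0, 2])
    mat = bsr_matrix((blocks, indices, indptr), shape=(2, 2))
    assert np.allclose(mat.toarray(), 2 * np.eye(2))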
def _create_dense_precision(
X,
graph,
n_features,
n_features_per_vertex,
mode="concatenation",
dtype=np.float32,
n_components=None,
bias=0,
return_covariances=False,
verbose=False,
):
# check mode argument
    if mode not in ["concatenation", "subtraction"]:
        raise ValueError(
            "mode must be either 'concatenation' "
            "or 'subtraction'; {} was given.".format(mode)
        )
# Initialize precision
precision = np.zeros((n_features, n_features), dtype=dtype)
if return_covariances:
if mode == "concatenation":
cov_shape = (
graph.n_edges,
2 * n_features_per_vertex,
2 * n_features_per_vertex,
)
else:
cov_shape = (graph.n_edges, n_features_per_vertex, n_features_per_vertex)
all_covariances = np.zeros(cov_shape, dtype=dtype)
# Print information if asked
if verbose:
print_dynamic(
"Allocated precision matrix of size {}".format(bytes_str(precision.nbytes))
)
edges = print_progress(
range(graph.n_edges),
n_items=graph.n_edges,
prefix="Precision per edge",
end_with_newline=False,
)
else:
edges = range(graph.n_edges)
# Compute covariance matrix for each edge, invert it and store it
for e in edges:
# edge vertices
v1 = graph.edges[e, 0]
v2 = graph.edges[e, 1]
# find indices in data matrix
v1_from = v1 * n_features_per_vertex
v1_to = (v1 + 1) * n_features_per_vertex
v2_from = v2 * n_features_per_vertex
v2_to = (v2 + 1) * n_features_per_vertex
# data concatenation
if mode == "concatenation":
edge_data = X[:, list(range(v1_from, v1_to)) + list(range(v2_from, v2_to))]
else:
edge_data = X[:, v1_from:v1_to] - X[:, v2_from:v2_to]
# compute covariance matrix
covmat = np.cov(edge_data, rowvar=0, bias=bias)
if return_covariances:
all_covariances[e] = covmat
# invert it
covmat = _covariance_matrix_inverse(covmat, n_components)
# store it
if mode == "concatenation":
# v1, v1
precision[v1_from:v1_to, v1_from:v1_to] += covmat[
:n_features_per_vertex, :n_features_per_vertex
]
            # v2, v2
            precision[v2_from:v2_to, v2_from:v2_to] += covmat[
                n_features_per_vertex:, n_features_per_vertex:
            ]
            # v1, v2
            precision[v1_from:v1_to, v2_from:v2_to] = covmat[
                :n_features_per_vertex, n_features_per_vertex:
            ]
            # v2, v1
            precision[v2_from:v2_to, v1_from:v1_to] = covmat[
                n_features_per_vertex:, :n_features_per_vertex
            ]
elif mode == "subtraction":
# v1, v2
precision[v1_from:v1_to, v2_from:v2_to] = -covmat
# v2, v1
precision[v2_from:v2_to, v1_from:v1_to] = -covmat
# v1, v1
precision[v1_from:v1_to, v1_from:v1_to] += covmat
# v2, v2
precision[v2_from:v2_to, v2_from:v2_to] += covmat
# return covariances
if return_covariances:
return precision, all_covariances
else:
return precision
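# Illustrative cross-check (a sketch, not part of the original API; assumes
# menpo's UndirectedGraph.init_from_edges constructor): on a toy chain graph
# the sparse and dense constructors should yield the same precision matrix.
def _demo_sparse_vs_dense_precision():
    rng = np.random.RandomState(0)
    graph = UndirectedGraph.init_from_edges(np.array([[0, 1], [1, 2]]), 3)
    X = rng.randn(200, 6)  # 3 vertices with 2 features each
    P_sparse = _create_sparse_precision(X, graph, 6, 2, dtype=np.float64)
    P_dense = _create_dense_precision(X, graph, 6, 2, dtype=np.float64)
    assert np.allclose(P_sparse.toarray(), P_dense)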
def _create_sparse_diagonal_precision(
X,
graph,
n_features,
n_features_per_vertex,
dtype=np.float32,
n_components=None,
bias=0,
return_covariances=False,
verbose=False,
):
# initialize covariances matrix
all_blocks = np.zeros(
(graph.n_vertices, n_features_per_vertex, n_features_per_vertex), dtype=dtype
)
if return_covariances:
all_covariances = np.zeros(
(graph.n_vertices, n_features_per_vertex, n_features_per_vertex),
dtype=dtype,
)
    columns = np.zeros(graph.n_vertices, dtype=int)
    rows = np.zeros(graph.n_vertices, dtype=int)
# Print information if asked
if verbose:
vertices = print_progress(
range(graph.n_vertices),
n_items=graph.n_vertices,
prefix="Precision per vertex",
end_with_newline=False,
)
else:
vertices = range(graph.n_vertices)
# Compute covariance matrix for each patch
for v in vertices:
# find indices in target precision matrix
i_from = v * n_features_per_vertex
i_to = (v + 1) * n_features_per_vertex
# compute covariance
covmat = np.cov(X[:, i_from:i_to], rowvar=0, bias=bias)
if return_covariances:
all_covariances[v] = covmat
# invert it
all_blocks[v] = _covariance_matrix_inverse(covmat, n_components)
# store the inverse covariance and its locations
rows[v] = v
columns[v] = v
# sort rows, columns and all_blocks
rows_arg_sort = rows.argsort()
columns = columns[rows_arg_sort]
all_blocks = all_blocks[rows_arg_sort]
rows = rows[rows_arg_sort]
# create indptr
n_rows = graph.n_vertices
    indptr = np.zeros(n_rows + 1, dtype=int)
for i in range(n_rows):
(inds,) = np.where(rows == i)
if inds.size == 0:
indptr[i + 1] = indptr[i]
else:
indptr[i] = inds[0]
indptr[i + 1] = inds[-1] + 1
# create block sparse matrix
if return_covariances:
return (
bsr_matrix(
(all_blocks, columns, indptr),
shape=(n_features, n_features),
dtype=dtype,
),
all_covariances,
)
else:
return bsr_matrix(
(all_blocks, columns, indptr), shape=(n_features, n_features), dtype=dtype
)
def _create_dense_diagonal_precision(
X,
graph,
n_features,
n_features_per_vertex,
dtype=np.float32,
n_components=None,
bias=0,
return_covariances=False,
verbose=False,
):
# Initialize precision
precision = np.zeros((n_features, n_features), dtype=dtype)
if return_covariances:
all_covariances = np.zeros(
(graph.n_vertices, n_features_per_vertex, n_features_per_vertex),
dtype=dtype,
)
if verbose:
print_dynamic(
"Allocated precision matrix of size {}".format(bytes_str(precision.nbytes))
)
# Print information if asked
if verbose:
vertices = print_progress(
range(graph.n_vertices),
n_items=graph.n_vertices,
prefix="Precision per vertex",
end_with_newline=False,
)
else:
vertices = range(graph.n_vertices)
# Compute covariance matrix for each patch
for v in vertices:
# find indices in target precision matrix
i_from = v * n_features_per_vertex
i_to = (v + 1) * n_features_per_vertex
# compute covariance
covmat = np.cov(X[:, i_from:i_to], rowvar=0, bias=bias)
if return_covariances:
all_covariances[v] = covmat
# invert it
covmat = _covariance_matrix_inverse(covmat, n_components)
# insert to precision matrix
precision[i_from:i_to, i_from:i_to] = covmat
# return covariances
if return_covariances:
return precision, all_covariances
else:
return precision
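# Illustrative check (a sketch, not part of the original API): with an
# edgeless graph the precision is block-diagonal and each block is simply
# the inverse of the per-vertex covariance. Building the graph from a zero
# adjacency matrix is an assumed way to get an edgeless menpo UndirectedGraph.
def _demo_diagonal_precision():
    rng = np.random.RandomState(0)
    graph = UndirectedGraph(np.zeros((3, 3)))
    X = rng.randn(200, 6)  # 3 vertices with 2 features each
    P = _create_dense_diagonal_precision(X, graph, 6, 2, dtype=np.float64)
    expected = np.linalg.inv(np.cov(X[:, :2], rowvar=0))
    assert np.allclose(P[:2, :2], expected)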
def _increment_sparse_precision(
X,
mean_vector,
covariances,
n,
graph,
n_features,
n_features_per_vertex,
mode="concatenation",
dtype=np.float32,
n_components=None,
bias=0,
verbose=False,
):
# check mode argument
    if mode not in ["concatenation", "subtraction"]:
        raise ValueError(
            "mode must be either 'concatenation' "
            "or 'subtraction'; {} was given.".format(mode)
        )
# Initialize arrays
all_blocks = np.zeros(
(graph.n_edges * 4, n_features_per_vertex, n_features_per_vertex), dtype=dtype
)
    columns = np.zeros(graph.n_edges * 4, dtype=int)
    rows = np.zeros(graph.n_edges * 4, dtype=int)
# Print information if asked
if verbose:
edges = print_progress(
range(graph.n_edges),
n_items=graph.n_edges,
prefix="Precision per edge",
end_with_newline=False,
)
else:
edges = range(graph.n_edges)
# Compute covariance matrix for each edge, invert it and store it
count = -1
for e in edges:
# edge vertices
v1 = graph.edges[e, 0]
v2 = graph.edges[e, 1]
# find indices in data matrix
v1_from = v1 * n_features_per_vertex
v1_to = (v1 + 1) * n_features_per_vertex
v2_from = v2 * n_features_per_vertex
v2_to = (v2 + 1) * n_features_per_vertex
# data concatenation
if mode == "concatenation":
edge_data = X[:, list(range(v1_from, v1_to)) + list(range(v2_from, v2_to))]
m = mean_vector[list(range(v1_from, v1_to)) + list(range(v2_from, v2_to))]
else:
edge_data = X[:, v1_from:v1_to] - X[:, v2_from:v2_to]
m = mean_vector[v1_from:v1_to] - mean_vector[v2_from:v2_to]
# increment
_, covariances[e] = _increment_multivariate_gaussian_cov(
edge_data, m, covariances[e], n, bias=bias
)
# invert it
covmat = _covariance_matrix_inverse(covariances[e], n_components)
# store it
if mode == "concatenation":
# v1, v1
count += 1
all_blocks[count] = covmat[:n_features_per_vertex, :n_features_per_vertex]
rows[count] = v1
columns[count] = v1
# v2, v2
count += 1
            all_blocks[count] = covmat[n_features_per_vertex:, n_features_per_vertex:]
rows[count] = v2
columns[count] = v2
# v1, v2
count += 1
            all_blocks[count] = covmat[:n_features_per_vertex, n_features_per_vertex:]
rows[count] = v1
columns[count] = v2
# v2, v1
count += 1
            all_blocks[count] = covmat[n_features_per_vertex:, :n_features_per_vertex]
rows[count] = v2
columns[count] = v1
else:
# v1, v1
count += 1
all_blocks[count] = covmat
rows[count] = v1
columns[count] = v1
# v2, v2
count += 1
all_blocks[count] = covmat
rows[count] = v2
columns[count] = v2
# v1, v2
count += 1
all_blocks[count] = -covmat
rows[count] = v1
columns[count] = v2
# v2, v1
count += 1
all_blocks[count] = -covmat
rows[count] = v2
columns[count] = v1
# sort rows, columns and all_blocks
rows_arg_sort = rows.argsort()
columns = columns[rows_arg_sort]
all_blocks = all_blocks[rows_arg_sort]
rows = rows[rows_arg_sort]
# create indptr
n_rows = graph.n_vertices
    indptr = np.zeros(n_rows + 1, dtype=int)
for i in range(n_rows):
(inds,) = np.where(rows == i)
if inds.size == 0:
indptr[i + 1] = indptr[i]
else:
indptr[i] = inds[0]
indptr[i + 1] = inds[-1] + 1
# create block sparse matrix
return (
bsr_matrix(
(all_blocks, columns, indptr), shape=(n_features, n_features), dtype=dtype
),
covariances,
)
def _increment_dense_precision(
X,
mean_vector,
covariances,
n,
graph,
n_features,
n_features_per_vertex,
mode="concatenation",
dtype=np.float32,
n_components=None,
bias=0,
verbose=False,
):
# check mode argument
    if mode not in ["concatenation", "subtraction"]:
        raise ValueError(
            "mode must be either 'concatenation' "
            "or 'subtraction'; {} was given.".format(mode)
        )
# Initialize precision
precision = np.zeros((n_features, n_features), dtype=dtype)
# Print information if asked
if verbose:
print_dynamic(
"Allocated precision matrix of size {}".format(bytes_str(precision.nbytes))
)
edges = print_progress(
range(graph.n_edges),
n_items=graph.n_edges,
prefix="Precision per edge",
end_with_newline=False,
)
else:
edges = range(graph.n_edges)
# Compute covariance matrix for each edge, invert it and store it
for e in edges:
# edge vertices
v1 = graph.edges[e, 0]
v2 = graph.edges[e, 1]
# find indices in data matrix
v1_from = v1 * n_features_per_vertex
v1_to = (v1 + 1) * n_features_per_vertex
v2_from = v2 * n_features_per_vertex
v2_to = (v2 + 1) * n_features_per_vertex
# data concatenation
if mode == "concatenation":
edge_data = X[:, list(range(v1_from, v1_to)) + list(range(v2_from, v2_to))]
m = mean_vector[list(range(v1_from, v1_to)) + list(range(v2_from, v2_to))]
else:
edge_data = X[:, v1_from:v1_to] - X[:, v2_from:v2_to]
m = mean_vector[v1_from:v1_to] - mean_vector[v2_from:v2_to]
# increment
_, covariances[e] = _increment_multivariate_gaussian_cov(
edge_data, m, covariances[e], n, bias=bias
)
# invert it
covmat = _covariance_matrix_inverse(covariances[e], n_components)
# store it
if mode == "concatenation":
# v1, v1
precision[v1_from:v1_to, v1_from:v1_to] += covmat[
:n_features_per_vertex, :n_features_per_vertex
]
            # v2, v2
            precision[v2_from:v2_to, v2_from:v2_to] += covmat[
                n_features_per_vertex:, n_features_per_vertex:
            ]
            # v1, v2
            precision[v1_from:v1_to, v2_from:v2_to] = covmat[
                :n_features_per_vertex, n_features_per_vertex:
            ]
            # v2, v1
            precision[v2_from:v2_to, v1_from:v1_to] = covmat[
                n_features_per_vertex:, :n_features_per_vertex
            ]
elif mode == "subtraction":
# v1, v2
precision[v1_from:v1_to, v2_from:v2_to] = -covmat
# v2, v1
precision[v2_from:v2_to, v1_from:v1_to] = -covmat
# v1, v1
precision[v1_from:v1_to, v1_from:v1_to] += covmat
# v2, v2
precision[v2_from:v2_to, v2_from:v2_to] += covmat
# return covariances
return precision, covariances
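# Illustrative check (a sketch, not part of the original API; assumes menpo's
# UndirectedGraph.init_from_edges): since the running covariance update is
# exact, incrementing a dense precision should match rebuilding it from the
# concatenated data in a single batch.
def _demo_increment_matches_batch():
    rng = np.random.RandomState(0)
    graph = UndirectedGraph.init_from_edges(np.array([[0, 1]]), 2)
    X1, X2 = rng.randn(60, 4), rng.randn(40, 4)
    _, covs = _create_dense_precision(
        X1, graph, 4, 2, dtype=np.float64, return_covariances=True
    )
    P_inc, _ = _increment_dense_precision(
        X2, X1.mean(axis=0), covs, 60, graph, 4, 2, dtype=np.float64
    )
    P_all = _create_dense_precision(
        np.vstack([X1, X2]), graph, 4, 2, dtype=np.float64
    )
    assert np.allclose(P_inc, P_all)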
def _increment_sparse_diagonal_precision(
X,
mean_vector,
covariances,
n,
graph,
n_features,
n_features_per_vertex,
dtype=np.float32,
n_components=None,
bias=0,
verbose=False,
):
# initialize covariances matrix
all_blocks = np.zeros(
(graph.n_vertices, n_features_per_vertex, n_features_per_vertex), dtype=dtype
)
    columns = np.zeros(graph.n_vertices, dtype=int)
    rows = np.zeros(graph.n_vertices, dtype=int)
# Print information if asked
if verbose:
vertices = print_progress(
range(graph.n_vertices),
n_items=graph.n_vertices,
prefix="Precision per vertex",
end_with_newline=False,
)
else:
vertices = range(graph.n_vertices)
# Compute covariance matrix for each patch
for v in vertices:
# find indices in target precision matrix
i_from = v * n_features_per_vertex
i_to = (v + 1) * n_features_per_vertex
# get data
edge_data = X[:, i_from:i_to]
m = mean_vector[i_from:i_to]
# increment
_, covariances[v] = _increment_multivariate_gaussian_cov(
edge_data, m, covariances[v], n, bias=bias
)
# invert it
all_blocks[v] = _covariance_matrix_inverse(covariances[v], n_components)
# store the inverse covariance and its locations
rows[v] = v
columns[v] = v
# sort rows, columns and all_blocks
rows_arg_sort = rows.argsort()
columns = columns[rows_arg_sort]
all_blocks = all_blocks[rows_arg_sort]
rows = rows[rows_arg_sort]
# create indptr
n_rows = graph.n_vertices
    indptr = np.zeros(n_rows + 1, dtype=int)
for i in range(n_rows):
(inds,) = np.where(rows == i)
if inds.size == 0:
indptr[i + 1] = indptr[i]
else:
indptr[i] = inds[0]
indptr[i + 1] = inds[-1] + 1
# create block sparse matrix
return (
bsr_matrix(
(all_blocks, columns, indptr), shape=(n_features, n_features), dtype=dtype
),
covariances,
)
def _increment_dense_diagonal_precision(
X,
mean_vector,
covariances,
n,
graph,
n_features,
n_features_per_vertex,
dtype=np.float32,
n_components=None,
bias=0,
verbose=False,
):
# Initialize precision
precision = np.zeros((n_features, n_features), dtype=dtype)
# Print information if asked
if verbose:
print_dynamic(
"Allocated precision matrix of size {}".format(bytes_str(precision.nbytes))
)
vertices = print_progress(
range(graph.n_vertices),
n_items=graph.n_vertices,
prefix="Precision per vertex",
end_with_newline=False,
)
else:
vertices = range(graph.n_vertices)
# Compute covariance matrix for each patch
for v in vertices:
# find indices in target precision matrix
i_from = v * n_features_per_vertex
i_to = (v + 1) * n_features_per_vertex
# get data
edge_data = X[:, i_from:i_to]
m = mean_vector[i_from:i_to]
# increment
_, covariances[v] = _increment_multivariate_gaussian_cov(
edge_data, m, covariances[v], n, bias=bias
)
# invert it
precision[i_from:i_to, i_from:i_to] = _covariance_matrix_inverse(
covariances[v], n_components
)
# return covariances
return precision, covariances
def _increment_multivariate_gaussian_mean(X, m, n):
# Get new number of samples
new_n = X.shape[0]
# Update mean vector
# m_{new} = (n m + \sum_{i=1}^{n_{new}} x_i) / (n + n_{new})
# where: m -> old mean vector
# n_{new} -> new number of samples
# n -> old number of samples
# x_i -> new data vectors
return (n * m + np.sum(X, axis=0)) / (n + new_n)
def _increment_multivariate_gaussian_cov(X, m, S, n, bias=0):
# Get new number of samples
new_n = X.shape[0]
# Update mean vector
# m_{new} = (n m + \sum_{i=1}^{n_{new}} x_i) / (n + n_{new})
# where: m_{new} -> new mean vector
# m -> old mean vector
# n_{new} -> new number of samples
# n -> old number of samples
# x_i -> new data vectors
new_m = _increment_multivariate_gaussian_mean(X, m, n)
# Select the normalization value
if bias == 1:
k = n
elif bias == 0:
k = n - 1
else:
raise ValueError("bias must be either 0 or 1")
# Update covariance matrix
# S__{new} = (k S + n m^T m + X^T X - (n + n_{new}) m_{new}^T m_{new})
# / (k + n_{new})
m1 = n * m[None, :].T.dot(m[None, :])
m2 = (n + new_n) * new_m[None, :].T.dot(new_m[None, :])
new_S = (k * S + m1 + X.T.dot(X) - m2) / (k + new_n)
return new_m, new_S
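# Illustrative verification (a sketch, not part of the original API): with
# bias=0 the running update above reproduces a batch np.cov over the
# concatenated data exactly, because (n - 1) S + n m m^T recovers the raw
# second moment sum_i x_i x_i^T of the old samples.
def _demo_incremental_gaussian_update():
    rng = np.random.RandomState(0)
    X1, X2 = rng.randn(50, 3), rng.randn(30, 3)
    new_m, new_S = _increment_multivariate_gaussian_cov(
        X2, X1.mean(axis=0), np.cov(X1, rowvar=0, bias=0), n=50, bias=0
    )
    X = np.vstack([X1, X2])
    assert np.allclose(new_m, X.mean(axis=0))
    assert np.allclose(new_S, np.cov(X, rowvar=0, bias=0))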
class GMRFVectorModel(object):
r"""
Trains a Gaussian Markov Random Field (GMRF).
Parameters
----------
samples : `ndarray` or `list` or `iterable` of `ndarray`
List or iterable of numpy arrays to build the model from, or an
existing data matrix.
graph : :map:`UndirectedGraph` or :map:`DirectedGraph` or :map:`Tree`
The graph that defines the relations between the features.
n_samples : `int`, optional
If provided then ``samples`` must be an iterator that yields
``n_samples``. If not provided then samples has to be a `list` (so we
know how large the data matrix needs to be).
mode : ``{'concatenation', 'subtraction'}``, optional
Defines the feature vector of each edge. Assuming that
:math:`\mathbf{x}_i` and :math:`\mathbf{x}_j` are the feature vectors
of two adjacent vertices (:math:`i,j:(v_i,v_j)\in E`), then the edge's
feature vector in the case of ``'concatenation'`` is
.. math::
\left[{\mathbf{x}_i}^T, {\mathbf{x}_j}^T\right]^T
and in the case of ``'subtraction'``
.. math::
\mathbf{x}_i - \mathbf{x}_j
n_components : `int` or ``None``, optional
When ``None`` (default), the covariance matrix of each edge is inverted
        using `np.linalg.inv`. If `int`, it is inverted using a truncated
        SVD with the specified number of components.
dtype : `numpy.dtype`, optional
The data type of the GMRF's precision matrix. For example, it can be set
to `numpy.float32` for single precision or to `numpy.float64` for double
        precision. Depending on the size of the precision matrix, this option
        can save you a lot of memory.
sparse : `bool`, optional
When ``True``, the GMRF's precision matrix has type
`scipy.sparse.bsr_matrix`, otherwise it is a `numpy.array`.
bias : `int`, optional
        Default normalization is by ``(N - 1)``, where ``N`` is the number of
        observations given (unbiased estimate). If ``bias`` is 1, then
        normalization is by ``N``. Note that ``bias`` must be either 0 or 1.
incremental : `bool`, optional
This argument must be set to ``True`` in case the user wants to
incrementally update the GMRF. Note that if ``True``, the model
occupies 2x memory.
verbose : `bool`, optional
If ``True``, the progress of the model's training is printed.
Notes
-----
Let us denote a graph as :math:`G=(V,E)`, where
    :math:`V=\{v_1,v_2,\ldots, v_{|V|}\}` is the set of :math:`|V|` vertices and
there is an edge :math:`(v_i,v_j)\in E` for each pair of connected vertices.
Let us also assume that we have a set of random variables
:math:`X=\{X_i\}, \forall i:v_i\in V`, which represent an abstract feature
vector of length :math:`k` extracted from each vertex :math:`v_i`, i.e.
:math:`\mathbf{x}_i,i:v_i\in V`.
    A GMRF is described by an undirected graph, where the vertices stand for
random variables and the edges impose statistical constraints on these
random variables. Thus, the GMRF models the set of random variables with
a multivariate normal distribution
.. math::
p(X=\mathbf{x}|G)\sim\mathcal{N}(\boldsymbol{\mu},\boldsymbol{\Sigma})
We denote by :math:`\mathbf{Q}` the block-sparse precision matrix that is
the inverse of the covariance matrix :math:`\boldsymbol{\Sigma}`, i.e.
:math:`\mathbf{Q}=\boldsymbol{\Sigma}^{-1}`. By applying the GMRF we make
the assumption that the random variables satisfy the three Markov
properties (pairwise, local and global) and that the blocks of the
    precision matrix that correspond to non-adjacent vertices are zero, i.e.
.. math::
\mathbf{Q}_{ij}=\mathbf{0}_{k\times k},\forall i,j:(v_i,v_j)\notin E
References
----------
.. [1] H. Rue, and L. Held. "Gaussian Markov random fields: theory and
applications," CRC Press, 2005.
.. [2] E. Antonakos, J. Alabort-i-Medina, and S. Zafeiriou. "Active
Pictorial Structures", IEEE International Conference on Computer Vision
& Pattern Recognition (CVPR), Boston, MA, USA, pp. 5435-5444, June 2015.
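    Examples
    --------
    A minimal usage sketch; the chain graph, the random data matrix and the
    ``UndirectedGraph.init_from_edges`` constructor are illustrative
    assumptions rather than requirements:
    >>> import numpy as np
    >>> from menpo.shape import UndirectedGraph
    >>> graph = UndirectedGraph.init_from_edges(
    ...     np.array([[0, 1], [1, 2], [2, 3]]), n_vertices=4)
    >>> samples = np.random.randn(100, 8)  # 4 vertices, 2 features each
    >>> model = GMRFVectorModel(samples, graph, sparse=False)
    >>> distance = model.mahalanobis_distance(samples[0])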
"""
def __init__(
self,
samples,
graph,
n_samples=None,
mode="concatenation",
n_components=None,
dtype=np.float64,
sparse=True,
bias=0,
incremental=False,
verbose=False,
):
# Generate data matrix
# (n_samples, n_features)
data, self.n_samples = self._data_to_matrix(samples, n_samples)
# n_features and n_features_per_vertex
self.n_features = data.shape[1]
self.n_features_per_vertex = int(self.n_features / graph.n_vertices)
# Assign arguments
self.graph = graph
self.mode = mode
self.n_components = n_components
self.sparse = sparse
self.dtype = dtype
self.bias = bias
self.is_incremental = incremental
# Compute mean vector
self.mean_vector = np.mean(data, axis=0)
# Select correct method to create the precision matrix based on the
# graph type and the sparse flag
if self.graph.n_edges == 0:
if self.sparse:
constructor = _create_sparse_diagonal_precision
else:
constructor = _create_dense_diagonal_precision
else:
if self.sparse:
constructor = partial(_create_sparse_precision, mode=self.mode)
else:
constructor = partial(_create_dense_precision, mode=self.mode)
# Create the precision matrix and optionally store the covariance
# matrices
if self.is_incremental:
self.precision, self._covariance_matrices = constructor(
data,
self.graph,
self.n_features,
self.n_features_per_vertex,
dtype=self.dtype,
n_components=self.n_components,
bias=self.bias,
return_covariances=self.is_incremental,
verbose=verbose,
)
else:
self._covariance_matrices = None
self.precision = constructor(
data,
self.graph,
self.n_features,
self.n_features_per_vertex,
dtype=self.dtype,
n_components=self.n_components,
bias=self.bias,
return_covariances=self.is_incremental,
verbose=verbose,
)
def _data_to_matrix(self, data, n_samples):
# build a data matrix from all the samples
if n_samples is None:
n_samples = len(data)
# Assumed data is ndarray of (n_samples, n_features) or list of samples
if not isinstance(data, np.ndarray):
            # Make sure we have an array, sliced to the requested number
            # of samples
            data = np.array(data)[:n_samples]
return data, n_samples
    def mean(self):
r"""
Return the mean of the model. For this model, returns the same result
as ``mean_vector``.
:type: `ndarray`
"""
return self.mean_vector
    def increment(self, samples, n_samples=None, verbose=False):
r"""
Update the mean and precision matrix of the GMRF by updating the
distributions of all the edges.
Parameters
----------
samples : `ndarray` or `list` or `iterable` of `ndarray`
List or iterable of numpy arrays to build the model from, or an
existing data matrix.
n_samples : `int`, optional
If provided then ``samples`` must be an iterator that yields
``n_samples``. If not provided then samples has to be a
list (so we know how large the data matrix needs to be).
verbose : `bool`, optional
If ``True``, the progress of the model's incremental update is
printed.
"""
# Check if it can be incrementally updated
if not self.is_incremental:
raise ValueError("GMRF cannot be incrementally updated.")
# Build a data matrix from the new samples
data, _ = self._data_to_matrix(samples, n_samples)
# Increment the model
self._increment(data=data, verbose=verbose)
def _increment(self, data, verbose):
        # Free the memory held by the previous precision matrix
        self.precision = 0
# Select correct method to create the precision matrix based on the
# graph type and the sparse flag
if self.graph.n_edges == 0:
if self.sparse:
constructor = _increment_sparse_diagonal_precision
else:
constructor = _increment_dense_diagonal_precision
else:
if self.sparse:
constructor = partial(_increment_sparse_precision, mode=self.mode)
else:
constructor = partial(_increment_dense_precision, mode=self.mode)
# Create the precision matrix and optionally store the covariance
# matrices
self.precision, self._covariance_matrices = constructor(
data,
self.mean_vector,
self._covariance_matrices,
self.n_samples,
self.graph,
self.n_features,
self.n_features_per_vertex,
dtype=self.dtype,
n_components=self.n_components,
bias=self.bias,
verbose=verbose,
)
# Update mean and number of samples
self.mean_vector = _increment_multivariate_gaussian_mean(
data, self.mean_vector, self.n_samples
)
self.n_samples += data.shape[0]
    def mahalanobis_distance(self, samples, subtract_mean=True, square_root=False):
r"""
        Compute the Mahalanobis distance given a sample :math:`\mathbf{x}` or an
array of samples :math:`\mathbf{X}`, i.e.
.. math::
\sqrt{(\mathbf{x}-\boldsymbol{\mu})^T \mathbf{Q} (\mathbf{x}-\boldsymbol{\mu})}
\text{ or }
\sqrt{(\mathbf{X}-\boldsymbol{\mu})^T \mathbf{Q} (\mathbf{X}-\boldsymbol{\mu})}
Parameters
----------
samples : `ndarray`
A single data vector or an array of multiple data vectors.
subtract_mean : `bool`, optional
When ``True``, the mean vector is subtracted from the data vector.
square_root : `bool`, optional
            If ``False``, the Mahalanobis distance gets squared.
"""
samples, _ = self._data_to_matrix(samples, None)
        if samples.ndim == 1:
            samples = samples[None, :]
return self._mahalanobis_distance(
samples=samples, subtract_mean=subtract_mean, square_root=square_root
)
def _mahalanobis_distance(self, samples, subtract_mean, square_root):
# we assume that samples is an ndarray of n_samples x n_features
# create data matrix
        if subtract_mean:
            # broadcasting subtracts the mean vector from every sample row
            samples = samples - self.mean_vector
# compute mahalanobis per sample
if self.sparse:
# if sparse, unfortunately the einstein sum is not implemented
tmp = self.precision.dot(samples.T)
d = samples.dot(tmp)
d = np.diag(d)
else:
# if dense, then the einstein sum is much faster
d = np.einsum("ij,ij->i", np.dot(samples, self.precision), samples)
# if only one sample, then return a scalar
if d.shape[0] == 1:
d = d[0]
# square root
if square_root:
return np.sqrt(d)
else:
return d
    def principal_components_analysis(self, max_n_components=None):
r"""
Returns a :map:`PCAVectorModel` with the Principal Components.
Note that the eigenvalue decomposition is applied directly on the
precision matrix and then the eigenvalues are inverted.
Parameters
----------
max_n_components : `int` or ``None``, optional
The maximum number of principal components. If ``None``, all the
components are returned.
Returns
-------
pca : :map:`PCAVectorModel`
The PCA model.
"""
from .pca import PCAVectorModel
return PCAVectorModel.init_from_covariance_matrix(
C=self.precision,
mean=self.mean_vector,
n_samples=self.n_samples,
centred=True,
is_inverse=True,
max_n_components=max_n_components,
)
@property
def _str_title(self):
r"""
Returns a string containing the name of the model.
:type: `str`
"""
tmp = "a"
if isinstance(self.graph, UndirectedGraph):
tmp = "an"
return "GMRF model on {} {}".format(tmp, self.graph)
def __str__(self):
        incremental_str = (
            " - Can be incrementally updated."
            if self.is_incremental
            else " - Cannot be incrementally updated."
        )
        svd_str = (
            " - # SVD components: {}".format(self.n_components)
            if self.n_components is not None
            else " - No SVD used."
        )
_Q_sparse = "scipy.sparse" if self.sparse else "numpy.array"
q_str = " - Q is stored as {} with {} precision".format(
_Q_sparse, name_of_callable(self.dtype)
)
mode_str = "concatenated" if self.mode == "concatenation" else "subtracted"
str_out = (
"Gaussian MRF Model \n"
" - {}\n"
" - The data of the vertexes of each edge are {}.\n"
"{}\n"
" - # variables (vertexes): {}\n"
" - # features per variable: {}\n"
" - # features in total: {}\n"
"{}\n"
" - # samples: {}\n"
"{}\n".format(
self.graph.__str__(),
mode_str,
q_str,
self.graph.n_vertices,
self.n_features_per_vertex,
self.n_features,
svd_str,
self.n_samples,
incremental_str,
)
)
return str_out
class GMRFModel(GMRFVectorModel):
r"""
Trains a Gaussian Markov Random Field (GMRF).
Parameters
----------
samples : `list` or `iterable` of :map:`Vectorizable`
List or iterable of samples to build the model from.
graph : :map:`UndirectedGraph` or :map:`DirectedGraph` or :map:`Tree`
The graph that defines the relations between the features.
n_samples : `int`, optional
If provided then ``samples`` must be an iterator that yields
``n_samples``. If not provided then samples has to be a `list` (so we
know how large the data matrix needs to be).
mode : ``{'concatenation', 'subtraction'}``, optional
Defines the feature vector of each edge. Assuming that
:math:`\mathbf{x}_i` and :math:`\mathbf{x}_j` are the feature vectors
of two adjacent vertices (:math:`i,j:(v_i,v_j)\in E`), then the edge's
feature vector in the case of ``'concatenation'`` is
.. math::
\left[{\mathbf{x}_i}^T, {\mathbf{x}_j}^T\right]^T
and in the case of ``'subtraction'``
.. math::
\mathbf{x}_i - \mathbf{x}_j
n_components : `int` or ``None``, optional
When ``None`` (default), the covariance matrix of each edge is inverted
        using `np.linalg.inv`. If `int`, it is inverted using a truncated
        SVD with the specified number of components.
dtype : `numpy.dtype`, optional
The data type of the GMRF's precision matrix. For example, it can be set
to `numpy.float32` for single precision or to `numpy.float64` for double
        precision. Depending on the size of the precision matrix, this option
        can save you a lot of memory.
sparse : `bool`, optional
When ``True``, the GMRF's precision matrix has type
`scipy.sparse.bsr_matrix`, otherwise it is a `numpy.array`.
bias : `int`, optional
        Default normalization is by ``(N - 1)``, where ``N`` is the number of
        observations given (unbiased estimate). If ``bias`` is 1, then
        normalization is by ``N``. Note that ``bias`` must be either 0 or 1.
incremental : `bool`, optional
This argument must be set to ``True`` in case the user wants to
incrementally update the GMRF. Note that if ``True``, the model
occupies 2x memory.
verbose : `bool`, optional
If ``True``, the progress of the model's training is printed.
Notes
-----
Let us denote a graph as :math:`G=(V,E)`, where
    :math:`V=\{v_1,v_2,\ldots, v_{|V|}\}` is the set of :math:`|V|` vertices and
there is an edge :math:`(v_i,v_j)\in E` for each pair of connected vertices.
Let us also assume that we have a set of random variables
:math:`X=\{X_i\}, \forall i:v_i\in V`, which represent an abstract feature
vector of length :math:`k` extracted from each vertex :math:`v_i`, i.e.
:math:`\mathbf{x}_i,i:v_i\in V`.
    A GMRF is described by an undirected graph, where the vertices stand for
random variables and the edges impose statistical constraints on these
random variables. Thus, the GMRF models the set of random variables with
a multivariate normal distribution
.. math::
p(X=\mathbf{x}|G)\sim\mathcal{N}(\boldsymbol{\mu},\boldsymbol{\Sigma})
We denote by :math:`\mathbf{Q}` the block-sparse precision matrix that is
the inverse of the covariance matrix :math:`\boldsymbol{\Sigma}`, i.e.
:math:`\mathbf{Q}=\boldsymbol{\Sigma}^{-1}`. By applying the GMRF we make
the assumption that the random variables satisfy the three Markov
properties (pairwise, local and global) and that the blocks of the
    precision matrix that correspond to non-adjacent vertices are zero, i.e.
.. math::
\mathbf{Q}_{ij}=\mathbf{0}_{k\times k},\forall i,j:(v_i,v_j)\notin E
References
----------
.. [1] H. Rue, and L. Held. "Gaussian Markov random fields: theory and
applications," CRC Press, 2005.
.. [2] E. Antonakos, J. Alabort-i-Medina, and S. Zafeiriou. "Active
Pictorial Structures", IEEE International Conference on Computer Vision
& Pattern Recognition (CVPR), Boston, MA, USA, pp. 5435-5444, June 2015.
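    Examples
    --------
    A minimal usage sketch; the point clouds, the chain graph and the
    ``UndirectedGraph.init_from_edges`` constructor are illustrative
    assumptions rather than requirements:
    >>> import numpy as np
    >>> from menpo.shape import PointCloud, UndirectedGraph
    >>> samples = [PointCloud(np.random.randn(4, 2)) for _ in range(100)]
    >>> graph = UndirectedGraph.init_from_edges(
    ...     np.array([[0, 1], [1, 2], [2, 3]]), n_vertices=4)
    >>> model = GMRFModel(samples, graph)
    >>> mean_shape = model.mean()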
"""
def __init__(
self,
samples,
graph,
mode="concatenation",
n_components=None,
dtype=np.float64,
sparse=True,
n_samples=None,
bias=0,
incremental=False,
verbose=False,
):
# Build a data matrix from all the samples
data, self.template_instance = as_matrix(
samples, length=n_samples, return_template=True, verbose=verbose
)
n_samples = data.shape[0]
GMRFVectorModel.__init__(
self,
data,
graph,
mode=mode,
n_components=n_components,
dtype=dtype,
sparse=sparse,
n_samples=n_samples,
bias=bias,
incremental=incremental,
verbose=verbose,
)
    def mean(self):
r"""
Return the mean of the model.
:type: :map:`Vectorizable`
"""
return self.template_instance.from_vector(self.mean_vector)
    def increment(self, samples, n_samples=None, verbose=False):
r"""
Update the mean and precision matrix of the GMRF by updating the
distributions of all the edges.
Parameters
----------
samples : `list` or `iterable` of :map:`Vectorizable`
List or iterable of samples to build the model from.
n_samples : `int`, optional
If provided then ``samples`` must be an iterator that yields
``n_samples``. If not provided then samples has to be a
list (so we know how large the data matrix needs to be).
verbose : `bool`, optional
If ``True``, the progress of the model's incremental update is
printed.
"""
# Check if it can be incrementally updated
if not self.is_incremental:
raise ValueError("GMRF cannot be incrementally updated.")
# Build a data matrix from the new samples
data = as_matrix(samples, length=n_samples, verbose=verbose)
# Increment the model
self._increment(data=data, verbose=verbose)
    def mahalanobis_distance(self, samples, subtract_mean=True, square_root=False):
r"""
        Compute the Mahalanobis distance given a sample :math:`\mathbf{x}` or an
array of samples :math:`\mathbf{X}`, i.e.
.. math::
\sqrt{(\mathbf{x}-\boldsymbol{\mu})^T \mathbf{Q} (\mathbf{x}-\boldsymbol{\mu})}
\text{ or }
\sqrt{(\mathbf{X}-\boldsymbol{\mu})^T \mathbf{Q} (\mathbf{X}-\boldsymbol{\mu})}
Parameters
----------
samples : :map:`Vectorizable` or `list` of :map:`Vectorizable`
The new data sample or a list of samples.
subtract_mean : `bool`, optional
When ``True``, the mean vector is subtracted from the data vector.
square_root : `bool`, optional
            If ``False``, the Mahalanobis distance gets squared.
"""
if isinstance(samples, list):
samples = as_matrix(
samples, length=None, return_template=False, verbose=False
)
else:
samples = samples.as_vector()[..., None].T
return self._mahalanobis_distance(
samples=samples, subtract_mean=subtract_mean, square_root=square_root
)
    def principal_components_analysis(self, max_n_components=None):
r"""
Returns a :map:`PCAModel` with the Principal Components.
Note that the eigenvalue decomposition is applied directly on the
precision matrix and then the eigenvalues are inverted.
Parameters
----------
max_n_components : `int` or ``None``, optional
The maximum number of principal components. If ``None``, all the
components are returned.
Returns
-------
pca : :map:`PCAModel`
The PCA model.
"""
from .pca import PCAModel
return PCAModel.init_from_covariance_matrix(
C=self.precision,
mean=self.mean(),
n_samples=self.n_samples,
centred=True,
is_inverse=True,
max_n_components=max_n_components,
)