Source code for barnacle.decomposition

import warnings

import numpy as np
import opt_einsum as oe
import tensorly as tl
from tensorly import check_random_state
from tensorly.decomposition._base_decomposition import DecompositionMixin
from tensorly.decomposition._cp import initialize_cp
from threadpoolctl import threadpool_limits

from .fista import fista_solve
from .tensors import SparseCPTensor


def _create_mttkrp_function(shape, rank):
    """Helper function to generate the function for calculating the Matricized
    Tensor Times Khatri-Rao Product (MTTKRP) using the opt_einsum package.
    
    Parameters
    ----------
    shape : [int, int, int]
        List of 3 integers delineating the shape of the input tensor.
    rank : int
        Number of components in the CP tensor decomposition model.
        
    Returns
    -------
    mttkrp : function
        MTTKRP function parameterized to the input tensor size and rank.
    """
    # define einsum paths for each skip mode
    mttkrp_paths = [
        oe.contract_expression('jr,kr,ijk -> ir', (shape[1], rank), (shape[2], rank), shape),
        oe.contract_expression('ir,kr,ijk -> jr', (shape[0], rank), (shape[2], rank), shape),
        oe.contract_expression('ir,jr,ijk -> kr', (shape[0], rank), (shape[1], rank), shape)
    ]
    # define mttkrp function
    def mttkrp(X, factors, mode):
        mttkrp_path = mttkrp_paths[mode]
        summed_factors = [fm for m, fm in enumerate(factors) if m != mode]
        return mttkrp_path(*summed_factors, X)
    # return the function
    return mttkrp



[docs]
def als_lasso(
    tensor, 
    rank, 
    lambdas, 
    nonneg_modes=None, 
    norm_constraint=True, 
    init='random', 
    tol=1e-6, 
    n_iter_max=1000,  
    random_state=None, 
    threads=None, 
    verbose=0, 
    return_losses=False
):
    r"""Computes a rank-`rank` decomposition of `tensor` such that::
    
        tensor = [|weights; factors[0], ..., factors[-1]|].
    
    The algorithm aims to minimize the loss as defined by:

    .. math::
    
        \|Y - \hat{Y}\|^2 + \lambda_0\|F_0\|_1 + ... + \lambda_{-1}\|F_{-1}\|_1
        
    where :math:`Y` is the input `tensor`, :math:`\hat{Y}` is the model reconstruction
    of the input tensor, and the following terms encompass the l1 norm (calculated column-wise)
    of each factor matrix :math:`F_i` multiplied by its corresponding sparsity coefficient 
    :math:`\lambda_i`. Any mode for which :math:`\lambda` has been set to zero will incur no
    sparsity penalty.
            
    Furthermore, the factor matrices indicated in the `nonneg_modes` parameter are forced
    to be non-negative, and if `norm_constraint` = True, the l2 norm of any 
    factor matrix without an l1 sparsity penalty (:math:`\lambda=0.0`) is constrained to 
    be unit length.
    
    Parameters
    ----------
    tensor : numpy.ndarray
        Input data tensor.
    rank : int
        Number of components.
    lambdas : [float]
        Vector of length tensor.ndim in which lambdas[i] is the l1 sparsity 
        coefficient for factor[i]. If `lambdas` is set to all zeros, this is
        the equivalent of fitting a standard CP decomposition without any
        sparsity constraints.
    nonneg_modes : [int], default is None
        List of modes forced to be non-negative.
    norm_constraint : bool, default is True
        If `norm_constraint` = True, the l2 norm of any factor matrix without an 
        l1 sparsity penalty (:math:`\lambda=0.0`) is constrained to unit length. If the
        sparsity penalty of every mode is 0.0, the l2 norm constraint is 
        automatically turned off in every mode.
    init : {'random', CPTensor}, default is 'random'.
        Values used to initialized the factor matrices. If `init = 'random'` 
        then factor matrices are initialized with uniform distribution using 
        `random_state`. If `init` is a previously initialized `cp tensor`, any 
        weights are incorporated into the last factor, and then the initial 
        weight values for the output decomposition are set to '1'.
    tol : float, default is 1e-6
        Convergence tolerance. The algorithm is considered to have found the 
        global minimum when the change in loss from one iteration 
        to the next falls below the `tol` threshold. The calculated change in
        loss is relative when loss > 1 and absolute when loss <= 1. In other 
        words, convergence is defined as :
        
        .. math::
        
            \frac{\|l^{(n-1)} - l^{(n)}\|}{\max(l^{(n)}, 1)} < t
        
        where :math:`l^{(n)}` is the loss at iteration :math:`n`, and
        :math:`t` is the tolerance threshold set by `tol`. 
        
    n_iter_max : int, default is 1000
        Maximum number of iterations. If the algorithm fails to converge 
        according to the `tol` threshold set, a warning will be raised.
    random_state : {None, int, numpy.random.RandomState}, default is None
        Random state used to initialized factor matrices and weights.
    threads : int, default is None
        Maximum number of threads allocated to the algorithm. 
        If `threads = None`, then all available threads will be used.
    verbose : int, default is 0
        Level of verbosity.
    return_losses : bool, default is False
        Activate return of iteration loss values at each iteration.
        
    Returns
    -------
    cp_tensor : (weight, factors)
        * weights : 1D array of shape (rank,) that contains the weights denoting
            the relative contribution of each factor.
        * factors : List of factors of the CP decomposition where factor matrix 
            `i` is of shape `(tensor.shape[i], rank)`
    losses : list
        A list of loss values calculated at each iteration of the algorithm. 
        Only returned when `return_losses` is set to True.
    """    
    # wrap operations in threadpool limit
    with threadpool_limits(limits=threads, user_api='blas'):
    
        # calculate number of modes in tensor
        n_modes = tl.ndim(tensor)
            
        # set modes to be non-negative
        if nonneg_modes is None:
            nonneg = [False for i in range(n_modes)]
        else:
            nonneg = [True if i in nonneg_modes else False for i in range(n_modes)]
            
        # check lambdas
        lambdas = np.array(lambdas, dtype=float)
        if len(lambdas) != n_modes:
            raise ValueError(
                'The number of sparsity coefficients in `lambdas`' +
                'is not equal to the number of modes in `tensor`.'
            )
        if any(lambdas < 0):
            raise ValueError('l1 sparsity penalty must be nonnegative.')
        
        # set normalization modes
        if not norm_constraint or not np.any(lambdas):
            normalize = [False for i in range(n_modes)]
        else:
            normalize = [True if lambdas[i] == 0 else False for i in range(n_modes)]
            
        # initialize list to store losses
        losses = []

        # initialize factors and weights
        weights, factors = initialize_cp(
            tensor, 
            rank, 
            init=init, 
            random_state=random_state
        )
        weights = None  # Make sure the weights are 1

        tensor_norm_sq = tl.norm(tensor)**2
        
        # build MTTKRP function if opt_einsum optimization has been selected
        compute_mttkrp = _create_mttkrp_function(tensor.shape, rank)
        
        # begin iterations
        for iteration in range(n_iter_max):
            if verbose > 2:
                print('\nStarting iteration {}'.format(iteration), flush=True)
                
            # loop through modes
            for mode in range(n_modes):
                if verbose > 3:
                    print('\tMode {} of {}'.format(mode, n_modes), flush=True)

                # form DtD, containing kr_product.T @ kr_product
                DtD = np.ones((rank, rank))
                for krp_mode in range(n_modes):
                    if krp_mode == mode:
                        continue
                    DtD *= np.matmul(factors[krp_mode].T, factors[krp_mode])
                DtX = compute_mttkrp(tensor, factors, mode)

                # generate new factor by solving lasso problem with MTTKRP
                # fista_solve solves
                #     min 0.5 ||DY - X||^2 + lambda ||Y||_1.
                # We want to solve
                #     min ||DY - X||^2 + lambda ||Y||_1,
                # so we multiply the whole problem by 2 (doesn't change anything) and see that
                # fista_solve solves
                #     min ||DY - X|| + 2 lambda ||Y||_1.
                # If we multiply the lambda by 0.5, then we (informally) get
                #     min ||DY - X|| + 2 (0.5) lambda ||Y||_1 = min ||DY - X|| + lambda ||Y||_1
                # which is what we want to solve.
                factors[mode] = fista_solve(
                    lhs=DtD, 
                    rhs=DtX.T,
                    l1_reg=0.5*lambdas[mode],
                    nonnegative=nonneg[mode], 
                    normalize=normalize[mode],
                    init=factors[mode].T
                ).T
                
            # Compute loss using tensor reconstructed from latest factor updates
            # Faster version to compute the loss, uses the fact that
            # ||DY - X||^2 = Tr(Y^T D^TD X) - 2 Tr(X^T D^T B) + Tr(B^T B)
            #              = ||X_estimated||^2 - 2 Tr(X^T D^T B) + ||X||^2
            #              = cp_norm^2 + ||X||^2 - 2 sum(X^T * D^T B)
            # where * is hadamard product and cp_norm can be estimated efficiently
            iprod = tl.sum(tl.sum(DtX * factors[mode], axis=0))
            factors_norm_sq = tl.cp_tensor.cp_norm((weights, factors))**2
            sse = tensor_norm_sq + factors_norm_sq - 2*iprod

            # calculate l1 sparsity penalties
            factor_l1_norms = np.array([tl.norm(f, 1) for f in factors])
            penalties = tl.dot(lambdas, factor_l1_norms)
            # compute loss
            loss = sse + penalties
            # append loss to history
            losses.append(loss)
            if verbose > 1:
                print('loss: {}'.format(losses[-1]), flush=True)
            
            # stop iterations if loss has acheived zero 
            if loss == 0.0:
                if verbose > 0:
                    message = 'Algorithm converged after {} iterations'.format(iteration+1)
                    print(message, flush=True)
                break
            
            # check convergence
            if tol != 0 and iteration != 0:
                # calculate change in loss (relative if loss is > 0, abso)
                loss_change = abs(losses[-2] - losses[-1]) / max(losses[-1], 1)
                # compare change in loss to tolerance
                if loss_change < tol:
                    if verbose > 0:
                        message = 'Algorithm converged after {} iterations'.format(iteration+1)
                        print(message, flush=True)
                    break
                # close out with warnings if the iteration maximum has been reached
                elif iteration == n_iter_max - 1:
                    message = 'Algorithm failed to converge after {} iterations'.format(iteration+1)
                    if verbose > 0:
                        print(message, flush=True)
                    warnings.warn(message)
        
        # return result
        if return_losses:
            return SparseCPTensor((None, factors)), losses
        else:
            return SparseCPTensor((None, factors))




[docs]
class SparseCP(DecompositionMixin):
    r"""Sparse CP decomposition by l1-penalized Alternating Least Squares (ALS)
    
    Parameterizes a rank-`rank` decomposition of `tensor` such that::
    
        tensor = [|weights; factors[0], ..., factors[-1]|].
             
    The `lambdas` values indicate the sparsity-inducing l1 regularization 
    coefficient to be applied to each mode when the model is fit to data. The 
    factor matrices indicated in `nonneg_modes` are forced to be non-negative, 
    and if `norm_constraint` = True, the l2 norm of any factor matrix without an 
    l1 sparsity penalty (lambda=0.0) is constrained to be unit length.
    
    Parameters
    ----------
    rank : int
        Number of components.
    lambdas : [float]
        Vector of length tensor.ndim in which lambdas[i] is the l1 sparsity 
        coefficient for factor[i]. If `lambdas` is set to all zeros, this is
        the equivalent of fitting a standard CP decomposition without any
        sparsity constraints.
    nonneg_modes : [int], default is None
        List of modes forced to be non-negative.
    norm_constraint : bool, default is True
        If `norm_constraint` = True, the l2 norm of any factor matrix without an 
        l1 sparsity penalty (lambda=0.0) is constrained to unit length. If the
        sparsity penalty of every mode is 0.0, the l2 norm constraint is 
        automatically turned off in every mode.
    init : {'random', CPTensor}, default is 'random'.
        Values used to initialized the factor matrices. If `init == 'random'` 
        then factor matrices are initialized with uniform distribution using 
        `random_state`. If init is a previously initialized `cp tensor`, any 
        weights are incorporated into the last factor, and then the initial 
        weight values for the output decomposition are set to '1'.
    tol : float, default is 1e-6
        Convergence tolerance. The algorithm is considered to have found the 
        global minimum when the change in loss from one iteration 
        to the next falls below the `tol` threshold. The calculated change in
        loss is relative when loss > 1 and absolute when loss <= 1. In other 
        words, convergence is defined as :
        
        .. math::
        
            \frac{\|l^{(n-1)} - l^{(n)}\|}{\max(l^{(n)}, 1)} < t
        
        where :math:`l^{(n)}` is the loss at iteration :math:`n`, and
        :math:`t` is the tolerance threshold set by ``tol``. 
        
    n_iter_max : int, default is 1000
        Maximum number of iterations. If the algorithm fails to converge 
        according to the `tol` threshold set, a warning will be raised.
    random_state : {None, int, numpy.random.RandomState}, default is None
        Random state used to initialized factor matrices and weights.
    n_initializations : int, default is 1
        Number of random initializations to compute.
    """
    def __init__(
        self, 
        rank, 
        lambdas, 
        nonneg_modes=None, 
        norm_constraint=True, 
        init='random', 
        tol=1e-6, 
        n_iter_max=1000, 
        random_state=None, 
        n_initializations=1
    ):
        # initialize passed parameters
        self.rank = rank
        self.lambdas = lambdas
        self.nonneg_modes = nonneg_modes
        self.norm_constraint = norm_constraint
        self.init = init
        self.tol = tol
        self.n_iter_max = n_iter_max
        self.random_state = random_state
        self.n_initializations = n_initializations
        
        # initialize internal parameters
        self._best_cp_index = None
        
    @property  
    def decomposition_(self):
        if self._best_cp_index is None:
            raise AttributeError('The model has not yet been fit with data.')
        else:
            return self.candidates_[self._best_cp_index]
        
    @property  
    def loss_(self):
        if self._best_cp_index is None:
            raise AttributeError('The model has not yet been fit with data.')
        else:
            return self.candidate_losses_[self._best_cp_index]
        

[docs]
    def fit_transform(
        self, 
        tensor, 
        threads=None, 
        verbose=0, 
        return_losses=False
    ):
        """Fits `n_initializations` sparse tensor decomposition models to the
        provided data tensor, using the als_lasso() method. Fit models are
        stored with the SparseCP object, and are accessible via the 
        SparseCP.decomposition_(), SparseCP.loss_(), SparseCP.candidates_(), 
        and SparseCP.candidate_losses_() properties.
        
        Parameters
        ----------
        tensor : numpy.ndarray
            Input data tensor.
        threads : int, default is None
            Maximum number of threads allocated to the algorithm. If 
            `threads` = None, then all available threads will be used.
        verbose : int, default is 0
            Level of verbosity.
        return_losses : bool, default is False
            Activate return of iteration loss values at each iteration.
            
        Returns
        -------
        cp_tensor : (weight, factors)
            * weights : 1D array of shape (rank,) that contains the weights 
                denoting the relative contributio of each factor.
            * factors : List of factors of the CP decomposition where factor 
                matrix `i` is of shape `(tensor.shape[i], rank)`
        losses : list
            A list of loss values calculated at each iteration of the algorithm. 
            Only returned when `return_losses` is set to True.
        """
        # initialize lists of candidate cp_tensors and their losses
        candidates = list()
        candidate_losses = list()
        
        # initialize lowest error
        lowest_err = float('inf')
        
        # initialize random state
        rns = check_random_state(self.random_state)
        
        # run multiple initializations
        for i in range(self.n_initializations):
            if verbose > 0:
                print('\nBeginning initialization {} of {}'.format(
                    i+1, self.n_initializations))
            # fit model
            cp, loss = als_lasso(
                tensor, 
                self.rank, 
                self.lambdas, 
                nonneg_modes=self.nonneg_modes, 
                norm_constraint=self.norm_constraint, 
                init=self.init, 
                tol=self.tol, 
                n_iter_max=self.n_iter_max, 
                random_state=rns, 
                threads=threads, 
                verbose=verbose - 1, 
                return_losses=True
            )
            # store candidates
            candidates.append(cp)
            candidate_losses.append(loss)
            # keep best fit
            if loss[-1] < lowest_err:
                lowest_err = loss[-1]
                best_cp_index = i
        
        # store results
        self.candidates_ = candidates
        self.candidate_losses_ = candidate_losses
        self._best_cp_index = best_cp_index
        
        # return best decomposition and, optionally, losses
        if return_losses:
            return candidates[best_cp_index], candidate_losses[best_cp_index]
        else:
            return candidates[best_cp_index]
Source code for barnacle.decomposition

Barnacle

Navigation

Related Topics