import numpy as np
import scipy.sparse as sparse
import scipy.stats as stats
from tensorly import check_random_state
from tensorly.cp_tensor import CPTensor
[docs]
class Component(CPTensor):
    """Rank-1 CPTensor that constitutes one component of the CPTensor from
    which it was derived.

    Parameters
    ----------
    component : object accepted by ``CPTensor.__init__``
        Rank-1 decomposition. It is forwarded unchanged to the parent
        constructor.

    Attributes
    ----------
    n_modes : int
        Number of modes of the component, i.e. ``len(self.shape)``.

    Raises
    ------
    ValueError
        If the decomposition is not rank-1.
    """

    def __init__(self, component):
        super().__init__(component)
        if self.rank != 1:
            raise ValueError('Component object must be a rank-1 CPTensor')
        self.n_modes = len(self.shape)

    def __repr__(self):
        message = 'CPTensor component of shape {}'.format(self.shape)
        return message

    def support(self, modes=None, boolean=False, thold=None):
        """Return the indices of all non-zero elements of the factors.

        Optionally, if a pair of thresholds is provided, elements greater
        than or equal to `thold[0]` and less than or equal to `thold[1]` are
        considered zero-valued.

        Parameters
        ----------
        modes : int, list of ints, default is None
            Mode(s) of Component to extract support from. If `modes=None`, all
            Component modes will be included. NumPy integer scalars are
            accepted wherever an int is.
        boolean : bool, default is False
            If True, returns non-zero indices of each mode as an array of
            booleans with the same shape as the factor. Otherwise the row
            indices of the non-zero entries are returned as an array of ints.
        thold : tuple of numbers, default is None
            Thresholds `(lower bound, upper bound)` of values to be
            considered zero-valued. Values greater than or equal to
            `thold[0]` and less than or equal to `thold[1]` will be
            considered zero-valued. Defaults to `(0, 0)`, i.e. only exact
            zeros are excluded.

        Returns
        -------
        indices : numpy.ndarray or list of numpy.ndarrays
            Arrays of Component indices. One array for each selected mode,
            ordered by mode number (not by the order given in `modes`). If
            `modes` is an int, just the index array of the corresponding
            mode is returned.

        Raises
        ------
        ValueError
            If `modes` is not None, an int or a list, if a requested mode is
            out of range, or if `thold` is not a valid
            (lower bound, upper bound) pair.
        """
        valid_modes = np.arange(self.n_modes)

        # check modes parameter
        if modes is None:
            modes = list(range(self.n_modes))
            single_mode = False
        elif isinstance(modes, (int, np.integer)) and not isinstance(modes, bool):
            # bool is a subclass of int but is never a meaningful mode index
            if modes not in valid_modes:
                raise ValueError('Mode not in range of tensor of shape {}'.format(self.shape))
            modes = [modes]
            single_mode = True
        elif isinstance(modes, list):
            for mode in modes:
                if mode not in valid_modes:
                    raise ValueError('Modes not in range of tensor of shape {}'.format(self.shape))
            single_mode = False
        else:
            raise ValueError('Parameter `modes` must be an int or list of ints.')

        # check thold parameter
        if thold is None:
            thold = (0, 0)
        else:
            if len(thold) != 2:
                raise ValueError('Parameter `thold` must be a tuple (lower bound, upper bound).')
            if thold[0] > thold[1]:
                raise ValueError('The lower bound of `thold` is greater than the upper bound.')

        # get support indices; factors are visited in mode order, so the
        # output order follows the tensor's modes rather than `modes`
        indices = []
        for i, f in enumerate(self.factors):
            if i not in modes:
                continue
            # an entry is in the support when it lies outside [thold[0], thold[1]]
            index = np.any([f < thold[0], f > thold[1]], axis=0)
            if boolean:
                indices.append(index)
            else:
                indices.append(np.where(index)[0])

        # return results
        if single_mode:
            return indices[0]
        return indices
[docs]
class SparseCPTensor(CPTensor):
    """Container for methods that operate on sparse CP tensors."""

    def __init__(self, cp_tensor):
        super().__init__(cp_tensor)

    def __repr__(self):
        return 'Rank-{} SparseCPTensor of shape {}'.format(self.rank, self.shape)

    def get_components(self):
        """Split the decomposition into its rank-1 components.

        Returns
        -------
        components : list of Components
            One Component per rank index; components[i] is built from the
            i-th column of every factor matrix and the i-th weight.
        """

        def build(idx):
            # column `idx` of each factor matrix, reshaped to a column vector
            columns = [matrix.T[idx].reshape((-1, 1)) for matrix in self.factors]
            weight = np.array([self.weights[idx]])
            return Component((weight, columns))

        return [build(idx) for idx in range(self.rank)]

    def get_clusters(self, mode, boolean=False, thold=None):
        """Extract cluster memberships along one mode.

        Every component is treated as a cluster whose members are the
        entries in the support (non-zero values) of that component's factor
        in the requested mode. The support of each component is computed
        with `Component.support`.

        Parameters
        ----------
        mode : int
            Mode to get clusters from.
        boolean : bool, default is False
            If True, each cluster is returned as a boolean array; otherwise
            as an array of integer indices.
        thold : tuple of ints, default is None
            Thresholds of values to be considered zero-valued. Values
            greater than or equal to `thold[0]` and less than or equal to
            `thold[1]` will be considered zero-valued.

        Returns
        -------
        clusters : list of numpy.ndarrays
            List of cluster indices of the selected mode, one entry per
            component.
        """
        options = dict(modes=mode, boolean=boolean, thold=thold)
        return [part.support(**options) for part in self.get_components()]
[docs]
class SimSparseCPTensor(SparseCPTensor):
    """Class container for methods related to simulated sparse CP tensors.
    """

    def __init__(self,
                 cp_tensor):
        super().__init__(cp_tensor)

    def __repr__(self):
        message = 'Rank-{} SimSparseCPTensor of shape {}'.format(self.rank, self.shape)
        return message

    def to_tensor(
        self,
        noise_level=0,
        sparse_noise=False,
        noise_distribution=None,
        random_state=None
    ):
        """Generate optionally noisy data tensor from factorized CP tensor.

        This method overrides the parent class's `to_tensor()` method.

        Parameters
        ----------
        noise_level : float, optional
            Scale factor for the noise tensor. The noise is rescaled so that
            its l2 norm equals `noise_level` times the norm of the CP tensor.
            If 0 (the default), the noiseless tensor is returned.
        sparse_noise : bool
            If True, will set all positions in the noise tensor that
            correspond to zero-valued positions in the signal tensor to zero.
            Default is False.
        noise_distribution : scipy.stats.rv_continuous, optional
            Parameterized (frozen) distribution used to generate the noise
            tensor. If None, a standard normal `scipy.stats.norm()` is used.
        random_state : {None, int, numpy.random.RandomState}, optional
            Random state to seed the noise_distribution generator.

        Returns
        -------
        data : numpy.ndarray
            Tensorized data formatted in an n-dimensional numpy array.
        """
        # get tensorized data
        data = super().to_tensor()
        if noise_level == 0:
            return data

        if noise_distribution is None:
            noise_distribution = stats.norm()

        # initialize random generator
        rns = check_random_state(random_state)

        # draw the raw noise tensor
        noise = noise_distribution.rvs(size=self.shape, random_state=rns)
        if sparse_noise:
            noise = noise * (data != 0)

        # An all-zero noise tensor (e.g. sparse noise on an all-zero signal)
        # cannot be normalized; dividing by its zero norm would fill the data
        # with NaNs, so there is simply nothing to add.
        noise_norm = np.linalg.norm(noise)
        if noise_norm == 0:
            return data

        # normalize to unit norm, then scale relative to the signal norm;
        # done out-of-place so integer-valued samples do not break the division
        noise = noise / noise_norm
        noise = noise * (noise_level * self.norm())
        data += noise
        return data
[docs]
def simulated_sparse_tensor(
    shape,
    rank,
    densities=None,
    factor_dist_list=None,
    weights=None,
    random_state=None
):
    """Generates simulated data in the form of a sparse cp_tensor

    One factor matrix of shape `(shape[i], rank)` is drawn per mode with
    `scipy.sparse.random` and converted to a dense array.

    Parameters
    ----------
    shape : tuple of ints
        Tensor shape where len(shape) = n modes in tensor.
    rank : int
        The number of components in the tensor.
    densities : list of floats [0.0, 1.0], optional
        The proportion of elements that are non-zero in the factor matrices,
        one entry per mode. Must be the same length as the `shape` parameter.
        If not set, the densities are set to 1 for fully dense factor matrices.
    factor_dist_list : list of scipy.stats._distn_infrastructure.rv_frozen, optional
        Distributions from which the factor matrices will be drawn, one per
        mode. Must be the same length as the `shape` parameter and must have
        a .rvs() method for drawing random values, and a `random_state`
        attribute specifying state. Note that the `random_state` attribute of
        each distribution is overwritten by this function.
        If not set, `scipy.stats.uniform()` is used for every mode.
        Example: `scipy.stats.uniform()`
    weights : list of floats, optional
        Weights to assign to each factor. If not set, then defaults to ones.
    random_state : {None, int, np.random.RandomState}
        Random state used to seed both the sparsity pattern and the values
        of the factor matrices.

    Returns
    -------
    sim_cp : SimSparseCPTensor
        Parameterized simulated data.
    """
    rns = check_random_state(random_state)
    n_modes = len(shape)

    # `densities` and `factor_dist_list` are indexed per mode below, so their
    # defaults must have one entry per mode (not one per component).
    if densities is None:
        densities = np.ones(n_modes)
    if factor_dist_list is None:
        factor_dist_list = [stats.uniform() for _ in range(n_modes)]
    # weights, in contrast, are per component
    if weights is None:
        weights = np.ones(rank)

    factors = []
    for i, dim in enumerate(shape):
        dist = factor_dist_list[i]
        # share one random state so the whole simulation is reproducible
        dist.random_state = rns
        factor = sparse.random(
            dim,
            rank,
            density=densities[i],
            random_state=rns,
            data_rvs=dist.rvs
        )
        factors.append(factor.toarray())

    sim_cp = SimSparseCPTensor((weights, factors))
    return sim_cp