bnmf-algs
|
Namespace containing bld_mult update functions (CUDA/non-CUDA). More...
Namespaces | |
kernel | |
Namespace containing CUDA kernels used in bld_mult CUDA updates. | |
Functions | |
template<typename Real > | |
void | update_grad_plus_cuda (const cuda::DeviceMemory3D< Real > &S, const cuda::DeviceMemory2D< Real > &beta_eph, cuda::DeviceMemory3D< Real > &grad_plus) |
Perform grad_plus update employed in bld_mult algorithm using CUDA. More... | |
template<typename Real > | |
void | update_nom_cuda (const cuda::DeviceMemory2D< Real > &X_reciprocal, const cuda::DeviceMemory2D< Real > &grad_minus, const cuda::DeviceMemory3D< Real > &S, cuda::DeviceMemory2D< Real > &nom) |
Perform nom_mult update employed in bld_mult algorithm using CUDA. More... | |
template<typename Real > | |
void | update_denom_cuda (const cuda::DeviceMemory2D< Real > &X_reciprocal, const cuda::DeviceMemory3D< Real > &grad_plus, const cuda::DeviceMemory3D< Real > &S, cuda::DeviceMemory2D< Real > &denom) |
Perform denom update employed in bld_mult algorithm using CUDA. More... | |
template<typename Real > | |
void | update_S_cuda (const cuda::DeviceMemory2D< Real > &X, const cuda::DeviceMemory2D< Real > &nom, const cuda::DeviceMemory2D< Real > &denom, const cuda::DeviceMemory2D< Real > &grad_minus, const cuda::DeviceMemory3D< Real > &grad_plus, const cuda::DeviceMemory2D< Real > &S_ijp, cuda::DeviceMemory3D< Real > &S) |
Perform S update employed in bld_mult algorithm using CUDA. More... | |
template<typename T > | |
tensor_t< T, 3 > | init_S (const matrix_t< T > &X, size_t z) |
Initialize S tensor using a Dirichlet sample of size z with all concentration parameters set to 1. More... | |
template<typename T > | |
matrix_t< T > | X_reciprocal (const matrix_t< T > &X, double eps) |
Compute the reciprocal \(\hat{X}\) of the input matrix \(X\). More... | |
template<typename Scalar > | |
std::pair< vector_t< Scalar >, vector_t< Scalar > > | init_alpha_beta (const alloc_model::Params< Scalar > ¶ms) |
Initialize alpha and beta vectors used in bld_mult. More... | |
template<typename T > | |
void | update_alpha_eph (const tensor_t< T, 2 > &S_ipk, const vector_t< T > &alpha, matrix_t< T > &alpha_eph) |
Update alpha_eph matrix used in bld_mult. More... | |
template<typename T > | |
void | update_beta_eph (const tensor_t< T, 2 > &S_pjk, const vector_t< T > &beta, matrix_t< T > &beta_eph) |
Update beta_eph matrix used in bld_mult. More... | |
template<typename T , typename PsiFunction > | |
void | update_grad_plus (const tensor_t< T, 3 > &S, const matrix_t< T > &beta_eph, PsiFunction psi_fn, tensor_t< T, 3 > &grad_plus) |
Update grad_plus tensor used in bld_mult. More... | |
template<typename T , typename PsiFunction > | |
void | update_grad_minus (const matrix_t< T > &alpha_eph, PsiFunction psi_fn, matrix_t< T > &grad_minus) |
Update grad_minus tensor used in bld_mult. More... | |
template<typename T > | |
void | update_nom_mult (const matrix_t< T > &X_reciprocal, const matrix_t< T > &grad_minus, const tensor_t< T, 3 > &S, matrix_t< T > &nom_mult) |
Update nom_mult matrix used in bld_mult. More... | |
template<typename T > | |
void | update_denom_mult (const matrix_t< T > &X_reciprocal, const tensor_t< T, 3 > &grad_plus, const tensor_t< T, 3 > &S, matrix_t< T > &denom_mult) |
Update denom_mult matrix used in bld_mult. More... | |
template<typename T > | |
void | update_S (const matrix_t< T > &X, const matrix_t< T > &nom, const matrix_t< T > &denom, const matrix_t< T > &grad_minus, const tensor_t< T, 3 > &grad_plus, const tensor_t< T, 2 > &S_ijp, tensor_t< T, 3 > &S, double eps) |
Update S tensor (output of bld_mult algorithm). More... | |
Namespace containing bld_mult update functions (CUDA/non-CUDA).
std::pair<vector_t<Scalar>, vector_t<Scalar> > bnmf_algs::details::bld_mult::init_alpha_beta | ( | const alloc_model::Params< Scalar > & | params | ) |
tensor_t<T, 3> bnmf_algs::details::bld_mult::init_S | ( | const matrix_t< T > & | X, |
size_t | z | ||
) |
Initialize S tensor using a Dirichlet sample of size z with all concentration parameters set to 1.
S tensor is initialized according to the following update
\[ S_{ijk} \leftarrow X_{ij}\mathcal{D}_{ij}(k) \]
where \(\mathcal{D}_{ij}(k)\) is the kth entry of the \(ij^{th}\) Dirichlet sample.
T | Type of the entries of the matrix X and tensor S |
X | X matrix sent as input to bld algorithm. |
z | Rank of the decomposition. |
void bnmf_algs::details::bld_mult::update_denom_cuda | ( | const cuda::DeviceMemory2D< Real > & | X_reciprocal, |
const cuda::DeviceMemory3D< Real > & | grad_plus, | ||
const cuda::DeviceMemory3D< Real > & | S, | ||
cuda::DeviceMemory2D< Real > & | denom | ||
) |
Perform denom update employed in bld_mult algorithm using CUDA.
This function performs the denom update employed in bld_mult algorithm using CUDA. To learn more about denom update, refer to details::bld_mult::update_denom_mult function documentation.
All the given tensor and matrix objects are assumed to be in row-major order . This is the allocation order specified in defs.hpp file. All the CUDA indexing operations are performed according to the row-major allocation order.
Real | Type of the entries of matrices and tensors such as double or float. |
X_reciprocal | Matrix \(\hat{X}\) where \(\hat{X}_{ij} = \frac{1}{X_{ij}}\). Should reside on the GPU. |
grad_plus | grad_plus tensor used in bld_mult on the GPU. |
S | \(S\) tensor on the GPU. |
denom | denom_mult matrix on the GPU. |
void bnmf_algs::details::bld_mult::update_denom_mult | ( | const matrix_t< T > & | X_reciprocal, |
const tensor_t< T, 3 > & | grad_plus, | ||
const tensor_t< T, 3 > & | S, | ||
matrix_t< T > & | denom_mult | ||
) |
Update denom_mult matrix used in bld_mult.
T | Type of the matrix entries. |
X_reciprocal | Matrix containing reciprocals of entries of input matrix X. |
grad_plus | grad_plus tensor. |
S | S tensor |
denom_mult | denom_mult matrix to update. |
void bnmf_algs::details::bld_mult::update_grad_minus | ( | const matrix_t< T > & | alpha_eph, |
PsiFunction | psi_fn, | ||
matrix_t< T > & | grad_minus | ||
) |
Update grad_minus tensor used in bld_mult.
T | Type of the matrix/tensor entries |
PsiFunction | A callable type that will return \(\psi(x)\) for an input parameter \(x\). |
alpha_eph | alpha_matrix used in bld_mult. |
psi_fn | Psi computing callable. |
grad_minus | grad_minus tensor to update. |
void bnmf_algs::details::bld_mult::update_grad_plus | ( | const tensor_t< T, 3 > & | S, |
const matrix_t< T > & | beta_eph, | ||
PsiFunction | psi_fn, | ||
tensor_t< T, 3 > & | grad_plus | ||
) |
Update grad_plus tensor used in bld_mult.
T | Type of the matrix/tensor entries |
PsiFunction | A callable type that will return \(\psi(x)\) for an input parameter \(x\). |
void bnmf_algs::details::bld_mult::update_grad_plus_cuda | ( | const cuda::DeviceMemory3D< Real > & | S, |
const cuda::DeviceMemory2D< Real > & | beta_eph, | ||
cuda::DeviceMemory3D< Real > & | grad_plus | ||
) |
Perform grad_plus update employed in bld_mult algorithm using CUDA.
This function performs the grad_plus update employed in bld_mult algorithm using CUDA. To learn more about grad_plus update, refer to details::bld_mult::update_grad_plus function documentation.
All the given tensor and matrix objects are assumed to be in row-major order . This is the allocation order specified in defs.hpp file. All the CUDA indexing operations are performed according to the row-major allocation order.
Row-major order means that the last index of a matrix/tensor objects changes the fastest. This is easy to understand with matrices: Matrix is stored in terms of its rows (0th row, then 1st row, ...). Allocation order of a 3D tensor follows the same idea: Tensor is stored in terms of its fibers (depth rows). Therefore, for a a 2x2x3 tensor is stored as follows (only writing the indices):
(0,0,0) (0,0,1) (0,0,2) (0,1,0) (0,1,1) (0,1,2) (1,0,0) (1,0,1) (1,0,2) (1,1,0) (1,1,1) (1,1,2)
Real | Type of the entries of matrices and tensors such as double or float. |
S | \(S\) tensor on the GPU. |
beta_eph | beta_eph matrix on the GPU. |
grad_plus | grad_plus tensor on the GPU. |
void bnmf_algs::details::bld_mult::update_nom_cuda | ( | const cuda::DeviceMemory2D< Real > & | X_reciprocal, |
const cuda::DeviceMemory2D< Real > & | grad_minus, | ||
const cuda::DeviceMemory3D< Real > & | S, | ||
cuda::DeviceMemory2D< Real > & | nom | ||
) |
Perform nom_mult update employed in bld_mult algorithm using CUDA.
This function performs the nom_mult update employed in bld_mult algorithm using CUDA. To learn more about nom_mult update, refer to details::bld_mult::update_nom_mult function documentation.
All the given tensor and matrix objects are assumed to be in row-major order . This is the allocation order specified in defs.hpp file. All the CUDA indexing operations are performed according to the row-major allocation order.
Real | Type of the entries of matrices and tensors such as double or float. |
X_reciprocal | Matrix \(\hat{X}\) where \(\hat{X}_{ij} = \frac{1}{X_{ij}}\). Should reside on the GPU. |
grad_minus | grad_minus matrix used in bld_mult on the GPU. |
S | \(S\) tensor on the GPU. |
nom | nom_mult matrix on the GPU. |
void bnmf_algs::details::bld_mult::update_nom_mult | ( | const matrix_t< T > & | X_reciprocal, |
const matrix_t< T > & | grad_minus, | ||
const tensor_t< T, 3 > & | S, | ||
matrix_t< T > & | nom_mult | ||
) |
Update nom_mult matrix used in bld_mult.
T | Type of the matrix entries. |
X_reciprocal | Matrix containing reciprocals of entries of input matrix X. |
grad_minus | grad_minus matrix. |
S | S tensor |
nom_mult | nom_mult matrix to update. |
void bnmf_algs::details::bld_mult::update_S | ( | const matrix_t< T > & | X, |
const matrix_t< T > & | nom, | ||
const matrix_t< T > & | denom, | ||
const matrix_t< T > & | grad_minus, | ||
const tensor_t< T, 3 > & | grad_plus, | ||
const tensor_t< T, 2 > & | S_ijp, | ||
tensor_t< T, 3 > & | S, | ||
double | eps | ||
) |
Update S tensor (output of bld_mult algorithm).
T | Type of tensor entries. |
X | Input matrix passed to bld_mult algorithm. |
nom | nom_mult matrix used in bld_mult algorithm. |
denom | denom_mult matrix used in bld_mult algorithm. |
grad_minus | grad_minus matrix used in bld_mult algorithm. |
grad_plus | grad_plus tensor used in bld_mult algorithm. |
S_ijp | Matrix computed by summing tensor S along its 2nd axis (0 based indexing). |
S | S tensor to update. |
eps | Epsilon value to prevent division by 0 errors. |
void bnmf_algs::details::bld_mult::update_S_cuda | ( | const cuda::DeviceMemory2D< Real > & | X, |
const cuda::DeviceMemory2D< Real > & | nom, | ||
const cuda::DeviceMemory2D< Real > & | denom, | ||
const cuda::DeviceMemory2D< Real > & | grad_minus, | ||
const cuda::DeviceMemory3D< Real > & | grad_plus, | ||
const cuda::DeviceMemory2D< Real > & | S_ijp, | ||
cuda::DeviceMemory3D< Real > & | S | ||
) |
Perform S update employed in bld_mult algorithm using CUDA.
This function performs the S update employed in bld_mult algorithm using CUDA. To learn more about S update, refer to details::bld_mult::update_S function documentation.
All the given tensor and matrix objects are assumed to be in row-major order . This is the allocation order specified in defs.hpp file. All the CUDA indexing operations are performed according to the row-major allocation order.
Real | Type of the entries of matrices and tensors such as double or float. |
X | Input X matrix on the GPU. |
nom | nom_mult. |
denom | denom_mult. |
grad_minus | grad_minus matrix. |
grad_plus | grad_plus tensor used in bld_mult on the GPU. |
S_ijp | Matrix formed by summing S tensor along its axis 2. (last axis) |
S | \(S\) tensor on the GPU. |
matrix_t<T> bnmf_algs::details::bld_mult::X_reciprocal | ( | const matrix_t< T > & | X, |
double | eps | ||
) |
Compute the reciprocal \(\hat{X}\) of the input matrix \(X\).
\(\hat{X}_{ij} = \frac{1}{X_{ij} + \epsilon}\).
T | Type of the entries of the input matrix. |
X | Input matrix. |
eps | Epsilon value to be used to prevent division by 0 errors. |