bnmf-algs
Namespaces | Functions
bld_mult_cuda_kernels.hpp File Reference

Go to the source code of this file.

Namespaces

 bnmf_algs
 Main namespace for bnmf-algs library.
 
 bnmf_algs::details
 Namespace that contains types and functions for internal computations.
 
 bnmf_algs::details::bld_mult
 Namespace containing bld_mult update functions (CUDA/non-CUDA).
 
 bnmf_algs::details::bld_mult::kernel
 Namespace containing CUDA kernels used in bld_mult CUDA updates.
 

Functions

template<typename Real >
__device__ Real bnmf_algs::details::bld_mult::kernel::psi_appr (Real x)
 Device function that returns psi_appr(x), an approximation of the psi (digamma) function, for a real number x. More...
 
template<typename Real >
__global__ void bnmf_algs::details::bld_mult::kernel::update_grad_plus (cudaPitchedPtr S, const Real *beta_eph, size_t pitch, cudaPitchedPtr grad_plus, size_t width, size_t height, size_t depth)
 Performs the grad_plus update of the bld_mult algorithm on tensors/matrices residing on a GPU device. More...
 
template<typename Real >
__global__ void bnmf_algs::details::bld_mult::kernel::update_nom (cudaPitchedPtr S, const Real *X_reciprocal, size_t X_reciprocal_pitch, const Real *grad_minus, size_t grad_minus_pitch, Real *nom_mult, size_t nom_mult_pitch, size_t width, size_t height, size_t depth)
 Performs the nom_mult update of the bld_mult algorithm on tensors/matrices residing on a GPU device. More...
 
template<typename Real >
__global__ void bnmf_algs::details::bld_mult::kernel::update_denom (cudaPitchedPtr S, const Real *X_reciprocal, size_t X_reciprocal_pitch, cudaPitchedPtr grad_plus, Real *denom_mult, size_t denom_mult_pitch, size_t width, size_t height, size_t depth)
 Performs the denom_mult update of the bld_mult algorithm on tensors/matrices residing on a GPU device. More...
 
template<typename Real >
__global__ void bnmf_algs::details::bld_mult::kernel::update_S (const Real *X, size_t X_pitch, const Real *nom_mult, size_t nom_mult_pitch, const Real *denom_mult, size_t denom_mult_pitch, const Real *grad_minus, size_t grad_minus_pitch, cudaPitchedPtr grad_plus, const Real *S_ijp, size_t S_ijp_pitch, cudaPitchedPtr S, size_t width, size_t height, size_t depth)
 Performs the S update of the bld_mult algorithm on tensors/matrices residing on a GPU device. More...