bnmf-algs: src/bld/bld_mult/bld_mult_cuda_kernels.hpp File Reference

bnmf-algs

File List

Go to the source code of this file.

Namespaces
	bnmf_algs
	Main namespace for bnmf-algs library.

	bnmf_algs::details
	Namespace that contains types and functions for internal computations.

	bnmf_algs::details::bld_mult
	Namespace containing bld_mult update functions (CUDA/non-CUDA).

	bnmf_algs::details::bld_mult::kernel
	Namespace containing CUDA kernels used in bld_mult CUDA updates.

Functions
template<typename Real >
__device__ Real	bnmf_algs::details::bld_mult::kernel::psi_appr (Real x)
	Device function to return psi_appr of a real number. More...

template<typename Real >
__global__ void	bnmf_algs::details::bld_mult::kernel::update_grad_plus (cudaPitchedPtr S, const Real *beta_eph, size_t pitch, cudaPitchedPtr grad_plus, size_t width, size_t height, size_t depth)
	Perform grad_plus update employed in bld_mult algorithm using tensors/matrices residing on a GPU device. More...

template<typename Real >
__global__ void	bnmf_algs::details::bld_mult::kernel::update_nom (cudaPitchedPtr S, const Real *X_reciprocal, size_t X_reciprocal_pitch, const Real *grad_minus, size_t grad_minus_pitch, Real *nom_mult, size_t nom_mult_pitch, size_t width, size_t height, size_t depth)
	Perform nom_mult update employed in bld_mult algorithm using tensors/matrices residing on a GPU device. More...

template<typename Real >
__global__ void	bnmf_algs::details::bld_mult::kernel::update_denom (cudaPitchedPtr S, const Real *X_reciprocal, size_t X_reciprocal_pitch, cudaPitchedPtr grad_plus, Real *denom_mult, size_t denom_mult_pitch, size_t width, size_t height, size_t depth)
	Perform denom_mult update employed in bld_mult algorithm using tensors/matrices residing on a GPU device. More...

template<typename Real >
__global__ void	bnmf_algs::details::bld_mult::kernel::update_S (const Real *X, size_t X_pitch, const Real *nom_mult, size_t nom_mult_pitch, const Real *denom_mult, size_t denom_mult_pitch, const Real *grad_minus, size_t grad_minus_pitch, cudaPitchedPtr grad_plus, const Real *S_ijp, size_t S_ijp_pitch, cudaPitchedPtr S, size_t width, size_t height, size_t depth)
	Perform S update employed in bld_mult algorithm using tensors/matrices residing on a GPU device. More...