Namespace containing CUDA kernels used in bld_mult CUDA updates. More...

Functions
template<typename Real >
__device__ Real	psi_appr (Real x)
	Device function to return psi_appr of a real number. More...

template<typename Real >
__global__ void	update_grad_plus (cudaPitchedPtr S, const Real *beta_eph, size_t pitch, cudaPitchedPtr grad_plus, size_t width, size_t height, size_t depth)
	Perform grad_plus update employed in bld_mult algorithm using tensors/matrices residing on a GPU device. More...

template<typename Real >
__global__ void	update_nom (cudaPitchedPtr S, const Real *X_reciprocal, size_t X_reciprocal_pitch, const Real *grad_minus, size_t grad_minus_pitch, Real *nom_mult, size_t nom_mult_pitch, size_t width, size_t height, size_t depth)
	Perform nom_mult update employed in bld_mult algorithm using tensors/matrices residing on a GPU device. More...

template<typename Real >
__global__ void	update_denom (cudaPitchedPtr S, const Real *X_reciprocal, size_t X_reciprocal_pitch, cudaPitchedPtr grad_plus, Real *denom_mult, size_t denom_mult_pitch, size_t width, size_t height, size_t depth)
	Perform denom_mult update employed in bld_mult algorithm using tensors/matrices residing on a GPU device. More...

template<typename Real >
__global__ void	update_S (const Real *X, size_t X_pitch, const Real *nom_mult, size_t nom_mult_pitch, const Real *denom_mult, size_t denom_mult_pitch, const Real *grad_minus, size_t grad_minus_pitch, cudaPitchedPtr grad_plus, const Real *S_ijp, size_t S_ijp_pitch, cudaPitchedPtr S, size_t width, size_t height, size_t depth)
	Perform S update employed in bld_mult algorithm using tensors/matrices residing on a GPU device. More...

Detailed Description

Namespace containing CUDA kernels used in bld_mult CUDA updates.

Function Documentation

template<typename Real >

__device__ Real bnmf_algs::details::bld_mult::kernel::psi_appr ( Real x )

Device function to return psi_appr of a real number.

Template Parameters

Real	Type of the input parameter. Must be double or float.

Parameters

x	Value to which the psi_appr function is applied.

Returns: psi_appr of x.

Todo: Check relocatable device code and move this function to a common .cu file to be used by other files as well.

Device function to return psi_appr of a real number.

Digamma function is defined as

\[ \psi(x) = \frac{\Gamma'(x)}{\Gamma(x)} \]

This function computes an approximation for \(\psi(x)\) using the below formula:

\[ \psi(x) \approx \ln(x) - \frac{1}{2x} - \frac{1}{12x^2} + \frac{1}{120x^4} - \frac{1}{252x^6} + \frac{1}{240x^8} - \frac{5}{660x^{10}} + \frac{691}{32760x^{12}} - \frac{1}{12x^{14}} \]

This approximation is more accurate for larger values of \(x\). When computing \(\psi(x)\) for \(x < 6\), the recurrence relation below is used to shift the argument passed to the approximation formula to a value greater than \(6\):

\[ \psi(x + 1) = \frac{1}{x} + \psi(x) \]

Template Parameters

Real	A real scalar value such as double and float.

Parameters

x	Parameter to \(\psi(x)\).

Returns: \(\psi(x)\).

See also: Appendix C.1 of [1] for a discussion of this method.

template<typename Real >

__global__ void bnmf_algs::details::bld_mult::kernel::update_denom	(	cudaPitchedPtr	S,
		const Real *	X_reciprocal,
		size_t	X_reciprocal_pitch,
		cudaPitchedPtr	grad_plus,
		Real *	denom_mult,
		size_t	denom_mult_pitch,
		size_t	width,
		size_t	height,
		size_t	depth
	)

Perform denom_mult update employed in bld_mult algorithm using tensors/matrices residing on a GPU device.

This function applies denom_mult update to denom_mult matrix given as a raw GPU pointer. All the given tensor and matrix pointers are assumed to store their objects in row-major order.

Template Parameters

Real	Type of the entries of the matrices/tensors. Must be double or float.

Parameters

S	Pitched pointer pointing to S tensor on the GPU device.
X_reciprocal	Raw pointer pointing to X_reciprocal matrix on the GPU.
X_reciprocal_pitch	Pitch of X_reciprocal matrix (number of bytes used to store a single row of the matrix including padding bytes).
grad_plus	Pitched pointer pointing to grad_plus tensor on the GPU device.
denom_mult	Raw pointer pointing to denom_mult matrix on the GPU.
denom_mult_pitch	Pitch of denom_mult matrix.
width	Width (1st dimension) of S and grad_plus tensors in terms of elements.
height	Height (0th dimension) of S and grad_plus tensors in terms of elements.
depth	Depth (2nd dimension) of S and grad_plus tensors in terms of elements.

template<typename Real >

__global__ void bnmf_algs::details::bld_mult::kernel::update_grad_plus	(	cudaPitchedPtr	S,
		const Real *	beta_eph,
		size_t	pitch,
		cudaPitchedPtr	grad_plus,
		size_t	width,
		size_t	height,
		size_t	depth
	)

Perform grad_plus update employed in bld_mult algorithm using tensors/matrices residing on a GPU device.

This function applies grad_plus update to grad_plus tensor given as a cudaPitchedPtr using tensor S given as cudaPitchedPtr and beta_eph matrix given as a device pointer and the pitch of the allocation. All the given tensor and matrix pointers are assumed to store their objects in row-major order. See bnmf_algs::cuda::bld_mult::update_grad_plus function documentation for more information regarding row-major storage of multidimensional tensors.

Template Parameters

Real	Type of the entries of the matrices/tensors. Must be double or float.

Parameters

S	Pitched pointer pointing to S tensor on the GPU device.
beta_eph	Raw pointer pointing to beta_eph matrix on the GPU device.
pitch	Pitch of the beta_eph matrix (number of bytes used to store a single row of the matrix including padding bytes).
grad_plus	Pitched pointer pointing to grad_plus tensor on the GPU device.
width	Width (1st dimension) of S and grad_plus tensors in terms of elements.
height	Height (0th dimension) of S and grad_plus tensors in terms of elements.
depth	Depth (2nd dimension) of S and grad_plus tensors in terms of elements.

template<typename Real >

__global__ void bnmf_algs::details::bld_mult::kernel::update_nom	(	cudaPitchedPtr	S,
		const Real *	X_reciprocal,
		size_t	X_reciprocal_pitch,
		const Real *	grad_minus,
		size_t	grad_minus_pitch,
		Real *	nom_mult,
		size_t	nom_mult_pitch,
		size_t	width,
		size_t	height,
		size_t	depth
	)

Perform nom_mult update employed in bld_mult algorithm using tensors/matrices residing on a GPU device.

This function applies nom_mult update to nom_mult matrix given as a raw GPU pointer. All the given tensor and matrix pointers are assumed to store their objects in row-major order.

Template Parameters

Real	Type of the entries of the matrices/tensors. Must be double or float.

Parameters

S	Pitched pointer pointing to S tensor on the GPU device.
X_reciprocal	Raw pointer pointing to X_reciprocal matrix on the GPU.
X_reciprocal_pitch	Pitch of X_reciprocal matrix (number of bytes used to store a single row of the matrix including padding bytes).
grad_minus	Raw pointer pointing to grad_minus matrix on the GPU.
grad_minus_pitch	Pitch of grad_minus matrix.
nom_mult	Raw pointer pointing to nom_mult matrix on the GPU.
nom_mult_pitch	Pitch of nom_mult matrix.
width	Width (1st dimension) of the S tensor in terms of elements.
height	Height (0th dimension) of the S tensor in terms of elements.
depth	Depth (2nd dimension) of the S tensor in terms of elements.

template<typename Real >

__global__ void bnmf_algs::details::bld_mult::kernel::update_S	(	const Real *	X,
		size_t	X_pitch,
		const Real *	nom_mult,
		size_t	nom_mult_pitch,
		const Real *	denom_mult,
		size_t	denom_mult_pitch,
		const Real *	grad_minus,
		size_t	grad_minus_pitch,
		cudaPitchedPtr	grad_plus,
		const Real *	S_ijp,
		size_t	S_ijp_pitch,
		cudaPitchedPtr	S,
		size_t	width,
		size_t	height,
		size_t	depth
	)

Perform S update employed in bld_mult algorithm using tensors/matrices residing on a GPU device.

This function applies S update to S tensor given as a cudaPitchedPtr. All the given tensor and matrix pointers are assumed to store their objects in row-major order.

Template Parameters

Real	Type of the entries of the matrices/tensors. Must be double or float.

Parameters

X	Raw pointer pointing to X matrix on the GPU.
X_pitch	Pitch of X matrix (number of bytes used to store a single row of the matrix including padding bytes).
nom_mult	Raw pointer pointing to nom_mult matrix on the GPU.
nom_mult_pitch	Pitch of nom_mult matrix.
denom_mult	Raw pointer pointing to denom_mult matrix on the GPU.
denom_mult_pitch	Pitch of denom_mult matrix.
grad_minus	Raw pointer pointing to grad_minus matrix on the GPU.
grad_minus_pitch	Pitch of grad_minus matrix.
grad_plus	Pitched pointer pointing to grad_plus tensor on the GPU.
S_ijp	Raw pointer pointing to S_ijp matrix on the GPU.
S_ijp_pitch	Pitch of S_ijp matrix.
S	Pitched pointer pointing to S tensor on the GPU.
width	Width (1st dimension) of S and grad_plus tensors in terms of elements.
height	Height (0th dimension) of S and grad_plus tensors in terms of elements.
depth	Depth (2nd dimension) of S and grad_plus tensors in terms of elements.

Functions

Detailed Description

Function Documentation