bnmf-algs
|
Namespace containing CUDA kernels used in bld_mult CUDA updates. More...
Functions | |
template<typename Real > | |
__device__ Real | psi_appr (Real x) |
Device function to compute an approximation of the digamma function for a real number. More... | |
template<typename Real > | |
__global__ void | update_grad_plus (cudaPitchedPtr S, const Real *beta_eph, size_t pitch, cudaPitchedPtr grad_plus, size_t width, size_t height, size_t depth) |
Perform grad_plus update employed in bld_mult algorithm using tensors/matrices residing on a GPU device. More... | |
template<typename Real > | |
__global__ void | update_nom (cudaPitchedPtr S, const Real *X_reciprocal, size_t X_reciprocal_pitch, const Real *grad_minus, size_t grad_minus_pitch, Real *nom_mult, size_t nom_mult_pitch, size_t width, size_t height, size_t depth) |
Perform nom_mult update employed in bld_mult algorithm using tensors/matrices residing on a GPU device. More... | |
template<typename Real > | |
__global__ void | update_denom (cudaPitchedPtr S, const Real *X_reciprocal, size_t X_reciprocal_pitch, cudaPitchedPtr grad_plus, Real *denom_mult, size_t denom_mult_pitch, size_t width, size_t height, size_t depth) |
Perform denom_mult update employed in bld_mult algorithm using tensors/matrices residing on a GPU device. More... | |
template<typename Real > | |
__global__ void | update_S (const Real *X, size_t X_pitch, const Real *nom_mult, size_t nom_mult_pitch, const Real *denom_mult, size_t denom_mult_pitch, const Real *grad_minus, size_t grad_minus_pitch, cudaPitchedPtr grad_plus, const Real *S_ijp, size_t S_ijp_pitch, cudaPitchedPtr S, size_t width, size_t height, size_t depth) |
Perform S update employed in bld_mult algorithm using tensors/matrices residing on a GPU device. More... | |
Namespace containing CUDA kernels used in bld_mult CUDA updates.
__device__ Real bnmf_algs::details::bld_mult::kernel::psi_appr | ( | Real | x | ) |
Device function to compute an approximation of the digamma function for a real number.
Real | Type of the input parameter. Must be double or float. |
x | Value at which the psi_appr function is evaluated. |
Device function to compute an approximation of the digamma function for a real number.
The digamma function is defined as
\[ \psi(x) = \frac{\Gamma'(x)}{\Gamma(x)} \]
This function computes an approximation for \(\psi(x)\) using the below formula:
\[ \psi(x) \approx \ln(x) - \frac{1}{2x} - \frac{1}{12x^2} + \frac{1}{120x^4} - \frac{1}{252x^6} + \frac{1}{240x^8} - \frac{5}{660x^{10}} + \frac{691}{32760x^{12}} - \frac{1}{12x^{14}} \]
This approximation is more accurate for larger values of \(x\). When computing \(\psi(x)\) for \(x < 6\), the recurrence relation below is used to shift \(x\) to a value greater than \(6\) before the approximation formula is applied:
\[ \psi(x + 1) = \frac{1}{x} + \psi(x) \]
Real | A real scalar value such as double and float. |
x | Parameter to \(\psi(x)\). |
__global__ void bnmf_algs::details::bld_mult::kernel::update_denom | ( | cudaPitchedPtr | S, |
const Real * | X_reciprocal, | ||
size_t | X_reciprocal_pitch, | ||
cudaPitchedPtr | grad_plus, | ||
Real * | denom_mult, | ||
size_t | denom_mult_pitch, | ||
size_t | width, | ||
size_t | height, | ||
size_t | depth | ||
) |
Perform denom_mult update employed in bld_mult algorithm using tensors/matrices residing on a GPU device.
This function applies the denom_mult update to the denom_mult matrix given as a raw GPU pointer. All the given tensor and matrix pointers are assumed to store their objects in row-major order.
Real | Type of the entries of the matrices/tensors. Must be double or float. |
S | Pitched pointer pointing to S tensor on the GPU device. |
X_reciprocal | Raw pointer pointing to X_reciprocal matrix on the GPU. |
X_reciprocal_pitch | Pitch of X_reciprocal matrix (number of bytes used to store a single row of the matrix including padding bytes). |
grad_plus | Pitched pointer pointing to grad_plus tensor on the GPU device. |
denom_mult | Raw pointer pointing to denom_mult matrix on the GPU. |
denom_mult_pitch | Pitch of denom_mult matrix. |
width | Width (1st dimension) of S and grad_plus tensors in terms of elements. |
height | Height (0th dimension) of S and grad_plus tensors in terms of elements. |
depth | Depth (2nd dimension) of S and grad_plus tensors in terms of elements. |
__global__ void bnmf_algs::details::bld_mult::kernel::update_grad_plus | ( | cudaPitchedPtr | S, |
const Real * | beta_eph, | ||
size_t | pitch, | ||
cudaPitchedPtr | grad_plus, | ||
size_t | width, | ||
size_t | height, | ||
size_t | depth | ||
) |
Perform grad_plus update employed in bld_mult algorithm using tensors/matrices residing on a GPU device.
This function applies the grad_plus update to the grad_plus tensor, given as a cudaPitchedPtr, using the S tensor, also given as a cudaPitchedPtr, and the beta_eph matrix, given as a device pointer together with the pitch of its allocation. All the given tensor and matrix pointers are assumed to store their objects in row-major order. See bnmf_algs::cuda::bld_mult::update_grad_plus function documentation for more information regarding row-major storage of multidimensional tensors.
Real | Type of the entries of the matrices/tensors. Must be double or float. |
S | Pitched pointer pointing to S tensor on the GPU device. |
beta_eph | Raw pointer pointing to beta_eph matrix on the GPU device. |
pitch | Pitch of the beta_eph matrix (number of bytes used to store a single row of the matrix including padding bytes). |
grad_plus | Pitched pointer pointing to grad_plus tensor on the GPU device. |
width | Width (1st dimension) of S and grad_plus tensors in terms of elements. |
height | Height (0th dimension) of S and grad_plus tensors in terms of elements. |
depth | Depth (2nd dimension) of S and grad_plus tensors in terms of elements. |
__global__ void bnmf_algs::details::bld_mult::kernel::update_nom | ( | cudaPitchedPtr | S, |
const Real * | X_reciprocal, | ||
size_t | X_reciprocal_pitch, | ||
const Real * | grad_minus, | ||
size_t | grad_minus_pitch, | ||
Real * | nom_mult, | ||
size_t | nom_mult_pitch, | ||
size_t | width, | ||
size_t | height, | ||
size_t | depth | ||
) |
Perform nom_mult update employed in bld_mult algorithm using tensors/matrices residing on a GPU device.
This function applies the nom_mult update to the nom_mult matrix given as a raw GPU pointer. All the given tensor and matrix pointers are assumed to store their objects in row-major order.
Real | Type of the entries of the matrices/tensors. Must be double or float. |
S | Pitched pointer pointing to S tensor on the GPU device. |
X_reciprocal | Raw pointer pointing to X_reciprocal matrix on the GPU. |
X_reciprocal_pitch | Pitch of X_reciprocal matrix (number of bytes used to store a single row of the matrix including padding bytes). |
grad_minus | Raw pointer pointing to grad_minus matrix on the GPU. |
grad_minus_pitch | Pitch of grad_minus matrix. |
nom_mult | Raw pointer pointing to nom_mult matrix on the GPU. |
nom_mult_pitch | Pitch of nom_mult matrix. |
width | Width (1st dimension) of the S tensor in terms of elements. |
height | Height (0th dimension) of the S tensor in terms of elements. |
depth | Depth (2nd dimension) of the S tensor in terms of elements. |
__global__ void bnmf_algs::details::bld_mult::kernel::update_S | ( | const Real * | X, |
size_t | X_pitch, | ||
const Real * | nom_mult, | ||
size_t | nom_mult_pitch, | ||
const Real * | denom_mult, | ||
size_t | denom_mult_pitch, | ||
const Real * | grad_minus, | ||
size_t | grad_minus_pitch, | ||
cudaPitchedPtr | grad_plus, | ||
const Real * | S_ijp, | ||
size_t | S_ijp_pitch, | ||
cudaPitchedPtr | S, | ||
size_t | width, | ||
size_t | height, | ||
size_t | depth | ||
) |
Perform S update employed in bld_mult algorithm using tensors/matrices residing on a GPU device.
This function applies the S update to the S tensor given as a cudaPitchedPtr. All the given tensor and matrix pointers are assumed to store their objects in row-major order.
Real | Type of the entries of the matrices/tensors. Must be double or float. |
X | Raw pointer pointing to X matrix on the GPU. |
X_pitch | Pitch of X matrix (number of bytes used to store a single row of the matrix including padding bytes). |
nom_mult | Raw pointer pointing to nom_mult matrix on the GPU. |
nom_mult_pitch | Pitch of nom_mult matrix. |
denom_mult | Raw pointer pointing to denom_mult matrix on the GPU. |
denom_mult_pitch | Pitch of denom_mult matrix. |
grad_minus | Raw pointer pointing to grad_minus matrix on the GPU. |
grad_minus_pitch | Pitch of grad_minus matrix. |
grad_plus | Pitched pointer pointing to grad_plus tensor on the GPU. |
S_ijp | Raw pointer pointing to S_ijp matrix on the GPU. |
S_ijp_pitch | Pitch of S_ijp matrix. |
S | Pitched pointer pointing to S tensor on the GPU. |
width | Width (1st dimension) of S and grad_plus tensors in terms of elements. |
height | Height (0th dimension) of S and grad_plus tensors in terms of elements. |
depth | Depth (2nd dimension) of S and grad_plus tensors in terms of elements. |