bnmf-algs
bld_mult_cuda_kernels.hpp
Go to the documentation of this file.
1 #pragma once
3 
4 namespace bnmf_algs {
5 namespace details {
6 namespace bld_mult {
10 namespace kernel {
21 template <typename Real> __device__ Real psi_appr(Real x);
22 
50 template <typename Real>
51 __global__ void update_grad_plus(cudaPitchedPtr S, const Real* beta_eph,
52  size_t pitch, cudaPitchedPtr grad_plus,
53  size_t width, size_t height, size_t depth);
54 
81 template <typename Real>
82 __global__ void update_nom(cudaPitchedPtr S, const Real* X_reciprocal,
83  size_t X_reciprocal_pitch, const Real* grad_minus,
84  size_t grad_minus_pitch, Real* nom_mult,
85  size_t nom_mult_pitch, size_t width, size_t height,
86  size_t depth);
87 
114 template <typename Real>
115 __global__ void update_denom(cudaPitchedPtr S, const Real* X_reciprocal,
116  size_t X_reciprocal_pitch,
117  cudaPitchedPtr grad_plus, Real* denom_mult,
118  size_t denom_mult_pitch, size_t width,
119  size_t height, size_t depth);
120 
151 template <typename Real>
152 __global__ void
153 update_S(const Real* X, size_t X_pitch, const Real* nom_mult,
154  size_t nom_mult_pitch, const Real* denom_mult, size_t denom_mult_pitch,
155  const Real* grad_minus, size_t grad_minus_pitch,
156  cudaPitchedPtr grad_plus, const Real* S_ijp, size_t S_ijp_pitch,
157  cudaPitchedPtr S, size_t width, size_t height, size_t depth);
158 } // namespace kernel
159 } // namespace bld_mult
160 } // namespace details
161 } // namespace bnmf_algs
__global__ void update_denom(cudaPitchedPtr S, const Real *X_reciprocal, size_t X_reciprocal_pitch, cudaPitchedPtr grad_plus, Real *denom_mult, size_t denom_mult_pitch, size_t width, size_t height, size_t depth)
Perform denom_mult update employed in bld_mult algorithm using tensors/matrices residing on a GPU device...
matrix_t< T > X_reciprocal(const matrix_t< T > &X, double eps)
Compute the reciprocal of the input matrix $X$.
Definition: bld_mult_funcs.hpp:87
__global__ void update_grad_plus(cudaPitchedPtr S, const Real *beta_eph, size_t pitch, cudaPitchedPtr grad_plus, size_t width, size_t height, size_t depth)
Perform grad_plus update employed in bld_mult algorithm using tensors/matrices residing on a GPU device...
tensor_t< T, 3 > bld_mult(const matrix_t< T > &X, const size_t z, const alloc_model::Params< Scalar > &model_params, size_t max_iter=1000, bool use_psi_appr=false, double eps=1e-50)
Compute tensor $S$, the solution of the BLD problem, from matrix $X$ using multiplicative update rules...
Definition: bld_mult.hpp:83
__global__ void update_nom(cudaPitchedPtr S, const Real *X_reciprocal, size_t X_reciprocal_pitch, const Real *grad_minus, size_t grad_minus_pitch, Real *nom_mult, size_t nom_mult_pitch, size_t width, size_t height, size_t depth)
Perform nom_mult update employed in bld_mult algorithm using tensors/matrices residing on a GPU device...
__device__ Real psi_appr(Real x)
Device function to return psi_appr of a real number.
Definition: util.hpp:440
__global__ void update_S(const Real *X, size_t X_pitch, const Real *nom_mult, size_t nom_mult_pitch, const Real *denom_mult, size_t denom_mult_pitch, const Real *grad_minus, size_t grad_minus_pitch, cudaPitchedPtr grad_plus, const Real *S_ijp, size_t S_ijp_pitch, cudaPitchedPtr S, size_t width, size_t height, size_t depth)
Perform S update employed in bld_mult algorithm using tensors/matrices residing on a GPU device...
Main namespace for bnmf-algs library.
Definition: alloc_model_funcs.hpp:12