21 template <
typename Real> __device__ Real
psi_appr(Real x);
50 template <
typename Real>
__global__ void update_grad_plus(cudaPitchedPtr S,
const Real* beta_eph,
52 size_t pitch, cudaPitchedPtr grad_plus,
53 size_t width,
size_t height,
size_t depth);
81 template <
typename Real>
__global__ void update_nom(cudaPitchedPtr S,
const Real* X_reciprocal,
83 size_t X_reciprocal_pitch,
const Real* grad_minus,
84 size_t grad_minus_pitch, Real* nom_mult,
85 size_t nom_mult_pitch,
size_t width,
size_t height,
size_t depth);
114 template <
typename Real>
115 __global__
void update_denom(cudaPitchedPtr S,
const Real* X_reciprocal,
116 size_t X_reciprocal_pitch,
117 cudaPitchedPtr grad_plus, Real* denom_mult,
118 size_t denom_mult_pitch,
size_t width,
119 size_t height,
size_t depth);
151 template <
typename Real>
__global__ void
153 update_S(
const Real* X,
size_t X_pitch,
const Real* nom_mult,
154 size_t nom_mult_pitch,
const Real* denom_mult,
size_t denom_mult_pitch,
155 const Real* grad_minus,
size_t grad_minus_pitch,
156 cudaPitchedPtr grad_plus,
const Real* S_ijp,
size_t S_ijp_pitch,
157 cudaPitchedPtr S,
size_t width,
size_t height,
size_t depth);
__global__ void update_denom(cudaPitchedPtr S, const Real *X_reciprocal, size_t X_reciprocal_pitch, cudaPitchedPtr grad_plus, Real *denom_mult, size_t denom_mult_pitch, size_t width, size_t height, size_t depth)
Perform the denom_mult update employed in the bld_mult algorithm using tensors/matrices residing on a GPU device...
matrix_t< T > X_reciprocal(const matrix_t< T > &X, double eps)
Compute the reciprocal of the input matrix $X$.
Definition: bld_mult_funcs.hpp:87
__global__ void update_grad_plus(cudaPitchedPtr S, const Real *beta_eph, size_t pitch, cudaPitchedPtr grad_plus, size_t width, size_t height, size_t depth)
Perform the grad_plus update employed in the bld_mult algorithm using tensors/matrices residing on a GPU device...
tensor_t< T, 3 > bld_mult(const matrix_t< T > &X, const size_t z, const alloc_model::Params< Scalar > &model_params, size_t max_iter=1000, bool use_psi_appr=false, double eps=1e-50)
Compute tensor $S$, the solution of the BLD problem, from matrix $X$ using multiplicative update rules...
Definition: bld_mult.hpp:83
__global__ void update_nom(cudaPitchedPtr S, const Real *X_reciprocal, size_t X_reciprocal_pitch, const Real *grad_minus, size_t grad_minus_pitch, Real *nom_mult, size_t nom_mult_pitch, size_t width, size_t height, size_t depth)
Perform the nom_mult update employed in the bld_mult algorithm using tensors/matrices residing on a GPU device...
__device__ Real psi_appr(Real x)
Device function to return psi_appr of a real number.
Definition: util.hpp:440
__global__ void update_S(const Real *X, size_t X_pitch, const Real *nom_mult, size_t nom_mult_pitch, const Real *denom_mult, size_t denom_mult_pitch, const Real *grad_minus, size_t grad_minus_pitch, cudaPitchedPtr grad_plus, const Real *S_ijp, size_t S_ijp_pitch, cudaPitchedPtr S, size_t width, size_t height, size_t depth)
Perform S update employed in bld_mult algorithm using tensors/matrices residing on a GPU device...
Main namespace for bnmf-algs library.
Definition: alloc_model_funcs.hpp:12