bnmf-algs
bld_mult_cuda_funcs.hpp
Go to the documentation of this file.
1 #pragma once
2 
3 #include "cuda/memory.hpp"
4 
5 namespace bnmf_algs {
6 namespace details {
7 namespace bld_mult {
38 template <typename Real>
40  const cuda::DeviceMemory2D<Real>& beta_eph,
41  cuda::DeviceMemory3D<Real>& grad_plus);
42 
63 template <typename Real>
65  const cuda::DeviceMemory2D<Real>& grad_minus,
68 
89 template <typename Real>
90 void update_denom_cuda(const cuda::DeviceMemory2D<Real>& X_reciprocal,
91  const cuda::DeviceMemory3D<Real>& grad_plus,
94 
117 template <typename Real>
119  const cuda::DeviceMemory2D<Real>& nom,
120  const cuda::DeviceMemory2D<Real>& denom,
121  const cuda::DeviceMemory2D<Real>& grad_minus,
122  const cuda::DeviceMemory3D<Real>& grad_plus,
123  const cuda::DeviceMemory2D<Real>& S_ijp,
125 
126 } // namespace bld_mult
127 } // namespace details
128 } // namespace bnmf_algs
A wrapper template class around 3D row-major pitched memory stored in device memory (GPU memory)...
Definition: device_memory_3d.hpp:30
void update_nom_cuda(const cuda::DeviceMemory2D< Real > &X_reciprocal, const cuda::DeviceMemory2D< Real > &grad_minus, const cuda::DeviceMemory3D< Real > &S, cuda::DeviceMemory2D< Real > &nom)
Perform nom_mult update employed in bld_mult algorithm using CUDA.
matrix_t< T > X_reciprocal(const matrix_t< T > &X, double eps)
Compute the reciprocal of the input matrix $X$.
Definition: bld_mult_funcs.hpp:87
tensor_t< T, 3 > bld_mult(const matrix_t< T > &X, const size_t z, const alloc_model::Params< Scalar > &model_params, size_t max_iter=1000, bool use_psi_appr=false, double eps=1e-50)
Compute tensor $S$, the solution of the BLD problem, from matrix $X$ using multiplicative update rules...
Definition: bld_mult.hpp:83
void update_denom_cuda(const cuda::DeviceMemory2D< Real > &X_reciprocal, const cuda::DeviceMemory3D< Real > &grad_plus, const cuda::DeviceMemory3D< Real > &S, cuda::DeviceMemory2D< Real > &denom)
Perform denom update employed in bld_mult algorithm using CUDA.
void update_grad_plus_cuda(const cuda::DeviceMemory3D< Real > &S, const cuda::DeviceMemory2D< Real > &beta_eph, cuda::DeviceMemory3D< Real > &grad_plus)
Perform grad_plus update employed in bld_mult algorithm using CUDA.
A wrapper template class around 2D row-major pitched memory stored in device memory (GPU memory)...
Definition: device_memory_2d.hpp:30
void update_S_cuda(const cuda::DeviceMemory2D< Real > &X, const cuda::DeviceMemory2D< Real > &nom, const cuda::DeviceMemory2D< Real > &denom, const cuda::DeviceMemory2D< Real > &grad_minus, const cuda::DeviceMemory3D< Real > &grad_plus, const cuda::DeviceMemory2D< Real > &S_ijp, cuda::DeviceMemory3D< Real > &S)
Perform S update employed in bld_mult algorithm using CUDA.
Main namespace for bnmf-algs library.
Definition: alloc_model_funcs.hpp:12