8 #include <gsl/gsl_sf_psi.h> 82 template <
typename T,
typename Scalar>
85 size_t max_iter = 1000,
bool use_psi_appr =
false,
89 const auto x =
static_cast<size_t>(X.rows());
90 const auto y =
static_cast<size_t>(X.cols());
114 Eigen::ThreadPoolDevice thread_dev(&tp,
120 use_psi_appr ? util::psi_appr<T> : details::gsl_psi_wrapper<T>;
123 for (
size_t eph = 0; eph < max_iter; ++eph) {
126 S_pjk.device(thread_dev) = S.sum(
shape<1>({0}));
127 S_ipk.device(thread_dev) = S.sum(
shape<1>({1}));
128 S_ijp.device(thread_dev) = S.sum(
shape<1>({2}));
146 grad_plus, S_ijp, S, eps);
225 template <
typename T,
typename Scalar>
228 size_t max_iter = 1000,
bool use_psi_appr =
false,
229 double eps = 1e-50) {
232 const auto x =
static_cast<size_t>(X.rows());
233 const auto y =
static_cast<size_t>(X.cols());
286 const auto psi_fn = use_psi_appr ? util::psi_appr<T> : gsl_sf_psi;
289 for (
size_t eph = 0; eph < max_iter; ++eph) {
303 X_reciprocal_device, grad_plus_device, S_device, denom_device);
314 X_reciprocal_device, grad_minus_device, S_device, nom_device);
317 const auto& S_ijp_device = device_sums[2];
319 grad_minus_device, grad_plus_device,
320 S_ijp_device, S_device);
Structure to hold the parameters for the Allocation Model .
Definition: alloc_model_params.hpp:25
A wrapper template class around 3D row-major pitched memory stored in device memory (GPU memory)...
Definition: device_memory_3d.hpp:30
void update_denom_mult(const matrix_t< T > &X_reciprocal, const tensor_t< T, 3 > &grad_plus, const tensor_t< T, 3 > &S, matrix_t< T > &denom_mult)
Update denom_mult matrix used in bld_mult.
Definition: bld_mult_funcs.hpp:272
void update_nom_cuda(const cuda::DeviceMemory2D< Real > &X_reciprocal, const cuda::DeviceMemory2D< Real > &grad_minus, const cuda::DeviceMemory3D< Real > &S, cuda::DeviceMemory2D< Real > &nom)
Perform nom_mult update employed in bld_mult algorithm using CUDA.
void update_grad_minus(const matrix_t< T > &alpha_eph, PsiFunction psi_fn, matrix_t< T > &grad_minus)
Update grad_minus tensor used in bld_mult.
Definition: bld_mult_funcs.hpp:205
void tensor_sums(const DeviceMemory3D< T > &tensor, std::array< DeviceMemory2D< T >, 3 > &result_arr)
Sum the given 3D input tensor along each of its axes and return all 2D sum tensors.
void update_grad_plus(const tensor_t< T, 3 > &S, const matrix_t< T > &beta_eph, PsiFunction psi_fn, tensor_t< T, 3 > &grad_plus)
Update grad_plus tensor used in bld_mult.
Definition: bld_mult_funcs.hpp:177
A wrapper template class around a row-major 3D tensor stored in main memory (host memory)...
Definition: host_memory_3d.hpp:37
void update_alpha_eph(const tensor_t< T, 2 > &S_ipk, const vector_t< T > &alpha, matrix_t< T > &alpha_eph)
Update alpha_eph matrix used in bld_mult.
Definition: bld_mult_funcs.hpp:130
matrix_t< T > X_reciprocal(const matrix_t< T > &X, double eps)
Compute the reciprocal of the input matrix .
Definition: bld_mult_funcs.hpp:87
Eigen::Tensor< Scalar, N, Eigen::RowMajor > tensor_t
Tensor type used in the computations.
Definition: defs.hpp:52
Eigen::Matrix< Scalar, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor > matrix_t
Matrix type used in the computations.
Definition: defs.hpp:41
tensor_t< T, 3 > bld_mult(const matrix_t< T > &X, const size_t z, const alloc_model::Params< Scalar > &model_params, size_t max_iter=1000, bool use_psi_appr=false, double eps=1e-50)
Compute tensor , the solution of BLD problem , from matrix using multiplicative update rules...
Definition: bld_mult.hpp:83
void update_denom_cuda(const cuda::DeviceMemory2D< Real > &X_reciprocal, const cuda::DeviceMemory3D< Real > &grad_plus, const cuda::DeviceMemory3D< Real > &S, cuda::DeviceMemory2D< Real > &denom)
Perform denom update employed in bld_mult algorithm using CUDA.
T hardware_concurrency(T...args)
Eigen::array< size_t, N > shape
Shape of vectors, matrices, tensors, etc.
Definition: defs.hpp:66
tensor_t< T, 3 > init_S(const matrix_t< T > &X, size_t z)
Initialize S tensor using a Dirichlet sample of size z with all concentration parameters set to 1...
Definition: bld_mult_funcs.hpp:36
void update_grad_plus_cuda(const cuda::DeviceMemory3D< Real > &S, const cuda::DeviceMemory2D< Real > &beta_eph, cuda::DeviceMemory3D< Real > &grad_plus)
Perform grad_plus update employed in bld_mult algorithm using CUDA.
void update_S(const matrix_t< T > &X, const matrix_t< T > &nom, const matrix_t< T > &denom, const matrix_t< T > &grad_minus, const tensor_t< T, 3 > &grad_plus, const tensor_t< T, 2 > &S_ijp, tensor_t< T, 3 > &S, double eps)
Update S tensor (output of bld_mult algorithm).
Definition: bld_mult_funcs.hpp:307
void copy3D(DstPitchedMemory3D &destination, const SrcPitchedMemory3D &source)
Copy a contiguous 3D pitched memory from a host/device memory to a host/device memory using CUDA func...
Definition: copy.hpp:169
A wrapper template class around 2D row-major pitched memory stored in device memory (GPU memory)...
Definition: device_memory_2d.hpp:30
void update_beta_eph(const tensor_t< T, 2 > &S_pjk, const vector_t< T > &beta, matrix_t< T > &beta_eph)
Update beta_eph matrix used in bld_mult.
Definition: bld_mult_funcs.hpp:152
A wrapper template class around a row-major matrix type stored in main memory (host memory)...
Definition: host_memory_2d.hpp:37
std::pair< vector_t< Scalar >, vector_t< Scalar > > init_alpha_beta(const alloc_model::Params< Scalar > ¶ms)
Initialize alpha and beta vectors used in bld_mult.
Definition: bld_mult_funcs.hpp:112
void copy2D(DstPitchedMemory2D &destination, const SrcPitchedMemory2D &source)
Copy a contiguous 2D pitched memory from a host/device memory to a host/device memory using CUDA func...
Definition: copy.hpp:131
void update_nom_mult(const matrix_t< T > &X_reciprocal, const matrix_t< T > &grad_minus, const tensor_t< T, 3 > &S, matrix_t< T > &nom_mult)
Update nom_mult matrix used in bld_mult.
Definition: bld_mult_funcs.hpp:242
void check_bld_params(const matrix_t< T > &X, size_t z, const alloc_model::Params< Scalar > &model_params)
Do parameter checks on BLD computing function parameters and throw an assertion error if the paramete...
Definition: util_details.hpp:20
void update_S_cuda(const cuda::DeviceMemory2D< Real > &X, const cuda::DeviceMemory2D< Real > &nom, const cuda::DeviceMemory2D< Real > &denom, const cuda::DeviceMemory2D< Real > &grad_minus, const cuda::DeviceMemory3D< Real > &grad_plus, const cuda::DeviceMemory2D< Real > &S_ijp, cuda::DeviceMemory3D< Real > &S)
Perform S update employed in bld_mult algorithm using CUDA.
Eigen::Matrix< Scalar, 1, Eigen::Dynamic, Eigen::RowMajor > vector_t
Vector type used in the computations.
Definition: defs.hpp:27
Main namespace for bnmf-algs library.
Definition: alloc_model_funcs.hpp:12
tensor_t< T, 3 > bld_mult_cuda(const matrix_t< T > &X, const size_t z, const alloc_model::Params< Scalar > &model_params, size_t max_iter=1000, bool use_psi_appr=false, double eps=1e-50)
Compute tensor , the solution of BLD problem , from matrix using multiplicative update rules...
Definition: bld_mult.hpp:226