bnmf-algs
bld_appr.hpp
#pragma once

#include "defs.hpp"
#include "util_details.hpp"

namespace bnmf_algs {
namespace details {

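// Current approximation to X: X_hat = nu * mu.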
template <typename T>
void update_X_hat(const matrix_t<T>& nu, const matrix_t<T>& mu,
                  matrix_t<T>& X_hat) {
    X_hat = nu * mu;
}

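// Elementwise ratio X / (X_hat + eps); eps_mat guards against division by
// zero.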
template <typename T>
void update_orig_over_appr(const matrix_t<T>& X, const matrix_t<T>& X_hat,
                           const matrix_t<T>& eps_mat,
                           matrix_t<T>& orig_over_appr) {
    orig_over_appr = X.array() / (X_hat + eps_mat).array();
}

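// Elementwise ratio X / (X_hat^2 + eps).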
template <typename T>
void update_orig_over_appr_squared(const matrix_t<T>& X,
                                   const matrix_t<T>& X_hat,
                                   const matrix_t<T>& eps_mat,
                                   matrix_t<T>& orig_over_appr_squared) {
    matrix_t<T> appr_squared = X_hat.array().pow(2);
    orig_over_appr_squared = X.array() / (appr_squared + eps_mat).array();
}

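// Allocation tensor update:
// S(i, j, k) = (X / X_hat)(i, j) * nu(i, k) * mu(k, j).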
template <typename T>
void update_S(const matrix_t<T>& orig_over_appr, const matrix_t<T>& nu,
              const matrix_t<T>& mu, tensor_t<T, 3>& S) {
    auto x = static_cast<size_t>(S.dimension(0));
    auto y = static_cast<size_t>(S.dimension(1));
    auto z = static_cast<size_t>(S.dimension(2));

    #pragma omp parallel for schedule(static)
    for (size_t i = 0; i < x; ++i) {
        for (size_t j = 0; j < y; ++j) {
            for (size_t k = 0; k < z; ++k) {
                S(i, j, k) = orig_over_appr(i, j) * nu(i, k) * mu(k, j);
            }
        }
    }
}

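// alpha_eph(i, k) = alpha(i) + sum over j of S(i, j, k).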
template <typename T>
void update_alpha_eph(const vector_t<T>& alpha, const tensor_t<T, 3>& S,
                      matrix_t<T>& alpha_eph) {
    auto x = static_cast<size_t>(S.dimension(0));
    auto z = static_cast<size_t>(S.dimension(2));

    tensor_t<T, 2> S_ipk_tensor(x, z);
    S_ipk_tensor = S.sum(shape<1>({1}));
    Eigen::Map<matrix_t<T>> S_ipk(S_ipk_tensor.data(), x, z);
    alpha_eph = S_ipk.array().colwise() + alpha.transpose().array();
}

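// beta_eph(j, k) = beta(k) + sum over i of S(i, j, k).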
template <typename T>
void update_beta_eph(const vector_t<T>& beta, const tensor_t<T, 3>& S,
                     matrix_t<T>& beta_eph) {
    auto y = static_cast<size_t>(S.dimension(1));
    auto z = static_cast<size_t>(S.dimension(2));

    tensor_t<T, 2> S_pjk_tensor = S.sum(shape<1>({0}));
    Eigen::Map<matrix_t<T>> S_pjk(S_pjk_tensor.data(), y, z);
    beta_eph = S_pjk.array().rowwise() + beta.array();
}

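// grad_plus(i, j, k) = psi(beta_eph(j, k)) - psi(S(i, j, k) + 1), where psi
// is the approximate digamma function util::psi_appr.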
template <typename T>
void update_grad_plus(const matrix_t<T>& beta_eph, const tensor_t<T, 3>& S,
                      tensor_t<T, 3>& grad_plus) {
    auto x = static_cast<size_t>(grad_plus.dimension(0));
    auto y = static_cast<size_t>(grad_plus.dimension(1));
    auto z = static_cast<size_t>(grad_plus.dimension(2));

    #pragma omp parallel for
    for (size_t i = 0; i < x; ++i) {
        for (size_t j = 0; j < y; ++j) {
            for (size_t k = 0; k < z; ++k) {
                grad_plus(i, j, k) = util::psi_appr(beta_eph(j, k)) -
                                     util::psi_appr(S(i, j, k) + 1);
            }
        }
    }
}

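// grad_minus(i, k) = psi(sum over rows of column k of alpha_eph) -
// psi(alpha_eph(i, k)).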
template <typename T>
void update_grad_minus(const matrix_t<T>& alpha_eph, matrix_t<T>& grad_minus) {
    auto x = static_cast<size_t>(grad_minus.rows());
    auto z = static_cast<size_t>(grad_minus.cols());

    vector_t<T> alpha_eph_sum = alpha_eph.colwise().sum();
    #pragma omp parallel for
    for (size_t i = 0; i < x; ++i) {
        for (size_t k = 0; k < z; ++k) {
            grad_minus(i, k) = util::psi_appr(alpha_eph_sum(k)) -
                               util::psi_appr(alpha_eph(i, k));
        }
    }
}

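// Multiplicative update of nu: nu(i, k) is scaled by nom(i, k) / denom(i, k),
// where nom and denom accumulate grad_plus and grad_minus contributions;
// the rows of nu are then normalized to sum to 1.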
template <typename T>
void update_nu(const matrix_t<T>& orig_over_appr,
               const matrix_t<T>& orig_over_appr_squared,
               const matrix_t<T>& mu, const tensor_t<T, 3>& grad_plus,
               const matrix_t<T>& grad_minus, double eps, matrix_t<T>& nu) {
    auto x = static_cast<size_t>(grad_plus.dimension(0));
    auto y = static_cast<size_t>(grad_plus.dimension(1));
    auto z = static_cast<size_t>(grad_plus.dimension(2));

    matrix_t<T> nom = matrix_t<T>::Zero(x, z);
    #pragma omp parallel for
    for (size_t i = 0; i < x; ++i) {
        for (size_t j = 0; j < y; ++j) {
            for (size_t k = 0; k < z; ++k) {
                nom(i, k) +=
                    orig_over_appr(i, j) * mu(k, j) * grad_plus(i, j, k);
            }
        }
    }
    #pragma omp parallel for
    for (size_t i = 0; i < x; ++i) {
        for (size_t j = 0; j < y; ++j) {
            for (size_t k = 0; k < z; ++k) {
                for (size_t c = 0; c < z; ++c) {
                    nom(i, k) += orig_over_appr_squared(i, j) * mu(k, j) *
                                 nu(i, c) * mu(c, j) * grad_minus(i, c);
                }
            }
        }
    }
    matrix_t<T> denom = matrix_t<T>::Zero(x, z);
    #pragma omp parallel for
    for (size_t i = 0; i < x; ++i) {
        for (size_t j = 0; j < y; ++j) {
            for (size_t k = 0; k < z; ++k) {
                denom(i, k) +=
                    orig_over_appr(i, j) * mu(k, j) * grad_minus(i, k);
            }
        }
    }
    #pragma omp parallel for
    for (size_t i = 0; i < x; ++i) {
        for (size_t j = 0; j < y; ++j) {
            for (size_t k = 0; k < z; ++k) {
                for (size_t c = 0; c < z; ++c) {
                    denom(i, k) += orig_over_appr_squared(i, j) * mu(k, j) *
                                   nu(i, c) * mu(c, j) * grad_plus(i, j, c);
                }
            }
        }
    }

    #pragma omp parallel for schedule(static)
    for (size_t i = 0; i < x; ++i) {
        for (size_t k = 0; k < z; ++k) {
            nu(i, k) *= nom(i, k) / (denom(i, k) + eps);
        }
    }

    // normalize
    vector_t<T> nu_rowsum_plus_eps =
        nu.rowwise().sum() + vector_t<T>::Constant(nu.rows(), eps).transpose();
    nu = nu.array().colwise() / nu_rowsum_plus_eps.transpose().array();
}

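// Multiplicative update of mu, symmetric to update_nu: mu(k, j) is scaled by
// nom(k, j) / denom(k, j); the columns of mu are then normalized to sum to 1.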
template <typename T>
static void update_mu(const matrix_t<T>& orig_over_appr,
                      const matrix_t<T>& orig_over_appr_squared,
                      const matrix_t<T>& nu, const tensor_t<T, 3>& grad_plus,
                      const matrix_t<T>& grad_minus, double eps,
                      matrix_t<T>& mu) {
    auto x = static_cast<size_t>(grad_plus.dimension(0));
    auto y = static_cast<size_t>(grad_plus.dimension(1));
    auto z = static_cast<size_t>(grad_plus.dimension(2));

    matrix_t<T> nom = matrix_t<T>::Zero(z, y);
    #pragma omp parallel for
    for (size_t j = 0; j < y; ++j) {
        for (size_t k = 0; k < z; ++k) {
            for (size_t i = 0; i < x; ++i) {
                nom(k, j) +=
                    orig_over_appr(i, j) * nu(i, k) * grad_plus(i, j, k);
            }
        }
    }
    #pragma omp parallel for
    for (size_t j = 0; j < y; ++j) {
        for (size_t k = 0; k < z; ++k) {
            for (size_t i = 0; i < x; ++i) {
                for (size_t c = 0; c < z; ++c) {
                    nom(k, j) += orig_over_appr_squared(i, j) * nu(i, k) *
                                 nu(i, c) * mu(c, j) * grad_minus(i, c);
                }
            }
        }
    }
    matrix_t<T> denom = matrix_t<T>::Zero(z, y);
    #pragma omp parallel for
    for (size_t j = 0; j < y; ++j) {
        for (size_t k = 0; k < z; ++k) {
            for (size_t i = 0; i < x; ++i) {
                denom(k, j) +=
                    orig_over_appr(i, j) * nu(i, k) * grad_minus(i, k);
            }
        }
    }
    #pragma omp parallel for
    for (size_t j = 0; j < y; ++j) {
        for (size_t k = 0; k < z; ++k) {
            for (size_t i = 0; i < x; ++i) {
                for (size_t c = 0; c < z; ++c) {
                    denom(k, j) += orig_over_appr_squared(i, j) * nu(i, k) *
                                   nu(i, c) * mu(c, j) * grad_plus(i, j, c);
                }
            }
        }
    }

    #pragma omp parallel for schedule(static)
    for (size_t k = 0; k < z; ++k) {
        for (size_t j = 0; j < y; ++j) {
            mu(k, j) *= nom(k, j) / (denom(k, j) + eps);
        }
    }

    // normalize
    vector_t<T> mu_colsum_plus_eps =
        mu.colwise().sum() + vector_t<T>::Constant(mu.cols(), eps);
    mu = mu.array().rowwise() / mu_colsum_plus_eps.array();
}
} // namespace details

namespace bld {

/**
 * Compute tensor S, the solution of the BLD problem, and the optimization
 * matrices nu and mu, from matrix X.
 */
template <typename T, typename Scalar>
std::tuple<tensor_t<T, 3>, matrix_t<T>, matrix_t<T>>
bld_appr(const matrix_t<T>& X, size_t z,
         const alloc_model::Params<Scalar>& model_params,
         size_t max_iter = 1000, double eps = 1e-50) {
    details::check_bld_params(X, z, model_params);

    auto x = static_cast<size_t>(X.rows());
    auto y = static_cast<size_t>(X.cols());

    vector_t<Scalar> alpha(model_params.alpha.size());
    vector_t<Scalar> beta(model_params.beta.size());
    std::copy(model_params.alpha.begin(), model_params.alpha.end(),
              alpha.data());
    std::copy(model_params.beta.begin(), model_params.beta.end(), beta.data());

    matrix_t<T> nu(x, z);
    matrix_t<T> mu(y, z);
    // initialize nu and mu using Dirichlet
    {
        util::gsl_rng_wrapper rnd_gen(gsl_rng_alloc(gsl_rng_taus),
                                      gsl_rng_free);
        std::vector<double> dirichlet_params(z, 1);

        // remark: the following code depends on the fact that matrix_t<T> is
        // row major
        for (size_t i = 0; i < x; ++i) {
            gsl_ran_dirichlet(rnd_gen.get(), z, dirichlet_params.data(),
                              nu.data() + i * z);
        }
        for (size_t j = 0; j < y; ++j) {
            gsl_ran_dirichlet(rnd_gen.get(), z, dirichlet_params.data(),
                              mu.data() + j * z);
        }

        mu.transposeInPlace();
    }

    tensor_t<T, 3> grad_plus(x, y, z);
    matrix_t<T> grad_minus(x, z);
    tensor_t<T, 3> S(x, y, z);
    matrix_t<T> eps_mat = matrix_t<T>::Constant(x, y, eps);
    matrix_t<T> X_hat, orig_over_appr, orig_over_appr_squared, alpha_eph,
        beta_eph;

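    // Alternating optimization: refresh all helper quantities, update nu,
    // refresh the helpers again from the new nu, then update mu.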
    for (size_t eph = 0; eph < max_iter; ++eph) {
        // update helpers
        details::update_X_hat(nu, mu, X_hat);
        details::update_orig_over_appr(X, X_hat, eps_mat, orig_over_appr);
        details::update_orig_over_appr_squared(X, X_hat, eps_mat,
                                               orig_over_appr_squared);
        details::update_S(orig_over_appr, nu, mu, S);
        details::update_alpha_eph(alpha, S, alpha_eph);
        details::update_beta_eph(beta, S, beta_eph);
        details::update_grad_plus(beta_eph, S, grad_plus);
        details::update_grad_minus(alpha_eph, grad_minus);

        // update nu
        details::update_nu(orig_over_appr, orig_over_appr_squared, mu,
                           grad_plus, grad_minus, eps, nu);

        // update helpers
        details::update_X_hat(nu, mu, X_hat);
        details::update_orig_over_appr(X, X_hat, eps_mat, orig_over_appr);
        details::update_orig_over_appr_squared(X, X_hat, eps_mat,
                                               orig_over_appr_squared);
        details::update_S(orig_over_appr, nu, mu, S);
        details::update_alpha_eph(alpha, S, alpha_eph);
        details::update_beta_eph(beta, S, beta_eph);
        details::update_grad_plus(beta_eph, S, grad_plus);
        details::update_grad_minus(alpha_eph, grad_minus);

        // update mu
        details::update_mu(orig_over_appr, orig_over_appr_squared, nu,
                           grad_plus, grad_minus, eps, mu);
    }

    details::update_X_hat(nu, mu, X_hat);
    details::update_orig_over_appr(X, X_hat, eps_mat, orig_over_appr);
    details::update_S(orig_over_appr, nu, mu, S);

    return std::make_tuple(S, nu, mu);
}
} // namespace bld
} // namespace bnmf_algs
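
A minimal usage sketch (not part of the original header). The construction of
the model parameters below is an assumption: alloc_model::Params<double> is
default-constructed and its alpha and beta members are filled by hand, with
alpha of length X.rows() and beta of length z to match update_alpha_eph and
update_beta_eph above.

#include "bld_appr.hpp"

#include <cstddef>
#include <tuple>
#include <vector>

int main() {
    using namespace bnmf_algs;
    const size_t x = 10, y = 8, z = 3; // X is x-by-y; z is the rank parameter

    // Hypothetical nonnegative data matrix.
    matrix_t<double> X = matrix_t<double>::Constant(x, y, 5);

    // Flat Dirichlet priors (all-ones parameter vectors).
    alloc_model::Params<double> model_params;
    model_params.alpha = std::vector<double>(x, 1.0);
    model_params.beta = std::vector<double>(z, 1.0);

    tensor_t<double, 3> S;
    matrix_t<double> nu, mu;
    std::tie(S, nu, mu) = bld::bld_appr(X, z, model_params, /*max_iter=*/500);

    return 0;
}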