bnmf-algs
tensor_ops_kernels.hpp
Go to the documentation of this file.
1 #pragma once
2 
3 #include "defs.hpp"
4 #include <cuda.h>
5 #include <device_launch_parameters.h>
6 
// Main namespace for the bnmf-algs library.
7 namespace bnmf_algs {
8 namespace cuda {
12 namespace kernel {
/**
 * @brief CUDA kernel that sums a 3D tensor along the given axis and writes
 * the results to the corresponding index of the output.
 *
 * Only the declaration is visible in this header; the definition, the expected
 * grid/block launch layout and any shared-memory requirement live elsewhere
 * and must be checked there before launching.
 *
 * @tparam Scalar Element type of the output buffer (and presumably of the
 * tensor entries -- TODO confirm against the definition).
 *
 * @param tensor Pitched-pointer descriptor (cudaPitchedPtr) of the tensor to
 * sum.
 * @param out Pointer to the buffer that receives the sums. NOTE(review):
 * presumably device memory allocated with a pitched allocator -- confirm
 * against the caller.
 * @param out_pitch Pitch of the output buffer. NOTE(review): presumably in
 * bytes, as returned by the pitched allocation call -- confirm.
 * @param axis Axis along which the tensor is summed. NOTE(review): the set of
 * valid values (presumably 0, 1 or 2) and how out-of-range values are handled
 * are not visible here -- confirm.
 * @param n_rows Presumably the number of rows of the tensor -- confirm.
 * @param n_cols Presumably the number of columns of the tensor -- confirm.
 * @param n_layers Presumably the number of layers (depth) of the tensor --
 * confirm.
 */
45 template <typename Scalar>
46 __global__ void sum_tensor3D(cudaPitchedPtr tensor, Scalar* out,
47  size_t out_pitch, size_t axis, size_t n_rows,
48  size_t n_cols, size_t n_layers);
49 } // namespace kernel
50 } // namespace cuda
51 } // namespace bnmf_algs
Main namespace for bnmf-algs library.
Definition: alloc_model_funcs.hpp:12
__global__ void sum_tensor3D(cudaPitchedPtr tensor, Scalar *out, size_t out_pitch, size_t axis, size_t n_rows, size_t n_cols, size_t n_layers)
Sum the given 3D tensor along the given axis and write the results to the corresponding index of the ...