4 #include <cuda_runtime.h> 29 template <
typename DstMemory,
typename SrcMemory,
30 template <
typename>
class HostMemoryBase,
31 template <
typename>
class DeviceMemoryBase>
34 typedef typename DstMemory::value_type DstT;
35 typedef typename SrcMemory::value_type SrcT;
47 ? cudaMemcpyKind::cudaMemcpyHostToHost
53 ? cudaMemcpyKind::cudaMemcpyDeviceToHost
59 ? cudaMemcpyKind::cudaMemcpyHostToDevice
65 ? cudaMemcpyKind::cudaMemcpyDeviceToDevice
66 : cudaMemcpyKind::cudaMemcpyDefault;
93 template <
typename DstMemory1D,
typename SrcMemory1D>
94 void copy1D(DstMemory1D& destination,
const SrcMemory1D& source) {
95 static constexpr cudaMemcpyKind kind =
98 static_assert(kind != cudaMemcpyDefault,
99 "Invalid copy direction in cuda::copy1D");
102 cudaMemcpy(destination.data(), source.data(), source.bytes(), kind);
103 BNMF_ASSERT(err == cudaSuccess,
"Error copying memory in cuda::copy1D");
130 template <
typename DstPitchedMemory2D,
typename SrcPitchedMemory2D>
131 void copy2D(DstPitchedMemory2D& destination,
const SrcPitchedMemory2D& source) {
132 static constexpr cudaMemcpyKind kind =
135 static_assert(kind != cudaMemcpyDefault,
136 "Invalid copy direction in cuda::copy2D");
139 cudaMemcpy2D(destination.data(), destination.pitch(), source.data(),
140 source.pitch(), source.width(), source.height(), kind);
141 BNMF_ASSERT(err == cudaSuccess,
"Error copying memory in cuda::copy2D");
168 template <
typename DstPitchedMemory3D,
typename SrcPitchedMemory3D>
169 void copy3D(DstPitchedMemory3D& destination,
const SrcPitchedMemory3D& source) {
170 static constexpr cudaMemcpyKind kind =
173 static_assert(kind != cudaMemcpyDefault,
174 "Invalid copy direction in cuda::copy3D");
176 cudaMemcpy3DParms params = {
nullptr};
177 params.srcPtr = source.pitched_ptr();
178 params.dstPtr = destination.pitched_ptr();
179 params.extent = source.extent();
182 auto err = cudaMemcpy3D(&params);
183 BNMF_ASSERT(err == cudaSuccess,
"Error copying memory in cuda::copy3D");
A wrapper template class around a contiguous array of T types laid out in main memory (host memory)...
Definition: host_memory_1d.hpp:25
A wrapper template class around 3D row-major pitched memory stored in device memory (GPU memory)...
Definition: device_memory_3d.hpp:30
A wrapper template class around a row-major 3D tensor stored in main memory (host memory)...
Definition: host_memory_3d.hpp:37
A wrapper template class around a contiguous array of T types laid out in device memory (GPU memory)...
Definition: device_memory_1d.hpp:29
void copy3D(DstPitchedMemory3D &destination, const SrcPitchedMemory3D &source)
Copy a contiguous 3D pitched memory from a host/device memory to a host/device memory using CUDA function cudaMemcpy3D.
Definition: copy.hpp:169
A wrapper template class around 2D row-major pitched memory stored in device memory (GPU memory)...
Definition: device_memory_2d.hpp:30
constexpr cudaMemcpyKind infer_copy_kind()
Infer the value of cudaMemcpyKind enum to be used with CUDA copying functions from the types of the memory objects.
Definition: copy.hpp:32
A wrapper template class around a row-major matrix type stored in main memory (host memory)...
Definition: host_memory_2d.hpp:37
void copy2D(DstPitchedMemory2D &destination, const SrcPitchedMemory2D &source)
Copy a contiguous 2D pitched memory from a host/device memory to a host/device memory using CUDA function cudaMemcpy2D.
Definition: copy.hpp:131
Main namespace for bnmf-algs library.
Definition: alloc_model_funcs.hpp:12
void copy1D(DstMemory1D &destination, const SrcMemory1D &source)
Copy a contiguous 1D memory from a host/device memory to a host/device memory using CUDA function cudaMemcpy.
Definition: copy.hpp:94