bnmf-algs
copy.hpp
Go to the documentation of this file.
1 #pragma once
2 
3 #include "defs.hpp"
4 #include <cuda_runtime.h>
5 #include <type_traits>
6 
7 namespace bnmf_algs {
8 namespace cuda {
9 
/**
 * @brief Infer the cudaMemcpyKind value to be used with CUDA copying
 * functions from the types of the destination and source memory objects.
 *
 * Each memory type is stripped of its cv qualifiers and compared (with
 * std::is_same) against HostMemoryBase and DeviceMemoryBase instantiated with
 * that memory type's own value_type. The first matching (destination, source)
 * combination decides the returned copy kind.
 *
 * @tparam DstMemory Type of the destination memory object. Must expose a
 * nested value_type.
 * @tparam SrcMemory Type of the source memory object. Must expose a nested
 * value_type.
 * @tparam HostMemoryBase Single-parameter class template identifying host
 * memory objects.
 * @tparam DeviceMemoryBase Single-parameter class template identifying device
 * memory objects.
 *
 * @return cudaMemcpyHostToHost, cudaMemcpyDeviceToHost,
 * cudaMemcpyHostToDevice or cudaMemcpyDeviceToDevice depending on the matched
 * combination; cudaMemcpyDefault when neither type pair matches any of the
 * four combinations.
 */
template <typename DstMemory, typename SrcMemory,
          template <typename> class HostMemoryBase,
          template <typename> class DeviceMemoryBase>
constexpr cudaMemcpyKind infer_copy_kind() {
    // Type of the entries in memory objects
    typedef typename DstMemory::value_type DstT;
    typedef typename SrcMemory::value_type SrcT;

    // Type of the memory objects without cv qualifiers
    typedef typename std::remove_cv<DstMemory>::type DstType;
    typedef typename std::remove_cv<SrcMemory>::type SrcType;

    // Location predicates expressed as types: a C++11 constexpr function may
    // contain typedefs but no local variables, so the boolean results are
    // read through ::value in the single return statement below.
    typedef std::is_same<DstType, HostMemoryBase<DstT>> DstOnHost;
    typedef std::is_same<SrcType, HostMemoryBase<SrcT>> SrcOnHost;
    typedef std::is_same<DstType, DeviceMemoryBase<DstT>> DstOnDevice;
    typedef std::is_same<SrcType, DeviceMemoryBase<SrcT>> SrcOnDevice;

    // Infer the cudaMemcpyKind from type of the memory objects
    // We have to write using ternaries due to C++11 restriction in CUDA 8
    return
        // Host <-- Host
        (DstOnHost::value && SrcOnHost::value)
            ? cudaMemcpyKind::cudaMemcpyHostToHost
            :

            // Host <-- Device
            (DstOnHost::value && SrcOnDevice::value)
                ? cudaMemcpyKind::cudaMemcpyDeviceToHost
                :

                // Device <-- Host
                (DstOnDevice::value && SrcOnHost::value)
                    ? cudaMemcpyKind::cudaMemcpyHostToDevice
                    :

                    // Device <-- Device
                    (DstOnDevice::value && SrcOnDevice::value)
                        ? cudaMemcpyKind::cudaMemcpyDeviceToDevice
                        : cudaMemcpyKind::cudaMemcpyDefault;
}
68 
/**
 * @brief Copy a 1D memory object into another one using cudaMemcpy.
 *
 * The copy direction is deduced at compile time by infer_copy_kind from the
 * argument types, matched against HostMemory1D and DeviceMemory1D. If no
 * direction can be deduced, compilation fails with a static_assert.
 *
 * source.bytes() bytes are copied from source.data() to destination.data().
 * NOTE(review): the size of destination is not checked here; presumably the
 * caller guarantees it can hold source.bytes() bytes.
 *
 * @tparam DstMemory1D Type of the destination memory object.
 * @tparam SrcMemory1D Type of the source memory object.
 * @param destination Memory object to copy into.
 * @param source Memory object to copy from.
 *
 * The cudaMemcpy result is checked with BNMF_ASSERT; what happens on failure
 * is determined by that macro.
 */
template <typename DstMemory1D, typename SrcMemory1D>
void copy1D(DstMemory1D& destination, const SrcMemory1D& source) {
    // Resolve the transfer direction from the argument types.
    constexpr cudaMemcpyKind direction =
        infer_copy_kind<DstMemory1D, SrcMemory1D, HostMemory1D,
                        DeviceMemory1D>();
    static_assert(direction != cudaMemcpyDefault,
                  "Invalid copy direction in cuda::copy1D");

    const cudaError_t err = cudaMemcpy(destination.data(), source.data(),
                                       source.bytes(), direction);
    BNMF_ASSERT(err == cudaSuccess, "Error copying memory in cuda::copy1D");
}
105 
/**
 * @brief Copy a 2D pitched memory object into another one using
 * cudaMemcpy2D.
 *
 * The copy direction is deduced at compile time by infer_copy_kind from the
 * argument types, matched against HostMemory2D and DeviceMemory2D. If no
 * direction can be deduced, compilation fails with a static_assert.
 *
 * The pitches of both objects and the width and height of the source are
 * forwarded to cudaMemcpy2D unchanged.
 * NOTE(review): cudaMemcpy2D interprets its width argument in bytes, so this
 * assumes source.width() reports bytes - confirm against the memory classes.
 *
 * @tparam DstPitchedMemory2D Type of the destination memory object.
 * @tparam SrcPitchedMemory2D Type of the source memory object.
 * @param destination Memory object to copy into.
 * @param source Memory object to copy from.
 *
 * The cudaMemcpy2D result is checked with BNMF_ASSERT; what happens on
 * failure is determined by that macro.
 */
template <typename DstPitchedMemory2D, typename SrcPitchedMemory2D>
void copy2D(DstPitchedMemory2D& destination, const SrcPitchedMemory2D& source) {
    // Resolve the transfer direction from the argument types.
    constexpr cudaMemcpyKind direction =
        infer_copy_kind<DstPitchedMemory2D, SrcPitchedMemory2D, HostMemory2D,
                        DeviceMemory2D>();
    static_assert(direction != cudaMemcpyDefault,
                  "Invalid copy direction in cuda::copy2D");

    const cudaError_t err = cudaMemcpy2D(
        destination.data(), destination.pitch(), // where to write
        source.data(), source.pitch(),           // where to read
        source.width(), source.height(),         // region taken from source
        direction);
    BNMF_ASSERT(err == cudaSuccess, "Error copying memory in cuda::copy2D");
}
143 
/**
 * @brief Copy a 3D pitched memory object into another one using
 * cudaMemcpy3D.
 *
 * The copy direction is deduced at compile time by infer_copy_kind from the
 * argument types, matched against HostMemory3D and DeviceMemory3D. If no
 * direction can be deduced, compilation fails with a static_assert.
 *
 * The copy parameters are built from the pitched pointers of both objects
 * and the extent of the source.
 *
 * @tparam DstPitchedMemory3D Type of the destination memory object.
 * @tparam SrcPitchedMemory3D Type of the source memory object.
 * @param destination Memory object to copy into.
 * @param source Memory object to copy from.
 *
 * The cudaMemcpy3D result is checked with BNMF_ASSERT; what happens on
 * failure is determined by that macro.
 */
template <typename DstPitchedMemory3D, typename SrcPitchedMemory3D>
void copy3D(DstPitchedMemory3D& destination, const SrcPitchedMemory3D& source) {
    // Resolve the transfer direction from the argument types.
    constexpr cudaMemcpyKind direction =
        infer_copy_kind<DstPitchedMemory3D, SrcPitchedMemory3D, HostMemory3D,
                        DeviceMemory3D>();
    static_assert(direction != cudaMemcpyDefault,
                  "Invalid copy direction in cuda::copy3D");

    // Aggregate initialization: the first member receives nullptr and every
    // remaining member is value-initialized, so fields not assigned below
    // start out zeroed.
    cudaMemcpy3DParms params = {nullptr};
    params.kind = direction;
    params.extent = source.extent();
    params.dstPtr = destination.pitched_ptr();
    params.srcPtr = source.pitched_ptr();

    const cudaError_t err = cudaMemcpy3D(&params);
    BNMF_ASSERT(err == cudaSuccess, "Error copying memory in cuda::copy3D");
}
185 } // namespace cuda
186 } // namespace bnmf_algs
A wrapper template class around a contiguous array of T types laid out in main memory (host memory)...
Definition: host_memory_1d.hpp:25
A wrapper template class around 3D row-major pitched memory stored in device memory (GPU memory)...
Definition: device_memory_3d.hpp:30
A wrapper template class around a row-major 3D tensor stored in main memory (host memory)...
Definition: host_memory_3d.hpp:37
A wrapper template class around a contiguous array of T types laid out in device memory (GPU memory)...
Definition: device_memory_1d.hpp:29
void copy3D(DstPitchedMemory3D &destination, const SrcPitchedMemory3D &source)
Copy a contiguous 3D pitched memory from a host/device memory to a host/device memory using CUDA function cudaMemcpy3D.
Definition: copy.hpp:169
A wrapper template class around 2D row-major pitched memory stored in device memory (GPU memory)...
Definition: device_memory_2d.hpp:30
constexpr cudaMemcpyKind infer_copy_kind()
Infer the value of cudaMemcpyKind enum to be used with CUDA copying functions from the types of the memory objects.
Definition: copy.hpp:32
A wrapper template class around a row-major matrix type stored in main memory (host memory)...
Definition: host_memory_2d.hpp:37
void copy2D(DstPitchedMemory2D &destination, const SrcPitchedMemory2D &source)
Copy a contiguous 2D pitched memory from a host/device memory to a host/device memory using CUDA function cudaMemcpy2D.
Definition: copy.hpp:131
Main namespace for bnmf-algs library.
Definition: alloc_model_funcs.hpp:12
void copy1D(DstMemory1D &destination, const SrcMemory1D &source)
Copy a contiguous 1D memory from a host/device memory to a host/device memory using CUDA function cudaMemcpy.
Definition: copy.hpp:94