OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
CudaMemory.cu
1 #include "config.h"
2 #include <cstddef>
3 #include "CudaMemory.cuh"
4 #include "cuda_macro.h"
5 #include "util/cudify/cudify.hpp"
6 #include <cstring>
7 
8 #define CUDA_EVENT 0x1201
9 
/*! \brief Synchronize the device buffer with the host buffer
 *
 * Pushes the host copy of the data onto the device. A no-op unless both
 * buffers have been allocated.
 *
 * \return always true
 */
bool CudaMemory::flush()
{
	// nothing to synchronize unless both sides exist
	if (hm != NULL && dm != NULL)
	{
		#ifdef __HIP__
		CUDA_SAFE_CALL(hipMemcpy(dm,hm,sz,hipMemcpyHostToDevice));
		#else
		CUDA_SAFE_CALL(cudaMemcpy(dm,hm,sz,cudaMemcpyHostToDevice));
		#endif
	}

	return true;
}
30 
/*! \brief Allocate the device buffer
 *
 * \param sz number of bytes to allocate
 *
 * \return true on success; false when called on an already-allocated
 *         buffer with a different size (use resize for that)
 */
bool CudaMemory::allocate(size_t sz)
{
	if (dm != NULL)
	{
		// re-allocation with a different size is not supported here
		if (this->sz != sz)
		{
			std::cout << __FILE__ << ":" << __LINE__ << " error FATAL: using allocate to resize the memory, please use resize." << std::endl;
			return false;
		}
	}
	else
	{
		#ifdef __HIP__
		CUDA_SAFE_CALL(hipMalloc(&dm,sz));
		#elif defined(CUDIFY_USE_CUDA)
		CUDA_SAFE_CALL(cudaMalloc(&dm,sz));
		#else
		// CPU emulation: plain heap allocation (zero-size stays NULL)
		if (sz != 0)
		{
			dm = new unsigned char[sz];
			#ifdef GARBAGE_INJECTOR
			memset(dm,0xFF,sz);
			#endif
		}
		#endif
	}

	this->sz = sz;

#if defined(GARBAGE_INJECTOR) && !defined(CUDA_ON_CPU)
	CUDA_SAFE_CALL(cudaMemset(dm,-1,sz))
#endif

	return true;
}
74 
/*! \brief Release the host and the device buffer and reset the size to zero */
void CudaMemory::destroy()
{
	// release the host side first (order is irrelevant: the two frees are independent)
	if (hm != NULL)
	{
		#ifdef __HIP__
		CUDA_SAFE_CALL(hipHostFree(hm));
		#elif defined(CUDIFY_USE_CUDA)
		CUDA_SAFE_CALL(cudaFreeHost(hm));
		#else
		delete [] (unsigned char *)hm;
		#endif
		hm = NULL;
	}

	// release the device side
	if (dm != NULL)
	{
		#ifdef __HIP__
		CUDA_SAFE_CALL(hipFree(dm));
		#elif defined(CUDIFY_USE_CUDA)
		CUDA_SAFE_CALL(cudaFree(dm));
		#else
		delete [] (unsigned char *)dm;
		#endif
		dm = NULL;
	}

	sz = 0;
}
110 
/*! \brief Copy a window of another device buffer into this device buffer
 *
 * \param ptr source device pointer
 * \param start first byte of the source window
 * \param stop one past the last byte of the source window
 * \param offset destination byte offset inside this buffer
 */
void CudaMemory::deviceToDevice(void * ptr, size_t start, size_t stop, size_t offset)
{
	unsigned char * dst = ((unsigned char *)dm) + offset;
	unsigned char * src = ((unsigned char *)ptr) + start;
	size_t n = stop - start;

	#ifdef __HIP__
	CUDA_SAFE_CALL(hipMemcpy(dst,src,n,hipMemcpyDeviceToDevice));
	#else
	CUDA_SAFE_CALL(cudaMemcpy(dst,src,n,cudaMemcpyDeviceToDevice));
	#endif
}
127 
/*! \brief Allocate the host buffer (mapped/pinned where the backend supports it)
 *
 * \param sz number of bytes to allocate; no-op if the host buffer already exists
 */
void CudaMemory::allocate_host(size_t sz) const
{
	// already allocated: nothing to do
	if (hm != NULL)
	{return;}

	#ifdef __HIP__
	CUDA_SAFE_CALL(hipHostMalloc(&hm,sz,hipHostMallocMapped))
	#elif defined(CUDIFY_USE_CUDA)
	CUDA_SAFE_CALL(cudaHostAlloc(&hm,sz,cudaHostAllocMapped))
	#else
	hm = new unsigned char[sz];
	#ifdef GARBAGE_INJECTOR
	memset(hm,0xFF,sz);
	#endif
	#endif
}
149 
/*! \brief Copy sz bytes from a raw pointer into this buffer via the mapped host memory
 *
 * \param ptr source pointer (host-accessible)
 *
 * \return always true
 */
bool CudaMemory::copyFromPointer(const void * ptr)
{
	// make sure the (mapped) host buffer exists
	allocate_host(sz);

	// get the device-side alias of the mapped host buffer
	void * dvp;
	#ifdef __HIP__
	CUDA_SAFE_CALL(hipHostGetDevicePointer(&dvp,hm,0));
	#else
	CUDA_SAFE_CALL(cudaHostGetDevicePointer(&dvp,hm,0));
	#endif

	// mapped memory is host-accessible, so a plain memcpy suffices
	memcpy(dvp,ptr,sz);

	return true;
}
178 
/*! \brief Copy the device buffer of another CudaMemory into this device buffer
 *
 * \param m source memory; its device buffer must fit into this one
 *
 * \return true on success, false if the source buffer is bigger than this buffer
 */
bool CudaMemory::copyDeviceToDevice(const CudaMemory & m)
{
	if (m.sz > sz)
	{
		// bug fix: file/line were printed reversed with no separator and no newline;
		// now consistent with the diagnostic style used by allocate()
		std::cerr << "Error " << __FILE__ << ":" << __LINE__ << ": source buffer is too big to copy" << std::endl;
		return false;
	}

	#ifdef __HIP__
	CUDA_SAFE_CALL(hipMemcpy(dm,m.dm,m.sz,hipMemcpyDeviceToDevice));
	#else
	CUDA_SAFE_CALL(cudaMemcpy(dm,m.dm,m.sz,cudaMemcpyDeviceToDevice));
	#endif

	return true;
}
206 
/*! \brief Copy from a generic memory object
 *
 * Uses a direct device-to-device copy when the source is itself a CudaMemory,
 * otherwise copies through the source's host pointer.
 *
 * \param m source memory
 *
 * \return true on success
 */
bool CudaMemory::copy(const memory & m)
{
	const CudaMemory * src = dynamic_cast<const CudaMemory *>(&m);

	// same memory type: direct GPU-to-GPU buffer copy
	if (src != NULL)
	{return copyDeviceToDevice(*src);}

	// foreign memory type: go through its host-readable pointer
	return copyFromPointer(m.getPointer());
}
234 
/*! \brief Get the size of the allocated memory
 *
 * \return size in byte
 */
size_t CudaMemory::size() const
{
	return sz;
}
247 
248 
/*! \brief Grow the allocated memory, preserving the existing content
 *
 * Shrinking never happens: a request not larger than the current size is a no-op.
 *
 * \param sz requested size in byte
 *
 * \return true on success
 */
bool CudaMemory::resize(size_t sz)
{
	// already big enough: keep the current allocation
	if (sz <= CudaMemory::size())
	{return true;}

	// nothing allocated yet: a plain allocation suffices
	if (CudaMemory::size() == 0)
	{return allocate(sz);}

	void * hm_new = NULL;
	void * dm_new = NULL;

	// grow the device buffer and carry the old bytes over
	if (dm != NULL)
	{
		if (this->sz < sz)
		{
			#ifdef __HIP__
			CUDA_SAFE_CALL(hipMalloc(&dm_new,sz));
			#elif defined(CUDIFY_USE_CUDA)
			CUDA_SAFE_CALL(cudaMalloc(&dm_new,sz));
			#else
			dm_new = new unsigned char [sz];
			#ifdef GARBAGE_INJECTOR
			memset(dm_new,0xFF,sz);
			#endif
			#endif

#ifdef GARBAGE_INJECTOR
			#ifdef __HIP__
			CUDA_SAFE_CALL(hipMemset(dm_new,-1,sz));
			#elif defined(CUDIFY_USE_CUDA)
			CUDA_SAFE_CALL(cudaMemset(dm_new,-1,sz));
			#endif
#endif
		}

		#ifdef __HIP__
		CUDA_SAFE_CALL(hipMemcpy(dm_new,dm,CudaMemory::size(),hipMemcpyDeviceToDevice));
		#else
		CUDA_SAFE_CALL(cudaMemcpy(dm_new,dm,CudaMemory::size(),cudaMemcpyDeviceToDevice));
		#endif
	}

	// grow the host buffer the same way
	if (hm != NULL)
	{
		if (this->sz < sz)
		{
			#ifdef __HIP__
			CUDA_SAFE_CALL(hipHostMalloc(&hm_new,sz,hipHostMallocMapped));
			#elif defined(CUDIFY_USE_CUDA)
			CUDA_SAFE_CALL(cudaHostAlloc(&hm_new,sz,cudaHostAllocMapped));
			#else
			hm_new = new unsigned char [sz];
			#ifdef GARBAGE_INJECTOR
			memset(hm_new,0xFF,sz);
			#endif
			#endif
		}

		#ifdef __HIP__
		CUDA_SAFE_CALL(hipMemcpy(hm_new,hm,CudaMemory::size(),hipMemcpyHostToHost));
		#else
		CUDA_SAFE_CALL(cudaMemcpy(hm_new,hm,CudaMemory::size(),cudaMemcpyHostToHost));
		#endif
	}

	// release the old buffers and install the new ones
	destroy();

	dm = dm_new;
	hm = hm_new;

	this->sz = sz;

	return true;
}
345 
/*! \brief Get a readable host pointer to the data
 *
 * \return the host buffer pointer (allocated lazily on first access)
 */
void * CudaMemory::getPointer()
{
	// lazily create the host buffer on first access
	if (hm == NULL)
	{allocate_host(sz);}

	return hm;
}
360 
/*! \brief Move the whole buffer from the device to the host */
void CudaMemory::deviceToHost()
{
	// make sure a host buffer exists to receive the data
	if (hm == NULL)
	{allocate_host(sz);}

	#ifdef __HIP__
	CUDA_SAFE_CALL(hipMemcpy(hm,dm,sz,hipMemcpyDeviceToHost));
	#else
	CUDA_SAFE_CALL(cudaMemcpy(hm,dm,sz,cudaMemcpyDeviceToHost));
	#endif
}
380 
/*! \brief Move this device buffer into the host buffer of another CudaMemory
 *
 * Copies mem.sz bytes from this device buffer into mem's host buffer;
 * grows this buffer first when mem.sz exceeds it.
 *
 * \param mem destination memory (its host buffer receives the data)
 */
void CudaMemory::deviceToHost(CudaMemory & mem)
{
	// bug fix: the destination host buffer was sized with this->sz, which
	// overflowed when mem.sz > sz; size it for the mem.sz bytes copied below
	if (mem.hm == NULL)
		mem.allocate_host(mem.sz);

	if (mem.sz > sz)
	{resize(mem.sz);}

	#ifdef __HIP__
	CUDA_SAFE_CALL(hipMemcpy(mem.hm,dm,mem.sz,hipMemcpyDeviceToHost));
	#else
	CUDA_SAFE_CALL(cudaMemcpy(mem.hm,dm,mem.sz,cudaMemcpyDeviceToHost));
	#endif
}
402 
/*! \brief Move the host buffer of another CudaMemory into this device buffer
 *
 * Copies mem.sz bytes from mem's host buffer into this device buffer;
 * grows this buffer first when mem.sz exceeds it.
 *
 * \param mem source memory (its host buffer provides the data)
 */
void CudaMemory::hostToDevice(CudaMemory & mem)
{
	// bug fix: the source host buffer was sized with this->sz, causing an
	// out-of-bounds read when mem.sz > sz; size it for the mem.sz bytes read below
	if (mem.hm == NULL)
		mem.allocate_host(mem.sz);

	if (mem.sz > sz)
	{resize(mem.sz);}

	#ifdef __HIP__
	CUDA_SAFE_CALL(hipMemcpy(dm,mem.hm,mem.sz,hipMemcpyHostToDevice));
	#else
	CUDA_SAFE_CALL(cudaMemcpy(dm,mem.hm,mem.sz,cudaMemcpyHostToDevice));
	#endif
}
424 
/*! \brief Move the byte range [start,stop) from the host buffer to the device buffer
 *
 * \param start first byte of the range
 * \param stop one past the last byte of the range
 */
void CudaMemory::hostToDevice(size_t start, size_t stop)
{
	// make sure the host buffer exists
	if (hm == NULL)
	{allocate_host(sz);}

	unsigned char * dst = ((unsigned char *)dm) + start;
	unsigned char * src = ((unsigned char *)hm) + start;
	size_t n = stop - start;

	#ifdef __HIP__
	CUDA_SAFE_CALL(hipMemcpy(dst,src,n,hipMemcpyHostToDevice));
	#else
	CUDA_SAFE_CALL(cudaMemcpy(dst,src,n,cudaMemcpyHostToDevice));
	#endif
}
438 
/*! \brief Move the byte range [start,stop) from the device buffer to the host buffer
 *
 * \param start first byte of the range
 * \param stop one past the last byte of the range
 */
void CudaMemory::deviceToHost(size_t start, size_t stop)
{
	// make sure the host buffer exists
	if (hm == NULL)
	{allocate_host(sz);}

	unsigned char * dst = ((unsigned char *)hm) + start;
	unsigned char * src = ((unsigned char *)dm) + start;
	size_t n = stop - start;

	#ifdef __HIP__
	CUDA_SAFE_CALL(hipMemcpy(dst,src,n,hipMemcpyDeviceToHost));
	#else
	CUDA_SAFE_CALL(cudaMemcpy(dst,src,n,cudaMemcpyDeviceToHost));
	#endif
}
457 
458 
459 
/*! \brief Get a read-only host pointer to the data
 *
 * \return the host buffer pointer (allocated lazily on first access)
 */
const void * CudaMemory::getPointer() const
{
	// lazily create the host buffer on first access
	if (hm == NULL)
	{allocate_host(sz);}

	return hm;
}
474 
/*! \brief Fill the buffer with a byte value
 *
 * Fills the device buffer and, when present, the host mirror as well.
 *
 * \param c byte value to write over the whole buffer
 */
void CudaMemory::fill(unsigned char c)
{
	// robustness fix: skip the device fill when no device buffer exists
	// (previously memset/cudaMemset was invoked on a NULL pointer)
	if (dm != NULL)
	{
		#ifdef __HIP__
		CUDA_SAFE_CALL(hipMemset(dm,c,size()));
		#elif defined(CUDIFY_USE_CUDA)
		CUDA_SAFE_CALL(cudaMemset(dm,c,size()));
		#else
		memset(dm,c,size());
		#endif
	}

	// keep the host mirror coherent when present
	if (hm != NULL)
	{memset(hm,c,size());}
}
491 
/*! \brief Get the device pointer to the data
 *
 * \return the raw device buffer pointer (NULL if never allocated)
 */
void * CudaMemory::getDevicePointer()
{
	return dm;
}
501 
/*! \brief Move the whole buffer from the host to the device */
void CudaMemory::hostToDevice()
{
	// make sure the host buffer exists before reading from it
	if (hm == NULL)
	{allocate_host(sz);}

	#ifdef __HIP__
	CUDA_SAFE_CALL(hipMemcpy(dm,hm,sz,hipMemcpyHostToDevice));
	#else
	CUDA_SAFE_CALL(cudaMemcpy(dm,hm,sz,cudaMemcpyHostToDevice));
	#endif
}
521 
522 
/*! \brief Swap the memory state with another CudaMemory
 *
 * Exchanges the host/device pointers, the size and the sync flag.
 *
 * \param mem object to swap with
 */
void CudaMemory::swap(CudaMemory & mem)
{
	// stash this object's state
	void * hm_old = hm;
	void * dm_old = dm;
	size_t sz_old = sz;
	bool sync_old = is_hm_sync;

	// take over the other object's state
	hm = mem.hm;
	dm = mem.dm;
	sz = mem.sz;
	is_hm_sync = mem.is_hm_sync;
	ref_cnt = mem.ref_cnt;

	// hand the stashed state over to the other object.
	// NOTE(review): ref_cnt is deliberately NOT written back to mem — the
	// original keeps that assignment commented out, so the exchange is
	// asymmetric for the reference counter; confirm this is intended.
	mem.hm = hm_old;
	mem.dm = dm_old;
	mem.sz = sz_old;
	mem.is_hm_sync = sync_old;
}
void allocate_host(size_t sz) const
Allocate an host buffer.
Definition: CudaMemory.cu:133
bool copyFromPointer(const void *ptr)
copy from Pointer to GPU
Definition: CudaMemory.cu:157
virtual bool allocate(size_t sz)
allocate memory
Definition: CudaMemory.cu:38
void * hm
host memory
Definition: CudaMemory.cuh:70
virtual void hostToDevice()
Move memory from host to device.
Definition: CudaMemory.cu:508
virtual void * getPointer()
get a readable pointer with the data
Definition: CudaMemory.cu:352
void swap(CudaMemory &mem)
Swap the memory.
Definition: CudaMemory.cu:528
size_t ref_cnt
Reference counter.
Definition: CudaMemory.cuh:73
virtual void fill(unsigned char c)
fill the buffer with a byte
Definition: CudaMemory.cu:479
virtual void * getDevicePointer()
get a readable pointer with the data
Definition: CudaMemory.cu:497
void deviceToDevice(void *ptr, size_t start, size_t stop, size_t offset)
copy memory from device to device
Definition: CudaMemory.cu:119
virtual bool copy(const memory &m)
copy from a General device
Definition: CudaMemory.cu:214
virtual bool flush()
flush the memory
Definition: CudaMemory.cu:15
size_t sz
Size of the memory.
Definition: CudaMemory.cuh:64
virtual bool resize(size_t sz)
resize the memory allocated
Definition: CudaMemory.cu:259
virtual size_t size() const
get the size of the allocated memory
Definition: CudaMemory.cu:243
void * dm
device memory
Definition: CudaMemory.cuh:67
bool copyDeviceToDevice(const CudaMemory &m)
copy from GPU to GPU buffer directly
Definition: CudaMemory.cu:187
virtual void deviceToHost()
Move memory from device to host.
Definition: CudaMemory.cu:367
bool is_hm_sync
Is the host memory synchronized with the GPU memory.
Definition: CudaMemory.cuh:61
virtual void * getPointer()=0
return a data pointer
virtual void destroy()
destroy memory
Definition: CudaMemory.cu:80