/*
 * OpenFPM 5.2.0
 * Project that contains the implementation of distributed structures
 * CudaMemory.cu
 */
#include "config.h"

#include <cstddef>
#include <cstring>
#include <iostream>

#include "CudaMemory.cuh"
#include "cuda_macro.h"
#include "util/cuda_util.hpp"

#define CUDA_EVENT 0x1201

16 {
17  if (hm != NULL && dm != NULL)
18  {
20 
21  #ifdef __HIP__
22  CUDA_SAFE_CALL(hipMemcpy(dm,hm,sz+32,hipMemcpyHostToDevice));
23  #else
24  CUDA_SAFE_CALL(cudaMemcpy(dm,hm,sz+32,cudaMemcpyHostToDevice));
25  #endif
26  }
27 
28  return true;
29 }
30 
38 bool CudaMemory::allocate(size_t sz)
39 {
41  if (dm == NULL)
42  {
43  #ifdef __HIP__
44  CUDA_SAFE_CALL(hipMalloc(&dm,sz+32));
45  #elif defined(CUDIFY_USE_CUDA)
46  CUDA_SAFE_CALL(cudaMalloc(&dm,sz+32));
47  #else
48  if (sz != 0)
49  {
50  dm = new unsigned char[sz+32];
51  #ifdef GARBAGE_INJECTOR
52  memset(dm,0xFF,sz+32);
53  #endif
54  }
55  #endif
56  }
57  else
58  {
59  if (sz != this->sz)
60  {
61  std::cout << __FILE__ << ":" << __LINE__ << " error FATAL: using allocate to resize the memory, please use resize." << std::endl;
62  return false;
63  }
64  }
65 
66  this->sz = sz;
67 
68 #if defined(GARBAGE_INJECTOR) && !defined(CUDA_ON_CPU)
69  CUDA_SAFE_CALL(cudaMemset(dm,-1,sz))
70 #endif
71 
72  return true;
73 }
74 
81 {
82  if (dm != NULL)
83  {
85  #ifdef __HIP__
86  CUDA_SAFE_CALL(hipFree(dm));
87  #elif defined(CUDIFY_USE_CUDA)
88  CUDA_SAFE_CALL(cudaFree(dm));
89  #else
90  delete [] (unsigned char *)dm;
91  #endif
92  dm = NULL;
93  }
94 
95  if (hm != NULL)
96  {
98  #ifdef __HIP__
99  CUDA_SAFE_CALL(hipHostFree(hm));
100  #elif defined(CUDIFY_USE_CUDA)
101  CUDA_SAFE_CALL(cudaFreeHost(hm));
102  #else
103  delete [] (unsigned char *)hm;
104  #endif
105  hm = NULL;
106  }
107 
108  sz = 0;
109 }
110 
119 void CudaMemory::deviceToDevice(void * ptr, size_t start, size_t stop, size_t offset)
120 {
121  #ifdef __HIP__
122  CUDA_SAFE_CALL(hipMemcpy(((unsigned char *)dm)+offset,((unsigned char *)ptr)+start,(stop-start),hipMemcpyDeviceToDevice));
123  #else
124  CUDA_SAFE_CALL(cudaMemcpy(((unsigned char *)dm)+offset,((unsigned char *)ptr)+start,(stop-start),cudaMemcpyDeviceToDevice));
125  #endif
126 }
127 
133 void CudaMemory::allocate_host(size_t sz) const
134 {
135  if (hm == NULL)
136  {
137  #ifdef __HIP__
138  CUDA_SAFE_CALL(hipHostMalloc(&hm,sz+32,hipHostMallocMapped))
139  #elif defined(CUDIFY_USE_CUDA)
140  CUDA_SAFE_CALL(cudaHostAlloc(&hm,sz+32,cudaHostAllocMapped))
141  #else
142  hm = new unsigned char[sz+32];
143  #ifdef GARBAGE_INJECTOR
144  memset(hm,0xFF,sz+32);
145  #endif
146  #endif
147  }
148 }
149 
157 bool CudaMemory::copyFromPointer(const void * ptr)
158 {
159  // check if we have a host buffer, if not allocate it
160 
161  allocate_host(sz);
162 
163  // get the device pointer
164 
165  void * dvp;
166  #ifdef __HIP__
167  CUDA_SAFE_CALL(hipHostGetDevicePointer(&dvp,hm,0));
168  // memory copy
169  memcpy(dvp,ptr,sz+32);
170  #else
171  CUDA_SAFE_CALL(cudaHostGetDevicePointer(&dvp,hm,0));
172  // memory copy
173  memcpy(dvp,ptr,sz+32);
174  #endif
175 
176  return true;
177 }
178 
188 {
190 
191  if (m.sz > sz)
192  {
193  std::cerr << "Error " << __LINE__ << __FILE__ << ": source buffer is too big to copy";
194  return false;
195  }
196 
198  if (sz != 0)
199  {
200  #ifdef __HIP__
201  CUDA_SAFE_CALL(hipMemcpy(dm,m.dm,m.sz+32,hipMemcpyDeviceToDevice));
202  #else
203  CUDA_SAFE_CALL(cudaMemcpy(dm,m.dm,m.sz+32,cudaMemcpyDeviceToDevice));
204  #endif
205  }
206  return true;
207 }
208 
216 bool CudaMemory::copy(const memory & m)
217 {
219  const CudaMemory * ofpm = dynamic_cast<const CudaMemory *>(&m);
220 
222 
223  if (ofpm == NULL)
224  {
225  // copy the memory from device to host and from host to device
226 
227  return copyFromPointer(m.getPointer());
228  }
229  else
230  {
231  // they are the same memory type, use cuda/thrust buffer copy
232 
233  return copyDeviceToDevice(*ofpm);
234  }
235 }
236 
245 size_t CudaMemory::size() const
246 {
247  return sz;
248 }
249 
250 
261 bool CudaMemory::resize(size_t sz)
262 {
263  // if the allocated memory is enough, do not resize
264  if (sz <= CudaMemory::size())
265  {return true;}
266 
268 
269  if (CudaMemory::size() == 0)
270  {return allocate(sz);}
271 
273  void * thm = NULL;
274 
276  void * tdm = NULL;
277 
278  if (dm != NULL)
279  {
280  if (this->sz < sz)
281  {
282  #ifdef __HIP__
283  CUDA_SAFE_CALL(hipMalloc(&tdm,sz+32));
284  #elif defined(CUDIFY_USE_CUDA)
285  CUDA_SAFE_CALL(cudaMalloc(&tdm,sz+32));
286  #else
287  tdm = new unsigned char [sz+32];
288  #ifdef GARBAGE_INJECTOR
289  memset(tdm,0xFF,sz+32);
290  #endif
291  #endif
292 
293 #ifdef GARBAGE_INJECTOR
294  #ifdef __HIP__
295  CUDA_SAFE_CALL(hipMemset(tdm,-1,sz+32));
296  #elif defined(CUDIFY_USE_CUDA)
297  CUDA_SAFE_CALL(cudaMemset(tdm,-1,sz+32));
298  #endif
299 #endif
300  }
301 
303  #ifdef __HIP__
304  CUDA_SAFE_CALL(hipMemcpy(tdm,dm,CudaMemory::size(),hipMemcpyDeviceToDevice));
305  #else
306  CUDA_SAFE_CALL(cudaMemcpy(tdm,dm,CudaMemory::size(),cudaMemcpyDeviceToDevice));
307  #endif
308  }
309 
310  if (hm != NULL)
311  {
312  if (this->sz < sz)
313  {
314  #ifdef __HIP__
315  CUDA_SAFE_CALL(hipHostMalloc(&thm,sz+32,hipHostMallocMapped));
316  #elif defined(CUDIFY_USE_CUDA)
317  CUDA_SAFE_CALL(cudaHostAlloc(&thm,sz+32,cudaHostAllocMapped));
318  #else
319  thm = new unsigned char [sz+32];
320  #ifdef GARBAGE_INJECTOR
321  memset(thm,0xFF,sz+32);
322  #endif
323  #endif
324  }
325 
327  #ifdef __HIP__
328  CUDA_SAFE_CALL(hipMemcpy(thm,hm,CudaMemory::size(),hipMemcpyHostToHost));
329  #else
330  CUDA_SAFE_CALL(cudaMemcpy(thm,hm,CudaMemory::size(),cudaMemcpyHostToHost));
331  #endif
332  }
333 
335 
336  destroy();
337 
338  dm = tdm;
339  hm = thm;
340 
342 
343  this->sz = sz;
344 
345  return true;
346 }
347 
355 {
356  // allocate an host memory if not allocated
357  if (hm == NULL)
358  allocate_host(sz);
359 
360  return hm;
361 }
362 
370 {
371  // allocate an host memory if not allocated
372  if (hm == NULL)
373  allocate_host(sz);
374 
376  #ifdef __HIP__
377  CUDA_SAFE_CALL(hipMemcpy(hm,dm,sz+32,hipMemcpyDeviceToHost));
378  #else
379  CUDA_SAFE_CALL(cudaMemcpy(hm,dm,sz+32,cudaMemcpyDeviceToHost));
380  #endif
381 }
382 
389 {
390  // allocate an host memory if not allocated
391  if (mem.hm == NULL)
392  mem.allocate_host(sz);
393 
394  if (mem.sz > sz)
395  {resize(mem.sz);}
396 
397 
398  if (sz != 0)
399  {
401  #ifdef __HIP__
402  CUDA_SAFE_CALL(hipMemcpy(mem.hm,dm,mem.sz+32,hipMemcpyDeviceToHost));
403  #else
404  CUDA_SAFE_CALL(cudaMemcpy(mem.hm,dm,mem.sz+32,cudaMemcpyDeviceToHost));
405  #endif
406  }
407 }
408 
415 {
416  // allocate an host memory if not allocated
417  if (mem.hm == NULL)
418  mem.allocate_host(sz);
419 
420  if (mem.sz > sz)
421  {resize(mem.sz);}
422 
424  #ifdef __HIP__
425  CUDA_SAFE_CALL(hipMemcpy(dm,mem.hm,mem.sz+32,hipMemcpyHostToDevice));
426  #else
427  CUDA_SAFE_CALL(cudaMemcpy(dm,mem.hm,mem.sz+32,cudaMemcpyHostToDevice));
428  #endif
429 }
430 
431 void CudaMemory::hostToDevice(size_t start, size_t stop)
432 {
433  // allocate an host memory if not allocated
434  if (hm == NULL)
435  allocate_host(sz);
436 
438  #ifdef __HIP__
439  CUDA_SAFE_CALL(hipMemcpy(((unsigned char *)dm)+start,((unsigned char *)hm)+start,(stop-start),hipMemcpyHostToDevice));
440  #else
441  CUDA_SAFE_CALL(cudaMemcpy(((unsigned char *)dm)+start,((unsigned char *)hm)+start,(stop-start),cudaMemcpyHostToDevice));
442  #endif
443 }
444 
450 void CudaMemory::deviceToHost(size_t start, size_t stop)
451 {
452  // allocate an host memory if not allocated
453  if (hm == NULL)
454  allocate_host(sz);
455 
457  #ifdef __HIP__
458  CUDA_SAFE_CALL(hipMemcpy(((unsigned char *)hm)+start,((unsigned char *)dm)+start,(stop-start),hipMemcpyDeviceToHost));
459  #else
460  CUDA_SAFE_CALL(cudaMemcpy(((unsigned char *)hm)+start,((unsigned char *)dm)+start,(stop-start),cudaMemcpyDeviceToHost));
461  #endif
462 }
463 
464 
465 
472 const void * CudaMemory::getPointer() const
473 {
474  // allocate an host memory if not allocated
475  if (hm == NULL)
476  allocate_host(sz);
477 
478  return hm;
479 }
480 
485 void CudaMemory::fill(unsigned char c)
486 {
487  #ifdef __HIP__
488  CUDA_SAFE_CALL(hipMemset(dm,c,size()));
489  #elif defined(CUDIFY_USE_CUDA)
490  CUDA_SAFE_CALL(cudaMemset(dm,c,size()));
491  #else
492  memset(dm,c,size());
493  #endif
494  if (hm != NULL)
495  {memset(hm,c,size());}
496 }
497 
504 {
505  return dm;
506 }
507 
515 {
516  // allocate an host memory if not allocated
517  if (hm == NULL)
518  allocate_host(sz);
519 
521  #ifdef __HIP__
522  CUDA_SAFE_CALL(hipMemcpy(dm,hm,sz+32,hipMemcpyHostToDevice));
523  #else
524  CUDA_SAFE_CALL(cudaMemcpy(dm,hm,sz+32,cudaMemcpyHostToDevice));
525  #endif
526 }
527 
528 
535 {
536  size_t sz_tmp;
537  void * dm_tmp;
538 // long int ref_cnt_tmp;
539  bool is_hm_sync_tmp;
540  void * hm_tmp;
541 
542  hm_tmp = hm;
543  is_hm_sync_tmp = is_hm_sync;
544  sz_tmp = sz;
545  dm_tmp = dm;
546 // ref_cnt_tmp = ref_cnt;
547 
548  hm = mem.hm;
549  is_hm_sync = mem.is_hm_sync;
550  sz = mem.sz;
551  dm = mem.dm;
552  ref_cnt = mem.ref_cnt;
553 
554  mem.hm = hm_tmp;
555  mem.is_hm_sync = is_hm_sync_tmp;
556  mem.sz = sz_tmp;
557  mem.dm = dm_tmp;
558 // mem.ref_cnt = ref_cnt_tmp;
559 }
/*
 * Doxygen cross-reference residue appended by the documentation extractor,
 * commented out so it cannot break compilation:
 *
 * virtual void * getDevicePointer() — get a readable pointer with the data (CudaMemory.cu:503)
 * void deviceToDevice(void *ptr, size_t start, size_t stop, size_t offset) — copy memory from device to device (CudaMemory.cu:119)
 * virtual void deviceToHost() — Move memory from device to host. (CudaMemory.cu:369)
 * virtual bool resize(size_t sz) — resize the memory allocated (CudaMemory.cu:261)
 * virtual bool copy(const memory &m) — copy from a General device (CudaMemory.cu:216)
 * virtual bool flush() — flush the memory (CudaMemory.cu:15)
 * virtual void hostToDevice() — Move memory from host to device. (CudaMemory.cu:514)
 * size_t ref_cnt — Reference counter. (CudaMemory.cuh:73)
 * bool is_hm_sync — Is the host memory synchronized with the GPU memory. (CudaMemory.cuh:61)
 * void * dm — device memory (CudaMemory.cuh:67)
 * void * hm — host memory (CudaMemory.cuh:70)
 * virtual size_t size() const — the size of the allocated memory (CudaMemory.cu:245)
 * bool copyFromPointer(const void *ptr) — copy from Pointer to GPU (CudaMemory.cu:157)
 * virtual void fill(unsigned char c) — fill the buffer with a byte (CudaMemory.cu:485)
 * void swap(CudaMemory &mem) — Swap the memory. (CudaMemory.cu:534)
 * size_t sz — Size of the memory. (CudaMemory.cuh:64)
 * virtual void * getPointer() — get a readable pointer with the data (CudaMemory.cu:354)
 * void allocate_host(size_t sz) const — Allocate an host buffer. (CudaMemory.cu:133)
 * bool copyDeviceToDevice(const CudaMemory &m) — copy from GPU to GPU buffer directly (CudaMemory.cu:187)
 * virtual bool allocate(size_t sz) — allocate memory (CudaMemory.cu:38)
 * virtual void destroy() — destroy memory (CudaMemory.cu:80)
 */