OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
 
CudaMemory.cu
#include "config.h"
#include <cstddef>
#include "CudaMemory.cuh"
#include "cuda_macro.h"
#include "util/cudify/cudify.hpp"
#include <cstring>

#define CUDA_EVENT 0x1201

// flush the memory: copy the host buffer into the device buffer
bool CudaMemory::flush()
{
    if (hm != NULL && dm != NULL)
    {
        // copy from host to device memory
        #ifdef __HIP__
        CUDA_SAFE_CALL(hipMemcpy(dm,hm,sz+32,hipMemcpyHostToDevice));
        #else
        CUDA_SAFE_CALL(cudaMemcpy(dm,hm,sz+32,cudaMemcpyHostToDevice));
        #endif
    }

    return true;
}

bool CudaMemory::allocate(size_t sz)
{
    if (dm == NULL)
    {
        #ifdef __HIP__
        CUDA_SAFE_CALL(hipMalloc(&dm,sz+32));
        #elif defined(CUDIFY_USE_CUDA)
        CUDA_SAFE_CALL(cudaMalloc(&dm,sz+32));
        #else
        if (sz != 0)
        {
            dm = new unsigned char[sz+32];
            #ifdef GARBAGE_INJECTOR
            memset(dm,0xFF,sz+32);
            #endif
        }
        #endif
    }
    else
    {
        if (sz != this->sz)
        {
            std::cout << __FILE__ << ":" << __LINE__ << " error FATAL: using allocate to resize the memory, please use resize." << std::endl;
            return false;
        }
    }

    this->sz = sz;

#if defined(GARBAGE_INJECTOR) && !defined(CUDA_ON_CPU)
    CUDA_SAFE_CALL(cudaMemset(dm,-1,sz))
#endif

    return true;
}
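
// A minimal usage sketch of the allocation contract above, assuming only the
// public interface shown in this file (the sizes are illustrative):
//
//   CudaMemory mem;
//   mem.allocate(64);    // first allocation: 64(+32) bytes of device memory
//   mem.allocate(64);    // same size again: accepted, nothing to do
//   mem.resize(128);     // changing the size must go through resize(), not allocate()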

// destroy memory: release the device and host buffers
void CudaMemory::destroy()
{
    if (dm != NULL)
    {
        // release the device buffer
        #ifdef __HIP__
        CUDA_SAFE_CALL(hipFree(dm));
        #elif defined(CUDIFY_USE_CUDA)
        CUDA_SAFE_CALL(cudaFree(dm));
        #else
        delete [] (unsigned char *)dm;
        #endif
        dm = NULL;
    }

    if (hm != NULL)
    {
        // release the host buffer
        #ifdef __HIP__
        CUDA_SAFE_CALL(hipHostFree(hm));
        #elif defined(CUDIFY_USE_CUDA)
        CUDA_SAFE_CALL(cudaFreeHost(hm));
        #else
        delete [] (unsigned char *)hm;
        #endif
        hm = NULL;
    }

    sz = 0;
}

void CudaMemory::deviceToDevice(void * ptr, size_t start, size_t stop, size_t offset)
{
    #ifdef __HIP__
    CUDA_SAFE_CALL(hipMemcpy(((unsigned char *)dm)+offset,((unsigned char *)ptr)+start,(stop-start),hipMemcpyDeviceToDevice));
    #else
    CUDA_SAFE_CALL(cudaMemcpy(((unsigned char *)dm)+offset,((unsigned char *)ptr)+start,(stop-start),cudaMemcpyDeviceToDevice));
    #endif
}
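
// Note on deviceToDevice(): it copies the byte range [start,stop) of the external
// device buffer ptr into this object's device buffer, starting at byte offset of
// the destination.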

void CudaMemory::allocate_host(size_t sz) const
{
    if (hm == NULL)
    {
        #ifdef __HIP__
        CUDA_SAFE_CALL(hipHostMalloc(&hm,sz+32,hipHostMallocMapped))
        #elif defined(CUDIFY_USE_CUDA)
        CUDA_SAFE_CALL(cudaHostAlloc(&hm,sz+32,cudaHostAllocMapped))
        #else
        hm = new unsigned char[sz+32];
        #ifdef GARBAGE_INJECTOR
        memset(hm,0xFF,sz+32);
        #endif
        #endif
    }
}

bool CudaMemory::copyFromPointer(const void * ptr)
{
    // check if we have a host buffer, if not allocate it
    allocate_host(sz);

    // get the device pointer

    void * dvp;
    #ifdef __HIP__
    CUDA_SAFE_CALL(hipHostGetDevicePointer(&dvp,hm,0));
    // memory copy
    memcpy(dvp,ptr,sz+32);
    #else
    CUDA_SAFE_CALL(cudaHostGetDevicePointer(&dvp,hm,0));
    // memory copy
    memcpy(dvp,ptr,sz+32);
    #endif

    return true;
}

// copy from GPU to GPU buffer directly
bool CudaMemory::copyDeviceToDevice(const CudaMemory & m)
{
    // the source buffer must fit into the destination buffer
    if (m.sz > sz)
    {
        std::cerr << "Error " << __LINE__ << " " << __FILE__ << ": source buffer is too big to copy" << std::endl;
        return false;
    }

    // copy the device memory
    if (sz != 0)
    {
        #ifdef __HIP__
        CUDA_SAFE_CALL(hipMemcpy(dm,m.dm,m.sz+32,hipMemcpyDeviceToDevice));
        #else
        CUDA_SAFE_CALL(cudaMemcpy(dm,m.dm,m.sz+32,cudaMemcpyDeviceToDevice));
        #endif
    }
    return true;
}

// copy memory from a generic memory object, dispatching on its concrete type
bool CudaMemory::copy(const memory & m)
{
    // try to cast the source to a CudaMemory
    const CudaMemory * ofpm = dynamic_cast<const CudaMemory *>(&m);

    if (ofpm == NULL)
    {
        // copy the memory from device to host and from host to device

        return copyFromPointer(m.getPointer());
    }
    else
    {
        // they are the same memory type, use cuda/thrust buffer copy

        return copyDeviceToDevice(*ofpm);
    }
}

size_t CudaMemory::size() const
{
    return sz;
}

bool CudaMemory::resize(size_t sz)
{
    // if the allocated memory is enough, do not resize
    if (sz <= CudaMemory::size())
    {return true;}

    // if nothing has been allocated yet, this is a plain allocation
    if (CudaMemory::size() == 0)
    {return allocate(sz);}

    // new host buffer
    void * thm = NULL;

    // new device buffer
    void * tdm = NULL;

    if (dm != NULL)
    {
        if (this->sz < sz)
        {
            #ifdef __HIP__
            CUDA_SAFE_CALL(hipMalloc(&tdm,sz+32));
            #elif defined(CUDIFY_USE_CUDA)
            CUDA_SAFE_CALL(cudaMalloc(&tdm,sz+32));
            #else
            tdm = new unsigned char [sz+32];
            #ifdef GARBAGE_INJECTOR
            memset(tdm,0xFF,sz+32);
            #endif
            #endif

#ifdef GARBAGE_INJECTOR
            #ifdef __HIP__
            CUDA_SAFE_CALL(hipMemset(tdm,-1,sz+32));
            #elif defined(CUDIFY_USE_CUDA)
            CUDA_SAFE_CALL(cudaMemset(tdm,-1,sz+32));
            #endif
#endif
        }

        // copy the old device buffer into the new one
        #ifdef __HIP__
        CUDA_SAFE_CALL(hipMemcpy(tdm,dm,CudaMemory::size(),hipMemcpyDeviceToDevice));
        #else
        CUDA_SAFE_CALL(cudaMemcpy(tdm,dm,CudaMemory::size(),cudaMemcpyDeviceToDevice));
        #endif
    }

    if (hm != NULL)
    {
        if (this->sz < sz)
        {
            #ifdef __HIP__
            CUDA_SAFE_CALL(hipHostMalloc(&thm,sz+32,hipHostMallocMapped));
            #elif defined(CUDIFY_USE_CUDA)
            CUDA_SAFE_CALL(cudaHostAlloc(&thm,sz+32,cudaHostAllocMapped));
            #else
            thm = new unsigned char [sz+32];
            #ifdef GARBAGE_INJECTOR
            memset(thm,0xFF,sz+32);
            #endif
            #endif
        }

        // copy the old host buffer into the new one
        #ifdef __HIP__
        CUDA_SAFE_CALL(hipMemcpy(thm,hm,CudaMemory::size(),hipMemcpyHostToHost));
        #else
        CUDA_SAFE_CALL(cudaMemcpy(thm,hm,CudaMemory::size(),cudaMemcpyHostToHost));
        #endif
    }

    // destroy the old buffers
    destroy();

    // adopt the new buffers and the new size
    dm = tdm;
    hm = thm;

    this->sz = sz;

    return true;
}
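
// A rough sketch of the resize() behaviour above, assuming only the public
// interface shown in this file (the sizes are illustrative): growing preserves
// the old contents of both buffers.
//
//   CudaMemory mem;
//   mem.allocate(64);
//   mem.getPointer();     // lazily creates the host buffer
//   mem.hostToDevice();   // device buffer now mirrors the host buffer
//   mem.resize(256);      // both buffers grow; the first 64 bytes are preserved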

void * CudaMemory::getPointer()
{
    // allocate an host memory if not allocated
    if (hm == NULL)
        allocate_host(sz);

    return hm;
}

void CudaMemory::deviceToHost()
{
    // allocate an host memory if not allocated
    if (hm == NULL)
        allocate_host(sz);

    // copy from device to host memory
    #ifdef __HIP__
    CUDA_SAFE_CALL(hipMemcpy(hm,dm,sz+32,hipMemcpyDeviceToHost));
    #else
    CUDA_SAFE_CALL(cudaMemcpy(hm,dm,sz+32,cudaMemcpyDeviceToHost));
    #endif
}

// copy the device buffer into the host buffer of another CudaMemory
void CudaMemory::deviceToHost(CudaMemory & mem)
{
    // allocate an host memory if not allocated
    if (mem.hm == NULL)
        mem.allocate_host(sz);

    if (mem.sz > sz)
    {resize(mem.sz);}

    // copy from device to the external host memory
    if (sz != 0)
    {
        #ifdef __HIP__
        CUDA_SAFE_CALL(hipMemcpy(mem.hm,dm,mem.sz+32,hipMemcpyDeviceToHost));
        #else
        CUDA_SAFE_CALL(cudaMemcpy(mem.hm,dm,mem.sz+32,cudaMemcpyDeviceToHost));
        #endif
    }
}

// copy the host buffer of another CudaMemory into the device buffer
void CudaMemory::hostToDevice(CudaMemory & mem)
{
    // allocate an host memory if not allocated
    if (mem.hm == NULL)
        mem.allocate_host(sz);

    if (mem.sz > sz)
    {resize(mem.sz);}

    // copy from the external host memory to the device
    #ifdef __HIP__
    CUDA_SAFE_CALL(hipMemcpy(dm,mem.hm,mem.sz+32,hipMemcpyHostToDevice));
    #else
    CUDA_SAFE_CALL(cudaMemcpy(dm,mem.hm,mem.sz+32,cudaMemcpyHostToDevice));
    #endif
}

void CudaMemory::hostToDevice(size_t start, size_t stop)
{
    // allocate an host memory if not allocated
    if (hm == NULL)
        allocate_host(sz);

    // copy the byte range [start,stop) from host to device memory
    #ifdef __HIP__
    CUDA_SAFE_CALL(hipMemcpy(((unsigned char *)dm)+start,((unsigned char *)hm)+start,(stop-start),hipMemcpyHostToDevice));
    #else
    CUDA_SAFE_CALL(cudaMemcpy(((unsigned char *)dm)+start,((unsigned char *)hm)+start,(stop-start),cudaMemcpyHostToDevice));
    #endif
}

void CudaMemory::deviceToHost(size_t start, size_t stop)
{
    // allocate an host memory if not allocated
    if (hm == NULL)
        allocate_host(sz);

    // copy the byte range [start,stop) from device to host memory
    #ifdef __HIP__
    CUDA_SAFE_CALL(hipMemcpy(((unsigned char *)hm)+start,((unsigned char *)dm)+start,(stop-start),hipMemcpyDeviceToHost));
    #else
    CUDA_SAFE_CALL(cudaMemcpy(((unsigned char *)hm)+start,((unsigned char *)dm)+start,(stop-start),cudaMemcpyDeviceToHost));
    #endif
}
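
// Note on the two ranged overloads above: hostToDevice(start,stop) and
// deviceToHost(start,stop) transfer only the byte range [start,stop) between
// the host and device buffers, leaving the rest of the allocation untouched.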

const void * CudaMemory::getPointer() const
{
    // allocate an host memory if not allocated
    if (hm == NULL)
        allocate_host(sz);

    return hm;
}

void CudaMemory::fill(unsigned char c)
{
    #ifdef __HIP__
    CUDA_SAFE_CALL(hipMemset(dm,c,size()));
    #elif defined(CUDIFY_USE_CUDA)
    CUDA_SAFE_CALL(cudaMemset(dm,c,size()));
    #else
    memset(dm,c,size());
    #endif
    if (hm != NULL)
    {memset(hm,c,size());}
}

void * CudaMemory::getDevicePointer()
{
    return dm;
}

void CudaMemory::hostToDevice()
{
    // allocate an host memory if not allocated
    if (hm == NULL)
        allocate_host(sz);

    // copy from host to device memory
    #ifdef __HIP__
    CUDA_SAFE_CALL(hipMemcpy(dm,hm,sz+32,hipMemcpyHostToDevice));
    #else
    CUDA_SAFE_CALL(cudaMemcpy(dm,hm,sz+32,cudaMemcpyHostToDevice));
    #endif
}

// swap the buffers, size and sync flag with another CudaMemory
void CudaMemory::swap(CudaMemory & mem)
{
    size_t sz_tmp;
    void * dm_tmp;
//    long int ref_cnt_tmp;
    bool is_hm_sync_tmp;
    void * hm_tmp;

    hm_tmp = hm;
    is_hm_sync_tmp = is_hm_sync;
    sz_tmp = sz;
    dm_tmp = dm;
//    ref_cnt_tmp = ref_cnt;

    hm = mem.hm;
    is_hm_sync = mem.is_hm_sync;
    sz = mem.sz;
    dm = mem.dm;
    ref_cnt = mem.ref_cnt;

    mem.hm = hm_tmp;
    mem.is_hm_sync = is_hm_sync_tmp;
    mem.sz = sz_tmp;
    mem.dm = dm_tmp;
//    mem.ref_cnt = ref_cnt_tmp;
}
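
Taken together, the functions above implement a device buffer with a lazily allocated host mirror (pinned and mapped when a CUDA or HIP backend is active, plain heap memory otherwise). The following is a minimal, hypothetical round-trip sketch; it assumes only the public interface shown in this file, and the include path, buffer size, and float payload are illustrative rather than taken from the page.

#include "memory/CudaMemory.cuh"   // assumed include path

int main()
{
    CudaMemory mem;

    // allocate 16 floats on the device (plus the 32 bytes of internal padding)
    mem.allocate(16*sizeof(float));

    // write through the lazily allocated host buffer
    float * h = static_cast<float *>(mem.getPointer());
    for (int i = 0 ; i < 16 ; i++)
    {h[i] = (float)i;}

    // push the host buffer to the device and read it back
    mem.hostToDevice();
    mem.deviceToHost();

    // raw device pointer, e.g. to pass to a kernel
    float * d = static_cast<float *>(mem.getDevicePointer());
    (void)d;

    return 0;
}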