OpenFPM_pdata  4.1.0
Project that contain the implementation of distributed structures
 
Loading...
Searching...
No Matches
map_vector_sparse_cuda_ker.cuh
1/*
2 * map_vector_sparse_cuda_ker.cuh
3 *
4 * Created on: Jan 23, 2019
5 * Author: i-bird
6 */
7
8#ifndef MAP_VECTOR_SPARSE_CUDA_KER_CUH_
9#define MAP_VECTOR_SPARSE_CUDA_KER_CUH_
10
11#include "util/for_each_ref.hpp"
12
13//todo: Check where it's a good place to put the following method...
/*! \brief Linearize a 3D coordinate into a flat integer index
 *
 * \param coord coordinate, must expose .x .y .z members (e.g. dim3, blockIdx)
 * \param dimensions extents of the box, must expose .x .y .z (e.g. gridDim)
 *
 * \return coord.z * dimensions.y * dimensions.x
 *       + coord.y * dimensions.x
 *       + coord.x
 */
template<typename dim3Ta, typename dim3Tb>
inline __device__ __host__ int dim3CoordToInt(const dim3Ta & coord, const dim3Tb & dimensions)
{
	int res = coord.z;
	res *= dimensions.y;
	res += coord.y;
	res *= dimensions.x;
	res += coord.x;
	return res;
}
24// Specialization allowing transparency
/*! \brief Specialization allowing transparency: a 1D "coordinate" is already linear
 *
 * \param coord linear coordinate, returned unchanged
 * \param dimension unused, kept only for interface symmetry with the 3D overload
 *
 * \return coord
 */
inline __device__ __host__ int dim3CoordToInt(int coord, int dimension)
{
	return coord;
}
29
30namespace openfpm
31{
32 template<typename index_type>
34 {
35 index_type id;
36 };
37
#if defined(__NVCC__) && !defined(CUDA_ON_CPU)
	// Per-block __shared__ counters: number of staged insertions / removals
	// for the current block. Reset/loaded by init*(), advanced by atomicAdd
	// in insert/remove, published by flush_block_*.
	static __shared__ int vct_atomic_add;
	static __shared__ int vct_atomic_rem;
#endif
42
43 template<typename T,
44 typename Ti,
45 template<typename> class layout_base>
47 {
48 vector_gpu_ker<aggregate<Ti>,layout_base> vct_index;
49
51
52 vector_gpu_ker<aggregate<Ti>,layout_base> vct_add_index;
53
54 vector_gpu_ker<aggregate<Ti>,layout_base> vct_rem_index;
55
56 vector_gpu_ker<aggregate<Ti>,layout_base> vct_nadd_index;
57
58 vector_gpu_ker<aggregate<Ti>,layout_base> vct_nrem_index;
59
61
62 // the const is forced by the getter that only return const encap that should not allow the modification of bck
63 // this should possible avoid to define an object const_encap
64 //mutable vector_gpu_ker<T,layout_base> vct_data_bck;
65
66 int nslot_add;
67 int nslot_rem;
68
75 inline __device__ void _branchfree_search(Ti x, Ti & id) const
76 {
77 if (vct_index.size() == 0) {id = 0; return;}
78 const Ti *base = &vct_index.template get<0>(0);
79 const Ti *end = (const Ti *)vct_index.template getPointer<0>() + vct_index.size();
80 Ti n = vct_data.size()-1;
81 while (n > 1)
82 {
83 Ti half = n / 2;
84 base = (base[half] < x) ? base+half : base;
85 n -= half;
86 }
87
88 int off = (*base < x);
89 id = base - &vct_index.template get<0>(0) + off;
90 Ti v = (base + off != end)?*(base + off):(Ti)-1;
91 id = (x == v)?id:vct_data.size()-1;
92 }
93
94 public:
95
96 typedef Ti index_type;
97
100
103 vector_gpu_ker<aggregate<Ti>,layout_base> vct_add_index,
104 vector_gpu_ker<aggregate<Ti>,layout_base> vct_rem_index,
106 vector_gpu_ker<aggregate<Ti>,layout_base> vct_nadd_index,
107 vector_gpu_ker<aggregate<Ti>,layout_base> vct_nrem_index,
108 int nslot_add,
109 int nslot_rem)
110 :vct_index(vct_index),vct_data(vct_data),
111 vct_add_index(vct_add_index),vct_rem_index(vct_rem_index),vct_add_data(vct_add_data),
112 vct_nadd_index(vct_nadd_index),vct_nrem_index(vct_nrem_index),
113 nslot_add(nslot_add),nslot_rem(nslot_rem)
114 {}
115
121 __device__ inline int size()
122 {
123 return vct_index.size();
124 }
125
129 __device__ inline void init()
130 {
131#ifdef __NVCC__
132 if (threadIdx.x == 0)
133 {
134 vct_atomic_add = 0;
135 vct_atomic_rem = 0;
136 }
137
138 __syncthreads();
139#endif
140 }
141
145 __device__ inline void init_ins_inc()
146 {
147#ifdef __NVCC__
148 if (threadIdx.x == 0)
149 {
150 int blockId = dim3CoordToInt(blockIdx, gridDim);
151 vct_atomic_add = vct_nadd_index.template get<0>(blockId);
152 }
153
154 __syncthreads();
155#endif
156 }
157
161 __device__ inline void init_rem_inc()
162 {
163#ifdef __NVCC__
164 if (threadIdx.x == 0)
165 {
166 int blockId = dim3CoordToInt(blockIdx, gridDim);
167 vct_atomic_rem = vct_nrem_index.template get<0>(blockId);
168 }
169
170 __syncthreads();
171#endif
172 }
173
185 __device__ inline openfpm::sparse_index<Ti> get_sparse(Ti id) const
186 {
187 Ti di;
188 this->_branchfree_search(id,di);
190 sid.id = di;
191
192 return sid;
193 }
194
197 template <unsigned int p>
198 __device__ inline auto getBackground() const -> decltype(vct_data.template get<p>(0)) &
199 {
200 return vct_data.template get<p>(vct_data.size()-1);
201 }
202
213 template <unsigned int p>
214 __device__ inline auto get(Ti id) const -> decltype(vct_data.template get<p>(id))
215 {
216 Ti di;
217 this->_branchfree_search(id,di);
218 return vct_data.template get<p>(di);
219 }
220
221 __device__ inline auto get(Ti id) const -> decltype(vct_data.get(0))
222 {
223 Ti di;
224 Ti v = this->_branchfree_search(id,di);
225 return vct_data.get(static_cast<size_t>(di));
226 }
227
238 template <unsigned int p>
239 __device__ inline auto get(openfpm::sparse_index<Ti> id) const -> decltype(vct_data.template get<p>(id.id))
240 {
241 return vct_data.template get<p>(id.id);
242 }
243
254 template <unsigned int p>
255 __device__ inline auto get(openfpm::sparse_index<Ti> id) -> decltype(vct_data.template get<p>(id.id))
256 {
257 return vct_data.template get<p>(id.id);
258 }
259
266 __device__ inline Ti get_index(openfpm::sparse_index<Ti> id) const
267 {
268 return vct_index.template get<0>(id.id);
269 }
270
281 template <unsigned int p>
282 __device__ inline auto get(Ti id, Ti & di) const -> decltype(vct_data.template get<p>(id))
283 {
284 this->_branchfree_search(id,di);
285 return vct_data.template get<p>(di);
286 }
287
298 template <unsigned int p>
299 __device__ inline auto get_ele(Ti di) const -> decltype(vct_data.template get<p>(di))
300 {
301 return vct_data.template get<p>(di);
302 }
303
308 template <unsigned int p>
309 __device__ auto insert(Ti ele) -> decltype(vct_data.template get<p>(0))
310 {
311#ifdef __NVCC__
312
313 int blockId = dim3CoordToInt(blockIdx, gridDim);
314 int slot_base = blockId;
315
316 int pos = atomicAdd(&vct_atomic_add,1);
317 vct_add_index.template get<0>(slot_base*nslot_add+pos) = ele;
318 return vct_add_data.template get<p>(slot_base*nslot_add+pos);
319#else
320
321 printf("vector_sparse_gpu_ker.insert[1]: Error, this function in order to work is supposed to be compiled with nvcc\n");
322
323#endif
324 }
325
333 __device__ void remove(Ti ele)
334 {
335#ifdef __NVCC__
336
337 int blockId = dim3CoordToInt(blockIdx, gridDim);
338 int slot_base = blockId;
339
340 int pos = atomicAdd(&vct_atomic_rem,1);
341 vct_rem_index.template get<0>(slot_base*nslot_rem+pos) = ele;
342
343#else
344 printf("vector_sparse_gpu_ker.remove: Error, this function in order to work is supposed to be compiled with nvcc\n");
345#endif
346 }
347
355 __device__ auto insert(Ti ele) -> decltype(vct_add_data.get(0))
356 {
357#ifdef __NVCC__
358
359 int blockId = dim3CoordToInt(blockIdx, gridDim);
360 int slot_base = blockId;
361
362 int pos = atomicAdd(&vct_atomic_add,1);
363 vct_add_index.template get<0>(slot_base*nslot_add+pos) = ele;
364
365 return vct_add_data.get(slot_base*nslot_add+pos);
366#else
367 printf("vector_sparse_gpu_ker.insert[2]: Error, this function in order to work is supposed to be compiled with nvcc\n");
368#endif
369 }
370
375 __device__ void remove_b(Ti ele,Ti slot_base)
376 {
377#ifdef __NVCC__
378
379 int pos = atomicAdd(&vct_atomic_rem,1);
380 vct_rem_index.template get<0>(slot_base*nslot_rem+pos) = ele;
381
382#else
383 printf("vector_sparse_gpu_ker.remove_b: Error, this function in order to work is supposed to be compiled with nvcc\n");
384#endif
385 }
386
391 template <unsigned int p>
392 __device__ auto insert_b(Ti ele,Ti slot_base) -> decltype(vct_data.template get<p>(0))
393 {
394#ifdef __NVCC__
395
396 int pos = atomicAdd(&vct_atomic_add,1);
397 vct_add_index.template get<0>(slot_base*nslot_add+pos) = ele;
398 return vct_add_data.template get<p>(slot_base*nslot_add+pos);
399#else
400 printf("vector_sparse_gpu_ker.insert_b: Error, this function in order to work is supposed to be compiled with nvcc\n");
401#endif
402 }
403
408 __device__ auto insert_b(Ti ele,Ti slot_base) -> decltype(vct_add_data.get(0))
409 {
410#ifdef __NVCC__
411
412 int pos = atomicAdd(&vct_atomic_add,1);
413 vct_add_index.template get<0>(slot_base*nslot_add+pos) = ele;
414 return vct_add_data.get(slot_base*nslot_add+pos);
415#else
416 printf("vector_sparse_gpu_ker.insert_b: Error, this function in order to work is supposed to be compiled with nvcc\n");
417#endif
418 }
419
424 __device__ void flush_block_insert()
425 {
426#ifdef __NVCC__
427
428 __syncthreads();
429
430 if (threadIdx.x == 0)
431 {
432 int blockId = dim3CoordToInt(blockIdx, gridDim);
433 vct_nadd_index.template get<0>(blockId) = vct_atomic_add;
434 }
435
436#else
437 printf("vector_sparse_gpu_ker.flush_block_insert: Error, this function in order to work is supposed to be compiled with nvcc\n");
438#endif
439 }
440
445 __device__ void flush_block_remove()
446 {
447#ifdef __NVCC__
448
449 __syncthreads();
450
451 if (threadIdx.x == 0)
452 {
453 int blockId = dim3CoordToInt(blockIdx, gridDim);
454 vct_nrem_index.template get<0>(blockId) = vct_atomic_rem;
455 }
456
457#else
458 printf("vector_sparse_gpu_ker.flush_block_remove: Error, this function in order to work is supposed to be compiled with nvcc\n");
459#endif
460 }
461
462 auto & private_get_vct_nadd_index()
463 {
464 return vct_nadd_index;
465 }
466
471 __device__ void flush_block_insert(Ti b, bool flusher)
472 {
473#ifdef __NVCC__
474
475 __syncthreads();
476
477 if (flusher == true)
478 {vct_nadd_index.template get<0>(b) = vct_atomic_add;}
479
480
481#else
482 printf("vector_sparse_gpu_ker.flush_block_insert: Error, this function in order to work is supposed to be compiled with nvcc\n");
483#endif
484 }
485
486 __device__ auto private_get_data() -> decltype(vct_add_data.getBase().get_data_())
487 {
488 return vct_add_data.getBase().get_data_();
489 }
490
495 __device__ void flush_block_remove(unsigned int b, bool flusher)
496 {
497#ifdef __NVCC__
498
499 __syncthreads();
500
501 if (flusher == true)
502 {vct_nrem_index.template get<0>(b) = vct_atomic_rem;}
503
504#else
505 printf("vector_sparse_gpu_ker.flush_block_remove: Error, this function in order to work is supposed to be compiled with nvcc\n");
506#endif
507 }
508
513 __device__ auto getAddDataBuffer() -> decltype(vct_add_data)&
514 {
515 return vct_add_data;
516 }
517
522 __device__ auto getDataBuffer() -> decltype(vct_data)&
523 {
524 return vct_data;
525 }
526
531 __device__ auto getAddIndexBuffer() const -> const decltype(vct_add_index)&
532 {
533 return vct_add_index;
534 }
535
540 __device__ auto getIndexBuffer() const -> const decltype(vct_index)&
541 {
542 return vct_index;
543 }
544
549 __device__ auto getDataBuffer() const -> const decltype(vct_data)&
550 {
551 return vct_data;
552 }
553
#ifdef SE_CLASS1

		/*! \brief Check whether a device pointer overflows one of the internal buffers
		 *
		 * Debug-only (SE_CLASS1). Probes each internal vector in turn; the first
		 * match wins and its message is prefixed with the buffer's role.
		 *
		 * \param ptr pointer to check
		 *
		 * \return a pointer_check; match is true and match_str names the buffer
		 *         when ptr belongs to one of them
		 */
		pointer_check check_device_pointer(void * ptr)
		{
			pointer_check pc;

			pc = vct_index.check_device_pointer(ptr);

			if (pc.match == true)
			{
				pc.match_str = std::string("Index vector overflow: ") + "\n" + pc.match_str;
				return pc;
			}

			pc = vct_data.check_device_pointer(ptr);

			if (pc.match == true)
			{
				pc.match_str = std::string("Data vector overflow: ") + "\n" + pc.match_str;
				return pc;
			}

			pc = vct_add_index.check_device_pointer(ptr);

			if (pc.match == true)
			{
				pc.match_str = std::string("Add index vector overflow: ") + "\n" + pc.match_str;
				return pc;
			}

			pc = vct_rem_index.check_device_pointer(ptr);

			if (pc.match == true)
			{
				pc.match_str = std::string("Remove index vector overflow: ") + "\n" + pc.match_str;
				return pc;
			}

			pc = vct_nadd_index.check_device_pointer(ptr);

			if (pc.match == true)
			{
				pc.match_str = std::string("Add index counter vector overflow: ") + "\n" + pc.match_str;
				return pc;
			}

			pc = vct_nrem_index.check_device_pointer(ptr);

			if (pc.match == true)
			{
				pc.match_str = std::string("Remove index counter vector overflow: ") + "\n" + pc.match_str;
				return pc;
			}

			pc = vct_add_data.check_device_pointer(ptr);

			if (pc.match == true)
			{
				pc.match_str = std::string("Add data vector overflow: ") + "\n" + pc.match_str;
				return pc;
			}

			// no buffer matched: pc carries the last (negative) probe result
			return pc;
		}

#endif
625
626 };
627}
628
629#endif /* MAP_VECTOR_SPARSE_CUDA_KER_CUH_ */
__device__ __host__ layout & get_data_()
Get the internal data_ structure.
__device__ auto getDataBuffer() const -> const decltype(vct_data)&
Get the data buffer.
__device__ void flush_block_remove(unsigned int b, bool flusher)
Write out the number of queued removals for block b.
__device__ void flush_block_remove()
Write out the number of queued removals for the current block.
__device__ auto get_ele(Ti di) const -> decltype(vct_data.template get< p >(di))
Get an element of the vector.
int yes_has_check_device_pointer
Indicate this structure has a function to check the device pointer.
__device__ void _branchfree_search(Ti x, Ti &id) const
Branch-free binary search for x in the sorted index vector.
__device__ void init()
This function must be called by all threads of the block before staging insertions or removals; it resets the per-block counters.
__device__ int size()
Get the number of elements.
__device__ openfpm::sparse_index< Ti > get_sparse(Ti id) const
Get the sparse index.
__device__ auto insert_b(Ti ele, Ti slot_base) -> decltype(vct_data.template get< p >(0))
It inserts an element in the sparse vector, using an explicit slot base.
__device__ void flush_block_insert()
Write out the number of queued insertions for the current block.
__device__ Ti get_index(openfpm::sparse_index< Ti > id) const
Get the index associated to the element id.
__device__ auto getAddIndexBuffer() const -> const decltype(vct_add_index)&
Get the indices buffer.
__device__ auto insert_b(Ti ele, Ti slot_base) -> decltype(vct_add_data.get(0))
It inserts an element in the sparse vector, using an explicit slot base.
__device__ void remove(Ti ele)
It queues the removal of an element from the sparse vector.
__device__ auto get(Ti id, Ti &di) const -> decltype(vct_data.template get< p >(id))
Get an element of the vector.
__device__ auto get(Ti id) const -> decltype(vct_data.template get< p >(id))
Get an element of the vector.
__device__ auto insert(Ti ele) -> decltype(vct_data.template get< p >(0))
It inserts an element in the sparse vector.
__device__ auto insert(Ti ele) -> decltype(vct_add_data.get(0))
It inserts an element in the sparse vector.
__device__ auto getIndexBuffer() const -> const decltype(vct_index)&
Get the indices buffer.
__device__ void init_rem_inc()
This function must be called by all threads of the block before staging removals; it loads the per-block removal counter.
__device__ auto get(openfpm::sparse_index< Ti > id) const -> decltype(vct_data.template get< p >(id.id))
Get an element of the vector.
__device__ void remove_b(Ti ele, Ti slot_base)
It queues the removal of an element from the sparse vector, using an explicit slot base.
__device__ auto getBackground() const -> decltype(vct_data.template get< p >(0)) &
Get the background value.
__device__ auto getDataBuffer() -> decltype(vct_data)&
Get the data buffer.
__device__ void flush_block_insert(Ti b, bool flusher)
Write out the number of queued insertions for block b.
__device__ void init_ins_inc()
This function must be called by all threads of the block before staging insertions; it loads the per-block insertion counter.
__device__ auto get(openfpm::sparse_index< Ti > id) -> decltype(vct_data.template get< p >(id.id))
Get an element of the vector.
__device__ auto getAddDataBuffer() -> decltype(vct_add_data)&
Get the data buffer.
convert a type into constant type
aggregate of properties, from a list of object if create a struct that follow the OPENFPM native stru...
grid interface available when on gpu
__device__ __host__ auto get(unsigned int id) const -> decltype(base.template get< p >(grid_key_dx< 1 >(0)))
Get an element of the vector.
__device__ __host__ unsigned int size() const
Return the size of the vector.
__device__ grid_gpu_ker< 1, T_, layout_base, grid_sm< 1, void > > & getBase()
Return the base.
std::string match_str
match string
bool match
Indicate if the pointer match.