OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
map_vector_sparse_cuda_ker.cuh
1 /*
2  * map_vector_sparse_cuda_ker.cuh
3  *
4  * Created on: Jan 23, 2019
5  * Author: i-bird
6  */
7 
8 #ifndef MAP_VECTOR_SPARSE_CUDA_KER_CUH_
9 #define MAP_VECTOR_SPARSE_CUDA_KER_CUH_
10 
11 #include "util/for_each_ref.hpp"
12 
// TODO: check whether this is a good place for the following helper method.
//! \brief Linearize a 3D coordinate (x,y,z) against a 3D extent into a flat index (x fastest)
//!
//! \param coord       coordinate with .x/.y/.z members (e.g. blockIdx)
//! \param dimensions  extent with .x/.y/.z members (e.g. gridDim)
//! \return coord.z*dim.y*dim.x + coord.y*dim.x + coord.x as int
template<typename dim3Ta, typename dim3Tb>
inline __device__ __host__ int dim3CoordToInt(const dim3Ta & coord, const dim3Tb & dimensions)
{
	int linear = coord.z;
	linear = linear * dimensions.y + coord.y;
	linear = linear * dimensions.x + coord.x;
	return linear;
}
24 // Specialization allowing transparency
//! \brief Scalar specialization allowing transparency: a 1D coordinate is already linear
inline __device__ __host__ int dim3CoordToInt(int coord, int dimension)
{
	(void)dimension; // intentionally unused: nothing to fold for a scalar coordinate
	return coord;
}
29 
30 namespace openfpm
31 {
	/*! \brief Type-safe wrapper for a position already resolved inside the
	 *         sparse index vector (as opposed to a raw, unresolved element index)
	 */
	template<typename index_type>
	struct sparse_index
	{
		//! resolved position inside the index/data vectors
		index_type id;
	};
37 
#if defined(__NVCC__) && !defined(CUDA_ON_CPU)
	// Block-shared counters used to stage insertions/removals; initialized by
	// init()/init_ins_inc()/init_rem_inc() and published by flush_block_*().
	// NOTE(review): "static __shared__" in a header gives every translation
	// unit its own copy — confirm this is intentional.
	static __shared__ int vct_atomic_add;
	static __shared__ int vct_atomic_rem;
#endif
42 
43  template<typename T,
44  typename Ti,
45  template<typename> class layout_base>
47  {
48  vector_gpu_ker<aggregate<Ti>,layout_base> vct_index;
49 
51 
52  vector_gpu_ker<aggregate<Ti>,layout_base> vct_add_index;
53 
54  vector_gpu_ker<aggregate<Ti>,layout_base> vct_rem_index;
55 
56  vector_gpu_ker<aggregate<Ti>,layout_base> vct_nadd_index;
57 
58  vector_gpu_ker<aggregate<Ti>,layout_base> vct_nrem_index;
59 
60  vector_gpu_ker<T,layout_base> vct_add_data;
61 
62  // the const is forced by the getter that only return const encap that should not allow the modification of bck
63  // this should possible avoid to define an object const_encap
64  //mutable vector_gpu_ker<T,layout_base> vct_data_bck;
65 
66  int nslot_add;
67  int nslot_rem;
68 
		/*! \brief Branchless binary search of x inside the sorted index vector
		 *
		 * \param x  element index to look for
		 * \param id out: position of x inside vct_index when found; otherwise
		 *           the last slot of vct_data (the background slot)
		 *
		 * NOTE(review): the search range comes from vct_data.size()-1, which
		 * assumes vct_data always holds exactly one element more than
		 * vct_index (the background value) — confirm against the host-side
		 * container that fills these buffers.
		 */
		inline __device__ void _branchfree_search(Ti x, Ti & id) const
		{
			// empty index vector: nothing to search
			if (vct_index.size() == 0) {id = 0; return;}
			const Ti *base = &vct_index.template get<0>(0);
			const Ti *end = (const Ti *)vct_index.template getPointer<0>() + vct_index.size();
			Ti n = vct_data.size()-1;
			while (n > 1)
			{
				Ti half = n / 2;
				// branch-free step: advance base past the low half when the probe is smaller
				base = (base[half] < x) ? base+half : base;
				n -= half;
			}

			int off = (*base < x);
			id = base - &vct_index.template get<0>(0) + off;
			// value at the candidate slot, or a sentinel when past the end
			Ti v = (base + off != end)?*(base + off):(Ti)-1;
			// not found: redirect to the background slot
			id = (x == v)?id:vct_data.size()-1;
		}
93 
94  public:
95 
96  typedef Ti index_type;
97 
100 
101  vector_sparse_gpu_ker(vector_gpu_ker<aggregate<Ti>,layout_base> vct_index,
103  vector_gpu_ker<aggregate<Ti>,layout_base> vct_add_index,
104  vector_gpu_ker<aggregate<Ti>,layout_base> vct_rem_index,
105  vector_gpu_ker<T,layout_base> vct_add_data,
106  vector_gpu_ker<aggregate<Ti>,layout_base> vct_nadd_index,
107  vector_gpu_ker<aggregate<Ti>,layout_base> vct_nrem_index,
108  int nslot_add,
109  int nslot_rem)
110  :vct_index(vct_index),vct_data(vct_data),
111  vct_add_index(vct_add_index),vct_rem_index(vct_rem_index),vct_add_data(vct_add_data),
112  vct_nadd_index(vct_nadd_index),vct_nrem_index(vct_nrem_index),
113  nslot_add(nslot_add),nslot_rem(nslot_rem)
114  {}
115 
		/*! \brief Number of elements currently indexed
		 *
		 * \return the size of the index vector
		 */
		__device__ inline int size()
		{
			return vct_index.size();
		}
125 
		/*! \brief Reset the block-shared insert/remove counters to zero
		 *
		 * Must be called by the whole block before staging insertions or
		 * removals: thread 0 clears both counters, then all threads sync.
		 */
		__device__ inline void init()
		{
#ifdef __NVCC__
			// a single thread resets the shared counters for the whole block
			if (threadIdx.x == 0)
			{
				vct_atomic_add = 0;
				vct_atomic_rem = 0;
			}

			// make the reset visible to every thread of the block
			__syncthreads();
#endif
		}
141 
		/*! \brief Reload the insert counter with this block's previously flushed count
		 *
		 * Lets a kernel keep staging insertions after an earlier flush:
		 * thread 0 reads the per-block counter back into the shared counter,
		 * then all threads sync.
		 */
		__device__ inline void init_ins_inc()
		{
#ifdef __NVCC__
			if (threadIdx.x == 0)
			{
				int blockId = dim3CoordToInt(blockIdx, gridDim);
				vct_atomic_add = vct_nadd_index.template get<0>(blockId);
			}

			// make the reloaded counter visible to the whole block
			__syncthreads();
#endif
		}
157 
		/*! \brief Reload the remove counter with this block's previously flushed count
		 *
		 * Counterpart of init_ins_inc() for removals: thread 0 reads the
		 * per-block counter back into the shared counter, then all threads sync.
		 */
		__device__ inline void init_rem_inc()
		{
#ifdef __NVCC__
			if (threadIdx.x == 0)
			{
				int blockId = dim3CoordToInt(blockIdx, gridDim);
				vct_atomic_rem = vct_nrem_index.template get<0>(blockId);
			}

			// make the reloaded counter visible to the whole block
			__syncthreads();
#endif
		}
173 
185  __device__ inline openfpm::sparse_index<Ti> get_sparse(Ti id) const
186  {
187  Ti di;
188  this->_branchfree_search(id,di);
190  sid.id = di;
191 
192  return sid;
193  }
194 
		/*! \brief Get property p of the background value
		 *
		 * The background is stored in the last slot of vct_data.
		 *
		 * \return const reference/encap to property p of the background element
		 */
		template <unsigned int p>
		__device__ inline auto getBackground() const -> decltype(vct_data.template get<p>(0)) &
		{
			return vct_data.template get<p>(vct_data.size()-1);
		}
202 
213  template <unsigned int p>
214  __device__ inline auto get(Ti id) const -> decltype(vct_data.template get<p>(id))
215  {
216  Ti di;
217  this->_branchfree_search(id,di);
218  return vct_data.template get<p>(di);
219  }
220 
221  __device__ inline auto get(Ti id) const -> decltype(vct_data.get(0))
222  {
223  Ti di;
224  Ti v = this->_branchfree_search(id,di);
225  return vct_data.get(static_cast<size_t>(di));
226  }
227 
		/*! \brief Get property p at an already-resolved sparse position (no search)
		 *
		 * \param id resolved position (e.g. obtained from get_sparse)
		 * \return reference/encap to property p (const context)
		 */
		template <unsigned int p>
		__device__ inline auto get(openfpm::sparse_index<Ti> id) const -> decltype(vct_data.template get<p>(id.id))
		{
			return vct_data.template get<p>(id.id);
		}
243 
		/*! \brief Get property p at an already-resolved sparse position (no search)
		 *
		 * \param id resolved position (e.g. obtained from get_sparse)
		 * \return reference/encap to property p (mutable context)
		 */
		template <unsigned int p>
		__device__ inline auto get(openfpm::sparse_index<Ti> id) -> decltype(vct_data.template get<p>(id.id))
		{
			return vct_data.template get<p>(id.id);
		}
259 
		/*! \brief Get the element index stored at a resolved sparse position
		 *
		 * \param id resolved position inside the index vector
		 * \return the element index stored there
		 */
		__device__ inline Ti get_index(openfpm::sparse_index<Ti> id) const
		{
			return vct_index.template get<0>(id.id);
		}
270 
		/*! \brief Get property p of element id, also returning the resolved position
		 *
		 * \param id element index
		 * \param di out: resolved position (background slot when absent)
		 * \return reference/encap to property p of the resolved element
		 */
		template <unsigned int p>
		__device__ inline auto get(Ti id, Ti & di) const -> decltype(vct_data.template get<p>(id))
		{
			this->_branchfree_search(id,di);
			return vct_data.template get<p>(di);
		}
287 
		/*! \brief Get property p at a raw data position (no search performed)
		 *
		 * \param di position inside the data vector
		 * \return reference/encap to property p at that position
		 */
		template <unsigned int p>
		__device__ inline auto get_ele(Ti di) const -> decltype(vct_data.template get<p>(di))
		{
			return vct_data.template get<p>(di);
		}
303 
308  template <unsigned int p>
309  __device__ auto insert(Ti ele) -> decltype(vct_data.template get<p>(0))
310  {
311 #ifdef __NVCC__
312 
313  int blockId = dim3CoordToInt(blockIdx, gridDim);
314  int slot_base = blockId;
315 
316  int pos = atomicAdd(&vct_atomic_add,1);
317  vct_add_index.template get<0>(slot_base*nslot_add+pos) = ele;
318  return vct_add_data.template get<p>(slot_base*nslot_add+pos);
319 #else
320 
321  printf("vector_sparse_gpu_ker.insert[1]: Error, this function in order to work is supposed to be compiled with nvcc\n");
322 
323 #endif
324  }
325 
333  __device__ void remove(Ti ele)
334  {
335 #ifdef __NVCC__
336 
337  int blockId = dim3CoordToInt(blockIdx, gridDim);
338  int slot_base = blockId;
339 
340  int pos = atomicAdd(&vct_atomic_rem,1);
341  vct_rem_index.template get<0>(slot_base*nslot_rem+pos) = ele;
342 
343 #else
344  printf("vector_sparse_gpu_ker.remove: Error, this function in order to work is supposed to be compiled with nvcc\n");
345 #endif
346  }
347 
355  __device__ auto insert(Ti ele) -> decltype(vct_add_data.get(0))
356  {
357 #ifdef __NVCC__
358 
359  int blockId = dim3CoordToInt(blockIdx, gridDim);
360  int slot_base = blockId;
361 
362  int pos = atomicAdd(&vct_atomic_add,1);
363  vct_add_index.template get<0>(slot_base*nslot_add+pos) = ele;
364 
365  return vct_add_data.get(slot_base*nslot_add+pos);
366 #else
367  printf("vector_sparse_gpu_ker.insert[2]: Error, this function in order to work is supposed to be compiled with nvcc\n");
368 #endif
369  }
370 
375  __device__ void remove_b(Ti ele,Ti slot_base)
376  {
377 #ifdef __NVCC__
378 
379  int pos = atomicAdd(&vct_atomic_rem,1);
380  vct_rem_index.template get<0>(slot_base*nslot_rem+pos) = ele;
381 
382 #else
383  printf("vector_sparse_gpu_ker.remove_b: Error, this function in order to work is supposed to be compiled with nvcc\n");
384 #endif
385  }
386 
391  template <unsigned int p>
392  __device__ auto insert_b(Ti ele,Ti slot_base) -> decltype(vct_data.template get<p>(0))
393  {
394 #ifdef __NVCC__
395 
396  int pos = atomicAdd(&vct_atomic_add,1);
397  vct_add_index.template get<0>(slot_base*nslot_add+pos) = ele;
398  return vct_add_data.template get<p>(slot_base*nslot_add+pos);
399 #else
400  printf("vector_sparse_gpu_ker.insert_b: Error, this function in order to work is supposed to be compiled with nvcc\n");
401 #endif
402  }
403 
408  __device__ auto insert_b(Ti ele,Ti slot_base) -> decltype(vct_add_data.get(0))
409  {
410 #ifdef __NVCC__
411 
412  int pos = atomicAdd(&vct_atomic_add,1);
413  vct_add_index.template get<0>(slot_base*nslot_add+pos) = ele;
414  return vct_add_data.get(slot_base*nslot_add+pos);
415 #else
416  printf("vector_sparse_gpu_ker.insert_b: Error, this function in order to work is supposed to be compiled with nvcc\n");
417 #endif
418  }
419 
424  __device__ void flush_block_insert()
425  {
426 #ifdef __NVCC__
427 
428  __syncthreads();
429 
430  if (threadIdx.x == 0)
431  {
432  int blockId = dim3CoordToInt(blockIdx, gridDim);
433  vct_nadd_index.template get<0>(blockId) = vct_atomic_add;
434  }
435 
436 #else
437  printf("vector_sparse_gpu_ker.flush_block_insert: Error, this function in order to work is supposed to be compiled with nvcc\n");
438 #endif
439  }
440 
445  __device__ void flush_block_remove()
446  {
447 #ifdef __NVCC__
448 
449  __syncthreads();
450 
451  if (threadIdx.x == 0)
452  {
453  int blockId = dim3CoordToInt(blockIdx, gridDim);
454  vct_nrem_index.template get<0>(blockId) = vct_atomic_rem;
455  }
456 
457 #else
458  printf("vector_sparse_gpu_ker.flush_block_remove: Error, this function in order to work is supposed to be compiled with nvcc\n");
459 #endif
460  }
461 
		//! Internal accessor: direct reference to the per-block insert-counter vector
		auto & private_get_vct_nadd_index()
		{
			return vct_nadd_index;
		}
466 
471  __device__ void flush_block_insert(Ti b, bool flusher)
472  {
473 #ifdef __NVCC__
474 
475  __syncthreads();
476 
477  if (flusher == true)
478  {vct_nadd_index.template get<0>(b) = vct_atomic_add;}
479 
480 
481 #else
482  printf("vector_sparse_gpu_ker.flush_block_insert: Error, this function in order to work is supposed to be compiled with nvcc\n");
483 #endif
484  }
485 
		//! Internal accessor: underlying layout data of the add-data staging buffer
		__device__ auto private_get_data() -> decltype(vct_add_data.getBase().get_data_())
		{
			return vct_add_data.getBase().get_data_();
		}
490 
495  __device__ void flush_block_remove(unsigned int b, bool flusher)
496  {
497 #ifdef __NVCC__
498 
499  __syncthreads();
500 
501  if (flusher == true)
502  {vct_nrem_index.template get<0>(b) = vct_atomic_rem;}
503 
504 #else
505  printf("vector_sparse_gpu_ker.flush_block_remove: Error, this function in order to work is supposed to be compiled with nvcc\n");
506 #endif
507  }
508 
		//! \brief Get the staged add-data buffer (mutable)
		__device__ auto getAddDataBuffer() -> decltype(vct_add_data)&
		{
			return vct_add_data;
		}
517 
		//! \brief Get the element-data buffer (mutable)
		__device__ auto getDataBuffer() -> decltype(vct_data)&
		{
			return vct_data;
		}
526 
		//! \brief Get the staged add-index buffer (read-only)
		__device__ auto getAddIndexBuffer() const -> const decltype(vct_add_index)&
		{
			return vct_add_index;
		}
535 
		//! \brief Get the index buffer (read-only)
		__device__ auto getIndexBuffer() const -> const decltype(vct_index)&
		{
			return vct_index;
		}
544 
		//! \brief Get the element-data buffer (read-only)
		__device__ auto getDataBuffer() const -> const decltype(vct_data)&
		{
			return vct_data;
		}
553 
554 #ifdef SE_CLASS1
555 
561  pointer_check check_device_pointer(void * ptr)
562  {
563  pointer_check pc;
564 
565  pc = vct_index.check_device_pointer(ptr);
566 
567  if (pc.match == true)
568  {
569  pc.match_str = std::string("Index vector overflow: ") + "\n" + pc.match_str;
570  return pc;
571  }
572 
573  pc = vct_data.check_device_pointer(ptr);
574 
575  if (pc.match == true)
576  {
577  pc.match_str = std::string("Data vector overflow: ") + "\n" + pc.match_str;
578  return pc;
579  }
580 
581  pc = vct_add_index.check_device_pointer(ptr);
582 
583  if (pc.match == true)
584  {
585  pc.match_str = std::string("Add index vector overflow: ") + "\n" + pc.match_str;
586  return pc;
587  }
588 
589  pc = vct_rem_index.check_device_pointer(ptr);
590 
591  if (pc.match == true)
592  {
593  pc.match_str = std::string("Remove index vector overflow: ") + "\n" + pc.match_str;
594  return pc;
595  }
596 
597  pc = vct_nadd_index.check_device_pointer(ptr);
598 
599  if (pc.match == true)
600  {
601  pc.match_str = std::string("Add index counter vector overflow: ") + "\n" + pc.match_str;
602  return pc;
603  }
604 
605  pc = vct_nrem_index.check_device_pointer(ptr);
606 
607  if (pc.match == true)
608  {
609  pc.match_str = std::string("Remove index counter vector overflow: ") + "\n" + pc.match_str;
610  return pc;
611  }
612 
613  pc = vct_add_data.check_device_pointer(ptr);
614 
615  if (pc.match == true)
616  {
617  pc.match_str = std::string("Add data vector overflow: ") + "\n" + pc.match_str;
618  return pc;
619  }
620 
621  return pc;
622  }
623 
624 #endif
625 
626  };
627 }
628 
629 #endif /* MAP_VECTOR_SPARSE_CUDA_KER_CUH_ */
__device__ auto insert_b(Ti ele, Ti slot_base) -> decltype(vct_add_data.get(0))
It inserts an element in the sparse vector.
__device__ auto getDataBuffer() -> decltype(vct_data)&
Get the data buffer.
convert a type into constant type
Definition: aggregate.hpp:292
__device__ auto get(Ti id) const -> decltype(vct_data.template get< p >(id))
Get an element of the vector.
bool match
Indicate if the pointer match.
grid interface available when on gpu
__device__ void remove(Ti ele)
It stages the removal of an element from the sparse vector.
__device__ auto getAddIndexBuffer() const -> const decltype(vct_add_index)&
Get the indices buffer.
__device__ void flush_block_insert(Ti b, bool flusher)
It flushes the number of staged insertions for the block.
__device__ auto getDataBuffer() const -> const decltype(vct_data)&
Get the data buffer.
__device__ void init_rem_inc()
This function must be called.
__device__ Ti get_index(openfpm::sparse_index< Ti > id) const
Get the index associated to the element id.
__device__ void init()
This function must be called.
__device__ auto insert(Ti ele) -> decltype(vct_add_data.get(0))
It inserts an element in the sparse vector.
__device__ auto getBackground() const -> decltype(vct_data.template get< p >(0)) &
Get the background value.
__device__ void remove_b(Ti ele, Ti slot_base)
It stages the removal of an element from the sparse vector.
__device__ auto getAddDataBuffer() -> decltype(vct_add_data)&
Get the data buffer.
__device__ void flush_block_insert()
It flushes the number of staged insertions for the block.
__device__ void flush_block_remove(unsigned int b, bool flusher)
It flushes the number of staged removals for the block.
__device__ auto insert_b(Ti ele, Ti slot_base) -> decltype(vct_data.template get< p >(0))
It inserts an element in the sparse vector.
__device__ void _branchfree_search(Ti x, Ti &id) const
Branchless binary search of an index inside the sparse index vector.
__device__ __host__ auto get(unsigned int id) const -> decltype(base.template get< p >(grid_key_dx< 1 >(0)))
Get an element of the vector.
__device__ void init_ins_inc()
This function must be called.
__device__ auto get(openfpm::sparse_index< Ti > id) const -> decltype(vct_data.template get< p >(id.id))
Get an element of the vector.
__device__ openfpm::sparse_index< Ti > get_sparse(Ti id) const
Get the sparse index.
__device__ grid_gpu_ker< 1, T_, layout_base, grid_sm< 1, void > > & getBase()
Return the base.
__device__ void flush_block_remove()
It flushes the number of staged removals for the block.
__device__ __host__ unsigned int size() const
Return the size of the vector.
__device__ auto get(openfpm::sparse_index< Ti > id) -> decltype(vct_data.template get< p >(id.id))
Get an element of the vector.
int yes_has_check_device_pointer
Indicate this structure has a function to check the device pointer.
std::string match_str
match string
aggregate of properties, from a list of object if create a struct that follow the OPENFPM native stru...
Definition: aggregate.hpp:214
__device__ __host__ layout & get_data_()
Get the internal data_ structure.
__device__ auto getIndexBuffer() const -> const decltype(vct_index)&
Get the indices buffer.
__device__ int size()
Get the number of elements.
__device__ auto get(Ti id, Ti &di) const -> decltype(vct_data.template get< p >(id))
Get an element of the vector.
__device__ auto insert(Ti ele) -> decltype(vct_data.template get< p >(0))
It inserts an element in the sparse vector.
__device__ auto get_ele(Ti di) const -> decltype(vct_data.template get< p >(di))
Get an element of the vector.