/*
 * CellList_gpu_ker.cuh
 *
 *  Created on: Jul 30, 2018
 *      Author: i-bird
 */

#ifndef CELLLIST_GPU_KER_CUH_
#define CELLLIST_GPU_KER_CUH_

#include "NN/CellList/CellList_def.hpp"
#include "NN/CellList/cuda/CellDecomposer_gpu_ker.cuh"

#ifdef USE_LOW_REGISTER_ITERATOR

#ifdef __NVCC__
//! precomputed linearized offsets of the neighborhood cells, kept in constant memory
__constant__ int cells_striding[126];
#endif

template<unsigned int dim, int r_int, unsigned int pr_int, typename ids_type, typename cnt_type>
struct NN_gpu_int_base_lr_impl
{
	//! counter of the visited neighborhood cells
	cnt_type ca_pnt;

	//! linearized id of the center cell
	cnt_type ca_lin;

	//! linearized id of the cell currently visited
	cnt_type c_id;

	__device__ inline void init_impl(const grid_key_dx<dim,ids_type> & cell_pos, const openfpm::array<ids_type,dim,cnt_type> & div_c)
	{
#ifdef __NVCC__
		ca_pnt = 0;

		ca_lin = cid_<dim,cnt_type,ids_type,int>::get_cid(div_c,cell_pos);
		c_id = ca_lin + cells_striding[ca_pnt];
#endif
	}

	__device__ inline void SelectValid_impl(const openfpm::array<ids_type,dim,cnt_type> & div_c)
	{
#ifdef __NVCC__
		++ca_pnt;

		c_id = ca_lin + cells_striding[ca_pnt];
#endif
	}

	__device__ inline bool isNext_impl()
	{
		return ca_pnt < pr_int;
	}
};

#endif
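
/* Note on the low-register variant: the (2*r_int+1)^dim neighborhood is walked
 * with a single counter (ca_pnt) into a table of precomputed linearized cell
 * offsets in constant memory (cells_striding), instead of carrying
 * start/stop/current grid keys in registers. A minimal sketch of how such a
 * table could be filled on the host for dim = 2, r_int = 1 (illustrative only,
 * not the OpenFPM filling code):
 *
 *   int stride[9]; int k = 0;
 *   for (int j = -1 ; j <= 1 ; j++)
 *   {
 *       for (int i = -1 ; i <= 1 ; i++)
 *       {stride[k++] = j * div_c[0] + i;}   // linear offset of cell (i,j)
 *   }
 *   cudaMemcpyToSymbol(cells_striding,stride,sizeof(stride));
 */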

template<unsigned int dim, int r_int, unsigned int pr_int, typename ids_type, typename cnt_type>
struct NN_gpu_int_base_hr_impl
{
	//! cell currently visited
	grid_key_dx<dim,ids_type> cell_act;

	//! first cell of the neighborhood box
	grid_key_dx<dim,ids_type> cell_start;
	//! last cell of the neighborhood box
	grid_key_dx<dim,ids_type> cell_stop;

	//! linearized id of the cell currently visited
	cnt_type c_id;

	__device__ __host__ inline void init_impl(const grid_key_dx<dim,ids_type> & cell_pos, const openfpm::array<ids_type,dim,cnt_type> & div_c)
	{
		for (size_t i = 0 ; i < dim ; i++)
		{
			cell_start.set_d(i,cell_pos.get(i) - r_int);
			cell_stop.set_d(i,cell_pos.get(i) + r_int);
			cell_act.set_d(i,cell_pos.get(i) - r_int);
		}

		c_id = cid_<dim,cnt_type,ids_type,int>::get_cid(div_c,cell_start);
	}

	__device__ __host__ inline void SelectValid_impl(const openfpm::array<ids_type,dim,cnt_type> & div_c)
	{
		cnt_type id = cell_act.get(0);
		cell_act.set_d(0,id+1);

		// check the overflow of all the indexes but the last (the last overflowing signals the end)

		int i = 0;
		for ( ; i < dim-1 ; i++)
		{
			size_t id = cell_act.get(i);
			if ((int)id > cell_stop.get(i))
			{
				// overflow: reset this index and increment the next one
				cell_act.set_d(i,cell_start.get(i));
				id = cell_act.get(i+1);
				cell_act.set_d(i+1,id+1);
			}
			else
			{
				break;
			}
		}

		c_id = cid_<dim,cnt_type,ids_type,int>::get_cid(div_c,cell_act);
	}

	__device__ __host__ inline bool isNext_impl()
	{
		return cell_act.get(dim-1) <= cell_stop.get(dim-1);
	}
};
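
/* The high-register iterator above walks the box [cell_pos - r_int,
 * cell_pos + r_int] like an odometer: SelectValid_impl increments the first
 * index and propagates the overflow to the next dimension. For dim = 2,
 * r_int = 1 around cell (5,5) the visit order is (4,4) (5,4) (6,4) (4,5)
 * (5,5) (6,5) (4,6) (5,6) (6,6); iteration stops once the last index passes
 * cell_stop, which is exactly what isNext_impl checks.
 */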

#ifdef USE_LOW_REGISTER_ITERATOR

template<unsigned int dim, int r_int, unsigned int pr_int, typename ids_type, typename cnt_type>
struct NN_gpu_int_base: public NN_gpu_int_base_hr_impl<dim,r_int,pr_int,ids_type,cnt_type>
{};

template<int r_int, unsigned int pr_int, typename ids_type, typename cnt_type>
struct NN_gpu_int_base<2,r_int,pr_int,ids_type,cnt_type>: public NN_gpu_int_base_lr_impl<2,r_int,pr_int,ids_type,cnt_type>
{};

template<int r_int, unsigned int pr_int, typename ids_type, typename cnt_type>
struct NN_gpu_int_base<3,r_int,pr_int,ids_type,cnt_type>: public NN_gpu_int_base_lr_impl<3,r_int,pr_int,ids_type,cnt_type>
{};

#else

template<unsigned int dim, int r_int, unsigned int pr_int, typename ids_type, typename cnt_type>
struct NN_gpu_int_base: public NN_gpu_int_base_hr_impl<dim,r_int,pr_int,ids_type,cnt_type>
{};

#endif

template<unsigned int dim, typename cnt_type, typename ids_type, unsigned int r_int, bool is_sparse>
class NN_gpu_it: public NN_gpu_int_base<dim,r_int,openfpm::math::pow(2*r_int+1,dim),ids_type,cnt_type>
{
	//! starting point for each cell
	const openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> & starts;

	//! sorted to non-sorted ids conversion
	const openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> & srt;

	//! number of sub-divisions in each direction
	const openfpm::array<ids_type,dim,cnt_type> & div_c;

	//! cell offset
	const openfpm::array<ids_type,dim,cnt_type> & off;

	cnt_type p_id;
	cnt_type p_id_end;

	inline __device__ __host__ void SelectValid()
	{
		while (p_id >= p_id_end && isNext())
		{
			this->SelectValid_impl(div_c);

			if (isNext() == false)	{break;}

			p_id = starts.template get<0>(this->c_id);
			p_id_end = starts.template get<0>(this->c_id+1);
		}
	}


public:

	inline __device__ __host__ NN_gpu_it(const grid_key_dx<dim,ids_type> & cell_pos,
			                             const openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> & starts,
			                             const openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> & srt,
			                             const openfpm::array<ids_type,dim,cnt_type> & div_c,
			                             const openfpm::array<ids_type,dim,cnt_type> & off)
	:starts(starts),srt(srt),div_c(div_c),off(off)
	{
		// calculate start and stop
		this->init_impl(cell_pos,div_c);

		p_id = starts.template get<0>(this->c_id);
		p_id_end = starts.template get<0>(this->c_id+1);

		SelectValid();
	}

	inline __device__ __host__ cnt_type get_sort()
	{
		return p_id;
	}

	inline __device__ __host__ cnt_type get()
	{
		return srt.template get<0>(p_id);
	}

	inline __device__ __host__ NN_gpu_it<dim,cnt_type,ids_type,r_int,is_sparse> & operator++()
	{
		++p_id;

		SelectValid();

		return *this;
	}

	inline __device__ cnt_type get_start(unsigned int ce_id)
	{
		return starts.template get<0>(ce_id);
	}

	inline __device__ cnt_type get_cid()
	{
		return this->c_id;
	}

	inline __device__ __host__ bool isNext()
	{
		return this->isNext_impl();
	}
};
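
/* Usage sketch for the dense iterator (hypothetical kernel, illustrative only:
 * "cl" is a CellList_gpu_ker passed by value, "pos" a device vector of
 * particle positions and "cnt" a device vector of counters):
 *
 *   template<unsigned int dim, typename T, typename CellList_t, typename vector_t, typename vector_cnt_t>
 *   __global__ void count_nn(CellList_t cl, vector_t pos, vector_cnt_t cnt)
 *   {
 *       unsigned int p = blockIdx.x * blockDim.x + threadIdx.x;
 *       if (p >= pos.size())	{return;}
 *
 *       Point<dim,T> xp = pos.template get<0>(p);
 *
 *       auto it = cl.getNNIterator(cl.getCell(xp));
 *       while (it.isNext())
 *       {
 *           auto q = it.get();   // neighbor id in the original (non-sorted) order
 *           cnt.template get<0>(p) += 1;
 *           ++it;
 *       }
 *   }
 */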

template<unsigned int dim, typename cnt_type, typename ids_type, unsigned int r_int>
class NN_gpu_it<dim,cnt_type,ids_type,r_int,true>
{
	cnt_type p_id;
	cnt_type p_id_end;

	cnt_type cells_list_start;
	cnt_type cells_list_stop;

	//! sorted to non-sorted ids conversion
	const openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> & srt;

	//! for each non-empty cell, the range of its neighborhood in cell_nn_list
	const openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> & cells_nn;

	//! for each neighborhood cell, the particle range <start,stop> it contains
	const openfpm::vector_gpu_ker<aggregate<cnt_type,cnt_type>,memory_traits_inte> & cell_nn_list;

	__device__ __host__ void SelectValid()
	{
		while (p_id >= p_id_end && isNext())
		{
			++cells_list_start;

			if (cells_list_start < cells_list_stop)
			{
				// calculate start and stop
				p_id = cell_nn_list.template get<0>(cells_list_start);
				p_id_end = cell_nn_list.template get<1>(cells_list_start);
			}
		}
	}


public:

	__device__ NN_gpu_it(cnt_type c_id_sparse,
			             const openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> & cells_nn,
			             const openfpm::vector_gpu_ker<aggregate<cnt_type,cnt_type>,memory_traits_inte> & cell_nn_list,
			             const openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> & srt)
	:srt(srt),cells_nn(cells_nn),cell_nn_list(cell_nn_list)
	{
		if (c_id_sparse == (cnt_type)-1)
		{
			// the cell does not exist: make the iterator immediately empty
			cells_list_stop = cells_list_start;
			return;
		}

		cells_list_start = cells_nn.template get<0>(c_id_sparse);
		cells_list_stop = cells_nn.template get<0>(c_id_sparse + 1);

		// calculate start and stop
		p_id = cell_nn_list.template get<0>(cells_list_start);
		p_id_end = cell_nn_list.template get<1>(cells_list_start);

		SelectValid();
	}

	__device__ cnt_type get_sort()
	{
		return p_id;
	}

	__device__ cnt_type get()
	{
		return srt.template get<0>(p_id);
	}

	__device__ __host__ NN_gpu_it<dim,cnt_type,ids_type,r_int,true> & operator++()
	{
		++p_id;

		SelectValid();

		return *this;
	}

	__device__ __host__ bool isNext()
	{
		return cells_list_start < cells_list_stop;
	}
};
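
/* In the sparse specialization nothing is computed on the fly: cells_nn gives,
 * for each existing cell, a [start,stop) range into cell_nn_list, and each
 * cell_nn_list entry carries the particle interval <start,stop> of one
 * neighborhood cell. The iterator therefore only chases precomputed ranges and
 * needs no grid-key arithmetic, at the cost of building cells_nn and
 * cell_nn_list on the device beforehand.
 */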

template<unsigned int dim, typename cnt_type, typename ids_type>
class NN_gpu_it_radius
{
	//! linearized id of the center cell
	cnt_type pos;

	//! index of the radius-cell offset currently visited
	cnt_type act;

	//! precomputed linearized offsets of the cells within the radius
	const openfpm::vector_gpu_ker<aggregate<int>,memory_traits_inte> & cells;

	//! starting point for each cell
	const openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> & starts;

	//! sorted to non-sorted ids conversion
	const openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> & srt;

	//! number of sub-divisions in each direction
	const openfpm::array<ids_type,dim,cnt_type> & div_c;

	//! cell offset
	const openfpm::array<ids_type,dim,cnt_type> & off;

	cnt_type p_id;
	cnt_type c_id;

	__device__ __host__ inline void SelectValid()
	{
		while (isNext() && p_id >= starts.template get<0>(c_id+1))
		{
			act++;

			if (act >= cells.size())
			{break;}

			c_id = pos + cells.template get<0>(act);
			p_id = starts.template get<0>(c_id);
		}
	}


public:

	__device__ __host__ inline NN_gpu_it_radius(const grid_key_dx<dim,ids_type> & cell_pos,
			                                    const openfpm::vector_gpu_ker<aggregate<int>,memory_traits_inte> & cells,
			                                    const openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> & starts,
			                                    const openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> & srt,
			                                    const openfpm::array<ids_type,dim,cnt_type> & div_c,
			                                    const openfpm::array<ids_type,dim,cnt_type> & off)
	:act(0),cells(cells),starts(starts),srt(srt),div_c(div_c),off(off)
	{
		// calculate start and stop
		pos = cid_<dim,cnt_type,ids_type,int>::get_cid(div_c,cell_pos);
		c_id = pos + cells.template get<0>(act);
		p_id = starts.template get<0>(c_id);

		SelectValid();
	}

	__device__ cnt_type get_sort()
	{
		return p_id;
	}

	__device__ cnt_type get()
	{
		return srt.template get<0>(p_id);
	}

	__device__ __host__ NN_gpu_it_radius<dim,cnt_type,ids_type> & operator++()
	{
		++p_id;

		SelectValid();

		return *this;
	}

	__device__ cnt_type get_start(unsigned int ce_id)
	{
		return starts.template get<0>(ce_id);
	}

	__device__ cnt_type get_cid()
	{
		return c_id;
	}

	__device__ __host__ bool isNext()
	{
		return act < cells.size();
	}
};
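
/* The radius iterator scans a precomputed list of linearized cell offsets
 * (cells, filled from rad_cells in CellList_gpu_ker) instead of a full
 * (2r+1)^dim box, so cells whose minimum distance from the center exceeds the
 * radius never appear. For example, in 2D with a one-cell radius and row-major
 * linearization the list would hold the offsets -div_c[0]-1, -div_c[0],
 * -div_c[0]+1, -1, 0, +1, div_c[0]-1, div_c[0], div_c[0]+1 (illustrative
 * values).
 */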

template<unsigned int dim,typename cnt_type,typename ids_type,bool is_sparse>
struct NN_gpu_selector
{
	static inline __device__ __host__ NN_gpu_it<dim,cnt_type,ids_type,1,is_sparse> get(const grid_key_dx<dim,ids_type> & cid,
			                                                                           const openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> & starts,
			                                                                           const openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> & srt,
			                                                                           const openfpm::array<ids_type,dim,cnt_type> & div_c,
			                                                                           const openfpm::array<ids_type,dim,cnt_type> & off)
	{
		NN_gpu_it<dim,cnt_type,ids_type,1,is_sparse> ngi(cid,starts,srt,div_c,off);

		return ngi;
	}
};

template<unsigned int dim,typename cnt_type,typename ids_type>
struct NN_gpu_selector<dim,cnt_type,ids_type,true>
{
	static inline __device__ NN_gpu_it<dim,cnt_type,ids_type,1,true> get(cnt_type c_id_sparse,
			                                                             const openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> & cells_nn,
			                                                             const openfpm::vector_gpu_ker<aggregate<cnt_type,cnt_type>,memory_traits_inte> & cell_nn_list,
			                                                             const openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> & srt)
	{
		NN_gpu_it<dim,cnt_type,ids_type,1,true> ngi(c_id_sparse,cells_nn,cell_nn_list,srt);

		return ngi;
	}
};

template<unsigned int dim, typename T, typename cnt_type, typename ids_type, typename transform, bool is_sparse>
class CellList_gpu_ker: public CellDecomposer_gpu_ker<dim,T,cnt_type,ids_type,transform>
{
	//! starting point for each cell
	openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> starts;

	//! sorted to non-sorted ids conversion
	openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> srt;

	//! domain particles ids
	openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> dprt;

	//! radius cells
	openfpm::vector_gpu_ker<aggregate<int>,memory_traits_inte> rad_cells;

	//! ghost particle marker
	unsigned int g_m;

public:

	//! indicate that this is a gpu-kernel cell-list
	typedef int yes_is_gpu_ker_celllist;

	//! indicate that this structure has a function to check the device pointer
	typedef int yes_has_check_device_pointer;

	__host__ __device__ inline CellList_gpu_ker()
	:g_m(0)
	{}

	__host__ __device__ inline CellList_gpu_ker(openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> starts,
			                                    openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> srt,
			                                    openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> dprt,
			                                    openfpm::vector_gpu_ker<aggregate<int>,memory_traits_inte> rad_cells,
			                                    const openfpm::array<T,dim,cnt_type> & spacing_c,
			                                    const openfpm::array<ids_type,dim,cnt_type> & div_c,
			                                    const openfpm::array<ids_type,dim,cnt_type> & off,
			                                    const transform & t,
			                                    unsigned int g_m)
	:CellDecomposer_gpu_ker<dim,T,cnt_type,ids_type,transform>(spacing_c,div_c,off,t),
	 starts(starts),srt(srt),dprt(dprt),rad_cells(rad_cells),g_m(g_m)
	{
	}


	template<unsigned int stub = NO_CHECK>
	inline __device__ __host__ NN_gpu_it<dim,cnt_type,ids_type,1,is_sparse> getNNIterator(const grid_key_dx<dim,ids_type> & cid)
	{
		NN_gpu_it<dim,cnt_type,ids_type,1,is_sparse> ngi(cid,starts,srt,this->get_div_c(),this->get_off());

		return ngi;
	}

	inline __device__ __host__ NN_gpu_it_radius<dim,cnt_type,ids_type> getNNIteratorRadius(const grid_key_dx<dim,ids_type> & cid)
	{
		NN_gpu_it_radius<dim,cnt_type,ids_type> ngi(cid,rad_cells,starts,srt,this->get_div_c(),this->get_off());

		return ngi;
	}

	template<unsigned int r_int = 2> inline __device__ NN_gpu_it<dim,cnt_type,ids_type,r_int,is_sparse> getNNIteratorBox(const grid_key_dx<dim,ids_type> & cid)
	{
		NN_gpu_it<dim,cnt_type,ids_type,r_int,is_sparse> ngi(cid,starts,srt,this->get_div_c(),this->get_off());

		return ngi;
	}

	inline __device__ openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> & getDomainSortIds()
	{
		return dprt;
	}

	inline __device__ openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> & getSortToNonSort()
	{
		return srt;
	}

	/*! \brief Get the number of cells this cell-list contains
	 *
	 * \return the number of cells
	 */
	inline __device__ unsigned int getNCells() const
	{
		return starts.size() - 1;
	}

	/*! \brief Return the number of elements in the cell
	 *
	 * \param cell_id id of the cell
	 *
	 * \return the number of elements in the cell
	 */
	inline __device__ cnt_type getNelements(const cnt_type cell_id) const
	{
		return starts.template get<0>(cell_id+1) - starts.template get<0>(cell_id);
	}

	/*! \brief Get an element in the cell
	 *
	 * \param cell cell id
	 * \param ele element id within the cell
	 *
	 * \return the element (particle) index
	 */
	inline __device__ cnt_type get(size_t cell, size_t ele)
	{
		cnt_type p_id = starts.template get<0>(cell) + ele;
		return srt.template get<0>(p_id);
	}


	inline __device__ unsigned int get_g_m()
	{
		return g_m;
	}

#ifdef SE_CLASS1

	/*! \brief Check if the device pointer belongs to one of the internal structures
	 *
	 * \param ptr pointer to check
	 *
	 * \return the result of the check
	 */
	pointer_check check_device_pointer(void * ptr)
	{
		pointer_check pc;

		pc = starts.check_device_pointer(ptr);

		if (pc.match == true)
		{
			pc.match_str = std::string("Cell index overflow (starts): ") + "\n" + pc.match_str;
			return pc;
		}

		pc = srt.check_device_pointer(ptr);

		if (pc.match == true)
		{
			pc.match_str = std::string("Particle index overflow (srt): ") + "\n" + pc.match_str;
			return pc;
		}

		pc = dprt.check_device_pointer(ptr);

		if (pc.match == true)
		{
			pc.match_str = std::string("Particle index overflow (dprt): ") + "\n" + pc.match_str;
			return pc;
		}

		pc = rad_cells.check_device_pointer(ptr);

		if (pc.match == true)
		{
			pc.match_str = std::string("Cell index overflow (rad_cells): ") + "\n" + pc.match_str;
			return pc;
		}

		return pc;
	}

#endif
};
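
/* Typical host-side flow (sketch, assuming a host cell-list "cl" with the
 * usual OpenFPM toKernel() accessor; count_nn is the hypothetical kernel
 * sketched after NN_gpu_it above):
 *
 *   auto cl_ker = cl.toKernel();   // CellList_gpu_ker<...>, copyable by value
 *   auto ite = pos.getGPUIterator();
 *   CUDA_LAUNCH((count_nn<dim,T,decltype(cl_ker),decltype(pos.toKernel()),decltype(cnt.toKernel())>),
 *               ite,cl_ker,pos.toKernel(),cnt.toKernel());
 */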


template<unsigned int dim, typename T, typename cnt_type, typename ids_type, typename transform>
class CellList_gpu_ker<dim,T,cnt_type,ids_type,transform,true>: public CellDecomposer_gpu_ker<dim,T,cnt_type,ids_type,transform>
{
	//! starting point for each cell
	openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> cell_nn;

	//! for each neighborhood cell, the particle range <start,stop> it contains
	openfpm::vector_gpu_ker<aggregate<cnt_type,cnt_type>,memory_traits_inte> cell_nn_list;

	//! sorted to non-sorted ids conversion
	openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> srt;

	//! domain particles ids
	openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> dprt;

	//! set of cells (sparse)
	openfpm::vector_sparse_gpu_ker<aggregate<cnt_type>,int,memory_traits_inte> cl_sparse;

	//! ghost particle marker
	unsigned int g_m;

public:

	//! indicate that this structure has a function to check the device pointer
	typedef int yes_has_check_device_pointer;

	__host__ __device__ inline CellList_gpu_ker(openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> cell_nn,
			                                    openfpm::vector_gpu_ker<aggregate<cnt_type,cnt_type>,memory_traits_inte> cell_nn_list,
			                                    openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> srt,
			                                    openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> dprt,
			                                    openfpm::vector_sparse_gpu_ker<aggregate<cnt_type>,int,memory_traits_inte> cl_sparse,
			                                    const openfpm::array<T,dim,cnt_type> & spacing_c,
			                                    const openfpm::array<ids_type,dim,cnt_type> & div_c,
			                                    const openfpm::array<ids_type,dim,cnt_type> & off,
			                                    const transform & t,
			                                    unsigned int g_m)
	:CellDecomposer_gpu_ker<dim,T,cnt_type,ids_type,transform>(spacing_c,div_c,off,t),cell_nn(cell_nn),cell_nn_list(cell_nn_list),srt(srt),dprt(dprt),
	 cl_sparse(cl_sparse),g_m(g_m)
	{
	}

	inline __device__ auto getCell(const Point<dim,T> & xp) const -> decltype(cl_sparse.get_sparse(0))
	{
		cnt_type cell = cid_<dim,cnt_type,ids_type,transform>::get_cid(this->get_div_c(),this->get_spacing_c(),this->get_off(),this->get_t(),xp);

		return cl_sparse.get_sparse(cell);
	}


	template<unsigned int stub = NO_CHECK>
	inline __device__ NN_gpu_it<dim,cnt_type,ids_type,1,true> getNNIterator(decltype(cl_sparse.get_sparse(0)) cid)
	{
		NN_gpu_it<dim,cnt_type,ids_type,1,true> ngi(cid.id,cell_nn,cell_nn_list,srt);

		return ngi;
	}

	template<unsigned int r_int = 2>
	inline __device__ NN_gpu_it<dim,cnt_type,ids_type,r_int,true>
	getNNIteratorBox(decltype(cl_sparse.get_sparse(0)) cid)
	{
		NN_gpu_it<dim,cnt_type,ids_type,r_int,true> ngi(cid.id,cell_nn,cell_nn_list,srt);

		return ngi;
	}


	inline __device__ openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> & getDomainSortIds()
	{
		return dprt;
	}

	inline __device__ openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> & getSortToNonSort()
	{
		return srt;
	}


	inline __device__ unsigned int get_g_m()
	{
		return g_m;
	}

#ifdef SE_CLASS1

	/*! \brief Check if the device pointer belongs to one of the internal structures
	 *
	 * \param ptr pointer to check
	 *
	 * \return the result of the check
	 */
	pointer_check check_device_pointer(void * ptr)
	{
		pointer_check pc;

		pc = cell_nn.check_device_pointer(ptr);

		if (pc.match == true)
		{
			pc.match_str = std::string("Cell index overflow (cell_nn): ") + "\n" + pc.match_str;
			return pc;
		}

		pc = cell_nn_list.check_device_pointer(ptr);

		if (pc.match == true)
		{
			pc.match_str = std::string("Cell particle buffer overflow (cell_nn_list): ") + "\n" + pc.match_str;
			return pc;
		}

		pc = srt.check_device_pointer(ptr);

		if (pc.match == true)
		{
			pc.match_str = std::string("Particle index overflow (srt): ") + "\n" + pc.match_str;
			return pc;
		}

		pc = dprt.check_device_pointer(ptr);

		if (pc.match == true)
		{
			pc.match_str = std::string("Particle index overflow (dprt): ") + "\n" + pc.match_str;
			return pc;
		}

		return pc;
	}

#endif
};
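
/* With the sparse specialization the cell lookup and the neighbor iteration
 * are split (sketch, hypothetical device code):
 *
 *   auto cell = cl.getCell(xp);              // sparse lookup, may refer to a non-existing cell
 *   auto it = cl.getNNIteratorBox<2>(cell);  // empty iterator if cell.id == (cnt_type)-1
 *   while (it.isNext())
 *   {auto q = it.get(); ++it;}
 */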


#endif /* CELLLIST_GPU_KER_CUH_ */