OpenFPM  5.2.0
Project that contain the implementation of distributed structures
Cuda_cell_list_util_func.hpp
1 /*
2  * Cuda_cell_list_util_func.hpp
3  *
4  * Created on: Jun 17, 2018
5  * Author: i-bird
6  */
7 
8 #ifndef OPENFPM_DATA_SRC_NN_CELLLIST_CUDA_CUDA_CELL_LIST_UTIL_FUNC_HPP_
9 #define OPENFPM_DATA_SRC_NN_CELLLIST_CUDA_CUDA_CELL_LIST_UTIL_FUNC_HPP_
10 
11 #include <boost/integer/integer_mask.hpp>
12 #include <Vector/map_vector_sparse.hpp>
13 
14 
15 template<unsigned int dim, typename ids_type, typename transform_type>
16 struct cid_
17 {
18  static inline __device__ __host__ unsigned int get_cid(
19  openfpm::array<ids_type,1> & numCellDiv,
20  ids_type * e)
21  {
22  unsigned int id = e[dim-1];
23 
24 #pragma unroll
25  for (int i = 1; i >= 0 ; i-- )
26  {id = e[i] + numCellDiv[i]*id;}
27 
28  return id;
29  }
30 
31  static inline __device__ __host__ unsigned int get_cid(
32  const openfpm::array<ids_type,dim> & numCellDiv,
34  {
35  unsigned int id = e.get(dim-1);
36 
37 #pragma unroll
38  for (int i = 1; i >= 0 ; i-- )
39  {id = e.get(i) + numCellDiv[i]*id;}
40 
41  return id;
42  }
43 
44  template<typename T>
45  static inline __device__ __host__ unsigned int get_cid(
46  const openfpm::array<ids_type,dim> & numCellDiv,
47  const openfpm::array<T,dim> & unitCellP2,
48  const transform_type & pointTransform,
49  const Point<dim,T> & p)
50  {
51  unsigned int id = p.get(dim-1) / unitCellP2[dim-1];
52 
53 #pragma unroll
54  for (int i = 1; i >= 0 ; i-- )
55  {id = pointTransform.transform(p.get(i),i) / unitCellP2[i] + numCellDiv[i]*id;}
56 
57  return id;
58  }
59 };
60 
61 template<typename ids_type, typename transform_type>
62 struct cid_<1,ids_type, transform_type>
63 {
64  static inline __device__ __host__ unsigned int get_cid(
65  const openfpm::array<ids_type,1> & numCellDiv,
66  ids_type * e)
67  {
68  return e[0];
69  }
70 
71  static inline __device__ __host__ unsigned int get_cid(
72  const openfpm::array<ids_type,1> & numCellDiv,
73  const grid_key_dx<1,ids_type> & e)
74  {
75  return e.get(0);
76  }
77 
78  template<typename T>
79  static inline __device__ __host__ unsigned int get_cid(
80  const openfpm::array<ids_type,1> & numCellDiv,
81  const openfpm::array<T,1> & unitCellP2,
82  const openfpm::array<ids_type,1> & cellPadDim,
83  const transform_type & pointTransform,
84  const Point<1,T> & p)
85  {
86  return pointTransform.transform(p.get(0),0) / unitCellP2[0] + cellPadDim[0];
87  }
88 
89  template<typename T>
90  static inline __device__ __host__ unsigned int get_cid(
91  const openfpm::array<ids_type,1> & numCellDiv,
92  const openfpm::array<T,1> & unitCellP2,
93  const openfpm::array<ids_type,1> & cellPadDim,
94  const transform_type & pointTransform,
95  const T * p,
96  ids_type * e)
97  {
98  e[0] = openfpm::math::uint_floor(pointTransform.transform(p,0)/unitCellP2[0]) + cellPadDim[0];
99 
100  return e[0];
101  }
102 
103  template<typename T>
104  static inline __device__ __host__ grid_key_dx<2,ids_type> get_cid_key(
105  const openfpm::array<T,1> & unitCellP2,
106  const openfpm::array<ids_type,1> & cellPadDim,
107  const transform_type & pointTransform,
108  const Point<2,T> & p)
109  {
111 
112  e.set_d(0,openfpm::math::uint_floor(pointTransform.transform(p,0)/unitCellP2[0]) + cellPadDim[0]);
113 
114  return e;
115  }
116 
117  template <typename U = unsigned int, typename sfinae=typename std::enable_if<std::is_same<ids_type,U>::value >::type >
118  static inline __device__ __host__ unsigned int get_cid(
119  const openfpm::array<ids_type,1> & numCellDiv,
120  const grid_key_dx<1,unsigned int> & e)
121  {
122  return e.get(0);
123  }
124 };
125 
126 template<typename ids_type, typename transform_type>
127 struct cid_<2,ids_type,transform_type>
128 {
129  static inline __device__ __host__ unsigned int get_cid(
130  const openfpm::array<ids_type,2> & numCellDiv,
131  ids_type * e)
132  {
133  return e[0] + numCellDiv[0] * e[1];
134  }
135 
136  static inline __device__ __host__ unsigned int get_cid(
137  const openfpm::array<ids_type,2> & numCellDiv,
138  const grid_key_dx<2,ids_type> & e)
139  {
140  return e.get(0) + numCellDiv[0] * e.get(1);
141  }
142 
143  template<typename T>
144  static inline __device__ __host__ unsigned int get_cid(
145  const openfpm::array<ids_type,2> & numCellDiv,
146  const openfpm::array<T,2> & unitCellP2,
147  const openfpm::array<ids_type,2> & cellPadDim,
148  const transform_type & pointTransform,
149  const Point<2,T> & p)
150  {
151 
152  return openfpm::math::uint_floor(pointTransform.transform(p,0)/unitCellP2[0]) + cellPadDim[0] +
153  (openfpm::math::uint_floor(pointTransform.transform(p,1)/unitCellP2[1]) + cellPadDim[1])*numCellDiv[0];
154  }
155 
156  template<typename T>
157  static inline __device__ __host__ unsigned int get_cid(
158  const openfpm::array<ids_type,2> & numCellDiv,
159  const openfpm::array<T,2> & unitCellP2,
160  const openfpm::array<ids_type,2> & cellPadDim,
161  const transform_type & pointTransform,
162  const T * p,
163  ids_type * e)
164  {
165  e[0] = openfpm::math::uint_floor(pointTransform.transform(p,0)/unitCellP2[0]) + cellPadDim[0];
166  e[1] = openfpm::math::uint_floor(pointTransform.transform(p,1)/unitCellP2[1]) + cellPadDim[1];
167 
168  return e[0] + e[1]*numCellDiv[0];
169  }
170 
171  template<typename T>
172  static inline __device__ __host__ grid_key_dx<2,ids_type> get_cid_key(
173  const openfpm::array<T,2> & unitCellP2,
174  const openfpm::array<ids_type,2> & cellPadDim,
175  const transform_type & pointTransform,
176  const Point<2,T> & p)
177  {
179 
180  e.set_d(0,openfpm::math::uint_floor(pointTransform.transform(p,0)/unitCellP2[0]) + cellPadDim[0]);
181  e.set_d(1,openfpm::math::uint_floor(pointTransform.transform(p,1)/unitCellP2[1]) + cellPadDim[1]);
182 
183  return e;
184  }
185 
186  template <typename U = unsigned int, typename sfinae=typename std::enable_if<std::is_same<ids_type,U>::value >::type >
187  static inline __device__ __host__ unsigned int get_cid(
188  const openfpm::array<ids_type,2> & numCellDiv,
189  const grid_key_dx<2,unsigned int> & e)
190  {
191  return e.get(0) + e.get(1)*numCellDiv[0];
192  }
193 };
194 
195 
196 template<typename ids_type,typename transform_type>
197 struct cid_<3,ids_type,transform_type>
198 {
199 
200  static inline __device__ __host__ unsigned int get_cid(
201  const openfpm::array<ids_type,3> & numCellDiv,
202  const ids_type * e)
203  {
204  return e[0] + (e[1] + e[2]*numCellDiv[1])*numCellDiv[0];
205  }
206 
207  static inline __device__ __host__ unsigned int get_cid(
208  const openfpm::array<ids_type,3> & numCellDiv,
209  const grid_key_dx<3,ids_type> & e)
210  {
211  return e.get(0) + (e.get(1) + e.get(2)*numCellDiv[1])*numCellDiv[0];
212  }
213 
214  static inline __device__ __host__ unsigned int get_cid(
215  const openfpm::array<ids_type,3> & numCellDiv,
216  const openfpm::array<ids_type,3> & cellPadDim,
217  const grid_key_dx<3,ids_type> & e)
218  {
219  return (e.get(0) + cellPadDim[0]) + ((e.get(1) + cellPadDim[1]) + (e.get(2) + cellPadDim[2])*numCellDiv[1])*numCellDiv[0];
220  }
221 
222  template<typename T> static inline __device__ __host__ unsigned int get_cid(
223  const openfpm::array<ids_type,3> & numCellDiv,
224  const openfpm::array<T,3> & unitCellP2,
225  const openfpm::array<ids_type,3> & cellPadDim,
226  const transform_type & pointTransform,
227  const Point<3,T> & p)
228  {
229  return openfpm::math::uint_floor(pointTransform.transform(p,0)/unitCellP2[0]) + cellPadDim[0] +
230  (openfpm::math::uint_floor(pointTransform.transform(p,1)/unitCellP2[1]) + cellPadDim[1] +
231  (openfpm::math::uint_floor(pointTransform.transform(p,2)/unitCellP2[2]) + cellPadDim[2])*numCellDiv[1])*numCellDiv[0];
232  }
233 
234  template<typename T>
235  static inline __device__ __host__ unsigned int get_cid(
236  const openfpm::array<ids_type,3> & numCellDiv,
237  const openfpm::array<T,3> & unitCellP2,
238  const openfpm::array<ids_type,3> & cellPadDim,
239  const transform_type & pointTransform,
240  const T * p,
241  ids_type * e)
242  {
243  e[0] = openfpm::math::uint_floor(pointTransform.transform(p,0)/unitCellP2[0]) + cellPadDim[0];
244  e[1] = openfpm::math::uint_floor(pointTransform.transform(p,1)/unitCellP2[1]) + cellPadDim[1];
245  e[2] = openfpm::math::uint_floor(pointTransform.transform(p,2)/unitCellP2[2]) + cellPadDim[2];
246 
247  return e[0] + (e[1] + e[2]*numCellDiv[1])*numCellDiv[0];
248  }
249 
250  template<typename T>
251  static inline __device__ __host__ grid_key_dx<3,ids_type> get_cid_key(
252  const openfpm::array<T,3> & unitCellP2,
253  const openfpm::array<ids_type,3> & cellPadDim,
254  const transform_type & pointTransform,
255  const Point<3,T> & p)
256  {
258 
259  e.set_d(0,openfpm::math::uint_floor(pointTransform.transform(p,0)/unitCellP2[0]) + cellPadDim[0]);
260  e.set_d(1,openfpm::math::uint_floor(pointTransform.transform(p,1)/unitCellP2[1]) + cellPadDim[1]);
261  e.set_d(2,openfpm::math::uint_floor(pointTransform.transform(p,2)/unitCellP2[2]) + cellPadDim[2]);
262 
263  return e;
264  }
265 
266  template <typename U = unsigned int, typename sfinae=typename std::enable_if<std::is_same<ids_type,U>::value >::type >
267  static inline __device__ __host__ unsigned int get_cid(
268  const openfpm::array<ids_type,3> & numCellDiv,
269  const grid_key_dx<3,unsigned int> & e)
270  {
271  return e.get(0) + (e.get(1) + e.get(2)*numCellDiv[1])*numCellDiv[0];
272  }
273 };
274 
276 
277 #ifdef __NVCC__
278 
279 template<unsigned int dim, typename pos_type, typename ids_type, typename transform_type,
280 typename vector_pos_type, typename vector_cnt_type, typename vector_pids_type>
281 __global__ void fill_cellIndex_LocalIndex(
282  openfpm::array<ids_type,dim> numCellDiv,
283  openfpm::array<pos_type,dim> unitCellP2,
284  openfpm::array<ids_type,dim> cellPadDim,
285  transform_type pointTransform,
286  size_t n_part,
287  size_t start,
288  vector_pos_type vPos,
289  vector_cnt_type numPartInCell,
290  vector_pids_type cellIndex_LocalIndex)
291 {
292  unsigned int i, cid, ins;
293  ids_type e[dim+1];
294 
295  i = threadIdx.x + blockIdx.x * blockDim.x + start;
296  ins = threadIdx.x + blockIdx.x * blockDim.x;
297  if (i >= n_part) return;
298 
299  pos_type p[dim];
300 
301  for (size_t k = 0 ; k < dim ; k++)
302  p[k] = vPos.template get<0>(i)[k];
303 
304  cid = cid_<dim,ids_type,transform_type>::get_cid(numCellDiv,unitCellP2,cellPadDim,pointTransform,p,e);
305 
306  e[dim] = atomicAdd(&numPartInCell.template get<0>(cid), 1);
307  cellIndex_LocalIndex.template get<0>(ins)[0] = cid;
308  cellIndex_LocalIndex.template get<0>(ins)[1] = e[dim];
309 }
310 
311 template<unsigned int dim, typename pos_type, typename ids_type, typename transform_type,
312 typename vector_pos_type, typename vector_cnt_type>
313 __global__ void fill_cellIndex(
314  openfpm::array<ids_type,dim> numCellDiv,
315  openfpm::array<pos_type,dim> unitCellP2,
316  openfpm::array<ids_type,dim> cellPadDim,
317  transform_type pointTransform,
318  size_t n_part,
319  size_t start,
320  vector_pos_type vPos,
321  vector_cnt_type cellIndex)
322 {
323  int i = threadIdx.x + blockIdx.x * blockDim.x + start;
324  int ins = threadIdx.x + blockIdx.x * blockDim.x;
325  if (i >= n_part) return;
326 
328 
329  for (size_t k = 0 ; k < dim ; k++)
330  p[k] = vPos.template get<0>(i)[k];
331 
332  cellIndex.template get<0>(ins) = cid_<dim,ids_type,transform_type>::get_cid(numCellDiv,unitCellP2,cellPadDim,pointTransform,p);
333 }
334 
335 template<typename vector_sparse, typename vector_cell>
336 __global__ void fill_vsCellIndex_PartIndex(
337  vector_sparse vecSparseCellIndex_PartIndex,
338  vector_cell cellIndex)
339 {
340  vecSparseCellIndex_PartIndex.init();
341 
342  int p = blockIdx.x*blockDim.x + threadIdx.x;
343 
344  if (p < cellIndex.size())
345  {
346  int c = cellIndex.template get<0>(p);
347  vecSparseCellIndex_PartIndex.template insert<0>(c) = p;
348  }
349 
350  vecSparseCellIndex_PartIndex.flush_block_insert();
351 }
352 
353 template<typename vector_starts_type, typename vector_pids_type, typename vector_cells_type>
354 __global__ void fill_cells(
355  vector_starts_type numPartInCellPrefixSum,
356  vector_pids_type cellIndex_LocalIndex,
357  vector_cells_type cellIndexLocalIndexToUnsorted,
358  size_t startParticle=0)
359 {
360  unsigned int cid, id, cellStart;
361 
362  int tid = threadIdx.x + blockIdx.x * blockDim.x;
363  if (tid >= cellIndex_LocalIndex.size()) return;
364 
365  cid = cellIndex_LocalIndex.template get<0>(tid)[0];
366 
367  cellStart = numPartInCellPrefixSum.template get<0>(cid);
368  id = cellStart + cellIndex_LocalIndex.template get<0>(tid)[1];
369 
370  cellIndexLocalIndexToUnsorted.template get<0>(id) = tid + startParticle;
371 }
372 
373 template <typename vector_map_type, typename vector_cells_type>
374 __global__ void constructSortUnsortBidirectMap(
375  vector_map_type sortedToUnsortedIndex,
376  vector_map_type unsortedToSortedIndex,
377  const vector_cells_type cellIndexLocalIndexToUnsorted)
378 {
379  unsigned int tid = threadIdx.x + blockIdx.x * blockDim.x;
380  if (tid >= sortedToUnsortedIndex.size()) {return;}
381 
382  unsigned int pid = cellIndexLocalIndexToUnsorted.template get<0>(tid);
383 
384  sortedToUnsortedIndex.template get<0>(tid) = pid;
385  unsortedToSortedIndex.template get<0>(pid) = tid;
386 }
387 
388 template <typename vector_type, typename vector_map_type>
389 __global__ void reorderParticlesPos(
390  const vector_type vectorIn,
391  vector_type vectorOut,
392  const vector_map_type indexMap,
393  size_t start = 0)
394 {
395  int keyIn = start + threadIdx.x + blockIdx.x * blockDim.x;
396  if (keyIn >= indexMap.size()) {return;}
397 
398  unsigned int keyOut = indexMap.template get<0>(keyIn);
399 
400  vectorOut.set(keyOut,vectorIn,keyIn);
401 }
402 
403 template <typename vector_type, typename vector_map_type, unsigned int ... prp>
404 __global__ void reorderParticlesPrp(
405  const vector_type vectorIn,
406  vector_type vectorOut,
407  vector_map_type indexMap,
408  size_t start = 0)
409 {
410  int keyIn = start + threadIdx.x + blockIdx.x * blockDim.x;
411  if (keyIn >= indexMap.size()) {return;}
412 
413  unsigned int keyOut = indexMap.template get<0>(keyIn);
414 
415  vectorOut.template set<prp ...>(keyOut,vectorIn,keyIn);
416 }
417 
418 template<typename vector_sort_index, typename vector_out_type>
419 __global__ void mark_domain_particles(
420  vector_sort_index sortedToUnsortedIndex,
421  vector_out_type isSortedDomainOrGhost,
422  size_t ghostMarker)
423 {
424  int i = threadIdx.x + blockIdx.x * blockDim.x;
425 
426  if (i >= sortedToUnsortedIndex.size()) return;
427 
428  isSortedDomainOrGhost.template get<0>(i) = (sortedToUnsortedIndex.template get<0>(i) < ghostMarker)?1:0;
429 }
430 
431 template<typename scan_type, typename vector_out_type>
432 __global__ void collect_domain_ghost_ids(
433  scan_type isUnsortedDomainOrGhostPrefixSum,
434  vector_out_type sortedToSortedIndexNoGhost)
435 {
436  int i = threadIdx.x + blockIdx.x * blockDim.x;
437 
438  if (i >= isUnsortedDomainOrGhostPrefixSum.size()-1) return;
439 
440  auto pp = isUnsortedDomainOrGhostPrefixSum.template get<0>(i+1);
441  auto p = isUnsortedDomainOrGhostPrefixSum.template get<0>(i);
442 
443  if (pp != p)
444  sortedToSortedIndexNoGhost.template get<0>(isUnsortedDomainOrGhostPrefixSum.template get<0>(i)) = i;
445 }
446 
447 template<typename cl_sparse_type, typename vector_type, typename vector_type2>
448 __global__ void countNonEmptyNeighborCells(
449  cl_sparse_type vecSparseCellIndex_PartIndex,
450  vector_type neighborCellCount,
451  vector_type2 neighborCellOffset)
452 {
453  typedef typename cl_sparse_type::index_type index_type;
454 
455  int tid = threadIdx.x + blockIdx.x * blockDim.x;
456  if (tid >= vecSparseCellIndex_PartIndex.size()) {return;}
457 
459  id.id = tid;
460 
461  index_type cell = vecSparseCellIndex_PartIndex.get_index(id);
462 
463  for (int i = 0 ; i < neighborCellOffset.size() ; i++)
464  {
465  index_type neighborCellIndex = cell + neighborCellOffset.template get<0>(i);
466  index_type start = vecSparseCellIndex_PartIndex.template get<0>(neighborCellIndex);
467 
468  if (start != (index_type)-1)
469  {
470  // Cell exist
471  neighborCellCount.template get<0>(tid) += 1;
472  }
473  }
474 };
475 
476 template<typename cl_sparse_type, typename vector_type, typename vector_type2, typename vector_type3>
477 __global__ void fillNeighborCellList(
478  cl_sparse_type vecSparseCellIndex_PartIndex,
479  vector_type neighborCellCountPrefixSum,
480  vector_type2 neighborCellOffset,
481  vector_type3 neighborPartIndexFrom_To,
482  typename cl_sparse_type::index_type stop)
483 {
484  typedef typename cl_sparse_type::index_type index_type;
485 
486  int tid = threadIdx.x + blockIdx.x * blockDim.x;
487  if (tid >= vecSparseCellIndex_PartIndex.size()) {return;}
488 
489  openfpm::sparse_index<index_type> id; id.id = tid;
490  index_type cell = vecSparseCellIndex_PartIndex.get_index(id);
491 
492  for (int i = 0, cnt = 0; i < neighborCellOffset.size(); i++)
493  {
494  index_type neighborCellIndex = cell + neighborCellOffset.template get<0>(i);
495  auto sid = vecSparseCellIndex_PartIndex.get_sparse(neighborCellIndex);
496 
497  if (sid.id != vecSparseCellIndex_PartIndex.size())
498  {
499  neighborPartIndexFrom_To.template get<0>(neighborCellCountPrefixSum.template get<0>(tid) + cnt) = vecSparseCellIndex_PartIndex.template get<0>(sid);
500 
501  if (++sid.id != vecSparseCellIndex_PartIndex.size())
502  neighborPartIndexFrom_To.template get<1>(neighborCellCountPrefixSum.template get<0>(tid) + cnt) = vecSparseCellIndex_PartIndex.template get<0>(sid);
503  else
504  neighborPartIndexFrom_To.template get<1>(neighborCellCountPrefixSum.template get<0>(tid) + cnt) = stop;
505 
506  ++cnt;
507  }
508  }
509 };
510 
511 
513 
514 #endif
515 
516 #endif /* OPENFPM_DATA_SRC_NN_CELLLIST_CUDA_CUDA_CELL_LIST_UTIL_FUNC_HPP_ */
This class implement the point shape in an N-dimensional space.
Definition: Point.hpp:28
grid_key_dx is the key to access any element in the grid
Definition: grid_key.hpp:19
__device__ __host__ void set_d(index_type i, index_type id)
Set the i index.
Definition: grid_key.hpp:516
__device__ __host__ index_type get(index_type i) const
Get the i index.
Definition: grid_key.hpp:503
Distributed vector.