OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
Cuda_cell_list_util_func.hpp
/*
 * Cuda_cell_list_util_func.hpp
 *
 *  Created on: Jun 17, 2018
 *      Author: i-bird
 */

#ifndef OPENFPM_DATA_SRC_NN_CELLLIST_CUDA_CUDA_CELL_LIST_UTIL_FUNC_HPP_
#define OPENFPM_DATA_SRC_NN_CELLLIST_CUDA_CUDA_CELL_LIST_UTIL_FUNC_HPP_

#include <boost/integer/integer_mask.hpp>
#include <Vector/map_vector_sparse.hpp>

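/*! \brief Helper that linearizes a dim-dimensional cell coordinate into a single cell id
 *
 * The generic template handles any dimension; the specializations below for 1, 2 and 3
 * dimensions unroll the index arithmetic and additionally provide overloads that compute
 * the cell id directly from a particle position, applying the transform t, the cell
 * spacing and the cell index offset off.
 */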
template<unsigned int dim, typename cnt_type, typename ids_type, typename transform>
struct cid_
{
    static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,dim,cnt_type> & div_c, ids_type * e)
    {
        cnt_type id = e[dim-1];

#pragma unroll
        for (int i = dim-2; i >= 0 ; i--)
        {id = e[i] + div_c[i]*id;}

        return id;
    }

    static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,dim,cnt_type> & div_c, const grid_key_dx<dim,cnt_type> & e)
    {
        cnt_type id = e.get(dim-1);

#pragma unroll
        for (int i = dim-2; i >= 0 ; i--)
        {id = e.get(i) + div_c[i]*id;}

        return id;
    }

    template<typename T> static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,dim,cnt_type> & div_c,
                                                                             const openfpm::array<T,dim,cnt_type> & spacing,
                                                                             const transform & t,
                                                                             const Point<dim,T> & p)
    {
        cnt_type id = p.get(dim-1) / spacing[dim-1];

#pragma unroll
        for (int i = dim-2; i >= 0 ; i--)
        {id = t.transform(p.get(i),i) / spacing[i] + div_c[i]*id;}

        return id;
    }
};

template<typename cnt_type, typename ids_type, typename transform>
struct cid_<1,cnt_type,ids_type,transform>
{
    static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,1,cnt_type> & div_c, ids_type * e)
    {
        return e[0];
    }

    static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,1,cnt_type> & div_c, const grid_key_dx<1,ids_type> & e)
    {
        return e.get(0);
    }

    template<typename T> static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,1,cnt_type> & div_c,
                                                                             const openfpm::array<T,1,cnt_type> & spacing,
                                                                             const openfpm::array<ids_type,1,cnt_type> & off,
                                                                             const transform & t,
                                                                             const Point<1,T> & p)
    {
        return t.transform(p.get(0),0) / spacing[0] + off[0];
    }

    template<typename T> static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,1,cnt_type> & div_c,
                                                                             const openfpm::array<T,1,cnt_type> & spacing,
                                                                             const openfpm::array<ids_type,1,cnt_type> & off,
                                                                             const transform & t,
                                                                             const T * p,
                                                                             ids_type * e)
    {
        e[0] = openfpm::math::uint_floor(t.transform(p,0)/spacing[0]) + off[0];

        return e[0];
    }

    template<typename T>
    static inline __device__ __host__ grid_key_dx<1,ids_type> get_cid_key(const openfpm::array<T,1,cnt_type> & spacing,
                                                                          const openfpm::array<ids_type,1,cnt_type> & off,
                                                                          const transform & t,
                                                                          const Point<1,T> & p)
    {
        grid_key_dx<1,ids_type> e;

        e.set_d(0,openfpm::math::uint_floor(t.transform(p,0)/spacing[0]) + off[0]);

        return e;
    }

    template <typename U = cnt_type, typename sfinae=typename std::enable_if<std::is_same<ids_type,U>::value >::type >
    static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,1,cnt_type> & div_c,
                                                        const grid_key_dx<1,cnt_type> & e)
    {
        return e.get(0);
    }
};

template<typename cnt_type, typename ids_type, typename transform>
struct cid_<2,cnt_type,ids_type,transform>
{
    static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,2,cnt_type> & div_c, ids_type * e)
    {
        return e[0] + div_c[0] * e[1];
    }

    static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,2,cnt_type> & div_c, const grid_key_dx<2,ids_type> & e)
    {
        return e.get(0) + div_c[0] * e.get(1);
    }

    template<typename T> static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,2,cnt_type> & div_c,
                                                                             const openfpm::array<T,2,cnt_type> & spacing,
                                                                             const openfpm::array<ids_type,2,cnt_type> & off,
                                                                             const transform & t,
                                                                             const Point<2,T> & p)
    {
        return openfpm::math::uint_floor(t.transform(p,0)/spacing[0]) + off[0] +
               (openfpm::math::uint_floor(t.transform(p,1)/spacing[1]) + off[1])*div_c[0];
    }

    template<typename T> static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,2,cnt_type> & div_c,
                                                                             const openfpm::array<T,2,cnt_type> & spacing,
                                                                             const openfpm::array<ids_type,2,cnt_type> & off,
                                                                             const transform & t,
                                                                             const T * p,
                                                                             ids_type * e)
    {
        e[0] = openfpm::math::uint_floor(t.transform(p,0)/spacing[0]) + off[0];
        e[1] = openfpm::math::uint_floor(t.transform(p,1)/spacing[1]) + off[1];

        return e[0] + e[1]*div_c[0];
    }

    template<typename T>
    static inline __device__ __host__ grid_key_dx<2,ids_type> get_cid_key(const openfpm::array<T,2,cnt_type> & spacing,
                                                                          const openfpm::array<ids_type,2,cnt_type> & off,
                                                                          const transform & t,
                                                                          const Point<2,T> & p)
    {
        grid_key_dx<2,ids_type> e;

        e.set_d(0,openfpm::math::uint_floor(t.transform(p,0)/spacing[0]) + off[0]);
        e.set_d(1,openfpm::math::uint_floor(t.transform(p,1)/spacing[1]) + off[1]);

        return e;
    }

    template <typename U = cnt_type, typename sfinae=typename std::enable_if<std::is_same<ids_type,U>::value >::type >
    static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,2,cnt_type> & div_c,
                                                        const grid_key_dx<2,cnt_type> & e)
    {
        return e.get(0) + e.get(1)*div_c[0];
    }
};

template<typename cnt_type, typename ids_type,typename transform>
struct cid_<3,cnt_type,ids_type,transform>
{
    static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,3,cnt_type> & div_c,
                                                        const ids_type * e)
    {
        return e[0] + (e[1] + e[2]*div_c[1])*div_c[0];
    }

    static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,3,cnt_type> & div_c,
                                                        const grid_key_dx<3,ids_type> & e)
    {
        return e.get(0) + (e.get(1) + e.get(2)*div_c[1])*div_c[0];
    }

    static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,3,cnt_type> & div_c,
                                                        const openfpm::array<ids_type,3,cnt_type> & off,
                                                        const grid_key_dx<3,ids_type> & e)
    {
        return (e.get(0) + off[0]) + ((e.get(1) + off[1]) + (e.get(2) + off[2])*div_c[1])*div_c[0];
    }

    template<typename T> static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,3,cnt_type> & div_c,
                                                                             const openfpm::array<T,3,cnt_type> & spacing,
                                                                             const openfpm::array<ids_type,3,cnt_type> & off,
                                                                             const transform & t,
                                                                             const Point<3,T> & p)
    {
        return openfpm::math::uint_floor(t.transform(p,0)/spacing[0]) + off[0] +
               (openfpm::math::uint_floor(t.transform(p,1)/spacing[1]) + off[1] +
               (openfpm::math::uint_floor(t.transform(p,2)/spacing[2]) + off[2])*div_c[1])*div_c[0];
    }

    template<typename T> static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,3,cnt_type> & div_c,
                                                                             const openfpm::array<T,3,cnt_type> & spacing,
                                                                             const openfpm::array<ids_type,3,cnt_type> & off,
                                                                             const transform & t,
                                                                             const T * p,
                                                                             ids_type * e)
    {
        e[0] = openfpm::math::uint_floor(t.transform(p,0)/spacing[0]) + off[0];
        e[1] = openfpm::math::uint_floor(t.transform(p,1)/spacing[1]) + off[1];
        e[2] = openfpm::math::uint_floor(t.transform(p,2)/spacing[2]) + off[2];

        return e[0] + (e[1] + e[2]*div_c[1])*div_c[0];
    }

    template<typename T>
    static inline __device__ __host__ grid_key_dx<3,ids_type> get_cid_key(const openfpm::array<T,3,cnt_type> & spacing,
                                                                          const openfpm::array<ids_type,3,cnt_type> & off,
                                                                          const transform & t,
                                                                          const Point<3,T> & p)
    {
        grid_key_dx<3,ids_type> e;

        e.set_d(0,openfpm::math::uint_floor(t.transform(p,0)/spacing[0]) + off[0]);
        e.set_d(1,openfpm::math::uint_floor(t.transform(p,1)/spacing[1]) + off[1]);
        e.set_d(2,openfpm::math::uint_floor(t.transform(p,2)/spacing[2]) + off[2]);

        return e;
    }

    template <typename U = cnt_type, typename sfinae=typename std::enable_if<std::is_same<ids_type,U>::value >::type >
    static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,3,cnt_type> & div_c,
                                                        const grid_key_dx<3,cnt_type> & e)
    {
        return e.get(0) + (e.get(1) + e.get(2)*div_c[1])*div_c[0];
    }
};
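
// Illustrative example of the linearization above: with div_c = {10,10,10} a grid key
// (2,3,4) maps to 2 + (3 + 4*10)*10 = 432, i.e. cells are stored with the first index
// varying fastest.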

template<unsigned int bit_phases, typename cnt_type>
struct shift_ph
{
    typedef boost::mpl::int_<sizeof(cnt_type) - bit_phases> type;
    typedef boost::low_bits_mask_t<sizeof(size_t)*8-bit_phases> mask_low;
};


template<typename cnt_type, typename ph>
__device__ __host__ cnt_type encode_phase_id(cnt_type ph_id,cnt_type pid)
{
    return pid + (ph_id << ph::type::value);
}
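
// Illustrative use (the concrete types here are placeholders): with
//   typedef shift_ph<2,unsigned int> ph;
// encode_phase_id<unsigned int,ph>(phase,pid) shifts the phase identifier left by
// ph::type::value bits and adds it to the particle id; its counterpart phase_and_id()
// further below splits an encoded value back into phase and particle id.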


#ifdef __NVCC__

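/*! \brief Compute for each particle the cell it belongs to
 *
 * Positions are read from p_pos (structure-of-arrays layout with pitch n_cap), shifted by
 * the transform t and divided by the cell spacing to obtain the cell id. In the dense case
 * (is_sparse == false) counts[cid] is incremented atomically and the returned slot is
 * stored together with the cell id in p_ids; in the sparse case the cell id is also
 * written into counts, one entry per particle.
 */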
template<bool is_sparse,unsigned int dim, typename pos_type,
         typename cnt_type, typename ids_type, typename transform>
__global__ void subindex(openfpm::array<ids_type,dim,cnt_type> div,
                         openfpm::array<pos_type,dim,cnt_type> spacing,
                         openfpm::array<ids_type,dim,cnt_type> off,
                         transform t,
                         int n_cap,
                         int n_part,
                         int n_cap2,
                         cnt_type start,
                         pos_type * p_pos,
                         cnt_type *counts,
                         cnt_type * p_ids)
{
    cnt_type i, cid, ins;
    ids_type e[dim+1];

    i = threadIdx.x + blockIdx.x * blockDim.x + start;
    ins = threadIdx.x + blockIdx.x * blockDim.x;
    if (i >= n_part) return;

    pos_type p[dim];

    for (size_t k = 0 ; k < dim ; k++)
    {p[k] = p_pos[i+k*n_cap];}

    cid = cid_<dim,cnt_type,ids_type,transform>::get_cid(div,spacing,off,t,p,e);

    if (is_sparse == false)
    {
        e[dim] = atomicAdd(counts + cid, 1);

        p_ids[ins] = cid;
        p_ids[ins+1*(n_cap2)] = e[dim];
    }
    else
    {
        p_ids[ins] = cid;
        p_ids[ins+1*(n_cap2)] = e[dim];

        counts[ins] = cid;
    }
}
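
// A minimal launch sketch (one thread per particle; transform_t, the block size and the
// argument names are placeholders, not part of this header):
//   subindex<false,3,float,unsigned int,unsigned char,transform_t>
//       <<<(n_part + 255) / 256, 256>>>(div,spacing,off,t,n_cap,n_part,n_cap2,0,p_pos,counts,p_ids);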
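/*! \brief Insert each particle into a sparse cell vector
 *
 * Every thread reads the cell id of one particle from vc and inserts the particle index
 * into the sparse vector vs under that cell, using the block-wise insert/flush protocol
 * of map_vector_sparse.
 */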
template<typename vector_sparse, typename vector_cell>
__global__ void fill_cells_sparse(vector_sparse vs, vector_cell vc)
{
    vs.init();

    int p = blockIdx.x*blockDim.x + threadIdx.x;

    int c = 0;
    if (p < vc.size())
    {
        c = vc.template get<0>(p);
        vs.template insert<0>(c) = p;
    }

    vs.flush_block_insert();
}


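/*! \brief Same as subindex, but without the dense/sparse switch
 *
 * Computes the cell id of each particle, atomically increments the per-cell counter and
 * stores the (cell id, slot) pair in p_ids indexed by the global particle index.
 */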
template<unsigned int dim, typename pos_type, typename cnt_type, typename ids_type, typename transform>
__global__ void subindex_without_count(openfpm::array<ids_type,dim,cnt_type> div,
                                       openfpm::array<pos_type,dim,cnt_type> spacing,
                                       openfpm::array<ids_type,dim,cnt_type> off,
                                       transform t,
                                       int n_cap,
                                       int n_part,
                                       int n_cap2,
                                       pos_type * p_pos,
                                       cnt_type *counts,
                                       cnt_type * p_ids)
{
    cnt_type i, cid;
    ids_type e[dim+1];

    i = threadIdx.x + blockIdx.x * blockDim.x;
    if (i >= n_part) return;

    pos_type p[dim];

    for (size_t k = 0 ; k < dim ; k++)
    {p[k] = p_pos[i+k*n_cap];}

    cid = cid_<dim,cnt_type,ids_type,transform>::get_cid(div,spacing,off,t,p,e);

    e[dim] = atomicAdd(counts + cid, 1);

    p_ids[i] = cid;
    p_ids[i+1*(n_cap2)] = e[dim];
}


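/*! \brief Fill the cell list with the (phase encoded) particle indices
 *
 * In the deterministic variant each particle index is simply written at its own position.
 * In the default variant the particle is written at starts[cid] + slot, where cid and slot
 * were produced by subindex, so that the particles of every cell end up in a contiguous
 * range of the cells array.
 */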
#ifdef MAKE_CELLLIST_DETERMINISTIC

template<unsigned int dim, typename cnt_type, typename ids_type, typename ph>
__global__ void fill_cells(cnt_type phase_id ,
                           cnt_type n,
                           cnt_type *cells)
{
    cnt_type i;

    i = threadIdx.x + blockIdx.x * blockDim.x;
    if (i >= n) return;

    cells[i] = encode_phase_id<cnt_type,ph>(phase_id,i);
}

#else

template<unsigned int dim, typename cnt_type, typename ids_type, typename ph>
__global__ void fill_cells(cnt_type phase_id ,
                           cnt_type n,
                           cnt_type n_cap,
                           cnt_type start_p,
                           const cnt_type *starts,
                           const cnt_type * p_ids,
                           cnt_type *cells)
{
    cnt_type i, cid, id, start;

    i = threadIdx.x + blockIdx.x * blockDim.x;
    if (i >= n) return;

    cid = p_ids[i];

    start = starts[cid];
    id = start + p_ids[1*n_cap+i];

    cells[id] = encode_phase_id<cnt_type,ph>(phase_id,i + start_p);
}

#endif


template<typename cnt_type, typename ph>
__device__ inline void phase_and_id(cnt_type c, cnt_type *ph_id, cnt_type *pid)
{
    *pid = c & ph::mask_low::value;
    *ph_id = c << ph::type::value;
}


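/*! \brief Copy one element from the source to the destination vector
 *
 * reorder() copies all properties, reorder_wprp() only the properties listed in the
 * parameter pack prp.
 */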
template <typename vector_prp , typename cnt_type>
__device__ inline void reorder(const vector_prp & input,
                               vector_prp & output,
                               cnt_type src_id,
                               cnt_type dst_id)
{
    output.set(dst_id,input,src_id);
}

template <typename vector_prp , typename cnt_type, unsigned int ... prp>
__device__ inline void reorder_wprp(const vector_prp & input,
                                    vector_prp & output,
                                    cnt_type src_id,
                                    cnt_type dst_id)
{
    output.template set<prp ...>(dst_id,input,src_id);
}

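/*! \brief Reorder particles according to the cell list
 *
 * Thread i reads the original particle index encoded in cells[i], copies properties and
 * position into slot i of the output vectors and records the mapping in both directions
 * in sorted_non_sorted and non_sorted_to_sorted. reorder_parts_wprp does the same but
 * copies only the properties listed in prp.
 */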
template <typename vector_prp, typename vector_pos, typename vector_ns, typename cnt_type, typename sh>
__global__ void reorder_parts(int n,
                              const vector_prp input,
                              vector_prp output,
                              const vector_pos input_pos,
                              vector_pos output_pos,
                              vector_ns sorted_non_sorted,
                              vector_ns non_sorted_to_sorted,
                              const cnt_type * cells)
{
    cnt_type i = threadIdx.x + blockIdx.x * blockDim.x;
    if (i >= n) return;

    cnt_type code = cells[i];

    reorder(input, output, code,i);
    reorder(input_pos,output_pos,code,i);

    sorted_non_sorted.template get<0>(i) = code;
    non_sorted_to_sorted.template get<0>(code) = i;
}

template <typename vector_prp, typename vector_pos, typename vector_ns, typename cnt_type, typename sh, unsigned int ... prp>
__global__ void reorder_parts_wprp(int n,
                                   const vector_prp input,
                                   vector_prp output,
                                   const vector_pos input_pos,
                                   vector_pos output_pos,
                                   vector_ns sorted_non_sorted,
                                   vector_ns non_sorted_to_sorted,
                                   const cnt_type * cells)
{
    cnt_type i = threadIdx.x + blockIdx.x * blockDim.x;
    if (i >= n) return;

    cnt_type code = cells[i];
    reorder_wprp<vector_prp,cnt_type,prp...>(input, output, code,i);
    reorder(input_pos,output_pos,code,i);

    sorted_non_sorted.template get<0>(i) = code;
    non_sorted_to_sorted.template get<0>(code) = i;
}

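/*! \brief Mark with 1 the (sorted) particles whose original index is smaller than g_m
 *
 * g_m is the number of domain particles, so the output flags domain particles with 1 and
 * ghost particles with 0.
 */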
template<typename vector_sort_index, typename vector_out_type>
__global__ void mark_domain_particles(vector_sort_index vsi, vector_out_type vout_dg, int g_m)
{
    int i = threadIdx.x + blockIdx.x * blockDim.x;

    if (i >= vsi.size()) return;

    vout_dg.template get<0>(i) = (vsi.template get<0>(i) < g_m)?1:0;
}

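/*! \brief Collect the indices selected by a scan of the domain/ghost flags
 *
 * Wherever the scan increases between i and i+1 the index i is written at position
 * scan[i] of the output, producing a compact list of the flagged particles.
 */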
template<typename scan_type, typename vector_out_type>
__global__ void collect_domain_ghost_ids(scan_type scan, vector_out_type vout_ids)
{
    int i = threadIdx.x + blockIdx.x * blockDim.x;

    if (i >= scan.size()-1) return;

    auto pp = scan.template get<0>(i+1);
    auto p = scan.template get<0>(i);

    if (pp != p)
    {vout_ids.template get<0>(scan.template get<0>(i)) = i;}
}

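/*! \brief Count, for each non-empty cell, how many of its candidate neighbour cells exist
 *
 * nn_to_test contains the relative cell offsets to probe; for every offset whose cell is
 * present in the sparse cell list the per-cell counter in output is incremented.
 */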
template<typename cl_sparse_type, typename vector_type, typename vector_type2>
__global__ void count_nn_cells(cl_sparse_type cl_sparse, vector_type output, vector_type2 nn_to_test)
{
    typedef typename cl_sparse_type::index_type index_type;

    int tid = threadIdx.x + blockIdx.x * blockDim.x;
    openfpm::sparse_index<index_type> id;
    id.id = tid;

    if (tid >= cl_sparse.size()) {return;}

    index_type cell = cl_sparse.get_index(id);

    for (int i = 0 ; i < nn_to_test.size() ; i++)
    {
        index_type cell_n = cell + nn_to_test.template get<0>(i);

        index_type start = cl_sparse.template get<0>(cell_n);

        if (start != (index_type)-1)
        {
            // the neighbour cell exists, count it
            output.template get<0>(tid) += 1;
        }
    }
}

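/*! \brief Store, for each non-empty cell, the particle ranges of its existing neighbour cells
 *
 * For every neighbour offset in nn_to_test that hits an existing cell, the [start,stop)
 * range of that cell is written into output starting at position starts[tid]; for the last
 * cell of the sparse list the stop value is max_stop.
 */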
template<typename cl_sparse_type, typename vector_type, typename vector_type2, typename vector_type3>
__global__ void fill_nn_cells(cl_sparse_type cl_sparse, vector_type starts, vector_type2 nn_to_test, vector_type3 output, typename cl_sparse_type::index_type max_stop)
{
    typedef typename cl_sparse_type::index_type index_type;

    int tid = threadIdx.x + blockIdx.x * blockDim.x;
    openfpm::sparse_index<index_type> id;
    id.id = tid;

    if (tid >= cl_sparse.size()) {return;}

    index_type cell = cl_sparse.get_index(id);

    int cnt = 0;
    for (int i = 0 ; i < nn_to_test.size() ; i++)
    {
        index_type cell_n = cell + nn_to_test.template get<0>(i);

        auto sid = cl_sparse.get_sparse(cell_n);

        if (sid.id != (index_type)cl_sparse.size())
        {
            index_type start = cl_sparse.template get<0>(sid);
            // the neighbour cell exists, store its particle range
            output.template get<0>(starts.template get<0>(tid) + cnt) = start;

            if (sid.id == cl_sparse.size() - 1)
            {output.template get<1>(starts.template get<0>(tid) + cnt) = max_stop;}
            else
            {
                decltype(sid) sid_t;
                sid_t.id = sid.id+1;
                output.template get<1>(starts.template get<0>(tid) + cnt) = cl_sparse.template get<0>(sid_t);
            }
            ++cnt;
        }
    }
}


#endif

#endif /* OPENFPM_DATA_SRC_NN_CELLLIST_CUDA_CUDA_CELL_LIST_UTIL_FUNC_HPP_ */