OpenFPM_pdata  4.1.0
Project that contain the implementation of distributed structures
 
Loading...
Searching...
No Matches
Cuda_cell_list_util_func.hpp
1/*
2 * Cuda_cell_list_util_func.hpp
3 *
4 * Created on: Jun 17, 2018
5 * Author: i-bird
6 */
7
8#ifndef OPENFPM_DATA_SRC_NN_CELLLIST_CUDA_CUDA_CELL_LIST_UTIL_FUNC_HPP_
9#define OPENFPM_DATA_SRC_NN_CELLLIST_CUDA_CUDA_CELL_LIST_UTIL_FUNC_HPP_
10
11#include <boost/integer/integer_mask.hpp>
12#include <Vector/map_vector_sparse.hpp>
13
14template<unsigned int dim, typename cnt_type, typename ids_type, typename transform>
15struct cid_
16{
17 static inline __device__ __host__ cnt_type get_cid(openfpm::array<ids_type,1,cnt_type> & div_c , ids_type * e)
18 {
19 cnt_type id = e[dim-1];
20
21#pragma unroll
22 for (int i = 1; i >= 0 ; i-- )
23 {id = e[i] + div_c[i]*id;}
24
25 return id;
26 }
27
28 static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,dim,cnt_type> & div_c , const grid_key_dx<1,cnt_type> & e)
29 {
30 cnt_type id = e.get(dim-1);
31
32#pragma unroll
33 for (int i = 1; i >= 0 ; i-- )
34 {id = e.get(i) + div_c[i]*id;}
35
36 return id;
37 }
38
39 template<typename T> static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,dim,cnt_type> & div_c,
40 const openfpm::array<T,dim,cnt_type> & spacing,
41 const transform & t,
42 const Point<dim,T> & p)
43 {
44 cnt_type id = p.get(dim-1) / spacing[dim-1];
45
46#pragma unroll
47 for (int i = 1; i >= 0 ; i-- )
48 {id = t.transform(p.get(i),i) / spacing[i] + div_c[i]*id;}
49
50 return id;
51 }
52};
53
54template<typename cnt_type, typename ids_type, typename transform>
55struct cid_<1,cnt_type,ids_type, transform>
56{
57 static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,1,cnt_type> & div_c, ids_type * e)
58 {
59 return e[0];
60 }
61
62 static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,1,cnt_type> & div_c, const grid_key_dx<1,ids_type> & e)
63 {
64 return e.get(0);
65 }
66
67 template<typename T> static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,1,cnt_type> & div_c,
68 const openfpm::array<T,1,cnt_type> & spacing,
70 const transform & t,
71 const Point<1,T> & p)
72 {
73 return t.transform(p.get(0),0) / spacing[0] + off[0];
74 }
75
76 template<typename T> static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,1,cnt_type> & div_c,
77 const openfpm::array<T,1,cnt_type> & spacing,
79 const transform & t,
80 const T * p,
81 ids_type * e)
82 {
83 e[0] = openfpm::math::uint_floor(t.transform(p,0)/spacing[0]) + off[0];
84
85 return e[0];
86 }
87
88 template<typename T>
89 static inline __device__ __host__ grid_key_dx<2,ids_type> get_cid_key(const openfpm::array<T,1,cnt_type> & spacing,
91 const transform & t,
92 const Point<2,T> & p)
93 {
95
96 e.set_d(0,openfpm::math::uint_floor(t.transform(p,0)/spacing[0]) + off[0]);
97
98 return e;
99 }
100
101 template <typename U = cnt_type, typename sfinae=typename std::enable_if<std::is_same<ids_type,U>::value >::type >
102 static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,1,cnt_type> & div_c,
103 const grid_key_dx<1,cnt_type> & e)
104 {
105 return e.get(0);
106 }
107};
108
109template<typename cnt_type, typename ids_type, typename transform>
110struct cid_<2,cnt_type,ids_type,transform>
111{
112 static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,2,cnt_type> & div_c, ids_type * e)
113 {
114 return e[0] + div_c[0] * e[1];
115 }
116
117 static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,2,cnt_type> & div_c, const grid_key_dx<2,ids_type> & e)
118 {
119 return e.get(0) + div_c[0] * e.get(1);
120 }
121
122 template<typename T> static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,2,cnt_type> & div_c,
123 const openfpm::array<T,2,cnt_type> & spacing,
125 const transform & t,
126 const Point<2,T> & p)
127 {
128
129 return openfpm::math::uint_floor(t.transform(p,0)/spacing[0]) + off[0] +
130 (openfpm::math::uint_floor(t.transform(p,1)/spacing[1]) + off[1])*div_c[0];
131 }
132
133 template<typename T> static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,2,cnt_type> & div_c,
134 const openfpm::array<T,2,cnt_type> & spacing,
136 const transform & t,
137 const T * p,
138 ids_type * e)
139 {
140 e[0] = openfpm::math::uint_floor(t.transform(p,0)/spacing[0]) + off[0];
141 e[1] = openfpm::math::uint_floor(t.transform(p,1)/spacing[1]) + off[1];
142
143 return e[0] + e[1]*div_c[0];
144 }
145
146 template<typename T>
147 static inline __device__ __host__ grid_key_dx<2,ids_type> get_cid_key(const openfpm::array<T,2,cnt_type> & spacing,
149 const transform & t,
150 const Point<2,T> & p)
151 {
153
154 e.set_d(0,openfpm::math::uint_floor(t.transform(p,0)/spacing[0]) + off[0]);
155 e.set_d(1,openfpm::math::uint_floor(t.transform(p,1)/spacing[1]) + off[1]);
156
157 return e;
158 }
159
160 template <typename U = cnt_type, typename sfinae=typename std::enable_if<std::is_same<ids_type,U>::value >::type >
161 static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,2,cnt_type> & div_c,
162 const grid_key_dx<2,cnt_type> & e)
163 {
164 return e.get(0) + e.get(1)*div_c[0];
165 }
166};
167
168
169template<typename cnt_type, typename ids_type,typename transform>
170struct cid_<3,cnt_type,ids_type,transform>
171{
172
173 static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,3,cnt_type> & div_c,
174 const ids_type * e)
175 {
176 return e[0] + (e[1] + e[2]*div_c[1])*div_c[0];
177 }
178
179 static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,3,cnt_type> & div_c,
180 const grid_key_dx<3,ids_type> & e)
181 {
182 return e.get(0) + (e.get(1) + e.get(2)*div_c[1])*div_c[0];
183 }
184
185 static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,3,cnt_type> & div_c,
187 const grid_key_dx<3,ids_type> & e)
188 {
189 return (e.get(0) + off[0]) + ((e.get(1) + off[1]) + (e.get(2) + off[2])*div_c[1])*div_c[0];
190 }
191
192 template<typename T> static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,3,cnt_type> & div_c,
193 const openfpm::array<T,3,cnt_type> & spacing,
195 const transform & t,
196 const Point<3,T> & p)
197 {
198
199 return openfpm::math::uint_floor(t.transform(p,0)/spacing[0]) + off[0] +
200 (openfpm::math::uint_floor(t.transform(p,1)/spacing[1]) + off[1] +
201 (openfpm::math::uint_floor(t.transform(p,2)/spacing[2]) + off[2])*div_c[1])*div_c[0];
202 }
203
204 template<typename T> static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,3,cnt_type> & div_c,
205 const openfpm::array<T,3,cnt_type> & spacing,
207 const transform & t,
208 const T * p,
209 ids_type * e)
210 {
211 e[0] = openfpm::math::uint_floor(t.transform(p,0)/spacing[0]) + off[0];
212 e[1] = openfpm::math::uint_floor(t.transform(p,1)/spacing[1]) + off[1];
213 e[2] = openfpm::math::uint_floor(t.transform(p,2)/spacing[2]) + off[2];
214
215 return e[0] + (e[1] + e[2]*div_c[1])*div_c[0];
216 }
217
218 template<typename T>
219 static inline __device__ __host__ grid_key_dx<3,ids_type> get_cid_key(const openfpm::array<T,3,cnt_type> & spacing,
221 const transform & t,
222 const Point<3,T> & p)
223 {
225
226 e.set_d(0,openfpm::math::uint_floor(t.transform(p,0)/spacing[0]) + off[0]);
227 e.set_d(1,openfpm::math::uint_floor(t.transform(p,1)/spacing[1]) + off[1]);
228 e.set_d(2,openfpm::math::uint_floor(t.transform(p,2)/spacing[2]) + off[2]);
229
230 return e;
231 }
232
233 template <typename U = cnt_type, typename sfinae=typename std::enable_if<std::is_same<ids_type,U>::value >::type >
234 static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,3,cnt_type> & div_c,
235 const grid_key_dx<3,cnt_type> & e)
236 {
237 return e.get(0) + (e.get(1) + e.get(2)*div_c[1])*div_c[0];
238 }
239};
240
241template<unsigned int bit_phases, typename cnt_type>
243{
244 typedef boost::mpl::int_<sizeof(cnt_type) - bit_phases> type;
245 typedef boost::low_bits_mask_t<sizeof(size_t)*8-bit_phases> mask_low;
246};
247
248
249template<typename cnt_type, typename ph>
250__device__ __host__ cnt_type encode_phase_id(cnt_type ph_id,cnt_type pid)
251{
252 return pid + (ph_id << ph::type::value);
253}
254
255
257
258#ifdef __NVCC__
259
260template<bool is_sparse,unsigned int dim, typename pos_type,
261 typename cnt_type, typename ids_type, typename transform,
262 typename vector_pos_type, typename vector_cnt_type, typename vector_pids_type>
263__global__ void subindex(openfpm::array<ids_type,dim,cnt_type> div,
266 transform t,
267 int n_part,
268 cnt_type start,
269 vector_pos_type p_pos,
270 vector_cnt_type counts,
271 vector_pids_type p_ids)
272{
273 cnt_type i, cid, ins;
274 ids_type e[dim+1];
275
276 i = threadIdx.x + blockIdx.x * blockDim.x + start;
277 ins = threadIdx.x + blockIdx.x * blockDim.x;
278 if (i >= n_part) return;
279
280 pos_type p[dim];
281
282 for (size_t k = 0 ; k < dim ; k++)
283 {p[k] = p_pos.template get<0>(i)[k];}
284
285 cid = cid_<dim,cnt_type,ids_type,transform>::get_cid(div,spacing,off,t,p,e);
286
287 if (is_sparse == false)
288 {
289 e[dim] = atomicAdd(&counts.template get<0>(cid), 1);
290
291 p_ids.template get<0>(ins)[0] = cid;
292 {p_ids.template get<0>(ins)[1] = e[dim];}
293 }
294 else
295 {
296 p_ids.template get<0>(ins)[0] = cid;
297 {p_ids.template get<0>(ins)[1] = e[dim];}
298
299 counts.template get<0>(ins) = cid;
300 }
301}
302
303template<typename vector_sparse, typename vector_cell>
304__global__ void fill_cells_sparse(vector_sparse vs, vector_cell vc)
305{
306 vs.init();
307
308 int p = blockIdx.x*blockDim.x + threadIdx.x;
309
310 int c = 0;
311 if (p < vc.size())
312 {
313 c = vc.template get<0>(p);
314 vs.template insert<0>(c) = p;
315 }
316
317 vs.flush_block_insert();
318}
319
320
321template<unsigned int dim, typename pos_type, typename cnt_type, typename ids_type, typename transform>
322__global__ void subindex_without_count(openfpm::array<ids_type,dim,cnt_type> div,
325 transform t,
326 int n_cap,
327 int n_part,
328 int n_cap2,
329 pos_type * p_pos,
330 cnt_type *counts,
331 cnt_type * p_ids)
332{
333 cnt_type i, cid;
334 ids_type e[dim+1];
335
336 i = threadIdx.x + blockIdx.x * blockDim.x;
337 if (i >= n_part) return;
338
339 pos_type p[dim];
340
341 for (size_t k = 0 ; k < dim ; k++)
342 {p[k] = p_pos[i+k*n_cap];}
343
344 cid = cid_<dim,cnt_type,ids_type,transform>::get_cid(div,spacing,off,t,p,e);
345
346 e[dim] = atomicAdd(counts + cid, 1);
347
348 p_ids[i] = cid;
349 p_ids[i+1*(n_cap2)] = e[dim];
350}
351
352
353#ifdef MAKE_CELLLIST_DETERMINISTIC
354
355template<unsigned int dim, typename cnt_type, typename ids_type, typename ph, typename vector_cells_type>
356__global__ void fill_cells(cnt_type phase_id ,
357 cnt_type n,
358 vector_cells_type cells)
359{
360 cnt_type i;
361
362 i = threadIdx.x + blockIdx.x * blockDim.x;
363 if (i >= n) return;
364
365 cells.template get<0>(i) = encode_phase_id<cnt_type,ph>(phase_id,i);
366}
367
368#else
369
370template<unsigned int dim, typename cnt_type, typename ids_type, typename ph,typename vector_starts_type, typename vector_pids_type, typename vector_cells_type>
371__global__ void fill_cells(cnt_type phase_id ,
374 cnt_type n,
375 cnt_type start_p,
376 vector_starts_type starts,
377 vector_pids_type p_ids,
378 vector_cells_type cells)
379{
380 cnt_type i, cid, id, start;
381
382 i = threadIdx.x + blockIdx.x * blockDim.x;
383 if (i >= n) return;
384
385 cid = p_ids.template get<0>(i)[0];
386
387 start = starts.template get<0>(cid);
388 id = start + p_ids.template get<0>(i)[1];
389
390 cells.template get<0>(id) = encode_phase_id<cnt_type,ph>(phase_id,i + start_p);
391}
392
393#endif
394
395
396
397
398
399template<typename cnt_type, typename ph>
400__device__ inline void phase_and_id(cnt_type c, cnt_type *ph_id, cnt_type *pid)
401{
402 *pid = c & ph::mask_low::value;
403 *ph_id = c << ph::type::value;
404}
405
406
407template <typename vector_prp , typename cnt_type>
408__device__ inline void reorder(const vector_prp & input,
409 vector_prp & output,
410 cnt_type src_id,
411 cnt_type dst_id)
412{
413 output.set(dst_id,input,src_id);
414}
415
416template <typename vector_prp , typename cnt_type, unsigned int ... prp>
417__device__ inline void reorder_wprp(const vector_prp & input,
418 vector_prp & output,
419 cnt_type src_id,
420 cnt_type dst_id)
421{
422 output.template set<prp ...>(dst_id,input,src_id);
423}
424
425template <typename vector_prp, typename vector_pos, typename vector_ns, typename vector_cells_type, typename cnt_type, typename sh>
426__global__ void reorder_parts(int n,
427 const vector_prp input,
428 vector_prp output,
429 const vector_pos input_pos,
430 vector_pos output_pos,
431 vector_ns sorted_non_sorted,
432 vector_ns non_sorted_to_sorted,
433 const vector_cells_type cells)
434{
435 cnt_type i = threadIdx.x + blockIdx.x * blockDim.x;
436 if (i >= n) return;
437
438 cnt_type code = cells.template get<0>(i);
439
440 reorder(input, output, code,i);
441 reorder(input_pos,output_pos,code,i);
442
443 sorted_non_sorted.template get<0>(i) = code;
444 non_sorted_to_sorted.template get<0>(code) = i;
445}
446
447template <typename vector_prp, typename vector_pos, typename vector_ns, typename vector_cells_type, typename cnt_type, typename sh, unsigned int ... prp>
448__global__ void reorder_parts_wprp(int n,
449 const vector_prp input,
450 vector_prp output,
451 const vector_pos input_pos,
452 vector_pos output_pos,
453 vector_ns sorted_non_sorted,
454 vector_ns non_sorted_to_sorted,
455 const vector_cells_type cells)
456{
457 cnt_type i = threadIdx.x + blockIdx.x * blockDim.x;
458 if (i >= n) return;
459
460 cnt_type code = cells.template get<0>(i);
461 reorder_wprp<vector_prp,cnt_type,prp...>(input, output, code,i);
462 reorder(input_pos,output_pos,code,i);
463
464 sorted_non_sorted.template get<0>(i) = code;
465 non_sorted_to_sorted.template get<0>(code) = i;
466}
467
468template<typename vector_sort_index, typename vector_out_type>
469__global__ void mark_domain_particles(vector_sort_index vsi, vector_out_type vout_dg, int g_m)
470{
471 int i = threadIdx.x + blockIdx.x * blockDim.x;
472
473 if (i >= vsi.size()) return;
474
475 vout_dg.template get<0>(i) = (vsi.template get<0>(i) < g_m)?1:0;
476}
477
478template<typename scan_type, typename vector_out_type>
479__global__ void collect_domain_ghost_ids(scan_type scan, vector_out_type vout_ids)
480{
481 int i = threadIdx.x + blockIdx.x * blockDim.x;
482
483 if (i >= scan.size()-1) return;
484
485 auto pp = scan.template get<0>(i+1);
486 auto p = scan.template get<0>(i);
487
488 if (pp != p)
489 {vout_ids.template get<0>(scan.template get<0>(i)) = i;}
490}
491
492template<typename cl_sparse_type, typename vector_type, typename vector_type2>
493__global__ void count_nn_cells(cl_sparse_type cl_sparse, vector_type output, vector_type2 nn_to_test)
494{
495 typedef typename cl_sparse_type::index_type index_type;
496
497 int tid = threadIdx.x + blockIdx.x * blockDim.x;
499 id.id = tid;
500
501 if (tid >= cl_sparse.size()) {return;}
502
503 index_type cell = cl_sparse.get_index(id);
504
505 for (int i = 0 ; i < nn_to_test.size() ; i++)
506 {
507 index_type cell_n = cell + nn_to_test.template get<0>(i);
508
509 index_type start = cl_sparse.template get<0>(cell_n);
510
511 if (start != (index_type)-1)
512 {
513 // Cell exist
514 output.template get<0>(tid) += 1;
515 }
516 }
517};
518
519template<typename cl_sparse_type, typename vector_type, typename vector_type2, typename vector_type3>
520__global__ void fill_nn_cells(cl_sparse_type cl_sparse, vector_type starts, vector_type2 nn_to_test, vector_type3 output, typename cl_sparse_type::index_type max_stop)
521{
522 typedef typename cl_sparse_type::index_type index_type;
523
524 int tid = threadIdx.x + blockIdx.x * blockDim.x;
526 id.id = tid;
527
528 if (tid >= cl_sparse.size()) {return;}
529
530 index_type cell = cl_sparse.get_index(id);
531
532 int cnt = 0;
533 for (int i = 0 ; i < nn_to_test.size() ; i++)
534 {
535 index_type cell_n = cell + nn_to_test.template get<0>(i);
536
537 auto sid = cl_sparse.get_sparse(cell_n);
538
539 if (sid.id != (index_type)cl_sparse.size())
540 {
541 index_type start = cl_sparse.template get<0>(sid);
542 // Cell exist
543 output.template get<0>(starts.template get<0>(tid) + cnt) = start;
544
545 if (sid.id == cl_sparse.size() - 1)
546 {output.template get<1>(starts.template get<0>(tid) + cnt) = max_stop;}
547 else
548 {
549 decltype(sid) sid_t;
550 sid_t.id = sid.id+1;
551 output.template get<1>(starts.template get<0>(tid) + cnt) = cl_sparse.template get<0>(sid_t);
552 }
553 ++cnt;
554 }
555 }
556};
557
558
560
561#endif
562
563#endif /* OPENFPM_DATA_SRC_NN_CELLLIST_CUDA_CUDA_CELL_LIST_UTIL_FUNC_HPP_ */
This class implement the point shape in an N-dimensional space.
Definition Point.hpp:28
__device__ __host__ const T & get(unsigned int i) const
Get coordinate.
Definition Point.hpp:172
grid_key_dx is the key to access any element in the grid
Definition grid_key.hpp:19
__device__ __host__ void set_d(index_type i, index_type id)
Set the i index.
Definition grid_key.hpp:516
__device__ __host__ index_type get(index_type i) const
Get the i index.
Definition grid_key.hpp:503
Distributed vector.