#ifndef OPENFPM_DATA_SRC_NN_CELLLIST_CUDA_CUDA_CELL_LIST_UTIL_FUNC_HPP_
#define OPENFPM_DATA_SRC_NN_CELLLIST_CUDA_CUDA_CELL_LIST_UTIL_FUNC_HPP_

#include <boost/integer/integer_mask.hpp>
#include <boost/mpl/int.hpp>
#include <type_traits>
#include <Vector/map_vector_sparse.hpp>

/*! \brief Linearize an N-dimensional cell index into a scalar cell id
 *
 * The fold follows Horner's rule:
 * id = e[0] + div_c[0]*(e[1] + div_c[1]*(e[2] + ...))
 */
template<unsigned int dim, typename cnt_type, typename ids_type, typename transform>
struct cid_
{
	static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,dim,cnt_type> & div_c, const ids_type * e)
	{
		cnt_type id = e[dim-1];

		for (int i = dim-2 ; i >= 0 ; i--)
		{id = e[i] + div_c[i]*id;}

		return id;
	}
	static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,dim,cnt_type> & div_c, const grid_key_dx<dim,ids_type> & e)
	{
		cnt_type id = e.get(dim-1);

		for (int i = dim-2 ; i >= 0 ; i--)
		{id = e.get(i) + div_c[i]*id;}

		return id;
	}
	template<typename T>
	static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,dim,cnt_type> & div_c,
	                                                   const openfpm::array<T,dim,cnt_type> & spacing,
	                                                   const transform & t,
	                                                   const Point<dim,T> & p)
	{
		cnt_type id = t.transform(p.get(dim-1),dim-1) / spacing[dim-1];

		for (int i = dim-2 ; i >= 0 ; i--)
		{id = t.transform(p.get(i),i) / spacing[i] + div_c[i]*id;}

		return id;
	}
};
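/* The specializations below unroll the Horner fold by hand for dim = 1, 2, 3.
 * As a concrete check (hypothetical values): with div_c = {10,10} the 2D key
 * (3,7) linearizes to 3 + 7*10 = 73.
 */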
template<typename cnt_type, typename ids_type, typename transform>
struct cid_<1,cnt_type,ids_type,transform>
{
	template<typename T>
	static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,1,cnt_type> & div_c, const openfpm::array<T,1,cnt_type> & spacing, const openfpm::array<ids_type,1,cnt_type> & off, const transform & t, const Point<1,T> & p)
	{
		return t.transform(p.get(0),0) / spacing[0] + off[0];
	}

	template<typename T>
	static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,1,cnt_type> & div_c, const openfpm::array<T,1,cnt_type> & spacing, const openfpm::array<ids_type,1,cnt_type> & off, const transform & t, const T * p, ids_type * e)
	{
		e[0] = openfpm::math::uint_floor(t.transform(p,0)/spacing[0]) + off[0];

		return e[0];
	}

	template<typename T>
	static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,1,cnt_type> & div_c, const openfpm::array<T,1,cnt_type> & spacing, const openfpm::array<ids_type,1,cnt_type> & off, const transform & t, const T * p, grid_key_dx<1,ids_type> & e)
	{
		e.set_d(0,openfpm::math::uint_floor(t.transform(p,0)/spacing[0]) + off[0]);

		return e.get(0);
	}
	template <typename U = cnt_type, typename sfinae = typename std::enable_if<std::is_same<ids_type,U>::value>::type>
	static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,1,cnt_type> & div_c, const grid_key_dx<1,cnt_type> & e)
	{
		return e.get(0);
	}
};
template<typename cnt_type, typename ids_type, typename transform>
struct cid_<2,cnt_type,ids_type,transform>
{
	static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,2,cnt_type> & div_c, const ids_type * e)
	{
		return e[0] + div_c[0] * e[1];
	}

	static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,2,cnt_type> & div_c, const grid_key_dx<2,ids_type> & e)
	{
		return e.get(0) + div_c[0] * e.get(1);
	}

	template<typename T>
	static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,2,cnt_type> & div_c, const openfpm::array<T,2,cnt_type> & spacing, const openfpm::array<ids_type,2,cnt_type> & off, const transform & t, const T * p)
	{
		return openfpm::math::uint_floor(t.transform(p,0)/spacing[0]) + off[0] +
		       (openfpm::math::uint_floor(t.transform(p,1)/spacing[1]) + off[1])*div_c[0];
	}

	template<typename T>
	static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,2,cnt_type> & div_c, const openfpm::array<T,2,cnt_type> & spacing, const openfpm::array<ids_type,2,cnt_type> & off, const transform & t, const T * p, ids_type * e)
	{
		e[0] = openfpm::math::uint_floor(t.transform(p,0)/spacing[0]) + off[0];
		e[1] = openfpm::math::uint_floor(t.transform(p,1)/spacing[1]) + off[1];

		return e[0] + e[1]*div_c[0];
	}

	template<typename T>
	static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,2,cnt_type> & div_c, const openfpm::array<T,2,cnt_type> & spacing, const openfpm::array<ids_type,2,cnt_type> & off, const transform & t, const T * p, grid_key_dx<2,ids_type> & e)
	{
		e.set_d(0,openfpm::math::uint_floor(t.transform(p,0)/spacing[0]) + off[0]);
		e.set_d(1,openfpm::math::uint_floor(t.transform(p,1)/spacing[1]) + off[1]);

		return e.get(0) + e.get(1)*div_c[0];
	}

	template <typename U = cnt_type, typename sfinae = typename std::enable_if<std::is_same<ids_type,U>::value>::type>
	static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,2,cnt_type> & div_c, const grid_key_dx<2,cnt_type> & e)
	{
		return e.get(0) + e.get(1)*div_c[0];
	}
};
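/*! \brief 3D specialization: linearizes as e[0] + (e[1] + e[2]*div_c[1])*div_c[0]
 */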
template<typename cnt_type, typename ids_type, typename transform>
struct cid_<3,cnt_type,ids_type,transform>
{
	static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,3,cnt_type> & div_c, const ids_type * e)
	{
		return e[0] + (e[1] + e[2]*div_c[1])*div_c[0];
	}

	static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,3,cnt_type> & div_c, const grid_key_dx<3,ids_type> & e)
	{
		return e.get(0) + (e.get(1) + e.get(2)*div_c[1])*div_c[0];
	}

	static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,3,cnt_type> & div_c, const openfpm::array<ids_type,3,cnt_type> & off, const grid_key_dx<3,ids_type> & e)
	{
		return (e.get(0) + off[0]) + ((e.get(1) + off[1]) + (e.get(2) + off[2])*div_c[1])*div_c[0];
	}

	template<typename T>
	static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,3,cnt_type> & div_c, const openfpm::array<T,3,cnt_type> & spacing, const openfpm::array<ids_type,3,cnt_type> & off, const transform & t, const T * p)
	{
		return openfpm::math::uint_floor(t.transform(p,0)/spacing[0]) + off[0] +
		       (openfpm::math::uint_floor(t.transform(p,1)/spacing[1]) + off[1] +
		       (openfpm::math::uint_floor(t.transform(p,2)/spacing[2]) + off[2])*div_c[1])*div_c[0];
	}

	template<typename T>
	static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,3,cnt_type> & div_c, const openfpm::array<T,3,cnt_type> & spacing, const openfpm::array<ids_type,3,cnt_type> & off, const transform & t, const T * p, ids_type * e)
	{
		e[0] = openfpm::math::uint_floor(t.transform(p,0)/spacing[0]) + off[0];
		e[1] = openfpm::math::uint_floor(t.transform(p,1)/spacing[1]) + off[1];
		e[2] = openfpm::math::uint_floor(t.transform(p,2)/spacing[2]) + off[2];

		return e[0] + (e[1] + e[2]*div_c[1])*div_c[0];
	}

	template<typename T>
	static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,3,cnt_type> & div_c, const openfpm::array<T,3,cnt_type> & spacing, const openfpm::array<ids_type,3,cnt_type> & off, const transform & t, const T * p, grid_key_dx<3,ids_type> & e)
	{
		e.set_d(0,openfpm::math::uint_floor(t.transform(p,0)/spacing[0]) + off[0]);
		e.set_d(1,openfpm::math::uint_floor(t.transform(p,1)/spacing[1]) + off[1]);
		e.set_d(2,openfpm::math::uint_floor(t.transform(p,2)/spacing[2]) + off[2]);

		return e.get(0) + (e.get(1) + e.get(2)*div_c[1])*div_c[0];
	}

	template <typename U = cnt_type, typename sfinae = typename std::enable_if<std::is_same<ids_type,U>::value>::type>
	static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,3,cnt_type> & div_c, const grid_key_dx<3,cnt_type> & e)
	{
		return e.get(0) + (e.get(1) + e.get(2)*div_c[1])*div_c[0];
	}
};
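/*! \brief Compile-time constants used to pack a phase id into the high bits of a cell entry
 *
 * type is the shift (in bits) that positions the phase field; mask_low selects
 * the particle-id bits below it.
 */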
template<unsigned int bit_phases, typename cnt_type>
struct shift_ph
{
	typedef boost::mpl::int_<sizeof(cnt_type)*8 - bit_phases> type;
	typedef boost::low_bits_mask_t<sizeof(cnt_type)*8 - bit_phases> mask_low;
};
template<typename cnt_type, typename ph>
__device__ __host__ cnt_type encode_phase_id(cnt_type ph_id, cnt_type pid)
{
	return pid + (ph_id << ph::type::value);
}
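/* Round-trip example (hypothetical parameters): with cnt_type = unsigned int
 * and ph = shift_ph<8,unsigned int>, the shift is 32-8 = 24 bits, so
 * encode_phase_id(2,100) yields 100 + (2<<24); phase_and_id() below then
 * recovers pid = 100 from the low 24 bits and ph_id = 2 from the high bits.
 */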
template<bool is_sparse, unsigned int dim, typename pos_type, typename cnt_type, typename ids_type, typename transform>
__global__ void subindex(openfpm::array<ids_type,dim,cnt_type> div_c,
                         openfpm::array<pos_type,dim,cnt_type> spacing,
                         openfpm::array<ids_type,dim,cnt_type> off,
                         transform t,
                         int n_cap,
                         int n_part,
                         int n_cap2,
                         cnt_type start,
                         const pos_type * p_pos,
                         cnt_type * counts,
                         cnt_type * p_ids)
{
	cnt_type i, cid, ins;
	ids_type e[dim+1];

	i = threadIdx.x + blockIdx.x * blockDim.x + start;
	ins = threadIdx.x + blockIdx.x * blockDim.x;
	if (i >= n_part) return;

	pos_type p[dim];

	for (size_t k = 0 ; k < dim ; k++)
	{p[k] = p_pos[i+k*n_cap];}

	cid = cid_<dim,cnt_type,ids_type,transform>::get_cid(div_c,spacing,off,t,p,e);

	if (is_sparse == false)
	{
		// reserve a slot inside the cell and remember cell id and slot per particle
		e[dim] = atomicAdd(counts + cid, 1);

		p_ids[ins] = cid;
		{p_ids[ins+1*(n_cap2)] = e[dim];}
	}
	else
	{
		p_ids[ins] = cid;
		{p_ids[ins+1*(n_cap2)] = e[dim];}

		counts[ins] = cid;
	}
}
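/*! \brief Insert each particle into a sparse cell-list: vc holds the cell id of
 *         every particle, vs maps cell id to the inserting particle index
 */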
template<typename vector_sparse, typename vector_cell>
__global__ void fill_cells_sparse(vector_sparse vs, vector_cell vc)
{
	vs.init();

	int p = blockIdx.x*blockDim.x + threadIdx.x;

	int c = 0;

	if (p < vc.size())
	{
		c = vc.template get<0>(p);
		vs.template insert<0>(c) = p;
	}

	// no early return: every thread in the block must reach the flush
	vs.flush_block_insert();
}
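/*! \brief Single-phase variant of subindex: compute each particle's cell id and
 *         its slot inside the cell via atomicAdd
 *
 * Note: the original signature did not survive here; the kernel name
 * subindex_single_phase and its parameter list are assumptions modelled on the
 * is_sparse variant above.
 */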
template<unsigned int dim, typename pos_type, typename cnt_type, typename ids_type, typename transform>
__global__ void subindex_single_phase(openfpm::array<ids_type,dim,cnt_type> div_c,
                                      openfpm::array<pos_type,dim,cnt_type> spacing,
                                      openfpm::array<ids_type,dim,cnt_type> off,
                                      transform t,
                                      int n_cap,
                                      int n_part,
                                      int n_cap2,
                                      const pos_type * p_pos,
                                      cnt_type * counts,
                                      cnt_type * p_ids)
{
	cnt_type i, cid;
	ids_type e[dim+1];

	i = threadIdx.x + blockIdx.x * blockDim.x;
	if (i >= n_part) return;

	pos_type p[dim];

	for (size_t k = 0 ; k < dim ; k++)
	{p[k] = p_pos[i+k*n_cap];}

	cid = cid_<dim,cnt_type,ids_type,transform>::get_cid(div_c,spacing,off,t,p,e);

	e[dim] = atomicAdd(counts + cid, 1);

	p_ids[i] = cid;
	p_ids[i+1*(n_cap2)] = e[dim];
}
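/* With MAKE_CELLLIST_DETERMINISTIC the cell entries are filled directly in
 * particle order, which makes the construction reproducible run to run;
 * otherwise the slot of each particle inside its cell comes from the
 * atomicAdd counters computed above.
 */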
#ifdef MAKE_CELLLIST_DETERMINISTIC

template<unsigned int dim, typename cnt_type, typename ids_type, typename ph>
__global__ void fill_cells(cnt_type phase_id,
                           cnt_type n,
                           cnt_type * cells)
{
	cnt_type i;

	i = threadIdx.x + blockIdx.x * blockDim.x;
	if (i >= n) return;

	cells[i] = encode_phase_id<cnt_type,ph>(phase_id,i);
}

#else
template<unsigned int dim, typename cnt_type, typename ids_type, typename ph>
__global__ void fill_cells(cnt_type phase_id,
                           cnt_type n,
                           cnt_type n_cap,
                           cnt_type start_p,
                           const cnt_type * starts,
                           const cnt_type * p_ids,
                           cnt_type * cells)
{
	cnt_type i, cid, id, start;

	i = threadIdx.x + blockIdx.x * blockDim.x;
	if (i >= n) return;

	cid = p_ids[i];

	start = starts[cid];
	id = start + p_ids[1*n_cap+i];

	cells[id] = encode_phase_id<cnt_type,ph>(phase_id,i + start_p);
}

#endif
template<typename cnt_type, typename ph>
__device__ inline void phase_and_id(cnt_type c, cnt_type * ph_id, cnt_type * pid)
{
	*pid = c & ph::mask_low::value;
	*ph_id = c >> ph::type::value;
}
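/*! \brief Copy one element (all properties) from input to output:
 *         output[dst_id] = input[src_id]
 */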
template <typename vector_prp, typename cnt_type>
__device__ inline void reorder(const vector_prp & input, vector_prp & output, cnt_type src_id, cnt_type dst_id)
{
	output.set(dst_id,input,src_id);
}
template <typename vector_prp, typename cnt_type, unsigned int ... prp>
__device__ inline void reorder_wprp(const vector_prp & input, vector_prp & output, cnt_type src_id, cnt_type dst_id)
{
	output.template set<prp ...>(dst_id,input,src_id);
}
template <typename vector_prp, typename vector_pos, typename vector_ns, typename cnt_type, typename sh>
__global__ void reorder_parts(int n,
                              const vector_prp input,
                              vector_prp output,
                              const vector_pos input_pos,
                              vector_pos output_pos,
                              vector_ns sorted_non_sorted,
                              vector_ns non_sorted_to_sorted,
                              const cnt_type * cells)
{
	cnt_type i = threadIdx.x + blockIdx.x * blockDim.x;
	if (i >= n) return;

	cnt_type code = cells[i];

	reorder(input,output,code,i);
	reorder(input_pos,output_pos,code,i);

	sorted_non_sorted.template get<0>(i) = code;
	non_sorted_to_sorted.template get<0>(code) = i;
}
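/*! \brief Same as reorder_parts, but copies only the properties listed in prp...
 */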
template <typename vector_prp, typename vector_pos, typename vector_ns, typename cnt_type, typename sh, unsigned int ... prp>
__global__ void reorder_parts_wprp(int n,
                                   const vector_prp input,
                                   vector_prp output,
                                   const vector_pos input_pos,
                                   vector_pos output_pos,
                                   vector_ns sorted_non_sorted,
                                   vector_ns non_sorted_to_sorted,
                                   const cnt_type * cells)
{
	cnt_type i = threadIdx.x + blockIdx.x * blockDim.x;
	if (i >= n) return;

	cnt_type code = cells[i];

	reorder_wprp<vector_prp,cnt_type,prp...>(input,output,code,i);
	reorder(input_pos,output_pos,code,i);

	sorted_non_sorted.template get<0>(i) = code;
	non_sorted_to_sorted.template get<0>(code) = i;
}
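/*! \brief Mark with 1 the particles whose non-sorted index falls inside the
 *         domain (index < g_m), with 0 otherwise
 */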
template<typename vector_sort_index, typename vector_out_type>
__global__ void mark_domain_particles(vector_sort_index vsi, vector_out_type vout_dg, int g_m)
{
	int i = threadIdx.x + blockIdx.x * blockDim.x;

	if (i >= vsi.size()) return;

	vout_dg.template get<0>(i) = (vsi.template get<0>(i) < g_m)?1:0;
}
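/* collect_domain_ghost_ids compacts the marked indices using the exclusive scan
 * of the marks: wherever scan[i+1] > scan[i] the particle i was marked, and
 * scan[i] is its output slot. For example, marks (1,0,1) scan to (0,1,1,2) and
 * produce the id list (0,2).
 */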
template<typename scan_type, typename vector_out_type>
__global__ void collect_domain_ghost_ids(scan_type scan, vector_out_type vout_ids)
{
	int i = threadIdx.x + blockIdx.x * blockDim.x;

	if (i >= scan.size()-1) return;

	auto pp = scan.template get<0>(i+1);
	auto p = scan.template get<0>(i);

	if (pp != p)
	{vout_ids.template get<0>(scan.template get<0>(i)) = i;}
}
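/*! \brief For every non-empty cell, count how many of the candidate neighbour
 *         offsets in nn_to_test land on a cell that is also non-empty
 */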
template<typename cl_sparse_type, typename vector_type, typename vector_type2>
__global__ void count_nn_cells(cl_sparse_type cl_sparse, vector_type output, vector_type2 nn_to_test)
{
	typedef typename cl_sparse_type::index_type index_type;

	int tid = threadIdx.x + blockIdx.x * blockDim.x;

	if (tid >= cl_sparse.size()) {return;}

	index_type cell = cl_sparse.get_index(tid);

	for (int i = 0 ; i < nn_to_test.size() ; i++)
	{
		index_type cell_n = cell + nn_to_test.template get<0>(i);

		index_type start = cl_sparse.template get<0>(cell_n);

		if (start != (index_type)-1)
		{
			// the neighbour cell exists
			output.template get<0>(tid) += 1;
		}
	}
}
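/*! \brief For every non-empty cell, write the start/stop particle ranges of its
 *         non-empty neighbour cells; the stop of the last cell is capped with
 *         max_stop
 */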
template<typename cl_sparse_type, typename vector_type, typename vector_type2, typename vector_type3>
__global__ void fill_nn_cells(cl_sparse_type cl_sparse, vector_type starts, vector_type2 nn_to_test, vector_type3 output, typename cl_sparse_type::index_type max_stop)
{
	typedef typename cl_sparse_type::index_type index_type;

	int tid = threadIdx.x + blockIdx.x * blockDim.x;

	if (tid >= cl_sparse.size()) {return;}

	index_type cell = cl_sparse.get_index(tid);

	int cnt = 0;
	for (int i = 0 ; i < nn_to_test.size() ; i++)
	{
		index_type cell_n = cell + nn_to_test.template get<0>(i);

		auto sid = cl_sparse.get_sparse(cell_n);

		if (sid.id != (index_type)cl_sparse.size())
		{
			index_type start = cl_sparse.template get<0>(sid);

			output.template get<0>(starts.template get<0>(tid) + cnt) = start;

			// the stop is the start of the next non-empty cell, or max_stop for the last one
			if (sid.id == cl_sparse.size() - 1)
			{output.template get<1>(starts.template get<0>(tid) + cnt) = max_stop;}
			else
			{
				decltype(sid) sid_t;
				sid_t.id = sid.id + 1;
				output.template get<1>(starts.template get<0>(tid) + cnt) = cl_sparse.template get<0>(sid_t);
			}

			++cnt;
		}
	}
}

#endif /* OPENFPM_DATA_SRC_NN_CELLLIST_CUDA_CUDA_CELL_LIST_UTIL_FUNC_HPP_ */