8#ifndef OPENFPM_DATA_SRC_NN_CELLLIST_CUDA_CUDA_CELL_LIST_UTIL_FUNC_HPP_
9#define OPENFPM_DATA_SRC_NN_CELLLIST_CUDA_CUDA_CELL_LIST_UTIL_FUNC_HPP_
11#include <boost/integer/integer_mask.hpp>
12#include <Vector/map_vector_sparse.hpp>
14template<
unsigned int dim,
typename cnt_type,
typename ids_type,
typename transform>
// NOTE(review): this file appears to be a garbled extraction of OpenFPM's
// CUDA cell-list utilities: function signatures, braces and some interior
// lines are missing, and stray original line numbers (e.g. "14", "19")
// are fused into the text. Comments below describe the visible fragments.
//
// Generic cid_<dim,...> helper: linearize a dim-dimensional cell key into
// a scalar cell id using the per-axis divisions div_c (row-major fold).
// Fragment 1: fold from a raw index array e[].
19 cnt_type
id = e[dim-1];
// Fold the remaining axes: id = e[0] + div_c[0]*(e[1] + div_c[1]*...).
// NOTE(review): the loop starts at i = 1, which only folds three axes and
// is correct solely for dim == 3; for a generic dim one would expect the
// loop to start at dim-2 -- confirm against the full source.
22 for (
int i = 1; i >= 0 ; i-- )
23 {
id = e[i] + div_c[i]*id;}
// Fragment 2: same fold, but reading a grid-key-like object via get(i).
30 cnt_type
id = e.
get(dim-1);
33 for (
int i = 1; i >= 0 ; i-- )
34 {
id = e.
get(i) + div_c[i]*id;}
// Fragment 3: fold from a point p in space -- each coordinate is
// transformed (t.transform) and divided by the cell spacing first.
// div_c, spacing and t presumably come from the (missing) function
// signature -- TODO confirm.
44 cnt_type
id = p.
get(dim-1) / spacing[dim-1];
47 for (
int i = 1; i >= 0 ; i-- )
48 {
id = t.transform(p.
get(i),i) / spacing[i] + div_c[i]*id;}
54template<
typename cnt_type,
typename ids_type,
typename transform>
// Specialization of cid_ for dim == 1: the cell id is simply the (offset)
// cell index along the single axis.
55struct cid_<1,cnt_type,ids_type, transform>
// Fragment: point -> cell id (transform, divide by spacing, add the
// padding offset off[0]).
73 return t.transform(p.
get(0),0) / spacing[0] + off[0];
// Fragment: point -> cell key written into a raw array e[].
// uint_floor presumably truncates toward zero for non-negative inputs --
// TODO confirm.
83 e[0] = openfpm::math::uint_floor(t.transform(p,0)/spacing[0]) + off[0];
// Fragment: same computation, writing into a grid key via set_d().
96 e.
set_d(0,openfpm::math::uint_floor(t.transform(p,0)/spacing[0]) + off[0]);
// SFINAE guard: the following member is enabled only when
// ids_type == cnt_type.
101 template <typename U = cnt_type, typename sfinae=typename std::enable_if<std::is_same<ids_type,U>::value >::type >
109template<
typename cnt_type,
typename ids_type,
typename transform>
// Specialization of cid_ for dim == 2: id = x + y * div_c[0].
110struct cid_<2,cnt_type,ids_type,transform>
// Fragment: raw index array -> cell id.
114 return e[0] + div_c[0] * e[1];
// Fragment: grid key -> cell id.
119 return e.
get(0) + div_c[0] * e.
get(1);
// Fragment: point -> cell id (transform each coordinate, divide by the
// spacing, add the padding offset, then fold).
129 return openfpm::math::uint_floor(t.transform(p,0)/spacing[0]) + off[0] +
130 (openfpm::math::uint_floor(t.transform(p,1)/spacing[1]) + off[1])*div_c[0];
// Fragment: point -> cell key (stored into e[]) and cell id in one pass.
140 e[0] = openfpm::math::uint_floor(t.transform(p,0)/spacing[0]) + off[0];
141 e[1] = openfpm::math::uint_floor(t.transform(p,1)/spacing[1]) + off[1];
143 return e[0] + e[1]*div_c[0];
// Fragment: point -> grid key via set_d().
154 e.
set_d(0,openfpm::math::uint_floor(t.transform(p,0)/spacing[0]) + off[0]);
155 e.
set_d(1,openfpm::math::uint_floor(t.transform(p,1)/spacing[1]) + off[1]);
// SFINAE guard: enabled only when ids_type == cnt_type.
160 template <typename U = cnt_type, typename sfinae=typename std::enable_if<std::is_same<ids_type,U>::value >::type >
// Fragment: grid key -> cell id (same fold as above).
164 return e.
get(0) + e.
get(1)*div_c[0];
169template<
typename cnt_type,
typename ids_type,
typename transform>
// Specialization of cid_ for dim == 3:
// id = x + (y + z*div_c[1]) * div_c[0].
170struct cid_<3,cnt_type,ids_type,transform>
// Fragment: raw index array -> cell id.
176 return e[0] + (e[1] + e[2]*div_c[1])*div_c[0];
// Fragment: grid key -> cell id.
182 return e.
get(0) + (e.
get(1) + e.
get(2)*div_c[1])*div_c[0];
// Fragment: grid key plus padding offsets -> cell id.
189 return (e.
get(0) + off[0]) + ((e.
get(1) + off[1]) + (e.
get(2) + off[2])*div_c[1])*div_c[0];
// Fragment: point -> cell id.
199 return openfpm::math::uint_floor(t.transform(p,0)/spacing[0]) + off[0] +
200 (openfpm::math::uint_floor(t.transform(p,1)/spacing[1]) + off[1] +
201 (openfpm::math::uint_floor(t.transform(p,2)/spacing[2]) + off[2])*div_c[1])*div_c[0];
// Fragment: point -> cell key (into e[]) and cell id in one pass.
211 e[0] = openfpm::math::uint_floor(t.transform(p,0)/spacing[0]) + off[0];
212 e[1] = openfpm::math::uint_floor(t.transform(p,1)/spacing[1]) + off[1];
213 e[2] = openfpm::math::uint_floor(t.transform(p,2)/spacing[2]) + off[2];
215 return e[0] + (e[1] + e[2]*div_c[1])*div_c[0];
// Fragment: point -> grid key via set_d().
226 e.
set_d(0,openfpm::math::uint_floor(t.transform(p,0)/spacing[0]) + off[0]);
227 e.
set_d(1,openfpm::math::uint_floor(t.transform(p,1)/spacing[1]) + off[1]);
228 e.
set_d(2,openfpm::math::uint_floor(t.transform(p,2)/spacing[2]) + off[2]);
// SFINAE guard: enabled only when ids_type == cnt_type.
233 template <typename U = cnt_type, typename sfinae=typename std::enable_if<std::is_same<ids_type,U>::value >::type >
// Fragment: grid key -> cell id.
237 return e.
get(0) + (e.
get(1) + e.
get(2)*div_c[1])*div_c[0];
template<
unsigned int bit_phases,
typename cnt_type>
// Compile-time constants for packing a phase id into the topmost
// 'bit_phases' bits of a counter (used by encode_phase_id / phase_and_id).
// NOTE(review): the enclosing struct declaration is missing from this
// extraction; only the typedefs are visible.
//
// 'type' is the shift amount and must be counted in BITS. The original
// computed sizeof(cnt_type) - bit_phases (BYTES minus bits), which for
// e.g. cnt_type = unsigned int and bit_phases = 2 yields a shift of only
// 2 and makes the phase bits collide with the particle-id bits. The
// sibling mask_low typedef correctly multiplies by 8; do the same here.
typedef boost::mpl::int_<sizeof(cnt_type)*8 - bit_phases> type;
// mask_low: mask selecting the particle-id (low) bits.
// NOTE(review): it is sized on size_t rather than cnt_type -- verify
// against the full source that this asymmetry is intentional.
typedef boost::low_bits_mask_t<sizeof(size_t)*8 - bit_phases> mask_low;
249template<
typename cnt_type,
typename ph>
// Pack a phase id into the high bits of a particle id: ph_id is shifted
// up by ph::type::value bits and added to pid. The inverse operation is
// phase_and_id() further down. Assumes pid fits below that bit position.
250__device__ __host__ cnt_type encode_phase_id(cnt_type ph_id,cnt_type pid)
// NOTE(review): the function's braces are missing in this extraction.
252 return pid + (ph_id << ph::type::value);
260template<
bool is_sparse,
unsigned int dim,
typename pos_type,
261 typename cnt_type,
typename ids_type,
typename transform,
262 typename vector_pos_type,
typename vector_cnt_type,
typename vector_pids_type>
// Kernel fragment ("subindex"-style): for each particle, compute its cell
// id and either count particles per cell (dense path, via atomicAdd) or
// just record the cell id per particle (sparse path).
// NOTE(review): the kernel name, qualifiers and several parameters
// (n_part, start, the transform, the local arrays p and e, and the cid
// computation) were dropped by the extraction.
269 vector_pos_type p_pos,
270 vector_cnt_type counts,
271 vector_pids_type p_ids)
273 cnt_type i, cid, ins;
// i: global particle index (shifted by 'start'); ins: output slot
276 i = threadIdx.x + blockIdx.x * blockDim.x + start;
277 ins = threadIdx.x + blockIdx.x * blockDim.x;
// tail guard: the grid may be larger than the particle count
278 if (i >= n_part)
return;
// load the particle position into the local array p (declared in a
// line missing from this extraction)
282 for (
size_t k = 0 ; k < dim ; k++)
283 {p[k] = p_pos.template get<0>(i)[k];}
// cid is presumably computed from p via cid_<dim>::get_cid in a dropped
// line -- TODO confirm against the full source
287 if (is_sparse ==
false)
// dense path: atomically reserve this particle's slot inside cell cid
// and record the (cell, slot) pair for the later fill_cells pass
289 e[dim] = atomicAdd(&counts.template get<0>(cid), 1);
291 p_ids.template get<0>(ins)[0] = cid;
292 {p_ids.template get<0>(ins)[1] = e[dim];}
// sparse path: record the cell id per particle; counts holds cid too
296 p_ids.template get<0>(ins)[0] = cid;
297 {p_ids.template get<0>(ins)[1] = e[dim];}
299 counts.template get<0>(ins) = cid;
303template<
typename vector_sparse,
typename vector_cell>
// Insert each particle index into a sparse cell structure keyed by its
// precomputed cell id (vc holds one cell id per particle).
// NOTE(review): the init/bounds-check lines and braces are missing from
// this extraction; flush_block_insert() must presumably be reached by all
// threads of the block -- confirm its placement in the full source.
304__global__
void fill_cells_sparse(vector_sparse vs, vector_cell vc)
308 int p = blockIdx.x*blockDim.x + threadIdx.x;
313 c = vc.template get<0>(p);
314 vs.template insert<0>(c) = p;
317 vs.flush_block_insert();
321template<
unsigned int dim,
typename pos_type,
typename cnt_type,
typename ids_type,
typename transform>
// Raw-pointer variant of the subindex kernel fragment: positions are
// stored column-wise (p_pos[i + k*n_cap]) and the per-cell counters are a
// plain device array.
// NOTE(review): the kernel signature and several interior lines (the
// declarations of p, e, cid, n_cap, n_cap2) are missing from this
// extraction.
336 i = threadIdx.x + blockIdx.x * blockDim.x;
// tail guard
337 if (i >= n_part)
return;
// gather the i-th particle position (SoA layout, capacity n_cap)
341 for (
size_t k = 0 ; k < dim ; k++)
342 {p[k] = p_pos[i+k*n_cap];}
// cid presumably computed from p in a dropped line -- TODO confirm
// atomically reserve this particle's slot within its cell
346 e[dim] = atomicAdd(counts + cid, 1);
// record the in-cell slot (column 1 of the p_ids SoA buffer)
349 p_ids[i+1*(n_cap2)] = e[dim];
353#ifdef MAKE_CELLLIST_DETERMINISTIC
355template<
unsigned int dim,
typename cnt_type,
typename ids_type,
typename ph,
typename vector_cells_type>
// Deterministic variant of fill_cells: cell entry i is simply the
// phase-tagged index i itself, so the ordering is fixed and no atomics
// are involved.
// NOTE(review): the bounds check and the kernel braces are missing from
// this extraction.
356__global__
void fill_cells(cnt_type phase_id ,
358 vector_cells_type cells)
362 i = threadIdx.x + blockIdx.x * blockDim.x;
365 cells.template get<0>(i) = encode_phase_id<cnt_type,ph>(phase_id,i);
template<
unsigned int dim,
typename cnt_type,
typename ids_type,
typename ph,
typename vector_starts_type,
typename vector_pids_type,
typename vector_cells_type>
// fill_cells (scan-based variant): place each particle's phase-tagged
// global index into its final cell slot. For particle i, p_ids holds
// [0] = its cell id and [1] = its insertion slot within that cell;
// 'starts' holds the scanned per-cell base offsets.
//
// Fix: the template parameter list was garbled in this extraction
// ("typename vector_p" / "ids_type,"); it must be vector_pids_type, the
// type of the p_ids parameter below.
// NOTE(review): several parameter lines (the counters between phase_id
// and starts, including start_p used below), the bounds check and the
// kernel braces were dropped by the extraction -- restore them from the
// full source.
__global__
void fill_cells(cnt_type phase_id ,
                vector_starts_type starts,
                vector_pids_type p_ids,
                vector_cells_type cells)
cnt_type i, cid, id, start;
i = threadIdx.x + blockIdx.x * blockDim.x;
// cell of particle i and its reserved slot inside that cell
cid = p_ids.template get<0>(i)[0];
start = starts.template get<0>(cid);
id = start + p_ids.template get<0>(i)[1];
// phase-tag the (globally shifted) particle index and store it at its
// final position in cell order
cells.template get<0>(
id) = encode_phase_id<cnt_type,ph>(phase_id,i + start_p);
template<
typename cnt_type,
typename ph>
// Inverse of encode_phase_id(): split a packed value c into its phase id
// (high bits) and particle id (low bits).
//
// Fix: encode_phase_id() packs as  pid + (ph_id << ph::type::value),  so
// the phase must be recovered with a RIGHT shift. The original used '<<',
// which discards the stored phase and writes garbage into *ph_id.
// (Braces, missing in the garbled extraction, are restored.)
__device__
inline void phase_and_id(cnt_type c, cnt_type *ph_id, cnt_type *pid)
{
	// particle id: the low bits selected by mask_low
	*pid = c & ph::mask_low::value;
	// phase id: the bits above the particle-id field
	*ph_id = c >> ph::type::value;
}
407template <
typename vector_prp ,
typename cnt_type>
// Copy one particle's full property set between two property vectors
// (output.set copies the whole row from input[src_id] to output[dst_id]).
// NOTE(review): the output/dst_id/src_id parameter lines are missing from
// this extraction.
408__device__
inline void reorder(
const vector_prp & input,
413 output.set(dst_id,input,src_id);
416template <
typename vector_prp ,
typename cnt_type,
unsigned int ... prp>
// Like reorder(), but copies only the properties listed in the prp...
// pack (cheaper when only a subset is needed in sorted order).
// NOTE(review): the output/dst_id/src_id parameter lines are missing from
// this extraction.
417__device__
inline void reorder_wprp(
const vector_prp & input,
422 output.template set<prp ...>(dst_id,input,src_id);
425template <
typename vector_prp,
typename vector_pos,
typename vector_ns,
typename vector_cells_type,
typename cnt_type,
typename sh>
// Reorder particles (all properties + positions) into cell order and
// build the sorted<->non-sorted permutation maps.
// NOTE(review): the 'output' parameter, the bounds check against n and
// the kernel braces are missing from this extraction.
426__global__
void reorder_parts(
int n,
427 const vector_prp input,
429 const vector_pos input_pos,
430 vector_pos output_pos,
431 vector_ns sorted_non_sorted,
432 vector_ns non_sorted_to_sorted,
433 const vector_cells_type cells)
435 cnt_type i = threadIdx.x + blockIdx.x * blockDim.x;
// code: the (phase-encoded) original particle index stored in cell order
438 cnt_type code = cells.template get<0>(i);
// copy between original index 'code' and sorted slot i; the direction
// depends on reorder()'s (missing) parameter order -- TODO confirm
440 reorder(input, output, code,i);
441 reorder(input_pos,output_pos,code,i);
// two-way maps between sorted and original ordering
443 sorted_non_sorted.template get<0>(i) = code;
444 non_sorted_to_sorted.template get<0>(code) = i;
447template <
typename vector_prp,
typename vector_pos,
typename vector_ns,
typename vector_cells_type,
typename cnt_type,
typename sh,
unsigned int ... prp>
// Same as reorder_parts(), but only the properties in the prp... pack
// are copied for the property vector (positions are still copied fully).
// NOTE(review): the 'output' parameter, the bounds check against n and
// the kernel braces are missing from this extraction.
448__global__
void reorder_parts_wprp(
int n,
449 const vector_prp input,
451 const vector_pos input_pos,
452 vector_pos output_pos,
453 vector_ns sorted_non_sorted,
454 vector_ns non_sorted_to_sorted,
455 const vector_cells_type cells)
457 cnt_type i = threadIdx.x + blockIdx.x * blockDim.x;
// the (phase-encoded) original particle index stored in cell order
460 cnt_type code = cells.template get<0>(i);
461 reorder_wprp<vector_prp,cnt_type,prp...>(input, output, code,i);
462 reorder(input_pos,output_pos,code,i);
// two-way maps between sorted and original ordering
464 sorted_non_sorted.template get<0>(i) = code;
465 non_sorted_to_sorted.template get<0>(code) = i;
template<
typename vector_sort_index,
typename vector_out_type>
// Flag, for every entry of the sorted-index vector, whether it refers to
// a domain particle: writes 1 when the particle's original index is below
// g_m (the domain/ghost boundary), 0 otherwise.
__global__
void mark_domain_particles(vector_sort_index vsi, vector_out_type vout_dg,
int g_m)
{
	// one thread per entry of the sorted-index vector
	int tid = blockIdx.x * blockDim.x + threadIdx.x;

	// tail guard: the launch grid may overshoot the vector size
	if (tid >= vsi.size())
	{return;}

	// original indices below g_m belong to the domain, the rest are ghosts
	vout_dg.template get<0>(tid) = (vsi.template get<0>(tid) < g_m) ? 1 : 0;
}
478template<
typename scan_type,
typename vector_out_type>
// Stream-compaction write pass: for each index i, if the exclusive scan
// increases between i and i+1 (element i was marked), store i at its
// compacted position scan[i].
// NOTE(review): the comparison between pp and p that should guard the
// write below (presumably "if (pp != p)") was dropped by the extraction
// -- confirm against the full source.
479__global__
void collect_domain_ghost_ids(scan_type scan, vector_out_type vout_ids)
481 int i = threadIdx.x + blockIdx.x * blockDim.x;
// scan carries one trailing total element, hence size()-1 payload items
483 if (i >= scan.size()-1)
return;
485 auto pp = scan.template get<0>(i+1);
486 auto p = scan.template get<0>(i);
489 {vout_ids.template get<0>(scan.template get<0>(i)) = i;}
492template<
typename cl_sparse_type,
typename vector_type,
typename vector_type2>
// Kernel fragment: for each occupied cell of the sparse cell list, count
// how many of its candidate neighbour cells (cell + nn_to_test offsets)
// are also occupied, accumulating the count into output[tid].
// NOTE(review): the kernel name/signature and the declaration of 'id'
// (presumably a sparse index built from tid) are missing from this
// extraction.
495 typedef typename cl_sparse_type::index_type index_type;
497 int tid = threadIdx.x + blockIdx.x * blockDim.x;
// one thread per occupied cell
501 if (tid >= cl_sparse.size()) {
return;}
503 index_type cell = cl_sparse.get_index(
id);
// probe each candidate neighbour cell
505 for (
int i = 0 ; i < nn_to_test.size() ; i++)
507 index_type cell_n = cell + nn_to_test.template get<0>(i);
509 index_type start = cl_sparse.template get<0>(cell_n);
// (index_type)-1 appears to mark an empty/absent cell -- TODO confirm
511 if (start != (index_type)-1)
514 output.template get<0>(tid) += 1;
519template<
typename cl_sparse_type,
typename vector_type,
typename vector_type2,
typename vector_type3>
// For each occupied cell, write the [start, stop) particle ranges of its
// occupied neighbour cells into 'output', at the per-cell offsets given
// by 'starts' (computed by the counting pass above). max_stop terminates
// the range of the last occupied cell.
// NOTE(review): the declarations of 'id', 'cnt' and 'sid_t' (presumably
// the sparse element following sid) plus several braces were dropped by
// the extraction -- confirm against the full source.
520__global__
void fill_nn_cells(cl_sparse_type cl_sparse,
vector_type starts,
vector_type2 nn_to_test, vector_type3 output,
typename cl_sparse_type::index_type max_stop)
522 typedef typename cl_sparse_type::index_type index_type;
524 int tid = threadIdx.x + blockIdx.x * blockDim.x;
// one thread per occupied cell
528 if (tid >= cl_sparse.size()) {
return;}
530 index_type cell = cl_sparse.get_index(
id);
533 for (
int i = 0 ; i < nn_to_test.size() ; i++)
535 index_type cell_n = cell + nn_to_test.template get<0>(i);
// look up the neighbour in the sparse map; a miss yields id == size()
537 auto sid = cl_sparse.get_sparse(cell_n);
539 if (sid.id != (index_type)cl_sparse.size())
541 index_type start = cl_sparse.template get<0>(sid);
// range start for this neighbour cell
543 output.template get<0>(starts.template get<0>(tid) + cnt) = start;
// last occupied cell: its range ends at max_stop
545 if (sid.id == cl_sparse.size() - 1)
546 {output.template get<1>(starts.template get<0>(tid) + cnt) = max_stop;}
// otherwise the stop is the start of the next occupied entry (sid_t)
551 output.template get<1>(starts.template get<0>(tid) + cnt) = cl_sparse.template get<0>(sid_t);
// --- Stray generated-documentation residue (preserved as comments): ---
// This class implements the point shape in an N-dimensional space.
//   __device__ __host__ const T & get(unsigned int i) const  -- Get coordinate.
// grid_key_dx is the key to access any element in the grid.
//   __device__ __host__ void set_d(index_type i, index_type id)  -- Set the i index.
//   __device__ __host__ index_type get(index_type i) const  -- Get the i index.