8 #ifndef OPENFPM_DATA_SRC_NN_CELLLIST_CUDA_CUDA_CELL_LIST_UTIL_FUNC_HPP_
9 #define OPENFPM_DATA_SRC_NN_CELLLIST_CUDA_CUDA_CELL_LIST_UTIL_FUNC_HPP_
11 #include <boost/integer/integer_mask.hpp>
12 #include <Vector/map_vector_sparse.hpp>
// Helpers that turn per-dimension cell coordinates (or a point) into a single
// linear cell id, for a dimensionality `dim` given as a template parameter.
// NOTE(review): the declaration line that these overloads belong to and their
// full parameter lists are not visible in this excerpt; the comments below
// describe only the statements that are shown.
15 template<
unsigned int dim,
typename ids_type,
typename transform_type>
// Overload that reads the cell coordinates through operator[] (e[i]).
18 static inline __device__ __host__
unsigned int get_cid(
// Seed the id with the coordinate of the last dimension.
22 unsigned int id = e[dim-1];
// Fold lower dimensions in, each step computing e[i] + numCellDiv[i]*id.
// NOTE(review): the loop always starts at i = 1, which matches dim == 3 only;
// for dim > 3 the coordinates 2 .. dim-2 are never folded in (starting at
// dim-2 would cover them) — confirm whether this is intended.
25 for (
int i = 1; i >= 0 ; i-- )
26 {
id = e[i] + numCellDiv[i]*id;}
// Overload that reads the cell coordinates through e.get(i).
31 static inline __device__ __host__
unsigned int get_cid(
// Seed the id with the coordinate of the last dimension.
35 unsigned int id = e.
get(dim-1);
// Same folding as above, with the same fixed start index i = 1.
38 for (
int i = 1; i >= 0 ; i-- )
39 {
id = e.
get(i) + numCellDiv[i]*id;}
// Overload that starts from a point `p` and a `pointTransform` object.
45 static inline __device__ __host__
unsigned int get_cid(
48 const transform_type & pointTransform,
// Seed: last coordinate of p divided by unitCellP2[dim-1].
// NOTE(review): the seed uses p.get(dim-1) directly, while the loop below
// passes each coordinate through pointTransform.transform(...) first —
// confirm whether skipping the transform here is intended.
51 unsigned int id = p.get(dim-1) / unitCellP2[dim-1];
// Fold the transformed, divided coordinates in; start index is again i = 1.
54 for (
int i = 1; i >= 0 ; i-- )
55 {
id = pointTransform.transform(p.get(i),i) / unitCellP2[i] + numCellDiv[i]*id;}
// Specialization of cid_ for one dimension (first template argument is 1).
// NOTE(review): most parameter lists and several bodies of the overloads below
// are not visible in this excerpt; only the visible statements are described.
61 template<
typename ids_type,
typename transform_type>
62 struct cid_<1,ids_type, transform_type>
64 static inline __device__ __host__
unsigned int get_cid(
71 static inline __device__ __host__
unsigned int get_cid(
// Point-based overload: returns the transformed coordinate 0 divided by
// unitCellP2[0], plus cellPadDim[0].
79 static inline __device__ __host__
unsigned int get_cid(
83 const transform_type & pointTransform,
// NOTE(review): this overload does not wrap the quotient in
// openfpm::math::uint_floor and calls transform(p.get(0),0) rather than
// transform(p,0), unlike the overloads further down — confirm.
86 return pointTransform.transform(p.get(0),0) / unitCellP2[0] + cellPadDim[0];
// Point-based overload that also stores the computed cell coordinate in e[0].
90 static inline __device__ __host__
unsigned int get_cid(
94 const transform_type & pointTransform,
// e[0] = uint_floor(transformed coordinate / unitCellP2[0]) + cellPadDim[0].
98 e[0] = openfpm::math::uint_floor(pointTransform.transform(p,0)/unitCellP2[0]) + cellPadDim[0];
107 const transform_type & pointTransform,
// Same computation as above, stored through e.set_d(0, ...) instead of e[0].
112 e.
set_d(0,openfpm::math::uint_floor(pointTransform.transform(p,0)/unitCellP2[0]) + cellPadDim[0]);
// The enable_if makes this overload available only when ids_type is the same
// type as U (which defaults to unsigned int).
117 template <typename U = unsigned int, typename sfinae=typename std::enable_if<std::is_same<ids_type,U>::value >::type >
118 static inline __device__ __host__
unsigned int get_cid(
// Specialization of cid_ for two dimensions (first template argument is 2).
// The linear id is computed as coordinate0 + numCellDiv[0] * coordinate1.
// NOTE(review): parameter lists are only partly visible in this excerpt.
126 template<
typename ids_type,
typename transform_type>
127 struct cid_<2,ids_type,transform_type>
// Overload reading the cell coordinates through operator[].
129 static inline __device__ __host__
unsigned int get_cid(
133 return e[0] + numCellDiv[0] * e[1];
// Overload reading the cell coordinates through e.get(i).
136 static inline __device__ __host__
unsigned int get_cid(
140 return e.
get(0) + numCellDiv[0] * e.
get(1);
// Point-based overload: per dimension, the transformed coordinate is divided
// by unitCellP2[d], passed through uint_floor and offset by cellPadDim[d];
// the two results are then linearized with numCellDiv[0].
144 static inline __device__ __host__
unsigned int get_cid(
148 const transform_type & pointTransform,
152 return openfpm::math::uint_floor(pointTransform.transform(p,0)/unitCellP2[0]) + cellPadDim[0] +
153 (openfpm::math::uint_floor(pointTransform.transform(p,1)/unitCellP2[1]) + cellPadDim[1])*numCellDiv[0];
// Point-based overload that also writes the per-dimension cell coordinates
// into e[0] and e[1] before linearizing them.
157 static inline __device__ __host__
unsigned int get_cid(
161 const transform_type & pointTransform,
165 e[0] = openfpm::math::uint_floor(pointTransform.transform(p,0)/unitCellP2[0]) + cellPadDim[0];
166 e[1] = openfpm::math::uint_floor(pointTransform.transform(p,1)/unitCellP2[1]) + cellPadDim[1];
168 return e[0] + e[1]*numCellDiv[0];
175 const transform_type & pointTransform,
// Same per-dimension computation, stored through e.set_d(d, ...).
180 e.
set_d(0,openfpm::math::uint_floor(pointTransform.transform(p,0)/unitCellP2[0]) + cellPadDim[0]);
181 e.
set_d(1,openfpm::math::uint_floor(pointTransform.transform(p,1)/unitCellP2[1]) + cellPadDim[1]);
// The enable_if makes this overload available only when ids_type is the same
// type as U (which defaults to unsigned int).
186 template <typename U = unsigned int, typename sfinae=typename std::enable_if<std::is_same<ids_type,U>::value >::type >
187 static inline __device__ __host__
unsigned int get_cid(
191 return e.
get(0) + e.
get(1)*numCellDiv[0];
// Specialization of cid_ for three dimensions (first template argument is 3).
// The linear id is computed as c0 + (c1 + c2*numCellDiv[1])*numCellDiv[0].
// NOTE(review): parameter lists are only partly visible in this excerpt.
196 template<
typename ids_type,
typename transform_type>
197 struct cid_<3,ids_type,transform_type>
// Overload reading the cell coordinates through operator[].
200 static inline __device__ __host__
unsigned int get_cid(
204 return e[0] + (e[1] + e[2]*numCellDiv[1])*numCellDiv[0];
// Overload reading the cell coordinates through e.get(i).
207 static inline __device__ __host__
unsigned int get_cid(
211 return e.
get(0) + (e.
get(1) + e.
get(2)*numCellDiv[1])*numCellDiv[0];
// Overload that adds cellPadDim[d] to every coordinate before linearizing.
214 static inline __device__ __host__
unsigned int get_cid(
219 return (e.
get(0) + cellPadDim[0]) + ((e.
get(1) + cellPadDim[1]) + (e.
get(2) + cellPadDim[2])*numCellDiv[1])*numCellDiv[0];
// Point-based overload, templated on T: per dimension, the transformed
// coordinate is divided by unitCellP2[d], passed through uint_floor and offset
// by cellPadDim[d]; the three results are then linearized.
222 template<
typename T>
static inline __device__ __host__
unsigned int get_cid(
226 const transform_type & pointTransform,
229 return openfpm::math::uint_floor(pointTransform.transform(p,0)/unitCellP2[0]) + cellPadDim[0] +
230 (openfpm::math::uint_floor(pointTransform.transform(p,1)/unitCellP2[1]) + cellPadDim[1] +
231 (openfpm::math::uint_floor(pointTransform.transform(p,2)/unitCellP2[2]) + cellPadDim[2])*numCellDiv[1])*numCellDiv[0];
// Point-based overload that also writes the per-dimension cell coordinates
// into e[0..2] before linearizing them.
235 static inline __device__ __host__
unsigned int get_cid(
239 const transform_type & pointTransform,
243 e[0] = openfpm::math::uint_floor(pointTransform.transform(p,0)/unitCellP2[0]) + cellPadDim[0];
244 e[1] = openfpm::math::uint_floor(pointTransform.transform(p,1)/unitCellP2[1]) + cellPadDim[1];
245 e[2] = openfpm::math::uint_floor(pointTransform.transform(p,2)/unitCellP2[2]) + cellPadDim[2];
247 return e[0] + (e[1] + e[2]*numCellDiv[1])*numCellDiv[0];
254 const transform_type & pointTransform,
// Same per-dimension computation, stored through e.set_d(d, ...).
259 e.
set_d(0,openfpm::math::uint_floor(pointTransform.transform(p,0)/unitCellP2[0]) + cellPadDim[0]);
260 e.
set_d(1,openfpm::math::uint_floor(pointTransform.transform(p,1)/unitCellP2[1]) + cellPadDim[1]);
261 e.
set_d(2,openfpm::math::uint_floor(pointTransform.transform(p,2)/unitCellP2[2]) + cellPadDim[2]);
// The enable_if makes this overload available only when ids_type is the same
// type as U (which defaults to unsigned int).
266 template <typename U = unsigned int, typename sfinae=typename std::enable_if<std::is_same<ids_type,U>::value >::type >
267 static inline __device__ __host__
unsigned int get_cid(
271 return e.
get(0) + (e.
get(1) + e.
get(2)*numCellDiv[1])*numCellDiv[0];
// Kernel: for the particle handled by each thread, records the cell id and the
// particle's slot inside that cell, and bumps the per-cell particle counter.
// Uses a 1D launch layout (only the .x components of the thread/block indices
// are read).
// NOTE(review): some parameters (e.g. n_part, start) and the statement that
// computes `cid` are not visible in this excerpt.
279 template<
unsigned int dim,
typename pos_type,
typename ids_type,
typename transform_type,
280 typename vector_pos_type,
typename vector_cnt_type,
typename vector_pids_type>
281 __global__
void fill_cellIndex_LocalIndex(
285 transform_type pointTransform,
288 vector_pos_type vPos,
289 vector_cnt_type numPartInCell,
290 vector_pids_type cellIndex_LocalIndex)
292 unsigned int i, cid, ins;
// i: index into vPos (global thread index shifted by `start`).
// ins: index into the output vector (global thread index, not shifted).
295 i = threadIdx.x + blockIdx.x * blockDim.x + start;
296 ins = threadIdx.x + blockIdx.x * blockDim.x;
// Threads past the last particle do nothing.
297 if (i >= n_part)
return;
// Copy the `dim` coordinates of particle i into the local point p.
301 for (
size_t k = 0 ; k < dim ; k++)
302 p[k] = vPos.template get<0>(i)[k];
// atomicAdd returns the counter value before the increment, so e[dim] is this
// particle's position among the particles counted so far in cell `cid`.
306 e[dim] = atomicAdd(&numPartInCell.template get<0>(cid), 1);
// Output entry `ins`: component [0] is the cell id, [1] the slot in the cell.
307 cellIndex_LocalIndex.template get<0>(ins)[0] = cid;
308 cellIndex_LocalIndex.template get<0>(ins)[1] = e[dim];
// Kernel operating on one particle per thread (1D launch layout; only the .x
// components of the thread/block indices are read).
// NOTE(review): some parameters (e.g. n_part, start) and everything after the
// coordinate copy are not visible in this excerpt; presumably the cell id of
// the particle is computed and stored in `cellIndex` — confirm.
311 template<
unsigned int dim,
typename pos_type,
typename ids_type,
typename transform_type,
312 typename vector_pos_type,
typename vector_cnt_type>
313 __global__
void fill_cellIndex(
317 transform_type pointTransform,
320 vector_pos_type vPos,
321 vector_cnt_type cellIndex)
// i: index into vPos (global thread index shifted by `start`).
// ins: global thread index without the shift.
323 int i = threadIdx.x + blockIdx.x * blockDim.x + start;
324 int ins = threadIdx.x + blockIdx.x * blockDim.x;
// Threads past the last particle do nothing.
325 if (i >= n_part)
return;
// Copy the `dim` coordinates of particle i into the local point p.
329 for (
size_t k = 0 ; k < dim ; k++)
330 p[k] = vPos.template get<0>(i)[k];
// Kernel: inserts one (key = cell index, value = particle index) pair per
// particle into the sparse vector `vecSparseCellIndex_PartIndex`.
// 1D launch layout (only the .x components of the thread/block indices are read).
335 template<
typename vector_sparse,
typename vector_cell>
336 __global__
void fill_vsCellIndex_PartIndex(
337 vector_sparse vecSparseCellIndex_PartIndex,
338 vector_cell cellIndex)
// Called by every thread before any insertion.
340 vecSparseCellIndex_PartIndex.init();
// p: global thread index, used as the particle index.
342 int p = blockIdx.x*blockDim.x + threadIdx.x;
// Only threads that map to an existing entry of cellIndex insert.
344 if (p < cellIndex.size())
// c: cell index stored for particle p; it is used as the insertion key.
346 int c = cellIndex.template get<0>(p);
347 vecSparseCellIndex_PartIndex.template insert<0>(c) = p;
// NOTE(review): the brace lines are not visible here; the flush appears to sit
// outside the bounds check, i.e. to be executed by all threads — confirm.
350 vecSparseCellIndex_PartIndex.flush_block_insert();
// Kernel: scatters particle indices into cell order. For each entry `tid` of
// cellIndex_LocalIndex, the destination is the prefix-sum start of the entry's
// cell plus its local slot in that cell; the value written is tid + startParticle.
// 1D launch layout (only the .x components of the thread/block indices are read).
// NOTE(review): the second template parameter name is split across two lines
// below ("vector_p" / "ids_type"); the parameter list uses vector_pids_type —
// confirm against the original file.
353 template<
typename vector_starts_type,
typename vector_p
ids_type,
typename vector_cells_type>
354 __global__
void fill_cells(
355 vector_starts_type numPartInCellPrefixSum,
356 vector_pids_type cellIndex_LocalIndex,
357 vector_cells_type cellIndexLocalIndexToUnsorted,
358 size_t startParticle=0)
360 unsigned int cid, id, cellStart;
// tid: global thread index; threads past the end of the input do nothing.
362 int tid = threadIdx.x + blockIdx.x * blockDim.x;
363 if (tid >= cellIndex_LocalIndex.size())
return;
// Component [0] of the entry is the cell id.
365 cid = cellIndex_LocalIndex.template get<0>(tid)[0];
// Destination = start offset of the cell + component [1] (slot in the cell).
367 cellStart = numPartInCellPrefixSum.template get<0>(cid);
368 id = cellStart + cellIndex_LocalIndex.template get<0>(tid)[1];
// Store the (offset) source index at its cell-ordered position.
370 cellIndexLocalIndexToUnsorted.template get<0>(
id) = tid + startParticle;
// Kernel: builds two index maps that are inverses of each other from
// cellIndexLocalIndexToUnsorted. For each position tid with stored value pid:
//   sortedToUnsortedIndex[tid] = pid   and   unsortedToSortedIndex[pid] = tid.
// 1D launch layout (only the .x components of the thread/block indices are read).
373 template <
typename vector_map_type,
typename vector_cells_type>
374 __global__
void constructSortUnsortBidirectMap(
375 vector_map_type sortedToUnsortedIndex,
376 vector_map_type unsortedToSortedIndex,
377 const vector_cells_type cellIndexLocalIndexToUnsorted)
// tid: global thread index; threads past the end of the map do nothing.
379 unsigned int tid = threadIdx.x + blockIdx.x * blockDim.x;
380 if (tid >= sortedToUnsortedIndex.size()) {
return;}
// pid: the index stored at position tid of the input vector.
382 unsigned int pid = cellIndexLocalIndexToUnsorted.template get<0>(tid);
// Write the forward entry and the matching inverse entry.
384 sortedToUnsortedIndex.template get<0>(tid) = pid;
385 unsortedToSortedIndex.template get<0>(pid) = tid;
// Kernel: for each input index keyIn, looks up keyOut = indexMap[keyIn] and
// calls vectorOut.set(keyOut, vectorIn, keyIn).
// 1D launch layout (only the .x components of the thread/block indices are read).
// NOTE(review): the parameters vectorIn, vectorOut and start are used below but
// their declarations are not visible in this excerpt.
388 template <
typename vector_type,
typename vector_map_type>
389 __global__
void reorderParticlesPos(
392 const vector_map_type indexMap,
// keyIn: global thread index shifted by `start`; threads past the end of
// indexMap do nothing.
395 int keyIn = start + threadIdx.x + blockIdx.x * blockDim.x;
396 if (keyIn >= indexMap.size()) {
return;}
// keyOut: destination index taken from the map.
398 unsigned int keyOut = indexMap.template get<0>(keyIn);
// presumably copies element keyIn of vectorIn into element keyOut of
// vectorOut — confirm against the vector's set() documentation.
400 vectorOut.set(keyOut,vectorIn,keyIn);
// Kernel: for each input index keyIn, looks up keyOut = indexMap[keyIn] and
// calls vectorOut.set<prp...>(keyOut, vectorIn, keyIn), where `prp...` is the
// pack of unsigned template arguments of this kernel.
// 1D launch layout (only the .x components of the thread/block indices are read).
// NOTE(review): the parameters vectorIn, vectorOut and start are used below but
// their declarations are not visible in this excerpt.
403 template <
typename vector_type,
typename vector_map_type,
unsigned int ... prp>
404 __global__
void reorderParticlesPrp(
407 vector_map_type indexMap,
// keyIn: global thread index shifted by `start`; threads past the end of
// indexMap do nothing.
410 int keyIn = start + threadIdx.x + blockIdx.x * blockDim.x;
411 if (keyIn >= indexMap.size()) {
return;}
// keyOut: destination index taken from the map.
413 unsigned int keyOut = indexMap.template get<0>(keyIn);
// presumably copies only the properties listed in prp... from element keyIn
// of vectorIn to element keyOut of vectorOut — confirm.
415 vectorOut.template set<prp ...>(keyOut,vectorIn,keyIn);
// Kernel: writes a 0/1 flag per entry of sortedToUnsortedIndex. The flag is 1
// when the stored index is strictly below `ghostMarker`, otherwise 0.
// 1D launch layout (only the .x components of the thread/block indices are read).
// NOTE(review): the declaration of `ghostMarker` is not visible in this excerpt.
418 template<
typename vector_sort_index,
typename vector_out_type>
419 __global__
void mark_domain_particles(
420 vector_sort_index sortedToUnsortedIndex,
421 vector_out_type isSortedDomainOrGhost,
// i: global thread index; threads past the end of the input do nothing.
424 int i = threadIdx.x + blockIdx.x * blockDim.x;
426 if (i >= sortedToUnsortedIndex.size())
return;
// 1 if the index at position i is below ghostMarker, 0 otherwise.
428 isSortedDomainOrGhost.template get<0>(i) = (sortedToUnsortedIndex.template get<0>(i) < ghostMarker)?1:0;
// Kernel: uses the values of isUnsortedDomainOrGhostPrefixSum as destination
// slots: position i is written into
// sortedToSortedIndexNoGhost[isUnsortedDomainOrGhostPrefixSum[i]].
// 1D launch layout (only the .x components of the thread/block indices are read).
431 template<
typename scan_type,
typename vector_out_type>
432 __global__
void collect_domain_ghost_ids(
433 scan_type isUnsortedDomainOrGhostPrefixSum,
434 vector_out_type sortedToSortedIndexNoGhost)
// i: global thread index.
436 int i = threadIdx.x + blockIdx.x * blockDim.x;
// The last element is excluded because element i+1 is read below.
// NOTE(review): if size() returns an unsigned type and the vector is empty,
// size()-1 wraps around and no thread exits here — confirm callers never
// launch this kernel on an empty vector.
438 if (i >= isUnsortedDomainOrGhostPrefixSum.size()-1)
return;
// Values at i+1 and i.
// NOTE(review): pp and p are not used by any line visible in this excerpt;
// presumably a comparison between them guards the write below — confirm.
440 auto pp = isUnsortedDomainOrGhostPrefixSum.template get<0>(i+1);
441 auto p = isUnsortedDomainOrGhostPrefixSum.template get<0>(i);
// Store i at the slot given by the value at position i.
444 sortedToSortedIndexNoGhost.template get<0>(isUnsortedDomainOrGhostPrefixSum.template get<0>(i)) = i;
// Kernel: for each entry `tid` of the sparse cell vector, counts how many of
// the cells at (cell + offset), over all offsets in neighborCellOffset, yield a
// value different from (index_type)-1 when looked up in the sparse vector, and
// accumulates that count in neighborCellCount[tid].
// 1D launch layout (only the .x components of the thread/block indices are read).
// NOTE(review): the parameters neighborCellOffset and neighborCellCount, and
// the declaration of `id`, are not visible in this excerpt.
447 template<
typename cl_sparse_type,
typename vector_type,
typename vector_type2>
448 __global__
void countNonEmptyNeighborCells(
449 cl_sparse_type vecSparseCellIndex_PartIndex,
453 typedef typename cl_sparse_type::index_type index_type;
// tid: global thread index; threads past the end of the sparse vector do nothing.
455 int tid = threadIdx.x + blockIdx.x * blockDim.x;
456 if (tid >= vecSparseCellIndex_PartIndex.size()) {
return;}
// cell: key associated with `id` in the sparse vector (presumably `id` refers
// to entry tid — its setup is not visible here).
461 index_type cell = vecSparseCellIndex_PartIndex.get_index(
id);
// Visit every neighbor offset.
463 for (
int i = 0 ; i < neighborCellOffset.size() ; i++)
// Key of the neighbor cell and the value stored for it.
465 index_type neighborCellIndex = cell + neighborCellOffset.template get<0>(i);
466 index_type start = vecSparseCellIndex_PartIndex.template get<0>(neighborCellIndex);
// (index_type)-1 presumably marks a key that is absent from the sparse
// vector — confirm; any other value counts as a non-empty neighbor cell.
468 if (start != (index_type)-1)
471 neighborCellCount.template get<0>(tid) += 1;
// Kernel: for each entry `tid` of the sparse cell vector, walks the neighbor
// offsets and, for neighbor cells found in the sparse vector, writes a pair
// into neighborPartIndexFrom_To at position neighborCellCountPrefixSum[tid] + cnt:
// component <0> is the value stored for the neighbor cell, component <1> is the
// value stored for the next sparse entry, or `stop`.
// 1D launch layout (only the .x components of the thread/block indices are read).
// NOTE(review): the parameters neighborCellOffset and neighborCellCountPrefixSum,
// the declaration of `id`, the brace/else lines and the increment of `cnt` are
// not visible in this excerpt.
476 template<
typename cl_sparse_type,
typename vector_type,
typename vector_type2,
typename vector_type3>
477 __global__
void fillNeighborCellList(
478 cl_sparse_type vecSparseCellIndex_PartIndex,
481 vector_type3 neighborPartIndexFrom_To,
482 typename cl_sparse_type::index_type stop)
484 typedef typename cl_sparse_type::index_type index_type;
// tid: global thread index; threads past the end of the sparse vector do nothing.
486 int tid = threadIdx.x + blockIdx.x * blockDim.x;
487 if (tid >= vecSparseCellIndex_PartIndex.size()) {
return;}
// cell: key associated with `id` in the sparse vector.
490 index_type cell = vecSparseCellIndex_PartIndex.get_index(
id);
// i walks the neighbor offsets; cnt is the number of pairs written so far.
492 for (
int i = 0, cnt = 0; i < neighborCellOffset.size(); i++)
// Key of the neighbor cell and its position in the sparse vector.
494 index_type neighborCellIndex = cell + neighborCellOffset.template get<0>(i);
495 auto sid = vecSparseCellIndex_PartIndex.get_sparse(neighborCellIndex);
// sid.id == size() presumably signals "key not found" — confirm.
497 if (sid.id != vecSparseCellIndex_PartIndex.size())
// Component <0>: value stored for the neighbor cell.
499 neighborPartIndexFrom_To.template get<0>(neighborCellCountPrefixSum.template get<0>(tid) + cnt) = vecSparseCellIndex_PartIndex.template get<0>(sid);
// Advance to the next sparse entry; if one exists, its value goes into <1>.
501 if (++sid.id != vecSparseCellIndex_PartIndex.size())
502 neighborPartIndexFrom_To.template get<1>(neighborCellCountPrefixSum.template get<0>(tid) + cnt) = vecSparseCellIndex_PartIndex.template get<0>(sid);
// presumably the else-branch of the check above: with no next entry, <1> is
// set to `stop` — confirm.
504 neighborPartIndexFrom_To.template get<1>(neighborCellCountPrefixSum.template get<0>(tid) + cnt) = stop;
This class implements the point shape in an N-dimensional space.
grid_key_dx is the key to access any element in the grid
__device__ __host__ void set_d(index_type i, index_type id)
Set the i index.
__device__ __host__ index_type get(index_type i) const
Get the i index.