#ifndef OPENFPM_PDATA_SPARSEGRIDGPU_KERNELS_CUH
#define OPENFPM_PDATA_SPARSEGRIDGPU_KERNELS_CUH

#include <SparseGridGpu/BlockMapGpu.hpp>
#include <SparseGridGpu/TemplateUtils/mathUtils.hpp>
#include "util/cuda_util.hpp"
#include "SparseGrid/cp_block.hpp"
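
// Launch-bound qualifiers for the in-place stencil kernels below. They expand
// to nothing by default; a build can pre-define them (e.g. to
// __launch_bounds__(...)) before including this header to cap register usage.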
#ifndef SPARSEGRIDGPU_LAUNCH_BOUND_APPLY_STENCIL_IN_PLACE
#define SPARSEGRIDGPU_LAUNCH_BOUND_APPLY_STENCIL_IN_PLACE
#endif

#ifndef SPARSEGRIDGPU_LAUNCH_BOUND_APPLY_STENCIL_IN_PLACE_NO_SHARED
#define SPARSEGRIDGPU_LAUNCH_BOUND_APPLY_STENCIL_IN_PLACE_NO_SHARED
#endif

namespace SparseGridGpuKernels
{
    template<typename SparseGridGpuType, typename pointers_type, typename headers_type>
    __global__ void unpack_headers(pointers_type pointers, headers_type headers,
                                   int * result,
                                   unsigned int sz_pack,
                                   int n_slot)
    {
        // one thread per received pack buffer
        int t = blockIdx.x * blockDim.x + threadIdx.x;

        if (t >= pointers.size()) {return;}
        unsigned char * data_pack = (unsigned char *)pointers.template get<0>(t);

        while (data_pack < pointers.template get<1>(t))
        {
            int ih = pointers.template get<2>(t);

            // flag an error if the header slots for this pack are exhausted
            if (ih >= n_slot)
            {result[0] = 1; return;}

            // the chunk header is 8 bytes: read it in one shot on 64-bit
            // indexT_, otherwise assemble it from two 4-byte loads
            if (sizeof(typename SparseGridGpuType::indexT_) == 8)
            {headers.template get<0>(t*n_slot + ih) = *(size_t *)data_pack;}
            else
            {
                unsigned int dp1 = *(unsigned int *)data_pack;
                unsigned int dp2 = *(unsigned int *)&(data_pack[4]);
                headers.template get<0>(t*n_slot + ih) = (size_t)dp1 + (((size_t)dp2) << 32);
            }
            data_pack += sizeof(size_t);
            data_pack += SparseGridGpuType::unpack_headers(headers,data_pack,t*n_slot + ih,sz_pack);
            pointers.template get<2>(t) += 1;
        }
    }
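
    /* Minimal launch sketch (illustrative only: the buffer names, the
     * one-thread-per-pack sizing and the launch-macro usage are assumptions,
     * not something this header prescribes):
     *
     *   auto ite = pointers.getGPUIterator();
     *   CUDA_LAUNCH((unpack_headers<SGridGpuT>),ite,
     *               pointers.toKernel(),headers.toKernel(),
     *               (int *)result.toKernel(),sz_pack,n_slot);
     */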

    // Cross (star) stencil helper: gathers the 2*dim axis neighbours into a
    // cross_stencil object and evaluates the user lambda on it
    template<unsigned int dim>
    struct stencil_cross_func_impl
    {
        template<typename ScalarT, typename coordType, typename SparseGridT, unsigned int enlargedBlockSize, typename lambda_func, typename ... ArgsT>
        __device__ static inline void stencil(ScalarT & res, ScalarT & cur, coordType & coord,
                                              ScalarT (& enlargedBlock)[enlargedBlockSize],
                                              lambda_func f,
                                              SparseGridT & sparseGrid, ArgsT ... args)
        {
            cross_stencil<dim,ScalarT> cs;

            for (int d = 0; d < dim; ++d)
            {
                auto nPlusId = sparseGrid.getNeighbourLinIdInEnlargedBlock(coord, d, 1);
                auto nMinusId = sparseGrid.getNeighbourLinIdInEnlargedBlock(coord, d, -1);
                ScalarT neighbourPlus = enlargedBlock[nPlusId];
                ScalarT neighbourMinus = enlargedBlock[nMinusId];
                cs.xm[d] = neighbourMinus;
                cs.xp[d] = neighbourPlus;
            }

            res = f(cur,cs, args ...);
        }
    };
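
    /* Example of a lambda consumed by stencil() above: a 3D second-order
     * Laplacian (sketch; it assumes cross_stencil<dim,ScalarT> exposes the
     * xm/xp arrays filled in the loop above, and dx2 is a user constant):
     *
     *   auto lap = [dx2] __device__ (float cur, cross_stencil<3,float> & cs)
     *   {
     *       return (cs.xp[0] + cs.xm[0] + cs.xp[1] + cs.xm[1]
     *             + cs.xp[2] + cs.xm[2] - 6.0f*cur) / dx2;
     *   };
     */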

    // Generic (unimplemented) convolution dispatcher: only the 2D and 3D
    // specializations below are supported
    template<unsigned int dim>
    struct stencil_conv_func_impl
    {
        template<typename ScalarT, typename coordType, typename CpBlockType, typename lambda_func, typename ... ArgsT>
        __device__ static inline void stencil(ScalarT & res, coordType & coord,
                                              CpBlockType & cpb,
                                              lambda_func f,
                                              ArgsT ... args)
        {
            printf("Convolution operation on GPU: Dimension not implemented \n");
        }

        template<typename ScalarT, typename coordType, typename CpBlockType, typename lambda_func, typename ... ArgsT>
        __device__ static inline void stencil2(ScalarT & res1, ScalarT & res2, coordType & coord,
                                               CpBlockType & cpb1,
                                               CpBlockType & cpb2,
                                               lambda_func f,
                                               ArgsT ... args)
        {
            printf("Convolution operation on GPU: Dimension not implemented \n");
        }
    };

    // 3D specialization: the lambda receives the loaded block(s) and the
    // (x,y,z) coordinate of the point inside the block
    template<>
    struct stencil_conv_func_impl<3>
    {
        template<typename ScalarT, typename coordType, typename CpBlockType, typename DataBlockWrapperT, typename lambda_func, typename ... ArgsT>
        __device__ static inline void stencil_block(ScalarT & res, coordType & coord,
                                                    CpBlockType & cpb,
                                                    DataBlockWrapperT & DataBlockLoad,
                                                    unsigned int offset,
                                                    lambda_func f,
                                                    ArgsT ... args)
        {
            res = f(cpb,DataBlockLoad,offset,coord[0],coord[1],coord[2]);
        }

        template<typename ScalarT, typename coordType, typename CpBlockType, typename lambda_func, typename ... ArgsT>
        __device__ static inline void stencil(ScalarT & res, coordType & coord,
                                              CpBlockType & cpb,
                                              lambda_func f,
                                              ArgsT ... args)
        {
            res = f(cpb,coord[0],coord[1],coord[2]);
        }

        template<typename ScalarT, typename coordType, typename CpBlockType, typename lambda_func, typename ... ArgsT>
        __device__ static inline void stencil2(ScalarT & res1, ScalarT & res2, coordType & coord,
                                               CpBlockType & cpb1,
                                               CpBlockType & cpb2,
                                               lambda_func f,
                                               ArgsT ... args)
        {
            f(res1,res2,cpb1,cpb2,coord[0],coord[1],coord[2]);
        }

        template<typename ScalarT, typename coordType, typename CpBlockType, typename DataBlockWrapperT, typename lambda_func, typename ... ArgsT>
        __device__ static inline void stencil2_block(ScalarT & res1, ScalarT & res2, coordType & coord,
                                                     CpBlockType & cpb1,
                                                     CpBlockType & cpb2,
                                                     DataBlockWrapperT & DataBlockLoad,
                                                     unsigned int offset,
                                                     lambda_func f,
                                                     ArgsT ... args)
        {
            f(res1,res2,cpb1,cpb2,DataBlockLoad,offset,coord[0],coord[1],coord[2]);
        }

        template<typename ScalarT, typename coordType, typename CpBlockType, typename DataBlockWrapperT, typename lambda_func, typename ... ArgsT>
        __device__ static inline void stencil3_block(ScalarT & res1, ScalarT & res2, ScalarT & res3, coordType & coord,
                                                     CpBlockType & cpb1,
                                                     CpBlockType & cpb2,
                                                     CpBlockType & cpb3,
                                                     DataBlockWrapperT & DataBlockLoad,
                                                     unsigned int offset,
                                                     lambda_func f,
                                                     ArgsT ... args)
        {
            f(res1,res2,res3,cpb1,cpb2,cpb3,DataBlockLoad,offset,coord[0],coord[1],coord[2]);
        }
    };
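
    /* Shape of a 3D convolution lambda for stencil() above (sketch; it
     * assumes CpBlockType exposes operator()(x,y,z) access into the
     * ghost-extended block, which is how the cp_block loads are consumed):
     *
     *   auto grad_x = [] __device__ (auto & u, int x, int y, int z)
     *   {
     *       return 0.5f*(u(x+1,y,z) - u(x-1,y,z));
     *   };
     */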

    // 2D specialization
    template<>
    struct stencil_conv_func_impl<2>
    {
        template<typename ScalarT, typename coordType, typename CpBlockType, typename DataBlockWrapperT, typename lambda_func, typename ... ArgsT>
        __device__ static inline void stencil_block(ScalarT & res, coordType & coord,
                                                    CpBlockType & cpb,
                                                    DataBlockWrapperT & DataBlockLoad,
                                                    unsigned int offset,
                                                    lambda_func f,
                                                    ArgsT ... args)
        {
            res = f(cpb,DataBlockLoad,offset,coord[0],coord[1]);
        }

        template<typename ScalarT, typename coordType, typename CpBlockType, typename lambda_func, typename ... ArgsT>
        __device__ static inline void stencil(ScalarT & res, coordType & coord,
                                              CpBlockType & cpb,
                                              lambda_func f,
                                              ArgsT ... args)
        {
            res = f(cpb,coord[0],coord[1]);
        }

        template<typename ScalarT, typename coordType, typename CpBlockType, typename lambda_func, typename ... ArgsT>
        __device__ static inline void stencil2(ScalarT & res1, ScalarT & res2, coordType & coord,
                                               CpBlockType & cpb1,
                                               CpBlockType & cpb2,
                                               lambda_func f,
                                               ArgsT ... args)
        {
            f(res1,res2,cpb1,cpb2,coord[0],coord[1]);
        }

        template<typename ScalarT, typename coordType, typename CpBlockType, typename DataBlockWrapperT, typename lambda_func, typename ... ArgsT>
        __device__ static inline void stencil2_block(ScalarT & res1, ScalarT & res2, coordType & coord,
                                                     CpBlockType & cpb1,
                                                     CpBlockType & cpb2,
                                                     DataBlockWrapperT & DataBlockLoad,
                                                     unsigned int offset,
                                                     lambda_func f,
                                                     ArgsT ... args)
        {
            f(res1,res2,cpb1,cpb2,DataBlockLoad,offset,coord[0],coord[1]);
        }

        template<typename ScalarT, typename coordType, typename CpBlockType, typename DataBlockWrapperT, typename lambda_func, typename ... ArgsT>
        __device__ static inline void stencil3_block(ScalarT & res1, ScalarT & res2, ScalarT & res3, coordType & coord,
                                                     CpBlockType & cpb1,
                                                     CpBlockType & cpb2,
                                                     CpBlockType & cpb3,
                                                     DataBlockWrapperT & DataBlockLoad,
                                                     unsigned int offset,
                                                     lambda_func f,
                                                     ArgsT ... args)
        {
            f(res1,res2,res3,cpb1,cpb2,cpb3,DataBlockLoad,offset,coord[0],coord[1]);
        }
    };

    template<unsigned int dim,
             unsigned int n_loop,
             unsigned int p_src,
             unsigned int p_dst,
             unsigned int stencil_size>
    struct stencil_cross_func_conv
    {
        typedef NNStar<dim> stencil_type;

        static constexpr unsigned int supportRadius = stencil_size;

        template<typename SparseGridT, typename DataBlockWrapperT, typename lambda_func, typename ... ArgT>
        static inline __device__ void stencil(
                SparseGridT & sparseGrid,
                const unsigned int dataBlockId,
                openfpm::sparse_index<unsigned int> dataBlockIdPos,
                unsigned int offset,
                grid_key_dx<dim, int> pointCoord,
                DataBlockWrapperT & dataBlockLoad,
                DataBlockWrapperT & dataBlockStore,
                unsigned char curMask,
                lambda_func f,
                ArgT ... args)
        {
            typedef typename SparseGridT::AggregateBlockType AggregateT;
            typedef ScalarTypeOf<AggregateT, p_src> ScalarT;

            constexpr unsigned int enlargedBlockSize = IntPow<
                    SparseGridT::getBlockEdgeSize() + 2 * supportRadius, dim>::value;

            __shared__ ScalarT enlargedBlock[enlargedBlockSize];

            // pre-fill the ghost-extended block with the background value
            for (int i = 0; i < n_loop ; i++)
            {
                if (i*IntPow<SparseGridT::getBlockEdgeSize(), dim>::value + threadIdx.x < enlargedBlockSize)
                {
                    enlargedBlock[i*IntPow<SparseGridT::getBlockEdgeSize(), dim>::value + threadIdx.x] = sparseGrid.getblockMap().template getBackground<p_src>()[0];
                }
            }

            __syncthreads();

            typedef typename vmpl_create_constant<dim,SparseGridT::blockEdgeSize_>::type block_sizes;
            typedef typename vmpl_sum_constant<2*stencil_size,block_sizes>::type vmpl_sizes;

            cp_block<ScalarT,stencil_size,vmpl_sizes,dim> cpb(enlargedBlock);

            sparseGrid.template loadGhostBlock<p_src>(dataBlockLoad, dataBlockIdPos, enlargedBlock);

            __syncthreads();

            ScalarT res = 0;

            if ((curMask & mask_sparse::EXIST) && !(curMask & mask_sparse::PADDING))
            {
                int coord[dim];

                // unpack the linear offset into block-local coordinates
                unsigned int linIdTmp = offset;
                for (unsigned int d = 0; d < dim; ++d)
                {
                    coord[d] = linIdTmp % SparseGridT::blockEdgeSize_;
                    linIdTmp /= SparseGridT::blockEdgeSize_;
                }

                stencil_conv_func_impl<dim>::stencil(res,coord,cpb,f,args...);

                dataBlockStore.template get<p_dst>()[offset] = res;
            }
        }

        template <typename SparseGridT, typename CtxT>
        static inline void __host__ flush(SparseGridT & sparseGrid, CtxT & ctx)
        {}
    };
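
    /* These functors are not launched directly: SparseGridGpu dispatches them
     * from its convolution entry points, passing the user lambda as f. A
     * hypothetical call shape (the public API name here is an assumption):
     *
     *   grid.template conv<0,1,1>(start,stop,
     *       [] __device__ (auto & u, int x, int y, int z)
     *       {return u(x,y,z+1) + u(x,y,z-1) - 2.0f*u(x,y,z);});
     */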

    template<unsigned int dim,
             unsigned int n_loop,
             unsigned int p_src,
             unsigned int p_dst,
             unsigned int stencil_size>
    struct stencil_cross_func_conv_block_read
    {
        typedef NNStar<dim> stencil_type;

        static constexpr unsigned int supportRadius = stencil_size;

        template<typename SparseGridT, typename DataBlockWrapperT, typename lambda_func, typename ... ArgT>
        static inline __device__ void stencil(
                SparseGridT & sparseGrid,
                const unsigned int dataBlockId,
                openfpm::sparse_index<unsigned int> dataBlockIdPos,
                unsigned int offset,
                grid_key_dx<dim, int> pointCoord,
                DataBlockWrapperT & dataBlockLoad,
                DataBlockWrapperT & dataBlockStore,
                unsigned char curMask,
                lambda_func f,
                ArgT ... args)
        {
            typedef typename SparseGridT::AggregateBlockType AggregateT;
            typedef ScalarTypeOf<AggregateT, p_src> ScalarT;

            constexpr unsigned int enlargedBlockSize = IntPow<
                    SparseGridT::getBlockEdgeSize() + 2 * supportRadius, dim>::value;

            __shared__ ScalarT enlargedBlock[enlargedBlockSize];

            // pre-fill the ghost-extended block with the background value
            for (int i = 0; i < n_loop ; i++)
            {
                if (i*IntPow<SparseGridT::getBlockEdgeSize(), dim>::value + threadIdx.x < enlargedBlockSize)
                {
                    enlargedBlock[i*IntPow<SparseGridT::getBlockEdgeSize(), dim>::value + threadIdx.x] = sparseGrid.getblockMap().template getBackground<p_src>()[0];
                }
            }

            __syncthreads();

            typedef typename vmpl_create_constant<dim,SparseGridT::blockEdgeSize_>::type block_sizes;
            typedef typename vmpl_sum_constant<2*stencil_size,block_sizes>::type vmpl_sizes;

            cp_block<ScalarT,stencil_size,vmpl_sizes,dim> cpb(enlargedBlock);

            sparseGrid.template loadGhostBlock<p_src>(dataBlockLoad, dataBlockIdPos, enlargedBlock);

            __syncthreads();

            ScalarT res = 0;

            if ((curMask & mask_sparse::EXIST) && !(curMask & mask_sparse::PADDING))
            {
                int coord[dim];

                unsigned int linIdTmp = offset;
                for (unsigned int d = 0; d < dim; ++d)
                {
                    coord[d] = linIdTmp % SparseGridT::blockEdgeSize_;
                    linIdTmp /= SparseGridT::blockEdgeSize_;
                }

                stencil_conv_func_impl<dim>::stencil_block(res,coord,cpb,dataBlockLoad,offset,f,args...);

                dataBlockStore.template get<p_dst>()[offset] = res;
            }
        }

        template <typename SparseGridT, typename CtxT>
        static inline void __host__ flush(SparseGridT & sparseGrid, CtxT & ctx)
        {}
    };

    template<unsigned int dim,
             unsigned int n_loop,
             unsigned int p_src1,
             unsigned int p_src2,
             unsigned int p_dst1,
             unsigned int p_dst2,
             unsigned int stencil_size>
    struct stencil_func_conv2_b
    {
        typedef NNStar<dim> stencil_type;

        static constexpr unsigned int supportRadius = stencil_size;

        template<typename SparseGridT, typename DataBlockWrapperT, typename lambda_func, typename ... ArgT>
        static inline __device__ void stencil(
                SparseGridT & sparseGrid,
                const unsigned int dataBlockId,
                openfpm::sparse_index<unsigned int> dataBlockIdPos,
                unsigned int offset,
                grid_key_dx<dim, int> pointCoord,
                DataBlockWrapperT & dataBlockLoad,
                DataBlockWrapperT & dataBlockStore,
                unsigned char curMask,
                lambda_func f,
                ArgT ... args)
        {
            typedef typename SparseGridT::AggregateBlockType AggregateT;
            typedef ScalarTypeOf<AggregateT, p_src1> ScalarT1;
            typedef ScalarTypeOf<AggregateT, p_src2> ScalarT2;

            constexpr unsigned int enlargedBlockSize = IntPow<
                    SparseGridT::getBlockEdgeSize() + 2 * supportRadius, dim>::value;

            __shared__ ScalarT1 enlargedBlock1[enlargedBlockSize];
            __shared__ ScalarT2 enlargedBlock2[enlargedBlockSize];

            typedef typename vmpl_create_constant<dim,SparseGridT::blockEdgeSize_>::type block_sizes;
            typedef typename vmpl_sum_constant<2*stencil_size,block_sizes>::type vmpl_sizes;

            cp_block<ScalarT1,stencil_size,vmpl_sizes,dim> cpb1(enlargedBlock1);
            cp_block<ScalarT2,stencil_size,vmpl_sizes,dim> cpb2(enlargedBlock2);

            sparseGrid.template loadGhostBlock<p_src1>(dataBlockLoad, dataBlockIdPos, enlargedBlock1);
            sparseGrid.template loadGhostBlock<p_src2>(dataBlockLoad, dataBlockIdPos, enlargedBlock2);

            __syncthreads();

            ScalarT1 res1 = 0;
            ScalarT2 res2 = 0;

            if ((curMask & mask_sparse::EXIST) && !(curMask & mask_sparse::PADDING))
            {
                int coord[dim];

                unsigned int linIdTmp = offset;
                for (unsigned int d = 0; d < dim; ++d)
                {
                    coord[d] = linIdTmp % SparseGridT::blockEdgeSize_;
                    linIdTmp /= SparseGridT::blockEdgeSize_;
                }

                stencil_conv_func_impl<dim>::stencil2_block(res1,res2,coord,cpb1,cpb2,dataBlockLoad,offset,f,args...);

                dataBlockStore.template get<p_dst1>()[offset] = res1;
                dataBlockStore.template get<p_dst2>()[offset] = res2;
            }
        }

        template <typename SparseGridT, typename CtxT>
        static inline void __host__ flush(SparseGridT & sparseGrid, CtxT & ctx)
        {}
    };

    template<unsigned int dim,
             unsigned int n_loop,
             unsigned int p_src1,
             unsigned int p_src2,
             unsigned int p_src3,
             unsigned int p_dst1,
             unsigned int p_dst2,
             unsigned int p_dst3,
             unsigned int stencil_size>
    struct stencil_func_conv3_b
    {
        typedef NNStar<dim> stencil_type;

        static constexpr unsigned int supportRadius = stencil_size;

        template<typename SparseGridT, typename DataBlockWrapperT, typename lambda_func, typename ... ArgT>
        static inline __device__ void stencil(
                SparseGridT & sparseGrid,
                const unsigned int dataBlockId,
                openfpm::sparse_index<unsigned int> dataBlockIdPos,
                unsigned int offset,
                grid_key_dx<dim, int> pointCoord,
                DataBlockWrapperT & dataBlockLoad,
                DataBlockWrapperT & dataBlockStore,
                unsigned char curMask,
                lambda_func f,
                ArgT ... args)
        {
            typedef typename SparseGridT::AggregateBlockType AggregateT;
            typedef ScalarTypeOf<AggregateT, p_src1> ScalarT1;
            typedef ScalarTypeOf<AggregateT, p_src2> ScalarT2;
            typedef ScalarTypeOf<AggregateT, p_src3> ScalarT3;

            constexpr unsigned int enlargedBlockSize = IntPow<
                    SparseGridT::getBlockEdgeSize() + 2 * supportRadius, dim>::value;

            __shared__ ScalarT1 enlargedBlock1[enlargedBlockSize];
            __shared__ ScalarT2 enlargedBlock2[enlargedBlockSize];
            __shared__ ScalarT3 enlargedBlock3[enlargedBlockSize];

            typedef typename vmpl_create_constant<dim,SparseGridT::blockEdgeSize_>::type block_sizes;
            typedef typename vmpl_sum_constant<2*stencil_size,block_sizes>::type vmpl_sizes;

            cp_block<ScalarT1,stencil_size,vmpl_sizes,dim> cpb1(enlargedBlock1);
            cp_block<ScalarT2,stencil_size,vmpl_sizes,dim> cpb2(enlargedBlock2);
            cp_block<ScalarT3,stencil_size,vmpl_sizes,dim> cpb3(enlargedBlock3);

            sparseGrid.template loadGhostBlock<p_src1>(dataBlockLoad, dataBlockIdPos, enlargedBlock1);
            sparseGrid.template loadGhostBlock<p_src2>(dataBlockLoad, dataBlockIdPos, enlargedBlock2);
            sparseGrid.template loadGhostBlock<p_src3>(dataBlockLoad, dataBlockIdPos, enlargedBlock3);

            __syncthreads();

            ScalarT1 res1 = 0;
            ScalarT2 res2 = 0;
            ScalarT3 res3 = 0;

            if ((curMask & mask_sparse::EXIST) && !(curMask & mask_sparse::PADDING))
            {
                int coord[dim];

                unsigned int linIdTmp = offset;
                for (unsigned int d = 0; d < dim; ++d)
                {
                    coord[d] = linIdTmp % SparseGridT::blockEdgeSize_;
                    linIdTmp /= SparseGridT::blockEdgeSize_;
                }

                stencil_conv_func_impl<dim>::stencil3_block(res1,res2,res3,coord,cpb1,cpb2,cpb3,dataBlockLoad,offset,f,args...);

                dataBlockStore.template get<p_dst1>()[offset] = res1;
                dataBlockStore.template get<p_dst2>()[offset] = res2;
                dataBlockStore.template get<p_dst3>()[offset] = res3;
            }
        }

        template <typename SparseGridT, typename CtxT>
        static inline void __host__ flush(SparseGridT & sparseGrid, CtxT & ctx)
        {}
    };
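
    /* The lambda for the three-property functor above must write its results
     * through the first three reference arguments, mirroring the dispatch:
     * f(res1,res2,res3,cpb1,cpb2,cpb3,dataBlockLoad,offset,x,y,z).
     */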

    template<unsigned int dim,
             unsigned int n_loop,
             unsigned int p_src1,
             unsigned int p_src2,
             unsigned int p_dst1,
             unsigned int p_dst2,
             unsigned int stencil_size>
    struct stencil_func_conv2
    {
        typedef NNStar<dim> stencil_type;

        static constexpr unsigned int supportRadius = stencil_size;

        template<typename SparseGridT, typename DataBlockWrapperT, typename lambda_func, typename ... ArgT>
        static inline __device__ void stencil(
                SparseGridT & sparseGrid,
                const unsigned int dataBlockId,
                openfpm::sparse_index<unsigned int> dataBlockIdPos,
                unsigned int offset,
                grid_key_dx<dim, int> pointCoord,
                DataBlockWrapperT & dataBlockLoad,
                DataBlockWrapperT & dataBlockStore,
                unsigned char curMask,
                lambda_func f,
                ArgT ... args)
        {
            typedef typename SparseGridT::AggregateBlockType AggregateT;
            typedef ScalarTypeOf<AggregateT, p_src1> ScalarT1;
            typedef ScalarTypeOf<AggregateT, p_src2> ScalarT2;

            constexpr unsigned int enlargedBlockSize = IntPow<
                    SparseGridT::getBlockEdgeSize() + 2 * supportRadius, dim>::value;

            __shared__ ScalarT1 enlargedBlock1[enlargedBlockSize];
            __shared__ ScalarT2 enlargedBlock2[enlargedBlockSize];

            typedef typename vmpl_create_constant<dim,SparseGridT::blockEdgeSize_>::type block_sizes;
            typedef typename vmpl_sum_constant<2*stencil_size,block_sizes>::type vmpl_sizes;

            cp_block<ScalarT1,stencil_size,vmpl_sizes,dim> cpb1(enlargedBlock1);
            cp_block<ScalarT2,stencil_size,vmpl_sizes,dim> cpb2(enlargedBlock2);

            sparseGrid.template loadGhostBlock<p_src1>(dataBlockLoad, dataBlockIdPos, enlargedBlock1);
            sparseGrid.template loadGhostBlock<p_src2>(dataBlockLoad, dataBlockIdPos, enlargedBlock2);

            __syncthreads();

            ScalarT1 res1 = 0;
            ScalarT2 res2 = 0;

            if ((curMask & mask_sparse::EXIST) && !(curMask & mask_sparse::PADDING))
            {
                int coord[dim];

                unsigned int linIdTmp = offset;
                for (unsigned int d = 0; d < dim; ++d)
                {
                    coord[d] = linIdTmp % SparseGridT::blockEdgeSize_;
                    linIdTmp /= SparseGridT::blockEdgeSize_;
                }

                stencil_conv_func_impl<dim>::stencil2(res1,res2,coord,cpb1,cpb2,f,args...);

                dataBlockStore.template get<p_dst1>()[offset] = res1;
                dataBlockStore.template get<p_dst2>()[offset] = res2;
            }
        }

        template <typename SparseGridT, typename CtxT>
        static inline void __host__ flush(SparseGridT & sparseGrid, CtxT & ctx)
        {}
    };

    template<unsigned int dim,
             unsigned int p_src,
             unsigned int p_dst,
             unsigned int stencil_size>
    struct stencil_cross_func
    {
        typedef NNStar<dim> stencil_type;

        static constexpr unsigned int supportRadius = stencil_size;

        template<typename SparseGridT, typename DataBlockWrapperT, typename lambda_func, typename ... ArgT>
        static inline __device__ void stencil(
                SparseGridT & sparseGrid,
                const unsigned int dataBlockId,
                openfpm::sparse_index<unsigned int> dataBlockIdPos,
                unsigned int offset,
                grid_key_dx<dim, int> pointCoord,
                DataBlockWrapperT & dataBlockLoad,
                DataBlockWrapperT & dataBlockStore,
                unsigned char curMask,
                lambda_func f,
                ArgT ... args)
        {
            typedef typename SparseGridT::AggregateBlockType AggregateT;
            typedef ScalarTypeOf<AggregateT, p_src> ScalarT;

            constexpr unsigned int enlargedBlockSize = IntPow<
                    SparseGridT::getBlockEdgeSize() + 2 * supportRadius, dim>::value;

            __shared__ ScalarT enlargedBlock[enlargedBlockSize];

            sparseGrid.template loadGhostBlock<p_src>(dataBlockLoad, dataBlockIdPos, enlargedBlock);

            __syncthreads();

            decltype(sparseGrid.getLinIdInEnlargedBlock(0)) linId = 0;
            ScalarT res = 0;

            if ((curMask & mask_sparse::EXIST) && !(curMask & mask_sparse::PADDING))
            {
                const auto coord = sparseGrid.getCoordInEnlargedBlock(offset);

                linId = sparseGrid.getLinIdInEnlargedBlock(offset);
                ScalarT cur = enlargedBlock[linId];

                stencil_cross_func_impl<dim>::stencil(res,cur,coord,enlargedBlock,f,sparseGrid,args...);
            }

            __syncthreads();

            if ((curMask & mask_sparse::EXIST) && !(curMask & mask_sparse::PADDING))
            {
                enlargedBlock[linId] = res;
            }

            __syncthreads();
            sparseGrid.template storeBlock<p_dst>(dataBlockStore, enlargedBlock);
        }

        template <typename SparseGridT, typename CtxT>
        static inline void __host__ flush(SparseGridT & sparseGrid, CtxT & ctx)
        {}
    };

    template <unsigned int dim,
              unsigned int stencilSupportRadius,
              unsigned int pMask,
              typename NN_type,
              typename checker_type,
              typename IndexBufT,
              typename DataBufT,
              typename SparseGridT,
              typename nn_blocksT>
    __global__ void tagBoundaries(IndexBufT indexBuffer, DataBufT dataBuffer, SparseGridT sparseGrid, nn_blocksT nbT, checker_type chk)
    {
        constexpr unsigned int pIndex = 0;

        typedef typename IndexBufT::value_type IndexAggregateT;
        typedef BlockTypeOf<IndexAggregateT, pIndex> IndexT;

        typedef typename DataBufT::value_type AggregateT;
        typedef BlockTypeOf<AggregateT, pMask> MaskBlockT;
        typedef ScalarTypeOf<AggregateT, pMask> MaskT;
        constexpr unsigned int blockSize = MaskBlockT::size;

        // one CUDA block per data chunk, one thread per element
        const unsigned int dataBlockPos = blockIdx.x;
        const unsigned int offset = threadIdx.x;

        constexpr unsigned int enlargedBlockSize = IntPow<
                sparseGrid.getBlockEdgeSize() + 2 * stencilSupportRadius, dim>::value;
        __shared__ MaskT enlargedBlock[enlargedBlockSize];

        if (dataBlockPos >= indexBuffer.size())
        {return;}

        const long long dataBlockId = indexBuffer.template get<pIndex>(dataBlockPos);
        auto dataBlock = dataBuffer.get(dataBlockPos);

        openfpm::sparse_index<unsigned int> sdataBlockPos;
        sdataBlockPos.id = dataBlockPos;
        sparseGrid.template loadGhostBlock<pMask>(dataBlock,sdataBlockPos,enlargedBlock);

        __syncthreads();

        bool check = chk.check(sparseGrid,dataBlockId,offset);

        if (offset < blockSize && check)
        {
            const auto coord = sparseGrid.getCoordInEnlargedBlock(offset);
            const auto linId = sparseGrid.getLinIdInEnlargedBlock(offset);

            MaskT cur = enlargedBlock[linId];
            if (sparseGrid.exist(cur))
            {
                // a point with a missing neighbour is a padding (boundary) point
                bool isPadding = NN_type::isPadding(sparseGrid,coord,enlargedBlock);
                if (isPadding)
                {
                    sparseGrid.setPadding(enlargedBlock[linId]);
                }
                else
                {
                    sparseGrid.unsetPadding(enlargedBlock[linId]);
                }
            }
        }

        __syncthreads();
        sparseGrid.template storeBlock<pMask>(dataBlock, enlargedBlock);
    }
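
    /* Sketch of a trivial checker usable as checker_type above (assumption:
     * only the check(sparseGrid,dataBlockId,offset) call used in this kernel
     * is required of the type):
     *
     *   struct check_always
     *   {
     *       template<typename SparseGridT>
     *       __device__ bool check(SparseGridT & sg, long long dataBlockId, unsigned int offset)
     *       {return true;}
     *   };
     */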

    template<unsigned int dim,
             unsigned int pMask,
             unsigned int chunk_size,
             typename SparseGridType,
             typename outputType>
    __global__ void link_construct(SparseGridType grid_up, SparseGridType grid_cu, outputType out)
    {
        const unsigned int dataBlockPos = blockIdx.x;
        const unsigned int offset = threadIdx.x;

        auto & indexBuffer = grid_cu.getIndexBuffer();
        auto & dataBuffer = grid_cu.getDataBuffer();

        // only padding (boundary) points generate links
        if (dataBuffer.template get<pMask>(dataBlockPos)[offset] & 0x2)
        {
            auto id = indexBuffer.template get<0>(dataBlockPos);
            auto pos = grid_cu.getCoord(id,offset);

            printf("HERE %d %d \n",pos.get(0),pos.get(1));

            // map the point to the coarser level (assumption: factor-2
            // refinement between levels)
            for (int i = 0 ; i < dim ; i++)
            {pos.set_d(i,pos.get(i) / 2);}

            if (grid_up.template get<pMask>(pos) == 0x1)
            {
                atomicAdd(&out.template get<0>(dataBlockPos),1);
            }
        }
    }

    template<unsigned int dim,
             unsigned int pMask,
             unsigned int chunk_size,
             typename SparseGridType,
             typename outputType,
             typename BoxType>
    __global__ void count_paddings(SparseGridType grid_cu, outputType out, BoxType box)
    {
        const unsigned int dataBlockPos = blockIdx.x;
        const unsigned int offset = threadIdx.x;

        auto & indexBuffer = grid_cu.getIndexBuffer();
        auto & dataBuffer = grid_cu.getDataBuffer();

        auto id = indexBuffer.template get<0>(dataBlockPos);

        // process only points inside the given box
        auto coord = grid_cu.getCoord(id,offset);

        bool active = box.isInsideKey(coord);
        if (active == false)
        {return;}

        // count the padding points of this chunk
        if (dataBuffer.template get<pMask>(dataBlockPos)[offset] & 0x2)
        {
            atomicAdd(&out.template get<0>(dataBlockPos),1);
        }
    }

    template<unsigned int pMask,
             typename SparseGridType,
             typename ScanType,
             typename outputType,
             typename BoxType>
    __global__ void collect_paddings(SparseGridType grid_cu, ScanType stp, outputType out, BoxType box)
    {
        const unsigned int dataBlockPos = blockIdx.x;
        const unsigned int offset = threadIdx.x;

        __shared__ int counter;

        if (threadIdx.x == 0)
        {counter = 0;}
        __syncthreads();

        auto & indexBuffer = grid_cu.getIndexBuffer();
        auto & dataBuffer = grid_cu.getDataBuffer();

        auto id = indexBuffer.template get<0>(dataBlockPos);

        // process only points inside the given box
        auto coord = grid_cu.getCoord(id,offset);

        bool active = box.isInsideKey(coord);
        if (active == false)
        {return;}

        int pad_offset = stp.template get<0>(dataBlockPos);

        // collect the padding points of this chunk
        if (dataBuffer.template get<pMask>(dataBlockPos)[offset] & 0x2)
        {
            int cnt = atomicAdd(&counter,1);

            out.template get<0>(pad_offset + cnt) = dataBlockPos;
            out.template get<1>(pad_offset + cnt) = offset;
        }
    }
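
    /* count_paddings / exclusive scan / collect_paddings form a standard
     * count-scan-fill compaction: the scan of the per-chunk padding counts
     * (passed here as stp) gives each chunk its base slot pad_offset, and the
     * block-local atomic counter above assigns unique slots within the chunk.
     */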

    template<unsigned int dim,
             unsigned int pMask,
             unsigned int chunk_size,
             typename padPointType,
             typename SparseGridType,
             typename outputType>
    __global__ void link_construct_dw_count(padPointType padPoints, SparseGridType grid_dw, SparseGridType grid_cu, outputType out, Point<dim,int> p_dw)
    {
        const unsigned int p = blockIdx.x * blockDim.x + threadIdx.x;

        if (p >= padPoints.size()) {return;}

        const unsigned int dataBlockPos = padPoints.template get<0>(p);
        const unsigned int offset = padPoints.template get<1>(p);

        auto & indexBuffer = grid_cu.getIndexBuffer();
        auto & dataBuffer = grid_cu.getDataBuffer();

        auto id = indexBuffer.template get<0>(dataBlockPos);
        auto pos = grid_cu.getCoord(id,offset);

        // map the point to the finer level (assumption: factor-2 refinement,
        // shifted by p_dw)
        for (int i = 0 ; i < dim ; i++)
        {pos.set_d(i,pos.get(i)*2 + p_dw.get(i));}

        // count how many corners of the refined cell exist on the finer level
        for (int j = 0 ; j < 2*dim ; j++)
        {
            grid_key_dx<dim,int> kc;
            for (int k = 0 ; k < dim ; k++)
            {
                kc.set_d(k,pos.get(k) + ((j >> k) & 0x1));
            }

            if (grid_dw.template get<pMask>(kc) & 0x1)
            {
                atomicAdd(&out.template get<0>(p),1);
            }
        }
    }

    template<unsigned int dim,
             unsigned int pMask,
             unsigned int chunk_size,
             typename padPointType,
             typename SparseGridType,
             typename outputType>
    __global__ void link_construct_up_count(padPointType padPoints, SparseGridType grid_up, SparseGridType grid_cu, outputType out, Point<dim,int> p_up)
    {
        const unsigned int p = blockIdx.x * blockDim.x + threadIdx.x;

        if (p >= padPoints.size()) {return;}

        const unsigned int dataBlockPos = padPoints.template get<0>(p);
        const unsigned int offset = padPoints.template get<1>(p);

        auto & indexBuffer = grid_cu.getIndexBuffer();
        auto & dataBuffer = grid_cu.getDataBuffer();

        auto id = indexBuffer.template get<0>(dataBlockPos);
        auto pos = grid_cu.getCoord(id,offset);

        // map the point to the coarser level (assumption: factor-2
        // coarsening, shifted by p_up)
        for (int i = 0 ; i < dim ; i++)
        {pos.set_d(i,pos.get(i) / 2 + p_up.get(i));}

        if (grid_up.template get<pMask>(pos) & 0x1)
        {
            atomicAdd(&out.template get<0>(p),1);
        }
    }

    template<unsigned int dim,
             unsigned int pMask,
             unsigned int chunk_size,
             typename padPointType,
             typename SparseGridType,
             typename scanType,
             typename outputType>
    __global__ void link_construct_insert_dw(padPointType padPoints, SparseGridType grid_dw, SparseGridType grid_cu, scanType scan, outputType out, Point<dim,int> p_dw)
    {
        const unsigned int p = blockIdx.x * blockDim.x + threadIdx.x;

        if (p >= padPoints.size()) {return;}

        const unsigned int dataBlockPos = padPoints.template get<0>(p);
        const unsigned int offset = padPoints.template get<1>(p);

        auto & indexBuffer = grid_cu.getIndexBuffer();
        auto & dataBuffer = grid_cu.getDataBuffer();

        auto & dataBuffer_dw = grid_dw.getDataBuffer();

        auto id = indexBuffer.template get<0>(dataBlockPos);
        auto pos = grid_cu.getCoord(id,offset);

        // map the point to the finer level (assumption: factor-2 refinement,
        // shifted by p_dw)
        for (int i = 0 ; i < dim ; i++)
        {pos.set_d(i,pos.get(i)*2 + p_dw.get(i));}

        unsigned int dataBlockPos_dw;
        unsigned int offset_dw;

        int link_offset = scan.template get<0>(p);

        int c = 0;
        for (int j = 0 ; j < 2*dim ; j++)
        {
            grid_key_dx<dim,int> kc;
            for (int k = 0 ; k < dim ; k++)
            {
                kc.set_d(k,pos.get(k) + ((j >> k) & 0x1));
            }

            grid_dw.get_sparse(kc,dataBlockPos_dw,offset_dw);

            if (dataBuffer_dw.template get<pMask>(dataBlockPos_dw)[offset_dw] & 0x1)
            {
                out.template get<0>(link_offset + c) = dataBlockPos_dw;
                out.template get<1>(link_offset + c) = offset_dw;

                c++;
            }
        }
    }

    template<unsigned int dim,
             unsigned int pMask,
             unsigned int chunk_size,
             typename padPointType,
             typename SparseGridType,
             typename scanType,
             typename outputType>
    __global__ void link_construct_insert_up(padPointType padPoints, SparseGridType grid_up, SparseGridType grid_cu, scanType scan, outputType out, Point<dim,int> p_up)
    {
        const unsigned int p = blockIdx.x * blockDim.x + threadIdx.x;

        if (p >= padPoints.size()) {return;}

        const unsigned int dataBlockPos = padPoints.template get<0>(p);
        const unsigned int offset = padPoints.template get<1>(p);

        auto & indexBuffer = grid_cu.getIndexBuffer();
        auto & dataBuffer = grid_cu.getDataBuffer();

        auto & dataBuffer_dw = grid_up.getDataBuffer();

        auto id = indexBuffer.template get<0>(dataBlockPos);
        auto pos = grid_cu.getCoord(id,offset);

        // map the point to the coarser level (assumption: factor-2
        // coarsening, shifted by p_up)
        for (int i = 0 ; i < dim ; i++)
        {pos.set_d(i,pos.get(i) / 2 + p_up.get(i));}

        unsigned int dataBlockPos_dw;
        unsigned int offset_dw;

        int link_offset = scan.template get<0>(p);

        grid_up.get_sparse(pos,dataBlockPos_dw,offset_dw);

        if (dataBuffer_dw.template get<pMask>(dataBlockPos_dw)[offset_dw] & 0x1)
        {
            out.template get<0>(link_offset) = dataBlockPos_dw;
            out.template get<1>(link_offset) = offset_dw;
        }
    }

    template <unsigned int dim,
              typename nNN_type,
              typename IndexBufT,
              typename SparseGridT,
              typename nn_blocksT>
    __global__ void findNeighbours(IndexBufT indexBuffer, SparseGridT sparseGrid, nn_blocksT nn_blocks)
    {
        constexpr unsigned int pIndex = 0;

        typedef typename IndexBufT::value_type IndexAggregateT;
        typedef BlockTypeOf<IndexAggregateT , pIndex> IndexT;

        const unsigned int pos = blockIdx.x * blockDim.x + threadIdx.x;

        const unsigned int dataBlockPos = pos / nNN_type::nNN;
        const unsigned int offset = pos % nNN_type::nNN;

        if (dataBlockPos >= indexBuffer.size())
        {return;}

        const auto dataBlockId = indexBuffer.template get<pIndex>(dataBlockPos);

        auto neighbourPos = sparseGrid.template getNeighboursPos<nNN_type>(dataBlockId, offset);

        nn_blocks.template get<0>(dataBlockPos*nNN_type::nNN + offset) = neighbourPos;
    }
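
    /* Layout note: for chunk i, its nNN_type::nNN neighbour positions are
     * stored contiguously at nn_blocks[i*nNN .. i*nNN + nNN-1], as the write
     * above shows; that is the layout assumed wherever neighbour chunks are
     * looked up.
     */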

    template <unsigned int dim,
              unsigned int pMask,
              typename stencil,
              typename IndexBufT,
              typename DataBufT,
              typename SparseGridT,
              typename ... Args>
    __global__ void
    SPARSEGRIDGPU_LAUNCH_BOUND_APPLY_STENCIL_IN_PLACE
    applyStencilInPlace(
            Box<dim,int> bx,
            IndexBufT indexBuffer,
            DataBufT dataBuffer,
            SparseGridT sparseGrid,
            Args... args)
    {
        constexpr unsigned int pIndex = 0;

        typedef typename IndexBufT::value_type IndexAggregateT;
        typedef BlockTypeOf<IndexAggregateT , pIndex> IndexT;

        typedef typename DataBufT::value_type AggregateT;
        typedef BlockTypeOf<AggregateT, pMask> MaskBlockT;
        typedef ScalarTypeOf<AggregateT, pMask> MaskT;
        constexpr unsigned int blockSize = MaskBlockT::size;

        // one CUDA block per data chunk, one thread per element
        const unsigned int dataBlockPos = blockIdx.x;
        const unsigned int offset = threadIdx.x;

        if (dataBlockPos >= indexBuffer.size())
        {return;}

        auto dataBlockLoad = dataBuffer.get(dataBlockPos);

        const auto dataBlockId = indexBuffer.template get<pIndex>(dataBlockPos);
        auto pointCoord = sparseGrid.getCoord(dataBlockId * blockSize + offset);

        unsigned char curMask;

        if (offset < blockSize)
        {
            // mask out points that fall outside the processing box
            curMask = dataBlockLoad.template get<pMask>()[offset];
            for (int i = 0 ; i < dim ; i++)
            {curMask &= (pointCoord.get(i) < bx.getLow(i) || pointCoord.get(i) > bx.getHigh(i))?0:0xFF;}
        }

        openfpm::sparse_index<unsigned int> sdataBlockPos;
        sdataBlockPos.id = dataBlockPos;

        stencil::stencil(
                sparseGrid, dataBlockId, sdataBlockPos , offset, pointCoord, dataBlockLoad, dataBlockLoad,
                curMask, args...);
    }

    template <unsigned int dim,
              unsigned int pMask,
              typename stencil,
              typename IndexBufT,
              typename DataBufT,
              typename SparseGridT,
              typename ... Args>
    __global__ void
    SPARSEGRIDGPU_LAUNCH_BOUND_APPLY_STENCIL_IN_PLACE_NO_SHARED
    applyStencilInPlaceNoShared(
            Box<dim,int> bx,
            IndexBufT indexBuffer,
            DataBufT dataBuffer,
            SparseGridT sparseGrid,
            Args... args)
    {
        constexpr unsigned int pIndex = 0;

        typedef typename IndexBufT::value_type IndexAggregateT;
        typedef BlockTypeOf<IndexAggregateT , pIndex> IndexT;

        typedef typename DataBufT::value_type AggregateT;
        typedef BlockTypeOf<AggregateT, pMask> MaskBlockT;
        typedef ScalarTypeOf<AggregateT, pMask> MaskT;
        constexpr unsigned int blockSize = MaskBlockT::size;

        // one thread per existing point
        int p = blockIdx.x * blockDim.x + threadIdx.x;

        auto & pntBuff = sparseGrid.getPointBuffer();

        if (p >= pntBuff.size())
        {return;}

        auto id = pntBuff.template get<0>(p);

        const unsigned int dataBlockPos = id / blockSize;
        const unsigned int offset = id % blockSize;

        auto dataBlockLoad = dataBuffer.get(dataBlockPos);

        const unsigned int dataBlockId = indexBuffer.template get<pIndex>(dataBlockPos);
        auto pointCoord = sparseGrid.getCoord(dataBlockId * blockSize + offset);

        unsigned char curMask;

        if (offset < blockSize)
        {
            // mask out points that fall outside the processing box
            curMask = dataBlockLoad.template get<pMask>()[offset];
            if (bx.isInsideKey(pointCoord) == false) {curMask = 0;}
        }

        openfpm::sparse_index<unsigned int> sdataBlockPos;
        sdataBlockPos.id = dataBlockPos;

        stencil::stencil(
                sparseGrid, dataBlockId, sdataBlockPos , offset, pointCoord, dataBlockLoad, dataBlockLoad,
                curMask, args...);
    }

    template<unsigned int pMask,
             typename dataBuffType,
             typename scanType,
             typename outType>
    __global__ void fill_e_points(dataBuffType dataBuf, scanType scanBuf, outType output)
    {
        typedef typename dataBuffType::value_type AggregateT;
        typedef BlockTypeOf<AggregateT, pMask> MaskBlockT;
        constexpr unsigned int blockSize = MaskBlockT::size;

        const unsigned int dataBlockPos = blockIdx.x;
        const unsigned int offset = threadIdx.x % blockSize;

        __shared__ int ato_cnt;

        if (threadIdx.x == 0)
        {ato_cnt = 0;}
        __syncthreads();

        if (dataBlockPos >= scanBuf.size() - 1)
        {return;}

        int predicate = dataBuf.template get<pMask>(dataBlockPos)[offset] & 0x1;

        // each existing point takes a unique slot inside the chunk
        int id = atomicAdd(&ato_cnt,predicate);

        __syncthreads();

        if (predicate)
        {
            output.template get<0>(id + scanBuf.template get<0>(dataBlockPos)) = offset + dataBlockPos * blockSize;
        }
    }
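
    /* fill_e_points is the fill step of a count/scan/fill compaction:
     * calc_exist_points (below) produces per-chunk counts, their exclusive
     * scan is passed here as scanBuf, and the block-local atomicAdd above
     * hands each surviving point the global slot id + scanBuf[chunk].
     */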

    template<unsigned int pMask,
             typename dataBufferType,
             typename outType>
    __global__ void calc_exist_points(dataBufferType dataBuf, outType output)
    {
        typedef typename dataBufferType::value_type AggregateT;
        typedef BlockTypeOf<AggregateT, pMask> MaskBlockT;
        constexpr unsigned int blockSize = MaskBlockT::size;

        const unsigned int dataBlockPos = blockIdx.x;
        const unsigned int offset = threadIdx.x % blockSize;

        __shared__ int ato_cnt;

        if (threadIdx.x == 0)
        {ato_cnt = 0;}
        __syncthreads();

        if (dataBlockPos >= output.size())
        {return;}

        int predicate = dataBuf.template get<pMask>(dataBlockPos)[offset] & 0x1;

        atomicAdd(&ato_cnt,predicate);

        __syncthreads();

        if (threadIdx.x == 0)
        {output.template get<0>(dataBlockPos) = ato_cnt;}
    }

    template<unsigned int dim,
             unsigned int pMask,
             unsigned int blockEdgeSize,
             typename dataBufferType,
             typename outType,
             typename boxesVector_type,
             typename grid_smb_type,
             typename indexBuffer_type>
    __global__ void calc_remove_points_chunks_boxes(indexBuffer_type indexBuffer,
                                                    boxesVector_type boxes,
                                                    grid_smb_type grd,
                                                    dataBufferType dataBuf,
                                                    outType output)
    {
        typedef typename dataBufferType::value_type AggregateT;
        typedef BlockTypeOf<AggregateT, pMask> MaskBlockT;

        const unsigned int dataBlockPos = blockIdx.x * blockDim.x + threadIdx.x;

        if (dataBlockPos >= indexBuffer.size())
        {return;}

        auto id = indexBuffer.template get<0>(dataBlockPos);

        // build the box covered by this chunk (assumption on the linearizer
        // call reconstructing the chunk origin)
        auto pnt = grd.InvLinId(id*MaskBlockT::size);

        Box<dim,unsigned int> b;

        for (int i = 0 ; i < dim ; i++)
        {
            b.setLow(i,pnt.get(i));
            b.setHigh(i,pnt.get(i) + blockEdgeSize - 1);
        }

        // mark the chunk if it intersects any removal box
        output.template get<1>(dataBlockPos) = 0;
        for (int k = 0 ; k < boxes.size() ; k++ )
        {
            Box<dim,unsigned int> btmp = boxes.get(k);
            Box<dim,unsigned int> bout;
            if (btmp.Intersect(b,bout) == true)
            {
                output.template get<1>(dataBlockPos) = 1;
            }
        }
    }

    template<typename outType,
             typename activeCnkType>
    __global__ void collect_rem_chunks(activeCnkType act, outType output)
    {
        const unsigned int dataBlockPos = blockIdx.x * blockDim.x + threadIdx.x;

        if (dataBlockPos >= act.size()-1)
        {return;}

        auto id = act.template get<1>(dataBlockPos);
        auto id_p1 = act.template get<1>(dataBlockPos+1);

        // a step in the scan marks an active chunk
        if (id != id_p1)
        {
            output.template get<0>(id) = dataBlockPos;
        }
    }

    template<unsigned int dim,
             unsigned int pMask,
             typename dataBufferType,
             typename indexBufferType,
             typename grid_smb_type,
             typename activeCntType,
             typename boxesVector_type>
    __global__ void remove_points(indexBufferType indexBuffer,
                                  grid_smb_type grd,
                                  dataBufferType dataBuffer,
                                  activeCntType active_blocks,
                                  boxesVector_type boxes)
    {
        typedef typename dataBufferType::value_type AggregateT;
        typedef BlockTypeOf<AggregateT, pMask> MaskBlockT;
        constexpr unsigned int blockSize = MaskBlockT::size;

        const unsigned int dataBlockPos = active_blocks.template get<0>(blockIdx.x);
        const unsigned int offset = threadIdx.x % blockSize;

        if (dataBlockPos >= dataBuffer.size()-1)
        {return;}

        int predicate = dataBuffer.template get<pMask>(dataBlockPos)[offset] & 0x1;

        auto id = indexBuffer.template get<0>(dataBlockPos);

        // point coordinate (assumption on the linearizer call)
        auto pnt = grd.InvLinId(id*blockSize + offset);
        Point<dim,int> p_;
        for (int i = 0 ; i < dim ; i++)
        {p_.get(i) = pnt.get(i);}

        // clear the exist bit of points that fall inside a removal box
        if (predicate)
        {
            for (int k = 0 ; k < boxes.size() ; k++ )
            {
                Box<dim,int> b = boxes.get(k);
                if (b.isInside(p_) == true)
                {
                    dataBuffer.template get<pMask>(dataBlockPos)[offset] = 0;
                }
            }
        }
    }

    template<unsigned int dim,
             unsigned int pMask,
             unsigned int numCnt,
             typename indexT,
             typename dataBufferType,
             typename outType,
             typename boxesVector_type,
             typename grid_smb_type,
             typename indexBuffer_type>
    __global__ void calc_exist_points_with_boxes(indexBuffer_type indexBuffer,
                                                 boxesVector_type boxes,
                                                 grid_smb_type grd,
                                                 dataBufferType dataBuf,
                                                 outType output,
                                                 unsigned int stride_size)
    {
        typedef typename dataBufferType::value_type AggregateT;
        typedef BlockTypeOf<AggregateT, pMask> MaskBlockT;
        constexpr unsigned int blockSize = MaskBlockT::size;

        const unsigned int dataBlockPos = blockIdx.x;
        const unsigned int offset = threadIdx.x % blockSize;

        __shared__ int ato_cnt[numCnt];

        if (threadIdx.x < numCnt)
        {ato_cnt[threadIdx.x] = 0;}

        __syncthreads();

        if (numCnt > blockDim.x)
        {printf("Error calc_exist_points_with_boxes assertion failed numCnt >= blockDim.x %d %d \n",numCnt,(int)blockDim.x);}

        if (dataBlockPos >= output.size())
        {return;}

        int predicate = dataBuf.template get<pMask>(dataBlockPos)[offset] & 0x1;

        indexT id = indexBuffer.template get<0>(dataBlockPos);

        // point coordinate (assumption on the linearizer call)
        auto pnt = grd.InvLinId(id*blockSize + offset);
        Point<dim,int> p_;
        for (int i = 0 ; i < dim ; i++)
        {p_.get(i) = pnt.get(i);}

        // count per box how many existing points of this chunk fall inside
        if (predicate)
        {
            for (int k = 0 ; k < boxes.size() ; k++ )
            {
                Box<dim,int> b = boxes.get(k);
                if (b.isInside(p_) == true)
                {
                    atomicAdd(&ato_cnt[k],1);
                }
            }
        }

        __syncthreads();

        if (threadIdx.x < boxes.size())
        {
            output.template get<0>(dataBlockPos+threadIdx.x*stride_size) = ato_cnt[threadIdx.x];
            output.template get<1>(dataBlockPos+threadIdx.x*stride_size) = (ato_cnt[threadIdx.x] != 0);
        }
    }

    template<unsigned int dim,
             unsigned int pMask,
             unsigned int numCnt,
             typename indexT,
             typename dataBufferType,
             typename packBufferType,
             typename scanType,
             typename scanItType,
             typename outputType,
             typename boxesVector_type,
             typename grid_smb_type,
             typename indexBuffer_type>
    __global__ void get_exist_points_with_boxes(indexBuffer_type indexBuffer,
                                                boxesVector_type boxes,
                                                grid_smb_type grd,
                                                dataBufferType dataBuf,
                                                packBufferType pack_output,
                                                scanType scan,
                                                scanItType scan_it,
                                                outputType output)
    {
        typedef typename dataBufferType::value_type AggregateT;
        typedef BlockTypeOf<AggregateT, pMask> MaskBlockT;
        constexpr unsigned int blockSize = MaskBlockT::size;

        const unsigned int dataBlockPos = blockIdx.x;
        const unsigned int offset = threadIdx.x % blockSize;

        __shared__ int ato_cnt[numCnt];

        if (threadIdx.x < numCnt)
        {ato_cnt[threadIdx.x] = 0;}

        __syncthreads();

        if (numCnt > blockDim.x)
        {printf("Error get_exist_points_with_boxes assertion failed numCnt >= blockDim.x %d %d \n",numCnt,(int)blockDim.x);}

        int predicate = dataBuf.template get<pMask>(dataBlockPos)[offset] & 0x1;

        indexT id = indexBuffer.template get<0>(dataBlockPos);

        // point coordinate (assumption on the linearizer call)
        auto pnt = grd.InvLinId(id*blockSize + offset);
        Point<dim,int> p_;
        for (int i = 0 ; i < dim ; i++)
        {p_.get(i) = pnt.get(i);}

        if (predicate)
        {
            for (int k = 0 ; k < boxes.size() ; k++ )
            {
                Box<dim,int> b = boxes.get(k);
                if (b.isInside(p_) == true)
                {
                    // the point belongs to the output of box k
                    int p = atomicAdd(&ato_cnt[k] , 1);

                    const unsigned int dataBlockPosPack = scan.template get<1>(dataBlockPos + k*(indexBuffer.size() + 1));
                    unsigned int sit = scan.template get<0>(dataBlockPos + k*(indexBuffer.size() + 1));
                    int scan_id = scan.template get<0>(dataBlockPos + k*(indexBuffer.size() + 1)) + scan_it.template get<0>(k);
                    output.template get<0>(scan_id + p) = (offset + dataBlockPos * blockSize) * numCnt + k;
                    pack_output.template get<0>(scan_id + p) = p + sit;
                }
            }
        }
    }

    template<unsigned int dim,
             unsigned int blockSize,
             unsigned int blockEdgeSize,
             unsigned int nshifts,
             typename indexT,
             typename linearizer,
             typename shiftTypeVector,
             typename outputType>
    __global__ void convert_chunk_ids(indexT * ids,
                                      unsigned int n_cnk,
                                      linearizer gridGeoPack,
                                      grid_key_dx<dim,int> origPack,
                                      linearizer gridGeo,
                                      grid_key_dx<dim,int> origUnpack,
                                      outputType output,
                                      shiftTypeVector shifts,
                                      grid_key_dx<dim,int> sz,
                                      unsigned int bs)
    {
        const unsigned int p = blockIdx.x * blockDim.x + threadIdx.x;

        if (p >= n_cnk) {return;}

        auto id = ids[p];

        for (int i = 0 ; i < nshifts ; i++)
        {
            // chunk coordinate remapped from the pack to the unpack frame
            // (assumption on the origin handling; dropped in extraction)
            grid_key_dx<dim,int> pos = gridGeoPack.InvLinId(id*blockSize);
            for (int j = 0 ; j < dim ; j++)
            {pos.set_d(j,pos.get(j) - origPack.get(j) + origUnpack.get(j));}

            for (int j = 0 ; j < dim ; j++)
            {
                pos.set_d(j,pos.get(j) + shifts.template get<0>(i)[j]*blockEdgeSize);

                // clamp shifted chunks that would fall outside the domain
                if (pos.get(j) >= sz.get(j))
                {
                    pos.set_d(j,pos.get(j) - blockEdgeSize);
                }
            }

            auto plin = gridGeo.LinId(pos);

            output.template get<0>(p*nshifts + i + bs) = plin / blockSize;
        }
    }

    template<typename vectorType>
    __global__ void set_one(vectorType vt)
    {
        const unsigned int p = blockIdx.x * blockDim.x + threadIdx.x;

        if (p >= vt.size()) {return;}

        vt.template get<0>(p) = 1;
    }

    template<unsigned int pSegment,
             typename newMapType,
             typename mergeMapType,
             typename dataMapType,
             typename segmentOffsetType,
             typename outMapType>
    __global__ void construct_new_chunk_map(newMapType new_map, dataMapType dataMap,
                                            mergeMapType merge_id, outMapType outMap,
                                            segmentOffsetType segments_data, int start_p)
    {
        const unsigned int p = blockIdx.x * blockDim.x + threadIdx.x;

        if (p >= segments_data.size()-1)
        {return;}

        unsigned int st = segments_data.template get<pSegment>(p);

        int segmentSize = segments_data.template get<pSegment>(p + 1)
                          - segments_data.template get<pSegment>(p);

        // every data element of the segment maps to the same output chunk
        for (int j = 0 ; j < segmentSize ; j++)
        {
            int dm = dataMap.template get<0>(st+j);
            new_map.template get<0>(dm) = outMap.template get<0>(p);
        }
    }

    template<unsigned int pMask,
             typename AggregateT,
             typename blockConvertType,
             typename newMapType,
             typename dataType_ptrs,
             typename dataType,
             unsigned int ... prp>
    __global__ void copy_packed_data_to_chunks(unsigned int * scan,
                                               unsigned short int * offsets,
                                               blockConvertType blc,
                                               newMapType new_map,
                                               dataType_ptrs data_ptrs,
                                               dataType data_buff,
                                               unsigned int n_shf,
                                               unsigned int n_pnt,
                                               unsigned int i,
                                               unsigned int n_accu_cnk)
    {
        // one CUDA block per packed chunk
        const unsigned int p = blockIdx.x;

        int scan_pp = scan[p];
        int n_block_pnt = scan[p+1] - scan_pp;

        if (threadIdx.x < n_block_pnt)
        {
            unsigned short int off = offsets[scan[p] + threadIdx.x];

            // conversion entry: low 16 bits are the in-chunk offset,
            // high 16 bits the shift index
            int conv = blc.template get<0>(i)[off];

            unsigned short int off_c = conv & 0xFFFF;
            unsigned short int shf_c = conv >> 16;

            unsigned int pos_c = new_map.template get<0>(n_shf*p + shf_c + n_accu_cnk);

            sparsegridgpu_unpack_impl<AggregateT, dataType ,prp ...>
                    spi(pos_c,off_c,data_buff,scan_pp + threadIdx.x,data_ptrs,n_pnt);

            boost::mpl::for_each_ref< boost::mpl::range_c<int,0,sizeof...(prp)> >(spi);

            data_buff.template get<pMask>(pos_c)[off_c] |= 0x1;
        }
    }

    template<typename scanPointerType,
             typename scanType>
    __global__ void last_scan_point(scanPointerType scan_ptr, scanType scan,
                                    unsigned int stride,
                                    unsigned int n_pack)
    {
        const unsigned int k = blockIdx.x * blockDim.x + threadIdx.x;

        if (k >= n_pack) {return;}

        unsigned int ppos = scan.template get<0>((k+1)*stride-1);
        unsigned int pos = scan.template get<1>((k+1)*stride-1);

        ((unsigned int *)scan_ptr.ptr[k])[pos] = ppos;
    }

    template<unsigned int pMask,
             typename AggregateT,
             unsigned int n_it,
             typename indexT,
             typename sar_type,
             typename pntBuff_type,
             typename pointOffset_type,
             typename indexBuffer_type,
             typename dataBuffer_type,
             typename scan_type,
             unsigned int blockSize,
             unsigned int ... prp>
    __global__ void pack_data(pntBuff_type pntBuff,
                              dataBuffer_type dataBuff,
                              indexBuffer_type indexBuff,
                              scan_type scan,
                              pointOffset_type point_offsets,
                              arr_arr_ptr<n_it,sizeof...(prp)> * data_ptr,
                              arr_ptr<n_it> index_ptr,
                              arr_ptr<n_it> scan_ptr,
                              arr_ptr<n_it> offset_ptr,
                              arr_ptr<n_it> mask_ptr,
                              sar_type sar)
    {
        const unsigned int p = blockIdx.x * blockDim.x + threadIdx.x;

        if (p >= pntBuff.size())
        {return;}

        const unsigned int pb = pntBuff.template get<0>(p);
        const unsigned int p_offset = point_offsets.template get<0>(p);

        // decode pack-iteration k and point id from the combined index
        const unsigned int k = pb % n_it;
        const unsigned int id = pb / n_it;

        const unsigned int dataBlockPos = id / blockSize;
        const unsigned int offset = id % blockSize;

        unsigned int ppos = scan.template get<0>(dataBlockPos + k*(indexBuff.size() + 1));
        const unsigned int dataBlockPosPack = scan.template get<1>(dataBlockPos + k*(indexBuff.size() + 1));

        sparsegridgpu_pack_impl<AggregateT, dataBuffer_type ,prp ...>
                spi(dataBlockPos,offset,dataBuff,p_offset,data_ptr->ptr[k],sar.sa[k]);

        boost::mpl::for_each_ref< boost::mpl::range_c<int,0,sizeof...(prp)> >(spi);

        ((unsigned int *)scan_ptr.ptr[k])[dataBlockPosPack] = ppos;

        ((indexT *)index_ptr.ptr[k])[dataBlockPosPack] = indexBuff.template get<0>(dataBlockPos);
        ((short int *)offset_ptr.ptr[k])[p_offset] = offset;
        ((unsigned char *)mask_ptr.ptr[k])[p_offset] = dataBuff.template get<pMask>(dataBlockPos)[offset];
    }
} // namespace SparseGridGpuKernels

#endif /* OPENFPM_PDATA_SPARSEGRIDGPU_KERNELS_CUH */