5#ifndef OPENFPM_PDATA_SPARSEGRIDGPU_KER_CUH
6#define OPENFPM_PDATA_SPARSEGRIDGPU_KER_CUH
8#include <Grid/Geometry/grid_smb.hpp>
9#include "BlockMapGpu.hpp"
10#include "SparseGridGpu_ker_util.hpp"
12template<
typename indexT>
20template<
unsigned int dim,
21 unsigned int blockEdgeSize,
22 typename AggregateBlockT,
25 template<
typename>
class layout_base,
33 GridSmT blockWithGhostGrid;
39 const static unsigned char PADDING_BIT = 1;
40 static constexpr unsigned int blockSize = BlockTypeOf<AggregateBlockT, 0>::size;
41 unsigned int ghostLayerSize;
47 static constexpr unsigned int d = dim;
48 static constexpr unsigned int dims = dim;
49 static constexpr unsigned int blockEdgeSize_ = blockEdgeSize;
50 unsigned int stencilSupportRadius;
51 typedef AggregateBlockT AggregateBlockType;
52 typedef indexT indexT_;
66 GridSmT extendedBlockGeometry,
67 unsigned int stencilSupportRadius,
71 unsigned int ghostLayerSize,
75 blockWithGhostGrid(extendedBlockGeometry),
76 stencilSupportRadius(stencilSupportRadius),
77 ghostLayerSize(ghostLayerSize),
78 ghostLayerToThreadsMapping(ghostLayerToThreadsMapping),
89 template<
typename headers_type>
90 __device__
static int unpack_headers(headers_type & headers,
unsigned char * data,
int ih,
int sz_pack)
93 if (
sizeof(indexT) == 8)
94 {n_cnk = ((
size_t *)data)[0];}
97 unsigned int dp1 = ((
unsigned int *)data)[0];
98 unsigned int dp2 = ((
unsigned int *)&data[4])[0];
99 n_cnk = (size_t)dp1 + ((
size_t)dp2 << 32);
101 headers.template get<1>(ih) = n_cnk;
103 size_t actual_offset = n_cnk*
sizeof(indexT);
105 unsigned int n_pnt = *(
unsigned int *)&(data[
sizeof(
size_t) + 2*dim*
sizeof(int) + actual_offset + n_cnk*
sizeof(
unsigned int)]);
106 headers.template get<2>(ih) = n_pnt;
108 return sizeof(size_t) +
110 sizeof(indexT)*n_cnk +
111 align_number_device(
sizeof(indexT),(n_cnk+1)*
sizeof(
unsigned int)) +
112 align_number_device(
sizeof(indexT),n_pnt*sz_pack) +
113 align_number_device(
sizeof(indexT),n_pnt*
sizeof(
short int)) +
114 align_number_device(
sizeof(indexT),n_pnt*
sizeof(
unsigned char));
125 template<
typename CoordT>
128 return grid.getGlobalCoord(blockCoord,offset);
138 template<
typename CoordT>
141 return grid.LinId(coord);
151 inline __device__
unsigned int size(
unsigned int i)
153 return grid.getSize()[i];
165 return grid.InvLinId(linId);
178 return grid.InvLinId(dataBlockId * blockSize + offset);
191 template<
typename ite_type>
199 for (
int i = 0 ; i < dim ; i++)
204 active = active && (p.
get(i) >= (itd.start.get(i) + itd.start_base.get(i))) && (p.
get(i) <= itd.stop.get(i));
217 template<
typename CoordT>
220 return grid.BlockLinId(blockCoord);
232 return grid.BlockInvLinId(blockLinId);
245 return grid.InvLinId(blockLinId * blockSize);
251 auto i = base.
get(dimension) + offset;
252 res.
set_d(dimension, i);
263 return blockEdgeSize;
284 return std::pow(blockEdgeSize + 2*stencilSupportRadius, dim);
296 template<
typename NN_type,
typename indexT2>
305 for (
int i = 0 ; i < dim ; i++)
307 coord.
set_d(i,mov.
get(i) + offset % blockEdgeSize);
308 offset /= blockEdgeSize;
312 unsigned int offset_nn = 0;
314 bool out = NN_type::template getNNindex_offset<blockEdgeSize>(coord,
NN_index,offset_nn);
321 {nnb = nn_blocks.template get<0>(
NN_index + NN_type::nNN*pos.id);}
329 inline __device__
unsigned int posToEnlargedBlockPos(
unsigned int pos)
const
332 unsigned int coord[dim];
333 linToCoordWithOffset<blockEdgeSize>(pos, stencilSupportRadius, coord);
334 const unsigned int linId = coordToLin<blockEdgeSize>(coord, stencilSupportRadius);
341 getCoordInEnlargedBlock(
const unsigned int offset)
const
343 unsigned int coord[dim];
344 linToCoordWithOffset<blockEdgeSize>(offset, stencilSupportRadius, coord);
348 inline __device__
unsigned int
349 getLinIdInEnlargedBlock(
const unsigned int offset)
const
351 unsigned int coord[dim];
352 linToCoordWithOffset<blockEdgeSize>(offset, stencilSupportRadius, coord);
353 return coordToLin<blockEdgeSize>(coord, stencilSupportRadius);
358 template<
typename Coordtype>
359 inline __device__
unsigned int
363 return coordToLin<blockEdgeSize>(res, stencilSupportRadius);
366 template<
typename Coordtype>
367 inline __device__
unsigned int
371 return coordToLin<blockEdgeSize>(res, stencilSupportRadius);
374 inline __device__
bool
375 getIfBoundaryElementInEnlargedBlock(
const grid_key_dx<dim, int> coordInEnlargedBlock,
char (&boundaryDirection)[dim])
377 bool isBoundary =
false;
378 for (
int d=0; d<dim; ++d)
380 const auto v = coordInEnlargedBlock.
get(d);
381 if (v==stencilSupportRadius)
383 boundaryDirection[d] = -1;
386 else if (v==stencilSupportRadius+blockEdgeSize-1)
388 boundaryDirection[d] = 1;
393 boundaryDirection[d] = 0;
401 template<
unsigned int p,
typename CoordT>
402 inline __device__
auto
410 template<
typename CoordT>
411 inline __device__
void
424 template<
unsigned int p,
typename CoordT>
425 inline __device__
auto
426 get(
const block_offset<CoordT> & coord)
const ->
decltype(std::declval<BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>>().getblockMap().template get_ele<p>(coord.pos)[coord.off])
438 template<
unsigned int p,
typename CoordT>
439 inline __device__
auto
440 get(
const block_offset<CoordT> & coord) ->
decltype(std::declval<BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>>().getblockMap().template get_ele<p>(coord.pos)[coord.off])
445 template<
unsigned int p,
typename CoordT>
446 inline __device__
auto
452 template<
typename CoordT>
459 template<
typename CoordT>
465 template<
typename CoordT>
466 inline __device__
auto
472 inline __device__
auto
478 template<
unsigned int chunksPerBlocks = 1,
typename CoordT>
479 inline __device__
auto
485 template<
unsigned int chunksPerBlocks = 1>
486 inline __device__
auto
515 template<
unsigned int p,
typename AggrWrapperT>
516 inline __device__
void
517 loadBlock(AggrWrapperT &block, ScalarTypeOf<AggregateBlockT, p> *sharedRegion)
520 __loadBlock<p>(block, sharedRegion);
533 template<
unsigned int ... props,
typename AggrWrapperT>
534 inline __device__
void loadBlock(AggrWrapperT &block,
void *sharedRegionPtr[
sizeof...(props)])
536 __loadBlock<props ...>(block, sharedRegionPtr);
551 template<
unsigned int p ,
typename AggrWrapperT ,
typename CoordT>
552 inline __device__
void
555 auto blockLinId = getBlockId(coord);
556 __loadGhostBlock<p>(dataBlockLoad,blockLinId, sharedRegion);
559 template<
unsigned int p,
typename AggrWrapperT>
560 inline __device__
void
563 __loadGhostBlock<p>(dataBlockLoad,blockLinId, sharedRegion);
566 template<
unsigned int p,
typename AggrWrapperT>
567 inline __device__
void
570 __loadGhostBlock<p>(dataBlockLoad,blockLinId, sharedRegion,mask);
584 template<
unsigned int ... props,
typename CoordT>
587 auto blockLinId = getBlockId(coord);
588 __loadGhost<props ...>(blockLinId, neighboursPos, sharedRegionPtr);
592 template<
unsigned int ... props>
593 inline __device__
void loadGhost(
const unsigned int blockLinId,
const int * neighboursPos,
void *sharedRegionPtr[
sizeof...(props)])
595 __loadGhost<props ...>(blockLinId, neighboursPos, sharedRegionPtr);
598 template<
unsigned int p,
typename AggrWrapperT>
599 inline __device__
void
600 storeBlock(AggrWrapperT &block, ScalarTypeOf<AggregateBlockT, p> *sharedRegion)
603 __storeBlock<p>(block, sharedRegion);
606 template<
unsigned int p,
typename CoordT>
607 inline __device__
void
611 auto & block = getBlock(coord);
612 __storeBlock<p>(block, sharedRegion);
626 template<
unsigned int ... props,
typename AggrWrapperT>
627 inline __device__
void storeBlock(AggrWrapperT &block,
void *sharedRegionPtr[
sizeof...(props)])
629 __storeBlock<props ...>(block, sharedRegionPtr);
632 template<
unsigned int ... props,
typename CoordT>
633 inline __device__
void storeBlockInPlace(
const grid_key_dx<dim, CoordT> & coord,
void *sharedRegionPtr[
sizeof...(props)])
635 auto block = getBlock(coord);
636 __storeBlock<props ...>(block, sharedRegionPtr);
639 template <
unsigned int p,
typename CoordT>
640 inline __device__ ScalarTypeOf<AggregateBlockT, p> & get(
643 ScalarTypeOf<AggregateBlockT, p> *sharedRegion)
652 const auto boxDimensions = sharedMemBox.
getKP2() - sharedMemBox.
getKP1() + one;
653 const auto relativeCoord = coord - sharedMemBox.
getKP1();
654 const auto locLinId = coordToLin(relativeCoord, boxDimensions);
655 return sharedRegion[locLinId];
664 template<
typename CoordT>
670 template<
typename BitMaskT>
671 inline static __device__
bool isPadding(
const BitMaskT &bitMask)
676 template <
typename keyIndexT>
680 return isPadding(mask);
683 template<
typename BitMaskT>
684 inline static __device__
void setPadding(BitMaskT &bitMask)
689 template<
typename BitMaskT>
690 inline static __device__
void unsetPadding(BitMaskT &bitMask)
695 template<
typename NNtype>
696 inline __device__ indexT getNeighboursPos(
const indexT blockId,
const unsigned int offset)
701 return NNtype::template getNNpos<indexT>(blockCoord,this->blockMap,*
this,offset);
715 pc = ghostLayerToThreadsMapping.check_device_pointer(ptr);
717 if (pc.
match ==
true)
719 pc.
match_str = std::string(
"ghostLayerToThreadsMapping overflow : ") +
"\n" + pc.
match_str;
723 pc = nn_blocks.check_device_pointer(ptr);
725 if (pc.
match ==
true)
741 template<
unsigned int p,
typename AggrWrapperT,
typename SharedPtrT>
742 inline __device__
void
743 __loadBlock(
const AggrWrapperT &block, SharedPtrT sharedRegionPtr)
745 typedef ScalarTypeOf<AggregateBlockT, p> ScalarT;
747 const unsigned int pos = threadIdx.x;
751 unsigned int coord[dim];
752 linToCoordWithOffset<blockEdgeSize>(pos, stencilSupportRadius, coord);
753 const unsigned int linId = coordToLin<blockEdgeSize>(coord, stencilSupportRadius);
760 sharedRegionPtr[linId] = block.template get<p>()[pos];
763 template<
unsigned int p,
unsigned int ... props,
typename AggrWrapperT>
764 inline __device__
void
765 __loadBlock(
const AggrWrapperT &block,
void *sharedRegionPtr[
sizeof...(props)+1])
767 __loadBlock<p>(block, sharedRegionPtr);
768 if (
sizeof...(props) > 1)
770 __loadBlock<props ...>(block, sharedRegionPtr + 1);
772 else if (
sizeof...(props) == 1)
774 __loadBlock<props ...>(block, *(sharedRegionPtr + 1));
780 template<
unsigned int p,
typename SharedPtrT>
781 inline __device__
void
782 __loadGhostNoNN(
const unsigned int blockId, SharedPtrT * sharedRegionPtr)
784 typedef ScalarTypeOf<AggregateBlockT, p> ScalarT;
786 const unsigned int edge = blockEdgeSize + 2*stencilSupportRadius;
791 for (
int pos = threadIdx.x; pos < ghostLayerSize; pos += blockDim.x)
794 const unsigned int linId = ghostLayerToThreadsMapping.template get<0>(pos);
797 unsigned int ctr = linId;
798 for (
int i = 0; i < dim; ++i)
800 int v = (ctr %
edge) - stencilSupportRadius;
802 elementCoord.
set_d(i, elementCoord.
get(i) + v);
806 ScalarT *basePtr = (ScalarT *)sharedRegionPtr;
807 *(basePtr + linId) = get<p>(elementCoord);
818 template<
unsigned int p,
typename AggrWrapperT ,
typename SharedPtrT>
819 inline __device__
void
826 ghostLayerToThreadsMapping,
829 stencilSupportRadius,
842 template<
unsigned int p,
typename AggrWrapperT ,
typename SharedPtrT>
843 inline __device__
void
851 ghostLayerToThreadsMapping,
854 stencilSupportRadius,
860 template<
unsigned int p,
unsigned int ... props>
861 inline __device__
void
862 __loadGhost(
const unsigned int blockId,
const int * neighboursPos,
void *sharedRegionPtr[
sizeof...(props)+1])
864 __loadGhost<p>(blockId, neighboursPos, sharedRegionPtr);
865 if (
sizeof...(props) > 1)
867 __loadGhost<props ...>(blockId, neighboursPos, sharedRegionPtr + 1);
869 else if (
sizeof...(props) == 1)
871 __loadGhost<props ...>(blockId, neighboursPos, *(sharedRegionPtr + 1));
875 template<
unsigned int p,
typename AggrWrapperT,
typename SharedPtrT>
876 inline __device__
void
877 __storeBlock(AggrWrapperT &block, SharedPtrT sharedRegionPtr)
879 typedef ScalarTypeOf<AggregateBlockT, p> ScalarT;
881 const unsigned int pos = threadIdx.x;
886 unsigned int coord[dim];
887 linToCoordWithOffset<blockEdgeSize>(pos, stencilSupportRadius, coord);
888 const unsigned int linId = coordToLin<blockEdgeSize>(coord, stencilSupportRadius);
892 ScalarT *basePtr = (ScalarT *)sharedRegionPtr;
894 block.template get<p>()[pos] = *(basePtr + linId);
898 template<
unsigned int p,
unsigned int ... props,
typename AggrWrapperT>
899 inline __device__
void
900 __storeBlock(AggrWrapperT &block,
void *sharedRegionPtr[
sizeof...(props)+1])
902 __storeBlock<p>(block, sharedRegionPtr);
903 if (
sizeof...(props) > 1)
905 __storeBlock<props ...>(block, sharedRegionPtr + 1);
907 else if (
sizeof...(props) == 1)
909 __storeBlock<props ...>(block, *(sharedRegionPtr + 1));
This class represent an N-dimensional box.
__host__ __device__ bool isInside(const Point< dim, T > &p) const
Check if the point is inside the box.
grid_key_dx< dim > getKP2() const
Get the point p2 as grid_key_dx.
grid_key_dx< dim > getKP1() const
Get the point p1 as grid_key_dx.
__device__ void loadGhost(const grid_key_dx< dim, CoordT > &coord, const int *neighboursPos, void *sharedRegionPtr[sizeof...(props)])
__device__ void loadGhostBlock(const AggrWrapperT &dataBlockLoad, const grid_key_dx< dim, CoordT > &coord, ScalarTypeOf< AggregateBlockT, p > *sharedRegion)
__device__ grid_key_dx< dim, int > getBlockBaseCoord(size_t blockLinId) const
Given a linearized block index it returns the coordinates of the lower-left point in 2D or in general ...
static constexpr __device__ unsigned int getBlockEdgeSize()
Return the block edge size.
__device__ size_t getLinId(const grid_key_dx< dim, CoordT > &coord) const
Linearization of global coordinates.
__device__ __host__ grid_key_dx< dim, CoordT > getGlobalCoord(const grid_key_dx< dim, CoordT > &blockCoord, unsigned int offset)
Given the coordinate of the block and the offset id inside the block, it gives the global coordinate.
__device__ auto get(const block_offset< CoordT > &coord) const -> decltype(std::declval< BlockMapGpu_ker< AggregateBlockT, indexT, layout_base > >().getblockMap().template get_ele< p >(coord.pos)[coord.off])
Access the grid point.
__device__ void storeBlock(AggrWrapperT &block, void *sharedRegionPtr[sizeof...(props)])
__device__ auto get(const block_offset< CoordT > &coord) -> decltype(std::declval< BlockMapGpu_ker< AggregateBlockT, indexT, layout_base > >().getblockMap().template get_ele< p >(coord.pos)[coord.off])
Access the grid point.
__device__ unsigned int size(unsigned int i)
Size of the sparse grid in the given direction.
constexpr __device__ unsigned int getBlockSize() const
Return the size of the block.
__device__ grid_key_dx< dim, int > getCoord(size_t dataBlockId, unsigned offset) const
The inversion of getLinId.
__device__ block_offset< indexT2 > getNNPoint(openfpm::sparse_index< unsigned int > pos, unsigned int offset, const grid_key_dx< dim, indexT2 > &mov)
Get the neighborhood point in one direction.
__device__ grid_key_dx< dim, int > getCoord(size_t linId) const
The inversion of getLinId.
static __device__ int unpack_headers(headers_type &headers, unsigned char *data, int ih, int sz_pack)
__device__ void loadBlock(AggrWrapperT &block, ScalarTypeOf< AggregateBlockT, p > *sharedRegion)
int yes_has_check_device_pointer
Indicate this structure has a function to check the device pointer.
BcT background
background values
__device__ void __loadGhostBlock(const AggrWrapperT &block, const openfpm::sparse_index< unsigned int > blockId, SharedPtrT *sharedRegionPtr, unsigned char *maskPtr)
Load the ghost area in the shared region.
__device__ void loadBlock(AggrWrapperT &block, void *sharedRegionPtr[sizeof...(props)])
__device__ grid_key_dx< dim, int > getBlockCoord(size_t blockLinId) const
The inversion of getBlockLinId.
__device__ auto getPointBuffer() -> decltype(buffPnt) &
Return the buffer of points.
__device__ bool getInsertBlockOffset(const ite_type &itd, const grid_key_dx< dim, int > &p, grid_key_dx< dim, int > &blk, int &offset)
Given a point to insert, return the block-id and offset of that point.
__device__ unsigned int getEnlargedBlockSize() const
Return the size of the block + ghost needed to apply the stencil.
SparseGridGpu_ker(const openfpm::vector_sparse_gpu_ker< AggregateBlockT, indexT, layout_base > &blockMap, linearizer &grid, GridSmT extendedBlockGeometry, unsigned int stencilSupportRadius, openfpm::vector_gpu_ker< aggregate< short int, short int >, memory_traits_inte > ghostLayerToThreadsMapping, openfpm::vector_gpu_ker< aggregate< indexT >, memory_traits_inte > nn_blocks, openfpm::vector_gpu_ker< aggregate< indexT >, memory_traits_inte > buffPnt, unsigned int ghostLayerSize, BcT &bck)
constructor
__device__ void __loadGhostBlock(const AggrWrapperT &block, const openfpm::sparse_index< unsigned int > blockId, SharedPtrT *sharedRegionPtr)
Load the ghost area in the shared region.
__device__ size_t getBlockLinId(CoordT blockCoord) const
Linearization of block coordinates.
grid_key_dx is the key to access any element in the grid
void one()
Set to one the key.
__device__ __host__ void set_d(index_type i, index_type id)
Set the i index.
__host__ __device__ Point< dim, typeT > toPoint() const
Convert to a point the grid_key_dx.
__device__ __host__ index_type get(index_type i) const
Get the i index.
aggregate of properties; from a list of objects it creates a struct that follows the OpenFPM native stru...
Transform the boost::fusion::vector into memory specification (memory_traits)
grid interface available when on gpu
std::string match_str
match string
bool match
Indicate if the pointer match.
Sub-domain vertex graph node.