OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
SparseGridGpu_ker.cuh
//
// Created by tommaso on 11/06/19.
//

#ifndef OPENFPM_PDATA_SPARSEGRIDGPU_KER_CUH
#define OPENFPM_PDATA_SPARSEGRIDGPU_KER_CUH

#include <Grid/Geometry/grid_smb.hpp>
#include "BlockMapGpu.hpp"
#include "SparseGridGpu_ker_util.hpp"

//! Position of a chunk (pos) and offset of a point inside the chunk (off)
template<typename indexT>
struct block_offset
{
    indexT pos;
    indexT off;
};
18 
19 //todo Remove template param GridSmT and just use BlockGeometry
20 template<unsigned int dim,
21  unsigned int blockEdgeSize,
22  typename AggregateBlockT,
23  typename ct_params,
24  typename indexT,
25  template<typename> class layout_base,
26  typename GridSmT,
27  typename linearizer,
28  typename BcT>
29 class SparseGridGpu_ker : public BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>
30 {
31 private:
32  linearizer grid;
33  GridSmT blockWithGhostGrid;
34 
37 
protected:
    const static unsigned char PADDING_BIT = 1;
    static constexpr unsigned int blockSize = BlockTypeOf<AggregateBlockT, 0>::size;
    unsigned int ghostLayerSize;
    openfpm::vector_gpu_ker<aggregate<short int, short int>, memory_traits_inte> ghostLayerToThreadsMapping;
    openfpm::vector_gpu_ker<aggregate<indexT>, memory_traits_inte> nn_blocks;
    openfpm::vector_gpu_ker<aggregate<indexT>, memory_traits_inte> buffPnt;

public:
    static constexpr unsigned int d = dim;
    static constexpr unsigned int dims = dim;
    static constexpr unsigned int blockEdgeSize_ = blockEdgeSize;
    unsigned int stencilSupportRadius;
    typedef AggregateBlockT AggregateBlockType;
    typedef indexT indexT_;

    //! background values
    BcT background;

public:

    //! constructor
    SparseGridGpu_ker(const openfpm::vector_sparse_gpu_ker<AggregateBlockT, indexT, layout_base> & blockMap,
                      linearizer & grid,
                      GridSmT extendedBlockGeometry,
                      unsigned int stencilSupportRadius,
                      openfpm::vector_gpu_ker<aggregate<short int, short int>, memory_traits_inte> ghostLayerToThreadsMapping,
                      openfpm::vector_gpu_ker<aggregate<indexT>, memory_traits_inte> nn_blocks,
                      openfpm::vector_gpu_ker<aggregate<indexT>, memory_traits_inte> buffPnt,
                      unsigned int ghostLayerSize,
                      BcT & bck)
        : BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>(blockMap),
          grid(grid),
          blockWithGhostGrid(extendedBlockGeometry),
          stencilSupportRadius(stencilSupportRadius),
          ghostLayerSize(ghostLayerSize),
          ghostLayerToThreadsMapping(ghostLayerToThreadsMapping),
          nn_blocks(nn_blocks),
          buffPnt(buffPnt),
          background(bck)
    {}

    /*! \brief Unpack the headers of a packed chunk stream
     *
     * \return the total number of bytes consumed by this pack
     */
    template<typename headers_type>
    __device__ static int unpack_headers(headers_type & headers, unsigned char * data, int ih, int sz_pack)
    {
        size_t n_cnk;
        if (sizeof(indexT) == 8)
        {n_cnk = ((size_t *)data)[0];}
        else
        {
            unsigned int dp1 = ((unsigned int *)data)[0];
            unsigned int dp2 = ((unsigned int *)&data[4])[0];
            n_cnk = (size_t)dp1 + ((size_t)dp2 << 32);
        }
        headers.template get<1>(ih) = n_cnk;

        size_t actual_offset = n_cnk*sizeof(indexT);

        unsigned int n_pnt = *(unsigned int *)&(data[sizeof(size_t) + 2*dim*sizeof(int) + actual_offset + n_cnk*sizeof(unsigned int)]);
        headers.template get<2>(ih) = n_pnt;

        return sizeof(size_t) + // bytes required to pack the number of chunks packed
               2*dim*sizeof(int) + // starting point + size of the indexing packing
               sizeof(indexT)*n_cnk + // bytes required to pack the chunk indexes
               align_number_device(sizeof(indexT),(n_cnk+1)*sizeof(unsigned int)) + // bytes required to pack the scan of the chunk points
               align_number_device(sizeof(indexT),n_pnt*sz_pack) + // bytes required to pack data
               align_number_device(sizeof(indexT),n_pnt*sizeof(short int)) + // bytes required to pack offsets
               align_number_device(sizeof(indexT),n_pnt*sizeof(unsigned char)); // bytes required to pack masks
    }
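
    // Worked example (illustrative, not from the original source): with dim = 3,
    // sizeof(indexT) = 8, n_cnk = 4 chunks, n_pnt = 100 points and sz_pack = 4
    // bytes per point, the returned size is
    //   8 (chunk count) + 24 (start + size) + 32 (chunk indexes)
    //   + 24 (scan: 20 bytes aligned to 8) + 400 (data) + 200 (offsets)
    //   + 104 (masks: 100 bytes aligned to 8) = 792 bytes.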

    /*! \brief Given the coordinate of the block and the offset id inside the block, it gives the global coordinate
     *
     * \param blockCoord block coordinate
     * \param offset offset inside the block
     *
     * \return the global coordinate
     */
    template<typename CoordT>
    __device__ __host__ inline grid_key_dx<dim,CoordT> getGlobalCoord(const grid_key_dx<dim, CoordT> & blockCoord, unsigned int offset)
    {
        return grid.getGlobalCoord(blockCoord,offset);
    }
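
    // Illustrative example (not from the original source): with dim = 2 and
    // blockEdgeSize = 8, blockCoord = (2,0) and offset = 43 (i.e. local point
    // (43 % 8, 43 / 8) = (3,5)) map to the global coordinate (2*8+3, 0*8+5) = (19,5).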

    /*! \brief Linearization of global coordinates
     *
     * \param coord point coordinates
     *
     * \return the linearized position
     */
    template<typename CoordT>
    inline __device__ size_t getLinId(const grid_key_dx<dim, CoordT> & coord) const
    {
        return grid.LinId(coord);
    }

    /*! \brief Size of the sparse grid in each direction
     *
     * \param i direction
     *
     * \return the size in the given direction
     */
    inline __device__ unsigned int size(unsigned int i)
    {
        return grid.getSize()[i];
    }

    /*! \brief The inversion of getLinId
     *
     * \param linId linearized position
     *
     * \return the point coordinates
     */
    inline __device__ grid_key_dx<dim, int> getCoord(size_t linId) const
    {
        return grid.InvLinId(linId);
    }

    /*! \brief The inversion of getLinId
     *
     * \param dataBlockId block id
     * \param offset offset inside the block
     *
     * \return the point coordinates
     */
    inline __device__ grid_key_dx<dim, int> getCoord(size_t dataBlockId, unsigned offset) const
    {
        return grid.InvLinId(dataBlockId * blockSize + offset);
    }

    /*! \brief Given a point to insert, return the block-id and offset of that point
     *
     * \param itd insert iterator
     * \param p point to insert
     * \param blk block id (output)
     * \param offset offset inside the block (output)
     *
     * \return true if the point is inside the insert region
     */
    template<typename ite_type>
    inline __device__ bool getInsertBlockOffset(const ite_type & itd, const grid_key_dx<dim, int> & p, grid_key_dx<dim, int> & blk, int & offset)
    {
        int accu = 1;
        offset = 0;

        bool active = true;

        for (int i = 0 ; i < dim ; i++)
        {
            blk.set_d(i,p.get(i) / getBlockEdgeSize());
            offset += (p.get(i) % getBlockEdgeSize()) * accu;
            accu *= getBlockEdgeSize();
            active = active && (p.get(i) >= (itd.start.get(i) + itd.start_base.get(i))) && (p.get(i) <= itd.stop.get(i));
        }

        return active;
    }
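
    // Worked example (illustrative, not from the original source): with dim = 2 and
    // blockEdgeSize = 8, the point p = (19,5) decomposes into blk = (19/8, 5/8) = (2,0)
    // and offset = (19%8)*1 + (5%8)*8 = 3 + 40 = 43, i.e. the inverse of the
    // getGlobalCoord example above.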

    /*! \brief Linearization of block coordinates
     *
     * \param blockCoord block coordinates
     *
     * \return the linearized block index
     */
    template<typename CoordT>
    inline __device__ size_t getBlockLinId(CoordT blockCoord) const
    {
        return grid.BlockLinId(blockCoord);
    }

    /*! \brief The inversion of getBlockLinId
     *
     * \param blockLinId linearized block index
     *
     * \return the block coordinates
     */
    inline __device__ grid_key_dx<dim, int> getBlockCoord(size_t blockLinId) const
    {
        return grid.BlockInvLinId(blockLinId);
    }

    /*! \brief Given a linearized block index, it returns the coordinates of the lower-left point in 2D (or its equivalent in higher dimensions)
     *
     * \param blockLinId linearized block index
     *
     * \return the coordinates of the block base point
     */
    inline __device__ grid_key_dx<dim, int> getBlockBaseCoord(size_t blockLinId) const
    {
        return grid.InvLinId(blockLinId * blockSize);
    }

    inline __device__ grid_key_dx<dim, int> getNeighbour(grid_key_dx<dim, int> base, unsigned int dimension, char offset) const
    {
        grid_key_dx<dim, int> res = base;
        auto i = base.get(dimension) + offset;
        res.set_d(dimension, i);
        return res;
    }
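
    // Illustrative example (not from the original source): getNeighbour((3,5), 1, +1)
    // moves base one step along dimension 1 and returns (3,6).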

    /*! \brief Return the block edge size
     *
     * \return the block edge size
     */
    constexpr static __device__ unsigned int getBlockEdgeSize()
    {
        return blockEdgeSize;
    }

    /*! \brief Return the size of the block
     *
     * \return the block size
     */
    constexpr __device__ unsigned int getBlockSize() const
    {
        return blockSize;
    }

    /*! \brief Return the size of the block + ghost needed to apply the stencil
     *
     * \return the enlarged block size
     */
    inline __device__ unsigned int getEnlargedBlockSize() const
    {
        return std::pow(blockEdgeSize + 2*stencilSupportRadius, dim);
    }
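
    // Illustrative example (not from the original source): with dim = 3,
    // blockEdgeSize = 8 and stencilSupportRadius = 1 this returns
    // (8 + 2*1)^3 = 1000 elements.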

    /*! \brief Get the neighborhood point in one direction
     *
     * \param pos data block position
     * \param offset offset inside the data block
     * \param mov neighborhood movement
     *
     * \return the position of the neighbor (chunk position and offset inside the chunk)
     */
    template<typename NN_type, typename indexT2>
    inline __device__ block_offset<indexT2> getNNPoint(openfpm::sparse_index<unsigned int> pos,
                                                       unsigned int offset,
                                                       const grid_key_dx<dim,indexT2> & mov)
    {
        block_offset<indexT2> bof;

        grid_key_dx<dim,int> coord;

        for (int i = 0 ; i < dim ; i++)
        {
            coord.set_d(i,mov.get(i) + offset % blockEdgeSize);
            offset /= blockEdgeSize;
        }

        unsigned int NN_index = 0;
        unsigned int offset_nn = 0;

        bool out = NN_type::template getNNindex_offset<blockEdgeSize>(coord,NN_index,offset_nn);

        // Calculate internal coordinates

        indexT nnb = pos.id;

        if (out == true)
        {nnb = nn_blocks.template get<0>(NN_index + NN_type::nNN*pos.id);}

        bof.pos = nnb;
        bof.off = offset_nn;

        return bof;
    }
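
    // Note (illustrative, not from the original source): when the moved point falls
    // outside the current chunk, getNNindex_offset reports out = true and the chunk
    // id is looked up in the precomputed nn_blocks table; otherwise the original
    // chunk id pos.id is kept and only the internal offset changes.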

    inline __device__ unsigned int posToEnlargedBlockPos(unsigned int pos) const
    {
        // Convert pos into a linear id accounting for the ghost offsets
        unsigned int coord[dim];
        linToCoordWithOffset<blockEdgeSize>(pos, stencilSupportRadius, coord);
        const unsigned int linId = coordToLin<blockEdgeSize>(coord, stencilSupportRadius);
//        const unsigned int linId = shift_position<dim,blockEdgeSize>::shift(pos,stencilSupportRadius);

        return linId;
    }
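
    // Worked example (illustrative, not from the original source): with dim = 2,
    // blockEdgeSize = 8 and stencilSupportRadius = 1 the enlarged edge is 10, so
    // pos = 0 maps to coord (1,1) and linId = 1*10 + 1 = 11, i.e. the first
    // non-ghost element of the enlarged block.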

    inline __device__ grid_key_dx<dim,int>
    getCoordInEnlargedBlock(const unsigned int offset) const
    {
        unsigned int coord[dim];
        linToCoordWithOffset<blockEdgeSize>(offset, stencilSupportRadius, coord);
        return grid_key_dx<dim, int>(coord);
    }

    inline __device__ unsigned int
    getLinIdInEnlargedBlock(const unsigned int offset) const
    {
        unsigned int coord[dim];
        linToCoordWithOffset<blockEdgeSize>(offset, stencilSupportRadius, coord);
        return coordToLin<blockEdgeSize>(coord, stencilSupportRadius);
//        return shift_position<dim,blockEdgeSize>::shift(offset,stencilSupportRadius);
    }

    template<typename Coordtype>
    inline __device__ unsigned int
    getNeighbourLinIdInEnlargedBlock(const grid_key_dx<dim, Coordtype> & base, grid_key_dx<dim, Coordtype> & offsets) const
    {
        grid_key_dx<dim, int> res = base + offsets;
        return coordToLin<blockEdgeSize>(res, stencilSupportRadius);
    }

    template<typename Coordtype>
    inline __device__ unsigned int
    getNeighbourLinIdInEnlargedBlock(const grid_key_dx<dim,Coordtype> & base, unsigned int dimension, char offset) const
    {
        grid_key_dx<dim, int> res = getNeighbour(base, dimension, offset);
        return coordToLin<blockEdgeSize>(res, stencilSupportRadius);
    }

    inline __device__ bool
    getIfBoundaryElementInEnlargedBlock(const grid_key_dx<dim, int> coordInEnlargedBlock, char (&boundaryDirection)[dim])
    {
        bool isBoundary = false;
        for (int d=0; d<dim; ++d)
        {
            const auto v = coordInEnlargedBlock.get(d);
            if (v==stencilSupportRadius)
            {
                boundaryDirection[d] = -1;
                isBoundary = true;
            }
            else if (v==stencilSupportRadius+blockEdgeSize-1)
            {
                boundaryDirection[d] = 1;
                isBoundary = true;
            }
            else
            {
                boundaryDirection[d] = 0;
            }
        }
        return isBoundary;
    }
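
    // Worked example (illustrative, not from the original source): with dim = 2,
    // blockEdgeSize = 8 and stencilSupportRadius = 1, the enlarged-block coordinate
    // (1,4) lies on the low boundary of dimension 0 (1 == stencilSupportRadius), so
    // boundaryDirection = {-1, 0} and the function returns true.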

    // Data management methods

    /*! \brief Access the grid point
     *
     * \param coord point coordinates
     *
     * \return the value of the grid point
     */
    template<unsigned int p, typename CoordT>
    inline __device__ auto
    get(const grid_key_dx<dim, CoordT> & coord) const -> ScalarTypeOf<AggregateBlockT, p>
    {
        return BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>::template get<p>(grid.LinId(coord));
    }

    /*! \brief Get the chunk position and the offset inside the chunk of the given point
     *
     * \param coord point coordinates
     * \param dataBlockPos chunk position (output)
     * \param offset offset inside the chunk (output)
     */
    template<typename CoordT>
    inline __device__ void
    get_sparse(const grid_key_dx<dim, CoordT> & coord, unsigned int & dataBlockPos, unsigned int & offset) const
    {
        return BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>::get_sparse(grid.LinId(coord),dataBlockPos,offset);
    }

    /*! \brief Access the grid point
     *
     * \param coord chunk position and offset inside the chunk
     *
     * \return a reference to the value of the grid point
     */
    template<unsigned int p, typename CoordT>
    inline __device__ auto
    get(const block_offset<CoordT> & coord) const -> decltype(std::declval<BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>>().getblockMap().template get_ele<p>(coord.pos)[coord.off])
    {
        return BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>::blockMap.template get_ele<p>(coord.pos)[coord.off];
    }

    /*! \brief Access the grid point
     *
     * \param coord chunk position and offset inside the chunk
     *
     * \return a reference to the value of the grid point
     */
    template<unsigned int p, typename CoordT>
    inline __device__ auto
    get(const block_offset<CoordT> & coord) -> decltype(std::declval<BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>>().getblockMap().template get_ele<p>(coord.pos)[coord.off])
    {
        return BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>::blockMap.template get_ele<p>(coord.pos)[coord.off];
    }

    template<unsigned int p, typename CoordT>
    inline __device__ auto
    insert(const grid_key_dx<dim, CoordT> & coord) -> ScalarTypeOf<AggregateBlockT, p>& // should be decltype(BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>::template insert<p>(0)) but LLVM complains
    {
        return BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>::template insert<p>(grid.LinId(coord));
    }

    template<typename CoordT>
    inline __device__ unsigned int getBlockId(const grid_key_dx<dim, CoordT> & coord)
    {
        // todo: check this because it may be bugged
        return BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>::getBlockId(grid.LinId(coord));
    }

    template<typename CoordT>
    inline __device__ unsigned int getOffset(const grid_key_dx<dim, CoordT> & coord)
    {
        return BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>::getOffset(grid.LinId(coord));
    }

    template<typename CoordT>
    inline __device__ auto
    getBlock(const grid_key_dx<dim, CoordT> & coord) -> decltype(BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>::getBlock(0))
    {
        return BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>::getBlock(getBlockId(coord));
    }

    inline __device__ auto
    getBlock(const unsigned int blockLinId) -> decltype(BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>::getBlock(0))
    {
        return BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>::getBlock(blockLinId);
    }

    template<unsigned int chunksPerBlocks = 1,typename CoordT>
    inline __device__ auto
    insertBlock(const grid_key_dx<dim, CoordT> & coord) -> decltype(BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>::insertBlock(0))
    {
        return BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>::template insertBlock<chunksPerBlocks>(getBlockId(coord));
    }

    template<unsigned int chunksPerBlocks = 1>
    inline __device__ auto
    insertBlock(const indexT blockLinId, const unsigned int stride = 8192) -> decltype(BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>::insertBlock(0))
    {
        return BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>::template insertBlock<chunksPerBlocks>(blockLinId,stride);
    }

    /*! \brief Return the buffer of points
     *
     * \return the buffer of points
     */
    inline __device__ auto getPointBuffer() -> decltype(buffPnt) &
    {
        return buffPnt;
    }

    // Load & Store aux functions for user kernels, to be used for loading into or writing from shared memory.
    // A minimal usage sketch follows.
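
    // Minimal usage sketch (illustrative only, not part of the original source;
    // the free function name and the concrete sizes are hypothetical). Assuming
    // dim = 2, blockEdgeSize = 8 and stencilSupportRadius = 1, the enlarged block
    // holds (8+2)*(8+2) = 100 scalars:
    //
    //   template<unsigned int p, typename SparseGridT, typename AggrWrapperT>
    //   __device__ void loadComputeStore(SparseGridT & sparseGrid,
    //                                    AggrWrapperT & dataBlock,
    //                                    openfpm::sparse_index<unsigned int> blockIdPos)
    //   {
    //       __shared__ ScalarTypeOf<typename SparseGridT::AggregateBlockType, p> enlarged[100];
    //       sparseGrid.template loadGhostBlock<p>(dataBlock, blockIdPos, enlarged);
    //       __syncthreads();
    //       // ... apply the stencil on `enlarged` in place ...
    //       sparseGrid.template storeBlock<p>(dataBlock, enlarged);
    //   }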

    /*! \brief Load a data block into the given shared memory region
     *
     * \param block data block to load
     * \param sharedRegion shared memory region
     */
    template<unsigned int p, typename AggrWrapperT>
    inline __device__ void
    loadBlock(AggrWrapperT &block, ScalarTypeOf<AggregateBlockT, p> *sharedRegion)
    {
        //todo: Make this work well with multiple chunks per block, or check not to get several chunks, or dragons ahoy!
        __loadBlock<p>(block, sharedRegion);
    }

    /*! \brief Load the data blocks of the given properties into the given shared memory regions
     *
     * \param block data block to load
     * \param sharedRegionPtr array of pointers to the shared memory regions, one per property
     */
    template<unsigned int ... props, typename AggrWrapperT>
    inline __device__ void loadBlock(AggrWrapperT &block, void *sharedRegionPtr[sizeof...(props)])
    {
        __loadBlock<props ...>(block, sharedRegionPtr);
    }

    /*! \brief Load the ghost area into the shared region
     *
     * \param dataBlockLoad data block to load
     * \param coord coordinate of the block
     * \param sharedRegion shared memory region
     */
    template<unsigned int p , typename AggrWrapperT , typename CoordT>
    inline __device__ void
    loadGhostBlock(const AggrWrapperT & dataBlockLoad,const grid_key_dx<dim, CoordT> & coord, ScalarTypeOf<AggregateBlockT, p> *sharedRegion)
    {
        auto blockLinId = getBlockId(coord);
        __loadGhostBlock<p>(dataBlockLoad,blockLinId, sharedRegion);
    }

    template<unsigned int p, typename AggrWrapperT>
    inline __device__ void
    loadGhostBlock(const AggrWrapperT & dataBlockLoad, const openfpm::sparse_index<unsigned int> blockLinId, ScalarTypeOf<AggregateBlockT, p> *sharedRegion)
    {
        __loadGhostBlock<p>(dataBlockLoad,blockLinId, sharedRegion);
    }

    template<unsigned int p, typename AggrWrapperT>
    inline __device__ void
    loadGhostBlock(const AggrWrapperT & dataBlockLoad, const openfpm::sparse_index<unsigned int> blockLinId, ScalarTypeOf<AggregateBlockT, p> *sharedRegion, unsigned char * mask)
    {
        __loadGhostBlock<p>(dataBlockLoad,blockLinId, sharedRegion,mask);
    }

    /*! \brief Load the ghost layer of the given properties into the given shared memory regions
     *
     * \param coord coordinate of the block
     * \param neighboursPos positions of the neighborhood blocks
     * \param sharedRegionPtr array of pointers to the shared memory regions, one per property
     */
    template<unsigned int ... props, typename CoordT>
    inline __device__ void loadGhost(const grid_key_dx<dim, CoordT> & coord, const int * neighboursPos, void *sharedRegionPtr[sizeof...(props)])
    {
        auto blockLinId = getBlockId(coord);
        __loadGhost<props ...>(blockLinId, neighboursPos, sharedRegionPtr);
    }

    template<unsigned int ... props>
    inline __device__ void loadGhost(const unsigned int blockLinId, const int * neighboursPos, void *sharedRegionPtr[sizeof...(props)])
    {
        __loadGhost<props ...>(blockLinId, neighboursPos, sharedRegionPtr);
    }

    template<unsigned int p, typename AggrWrapperT>
    inline __device__ void
    storeBlock(AggrWrapperT &block, ScalarTypeOf<AggregateBlockT, p> *sharedRegion)
    {
        //todo: Make this work well with multiple chunks per block, or check not to get several chunks, or dragons ahoy!
        __storeBlock<p>(block, sharedRegion);
    }

    template<unsigned int p, typename CoordT>
    inline __device__ void
    storeBlockInPlace(const grid_key_dx<dim, CoordT> & coord, ScalarTypeOf<AggregateBlockT, p> *sharedRegion)
    {
        //todo: Make this work well with multiple chunks per block, or check not to get several chunks, or dragons ahoy!
        auto & block = getBlock(coord);
        __storeBlock<p>(block, sharedRegion);
    }

    /*! \brief Store the data blocks of the given properties from the given shared memory regions
     *
     * \param block data block to write
     * \param sharedRegionPtr array of pointers to the shared memory regions, one per property
     */
    template<unsigned int ... props, typename AggrWrapperT>
    inline __device__ void storeBlock(AggrWrapperT &block, void *sharedRegionPtr[sizeof...(props)])
    {
        __storeBlock<props ...>(block, sharedRegionPtr);
    }

    template<unsigned int ... props, typename CoordT>
    inline __device__ void storeBlockInPlace(const grid_key_dx<dim, CoordT> & coord, void *sharedRegionPtr[sizeof...(props)])
    {
        auto block = getBlock(coord);
        __storeBlock<props ...>(block, sharedRegionPtr);
    }

    template <unsigned int p, typename CoordT>
    inline __device__ ScalarTypeOf<AggregateBlockT, p> & get(
            const grid_key_dx<dim, CoordT> & coord,
            Box<dim, indexT> sharedMemBox,
            ScalarTypeOf<AggregateBlockT, p> *sharedRegion)
    {
        //NOTE: the size of the Box must be equal to the size of the shared region!
        //NOTE: the Box must be square
        if (sharedMemBox.isInside(coord.toPoint()))
        {
            //Get data from shared mem
            auto one = coord;
            one.one();
            const auto boxDimensions = sharedMemBox.getKP2() - sharedMemBox.getKP1() + one; // The +1 is because the upper bound is inclusive
            const auto relativeCoord = coord - sharedMemBox.getKP1();
            const auto locLinId = coordToLin(relativeCoord, boxDimensions);
            return sharedRegion[locLinId];
        }
        else
        {
            //Get data from global mem
            return get(coord);
        }
    }
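
    // Illustrative note (not from the original source): if sharedMemBox spans the
    // enlarged block [KP1, KP2] that was previously filled with loadGhostBlock,
    // points inside the box are served from shared memory, while points outside
    // fall back to a global-memory read.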

    template<typename CoordT>
    inline __device__ void remove(const grid_key_dx<dim, CoordT> & coord)
    {
        BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>::remove(grid.LinId(coord));
    }

    template<typename BitMaskT>
    inline static __device__ bool isPadding(const BitMaskT &bitMask)
    {
        return getBit(bitMask, PADDING_BIT);
    }

    template <typename keyIndexT>
    inline __device__ bool isPadding(grid_key_dx<dim, keyIndexT> coord) const
    {
        typedef BlockMapGpu_ker<AggregateBlockT, indexT, layout_base> BMG;
        auto mask = BMG::template get<BMG::pMask>(grid.LinId(coord));
        return isPadding(mask);
    }

    template<typename BitMaskT>
    inline static __device__ void setPadding(BitMaskT &bitMask)
    {
        setBit(bitMask, PADDING_BIT);
    }

    template<typename BitMaskT>
    inline static __device__ void unsetPadding(BitMaskT &bitMask)
    {
        unsetBit(bitMask, PADDING_BIT);
    }

    template<typename NNtype>
    inline __device__ indexT getNeighboursPos(const indexT blockId, const unsigned int offset)
    {
        //todo: also do the full neighbourhood version, this is just cross
        auto blockCoord = getBlockCoord(blockId);

        return NNtype::template getNNpos<indexT>(blockCoord,this->blockMap,*this,offset);
    }

#ifdef SE_CLASS1

    /*! \brief Check if the given device pointer belongs to one of the internal buffers
     *
     * \param ptr pointer to check
     *
     * \return a pointer_check structure reporting whether the pointer matches and, if so, where
     */
    pointer_check check_device_pointer(void * ptr)
    {
        pointer_check pc;

        pc = ghostLayerToThreadsMapping.check_device_pointer(ptr);

        if (pc.match == true)
        {
            pc.match_str = std::string("ghostLayerToThreadsMapping overflow: ") + "\n" + pc.match_str;
            return pc;
        }

        pc = nn_blocks.check_device_pointer(ptr);

        if (pc.match == true)
        {
            pc.match_str = std::string("nn_blocks overflow: ") + "\n" + pc.match_str;
            return pc;
        }

        pc = ((BlockMapGpu_ker<AggregateBlockT, indexT, layout_base> *)this)->check_device_pointer(ptr);

        return pc;
    }

#endif

private:

    template<unsigned int p, typename AggrWrapperT, typename SharedPtrT>
    inline __device__ void
    __loadBlock(const AggrWrapperT &block, SharedPtrT sharedRegionPtr)
    {
        typedef ScalarTypeOf<AggregateBlockT, p> ScalarT;

        const unsigned int pos = threadIdx.x;
        //todo: Improve this version to allow multiple chunks per block!

        // Convert pos into a linear id accounting for the ghost offsets
        unsigned int coord[dim];
        linToCoordWithOffset<blockEdgeSize>(pos, stencilSupportRadius, coord);
        const unsigned int linId = coordToLin<blockEdgeSize>(coord, stencilSupportRadius);
//        const unsigned int linId = shift_position<dim,blockEdgeSize>::shift(pos,stencilSupportRadius);

        // Actually load the data into the shared region
        //ScalarT *basePtr = (ScalarT *)sharedRegionPtr;

        sharedRegionPtr[linId] = block.template get<p>()[pos];
    }

    template<unsigned int p, unsigned int ... props, typename AggrWrapperT>
    inline __device__ void
    __loadBlock(const AggrWrapperT &block, void *sharedRegionPtr[sizeof...(props)+1])
    {
        __loadBlock<p>(block, sharedRegionPtr);
        if (sizeof...(props) > 1)
        {
            __loadBlock<props ...>(block, sharedRegionPtr + 1);
        }
        else if (sizeof...(props) == 1)
        {
            __loadBlock<props ...>(block, *(sharedRegionPtr + 1));
        }
    }

    // NOTE: this must be called with a linear thread grid; a nice-to-have would be a generic converter (easy to do)
    // from dim3 to linear which would work under all possible launch params
    template<unsigned int p, typename SharedPtrT>
    inline __device__ void
    __loadGhostNoNN(const unsigned int blockId, SharedPtrT * sharedRegionPtr)
    {
        typedef ScalarTypeOf<AggregateBlockT, p> ScalarT;

        const unsigned int edge = blockEdgeSize + 2*stencilSupportRadius;

        grid_key_dx<dim, int> localCoord;
        grid_key_dx<dim, int> elementCoord;

        for (int pos = threadIdx.x; pos < ghostLayerSize; pos += blockDim.x)
        {
            // Convert pos into a linear id accounting for the inner domain offsets
            const unsigned int linId = ghostLayerToThreadsMapping.template get<0>(pos);
            // Now get the linear offset wrt the first element of the block
            elementCoord = getBlockBaseCoord(blockId);
            unsigned int ctr = linId;
            for (int i = 0; i < dim; ++i)
            {
                int v = (ctr % edge) - stencilSupportRadius;
                ctr /= edge;
                elementCoord.set_d(i, elementCoord.get(i) + v);
            }

            // Actually load the data into the shared region
            ScalarT *basePtr = (ScalarT *)sharedRegionPtr;
            *(basePtr + linId) = get<p>(elementCoord);
        }
    }

    /*! \brief Load the ghost area into the shared region
     *
     * \param block data block
     * \param blockId position of the block
     * \param sharedRegionPtr shared memory region
     */
    template<unsigned int p, typename AggrWrapperT ,typename SharedPtrT>
    inline __device__ void
    __loadGhostBlock(const AggrWrapperT &block, const openfpm::sparse_index<unsigned int> blockId, SharedPtrT * sharedRegionPtr)
    {
        loadGhostBlock_impl<ct_params::nLoop,dim,AggregateBlockT,p,ct_params,blockEdgeSize>::load(block,
                           sharedRegionPtr,
                           ghostLayerToThreadsMapping,
                           nn_blocks,
                           this->blockMap,
                           stencilSupportRadius,
                           ghostLayerSize,
                           blockId.id,
                           background);
    }

    /*! \brief Load the ghost area into the shared region, also filling the mask
     *
     * \param block data block
     * \param blockId position of the block
     * \param sharedRegionPtr shared memory region
     * \param maskPtr mask region
     */
    template<unsigned int p, typename AggrWrapperT ,typename SharedPtrT>
    inline __device__ void
    __loadGhostBlock(const AggrWrapperT &block, const openfpm::sparse_index<unsigned int> blockId, SharedPtrT * sharedRegionPtr, unsigned char * maskPtr)
    {
        loadGhostBlock_impl<ct_params::nLoop,dim,AggregateBlockT,p,ct_params,blockEdgeSize>::load(block,
                           sharedRegionPtr,
                           maskPtr,
                           ghostLayerToThreadsMapping,
                           nn_blocks,
                           this->blockMap,
                           stencilSupportRadius,
                           ghostLayerSize,
                           blockId.id,
                           background);
    }

    template<unsigned int p, unsigned int ... props>
    inline __device__ void
    __loadGhost(const unsigned int blockId, const int * neighboursPos, void *sharedRegionPtr[sizeof...(props)+1])
    {
        __loadGhost<p>(blockId, neighboursPos, sharedRegionPtr);
        if (sizeof...(props) > 1)
        {
            __loadGhost<props ...>(blockId, neighboursPos, sharedRegionPtr + 1);
        }
        else if (sizeof...(props) == 1)
        {
            __loadGhost<props ...>(blockId, neighboursPos, *(sharedRegionPtr + 1));
        }
    }

    template<unsigned int p, typename AggrWrapperT, typename SharedPtrT>
    inline __device__ void
    __storeBlock(AggrWrapperT &block, SharedPtrT sharedRegionPtr)
    {
        typedef ScalarTypeOf<AggregateBlockT, p> ScalarT;

        const unsigned int pos = threadIdx.x;
        //todo: Improve this version to allow multiple chunks per block!
        if (pos < blockSize)
        {
            // Convert pos into a linear id accounting for the ghost offsets
            unsigned int coord[dim];
            linToCoordWithOffset<blockEdgeSize>(pos, stencilSupportRadius, coord);
            const unsigned int linId = coordToLin<blockEdgeSize>(coord, stencilSupportRadius);
//            const unsigned int linId = shift_position<dim,blockEdgeSize>::shift(pos,stencilSupportRadius);

            // Actually store the data from the shared region
            ScalarT *basePtr = (ScalarT *)sharedRegionPtr;

            block.template get<p>()[pos] = *(basePtr + linId);
        }
    }

    template<unsigned int p, unsigned int ... props, typename AggrWrapperT>
    inline __device__ void
    __storeBlock(AggrWrapperT &block, void *sharedRegionPtr[sizeof...(props)+1])
    {
        __storeBlock<p>(block, sharedRegionPtr);
        if (sizeof...(props) > 1)
        {
            __storeBlock<props ...>(block, sharedRegionPtr + 1);
        }
        else if (sizeof...(props) == 1)
        {
            __storeBlock<props ...>(block, *(sharedRegionPtr + 1));
        }
    }
};


#endif //OPENFPM_PDATA_SPARSEGRIDGPU_KER_CUH