OpenFPM_pdata 4.1.0
Project that contains the implementation of distributed structures
SparseGridGpu_ker.cuh
//
// Created by tommaso on 11/06/19.
//

#ifndef OPENFPM_PDATA_SPARSEGRIDGPU_KER_CUH
#define OPENFPM_PDATA_SPARSEGRIDGPU_KER_CUH

#include <Grid/Geometry/grid_smb.hpp>
#include "BlockMapGpu.hpp"
#include "SparseGridGpu_ker_util.hpp"

//! Position of a chunk in the block map (pos) and offset of a point inside the chunk (off)
template<typename indexT>
struct block_offset
{
    indexT pos;
    indexT off;
};

//todo Remove template param GridSmT and just use BlockGeometry
template<unsigned int dim,
         unsigned int blockEdgeSize,
         typename AggregateBlockT,
         typename ct_params,
         typename indexT,
         template<typename> class layout_base,
         typename GridSmT,
         typename linearizer,
         typename BcT>
class SparseGridGpu_ker : public BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>
{
private:
    linearizer grid;
    GridSmT blockWithGhostGrid;

    //! background values
    BcT background;

protected:
    const static unsigned char PADDING_BIT = 1;
    static constexpr unsigned int blockSize = BlockTypeOf<AggregateBlockT, 0>::size;
    unsigned int ghostLayerSize;
    openfpm::vector_gpu_ker<aggregate<short int, short int>, memory_traits_inte> ghostLayerToThreadsMapping;
    openfpm::vector_gpu_ker<aggregate<indexT>, memory_traits_inte> nn_blocks;
    openfpm::vector_gpu_ker<aggregate<indexT>, memory_traits_inte> buffPnt;

public:
    static constexpr unsigned int d = dim;
    static constexpr unsigned int dims = dim;
    static constexpr unsigned int blockEdgeSize_ = blockEdgeSize;
    unsigned int stencilSupportRadius;
    typedef AggregateBlockT AggregateBlockType;
    typedef indexT indexT_;

    //! Indicate this structure has a function to check the device pointer
    typedef int yes_has_check_device_pointer;

public:

    //! \brief constructor
    SparseGridGpu_ker(const openfpm::vector_sparse_gpu_ker<AggregateBlockT, indexT, layout_base> & blockMap,
                      linearizer & grid,
                      GridSmT extendedBlockGeometry,
                      unsigned int stencilSupportRadius,
                      openfpm::vector_gpu_ker<aggregate<short int, short int>, memory_traits_inte> ghostLayerToThreadsMapping,
                      openfpm::vector_gpu_ker<aggregate<indexT>, memory_traits_inte> nn_blocks,
                      openfpm::vector_gpu_ker<aggregate<indexT>, memory_traits_inte> buffPnt,
                      unsigned int ghostLayerSize,
                      BcT & bck)
        : BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>(blockMap),
          grid(grid),
          blockWithGhostGrid(extendedBlockGeometry),
          stencilSupportRadius(stencilSupportRadius),
          ghostLayerSize(ghostLayerSize),
          ghostLayerToThreadsMapping(ghostLayerToThreadsMapping),
          nn_blocks(nn_blocks),
          buffPnt(buffPnt),
          background(bck)
    {}

    //! \brief Unpack the header of a packed sequence of chunks; return the total packed size in bytes
    template<typename headers_type>
    __device__ static int unpack_headers(headers_type & headers, unsigned char * data, int ih, int sz_pack)
    {
        size_t n_cnk;
        if (sizeof(indexT) == 8)
        {n_cnk = ((size_t *)data)[0];}
        else
        {
            unsigned int dp1 = ((unsigned int *)data)[0];
            unsigned int dp2 = ((unsigned int *)&data[4])[0];
            n_cnk = (size_t)dp1 + ((size_t)dp2 << 32);
        }
        headers.template get<1>(ih) = n_cnk;

        size_t actual_offset = n_cnk*sizeof(indexT);

        unsigned int n_pnt = *(unsigned int *)&(data[sizeof(size_t) + 2*dim*sizeof(int) + actual_offset + n_cnk*sizeof(unsigned int)]);
        headers.template get<2>(ih) = n_pnt;

        return sizeof(size_t) +                                                       // bytes required to pack the number of chunks
               2*dim*sizeof(int) +                                                    // starting point + size of the indexing packing
               sizeof(indexT)*n_cnk +                                                 // bytes required to pack the chunk indexes
               align_number_device(sizeof(indexT),(n_cnk+1)*sizeof(unsigned int)) +   // bytes required to pack the scan of the chunk points
               align_number_device(sizeof(indexT),n_pnt*sz_pack) +                    // bytes required to pack data
               align_number_device(sizeof(indexT),n_pnt*sizeof(short int)) +          // bytes required to pack offsets
               align_number_device(sizeof(indexT),n_pnt*sizeof(unsigned char));       // bytes required to pack masks
    }
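
    // Sketch of the packed-chunk layout that unpack_headers walks (reconstructed
    // from the arithmetic above; the section names are descriptive only):
    //
    //   [ size_t         x 1         ]  number of chunks (n_cnk)
    //   [ int            x 2*dim     ]  starting point + size of the indexing packing
    //   [ indexT         x n_cnk     ]  chunk indexes
    //   [ unsigned int   x n_cnk + 1 ]  scan of points per chunk (last entry = n_pnt)
    //   [ sz_pack bytes  x n_pnt     ]  point data
    //   [ short int      x n_pnt     ]  point offsets
    //   [ unsigned char  x n_pnt     ]  point masks
    //
    // The last four sections are each padded to a multiple of sizeof(indexT) via
    // align_number_device, which is why the returned total uses it as well.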

    //! \brief Get the global coordinate, given a block coordinate and the offset of a point inside the block
    template<typename CoordT>
    __device__ __host__ inline grid_key_dx<dim,CoordT> getGlobalCoord(const grid_key_dx<dim, CoordT> & blockCoord, unsigned int offset)
    {
        return grid.getGlobalCoord(blockCoord,offset);
    }

    //! \brief Linearization of global coordinates
    template<typename CoordT>
    inline __device__ size_t getLinId(const grid_key_dx<dim, CoordT> & coord) const
    {
        return grid.LinId(coord);
    }

    //! \brief Size of the sparse grid in direction i
    inline __device__ unsigned int size(unsigned int i)
    {
        return grid.getSize()[i];
    }

    //! \brief The inversion of getLinId
    inline __device__ grid_key_dx<dim, int> getCoord(size_t linId) const
    {
        return grid.InvLinId(linId);
    }

    //! \brief The inversion of getLinId, from a data block id and an offset inside the block
    inline __device__ grid_key_dx<dim, int> getCoord(size_t dataBlockId, unsigned offset) const
    {
        return grid.InvLinId(dataBlockId * blockSize + offset);
    }

    //! \brief Given a point to insert, return the block coordinates (blk) and the offset of the point
    //!        inside the block; the return value indicates whether the point is inside the insertion region
    template<typename ite_type>
    inline __device__ bool getInsertBlockOffset(const ite_type & itd, const grid_key_dx<dim, int> & p, grid_key_dx<dim, int> & blk, int & offset)
    {
        int accu = 1;
        offset = 0;

        bool active = true;

        for (int i = 0 ; i < dim ; i++)
        {
            blk.set_d(i,p.get(i) / getBlockEdgeSize());
            offset += (p.get(i) % getBlockEdgeSize()) * accu;
            accu *= getBlockEdgeSize();
            active = active && (p.get(i) >= (itd.start.get(i) + itd.start_base.get(i))) && (p.get(i) <= itd.stop.get(i));
        }

        return active;
    }

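    // Worked example (illustrative): dim = 2, blockEdgeSize = 8, p = (13,5)
    // gives blk = (13/8, 5/8) = (1,0) and offset = (13%8) + (5%8)*8 = 45,
    // i.e. the x index varies fastest inside a block.
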
    //! \brief Linearization of block coordinates
    template<typename CoordT>
    inline __device__ size_t getBlockLinId(CoordT blockCoord) const
    {
        return grid.BlockLinId(blockCoord);
    }

    //! \brief The inversion of getBlockLinId
    inline __device__ grid_key_dx<dim, int> getBlockCoord(size_t blockLinId) const
    {
        return grid.BlockInvLinId(blockLinId);
    }

    //! \brief Given a linearized block index, return the coordinates of its lower-left corner point
    inline __device__ grid_key_dx<dim, int> getBlockBaseCoord(size_t blockLinId) const
    {
        return grid.InvLinId(blockLinId * blockSize);
    }

    //! \brief Return the key of the neighbour of base along the given dimension, displaced by offset
    inline __device__ grid_key_dx<dim, int> getNeighbour(grid_key_dx<dim, int> base, unsigned int dimension, char offset) const
    {
        grid_key_dx<dim, int> res = base;
        auto i = base.get(dimension) + offset;
        res.set_d(dimension, i);
        return res;
    }

    //! \brief Return the block edge size
    constexpr static __device__ unsigned int getBlockEdgeSize()
    {
        return blockEdgeSize;
    }

    //! \brief Return the size of the block (number of elements per block)
    constexpr __device__ unsigned int getBlockSize() const
    {
        return blockSize;
    }

    //! \brief Return the size of the block + ghost needed to apply the stencil
    inline __device__ unsigned int getEnlargedBlockSize() const
    {
        return std::pow(blockEdgeSize + 2*stencilSupportRadius, dim);
    }

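    // Example (illustrative): blockEdgeSize = 8, stencilSupportRadius = 1, dim = 3
    // gives an enlarged block of (8 + 2)^3 = 1000 elements, versus a blockSize
    // of 8^3 = 512 for the bare block.
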
    //! \brief Get the neighborhood point in one direction
    template<typename NN_type, typename indexT2>
    inline __device__ block_offset<indexT2> getNNPoint(openfpm::sparse_index<unsigned int> pos,
                                                       unsigned int offset,
                                                       const grid_key_dx<dim,indexT2> & mov)
    {
        block_offset<indexT2> bof;

        grid_key_dx<dim,indexT2> coord;

        for (int i = 0 ; i < dim ; i++)
        {
            coord.set_d(i,mov.get(i) + offset % blockEdgeSize);
            offset /= blockEdgeSize;
        }

        unsigned int NN_index = 0;
        unsigned int offset_nn = 0;

        bool out = NN_type::template getNNindex_offset<blockEdgeSize>(coord,NN_index,offset_nn);

        // Calculate internal coordinates

        indexT nnb = pos.id;

        if (out == true)
        {nnb = nn_blocks.template get<0>(NN_index + NN_type::nNN*pos.id);}

        bof.pos = nnb;
        bof.off = offset_nn;

        return bof;
    }

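    // What getNNPoint computes, in short: offset is decomposed into in-block
    // coordinates (x fastest) and displaced by mov; when the displaced point
    // leaves the chunk, getNNindex_offset flags it and the neighbour chunk is
    // looked up in nn_blocks, so bof carries that chunk plus the offset of the
    // point inside it. E.g. blockEdgeSize = 4, dim = 2, offset = 7 decomposes
    // to (3,1); mov = (1,0) moves it to (4,1), crossing the +x face of the chunk.
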
    //! \brief Convert a position inside the bare block into the linear id inside the enlarged (block + ghost) block
    inline __device__ unsigned int posToEnlargedBlockPos(unsigned int pos) const
    {
        // Convert pos into a linear id accounting for the ghost offsets
        unsigned int coord[dim];
        linToCoordWithOffset<blockEdgeSize>(pos, stencilSupportRadius, coord);
        const unsigned int linId = coordToLin<blockEdgeSize>(coord, stencilSupportRadius);
//        const unsigned int linId = shift_position<dim,blockEdgeSize>::shift(pos,stencilSupportRadius);

        return linId;
    }

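    // Example (illustrative): blockEdgeSize = 8, stencilSupportRadius = 1, dim = 2.
    // The enlarged block has edge 8 + 2*1 = 10, so pos = 0 maps to coord (1,1)
    // and linId = 1 + 1*10 = 11: the first interior element, just past the ghost frame.
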
    inline __device__ grid_key_dx<dim,int>
    getCoordInEnlargedBlock(const unsigned int offset) const
    {
        unsigned int coord[dim];
        linToCoordWithOffset<blockEdgeSize>(offset, stencilSupportRadius, coord);
        return grid_key_dx<dim, int>(coord);
    }

    inline __device__ unsigned int
    getLinIdInEnlargedBlock(const unsigned int offset) const
    {
        unsigned int coord[dim];
        linToCoordWithOffset<blockEdgeSize>(offset, stencilSupportRadius, coord);
        return coordToLin<blockEdgeSize>(coord, stencilSupportRadius);
//        return shift_position<dim,blockEdgeSize>::shift(offset,stencilSupportRadius);
    }

    template<typename Coordtype>
    inline __device__ unsigned int
    getNeighbourLinIdInEnlargedBlock(const grid_key_dx<dim, Coordtype> & base, grid_key_dx<dim, Coordtype> & offsets) const
    {
        grid_key_dx<dim, int> res = base + offsets;
        return coordToLin<blockEdgeSize>(res, stencilSupportRadius);
    }

    template<typename Coordtype>
    inline __device__ unsigned int
    getNeighbourLinIdInEnlargedBlock(const grid_key_dx<dim,Coordtype> & base, unsigned int dimension, char offset) const
    {
        grid_key_dx<dim, int> res = getNeighbour(base, dimension, offset);
        return coordToLin<blockEdgeSize>(res, stencilSupportRadius);
    }

    inline __device__ bool
    getIfBoundaryElementInEnlargedBlock(const grid_key_dx<dim, int> coordInEnlargedBlock, char (&boundaryDirection)[dim])
    {
        bool isBoundary = false;
        for (int d=0; d<dim; ++d)
        {
            const auto v = coordInEnlargedBlock.get(d);
            if (v==stencilSupportRadius)
            {
                boundaryDirection[d] = -1;
                isBoundary = true;
            }
            else if (v==stencilSupportRadius+blockEdgeSize-1)
            {
                boundaryDirection[d] = 1;
                isBoundary = true;
            }
            else
            {
                boundaryDirection[d] = 0;
            }
        }
        return isBoundary;
    }

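    // Example (illustrative): blockEdgeSize = 8, stencilSupportRadius = 1, dim = 3.
    // Interior coordinates run from 1 to 8 in the enlarged block, so
    // coordInEnlargedBlock = (1,5,8) yields boundaryDirection = {-1,0,+1}
    // and the function returns true.
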
    // Data management methods

    template<unsigned int p, typename CoordT>
    inline __device__ auto
    get(const grid_key_dx<dim, CoordT> & coord) const -> ScalarTypeOf<AggregateBlockT, p>
    {
        return BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>::template get<p>(grid.LinId(coord));
    }

    template<typename CoordT>
    inline __device__ void
    get_sparse(const grid_key_dx<dim, CoordT> & coord, unsigned int & dataBlockPos, unsigned int & offset) const
    {
        return BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>::get_sparse(grid.LinId(coord),dataBlockPos,offset);
    }

    //! \brief Access the grid point
    template<unsigned int p, typename CoordT>
    inline __device__ auto
    get(const block_offset<CoordT> & coord) const -> decltype(std::declval<BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>>().getblockMap().template get_ele<p>(coord.pos)[coord.off])
    {
        return BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>::blockMap.template get_ele<p>(coord.pos)[coord.off];
    }

    //! \brief Access the grid point (non-const)
    template<unsigned int p, typename CoordT>
    inline __device__ auto
    get(const block_offset<CoordT> & coord) -> decltype(std::declval<BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>>().getblockMap().template get_ele<p>(coord.pos)[coord.off])
    {
        return BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>::blockMap.template get_ele<p>(coord.pos)[coord.off];
    }

    template<unsigned int p, typename CoordT>
    inline __device__ auto
    insert(const grid_key_dx<dim, CoordT> & coord) -> ScalarTypeOf<AggregateBlockT, p>& // should be decltype(BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>::template insert<p>(0)) but LLVM complains
    {
        return BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>::template insert<p>(grid.LinId(coord));
    }

    template<typename CoordT>
    inline __device__ unsigned int getBlockId(const grid_key_dx<dim, CoordT> & coord)
    {
        // todo: check this because it's bugged! maybe?
        return BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>::getBlockId(grid.LinId(coord));
    }

    template<typename CoordT>
    inline __device__ unsigned int getOffset(const grid_key_dx<dim, CoordT> & coord)
    {
        return BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>::getOffset(grid.LinId(coord));
    }

    template<typename CoordT>
    inline __device__ auto
    getBlock(const grid_key_dx<dim, CoordT> & coord) -> decltype(BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>::getBlock(0))
    {
        return BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>::getBlock(getBlockId(coord));
    }

    inline __device__ auto
    getBlock(const unsigned int blockLinId) -> decltype(BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>::getBlock(0))
    {
        return BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>::getBlock(blockLinId);
    }

    template<unsigned int chunksPerBlocks = 1, typename CoordT>
    inline __device__ auto
    insertBlock(const grid_key_dx<dim, CoordT> & coord) -> decltype(BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>::insertBlock(0))
    {
        return insertBlock<chunksPerBlocks>(getBlockId(coord));
    }

    template<unsigned int chunksPerBlocks = 1>
    inline __device__ auto
    insertBlock(const indexT blockLinId, const unsigned int stride = 8192) -> decltype(BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>::insertBlock(0))
    {
        return BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>::template insertBlock<chunksPerBlocks>(blockLinId,stride);
    }

    //! \brief Return the buffer of points
    inline __device__ auto getPointBuffer() -> decltype(buffPnt) &
    {
        return buffPnt;
    }

    // Load & Store aux functions for user kernels, to be used for loading to or writing from shared memory.

    //! \brief Load a data block into the given shared memory region
    template<unsigned int p, typename AggrWrapperT>
    inline __device__ void
    loadBlock(AggrWrapperT &block, ScalarTypeOf<AggregateBlockT, p> *sharedRegion)
    {
        //todo: Make this work well with multiple chunks per block or check not to get several chunks or dragons ahoy!
        __loadBlock<p>(block, sharedRegion);
    }

    //! \brief Load the data blocks of the given properties into the given shared memory regions
    template<unsigned int ... props, typename AggrWrapperT>
    inline __device__ void loadBlock(AggrWrapperT &block, void *sharedRegionPtr[sizeof...(props)])
    {
        __loadBlock<props ...>(block, sharedRegionPtr);
    }

    //! \brief Load a data block plus its ghost layer into the given shared memory region
    template<unsigned int p, typename AggrWrapperT, typename CoordT>
    inline __device__ void
    loadGhostBlock(const AggrWrapperT & dataBlockLoad, const grid_key_dx<dim, CoordT> & coord, ScalarTypeOf<AggregateBlockT, p> *sharedRegion)
    {
        auto blockLinId = getBlockId(coord);
        __loadGhostBlock<p>(dataBlockLoad, blockLinId, sharedRegion);
    }

    template<unsigned int p, typename AggrWrapperT>
    inline __device__ void
    loadGhostBlock(const AggrWrapperT & dataBlockLoad, const openfpm::sparse_index<unsigned int> blockLinId, ScalarTypeOf<AggregateBlockT, p> *sharedRegion)
    {
        __loadGhostBlock<p>(dataBlockLoad, blockLinId, sharedRegion);
    }

    template<unsigned int p, typename AggrWrapperT>
    inline __device__ void
    loadGhostBlock(const AggrWrapperT & dataBlockLoad, const openfpm::sparse_index<unsigned int> blockLinId, ScalarTypeOf<AggregateBlockT, p> *sharedRegion, unsigned char * mask)
    {
        __loadGhostBlock<p>(dataBlockLoad, blockLinId, sharedRegion, mask);
    }

    //! \brief Load the ghost layers of the given properties into the given shared memory regions
    template<unsigned int ... props, typename CoordT>
    inline __device__ void loadGhost(const grid_key_dx<dim, CoordT> & coord, const int * neighboursPos, void *sharedRegionPtr[sizeof...(props)])
    {
        auto blockLinId = getBlockId(coord);
        __loadGhost<props ...>(blockLinId, neighboursPos, sharedRegionPtr);
    }

    template<unsigned int ... props>
    inline __device__ void loadGhost(const unsigned int blockLinId, const int * neighboursPos, void *sharedRegionPtr[sizeof...(props)])
    {
        __loadGhost<props ...>(blockLinId, neighboursPos, sharedRegionPtr);
    }

    template<unsigned int p, typename AggrWrapperT>
    inline __device__ void
    storeBlock(AggrWrapperT &block, ScalarTypeOf<AggregateBlockT, p> *sharedRegion)
    {
        //todo: Make this work well with multiple chunks per block or check not to get several chunks or dragons ahoy!
        __storeBlock<p>(block, sharedRegion);
    }

    template<unsigned int p, typename CoordT>
    inline __device__ void
    storeBlockInPlace(const grid_key_dx<dim, CoordT> & coord, ScalarTypeOf<AggregateBlockT, p> *sharedRegion)
    {
        //todo: Make this work well with multiple chunks per block or check not to get several chunks or dragons ahoy!
        auto & block = getBlock(coord);
        __storeBlock<p>(block, sharedRegion);
    }

    //! \brief Store the given shared memory regions into the data blocks for the given properties
    template<unsigned int ... props, typename AggrWrapperT>
    inline __device__ void storeBlock(AggrWrapperT &block, void *sharedRegionPtr[sizeof...(props)])
    {
        __storeBlock<props ...>(block, sharedRegionPtr);
    }

    template<unsigned int ... props, typename CoordT>
    inline __device__ void storeBlockInPlace(const grid_key_dx<dim, CoordT> & coord, void *sharedRegionPtr[sizeof...(props)])
    {
        auto block = getBlock(coord);
        __storeBlock<props ...>(block, sharedRegionPtr);
    }

    template <unsigned int p, typename CoordT>
    inline __device__ ScalarTypeOf<AggregateBlockT, p> & get(
            const grid_key_dx<dim, CoordT> & coord,
            Box<dim, indexT> sharedMemBox,
            ScalarTypeOf<AggregateBlockT, p> *sharedRegion)
    {
        //NOTE: Size of Box must be equal to size of shared region!
        //NOTE: Box must be square
        if (sharedMemBox.isInside(coord.toPoint()))
        {
            //Get data from shared mem
            auto one = coord;
            one.one();
            const auto boxDimensions = sharedMemBox.getKP2() - sharedMemBox.getKP1() + one; // The +1 is because the upper bound is inclusive
            const auto relativeCoord = coord - sharedMemBox.getKP1();
            const auto locLinId = coordToLin(relativeCoord, boxDimensions);
            return sharedRegion[locLinId];
        }
        else
        {
            //Get data from global mem
            return get(coord);
        }
    }

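    // Example (illustrative): dim = 2 with sharedMemBox = [(16,16),(23,23)],
    // i.e. an 8x8 region staged in sharedRegion. For coord = (18,20) the
    // relative coordinate is (2,4), read from sharedRegion at 2 + 4*8 = 34
    // (x fastest); any coord outside the box falls back to the global get(coord).
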
    template<typename CoordT>
    inline __device__ void remove(const grid_key_dx<dim, CoordT> & coord)
    {
        BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>::remove(grid.LinId(coord));
    }

    template<typename BitMaskT>
    inline static __device__ bool isPadding(const BitMaskT &bitMask)
    {
        return getBit(bitMask, PADDING_BIT);
    }

    template <typename keyIndexT>
    inline __device__ bool isPadding(grid_key_dx<dim, keyIndexT> coord) const
    {
        auto mask = BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>::template get<pMask>(grid.LinId(coord));
        return isPadding(mask);
    }

    template<typename BitMaskT>
    inline static __device__ void setPadding(BitMaskT &bitMask)
    {
        setBit(bitMask, PADDING_BIT);
    }

    template<typename BitMaskT>
    inline static __device__ void unsetPadding(BitMaskT &bitMask)
    {
        unsetBit(bitMask, PADDING_BIT);
    }

    template<typename NNtype>
    inline __device__ indexT getNeighboursPos(const indexT blockId, const unsigned int offset)
    {
        //todo: also do the full neighbourhood version, this is just cross
        auto blockCoord = getBlockCoord(blockId);

        return NNtype::template getNNpos<indexT>(blockCoord, this->blockMap, *this, offset);
    }

#ifdef SE_CLASS1

    //! \brief Given a device pointer, check whether it belongs to one of the internal buffers of this structure
    pointer_check check_device_pointer(void * ptr)
    {
        pointer_check pc;

        pc = ghostLayerToThreadsMapping.check_device_pointer(ptr);

        if (pc.match == true)
        {
            pc.match_str = std::string("ghostLayerToThreadsMapping overflow: ") + "\n" + pc.match_str;
            return pc;
        }

        pc = nn_blocks.check_device_pointer(ptr);

        if (pc.match == true)
        {
            pc.match_str = std::string("nn_blocks overflow: ") + "\n" + pc.match_str;
            return pc;
        }

        pc = ((BlockMapGpu_ker<AggregateBlockT, indexT, layout_base> *)this)->check_device_pointer(ptr);

        return pc;
    }

#endif

private:

    template<unsigned int p, typename AggrWrapperT, typename SharedPtrT>
    inline __device__ void
    __loadBlock(const AggrWrapperT &block, SharedPtrT sharedRegionPtr)
    {
        typedef ScalarTypeOf<AggregateBlockT, p> ScalarT;

        const unsigned int pos = threadIdx.x;
        //todo: Improve this version to allow multiple chunks per block!

        // Convert pos into a linear id accounting for the ghost offsets
        unsigned int coord[dim];
        linToCoordWithOffset<blockEdgeSize>(pos, stencilSupportRadius, coord);
        const unsigned int linId = coordToLin<blockEdgeSize>(coord, stencilSupportRadius);
//        const unsigned int linId = shift_position<dim,blockEdgeSize>::shift(pos,stencilSupportRadius);

        // Actually load the data into the shared region
        //ScalarT *basePtr = (ScalarT *)sharedRegionPtr;

        sharedRegionPtr[linId] = block.template get<p>()[pos];
    }

    template<unsigned int p, unsigned int ... props, typename AggrWrapperT>
    inline __device__ void
    __loadBlock(const AggrWrapperT &block, void *sharedRegionPtr[sizeof...(props)+1])
    {
        __loadBlock<p>(block, sharedRegionPtr);
        if (sizeof...(props) > 1)
        {
            __loadBlock<props ...>(block, sharedRegionPtr + 1);
        }
        else if (sizeof...(props) == 1)
        {
            __loadBlock<props ...>(block, *(sharedRegionPtr + 1));
        }
    }

    // NOTE: this must be called with a linear thread grid; a nice-to-have would be a generic converter (easy to do)
    // from dim3 to linear which would work under all possible launch params
    template<unsigned int p, typename SharedPtrT>
    inline __device__ void
    __loadGhostNoNN(const unsigned int blockId, SharedPtrT * sharedRegionPtr)
    {
        typedef ScalarTypeOf<AggregateBlockT, p> ScalarT;

        const unsigned int edge = blockEdgeSize + 2*stencilSupportRadius;

        grid_key_dx<dim, int> localCoord;
        grid_key_dx<dim, int> elementCoord;

        for (int pos = threadIdx.x; pos < ghostLayerSize; pos += blockDim.x)
        {
            // Convert pos into a linear id accounting for the inner domain offsets
            const unsigned int linId = ghostLayerToThreadsMapping.template get<0>(pos);
            // Now get the linear offset wrt the first element of the block
            elementCoord = getBlockBaseCoord(blockId);
            unsigned int ctr = linId;
            for (int i = 0; i < dim; ++i)
            {
                int v = (ctr % edge) - stencilSupportRadius;
                ctr /= edge;
                elementCoord.set_d(i, elementCoord.get(i) + v);
            }

            // Actually load the data into the shared region
            ScalarT *basePtr = (ScalarT *)sharedRegionPtr;
            *(basePtr + linId) = get<p>(elementCoord);
        }
    }

    //! \brief Load the ghost area in the shared region
    template<unsigned int p, typename AggrWrapperT, typename SharedPtrT>
    inline __device__ void
    __loadGhostBlock(const AggrWrapperT &block, const openfpm::sparse_index<unsigned int> blockId, SharedPtrT * sharedRegionPtr)
    {
        // delegate to the block/ghost loader (see SparseGridGpu_ker_util.hpp)
        loadGhostBlock_impl<ct_params::nLoop,dim,AggregateBlockT,p,ct_params,blockEdgeSize>::load(block,
                sharedRegionPtr,
                ghostLayerToThreadsMapping,
                nn_blocks,
                this->blockMap,
                stencilSupportRadius,
                ghostLayerSize,
                blockId.id,
                background);
    }

    //! \brief Load the ghost area in the shared region, also filling the given mask buffer
    template<unsigned int p, typename AggrWrapperT, typename SharedPtrT>
    inline __device__ void
    __loadGhostBlock(const AggrWrapperT &block, const openfpm::sparse_index<unsigned int> blockId, SharedPtrT * sharedRegionPtr, unsigned char * maskPtr)
    {
        // delegate to the block/ghost loader (see SparseGridGpu_ker_util.hpp)
        loadGhostBlock_impl<ct_params::nLoop,dim,AggregateBlockT,p,ct_params,blockEdgeSize>::load(block,
                sharedRegionPtr,
                maskPtr,
                ghostLayerToThreadsMapping,
                nn_blocks,
                this->blockMap,
                stencilSupportRadius,
                ghostLayerSize,
                blockId.id,
                background);
    }

    template<unsigned int p, unsigned int ... props>
    inline __device__ void
    __loadGhost(const unsigned int blockId, const int * neighboursPos, void *sharedRegionPtr[sizeof...(props)+1])
    {
        __loadGhost<p>(blockId, neighboursPos, sharedRegionPtr);
        if (sizeof...(props) > 1)
        {
            __loadGhost<props ...>(blockId, neighboursPos, sharedRegionPtr + 1);
        }
        else if (sizeof...(props) == 1)
        {
            __loadGhost<props ...>(blockId, neighboursPos, *(sharedRegionPtr + 1));
        }
    }

    template<unsigned int p, typename AggrWrapperT, typename SharedPtrT>
    inline __device__ void
    __storeBlock(AggrWrapperT &block, SharedPtrT sharedRegionPtr)
    {
        typedef ScalarTypeOf<AggregateBlockT, p> ScalarT;

        const unsigned int pos = threadIdx.x;
        //todo: Improve this version to allow multiple chunks per block!
        if (pos < blockSize)
        {
            // Convert pos into a linear id accounting for the ghost offsets
            unsigned int coord[dim];
            linToCoordWithOffset<blockEdgeSize>(pos, stencilSupportRadius, coord);
            const unsigned int linId = coordToLin<blockEdgeSize>(coord, stencilSupportRadius);
//            const unsigned int linId = shift_position<dim,blockEdgeSize>::shift(pos,stencilSupportRadius);

            // Actually store the data from the shared region
            ScalarT *basePtr = (ScalarT *)sharedRegionPtr;

            block.template get<p>()[pos] = *(basePtr + linId);
        }
    }

    template<unsigned int p, unsigned int ... props, typename AggrWrapperT>
    inline __device__ void
    __storeBlock(AggrWrapperT &block, void *sharedRegionPtr[sizeof...(props)+1])
    {
        __storeBlock<p>(block, sharedRegionPtr);
        if (sizeof...(props) > 1)
        {
            __storeBlock<props ...>(block, sharedRegionPtr + 1);
        }
        else if (sizeof...(props) == 1)
        {
            __storeBlock<props ...>(block, *(sharedRegionPtr + 1));
        }
    }
};
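
// Typical element access from device code (a sketch: `sparseGrid` stands for a
// SparseGridGpu_ker instance received by a user kernel, and property 0 is
// assumed to hold a float):
//
//     grid_key_dx<3, int> key(x, y, z);
//     float v = sparseGrid.template get<0>(key);      // read property 0
//     sparseGrid.template insert<0>(key) = v + 1.0f;  // write through the insert path
//
// The loadGhostBlock / storeBlock helpers above follow the same idea but work
// one chunk per CUDA block, staging the chunk plus its ghost layer in shared memory.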

#endif //OPENFPM_PDATA_SPARSEGRIDGPU_KER_CUH