8#ifndef SPARSEGRIDGPU_KER_UTIL_HPP_
9#define SPARSEGRIDGPU_KER_UTIL_HPP_
11#include "util/variadic_to_vmpl.hpp"
/*! \brief Assign property p of an aggregate to a destination object
 *
 * Despite its name, src is the left-hand side of the assignment: it is
 * overwritten with the value returned by aggr.template get<p>().
 *
 * \tparam p index of the property read from aggr
 * \tparam SrcType type of the object being assigned to
 * \tparam AggrType type exposing a get<p>() member template
 *
 * \param src object that receives the value
 * \param aggr object the value is read from
 *
 * Marked __device__ __host__, so it is callable from both sides.
 */
16 template<
unsigned int p,
typename SrcType,
typename AggrType>
17 __device__ __host__
static inline void set(SrcType & src,AggrType & aggr)
19 src = aggr.template get<p>();
26 template<
unsigned int p,
typename SrcType,
typename AggrType>
27 __device__ __host__
static inline void set(SrcType & src,AggrType & aggr)
31template<
unsigned int dim,
typename T>
86template<
unsigned int dim>
91 template<
typename indexT,
typename blockCoord_type,
typename blockMap_type,
typename SparseGr
id_type>
92 __device__
static inline indexT getNNpos(blockCoord_type & blockCoord,
93 blockMap_type & blockMap,
94 SparseGrid_type & sparseGrid,
95 const unsigned int offset)
98 int neighbourPos = -1;
101 unsigned int d = offset/2;
102 int dPos = blockCoord.get(d) + (offset%2)*2 - 1;
103 blockCoord.set_d(d, dPos);
105 int bl = sparseGrid.getBlockLinId(blockCoord);
107 bl = (dPos < 0)?-1:bl;
109 neighbourPos = blockMap.get_sparse(bl).id;
114 template<
typename indexT,
unsigned int blockEdgeSize,
typename coordType>
115 __host__
static inline indexT getNNskin(coordType & coord,
int stencilSupportRadius)
117 int neighbourNum = -1;
119 for (
int j = 0; j < dim; ++j)
121 int c =
static_cast<int>(coord.get(j)) -
static_cast<int>(stencilSupportRadius);
127 else if (c >= blockEdgeSize)
129 neighbourNum = 2*j + 1;
141 template<
typename sparseGr
id_type,
typename coord_type,
typename Mask_type,
unsigned int eb_size>
142 __device__
static inline bool isPadding(sparseGrid_type & sparseGrid, coord_type & coord, Mask_type (& enlargedBlock)[eb_size])
144 bool isPadding_ =
false;
145 for (
int d=0; d<dim; ++d)
147 auto nPlusId = sparseGrid.getNeighbourLinIdInEnlargedBlock(coord, d, 1);
148 auto nMinusId = sparseGrid.getNeighbourLinIdInEnlargedBlock(coord, d, -1);
149 typename std::remove_all_extents<Mask_type>::type neighbourPlus = enlargedBlock[nPlusId];
150 typename std::remove_all_extents<Mask_type>::type neighbourMinus = enlargedBlock[nMinusId];
151 isPadding_ = isPadding_ || (!sparseGrid.exist(neighbourPlus));
152 isPadding_ = isPadding_ || (!sparseGrid.exist(neighbourMinus));
153 if (isPadding_)
break;
169template<
unsigned int n_it>
175template<
unsigned int n_it,
unsigned int n_prp>
178 void * ptr[n_it][n_prp+1];
181template<
typename copy_type,
unsigned int nprp,
unsigned int prp_val,
unsigned int prp_
id>
/*! \brief Copy one scalar element of property prp_val from the data buffer to an output array
 *
 * Reads element `offset` of dataBuff.template get<prp_val>(dataBlockPos) and
 * stores it in slot `ppos` of data_ptr[prp_id], reinterpreted as copy_type *.
 *
 * \param data_ptr array of nprp untyped destination pointers
 * \param dataBuff buffer the element is read from
 * \param ppos index written in the destination array
 * \param dataBlockPos index passed to dataBuff.get<prp_val>()
 * \param offset index of the element inside the selected block
 * \param n_pnt not referenced by the statement of this overload
 */
184 template<
typename dataBuffer_type>
185 __device__ __host__
static void copy(
void * (& data_ptr)[nprp], dataBuffer_type & dataBuff,
unsigned int ppos,
unsigned int dataBlockPos,
unsigned int offset,
unsigned int n_pnt)
187 ((copy_type *)data_ptr[prp_id])[ppos] = dataBuff.template get<prp_val>(dataBlockPos)[offset];
/*! \brief Inverse of copy: write one scalar element of property prp_val back into the data buffer
 *
 * Reads slot `ppos` of data_ptr.ptr[0][prp_id], reinterpreted as copy_type *,
 * and stores it in element `offset` of dataBuff.template get<prp_val>(dataBlockPos).
 *
 * \param data_ptr pointer table; only row 0 is accessed here
 * \param dataBuff buffer the element is written to
 * \param ppos index read in the source array
 * \param dataBlockPos index passed to dataBuff.get<prp_val>()
 * \param offset index of the element inside the selected block
 * \param n_pnt not referenced by the statement of this overload
 */
190 template<
typename dataBuffer_type>
191 __device__ __host__
static void copy_inv(
arr_arr_ptr<1,nprp> & data_ptr, dataBuffer_type & dataBuff,
unsigned int ppos,
unsigned int dataBlockPos,
unsigned int offset,
unsigned int n_pnt)
193 dataBuff.template get<prp_val>(dataBlockPos)[offset] = ((copy_type *)data_ptr.ptr[0][prp_id])[ppos];
197template<
typename copy_type,
unsigned int nprp,
unsigned int prp_val,
unsigned int prp_
id,
unsigned int N1>
/*! \brief Copy the N1 components of a one-dimensional array property to an output array
 *
 * For every component i in [0,N1), element [i][offset] of
 * dataBuff.template get<prp_val>(dataBlockPos) is stored at index
 * ppos + i*n_pnt of data_ptr[prp_id], reinterpreted as copy_type *.
 * Consecutive components of the same point are therefore n_pnt slots apart
 * in the destination.
 *
 * \param data_ptr array of nprp untyped destination pointers
 * \param dataBuff buffer the components are read from
 * \param ppos destination index of component 0
 * \param dataBlockPos index passed to dataBuff.get<prp_val>()
 * \param offset index of the element inside the selected block
 * \param n_pnt distance, in elements, between two components in the destination
 */
200 template<
typename dataBuffer_type>
201 __device__ __host__
static void copy(
void * (& data_ptr)[nprp], dataBuffer_type & dataBuff,
unsigned int ppos,
unsigned int dataBlockPos,
unsigned int offset,
unsigned int n_pnt)
203 for (
int i = 0 ; i < N1 ; i++)
205 ((copy_type *)data_ptr[prp_id])[ppos+i*n_pnt] = dataBuff.template get<prp_val>(dataBlockPos)[i][offset];
/*! \brief Inverse of copy for a one-dimensional array property
 *
 * For every component i in [0,N1), the value at index ppos + i*n_pnt of
 * data_ptr.ptr[0][prp_id], reinterpreted as copy_type *, is stored in
 * element [i][offset] of dataBuff.template get<prp_val>(dataBlockPos).
 *
 * \param data_ptr pointer table; only row 0 is accessed here
 * \param dataBuff buffer the components are written to
 * \param ppos source index of component 0
 * \param dataBlockPos index passed to dataBuff.get<prp_val>()
 * \param offset index of the element inside the selected block
 * \param n_pnt distance, in elements, between two components in the source
 */
209 template<
typename dataBuffer_type>
210 __device__ __host__
static void copy_inv(
arr_arr_ptr<1,nprp> & data_ptr, dataBuffer_type & dataBuff,
unsigned int ppos,
unsigned int dataBlockPos,
unsigned int offset,
unsigned int n_pnt)
212 for (
int i = 0 ; i < N1 ; i++)
214 dataBuff.template get<prp_val>(dataBlockPos)[i][offset] = ((copy_type *)data_ptr.ptr[0][prp_id])[ppos+i*n_pnt];
219template<
typename copy_type,
unsigned int nprp,
unsigned int prp_val,
unsigned int prp_
id,
unsigned int N1,
unsigned int N2>
/*! \brief Copy the N1 x N2 components of a two-dimensional array property to an output array
 *
 * Component (i,j) is read from element [i][j][offset] of
 * dataBuff.template get<prp_val>(dataBlockPos) and stored at index
 * ppos + (i*N2 + j)*n_pnt of data_ptr[prp_id], reinterpreted as copy_type *.
 * The component index is flattened with j varying fastest.
 *
 * \param data_ptr array of nprp untyped destination pointers
 * \param dataBuff buffer the components are read from
 * \param ppos destination index of component (0,0)
 * \param dataBlockPos index passed to dataBuff.get<prp_val>()
 * \param offset index of the element inside the selected block
 * \param n_pnt distance, in elements, between two consecutive flattened components in the destination
 */
222 template<
typename dataBuffer_type>
223 __device__ __host__
static void copy(
void * (& data_ptr)[nprp], dataBuffer_type & dataBuff,
unsigned int ppos,
unsigned int dataBlockPos,
unsigned int offset,
unsigned int n_pnt)
225 for (
int i = 0 ; i < N1 ; i++)
227 for (
int j = 0 ; j < N2 ; j++)
229 ((copy_type *)data_ptr[prp_id])[ppos + (i*N2 + j)*n_pnt] = dataBuff.template get<prp_val>(dataBlockPos)[i][j][offset];
/*! \brief Inverse of copy for a two-dimensional array property
 *
 * Component (i,j) is read from index ppos + (i*N2 + j)*n_pnt of
 * data_ptr.ptr[0][prp_id], reinterpreted as copy_type *, and stored in
 * element [i][j][offset] of dataBuff.template get<prp_val>(dataBlockPos).
 *
 * \param data_ptr pointer table; only row 0 is accessed here
 * \param dataBuff buffer the components are written to
 * \param ppos source index of component (0,0)
 * \param dataBlockPos index passed to dataBuff.get<prp_val>()
 * \param offset index of the element inside the selected block
 * \param n_pnt distance, in elements, between two consecutive flattened components in the source
 */
234 template<
typename dataBuffer_type>
235 __device__ __host__
static void copy_inv(
arr_arr_ptr<1,nprp> & data_ptr, dataBuffer_type & dataBuff,
unsigned int ppos,
unsigned int dataBlockPos,
unsigned int offset,
unsigned int n_pnt)
237 for (
int i = 0 ; i < N1 ; i++)
239 for (
int j = 0 ; j < N2 ; j++)
241 dataBuff.template get<prp_val>(dataBlockPos)[i][j][offset] = ((copy_type *)data_ptr.ptr[0][prp_id])[ppos + (i*N2 + j)*n_pnt];
256template<
typename AggregateT,
typename dataBuffer_type,
int ... prp>
286 void * (&
data_ptr)[
sizeof...(prp)+1],
295 typedef typename boost::mpl::at<vprp,T>::type prp_cp;
298 typedef typename boost::mpl::at<typename AggregateT::type,prp_cp>::type pack_type;
314template<
typename AggregateT,
typename dataBuffer_type,
int ... prp>
353 typedef typename boost::mpl::at<vprp,T>::type prp_cp;
356 typedef typename boost::mpl::at<typename AggregateT::type,prp_cp>::type pack_type;
371template<
typename AggregateT,
int ... prp>
390 typedef typename boost::mpl::at<vprp,T>::type prp_cp;
393 typedef typename boost::mpl::at<typename AggregateT::type,prp_cp>::type pack_type;
399template<
unsigned int edgeSize,
unsigned int dim>
400inline __device__
unsigned int coordToLin(
const unsigned int (&coord)[dim],
const unsigned int paddingSize = 0)
402 unsigned int linId = coord[dim - 1];
403 for (
int d = dim - 2; d >= 0; --d)
405 linId *= edgeSize + 2 * paddingSize;
412template<
unsigned int edgeSize,
typename CoordT,
unsigned int dim>
415 unsigned int linId = coord.
get(dim - 1);
416 for (
int d = dim - 2; d >= 0; --d)
418 linId *= edgeSize + 2 * paddingSize;
419 linId += coord.
get(d);
424template <
typename CoordT,
unsigned int dim>
427 unsigned int linId = coord.
get(dim - 1);
428 for (
int d = dim - 2; d >= 0; --d)
430 linId *= blockDimensions.
get(d);
431 linId += coord.
get(d);
438template<
unsigned int edgeSize,
unsigned int dim>
439inline __device__
void linToCoordWithOffset(
const unsigned int linId,
const unsigned int offset,
unsigned int (&coord)[dim])
441 unsigned int linIdTmp = linId;
442 for (
unsigned int d = 0; d < dim; ++d)
444 coord[d] = linIdTmp % edgeSize;
446 linIdTmp /= edgeSize;
/*! \brief Convert a linear index into dim coordinates, each in [0,edgeSize)
 *
 * The index is decomposed by repeated modulo/division by edgeSize, so
 * coord[0] receives the least significant digit and coord[dim-1] the most
 * significant one. Any part of linId at or above edgeSize^dim is discarded.
 *
 * \tparam edgeSize extent used as the base of the decomposition in every dimension
 * \tparam dim number of coordinates produced
 *
 * \param linId linear index to decompose
 * \param coord output array, fully overwritten
 */
450template<
unsigned int edgeSize,
unsigned int dim>
451inline __device__
void linToCoord(
const unsigned int linId,
unsigned int (&coord)[dim])
453 unsigned int linIdTmp = linId;
454 for (
unsigned int d = 0; d < dim; ++d)
// Extract the coordinate along d, then drop it from the remaining index
456 coord[d] = linIdTmp % edgeSize;
457 linIdTmp /= edgeSize;
464template<
unsigned int nLoop,
unsigned int dim,
typename AggregateBlockT,
unsigned int pMask ,
unsigned int p,
typename ct_params,
unsigned int blockEdgeSize>
467 template<
typename AggrWrapperT,
471 typename blockMapType,
473 __device__
static inline void load(
const AggrWrapperT &block,
474 SharedPtrT * sharedRegionPtr,
477 const blockMapType & blockMap,
478 unsigned int stencilSupportRadius,
479 unsigned int ghostLayerSize,
480 const unsigned int blockId,
483 printf(
"Error to implement loadGhostBlock_impl with nLoop=%d \n",nLoop);
487template<
unsigned int dim,
typename AggregateBlockT,
unsigned int pMask ,
unsigned int p,
typename ct_params,
unsigned int blockEdgeSize>
490 template<
typename AggrWrapperT,
494 typename blockMapType,
496 __device__
static inline void load(
const AggrWrapperT &block,
497 SharedPtrT * sharedRegionPtr,
500 const blockMapType & blockMap,
501 unsigned int stencilSupportRadius,
502 unsigned int ghostLayerSize,
503 const unsigned int blockIdPos,
506 typedef ScalarTypeOf<AggregateBlockT, p> ScalarT;
508 const int pos = threadIdx.x % ghostLayerSize;
510 __shared__
int neighboursPos[ct_params::nNN];
512 const unsigned int edge = blockEdgeSize + 2*stencilSupportRadius;
513 short int neighbourNum = ghostLayerToThreadsMapping.template get<nt>(pos);
516 const unsigned int linId = ghostLayerToThreadsMapping.template get<gt>(pos);
520 unsigned int acc = 1;
521 unsigned int offset = 0;
522 for (
int i = 0; i < dim; ++i)
524 int v = (ctr %
edge) - stencilSupportRadius;
525 v = (v < 0)?(v + blockEdgeSize):v;
526 v = (v >= blockEdgeSize)?v-blockEdgeSize:v;
529 acc *= blockEdgeSize;
533 unsigned int coord[dim];
534 linToCoordWithOffset<blockEdgeSize>(threadIdx.x, stencilSupportRadius, coord);
535 const unsigned int linId2 = coordToLin<blockEdgeSize>(coord, stencilSupportRadius);
537 unsigned int nnb = nn_blocks.template get<0>(blockIdPos*ct_params::nNN + (threadIdx.x % ct_params::nNN));
539 if (threadIdx.x < ct_params::nNN)
541 neighboursPos[threadIdx.x] = nnb;
547 auto nPos = neighboursPos[neighbourNum];
549 auto gdata = blockMap.template get_ele<p>(nPos)[offset];
553 auto bdata = block.template get<p>()[threadIdx.x];
555 auto bmask = block.template get<pMask>()[threadIdx.x];
556 auto gmask = blockMap.template get_ele<pMask>(nPos)[offset];
561 sharedRegionPtr[linId] = gdata;
562 sharedRegionPtr[linId2] = bdata;
566template<
unsigned int dim,
typename AggregateBlockT,
unsigned int pMask ,
unsigned int p,
typename ct_params,
unsigned int blockEdgeSize>
569 template<
typename AggrWrapperT,
573 typename blockMapType,
575 __device__
static inline void load(
const AggrWrapperT &block,
576 SharedPtrT * sharedRegionPtr,
579 const blockMapType & blockMap,
580 unsigned int stencilSupportRadius,
581 unsigned int ghostLayerSize,
582 const unsigned int blockIdPos,
585 typedef ScalarTypeOf<AggregateBlockT, p> ScalarT;
587 const int pos = threadIdx.x % ghostLayerSize;
588 const int pos_d1 = (threadIdx.x + blockDim.x) % ghostLayerSize;
590 __shared__
int neighboursPos[ct_params::nNN];
592 const unsigned int edge = blockEdgeSize + 2*stencilSupportRadius;
593 short int neighbourNum = ghostLayerToThreadsMapping.template get<nt>(pos);
594 short int neighbourNum2 = ghostLayerToThreadsMapping.template get<nt>(pos_d1);
597 const unsigned int linId = ghostLayerToThreadsMapping.template get<gt>(pos);
598 const unsigned int linId2 = ghostLayerToThreadsMapping.template get<gt>(pos_d1);
603 unsigned int acc = 1;
604 unsigned int offset = 0;
605 unsigned int offset2 = 0;
606 for (
int i = 0; i < dim; ++i)
608 int v = (ctr %
edge) - stencilSupportRadius;
609 int v2 = (ctr2 %
edge) - stencilSupportRadius;
610 v = (v < 0)?(v + blockEdgeSize):v;
611 v2 = (v2 < 0)?(v2 + blockEdgeSize):v2;
612 v = (v >= blockEdgeSize)?v-blockEdgeSize:v;
613 v2 = (v2 >= blockEdgeSize)?v2-blockEdgeSize:v2;
618 acc *= blockEdgeSize;
622 unsigned int coord[dim];
623 linToCoordWithOffset<blockEdgeSize>(threadIdx.x, stencilSupportRadius, coord);
624 const int linId_b = coordToLin<blockEdgeSize>(coord, stencilSupportRadius);
629 unsigned int nnb = nn_blocks.template get<0>(blockIdPos*ct_params::nNN + (threadIdx.x % ct_params::nNN));
631 if (threadIdx.x < ct_params::nNN)
633 neighboursPos[threadIdx.x] = nnb;
639 auto nPos = neighboursPos[neighbourNum];
640 auto nPos2 = neighboursPos[neighbourNum2];
642 auto gdata = blockMap.template get_ele<p>(nPos)[offset];
643 auto gdata2 = blockMap.template get_ele<p>(nPos2)[offset2];
647 auto bdata = block.template get<p>()[threadIdx.x];
649 auto gmask = blockMap.template get_ele<pMask>(nPos)[offset];
650 auto gmask2 = blockMap.template get_ele<pMask>(nPos2)[offset2];
654 auto bmask = block.template get<pMask>()[threadIdx.x];
660 sharedRegionPtr[linId] = gdata;
661 sharedRegionPtr[linId2] = gdata2;
662 sharedRegionPtr[linId_b] = bdata;
666template<
unsigned int dim,
typename AggregateBlockT,
unsigned int pMask ,
unsigned int p,
typename ct_params,
unsigned int blockEdgeSize>
669 template<
typename AggrWrapperT,
673 typename blockMapType,
675 __device__
static inline void load(
const AggrWrapperT &block,
676 SharedPtrT * sharedRegionPtr,
679 const blockMapType & blockMap,
680 unsigned int stencilSupportRadius,
681 unsigned int ghostLayerSize,
682 const unsigned int blockIdPos,
685 typedef ScalarTypeOf<AggregateBlockT, p> ScalarT;
687 const int pos = threadIdx.x % ghostLayerSize;
688 const int pos_d1 = (threadIdx.x + 2*blockDim.x) % ghostLayerSize;
690 __shared__
int neighboursPos[ct_params::nNN];
692 const unsigned int edge = blockEdgeSize + 2*stencilSupportRadius;
693 short int neighbourNum = ghostLayerToThreadsMapping.template get<nt>(pos);
694 short int neighbourNum2 = ghostLayerToThreadsMapping.template get<nt>(pos + blockDim.x);
695 short int neighbourNum3 = ghostLayerToThreadsMapping.template get<nt>(pos_d1);
698 const unsigned int linId = ghostLayerToThreadsMapping.template get<gt>(pos);
699 const unsigned int linId2 = ghostLayerToThreadsMapping.template get<gt>(pos + blockDim.x);
700 const unsigned int linId3 = ghostLayerToThreadsMapping.template get<gt>(pos_d1);
706 unsigned int acc = 1;
707 unsigned int offset = 0;
708 unsigned int offset2 = 0;
709 unsigned int offset3 = 0;
710 for (
int i = 0; i < dim; ++i)
712 int v = (ctr %
edge) - stencilSupportRadius;
713 int v2 = (ctr2 %
edge) - stencilSupportRadius;
714 int v3 = (ctr3 %
edge) - stencilSupportRadius;
715 v = (v < 0)?(v + blockEdgeSize):v;
716 v2 = (v2 < 0)?(v2 + blockEdgeSize):v2;
717 v3 = (v3 < 0)?(v3 + blockEdgeSize):v3;
718 v = (v >= blockEdgeSize)?v-blockEdgeSize:v;
719 v2 = (v2 >= blockEdgeSize)?v2-blockEdgeSize:v2;
720 v3 = (v3 >= blockEdgeSize)?v3-blockEdgeSize:v3;
727 acc *= blockEdgeSize;
731 unsigned int coord[dim];
732 linToCoordWithOffset<blockEdgeSize>(threadIdx.x, stencilSupportRadius, coord);
733 const int linId_b = coordToLin<blockEdgeSize>(coord, stencilSupportRadius);
738 unsigned int nnb = nn_blocks.template get<0>(blockIdPos*ct_params::nNN + (threadIdx.x % ct_params::nNN));
740 if (threadIdx.x < ct_params::nNN)
742 neighboursPos[threadIdx.x] = nnb;
748 auto nPos = neighboursPos[neighbourNum];
749 auto nPos2 = neighboursPos[neighbourNum2];
750 auto nPos3 = neighboursPos[neighbourNum3];
752 auto gdata = blockMap.template get_ele<p>(nPos)[offset];
753 auto gdata2 = blockMap.template get_ele<p>(nPos2)[offset2];
754 auto gdata3 = blockMap.template get_ele<p>(nPos3)[offset3];
758 auto bdata = block.template get<p>()[threadIdx.x];
760 auto gmask = blockMap.template get_ele<pMask>(nPos)[offset];
761 auto gmask2 = blockMap.template get_ele<pMask>(nPos2)[offset2];
762 auto gmask3 = blockMap.template get_ele<pMask>(nPos3)[offset3];
766 auto bmask = block.template get<pMask>()[threadIdx.x];
773 sharedRegionPtr[linId] = gdata;
774 sharedRegionPtr[linId2] = gdata2;
775 sharedRegionPtr[linId3] = gdata3;
776 sharedRegionPtr[linId_b] = bdata;
780template<
unsigned int dim,
typename AggregateBlockT,
unsigned int pMask ,
unsigned int p,
typename ct_params,
unsigned int blockEdgeSize>
783 template<
typename AggrWrapperT,
787 typename blockMapType,
789 __device__
static inline void load(
const AggrWrapperT &block,
790 SharedPtrT * sharedRegionPtr,
793 const blockMapType & blockMap,
794 unsigned int stencilSupportRadius,
795 unsigned int ghostLayerSize,
796 const unsigned int blockIdPos,
799 typedef ScalarTypeOf<AggregateBlockT, p> ScalarT;
801 const int pos = threadIdx.x % ghostLayerSize;
802 const int pos_d1 = (threadIdx.x + 6*blockDim.x) % ghostLayerSize;
804 __shared__
int neighboursPos[ct_params::nNN];
806 const unsigned int edge = blockEdgeSize + 2*stencilSupportRadius;
807 short int neighbourNum = ghostLayerToThreadsMapping.template get<nt>(pos);
808 short int neighbourNum2 = ghostLayerToThreadsMapping.template get<nt>(pos + blockDim.x);
809 short int neighbourNum3 = ghostLayerToThreadsMapping.template get<nt>(pos + 2*blockDim.x);
810 short int neighbourNum4 = ghostLayerToThreadsMapping.template get<nt>(pos + 3*blockDim.x);
811 short int neighbourNum5 = ghostLayerToThreadsMapping.template get<nt>(pos + 4*blockDim.x);
812 short int neighbourNum6 = ghostLayerToThreadsMapping.template get<nt>(pos + 5*blockDim.x);
813 short int neighbourNum7 = ghostLayerToThreadsMapping.template get<nt>(pos_d1);
816 const unsigned int linId = ghostLayerToThreadsMapping.template get<gt>(pos);
817 const unsigned int linId2 = ghostLayerToThreadsMapping.template get<gt>(pos + blockDim.x);
818 const unsigned int linId3 = ghostLayerToThreadsMapping.template get<gt>(pos + 2*blockDim.x);
819 const unsigned int linId4 = ghostLayerToThreadsMapping.template get<gt>(pos + 3*blockDim.x);
820 const unsigned int linId5 = ghostLayerToThreadsMapping.template get<gt>(pos + 4*blockDim.x);
821 const unsigned int linId6 = ghostLayerToThreadsMapping.template get<gt>(pos + 5*blockDim.x);
822 const unsigned int linId7 = ghostLayerToThreadsMapping.template get<gt>(pos_d1);
832 unsigned int acc = 1;
833 unsigned int offset = 0;
834 unsigned int offset2 = 0;
835 unsigned int offset3 = 0;
836 unsigned int offset4 = 0;
837 unsigned int offset5 = 0;
838 unsigned int offset6 = 0;
839 unsigned int offset7 = 0;
840 for (
int i = 0; i < dim; ++i)
842 int v = (ctr %
edge) - stencilSupportRadius;
843 int v2 = (ctr2 %
edge) - stencilSupportRadius;
844 int v3 = (ctr3 %
edge) - stencilSupportRadius;
845 int v4 = (ctr4 %
edge) - stencilSupportRadius;
846 int v5 = (ctr5 %
edge) - stencilSupportRadius;
847 int v6 = (ctr6 %
edge) - stencilSupportRadius;
848 int v7 = (ctr7 %
edge) - stencilSupportRadius;
849 v = (v < 0)?(v + blockEdgeSize):v;
850 v2 = (v2 < 0)?(v2 + blockEdgeSize):v2;
851 v3 = (v3 < 0)?(v3 + blockEdgeSize):v3;
852 v4 = (v4 < 0)?(v4 + blockEdgeSize):v4;
853 v5 = (v5 < 0)?(v5 + blockEdgeSize):v5;
854 v6 = (v6 < 0)?(v6 + blockEdgeSize):v6;
855 v7 = (v7 < 0)?(v7 + blockEdgeSize):v7;
856 v = (v >= blockEdgeSize)?v-blockEdgeSize:v;
857 v2 = (v2 >= blockEdgeSize)?v2-blockEdgeSize:v2;
858 v3 = (v3 >= blockEdgeSize)?v3-blockEdgeSize:v3;
859 v4 = (v4 >= blockEdgeSize)?v4-blockEdgeSize:v4;
860 v5 = (v5 >= blockEdgeSize)?v5-blockEdgeSize:v5;
861 v6 = (v6 >= blockEdgeSize)?v6-blockEdgeSize:v6;
862 v7 = (v7 >= blockEdgeSize)?v7-blockEdgeSize:v7;
877 acc *= blockEdgeSize;
881 unsigned int coord[dim];
882 linToCoordWithOffset<blockEdgeSize>(threadIdx.x, stencilSupportRadius, coord);
883 const int linId_b = coordToLin<blockEdgeSize>(coord, stencilSupportRadius);
888 unsigned int nnb = nn_blocks.template get<0>(blockIdPos*ct_params::nNN + (threadIdx.x % ct_params::nNN));
890 if (threadIdx.x < ct_params::nNN)
892 neighboursPos[threadIdx.x] = nnb;
898 auto nPos = neighboursPos[neighbourNum];
899 auto nPos2 = neighboursPos[neighbourNum2];
900 auto nPos3 = neighboursPos[neighbourNum3];
901 auto nPos4 = neighboursPos[neighbourNum4];
902 auto nPos5 = neighboursPos[neighbourNum5];
903 auto nPos6 = neighboursPos[neighbourNum6];
904 auto nPos7 = neighboursPos[neighbourNum7];
906 auto gdata = blockMap.template get_ele<p>(nPos)[offset];
907 auto gdata2 = blockMap.template get_ele<p>(nPos2)[offset2];
908 auto gdata3 = blockMap.template get_ele<p>(nPos3)[offset3];
909 auto gdata4 = blockMap.template get_ele<p>(nPos4)[offset4];
910 auto gdata5 = blockMap.template get_ele<p>(nPos5)[offset5];
911 auto gdata6 = blockMap.template get_ele<p>(nPos6)[offset6];
912 auto gdata7 = blockMap.template get_ele<p>(nPos7)[offset7];
916 auto bdata = block.template get<p>()[threadIdx.x];
918 auto bmask = block.template get<pMask>()[threadIdx.x];
919 auto gmask = blockMap.template get_ele<pMask>(nPos)[offset];
920 auto gmask2 = blockMap.template get_ele<pMask>(nPos2)[offset2];
921 auto gmask3 = blockMap.template get_ele<pMask>(nPos3)[offset3];
922 auto gmask4 = blockMap.template get_ele<pMask>(nPos4)[offset4];
923 auto gmask5 = blockMap.template get_ele<pMask>(nPos5)[offset5];
924 auto gmask6 = blockMap.template get_ele<pMask>(nPos6)[offset6];
925 auto gmask7 = blockMap.template get_ele<pMask>(nPos7)[offset7];
936 sharedRegionPtr[linId] = gdata;
937 sharedRegionPtr[linId2] = gdata2;
938 sharedRegionPtr[linId3] = gdata3;
939 sharedRegionPtr[linId4] = gdata4;
940 sharedRegionPtr[linId5] = gdata5;
941 sharedRegionPtr[linId6] = gdata6;
942 sharedRegionPtr[linId7] = gdata7;
943 sharedRegionPtr[linId_b] = bdata;
grid_key_dx is the key to access any element in the grid
__device__ __host__ index_type get(index_type i) const
Get the i index.
static __device__ bool getNNindex_offset()
Given a coordinate, gives the neighborhood chunk position and the offset in the neighborhood chunk.
This class is a functor for the "for_each" algorithm.
void *(& data_ptr)[sizeof...(prp)+1]
data pointer
dataBuffer_type & dataBuff
data buffer
unsigned int offset
offset
__device__ __host__ void operator()(T &t)
It calls the copy function for each property.
unsigned int dataBlockPos
position of the block
unsigned int n_pnt
Number of points to pack.
__device__ __host__ sparsegridgpu_pack_impl(unsigned int dataBlockPos, unsigned int offset, dataBuffer_type &dataBuff, unsigned int ppos, void *(&data_ptr)[sizeof...(prp)+1], unsigned int n_pnt)
constructor
This class is a functor for the "for_each" algorithm.
size_t point_size
point size
sparsegridgpu_pack_request()
constructor
void operator()(T &t)
It calls the copy function for each property.
This class is a functor for the "for_each" algorithm.
dataBuffer_type & dataBuff
data buffer
unsigned int dataBlockPos
position of the block
unsigned int offset
offset
__device__ __host__ sparsegridgpu_unpack_impl(unsigned int dataBlockPos, unsigned int offset, dataBuffer_type &dataBuff, unsigned int ppos, arr_arr_ptr< 1, sizeof...(prp)> &data_ptr, unsigned int n_pnt)
constructor
unsigned int n_pnt
Number of points to pack.
__device__ __host__ void operator()(T &t)
It calls the copy function for each property.
arr_arr_ptr< 1, sizeof...(prp)> & data_ptr
data pointer
Sub-domain vertex graph node.