8#ifndef SPARSEGRIDGPU_UTIL_TEST_CUH_
9#define SPARSEGRIDGPU_UTIL_TEST_CUH_
11#include "SparseGridGpu/tests/utils/SparseGridGpu_testKernels.cuh"
// getValues2D: kernel in which each thread issues one read of property `p` from the sparse grid.
// NOTE(review): this listing is missing source lines (braces, the declaration of `coord`),
// so the comments below describe only the statements that are visible.
// Template parameters: p - index of the property to read; SparseGridType - type of the grid object.
// Parameters: sparseGrid - grid object, passed by value; offsetX / offsetY - values added to the
// thread's x / y index (both default to 0).
13template<
unsigned int p,
typename SparseGr
idType>
14__global__
void getValues2D(SparseGridType sparseGrid,
const int offsetX=0,
const int offsetY=0)
18 const auto bDimX = blockDim.x;
19 const auto bDimY = blockDim.y;
20 const auto bIdX = blockIdx.x;
21 const auto bIdY = blockIdx.y;
22 const auto tIdX = threadIdx.x;
23 const auto tIdY = threadIdx.y;
// Global 2D index of this thread, shifted by the caller-supplied offsets.
24 int x = bIdX * bDimX + tIdX + offsetX;
25 int y = bIdY * bDimY + tIdY + offsetY;
// Read property p at `coord`; in the visible code the result is only kept in a local.
// `coord` is presumably built from (x, y) on a line not shown here - TODO confirm.
28 auto value = sparseGrid.template get<p>(coord);
// getValuesNeighbourhood2D: kernel in which each thread performs nine reads of property `p`
// while moving `coord` through the offsets (i%3, i/3), i = 0..8, relative to its own (x, y).
// NOTE(review): this listing is missing source lines (braces, the declaration of `coord`).
// Template parameters: p - index of the property to read; SparseGridType - type of the grid object.
// Parameters: sparseGrid - grid object, passed by value; offsetX / offsetY - values added to the
// thread's x / y index (both default to 0).
36template<
unsigned int p,
typename SparseGr
idType>
37__global__
void getValuesNeighbourhood2D(SparseGridType sparseGrid,
const int offsetX=0,
const int offsetY=0)
41 const auto bDimX = blockDim.x;
42 const auto bDimY = blockDim.y;
43 const auto bIdX = blockIdx.x;
44 const auto bIdY = blockIdx.y;
45 const auto tIdX = threadIdx.x;
46 const auto tIdY = threadIdx.y;
// Global 2D index of this thread, shifted by the caller-supplied offsets.
47 int x = bIdX * bDimX + tIdX + offsetX;
48 int y = bIdY * bDimY + tIdY + offsetY;
// Nine iterations: i%3 is the step along dimension 0 (0..2), i/3 the step along dimension 1 (0..2).
51 for (
int i=0; i < 9; ++i)
// NOTE(review): the read is issued before `coord` is moved, so iteration i reads the position
// left by iteration i-1 (or the initial `coord`), and the last position (x+2, y+2) is set but
// never read. Confirm this ordering is intended.
53 auto value = sparseGrid.template get<p>(coord);
54 coord.set_d(0, x + i%3);
55 coord.set_d(1, y + i/3);
// insertValues2D: kernel in which each thread inserts the value x*x*y*y for property `p`
// at `coord` and then calls flush_block_insert() on the grid object.
// NOTE(review): this listing is missing source lines (braces, the declaration of `coord`,
// and anything between the visible statements).
// Template parameters: p - index of the property to write; SparseGridType - type of the grid object.
// Parameters: sparseGrid - grid object, passed by value; offsetX / offsetY - values added to the
// thread's x / y index (both default to 0).
60template<
unsigned int p,
typename SparseGr
idType>
61__global__
void insertValues2D(SparseGridType sparseGrid,
const int offsetX=0,
const int offsetY=0)
65 const auto bDimX = blockDim.x;
66 const auto bDimY = blockDim.y;
67 const auto bIdX = blockIdx.x;
68 const auto bIdY = blockIdx.y;
69 const auto tIdX = threadIdx.x;
70 const auto tIdY = threadIdx.y;
// Global 2D index of this thread, shifted by the caller-supplied offsets.
71 int x = bIdX * bDimX + tIdX + offsetX;
72 int y = bIdY * bDimY + tIdY + offsetY;
// `coord` is presumably built from (x, y) on a line not shown - TODO confirm.
// NOTE(review): x*x*y*y is evaluated in `int`; it overflows once |x*y| exceeds roughly 46340.
75 sparseGrid.template insert<p>(coord) = x*x*y*y;
79 sparseGrid.flush_block_insert();
// insertValues2DBlocked: kernel that obtains a data block through insertBlock() and writes
// posX*posX*posY*posY into slot `offset` of property `p` of that block, then calls
// flush_block_insert().
// NOTE(review): this listing is missing source lines (braces, the declaration of `blockCoord`,
// any use of BlockT / MaskBlockT / chunksPerBlock).
// Template parameters: p - property index; chunksPerBlock - not used in the visible lines;
// blockEdgeSize - edge length used to derive the in-block offset; SparseGridType - grid type.
// Parameters: sparseGrid - grid object, passed by value; sOffsetX / sOffsetY - values added to
// the thread's x / y index (both default to 0).
86template<
unsigned int p,
unsigned int chunksPerBlock,
unsigned int blockEdgeSize,
typename SparseGr
idType>
87__global__
void insertValues2DBlocked(SparseGridType sparseGrid,
const int sOffsetX=0,
const int sOffsetY=0)
89 constexpr unsigned int pMask = SparseGridType::pMask;
90 typedef BlockTypeOf<typename SparseGridType::AggregateType, p> BlockT;
91 typedef BlockTypeOf<typename SparseGridType::AggregateType, pMask> MaskBlockT;
// Global 2D position of this thread, shifted by the caller-supplied offsets.
95 int posX = blockIdx.x * blockDim.x + threadIdx.x + sOffsetX;
96 int posY = blockIdx.y * blockDim.y + threadIdx.y + sOffsetY;
// Position inside the block along each axis.
// NOTE(review): `blockEdgeSize` is unsigned, so a negative posX/posY would be converted to a
// large unsigned value before the modulo - confirm offsets are never negative enough for that.
97 const unsigned int offsetX = posX % blockEdgeSize;
98 const unsigned int offsetY = posY % blockEdgeSize;
// Linear in-block index: y-offset times the edge length plus x-offset.
100 const unsigned int offset = offsetY * blockEdgeSize + offsetX;
103 auto encap = sparseGrid.insertBlock(sparseGrid.getBlockLinId(blockCoord));
// NOTE(review): the product is evaluated in `int` and can overflow for large positions.
105 encap.template get<p>()[offset] = posX*posX * posY*posY;
110 sparseGrid.flush_block_insert();
// insertConstantValue: kernel in which each thread converts `coord` to a linear id, splits it
// into (data block id, in-block offset) using BlockT::size, obtains that block through
// insertBlock<chunksPerBlock>() and writes `value` into property `p` at the offset; it then
// calls flush_block_insert().
// NOTE(review): this listing is missing source lines (braces, the declaration of `coord`,
// any use of MaskBlockT).
// Template parameters: p - property index; chunksPerBlock - forwarded to insertBlock (default 1);
// SparseGridType - grid type; ScalarT - type of the value written.
// Parameters: sparseGrid - grid object, passed by value; value - the constant to store.
115template<
unsigned int p,
unsigned int chunksPerBlock=1,
typename SparseGr
idType,
typename ScalarT>
116__global__
void insertConstantValue(SparseGridType sparseGrid, ScalarT value)
118 constexpr unsigned int pMask = SparseGridType::pMask;
119 typedef BlockTypeOf<typename SparseGridType::AggregateType, p> BlockT;
120 typedef BlockTypeOf<typename SparseGridType::AggregateType, pMask> MaskBlockT;
// Global 3D index of this thread.
124 int x = blockIdx.x * blockDim.x + threadIdx.x;
125 int y = blockIdx.y * blockDim.y + threadIdx.y;
126 int z = blockIdx.z * blockDim.z + threadIdx.z;
// `coord` is presumably built from (x, y, z) on a line not shown - TODO confirm.
129 auto pos = sparseGrid.getLinId(coord);
// Quotient selects the data block, remainder the slot inside it.
130 unsigned int dataBlockId = pos / BlockT::size;
131 unsigned int offset = pos % BlockT::size;
133 auto encap = sparseGrid.template insertBlock<chunksPerBlock>(dataBlockId,BlockT::size);
135 encap.template get<p>()[offset] = value;
140 sparseGrid.flush_block_insert();
// insertOneValue: kernel that compares each thread's global 3D index with `pt` and inserts
// `value` for property `p` at `coord`; flush_block_insert() is then called.
// NOTE(review): this listing is missing source lines (braces, the declaration of `coord`), so
// the exact extent of the `if` body is not visible - presumably only the matching thread inserts.
// Template parameters: p - property index; SparseGridType - grid type; ValueT - type of the value.
// Parameters: sparseGrid - grid object, passed by value; pt - index to match; value - value to insert.
149template<
unsigned int p,
typename SparseGr
idType,
typename ValueT>
150__global__
void insertOneValue(SparseGridType sparseGrid, dim3 pt, ValueT value)
// Global 3D index of this thread.
154 int x = blockIdx.x * blockDim.x + threadIdx.x;
155 int y = blockIdx.y * blockDim.y + threadIdx.y;
156 int z = blockIdx.z * blockDim.z + threadIdx.z;
// Packed into a dim3 so it can be compared component-wise with `pt`.
157 dim3 thCoord(x, y, z);
158 if (thCoord.x == pt.x && thCoord.y == pt.y && thCoord.z == pt.z)
161 sparseGrid.template insert<p>(coord) = value;
165 sparseGrid.flush_block_insert();
// copyBlocksToOutput: kernel in which each thread reads property `p` of the sparse grid at
// `coord` and writes it to `output` at the linear id the grid reports for `coord`.
// NOTE(review): this listing is missing source lines (braces, the declaration of `coord`,
// any synchronisation or bounds check).
// Template parameters: p - property index; SparseGridType - grid type; VectorOutType - output type.
// Parameters: sparseGrid - grid object, passed by value; output - destination, passed by value.
172template<
unsigned int p,
typename SparseGr
idType,
typename VectorOutType>
173__global__
void copyBlocksToOutput(SparseGridType sparseGrid, VectorOutType output)
175 const auto bDimX = blockDim.x;
176 const auto bDimY = blockDim.y;
177 const auto bDimZ = blockDim.z;
178 const auto bIdX = blockIdx.x;
179 const auto bIdY = blockIdx.y;
180 const auto bIdZ = blockIdx.z;
181 const auto tIdX = threadIdx.x;
182 const auto tIdY = threadIdx.y;
183 const auto tIdZ = threadIdx.z;
// Global 3D index of this thread.
184 int x = bIdX * bDimX + tIdX;
185 int y = bIdY * bDimY + tIdY;
186 int z = bIdZ * bDimZ + tIdZ;
// `coord` is presumably built from (x, y, z) on a line not shown - TODO confirm.
// Destination slot in `output`.
189 size_t pos = sparseGrid.getLinId(coord);
191 auto value = sparseGrid.template get<p>(coord);
193 output.template get<p>(pos) = value;
// Stencil functor computing `cur + dt * laplacian` with a 2*dim-neighbour Laplacian.
// NOTE(review): the struct header, the function name/return-type lines, braces and several
// declarations (`dataBlockIdPos`, `pointCoord`, `dt`, `res`) fall on lines missing from this
// listing; the struct is presumably `HeatStencil` - confirm against the real source.
// Template parameters: dim - number of dimensions; p_src - property read; p_dst - property written.
201template<
unsigned int dim,
unsigned int p_src,
unsigned int p_dst>
// Presumably the per-point operation count reported by benchmarks - TODO confirm.
208 static constexpr unsigned int flops = 3 + 2*dim;
// Stencil radius; it sets the ghost-layer width of the enlarged block below (edge + 2*supportRadius).
210 static constexpr unsigned int supportRadius = 1;
// First member function (presumably the __device__ `stencil`): shared-memory variant.
// The block plus ghost layer is loaded into `enlargedBlock`, the update is computed for points
// whose mask has EXIST set and PADDING clear, the result is written back into `enlargedBlock`,
// and storeBlock() writes property p_dst.
224 template<
typename SparseGr
idT,
typename DataBlockWrapperT>
226 SparseGridT & sparseGrid,
227 const unsigned int dataBlockId,
229 const unsigned int offset,
231 const DataBlockWrapperT & dataBlockLoad,
232 DataBlockWrapperT & dataBlockStore,
233 unsigned char curMask,
236 typedef typename SparseGridT::AggregateBlockType AggregateT;
237 typedef ScalarTypeOf<AggregateT, p_src> ScalarT;
// Number of scalars in the enlarged block: (block edge + 2*supportRadius) to the power dim.
239 constexpr unsigned int enlargedBlockSize =
IntPow<
240 SparseGridT::getBlockEdgeSize() + 2 * supportRadius, dim>::value;
242 __shared__ ScalarT enlargedBlock[enlargedBlockSize];
244 sparseGrid.template loadGhostBlock<p_src>(dataBlockLoad, dataBlockIdPos, enlargedBlock);
// NOTE(review): no barrier is visible between filling and reading the shared array; any
// __syncthreads() would be on lines not shown (or inside loadGhostBlock) - confirm.
// Declared outside the `if` so the same index can be reused when the result is written back.
248 decltype(sparseGrid.getLinIdInEnlargedBlock(0)) linId = 0;
// Only points that exist and are not padding are updated.
251 if ((curMask & mask_sparse::EXIST) && !(curMask & mask_sparse::PADDING))
253 const auto coord = sparseGrid.getCoordInEnlargedBlock(offset);
255 linId = sparseGrid.getLinIdInEnlargedBlock(offset);
256 ScalarT cur = enlargedBlock[linId];
// NOTE(review): `-2.0` is a double literal, so this product is evaluated in double before
// being converted to ScalarT.
257 ScalarT laplacian = -2.0 * dim * cur;
// Add the -1 and +1 neighbours along every dimension.
259 for (
int d = 0; d < dim; ++d)
261 auto nPlusId = sparseGrid.getNeighbourLinIdInEnlargedBlock(coord, d, 1);
262 auto nMinusId = sparseGrid.getNeighbourLinIdInEnlargedBlock(coord, d, -1);
263 ScalarT neighbourPlus = enlargedBlock[nPlusId];
264 ScalarT neighbourMinus = enlargedBlock[nMinusId];
265 laplacian += neighbourMinus + neighbourPlus;
268 res = cur + dt * laplacian;
// Same mask test as above: write the result into the shared block only for updated points.
272 if ((curMask & mask_sparse::EXIST) && !(curMask & mask_sparse::PADDING))
274 enlargedBlock[linId] = res;
277 sparseGrid.template storeBlock<p_dst>(dataBlockStore, enlargedBlock);
// Second member function (presumably the __host__ `stencilHost`): no shared buffer.
// A neighbour inside the same block is read from `dataBlockLoad` at offset -/+ dimStride; at a
// block face (localOffset == 0 or == blockEdgeSize - 1) it is read through sparseGrid.get().
// NOTE(review): the `else` lines pairing each `if` with the in-block read are not visible.
292 template<
typename SparseGr
idT,
typename DataBlockWrapperT>
294 SparseGridT & sparseGrid,
295 const unsigned int dataBlockId,
297 const unsigned int offset,
299 const DataBlockWrapperT & dataBlockLoad,
300 DataBlockWrapperT & dataBlockStore,
304 constexpr unsigned int blockEdgeSize = SparseGridT::getBlockEdgeSize();
308 auto cur = dataBlockLoad.template get<p_src>()[offset];
309 auto laplacian = -2.0 * dim * cur;
311 auto neighbourCoord = pointCoord;
// `counter` is consumed one base-blockEdgeSize digit per dimension; `dimStride` is the linear
// stride of the current dimension inside the block.
312 auto counter = offset;
313 unsigned int dimStride = 1;
314 for (
int d = 0; d < dim; ++d)
316 const auto localOffset = counter % blockEdgeSize;
// Lower neighbour along d.
318 if (localOffset == 0)
320 neighbourCoord.
set_d(d, neighbourCoord.get(d) - 1);
321 laplacian += sparseGrid.template get<p_src>(neighbourCoord);
// Restore the coordinate after the lookup.
322 neighbourCoord.set_d(d, neighbourCoord.get(d) + 1);
326 laplacian += dataBlockLoad.template get<p_src>()[offset - dimStride];
// Upper neighbour along d.
328 if (localOffset == blockEdgeSize - 1)
330 neighbourCoord.set_d(d, neighbourCoord.get(d) + 1);
331 laplacian += sparseGrid.template get<p_src>(neighbourCoord);
// Restore the coordinate after the lookup.
332 neighbourCoord.set_d(d, neighbourCoord.get(d) - 1);
336 laplacian += dataBlockLoad.template get<p_src>()[offset + dimStride];
339 counter /= blockEdgeSize;
340 dimStride *= blockEdgeSize;
342 dataBlockStore.template get<p_dst>()[offset] = cur + dt * laplacian;
// Host-side flush: forwards to the grid's flush with the smax_<0> functor and FLUSH_ON_DEVICE.
346 template <
typename SparseGr
idT,
typename CtxT>
347 static inline void __host__ flush(SparseGridT & sparseGrid, CtxT & ctx)
349 sparseGrid.template flush <smax_<0>> (ctx, flush_type::FLUSH_ON_DEVICE);
// 3x3x3 table of float weights. The convolution stencils in this file read it as
// `cc.coeff[i][j][k]` and the perf tests fill it with 1.0.
// NOTE(review): the enclosing struct declaration is on lines missing from this listing.
355 float coeff[3][3][3];
// Convolution stencil functor using a shared-memory enlarged block: for each point where
// `applyStencilHere` is true it accumulates neighbour values weighted by `cc.coeff` into `tot`
// and stores `tot` into property p_dst.
// NOTE(review): the struct header, braces and several declarations (`dataBlockIdPos`, `offset`,
// `cc`, `key`, `tot`) fall on lines missing from this listing; the struct is presumably
// `Conv3x3x3`, the name used by the perf test in this file.
// Template parameters: dim - number of dimensions; p_src - property read; p_dst - property written.
360template<
unsigned int dim,
unsigned int p_src,
unsigned int p_dst>
// Stencil radius; it sets the ghost-layer width of the enlarged block below.
367 static constexpr unsigned int supportRadius = 1;
// Presumably the per-point operation count reported by benchmarks (2 per each of 27 taps).
369 static constexpr unsigned int flops = 2*27;
371 template<
typename SparseGr
idT,
typename DataBlockWrapperT>
372 static inline __device__
void stencil(
373 SparseGridT & sparseGrid,
374 const unsigned int dataBlockId,
378 DataBlockWrapperT & dataBlockLoad,
379 DataBlockWrapperT & dataBlockStore,
380 bool applyStencilHere,
383 typedef typename SparseGridT::AggregateBlockType AggregateT;
384 typedef ScalarTypeOf<AggregateT, p_src> ScalarT;
// Number of scalars in the enlarged block: (block edge + 2*supportRadius) to the power dim.
386 constexpr unsigned int enlargedBlockSize =
IntPow<
387 SparseGridT::getBlockEdgeSize() + 2 * supportRadius, dim>::value;
389 __shared__ ScalarT enlargedBlock[enlargedBlockSize];
// NOTE(review): `sparseGrid` has the dependent type SparseGridT, so standard C++ requires
// `sparseGrid.template loadGhostBlock<p_src>(...)`, which is the spelling used by another
// stencil in this file; some compilers accept the omission.
391 sparseGrid.loadGhostBlock<p_src>(dataBlockLoad,dataBlockIdPos,enlargedBlock);
395 if (applyStencilHere)
397 const auto coord = sparseGrid.getCoordInEnlargedBlock(offset);
398 const auto linId = sparseGrid.getLinIdInEnlargedBlock(offset);
// NOTE(review): the three loops are bounded by `dim` while `coeff` is declared [3][3][3];
// the bounds coincide only when dim == 3.
400 for (
int i = 0; i < dim; ++i)
402 for (
int j = 0; j < dim; ++j)
404 for (
int k = 0; k < dim; ++k)
// `key` is presumably built from (i, j, k) on lines not shown - TODO confirm.
412 auto nPlusId = sparseGrid.getNeighbourLinIdInEnlargedBlock(coord, key);
413 tot += enlargedBlock[nPlusId] * cc.coeff[i][j][k];
418 dataBlockStore.template get<p_dst>()[offset] = tot;
// Host-side flush: forwards to the grid's flush with the smax_<0> functor and FLUSH_ON_DEVICE.
422 template <
typename SparseGr
idT,
typename CtxT>
423 static inline void __host__ flush(SparseGridT & sparseGrid, CtxT & ctx)
425 sparseGrid.template flush <smax_<0>> (ctx, flush_type::FLUSH_ON_DEVICE);
// Convolution stencil functor without a shared-memory buffer: for each point where
// `applyStencilHere` is true it looks up every neighbour with getNNPoint() + get<p_src>(),
// accumulates the value weighted by `cc.coeff` into `tot`, and stores `tot` into p_dst.
// NOTE(review): the struct header, braces and several declarations (`dataBlockIdPos`, `offset`,
// `cc`, `key`, `tot`, `stencil_type`) fall on lines missing from this listing; the struct is
// presumably `Conv3x3x3_noshared`, the name used by the perf test in this file.
// Template parameters: dim - number of dimensions; p_src - property read; p_dst - property written.
429template<
unsigned int dim,
unsigned int p_src,
unsigned int p_dst>
436 static constexpr unsigned int supportRadius = 1;
// Presumably the per-point operation count reported by benchmarks (2 per each of 27 taps).
438 static constexpr unsigned int flops = 2*27;
440 template<
typename SparseGr
idT,
typename DataBlockWrapperT>
441 static inline __device__
void stencil(
442 SparseGridT & sparseGrid,
443 const unsigned int dataBlockId,
447 DataBlockWrapperT & dataBlockLoad,
448 DataBlockWrapperT & dataBlockStore,
449 bool applyStencilHere,
// These typedefs are not referenced by any visible line of this function.
452 typedef typename SparseGridT::AggregateBlockType AggregateT;
453 typedef ScalarTypeOf<AggregateT, p_src> ScalarT;
455 if (applyStencilHere)
// NOTE(review): the three loops are bounded by `dim` while `coeff` is declared [3][3][3];
// the bounds coincide only when dim == 3.
458 for (
int i = 0; i < dim; ++i)
460 for (
int j = 0; j < dim; ++j)
462 for (
int k = 0; k < dim; ++k)
// `key` is presumably built from (i, j, k) on lines not shown - TODO confirm.
471 auto pos = sparseGrid.template getNNPoint<stencil_type>(dataBlockIdPos, offset, key);
473 tot += sparseGrid.template get<p_src>(pos) * cc.coeff[i][j][k];
478 dataBlockStore.template get<p_dst>()[offset] = tot;
// Host-side flush: forwards to the grid's flush with the smax_<0> functor and FLUSH_ON_DEVICE.
482 template <
typename SparseGr
idT,
typename CtxT>
483 static inline void __host__ flush(SparseGridT & sparseGrid, CtxT & ctx)
485 sparseGrid.template flush <smax_<0>> (ctx, flush_type::FLUSH_ON_DEVICE);
// testConv3x3x3_perf: host-side benchmark. It builds a 1000^3 sparse grid, fills it with the
// insertSphere3D_radius kernel, tags boundaries, applies the Conv3x3x3 stencil `iterations`
// times in STENCIL_MODE_INPLACE, and prints timing / throughput statistics to std::cout.
// NOTE(review): this listing is missing source lines (braces and the declarations of `ctx`,
// `start`, `cc`, `ts`, `measures_tm`, `measures_gf`, `gFlopsS`, `mean_tm`, `mean_gf`, plus any
// timer start/stop calls), so the timed region cannot be determined from here.
// Template parameter: SparseGridZ - sparse grid type under test.
// Parameter: testName - label printed in the report.
489template<
typename SparseGr
idZ>
490void testConv3x3x3_perf(std::string testName)
492 constexpr unsigned int dim = 3;
// NOTE(review): this local edge size (8) sizes the launch below, while the report prints
// SparseGridZ::blockEdgeSize_; the two can disagree if SparseGridZ uses a different edge size.
493 constexpr unsigned int blockEdgeSize = 8;
496 unsigned int iterations = 100;
498 size_t sz[] = {1000,1000,1000};
500 SparseGridZ sparseGrid(sz);
502 sparseGrid.template setBackgroundValue<0>(0);
// Insert phase: a 32x32x32 launch grid with blockEdgeSize^3 threads per block along x.
508 dim3 gridSize(32,32,32);
511 sparseGrid.setGPUInsertBuffer(gridSize,dim3(1));
512 CUDA_LAUNCH_DIM3((insertSphere3D_radius<0>),
513 gridSize, dim3(blockEdgeSize*blockEdgeSize*blockEdgeSize,1,1),
514 sparseGrid.toKernel(), start,128, 56, 1);
516 sparseGrid.template flush < smax_< 0 >> (ctx, flush_type::FLUSH_ON_DEVICE);
// Counting is done after copying property 0 back to the host.
518 sparseGrid.template deviceToHost<0>();
519 auto existingElements = sparseGrid.countExistingElements();
520 auto boundaryElements = sparseGrid.countBoundaryElements();
// Element count used for throughput: existing elements minus boundary elements.
521 unsigned long long numElements = existingElements - boundaryElements;
// NOTE(review): NNFull is instantiated with the literal 3 here and with `dim` just below;
// both are 3 in this function, but the spelling is inconsistent.
523 sparseGrid.template findNeighbours<NNFull<3>>();
525 sparseGrid.template setNNType<NNFull<dim>>();
526 sparseGrid.template tagBoundaries<NNFull<3>>(ctx);
531 for (
unsigned int iter=0; iter<iterations; ++iter)
// NOTE(review): the cudaError_t returned by cudaDeviceSynchronize() is discarded.
533 cudaDeviceSynchronize();
// Set every one of the 27 weights to one.
536 for (
int i = 0 ; i < 3 ; i++)
538 for (
int j = 0 ; j < 3 ; j++)
540 for (
int k = 0 ; k < 3 ; k++)
542 cc.coeff[k][j][i] = 1.0;
550 sparseGrid.template applyStencils<Conv3x3x3<dim,0,1>>(sparseGrid.getBox(),STENCIL_MODE_INPLACE,cc);
// NOTE(review): return value discarded here as well.
552 cudaDeviceSynchronize();
// Record the elapsed wall-clock time reported by `ts` for this iteration.
555 measures_tm.add(ts.
getwct());
// Elements processed per second, in units of 1e9.
557 float gElemS = numElements / (1e9 * ts.
getwct());
// `gFlopsS` is presumably derived from gElemS on a line not shown - TODO confirm.
560 measures_gf.add(gFlopsS);
// Mean / standard deviation of the recorded times and throughputs.
564 double deviation_tm = 0;
565 standard_deviation(measures_tm,mean_tm,deviation_tm);
568 double deviation_gf = 0;
569 standard_deviation(measures_gf,mean_gf,deviation_gf);
// Report.
571 std::cout <<
"Test: " << testName << std::endl;
572 std::cout <<
"Block: " << SparseGridZ::blockEdgeSize_
573 <<
"x" << SparseGridZ::blockEdgeSize_
574 <<
"x" << SparseGridZ::blockEdgeSize_
576 std::cout <<
"Grid: " << sz[0] <<
"x" << sz[1] <<
"x" << sz[2] << std::endl;
578 double dataOccupancyMean, dataOccupancyDev;
579 sparseGrid.measureBlockOccupancy(dataOccupancyMean, dataOccupancyDev);
580 std::cout <<
"Data Occupancy: " << dataOccupancyMean <<
" dev:" << dataOccupancyDev << std::endl;
582 std::cout <<
"Iterations: " << iterations << std::endl;
583 std::cout <<
"\tConvolution3x3x3: " << mean_tm <<
" dev:" << deviation_tm <<
" s" << std::endl;
584 std::cout <<
"Throughput: " << std::endl <<
"\t " << mean_gf <<
" GFlops/s dev: " << deviation_gf <<
" GFlops/s " << std::endl;
// testConv3x3x3_no_shared_perf: host-side benchmark. It builds a 1000^3 sparse grid, fills it
// with the insertSphere3D_radius kernel, tags boundaries with CALCULATE_EXISTING_POINTS, applies
// the Conv3x3x3_noshared stencil `iterations` times in STENCIL_MODE_INPLACE_NO_SHARED, and
// prints timing / throughput statistics to std::cout.
// NOTE(review): this listing is missing source lines (braces and the declarations of `ctx`,
// `start`, `cc`, `ts`, `measures_tm`, `measures_gf`, `gFlopsS`, `mean_tm`, `mean_gf`, plus any
// timer start/stop calls), so the timed region cannot be determined from here.
// Template parameter: SparseGridZ - sparse grid type under test.
// Parameter: testName - label printed in the report.
587template<
typename SparseGr
idZ>
588static void testConv3x3x3_no_shared_perf(std::string testName)
590 unsigned int iterations = 100;
592 size_t sz[] = {1000,1000,1000};
594 SparseGridZ sparseGrid(sz);
596 sparseGrid.template setBackgroundValue<0>(0);
// Insert phase: a 32x32x32 launch grid with blockEdgeSize_^3 threads per block along x.
602 dim3 gridSize(32,32,32);
605 sparseGrid.setGPUInsertBuffer(gridSize,dim3(1));
606 CUDA_LAUNCH_DIM3((insertSphere3D_radius<0>),
607 gridSize, dim3(SparseGridZ::blockEdgeSize_*SparseGridZ::blockEdgeSize_*SparseGridZ::blockEdgeSize_,1,1),
608 sparseGrid.toKernel(), start,128, 56, 1);
610 sparseGrid.template flush < smax_< 0 >> (ctx, flush_type::FLUSH_ON_DEVICE);
// Counting is done after copying property 0 back to the host.
612 sparseGrid.template deviceToHost<0>();
613 auto existingElements = sparseGrid.countExistingElements();
614 auto boundaryElements = sparseGrid.countBoundaryElements();
// Element count used for throughput: existing elements minus boundary elements.
615 unsigned long long numElements = existingElements - boundaryElements;
// NOTE(review): NNFull is instantiated with the literal 3 here and with SparseGridZ::dims just
// below; they agree only when SparseGridZ is three-dimensional.
617 sparseGrid.template findNeighbours<NNFull<3>>();
619 sparseGrid.template setNNType<NNFull<SparseGridZ::dims>>();
620 sparseGrid.template tagBoundaries<NNFull<3>>(ctx,
No_check(),tag_boundaries::CALCULATE_EXISTING_POINTS);
626 for (
unsigned int iter=0; iter<iterations; ++iter)
// NOTE(review): the cudaError_t returned by cudaDeviceSynchronize() is discarded.
628 cudaDeviceSynchronize();
// Set every one of the 27 weights to one.
631 for (
int i = 0 ; i < 3 ; i++)
633 for (
int j = 0 ; j < 3 ; j++)
635 for (
int k = 0 ; k < 3 ; k++)
637 cc.coeff[k][j][i] = 1.0;
645 sparseGrid.template applyStencils<Conv3x3x3_noshared<SparseGridZ::dims,0,1>>(sparseGrid.getBox(),STENCIL_MODE_INPLACE_NO_SHARED,cc);
// NOTE(review): return value discarded here as well.
647 cudaDeviceSynchronize();
// Record the elapsed wall-clock time reported by `ts` for this iteration.
650 measures_tm.add(ts.
getwct());
// Elements processed per second, in units of 1e9.
652 float gElemS = numElements / (1e9 * ts.
getwct());
// `gFlopsS` is presumably derived from gElemS on a line not shown - TODO confirm.
655 measures_gf.add(gFlopsS);
// Mean / standard deviation of the recorded times and throughputs.
659 double deviation_tm = 0;
660 standard_deviation(measures_tm,mean_tm,deviation_tm);
663 double deviation_gf = 0;
664 standard_deviation(measures_gf,mean_gf,deviation_gf);
// Report.
666 std::cout <<
"Test: " << testName << std::endl;
667 std::cout <<
"Block: " << SparseGridZ::blockEdgeSize_
668 <<
"x" << SparseGridZ::blockEdgeSize_
669 <<
"x" << SparseGridZ::blockEdgeSize_
671 std::cout <<
"Grid: " << sz[0] <<
"x" << sz[1] <<
"x" << sz[2] << std::endl;
673 double dataOccupancyMean, dataOccupancyDev;
674 sparseGrid.measureBlockOccupancy(dataOccupancyMean, dataOccupancyDev);
675 std::cout <<
"Data Occupancy: " << dataOccupancyMean <<
" dev:" << dataOccupancyDev << std::endl;
677 std::cout <<
"Iterations: " << iterations << std::endl;
678 std::cout <<
"\tConvolution3x3x3: " << mean_tm <<
" dev:" << deviation_tm <<
" s" << std::endl;
679 std::cout <<
"Throughput: " << std::endl <<
"\t " << mean_gf <<
" GFlops/s dev: " << deviation_gf <<
" GFlops/s " << std::endl;
// Stencil functor computing `cur + dt * laplacian`, reading each neighbour through
// sparseGrid.get() instead of a shared-memory block.
// NOTE(review): the struct header, the function name/return-type lines, braces and several
// declarations (`pointCoord`, `dt`, `res`, any mask test) fall on lines missing from this
// listing; the struct is presumably `HeatStencilGet` - confirm against the real source.
// Template parameters: dim - number of dimensions; p_src - property read; p_dst - property written.
682template<
unsigned int dim,
unsigned int p_src,
unsigned int p_dst>
// Presumably the per-point operation count reported by benchmarks - TODO confirm.
689 static constexpr unsigned int flops = 3 + 2*dim;
691 static constexpr unsigned int supportRadius = 1;
// First member function (presumably the __device__ `stencil`).
705 template<
typename SparseGr
idT,
typename DataBlockWrapperT>
707 SparseGridT & sparseGrid,
708 const unsigned int dataBlockId,
710 const unsigned int offset,
712 const DataBlockWrapperT & dataBlockLoad,
713 DataBlockWrapperT & dataBlockStore,
717 typedef typename SparseGridT::AggregateBlockType AggregateT;
718 typedef ScalarTypeOf<AggregateT, p_src> ScalarT;
// Working copy of the point coordinate; it is moved to each neighbour and restored below.
721 auto coord = pointCoord;
// NOTE(review): the centre value is read from property p_dst, while the neighbours below are
// read from p_src and the other stencils in this file read the centre from p_src - confirm
// this is intended.
726 ScalarT cur = dataBlockLoad.template get<p_dst>()[offset];
// NOTE(review): `-2.0` is a double literal, so this product is evaluated in double before
// being converted to ScalarT.
727 ScalarT laplacian = -2.0 * dim * cur;
// For every dimension: read the +1 and -1 neighbours, then put the coordinate back.
729 for (
int d = 0; d < dim; ++d)
731 auto locC = coord.get(d);
732 coord.set_d(d, locC+1);
733 auto nPlus = sparseGrid.template get<p_src>(coord);
734 coord.set_d(d, locC-1);
735 auto nMinus = sparseGrid.template get<p_src>(coord);
736 laplacian += nMinus + nPlus;
737 coord.set_d(d, locC);
739 res = cur + dt * laplacian;
744 dataBlockStore.template get<p_dst>()[offset] = res;
// Second member function (presumably the __host__ `stencilHost`).
// A neighbour inside the same block is read from `dataBlockLoad` at offset -/+ dimStride; at a
// block face (localOffset == 0 or == blockEdgeSize - 1) it is read through sparseGrid.get().
// NOTE(review): the `else` lines pairing each `if` with the in-block read are not visible.
760 template<
typename SparseGr
idT,
typename DataBlockWrapperT>
762 SparseGridT & sparseGrid,
763 const unsigned int dataBlockId,
765 const unsigned int offset,
767 const DataBlockWrapperT & dataBlockLoad,
768 DataBlockWrapperT & dataBlockStore,
772 constexpr unsigned int blockEdgeSize = SparseGridT::getBlockEdgeSize();
776 auto cur = dataBlockLoad.template get<p_src>()[offset];
777 auto laplacian = -2.0 * dim * cur;
779 auto neighbourCoord = pointCoord;
// `counter` is consumed one base-blockEdgeSize digit per dimension; `dimStride` is the linear
// stride of the current dimension inside the block.
780 auto counter = offset;
781 unsigned int dimStride = 1;
782 for (
int d = 0; d < dim; ++d)
784 const auto localOffset = counter % blockEdgeSize;
// Lower neighbour along d.
786 if (localOffset == 0)
788 neighbourCoord.
set_d(d, neighbourCoord.get(d) - 1);
789 laplacian += sparseGrid.template get<p_src>(neighbourCoord);
// Restore the coordinate after the lookup.
790 neighbourCoord.set_d(d, neighbourCoord.get(d) + 1);
794 laplacian += dataBlockLoad.template get<p_src>()[offset - dimStride];
// Upper neighbour along d.
796 if (localOffset == blockEdgeSize - 1)
798 neighbourCoord.set_d(d, neighbourCoord.get(d) + 1);
799 laplacian += sparseGrid.template get<p_src>(neighbourCoord);
// Restore the coordinate after the lookup.
800 neighbourCoord.set_d(d, neighbourCoord.get(d) - 1);
804 laplacian += dataBlockLoad.template get<p_src>()[offset + dimStride];
807 counter /= blockEdgeSize;
808 dimStride *= blockEdgeSize;
810 dataBlockStore.template get<p_dst>()[offset] = cur + dt * laplacian;
// Host-side flush: forwards to the grid's flush with the sRight_<0> functor and FLUSH_ON_DEVICE.
814 template <
typename SparseGr
idT,
typename CtxT>
815 static inline void __host__ flush(SparseGridT & sparseGrid, CtxT & ctx)
817 sparseGrid.template flush <sRight_<0>> (ctx, flush_type::FLUSH_ON_DEVICE);
// Stencil functor whose shared-memory variant loads the ghost block, takes the centre value,
// writes `res` back into the enlarged block and stores it to p_dst.
// NOTE(review): the struct header, the function name/return-type lines, braces, the lines that
// compute `res`, and several declarations (`dataBlockIdPos`, `pointCoord`, `dt`) fall on lines
// missing from this listing; the struct is presumably `SkeletonStencil` - confirm.
// Template parameters: dim - number of dimensions; p_src - property read; p_dst - property written.
821template<
unsigned int dim,
unsigned int p_src,
unsigned int p_dst>
// Presumably the per-point operation count reported by benchmarks - TODO confirm.
828 static constexpr unsigned int flops = 1;
// Stencil radius; it sets the ghost-layer width of the enlarged block below.
830 static constexpr unsigned int supportRadius = 1;
// First member function (presumably the __device__ `stencil`).
844 template<
typename SparseGr
idT,
typename DataBlockWrapperT>
846 SparseGridT & sparseGrid,
847 const unsigned int dataBlockId,
849 const unsigned int offset,
851 const DataBlockWrapperT & dataBlockLoad,
852 DataBlockWrapperT & dataBlockStore,
856 typedef typename SparseGridT::AggregateBlockType AggregateT;
857 typedef ScalarTypeOf<AggregateT, p_src> ScalarT;
// Number of scalars in the enlarged block: (block edge + 2*supportRadius) to the power dim.
859 constexpr unsigned int enlargedBlockSize =
IntPow<
860 SparseGridT::getBlockEdgeSize() + 2 * supportRadius, dim>::value;
862 __shared__ ScalarT enlargedBlock[enlargedBlockSize];
// NOTE(review): `sparseGrid` has the dependent type SparseGridT, so standard C++ requires
// `sparseGrid.template loadGhostBlock<p_src>(...)`, which is the spelling used by another
// stencil in this file; some compilers accept the omission.
863 sparseGrid.loadGhostBlock<p_src>(dataBlockLoad, dataBlockIdPos, enlargedBlock);
867 decltype(sparseGrid.getLinIdInEnlargedBlock(0)) linId = 0;
872 linId = sparseGrid.getLinIdInEnlargedBlock(offset);
873 ScalarT cur = enlargedBlock[linId];
// `res` is assigned on lines not shown in this listing.
880 enlargedBlock[linId] = res;
// NOTE(review): same missing `template` disambiguator as on the loadGhostBlock call above.
883 sparseGrid.storeBlock<p_dst>(dataBlockStore, enlargedBlock);
// Second member function (presumably the __host__ `stencilHost`): computes
// `cur + dt * laplacian` with a 2*dim-neighbour Laplacian.
// A neighbour inside the same block is read from `dataBlockLoad` at offset -/+ dimStride; at a
// block face (localOffset == 0 or == blockEdgeSize - 1) it is read through sparseGrid.get().
// NOTE(review): the `else` lines pairing each `if` with the in-block read are not visible.
898 template<
typename SparseGr
idT,
typename DataBlockWrapperT>
900 SparseGridT & sparseGrid,
901 const unsigned int dataBlockId,
903 const unsigned int offset,
905 const DataBlockWrapperT & dataBlockLoad,
906 DataBlockWrapperT & dataBlockStore,
910 constexpr unsigned int blockEdgeSize = SparseGridT::getBlockEdgeSize();
914 auto cur = dataBlockLoad.template get<p_src>()[offset];
915 auto laplacian = -2.0 * dim * cur;
917 auto neighbourCoord = pointCoord;
// `counter` is consumed one base-blockEdgeSize digit per dimension; `dimStride` is the linear
// stride of the current dimension inside the block.
918 auto counter = offset;
919 unsigned int dimStride = 1;
920 for (
int d = 0; d < dim; ++d)
922 const auto localOffset = counter % blockEdgeSize;
// Lower neighbour along d.
924 if (localOffset == 0)
926 neighbourCoord.
set_d(d, neighbourCoord.get(d) - 1);
927 laplacian += sparseGrid.template get<p_src>(neighbourCoord);
// Restore the coordinate after the lookup.
928 neighbourCoord.set_d(d, neighbourCoord.get(d) + 1);
932 laplacian += dataBlockLoad.template get<p_src>()[offset - dimStride];
// Upper neighbour along d.
934 if (localOffset == blockEdgeSize - 1)
936 neighbourCoord.set_d(d, neighbourCoord.get(d) + 1);
937 laplacian += sparseGrid.template get<p_src>(neighbourCoord);
// Restore the coordinate after the lookup.
938 neighbourCoord.set_d(d, neighbourCoord.get(d) - 1);
942 laplacian += dataBlockLoad.template get<p_src>()[offset + dimStride];
945 counter /= blockEdgeSize;
946 dimStride *= blockEdgeSize;
948 dataBlockStore.template get<p_dst>()[offset] = cur + dt * laplacian;
// Host-side flush: forwards to the grid's flush with the sRight_<0> functor and FLUSH_ON_DEVICE.
952 template <
typename SparseGr
idT,
typename CtxT>
953 static inline void __host__ flush(SparseGridT & sparseGrid, CtxT & ctx)
955 sparseGrid.template flush <sRight_<0>> (ctx, flush_type::FLUSH_ON_DEVICE);
grid_key_dx is the key to access any element in the grid
__device__ __host__ void set_d(index_type i, index_type id)
Set the i index.
Implementation of 1-D std::vector like structure.
Class for cpu time benchmarking.
void stop()
Stop the timer.
void start()
Start the timer.
double getwct()
Return the elapsed real time.
static __device__ void stencil(SparseGridT &sparseGrid, const unsigned int dataBlockId, const openfpm::sparse_index< unsigned int > dataBlockIdPos, const unsigned int offset, const grid_key_dx< dim, int > &pointCoord, const DataBlockWrapperT &dataBlockLoad, DataBlockWrapperT &dataBlockStore, bool isActive, float dt)
Stencil function.
static __host__ void stencilHost(SparseGridT &sparseGrid, const unsigned int dataBlockId, const openfpm::sparse_index< unsigned int > dataBlockIdPos, const unsigned int offset, const grid_key_dx< dim, int > &pointCoord, const DataBlockWrapperT &dataBlockLoad, DataBlockWrapperT &dataBlockStore, bool isActive, float dt)
Stencil Host function.
static __device__ void stencil(SparseGridT &sparseGrid, const unsigned int dataBlockId, const openfpm::sparse_index< unsigned int > dataBlockIdPos, const unsigned int offset, const grid_key_dx< dim, int > &pointCoord, const DataBlockWrapperT &dataBlockLoad, DataBlockWrapperT &dataBlockStore, unsigned char curMask, float dt)
Stencil function.
static __host__ void stencilHost(SparseGridT &sparseGrid, const unsigned int dataBlockId, const openfpm::sparse_index< unsigned int > dataBlockIdPos, const unsigned int offset, const grid_key_dx< dim, int > &pointCoord, const DataBlockWrapperT &dataBlockLoad, DataBlockWrapperT &dataBlockStore, bool isActive, float dt)
Stencil Host function.
static __host__ void stencilHost(SparseGridT &sparseGrid, const unsigned int dataBlockId, const openfpm::sparse_index< unsigned int > dataBlockIdPos, const unsigned int offset, const grid_key_dx< dim, int > &pointCoord, const DataBlockWrapperT &dataBlockLoad, DataBlockWrapperT &dataBlockStore, bool isActive, float dt)
Stencil Host function.
static __device__ void stencil(SparseGridT &sparseGrid, const unsigned int dataBlockId, const openfpm::sparse_index< unsigned int > dataBlockIdPos, const unsigned int offset, const grid_key_dx< dim, int > &pointCoord, const DataBlockWrapperT &dataBlockLoad, DataBlockWrapperT &dataBlockStore, bool isActive, float dt)
Stencil function.
Aggregate of properties: from a list of objects, it creates a struct that follows the OPENFPM native stru...