#define BOOST_TEST_DYN_LINK
#define DISABLE_MPI_WRITTERS

#include <boost/test/unit_test.hpp>
#include "SparseGridGpu/SparseGridGpu.hpp"
#include "SparseGridGpu/tests/utils/SparseGridGpu_testKernels.cuh"
#include "SparseGridGpu/tests/utils/SparseGridGpu_util_test.cuh"
template<unsigned int p1, unsigned int p2, unsigned int chunksPerBlock=1, typename SparseGridType, typename ScalarT>
__global__ void insertConstantValue2(SparseGridType sparseGrid, ScalarT value)
{
    constexpr unsigned int pMask = SparseGridType::pMask;
    typedef BlockTypeOf<typename SparseGridType::AggregateType, p1> BlockT;

    int x = blockIdx.x * blockDim.x + threadIdx.x;
    int y = blockIdx.y * blockDim.y + threadIdx.y;
    int z = blockIdx.z * blockDim.z + threadIdx.z;
    grid_key_dx<SparseGridType::dims, int> coord({x, y, z}); // reconstructed: the coord definition was lost in extraction

    auto pos = sparseGrid.getLinId(coord);
    unsigned int dataBlockId = pos / BlockT::size;
    unsigned int offset = pos % BlockT::size;

    auto encap = sparseGrid.insertBlock(dataBlockId);
    encap.template get<p1>()[offset] = value;
    encap.template get<p2>()[offset] = value;
    // Elided in the excerpt: the pMask bit set marking the point as existing.

    __syncthreads(); // reconstructed: all threads must have written before the block flush
    sparseGrid.flush_block_insert();
}
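// Writes, for every point of the launch grid, the point's x coordinate into
// property p; testInsert/testInsert3D below verify the values after the flush.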
template<unsigned int p, typename SparseGridType>
__global__ void insertValues(SparseGridType sparseGrid)
{
    const auto bDimX = blockDim.x;
    const auto bDimY = blockDim.y;
    const auto bDimZ = blockDim.z;
    const auto bIdX = blockIdx.x;
    const auto bIdY = blockIdx.y;
    const auto bIdZ = blockIdx.z;
    const auto tIdX = threadIdx.x;
    const auto tIdY = threadIdx.y;
    const auto tIdZ = threadIdx.z;
    int x = bIdX * bDimX + tIdX;
    int y = bIdY * bDimY + tIdY;
    int z = bIdZ * bDimZ + tIdZ;
    grid_key_dx<SparseGridType::dims, int> coord({x, y, z}); // reconstructed

    size_t pos = sparseGrid.getLinId(coord);

    sparseGrid.template insert<p>(coord) = x;

    __syncthreads(); // reconstructed
    sparseGrid.flush_block_insert();
}
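// Copies 1 into the output vector at the linearized position of every point
// tagged as padding (grid boundary), 0 elsewhere; used by the tagBoundaries tests.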
template<unsigned int p, typename SparseGridType, typename VectorOutType>
__global__ void copyToOutputIfPadding(SparseGridType sparseGrid, VectorOutType output)
{
    const auto bDimX = blockDim.x;
    const auto bDimY = blockDim.y;
    const auto bDimZ = blockDim.z;
    const auto bIdX = blockIdx.x;
    const auto bIdY = blockIdx.y;
    const auto bIdZ = blockIdx.z;
    const auto tIdX = threadIdx.x;
    const auto tIdY = threadIdx.y;
    const auto tIdZ = threadIdx.z;
    int x = bIdX * bDimX + tIdX;
    int y = bIdY * bDimY + tIdY;
    int z = bIdZ * bDimZ + tIdZ;
    grid_key_dx<SparseGridType::dims, int> coord({x, y, z}); // reconstructed

    size_t pos = sparseGrid.getLinId(coord);

    output.template get<p>(pos) = sparseGrid.isPadding(coord) ? 1 : 0;
}
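// Inserts Dirichlet boundary values for the heat tests: 0 on the x = 0 face,
// 10 on the far x face, and a linear ramp in x along the y edges.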
template<unsigned int p, typename SparseGridType>
__global__ void insertBoundaryValuesHeat(SparseGridType sparseGrid)
{
    const auto bDimX = blockDim.x;
    const auto bDimY = blockDim.y;
    const auto bDimZ = blockDim.z;
    const auto bIdX = blockIdx.x;
    const auto bIdY = blockIdx.y;
    const auto bIdZ = blockIdx.z;
    const auto tIdX = threadIdx.x;
    const auto tIdY = threadIdx.y;
    const auto tIdZ = threadIdx.z;
    int x = bIdX * bDimX + tIdX;
    int y = bIdY * bDimY + tIdY;
    int z = bIdZ * bDimZ + tIdZ;

    float value = 0; // reconstructed: the declaration and the x == 0 branch were elided
    if (x == 0)
    {
        value = 0;
    }
    else if (x == bDimX * gridDim.x - 1)
    {
        value = 10; // reconstructed branch body (10 matches the ramp below at the far x edge)
    }

    if (y == 0 || y == bDimY * gridDim.y - 1)
    {
        value = 10.0 * x / (bDimX * gridDim.x - 1);
    }

    grid_key_dx<SparseGridType::dims, int> coord({x, y, z}); // reconstructed
    sparseGrid.template insert<p>(coord) = value;

    __syncthreads(); // reconstructed
    sparseGrid.flush_block_insert();
}
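// Block-wise Laplacian stencil for applyStencils: each block is staged into
// shared memory together with one layer of ghost points (supportRadius = 1),
// the 2*dim-point Laplacian is evaluated, and the block is stored back.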
template<unsigned int dim, unsigned int p>
struct LaplacianStencil // name reconstructed: the struct declaration was lost in extraction
{
    static constexpr unsigned int supportRadius = 1;

    template<typename SparseGridT, typename DataBlockWrapperT>
    static inline __device__ void stencil(
            SparseGridT & sparseGrid,
            const unsigned int dataBlockId,               // the parameters on the elided
            const grid_key_dx<dim, int> & dataBlockCoord, // lines are reconstructed from
            unsigned int offset,                          // their use in the body below
            DataBlockWrapperT & dataBlockLoad,
            DataBlockWrapperT & dataBlockStore)
    {
        typedef typename SparseGridT::AggregateBlockType AggregateT;
        typedef ScalarTypeOf<AggregateT, p> ScalarT;

        constexpr unsigned int enlargedBlockSize = IntPow<
                SparseGridT::getBlockEdgeSize() + 2 * supportRadius, dim>::value;

        __shared__ ScalarT enlargedBlock[enlargedBlockSize];
        sparseGrid.loadBlock<p>(dataBlockLoad, enlargedBlock);
        sparseGrid.loadGhost<p>(dataBlockCoord, enlargedBlock);
        __syncthreads(); // reconstructed: barrier required before reading shared memory

        const auto coord = sparseGrid.getCoordInEnlargedBlock(offset);
        const auto linId = sparseGrid.getLinIdInEnlargedBlock(offset);
        ScalarT cur = enlargedBlock[linId];
        ScalarT res = -2.0*dim*cur; // central term of the discrete Laplacian
        for (int d = 0; d < dim; ++d)
        {
            auto nPlusId = sparseGrid.getNeighbourLinIdInEnlargedBlock(coord, d, 1);
            auto nMinusId = sparseGrid.getNeighbourLinIdInEnlargedBlock(coord, d, -1);
            ScalarT neighbourPlus = enlargedBlock[nPlusId];
            ScalarT neighbourMinus = enlargedBlock[nMinusId];
            res += neighbourMinus + neighbourPlus;
        }
        enlargedBlock[linId] = res;
        __syncthreads(); // reconstructed: barrier before the block store
        sparseGrid.storeBlock<p>(dataBlockStore, enlargedBlock);
    }

    template <typename SparseGridT, typename CtxT>
    static inline void __host__ flush(SparseGridT & sparseGrid, CtxT & ctx)
    {
        sparseGrid.template flush<smin_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);
    }
};
BOOST_AUTO_TEST_SUITE(SparseGridGpu_tests)
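// Each thread of the launch grid inserts one point; the test then checks,
// point by point, that property 0 holds the x coordinate written on the device.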
BOOST_AUTO_TEST_CASE(testInsert)
{
    constexpr unsigned int dim = 2;
    constexpr unsigned int blockEdgeSize = 8;
    constexpr unsigned int dataBlockSize = blockEdgeSize * blockEdgeSize;

    // Elided: aggregate/grid typedefs, gridSize and the sparseGrid/ctx definitions.
    dim3 blockSizeInsert(blockEdgeSize, blockEdgeSize);

    sparseGrid.template setBackgroundValue<0>(666);
    sparseGrid.setGPUInsertBuffer(gridSize, blockSizeInsert);

    CUDA_LAUNCH_DIM3((insertValues<0>), gridSize, blockSizeInsert, sparseGrid.toKernel());

    sparseGrid.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    sparseGrid.template deviceToHost<0>();

    bool match = true; // reconstructed
    for (size_t i = 0; i < 4*64; i++)
    {
        auto coord = sparseGrid.getCoord(i);
        auto expectedValue = coord.get(0);

        match &= sparseGrid.template get<0>(coord) == expectedValue;
    }

    BOOST_REQUIRE_EQUAL(match, true);
}
BOOST_AUTO_TEST_CASE(testInsert3D)
{
    constexpr unsigned int dim = 3;
    constexpr unsigned int blockEdgeSize = 4;
    constexpr unsigned int dataBlockSize = blockEdgeSize * blockEdgeSize * blockEdgeSize; // 3D block: edge cubed (the excerpt read edge squared)

    // Elided: aggregate/grid typedefs and the sparseGrid/ctx definitions.
    dim3 gridSize(2, 2, 2);
    dim3 blockSizeInsert(blockEdgeSize, blockEdgeSize, blockEdgeSize);

    sparseGrid.template setBackgroundValue<0>(666);

    sparseGrid.setGPUInsertBuffer(gridSize, blockSizeInsert);

    CUDA_LAUNCH_DIM3((insertValues<0>), gridSize, blockSizeInsert, sparseGrid.toKernel());

    sparseGrid.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    sparseGrid.template deviceToHost<0>();

    bool match = true; // reconstructed
    for (size_t i = 0; i < 64*4; i++)
    {
        auto coord = sparseGrid.getCoord(i);
        auto expectedValue = coord.get(0);

        match &= sparseGrid.template get<0>(coord) == expectedValue;
    }

    BOOST_REQUIRE_EQUAL(match, true);
}
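// The two tests below insert isolated points and short segments, then verify
// that tagBoundaries() marks exactly the expected linearized indices as padding.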
BOOST_AUTO_TEST_CASE(testTagBoundaries)
{
    constexpr unsigned int dim = 2;
    constexpr unsigned int blockEdgeSize = 8;

    // Elided: grid typedefs, gridSize, the sparseGrid/ctx definitions and the
    // insertion points pt1..pt4.
    dim3 blockSizeInsert(blockEdgeSize, blockEdgeSize);

    sparseGrid.template setBackgroundValue<0>(666);

    sparseGrid.setGPUInsertBuffer(gridSize, blockSizeInsert);
    CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), pt1, 1);
    CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), pt2, 1);
    CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), pt3, 1);
    sparseGrid.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    sparseGrid.setGPUInsertBuffer(gridSize, blockSizeInsert);
    CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), pt4, 1);
    sparseGrid.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    sparseGrid.setGPUInsertBuffer(gridSize, blockSizeInsert);
    for (int y = 9; y <= 11; y++)
    {
        // Elided: pt1/pt2 are re-set as a function of y before the launches.
        CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), pt1, 1);
        CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), pt2, 1);
    }
    sparseGrid.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    sparseGrid.setGPUInsertBuffer(gridSize, blockSizeInsert);
    for (int y = 9; y <= 11; y++)
    {
        // Elided: pt1/pt2 are re-set as a function of y before the launches.
        CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), pt1, 1);
        CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), pt2, 1);
    }
    sparseGrid.template flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    sparseGrid.deviceToHost();
    sparseGrid.deviceToHost<0>();

    sparseGrid.findNeighbours();   // the neighbour map is required before tagging
    sparseGrid.tagBoundaries(ctx); // mark padding (boundary) points on the device

    // Elided: definition of the output vector.
    output.resize(4 * 64);

    CUDA_LAUNCH_DIM3((copyToOutputIfPadding<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), output.toKernel());

    output.template deviceToHost<0>();
    sparseGrid.template deviceToHost<0>();

    bool match = true; // reconstructed
    for (size_t i = 0; i < output.size(); i++)
    {
        auto coord = sparseGrid.getCoord(i);

        // The leading indices of the expected-padding set were elided in the excerpt
        bool expectedValue =
                   i == 142 || i == 143 || i == 200 || i == 201
                || i == 150 || i == 209
                || i == 158 || i == 159 || i == 216 || i == 217;

        match &= output.template get<0>(i) == expectedValue;
    }

    BOOST_REQUIRE_EQUAL(match, true);
}
BOOST_AUTO_TEST_CASE(testTagBoundaries2)
{
    constexpr unsigned int dim = 2;
    constexpr unsigned int blockEdgeSize = 8;

    // Elided: grid typedefs, gridSize, the sparseGrid/ctx definitions and the
    // insertion points ptd1..ptd4.
    dim3 blockSizeInsert(blockEdgeSize, blockEdgeSize);

    sparseGrid.template setBackgroundValue<0>(666);

    // Four rounds of insertions, flushing after each round. Elided: ptd1..ptd4
    // are redefined before every round after the first.
    sparseGrid.setGPUInsertBuffer(gridSize, blockSizeInsert);
    CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), ptd1, 1);
    CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), ptd2, 1);
    CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), ptd3, 1);
    CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), ptd4, 1);
    sparseGrid.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    sparseGrid.setGPUInsertBuffer(gridSize, blockSizeInsert);
    CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), ptd1, 1);
    CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), ptd2, 1);
    CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), ptd3, 1);
    CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), ptd4, 1);
    sparseGrid.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    sparseGrid.setGPUInsertBuffer(gridSize, blockSizeInsert);
    CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), ptd1, 1);
    CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), ptd2, 1);
    CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), ptd3, 1);
    CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), ptd4, 1);
    sparseGrid.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    sparseGrid.setGPUInsertBuffer(gridSize, blockSizeInsert);
    CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), ptd1, 1);
    CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), ptd2, 1);
    CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), ptd3, 1);
    CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), ptd4, 1);
    sparseGrid.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    sparseGrid.deviceToHost();

    sparseGrid.findNeighbours();
    sparseGrid.tagBoundaries(ctx);

    // Elided: definition of the output vector.
    output.resize(4 * 64);

    CUDA_LAUNCH_DIM3((copyToOutputIfPadding<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), output.toKernel());

    output.template deviceToHost<0>();
    sparseGrid.template deviceToHost<0>();

    bool match = true; // reconstructed
    for (size_t i = 0; i < output.size(); i++)
    {
        auto coord = sparseGrid.getCoord(i);

        bool expectedValue = // any trailing indices were elided in the excerpt
                   i == 54 || i == 55 || i == 62
                || i == 134 || i == 142 || i == 143
                || i == 112 || i == 113 || i == 121
                || i == 200 || i == 193 || i == 201;

        match &= output.template get<0>(i) == expectedValue;
    }

    BOOST_REQUIRE_EQUAL(match, true);
}
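// Heat-equation tests: initialize a 2D domain to zero, fix a linear-in-x
// boundary profile with BoundaryStencilSetXRescaled, iterate a diffusion
// stencil to steady state, and compare against the exact linear solution.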
BOOST_AUTO_TEST_CASE(testStencilHeat)
{
    constexpr unsigned int dim = 2;
    constexpr unsigned int blockEdgeSize = 8;

    // Elided: grid typedefs, gridSize and the sparseGrid/ctx definitions.
    dim3 blockSizeInsert(blockEdgeSize, blockEdgeSize);

    sparseGrid.template setBackgroundValue<0>(0);

    sparseGrid.setGPUInsertBuffer(gridSize, blockSizeInsert);
    CUDA_LAUNCH_DIM3((insertConstantValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), 0);
    sparseGrid.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    sparseGrid.findNeighbours();
    sparseGrid.tagBoundaries(ctx);

    // Boundary profile: 0 at x = 0, rescaled linearly up to 10 at the far x edge
    sparseGrid.template applyStencils<BoundaryStencilSetXRescaled<dim,0,0>>(
            sparseGrid.getBox(), STENCIL_MODE_INPLACE,
            0.0, gridSize.x * blockEdgeSize, 0.0, 10.0);

    const unsigned int maxIter = 1000;
    for (unsigned int iter = 0; iter < maxIter; ++iter)
    {
        // Elided: the heat-stencil applyStencils calls executed each iteration.
    }

    sparseGrid.template deviceToHost<0,1>();

    // The steady state with this boundary profile is linear in x
    bool match = true; // reconstructed
    for (size_t i = 0; i < 64*4; i++)
    {
        auto coord = sparseGrid.getCoord(i);
        float expectedValue = 10.0 * coord.get(0) / (gridSize.x * blockEdgeSize - 1);

        match &= fabs(sparseGrid.template get<1>(coord) - expectedValue) < 1e-2;
    }

    BOOST_REQUIRE_EQUAL(match, true);
}
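// Same steady-state check as above, but the diffusion step is expressed as a
// device lambda over a cross stencil (cs.xm/cs.xp hold the -/+ neighbours per
// dimension) instead of a stencil functor.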
BOOST_AUTO_TEST_CASE(testStencil_lap_simplified)
{
    constexpr unsigned int dim = 2;
    constexpr unsigned int blockEdgeSize = 8;

    // Elided: grid typedefs, gridSize and the sparseGrid/ctx definitions.
    dim3 blockSizeInsert(blockEdgeSize, blockEdgeSize);

    sparseGrid.template setBackgroundValue<0>(0);

    sparseGrid.setGPUInsertBuffer(gridSize, blockSizeInsert);
    CUDA_LAUNCH_DIM3((insertConstantValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), 0);
    sparseGrid.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    sparseGrid.findNeighbours();
    sparseGrid.tagBoundaries(ctx);

    sparseGrid.template applyStencils<BoundaryStencilSetXRescaled<dim,0,0>>(
            sparseGrid.getBox(), STENCIL_MODE_INPLACE,
            0.0, gridSize.x * blockEdgeSize, 0.0, 10.0);

    const unsigned int maxIter = 1000;
    for (unsigned int iter = 0; iter < maxIter; ++iter)
    {
        // Reconstructed: the conv_cross wrappers and lambda signatures around the
        // surviving lambda bodies were elided; ping-pong between properties 0 and 1.
        sparseGrid.conv_cross<0,1,1>({0,0},{16,16},[] __device__ (float & u, cross_stencil<2,float> & cs){
            return u + (cs.xm[0] + cs.xp[0] +
                        cs.xm[1] + cs.xp[1] - 4.0*u)*0.1;
        });

        sparseGrid.conv_cross<1,0,1>({0,0},{16,16},[] __device__ (float & u, cross_stencil<2,float> & cs){
            return u + (cs.xm[0] + cs.xp[0] +
                        cs.xm[1] + cs.xp[1] - 4.0*u)*0.1;
        });
    }

    sparseGrid.deviceToHost<0,1>();

    sparseGrid.template deviceToHost<0>();

    bool match = true; // reconstructed
    for (size_t i = 0; i < 64*4; i++)
    {
        auto coord = sparseGrid.getCoord(i);
        float expectedValue = 10.0 * coord.get(0) / (gridSize.x * blockEdgeSize - 1);

        match &= fabs(sparseGrid.template get<0>(coord) - expectedValue) < 1e-2;
    }

    BOOST_REQUIRE_EQUAL(match, true);
}
BOOST_AUTO_TEST_CASE(testStencil_lap_no_cross_simplified)
{
    constexpr unsigned int dim = 2;
    constexpr unsigned int blockEdgeSize = 8;

    // Elided: grid typedefs, gridSize and the sparseGrid/ctx definitions.
    dim3 blockSizeInsert(blockEdgeSize, blockEdgeSize);

    sparseGrid.template setBackgroundValue<0>(0);

    sparseGrid.setGPUInsertBuffer(gridSize, blockSizeInsert);
    CUDA_LAUNCH_DIM3((insertConstantValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), 0);
    CUDA_LAUNCH_DIM3((insertConstantValue<1>), gridSize, blockSizeInsert, sparseGrid.toKernel(), 0);
    // Elided: the flush of the two insertions.

    sparseGrid.findNeighbours();
    sparseGrid.tagBoundaries(ctx);

    sparseGrid.template applyStencils<BoundaryStencilSetXRescaled<dim,0,0>>(sparseGrid.getBox(),STENCIL_MODE_INPLACE,0.0 ,gridSize.x * blockEdgeSize, 0.0, 10.0);
    sparseGrid.template applyStencils<BoundaryStencilSetXRescaled<dim,1,1>>(sparseGrid.getBox(),STENCIL_MODE_INPLACE,0.0 ,gridSize.x * blockEdgeSize, 0.0, 10.0);

    typedef typename GetCpBlockType<decltype(sparseGrid),0,1>::type CpBlockType;

    // One Jacobi step of the 5-point Laplacian per conv call, ping-ponging
    // between properties 0 and 1
    const unsigned int maxIter = 1000;
    for (unsigned int iter = 0; iter < maxIter; ++iter)
    {
        sparseGrid.conv<0, 1, 1>({0,0},{16,16},[] __device__ (CpBlockType & u, int i, int j){
            float c = u(i,j); // reconstructed: the centre read was elided
            return c + (u(i-1,j) + u(i+1,j) +
                        u(i,j-1) + u(i,j+1) - 4.0*c)*0.1;
        });

        sparseGrid.conv<1, 0, 1>({0,0},{16,16},[] __device__ (CpBlockType & u, int i, int j){
            float c = u(i,j); // reconstructed
            return c + (u(i-1,j) + u(i+1,j) +
                        u(i,j-1) + u(i,j+1) - 4.0*c)*0.1;
        });
    }

    sparseGrid.template deviceToHost<0>();

    bool match = true; // reconstructed
    for (size_t i = 0; i < 64*4; i++)
    {
        auto coord = sparseGrid.getCoord(i);
        float expectedValue = 10.0 * coord.get(0) / (gridSize.x * blockEdgeSize - 1);

        match &= fabs(sparseGrid.template get<0>(coord) - expectedValue) < 1e-2;
    }

    BOOST_REQUIRE_EQUAL(match, true);
}
BOOST_AUTO_TEST_CASE(testStencil_lap_no_cross_simplified2)
{
    constexpr unsigned int dim = 2;
    constexpr unsigned int blockEdgeSize = 8;

    // Elided: grid typedefs, gridSize and the sparseGrid/ctx definitions.
    dim3 blockSizeInsert(blockEdgeSize, blockEdgeSize);

    sparseGrid.template setBackgroundValue<0>(0);

    sparseGrid.setGPUInsertBuffer(gridSize, blockSizeInsert);
    CUDA_LAUNCH_DIM3((insertConstantValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), 0);
    CUDA_LAUNCH_DIM3((insertConstantValue<1>), gridSize, blockSizeInsert, sparseGrid.toKernel(), 0);
    CUDA_LAUNCH_DIM3((insertConstantValue<2>), gridSize, blockSizeInsert, sparseGrid.toKernel(), 0);
    CUDA_LAUNCH_DIM3((insertConstantValue<3>), gridSize, blockSizeInsert, sparseGrid.toKernel(), 0);
    // Elided: the flush of the insertions.

    sparseGrid.findNeighbours();
    sparseGrid.tagBoundaries(ctx);

    // Two independent fields (u in properties 0/2, v in 1/3) with different boundary scales
    sparseGrid.template applyStencils<BoundaryStencilSetXRescaled<dim,0,0>>(sparseGrid.getBox(),STENCIL_MODE_INPLACE,0.0 ,gridSize.x * blockEdgeSize, 0.0, 10.0);
    sparseGrid.template applyStencils<BoundaryStencilSetXRescaled<dim,1,1>>(sparseGrid.getBox(),STENCIL_MODE_INPLACE,0.0 ,gridSize.x * blockEdgeSize, 0.0, 5.0);
    sparseGrid.template applyStencils<BoundaryStencilSetXRescaled<dim,2,2>>(sparseGrid.getBox(),STENCIL_MODE_INPLACE,0.0 ,gridSize.x * blockEdgeSize, 0.0, 10.0);
    sparseGrid.template applyStencils<BoundaryStencilSetXRescaled<dim,3,3>>(sparseGrid.getBox(),STENCIL_MODE_INPLACE,0.0 ,gridSize.x * blockEdgeSize, 0.0, 5.0);

    typedef typename GetCpBlockType<decltype(sparseGrid),0,1>::type CpBlockType;

    const unsigned int maxIter = 1000;
    for (unsigned int iter = 0; iter < maxIter; ++iter)
    {
        sparseGrid.conv2<0,1,2,3,1>({0,0},{16,16},[] __device__ (float & u_out, float & v_out, CpBlockType & u, CpBlockType & v, int i, int j){
            float cu = u(i,j); // reconstructed: the centre reads were elided
            float cv = v(i,j);

            u_out = cu + (u(i-1,j) + u(i+1,j) +
                          u(i,j-1) + u(i,j+1) - 4.0*cu)*0.1;

            v_out = cv + (v(i-1,j) + v(i+1,j) +
                          v(i,j-1) + v(i,j+1) - 4.0*cv)*0.1;
        });

        sparseGrid.conv2<2,3,0,1,1>({0,0},{16,16},[] __device__ (float & u_out, float & v_out, CpBlockType & u, CpBlockType & v, int i, int j){
            float cu = u(i,j); // reconstructed
            float cv = v(i,j);

            u_out = cu + (u(i-1,j) + u(i+1,j) +
                          u(i,j-1) + u(i,j+1) - 4.0*cu)*0.1;

            v_out = cv + (v(i-1,j) + v(i+1,j) +
                          v(i,j-1) + v(i,j+1) - 4.0*cv)*0.1;
        });
    }

    sparseGrid.template deviceToHost<0,1>();

    bool match = true; // reconstructed
    for (size_t i = 0; i < 64*4; i++)
    {
        auto coord = sparseGrid.getCoord(i);
        float expectedValue  = 10.0 * coord.get(0) / (gridSize.x * blockEdgeSize - 1);
        float expectedValue2 =  5.0 * coord.get(0) / (gridSize.x * blockEdgeSize - 1);

        match &= fabs(sparseGrid.template get<0>(coord) - expectedValue)  < 1e-2;
        match &= fabs(sparseGrid.template get<1>(coord) - expectedValue2) < 1e-2;
    }

    BOOST_REQUIRE_EQUAL(match, true);
}
BOOST_AUTO_TEST_CASE(testStencil_lap_no_cross_simplified_subset)
{
    constexpr unsigned int dim = 2;
    constexpr unsigned int blockEdgeSize = 8;

    // Elided: grid typedefs, gridSize and the sparseGrid/ctx definitions.
    dim3 blockSizeInsert(blockEdgeSize, blockEdgeSize);

    sparseGrid.template setBackgroundValue<0>(0);

    sparseGrid.setGPUInsertBuffer(gridSize, blockSizeInsert);
    CUDA_LAUNCH_DIM3((insertConstantValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), 0);
    CUDA_LAUNCH_DIM3((insertConstantValue<1>), gridSize, blockSizeInsert, sparseGrid.toKernel(), 0);
    // Elided: the flush of the insertions.

    sparseGrid.findNeighbours();
    sparseGrid.tagBoundaries(ctx);

    typedef typename GetCpBlockType<decltype(sparseGrid),0,1>::type CpBlockType;

    // Apply the convolution only on the sub-box [3,11] x [3,11]
    sparseGrid.conv<0, 1, 1>({3,3},{11,11},[] __device__ (CpBlockType & u, int i, int j){
        return 5.0; // reconstructed: the body was elided; the check below implies a constant 5.0
    });

    sparseGrid.template deviceToHost<1>();

    bool match = true; // reconstructed
    for (size_t i = 0; i < 64*4; i++)
    {
        auto coord = sparseGrid.getCoord(i);

        if (coord.get(0) >= 3 && coord.get(1) >= 3 && coord.get(0) <= 11 && coord.get(1) <= 11)
        {
            match &= sparseGrid.template get<1>(coord) == 5.0;
        }
        else
        {
            match &= sparseGrid.template get<1>(coord) == 0.0;
        }
    }

    BOOST_REQUIRE_EQUAL(match, true);
}
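// Reads a single grid point from device code; used below to verify that
// host-side inserts reached the device after hostToDevice().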
template<typename sparsegrid_type>
__global__ void sparse_grid_get_test(sparsegrid_type sparseGrid, grid_key_dx<3> key, float * data)
{
    *data = sparseGrid.template get<0>(key);
}
BOOST_AUTO_TEST_CASE(testFlushInsert)
{
    constexpr unsigned int dim = 3;
    constexpr unsigned int blockEdgeSize = 4;
    // Elided: aggregate/grid typedefs.

    size_t sz[] = {137,100,57};

    // Elided: sparseGrid construction from sz.
    sparseGrid.template setBackgroundValue<0>(0);

    // Elided: a sequence of host-side insertFlush calls creating, for each of
    // the values 2.0 .. 10.0, the pair of points checked below.

    BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({3,6,7})),2.0);
    BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({13,16,17})),3.0);
    BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({13,46,27})),4.0);
    BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({36,63,11})),5.0);
    BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({37,96,47})),6.0);
    BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({130,56,37})),7.0);
    BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({131,76,17})),8.0);
    BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({36,86,27})),9.0);
    BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({34,36,7})),10.0);

    BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({4,6,7})),2.0);
    BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({12,16,17})),3.0);
    BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({12,46,27})),4.0);
    BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({35,63,11})),5.0);
    BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({38,96,47})),6.0);
    BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({131,56,37})),7.0);
    BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({132,76,17})),8.0);
    BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({37,86,27})),9.0);
    BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({35,36,7})),10.0);

    sparseGrid.template hostToDevice<0>();

    // Elided: CudaMemory mem allocation and the probe keys `key`/`key2`.
    CUDA_LAUNCH_DIM3(sparse_grid_get_test,1,1,sparseGrid.toKernel(),key,(float *)mem.getDevicePointer());

    mem.deviceToHost(); // reconstructed: the copy back was elided
    BOOST_REQUIRE_EQUAL(*(float *)mem.getPointer(),2.0);

    CUDA_LAUNCH_DIM3(sparse_grid_get_test,1,1,sparseGrid.toKernel(),key2,(float *)mem.getDevicePointer());

    mem.deviceToHost(); // reconstructed
    BOOST_REQUIRE_EQUAL(*(float *)mem.getPointer(),8.0);
}
struct conv_coeff // reconstructed name: the struct declaration was lost in extraction
{
    float coeff[3][3][3];
};
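// Dense 3x3x3 convolution functor for applyStencils: stages the data block
// plus one ghost layer in shared memory, then accumulates the 27 weighted
// neighbours of every existing, non-padding point.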
template<unsigned int dim, unsigned int p_src, unsigned int p_dst>
struct Conv3x3x3 // name recovered from its use in test_convolution_3x3x3 below
{
    static constexpr unsigned int supportRadius = 1;

    template<typename SparseGridT, typename DataBlockWrapperT>
    static inline __device__ void stencil(
            SparseGridT & sparseGrid,
            const unsigned int dataBlockId,
            const openfpm::sparse_index<unsigned int> dataBlockIdPos, // reconstructed from use below
            const unsigned int offset,                                // reconstructed from use below
            DataBlockWrapperT & dataBlockLoad,
            DataBlockWrapperT & dataBlockStore,
            unsigned char curMask,
            conv_coeff & cc) // reconstructed from use below
    {
        typedef typename SparseGridT::AggregateBlockType AggregateT;
        typedef ScalarTypeOf<AggregateT, p_src> ScalarT;

        constexpr unsigned int enlargedBlockSize = IntPow<
                SparseGridT::getBlockEdgeSize() + 2 * supportRadius, dim>::value;

        __shared__ ScalarT enlargedBlock[enlargedBlockSize];

        sparseGrid.template loadGhostBlock<p_src>(dataBlockLoad,dataBlockIdPos,enlargedBlock);
        __syncthreads(); // reconstructed: barrier after the shared-memory load

        if ((curMask & mask_sparse::EXIST) && !(curMask & mask_sparse::PADDING))
        {
            const auto coord = sparseGrid.getCoordInEnlargedBlock(offset);
            const auto linId = sparseGrid.getLinIdInEnlargedBlock(offset);

            ScalarT tot = 0.0; // reconstructed accumulator
            for (int i = 0; i < dim; ++i)
            {
                for (int j = 0; j < dim; ++j)
                {
                    for (int k = 0; k < dim; ++k)
                    {
                        // Reconstructed: build the neighbour offset key in [-1,1]^3
                        grid_key_dx<dim, int> key;
                        key.set_d(0, i - 1);
                        key.set_d(1, j - 1);
                        key.set_d(2, k - 1);

                        auto nPlusId = sparseGrid.getNeighbourLinIdInEnlargedBlock(coord, key);
                        tot += enlargedBlock[nPlusId] * cc.coeff[i][j][k];
                    }
                }
            }

            dataBlockStore.template get<p_dst>()[offset] = tot;
        }
    }

    template <typename SparseGridT, typename CtxT>
    static inline void __host__ flush(SparseGridT & sparseGrid, CtxT & ctx)
    {
        sparseGrid.template flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);
    }
};
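// Variant of Conv3x3x3 that skips the shared-memory staging: each neighbour is
// resolved through getNNPoint and read directly from global memory
// (STENCIL_MODE_INPLACE_NO_SHARED).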
template<unsigned int dim, unsigned int p_src, unsigned int p_dst>
struct Conv3x3x3_noshared // name recovered from its use in test_convolution_3x3x3_no_shared below
{
    static constexpr unsigned int supportRadius = 1;
    typedef NNFull<dim> stencil_type; // reconstructed: getNNPoint below requires it

    template<typename SparseGridT, typename DataBlockWrapperT>
    static inline __device__ void stencil(
            SparseGridT & sparseGrid,
            const unsigned int dataBlockId,
            const openfpm::sparse_index<unsigned int> dataBlockIdPos, // reconstructed
            const unsigned int offset,                                // reconstructed
            DataBlockWrapperT & dataBlockLoad,
            DataBlockWrapperT & dataBlockStore,
            unsigned char curMask,
            conv_coeff & cc) // reconstructed
    {
        typedef typename SparseGridT::AggregateBlockType AggregateT;
        typedef ScalarTypeOf<AggregateT, p_src> ScalarT;

        if ((curMask & mask_sparse::EXIST) && !(curMask & mask_sparse::PADDING))
        {
            ScalarT tot = 0.0; // reconstructed accumulator
            for (int i = 0; i < dim; ++i)
            {
                for (int j = 0; j < dim; ++j)
                {
                    for (int k = 0; k < dim; ++k)
                    {
                        // Reconstructed: build the neighbour offset key in [-1,1]^3
                        grid_key_dx<dim, int> key;
                        key.set_d(0, i - 1);
                        key.set_d(1, j - 1);
                        key.set_d(2, k - 1);

                        // Fetch the neighbour directly from global memory (no shared staging)
                        block_offset<int> pos = sparseGrid.template getNNPoint<stencil_type>(dataBlockIdPos, offset, key);

                        tot += sparseGrid.template get<p_src>(pos) * cc.coeff[i][j][k];
                    }
                }
            }

            dataBlockStore.template get<p_dst>()[offset] = tot;
        }
    }

    template <typename SparseGridT, typename CtxT>
    static inline void __host__ flush(SparseGridT & sparseGrid, CtxT & ctx)
    {
        sparseGrid.template flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);
    }
};
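// Builds a spherical shell of points, applies the all-ones 3x3x3 convolution,
// and checks that every interior point of the output equals 27x the input.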
template<typename SparseGridZ>
void test_convolution_3x3x3()
{
    size_t sz[] = {1000,1000,1000};

    SparseGridZ sparseGrid(sz);
    // Elided: gpu context ctx and the sphere centre start.
    sparseGrid.template setBackgroundValue<0>(0);

    // now create a 3D sphere
    dim3 gridSize(32,32,32);

    sparseGrid.setGPUInsertBuffer(gridSize,dim3(1));
    CUDA_LAUNCH_DIM3((insertSphere3D_radius<0>),
            gridSize, dim3(SparseGridZ::blockEdgeSize_*SparseGridZ::blockEdgeSize_*SparseGridZ::blockEdgeSize_,1,1),
            sparseGrid.toKernel(), start, 64, 56, 1);

    sparseGrid.template flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    sparseGrid.template findNeighbours<NNFull<3>>(); // the full 27-point neighbourhood is needed

    sparseGrid.template setNNType<NNFull<3>>();
    sparseGrid.template tagBoundaries<NNFull<3>>(ctx);

    conv_coeff cc; // reconstructed: declaration elided
    for (int i = 0; i < 3; i++)
    {
        for (int j = 0; j < 3; j++)
        {
            for (int k = 0; k < 3; k++)
            {
                cc.coeff[k][j][i] = 1.0;
            }
        }
    }

    sparseGrid.template applyStencils<Conv3x3x3<3,0,1>>(sparseGrid.getBox(),STENCIL_MODE_INPLACE,cc);
    sparseGrid.template deviceToHost<0,1>();

    auto & bm = sparseGrid.private_get_blockMap();
    auto & dataVector = bm.getDataBuffer();

    BOOST_REQUIRE(dataVector.size() != 0);

    bool match = true; // reconstructed
    for (size_t i = 0; i < dataVector.size(); i++)
    {
        for (size_t j = 0; j < 64; j++)
        {
            if (dataVector.template get<2>(i)[j] == 1)
            {
                // With all-ones coefficients the output must be 27x the constant input
                match &= dataVector.template get<0>(i)[j]*27 == dataVector.template get<1>(i)[j];
            }
        }
    }

    BOOST_REQUIRE_EQUAL(match, true);
}
template<typename SparseGridZ>
void test_convolution_3x3x3_no_shared()
{
    size_t sz[] = {1000,1000,1000};

    SparseGridZ sparseGrid(sz);
    // Elided: gpu context ctx and the sphere centre start.
    sparseGrid.template setBackgroundValue<0>(0);

    // now create a 3D sphere
    dim3 gridSize(32,32,32);

    sparseGrid.setGPUInsertBuffer(gridSize,dim3(1));
    CUDA_LAUNCH_DIM3((insertSphere3D_radius<0>),
            gridSize, dim3(SparseGridZ::blockEdgeSize_*SparseGridZ::blockEdgeSize_*SparseGridZ::blockEdgeSize_,1,1),
            sparseGrid.toKernel(), start, 64, 56, 1);

    sparseGrid.template flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    sparseGrid.template findNeighbours<NNFull<3>>(); // the full 27-point neighbourhood is needed

    sparseGrid.template setNNType<NNFull<SparseGridZ::dims>>();
    sparseGrid.template tagBoundaries<NNFull<3>>(ctx, No_check(), tag_boundaries::CALCULATE_EXISTING_POINTS);

    conv_coeff cc; // reconstructed: declaration elided
    for (int i = 0; i < 3; i++)
    {
        for (int j = 0; j < 3; j++)
        {
            for (int k = 0; k < 3; k++)
            {
                cc.coeff[k][j][i] = 1.0;
            }
        }
    }

    sparseGrid.template applyStencils<Conv3x3x3_noshared<SparseGridZ::dims,0,1>>(sparseGrid.getBox(),STENCIL_MODE_INPLACE_NO_SHARED,cc);

    sparseGrid.template deviceToHost<0,1>();

    auto & bm = sparseGrid.private_get_blockMap();
    auto & dataVector = bm.getDataBuffer();

    bool match = true; // reconstructed
    for (size_t i = 0; i < dataVector.size(); i++)
    {
        for (size_t j = 0; j < 64; j++)
        {
            if (dataVector.template get<2>(i)[j] == 1)
            {
                match &= dataVector.template get<0>(i)[j]*27 == dataVector.template get<1>(i)[j];
            }
        }
    }

    BOOST_REQUIRE_EQUAL(match, true);
}
BOOST_AUTO_TEST_CASE(test3x3x3convolution_no_shared)
{
    constexpr unsigned int dim = 3;
    constexpr unsigned int blockEdgeSize = 4;
    typedef aggregate<float,float> AggregateT; // reconstructed: the typedef line was elided

    test_convolution_3x3x3_no_shared<SparseGridGpu<dim, AggregateT, blockEdgeSize, 64, long int>>();
}

BOOST_AUTO_TEST_CASE(test3x3x3convolution_no_shared_z_morton)
{
    constexpr unsigned int dim = 3;
    constexpr unsigned int blockEdgeSize = 4;
    typedef aggregate<float,float> AggregateT; // reconstructed

    test_convolution_3x3x3_no_shared<SparseGridGpu_z<dim, AggregateT, blockEdgeSize, 64, long int>>();
}

BOOST_AUTO_TEST_CASE(test3x3x3convolution)
{
    constexpr unsigned int dim = 3;
    constexpr unsigned int blockEdgeSize = 4;
    typedef aggregate<float,float> AggregateT; // reconstructed

    test_convolution_3x3x3<SparseGridGpu<dim, AggregateT, blockEdgeSize, 64, long int>>();
}

BOOST_AUTO_TEST_CASE(test3x3x3convolution_morton_z)
{
    constexpr unsigned int dim = 3;
    constexpr unsigned int blockEdgeSize = 4;
    typedef aggregate<float,float> AggregateT; // reconstructed

    test_convolution_3x3x3<SparseGridGpu_z<dim, AggregateT, blockEdgeSize, 64, long int>>();
}
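// Host-iterator tests: fill a sphere on the device, copy it back, and walk it
// with the host iterator, checking values and the exact point count.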
BOOST_AUTO_TEST_CASE(test_sparse_grid_iterator_sub_host)
{
    constexpr unsigned int dim = 3;
    constexpr unsigned int blockEdgeSize = 4;
    // Elided: aggregate/grid typedefs.

    size_t sz[3] = {768,768,768};
    dim3 gridSize(32,32,32);

    // Elided: sparseGrid/ctx construction and the sphere centre start1.
    sparseGrid.template setBackgroundValue<0>(0);

    sparseGrid.setGPUInsertBuffer(gridSize,dim3(1));
    CUDA_LAUNCH_DIM3((insertSphere3D<0>),
            gridSize, dim3(blockEdgeSize*blockEdgeSize*blockEdgeSize,1,1),
            sparseGrid.toKernel(), start1, 32, 0, 1);

    sparseGrid.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    sparseGrid.template deviceToHost<0>();

    // Elided: the sub-region keys start/stop.
    bool match = true; // reconstructed
    size_t count = 0;  // reconstructed
    auto it = sparseGrid.getIterator(start,stop);
    while (it.isNext()) // loop structure reconstructed around the surviving body
    {
        auto key = it.get();

        match &= sparseGrid.template get<0>(key) == 1.0;

        sparseGrid.template get<0>(key) = 5.0;

        count++;
        ++it;
    }

    BOOST_REQUIRE_EQUAL(match, true);
    BOOST_REQUIRE_EQUAL(count,42875);
}
BOOST_AUTO_TEST_CASE(test_sparse_grid_iterator_host)
{
    constexpr unsigned int dim = 3;
    constexpr unsigned int blockEdgeSize = 4;
    // Elided: aggregate/grid typedefs.

    size_t sz[3] = {512,512,512};
    dim3 gridSize(32,32,32);

    // Elided: sparseGrid/ctx construction and the sphere centre start1.
    sparseGrid.template setBackgroundValue<0>(0);

    sparseGrid.setGPUInsertBuffer(gridSize,dim3(1));
    CUDA_LAUNCH_DIM3((insertSphere3D<0>),
            gridSize, dim3(blockEdgeSize*blockEdgeSize*blockEdgeSize,1,1),
            sparseGrid.toKernel(), start1, 64, 32, 1);

    sparseGrid.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    sparseGrid.template deviceToHost<0>();

    bool match = true; // reconstructed
    size_t count = 0;  // reconstructed
    auto it = sparseGrid.getIterator();
    while (it.isNext()) // loop structure reconstructed around the surviving body
    {
        auto key = it.get();

        match &= sparseGrid.template get<0>(key) == 1.0;

        count++;
        ++it;
    }

    BOOST_REQUIRE(sparseGrid.countExistingElements() != 0);

    BOOST_REQUIRE_EQUAL(sparseGrid.countExistingElements(),count);
    BOOST_REQUIRE_EQUAL(match, true);
}
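// Pack/unpack tests: the expected packRequest sizes below are derived from the
// serialized layout (chunk ids, per-chunk point counts, and per-point payload
// of value, offset and mask).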
BOOST_AUTO_TEST_CASE(test_pack_request)
{
    size_t sz[] = {1000,1000,1000};

    constexpr int blockEdgeSize = 4;
    constexpr int dim = 3;

    // Elided: aggregate and SparseGridZ typedefs.
    SparseGridZ sparseGrid(sz);
    // Elided: gpu context ctx.
    sparseGrid.template setBackgroundValue<0>(0);

    // now create a 3D sphere
    dim3 gridSize(32,32,32);
    // Elided: centre point start.

    sparseGrid.setGPUInsertBuffer(gridSize,dim3(1));
    CUDA_LAUNCH_DIM3((insertSphere3D_radius<0>),
            gridSize, dim3(SparseGridZ::blockEdgeSize_*SparseGridZ::blockEdgeSize_*SparseGridZ::blockEdgeSize_,1,1),
            sparseGrid.toKernel(), start, 64, 56, 1);

    sparseGrid.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);
    sparseGrid.template deviceToHost<0>();

    size_t cnt = sparseGrid.countExistingElements();

    size_t req = 0; // reconstructed
    sparseGrid.packRequest<0>(req,ctx);

    // Expected size: 16 bytes per chunk id plus, per point, the float value,
    // a 2-byte offset and a 1-byte mask
    size_t tot = /* leading header term elided in the excerpt */
            sparseGrid.private_get_index_array().size()*16 + 8 +
            cnt*(sizeof(float) + 2 + 1);

    BOOST_REQUIRE_EQUAL(req,tot);
}
BOOST_AUTO_TEST_CASE(test_MergeIndexMap)
{
    size_t sz[] = {1000,1000,1000};

    constexpr int blockEdgeSize = 4;
    constexpr int dim = 3;

    // Elided: aggregate and SparseGridZ typedefs.
    SparseGridZ sparseGrid(sz);
    // Elided: gpu context ctx.
    sparseGrid.template setBackgroundValue<0>(0);

    // now create a 3D sphere
    dim3 gridSize(32,32,32);
    // Elided: centre point start.

    sparseGrid.setGPUInsertBuffer(gridSize,dim3(1));
    CUDA_LAUNCH_DIM3((insertSphere3D_radius<0>),
            gridSize, dim3(SparseGridZ::blockEdgeSize_*SparseGridZ::blockEdgeSize_*SparseGridZ::blockEdgeSize_,1,1),
            sparseGrid.toKernel(), start, 64, 56, 1);

    // number of chunks already present before the flush merges in the added ones
    size_t sz_b = sparseGrid.private_get_index_array().size();

    sparseGrid.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    auto & m_map = sparseGrid.getMergeIndexMapVector();
    auto & a_map = sparseGrid.getMappingVector();

    m_map.template deviceToHost<0>();
    a_map.template deviceToHost<0>();

    bool match = true; // reconstructed

    auto & indexes = sparseGrid.private_get_index_array();
    indexes.template deviceToHost<0>();
    auto & a_indexes = sparseGrid.private_get_add_index_array();
    a_indexes.template deviceToHost<0>();
    auto & m_out = sparseGrid.getSegmentToOutMap();
    m_out.template deviceToHost<0>();

    for (int i = 0; i < m_map.size(); i++)
    {
        if (m_map.template get<0>(i) >= sz_b)
        {
            // entries past sz_b come from the add buffer: the merged chunk id must
            // match the id recorded in the segment-to-output map
            int c = a_map.template get<0>(m_map.template get<0>(i) - sz_b);
            int ci = m_out.template get<0>(i);

            match &= (a_indexes.template get<0>(c) == indexes.template get<0>(ci));
        }
    }

    BOOST_REQUIRE_EQUAL(match, true);
}
BOOST_AUTO_TEST_CASE(test_pack_request_with_iterator)
{
    size_t sz[] = {1000,1000,1000};

    constexpr int blockEdgeSize = 4;
    constexpr int dim = 3;

    // Elided: aggregate and SparseGridZ typedefs.
    SparseGridZ sparseGrid(sz);
    // Elided: gpu context ctx.
    sparseGrid.template setBackgroundValue<0>(0);

    // now create a 3D sphere
    dim3 gridSize(32,32,32);
    // Elided: centre point start.

    sparseGrid.setGPUInsertBuffer(gridSize,dim3(1));
    CUDA_LAUNCH_DIM3((insertSphere3D_radius<0>),
            gridSize, dim3(SparseGridZ::blockEdgeSize_*SparseGridZ::blockEdgeSize_*SparseGridZ::blockEdgeSize_,1,1),
            sparseGrid.toKernel(), start, 64, 56, 1);

    sparseGrid.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    sparseGrid.packReset();

    size_t req = 0; // reconstructed
    {
        // Elided: the two sub-domain key pairs start1/stop1 and start2/stop2.
        auto it1 = sparseGrid.getIterator(start1,stop1);
        sparseGrid.template packRequest<0>(it1,req);

        auto it2 = sparseGrid.getIterator(start2,stop2);
        sparseGrid.template packRequest<0>(it2,req);

        sparseGrid.template packCalculate<0>(req,ctx);
    }

    sparseGrid.template deviceToHost<0>();

    size_t cnt = sparseGrid.countExistingElements();

    // Expected size for the two sub-domains: chunk ids, chunk offsets, then the
    // per-point value (4), offset (2) and mask (1), each 8-byte aligned
    size_t tot = /* leading header terms elided in the excerpt */
            align_number(8,4685*8) + align_number(8,4475*8) +
            align_number(8,4686*4) + align_number(8,4476*4) +
            align_number(8,185807*4) + align_number(8,176787*4) +
            align_number(8,185807*2) + align_number(8,176787*2) +
            align_number(8,185807*1) + align_number(8,176787*1);

    BOOST_REQUIRE_EQUAL(req,tot);

    // Repeat with the same sub-domain requested twice
    sparseGrid.packReset();

    req = 0; // reconstructed
    {
        auto it1 = sparseGrid.getIterator(start1,stop1);
        sparseGrid.template packRequest<0>(it1,req);

        auto it2 = sparseGrid.getIterator(start1,stop1);
        sparseGrid.template packRequest<0>(it2,req);

        sparseGrid.template packCalculate<0>(req,ctx);
    }

    tot = /* leading header terms elided in the excerpt */
            2*align_number(8,sparseGrid.private_get_index_array().size()*8) +
            2*align_number(8,(sparseGrid.private_get_index_array().size()+1)*4) +
            2*align_number(8,cnt*4) +
            2*align_number(8,cnt*2) +
            2*align_number(8,cnt*1);

    BOOST_REQUIRE_EQUAL(req,tot);
}
BOOST_AUTO_TEST_CASE(sparsegridgpu_remove_test)
{
    size_t sz[] = {1000,1000,1000};

    constexpr int blockEdgeSize = 4;
    constexpr int dim = 3;

    // Elided: aggregate and SparseGridZ typedefs.
    SparseGridZ sparseGrid(sz);
    // Elided: gpu context ctx.
    sparseGrid.template setBackgroundValue<0>(0);

    // now create a 3D sphere
    dim3 gridSize(32,32,32);
    // Elided: centre point start (the radius check below implies {320,320,320}).

    sparseGrid.setGPUInsertBuffer(gridSize,dim3(1));
    CUDA_LAUNCH_DIM3((insertSphere3D_radius<0>),
            gridSize, dim3(SparseGridZ::blockEdgeSize_*SparseGridZ::blockEdgeSize_*SparseGridZ::blockEdgeSize_,1,1),
            sparseGrid.toKernel(), start, 64, 56, 1);

    sparseGrid.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    // Elided: the three boxes remove_section1/2/3 cut out of the shell.
    sparseGrid.remove(remove_section1);
    sparseGrid.remove(remove_section2);
    sparseGrid.remove(remove_section3);

    sparseGrid.removeAddUnpackFinalize<>(ctx,0);

    sparseGrid.deviceToHost<0>();

    // Every surviving point must lie on the shell and outside the removed sections
    bool match = true; // reconstructed
    auto it = sparseGrid.getIterator();
    while (it.isNext()) // loop structure and the key/pt reads reconstructed
    {
        auto key = it.get();
        auto pt = key.toPoint();

        float radius = sqrt((pt.get(0) - 320)*(pt.get(0) - 320) +
                            (pt.get(1) - 320)*(pt.get(1) - 320) +
                            (pt.get(2) - 320)*(pt.get(2) - 320));

        if (radius < 55.99 || radius > 64.01)
        {match = false;} // reconstructed branch bodies

        if (remove_section1.isInside(pt) == true)
        {match = false;}

        if (remove_section2.isInside(pt) == true)
        {match = false;}

        if (remove_section3.isInside(pt) == true)
        {match = false;}

        ++it;
    }

    BOOST_REQUIRE_EQUAL(match, true);
}
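// Packs four boxed regions of the source grid into one buffer and unpacks them
// into four destination boxes, optionally checking the chunk and point counts
// of each packed region along the way.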
template<typename SG_type>
void pack_unpack_test(SG_type & sparseGridDst, SG_type & sparseGridSrc,
                      Box<3,size_t> & box1_src, Box<3,size_t> & box2_src, // parameter list reconstructed
                      Box<3,size_t> & box3_src, Box<3,size_t> & box4_src, // from the identifiers used in
                      Box<3,size_t> & box1_dst, Box<3,size_t> & box2_dst, // the body; the original lines
                      Box<3,size_t> & box3_dst, Box<3,size_t> & box4_dst, // were elided
                      mgpu::ofp_context_t & ctx, bool test_pack)
{
    // Compute the required buffer size for the four source regions
    sparseGridSrc.packReset();

    size_t req = 0; // reconstructed
    auto sub_it = sparseGridSrc.getIterator(box1_src.getKP1(),box1_src.getKP2());
    sparseGridSrc.template packRequest<0,1>(sub_it,req);

    sub_it = sparseGridSrc.getIterator(box2_src.getKP1(),box2_src.getKP2());
    sparseGridSrc.template packRequest<0,1>(sub_it,req);

    sub_it = sparseGridSrc.getIterator(box3_src.getKP1(),box3_src.getKP2());
    sparseGridSrc.template packRequest<0,1>(sub_it,req);

    sub_it = sparseGridSrc.getIterator(box4_src.getKP1(),box4_src.getKP2());
    sparseGridSrc.template packRequest<0,1>(sub_it,req);

    sparseGridSrc.template packCalculate<0,1>(req,ctx);

    // Reconstructed (elided): allocate the pack buffer and the packing status
    CudaMemory mem;
    mem.resize(req);
    ExtPreAlloc<CudaMemory> prAlloc_prp(req,mem);
    prAlloc_prp.incRef();
    Pack_stat sts;

    // Pack the four regions one after the other into the buffer
    sub_it = sparseGridSrc.getIterator(box1_src.getKP1(),box1_src.getKP2());
    sparseGridSrc.template pack<0,1>(prAlloc_prp,sub_it,sts);

    sub_it = sparseGridSrc.getIterator(box2_src.getKP1(),box2_src.getKP2());
    sparseGridSrc.template pack<0,1>(prAlloc_prp,sub_it,sts);

    sub_it = sparseGridSrc.getIterator(box3_src.getKP1(),box3_src.getKP2());
    sparseGridSrc.template pack<0,1>(prAlloc_prp,sub_it,sts);

    sub_it = sparseGridSrc.getIterator(box4_src.getKP1(),box4_src.getKP2());
    sparseGridSrc.template pack<0,1>(prAlloc_prp,sub_it,sts);

    sparseGridSrc.template packFinalize<0,1>(prAlloc_prp,sts);
    if (test_pack == true)
    {
        // Walk the packed buffer and check chunk/point counts for each region
        size_t ncnk = *(size_t *)mem.getPointer(); // reconstructed: the first read was elided
        BOOST_REQUIRE_EQUAL(ncnk,1107);
        size_t actual_offset = ncnk*sizeof(size_t) + sizeof(size_t) + 2*3*sizeof(int);
        mem.deviceToHost(actual_offset + ncnk*sizeof(unsigned int),
                         actual_offset + ncnk*sizeof(unsigned int) + sizeof(unsigned int));
        unsigned int n_pnt = *(unsigned int *)((unsigned char *)mem.getPointer() + actual_offset + ncnk*sizeof(unsigned int));
        BOOST_REQUIRE_EQUAL(n_pnt,41003);

        actual_offset += align_number(sizeof(size_t),(ncnk+1)*sizeof(unsigned int));
        actual_offset += align_number(sizeof(size_t),n_pnt*(16));
        actual_offset += align_number(sizeof(size_t),n_pnt*sizeof(short int));
        actual_offset += align_number(sizeof(size_t),n_pnt*sizeof(unsigned char));

        // second packed region
        ncnk = *(size_t *)((unsigned char *)mem.getPointer() + actual_offset);
        BOOST_REQUIRE_EQUAL(ncnk,1420);
        actual_offset += ncnk*sizeof(size_t) + sizeof(size_t) + 2*3*sizeof(int);
        mem.deviceToHost(actual_offset + ncnk*sizeof(unsigned int),
                         actual_offset + ncnk*sizeof(unsigned int) + sizeof(unsigned int));
        n_pnt = *(unsigned int *)((unsigned char *)mem.getPointer() + actual_offset + ncnk*sizeof(unsigned int));
        BOOST_REQUIRE_EQUAL(n_pnt,54276);

        actual_offset += align_number(sizeof(size_t),(ncnk+1)*sizeof(unsigned int));
        actual_offset += align_number(sizeof(size_t),n_pnt*(16));
        actual_offset += align_number(sizeof(size_t),n_pnt*sizeof(short int));
        actual_offset += align_number(sizeof(size_t),n_pnt*sizeof(unsigned char));

        // third packed region
        ncnk = *(size_t *)((unsigned char *)mem.getPointer() + actual_offset);
        BOOST_REQUIRE_EQUAL(ncnk,610);
        actual_offset += ncnk*sizeof(size_t) + sizeof(size_t) + 2*3*sizeof(int);
        mem.deviceToHost(actual_offset + ncnk*sizeof(unsigned int),
                         actual_offset + ncnk*sizeof(unsigned int) + sizeof(unsigned int));
        n_pnt = *(unsigned int *)((unsigned char *)mem.getPointer() + actual_offset + ncnk*sizeof(unsigned int));
        BOOST_REQUIRE_EQUAL(n_pnt,20828);

        actual_offset += align_number(sizeof(size_t),(ncnk+1)*sizeof(unsigned int));
        actual_offset += align_number(sizeof(size_t),n_pnt*(16));
        actual_offset += align_number(sizeof(size_t),n_pnt*sizeof(short int));
        actual_offset += align_number(sizeof(size_t),n_pnt*sizeof(unsigned char));

        // fourth packed region
        ncnk = *(size_t *)((unsigned char *)mem.getPointer() + actual_offset);
        BOOST_REQUIRE_EQUAL(ncnk,739);
        actual_offset += ncnk*sizeof(size_t) + sizeof(size_t) + 2*3*sizeof(int);
        mem.deviceToHost(actual_offset + ncnk*sizeof(unsigned int),
                         actual_offset + ncnk*sizeof(unsigned int) + sizeof(unsigned int));
        n_pnt = *(unsigned int *)((unsigned char *)mem.getPointer() + actual_offset + ncnk*sizeof(unsigned int));
        BOOST_REQUIRE_EQUAL(n_pnt,27283);
    }

    prAlloc_prp.reset();

    Unpack_stat ps; // reconstructed: declaration elided

    sparseGridDst.removeAddUnpackReset();

    // Unpack every region into its destination box, clearing the box first
    auto sub2 = sparseGridDst.getIterator(box1_dst.getKP1(),box1_dst.getKP2());
    sparseGridDst.remove(box1_dst);
    sparseGridDst.template unpack<0,1>(prAlloc_prp,sub2,ps,ctx);

    sub2 = sparseGridDst.getIterator(box2_dst.getKP1(),box2_dst.getKP2());
    sparseGridDst.remove(box2_dst);
    sparseGridDst.template unpack<0,1>(prAlloc_prp,sub2,ps,ctx);

    sub2 = sparseGridDst.getIterator(box3_dst.getKP1(),box3_dst.getKP2());
    sparseGridDst.remove(box3_dst);
    sparseGridDst.template unpack<0,1>(prAlloc_prp,sub2,ps,ctx);

    sub2 = sparseGridDst.getIterator(box4_dst.getKP1(),box4_dst.getKP2());
    sparseGridDst.remove(box4_dst);
    sparseGridDst.template unpack<0,1>(prAlloc_prp,sub2,ps,ctx);

    sparseGridDst.template removeAddUnpackFinalize<0,1>(ctx,0);

    sparseGridDst.template deviceToHost<0,1>();
}
BOOST_AUTO_TEST_CASE(sparsegridgpu_pack_unpack)
{
    size_t sz[] = {1000,1000,1000};

    constexpr int blockEdgeSize = 4;
    constexpr int dim = 3;

    // Elided: the src/dst boxes (box1_src..box4_src, box1_dst..box4_dst), the
    // aggregate and SparseGridZ typedefs, and the gpu context ctx.

    SparseGridZ sparseGridSrc(sz);
    SparseGridZ sparseGridDst(sz);

    sparseGridSrc.template setBackgroundValue<0>(0);
    sparseGridDst.template setBackgroundValue<0>(0);

    // now create a 3D sphere in the source grid
    dim3 gridSize(32,32,32);
    // Elided: centre point start.

    sparseGridSrc.setGPUInsertBuffer(gridSize,dim3(1));
    CUDA_LAUNCH_DIM3((insertSphere3D_radiusV<0>),
            gridSize, dim3(SparseGridZ::blockEdgeSize_*SparseGridZ::blockEdgeSize_*SparseGridZ::blockEdgeSize_,1,1),
            sparseGridSrc.toKernel(), start, 64, 56, 1);

    // Elided: flush of sparseGridSrc.
    pack_unpack_test(sparseGridDst,sparseGridSrc,
                     box1_src,box2_src,box3_src,box4_src,   // argument list reconstructed to
                     box1_dst,box2_dst,box3_dst,box4_dst,   // match the helper above; the
                     ctx,true);                             // original lines were elided

    sparseGridDst.template deviceToHost<0,1>();

    // Count the points landing in each destination box and verify that, after
    // re-centring on the copy each box received, they lie on the spherical shell
    size_t cnt1 = 0, cnt2 = 0, cnt3 = 0, cnt4 = 0; // reconstructed
    bool match = true;                             // reconstructed

    auto it = sparseGridDst.getIterator();
    while (it.isNext()) // loop structure, key read and box1 branch opening reconstructed
    {
        auto p = it.get();
        auto pt = p.toPoint();

        if (box1_dst.isInside(pt) == true)
        {
            const long int x = (long int)pt.get(0) - (start.get(0) + gridSize.x / 2 * blockEdgeSize);
            const long int y = (long int)pt.get(1) - (start.get(1) + gridSize.y / 2 * blockEdgeSize);
            const long int z = (long int)pt.get(2) - (start.get(2) + gridSize.z / 2 * blockEdgeSize);

            float radius = sqrt((float) (x*x + y*y + z*z));

            bool is_active = radius < 64 && radius > 56;

            if (is_active == true)
            {cnt1++;}          // reconstructed branch bodies: count shell points,
            else
            {match = false;}   // anything off the shell fails the test
        }
        else if (box2_dst.isInside(pt) == true)
        {
            const long int x = (long int)pt.get(0) - (start.get(0) - 46 + gridSize.x / 2 * blockEdgeSize);
            const long int y = (long int)pt.get(1) - (start.get(1) + gridSize.y / 2 * blockEdgeSize);
            const long int z = (long int)pt.get(2) - (start.get(2) + gridSize.z / 2 * blockEdgeSize);

            float radius = sqrt((float) (x*x + y*y + z*z));

            bool is_active = radius < 64 && radius > 56;

            if (is_active == true)
            {cnt2++;}
            else
            {match = false;}
        }
        else if (box3_dst.isInside(pt) == true)
        {
            const long int x = (long int)pt.get(0) - (start.get(0) + 44 + gridSize.x / 2 * blockEdgeSize);
            const long int y = (long int)pt.get(1) - (start.get(1) + gridSize.y / 2 * blockEdgeSize);
            const long int z = (long int)pt.get(2) - (start.get(2) + gridSize.z / 2 * blockEdgeSize);

            float radius = sqrt((float) (x*x + y*y + z*z));

            bool is_active = radius < 64 && radius > 56;

            if (is_active == true)
            {cnt3++;}
            else
            {match = false;}
        }
        else if (box4_dst.isInside(pt) == true)
        {
            const long int x = (long int)pt.get(0) - (start.get(0) + gridSize.x / 2 * blockEdgeSize);
            const long int y = (long int)pt.get(1) - (start.get(1) + gridSize.y / 2 * blockEdgeSize);
            const long int z = (long int)pt.get(2) - (start.get(2) + gridSize.z / 2 * blockEdgeSize);

            float radius = sqrt((float) (x*x + y*y + z*z));

            bool is_active = radius < 64 && radius > 56;

            if (is_active == true)
            {cnt4++;}
            else
            {match = false;}
        }

        ++it;
    }

    BOOST_REQUIRE_EQUAL(match, true);
    BOOST_REQUIRE_EQUAL(cnt1,41003);
    BOOST_REQUIRE_EQUAL(cnt2,54276);
    BOOST_REQUIRE_EQUAL(cnt3,20828);
    BOOST_REQUIRE_EQUAL(cnt4,27283);
    // Now remove the even-radius shells from the source sphere and repeat
    sparseGridSrc.setGPUInsertBuffer(gridSize,dim3(1));
    CUDA_LAUNCH_DIM3((removeSphere3D_even_radiusV<0>),
            gridSize, dim3(SparseGridZ::blockEdgeSize_*SparseGridZ::blockEdgeSize_*SparseGridZ::blockEdgeSize_,1,1),
            sparseGridSrc.toKernel(), start, 64, 56, 1);

    pack_unpack_test(sparseGridDst,sparseGridSrc,
                     box1_src,box2_src,box3_src,box4_src,   // argument list reconstructed as
                     box1_dst,box2_dst,box3_dst,box4_dst,   // above; test_pack presumably false
                     ctx,false);                            // since the counts no longer match

    sparseGridDst.template deviceToHost<0,1>();

    cnt1 = 0; cnt2 = 0; cnt3 = 0; cnt4 = 0; // reconstructed
    match = true;                           // reconstructed

    auto it2 = sparseGridDst.getIterator();
    while (it2.isNext())
    {
        auto p = it2.get(); // reconstructed: the key read was elided
        auto pt = p.toPoint();

        if (box1_dst.isInside(pt) == true) // reconstructed branch opening
        {
            const long int x = (long int)pt.get(0) - (start.get(0) + gridSize.x / 2 * blockEdgeSize);
            const long int y = (long int)pt.get(1) - (start.get(1) + gridSize.y / 2 * blockEdgeSize);
            const long int z = (long int)pt.get(2) - (start.get(2) + gridSize.z / 2 * blockEdgeSize);

            float radius = sqrt((float) (x*x + y*y + z*z));

            bool is_active = radius < 64 && radius > 56;

            if (is_active == true)
            {cnt1++;}          // reconstructed branch bodies, as in the first loop
            else
            {match = false;}
        }
        else if (box2_dst.isInside(pt) == true)
        {
            const long int x = (long int)pt.get(0) - (start.get(0) - 46 + gridSize.x / 2 * blockEdgeSize);
            const long int y = (long int)pt.get(1) - (start.get(1) + gridSize.y / 2 * blockEdgeSize);
            const long int z = (long int)pt.get(2) - (start.get(2) + gridSize.z / 2 * blockEdgeSize);

            float radius = sqrt((float) (x*x + y*y + z*z));

            bool is_active = radius < 64 && radius > 56;

            if (is_active == true)
            {cnt2++;}
            else
            {match = false;}
        }
        else if (box3_dst.isInside(pt) == true)
        {
            const long int x = (long int)pt.get(0) - (start.get(0) + 44 + gridSize.x / 2 * blockEdgeSize);
            const long int y = (long int)pt.get(1) - (start.get(1) + gridSize.y / 2 * blockEdgeSize);
            const long int z = (long int)pt.get(2) - (start.get(2) + gridSize.z / 2 * blockEdgeSize);

            float radius = sqrt((float) (x*x + y*y + z*z));

            bool is_active = radius < 64 && radius > 56;

            if (is_active == true)
            {cnt3++;}
            else
            {match = false;}
        }
        else if (box4_dst.isInside(pt) == true)
        {
            const long int x = (long int)pt.get(0) - (start.get(0) + gridSize.x / 2 * blockEdgeSize);
            const long int y = (long int)pt.get(1) - (start.get(1) + gridSize.y / 2 * blockEdgeSize);
            const long int z = (long int)pt.get(2) - (start.get(2) + gridSize.z / 2 * blockEdgeSize);

            float radius = sqrt((float) (x*x + y*y + z*z));

            bool is_active = radius < 64 && radius > 56;

            if (is_active == true)
            {cnt4++;}
            else
            {match = false;}
        }

        ++it2;
    }

    BOOST_REQUIRE_EQUAL(match, true);
    BOOST_REQUIRE_EQUAL(cnt1,20520);
    BOOST_REQUIRE_EQUAL(cnt2,27152);
    BOOST_REQUIRE_EQUAL(cnt3,10423);
    BOOST_REQUIRE_EQUAL(cnt4,13649);
}
#if defined(OPENFPM_DATA_ENABLE_IO_MODULE) || defined(PERFORMANCE_TEST)
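// IO tests: build sphere geometries, optionally run a few heat-stencil
// iterations, and write the result to VTK (the 3D output is compared against a
// reference file).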
BOOST_AUTO_TEST_CASE(testSparseGridGpuOutput3DHeatStencil)
{
    constexpr unsigned int dim = 3;
    constexpr unsigned int blockEdgeSize = 4;
    // Elided: aggregate/grid typedefs.

    size_t sz[3] = {512,512,512};

    dim3 gridSize(32,32,32);

    // Elided: sparseGrid/ctx construction and the sphere centres start1/start2/start3.
    sparseGrid.template setBackgroundValue<0>(0);

    // Insert two large spherical shells and a small one
    sparseGrid.setGPUInsertBuffer(gridSize,dim3(1));
    CUDA_LAUNCH_DIM3((insertSphere3D<0>),
            gridSize, dim3(blockEdgeSize*blockEdgeSize*blockEdgeSize,1,1),
            sparseGrid.toKernel(), start1, 64, 32, 1);

    sparseGrid.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    sparseGrid.removeUnusedBuffers();

    sparseGrid.setGPUInsertBuffer(gridSize,dim3(1));
    CUDA_LAUNCH_DIM3((insertSphere3D<0>),
            gridSize, dim3(blockEdgeSize*blockEdgeSize*blockEdgeSize,1,1),
            sparseGrid.toKernel(), start2, 64, 44, 1);

    sparseGrid.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    sparseGrid.setGPUInsertBuffer(gridSize,dim3(1));
    CUDA_LAUNCH_DIM3((insertSphere3D<0>),
            gridSize, dim3(blockEdgeSize*blockEdgeSize*blockEdgeSize,1,1),
            sparseGrid.toKernel(), start3, 20, 15, 1);

    sparseGrid.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    sparseGrid.findNeighbours();
    sparseGrid.tagBoundaries(ctx);

    sparseGrid.template applyStencils<BoundaryStencilSetXRescaled<dim,0,0>>(sparseGrid.getBox(),STENCIL_MODE_INPLACE
            /* remaining boundary-profile arguments elided in the excerpt */);

    // Iterate the heat stencil
    const unsigned int maxIter = 100;
    for (unsigned int iter = 0; iter < maxIter; ++iter)
    {
        for (int innerIter = 0; innerIter < 10; ++innerIter)
        {
            // Elided: the applyStencils call(s) performing one diffusion step.
        }
    }

    sparseGrid.deviceToHost<0,1>();
    sparseGrid.write("SparseGridGPU_output3DHeatStencil.vtk");
}
BOOST_AUTO_TEST_CASE(testSparseGridGpuOutput)
{
    constexpr unsigned int dim = 2;
    constexpr unsigned int blockEdgeSize = 8;
    // Elided: aggregate/grid typedefs.

    size_t sz[2] = {1000000,1000000};
    dim3 gridSize(128,128);

    // Elided: sparseGrid/ctx construction and the circle centre start.
    sparseGrid.template setBackgroundValue<0>(0);

    sparseGrid.setGPUInsertBuffer(gridSize,dim3(1));
    CUDA_LAUNCH_DIM3((insertSphere<0>),gridSize, dim3(blockEdgeSize*blockEdgeSize,1),sparseGrid.toKernel(), start, 512, 256, 1);
    sparseGrid.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    sparseGrid.findNeighbours();
    sparseGrid.tagBoundaries(ctx);

    sparseGrid.template deviceToHost<0>();

    sparseGrid.write("SparseGridGPU_output.vtk");
}
BOOST_AUTO_TEST_CASE(testSparseGridGpuOutput3D)
{
    constexpr unsigned int dim = 3;
    constexpr unsigned int blockEdgeSize = 4;
    // Elided: aggregate/grid typedefs.

    size_t sz[3] = {512,512,512};

    dim3 gridSize(32,32,32);

    // Elided: sparseGrid/ctx construction and the sphere centre start.
    sparseGrid.template setBackgroundValue<0>(0);

    sparseGrid.setGPUInsertBuffer(gridSize,dim3(1));
    CUDA_LAUNCH_DIM3((insertSphere3D<0>),
            gridSize, dim3(blockEdgeSize*blockEdgeSize*blockEdgeSize,1,1),
            sparseGrid.toKernel(), start, 64, 56, 1);
    sparseGrid.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    sparseGrid.findNeighbours();
    sparseGrid.tagBoundaries(ctx);

    sparseGrid.template applyStencils<BoundaryStencilSetX<dim,0,0>>(sparseGrid.getBox(),STENCIL_MODE_INPLACE);

    sparseGrid.template deviceToHost<0>();

    sparseGrid.write("SparseGridGPU_output3D.vtk");

    bool test = compare("SparseGridGPU_output3D.vtk","test_data/SparseGridGPU_output3D_test.vtk");
    BOOST_REQUIRE_EQUAL(true,test);
}
#endif // OPENFPM_DATA_ENABLE_IO_MODULE || PERFORMANCE_TEST (reconstructed: closes the #if above)

BOOST_AUTO_TEST_SUITE_END()