OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
 
SparseGridGpu_tests.cu
//
// Created by tommaso on 10/06/19.
//

#define BOOST_TEST_DYN_LINK

#define DISABLE_MPI_WRITTERS

#include <boost/test/unit_test.hpp>
#include "SparseGridGpu/SparseGridGpu.hpp"
#include "SparseGridGpu/tests/utils/SparseGridGpu_testKernels.cuh"
#include "SparseGridGpu/tests/utils/SparseGridGpu_util_test.cuh"

template<unsigned int p1 , unsigned int p2, unsigned int chunksPerBlock=1, typename SparseGridType, typename ScalarT>
__global__ void insertConstantValue2(SparseGridType sparseGrid, ScalarT value)
{
    constexpr unsigned int pMask = SparseGridType::pMask;
    typedef BlockTypeOf<typename SparseGridType::AggregateType, p1> BlockT;

    sparseGrid.init();

    int x = blockIdx.x * blockDim.x + threadIdx.x;
    int y = blockIdx.y * blockDim.y + threadIdx.y;
    int z = blockIdx.z * blockDim.z + threadIdx.z;
    grid_key_dx<SparseGridType::dims, int> coord({x, y, z});

    auto pos = sparseGrid.getLinId(coord);
    unsigned int dataBlockId = pos / BlockT::size;
    unsigned int offset = pos % BlockT::size;

    auto encap = sparseGrid.insertBlock(dataBlockId);
    encap.template get<p1>()[offset] = value;
    encap.template get<p2>()[offset] = value;
    BlockMapGpu_ker<>::setExist(encap.template get<pMask>()[offset]);

    __syncthreads();

    sparseGrid.flush_block_insert();

    // Compiler avoid warning
    x++;
    y++;
    z++;
}

template<unsigned int p, typename SparseGridType>
__global__ void insertValues(SparseGridType sparseGrid)
{
    sparseGrid.init();

    const auto bDimX = blockDim.x;
    const auto bDimY = blockDim.y;
    const auto bDimZ = blockDim.z;
    const auto bIdX = blockIdx.x;
    const auto bIdY = blockIdx.y;
    const auto bIdZ = blockIdx.z;
    const auto tIdX = threadIdx.x;
    const auto tIdY = threadIdx.y;
    const auto tIdZ = threadIdx.z;
    int x = bIdX * bDimX + tIdX;
    int y = bIdY * bDimY + tIdY;
    int z = bIdZ * bDimZ + tIdZ;
    grid_key_dx<SparseGridType::dims, int> coord({x, y, z});

    size_t pos = sparseGrid.getLinId(coord);

    sparseGrid.template insert<p>(coord) = x;

    sparseGrid.flush_block_insert();

    // Compiler avoid warning
    x++;
    y++;
    z++;
}


template<unsigned int p, typename SparseGridType, typename VectorOutType>
__global__ void copyToOutputIfPadding(SparseGridType sparseGrid, VectorOutType output)
{
    const auto bDimX = blockDim.x;
    const auto bDimY = blockDim.y;
    const auto bDimZ = blockDim.z;
    const auto bIdX = blockIdx.x;
    const auto bIdY = blockIdx.y;
    const auto bIdZ = blockIdx.z;
    const auto tIdX = threadIdx.x;
    const auto tIdY = threadIdx.y;
    const auto tIdZ = threadIdx.z;
    int x = bIdX * bDimX + tIdX;
    int y = bIdY * bDimY + tIdY;
    int z = bIdZ * bDimZ + tIdZ;
    grid_key_dx<SparseGridType::dims, int> coord({x, y, z});

    size_t pos = sparseGrid.getLinId(coord);

    output.template get<p>(pos) = sparseGrid.isPadding(coord) ? 1 : 0;

    // Compiler avoid warning
    x++;
    y++;
    z++;
}

template<unsigned int p, typename SparseGridType>
__global__ void insertBoundaryValuesHeat(SparseGridType sparseGrid)
{
    sparseGrid.init();

    const auto bDimX = blockDim.x;
    const auto bDimY = blockDim.y;
    const auto bDimZ = blockDim.z;
    const auto bIdX = blockIdx.x;
    const auto bIdY = blockIdx.y;
    const auto bIdZ = blockIdx.z;
    const auto tIdX = threadIdx.x;
    const auto tIdY = threadIdx.y;
    const auto tIdZ = threadIdx.z;
    int x = bIdX * bDimX + tIdX;
    int y = bIdY * bDimY + tIdY;
    int z = bIdZ * bDimZ + tIdZ;
    grid_key_dx<SparseGridType::dims, int> coord({x, y, z});

    float value = 0;
    if (x == 0)
    {
        value = 0;
    }
    else if (x == bDimX * gridDim.x - 1)
    {
        value = 10;
    }

    if (y == 0 || y == bDimY * gridDim.y - 1)
    {
        value = 10.0 * x / (bDimX * gridDim.x - 1);
    }

    sparseGrid.template insert<p>(coord) = value;

    __syncthreads();

    sparseGrid.flush_block_insert();

    // Compiler avoid warning
    x++;
    y++;
    z++;
}

template<unsigned int dim, unsigned int p>
struct LaplacianStencil
{
    // This is an example of a laplacian stencil to apply using the apply stencil facility of SparseGridGpu

    static constexpr unsigned int supportRadius = 1;

    template<typename SparseGridT, typename DataBlockWrapperT>
    static inline __device__ void stencil(
            SparseGridT & sparseGrid,
            grid_key_dx<dim, int> & dataBlockCoord,
            unsigned int offset,
            grid_key_dx<dim, int> & pointCoord,
            DataBlockWrapperT & dataBlockLoad,
            DataBlockWrapperT & dataBlockStore)
    {
        typedef typename SparseGridT::AggregateBlockType AggregateT;
        typedef ScalarTypeOf<AggregateT, p> ScalarT;

        constexpr unsigned int enlargedBlockSize = IntPow<
                SparseGridT::getBlockEdgeSize() + 2 * supportRadius, dim>::value;

        __shared__ ScalarT enlargedBlock[enlargedBlockSize];
        sparseGrid.loadBlock<p>(dataBlockLoad, enlargedBlock);
        sparseGrid.loadGhost<p>(dataBlockCoord, enlargedBlock);
        __syncthreads();

        const auto coord = sparseGrid.getCoordInEnlargedBlock(offset);
        const auto linId = sparseGrid.getLinIdInEnlargedBlock(offset);
        ScalarT cur = enlargedBlock[linId];
        ScalarT res = -2.0*dim*cur; // The central part of the stencil
        for (int d=0; d<dim; ++d)
        {
            auto nPlusId = sparseGrid.getNeighbourLinIdInEnlargedBlock(coord, d, 1);
            auto nMinusId = sparseGrid.getNeighbourLinIdInEnlargedBlock(coord, d, -1);
            ScalarT neighbourPlus = enlargedBlock[nPlusId];
            ScalarT neighbourMinus = enlargedBlock[nMinusId];
            res += neighbourMinus + neighbourPlus;
        }
        enlargedBlock[linId] = res;

        __syncthreads();
        sparseGrid.storeBlock<p>(dataBlockStore, enlargedBlock);
    }

    template <typename SparseGridT, typename CtxT>
    static inline void __host__ flush(SparseGridT & sparseGrid, CtxT & ctx)
    {
        sparseGrid.template flush <smin_<0>> (ctx, flush_type::FLUSH_ON_DEVICE);
    }
};

//todo: Here write some sort of stencil kernel to test the client interface for stencil application

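// A minimal sketch answering the todo above, assuming the same client-side
// conventions as LaplacianStencil (supportRadius, stencil(), flush()); the
// functor name SetToOne and the constant 1.0 are illustrative only and are
// not used by the tests below.
template<unsigned int dim, unsigned int p>
struct SetToOne
{
    static constexpr unsigned int supportRadius = 1;

    template<typename SparseGridT, typename DataBlockWrapperT>
    static inline __device__ void stencil(
            SparseGridT & sparseGrid,
            grid_key_dx<dim, int> & dataBlockCoord,
            unsigned int offset,
            grid_key_dx<dim, int> & pointCoord,
            DataBlockWrapperT & dataBlockLoad,
            DataBlockWrapperT & dataBlockStore)
    {
        // No neighbourhood access is needed: simply overwrite the current point
        dataBlockStore.template get<p>()[offset] = 1.0;
    }

    template <typename SparseGridT, typename CtxT>
    static inline void __host__ flush(SparseGridT & sparseGrid, CtxT & ctx)
    {
        sparseGrid.template flush <smax_<0>> (ctx, flush_type::FLUSH_ON_DEVICE);
    }
};
// It would be applied like the other stencils in this file, e.g.:
// sparseGrid.template applyStencils<SetToOne<2,0>>(sparseGrid.getBox(), STENCIL_MODE_INPLACE);
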
BOOST_AUTO_TEST_SUITE(SparseGridGpu_tests)

BOOST_AUTO_TEST_CASE(testInsert)
{
    constexpr unsigned int dim = 2;
    constexpr unsigned int blockEdgeSize = 8;
    constexpr unsigned int dataBlockSize = blockEdgeSize * blockEdgeSize;
    typedef aggregate<DataBlock<float, dataBlockSize>> AggregateSGT;
    typedef aggregate<float> AggregateOutT;

    dim3 gridSize(2, 2);
    dim3 blockSizeInsert(blockEdgeSize, blockEdgeSize);

    grid_smb<dim, blockEdgeSize> blockGeometry(gridSize);
    SparseGridGpu<dim, AggregateOutT, blockEdgeSize> sparseGrid(blockGeometry);

    sparseGrid.template setBackgroundValue<0>(666);
    sparseGrid.setGPUInsertBuffer(gridSize, blockSizeInsert);

    CUDA_LAUNCH_DIM3((insertValues<0>), gridSize, blockSizeInsert, sparseGrid.toKernel());

    gpu::ofp_context_t ctx;
    sparseGrid.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    sparseGrid.template deviceToHost<0>();

    // Compare
    bool match = true;
    for (size_t i = 0; i < 4*64 ; i++)
    {
        auto coord = sparseGrid.getCoord(i);
        auto expectedValue = coord.get(0);

        match &= sparseGrid.template get<0>(coord) == expectedValue;
    }

    BOOST_REQUIRE_EQUAL(match, true);
}

BOOST_AUTO_TEST_CASE(testInsert3D)
{
    constexpr unsigned int dim = 3;
    constexpr unsigned int blockEdgeSize = 4;
    constexpr unsigned int dataBlockSize = blockEdgeSize * blockEdgeSize;
    typedef aggregate<DataBlock<float, dataBlockSize>> AggregateSGT;
    typedef aggregate<float> AggregateOutT;

    dim3 gridSize(2, 2, 2);
    dim3 blockSizeInsert(blockEdgeSize, blockEdgeSize, blockEdgeSize);

    grid_smb<dim, blockEdgeSize> blockGeometry(gridSize);
    SparseGridGpu<dim, AggregateOutT, blockEdgeSize> sparseGrid(blockGeometry);

    sparseGrid.template setBackgroundValue<0>(666);

    sparseGrid.setGPUInsertBuffer(gridSize, blockSizeInsert);

    CUDA_LAUNCH_DIM3((insertValues<0>), gridSize, blockSizeInsert, sparseGrid.toKernel());

    gpu::ofp_context_t ctx;
    sparseGrid.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    sparseGrid.template deviceToHost<0>();

    // Compare
    bool match = true;
    for (size_t i = 0; i < 64*4 ; i++)
    {
        auto coord = sparseGrid.getCoord(i);
        auto expectedValue = coord.get(0);

        match &= sparseGrid.template get<0>(coord) == expectedValue;
    }

    BOOST_REQUIRE_EQUAL(match, true);
}

BOOST_AUTO_TEST_CASE(testTagBoundaries)
{
    constexpr unsigned int dim = 2;
    constexpr unsigned int blockEdgeSize = 8;
    typedef aggregate<float> AggregateT;

    dim3 gridSize(2, 2);
    dim3 blockSizeInsert(blockEdgeSize, blockEdgeSize);

    grid_smb<dim, blockEdgeSize> blockGeometry(gridSize);
    SparseGridGpu<dim, AggregateT, blockEdgeSize> sparseGrid(blockGeometry);

    sparseGrid.template setBackgroundValue<0>(666);
    gpu::ofp_context_t ctx;

    sparseGrid.setGPUInsertBuffer(gridSize, blockSizeInsert);
    dim3 pt1(0, 0, 0);
    CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), pt1, 1);
    dim3 pt2(6, 6, 0);
    CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), pt2, 1);
    dim3 pt3(7, 6, 0);
    CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), pt3, 1);
    sparseGrid.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    sparseGrid.setGPUInsertBuffer(gridSize, blockSizeInsert);
    dim3 pt4(8, 6, 0);
    CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), pt4, 1);
    sparseGrid.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    sparseGrid.setGPUInsertBuffer(gridSize, blockSizeInsert);
    for (int y = 9; y <= 11; y++)
    {
        dim3 pt1(6, y, 0);
        CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), pt1, 1);
        dim3 pt2(7, y, 0);
        CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), pt2, 1);
    }
    sparseGrid.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    sparseGrid.setGPUInsertBuffer(gridSize, blockSizeInsert);
    for (int y = 9; y <= 11; y++)
    {
        dim3 pt1(8, y, 0);
        CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), pt1, 1);
        dim3 pt2(9, y, 0);
        CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), pt2, 1);
    }
    sparseGrid.template flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

//    sparseGrid.hostToDevice(); //just sync masks
    sparseGrid.deviceToHost(); //just sync masks
    sparseGrid.deviceToHost<0>();

    sparseGrid.findNeighbours(); // Pre-compute the neighbours pos for each block!
    // Now tag the boundaries
    sparseGrid.tagBoundaries(ctx);

    // Get output
    openfpm::vector_gpu<AggregateT> output;
    output.resize(4 * 64);

    CUDA_LAUNCH_DIM3((copyToOutputIfPadding<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), output.toKernel());

    output.template deviceToHost<0>();
    sparseGrid.template deviceToHost<0>();

    // Compare
    bool match = true;
    for (size_t i = 0; i < output.size(); i++)
    {
        auto coord = sparseGrid.getCoord(i);
        auto expectedValue =
                ( i == 0
                  || i == 54
                  || i == 55
                  || i == 112
                  || i == 142 || i == 143 || i == 200 || i == 201 // (6,9), (7,9), (8,9), (9,9)
                  || i == 150 || i == 209 // (6,10), (9,10)
                  || i == 158 || i == 159 || i == 216 || i == 217 // (6,11), (7,11), (8,11), (9,11)
                ) ? 1 : 0;

        match &= output.template get<0>(i) == expectedValue;
    }

    BOOST_REQUIRE_EQUAL(match, true);
}

BOOST_AUTO_TEST_CASE(testTagBoundaries2)
{
    constexpr unsigned int dim = 2;
    constexpr unsigned int blockEdgeSize = 8;
    typedef aggregate<float> AggregateT;

    dim3 gridSize(2, 2);
    dim3 blockSizeInsert(blockEdgeSize, blockEdgeSize);

    grid_smb<dim, blockEdgeSize> blockGeometry(gridSize);
    SparseGridGpu<dim, AggregateT, blockEdgeSize> sparseGrid(blockGeometry);

    sparseGrid.template setBackgroundValue<0>(666);
    gpu::ofp_context_t ctx;

    {
        sparseGrid.setGPUInsertBuffer(gridSize, blockSizeInsert);
        dim3 ptd1(6, 6, 0);
        CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), ptd1, 1);
        dim3 ptd2(6, 7, 0);
        CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), ptd2, 1);
        dim3 ptd3(7, 6, 0);
        CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), ptd3, 1);
        dim3 ptd4(7, 7, 0);
        CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), ptd4, 1);
        sparseGrid.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);
    }
    {
        sparseGrid.setGPUInsertBuffer(gridSize, blockSizeInsert);
        dim3 ptd1(8, 6, 0);
        CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), ptd1, 1);
        dim3 ptd2(9, 6, 0);
        CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), ptd2, 1);
        dim3 ptd3(8, 7, 0);
        CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), ptd3, 1);
        dim3 ptd4(9, 7, 0);
        CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), ptd4, 1);
        sparseGrid.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);
    }
    {
        sparseGrid.setGPUInsertBuffer(gridSize, blockSizeInsert);
        dim3 ptd1(6, 8, 0);
        CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), ptd1, 1);
        dim3 ptd2(7, 8, 0);
        CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), ptd2, 1);
        dim3 ptd3(6, 9, 0);
        CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), ptd3, 1);
        dim3 ptd4(7, 9, 0);
        CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), ptd4, 1);
        sparseGrid.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);
    }
    {
        sparseGrid.setGPUInsertBuffer(gridSize, blockSizeInsert);
        dim3 ptd1(8, 8, 0);
        CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), ptd1, 1);
        dim3 ptd2(8, 9, 0);
        CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), ptd2, 1);
        dim3 ptd3(9, 8, 0);
        CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), ptd3, 1);
        dim3 ptd4(9, 9, 0);
        CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), ptd4, 1);
        sparseGrid.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);
    }

    sparseGrid.deviceToHost(); //just sync masks

    sparseGrid.findNeighbours(); // Pre-compute the neighbours pos for each block!
    // Now tag the boundaries
    sparseGrid.tagBoundaries(ctx);

    // Get output
    openfpm::vector_gpu<AggregateT> output;
    output.resize(4 * 64);

    CUDA_LAUNCH_DIM3((copyToOutputIfPadding<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), output.toKernel());

    output.template deviceToHost<0>();
    sparseGrid.template deviceToHost<0>();

    // Compare
    bool match = true;
    for (size_t i = 0; i < output.size(); i++)
    {
        auto coord = sparseGrid.getCoord(i);
        auto expectedValue =
                (
                    i == 54 || i == 55 || i == 62 // (6,6), (7,6), (6,7)
                    || i == 134 || i == 142 || i == 143 // (6,8), (6,9), (7,9)
                    || i == 112 || i == 113 || i == 121 // (8,6), (9,6), (9,7)
                    || i == 200 || i == 193 || i == 201 // (8,9), (9,8), (9,9)
                ) ? 1 : 0;

        match &= output.template get<0>(i) == expectedValue;
    }

    BOOST_REQUIRE_EQUAL(match, true);
}

BOOST_AUTO_TEST_CASE(testStencilHeat)
{
    constexpr unsigned int dim = 2;
    constexpr unsigned int blockEdgeSize = 8;
    typedef aggregate<float,float> AggregateT;

    dim3 gridSize(2, 2);
    dim3 blockSizeInsert(blockEdgeSize, blockEdgeSize);

    grid_smb<dim, blockEdgeSize> blockGeometry(gridSize);
    SparseGridGpu<dim, AggregateT, blockEdgeSize> sparseGrid(blockGeometry);
    gpu::ofp_context_t ctx;
    sparseGrid.template setBackgroundValue<0>(0);

    // Insert values on the grid
    sparseGrid.setGPUInsertBuffer(gridSize, blockSizeInsert);
    CUDA_LAUNCH_DIM3((insertConstantValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), 0);
    sparseGrid.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    sparseGrid.findNeighbours(); // Pre-compute the neighbours pos for each block!
    sparseGrid.tagBoundaries(ctx);

    sparseGrid.template applyStencils<BoundaryStencilSetXRescaled<dim,0,0>>(sparseGrid.getBox(), STENCIL_MODE_INPLACE, 0.0, gridSize.x * blockEdgeSize, 0.0, 10.0);

    // Now apply the laplacian operator
    const unsigned int maxIter = 1000;
//    const unsigned int maxIter = 100;
    for (unsigned int iter=0; iter<maxIter; ++iter)
    {
        sparseGrid.applyStencils<HeatStencil<dim, 0, 1>>(sparseGrid.getBox(), STENCIL_MODE_INPLACE, 0.1);
        sparseGrid.applyStencils<HeatStencil<dim, 1, 0>>(sparseGrid.getBox(), STENCIL_MODE_INPLACE, 0.01);
    }

    sparseGrid.template deviceToHost<0,1>();

    // Compare
    bool match = true;
    for (size_t i = 0; i < 64*4 ; i++)
    {
        auto coord = sparseGrid.getCoord(i);
        float expectedValue = 10.0 * coord.get(0) / (gridSize.x * blockEdgeSize - 1);

        match &= fabs(sparseGrid.template get<1>(coord) - expectedValue) < 1e-2;
    }

    BOOST_REQUIRE_EQUAL(match, true);
}

BOOST_AUTO_TEST_CASE(testStencil_lap_simplified)
{
    constexpr unsigned int dim = 2;
    constexpr unsigned int blockEdgeSize = 8;
    typedef aggregate<float,float> AggregateT;

    dim3 gridSize(2, 2);
    dim3 blockSizeInsert(blockEdgeSize, blockEdgeSize);

    grid_smb<dim, blockEdgeSize> blockGeometry(gridSize);
    SparseGridGpu<dim, AggregateT, blockEdgeSize> sparseGrid(blockGeometry);
    gpu::ofp_context_t ctx;
    sparseGrid.template setBackgroundValue<0>(0);

    // Insert values on the grid
    sparseGrid.setGPUInsertBuffer(gridSize, blockSizeInsert);
    CUDA_LAUNCH_DIM3((insertConstantValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), 0);
    sparseGrid.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    sparseGrid.findNeighbours(); // Pre-compute the neighbours pos for each block!
    sparseGrid.tagBoundaries(ctx);

    sparseGrid.template applyStencils<BoundaryStencilSetXRescaled<dim,0,0>>(sparseGrid.getBox(), STENCIL_MODE_INPLACE, 0.0, gridSize.x * blockEdgeSize, 0.0, 10.0);

    // Now apply the laplacian operator
    const unsigned int maxIter = 1000;
//    const unsigned int maxIter = 100;
    for (unsigned int iter=0; iter<maxIter; ++iter)
    {
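        // cross_stencil<2,float> carries the four axis neighbours of the
        // current point: as used here, cs.xm[d] and cs.xp[d] are the values
        // at -1 and +1 along dimension d, so the lambda below computes the
        // 5-point discrete laplacian update u += 0.1 * lap(u).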
        sparseGrid.conv_cross<0, 1, 1>({0,0},{16,16},[] __device__ (float & u, cross_stencil<2,float> & cs){
            return u + (cs.xm[0] + cs.xp[0] +
                        cs.xm[1] + cs.xp[1] - 4.0*u)*0.1;
        });
        sparseGrid.conv_cross<1, 0, 1>({0,0},{16,16},[] __device__ (float & u, cross_stencil<2,float> & cs){
            return u + (cs.xm[0] + cs.xp[0] +
                        cs.xm[1] + cs.xp[1] - 4.0*u)*0.1;
        });
    }

    sparseGrid.deviceToHost<0,1>();

    // Get output
    sparseGrid.template deviceToHost<0>();

    // Compare
    bool match = true;
    for (size_t i = 0; i < 64*4; i++)
    {
        auto coord = sparseGrid.getCoord(i);
        float expectedValue = 10.0 * coord.get(0) / (gridSize.x * blockEdgeSize - 1);

        match &= fabs(sparseGrid.template get<0>(coord) - expectedValue) < 1e-2;
    }

    BOOST_REQUIRE_EQUAL(match, true);
}

BOOST_AUTO_TEST_CASE(testStencil_lap_no_cross_simplified)
{
    constexpr unsigned int dim = 2;
    constexpr unsigned int blockEdgeSize = 8;
    typedef aggregate<float,float> AggregateT;

    dim3 gridSize(2, 2);
    dim3 blockSizeInsert(blockEdgeSize, blockEdgeSize);

    grid_smb<dim, blockEdgeSize> blockGeometry(gridSize);
    SparseGridGpu<dim, AggregateT, blockEdgeSize> sparseGrid(blockGeometry);
    gpu::ofp_context_t ctx;
    sparseGrid.template setBackgroundValue<0>(0);

    // Insert values on the grid
    sparseGrid.setGPUInsertBuffer(gridSize, blockSizeInsert);
    CUDA_LAUNCH_DIM3((insertConstantValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), 0);
    CUDA_LAUNCH_DIM3((insertConstantValue<1>), gridSize, blockSizeInsert, sparseGrid.toKernel(), 0);
    sparseGrid.flush<smax_<0>, smax_<1>>(ctx, flush_type::FLUSH_ON_DEVICE);

    sparseGrid.findNeighbours(); // Pre-compute the neighbours pos for each block!
    sparseGrid.tagBoundaries(ctx);

    sparseGrid.template applyStencils<BoundaryStencilSetXRescaled<dim,0,0>>(sparseGrid.getBox(), STENCIL_MODE_INPLACE, 0.0, gridSize.x * blockEdgeSize, 0.0, 10.0);
    sparseGrid.template applyStencils<BoundaryStencilSetXRescaled<dim,1,1>>(sparseGrid.getBox(), STENCIL_MODE_INPLACE, 0.0, gridSize.x * blockEdgeSize, 0.0, 10.0);

    typedef typename GetCpBlockType<decltype(sparseGrid),0,1>::type CpBlockType;

    /*
     * The initial condition is 0 on the left edge and 10 on the right edge,
     * with a linear increase along x in between:
     *
     *     0   ... linear increase in x ...   10
     */

    // Now apply the laplacian operator
    const unsigned int maxIter = 1000;

    for (unsigned int iter=0; iter<maxIter; ++iter)
    {
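        // CpBlockType (typedef above) exposes the property block through an
        // (i,j) accessor: u(i,j) is the value at the current point, and
        // u(i-1,j), u(i+1,j), u(i,j-1), u(i,j+1) are its four axis
        // neighbours, giving the same 5-point laplacian as before.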
        sparseGrid.conv<0, 1, 1>({0,0},{16,16},[] __device__ (CpBlockType & u, int i, int j){
            float c = u(i,j);
            return c + (u(i-1,j) + u(i+1,j) +
                        u(i,j-1) + u(i,j+1) - 4.0*c)*0.1;
        });

        sparseGrid.conv<1, 0, 1>({0,0},{16,16},[] __device__ (CpBlockType & u, int i, int j){
            float c = u(i,j);
            return c + (u(i-1,j) + u(i+1,j) +
                        u(i,j-1) + u(i,j+1) - 4.0*c)*0.1;
        });
    }

    sparseGrid.template deviceToHost<0>();

    // Compare
    bool match = true;
    for (size_t i = 0; i < 64*4; i++)
    {
        auto coord = sparseGrid.getCoord(i);
        float expectedValue = 10.0 * coord.get(0) / (gridSize.x * blockEdgeSize - 1);

        match &= fabs(sparseGrid.template get<0>(coord) - expectedValue) < 1e-2;
    }

    BOOST_REQUIRE_EQUAL(match, true);
}

BOOST_AUTO_TEST_CASE(testStencil_lap_no_cross_simplified2)
{
    constexpr unsigned int dim = 2;
    constexpr unsigned int blockEdgeSize = 8;
    typedef aggregate<float,float,float,float> AggregateT;

    dim3 gridSize(2, 2);
    dim3 blockSizeInsert(blockEdgeSize, blockEdgeSize);

    grid_smb<dim, blockEdgeSize> blockGeometry(gridSize);
    SparseGridGpu<dim, AggregateT, blockEdgeSize> sparseGrid(blockGeometry);
    gpu::ofp_context_t ctx;
    sparseGrid.template setBackgroundValue<0>(0);

    // Insert values on the grid
    sparseGrid.setGPUInsertBuffer(gridSize, blockSizeInsert);
    CUDA_LAUNCH_DIM3((insertConstantValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), 0);
    CUDA_LAUNCH_DIM3((insertConstantValue<1>), gridSize, blockSizeInsert, sparseGrid.toKernel(), 0);
    CUDA_LAUNCH_DIM3((insertConstantValue<2>), gridSize, blockSizeInsert, sparseGrid.toKernel(), 0);
    CUDA_LAUNCH_DIM3((insertConstantValue<3>), gridSize, blockSizeInsert, sparseGrid.toKernel(), 0);
    sparseGrid.flush<smax_<0>, smax_<1>, smax_<2>, smax_<3>>(ctx, flush_type::FLUSH_ON_DEVICE);

    sparseGrid.findNeighbours(); // Pre-compute the neighbours pos for each block!
    sparseGrid.tagBoundaries(ctx);

    sparseGrid.template applyStencils<BoundaryStencilSetXRescaled<dim,0,0>>(sparseGrid.getBox(), STENCIL_MODE_INPLACE, 0.0, gridSize.x * blockEdgeSize, 0.0, 10.0);
    sparseGrid.template applyStencils<BoundaryStencilSetXRescaled<dim,1,1>>(sparseGrid.getBox(), STENCIL_MODE_INPLACE, 0.0, gridSize.x * blockEdgeSize, 0.0, 5.0);
    sparseGrid.template applyStencils<BoundaryStencilSetXRescaled<dim,2,2>>(sparseGrid.getBox(), STENCIL_MODE_INPLACE, 0.0, gridSize.x * blockEdgeSize, 0.0, 10.0);
    sparseGrid.template applyStencils<BoundaryStencilSetXRescaled<dim,3,3>>(sparseGrid.getBox(), STENCIL_MODE_INPLACE, 0.0, gridSize.x * blockEdgeSize, 0.0, 5.0);

    typedef typename GetCpBlockType<decltype(sparseGrid),0,1>::type CpBlockType;

    /*
     * The initial condition is 0 on the left edge and 10 (5 for the second
     * property of each pair) on the right edge, with a linear increase
     * along x in between:
     *
     *     0   ... linear increase in x ...   10
     */

    // Now apply the laplacian operator
    const unsigned int maxIter = 1000;

    for (unsigned int iter=0; iter<maxIter; ++iter)
    {
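        // conv2 runs one convolution pass that reads two properties (u,v)
        // and writes two properties (u_out,v_out) in a single kernel launch;
        // both components receive the same laplacian update here.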
        sparseGrid.conv2<0,1,2,3,1>({0,0},{16,16},[] __device__ (float & u_out, float & v_out, CpBlockType & u, CpBlockType & v, int i, int j){
            float cu = u(i,j);
            float cv = v(i,j);
            u_out = cu + (u(i-1,j) + u(i+1,j) +
                          u(i,j-1) + u(i,j+1) - 4.0*cu)*0.1;

            v_out = cv + (v(i-1,j) + v(i+1,j) +
                          v(i,j-1) + v(i,j+1) - 4.0*cv)*0.1;
        });

        sparseGrid.conv2<2,3,0,1,1>({0,0},{16,16},[] __device__ (float & u_out, float & v_out, CpBlockType & u, CpBlockType & v, int i, int j){
            float cu = u(i,j);
            float cv = v(i,j);
            u_out = cu + (u(i-1,j) + u(i+1,j) +
                          u(i,j-1) + u(i,j+1) - 4.0*cu)*0.1;

            v_out = cv + (v(i-1,j) + v(i+1,j) +
                          v(i,j-1) + v(i,j+1) - 4.0*cv)*0.1;
        });
    }

    sparseGrid.template deviceToHost<0,1>();

    // Compare
    bool match = true;
    for (size_t i = 0; i < 64*4; i++)
    {
        auto coord = sparseGrid.getCoord(i);
        float expectedValue = 10.0 * coord.get(0) / (gridSize.x * blockEdgeSize - 1);
        float expectedValue2 = 5.0 * coord.get(0) / (gridSize.x * blockEdgeSize - 1);

        match &= fabs(sparseGrid.template get<0>(coord) - expectedValue) < 1e-2;
        match &= fabs(sparseGrid.template get<1>(coord) - expectedValue2) < 1e-2;
    }

    BOOST_REQUIRE_EQUAL(match, true);
}

BOOST_AUTO_TEST_CASE(testStencil_lap_no_cross_simplified_subset)
{
    constexpr unsigned int dim = 2;
    constexpr unsigned int blockEdgeSize = 8;
    typedef aggregate<float,float> AggregateT;

    dim3 gridSize(2, 2);
    dim3 blockSizeInsert(blockEdgeSize, blockEdgeSize);

    grid_smb<dim, blockEdgeSize> blockGeometry(gridSize);
    SparseGridGpu<dim, AggregateT, blockEdgeSize> sparseGrid(blockGeometry);
    gpu::ofp_context_t ctx;
    sparseGrid.template setBackgroundValue<0>(0);

    // Insert values on the grid
    sparseGrid.setGPUInsertBuffer(gridSize, blockSizeInsert);
    CUDA_LAUNCH_DIM3((insertConstantValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), 0);
    CUDA_LAUNCH_DIM3((insertConstantValue<1>), gridSize, blockSizeInsert, sparseGrid.toKernel(), 0);
    sparseGrid.flush<smax_<0>, smax_<1>>(ctx, flush_type::FLUSH_ON_DEVICE);

    sparseGrid.findNeighbours(); // Pre-compute the neighbours pos for each block!
    sparseGrid.tagBoundaries(ctx);

    typedef typename GetCpBlockType<decltype(sparseGrid),0,1>::type CpBlockType;

    // Write 5.0 on property 1, but only inside the sub-box {3,3}-{11,11}
    sparseGrid.conv<0, 1, 1>({3,3},{11,11},[] __device__ (CpBlockType & u, int i, int j){
        return 5.0;
    });


    sparseGrid.template deviceToHost<1>();

    // Compare
    bool match = true;
    for (size_t i = 0; i < 64*4; i++)
    {
        auto coord = sparseGrid.getCoord(i);

        if (coord.get(0) >= 3 && coord.get(1) >= 3 && coord.get(0) <= 11 && coord.get(1) <= 11)
        {
            match &= sparseGrid.template get<1>(coord) == 5.0;
        }
        else
        {
            match &= sparseGrid.template get<1>(coord) == 0.0;
        }
    }

    BOOST_REQUIRE_EQUAL(match, true);
}


template<typename sparsegrid_type>
__global__ void sparse_grid_get_test(sparsegrid_type sparseGrid, grid_key_dx<3> key, float * data)
{
    *data = sparseGrid.template get<0>(key);
}

BOOST_AUTO_TEST_CASE(testFlushInsert)
{
    constexpr unsigned int dim = 3;
    constexpr unsigned int blockEdgeSize = 4;
    typedef aggregate<float,float> AggregateT;


    size_t sz[] = {137,100,57};

    SparseGridGpu<dim, AggregateT, blockEdgeSize> sparseGrid(sz);
    sparseGrid.template setBackgroundValue<0>(0);

    sparseGrid.insertFlush<0>(grid_key_dx<3>({3,6,7})) = 2.0;
    sparseGrid.insertFlush<0>(grid_key_dx<3>({13,16,17})) = 3.0;
    sparseGrid.insertFlush<0>(grid_key_dx<3>({13,46,27})) = 4.0;
    sparseGrid.insertFlush<0>(grid_key_dx<3>({36,63,11})) = 5.0;
    sparseGrid.insertFlush<0>(grid_key_dx<3>({37,96,47})) = 6.0;
    sparseGrid.insertFlush<0>(grid_key_dx<3>({130,56,37})) = 7.0;
    sparseGrid.insertFlush<0>(grid_key_dx<3>({131,76,17})) = 8.0;
    sparseGrid.insertFlush<0>(grid_key_dx<3>({36,86,27})) = 9.0;
    sparseGrid.insertFlush<0>(grid_key_dx<3>({34,36,7})) = 10.0;

    sparseGrid.insertFlush<0>(grid_key_dx<3>({4,6,7})) = 2.0;
    sparseGrid.insertFlush<0>(grid_key_dx<3>({12,16,17})) = 3.0;
    sparseGrid.insertFlush<0>(grid_key_dx<3>({12,46,27})) = 4.0;
    sparseGrid.insertFlush<0>(grid_key_dx<3>({35,63,11})) = 5.0;
    sparseGrid.insertFlush<0>(grid_key_dx<3>({38,96,47})) = 6.0;
    sparseGrid.insertFlush<0>(grid_key_dx<3>({131,56,37})) = 7.0;
    sparseGrid.insertFlush<0>(grid_key_dx<3>({132,76,17})) = 8.0;
    sparseGrid.insertFlush<0>(grid_key_dx<3>({37,86,27})) = 9.0;
    sparseGrid.insertFlush<0>(grid_key_dx<3>({35,36,7})) = 10.0;

    BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({3,6,7})),2.0);
    BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({13,16,17})),3.0);
    BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({13,46,27})),4.0);
    BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({36,63,11})),5.0);
    BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({37,96,47})),6.0);
    BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({130,56,37})),7.0);
    BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({131,76,17})),8.0);
    BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({36,86,27})),9.0);
    BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({34,36,7})),10.0);

    BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({4,6,7})),2.0);
    BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({12,16,17})),3.0);
    BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({12,46,27})),4.0);
    BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({35,63,11})),5.0);
    BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({38,96,47})),6.0);
    BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({131,56,37})),7.0);
    BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({132,76,17})),8.0);
    BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({37,86,27})),9.0);
    BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({35,36,7})),10.0);

    sparseGrid.template hostToDevice<0>();

    // Check that the same information is accessible on the device

    CudaMemory mem;

    mem.allocate(sizeof(float));

    grid_key_dx<3> key({3,6,7});

    CUDA_LAUNCH_DIM3(sparse_grid_get_test,1,1,sparseGrid.toKernel(),key,(float *)mem.getDevicePointer());

    mem.deviceToHost();

    BOOST_REQUIRE_EQUAL(*(float *)mem.getPointer(),2.0);

    grid_key_dx<3> key2({131,76,17});

    CUDA_LAUNCH_DIM3(sparse_grid_get_test,1,1,sparseGrid.toKernel(),key2,(float *)mem.getDevicePointer());

    mem.deviceToHost();

    BOOST_REQUIRE_EQUAL(*(float *)mem.getPointer(),8.0);
}

struct conv_coeff
{
    float coeff[3][3][3];
};

template<unsigned int dim, unsigned int p_src, unsigned int p_dst>
struct Conv3x3x3
{
    // This is an example of a laplacian smoothing stencil to apply using the apply stencil facility of SparseGridGpu

    typedef NNFull<dim> stencil_type;

    static constexpr unsigned int supportRadius = 1;

    template<typename SparseGridT, typename DataBlockWrapperT>
    static inline __device__ void stencil(
            SparseGridT & sparseGrid,
            const unsigned int dataBlockId,
            const openfpm::sparse_index<unsigned int> dataBlockIdPos,
            unsigned int offset,
            grid_key_dx<dim, int> & pointCoord,
            DataBlockWrapperT & dataBlockLoad,
            DataBlockWrapperT & dataBlockStore,
            unsigned char curMask,
            conv_coeff & cc)
    {
        typedef typename SparseGridT::AggregateBlockType AggregateT;
        typedef ScalarTypeOf<AggregateT, p_src> ScalarT;

        constexpr unsigned int enlargedBlockSize = IntPow<
                SparseGridT::getBlockEdgeSize() + 2 * supportRadius, dim>::value;

        __shared__ ScalarT enlargedBlock[enlargedBlockSize];

        sparseGrid.template loadGhostBlock<p_src>(dataBlockLoad,dataBlockIdPos,enlargedBlock);

        __syncthreads();

        if ((curMask & mask_sparse::EXIST) && !(curMask & mask_sparse::PADDING))
        {
            const auto coord = sparseGrid.getCoordInEnlargedBlock(offset);
            const auto linId = sparseGrid.getLinIdInEnlargedBlock(offset);
            ScalarT tot = 0.0;
            for (int i = 0; i < dim; ++i)
            {
                for (int j = 0; j < dim; ++j)
                {
                    for (int k = 0; k < dim; ++k)
                    {
                        grid_key_dx<dim, int> key;

                        key.set_d(0,i-1);
                        key.set_d(1,j-1);
                        key.set_d(2,k-1);

                        auto nPlusId = sparseGrid.getNeighbourLinIdInEnlargedBlock(coord, key);
                        tot += enlargedBlock[nPlusId] * cc.coeff[i][j][k];
                    }
                }
            }

            dataBlockStore.template get<p_dst>()[offset] = tot;
        }
    }

    template <typename SparseGridT, typename CtxT>
    static inline void __host__ flush(SparseGridT & sparseGrid, CtxT & ctx)
    {
        sparseGrid.template flush <smax_<0>> (ctx, flush_type::FLUSH_ON_DEVICE);
    }
};

template<unsigned int dim, unsigned int p_src, unsigned int p_dst>
struct Conv3x3x3_noshared
{
    // This is an example of a laplacian smoothing stencil to apply using the apply stencil facility of SparseGridGpu

    typedef NNFull<dim> stencil_type;

    static constexpr unsigned int supportRadius = 1;

    template<typename SparseGridT, typename DataBlockWrapperT>
    static inline __device__ void stencil(
            SparseGridT & sparseGrid,
            const unsigned int dataBlockId,
            const openfpm::sparse_index<unsigned int> dataBlockIdPos,
            unsigned int offset,
            grid_key_dx<dim, int> & pointCoord,
            DataBlockWrapperT & dataBlockLoad,
            DataBlockWrapperT & dataBlockStore,
            unsigned char curMask,
            conv_coeff & cc)
    {
        typedef typename SparseGridT::AggregateBlockType AggregateT;
        typedef ScalarTypeOf<AggregateT, p_src> ScalarT;

        if ((curMask & mask_sparse::EXIST) && !(curMask & mask_sparse::PADDING))
        {
            ScalarT tot = 0.0;
            for (int i = 0; i < dim; ++i)
            {
                for (int j = 0; j < dim; ++j)
                {
                    for (int k = 0; k < dim; ++k)
                    {
                        grid_key_dx<dim, int> key;

                        key.set_d(0,k-1);
                        key.set_d(1,j-1);
                        key.set_d(2,i-1);

                        block_offset<int> pos = sparseGrid.template getNNPoint<stencil_type>(dataBlockIdPos, offset, key);

                        tot += sparseGrid.template get<p_src>(pos) * cc.coeff[i][j][k];
                    }
                }
            }

            dataBlockStore.template get<p_dst>()[offset] = tot;
        }
    }

    template <typename SparseGridT, typename CtxT>
    static inline void __host__ flush(SparseGridT & sparseGrid, CtxT & ctx)
    {
        sparseGrid.template flush <smax_<0>> (ctx, flush_type::FLUSH_ON_DEVICE);
    }
};

template<typename SparseGridZ>
void test_convolution_3x3x3()
{
    size_t sz[] = {1000,1000,1000};

    SparseGridZ sparseGrid(sz);
    gpu::ofp_context_t ctx;
    sparseGrid.template setBackgroundValue<0>(0);

    // now create a 3D sphere

    grid_key_dx<3,int> start({256,256,256});

    dim3 gridSize(32,32,32);

    // Insert values on the grid
    sparseGrid.setGPUInsertBuffer(gridSize,dim3(1));
    CUDA_LAUNCH_DIM3((insertSphere3D_radius<0>),
            gridSize, dim3(SparseGridZ::blockEdgeSize_*SparseGridZ::blockEdgeSize_*SparseGridZ::blockEdgeSize_,1,1),
            sparseGrid.toKernel(), start, 64, 56, 1);

    sparseGrid.template flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    sparseGrid.template findNeighbours<NNFull<3>>(); // Pre-compute the neighbours pos for each block!

    sparseGrid.template setNNType<NNFull<3>>();
    sparseGrid.template tagBoundaries<NNFull<3>>(ctx);

    conv_coeff cc;

    for (int i = 0 ; i < 3 ; i++)
    {
        for (int j = 0 ; j < 3 ; j++)
        {
            for (int k = 0 ; k < 3 ; k++)
            {
                cc.coeff[k][j][i] = 1.0;
            }
        }
    }
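
    // With every coefficient set to 1 the 3x3x3 convolution reduces to the
    // sum of the 27 neighbouring values, so the check below expects property
    // 1 to be 27 times property 0 on the marked points.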

    sparseGrid.template applyStencils<Conv3x3x3<3,0,1>>(sparseGrid.getBox(),STENCIL_MODE_INPLACE,cc);
    sparseGrid.template deviceToHost<0,1>();

    auto & bm = sparseGrid.private_get_blockMap();
    auto & dataVector = bm.getDataBuffer();

    bool match = true;

    BOOST_REQUIRE(dataVector.size() != 0);

    for (size_t i = 0 ; i < dataVector.size() ; i++)
    {
        for (size_t j = 0 ; j < 64 ; j++)
        {
            if (dataVector.template get<2>(i)[j] == 1)
            {
                match &= dataVector.template get<0>(i)[j]*27 == dataVector.template get<1>(i)[j];
            }
        }
    }

    BOOST_REQUIRE_EQUAL(match,true);
}

template<typename SparseGridZ>
void test_convolution_3x3x3_no_shared()
{
    size_t sz[] = {1000,1000,1000};

    SparseGridZ sparseGrid(sz);
    gpu::ofp_context_t ctx;
    sparseGrid.template setBackgroundValue<0>(0);

    // now create a 3D sphere

    grid_key_dx<3,int> start({256,256,256});

    dim3 gridSize(32,32,32);

    // Insert values on the grid
    sparseGrid.setGPUInsertBuffer(gridSize,dim3(1));
    CUDA_LAUNCH_DIM3((insertSphere3D_radius<0>),
            gridSize, dim3(SparseGridZ::blockEdgeSize_*SparseGridZ::blockEdgeSize_*SparseGridZ::blockEdgeSize_,1,1),
            sparseGrid.toKernel(), start, 64, 56, 1);

    sparseGrid.template flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    sparseGrid.template findNeighbours<NNFull<3>>(); // Pre-compute the neighbours pos for each block!

    sparseGrid.template setNNType<NNFull<SparseGridZ::dims>>();
    sparseGrid.template tagBoundaries<NNFull<3>>(ctx,No_check(),tag_boundaries::CALCULATE_EXISTING_POINTS);

    conv_coeff cc;

    for (int i = 0 ; i < 3 ; i++)
    {
        for (int j = 0 ; j < 3 ; j++)
        {
            for (int k = 0 ; k < 3 ; k++)
            {
                cc.coeff[k][j][i] = 1.0;
            }
        }
    }

    sparseGrid.template applyStencils<Conv3x3x3_noshared<SparseGridZ::dims,0,1>>(sparseGrid.getBox(),STENCIL_MODE_INPLACE_NO_SHARED,cc);

    sparseGrid.template deviceToHost<0,1>();

    auto & bm = sparseGrid.private_get_blockMap();
    auto & dataVector = bm.getDataBuffer();

    bool match = true;

    for (size_t i = 0 ; i < dataVector.size() ; i++)
    {
        for (size_t j = 0 ; j < 64 ; j++)
        {
            if (dataVector.template get<2>(i)[j] == 1)
            {
                match &= dataVector.template get<0>(i)[j]*27 == dataVector.template get<1>(i)[j];
            }
        }
    }

    BOOST_REQUIRE_EQUAL(match,true);
}

BOOST_AUTO_TEST_CASE(test3x3x3convolution_no_shared)
{
    constexpr unsigned int dim = 3;
    constexpr unsigned int blockEdgeSize = 4;
    typedef aggregate<float,float> AggregateT;

    test_convolution_3x3x3_no_shared<SparseGridGpu<dim, AggregateT, blockEdgeSize, 64, long int>>();
}

BOOST_AUTO_TEST_CASE(test3x3x3convolution_no_shared_z_morton)
{
    constexpr unsigned int dim = 3;
    constexpr unsigned int blockEdgeSize = 4;
    typedef aggregate<float,float> AggregateT;

    test_convolution_3x3x3_no_shared<SparseGridGpu_z<dim, AggregateT, blockEdgeSize, 64, long int>>();
}

BOOST_AUTO_TEST_CASE(test3x3x3convolution)
{
    constexpr unsigned int dim = 3;
    constexpr unsigned int blockEdgeSize = 4;
    typedef aggregate<float,float> AggregateT;

    test_convolution_3x3x3<SparseGridGpu<dim, AggregateT, blockEdgeSize, 64, long int>>();
}

BOOST_AUTO_TEST_CASE(test3x3x3convolution_morton_z)
{
    constexpr unsigned int dim = 3;
    constexpr unsigned int blockEdgeSize = 4;
    typedef aggregate<float,float> AggregateT;

    test_convolution_3x3x3<SparseGridGpu_z<dim, AggregateT, blockEdgeSize, 64, long int>>();
}

BOOST_AUTO_TEST_CASE(test_sparse_grid_iterator_sub_host)
{
    constexpr unsigned int dim = 3;
    constexpr unsigned int blockEdgeSize = 4;
    typedef aggregate<float, float> AggregateT;

    size_t sz[3] = {768,768,768};
    dim3 gridSize(32,32,32);

    grid_smb<dim, blockEdgeSize> blockGeometry(sz);
    SparseGridGpu<dim, AggregateT, blockEdgeSize> sparseGrid(blockGeometry);
    gpu::ofp_context_t ctx;
    sparseGrid.template setBackgroundValue<0>(0);

    // Sphere 1
    grid_key_dx<3,int> start1({256,256,256});
    sparseGrid.setGPUInsertBuffer(gridSize,dim3(1));
    CUDA_LAUNCH_DIM3((insertSphere3D<0>),
            gridSize, dim3(blockEdgeSize*blockEdgeSize*blockEdgeSize,1,1),
            sparseGrid.toKernel(), start1, 32, 0, 1);

    sparseGrid.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    sparseGrid.template deviceToHost<0>();

    bool match = true;

    int count = 0;

    grid_key_dx<3> start({303,303,303});
    grid_key_dx<3> stop({337,337,337});

    auto it = sparseGrid.getIterator(start,stop);

    while (it.isNext())
    {
        auto key = it.get();

        match &= sparseGrid.template get<0>(key) == 1.0;

        sparseGrid.template get<0>(key) = 5.0;

        count++;

        ++it;
    }

    BOOST_REQUIRE_EQUAL(match,true);
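    // The sub-iterator covers {303..337} in each dimension: 35^3 = 42875 points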
    BOOST_REQUIRE_EQUAL(count,42875);
}


BOOST_AUTO_TEST_CASE(test_sparse_grid_iterator_host)
{
    constexpr unsigned int dim = 3;
    constexpr unsigned int blockEdgeSize = 4;
    typedef aggregate<float, float> AggregateT;

    size_t sz[3] = {512,512,512};
    dim3 gridSize(32,32,32);

    grid_smb<dim, blockEdgeSize> blockGeometry(sz);
    SparseGridGpu<dim, AggregateT, blockEdgeSize> sparseGrid(blockGeometry);
    gpu::ofp_context_t ctx;
    sparseGrid.template setBackgroundValue<0>(0);

    // Sphere 1
    grid_key_dx<3,int> start1({256,256,256});
    sparseGrid.setGPUInsertBuffer(gridSize,dim3(1));
    CUDA_LAUNCH_DIM3((insertSphere3D<0>),
            gridSize, dim3(blockEdgeSize*blockEdgeSize*blockEdgeSize,1,1),
            sparseGrid.toKernel(), start1, 64, 32, 1);

    sparseGrid.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    sparseGrid.template deviceToHost<0>();

    bool match = true;

    int count = 0;

    auto it = sparseGrid.getIterator();

    while (it.isNext())
    {
        auto key = it.get();

        match &= sparseGrid.template get<0>(key) == 1.0;
        //unsigned char bl = sparseGrid.template get<2>(key);

        count++;

        ++it;
    }

    BOOST_REQUIRE(sparseGrid.countExistingElements() != 0);

    BOOST_REQUIRE_EQUAL(sparseGrid.countExistingElements(),count);
    BOOST_REQUIRE_EQUAL(match,true);
}

BOOST_AUTO_TEST_CASE(test_pack_request)
{
    size_t sz[] = {1000,1000,1000};

    constexpr int blockEdgeSize = 4;
    constexpr int dim = 3;

    typedef SparseGridGpu<dim, aggregate<float>, blockEdgeSize, 64, long int> SparseGridZ;

    SparseGridZ sparseGrid(sz);
    gpu::ofp_context_t ctx;
    sparseGrid.template setBackgroundValue<0>(0);

    // now create a 3D sphere

    grid_key_dx<3,int> start({256,256,256});

    dim3 gridSize(32,32,32);

    // Insert values on the grid
    sparseGrid.setGPUInsertBuffer(gridSize,dim3(1));
    CUDA_LAUNCH_DIM3((insertSphere3D_radius<0>),
            gridSize, dim3(SparseGridZ::blockEdgeSize_*SparseGridZ::blockEdgeSize_*SparseGridZ::blockEdgeSize_,1,1),
            sparseGrid.toKernel(), start, 64, 56, 1);

    sparseGrid.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);
    sparseGrid.template deviceToHost<0>();

    size_t cnt = sparseGrid.countExistingElements();

    size_t req = 0;
    sparseGrid.packRequest<0>(req,ctx);

    size_t tot = 8 + // how many chunks
                 sparseGrid.private_get_index_array().size()*16 + 8 + // store the scan + chunk indexes
                 cnt*(sizeof(float) + 2 + 1); // how much data

    BOOST_REQUIRE_EQUAL(req,tot);
}

BOOST_AUTO_TEST_CASE(test_MergeIndexMap)
{
    size_t sz[] = {1000,1000,1000};

    constexpr int blockEdgeSize = 4;
    constexpr int dim = 3;

    typedef SparseGridGpu<dim, aggregate<float>, blockEdgeSize, 64, long int> SparseGridZ;

    SparseGridZ sparseGrid(sz);
    gpu::ofp_context_t ctx;
    sparseGrid.template setBackgroundValue<0>(0);

    // now create a 3D sphere

    grid_key_dx<3,int> start({256,256,256});

    dim3 gridSize(32,32,32);

    // Insert values on the grid
    sparseGrid.setGPUInsertBuffer(gridSize,dim3(1));
    CUDA_LAUNCH_DIM3((insertSphere3D_radius<0>),
            gridSize, dim3(SparseGridZ::blockEdgeSize_*SparseGridZ::blockEdgeSize_*SparseGridZ::blockEdgeSize_,1,1),
            sparseGrid.toKernel(), start, 64, 56, 1);

    size_t sz_b = sparseGrid.private_get_index_array().size();

    sparseGrid.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    auto & m_map = sparseGrid.getMergeIndexMapVector();
    auto & a_map = sparseGrid.getMappingVector();

    m_map.template deviceToHost<0>();
    a_map.template deviceToHost<0>();

    bool match = true;

    auto & indexes = sparseGrid.private_get_index_array();
    indexes.template deviceToHost<0>();
    auto & a_indexes = sparseGrid.private_get_add_index_array();
    a_indexes.template deviceToHost<0>();
    auto & m_out = sparseGrid.getSegmentToOutMap();
    m_out.template deviceToHost<0>();
    for (int i = 0 ; i < m_map.size() ; i++)
    {
        if (m_map.template get<0>(i) >= sz_b)
        {
            int c = a_map.template get<0>(m_map.template get<0>(i) - sz_b);
            int ci = m_out.template get<0>(i);

            match &= (a_indexes.template get<0>(c) == indexes.template get<0>(ci));
        }
    }

    BOOST_REQUIRE_EQUAL(match,true);
}

BOOST_AUTO_TEST_CASE(test_pack_request_with_iterator)
{
    size_t sz[] = {1000,1000,1000};

    constexpr int blockEdgeSize = 4;
    constexpr int dim = 3;

    typedef SparseGridGpu<dim, aggregate<float>, blockEdgeSize, 64, long int> SparseGridZ;

    SparseGridZ sparseGrid(sz);
    gpu::ofp_context_t ctx;
    sparseGrid.template setBackgroundValue<0>(0);

    // now create a 3D sphere

    grid_key_dx<3,int> start({256,256,256});

    dim3 gridSize(32,32,32);

    // Insert values on the grid
    sparseGrid.setGPUInsertBuffer(gridSize,dim3(1));
    CUDA_LAUNCH_DIM3((insertSphere3D_radius<0>),
            gridSize, dim3(SparseGridZ::blockEdgeSize_*SparseGridZ::blockEdgeSize_*SparseGridZ::blockEdgeSize_,1,1),
            sparseGrid.toKernel(), start, 64, 56, 1);

    sparseGrid.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    size_t req = 0;
    sparseGrid.packReset();

    {
        grid_key_dx<3> start1({0,0,0});
        grid_key_dx<3> stop1({321,999,999});

        grid_key_dx<3> start2({322,0,0});
        grid_key_dx<3> stop2({999,999,999});

        auto it1 = sparseGrid.getIterator(start1,stop1);
        sparseGrid.template packRequest<0>(it1,req);

        auto it2 = sparseGrid.getIterator(start2,stop2);
        sparseGrid.template packRequest<0>(it2,req);

        sparseGrid.template packCalculate<0>(req,ctx);
    }

    sparseGrid.template deviceToHost<0>();


    size_t cnt = sparseGrid.countExistingElements();

    // The factors of 2 come from the two iterators

    size_t tot = 2*8 + // 2 numbers of chunks
                 2*2*dim*4 + // 2 * 2 * dimension * integer
                 align_number(8,4685*8) + align_number(8,4475*8) + // 4685 chunks + 4475 chunks
                 align_number(8,4686*4) + align_number(8,4476*4) + // store the scan
                 align_number(8,185807*4) + align_number(8,176787*4) + // store the points
                 align_number(8,185807*2) + align_number(8,176787*2) + // store offsets
                 align_number(8,185807*1) + align_number(8,176787*1); // store the mask
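
    // (align_number(al,n) is used here to pad each n-byte section up to an
    //  al-byte boundary, so every block of the package stays 8-byte aligned)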

    BOOST_REQUIRE_EQUAL(req,tot);

    req = 0;
    sparseGrid.packReset();

    {
        grid_key_dx<3> start1({0,0,0});
        grid_key_dx<3> stop1({999,999,999});

        auto it1 = sparseGrid.getIterator(start1,stop1);
        sparseGrid.template packRequest<0>(it1,req);

        auto it2 = sparseGrid.getIterator(start1,stop1);
        sparseGrid.template packRequest<0>(it2,req);

        sparseGrid.template packCalculate<0>(req,ctx);
    }


    tot = 2*8 + // 2 numbers of chunks
          2*2*dim*4 + // 2 * 2 * dimension * integer
          2*align_number(8,sparseGrid.private_get_index_array().size()*8) + // store the chunk indexes
          2*align_number(8,(sparseGrid.private_get_index_array().size()+1)*4) + // store the scan
          2*align_number(8,cnt*4) + // store the points
          2*align_number(8,cnt*2) + // store offsets
          2*align_number(8,cnt*1); // store masks

    BOOST_REQUIRE_EQUAL(req,tot);
}

BOOST_AUTO_TEST_CASE(sparsegridgpu_remove_test)
{
    size_t sz[] = {1000,1000,1000};

    constexpr int blockEdgeSize = 4;
    constexpr int dim = 3;

    typedef SparseGridGpu<dim, aggregate<float>, blockEdgeSize, 64, long int> SparseGridZ;

    SparseGridZ sparseGrid(sz);
    gpu::ofp_context_t ctx;
    sparseGrid.template setBackgroundValue<0>(0);

    // now create a 3D sphere

    grid_key_dx<3,int> start({256,256,256});

    dim3 gridSize(32,32,32);

    // Insert values on the grid
    sparseGrid.setGPUInsertBuffer(gridSize,dim3(1));
    CUDA_LAUNCH_DIM3((insertSphere3D_radius<0>),
            gridSize, dim3(SparseGridZ::blockEdgeSize_*SparseGridZ::blockEdgeSize_*SparseGridZ::blockEdgeSize_,1,1),
            sparseGrid.toKernel(), start, 64, 56, 1);

    sparseGrid.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    // remove the center

    Box<3,unsigned int> remove_section1({310,0,0},{330,999,999});
    Box<3,unsigned int> remove_section2({0,310,0},{999,330,999});
    Box<3,unsigned int> remove_section3({0,0,310},{999,999,330});

    sparseGrid.remove(remove_section1);
    sparseGrid.remove(remove_section2);
    sparseGrid.remove(remove_section3);

    sparseGrid.removeAddUnpackFinalize<>(ctx,0);

    sparseGrid.deviceToHost<0>();

    // Check that we still have the sphere points, except those inside the removed sections

    auto it = sparseGrid.getIterator();

    bool match = true;

    while (it.isNext())
    {
        auto p = it.get();

        Point<3,size_t> pt = p.toPoint();

        // Calculate the radius
        float radius = sqrt((pt.get(0) - 320)*(pt.get(0) - 320) +
                            (pt.get(1) - 320)*(pt.get(1) - 320) +
                            (pt.get(2) - 320)*(pt.get(2) - 320));


        if (radius < 55.99 || radius > 64.01)
        {match &= false;}

        if (remove_section1.isInside(pt) == true)
        {match &= false;}

        if (remove_section2.isInside(pt) == true)
        {match &= false;}

        if (remove_section3.isInside(pt) == true)
        {match &= false;}

        ++it;
    }

    BOOST_REQUIRE_EQUAL(match,true);
}

template<typename SG_type>
void pack_unpack_test(SG_type & sparseGridDst, SG_type & sparseGridSrc,
                      Box<3,size_t> & box1_dst,
                      Box<3,size_t> & box2_dst,
                      Box<3,size_t> & box3_dst,
                      Box<3,size_t> & box4_dst,
                      gpu::ofp_context_t & ctx,
                      bool test_pack)
{
    Box<3,size_t> box1_src({256,256,256},{273,390,390});
    Box<3,size_t> box2_src({320,256,256},{337,390,390});

    // And two vertical sections

    Box<3,size_t> box3_src({256,256,256},{273,320,390});
    Box<3,size_t> box4_src({320,256,256},{337,320,390});

    // Now we calculate the memory required to pack

    sparseGridSrc.packReset();

    size_t req = 0;
    auto sub_it = sparseGridSrc.getIterator(box1_src.getKP1(),box1_src.getKP2());
    sparseGridSrc.template packRequest<0,1>(sub_it,req);

    sub_it = sparseGridSrc.getIterator(box2_src.getKP1(),box2_src.getKP2());
    sparseGridSrc.template packRequest<0,1>(sub_it,req);

    sub_it = sparseGridSrc.getIterator(box3_src.getKP1(),box3_src.getKP2());
    sparseGridSrc.template packRequest<0,1>(sub_it,req);

    sub_it = sparseGridSrc.getIterator(box4_src.getKP1(),box4_src.getKP2());
    sparseGridSrc.template packRequest<0,1>(sub_it,req);

    sparseGridSrc.template packCalculate<0,1>(req,ctx);

    CudaMemory mem;
    mem.resize(req);

    // Create an object of preallocated memory for properties
    ExtPreAlloc<CudaMemory> & prAlloc_prp = *(new ExtPreAlloc<CudaMemory>(req,mem));

    prAlloc_prp.incRef();

    // Pack information
    Pack_stat sts;

    sub_it = sparseGridSrc.getIterator(box1_src.getKP1(),box1_src.getKP2());
    sparseGridSrc.template pack<0,1>(prAlloc_prp,sub_it,sts);

    sub_it = sparseGridSrc.getIterator(box2_src.getKP1(),box2_src.getKP2());
    sparseGridSrc.template pack<0,1>(prAlloc_prp,sub_it,sts);

    sub_it = sparseGridSrc.getIterator(box3_src.getKP1(),box3_src.getKP2());
    sparseGridSrc.template pack<0,1>(prAlloc_prp,sub_it,sts);

    sub_it = sparseGridSrc.getIterator(box4_src.getKP1(),box4_src.getKP2());
    sparseGridSrc.template pack<0,1>(prAlloc_prp,sub_it,sts);


    sparseGridSrc.template packFinalize<0,1>(prAlloc_prp,sts);

    // Now we analyze the package

    if (test_pack == true)
    {
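        // Layout of each packed box, as parsed below: a size_t chunk count
        // ncnk, then ncnk size_t chunk ids plus the 2*3 int box extents,
        // the (ncnk+1) unsigned int scan (whose last entry is the point
        // count n_pnt), then n_pnt*16 bytes of data (float + float[3]),
        // n_pnt short int offsets and n_pnt unsigned char masks, each
        // section aligned to sizeof(size_t).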
        size_t ncnk = *(size_t *)mem.getPointer();
        BOOST_REQUIRE_EQUAL(ncnk,1107);
        size_t actual_offset = ncnk*sizeof(size_t) + sizeof(size_t) + 2*3*sizeof(int);
        mem.deviceToHost(actual_offset + ncnk*sizeof(unsigned int),actual_offset + ncnk*sizeof(unsigned int) + sizeof(unsigned int));
        unsigned int n_pnt = *(unsigned int *)((unsigned char *)mem.getPointer() + actual_offset + ncnk*sizeof(unsigned int));
        BOOST_REQUIRE_EQUAL(n_pnt,41003);

        actual_offset += align_number(sizeof(size_t),(ncnk+1)*sizeof(unsigned int));
        actual_offset += align_number(sizeof(size_t),n_pnt*(16));
        actual_offset += align_number(sizeof(size_t),n_pnt*sizeof(short int));
        actual_offset += align_number(sizeof(size_t),n_pnt*sizeof(unsigned char));

        ncnk = *(size_t *)((unsigned char *)mem.getPointer() + actual_offset);
        BOOST_REQUIRE_EQUAL(ncnk,1420);
        actual_offset += ncnk*sizeof(size_t) + sizeof(size_t) + 2*3*sizeof(int);
        mem.deviceToHost(actual_offset + ncnk*sizeof(unsigned int),actual_offset + ncnk*sizeof(unsigned int) + sizeof(unsigned int));
        n_pnt = *(unsigned int *)((unsigned char *)mem.getPointer() + actual_offset + ncnk*sizeof(unsigned int));
        BOOST_REQUIRE_EQUAL(n_pnt,54276);

        actual_offset += align_number(sizeof(size_t),(ncnk+1)*sizeof(unsigned int));
        actual_offset += align_number(sizeof(size_t),n_pnt*(16));
        actual_offset += align_number(sizeof(size_t),n_pnt*sizeof(short int));
        actual_offset += align_number(sizeof(size_t),n_pnt*sizeof(unsigned char));

        ncnk = *(size_t *)((unsigned char *)mem.getPointer() + actual_offset);
        BOOST_REQUIRE_EQUAL(ncnk,610);
        actual_offset += ncnk*sizeof(size_t) + sizeof(size_t) + 2*3*sizeof(int);
        mem.deviceToHost(actual_offset + ncnk*sizeof(unsigned int),actual_offset + ncnk*sizeof(unsigned int) + sizeof(unsigned int));
        n_pnt = *(unsigned int *)((unsigned char *)mem.getPointer() + actual_offset + ncnk*sizeof(unsigned int));
        BOOST_REQUIRE_EQUAL(n_pnt,20828);

        actual_offset += align_number(sizeof(size_t),(ncnk+1)*sizeof(unsigned int));
        actual_offset += align_number(sizeof(size_t),n_pnt*(16));
        actual_offset += align_number(sizeof(size_t),n_pnt*sizeof(short int));
        actual_offset += align_number(sizeof(size_t),n_pnt*sizeof(unsigned char));

        ncnk = *(size_t *)((unsigned char *)mem.getPointer() + actual_offset);
        BOOST_REQUIRE_EQUAL(ncnk,739);
        actual_offset += ncnk*sizeof(size_t) + sizeof(size_t) + 2*3*sizeof(int);
        mem.deviceToHost(actual_offset + ncnk*sizeof(unsigned int),actual_offset + ncnk*sizeof(unsigned int) + sizeof(unsigned int));
        n_pnt = *(unsigned int *)((unsigned char *)mem.getPointer() + actual_offset + ncnk*sizeof(unsigned int));
        BOOST_REQUIRE_EQUAL(n_pnt,27283);
    }

    prAlloc_prp.reset();

    Unpack_stat ps;

    sparseGridDst.removeAddUnpackReset();

    // sub-grid where to unpack
    auto sub2 = sparseGridDst.getIterator(box1_dst.getKP1(),box1_dst.getKP2());
    sparseGridDst.remove(box1_dst);
    sparseGridDst.template unpack<0,1>(prAlloc_prp,sub2,ps,ctx);

    sub2 = sparseGridDst.getIterator(box2_dst.getKP1(),box2_dst.getKP2());
    sparseGridDst.remove(box2_dst);
    sparseGridDst.template unpack<0,1>(prAlloc_prp,sub2,ps,ctx);

    sub2 = sparseGridDst.getIterator(box3_dst.getKP1(),box3_dst.getKP2());
    sparseGridDst.remove(box3_dst);
    sparseGridDst.template unpack<0,1>(prAlloc_prp,sub2,ps,ctx);

    sub2 = sparseGridDst.getIterator(box4_dst.getKP1(),box4_dst.getKP2());
    sparseGridDst.remove(box4_dst);
    sparseGridDst.template unpack<0,1>(prAlloc_prp,sub2,ps,ctx);

    sparseGridDst.template removeAddUnpackFinalize<0,1>(ctx,0);

    sparseGridDst.template deviceToHost<0,1>();
}

BOOST_AUTO_TEST_CASE(sparsegridgpu_pack_unpack)
{
    size_t sz[] = {1000,1000,1000};

    constexpr int blockEdgeSize = 4;
    constexpr int dim = 3;

    Box<3,size_t> box1_dst({256,256,256},{273,390,390});
    Box<3,size_t> box2_dst({274,256,256},{291,390,390});

    Box<3,size_t> box3_dst({300,256,256},{317,320,390});
    Box<3,size_t> box4_dst({320,256,256},{337,320,390});

    typedef SparseGridGpu<dim, aggregate<float,float[3]>, blockEdgeSize, 64, long int> SparseGridZ;

    SparseGridZ sparseGridSrc(sz);
    SparseGridZ sparseGridDst(sz);
    gpu::ofp_context_t ctx;
    sparseGridSrc.template setBackgroundValue<0>(0);
    sparseGridDst.template setBackgroundValue<0>(0);

    // now create a 3D sphere

    grid_key_dx<3,int> start({256,256,256});

    dim3 gridSize(32,32,32);

    // Insert values on the grid
    sparseGridSrc.setGPUInsertBuffer(gridSize,dim3(1));
    CUDA_LAUNCH_DIM3((insertSphere3D_radiusV<0>),
            gridSize, dim3(SparseGridZ::blockEdgeSize_*SparseGridZ::blockEdgeSize_*SparseGridZ::blockEdgeSize_,1,1),
            sparseGridSrc.toKernel(), start, 64, 56, 1);

    sparseGridSrc.flush<smax_<0>, smax_<1>>(ctx, flush_type::FLUSH_ON_DEVICE);

    // Now we pack two vertical sections

    pack_unpack_test(sparseGridDst,sparseGridSrc,
                     box1_dst,box2_dst,
                     box3_dst,box4_dst,
                     ctx,true);
1731
1732 sparseGridDst.template deviceToHost<0,1>();
1733
1734 int cnt1 = 0;
1735 int cnt2 = 0;
1736 int cnt3 = 0;
1737 int cnt4 = 0;
1738
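// Every unpacked point must still lie on the shell (56 < r < 64). box2 and
// box3 receive data from x-shifted source regions, hence the -46 and +44
// corrections in their distance computations below.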
1739 auto it = sparseGridDst.getIterator();
1740
1741 bool match = true;
1742
1743 while (it.isNext())
1744 {
1745 auto p = it.get();
1746
1747 auto pt = p.toPoint();
1748
1749 if (box1_dst.isInside(pt) == true)
1750 {
1751 ++cnt1;
1752
1753 const long int x = (long int)pt.get(0) - (start.get(0) + gridSize.x / 2 * blockEdgeSize);
1754 const long int y = (long int)pt.get(1) - (start.get(1) + gridSize.y / 2 * blockEdgeSize);
1755 const long int z = (long int)pt.get(2) - (start.get(2) + gridSize.z / 2 * blockEdgeSize);
1756
1757 float radius = sqrt((float) (x*x + y*y + z*z));
1758
1759 bool is_active = radius < 64 && radius > 56;
1760
1761 if (is_active == true)
1762 {match &= true;}
1763 else
1764 {match &= false;}
1765 }
1766 else if (box2_dst.isInside(pt) == true)
1767 {
1768 ++cnt2;
1769
1770 const long int x = (long int)pt.get(0) - (start.get(0) - 46 + gridSize.x / 2 * blockEdgeSize);
1771 const long int y = (long int)pt.get(1) - (start.get(1) + gridSize.y / 2 * blockEdgeSize);
1772 const long int z = (long int)pt.get(2) - (start.get(2) + gridSize.z / 2 * blockEdgeSize);
1773
1774 float radius = sqrt((float) (x*x + y*y + z*z));
1775
1776 bool is_active = radius < 64 && radius > 56;
1777
1778 if (is_active == true)
1779 {match &= true;}
1780 else
1781 {match &= false;}
1782 }
1783 else if (box3_dst.isInside(pt) == true)
1784 {
1785 ++cnt3;
1786
1787 const long int x = (long int)pt.get(0) - (start.get(0) + 44 + gridSize.x / 2 * blockEdgeSize);
1788 const long int y = (long int)pt.get(1) - (start.get(1) + gridSize.y / 2 * blockEdgeSize);
1789 const long int z = (long int)pt.get(2) - (start.get(2) + gridSize.z / 2 * blockEdgeSize);
1790
1791 float radius = sqrt((float) (x*x + y*y + z*z));
1792
1793 bool is_active = radius < 64 && radius > 56;
1794
1795 if (is_active == true)
1796 {match &= true;}
1797 else
1798 {match &= false;}
1799 }
1800 else if (box4_dst.isInside(pt) == true)
1801 {
1802 ++cnt4;
1803
1804 const long int x = (long int)pt.get(0) - (start.get(0) + gridSize.x / 2 * blockEdgeSize);
1805 const long int y = (long int)pt.get(1) - (start.get(1) + gridSize.y / 2 * blockEdgeSize);
1806 const long int z = (long int)pt.get(2) - (start.get(2) + gridSize.z / 2 * blockEdgeSize);
1807
1808 float radius = sqrt((float) (x*x + y*y + z*z));
1809
1810 bool is_active = radius < 64 && radius > 56;
1811
1812 if (is_active == true)
1813 {match &= true;}
1814 else
1815 {match &= false;}
1816 }
1817
1818 ++it;
1819 }
1820
1821 BOOST_REQUIRE_EQUAL(match,true);
1822 BOOST_REQUIRE_EQUAL(cnt1,41003);
1823 BOOST_REQUIRE_EQUAL(cnt2,54276);
1824 BOOST_REQUIRE_EQUAL(cnt3,20828);
1825 BOOST_REQUIRE_EQUAL(cnt4,27283);
1826
1827 // Now we remove even points
1828
1829 // Insert values on the grid
1830 sparseGridSrc.setGPUInsertBuffer(gridSize,dim3(1));
1831 CUDA_LAUNCH_DIM3((removeSphere3D_even_radiusV<0>),
1832 gridSize, dim3(SparseGridZ::blockEdgeSize_*SparseGridZ::blockEdgeSize_*SparseGridZ::blockEdgeSize_,1,1),
1833 sparseGridSrc.toKernel(), start,64, 56, 1);
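// removeSphere3D_even_radiusV drops the "even" points of the shell (see the
// comment above); the expected counters below are roughly half of the
// previous ones, e.g. 41003 -> 20520 for box1.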
1834
1835 pack_unpack_test(sparseGridDst,sparseGridSrc,
1836 box1_dst,box2_dst,
1837 box3_dst,box4_dst,
1838 ctx,false);
1839
1840 sparseGridDst.template deviceToHost<0,1>();
1841
1842 cnt1 = 0;
1843 cnt2 = 0;
1844 cnt3 = 0;
1845 cnt4 = 0;
1846
1847 auto it2 = sparseGridDst.getIterator();
1848
1849 match = true;
1850
1851 while (it2.isNext())
1852 {
1853 auto p = it2.get();
1854
1855 auto pt = p.toPoint();
1856
1857 if (box1_dst.isInside(pt) == true)
1858 {
1859 ++cnt1;
1860
1861 const long int x = (long int)pt.get(0) - (start.get(0) + gridSize.x / 2 * blockEdgeSize);
1862 const long int y = (long int)pt.get(1) - (start.get(1) + gridSize.y / 2 * blockEdgeSize);
1863 const long int z = (long int)pt.get(2) - (start.get(2) + gridSize.z / 2 * blockEdgeSize);
1864
1865 float radius = sqrt((float) (x*x + y*y + z*z));
1866
1867 bool is_active = radius < 64 && radius > 56;
1868
1869 if (is_active == true)
1870 {match &= true;}
1871 else
1872 {match &= false;}
1873 }
1874 else if (box2_dst.isInside(pt) == true)
1875 {
1876 ++cnt2;
1877
1878 const long int x = (long int)pt.get(0) - (start.get(0) - 46 + gridSize.x / 2 * blockEdgeSize);
1879 const long int y = (long int)pt.get(1) - (start.get(1) + gridSize.y / 2 * blockEdgeSize);
1880 const long int z = (long int)pt.get(2) - (start.get(2) + gridSize.z / 2 * blockEdgeSize);
1881
1882 float radius = sqrt((float) (x*x + y*y + z*z));
1883
1884 bool is_active = radius < 64 && radius > 56;
1885
1886 if (is_active == true)
1887 {match &= true;}
1888 else
1889 {match &= false;}
1890 }
1891 else if (box3_dst.isInside(pt) == true)
1892 {
1893 ++cnt3;
1894
1895 const long int x = (long int)pt.get(0) - (start.get(0) + 44 + gridSize.x / 2 * blockEdgeSize);
1896 const long int y = (long int)pt.get(1) - (start.get(1) + gridSize.y / 2 * blockEdgeSize);
1897 const long int z = (long int)pt.get(2) - (start.get(2) + gridSize.z / 2 * blockEdgeSize);
1898
1899 float radius = sqrt((float) (x*x + y*y + z*z));
1900
1901 bool is_active = radius < 64 && radius > 56;
1902
1903 if (is_active == true)
1904 {match &= true;}
1905 else
1906 {match &= false;}
1907 }
1908 else if (box4_dst.isInside(pt) == true)
1909 {
1910 ++cnt4;
1911
1912 const long int x = (long int)pt.get(0) - (start.get(0) + gridSize.x / 2 * blockEdgeSize);
1913 const long int y = (long int)pt.get(1) - (start.get(1) + gridSize.y / 2 * blockEdgeSize);
1914 const long int z = (long int)pt.get(2) - (start.get(2) + gridSize.z / 2 * blockEdgeSize);
1915
1916 float radius = sqrt((float) (x*x + y*y + z*z));
1917
1918 bool is_active = radius < 64 && radius > 56;
1919
1920 if (is_active == true)
1921 {match &= true;}
1922 else
1923 {match &= false;}
1924 }
1925
1926 ++it2;
1927 }
1928
1929 BOOST_REQUIRE_EQUAL(match,true);
1930 BOOST_REQUIRE_EQUAL(cnt1,20520);
1931 BOOST_REQUIRE_EQUAL(cnt2,27152);
1932 BOOST_REQUIRE_EQUAL(cnt3,10423);
1933 BOOST_REQUIRE_EQUAL(cnt4,13649);
1934}
1935
1936#if defined(OPENFPM_DATA_ENABLE_IO_MODULE) || defined(PERFORMANCE_TEST)
1937
1938BOOST_AUTO_TEST_CASE(testSparseGridGpuOutput3DHeatStencil)
1939{
1940 constexpr unsigned int dim = 3;
1941 constexpr unsigned int blockEdgeSize = 4;
1942 typedef aggregate<float, float> AggregateT;
1943
1944 size_t sz[3] = {512,512,512};
1945// dim3 gridSize(128,128,128);
1946 dim3 gridSize(32,32,32);
1947
1948 grid_smb<dim, blockEdgeSize> blockGeometry(sz);
1949 SparseGridGpu<dim, AggregateT, blockEdgeSize, 64, long int> sparseGrid(blockGeometry);
1950 mgpu::ofp_context_t ctx;
1951 sparseGrid.template setBackgroundValue<0>(0);
1952
1954 // Sphere 1
1955 grid_key_dx<3,int> start1({256,256,256});
1956 sparseGrid.setGPUInsertBuffer(gridSize,dim3(1));
1957 CUDA_LAUNCH_DIM3((insertSphere3D<0>),
1958 gridSize, dim3(blockEdgeSize*blockEdgeSize*blockEdgeSize,1,1),
1959 sparseGrid.toKernel(), start1, 64, 32, 1);
1960
1961 sparseGrid.flush < smax_< 0 >> (ctx, flush_type::FLUSH_ON_DEVICE);
1962
1963 sparseGrid.removeUnusedBuffers();
1964
1965 // Sphere 2
1966 grid_key_dx<3,int> start2({192,192,192});
1967 sparseGrid.setGPUInsertBuffer(gridSize,dim3(1));
1968 CUDA_LAUNCH_DIM3((insertSphere3D<0>),
1969 gridSize, dim3(blockEdgeSize*blockEdgeSize*blockEdgeSize,1,1),
1970 sparseGrid.toKernel(), start2, 64, 44, 1);
1971
1972 sparseGrid.flush < smax_< 0 >> (ctx, flush_type::FLUSH_ON_DEVICE);
1973
1974
1975 // Sphere 3
1976 grid_key_dx<3,int> start3({340,192,192});
1977 sparseGrid.setGPUInsertBuffer(gridSize,dim3(1));
1978 CUDA_LAUNCH_DIM3((insertSphere3D<0>),
1979 gridSize, dim3(blockEdgeSize*blockEdgeSize*blockEdgeSize,1,1),
1980 sparseGrid.toKernel(), start3, 20, 15, 1);
1981
1982 sparseGrid.flush < smax_< 0 >> (ctx, flush_type::FLUSH_ON_DEVICE);
1983
1985
1986 sparseGrid.findNeighbours(); // Pre-compute the neighbour positions for each block
1987 sparseGrid.tagBoundaries(ctx);
1988
1989 // Now apply some boundary conditions
1990 sparseGrid.template applyStencils<BoundaryStencilSetXRescaled<dim,0,0>>(sparseGrid.getBox(),STENCIL_MODE_INPLACE,
1991 192, 384,
1992 0.0, 10.0);
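// BoundaryStencilSetXRescaled presumably pins the tagged boundary points to
// a value obtained by rescaling their x coordinate from [192,384] onto
// [0.0,10.0], acting as a Dirichlet-style condition for the iterations below.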
1993
1994
1995 // Now apply the Laplacian (heat diffusion) operator
1996// const unsigned int maxIter = 1000;
1997 const unsigned int maxIter = 100;
1998 for (unsigned int iter=0; iter<maxIter; ++iter)
1999 {
2000 for (int innerIter=0; innerIter<10; ++innerIter)
2001 {
2002 sparseGrid.applyStencils<HeatStencil<dim, 0, 1>>(sparseGrid.getBox(),STENCIL_MODE_INPLACE, 0.1);
2003
2004 sparseGrid.applyStencils<HeatStencil<dim, 1, 0>>(sparseGrid.getBox(),STENCIL_MODE_INPLACE, 0.1);
2005
2006 }
2007 }
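// The two HeatStencil instantiations ping-pong between properties 0 and 1
// (read 0 / write 1, then read 1 / write 0), avoiding an explicit copy;
// in total 100 * 10 * 2 = 2000 diffusion sweeps with coefficient 0.1.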
2008
2009 sparseGrid.deviceToHost<0,1>();
2010 sparseGrid.write("SparseGridGPU_output3DHeatStencil.vtk");
2011}
2012
2013BOOST_AUTO_TEST_CASE(testSparseGridGpuOutput)
2014{
2015 constexpr unsigned int dim = 2;
2016 constexpr unsigned int blockEdgeSize = 8;
2017 typedef aggregate<float> AggregateT;
2018
2019 size_t sz[2] = {1000000,1000000};
2020 dim3 gridSize(128,128);
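// A 10^6 x 10^6 virtual grid (10^12 cells) stays cheap here: only the 8x8
// blocks actually touched around (500000,500000) are allocated by the
// sparse data structure.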
2021
2022 grid_smb<dim, blockEdgeSize> blockGeometry(sz);
2023 SparseGridGpu<dim, AggregateT, blockEdgeSize, 64, long int> sparseGrid(blockGeometry);
2024 mgpu::ofp_context_t ctx;
2025 sparseGrid.template setBackgroundValue<0>(0);
2026
2027 grid_key_dx<2,int> start({500000,500000});
2028
2029 // Insert values on the grid
2030 sparseGrid.setGPUInsertBuffer(gridSize,dim3(1));
2031 CUDA_LAUNCH_DIM3((insertSphere<0>),gridSize, dim3(blockEdgeSize*blockEdgeSize,1),sparseGrid.toKernel(), start, 512, 256, 1);
2032 sparseGrid.flush < smax_< 0 >> (ctx, flush_type::FLUSH_ON_DEVICE);
2033
2034 sparseGrid.findNeighbours(); // Pre-compute the neighbour positions for each block
2035 sparseGrid.tagBoundaries(ctx);
2036
2037 sparseGrid.template deviceToHost<0>();
2038
2039 sparseGrid.write("SparseGridGPU_output.vtk");
2040}
2041
2042BOOST_AUTO_TEST_CASE(testSparseGridGpuOutput3D)
2043{
2044 constexpr unsigned int dim = 3;
2045 constexpr unsigned int blockEdgeSize = 4;
2046 typedef aggregate<float> AggregateT;
2047
2048 size_t sz[3] = {512,512,512};
2049// dim3 gridSize(128,128,128);
2050 dim3 gridSize(32,32,32);
2051
2052 grid_smb<dim, blockEdgeSize> blockGeometry(sz);
2053 SparseGridGpu<dim, AggregateT, blockEdgeSize, 64, long int> sparseGrid(blockGeometry);
2054 mgpu::ofp_context_t ctx;
2055 sparseGrid.template setBackgroundValue<0>(0);
2056
2057 grid_key_dx<3,int> start({256,256,256});
2058
2059 // Insert values on the grid
2060 sparseGrid.setGPUInsertBuffer(gridSize,dim3(1));
2061 CUDA_LAUNCH_DIM3((insertSphere3D<0>),
2062 gridSize, dim3(blockEdgeSize*blockEdgeSize*blockEdgeSize,1,1),
2063 sparseGrid.toKernel(), start, 64, 56, 1);
2064 sparseGrid.flush < smax_< 0 >> (ctx, flush_type::FLUSH_ON_DEVICE);
2065
2066
2067 sparseGrid.findNeighbours(); // Pre-compute the neighbour positions for each block
2068 sparseGrid.tagBoundaries(ctx);
2069
2070 sparseGrid.template applyStencils<BoundaryStencilSetX<dim,0,0>>(sparseGrid.getBox(),STENCIL_MODE_INPLACE);
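// BoundaryStencilSetX presumably writes an x-coordinate-based value into
// property 0 of the tagged boundary points, which is what the reference
// VTK comparison below validates.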
2071
2072
2073 sparseGrid.template deviceToHost<0>();
2074
2075 sparseGrid.write("SparseGridGPU_output3D.vtk");
2076
2077 bool test = compare("SparseGridGPU_output3D.vtk","test_data/SparseGridGPU_output3D_test.vtk");
2078 BOOST_REQUIRE_EQUAL(true,test);
2079}
2080
2081
2082#endif
2083
2084BOOST_AUTO_TEST_SUITE_END()