//
// Created by tommaso on 10/06/19.
//

#define BOOST_TEST_DYN_LINK

#define DISABLE_MPI_WRITTERS

#include <boost/test/unit_test.hpp>
#include "SparseGridGpu/SparseGridGpu.hpp"
#include "SparseGridGpu/tests/utils/SparseGridGpu_testKernels.cuh"
#include "SparseGridGpu/tests/utils/SparseGridGpu_util_test.cuh"

template<unsigned int p1, unsigned int p2, unsigned int chunksPerBlock=1, typename SparseGridType, typename ScalarT>
__global__ void insertConstantValue2(SparseGridType sparseGrid, ScalarT value)
{
    constexpr unsigned int pMask = SparseGridType::pMask;
    typedef BlockTypeOf<typename SparseGridType::AggregateType, p1> BlockT;

    sparseGrid.init();

    int x = blockIdx.x * blockDim.x + threadIdx.x;
    int y = blockIdx.y * blockDim.y + threadIdx.y;
    int z = blockIdx.z * blockDim.z + threadIdx.z;
    grid_key_dx<SparseGridType::d, int> coord({x, y, z});

    auto pos = sparseGrid.getLinId(coord);
    unsigned int dataBlockId = pos / BlockT::size;
    unsigned int offset = pos % BlockT::size;

    auto encap = sparseGrid.insertBlock(dataBlockId);
    encap.template get<p1>()[offset] = value;
    encap.template get<p2>()[offset] = value;
    BlockMapGpu_ker<>::setExist(encap.template get<pMask>()[offset]);

    __syncthreads();

    sparseGrid.flush_block_insert();

    // Avoid compiler warnings about unused variables
    x++;
    y++;
    z++;
}
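
// Worked example of the id decomposition above: with blockEdgeSize = 8 in 2D,
// BlockT::size = 64, so linear id 70 falls in dataBlockId = 70 / 64 = 1 at
// offset = 70 % 64 = 6, i.e. the seventh slot of the second chunk.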

template<unsigned int p, typename SparseGridType>
__global__ void insertValues(SparseGridType sparseGrid)
{
    sparseGrid.init();

    const auto bDimX = blockDim.x;
    const auto bDimY = blockDim.y;
    const auto bDimZ = blockDim.z;
    const auto bIdX = blockIdx.x;
    const auto bIdY = blockIdx.y;
    const auto bIdZ = blockIdx.z;
    const auto tIdX = threadIdx.x;
    const auto tIdY = threadIdx.y;
    const auto tIdZ = threadIdx.z;
    int x = bIdX * bDimX + tIdX;
    int y = bIdY * bDimY + tIdY;
    int z = bIdZ * bDimZ + tIdZ;
    grid_key_dx<SparseGridType::d, int> coord({x, y, z});

    size_t pos = sparseGrid.getLinId(coord);

    sparseGrid.template insert<p>(coord) = x;

    sparseGrid.flush_block_insert();

    // Avoid compiler warnings about unused variables
    x++;
    y++;
    z++;
}

template<unsigned int p, typename SparseGridType, typename VectorOutType>
__global__ void copyToOutputIfPadding(SparseGridType sparseGrid, VectorOutType output)
{
    const auto bDimX = blockDim.x;
    const auto bDimY = blockDim.y;
    const auto bDimZ = blockDim.z;
    const auto bIdX = blockIdx.x;
    const auto bIdY = blockIdx.y;
    const auto bIdZ = blockIdx.z;
    const auto tIdX = threadIdx.x;
    const auto tIdY = threadIdx.y;
    const auto tIdZ = threadIdx.z;
    int x = bIdX * bDimX + tIdX;
    int y = bIdY * bDimY + tIdY;
    int z = bIdZ * bDimZ + tIdZ;
    grid_key_dx<SparseGridType::d, int> coord({x, y, z});

    size_t pos = sparseGrid.getLinId(coord);

    output.template get<p>(pos) = sparseGrid.isPadding(coord) ? 1 : 0;

    // Avoid compiler warnings about unused variables
    x++;
    y++;
    z++;
}

template<unsigned int p, typename SparseGridType>
__global__ void insertBoundaryValuesHeat(SparseGridType sparseGrid)
{
    sparseGrid.init();

    const auto bDimX = blockDim.x;
    const auto bDimY = blockDim.y;
    const auto bDimZ = blockDim.z;
    const auto bIdX = blockIdx.x;
    const auto bIdY = blockIdx.y;
    const auto bIdZ = blockIdx.z;
    const auto tIdX = threadIdx.x;
    const auto tIdY = threadIdx.y;
    const auto tIdZ = threadIdx.z;
    int x = bIdX * bDimX + tIdX;
    int y = bIdY * bDimY + tIdY;
    int z = bIdZ * bDimZ + tIdZ;
    grid_key_dx<SparseGridType::d, int> coord({x, y, z});

    float value = 0;
    if (x == 0)
    {
        value = 0;
    }
    else if (x == bDimX * gridDim.x - 1)
    {
        value = 10;
    }

    if (y == 0 || y == bDimY * gridDim.y - 1)
    {
        value = 10.0 * x / (bDimX * gridDim.x - 1);
    }

    sparseGrid.template insert<p>(coord) = value;

    __syncthreads();

    sparseGrid.flush_block_insert();

    // Avoid compiler warnings about unused variables
    x++;
    y++;
    z++;
}

template<unsigned int dim, unsigned int p>
struct LaplacianStencil
{
    // This is an example of a laplacian stencil to apply using the apply stencil facility of SparseGridGpu

    static constexpr unsigned int supportRadius = 1;

    template<typename SparseGridT, typename DataBlockWrapperT>
    static inline __device__ void stencil(
            SparseGridT & sparseGrid,
            grid_key_dx<dim, int> & dataBlockCoord,
            unsigned int offset,
            grid_key_dx<dim, int> & pointCoord,
            DataBlockWrapperT & dataBlockLoad,
            DataBlockWrapperT & dataBlockStore)
    {
        typedef typename SparseGridT::AggregateBlockType AggregateT;
        typedef ScalarTypeOf<AggregateT, p> ScalarT;

        constexpr unsigned int enlargedBlockSize = IntPow<
                SparseGridT::getBlockEdgeSize() + 2 * supportRadius, dim>::value;

        __shared__ ScalarT enlargedBlock[enlargedBlockSize];
        sparseGrid.template loadBlock<p>(dataBlockLoad, enlargedBlock);
        sparseGrid.template loadGhost<p>(dataBlockCoord, enlargedBlock);
        __syncthreads();

        const auto coord = sparseGrid.getCoordInEnlargedBlock(offset);
        const auto linId = sparseGrid.getLinIdInEnlargedBlock(offset);
        ScalarT cur = enlargedBlock[linId];
        ScalarT res = -2.0*dim*cur; // The central part of the stencil
        for (int d=0; d<dim; ++d)
        {
            auto nPlusId = sparseGrid.getNeighbourLinIdInEnlargedBlock(coord, d, 1);
            auto nMinusId = sparseGrid.getNeighbourLinIdInEnlargedBlock(coord, d, -1);
            ScalarT neighbourPlus = enlargedBlock[nPlusId];
            ScalarT neighbourMinus = enlargedBlock[nMinusId];
            res += neighbourMinus + neighbourPlus;
        }
        enlargedBlock[linId] = res;

        __syncthreads();
        sparseGrid.template storeBlock<p>(dataBlockStore, enlargedBlock);
    }

    template <typename SparseGridT, typename CtxT>
    static inline void __host__ flush(SparseGridT & sparseGrid, CtxT & ctx)
    {
        sparseGrid.template flush<smin_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);
    }
};
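
// Usage sketch for the stencil functor above, mirroring the applyStencils
// calls in the tests below (illustrative only, not executed here): once the
// grid is filled and flushed, findNeighbours() has run and the boundaries
// are tagged,
//
//   sparseGrid.template applyStencils<LaplacianStencil<2, 0>>(
//           sparseGrid.getBox(), STENCIL_MODE_INPLACE);
//
// performs one in-place Laplacian sweep over the existing points.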

//todo: Here write some sort of stencil kernel to test the client interface for stencil application

BOOST_AUTO_TEST_SUITE(SparseGridGpu_tests)

BOOST_AUTO_TEST_CASE(testInsert)
{
    constexpr unsigned int dim = 2;
    constexpr unsigned int blockEdgeSize = 8;
    constexpr unsigned int dataBlockSize = blockEdgeSize * blockEdgeSize;
    typedef aggregate<DataBlock<float, dataBlockSize>> AggregateSGT;
    typedef aggregate<float> AggregateOutT;

    dim3 gridSize(2, 2);
    dim3 blockSizeInsert(blockEdgeSize, blockEdgeSize);

    grid_smb<dim, blockEdgeSize> blockGeometry(gridSize);
    SparseGridGpu<dim, AggregateOutT, blockEdgeSize> sparseGrid(blockGeometry);

    sparseGrid.template setBackgroundValue<0>(666);
    sparseGrid.setGPUInsertBuffer(gridSize, blockSizeInsert);

    CUDA_LAUNCH_DIM3((insertValues<0>), gridSize, blockSizeInsert, sparseGrid.toKernel());

    mgpu::ofp_context_t ctx;
    sparseGrid.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    sparseGrid.template deviceToHost<0>();

    // Compare
    bool match = true;
    for (size_t i = 0; i < 4*64; i++)
    {
        auto coord = sparseGrid.getCoord(i);
        auto expectedValue = coord.get(0);

        match &= sparseGrid.template get<0>(coord) == expectedValue;
    }

    BOOST_REQUIRE_EQUAL(match, true);
}
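
// The pattern above is the insertion protocol used throughout this suite:
// setGPUInsertBuffer() sizes the per-block staging buffers, the kernel
// records its insertions between init() and flush_block_insert(),
// flush<smax_<0>>() merges the staged chunks into the grid on the device
// (smax_ resolves duplicate insertions by keeping the maximum), and
// deviceToHost() mirrors the result back for verification.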

BOOST_AUTO_TEST_CASE(testInsert3D)
{
    constexpr unsigned int dim = 3;
    constexpr unsigned int blockEdgeSize = 4;
    constexpr unsigned int dataBlockSize = blockEdgeSize * blockEdgeSize;
    typedef aggregate<DataBlock<float, dataBlockSize>> AggregateSGT;
    typedef aggregate<float> AggregateOutT;

    dim3 gridSize(2, 2, 2);
    dim3 blockSizeInsert(blockEdgeSize, blockEdgeSize, blockEdgeSize);

    grid_smb<dim, blockEdgeSize> blockGeometry(gridSize);
    SparseGridGpu<dim, AggregateOutT, blockEdgeSize> sparseGrid(blockGeometry);

    sparseGrid.template setBackgroundValue<0>(666);

    sparseGrid.setGPUInsertBuffer(gridSize, blockSizeInsert);

    CUDA_LAUNCH_DIM3((insertValues<0>), gridSize, blockSizeInsert, sparseGrid.toKernel());

    mgpu::ofp_context_t ctx;
    sparseGrid.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    sparseGrid.template deviceToHost<0>();

    // Compare
    bool match = true;
    for (size_t i = 0; i < 64*4; i++)
    {
        auto coord = sparseGrid.getCoord(i);
        auto expectedValue = coord.get(0);

        match &= sparseGrid.template get<0>(coord) == expectedValue;
    }

    BOOST_REQUIRE_EQUAL(match, true);
}

BOOST_AUTO_TEST_CASE(testTagBoundaries)
{

    constexpr unsigned int dim = 2;
    constexpr unsigned int blockEdgeSize = 8;
    typedef aggregate<float> AggregateT;

    dim3 gridSize(2, 2);
    dim3 blockSizeInsert(blockEdgeSize, blockEdgeSize);

    grid_smb<dim, blockEdgeSize> blockGeometry(gridSize);
    SparseGridGpu<dim, AggregateT, blockEdgeSize, 64> sparseGrid(blockGeometry);

    sparseGrid.template setBackgroundValue<0>(666);
    mgpu::ofp_context_t ctx;

    sparseGrid.setGPUInsertBuffer(gridSize, blockSizeInsert);
    dim3 pt1(0, 0, 0);
    CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), pt1, 1);
    dim3 pt2(6, 6, 0);
    CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), pt2, 1);
    dim3 pt3(7, 6, 0);
    CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), pt3, 1);
    sparseGrid.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    sparseGrid.setGPUInsertBuffer(gridSize, blockSizeInsert);
    dim3 pt4(8, 6, 0);
    CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), pt4, 1);
    sparseGrid.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    sparseGrid.setGPUInsertBuffer(gridSize, blockSizeInsert);
    for (int y = 9; y <= 11; y++)
    {
        dim3 pt1(6, y, 0);
        CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), pt1, 1);
        dim3 pt2(7, y, 0);
        CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), pt2, 1);
    }
    sparseGrid.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    sparseGrid.setGPUInsertBuffer(gridSize, blockSizeInsert);
    for (int y = 9; y <= 11; y++)
    {
        dim3 pt1(8, y, 0);
        CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), pt1, 1);
        dim3 pt2(9, y, 0);
        CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), pt2, 1);
    }
    sparseGrid.template flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

//    sparseGrid.hostToDevice(); //just sync masks
    sparseGrid.deviceToHost(); //just sync masks
    sparseGrid.deviceToHost<0>();

    sparseGrid.findNeighbours(); // Pre-compute the neighbours pos for each block!
    // Now tag the boundaries
    sparseGrid.tagBoundaries(ctx);

    // Get output
    openfpm::vector_gpu<AggregateT> output;
    output.resize(4 * 64);

    CUDA_LAUNCH_DIM3((copyToOutputIfPadding<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), output.toKernel());

    output.template deviceToHost<0>();
    sparseGrid.template deviceToHost<0>();

    // Compare
    bool match = true;
    for (size_t i = 0; i < output.size(); i++)
    {
        auto coord = sparseGrid.getCoord(i);
        auto expectedValue =
                (i == 0
                 || i == 54
                 || i == 55
                 || i == 112
                 || i == 142 || i == 143 || i == 200 || i == 201 // (6,9), (7,9), (8,9), (9,9)
                 || i == 150 || i == 209 // (6,10), (9,10)
                 || i == 158 || i == 159 || i == 216 || i == 217 // (6,11), (7,11), (8,11), (9,11)
                ) ? 1 : 0;

        match &= output.template get<0>(i) == expectedValue;
    }

    BOOST_REQUIRE_EQUAL(match, true);
}

BOOST_AUTO_TEST_CASE(testTagBoundaries2)
{
    constexpr unsigned int dim = 2;
    constexpr unsigned int blockEdgeSize = 8;
    typedef aggregate<float> AggregateT;

    dim3 gridSize(2, 2);
    dim3 blockSizeInsert(blockEdgeSize, blockEdgeSize);

    grid_smb<dim, blockEdgeSize> blockGeometry(gridSize);
    SparseGridGpu<dim, AggregateT, blockEdgeSize, 64> sparseGrid(blockGeometry);

    sparseGrid.template setBackgroundValue<0>(666);
    mgpu::ofp_context_t ctx;

    {
        sparseGrid.setGPUInsertBuffer(gridSize, blockSizeInsert);
        dim3 ptd1(6, 6, 0);
        CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), ptd1, 1);
        dim3 ptd2(6, 7, 0);
        CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), ptd2, 1);
        dim3 ptd3(7, 6, 0);
        CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), ptd3, 1);
        dim3 ptd4(7, 7, 0);
        CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), ptd4, 1);
        sparseGrid.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);
    }
    {
        sparseGrid.setGPUInsertBuffer(gridSize, blockSizeInsert);
        dim3 ptd1(8, 6, 0);
        CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), ptd1, 1);
        dim3 ptd2(9, 6, 0);
        CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), ptd2, 1);
        dim3 ptd3(8, 7, 0);
        CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), ptd3, 1);
        dim3 ptd4(9, 7, 0);
        CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), ptd4, 1);
        sparseGrid.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);
    }
    {
        sparseGrid.setGPUInsertBuffer(gridSize, blockSizeInsert);
        dim3 ptd1(6, 8, 0);
        CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), ptd1, 1);
        dim3 ptd2(7, 8, 0);
        CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), ptd2, 1);
        dim3 ptd3(6, 9, 0);
        CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), ptd3, 1);
        dim3 ptd4(7, 9, 0);
        CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), ptd4, 1);
        sparseGrid.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);
    }
    {
        sparseGrid.setGPUInsertBuffer(gridSize, blockSizeInsert);
        dim3 ptd1(8, 8, 0);
        CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), ptd1, 1);
        dim3 ptd2(8, 9, 0);
        CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), ptd2, 1);
        dim3 ptd3(9, 8, 0);
        CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), ptd3, 1);
        dim3 ptd4(9, 9, 0);
        CUDA_LAUNCH_DIM3((insertOneValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), ptd4, 1);
        sparseGrid.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);
    }

    sparseGrid.deviceToHost(); //just sync masks

    sparseGrid.findNeighbours(); // Pre-compute the neighbours pos for each block!
    // Now tag the boundaries
    sparseGrid.tagBoundaries(ctx);

    // Get output
    openfpm::vector_gpu<AggregateT> output;
    output.resize(4 * 64);

    CUDA_LAUNCH_DIM3((copyToOutputIfPadding<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), output.toKernel());

    output.template deviceToHost<0>();
    sparseGrid.template deviceToHost<0>();

    // Compare
    bool match = true;
    for (size_t i = 0; i < output.size(); i++)
    {
        auto coord = sparseGrid.getCoord(i);
        auto expectedValue =
                (
                        i == 54 || i == 55 || i == 62 // (6,6), (7,6), (6,7)
                        || i == 134 || i == 142 || i == 143 // (6,8), (6,9), (7,9)
                        || i == 112 || i == 113 || i == 121 // (8,6), (9,6), (9,7)
                        || i == 200 || i == 193 || i == 201 // (8,9), (9,8), (9,9)
                ) ? 1 : 0;

        match &= output.template get<0>(i) == expectedValue;
    }

    BOOST_REQUIRE_EQUAL(match, true);
}

BOOST_AUTO_TEST_CASE(testStencilHeat)
{
    constexpr unsigned int dim = 2;
    constexpr unsigned int blockEdgeSize = 8;
    typedef aggregate<float,float> AggregateT;

    dim3 gridSize(2, 2);
    dim3 blockSizeInsert(blockEdgeSize, blockEdgeSize);

    grid_smb<dim, blockEdgeSize> blockGeometry(gridSize);
    SparseGridGpu<dim, AggregateT, blockEdgeSize, 64> sparseGrid(blockGeometry);
    mgpu::ofp_context_t ctx;
    sparseGrid.template setBackgroundValue<0>(0);

    // Insert values on the grid
    sparseGrid.setGPUInsertBuffer(gridSize, blockSizeInsert);
    CUDA_LAUNCH_DIM3((insertConstantValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), 0);
    sparseGrid.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    sparseGrid.findNeighbours(); // Pre-compute the neighbours pos for each block!
    sparseGrid.tagBoundaries(ctx);

    sparseGrid.template applyStencils<BoundaryStencilSetXRescaled<dim,0,0>>(sparseGrid.getBox(), STENCIL_MODE_INPLACE, 0.0, gridSize.x * blockEdgeSize, 0.0, 10.0);

    // Now apply the laplacian operator
    const unsigned int maxIter = 1000;
//    const unsigned int maxIter = 100;
    for (unsigned int iter=0; iter<maxIter; ++iter)
    {
        sparseGrid.applyStencils<HeatStencil<dim, 0, 1>>(sparseGrid.getBox(), STENCIL_MODE_INPLACE, 0.1);
        sparseGrid.applyStencils<HeatStencil<dim, 1, 0>>(sparseGrid.getBox(), STENCIL_MODE_INPLACE, 0.01);
    }

    sparseGrid.template deviceToHost<0,1>();

    // Compare
    bool match = true;
    for (size_t i = 0; i < 64*4; i++)
    {
        auto coord = sparseGrid.getCoord(i);
        float expectedValue = 10.0 * coord.get(0) / (gridSize.x * blockEdgeSize - 1);

        match &= fabs(sparseGrid.template get<1>(coord) - expectedValue) < 1e-2;
    }

    BOOST_REQUIRE_EQUAL(match, true);
}

BOOST_AUTO_TEST_CASE(testStencil_lap_simplified)
{
    constexpr unsigned int dim = 2;
    constexpr unsigned int blockEdgeSize = 8;
    typedef aggregate<float,float> AggregateT;

    dim3 gridSize(2, 2);
    dim3 blockSizeInsert(blockEdgeSize, blockEdgeSize);

    grid_smb<dim, blockEdgeSize> blockGeometry(gridSize);
    SparseGridGpu<dim, AggregateT, blockEdgeSize, 64> sparseGrid(blockGeometry);
    mgpu::ofp_context_t ctx;
    sparseGrid.template setBackgroundValue<0>(0);

    // Insert values on the grid
    sparseGrid.setGPUInsertBuffer(gridSize, blockSizeInsert);
    CUDA_LAUNCH_DIM3((insertConstantValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), 0);
    sparseGrid.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    sparseGrid.findNeighbours(); // Pre-compute the neighbours pos for each block!
    sparseGrid.tagBoundaries(ctx);

    sparseGrid.template applyStencils<BoundaryStencilSetXRescaled<dim,0,0>>(sparseGrid.getBox(), STENCIL_MODE_INPLACE, 0.0, gridSize.x * blockEdgeSize, 0.0, 10.0);

    // Now apply the laplacian operator
    const unsigned int maxIter = 1000;
//    const unsigned int maxIter = 100;
    for (unsigned int iter=0; iter<maxIter; ++iter)
    {
        sparseGrid.conv_cross<0, 1, 1>({0,0},{16,16},[] __device__ (float & u, cross_stencil<2,float> & cs){
            return u + (cs.xm[0] + cs.xp[0] +
                        cs.xm[1] + cs.xp[1] - 4.0*u)*0.1;
        });
        sparseGrid.conv_cross<1, 0, 1>({0,0},{16,16},[] __device__ (float & u, cross_stencil<2,float> & cs){
            return u + (cs.xm[0] + cs.xp[0] +
                        cs.xm[1] + cs.xp[1] - 4.0*u)*0.1;
        });
    }

    sparseGrid.deviceToHost<0,1>();

    // Get output
    sparseGrid.template deviceToHost<0>();

    // Compare
    bool match = true;
    for (size_t i = 0; i < 64*4; i++)
    {
        auto coord = sparseGrid.getCoord(i);
        float expectedValue = 10.0 * coord.get(0) / (gridSize.x * blockEdgeSize - 1);

        match &= fabs(sparseGrid.template get<0>(coord) - expectedValue) < 1e-2;
    }

    BOOST_REQUIRE_EQUAL(match, true);
}

BOOST_AUTO_TEST_CASE(testStencil_lap_no_cross_simplified)
{
    constexpr unsigned int dim = 2;
    constexpr unsigned int blockEdgeSize = 8;
    typedef aggregate<float,float> AggregateT;

    dim3 gridSize(2, 2);
    dim3 blockSizeInsert(blockEdgeSize, blockEdgeSize);

    grid_smb<dim, blockEdgeSize> blockGeometry(gridSize);
    SparseGridGpu<dim, AggregateT, blockEdgeSize, 64> sparseGrid(blockGeometry);
    mgpu::ofp_context_t ctx;
    sparseGrid.template setBackgroundValue<0>(0);

    // Insert values on the grid
    sparseGrid.setGPUInsertBuffer(gridSize, blockSizeInsert);
    CUDA_LAUNCH_DIM3((insertConstantValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), 0);
    CUDA_LAUNCH_DIM3((insertConstantValue<1>), gridSize, blockSizeInsert, sparseGrid.toKernel(), 0);
    sparseGrid.flush<smax_<0>, smax_<1>>(ctx, flush_type::FLUSH_ON_DEVICE);

    sparseGrid.findNeighbours(); // Pre-compute the neighbours pos for each block!
    sparseGrid.tagBoundaries(ctx);

    sparseGrid.template applyStencils<BoundaryStencilSetXRescaled<dim,0,0>>(sparseGrid.getBox(), STENCIL_MODE_INPLACE, 0.0, gridSize.x * blockEdgeSize, 0.0, 10.0);
    sparseGrid.template applyStencils<BoundaryStencilSetXRescaled<dim,1,1>>(sparseGrid.getBox(), STENCIL_MODE_INPLACE, 0.0, gridSize.x * blockEdgeSize, 0.0, 10.0);

    typedef typename GetCpBlockType<decltype(sparseGrid),0,1>::type CpBlockType;

    /*
     * The initial condition on both properties is a linear increase along x,
     * from 0 on the left edge to 10 on the right edge.
     */

    // Now apply the laplacian operator
    const unsigned int maxIter = 1000;

    for (unsigned int iter=0; iter<maxIter; ++iter)
    {
        sparseGrid.conv<0, 1, 1>({0,0},{16,16},[] __device__ (CpBlockType & u, int i, int j){
            float c = u(i,j);
            return c + (u(i-1,j) + u(i+1,j) +
                        u(i,j-1) + u(i,j+1) - 4.0*c)*0.1;
        });

        sparseGrid.conv<1, 0, 1>({0,0},{16,16},[] __device__ (CpBlockType & u, int i, int j){
            float c = u(i,j);
            return c + (u(i-1,j) + u(i+1,j) +
                        u(i,j-1) + u(i,j+1) - 4.0*c)*0.1;
        });
    }

    sparseGrid.template deviceToHost<0>();

    // Compare
    bool match = true;
    for (size_t i = 0; i < 64*4; i++)
    {
        auto coord = sparseGrid.getCoord(i);
        float expectedValue = 10.0 * coord.get(0) / (gridSize.x * blockEdgeSize - 1);

        match &= fabs(sparseGrid.template get<0>(coord) - expectedValue) < 1e-2;
    }

    BOOST_REQUIRE_EQUAL(match, true);
}

BOOST_AUTO_TEST_CASE(testStencil_lap_no_cross_simplified2)
{
    constexpr unsigned int dim = 2;
    constexpr unsigned int blockEdgeSize = 8;
    typedef aggregate<float,float,float,float> AggregateT;

    dim3 gridSize(2, 2);
    dim3 blockSizeInsert(blockEdgeSize, blockEdgeSize);

    grid_smb<dim, blockEdgeSize> blockGeometry(gridSize);
    SparseGridGpu<dim, AggregateT, blockEdgeSize, 64> sparseGrid(blockGeometry);
    mgpu::ofp_context_t ctx;
    sparseGrid.template setBackgroundValue<0>(0);

    // Insert values on the grid
    sparseGrid.setGPUInsertBuffer(gridSize, blockSizeInsert);
    CUDA_LAUNCH_DIM3((insertConstantValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), 0);
    CUDA_LAUNCH_DIM3((insertConstantValue<1>), gridSize, blockSizeInsert, sparseGrid.toKernel(), 0);
    CUDA_LAUNCH_DIM3((insertConstantValue<2>), gridSize, blockSizeInsert, sparseGrid.toKernel(), 0);
    CUDA_LAUNCH_DIM3((insertConstantValue<3>), gridSize, blockSizeInsert, sparseGrid.toKernel(), 0);
    sparseGrid.flush<smax_<0>, smax_<1>, smax_<2>, smax_<3>>(ctx, flush_type::FLUSH_ON_DEVICE);

    sparseGrid.findNeighbours(); // Pre-compute the neighbours pos for each block!
    sparseGrid.tagBoundaries(ctx);

    sparseGrid.template applyStencils<BoundaryStencilSetXRescaled<dim,0,0>>(sparseGrid.getBox(), STENCIL_MODE_INPLACE, 0.0, gridSize.x * blockEdgeSize, 0.0, 10.0);
    sparseGrid.template applyStencils<BoundaryStencilSetXRescaled<dim,1,1>>(sparseGrid.getBox(), STENCIL_MODE_INPLACE, 0.0, gridSize.x * blockEdgeSize, 0.0, 5.0);
    sparseGrid.template applyStencils<BoundaryStencilSetXRescaled<dim,2,2>>(sparseGrid.getBox(), STENCIL_MODE_INPLACE, 0.0, gridSize.x * blockEdgeSize, 0.0, 10.0);
    sparseGrid.template applyStencils<BoundaryStencilSetXRescaled<dim,3,3>>(sparseGrid.getBox(), STENCIL_MODE_INPLACE, 0.0, gridSize.x * blockEdgeSize, 0.0, 5.0);

    typedef typename GetCpBlockType<decltype(sparseGrid),0,1>::type CpBlockType;

    /*
     * The initial condition is a linear increase along x: from 0 on the left
     * edge to 10 on the right edge for properties 0 and 2, and from 0 to 5
     * for properties 1 and 3.
     */

    // Now apply the laplacian operator
    const unsigned int maxIter = 1000;

    for (unsigned int iter=0; iter<maxIter; ++iter)
    {
        sparseGrid.conv2<0,1,2,3,1>({0,0},{16,16},[] __device__ (float & u_out, float & v_out, CpBlockType & u, CpBlockType & v, int i, int j){
            float cu = u(i,j);
            float cv = v(i,j);
            u_out = cu + (u(i-1,j) + u(i+1,j) +
                          u(i,j-1) + u(i,j+1) - 4.0*cu)*0.1;

            v_out = cv + (v(i-1,j) + v(i+1,j) +
                          v(i,j-1) + v(i,j+1) - 4.0*cv)*0.1;
        });

        sparseGrid.conv2<2,3,0,1,1>({0,0},{16,16},[] __device__ (float & u_out, float & v_out, CpBlockType & u, CpBlockType & v, int i, int j){
            float cu = u(i,j);
            float cv = v(i,j);
            u_out = cu + (u(i-1,j) + u(i+1,j) +
                          u(i,j-1) + u(i,j+1) - 4.0*cu)*0.1;

            v_out = cv + (v(i-1,j) + v(i+1,j) +
                          v(i,j-1) + v(i,j+1) - 4.0*cv)*0.1;
        });
    }

    sparseGrid.template deviceToHost<0,1>();

    // Compare
    bool match = true;
    for (size_t i = 0; i < 64*4; i++)
    {
        auto coord = sparseGrid.getCoord(i);
        float expectedValue = 10.0 * coord.get(0) / (gridSize.x * blockEdgeSize - 1);
        float expectedValue2 = 5.0 * coord.get(0) / (gridSize.x * blockEdgeSize - 1);

        match &= fabs(sparseGrid.template get<0>(coord) - expectedValue) < 1e-2;
        match &= fabs(sparseGrid.template get<1>(coord) - expectedValue2) < 1e-2;
    }

    BOOST_REQUIRE_EQUAL(match, true);
}

BOOST_AUTO_TEST_CASE(testStencil_lap_no_cross_simplified_subset)
{
    constexpr unsigned int dim = 2;
    constexpr unsigned int blockEdgeSize = 8;
    typedef aggregate<float,float> AggregateT;

    dim3 gridSize(2, 2);
    dim3 blockSizeInsert(blockEdgeSize, blockEdgeSize);

    grid_smb<dim, blockEdgeSize> blockGeometry(gridSize);
    SparseGridGpu<dim, AggregateT, blockEdgeSize, 64> sparseGrid(blockGeometry);
    mgpu::ofp_context_t ctx;
    sparseGrid.template setBackgroundValue<0>(0);

    // Insert values on the grid
    sparseGrid.setGPUInsertBuffer(gridSize, blockSizeInsert);
    CUDA_LAUNCH_DIM3((insertConstantValue<0>), gridSize, blockSizeInsert, sparseGrid.toKernel(), 0);
    CUDA_LAUNCH_DIM3((insertConstantValue<1>), gridSize, blockSizeInsert, sparseGrid.toKernel(), 0);
    sparseGrid.flush<smax_<0>, smax_<1>>(ctx, flush_type::FLUSH_ON_DEVICE);

    sparseGrid.findNeighbours(); // Pre-compute the neighbours pos for each block!
    sparseGrid.tagBoundaries(ctx);

    typedef typename GetCpBlockType<decltype(sparseGrid),0,1>::type CpBlockType;

    sparseGrid.conv<0, 1, 1>({3,3},{11,11},[] __device__ (CpBlockType & u, int i, int j){
        return 5.0;
    });

    sparseGrid.template deviceToHost<1>();

    // Compare
    bool match = true;
    for (size_t i = 0; i < 64*4; i++)
    {
        auto coord = sparseGrid.getCoord(i);

        if (coord.get(0) >= 3 && coord.get(1) >= 3 && coord.get(0) <= 11 && coord.get(1) <= 11)
        {
            match &= sparseGrid.template get<1>(coord) == 5.0;
        }
        else
        {
            match &= sparseGrid.template get<1>(coord) == 0.0;
        }
    }

    BOOST_REQUIRE_EQUAL(match, true);
}

template<typename sparsegrid_type>
__global__ void sparse_grid_get_test(sparsegrid_type sparseGrid, grid_key_dx<3> key, float * data)
{
    *data = sparseGrid.template get<0>(key);
}

BOOST_AUTO_TEST_CASE(testFlushInsert)
{
    constexpr unsigned int dim = 3;
    constexpr unsigned int blockEdgeSize = 4;
    typedef aggregate<float,float> AggregateT;

    size_t sz[] = {137,100,57};

    SparseGridGpu<dim, AggregateT, blockEdgeSize, 64> sparseGrid(sz);
    mgpu::ofp_context_t ctx;
    sparseGrid.template setBackgroundValue<0>(0);

    sparseGrid.insertFlush<0>(grid_key_dx<3>({3,6,7})) = 2.0;
    sparseGrid.insertFlush<0>(grid_key_dx<3>({13,16,17})) = 3.0;
    sparseGrid.insertFlush<0>(grid_key_dx<3>({13,46,27})) = 4.0;
    sparseGrid.insertFlush<0>(grid_key_dx<3>({36,63,11})) = 5.0;
    sparseGrid.insertFlush<0>(grid_key_dx<3>({37,96,47})) = 6.0;
    sparseGrid.insertFlush<0>(grid_key_dx<3>({130,56,37})) = 7.0;
    sparseGrid.insertFlush<0>(grid_key_dx<3>({131,76,17})) = 8.0;
    sparseGrid.insertFlush<0>(grid_key_dx<3>({36,86,27})) = 9.0;
    sparseGrid.insertFlush<0>(grid_key_dx<3>({34,36,7})) = 10.0;

    sparseGrid.insertFlush<0>(grid_key_dx<3>({4,6,7})) = 2.0;
    sparseGrid.insertFlush<0>(grid_key_dx<3>({12,16,17})) = 3.0;
    sparseGrid.insertFlush<0>(grid_key_dx<3>({12,46,27})) = 4.0;
    sparseGrid.insertFlush<0>(grid_key_dx<3>({35,63,11})) = 5.0;
    sparseGrid.insertFlush<0>(grid_key_dx<3>({38,96,47})) = 6.0;
    sparseGrid.insertFlush<0>(grid_key_dx<3>({131,56,37})) = 7.0;
    sparseGrid.insertFlush<0>(grid_key_dx<3>({132,76,17})) = 8.0;
    sparseGrid.insertFlush<0>(grid_key_dx<3>({37,86,27})) = 9.0;
    sparseGrid.insertFlush<0>(grid_key_dx<3>({35,36,7})) = 10.0;

    BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({3,6,7})),2.0);
    BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({13,16,17})),3.0);
    BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({13,46,27})),4.0);
    BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({36,63,11})),5.0);
    BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({37,96,47})),6.0);
    BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({130,56,37})),7.0);
    BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({131,76,17})),8.0);
    BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({36,86,27})),9.0);
    BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({34,36,7})),10.0);

    BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({4,6,7})),2.0);
    BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({12,16,17})),3.0);
    BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({12,46,27})),4.0);
    BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({35,63,11})),5.0);
    BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({38,96,47})),6.0);
    BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({131,56,37})),7.0);
    BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({132,76,17})),8.0);
    BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({37,86,27})),9.0);
    BOOST_REQUIRE_EQUAL(sparseGrid.get<0>(grid_key_dx<3>({35,36,7})),10.0);

    sparseGrid.template hostToDevice<0>();

    // Check that on the device we can read the same information

    CudaMemory mem;

    mem.allocate(sizeof(float));

    grid_key_dx<3> key({3,6,7});

    CUDA_LAUNCH_DIM3(sparse_grid_get_test,1,1,sparseGrid.toKernel(),key,(float *)mem.getDevicePointer());

    mem.deviceToHost();

    BOOST_REQUIRE_EQUAL(*(float *)mem.getPointer(),2.0);

    grid_key_dx<3> key2({131,76,17});

    CUDA_LAUNCH_DIM3(sparse_grid_get_test,1,1,sparseGrid.toKernel(),key2,(float *)mem.getDevicePointer());

    mem.deviceToHost();

    BOOST_REQUIRE_EQUAL(*(float *)mem.getPointer(),8.0);
}

struct conv_coeff
{
    float coeff[3][3][3];
};

template<unsigned int dim, unsigned int p_src, unsigned int p_dst>
struct Conv3x3x3
{
    // This is an example of a laplacian smoothing stencil to apply using the apply stencil facility of SparseGridGpu

    typedef NNFull<dim> stencil_type;

    static constexpr unsigned int supportRadius = 1;

    template<typename SparseGridT, typename DataBlockWrapperT>
    static inline __device__ void stencil(
            SparseGridT & sparseGrid,
            const unsigned int dataBlockId,
            const openfpm::sparse_index<unsigned int> dataBlockIdPos,
            unsigned int offset,
            grid_key_dx<dim, int> & pointCoord,
            DataBlockWrapperT & dataBlockLoad,
            DataBlockWrapperT & dataBlockStore,
            unsigned char curMask,
            conv_coeff & cc)
    {
        typedef typename SparseGridT::AggregateBlockType AggregateT;
        typedef ScalarTypeOf<AggregateT, p_src> ScalarT;

        constexpr unsigned int enlargedBlockSize = IntPow<
                SparseGridT::getBlockEdgeSize() + 2 * supportRadius, dim>::value;

        __shared__ ScalarT enlargedBlock[enlargedBlockSize];

        sparseGrid.template loadGhostBlock<p_src>(dataBlockLoad, dataBlockIdPos, enlargedBlock);

        __syncthreads();

        if ((curMask & mask_sparse::EXIST) && !(curMask & mask_sparse::PADDING))
        {
            const auto coord = sparseGrid.getCoordInEnlargedBlock(offset);
            const auto linId = sparseGrid.getLinIdInEnlargedBlock(offset);
            ScalarT tot = 0.0;
            for (int i = 0; i < dim; ++i)
            {
                for (int j = 0; j < dim; ++j)
                {
                    for (int k = 0; k < dim; ++k)
                    {
                        grid_key_dx<dim, int> key;

                        key.set_d(0, i-1);
                        key.set_d(1, j-1);
                        key.set_d(2, k-1);

                        auto nPlusId = sparseGrid.getNeighbourLinIdInEnlargedBlock(coord, key);
                        tot += enlargedBlock[nPlusId] * cc.coeff[i][j][k];
                    }
                }
            }

            dataBlockStore.template get<p_dst>()[offset] = tot;
        }
    }

    template <typename SparseGridT, typename CtxT>
    static inline void __host__ flush(SparseGridT & sparseGrid, CtxT & ctx)
    {
        sparseGrid.template flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);
    }
};

template<unsigned int dim, unsigned int p_src, unsigned int p_dst>
struct Conv3x3x3_noshared
{
    // This is an example of a laplacian smoothing stencil to apply using the apply stencil facility of SparseGridGpu

    typedef NNFull<dim> stencil_type;

    static constexpr unsigned int supportRadius = 1;

    template<typename SparseGridT, typename DataBlockWrapperT>
    static inline __device__ void stencil(
            SparseGridT & sparseGrid,
            const unsigned int dataBlockId,
            const openfpm::sparse_index<unsigned int> dataBlockIdPos,
            unsigned int offset,
            grid_key_dx<dim, int> & pointCoord,
            DataBlockWrapperT & dataBlockLoad,
            DataBlockWrapperT & dataBlockStore,
            unsigned char curMask,
            conv_coeff & cc)
    {
        typedef typename SparseGridT::AggregateBlockType AggregateT;
        typedef ScalarTypeOf<AggregateT, p_src> ScalarT;

        if ((curMask & mask_sparse::EXIST) && !(curMask & mask_sparse::PADDING))
        {
            ScalarT tot = 0.0;
            for (int i = 0; i < dim; ++i)
            {
                for (int j = 0; j < dim; ++j)
                {
                    for (int k = 0; k < dim; ++k)
                    {
                        grid_key_dx<dim, int> key;

                        key.set_d(0, k-1);
                        key.set_d(1, j-1);
                        key.set_d(2, i-1);

                        block_offset<int> pos = sparseGrid.template getNNPoint<stencil_type>(dataBlockIdPos, offset, key);

                        tot += sparseGrid.template get<p_src>(pos) * cc.coeff[i][j][k];
                    }
                }
            }

            dataBlockStore.template get<p_dst>()[offset] = tot;
        }
    }

    template <typename SparseGridT, typename CtxT>
    static inline void __host__ flush(SparseGridT & sparseGrid, CtxT & ctx)
    {
        sparseGrid.template flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);
    }
};
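
// The two functors above compute the same 3x3x3 convolution in two ways:
// Conv3x3x3 stages the data block plus a one-point ghost layer in shared
// memory (loadGhostBlock) and reads all 27 neighbours from there, while
// Conv3x3x3_noshared resolves every neighbour on the fly through getNNPoint.
// The latter is what the STENCIL_MODE_INPLACE_NO_SHARED tests below exercise.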

template<typename SparseGridZ>
void test_convolution_3x3x3()
{
    size_t sz[] = {1000,1000,1000};

    SparseGridZ sparseGrid(sz);
    mgpu::ofp_context_t ctx;
    sparseGrid.template setBackgroundValue<0>(0);

    // now create a 3D sphere

    grid_key_dx<3,int> start({256,256,256});

    dim3 gridSize(32,32,32);

    // Insert values on the grid
    sparseGrid.setGPUInsertBuffer(gridSize, dim3(1));
    CUDA_LAUNCH_DIM3((insertSphere3D_radius<0>),
            gridSize, dim3(SparseGridZ::blockEdgeSize_*SparseGridZ::blockEdgeSize_*SparseGridZ::blockEdgeSize_,1,1),
            sparseGrid.toKernel(), start, 64, 56, 1);

    sparseGrid.template flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    sparseGrid.template findNeighbours<NNFull<3>>(); // Pre-compute the neighbours pos for each block!

    sparseGrid.template setNNType<NNFull<3>>();
    sparseGrid.template tagBoundaries<NNFull<3>>(ctx);

    conv_coeff cc;

    for (int i = 0; i < 3; i++)
    {
        for (int j = 0; j < 3; j++)
        {
            for (int k = 0; k < 3; k++)
            {
                cc.coeff[k][j][i] = 1.0;
            }
        }
    }

    sparseGrid.template applyStencils<Conv3x3x3<3,0,1>>(sparseGrid.getBox(), STENCIL_MODE_INPLACE, cc);
    sparseGrid.template deviceToHost<0,1>();

    auto & bm = sparseGrid.private_get_blockMap();
    auto & dataVector = bm.getDataBuffer();

    bool match = true;

    BOOST_REQUIRE(dataVector.size() != 0);

    for (size_t i = 0; i < dataVector.size(); i++)
    {
        for (size_t j = 0; j < 64; j++)
        {
            if (dataVector.template get<2>(i)[j] == 1)
            {
                match &= dataVector.template get<0>(i)[j]*27 == dataVector.template get<1>(i)[j];
            }
        }
    }

    BOOST_REQUIRE_EQUAL(match, true);
}

template<typename SparseGridZ>
void test_convolution_3x3x3_no_shared()
{
    size_t sz[] = {1000,1000,1000};

    SparseGridZ sparseGrid(sz);
    mgpu::ofp_context_t ctx;
    sparseGrid.template setBackgroundValue<0>(0);

    // now create a 3D sphere

    grid_key_dx<3,int> start({256,256,256});

    dim3 gridSize(32,32,32);

    // Insert values on the grid
    sparseGrid.setGPUInsertBuffer(gridSize, dim3(1));
    CUDA_LAUNCH_DIM3((insertSphere3D_radius<0>),
            gridSize, dim3(SparseGridZ::blockEdgeSize_*SparseGridZ::blockEdgeSize_*SparseGridZ::blockEdgeSize_,1,1),
            sparseGrid.toKernel(), start, 64, 56, 1);

    sparseGrid.template flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    sparseGrid.template findNeighbours<NNFull<3>>(); // Pre-compute the neighbours pos for each block!

    sparseGrid.template setNNType<NNFull<SparseGridZ::dims>>();
    sparseGrid.template tagBoundaries<NNFull<3>>(ctx,No_check(),tag_boundaries::CALCULATE_EXISTING_POINTS);

    conv_coeff cc;

    for (int i = 0; i < 3; i++)
    {
        for (int j = 0; j < 3; j++)
        {
            for (int k = 0; k < 3; k++)
            {
                cc.coeff[k][j][i] = 1.0;
            }
        }
    }

    sparseGrid.template applyStencils<Conv3x3x3_noshared<SparseGridZ::dims,0,1>>(sparseGrid.getBox(), STENCIL_MODE_INPLACE_NO_SHARED, cc);

    sparseGrid.template deviceToHost<0,1>();

    auto & bm = sparseGrid.private_get_blockMap();
    auto & dataVector = bm.getDataBuffer();

    bool match = true;

    for (size_t i = 0; i < dataVector.size(); i++)
    {
        for (size_t j = 0; j < 64; j++)
        {
            if (dataVector.template get<2>(i)[j] == 1)
            {
                match &= dataVector.template get<0>(i)[j]*27 == dataVector.template get<1>(i)[j];
            }
        }
    }

    BOOST_REQUIRE_EQUAL(match, true);
}

BOOST_AUTO_TEST_CASE(test3x3x3convolution_no_shared)
{
    constexpr unsigned int dim = 3;
    constexpr unsigned int blockEdgeSize = 4;
    typedef aggregate<float,float> AggregateT;

    test_convolution_3x3x3_no_shared<SparseGridGpu<dim, AggregateT, blockEdgeSize, 64, long int>>();
}

BOOST_AUTO_TEST_CASE(test3x3x3convolution_no_shared_z_morton)
{
    constexpr unsigned int dim = 3;
    constexpr unsigned int blockEdgeSize = 4;
    typedef aggregate<float,float> AggregateT;

    test_convolution_3x3x3_no_shared<SparseGridGpu_z<dim, AggregateT, blockEdgeSize, 64, long int>>();
}

BOOST_AUTO_TEST_CASE(test3x3x3convolution)
{
    constexpr unsigned int dim = 3;
    constexpr unsigned int blockEdgeSize = 4;
    typedef aggregate<float,float> AggregateT;

    test_convolution_3x3x3<SparseGridGpu<dim, AggregateT, blockEdgeSize, 64, long int>>();
}

BOOST_AUTO_TEST_CASE(test3x3x3convolution_morton_z)
{
    constexpr unsigned int dim = 3;
    constexpr unsigned int blockEdgeSize = 4;
    typedef aggregate<float,float> AggregateT;

    test_convolution_3x3x3<SparseGridGpu_z<dim, AggregateT, blockEdgeSize, 64, long int>>();
}

BOOST_AUTO_TEST_CASE(test_sparse_grid_iterator_sub_host)
{
    constexpr unsigned int dim = 3;
    constexpr unsigned int blockEdgeSize = 4;
    typedef aggregate<float, float> AggregateT;

    size_t sz[3] = {768,768,768};
    dim3 gridSize(32,32,32);

    grid_smb<dim, blockEdgeSize> blockGeometry(sz);
    SparseGridGpu<dim, AggregateT, blockEdgeSize, 64> sparseGrid(blockGeometry);
    mgpu::ofp_context_t ctx;
    sparseGrid.template setBackgroundValue<0>(0);

    // Sphere 1
    grid_key_dx<3,int> start1({256,256,256});
    sparseGrid.setGPUInsertBuffer(gridSize, dim3(1));
    CUDA_LAUNCH_DIM3((insertSphere3D<0>),
            gridSize, dim3(blockEdgeSize*blockEdgeSize*blockEdgeSize,1,1),
            sparseGrid.toKernel(), start1, 32, 0, 1);

    sparseGrid.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    sparseGrid.template deviceToHost<0>();

    bool match = true;

    int count = 0;

    grid_key_dx<3> start({303,303,303});
    grid_key_dx<3> stop({337,337,337});

    auto it = sparseGrid.getIterator(start,stop);

    while (it.isNext())
    {
        auto key = it.get();

        match &= sparseGrid.template get<0>(key) == 1.0;

        sparseGrid.template get<0>(key) = 5.0;

        count++;

        ++it;
    }

    BOOST_REQUIRE_EQUAL(match,true);
    BOOST_REQUIRE_EQUAL(count,42875);
}

BOOST_AUTO_TEST_CASE(test_sparse_grid_iterator_host)
{
    constexpr unsigned int dim = 3;
    constexpr unsigned int blockEdgeSize = 4;
    typedef aggregate<float, float> AggregateT;

    size_t sz[3] = {512,512,512};
    dim3 gridSize(32,32,32);

    grid_smb<dim, blockEdgeSize> blockGeometry(sz);
    SparseGridGpu<dim, AggregateT, blockEdgeSize, 64> sparseGrid(blockGeometry);
    mgpu::ofp_context_t ctx;
    sparseGrid.template setBackgroundValue<0>(0);

    // Sphere 1
    grid_key_dx<3,int> start1({256,256,256});
    sparseGrid.setGPUInsertBuffer(gridSize, dim3(1));
    CUDA_LAUNCH_DIM3((insertSphere3D<0>),
            gridSize, dim3(blockEdgeSize*blockEdgeSize*blockEdgeSize,1,1),
            sparseGrid.toKernel(), start1, 64, 32, 1);

    sparseGrid.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    sparseGrid.template deviceToHost<0>();

    bool match = true;

    int count = 0;

    auto it = sparseGrid.getIterator();

    while (it.isNext())
    {
        auto key = it.get();

        match &= sparseGrid.template get<0>(key) == 1.0;
        //unsigned char bl = sparseGrid.template get<2>(key);

        count++;

        ++it;
    }

    BOOST_REQUIRE(sparseGrid.countExistingElements() != 0);

    BOOST_REQUIRE_EQUAL(sparseGrid.countExistingElements(),count);
    BOOST_REQUIRE_EQUAL(match,true);
}

BOOST_AUTO_TEST_CASE(test_pack_request)
{
    size_t sz[] = {1000,1000,1000};

    constexpr int blockEdgeSize = 4;
    constexpr int dim = 3;

    typedef SparseGridGpu<dim, aggregate<float>, blockEdgeSize, 64, long int> SparseGridZ;

    SparseGridZ sparseGrid(sz);
    mgpu::ofp_context_t ctx;
    sparseGrid.template setBackgroundValue<0>(0);

    // now create a 3D sphere

    grid_key_dx<3,int> start({256,256,256});

    dim3 gridSize(32,32,32);

    // Insert values on the grid
    sparseGrid.setGPUInsertBuffer(gridSize, dim3(1));
    CUDA_LAUNCH_DIM3((insertSphere3D_radius<0>),
            gridSize, dim3(SparseGridZ::blockEdgeSize_*SparseGridZ::blockEdgeSize_*SparseGridZ::blockEdgeSize_,1,1),
            sparseGrid.toKernel(), start, 64, 56, 1);

    sparseGrid.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);
    sparseGrid.template deviceToHost<0>();

    size_t cnt = sparseGrid.countExistingElements();

    size_t req = 0;
    sparseGrid.packRequest<0>(req,ctx);

    size_t tot = 8 + // how many chunks
            sparseGrid.private_get_index_array().size()*16 + 8 + // store the scan + chunk indexes
            cnt*(sizeof(float) + 2 + 1); // how much data

    BOOST_REQUIRE_EQUAL(req,tot);
}

BOOST_AUTO_TEST_CASE(test_MergeIndexMap)
{
    size_t sz[] = {1000,1000,1000};

    constexpr int blockEdgeSize = 4;
    constexpr int dim = 3;

    typedef SparseGridGpu<dim, aggregate<float>, blockEdgeSize, 64, long int> SparseGridZ;

    SparseGridZ sparseGrid(sz);
    mgpu::ofp_context_t ctx;
    sparseGrid.template setBackgroundValue<0>(0);

    // now create a 3D sphere

    grid_key_dx<3,int> start({256,256,256});

    dim3 gridSize(32,32,32);

    // Insert values on the grid
    sparseGrid.setGPUInsertBuffer(gridSize, dim3(1));
    CUDA_LAUNCH_DIM3((insertSphere3D_radius<0>),
            gridSize, dim3(SparseGridZ::blockEdgeSize_*SparseGridZ::blockEdgeSize_*SparseGridZ::blockEdgeSize_,1,1),
            sparseGrid.toKernel(), start, 64, 56, 1);

    size_t sz_b = sparseGrid.private_get_index_array().size();

    sparseGrid.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    auto & m_map = sparseGrid.getMergeIndexMapVector();
    auto & a_map = sparseGrid.getMappingVector();

    m_map.template deviceToHost<0>();
    a_map.template deviceToHost<0>();

    bool match = true;

    auto & indexes = sparseGrid.private_get_index_array();
    indexes.template deviceToHost<0>();
    auto & a_indexes = sparseGrid.private_get_add_index_array();
    a_indexes.template deviceToHost<0>();
    auto & m_out = sparseGrid.getSegmentToOutMap();
    m_out.template deviceToHost<0>();

    for (int i = 0; i < m_map.size(); i++)
    {
        if (m_map.template get<0>(i) >= sz_b)
        {
            int c = a_map.template get<0>(m_map.template get<0>(i) - sz_b);
            int ci = m_out.template get<0>(i);

            match &= (a_indexes.template get<0>(c) == indexes.template get<0>(ci));
        }
    }

    BOOST_REQUIRE_EQUAL(match,true);
}
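
// In the check above, merge-map entries >= sz_b denote chunks that came from
// the insert (add) buffer rather than from the pre-flush index array: a_map
// translates such an entry to its position in the add-index array, m_out
// gives the slot the merge wrote in the final index array, and the chunk
// index stored at the two locations must coincide.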

BOOST_AUTO_TEST_CASE(test_pack_request_with_iterator)
{
    size_t sz[] = {1000,1000,1000};

    constexpr int blockEdgeSize = 4;
    constexpr int dim = 3;

    typedef SparseGridGpu<dim, aggregate<float>, blockEdgeSize, 64, long int> SparseGridZ;

    SparseGridZ sparseGrid(sz);
    mgpu::ofp_context_t ctx;
    sparseGrid.template setBackgroundValue<0>(0);

    // now create a 3D sphere

    grid_key_dx<3,int> start({256,256,256});

    dim3 gridSize(32,32,32);

    // Insert values on the grid
    sparseGrid.setGPUInsertBuffer(gridSize, dim3(1));
    CUDA_LAUNCH_DIM3((insertSphere3D_radius<0>),
            gridSize, dim3(SparseGridZ::blockEdgeSize_*SparseGridZ::blockEdgeSize_*SparseGridZ::blockEdgeSize_,1,1),
            sparseGrid.toKernel(), start, 64, 56, 1);

    sparseGrid.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    size_t req = 0;
    sparseGrid.packReset();

    {
        grid_key_dx<3> start1({0,0,0});
        grid_key_dx<3> stop1({321,999,999});

        grid_key_dx<3> start2({322,0,0});
        grid_key_dx<3> stop2({999,999,999});

        auto it1 = sparseGrid.getIterator(start1,stop1);
        sparseGrid.template packRequest<0>(it1,req);

        auto it2 = sparseGrid.getIterator(start2,stop2);
        sparseGrid.template packRequest<0>(it2,req);

        sparseGrid.template packCalculate<0>(req,ctx);
    }

    sparseGrid.template deviceToHost<0>();

    size_t cnt = sparseGrid.countExistingElements();

    // The factors of 2 come from the two iterators

    size_t tot = 2*8 + // 2 numbers of chunks
            2*2*dim*4 + // 2 * 2 * dimension * integer
            align_number(8,4685*8) + align_number(8,4475*8) + // 4685 chunks + 4475 chunks
            align_number(8,4686*4) + align_number(8,4476*4) + // store the scan
            align_number(8,185807*4) + align_number(8,176787*4) + // store the points
            align_number(8,185807*2) + align_number(8,176787*2) + // store offsets
            align_number(8,185807*1) + align_number(8,176787*1); // store the mask

    BOOST_REQUIRE_EQUAL(req,tot);

    req = 0;
    sparseGrid.packReset();

    {
        grid_key_dx<3> start1({0,0,0});
        grid_key_dx<3> stop1({999,999,999});

        auto it1 = sparseGrid.getIterator(start1,stop1);
        sparseGrid.template packRequest<0>(it1,req);

        auto it2 = sparseGrid.getIterator(start1,stop1);
        sparseGrid.template packRequest<0>(it2,req);

        sparseGrid.template packCalculate<0>(req,ctx);
    }

    tot = 2*8 + // 2 numbers of chunks
            2*2*dim*4 + // 2 * 2 * dimension * integer
            2*align_number(8,sparseGrid.private_get_index_array().size()*8) + // store the chunk indexes
            2*align_number(8,(sparseGrid.private_get_index_array().size()+1)*4) + // store the scan
            2*align_number(8,cnt*4) + // store the points
            2*align_number(8,cnt*2) + // store offsets
            2*align_number(8,cnt*1); // store masks

    BOOST_REQUIRE_EQUAL(req,tot);
}

BOOST_AUTO_TEST_CASE(sparsegridgpu_remove_test)
{
    size_t sz[] = {1000,1000,1000};

    constexpr int blockEdgeSize = 4;
    constexpr int dim = 3;

    typedef SparseGridGpu<dim, aggregate<float>, blockEdgeSize, 64, long int> SparseGridZ;

    SparseGridZ sparseGrid(sz);
    mgpu::ofp_context_t ctx;
    sparseGrid.template setBackgroundValue<0>(0);

    // now create a 3D sphere

    grid_key_dx<3,int> start({256,256,256});

    dim3 gridSize(32,32,32);

    // Insert values on the grid
    sparseGrid.setGPUInsertBuffer(gridSize, dim3(1));
    CUDA_LAUNCH_DIM3((insertSphere3D_radius<0>),
            gridSize, dim3(SparseGridZ::blockEdgeSize_*SparseGridZ::blockEdgeSize_*SparseGridZ::blockEdgeSize_,1,1),
            sparseGrid.toKernel(), start, 64, 56, 1);

    sparseGrid.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    // remove the center

    Box<3,unsigned int> remove_section1({310,0,0},{330,999,999});
    Box<3,unsigned int> remove_section2({0,310,0},{999,330,999});
    Box<3,unsigned int> remove_section3({0,0,310},{999,999,330});

    sparseGrid.remove(remove_section1);
    sparseGrid.remove(remove_section2);
    sparseGrid.remove(remove_section3);

    sparseGrid.removeAddUnpackFinalize<>(ctx,0);

    sparseGrid.deviceToHost<0>();

    // Check we have the sphere points with the exception of the removed boxes

    auto it = sparseGrid.getIterator();

    bool match = true;

    while (it.isNext())
    {
        auto p = it.get();

        Point<3,size_t> pt = p.toPoint();

        // Calculate the radius
        float radius = sqrt((pt.get(0) - 320)*(pt.get(0) - 320) +
                            (pt.get(1) - 320)*(pt.get(1) - 320) +
                            (pt.get(2) - 320)*(pt.get(2) - 320));

        if (radius < 55.99 || radius > 64.01)
        {match &= false;}

        if (remove_section1.isInside(pt) == true)
        {match &= false;}

        if (remove_section2.isInside(pt) == true)
        {match &= false;}

        if (remove_section3.isInside(pt) == true)
        {match &= false;}

        ++it;
    }

    BOOST_REQUIRE_EQUAL(match,true);
}
1551 
1552 template<typename SG_type>
1553 void pack_unpack_test(SG_type & sparseGridDst, SG_type & sparseGridSrc,
1554  Box<3,size_t> & box1_dst,
1555  Box<3,size_t> & box2_dst,
1556  Box<3,size_t> & box3_dst,
1557  Box<3,size_t> & box4_dst,
1558  mgpu::ofp_context_t & ctx,
1559  bool test_pack)
1560 {
1561  Box<3,size_t> box1_src({256,256,256},{273,390,390});
1562  Box<3,size_t> box2_src({320,256,256},{337,390,390});
1563 
1564  // And two vertical sections
1565 
1566  Box<3,size_t> box3_src({256,256,256},{273,320,390});
1567  Box<3,size_t> box4_src({320,256,256},{337,320,390});
1568 
1569  // Now we calculate the memory required to pack
1570 
1571  sparseGridSrc.packReset();
1572 
1573  size_t req = 0;
1574  auto sub_it = sparseGridSrc.getIterator(box1_src.getKP1(),box1_src.getKP2());
1575  sparseGridSrc.template packRequest<0,1>(sub_it,req);
1576 
1577  sub_it = sparseGridSrc.getIterator(box2_src.getKP1(),box2_src.getKP2());
1578  sparseGridSrc.template packRequest<0,1>(sub_it,req);
1579 
1580  sub_it = sparseGridSrc.getIterator(box3_src.getKP1(),box3_src.getKP2());
1581  sparseGridSrc.template packRequest<0,1>(sub_it,req);
1582 
1583  sub_it = sparseGridSrc.getIterator(box4_src.getKP1(),box4_src.getKP2());
1584  sparseGridSrc.template packRequest<0,1>(sub_it,req);
1585 
1586  sparseGridSrc.template packCalculate<0,1>(req,ctx);
1587 
1588  CudaMemory mem;
1589  mem.resize(req);
1590 
1591  // Create an object of preallocated memory for properties
1592  ExtPreAlloc<CudaMemory> & prAlloc_prp = *(new ExtPreAlloc<CudaMemory>(req,mem));
1593 
1594  prAlloc_prp.incRef();
1595 
1596  // Pack information
1597  Pack_stat sts;
1598 
1599  sub_it = sparseGridSrc.getIterator(box1_src.getKP1(),box1_src.getKP2());
1600  sparseGridSrc.template pack<0,1>(prAlloc_prp,sub_it,sts);
1601 
1602  sub_it = sparseGridSrc.getIterator(box2_src.getKP1(),box2_src.getKP2());
1603  sparseGridSrc.template pack<0,1>(prAlloc_prp,sub_it,sts);
1604 
1605  sub_it = sparseGridSrc.getIterator(box3_src.getKP1(),box3_src.getKP2());
1606  sparseGridSrc.template pack<0,1>(prAlloc_prp,sub_it,sts);
1607 
1608  sub_it = sparseGridSrc.getIterator(box4_src.getKP1(),box4_src.getKP2());
1609  sparseGridSrc.template pack<0,1>(prAlloc_prp,sub_it,sts);
1610 
1611 
1612  sparseGridSrc.template packFinalize<0,1>(prAlloc_prp,sts);
1613 
1614  // Now we analyze the package
1615 
1616  if (test_pack == true)
1617  {
1618  size_t ncnk = *(size_t *)mem.getPointer();
1619  BOOST_REQUIRE_EQUAL(ncnk,1107);
1620  size_t actual_offset = ncnk*sizeof(size_t) + sizeof(size_t) + 2*3*sizeof(int);
1621  mem.deviceToHost(actual_offset + ncnk*sizeof(unsigned int),actual_offset + ncnk*sizeof(unsigned int) + sizeof(unsigned int));
1622  unsigned int n_pnt = *(unsigned int *)((unsigned char *)mem.getPointer() + actual_offset + ncnk*sizeof(unsigned int));
1623  BOOST_REQUIRE_EQUAL(n_pnt,41003);
1624 
1625  actual_offset += align_number(sizeof(size_t),(ncnk+1)*sizeof(unsigned int));
1626  actual_offset += align_number(sizeof(size_t),n_pnt*(16));
1627  actual_offset += align_number(sizeof(size_t),n_pnt*sizeof(short int));
1628  actual_offset += align_number(sizeof(size_t),n_pnt*sizeof(unsigned char));
1629 
1630  ncnk = *(size_t *)((unsigned char *)mem.getPointer() + actual_offset);
1631  BOOST_REQUIRE_EQUAL(ncnk,1420);
1632  actual_offset += ncnk*sizeof(size_t) + sizeof(size_t) + 2*3*sizeof(int);
1633  mem.deviceToHost(actual_offset + ncnk*sizeof(unsigned int),actual_offset + ncnk*sizeof(unsigned int) + sizeof(unsigned int));
1634  n_pnt = *(unsigned int *)((unsigned char *)mem.getPointer() + actual_offset + ncnk*sizeof(unsigned int));
1635  BOOST_REQUIRE_EQUAL(n_pnt,54276);
1636 
1637  actual_offset += align_number(sizeof(size_t),(ncnk+1)*sizeof(unsigned int));
1638  actual_offset += align_number(sizeof(size_t),n_pnt*(16));
1639  actual_offset += align_number(sizeof(size_t),n_pnt*sizeof(short int));
1640  actual_offset += align_number(sizeof(size_t),n_pnt*sizeof(unsigned char));
1641 
1642  ncnk = *(size_t *)((unsigned char *)mem.getPointer() + actual_offset);
1643  BOOST_REQUIRE_EQUAL(ncnk,610);
1644  actual_offset += ncnk*sizeof(size_t) + sizeof(size_t) + 2*3*sizeof(int);
1645  mem.deviceToHost(actual_offset + ncnk*sizeof(unsigned int),actual_offset + ncnk*sizeof(unsigned int) + sizeof(unsigned int));
1646  n_pnt = *(unsigned int *)((unsigned char *)mem.getPointer() + actual_offset + ncnk*sizeof(unsigned int));
1647  BOOST_REQUIRE_EQUAL(n_pnt,20828);
1648 
1649  actual_offset += align_number(sizeof(size_t),(ncnk+1)*sizeof(unsigned int));
1650  actual_offset += align_number(sizeof(size_t),n_pnt*(16));
1651  actual_offset += align_number(sizeof(size_t),n_pnt*sizeof(short int));
1652  actual_offset += align_number(sizeof(size_t),n_pnt*sizeof(unsigned char));
1653 
1654  ncnk = *(size_t *)((unsigned char *)mem.getPointer() + actual_offset);
1655  BOOST_REQUIRE_EQUAL(ncnk,739);
1656  actual_offset += ncnk*sizeof(size_t) + sizeof(size_t) + 2*3*sizeof(int);
1657  mem.deviceToHost(actual_offset + ncnk*sizeof(unsigned int),actual_offset + ncnk*sizeof(unsigned int) + sizeof(unsigned int));
1658  n_pnt = *(unsigned int *)((unsigned char *)mem.getPointer() + actual_offset + ncnk*sizeof(unsigned int));
1659  BOOST_REQUIRE_EQUAL(n_pnt,27283);
1660  }
1661 
1662  prAlloc_prp.reset();
1663 
1664  Unpack_stat ps;
1665 
1666  sparseGridDst.removeAddUnpackReset();
1667 
1668  // Iterate the destination sub-grid regions: clear each box, then unpack into it
1669  auto sub2 = sparseGridDst.getIterator(box1_dst.getKP1(),box1_dst.getKP2());
1670  sparseGridDst.remove(box1_dst);
1671  sparseGridDst.template unpack<0,1>(prAlloc_prp,sub2,ps,ctx);
1672 
1673  sub2 = sparseGridDst.getIterator(box2_dst.getKP1(),box2_dst.getKP2());
1674  sparseGridDst.remove(box2_dst);
1675  sparseGridDst.template unpack<0,1>(prAlloc_prp,sub2,ps,ctx);
1676 
1677  sub2 = sparseGridDst.getIterator(box3_dst.getKP1(),box3_dst.getKP2());
1678  sparseGridDst.remove(box3_dst);
1679  sparseGridDst.template unpack<0,1>(prAlloc_prp,sub2,ps,ctx);
1680 
1681  sub2 = sparseGridDst.getIterator(box4_dst.getKP1(),box4_dst.getKP2());
1682  sparseGridDst.remove(box4_dst);
1683  sparseGridDst.template unpack<0,1>(prAlloc_prp,sub2,ps,ctx);
1684 
1685  sparseGridDst.template removeAddUnpackFinalize<0,1>(ctx,0);
1686 
1687  sparseGridDst.template deviceToHost<0,1>();
1688 }
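// In summary, the protocol exercised above is: packRequest (size query per box)
// -> packCalculate -> allocate -> pack per box -> packFinalize on the source,
// then removeAddUnpackReset -> (remove + unpack) per box -> removeAddUnpackFinalize
// on the destination. Below, a minimal sketch of the per-box stride computation
// used in the buffer analysis, assuming align_number(a, n) rounds n up to the
// next multiple of a (which is how the offset arithmetic above treats it):
static inline size_t box_section_bytes(size_t ncnk, size_t n_pnt)
{
	size_t s = 0;
	s += align_number(sizeof(size_t), (ncnk + 1) * sizeof(unsigned int)); // points-per-chunk scan
	s += align_number(sizeof(size_t), n_pnt * 16);                        // property data: float + float[3]
	s += align_number(sizeof(size_t), n_pnt * sizeof(short int));         // in-chunk point offsets
	s += align_number(sizeof(size_t), n_pnt * sizeof(unsigned char));     // per-point masks
	return s;
}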
1689 
1690 BOOST_AUTO_TEST_CASE(sparsegridgpu_pack_unpack)
1691 {
1692  size_t sz[] = {1000,1000,1000};
1693 
1694  constexpr int blockEdgeSize = 4;
1695  constexpr int dim = 3;
1696 
1697  Box<3,size_t> box1_dst({256,256,256},{273,390,390});
1698  Box<3,size_t> box2_dst({274,256,256},{291,390,390});
1699 
1700  Box<3,size_t> box3_dst({300,256,256},{317,320,390});
1701  Box<3,size_t> box4_dst({320,256,256},{337,320,390});
1702 
1703  typedef SparseGridGpu<dim, aggregate<float,float[3]>, blockEdgeSize, 64, long int> SparseGridZ;
1704 
1705  SparseGridZ sparseGridSrc(sz);
1706  SparseGridZ sparseGridDst(sz);
1707  mgpu::ofp_context_t ctx;
1708  sparseGridSrc.template setBackgroundValue<0>(0);
1709  sparseGridDst.template setBackgroundValue<0>(0);
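 // The background value is what a read returns for points not present in the
 // sparse grid; both grids use 0 here, so missing points read as empty space.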
1710 
1711  // Now create a 3D spherical shell (radius between 56 and 64)
1712 
1713  grid_key_dx<3,int> start({256,256,256});
1714 
1715  dim3 gridSize(32,32,32);
1716 
1717  // Insert values on the grid
1718  sparseGridSrc.setGPUInsertBuffer(gridSize,dim3(1));
1719  CUDA_LAUNCH_DIM3((insertSphere3D_radiusV<0>),
1720  gridSize, dim3(SparseGridZ::blockEdgeSize_*SparseGridZ::blockEdgeSize_*SparseGridZ::blockEdgeSize_,1,1),
1721  sparseGridSrc.toKernel(), start,64, 56, 1);
1722 
1723  sparseGridSrc.flush < smax_< 0 >, smax_<1> > (ctx, flush_type::FLUSH_ON_DEVICE);
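 // flush() merges the per-block GPU insert buffers into the sparse-grid
 // storage; smax_<p> resolves duplicate insertions of the same point by
 // taking the maximum, and FLUSH_ON_DEVICE keeps the result on the GPU.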
1724 
1725  // Now we pack and unpack two vertical sections (four boxes in total)
1726 
1727  pack_unpack_test(sparseGridDst,sparseGridSrc,
1728  box1_dst,box2_dst,
1729  box3_dst,box4_dst,
1730  ctx,true);
1731 
1732  sparseGridDst.template deviceToHost<0,1>();
1733 
1734  int cnt1 = 0;
1735  int cnt2 = 0;
1736  int cnt3 = 0;
1737  int cnt4 = 0;
1738 
1739  auto it = sparseGridDst.getIterator();
1740 
1741  bool match = true;
1742 
1743  while (it.isNext())
1744  {
1745  auto p = it.get();
1746 
1747  auto pt = p.toPoint();
1748 
1749  if (box1_dst.isInside(pt) == true)
1750  {
1751  ++cnt1;
1752 
1753  const long int x = (long int)pt.get(0) - (start.get(0) + gridSize.x / 2 * blockEdgeSize);
1754  const long int y = (long int)pt.get(1) - (start.get(1) + gridSize.y / 2 * blockEdgeSize);
1755  const long int z = (long int)pt.get(2) - (start.get(2) + gridSize.z / 2 * blockEdgeSize);
1756 
1757  float radius = sqrt((float) (x*x + y*y + z*z));
1758 
1759  match &= (radius < 64 && radius > 56);
1765  }
1766  else if (box2_dst.isInside(pt) == true)
1767  {
1768  ++cnt2;
1769 
1770  const long int x = (long int)pt.get(0) - (start.get(0) - 46 + gridSize.x / 2 * blockEdgeSize);
1771  const long int y = (long int)pt.get(1) - (start.get(1) + gridSize.y / 2 * blockEdgeSize);
1772  const long int z = (long int)pt.get(2) - (start.get(2) + gridSize.z / 2 * blockEdgeSize);
1773 
1774  float radius = sqrt((float) (x*x + y*y + z*z));
1775 
1776  match &= (radius < 64 && radius > 56);
1782  }
1783  else if (box3_dst.isInside(pt) == true)
1784  {
1785  ++cnt3;
1786 
1787  const long int x = (long int)pt.get(0) - (start.get(0) + 44 + gridSize.x / 2 * blockEdgeSize);
1788  const long int y = (long int)pt.get(1) - (start.get(1) + gridSize.y / 2 * blockEdgeSize);
1789  const long int z = (long int)pt.get(2) - (start.get(2) + gridSize.z / 2 * blockEdgeSize);
1790 
1791  float radius = sqrt((float) (x*x + y*y + z*z));
1792 
1793  match &= (radius < 64 && radius > 56);
1799  }
1800  else if (box4_dst.isInside(pt) == true)
1801  {
1802  ++cnt4;
1803 
1804  const long int x = (long int)pt.get(0) - (start.get(0) + gridSize.x / 2 * blockEdgeSize);
1805  const long int y = (long int)pt.get(1) - (start.get(1) + gridSize.y / 2 * blockEdgeSize);
1806  const long int z = (long int)pt.get(2) - (start.get(2) + gridSize.z / 2 * blockEdgeSize);
1807 
1808  float radius = sqrt((float) (x*x + y*y + z*z));
1809 
1810  match &= (radius < 64 && radius > 56);
1816  }
1817 
1818  ++it;
1819  }
1820 
1821  BOOST_REQUIRE_EQUAL(match,true);
1822  BOOST_REQUIRE_EQUAL(cnt1,41003);
1823  BOOST_REQUIRE_EQUAL(cnt2,54276);
1824  BOOST_REQUIRE_EQUAL(cnt3,20828);
1825  BOOST_REQUIRE_EQUAL(cnt4,27283);
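 // These counts match the n_pnt values asserted during packing (41003, 54276,
 // 20828, 27283): every packed point was unpacked into its destination box.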
1826 
1827  // Now we remove the even-radius points from the shell and repeat the test
1828 
1829  // Remove values from the grid (the GPU insert buffer also stages removals)
1830  sparseGridSrc.setGPUInsertBuffer(gridSize,dim3(1));
1831  CUDA_LAUNCH_DIM3((removeSphere3D_even_radiusV<0>),
1832  gridSize, dim3(SparseGridZ::blockEdgeSize_*SparseGridZ::blockEdgeSize_*SparseGridZ::blockEdgeSize_,1,1),
1833  sparseGridSrc.toKernel(), start,64, 56, 1);
1834 
1835  pack_unpack_test(sparseGridDst,sparseGridSrc,
1836  box1_dst,box2_dst,
1837  box3_dst,box4_dst,
1838  ctx,false);
1839 
1840  sparseGridDst.template deviceToHost<0,1>();
1841 
1842  cnt1 = 0;
1843  cnt2 = 0;
1844  cnt3 = 0;
1845  cnt4 = 0;
1846 
1847  auto it2 = sparseGridDst.getIterator();
1848 
1849  match = true;
1850 
1851  while (it2.isNext())
1852  {
1853  auto p = it2.get();
1854 
1855  auto pt = p.toPoint();
1856 
1857  if (box1_dst.isInside(pt) == true)
1858  {
1859  ++cnt1;
1860 
1861  const long int x = (long int)pt.get(0) - (start.get(0) + gridSize.x / 2 * blockEdgeSize);
1862  const long int y = (long int)pt.get(1) - (start.get(1) + gridSize.y / 2 * blockEdgeSize);
1863  const long int z = (long int)pt.get(2) - (start.get(2) + gridSize.z / 2 * blockEdgeSize);
1864 
1865  float radius = sqrt((float) (x*x + y*y + z*z));
1866 
1867  match &= (radius < 64 && radius > 56);
1873  }
1874  else if (box2_dst.isInside(pt) == true)
1875  {
1876  ++cnt2;
1877 
1878  const long int x = (long int)pt.get(0) - (start.get(0) - 46 + gridSize.x / 2 * blockEdgeSize);
1879  const long int y = (long int)pt.get(1) - (start.get(1) + gridSize.y / 2 * blockEdgeSize);
1880  const long int z = (long int)pt.get(2) - (start.get(2) + gridSize.z / 2 * blockEdgeSize);
1881 
1882  float radius = sqrt((float) (x*x + y*y + z*z));
1883 
1884  match &= (radius < 64 && radius > 56);
1890  }
1891  else if (box3_dst.isInside(pt) == true)
1892  {
1893  ++cnt3;
1894 
1895  const long int x = (long int)pt.get(0) - (start.get(0) + 44 + gridSize.x / 2 * blockEdgeSize);
1896  const long int y = (long int)pt.get(1) - (start.get(1) + gridSize.y / 2 * blockEdgeSize);
1897  const long int z = (long int)pt.get(2) - (start.get(2) + gridSize.z / 2 * blockEdgeSize);
1898 
1899  float radius = sqrt((float) (x*x + y*y + z*z));
1900 
1901  match &= (radius < 64 && radius > 56);
1907  }
1908  else if (box4_dst.isInside(pt) == true)
1909  {
1910  ++cnt4;
1911 
1912  const long int x = (long int)pt.get(0) - (start.get(0) + gridSize.x / 2 * blockEdgeSize);
1913  const long int y = (long int)pt.get(1) - (start.get(1) + gridSize.y / 2 * blockEdgeSize);
1914  const long int z = (long int)pt.get(2) - (start.get(2) + gridSize.z / 2 * blockEdgeSize);
1915 
1916  float radius = sqrt((float) (x*x + y*y + z*z));
1917 
1918  match &= (radius < 64 && radius > 56);
1924  }
1925 
1926  ++it2;
1927  }
1928 
1929  BOOST_REQUIRE_EQUAL(match,true);
1930  BOOST_REQUIRE_EQUAL(cnt1,20520);
1931  BOOST_REQUIRE_EQUAL(cnt2,27152);
1932  BOOST_REQUIRE_EQUAL(cnt3,10423);
1933  BOOST_REQUIRE_EQUAL(cnt4,13649);
1934 }
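// The eight verification loops above repeat one shell-membership predicate,
// differing only in a per-box x-shift (0 for box1/box4, -46 for box2, +44 for
// box3). A minimal sketch of that predicate factored out (on_shell is a
// hypothetical helper, not part of this test suite):
template<typename PointT, typename KeyT>
static bool on_shell(const PointT & pt, const KeyT & start, const dim3 & gridSize,
                     const int blockEdgeSize, const long int xShift)
{
	// Distance from the shell centre used by insertSphere3D_radiusV
	const long int x = (long int)pt.get(0) - (start.get(0) + xShift + gridSize.x / 2 * blockEdgeSize);
	const long int y = (long int)pt.get(1) - (start.get(1) + gridSize.y / 2 * blockEdgeSize);
	const long int z = (long int)pt.get(2) - (start.get(2) + gridSize.z / 2 * blockEdgeSize);

	const float radius = sqrtf((float)(x*x + y*y + z*z));
	return radius < 64 && radius > 56; // inside the packed shell
}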
1935 
1936 #if defined(OPENFPM_DATA_ENABLE_IO_MODULE) || defined(PERFORMANCE_TEST)
1937 
1938 BOOST_AUTO_TEST_CASE(testSparseGridGpuOutput3DHeatStencil)
1939 {
1940  constexpr unsigned int dim = 3;
1941  constexpr unsigned int blockEdgeSize = 4;
1942  typedef aggregate<float, float> AggregateT;
1943 
1944  size_t sz[3] = {512,512,512};
1945 // dim3 gridSize(128,128,128);
1946  dim3 gridSize(32,32,32);
1947 
1948  grid_smb<dim, blockEdgeSize> blockGeometry(sz);
1949  SparseGridGpu<dim, AggregateT, blockEdgeSize, 64, long int> sparseGrid(blockGeometry); // assumed: 64 elements per block (4x4x4), as in SparseGridZ above
1950  mgpu::ofp_context_t ctx;
1951  sparseGrid.template setBackgroundValue<0>(0);
1952 
1954  // Sphere 1
1955  grid_key_dx<3,int> start1({256,256,256});
1956  sparseGrid.setGPUInsertBuffer(gridSize,dim3(1));
1957  CUDA_LAUNCH_DIM3((insertSphere3D<0>),
1958  gridSize, dim3(blockEdgeSize*blockEdgeSize*blockEdgeSize,1,1),
1959  sparseGrid.toKernel(), start1, 64, 32, 1);
1960 
1961  sparseGrid.flush < smax_< 0 >> (ctx, flush_type::FLUSH_ON_DEVICE);
1962 
1963  sparseGrid.removeUnusedBuffers();
1964 
1965  // Sphere 2
1966  grid_key_dx<3,int> start2({192,192,192});
1967  sparseGrid.setGPUInsertBuffer(gridSize,dim3(1));
1968  CUDA_LAUNCH_DIM3((insertSphere3D<0>),
1969  gridSize, dim3(blockEdgeSize*blockEdgeSize*blockEdgeSize,1,1),
1970  sparseGrid.toKernel(), start2, 64, 44, 1);
1971 
1972  sparseGrid.flush < smax_< 0 >> (ctx, flush_type::FLUSH_ON_DEVICE);
1973 
1974 
1975  // Sphere 3
1976  grid_key_dx<3,int> start3({340,192,192});
1977  sparseGrid.setGPUInsertBuffer(gridSize,dim3(1));
1978  CUDA_LAUNCH_DIM3((insertSphere3D<0>),
1979  gridSize, dim3(blockEdgeSize*blockEdgeSize*blockEdgeSize,1,1),
1980  sparseGrid.toKernel(), start3, 20, 15, 1);
1981 
1982  sparseGrid.flush < smax_< 0 >> (ctx, flush_type::FLUSH_ON_DEVICE);
1983 
1985 
1986  sparseGrid.findNeighbours(); // Pre-compute the neighbour positions for each block
1987  sparseGrid.tagBoundaries(ctx);
1988 
1989  // Now apply some boundary conditions
1990  sparseGrid.template applyStencils<BoundaryStencilSetXRescaled<dim,0,0>>(sparseGrid.getBox(),STENCIL_MODE_INPLACE,
1991  192, 384,
1992  0.0, 10.0);
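 // BoundaryStencilSetXRescaled appears to impose a linear profile along x on
 // the tagged boundary points: x in [192, 384] is rescaled to values in
 // [0.0, 10.0], giving fixed (Dirichlet-like) conditions for the relaxation.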
1993 
1994 
1995  // Now apply the Laplacian (heat) stencil
1996 // const unsigned int maxIter = 1000;
1997  const unsigned int maxIter = 100;
1998  for (unsigned int iter=0; iter<maxIter; ++iter)
1999  {
2000  for (int innerIter=0; innerIter<10; ++innerIter)
2001  {
2002  sparseGrid.applyStencils<HeatStencil<dim, 0, 1>>(sparseGrid.getBox(),STENCIL_MODE_INPLACE, 0.1);
2003 
2004  sparseGrid.applyStencils<HeatStencil<dim, 1, 0>>(sparseGrid.getBox(),STENCIL_MODE_INPLACE, 0.1);
2005 
2006  }
2007  }
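 // Each inner iteration applies the stencil twice, ping-ponging between
 // properties 0 and 1 (HeatStencil<dim, in, out>), so property 0 holds the
 // final field when the loops exit; 100 x 10 = 1000 ping-pong pairs in total.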
2008 
2009  sparseGrid.deviceToHost<0,1>();
2010  sparseGrid.write("SparseGridGPU_output3DHeatStencil.vtk");
2011 }
2012 
2013 BOOST_AUTO_TEST_CASE(testSparseGridGpuOutput)
2014 {
2015  constexpr unsigned int dim = 2;
2016  constexpr unsigned int blockEdgeSize = 8;
2017  typedef aggregate<float> AggregateT;
2018 
2019  size_t sz[2] = {1000000,1000000};
2020  dim3 gridSize(128,128);
2021 
2022  grid_smb<dim, blockEdgeSize> blockGeometry(sz);
2023  SparseGridGpu<dim, AggregateT, blockEdgeSize, 64, long int> sparseGrid(blockGeometry); // assumed: 64 elements per block (8x8), as in SparseGridZ above
2024  mgpu::ofp_context_t ctx;
2025  sparseGrid.template setBackgroundValue<0>(0);
2026 
2027  grid_key_dx<2,int> start({500000,500000});
2028 
2029  // Insert values on the grid
2030  sparseGrid.setGPUInsertBuffer(gridSize,dim3(1));
2031  CUDA_LAUNCH_DIM3((insertSphere<0>),gridSize, dim3(blockEdgeSize*blockEdgeSize,1),sparseGrid.toKernel(), start, 512, 256, 1);
2032  sparseGrid.flush < smax_< 0 >> (ctx, flush_type::FLUSH_ON_DEVICE);
2033 
2034  sparseGrid.findNeighbours(); // Pre-compute the neighbour positions for each block
2035  sparseGrid.tagBoundaries(ctx);
2036 
2037  sparseGrid.template deviceToHost<0>();
2038 
2039  sparseGrid.write("SparseGridGPU_output.vtk");
2040 }
2041 
2042 BOOST_AUTO_TEST_CASE(testSparseGridGpuOutput3D)
2043 {
2044  constexpr unsigned int dim = 3;
2045  constexpr unsigned int blockEdgeSize = 4;
2046  typedef aggregate<float> AggregateT;
2047 
2048  size_t sz[3] = {512,512,512};
2049 // dim3 gridSize(128,128,128);
2050  dim3 gridSize(32,32,32);
2051 
2052  grid_smb<dim, blockEdgeSize> blockGeometry(sz);
2053  SparseGridGpu<dim, AggregateT, blockEdgeSize, 64, long int> sparseGrid(blockGeometry); // assumed: 64 elements per block (4x4x4), as in SparseGridZ above
2054  mgpu::ofp_context_t ctx;
2055  sparseGrid.template setBackgroundValue<0>(0);
2056 
2057  grid_key_dx<3,int> start({256,256,256});
2058 
2059  // Insert values on the grid
2060  sparseGrid.setGPUInsertBuffer(gridSize,dim3(1));
2061  CUDA_LAUNCH_DIM3((insertSphere3D<0>),
2062  gridSize, dim3(blockEdgeSize*blockEdgeSize*blockEdgeSize,1,1),
2063  sparseGrid.toKernel(), start, 64, 56, 1);
2064  sparseGrid.flush < smax_< 0 >> (ctx, flush_type::FLUSH_ON_DEVICE);
2065 
2066 
2067  sparseGrid.findNeighbours(); // Pre-compute the neighbour positions for each block
2068  sparseGrid.tagBoundaries(ctx);
2069 
2070  sparseGrid.template applyStencils<BoundaryStencilSetX<dim,0,0>>(sparseGrid.getBox(),STENCIL_MODE_INPLACE);
2071 
2072 
2073  sparseGrid.template deviceToHost<0>();
2074 
2075  sparseGrid.write("SparseGridGPU_output3D.vtk");
2076 
2077  bool test = compare("SparseGridGPU_output3D.vtk","test_data/SparseGridGPU_output3D_test.vtk");
2078  BOOST_REQUIRE_EQUAL(true,test);
2079 }
2080 
2081 
2082 #endif
2083 
2084 BOOST_AUTO_TEST_SUITE_END()