OpenFPM_pdata  4.1.0
Project that contain the implementation of distributed structures
SparseGridGpu_performance_tests.cu
1 //
2 // Created by tommaso on 4/07/19.
3 //
4 #define BOOST_TEST_DYN_LINK
5 #define DISABLE_MPI_WRITTERS
6 
7 //#define SPARSEGRIDGPU_LAUNCH_BOUND_APPLY_STENCIL_IN_PLACE __launch_bounds__(512)
8 #define SPARSEGRIDGPU_LAUNCH_BOUND_APPLY_STENCIL_IN_PLACE_NO_SHARED __launch_bounds__(BLOCK_SIZE_STENCIL,12)
9 
10 #include <boost/test/unit_test.hpp>
11 #include "SparseGridGpu/SparseGridGpu.hpp"
12 #include "cuda_macro.h"
13 #include "util/stat/common_statistics.hpp"
14 #include "Plot/GoogleChart.hpp"
15 #include "util/performance/performance_util.hpp"
16 #include "SparseGridGpu/tests/utils/SparseGridGpu_testKernels.cuh"
17 #include <set>
18 #include "performancePlots.hpp"
19 #include "SparseGridGpu/tests/utils/SparseGridGpu_util_test.cuh"
20 
21 extern char * test_dir;
22 
23 // Property tree
24 
25 report_sparse_grid_tests report_sparsegrid_funcs;
26 std::string suiteURI = "performance.SparseGridGpu";
27 std::set<std::string> testSet;
28 
29 
30 BOOST_AUTO_TEST_SUITE(performance)
31 
32 BOOST_AUTO_TEST_SUITE(SparseGridGpu_test)
33 
34 
35 template<unsigned int blockEdgeSize, unsigned int gridEdgeSize, typename SparseGridZ>
36 void testStencilHeatGet_perf(unsigned int i, std::string base)
37 {
38  auto testName = "In-place GET stencil";
39  typedef HeatStencilGet<SparseGridZ::dims,0,1> Stencil01T;
40  typedef HeatStencilGet<SparseGridZ::dims,1,0> Stencil10T;
41 
42  // typedef HeatStencilGet<SparseGridZ::dims,0,0> Stencil01T;
43  // typedef HeatStencilGet<SparseGridZ::dims,0,0> Stencil10T;
44 
45  report_sparsegrid_funcs.graphs.put(base + ".dim",2);
46  report_sparsegrid_funcs.graphs.put(base + ".blockSize",blockEdgeSize);
47  report_sparsegrid_funcs.graphs.put(base + ".gridSize.x",gridEdgeSize*SparseGridZ::blockEdgeSize_);
48  report_sparsegrid_funcs.graphs.put(base + ".gridSize.y",gridEdgeSize*SparseGridZ::blockEdgeSize_);
49 
50  unsigned int iterations = 100;
51 
52  openfpm::vector<double> measures_gf;
53  openfpm::vector<double> measures_tm;
54 
55  dim3 gridSize(gridEdgeSize, gridEdgeSize);
56  dim3 blockSize(SparseGridZ::blockEdgeSize_,SparseGridZ::blockEdgeSize_);
57  typename SparseGridZ::grid_info blockGeometry(gridSize);
58  SparseGridZ sparseGrid(blockGeometry);
59  mgpu::ofp_context_t ctx;
60  sparseGrid.template setBackgroundValue<0>(0);
61 
62  unsigned long long numElements = gridEdgeSize*SparseGridZ::blockEdgeSize_*gridEdgeSize*SparseGridZ::blockEdgeSize_;
63 
64  // Initialize the grid
65  sparseGrid.setGPUInsertBuffer(gridSize, dim3(1));
66  CUDA_LAUNCH_DIM3((insertConstantValue<0>),gridSize, blockSize,sparseGrid.toKernel(), 0);
67  sparseGrid.template flush < sRight_ < 0 >> (ctx, flush_type::FLUSH_ON_DEVICE);
68 
69  sparseGrid.setGPUInsertBuffer(gridSize, dim3(1));
70  dim3 sourcePt(gridSize.x * SparseGridZ::blockEdgeSize_ / 2, gridSize.y * SparseGridZ::blockEdgeSize_ / 2, 0);
71  insertOneValue<0> << < gridSize, blockSize >> > (sparseGrid.toKernel(), sourcePt, 100);
72  sparseGrid.template flush < sRight_ < 0 >> (ctx, flush_type::FLUSH_ON_DEVICE);
73 
74  sparseGrid.findNeighbours(); // Pre-compute the neighbours pos for each block!
75 
76  iterations /= 2;
77  for (unsigned int iter=0; iter<iterations; ++iter)
78  {
79  cudaDeviceSynchronize();
80 
81  timer ts;
82  ts.start();
83 
84  sparseGrid.template applyStencils<Stencil01T>(sparseGrid.getBox(),STENCIL_MODE_INPLACE, 0.1);
85  cudaDeviceSynchronize();
86  sparseGrid.template applyStencils<Stencil10T>(sparseGrid.getBox(),STENCIL_MODE_INPLACE, 0.1);
87  cudaDeviceSynchronize();
88 
89  ts.stop();
90 
91  measures_tm.add(ts.getwct());
92 
93  float gElemS = 2 * numElements / (1e9 * ts.getwct());
94  float gFlopsS = gElemS * Stencil01T::flops;
95 
96  measures_gf.add(gFlopsS);
97  }
98 
99  double mean_tm = 0;
100  double deviation_tm = 0;
101  standard_deviation(measures_tm,mean_tm,deviation_tm);
102 
103  double mean_gf = 0;
104  double deviation_gf = 0;
105  standard_deviation(measures_gf,mean_gf,deviation_gf);
106 
107  // All times above are in ms
108 
109  float gElemS = 2 * numElements / (1e9 * mean_tm);
110  float gFlopsS = gElemS * Stencil01T::flops;
111  std::cout << "Test: " << testName << std::endl;
112  std::cout << "Block: " << SparseGridZ::blockEdgeSize_ << "x" << SparseGridZ::blockEdgeSize_ << std::endl;
113  std::cout << "Grid: " << gridEdgeSize*SparseGridZ::blockEdgeSize_ << "x" << gridEdgeSize*SparseGridZ::blockEdgeSize_ << std::endl;
114  double dataOccupancyMean, dataOccupancyDev;
115  sparseGrid.deviceToHost();
116  sparseGrid.measureBlockOccupancy(dataOccupancyMean, dataOccupancyDev);std::cout << "Data Occupancy: " << dataOccupancyMean << " dev:" << dataOccupancyDev << std::endl;
117  report_sparsegrid_funcs.graphs.put(base + ".dataOccupancy.mean",dataOccupancyMean);
118  report_sparsegrid_funcs.graphs.put(base +".dataOccupancy.dev",dataOccupancyDev);
119  std::cout << "Iterations: " << iterations << std::endl;
120  std::cout << "\tStencil: " << mean_gf << " dev:" << deviation_gf << " s" << std::endl;
121  std::cout << "Throughput: " << std::endl << "\t " << gElemS << " GElem/s " << std::endl << "\t " << gFlopsS << " GFlops/s" << std::endl;
122 
123  report_sparsegrid_funcs.graphs.put(base + ".GFlops.mean",mean_gf);
124  report_sparsegrid_funcs.graphs.put(base +".GFlops.dev",deviation_gf);
125  report_sparsegrid_funcs.graphs.put(base + ".time.mean",mean_tm);
126  report_sparsegrid_funcs.graphs.put(base +".time.dev",deviation_tm);
127 }
128 template<unsigned int blockEdgeSize, unsigned int gridEdgeSize>
129 void launch_testStencilHeatGet_perf(std::string testURI, unsigned int i)
130 {
131  constexpr unsigned int dim = 2;
132  typedef aggregate<float,float> AggregateT;
133  // typedef aggregate<float> AggregateT;
134  constexpr unsigned int chunkSize = IntPow<blockEdgeSize,dim>::value;
135 
136  std::string base(testURI + "(" + std::to_string(i) + ")");
137  report_sparsegrid_funcs.graphs.put(base + ".test.name","StencilN");
138 
139  testStencilHeatGet_perf<blockEdgeSize, gridEdgeSize,
141  cudaDeviceSynchronize();
142 }
143 template<unsigned int blockEdgeSize, unsigned int gridEdgeSize>
144 void launch_testStencilHeatGetZ_perf(std::string testURI, unsigned int i)
145 {
146  constexpr unsigned int dim = 2;
147  typedef aggregate<float,float> AggregateT;
148  // typedef aggregate<float> AggregateT;
149  constexpr unsigned int chunkSize = IntPow<blockEdgeSize,dim>::value;
150 
151  std::string base(testURI + "(" + std::to_string(i) + ")");
152  report_sparsegrid_funcs.graphs.put(base + ".test.name","StencilZ");
153 
154  testStencilHeatGet_perf<blockEdgeSize, gridEdgeSize,
156  cudaDeviceSynchronize();
157 }
158 
159 template<unsigned int blockEdgeSize, unsigned int gridEdgeSize, typename SparseGridZ>
160 void testStencilSkeleton_perf(unsigned int i, std::string base)
161 {
162  auto testName = "In-place stencil";
163  typedef SkeletonStencil<SparseGridZ::dims,0,1> Stencil01T;
164  typedef SkeletonStencil<SparseGridZ::dims,1,0> Stencil10T;
165 
166  // typedef SkeletonStencil<SparseGridZ::dims,0,0> Stencil01T;
167  // typedef SkeletonStencil<SparseGridZ::dims,0,0> Stencil10T;
168 
169  report_sparsegrid_funcs.graphs.put(base + ".dim",2);
170  report_sparsegrid_funcs.graphs.put(base + ".blockSize",blockEdgeSize);
171  report_sparsegrid_funcs.graphs.put(base + ".gridSize.x",gridEdgeSize*SparseGridZ::blockEdgeSize_);
172  report_sparsegrid_funcs.graphs.put(base + ".gridSize.y",gridEdgeSize*SparseGridZ::blockEdgeSize_);
173 
174  unsigned int iterations = 100;
175 
176  openfpm::vector<double> measures_gf;
177  openfpm::vector<double> measures_tm;
178 
179  dim3 gridSize(gridEdgeSize, gridEdgeSize);
180  dim3 blockSize(SparseGridZ::blockEdgeSize_,SparseGridZ::blockEdgeSize_);
181  typename SparseGridZ::grid_info blockGeometry(gridSize);
182  SparseGridZ sparseGrid(blockGeometry);
183  mgpu::ofp_context_t ctx;
184  sparseGrid.template setBackgroundValue<0>(0);
185 
186  unsigned long long numElements = gridEdgeSize*SparseGridZ::blockEdgeSize_*gridEdgeSize*SparseGridZ::blockEdgeSize_;
187 
188  // Initialize the grid
189  sparseGrid.setGPUInsertBuffer(gridSize, dim3(1));
190  CUDA_LAUNCH_DIM3((insertConstantValue<0>),gridSize, blockSize,sparseGrid.toKernel(), 0);
191  sparseGrid.template flush < sRight_ < 0 >> (ctx, flush_type::FLUSH_ON_DEVICE);
192 
193  sparseGrid.setGPUInsertBuffer(gridSize, dim3(1));
194  dim3 sourcePt(gridSize.x * SparseGridZ::blockEdgeSize_ / 2, gridSize.y * SparseGridZ::blockEdgeSize_ / 2, 0);
195  insertOneValue<0> << < gridSize, blockSize >> > (sparseGrid.toKernel(), sourcePt, 100);
196  sparseGrid.template flush < sRight_ < 0 >> (ctx, flush_type::FLUSH_ON_DEVICE);
197 
198  sparseGrid.findNeighbours(); // Pre-compute the neighbours pos for each block!
199 
200  iterations /= 2;
201  for (unsigned int iter=0; iter<iterations; ++iter)
202  {
203  cudaDeviceSynchronize();
204 
205  timer ts;
206  ts.start();
207 
208  sparseGrid.template applyStencils<Stencil01T>(sparseGrid.getBox(),STENCIL_MODE_INPLACE, 0.1);
209  cudaDeviceSynchronize();
210  sparseGrid.template applyStencils<Stencil10T>(sparseGrid.getBox(),STENCIL_MODE_INPLACE, 0.1);
211  cudaDeviceSynchronize();
212 
213  ts.stop();
214 
215  measures_tm.add(ts.getwct());
216 
217  float gElemS = 2 * numElements / (1e9 * ts.getwct());
218  float gFlopsS = gElemS * Stencil01T::flops;
219 
220  measures_gf.add(gFlopsS);
221  }
222 
223  double mean_tm = 0;
224  double deviation_tm = 0;
225  standard_deviation(measures_tm,mean_tm,deviation_tm);
226 
227  double mean_gf = 0;
228  double deviation_gf = 0;
229  standard_deviation(measures_gf,mean_gf,deviation_gf);
230 
231  // All times above are in ms
232 
233  float gElemS = 2 * numElements / (1e9 * mean_tm);
234  float gFlopsS = gElemS * Stencil01T::flops;
235  std::cout << "Test: " << testName << std::endl;
236  std::cout << "Block: " << SparseGridZ::blockEdgeSize_ << "x" << SparseGridZ::blockEdgeSize_ << std::endl;
237  std::cout << "Grid: " << gridEdgeSize*SparseGridZ::blockEdgeSize_ << "x" << gridEdgeSize*SparseGridZ::blockEdgeSize_ << std::endl;
238  double dataOccupancyMean, dataOccupancyDev;
239  sparseGrid.deviceToHost();
240  sparseGrid.measureBlockOccupancy(dataOccupancyMean, dataOccupancyDev);std::cout << "Data Occupancy: " << dataOccupancyMean << " dev:" << dataOccupancyDev << std::endl;
241  report_sparsegrid_funcs.graphs.put(base + ".dataOccupancy.mean",dataOccupancyMean);
242  report_sparsegrid_funcs.graphs.put(base +".dataOccupancy.dev",dataOccupancyDev);
243  std::cout << "Iterations: " << iterations << std::endl;
244  std::cout << "\tStencil: " << mean_gf << " dev:" << deviation_gf << " s" << std::endl;
245  std::cout << "Throughput: " << std::endl << "\t " << gElemS << " GElem/s " << std::endl << "\t " << gFlopsS << " GFlops/s" << std::endl;
246 
247  report_sparsegrid_funcs.graphs.put(base + ".GFlops.mean",mean_gf);
248  report_sparsegrid_funcs.graphs.put(base +".GFlops.dev",deviation_gf);
249  report_sparsegrid_funcs.graphs.put(base + ".time.mean",mean_tm);
250  report_sparsegrid_funcs.graphs.put(base +".time.dev",deviation_tm);
251 }
252 
253 void launch_testConv3x3x3_perf_z_morton(std::string testURI, unsigned int i)
254 {
255  constexpr unsigned int dim = 3;
256  typedef aggregate<float,float> AggregateT;
257  constexpr unsigned int chunkSize = IntPow<8,dim>::value;
258 
259  std::string base(testURI + "(" + std::to_string(i) + ")");
260  report_sparsegrid_funcs.graphs.put(base + ".test.name","Conv3x3x3");
261 
262  testConv3x3x3_perf<SparseGridGpu_z<dim, AggregateT, 8, chunkSize,long int>>("Convolution 3x3x3 Z-morton");
263 }
264 
265 void launch_testConv3x3x3_perf(std::string testURI, unsigned int i)
266 {
267  constexpr unsigned int dim = 3;
268  typedef aggregate<float,float> AggregateT;
269  constexpr unsigned int chunkSize = IntPow<8,dim>::value;
270 
271  std::string base(testURI + "(" + std::to_string(i) + ")");
272  report_sparsegrid_funcs.graphs.put(base + ".test.name","Conv3x3x3");
273 
274  testConv3x3x3_perf<SparseGridGpu<dim, AggregateT, 8, chunkSize,long int>>("Convolution 3x3x3 ");
275 }
276 
277 void launch_testConv3x3x3_perf_no_shared_z_morton(std::string testURI, unsigned int i)
278 {
279  constexpr unsigned int dim = 3;
280  typedef aggregate<float,float> AggregateT;
281 
282  std::string base(testURI + "(" + std::to_string(i) + ")");
283  report_sparsegrid_funcs.graphs.put(base + ".test.name","Conv3x3x3");
284 
285  testConv3x3x3_no_shared_perf<SparseGridGpu_z<dim, AggregateT, 8, 512, long int>>("Convolution 3x3x3_noshared z-morton");
286 }
287 
288 void launch_testConv3x3x3_perf_no_shared(std::string testURI, unsigned int i)
289 {
290  constexpr unsigned int dim = 3;
291  typedef aggregate<float,float> AggregateT;
292 
293  std::string base(testURI + "(" + std::to_string(i) + ")");
294  report_sparsegrid_funcs.graphs.put(base + ".test.name","Conv3x3x3");
295 
296  testConv3x3x3_no_shared_perf<SparseGridGpu<dim, AggregateT, 8, 512, long int>>("Convolution 3x3x3_noshared");
297 }
298 
299 template<unsigned int blockEdgeSize, unsigned int gridEdgeSize>
300 void launch_testStencilSkeleton_perf(std::string testURI, unsigned int i)
301 {
302  constexpr unsigned int dim = 2;
303  typedef aggregate<float,float> AggregateT;
304  // typedef aggregate<float> AggregateT;
305  constexpr unsigned int chunkSize = IntPow<blockEdgeSize,dim>::value;
306 
307  std::string base(testURI + "(" + std::to_string(i) + ")");
308  report_sparsegrid_funcs.graphs.put(base + ".test.name","StencilN");
309 
310  testStencilSkeleton_perf<blockEdgeSize, gridEdgeSize,
312  cudaDeviceSynchronize();
313 }
314 
315 
316 template<unsigned int blockEdgeSize, unsigned int gridEdgeSize>
317 void launch_testStencilSkeletonZ_perf(std::string testURI, unsigned int i)
318 {
319  constexpr unsigned int dim = 2;
320  typedef aggregate<float,float> AggregateT;
321  // typedef aggregate<float> AggregateT;
322  constexpr unsigned int chunkSize = IntPow<blockEdgeSize,dim>::value;
323 
324  std::string base(testURI + "(" + std::to_string(i) + ")");
325  report_sparsegrid_funcs.graphs.put(base + ".test.name","StencilZ");
326 
327  testStencilSkeleton_perf<blockEdgeSize, gridEdgeSize,
329  cudaDeviceSynchronize();
330 }
331 
332 BOOST_AUTO_TEST_CASE(testConv3x3x3_noshared)
333 {
334  std::string testURI = suiteURI + ".device.conv3x3x3_no_shared.sparse.N.3D.gridScaling";
335  unsigned int counter = 0;
336  launch_testConv3x3x3_perf_no_shared(testURI, counter++);
337  testSet.insert(testURI);
338 }
339 
340 BOOST_AUTO_TEST_CASE(testConv3x3x3_noshared_z_morton)
341 {
342  std::string testURI = suiteURI + ".device.conv3x3x3_no_shared.sparse.N.3D.gridScaling";
343  unsigned int counter = 0;
344  launch_testConv3x3x3_perf_no_shared_z_morton(testURI, counter++);
345  testSet.insert(testURI);
346 }
347 
348 BOOST_AUTO_TEST_CASE(testConv3x3x3)
349 {
350  std::string testURI = suiteURI + ".device.conv3x3x3.sparse.N.3D.gridScaling";
351  unsigned int counter = 0;
352  launch_testConv3x3x3_perf(testURI, counter++);
353  testSet.insert(testURI);
354 }
355 
356 BOOST_AUTO_TEST_CASE(testConv3x3x3_zmorton)
357 {
358 
359  std::string testURI = suiteURI + ".device.conv3x3x3_zmorton.sparse.N.3D.gridScaling";
360  unsigned int counter = 0;
361  launch_testConv3x3x3_perf_z_morton(testURI, counter++);
362  testSet.insert(testURI);
363 }
364 
365 BOOST_AUTO_TEST_CASE(testStencilSkeleton_gridScaling)
366 {
367  std::string testURI = suiteURI + ".device.skeleton.dense.N.2D.gridScaling";
368  unsigned int counter = 0;
369  constexpr unsigned int blockEdgeSize = 8;
370  launch_testStencilSkeleton_perf<blockEdgeSize, 128>(testURI, counter++);
371  launch_testStencilSkeleton_perf<blockEdgeSize, 256>(testURI, counter++);
372  launch_testStencilSkeleton_perf<blockEdgeSize, 512>(testURI, counter++);
373  launch_testStencilSkeleton_perf<blockEdgeSize, 1024>(testURI, counter++);
374 
375  testSet.insert(testURI);
376 }
377 
378 BOOST_AUTO_TEST_CASE(testStencilHeatGet_gridScaling_2)
379 {
380  std::string testURI = suiteURI + ".device.stencilGet.dense.N.2D.2.gridScaling";
381  unsigned int counter = 0;
382  launch_testStencilHeatGet_perf<2, 512>(testURI, counter++);
383  launch_testStencilHeatGet_perf<2, 1024>(testURI, counter++);
384  launch_testStencilHeatGet_perf<2, 2048>(testURI, counter++);
385  // launch_testStencilHeatGet_perf<2, 4096>(testURI, counter++); // test
386 
387  testSet.insert(testURI);
388 }
389 
390 BOOST_AUTO_TEST_CASE(testStencilHeatGet_gridScaling_4)
391 {
392  std::string testURI = suiteURI + ".device.stencilGet.dense.N.2D.4.gridScaling";
393  unsigned int counter = 0;
394  launch_testStencilHeatGet_perf<4, 256>(testURI, counter++);
395  launch_testStencilHeatGet_perf<4, 512>(testURI, counter++);
396  launch_testStencilHeatGet_perf<4, 1024>(testURI, counter++);
397  launch_testStencilHeatGet_perf<4, 2048>(testURI, counter++);
398 
399  testSet.insert(testURI);
400 }
401 
402 BOOST_AUTO_TEST_CASE(testStencilHeatGet_gridScaling_8)
403 {
404  std::string testURI = suiteURI + ".device.stencilGet.dense.N.2D.8.gridScaling";
405  unsigned int counter = 0;
406  launch_testStencilHeatGet_perf<8, 128>(testURI, counter++);
407  launch_testStencilHeatGet_perf<8, 256>(testURI, counter++);
408  launch_testStencilHeatGet_perf<8, 512>(testURI, counter++);
409  launch_testStencilHeatGet_perf<8, 1024>(testURI, counter++);
410 
411  testSet.insert(testURI);
412 }
413 
414 BOOST_AUTO_TEST_CASE(testStencilHeatGet_gridScaling_16)
415 {
416  std::string testURI = suiteURI + ".device.stencilGet.dense.N.2D.16.gridScaling";
417  unsigned int counter = 0;
418  launch_testStencilHeatGet_perf<16, 64>(testURI, counter++);
419  launch_testStencilHeatGet_perf<16, 128>(testURI, counter++);
420  launch_testStencilHeatGet_perf<16, 256>(testURI, counter++);
421  launch_testStencilHeatGet_perf<16, 512>(testURI, counter++);
422 
423  testSet.insert(testURI);
424 }
425 
426 BOOST_AUTO_TEST_CASE(testStencilHeatGet_gridScaling_32)
427 {
428  std::string testURI = suiteURI + ".device.stencilGet.dense.N.2D.32.gridScaling";
429  unsigned int counter = 0;
430  launch_testStencilHeatGet_perf<32, 32>(testURI, counter++);
431  launch_testStencilHeatGet_perf<32, 64>(testURI, counter++);
432  launch_testStencilHeatGet_perf<32, 128>(testURI, counter++);
433  launch_testStencilHeatGet_perf<32, 256>(testURI, counter++); // test
434 
435  testSet.insert(testURI);
436 }
437 
438 BOOST_AUTO_TEST_CASE(testStencilHeatGet_blockScaling)
439 {
440  std::string testURI = suiteURI + ".device.stencilGet.dense.N.2D.blockScaling";
441  unsigned int counter = 0;
442  // Note - blockEdgeSize == 2 doesn't work
443  launch_testStencilHeatGet_perf<4, 2048>(testURI, counter++);
444  launch_testStencilHeatGet_perf<8, 1024>(testURI, counter++);
445  launch_testStencilHeatGet_perf<16, 512>(testURI, counter++);
446  launch_testStencilHeatGet_perf<32, 256>(testURI, counter++);
447 
448  testSet.insert(testURI);
449 }
450 
451 
452 BOOST_AUTO_TEST_CASE(write_teport)
453 {
454  write_test_report(report_sparsegrid_funcs, testSet);
455 }
456 
457 BOOST_AUTO_TEST_SUITE_END()
458 
459 BOOST_AUTO_TEST_SUITE_END()
double getwct()
Return the elapsed real time.
Definition: timer.hpp:130
void start()
Start the timer.
Definition: timer.hpp:90
aggregate of properties, from a list of object if create a struct that follow the OPENFPM native stru...
Definition: aggregate.hpp:214
Implementation of 1-D std::vector like structure.
Definition: map_vector.hpp:202
Class for cpu time benchmarking.
Definition: timer.hpp:27
void stop()
Stop the timer.
Definition: timer.hpp:119