7#define BOOST_TEST_DYN_LINK
8#define DISABLE_MPI_WRITTERS
10#include <boost/test/unit_test.hpp>
11#include "performancePlots.hpp"
13#include "SparseGridGpu/SparseGridGpu.hpp"
14#include "SparseGridGpu/tests/utils/SparseGridGpu_util_test.cuh"
16extern std::string suiteURI;
18extern std::set<std::string> testSet;
// Benchmark of insert throughput on a 2D sparse grid: every measurement performs one
// insertValues2D launch followed by a flush on the device.
//
// Template parameters:
//   blockEdgeSize - edge length of the CUDA block (blockEdgeSize x blockEdgeSize threads).
//   gridEdgeSize  - edge length of the CUDA grid (gridEdgeSize x gridEdgeSize blocks).
// Parameters:
//   testURI - prefix of the report key; results go under testURI + "(" + i + ")".
//   i       - index of this run, embedded in the report key.
//
// Results are stored in report_sparsegrid_funcs.graphs and echoed to std::cout.
// NOTE(review): sparseGrid, ctx, offset, ts, measures, mean and deviation are used below,
// but their declarations are not visible in this listing - confirm against the full file.
21template<
unsigned int blockEdgeSize,
unsigned int gr
idEdgeSize>
22void testInsertSingle(std::string testURI,
unsigned int i)
24 auto testName =
"Insert single (one chunk per element)";
25 constexpr unsigned int dim = 2;
// Number of repetitions of the measured loop further down.
30 unsigned int iterations = 10;
// NOTE(review): presumably gates the initial insert below; the condition itself is not
// visible in this listing.
31 bool prePopulateGrid =
true;
// Report key of this run: "<testURI>(<i>)".
34 std::string base(testURI +
"(" + std::to_string(i) +
")");
// Static description of the run (test name, dimensionality, block size, grid extent).
35 report_sparsegrid_funcs.graphs.put(base +
".test.name",
"InsertSingle");
37 report_sparsegrid_funcs.graphs.put(base +
".dim",dim);
38 report_sparsegrid_funcs.graphs.put(base +
".blockSize",blockEdgeSize);
39 report_sparsegrid_funcs.graphs.put(base +
".gridSize.x",gridEdgeSize*blockEdgeSize);
40 report_sparsegrid_funcs.graphs.put(base +
".gridSize.y",gridEdgeSize*blockEdgeSize);
// Launch configuration shared by every insertValues2D launch in this function.
42 dim3 gridSize(gridEdgeSize, gridEdgeSize);
43 dim3 blockSize(blockEdgeSize, blockEdgeSize);
// Background value 0 for template index 0 (presumably property 0 - TODO confirm).
47 sparseGrid.template setBackgroundValue<0>(0);
// Initial insert with offsets (0, 0): set up the GPU insert buffer, launch the kernel,
// flush on the device, then block until the device has finished.
52 sparseGrid.setGPUInsertBuffer(gridSize, blockSize);
53 insertValues2D<0> << < gridSize, blockSize >> > (sparseGrid.toKernel(), 0, 0);
54 sparseGrid.template flush < smax_ < 0 >> (ctx, flush_type::FLUSH_ON_DEVICE);
55 cudaDeviceSynchronize();
// Five repetitions of the same insert sequence; no timing is visible here
// (presumably warm-up - TODO confirm).
59 for (
unsigned int iter=0; iter<5; ++iter)
62 sparseGrid.setGPUInsertBuffer(gridSize, blockSize);
63 insertValues2D<0> << < gridSize, blockSize >> > (sparseGrid.toKernel(), offset, offset);
64 sparseGrid.template flush < smax_ < 0 >> (ctx, flush_type::FLUSH_ON_DEVICE);
65 cudaDeviceSynchronize();
// Number of threads in one launch (grid extent squared).
// NOTE(review): the product is evaluated in unsigned int and only then widened to
// unsigned long long, so it would wrap for extents above 65535 per axis.
68 unsigned long long numElements = gridEdgeSize*blockEdgeSize*gridEdgeSize*blockEdgeSize;
// Measured repetitions.
71 for (
unsigned int iter=0; iter<iterations; ++iter)
// Wait for any pending device work before the insert sequence.
75 cudaDeviceSynchronize();
80 sparseGrid.setGPUInsertBuffer(gridSize, blockSize);
81 insertValues2D<0> << < gridSize, blockSize >> > (sparseGrid.toKernel(), offset, offset);
82 sparseGrid.template flush < smax_ < 0 >> (ctx, flush_type::FLUSH_ON_DEVICE);
83 cudaDeviceSynchronize();
// Throughput of this repetition: numElements divided by 1e6 times the elapsed time from
// ts.getwct(); printed below with the unit "M/s".
87 float mElemS = numElements / (1e6 * ts.
getwct());
// NOTE(review): presumably fills mean and deviation from the collected measures - confirm.
93 standard_deviation(measures,mean,deviation);
95 report_sparsegrid_funcs.graphs.put(base +
".Minsert.mean",mean);
96 report_sparsegrid_funcs.graphs.put(base +
".Minsert.dev",deviation);
// Human-readable summary on stdout.
100 std::cout <<
"Test: " << testName <<
"\n";
101 std::cout <<
"Block: " << blockEdgeSize <<
"x" << blockEdgeSize <<
"\n";
102 std::cout <<
"Grid: " << gridEdgeSize*blockEdgeSize <<
"x" << gridEdgeSize*blockEdgeSize <<
"\n";
103 double dataOccupancyMean, dataOccupancyDev;
// NOTE(review): presumably copies the grid to the host so that the occupancy can be
// measured there - confirm.
104 sparseGrid.deviceToHost();
105 sparseGrid.measureBlockOccupancy(dataOccupancyMean, dataOccupancyDev);std::cout <<
"Data Occupancy: " << dataOccupancyMean <<
" dev:" << dataOccupancyDev << std::endl;
106 report_sparsegrid_funcs.graphs.put(base +
".dataOccupancy.mean",dataOccupancyMean);
107 report_sparsegrid_funcs.graphs.put(base +
".dataOccupancy.dev",dataOccupancyDev);
108 std::cout <<
"Iterations: " << iterations <<
"\n";
109 std::cout <<
"Throughput:\n\t" << mean <<
"M/s" <<
"\n";
113BOOST_AUTO_TEST_SUITE(performance)
115BOOST_AUTO_TEST_SUITE(SparseGridGpu_test)
117BOOST_AUTO_TEST_CASE(testInsert_gridScaling_2)
119 std::string testURI = suiteURI +
".device.insert.dense.single.2D.2.gridScaling";
120 unsigned int counter = 0;
121 testInsertSingle<2, 128>(testURI, counter++);
122 testInsertSingle<2, 256>(testURI, counter++);
123 testInsertSingle<2, 512>(testURI, counter++);
124 testInsertSingle<2, 1024>(testURI, counter++);
125 testSet.insert(testURI);
127BOOST_AUTO_TEST_CASE(testInsert_gridScaling_4)
129 std::string testURI = suiteURI +
".device.insert.dense.single.2D.4.gridScaling";
130 unsigned int counter = 0;
131 testInsertSingle<4, 64>(testURI, counter++);
132 testInsertSingle<4, 128>(testURI, counter++);
133 testInsertSingle<4, 256>(testURI, counter++);
134 testInsertSingle<4, 512>(testURI, counter++);
135 testSet.insert(testURI);
137BOOST_AUTO_TEST_CASE(testInsert_gridScaling_8)
139 std::string testURI = suiteURI +
".device.insert.dense.single.2D.8.gridScaling";
140 unsigned int counter = 0;
141 testInsertSingle<8, 32>(testURI, counter++);
142 testInsertSingle<8, 64>(testURI, counter++);
143 testInsertSingle<8, 128>(testURI, counter++);
144 testInsertSingle<8, 256>(testURI, counter++);
145 testSet.insert(testURI);
148BOOST_AUTO_TEST_CASE(testInsert_blockScaling)
150 std::string testURI = suiteURI +
".device.insert.dense.single.2D.blockScaling";
151 unsigned int counter = 0;
152 testInsertSingle<2, 1024>(testURI, counter++);
153 testInsertSingle<4, 512>(testURI, counter++);
154 testInsertSingle<8, 256>(testURI, counter++);
156 testSet.insert(testURI);
161BOOST_AUTO_TEST_SUITE_END()
163BOOST_AUTO_TEST_SUITE_END()
Implementation of a 1-D std::vector-like structure.
Class for CPU time benchmarking.
void stop()
Stop the timer.
void start()
Start the timer.
double getwct()
Return the elapsed real time.
Aggregate of properties: from a list of objects, it creates a struct that follows the OpenFPM native stru...