7#define BOOST_TEST_DYN_LINK
8#define DISABLE_MPI_WRITTERS
10#include <boost/test/unit_test.hpp>
11#include "performancePlots.hpp"
13#include "SparseGridGpu/SparseGridGpu.hpp"
14#include "SparseGridGpu/tests/utils/SparseGridGpu_util_test.cuh"
16extern std::string suiteURI;
18extern std::set<std::string> testSet;
// Benchmark of blocked insertion into a 2D sparse grid on the GPU.
//
// Launches the insertValues2DBlocked kernel followed by a flush several
// times, stores statistics in report_sparsegrid_funcs.graphs and prints a
// summary to std::cout.
//
// Template parameters:
//   blockEdgeSize - edge of the CUDA thread block (blockSize is
//                   blockEdgeSize x blockEdgeSize); reported as ".blockSize".
//   gridEdgeSize  - edge of the CUDA launch grid (gridSize is
//                   gridEdgeSize x gridEdgeSize). The reported grid size per
//                   axis is gridEdgeSize*blockEdgeSize.
// Parameters:
//   testURI - prefix of every key written to report_sparsegrid_funcs.graphs.
//   i       - run index, appended to testURI as "(i)".
//
// NOTE(review): this listing has lost lines (the embedded line numbers skip,
// braces are absent, and sparseGrid, ctx, offset, ts, measures, mean and
// deviation are used without a visible declaration). The code below is left
// exactly as found.
20template<
unsigned int blockEdgeSize,
unsigned int gr
idEdgeSize>
21void test_insert_block(std::string testURI,
unsigned int i)
23 auto testName =
"Insert (one chunk per block)";
24 constexpr unsigned int dim = 2;
// Key prefix for this run: "<testURI>(<i>)".
30 std::string base(testURI +
"(" + std::to_string(i) +
")");
// Static description of the run (name, dimensionality, block and grid size).
31 report_sparsegrid_funcs.graphs.put(base +
".test.name",
"InsertBlock");
33 report_sparsegrid_funcs.graphs.put(base +
".name",
"Block insert");
34 report_sparsegrid_funcs.graphs.put(base +
".dim",dim);
35 report_sparsegrid_funcs.graphs.put(base +
".blockSize",blockEdgeSize);
36 report_sparsegrid_funcs.graphs.put(base +
".gridSize.x",gridEdgeSize*blockEdgeSize);
37 report_sparsegrid_funcs.graphs.put(base +
".gridSize.y",gridEdgeSize*blockEdgeSize);
// Upper bound of the second (measured) loop below.
39 unsigned int iterations = 10;
// NOTE(review): the product is evaluated in unsigned int and only widened to
// unsigned long long on assignment, so it would wrap once
// gridEdgeSize*blockEdgeSize reaches 65536. The instantiations visible in
// this file stay at 8192 or below per axis.
43 unsigned long long numElements = gridEdgeSize*blockEdgeSize*gridEdgeSize*blockEdgeSize;
// Launch configuration: gridEdgeSize^2 blocks of blockEdgeSize^2 threads.
44 dim3 gridSize(gridEdgeSize, gridEdgeSize);
45 dim3 blockSize(blockEdgeSize, blockEdgeSize);
// Second argument handed to setGPUInsertBuffer together with gridSize.
46 dim3 blockSizeBlockedInsert(1, 1);
50 sparseGrid.template setBackgroundValue<0>(0);
// Five insert + flush rounds ahead of the measured loop - presumably a
// warm-up (the loop braces are not visible in this listing; confirm).
53 for (
unsigned int iter=0; iter<5; ++iter)
56 sparseGrid.setGPUInsertBuffer(gridSize, blockSizeBlockedInsert);
57 insertValues2DBlocked<0, 1, blockEdgeSize> << < gridSize, blockSize >> >
58 (sparseGrid.toKernel(), offset, offset);
59 sparseGrid.template flush < smax_ < 0 >> (ctx, flush_type::FLUSH_ON_DEVICE);
// Wait for the device to finish before the measured loop starts.
63 cudaDeviceSynchronize();
// Measured loop: the insert + flush sequence sits between two
// cudaDeviceSynchronize() calls. The timer start/stop calls are presumably on
// the lines missing from this listing - confirm.
66 for (
unsigned int iter=0; iter<iterations; ++iter)
70 cudaDeviceSynchronize();
75 sparseGrid.setGPUInsertBuffer(gridSize, blockSizeBlockedInsert);
76 insertValues2DBlocked<0, 1, blockEdgeSize> << < gridSize, blockSize >> >
77 (sparseGrid.toKernel(), offset, offset);
78 sparseGrid.template flush < smax_ < 0 >> (ctx, flush_type::FLUSH_ON_DEVICE);
80 cudaDeviceSynchronize();
// numElements divided by (1e6 * elapsed time); millions of elements per
// second if ts.getwct() returns seconds (TODO confirm the unit).
// NOTE(review): the double-precision quotient is narrowed to float here.
84 float mElemS = numElements / (1e6 * ts.
getwct());
// Presumably fills mean and deviation from the collected measures - verify
// against the helper's definition.
90 standard_deviation(measures,mean,deviation);
92 report_sparsegrid_funcs.graphs.put(base +
".Minsert.mean",mean);
93 report_sparsegrid_funcs.graphs.put(base +
".Minsert.dev",deviation);
// Human-readable summary on stdout.
97 std::cout <<
"Test: " << testName <<
"\n";
98 std::cout <<
"Block: " << blockEdgeSize <<
"x" << blockEdgeSize <<
"\n";
99 std::cout <<
"Grid: " << gridEdgeSize*blockEdgeSize <<
"x" << gridEdgeSize*blockEdgeSize <<
"\n";
// Copy the grid back to the host, then measure, print and report block
// occupancy (mean and deviation).
100 double dataOccupancyMean, dataOccupancyDev;
101 sparseGrid.deviceToHost();
102 sparseGrid.measureBlockOccupancy(dataOccupancyMean, dataOccupancyDev);std::cout <<
"Data Occupancy: " << dataOccupancyMean <<
" dev:" << dataOccupancyDev << std::endl;
103 report_sparsegrid_funcs.graphs.put(base +
".dataOccupancy.mean",dataOccupancyMean);
104 report_sparsegrid_funcs.graphs.put(base +
".dataOccupancy.dev",dataOccupancyDev);
105 std::cout <<
"Iterations: " << iterations <<
"\n";
// NOTE(review): the same `mean` is stored as ".Minsert.mean" above and
// printed as MElem/s in the next statement, so the " s" suffix on this line
// looks inconsistent - confirm the intended unit.
106 std::cout <<
"\tInsert: " << mean <<
" dev: " << deviation <<
" s" << std::endl;
107 std::cout <<
"Throughput:\n\t" << mean <<
" MElem/s\n";
// Open the nested Boost.Test suites: performance, then SparseGridGpu_test
// inside it.
111BOOST_AUTO_TEST_SUITE(performance)
113BOOST_AUTO_TEST_SUITE(SparseGridGpu_test)
// Grid scaling with blockEdgeSize = 2: runs test_insert_block for
// gridEdgeSize = 128, 256, 512, 1024 and 2048. Each run receives the next
// counter value (0, 1, 2, ...) as its index.
115BOOST_AUTO_TEST_CASE(testInsertBlocked_gridScaling_2)
117 std::string testURI = suiteURI +
".device.insert.dense.block.2D.2.gridScaling";
118 unsigned int counter = 0;
119 test_insert_block<2,128>(testURI, counter++);
120 test_insert_block<2,256>(testURI, counter++);
121 test_insert_block<2,512>(testURI, counter++);
122 test_insert_block<2,1024>(testURI, counter++);
123 test_insert_block<2,2048>(testURI, counter++);
// Add this series' URI to the externally defined testSet.
126 testSet.insert(testURI);
// Grid scaling with blockEdgeSize = 4: runs test_insert_block for
// gridEdgeSize = 64, 128, 256, 512, 1024 and 2048. Each run receives the next
// counter value (0, 1, 2, ...) as its index.
129BOOST_AUTO_TEST_CASE(testInsertBlocked_gridScaling_4)
131 std::string testURI = suiteURI +
".device.insert.dense.block.2D.4.gridScaling";
132 unsigned int counter = 0;
133 test_insert_block<4,64>(testURI, counter++);
134 test_insert_block<4,128>(testURI, counter++);
135 test_insert_block<4,256>(testURI, counter++);
136 test_insert_block<4,512>(testURI, counter++);
137 test_insert_block<4,1024>(testURI, counter++);
138 test_insert_block<4,2048>(testURI, counter++);
// Add this series' URI to the externally defined testSet.
140 testSet.insert(testURI);
// Grid scaling with blockEdgeSize = 8: runs test_insert_block for
// gridEdgeSize = 32, 64, 128, 256, 512 and 1024. Each run receives the next
// counter value (0, 1, 2, ...) as its index.
143BOOST_AUTO_TEST_CASE(testInsertBlocked_gridScaling_8)
145 std::string testURI = suiteURI +
".device.insert.dense.block.2D.8.gridScaling";
146 unsigned int counter = 0;
147 test_insert_block<8,32>(testURI, counter++);
148 test_insert_block<8,64>(testURI, counter++);
149 test_insert_block<8,128>(testURI, counter++);
150 test_insert_block<8,256>(testURI, counter++);
151 test_insert_block<8,512>(testURI, counter++);
152 test_insert_block<8,1024>(testURI, counter++);
// Add this series' URI to the externally defined testSet.
154 testSet.insert(testURI);
// Grid scaling with blockEdgeSize = 16: runs test_insert_block for
// gridEdgeSize = 16, 32, 64, 128, 256 and 512. Each run receives the next
// counter value (0, 1, 2, ...) as its index.
157BOOST_AUTO_TEST_CASE(testInsertBlocked_gridScaling_16)
159 std::string testURI = suiteURI +
".device.insert.dense.block.2D.16.gridScaling";
160 unsigned int counter = 0;
161 test_insert_block<16,16>(testURI, counter++);
162 test_insert_block<16,32>(testURI, counter++);
163 test_insert_block<16,64>(testURI, counter++);
164 test_insert_block<16,128>(testURI, counter++);
165 test_insert_block<16,256>(testURI, counter++);
166 test_insert_block<16,512>(testURI, counter++);
// Add this series' URI to the externally defined testSet.
168 testSet.insert(testURI);
// Grid scaling with blockEdgeSize = 32: runs test_insert_block for
// gridEdgeSize = 8, 16, 32, 64, 128 and 256. Each run receives the next
// counter value (0, 1, 2, ...) as its index.
171BOOST_AUTO_TEST_CASE(testInsertBlocked_gridScaling_32)
173 std::string testURI = suiteURI +
".device.insert.dense.block.2D.32.gridScaling";
174 unsigned int counter = 0;
175 test_insert_block<32,8>(testURI, counter++);
176 test_insert_block<32,16>(testURI, counter++);
177 test_insert_block<32,32>(testURI, counter++);
178 test_insert_block<32,64>(testURI, counter++);
179 test_insert_block<32,128>(testURI, counter++);
180 test_insert_block<32,256>(testURI, counter++);
// Add this series' URI to the externally defined testSet.
182 testSet.insert(testURI);
// Block scaling: blockEdgeSize goes 2, 4, 8, 16, 32 while gridEdgeSize is
// halved each step (2048 down to 128), so blockEdgeSize*gridEdgeSize stays
// at 4096 for every run. Each run receives the next counter value as its
// index.
185BOOST_AUTO_TEST_CASE(testInsertBlocked_blockScaling)
187 std::string testURI = suiteURI +
".device.insert.dense.block.2D.blockScaling";
188 unsigned int counter = 0;
189 test_insert_block<2,2048>(testURI, counter++);
190 test_insert_block<4,1024>(testURI, counter++);
191 test_insert_block<8,512>(testURI, counter++);
192 test_insert_block<16,256>(testURI, counter++);
193 test_insert_block<32,128>(testURI, counter++);
// Add this series' URI to the externally defined testSet.
195 testSet.insert(testURI);
// Close the two nested suites opened above: the inner SparseGridGpu_test
// first, then performance.
198BOOST_AUTO_TEST_SUITE_END()
200BOOST_AUTO_TEST_SUITE_END()
Implementation of a 1-D std::vector-like structure.
Class for cpu time benchmarking.
void stop()
Stop the timer.
void start()
Start the timer.
double getwct()
Return the elapsed real time.
Aggregate of properties: from a list of objects it creates a struct that follows the OPENFPM native stru...