7#define BOOST_TEST_DYN_LINK
8#define DISABLE_MPI_WRITTERS
10#include <boost/test/unit_test.hpp>
11#include "performancePlots.hpp"
13#include "SparseGridGpu/SparseGridGpu.hpp"
14#include "SparseGridGpu/tests/utils/SparseGridGpu_util_test.cuh"
16extern std::string suiteURI;
18extern std::set<std::string> testSet;
20template<
unsigned int blockEdgeSize,
unsigned int gr
idEdgeSize,
typename SparseGr
idZ>
21void testStencilHeat_perf(
unsigned int i, std::string base)
23 auto testName =
"In-place stencil";
30 report_sparsegrid_funcs.graphs.put(base +
".dim",2);
31 report_sparsegrid_funcs.graphs.put(base +
".blockSize",blockEdgeSize);
32 report_sparsegrid_funcs.graphs.put(base +
".gridSize.x",gridEdgeSize*SparseGridZ::blockEdgeSize_);
33 report_sparsegrid_funcs.graphs.put(base +
".gridSize.y",gridEdgeSize*SparseGridZ::blockEdgeSize_);
35 unsigned int iterations = 100;
40 dim3 gridSize(gridEdgeSize, gridEdgeSize);
41 dim3 blockSize(SparseGridZ::blockEdgeSize_,SparseGridZ::blockEdgeSize_);
42 typename SparseGridZ::grid_info blockGeometry(gridSize);
43 SparseGridZ sparseGrid(blockGeometry);
45 sparseGrid.template setBackgroundValue<0>(0);
47 unsigned long long numElements = gridEdgeSize*SparseGridZ::blockEdgeSize_*gridEdgeSize*SparseGridZ::blockEdgeSize_;
50 sparseGrid.setGPUInsertBuffer(gridSize, dim3(1));
51 CUDA_LAUNCH_DIM3((insertConstantValue<0>),gridSize, blockSize,sparseGrid.toKernel(), 0);
52 sparseGrid.template flush < sRight_ < 0 >> (ctx, flush_type::FLUSH_ON_DEVICE);
54 sparseGrid.setGPUInsertBuffer(gridSize, dim3(1));
55 dim3 sourcePt(gridSize.x * SparseGridZ::blockEdgeSize_ / 2, gridSize.y * SparseGridZ::blockEdgeSize_ / 2, 0);
56 insertOneValue<0> << < gridSize, blockSize >> > (sparseGrid.toKernel(), sourcePt, 100);
57 sparseGrid.template flush < sRight_ < 0 >> (ctx, flush_type::FLUSH_ON_DEVICE);
59 sparseGrid.findNeighbours();
62 for (
unsigned int iter=0; iter<iterations; ++iter)
64 cudaDeviceSynchronize();
68 sparseGrid.template applyStencils<Stencil01T>(sparseGrid.getBox(),STENCIL_MODE_INPLACE, 0.1);
69 cudaDeviceSynchronize();
70 sparseGrid.template applyStencils<Stencil10T>(sparseGrid.getBox(),STENCIL_MODE_INPLACE, 0.1);
71 cudaDeviceSynchronize();
75 measures_tm.add(ts.
getwct());
77 float gElemS = 2 * numElements / (1e9 * ts.
getwct());
78 float gFlopsS = gElemS * Stencil01T::flops;
80 measures_gf.add(gFlopsS);
84 double deviation_tm = 0;
85 standard_deviation(measures_tm,mean_tm,deviation_tm);
88 double deviation_gf = 0;
89 standard_deviation(measures_gf,mean_gf,deviation_gf);
93 float gElemS = 2 * numElements / (1e9 * mean_tm);
94 float gFlopsS = gElemS * Stencil01T::flops;
95 std::cout <<
"Test: " << testName << std::endl;
96 std::cout <<
"Block: " << SparseGridZ::blockEdgeSize_ <<
"x" << SparseGridZ::blockEdgeSize_ << std::endl;
97 std::cout <<
"Grid: " << gridEdgeSize*SparseGridZ::blockEdgeSize_ <<
"x" << gridEdgeSize*SparseGridZ::blockEdgeSize_ << std::endl;
98 double dataOccupancyMean, dataOccupancyDev;
99 sparseGrid.deviceToHost();
100 sparseGrid.measureBlockOccupancy(dataOccupancyMean, dataOccupancyDev);std::cout <<
"Data Occupancy: " << dataOccupancyMean <<
" dev:" << dataOccupancyDev << std::endl;
101 report_sparsegrid_funcs.graphs.put(base +
".dataOccupancy.mean",dataOccupancyMean);
102 report_sparsegrid_funcs.graphs.put(base +
".dataOccupancy.dev",dataOccupancyDev);
103 std::cout <<
"Iterations: " << iterations << std::endl;
104 std::cout <<
"\tStencil: " << mean_gf <<
" dev:" << deviation_gf <<
" s" << std::endl;
105 std::cout <<
"Throughput: " << std::endl <<
"\t " << gElemS <<
" GElem/s " << std::endl <<
"\t " << gFlopsS <<
" GFlops/s" << std::endl;
107 report_sparsegrid_funcs.graphs.put(base +
".GFlops.mean",mean_gf);
108 report_sparsegrid_funcs.graphs.put(base +
".GFlops.dev",deviation_gf);
109 report_sparsegrid_funcs.graphs.put(base +
".time.mean",mean_tm);
110 report_sparsegrid_funcs.graphs.put(base +
".time.dev",deviation_tm);
113template<
unsigned int blockEdgeSize,
unsigned int gr
idEdgeSize>
114void launch_testStencilHeatZ_perf(std::string testURI,
unsigned int i)
116 constexpr unsigned int dim = 2;
121 std::string base(testURI +
"(" + std::to_string(i) +
")");
122 report_sparsegrid_funcs.graphs.put(base +
".test.name",
"StencilZ");
124 testStencilHeat_perf<blockEdgeSize, gridEdgeSize,
126 cudaDeviceSynchronize();
129template<
unsigned int blockEdgeSize,
unsigned int gr
idEdgeSize>
130void launch_testStencilHeat_perf(std::string testURI,
unsigned int i)
132 constexpr unsigned int dim = 2;
137 std::string base(testURI +
"(" + std::to_string(i) +
")");
138 report_sparsegrid_funcs.graphs.put(base +
".test.name",
"StencilN");
140 testStencilHeat_perf<blockEdgeSize, gridEdgeSize,
142 cudaDeviceSynchronize();
146BOOST_AUTO_TEST_SUITE(performance)
148BOOST_AUTO_TEST_SUITE(SparseGridGpu_test)
150BOOST_AUTO_TEST_CASE(testStencilHeat_gridScaling)
152 std::string testURI = suiteURI +
".device.stencil.dense.N.2D.gridScaling";
153 unsigned int counter = 0;
154 constexpr unsigned int blockEdgeSize = 8;
155 launch_testStencilHeat_perf<blockEdgeSize, 128>(testURI, counter++);
156 launch_testStencilHeat_perf<blockEdgeSize, 256>(testURI, counter++);
157 launch_testStencilHeat_perf<blockEdgeSize, 512>(testURI, counter++);
158 launch_testStencilHeat_perf<blockEdgeSize, 1024>(testURI, counter++);
160 testSet.insert(testURI);
163BOOST_AUTO_TEST_CASE(testStencilHeat_gridScaling_2)
165 std::string testURI = suiteURI +
".device.stencil.dense.N.2D.2.gridScaling";
166 unsigned int counter = 0;
167 launch_testStencilHeat_perf<2, 512>(testURI, counter++);
168 launch_testStencilHeat_perf<2, 1024>(testURI, counter++);
169 launch_testStencilHeat_perf<2, 2048>(testURI, counter++);
172 testSet.insert(testURI);
175BOOST_AUTO_TEST_CASE(testStencilHeat_gridScaling_4)
177 std::string testURI = suiteURI +
".device.stencil.dense.N.2D.4.gridScaling";
178 unsigned int counter = 0;
179 launch_testStencilHeat_perf<4, 256>(testURI, counter++);
180 launch_testStencilHeat_perf<4, 512>(testURI, counter++);
181 launch_testStencilHeat_perf<4, 1024>(testURI, counter++);
184 testSet.insert(testURI);
187BOOST_AUTO_TEST_CASE(testStencilHeat_gridScaling_8)
189 std::string testURI = suiteURI +
".device.stencil.dense.N.2D.8.gridScaling";
190 unsigned int counter = 0;
191 launch_testStencilHeat_perf<8, 128>(testURI, counter++);
192 launch_testStencilHeat_perf<8, 256>(testURI, counter++);
193 launch_testStencilHeat_perf<8, 512>(testURI, counter++);
196 testSet.insert(testURI);
199BOOST_AUTO_TEST_CASE(testStencilHeat_gridScaling_16)
201 std::string testURI = suiteURI +
".device.stencil.dense.N.2D.16.gridScaling";
202 unsigned int counter = 0;
203 launch_testStencilHeat_perf<16, 64>(testURI, counter++);
204 launch_testStencilHeat_perf<16, 128>(testURI, counter++);
205 launch_testStencilHeat_perf<16, 256>(testURI, counter++);
208 testSet.insert(testURI);
211BOOST_AUTO_TEST_CASE(testStencilHeat_gridScaling_32)
213 std::string testURI = suiteURI +
".device.stencil.dense.N.2D.32.gridScaling";
214 unsigned int counter = 0;
215 launch_testStencilHeat_perf<32, 32>(testURI, counter++);
216 launch_testStencilHeat_perf<32, 64>(testURI, counter++);
217 launch_testStencilHeat_perf<32, 128>(testURI, counter++);
220 testSet.insert(testURI);
223BOOST_AUTO_TEST_CASE(testStencilHeat_blockScaling)
225 std::string testURI = suiteURI +
".device.stencil.dense.N.2D.blockScaling";
226 unsigned int counter = 0;
228 launch_testStencilHeat_perf<4, 2048>(testURI, counter++);
229 launch_testStencilHeat_perf<8, 1024>(testURI, counter++);
230 launch_testStencilHeat_perf<16, 512>(testURI, counter++);
231 launch_testStencilHeat_perf<32, 256>(testURI, counter++);
233 testSet.insert(testURI);
237BOOST_AUTO_TEST_CASE(testStencilHeatZ_gridScaling)
239 std::string testURI = suiteURI +
".device.stencil.dense.Z.2D.gridScaling";
240 unsigned int counter = 0;
241 constexpr unsigned int blockEdgeSize = 8;
242 launch_testStencilHeatZ_perf<blockEdgeSize, 128>(testURI, counter++);
243 launch_testStencilHeatZ_perf<blockEdgeSize, 256>(testURI, counter++);
244 launch_testStencilHeatZ_perf<blockEdgeSize, 512>(testURI, counter++);
245 launch_testStencilHeatZ_perf<blockEdgeSize, 1024>(testURI, counter++);
248 testSet.insert(testURI);
251BOOST_AUTO_TEST_CASE(testStencilHeatZ_gridScaling_2)
253 std::string testURI = suiteURI +
".device.stencil.dense.Z.2D.2.gridScaling";
254 unsigned int counter = 0;
255 launch_testStencilHeatZ_perf<2, 512>(testURI, counter++);
256 launch_testStencilHeatZ_perf<2, 1024>(testURI, counter++);
257 launch_testStencilHeatZ_perf<2, 2048>(testURI, counter++);
260 testSet.insert(testURI);
263BOOST_AUTO_TEST_CASE(testStencilHeatZ_gridScaling_4)
265 std::string testURI = suiteURI +
".device.stencil.dense.Z.2D.4.gridScaling";
266 unsigned int counter = 0;
267 launch_testStencilHeatZ_perf<4, 256>(testURI, counter++);
268 launch_testStencilHeatZ_perf<4, 512>(testURI, counter++);
269 launch_testStencilHeatZ_perf<4, 1024>(testURI, counter++);
270 launch_testStencilHeatZ_perf<4, 2048>(testURI, counter++);
272 testSet.insert(testURI);
275BOOST_AUTO_TEST_CASE(testStencilHeatZ_gridScaling_8)
277 std::string testURI = suiteURI +
".device.stencil.dense.Z.2D.8.gridScaling";
278 unsigned int counter = 0;
279 launch_testStencilHeatZ_perf<8, 128>(testURI, counter++);
280 launch_testStencilHeatZ_perf<8, 256>(testURI, counter++);
281 launch_testStencilHeatZ_perf<8, 512>(testURI, counter++);
282 launch_testStencilHeatZ_perf<8, 1024>(testURI, counter++);
284 testSet.insert(testURI);
287BOOST_AUTO_TEST_CASE(testStencilHeatZ_gridScaling_16)
289 std::string testURI = suiteURI +
".device.stencil.dense.Z.2D.16.gridScaling";
290 unsigned int counter = 0;
291 launch_testStencilHeatZ_perf<16, 64>(testURI, counter++);
292 launch_testStencilHeatZ_perf<16, 128>(testURI, counter++);
293 launch_testStencilHeatZ_perf<16, 256>(testURI, counter++);
294 launch_testStencilHeatZ_perf<16, 512>(testURI, counter++);
296 testSet.insert(testURI);
299BOOST_AUTO_TEST_CASE(testStencilHeatZ_gridScaling_32)
301 std::string testURI = suiteURI +
".device.stencil.dense.Z.2D.32.gridScaling";
302 unsigned int counter = 0;
303 launch_testStencilHeatZ_perf<32, 32>(testURI, counter++);
304 launch_testStencilHeatZ_perf<32, 64>(testURI, counter++);
305 launch_testStencilHeatZ_perf<32, 128>(testURI, counter++);
306 launch_testStencilHeatZ_perf<32, 256>(testURI, counter++);
308 testSet.insert(testURI);
311BOOST_AUTO_TEST_CASE(testStencilHeatZ_blockScaling)
313 std::string testURI = suiteURI +
".device.stencil.dense.Z.2D.blockScaling";
314 unsigned int counter = 0;
316 launch_testStencilHeatZ_perf<4, 2048>(testURI, counter++);
317 launch_testStencilHeatZ_perf<8, 1024>(testURI, counter++);
318 launch_testStencilHeatZ_perf<16, 512>(testURI, counter++);
319 launch_testStencilHeatZ_perf<32, 256>(testURI, counter++);
321 testSet.insert(testURI);
325BOOST_AUTO_TEST_SUITE_END()
327BOOST_AUTO_TEST_SUITE_END()
Implementation of 1-D std::vector like structure.
Class for cpu time benchmarking.
void stop()
Stop the timer.
void start()
Start the timer.
double getwct()
Return the elapsed real time.
aggregate of properties, from a list of object if create a struct that follow the OPENFPM native stru...