7#define BOOST_TEST_DYN_LINK
8#define DISABLE_MPI_WRITTERS
10#include <boost/test/unit_test.hpp>
11#include "performancePlots.hpp"
13#include "SparseGridGpu/SparseGridGpu.hpp"
14#include "SparseGridGpu/tests/utils/SparseGridGpu_util_test.cuh"
16extern std::string suiteURI;
18extern std::set<std::string> testSet;
// Performance benchmark: repeatedly applies the two in-place stencils
// Stencil01T and Stencil10T to a 3D SparseGridZ grid, then reports timing and
// throughput statistics.
//
// Template parameters:
//   blockEdgeSize - block edge length written to the report under ".blockSize".
//   gridEdgeSize  - number of blocks per edge; the CUDA launch grid is
//                   gridEdgeSize^3 and the reported grid edge is
//                   gridEdgeSize * SparseGridZ::blockEdgeSize_.
//   SparseGridZ   - sparse grid type under test; must expose blockEdgeSize_,
//                   grid_info and the member functions called below.
// Parameters:
//   i    - not referenced in the lines visible here.
//   base - key prefix for every entry written to report_sparsegrid_funcs.graphs.
//
// NOTE(review): `ctx`, `ts`, `measures_tm`, `measures_gf`, `mean_tm`, `mean_gf`
// and `Stencil01T`/`Stencil10T` are used below but declared on lines that are
// not visible in this listing.
20template<
unsigned int blockEdgeSize,
unsigned int gr
idEdgeSize,
typename SparseGr
idZ>
21void testStencilHeat3D_perf(
unsigned int i, std::string base)
23 auto testName =
"In-place 3D stencil";
// Record the benchmark configuration (dimensionality, block size, grid extent).
29 report_sparsegrid_funcs.graphs.put(base +
".dim",3);
30 report_sparsegrid_funcs.graphs.put(base +
".blockSize",blockEdgeSize);
31 report_sparsegrid_funcs.graphs.put(base +
".gridSize.x",gridEdgeSize*SparseGridZ::blockEdgeSize_);
32 report_sparsegrid_funcs.graphs.put(base +
".gridSize.y",gridEdgeSize*SparseGridZ::blockEdgeSize_);
33 report_sparsegrid_funcs.graphs.put(base +
".gridSize.z",gridEdgeSize*SparseGridZ::blockEdgeSize_);
// Number of timed repetitions of the stencil pair.
35 unsigned int iterations = 100;
// Launch configuration: one CUDA block per grid block, one thread per point of a block.
40 dim3 gridSize(gridEdgeSize, gridEdgeSize, gridEdgeSize);
41 dim3 blockSize(SparseGridZ::blockEdgeSize_, SparseGridZ::blockEdgeSize_, SparseGridZ::blockEdgeSize_);
43 typename SparseGridZ::grid_info blockGeometry(gridSize);
44 SparseGridZ sparseGrid(blockGeometry);
46 sparseGrid.template setBackgroundValue<0>(0);
// Total number of points, used later as the element count for throughput.
// NOTE(review): the product is evaluated in unsigned int before being widened
// to unsigned long long; fine for the sizes used by the tests in this file,
// confirm before using larger grids.
48 unsigned long long numElements = gridEdgeSize*SparseGridZ::blockEdgeSize_
49 *gridEdgeSize*SparseGridZ::blockEdgeSize_
50 *gridEdgeSize*SparseGridZ::blockEdgeSize_;
// First insertion pass: launch insertConstantValue<0> with value 0 over the
// whole launch grid, then flush the insert buffer on the device.
// (presumably this materialises every point of the grid - confirm against the helper)
53 sparseGrid.setGPUInsertBuffer(gridSize, dim3(1));
54 CUDA_LAUNCH_DIM3((insertConstantValue<0>),gridSize, blockSize,sparseGrid.toKernel(), 0);
55 sparseGrid.template flush < sRight_ < 0 >> (ctx, flush_type::FLUSH_ON_DEVICE);
// Second insertion pass: insertOneValue<0> with value 100 at the grid centre
// (presumably the heat source of the test - confirm against the helper).
57 sparseGrid.setGPUInsertBuffer(gridSize, dim3(1));
58 dim3 sourcePt(gridSize.x * SparseGridZ::blockEdgeSize_ / 2,
59 gridSize.y * SparseGridZ::blockEdgeSize_ / 2,
60 gridSize.z * SparseGridZ::blockEdgeSize_ / 2);
61 insertOneValue<0> << < gridSize, blockSize >> > (sparseGrid.toKernel(), sourcePt, 100);
62 sparseGrid.template flush < sRight_ < 0 >> (ctx, flush_type::FLUSH_ON_DEVICE);
64 sparseGrid.findNeighbours();
// Timed loop: each iteration applies both stencils in place with coefficient 0.1,
// synchronising the device around each application.
67 for (
unsigned int iter=0; iter<iterations; ++iter)
69 cudaDeviceSynchronize();
74 sparseGrid.template applyStencils<Stencil01T>(sparseGrid.getBox(),STENCIL_MODE_INPLACE, 0.1);
75 cudaDeviceSynchronize();
76 sparseGrid.template applyStencils<Stencil10T>(sparseGrid.getBox(),STENCIL_MODE_INPLACE, 0.1);
77 cudaDeviceSynchronize();
// Store the elapsed wall-clock time of this iteration.
// NOTE(review): the timer start/stop calls are on lines not visible here.
81 measures_tm.add(ts.
getwct());
// Per-iteration throughput; the factor 2 presumably accounts for the two
// stencil applications per iteration - confirm.
83 float gElemS = 2 * numElements / (1e9 * ts.
getwct());
84 float gFlopsS = gElemS * Stencil01T::flops;
86 measures_gf.add(gFlopsS);
// Statistics over all iterations (time and GFlops).
90 double deviation_tm = 0;
91 standard_deviation(measures_tm,mean_tm,deviation_tm);
94 double deviation_gf = 0;
95 standard_deviation(measures_gf,mean_gf,deviation_gf);
// Throughput recomputed from the mean iteration time, used for the console summary.
99 float gElemS = 2 * numElements / (1e9 * mean_tm);
100 float gFlopsS = gElemS * Stencil01T::flops;
101 std::cout <<
"Test: " << testName << std::endl;
102 std::cout <<
"Block: " << SparseGridZ::blockEdgeSize_
103 <<
"x" << SparseGridZ::blockEdgeSize_
104 <<
"x" << SparseGridZ::blockEdgeSize_
106 std::cout <<
"Grid: " << gridEdgeSize*SparseGridZ::blockEdgeSize_
107 <<
"x" << gridEdgeSize*SparseGridZ::blockEdgeSize_
108 <<
"x" << gridEdgeSize*SparseGridZ::blockEdgeSize_
// Copy the grid back to the host and measure block occupancy (mean and deviation),
// then print and record it.
110 double dataOccupancyMean, dataOccupancyDev;
111 sparseGrid.deviceToHost();
112 sparseGrid.measureBlockOccupancy(dataOccupancyMean, dataOccupancyDev);std::cout <<
"Data Occupancy: " << dataOccupancyMean <<
" dev:" << dataOccupancyDev << std::endl;
113 report_sparsegrid_funcs.graphs.put(base +
".dataOccupancy.mean",dataOccupancyMean);
114 report_sparsegrid_funcs.graphs.put(base +
".dataOccupancy.dev",dataOccupancyDev);
115 std::cout <<
"Iterations: " << iterations << std::endl;
// NOTE(review): this line prints mean_gf / deviation_gf (GFlops statistics) but
// labels the value with the unit " s"; confirm whether mean_tm / deviation_tm
// was intended.
116 std::cout <<
"\tStencil: " << mean_gf <<
" dev:" << deviation_gf <<
" s" << std::endl;
117 std::cout <<
"Throughput: " << std::endl <<
"\t " << gElemS <<
" GElem/s " << std::endl <<
"\t " << gFlopsS <<
" GFlops/s" << std::endl;
// Persist the aggregated results in the report tree.
119 report_sparsegrid_funcs.graphs.put(base +
".GFlops.mean",mean_gf);
120 report_sparsegrid_funcs.graphs.put(base +
".GFlops.dev",deviation_gf);
121 report_sparsegrid_funcs.graphs.put(base +
".time.mean",mean_tm);
122 report_sparsegrid_funcs.graphs.put(base +
".time.dev",deviation_tm);
// Launcher for the dense 3D stencil benchmark.
//
// Builds the report key prefix "<testURI>(<i>)", stores the test name
// "StencilN3D" under it, runs testStencilHeat3D_perf for the given
// blockEdgeSize / gridEdgeSize and finally synchronises the device.
//
// Parameters:
//   testURI - base URI of the test series in the report tree.
//   i       - index of this data point within the series.
125template<
unsigned int blockEdgeSize,
unsigned int gr
idEdgeSize>
126void launch_testStencilHeat3D_perf(std::string testURI,
unsigned int i)
// NOTE(review): `dim` is not referenced in the visible lines; presumably it is
// used to build the grid type on the line missing from this listing - confirm.
128 constexpr unsigned int dim = 3;
132 std::string base(testURI +
"(" + std::to_string(i) +
")");
133 report_sparsegrid_funcs.graphs.put(base +
".test.name",
"StencilN3D");
// NOTE(review): the rest of this call (third template argument and the call
// arguments) is on a line not visible here.
135 testStencilHeat3D_perf<blockEdgeSize, gridEdgeSize,
// Wait for all outstanding device work before returning to the caller.
137 cudaDeviceSynchronize();
// Performance benchmark: applies the in-place stencils Stencil01T and
// Stencil10T to a sparse 3D grid populated by a series of insertSphere3D
// launches, then reports timing and throughput statistics.
//
// Template parameters:
//   blockEdgeSize - block edge length; also used to size the insertion kernel
//                   launch and the sphere radii.
//   gridEdgeSize  - number of blocks per edge of the CUDA launch grid; also
//                   determines the number of spheres.
//   SparseGridZ   - sparse grid type under test; must expose dims, grid_info
//                   and the member functions called below.
// Parameters:
//   i              - not referenced in the visible lines (shadowed by the
//                    sphere loop variable, see note below).
//   base           - key prefix for entries written to report_sparsegrid_funcs.graphs.
//   fillMultiplier - enters the radii as the difference between rBig and rSmall,
//                    in units of blockEdgeSize.
//   voidMultiplier - added to fillMultiplier to form the per-sphere radius step.
//
// NOTE(review): `ctx`, `ts`, `start1`, `measures_tm`, `measures_gf`, `mean_tm`,
// `mean_gf` and `Stencil01T`/`Stencil10T` are used below but declared on lines
// that are not visible in this listing.
140template<
unsigned int blockEdgeSize,
unsigned int gr
idEdgeSize,
typename SparseGr
idZ>
141void testStencilHeat3DSparse_perf(
unsigned int i, std::string base,
float fillMultiplier=1,
float voidMultiplier=1)
143 auto testName =
"In-place 3D sparse stencil";
145 constexpr unsigned int dim = SparseGridZ::dims;
// Record the benchmark configuration (dimensionality, block size, grid extent).
153 report_sparsegrid_funcs.graphs.put(base +
".dim",dim);
154 report_sparsegrid_funcs.graphs.put(base +
".blockSize",blockEdgeSize);
155 report_sparsegrid_funcs.graphs.put(base +
".gridSize.x", gridEdgeSize * blockEdgeSize);
156 report_sparsegrid_funcs.graphs.put(base +
".gridSize.y", gridEdgeSize * blockEdgeSize);
157 report_sparsegrid_funcs.graphs.put(base +
".gridSize.z", gridEdgeSize * blockEdgeSize);
// Number of timed repetitions of the stencil pair.
159 unsigned int iterations = 100;
164 dim3 gridSize(gridEdgeSize, gridEdgeSize, gridEdgeSize);
// The grid geometry is a fixed 10000^3 domain, independent of gridEdgeSize.
165 unsigned int spatialEdgeSize = 10000;
166 size_t sz[3] = {spatialEdgeSize, spatialEdgeSize, spatialEdgeSize};
167 typename SparseGridZ::grid_info blockGeometry(sz);
168 SparseGridZ sparseGrid(blockGeometry);
170 sparseGrid.template setBackgroundValue<0>(0);
// Radius step per sphere (in blocks) and the resulting number of spheres.
173 float allMultiplier = fillMultiplier + voidMultiplier;
174 const unsigned int numSpheres = gridEdgeSize / (2*allMultiplier);
175 unsigned int centerPoint = spatialEdgeSize / 2;
// One insertion + flush per sphere.
// NOTE(review): the loop variable `int i` shadows the function parameter `i`
// and is compared against the unsigned `numSpheres`.
177 for (
int i = 1; i <= numSpheres; ++i)
// Outer and inner radius passed to insertSphere3D for sphere i.
179 unsigned int rBig = allMultiplier*i * blockEdgeSize;
180 unsigned int rSmall = (allMultiplier*i - fillMultiplier) * blockEdgeSize;
183 sparseGrid.setGPUInsertBuffer(gridSize, dim3(1));
184 CUDA_LAUNCH_DIM3((insertSphere3D<0>),
185 gridSize, dim3(blockEdgeSize * blockEdgeSize * blockEdgeSize, 1, 1),
186 sparseGrid.toKernel(), start1, rBig, rSmall, 1);
187 cudaDeviceSynchronize();
188 sparseGrid.template flush<smax_<0 >>(ctx, flush_type::FLUSH_ON_DEVICE);
189 cudaDeviceSynchronize();
193 sparseGrid.findNeighbours();
194 sparseGrid.tagBoundaries(ctx);
// Element count used for throughput: existing elements minus boundary elements,
// counted after copying property 0 back to the host.
196 sparseGrid.template deviceToHost<0>();
197 auto existingElements = sparseGrid.countExistingElements();
198 auto boundaryElements = sparseGrid.countBoundaryElements();
199 unsigned long long numElements = existingElements - boundaryElements;
// Untimed boundary stencil application before the timed loop.
// NOTE(review): the remaining arguments of this call are on lines not visible here.
202 sparseGrid.template applyStencils<BoundaryStencilSetXRescaled<dim,0,0>>(sparseGrid.getBox(),STENCIL_MODE_INPLACE,
203 centerPoint, centerPoint + 2*blockEdgeSize*gridEdgeSize,
// Timed loop: each iteration applies both stencils in place with coefficient 0.1.
207 for (
unsigned int iter=0; iter<iterations; ++iter)
213 sparseGrid.template applyStencils<Stencil01T>(sparseGrid.getBox(),STENCIL_MODE_INPLACE, 0.1);
214 sparseGrid.template applyStencils<Stencil10T>(sparseGrid.getBox(),STENCIL_MODE_INPLACE, 0.1);
// Store the elapsed wall-clock time of this iteration.
// NOTE(review): the timer start/stop and any synchronisation calls are on
// lines not visible here.
218 measures_tm.add(ts.
getwct());
// Per-iteration throughput; the factor 2 presumably accounts for the two
// stencil applications per iteration - confirm.
220 float gElemS = 2 * numElements / (1e9 * ts.
getwct());
221 float gFlopsS = gElemS * Stencil01T::flops;
223 measures_gf.add(gFlopsS);
// Statistics over all iterations (time and GFlops).
227 double deviation_tm = 0;
228 standard_deviation(measures_tm,mean_tm,deviation_tm);
231 double deviation_gf = 0;
232 standard_deviation(measures_gf,mean_gf,deviation_gf);
// Throughput recomputed from the mean iteration time, used for the console summary.
236 float gElemS = 2 * numElements / (1e9 * mean_tm);
237 float gFlopsS = gElemS * Stencil01T::flops;
238 std::cout <<
"Test: " << testName << std::endl;
239 std::cout <<
"Block: " << blockEdgeSize <<
"x" << blockEdgeSize <<
"x" << blockEdgeSize << std::endl;
240 std::cout <<
"Grid: " << gridEdgeSize * blockEdgeSize
241 <<
"x" << gridEdgeSize * blockEdgeSize
242 <<
"x" << gridEdgeSize * blockEdgeSize
// Copy the grid back to the host and measure block occupancy (mean and deviation),
// then print and record it.
244 double dataOccupancyMean, dataOccupancyDev;
245 sparseGrid.deviceToHost();
246 sparseGrid.measureBlockOccupancy(dataOccupancyMean, dataOccupancyDev);std::cout <<
"Data Occupancy: " << dataOccupancyMean <<
" dev:" << dataOccupancyDev << std::endl;
247 report_sparsegrid_funcs.graphs.put(base +
".dataOccupancy.mean",dataOccupancyMean);
248 report_sparsegrid_funcs.graphs.put(base +
".dataOccupancy.dev",dataOccupancyDev);
249 std::cout <<
"Iterations: " << iterations << std::endl;
// NOTE(review): this line prints mean_gf / deviation_gf (GFlops statistics) but
// labels the value with the unit " s"; confirm whether mean_tm / deviation_tm
// was intended.
250 std::cout <<
"\tStencil: " << mean_gf <<
" dev:" << deviation_gf <<
" s" << std::endl;
251 std::cout <<
"Throughput: " << std::endl <<
"\t " << gElemS <<
" GElem/s " << std::endl <<
"\t " << gFlopsS <<
" GFlops/s" << std::endl;
// Persist the aggregated results in the report tree.
253 report_sparsegrid_funcs.graphs.put(base +
".GFlops.mean",mean_gf);
254 report_sparsegrid_funcs.graphs.put(base +
".GFlops.dev",deviation_gf);
255 report_sparsegrid_funcs.graphs.put(base +
".time.mean",mean_tm);
256 report_sparsegrid_funcs.graphs.put(base +
".time.dev",deviation_tm);
// Launcher for the sparse 3D stencil benchmark.
//
// Builds the report key prefix "<testURI>(<i>)", stores the test name
// "StencilN3DSparse" under it, runs testStencilHeat3DSparse_perf for the given
// blockEdgeSize / gridEdgeSize and finally synchronises the device.
//
// Parameters:
//   testURI        - base URI of the test series in the report tree.
//   i              - index of this data point within the series.
//   fillMultiplier - forwarded to testStencilHeat3DSparse_perf (default 1).
//   voidMultiplier - forwarded to testStencilHeat3DSparse_perf (default 1).
263template<
unsigned int blockEdgeSize,
unsigned int gr
idEdgeSize>
264void launch_testStencilHeat3DSparse_perf(std::string testURI,
unsigned int i,
265 float fillMultiplier=1,
float voidMultiplier=1)
// NOTE(review): `dim` is not referenced in the visible lines; presumably it is
// used to build the grid type on the line missing from this listing - confirm.
267 constexpr unsigned int dim = 3;
271 std::string base(testURI +
"(" + std::to_string(i) +
")");
272 report_sparsegrid_funcs.graphs.put(base +
".test.name",
"StencilN3DSparse");
// NOTE(review): the third template argument and the leading call arguments are
// on a line not visible here.
274 testStencilHeat3DSparse_perf<blockEdgeSize, gridEdgeSize,
276 fillMultiplier, voidMultiplier);
// Wait for all outstanding device work before returning to the caller.
277 cudaDeviceSynchronize();
280BOOST_AUTO_TEST_SUITE(performance)
282BOOST_AUTO_TEST_SUITE(SparseGridGpu_test)
// Dense 3D stencil benchmark, grid scaling: block edge fixed at 8, grid edge
// (in blocks) 8, 16, 32, 64. Each run is stored as data point `counter` under
// testURI, and the URI is registered in testSet afterwards.
284BOOST_AUTO_TEST_CASE(testStencilHeat3D_gridScaling)
286 std::string testURI = suiteURI +
".device.stencil.dense.N.3D.gridScaling";
287 unsigned int counter = 0;
288 constexpr unsigned int blockEdgeSize = 8;
289 launch_testStencilHeat3D_perf<blockEdgeSize, 8>(testURI, counter++);
290 launch_testStencilHeat3D_perf<blockEdgeSize, 16>(testURI, counter++);
291 launch_testStencilHeat3D_perf<blockEdgeSize, 32>(testURI, counter++);
292 launch_testStencilHeat3D_perf<blockEdgeSize, 64>(testURI, counter++);
295 testSet.insert(testURI);
// Dense 3D stencil benchmark, grid scaling with block edge 2: grid edge
// (in blocks) 32, 64, 128. Results go under testURI, which is then registered
// in testSet.
298BOOST_AUTO_TEST_CASE(testStencilHeat3D_gridScaling_2)
300 std::string testURI = suiteURI +
".device.stencil.dense.N.3D.2.gridScaling";
301 unsigned int counter = 0;
302 launch_testStencilHeat3D_perf<2, 32>(testURI, counter++);
303 launch_testStencilHeat3D_perf<2, 64>(testURI, counter++);
304 launch_testStencilHeat3D_perf<2, 128>(testURI, counter++);
307 testSet.insert(testURI);
// Dense 3D stencil benchmark, grid scaling with block edge 4: grid edge
// (in blocks) 16, 32, 64. Results go under testURI, which is then registered
// in testSet.
310BOOST_AUTO_TEST_CASE(testStencilHeat3D_gridScaling_4)
312 std::string testURI = suiteURI +
".device.stencil.dense.N.3D.4.gridScaling";
313 unsigned int counter = 0;
314 launch_testStencilHeat3D_perf<4, 16>(testURI, counter++);
315 launch_testStencilHeat3D_perf<4, 32>(testURI, counter++);
316 launch_testStencilHeat3D_perf<4, 64>(testURI, counter++);
319 testSet.insert(testURI);
// Dense 3D stencil benchmark, grid scaling with block edge 8: grid edge
// (in blocks) 8, 16, 32, 64. Results go under testURI, which is then registered
// in testSet.
// NOTE(review): the block/grid combinations are the same as in
// testStencilHeat3D_gridScaling; only the report URI differs.
322BOOST_AUTO_TEST_CASE(testStencilHeat3D_gridScaling_8)
324 std::string testURI = suiteURI +
".device.stencil.dense.N.3D.8.gridScaling";
325 unsigned int counter = 0;
326 launch_testStencilHeat3D_perf<8, 8>(testURI, counter++);
327 launch_testStencilHeat3D_perf<8, 16>(testURI, counter++);
328 launch_testStencilHeat3D_perf<8, 32>(testURI, counter++);
329 launch_testStencilHeat3D_perf<8, 64>(testURI, counter++);
331 testSet.insert(testURI);
// Dense 3D stencil benchmark, block scaling: block edge 2, 4, 8 paired with
// grid edge 128, 64, 32, so blockEdge * gridEdge stays at 256 points per edge.
// Results go under testURI, which is then registered in testSet.
334BOOST_AUTO_TEST_CASE(testStencilHeat3D_blockScaling)
336 std::string testURI = suiteURI +
".device.stencil.dense.N.3D.blockScaling";
337 unsigned int counter = 0;
338 launch_testStencilHeat3D_perf<2, 128>(testURI, counter++);
339 launch_testStencilHeat3D_perf<4, 64>(testURI, counter++);
340 launch_testStencilHeat3D_perf<8, 32>(testURI, counter++);
343 testSet.insert(testURI);
// Sparse 3D stencil benchmark, grid scaling: block edge fixed at 8, grid edge
// (in blocks) 8, 16, 32, 64, with fillMultiplier = voidMultiplier = 1.
// Results go under testURI, which is then registered in testSet.
346BOOST_AUTO_TEST_CASE(testStencilHeat3DSparse_gridScaling)
348 std::string testURI = suiteURI +
".device.stencil.sparse.N.3D.gridScaling";
349 unsigned int counter = 0;
350 constexpr unsigned int blockEdgeSize = 8;
351 launch_testStencilHeat3DSparse_perf<blockEdgeSize, 8>(testURI, counter++, 1, 1);
352 launch_testStencilHeat3DSparse_perf<blockEdgeSize, 16>(testURI, counter++, 1, 1);
353 launch_testStencilHeat3DSparse_perf<blockEdgeSize, 32>(testURI, counter++, 1, 1);
354 launch_testStencilHeat3DSparse_perf<blockEdgeSize, 64>(testURI, counter++, 1, 1);
356 testSet.insert(testURI);
// Sparse 3D stencil benchmark, block scaling: block edge 2, 4, 8 paired with
// grid edge 128, 64, 32 (blockEdge * gridEdge stays at 256), with
// fillMultiplier = voidMultiplier = 1. Results go under testURI, which is then
// registered in testSet.
359BOOST_AUTO_TEST_CASE(testStencilHeat3DSparse_blockScaling)
361 std::string testURI = suiteURI +
".device.stencil.sparse.N.3D.blockScaling";
362 unsigned int counter = 0;
363 launch_testStencilHeat3DSparse_perf<2, 128>(testURI, counter++, 1, 1);
364 launch_testStencilHeat3DSparse_perf<4, 64>(testURI, counter++, 1, 1);
365 launch_testStencilHeat3DSparse_perf<8, 32>(testURI, counter++, 1, 1);
368 testSet.insert(testURI);
373BOOST_AUTO_TEST_SUITE_END()
375BOOST_AUTO_TEST_SUITE_END()
grid_key_dx is the key to access any element in the grid
Implementation of 1-D std::vector like structure.
Class for cpu time benchmarking.
void stop()
Stop the timer.
void start()
Start the timer.
double getwct()
Return the elapsed real time.
Aggregate of properties: from a list of objects, it creates a struct that follows the OPENFPM native stru...