7#define BOOST_TEST_DYN_LINK
8#define DISABLE_MPI_WRITTERS
10#include <boost/test/unit_test.hpp>
11#include "performancePlots.hpp"
13#include "SparseGridGpu/SparseGridGpu.hpp"
14#include "SparseGridGpu/tests/utils/SparseGridGpu_util_test.cuh"
16extern std::string suiteURI;
18extern std::set<std::string> testSet;
// Performance benchmark for the "In-place sparse stencil" test on a 2D sparse GPU grid.
//
// NOTE(review): this listing looks like it was extracted from rendered documentation:
// braces are missing and several names used below (ctx, start1, ts, measures_tm,
// measures_gf, mean_tm, mean_gf) are declared on lines that are not visible here.
// Confirm every detail against the original source file before relying on it.
//
// Template parameters:
//   blockEdgeSize - block edge length; reported as ".blockSize" and used to scale the radii
//   gridEdgeSize  - number of blocks per edge of the launch grid (dim3 gridSize)
//   SparseGridZ   - sparse grid type under test; must provide ::dims and ::grid_info
// Parameters:
//   i              - not referenced in the visible lines (the sphere loop declares its own `i`)
//   base           - key prefix for every value stored in report_sparsegrid_funcs.graphs
//   fillMultiplier - multiplier entering the sphere radii (see rBig / rSmall below)
//   voidMultiplier - added to fillMultiplier to form allMultiplier
20template<
unsigned int blockEdgeSize,
unsigned int gr
idEdgeSize,
typename SparseGr
idZ>
21void testStencilHeatSparse_perf(
unsigned int i, std::string base,
float fillMultiplier=1,
float voidMultiplier=1)
23 auto testName =
"In-place sparse stencil";
25 constexpr unsigned int dim = SparseGridZ::dims;
// Record the benchmark configuration under the `base` key prefix.
// NOTE(review): ".dim" is stored as the literal 2 rather than the `dim` constant read above.
33 report_sparsegrid_funcs.graphs.put(base +
".dim",2);
34 report_sparsegrid_funcs.graphs.put(base +
".blockSize",blockEdgeSize);
35 report_sparsegrid_funcs.graphs.put(base +
".gridSize.x",gridEdgeSize*blockEdgeSize);
36 report_sparsegrid_funcs.graphs.put(base +
".gridSize.y",gridEdgeSize*blockEdgeSize);
// Upper bound of the stencil loop further below.
38 unsigned int iterations = 100;
// Launch grid of gridEdgeSize x gridEdgeSize; sz (spatialEdgeSize in both dimensions)
// is passed to the grid_info used to construct the sparse grid.
43 dim3 gridSize(gridEdgeSize, gridEdgeSize);
44 unsigned int spatialEdgeSize = 1000000;
45 size_t sz[2] = {spatialEdgeSize, spatialEdgeSize};
46 typename SparseGridZ::grid_info blockGeometry(sz);
47 SparseGridZ sparseGrid(blockGeometry);
49 sparseGrid.template setBackgroundValue<0>(0);
// numSpheres is gridEdgeSize / (2*allMultiplier), truncated to unsigned int.
52 float allMultiplier = fillMultiplier + voidMultiplier;
53 const unsigned int numSpheres = gridEdgeSize / (2*allMultiplier);
55 unsigned int centerPoint = spatialEdgeSize / 2;
// One insertion pass per sphere index.
// NOTE(review): this `int i` shadows the function parameter `i` and is compared
// against the unsigned numSpheres.
57 for (
int i = 1; i <= numSpheres; ++i)
// Outer and inner radius for this index, both scaled by blockEdgeSize and truncated
// to unsigned int; rSmall uses the same product reduced by fillMultiplier.
59 unsigned int rBig = allMultiplier*i * blockEdgeSize;
60 unsigned int rSmall = (allMultiplier*i - fillMultiplier) * blockEdgeSize;
// Set up the GPU insert buffer, launch insertSphere<0> with blockEdgeSize*blockEdgeSize
// threads per block, then flush on the device (FLUSH_ON_DEVICE) with smax_<0>.
// `start1` and `ctx` come from lines not visible here.
63 sparseGrid.setGPUInsertBuffer(gridSize, dim3(1));
64 CUDA_LAUNCH_DIM3((insertSphere<0>),
65 gridSize, dim3(blockEdgeSize * blockEdgeSize, 1, 1),
66 sparseGrid.toKernel(), start1, rBig, rSmall, 5);
67 cudaDeviceSynchronize();
68 sparseGrid.template flush<smax_<0 >>(ctx, flush_type::FLUSH_ON_DEVICE);
69 cudaDeviceSynchronize();
// Neighbour search and boundary tagging, followed by deviceToHost<0>() and element counts.
73 sparseGrid.findNeighbours();
74 sparseGrid.tagBoundaries(ctx);
76 sparseGrid.template deviceToHost<0>();
77 auto existingElements = sparseGrid.countExistingElements();
78 auto boundaryElements = sparseGrid.countBoundaryElements();
// Element count used for throughput: existing elements minus boundary elements.
79 unsigned long long numElements = existingElements - boundaryElements;
// In-place boundary stencil; the argument list continues on lines not visible here.
82 sparseGrid.template applyStencils<BoundaryStencilSetXRescaled<dim,0,0>>(sparseGrid.getBox(),STENCIL_MODE_INPLACE,
83 centerPoint, centerPoint + 2*blockEdgeSize*gridEdgeSize,
// Stencil loop: applies Stencil01T and then Stencil10T in place with argument 0.1,
// synchronizing the device after each call. Stencil01T / Stencil10T are declared on
// lines not visible here.
87 for (
unsigned int iter=0; iter<iterations; ++iter)
89 cudaDeviceSynchronize();
94 sparseGrid.template applyStencils<Stencil01T>(sparseGrid.getBox(),STENCIL_MODE_INPLACE, 0.1);
95 cudaDeviceSynchronize();
96 sparseGrid.template applyStencils<Stencil10T>(sparseGrid.getBox(),STENCIL_MODE_INPLACE, 0.1);
97 cudaDeviceSynchronize();
// Record the elapsed time reported by timer `ts` (its start/stop calls are not visible here).
101 measures_tm.add(ts.
getwct());
// Throughput in units of 1e9 elements per second; the factor 2 presumably accounts for
// the two stencil applications above — TODO confirm.
103 float gElemS = 2 * numElements / (1e9 * ts.
getwct());
104 float gFlopsS = gElemS * Stencil01T::flops;
106 measures_gf.add(gFlopsS);
// Deviation of the time and GFlops samples; mean_tm / mean_gf come from lines not visible here.
110 double deviation_tm = 0;
111 standard_deviation(measures_tm,mean_tm,deviation_tm);
114 double deviation_gf = 0;
115 standard_deviation(measures_gf,mean_gf,deviation_gf);
// Throughput recomputed from the mean time; these values feed the console summary below.
119 float gElemS = 2 * numElements / (1e9 * mean_tm);
120 float gFlopsS = gElemS * Stencil01T::flops;
121 std::cout <<
"Test: " << testName << std::endl;
122 std::cout <<
"Block: " << blockEdgeSize <<
"x" << blockEdgeSize << std::endl;
123 std::cout <<
"Grid: " << gridEdgeSize*blockEdgeSize <<
"x" << gridEdgeSize*blockEdgeSize << std::endl;
// Copy the grid to the host, measure block occupancy, then print and record mean and deviation.
124 double dataOccupancyMean, dataOccupancyDev;
125 sparseGrid.deviceToHost();
126 sparseGrid.measureBlockOccupancy(dataOccupancyMean, dataOccupancyDev);
127 std::cout <<
"Data Occupancy: " << dataOccupancyMean <<
" dev:" << dataOccupancyDev << std::endl;
128 report_sparsegrid_funcs.graphs.put(base +
".dataOccupancy.mean",dataOccupancyMean);
129 report_sparsegrid_funcs.graphs.put(base +
".dataOccupancy.dev",dataOccupancyDev);
130 std::cout <<
"Iterations: " << iterations << std::endl;
// NOTE(review): this line prints mean_gf / deviation_gf (GFlops samples) but labels
// them with " s" — confirm whether the time statistics were intended here.
131 std::cout <<
"\tStencil: " << mean_gf <<
" dev:" << deviation_gf <<
" s" << std::endl;
132 std::cout <<
"Throughput: " << std::endl <<
"\t " << gElemS <<
" GElem/s " << std::endl <<
"\t " << gFlopsS <<
" GFlops/s" << std::endl;
// Store the GFlops and time statistics in the report.
134 report_sparsegrid_funcs.graphs.put(base +
".GFlops.mean",mean_gf);
135 report_sparsegrid_funcs.graphs.put(base +
".GFlops.dev",deviation_gf);
136 report_sparsegrid_funcs.graphs.put(base +
".time.mean",mean_tm);
137 report_sparsegrid_funcs.graphs.put(base +
".time.dev",deviation_tm);
// Launcher for one benchmark configuration of testStencilHeatSparse_perf.
//
// Builds the report key `base` as testURI + "(" + i + ")", stores
// "StencilNSparse" + occupancyStr under base + ".test.name", forwards to
// testStencilHeatSparse_perf and synchronizes the device afterwards.
//
// Template parameters: blockEdgeSize, gridEdgeSize - forwarded to testStencilHeatSparse_perf.
// Parameters:
//   testURI        - URI of the enclosing test case, used as key prefix
//   i              - run index appended to the key in parentheses
//   fillMultiplier - forwarded to testStencilHeatSparse_perf
//   voidMultiplier - forwarded to testStencilHeatSparse_perf
//   occupancyStr   - suffix appended to the reported test name
// NOTE(review): the call below is split across lines not visible here; the sparse grid
// type passed as third template argument and the leading call arguments are not shown.
140template<
unsigned int blockEdgeSize,
unsigned int gr
idEdgeSize>
141void launch_testStencilHeatSparse_perf(std::string testURI,
unsigned int i,
142 float fillMultiplier=1,
float voidMultiplier=1, std::string occupancyStr=
"05")
144 constexpr unsigned int dim = 2;
148 std::string base(testURI +
"(" + std::to_string(i) +
")");
149 report_sparsegrid_funcs.graphs.put(base +
".test.name",
"StencilNSparse"+occupancyStr);
151 testStencilHeatSparse_perf<blockEdgeSize, gridEdgeSize,
153 fillMultiplier, voidMultiplier);
154 cudaDeviceSynchronize();
// Launcher for one benchmark configuration of testStencilHeatSparse_perf, used by the
// "Z" test cases.
//
// Builds the report key `base` as testURI + "(" + i + ")", stores a test name under
// base + ".test.name", forwards to testStencilHeatSparse_perf and synchronizes the
// device afterwards.
//
// Template parameters: blockEdgeSize, gridEdgeSize - forwarded to testStencilHeatSparse_perf.
// Parameters:
//   testURI        - URI of the enclosing test case, used as key prefix
//   i              - run index appended to the key in parentheses
//   fillMultiplier - forwarded to testStencilHeatSparse_perf
//   voidMultiplier - forwarded to testStencilHeatSparse_perf
//   occupancyStr   - suffix appended to the reported test name
// NOTE(review): the reported name is "StencilNSparse", the same string used by
// launch_testStencilHeatSparse_perf — confirm whether the Z variant should report a
// distinct name.
// NOTE(review): the call below is split across lines not visible here; the sparse grid
// type passed as third template argument and the leading call arguments are not shown.
157template<
unsigned int blockEdgeSize,
unsigned int gr
idEdgeSize>
158void launch_testStencilHeatSparseZ_perf(std::string testURI,
unsigned int i,
159 float fillMultiplier=1,
float voidMultiplier=1, std::string occupancyStr=
"05")
161 constexpr unsigned int dim = 2;
165 std::string base(testURI +
"(" + std::to_string(i) +
")");
166 report_sparsegrid_funcs.graphs.put(base +
".test.name",
"StencilNSparse"+occupancyStr);
168 testStencilHeatSparse_perf<blockEdgeSize, gridEdgeSize,
170 fillMultiplier, voidMultiplier);
171 cudaDeviceSynchronize();
// Opens the nested Boost.Test suites performance / SparseGridGpu_test that contain the
// benchmark cases below.
174BOOST_AUTO_TEST_SUITE(performance)
176BOOST_AUTO_TEST_SUITE(SparseGridGpu_test)
// Grid-scaling benchmark, tag "05": block edge 8, grid edge 128 / 256 / 512 / 1024,
// fillMultiplier 1.45, voidMultiplier 1. `counter` numbers the runs; the test URI is
// added to testSet at the end.
178BOOST_AUTO_TEST_CASE(testStencilHeatSparse05_gridScaling)
180 std::string testURI = suiteURI +
".device.stencil.sparse.N.2D.05.gridScaling";
181 unsigned int counter = 0;
182 constexpr unsigned int blockEdgeSize = 8;
183 launch_testStencilHeatSparse_perf<blockEdgeSize, 128>(testURI, counter++, 1.45, 1,
"05");
184 launch_testStencilHeatSparse_perf<blockEdgeSize, 256>(testURI, counter++, 1.45, 1,
"05");
185 launch_testStencilHeatSparse_perf<blockEdgeSize, 512>(testURI, counter++, 1.45, 1,
"05");
186 launch_testStencilHeatSparse_perf<blockEdgeSize, 1024>(testURI, counter++, 1.45, 1,
"05");
189 testSet.insert(testURI);
// Grid-scaling benchmark, tag "08": block edge 8, grid edge 128 / 256 / 512 / 1024,
// fillMultiplier 2, voidMultiplier 0.20. `counter` numbers the runs; the test URI is
// added to testSet at the end.
193BOOST_AUTO_TEST_CASE(testStencilHeatSparse08_gridScaling)
195 std::string testURI = suiteURI +
".device.stencil.sparse.N.2D.08.gridScaling";
196 unsigned int counter = 0;
197 constexpr unsigned int blockEdgeSize = 8;
198 launch_testStencilHeatSparse_perf<blockEdgeSize, 128>(testURI, counter++, 2, 0.20,
"08");
199 launch_testStencilHeatSparse_perf<blockEdgeSize, 256>(testURI, counter++, 2, 0.20,
"08");
200 launch_testStencilHeatSparse_perf<blockEdgeSize, 512>(testURI, counter++, 2, 0.20,
"08");
201 launch_testStencilHeatSparse_perf<blockEdgeSize, 1024>(testURI, counter++, 2, 0.20,
"08");
203 testSet.insert(testURI);
// Grid-scaling benchmark, tag "09": block edge 8, grid edge 128 / 256 / 512 / 1024,
// fillMultiplier 2.3, voidMultiplier 0.07. `counter` numbers the runs; the test URI is
// added to testSet at the end.
207BOOST_AUTO_TEST_CASE(testStencilHeatSparse09_gridScaling)
209 std::string testURI = suiteURI +
".device.stencil.sparse.N.2D.09.gridScaling";
210 unsigned int counter = 0;
211 constexpr unsigned int blockEdgeSize = 8;
212 launch_testStencilHeatSparse_perf<blockEdgeSize, 128>(testURI, counter++, 2.3, 0.07,
"09");
213 launch_testStencilHeatSparse_perf<blockEdgeSize, 256>(testURI, counter++, 2.3, 0.07,
"09");
214 launch_testStencilHeatSparse_perf<blockEdgeSize, 512>(testURI, counter++, 2.3, 0.07,
"09");
215 launch_testStencilHeatSparse_perf<blockEdgeSize, 1024>(testURI, counter++, 2.3, 0.07,
"09");
217 testSet.insert(testURI);
// Grid-scaling benchmark through the Z launcher, tag "05": block edge 8, grid edge
// 128 / 256 / 512 / 1024, fillMultiplier 1.45, voidMultiplier 1. `counter` numbers the
// runs; the test URI is added to testSet at the end.
220BOOST_AUTO_TEST_CASE(testStencilHeatSparseZ05_gridScaling)
222 std::string testURI = suiteURI +
".device.stencil.sparse.Z.2D.05.gridScaling";
223 unsigned int counter = 0;
224 constexpr unsigned int blockEdgeSize = 8;
225 launch_testStencilHeatSparseZ_perf<blockEdgeSize, 128>(testURI, counter++, 1.45, 1,
"05");
226 launch_testStencilHeatSparseZ_perf<blockEdgeSize, 256>(testURI, counter++, 1.45, 1,
"05");
227 launch_testStencilHeatSparseZ_perf<blockEdgeSize, 512>(testURI, counter++, 1.45, 1,
"05");
228 launch_testStencilHeatSparseZ_perf<blockEdgeSize, 1024>(testURI, counter++, 1.45, 1,
"05");
230 testSet.insert(testURI);
// Grid-scaling benchmark through the Z launcher, tag "08": block edge 8, grid edge
// 128 / 256 / 512 / 1024, fillMultiplier 2, voidMultiplier 0.20. `counter` numbers the
// runs; the test URI is added to testSet at the end.
234BOOST_AUTO_TEST_CASE(testStencilHeatSparseZ08_gridScaling)
236 std::string testURI = suiteURI +
".device.stencil.sparse.Z.2D.08.gridScaling";
237 unsigned int counter = 0;
238 constexpr unsigned int blockEdgeSize = 8;
239 launch_testStencilHeatSparseZ_perf<blockEdgeSize, 128>(testURI, counter++, 2, 0.20,
"08");
240 launch_testStencilHeatSparseZ_perf<blockEdgeSize, 256>(testURI, counter++, 2, 0.20,
"08");
241 launch_testStencilHeatSparseZ_perf<blockEdgeSize, 512>(testURI, counter++, 2, 0.20,
"08");
242 launch_testStencilHeatSparseZ_perf<blockEdgeSize, 1024>(testURI, counter++, 2, 0.20,
"08");
244 testSet.insert(testURI);
// Grid-scaling benchmark through the Z launcher, tag "09": block edge 8, grid edge
// 128 / 256 / 512 / 1024, fillMultiplier 2.3, voidMultiplier 0.07. `counter` numbers the
// runs; the test URI is added to testSet at the end.
248BOOST_AUTO_TEST_CASE(testStencilHeatSparseZ09_gridScaling)
250 std::string testURI = suiteURI +
".device.stencil.sparse.Z.2D.09.gridScaling";
251 unsigned int counter = 0;
252 constexpr unsigned int blockEdgeSize = 8;
253 launch_testStencilHeatSparseZ_perf<blockEdgeSize, 128>(testURI, counter++, 2.3, 0.07,
"09");
254 launch_testStencilHeatSparseZ_perf<blockEdgeSize, 256>(testURI, counter++, 2.3, 0.07,
"09");
255 launch_testStencilHeatSparseZ_perf<blockEdgeSize, 512>(testURI, counter++, 2.3, 0.07,
"09");
256 launch_testStencilHeatSparseZ_perf<blockEdgeSize, 1024>(testURI, counter++, 2.3, 0.07,
"09");
258 testSet.insert(testURI);
// Single-configuration benchmark, tag "05": block edge 32 with grid edge 2048/32 (= 64),
// fillMultiplier 1.45, voidMultiplier 1. The test URI is added to testSet at the end.
261BOOST_AUTO_TEST_CASE(testStencilHeatSparse05_32Block_2048Grid_Case)
263 std::string testURI = suiteURI +
".device.stencil.sparse.N.2D.05.32_2048";
264 unsigned int counter = 0;
265 launch_testStencilHeatSparse_perf<32, 2048/32>(testURI, counter++, 1.45, 1,
"05");
267 testSet.insert(testURI);
// Closes the two test suites opened above (SparseGridGpu_test, then performance).
270BOOST_AUTO_TEST_SUITE_END()
272BOOST_AUTO_TEST_SUITE_END()
grid_key_dx is the key to access any element in the grid
Implementation of 1-D std::vector like structure.
Class for cpu time benchmarking.
void stop()
Stop the timer.
void start()
Start the timer.
double getwct()
Return the elapsed real time.
Aggregate of properties: from a list of objects it creates a struct that follows the OPENFPM native stru...