OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
 
Loading...
Searching...
No Matches
SparseGridGpu_performance_heat_stencil_sparse.cu
1/*
2 * SparseGridGpu_performance_heat_stencil_sparse.cu
3 *
4 * Created on: Sep 10, 2019
5 * Author: i-bird
6 */
7#define BOOST_TEST_DYN_LINK
8#define DISABLE_MPI_WRITTERS
9
10#include <boost/test/unit_test.hpp>
11#include "performancePlots.hpp"
12#include <iostream>
13#include "SparseGridGpu/SparseGridGpu.hpp"
14#include "SparseGridGpu/tests/utils/SparseGridGpu_util_test.cuh"
15
16extern std::string suiteURI;
17extern report_sparse_grid_tests report_sparsegrid_funcs;
18extern std::set<std::string> testSet;
19
/*! \brief Time the in-place heat stencil on a sparse 2D grid and store the results in the report
 *
 * The grid is populated by launching insertSphere<0> once per shell: shell s uses an outer
 * radius of (fillMultiplier+voidMultiplier)*s*blockEdgeSize and an inner radius that is
 * fillMultiplier*blockEdgeSize smaller, all centred in the middle of the domain.
 * After tagging the boundaries and applying BoundaryStencilSetXRescaled, the pair
 * HeatStencil<dim,0,1> / HeatStencil<dim,1,0> is applied in place and timed with `timer`.
 * Mean and deviation of the wall-clock time and of the derived GFlops are written under `base`.
 *
 * \tparam blockEdgeSize edge of one block, in points (used for radii, launch size and reporting)
 * \tparam gridEdgeSize number of blocks per edge of the launch grid
 * \tparam SparseGridZ sparse grid type under test (must be 2-dimensional)
 *
 * \param i index of the run (not used inside this function)
 * \param base prefix of every key written into report_sparsegrid_funcs.graphs
 * \param fillMultiplier thickness of each filled shell, in units of blockEdgeSize
 * \param voidMultiplier gap between consecutive shells, in units of blockEdgeSize
 */
template<unsigned int blockEdgeSize, unsigned int gridEdgeSize, typename SparseGridZ>
void testStencilHeatSparse_perf(unsigned int i, std::string base, float fillMultiplier=1, float voidMultiplier=1)
{
    auto testName = "In-place sparse stencil";
    constexpr unsigned int dim = SparseGridZ::dims;

    // The domain extents and the launch grid below are written for two dimensions only.
    static_assert(dim == 2, "testStencilHeatSparse_perf is implemented for 2D sparse grids only");

    typedef HeatStencil<dim, 0, 1> Stencil01T;
    typedef HeatStencil<dim, 1, 0> Stencil10T;

    report_sparsegrid_funcs.graphs.put(base + ".dim",dim);
    report_sparsegrid_funcs.graphs.put(base + ".blockSize",blockEdgeSize);
    report_sparsegrid_funcs.graphs.put(base + ".gridSize.x",gridEdgeSize*blockEdgeSize);
    report_sparsegrid_funcs.graphs.put(base + ".gridSize.y",gridEdgeSize*blockEdgeSize);

    unsigned int iterations = 100;

    openfpm::vector<double> measures_gf;
    openfpm::vector<double> measures_tm;

    dim3 gridSize(gridEdgeSize, gridEdgeSize);
    unsigned int spatialEdgeSize = 1000000;
    size_t sz[2] = {spatialEdgeSize, spatialEdgeSize};
    typename SparseGridZ::grid_info blockGeometry(sz);
    SparseGridZ sparseGrid(blockGeometry);
    // NOTE(review): the declaration of `ctx` was missing from the listing this file was
    // recovered from; gpu::ofp_context_t is the expected type -- confirm against the project.
    gpu::ofp_context_t ctx;
    sparseGrid.template setBackgroundValue<0>(0);

    // Insert the sparse content: a set of concentric shells around the domain centre
    float allMultiplier = fillMultiplier + voidMultiplier;
    const unsigned int numSpheres = gridEdgeSize / (2*allMultiplier);
    unsigned int centerPoint = spatialEdgeSize / 2;

    // Every shell shares the same centre, so the key is built once
    grid_key_dx<dim, int> start1({centerPoint, centerPoint});

    for (unsigned int sphere = 1; sphere <= numSpheres; ++sphere)
    {
        unsigned int rBig = allMultiplier*sphere * blockEdgeSize;
        unsigned int rSmall = (allMultiplier*sphere - fillMultiplier) * blockEdgeSize;

        // sphere-th shell
        sparseGrid.setGPUInsertBuffer(gridSize, dim3(1));
        CUDA_LAUNCH_DIM3((insertSphere<0>),
                gridSize, dim3(blockEdgeSize * blockEdgeSize, 1, 1),
                sparseGrid.toKernel(), start1, rBig, rSmall, 5);
        cudaDeviceSynchronize();
        sparseGrid.template flush<smax_<0 >>(ctx, flush_type::FLUSH_ON_DEVICE);
        cudaDeviceSynchronize();
    }

    sparseGrid.findNeighbours(); // Pre-compute the neighbours pos for each block!
    sparseGrid.tagBoundaries(ctx);

    sparseGrid.template deviceToHost<0>(); // NECESSARY as count takes place on Host!
    auto existingElements = sparseGrid.countExistingElements();
    auto boundaryElements = sparseGrid.countBoundaryElements();
    // Only non-boundary elements enter the throughput figures
    unsigned long long numElements = existingElements - boundaryElements;

    // Now apply some boundary conditions
    sparseGrid.template applyStencils<BoundaryStencilSetXRescaled<dim,0,0>>(sparseGrid.getBox(),STENCIL_MODE_INPLACE,
            centerPoint, centerPoint + 2*blockEdgeSize*gridEdgeSize,
            0.0, 10.0);

    // Each timed iteration applies two stencils (0->1 then 1->0), hence half the iterations
    iterations /= 2;
    for (unsigned int iter=0; iter<iterations; ++iter)
    {
        // Make sure no pending device work leaks into the measurement
        cudaDeviceSynchronize();

        timer ts;
        ts.start();

        sparseGrid.template applyStencils<Stencil01T>(sparseGrid.getBox(),STENCIL_MODE_INPLACE, 0.1);
        cudaDeviceSynchronize();
        sparseGrid.template applyStencils<Stencil10T>(sparseGrid.getBox(),STENCIL_MODE_INPLACE, 0.1);
        cudaDeviceSynchronize();

        ts.stop();

        measures_tm.add(ts.getwct());

        // Factor 2: two stencil applications per timed iteration
        float gElemS = 2 * numElements / (1e9 * ts.getwct());
        float gFlopsS = gElemS * Stencil01T::flops;

        measures_gf.add(gFlopsS);
    }

    double mean_tm = 0;
    double deviation_tm = 0;
    standard_deviation(measures_tm,mean_tm,deviation_tm);

    double mean_gf = 0;
    double deviation_gf = 0;
    standard_deviation(measures_gf,mean_gf,deviation_gf);

    // NOTE(review): the formulas below label the result "per second", i.e. they treat
    // timer::getwct() as seconds (an earlier comment here said ms) -- confirm the unit.

    float gElemS = 2 * numElements / (1e9 * mean_tm);
    float gFlopsS = gElemS * Stencil01T::flops;
    std::cout << "Test: " << testName << std::endl;
    std::cout << "Block: " << blockEdgeSize << "x" << blockEdgeSize << std::endl;
    std::cout << "Grid: " << gridEdgeSize*blockEdgeSize << "x" << gridEdgeSize*blockEdgeSize << std::endl;
    double dataOccupancyMean, dataOccupancyDev;
    sparseGrid.deviceToHost();
    sparseGrid.measureBlockOccupancy(dataOccupancyMean, dataOccupancyDev);
    std::cout << "Data Occupancy: " << dataOccupancyMean << " dev:" << dataOccupancyDev << std::endl;
    report_sparsegrid_funcs.graphs.put(base + ".dataOccupancy.mean",dataOccupancyMean);
    report_sparsegrid_funcs.graphs.put(base +".dataOccupancy.dev",dataOccupancyDev);
    std::cout << "Iterations: " << iterations << std::endl;
    std::cout << "\tStencil: " << mean_gf << " dev:" << deviation_gf << " s" << std::endl;
    std::cout << "Throughput: " << std::endl << "\t " << gElemS << " GElem/s " << std::endl << "\t " << gFlopsS << " GFlops/s" << std::endl;

    report_sparsegrid_funcs.graphs.put(base + ".GFlops.mean",mean_gf);
    report_sparsegrid_funcs.graphs.put(base +".GFlops.dev",deviation_gf);
    report_sparsegrid_funcs.graphs.put(base + ".time.mean",mean_tm);
    report_sparsegrid_funcs.graphs.put(base +".time.dev",deviation_tm);
}
139
/*! \brief Run the sparse heat-stencil benchmark on a 2D SparseGridGpu of float pairs
 *
 * Builds the report prefix "<testURI>(<i>)", stores the test name under it and forwards
 * to testStencilHeatSparse_perf with a chunk size of blockEdgeSize^2.
 *
 * \tparam blockEdgeSize edge of one block, in points
 * \tparam gridEdgeSize number of blocks per edge of the launch grid
 *
 * \param testURI report path of the test case
 * \param i index of the run inside the test case
 * \param fillMultiplier forwarded to testStencilHeatSparse_perf
 * \param voidMultiplier forwarded to testStencilHeatSparse_perf
 * \param occupancyStr suffix appended to the reported test name
 */
template<unsigned int blockEdgeSize, unsigned int gridEdgeSize>
void launch_testStencilHeatSparse_perf(std::string testURI, unsigned int i,
        float fillMultiplier=1, float voidMultiplier=1, std::string occupancyStr="05")
{
    constexpr unsigned int dim = 2;
    typedef aggregate<float,float> AggregateT;
    constexpr unsigned int chunkSize = IntPow<blockEdgeSize,dim>::value;

    std::string base(testURI + "(" + std::to_string(i) + ")");
    report_sparsegrid_funcs.graphs.put(base + ".test.name","StencilNSparse"+occupancyStr);

    // NOTE(review): the grid-type argument and the (i, base, ...) arguments were missing from
    // the listing; they are reconstructed from the otherwise unused locals above -- confirm.
    testStencilHeatSparse_perf<blockEdgeSize, gridEdgeSize,
            SparseGridGpu<dim, AggregateT, blockEdgeSize, chunkSize>>(i, base,
                    fillMultiplier, voidMultiplier);
    cudaDeviceSynchronize();
}
156
/*! \brief Run the sparse heat-stencil benchmark on the Z-variant of the 2D sparse grid
 *
 * Builds the report prefix "<testURI>(<i>)", stores the test name under it and forwards
 * to testStencilHeatSparse_perf with a chunk size of blockEdgeSize^2.
 *
 * \tparam blockEdgeSize edge of one block, in points
 * \tparam gridEdgeSize number of blocks per edge of the launch grid
 *
 * \param testURI report path of the test case
 * \param i index of the run inside the test case
 * \param fillMultiplier forwarded to testStencilHeatSparse_perf
 * \param voidMultiplier forwarded to testStencilHeatSparse_perf
 * \param occupancyStr suffix appended to the reported test name
 */
template<unsigned int blockEdgeSize, unsigned int gridEdgeSize>
void launch_testStencilHeatSparseZ_perf(std::string testURI, unsigned int i,
        float fillMultiplier=1, float voidMultiplier=1, std::string occupancyStr="05")
{
    constexpr unsigned int dim = 2;
    typedef aggregate<float,float> AggregateT;
    constexpr unsigned int chunkSize = IntPow<blockEdgeSize,dim>::value;

    std::string base(testURI + "(" + std::to_string(i) + ")");
    // NOTE(review): this Z launcher reports the same "StencilNSparse" name as the N launcher;
    // kept unchanged because report consumers may rely on it -- confirm it is intended.
    report_sparsegrid_funcs.graphs.put(base + ".test.name","StencilNSparse"+occupancyStr);

    // NOTE(review): the grid-type argument and the (i, base, ...) arguments were missing from
    // the listing; SparseGridGpu_z is assumed to be the Z-ordered grid alias -- confirm.
    testStencilHeatSparse_perf<blockEdgeSize, gridEdgeSize,
            SparseGridGpu_z<dim, AggregateT, blockEdgeSize, chunkSize>>(i, base,
                    fillMultiplier, voidMultiplier);
    cudaDeviceSynchronize();
}
173
174BOOST_AUTO_TEST_SUITE(performance)
175
176BOOST_AUTO_TEST_SUITE(SparseGridGpu_test)
177
BOOST_AUTO_TEST_CASE(testStencilHeatSparse05_gridScaling)
{
    // Grid-scaling sweep (128 -> 1024 blocks per edge) with fill 1.45 / void 1, tagged "05"
    constexpr unsigned int blockEdgeSize = 8;
    const std::string testURI = suiteURI + ".device.stencil.sparse.N.2D.05.gridScaling";

    unsigned int runIdx = 0;
    launch_testStencilHeatSparse_perf<blockEdgeSize, 128>(testURI, runIdx++, 1.45, 1, "05");
    launch_testStencilHeatSparse_perf<blockEdgeSize, 256>(testURI, runIdx++, 1.45, 1, "05");
    launch_testStencilHeatSparse_perf<blockEdgeSize, 512>(testURI, runIdx++, 1.45, 1, "05");
    launch_testStencilHeatSparse_perf<blockEdgeSize, 1024>(testURI, runIdx++, 1.45, 1, "05");

    // Register the URI so the test is known to have run
    testSet.insert(testURI);
}
191
192
BOOST_AUTO_TEST_CASE(testStencilHeatSparse08_gridScaling)
{
    // Grid-scaling sweep (128 -> 1024 blocks per edge) with fill 2 / void 0.20, tagged "08"
    constexpr unsigned int blockEdgeSize = 8;
    const std::string testURI = suiteURI + ".device.stencil.sparse.N.2D.08.gridScaling";

    unsigned int runIdx = 0;
    launch_testStencilHeatSparse_perf<blockEdgeSize, 128>(testURI, runIdx++, 2, 0.20, "08");
    launch_testStencilHeatSparse_perf<blockEdgeSize, 256>(testURI, runIdx++, 2, 0.20, "08");
    launch_testStencilHeatSparse_perf<blockEdgeSize, 512>(testURI, runIdx++, 2, 0.20, "08");
    launch_testStencilHeatSparse_perf<blockEdgeSize, 1024>(testURI, runIdx++, 2, 0.20, "08");

    // Register the URI so the test is known to have run
    testSet.insert(testURI);
}
205
206
BOOST_AUTO_TEST_CASE(testStencilHeatSparse09_gridScaling)
{
    // Grid-scaling sweep (128 -> 1024 blocks per edge) with fill 2.3 / void 0.07, tagged "09"
    constexpr unsigned int blockEdgeSize = 8;
    const std::string testURI = suiteURI + ".device.stencil.sparse.N.2D.09.gridScaling";

    unsigned int runIdx = 0;
    launch_testStencilHeatSparse_perf<blockEdgeSize, 128>(testURI, runIdx++, 2.3, 0.07, "09");
    launch_testStencilHeatSparse_perf<blockEdgeSize, 256>(testURI, runIdx++, 2.3, 0.07, "09");
    launch_testStencilHeatSparse_perf<blockEdgeSize, 512>(testURI, runIdx++, 2.3, 0.07, "09");
    launch_testStencilHeatSparse_perf<blockEdgeSize, 1024>(testURI, runIdx++, 2.3, 0.07, "09");

    // Register the URI so the test is known to have run
    testSet.insert(testURI);
}
219
BOOST_AUTO_TEST_CASE(testStencilHeatSparseZ05_gridScaling)
{
    // Z-launcher grid-scaling sweep (128 -> 1024 blocks per edge), fill 1.45 / void 1, tagged "05"
    constexpr unsigned int blockEdgeSize = 8;
    const std::string testURI = suiteURI + ".device.stencil.sparse.Z.2D.05.gridScaling";

    unsigned int runIdx = 0;
    launch_testStencilHeatSparseZ_perf<blockEdgeSize, 128>(testURI, runIdx++, 1.45, 1, "05");
    launch_testStencilHeatSparseZ_perf<blockEdgeSize, 256>(testURI, runIdx++, 1.45, 1, "05");
    launch_testStencilHeatSparseZ_perf<blockEdgeSize, 512>(testURI, runIdx++, 1.45, 1, "05");
    launch_testStencilHeatSparseZ_perf<blockEdgeSize, 1024>(testURI, runIdx++, 1.45, 1, "05");

    // Register the URI so the test is known to have run
    testSet.insert(testURI);
}
232
233
BOOST_AUTO_TEST_CASE(testStencilHeatSparseZ08_gridScaling)
{
    // Z-launcher grid-scaling sweep (128 -> 1024 blocks per edge), fill 2 / void 0.20, tagged "08"
    constexpr unsigned int blockEdgeSize = 8;
    const std::string testURI = suiteURI + ".device.stencil.sparse.Z.2D.08.gridScaling";

    unsigned int runIdx = 0;
    launch_testStencilHeatSparseZ_perf<blockEdgeSize, 128>(testURI, runIdx++, 2, 0.20, "08");
    launch_testStencilHeatSparseZ_perf<blockEdgeSize, 256>(testURI, runIdx++, 2, 0.20, "08");
    launch_testStencilHeatSparseZ_perf<blockEdgeSize, 512>(testURI, runIdx++, 2, 0.20, "08");
    launch_testStencilHeatSparseZ_perf<blockEdgeSize, 1024>(testURI, runIdx++, 2, 0.20, "08");

    // Register the URI so the test is known to have run
    testSet.insert(testURI);
}
246
247
BOOST_AUTO_TEST_CASE(testStencilHeatSparseZ09_gridScaling)
{
    // Z-launcher grid-scaling sweep (128 -> 1024 blocks per edge), fill 2.3 / void 0.07, tagged "09"
    constexpr unsigned int blockEdgeSize = 8;
    const std::string testURI = suiteURI + ".device.stencil.sparse.Z.2D.09.gridScaling";

    unsigned int runIdx = 0;
    launch_testStencilHeatSparseZ_perf<blockEdgeSize, 128>(testURI, runIdx++, 2.3, 0.07, "09");
    launch_testStencilHeatSparseZ_perf<blockEdgeSize, 256>(testURI, runIdx++, 2.3, 0.07, "09");
    launch_testStencilHeatSparseZ_perf<blockEdgeSize, 512>(testURI, runIdx++, 2.3, 0.07, "09");
    launch_testStencilHeatSparseZ_perf<blockEdgeSize, 1024>(testURI, runIdx++, 2.3, 0.07, "09");

    // Register the URI so the test is known to have run
    testSet.insert(testURI);
}
260
BOOST_AUTO_TEST_CASE(testStencilHeatSparse05_32Block_2048Grid_Case)
{
    // Single run: 32-point blocks covering 2048 points per edge, fill 1.45 / void 1, tagged "05"
    constexpr unsigned int blockEdgeSize = 32;
    constexpr unsigned int pointsPerEdge = 2048;
    const std::string testURI = suiteURI + ".device.stencil.sparse.N.2D.05.32_2048";

    unsigned int runIdx = 0;
    launch_testStencilHeatSparse_perf<blockEdgeSize, pointsPerEdge/blockEdgeSize>(testURI, runIdx++, 1.45, 1, "05");

    // Register the URI so the test is known to have run
    testSet.insert(testURI);
}
269
270BOOST_AUTO_TEST_SUITE_END()
271
272BOOST_AUTO_TEST_SUITE_END()
grid_key_dx is the key to access any element in the grid
Definition grid_key.hpp:19
Implementation of 1-D std::vector like structure.
Class for cpu time benchmarking.
Definition timer.hpp:28
void stop()
Stop the timer.
Definition timer.hpp:119
void start()
Start the timer.
Definition timer.hpp:90
double getwct()
Return the elapsed real time.
Definition timer.hpp:130
Aggregate of properties: from a list of objects it creates a struct that follows the OpenFPM native stru...