7#define BOOST_TEST_DYN_LINK
8#define DISABLE_MPI_WRITTERS
10#include <boost/test/unit_test.hpp>
11#include "performancePlots.hpp"
13#include "SparseGridGpu/SparseGridGpu.hpp"
14#include "SparseGridGpu/tests/utils/SparseGridGpu_util_test.cuh"
17extern std::string suiteURI;
19extern std::set<std::string> testSet;
// Benchmark of the getValuesNeighbourhood2D<0> kernel on a 2D sparse grid.
//
// The grid is filled once with insertValues2DBlocked and flushed on the
// device; the kernel is then launched `iterations` times and a throughput
// figure is computed for each launch from ts.getwct(). The resulting mean and
// deviation, the grid/block geometry and the block occupancy are stored in
// report_sparsegrid_funcs.graphs under the key prefix "<testURI>(<i>)" and a
// summary is printed to std::cout.
//
// Template parameters:
//   blockEdgeSize - edge of the CUDA thread block (blockSize is
//                   blockEdgeSize x blockEdgeSize).
//   gridEdgeSize  - edge of the CUDA launch grid in blocks (gridSize is
//                   gridEdgeSize x gridEdgeSize).
// Parameters:
//   testURI - prefix of the report keys.
//   i       - run index appended to testURI as "(i)".
//
// NOTE(review): this listing is an extraction from a rendered page. The
// number fused to the start of each statement is the original line number,
// and several original lines are not visible here (braces, and the
// declarations of sparseGrid, ctx, offset, ts, measures, mean and deviation).
21template<
unsigned int blockEdgeSize,
unsigned int gr
idEdgeSize>
22void testGetNeighbourhood(std::string testURI,
unsigned int i)
24 auto testName =
"Get single - neighbourhood avg";
25 constexpr unsigned int dim = 2;
30 unsigned int iterations = 10;
// Every report entry of this run lives under "<testURI>(<i>)".
33 std::string base(testURI +
"(" + std::to_string(i) +
")");
34 report_sparsegrid_funcs.graphs.put(base +
".test.name",
"Get");
36 report_sparsegrid_funcs.graphs.put(base +
".dim",dim);
37 report_sparsegrid_funcs.graphs.put(base +
".blockSize",blockEdgeSize);
// Reported grid size per axis is gridEdgeSize*blockEdgeSize.
38 report_sparsegrid_funcs.graphs.put(base +
".gridSize.x",gridEdgeSize*blockEdgeSize);
39 report_sparsegrid_funcs.graphs.put(base +
".gridSize.y",gridEdgeSize*blockEdgeSize);
// Launch geometry: gridEdgeSize^2 blocks of blockEdgeSize^2 threads.
41 dim3 gridSize(gridEdgeSize, gridEdgeSize);
42 dim3 blockSize(blockEdgeSize, blockEdgeSize);
43 dim3 blockSizeBlockedInsert(1, 1);
47 sparseGrid.template setBackgroundValue<0>(0);
// Populate the grid once before timing: set up the insert buffer, run the
// insertion kernel, then flush on the device.
51 sparseGrid.setGPUInsertBuffer(gridSize, blockSizeBlockedInsert);
52 insertValues2DBlocked<0, 1, blockEdgeSize> << < gridSize, blockSize >> >
53 (sparseGrid.toKernel(), offset, offset);
54 sparseGrid.template flush < smax_ < 0 >> (ctx, flush_type::FLUSH_ON_DEVICE);
// NOTE(review): all four factors are unsigned int, so the product is
// evaluated in unsigned int and only the result is widened. It would wrap if
// (gridEdgeSize*blockEdgeSize)^2 exceeded the range of unsigned int; the
// instantiations in this file stay at or below 8192^2.
56 unsigned long long numElements = gridEdgeSize*blockEdgeSize*gridEdgeSize*blockEdgeSize;
59 for (
unsigned int iter=0; iter<iterations; ++iter)
// Presumably drains pending device work so it is not attributed to the timed
// launch - the timer start/stop calls are not visible in this excerpt.
63 cudaDeviceSynchronize();
68 getValuesNeighbourhood2D<0> << < gridSize, blockSize >> > (sparseGrid.toKernel(), offset, offset);
// Kernel launches are asynchronous; wait for completion before reading ts.
69 cudaDeviceSynchronize();
// Throughput in 1e9 elements per unit of ts.getwct(). The factor 9 presumably
// counts a 3x3 neighbourhood read per element - TODO confirm against
// getValuesNeighbourhood2D.
73 float gElemS = 9 * numElements / (1e9 * ts.
getwct());
// Presumably fills `mean` and `deviation` from the per-iteration samples in
// `measures` - verify against standard_deviation's declaration.
79 standard_deviation(measures,mean,deviation);
81 report_sparsegrid_funcs.graphs.put(base +
".Gget.mean",mean);
82 report_sparsegrid_funcs.graphs.put(base +
".Gget.dev",deviation);
// Human-readable summary on stdout.
86 std::cout <<
"Test: " << testName <<
"\n";
87 std::cout <<
"Block: " << blockEdgeSize <<
"x" << blockEdgeSize <<
"\n";
88 std::cout <<
"Grid: " << gridEdgeSize*blockEdgeSize <<
"x" << gridEdgeSize*blockEdgeSize <<
"\n";
89 double dataOccupancyMean, dataOccupancyDev;
// deviceToHost() runs before the occupancy measurement; presumably
// measureBlockOccupancy works on the host copy - TODO confirm.
90 sparseGrid.deviceToHost();
91 sparseGrid.measureBlockOccupancy(dataOccupancyMean, dataOccupancyDev);std::cout <<
"Data Occupancy: " << dataOccupancyMean <<
" dev:" << dataOccupancyDev << std::endl;
92 report_sparsegrid_funcs.graphs.put(base +
".dataOccupancy.mean",dataOccupancyMean);
93 report_sparsegrid_funcs.graphs.put(base +
".dataOccupancy.dev",dataOccupancyDev);
94 std::cout <<
"Iterations: " << iterations <<
"\n";
95 std::cout <<
"Throughput:\n\t" << mean <<
"GElem/s" <<
"\n";
// Open the nested Boost.Test suites: performance -> SparseGridGpu_test.
98BOOST_AUTO_TEST_SUITE(performance)
100BOOST_AUTO_TEST_SUITE(SparseGridGpu_test)
102BOOST_AUTO_TEST_CASE(testGetNeighbourhood_gridScaling_2)
104 std::string testURI = suiteURI +
".device.get.dense.neighbourhood.2D.2.gridScaling";
105 unsigned int counter = 0;
106 testGetNeighbourhood<2, 128>(testURI, counter++);
107 testGetNeighbourhood<2, 256>(testURI, counter++);
108 testGetNeighbourhood<2, 512>(testURI, counter++);
109 testGetNeighbourhood<2, 1024>(testURI, counter++);
110 testGetNeighbourhood<2, 2048>(testURI, counter++);
111 testGetNeighbourhood<2, 4096>(testURI, counter++);
112 testSet.insert(testURI);
115BOOST_AUTO_TEST_CASE(testGetNeighbourhood_gridScaling_4)
117 std::string testURI = suiteURI +
".device.get.dense.neighbourhood.2D.4.gridScaling";
118 unsigned int counter = 0;
119 testGetNeighbourhood<4, 64>(testURI, counter++);
120 testGetNeighbourhood<4, 128>(testURI, counter++);
121 testGetNeighbourhood<4, 256>(testURI, counter++);
122 testGetNeighbourhood<4, 512>(testURI, counter++);
123 testGetNeighbourhood<4, 1024>(testURI, counter++);
124 testGetNeighbourhood<4, 2048>(testURI, counter++);
125 testSet.insert(testURI);
128BOOST_AUTO_TEST_CASE(testGetNeighbourhood_gridScaling_8)
130 std::string testURI = suiteURI +
".device.get.dense.neighbourhood.2D.8.gridScaling";
131 unsigned int counter = 0;
132 testGetNeighbourhood<8, 32>(testURI, counter++);
133 testGetNeighbourhood<8, 64>(testURI, counter++);
134 testGetNeighbourhood<8, 128>(testURI, counter++);
135 testGetNeighbourhood<8, 256>(testURI, counter++);
136 testGetNeighbourhood<8, 512>(testURI, counter++);
137 testGetNeighbourhood<8, 1024>(testURI, counter++);
138 testSet.insert(testURI);
141BOOST_AUTO_TEST_CASE(testGetNeighbourhood_gridScaling_16)
143 std::string testURI = suiteURI +
".device.get.dense.neighbourhood.2D.16.gridScaling";
144 unsigned int counter = 0;
145 testGetNeighbourhood<16, 16>(testURI, counter++);
146 testGetNeighbourhood<16, 32>(testURI, counter++);
147 testGetNeighbourhood<16, 64>(testURI, counter++);
148 testGetNeighbourhood<16, 128>(testURI, counter++);
149 testGetNeighbourhood<16, 256>(testURI, counter++);
150 testGetNeighbourhood<16, 512>(testURI, counter++);
151 testSet.insert(testURI);
154BOOST_AUTO_TEST_CASE(testGetNeighbourhood_gridScaling_32)
156 std::string testURI = suiteURI +
".device.get.dense.neighbourhood.2D.32.gridScaling";
157 unsigned int counter = 0;
158 testGetNeighbourhood<32, 8>(testURI, counter++);
159 testGetNeighbourhood<32, 16>(testURI, counter++);
160 testGetNeighbourhood<32, 32>(testURI, counter++);
161 testGetNeighbourhood<32, 64>(testURI, counter++);
162 testGetNeighbourhood<32, 128>(testURI, counter++);
163 testGetNeighbourhood<32, 256>(testURI, counter++);
164 testSet.insert(testURI);
167BOOST_AUTO_TEST_CASE(testGetNeighbourhood_blockScaling)
169 std::string testURI = suiteURI +
".device.get.dense.neighbourhood.2D.blockScaling";
170 unsigned int counter = 0;
171 testGetNeighbourhood<2, 1024>(testURI, counter++);
172 testGetNeighbourhood<4, 512>(testURI, counter++);
173 testGetNeighbourhood<8, 256>(testURI, counter++);
174 testGetNeighbourhood<16, 128>(testURI, counter++);
175 testGetNeighbourhood<32, 64>(testURI, counter++);
177 testSet.insert(testURI);
// Close the two nested suites opened above (innermost first:
// SparseGridGpu_test, then performance).
180BOOST_AUTO_TEST_SUITE_END()
182BOOST_AUTO_TEST_SUITE_END()
Implementation of a 1-D std::vector-like structure.
Class for cpu time benchmarking.
void stop()
Stop the timer.
void start()
Start the timer.
double getwct()
Return the elapsed real time.
Aggregate of properties: from a list of objects it creates a struct that follows the OPENFPM native stru...