OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
SparseGridGpu_performance_get_nn.cu
1 /*
2  * SparseGridGpu_performance_get_nn.cu
3  *
4  * Created on: Sep 10, 2019
5  * Author: i-bird
6  */
7 #define BOOST_TEST_DYN_LINK
8 #define DISABLE_MPI_WRITTERS
9 
10 #include <boost/test/unit_test.hpp>
11 #include "performancePlots.hpp"
12 #include <iostream>
13 #include "SparseGridGpu/SparseGridGpu.hpp"
14 #include "SparseGridGpu/tests/utils/SparseGridGpu_util_test.cuh"
15 
16 
17 extern std::string suiteURI;
18 extern report_sparse_grid_tests report_sparsegrid_funcs;
19 extern std::set<std::string> testSet;
20 
/*! \brief Benchmark neighbourhood reads on a 2D SparseGridGpu and record the throughput
 *
 * The grid is filled once with insertValues2DBlocked and flushed on device; then
 * getValuesNeighbourhood2D<0> is launched `iterations` times, each launch bracketed
 * by cudaDeviceSynchronize() so that the wall-clock timer covers the whole kernel.
 * Mean and deviation of the per-iteration throughput, plus the block occupancy,
 * are stored in report_sparsegrid_funcs.graphs under `testURI(i)` and printed.
 *
 * \tparam blockEdgeSize edge of one block; also used as the CUDA thread-block edge
 * \tparam gridEdgeSize  number of blocks per dimension; also used as the CUDA grid edge
 *
 * \param testURI prefix of the report entries written by this run
 * \param i       index of this run, appended to testURI as "(i)"
 */
template<unsigned int blockEdgeSize, unsigned int gridEdgeSize>
void testGetNeighbourhood(std::string testURI, unsigned int i)
{
	auto testName = "Get single - neighbourhood avg";
	constexpr unsigned int dim = 2;
	constexpr unsigned int chunkSize = IntPow<blockEdgeSize,dim>::value;
	typedef aggregate<float> AggregateT;

	unsigned int iterations = 10;

	std::string base(testURI + "(" + std::to_string(i) + ")");
	report_sparsegrid_funcs.graphs.put(base + ".test.name","Get");

	report_sparsegrid_funcs.graphs.put(base + ".dim",dim);
	report_sparsegrid_funcs.graphs.put(base + ".blockSize",blockEdgeSize);
	report_sparsegrid_funcs.graphs.put(base + ".gridSize.x",gridEdgeSize*blockEdgeSize);
	report_sparsegrid_funcs.graphs.put(base + ".gridSize.y",gridEdgeSize*blockEdgeSize);

	dim3 gridSize(gridEdgeSize, gridEdgeSize);
	dim3 blockSize(blockEdgeSize, blockEdgeSize);
	dim3 blockSizeBlockedInsert(1, 1);
	grid_smb<dim, blockEdgeSize> blockGeometry(gridSize);
	// The grid under test: this declaration was missing, leaving sparseGrid undeclared
	SparseGridGpu<dim, AggregateT, blockEdgeSize, chunkSize> sparseGrid(blockGeometry);
	mgpu::ofp_context_t ctx;
	sparseGrid.template setBackgroundValue<0>(0);

	// Same (zero) offset for the fill and for every timed read
	const int offset = 0;

	// Now fill the grid once
	sparseGrid.setGPUInsertBuffer(gridSize, blockSizeBlockedInsert);
	insertValues2DBlocked<0, 1, blockEdgeSize><<<gridSize, blockSize>>>(sparseGrid.toKernel(), offset, offset);
	sparseGrid.template flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

	// Widen before multiplying so the element count cannot wrap in 32 bits
	const unsigned long long edgeElements = static_cast<unsigned long long>(gridEdgeSize) * blockEdgeSize;
	const unsigned long long numElements = edgeElements * edgeElements;
	openfpm::vector<double> measures;

	for (unsigned int iter=0; iter<iterations; ++iter)
	{
		// Make sure no earlier GPU work leaks into the timed region
		cudaDeviceSynchronize();

		timer ts;
		ts.start();

		getValuesNeighbourhood2D<0><<<gridSize, blockSize>>>(sparseGrid.toKernel(), offset, offset);
		const cudaError_t syncStatus = cudaDeviceSynchronize();

		ts.stop();

		// A failed launch/kernel would otherwise be reported as a (bogus) throughput
		BOOST_REQUIRE_MESSAGE(syncStatus == cudaSuccess, cudaGetErrorString(syncStatus));

		// 9 accesses are counted per element (presumably a 3x3 neighbourhood - confirm
		// against getValuesNeighbourhood2D); kept in double to match `measures`
		const double gElemS = 9.0 * static_cast<double>(numElements) / (1e9 * ts.getwct());
		measures.add(gElemS);
	}

	double mean = 0;
	double deviation = 0;
	standard_deviation(measures,mean,deviation);

	report_sparsegrid_funcs.graphs.put(base + ".Gget.mean",mean);
	report_sparsegrid_funcs.graphs.put(base +".Gget.dev",deviation);

	// The values recorded above are throughputs (GElem/s), not times

	std::cout << "Test: " << testName << "\n";
	std::cout << "Block: " << blockEdgeSize << "x" << blockEdgeSize << "\n";
	std::cout << "Grid: " << gridEdgeSize*blockEdgeSize << "x" << gridEdgeSize*blockEdgeSize << "\n";
	double dataOccupancyMean, dataOccupancyDev;
	// Occupancy is measured on the host copy of the grid
	sparseGrid.deviceToHost();
	sparseGrid.measureBlockOccupancy(dataOccupancyMean, dataOccupancyDev);
	std::cout << "Data Occupancy: " << dataOccupancyMean << " dev:" << dataOccupancyDev << std::endl;
	report_sparsegrid_funcs.graphs.put(base + ".dataOccupancy.mean",dataOccupancyMean);
	report_sparsegrid_funcs.graphs.put(base +".dataOccupancy.dev",dataOccupancyDev);
	std::cout << "Iterations: " << iterations << "\n";
	std::cout << "Throughput:\n\t" << mean << "GElem/s" << "\n";
}
97 
98 BOOST_AUTO_TEST_SUITE(performance)
99 
100 BOOST_AUTO_TEST_SUITE(SparseGridGpu_test)
101 
// Grid scaling with 2x2 blocks: the grid edge doubles from 128 to 4096 blocks
// (256 to 8192 elements per side); runs are reported as indices 0..5.
BOOST_AUTO_TEST_CASE(testGetNeighbourhood_gridScaling_2)
{
	const std::string testURI(suiteURI + ".device.get.dense.neighbourhood.2D.2.gridScaling");

	testGetNeighbourhood<2, 128>(testURI, 0u);
	testGetNeighbourhood<2, 256>(testURI, 1u);
	testGetNeighbourhood<2, 512>(testURI, 2u);
	testGetNeighbourhood<2, 1024>(testURI, 3u);
	testGetNeighbourhood<2, 2048>(testURI, 4u);
	testGetNeighbourhood<2, 4096>(testURI, 5u);

	// Register the URI so this series is part of the collected test set
	testSet.insert(testURI);
}
114 
// Grid scaling with 4x4 blocks: the grid edge doubles from 64 to 2048 blocks
// (256 to 8192 elements per side); runs are reported as indices 0..5.
BOOST_AUTO_TEST_CASE(testGetNeighbourhood_gridScaling_4)
{
	const std::string testURI(suiteURI + ".device.get.dense.neighbourhood.2D.4.gridScaling");

	testGetNeighbourhood<4, 64>(testURI, 0u);
	testGetNeighbourhood<4, 128>(testURI, 1u);
	testGetNeighbourhood<4, 256>(testURI, 2u);
	testGetNeighbourhood<4, 512>(testURI, 3u);
	testGetNeighbourhood<4, 1024>(testURI, 4u);
	testGetNeighbourhood<4, 2048>(testURI, 5u);

	// Register the URI so this series is part of the collected test set
	testSet.insert(testURI);
}
127 
// Grid scaling with 8x8 blocks: the grid edge doubles from 32 to 1024 blocks
// (256 to 8192 elements per side); runs are reported as indices 0..5.
BOOST_AUTO_TEST_CASE(testGetNeighbourhood_gridScaling_8)
{
	const std::string testURI(suiteURI + ".device.get.dense.neighbourhood.2D.8.gridScaling");

	testGetNeighbourhood<8, 32>(testURI, 0u);
	testGetNeighbourhood<8, 64>(testURI, 1u);
	testGetNeighbourhood<8, 128>(testURI, 2u);
	testGetNeighbourhood<8, 256>(testURI, 3u);
	testGetNeighbourhood<8, 512>(testURI, 4u);
	testGetNeighbourhood<8, 1024>(testURI, 5u);

	// Register the URI so this series is part of the collected test set
	testSet.insert(testURI);
}
140 
// Grid scaling with 16x16 blocks: the grid edge doubles from 16 to 512 blocks
// (256 to 8192 elements per side); runs are reported as indices 0..5.
BOOST_AUTO_TEST_CASE(testGetNeighbourhood_gridScaling_16)
{
	const std::string testURI(suiteURI + ".device.get.dense.neighbourhood.2D.16.gridScaling");

	testGetNeighbourhood<16, 16>(testURI, 0u);
	testGetNeighbourhood<16, 32>(testURI, 1u);
	testGetNeighbourhood<16, 64>(testURI, 2u);
	testGetNeighbourhood<16, 128>(testURI, 3u);
	testGetNeighbourhood<16, 256>(testURI, 4u);
	testGetNeighbourhood<16, 512>(testURI, 5u);

	// Register the URI so this series is part of the collected test set
	testSet.insert(testURI);
}
153 
// Grid scaling with 32x32 blocks: the grid edge doubles from 8 to 256 blocks
// (256 to 8192 elements per side); runs are reported as indices 0..5.
BOOST_AUTO_TEST_CASE(testGetNeighbourhood_gridScaling_32)
{
	const std::string testURI(suiteURI + ".device.get.dense.neighbourhood.2D.32.gridScaling");

	testGetNeighbourhood<32, 8>(testURI, 0u);
	testGetNeighbourhood<32, 16>(testURI, 1u);
	testGetNeighbourhood<32, 32>(testURI, 2u);
	testGetNeighbourhood<32, 64>(testURI, 3u);
	testGetNeighbourhood<32, 128>(testURI, 4u);
	testGetNeighbourhood<32, 256>(testURI, 5u);

	// Register the URI so this series is part of the collected test set
	testSet.insert(testURI);
}
166 
// Block scaling: the block edge doubles from 2 to 32 while the grid edge halves
// from 1024 to 64 blocks, so every run covers 2048 elements per side;
// runs are reported as indices 0..4.
BOOST_AUTO_TEST_CASE(testGetNeighbourhood_blockScaling)
{
	const std::string testURI(suiteURI + ".device.get.dense.neighbourhood.2D.blockScaling");

	testGetNeighbourhood<2, 1024>(testURI, 0u);
	testGetNeighbourhood<4, 512>(testURI, 1u);
	testGetNeighbourhood<8, 256>(testURI, 2u);
	testGetNeighbourhood<16, 128>(testURI, 3u);
	testGetNeighbourhood<32, 64>(testURI, 4u);

	// Register the URI so this series is part of the collected test set
	testSet.insert(testURI);
}
179 
180 BOOST_AUTO_TEST_SUITE_END()
181 
182 BOOST_AUTO_TEST_SUITE_END()
double getwct()
Return the elapsed real time.
Definition: timer.hpp:130
void start()
Start the timer.
Definition: timer.hpp:90
Aggregate of properties: from a list of objects it creates a struct that follows the OPENFPM native stru...
Definition: aggregate.hpp:214
Implementation of 1-D std::vector like structure.
Definition: map_vector.hpp:202
Class for cpu time benchmarking.
Definition: timer.hpp:27
void stop()
Stop the timer.
Definition: timer.hpp:119