OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
SparseGridGpu_performance_get_single.cu
1 /*
2  * SparseGridGpu_performance_get_single.cu
3  *
4  * Created on: Sep 9, 2019
5  * Author: i-bird
6  */
7 #define BOOST_TEST_DYN_LINK
8 #define DISABLE_MPI_WRITTERS
9 
10 #include <boost/test/unit_test.hpp>
11 #include "performancePlots.hpp"
12 #include <iostream>
13 #include "SparseGridGpu/SparseGridGpu.hpp"
14 #include "SparseGridGpu/tests/utils/SparseGridGpu_util_test.cuh"
15 
16 
17 extern std::string suiteURI;
18 extern report_sparse_grid_tests report_sparsegrid_funcs;
19 extern std::set<std::string> testSet;
20 
21 
/*! \brief Benchmark the throughput of the "get" kernel on a 2D SparseGridGpu
 *
 * The grid is filled once (insertValues2DBlocked followed by a flush), then
 * getValues2D is launched `iterations` times with the same launch
 * configuration. Each launch is timed between two device synchronizations and
 * converted into a throughput; mean and deviation of the throughputs, together
 * with the block occupancy, are stored in report_sparsegrid_funcs and printed
 * on std::cout.
 *
 * \tparam blockEdgeSize number of threads per block along each dimension
 * \tparam gridEdgeSize number of blocks along each dimension
 *
 * \param testURI prefix of every key written into the report
 * \param i index of this sample, appended to testURI as "(i)"
 *
 */
template<unsigned int blockEdgeSize, unsigned int gridEdgeSize>
void testGetSingle(std::string testURI, unsigned int i)
{
    auto testName = "Get single";
    constexpr unsigned int dim = 2;
    constexpr unsigned int chunkSize = IntPow<blockEdgeSize,dim>::value;
    typedef aggregate<float> AggregateT;

    unsigned int iterations = 10;

    std::string base(testURI + "(" + std::to_string(i) + ")");
    report_sparsegrid_funcs.graphs.put(base + ".test.name","Get");

    report_sparsegrid_funcs.graphs.put(base + ".dim",dim);
    report_sparsegrid_funcs.graphs.put(base + ".blockSize",blockEdgeSize);
    report_sparsegrid_funcs.graphs.put(base + ".gridSize.x",gridEdgeSize*blockEdgeSize);
    report_sparsegrid_funcs.graphs.put(base + ".gridSize.y",gridEdgeSize*blockEdgeSize);

    dim3 gridSize(gridEdgeSize, gridEdgeSize);
    dim3 blockSize(blockEdgeSize, blockEdgeSize);
    dim3 blockSizeBlockedInsert(1, 1);
    grid_smb<dim, blockEdgeSize> blockGeometry(gridSize);
    // NOTE(review): this declaration was absent from the listing (sparseGrid was
    // used undeclared); reconstructed from the otherwise unused chunkSize,
    // AggregateT and blockGeometry -- confirm against the repository.
    SparseGridGpu<dim, AggregateT, blockEdgeSize, chunkSize> sparseGrid(blockGeometry);
    mgpu::ofp_context_t ctx;
    sparseGrid.template setBackgroundValue<0>(0);

    // Fill the grid once; the timed loop below only reads from it
    auto offset = 0;
    sparseGrid.setGPUInsertBuffer(gridSize, blockSizeBlockedInsert);
    insertValues2DBlocked<0, 1, blockEdgeSize> <<<gridSize, blockSize>>>
            (sparseGrid.toKernel(), offset, offset);
    sparseGrid.template flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    // Widen before multiplying: the product of four unsigned int wraps at 2^32
    const unsigned long long edgePoints = static_cast<unsigned long long>(gridEdgeSize) * blockEdgeSize;
    const unsigned long long numElements = edgePoints * edgePoints;
    openfpm::vector<double> measures;

    for (unsigned int iter = 0; iter < iterations; ++iter)
    {
        // Make sure no previous work is still running when the timer starts
        cudaDeviceSynchronize();

        timer ts;
        ts.start();

        getValues2D<0> <<<gridSize, blockSize>>> (sparseGrid.toKernel(), offset, offset);
        cudaError_t syncStatus = cudaDeviceSynchronize();

        ts.stop();

        // Checked outside the timed region: a failed kernel would otherwise
        // be reported as a (meaningless) throughput
        BOOST_REQUIRE(syncStatus == cudaSuccess);

        // Kept in double, the element type of measures.
        // Assumes timer::getwct() returns seconds, so that this is GElem/s -- TODO confirm
        double gElemS = numElements / (1e9 * ts.getwct());
        measures.add(gElemS);
    }

    double mean = 0;
    double deviation = 0;
    standard_deviation(measures,mean,deviation);

    report_sparsegrid_funcs.graphs.put(base + ".Gget.mean",mean);
    report_sparsegrid_funcs.graphs.put(base +".Gget.dev",deviation);

    // The values stored above are throughputs (GElem/s), not times

    std::cout << "Test: " << testName << "\n";
    std::cout << "Block: " << blockEdgeSize << "x" << blockEdgeSize << "\n";
    std::cout << "Grid: " << gridEdgeSize*blockEdgeSize << "x" << gridEdgeSize*blockEdgeSize << "\n";
    double dataOccupancyMean, dataOccupancyDev;
    sparseGrid.deviceToHost();
    sparseGrid.measureBlockOccupancy(dataOccupancyMean, dataOccupancyDev);
    std::cout << "Data Occupancy: " << dataOccupancyMean << " dev:" << dataOccupancyDev << std::endl;
    report_sparsegrid_funcs.graphs.put(base + ".dataOccupancy.mean",dataOccupancyMean);
    report_sparsegrid_funcs.graphs.put(base +".dataOccupancy.dev",dataOccupancyDev);
    std::cout << "Iterations: " << iterations << "\n";
    std::cout << "Throughput:\n\t" << mean << "GElem/s" << "\n";
}
98 
99 BOOST_AUTO_TEST_SUITE(performance)
100 
101 BOOST_AUTO_TEST_SUITE(SparseGridGpu_test)
102 
// Grid scaling with blockEdgeSize = 2: gridEdgeSize doubles from 128 to 4096,
// so the reported grid side (gridEdgeSize*blockEdgeSize) goes from 256 to 8192.
BOOST_AUTO_TEST_CASE(testGet_gridScaling_2)
{
    const std::string testURI(suiteURI + ".device.get.dense.single.2D.2.gridScaling");

    // Index of the sample inside this series; becomes the "(i)" suffix of the report key
    unsigned int sampleIdx = 0;

    testGetSingle<2, 128>(testURI, sampleIdx++);
    testGetSingle<2, 256>(testURI, sampleIdx++);
    testGetSingle<2, 512>(testURI, sampleIdx++);
    testGetSingle<2, 1024>(testURI, sampleIdx++);
    testGetSingle<2, 2048>(testURI, sampleIdx++);
    testGetSingle<2, 4096>(testURI, sampleIdx++);

    // Register the series in the global set of executed tests
    testSet.insert(testURI);
}
115 
// Grid scaling with blockEdgeSize = 4: gridEdgeSize doubles from 64 to 2048,
// so the reported grid side (gridEdgeSize*blockEdgeSize) goes from 256 to 8192.
BOOST_AUTO_TEST_CASE(testGet_gridScaling_4)
{
    const std::string testURI(suiteURI + ".device.get.dense.single.2D.4.gridScaling");

    // Index of the sample inside this series; becomes the "(i)" suffix of the report key
    unsigned int sampleIdx = 0;

    testGetSingle<4, 64>(testURI, sampleIdx++);
    testGetSingle<4, 128>(testURI, sampleIdx++);
    testGetSingle<4, 256>(testURI, sampleIdx++);
    testGetSingle<4, 512>(testURI, sampleIdx++);
    testGetSingle<4, 1024>(testURI, sampleIdx++);
    testGetSingle<4, 2048>(testURI, sampleIdx++);

    // Register the series in the global set of executed tests
    testSet.insert(testURI);
}
128 
// Grid scaling with blockEdgeSize = 8: gridEdgeSize doubles from 32 to 1024,
// so the reported grid side (gridEdgeSize*blockEdgeSize) goes from 256 to 8192.
BOOST_AUTO_TEST_CASE(testGet_gridScaling_8)
{
    const std::string testURI(suiteURI + ".device.get.dense.single.2D.8.gridScaling");

    // Index of the sample inside this series; becomes the "(i)" suffix of the report key
    unsigned int sampleIdx = 0;

    testGetSingle<8, 32>(testURI, sampleIdx++);
    testGetSingle<8, 64>(testURI, sampleIdx++);
    testGetSingle<8, 128>(testURI, sampleIdx++);
    testGetSingle<8, 256>(testURI, sampleIdx++);
    testGetSingle<8, 512>(testURI, sampleIdx++);
    testGetSingle<8, 1024>(testURI, sampleIdx++);

    // Register the series in the global set of executed tests
    testSet.insert(testURI);
}
141 
// Grid scaling with blockEdgeSize = 16: gridEdgeSize doubles from 16 to 512,
// so the reported grid side (gridEdgeSize*blockEdgeSize) goes from 256 to 8192.
BOOST_AUTO_TEST_CASE(testGet_gridScaling_16)
{
    const std::string testURI(suiteURI + ".device.get.dense.single.2D.16.gridScaling");

    // Index of the sample inside this series; becomes the "(i)" suffix of the report key
    unsigned int sampleIdx = 0;

    testGetSingle<16, 16>(testURI, sampleIdx++);
    testGetSingle<16, 32>(testURI, sampleIdx++);
    testGetSingle<16, 64>(testURI, sampleIdx++);
    testGetSingle<16, 128>(testURI, sampleIdx++);
    testGetSingle<16, 256>(testURI, sampleIdx++);
    testGetSingle<16, 512>(testURI, sampleIdx++);

    // Register the series in the global set of executed tests
    testSet.insert(testURI);
}
154 
// Grid scaling with blockEdgeSize = 32: gridEdgeSize doubles from 8 to 256,
// so the reported grid side (gridEdgeSize*blockEdgeSize) goes from 256 to 8192.
BOOST_AUTO_TEST_CASE(testGet_gridScaling_32)
{
    const std::string testURI(suiteURI + ".device.get.dense.single.2D.32.gridScaling");

    // Index of the sample inside this series; becomes the "(i)" suffix of the report key
    unsigned int sampleIdx = 0;

    testGetSingle<32, 8>(testURI, sampleIdx++);
    testGetSingle<32, 16>(testURI, sampleIdx++);
    testGetSingle<32, 32>(testURI, sampleIdx++);
    testGetSingle<32, 64>(testURI, sampleIdx++);
    testGetSingle<32, 128>(testURI, sampleIdx++);
    testGetSingle<32, 256>(testURI, sampleIdx++);

    // Register the series in the global set of executed tests
    testSet.insert(testURI);
}
167 
// Block scaling: blockEdgeSize doubles from 2 to 32 while gridEdgeSize halves
// from 1024 to 64, so gridEdgeSize*blockEdgeSize is 2048 for every sample.
BOOST_AUTO_TEST_CASE(testGet_blockScaling)
{
    const std::string testURI(suiteURI + ".device.get.dense.single.2D.blockScaling");

    // Index of the sample inside this series; becomes the "(i)" suffix of the report key
    unsigned int sampleIdx = 0;

    testGetSingle<2, 1024>(testURI, sampleIdx++);
    testGetSingle<4, 512>(testURI, sampleIdx++);
    testGetSingle<8, 256>(testURI, sampleIdx++);
    testGetSingle<16, 128>(testURI, sampleIdx++);
    testGetSingle<32, 64>(testURI, sampleIdx++);

    // Register the series in the global set of executed tests
    testSet.insert(testURI);
}
180 
181 BOOST_AUTO_TEST_SUITE_END()
182 
183 BOOST_AUTO_TEST_SUITE_END()
double getwct()
Return the elapsed real time.
Definition: timer.hpp:130
void start()
Start the timer.
Definition: timer.hpp:90
Aggregate of properties: from a list of objects it creates a struct that follows the OpenFPM native stru...
Definition: aggregate.hpp:214
Implementation of 1-D std::vector like structure.
Definition: map_vector.hpp:202
Class for cpu time benchmarking.
Definition: timer.hpp:27
void stop()
Stop the timer.
Definition: timer.hpp:119