OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
 
Loading...
Searching...
No Matches
SparseGridGpu_performance_get_nn.cu
1/*
2 * SparseGridGpu_performance_get_nn.cu
3 *
4 * Created on: Sep 10, 2019
5 * Author: i-bird
6 */
7#define BOOST_TEST_DYN_LINK
8#define DISABLE_MPI_WRITTERS
9
10#include <boost/test/unit_test.hpp>
11#include "performancePlots.hpp"
12#include <iostream>
13#include "SparseGridGpu/SparseGridGpu.hpp"
14#include "SparseGridGpu/tests/utils/SparseGridGpu_util_test.cuh"
15
16
17extern std::string suiteURI;
18extern report_sparse_grid_tests report_sparsegrid_funcs;
19extern std::set<std::string> testSet;
20
/**
 * \brief Time the getValuesNeighbourhood2D kernel and store the throughput in the report
 *
 * The sparse grid is filled once with insertValues2DBlocked and flushed on the device.
 * The kernel is then launched `iterations` times; each launch is timed between two
 * device synchronizations. Mean and deviation of the per-launch throughput, together
 * with the block occupancy, are written to report_sparsegrid_funcs and printed on
 * std::cout.
 *
 * \tparam blockEdgeSize edge length of one block; also used as the CUDA block edge
 * \tparam gridEdgeSize number of blocks per edge; also used as the CUDA grid edge
 *
 * \param testURI prefix of the report keys written by this run
 * \param i index of this run, appended to testURI as "(i)"
 */
template<unsigned int blockEdgeSize, unsigned int gridEdgeSize>
void testGetNeighbourhood(std::string testURI, unsigned int i)
{
    auto testName = "Get single - neighbourhood avg";
    constexpr unsigned int dim = 2;
    constexpr unsigned int chunkSize = IntPow<blockEdgeSize,dim>::value;
    typedef aggregate<float> AggregateT;

    unsigned int iterations = 10;

    std::string base(testURI + "(" + std::to_string(i) + ")");
    report_sparsegrid_funcs.graphs.put(base + ".test.name","Get");

    report_sparsegrid_funcs.graphs.put(base + ".dim",dim);
    report_sparsegrid_funcs.graphs.put(base + ".blockSize",blockEdgeSize);
    report_sparsegrid_funcs.graphs.put(base + ".gridSize.x",gridEdgeSize*blockEdgeSize);
    report_sparsegrid_funcs.graphs.put(base + ".gridSize.y",gridEdgeSize*blockEdgeSize);

    dim3 gridSize(gridEdgeSize, gridEdgeSize);
    dim3 blockSize(blockEdgeSize, blockEdgeSize);
    dim3 blockSizeBlockedInsert(1, 1);
    grid_smb<dim, blockEdgeSize> blockGeometry(gridSize);
    // NOTE(review): the embedded numbering of this listing jumps from 44 to 47 here.
    // The declarations of `sparseGrid` and `ctx` used below are not visible and must be
    // restored from the original file (presumably they consume chunkSize, AggregateT
    // and blockGeometry, which are otherwise unused - confirm).
    sparseGrid.template setBackgroundValue<0>(0);

    // Now fill the grid once
    auto offset = 0;
    sparseGrid.setGPUInsertBuffer(gridSize, blockSizeBlockedInsert);
    insertValues2DBlocked<0, 1, blockEdgeSize> <<<gridSize, blockSize>>>
            (sparseGrid.toKernel(), offset, offset);
    sparseGrid.template flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    // Widen before multiplying: the product of four unsigned int factors would
    // otherwise be computed in 32 bits and could wrap for larger instantiations.
    const unsigned long long edgeLength = static_cast<unsigned long long>(gridEdgeSize) * blockEdgeSize;
    const unsigned long long numElements = edgeLength * edgeLength;
    // NOTE(review): the embedded numbering jumps from 56 to 58 here. The declaration
    // of `measures` (the container of throughput samples) is not visible and must be
    // restored from the original file.

    for (unsigned int iter = 0; iter < iterations; ++iter)
    {
        // Wait for pending device work so that it is not attributed to this sample
        cudaDeviceSynchronize();

        timer ts;
        ts.start();

        getValuesNeighbourhood2D<0> <<<gridSize, blockSize>>> (sparseGrid.toKernel(), offset, offset);
        const cudaError_t syncStatus = cudaDeviceSynchronize();

        ts.stop();

        // Checked after the timer is stopped so the measurement is unaffected.
        // A failed launch would otherwise be recorded as a bogus throughput sample.
        const cudaError_t launchStatus = cudaGetLastError();
        BOOST_REQUIRE_MESSAGE(launchStatus == cudaSuccess, cudaGetErrorString(launchStatus));
        BOOST_REQUIRE_MESSAGE(syncStatus == cudaSuccess, cudaGetErrorString(syncStatus));

        // The formula counts 9 accesses per grid element (presumably a 3x3
        // neighbourhood - confirm against getValuesNeighbourhood2D).
        double gElemS = 9 * numElements / (1e9 * ts.getwct());
        measures.add(gElemS);
    }

    double mean = 0;
    double deviation = 0;
    standard_deviation(measures,mean,deviation);

    report_sparsegrid_funcs.graphs.put(base + ".Gget.mean",mean);
    report_sparsegrid_funcs.graphs.put(base +".Gget.dev",deviation);

    // The samples above are throughputs, reported below as GElem/s.
    // NOTE(review): this assumes timer::getwct() returns seconds - confirm.

    std::cout << "Test: " << testName << "\n";
    std::cout << "Block: " << blockEdgeSize << "x" << blockEdgeSize << "\n";
    std::cout << "Grid: " << gridEdgeSize*blockEdgeSize << "x" << gridEdgeSize*blockEdgeSize << "\n";
    double dataOccupancyMean, dataOccupancyDev;
    // The data is copied to the host before the occupancy is measured
    sparseGrid.deviceToHost();
    sparseGrid.measureBlockOccupancy(dataOccupancyMean, dataOccupancyDev);
    std::cout << "Data Occupancy: " << dataOccupancyMean << " dev:" << dataOccupancyDev << std::endl;
    report_sparsegrid_funcs.graphs.put(base + ".dataOccupancy.mean",dataOccupancyMean);
    report_sparsegrid_funcs.graphs.put(base +".dataOccupancy.dev",dataOccupancyDev);
    std::cout << "Iterations: " << iterations << "\n";
    std::cout << "Throughput:\n\t" << mean << "GElem/s" << "\n";
}
97
BOOST_AUTO_TEST_SUITE(performance)

BOOST_AUTO_TEST_SUITE(SparseGridGpu_test)

// Grid scaling with block edge 2: the grid edge doubles from 128 to 4096 blocks.
BOOST_AUTO_TEST_CASE(testGetNeighbourhood_gridScaling_2)
{
    const std::string testURI = suiteURI + ".device.get.dense.neighbourhood.2D.2.gridScaling";

    // The second argument is the run index stored in the report key
    testGetNeighbourhood<2, 128>(testURI, 0);
    testGetNeighbourhood<2, 256>(testURI, 1);
    testGetNeighbourhood<2, 512>(testURI, 2);
    testGetNeighbourhood<2, 1024>(testURI, 3);
    testGetNeighbourhood<2, 2048>(testURI, 4);
    testGetNeighbourhood<2, 4096>(testURI, 5);

    testSet.insert(testURI);
}

// Grid scaling with block edge 4: the grid edge doubles from 64 to 2048 blocks.
BOOST_AUTO_TEST_CASE(testGetNeighbourhood_gridScaling_4)
{
    const std::string testURI = suiteURI + ".device.get.dense.neighbourhood.2D.4.gridScaling";

    testGetNeighbourhood<4, 64>(testURI, 0);
    testGetNeighbourhood<4, 128>(testURI, 1);
    testGetNeighbourhood<4, 256>(testURI, 2);
    testGetNeighbourhood<4, 512>(testURI, 3);
    testGetNeighbourhood<4, 1024>(testURI, 4);
    testGetNeighbourhood<4, 2048>(testURI, 5);

    testSet.insert(testURI);
}

// Grid scaling with block edge 8: the grid edge doubles from 32 to 1024 blocks.
BOOST_AUTO_TEST_CASE(testGetNeighbourhood_gridScaling_8)
{
    const std::string testURI = suiteURI + ".device.get.dense.neighbourhood.2D.8.gridScaling";

    testGetNeighbourhood<8, 32>(testURI, 0);
    testGetNeighbourhood<8, 64>(testURI, 1);
    testGetNeighbourhood<8, 128>(testURI, 2);
    testGetNeighbourhood<8, 256>(testURI, 3);
    testGetNeighbourhood<8, 512>(testURI, 4);
    testGetNeighbourhood<8, 1024>(testURI, 5);

    testSet.insert(testURI);
}

// Grid scaling with block edge 16: the grid edge doubles from 16 to 512 blocks.
BOOST_AUTO_TEST_CASE(testGetNeighbourhood_gridScaling_16)
{
    const std::string testURI = suiteURI + ".device.get.dense.neighbourhood.2D.16.gridScaling";

    testGetNeighbourhood<16, 16>(testURI, 0);
    testGetNeighbourhood<16, 32>(testURI, 1);
    testGetNeighbourhood<16, 64>(testURI, 2);
    testGetNeighbourhood<16, 128>(testURI, 3);
    testGetNeighbourhood<16, 256>(testURI, 4);
    testGetNeighbourhood<16, 512>(testURI, 5);

    testSet.insert(testURI);
}

// Grid scaling with block edge 32: the grid edge doubles from 8 to 256 blocks.
BOOST_AUTO_TEST_CASE(testGetNeighbourhood_gridScaling_32)
{
    const std::string testURI = suiteURI + ".device.get.dense.neighbourhood.2D.32.gridScaling";

    testGetNeighbourhood<32, 8>(testURI, 0);
    testGetNeighbourhood<32, 16>(testURI, 1);
    testGetNeighbourhood<32, 32>(testURI, 2);
    testGetNeighbourhood<32, 64>(testURI, 3);
    testGetNeighbourhood<32, 128>(testURI, 4);
    testGetNeighbourhood<32, 256>(testURI, 5);

    testSet.insert(testURI);
}

// Block scaling: the block edge doubles from 2 to 32 while the grid edge halves from
// 1024 to 64, so blockEdgeSize * gridEdgeSize stays at 2048 for every run.
BOOST_AUTO_TEST_CASE(testGetNeighbourhood_blockScaling)
{
    const std::string testURI = suiteURI + ".device.get.dense.neighbourhood.2D.blockScaling";

    testGetNeighbourhood<2, 1024>(testURI, 0);
    testGetNeighbourhood<4, 512>(testURI, 1);
    testGetNeighbourhood<8, 256>(testURI, 2);
    testGetNeighbourhood<16, 128>(testURI, 3);
    testGetNeighbourhood<32, 64>(testURI, 4);

    testSet.insert(testURI);
}

BOOST_AUTO_TEST_SUITE_END()

BOOST_AUTO_TEST_SUITE_END()
Implementation of 1-D std::vector like structure.
Class for cpu time benchmarking.
Definition timer.hpp:28
void stop()
Stop the timer.
Definition timer.hpp:119
void start()
Start the timer.
Definition timer.hpp:90
double getwct()
Return the elapsed real time.
Definition timer.hpp:130
aggregate of properties: from a list of objects, it creates a struct that follows the OpenFPM native stru...