OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
 
Loading...
Searching...
No Matches
SparseGridGpu_performance_get_single.cu
1/*
2 * SparseGridGpu_performance_get_single.cu
3 *
4 * Created on: Sep 9, 2019
5 * Author: i-bird
6 */
7#define BOOST_TEST_DYN_LINK
8#define DISABLE_MPI_WRITTERS
9
10#include <boost/test/unit_test.hpp>
11#include "performancePlots.hpp"
12#include <iostream>
13#include "SparseGridGpu/SparseGridGpu.hpp"
14#include "SparseGridGpu/tests/utils/SparseGridGpu_util_test.cuh"
15
16
17extern std::string suiteURI;
18extern report_sparse_grid_tests report_sparsegrid_funcs;
19extern std::set<std::string> testSet;
20
21
/*! \brief Benchmark the throughput of single-point reads on a dense 2D SparseGridGpu
 *
 * The grid is filled once with insertValues2DBlocked and flushed on the device,
 * then the getValues2D kernel is launched `iterations` times. Each launch is
 * timed on the host between two device synchronizations and converted to
 * GElem/s. Mean and deviation of the throughput, the block occupancy and the
 * test configuration are stored in report_sparsegrid_funcs.graphs under
 * "<testURI>(<i>)" and a short summary is printed on std::cout.
 *
 * \tparam blockEdgeSize edge of one block in points (also used as CUDA block edge)
 * \tparam gridEdgeSize number of blocks along each edge of the grid
 *
 * \param testURI prefix of the report key for this family of measurements
 * \param i index of this measurement inside the family
 *
 */
template<unsigned int blockEdgeSize, unsigned int gridEdgeSize>
void testGetSingle(std::string testURI, unsigned int i)
{
	auto testName = "Get single";
	constexpr unsigned int dim = 2;
	constexpr unsigned int chunkSize = IntPow<blockEdgeSize,dim>::value;
	typedef aggregate<float> AggregateT;

	unsigned int iterations = 10;

	std::string base(testURI + "(" + std::to_string(i) + ")");
	report_sparsegrid_funcs.graphs.put(base + ".test.name","Get");

	report_sparsegrid_funcs.graphs.put(base + ".dim",dim);
	report_sparsegrid_funcs.graphs.put(base + ".blockSize",blockEdgeSize);
	report_sparsegrid_funcs.graphs.put(base + ".gridSize.x",gridEdgeSize*blockEdgeSize);
	report_sparsegrid_funcs.graphs.put(base + ".gridSize.y",gridEdgeSize*blockEdgeSize);

	dim3 gridSize(gridEdgeSize, gridEdgeSize);
	dim3 blockSize(blockEdgeSize, blockEdgeSize);
	dim3 blockSizeBlockedInsert(1, 1);
	grid_smb<dim, blockEdgeSize> blockGeometry(gridSize);

	// NOTE(review): the two declarations below were missing from the listing
	// (sparseGrid and ctx were used without being declared). They are
	// reconstructed from the otherwise unused chunkSize/AggregateT/blockGeometry;
	// the context type is assumed to be mgpu::ofp_context_t -- TODO confirm
	// against the SparseGridGpu version in use.
	SparseGridGpu<dim, AggregateT, blockEdgeSize, chunkSize> sparseGrid(blockGeometry);
	mgpu::ofp_context_t ctx;
	sparseGrid.template setBackgroundValue<0>(0);

	// Now fill the grid once
	auto offset = 0;
	sparseGrid.setGPUInsertBuffer(gridSize, blockSizeBlockedInsert);
	insertValues2DBlocked<0, 1, blockEdgeSize> <<< gridSize, blockSize >>>
			(sparseGrid.toKernel(), offset, offset);
	sparseGrid.template flush < smax_ < 0 >> (ctx, flush_type::FLUSH_ON_DEVICE);

	// Widen before multiplying so large grids cannot wrap around in 32 bit
	const unsigned long long edgePoints = static_cast<unsigned long long>(gridEdgeSize) * blockEdgeSize;
	const unsigned long long numElements = edgePoints * edgePoints;

	// One throughput sample (GElem/s) per timed launch
	openfpm::vector<double> measures;

	for (unsigned int iter=0; iter<iterations; ++iter)
	{
		// Make sure no earlier device work is accounted to this sample
		cudaDeviceSynchronize();

		timer ts;
		ts.start();

		getValues2D<0> <<< gridSize, blockSize >>> (sparseGrid.toKernel(), offset, offset);
		const cudaError_t syncStatus = cudaDeviceSynchronize();

		ts.stop();

		// Checked after the timer is stopped: a failed kernel returns almost
		// immediately and would otherwise be reported as a huge throughput
		BOOST_REQUIRE_MESSAGE(syncStatus == cudaSuccess && cudaGetLastError() == cudaSuccess,
				"getValues2D launch/execution failed, measurement is not valid");

		double gElemS = numElements / (1e9 * ts.getwct());
		measures.add(gElemS);
	}

	double mean = 0;
	double deviation = 0;
	standard_deviation(measures,mean,deviation);

	report_sparsegrid_funcs.graphs.put(base + ".Gget.mean",mean);
	report_sparsegrid_funcs.graphs.put(base +".Gget.dev",deviation);

	// Mean and deviation above are throughputs in GElem/s

	std::cout << "Test: " << testName << "\n";
	std::cout << "Block: " << blockEdgeSize << "x" << blockEdgeSize << "\n";
	std::cout << "Grid: " << gridEdgeSize*blockEdgeSize << "x" << gridEdgeSize*blockEdgeSize << "\n";
	double dataOccupancyMean, dataOccupancyDev;
	// Occupancy is measured on the host copy of the grid
	sparseGrid.deviceToHost();
	sparseGrid.measureBlockOccupancy(dataOccupancyMean, dataOccupancyDev);
	std::cout << "Data Occupancy: " << dataOccupancyMean << " dev:" << dataOccupancyDev << std::endl;
	report_sparsegrid_funcs.graphs.put(base + ".dataOccupancy.mean",dataOccupancyMean);
	report_sparsegrid_funcs.graphs.put(base +".dataOccupancy.dev",dataOccupancyDev);
	std::cout << "Iterations: " << iterations << "\n";
	std::cout << "Throughput:\n\t" << mean << "GElem/s" << "\n";
}
98
99BOOST_AUTO_TEST_SUITE(performance)
100
101BOOST_AUTO_TEST_SUITE(SparseGridGpu_test)
102
// Grid-size scaling of the single-get benchmark with block edge 2:
// the grid edge doubles from 128 to 4096 blocks.
BOOST_AUTO_TEST_CASE(testGet_gridScaling_2)
{
	const std::string uri(suiteURI + ".device.get.dense.single.2D.2.gridScaling");

	// Second argument is the position of the measurement in the series
	testGetSingle<2, 128>(uri, 0u);
	testGetSingle<2, 256>(uri, 1u);
	testGetSingle<2, 512>(uri, 2u);
	testGetSingle<2, 1024>(uri, 3u);
	testGetSingle<2, 2048>(uri, 4u);
	testGetSingle<2, 4096>(uri, 5u);

	// Register the series so that it is known to the report
	testSet.insert(uri);
}
115
// Grid-size scaling of the single-get benchmark with block edge 4:
// the grid edge doubles from 64 to 2048 blocks.
BOOST_AUTO_TEST_CASE(testGet_gridScaling_4)
{
	const std::string uri(suiteURI + ".device.get.dense.single.2D.4.gridScaling");

	// Second argument is the position of the measurement in the series
	testGetSingle<4, 64>(uri, 0u);
	testGetSingle<4, 128>(uri, 1u);
	testGetSingle<4, 256>(uri, 2u);
	testGetSingle<4, 512>(uri, 3u);
	testGetSingle<4, 1024>(uri, 4u);
	testGetSingle<4, 2048>(uri, 5u);

	// Register the series so that it is known to the report
	testSet.insert(uri);
}
128
// Grid-size scaling of the single-get benchmark with block edge 8:
// the grid edge doubles from 32 to 1024 blocks.
BOOST_AUTO_TEST_CASE(testGet_gridScaling_8)
{
	const std::string uri(suiteURI + ".device.get.dense.single.2D.8.gridScaling");

	// Second argument is the position of the measurement in the series
	testGetSingle<8, 32>(uri, 0u);
	testGetSingle<8, 64>(uri, 1u);
	testGetSingle<8, 128>(uri, 2u);
	testGetSingle<8, 256>(uri, 3u);
	testGetSingle<8, 512>(uri, 4u);
	testGetSingle<8, 1024>(uri, 5u);

	// Register the series so that it is known to the report
	testSet.insert(uri);
}
141
// Grid-size scaling of the single-get benchmark with block edge 16:
// the grid edge doubles from 16 to 512 blocks.
BOOST_AUTO_TEST_CASE(testGet_gridScaling_16)
{
	const std::string uri(suiteURI + ".device.get.dense.single.2D.16.gridScaling");

	// Second argument is the position of the measurement in the series
	testGetSingle<16, 16>(uri, 0u);
	testGetSingle<16, 32>(uri, 1u);
	testGetSingle<16, 64>(uri, 2u);
	testGetSingle<16, 128>(uri, 3u);
	testGetSingle<16, 256>(uri, 4u);
	testGetSingle<16, 512>(uri, 5u);

	// Register the series so that it is known to the report
	testSet.insert(uri);
}
154
// Grid-size scaling of the single-get benchmark with block edge 32:
// the grid edge doubles from 8 to 256 blocks.
BOOST_AUTO_TEST_CASE(testGet_gridScaling_32)
{
	const std::string uri(suiteURI + ".device.get.dense.single.2D.32.gridScaling");

	// Second argument is the position of the measurement in the series
	testGetSingle<32, 8>(uri, 0u);
	testGetSingle<32, 16>(uri, 1u);
	testGetSingle<32, 32>(uri, 2u);
	testGetSingle<32, 64>(uri, 3u);
	testGetSingle<32, 128>(uri, 4u);
	testGetSingle<32, 256>(uri, 5u);

	// Register the series so that it is known to the report
	testSet.insert(uri);
}
167
// Block-size scaling of the single-get benchmark: the block edge doubles from
// 2 to 32 while the grid edge halves from 1024 to 64, so that
// blockEdge * gridEdge stays at 2048 points per side.
BOOST_AUTO_TEST_CASE(testGet_blockScaling)
{
	const std::string uri(suiteURI + ".device.get.dense.single.2D.blockScaling");

	// Second argument is the position of the measurement in the series
	testGetSingle<2, 1024>(uri, 0u);
	testGetSingle<4, 512>(uri, 1u);
	testGetSingle<8, 256>(uri, 2u);
	testGetSingle<16, 128>(uri, 3u);
	testGetSingle<32, 64>(uri, 4u);

	// Register the series so that it is known to the report
	testSet.insert(uri);
}
180
181BOOST_AUTO_TEST_SUITE_END()
182
183BOOST_AUTO_TEST_SUITE_END()
Implementation of 1-D std::vector like structure.
Class for cpu time benchmarking.
Definition timer.hpp:28
void stop()
Stop the timer.
Definition timer.hpp:119
void start()
Start the timer.
Definition timer.hpp:90
double getwct()
Return the elapsed real time.
Definition timer.hpp:130
Aggregate of properties: from a list of objects it creates a struct that follows the OPENFPM native stru...