OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
BlockMapGpu_tests.cu
//
// Created by tommaso on 14/05/19.
//

#define BOOST_TEST_DYN_LINK

#include <boost/test/unit_test.hpp>
#include "SparseGridGpu/BlockMapGpu.hpp"
#include "SparseGridGpu/BlockMapGpu_ker.cuh"
#include "SparseGridGpu/BlockMapGpu_kernels.cuh"
#include "SparseGridGpu/DataBlock.cuh"

#include <limits>

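// Copies property p of every element of the sparse structure into a dense output
// vector, one GPU thread per element.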
template<unsigned int p, typename SparseGridType, typename VectorOutType>
__global__ void copyBlocksToOutput(SparseGridType sparseGrid, VectorOutType output)
{
    int pos = blockIdx.x * blockDim.x + threadIdx.x;
    output.template get<p>(pos) = sparseGrid.template get<p>(pos);
}

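// Each thread inserts its own global index as the value of property p at its position;
// init() and flush_block_insert() open and close the per-block insert phase.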
template<unsigned int p, typename SparseGridType>
__global__ void insertValues(SparseGridType sparseGrid)
{
    sparseGrid.init();

    int pos = blockIdx.x * blockDim.x + threadIdx.x;

    sparseGrid.template insert<p>(pos) = pos;

    __syncthreads();

    sparseGrid.flush_block_insert();
}

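// Block-wise insertion: each thread fetches the encapsulated data chunk for its
// dataBlockId via insertBlock, writes its own slot and marks it as existing in the mask.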
template<unsigned int p, unsigned int chunksPerBlock, typename SparseGridType>
__global__ void insertValuesBlocked(SparseGridType sparseGrid)
{
    constexpr unsigned int pMask = SparseGridType::pMask;
    typedef BlockTypeOf<typename SparseGridType::AggregateType, p> BlockT;
    typedef BlockTypeOf<typename SparseGridType::AggregateType, pMask> MaskBlockT;

    sparseGrid.init();

    int pos = blockIdx.x * blockDim.x + threadIdx.x;
    unsigned int dataBlockId = pos / BlockT::size;
    unsigned int offset = pos % BlockT::size;

    auto encap = sparseGrid.template insertBlock<chunksPerBlock>(dataBlockId, BlockT::size);

    encap.template get<p>()[offset] = pos;
    BlockMapGpu_ker<>::setExist(encap.template get<pMask>()[offset]);

    __syncthreads();

    sparseGrid.flush_block_insert();
}

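// Same as insertValues, but only the threads in the first half of each data chunk
// insert, so the second half of every chunk must remain at the background value.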
template<unsigned int p, typename SparseGridType>
__global__ void insertValuesHalfBlock(SparseGridType sparseGrid)
{
    sparseGrid.init();

    int pos = blockIdx.x * blockDim.x + threadIdx.x;

    constexpr unsigned int dataChunkSize = BlockTypeOf<typename SparseGridType::AggregateType, p>::size;
    if (threadIdx.x % dataChunkSize < dataChunkSize / 2)
    {
        sparseGrid.template insert<p>(pos) = pos;
    }

    __syncthreads();

    sparseGrid.flush_block_insert();
}

BOOST_AUTO_TEST_SUITE(BlockMapGpu_tests)

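// Sanity checks for the bit-mask helpers of BlockMapGpu_ker on plain integers.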
BOOST_AUTO_TEST_CASE(testBitwiseOps)
{
    BOOST_REQUIRE(BlockMapGpu_ker<>::getBit(1,0));
    BOOST_REQUIRE(!BlockMapGpu_ker<>::getBit(2,0));
    BOOST_REQUIRE(BlockMapGpu_ker<>::getBit(2,1));
    BOOST_REQUIRE(BlockMapGpu_ker<>::getBit(3,0));
    BOOST_REQUIRE(BlockMapGpu_ker<>::getBit(3,1));
    unsigned int m = 0;
    BOOST_REQUIRE(!BlockMapGpu_ker<>::getBit(m,0));
    BlockMapGpu_ker<>::setBit(m,0);   // set bit 0 of m (assumed call; missing from the listing)
    BOOST_REQUIRE(BlockMapGpu_ker<>::getBit(m,0));
    BlockMapGpu_ker<>::unsetBit(m,0); // clear bit 0 of m (assumed call; missing from the listing)
    BOOST_REQUIRE(!BlockMapGpu_ker<>::getBit(m,0));
}

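// A freshly created BlockMapGpu must return the background value (666) everywhere.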
BOOST_AUTO_TEST_CASE(testBackground)
{
    typedef aggregate<DataBlock<float, 64>> AggregateSGT;
    typedef aggregate<float> AggregateOutT;
    BlockMapGpu<AggregateSGT> sparseGrid;
    sparseGrid.template setBackgroundValue<0>(666);

    const unsigned int gridSize = 10;
    const unsigned int blockSize = 128;

    // Get output
    openfpm::vector_gpu<AggregateOutT> output; // assumed declaration (missing from the listing)
    output.resize(gridSize * blockSize);
    CUDA_LAUNCH_DIM3((copyBlocksToOutput<0>), gridSize, blockSize, sparseGrid.toKernel(), output.toKernel());

    output.template deviceToHost<0>();
    sparseGrid.template deviceToHost<0>();

    // Compare
    bool match = true;
    for (size_t i = 0; i < output.size(); i++)
    {
        match &= output.template get<0>(i) == 666;
        match &= output.template get<0>(i) == sparseGrid.template get<0>(i);
    }

    BOOST_REQUIRE_EQUAL(match, true);
}

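// Insert one value per thread, flush on the device, then read back more elements than
// were inserted: the extra elements must still hold the background value.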
BOOST_AUTO_TEST_CASE(testInsert)
{
    typedef aggregate<DataBlock<float, 64>> AggregateT;
    typedef aggregate<float> AggregateOutT;
    BlockMapGpu<AggregateT, 128> blockMap; // assumed declaration (missing from the listing)
    blockMap.template setBackgroundValue<0>(666);

    const unsigned int gridSize = 3;
    const unsigned int bufferPoolSize = 128; // Should be multiple of BlockT::size
    const unsigned int blockSizeInsert = 128;
    const unsigned int gridSizeRead = gridSize + 1;
    const unsigned int blockSizeRead = 128;

    // Prealloc insert buffer
    blockMap.setGPUInsertBuffer(gridSize, bufferPoolSize);

    // Insert values
    CUDA_LAUNCH_DIM3((insertValues<0>), gridSize, blockSizeInsert, blockMap.toKernel());

    // Flush inserts
    mgpu::ofp_context_t ctx; // GPU context for flush(); assumed declaration (newer OpenFPM releases name this type gpu::ofp_context_t)
    blockMap.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    // Get output
    openfpm::vector_gpu<AggregateOutT> output; // assumed declaration (missing from the listing)
    output.resize(gridSizeRead * blockSizeRead);

    CUDA_LAUNCH_DIM3((copyBlocksToOutput<0>), gridSizeRead, blockSizeRead, blockMap.toKernel(), output.toKernel());

    output.template deviceToHost<0>();
    blockMap.template deviceToHost<0>();

    // Compare
    bool match = true;
    for (size_t i = 0; i < output.size(); i++)
    {
        auto expectedValue = (i < gridSize * blockSizeInsert) ? i : 666;

        match &= output.template get<0>(i) == blockMap.template get<0>(i);
        match &= output.template get<0>(i) == expectedValue;
    }

    BOOST_REQUIRE_EQUAL(match, true);
}

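// As testInsert, but only the first half of each data chunk is inserted; the second
// half of every chunk must read back as the background value.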
BOOST_AUTO_TEST_CASE(testInsert_halfBlock)
{
    typedef aggregate<DataBlock<float, 64>> AggregateT;
    typedef aggregate<float> AggregateOutT;
    BlockMapGpu<AggregateT, 128> blockMap; // assumed declaration (missing from the listing)
    blockMap.template setBackgroundValue<0>(666);

    const unsigned int gridSize = 3;
    const unsigned int bufferPoolSize = 128; // Should be multiple of BlockT::size
    const unsigned int blockSizeInsert = 128;
    const unsigned int gridSizeRead = gridSize + 1;
    const unsigned int blockSizeRead = 128;

    // Prealloc insert buffer
    blockMap.setGPUInsertBuffer(gridSize, bufferPoolSize);

    // Insert values
    CUDA_LAUNCH_DIM3((insertValuesHalfBlock<0>), gridSize, blockSizeInsert, blockMap.toKernel());

    // Flush inserts
    mgpu::ofp_context_t ctx; // GPU context for flush(); assumed declaration
    blockMap.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    // Get output
    openfpm::vector_gpu<AggregateOutT> output; // assumed declaration (missing from the listing)
    output.resize(gridSizeRead * blockSizeRead);

    CUDA_LAUNCH_DIM3((copyBlocksToOutput<0>), gridSizeRead, blockSizeRead, blockMap.toKernel(), output.toKernel());

    output.template deviceToHost<0>();
    blockMap.template deviceToHost<0>();

    // Compare
    bool match = true;
    for (size_t i = 0; i < output.size(); i++)
    {
        auto expectedValue = (i < gridSize * blockSizeInsert) ? i : 666;
        constexpr unsigned int dataChunkSize = BlockTypeOf<AggregateT, 0>::size;
        int offset = i % dataChunkSize;
        if (!(offset < dataChunkSize / 2))
        {
            expectedValue = 666; // Just the first half of each block was inserted
        }

        match &= output.template get<0>(i) == blockMap.template get<0>(i);
        match &= output.template get<0>(i) == expectedValue;
    }

    BOOST_REQUIRE_EQUAL(match, true);
}

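// As testInsert, but through the block-wise insert path (insertValuesBlocked with two
// chunks per CUDA block).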
BOOST_AUTO_TEST_CASE(testInsert_blocked)
{
    typedef aggregate<DataBlock<float, 64>> AggregateT;
    typedef aggregate<float> AggregateOutT;
    BlockMapGpu<AggregateT, 128> sparseGrid; // assumed declaration (missing from the listing)
    sparseGrid.template setBackgroundValue<0>(666);

    const unsigned int gridSize = 3;
    const unsigned int bufferPoolSize = 4; // Should be multiple of BlockT::size
    const unsigned int blockSizeInsert = 128;
    const unsigned int gridSizeRead = gridSize + 1;
    const unsigned int blockSizeRead = 128;

    // Prealloc insert buffer
    sparseGrid.setGPUInsertBuffer(gridSize, bufferPoolSize);

    // Insert values
    CUDA_LAUNCH_DIM3((insertValuesBlocked<0, 2>), gridSize, blockSizeInsert, sparseGrid.toKernel());

    // Flush inserts
    mgpu::ofp_context_t ctx; // GPU context for flush(); assumed declaration
    sparseGrid.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    // Get output
    openfpm::vector_gpu<AggregateOutT> output; // assumed declaration (missing from the listing)
    output.resize(gridSizeRead * blockSizeRead);

    CUDA_LAUNCH_DIM3((copyBlocksToOutput<0>), gridSizeRead, blockSizeRead, sparseGrid.toKernel(), output.toKernel());

    output.template deviceToHost<0>();
    sparseGrid.template deviceToHost<0>();

    // Compare
    bool match = true;
    for (size_t i = 0; i < output.size(); i++)
    {
        auto expectedValue = (i < gridSize * blockSizeInsert) ? i : 666;
        match &= output.template get<0>(i) == sparseGrid.template get<0>(i);
        match &= output.template get<0>(i) == expectedValue;
    }

    BOOST_REQUIRE_EQUAL(match, true);
}

BOOST_AUTO_TEST_SUITE_END()