OpenFPM_pdata 4.1.0
Project that contains the implementation of distributed structures
BlockMapGpu_tests.cu
//
// Created by tommaso on 14/05/19.
//

#define BOOST_TEST_DYN_LINK

#include <boost/test/unit_test.hpp>
#include "SparseGridGpu/BlockMapGpu.hpp"
#include "SparseGridGpu/BlockMapGpu_ker.cuh"
#include "SparseGridGpu/BlockMapGpu_kernels.cuh"
#include "SparseGridGpu/DataBlock.cuh"

#include <limits>
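
// Copy kernel: each thread reads property p at its global index from the
// sparse structure and writes it into a dense output vector. Indices that
// were never inserted read back as the background value.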
template<unsigned int p, typename SparseGridType, typename VectorOutType>
__global__ void copyBlocksToOutput(SparseGridType sparseGrid, VectorOutType output)
{
    int pos = blockIdx.x * blockDim.x + threadIdx.x;
    output.template get<p>(pos) = sparseGrid.template get<p>(pos);
}
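
// Scalar insert kernel: every thread inserts its own global index as the
// value of property p, then the per-block insert queue is flushed.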
template<unsigned int p, typename SparseGridType>
__global__ void insertValues(SparseGridType sparseGrid)
{
    sparseGrid.init();

    int pos = blockIdx.x * blockDim.x + threadIdx.x;

    sparseGrid.template insert<p>(pos) = pos;

    __syncthreads();

    sparseGrid.flush_block_insert();
}
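
// Chunk-wise insert kernel: threads of a CUDA block cooperatively insert
// whole data chunks via insertBlock(), each thread writing its value and
// marking the matching bit in the existence mask.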
template<unsigned int p, unsigned int chunksPerBlock, typename SparseGridType>
__global__ void insertValuesBlocked(SparseGridType sparseGrid)
{
    constexpr unsigned int pMask = SparseGridType::pMask;
    typedef BlockTypeOf<typename SparseGridType::AggregateType, p> BlockT;
    typedef BlockTypeOf<typename SparseGridType::AggregateType, pMask> MaskBlockT;

    sparseGrid.init();

    int pos = blockIdx.x * blockDim.x + threadIdx.x;
    unsigned int dataBlockId = pos / BlockT::size;
    unsigned int offset = pos % BlockT::size;

    auto encap = sparseGrid.template insertBlock<chunksPerBlock>(dataBlockId, BlockT::size);

    encap.template get<p>()[offset] = pos;
    BlockMapGpu_ker<>::setExist(encap.template get<pMask>()[offset]);

    __syncthreads();

    sparseGrid.flush_block_insert();
}
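
// Partial insert kernel: only threads mapping to the first half of each data
// chunk insert a value; the second half is left untouched.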
template<unsigned int p, typename SparseGridType>
__global__ void insertValuesHalfBlock(SparseGridType sparseGrid)
{
    sparseGrid.init();

    int pos = blockIdx.x * blockDim.x + threadIdx.x;

    constexpr unsigned int dataChunkSize = BlockTypeOf<typename SparseGridType::AggregateType, p>::size;
    if (threadIdx.x % dataChunkSize < dataChunkSize / 2)
    {
        sparseGrid.template insert<p>(pos) = pos;
    }

    __syncthreads();

    sparseGrid.flush_block_insert();
}
BOOST_AUTO_TEST_SUITE(BlockMapGpu_tests)
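
// Sanity-check the bit-mask helpers of BlockMapGpu_ker on the host.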
BOOST_AUTO_TEST_CASE(testBitwiseOps)
{
    BOOST_REQUIRE(BlockMapGpu_ker<>::getBit(1,0));
    BOOST_REQUIRE(!BlockMapGpu_ker<>::getBit(2,0));
    BOOST_REQUIRE(BlockMapGpu_ker<>::getBit(2,1));
    BOOST_REQUIRE(BlockMapGpu_ker<>::getBit(3,0));
    BOOST_REQUIRE(BlockMapGpu_ker<>::getBit(3,1));
    unsigned int m = 0;
    BOOST_REQUIRE(!BlockMapGpu_ker<>::getBit(m,0));
    BlockMapGpu_ker<>::setBit(m,0); // set bit 0 of m
    BOOST_REQUIRE(BlockMapGpu_ker<>::getBit(m,0));
    BlockMapGpu_ker<>::unsetBit(m,0); // clear bit 0 of m
    BOOST_REQUIRE(!BlockMapGpu_ker<>::getBit(m,0));
}
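
// A freshly created BlockMapGpu contains no elements, so every read must
// return the background value.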
BOOST_AUTO_TEST_CASE(testBackground)
{
    typedef aggregate<DataBlock<float, 64>> AggregateSGT;
    typedef aggregate<float> AggregateOutT;
    BlockMapGpu<AggregateSGT> sparseGrid;
    sparseGrid.template setBackgroundValue<0>(666);

    const unsigned int gridSize = 10;
    const unsigned int blockSize = 128;

    // Get output
    openfpm::vector_gpu<AggregateOutT> output;
    output.resize(gridSize * blockSize);
    CUDA_LAUNCH_DIM3((copyBlocksToOutput<0>), gridSize, blockSize, sparseGrid.toKernel(), output.toKernel());

    output.template deviceToHost<0>();
    sparseGrid.template deviceToHost<0>();

    // Compare
    bool match = true;
    for (size_t i = 0; i < output.size(); i++)
    {
        match &= output.template get<0>(i) == 666;
        match &= output.template get<0>(i) == sparseGrid.template get<0>(i);
    }

    BOOST_REQUIRE_EQUAL(match, true);
}
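
// Insert one value per thread, flush with the max functor, then verify that
// inserted positions hold their index while untouched positions keep the
// background value.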
BOOST_AUTO_TEST_CASE(testInsert)
{
    typedef aggregate<DataBlock<float, 64>> AggregateT;
    typedef aggregate<float> AggregateOutT;
    BlockMapGpu<AggregateT, 128> blockMap;
    blockMap.template setBackgroundValue<0>(666);

    const unsigned int gridSize = 3;
    const unsigned int bufferPoolSize = 128; // Should be a multiple of BlockT::size
    const unsigned int blockSizeInsert = 128;
    const unsigned int gridSizeRead = gridSize + 1;
    const unsigned int blockSizeRead = 128;

    // Prealloc insert buffer
    blockMap.setGPUInsertBuffer(gridSize, bufferPoolSize);

    // Insert values
    CUDA_LAUNCH_DIM3((insertValues<0>), gridSize, blockSizeInsert, blockMap.toKernel());

    // Flush inserts
    mgpu::ofp_context_t ctx;
    blockMap.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    // Get output
    openfpm::vector_gpu<AggregateOutT> output;
    output.resize(gridSizeRead * blockSizeRead);

    CUDA_LAUNCH_DIM3((copyBlocksToOutput<0>), gridSizeRead, blockSizeRead, blockMap.toKernel(), output.toKernel());

    output.template deviceToHost<0>();
    blockMap.template deviceToHost<0>();

    // Compare
    bool match = true;
    for (size_t i = 0; i < output.size(); i++)
    {
        auto expectedValue = (i < gridSize * blockSizeInsert) ? i : 666;

        match &= output.template get<0>(i) == blockMap.template get<0>(i);
        match &= output.template get<0>(i) == expectedValue;
    }

    BOOST_REQUIRE_EQUAL(match, true);
}
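
// Same as testInsert, but only the first half of each 64-element chunk is
// written; the second half must still read back as the background value.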
BOOST_AUTO_TEST_CASE(testInsert_halfBlock)
{
    typedef aggregate<DataBlock<float, 64>> AggregateT;
    typedef aggregate<float> AggregateOutT;
    BlockMapGpu<AggregateT, 128> blockMap;
    blockMap.template setBackgroundValue<0>(666);

    const unsigned int gridSize = 3;
    const unsigned int bufferPoolSize = 128; // Should be a multiple of BlockT::size
    const unsigned int blockSizeInsert = 128;
    const unsigned int gridSizeRead = gridSize + 1;
    const unsigned int blockSizeRead = 128;

    // Prealloc insert buffer
    blockMap.setGPUInsertBuffer(gridSize, bufferPoolSize);

    // Insert values
    CUDA_LAUNCH_DIM3((insertValuesHalfBlock<0>), gridSize, blockSizeInsert, blockMap.toKernel());

    // Flush inserts
    mgpu::ofp_context_t ctx;
    blockMap.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    // Get output
    openfpm::vector_gpu<AggregateOutT> output;
    output.resize(gridSizeRead * blockSizeRead);

    CUDA_LAUNCH_DIM3((copyBlocksToOutput<0>), gridSizeRead, blockSizeRead, blockMap.toKernel(), output.toKernel());

    output.template deviceToHost<0>();
    blockMap.template deviceToHost<0>();

    // Compare
    bool match = true;
    for (size_t i = 0; i < output.size(); i++)
    {
        auto expectedValue = (i < gridSize * blockSizeInsert) ? i : 666;
        constexpr unsigned int dataChunkSize = BlockTypeOf<AggregateT, 0>::size;
        int offset = i % dataChunkSize;
        if (!(offset < dataChunkSize / 2))
        {
            expectedValue = 666; // Just the first half of each chunk was inserted
        }

        match &= output.template get<0>(i) == blockMap.template get<0>(i);
        match &= output.template get<0>(i) == expectedValue;
    }

    BOOST_REQUIRE_EQUAL(match, true);
}
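
// Insert through the chunk-wise insertBlock() path, two 64-element chunks
// per 128-thread CUDA block, and verify the same pattern as testInsert.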
BOOST_AUTO_TEST_CASE(testInsert_blocked)
{
    typedef aggregate<DataBlock<float, 64>> AggregateT;
    typedef aggregate<float> AggregateOutT;
    BlockMapGpu<AggregateT, 128> sparseGrid;
    sparseGrid.template setBackgroundValue<0>(666);

    const unsigned int gridSize = 3;
    const unsigned int bufferPoolSize = 4; // Insert slots per block; each block inserts chunksPerBlock = 2 chunks here
    const unsigned int blockSizeInsert = 128;
    const unsigned int gridSizeRead = gridSize + 1;
    const unsigned int blockSizeRead = 128;

    // Prealloc insert buffer
    sparseGrid.setGPUInsertBuffer(gridSize, bufferPoolSize);

    // Insert values
    CUDA_LAUNCH_DIM3((insertValuesBlocked<0, 2>), gridSize, blockSizeInsert, sparseGrid.toKernel());

    // Flush inserts
    mgpu::ofp_context_t ctx;
    sparseGrid.flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);

    // Get output
    openfpm::vector_gpu<AggregateOutT> output;
    output.resize(gridSizeRead * blockSizeRead);

    CUDA_LAUNCH_DIM3((copyBlocksToOutput<0>), gridSizeRead, blockSizeRead, sparseGrid.toKernel(), output.toKernel());

    output.template deviceToHost<0>();
    sparseGrid.template deviceToHost<0>();

    // Compare
    bool match = true;
    for (size_t i = 0; i < output.size(); i++)
    {
        auto expectedValue = (i < gridSize * blockSizeInsert) ? i : 666;
        match &= output.template get<0>(i) == sparseGrid.template get<0>(i);
        match &= output.template get<0>(i) == expectedValue;
    }

    BOOST_REQUIRE_EQUAL(match, true);
}
BOOST_AUTO_TEST_SUITE_END()