OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
BlockMapGpu_kernels_tests.cu
1 //
2 // Created by tommaso on 30/05/19.
3 //
4 
5 #define BOOST_TEST_DYN_LINK
6 
7 #include <boost/test/unit_test.hpp>
8 #include "SparseGridGpu/BlockMapGpu.hpp"
9 #include "SparseGridGpu/BlockMapGpu_ker.cuh"
10 #include "SparseGridGpu/BlockMapGpu_kernels.cuh"
11 #include "SparseGridGpu/DataBlock.cuh"
12 #include "Vector/cuda/map_vector_sparse_cuda_kernels.cuh"
13 
14 BOOST_AUTO_TEST_SUITE(BlockMapGpu_kernels_tests)
15 
// Test case: launches BlockMapGpuKernels::segreduce_total twice (once on the BLOCK
// property with mgpu::plus_t, once on the BITMASK property with mgpu::maximum_t) over
// hand-built segment maps, copies the output back to the host and checks every output chunk.
// NOTE(review): `segments`, `outputMap`, `data_new`, `data_old` and `outputData` are used
// below but their declarations are not visible in this listing — confirm against the full file.
16 BOOST_AUTO_TEST_CASE(testSegreduce_total)
17 {
18  typedef float ScalarT;
19  typedef DataBlock<unsigned char, 64> MaskBlockT;
20  typedef DataBlock<ScalarT, 64> BlockT;
22  openfpm::vector_gpu<aggregate<int>> segment_dataMap;
24  openfpm::vector_gpu<aggregate<int>> segments_oldData;
25 
26  // Segment boundaries for the newly added data: 8 offsets describe 7 segments,
  // segment j covering [segments(j), segments(j+1)). The check loop below uses the
  // difference of consecutive entries as the number of new chunks per segment.
27  segments.resize(8);
28  segments.template get<0>(0) = 0;
29  segments.template get<0>(1) = 4;
30  segments.template get<0>(2) = 5;
31  segments.template get<0>(3) = 7;
32  segments.template get<0>(4) = 8;
33  segments.template get<0>(5) = 11;
34  segments.template get<0>(6) = 17;
35  segments.template get<0>(7) = 18; // Id of first non-existent data
36 
37  // segments_oldData indicates, per segment, whether the new data also has old data
38  // to be reduced with; -1 is treated below as "no old data for this segment"
39  segments_oldData.resize(8);
40  segments_oldData.template get<0>(0) = -1;
41  segments_oldData.template get<0>(1) = 2;
42  segments_oldData.template get<0>(2) = -1;
43  segments_oldData.template get<0>(3) = 5;
44  segments_oldData.template get<0>(4) = 7;
45  segments_oldData.template get<0>(5) = -1;
46  segments_oldData.template get<0>(6) = -1;
47  segments_oldData.template get<0>(7) = -1; // Id of first non-existent data
48 
49  // for each added index we have a chunk in the vct_add_data vector
50  // NOTE(review): the original comment trails off here ("unfortunately is not")
51 
52  segment_dataMap.resize(19);
53  segment_dataMap.template get<0>(0) = 10;
54  segment_dataMap.template get<0>(1) = 1;
55  segment_dataMap.template get<0>(2) = 50;
56  segment_dataMap.template get<0>(3) = 11;
57  segment_dataMap.template get<0>(4) = 13;
58  segment_dataMap.template get<0>(5) = 87;
59  segment_dataMap.template get<0>(6) = 54;
60  segment_dataMap.template get<0>(7) = 33;
61  segment_dataMap.template get<0>(8) = 22;
62  segment_dataMap.template get<0>(9) = 17;
63  segment_dataMap.template get<0>(10) = 40;
64  segment_dataMap.template get<0>(11) = 32;
65  segment_dataMap.template get<0>(12) = 80;
66  segment_dataMap.template get<0>(13) = 52;
67  segment_dataMap.template get<0>(14) = 21;
68  segment_dataMap.template get<0>(15) = 76;
69  segment_dataMap.template get<0>(16) = 65;
70  segment_dataMap.template get<0>(17) = 54;
71  segment_dataMap.template get<0>(18) = 3;
72 
  // One output id per segment (7 entries); the check loop reads outputData at these ids.
73  outputMap.resize(7);
74  outputMap.template get<0>(0) = 9;
75  outputMap.template get<0>(1) = 11;
76  outputMap.template get<0>(2) = 13;
77  outputMap.template get<0>(3) = 34;
78  outputMap.template get<0>(4) = 23;
79  outputMap.template get<0>(5) = 90;
80  outputMap.template get<0>(6) = 21;
81 
  // Upload all index maps to the device before launching the kernels.
82  segments.template hostToDevice<0>();
83  segment_dataMap.hostToDevice<0>();
84  segments_oldData.hostToDevice<0>();
85  outputMap.hostToDevice<0>();
86 
  // Property indices used with data_new / data_old / outputData below.
87  const unsigned int BITMASK = 0, BLOCK = 1;
88  BlockT block;
89  MaskBlockT mask;
90  BlockT block_old;
91  MaskBlockT mask_old;
  // Lower half (elements 0..31): flagged in the mask, new value i+1, old value 100+i+1.
92  for (int i = 0; i < 32; ++i)
93  {
94  block[i] = i + 1;
95  mask[i] = 1;
96  block_old[i] = 100 + i + 1;
97  mask_old[i] = 1;
98  }
  // Upper half (elements 32..63): mask cleared, value set to the sentinel 666.
99  for (int i = 32; i < 64; ++i)
100  {
101  block[i] = 666;
102  mask[i] = 0;
103  block_old[i] = 666;
104  mask_old[i] = 0;
105  }
  // A single element flagged only in the old data; it is checked explicitly below.
106  block_old[33] = 665;
107  mask_old[33] = 1;
108 
111  data_new.resize(100);
112  data_old.resize(100);
  // Every chunk of data_new / data_old receives the same mask and block prototype.
113  for (int i = 0; i < 100; ++i)
114  {
115  data_new.template get<BITMASK>(i) = mask;
116  data_new.template get<BLOCK>(i) = block;
  // data_old is resized to 100 above, so this guard always holds in this test.
117  if (i < data_old.size())
118  {
119  data_old.template get<BITMASK>(i) = mask_old;
120  data_old.template get<BLOCK>(i) = block_old;
121  }
122  }
123 
124  data_new.template hostToDevice<BITMASK, BLOCK>();
125  data_old.template hostToDevice<BITMASK, BLOCK>();
126 
127  // Allocate output buffer
129  outputData.resize(100);
130 
  // Segreduce on the block values.
  // presumably CUDA_LAUNCH_DIM3 takes (kernel, grid size, block size, args...), i.e. one
  // CUDA block per segment and 2*BlockT::size threads per block — TODO confirm.
131  CUDA_LAUNCH_DIM3((BlockMapGpuKernels::segreduce_total<BLOCK, 0, BITMASK, 2, mgpu::plus_t<ScalarT>>),segments.size()-1, 2*BlockT::size,
132  data_new.toKernel(),
133  data_old.toKernel(),
134  segments.toKernel(),
135  segment_dataMap.toKernel(),
136  segments_oldData.toKernel(),
137  outputMap.toKernel(),
138  outputData.toKernel());
139 
140  // Segreduce on mask
141  CUDA_LAUNCH_DIM3((BlockMapGpuKernels::segreduce_total<BITMASK, 0, BITMASK, 2, mgpu::maximum_t<unsigned char>>),segments.size()-1, 2*BlockT::size,
142  data_new.toKernel(),
143  data_old.toKernel(),
144  segments.toKernel(),
145  segment_dataMap.toKernel(),
146  segments_oldData.toKernel(),
147  outputMap.toKernel(),
148  outputData.toKernel());
149 
150  // Copy the reduced masks and blocks back to the host
151 
152  outputData.template deviceToHost<BITMASK, BLOCK>();
153 
154  // Check every output chunk referenced by outputMap
155 
156  for (int j = 0 ; j < outputMap.size() ; j++)
157  {
158  int out_id = outputMap.template get<0>(j);
  // Number of new chunks that belong to segment j.
159  int seg_mult = segments.template get<0>(j+1) - segments.template get<0>(j);
160 
161  BlockT outBlock = outputData.template get<BLOCK>(out_id);
162  MaskBlockT outMask = outputData.template get<BITMASK>(out_id);
163 
164  if (segments_oldData.template get<0>(j) != -1)
165  {
  // Segment with old data: the expected value equals seg_mult*(i+1) + (100+i+1),
  // which is consistent with summing seg_mult new blocks plus one old block.
166  for (int i = 0; i < BlockT::size / 2; ++i)
167  {
168  BOOST_REQUIRE_EQUAL(outMask[i],1);
169  BOOST_REQUIRE_EQUAL(outBlock[i],100 + (i+1)*(seg_mult+1));
170  }
  // Element 33 is flagged only in the old data, so only the old value 665 is expected.
171  BOOST_REQUIRE_EQUAL(outMask[33],1);
172  BOOST_REQUIRE_EQUAL(outBlock[33],665);
173  }
174  else
175  {
  // Segment without old data: expected value is seg_mult times the new value i+1.
176  for (int i = 0; i < BlockT::size / 2; ++i)
177  {
178  BOOST_REQUIRE_EQUAL(outMask[i],1);
179  BOOST_REQUIRE_EQUAL(outBlock[i],(i+1)*seg_mult);
180  }
181  }
182  }
183 }
184 
185 
186 BOOST_AUTO_TEST_SUITE_END() // BlockMapGpu_kernels_tests
187 
188 BOOST_AUTO_TEST_SUITE(BlockMapGpu_functors_tests)
189 
190 
// Test case: runs BlockMapGpuKernels::compute_predicate on a hand-built merged key/index
// list, scans each predicate property, runs BlockMapGpuKernels::maps_create and checks the
// resulting segments_oldData, outputMap, copy_old_dst and copy_old_src maps on the host.
// NOTE(review): `merge_keys`, `s_ids`, `p_ids`, `outputMap`, `copy_old_src` and
// `copy_old_dst` are used below but their declarations are not visible in this listing —
// confirm against the full file.
191 BOOST_AUTO_TEST_CASE(test_maps_create)
192 {
193  openfpm::vector_gpu<aggregate<int>> merge_indexes;
195 
198 
202  openfpm::vector_gpu<aggregate<int>> segments_oldData;
203 
  // Merged keys; the trailing comments mark each entry as new or give its old id.
  // Key 56 appears twice (entries 9 and 10): once as old id 6 and once as new.
204  merge_keys.resize(16);
205  merge_keys.template get<0>(0) = 22; // new
206  merge_keys.template get<0>(1) = 23; // new
207  merge_keys.template get<0>(2) = 33; // old id:0
208  merge_keys.template get<0>(3) = 34; // old id:1
209  merge_keys.template get<0>(4) = 36; // old id:2
210  merge_keys.template get<0>(5) = 37; // old id:3
211  merge_keys.template get<0>(6) = 43; // old id:4
212  merge_keys.template get<0>(7) = 45; // old id:5
213  merge_keys.template get<0>(8) = 46; // new
214  merge_keys.template get<0>(9) = 56; // old id:6
215  merge_keys.template get<0>(10) = 56; // new
216  merge_keys.template get<0>(11) = 60; // old id:7
217  merge_keys.template get<0>(12) = 61; // old id: 8
218  merge_keys.template get<0>(13) = 63; // new
219  merge_keys.template get<0>(14) = 64; // new
220  merge_keys.template get<0>(15) = 65; // new
221 
222  // old data has 9 points
223  // new data has 7 segments
224 
  // Merge indexes: entries marked old carry ids 0..8, entries marked new carry ids 9..15.
225  merge_indexes.resize(16);
226  merge_indexes.template get<0>(0) = 9; // new
227  merge_indexes.template get<0>(1) = 10; // new
228  merge_indexes.template get<0>(2) = 0; // old id:0
229  merge_indexes.template get<0>(3) = 1; // old id:1
230  merge_indexes.template get<0>(4) = 2; // old id:2
231  merge_indexes.template get<0>(5) = 3; // old id:3
232  merge_indexes.template get<0>(6) = 4; // old id:4
233  merge_indexes.template get<0>(7) = 5; // old id:5
234  merge_indexes.template get<0>(8) = 11; // new
235  merge_indexes.template get<0>(9) = 6; // old id:6
236  merge_indexes.template get<0>(10) = 12; // new
237  merge_indexes.template get<0>(11) = 7; // old id:7
238  merge_indexes.template get<0>(12) = 8; // old id: 8
239  merge_indexes.template get<0>(13) = 13; // new
240  merge_indexes.template get<0>(14) = 14; // new
241  merge_indexes.template get<0>(15) = 15; // new
242 
243  merge_keys.template hostToDevice<0>();
244  merge_indexes.template hostToDevice<0>();
245 
  // One extra trailing element on top of the 16 merged entries.
246  s_ids.resize(16+1);
247  p_ids.resize(16+1);
248 
249  // zero the trailing element of the predicates (properties 0..3)
  // NOTE(review): property 4 of the trailing element is not zeroed here although
  // property 4 is scanned over s_ids.size() elements below — confirm this is intended.
250  p_ids.template get<0>(p_ids.size()-1) = 0;
251  p_ids.template get<1>(p_ids.size()-1) = 0;
252  p_ids.template get<2>(p_ids.size()-1) = 0;
253  p_ids.template get<3>(p_ids.size()-1) = 0;
254 
  // presumably the two arguments are a start/stop range, so only the trailing element
  // is uploaded — TODO confirm against hostToDevice's signature.
255  p_ids.template hostToDevice<0,1,2,3>(p_ids.size()-1,p_ids.size()-1);
256 
257  auto ite = merge_indexes.getGPUIterator();
258 
  // The literal 9 presumably is the number of old points (see "old data has 9 points"
  // above) — TODO confirm against compute_predicate's parameter list.
259  CUDA_LAUNCH(BlockMapGpuKernels::compute_predicate,ite,merge_keys.toKernel(),merge_indexes.toKernel(),9,p_ids.toKernel());
260 
  // Scan each of the five predicate properties of p_ids into the matching property of s_ids.
261  mgpu::ofp_context_t context;
262  openfpm::scan((int *)p_ids.template getDeviceBuffer<0>(),
263  s_ids.size(),
264  (int *)s_ids.template getDeviceBuffer<0>(),
265  context);
266 
267  openfpm::scan((int *)p_ids.template getDeviceBuffer<1>(),
268  s_ids.size(),
269  (int *)s_ids.template getDeviceBuffer<1>(),
270  context);
271 
272  openfpm::scan((int *)p_ids.template getDeviceBuffer<2>(),
273  s_ids.size(),
274  (int *)s_ids.template getDeviceBuffer<2>(),
275  context);
276 
277  openfpm::scan((int *)p_ids.template getDeviceBuffer<3>(),
278  s_ids.size(),
279  (int *)s_ids.template getDeviceBuffer<3>(),
280  context);
281 
282  openfpm::scan((int *)p_ids.template getDeviceBuffer<4>(),
283  s_ids.size(),
284  (int *)s_ids.template getDeviceBuffer<4>(),
285  context);
286 
287  s_ids.template deviceToHost<0,1,2,3,4>();
288  p_ids.template deviceToHost<0,1,2,3,4>();
289 
  // Each size is the last scan value plus the last predicate value of one property
  // (presumably the scan is exclusive, making this the total count — TODO confirm).
  // seg_old_size and out_map_size are both computed from property 1.
290  size_t copy_old_size = s_ids.template get<3>(s_ids.size()-1) + p_ids.template get<3>(p_ids.size()-1);
291  size_t seg_old_size = s_ids.template get<1>(s_ids.size()-1) + p_ids.template get<1>(p_ids.size()-1);
292  size_t out_map_size = s_ids.template get<1>(s_ids.size()-1) + p_ids.template get<1>(p_ids.size()-1);
293 
294  segments_oldData.resize(seg_old_size);
295  outputMap.resize(out_map_size);
296  copy_old_src.resize(copy_old_size);
297  copy_old_dst.resize(copy_old_size);
298 
299  CUDA_LAUNCH(BlockMapGpuKernels::maps_create,ite,s_ids.toKernel(),p_ids.toKernel(),segments_oldData.toKernel(),outputMap.toKernel(),copy_old_dst.toKernel(),copy_old_src.toKernel());
300 
301  segments_oldData.template deviceToHost<0>();
302  outputMap.template deviceToHost<0>();
303  copy_old_dst.template deviceToHost<0>();
304  copy_old_src.template deviceToHost<0>();
305 
  // Expected sizes: 7 segments / output entries and 8 old entries to copy.
306  BOOST_REQUIRE_EQUAL(seg_old_size,7);
307  BOOST_REQUIRE_EQUAL(out_map_size,7);
308  BOOST_REQUIRE_EQUAL(copy_old_size,8);
309 
  // Only segment 3 is expected to reference old data (old id 6, the duplicated key 56).
310  BOOST_REQUIRE_EQUAL(segments_oldData.template get<0>(0),-1);
311  BOOST_REQUIRE_EQUAL(outputMap.template get<0>(0),0);
312  BOOST_REQUIRE_EQUAL(segments_oldData.template get<0>(1),-1);
313  BOOST_REQUIRE_EQUAL(outputMap.template get<0>(1),1);
314  BOOST_REQUIRE_EQUAL(segments_oldData.template get<0>(2),-1);
315  BOOST_REQUIRE_EQUAL(outputMap.template get<0>(2),8);
316  BOOST_REQUIRE_EQUAL(segments_oldData.template get<0>(3),6);
317  BOOST_REQUIRE_EQUAL(outputMap.template get<0>(3),9);
318  BOOST_REQUIRE_EQUAL(segments_oldData.template get<0>(4),-1);
319  BOOST_REQUIRE_EQUAL(outputMap.template get<0>(4),12);
320  BOOST_REQUIRE_EQUAL(segments_oldData.template get<0>(5),-1);
321  BOOST_REQUIRE_EQUAL(outputMap.template get<0>(5),13);
322  BOOST_REQUIRE_EQUAL(segments_oldData.template get<0>(6),-1);
323  BOOST_REQUIRE_EQUAL(outputMap.template get<0>(6),14);
324 
  // Expected destination ids of the 8 copied old entries.
325  BOOST_REQUIRE_EQUAL(copy_old_dst.template get<0>(0),2);
326  BOOST_REQUIRE_EQUAL(copy_old_dst.template get<0>(1),3);
327  BOOST_REQUIRE_EQUAL(copy_old_dst.template get<0>(2),4);
328  BOOST_REQUIRE_EQUAL(copy_old_dst.template get<0>(3),5);
329  BOOST_REQUIRE_EQUAL(copy_old_dst.template get<0>(4),6);
330  BOOST_REQUIRE_EQUAL(copy_old_dst.template get<0>(5),7);
331  BOOST_REQUIRE_EQUAL(copy_old_dst.template get<0>(6),10);
332  BOOST_REQUIRE_EQUAL(copy_old_dst.template get<0>(7),11);
333 
  // Expected source ids: every old id except 6, which is not expected in the copy list.
334  BOOST_REQUIRE_EQUAL(copy_old_src.template get<0>(0),0);
335  BOOST_REQUIRE_EQUAL(copy_old_src.template get<0>(1),1);
336  BOOST_REQUIRE_EQUAL(copy_old_src.template get<0>(2),2);
337  BOOST_REQUIRE_EQUAL(copy_old_src.template get<0>(3),3);
338  BOOST_REQUIRE_EQUAL(copy_old_src.template get<0>(4),4);
339  BOOST_REQUIRE_EQUAL(copy_old_src.template get<0>(5),5);
340  BOOST_REQUIRE_EQUAL(copy_old_src.template get<0>(6),7);
341  BOOST_REQUIRE_EQUAL(copy_old_src.template get<0>(7),8);
342 }
343 
// Test case: calls BlockMapGpuFunctors::BlockFunctor<128>::solve_conflicts with sadd_<p> on
// a merged key list that contains two duplicated keys, then checks elements 0 and 1 of the
// first 12 output blocks on the host.
// NOTE(review): `segments_new` is used below but its declaration is not visible in this
// listing — confirm against the full file.
344 BOOST_AUTO_TEST_CASE (testSolve_conflicts)
345 {
346  typedef float ScalarT;
347  typedef DataBlock<ScalarT, 64> BlockT;
348  typedef DataBlock<unsigned char, 64> MaskBlockT;
  // p / pMask index the block and mask properties of the data vectors;
  // pInd indexes the single property of the key/index vectors.
349  const unsigned int p=0, pMask=1, pInd=0;
350  openfpm::vector_gpu<aggregate<unsigned int>> keys, mergeIndices, tmpIndices, keysOut, trivial_map;
352  openfpm::vector_gpu<aggregate<BlockT, MaskBlockT>> dataOld, dataNew, tmpData, dataOut;
353  mgpu::ofp_context_t ctx;
354 
355  // Keys: 14 entries; key 4 (entries 3,4) and key 13 (entries 12,13) are duplicated
356  keys.resize(14);
357  keys.template get<pInd>(0) = 0;
358  keys.template get<pInd>(1) = 1;
359  keys.template get<pInd>(2) = 2;
360  keys.template get<pInd>(3) = 4;
361  keys.template get<pInd>(4) = 4;
362  keys.template get<pInd>(5) = 5;
363  keys.template get<pInd>(6) = 6;
364  keys.template get<pInd>(7) = 7;
365  keys.template get<pInd>(8) = 8;
366  keys.template get<pInd>(9) = 9;
367  keys.template get<pInd>(10) = 10;
368  keys.template get<pInd>(11) = 11;
369  keys.template get<pInd>(12) = 13;
370  keys.template get<pInd>(13) = 13;
371 
372  // Merge Indices
  // presumably values below dataOld.size() (5) refer to dataOld and values from 5 upward
  // refer to dataNew shifted by 5; the trailing numbers count the new entries — TODO confirm.
373  mergeIndices.resize(14);
374  mergeIndices.template get<pInd>(0) = 5; // 0
375  mergeIndices.template get<pInd>(1) = 0;
376  mergeIndices.template get<pInd>(2) = 6; // 1
377  mergeIndices.template get<pInd>(3) = 1;
378  mergeIndices.template get<pInd>(4) = 7; // 2
379  mergeIndices.template get<pInd>(5) = 8; // 3
380  mergeIndices.template get<pInd>(6) = 9; // 4
381  mergeIndices.template get<pInd>(7) = 10; // 5
382  mergeIndices.template get<pInd>(8) = 2;
383  mergeIndices.template get<pInd>(9) = 11; // 6
384  mergeIndices.template get<pInd>(10) = 3;
385  mergeIndices.template get<pInd>(11) = 12; // 7
386  mergeIndices.template get<pInd>(12) = 13; // 8
387  mergeIndices.template get<pInd>(13) = 14; // 9
388 
389  // segments new: 10 offsets; the last segment [8,10) holds two entries
390  segments_new.resize(10);
391  segments_new.template get<0>(0) = 0; // 5
392  segments_new.template get<0>(1) = 1; // 6
393  segments_new.template get<0>(2) = 2; // 7
394  segments_new.template get<0>(3) = 3; // 8
395  segments_new.template get<0>(4) = 4; // 9
396  segments_new.template get<0>(5) = 5; // 10
397  segments_new.template get<0>(6) = 6; // 11
398  segments_new.template get<0>(7) = 7; // 12
399  segments_new.template get<0>(8) = 8; // 13,14
400  segments_new.template get<0>(9) = 10;
401 
402  // Fill the data
403  { // We want i to live in a confined scope
404  int i = 0;
  // dataOld chunk i: every element holds i with mask 1; element 0 is then overwritten with 1.
405  dataOld.resize(5);
406  for (; i < dataOld.size(); ++i)
407  {
408  for (int j = 0; j < BlockT::size; ++j)
409  {
410  dataOld.template get<p>(i)[j] = i;
411  dataOld.template get<pMask>(i)[j] = 1;
412  };
413  dataOld.template get<p>(i)[0] = 1;
414  }
  // i keeps counting from dataOld.size(): dataNew chunk ii holds i = ii + 5 in every
  // element with mask 1; element 0 is again overwritten with 1.
415  dataNew.resize(10);
416  for (; i < dataOld.size() + dataNew.size(); ++i)
417  {
418  int ii = i - dataOld.size();
419  for (int j = 0; j < BlockT::size; ++j)
420  {
421  dataNew.template get<p>(ii)[j] = i;
422  dataNew.template get<pMask>(ii)[j] = 1;
423  };
424  dataNew.template get<p>(ii)[0] = 1;
425  }
426  }
427 
429 
  // Identity map with dataNew.size()+1 entries: trivial_map(i) = i.
430  trivial_map.resize(dataNew.size()+1);
431 
432  for (size_t i = 0 ; i < trivial_map.size(); i++)
433  {
434  trivial_map.template get<0>(i) = i;
435  }
436 
437  trivial_map.hostToDevice<0>();
438 
439  // Copy to device
440  keys.hostToDevice<pInd>();
441  mergeIndices.hostToDevice<pInd>();
442  dataOld.hostToDevice<p, pMask>();
443  dataNew.hostToDevice<p, pMask>();
444  segments_new.hostToDevice<0>();
445 
446  BlockMapGpuFunctors::BlockFunctor<128> obj;
447 
448  // Now perform the compaction
449 
450  obj.solve_conflicts<0,
451  decltype(keys),
452  decltype(segments_new),
453  decltype(dataOld),
454  sadd_<p>
455  >(
456  keys, mergeIndices, segments_new, trivial_map,
457  dataOld, dataNew,
458  keysOut, dataOut,
459  ctx
460  );
461 
462  // Now retrieve the output vectors (keysOut is copied back but not asserted on below)
463  keysOut.deviceToHost<pInd>();
464  dataOut.deviceToHost<p, pMask>();
465 
466  // Validation
  // Element 0 is 1 in every input chunk, so 2 is expected only at outputs 3 and 11,
  // which is consistent with the two duplicated keys each merging two chunks.
467  BOOST_REQUIRE_EQUAL(dataOut.template get<p>(0)[0], 1);
468  BOOST_REQUIRE_EQUAL(dataOut.template get<p>(1)[0], 1);
469  BOOST_REQUIRE_EQUAL(dataOut.template get<p>(2)[0], 1);
470  BOOST_REQUIRE_EQUAL(dataOut.template get<p>(3)[0], 2);
471  BOOST_REQUIRE_EQUAL(dataOut.template get<p>(4)[0], 1);
472  BOOST_REQUIRE_EQUAL(dataOut.template get<p>(5)[0], 1);
473  BOOST_REQUIRE_EQUAL(dataOut.template get<p>(6)[0], 1);
474  BOOST_REQUIRE_EQUAL(dataOut.template get<p>(7)[0], 1);
475  BOOST_REQUIRE_EQUAL(dataOut.template get<p>(8)[0], 1);
476  BOOST_REQUIRE_EQUAL(dataOut.template get<p>(9)[0], 1);
477  BOOST_REQUIRE_EQUAL(dataOut.template get<p>(10)[0], 1);
478  BOOST_REQUIRE_EQUAL(dataOut.template get<p>(11)[0], 2);
479 
  // Element 1 carries the chunk's merge index; for the duplicated keys the expected
  // values equal the sums of the two merge indices: 8 = 1 + 7 and 27 = 13 + 14.
480  BOOST_REQUIRE_EQUAL(dataOut.template get<p>(0)[1], 5);
481  BOOST_REQUIRE_EQUAL(dataOut.template get<p>(1)[1], 0);
482  BOOST_REQUIRE_EQUAL(dataOut.template get<p>(2)[1], 6);
483  BOOST_REQUIRE_EQUAL(dataOut.template get<p>(3)[1], 8);
484  BOOST_REQUIRE_EQUAL(dataOut.template get<p>(4)[1], 8);
485  BOOST_REQUIRE_EQUAL(dataOut.template get<p>(5)[1], 9);
486  BOOST_REQUIRE_EQUAL(dataOut.template get<p>(6)[1], 10);
487  BOOST_REQUIRE_EQUAL(dataOut.template get<p>(7)[1], 2);
488  BOOST_REQUIRE_EQUAL(dataOut.template get<p>(8)[1], 11);
489  BOOST_REQUIRE_EQUAL(dataOut.template get<p>(9)[1], 3);
490  BOOST_REQUIRE_EQUAL(dataOut.template get<p>(10)[1], 12);
491  BOOST_REQUIRE_EQUAL(dataOut.template get<p>(11)[1], 27);
492 }
493 
494 BOOST_AUTO_TEST_SUITE_END() // BlockMapGpu_functors_tests
495 
size_t size()
Stub size.
Definition: map_vector.hpp:211
Implementation of 1-D std::vector like structure.
Definition: map_vector.hpp:202