OpenFPM_pdata 4.1.0
Project that contains the implementation of distributed data structures.
BlockMapGpu_kernels_tests.cu
1//
2// Created by tommaso on 30/05/19.
3//
4
5#define BOOST_TEST_DYN_LINK
6
7#include <boost/test/unit_test.hpp>
8#include "SparseGridGpu/BlockMapGpu.hpp"
9#include "SparseGridGpu/BlockMapGpu_ker.cuh"
10#include "SparseGridGpu/BlockMapGpu_kernels.cuh"
11#include "SparseGridGpu/DataBlock.cuh"
12#include "Vector/cuda/map_vector_sparse_cuda_kernels.cuh"
13
14BOOST_AUTO_TEST_SUITE(BlockMapGpu_kernels_tests)
15
// Tests BlockMapGpuKernels::segreduce_total: a segmented reduction over chunks
// of newly added data (data_new). Segment j covers the new-data entries
// [segments[j], segments[j+1]); if segments_oldData[j] != -1 an already-existing
// chunk from data_old participates in the reduction as well. Results are
// scattered into outputData at the positions listed in outputMap.
// NOTE(review): this is a doxygen listing — the declarations of segments,
// segment_dataMap, outputMap, data_new, data_old and outputData (original
// lines 21-23, 109-110 and 128) are missing from this extraction.
16BOOST_AUTO_TEST_CASE(testSegreduce_total)
17{
18 typedef float ScalarT;
19 typedef DataBlock<unsigned char, 64> MaskBlockT;
20 typedef DataBlock<ScalarT, 64> BlockT;
24 openfpm::vector_gpu<aggregate<int>> segments_oldData;
25
26 // segment reduction for the new added data
27 segments.resize(8);
28 segments.template get<0>(0) = 0;
29 segments.template get<0>(1) = 4;
30 segments.template get<0>(2) = 5;
31 segments.template get<0>(3) = 7;
32 segments.template get<0>(4) = 8;
33 segments.template get<0>(5) = 11;
34 segments.template get<0>(6) = 17;
35 segments.template get<0>(7) = 18; // Id of first non-existent data
36
37 // segment old data indicate if new data has also old data
38 // to be reduced with (-1 means the segment has no old chunk)
39 segments_oldData.resize(8);
40 segments_oldData.template get<0>(0) = -1;
41 segments_oldData.template get<0>(1) = 2;
42 segments_oldData.template get<0>(2) = -1;
43 segments_oldData.template get<0>(3) = 5;
44 segments_oldData.template get<0>(4) = 7;
45 segments_oldData.template get<0>(5) = -1;
46 segments_oldData.template get<0>(6) = -1;
47 segments_oldData.template get<0>(7) = -1; // Id of first non-existent data
48
49 // for each added index we have a chunk in the vct_add_data vector
50 // unfortunately it is not
// contiguous: the values below are non-sequential, so segment_dataMap
// translates each segment entry into its scattered chunk position.
51
52 segment_dataMap.resize(19);
53 segment_dataMap.template get<0>(0) = 10;
54 segment_dataMap.template get<0>(1) = 1;
55 segment_dataMap.template get<0>(2) = 50;
56 segment_dataMap.template get<0>(3) = 11;
57 segment_dataMap.template get<0>(4) = 13;
58 segment_dataMap.template get<0>(5) = 87;
59 segment_dataMap.template get<0>(6) = 54;
60 segment_dataMap.template get<0>(7) = 33;
61 segment_dataMap.template get<0>(8) = 22;
62 segment_dataMap.template get<0>(9) = 17;
63 segment_dataMap.template get<0>(10) = 40;
64 segment_dataMap.template get<0>(11) = 32;
65 segment_dataMap.template get<0>(12) = 80;
66 segment_dataMap.template get<0>(13) = 52;
67 segment_dataMap.template get<0>(14) = 21;
68 segment_dataMap.template get<0>(15) = 76;
69 segment_dataMap.template get<0>(16) = 65;
70 segment_dataMap.template get<0>(17) = 54;
71 segment_dataMap.template get<0>(18) = 3;
72
// Output position (in outputData) for the result of each of the 7 segments.
73 outputMap.resize(7);
74 outputMap.template get<0>(0) = 9;
75 outputMap.template get<0>(1) = 11;
76 outputMap.template get<0>(2) = 13;
77 outputMap.template get<0>(3) = 34;
78 outputMap.template get<0>(4) = 23;
79 outputMap.template get<0>(5) = 90;
80 outputMap.template get<0>(6) = 21;
81
82 segments.template hostToDevice<0>();
83 segment_dataMap.hostToDevice<0>();
84 segments_oldData.hostToDevice<0>();
85 outputMap.hostToDevice<0>();
86
// Build template chunks: elements [0,32) are live (mask=1) with value i+1
// (new) / 100+i+1 (old); elements [32,64) are sentinel 666 with mask=0.
// The old chunk additionally carries one live element at index 33.
87 const unsigned int BITMASK = 0, BLOCK = 1;
88 BlockT block;
89 MaskBlockT mask;
90 BlockT block_old;
91 MaskBlockT mask_old;
92 for (int i = 0; i < 32; ++i)
93 {
94 block[i] = i + 1;
95 mask[i] = 1;
96 block_old[i] = 100 + i + 1;
97 mask_old[i] = 1;
98 }
99 for (int i = 32; i < 64; ++i)
100 {
101 block[i] = 666;
102 mask[i] = 0;
103 block_old[i] = 666;
104 mask_old[i] = 0;
105 }
106 block_old[33] = 665;
107 mask_old[33] = 1;
108
// Replicate the template chunks into every slot of data_new/data_old.
111 data_new.resize(100);
112 data_old.resize(100);
113 for (int i = 0; i < 100; ++i)
114 {
115 data_new.template get<BITMASK>(i) = mask;
116 data_new.template get<BLOCK>(i) = block;
117 if (i < data_old.size())
118 {
119 data_old.template get<BITMASK>(i) = mask_old;
120 data_old.template get<BLOCK>(i) = block_old;
121 }
122 }
123
124 data_new.template hostToDevice<BITMASK, BLOCK>();
125 data_old.template hostToDevice<BITMASK, BLOCK>();
126
127 // Allocate output buffer
129 outputData.resize(100);
130
// Segmented sum on the BLOCK property; one block of 2*BlockT::size threads
// per segment (segments.size()-1 segments).
131 CUDA_LAUNCH_DIM3((BlockMapGpuKernels::segreduce_total<BLOCK, 0, BITMASK, 2, gpu::plus_t<ScalarT>>),segments.size()-1, 2*BlockT::size,
132 data_new.toKernel(),
133 data_old.toKernel(),
134 segments.toKernel(),
135 segment_dataMap.toKernel(),
136 segments_oldData.toKernel(),
137 outputMap.toKernel(),
138 outputData.toKernel());
139
140 // Segreduce on mask
141 CUDA_LAUNCH_DIM3((BlockMapGpuKernels::segreduce_total<BITMASK, 0, BITMASK, 2, gpu::maximum_t<unsigned char>>),segments.size()-1, 2*BlockT::size,
142 data_new.toKernel(),
143 data_old.toKernel(),
144 segments.toKernel(),
145 segment_dataMap.toKernel(),
146 segments_oldData.toKernel(),
147 outputMap.toKernel(),
148 outputData.toKernel());
149
150 // Check
151
152 outputData.template deviceToHost<BITMASK, BLOCK>();
153
154 // Check
155
// Expected per element i: seg_mult new chunks each contribute (i+1), so a
// pure-new segment reduces to (i+1)*seg_mult; when an old chunk joins, the
// result is 100+(i+1) more, i.e. 100 + (i+1)*(seg_mult+1), and the extra
// live old element at index 33 (value 665) survives untouched.
156 for (int j = 0 ; j < outputMap.size() ; j++)
157 {
158 int out_id = outputMap.template get<0>(j);
159 int seg_mult = segments.template get<0>(j+1) - segments.template get<0>(j);
160
161 BlockT outBlock = outputData.template get<BLOCK>(out_id);
162 MaskBlockT outMask = outputData.template get<BITMASK>(out_id);
163
164 if (segments_oldData.template get<0>(j) != -1)
165 {
166 for (int i = 0; i < BlockT::size / 2; ++i)
167 {
168 BOOST_REQUIRE_EQUAL(outMask[i],1);
169 BOOST_REQUIRE_EQUAL(outBlock[i],100 + (i+1)*(seg_mult+1));
170 }
171 BOOST_REQUIRE_EQUAL(outMask[33],1);
172 BOOST_REQUIRE_EQUAL(outBlock[33],665);
173 }
174 else
175 {
176 for (int i = 0; i < BlockT::size / 2; ++i)
177 {
178 BOOST_REQUIRE_EQUAL(outMask[i],1);
179 BOOST_REQUIRE_EQUAL(outBlock[i],(i+1)*seg_mult);
180 }
181 }
182 }
183}
184
185
186BOOST_AUTO_TEST_SUITE_END() // SparseGridGpu_kernels_tests
187
188BOOST_AUTO_TEST_SUITE(BlockMapGpu_functors_tests)
189
190
// Tests BlockMapGpuKernels::compute_predicate + maps_create: from a merged,
// sorted key list (old entries have merge_indexes < 9, new entries >= 9),
// derive (a) segments_oldData — for each surviving new segment, the old-data
// id it must merge with, or -1; (b) outputMap — output position of each new
// segment; (c) copy_old_src/dst — scatter map for old chunks that are kept
// unchanged. Predicate flags are turned into positions via prefix scans.
// NOTE(review): doxygen extraction — declarations of merge_keys,
// merge_indexes, s_ids, p_ids, outputMap, copy_old_dst and copy_old_src
// (original lines 193-201) are missing from this view.
191BOOST_AUTO_TEST_CASE(test_maps_create)
192{
195
198
202 openfpm::vector_gpu<aggregate<int>> segments_oldData;
203
// Merged sorted keys; note the duplicate key 56 (old id:6 followed by a new
// entry) — this is the collision case maps_create must resolve.
204 merge_keys.resize(16);
205 merge_keys.template get<0>(0) = 22; // new
206 merge_keys.template get<0>(1) = 23; // new
207 merge_keys.template get<0>(2) = 33; // old id:0
208 merge_keys.template get<0>(3) = 34; // old id:1
209 merge_keys.template get<0>(4) = 36; // old id:2
210 merge_keys.template get<0>(5) = 37; // old id:3
211 merge_keys.template get<0>(6) = 43; // old id:4
212 merge_keys.template get<0>(7) = 45; // old id:5
213 merge_keys.template get<0>(8) = 46; // new
214 merge_keys.template get<0>(9) = 56; // old id:6
215 merge_keys.template get<0>(10) = 56; // new
216 merge_keys.template get<0>(11) = 60; // old id:7
217 merge_keys.template get<0>(12) = 61; // old id: 8
218 merge_keys.template get<0>(13) = 63; // new
219 merge_keys.template get<0>(14) = 64; // new
220 merge_keys.template get<0>(15) = 65; // new
221
222 // old data has 9 points
223 // new data has 7 segments
224
// Source index of each merged entry: values < 9 point into the old data,
// values >= 9 point into the new data (this is the old/new discriminator
// passed to compute_predicate below).
225 merge_indexes.resize(16);
226 merge_indexes.template get<0>(0) = 9; // new
227 merge_indexes.template get<0>(1) = 10; // new
228 merge_indexes.template get<0>(2) = 0; // old id:0
229 merge_indexes.template get<0>(3) = 1; // old id:1
230 merge_indexes.template get<0>(4) = 2; // old id:2
231 merge_indexes.template get<0>(5) = 3; // old id:3
232 merge_indexes.template get<0>(6) = 4; // old id:4
233 merge_indexes.template get<0>(7) = 5; // old id:5
234 merge_indexes.template get<0>(8) = 11; // new
235 merge_indexes.template get<0>(9) = 6; // old id:6
236 merge_indexes.template get<0>(10) = 12; // new
237 merge_indexes.template get<0>(11) = 7; // old id:7
238 merge_indexes.template get<0>(12) = 8; // old id: 8
239 merge_indexes.template get<0>(13) = 13; // new
240 merge_indexes.template get<0>(14) = 14; // new
241 merge_indexes.template get<0>(15) = 15; // new
242
243 merge_keys.template hostToDevice<0>();
244 merge_indexes.template hostToDevice<0>();
245
// One extra slot so the exclusive-scan total can be read as
// scanned(last) + predicate(last).
246 s_ids.resize(16+1);
247 p_ids.resize(16+1);
248
249 // fill the last of compute predicates
250 p_ids.template get<0>(p_ids.size()-1) = 0;
251 p_ids.template get<1>(p_ids.size()-1) = 0;
252 p_ids.template get<2>(p_ids.size()-1) = 0;
253 p_ids.template get<3>(p_ids.size()-1) = 0;
254
255 p_ids.template hostToDevice<0,1,2,3>(p_ids.size()-1,p_ids.size()-1);
256
257 auto ite = merge_indexes.getGPUIterator();
258
// 9 = number of old points; entries with merge_indexes < 9 are "old".
259 CUDA_LAUNCH(BlockMapGpuKernels::compute_predicate,ite,merge_keys.toKernel(),merge_indexes.toKernel(),9,p_ids.toKernel());
260
// Prefix-scan each predicate channel of p_ids into s_ids, turning per-entry
// flags into output positions.
261 gpu::ofp_context_t context;
262 openfpm::scan((int *)p_ids.template getDeviceBuffer<0>(),
263 s_ids.size(),
264 (int *)s_ids.template getDeviceBuffer<0>(),
265 context);
266
267 openfpm::scan((int *)p_ids.template getDeviceBuffer<1>(),
268 s_ids.size(),
269 (int *)s_ids.template getDeviceBuffer<1>(),
270 context);
271
272 openfpm::scan((int *)p_ids.template getDeviceBuffer<2>(),
273 s_ids.size(),
274 (int *)s_ids.template getDeviceBuffer<2>(),
275 context);
276
277 openfpm::scan((int *)p_ids.template getDeviceBuffer<3>(),
278 s_ids.size(),
279 (int *)s_ids.template getDeviceBuffer<3>(),
280 context);
281
282 openfpm::scan((int *)p_ids.template getDeviceBuffer<4>(),
283 s_ids.size(),
284 (int *)s_ids.template getDeviceBuffer<4>(),
285 context);
286
287 s_ids.template deviceToHost<0,1,2,3,4>();
288 p_ids.template deviceToHost<0,1,2,3,4>();
289
// Totals = last scanned value + last predicate (exclusive-scan idiom).
290 size_t copy_old_size = s_ids.template get<3>(s_ids.size()-1) + p_ids.template get<3>(p_ids.size()-1);
291 size_t seg_old_size = s_ids.template get<1>(s_ids.size()-1) + p_ids.template get<1>(p_ids.size()-1);
292 size_t out_map_size = s_ids.template get<1>(s_ids.size()-1) + p_ids.template get<1>(p_ids.size()-1);
293
294 segments_oldData.resize(seg_old_size);
295 outputMap.resize(out_map_size);
296 copy_old_src.resize(copy_old_size);
297 copy_old_dst.resize(copy_old_size);
298
299 CUDA_LAUNCH(BlockMapGpuKernels::maps_create,ite,s_ids.toKernel(),p_ids.toKernel(),segments_oldData.toKernel(),outputMap.toKernel(),copy_old_dst.toKernel(),copy_old_src.toKernel());
300
301 segments_oldData.template deviceToHost<0>();
302 outputMap.template deviceToHost<0>();
303 copy_old_dst.template deviceToHost<0>();
304 copy_old_src.template deviceToHost<0>();
305
// 7 new segments, 8 old chunks copied verbatim (old id:6 is consumed by the
// key-56 collision instead of being copied).
306 BOOST_REQUIRE_EQUAL(seg_old_size,7);
307 BOOST_REQUIRE_EQUAL(out_map_size,7);
308 BOOST_REQUIRE_EQUAL(copy_old_size,8);
309
// Only segment 3 (the duplicate key 56) merges with old data (old id 6).
310 BOOST_REQUIRE_EQUAL(segments_oldData.template get<0>(0),-1);
311 BOOST_REQUIRE_EQUAL(outputMap.template get<0>(0),0);
312 BOOST_REQUIRE_EQUAL(segments_oldData.template get<0>(1),-1);
313 BOOST_REQUIRE_EQUAL(outputMap.template get<0>(1),1);
314 BOOST_REQUIRE_EQUAL(segments_oldData.template get<0>(2),-1);
315 BOOST_REQUIRE_EQUAL(outputMap.template get<0>(2),8);
316 BOOST_REQUIRE_EQUAL(segments_oldData.template get<0>(3),6);
317 BOOST_REQUIRE_EQUAL(outputMap.template get<0>(3),9);
318 BOOST_REQUIRE_EQUAL(segments_oldData.template get<0>(4),-1);
319 BOOST_REQUIRE_EQUAL(outputMap.template get<0>(4),12);
320 BOOST_REQUIRE_EQUAL(segments_oldData.template get<0>(5),-1);
321 BOOST_REQUIRE_EQUAL(outputMap.template get<0>(5),13);
322 BOOST_REQUIRE_EQUAL(segments_oldData.template get<0>(6),-1);
323 BOOST_REQUIRE_EQUAL(outputMap.template get<0>(6),14);
324
// Old chunks 0-5,7,8 are copied to merged positions 2-7,10,11 (chunk 6 is
// merged, not copied).
325 BOOST_REQUIRE_EQUAL(copy_old_dst.template get<0>(0),2);
326 BOOST_REQUIRE_EQUAL(copy_old_dst.template get<0>(1),3);
327 BOOST_REQUIRE_EQUAL(copy_old_dst.template get<0>(2),4);
328 BOOST_REQUIRE_EQUAL(copy_old_dst.template get<0>(3),5);
329 BOOST_REQUIRE_EQUAL(copy_old_dst.template get<0>(4),6);
330 BOOST_REQUIRE_EQUAL(copy_old_dst.template get<0>(5),7);
331 BOOST_REQUIRE_EQUAL(copy_old_dst.template get<0>(6),10);
332 BOOST_REQUIRE_EQUAL(copy_old_dst.template get<0>(7),11);
333
334 BOOST_REQUIRE_EQUAL(copy_old_src.template get<0>(0),0);
335 BOOST_REQUIRE_EQUAL(copy_old_src.template get<0>(1),1);
336 BOOST_REQUIRE_EQUAL(copy_old_src.template get<0>(2),2);
337 BOOST_REQUIRE_EQUAL(copy_old_src.template get<0>(3),3);
338 BOOST_REQUIRE_EQUAL(copy_old_src.template get<0>(4),4);
339 BOOST_REQUIRE_EQUAL(copy_old_src.template get<0>(5),5);
340 BOOST_REQUIRE_EQUAL(copy_old_src.template get<0>(6),7);
341 BOOST_REQUIRE_EQUAL(copy_old_src.template get<0>(7),8);
342}
343
// Tests BlockMapGpuFunctors::BlockFunctor::solve_conflicts: compacts a merged
// key list (5 old + 10 new chunks) into keysOut/dataOut, reducing chunks that
// share a key. The duplicate keys 4 (old 1 + new 7) and 13 (new 13 + new 14)
// are the conflict cases; the [0] and [1] assertions at the end show they are
// resolved by addition (1+1=2 at element 0, 13+14=27 at element 1).
// NOTE(review): doxygen extraction — the declarations of segments_new and ctx
// and one solve_conflicts template-argument line (original lines 351, 353-354,
// 428-429, 454) are missing from this view.
344BOOST_AUTO_TEST_CASE (testSolve_conflicts)
345{
346 typedef float ScalarT;
347 typedef DataBlock<ScalarT, 64> BlockT;
348 typedef DataBlock<unsigned char, 64> MaskBlockT;
349 const unsigned int p=0, pMask=1, pInd=0;
350 openfpm::vector_gpu<aggregate<unsigned int>> keys, mergeIndices, tmpIndices, keysOut, trivial_map;
352 openfpm::vector_gpu<aggregate<BlockT, MaskBlockT>> dataOld, dataNew, tmpData, dataOut;
354
 // Keys (sorted; 4 and 13 appear twice — these are the conflicts to solve)
356 keys.resize(14);
357 keys.template get<pInd>(0) = 0;
358 keys.template get<pInd>(1) = 1;
359 keys.template get<pInd>(2) = 2;
360 keys.template get<pInd>(3) = 4;
361 keys.template get<pInd>(4) = 4;
362 keys.template get<pInd>(5) = 5;
363 keys.template get<pInd>(6) = 6;
364 keys.template get<pInd>(7) = 7;
365 keys.template get<pInd>(8) = 8;
366 keys.template get<pInd>(9) = 9;
367 keys.template get<pInd>(10) = 10;
368 keys.template get<pInd>(11) = 11;
369 keys.template get<pInd>(12) = 13;
370 keys.template get<pInd>(13) = 13;
371
 // Merge Indices (< 5 select dataOld; >= 5 select dataNew[idx-5];
 // the trailing // n comments number the new entries)
373 mergeIndices.resize(14);
374 mergeIndices.template get<pInd>(0) = 5; // 0
375 mergeIndices.template get<pInd>(1) = 0;
376 mergeIndices.template get<pInd>(2) = 6; // 1
377 mergeIndices.template get<pInd>(3) = 1;
378 mergeIndices.template get<pInd>(4) = 7; // 2
379 mergeIndices.template get<pInd>(5) = 8; // 3
380 mergeIndices.template get<pInd>(6) = 9; // 4
381 mergeIndices.template get<pInd>(7) = 10; // 5
382 mergeIndices.template get<pInd>(8) = 2;
383 mergeIndices.template get<pInd>(9) = 11; // 6
384 mergeIndices.template get<pInd>(10) = 3;
385 mergeIndices.template get<pInd>(11) = 12; // 7
386 mergeIndices.template get<pInd>(12) = 13; // 8
387 mergeIndices.template get<pInd>(13) = 14; // 9
388
 // segments new: start offsets of each new-data segment; the last segment
 // spans two entries (13,14) — the key-13 conflict between two new chunks
390 segments_new.resize(10);
391 segments_new.template get<0>(0) = 0; // 5
392 segments_new.template get<0>(1) = 1; // 6
393 segments_new.template get<0>(2) = 2; // 7
394 segments_new.template get<0>(3) = 3; // 8
395 segments_new.template get<0>(4) = 4; // 9
396 segments_new.template get<0>(5) = 5; // 10
397 segments_new.template get<0>(6) = 6; // 11
398 segments_new.template get<0>(7) = 7; // 12
399 segments_new.template get<0>(8) = 8; // 13,14
400 segments_new.template get<0>(9) = 10;
401
 // Fill the data: chunk i (global numbering over old then new) holds the
 // value i in every element, except element 0 which is forced to 1.
403 { // We want i to live in a confined scope
404 int i = 0;
405 dataOld.resize(5);
406 for (; i < dataOld.size(); ++i)
407 {
408 for (int j = 0; j < BlockT::size; ++j)
409 {
410 dataOld.template get<p>(i)[j] = i;
411 dataOld.template get<pMask>(i)[j] = 1;
412 };
413 dataOld.template get<p>(i)[0] = 1;
414 }
415 dataNew.resize(10);
416 for (; i < dataOld.size() + dataNew.size(); ++i)
417 {
418 int ii = i - dataOld.size();
419 for (int j = 0; j < BlockT::size; ++j)
420 {
421 dataNew.template get<p>(ii)[j] = i;
422 dataNew.template get<pMask>(ii)[j] = 1;
423 };
424 dataNew.template get<p>(ii)[0] = 1;
425 }
426 }
427
429
// Identity data map (new chunk k lives at position k).
430 trivial_map.resize(dataNew.size()+1);
431
432 for (size_t i = 0 ; i < trivial_map.size(); i++)
433 {
434 trivial_map.template get<0>(i) = i;
435 }
436
437 trivial_map.hostToDevice<0>();
438
439 // Copy to device
440 keys.hostToDevice<pInd>();
441 mergeIndices.hostToDevice<pInd>();
442 dataOld.hostToDevice<p, pMask>();
443 dataNew.hostToDevice<p, pMask>();
444 segments_new.hostToDevice<0>();
445
// 128 = chunks processed per CUDA block by the functor.
446 BlockMapGpuFunctors::BlockFunctor<128> obj;
447
448 // Now perform the compaction
449
450 obj.solve_conflicts<0,
451 decltype(keys),
452 decltype(segments_new),
453 decltype(dataOld),
455 >(
456 keys, mergeIndices, segments_new, trivial_map,
457 dataOld, dataNew,
458 keysOut, dataOut,
459 ctx
460 );
461
462 // Now retrieve the dataDst vector
463 keysOut.deviceToHost<pInd>();
464 dataOut.deviceToHost<p, pMask>();
465
 // Validation: element 0 was forced to 1 everywhere, so non-conflicting
 // outputs read 1 and the two conflict outputs (3 and 11) read 1+1=2.
467 BOOST_REQUIRE_EQUAL(dataOut.template get<p>(0)[0], 1);
468 BOOST_REQUIRE_EQUAL(dataOut.template get<p>(1)[0], 1);
469 BOOST_REQUIRE_EQUAL(dataOut.template get<p>(2)[0], 1);
470 BOOST_REQUIRE_EQUAL(dataOut.template get<p>(3)[0], 2);
471 BOOST_REQUIRE_EQUAL(dataOut.template get<p>(4)[0], 1);
472 BOOST_REQUIRE_EQUAL(dataOut.template get<p>(5)[0], 1);
473 BOOST_REQUIRE_EQUAL(dataOut.template get<p>(6)[0], 1);
474 BOOST_REQUIRE_EQUAL(dataOut.template get<p>(7)[0], 1);
475 BOOST_REQUIRE_EQUAL(dataOut.template get<p>(8)[0], 1);
476 BOOST_REQUIRE_EQUAL(dataOut.template get<p>(9)[0], 1);
477 BOOST_REQUIRE_EQUAL(dataOut.template get<p>(10)[0], 1);
478 BOOST_REQUIRE_EQUAL(dataOut.template get<p>(11)[0], 2);
479
// Element 1 keeps the source chunk's value; output 3 is the key-4 conflict
// (old 1 + new 7 → 8... value shown is 8 from the winning reduction of 1+7)
// and output 11 is the key-13 conflict (13 + 14 = 27).
480 BOOST_REQUIRE_EQUAL(dataOut.template get<p>(0)[1], 5);
481 BOOST_REQUIRE_EQUAL(dataOut.template get<p>(1)[1], 0);
482 BOOST_REQUIRE_EQUAL(dataOut.template get<p>(2)[1], 6);
483 BOOST_REQUIRE_EQUAL(dataOut.template get<p>(3)[1], 8);
484 BOOST_REQUIRE_EQUAL(dataOut.template get<p>(4)[1], 8);
485 BOOST_REQUIRE_EQUAL(dataOut.template get<p>(5)[1], 9);
486 BOOST_REQUIRE_EQUAL(dataOut.template get<p>(6)[1], 10);
487 BOOST_REQUIRE_EQUAL(dataOut.template get<p>(7)[1], 2);
488 BOOST_REQUIRE_EQUAL(dataOut.template get<p>(8)[1], 11);
489 BOOST_REQUIRE_EQUAL(dataOut.template get<p>(9)[1], 3);
490 BOOST_REQUIRE_EQUAL(dataOut.template get<p>(10)[1], 12);
491 BOOST_REQUIRE_EQUAL(dataOut.template get<p>(11)[1], 27);
492}
493
494BOOST_AUTO_TEST_SUITE_END() //SparseGridGpu_functors_tests
495
openfpm::vector — implementation of a 1-D std::vector-like structure.
size_t size() — returns the number of elements (documentation stub).