5#define BOOST_TEST_DYN_LINK
7#include <boost/test/unit_test.hpp>
8#include "SparseGridGpu/BlockMapGpu.hpp"
9#include "SparseGridGpu/BlockMapGpu_ker.cuh"
10#include "SparseGridGpu/BlockMapGpu_kernels.cuh"
11#include "SparseGridGpu/DataBlock.cuh"
12#include "Vector/cuda/map_vector_sparse_cuda_kernels.cuh"
14BOOST_AUTO_TEST_SUITE(BlockMapGpu_kernels_tests)
// Test case: fills host-side segment tables and data blocks, launches
// BlockMapGpuKernels::segreduce_total with gpu::plus_t<ScalarT>, and checks the
// reduced blocks/masks on the host.
// NOTE(review): the declarations of the vectors and block types used below are
// not visible in this listing — confirm their types against the full file.
16BOOST_AUTO_TEST_CASE(testSegreduce_total)
18 typedef float ScalarT;
// Segment boundaries: the verification loop below uses
// segments[j+1] - segments[j] as the length of segment j.
28 segments.template get<0>(0) = 0;
29 segments.template get<0>(1) = 4;
30 segments.template get<0>(2) = 5;
31 segments.template get<0>(3) = 7;
32 segments.template get<0>(4) = 8;
33 segments.template get<0>(5) = 11;
34 segments.template get<0>(6) = 17;
35 segments.template get<0>(7) = 18;
// Per-segment entry for old data. The verification below branches on
// "!= -1", so -1 is the value for which no old contribution is expected.
39 segments_oldData.resize(8);
40 segments_oldData.template get<0>(0) = -1;
41 segments_oldData.template get<0>(1) = 2;
42 segments_oldData.template get<0>(2) = -1;
43 segments_oldData.template get<0>(3) = 5;
44 segments_oldData.template get<0>(4) = 7;
45 segments_oldData.template get<0>(5) = -1;
46 segments_oldData.template get<0>(6) = -1;
47 segments_oldData.template get<0>(7) = -1;
// 19 map entries; presumably indices into data_new for each reduced element
// (all values are < 100, the size data_new is resized to) — TODO confirm.
52 segment_dataMap.resize(19);
53 segment_dataMap.template get<0>(0) = 10;
54 segment_dataMap.template get<0>(1) = 1;
55 segment_dataMap.template get<0>(2) = 50;
56 segment_dataMap.template get<0>(3) = 11;
57 segment_dataMap.template get<0>(4) = 13;
58 segment_dataMap.template get<0>(5) = 87;
59 segment_dataMap.template get<0>(6) = 54;
60 segment_dataMap.template get<0>(7) = 33;
61 segment_dataMap.template get<0>(8) = 22;
62 segment_dataMap.template get<0>(9) = 17;
63 segment_dataMap.template get<0>(10) = 40;
64 segment_dataMap.template get<0>(11) = 32;
65 segment_dataMap.template get<0>(12) = 80;
66 segment_dataMap.template get<0>(13) = 52;
67 segment_dataMap.template get<0>(14) = 21;
68 segment_dataMap.template get<0>(15) = 76;
69 segment_dataMap.template get<0>(16) = 65;
70 segment_dataMap.template get<0>(17) = 54;
71 segment_dataMap.template get<0>(18) = 3;
// Output position of each segment: read back below as the index into outputData.
74 outputMap.template get<0>(0) = 9;
75 outputMap.template get<0>(1) = 11;
76 outputMap.template get<0>(2) = 13;
77 outputMap.template get<0>(3) = 34;
78 outputMap.template get<0>(4) = 23;
79 outputMap.template get<0>(5) = 90;
80 outputMap.template get<0>(6) = 21;
// Upload the four index tables to the device.
82 segments.template hostToDevice<0>();
83 segment_dataMap.hostToDevice<0>();
84 segments_oldData.hostToDevice<0>();
85 outputMap.hostToDevice<0>();
// Property indices used for the data vectors: 0 = bitmask, 1 = data block.
87 const unsigned int BITMASK = 0, BLOCK = 1;
// First 32 entries of the old block hold 100 + (i+1).
// NOTE(review): the remaining statements of this loop body are not visible here.
92 for (
int i = 0; i < 32; ++i)
96 block_old[i] = 100 + i + 1;
// Second loop covers entries 32..63; its body is not visible in this listing.
99 for (
int i = 32; i < 64; ++i)
// Every element of data_new / data_old receives the same mask and block.
111 data_new.resize(100);
112 data_old.resize(100);
113 for (
int i = 0; i < 100; ++i)
115 data_new.template get<BITMASK>(i) = mask;
116 data_new.template get<BLOCK>(i) = block;
117 if (i < data_old.
size())
119 data_old.template get<BITMASK>(i) = mask_old;
120 data_old.template get<BLOCK>(i) = block_old;
124 data_new.template hostToDevice<BITMASK, BLOCK>();
125 data_old.template hostToDevice<BITMASK, BLOCK>();
129 outputData.resize(100);
// Launch with segments.size()-1 as the first launch dimension and
// 2*BlockT::size as the second.
131 CUDA_LAUNCH_DIM3((BlockMapGpuKernels::segreduce_total<BLOCK, 0, BITMASK, 2,
gpu::plus_t<ScalarT>>),segments.
size()-1, 2*BlockT::size,
135 segment_dataMap.toKernel(),
136 segments_oldData.toKernel(),
137 outputMap.toKernel(),
138 outputData.toKernel());
// NOTE(review): a second argument list follows; the call it belongs to is not
// visible in this listing.
145 segment_dataMap.toKernel(),
146 segments_oldData.toKernel(),
147 outputMap.toKernel(),
148 outputData.toKernel());
// Bring the reduced masks and blocks back to the host for checking.
152 outputData.template deviceToHost<BITMASK, BLOCK>();
// Verify each output entry against the segment length (seg_mult).
156 for (
int j = 0 ; j < outputMap.
size() ; j++)
158 int out_id = outputMap.template get<0>(j);
159 int seg_mult = segments.template get<0>(j+1) - segments.template get<0>(j);
161 BlockT outBlock = outputData.template get<BLOCK>(out_id);
162 MaskBlockT outMask = outputData.template get<BITMASK>(out_id);
// Segment with an old-data entry: the first half of the block is expected to be
// 100 + (i+1)*(seg_mult+1), i.e. the old value 100+(i+1) plus seg_mult times (i+1).
// NOTE(review): this matches new blocks holding i+1, but the fill of `block` is
// not visible here — confirm.
164 if (segments_oldData.template get<0>(j) != -1)
166 for (
int i = 0; i < BlockT::size / 2; ++i)
168 BOOST_REQUIRE_EQUAL(outMask[i],1);
169 BOOST_REQUIRE_EQUAL(outBlock[i],100 + (i+1)*(seg_mult+1));
// Spot check of one element in the second half of the block; the origin of 665
// depends on the second fill loop, whose body is not visible here.
171 BOOST_REQUIRE_EQUAL(outMask[33],1);
172 BOOST_REQUIRE_EQUAL(outBlock[33],665);
// Presumably the branch for segments without old data (the `else` itself is not
// visible): the first half of the block is expected to be (i+1)*seg_mult.
176 for (
int i = 0; i < BlockT::size / 2; ++i)
178 BOOST_REQUIRE_EQUAL(outMask[i],1);
179 BOOST_REQUIRE_EQUAL(outBlock[i],(i+1)*seg_mult);
186BOOST_AUTO_TEST_SUITE_END()
188BOOST_AUTO_TEST_SUITE(BlockMapGpu_functors_tests)
// Test case: runs BlockMapGpuKernels::compute_predicate, scans the resulting
// predicate properties, then runs BlockMapGpuKernels::maps_create and checks the
// generated maps on the host.
// NOTE(review): the declarations of the vectors used below are not visible in
// this listing — confirm their types against the full file.
191BOOST_AUTO_TEST_CASE(test_maps_create)
// 16 keys in non-decreasing order; key 56 appears twice (positions 9 and 10).
204 merge_keys.resize(16);
205 merge_keys.template get<0>(0) = 22;
206 merge_keys.template get<0>(1) = 23;
207 merge_keys.template get<0>(2) = 33;
208 merge_keys.template get<0>(3) = 34;
209 merge_keys.template get<0>(4) = 36;
210 merge_keys.template get<0>(5) = 37;
211 merge_keys.template get<0>(6) = 43;
212 merge_keys.template get<0>(7) = 45;
213 merge_keys.template get<0>(8) = 46;
214 merge_keys.template get<0>(9) = 56;
215 merge_keys.template get<0>(10) = 56;
216 merge_keys.template get<0>(11) = 60;
217 merge_keys.template get<0>(12) = 61;
218 merge_keys.template get<0>(13) = 63;
219 merge_keys.template get<0>(14) = 64;
220 merge_keys.template get<0>(15) = 65;
// One index per key: a permutation of 0..15.
225 merge_indexes.resize(16);
226 merge_indexes.template get<0>(0) = 9;
227 merge_indexes.template get<0>(1) = 10;
228 merge_indexes.template get<0>(2) = 0;
229 merge_indexes.template get<0>(3) = 1;
230 merge_indexes.template get<0>(4) = 2;
231 merge_indexes.template get<0>(5) = 3;
232 merge_indexes.template get<0>(6) = 4;
233 merge_indexes.template get<0>(7) = 5;
234 merge_indexes.template get<0>(8) = 11;
235 merge_indexes.template get<0>(9) = 6;
236 merge_indexes.template get<0>(10) = 12;
237 merge_indexes.template get<0>(11) = 7;
238 merge_indexes.template get<0>(12) = 8;
239 merge_indexes.template get<0>(13) = 13;
240 merge_indexes.template get<0>(14) = 14;
241 merge_indexes.template get<0>(15) = 15;
243 merge_keys.template hostToDevice<0>();
244 merge_indexes.template hostToDevice<0>();
// Zero properties 0..3 of the last p_ids element and upload only that element
// (start and stop of the copy are both size()-1).
// NOTE(review): property 4 is scanned below but is neither zeroed nor uploaded
// here — confirm this is intentional.
250 p_ids.template get<0>(p_ids.
size()-1) = 0;
251 p_ids.template get<1>(p_ids.
size()-1) = 0;
252 p_ids.template get<2>(p_ids.
size()-1) = 0;
253 p_ids.template get<3>(p_ids.
size()-1) = 0;
255 p_ids.template hostToDevice<0,1,2,3>(p_ids.
size()-1,p_ids.
size()-1);
257 auto ite = merge_indexes.getGPUIterator();
// NOTE(review): the literal 9 is presumably the number of pre-existing (old)
// elements, i.e. merge indices 0..8 — verify against the kernel signature.
259 CUDA_LAUNCH(BlockMapGpuKernels::compute_predicate,ite,merge_keys.toKernel(),merge_indexes.toKernel(),9,p_ids.toKernel());
// Scan each of the five p_ids properties into the matching s_ids property.
// The remaining arguments of each openfpm::scan call are not visible in this listing.
262 openfpm::scan((
int *)p_ids.template getDeviceBuffer<0>(),
264 (
int *)s_ids.template getDeviceBuffer<0>(),
267 openfpm::scan((
int *)p_ids.template getDeviceBuffer<1>(),
269 (
int *)s_ids.template getDeviceBuffer<1>(),
272 openfpm::scan((
int *)p_ids.template getDeviceBuffer<2>(),
274 (
int *)s_ids.template getDeviceBuffer<2>(),
277 openfpm::scan((
int *)p_ids.template getDeviceBuffer<3>(),
279 (
int *)s_ids.template getDeviceBuffer<3>(),
282 openfpm::scan((
int *)p_ids.template getDeviceBuffer<4>(),
284 (
int *)s_ids.template getDeviceBuffer<4>(),
287 s_ids.template deviceToHost<0,1,2,3,4>();
288 p_ids.template deviceToHost<0,1,2,3,4>();
// Output sizes: last scan entry plus last predicate entry (the total count,
// assuming the scan is exclusive — TODO confirm). seg_old_size and out_map_size
// are both derived from property 1.
290 size_t copy_old_size = s_ids.template get<3>(s_ids.
size()-1) + p_ids.template get<3>(p_ids.
size()-1);
291 size_t seg_old_size = s_ids.template get<1>(s_ids.
size()-1) + p_ids.template get<1>(p_ids.
size()-1);
292 size_t out_map_size = s_ids.template get<1>(s_ids.
size()-1) + p_ids.template get<1>(p_ids.
size()-1);
294 segments_oldData.resize(seg_old_size);
295 outputMap.resize(out_map_size);
296 copy_old_src.resize(copy_old_size);
297 copy_old_dst.resize(copy_old_size);
// Build the four maps on the device, then copy them back for checking.
299 CUDA_LAUNCH(BlockMapGpuKernels::maps_create,ite,s_ids.toKernel(),p_ids.toKernel(),segments_oldData.toKernel(),outputMap.toKernel(),copy_old_dst.toKernel(),copy_old_src.toKernel());
301 segments_oldData.template deviceToHost<0>();
302 outputMap.template deviceToHost<0>();
303 copy_old_dst.template deviceToHost<0>();
304 copy_old_src.template deviceToHost<0>();
// Expected sizes: 7 segment/output entries and 8 copied old entries.
306 BOOST_REQUIRE_EQUAL(seg_old_size,7);
307 BOOST_REQUIRE_EQUAL(out_map_size,7);
308 BOOST_REQUIRE_EQUAL(copy_old_size,8);
// Expected (segments_oldData, outputMap) pairs; only entry 3 carries an old
// index (6), every other entry is -1.
310 BOOST_REQUIRE_EQUAL(segments_oldData.template get<0>(0),-1);
311 BOOST_REQUIRE_EQUAL(outputMap.template get<0>(0),0);
312 BOOST_REQUIRE_EQUAL(segments_oldData.template get<0>(1),-1);
313 BOOST_REQUIRE_EQUAL(outputMap.template get<0>(1),1);
314 BOOST_REQUIRE_EQUAL(segments_oldData.template get<0>(2),-1);
315 BOOST_REQUIRE_EQUAL(outputMap.template get<0>(2),8);
316 BOOST_REQUIRE_EQUAL(segments_oldData.template get<0>(3),6);
317 BOOST_REQUIRE_EQUAL(outputMap.template get<0>(3),9);
318 BOOST_REQUIRE_EQUAL(segments_oldData.template get<0>(4),-1);
319 BOOST_REQUIRE_EQUAL(outputMap.template get<0>(4),12);
320 BOOST_REQUIRE_EQUAL(segments_oldData.template get<0>(5),-1);
321 BOOST_REQUIRE_EQUAL(outputMap.template get<0>(5),13);
322 BOOST_REQUIRE_EQUAL(segments_oldData.template get<0>(6),-1);
323 BOOST_REQUIRE_EQUAL(outputMap.template get<0>(6),14);
// Expected destination positions of the eight copied old entries.
325 BOOST_REQUIRE_EQUAL(copy_old_dst.template get<0>(0),2);
326 BOOST_REQUIRE_EQUAL(copy_old_dst.template get<0>(1),3);
327 BOOST_REQUIRE_EQUAL(copy_old_dst.template get<0>(2),4);
328 BOOST_REQUIRE_EQUAL(copy_old_dst.template get<0>(3),5);
329 BOOST_REQUIRE_EQUAL(copy_old_dst.template get<0>(4),6);
330 BOOST_REQUIRE_EQUAL(copy_old_dst.template get<0>(5),7);
331 BOOST_REQUIRE_EQUAL(copy_old_dst.template get<0>(6),10);
332 BOOST_REQUIRE_EQUAL(copy_old_dst.template get<0>(7),11);
// Expected source indices: 0..8 without 6, the index reported in
// segments_oldData[3] above.
334 BOOST_REQUIRE_EQUAL(copy_old_src.template get<0>(0),0);
335 BOOST_REQUIRE_EQUAL(copy_old_src.template get<0>(1),1);
336 BOOST_REQUIRE_EQUAL(copy_old_src.template get<0>(2),2);
337 BOOST_REQUIRE_EQUAL(copy_old_src.template get<0>(3),3);
338 BOOST_REQUIRE_EQUAL(copy_old_src.template get<0>(4),4);
339 BOOST_REQUIRE_EQUAL(copy_old_src.template get<0>(5),5);
340 BOOST_REQUIRE_EQUAL(copy_old_src.template get<0>(6),7);
341 BOOST_REQUIRE_EQUAL(copy_old_src.template get<0>(7),8);
// Test case: builds keys, merge indices, segment boundaries and old/new data
// blocks on the host, calls BlockMapGpuFunctors::BlockFunctor<128>::solve_conflicts,
// and checks the first two elements of every output block.
// NOTE(review): the declarations of the containers and block types used below
// are not visible in this listing — confirm their types against the full file.
344BOOST_AUTO_TEST_CASE (testSolve_conflicts)
346 typedef float ScalarT;
// Property indices: p = data block, pMask = mask block, pInd = index property.
349 const unsigned int p=0, pMask=1, pInd=0;
// 14 keys in non-decreasing order; keys 4 (positions 3,4) and 13 (positions
// 12,13) each appear twice.
357 keys.template get<pInd>(0) = 0;
358 keys.template get<pInd>(1) = 1;
359 keys.template get<pInd>(2) = 2;
360 keys.template get<pInd>(3) = 4;
361 keys.template get<pInd>(4) = 4;
362 keys.template get<pInd>(5) = 5;
363 keys.template get<pInd>(6) = 6;
364 keys.template get<pInd>(7) = 7;
365 keys.template get<pInd>(8) = 8;
366 keys.template get<pInd>(9) = 9;
367 keys.template get<pInd>(10) = 10;
368 keys.template get<pInd>(11) = 11;
369 keys.template get<pInd>(12) = 13;
370 keys.template get<pInd>(13) = 13;
// One merge index per key.
373 mergeIndices.resize(14);
374 mergeIndices.template get<pInd>(0) = 5;
375 mergeIndices.template get<pInd>(1) = 0;
376 mergeIndices.template get<pInd>(2) = 6;
377 mergeIndices.template get<pInd>(3) = 1;
378 mergeIndices.template get<pInd>(4) = 7;
379 mergeIndices.template get<pInd>(5) = 8;
380 mergeIndices.template get<pInd>(6) = 9;
381 mergeIndices.template get<pInd>(7) = 10;
382 mergeIndices.template get<pInd>(8) = 2;
383 mergeIndices.template get<pInd>(9) = 11;
384 mergeIndices.template get<pInd>(10) = 3;
385 mergeIndices.template get<pInd>(11) = 12;
386 mergeIndices.template get<pInd>(12) = 13;
387 mergeIndices.template get<pInd>(13) = 14;
// Ten boundary values; the last step (8 -> 10) is the only one larger than 1.
390 segments_new.resize(10);
391 segments_new.template get<0>(0) = 0;
392 segments_new.template get<0>(1) = 1;
393 segments_new.template get<0>(2) = 2;
394 segments_new.template get<0>(3) = 3;
395 segments_new.template get<0>(4) = 4;
396 segments_new.template get<0>(5) = 5;
397 segments_new.template get<0>(6) = 6;
398 segments_new.template get<0>(7) = 7;
399 segments_new.template get<0>(8) = 8;
400 segments_new.template get<0>(9) = 10;
// Fill the old blocks: every element of block i holds i with mask 1, then
// element 0 is overwritten with 1. The initialisation of `i` is not visible here.
406 for (; i < dataOld.
size(); ++i)
408 for (
int j = 0; j < BlockT::size; ++j)
410 dataOld.template get<p>(i)[j] = i;
411 dataOld.template get<pMask>(i)[j] = 1;
413 dataOld.template get<p>(i)[0] = 1;
// Fill the new blocks the same way, continuing the running index i;
// ii is the position inside dataNew.
416 for (; i < dataOld.
size() + dataNew.
size(); ++i)
418 int ii = i - dataOld.
size();
419 for (
int j = 0; j < BlockT::size; ++j)
421 dataNew.template get<p>(ii)[j] = i;
422 dataNew.template get<pMask>(ii)[j] = 1;
424 dataNew.template get<p>(ii)[0] = 1;
// Identity map with dataNew.size()+1 entries.
430 trivial_map.resize(dataNew.
size()+1);
432 for (
size_t i = 0 ; i < trivial_map.
size(); i++)
434 trivial_map.template get<0>(i) = i;
437 trivial_map.hostToDevice<0>();
// Upload all inputs to the device.
440 keys.hostToDevice<pInd>();
441 mergeIndices.hostToDevice<pInd>();
442 dataOld.hostToDevice<p, pMask>();
443 dataNew.hostToDevice<p, pMask>();
444 segments_new.hostToDevice<0>();
446 BlockMapGpuFunctors::BlockFunctor<128> obj;
// NOTE(review): most template and call arguments of solve_conflicts are not
// visible in this listing.
450 obj.solve_conflicts<0,
452 decltype(segments_new),
456 keys, mergeIndices, segments_new, trivial_map,
463 keysOut.deviceToHost<pInd>();
464 dataOut.deviceToHost<p, pMask>();
// Element 0 of each output block: 1 everywhere except blocks 3 and 11, where 2
// is expected (1 + 1 for the two blocks sharing a key).
467 BOOST_REQUIRE_EQUAL(dataOut.template get<p>(0)[0], 1);
468 BOOST_REQUIRE_EQUAL(dataOut.template get<p>(1)[0], 1);
469 BOOST_REQUIRE_EQUAL(dataOut.template get<p>(2)[0], 1);
470 BOOST_REQUIRE_EQUAL(dataOut.template get<p>(3)[0], 2);
471 BOOST_REQUIRE_EQUAL(dataOut.template get<p>(4)[0], 1);
472 BOOST_REQUIRE_EQUAL(dataOut.template get<p>(5)[0], 1);
473 BOOST_REQUIRE_EQUAL(dataOut.template get<p>(6)[0], 1);
474 BOOST_REQUIRE_EQUAL(dataOut.template get<p>(7)[0], 1);
475 BOOST_REQUIRE_EQUAL(dataOut.template get<p>(8)[0], 1);
476 BOOST_REQUIRE_EQUAL(dataOut.template get<p>(9)[0], 1);
477 BOOST_REQUIRE_EQUAL(dataOut.template get<p>(10)[0], 1);
478 BOOST_REQUIRE_EQUAL(dataOut.template get<p>(11)[0], 2);
// Element 1 of each output block: the merge index for unique keys, and the sum
// of the two merge indices for the duplicated keys (1 + 7 = 8, 13 + 14 = 27).
480 BOOST_REQUIRE_EQUAL(dataOut.template get<p>(0)[1], 5);
481 BOOST_REQUIRE_EQUAL(dataOut.template get<p>(1)[1], 0);
482 BOOST_REQUIRE_EQUAL(dataOut.template get<p>(2)[1], 6);
483 BOOST_REQUIRE_EQUAL(dataOut.template get<p>(3)[1], 8);
484 BOOST_REQUIRE_EQUAL(dataOut.template get<p>(4)[1], 8);
485 BOOST_REQUIRE_EQUAL(dataOut.template get<p>(5)[1], 9);
486 BOOST_REQUIRE_EQUAL(dataOut.template get<p>(6)[1], 10);
487 BOOST_REQUIRE_EQUAL(dataOut.template get<p>(7)[1], 2);
488 BOOST_REQUIRE_EQUAL(dataOut.template get<p>(8)[1], 11);
489 BOOST_REQUIRE_EQUAL(dataOut.template get<p>(9)[1], 3);
490 BOOST_REQUIRE_EQUAL(dataOut.template get<p>(10)[1], 12);
491 BOOST_REQUIRE_EQUAL(dataOut.template get<p>(11)[1], 27);
494BOOST_AUTO_TEST_SUITE_END()
Implementation of a 1-D, std::vector-like structure.