1 #define BOOST_TEST_DYN_LINK
5 #include <boost/test/unit_test.hpp>
6 #include "VCluster/VCluster.hpp"
7 #include "Vector/map_vector.hpp"
8 #include "Vector/cuda/vector_dist_cuda_funcs.cuh"
9 #include "Vector/util/vector_dist_funcs.hpp"
10 #include "Decomposition/CartDecomposition.hpp"
12 #include "Vector/vector_dist.hpp"
13 #include "util/cuda/scan_ofp.cuh"
15 #define SUB_UNIT_FACTOR 1024
17 BOOST_AUTO_TEST_SUITE( vector_dist_gpu_util_func_test )
19 BOOST_AUTO_TEST_CASE( vector_ghost_process_local_particles )
// Seed every particle with a random position in [0,1)^3 and with
// index-derived values for the three properties (scalar prop 0,
// 3-vector prop 1, 3x3-tensor prop 2) so that ghost copies produced
// later in this test can be validated against their source particle.
31 for (
size_t i = 0 ; i < vPrp.
size() ; i++)
33 vPos.template get<0>(i)[0] = (float)rand()/(float)RAND_MAX;
34 vPos.template get<0>(i)[1] = (float)rand()/(float)RAND_MAX;
35 vPos.template get<0>(i)[2] = (float)rand()/(float)RAND_MAX;
37 vPrp.template get<0>(i) = i+12345;
39 vPrp.template get<1>(i)[0] = i;
40 vPrp.template get<1>(i)[1] = i+20000;
41 vPrp.template get<1>(i)[2] = i+50000;
43 vPrp.template get<2>(i)[0][0] = i+60000;
44 vPrp.template get<2>(i)[0][1] = i+70000;
45 vPrp.template get<2>(i)[0][2] = i+80000;
46 vPrp.template get<2>(i)[1][0] = i+90000;
47 vPrp.template get<2>(i)[1][1] = i+100000;
48 vPrp.template get<2>(i)[1][2] = i+110000;
49 vPrp.template get<2>(i)[2][0] = i+120000;
// Fix: the original assigned [2][1] twice in a row (first i+130000 on
// orig line 50, then i+140000 on orig line 51). The first store was dead
// — immediately overwritten — so it has been removed. The surviving
// value (i+140000) is exactly what the original code left in [2][1],
// so the observable state is unchanged.
51 vPrp.template get<2>(i)[2][1] = i+140000;
52 vPrp.template get<2>(i)[2][2] = i+150000;
61 box_f_dev.template get<0>(0)[0] = 0.0;
62 box_f_dev.template get<0>(0)[1] = 0.0;
63 box_f_dev.template get<0>(0)[2] = 0.0;
64 box_f_dev.template get<1>(0)[0] = 0.5;
65 box_f_dev.template get<1>(0)[1] = 1.0;
66 box_f_dev.template get<1>(0)[2] = 1.0;
67 box_f_sv.template get<0>(0) = 0;
69 box_f_dev.template get<0>(1)[0] = 0.0;
70 box_f_dev.template get<0>(1)[1] = 0.0;
71 box_f_dev.template get<0>(1)[2] = 0.0;
72 box_f_dev.template get<1>(1)[0] = 0.3;
73 box_f_dev.template get<1>(1)[1] = 1.0;
74 box_f_dev.template get<1>(1)[2] = 1.0;
75 box_f_sv.template get<0>(1) = 1;
77 box_f_dev.template get<0>(2)[0] = 0.0;
78 box_f_dev.template get<0>(2)[1] = 0.0;
79 box_f_dev.template get<0>(2)[2] = 0.0;
80 box_f_dev.template get<1>(2)[0] = 0.2;
81 box_f_dev.template get<1>(2)[1] = 1.0;
82 box_f_dev.template get<1>(2)[2] = 1.0;
83 box_f_sv.template get<0>(2) = 2;
85 box_f_dev.template get<0>(3)[0] = 0.0;
86 box_f_dev.template get<0>(3)[1] = 0.0;
87 box_f_dev.template get<0>(3)[2] = 0.0;
88 box_f_dev.template get<1>(3)[0] = 0.1;
89 box_f_dev.template get<1>(3)[1] = 1.0;
90 box_f_dev.template get<1>(3)[2] = 1.0;
91 box_f_sv.template get<0>(3) = 3;
94 auto ite = vPos.getGPUIteratorTo(vPos.
size());
96 o_part_loc.resize(vPos.
size()+1);
97 o_part_loc.template get<0>(o_part_loc.
size()-1) = 0;
98 o_part_loc.template hostToDevice<0>(o_part_loc.
size()-1,o_part_loc.
size()-1);
100 box_f_dev.hostToDevice<0,1>();
101 box_f_sv.hostToDevice<0>();
102 vPos.hostToDevice<0>();
103 vPrp.hostToDevice<0,1,2>();
106 CUDA_LAUNCH_DIM3((num_shift_ghost_each_part<3,
float,decltype(box_f_dev.toKernel()),decltype(box_f_sv.toKernel()),decltype(vPos.toKernel()),decltype(o_part_loc.toKernel())>),
108 box_f_dev.toKernel(),box_f_sv.toKernel(),vPos.toKernel(),o_part_loc.toKernel(),(
unsigned int)vPos.
size());
110 o_part_loc.deviceToHost<0>();
114 for (
size_t i = 0 ; i < vPos.
size() ; i++)
116 if (vPos.template get<0>(i)[0] >= 0.5)
117 {match &= o_part_loc.template get<0>(i) == 0;}
118 else if (vPos.template get<0>(i)[0] >= 0.3)
119 {match &= o_part_loc.template get<0>(i) == 1;}
120 else if (vPos.template get<0>(i)[0] >= 0.2)
121 {match &= o_part_loc.template get<0>(i) == 2;}
122 else if (vPos.template get<0>(i)[0] >= 0.1)
123 {match &= o_part_loc.template get<0>(i) == 3;}
125 {match &= o_part_loc.template get<0>(i) == 4;}
128 BOOST_REQUIRE_EQUAL(match,
true);
131 starts.resize(o_part_loc.
size());
133 auto & v_cl = create_vcluster();
134 openfpm::scan((
unsigned int *)o_part_loc.template getDeviceBuffer<0>(), o_part_loc.
size(), (
unsigned int *)starts.template getDeviceBuffer<0>() , v_cl.getGpuContext());
136 starts.deviceToHost<0>(starts.
size()-1,starts.
size()-1);
137 size_t tot = starts.template get<0>(o_part_loc.
size()-1);
143 shifts.template get<0>(0)[0] = 10.0;
144 shifts.template get<0>(0)[1] = 0.0;
145 shifts.template get<0>(0)[2] = 0.0;
147 shifts.template get<0>(1)[0] = 20.0;
148 shifts.template get<0>(1)[1] = 0.0;
149 shifts.template get<0>(1)[2] = 0.0;
151 shifts.template get<0>(2)[0] = 30.0;
152 shifts.template get<0>(2)[1] = 0.0;
153 shifts.template get<0>(2)[2] = 0.0;
155 shifts.template get<0>(3)[0] = 40.0;
156 shifts.template get<0>(3)[1] = 0.0;
157 shifts.template get<0>(3)[2] = 0.0;
159 size_t old = vPos.
size();
160 vPos.resize(vPos.
size() + tot);
161 vPrp.resize(vPrp.
size() + tot);
163 shifts.template hostToDevice<0>();
165 o_part_loc2.resize(tot);
167 CUDA_LAUNCH_DIM3((shift_ghost_each_part<3,
float,decltype(box_f_dev.toKernel()),decltype(box_f_sv.toKernel()),
168 decltype(vPos.toKernel()),decltype(vPrp.toKernel()),
169 decltype(starts.toKernel()),decltype(shifts.toKernel()),
170 decltype(o_part_loc2.toKernel())>),
172 box_f_dev.toKernel(),box_f_sv.toKernel(),
173 vPos.toKernel(),vPrp.toKernel(),
174 starts.toKernel(),shifts.toKernel(),o_part_loc2.toKernel(),(
unsigned int)old,(
unsigned int)old);
176 vPos.deviceToHost<0>();
177 o_part_loc2.deviceToHost<0,1>();
178 vPrp.deviceToHost<0,1,2>();
182 for (
size_t i = 0 ; i < old ; i++)
184 if (vPos.template get<0>(i)[0] >= 0.5)
186 else if (vPos.template get<0>(i)[0] >= 0.3)
188 for (
size_t j = 0 ; j < o_part_loc.template get<0>(i) ; j++)
190 match &= vPos.template get<0>(base)[0] < 1.0 - (j+1.0)*10.0;
191 match &= vPos.template get<0>(base)[0] >= -(j+1.0)*10.0;
193 match &= o_part_loc2.template get<0>(base_o) == i;
194 match &= o_part_loc2.template get<1>(base_o) == j;
198 match &= vPrp.template get<0>(base) == vPrp.template get<0>(i);
200 match &= vPrp.template get<1>(base)[0] == vPrp.template get<1>(i)[0];
201 match &= vPrp.template get<1>(base)[1] == vPrp.template get<1>(i)[1];
202 match &= vPrp.template get<1>(base)[2] == vPrp.template get<1>(i)[2];
204 match &= vPrp.template get<2>(base)[0][0] == vPrp.template get<2>(i)[0][0];
205 match &= vPrp.template get<2>(base)[0][1] == vPrp.template get<2>(i)[0][1];
206 match &= vPrp.template get<2>(base)[0][2] == vPrp.template get<2>(i)[0][2];
207 match &= vPrp.template get<2>(base)[1][0] == vPrp.template get<2>(i)[1][0];
208 match &= vPrp.template get<2>(base)[1][1] == vPrp.template get<2>(i)[1][1];
209 match &= vPrp.template get<2>(base)[1][2] == vPrp.template get<2>(i)[1][2];
210 match &= vPrp.template get<2>(base)[2][0] == vPrp.template get<2>(i)[2][0];
211 match &= vPrp.template get<2>(base)[2][1] == vPrp.template get<2>(i)[2][1];
212 match &= vPrp.template get<2>(base)[2][2] == vPrp.template get<2>(i)[2][2];
218 else if (vPos.template get<0>(i)[0] >= 0.2)
220 for (
size_t j = 0 ; j < o_part_loc.template get<0>(i) ; j++)
222 match &= vPos.template get<0>(base)[0] < 1.0 - (j+1.0)*10.0;
223 match &= vPos.template get<0>(base)[0] >= -(j+1.0)*10.0;
225 match &= o_part_loc2.template get<0>(base_o) == i;
226 match &= o_part_loc2.template get<1>(base_o) == j;
230 match &= vPrp.template get<0>(base) == vPrp.template get<0>(i);
232 match &= vPrp.template get<1>(base)[0] == vPrp.template get<1>(i)[0];
233 match &= vPrp.template get<1>(base)[1] == vPrp.template get<1>(i)[1];
234 match &= vPrp.template get<1>(base)[2] == vPrp.template get<1>(i)[2];
236 match &= vPrp.template get<2>(base)[0][0] == vPrp.template get<2>(i)[0][0];
237 match &= vPrp.template get<2>(base)[0][1] == vPrp.template get<2>(i)[0][1];
238 match &= vPrp.template get<2>(base)[0][2] == vPrp.template get<2>(i)[0][2];
239 match &= vPrp.template get<2>(base)[1][0] == vPrp.template get<2>(i)[1][0];
240 match &= vPrp.template get<2>(base)[1][1] == vPrp.template get<2>(i)[1][1];
241 match &= vPrp.template get<2>(base)[1][2] == vPrp.template get<2>(i)[1][2];
242 match &= vPrp.template get<2>(base)[2][0] == vPrp.template get<2>(i)[2][0];
243 match &= vPrp.template get<2>(base)[2][1] == vPrp.template get<2>(i)[2][1];
244 match &= vPrp.template get<2>(base)[2][2] == vPrp.template get<2>(i)[2][2];
251 else if (vPos.template get<0>(i)[0] >= 0.1)
253 for (
size_t j = 0 ; j < o_part_loc.template get<0>(i) ; j++)
255 match &= vPos.template get<0>(base)[0] < 1.0 - (j+1.0)*10.0;
256 match &= vPos.template get<0>(base)[0] >= -(j+1.0)*10.0;
258 match &= o_part_loc2.template get<0>(base_o) == i;
259 match &= o_part_loc2.template get<1>(base_o) == j;
263 match &= vPrp.template get<0>(base) == vPrp.template get<0>(i);
265 match &= vPrp.template get<1>(base)[0] == vPrp.template get<1>(i)[0];
266 match &= vPrp.template get<1>(base)[1] == vPrp.template get<1>(i)[1];
267 match &= vPrp.template get<1>(base)[2] == vPrp.template get<1>(i)[2];
269 match &= vPrp.template get<2>(base)[0][0] == vPrp.template get<2>(i)[0][0];
270 match &= vPrp.template get<2>(base)[0][1] == vPrp.template get<2>(i)[0][1];
271 match &= vPrp.template get<2>(base)[0][2] == vPrp.template get<2>(i)[0][2];
272 match &= vPrp.template get<2>(base)[1][0] == vPrp.template get<2>(i)[1][0];
273 match &= vPrp.template get<2>(base)[1][1] == vPrp.template get<2>(i)[1][1];
274 match &= vPrp.template get<2>(base)[1][2] == vPrp.template get<2>(i)[1][2];
275 match &= vPrp.template get<2>(base)[2][0] == vPrp.template get<2>(i)[2][0];
276 match &= vPrp.template get<2>(base)[2][1] == vPrp.template get<2>(i)[2][1];
277 match &= vPrp.template get<2>(base)[2][2] == vPrp.template get<2>(i)[2][2];
285 for (
size_t j = 0 ; j < o_part_loc.template get<0>(i) ; j++)
287 match &= vPos.template get<0>(base)[0] < 1.0 - (j+1.0)*10.0;
288 match &= vPos.template get<0>(base)[0] >= -(j+1.0)*10.0;
290 match &= o_part_loc2.template get<0>(base_o) == i;
291 match &= o_part_loc2.template get<1>(base_o) == j;
295 match &= vPrp.template get<0>(base) == vPrp.template get<0>(i);
297 match &= vPrp.template get<1>(base)[0] == vPrp.template get<1>(i)[0];
298 match &= vPrp.template get<1>(base)[1] == vPrp.template get<1>(i)[1];
299 match &= vPrp.template get<1>(base)[2] == vPrp.template get<1>(i)[2];
301 match &= vPrp.template get<2>(base)[0][0] == vPrp.template get<2>(i)[0][0];
302 match &= vPrp.template get<2>(base)[0][1] == vPrp.template get<2>(i)[0][1];
303 match &= vPrp.template get<2>(base)[0][2] == vPrp.template get<2>(i)[0][2];
304 match &= vPrp.template get<2>(base)[1][0] == vPrp.template get<2>(i)[1][0];
305 match &= vPrp.template get<2>(base)[1][1] == vPrp.template get<2>(i)[1][1];
306 match &= vPrp.template get<2>(base)[1][2] == vPrp.template get<2>(i)[1][2];
307 match &= vPrp.template get<2>(base)[2][0] == vPrp.template get<2>(i)[2][0];
308 match &= vPrp.template get<2>(base)[2][1] == vPrp.template get<2>(i)[2][1];
309 match &= vPrp.template get<2>(base)[2][2] == vPrp.template get<2>(i)[2][2];
317 BOOST_REQUIRE_EQUAL(match,
true);
327 for (
size_t i = 0 ; i < old ; i++)
329 vPos2.template get<0>(i)[0] = vPos.template get<0>(i)[0];
330 vPos2.template get<0>(i)[1] = vPos.template get<0>(i)[1];
331 vPos2.template get<0>(i)[2] = vPos.template get<0>(i)[2];
333 vPrp2.template get<0>(i) = vPrp.template get<0>(i);
335 vPrp2.template get<1>(i)[0] = vPrp.template get<1>(i)[0];
336 vPrp2.template get<1>(i)[1] = vPrp.template get<1>(i)[1];
337 vPrp2.template get<1>(i)[2] = vPrp.template get<1>(i)[2];
339 vPrp2.template get<2>(i)[0][0] = vPrp.template get<2>(i)[0][0];
340 vPrp2.template get<2>(i)[0][1] = vPrp.template get<2>(i)[0][1];
341 vPrp2.template get<2>(i)[0][2] = vPrp.template get<2>(i)[0][2];
342 vPrp2.template get<2>(i)[1][0] = vPrp.template get<2>(i)[1][0];
343 vPrp2.template get<2>(i)[1][1] = vPrp.template get<2>(i)[1][1];
344 vPrp2.template get<2>(i)[1][2] = vPrp.template get<2>(i)[1][2];
345 vPrp2.template get<2>(i)[2][0] = vPrp.template get<2>(i)[2][0];
346 vPrp2.template get<2>(i)[2][1] = vPrp.template get<2>(i)[2][1];
347 vPrp2.template get<2>(i)[2][2] = vPrp.template get<2>(i)[2][2];
350 vPos2.resize(vPos.
size());
351 vPrp2.resize(vPrp.
size());
353 vPos2.hostToDevice<0>();
354 vPrp2.hostToDevice<0,1,2>();
356 ite = o_part_loc2.getGPUIterator();
358 CUDA_LAUNCH_DIM3((process_ghost_particles_local<
true,3,decltype(o_part_loc2.toKernel()),decltype(vPos2.toKernel()),decltype(vPrp2.toKernel()),decltype(shifts.toKernel())>),
360 o_part_loc2.toKernel(),vPos2.toKernel(),vPrp2.toKernel(),shifts.toKernel(),(
unsigned int)old);
362 vPos2.template deviceToHost<0>();
363 vPrp2.template deviceToHost<0,1,2>();
365 for (
size_t i = old ; i < vPos.
size() ; i++)
367 match &= vPos.template get<0>(i)[0] == vPos2.template get<0>(i)[0];
368 match &= vPos.template get<0>(i)[1] == vPos2.template get<0>(i)[1];
369 match &= vPos.template get<0>(i)[2] == vPos2.template get<0>(i)[2];
371 match &= vPrp2.template get<0>(i) == vPrp.template get<0>(i);
373 match &= vPrp2.template get<1>(i)[0] == vPrp.template get<1>(i)[0];
374 match &= vPrp2.template get<1>(i)[1] == vPrp.template get<1>(i)[1];
375 match &= vPrp2.template get<1>(i)[2] == vPrp.template get<1>(i)[2];
377 match &= vPrp2.template get<2>(i)[0][0] == vPrp.template get<2>(i)[0][0];
378 match &= vPrp2.template get<2>(i)[0][1] == vPrp.template get<2>(i)[0][1];
379 match &= vPrp2.template get<2>(i)[0][2] == vPrp.template get<2>(i)[0][2];
380 match &= vPrp2.template get<2>(i)[1][0] == vPrp.template get<2>(i)[1][0];
381 match &= vPrp2.template get<2>(i)[1][1] == vPrp.template get<2>(i)[1][1];
382 match &= vPrp2.template get<2>(i)[1][2] == vPrp.template get<2>(i)[1][2];
383 match &= vPrp2.template get<2>(i)[2][0] == vPrp.template get<2>(i)[2][0];
384 match &= vPrp2.template get<2>(i)[2][1] == vPrp.template get<2>(i)[2][1];
385 match &= vPrp2.template get<2>(i)[2][2] == vPrp.template get<2>(i)[2][2];
388 BOOST_REQUIRE_EQUAL(match,
true);
391 BOOST_AUTO_TEST_CASE( vector_ghost_fill_send_buffer_test )
403 auto & v_cl = create_vcluster();
413 for (
size_t i = 0 ; i < vPrp.
size() ; i++)
415 vPrp.template get<0>(i) = i+12345;
417 vPrp.template get<1>(i)[0] = i;
418 vPrp.template get<1>(i)[1] = i+20000;
419 vPrp.template get<1>(i)[2] = i+50000;
421 vPrp.template get<2>(i)[0][0] = i+60000;
422 vPrp.template get<2>(i)[0][1] = i+70000;
423 vPrp.template get<2>(i)[0][2] = i+80000;
424 vPrp.template get<2>(i)[1][0] = i+90000;
425 vPrp.template get<2>(i)[1][1] = i+100000;
426 vPrp.template get<2>(i)[1][2] = i+110000;
427 vPrp.template get<2>(i)[2][0] = i+120000;
428 vPrp.template get<2>(i)[2][1] = i+130000;
429 vPrp.template get<2>(i)[2][2] = i+140000;
432 vPrp.hostToDevice<0,1,2>();
434 g_opart_device.resize(2*10000*3);
436 for (
size_t i = 0 ; i < 3 ; i++)
438 for (
size_t j = 0 ; j < 10000 ; j++)
440 g_opart_device.template get<0>(i*2*10000 + j*2) = i;
441 g_opart_device.template get<0>(i*2*10000 + j*2+1) = i;
443 g_opart_device.template get<1>(i*2*10000 + j*2) = j;
444 g_opart_device.template get<1>(i*2*10000 + j*2+1) = j;
446 g_opart_device.template get<2>(i*2*10000 + j*2) = 0;
447 g_opart_device.template get<2>(i*2*10000 + j*2+1) = 0;
451 g_opart_device.hostToDevice<0,1,2>();
453 g_send_prp.resize(3);
458 for (
size_t i = 0 ; i < 3 ; i++)
460 g_send_prp.get(i).resize(2*10000);
462 auto ite = g_send_prp.get(i).getGPUIterator();
464 CUDA_LAUNCH_DIM3((process_ghost_particles_prp<decltype(g_opart_device.toKernel()),decltype(g_send_prp.get(i).toKernel()),decltype(vPrp.toKernel()),0,1,2>),
466 g_opart_device.toKernel(), g_send_prp.get(i).toKernel(),
467 vPrp.toKernel(),(
unsigned int)offset);
469 offset += g_send_prp.get(i).
size();
473 g_send_prp.get(i).deviceToHost<0,1,2>();
// Verify the ghost send buffer: every source particle j was labeled to
// be sent twice per processor, so slots 2*j and 2*j+1 of each per-target
// buffer must both carry the properties of source particle j.
475 for (
size_t j = 0 ; j < 10000 ; j++)
477 match &= g_send_prp.get(i).template get<0>(2*j) == j+12345;
479 match &= g_send_prp.get(i).template get<1>(2*j)[0] == j;
480 match &= g_send_prp.get(i).template get<1>(2*j)[1] == j+20000;
481 match &= g_send_prp.get(i).template get<1>(2*j)[2] == j+50000;
483 match &= g_send_prp.get(i).template get<2>(2*j)[0][0] == j+60000;
484 match &= g_send_prp.get(i).template get<2>(2*j)[0][1] == j+70000;
485 match &= g_send_prp.get(i).template get<2>(2*j)[0][2] == j+80000;
486 match &= g_send_prp.get(i).template get<2>(2*j)[1][0] == j+90000;
487 match &= g_send_prp.get(i).template get<2>(2*j)[1][1] == j+100000;
488 match &= g_send_prp.get(i).template get<2>(2*j)[1][2] == j+110000;
489 match &= g_send_prp.get(i).template get<2>(2*j)[2][0] == j+120000;
490 match &= g_send_prp.get(i).template get<2>(2*j)[2][1] == j+130000;
491 match &= g_send_prp.get(i).template get<2>(2*j)[2][2] == j+140000;
// Fix: the odd-slot checks below originally used plain assignment
// ("match = ..."), which clobbered every previously accumulated result
// so only the very last comparison (orig line 508) could ever fail the
// BOOST_REQUIRE_EQUAL. They now accumulate with &= exactly like the
// even-slot checks above, making the assertion cover all comparisons.
494 match &= g_send_prp.get(i).template get<0>(2*j+1) == j+12345;
496 match &= g_send_prp.get(i).template get<1>(2*j+1)[0] == j;
497 match &= g_send_prp.get(i).template get<1>(2*j+1)[1] == j+20000;
498 match &= g_send_prp.get(i).template get<1>(2*j+1)[2] == j+50000;
500 match &= g_send_prp.get(i).template get<2>(2*j+1)[0][0] == j+60000;
501 match &= g_send_prp.get(i).template get<2>(2*j+1)[0][1] == j+70000;
502 match &= g_send_prp.get(i).template get<2>(2*j+1)[0][2] == j+80000;
503 match &= g_send_prp.get(i).template get<2>(2*j+1)[1][0] == j+90000;
504 match &= g_send_prp.get(i).template get<2>(2*j+1)[1][1] == j+100000;
505 match &= g_send_prp.get(i).template get<2>(2*j+1)[1][2] == j+110000;
506 match &= g_send_prp.get(i).template get<2>(2*j+1)[2][0] == j+120000;
507 match &= g_send_prp.get(i).template get<2>(2*j+1)[2][1] == j+130000;
508 match &= g_send_prp.get(i).template get<2>(2*j+1)[2][2] == j+140000;
512 BOOST_REQUIRE_EQUAL(match,
true);
515 BOOST_AUTO_TEST_CASE( decomposition_ie_ghost_gpu_test_use )
517 auto & v_cl = create_vcluster();
533 size_t n_sub = n_proc * SUB_UNIT_FACTOR;
536 for (
int i = 0; i < 3; i++)
537 { div[i] = openfpm::math::round_big_2(pow(n_sub,1.0/3));}
543 size_t bc[] = { PERIODIC, PERIODIC, PERIODIC };
546 dec.setParameters(div,box,bc,g);
551 int nsub = dec.getNSubDomain();
552 int n_part = 10000 / nsub;
555 vg.resize(nsub*n_part);
557 for (
size_t k = 0 ; k < nsub ; k++)
561 for (
size_t j = 0 ; j < n_part ; j++)
563 vg.template get<0>(k*n_part+j)[0] = (sp.
getHigh(0) - sp.
getLow(0))*((
float)rand()/(float)RAND_MAX) + sp.
getLow(0);
564 vg.template get<0>(k*n_part+j)[1] = (sp.
getHigh(1) - sp.
getLow(1))*((
float)rand()/(float)RAND_MAX) + sp.
getLow(1);
565 vg.template get<0>(k*n_part+j)[2] = (sp.
getHigh(2) - sp.
getLow(2))*((
float)rand()/(float)RAND_MAX) + sp.
getLow(2);
569 vg.hostToDevice<0>();
573 auto ite = vg.getGPUIterator();
// proc_id_out holds, per particle, the number of ghost processors the
// particle interacts with; one extra trailing slot is zeroed so a later
// exclusive scan over the buffer has a defined last element.
576 proc_id_out.resize(vg.
size()+1);
577 proc_id_out.template get<0>(proc_id_out.
size()-1) = 0;
// NOTE(review): unlike the analogous call earlier in this file
// (o_part_loc.template hostToDevice<0>(...)), this hostToDevice carries
// no explicit <0> property list — confirm this is intended and that the
// <0> was not lost (the "template" keyword with no template-id is
// suspicious).
578 proc_id_out.template hostToDevice(proc_id_out.
size()-1,proc_id_out.
size()-1);
// Launch the counting kernel: for each particle, count how many ghost
// processor regions of the decomposition it falls into.
580 CUDA_LAUNCH_DIM3((num_proc_ghost_each_part<3,
float,decltype(dec.toKernel()),decltype(vg.toKernel()),decltype(proc_id_out.toKernel())>),
582 dec.toKernel(),vg.toKernel(),proc_id_out.toKernel());
687 BOOST_AUTO_TEST_CASE( decomposition_to_gpu_test_use )
689 auto & v_cl = create_vcluster();
703 size_t n_sub = n_proc * SUB_UNIT_FACTOR;
706 for (
int i = 0; i < 3; i++)
707 { div[i] = openfpm::math::round_big_2(pow(n_sub,1.0/3));}
713 size_t bc[] = { PERIODIC, PERIODIC, PERIODIC };
716 dec.setParameters(div,box,bc,g);
722 for (
size_t i = 0 ; i < 10000 ; i++)
724 vg.template get<0>(i)[0] = (float)rand()/(float)RAND_MAX;
725 vg.template get<0>(i)[1] = (float)rand()/(float)RAND_MAX;
726 vg.template get<0>(i)[2] = (float)rand()/(float)RAND_MAX;
729 vg.hostToDevice<0>();
733 auto ite = vg.getGPUIterator();
736 proc_id_out.resize(vg.
size());
739 dev_counter.resize(v_cl.size());
740 dev_counter.fill<0>(0);
741 dev_counter.fill<1>(0);
742 dev_counter.fill<2>(0);
744 CUDA_LAUNCH_DIM3((process_id_proc_each_part<3,
float,decltype(dec.toKernel()),decltype(vg.toKernel()),decltype(proc_id_out.toKernel()),decltype(dev_counter.toKernel())>),
746 dec.toKernel(),vg.toKernel(),proc_id_out.toKernel(),dev_counter.toKernel(),(
int)v_cl.rank());
749 proc_id_out.deviceToHost<0>();
752 for (
size_t i = 0 ; i < proc_id_out.
size() ; i++)
756 match &= proc_id_out.template get<0>(i) == dec.processorIDBC(xp);
761 BOOST_AUTO_TEST_CASE( vector_dist_gpu_find_buffer_offsets_test )
768 for (
size_t k = 0 ; k < vgp.
size() ; k++)
770 vgp.template get<0>(k) = k / 1000;
771 vgp.template get<1>(k) = k / 1000;
780 auto ite = vgp.getGPUIterator();
781 vgp.hostToDevice<0,1>();
783 CUDA_LAUNCH((find_buffer_offsets<1,decltype(vgp.toKernel()),decltype(offs.toKernel())>),ite,vgp.toKernel(),(
int *)mem.
getDevicePointer(),offs.toKernel());
785 offs.template deviceToHost<0,1>();
789 BOOST_REQUIRE_EQUAL(n_ele,199);
794 for (
size_t i = 0 ; i < n_ele ; i++)
796 ofv.add(offs.template get<0>(i));
797 ofv2.add(offs.template get<1>(i));
803 for (
size_t i = 0 ; i < ofv.
size() ; i++)
805 BOOST_REQUIRE_EQUAL(ofv.get(i),(i+1)*1000);
806 BOOST_REQUIRE_EQUAL(ofv2.get(i),i);
810 BOOST_AUTO_TEST_CASE(vector_dist_reorder_lbl)
// Build a 10x10 labeled set: element i*10+j carries segment id i
// (prop 2) and in-segment index j (prop 1); starts<0>(i) = i*10 gives
// the output base offset of segment i for the reorder kernel.
818 for (
int i = 0 ; i < 10 ; i++)
820 for (
int j = 0 ; j < 10 ; j++)
822 lbl_p.template get<2>(i*10+j) = i;
823 lbl_p.template get<1>(i*10+j) = j;
825 starts.template get<0>(i) = (i*10);
829 starts.template hostToDevice<0>();
830 lbl_p.template hostToDevice<1,2>();
832 auto ite = lbl_p.getGPUIterator();
// reorder_lbl scatters each element's own index into prop 0 at
// position starts[segment] + in-segment offset.
834 CUDA_LAUNCH_DIM3((reorder_lbl<decltype(lbl_p.toKernel()),decltype(starts.toKernel())>),ite.wthr,ite.thr,lbl_p.toKernel(),starts.toKernel());
836 starts.template deviceToHost<0>();
837 lbl_p.template deviceToHost<0,1,2>();
// Check the produced permutation: slot j*10+i must reference source
// element i*10+j.
839 for (
int i = 0 ; i < 10 ; i++)
841 for (
int j = 0 ; j < 10 ; j++)
843 BOOST_REQUIRE_EQUAL(lbl_p.template get<0>(j*10+i),i*10+j);
848 BOOST_AUTO_TEST_CASE(vector_dist_gpu_map_fill_send_buffer_test)
858 unsigned int offset = 0;
861 vPrp.resize(vPos.
size());
862 m_opart.resize(vPos.
size());
864 for (
size_t i = 0 ; i < vPos.
size() ; i++)
866 vPos.template get<0>(i)[0] = (float)rand()/(float)RAND_MAX;
867 vPos.template get<0>(i)[1] = (float)rand()/(float)RAND_MAX;
868 vPos.template get<0>(i)[2] = (float)rand()/(float)RAND_MAX;
870 vPrp.template get<0>(i) = 5.0 + (float)rand()/(float)RAND_MAX;
871 vPrp.template get<1>(i)[0] = 10.0 + (float)rand()/(float)RAND_MAX;
872 vPrp.template get<1>(i)[1] = 11.0 + (float)rand()/(float)RAND_MAX;
873 vPrp.template get<2>(i)[0][0] = 40.0 + (float)rand()/(float)RAND_MAX;
874 vPrp.template get<2>(i)[0][1] = 50.0 + (float)rand()/(float)RAND_MAX;
875 vPrp.template get<2>(i)[0][2] = 60.0 + (float)rand()/(float)RAND_MAX;
876 vPrp.template get<2>(i)[1][0] = 70.0 + (float)rand()/(float)RAND_MAX;
877 vPrp.template get<2>(i)[1][1] = 80.0 + (float)rand()/(float)RAND_MAX;
878 vPrp.template get<2>(i)[1][2] = 150.0 + (float)rand()/(float)RAND_MAX;
879 vPrp.template get<2>(i)[2][0] = 160.0 + (float)rand()/(float)RAND_MAX;
880 vPrp.template get<2>(i)[2][1] = 170.0 + (float)rand()/(float)RAND_MAX;
881 vPrp.template get<2>(i)[2][2] = 340.0 + (float)rand()/(float)RAND_MAX;
884 m_opart.template get<1>(i) = seg;
885 m_opart.template get<0>(i) = (9999 - i%10000) + seg * 10000;
891 for (
size_t i = 0 ; i < m_pos.
size() ; i++)
893 m_pos.get(i).resize(10000);
894 m_prp.get(i).resize(10000);
897 vPos.hostToDevice<0>();
898 vPrp.hostToDevice<0,1,2>();
900 m_opart.hostToDevice<0,1>();
902 for (
size_t i = 0 ; i < m_pos.
size() ; i++)
904 auto ite = m_pos.get(i).getGPUIterator();
906 CUDA_LAUNCH_DIM3((process_map_particles<decltype(m_opart.toKernel()),decltype(m_pos.get(i).toKernel()),decltype(m_prp.get(i).toKernel()),
907 decltype(vPos.toKernel()),decltype(vPrp.toKernel())>),
909 m_opart.toKernel(),m_pos.get(i).toKernel(), m_prp.get(i).toKernel(),
910 vPos.toKernel(),vPrp.toKernel(),offset);
912 m_pos.get(i).deviceToHost<0>();
913 m_prp.get(i).deviceToHost<0,1,2>();
917 for (
size_t j = 0 ; j < m_pos.get(i).
size() ; j++)
919 match &= (m_pos.get(i).template get<0>(j)[0] == vPos.template get<0>(m_opart.template get<0>(offset+j))[0]);
920 match &= (m_pos.get(i).template get<0>(j)[1] == vPos.template get<0>(m_opart.template get<0>(offset+j))[1]);
921 match &= (m_pos.get(i).template get<0>(j)[2] == vPos.template get<0>(m_opart.template get<0>(offset+j))[2]);
923 match &= (m_prp.get(i).template get<0>(j) == vPrp.template get<0>(m_opart.template get<0>(offset+j)));
925 match &= (m_prp.get(i).template get<1>(j)[0] == vPrp.template get<1>(m_opart.template get<0>(offset+j))[0]);
926 match &= (m_prp.get(i).template get<1>(j)[1] == vPrp.template get<1>(m_opart.template get<0>(offset+j))[1]);
928 match &= (m_prp.get(i).template get<2>(j)[0][0] == vPrp.template get<2>(m_opart.template get<0>(offset+j))[0][0]);
929 match &= (m_prp.get(i).template get<2>(j)[0][1] == vPrp.template get<2>(m_opart.template get<0>(offset+j))[0][1]);
930 match &= (m_prp.get(i).template get<2>(j)[0][2] == vPrp.template get<2>(m_opart.template get<0>(offset+j))[0][2]);
931 match &= (m_prp.get(i).template get<2>(j)[1][0] == vPrp.template get<2>(m_opart.template get<0>(offset+j))[1][0]);
932 match &= (m_prp.get(i).template get<2>(j)[1][1] == vPrp.template get<2>(m_opart.template get<0>(offset+j))[1][1]);
933 match &= (m_prp.get(i).template get<2>(j)[1][2] == vPrp.template get<2>(m_opart.template get<0>(offset+j))[1][2]);
934 match &= (m_prp.get(i).template get<2>(j)[2][0] == vPrp.template get<2>(m_opart.template get<0>(offset+j))[2][0]);
935 match &= (m_prp.get(i).template get<2>(j)[2][1] == vPrp.template get<2>(m_opart.template get<0>(offset+j))[2][1]);
936 match &= (m_prp.get(i).template get<2>(j)[2][2] == vPrp.template get<2>(m_opart.template get<0>(offset+j))[2][2]);
939 BOOST_REQUIRE_EQUAL(match,
true);
941 offset += m_pos.get(i).
size();
945 template<
unsigned int prp>
946 void vector_dist_remove_marked_type()
948 auto & v_cl = create_vcluster();
950 if (v_cl.size() > 16)
959 size_t bc[3]={PERIODIC,PERIODIC,PERIODIC};
965 auto it = vd.getDomainIterator();
971 vd.getPos(p)[0] = (float)rand() / (float)RAND_MAX;
972 vd.getPos(p)[1] = (float)rand() / (float)RAND_MAX;
973 vd.getPos(p)[2] = (float)rand() / (float)RAND_MAX;
979 vd.template ghost_get<>();
981 it = vd.getDomainIterator();
992 vd.template getProp<0>(p) = fc;
993 vd.template getProp<1>(p) = dc;
994 vd.template getProp<2>(p) = ic;
995 vd.template getProp<3>(p) = sc;
997 vd.template getProp<prp>(p) = (ic % 3 == 0);
1007 size_t sz = vd.size_local() - vd.size_local()/3;
1009 vd.template hostToDeviceProp<0,1,2,3>();
1011 remove_marked<prp>(vd);
1013 BOOST_REQUIRE_EQUAL(vd.size_local(),sz);
1015 vd.template deviceToHostProp<0,1,2,3>();
1017 auto it2 = vd.getDomainIterator();
1028 {test &= ((
int)vd.template getProp<0>(p) % 3 != 0);}
1031 {test &= ((
int)vd.template getProp<1>(p) % 3 != 0);}
1034 {test &= ((
int)vd.template getProp<2>(p) % 3 != 0);}
1037 {test &= ((
int)vd.template getProp<3>(p) % 3 != 0);}
1042 {std::cout << (
int)vd.template getProp<0>(p) << std::endl;}
1045 {std::cout << (
int)vd.template getProp<1>(p) << std::endl;}
1048 {std::cout << (
int)vd.template getProp<2>(p) << std::endl;}
1051 {std::cout << (
int)vd.template getProp<3>(p) << std::endl;}
1059 BOOST_REQUIRE_EQUAL(test,
true);
1064 size_t size_old = vd.size_local();
1067 vd.getPropVector().template fill<prp>(0);
1069 remove_marked<prp>(vd);
1071 BOOST_REQUIRE_EQUAL(vd.size_local(),size_old);
1074 vd.getPropVector().template fill<prp>(1);
1076 remove_marked<prp>(vd);
1078 BOOST_REQUIRE_EQUAL(vd.size_local(),0);
1081 BOOST_AUTO_TEST_CASE(vector_dist_remove_marked)
// Run the remove_marked test once with each of the four properties
// (indices 0..3) acting as the deletion-marker property.
1083 vector_dist_remove_marked_type<0>();
1084 vector_dist_remove_marked_type<1>();
1085 vector_dist_remove_marked_type<2>();
1086 vector_dist_remove_marked_type<3>();
1090 BOOST_AUTO_TEST_CASE( vector_dist_particle_NN_MP_iteration_gpu )
1103 std::default_random_engine eg;
1104 eg.seed(v_cl.
rank()*4533);
1105 std::uniform_real_distribution<float> ud(-L,L);
1109 long int big_step = k / 4;
1110 big_step = (big_step == 0)?1:big_step;
1112 BOOST_TEST_CHECKPOINT(
"Testing 3D periodic vector symmetric cell-list k=" << k );
1117 size_t bc[3]={PERIODIC,PERIODIC,PERIODIC};
1119 float r_cut = 100.0;
1362 BOOST_AUTO_TEST_SUITE_END()
__device__ __host__ T getLow(int i) const
get the i-coordinate of the low bound interval of the box
__device__ __host__ T getHigh(int i) const
get the high interval of the box
This class decompose a space into sub-sub-domains and distribute them across processors.
virtual void * getDevicePointer()
get a readable pointer with the data
virtual void deviceToHost()
Move memory from device to host.
virtual void fill(unsigned char c)
fill the buffer with a byte
virtual void * getPointer()
get a readable pointer with the data
virtual bool allocate(size_t sz)
allocate memory
size_t rank()
Get the process unit id.
size_t getProcessingUnits()
Get the total number of processors.
Implementation of VCluster class.
Grow policy define how the vector should grow every time we exceed the size.
Implementation of 1-D std::vector like structure.
KeyT const ValueT ValueT OffsetIteratorT OffsetIteratorT int
[in] The number of segments that comprise the sorting data
aggregate of properties, from a list of object if create a struct that follow the OPENFPM native stru...
Transform the boost::fusion::vector into memory specification (memory_traits)
This is a container to create a general object.