1#define BOOST_TEST_DYN_LINK
5#include <boost/test/unit_test.hpp>
6#include "VCluster/VCluster.hpp"
7#include "Vector/map_vector.hpp"
8#include "Vector/cuda/vector_dist_cuda_funcs.cuh"
9#include "Vector/util/vector_dist_funcs.hpp"
10#include "Decomposition/CartDecomposition.hpp"
12#include "Vector/vector_dist.hpp"
13#include "util/cuda/scan_ofp.cuh"
15#define SUB_UNIT_FACTOR 1024
17BOOST_AUTO_TEST_SUITE( vector_dist_gpu_util_func_test )
19BOOST_AUTO_TEST_CASE( vector_ghost_process_local_particles )
31 for (
size_t i = 0 ; i < v_prp.
size() ; i++)
33 v_pos.template get<0>(i)[0] = (float)rand()/(float)RAND_MAX;
34 v_pos.template get<0>(i)[1] = (float)rand()/(float)RAND_MAX;
35 v_pos.template get<0>(i)[2] = (float)rand()/(float)RAND_MAX;
37 v_prp.template get<0>(i) = i+12345;
39 v_prp.template get<1>(i)[0] = i;
40 v_prp.template get<1>(i)[1] = i+20000;
41 v_prp.template get<1>(i)[2] = i+50000;
43 v_prp.template get<2>(i)[0][0] = i+60000;
44 v_prp.template get<2>(i)[0][1] = i+70000;
45 v_prp.template get<2>(i)[0][2] = i+80000;
46 v_prp.template get<2>(i)[1][0] = i+90000;
47 v_prp.template get<2>(i)[1][1] = i+100000;
48 v_prp.template get<2>(i)[1][2] = i+110000;
49 v_prp.template get<2>(i)[2][0] = i+120000;
50 v_prp.template get<2>(i)[2][1] = i+130000;
51 v_prp.template get<2>(i)[2][1] = i+140000;
52 v_prp.template get<2>(i)[2][2] = i+150000;
61 box_f_dev.template get<0>(0)[0] = 0.0;
62 box_f_dev.template get<0>(0)[1] = 0.0;
63 box_f_dev.template get<0>(0)[2] = 0.0;
64 box_f_dev.template get<1>(0)[0] = 0.5;
65 box_f_dev.template get<1>(0)[1] = 1.0;
66 box_f_dev.template get<1>(0)[2] = 1.0;
67 box_f_sv.template get<0>(0) = 0;
69 box_f_dev.template get<0>(1)[0] = 0.0;
70 box_f_dev.template get<0>(1)[1] = 0.0;
71 box_f_dev.template get<0>(1)[2] = 0.0;
72 box_f_dev.template get<1>(1)[0] = 0.3;
73 box_f_dev.template get<1>(1)[1] = 1.0;
74 box_f_dev.template get<1>(1)[2] = 1.0;
75 box_f_sv.template get<0>(1) = 1;
77 box_f_dev.template get<0>(2)[0] = 0.0;
78 box_f_dev.template get<0>(2)[1] = 0.0;
79 box_f_dev.template get<0>(2)[2] = 0.0;
80 box_f_dev.template get<1>(2)[0] = 0.2;
81 box_f_dev.template get<1>(2)[1] = 1.0;
82 box_f_dev.template get<1>(2)[2] = 1.0;
83 box_f_sv.template get<0>(2) = 2;
85 box_f_dev.template get<0>(3)[0] = 0.0;
86 box_f_dev.template get<0>(3)[1] = 0.0;
87 box_f_dev.template get<0>(3)[2] = 0.0;
88 box_f_dev.template get<1>(3)[0] = 0.1;
89 box_f_dev.template get<1>(3)[1] = 1.0;
90 box_f_dev.template get<1>(3)[2] = 1.0;
91 box_f_sv.template get<0>(3) = 3;
94 auto ite = v_pos.getGPUIteratorTo(v_pos.
size());
96 o_part_loc.resize(v_pos.
size()+1);
97 o_part_loc.template get<0>(o_part_loc.
size()-1) = 0;
98 o_part_loc.template hostToDevice<0>(o_part_loc.
size()-1,o_part_loc.
size()-1);
100 box_f_dev.hostToDevice<0,1>();
101 box_f_sv.hostToDevice<0>();
102 v_pos.hostToDevice<0>();
103 v_prp.hostToDevice<0,1,2>();
106 CUDA_LAUNCH_DIM3((num_shift_ghost_each_part<3,
float,
decltype(box_f_dev.toKernel()),
decltype(box_f_sv.toKernel()),
decltype(v_pos.toKernel()),
decltype(o_part_loc.toKernel())>),
108 box_f_dev.toKernel(),box_f_sv.toKernel(),v_pos.toKernel(),o_part_loc.toKernel(),v_pos.
size());
110 o_part_loc.deviceToHost<0>();
114 for (
size_t i = 0 ; i < v_pos.
size() ; i++)
116 if (v_pos.template get<0>(i)[0] >= 0.5)
117 {match &= o_part_loc.template get<0>(i) == 0;}
118 else if (v_pos.template get<0>(i)[0] >= 0.3)
119 {match &= o_part_loc.template get<0>(i) == 1;}
120 else if (v_pos.template get<0>(i)[0] >= 0.2)
121 {match &= o_part_loc.template get<0>(i) == 2;}
122 else if (v_pos.template get<0>(i)[0] >= 0.1)
123 {match &= o_part_loc.template get<0>(i) == 3;}
125 {match &= o_part_loc.template get<0>(i) == 4;}
128 BOOST_REQUIRE_EQUAL(match,
true);
131 starts.resize(o_part_loc.
size());
133 auto & v_cl = create_vcluster();
134 openfpm::scan((
unsigned int *)o_part_loc.template getDeviceBuffer<0>(), o_part_loc.
size(), (
unsigned int *)starts.template getDeviceBuffer<0>() , v_cl.getgpuContext());
136 starts.deviceToHost<0>(starts.
size()-1,starts.
size()-1);
137 size_t tot = starts.template get<0>(o_part_loc.
size()-1);
143 shifts.template get<0>(0)[0] = 10.0;
144 shifts.template get<0>(0)[1] = 0.0;
145 shifts.template get<0>(0)[2] = 0.0;
147 shifts.template get<0>(1)[0] = 20.0;
148 shifts.template get<0>(1)[1] = 0.0;
149 shifts.template get<0>(1)[2] = 0.0;
151 shifts.template get<0>(2)[0] = 30.0;
152 shifts.template get<0>(2)[1] = 0.0;
153 shifts.template get<0>(2)[2] = 0.0;
155 shifts.template get<0>(3)[0] = 40.0;
156 shifts.template get<0>(3)[1] = 0.0;
157 shifts.template get<0>(3)[2] = 0.0;
159 size_t old = v_pos.
size();
160 v_pos.resize(v_pos.
size() + tot);
161 v_prp.resize(v_prp.
size() + tot);
163 shifts.template hostToDevice<0>();
165 o_part_loc2.resize(tot);
167 CUDA_LAUNCH_DIM3((shift_ghost_each_part<3,
float,
decltype(box_f_dev.toKernel()),
decltype(box_f_sv.toKernel()),
168 decltype(v_pos.toKernel()),
decltype(v_prp.toKernel()),
169 decltype(starts.toKernel()),
decltype(shifts.toKernel()),
170 decltype(o_part_loc2.toKernel())>),
172 box_f_dev.toKernel(),box_f_sv.toKernel(),
173 v_pos.toKernel(),v_prp.toKernel(),
174 starts.toKernel(),shifts.toKernel(),o_part_loc2.toKernel(),old,old);
176 v_pos.deviceToHost<0>();
177 o_part_loc2.deviceToHost<0,1>();
178 v_prp.deviceToHost<0,1,2>();
182 for (
size_t i = 0 ; i < old ; i++)
184 if (v_pos.template get<0>(i)[0] >= 0.5)
186 else if (v_pos.template get<0>(i)[0] >= 0.3)
188 for (
size_t j = 0 ; j < o_part_loc.template get<0>(i) ; j++)
190 match &= v_pos.template get<0>(base)[0] < 1.0 - (j+1.0)*10.0;
191 match &= v_pos.template get<0>(base)[0] >= -(j+1.0)*10.0;
193 match &= o_part_loc2.template get<0>(base_o) == i;
194 match &= o_part_loc2.template get<1>(base_o) == j;
198 match &= v_prp.template get<0>(base) == v_prp.template get<0>(i);
200 match &= v_prp.template get<1>(base)[0] == v_prp.template get<1>(i)[0];
201 match &= v_prp.template get<1>(base)[1] == v_prp.template get<1>(i)[1];
202 match &= v_prp.template get<1>(base)[2] == v_prp.template get<1>(i)[2];
204 match &= v_prp.template get<2>(base)[0][0] == v_prp.template get<2>(i)[0][0];
205 match &= v_prp.template get<2>(base)[0][1] == v_prp.template get<2>(i)[0][1];
206 match &= v_prp.template get<2>(base)[0][2] == v_prp.template get<2>(i)[0][2];
207 match &= v_prp.template get<2>(base)[1][0] == v_prp.template get<2>(i)[1][0];
208 match &= v_prp.template get<2>(base)[1][1] == v_prp.template get<2>(i)[1][1];
209 match &= v_prp.template get<2>(base)[1][2] == v_prp.template get<2>(i)[1][2];
210 match &= v_prp.template get<2>(base)[2][0] == v_prp.template get<2>(i)[2][0];
211 match &= v_prp.template get<2>(base)[2][1] == v_prp.template get<2>(i)[2][1];
212 match &= v_prp.template get<2>(base)[2][2] == v_prp.template get<2>(i)[2][2];
218 else if (v_pos.template get<0>(i)[0] >= 0.2)
220 for (
size_t j = 0 ; j < o_part_loc.template get<0>(i) ; j++)
222 match &= v_pos.template get<0>(base)[0] < 1.0 - (j+1.0)*10.0;
223 match &= v_pos.template get<0>(base)[0] >= -(j+1.0)*10.0;
225 match &= o_part_loc2.template get<0>(base_o) == i;
226 match &= o_part_loc2.template get<1>(base_o) == j;
230 match &= v_prp.template get<0>(base) == v_prp.template get<0>(i);
232 match &= v_prp.template get<1>(base)[0] == v_prp.template get<1>(i)[0];
233 match &= v_prp.template get<1>(base)[1] == v_prp.template get<1>(i)[1];
234 match &= v_prp.template get<1>(base)[2] == v_prp.template get<1>(i)[2];
236 match &= v_prp.template get<2>(base)[0][0] == v_prp.template get<2>(i)[0][0];
237 match &= v_prp.template get<2>(base)[0][1] == v_prp.template get<2>(i)[0][1];
238 match &= v_prp.template get<2>(base)[0][2] == v_prp.template get<2>(i)[0][2];
239 match &= v_prp.template get<2>(base)[1][0] == v_prp.template get<2>(i)[1][0];
240 match &= v_prp.template get<2>(base)[1][1] == v_prp.template get<2>(i)[1][1];
241 match &= v_prp.template get<2>(base)[1][2] == v_prp.template get<2>(i)[1][2];
242 match &= v_prp.template get<2>(base)[2][0] == v_prp.template get<2>(i)[2][0];
243 match &= v_prp.template get<2>(base)[2][1] == v_prp.template get<2>(i)[2][1];
244 match &= v_prp.template get<2>(base)[2][2] == v_prp.template get<2>(i)[2][2];
251 else if (v_pos.template get<0>(i)[0] >= 0.1)
253 for (
size_t j = 0 ; j < o_part_loc.template get<0>(i) ; j++)
255 match &= v_pos.template get<0>(base)[0] < 1.0 - (j+1.0)*10.0;
256 match &= v_pos.template get<0>(base)[0] >= -(j+1.0)*10.0;
258 match &= o_part_loc2.template get<0>(base_o) == i;
259 match &= o_part_loc2.template get<1>(base_o) == j;
263 match &= v_prp.template get<0>(base) == v_prp.template get<0>(i);
265 match &= v_prp.template get<1>(base)[0] == v_prp.template get<1>(i)[0];
266 match &= v_prp.template get<1>(base)[1] == v_prp.template get<1>(i)[1];
267 match &= v_prp.template get<1>(base)[2] == v_prp.template get<1>(i)[2];
269 match &= v_prp.template get<2>(base)[0][0] == v_prp.template get<2>(i)[0][0];
270 match &= v_prp.template get<2>(base)[0][1] == v_prp.template get<2>(i)[0][1];
271 match &= v_prp.template get<2>(base)[0][2] == v_prp.template get<2>(i)[0][2];
272 match &= v_prp.template get<2>(base)[1][0] == v_prp.template get<2>(i)[1][0];
273 match &= v_prp.template get<2>(base)[1][1] == v_prp.template get<2>(i)[1][1];
274 match &= v_prp.template get<2>(base)[1][2] == v_prp.template get<2>(i)[1][2];
275 match &= v_prp.template get<2>(base)[2][0] == v_prp.template get<2>(i)[2][0];
276 match &= v_prp.template get<2>(base)[2][1] == v_prp.template get<2>(i)[2][1];
277 match &= v_prp.template get<2>(base)[2][2] == v_prp.template get<2>(i)[2][2];
285 for (
size_t j = 0 ; j < o_part_loc.template get<0>(i) ; j++)
287 match &= v_pos.template get<0>(base)[0] < 1.0 - (j+1.0)*10.0;
288 match &= v_pos.template get<0>(base)[0] >= -(j+1.0)*10.0;
290 match &= o_part_loc2.template get<0>(base_o) == i;
291 match &= o_part_loc2.template get<1>(base_o) == j;
295 match &= v_prp.template get<0>(base) == v_prp.template get<0>(i);
297 match &= v_prp.template get<1>(base)[0] == v_prp.template get<1>(i)[0];
298 match &= v_prp.template get<1>(base)[1] == v_prp.template get<1>(i)[1];
299 match &= v_prp.template get<1>(base)[2] == v_prp.template get<1>(i)[2];
301 match &= v_prp.template get<2>(base)[0][0] == v_prp.template get<2>(i)[0][0];
302 match &= v_prp.template get<2>(base)[0][1] == v_prp.template get<2>(i)[0][1];
303 match &= v_prp.template get<2>(base)[0][2] == v_prp.template get<2>(i)[0][2];
304 match &= v_prp.template get<2>(base)[1][0] == v_prp.template get<2>(i)[1][0];
305 match &= v_prp.template get<2>(base)[1][1] == v_prp.template get<2>(i)[1][1];
306 match &= v_prp.template get<2>(base)[1][2] == v_prp.template get<2>(i)[1][2];
307 match &= v_prp.template get<2>(base)[2][0] == v_prp.template get<2>(i)[2][0];
308 match &= v_prp.template get<2>(base)[2][1] == v_prp.template get<2>(i)[2][1];
309 match &= v_prp.template get<2>(base)[2][2] == v_prp.template get<2>(i)[2][2];
317 BOOST_REQUIRE_EQUAL(match,
true);
327 for (
size_t i = 0 ; i < old ; i++)
329 v_pos2.template get<0>(i)[0] = v_pos.template get<0>(i)[0];
330 v_pos2.template get<0>(i)[1] = v_pos.template get<0>(i)[1];
331 v_pos2.template get<0>(i)[2] = v_pos.template get<0>(i)[2];
333 v_prp2.template get<0>(i) = v_prp.template get<0>(i);
335 v_prp2.template get<1>(i)[0] = v_prp.template get<1>(i)[0];
336 v_prp2.template get<1>(i)[1] = v_prp.template get<1>(i)[1];
337 v_prp2.template get<1>(i)[2] = v_prp.template get<1>(i)[2];
339 v_prp2.template get<2>(i)[0][0] = v_prp.template get<2>(i)[0][0];
340 v_prp2.template get<2>(i)[0][1] = v_prp.template get<2>(i)[0][1];
341 v_prp2.template get<2>(i)[0][2] = v_prp.template get<2>(i)[0][2];
342 v_prp2.template get<2>(i)[1][0] = v_prp.template get<2>(i)[1][0];
343 v_prp2.template get<2>(i)[1][1] = v_prp.template get<2>(i)[1][1];
344 v_prp2.template get<2>(i)[1][2] = v_prp.template get<2>(i)[1][2];
345 v_prp2.template get<2>(i)[2][0] = v_prp.template get<2>(i)[2][0];
346 v_prp2.template get<2>(i)[2][1] = v_prp.template get<2>(i)[2][1];
347 v_prp2.template get<2>(i)[2][2] = v_prp.template get<2>(i)[2][2];
350 v_pos2.resize(v_pos.
size());
351 v_prp2.resize(v_prp.
size());
353 v_pos2.hostToDevice<0>();
354 v_prp2.hostToDevice<0,1,2>();
356 ite = o_part_loc2.getGPUIterator();
358 CUDA_LAUNCH_DIM3((process_ghost_particles_local<
true,3,
decltype(o_part_loc2.toKernel()),
decltype(v_pos2.toKernel()),
decltype(v_prp2.toKernel()),
decltype(shifts.toKernel())>),
360 o_part_loc2.toKernel(),v_pos2.toKernel(),v_prp2.toKernel(),shifts.toKernel(),old);
362 v_pos2.template deviceToHost<0>();
363 v_prp2.template deviceToHost<0,1,2>();
365 for (
size_t i = old ; i < v_pos.
size() ; i++)
367 match &= v_pos.template get<0>(i)[0] == v_pos2.template get<0>(i)[0];
368 match &= v_pos.template get<0>(i)[1] == v_pos2.template get<0>(i)[1];
369 match &= v_pos.template get<0>(i)[2] == v_pos2.template get<0>(i)[2];
371 match &= v_prp2.template get<0>(i) == v_prp.template get<0>(i);
373 match &= v_prp2.template get<1>(i)[0] == v_prp.template get<1>(i)[0];
374 match &= v_prp2.template get<1>(i)[1] == v_prp.template get<1>(i)[1];
375 match &= v_prp2.template get<1>(i)[2] == v_prp.template get<1>(i)[2];
377 match &= v_prp2.template get<2>(i)[0][0] == v_prp.template get<2>(i)[0][0];
378 match &= v_prp2.template get<2>(i)[0][1] == v_prp.template get<2>(i)[0][1];
379 match &= v_prp2.template get<2>(i)[0][2] == v_prp.template get<2>(i)[0][2];
380 match &= v_prp2.template get<2>(i)[1][0] == v_prp.template get<2>(i)[1][0];
381 match &= v_prp2.template get<2>(i)[1][1] == v_prp.template get<2>(i)[1][1];
382 match &= v_prp2.template get<2>(i)[1][2] == v_prp.template get<2>(i)[1][2];
383 match &= v_prp2.template get<2>(i)[2][0] == v_prp.template get<2>(i)[2][0];
384 match &= v_prp2.template get<2>(i)[2][1] == v_prp.template get<2>(i)[2][1];
385 match &= v_prp2.template get<2>(i)[2][2] == v_prp.template get<2>(i)[2][2];
388 BOOST_REQUIRE_EQUAL(match,
true);
391BOOST_AUTO_TEST_CASE( vector_ghost_fill_send_buffer_test )
403 auto & v_cl = create_vcluster();
413 for (
size_t i = 0 ; i < v_prp.
size() ; i++)
415 v_prp.template get<0>(i) = i+12345;
417 v_prp.template get<1>(i)[0] = i;
418 v_prp.template get<1>(i)[1] = i+20000;
419 v_prp.template get<1>(i)[2] = i+50000;
421 v_prp.template get<2>(i)[0][0] = i+60000;
422 v_prp.template get<2>(i)[0][1] = i+70000;
423 v_prp.template get<2>(i)[0][2] = i+80000;
424 v_prp.template get<2>(i)[1][0] = i+90000;
425 v_prp.template get<2>(i)[1][1] = i+100000;
426 v_prp.template get<2>(i)[1][2] = i+110000;
427 v_prp.template get<2>(i)[2][0] = i+120000;
428 v_prp.template get<2>(i)[2][1] = i+130000;
429 v_prp.template get<2>(i)[2][2] = i+140000;
432 v_prp.hostToDevice<0,1,2>();
434 g_opart_device.resize(2*10000*3);
436 for (
size_t i = 0 ; i < 3 ; i++)
438 for (
size_t j = 0 ; j < 10000 ; j++)
440 g_opart_device.template get<0>(i*2*10000 + j*2) = i;
441 g_opart_device.template get<0>(i*2*10000 + j*2+1) = i;
443 g_opart_device.template get<1>(i*2*10000 + j*2) = j;
444 g_opart_device.template get<1>(i*2*10000 + j*2+1) = j;
446 g_opart_device.template get<2>(i*2*10000 + j*2) = 0;
447 g_opart_device.template get<2>(i*2*10000 + j*2+1) = 0;
451 g_opart_device.hostToDevice<0,1,2>();
453 g_send_prp.resize(3);
458 for (
size_t i = 0 ; i < 3 ; i++)
460 g_send_prp.get(i).resize(2*10000);
462 auto ite = g_send_prp.get(i).getGPUIterator();
464 CUDA_LAUNCH_DIM3((process_ghost_particles_prp<
decltype(g_opart_device.toKernel()),
decltype(g_send_prp.get(i).toKernel()),
decltype(v_prp.toKernel()),0,1,2>),
466 g_opart_device.toKernel(), g_send_prp.get(i).toKernel(),
467 v_prp.toKernel(),offset);
469 offset += g_send_prp.get(i).
size();
473 g_send_prp.get(i).deviceToHost<0,1,2>();
475 for (
size_t j = 0 ; j < 10000 ; j++)
477 match &= g_send_prp.get(i).template get<0>(2*j) == j+12345;
479 match &= g_send_prp.get(i).template get<1>(2*j)[0] == j;
480 match &= g_send_prp.get(i).template get<1>(2*j)[1] == j+20000;
481 match &= g_send_prp.get(i).template get<1>(2*j)[2] == j+50000;
483 match &= g_send_prp.get(i).template get<2>(2*j)[0][0] == j+60000;
484 match &= g_send_prp.get(i).template get<2>(2*j)[0][1] == j+70000;
485 match &= g_send_prp.get(i).template get<2>(2*j)[0][2] == j+80000;
486 match &= g_send_prp.get(i).template get<2>(2*j)[1][0] == j+90000;
487 match &= g_send_prp.get(i).template get<2>(2*j)[1][1] == j+100000;
488 match &= g_send_prp.get(i).template get<2>(2*j)[1][2] == j+110000;
489 match &= g_send_prp.get(i).template get<2>(2*j)[2][0] == j+120000;
490 match &= g_send_prp.get(i).template get<2>(2*j)[2][1] == j+130000;
491 match &= g_send_prp.get(i).template get<2>(2*j)[2][2] == j+140000;
494 match = g_send_prp.get(i).template get<0>(2*j+1) == j+12345;
496 match = g_send_prp.get(i).template get<1>(2*j+1)[0] == j;
497 match = g_send_prp.get(i).template get<1>(2*j+1)[1] == j+20000;
498 match = g_send_prp.get(i).template get<1>(2*j+1)[2] == j+50000;
500 match = g_send_prp.get(i).template get<2>(2*j+1)[0][0] == j+60000;
501 match = g_send_prp.get(i).template get<2>(2*j+1)[0][1] == j+70000;
502 match = g_send_prp.get(i).template get<2>(2*j+1)[0][2] == j+80000;
503 match = g_send_prp.get(i).template get<2>(2*j+1)[1][0] == j+90000;
504 match = g_send_prp.get(i).template get<2>(2*j+1)[1][1] == j+100000;
505 match = g_send_prp.get(i).template get<2>(2*j+1)[1][2] == j+110000;
506 match = g_send_prp.get(i).template get<2>(2*j+1)[2][0] == j+120000;
507 match = g_send_prp.get(i).template get<2>(2*j+1)[2][1] == j+130000;
508 match = g_send_prp.get(i).template get<2>(2*j+1)[2][2] == j+140000;
512 BOOST_REQUIRE_EQUAL(match,
true);
515BOOST_AUTO_TEST_CASE( decomposition_ie_ghost_gpu_test_use )
517 auto & v_cl = create_vcluster();
533 size_t n_sub = n_proc * SUB_UNIT_FACTOR;
536 for (
int i = 0; i < 3; i++)
537 { div[i] = openfpm::math::round_big_2(pow(n_sub,1.0/3));}
543 size_t bc[] = { PERIODIC, PERIODIC, PERIODIC };
546 dec.setParameters(div,box,bc,g);
551 int nsub = dec.getNSubDomain();
552 int n_part = 10000 / nsub;
555 vg.resize(nsub*n_part);
557 for (
size_t k = 0 ; k < nsub ; k++)
561 for (
size_t j = 0 ; j < n_part ; j++)
563 vg.template get<0>(k*n_part+j)[0] = (sp.
getHigh(0) - sp.
getLow(0))*((
float)rand()/(float)RAND_MAX) + sp.
getLow(0);
564 vg.template get<0>(k*n_part+j)[1] = (sp.
getHigh(1) - sp.
getLow(1))*((
float)rand()/(float)RAND_MAX) + sp.
getLow(1);
565 vg.template get<0>(k*n_part+j)[2] = (sp.
getHigh(2) - sp.
getLow(2))*((
float)rand()/(float)RAND_MAX) + sp.
getLow(2);
569 vg.hostToDevice<0>();
573 auto ite = vg.getGPUIterator();
576 proc_id_out.resize(vg.
size()+1);
577 proc_id_out.template get<0>(proc_id_out.
size()-1) = 0;
578 proc_id_out.template hostToDevice(proc_id_out.
size()-1,proc_id_out.
size()-1);
580 CUDA_LAUNCH_DIM3((num_proc_ghost_each_part<3,
float,
decltype(dec.toKernel()),
decltype(vg.toKernel()),
decltype(proc_id_out.toKernel())>),
582 dec.toKernel(),vg.toKernel(),proc_id_out.toKernel());
687BOOST_AUTO_TEST_CASE( decomposition_to_gpu_test_use )
689 auto & v_cl = create_vcluster();
703 size_t n_sub = n_proc * SUB_UNIT_FACTOR;
706 for (
int i = 0; i < 3; i++)
707 { div[i] = openfpm::math::round_big_2(pow(n_sub,1.0/3));}
713 size_t bc[] = { PERIODIC, PERIODIC, PERIODIC };
716 dec.setParameters(div,box,bc,g);
722 for (
size_t i = 0 ; i < 10000 ; i++)
724 vg.template get<0>(i)[0] = (float)rand()/(float)RAND_MAX;
725 vg.template get<0>(i)[1] = (float)rand()/(float)RAND_MAX;
726 vg.template get<0>(i)[2] = (float)rand()/(float)RAND_MAX;
729 vg.hostToDevice<0>();
733 auto ite = vg.getGPUIterator();
736 proc_id_out.resize(vg.
size());
739 dev_counter.resize(v_cl.size());
740 dev_counter.fill<0>(0);
741 dev_counter.fill<1>(0);
742 dev_counter.fill<2>(0);
744 CUDA_LAUNCH_DIM3((process_id_proc_each_part<3,
float,
decltype(dec.toKernel()),
decltype(vg.toKernel()),
decltype(proc_id_out.toKernel()),
decltype(dev_counter.toKernel())>),
746 dec.toKernel(),vg.toKernel(),proc_id_out.toKernel(),dev_counter.toKernel(),v_cl.rank());
749 proc_id_out.deviceToHost<0>();
752 for (
size_t i = 0 ; i < proc_id_out.
size() ; i++)
756 match &= proc_id_out.template get<0>(i) == dec.processorIDBC(xp);
761BOOST_AUTO_TEST_CASE( vector_dist_gpu_find_buffer_offsets_test )
768 for (
size_t k = 0 ; k < vgp.
size() ; k++)
770 vgp.template get<0>(k) = k / 1000;
771 vgp.template get<1>(k) = k / 1000;
780 auto ite = vgp.getGPUIterator();
781 vgp.hostToDevice<0,1>();
783 CUDA_LAUNCH((find_buffer_offsets<1,
decltype(vgp.toKernel()),
decltype(offs.toKernel())>),ite,vgp.toKernel(),(
int *)mem.
getDevicePointer(),offs.toKernel());
785 offs.template deviceToHost<0,1>();
789 BOOST_REQUIRE_EQUAL(n_ele,199);
794 for (
size_t i = 0 ; i < n_ele ; i++)
796 ofv.add(offs.template get<0>(i));
797 ofv2.add(offs.template get<1>(i));
803 for (
size_t i = 0 ; i < ofv.
size() ; i++)
805 BOOST_REQUIRE_EQUAL(ofv.get(i),(i+1)*1000);
806 BOOST_REQUIRE_EQUAL(ofv2.get(i),i);
810BOOST_AUTO_TEST_CASE(vector_dist_reorder_lbl)
818 for (
int i = 0 ; i < 10 ; i++)
820 for (
int j = 0 ; j < 10 ; j++)
822 lbl_p.template get<2>(i*10+j) = i;
823 lbl_p.template get<1>(i*10+j) = j;
825 starts.template get<0>(i) = (i*10);
829 starts.template hostToDevice<0>();
830 lbl_p.template hostToDevice<1,2>();
832 auto ite = lbl_p.getGPUIterator();
834 CUDA_LAUNCH_DIM3((reorder_lbl<
decltype(lbl_p.toKernel()),
decltype(starts.toKernel())>),ite.wthr,ite.thr,lbl_p.toKernel(),starts.toKernel());
836 starts.template deviceToHost<0>();
837 lbl_p.template deviceToHost<0,1,2>();
839 for (
int i = 0 ; i < 10 ; i++)
841 for (
int j = 0 ; j < 10 ; j++)
843 BOOST_REQUIRE_EQUAL(lbl_p.template get<0>(j*10+i),i*10+j);
848BOOST_AUTO_TEST_CASE(vector_dist_merge_sort)
860 v_prp_out.resize(10000);
861 v_pos_out.resize(10000);
862 ns_to_s.resize(10000);
864 for (
int i = 0 ; i < 10000 ; i++)
866 v_pos_out.template get<0>(i)[0] = i;
867 v_pos_out.template get<0>(i)[1] = i+10000;
868 v_pos_out.template get<0>(i)[2] = i+20000;
870 v_pos.template get<0>(i)[0] = 0;
871 v_pos.template get<0>(i)[1] = 0;
872 v_pos.template get<0>(i)[2] = 0;
874 v_prp_out.template get<0>(i)[0] = i+60123;
875 v_prp_out.template get<0>(i)[1] = i+73543;
876 v_prp_out.template get<0>(i)[2] = i+82432;
878 v_prp_out.template get<1>(i)[0] = i+80123;
879 v_prp_out.template get<1>(i)[1] = i+93543;
880 v_prp_out.template get<1>(i)[2] = i+102432;
882 v_prp_out.template get<2>(i)[0] = i+110123;
883 v_prp_out.template get<2>(i)[1] = i+123543;
884 v_prp_out.template get<2>(i)[2] = i+132432;
886 v_prp.template get<0>(i)[0] = 0;
887 v_prp.template get<0>(i)[1] = 0;
888 v_prp.template get<0>(i)[2] = 0;
890 v_prp.template get<1>(i)[0] = 0;
891 v_prp.template get<1>(i)[1] = 0;
892 v_prp.template get<1>(i)[2] = 0;
894 v_prp.template get<2>(i)[0] = 0;
895 v_prp.template get<2>(i)[1] = 0;
896 v_prp.template get<2>(i)[2] = 0;
898 ns_to_s.template get<0>(i) = 10000-i-1;
901 v_prp.template hostToDevice<0,1,2>();
902 v_prp_out.template hostToDevice<0,1,2>();
903 v_pos.template hostToDevice<0>();
904 v_pos_out.template hostToDevice<0>();
905 ns_to_s.template hostToDevice<0>();
907 auto ite = v_pos.getGPUIterator();
909 CUDA_LAUNCH_DIM3((merge_sort_part<
false,
decltype(v_pos.toKernel()),
decltype(v_prp.toKernel()),
decltype(ns_to_s.toKernel()),0>),ite.wthr,ite.thr,v_pos.toKernel(),v_prp.toKernel(),
910 v_pos_out.toKernel(),v_prp_out.toKernel(),
913 v_prp.template deviceToHost<0,1,2>();
916 for (
int i = 0 ; i < 10000 ; i++)
918 match &= v_prp_out.template get<0>(10000-i-1)[0] == v_prp.template get<0>(i)[0];
919 match &= v_prp_out.template get<0>(10000-i-1)[1] == v_prp.template get<0>(i)[1];
920 match &= v_prp_out.template get<0>(10000-i-1)[2] == v_prp.template get<0>(i)[2];
922 match &= v_prp.template get<1>(10000-i-1)[0] == 0;
923 match &= v_prp.template get<1>(10000-i-1)[1] == 0;
924 match &= v_prp.template get<1>(10000-i-1)[2] == 0;
926 match &= v_prp.template get<2>(10000-i-1)[0] == 0;
927 match &= v_prp.template get<2>(10000-i-1)[1] == 0;
928 match &= v_prp.template get<2>(10000-i-1)[2] == 0;
931 BOOST_REQUIRE_EQUAL(match,
true);
933 CUDA_LAUNCH_DIM3((merge_sort_part<
false,
decltype(v_pos.toKernel()),
decltype(v_prp.toKernel()),
decltype(ns_to_s.toKernel()),1,2>),ite.wthr,ite.thr,v_pos.toKernel(),v_prp.toKernel(),
934 v_pos_out.toKernel(),v_prp_out.toKernel(),
937 v_prp.template deviceToHost<0,1,2>();
938 v_pos.template deviceToHost<0>();
940 for (
int i = 0 ; i < 10000 ; i++)
942 match &= v_prp_out.template get<0>(10000-i-1)[0] == v_prp.template get<0>(i)[0];
943 match &= v_prp_out.template get<0>(10000-i-1)[1] == v_prp.template get<0>(i)[1];
944 match &= v_prp_out.template get<0>(10000-i-1)[2] == v_prp.template get<0>(i)[2];
946 match &= v_prp_out.template get<1>(10000-i-1)[0] == v_prp.template get<1>(i)[0];
947 match &= v_prp_out.template get<1>(10000-i-1)[1] == v_prp.template get<1>(i)[1];
948 match &= v_prp_out.template get<1>(10000-i-1)[2] == v_prp.template get<1>(i)[2];
950 match &= v_prp_out.template get<2>(10000-i-1)[0] == v_prp.template get<2>(i)[0];
951 match &= v_prp_out.template get<2>(10000-i-1)[1] == v_prp.template get<2>(i)[1];
952 match &= v_prp_out.template get<2>(10000-i-1)[2] == v_prp.template get<2>(i)[2];
954 match &= v_pos.template get<0>(10000-i-1)[0] == 0;
955 match &= v_pos.template get<0>(10000-i-1)[1] == 0;
956 match &= v_pos.template get<0>(10000-i-1)[2] == 0;
959 BOOST_REQUIRE_EQUAL(match,
true);
961 CUDA_LAUNCH_DIM3((merge_sort_part<
true,
decltype(v_pos.toKernel()),
decltype(v_prp.toKernel()),
decltype(ns_to_s.toKernel())>),ite.wthr,ite.thr,v_pos.toKernel(),v_prp.toKernel(),
962 v_pos_out.toKernel(),v_prp_out.toKernel(),
965 v_prp.template deviceToHost<0,1,2>();
966 v_pos.template deviceToHost<0>();
968 for (
int i = 0 ; i < 10000 ; i++)
972 match &= v_prp_out.template get<0>(10000-i-1)[0] == v_prp.template get<0>(i)[0];
973 match &= v_prp_out.template get<0>(10000-i-1)[1] == v_prp.template get<0>(i)[1];
974 match &= v_prp_out.template get<0>(10000-i-1)[2] == v_prp.template get<0>(i)[2];
976 match &= v_prp_out.template get<1>(10000-i-1)[0] == v_prp.template get<1>(i)[0];
977 match &= v_prp_out.template get<1>(10000-i-1)[1] == v_prp.template get<1>(i)[1];
978 match &= v_prp_out.template get<1>(10000-i-1)[2] == v_prp.template get<1>(i)[2];
980 match &= v_prp_out.template get<2>(10000-i-1)[0] == v_prp.template get<2>(i)[0];
981 match &= v_prp_out.template get<2>(10000-i-1)[1] == v_prp.template get<2>(i)[1];
982 match &= v_prp_out.template get<2>(10000-i-1)[2] == v_prp.template get<2>(i)[2];
985 match &= v_pos_out.template get<0>(10000-i-1)[0] == v_pos.template get<0>(i)[0];
986 match &= v_pos_out.template get<0>(10000-i-1)[1] == v_pos.template get<0>(i)[1];
987 match &= v_pos_out.template get<0>(10000-i-1)[2] == v_pos.template get<0>(i)[2];
990 BOOST_REQUIRE_EQUAL(match,
true);
993BOOST_AUTO_TEST_CASE(vector_dist_gpu_map_fill_send_buffer_test)
1003 unsigned int offset = 0;
1005 v_pos.resize(100000);
1006 v_prp.resize(v_pos.
size());
1007 m_opart.resize(v_pos.
size());
1009 for (
size_t i = 0 ; i < v_pos.
size() ; i++)
1011 v_pos.template get<0>(i)[0] = (float)rand()/(float)RAND_MAX;
1012 v_pos.template get<0>(i)[1] = (float)rand()/(float)RAND_MAX;
1013 v_pos.template get<0>(i)[2] = (float)rand()/(float)RAND_MAX;
1015 v_prp.template get<0>(i) = 5.0 + (float)rand()/(float)RAND_MAX;
1016 v_prp.template get<1>(i)[0] = 10.0 + (float)rand()/(float)RAND_MAX;
1017 v_prp.template get<1>(i)[1] = 11.0 + (float)rand()/(float)RAND_MAX;
1018 v_prp.template get<2>(i)[0][0] = 40.0 + (float)rand()/(float)RAND_MAX;
1019 v_prp.template get<2>(i)[0][1] = 50.0 + (float)rand()/(float)RAND_MAX;
1020 v_prp.template get<2>(i)[0][2] = 60.0 + (float)rand()/(float)RAND_MAX;
1021 v_prp.template get<2>(i)[1][0] = 70.0 + (float)rand()/(float)RAND_MAX;
1022 v_prp.template get<2>(i)[1][1] = 80.0 + (float)rand()/(float)RAND_MAX;
1023 v_prp.template get<2>(i)[1][2] = 150.0 + (float)rand()/(float)RAND_MAX;
1024 v_prp.template get<2>(i)[2][0] = 160.0 + (float)rand()/(float)RAND_MAX;
1025 v_prp.template get<2>(i)[2][1] = 170.0 + (float)rand()/(float)RAND_MAX;
1026 v_prp.template get<2>(i)[2][2] = 340.0 + (float)rand()/(float)RAND_MAX;
1028 int seg = i / 10000;
1029 m_opart.template get<1>(i) = seg;
1030 m_opart.template get<0>(i) = (9999 - i%10000) + seg * 10000;
1036 for (
size_t i = 0 ; i < m_pos.
size() ; i++)
1038 m_pos.get(i).resize(10000);
1039 m_prp.get(i).resize(10000);
1042 v_pos.hostToDevice<0>();
1043 v_prp.hostToDevice<0,1,2>();
1045 m_opart.hostToDevice<0,1>();
1047 for (
size_t i = 0 ; i < m_pos.
size() ; i++)
1049 auto ite = m_pos.get(i).getGPUIterator();
1051 CUDA_LAUNCH_DIM3((process_map_particles<
decltype(m_opart.toKernel()),
decltype(m_pos.get(i).toKernel()),
decltype(m_prp.get(i).toKernel()),
1052 decltype(v_pos.toKernel()),
decltype(v_prp.toKernel())>),
1054 m_opart.toKernel(),m_pos.get(i).toKernel(), m_prp.get(i).toKernel(),
1055 v_pos.toKernel(),v_prp.toKernel(),offset);
1057 m_pos.get(i).deviceToHost<0>();
1058 m_prp.get(i).deviceToHost<0,1,2>();
1062 for (
size_t j = 0 ; j < m_pos.get(i).
size() ; j++)
1064 match &= (m_pos.get(i).template get<0>(j)[0] == v_pos.template get<0>(m_opart.template get<0>(offset+j))[0]);
1065 match &= (m_pos.get(i).template get<0>(j)[1] == v_pos.template get<0>(m_opart.template get<0>(offset+j))[1]);
1066 match &= (m_pos.get(i).template get<0>(j)[2] == v_pos.template get<0>(m_opart.template get<0>(offset+j))[2]);
1068 match &= (m_prp.get(i).template get<0>(j) == v_prp.template get<0>(m_opart.template get<0>(offset+j)));
1070 match &= (m_prp.get(i).template get<1>(j)[0] == v_prp.template get<1>(m_opart.template get<0>(offset+j))[0]);
1071 match &= (m_prp.get(i).template get<1>(j)[1] == v_prp.template get<1>(m_opart.template get<0>(offset+j))[1]);
1073 match &= (m_prp.get(i).template get<2>(j)[0][0] == v_prp.template get<2>(m_opart.template get<0>(offset+j))[0][0]);
1074 match &= (m_prp.get(i).template get<2>(j)[0][1] == v_prp.template get<2>(m_opart.template get<0>(offset+j))[0][1]);
1075 match &= (m_prp.get(i).template get<2>(j)[0][2] == v_prp.template get<2>(m_opart.template get<0>(offset+j))[0][2]);
1076 match &= (m_prp.get(i).template get<2>(j)[1][0] == v_prp.template get<2>(m_opart.template get<0>(offset+j))[1][0]);
1077 match &= (m_prp.get(i).template get<2>(j)[1][1] == v_prp.template get<2>(m_opart.template get<0>(offset+j))[1][1]);
1078 match &= (m_prp.get(i).template get<2>(j)[1][2] == v_prp.template get<2>(m_opart.template get<0>(offset+j))[1][2]);
1079 match &= (m_prp.get(i).template get<2>(j)[2][0] == v_prp.template get<2>(m_opart.template get<0>(offset+j))[2][0]);
1080 match &= (m_prp.get(i).template get<2>(j)[2][1] == v_prp.template get<2>(m_opart.template get<0>(offset+j))[2][1]);
1081 match &= (m_prp.get(i).template get<2>(j)[2][2] == v_prp.template get<2>(m_opart.template get<0>(offset+j))[2][2]);
1084 BOOST_REQUIRE_EQUAL(match,
true);
1086 offset += m_pos.get(i).
size();
1090template<
unsigned int prp>
1091void vector_dist_remove_marked_type()
1093 auto & v_cl = create_vcluster();
1095 if (v_cl.size() > 16)
1104 size_t bc[3]={PERIODIC,PERIODIC,PERIODIC};
1110 auto it = vd.getDomainIterator();
1116 vd.getPos(p)[0] = (float)rand() / (float)RAND_MAX;
1117 vd.getPos(p)[1] = (float)rand() / (float)RAND_MAX;
1118 vd.getPos(p)[2] = (float)rand() / (float)RAND_MAX;
1124 vd.template ghost_get<>();
1126 it = vd.getDomainIterator();
1137 vd.template getProp<0>(p) = fc;
1138 vd.template getProp<1>(p) = dc;
1139 vd.template getProp<2>(p) = ic;
1140 vd.template getProp<3>(p) = sc;
1142 vd.template getProp<prp>(p) = (ic % 3 == 0);
1152 size_t sz = vd.size_local() - vd.size_local()/3;
1154 vd.template hostToDeviceProp<0,1,2,3>();
1156 remove_marked<prp>(vd);
1158 BOOST_REQUIRE_EQUAL(vd.size_local(),sz);
1160 vd.template deviceToHostProp<0,1,2,3>();
1162 auto it2 = vd.getDomainIterator();
1173 {test &= ((
int)vd.template getProp<0>(p) % 3 != 0);}
1176 {test &= ((
int)vd.template getProp<1>(p) % 3 != 0);}
1179 {test &= ((
int)vd.template getProp<2>(p) % 3 != 0);}
1182 {test &= ((
int)vd.template getProp<3>(p) % 3 != 0);}
1187 {std::cout << (
int)vd.template getProp<0>(p) << std::endl;}
1190 {std::cout << (
int)vd.template getProp<1>(p) << std::endl;}
1193 {std::cout << (
int)vd.template getProp<2>(p) << std::endl;}
1196 {std::cout << (
int)vd.template getProp<3>(p) << std::endl;}
1204 BOOST_REQUIRE_EQUAL(test,
true);
1209 size_t size_old = vd.size_local();
1212 vd.getPropVector().template fill<prp>(0);
1214 remove_marked<prp>(vd);
1216 BOOST_REQUIRE_EQUAL(vd.size_local(),size_old);
1219 vd.getPropVector().template fill<prp>(1);
1221 remove_marked<prp>(vd);
1223 BOOST_REQUIRE_EQUAL(vd.size_local(),0);
1226BOOST_AUTO_TEST_CASE(vector_dist_remove_marked)
1228 vector_dist_remove_marked_type<0>();
1229 vector_dist_remove_marked_type<1>();
1230 vector_dist_remove_marked_type<2>();
1231 vector_dist_remove_marked_type<3>();
1235BOOST_AUTO_TEST_CASE( vector_dist_particle_NN_MP_iteration_gpu )
1248 std::default_random_engine eg;
1249 eg.seed(v_cl.
rank()*4533);
1250 std::uniform_real_distribution<float> ud(-L,L);
1254 long int big_step = k / 4;
1255 big_step = (big_step == 0)?1:big_step;
1257 BOOST_TEST_CHECKPOINT(
"Testing 3D periodic vector symmetric cell-list k=" << k );
1262 size_t bc[3]={PERIODIC,PERIODIC,PERIODIC};
1264 float r_cut = 100.0;
1507BOOST_AUTO_TEST_SUITE_END()
This class represent an N-dimensional box.
__device__ __host__ T getLow(int i) const
get the i-coordinate of the low bound interval of the box
__device__ __host__ T getHigh(int i) const
get the high interval of the box
This class decompose a space into sub-sub-domains and distribute them across processors.
virtual void * getDevicePointer()
get a readable pointer with the data
virtual void deviceToHost()
Move memory from device to host.
virtual void fill(unsigned char c)
fill the buffer with a byte
virtual void * getPointer()
get a readable pointer with the data
virtual bool allocate(size_t sz)
allocate memory
This class implement the point shape in an N-dimensional space.
This class represent an N-dimensional box.
size_t rank()
Get the process unit id.
size_t getProcessingUnits()
Get the total number of processors.
Implementation of VCluster class.
Grow policy define how the vector should grow every time we exceed the size.
Implementation of 1-D std::vector like structure.
KeyT const ValueT ValueT OffsetIteratorT OffsetIteratorT int
[in] The number of segments that comprise the sorting data
aggregate of properties, from a list of object if create a struct that follow the OPENFPM native stru...
Transform the boost::fusion::vector into memory specification (memory_traits)
This is a container to create a general object.