OpenFPM_pdata  4.1.0
Project that contain the implementation of distributed structures
 
Loading...
Searching...
No Matches
vector_dist_cuda_func_test.cu
1#define BOOST_TEST_DYN_LINK
2
3#define TEST1
4
5#include <boost/test/unit_test.hpp>
6#include "VCluster/VCluster.hpp"
7#include "Vector/map_vector.hpp"
8#include "Vector/cuda/vector_dist_cuda_funcs.cuh"
9#include "Vector/util/vector_dist_funcs.hpp"
10#include "Decomposition/CartDecomposition.hpp"
11//#include "util/cuda/scan_cuda.cuh"
12#include "Vector/vector_dist.hpp"
13#include "util/cuda/scan_ofp.cuh"
14
15#define SUB_UNIT_FACTOR 1024
16
17BOOST_AUTO_TEST_SUITE( vector_dist_gpu_util_func_test )
18
19BOOST_AUTO_TEST_CASE( vector_ghost_process_local_particles )
20{
22
24 v_prp.resize(10000);
25
27 v_pos.resize(10000);
28
30
31 for (size_t i = 0 ; i < v_prp.size() ; i++)
32 {
33 v_pos.template get<0>(i)[0] = (float)rand()/(float)RAND_MAX;
34 v_pos.template get<0>(i)[1] = (float)rand()/(float)RAND_MAX;
35 v_pos.template get<0>(i)[2] = (float)rand()/(float)RAND_MAX;
36
37 v_prp.template get<0>(i) = i+12345;
38
39 v_prp.template get<1>(i)[0] = i;
40 v_prp.template get<1>(i)[1] = i+20000;
41 v_prp.template get<1>(i)[2] = i+50000;
42
43 v_prp.template get<2>(i)[0][0] = i+60000;
44 v_prp.template get<2>(i)[0][1] = i+70000;
45 v_prp.template get<2>(i)[0][2] = i+80000;
46 v_prp.template get<2>(i)[1][0] = i+90000;
47 v_prp.template get<2>(i)[1][1] = i+100000;
48 v_prp.template get<2>(i)[1][2] = i+110000;
49 v_prp.template get<2>(i)[2][0] = i+120000;
50 v_prp.template get<2>(i)[2][1] = i+130000;
51 v_prp.template get<2>(i)[2][1] = i+140000;
52 v_prp.template get<2>(i)[2][2] = i+150000;
53 }
54
57
58 box_f_dev.resize(4);
59 box_f_sv.resize(4);
60
61 box_f_dev.template get<0>(0)[0] = 0.0;
62 box_f_dev.template get<0>(0)[1] = 0.0;
63 box_f_dev.template get<0>(0)[2] = 0.0;
64 box_f_dev.template get<1>(0)[0] = 0.5;
65 box_f_dev.template get<1>(0)[1] = 1.0;
66 box_f_dev.template get<1>(0)[2] = 1.0;
67 box_f_sv.template get<0>(0) = 0;
68
69 box_f_dev.template get<0>(1)[0] = 0.0;
70 box_f_dev.template get<0>(1)[1] = 0.0;
71 box_f_dev.template get<0>(1)[2] = 0.0;
72 box_f_dev.template get<1>(1)[0] = 0.3;
73 box_f_dev.template get<1>(1)[1] = 1.0;
74 box_f_dev.template get<1>(1)[2] = 1.0;
75 box_f_sv.template get<0>(1) = 1;
76
77 box_f_dev.template get<0>(2)[0] = 0.0;
78 box_f_dev.template get<0>(2)[1] = 0.0;
79 box_f_dev.template get<0>(2)[2] = 0.0;
80 box_f_dev.template get<1>(2)[0] = 0.2;
81 box_f_dev.template get<1>(2)[1] = 1.0;
82 box_f_dev.template get<1>(2)[2] = 1.0;
83 box_f_sv.template get<0>(2) = 2;
84
85 box_f_dev.template get<0>(3)[0] = 0.0;
86 box_f_dev.template get<0>(3)[1] = 0.0;
87 box_f_dev.template get<0>(3)[2] = 0.0;
88 box_f_dev.template get<1>(3)[0] = 0.1;
89 box_f_dev.template get<1>(3)[1] = 1.0;
90 box_f_dev.template get<1>(3)[2] = 1.0;
91 box_f_sv.template get<0>(3) = 3;
92
93 // Label the internal (assigned) particles
94 auto ite = v_pos.getGPUIteratorTo(v_pos.size());
95
96 o_part_loc.resize(v_pos.size()+1);
97 o_part_loc.template get<0>(o_part_loc.size()-1) = 0;
98 o_part_loc.template hostToDevice<0>(o_part_loc.size()-1,o_part_loc.size()-1);
99
100 box_f_dev.hostToDevice<0,1>();
101 box_f_sv.hostToDevice<0>();
102 v_pos.hostToDevice<0>();
103 v_prp.hostToDevice<0,1,2>();
104
105 // label particle processor
106 CUDA_LAUNCH_DIM3((num_shift_ghost_each_part<3,float,decltype(box_f_dev.toKernel()),decltype(box_f_sv.toKernel()),decltype(v_pos.toKernel()),decltype(o_part_loc.toKernel())>),
107 ite.wthr,ite.thr,
108 box_f_dev.toKernel(),box_f_sv.toKernel(),v_pos.toKernel(),o_part_loc.toKernel(),v_pos.size());
109
110 o_part_loc.deviceToHost<0>();
111
112 bool match = true;
113
114 for (size_t i = 0 ; i < v_pos.size() ; i++)
115 {
116 if (v_pos.template get<0>(i)[0] >= 0.5)
117 {match &= o_part_loc.template get<0>(i) == 0;}
118 else if (v_pos.template get<0>(i)[0] >= 0.3)
119 {match &= o_part_loc.template get<0>(i) == 1;}
120 else if (v_pos.template get<0>(i)[0] >= 0.2)
121 {match &= o_part_loc.template get<0>(i) == 2;}
122 else if (v_pos.template get<0>(i)[0] >= 0.1)
123 {match &= o_part_loc.template get<0>(i) == 3;}
124 else
125 {match &= o_part_loc.template get<0>(i) == 4;}
126 }
127
128 BOOST_REQUIRE_EQUAL(match,true);
129
131 starts.resize(o_part_loc.size());
132
133 auto & v_cl = create_vcluster();
134 openfpm::scan((unsigned int *)o_part_loc.template getDeviceBuffer<0>(), o_part_loc.size(), (unsigned int *)starts.template getDeviceBuffer<0>() , v_cl.getgpuContext());
135
136 starts.deviceToHost<0>(starts.size()-1,starts.size()-1);
137 size_t tot = starts.template get<0>(o_part_loc.size()-1);
138
140
141 shifts.resize(4);
142
143 shifts.template get<0>(0)[0] = 10.0;
144 shifts.template get<0>(0)[1] = 0.0;
145 shifts.template get<0>(0)[2] = 0.0;
146
147 shifts.template get<0>(1)[0] = 20.0;
148 shifts.template get<0>(1)[1] = 0.0;
149 shifts.template get<0>(1)[2] = 0.0;
150
151 shifts.template get<0>(2)[0] = 30.0;
152 shifts.template get<0>(2)[1] = 0.0;
153 shifts.template get<0>(2)[2] = 0.0;
154
155 shifts.template get<0>(3)[0] = 40.0;
156 shifts.template get<0>(3)[1] = 0.0;
157 shifts.template get<0>(3)[2] = 0.0;
158
159 size_t old = v_pos.size();
160 v_pos.resize(v_pos.size() + tot);
161 v_prp.resize(v_prp.size() + tot);
162
163 shifts.template hostToDevice<0>();
165 o_part_loc2.resize(tot);
166
167 CUDA_LAUNCH_DIM3((shift_ghost_each_part<3,float,decltype(box_f_dev.toKernel()),decltype(box_f_sv.toKernel()),
168 decltype(v_pos.toKernel()),decltype(v_prp.toKernel()),
169 decltype(starts.toKernel()),decltype(shifts.toKernel()),
170 decltype(o_part_loc2.toKernel())>),
171 ite.wthr,ite.thr,
172 box_f_dev.toKernel(),box_f_sv.toKernel(),
173 v_pos.toKernel(),v_prp.toKernel(),
174 starts.toKernel(),shifts.toKernel(),o_part_loc2.toKernel(),old,old);
175
176 v_pos.deviceToHost<0>();
177 o_part_loc2.deviceToHost<0,1>();
178 v_prp.deviceToHost<0,1,2>();
179
180 size_t base = old;
181 size_t base_o = 0;
182 for (size_t i = 0 ; i < old ; i++)
183 {
184 if (v_pos.template get<0>(i)[0] >= 0.5)
185 {}
186 else if (v_pos.template get<0>(i)[0] >= 0.3)
187 {
188 for (size_t j = 0 ; j < o_part_loc.template get<0>(i) ; j++)
189 {
190 match &= v_pos.template get<0>(base)[0] < 1.0 - (j+1.0)*10.0;
191 match &= v_pos.template get<0>(base)[0] >= -(j+1.0)*10.0;
192
193 match &= o_part_loc2.template get<0>(base_o) == i;
194 match &= o_part_loc2.template get<1>(base_o) == j;
195
197
198 match &= v_prp.template get<0>(base) == v_prp.template get<0>(i);
199
200 match &= v_prp.template get<1>(base)[0] == v_prp.template get<1>(i)[0];
201 match &= v_prp.template get<1>(base)[1] == v_prp.template get<1>(i)[1];
202 match &= v_prp.template get<1>(base)[2] == v_prp.template get<1>(i)[2];
203
204 match &= v_prp.template get<2>(base)[0][0] == v_prp.template get<2>(i)[0][0];
205 match &= v_prp.template get<2>(base)[0][1] == v_prp.template get<2>(i)[0][1];
206 match &= v_prp.template get<2>(base)[0][2] == v_prp.template get<2>(i)[0][2];
207 match &= v_prp.template get<2>(base)[1][0] == v_prp.template get<2>(i)[1][0];
208 match &= v_prp.template get<2>(base)[1][1] == v_prp.template get<2>(i)[1][1];
209 match &= v_prp.template get<2>(base)[1][2] == v_prp.template get<2>(i)[1][2];
210 match &= v_prp.template get<2>(base)[2][0] == v_prp.template get<2>(i)[2][0];
211 match &= v_prp.template get<2>(base)[2][1] == v_prp.template get<2>(i)[2][1];
212 match &= v_prp.template get<2>(base)[2][2] == v_prp.template get<2>(i)[2][2];
213
214 base++;
215 base_o++;
216 }
217 }
218 else if (v_pos.template get<0>(i)[0] >= 0.2)
219 {
220 for (size_t j = 0 ; j < o_part_loc.template get<0>(i) ; j++)
221 {
222 match &= v_pos.template get<0>(base)[0] < 1.0 - (j+1.0)*10.0;
223 match &= v_pos.template get<0>(base)[0] >= -(j+1.0)*10.0;
224
225 match &= o_part_loc2.template get<0>(base_o) == i;
226 match &= o_part_loc2.template get<1>(base_o) == j;
227
229
230 match &= v_prp.template get<0>(base) == v_prp.template get<0>(i);
231
232 match &= v_prp.template get<1>(base)[0] == v_prp.template get<1>(i)[0];
233 match &= v_prp.template get<1>(base)[1] == v_prp.template get<1>(i)[1];
234 match &= v_prp.template get<1>(base)[2] == v_prp.template get<1>(i)[2];
235
236 match &= v_prp.template get<2>(base)[0][0] == v_prp.template get<2>(i)[0][0];
237 match &= v_prp.template get<2>(base)[0][1] == v_prp.template get<2>(i)[0][1];
238 match &= v_prp.template get<2>(base)[0][2] == v_prp.template get<2>(i)[0][2];
239 match &= v_prp.template get<2>(base)[1][0] == v_prp.template get<2>(i)[1][0];
240 match &= v_prp.template get<2>(base)[1][1] == v_prp.template get<2>(i)[1][1];
241 match &= v_prp.template get<2>(base)[1][2] == v_prp.template get<2>(i)[1][2];
242 match &= v_prp.template get<2>(base)[2][0] == v_prp.template get<2>(i)[2][0];
243 match &= v_prp.template get<2>(base)[2][1] == v_prp.template get<2>(i)[2][1];
244 match &= v_prp.template get<2>(base)[2][2] == v_prp.template get<2>(i)[2][2];
245
246
247 base++;
248 base_o++;
249 }
250 }
251 else if (v_pos.template get<0>(i)[0] >= 0.1)
252 {
253 for (size_t j = 0 ; j < o_part_loc.template get<0>(i) ; j++)
254 {
255 match &= v_pos.template get<0>(base)[0] < 1.0 - (j+1.0)*10.0;
256 match &= v_pos.template get<0>(base)[0] >= -(j+1.0)*10.0;
257
258 match &= o_part_loc2.template get<0>(base_o) == i;
259 match &= o_part_loc2.template get<1>(base_o) == j;
260
262
263 match &= v_prp.template get<0>(base) == v_prp.template get<0>(i);
264
265 match &= v_prp.template get<1>(base)[0] == v_prp.template get<1>(i)[0];
266 match &= v_prp.template get<1>(base)[1] == v_prp.template get<1>(i)[1];
267 match &= v_prp.template get<1>(base)[2] == v_prp.template get<1>(i)[2];
268
269 match &= v_prp.template get<2>(base)[0][0] == v_prp.template get<2>(i)[0][0];
270 match &= v_prp.template get<2>(base)[0][1] == v_prp.template get<2>(i)[0][1];
271 match &= v_prp.template get<2>(base)[0][2] == v_prp.template get<2>(i)[0][2];
272 match &= v_prp.template get<2>(base)[1][0] == v_prp.template get<2>(i)[1][0];
273 match &= v_prp.template get<2>(base)[1][1] == v_prp.template get<2>(i)[1][1];
274 match &= v_prp.template get<2>(base)[1][2] == v_prp.template get<2>(i)[1][2];
275 match &= v_prp.template get<2>(base)[2][0] == v_prp.template get<2>(i)[2][0];
276 match &= v_prp.template get<2>(base)[2][1] == v_prp.template get<2>(i)[2][1];
277 match &= v_prp.template get<2>(base)[2][2] == v_prp.template get<2>(i)[2][2];
278
279 base++;
280 base_o++;
281 }
282 }
283 else
284 {
285 for (size_t j = 0 ; j < o_part_loc.template get<0>(i) ; j++)
286 {
287 match &= v_pos.template get<0>(base)[0] < 1.0 - (j+1.0)*10.0;
288 match &= v_pos.template get<0>(base)[0] >= -(j+1.0)*10.0;
289
290 match &= o_part_loc2.template get<0>(base_o) == i;
291 match &= o_part_loc2.template get<1>(base_o) == j;
292
294
295 match &= v_prp.template get<0>(base) == v_prp.template get<0>(i);
296
297 match &= v_prp.template get<1>(base)[0] == v_prp.template get<1>(i)[0];
298 match &= v_prp.template get<1>(base)[1] == v_prp.template get<1>(i)[1];
299 match &= v_prp.template get<1>(base)[2] == v_prp.template get<1>(i)[2];
300
301 match &= v_prp.template get<2>(base)[0][0] == v_prp.template get<2>(i)[0][0];
302 match &= v_prp.template get<2>(base)[0][1] == v_prp.template get<2>(i)[0][1];
303 match &= v_prp.template get<2>(base)[0][2] == v_prp.template get<2>(i)[0][2];
304 match &= v_prp.template get<2>(base)[1][0] == v_prp.template get<2>(i)[1][0];
305 match &= v_prp.template get<2>(base)[1][1] == v_prp.template get<2>(i)[1][1];
306 match &= v_prp.template get<2>(base)[1][2] == v_prp.template get<2>(i)[1][2];
307 match &= v_prp.template get<2>(base)[2][0] == v_prp.template get<2>(i)[2][0];
308 match &= v_prp.template get<2>(base)[2][1] == v_prp.template get<2>(i)[2][1];
309 match &= v_prp.template get<2>(base)[2][2] == v_prp.template get<2>(i)[2][2];
310
311 base++;
312 base_o++;
313 }
314 }
315 }
316
317 BOOST_REQUIRE_EQUAL(match,true);
318
320
323
324 v_pos2.resize(old);
325 v_prp2.resize(old);
326
327 for (size_t i = 0 ; i < old ; i++)
328 {
329 v_pos2.template get<0>(i)[0] = v_pos.template get<0>(i)[0];
330 v_pos2.template get<0>(i)[1] = v_pos.template get<0>(i)[1];
331 v_pos2.template get<0>(i)[2] = v_pos.template get<0>(i)[2];
332
333 v_prp2.template get<0>(i) = v_prp.template get<0>(i);
334
335 v_prp2.template get<1>(i)[0] = v_prp.template get<1>(i)[0];
336 v_prp2.template get<1>(i)[1] = v_prp.template get<1>(i)[1];
337 v_prp2.template get<1>(i)[2] = v_prp.template get<1>(i)[2];
338
339 v_prp2.template get<2>(i)[0][0] = v_prp.template get<2>(i)[0][0];
340 v_prp2.template get<2>(i)[0][1] = v_prp.template get<2>(i)[0][1];
341 v_prp2.template get<2>(i)[0][2] = v_prp.template get<2>(i)[0][2];
342 v_prp2.template get<2>(i)[1][0] = v_prp.template get<2>(i)[1][0];
343 v_prp2.template get<2>(i)[1][1] = v_prp.template get<2>(i)[1][1];
344 v_prp2.template get<2>(i)[1][2] = v_prp.template get<2>(i)[1][2];
345 v_prp2.template get<2>(i)[2][0] = v_prp.template get<2>(i)[2][0];
346 v_prp2.template get<2>(i)[2][1] = v_prp.template get<2>(i)[2][1];
347 v_prp2.template get<2>(i)[2][2] = v_prp.template get<2>(i)[2][2];
348 }
349
350 v_pos2.resize(v_pos.size());
351 v_prp2.resize(v_prp.size());
352
353 v_pos2.hostToDevice<0>();
354 v_prp2.hostToDevice<0,1,2>();
355
356 ite = o_part_loc2.getGPUIterator();
357
358 CUDA_LAUNCH_DIM3((process_ghost_particles_local<true,3,decltype(o_part_loc2.toKernel()),decltype(v_pos2.toKernel()),decltype(v_prp2.toKernel()),decltype(shifts.toKernel())>),
359 ite.wthr,ite.thr,
360 o_part_loc2.toKernel(),v_pos2.toKernel(),v_prp2.toKernel(),shifts.toKernel(),old);
361
362 v_pos2.template deviceToHost<0>();
363 v_prp2.template deviceToHost<0,1,2>();
364
365 for (size_t i = old ; i < v_pos.size() ; i++)
366 {
367 match &= v_pos.template get<0>(i)[0] == v_pos2.template get<0>(i)[0];
368 match &= v_pos.template get<0>(i)[1] == v_pos2.template get<0>(i)[1];
369 match &= v_pos.template get<0>(i)[2] == v_pos2.template get<0>(i)[2];
370
371 match &= v_prp2.template get<0>(i) == v_prp.template get<0>(i);
372
373 match &= v_prp2.template get<1>(i)[0] == v_prp.template get<1>(i)[0];
374 match &= v_prp2.template get<1>(i)[1] == v_prp.template get<1>(i)[1];
375 match &= v_prp2.template get<1>(i)[2] == v_prp.template get<1>(i)[2];
376
377 match &= v_prp2.template get<2>(i)[0][0] == v_prp.template get<2>(i)[0][0];
378 match &= v_prp2.template get<2>(i)[0][1] == v_prp.template get<2>(i)[0][1];
379 match &= v_prp2.template get<2>(i)[0][2] == v_prp.template get<2>(i)[0][2];
380 match &= v_prp2.template get<2>(i)[1][0] == v_prp.template get<2>(i)[1][0];
381 match &= v_prp2.template get<2>(i)[1][1] == v_prp.template get<2>(i)[1][1];
382 match &= v_prp2.template get<2>(i)[1][2] == v_prp.template get<2>(i)[1][2];
383 match &= v_prp2.template get<2>(i)[2][0] == v_prp.template get<2>(i)[2][0];
384 match &= v_prp2.template get<2>(i)[2][1] == v_prp.template get<2>(i)[2][1];
385 match &= v_prp2.template get<2>(i)[2][2] == v_prp.template get<2>(i)[2][2];
386 }
387
388 BOOST_REQUIRE_EQUAL(match,true);
389}
390
391BOOST_AUTO_TEST_CASE( vector_ghost_fill_send_buffer_test )
392{
394
395 // Sending property object
397
398 // send vector for each processor
400
402
403 auto & v_cl = create_vcluster();
404
405 // Vcluster
406 Vcluster<> & vcl = create_vcluster();
407
409 v_prp.resize(10000);
410
412
413 for (size_t i = 0 ; i < v_prp.size() ; i++)
414 {
415 v_prp.template get<0>(i) = i+12345;
416
417 v_prp.template get<1>(i)[0] = i;
418 v_prp.template get<1>(i)[1] = i+20000;
419 v_prp.template get<1>(i)[2] = i+50000;
420
421 v_prp.template get<2>(i)[0][0] = i+60000;
422 v_prp.template get<2>(i)[0][1] = i+70000;
423 v_prp.template get<2>(i)[0][2] = i+80000;
424 v_prp.template get<2>(i)[1][0] = i+90000;
425 v_prp.template get<2>(i)[1][1] = i+100000;
426 v_prp.template get<2>(i)[1][2] = i+110000;
427 v_prp.template get<2>(i)[2][0] = i+120000;
428 v_prp.template get<2>(i)[2][1] = i+130000;
429 v_prp.template get<2>(i)[2][2] = i+140000;
430 }
431
432 v_prp.hostToDevice<0,1,2>();
433
434 g_opart_device.resize(2*10000*3);
435
436 for (size_t i = 0 ; i < 3 ; i++)
437 {
438 for (size_t j = 0 ; j < 10000 ; j++)
439 {
440 g_opart_device.template get<0>(i*2*10000 + j*2) = i;
441 g_opart_device.template get<0>(i*2*10000 + j*2+1) = i;
442
443 g_opart_device.template get<1>(i*2*10000 + j*2) = j;
444 g_opart_device.template get<1>(i*2*10000 + j*2+1) = j;
445
446 g_opart_device.template get<2>(i*2*10000 + j*2) = 0;
447 g_opart_device.template get<2>(i*2*10000 + j*2+1) = 0;
448 }
449 }
450
451 g_opart_device.hostToDevice<0,1,2>();
452
453 g_send_prp.resize(3);
454
455 bool match = true;
456 size_t offset = 0;
457
458 for (size_t i = 0 ; i < 3 ; i++)
459 {
460 g_send_prp.get(i).resize(2*10000);
461
462 auto ite = g_send_prp.get(i).getGPUIterator();
463
464 CUDA_LAUNCH_DIM3((process_ghost_particles_prp<decltype(g_opart_device.toKernel()),decltype(g_send_prp.get(i).toKernel()),decltype(v_prp.toKernel()),0,1,2>),
465 ite.wthr,ite.thr,
466 g_opart_device.toKernel(), g_send_prp.get(i).toKernel(),
467 v_prp.toKernel(),offset);
468
469 offset += g_send_prp.get(i).size();
470
472
473 g_send_prp.get(i).deviceToHost<0,1,2>();
474
475 for (size_t j = 0 ; j < 10000 ; j++)
476 {
477 match &= g_send_prp.get(i).template get<0>(2*j) == j+12345;
478
479 match &= g_send_prp.get(i).template get<1>(2*j)[0] == j;
480 match &= g_send_prp.get(i).template get<1>(2*j)[1] == j+20000;
481 match &= g_send_prp.get(i).template get<1>(2*j)[2] == j+50000;
482
483 match &= g_send_prp.get(i).template get<2>(2*j)[0][0] == j+60000;
484 match &= g_send_prp.get(i).template get<2>(2*j)[0][1] == j+70000;
485 match &= g_send_prp.get(i).template get<2>(2*j)[0][2] == j+80000;
486 match &= g_send_prp.get(i).template get<2>(2*j)[1][0] == j+90000;
487 match &= g_send_prp.get(i).template get<2>(2*j)[1][1] == j+100000;
488 match &= g_send_prp.get(i).template get<2>(2*j)[1][2] == j+110000;
489 match &= g_send_prp.get(i).template get<2>(2*j)[2][0] == j+120000;
490 match &= g_send_prp.get(i).template get<2>(2*j)[2][1] == j+130000;
491 match &= g_send_prp.get(i).template get<2>(2*j)[2][2] == j+140000;
492
493
494 match = g_send_prp.get(i).template get<0>(2*j+1) == j+12345;
495
496 match = g_send_prp.get(i).template get<1>(2*j+1)[0] == j;
497 match = g_send_prp.get(i).template get<1>(2*j+1)[1] == j+20000;
498 match = g_send_prp.get(i).template get<1>(2*j+1)[2] == j+50000;
499
500 match = g_send_prp.get(i).template get<2>(2*j+1)[0][0] == j+60000;
501 match = g_send_prp.get(i).template get<2>(2*j+1)[0][1] == j+70000;
502 match = g_send_prp.get(i).template get<2>(2*j+1)[0][2] == j+80000;
503 match = g_send_prp.get(i).template get<2>(2*j+1)[1][0] == j+90000;
504 match = g_send_prp.get(i).template get<2>(2*j+1)[1][1] == j+100000;
505 match = g_send_prp.get(i).template get<2>(2*j+1)[1][2] == j+110000;
506 match = g_send_prp.get(i).template get<2>(2*j+1)[2][0] == j+120000;
507 match = g_send_prp.get(i).template get<2>(2*j+1)[2][1] == j+130000;
508 match = g_send_prp.get(i).template get<2>(2*j+1)[2][2] == j+140000;
509 }
510 }
511
512 BOOST_REQUIRE_EQUAL(match,true);
513}
514
515BOOST_AUTO_TEST_CASE( decomposition_ie_ghost_gpu_test_use )
516{
517 auto & v_cl = create_vcluster();
518
519 // Vcluster
520 Vcluster<> & vcl = create_vcluster();
521
523
524 dec_type dec(vcl);
525
526 // Physical domain
527 Box<3, float> box( { 0.0, 0.0, 0.0 }, { 1.0, 1.0, 1.0 });
528 size_t div[3];
529
530 // Get the number of processor and calculate the number of sub-domain
531 // for each processor (SUB_UNIT_FACTOR=64)
532 size_t n_proc = vcl.getProcessingUnits();
533 size_t n_sub = n_proc * SUB_UNIT_FACTOR;
534
535 // Set the number of sub-domains on each dimension (in a scalable way)
536 for (int i = 0; i < 3; i++)
537 { div[i] = openfpm::math::round_big_2(pow(n_sub,1.0/3));}
538
539 // Define ghost
540 Ghost<3, float> g(0.1);
541
542 // Boundary conditions
543 size_t bc[] = { PERIODIC, PERIODIC, PERIODIC };
544
545 // Decompose
546 dec.setParameters(div,box,bc,g);
547 dec.decompose();
548
549 // Get the local boxes
550
551 int nsub = dec.getNSubDomain();
552 int n_part = 10000 / nsub;
553
555 vg.resize(nsub*n_part);
556
557 for (size_t k = 0 ; k < nsub ; k++)
558 {
559 SpaceBox<3,float> sp = dec.getSubDomain(k);
560
561 for (size_t j = 0 ; j < n_part ; j++)
562 {
563 vg.template get<0>(k*n_part+j)[0] = (sp.getHigh(0) - sp.getLow(0))*((float)rand()/(float)RAND_MAX) + sp.getLow(0);
564 vg.template get<0>(k*n_part+j)[1] = (sp.getHigh(1) - sp.getLow(1))*((float)rand()/(float)RAND_MAX) + sp.getLow(1);
565 vg.template get<0>(k*n_part+j)[2] = (sp.getHigh(2) - sp.getLow(2))*((float)rand()/(float)RAND_MAX) + sp.getLow(2);
566 }
567 }
568
569 vg.hostToDevice<0>();
570
571 // process on GPU the processor ID for each particles
572
573 auto ite = vg.getGPUIterator();
574
576 proc_id_out.resize(vg.size()+1);
577 proc_id_out.template get<0>(proc_id_out.size()-1) = 0;
578 proc_id_out.template hostToDevice(proc_id_out.size()-1,proc_id_out.size()-1);
579
580 CUDA_LAUNCH_DIM3((num_proc_ghost_each_part<3,float,decltype(dec.toKernel()),decltype(vg.toKernel()),decltype(proc_id_out.toKernel())>),
581 ite.wthr,ite.thr,
582 dec.toKernel(),vg.toKernel(),proc_id_out.toKernel());
583
584/* proc_id_out.deviceToHost<0>();
585
586 bool match = true;
587 for (size_t i = 0 ; i < vg.size() ; i++)
588 {
589 Point<3,float> xp = vg.template get<0>(i);
590
591 match &= proc_id_out.template get<0>(i) == dec.ghost_processorID_N(xp);
592
593 const openfpm::vector<std::pair<size_t, size_t>> & vp_id = dec.template ghost_processorID_pair<typename dec_type::lc_processor_id, typename dec_type::shift_id>(xp, UNIQUE);
594
595 match &= proc_id_out.template get<0>(i) == vp_id.size();
596 }
597
598
599 BOOST_REQUIRE_EQUAL(match,true);
600
602
603 openfpm::vector<aggregate<unsigned int>,
604 CudaMemory,
605 memory_traits_inte> starts;
606
607 starts.resize(proc_id_out.size());
608
609 openfpm::scan((unsigned int *)proc_id_out.template getDeviceBuffer<0>(),proc_id_out.size(),(unsigned int *)starts.template getDeviceBuffer<0>(),v_cl.getgpuContext());
610
611 starts.deviceToHost<0>(starts.size()-1,starts.size()-1);
612
613 size_t sz = starts.template get<0>(starts.size()-1);
614
616
617 openfpm::vector<aggregate<unsigned int,long unsigned int>,
618 CudaMemory,
619 memory_traits_inte> output;
620
621 output.resize(sz);
622
623 ite = vg.getGPUIterator();
624
625 // we compute processor id for each particle
626 CUDA_LAUNCH_DIM3((proc_label_id_ghost<3,float,decltype(dec.toKernel()),decltype(vg.toKernel()),decltype(starts.toKernel()),decltype(output.toKernel())>),
627 ite.wthr,ite.thr,
628 dec.toKernel(),vg.toKernel(),starts.toKernel(),output.toKernel());
629
630 output.template deviceToHost<0,1>();
631
633
634 starts.deviceToHost<0>();
635
636 match = true;
637
638 for (size_t i = 0 ; i < starts.size() - 1 ; i++)
639 {
640 size_t base = starts.template get<0>(i);
641 size_t sz = starts.template get<0>(i+1) - base;
642
643 if (sz != 0)
644 {
645 size_t pid = output.template get<1>(base) & 0xFFFFFFFF;
646 Point<3,float> xp = vg.template get<0>(pid);
647
648 openfpm::vector<proc_box_id> tmp_sort1;
649 openfpm::vector<proc_box_id> tmp_sort2;
650
651 const openfpm::vector<std::pair<size_t, size_t>> & vp_id = dec.template ghost_processorID_pair<typename dec_type::lc_processor_id, typename dec_type::shift_id>(xp, UNIQUE);
652
653 tmp_sort1.resize(vp_id.size());
654
655 for (size_t j = 0 ; j < vp_id.size() ; j++)
656 {
657 tmp_sort1.get(j).proc_id = dec.IDtoProc(vp_id.get(j).first);
658 tmp_sort1.get(j).box_id = 0;
659 tmp_sort1.get(j).shift_id = vp_id.get(j).second;
660 }
661
662 tmp_sort1.sort();
663
664 tmp_sort2.resize(sz);
665 for (size_t j = 0 ; j < sz ; j++)
666 {
667 tmp_sort2.get(j).proc_id = output.template get<0>(base+j);
668 tmp_sort2.get(j).box_id = 0;
669 tmp_sort2.get(j).shift_id = output.template get<1>(base+j) >> 32;
670 }
671
672 tmp_sort2.sort();
673
674 match &= tmp_sort1.size() == tmp_sort2.size();
675
676 for (size_t j = 0 ; j < tmp_sort1.size() ; j++)
677 {
678 match &= tmp_sort1.get(j).proc_id == tmp_sort2.get(j).proc_id;
679 match &= tmp_sort1.get(j).shift_id == tmp_sort2.get(j).shift_id;
680 }
681 }
682 }
683
684 BOOST_REQUIRE_EQUAL(match,true);*/
685}
686
687BOOST_AUTO_TEST_CASE( decomposition_to_gpu_test_use )
688{
689 auto & v_cl = create_vcluster();
690
691 // Vcluster
692 Vcluster<> & vcl = create_vcluster();
693
695
696 // Physical domain
697 Box<3, float> box( { 0.0, 0.0, 0.0 }, { 1.0, 1.0, 1.0 });
698 size_t div[3];
699
700 // Get the number of processor and calculate the number of sub-domain
701 // for each processor (SUB_UNIT_FACTOR=64)
702 size_t n_proc = vcl.getProcessingUnits();
703 size_t n_sub = n_proc * SUB_UNIT_FACTOR;
704
705 // Set the number of sub-domains on each dimension (in a scalable way)
706 for (int i = 0; i < 3; i++)
707 { div[i] = openfpm::math::round_big_2(pow(n_sub,1.0/3));}
708
709 // Define ghost
710 Ghost<3, float> g(0.01);
711
712 // Boundary conditions
713 size_t bc[] = { PERIODIC, PERIODIC, PERIODIC };
714
715 // Decompose
716 dec.setParameters(div,box,bc,g);
717 dec.decompose();
718
720 vg.resize(10000);
721
722 for (size_t i = 0 ; i < 10000 ; i++)
723 {
724 vg.template get<0>(i)[0] = (float)rand()/(float)RAND_MAX;
725 vg.template get<0>(i)[1] = (float)rand()/(float)RAND_MAX;
726 vg.template get<0>(i)[2] = (float)rand()/(float)RAND_MAX;
727 }
728
729 vg.hostToDevice<0>();
730
731 // process on GPU the processor ID for each particles
732
733 auto ite = vg.getGPUIterator();
734
736 proc_id_out.resize(vg.size());
737
739 dev_counter.resize(v_cl.size());
740 dev_counter.fill<0>(0);
741 dev_counter.fill<1>(0);
742 dev_counter.fill<2>(0);
743
744 CUDA_LAUNCH_DIM3((process_id_proc_each_part<3,float,decltype(dec.toKernel()),decltype(vg.toKernel()),decltype(proc_id_out.toKernel()),decltype(dev_counter.toKernel())>),
745 ite.wthr,ite.thr,
746 dec.toKernel(),vg.toKernel(),proc_id_out.toKernel(),dev_counter.toKernel(),v_cl.rank());
747
748
749 proc_id_out.deviceToHost<0>();
750
751 bool match = true;
752 for (size_t i = 0 ; i < proc_id_out.size() ; i++)
753 {
754 Point<3,float> xp = vg.template get<0>(i);
755
756 match &= proc_id_out.template get<0>(i) == dec.processorIDBC(xp);
757 }
758}
759
760
761BOOST_AUTO_TEST_CASE( vector_dist_gpu_find_buffer_offsets_test )
762{
765
766 vgp.resize(200000);
767
768 for (size_t k = 0 ; k < vgp.size() ; k++)
769 {
770 vgp.template get<0>(k) = k / 1000;
771 vgp.template get<1>(k) = k / 1000;
772 }
773
774 offs.resize(220);
775
776 CudaMemory mem;
777 mem.allocate(sizeof(int));
778 mem.fill(0);
779
780 auto ite = vgp.getGPUIterator();
781 vgp.hostToDevice<0,1>();
782
783 CUDA_LAUNCH((find_buffer_offsets<1,decltype(vgp.toKernel()),decltype(offs.toKernel())>),ite,vgp.toKernel(),(int *)mem.getDevicePointer(),offs.toKernel());
784
785 offs.template deviceToHost<0,1>();
786
787 mem.deviceToHost();
788 int n_ele = *(int *)mem.getPointer();
789 BOOST_REQUIRE_EQUAL(n_ele,199);
790
793
794 for (size_t i = 0 ; i < n_ele ; i++)
795 {
796 ofv.add(offs.template get<0>(i));
797 ofv2.add(offs.template get<1>(i));
798 }
799
800 ofv.sort();
801 ofv2.sort();
802
803 for (size_t i = 0 ; i < ofv.size() ; i++)
804 {
805 BOOST_REQUIRE_EQUAL(ofv.get(i),(i+1)*1000);
806 BOOST_REQUIRE_EQUAL(ofv2.get(i),i);
807 }
808}
809
810BOOST_AUTO_TEST_CASE(vector_dist_reorder_lbl)
811{
814
815 lbl_p.resize(100);
816 starts.resize(10);
817
818 for (int i = 0 ; i < 10 ; i++) // <------ particle id
819 {
820 for (int j = 0 ; j < 10 ; j++) // <----- processor
821 {
822 lbl_p.template get<2>(i*10+j) = i;
823 lbl_p.template get<1>(i*10+j) = j;
824 }
825 starts.template get<0>(i) = (i*10);
826 }
827
828 // move lbl and starts to gpu
829 starts.template hostToDevice<0>();
830 lbl_p.template hostToDevice<1,2>();
831
832 auto ite = lbl_p.getGPUIterator();
833
834 CUDA_LAUNCH_DIM3((reorder_lbl<decltype(lbl_p.toKernel()),decltype(starts.toKernel())>),ite.wthr,ite.thr,lbl_p.toKernel(),starts.toKernel());
835
836 starts.template deviceToHost<0>();
837 lbl_p.template deviceToHost<0,1,2>();
838
839 for (int i = 0 ; i < 10 ; i++) // <------ particle id
840 {
841 for (int j = 0 ; j < 10 ; j++) // <----- processor
842 {
843 BOOST_REQUIRE_EQUAL(lbl_p.template get<0>(j*10+i),i*10+j);
844 }
845 }
846}
847
848BOOST_AUTO_TEST_CASE(vector_dist_merge_sort)
849{
852
855
857
858 v_prp.resize(10000);
859 v_pos.resize(10000);
860 v_prp_out.resize(10000);
861 v_pos_out.resize(10000);
862 ns_to_s.resize(10000);
863
864 for (int i = 0 ; i < 10000 ; i++) // <------ particle id
865 {
866 v_pos_out.template get<0>(i)[0] = i;
867 v_pos_out.template get<0>(i)[1] = i+10000;
868 v_pos_out.template get<0>(i)[2] = i+20000;
869
870 v_pos.template get<0>(i)[0] = 0;
871 v_pos.template get<0>(i)[1] = 0;
872 v_pos.template get<0>(i)[2] = 0;
873
874 v_prp_out.template get<0>(i)[0] = i+60123;
875 v_prp_out.template get<0>(i)[1] = i+73543;
876 v_prp_out.template get<0>(i)[2] = i+82432;
877
878 v_prp_out.template get<1>(i)[0] = i+80123;
879 v_prp_out.template get<1>(i)[1] = i+93543;
880 v_prp_out.template get<1>(i)[2] = i+102432;
881
882 v_prp_out.template get<2>(i)[0] = i+110123;
883 v_prp_out.template get<2>(i)[1] = i+123543;
884 v_prp_out.template get<2>(i)[2] = i+132432;
885
886 v_prp.template get<0>(i)[0] = 0;
887 v_prp.template get<0>(i)[1] = 0;
888 v_prp.template get<0>(i)[2] = 0;
889
890 v_prp.template get<1>(i)[0] = 0;
891 v_prp.template get<1>(i)[1] = 0;
892 v_prp.template get<1>(i)[2] = 0;
893
894 v_prp.template get<2>(i)[0] = 0;
895 v_prp.template get<2>(i)[1] = 0;
896 v_prp.template get<2>(i)[2] = 0;
897
898 ns_to_s.template get<0>(i) = 10000-i-1;
899 }
900
901 v_prp.template hostToDevice<0,1,2>();
902 v_prp_out.template hostToDevice<0,1,2>();
903 v_pos.template hostToDevice<0>();
904 v_pos_out.template hostToDevice<0>();
905 ns_to_s.template hostToDevice<0>();
906
907 auto ite = v_pos.getGPUIterator();
908
909 CUDA_LAUNCH_DIM3((merge_sort_part<false,decltype(v_pos.toKernel()),decltype(v_prp.toKernel()),decltype(ns_to_s.toKernel()),0>),ite.wthr,ite.thr,v_pos.toKernel(),v_prp.toKernel(),
910 v_pos_out.toKernel(),v_prp_out.toKernel(),
911 ns_to_s.toKernel());
912
913 v_prp.template deviceToHost<0,1,2>();
914
915 bool match = true;
916 for (int i = 0 ; i < 10000 ; i++) // <------ particle id
917 {
918 match &= v_prp_out.template get<0>(10000-i-1)[0] == v_prp.template get<0>(i)[0];
919 match &= v_prp_out.template get<0>(10000-i-1)[1] == v_prp.template get<0>(i)[1];
920 match &= v_prp_out.template get<0>(10000-i-1)[2] == v_prp.template get<0>(i)[2];
921
922 match &= v_prp.template get<1>(10000-i-1)[0] == 0;
923 match &= v_prp.template get<1>(10000-i-1)[1] == 0;
924 match &= v_prp.template get<1>(10000-i-1)[2] == 0;
925
926 match &= v_prp.template get<2>(10000-i-1)[0] == 0;
927 match &= v_prp.template get<2>(10000-i-1)[1] == 0;
928 match &= v_prp.template get<2>(10000-i-1)[2] == 0;
929 }
930
931 BOOST_REQUIRE_EQUAL(match,true);
932
933 CUDA_LAUNCH_DIM3((merge_sort_part<false,decltype(v_pos.toKernel()),decltype(v_prp.toKernel()),decltype(ns_to_s.toKernel()),1,2>),ite.wthr,ite.thr,v_pos.toKernel(),v_prp.toKernel(),
934 v_pos_out.toKernel(),v_prp_out.toKernel(),
935 ns_to_s.toKernel());
936
937 v_prp.template deviceToHost<0,1,2>();
938 v_pos.template deviceToHost<0>();
939
940 for (int i = 0 ; i < 10000 ; i++) // <------ particle id
941 {
942 match &= v_prp_out.template get<0>(10000-i-1)[0] == v_prp.template get<0>(i)[0];
943 match &= v_prp_out.template get<0>(10000-i-1)[1] == v_prp.template get<0>(i)[1];
944 match &= v_prp_out.template get<0>(10000-i-1)[2] == v_prp.template get<0>(i)[2];
945
946 match &= v_prp_out.template get<1>(10000-i-1)[0] == v_prp.template get<1>(i)[0];
947 match &= v_prp_out.template get<1>(10000-i-1)[1] == v_prp.template get<1>(i)[1];
948 match &= v_prp_out.template get<1>(10000-i-1)[2] == v_prp.template get<1>(i)[2];
949
950 match &= v_prp_out.template get<2>(10000-i-1)[0] == v_prp.template get<2>(i)[0];
951 match &= v_prp_out.template get<2>(10000-i-1)[1] == v_prp.template get<2>(i)[1];
952 match &= v_prp_out.template get<2>(10000-i-1)[2] == v_prp.template get<2>(i)[2];
953
954 match &= v_pos.template get<0>(10000-i-1)[0] == 0;
955 match &= v_pos.template get<0>(10000-i-1)[1] == 0;
956 match &= v_pos.template get<0>(10000-i-1)[2] == 0;
957 }
958
959 BOOST_REQUIRE_EQUAL(match,true);
960
961 CUDA_LAUNCH_DIM3((merge_sort_part<true,decltype(v_pos.toKernel()),decltype(v_prp.toKernel()),decltype(ns_to_s.toKernel())>),ite.wthr,ite.thr,v_pos.toKernel(),v_prp.toKernel(),
962 v_pos_out.toKernel(),v_prp_out.toKernel(),
963 ns_to_s.toKernel());
964
965 v_prp.template deviceToHost<0,1,2>();
966 v_pos.template deviceToHost<0>();
967
968 for (int i = 0 ; i < 10000 ; i++) // <------ particle id
969 {
970
971
972 match &= v_prp_out.template get<0>(10000-i-1)[0] == v_prp.template get<0>(i)[0];
973 match &= v_prp_out.template get<0>(10000-i-1)[1] == v_prp.template get<0>(i)[1];
974 match &= v_prp_out.template get<0>(10000-i-1)[2] == v_prp.template get<0>(i)[2];
975
976 match &= v_prp_out.template get<1>(10000-i-1)[0] == v_prp.template get<1>(i)[0];
977 match &= v_prp_out.template get<1>(10000-i-1)[1] == v_prp.template get<1>(i)[1];
978 match &= v_prp_out.template get<1>(10000-i-1)[2] == v_prp.template get<1>(i)[2];
979
980 match &= v_prp_out.template get<2>(10000-i-1)[0] == v_prp.template get<2>(i)[0];
981 match &= v_prp_out.template get<2>(10000-i-1)[1] == v_prp.template get<2>(i)[1];
982 match &= v_prp_out.template get<2>(10000-i-1)[2] == v_prp.template get<2>(i)[2];
983
984
985 match &= v_pos_out.template get<0>(10000-i-1)[0] == v_pos.template get<0>(i)[0];
986 match &= v_pos_out.template get<0>(10000-i-1)[1] == v_pos.template get<0>(i)[1];
987 match &= v_pos_out.template get<0>(10000-i-1)[2] == v_pos.template get<0>(i)[2];
988 }
989
990 BOOST_REQUIRE_EQUAL(match,true);
991}
992
993BOOST_AUTO_TEST_CASE(vector_dist_gpu_map_fill_send_buffer_test)
994{
996
999
1002
1003 unsigned int offset = 0;
1004
1005 v_pos.resize(100000);
1006 v_prp.resize(v_pos.size());
1007 m_opart.resize(v_pos.size());
1008
1009 for (size_t i = 0 ; i < v_pos.size() ; i++)
1010 {
1011 v_pos.template get<0>(i)[0] = (float)rand()/(float)RAND_MAX;
1012 v_pos.template get<0>(i)[1] = (float)rand()/(float)RAND_MAX;
1013 v_pos.template get<0>(i)[2] = (float)rand()/(float)RAND_MAX;
1014
1015 v_prp.template get<0>(i) = 5.0 + (float)rand()/(float)RAND_MAX;
1016 v_prp.template get<1>(i)[0] = 10.0 + (float)rand()/(float)RAND_MAX;
1017 v_prp.template get<1>(i)[1] = 11.0 + (float)rand()/(float)RAND_MAX;
1018 v_prp.template get<2>(i)[0][0] = 40.0 + (float)rand()/(float)RAND_MAX;
1019 v_prp.template get<2>(i)[0][1] = 50.0 + (float)rand()/(float)RAND_MAX;
1020 v_prp.template get<2>(i)[0][2] = 60.0 + (float)rand()/(float)RAND_MAX;
1021 v_prp.template get<2>(i)[1][0] = 70.0 + (float)rand()/(float)RAND_MAX;
1022 v_prp.template get<2>(i)[1][1] = 80.0 + (float)rand()/(float)RAND_MAX;
1023 v_prp.template get<2>(i)[1][2] = 150.0 + (float)rand()/(float)RAND_MAX;
1024 v_prp.template get<2>(i)[2][0] = 160.0 + (float)rand()/(float)RAND_MAX;
1025 v_prp.template get<2>(i)[2][1] = 170.0 + (float)rand()/(float)RAND_MAX;
1026 v_prp.template get<2>(i)[2][2] = 340.0 + (float)rand()/(float)RAND_MAX;
1027
1028 int seg = i / 10000;
1029 m_opart.template get<1>(i) = seg;
1030 m_opart.template get<0>(i) = (9999 - i%10000) + seg * 10000;
1031 }
1032
1033 m_pos.resize(10);
1034 m_prp.resize(10);
1035
1036 for (size_t i = 0 ; i < m_pos.size() ; i++)
1037 {
1038 m_pos.get(i).resize(10000);
1039 m_prp.get(i).resize(10000);
1040 }
1041
1042 v_pos.hostToDevice<0>();
1043 v_prp.hostToDevice<0,1,2>();
1044
1045 m_opart.hostToDevice<0,1>();
1046
1047 for (size_t i = 0 ; i < m_pos.size() ; i++)
1048 {
1049 auto ite = m_pos.get(i).getGPUIterator();
1050
1051 CUDA_LAUNCH_DIM3((process_map_particles<decltype(m_opart.toKernel()),decltype(m_pos.get(i).toKernel()),decltype(m_prp.get(i).toKernel()),
1052 decltype(v_pos.toKernel()),decltype(v_prp.toKernel())>),
1053 ite.wthr,ite.thr,
1054 m_opart.toKernel(),m_pos.get(i).toKernel(), m_prp.get(i).toKernel(),
1055 v_pos.toKernel(),v_prp.toKernel(),offset);
1056
1057 m_pos.get(i).deviceToHost<0>();
1058 m_prp.get(i).deviceToHost<0,1,2>();
1059
1060 bool match = true;
1061
1062 for (size_t j = 0 ; j < m_pos.get(i).size() ; j++)
1063 {
1064 match &= (m_pos.get(i).template get<0>(j)[0] == v_pos.template get<0>(m_opart.template get<0>(offset+j))[0]);
1065 match &= (m_pos.get(i).template get<0>(j)[1] == v_pos.template get<0>(m_opart.template get<0>(offset+j))[1]);
1066 match &= (m_pos.get(i).template get<0>(j)[2] == v_pos.template get<0>(m_opart.template get<0>(offset+j))[2]);
1067
1068 match &= (m_prp.get(i).template get<0>(j) == v_prp.template get<0>(m_opart.template get<0>(offset+j)));
1069
1070 match &= (m_prp.get(i).template get<1>(j)[0] == v_prp.template get<1>(m_opart.template get<0>(offset+j))[0]);
1071 match &= (m_prp.get(i).template get<1>(j)[1] == v_prp.template get<1>(m_opart.template get<0>(offset+j))[1]);
1072
1073 match &= (m_prp.get(i).template get<2>(j)[0][0] == v_prp.template get<2>(m_opart.template get<0>(offset+j))[0][0]);
1074 match &= (m_prp.get(i).template get<2>(j)[0][1] == v_prp.template get<2>(m_opart.template get<0>(offset+j))[0][1]);
1075 match &= (m_prp.get(i).template get<2>(j)[0][2] == v_prp.template get<2>(m_opart.template get<0>(offset+j))[0][2]);
1076 match &= (m_prp.get(i).template get<2>(j)[1][0] == v_prp.template get<2>(m_opart.template get<0>(offset+j))[1][0]);
1077 match &= (m_prp.get(i).template get<2>(j)[1][1] == v_prp.template get<2>(m_opart.template get<0>(offset+j))[1][1]);
1078 match &= (m_prp.get(i).template get<2>(j)[1][2] == v_prp.template get<2>(m_opart.template get<0>(offset+j))[1][2]);
1079 match &= (m_prp.get(i).template get<2>(j)[2][0] == v_prp.template get<2>(m_opart.template get<0>(offset+j))[2][0]);
1080 match &= (m_prp.get(i).template get<2>(j)[2][1] == v_prp.template get<2>(m_opart.template get<0>(offset+j))[2][1]);
1081 match &= (m_prp.get(i).template get<2>(j)[2][2] == v_prp.template get<2>(m_opart.template get<0>(offset+j))[2][2]);
1082 }
1083
1084 BOOST_REQUIRE_EQUAL(match,true);
1085
1086 offset += m_pos.get(i).size();
1087 }
1088}
1089
1090template<unsigned int prp>
1091void vector_dist_remove_marked_type()
1092{
1093 auto & v_cl = create_vcluster();
1094
1095 if (v_cl.size() > 16)
1096 {return;}
1097
1098 Box<3,float> domain({0.0,0.0,0.0},{1.0,1.0,1.0});
1099
1100 // set the ghost based on the radius cut off (make just a little bit smaller than the spacing)
1101 Ghost<3,float> g(0.1);
1102
1103 // Boundary conditions
1104 size_t bc[3]={PERIODIC,PERIODIC,PERIODIC};
1105
1106 vector_dist_gpu<3,float,aggregate<float,float,int,int>> vd(50000*v_cl.size(),domain,bc,g);
1107
1108 // Fill the position
1109
1110 auto it = vd.getDomainIterator();
1111
1112 while(it.isNext())
1113 {
1114 auto p = it.get();
1115
1116 vd.getPos(p)[0] = (float)rand() / (float)RAND_MAX;
1117 vd.getPos(p)[1] = (float)rand() / (float)RAND_MAX;
1118 vd.getPos(p)[2] = (float)rand() / (float)RAND_MAX;
1119
1120 ++it;
1121 }
1122
1123 vd.map();
1124 vd.template ghost_get<>();
1125
1126 it = vd.getDomainIterator();
1127
1128 float fc = 1.0;
1129 float dc = 1.0;
1130 int ic = 1;
1131 int sc = 1;
1132
1133 while(it.isNext())
1134 {
1135 auto p = it.get();
1136
1137 vd.template getProp<0>(p) = fc;
1138 vd.template getProp<1>(p) = dc;
1139 vd.template getProp<2>(p) = ic;
1140 vd.template getProp<3>(p) = sc;
1141
1142 vd.template getProp<prp>(p) = (ic % 3 == 0);
1143
1144 fc += 1.0;
1145 dc += 1.0;
1146 ic += 1;
1147 sc += 1;
1148
1149 ++it;
1150 }
1151
1152 size_t sz = vd.size_local() - vd.size_local()/3;
1153
1154 vd.template hostToDeviceProp<0,1,2,3>();
1155
1156 remove_marked<prp>(vd);
1157
1158 BOOST_REQUIRE_EQUAL(vd.size_local(),sz);
1159
1160 vd.template deviceToHostProp<0,1,2,3>();
1161
1162 auto it2 = vd.getDomainIterator();
1163
1164 // There should not be number divisible by 3
1165
1166 bool test = true;
1167
1168 while(it2.isNext())
1169 {
1170 auto p = it2.get();
1171
1172 if (prp != 0)
1173 {test &= ((int)vd.template getProp<0>(p) % 3 != 0);}
1174
1175 if (prp != 1)
1176 {test &= ((int)vd.template getProp<1>(p) % 3 != 0);}
1177
1178 if (prp != 2)
1179 {test &= ((int)vd.template getProp<2>(p) % 3 != 0);}
1180
1181 if (prp != 3)
1182 {test &= ((int)vd.template getProp<3>(p) % 3 != 0);}
1183
1184 if (test == false)
1185 {
1186 if (prp != 0)
1187 {std::cout << (int)vd.template getProp<0>(p) << std::endl;}
1188
1189 if (prp != 1)
1190 {std::cout << (int)vd.template getProp<1>(p) << std::endl;}
1191
1192 if (prp != 2)
1193 {std::cout << (int)vd.template getProp<2>(p) << std::endl;}
1194
1195 if (prp != 3)
1196 {std::cout << (int)vd.template getProp<3>(p) << std::endl;}
1197
1198 break;
1199 }
1200
1201 ++it2;
1202 }
1203
1204 BOOST_REQUIRE_EQUAL(test,true);
1205
1206
1207 // We test where we do not remove anything
1208
1209 size_t size_old = vd.size_local();
1210
1211 // Because remove_marked is destructive we have to reset the property
1212 vd.getPropVector().template fill<prp>(0);
1213
1214 remove_marked<prp>(vd);
1215
1216 BOOST_REQUIRE_EQUAL(vd.size_local(),size_old);
1217
1218 // Now we try to remove all
1219 vd.getPropVector().template fill<prp>(1);
1220
1221 remove_marked<prp>(vd);
1222
1223 BOOST_REQUIRE_EQUAL(vd.size_local(),0);
1224}
1225
1226BOOST_AUTO_TEST_CASE(vector_dist_remove_marked)
1227{
1228 vector_dist_remove_marked_type<0>();
1229 vector_dist_remove_marked_type<1>();
1230 vector_dist_remove_marked_type<2>();
1231 vector_dist_remove_marked_type<3>();
1232}
1233
1234
1235BOOST_AUTO_TEST_CASE( vector_dist_particle_NN_MP_iteration_gpu )
1236{
1237 typedef aggregate<size_t,size_t,size_t> part_prop;
1238
1239 Vcluster<> & v_cl = create_vcluster();
1240
1241 if (v_cl.getProcessingUnits() > 24)
1242 {return;}
1243
1244 float L = 1000.0;
1245
1246 // set the seed
1247 // create the random generator engine
1248 std::default_random_engine eg;
1249 eg.seed(v_cl.rank()*4533);
1250 std::uniform_real_distribution<float> ud(-L,L);
1251
1252 long int k = 4096 * v_cl.getProcessingUnits();
1253
1254 long int big_step = k / 4;
1255 big_step = (big_step == 0)?1:big_step;
1256
1257 BOOST_TEST_CHECKPOINT( "Testing 3D periodic vector symmetric cell-list k=" << k );
1258
1259 Box<3,float> box({-L,-L,-L},{L,L,L});
1260
1261 // Boundary conditions
1262 size_t bc[3]={PERIODIC,PERIODIC,PERIODIC};
1263
1264 float r_cut = 100.0;
1265
1266 // ghost
1267 Ghost<3,float> ghost(r_cut);
1268
1269 // Distributed vector
1270 vector_dist_gpu<3,float,part_prop> vd(k,box,bc,ghost,BIND_DEC_TO_GHOST);
1271
1272/* size_t start = vd.init_size_accum(k);
1273
1274 auto it = vd.getIterator();
1275
1276 while (it.isNext())
1277 {
1278 auto key = it.get();
1279
1280 vd.getPosWrite(key)[0] = ud(eg);
1281 vd.getPosWrite(key)[1] = ud(eg);
1282 vd.getPosWrite(key)[2] = ud(eg);
1283
1284 // Fill some properties randomly
1285
1286 vd.template getPropWrite<0>(key) = 0;
1287 vd.template getPropWrite<1>(key) = 0;
1288 vd.template getPropWrite<2>(key) = key.getKey() + start;
1289
1290 ++it;
1291 }
1292
1293 vd.map();
1294
1295 // sync the ghost
1296 vd.template ghost_get<0,2>();
1297
1298 auto NN = vd.getCellList(r_cut);
1299 auto p_it = vd.getDomainIterator();
1300
1301 while (p_it.isNext())
1302 {
1303 auto p = p_it.get();
1304
1305 Point<3,float> xp = vd.getPosRead(p);
1306
1307 auto Np = NN.getNNIterator(NN.getCell(xp));
1308
1309 while (Np.isNext())
1310 {
1311 auto q = Np.get();
1312
1313 if (p.getKey() == q)
1314 {
1315 ++Np;
1316 continue;
1317 }
1318
1319 // repulsive
1320
1321 Point<3,float> xq = vd.getPosRead(q);
1322 Point<3,float> f = (xp - xq);
1323
1324 float distance = f.norm();
1325
1326 // Particle should be inside 2 * r_cut range
1327
1328 if (distance < r_cut )
1329 {
1330 vd.template getPropWrite<0>(p)++;
1331 }
1332
1333 ++Np;
1334 }
1335
1336 ++p_it;
1337 }
1338
1339 // We now divide the particles on 4 phases
1340
1341 openfpm::vector<vector_dist_gpu<3,float,part_prop>> phases;
1342 phases.add( vector_dist_gpu<3,float,part_prop>(vd.getDecomposition(),0));
1343 phases.add( vector_dist_gpu<3,float,part_prop>(phases.get(0).getDecomposition(),0));
1344 phases.add( vector_dist_gpu<3,float,part_prop>(phases.get(0).getDecomposition(),0));
1345 phases.add( vector_dist_gpu<3,float,part_prop>(phases.get(0).getDecomposition(),0));
1346
1347 auto it2 = vd.getDomainIterator();
1348
1349 while (it2.isNext())
1350 {
1351 auto p = it2.get();
1352
1353 if (p.getKey() % 4 == 0)
1354 {
1355 phases.get(0).add();
1356 phases.get(0).getLastPos()[0] = vd.getPos(p)[0];
1357 phases.get(0).getLastPos()[1] = vd.getPos(p)[1];
1358 phases.get(0).getLastPos()[2] = vd.getPos(p)[2];
1359
1360 phases.get(0).getLastProp<1>() = 0;
1361
1362 phases.get(0).template getLastProp<2>() = vd.template getProp<2>(p);
1363 }
1364 else if (p.getKey() % 4 == 1)
1365 {
1366 phases.get(1).add();
1367 phases.get(1).getLastPos()[0] = vd.getPos(p)[0];
1368 phases.get(1).getLastPos()[1] = vd.getPos(p)[1];
1369 phases.get(1).getLastPos()[2] = vd.getPos(p)[2];
1370
1371 phases.get(1).getLastProp<1>() = 0;
1372
1373 phases.get(1).template getLastProp<2>() = vd.template getProp<2>(p);
1374 }
1375 else if (p.getKey() % 4 == 2)
1376 {
1377 phases.get(2).add();
1378 phases.get(2).getLastPos()[0] = vd.getPos(p)[0];
1379 phases.get(2).getLastPos()[1] = vd.getPos(p)[1];
1380 phases.get(2).getLastPos()[2] = vd.getPos(p)[2];
1381
1382 phases.get(2).getLastProp<1>() = 0;
1383
1384 phases.get(2).template getLastProp<2>() = vd.template getProp<2>(p);
1385 }
1386 else
1387 {
1388 phases.get(3).add();
1389 phases.get(3).getLastPos()[0] = vd.getPos(p)[0];
1390 phases.get(3).getLastPos()[1] = vd.getPos(p)[1];
1391 phases.get(3).getLastPos()[2] = vd.getPos(p)[2];
1392
1393 phases.get(3).getLastProp<1>() = 0;
1394
1395 phases.get(3).template getLastProp<2>() = vd.template getProp<2>(p);
1396 }
1397
1398 ++it2;
1399 }
1400
1401 // now we synchronize the ghosts
1402
1403 for (size_t i = 0 ; i < phases.size() ; i++)
1404 {
1405 phases.get(i).template ghost_get<0,1,2>();
1406 }
1407
1408 typedef decltype(phases.get(0).getCellListSym(r_cut)) cell_list_type;
1409
1410 openfpm::vector<cell_list_type> NN_ptr;
1411
1412 for (size_t i = 0 ; i < phases.size() ; i++)
1413 {
1414 NN_ptr.add(phases.get(i).getCellListSym(r_cut));
1415 }
1416
1417 // We now interact all the phases
1418
1419 for (size_t i = 0 ; i < phases.size() ; i++)
1420 {
1421 for (size_t j = 0 ; j < phases.size() ; j++)
1422 {
1423 auto p_it2 = phases.get(i).getDomainIterator();
1424
1425 while (p_it2.isNext())
1426 {
1427 auto p = p_it2.get();
1428
1429 Point<3,float> xp = phases.get(i).getPosRead(p);
1430
1431 auto Np = NN_ptr.get(j).getNNIteratorSymMP<NO_CHECK>(NN_ptr.get(j).getCell(xp),p.getKey(),phases.get(i).getPosVector(),phases.get(j).getPosVector());
1432
1433 while (Np.isNext())
1434 {
1435 auto q = Np.get();
1436
1437 if (p.getKey() == q && i == j)
1438 {
1439 ++Np;
1440 continue;
1441 }
1442
1443 // repulsive
1444
1445 Point<3,float> xq = phases.get(j).getPosRead(q);
1446 Point<3,float> f = (xp - xq);
1447
1448 float distance = f.norm();
1449
1450 // Particle should be inside r_cut range
1451
1452 if (distance < r_cut )
1453 {
1454 phases.get(i).template getPropWrite<1>(p)++;
1455 phases.get(j).template getPropWrite<1>(q)++;
1456 }
1457
1458 ++Np;
1459 }
1460
1461 ++p_it2;
1462 }
1463 }
1464 }
1465
1466 for (size_t i = 0 ; i < phases.size() ; i++)
1467 {
1468 phases.get(i).template ghost_put<add_,1>();
1469 }
1470
1471 auto p_it3 = vd.getDomainIterator();
1472
1473 bool ret = true;
1474 while (p_it3.isNext())
1475 {
1476 auto p = p_it3.get();
1477
1478 int ph;
1479
1480 if (p.getKey() % 4 == 0)
1481 {ph = 0;}
1482 else if (p.getKey() % 4 == 1)
1483 {ph = 1;}
1484 else if (p.getKey() % 4 == 2)
1485 {ph = 2;}
1486 else
1487 {ph = 3;}
1488
1489 size_t pah = p.getKey()/4;
1490 ret &= phases.get(ph).template getPropRead<1>(pah) == vd.template getPropRead<0>(p);
1491
1492 if (ret == false)
1493 {
1494 std::cout << "Error on particle: " << vd.template getPropRead<2>(p) << " " << v_cl.rank() << std::endl;
1495
1496 std::cout << "phase " << ph << " particle " << pah << " " << phases.get(ph).template getPropRead<1>(pah) << " A " << vd.template getPropRead<0>(p) << std::endl;
1497
1498 break;
1499 }
1500
1501 ++p_it3;
1502 }
1503
1504 BOOST_REQUIRE_EQUAL(ret,true);*/
1505}
1506
1507BOOST_AUTO_TEST_SUITE_END()
1508
This class represent an N-dimensional box.
Definition Box.hpp:61
__device__ __host__ T getLow(int i) const
get the i-coordinate of the low bound interval of the box
Definition Box.hpp:556
__device__ __host__ T getHigh(int i) const
get the high interval of the box
Definition Box.hpp:567
This class decompose a space into sub-sub-domains and distribute them across processors.
virtual void * getDevicePointer()
get a readable pointer with the data
virtual void deviceToHost()
Move memory from device to host.
virtual void fill(unsigned char c)
fill the buffer with a byte
virtual void * getPointer()
get a readable pointer with the data
virtual bool allocate(size_t sz)
allocate memory
Definition CudaMemory.cu:38
This class implement the point shape in an N-dimensional space.
Definition Point.hpp:28
This class represent an N-dimensional box.
Definition SpaceBox.hpp:27
size_t rank()
Get the process unit id.
size_t getProcessingUnits()
Get the total number of processors.
Implementation of VCluster class.
Definition VCluster.hpp:59
Grow policy define how the vector should grow every time we exceed the size.
Implementation of 1-D std::vector like structure.
size_t size()
Stub size.
Distributed vector.
KeyT const ValueT ValueT OffsetIteratorT OffsetIteratorT int
[in] The number of segments that comprise the sorting data
aggregate of properties, from a list of object if create a struct that follow the OPENFPM native stru...
Transform the boost::fusion::vector into memory specification (memory_traits)
This is a container to create a general object.