OpenFPM 5.2.0
Project that contains the implementation of distributed structures
vector_dist_cuda_func_test.cu
1 #define BOOST_TEST_DYN_LINK
2 
3 #define TEST1
4 
5 #include <boost/test/unit_test.hpp>
6 #include "VCluster/VCluster.hpp"
7 #include "Vector/map_vector.hpp"
8 #include "Vector/cuda/vector_dist_cuda_funcs.cuh"
9 #include "Vector/util/vector_dist_funcs.hpp"
10 #include "Decomposition/CartDecomposition.hpp"
11 //#include "util/cuda/scan_cuda.cuh"
12 #include "Vector/vector_dist.hpp"
13 #include "util/cuda/scan_ofp.cuh"
14 
15 #define SUB_UNIT_FACTOR 1024
16 
17 BOOST_AUTO_TEST_SUITE( vector_dist_gpu_util_func_test )
18 
19 BOOST_AUTO_TEST_CASE( vector_ghost_process_local_particles )
20 {
22 
24  vPrp.resize(10000);
25 
27  vPos.resize(10000);
28 
30 
31  for (size_t i = 0 ; i < vPrp.size() ; i++)
32  {
33  vPos.template get<0>(i)[0] = (float)rand()/(float)RAND_MAX;
34  vPos.template get<0>(i)[1] = (float)rand()/(float)RAND_MAX;
35  vPos.template get<0>(i)[2] = (float)rand()/(float)RAND_MAX;
36 
37  vPrp.template get<0>(i) = i+12345;
38 
39  vPrp.template get<1>(i)[0] = i;
40  vPrp.template get<1>(i)[1] = i+20000;
41  vPrp.template get<1>(i)[2] = i+50000;
42 
43  vPrp.template get<2>(i)[0][0] = i+60000;
44  vPrp.template get<2>(i)[0][1] = i+70000;
45  vPrp.template get<2>(i)[0][2] = i+80000;
46  vPrp.template get<2>(i)[1][0] = i+90000;
47  vPrp.template get<2>(i)[1][1] = i+100000;
48  vPrp.template get<2>(i)[1][2] = i+110000;
49  vPrp.template get<2>(i)[2][0] = i+120000;
50  vPrp.template get<2>(i)[2][1] = i+130000;
51  vPrp.template get<2>(i)[2][2] = i+140000;
53  }
54 
57 
58  box_f_dev.resize(4);
59  box_f_sv.resize(4);
60 
61  box_f_dev.template get<0>(0)[0] = 0.0;
62  box_f_dev.template get<0>(0)[1] = 0.0;
63  box_f_dev.template get<0>(0)[2] = 0.0;
64  box_f_dev.template get<1>(0)[0] = 0.5;
65  box_f_dev.template get<1>(0)[1] = 1.0;
66  box_f_dev.template get<1>(0)[2] = 1.0;
67  box_f_sv.template get<0>(0) = 0;
68 
69  box_f_dev.template get<0>(1)[0] = 0.0;
70  box_f_dev.template get<0>(1)[1] = 0.0;
71  box_f_dev.template get<0>(1)[2] = 0.0;
72  box_f_dev.template get<1>(1)[0] = 0.3;
73  box_f_dev.template get<1>(1)[1] = 1.0;
74  box_f_dev.template get<1>(1)[2] = 1.0;
75  box_f_sv.template get<0>(1) = 1;
76 
77  box_f_dev.template get<0>(2)[0] = 0.0;
78  box_f_dev.template get<0>(2)[1] = 0.0;
79  box_f_dev.template get<0>(2)[2] = 0.0;
80  box_f_dev.template get<1>(2)[0] = 0.2;
81  box_f_dev.template get<1>(2)[1] = 1.0;
82  box_f_dev.template get<1>(2)[2] = 1.0;
83  box_f_sv.template get<0>(2) = 2;
84 
85  box_f_dev.template get<0>(3)[0] = 0.0;
86  box_f_dev.template get<0>(3)[1] = 0.0;
87  box_f_dev.template get<0>(3)[2] = 0.0;
88  box_f_dev.template get<1>(3)[0] = 0.1;
89  box_f_dev.template get<1>(3)[1] = 1.0;
90  box_f_dev.template get<1>(3)[2] = 1.0;
91  box_f_sv.template get<0>(3) = 3;
92 
93  // Label the internal (assigned) particles
94  auto ite = vPos.getGPUIteratorTo(vPos.size());
95 
96  o_part_loc.resize(vPos.size()+1);
97  o_part_loc.template get<0>(o_part_loc.size()-1) = 0;
98  o_part_loc.template hostToDevice<0>(o_part_loc.size()-1,o_part_loc.size()-1);
99 
100  box_f_dev.hostToDevice<0,1>();
101  box_f_sv.hostToDevice<0>();
102  vPos.hostToDevice<0>();
103  vPrp.hostToDevice<0,1,2>();
104 
105  // count for each particle the number of local ghost boxes it falls into
106  CUDA_LAUNCH_DIM3((num_shift_ghost_each_part<3,float,decltype(box_f_dev.toKernel()),decltype(box_f_sv.toKernel()),decltype(vPos.toKernel()),decltype(o_part_loc.toKernel())>),
107  ite.wthr,ite.thr,
108  box_f_dev.toKernel(),box_f_sv.toKernel(),vPos.toKernel(),o_part_loc.toKernel(),(unsigned int)vPos.size());
109 
110  o_part_loc.deviceToHost<0>();
111 
112  bool match = true;
113 
114  for (size_t i = 0 ; i < vPos.size() ; i++)
115  {
116  if (vPos.template get<0>(i)[0] >= 0.5)
117  {match &= o_part_loc.template get<0>(i) == 0;}
118  else if (vPos.template get<0>(i)[0] >= 0.3)
119  {match &= o_part_loc.template get<0>(i) == 1;}
120  else if (vPos.template get<0>(i)[0] >= 0.2)
121  {match &= o_part_loc.template get<0>(i) == 2;}
122  else if (vPos.template get<0>(i)[0] >= 0.1)
123  {match &= o_part_loc.template get<0>(i) == 3;}
124  else
125  {match &= o_part_loc.template get<0>(i) == 4;}
126  }
127 
128  BOOST_REQUIRE_EQUAL(match,true);
129 
131  starts.resize(o_part_loc.size());
132 
133  auto & v_cl = create_vcluster();
134  openfpm::scan((unsigned int *)o_part_loc.template getDeviceBuffer<0>(), o_part_loc.size(), (unsigned int *)starts.template getDeviceBuffer<0>() , v_cl.getGpuContext());
135 
136  starts.deviceToHost<0>(starts.size()-1,starts.size()-1);
137  size_t tot = starts.template get<0>(o_part_loc.size()-1);
138 
140 
141  shifts.resize(4);
142 
143  shifts.template get<0>(0)[0] = 10.0;
144  shifts.template get<0>(0)[1] = 0.0;
145  shifts.template get<0>(0)[2] = 0.0;
146 
147  shifts.template get<0>(1)[0] = 20.0;
148  shifts.template get<0>(1)[1] = 0.0;
149  shifts.template get<0>(1)[2] = 0.0;
150 
151  shifts.template get<0>(2)[0] = 30.0;
152  shifts.template get<0>(2)[1] = 0.0;
153  shifts.template get<0>(2)[2] = 0.0;
154 
155  shifts.template get<0>(3)[0] = 40.0;
156  shifts.template get<0>(3)[1] = 0.0;
157  shifts.template get<0>(3)[2] = 0.0;
158 
159  size_t old = vPos.size();
160  vPos.resize(vPos.size() + tot);
161  vPrp.resize(vPrp.size() + tot);
162 
163  shifts.template hostToDevice<0>();
165  o_part_loc2.resize(tot);
166 
167  CUDA_LAUNCH_DIM3((shift_ghost_each_part<3,float,decltype(box_f_dev.toKernel()),decltype(box_f_sv.toKernel()),
168  decltype(vPos.toKernel()),decltype(vPrp.toKernel()),
169  decltype(starts.toKernel()),decltype(shifts.toKernel()),
170  decltype(o_part_loc2.toKernel())>),
171  ite.wthr,ite.thr,
172  box_f_dev.toKernel(),box_f_sv.toKernel(),
173  vPos.toKernel(),vPrp.toKernel(),
174  starts.toKernel(),shifts.toKernel(),o_part_loc2.toKernel(),(unsigned int)old,(unsigned int)old);
175 
176  vPos.deviceToHost<0>();
177  o_part_loc2.deviceToHost<0,1>();
178  vPrp.deviceToHost<0,1,2>();
179 
180  size_t base = old;
181  size_t base_o = 0;
182  for (size_t i = 0 ; i < old ; i++)
183  {
184  if (vPos.template get<0>(i)[0] >= 0.5)
185  {}
186  else if (vPos.template get<0>(i)[0] >= 0.3)
187  {
188  for (size_t j = 0 ; j < o_part_loc.template get<0>(i) ; j++)
189  {
190  match &= vPos.template get<0>(base)[0] < 1.0 - (j+1.0)*10.0;
191  match &= vPos.template get<0>(base)[0] >= -(j+1.0)*10.0;
192 
193  match &= o_part_loc2.template get<0>(base_o) == i;
194  match &= o_part_loc2.template get<1>(base_o) == j;
195 
197 
198  match &= vPrp.template get<0>(base) == vPrp.template get<0>(i);
199 
200  match &= vPrp.template get<1>(base)[0] == vPrp.template get<1>(i)[0];
201  match &= vPrp.template get<1>(base)[1] == vPrp.template get<1>(i)[1];
202  match &= vPrp.template get<1>(base)[2] == vPrp.template get<1>(i)[2];
203 
204  match &= vPrp.template get<2>(base)[0][0] == vPrp.template get<2>(i)[0][0];
205  match &= vPrp.template get<2>(base)[0][1] == vPrp.template get<2>(i)[0][1];
206  match &= vPrp.template get<2>(base)[0][2] == vPrp.template get<2>(i)[0][2];
207  match &= vPrp.template get<2>(base)[1][0] == vPrp.template get<2>(i)[1][0];
208  match &= vPrp.template get<2>(base)[1][1] == vPrp.template get<2>(i)[1][1];
209  match &= vPrp.template get<2>(base)[1][2] == vPrp.template get<2>(i)[1][2];
210  match &= vPrp.template get<2>(base)[2][0] == vPrp.template get<2>(i)[2][0];
211  match &= vPrp.template get<2>(base)[2][1] == vPrp.template get<2>(i)[2][1];
212  match &= vPrp.template get<2>(base)[2][2] == vPrp.template get<2>(i)[2][2];
213 
214  base++;
215  base_o++;
216  }
217  }
218  else if (vPos.template get<0>(i)[0] >= 0.2)
219  {
220  for (size_t j = 0 ; j < o_part_loc.template get<0>(i) ; j++)
221  {
222  match &= vPos.template get<0>(base)[0] < 1.0 - (j+1.0)*10.0;
223  match &= vPos.template get<0>(base)[0] >= -(j+1.0)*10.0;
224 
225  match &= o_part_loc2.template get<0>(base_o) == i;
226  match &= o_part_loc2.template get<1>(base_o) == j;
227 
229 
230  match &= vPrp.template get<0>(base) == vPrp.template get<0>(i);
231 
232  match &= vPrp.template get<1>(base)[0] == vPrp.template get<1>(i)[0];
233  match &= vPrp.template get<1>(base)[1] == vPrp.template get<1>(i)[1];
234  match &= vPrp.template get<1>(base)[2] == vPrp.template get<1>(i)[2];
235 
236  match &= vPrp.template get<2>(base)[0][0] == vPrp.template get<2>(i)[0][0];
237  match &= vPrp.template get<2>(base)[0][1] == vPrp.template get<2>(i)[0][1];
238  match &= vPrp.template get<2>(base)[0][2] == vPrp.template get<2>(i)[0][2];
239  match &= vPrp.template get<2>(base)[1][0] == vPrp.template get<2>(i)[1][0];
240  match &= vPrp.template get<2>(base)[1][1] == vPrp.template get<2>(i)[1][1];
241  match &= vPrp.template get<2>(base)[1][2] == vPrp.template get<2>(i)[1][2];
242  match &= vPrp.template get<2>(base)[2][0] == vPrp.template get<2>(i)[2][0];
243  match &= vPrp.template get<2>(base)[2][1] == vPrp.template get<2>(i)[2][1];
244  match &= vPrp.template get<2>(base)[2][2] == vPrp.template get<2>(i)[2][2];
245 
246 
247  base++;
248  base_o++;
249  }
250  }
251  else if (vPos.template get<0>(i)[0] >= 0.1)
252  {
253  for (size_t j = 0 ; j < o_part_loc.template get<0>(i) ; j++)
254  {
255  match &= vPos.template get<0>(base)[0] < 1.0 - (j+1.0)*10.0;
256  match &= vPos.template get<0>(base)[0] >= -(j+1.0)*10.0;
257 
258  match &= o_part_loc2.template get<0>(base_o) == i;
259  match &= o_part_loc2.template get<1>(base_o) == j;
260 
262 
263  match &= vPrp.template get<0>(base) == vPrp.template get<0>(i);
264 
265  match &= vPrp.template get<1>(base)[0] == vPrp.template get<1>(i)[0];
266  match &= vPrp.template get<1>(base)[1] == vPrp.template get<1>(i)[1];
267  match &= vPrp.template get<1>(base)[2] == vPrp.template get<1>(i)[2];
268 
269  match &= vPrp.template get<2>(base)[0][0] == vPrp.template get<2>(i)[0][0];
270  match &= vPrp.template get<2>(base)[0][1] == vPrp.template get<2>(i)[0][1];
271  match &= vPrp.template get<2>(base)[0][2] == vPrp.template get<2>(i)[0][2];
272  match &= vPrp.template get<2>(base)[1][0] == vPrp.template get<2>(i)[1][0];
273  match &= vPrp.template get<2>(base)[1][1] == vPrp.template get<2>(i)[1][1];
274  match &= vPrp.template get<2>(base)[1][2] == vPrp.template get<2>(i)[1][2];
275  match &= vPrp.template get<2>(base)[2][0] == vPrp.template get<2>(i)[2][0];
276  match &= vPrp.template get<2>(base)[2][1] == vPrp.template get<2>(i)[2][1];
277  match &= vPrp.template get<2>(base)[2][2] == vPrp.template get<2>(i)[2][2];
278 
279  base++;
280  base_o++;
281  }
282  }
283  else
284  {
285  for (size_t j = 0 ; j < o_part_loc.template get<0>(i) ; j++)
286  {
287  match &= vPos.template get<0>(base)[0] < 1.0 - (j+1.0)*10.0;
288  match &= vPos.template get<0>(base)[0] >= -(j+1.0)*10.0;
289 
290  match &= o_part_loc2.template get<0>(base_o) == i;
291  match &= o_part_loc2.template get<1>(base_o) == j;
292 
294 
295  match &= vPrp.template get<0>(base) == vPrp.template get<0>(i);
296 
297  match &= vPrp.template get<1>(base)[0] == vPrp.template get<1>(i)[0];
298  match &= vPrp.template get<1>(base)[1] == vPrp.template get<1>(i)[1];
299  match &= vPrp.template get<1>(base)[2] == vPrp.template get<1>(i)[2];
300 
301  match &= vPrp.template get<2>(base)[0][0] == vPrp.template get<2>(i)[0][0];
302  match &= vPrp.template get<2>(base)[0][1] == vPrp.template get<2>(i)[0][1];
303  match &= vPrp.template get<2>(base)[0][2] == vPrp.template get<2>(i)[0][2];
304  match &= vPrp.template get<2>(base)[1][0] == vPrp.template get<2>(i)[1][0];
305  match &= vPrp.template get<2>(base)[1][1] == vPrp.template get<2>(i)[1][1];
306  match &= vPrp.template get<2>(base)[1][2] == vPrp.template get<2>(i)[1][2];
307  match &= vPrp.template get<2>(base)[2][0] == vPrp.template get<2>(i)[2][0];
308  match &= vPrp.template get<2>(base)[2][1] == vPrp.template get<2>(i)[2][1];
309  match &= vPrp.template get<2>(base)[2][2] == vPrp.template get<2>(i)[2][2];
310 
311  base++;
312  base_o++;
313  }
314  }
315  }
316 
317  BOOST_REQUIRE_EQUAL(match,true);
318 
320 
323 
324  vPos2.resize(old);
325  vPrp2.resize(old);
326 
327  for (size_t i = 0 ; i < old ; i++)
328  {
329  vPos2.template get<0>(i)[0] = vPos.template get<0>(i)[0];
330  vPos2.template get<0>(i)[1] = vPos.template get<0>(i)[1];
331  vPos2.template get<0>(i)[2] = vPos.template get<0>(i)[2];
332 
333  vPrp2.template get<0>(i) = vPrp.template get<0>(i);
334 
335  vPrp2.template get<1>(i)[0] = vPrp.template get<1>(i)[0];
336  vPrp2.template get<1>(i)[1] = vPrp.template get<1>(i)[1];
337  vPrp2.template get<1>(i)[2] = vPrp.template get<1>(i)[2];
338 
339  vPrp2.template get<2>(i)[0][0] = vPrp.template get<2>(i)[0][0];
340  vPrp2.template get<2>(i)[0][1] = vPrp.template get<2>(i)[0][1];
341  vPrp2.template get<2>(i)[0][2] = vPrp.template get<2>(i)[0][2];
342  vPrp2.template get<2>(i)[1][0] = vPrp.template get<2>(i)[1][0];
343  vPrp2.template get<2>(i)[1][1] = vPrp.template get<2>(i)[1][1];
344  vPrp2.template get<2>(i)[1][2] = vPrp.template get<2>(i)[1][2];
345  vPrp2.template get<2>(i)[2][0] = vPrp.template get<2>(i)[2][0];
346  vPrp2.template get<2>(i)[2][1] = vPrp.template get<2>(i)[2][1];
347  vPrp2.template get<2>(i)[2][2] = vPrp.template get<2>(i)[2][2];
348  }
349 
350  vPos2.resize(vPos.size());
351  vPrp2.resize(vPrp.size());
352 
353  vPos2.hostToDevice<0>();
354  vPrp2.hostToDevice<0,1,2>();
355 
356  ite = o_part_loc2.getGPUIterator();
357 
358  CUDA_LAUNCH_DIM3((process_ghost_particles_local<true,3,decltype(o_part_loc2.toKernel()),decltype(vPos2.toKernel()),decltype(vPrp2.toKernel()),decltype(shifts.toKernel())>),
359  ite.wthr,ite.thr,
360  o_part_loc2.toKernel(),vPos2.toKernel(),vPrp2.toKernel(),shifts.toKernel(),(unsigned int)old);
361 
362  vPos2.template deviceToHost<0>();
363  vPrp2.template deviceToHost<0,1,2>();
364 
365  for (size_t i = old ; i < vPos.size() ; i++)
366  {
367  match &= vPos.template get<0>(i)[0] == vPos2.template get<0>(i)[0];
368  match &= vPos.template get<0>(i)[1] == vPos2.template get<0>(i)[1];
369  match &= vPos.template get<0>(i)[2] == vPos2.template get<0>(i)[2];
370 
371  match &= vPrp2.template get<0>(i) == vPrp.template get<0>(i);
372 
373  match &= vPrp2.template get<1>(i)[0] == vPrp.template get<1>(i)[0];
374  match &= vPrp2.template get<1>(i)[1] == vPrp.template get<1>(i)[1];
375  match &= vPrp2.template get<1>(i)[2] == vPrp.template get<1>(i)[2];
376 
377  match &= vPrp2.template get<2>(i)[0][0] == vPrp.template get<2>(i)[0][0];
378  match &= vPrp2.template get<2>(i)[0][1] == vPrp.template get<2>(i)[0][1];
379  match &= vPrp2.template get<2>(i)[0][2] == vPrp.template get<2>(i)[0][2];
380  match &= vPrp2.template get<2>(i)[1][0] == vPrp.template get<2>(i)[1][0];
381  match &= vPrp2.template get<2>(i)[1][1] == vPrp.template get<2>(i)[1][1];
382  match &= vPrp2.template get<2>(i)[1][2] == vPrp.template get<2>(i)[1][2];
383  match &= vPrp2.template get<2>(i)[2][0] == vPrp.template get<2>(i)[2][0];
384  match &= vPrp2.template get<2>(i)[2][1] == vPrp.template get<2>(i)[2][1];
385  match &= vPrp2.template get<2>(i)[2][2] == vPrp.template get<2>(i)[2][2];
386  }
387 
388  BOOST_REQUIRE_EQUAL(match,true);
389 }
390 
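The test above exercises OpenFPM's count / scan / emit pattern for building local ghost copies: num_shift_ghost_each_part counts how many ghost shift boxes contain each particle, openfpm::scan turns those counts into write offsets, and shift_ghost_each_part writes the shifted copies at those offsets. Below is a minimal standalone sketch of the same pattern in plain CUDA + Thrust; it does not use the OpenFPM containers, and every name, box bound and shift value in it is an illustrative assumption, not the library's API.

#include <cuda_runtime.h>
#include <thrust/device_vector.h>
#include <thrust/scan.h>
#include <vector>

// count how many ghost boxes (here reduced to an upper bound on x) contain each particle
__global__ void count_copies(const float* x, int n, const float* x_hi, int n_box, unsigned int* cnt)
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i >= n) {return;}
    unsigned int c = 0;
    for (int b = 0; b < n_box; b++)
    {c += (x[i] < x_hi[b]) ? 1 : 0;}
    cnt[i] = c;
}

// write every shifted copy at the offset produced by the exclusive scan of the counts
__global__ void emit_copies(const float* x, int n, const float* x_hi, const float* shift,
                            int n_box, const unsigned int* offs, float* x_out, int* src_out)
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i >= n) {return;}
    unsigned int o = offs[i];
    for (int b = 0; b < n_box; b++)
    {
        if (x[i] < x_hi[b])
        {
            x_out[o] = x[i] - shift[b];   // shifted ghost position (same shift convention as the test)
            src_out[o] = i;               // remember which particle generated the copy
            o++;
        }
    }
}

int main()
{
    const int n = 10000, n_box = 4;
    std::vector<float> hi_h = {0.5f, 0.3f, 0.2f, 0.1f};     // box upper bounds used by the test
    std::vector<float> sh_h = {10.0f, 20.0f, 30.0f, 40.0f}; // per-box shifts used by the test

    thrust::device_vector<float> x(n, 0.05f);               // every particle falls in all four boxes
    thrust::device_vector<float> x_hi(hi_h.begin(), hi_h.end());
    thrust::device_vector<float> shift(sh_h.begin(), sh_h.end());
    thrust::device_vector<unsigned int> cnt(n + 1, 0), offs(n + 1);

    count_copies<<<(n + 127) / 128, 128>>>(x.data().get(), n, x_hi.data().get(), n_box, cnt.data().get());
    thrust::exclusive_scan(cnt.begin(), cnt.end(), offs.begin());
    unsigned int tot = offs[n];                              // total number of ghost copies

    thrust::device_vector<float> x_out(tot);
    thrust::device_vector<int> src_out(tot);
    emit_copies<<<(n + 127) / 128, 128>>>(x.data().get(), n, x_hi.data().get(), shift.data().get(),
                                          n_box, offs.data().get(), x_out.data().get(), src_out.data().get());
    return cudaDeviceSynchronize() == cudaSuccess ? 0 : 1;
}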
391 BOOST_AUTO_TEST_CASE( vector_ghost_fill_send_buffer_test )
392 {
394 
395  // Sending property object
397 
398  // send vector for each processor
400 
401  openfpm::vector<send_vector> g_send_prp;
402 
403  auto & v_cl = create_vcluster();
404 
405  // Vcluster
406  Vcluster<> & vcl = create_vcluster();
407 
409  vPrp.resize(10000);
410 
412 
413  for (size_t i = 0 ; i < vPrp.size() ; i++)
414  {
415  vPrp.template get<0>(i) = i+12345;
416 
417  vPrp.template get<1>(i)[0] = i;
418  vPrp.template get<1>(i)[1] = i+20000;
419  vPrp.template get<1>(i)[2] = i+50000;
420 
421  vPrp.template get<2>(i)[0][0] = i+60000;
422  vPrp.template get<2>(i)[0][1] = i+70000;
423  vPrp.template get<2>(i)[0][2] = i+80000;
424  vPrp.template get<2>(i)[1][0] = i+90000;
425  vPrp.template get<2>(i)[1][1] = i+100000;
426  vPrp.template get<2>(i)[1][2] = i+110000;
427  vPrp.template get<2>(i)[2][0] = i+120000;
428  vPrp.template get<2>(i)[2][1] = i+130000;
429  vPrp.template get<2>(i)[2][2] = i+140000;
430  }
431 
432  vPrp.hostToDevice<0,1,2>();
433 
434  g_opart_device.resize(2*10000*3);
435 
436  for (size_t i = 0 ; i < 3 ; i++)
437  {
438  for (size_t j = 0 ; j < 10000 ; j++)
439  {
440  g_opart_device.template get<0>(i*2*10000 + j*2) = i;
441  g_opart_device.template get<0>(i*2*10000 + j*2+1) = i;
442 
443  g_opart_device.template get<1>(i*2*10000 + j*2) = j;
444  g_opart_device.template get<1>(i*2*10000 + j*2+1) = j;
445 
446  g_opart_device.template get<2>(i*2*10000 + j*2) = 0;
447  g_opart_device.template get<2>(i*2*10000 + j*2+1) = 0;
448  }
449  }
450 
451  g_opart_device.hostToDevice<0,1,2>();
452 
453  g_send_prp.resize(3);
454 
455  bool match = true;
456  size_t offset = 0;
457 
458  for (size_t i = 0 ; i < 3 ; i++)
459  {
460  g_send_prp.get(i).resize(2*10000);
461 
462  auto ite = g_send_prp.get(i).getGPUIterator();
463 
464  CUDA_LAUNCH_DIM3((process_ghost_particles_prp<decltype(g_opart_device.toKernel()),decltype(g_send_prp.get(i).toKernel()),decltype(vPrp.toKernel()),0,1,2>),
465  ite.wthr,ite.thr,
466  g_opart_device.toKernel(), g_send_prp.get(i).toKernel(),
467  vPrp.toKernel(),(unsigned int)offset);
468 
469  offset += g_send_prp.get(i).size();
470 
472 
473  g_send_prp.get(i).deviceToHost<0,1,2>();
474 
475  for (size_t j = 0 ; j < 10000 ; j++)
476  {
477  match &= g_send_prp.get(i).template get<0>(2*j) == j+12345;
478 
479  match &= g_send_prp.get(i).template get<1>(2*j)[0] == j;
480  match &= g_send_prp.get(i).template get<1>(2*j)[1] == j+20000;
481  match &= g_send_prp.get(i).template get<1>(2*j)[2] == j+50000;
482 
483  match &= g_send_prp.get(i).template get<2>(2*j)[0][0] == j+60000;
484  match &= g_send_prp.get(i).template get<2>(2*j)[0][1] == j+70000;
485  match &= g_send_prp.get(i).template get<2>(2*j)[0][2] == j+80000;
486  match &= g_send_prp.get(i).template get<2>(2*j)[1][0] == j+90000;
487  match &= g_send_prp.get(i).template get<2>(2*j)[1][1] == j+100000;
488  match &= g_send_prp.get(i).template get<2>(2*j)[1][2] == j+110000;
489  match &= g_send_prp.get(i).template get<2>(2*j)[2][0] == j+120000;
490  match &= g_send_prp.get(i).template get<2>(2*j)[2][1] == j+130000;
491  match &= g_send_prp.get(i).template get<2>(2*j)[2][2] == j+140000;
492 
493 
494  match &= g_send_prp.get(i).template get<0>(2*j+1) == j+12345;
495 
496  match &= g_send_prp.get(i).template get<1>(2*j+1)[0] == j;
497  match &= g_send_prp.get(i).template get<1>(2*j+1)[1] == j+20000;
498  match &= g_send_prp.get(i).template get<1>(2*j+1)[2] == j+50000;
499 
500  match &= g_send_prp.get(i).template get<2>(2*j+1)[0][0] == j+60000;
501  match &= g_send_prp.get(i).template get<2>(2*j+1)[0][1] == j+70000;
502  match &= g_send_prp.get(i).template get<2>(2*j+1)[0][2] == j+80000;
503  match &= g_send_prp.get(i).template get<2>(2*j+1)[1][0] == j+90000;
504  match &= g_send_prp.get(i).template get<2>(2*j+1)[1][1] == j+100000;
505  match &= g_send_prp.get(i).template get<2>(2*j+1)[1][2] == j+110000;
506  match &= g_send_prp.get(i).template get<2>(2*j+1)[2][0] == j+120000;
507  match &= g_send_prp.get(i).template get<2>(2*j+1)[2][1] == j+130000;
508  match &= g_send_prp.get(i).template get<2>(2*j+1)[2][2] == j+140000;
509  }
510  }
511 
512  BOOST_REQUIRE_EQUAL(match,true);
513 }
514 
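Filling the per-processor ghost send buffer, as process_ghost_particles_prp does above, is essentially a gather: for every (processor, particle) label the particle's properties are copied into a contiguous buffer for that processor. A minimal sketch of the same operation with plain Thrust follows; the container layout and names are assumptions made for illustration, not OpenFPM's internals.

#include <thrust/device_vector.h>
#include <thrust/gather.h>
#include <thrust/sequence.h>
#include <vector>

int main()
{
    const int n = 10000;

    // one scalar property per particle: prp[i] = i
    thrust::device_vector<float> prp(n);
    thrust::sequence(prp.begin(), prp.end());

    // hypothetical label list: every particle appears twice, mirroring the duplicated
    // entries that the test above builds in g_opart_device
    std::vector<int> src_h(2 * n);
    for (int j = 0; j < n; j++) {src_h[2 * j] = j; src_h[2 * j + 1] = j;}
    thrust::device_vector<int> src(src_h.begin(), src_h.end());

    // gather the labelled properties into a contiguous send buffer: send_buf[k] = prp[src[k]]
    thrust::device_vector<float> send_buf(2 * n);
    thrust::gather(src.begin(), src.end(), prp.begin(), send_buf.begin());

    return 0;
}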
515 BOOST_AUTO_TEST_CASE( decomposition_ie_ghost_gpu_test_use )
516 {
517  auto & v_cl = create_vcluster();
518 
519  // Vcluster
520  Vcluster<> & vcl = create_vcluster();
521 
523 
524  dec_type dec(vcl);
525 
526  // Physical domain
527  Box<3, float> box( { 0.0, 0.0, 0.0 }, { 1.0, 1.0, 1.0 });
528  size_t div[3];
529 
530  // Get the number of processors and calculate the number of sub-domains
531  // for each processor (SUB_UNIT_FACTOR=1024)
532  size_t n_proc = vcl.getProcessingUnits();
533  size_t n_sub = n_proc * SUB_UNIT_FACTOR;
534 
535  // Set the number of sub-domains on each dimension (in a scalable way)
536  for (int i = 0; i < 3; i++)
537  { div[i] = openfpm::math::round_big_2(pow(n_sub,1.0/3));}
538 
539  // Define ghost
540  Ghost<3, float> g(0.1);
541 
542  // Boundary conditions
543  size_t bc[] = { PERIODIC, PERIODIC, PERIODIC };
544 
545  // Decompose
546  dec.setParameters(div,box,bc,g);
547  dec.decompose();
548 
549  // Get the local boxes
550 
551  int nsub = dec.getNSubDomain();
552  int n_part = 10000 / nsub;
553 
555  vg.resize(nsub*n_part);
556 
557  for (size_t k = 0 ; k < nsub ; k++)
558  {
559  Box<3,float> sp = dec.getSubDomain(k);
560 
561  for (size_t j = 0 ; j < n_part ; j++)
562  {
563  vg.template get<0>(k*n_part+j)[0] = (sp.getHigh(0) - sp.getLow(0))*((float)rand()/(float)RAND_MAX) + sp.getLow(0);
564  vg.template get<0>(k*n_part+j)[1] = (sp.getHigh(1) - sp.getLow(1))*((float)rand()/(float)RAND_MAX) + sp.getLow(1);
565  vg.template get<0>(k*n_part+j)[2] = (sp.getHigh(2) - sp.getLow(2))*((float)rand()/(float)RAND_MAX) + sp.getLow(2);
566  }
567  }
568 
569  vg.hostToDevice<0>();
570 
571  // compute on the GPU the processor ID for each particle
572 
573  auto ite = vg.getGPUIterator();
574 
576  proc_id_out.resize(vg.size()+1);
577  proc_id_out.template get<0>(proc_id_out.size()-1) = 0;
578  proc_id_out.template hostToDevice<0>(proc_id_out.size()-1,proc_id_out.size()-1);
579 
580  CUDA_LAUNCH_DIM3((num_proc_ghost_each_part<3,float,decltype(dec.toKernel()),decltype(vg.toKernel()),decltype(proc_id_out.toKernel())>),
581  ite.wthr,ite.thr,
582  dec.toKernel(),vg.toKernel(),proc_id_out.toKernel());
583 
584 /* proc_id_out.deviceToHost<0>();
585 
586  bool match = true;
587  for (size_t i = 0 ; i < vg.size() ; i++)
588  {
589  Point<3,float> xp = vg.template get<0>(i);
590 
591  match &= proc_id_out.template get<0>(i) == dec.ghost_processorID_N(xp);
592 
593  const openfpm::vector<std::pair<size_t, size_t>> & vp_id = dec.template ghost_processorID_pair<typename dec_type::lc_processor_id, typename dec_type::shift_id>(xp, UNIQUE);
594 
595  match &= proc_id_out.template get<0>(i) == vp_id.size();
596  }
597 
598 
599  BOOST_REQUIRE_EQUAL(match,true);
600 
602 
603  openfpm::vector<aggregate<unsigned int>,
604  CudaMemory,
605  memory_traits_inte> starts;
606 
607  starts.resize(proc_id_out.size());
608 
609  openfpm::scan((unsigned int *)proc_id_out.template getDeviceBuffer<0>(),proc_id_out.size(),(unsigned int *)starts.template getDeviceBuffer<0>(),v_cl.getGpuContext());
610 
611  starts.deviceToHost<0>(starts.size()-1,starts.size()-1);
612 
613  size_t sz = starts.template get<0>(starts.size()-1);
614 
616 
617  openfpm::vector<aggregate<unsigned int,long unsigned int>,
618  CudaMemory,
619  memory_traits_inte> output;
620 
621  output.resize(sz);
622 
623  ite = vg.getGPUIterator();
624 
625  // we compute processor id for each particle
626  CUDA_LAUNCH_DIM3((proc_label_id_ghost<3,float,decltype(dec.toKernel()),decltype(vg.toKernel()),decltype(starts.toKernel()),decltype(output.toKernel())>),
627  ite.wthr,ite.thr,
628  dec.toKernel(),vg.toKernel(),starts.toKernel(),output.toKernel());
629 
630  output.template deviceToHost<0,1>();
631 
633 
634  starts.deviceToHost<0>();
635 
636  match = true;
637 
638  for (size_t i = 0 ; i < starts.size() - 1 ; i++)
639  {
640  size_t base = starts.template get<0>(i);
641  size_t sz = starts.template get<0>(i+1) - base;
642 
643  if (sz != 0)
644  {
645  size_t pid = output.template get<1>(base) & 0xFFFFFFFF;
646  Point<3,float> xp = vg.template get<0>(pid);
647 
648  openfpm::vector<proc_box_id> tmp_sort1;
649  openfpm::vector<proc_box_id> tmp_sort2;
650 
651  const openfpm::vector<std::pair<size_t, size_t>> & vp_id = dec.template ghost_processorID_pair<typename dec_type::lc_processor_id, typename dec_type::shift_id>(xp, UNIQUE);
652 
653  tmp_sort1.resize(vp_id.size());
654 
655  for (size_t j = 0 ; j < vp_id.size() ; j++)
656  {
657  tmp_sort1.get(j).proc_id = dec.IDtoProc(vp_id.get(j).first);
658  tmp_sort1.get(j).box_id = 0;
659  tmp_sort1.get(j).shift_id = vp_id.get(j).second;
660  }
661 
662  tmp_sort1.sort();
663 
664  tmp_sort2.resize(sz);
665  for (size_t j = 0 ; j < sz ; j++)
666  {
667  tmp_sort2.get(j).proc_id = output.template get<0>(base+j);
668  tmp_sort2.get(j).box_id = 0;
669  tmp_sort2.get(j).shift_id = output.template get<1>(base+j) >> 32;
670  }
671 
672  tmp_sort2.sort();
673 
674  match &= tmp_sort1.size() == tmp_sort2.size();
675 
676  for (size_t j = 0 ; j < tmp_sort1.size() ; j++)
677  {
678  match &= tmp_sort1.get(j).proc_id == tmp_sort2.get(j).proc_id;
679  match &= tmp_sort1.get(j).shift_id == tmp_sort2.get(j).shift_id;
680  }
681  }
682  }
683 
684  BOOST_REQUIRE_EQUAL(match,true);*/
685 }
686 
687 BOOST_AUTO_TEST_CASE( decomposition_to_gpu_test_use )
688 {
689  auto & v_cl = create_vcluster();
690 
691  // Vcluster
692  Vcluster<> & vcl = create_vcluster();
693 
695 
696  // Physical domain
697  Box<3, float> box( { 0.0, 0.0, 0.0 }, { 1.0, 1.0, 1.0 });
698  size_t div[3];
699 
700  // Get the number of processors and calculate the number of sub-domains
701  // for each processor (SUB_UNIT_FACTOR=1024)
702  size_t n_proc = vcl.getProcessingUnits();
703  size_t n_sub = n_proc * SUB_UNIT_FACTOR;
704 
705  // Set the number of sub-domains on each dimension (in a scalable way)
706  for (int i = 0; i < 3; i++)
707  { div[i] = openfpm::math::round_big_2(pow(n_sub,1.0/3));}
708 
709  // Define ghost
710  Ghost<3, float> g(0.01);
711 
712  // Boundary conditions
713  size_t bc[] = { PERIODIC, PERIODIC, PERIODIC };
714 
715  // Decompose
716  dec.setParameters(div,box,bc,g);
717  dec.decompose();
718 
720  vg.resize(10000);
721 
722  for (size_t i = 0 ; i < 10000 ; i++)
723  {
724  vg.template get<0>(i)[0] = (float)rand()/(float)RAND_MAX;
725  vg.template get<0>(i)[1] = (float)rand()/(float)RAND_MAX;
726  vg.template get<0>(i)[2] = (float)rand()/(float)RAND_MAX;
727  }
728 
729  vg.hostToDevice<0>();
730 
731  // compute on the GPU the processor ID for each particle
732 
733  auto ite = vg.getGPUIterator();
734 
736  proc_id_out.resize(vg.size());
737 
739  dev_counter.resize(v_cl.size());
740  dev_counter.fill<0>(0);
741  dev_counter.fill<1>(0);
742  dev_counter.fill<2>(0);
743 
744  CUDA_LAUNCH_DIM3((process_id_proc_each_part<3,float,decltype(dec.toKernel()),decltype(vg.toKernel()),decltype(proc_id_out.toKernel()),decltype(dev_counter.toKernel())>),
745  ite.wthr,ite.thr,
746  dec.toKernel(),vg.toKernel(),proc_id_out.toKernel(),dev_counter.toKernel(),(int)v_cl.rank());
747 
748 
749  proc_id_out.deviceToHost<0>();
750 
751  bool match = true;
752  for (size_t i = 0 ; i < proc_id_out.size() ; i++)
753  {
754  Point<3,float> xp = vg.template get<0>(i);
755 
756  match &= proc_id_out.template get<0>(i) == dec.processorIDBC(xp);
757  }
758 
759  BOOST_REQUIRE_EQUAL(match,true);
760 }
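The test above assigns each particle to the processor returned by the decomposition; on the GPU this reduces to a point-in-box test against the sub-domain bounds. The sketch below shows that reduction in plain CUDA with a flat array of boxes; it is an illustration under assumed names and data layout, not the CartDecomposition kernel used by process_id_proc_each_part.

#include <cuda_runtime.h>
#include <thrust/device_vector.h>

// assign to each point the index of the first axis-aligned box that contains it (-1 if none)
__global__ void assign_box(const float3* pos, int n_part,
                           const float3* box_lo, const float3* box_hi, int n_box, int* out)
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i >= n_part) {return;}
    float3 p = pos[i];
    int id = -1;
    for (int b = 0; b < n_box && id == -1; b++)
    {
        bool in = p.x >= box_lo[b].x && p.x < box_hi[b].x &&
                  p.y >= box_lo[b].y && p.y < box_hi[b].y &&
                  p.z >= box_lo[b].z && p.z < box_hi[b].z;
        if (in) {id = b;}
    }
    out[i] = id;
}

int main()
{
    const int n_part = 4, n_box = 2;
    thrust::device_vector<float3> pos(n_part, make_float3(0.25f, 0.5f, 0.5f));
    thrust::device_vector<float3> lo(n_box), hi(n_box);
    lo[0] = make_float3(0.0f, 0.0f, 0.0f); hi[0] = make_float3(0.5f, 1.0f, 1.0f);   // box 0: x in [0,0.5)
    lo[1] = make_float3(0.5f, 0.0f, 0.0f); hi[1] = make_float3(1.0f, 1.0f, 1.0f);   // box 1: x in [0.5,1)
    thrust::device_vector<int> out(n_part);

    assign_box<<<1, 64>>>(pos.data().get(), n_part, lo.data().get(), hi.data().get(),
                          n_box, out.data().get());
    return cudaDeviceSynchronize() == cudaSuccess ? 0 : 1;
}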
761 BOOST_AUTO_TEST_CASE( vector_dist_gpu_find_buffer_offsets_test )
762 {
765 
766  vgp.resize(200000);
767 
768  for (size_t k = 0 ; k < vgp.size() ; k++)
769  {
770  vgp.template get<0>(k) = k / 1000;
771  vgp.template get<1>(k) = k / 1000;
772  }
773 
774  offs.resize(220);
775 
776  CudaMemory mem;
777  mem.allocate(sizeof(int));
778  mem.fill(0);
779 
780  auto ite = vgp.getGPUIterator();
781  vgp.hostToDevice<0,1>();
782 
783  CUDA_LAUNCH((find_buffer_offsets<1,decltype(vgp.toKernel()),decltype(offs.toKernel())>),ite,vgp.toKernel(),(int *)mem.getDevicePointer(),offs.toKernel());
784 
785  offs.template deviceToHost<0,1>();
786 
787  mem.deviceToHost();
788  int n_ele = *(int *)mem.getPointer();
789  BOOST_REQUIRE_EQUAL(n_ele,199);
790 
793 
794  for (size_t i = 0 ; i < n_ele ; i++)
795  {
796  ofv.add(offs.template get<0>(i));
797  ofv2.add(offs.template get<1>(i));
798  }
799 
800  ofv.sort();
801  ofv2.sort();
802 
803  for (size_t i = 0 ; i < ofv.size() ; i++)
804  {
805  BOOST_REQUIRE_EQUAL(ofv.get(i),(i+1)*1000);
806  BOOST_REQUIRE_EQUAL(ofv2.get(i),i);
807  }
808 }
809 
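find_buffer_offsets, as used in the test above, records the positions where a sorted key (the destination processor) changes, i.e. the start of each segment of the send buffer, using an atomic counter because the boundaries are discovered in no particular order. A minimal standalone version of the same idea in plain CUDA, with hypothetical names, is sketched below.

#include <cuda_runtime.h>
#include <thrust/device_vector.h>
#include <vector>

// record every position i+1 where key[i] != key[i+1]; the output order is unspecified,
// which is why the test above sorts the offsets before checking them
__global__ void find_offsets(const int* key, int n, int* counter, int* offset, int* seg_key)
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i >= n - 1) {return;}
    if (key[i] != key[i + 1])
    {
        int slot = atomicAdd(counter, 1);   // grab one slot in the output list
        offset[slot] = i + 1;               // first element of the next segment
        seg_key[slot] = key[i];             // key of the segment that just ended
    }
}

int main()
{
    const int n = 200000;
    std::vector<int> key_h(n);
    for (int k = 0; k < n; k++) {key_h[k] = k / 1000;}   // 200 segments of 1000 elements, as in the test

    thrust::device_vector<int> key(key_h.begin(), key_h.end());
    thrust::device_vector<int> counter(1, 0), offset(256), seg_key(256);

    find_offsets<<<(n + 127) / 128, 128>>>(key.data().get(), n, counter.data().get(),
                                           offset.data().get(), seg_key.data().get());
    cudaDeviceSynchronize();

    int n_boundaries = counter[0];   // 199 boundaries are expected, matching the BOOST check above
    return (n_boundaries == 199) ? 0 : 1;
}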
810 BOOST_AUTO_TEST_CASE(vector_dist_reorder_lbl)
811 {
814 
815  lbl_p.resize(100);
816  starts.resize(10);
817 
818  for (int i = 0 ; i < 10 ; i++) // <------ particle id
819  {
820  for (int j = 0 ; j < 10 ; j++) // <----- processor
821  {
822  lbl_p.template get<2>(i*10+j) = i;
823  lbl_p.template get<1>(i*10+j) = j;
824  }
825  starts.template get<0>(i) = (i*10);
826  }
827 
828  // move lbl and starts to gpu
829  starts.template hostToDevice<0>();
830  lbl_p.template hostToDevice<1,2>();
831 
832  auto ite = lbl_p.getGPUIterator();
833 
834  CUDA_LAUNCH_DIM3((reorder_lbl<decltype(lbl_p.toKernel()),decltype(starts.toKernel())>),ite.wthr,ite.thr,lbl_p.toKernel(),starts.toKernel());
835 
836  starts.template deviceToHost<0>();
837  lbl_p.template deviceToHost<0,1,2>();
838 
839  for (int i = 0 ; i < 10 ; i++) // <------ particle id
840  {
841  for (int j = 0 ; j < 10 ; j++) // <----- processor
842  {
843  BOOST_REQUIRE_EQUAL(lbl_p.template get<0>(j*10+i),i*10+j);
844  }
845  }
846 }
847 
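reorder_lbl, checked above, gives every labelled particle a destination slot so that particles going to the same processor end up contiguous. The sketch below reproduces that behaviour in plain CUDA under the assumption that the slot is obtained by atomically bumping a per-processor cursor initialised from the scanned counts (the starts vector of the test); names and layout are illustrative only.

#include <cuda_runtime.h>
#include <thrust/device_vector.h>
#include <vector>

// scatter the index of every element into the slot range reserved for its target processor;
// cursor[p] starts at the first slot of processor p and atomicAdd hands out consecutive slots
__global__ void reorder(const int* proc, int n, int* cursor, int* perm)
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i >= n) {return;}
    int slot = atomicAdd(&cursor[proc[i]], 1);
    perm[slot] = i;   // after the kernel, perm lists the element indices grouped by processor
}

int main()
{
    const int n = 100, n_proc = 10;
    std::vector<int> proc_h(n), cursor_h(n_proc);
    for (int i = 0; i < n; i++) {proc_h[i] = i % n_proc;}       // 10 elements per processor
    for (int p = 0; p < n_proc; p++) {cursor_h[p] = p * 10;}    // exclusive scan of the counts

    thrust::device_vector<int> proc(proc_h.begin(), proc_h.end());
    thrust::device_vector<int> cursor(cursor_h.begin(), cursor_h.end());
    thrust::device_vector<int> perm(n);

    reorder<<<1, 128>>>(proc.data().get(), n, cursor.data().get(), perm.data().get());
    return cudaDeviceSynchronize() == cudaSuccess ? 0 : 1;
}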
848 BOOST_AUTO_TEST_CASE(vector_dist_gpu_map_fill_send_buffer_test)
849 {
851 
854 
857 
858  unsigned int offset = 0;
859 
860  vPos.resize(100000);
861  vPrp.resize(vPos.size());
862  m_opart.resize(vPos.size());
863 
864  for (size_t i = 0 ; i < vPos.size() ; i++)
865  {
866  vPos.template get<0>(i)[0] = (float)rand()/(float)RAND_MAX;
867  vPos.template get<0>(i)[1] = (float)rand()/(float)RAND_MAX;
868  vPos.template get<0>(i)[2] = (float)rand()/(float)RAND_MAX;
869 
870  vPrp.template get<0>(i) = 5.0 + (float)rand()/(float)RAND_MAX;
871  vPrp.template get<1>(i)[0] = 10.0 + (float)rand()/(float)RAND_MAX;
872  vPrp.template get<1>(i)[1] = 11.0 + (float)rand()/(float)RAND_MAX;
873  vPrp.template get<2>(i)[0][0] = 40.0 + (float)rand()/(float)RAND_MAX;
874  vPrp.template get<2>(i)[0][1] = 50.0 + (float)rand()/(float)RAND_MAX;
875  vPrp.template get<2>(i)[0][2] = 60.0 + (float)rand()/(float)RAND_MAX;
876  vPrp.template get<2>(i)[1][0] = 70.0 + (float)rand()/(float)RAND_MAX;
877  vPrp.template get<2>(i)[1][1] = 80.0 + (float)rand()/(float)RAND_MAX;
878  vPrp.template get<2>(i)[1][2] = 150.0 + (float)rand()/(float)RAND_MAX;
879  vPrp.template get<2>(i)[2][0] = 160.0 + (float)rand()/(float)RAND_MAX;
880  vPrp.template get<2>(i)[2][1] = 170.0 + (float)rand()/(float)RAND_MAX;
881  vPrp.template get<2>(i)[2][2] = 340.0 + (float)rand()/(float)RAND_MAX;
882 
883  int seg = i / 10000;
884  m_opart.template get<1>(i) = seg;
885  m_opart.template get<0>(i) = (9999 - i%10000) + seg * 10000;
886  }
887 
888  m_pos.resize(10);
889  m_prp.resize(10);
890 
891  for (size_t i = 0 ; i < m_pos.size() ; i++)
892  {
893  m_pos.get(i).resize(10000);
894  m_prp.get(i).resize(10000);
895  }
896 
897  vPos.hostToDevice<0>();
898  vPrp.hostToDevice<0,1,2>();
899 
900  m_opart.hostToDevice<0,1>();
901 
902  for (size_t i = 0 ; i < m_pos.size() ; i++)
903  {
904  auto ite = m_pos.get(i).getGPUIterator();
905 
906  CUDA_LAUNCH_DIM3((process_map_particles<decltype(m_opart.toKernel()),decltype(m_pos.get(i).toKernel()),decltype(m_prp.get(i).toKernel()),
907  decltype(vPos.toKernel()),decltype(vPrp.toKernel())>),
908  ite.wthr,ite.thr,
909  m_opart.toKernel(),m_pos.get(i).toKernel(), m_prp.get(i).toKernel(),
910  vPos.toKernel(),vPrp.toKernel(),offset);
911 
912  m_pos.get(i).deviceToHost<0>();
913  m_prp.get(i).deviceToHost<0,1,2>();
914 
915  bool match = true;
916 
917  for (size_t j = 0 ; j < m_pos.get(i).size() ; j++)
918  {
919  match &= (m_pos.get(i).template get<0>(j)[0] == vPos.template get<0>(m_opart.template get<0>(offset+j))[0]);
920  match &= (m_pos.get(i).template get<0>(j)[1] == vPos.template get<0>(m_opart.template get<0>(offset+j))[1]);
921  match &= (m_pos.get(i).template get<0>(j)[2] == vPos.template get<0>(m_opart.template get<0>(offset+j))[2]);
922 
923  match &= (m_prp.get(i).template get<0>(j) == vPrp.template get<0>(m_opart.template get<0>(offset+j)));
924 
925  match &= (m_prp.get(i).template get<1>(j)[0] == vPrp.template get<1>(m_opart.template get<0>(offset+j))[0]);
926  match &= (m_prp.get(i).template get<1>(j)[1] == vPrp.template get<1>(m_opart.template get<0>(offset+j))[1]);
927 
928  match &= (m_prp.get(i).template get<2>(j)[0][0] == vPrp.template get<2>(m_opart.template get<0>(offset+j))[0][0]);
929  match &= (m_prp.get(i).template get<2>(j)[0][1] == vPrp.template get<2>(m_opart.template get<0>(offset+j))[0][1]);
930  match &= (m_prp.get(i).template get<2>(j)[0][2] == vPrp.template get<2>(m_opart.template get<0>(offset+j))[0][2]);
931  match &= (m_prp.get(i).template get<2>(j)[1][0] == vPrp.template get<2>(m_opart.template get<0>(offset+j))[1][0]);
932  match &= (m_prp.get(i).template get<2>(j)[1][1] == vPrp.template get<2>(m_opart.template get<0>(offset+j))[1][1]);
933  match &= (m_prp.get(i).template get<2>(j)[1][2] == vPrp.template get<2>(m_opart.template get<0>(offset+j))[1][2]);
934  match &= (m_prp.get(i).template get<2>(j)[2][0] == vPrp.template get<2>(m_opart.template get<0>(offset+j))[2][0]);
935  match &= (m_prp.get(i).template get<2>(j)[2][1] == vPrp.template get<2>(m_opart.template get<0>(offset+j))[2][1]);
936  match &= (m_prp.get(i).template get<2>(j)[2][2] == vPrp.template get<2>(m_opart.template get<0>(offset+j))[2][2]);
937  }
938 
939  BOOST_REQUIRE_EQUAL(match,true);
940 
941  offset += m_pos.get(i).size();
942  }
943 }
944 
945 template<unsigned int prp>
946 void vector_dist_remove_marked_type()
947 {
948  auto & v_cl = create_vcluster();
949 
950  if (v_cl.size() > 16)
951  {return;}
952 
953  Box<3,float> domain({0.0,0.0,0.0},{1.0,1.0,1.0});
954 
955  // set the ghost based on the cut-off radius (made just a little bit smaller than the spacing)
956  Ghost<3,float> g(0.1);
957 
958  // Boundary conditions
959  size_t bc[3]={PERIODIC,PERIODIC,PERIODIC};
960 
961  vector_dist_gpu<3,float,aggregate<float,float,int,int>> vd(50000*v_cl.size(),domain,bc,g);
962 
963  // Fill the position
964 
965  auto it = vd.getDomainIterator();
966 
967  while(it.isNext())
968  {
969  auto p = it.get();
970 
971  vd.getPos(p)[0] = (float)rand() / (float)RAND_MAX;
972  vd.getPos(p)[1] = (float)rand() / (float)RAND_MAX;
973  vd.getPos(p)[2] = (float)rand() / (float)RAND_MAX;
974 
975  ++it;
976  }
977 
978  vd.map();
979  vd.template ghost_get<>();
980 
981  it = vd.getDomainIterator();
982 
983  float fc = 1.0;
984  float dc = 1.0;
985  int ic = 1;
986  int sc = 1;
987 
988  while(it.isNext())
989  {
990  auto p = it.get();
991 
992  vd.template getProp<0>(p) = fc;
993  vd.template getProp<1>(p) = dc;
994  vd.template getProp<2>(p) = ic;
995  vd.template getProp<3>(p) = sc;
996 
997  vd.template getProp<prp>(p) = (ic % 3 == 0);
998 
999  fc += 1.0;
1000  dc += 1.0;
1001  ic += 1;
1002  sc += 1;
1003 
1004  ++it;
1005  }
1006 
1007  size_t sz = vd.size_local() - vd.size_local()/3;
1008 
1009  vd.template hostToDeviceProp<0,1,2,3>();
1010 
1011  remove_marked<prp>(vd);
1012 
1013  BOOST_REQUIRE_EQUAL(vd.size_local(),sz);
1014 
1015  vd.template deviceToHostProp<0,1,2,3>();
1016 
1017  auto it2 = vd.getDomainIterator();
1018 
1019  // There should be no values divisible by 3
1020 
1021  bool test = true;
1022 
1023  while(it2.isNext())
1024  {
1025  auto p = it2.get();
1026 
1027  if (prp != 0)
1028  {test &= ((int)vd.template getProp<0>(p) % 3 != 0);}
1029 
1030  if (prp != 1)
1031  {test &= ((int)vd.template getProp<1>(p) % 3 != 0);}
1032 
1033  if (prp != 2)
1034  {test &= ((int)vd.template getProp<2>(p) % 3 != 0);}
1035 
1036  if (prp != 3)
1037  {test &= ((int)vd.template getProp<3>(p) % 3 != 0);}
1038 
1039  if (test == false)
1040  {
1041  if (prp != 0)
1042  {std::cout << (int)vd.template getProp<0>(p) << std::endl;}
1043 
1044  if (prp != 1)
1045  {std::cout << (int)vd.template getProp<1>(p) << std::endl;}
1046 
1047  if (prp != 2)
1048  {std::cout << (int)vd.template getProp<2>(p) << std::endl;}
1049 
1050  if (prp != 3)
1051  {std::cout << (int)vd.template getProp<3>(p) << std::endl;}
1052 
1053  break;
1054  }
1055 
1056  ++it2;
1057  }
1058 
1059  BOOST_REQUIRE_EQUAL(test,true);
1060 
1061 
1062  // We now test the case where nothing is removed
1063 
1064  size_t size_old = vd.size_local();
1065 
1066  // Because remove_marked is destructive we have to reset the property
1067  vd.getPropVector().template fill<prp>(0);
1068 
1069  remove_marked<prp>(vd);
1070 
1071  BOOST_REQUIRE_EQUAL(vd.size_local(),size_old);
1072 
1073  // Now we try to remove all
1074  vd.getPropVector().template fill<prp>(1);
1075 
1076  remove_marked<prp>(vd);
1077 
1078  BOOST_REQUIRE_EQUAL(vd.size_local(),0);
1079 }
1080 
1081 BOOST_AUTO_TEST_CASE(vector_dist_remove_marked)
1082 {
1083  vector_dist_remove_marked_type<0>();
1084  vector_dist_remove_marked_type<1>();
1085  vector_dist_remove_marked_type<2>();
1086  vector_dist_remove_marked_type<3>();
1087 }
1088 
1089 
1090 BOOST_AUTO_TEST_CASE( vector_dist_particle_NN_MP_iteration_gpu )
1091 {
1092  typedef aggregate<size_t,size_t,size_t> part_prop;
1093 
1094  Vcluster<> & v_cl = create_vcluster();
1095 
1096  if (v_cl.getProcessingUnits() > 24)
1097  {return;}
1098 
1099  float L = 1000.0;
1100 
1101  // set the seed
1102  // create the random generator engine
1103  std::default_random_engine eg;
1104  eg.seed(v_cl.rank()*4533);
1105  std::uniform_real_distribution<float> ud(-L,L);
1106 
1107  long int k = 4096 * v_cl.getProcessingUnits();
1108 
1109  long int big_step = k / 4;
1110  big_step = (big_step == 0)?1:big_step;
1111 
1112  BOOST_TEST_CHECKPOINT( "Testing 3D periodic vector symmetric cell-list k=" << k );
1113 
1114  Box<3,float> box({-L,-L,-L},{L,L,L});
1115 
1116  // Boundary conditions
1117  size_t bc[3]={PERIODIC,PERIODIC,PERIODIC};
1118 
1119  float r_cut = 100.0;
1120 
1121  // ghost
1122  Ghost<3,float> ghost(r_cut);
1123 
1124  // Distributed vector
1125  vector_dist_gpu<3,float,part_prop> vd(k,box,bc,ghost,BIND_DEC_TO_GHOST);
1126 
1127 /* size_t start = vd.init_size_accum(k);
1128 
1129  auto it = vd.getIterator();
1130 
1131  while (it.isNext())
1132  {
1133  auto key = it.get();
1134 
1135  vd.getPosWrite(key)[0] = ud(eg);
1136  vd.getPosWrite(key)[1] = ud(eg);
1137  vd.getPosWrite(key)[2] = ud(eg);
1138 
1139  // Fill some properties randomly
1140 
1141  vd.template getPropWrite<0>(key) = 0;
1142  vd.template getPropWrite<1>(key) = 0;
1143  vd.template getPropWrite<2>(key) = key.getKey() + start;
1144 
1145  ++it;
1146  }
1147 
1148  vd.map();
1149 
1150  // sync the ghost
1151  vd.template ghost_get<0,2>();
1152 
1153  auto NN = vd.getCellList(r_cut);
1154  auto p_it = vd.getDomainIterator();
1155 
1156  while (p_it.isNext())
1157  {
1158  auto p = p_it.get();
1159 
1160  Point<3,float> xp = vd.getPosRead(p);
1161 
1162  auto Np = NN.getNNIteratorBox(NN.getCell(xp));
1163 
1164  while (Np.isNext())
1165  {
1166  auto q = Np.get();
1167 
1168  if (p.getKey() == q)
1169  {
1170  ++Np;
1171  continue;
1172  }
1173 
1174  // repulsive
1175 
1176  Point<3,float> xq = vd.getPosRead(q);
1177  Point<3,float> f = (xp - xq);
1178 
1179  float distance = f.norm();
1180 
1181  // Particle should be inside 2 * r_cut range
1182 
1183  if (distance < r_cut )
1184  {
1185  vd.template getPropWrite<0>(p)++;
1186  }
1187 
1188  ++Np;
1189  }
1190 
1191  ++p_it;
1192  }
1193 
1194  // We now divide the particles on 4 phases
1195 
1196  openfpm::vector<vector_dist_gpu<3,float,part_prop>> phases;
1197  phases.add( vector_dist_gpu<3,float,part_prop>(vd.getDecomposition(),0));
1198  phases.add( vector_dist_gpu<3,float,part_prop>(phases.get(0).getDecomposition(),0));
1199  phases.add( vector_dist_gpu<3,float,part_prop>(phases.get(0).getDecomposition(),0));
1200  phases.add( vector_dist_gpu<3,float,part_prop>(phases.get(0).getDecomposition(),0));
1201 
1202  auto it2 = vd.getDomainIterator();
1203 
1204  while (it2.isNext())
1205  {
1206  auto p = it2.get();
1207 
1208  if (p.getKey() % 4 == 0)
1209  {
1210  phases.get(0).add();
1211  phases.get(0).getLastPos()[0] = vd.getPos(p)[0];
1212  phases.get(0).getLastPos()[1] = vd.getPos(p)[1];
1213  phases.get(0).getLastPos()[2] = vd.getPos(p)[2];
1214 
1215  phases.get(0).getLastProp<1>() = 0;
1216 
1217  phases.get(0).template getLastProp<2>() = vd.template getProp<2>(p);
1218  }
1219  else if (p.getKey() % 4 == 1)
1220  {
1221  phases.get(1).add();
1222  phases.get(1).getLastPos()[0] = vd.getPos(p)[0];
1223  phases.get(1).getLastPos()[1] = vd.getPos(p)[1];
1224  phases.get(1).getLastPos()[2] = vd.getPos(p)[2];
1225 
1226  phases.get(1).getLastProp<1>() = 0;
1227 
1228  phases.get(1).template getLastProp<2>() = vd.template getProp<2>(p);
1229  }
1230  else if (p.getKey() % 4 == 2)
1231  {
1232  phases.get(2).add();
1233  phases.get(2).getLastPos()[0] = vd.getPos(p)[0];
1234  phases.get(2).getLastPos()[1] = vd.getPos(p)[1];
1235  phases.get(2).getLastPos()[2] = vd.getPos(p)[2];
1236 
1237  phases.get(2).getLastProp<1>() = 0;
1238 
1239  phases.get(2).template getLastProp<2>() = vd.template getProp<2>(p);
1240  }
1241  else
1242  {
1243  phases.get(3).add();
1244  phases.get(3).getLastPos()[0] = vd.getPos(p)[0];
1245  phases.get(3).getLastPos()[1] = vd.getPos(p)[1];
1246  phases.get(3).getLastPos()[2] = vd.getPos(p)[2];
1247 
1248  phases.get(3).getLastProp<1>() = 0;
1249 
1250  phases.get(3).template getLastProp<2>() = vd.template getProp<2>(p);
1251  }
1252 
1253  ++it2;
1254  }
1255 
1256  // now we synchronize the ghosts
1257 
1258  for (size_t i = 0 ; i < phases.size() ; i++)
1259  {
1260  phases.get(i).template ghost_get<0,1,2>();
1261  }
1262 
1263  typedef decltype(phases.get(0).getCellListSym(r_cut)) cell_list_type;
1264 
1265  openfpm::vector<cell_list_type> NN_ptr;
1266 
1267  for (size_t i = 0 ; i < phases.size() ; i++)
1268  {
1269  NN_ptr.add(phases.get(i).getCellListSym(r_cut));
1270  }
1271 
1272  // We now interact all the phases
1273 
1274  for (size_t i = 0 ; i < phases.size() ; i++)
1275  {
1276  for (size_t j = 0 ; j < phases.size() ; j++)
1277  {
1278  auto p_it2 = phases.get(i).getDomainIterator();
1279 
1280  while (p_it2.isNext())
1281  {
1282  auto p = p_it2.get();
1283 
1284  Point<3,float> xp = phases.get(i).getPosRead(p);
1285 
1286  auto Np = NN_ptr.get(j).getNNIteratorBoxSymMP(NN_ptr.get(j).getCell(xp),p.getKey(),phases.get(i).getPosVector(),phases.get(j).getPosVector());
1287 
1288  while (Np.isNext())
1289  {
1290  auto q = Np.get();
1291 
1292  if (p.getKey() == q && i == j)
1293  {
1294  ++Np;
1295  continue;
1296  }
1297 
1298  // repulsive
1299 
1300  Point<3,float> xq = phases.get(j).getPosRead(q);
1301  Point<3,float> f = (xp - xq);
1302 
1303  float distance = f.norm();
1304 
1305  // Particle should be inside r_cut range
1306 
1307  if (distance < r_cut )
1308  {
1309  phases.get(i).template getPropWrite<1>(p)++;
1310  phases.get(j).template getPropWrite<1>(q)++;
1311  }
1312 
1313  ++Np;
1314  }
1315 
1316  ++p_it2;
1317  }
1318  }
1319  }
1320 
1321  for (size_t i = 0 ; i < phases.size() ; i++)
1322  {
1323  phases.get(i).template ghost_put<add_,1>();
1324  }
1325 
1326  auto p_it3 = vd.getDomainIterator();
1327 
1328  bool ret = true;
1329  while (p_it3.isNext())
1330  {
1331  auto p = p_it3.get();
1332 
1333  int ph;
1334 
1335  if (p.getKey() % 4 == 0)
1336  {ph = 0;}
1337  else if (p.getKey() % 4 == 1)
1338  {ph = 1;}
1339  else if (p.getKey() % 4 == 2)
1340  {ph = 2;}
1341  else
1342  {ph = 3;}
1343 
1344  size_t pah = p.getKey()/4;
1345  ret &= phases.get(ph).template getPropRead<1>(pah) == vd.template getPropRead<0>(p);
1346 
1347  if (ret == false)
1348  {
1349  std::cout << "Error on particle: " << vd.template getPropRead<2>(p) << " " << v_cl.rank() << std::endl;
1350 
1351  std::cout << "phase " << ph << " particle " << pah << " " << phases.get(ph).template getPropRead<1>(pah) << " A " << vd.template getPropRead<0>(p) << std::endl;
1352 
1353  break;
1354  }
1355 
1356  ++p_it3;
1357  }
1358 
1359  BOOST_REQUIRE_EQUAL(ret,true);*/
1360 }
1361 
1362 BOOST_AUTO_TEST_SUITE_END()
1363 