1#define BOOST_TEST_DYN_LINK
3#include <boost/test/unit_test.hpp>
4#include "VCluster/VCluster.hpp"
5#include <Vector/vector_dist.hpp>
6#include "Vector/tests/vector_dist_util_unit_tests.hpp"
8#define SUB_UNIT_FACTOR 1024
// Kernel: shifts the position of one particle by 0.05 along each of its first `dim`
// coordinates. The particle handle `p` is whatever the GET_PARTICLE(vd) macro yields
// (macro defined outside this file).
//   dim              - number of position components updated by the loop
//   vector_dist_type - type of the `vd` argument, passed by value
10template<
unsigned int dim ,
typename vector_dist_type>
11__global__
void move_parts_gpu_test(vector_dist_type vd)
13 auto p = GET_PARTICLE(vd);
// NOTE(review): 0.05 is a double literal, so the addition is carried out in double even
// when the position type is float — confirm this is acceptable for the float instantiation.
16 for (
int i = 0 ; i < dim ; i++)
18 vd.getPos(p)[i] += 0.05;
22BOOST_AUTO_TEST_SUITE( vector_dist_gpu_test )
// Prints `test`, a space, `sz` and a newline to std::cout, but only on the process whose
// getProcessUnitID() is 0, so the message appears once per run.
//   test - label to print
//   sz   - numeric value printed after the label
24void print_test(std::string test,
size_t sz)
26 if (create_vcluster().getProcessUnitID() == 0)
27 std::cout << test <<
" " << sz <<
"\n";
// Kernel: derives the properties of one particle from its position. With x, y, z being
// getPos(p)[0], getPos(p)[1], getPos(p)[2]:
//   prop<0>    = x + y + z
//   prop<1>[0] = x + y
//   prop<1>[1] = x + z
//   prop<1>[2] = y + z
// Property 2 is not written by the visible lines. `p` is whatever GET_PARTICLE(vd) yields.
31__global__
void initialize_props(vector_dist_ker<3,
float,
aggregate<
float,
float [3],
float[3]>> vd)
33 auto p = GET_PARTICLE(vd);
35 vd.template getProp<0>(p) = vd.getPos(p)[0] + vd.getPos(p)[1] + vd.getPos(p)[2];
37 vd.template getProp<1>(p)[0] = vd.getPos(p)[0] + vd.getPos(p)[1];
38 vd.template getProp<1>(p)[1] = vd.getPos(p)[0] + vd.getPos(p)[2];
39 vd.template getProp<1>(p)[2] = vd.getPos(p)[1] + vd.getPos(p)[2];
// Kernel: accumulates two neighbour sums for the particle `p` obtained from
// GET_PARTICLE(vd) and stores them in properties 1 and 2 of `vd`.
//   force1 - sum of vd_sort prop<0>(q1) * r1, written to prop<1>(p)[0..2]
//   force2 - sum of vd      prop<0>(q2) * r2, written to prop<2>(p)[0..2]
// Neighbours come from cl.getNNIterator(cl.getCell(xp)); the iteration with q2 == p is skipped.
// NOTE(review): the parameter list continues past `vd_sort` on a line that is not part of
// this listing (the body uses `cl`, and the launch in check_cell_list_cpu_and_gpu passes four
// arguments). The declarations of xp, q2, r1, r2, force1 and force2 are likewise not visible.
42template<
typename T,
typename CellList_type>
43__global__
void calculate_force(vector_dist_ker<3, T,
aggregate<T, T[3], T [3]>> vd,
44 vector_dist_ker<3, T,
aggregate<T, T[3], T [3]>> vd_sort,
48 auto p = GET_PARTICLE(vd);
52 auto it = cl.getNNIterator(cl.getCell(xp));
// q1 is the neighbour id returned by the iterator's get_sort().
59 auto q1 = it.get_sort();
// Skip the particle itself.
62 if (q2 == p) {++it;
continue;}
75 force1 += vd_sort.template getProp<0>(q1)*r1;
80 force2 += vd.template getProp<0>(q2)*r2;
// Store both accumulated vectors component by component.
86 vd.template getProp<1>(p)[0] = force1.
get(0);
87 vd.template getProp<1>(p)[1] = force1.get(1);
88 vd.template getProp<1>(p)[2] = force1.get(2);
90 vd.template getProp<2>(p)[0] = force2.get(0);
91 vd.template getProp<2>(p)[1] = force2.get(1);
92 vd.template getProp<2>(p)[2] = force2.get(2);
// Kernel: neighbour-sum computation that reads and writes only `vd`.
// For the particle `p` set up by GET_PARTICLE_SORT(p,cl), it walks the neighbours returned by
// cl.getNNIterator(cl.getCell(xp)), skips q1 == p, adds prop<0>(q1) * r1 to force1 whenever
// r1.norm() > 1e-6, and stores force1 in prop<1>(p)[0..2].
//   vd   - particle set (launched with vd.toKernel_sorted() by check_cell_list_cpu_and_gpu)
//   cl   - cell list used for the neighbour search
//   rank - not used by any visible line
// NOTE(review): the declarations of xp, r1 and force1 are not part of this listing.
95template<
typename T,
typename CellList_type>
96__global__
void calculate_force_full_sort(vector_dist_ker<3, T,
aggregate<T, T[3], T [3]>> vd,
97 CellList_type cl,
int rank)
100 GET_PARTICLE_SORT(p,cl);
104 auto it = cl.getNNIterator(cl.getCell(xp));
110 auto q1 = it.get_sort();
// Skip the particle itself.
112 if (q1 == p) {++it;
continue;}
// Only neighbours whose r1 has norm above 1e-6 contribute.
120 if (r1.
norm() > 1e-6)
124 force1 += vd.template getProp<0>(q1)*r1;
130 vd.template getProp<1>(p)[0] = force1.
get(0);
131 vd.template getProp<1>(p)[1] = force1.get(1);
132 vd.template getProp<1>(p)[2] = force1.get(2);
// Host-side validation of the values produced by the force kernels.
// For each particle it recomputes `force` from the neighbours given by NN_cpu (skipping the
// particle itself and neighbours whose r2.norm() is not above 1e-6) and accumulates into
// `match` whether, per component and within an absolute tolerance of 0.0003,
//   prop<1> agrees with prop<2>, and
//   prop<1> agrees with the recomputed force.
// The "ERROR:" lines print the prop<1>/prop<2> pairs and the "ERROR2:" lines print the particle
// key with the prop<1>/force pairs.
//   NN_cpu - cell list used for the reference neighbour search
//   vd     - particle set whose properties 0, 1 and 2 are read
// NOTE(review): the return statement is not part of this listing; presumably `match` is
// returned — confirm. The condition guarding the prints is not visible either.
135template<
typename CellList_type,
typename vector_type>
136bool check_force(CellList_type & NN_cpu,
vector_type & vd)
138 typedef typename vector_type::stype St;
154 auto NNc = NN_cpu.getNNIterator(NN_cpu.getCell(xp));
// Skip the particle itself.
160 if (q == p.getKey()) {++NNc;
continue;}
167 if (r2.
norm() > 1e-6)
170 force += vd.template getProp<0>(q)*r2;
// prop<1> versus prop<2>.
176 match &= fabs(vd.template getProp<1>(p)[0] - vd.template getProp<2>(p)[0]) < 0.0003;
177 match &= fabs(vd.template getProp<1>(p)[1] - vd.template getProp<2>(p)[1]) < 0.0003;
178 match &= fabs(vd.template getProp<1>(p)[2] - vd.template getProp<2>(p)[2]) < 0.0003;
// prop<1> versus the host-recomputed force.
180 match &= fabs(vd.template getProp<1>(p)[0] - force.get(0)) < 0.0003;
181 match &= fabs(vd.template getProp<1>(p)[1] - force.get(1)) < 0.0003;
182 match &= fabs(vd.template getProp<1>(p)[2] - force.get(2)) < 0.0003;
186 std::cout <<
"ERROR: " << vd.template getProp<1>(p)[0] <<
" " << vd.template getProp<2>(p)[0] << std::endl;
187 std::cout <<
"ERROR: " << vd.template getProp<1>(p)[1] <<
" " << vd.template getProp<2>(p)[1] << std::endl;
188 std::cout <<
"ERROR: " << vd.template getProp<1>(p)[2] <<
" " << vd.template getProp<2>(p)[2] << std::endl;
190 std::cout << p.getKey() <<
" ERROR2: " << vd.template getProp<1>(p)[0] <<
" " << force.get(0) << std::endl;
191 std::cout << p.getKey() <<
" ERROR2: " << vd.template getProp<1>(p)[1] <<
" " << force.get(1) << std::endl;
192 std::cout << p.getKey() <<
" ERROR2: " << vd.template getProp<1>(p)[2] <<
" " << force.get(2) << std::endl;
// Test case: sets particle positions to rand()/RAND_MAX values, fills prop<1> and prop<2> from
// sums of the coordinates, calls ghost_get<0,1,2>() and then checks with exact equality that
// prop<0>, prop<1> and prop<2> still match the formulas recomputed from the positions.
// Finally requires tot_s > 1000.
// NOTE(review): the declaration of `vd`, the particle loops, the assignment of prop<0> and the
// definition of tot_s are not part of this listing. The action taken when v_cl.size() > 16 is
// not visible either (presumably the test is skipped — confirm).
204BOOST_AUTO_TEST_CASE( vector_dist_gpu_ghost_get )
206 auto & v_cl = create_vcluster();
208 if (v_cl.size() > 16)
217 size_t bc[3]={PERIODIC,PERIODIC,PERIODIC};
// Random position in each coordinate.
227 vd.
getPos(p)[0] = (float)rand() / (float)RAND_MAX;
228 vd.
getPos(p)[1] = (float)rand() / (float)RAND_MAX;
229 vd.
getPos(p)[2] = (float)rand() / (float)RAND_MAX;
// prop<1>: pairwise coordinate sums.
233 vd.template getProp<1>(p)[0] = vd.
getPos(p)[0] + vd.
getPos(p)[1];
234 vd.template getProp<1>(p)[1] = vd.
getPos(p)[0] + vd.
getPos(p)[2];
235 vd.template getProp<1>(p)[2] = vd.
getPos(p)[1] + vd.
getPos(p)[2];
// prop<2>: same pairs with the second coordinate weighted by 3.0.
237 vd.template getProp<2>(p)[0] = vd.
getPos(p)[0] + 3.0*vd.
getPos(p)[1];
238 vd.template getProp<2>(p)[1] = vd.
getPos(p)[0] + 3.0*vd.
getPos(p)[2];
239 vd.template getProp<2>(p)[2] = vd.
getPos(p)[1] + 3.0*vd.
getPos(p)[2];
248 vd.template ghost_get<0,1,2>();
// Verification: every property must equal the expression used to initialise it.
260 check &= (vd.template getProp<0>(p) == vd.
getPos(p)[0] + vd.
getPos(p)[1] + vd.
getPos(p)[2]);
262 check &= (vd.template getProp<1>(p)[0] == vd.
getPos(p)[0] + vd.
getPos(p)[1]);
263 check &= (vd.template getProp<1>(p)[1] == vd.
getPos(p)[0] + vd.
getPos(p)[2]);
264 check &= (vd.template getProp<1>(p)[2] == vd.
getPos(p)[1] + vd.
getPos(p)[2]);
266 check &= (vd.template getProp<2>(p)[0] == vd.
getPos(p)[0] + 3.0*vd.
getPos(p)[1]);
267 check &= (vd.template getProp<2>(p)[1] == vd.
getPos(p)[0] + 3.0*vd.
getPos(p)[2]);
268 check &= (vd.template getProp<2>(p)[2] == vd.
getPos(p)[1] + 3.0*vd.
getPos(p)[2]);
279 BOOST_REQUIRE(tot_s > 1000);
// Runs both GPU force kernels on `vd` using the cell list `NN` and validates each result on the
// host with check_force(NN_cpu, vd).
//   vd     - particle set; properties 1 and 2 are overwritten
//   NN     - GPU cell list handed to the kernels via NN.toKernel()
//   NN_cpu - cell list used by check_force for the reference computation
// Sequence:
//   1. launch calculate_force, copy prop<1,2> to the host, require check_force == true
//   2. zero prop<1> on the host and upload it
//   3. launch calculate_force_full_sort on vd.toKernel_sorted(), call merge_sort<1>(NN),
//      copy prop<1> to the host, require check_force == true again
// NOTE(review): the loop that defines `p` for the zeroing step is not part of this listing.
282template<
typename vector_type,
typename CellList_type,
typename CellList_type_cpu>
283void check_cell_list_cpu_and_gpu(
vector_type & vd, CellList_type & NN, CellList_type_cpu & NN_cpu)
285 const auto it5 = vd.getDomainIteratorGPU(32);
287 CUDA_LAUNCH((calculate_force<
typename vector_type::stype,
decltype(NN.toKernel())>),it5,vd.toKernel(),vd.toKernel_sorted(),NN.toKernel(),create_vcluster().rank());
289 vd.template deviceToHostProp<1,2>();
291 bool test = check_force(NN_cpu,vd);
292 BOOST_REQUIRE_EQUAL(test,
true);
// Reset prop<1> so the second kernel's output is not confused with the first one's.
302 vd.template getProp<1>(p)[0] = 0.0;
303 vd.template getProp<1>(p)[1] = 0.0;
304 vd.template getProp<1>(p)[2] = 0.0;
309 vd.template hostToDeviceProp<1>();
313 CUDA_LAUNCH((calculate_force_full_sort<
typename vector_type::stype,
decltype(NN.toKernel())>),it5,vd.toKernel_sorted(),NN.toKernel(),create_vcluster().rank());
315 vd.template merge_sort<1>(NN);
316 vd.template deviceToHostProp<1>();
318 test = check_force(NN_cpu,vd);
319 BOOST_REQUIRE_EQUAL(test,
true);
// Shared body of the vector_dist_gpu_test / vector_dist_gpu_test_sparse cases, parameterised
// on the GPU cell-list type.
// Visible steps: seed rand() with 55067*rank, set positions to x/RAND_MAX, y/RAND_MAX,
// z/RAND_MAX, require size_l == 10000, require every position to satisfy ct.isLocal(),
// launch initialize_props and compare the properties against the coordinate sums with a
// BOOST_REQUIRE_CLOSE tolerance of 0.01, then build two GPU cell lists with radius 0.1 and
// run check_cell_list_cpu_and_gpu on each.
// NOTE(review): the declarations of vd, x, y, z, size_l, ct and NN_cpu, the particle loops and
// the statements between the two cell-list constructions are not part of this listing. The
// action taken when v_cl.size() > 16 is not visible (presumably an early return — confirm).
322template<
typename CellList_type>
323void vector_dist_gpu_test_impl()
325 auto & v_cl = create_vcluster();
327 if (v_cl.size() > 16)
336 size_t bc[3]={NON_PERIODIC,NON_PERIODIC,NON_PERIODIC};
340 srand(55067*create_vcluster().rank());
352 vd.
getPos(p)[0] = (float)x / (
float)RAND_MAX;
353 vd.
getPos(p)[1] = (float)y / (
float)RAND_MAX;
354 vd.
getPos(p)[2] = (float)z / (
float)RAND_MAX;
369 BOOST_REQUIRE_EQUAL(size_l,10000);
383 noOut &= ct.isLocal(vd.
getPos(p));
389 BOOST_REQUIRE_EQUAL(noOut,
true);
394 const auto it3 = vd.getDomainIteratorGPU();
399 CUDA_LAUNCH_DIM3(initialize_props,it3.wthr,it3.thr,vd.toKernel());
// The properties written by initialize_props must match the same sums computed on the host.
411 BOOST_REQUIRE_CLOSE(vd.template getProp<0>(p),vd.
getPos(p)[0] + vd.
getPos(p)[1] + vd.
getPos(p)[2],0.01);
413 BOOST_REQUIRE_CLOSE(vd.template getProp<1>(p)[0],vd.
getPos(p)[0] + vd.
getPos(p)[1],0.01);
414 BOOST_REQUIRE_CLOSE(vd.template getProp<1>(p)[1],vd.
getPos(p)[0] + vd.
getPos(p)[2],0.01);
415 BOOST_REQUIRE_CLOSE(vd.template getProp<1>(p)[2],vd.
getPos(p)[1] + vd.
getPos(p)[2],0.01);
428 vd.template hostToDeviceProp<0>();
430 auto NN = vd.template getCellListGPU<CellList_type>(0.1);
432 check_cell_list_cpu_and_gpu(vd,NN,NN_cpu);
434 auto NN_up = vd.template getCellListGPU<CellList_type>(0.1);
438 check_cell_list_cpu_and_gpu(vd,NN_up,NN_cpu);
// Shared body of the vector_dist_gpu_make_sort / vector_dist_gpu_make_sort_sparse cases,
// parameterised on the GPU cell-list type.
// Visible steps: seed rand() with 55067*rank, set positions, map(RUN_ON_DEVICE), launch
// initialize_props, build a GPU cell list with radius 0.1, require that NN_cpu1 and NN_cpu2
// have the same number of elements in every cell, rebuild the cell list, call
// vd.make_sort_from(NN), copy tmp_pos and the positions back to the host and require `match`
// after a per-particle cell comparison.
// NOTE(review): the declarations of vd, x, y, z, NN_cpu1, NN_cpu2, tmp_pos, p1 and the update of
// `match` in the last loop are not part of this listing.
441template<
typename CellList_type>
442void vector_dist_gpu_make_sort_test_impl()
444 auto & v_cl = create_vcluster();
446 if (v_cl.size() > 16)
455 size_t bc[3]={NON_PERIODIC,NON_PERIODIC,NON_PERIODIC};
459 srand(55067*create_vcluster().rank());
471 vd.
getPos(p)[0] = (float)x / (
float)RAND_MAX;
472 vd.
getPos(p)[1] = (float)y / (
float)RAND_MAX;
473 vd.
getPos(p)[2] = (float)z / (
float)RAND_MAX;
481 vd.
map(RUN_ON_DEVICE);
483 auto it3 = vd.getDomainIteratorGPU();
485 CUDA_LAUNCH_DIM3(initialize_props,it3.wthr,it3.thr,vd.toKernel());
493 auto NN = vd.template getCellListGPU<CellList_type>(0.1);
// Per-cell element counts of the two CPU cell lists must agree.
503 for (
size_t i = 0 ; i < NN_cpu1.getNCells() ; i++)
505 match &= NN_cpu1.getNelements(i) == NN_cpu2.getNelements(i);
508 BOOST_REQUIRE_EQUAL(match,
true);
514 NN = vd.template getCellListGPU<CellList_type>(0.1);
521 tmp_pos.template deviceToHost<0>();
527 NN = vd.template getCellListGPU<CellList_type>(0.1);
531 vd.make_sort_from(NN);
535 tmp_pos.deviceToHost<0>();
536 vd.deviceToHostPos();
539 for (
size_t i = 0 ; i < vd.size_local() ; i++)
// NOTE(review): c1 and c2 are both computed from p1, so any c1/c2 comparison is trivially
// true. If the intent is to compare the cell of the particle before and after make_sort_from,
// the second call probably needs the other point — confirm against the full source.
545 auto c1 = NN.getCell(p1);
546 auto c2 = NN.getCell(p1);
551 BOOST_REQUIRE_EQUAL(match,
true);
// Test case: runs the make-sort test body with the CELLLIST_GPU_SPARSE<3,float> cell list.
555BOOST_AUTO_TEST_CASE(vector_dist_gpu_make_sort_sparse)
557 vector_dist_gpu_make_sort_test_impl<CELLLIST_GPU_SPARSE<3,float>>();
// Test case: runs the make-sort test body with CellList_gpu<3,float,CudaMemory,shift_only<3,float>>.
560BOOST_AUTO_TEST_CASE(vector_dist_gpu_make_sort)
562 vector_dist_gpu_make_sort_test_impl<CellList_gpu<3,float,CudaMemory,shift_only<3, float>>>();
// Test case: runs vector_dist_gpu_test_impl with CellList_gpu<3,float,CudaMemory,shift_only<3,float>>.
565BOOST_AUTO_TEST_CASE( vector_dist_gpu_test)
567 vector_dist_gpu_test_impl<CellList_gpu<3,float,CudaMemory,shift_only<3, float>>>();
// Test case: runs vector_dist_gpu_test_impl with the CELLLIST_GPU_SPARSE<3,float> cell list.
570BOOST_AUTO_TEST_CASE( vector_dist_gpu_test_sparse)
572 vector_dist_gpu_test_impl<CELLLIST_GPU_SPARSE<3,float>>();
// Test body instantiated as vdist_calc_gpu_test<float>() and vdist_calc_gpu_test<double>() by
// the vector_dist_map_on_gpu_test case; `St` is the scalar type.
// Visible steps:
//   1. seed rand() with rank*10000, fill positions and prop<1>/prop<2> from the coordinates
//   2. upload, map(RUN_ON_DEVICE), download, zero prop<0> on the host, download again and
//      require with exact equality that all properties match the coordinate formulas
//   3. count_local_n_local<3>(...) and require n_out == 0
//   4. loop of 10 iterations calling map(RUN_ON_DEVICE) and downloading the properties
//   5. copy every particle into vd_cpu, run ghost_get<0,1,2>() on vd_cpu and
//      ghost_get<0,1,2>(RUN_ON_DEVICE) on vd
//   6. collect both ghost sets into cpu_sort / gpu_sort, require equal sizes, and require
//      element-wise exact equality of positions and properties
//   7. launch move_parts_gpu_test<3, ...> on vd
// NOTE(review): the template header, the declarations of vd, vd_cpu, cpu_sort, gpu_sort and
// the iterators, the sort calls and most loop headers are not part of this listing.
576void vdist_calc_gpu_test()
578 auto & v_cl = create_vcluster();
580 if (v_cl.size() > 16)
583 Box<3,St> domain({0.0,0.0,0.0},{1.0,1.0,1.0});
589 size_t bc[3]={PERIODIC,PERIODIC,PERIODIC};
599 srand(v_cl.rank()*10000);
// NOTE(review): the numerator is cast to St but the divisor to float, so for St = double the
// division uses a float-rounded RAND_MAX — confirm this is intended.
606 vd.
getPos(p)[0] = (St)rand() / (float)RAND_MAX;
607 vd.
getPos(p)[1] = (St)rand() / (float)RAND_MAX;
608 vd.
getPos(p)[2] = (St)rand() / (float)RAND_MAX;
// prop<1> is a copy of the position.
612 vd.template getProp<1>(p)[0] = vd.
getPos(p)[0];
613 vd.template getProp<1>(p)[1] = vd.
getPos(p)[1];
614 vd.template getProp<1>(p)[2] = vd.
getPos(p)[2];
// prop<2> holds the pairwise coordinate sums.
616 vd.template getProp<2>(p)[0] = vd.
getPos(p)[0] + vd.
getPos(p)[1];
617 vd.template getProp<2>(p)[1] = vd.
getPos(p)[0] + vd.
getPos(p)[2];
618 vd.template getProp<2>(p)[2] = vd.
getPos(p)[1] + vd.
getPos(p)[2];
625 vd.template hostToDeviceProp<0,1,2>();
628 vd.
map(RUN_ON_DEVICE);
633 vd.template deviceToHostProp<0,1,2>();
// NOTE(review): prop<0> is assigned 0.0 seven times. The 1/3/3 grouping mirrors the
// prop<0> / prop<1>[0..2] / prop<2>[0..2] layout used elsewhere in this function, so the last
// six assignments may have been meant for prop<1> and prop<2> — confirm.
647 vd.template getProp<0>(p) = 0.0;
649 vd.template getProp<0>(p) = 0.0;
650 vd.template getProp<0>(p) = 0.0;
651 vd.template getProp<0>(p) = 0.0;
653 vd.template getProp<0>(p) = 0.0;
654 vd.template getProp<0>(p) = 0.0;
655 vd.template getProp<0>(p) = 0.0;
663 vd.template deviceToHostProp<0,1,2>();
// After the download the device values must reproduce the coordinate formulas exactly.
674 match &= vd.template getProp<0>(p) == vd.
getPos(p)[0] + vd.
getPos(p)[1] + vd.
getPos(p)[2];
676 match &= vd.template getProp<1>(p)[0] == vd.
getPos(p)[0];
677 match &= vd.template getProp<1>(p)[1] == vd.
getPos(p)[1];
678 match &= vd.template getProp<1>(p)[2] == vd.
getPos(p)[2];
680 match &= vd.template getProp<2>(p)[0] == vd.
getPos(p)[0] + vd.
getPos(p)[1];
681 match &= vd.template getProp<2>(p)[1] == vd.
getPos(p)[0] + vd.
getPos(p)[2];
682 match &= vd.template getProp<2>(p)[2] == vd.
getPos(p)[1] + vd.
getPos(p)[2];
687 BOOST_REQUIRE_EQUAL(match,
true);
700 count_local_n_local<3>(vd,it5,bc,domain,dom_ext,l_cnt,nl_cnt,n_out);
702 BOOST_REQUIRE_EQUAL(n_out,0);
707 for (
size_t i = 0 ; i < 10 ; i++)
709 vd.
map(RUN_ON_DEVICE);
712 vd.template deviceToHostProp<0,1,2>();
// Mirror each particle (position and all three properties) into the CPU vector.
726 vd_cpu.getLastPos()[0] = vd.
getPos(p)[0];
727 vd_cpu.getLastPos()[1] = vd.
getPos(p)[1];
728 vd_cpu.getLastPos()[2] = vd.
getPos(p)[2];
730 vd_cpu.template getLastProp<0>() = vd.template getProp<0>(p);
732 vd_cpu.template getLastProp<1>()[0] = vd.template getProp<1>(p)[0];
733 vd_cpu.template getLastProp<1>()[1] = vd.template getProp<1>(p)[1];
734 vd_cpu.template getLastProp<1>()[2] = vd.template getProp<1>(p)[2];
736 vd_cpu.template getLastProp<2>()[0] = vd.template getProp<2>(p)[0];
737 vd_cpu.template getLastProp<2>()[1] = vd.template getProp<2>(p)[1];
738 vd_cpu.template getLastProp<2>()[2] = vd.template getProp<2>(p)[2];
// Ghost exchange on the host vector and on the device vector.
743 vd_cpu.template ghost_get<0,1,2>();
747 vd.template ghost_get<0,1,2>(RUN_ON_DEVICE);
752 vd.template deviceToHostProp<0,1,2>();
// Ordering used for the collected ghost records: compares xp.get(0), then xp.get(1), then
// xp.get(2). The return statements of the branches are not part of this listing.
767 bool operator<(
const part & tmp)
const
769 if (xp.
get(0) < tmp.xp.get(0))
771 else if (xp.
get(0) > tmp.xp.get(0))
774 if (xp.
get(1) < tmp.xp.get(1))
776 else if (xp.
get(1) > tmp.xp.get(1))
779 if (xp.
get(2) < tmp.xp.get(2))
781 else if (xp.
get(2) > tmp.xp.get(2))
// One record per ghost particle of the CPU vector.
791 cpu_sort.resize(vd_cpu.size_local_with_ghost() - vd_cpu.size_local());
794 BOOST_REQUIRE_EQUAL(cpu_sort.
size(),gpu_sort.
size());
799 while (itc2.isNext())
803 cpu_sort.get(cnt).xp.get(0) = vd_cpu.getPos(p)[0];
804 gpu_sort.get(cnt).xp.get(0) = vd.
getPos(p)[0];
805 cpu_sort.get(cnt).xp.get(1) = vd_cpu.getPos(p)[1];
806 gpu_sort.get(cnt).xp.get(1) = vd.
getPos(p)[1];
807 cpu_sort.get(cnt).xp.get(2) = vd_cpu.getPos(p)[2];
808 gpu_sort.get(cnt).xp.get(2) = vd.
getPos(p)[2];
810 cpu_sort.get(cnt).prp0 = vd_cpu.template getProp<0>(p);
811 gpu_sort.get(cnt).prp0 = vd.template getProp<0>(p);
813 cpu_sort.get(cnt).prp1[0] = vd_cpu.template getProp<1>(p)[0];
814 gpu_sort.get(cnt).prp1[0] = vd.template getProp<1>(p)[0];
815 cpu_sort.get(cnt).prp1[1] = vd_cpu.template getProp<1>(p)[1];
816 gpu_sort.get(cnt).prp1[1] = vd.template getProp<1>(p)[1];
817 cpu_sort.get(cnt).prp1[2] = vd_cpu.template getProp<1>(p)[2];
818 gpu_sort.get(cnt).prp1[2] = vd.template getProp<1>(p)[2];
820 cpu_sort.get(cnt).prp2[0] = vd_cpu.template getProp<2>(p)[0];
821 gpu_sort.get(cnt).prp2[0] = vd.template getProp<2>(p)[0];
822 cpu_sort.get(cnt).prp2[1] = vd_cpu.template getProp<2>(p)[1];
823 gpu_sort.get(cnt).prp2[1] = vd.template getProp<2>(p)[1];
824 cpu_sort.get(cnt).prp2[2] = vd_cpu.template getProp<2>(p)[2];
825 gpu_sort.get(cnt).prp2[2] = vd.template getProp<2>(p)[2];
// Element-wise exact comparison of the CPU and GPU ghost records.
834 for (
size_t i = 0 ; i < cpu_sort.
size() ; i++)
836 match &= cpu_sort.get(i).xp.get(0) == gpu_sort.get(i).xp.get(0);
837 match &= cpu_sort.get(i).xp.get(1) == gpu_sort.get(i).xp.get(1);
838 match &= cpu_sort.get(i).xp.get(2) == gpu_sort.get(i).xp.get(2);
840 match &= cpu_sort.get(i).prp0 == gpu_sort.get(i).prp0;
841 match &= cpu_sort.get(i).prp1[0] == gpu_sort.get(i).prp1[0];
842 match &= cpu_sort.get(i).prp1[1] == gpu_sort.get(i).prp1[1];
843 match &= cpu_sort.get(i).prp1[2] == gpu_sort.get(i).prp1[2];
845 match &= cpu_sort.get(i).prp2[0] == gpu_sort.get(i).prp2[0];
846 match &= cpu_sort.get(i).prp2[1] == gpu_sort.get(i).prp2[1];
847 match &= cpu_sort.get(i).prp2[2] == gpu_sort.get(i).prp2[2];
850 BOOST_REQUIRE_EQUAL(match,
true);
// Move the particles on the device (each coordinate += 0.05, see move_parts_gpu_test).
854 auto ite = vd.getDomainIteratorGPU();
855 CUDA_LAUNCH_DIM3((move_parts_gpu_test<3,
decltype(vd.toKernel())>),ite.wthr,ite.thr,vd.toKernel());
// Test case: runs vdist_calc_gpu_test once with float and once with double as the scalar type.
859BOOST_AUTO_TEST_CASE( vector_dist_map_on_gpu_test)
861 vdist_calc_gpu_test<float>();
862 vdist_calc_gpu_test<double>();
// Test case: assigns fc, dc, ic and sc to properties 0..3 of each particle, uploads the four
// properties and calls reduce_local with _add_ and then with _max_ on each of them, storing
// the results as float, double, int and size_t respectively.
// NOTE(review): the declaration of `vd`, the definitions of fc/dc/ic/sc and the assertions on
// the reduced values are not part of this listing.
865BOOST_AUTO_TEST_CASE(vector_dist_reduce)
867 auto & v_cl = create_vcluster();
869 if (v_cl.size() > 16)
878 size_t bc[3]={PERIODIC,PERIODIC,PERIODIC};
893 vd.template getProp<0>(p) = fc;
894 vd.template getProp<1>(p) = dc;
895 vd.template getProp<2>(p) = ic;
896 vd.template getProp<3>(p) = sc;
906 vd.template hostToDeviceProp<0,1,2,3>();
// Sum reductions, one per property.
908 float redf = reduce_local<0,_add_>(vd);
909 double redd = reduce_local<1,_add_>(vd);
910 int redi = reduce_local<2,_add_>(vd);
911 size_t reds = reduce_local<3,_add_>(vd);
// Max reductions, one per property.
918 float redf2 = reduce_local<0,_max_>(vd);
919 double redd2 = reduce_local<1,_max_>(vd);
920 int redi2 = reduce_local<2,_max_>(vd);
921 size_t reds2 = reduce_local<3,_max_>(vd);
// Shared body of the vector_dist_dlb_on_cuda* test cases (synchronous ghost exchange).
//   k     - upper bound of the particle-creation loop
//   r_cut - radius used to build the GPU cell list
// Visible steps: seed a mt19937 from the process rank, draw from uniform(0.0, 0.3), run the
// creation loop, upload prop<0>, map and ghost_get on the device, store VV.getNNPart() in
// prop<0>, map again and require every entry of `loads` to be within 7% of `load`. Then 25
// iterations move the particles by v*0.09, re-map, exchange ghosts, validate the cell list
// with check_cell_list_cpu_and_gpu and require prop<0> == VV2.getNNPart(); afterwards the
// loads are compared again.
// NOTE(review): the declarations of vd, VV, VV2, NN_cpu, v, load and loads, and the rebalancing
// calls, are not part of this listing.
929template<
typename CellList_type>
930void vector_dist_dlb_on_cuda_impl(
size_t k,
double r_cut)
// NOTE(review): `r` is not used by any visible line here; the async variant seeds with
// r() + rank while this one seeds with the rank only — confirm the difference is intended.
932 std::random_device r;
934 std::seed_seq seed2{ create_vcluster().rank(),
935 create_vcluster().rank(),
936 create_vcluster().rank(),
937 create_vcluster().rank(),
938 create_vcluster().rank(),
939 create_vcluster().rank(),
940 create_vcluster().rank(),
941 create_vcluster().rank()};
942 std::mt19937 e2(seed2);
951 std::uniform_real_distribution<double> unif(0.0,0.3);
955 size_t bc[3] = {PERIODIC,PERIODIC,PERIODIC};
963 for(
size_t i = 0 ; i < k ; i++)
975 vd.template hostToDeviceProp<0>();
977 vd.
map(RUN_ON_DEVICE);
978 vd.template ghost_get<>(RUN_ON_DEVICE);
983 vd.template deviceToHostProp<0>();
// Record the neighbour count of each particle in prop<0>.
997 vd.template getProp<0>(p) = VV.getNNPart(p.getKey());
1003 vd.template hostToDeviceProp<0>();
1009 vd.
map(RUN_ON_DEVICE);
// Every gathered load must be within 7% of this process's load.
1021 for (
size_t i = 0 ; i < loads.
size() ; i++)
1023 double load_f = load;
1024 double load_fc = loads.get(i);
1026 BOOST_REQUIRE_CLOSE(load_f,load_fc,7.0);
1033 for (
size_t i = 0 ; i < 25 ; i++)
// Move each particle along v.
1045 vd.
getPos(p)[0] += v.get(0) * 0.09;
1046 vd.
getPos(p)[1] += v.get(1) * 0.09;
1047 vd.
getPos(p)[2] += v.get(2) * 0.09;
1054 vd.
map(RUN_ON_DEVICE);
1055 vd.template ghost_get<0>(RUN_ON_DEVICE);
1058 vd.template deviceToHostProp<0,1,2>();
1061 auto NN_gpu = vd.template getCellListGPU<CellList_type>(r_cut);
1063 check_cell_list_cpu_and_gpu(vd,NN_gpu,NN_cpu);
// prop<0> must still equal the neighbour count from the freshly built VV2.
1070 while (it2.isNext())
1074 match &= vd.template getProp<0>(p) == VV2.getNNPart(p.getKey());
1079 BOOST_REQUIRE_EQUAL(match,
true);
1084 vd.
map(RUN_ON_DEVICE);
1088 vd.template ghost_get<0>(RUN_ON_DEVICE);
1090 vd.template deviceToHostProp<0>();
1099 for (
size_t i = 0 ; i < loads.
size() ; i++)
1101 double load_f = load;
1102 double load_fc = loads.get(i);
// NOTE(review): two tolerances (30% and 10%) appear back to back; the construct selecting
// between them is not part of this listing — confirm which one applies when.
1105 BOOST_REQUIRE_CLOSE(load_f,load_fc,30.0);
1107 BOOST_REQUIRE_CLOSE(load_f,load_fc,10.0);
// Variant of vector_dist_dlb_on_cuda_impl that performs every ghost exchange as an
// Ighost_get(...) followed by ghost_wait(...) instead of a single ghost_get(...).
//   k     - upper bound of the particle-creation loop
//   r_cut - radius used to build the GPU cell list
// The generator is seeded with eight values of r() + rank, where r is a std::random_device,
// so the particle distribution differs between runs.
// Visible steps match the synchronous variant: create particles, map and exchange ghosts on
// the device, store VV.getNNPart() in prop<0>, require loads within 7%, then 25 iterations of
// move / map / ghost exchange / cell-list validation / neighbour-count check, and a final
// load comparison within 10%.
// NOTE(review): the declarations of vd, VV, VV2, NN_cpu, v, load and loads, and the rebalancing
// calls, are not part of this listing.
1113template<
typename CellList_type>
1114void vector_dist_dlb_on_cuda_impl_async(
size_t k,
double r_cut)
1116 std::random_device r;
1118 std::seed_seq seed2{r() + create_vcluster().rank(),
1119 r() + create_vcluster().rank(),
1120 r() + create_vcluster().rank(),
1121 r() + create_vcluster().rank(),
1122 r() + create_vcluster().rank(),
1123 r() + create_vcluster().rank(),
1124 r() + create_vcluster().rank(),
1125 r() + create_vcluster().rank()};
1126 std::mt19937 e2(seed2);
1135 std::uniform_real_distribution<double> unif(0.0,0.3);
1139 size_t bc[3] = {PERIODIC,PERIODIC,PERIODIC};
1147 for(
size_t i = 0 ; i < k ; i++)
1159 vd.template hostToDeviceProp<0>();
1161 vd.
map(RUN_ON_DEVICE);
// Start the ghost exchange, then wait for it.
1162 vd.template Ighost_get<>(RUN_ON_DEVICE);
1163 vd.template ghost_wait<>(RUN_ON_DEVICE);
1168 vd.template deviceToHostProp<0>();
// Record the neighbour count of each particle in prop<0>.
1182 vd.template getProp<0>(p) = VV.getNNPart(p.getKey());
1188 vd.template hostToDeviceProp<0>();
1194 vd.
map(RUN_ON_DEVICE);
// Every gathered load must be within 7% of this process's load.
1206 for (
size_t i = 0 ; i < loads.
size() ; i++)
1208 double load_f = load;
1209 double load_fc = loads.get(i);
1211 BOOST_REQUIRE_CLOSE(load_f,load_fc,7.0);
1218 for (
size_t i = 0 ; i < 25 ; i++)
// Move each particle along v.
1230 vd.
getPos(p)[0] += v.get(0) * 0.09;
1231 vd.
getPos(p)[1] += v.get(1) * 0.09;
1232 vd.
getPos(p)[2] += v.get(2) * 0.09;
1239 vd.
map(RUN_ON_DEVICE);
1240 vd.template Ighost_get<0>(RUN_ON_DEVICE);
1241 vd.template ghost_wait<0>(RUN_ON_DEVICE);
1243 vd.template deviceToHostProp<0,1,2>();
1246 auto NN_gpu = vd.template getCellListGPU<CellList_type>(r_cut);
1248 check_cell_list_cpu_and_gpu(vd,NN_gpu,NN_cpu);
// prop<0> must still equal the neighbour count from the freshly built VV2.
1255 while (it2.isNext())
1259 match &= vd.template getProp<0>(p) == VV2.getNNPart(p.getKey());
1264 BOOST_REQUIRE_EQUAL(match,
true);
1269 vd.
map(RUN_ON_DEVICE);
1275 vd.template Ighost_get<0>(RUN_ON_DEVICE);
1276 vd.template ghost_wait<0>(RUN_ON_DEVICE);
1278 vd.template deviceToHostProp<0>();
// Final load comparison, tolerance 10%.
1287 for (
size_t i = 0 ; i < loads.
size() ; i++)
1289 double load_f = load;
1290 double load_fc = loads.get(i);
1292 BOOST_REQUIRE_CLOSE(load_f,load_fc,10.0);
// Test case: asynchronous-ghost DLB test with k = 50000 and r_cut = 0.01 on the double-precision
// CellList_gpu type.
1297BOOST_AUTO_TEST_CASE(vector_dist_dlb_on_cuda_async)
1299 vector_dist_dlb_on_cuda_impl_async<CellList_gpu<3,double,CudaMemory,shift_only<3,double>,
unsigned int,
int,
false>>(50000,0.01);
// Test case: synchronous-ghost DLB test with k = 50000 and r_cut = 0.01 on the double-precision
// CellList_gpu type.
1302BOOST_AUTO_TEST_CASE(vector_dist_dlb_on_cuda)
1304 vector_dist_dlb_on_cuda_impl<CellList_gpu<3,double,CudaMemory,shift_only<3,double>,
unsigned int,
int,
false>>(50000,0.01);
// Test case: synchronous-ghost DLB test with k = 50000 and r_cut = 0.01 on
// CELLLIST_GPU_SPARSE<3,double>.
1307BOOST_AUTO_TEST_CASE(vector_dist_dlb_on_cuda_sparse)
1309 vector_dist_dlb_on_cuda_impl<CELLLIST_GPU_SPARSE<3,double>>(50000,0.01);
// Test case: DLB test with k = 1000000 and r_cut = 0.01.
// NOTE(review): the statement controlled by the size() <= 3 check is not part of this listing;
// presumably the test is skipped on three or fewer processes — confirm.
1312BOOST_AUTO_TEST_CASE(vector_dist_dlb_on_cuda2)
1314 if (create_vcluster().size() <= 3)
1318 vector_dist_dlb_on_cuda_impl<CellList_gpu<3,double,CudaMemory,shift_only<3,double>,
unsigned int,
int,
false>>(1000000,0.01);
// Test case: DLB test with k = 15000000 and r_cut = 0.005.
// NOTE(review): the statement controlled by the size() < 8 check is not part of this listing;
// presumably the test is skipped on fewer than eight processes — confirm.
1322BOOST_AUTO_TEST_CASE(vector_dist_dlb_on_cuda3)
1324 if (create_vcluster().size() < 8)
1328 vector_dist_dlb_on_cuda_impl<CellList_gpu<3,double,CudaMemory,shift_only<3,double>,
unsigned int,
int,
false>>(15000000,0.005);
// Test case for ghost_get with RUN_ON_DEVICE | KEEP_PROPERTIES.
// Visible steps: create 50000 particles with coordinates rand()/RAND_MAX * 0.3, map and
// exchange ghosts on the device, initialise prop<0> to 0 and prop<1>/prop<2> to fixed
// constants (1000.0 ... 14000.0), upload, map, require loads within 7%, then 25 iterations of
// move / map / ghost_get, a further map and load check within 10%. Afterwards every property is
// incremented by 1 on the host, uploaded, exchanged with KEEP_PROPERTIES and compared against
// `base` plus the original constant.
// NOTE(review): the declarations of vd, v, load, loads, base and the iterators, and the
// rebalancing calls, are not part of this listing.
1333BOOST_AUTO_TEST_CASE(vector_dist_keep_prop_on_cuda)
1344 size_t bc[3] = {PERIODIC,PERIODIC,PERIODIC};
1352 for(
size_t i = 0 ; i < 50000 ; i++)
1356 vd.
getLastPos()[0] = ((double)rand())/RAND_MAX * 0.3;
1357 vd.
getLastPos()[1] = ((double)rand())/RAND_MAX * 0.3;
1358 vd.
getLastPos()[2] = ((double)rand())/RAND_MAX * 0.3;
1364 vd.template hostToDeviceProp<0>();
1366 vd.
map(RUN_ON_DEVICE);
1367 vd.template ghost_get<>(RUN_ON_DEVICE);
1372 vd.template deviceToHostProp<0>();
// Initial property values; the final check expects base + these constants.
1383 vd.template getProp<0>(p) = 0.0;
1385 vd.template getProp<1>(p)[0] = 1000.0;
1386 vd.template getProp<1>(p)[1] = 2000.0;
1387 vd.template getProp<1>(p)[2] = 3000.0;
// NOTE(review): "6000,0" is a comma expression, not the literal 6000.0: the statement assigns
// 6000 and then discards the 0. The stored value is still 6000, so the later comparison with
// base + 6000.0 is unaffected, but the literal should read 6000.0 like its neighbours.
1389 vd.template getProp<2>(p)[0][0] = 6000,0;
1390 vd.template getProp<2>(p)[0][1] = 7000.0;
1391 vd.template getProp<2>(p)[0][2] = 8000.0;
1392 vd.template getProp<2>(p)[1][0] = 9000.0;
1393 vd.template getProp<2>(p)[1][1] = 10000.0;
1394 vd.template getProp<2>(p)[1][2] = 11000.0;
1395 vd.template getProp<2>(p)[2][0] = 12000.0;
1396 vd.template getProp<2>(p)[2][1] = 13000.0;
1397 vd.template getProp<2>(p)[2][2] = 14000.0;
1403 vd.template hostToDeviceProp<0,1,2>();
1409 vd.
map(RUN_ON_DEVICE);
// Every gathered load must be within 7% of this process's load.
1421 for (
size_t i = 0 ; i < loads.
size() ; i++)
1423 double load_f = load;
1424 double load_fc = loads.get(i);
1426 BOOST_REQUIRE_CLOSE(load_f,load_fc,7.0);
1435 for (
size_t i = 0 ; i < 25 ; i++)
// Move each particle along v.
1449 vd.
getPos(p)[0] += v.get(0) * 0.09;
1450 vd.
getPos(p)[1] += v.get(1) * 0.09;
1451 vd.
getPos(p)[2] += v.get(2) * 0.09;
1458 vd.
map(RUN_ON_DEVICE);
1459 vd.template ghost_get<>(RUN_ON_DEVICE);
1461 vd.template deviceToHostProp<0,1,2>();
1466 vd.
map(RUN_ON_DEVICE);
1470 vd.template ghost_get<0>(RUN_ON_DEVICE);
1472 vd.template deviceToHostProp<0,1,2>();
// Load comparison after the move loop, tolerance 10%.
1481 for (
size_t i = 0 ; i < loads.
size() ; i++)
1483 double load_f = load;
1484 double load_fc = loads.get(i);
1486 BOOST_REQUIRE_CLOSE(load_f,load_fc,10.0);
1491 vd.template deviceToHostProp<0,1,2>();
// Increment every property by one on the host.
1496 while (it2.isNext())
1500 vd.template getProp<0>(p) += 1;
1502 vd.template getProp<1>(p)[0] += 1.0;
1503 vd.template getProp<1>(p)[1] += 1.0;
1504 vd.template getProp<1>(p)[2] += 1.0;
1506 vd.template getProp<2>(p)[0][0] += 1.0;
1507 vd.template getProp<2>(p)[0][1] += 1.0;
1508 vd.template getProp<2>(p)[0][2] += 1.0;
1509 vd.template getProp<2>(p)[1][0] += 1.0;
1510 vd.template getProp<2>(p)[1][1] += 1.0;
1511 vd.template getProp<2>(p)[1][2] += 1.0;
1512 vd.template getProp<2>(p)[2][0] += 1.0;
1513 vd.template getProp<2>(p)[2][1] += 1.0;
1514 vd.template getProp<2>(p)[2][2] += 1.0;
1519 vd.template hostToDeviceProp<0,1,2>();
// Ghost exchange under test: device path with KEEP_PROPERTIES.
1523 vd.template ghost_get<0,1,2>(RUN_ON_DEVICE | KEEP_PROPERTIES);
1524 vd.template deviceToHostProp<0,1,2>();
// Every property must equal `base` plus its initial constant (exact comparison).
1530 while (itg.isNext())
1534 match &= vd.template getProp<0>(p) == base;
1536 match &= vd.template getProp<1>(p)[0] == base + 1000.0;
1537 match &= vd.template getProp<1>(p)[1] == base + 2000.0;
1538 match &= vd.template getProp<1>(p)[2] == base + 3000.0;
1540 match &= vd.template getProp<2>(p)[0][0] == base + 6000.0;
1541 match &= vd.template getProp<2>(p)[0][1] == base + 7000.0;
1542 match &= vd.template getProp<2>(p)[0][2] == base + 8000.0;
1543 match &= vd.template getProp<2>(p)[1][0] == base + 9000.0;
1544 match &= vd.template getProp<2>(p)[1][1] == base + 10000.0;
1545 match &= vd.template getProp<2>(p)[1][2] == base + 11000.0;
1546 match &= vd.template getProp<2>(p)[2][0] == base + 12000.0;
1547 match &= vd.template getProp<2>(p)[2][1] == base + 13000.0;
1548 match &= vd.template getProp<2>(p)[2][2] == base + 14000.0;
1553 BOOST_REQUIRE_EQUAL(match,
true);
// Device-side static predicate taking an int. Its body and enclosing type are not part of this
// listing. NOTE(review): presumably this belongs to the `type_is_one` functor passed to
// get_indexes_by_type in vector_dist_get_index_set — confirm.
1560 __device__
static bool check(
int c)
// Test case for get_indexes_by_type<0,type_is_one>.
// Visible steps: fill positions with rand()/RAND_MAX, set prop<0> to (int)(rand()/RAND_MAX/0.5)
// and prop<1> to rand()/RAND_MAX, upload properties and positions, build a GPU cell list with
// radius 0.1, collect the selected indexes into `ids`, copy `ids` and the sorted property
// vector back to the host and test that prop<0> at every collected index equals 1.
// NOTE(review): the declarations of vdg, ids, v_cl and match, and the statement executed when
// the != 1 check fires, are not part of this listing.
1566BOOST_AUTO_TEST_CASE(vector_dist_get_index_set)
1570 size_t bc[3] = {PERIODIC,PERIODIC,PERIODIC};
1572 if (create_vcluster().size() >= 16)
1579 auto it = vdg.getDomainIterator();
1585 vdg.getPos(p)[0] = (double)rand() / RAND_MAX;
1586 vdg.getPos(p)[1] = (double)rand() / RAND_MAX;
1587 vdg.getPos(p)[2] = (double)rand() / RAND_MAX;
// rand()/RAND_MAX lies in [0,1]; dividing by 0.5 and truncating yields 0 or 1 (2 only when
// rand() returns RAND_MAX).
1589 vdg.template getProp<0>(p) = (
int)((
double)rand() / RAND_MAX / 0.5);
1591 vdg.template getProp<1>(p) = (double)rand() / RAND_MAX;
1598 vdg.hostToDeviceProp<0,1>();
1599 vdg.hostToDevicePos();
1601 auto cl = vdg.getCellListGPU(0.1);
1607 get_indexes_by_type<0,type_is_one>(vdg.getPropVectorSort(),ids,vdg.size_local(),v_cl.
getgpuContext());
1611 ids.template deviceToHost<0>();
1613 auto & vs = vdg.getPropVectorSort();
1614 vs.template deviceToHost<0>();
// Every collected index must refer to an element whose prop<0> is 1.
1618 for (
int i = 0 ; i < ids.
size() ; i++)
1620 if (vs.template get<0>(ids.template get<0>(i)) != 1)
1624 BOOST_REQUIRE_EQUAL(match,
true);
// Test case for compareHostAndDevicePos / compareHostAndDeviceProp<N>.
// After filling positions and properties 0, 1 and 2 with rand()/RAND_MAX values and uploading
// them, the same three-step pattern is applied to the positions and to each property:
//   1. host and device are identical        -> comparison must return true
//   2. one host element is set to 0.99999999 -> comparison must return false
//   3. the host data is uploaded again       -> comparison must return true
// All comparisons use the tolerances (0.00001, 0.00000001).
// NOTE(review): the declaration of `vdg` and the particle loop are not part of this listing.
// In step 3 the same value is written to the host element again after the upload, which does
// not change the host data.
1627BOOST_AUTO_TEST_CASE(vector_dist_compare_host_device)
1631 size_t bc[3] = {PERIODIC,PERIODIC,PERIODIC};
1633 if (create_vcluster().size() >= 16)
1638 auto it = vdg.getDomainIterator();
1644 vdg.getPos(p)[0] = (double)rand() / RAND_MAX;
1645 vdg.getPos(p)[1] = (double)rand() / RAND_MAX;
1646 vdg.getPos(p)[2] = (double)rand() / RAND_MAX;
1648 vdg.template getProp<0>(p) = (double)rand() / RAND_MAX;
1650 vdg.template getProp<1>(p)[0] = (double)rand() / RAND_MAX;
1651 vdg.template getProp<1>(p)[1] = (double)rand() / RAND_MAX;
1652 vdg.template getProp<1>(p)[2] = (double)rand() / RAND_MAX;
1654 vdg.template getProp<2>(p)[0][0] = (double)rand() / RAND_MAX;
1655 vdg.template getProp<2>(p)[0][1] = (double)rand() / RAND_MAX;
1656 vdg.template getProp<2>(p)[0][2] = (double)rand() / RAND_MAX;
1657 vdg.template getProp<2>(p)[1][0] = (double)rand() / RAND_MAX;
1658 vdg.template getProp<2>(p)[1][1] = (double)rand() / RAND_MAX;
1659 vdg.template getProp<2>(p)[1][2] = (double)rand() / RAND_MAX;
1660 vdg.template getProp<2>(p)[2][0] = (double)rand() / RAND_MAX;
1661 vdg.template getProp<2>(p)[2][1] = (double)rand() / RAND_MAX;
1662 vdg.template getProp<2>(p)[2][2] = (double)rand() / RAND_MAX;
1669 vdg.hostToDeviceProp<0,1,2>();
1670 vdg.hostToDevicePos();
// Positions: equal, then host element 100 changed, then re-uploaded.
1672 bool test = vdg.compareHostAndDevicePos(0.00001,0.00000001);
1673 BOOST_REQUIRE_EQUAL(test,
true);
1675 vdg.getPos(100)[0] = 0.99999999;
1677 test = vdg.compareHostAndDevicePos(0.00001,0.00000001);
1678 BOOST_REQUIRE_EQUAL(test,
false);
1680 vdg.hostToDevicePos();
1681 vdg.getPos(100)[0] = 0.99999999;
1683 test = vdg.compareHostAndDevicePos(0.00001,0.00000001);
1684 BOOST_REQUIRE_EQUAL(test,
true);
// Property 1: host element 103 changed.
1688 test = vdg.compareHostAndDeviceProp<1>(0.00001,0.00000001);
1689 BOOST_REQUIRE_EQUAL(test,
true);
1691 vdg.getProp<1>(103)[0] = 0.99999999;
1693 test = vdg.compareHostAndDeviceProp<1>(0.00001,0.00000001);
1694 BOOST_REQUIRE_EQUAL(test,
false);
1696 vdg.hostToDeviceProp<1>();
1697 vdg.getProp<1>(103)[0] = 0.99999999;
1699 test = vdg.compareHostAndDeviceProp<1>(0.00001,0.00000001);
1700 BOOST_REQUIRE_EQUAL(test,
true);
// Property 0: host element 105 changed.
1705 test = vdg.compareHostAndDeviceProp<0>(0.00001,0.00000001);
1706 BOOST_REQUIRE_EQUAL(test,
true);
1708 vdg.getProp<0>(105) = 0.99999999;
1710 test = vdg.compareHostAndDeviceProp<0>(0.00001,0.00000001);
1711 BOOST_REQUIRE_EQUAL(test,
false);
1713 vdg.hostToDeviceProp<0>();
1714 vdg.getProp<0>(105) = 0.99999999;
1716 test = vdg.compareHostAndDeviceProp<0>(0.00001,0.00000001);
1717 BOOST_REQUIRE_EQUAL(test,
true);
// Property 2: host element 108, entry [1][2] changed.
1723 test = vdg.compareHostAndDeviceProp<2>(0.00001,0.00000001);
1724 BOOST_REQUIRE_EQUAL(test,
true);
1726 vdg.getProp<2>(108)[1][2] = 0.99999999;
1728 test = vdg.compareHostAndDeviceProp<2>(0.00001,0.00000001);
1729 BOOST_REQUIRE_EQUAL(test,
false);
1731 vdg.hostToDeviceProp<2>();
1732 vdg.getProp<2>(108)[1][2] = 0.99999999;
1734 test = vdg.compareHostAndDeviceProp<2>(0.00001,0.00000001);
1735 BOOST_REQUIRE_EQUAL(test,
true);
// Kernel: writes index-dependent marker values into every property of element i, where
// i = threadIdx.x + blockIdx.x * blockDim.x (one-dimensional launch assumed by the indexing).
// Threads with i >= vds.size() return without writing.
//   prop<0>       = 1000 + i
//   prop<1>[0..2] = 2000 + i, 3000 + i, 4000 + i
//   prop<2>[r][c] = (r+1)*10000 + (c+2)*1000 + i, i.e. 12000.., 13000.., 14000.., 22000.., ...
// The vector_dist_domain_and_ghost_test case checks the same values on the host.
1738template<
typename vector_dist_type>
1739__global__
void assign_to_ghost(vector_dist_type vds)
1741 int i = threadIdx.x + blockIdx.x * blockDim.x;
// Bounds guard for the tail of the grid.
1743 if (i >= vds.size()) {
return;}
1745 vds.template getProp<0>(i) = 1000.0 + i;
1747 vds.template getProp<1>(i)[0] = 2000.0 + i;
1748 vds.template getProp<1>(i)[1] = 3000.0 + i;
1749 vds.template getProp<1>(i)[2] = 4000.0 + i;
1751 vds.template getProp<2>(i)[0][0] = 12000.0 + i;
1752 vds.template getProp<2>(i)[0][1] = 13000.0 + i;
1753 vds.template getProp<2>(i)[0][2] = 14000.0 + i;
1754 vds.template getProp<2>(i)[1][0] = 22000.0 + i;
1755 vds.template getProp<2>(i)[1][1] = 23000.0 + i;
1756 vds.template getProp<2>(i)[1][2] = 24000.0 + i;
1757 vds.template getProp<2>(i)[2][0] = 32000.0 + i;
1758 vds.template getProp<2>(i)[2][1] = 33000.0 + i;
1759 vds.template getProp<2>(i)[2][2] = 34000.0 + i;
// Test case: launches assign_to_ghost over the range given by getDomainAndGhostIteratorGPU(),
// copies properties 0, 1 and 2 back to the host and requires, for every element k visited by
// getDomainAndGhostIterator(), that each property equals its marker constant plus k.getKey()
// (exact comparison).
// NOTE(review): the declaration of `vdg`, the host loop header and the initialisation of
// `check` are not part of this listing.
1763BOOST_AUTO_TEST_CASE(vector_dist_domain_and_ghost_test)
1767 size_t bc[3] = {PERIODIC,PERIODIC,PERIODIC};
1769 if (create_vcluster().size() >= 16)
1774 auto ite = vdg.getDomainAndGhostIteratorGPU();
1776 CUDA_LAUNCH(assign_to_ghost,ite,vdg.toKernel());
1778 vdg.template deviceToHostProp<0,1,2>();
1781 auto it = vdg.getDomainAndGhostIterator();
// Expected values mirror the constants written by assign_to_ghost.
1789 check &= vdg.template getProp<0>(k) == 1000.0 + k.getKey();
1791 check &= vdg.template getProp<1>(k)[0] == 2000.0 + k.getKey();
1792 check &= vdg.template getProp<1>(k)[1] == 3000.0 + k.getKey();
1793 check &= vdg.template getProp<1>(k)[2] == 4000.0 + k.getKey();
1795 check &= vdg.template getProp<2>(k)[0][0] == 12000.0 + k.getKey();
1796 check &= vdg.template getProp<2>(k)[0][1] == 13000.0 + k.getKey();
1797 check &= vdg.template getProp<2>(k)[0][2] == 14000.0 + k.getKey();
1798 check &= vdg.template getProp<2>(k)[1][0] == 22000.0 + k.getKey();
1799 check &= vdg.template getProp<2>(k)[1][1] == 23000.0 + k.getKey();
1800 check &= vdg.template getProp<2>(k)[1][2] == 24000.0 + k.getKey();
1801 check &= vdg.template getProp<2>(k)[2][0] == 32000.0 + k.getKey();
1802 check &= vdg.template getProp<2>(k)[2][1] == 33000.0 + k.getKey();
1803 check &= vdg.template getProp<2>(k)[2][2] == 34000.0 + k.getKey();
1809 BOOST_REQUIRE_EQUAL(check,
true);
// Kernel: evaluates prop<1> of element 57 of `vs2`, component 0, and discards the result.
// `vs` is not used by the visible line.
// NOTE(review): judging by the name and by the vector_dist_overflow_se_class1 case that launches
// it, the access is presumably meant to be out of range so that the SE_CLASS1 checks fire —
// confirm.
1812template<
typename vT>
1813__global__
void launch_overflow(vT vs, vT vs2)
1815 vs2.template getProp<1>(57)[0];
// Test case: prints a begin banner, sets the capacity of `vdg` to 100, launches launch_overflow
// with vdg and vdg2, prints "SE_CLASS1 Catch" and an end banner.
// NOTE(review): the declarations of vdg, vdg2 and ite, and the construct under which
// "SE_CLASS1 Catch" is printed (presumably an exception handler or error check), are not part
// of this listing — confirm.
1818BOOST_AUTO_TEST_CASE(vector_dist_overflow_se_class1)
1822 size_t bc[3] = {PERIODIC,PERIODIC,PERIODIC};
1824 if (create_vcluster().size() >= 16)
1827 std::cout <<
"****** TEST ERROR MESSAGE BEGIN ********" << std::endl;
1833 vdg.setCapacity(100);
1846 CUDA_LAUNCH(launch_overflow,ite,vdg.toKernel(),vdg2.toKernel());
1850 std::cout <<
"SE_CLASS1 Catch" << std::endl;
1853 std::cout <<
"****** TEST ERROR MESSAGE END ********" << std::endl;
// Test case for ghost_put<add_atomic_, ...>(RUN_ON_DEVICE); the body sits behind
// #ifndef CUDIFY_USE_OPENMP.
// Visible steps: k starts as the cube root of 25*25*25*getProcessingUnits() and is decreased
// by big_step (while k > 2*big_step) or by small_step = 21 until it drops below 2. For each k,
// r_cut = 1.3/k and r_g = 1.5/k; contributions are accumulated into prop<2> on the host through
// the neighbour iterator, uploaded, merged with ghost_put and downloaded. The test then requires
// that prop<0> and prop<2> are the same for every particle within a relative tolerance `eps`,
// printing the offending particle otherwise. The sequence is repeated with ghost_put issued
// separately for property 0 and property 2.
// NOTE(review): the declarations of vd, NN, eps, ret, a, dist, xp, q, p, v_cl and the iterators,
// the loop bodies and the closing #endif are not part of this listing.
1858BOOST_AUTO_TEST_CASE( vector_dist_ghost_put_gpu )
1862#ifndef CUDIFY_USE_OPENMP
1866 long int k = 25*25*25*create_vcluster().getProcessingUnits();
// Cube root of the total count (1/3. is a double division).
1867 k = std::pow(k, 1/3.);
1872 print_test(
"Testing 3D periodic ghost put GPU k=",k);
1873 BOOST_TEST_CHECKPOINT(
"Testing 3D periodic ghost put k=" << k );
1875 long int big_step = k / 30;
1876 big_step = (big_step == 0)?1:big_step;
1877 long int small_step = 21;
1880 for ( ; k >= 2 ; k-= (k > 2*big_step)?big_step:small_step )
1882 float r_cut = 1.3 / k;
1883 float r_g = 1.5 / k;
1888 size_t bc[3]={PERIODIC,PERIODIC,PERIODIC};
1902 auto key = it.get();
1922 vd.template hostToDeviceProp<0,2>();
1925 vd.template deviceToHostProp<0,2>();
1936 while (it2.isNext())
1943 auto Np = NN.getNNIterator<NO_CHECK>(NN.getCell(xp));
1956 vd.
getPropWrite<2>(q) += a*(-dist*dist+r_cut*r_cut) / 2;
// First pass: both properties merged with a single ghost_put call.
1966 vd.template hostToDeviceProp<0,2>();
1967 vd.template ghost_put<add_atomic_,0,2>(RUN_ON_DEVICE);
1968 vd.template deviceToHostProp<0,2>();
// Reference values taken from the first particle of the iterator.
1974 float constant = vd.
getProp<0>(it3.get());
1975 float constanta = vd.
getProp<2>(it3.get());
1978 while (it3.isNext())
1980 float constant2 = vd.
getProp<0>(it3.get());
1981 float constant3 = vd.
getProp<2>(it3.get());
// Relative deviation from the reference must not exceed eps.
1982 if (fabs(constant - constant2)/constant > eps || fabs(constanta - constant3)/constanta > eps)
1986 std::cout << p.
toString() <<
" " << constant2 <<
"/" << constant <<
"/" << constant3 <<
" " << v_cl.
getProcessUnitID() << std::endl;
1993 BOOST_REQUIRE_EQUAL(ret,
true);
1997 while (itp.isNext())
1999 auto key = itp.
get();
2015 while (it2.isNext())
2022 auto Np = NN.getNNIterator<NO_CHECK>(NN.getCell(xp));
// Second pass: one ghost_put call per property.
2045 vd.template hostToDeviceProp<0,2>();
2046 vd.template ghost_put<add_atomic_,0>(RUN_ON_DEVICE);
2047 vd.template ghost_put<add_atomic_,2>(RUN_ON_DEVICE);
2048 vd.template deviceToHostProp<0,2>();
2058 while (it3.isNext())
2062 if (fabs(constant - constant2)/constant > eps || fabs(constanta - constant3)/constanta > eps)
2066 std::cout << p.
toString() <<
" " << constant2 <<
"/" << constant <<
"/" << constant3 <<
" " << v_cl.
getProcessUnitID() << std::endl;
2073 BOOST_REQUIRE_EQUAL(ret,
true);
2080BOOST_AUTO_TEST_SUITE_END()
This class represents an N-dimensional box.
void enlarge(const Box< dim, T > &gh)
Enlarge the box with ghost margin.
This class implements the point shape in an N-dimensional space.
__device__ __host__ T distance(const Point< dim, T > &q) const
It calculates the distance between two points.
__device__ __host__ const T & get(unsigned int i) const
Get coordinate.
std::string toString() const
Return the string with the point coordinate.
__device__ __host__ T norm() const
norm of the vector
void execute()
Execute all the requests.
size_t getProcessUnitID()
Get the process unit id.
size_t getProcessingUnits()
Get the total number of processors.
gpu::ofp_context_t & getgpuContext(bool iw=true)
If NVIDIA CUDA is activated, return a GPU context.
bool allGather(T &send, openfpm::vector< T, Mem, gr > &v)
Gather the data from all processors.
Implementation of VCluster class.
Implementation of 1-D std::vector like structure.
vect_dist_key_dx get()
Get the actual key.
auto getLastPropWrite() -> decltype(v_prp.template get< id >(0))
Get the property of the last element.
auto getPropWrite(vect_dist_key_dx vec_key) -> decltype(v_prp.template get< id >(vec_key.getKey()))
Get the property of an element.
size_t size_local() const
return the local size of the vector
auto getProp(vect_dist_key_dx vec_key) -> decltype(v_prp.template get< id >(vec_key.getKey()))
Get the property of an element.
const vector_dist_prop & getPropVector() const
return the property vector of all the particles
grid_dist_id_iterator_dec< Decomposition > getGridIterator(const size_t(&sz)[dim])
auto getPosRead(vect_dist_key_dx vec_key) const -> decltype(v_pos.template get< 0 >(vec_key.getKey()))
Get the position of an element.
size_t size_local_with_ghost() const
Return the local size of the vector, including the ghost particles.
void deviceToHostPos()
Move the memory from the device to host memory.
void updateCellList(CellL &cell_list, bool no_se3=false, cl_construct_opt opt=cl_construct_opt::Full)
Update a cell list using the stored particles.
VerletL getVerlet(St r_cut)
For each particle, get the Verlet list.
vector_dist_iterator getGhostIterator() const
Get the iterator across the position of the ghost particles.
auto getPos(vect_dist_key_dx vec_key) -> decltype(v_pos.template get< 0 >(vec_key.getKey()))
Get the position of an element.
CellL getCellList(St r_cut, bool no_se3=false)
Construct a cell list starting from the stored particles.
vector_dist_iterator getDomainIterator() const
Get an iterator that traverses the particles in the domain.
void ghost_get(size_t opt=WITH_POSITION)
It synchronizes the properties and positions of the ghost particles.
auto getLastPosWrite() -> decltype(v_pos.template get< 0 >(0))
Get the position of the last element.
void hostToDevicePos()
Move the memory from the host to device memory.
void map(size_t opt=NONE)
It moves all the particles that do not belong to the local processor to their respective processors.
const vector_dist_pos & getPosVector() const
return the position vector of all the particles
auto getLastPos() -> decltype(v_pos.template get< 0 >(0))
Get the position of the last element.
void deviceToHostProp()
Move the memory from the device to host memory.
vector_dist_iterator getDomainAndGhostIterator() const
Get an iterator that traverses the particles in the domain and the ghost.
void add()
Add local particle.
void addComputationCosts(const self &vd, Model md=Model())
Add the computation cost on the decomposition coming from the particles.
auto getPropRead(vect_dist_key_dx vec_key) const -> decltype(v_prp.template get< id >(vec_key.getKey()))
Get the property of an element.
Decomposition & getDecomposition()
Get the decomposition.
KeyT const ValueT ValueT OffsetIteratorT OffsetIteratorT int
[in] The number of segments that comprise the sorting data
Aggregate of properties: from a list of objects it creates a struct that follows the OPENFPM native stru...