1 #define BOOST_TEST_DYN_LINK
3 #include <boost/test/unit_test.hpp>
4 #include "VCluster/VCluster.hpp"
5 #include <Vector/vector_dist.hpp>
6 #include "Vector/tests/vector_dist_util_unit_tests.hpp"
8 #define SUB_UNIT_FACTOR 1024
// Fragment (garbled extraction; lines missing): GPU kernel that shifts every
// particle position by +0.05 along each of the `dim` coordinates.
// The kernel signature line is not visible in this chunk; the launch at the
// bottom of the file suggests it is
//   __global__ void move_parts_gpu_test(vector_dist_type vecDist)
// -- TODO confirm against the original source.
10 template<
unsigned int dim ,
typename vector_dist_type>
// GET_PARTICLE yields this thread's particle index (OpenFPM device macro).
13 auto p = GET_PARTICLE(vecDist);
16 for (
int i = 0 ; i < dim ; i++)
18 vecDist.
getPos(p)[i] += 0.05;
22 BOOST_AUTO_TEST_SUITE( vector_dist_gpu_test )
// Print a test banner ("<test> <sz>") on the master rank only, so MPI runs
// do not emit one copy of the message per process.
void print_test(std::string test, size_t sz)
{
	if (create_vcluster().getProcessUnitID() == 0)
		std::cout << test << " " << sz << "\n";
}
// GPU kernel: derive the three test properties of each particle from its
// position (x,y,z):
//   prop<0>       = x + y + z
//   prop<1>[0..2] = {x+y, x+z, y+z}
//   prop<2>[0..2] = {x+y, x+z, y+z}  (same pairwise sums as prop<1>)
__global__
void initialize_props(vector_dist_ker<3, float, aggregate<float, float[3], float[3]>> vecDist)
{
	// Bounds-guarded particle index for this thread (OpenFPM device macro).
	auto p = GET_PARTICLE(vecDist);

	// Cache the coordinates once instead of re-reading them per property.
	float x = vecDist.getPos(p)[0];
	float y = vecDist.getPos(p)[1];
	float z = vecDist.getPos(p)[2];

	vecDist.template getProp<0>(p) = x + y + z;

	vecDist.template getProp<1>(p)[0] = x + y;
	vecDist.template getProp<1>(p)[1] = x + z;
	vecDist.template getProp<1>(p)[2] = y + z;

	vecDist.template getProp<2>(p)[0] = x + y;
	vecDist.template getProp<2>(p)[1] = x + z;
	vecDist.template getProp<2>(p)[2] = y + z;
}
// Fragment (lines missing): GPU kernel that, for each particle p, sums
// prop<0>(q) * r over its cell-list neighbours q (r appears to be the p-q
// displacement, computed in the missing lines) and stores the result in
// prop<1>(p). Declarations of xp, q, r and force are not visible here.
46 template<
typename T,
typename CellList_type>
47 __global__
void calculate_force(
48 vector_dist_ker<3, T,
aggregate<T, T[3], T [3]>> vecDist,
49 CellList_type cellList,
52 size_t p = GET_PARTICLE(vecDist);
// Iterate over the neighbourhood box of p's cell.
56 auto it = cellList.getNNIteratorBox(cellList.getCell(xp));
// Skip self-interaction.
64 if (q == p) {++it;
continue;}
72 force += vecDist.template getProp<0>(q)*r;
// Write the accumulated force back to property 1.
78 vecDist.template getProp<1>(p)[0] = force.get(0);
79 vecDist.template getProp<1>(p)[1] = force.get(1);
80 vecDist.template getProp<1>(p)[2] = force.get(2);
// Fragment (lines missing): sorted variant of calculate_force. Uses
// GET_PARTICLE_SORT / it.get_sort() so the kernel works on the cell-list
// reordered particle layout; the caller restores order afterwards.
// The intermediate declarations (xp, r, force) are not visible here.
83 template<
typename T,
typename CellList_type>
84 __global__
void calculate_force_sort(
85 vector_dist_ker<3, T,
aggregate<T, T[3], T [3]>> vecDistSort,
86 CellList_type cellList,
// Sorted particle index for this thread.
89 size_t p; GET_PARTICLE_SORT(p, cellList);
94 auto it = cellList.getNNIteratorBox(cellList.getCell(xp));
// Neighbour index in the sorted ordering.
98 auto q = it.get_sort();
// Skip self-interaction.
100 if (q == p) {++it;
continue;}
110 force += vecDistSort.template getProp<0>(q)*r;
116 vecDistSort.template getProp<1>(p)[0] = force.get(0);
117 vecDistSort.template getProp<1>(p)[1] = force.get(1);
118 vecDistSort.template getProp<1>(p)[2] = force.get(2);
// Fragment (lines missing): CPU reference check for the GPU force kernels.
// Recomputes the neighbour-sum force with a host cell list and compares it
// against prop<1> written by the GPU, within an absolute tolerance of 3e-4.
// The outer particle loop, xp/r2/force declarations and the return statement
// are not visible in this chunk.
121 template<
typename CellList_type,
typename vector_type>
122 bool check_force(CellList_type & cellList,
vector_type & vecDist)
124 typedef typename vector_type::stype St;
140 auto NNc = cellList.getNNIteratorBox(cellList.getCell(xp));
// Skip self-interaction.
146 if (q == p.getKey()) {++NNc;
continue;}
// Only accumulate contributions from non-degenerate separations.
153 if (r2.
norm() > 1e-6)
156 force += vecDist.template getProp<0>(q)*r2;
// Component-wise comparison with absolute tolerance (not exact equality,
// since GPU and CPU float summation order differ).
162 match &= fabs(vecDist.template getProp<1>(p)[0] - force.get(0)) < 0.0003;
163 match &= fabs(vecDist.template getProp<1>(p)[1] - force.get(1)) < 0.0003;
164 match &= fabs(vecDist.template getProp<1>(p)[2] - force.get(2)) < 0.0003;
// Diagnostic output on mismatch (x component only).
168 std::cout << p.getKey() <<
" ERROR: " << vecDist.template getProp<1>(p)[0] <<
" " << force.get(0) << std::endl;
// Fragment (lines missing): test that a host-side ghost_get<0,1,2>() on a
// periodic domain transports all three properties correctly; every ghost
// particle's properties must still equal the position-derived formulas set
// below. The vector construction, iterator loops and `check` declaration
// are not visible in this chunk.
181 BOOST_AUTO_TEST_CASE( vector_dist_gpu_ghost_get )
183 auto & vCluster = create_vcluster();
// Skip on very large communicators -- presumably too few particles per
// rank to be meaningful. TODO confirm (early return not visible).
185 if (vCluster.size() > 16)
194 size_t bc[3]={PERIODIC,PERIODIC,PERIODIC};
// Random positions in [0,1)^3.
204 vecDist.
getPos(p)[0] = (float)rand() / (float)RAND_MAX;
205 vecDist.
getPos(p)[1] = (float)rand() / (float)RAND_MAX;
206 vecDist.
getPos(p)[2] = (float)rand() / (float)RAND_MAX;
// Properties are deterministic functions of position, so they can be
// re-verified on ghost copies after the transfer.
208 vecDist.template getProp<0>(p) = vecDist.
getPos(p)[0] + vecDist.
getPos(p)[1] + vecDist.
getPos(p)[2];
210 vecDist.template getProp<1>(p)[0] = vecDist.
getPos(p)[0] + vecDist.
getPos(p)[1];
211 vecDist.template getProp<1>(p)[1] = vecDist.
getPos(p)[0] + vecDist.
getPos(p)[2];
212 vecDist.template getProp<1>(p)[2] = vecDist.
getPos(p)[1] + vecDist.
getPos(p)[2];
214 vecDist.template getProp<2>(p)[0] = vecDist.
getPos(p)[0] + 3.0*vecDist.
getPos(p)[1];
215 vecDist.template getProp<2>(p)[1] = vecDist.
getPos(p)[0] + 3.0*vecDist.
getPos(p)[2];
216 vecDist.template getProp<2>(p)[2] = vecDist.
getPos(p)[1] + 3.0*vecDist.
getPos(p)[2];
// Host-side ghost exchange of all three properties.
225 vecDist.template ghost_get<0,1,2>();
// Re-verify the invariants on the (presumably ghost) particles.
237 check &= (vecDist.template getProp<0>(p) == vecDist.
getPos(p)[0] + vecDist.
getPos(p)[1] + vecDist.
getPos(p)[2]);
239 check &= (vecDist.template getProp<1>(p)[0] == vecDist.
getPos(p)[0] + vecDist.
getPos(p)[1]);
240 check &= (vecDist.template getProp<1>(p)[1] == vecDist.
getPos(p)[0] + vecDist.
getPos(p)[2]);
241 check &= (vecDist.template getProp<1>(p)[2] == vecDist.
getPos(p)[1] + vecDist.
getPos(p)[2]);
243 check &= (vecDist.template getProp<2>(p)[0] == vecDist.
getPos(p)[0] + 3.0*vecDist.
getPos(p)[1]);
244 check &= (vecDist.template getProp<2>(p)[1] == vecDist.
getPos(p)[0] + 3.0*vecDist.
getPos(p)[2]);
245 check &= (vecDist.template getProp<2>(p)[2] == vecDist.
getPos(p)[1] + 3.0*vecDist.
getPos(p)[2]);
// Sanity check: a non-trivial number of ghost particles was exchanged.
256 BOOST_REQUIRE(tot_s > 1000);
// Fragment (lines missing): launch calculate_force on the GPU with the given
// GPU cell list, copy prop<1> back, and validate it against the CPU cell
// list via check_force. The CUDA_LAUNCH argument list is incomplete here
// (the it5.wthr/it5.thr and toKernel() arguments are in missing lines).
259 template<
typename vector_type,
typename CellList_type,
typename CellList_type_cpu>
260 void compareCellListCpuGpu(
vector_type & vecDist, CellList_type & NN, CellList_type_cpu & cellList)
// 32 threads per block for the force kernel.
262 const auto it5 = vecDist.getDomainIteratorGPU(32);
264 CUDA_LAUNCH((calculate_force<
typename vector_type::stype,decltype(NN.toKernel())>),
268 (
int)create_vcluster().rank()
// Bring the GPU-computed forces back to the host for checking.
271 vecDist.template deviceToHostProp<1>();
273 bool test = check_force(cellList,vecDist);
274 BOOST_REQUIRE_EQUAL(test,
true);
// Fragment (lines missing): sorted-layout counterpart of
// compareCellListCpuGpu. Launches calculate_force_sort, restores the
// original particle order for prop<1>, copies it to the host, and verifies
// against the CPU reference. Parts of the CUDA_LAUNCH argument list (the
// launch configuration) are in missing lines.
277 template<
typename vector_type,
typename CellList_type,
typename CellList_type_cpu>
278 void compareCellListCpuGpuSorted(
vector_type & vecDistSort, CellList_type & cellListGPU, CellList_type_cpu & cellList)
280 const auto it5 = vecDistSort.getDomainIteratorGPU(32);
282 CUDA_LAUNCH((calculate_force_sort<
typename vector_type::stype,decltype(cellListGPU.toKernel())>),
284 vecDistSort.toKernel(),
285 cellListGPU.toKernel(),
286 (
int)create_vcluster().rank()
// The kernel wrote prop<1> in sorted order; map it back to original order.
289 vecDistSort.template restoreOrder<1>(cellListGPU);
291 vecDistSort.template deviceToHostProp<1>();
294 bool test = check_force(cellList,vecDistSort);
295 BOOST_REQUIRE_EQUAL(test,
true);
// Fragment (lines missing): end-to-end GPU test. Places 10000 particles
// (grid-derived x/y/z scaled by RAND_MAX -- the x,y,z declarations are in
// missing lines), checks locality after (presumably) a map, runs
// initialize_props on the GPU, verifies the properties on the host, then
// compares GPU vs CPU cell-list forces in either sorted or unsorted mode.
298 template<
typename CellList_type,
bool sorted>
299 void vector_dist_gpu_test_impl()
301 auto & vCluster = create_vcluster();
// Skip on very large communicators. TODO confirm (early return not visible).
303 if (vCluster.size() > 16)
312 size_t bc[3]={NON_PERIODIC,NON_PERIODIC,NON_PERIODIC};
// Rank-dependent seed: each process generates a distinct particle set.
316 srand(55067*create_vcluster().rank());
328 vecDist.
getPos(p)[0] = (float)x / (
float)RAND_MAX;
329 vecDist.
getPos(p)[1] = (float)y / (
float)RAND_MAX;
330 vecDist.
getPos(p)[2] = (float)z / (
float)RAND_MAX;
// Global particle count must be exactly 10000 after redistribution.
342 vCluster.sum(size_l);
345 BOOST_REQUIRE_EQUAL(size_l,10000);
// Every local particle must lie inside this rank's subdomain.
358 noOut &= dec.isLocal(vecDist.
getPos(p));
364 BOOST_REQUIRE_EQUAL(noOut,
true);
365 BOOST_REQUIRE_EQUAL(cnt,vecDist.
size_local());
369 const auto it3 = vecDist.getDomainIteratorGPU();
374 CUDA_LAUNCH_DIM3(initialize_props,it3.wthr,it3.thr,vecDist.toKernel());
// Verify the GPU-computed properties against the position formulas
// (relative tolerance 0.01%, BOOST_REQUIRE_CLOSE semantics).
386 BOOST_REQUIRE_CLOSE(vecDist.template getProp<0>(p),vecDist.
getPos(p)[0] + vecDist.
getPos(p)[1] + vecDist.
getPos(p)[2],0.01);
388 BOOST_REQUIRE_CLOSE(vecDist.template getProp<1>(p)[0],vecDist.
getPos(p)[0] + vecDist.
getPos(p)[1],0.01);
389 BOOST_REQUIRE_CLOSE(vecDist.template getProp<1>(p)[1],vecDist.
getPos(p)[0] + vecDist.
getPos(p)[2],0.01);
390 BOOST_REQUIRE_CLOSE(vecDist.template getProp<1>(p)[2],vecDist.
getPos(p)[1] + vecDist.
getPos(p)[2],0.01);
403 vecDist.template hostToDeviceProp<0>();
// CL_GPU_REORDER requests the sorted (reordered) cell-list layout.
405 size_t opt = (sorted)? CL_GPU_REORDER : 0;
407 auto cellListGPU = vecDist.template getCellListGPU<CellList_type>(0.1, opt | CL_NON_SYMMETRIC);
// Sorted path: update with property 0 reordered, then compare.
412 vecDist.template hostToDeviceProp<0>();
414 vecDist.template updateCellListGPU<0>(cellListGPU);
415 compareCellListCpuGpuSorted(vecDist,cellListGPU,cellList);
// Unsorted path.
420 vecDist.updateCellListGPU(cellListGPU);
421 compareCellListCpuGpu(vecDist,cellListGPU,cellList);
// Fragment (lines missing): exercises the CL_GPU_REORDER (sorted) cell-list
// path: repeatedly reorders/restores subsets of properties through
// updateCellListGPU/restoreOrder and checks that (a) cell occupancy matches
// a CPU-built cell list, (b) per-particle cell assignment is consistent,
// and (c) positions/properties survive the reorder+restore round trip
// (compared against tmpPos/tmpPrp snapshots taken in missing lines).
425 template<
typename CellList_type>
426 void vector_dist_gpu_make_sort_test_impl()
428 auto & vCluster = create_vcluster();
430 if (vCluster.size() > 16)
439 size_t bc[3]={NON_PERIODIC,NON_PERIODIC,NON_PERIODIC};
443 srand(55067*create_vcluster().rank());
455 vecDist.
getPos(p)[0] = (float)x / (
float)RAND_MAX;
456 vecDist.
getPos(p)[1] = (float)y / (
float)RAND_MAX;
457 vecDist.
getPos(p)[2] = (float)z / (
float)RAND_MAX;
465 vecDist.
map(RUN_ON_DEVICE);
467 auto it3 = vecDist.getDomainIteratorGPU();
469 CUDA_LAUNCH_DIM3(initialize_props,it3.wthr,it3.thr,vecDist.toKernel());
471 vecDist.template deviceToHostProp<0,1,2>();
482 auto NN = vecDist.template getCellListGPU<CellList_type>(0.1, CL_NON_SYMMETRIC | CL_GPU_REORDER);
// Round trip 1: reorder all three properties, then restore.
485 vecDist.template hostToDeviceProp<0,1,2>();
487 vecDist.template updateCellListGPU<0,1,2>(NN);
488 vecDist.template restoreOrder<0,1,2>(NN);
// Round trip 2: with the skip-construct optimization toggled on and then
// reverted (XOR clears the bit set above).
490 NN.setOpt(NN.getOpt() | CL_GPU_SKIP_CONSTRUCT_ON_STATIC_DOMAIN);
491 vecDist.template updateCellListGPU<0,1,2>(NN);
492 vecDist.template restoreOrder<0,1,2>(NN);
493 NN.setOpt(NN.getOpt() ^ CL_GPU_SKIP_CONSTRUCT_ON_STATIC_DOMAIN);
496 vecDist.template deviceToHostProp<0,1,2>();
// Round trips per individual property (0, then 1, then 2).
499 vecDist.template hostToDeviceProp<0>();
501 vecDist.template updateCellListGPU<0>(NN);
502 vecDist.template restoreOrder<0>(NN);
505 vecDist.template deviceToHostProp<0>();
508 vecDist.template hostToDeviceProp<1>();
510 vecDist.template updateCellListGPU<1>(NN);
511 vecDist.template restoreOrder<1>(NN);
514 vecDist.template deviceToHostProp<1>();
517 vecDist.template hostToDeviceProp<2>();
519 vecDist.template updateCellListGPU<2>(NN);
520 vecDist.template restoreOrder<2>(NN);
523 vecDist.template deviceToHostProp<2>();
// Cell occupancy must agree between the two CPU-built reference lists
// (NN_cpu1/NN_cpu2 are constructed in missing lines).
529 for (
size_t i = 0 ; i < NN_cpu1.getNCells() ; i++)
531 match &= NN_cpu1.
getNelements(i) == NN_cpu2.getNelements(i);
533 BOOST_REQUIRE_EQUAL(match,
true);
// Per-particle cell assignment consistency (p1/p2 set up in missing lines).
536 for (
size_t i = 0 ; i < vecDist.
size_local() ; i++)
542 auto c1 = NN.getCell(p1);
543 auto c2 = NN.getCell(p2);
547 BOOST_REQUIRE_EQUAL(match,
true);
// Positions and all properties must match the pre-reorder snapshots.
// NOTE(review): one-sided "<" comparisons (no fabs) -- presumably the
// values are expected to be bit-identical; confirm against original.
550 for (
size_t i = 0 ; i < vecDist.
size_local() ; i++)
552 for (
int j = 0; j < 3; ++j)
553 match &= (vecDist.
getPos(i)[j] - tmpPos.template get<0>(i)[j]) < 0.0003;
555 match &= (vecDist.
getProp<0>(i) - tmpPrp.template get<0>(i)) < 0.0003;
557 for (
int j = 0; j < 3; ++j)
558 match &= (vecDist.
getProp<1>(i)[j] - tmpPrp.template get<1>(i)[j]) < 0.0003;
560 for (
int j = 0; j < 3; ++j)
561 match &= (vecDist.
getProp<2>(i)[j] - tmpPrp.template get<2>(i)[j]) < 0.0003;
564 BOOST_REQUIRE_EQUAL(match,
true);
// Sorted cell-list construction tests: sparse and dense GPU cell lists.
BOOST_AUTO_TEST_CASE(vector_dist_gpu_make_sort_sparse)
{
	vector_dist_gpu_make_sort_test_impl<CELLLIST_GPU_SPARSE<3,float>>();
}

BOOST_AUTO_TEST_CASE(vector_dist_gpu_make_sort)
{
	vector_dist_gpu_make_sort_test_impl<CellList_gpu<3,float,CudaMemory,shift_only<3, float>>>();
}

// Core GPU vector_dist tests over the {dense,sparse} x {unsorted,sorted}
// cell-list matrix.
BOOST_AUTO_TEST_CASE( vector_dist_gpu_test)
{
	vector_dist_gpu_test_impl<CellList_gpu<3,float,CudaMemory,shift_only<3, float>>,false>();
}

BOOST_AUTO_TEST_CASE( vector_dist_gpu_test_sorted)
{
	vector_dist_gpu_test_impl<CellList_gpu<3,float,CudaMemory,shift_only<3, float>>,true>();
}

BOOST_AUTO_TEST_CASE( vector_dist_gpu_test_sparse)
{
	vector_dist_gpu_test_impl<CELLLIST_GPU_SPARSE<3,float>,false>();
}

BOOST_AUTO_TEST_CASE( vector_dist_gpu_test_sparse_sorted)
{
	vector_dist_gpu_test_impl<CELLLIST_GPU_SPARSE<3,float>,true>();
}
// Fragment (lines missing): exercises device-side map() and ghost_get() for
// both float and double. Initializes particles with position-derived
// properties, maps on the device, verifies the properties survived, then
// cross-checks a device ghost_get against a pure-CPU twin (vd_cpu) by
// sorting ghost particles of both and comparing element-wise.
600 template<
typename St>
601 void vdist_calc_gpu_test()
603 auto & vCluster = create_vcluster();
605 if (vCluster.size() > 16)
608 Box<3,St> domain({0.0,0.0,0.0},{1.0,1.0,1.0});
614 size_t bc[3]={PERIODIC,PERIODIC,PERIODIC};
// Rank-dependent seed so each process creates a distinct particle set.
624 srand(vCluster.rank()*10000);
631 vecDist.
getPos(p)[0] = (St)rand() / (float)RAND_MAX;
632 vecDist.
getPos(p)[1] = (St)rand() / (float)RAND_MAX;
633 vecDist.
getPos(p)[2] = (St)rand() / (float)RAND_MAX;
// Properties are deterministic functions of position so that they can be
// re-verified after the device-side map below.
635 vecDist.template getProp<0>(p) = vecDist.
getPos(p)[0] + vecDist.
getPos(p)[1] + vecDist.
getPos(p)[2];
637 vecDist.template getProp<1>(p)[0] = vecDist.
getPos(p)[0];
638 vecDist.template getProp<1>(p)[1] = vecDist.
getPos(p)[1];
639 vecDist.template getProp<1>(p)[2] = vecDist.
getPos(p)[2];
641 vecDist.template getProp<2>(p)[0] = vecDist.
getPos(p)[0] + vecDist.
getPos(p)[1];
642 vecDist.template getProp<2>(p)[1] = vecDist.
getPos(p)[0] + vecDist.
getPos(p)[2];
643 vecDist.template getProp<2>(p)[2] = vecDist.
getPos(p)[1] + vecDist.
getPos(p)[2];
650 vecDist.template hostToDeviceProp<0,1,2>();
// Redistribute particles across processors entirely on the device.
653 vecDist.
map(RUN_ON_DEVICE);
658 vecDist.template deviceToHostProp<0,1,2>();
// Scratch writes (presumably to a sacrificial last particle) that must not
// affect the checks below.
668 vecDist.
getPos(p)[0] = 1.0;
669 vecDist.
getPos(p)[1] = 1.0;
670 vecDist.
getPos(p)[2] = 1.0;
// NOTE(review): seven identical assignments to getProp<0> -- in this
// garbled extraction the property indices were probably <1>[..]/<2>[..];
// confirm against the original source before trusting this section.
672 vecDist.template getProp<0>(p) = 0.0;
674 vecDist.template getProp<0>(p) = 0.0;
675 vecDist.template getProp<0>(p) = 0.0;
676 vecDist.template getProp<0>(p) = 0.0;
678 vecDist.template getProp<0>(p) = 0.0;
679 vecDist.template getProp<0>(p) = 0.0;
680 vecDist.template getProp<0>(p) = 0.0;
688 vecDist.template deviceToHostProp<0,1,2>();
// After the device map, every particle's properties must still match its
// position (exact equality: values were only moved, never recomputed).
699 match &= vecDist.template getProp<0>(p) == vecDist.
getPos(p)[0] + vecDist.
getPos(p)[1] + vecDist.
getPos(p)[2];
701 match &= vecDist.template getProp<1>(p)[0] == vecDist.
getPos(p)[0];
702 match &= vecDist.template getProp<1>(p)[1] == vecDist.
getPos(p)[1];
703 match &= vecDist.template getProp<1>(p)[2] == vecDist.
getPos(p)[2];
705 match &= vecDist.template getProp<2>(p)[0] == vecDist.
getPos(p)[0] + vecDist.
getPos(p)[1];
706 match &= vecDist.template getProp<2>(p)[1] == vecDist.
getPos(p)[0] + vecDist.
getPos(p)[2];
707 match &= vecDist.template getProp<2>(p)[2] == vecDist.
getPos(p)[1] + vecDist.
getPos(p)[2];
712 BOOST_REQUIRE_EQUAL(match,
true);
// All particles must be local after the map; none may fall outside.
725 count_local_n_local<3>(vecDist,it5,bc,domain,dom_ext,l_cnt,nl_cnt,n_out);
727 BOOST_REQUIRE_EQUAL(n_out,0);
728 BOOST_REQUIRE_EQUAL(l_cnt,vecDist.
size_local());
// Repeat the device map several times to catch instabilities.
732 for (
size_t i = 0 ; i < 10 ; i++)
734 vecDist.
map(RUN_ON_DEVICE);
737 vecDist.template deviceToHostProp<0,1,2>();
// Build a CPU twin containing the same particles/properties.
751 vd_cpu.getLastPos()[0] = vecDist.
getPos(p)[0];
752 vd_cpu.getLastPos()[1] = vecDist.
getPos(p)[1];
753 vd_cpu.getLastPos()[2] = vecDist.
getPos(p)[2];
755 vd_cpu.template getLastProp<0>() = vecDist.template getProp<0>(p);
757 vd_cpu.template getLastProp<1>()[0] = vecDist.template getProp<1>(p)[0];
758 vd_cpu.template getLastProp<1>()[1] = vecDist.template getProp<1>(p)[1];
759 vd_cpu.template getLastProp<1>()[2] = vecDist.template getProp<1>(p)[2];
761 vd_cpu.template getLastProp<2>()[0] = vecDist.template getProp<2>(p)[0];
762 vd_cpu.template getLastProp<2>()[1] = vecDist.template getProp<2>(p)[1];
763 vd_cpu.template getLastProp<2>()[2] = vecDist.template getProp<2>(p)[2];
// Ghost exchange: CPU twin on the host, test vector on the device.
768 vd_cpu.template ghost_get<0,1,2>();
772 vecDist.template ghost_get<0,1,2>(RUN_ON_DEVICE);
777 vecDist.template deviceToHostProp<0,1,2>();
// Lexicographic comparator for the local `part` record (x, then y, then z)
// used to sort ghost particles into a canonical order before comparing.
// The surrounding struct definition and return statements are in missing
// lines.
792 bool operator<(
const part & tmp)
const
794 if (xp.
get(0) < tmp.xp.get(0))
796 else if (xp.
get(0) > tmp.xp.get(0))
799 if (xp.
get(1) < tmp.xp.get(1))
801 else if (xp.
get(1) > tmp.xp.get(1))
804 if (xp.
get(2) < tmp.xp.get(2))
806 else if (xp.
get(2) > tmp.xp.get(2))
// Both ghost sets must have the same cardinality.
816 cpu_sort.resize(vd_cpu.size_local_with_ghost() - vd_cpu.size_local());
819 BOOST_REQUIRE_EQUAL(cpu_sort.
size(),gpu_sort.
size());
// Copy each ghost particle's position and properties into the sortable
// records (CPU and GPU sides in lock step).
824 while (itc2.isNext())
828 cpu_sort.get(cnt).xp.get(0) = vd_cpu.getPos(p)[0];
829 gpu_sort.get(cnt).xp.get(0) = vecDist.
getPos(p)[0];
830 cpu_sort.get(cnt).xp.get(1) = vd_cpu.getPos(p)[1];
831 gpu_sort.get(cnt).xp.get(1) = vecDist.
getPos(p)[1];
832 cpu_sort.get(cnt).xp.get(2) = vd_cpu.getPos(p)[2];
833 gpu_sort.get(cnt).xp.get(2) = vecDist.
getPos(p)[2];
835 cpu_sort.get(cnt).prp0 = vd_cpu.template getProp<0>(p);
836 gpu_sort.get(cnt).prp0 = vecDist.template getProp<0>(p);
838 cpu_sort.get(cnt).prp1[0] = vd_cpu.template getProp<1>(p)[0];
839 gpu_sort.get(cnt).prp1[0] = vecDist.template getProp<1>(p)[0];
840 cpu_sort.get(cnt).prp1[1] = vd_cpu.template getProp<1>(p)[1];
841 gpu_sort.get(cnt).prp1[1] = vecDist.template getProp<1>(p)[1];
842 cpu_sort.get(cnt).prp1[2] = vd_cpu.template getProp<1>(p)[2];
843 gpu_sort.get(cnt).prp1[2] = vecDist.template getProp<1>(p)[2];
845 cpu_sort.get(cnt).prp2[0] = vd_cpu.template getProp<2>(p)[0];
846 gpu_sort.get(cnt).prp2[0] = vecDist.template getProp<2>(p)[0];
847 cpu_sort.get(cnt).prp2[1] = vd_cpu.template getProp<2>(p)[1];
848 gpu_sort.get(cnt).prp2[1] = vecDist.template getProp<2>(p)[1];
849 cpu_sort.get(cnt).prp2[2] = vd_cpu.template getProp<2>(p)[2];
850 gpu_sort.get(cnt).prp2[2] = vecDist.template getProp<2>(p)[2];
// After sorting (in missing lines), the two ghost sets must be identical.
859 for (
size_t i = 0 ; i < cpu_sort.
size() ; i++)
861 match &= cpu_sort.get(i).xp.get(0) == gpu_sort.get(i).xp.get(0);
862 match &= cpu_sort.get(i).xp.get(1) == gpu_sort.get(i).xp.get(1);
863 match &= cpu_sort.get(i).xp.get(2) == gpu_sort.get(i).xp.get(2);
865 match &= cpu_sort.get(i).prp0 == gpu_sort.get(i).prp0;
866 match &= cpu_sort.get(i).prp1[0] == gpu_sort.get(i).prp1[0];
867 match &= cpu_sort.get(i).prp1[1] == gpu_sort.get(i).prp1[1];
868 match &= cpu_sort.get(i).prp1[2] == gpu_sort.get(i).prp1[2];
870 match &= cpu_sort.get(i).prp2[0] == gpu_sort.get(i).prp2[0];
871 match &= cpu_sort.get(i).prp2[1] == gpu_sort.get(i).prp2[1];
872 match &= cpu_sort.get(i).prp2[2] == gpu_sort.get(i).prp2[2];
875 BOOST_REQUIRE_EQUAL(match,
true);
// Move particles on the GPU (presumably to feed the next loop iteration).
879 auto ite = vecDist.getDomainIteratorGPU();
880 CUDA_LAUNCH_DIM3((move_parts_gpu_test<3,decltype(vecDist.toKernel())>),ite.wthr,ite.thr,vecDist.toKernel());
// Run the device-side map/ghost_get test in both precisions.
BOOST_AUTO_TEST_CASE( vector_dist_map_on_gpu_test)
{
	vdist_calc_gpu_test<float>();
	vdist_calc_gpu_test<double>();
}
// Fragment (lines missing): tests device-side reduce_local over four
// property types (float, double, int, size_t) with both _add_ and _max_
// functors. The fc/dc/ic/sc counters and the _add_ assertions are in
// missing lines; visible assertions check that the maximum equals the
// local particle count (properties are presumably filled with 1..N).
890 BOOST_AUTO_TEST_CASE(vector_dist_reduce)
892 auto & vCluster = create_vcluster();
894 if (vCluster.size() > 16)
903 size_t bc[3]={PERIODIC,PERIODIC,PERIODIC};
918 vecDist.template getProp<0>(p) = fc;
919 vecDist.template getProp<1>(p) = dc;
920 vecDist.template getProp<2>(p) = ic;
921 vecDist.template getProp<3>(p) = sc;
931 vecDist.template hostToDeviceProp<0,1,2,3>();
// Sum reductions, one per property type.
933 float redf = reduce_local<0,_add_>(vecDist);
934 double redd = reduce_local<1,_add_>(vecDist);
935 int redi = reduce_local<2,_add_>(vecDist);
936 size_t reds = reduce_local<3,_add_>(vecDist);
// Max reductions, one per property type.
943 float redf2 = reduce_local<0,_max_>(vecDist);
944 double redd2 = reduce_local<1,_max_>(vecDist);
945 int redi2 = reduce_local<2,_max_>(vecDist);
946 size_t reds2 = reduce_local<3,_max_>(vecDist);
948 BOOST_REQUIRE_EQUAL(redf2,vecDist.
size_local());
949 BOOST_REQUIRE_EQUAL(redd2,vecDist.
size_local());
950 BOOST_REQUIRE_EQUAL(redi2,vecDist.
size_local());
951 BOOST_REQUIRE_EQUAL(reds2,vecDist.
size_local());
// Fragment (lines missing): dynamic-load-balancing test on the GPU.
// Creates k clustered particles, maps/ghost-gets on the device, records the
// neighbour count in prop<0>, rebalances, checks processor loads agree
// across ranks (within 7%), then advects particles for 25 steps while
// re-validating forces and neighbour counts, and finally checks loads again.
954 template<
typename CellList_type,
bool sorted>
955 void vector_dist_dlb_on_cuda_impl(
size_t k,
double r_cut)
957 std::random_device r;
// NOTE(review): unlike the _async variant below, the seed entries here lack
// the "r() +" term, making the seed purely rank-deterministic -- possibly a
// line dropped by the extraction; confirm against the original.
959 std::seed_seq seed2{ create_vcluster().rank(),
960 create_vcluster().rank(),
961 create_vcluster().rank(),
962 create_vcluster().rank(),
963 create_vcluster().rank(),
964 create_vcluster().rank(),
965 create_vcluster().rank(),
966 create_vcluster().rank()};
967 std::mt19937 e2(seed2);
// Particles concentrated in a corner sub-box to create load imbalance.
976 std::uniform_real_distribution<double> unif(0.0,0.3);
980 size_t bc[3] = {PERIODIC,PERIODIC,PERIODIC};
988 for(
size_t i = 0 ; i < k ; i++)
1000 vecDist.template hostToDeviceProp<0>();
1002 vecDist.
map(RUN_ON_DEVICE);
1003 vecDist.template ghost_get<>(RUN_ON_DEVICE);
1008 vecDist.template deviceToHostProp<0>();
// Record each particle's neighbour count for later re-validation.
1022 vecDist.template getProp<0>(p) = VV.getNNPart(p.getKey());
1028 vecDist.template hostToDeviceProp<0>();
1034 vecDist.
map(RUN_ON_DEVICE);
// After rebalancing, all ranks' loads must agree within 7%.
1042 size_t load = vecDist.
getDecomposition().getDistribution().getProcessorLoad();
1046 for (
size_t i = 0 ; i < loads.
size() ; i++)
1048 double load_f = load;
1049 double load_fc = loads.get(i);
1051 BOOST_REQUIRE_CLOSE(load_f,load_fc,7.0);
// Advect for 25 steps, re-checking forces each iteration.
1058 for (
size_t i = 0 ; i < 25 ; i++)
1070 vecDist.
getPos(p)[0] += v.get(0) * 0.09;
1071 vecDist.
getPos(p)[1] += v.get(1) * 0.09;
1072 vecDist.
getPos(p)[2] += v.get(2) * 0.09;
1079 vecDist.
map(RUN_ON_DEVICE);
1080 vecDist.template ghost_get<0>(RUN_ON_DEVICE);
1083 vecDist.template deviceToHostProp<0,1,2>();
1086 size_t opt = (sorted)? CL_GPU_REORDER : 0;
1088 auto cellListGPU = vecDist.template getCellListGPU<CellList_type>(r_cut, opt | CL_NON_SYMMETRIC);
// Sorted or unsorted force comparison against the CPU reference.
1093 vecDist.template hostToDeviceProp<0>();
1095 vecDist.template updateCellListGPU<0>(cellListGPU);
1096 compareCellListCpuGpuSorted(vecDist,cellListGPU,cellList);
1101 vecDist.updateCellListGPU(cellListGPU);
1102 compareCellListCpuGpu(vecDist,cellListGPU,cellList);
// The stored neighbour counts must still be reproducible.
1110 while (it2.isNext())
1114 match &= vecDist.template getProp<0>(p) == VV2.getNNPart(p.getKey());
1119 BOOST_REQUIRE_EQUAL(match,
true);
1124 vecDist.
map(RUN_ON_DEVICE);
1128 vecDist.template ghost_get<0>(RUN_ON_DEVICE);
1130 vecDist.template deviceToHostProp<0>();
// Final load check with a looser tolerance (branch condition for 30% vs
// 10% is in a missing line).
1135 size_t load = vecDist.
getDecomposition().getDistribution().getProcessorLoad();
1139 for (
size_t i = 0 ; i < loads.
size() ; i++)
1141 double load_f = load;
1142 double load_fc = loads.get(i);
1145 BOOST_REQUIRE_CLOSE(load_f,load_fc,30.0);
1147 BOOST_REQUIRE_CLOSE(load_f,load_fc,10.0);
// Fragment (lines missing): asynchronous-ghost-get variant of
// vector_dist_dlb_on_cuda_impl; identical structure but every ghost
// exchange uses the split Ighost_get (start) + ghost_wait (finish) pair.
1153 template<
typename CellList_type,
bool sorted>
1154 void vector_dist_dlb_on_cuda_impl_async(
size_t k,
double r_cut)
1156 std::random_device r;
// Non-deterministic, rank-salted seed.
1158 std::seed_seq seed2{r() + create_vcluster().rank(),
1159 r() + create_vcluster().rank(),
1160 r() + create_vcluster().rank(),
1161 r() + create_vcluster().rank(),
1162 r() + create_vcluster().rank(),
1163 r() + create_vcluster().rank(),
1164 r() + create_vcluster().rank(),
1165 r() + create_vcluster().rank()};
1166 std::mt19937 e2(seed2);
// Particles concentrated in a corner sub-box to create load imbalance.
1175 std::uniform_real_distribution<double> unif(0.0,0.3);
1179 size_t bc[3] = {PERIODIC,PERIODIC,PERIODIC};
1181 vector_type vecDist(0,domain,bc,g,DEC_GRAN(2048));
1187 for(
size_t i = 0 ; i < k ; i++)
1199 vecDist.template hostToDeviceProp<0>();
1201 vecDist.
map(RUN_ON_DEVICE);
// Asynchronous ghost exchange: start, then wait.
1202 vecDist.template Ighost_get<>(RUN_ON_DEVICE);
1203 vecDist.template ghost_wait<>(RUN_ON_DEVICE);
1208 vecDist.template deviceToHostProp<0>();
// Record each particle's neighbour count for later re-validation.
1222 vecDist.template getProp<0>(p) = VV.getNNPart(p.getKey());
1228 vecDist.template hostToDeviceProp<0>();
1234 vecDist.
map(RUN_ON_DEVICE);
// After rebalancing, all ranks' loads must agree within 7%.
1242 size_t load = vecDist.
getDecomposition().getDistribution().getProcessorLoad();
1246 for (
size_t i = 0 ; i < loads.
size() ; i++)
1248 double load_f = load;
1249 double load_fc = loads.get(i);
1251 BOOST_REQUIRE_CLOSE(load_f,load_fc,7.0);
// Advect for 25 steps, re-checking forces each iteration.
1258 for (
size_t i = 0 ; i < 25 ; i++)
1270 vecDist.
getPos(p)[0] += v.get(0) * 0.09;
1271 vecDist.
getPos(p)[1] += v.get(1) * 0.09;
1272 vecDist.
getPos(p)[2] += v.get(2) * 0.09;
1279 vecDist.
map(RUN_ON_DEVICE);
1280 vecDist.template Ighost_get<0>(RUN_ON_DEVICE);
1281 vecDist.template ghost_wait<0>(RUN_ON_DEVICE);
1283 vecDist.template deviceToHostProp<0,1,2>();
1286 size_t opt = (sorted)? CL_GPU_REORDER : 0;
1288 auto cellListGPU = vecDist.template getCellListGPU<CellList_type>(r_cut, opt | CL_NON_SYMMETRIC);
// Sorted or unsorted force comparison against the CPU reference.
1293 vecDist.template hostToDeviceProp<0>();
1295 vecDist.template updateCellListGPU<0>(cellListGPU);
1296 compareCellListCpuGpuSorted(vecDist,cellListGPU,cellList);
1301 vecDist.updateCellListGPU(cellListGPU);
1302 compareCellListCpuGpu(vecDist,cellListGPU,cellList);
// The stored neighbour counts must still be reproducible.
1310 while (it2.isNext())
1314 match &= vecDist.template getProp<0>(p) == VV2.getNNPart(p.getKey());
1319 BOOST_REQUIRE_EQUAL(match,
true);
1324 vecDist.
map(RUN_ON_DEVICE);
1328 vecDist.template Ighost_get<0>(RUN_ON_DEVICE);
1329 vecDist.template ghost_wait<0>(RUN_ON_DEVICE);
1331 vecDist.template deviceToHostProp<0>();
// Final load agreement check (10%).
1336 size_t load = vecDist.
getDecomposition().getDistribution().getProcessorLoad();
1340 for (
size_t i = 0 ; i < loads.
size() ; i++)
1342 double load_f = load;
1343 double load_fc = loads.get(i);
1345 BOOST_REQUIRE_CLOSE(load_f,load_fc,10.0);
// Dynamic-load-balancing tests via the asynchronous ghost path
// (Ighost_get + ghost_wait), unsorted and sorted variants.
BOOST_AUTO_TEST_CASE(vector_dist_dlb_on_cuda_async)
{
	vector_dist_dlb_on_cuda_impl_async<CellList_gpu<3,double,CudaMemory,shift_only<3,double>,false>,false>(50000,0.01);
}

BOOST_AUTO_TEST_CASE(vector_dist_dlb_on_cuda_async_sorted)
{
	vector_dist_dlb_on_cuda_impl_async<CellList_gpu<3,double,CudaMemory,shift_only<3,double>,false>,true>(50000,0.01);
}

// Synchronous dynamic-load-balancing tests: dense and sparse GPU cell
// lists, unsorted and sorted variants.
BOOST_AUTO_TEST_CASE(vector_dist_dlb_on_cuda)
{
	vector_dist_dlb_on_cuda_impl<CellList_gpu<3,double,CudaMemory,shift_only<3,double>,false>,false>(50000,0.01);
}

BOOST_AUTO_TEST_CASE(vector_dist_dlb_on_cuda_sorted)
{
	vector_dist_dlb_on_cuda_impl<CellList_gpu<3,double,CudaMemory,shift_only<3,double>,false>,true>(50000,0.01);
}

BOOST_AUTO_TEST_CASE(vector_dist_dlb_on_cuda_sparse)
{
	vector_dist_dlb_on_cuda_impl<CELLLIST_GPU_SPARSE<3,double>,false>(50000,0.01);
}

BOOST_AUTO_TEST_CASE(vector_dist_dlb_on_cuda_sparse_sorted)
{
	vector_dist_dlb_on_cuda_impl<CELLLIST_GPU_SPARSE<3,double>,true>(50000,0.01);
}
// Fragment (lines missing): large-scale DLB stress tests, gated by
// communicator size so they only run on small rank counts.
1380 BOOST_AUTO_TEST_CASE(vector_dist_dlb_on_cuda2)
// 1M particles -- run only with up to 3 ranks.
1382 if (create_vcluster().size() <= 3)
1386 vector_dist_dlb_on_cuda_impl<CellList_gpu<3,double,CudaMemory,shift_only<3,double>,
false>,
false>(1000000,0.01);
1390 BOOST_AUTO_TEST_CASE(vector_dist_dlb_on_cuda3)
// 15M particles -- run only with fewer than 8 ranks.
1392 if (create_vcluster().size() < 8)
1396 vector_dist_dlb_on_cuda_impl<CellList_gpu<3,double,CudaMemory,shift_only<3,double>,
false>,
false>(15000000,0.005);
// Fragment (lines missing): tests that ghost_get with KEEP_PROPERTIES
// preserves host-side property increments. Setup phase: 50000 random
// particles in a corner sub-box, initial well-known property values
// (1000..14000 pattern) that the final phase re-checks after increments.
1401 BOOST_AUTO_TEST_CASE(vector_dist_keep_prop_on_cuda)
1412 size_t bc[3] = {PERIODIC,PERIODIC,PERIODIC};
1414 vector_type vecDist(0,domain,bc,g,DEC_GRAN(2048));
1420 for(
size_t i = 0 ; i < 50000 ; i++)
// Positions restricted to [0,0.3)^3 to create load imbalance.
1424 vecDist.
getLastPos()[0] = ((double)rand())/RAND_MAX * 0.3;
1425 vecDist.
getLastPos()[1] = ((double)rand())/RAND_MAX * 0.3;
1426 vecDist.
getLastPos()[2] = ((double)rand())/RAND_MAX * 0.3;
1432 vecDist.template hostToDeviceProp<0>();
1434 vecDist.
map(RUN_ON_DEVICE);
1435 vecDist.template ghost_get<>(RUN_ON_DEVICE);
1440 vecDist.template deviceToHostProp<0>();
// Baseline property pattern; the final check expects these values plus the
// number of host-side "+= 1" passes applied later.
1451 vecDist.template getProp<0>(p) = 0.0;
1453 vecDist.template getProp<1>(p)[0] = 1000.0;
1454 vecDist.template getProp<1>(p)[1] = 2000.0;
1455 vecDist.template getProp<1>(p)[2] = 3000.0;
// Initialize prop<2>[0][0] to 6000.0 (checked later against base + 6000.0).
// Fixed typo: was "= 6000,0;" -- a comma-operator expression that happened
// to assign 6000 and discard the trailing 0; "6000.0" states the intent.
1457 vecDist.template getProp<2>(p)[0][0] = 6000.0;
// Remaining baseline pattern for the tensor property prop<2>.
1458 vecDist.template getProp<2>(p)[0][1] = 7000.0;
1459 vecDist.template getProp<2>(p)[0][2] = 8000.0;
1460 vecDist.template getProp<2>(p)[1][0] = 9000.0;
1461 vecDist.template getProp<2>(p)[1][1] = 10000.0;
1462 vecDist.template getProp<2>(p)[1][2] = 11000.0;
1463 vecDist.template getProp<2>(p)[2][0] = 12000.0;
1464 vecDist.template getProp<2>(p)[2][1] = 13000.0;
1465 vecDist.template getProp<2>(p)[2][2] = 14000.0;
1471 vecDist.template hostToDeviceProp<0,1,2>();
1477 vecDist.
map(RUN_ON_DEVICE);
// Load balance check after redistribution (7% tolerance).
1485 size_t load = vecDist.
getDecomposition().getDistribution().getProcessorLoad();
1489 for (
size_t i = 0 ; i < loads.
size() ; i++)
1491 double load_f = load;
1492 double load_fc = loads.get(i);
1494 BOOST_REQUIRE_CLOSE(load_f,load_fc,7.0);
// Advect for 25 iterations, remapping and ghost-getting on the device.
1503 for (
size_t i = 0 ; i < 25 ; i++)
1517 vecDist.
getPos(p)[0] += v.get(0) * 0.09;
1518 vecDist.
getPos(p)[1] += v.get(1) * 0.09;
1519 vecDist.
getPos(p)[2] += v.get(2) * 0.09;
1526 vecDist.
map(RUN_ON_DEVICE);
1527 vecDist.template ghost_get<>(RUN_ON_DEVICE);
1529 vecDist.template deviceToHostProp<0,1,2>();
1534 vecDist.
map(RUN_ON_DEVICE);
1538 vecDist.template ghost_get<0>(RUN_ON_DEVICE);
1540 vecDist.template deviceToHostProp<0,1,2>();
// Load agreement after the advection phase (10% tolerance).
1545 size_t load = vecDist.
getDecomposition().getDistribution().getProcessorLoad();
1549 for (
size_t i = 0 ; i < loads.
size() ; i++)
1551 double load_f = load;
1552 double load_fc = loads.get(i);
1554 BOOST_REQUIRE_CLOSE(load_f,load_fc,10.0);
1559 vecDist.template deviceToHostProp<0,1,2>();
// Increment every property on the host by 1 (presumably repeated `base`
// times across an outer loop in missing lines).
1564 while (it2.isNext())
1568 vecDist.template getProp<0>(p) += 1;
1570 vecDist.template getProp<1>(p)[0] += 1.0;
1571 vecDist.template getProp<1>(p)[1] += 1.0;
1572 vecDist.template getProp<1>(p)[2] += 1.0;
1574 vecDist.template getProp<2>(p)[0][0] += 1.0;
1575 vecDist.template getProp<2>(p)[0][1] += 1.0;
1576 vecDist.template getProp<2>(p)[0][2] += 1.0;
1577 vecDist.template getProp<2>(p)[1][0] += 1.0;
1578 vecDist.template getProp<2>(p)[1][1] += 1.0;
1579 vecDist.template getProp<2>(p)[1][2] += 1.0;
1580 vecDist.template getProp<2>(p)[2][0] += 1.0;
1581 vecDist.template getProp<2>(p)[2][1] += 1.0;
1582 vecDist.template getProp<2>(p)[2][2] += 1.0;
1587 vecDist.template hostToDeviceProp<0,1,2>();
// The KEEP_PROPERTIES flag is the feature under test: ghost_get must not
// overwrite the incremented property values.
1591 vecDist.template ghost_get<0,1,2>(RUN_ON_DEVICE | KEEP_PROPERTIES);
1592 vecDist.template deviceToHostProp<0,1,2>();
// Verify: every property equals its baseline plus `base` increments.
1598 while (itg.isNext())
1602 match &= vecDist.template getProp<0>(p) == base;
1604 match &= vecDist.template getProp<1>(p)[0] == base + 1000.0;
1605 match &= vecDist.template getProp<1>(p)[1] == base + 2000.0;
1606 match &= vecDist.template getProp<1>(p)[2] == base + 3000.0;
1608 match &= vecDist.template getProp<2>(p)[0][0] == base + 6000.0;
1609 match &= vecDist.template getProp<2>(p)[0][1] == base + 7000.0;
1610 match &= vecDist.template getProp<2>(p)[0][2] == base + 8000.0;
1611 match &= vecDist.template getProp<2>(p)[1][0] == base + 9000.0;
1612 match &= vecDist.template getProp<2>(p)[1][1] == base + 10000.0;
1613 match &= vecDist.template getProp<2>(p)[1][2] == base + 11000.0;
1614 match &= vecDist.template getProp<2>(p)[2][0] == base + 12000.0;
1615 match &= vecDist.template getProp<2>(p)[2][1] == base + 13000.0;
1616 match &= vecDist.template getProp<2>(p)[2][2] == base + 14000.0;
1621 BOOST_REQUIRE_EQUAL(match,
true);
// Fragment: device-side predicate -- presumably the static check() of the
// type_is_one functor used by get_indexes_by_type below. Its body is not
// visible in this chunk; likely "return c == 1;" -- TODO confirm against
// the original source.
1628 __device__
static bool check(
int c)
// Fragment (lines missing): tests get_indexes_by_type -- selecting on the
// GPU the indices of all particles whose prop<0> equals 1 (via the
// type_is_one predicate), then verifying on the host that every returned
// index really points at a particle with prop<0> == 1.
1634 BOOST_AUTO_TEST_CASE(vector_dist_get_index_set)
1638 size_t bc[3] = {PERIODIC,PERIODIC,PERIODIC};
// Skip on large communicators. TODO confirm (early return not visible).
1640 if (create_vcluster().size() >= 16)
1647 auto it = vdg.getDomainIterator();
1653 vdg.getPos(p)[0] = (double)rand() / RAND_MAX;
1654 vdg.getPos(p)[1] = (double)rand() / RAND_MAX;
1655 vdg.getPos(p)[2] = (double)rand() / RAND_MAX;
// prop<0> is 0 or 1 with (roughly) equal probability.
1657 vdg.template getProp<0>(p) = (
int)((
double)rand() / RAND_MAX / 0.5);
1659 vdg.template getProp<1>(p) = (double)rand() / RAND_MAX;
1666 vdg.hostToDeviceProp<0,1>();
1667 vdg.hostToDevicePos();
1669 auto cl = vdg.getCellListGPU(0.1);
1670 vdg.updateCellListGPU(cl);
// GPU-side selection of indices whose prop<0> satisfies type_is_one.
1676 get_indexes_by_type<0,type_is_one>(vdg.getPropVector(),ids,vdg.size_local(),vCluster.
getGpuContext());
1680 ids.template deviceToHost<0>();
1682 auto & vs = vdg.getPropVector();
1683 vs.template deviceToHost<0>();
// Host-side verification of every selected index.
1687 for (
int i = 0 ; i < ids.
size() ; i++)
1689 if (vs.template get<0>(ids.template get<0>(i)) != 1)
1693 BOOST_REQUIRE_EQUAL(match,
true);
1696 BOOST_AUTO_TEST_CASE(vector_dist_compare_host_device)
1700 size_t bc[3] = {PERIODIC,PERIODIC,PERIODIC};
1702 if (create_vcluster().size() >= 16)
1707 auto it = vdg.getDomainIterator();
1713 vdg.getPos(p)[0] = (double)rand() / RAND_MAX;
1714 vdg.getPos(p)[1] = (double)rand() / RAND_MAX;
1715 vdg.getPos(p)[2] = (double)rand() / RAND_MAX;
1717 vdg.template getProp<0>(p) = (double)rand() / RAND_MAX;
1719 vdg.template getProp<1>(p)[0] = (double)rand() / RAND_MAX;
1720 vdg.template getProp<1>(p)[1] = (double)rand() / RAND_MAX;
1721 vdg.template getProp<1>(p)[2] = (double)rand() / RAND_MAX;
1723 vdg.template getProp<2>(p)[0][0] = (double)rand() / RAND_MAX;
1724 vdg.template getProp<2>(p)[0][1] = (double)rand() / RAND_MAX;
1725 vdg.template getProp<2>(p)[0][2] = (double)rand() / RAND_MAX;
1726 vdg.template getProp<2>(p)[1][0] = (double)rand() / RAND_MAX;
1727 vdg.template getProp<2>(p)[1][1] = (double)rand() / RAND_MAX;
1728 vdg.template getProp<2>(p)[1][2] = (double)rand() / RAND_MAX;
1729 vdg.template getProp<2>(p)[2][0] = (double)rand() / RAND_MAX;
1730 vdg.template getProp<2>(p)[2][1] = (double)rand() / RAND_MAX;
1731 vdg.template getProp<2>(p)[2][2] = (double)rand() / RAND_MAX;
1738 vdg.hostToDeviceProp<0,1,2>();
1739 vdg.hostToDevicePos();
1741 bool test = vdg.compareHostAndDevicePos(0.00001,0.00000001);
1742 BOOST_REQUIRE_EQUAL(test,
true);
1744 vdg.getPos(100)[0] = 0.99999999;
1746 test = vdg.compareHostAndDevicePos(0.00001,0.00000001);
1747 BOOST_REQUIRE_EQUAL(test,
false);
1749 vdg.hostToDevicePos();
1750 vdg.getPos(100)[0] = 0.99999999;
1752 test = vdg.compareHostAndDevicePos(0.00001,0.00000001);
1753 BOOST_REQUIRE_EQUAL(test,
true);
1757 test = vdg.compareHostAndDeviceProp<1>(0.00001,0.00000001);
1758 BOOST_REQUIRE_EQUAL(test,
true);
1760 vdg.getProp<1>(103)[0] = 0.99999999;
1762 test = vdg.compareHostAndDeviceProp<1>(0.00001,0.00000001);
1763 BOOST_REQUIRE_EQUAL(test,
false);
1765 vdg.hostToDeviceProp<1>();
1766 vdg.getProp<1>(103)[0] = 0.99999999;
1768 test = vdg.compareHostAndDeviceProp<1>(0.00001,0.00000001);
1769 BOOST_REQUIRE_EQUAL(test,
true);
1774 test = vdg.compareHostAndDeviceProp<0>(0.00001,0.00000001);
1775 BOOST_REQUIRE_EQUAL(test,
true);
1777 vdg.getProp<0>(105) = 0.99999999;
1779 test = vdg.compareHostAndDeviceProp<0>(0.00001,0.00000001);
1780 BOOST_REQUIRE_EQUAL(test,
false);
1782 vdg.hostToDeviceProp<0>();
1783 vdg.getProp<0>(105) = 0.99999999;
1785 test = vdg.compareHostAndDeviceProp<0>(0.00001,0.00000001);
1786 BOOST_REQUIRE_EQUAL(test,
true);
1792 test = vdg.compareHostAndDeviceProp<2>(0.00001,0.00000001);
1793 BOOST_REQUIRE_EQUAL(test,
true);
1795 vdg.getProp<2>(108)[1][2] = 0.99999999;
1797 test = vdg.compareHostAndDeviceProp<2>(0.00001,0.00000001);
1798 BOOST_REQUIRE_EQUAL(test,
false);
1800 vdg.hostToDeviceProp<2>();
1801 vdg.getProp<2>(108)[1][2] = 0.99999999;
1803 test = vdg.compareHostAndDeviceProp<2>(0.00001,0.00000001);
1804 BOOST_REQUIRE_EQUAL(test,
true);
1807 template<
typename vector_dist_type>
1810 int i = threadIdx.x + blockIdx.x * blockDim.x;
1812 if (i >= vds.size()) {
return;}
1814 vds.template getProp<0>(i) = 1000.0 + i;
1816 vds.template getProp<1>(i)[0] = 2000.0 + i;
1817 vds.template getProp<1>(i)[1] = 3000.0 + i;
1818 vds.template getProp<1>(i)[2] = 4000.0 + i;
1820 vds.template getProp<2>(i)[0][0] = 12000.0 + i;
1821 vds.template getProp<2>(i)[0][1] = 13000.0 + i;
1822 vds.template getProp<2>(i)[0][2] = 14000.0 + i;
1823 vds.template getProp<2>(i)[1][0] = 22000.0 + i;
1824 vds.template getProp<2>(i)[1][1] = 23000.0 + i;
1825 vds.template getProp<2>(i)[1][2] = 24000.0 + i;
1826 vds.template getProp<2>(i)[2][0] = 32000.0 + i;
1827 vds.template getProp<2>(i)[2][1] = 33000.0 + i;
1828 vds.template getProp<2>(i)[2][2] = 34000.0 + i;
1832 BOOST_AUTO_TEST_CASE(vector_dist_domain_and_ghost_test)
1836 size_t bc[3] = {PERIODIC,PERIODIC,PERIODIC};
1838 if (create_vcluster().size() >= 16)
1843 auto ite = vdg.getDomainAndGhostIteratorGPU();
1845 CUDA_LAUNCH(assign_to_ghost,ite,vdg.toKernel());
1847 vdg.template deviceToHostProp<0,1,2>();
1850 auto it = vdg.getDomainAndGhostIterator();
1858 check &= vdg.template getProp<0>(k) == 1000.0 + k.getKey();
1860 check &= vdg.template getProp<1>(k)[0] == 2000.0 + k.getKey();
1861 check &= vdg.template getProp<1>(k)[1] == 3000.0 + k.getKey();
1862 check &= vdg.template getProp<1>(k)[2] == 4000.0 + k.getKey();
1864 check &= vdg.template getProp<2>(k)[0][0] == 12000.0 + k.getKey();
1865 check &= vdg.template getProp<2>(k)[0][1] == 13000.0 + k.getKey();
1866 check &= vdg.template getProp<2>(k)[0][2] == 14000.0 + k.getKey();
1867 check &= vdg.template getProp<2>(k)[1][0] == 22000.0 + k.getKey();
1868 check &= vdg.template getProp<2>(k)[1][1] == 23000.0 + k.getKey();
1869 check &= vdg.template getProp<2>(k)[1][2] == 24000.0 + k.getKey();
1870 check &= vdg.template getProp<2>(k)[2][0] == 32000.0 + k.getKey();
1871 check &= vdg.template getProp<2>(k)[2][1] == 33000.0 + k.getKey();
1872 check &= vdg.template getProp<2>(k)[2][2] == 34000.0 + k.getKey();
1878 BOOST_REQUIRE_EQUAL(check,
true);
1881 template<
typename vT>
1882 __global__
void launch_overflow(vT vs, vT vs2)
1884 vs2.template getProp<1>(57)[0];
1887 BOOST_AUTO_TEST_CASE(vector_dist_overflow_se_class1)
1891 size_t bc[3] = {PERIODIC,PERIODIC,PERIODIC};
1893 if (create_vcluster().size() >= 16)
1896 std::cout <<
"****** TEST ERROR MESSAGE BEGIN ********" << std::endl;
1902 vdg.setCapacity(100);
1915 CUDA_LAUNCH(launch_overflow,ite,vdg.toKernel(),vdg2.toKernel());
1919 std::cout <<
"SE_CLASS1 Catch" << std::endl;
1922 std::cout <<
"****** TEST ERROR MESSAGE END ********" << std::endl;
1927 BOOST_AUTO_TEST_CASE( vector_dist_ghost_put_gpu )
1931 #ifndef CUDIFY_USE_OPENMP
1935 long int k = 25*25*25*create_vcluster().getProcessingUnits();
1936 k = std::pow(k, 1/3.);
1941 print_test(
"Testing 3D periodic ghost put GPU k=",k);
1942 BOOST_TEST_CHECKPOINT(
"Testing 3D periodic ghost put k=" << k );
1944 long int big_step = k / 30;
1945 big_step = (big_step == 0)?1:big_step;
1946 long int small_step = 21;
1949 for ( ; k >= 2 ; k-= (k > 2*big_step)?big_step:small_step )
1951 float r_cut = 1.3 / k;
1952 float r_g = 1.5 / k;
1957 size_t bc[3]={PERIODIC,PERIODIC,PERIODIC};
1971 auto key = it.
get();
1991 vecDist.template hostToDeviceProp<0,2>();
1994 vecDist.template deviceToHostProp<0,2>();
2005 while (it2.isNext())
2012 auto Np = NN.getNNIteratorBox(NN.getCell(xp));
2024 vecDist.
getPropWrite<0>(q) += a*(-dist*dist+r_cut*r_cut);
2025 vecDist.
getPropWrite<2>(q) += a*(-dist*dist+r_cut*r_cut) / 2;
2035 vecDist.template hostToDeviceProp<0,2>();
2036 vecDist.template ghost_put<add_atomic_,0,2>(RUN_ON_DEVICE);
2037 vecDist.template deviceToHostProp<0,2>();
2043 float constant = vecDist.
getProp<0>(it3.get());
2044 float constanta = vecDist.
getProp<2>(it3.get());
2047 while (it3.isNext())
2049 float constant2 = vecDist.
getProp<0>(it3.get());
2050 float constant3 = vecDist.
getProp<2>(it3.get());
2051 if (fabs(constant - constant2)/constant > eps || fabs(constanta - constant3)/constanta > eps)
2055 std::cout << p.toString() <<
" " << constant2 <<
"/" << constant <<
"/" << constant3 <<
" " << vCluster.
getProcessUnitID() << std::endl;
2062 BOOST_REQUIRE_EQUAL(ret,
true);
2066 while (itp.isNext())
2068 auto key = itp.
get();
2084 while (it2.isNext())
2091 auto Np = NN.getNNIteratorBox(NN.getCell(xp));
2103 vecDist.
getPropWrite<0>(q) += a*(-dist*dist+r_cut*r_cut);
2104 vecDist.
getPropWrite<2>(q) += a*(-dist*dist+r_cut*r_cut);
2114 vecDist.template hostToDeviceProp<0,2>();
2115 vecDist.template ghost_put<add_atomic_,0>(RUN_ON_DEVICE);
2116 vecDist.template ghost_put<add_atomic_,2>(RUN_ON_DEVICE);
2117 vecDist.template deviceToHostProp<0,2>();
2123 float constant = vecDist.
getPropRead<0>(it3.get());
2124 float constanta = vecDist.
getPropRead<2>(it3.get());
2127 while (it3.isNext())
2129 float constant2 = vecDist.
getPropRead<0>(it3.get());
2130 float constant3 = vecDist.
getPropRead<0>(it3.get());
2131 if (fabs(constant - constant2)/constant > eps || fabs(constanta - constant3)/constanta > eps)
2135 std::cout << p.toString() <<
" " << constant2 <<
"/" << constant <<
"/" << constant3 <<
" " << vCluster.
getProcessUnitID() << std::endl;
2142 BOOST_REQUIRE_EQUAL(ret,
true);
2149 BOOST_AUTO_TEST_SUITE_END()
void enlarge(const Box< dim, T > &gh)
Enlarge the box with ghost margin.
size_t getNelements(const size_t cell_id) const
Return the number of elements in the cell.
This class implement the point shape in an N-dimensional space.
__device__ __host__ T distance(const Point< dim, T > &q) const
It calculate the distance between 2 points.
__device__ __host__ const T & get(unsigned int i) const
Get coordinate.
__device__ __host__ T norm() const
norm of the vector
void execute()
Execute all the requests.
size_t getProcessUnitID()
Get the process unit id.
gpu::ofp_context_t & getGpuContext(bool iw=true)
If nvidia cuda is activated return a gpu context.
size_t getProcessingUnits()
Get the total number of processors.
bool allGather(T &send, openfpm::vector< T, Mem, gr > &v)
Gather the data from all processors.
Implementation of VCluster class.
Implementation of 1-D std::vector like structure.
vect_dist_key_dx get()
Get the actual key.
size_t size_local() const
return the local size of the vector
auto getProp(vect_dist_key_dx vec_key) -> decltype(vPrp.template get< id >(vec_key.getKey()))
Get the property of an element.
size_t size_local_with_ghost() const
return the local size of the vector
void deviceToHostPos()
Move the memory from the device to host memory.
const vector_dist_prop & getPropVector() const
return the property vector of all the particles
Decomposition & getDecomposition()
Get the decomposition.
auto getLastPosWrite() -> decltype(vPos.template get< 0 >(0))
Get the position of the last element.
vector_dist_iterator getGhostIterator() const
Get the iterator across the position of the ghost particles.
VerletList_type getVerlet(St r_cut, size_t neighborMaxNum=0)
for each particle get the verlet list
grid_dist_id_iterator_dec< Decomposition > getGridIterator(const size_t(&sz)[dim])
auto getPos(vect_dist_key_dx vec_key) -> decltype(vPos.template get< 0 >(vec_key.getKey()))
Get the position of an element.
auto getPosRead(vect_dist_key_dx vec_key) const -> decltype(vPos.template get< 0 >(vec_key.getKey()))
Get the position of an element.
auto getLastPropWrite() -> decltype(vPrp.template get< id >(0))
Get the property of the last element.
const vector_dist_pos & getPosVector() const
return the position vector of all the particles
vector_dist_iterator getDomainIterator() const
Get an iterator that traverse the particles in the domain.
void ghost_get(size_t opt=WITH_POSITION)
It synchronize the properties and position of the ghost particles.
void hostToDevicePos()
Move the memory from the device to host memory.
void map(size_t opt=NONE)
It move all the particles that does not belong to the local processor to the respective processor.
auto getPropRead(vect_dist_key_dx vec_key) const -> decltype(vPrp.template get< id >(vec_key.getKey()))
Get the property of an element.
void deviceToHostProp()
Move the memory from the device to host memory.
vector_dist_iterator getDomainAndGhostIterator() const
Get an iterator that traverse the particles in the domain.
void add()
Add local particle.
CellList_type getCellList(St r_cut, size_t opt=CL_NON_SYMMETRIC|CL_LINEAR_CELL_KEYS, bool no_se3=false, float ghostEnlargeFactor=1.013)
Construct a cell list starting from the stored particles.
void addComputationCosts(const self &vd, Model md=Model())
Add the computation cost on the decomposition coming from the particles.
auto getPropWrite(vect_dist_key_dx vec_key) -> decltype(vPrp.template get< id >(vec_key.getKey()))
Get the property of an element.
auto getLastPos() -> decltype(vPos.template get< 0 >(0))
Get the position of the last element.
KeyT const ValueT ValueT OffsetIteratorT OffsetIteratorT int
[in] The number of segments that comprise the sorting data
aggregate of properties, from a list of object if create a struct that follow the OPENFPM native stru...