1#define BOOST_TEST_DYN_LINK 
    3#include <boost/test/unit_test.hpp> 
    4#include "VCluster/VCluster.hpp" 
    5#include <Vector/vector_dist.hpp> 
    6#include "Vector/tests/vector_dist_util_unit_tests.hpp" 
    8#define SUB_UNIT_FACTOR 1024 
   /*! \brief Kernel that shifts a particle position by 0.05 along each axis
    *
    * Each thread obtains a particle through GET_PARTICLE and adds 0.05 to the
    * first `dim` components of that particle's position.
    *
    * \tparam dim number of position components to shift
    * \tparam vector_dist_type kernel-side particle container; must provide getPos()
    *
    * \param vd particle container, passed by value
    */
   template<unsigned int dim, typename vector_dist_type>
   __global__ void move_parts_gpu_test(vector_dist_type vd)
   {
       // GET_PARTICLE is a project macro defined outside this file; presumably it
       // yields the particle handled by this thread -- confirm against its definition.
       auto p = GET_PARTICLE(vd);

       // Unsigned counter: `dim` is unsigned, so this avoids a signed/unsigned comparison.
       for (unsigned int i = 0 ; i < dim ; i++)
       {
           vd.getPos(p)[i] += 0.05;
       }
   }
 
   22BOOST_AUTO_TEST_SUITE( vector_dist_gpu_test )
 
   /*! \brief Print a label followed by a size, on one process only
    *
    * Writes "<test> <sz>\n" to std::cout when
    * create_vcluster().getProcessUnitID() is 0; on any other process the call
    * prints nothing.
    *
    * \param test label to print
    * \param sz value printed after the label
    */
   void print_test(std::string test, size_t sz)
   {
       if (create_vcluster().getProcessUnitID() == 0)
       {
           std::cout << test << " " << sz << "\n";
       }
   }
 
   /*! \brief Kernel that fills properties 0 and 1 of a particle from its position
    *
    * Property 0 receives x + y + z. Property 1 receives the pairwise sums
    * (x + y, x + z, y + z). Property 2 is not written by this kernel.
    *
    * \param vd kernel-side particle container (3 position components, float), passed by value
    */
   __global__ void initialize_props(vector_dist_ker<3, float, aggregate<float, float[3], float[3]>> vd)
   {
       // GET_PARTICLE is a project macro defined outside this file; presumably it
       // yields the particle handled by this thread -- confirm against its definition.
       auto p = GET_PARTICLE(vd);

       // Read each coordinate once instead of fetching it again for every sum.
       const float x = vd.getPos(p)[0];
       const float y = vd.getPos(p)[1];
       const float z = vd.getPos(p)[2];

       vd.template getProp<0>(p) = x + y + z;

       vd.template getProp<1>(p)[0] = x + y;
       vd.template getProp<1>(p)[1] = x + z;
       vd.template getProp<1>(p)[2] = y + z;
   }
 
   42template<
typename T,
typename CellList_type>
 
   43__global__  
void calculate_force(vector_dist_ker<3, T, 
aggregate<T, T[3], T [3]>> vd,
 
   44                                 vector_dist_ker<3, T, 
aggregate<T, T[3], T [3]>> vd_sort,
 
   48    auto p = GET_PARTICLE(vd);
 
   52    auto it = cl.getNNIterator(cl.getCell(xp));
 
   59        auto q1 = it.get_sort();
 
   62        if (q2 == p) {++it; 
continue;}
 
   75            force1 += vd_sort.template getProp<0>(q1)*r1;
 
   80            force2 += vd.template getProp<0>(q2)*r2;
 
   86    vd.template getProp<1>(p)[0] = force1.
get(0);
 
   87    vd.template getProp<1>(p)[1] = force1.get(1);
 
   88    vd.template getProp<1>(p)[2] = force1.get(2);
 
   90    vd.template getProp<2>(p)[0] = force2.get(0);
 
   91    vd.template getProp<2>(p)[1] = force2.get(1);
 
   92    vd.template getProp<2>(p)[2] = force2.get(2);
 
   95template<
typename T, 
typename CellList_type>
 
   96__global__  
void calculate_force_full_sort(vector_dist_ker<3, T, 
aggregate<T, T[3], T [3]>> vd,
 
   97                                           CellList_type cl, 
int rank)
 
  100    GET_PARTICLE_SORT(p,cl);
 
  104    auto it = cl.getNNIterator(cl.getCell(xp));
 
  110        auto q1 = it.get_sort();
 
  112        if (q1 == p) {++it; 
continue;}
 
  120        if (r1.
norm() > 1e-6)
 
  124            force1 += vd.template getProp<0>(q1)*r1;
 
  130    vd.template getProp<1>(p)[0] = force1.
get(0);
 
  131    vd.template getProp<1>(p)[1] = force1.get(1);
 
  132    vd.template getProp<1>(p)[2] = force1.get(2);
 
  135template<
typename CellList_type, 
typename vector_type>
 
  136bool check_force(CellList_type & NN_cpu, 
vector_type & vd)
 
  138    typedef typename vector_type::stype St;
 
  154        auto NNc = NN_cpu.getNNIterator(NN_cpu.getCell(xp));
 
  160            if (q == p.getKey()) {++NNc; 
continue;}
 
  167            if (r2.
norm() > 1e-6)
 
  170                force += vd.template getProp<0>(q)*r2;
 
  176        match &= fabs(vd.template getProp<1>(p)[0] - vd.template getProp<2>(p)[0]) < 0.0003;
 
  177        match &= fabs(vd.template getProp<1>(p)[1] - vd.template getProp<2>(p)[1]) < 0.0003;
 
  178        match &= fabs(vd.template getProp<1>(p)[2] - vd.template getProp<2>(p)[2]) < 0.0003;
 
  180        match &= fabs(vd.template getProp<1>(p)[0] - force.get(0)) < 0.0003;
 
  181        match &= fabs(vd.template getProp<1>(p)[1] - force.get(1)) < 0.0003;
 
  182        match &= fabs(vd.template getProp<1>(p)[2] - force.get(2)) < 0.0003;
 
  186            std::cout << 
"ERROR: " << vd.template getProp<1>(p)[0]  << 
"   " << vd.template getProp<2>(p)[0] << std::endl;
 
  187                    std::cout << 
"ERROR: " << vd.template getProp<1>(p)[1]  << 
"   " << vd.template getProp<2>(p)[1] << std::endl;
 
  188                    std::cout << 
"ERROR: " << vd.template getProp<1>(p)[2]  << 
"   " << vd.template getProp<2>(p)[2] << std::endl;
 
  190                    std::cout << p.getKey() << 
" ERROR2: " << vd.template getProp<1>(p)[0] << 
"   " <<  force.get(0) << std::endl;
 
  191                    std::cout << p.getKey() << 
" ERROR2: " << vd.template getProp<1>(p)[1] << 
"   " <<  force.get(1) << std::endl;
 
  192                    std::cout << p.getKey() << 
" ERROR2: " << vd.template getProp<1>(p)[2] << 
"   " <<  force.get(2) << std::endl;
 
  204BOOST_AUTO_TEST_CASE( vector_dist_gpu_ghost_get )
 
  206    auto & v_cl = create_vcluster();
 
  208    if (v_cl.size() > 16)
 
  217    size_t bc[3]={PERIODIC,PERIODIC,PERIODIC};
 
  227        vd.
getPos(p)[0] = (float)rand() / (float)RAND_MAX;
 
  228        vd.
getPos(p)[1] = (float)rand() / (float)RAND_MAX;
 
  229        vd.
getPos(p)[2] = (float)rand() / (float)RAND_MAX;
 
  233        vd.template getProp<1>(p)[0] = vd.
getPos(p)[0] + vd.
getPos(p)[1];
 
  234        vd.template getProp<1>(p)[1] = vd.
getPos(p)[0] + vd.
getPos(p)[2];
 
  235        vd.template getProp<1>(p)[2] = vd.
getPos(p)[1] + vd.
getPos(p)[2];
 
  237        vd.template getProp<2>(p)[0] = vd.
getPos(p)[0] + 3.0*vd.
getPos(p)[1];
 
  238        vd.template getProp<2>(p)[1] = vd.
getPos(p)[0] + 3.0*vd.
getPos(p)[2];
 
  239        vd.template getProp<2>(p)[2] = vd.
getPos(p)[1] + 3.0*vd.
getPos(p)[2];
 
  248    vd.template ghost_get<0,1,2>();
 
  260        check &= (vd.template getProp<0>(p) == vd.
getPos(p)[0] + vd.
getPos(p)[1] + vd.
getPos(p)[2]);
 
  262        check &= (vd.template getProp<1>(p)[0] == vd.
getPos(p)[0] + vd.
getPos(p)[1]);
 
  263        check &= (vd.template getProp<1>(p)[1] == vd.
getPos(p)[0] + vd.
getPos(p)[2]);
 
  264        check &= (vd.template getProp<1>(p)[2] == vd.
getPos(p)[1] + vd.
getPos(p)[2]);
 
  266        check &= (vd.template getProp<2>(p)[0] == vd.
getPos(p)[0] + 3.0*vd.
getPos(p)[1]);
 
  267        check &= (vd.template getProp<2>(p)[1] == vd.
getPos(p)[0] + 3.0*vd.
getPos(p)[2]);
 
  268        check &= (vd.template getProp<2>(p)[2] == vd.
getPos(p)[1] + 3.0*vd.
getPos(p)[2]);
 
  279    BOOST_REQUIRE(tot_s > 1000);
 
  282template<
typename vector_type, 
typename CellList_type, 
typename CellList_type_cpu>
 
  283void check_cell_list_cpu_and_gpu(
vector_type & vd, CellList_type & NN, CellList_type_cpu & NN_cpu)
 
  285    const auto it5 = vd.getDomainIteratorGPU(32);
 
  287    CUDA_LAUNCH((calculate_force<
typename vector_type::stype,
decltype(NN.toKernel())>),it5,vd.toKernel(),vd.toKernel_sorted(),NN.toKernel(),create_vcluster().rank());
 
  289    vd.template deviceToHostProp<1,2>();
 
  291    bool test = check_force(NN_cpu,vd);
 
  292    BOOST_REQUIRE_EQUAL(test,
true);
 
  302        vd.template getProp<1>(p)[0] = 0.0;
 
  303        vd.template getProp<1>(p)[1] = 0.0;
 
  304        vd.template getProp<1>(p)[2] = 0.0;
 
  309    vd.template hostToDeviceProp<1>();
 
  313    CUDA_LAUNCH((calculate_force_full_sort<
typename vector_type::stype,
decltype(NN.toKernel())>),it5,vd.toKernel_sorted(),NN.toKernel(),create_vcluster().rank());
 
  315    vd.template merge_sort<1>(NN);
 
  316    vd.template deviceToHostProp<1>();
 
  318    test = check_force(NN_cpu,vd);
 
  319    BOOST_REQUIRE_EQUAL(test,
true);
 
  322template<
typename CellList_type>
 
  323void vector_dist_gpu_test_impl()
 
  325    auto & v_cl = create_vcluster();
 
  327    if (v_cl.size() > 16)
 
  336    size_t bc[3]={NON_PERIODIC,NON_PERIODIC,NON_PERIODIC};
 
  340    srand(55067*create_vcluster().rank());
 
  352        vd.
getPos(p)[0] = (float)x / (
float)RAND_MAX;
 
  353        vd.
getPos(p)[1] = (float)y / (
float)RAND_MAX;
 
  354        vd.
getPos(p)[2] = (float)z / (
float)RAND_MAX;
 
  369    BOOST_REQUIRE_EQUAL(size_l,10000);
 
  383        noOut &= ct.isLocal(vd.
getPos(p));
 
  389    BOOST_REQUIRE_EQUAL(noOut,
true);
 
  394    const auto it3 = vd.getDomainIteratorGPU();
 
  399    CUDA_LAUNCH_DIM3(initialize_props,it3.wthr,it3.thr,vd.toKernel());
 
  411        BOOST_REQUIRE_CLOSE(vd.template getProp<0>(p),vd.
getPos(p)[0] + vd.
getPos(p)[1] + vd.
getPos(p)[2],0.01);
 
  413        BOOST_REQUIRE_CLOSE(vd.template getProp<1>(p)[0],vd.
getPos(p)[0] + vd.
getPos(p)[1],0.01);
 
  414        BOOST_REQUIRE_CLOSE(vd.template getProp<1>(p)[1],vd.
getPos(p)[0] + vd.
getPos(p)[2],0.01);
 
  415        BOOST_REQUIRE_CLOSE(vd.template getProp<1>(p)[2],vd.
getPos(p)[1] + vd.
getPos(p)[2],0.01);
 
  428    vd.template hostToDeviceProp<0>();
 
  430    auto NN = vd.template getCellListGPU<CellList_type>(0.1);
 
  432    check_cell_list_cpu_and_gpu(vd,NN,NN_cpu);
 
  434    auto NN_up = vd.template getCellListGPU<CellList_type>(0.1);
 
  438    check_cell_list_cpu_and_gpu(vd,NN_up,NN_cpu);
 
  441template<
typename CellList_type>
 
  442void vector_dist_gpu_make_sort_test_impl()
 
  444    auto & v_cl = create_vcluster();
 
  446    if (v_cl.size() > 16)
 
  455    size_t bc[3]={NON_PERIODIC,NON_PERIODIC,NON_PERIODIC};
 
  459    srand(55067*create_vcluster().rank());
 
  471        vd.
getPos(p)[0] = (float)x / (
float)RAND_MAX;
 
  472        vd.
getPos(p)[1] = (float)y / (
float)RAND_MAX;
 
  473        vd.
getPos(p)[2] = (float)z / (
float)RAND_MAX;
 
  481    vd.
map(RUN_ON_DEVICE);
 
  483    auto it3 = vd.getDomainIteratorGPU();
 
  485    CUDA_LAUNCH_DIM3(initialize_props,it3.wthr,it3.thr,vd.toKernel());
 
  493    auto NN = vd.template getCellListGPU<CellList_type>(0.1);
 
  503    for (
size_t i = 0 ; i < NN_cpu1.getNCells() ; i++)
 
  505        match &= NN_cpu1.getNelements(i) == NN_cpu2.getNelements(i);
 
  508    BOOST_REQUIRE_EQUAL(match,
true);
 
  514    NN = vd.template getCellListGPU<CellList_type>(0.1);
 
  521    tmp_pos.template deviceToHost<0>();
 
  527    NN = vd.template getCellListGPU<CellList_type>(0.1);
 
  531    vd.make_sort_from(NN);
 
  535    tmp_pos.deviceToHost<0>();
 
  536    vd.deviceToHostPos();
 
  539    for (
size_t i = 0 ; i < vd.size_local() ; i++)
 
  545        auto c1 = NN.getCell(p1);
 
  546        auto c2 = NN.getCell(p1);
 
  551    BOOST_REQUIRE_EQUAL(match,
true);
 
  /*! \brief Run vector_dist_gpu_make_sort_test_impl with CELLLIST_GPU_SPARSE<3,float> */
  BOOST_AUTO_TEST_CASE(vector_dist_gpu_make_sort_sparse)
  {
      vector_dist_gpu_make_sort_test_impl<CELLLIST_GPU_SPARSE<3,float>>();
  }
 
  /*! \brief Run vector_dist_gpu_make_sort_test_impl with CellList_gpu<3,float,CudaMemory,shift_only<3,float>> */
  BOOST_AUTO_TEST_CASE(vector_dist_gpu_make_sort)
  {
      vector_dist_gpu_make_sort_test_impl<CellList_gpu<3,float,CudaMemory,shift_only<3, float>>>();
  }
 
  /*! \brief Run vector_dist_gpu_test_impl with CellList_gpu<3,float,CudaMemory,shift_only<3,float>> */
  BOOST_AUTO_TEST_CASE( vector_dist_gpu_test)
  {
      vector_dist_gpu_test_impl<CellList_gpu<3,float,CudaMemory,shift_only<3, float>>>();
  }
 
  /*! \brief Run vector_dist_gpu_test_impl with CELLLIST_GPU_SPARSE<3,float> */
  BOOST_AUTO_TEST_CASE( vector_dist_gpu_test_sparse)
  {
      vector_dist_gpu_test_impl<CELLLIST_GPU_SPARSE<3,float>>();
  }
 
  576void vdist_calc_gpu_test()
 
  578    auto & v_cl = create_vcluster();
 
  580    if (v_cl.size() > 16)
 
  583    Box<3,St> domain({0.0,0.0,0.0},{1.0,1.0,1.0});
 
  589    size_t bc[3]={PERIODIC,PERIODIC,PERIODIC};
 
  599    srand(v_cl.rank()*10000);
 
  606        vd.
getPos(p)[0] = (St)rand() / (float)RAND_MAX;
 
  607        vd.
getPos(p)[1] = (St)rand() / (float)RAND_MAX;
 
  608        vd.
getPos(p)[2] = (St)rand() / (float)RAND_MAX;
 
  612        vd.template getProp<1>(p)[0] = vd.
getPos(p)[0];
 
  613        vd.template getProp<1>(p)[1] = vd.
getPos(p)[1];
 
  614        vd.template getProp<1>(p)[2] = vd.
getPos(p)[2];
 
  616        vd.template getProp<2>(p)[0] = vd.
getPos(p)[0] + vd.
getPos(p)[1];
 
  617        vd.template getProp<2>(p)[1] = vd.
getPos(p)[0] + vd.
getPos(p)[2];
 
  618        vd.template getProp<2>(p)[2] = vd.
getPos(p)[1] + vd.
getPos(p)[2];
 
  625    vd.template hostToDeviceProp<0,1,2>();
 
  628    vd.
map(RUN_ON_DEVICE);
 
  633    vd.template deviceToHostProp<0,1,2>();
 
  647        vd.template getProp<0>(p) = 0.0;
 
  649        vd.template getProp<0>(p) = 0.0;
 
  650        vd.template getProp<0>(p) = 0.0;
 
  651        vd.template getProp<0>(p) = 0.0;
 
  653        vd.template getProp<0>(p) = 0.0;
 
  654        vd.template getProp<0>(p) = 0.0;
 
  655        vd.template getProp<0>(p) = 0.0;
 
  663    vd.template deviceToHostProp<0,1,2>();
 
  674        match &= vd.template getProp<0>(p) == vd.
getPos(p)[0] + vd.
getPos(p)[1] + vd.
getPos(p)[2];
 
  676        match &= vd.template getProp<1>(p)[0] == vd.
getPos(p)[0];
 
  677        match &= vd.template getProp<1>(p)[1] == vd.
getPos(p)[1];
 
  678        match &= vd.template getProp<1>(p)[2] == vd.
getPos(p)[2];
 
  680        match &= vd.template getProp<2>(p)[0] == vd.
getPos(p)[0] + vd.
getPos(p)[1];
 
  681        match &= vd.template getProp<2>(p)[1] == vd.
getPos(p)[0] + vd.
getPos(p)[2];
 
  682        match &= vd.template getProp<2>(p)[2] == vd.
getPos(p)[1] + vd.
getPos(p)[2];
 
  687    BOOST_REQUIRE_EQUAL(match,
true);
 
  700    count_local_n_local<3>(vd,it5,bc,domain,dom_ext,l_cnt,nl_cnt,n_out);
 
  702    BOOST_REQUIRE_EQUAL(n_out,0);
 
  707    for (
size_t i = 0 ; i < 10 ; i++)
 
  709        vd.
map(RUN_ON_DEVICE);
 
  712        vd.template deviceToHostProp<0,1,2>();
 
  726            vd_cpu.getLastPos()[0] = vd.
getPos(p)[0];
 
  727            vd_cpu.getLastPos()[1] = vd.
getPos(p)[1];
 
  728            vd_cpu.getLastPos()[2] = vd.
getPos(p)[2];
 
  730            vd_cpu.template getLastProp<0>() = vd.template getProp<0>(p);
 
  732            vd_cpu.template getLastProp<1>()[0] = vd.template getProp<1>(p)[0];
 
  733            vd_cpu.template getLastProp<1>()[1] = vd.template getProp<1>(p)[1];
 
  734            vd_cpu.template getLastProp<1>()[2] = vd.template getProp<1>(p)[2];
 
  736            vd_cpu.template getLastProp<2>()[0] = vd.template getProp<2>(p)[0];
 
  737            vd_cpu.template getLastProp<2>()[1] = vd.template getProp<2>(p)[1];
 
  738            vd_cpu.template getLastProp<2>()[2] = vd.template getProp<2>(p)[2];
 
  743        vd_cpu.template ghost_get<0,1,2>();
 
  747        vd.template ghost_get<0,1,2>(RUN_ON_DEVICE);
 
  752        vd.template deviceToHostProp<0,1,2>();
 
  767            bool operator<(
const part & tmp)
 const 
  769                if (xp.
get(0) < tmp.xp.get(0))
 
  771                else if (xp.
get(0) > tmp.xp.get(0))
 
  774                if (xp.
get(1) < tmp.xp.get(1))
 
  776                else if (xp.
get(1) > tmp.xp.get(1))
 
  779                if (xp.
get(2) < tmp.xp.get(2))
 
  781                else if (xp.
get(2) > tmp.xp.get(2))
 
  791        cpu_sort.resize(vd_cpu.size_local_with_ghost() - vd_cpu.size_local());
 
  794        BOOST_REQUIRE_EQUAL(cpu_sort.
size(),gpu_sort.
size());
 
  799        while (itc2.isNext())
 
  803            cpu_sort.get(cnt).xp.get(0) = vd_cpu.getPos(p)[0];
 
  804            gpu_sort.get(cnt).xp.get(0) = vd.
getPos(p)[0];
 
  805            cpu_sort.get(cnt).xp.get(1) = vd_cpu.getPos(p)[1];
 
  806            gpu_sort.get(cnt).xp.get(1) = vd.
getPos(p)[1];
 
  807            cpu_sort.get(cnt).xp.get(2) = vd_cpu.getPos(p)[2];
 
  808            gpu_sort.get(cnt).xp.get(2) = vd.
getPos(p)[2];
 
  810            cpu_sort.get(cnt).prp0 = vd_cpu.template getProp<0>(p);
 
  811            gpu_sort.get(cnt).prp0 = vd.template getProp<0>(p);
 
  813            cpu_sort.get(cnt).prp1[0] = vd_cpu.template getProp<1>(p)[0];
 
  814            gpu_sort.get(cnt).prp1[0] = vd.template getProp<1>(p)[0];
 
  815            cpu_sort.get(cnt).prp1[1] = vd_cpu.template getProp<1>(p)[1];
 
  816            gpu_sort.get(cnt).prp1[1] = vd.template getProp<1>(p)[1];
 
  817            cpu_sort.get(cnt).prp1[2] = vd_cpu.template getProp<1>(p)[2];
 
  818            gpu_sort.get(cnt).prp1[2] = vd.template getProp<1>(p)[2];
 
  820            cpu_sort.get(cnt).prp2[0] = vd_cpu.template getProp<2>(p)[0];
 
  821            gpu_sort.get(cnt).prp2[0] = vd.template getProp<2>(p)[0];
 
  822            cpu_sort.get(cnt).prp2[1] = vd_cpu.template getProp<2>(p)[1];
 
  823            gpu_sort.get(cnt).prp2[1] = vd.template getProp<2>(p)[1];
 
  824            cpu_sort.get(cnt).prp2[2] = vd_cpu.template getProp<2>(p)[2];
 
  825            gpu_sort.get(cnt).prp2[2] = vd.template getProp<2>(p)[2];
 
  834        for (
size_t i = 0 ; i < cpu_sort.
size() ; i++)
 
  836            match &= cpu_sort.get(i).xp.get(0) == gpu_sort.get(i).xp.get(0);
 
  837            match &= cpu_sort.get(i).xp.get(1) == gpu_sort.get(i).xp.get(1);
 
  838            match &= cpu_sort.get(i).xp.get(2) == gpu_sort.get(i).xp.get(2);
 
  840            match &= cpu_sort.get(i).prp0 == gpu_sort.get(i).prp0;
 
  841            match &= cpu_sort.get(i).prp1[0] == gpu_sort.get(i).prp1[0];
 
  842            match &= cpu_sort.get(i).prp1[1] == gpu_sort.get(i).prp1[1];
 
  843            match &= cpu_sort.get(i).prp1[2] == gpu_sort.get(i).prp1[2];
 
  845            match &= cpu_sort.get(i).prp2[0] == gpu_sort.get(i).prp2[0];
 
  846            match &= cpu_sort.get(i).prp2[1] == gpu_sort.get(i).prp2[1];
 
  847            match &= cpu_sort.get(i).prp2[2] == gpu_sort.get(i).prp2[2];
 
  850        BOOST_REQUIRE_EQUAL(match,
true);
 
  854        auto ite = vd.getDomainIteratorGPU();
 
  855        CUDA_LAUNCH_DIM3((move_parts_gpu_test<3,
decltype(vd.toKernel())>),ite.wthr,ite.thr,vd.toKernel());
 
  /*! \brief Run vdist_calc_gpu_test once with float and once with double */
  BOOST_AUTO_TEST_CASE( vector_dist_map_on_gpu_test)
  {
      vdist_calc_gpu_test<float>();
      vdist_calc_gpu_test<double>();
  }
 
  865BOOST_AUTO_TEST_CASE(vector_dist_reduce)
 
  867    auto & v_cl = create_vcluster();
 
  869    if (v_cl.size() > 16)
 
  878    size_t bc[3]={PERIODIC,PERIODIC,PERIODIC};
 
  893        vd.template getProp<0>(p) = fc;
 
  894        vd.template getProp<1>(p) = dc;
 
  895        vd.template getProp<2>(p) = ic;
 
  896        vd.template getProp<3>(p) = sc;
 
  906    vd.template hostToDeviceProp<0,1,2,3>();
 
  908    float redf = reduce_local<0,_add_>(vd);
 
  909    double redd = reduce_local<1,_add_>(vd);
 
  910    int redi = reduce_local<2,_add_>(vd);
 
  911    size_t reds = reduce_local<3,_add_>(vd);
 
  918    float redf2 = reduce_local<0,_max_>(vd);
 
  919    double redd2 = reduce_local<1,_max_>(vd);
 
  920    int redi2 = reduce_local<2,_max_>(vd);
 
  921    size_t reds2 = reduce_local<3,_max_>(vd);
 
  929template<
typename CellList_type>
 
  930void vector_dist_dlb_on_cuda_impl(
size_t k,
double r_cut)
 
  932    std::random_device r;
 
  934    std::seed_seq seed2{ create_vcluster().rank(),
 
  935                         create_vcluster().rank(),
 
  936                         create_vcluster().rank(),
 
  937                         create_vcluster().rank(),
 
  938                         create_vcluster().rank(),
 
  939                         create_vcluster().rank(),
 
  940                         create_vcluster().rank(),
 
  941                         create_vcluster().rank()};
 
  942    std::mt19937 e2(seed2);
 
  951    std::uniform_real_distribution<double> unif(0.0,0.3);
 
  955    size_t bc[3] = {PERIODIC,PERIODIC,PERIODIC};
 
  963        for(
size_t i = 0 ; i < k ; i++)
 
  975    vd.template hostToDeviceProp<0>();
 
  977    vd.
map(RUN_ON_DEVICE);
 
  978    vd.template ghost_get<>(RUN_ON_DEVICE);
 
  983    vd.template deviceToHostProp<0>();
 
  997        vd.template getProp<0>(p) = VV.getNNPart(p.getKey());
 
 1003    vd.template hostToDeviceProp<0>();
 
 1009    vd.
map(RUN_ON_DEVICE);
 
 1021    for (
size_t i = 0 ; i < loads.
size() ; i++)
 
 1023        double load_f = load;
 
 1024        double load_fc = loads.get(i);
 
 1026        BOOST_REQUIRE_CLOSE(load_f,load_fc,7.0);
 
 1033    for (
size_t i = 0 ; i < 25 ; i++)
 
 1045            vd.
getPos(p)[0] += v.get(0) * 0.09;
 
 1046            vd.
getPos(p)[1] += v.get(1) * 0.09;
 
 1047            vd.
getPos(p)[2] += v.get(2) * 0.09;
 
 1054        vd.
map(RUN_ON_DEVICE);
 
 1055        vd.template ghost_get<0>(RUN_ON_DEVICE);
 
 1058        vd.template deviceToHostProp<0,1,2>();
 
 1061        auto NN_gpu = vd.template getCellListGPU<CellList_type>(r_cut);
 
 1063        check_cell_list_cpu_and_gpu(vd,NN_gpu,NN_cpu);
 
 1070        while (it2.isNext())
 
 1074            match &= vd.template getProp<0>(p) == VV2.getNNPart(p.getKey());
 
 1079        BOOST_REQUIRE_EQUAL(match,
true);
 
 1084        vd.
map(RUN_ON_DEVICE);
 
 1088        vd.template ghost_get<0>(RUN_ON_DEVICE);
 
 1090        vd.template deviceToHostProp<0>();
 
 1099        for (
size_t i = 0 ; i < loads.
size() ; i++)
 
 1101            double load_f = load;
 
 1102            double load_fc = loads.get(i);
 
 1105            BOOST_REQUIRE_CLOSE(load_f,load_fc,30.0);
 
 1107            BOOST_REQUIRE_CLOSE(load_f,load_fc,10.0);
 
 1113template<
typename CellList_type>
 
 1114void vector_dist_dlb_on_cuda_impl_async(
size_t k,
double r_cut)
 
 1116    std::random_device r;
 
 1118    std::seed_seq seed2{r() + create_vcluster().rank(),
 
 1119                        r() + create_vcluster().rank(),
 
 1120                        r() + create_vcluster().rank(),
 
 1121                        r() + create_vcluster().rank(),
 
 1122                        r() + create_vcluster().rank(),
 
 1123                        r() + create_vcluster().rank(),
 
 1124                        r() + create_vcluster().rank(),
 
 1125                        r() + create_vcluster().rank()};
 
 1126    std::mt19937 e2(seed2);
 
 1135    std::uniform_real_distribution<double> unif(0.0,0.3);
 
 1139    size_t bc[3] = {PERIODIC,PERIODIC,PERIODIC};
 
 1147        for(
size_t i = 0 ; i < k ; i++)
 
 1159    vd.template hostToDeviceProp<0>();
 
 1161    vd.
map(RUN_ON_DEVICE);
 
 1162    vd.template Ighost_get<>(RUN_ON_DEVICE);
 
 1163    vd.template ghost_wait<>(RUN_ON_DEVICE);
 
 1168    vd.template deviceToHostProp<0>();
 
 1182        vd.template getProp<0>(p) = VV.getNNPart(p.getKey());
 
 1188    vd.template hostToDeviceProp<0>();
 
 1194    vd.
map(RUN_ON_DEVICE);
 
 1206    for (
size_t i = 0 ; i < loads.
size() ; i++)
 
 1208        double load_f = load;
 
 1209        double load_fc = loads.get(i);
 
 1211        BOOST_REQUIRE_CLOSE(load_f,load_fc,7.0);
 
 1218    for (
size_t i = 0 ; i < 25 ; i++)
 
 1230            vd.
getPos(p)[0] += v.get(0) * 0.09;
 
 1231            vd.
getPos(p)[1] += v.get(1) * 0.09;
 
 1232            vd.
getPos(p)[2] += v.get(2) * 0.09;
 
 1239        vd.
map(RUN_ON_DEVICE);
 
 1240        vd.template Ighost_get<0>(RUN_ON_DEVICE);
 
 1241        vd.template ghost_wait<0>(RUN_ON_DEVICE);
 
 1243        vd.template deviceToHostProp<0,1,2>();
 
 1246        auto NN_gpu = vd.template getCellListGPU<CellList_type>(r_cut);
 
 1248        check_cell_list_cpu_and_gpu(vd,NN_gpu,NN_cpu);
 
 1255        while (it2.isNext())
 
 1259            match &= vd.template getProp<0>(p) == VV2.getNNPart(p.getKey());
 
 1264        BOOST_REQUIRE_EQUAL(match,
true);
 
 1269        vd.
map(RUN_ON_DEVICE);
 
 1275        vd.template Ighost_get<0>(RUN_ON_DEVICE);
 
 1276        vd.template ghost_wait<0>(RUN_ON_DEVICE);
 
 1278        vd.template deviceToHostProp<0>();
 
 1287        for (
size_t i = 0 ; i < loads.
size() ; i++)
 
 1289            double load_f = load;
 
 1290            double load_fc = loads.get(i);
 
 1292            BOOST_REQUIRE_CLOSE(load_f,load_fc,10.0);
 
 /*! \brief Run vector_dist_dlb_on_cuda_impl_async with k = 50000 and r_cut = 0.01
  *
  * The cell list type is CellList_gpu<3,double,CudaMemory,shift_only<3,double>,unsigned int,int,false>.
  */
 BOOST_AUTO_TEST_CASE(vector_dist_dlb_on_cuda_async)
 {
     vector_dist_dlb_on_cuda_impl_async<CellList_gpu<3,double,CudaMemory,shift_only<3,double>,unsigned int,int,false>>(50000,0.01);
 }
 
 /*! \brief Run vector_dist_dlb_on_cuda_impl with k = 50000 and r_cut = 0.01
  *
  * The cell list type is CellList_gpu<3,double,CudaMemory,shift_only<3,double>,unsigned int,int,false>.
  */
 BOOST_AUTO_TEST_CASE(vector_dist_dlb_on_cuda)
 {
     vector_dist_dlb_on_cuda_impl<CellList_gpu<3,double,CudaMemory,shift_only<3,double>,unsigned int,int,false>>(50000,0.01);
 }
 
 /*! \brief Run vector_dist_dlb_on_cuda_impl with CELLLIST_GPU_SPARSE<3,double>, k = 50000 and r_cut = 0.01 */
 BOOST_AUTO_TEST_CASE(vector_dist_dlb_on_cuda_sparse)
 {
     vector_dist_dlb_on_cuda_impl<CELLLIST_GPU_SPARSE<3,double>>(50000,0.01);
 }
 
 1312BOOST_AUTO_TEST_CASE(vector_dist_dlb_on_cuda2)
 
 1314    if (create_vcluster().size() <= 3)
 
 1318    vector_dist_dlb_on_cuda_impl<CellList_gpu<3,double,CudaMemory,shift_only<3,double>,
unsigned int,
int,
false>>(1000000,0.01);
 
 1322BOOST_AUTO_TEST_CASE(vector_dist_dlb_on_cuda3)
 
 1324    if (create_vcluster().size() < 8)
 
 1328    vector_dist_dlb_on_cuda_impl<CellList_gpu<3,double,CudaMemory,shift_only<3,double>,
unsigned int,
int,
false>>(15000000,0.005);
 
 1333BOOST_AUTO_TEST_CASE(vector_dist_keep_prop_on_cuda)
 
 1344    size_t bc[3] = {PERIODIC,PERIODIC,PERIODIC};
 
 1352        for(
size_t i = 0 ; i < 50000 ; i++)
 
 1356            vd.
getLastPos()[0] = ((double)rand())/RAND_MAX * 0.3;
 
 1357            vd.
getLastPos()[1] = ((double)rand())/RAND_MAX * 0.3;
 
 1358            vd.
getLastPos()[2] = ((double)rand())/RAND_MAX * 0.3;
 
 1364    vd.template hostToDeviceProp<0>();
 
 1366    vd.
map(RUN_ON_DEVICE);
 
 1367    vd.template ghost_get<>(RUN_ON_DEVICE);
 
 1372    vd.template deviceToHostProp<0>();
 
 1383        vd.template getProp<0>(p) = 0.0;
 
 1385        vd.template getProp<1>(p)[0] = 1000.0;
 
 1386        vd.template getProp<1>(p)[1] = 2000.0;
 
 1387        vd.template getProp<1>(p)[2] = 3000.0;
 
 1389        vd.template getProp<2>(p)[0][0] = 6000,0;
 
 1390        vd.template getProp<2>(p)[0][1] = 7000.0;
 
 1391        vd.template getProp<2>(p)[0][2] = 8000.0;
 
 1392        vd.template getProp<2>(p)[1][0] = 9000.0;
 
 1393        vd.template getProp<2>(p)[1][1] = 10000.0;
 
 1394        vd.template getProp<2>(p)[1][2] = 11000.0;
 
 1395        vd.template getProp<2>(p)[2][0] = 12000.0;
 
 1396        vd.template getProp<2>(p)[2][1] = 13000.0;
 
 1397        vd.template getProp<2>(p)[2][2] = 14000.0;
 
 1403    vd.template hostToDeviceProp<0,1,2>();
 
 1409    vd.
map(RUN_ON_DEVICE);
 
 1421    for (
size_t i = 0 ; i < loads.
size() ; i++)
 
 1423        double load_f = load;
 
 1424        double load_fc = loads.get(i);
 
 1426        BOOST_REQUIRE_CLOSE(load_f,load_fc,7.0);
 
 1435    for (
size_t i = 0 ; i < 25 ; i++)
 
 1449                vd.
getPos(p)[0] += v.get(0) * 0.09;
 
 1450                vd.
getPos(p)[1] += v.get(1) * 0.09;
 
 1451                vd.
getPos(p)[2] += v.get(2) * 0.09;
 
 1458            vd.
map(RUN_ON_DEVICE);
 
 1459            vd.template ghost_get<>(RUN_ON_DEVICE);
 
 1461            vd.template deviceToHostProp<0,1,2>();
 
 1466            vd.
map(RUN_ON_DEVICE);
 
 1470            vd.template ghost_get<0>(RUN_ON_DEVICE);
 
 1472            vd.template deviceToHostProp<0,1,2>();
 
 1481            for (
size_t i = 0 ; i < loads.
size() ; i++)
 
 1483                double load_f = load;
 
 1484                double load_fc = loads.get(i);
 
 1486                BOOST_REQUIRE_CLOSE(load_f,load_fc,10.0);
 
 1491            vd.template deviceToHostProp<0,1,2>();
 
 1496            while (it2.isNext())
 
 1500                vd.template getProp<0>(p) += 1;
 
 1502                vd.template getProp<1>(p)[0] += 1.0;
 
 1503                vd.template getProp<1>(p)[1] += 1.0;
 
 1504                vd.template getProp<1>(p)[2] += 1.0;
 
 1506                vd.template getProp<2>(p)[0][0] += 1.0;
 
 1507                vd.template getProp<2>(p)[0][1] += 1.0;
 
 1508                vd.template getProp<2>(p)[0][2] += 1.0;
 
 1509                vd.template getProp<2>(p)[1][0] += 1.0;
 
 1510                vd.template getProp<2>(p)[1][1] += 1.0;
 
 1511                vd.template getProp<2>(p)[1][2] += 1.0;
 
 1512                vd.template getProp<2>(p)[2][0] += 1.0;
 
 1513                vd.template getProp<2>(p)[2][1] += 1.0;
 
 1514                vd.template getProp<2>(p)[2][2] += 1.0;
 
 1519            vd.template hostToDeviceProp<0,1,2>();
 
 1523            vd.template ghost_get<0,1,2>(RUN_ON_DEVICE | KEEP_PROPERTIES);
 
 1524            vd.template deviceToHostProp<0,1,2>();
 
 1530            while (itg.isNext())
 
 1534                match &= vd.template getProp<0>(p) == base;
 
 1536                match &= vd.template getProp<1>(p)[0] == base + 1000.0;
 
 1537                match &= vd.template getProp<1>(p)[1] == base + 2000.0;
 
 1538                match &= vd.template getProp<1>(p)[2] == base + 3000.0;
 
 1540                match &= vd.template getProp<2>(p)[0][0] == base + 6000.0;
 
 1541                match &= vd.template getProp<2>(p)[0][1] == base + 7000.0;
 
 1542                match &= vd.template getProp<2>(p)[0][2] == base + 8000.0;
 
 1543                match &= vd.template getProp<2>(p)[1][0] == base + 9000.0;
 
 1544                match &= vd.template getProp<2>(p)[1][1] == base + 10000.0;
 
 1545                match &= vd.template getProp<2>(p)[1][2] == base + 11000.0;
 
 1546                match &= vd.template getProp<2>(p)[2][0] == base + 12000.0;
 
 1547                match &= vd.template getProp<2>(p)[2][1] == base + 13000.0;
 
 1548                match &= vd.template getProp<2>(p)[2][2] == base + 14000.0;
 
 1553            BOOST_REQUIRE_EQUAL(match,
true);
 
 1560    __device__ 
static bool check(
int c)
 
 1566BOOST_AUTO_TEST_CASE(vector_dist_get_index_set)
 
 1570    size_t bc[3] = {PERIODIC,PERIODIC,PERIODIC};
 
 1572    if (create_vcluster().size() >= 16)
 
 1579    auto it = vdg.getDomainIterator();
 
 1585        vdg.getPos(p)[0] = (double)rand() / RAND_MAX;
 
 1586        vdg.getPos(p)[1] = (double)rand() / RAND_MAX;
 
 1587        vdg.getPos(p)[2] = (double)rand() / RAND_MAX;
 
 1589        vdg.template getProp<0>(p) = (
int)((
double)rand() / RAND_MAX / 0.5);
 
 1591        vdg.template getProp<1>(p) = (double)rand() / RAND_MAX;
 
 1598    vdg.hostToDeviceProp<0,1>();
 
 1599    vdg.hostToDevicePos();
 
 1601    auto cl = vdg.getCellListGPU(0.1);
 
 1607    get_indexes_by_type<0,type_is_one>(vdg.getPropVectorSort(),ids,vdg.size_local(),v_cl.
getgpuContext());
 
 1611    ids.template deviceToHost<0>();
 
 1613    auto & vs = vdg.getPropVectorSort();
 
 1614    vs.template deviceToHost<0>();
 
 1618    for (
int i = 0 ; i < ids.
size() ; i++)
 
 1620        if (vs.template get<0>(ids.template get<0>(i)) != 1)
 
 1624    BOOST_REQUIRE_EQUAL(match,
true);
 
 1627BOOST_AUTO_TEST_CASE(vector_dist_compare_host_device)
 
 1631    size_t bc[3] = {PERIODIC,PERIODIC,PERIODIC};
 
 1633    if (create_vcluster().size() >= 16)
 
 1638    auto it = vdg.getDomainIterator();
 
 1644        vdg.getPos(p)[0] = (double)rand() / RAND_MAX;
 
 1645        vdg.getPos(p)[1] = (double)rand() / RAND_MAX;
 
 1646        vdg.getPos(p)[2] = (double)rand() / RAND_MAX;
 
 1648        vdg.template getProp<0>(p) = (double)rand() / RAND_MAX;
 
 1650        vdg.template getProp<1>(p)[0] = (double)rand() / RAND_MAX;
 
 1651        vdg.template getProp<1>(p)[1] = (double)rand() / RAND_MAX;
 
 1652        vdg.template getProp<1>(p)[2] = (double)rand() / RAND_MAX;
 
 1654        vdg.template getProp<2>(p)[0][0] = (double)rand() / RAND_MAX;
 
 1655        vdg.template getProp<2>(p)[0][1] = (double)rand() / RAND_MAX;
 
 1656        vdg.template getProp<2>(p)[0][2] = (double)rand() / RAND_MAX;
 
 1657        vdg.template getProp<2>(p)[1][0] = (double)rand() / RAND_MAX;
 
 1658        vdg.template getProp<2>(p)[1][1] = (double)rand() / RAND_MAX;
 
 1659        vdg.template getProp<2>(p)[1][2] = (double)rand() / RAND_MAX;
 
 1660        vdg.template getProp<2>(p)[2][0] = (double)rand() / RAND_MAX;
 
 1661        vdg.template getProp<2>(p)[2][1] = (double)rand() / RAND_MAX;
 
 1662        vdg.template getProp<2>(p)[2][2] = (double)rand() / RAND_MAX;
 
 1669    vdg.hostToDeviceProp<0,1,2>();
 
 1670    vdg.hostToDevicePos();
 
 1672    bool test = vdg.compareHostAndDevicePos(0.00001,0.00000001);
 
 1673    BOOST_REQUIRE_EQUAL(test,
true);
 
 1675    vdg.getPos(100)[0] = 0.99999999;
 
 1677    test = vdg.compareHostAndDevicePos(0.00001,0.00000001);
 
 1678    BOOST_REQUIRE_EQUAL(test,
false);
 
 1680    vdg.hostToDevicePos();
 
 1681    vdg.getPos(100)[0] = 0.99999999;
 
 1683    test = vdg.compareHostAndDevicePos(0.00001,0.00000001);
 
 1684    BOOST_REQUIRE_EQUAL(test,
true);
 
 1688    test = vdg.compareHostAndDeviceProp<1>(0.00001,0.00000001);
 
 1689    BOOST_REQUIRE_EQUAL(test,
true);
 
 1691    vdg.getProp<1>(103)[0] = 0.99999999;
 
 1693    test = vdg.compareHostAndDeviceProp<1>(0.00001,0.00000001);
 
 1694    BOOST_REQUIRE_EQUAL(test,
false);
 
 1696    vdg.hostToDeviceProp<1>();
 
 1697    vdg.getProp<1>(103)[0] = 0.99999999;
 
 1699    test = vdg.compareHostAndDeviceProp<1>(0.00001,0.00000001);
 
 1700    BOOST_REQUIRE_EQUAL(test,
true);
 
 1705    test = vdg.compareHostAndDeviceProp<0>(0.00001,0.00000001);
 
 1706    BOOST_REQUIRE_EQUAL(test,
true);
 
 1708    vdg.getProp<0>(105) = 0.99999999;
 
 1710    test = vdg.compareHostAndDeviceProp<0>(0.00001,0.00000001);
 
 1711    BOOST_REQUIRE_EQUAL(test,
false);
 
 1713    vdg.hostToDeviceProp<0>();
 
 1714    vdg.getProp<0>(105) = 0.99999999;
 
 1716    test = vdg.compareHostAndDeviceProp<0>(0.00001,0.00000001);
 
 1717    BOOST_REQUIRE_EQUAL(test,
true);
 
 1723    test = vdg.compareHostAndDeviceProp<2>(0.00001,0.00000001);
 
 1724    BOOST_REQUIRE_EQUAL(test,
true);
 
 1726    vdg.getProp<2>(108)[1][2] = 0.99999999;
 
 1728    test = vdg.compareHostAndDeviceProp<2>(0.00001,0.00000001);
 
 1729    BOOST_REQUIRE_EQUAL(test,
false);
 
 1731    vdg.hostToDeviceProp<2>();
 
 1732    vdg.getProp<2>(108)[1][2] = 0.99999999;
 
 1734    test = vdg.compareHostAndDeviceProp<2>(0.00001,0.00000001);
 
 1735    BOOST_REQUIRE_EQUAL(test,
true);
 
 /*! \brief Kernel that writes index-dependent marker values into properties 0, 1 and 2
  *
  * The thread with global index i (threadIdx.x + blockIdx.x * blockDim.x) writes
  * element i, provided i < vds.size():
  *
  *  - property 0:       1000 + i
  *  - property 1[c]:    2000 + i, 3000 + i, 4000 + i            (c = 0,1,2)
  *  - property 2[r][c]: 12000 + 10000*r + 1000*c + i            (r,c = 0,1,2)
  *
  * \tparam vector_dist_type kernel-side particle container; must provide size() and getProp<>()
  *
  * \param vds particle container, passed by value
  */
 template<typename vector_dist_type>
 __global__ void assign_to_ghost(vector_dist_type vds)
 {
     int i = threadIdx.x + blockIdx.x * blockDim.x;

     // Threads past the end of the container have nothing to write.
     if (i >= vds.size())    {return;}

     vds.template getProp<0>(i) = 1000.0 + i;

     vds.template getProp<1>(i)[0] = 2000.0 + i;
     vds.template getProp<1>(i)[1] = 3000.0 + i;
     vds.template getProp<1>(i)[2] = 4000.0 + i;

     vds.template getProp<2>(i)[0][0] = 12000.0 + i;
     vds.template getProp<2>(i)[0][1] = 13000.0 + i;
     vds.template getProp<2>(i)[0][2] = 14000.0 + i;
     vds.template getProp<2>(i)[1][0] = 22000.0 + i;
     vds.template getProp<2>(i)[1][1] = 23000.0 + i;
     vds.template getProp<2>(i)[1][2] = 24000.0 + i;
     vds.template getProp<2>(i)[2][0] = 32000.0 + i;
     vds.template getProp<2>(i)[2][1] = 33000.0 + i;
     vds.template getProp<2>(i)[2][2] = 34000.0 + i;
 }
 
 // Test case: runs the assign_to_ghost kernel over the domain-and-ghost GPU
 // iterator of vdg, copies properties 0, 1 and 2 back to the host and checks
 // every element reached by the host domain-and-ghost iterator against the
 // values the kernel writes (base constant + element key).
 // NOTE(review): this listing omits part of the body (the declarations of
 // vdg and check, and the loop header over `it`, are not visible here).
 1763BOOST_AUTO_TEST_CASE(vector_dist_domain_and_ghost_test)
 
 // Boundary conditions: PERIODIC on all three axes
 1767        size_t bc[3] = {PERIODIC,PERIODIC,PERIODIC};
 
 // Branch on create_vcluster().size() >= 16; the body of the branch is not
 // visible in this listing (presumably an early exit for large runs - TODO confirm)
 1769        if (create_vcluster().size() >= 16)
 
 // Iterator handed to CUDA_LAUNCH below as the launch range for the kernel
 1774        auto ite = vdg.getDomainAndGhostIteratorGPU();
 
 // Run assign_to_ghost on the kernel-side view of vdg
 1776        CUDA_LAUNCH(assign_to_ghost,ite,vdg.toKernel());
 
 // Bring properties 0, 1 and 2 back from device to host before checking them
 1778        vdg.template deviceToHostProp<0,1,2>();
 
 // Host-side iterator over domain and ghost elements used for the verification
 1781        auto it = vdg.getDomainAndGhostIterator();
 
 // Property 0 (scalar): kernel writes 1000 + index
 1789                check &= vdg.template getProp<0>(k) == 1000.0 + k.getKey();
 
 // Property 1 (3 components): kernel writes 2000/3000/4000 + index
 1791                check &= vdg.template getProp<1>(k)[0] == 2000.0 + k.getKey();
 
 1792                check &= vdg.template getProp<1>(k)[1] == 3000.0 + k.getKey();
 
 1793                check &= vdg.template getProp<1>(k)[2] == 4000.0 + k.getKey();
 
 // Property 2 (3x3): one comparison per entry, mirroring the kernel's constants
 1795                check &= vdg.template getProp<2>(k)[0][0] == 12000.0 + k.getKey();
 
 1796                check &= vdg.template getProp<2>(k)[0][1] == 13000.0 + k.getKey();
 
 1797                check &= vdg.template getProp<2>(k)[0][2] == 14000.0 + k.getKey();
 
 1798                check &= vdg.template getProp<2>(k)[1][0] == 22000.0 + k.getKey();
 
 1799                check &= vdg.template getProp<2>(k)[1][1] == 23000.0 + k.getKey();
 
 1800                check &= vdg.template getProp<2>(k)[1][2] == 24000.0 + k.getKey();
 
 1801                check &= vdg.template getProp<2>(k)[2][0] == 32000.0 + k.getKey();
 
 1802                check &= vdg.template getProp<2>(k)[2][1] == 33000.0 + k.getKey();
 
 1803                check &= vdg.template getProp<2>(k)[2][2] == 34000.0 + k.getKey();
 
 // The test fails if any of the comparisons accumulated in check was false
 1809        BOOST_REQUIRE_EQUAL(check,
true);
 
 /*! \brief Kernel that touches one fixed element of the second container
  *
  * The only statement evaluates component 0 of property 1 of element 57 of
  * vs2 and discards the result; nothing is written and vs is not used.
  * NOTE(review): launched by the vector_dist_overflow_se_class1 test,
  * presumably so that the element access trips the SE_CLASS1 bounds check -
  * confirm against the test setup.
  *
  * \tparam vT kernel-side container type providing template getProp<N>(index)
  *
  * \param vs first container (unused)
  * \param vs2 container whose element is accessed
  *
  */
 template<typename vT>
 __global__ void launch_overflow(vT vs, vT vs2)
 {
     // index of the element that gets accessed
     const int probed_element = 57;

     vs2.template getProp<1>(probed_element)[0];
 }
 
 // Test case: launches the launch_overflow kernel on the kernel-side views of
 // vdg and vdg2 and brackets the run with banner lines on std::cout.
 // NOTE(review): this listing omits part of the body (the declarations of
 // vdg, vdg2 and ite and the block structure around the launch are not
 // visible here).
 1818BOOST_AUTO_TEST_CASE(vector_dist_overflow_se_class1)
 
 // Boundary conditions: PERIODIC on all three axes
 1822    size_t bc[3] = {PERIODIC,PERIODIC,PERIODIC};
 
 // Branch on create_vcluster().size() >= 16; the body of the branch is not
 // visible in this listing (presumably an early exit for large runs - TODO confirm)
 1824    if (create_vcluster().size() >= 16)
 
 // Banner marking the start of the output produced by this test
 1827    std::cout << 
"****** TEST ERROR MESSAGE BEGIN ********" << std::endl;
 
 // Set the capacity of vdg to 100
 1833    vdg.setCapacity(100);
 
 // Run launch_overflow; the kernel accesses element 57 of its second argument (vdg2)
 1846        CUDA_LAUNCH(launch_overflow,ite,vdg.toKernel(),vdg2.toKernel());
 
 // Message printed by this test; presumably inside a handler for the error
 // raised by the launch above (the enclosing block is not visible - TODO confirm)
 1850        std::cout << 
"SE_CLASS1 Catch" << std::endl;
 
 // Banner marking the end of the output produced by this test
 1853    std::cout << 
"****** TEST ERROR MESSAGE END ********" << std::endl;
 
 // Test case: exercises ghost_put with add_atomic_ on the device for
 // properties 0 and 2, for a decreasing sequence of sizes k. After each
 // ghost_put the properties are copied back to the host and every element
 // visited by it3 is compared against a reference element with a relative
 // tolerance eps.
 // NOTE(review): this listing omits a large part of the body (the declarations
 // of vd, NN, it, it2, it3, itp, xp, q, a, dist, eps, ret, p and v_cl, among
 // others, are not visible here).
 1858BOOST_AUTO_TEST_CASE( vector_dist_ghost_put_gpu )
 
 // The lines below are compiled only when CUDIFY_USE_OPENMP is not defined
 1862#ifndef CUDIFY_USE_OPENMP 
 // Start from 25^3 times the number of processing units ...
 1866    long int k = 25*25*25*create_vcluster().getProcessingUnits();
 
 // ... and take the cube root. NOTE(review): the double returned by std::pow
 // is converted back to long int, so any fractional part is dropped (a result
 // slightly below an integer ends up one lower).
 1867    k = std::pow(k, 1/3.);
 
 1872    print_test(
"Testing 3D periodic ghost put GPU k=",k);
 
 1873    BOOST_TEST_CHECKPOINT( 
"Testing 3D periodic ghost put k=" << k );
 
 // Step sizes for the loop over k: big_step is k/30 but never less than 1
 1875    long int big_step = k / 30;
 
 1876    big_step = (big_step == 0)?1:big_step;
 
 1877    long int small_step = 21;
 
 // Loop while k >= 2; k shrinks by big_step as long as k > 2*big_step,
 // otherwise by small_step
 1880    for ( ; k >= 2 ; k-= (k > 2*big_step)?big_step:small_step )
 
 // Both lengths scale with 1/k (presumably r_cut is the interaction cut-off
 // and r_g the ghost extension - TODO confirm)
 1882        float r_cut = 1.3 / k;
 
 1883        float r_g = 1.5 / k;
 
 // Boundary conditions: PERIODIC on all three axes
 1888        size_t bc[3]={PERIODIC,PERIODIC,PERIODIC};
 
 1902            auto key = it.get();
 
 // Copy properties 0 and 2 host -> device and then device -> host
 1922        vd.template hostToDeviceProp<0,2>();
 
 1925        vd.template deviceToHostProp<0,2>();
 
 // First pass: iterate with it2 and visit the neighborhood of each position xp
 1936            while (it2.isNext())
 
 1943                auto Np = NN.getNNIterator<NO_CHECK>(NN.getCell(xp));
 
 // Accumulate a*(r_cut^2 - dist^2)/2 into property 2 of element q
 1956                        vd.
getPropWrite<2>(q) += a*(-dist*dist+r_cut*r_cut) / 2;
 
 // Upload properties 0 and 2, run a single ghost_put on the device for both
 // properties with add_atomic_, then download the result
 1966            vd.template hostToDeviceProp<0,2>();
 
 1967            vd.template ghost_put<add_atomic_,0,2>(RUN_ON_DEVICE);
 
 1968            vd.template deviceToHostProp<0,2>();
 
 // Reference values: properties 0 and 2 of the element it3 currently points to
 1974            float constant = vd.
getProp<0>(it3.get());
 
 1975            float constanta = vd.
getProp<2>(it3.get());
 
 1978            while (it3.isNext())
 
 1980                float constant2 = vd.
getProp<0>(it3.get());
 
 1981                float constant3 = vd.
getProp<2>(it3.get());
 
 // Relative deviation from the reference values must stay within eps.
 // NOTE(review): the expression divides by constant and constanta; a zero
 // reference value is not guarded against here.
 1982                if (fabs(constant - constant2)/constant > eps || fabs(constanta - constant3)/constanta > eps)
 
 // Diagnostic output for an element that deviates
 1986                    std::cout << p.
toString() << 
"    " <<  constant2 << 
"/" << constant << 
"/" << constant3 << 
"    " << v_cl.
getProcessUnitID() << std::endl;
 
 1993            BOOST_REQUIRE_EQUAL(ret,
true);
 
 // Second pass over the elements via itp
 1997        while (itp.isNext())
 
 1999            auto key = itp.
get();
 
 2015            while (it2.isNext())
 
 2022                auto Np = NN.getNNIterator<NO_CHECK>(NN.getCell(xp));
 
 // Same upload / ghost_put / download sequence as above, but ghost_put is
 // invoked separately for property 0 and for property 2
 2045            vd.template hostToDeviceProp<0,2>();
 
 2046            vd.template ghost_put<add_atomic_,0>(RUN_ON_DEVICE);
 
 2047            vd.template ghost_put<add_atomic_,2>(RUN_ON_DEVICE);
 
 2048            vd.template deviceToHostProp<0,2>();
 
 2058            while (it3.isNext())
 
 // Same relative-tolerance check as in the first pass (same division caveat)
 2062                if (fabs(constant - constant2)/constant > eps || fabs(constanta - constant3)/constanta > eps)
 
 2066                    std::cout << p.
toString() << 
"    " <<  constant2 << 
"/" << constant << 
"/" << constant3 << 
"    " << v_cl.
getProcessUnitID() << std::endl;
 
 2073            BOOST_REQUIRE_EQUAL(ret,
true);
 
 2080BOOST_AUTO_TEST_SUITE_END()
 
This class represents an N-dimensional box.
void enlarge(const Box< dim, T > &gh)
Enlarge the box with ghost margin.
This class implements the point shape in an N-dimensional space.
__device__ __host__ T distance(const Point< dim, T > &q) const
It calculates the distance between two points.
__device__ __host__ const T & get(unsigned int i) const
Get coordinate.
std::string toString() const
Return the string with the point coordinate.
__device__ __host__ T norm() const
norm of the vector
void execute()
Execute all the requests.
size_t getProcessUnitID()
Get the process unit id.
size_t getProcessingUnits()
Get the total number of processors.
gpu::ofp_context_t & getgpuContext(bool iw=true)
If nvidia cuda is activated return a gpu context.
bool allGather(T &send, openfpm::vector< T, Mem, gr > &v)
Gather the data from all processors.
Implementation of VCluster class.
Implementation of 1-D std::vector like structure.
vect_dist_key_dx get()
Get the actual key.
auto getLastPropWrite() -> decltype(v_prp.template get< id >(0))
Get the property of the last element.
auto getPropWrite(vect_dist_key_dx vec_key) -> decltype(v_prp.template get< id >(vec_key.getKey()))
Get the property of an element.
size_t size_local() const
return the local size of the vector
auto getProp(vect_dist_key_dx vec_key) -> decltype(v_prp.template get< id >(vec_key.getKey()))
Get the property of an element.
const vector_dist_prop & getPropVector() const
return the property vector of all the particles
grid_dist_id_iterator_dec< Decomposition > getGridIterator(const size_t(&sz)[dim])
auto getPosRead(vect_dist_key_dx vec_key) const -> decltype(v_pos.template get< 0 >(vec_key.getKey()))
Get the position of an element.
size_t size_local_with_ghost() const
return the local size of the vector
void deviceToHostPos()
Move the memory from the device to host memory.
void updateCellList(CellL &cell_list, bool no_se3=false, cl_construct_opt opt=cl_construct_opt::Full)
Update a cell list using the stored particles.
VerletL getVerlet(St r_cut)
for each particle get the verlet list
vector_dist_iterator getGhostIterator() const
Get the iterator across the position of the ghost particles.
auto getPos(vect_dist_key_dx vec_key) -> decltype(v_pos.template get< 0 >(vec_key.getKey()))
Get the position of an element.
CellL getCellList(St r_cut, bool no_se3=false)
Construct a cell list starting from the stored particles.
vector_dist_iterator getDomainIterator() const
Get an iterator that traverses the particles in the domain.
void ghost_get(size_t opt=WITH_POSITION)
It synchronizes the properties and positions of the ghost particles.
auto getLastPosWrite() -> decltype(v_pos.template get< 0 >(0))
Get the position of the last element.
void hostToDevicePos()
Move the memory from the host to the device memory.
void map(size_t opt=NONE)
It moves all the particles that do not belong to the local processor to their respective processors.
const vector_dist_pos & getPosVector() const
return the position vector of all the particles
auto getLastPos() -> decltype(v_pos.template get< 0 >(0))
Get the position of the last element.
void deviceToHostProp()
Move the memory from the device to host memory.
vector_dist_iterator getDomainAndGhostIterator() const
Get an iterator that traverses the particles in the domain and ghost.
void add()
Add local particle.
void addComputationCosts(const self &vd, Model md=Model())
Add the computation cost on the decomposition coming from the particles.
auto getPropRead(vect_dist_key_dx vec_key) const -> decltype(v_prp.template get< id >(vec_key.getKey()))
Get the property of an element.
Decomposition & getDecomposition()
Get the decomposition.
KeyT const ValueT ValueT OffsetIteratorT OffsetIteratorT int
[in] The number of segments that comprise the sorting data
Aggregate of properties: from a list of objects it creates a struct that follows the OPENFPM native stru...