doxygen/openfpm/map__vector__cuda__funcs__tests_8cu_source.html

 /*

  * map_vector_cuda_funcs_tests.cu

  *

  *  Created on: Aug 17, 2018

  *      Author: i-bird

  */


 #include "util/cuda_util.hpp"

 #include "config.h"

 #define BOOST_TEST_DYN_LINK

 #include <boost/test/unit_test.hpp>


 #include "Vector/map_vector.hpp"

 #include "util/tokernel_transformation.hpp"


 BOOST_AUTO_TEST_SUITE( vector_cuda_funcs_tests )


 BOOST_AUTO_TEST_CASE( vector_cuda_funcs_add_prp_device )
 {
     // Element layout used by both vectors: a scalar, a 3-vector and a 3x3
     // tensor. Only properties 0 and 1 are filled, transferred and checked.
     typedef aggregate<float,float[3],float[3][3]> prop_type;
     typedef openfpm::vector_gpu<prop_type> gpu_vector_type;

     const size_t n_elements = 100;

     gpu_vector_type vg_data;        // receives the appended elements
     gpu_vector_type vg_appended;    // source of the appended elements

     vg_data.resize(n_elements);
     vg_appended.resize(n_elements);

     // Host-side fill. The two vectors get different values so the appended
     // range can be told apart from the elements vg_data already had.
     for (size_t e = 0 ; e < n_elements ; ++e)
     {
         vg_data.template get<0>(e) = 2.5 + e;
         vg_appended.template get<0>(e) = 8.5 + e;

         vg_data.template get<1>(e)[0] = 4.6 + e;
         vg_data.template get<1>(e)[1] = 7.8 + e;
         vg_data.template get<1>(e)[2] = 9.0 + e;

         vg_appended.template get<1>(e)[0] = 1.6 + e;
         vg_appended.template get<1>(e)[1] = 3.8 + e;
         vg_appended.template get<1>(e)[2] = 5.1 + e;
     }

     // Upload properties 0 and 1 of both vectors
     vg_data.hostToDevice<0,1>();
     vg_appended.hostToDevice<0,1>();

     // Operation under test, restricted to properties 0 and 1
     vg_data.add_prp_device<prop_type,
                            CudaMemory,
                            openfpm::grow_policy_double,
                            OPENFPM_NATIVE,
                            memory_traits_inte,
                            0,1>(vg_appended);

     // Bring the result back so it can be inspected on the host
     vg_data.deviceToHost<0,1>();

     BOOST_REQUIRE_EQUAL(vg_data.size(),200);

     // The second half of vg_data is expected to replicate vg_appended
     bool match = true;

     for (size_t e = 0 ; e < n_elements ; ++e)
     {
         const size_t dst = e + n_elements;

         match &= vg_data.template get<0>(dst) == vg_appended.template get<0>(e);

         match &= vg_data.template get<1>(dst)[0] == vg_appended.template get<1>(e)[0];
         match &= vg_data.template get<1>(dst)[1] == vg_appended.template get<1>(e)[1];
         match &= vg_data.template get<1>(dst)[2] == vg_appended.template get<1>(e)[2];
     }

     BOOST_REQUIRE_EQUAL(match,true);
 }


 BOOST_AUTO_TEST_CASE( vector_cuda_to_kernel_recursive2 )
 {
     // Host-side containers handed to toKernel_transform: a flat vector, two
     // vectors nesting another vector_gpu, and a vector of Box declared
     // explicitly with CudaMemory / memory_traits_inte.
     using test1_type = openfpm::vector_gpu<aggregate<int,long int>>;
     using test2_type = openfpm::vector_gpu<aggregate<int,openfpm::vector_gpu<aggregate<long int>>>>;
     using test3_type = openfpm::vector_gpu<aggregate<int,openfpm::vector_gpu<aggregate<Box<2,float>>>>>;
     using test4_type = openfpm::vector<Box<3,float>,CudaMemory,memory_traits_inte>;

     // Types produced by the transformation
     using tker1 = toKernel_transform<memory_traits_inte,test1_type>::type;
     using tker2 = toKernel_transform<memory_traits_inte,test2_type>::type;
     using tker3 = toKernel_transform<memory_traits_inte,test3_type>::type;
     using tker4 = toKernel_transform<memory_traits_inte,test4_type>::type;

     // Types the transformation is expected to produce: every vector_gpu,
     // including the nested ones, becomes a vector_gpu_ker.
     using expected1 = openfpm::vector_gpu_ker<aggregate<int, long>, memory_traits_inte>;
     using expected2 = openfpm::vector_gpu_ker<aggregate<int, openfpm::vector_gpu_ker<aggregate<long>, memory_traits_inte> >, memory_traits_inte>;
     using expected3 = openfpm::vector_gpu_ker<aggregate<int, openfpm::vector_gpu_ker<aggregate<Box<2,float>>, memory_traits_inte> >, memory_traits_inte>;
     using expected4 = openfpm::vector_gpu_ker<Box<3,float>,memory_traits_inte>;

     bool test = std::is_same<tker1,expected1>::value;
     BOOST_REQUIRE_EQUAL(test,true);

     test = std::is_same<tker2,expected2>::value;
     BOOST_REQUIRE_EQUAL(test,true);

     test = std::is_same<tker3,expected3>::value;
     BOOST_REQUIRE_EQUAL(test,true);

     test = std::is_same<tker4,expected4>::value;
     BOOST_REQUIRE_EQUAL(test,true);
 }


 /*! \brief Flatten a vector of vectors into a single output vector
  *
  * For every element of vvrc, property 0 of each element of the nested
  * vector stored in property 1 is copied, in order, into consecutive
  * positions of property 0 of vot.
  *
  * The kernel does not use any thread or block index: each launched thread
  * runs the complete loop. The test in this file launches it with
  * CUDA_LAUNCH_DIM3(kernel_recursive_check,1,1,...).
  *
  * No bound check is done on vot; the caller must size it to hold the total
  * number of nested elements.
  *
  * \param vvrc kernel-side vector whose property 1 is itself a vector
  * \param vot kernel-side output vector, written through property 0
  *
  */
 template<typename vv_rc,typename vector_output_type>
 __global__ void kernel_recursive_check(vv_rc vvrc, vector_output_type vot)
 {
     // Next free position in the output vector
     int flat = 0;

     for (int row = 0 ; row < vvrc.size() ; ++row)
     {
         // flat advances together with the position inside the nested vector
         for (int col = 0 ; col < vvrc.template get<1>(row).size() ; ++col, ++flat)
         {
             vot.template get<0>(flat) = vvrc.template get<1>(row).template get<0>(col);
         }
     }
 }


 BOOST_AUTO_TEST_CASE( vector_cuda_to_kernel_recursive2_test_toKernel )
 {
     typedef openfpm::vector_gpu<aggregate<int,openfpm::vector_gpu<aggregate<long int>>>> test2_type;
     typedef openfpm::vector_gpu<aggregate<int,openfpm::vector_gpu<aggregate<Box<2,float>>>>> test3_type;

     const size_t n_outer = 3;
     const size_t n_inner = 3;

     // Host value left in property 0 of each outer element. Property 0 is
     // neither uploaded nor read by the kernel in this test.
     const int outer_tag[n_outer] = {280, 30080, 60080};

     // Values stored in the nested vectors, one row per outer element. The
     // kernel is expected to emit them in exactly this row-major order.
     const long int inner_val[n_outer][n_inner] = {{ 500,  600,  700},
                                                   {1500, 1600, 1700},
                                                   {2500, 2600, 2700}};

     test2_type tt2;
     test3_type tt3;    // only default-constructed, never populated here

     for (size_t o = 0 ; o < n_outer ; ++o)
     {tt2.add_no_device();}

     for (size_t o = 0 ; o < n_outer ; ++o)
     {
         tt2.template get<0>(o) = outer_tag[o];

         // Grow the nested vector one element at a time and fill it on the host
         for (size_t s = 0 ; s < n_inner ; ++s)
         {
             tt2.template get<1>(o).add();
             tt2.template get<1>(o).template get<0>(s) = inner_val[o][s];
         }

         // Upload the payload of this nested vector
         tt2.template get<1>(o).template hostToDevice<0>();
     }

     // Upload property 1 of the outer vector (the nested vectors themselves)
     tt2.template hostToDevice<1>();

     openfpm::vector_gpu<aggregate<long int>> vg;
     vg.resize(n_outer * n_inner);

     // One block of one thread: the kernel walks the whole structure serially
     CUDA_LAUNCH_DIM3(kernel_recursive_check,1,1,tt2.toKernel(),vg.toKernel());

     vg.template deviceToHost<0>();

     BOOST_REQUIRE_EQUAL(vg.template get<0>(0),500);
     BOOST_REQUIRE_EQUAL(vg.template get<0>(1),600);
     BOOST_REQUIRE_EQUAL(vg.template get<0>(2),700);
     BOOST_REQUIRE_EQUAL(vg.template get<0>(3),1500);
     BOOST_REQUIRE_EQUAL(vg.template get<0>(4),1600);
     BOOST_REQUIRE_EQUAL(vg.template get<0>(5),1700);
     BOOST_REQUIRE_EQUAL(vg.template get<0>(6),2500);
     BOOST_REQUIRE_EQUAL(vg.template get<0>(7),2600);
     BOOST_REQUIRE_EQUAL(vg.template get<0>(8),2700);
 }


 BOOST_AUTO_TEST_CASE( vector_cuda_to_cpu_operator_equal )
 {
     typedef aggregate<int,int,double> prop_type;

     openfpm::vector_gpu<prop_type> v1;                                 // assigned from v2
     openfpm::vector<prop_type> v2;                                     // reference data
     openfpm::vector<prop_type,HeapMemory, memory_traits_inte > v3;     // assigned from v2
     openfpm::vector<prop_type> v4;                                     // assigned from v1

     const size_t n_elements = 3000;

     v2.resize(n_elements);

     // Reference pattern: each property is a function of the element index
     for (size_t e = 0 ; e < n_elements ; ++e)
     {
         v2.template get<0>(e) = e;
         v2.template get<1>(e) = e+300;
         v2.template get<2>(e) = e+6123.0;
     }

     // Assignments across the different vector types, including the round
     // trip v2 -> v1 -> v4
     v1 = v2;
     v3 = v2;
     v4 = v1;

     // Every copy must hold the reference pattern on the host, property by property
     for (size_t i = 0 ; i < v2.size() ; i++)
     {
         BOOST_REQUIRE_EQUAL(v2.template get<0>(i),v1.template get<0>(i));
         BOOST_REQUIRE_EQUAL(v2.template get<0>(i),v3.template get<0>(i));
         BOOST_REQUIRE_EQUAL(v2.template get<0>(i),v4.template get<0>(i));

         BOOST_REQUIRE_EQUAL(v2.template get<1>(i),v1.template get<1>(i));
         BOOST_REQUIRE_EQUAL(v2.template get<1>(i),v3.template get<1>(i));
         BOOST_REQUIRE_EQUAL(v2.template get<1>(i),v4.template get<1>(i));

         BOOST_REQUIRE_EQUAL(v2.template get<2>(i),v1.template get<2>(i));
         BOOST_REQUIRE_EQUAL(v2.template get<2>(i),v3.template get<2>(i));
         BOOST_REQUIRE_EQUAL(v2.template get<2>(i),v4.template get<2>(i));
     }
 }


 BOOST_AUTO_TEST_CASE( vector_cuda_host_to_device_check )
 {
     openfpm::vector_gpu<aggregate<int,int,double>> v1;

     v1.resize(3);

     // Reference pattern on the host: each property is a function of the index
     for (size_t e = 0 ; e < v1.size() ; ++e)
     {
         v1.template get<0>(e) = e;
         v1.template get<1>(e) = e+300;
         v1.template get<2>(e) = e+6123.0;
     }

     // Full upload of the three properties
     v1.hostToDevice<0,1,2>();

     // Index of the only element that is left untouched on the host
     const size_t last = v1.size()-1;

     // Wipe every host element except the last one (elements 0 and 1)
     for (size_t e = 0 ; e < last ; ++e)
     {
         v1.template get<0>(e) = 0;
         v1.template get<1>(e) = 0;
         v1.template get<2>(e) = 0;
     }

     // Ranged upload with start == stop == last: the wiped elements must not
     // be part of this transfer for the final check to succeed
     v1.hostToDevice<0,1,2>(last,last);

     // Download everything: the host copy is replaced by the device content
     v1.deviceToHost<0,1,2>();

     // The full reference pattern is expected back
     for (size_t i = 0 ; i < v1.size() ; i++)
     {
         BOOST_REQUIRE_EQUAL(v1.template get<0>(i),i);
         BOOST_REQUIRE_EQUAL(v1.template get<1>(i),i+300);
         BOOST_REQUIRE_EQUAL(v1.template get<2>(i),i+6123.0);
     }
 }


 BOOST_AUTO_TEST_CASE( vector_cuda_host_to_device_check_NUMA )
 {
     openfpm::vector_gpu<aggregate<int,int,double>> v1;

     v1.resize(3);

     // Reference pattern on the host: each property is a function of the index
     for (size_t e = 0 ; e < v1.size() ; ++e)
     {
         v1.template get<0>(e) = e;
         v1.template get<1>(e) = e+300;
         v1.template get<2>(e) = e+6123.0;
     }

     // Full upload of the three properties through the NUMA variant
     v1.hostToDeviceNUMA<0,1,2>();

     // Index of the only element that is left untouched on the host
     const size_t last = v1.size()-1;

     // Wipe every host element except the last one (elements 0 and 1)
     for (size_t e = 0 ; e < last ; ++e)
     {
         v1.template get<0>(e) = 0;
         v1.template get<1>(e) = 0;
         v1.template get<2>(e) = 0;
     }

     // Ranged NUMA upload with start == stop == last: the wiped elements must
     // not be part of this transfer for the final check to succeed
     v1.hostToDeviceNUMA<0,1,2>(last,last);

     // Download everything: the host copy is replaced by the device content
     v1.deviceToHost<0,1,2>();

     // The full reference pattern is expected back
     for (size_t i = 0 ; i < v1.size() ; i++)
     {
         BOOST_REQUIRE_EQUAL(v1.template get<0>(i),i);
         BOOST_REQUIRE_EQUAL(v1.template get<1>(i),i+300);
         BOOST_REQUIRE_EQUAL(v1.template get<2>(i),i+6123.0);
     }
 }


 BOOST_AUTO_TEST_CASE( vector_cuda_host_to_device_vector_and_point_tensor )
 {
     // Ranged host-to-device transfer of a 3-vector property (0) and a 3x3
     // tensor property (1), done in two halves.
     openfpm::vector_gpu<aggregate<float[3],float[3][3]>> v1;

     v1.resize(100);

     // Write the reference pattern into the host elements [first,last]
     // (both bounds included).
     auto fill_host_range = [&v1](size_t first, size_t last)
     {
         for (size_t i = first ; i <= last ; i++)
         {
             v1.template get<0>(i)[0] = i+1500;
             v1.template get<0>(i)[1] = i+2200;
             v1.template get<0>(i)[2] = i+2600;

             v1.template get<1>(i)[0][0] = i+6000;
             v1.template get<1>(i)[0][1] = i+7200;
             v1.template get<1>(i)[0][2] = i+8600;
             v1.template get<1>(i)[1][0] = i+9000;
             v1.template get<1>(i)[1][1] = i+10200;
             v1.template get<1>(i)[1][2] = i+11600;
             v1.template get<1>(i)[2][0] = i+12800;
             v1.template get<1>(i)[2][1] = i+22200;
             v1.template get<1>(i)[2][2] = i+23600;
         }
     };

     // First half. The stop bound of the ranged transfer is inclusive: the
     // second call below has to reach element 99 for the final check to pass.
     // The range therefore ends at 49, so that element 50 is not uploaded
     // before the host has written it.
     fill_host_range(0,49);
     v1.hostToDevice<0,1>(0,49);

     // Second half
     fill_host_range(50,99);
     v1.hostToDevice<0,1>(50,99);

     // Download everything and compare against the reference pattern
     v1.deviceToHost<0,1>();

     for (size_t i = 0 ; i < 100 ; i++)
     {
         BOOST_REQUIRE_EQUAL(v1.template get<0>(i)[0],i+1500);
         BOOST_REQUIRE_EQUAL(v1.template get<0>(i)[1],i+2200);
         BOOST_REQUIRE_EQUAL(v1.template get<0>(i)[2],i+2600);

         BOOST_REQUIRE_EQUAL(v1.template get<1>(i)[0][0],i+6000);
         BOOST_REQUIRE_EQUAL(v1.template get<1>(i)[0][1],i+7200);
         BOOST_REQUIRE_EQUAL(v1.template get<1>(i)[0][2],i+8600);
         BOOST_REQUIRE_EQUAL(v1.template get<1>(i)[1][0],i+9000);
         BOOST_REQUIRE_EQUAL(v1.template get<1>(i)[1][1],i+10200);
         BOOST_REQUIRE_EQUAL(v1.template get<1>(i)[1][2],i+11600);
         BOOST_REQUIRE_EQUAL(v1.template get<1>(i)[2][0],i+12800);
         BOOST_REQUIRE_EQUAL(v1.template get<1>(i)[2][1],i+22200);
         BOOST_REQUIRE_EQUAL(v1.template get<1>(i)[2][2],i+23600);
     }
 }


 BOOST_AUTO_TEST_CASE( vector_cuda_copy )
 {
     // Checks that v2 = v1 carries over both the host content and the device
     // content of v1. The two are made different on purpose: one pattern is
     // uploaded to the device, then the host is overwritten with another.
     typedef openfpm::vector_gpu<aggregate<float,float[3],float[3][3]>> vector_type;

     // Offsets added to the element index, one per stored scalar:
     // [0] property 0, [1..3] property 1, [4..12] property 2 in row-major order
     const int device_pattern[13] = {4000,
                                     5000, 6000, 7000,
                                     8000, 9000, 10000,
                                     11000, 12000, 13000,
                                     14000, 15000, 16000};

     const int host_pattern[13] = {64000,
                                   65000, 66000, 67000,
                                   68000, 69000, 610000,
                                   611000, 612000, 613000,
                                   614000, 615000, 616000};

     // Write "index + offset" into every scalar of every host element of v
     auto fill_host = [](vector_type & v, const int * off)
     {
         auto ite = v.getIterator();

         while (ite.isNext())
         {
             auto p = ite.get();

             v.template get<0>(p) = p + off[0];

             for (int c = 0 ; c < 3 ; c++)
             {v.template get<1>(p)[c] = p + off[1 + c];}

             for (int r = 0 ; r < 3 ; r++)
             {
                 for (int c = 0 ; c < 3 ; c++)
                 {v.template get<2>(p)[r][c] = p + off[4 + 3*r + c];}
             }

             ++ite;
         }
     };

     // Return true only if every scalar of every host element of v holds
     // "index + offset". The result is accumulated over all comparisons, so
     // a single mismatch anywhere makes the check fail.
     auto host_matches = [](vector_type & v, const int * off) -> bool
     {
         bool all_ok = true;

         auto ite = v.getIterator();

         while (ite.isNext())
         {
             auto p = ite.get();

             bool ok = v.template get<0>(p) == p + off[0];

             for (int c = 0 ; c < 3 ; c++)
             {ok &= v.template get<1>(p)[c] == p + off[1 + c];}

             for (int r = 0 ; r < 3 ; r++)
             {
                 for (int c = 0 ; c < 3 ; c++)
                 {ok &= v.template get<2>(p)[r][c] == p + off[4 + 3*r + c];}
             }

             // Diagnostic for the offending element
             if (ok == false)
             {
                 std::cout << v.template get<0>(p) << std::endl;
             }

             all_ok &= ok;

             ++ite;
         }

         return all_ok;
     };

     vector_type v1;
     vector_type v2;

     v1.resize(100);

     // Device side of v1 receives device_pattern ...
     fill_host(v1,device_pattern);
     v1.hostToDevice<0,1,2>();

     // ... while the host side is then overwritten with host_pattern
     fill_host(v1,host_pattern);

     v2 = v1;

     // first check the CPU

     bool match = host_matches(v2,host_pattern);

     BOOST_REQUIRE_EQUAL(match,true);

     // then the device content, brought back to the host of v2

     v2.deviceToHost<0,1,2>();

     match = host_matches(v2,device_pattern);

     BOOST_REQUIRE_EQUAL(match,true);
 }


 BOOST_AUTO_TEST_SUITE_END()


CudaMemory
Definition: CudaMemory.cuh:59

HeapMemory
This class allocates and destroys CPU memory.
Definition: HeapMemory.hpp:40

openfpm::grow_policy_double
The grow policy defines how the vector should grow every time its size is exceeded.
Definition: map_vector_grow_p.hpp:47

openfpm::vector
Implementation of 1-D std::vector like structure.
Definition: map_vector.hpp:204

openfpm::vector::size
size_t size()
Stub size.
Definition: map_vector.hpp:212

aggregate
Aggregate of properties: from a list of objects it creates a struct that follows the OPENFPM native stru...
Definition: aggregate.hpp:221

memory_traits_inte
Transform the boost::fusion::vector into memory specification (memory_traits)
Definition: memory_conf.hpp:84

toKernel_transform
Definition: tokernel_transformation.hpp:203