OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
decomposition_cuda_tests.cu
1 #define BOOST_TEST_DYN_LINK
2 #include <boost/test/unit_test.hpp>
3 
4 #include "VCluster/VCluster.hpp"
5 #include "Decomposition/CartDecomposition.hpp"
6 
7 #define SUB_UNIT_FACTOR 1024
8 
/*! \brief Kernel that queries dec.processorIDBC() for two points
 *
 * The value returned for p1 is stored in pr_id[0] and the value returned
 * for p2 in pr_id[1]. No thread index is used: every launched thread
 * performs the same two writes (in this file the kernel is launched with
 * a single thread).
 *
 * \tparam dec_type type of the decomposition object usable in device code
 *
 * \param p1 first point to query
 * \param p2 second point to query
 * \param dec decomposition object, passed by value
 * \param pr_id output buffer, must hold at least 2 unsigned int
 *
 */
template<typename dec_type>
__global__ void test_proc_idbc(Point<3,double> p1 ,Point<3,double> p2 , dec_type dec, unsigned int * pr_id)
{
	// slot 0 <- result for the first point
	*pr_id = dec.processorIDBC(p1);

	// slot 1 <- result for the second point
	*(pr_id + 1) = dec.processorIDBC(p2);
}
15 
/*! \brief Kernel that queries dec.ghost_processorID_N() for two points
 *
 * The value returned for p1 is stored in ng_id[0] and the value returned
 * for p2 in ng_id[1]. No thread index is used: every launched thread
 * performs the same two writes (in this file the kernel is launched with
 * a single thread).
 *
 * \tparam dec_type type of the decomposition object usable in device code
 *
 * \param p1 first point to query
 * \param p2 second point to query
 * \param dec decomposition object, passed by value
 * \param ng_id output buffer, must hold at least 2 unsigned int
 *
 */
template<typename dec_type>
__global__ void test_ghost_n(Point<3,double> p1 ,Point<3,double> p2 , dec_type dec, unsigned int * ng_id)
{
	// slot 0 <- result for the first point
	*ng_id = dec.ghost_processorID_N(p1);

	// slot 1 <- result for the second point
	*(ng_id + 1) = dec.ghost_processorID_N(p2);
}
22 
/*! \brief Kernel that calls dec.ghost_processor_ID() once per counted entry of two points
 *
 * For p1 the call is issued ng_id[0] times with base 0, for p2 it is issued
 * ng_id[1] times with base ng_id[0]; the running index is passed as last
 * argument. ng_id is only read here.
 *
 * NOTE(review): presumably ghost_processor_ID writes its result into g_id at
 * base + index - confirm against the decomposition implementation.
 *
 * \tparam dec_type type of the decomposition object usable in device code
 * \tparam output_type type of the output container usable in device code
 *
 * \param p1 first point
 * \param p2 second point
 * \param dec decomposition object, passed by value
 * \param ng_id buffer with two counters (one for p1, one for p2)
 * \param g_id output object forwarded to ghost_processor_ID
 *
 */
template<typename dec_type, typename output_type>
__global__ void test_ghost(Point<3,double> p1 ,Point<3,double> p2 , dec_type dec, unsigned int * ng_id , output_type g_id)
{
	// entries of the first point, base 0
	unsigned int k = 0;
	while (k < ng_id[0])
	{
		dec.ghost_processor_ID(p1,g_id,0,k);
		k++;
	}

	// entries of the second point, placed after the ones of the first point
	unsigned int m = 0;
	while (m < ng_id[1])
	{
		dec.ghost_processor_ID(p2,g_id,ng_id[0],m);
		m++;
	}
}
36 
37 BOOST_AUTO_TEST_SUITE( decomposition_to_gpu_test )
38 
// Test case: for every axis j and every sub-domain i of the decomposition it
// builds a pair of points (p1,p2), launches the kernels test_proc_idbc,
// test_ghost_n and test_ghost with one thread on that pair and checks on the
// host the processor ids and the ghost counts that were copied back.
//
// NOTE(review): the declarations of `dec` and `vd` are not visible in this
// listing (the embedded numbering skips 44 and 94) - confirm them against
// the original file.
39 BOOST_AUTO_TEST_CASE( CartDecomposition_check_cross_consistency_between_proc_idbc_and_ghost2_gpu )
40 {
41  // Vcluster
42  Vcluster<> & vcl = create_vcluster();
43 
45 
 // boundary conditions: periodic on all three axes
46  size_t bc[3] = {PERIODIC,PERIODIC,PERIODIC};
47 
48  // Physical domain
49  Box<3, double> box( { -0.01, -0.01, 0.0 }, { 0.01, 0.01, 0.003 });
50 
 // ghost constructed from the single value 0.0015
51  Ghost<3,double> g(0.0015);
52 
53  dec.setGoodParameters(box, bc, g, 512);
54 
55  dec.decompose();
56 
57  // Now we check the point
58 
 // j selects the coordinate that is perturbed in p2 (second half of the loop body)
59  for (size_t j = 0 ; j < 3 ; j++ )
60  {
61  for (size_t i = 0 ; i < dec.getNSubDomain() ; i++)
62  {
63  Point<3,double> p1;
64  Point<3,double> p2;
65 
 // first half: p1 is the low corner of sub-domain i
66  p1.get(0) = SpaceBox<3,double>(dec.getSubDomains().get(i)).getLow(0);
67  p1.get(1) = SpaceBox<3,double>(dec.getSubDomains().get(i)).getLow(1);
68  p1.get(2) = SpaceBox<3,double>(dec.getSubDomains().get(i)).getLow(2);
69 
70  p2 = p1;
71 
 // the perturbation of p2 is commented out, so in this first half p2 is identical to p1
 // NOTE(review): presumably the "ids differ" branch below is then never taken here - confirm
72 // p2.get(j) = std::nextafter(SpaceBox<3,double>(dec.getSubDomains().get(i)).getLow(j),-1.0);
73 
 // object passed by value to the kernels
74  auto gpudec = dec.toKernel();
75 
 // mem holds the two values written by test_proc_idbc
76  CudaMemory mem;
77  mem.allocate(2*sizeof(unsigned int));
78 
79  CUDA_LAUNCH_DIM3((test_proc_idbc<decltype(gpudec)>),1,1,p1,p2,gpudec,(unsigned int *)mem.getDevicePointer());
80 
 // move the result from device to host before reading it with getPointer()
81  mem.deviceToHost();
82 
 // both ids must be smaller than the total number of processors
83  BOOST_REQUIRE(((unsigned int *)mem.getPointer())[0] < vcl.size());
84  BOOST_REQUIRE(((unsigned int *)mem.getPointer())[1] < vcl.size());
85 
 // mem2 holds the two values written by test_ghost_n
86  CudaMemory mem2;
87  mem2.allocate(2*sizeof(unsigned int));
88  CUDA_LAUNCH_DIM3((test_ghost_n<decltype(gpudec)>),1,1,p1,p2,gpudec,(unsigned int *)mem2.getDevicePointer());
89 
90  mem2.deviceToHost();
91 
 // tot = value for p1 + value for p2, used to size vd
92  unsigned int tot = ((unsigned int *)mem2.getPointer())[0] + ((unsigned int *)mem2.getPointer())[1];
93 
95  vd.resize(tot);
 // NOTE(review): what test_ghost writes through vd.toKernel() is not read back or checked in the visible code
96  CUDA_LAUNCH_DIM3((test_ghost<decltype(gpudec),decltype(vd.toKernel())>),1,1,p1,p2,gpudec,(unsigned int *)mem2.getDevicePointer(),vd.toKernel());
97 
 // only when the two points got different processor ids
98  if (((unsigned int *)mem.getPointer())[0] != ((unsigned int *)mem.getPointer())[1])
99  {
 // this rank is the id returned for p2: the value for p2 must be non-zero and the one for p1 zero
100  if (vcl.rank() == ((unsigned int *)mem.getPointer())[1] )
101  {
102  BOOST_REQUIRE(((unsigned int *)mem2.getPointer())[1] != 0);
103  BOOST_REQUIRE(((unsigned int *)mem2.getPointer())[0] == 0);
104  }
105 
 // this rank is the id returned for p1: the value for p1 must be non-zero and the one for p2 zero
106  if (vcl.rank() == ((unsigned int *)mem.getPointer())[0])
107  {
108  BOOST_REQUIRE(((unsigned int *)mem2.getPointer())[1] == 0 );
109  BOOST_REQUIRE(((unsigned int *)mem2.getPointer())[0] != 0 );
110  }
111  }
112 
113 
 // second half: p1 is, on every axis, the next representable value from High in the direction of Low
114  p1.get(0) = std::nextafter(SpaceBox<3,double>(dec.getSubDomains().get(i)).getHigh(0),SpaceBox<3,double>(dec.getSubDomains().get(i)).getLow(0));
115  p1.get(1) = std::nextafter(SpaceBox<3,double>(dec.getSubDomains().get(i)).getHigh(1),SpaceBox<3,double>(dec.getSubDomains().get(i)).getLow(1));
116  p1.get(2) = std::nextafter(SpaceBox<3,double>(dec.getSubDomains().get(i)).getHigh(2),SpaceBox<3,double>(dec.getSubDomains().get(i)).getLow(2));
117 
118  p2 = p1;
119 
 // p2 differs from p1 only on axis j: next representable value from High(j) in the direction of 1.0
 // NOTE(review): presumably High(j) < 1.0 for the box above, so this lands just past High(j) - confirm
120  p2.get(j) = std::nextafter(SpaceBox<3,double>(dec.getSubDomains().get(i)).getHigh(j),1.0);
121 
 // same sequence of launches and checks as in the first half, reusing mem, mem2 and vd
122  CUDA_LAUNCH_DIM3((test_proc_idbc<decltype(gpudec)>),1,1,p1,p2,gpudec,(unsigned int *)mem.getDevicePointer());
123 
124  mem.deviceToHost();
125 
126  BOOST_REQUIRE(((unsigned int *)mem.getPointer())[0] < vcl.size());
127  BOOST_REQUIRE(((unsigned int *)mem.getPointer())[1] < vcl.size());
128 
 // NOTE(review): mem2 was already allocated with the same size above; the effect of this second allocate() is not visible here
129  mem2.allocate(2*sizeof(unsigned int));
130  CUDA_LAUNCH_DIM3((test_ghost_n<decltype(gpudec)>),1,1,p1,p2,gpudec,(unsigned int *)mem2.getDevicePointer());
131 
132  mem2.deviceToHost();
133 
134  tot = ((unsigned int *)mem2.getPointer())[0] + ((unsigned int *)mem2.getPointer())[1];
135 
136  vd.resize(tot);
137  CUDA_LAUNCH_DIM3((test_ghost<decltype(gpudec),decltype(vd.toKernel())>),1,1,p1,p2,gpudec,(unsigned int *)mem2.getDevicePointer(),vd.toKernel());
138 
 // only when the two points got different processor ids
139  if (((unsigned int *)mem.getPointer())[0] != ((unsigned int *)mem.getPointer())[1])
140  {
 // this rank is the id returned for p2: the value for p2 must be non-zero and the one for p1 zero
141  if (vcl.rank() == ((unsigned int *)mem.getPointer())[1])
142  {
143  BOOST_REQUIRE(((unsigned int *)mem2.getPointer())[1] != 0);
144  BOOST_REQUIRE(((unsigned int *)mem2.getPointer())[0] == 0);
145  }
146 
 // this rank is the id returned for p1: the value for p1 must be non-zero and the one for p2 zero
147  if (vcl.rank() == ((unsigned int *)mem.getPointer())[0])
148  {
149  BOOST_REQUIRE(((unsigned int *)mem2.getPointer())[1] == 0 );
150  BOOST_REQUIRE(((unsigned int *)mem2.getPointer())[0] != 0 );
151  }
152  }
153 
154  }
155  }
156 }
157 
158 BOOST_AUTO_TEST_SUITE_END()
This class represents an N-dimensional box.
Definition: SpaceBox.hpp:26
virtual bool allocate(size_t sz)
allocate memory
Definition: CudaMemory.cu:38
virtual void * getPointer()
get a readable pointer with the data
Definition: CudaMemory.cu:352
This class implements the point shape in an N-dimensional space.
Definition: Point.hpp:27
Definition: Ghost.hpp:39
Implementation of VCluster class.
Definition: VCluster.hpp:58
virtual void * getDevicePointer()
get a readable pointer with the data
Definition: CudaMemory.cu:497
This class decomposes a space into sub-sub-domains and distributes them across processors.
__device__ __host__ const T & get(unsigned int i) const
Get coordinate.
Definition: Point.hpp:172
size_t rank()
Get the process unit id.
This class represents an N-dimensional box.
Definition: Box.hpp:60
virtual void deviceToHost()
Move memory from device to host.
Definition: CudaMemory.cu:367
size_t size()
Get the total number of processors.
Implementation of 1-D std::vector like structure.
Definition: map_vector.hpp:202