OpenFPM  5.2.0
Project that contain the implementation of distributed structures
map_vector_cuda_funcs_tests.cu
1 /*
2  * map_vector_cuda_funcs_tests.cu
3  *
4  * Created on: Aug 17, 2018
5  * Author: i-bird
6  */
7 
8 #include "util/cuda_util.hpp"
9 #include "config.h"
10 #define BOOST_TEST_DYN_LINK
11 #include <boost/test/unit_test.hpp>
12 
13 #include "Vector/map_vector.hpp"
14 #include "util/tokernel_transformation.hpp"
15 
16 BOOST_AUTO_TEST_SUITE( vector_cuda_funcs_tests )
17 
18 
19 BOOST_AUTO_TEST_CASE( vector_cuda_funcs_add_prp_device )
20 {
23 
24  vg_data.resize(100);
25  vg_data2.resize(100);
26 
27  // we fill vg_data with something
28 
29  for (size_t i = 0 ; i < 100 ; i++)
30  {
31  vg_data.template get<0>(i) = 2.5 + i;
32 
33  vg_data.template get<1>(i)[0] = 4.6 + i;
34  vg_data.template get<1>(i)[1] = 7.8 + i;
35  vg_data.template get<1>(i)[2] = 9.0 + i;
36 
37  vg_data2.template get<0>(i) = 8.5 + i;
38 
39  vg_data2.template get<1>(i)[0] = 1.6 + i;
40  vg_data2.template get<1>(i)[1] = 3.8 + i;
41  vg_data2.template get<1>(i)[2] = 5.1 + i;
42  }
43 
44  vg_data.hostToDevice<0,1>();
45  vg_data2.hostToDevice<0,1>();
46 
47  vg_data.add_prp_device<aggregate<float,float[3],float[3][3]>,
48  CudaMemory,
50  OPENFPM_NATIVE,
52  0,1>(vg_data2);
53 
54  vg_data.deviceToHost<0,1>();
55 
56  BOOST_REQUIRE_EQUAL(vg_data.size(),200);
57 
58  bool match = true;
59  for (unsigned int i = 100 ; i < 200 ; i++)
60  {
61  match &= vg_data.template get<0>(i) == vg_data2.template get<0>(i-100);
62 
63  match &= vg_data.template get<1>(i)[0] == vg_data2.template get<1>(i-100)[0];
64  match &= vg_data.template get<1>(i)[1] == vg_data2.template get<1>(i-100)[1];
65  match &= vg_data.template get<1>(i)[2] == vg_data2.template get<1>(i-100)[2];
66  }
67 
68  BOOST_REQUIRE_EQUAL(match,true);
69 }
70 
71 BOOST_AUTO_TEST_CASE( vector_cuda_to_kernel_recursive2 )
72 {
77 
82 
83  bool test = std::is_same<tker1,openfpm::vector_gpu_ker<aggregate<int, long>, memory_traits_inte>>::value;
84 
85  BOOST_REQUIRE_EQUAL(test,true);
86 
87  test = std::is_same<tker2,openfpm::vector_gpu_ker<aggregate<int, openfpm::vector_gpu_ker<aggregate<long>, memory_traits_inte> >, memory_traits_inte>>::value;
88 
89  BOOST_REQUIRE_EQUAL(test,true);
90 
91  test = std::is_same<tker3,openfpm::vector_gpu_ker<aggregate<int, openfpm::vector_gpu_ker<aggregate<Box<2,float>>, memory_traits_inte> >, memory_traits_inte>>::value;
92 
93  BOOST_REQUIRE_EQUAL(test,true);
94 
95  test = std::is_same<tker4,openfpm::vector_gpu_ker<Box<3,float>,memory_traits_inte>>::value;
96 
97  BOOST_REQUIRE_EQUAL(test,true);
98 }
99 
100 template<typename vv_rc,typename vector_output_type>
101 __global__ void kernel_recursive_check(vv_rc vvrc, vector_output_type vot)
102 {
103  int k = 0;
104  for (int i = 0 ; i < vvrc.size() ; i++)
105  {
106  for (int j = 0 ; j < vvrc.template get<1>(i).size() ; j++)
107  {
108  vot.template get<0>(k) = vvrc.template get<1>(i).template get<0>(j);
109  k++;
110  }
111  }
112 }
113 
114 BOOST_AUTO_TEST_CASE( vector_cuda_to_kernel_recursive2_test_toKernel )
115 {
118 
119  test2_type tt2;
120  test3_type tt3;
121 
122  tt2.add_no_device();
123  tt2.add_no_device();
124  tt2.add_no_device();
125 
126 /* tt3.add();
127  tt3.add();
128  tt3.add();*/
129 
130  tt2.template get<0>(0) = 80;
131  tt2.template get<1>(0).add();
132  tt2.template get<1>(0).template get<0>(0) = 500;
133  tt2.template get<0>(0) = 180;
134  tt2.template get<1>(0).add();
135  tt2.template get<1>(0).template get<0>(1) = 600;
136  tt2.template get<0>(0) = 280;;
137  tt2.template get<1>(0).add();
138  tt2.template get<1>(0).template get<0>(2) = 700;
139  tt2.template get<1>(0).template hostToDevice<0>();
140 
141  tt2.template get<0>(1) = 10080;
142  tt2.template get<1>(1).add();
143  tt2.template get<1>(1).template get<0>(0) = 1500;
144  tt2.template get<0>(1) = 20080;
145  tt2.template get<1>(1).add();
146  tt2.template get<1>(1).template get<0>(1) = 1600;
147  tt2.template get<0>(1) = 30080;
148  tt2.template get<1>(1).add();
149  tt2.template get<1>(1).template get<0>(2) = 1700;
150  tt2.template get<1>(1).template hostToDevice<0>();
151 
152  tt2.template get<0>(2) = 40080;
153  tt2.template get<1>(2).add();
154  tt2.template get<1>(2).template get<0>(0) = 2500;
155  tt2.template get<0>(2) = 50080;
156  tt2.template get<1>(2).add();
157  tt2.template get<1>(2).template get<0>(1) = 2600;
158  tt2.template get<0>(2) = 60080;
159  tt2.template get<1>(2).add();
160  tt2.template get<1>(2).template get<0>(2) = 2700;
161  tt2.template get<1>(2).template hostToDevice<0>();
162 
163  tt2.template hostToDevice<1>();
165  vg.resize(9);
166 
167  CUDA_LAUNCH_DIM3(kernel_recursive_check,1,1,tt2.toKernel(),vg.toKernel());
168 
169  vg.template deviceToHost<0>();
170 
171  BOOST_REQUIRE_EQUAL(vg.template get<0>(0),500);
172  BOOST_REQUIRE_EQUAL(vg.template get<0>(1),600);
173  BOOST_REQUIRE_EQUAL(vg.template get<0>(2),700);
174  BOOST_REQUIRE_EQUAL(vg.template get<0>(3),1500);
175  BOOST_REQUIRE_EQUAL(vg.template get<0>(4),1600);
176  BOOST_REQUIRE_EQUAL(vg.template get<0>(5),1700);
177  BOOST_REQUIRE_EQUAL(vg.template get<0>(6),2500);
178  BOOST_REQUIRE_EQUAL(vg.template get<0>(7),2600);
179  BOOST_REQUIRE_EQUAL(vg.template get<0>(8),2700);
180 }
181 
182 BOOST_AUTO_TEST_CASE( vector_cuda_to_cpu_operator_equal )
183 {
188 
189  v2.resize(3000);
190 
191  for (size_t i = 0 ; i < 3000 ; i++)
192  {
193  v2.template get<0>(i) = i;
194  v2.template get<1>(i) = i+300;
195  v2.template get<2>(i) = i+6123.0;
196  }
197 
198  v1 = v2;
199  v3 = v2;
200  v4 = v1;
201 
202  for (size_t i = 0 ; i < v2.size() ; i++)
203  {
204  BOOST_REQUIRE_EQUAL(v2.template get<0>(i),v1.template get<0>(i));
205  BOOST_REQUIRE_EQUAL(v2.template get<0>(i),v3.template get<0>(i));
206  BOOST_REQUIRE_EQUAL(v2.template get<0>(i),v4.template get<0>(i));
207 
208  BOOST_REQUIRE_EQUAL(v2.template get<1>(i),v1.template get<1>(i));
209  BOOST_REQUIRE_EQUAL(v2.template get<1>(i),v3.template get<1>(i));
210  BOOST_REQUIRE_EQUAL(v2.template get<1>(i),v4.template get<1>(i));
211 
212  BOOST_REQUIRE_EQUAL(v2.template get<2>(i),v1.template get<2>(i));
213  BOOST_REQUIRE_EQUAL(v2.template get<2>(i),v3.template get<2>(i));
214  BOOST_REQUIRE_EQUAL(v2.template get<2>(i),v4.template get<2>(i));
215  }
216 }
217 
218 
219 BOOST_AUTO_TEST_CASE( vector_cuda_host_to_device_check )
220 {
222 
223  v1.resize(3);
224 
225  for (size_t i = 0 ; i < v1.size() ; i++)
226  {
227  v1.template get<0>(i) = i;
228  v1.template get<1>(i) = i+300;
229  v1.template get<2>(i) = i+6123.0;
230  }
231 
232  v1.hostToDevice<0,1,2>();
233 
234  // Now we reset the element 0, 1
235 
236  for (size_t i = 0 ; i < v1.size()-1 ; i++)
237  {
238  v1.template get<0>(i) = 0;
239  v1.template get<1>(i) = 0;
240  v1.template get<2>(i) = 0;
241  }
242 
243  v1.hostToDevice<0,1,2>(v1.size()-1,v1.size()-1);
244 
245  v1.deviceToHost<0,1,2>();
246 
247  for (size_t i = 0 ; i < v1.size() ; i++)
248  {
249  BOOST_REQUIRE_EQUAL(v1.template get<0>(i),i);
250  BOOST_REQUIRE_EQUAL(v1.template get<1>(i),i+300);
251  BOOST_REQUIRE_EQUAL(v1.template get<2>(i),i+6123.0);
252  }
253 }
254 
255 BOOST_AUTO_TEST_CASE( vector_cuda_host_to_device_check_NUMA )
256 {
258 
259  v1.resize(3);
260 
261  for (size_t i = 0 ; i < v1.size() ; i++)
262  {
263  v1.template get<0>(i) = i;
264  v1.template get<1>(i) = i+300;
265  v1.template get<2>(i) = i+6123.0;
266  }
267 
268  v1.hostToDeviceNUMA<0,1,2>();
269 
270  // Now we reset the element 0, 1
271 
272  for (size_t i = 0 ; i < v1.size()-1 ; i++)
273  {
274  v1.template get<0>(i) = 0;
275  v1.template get<1>(i) = 0;
276  v1.template get<2>(i) = 0;
277  }
278 
279  v1.hostToDeviceNUMA<0,1,2>(v1.size()-1,v1.size()-1);
280 
281  v1.deviceToHost<0,1,2>();
282 
283  for (size_t i = 0 ; i < v1.size() ; i++)
284  {
285  BOOST_REQUIRE_EQUAL(v1.template get<0>(i),i);
286  BOOST_REQUIRE_EQUAL(v1.template get<1>(i),i+300);
287  BOOST_REQUIRE_EQUAL(v1.template get<2>(i),i+6123.0);
288  }
289 }
290 
291 BOOST_AUTO_TEST_CASE( vector_cuda_host_to_device_vector_and_point_tensor )
292 {
294 
295  v1.resize(100);
296 
297  for (size_t i = 0 ; i < 50 ; i++)
298  {
299  v1.template get<0>(i)[0] = i+1500;
300  v1.template get<0>(i)[1] = i+2200;
301  v1.template get<0>(i)[2] = i+2600;
302 
303  v1.template get<1>(i)[0][0] = i+6000;
304  v1.template get<1>(i)[0][1] = i+7200;
305  v1.template get<1>(i)[0][2] = i+8600;
306  v1.template get<1>(i)[1][0] = i+9000;
307  v1.template get<1>(i)[1][1] = i+10200;
308  v1.template get<1>(i)[1][2] = i+11600;
309  v1.template get<1>(i)[2][0] = i+12800;
310  v1.template get<1>(i)[2][1] = i+22200;
311  v1.template get<1>(i)[2][2] = i+23600;
312  }
313 
314  v1.hostToDevice<0,1>(0,50);
315 
316  for (size_t i = 50 ; i < 100 ; i++)
317  {
318  v1.template get<0>(i)[0] = i+1500;
319  v1.template get<0>(i)[1] = i+2200;
320  v1.template get<0>(i)[2] = i+2600;
321 
322  v1.template get<1>(i)[0][0] = i+6000;
323  v1.template get<1>(i)[0][1] = i+7200;
324  v1.template get<1>(i)[0][2] = i+8600;
325  v1.template get<1>(i)[1][0] = i+9000;
326  v1.template get<1>(i)[1][1] = i+10200;
327  v1.template get<1>(i)[1][2] = i+11600;
328  v1.template get<1>(i)[2][0] = i+12800;
329  v1.template get<1>(i)[2][1] = i+22200;
330  v1.template get<1>(i)[2][2] = i+23600;
331  }
332 
333  v1.hostToDevice<0,1>(50,99);
334 
335  v1.deviceToHost<0,1>();
336 
337  for (size_t i = 0 ; i < 100 ; i++)
338  {
339  BOOST_REQUIRE_EQUAL(v1.template get<0>(i)[0],i+1500);
340  BOOST_REQUIRE_EQUAL(v1.template get<0>(i)[1],i+2200);
341  BOOST_REQUIRE_EQUAL(v1.template get<0>(i)[2],i+2600);
342 
343  BOOST_REQUIRE_EQUAL(v1.template get<1>(i)[0][0],i+6000);
344  BOOST_REQUIRE_EQUAL(v1.template get<1>(i)[0][1],i+7200);
345  BOOST_REQUIRE_EQUAL(v1.template get<1>(i)[0][2],i+8600);
346  BOOST_REQUIRE_EQUAL(v1.template get<1>(i)[1][0],i+9000);
347  BOOST_REQUIRE_EQUAL(v1.template get<1>(i)[1][1],i+10200);
348  BOOST_REQUIRE_EQUAL(v1.template get<1>(i)[1][2],i+11600);
349  BOOST_REQUIRE_EQUAL(v1.template get<1>(i)[2][0],i+12800);
350  BOOST_REQUIRE_EQUAL(v1.template get<1>(i)[2][1],i+22200);
351  BOOST_REQUIRE_EQUAL(v1.template get<1>(i)[2][2],i+23600);
352  }
353 }
354 
355 BOOST_AUTO_TEST_CASE( vector_cuda_copy )
356 {
359 
360  v1.resize(100);
361 
362  auto ite = v1.getIterator();
363 
364  while (ite.isNext())
365  {
366  auto p = ite.get();
367 
368  v1.template get<0>(p) = p + 100;
369 
370  v1.template get<0>(p) = p + 2000;
371  v1.template get<0>(p) = p + 3000;
372  v1.template get<0>(p) = p + 4000;
373 
374  v1.template get<1>(p)[0] = p + 5000;
375  v1.template get<1>(p)[1] = p + 6000;
376  v1.template get<1>(p)[2] = p + 7000;
377 
378  v1.template get<2>(p)[0][0] = p + 8000;
379  v1.template get<2>(p)[0][1] = p + 9000;
380  v1.template get<2>(p)[0][2] = p + 10000;
381 
382  v1.template get<2>(p)[1][0] = p + 11000;
383  v1.template get<2>(p)[1][1] = p + 12000;
384  v1.template get<2>(p)[2][2] = p + 13000;
385 
386  v1.template get<2>(p)[2][0] = p + 14000;
387  v1.template get<2>(p)[2][1] = p + 15000;
388  v1.template get<2>(p)[2][2] = p + 16000;
389 
390  ++ite;
391  }
392 
393  v1.hostToDevice<0,1,2>();
394 
395  ite = v1.getIterator();
396 
397  while (ite.isNext())
398  {
399  auto p = ite.get();
400 
401  v1.template get<0>(p) = p + 6100;
402 
403  v1.template get<0>(p) = p + 62000;
404  v1.template get<0>(p) = p + 63000;
405  v1.template get<0>(p) = p + 64000;
406 
407  v1.template get<1>(p)[0] = p + 65000;
408  v1.template get<1>(p)[1] = p + 66000;
409  v1.template get<1>(p)[2] = p + 67000;
410 
411  v1.template get<2>(p)[0][0] = p + 68000;
412  v1.template get<2>(p)[0][1] = p + 69000;
413  v1.template get<2>(p)[0][2] = p + 610000;
414 
415  v1.template get<2>(p)[1][0] = p + 611000;
416  v1.template get<2>(p)[1][1] = p + 612000;
417  v1.template get<2>(p)[2][2] = p + 613000;
418 
419  v1.template get<2>(p)[2][0] = p + 614000;
420  v1.template get<2>(p)[2][1] = p + 615000;
421  v1.template get<2>(p)[2][2] = p + 616000;
422 
423  ++ite;
424  }
425 
426  v2 = v1;
427 
428  // first check the CPU
429 
430  bool match = true;
431 
432  ite = v2.getIterator();
433 
434  while (ite.isNext())
435  {
436  auto p = ite.get();
437 
438  match = v2.template get<0>(p) == p + 6100;
439 
440  match = v2.template get<0>(p) == p + 62000;
441  match = v2.template get<0>(p) == p + 63000;
442  match = v2.template get<0>(p) == p + 64000;
443 
444  match = v2.template get<1>(p)[0] == p + 65000;
445  match = v2.template get<1>(p)[1] == p + 66000;
446  match = v2.template get<1>(p)[2] == p + 67000;
447 
448  match = v2.template get<2>(p)[0][0] == p + 68000;
449  match = v2.template get<2>(p)[0][1] == p + 69000;
450  match = v2.template get<2>(p)[0][2] == p + 610000;
451 
452  match = v2.template get<2>(p)[1][0] == p + 611000;
453  match = v2.template get<2>(p)[1][1] == p + 612000;
454  match = v2.template get<2>(p)[2][2] == p + 613000;
455 
456  match = v2.template get<2>(p)[2][0] == p + 614000;
457  match = v2.template get<2>(p)[2][1] == p + 615000;
458  match = v2.template get<2>(p)[2][2] == p + 616000;
459 
460  ++ite;
461  }
462 
463  BOOST_REQUIRE_EQUAL(match,true);
464 
465  v2.deviceToHost<0,1,2>();
466 
467  ite = v2.getIterator();
468 
469  while (ite.isNext())
470  {
471  auto p = ite.get();
472 
473  match = v2.template get<0>(p) == p + 100;
474 
475  match = v2.template get<0>(p) == p + 2000;
476  match = v2.template get<0>(p) == p + 3000;
477  match = v2.template get<0>(p) == p + 4000;
478 
479  match = v2.template get<1>(p)[0] == p + 5000;
480  match = v2.template get<1>(p)[1] == p + 6000;
481  match = v2.template get<1>(p)[2] == p + 7000;
482 
483  match = v2.template get<2>(p)[0][0] == p + 8000;
484  match = v2.template get<2>(p)[0][1] == p + 9000;
485  match = v2.template get<2>(p)[0][2] == p + 10000;
486 
487  match = v2.template get<2>(p)[1][0] == p + 11000;
488  match = v2.template get<2>(p)[1][1] == p + 12000;
489  match = v2.template get<2>(p)[2][2] == p + 13000;
490 
491  match = v2.template get<2>(p)[2][0] == p + 14000;
492  match = v2.template get<2>(p)[2][1] == p + 15000;
493  match = v2.template get<2>(p)[2][2] == p + 16000;
494 
495  if (match == false)
496  {
497  std::cout << v2.template get<0>(p) << std::endl;
498  }
499 
500  ++ite;
501  }
502 
503  BOOST_REQUIRE_EQUAL(match,true);
504 }
505 
506 BOOST_AUTO_TEST_SUITE_END()
507 
This class allocate, and destroy CPU memory.
Definition: HeapMemory.hpp:40
Grow policy define how the vector should grow every time we exceed the size.
Implementation of 1-D std::vector like structure.
Definition: map_vector.hpp:204
size_t size()
Stub size.
Definition: map_vector.hpp:212
aggregate of properties, from a list of object if create a struct that follow the OPENFPM native stru...
Definition: aggregate.hpp:221
Transform the boost::fusion::vector into memory specification (memory_traits)
Definition: memory_conf.hpp:84