OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed data structures
 
map_vector_cuda_funcs_tests.cu
/*
 * map_vector_cuda_funcs_tests.cu
 *
 *  Created on: Aug 17, 2018
 *      Author: i-bird
 */

#define BOOST_GPU_ENABLED __host__ __device__

#include "util/cuda_launch.hpp"

#include "config.h"
#define BOOST_TEST_DYN_LINK
#include <boost/test/unit_test.hpp>

#include "util/cuda_util.hpp"
#include "Vector/map_vector.hpp"
#include "util/tokernel_transformation.hpp"

BOOST_AUTO_TEST_SUITE( vector_cuda_funcs_tests )

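// add_prp_device: append the elements of vg_data2 to vg_data for the selected properties,
// operating on the device buffers, then check the appended range on the host.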
BOOST_AUTO_TEST_CASE( vector_cuda_funcs_add_prp_device )
{
    // ... (declarations of vg_data and vg_data2 elided) ...

    vg_data.resize(100);
    vg_data2.resize(100);

    // we fill vg_data with something

    for (size_t i = 0 ; i < 100 ; i++)
    {
        vg_data.template get<0>(i) = 2.5 + i;

        vg_data.template get<1>(i)[0] = 4.6 + i;
        vg_data.template get<1>(i)[1] = 7.8 + i;
        vg_data.template get<1>(i)[2] = 9.0 + i;

        vg_data2.template get<0>(i) = 8.5 + i;

        vg_data2.template get<1>(i)[0] = 1.6 + i;
        vg_data2.template get<1>(i)[1] = 3.8 + i;
        vg_data2.template get<1>(i)[2] = 5.1 + i;
    }

    vg_data.hostToDevice<0,1>();
    vg_data2.hostToDevice<0,1>();

    vg_data.add_prp_device<aggregate<float,float[3],float[3][3]>,
                           // ... (elided template parameters) ...
                           OPENFPM_NATIVE,
                           // ... (elided template parameter) ...
                           0,1>(vg_data2);

    vg_data.deviceToHost<0,1>();

    BOOST_REQUIRE_EQUAL(vg_data.size(),200);

    bool match = true;
    for (unsigned int i = 100 ; i < 200 ; i++)
    {
        match &= vg_data.template get<0>(i) == vg_data2.template get<0>(i-100);

        match &= vg_data.template get<1>(i)[0] == vg_data2.template get<1>(i-100)[0];
        match &= vg_data.template get<1>(i)[1] == vg_data2.template get<1>(i-100)[1];
        match &= vg_data.template get<1>(i)[2] == vg_data2.template get<1>(i-100)[2];
    }

    BOOST_REQUIRE_EQUAL(match,true);
}

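// Check that the toKernel type transformation turns (possibly nested) vector_gpu types
// into the corresponding vector_gpu_ker types.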
BOOST_AUTO_TEST_CASE( vector_cuda_to_kernel_recursive2 )
{
    // ... (the definitions of the transformed types tker1, tker2, tker3 and tker4 are elided) ...

    bool test = std::is_same<tker1,openfpm::vector_gpu_ker<aggregate<int, long>, memory_traits_inte>>::value;

    BOOST_REQUIRE_EQUAL(test,true);

    test = std::is_same<tker2,openfpm::vector_gpu_ker<aggregate<int, openfpm::vector_gpu_ker<aggregate<long>, memory_traits_inte> >, memory_traits_inte>>::value;

    BOOST_REQUIRE_EQUAL(test,true);

    test = std::is_same<tker3,openfpm::vector_gpu_ker<aggregate<int, openfpm::vector_gpu_ker<aggregate<Box<2,float>>, memory_traits_inte> >, memory_traits_inte>>::value;

    BOOST_REQUIRE_EQUAL(test,true);

    test = std::is_same<tker4,openfpm::vector_gpu_ker<Box<3,float>,memory_traits_inte>>::value;

    BOOST_REQUIRE_EQUAL(test,true);
}

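// Kernel used by the test below: it flattens the nested vectors stored in property 1 of
// vvrc, writing their property-0 values one after the other into the output vector vot.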
template<typename vv_rc,typename vector_output_type>
__global__ void kernel_recursive_check(vv_rc vvrc, vector_output_type vot)
{
    int k = 0;
    for (int i = 0 ; i < vvrc.size() ; i++)
    {
        for (int j = 0 ; j < vvrc.template get<1>(i).size() ; j++)
        {
            vot.template get<0>(k) = vvrc.template get<1>(i).template get<0>(j);
            k++;
        }
    }
}

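// Fill a vector whose property 1 is itself a vector, copy the nested data to the device
// and run kernel_recursive_check on the toKernel() representations to verify that the
// nested values are accessible inside the kernel.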
BOOST_AUTO_TEST_CASE( vector_cuda_to_kernel_recursive2_test_toKernel )
{
    // ... (the definitions of test2_type and test3_type are elided) ...

    test2_type tt2;
    test3_type tt3;

    tt2.add_no_device();
    tt2.add_no_device();
    tt2.add_no_device();

/*  tt3.add();
    tt3.add();
    tt3.add();*/

    tt2.template get<0>(0) = 80;
    tt2.template get<1>(0).add();
    tt2.template get<1>(0).template get<0>(0) = 500;
    tt2.template get<0>(0) = 180;
    tt2.template get<1>(0).add();
    tt2.template get<1>(0).template get<0>(1) = 600;
    tt2.template get<0>(0) = 280;
    tt2.template get<1>(0).add();
    tt2.template get<1>(0).template get<0>(2) = 700;
    tt2.template get<1>(0).template hostToDevice<0>();

    tt2.template get<0>(1) = 10080;
    tt2.template get<1>(1).add();
    tt2.template get<1>(1).template get<0>(0) = 1500;
    tt2.template get<0>(1) = 20080;
    tt2.template get<1>(1).add();
    tt2.template get<1>(1).template get<0>(1) = 1600;
    tt2.template get<0>(1) = 30080;
    tt2.template get<1>(1).add();
    tt2.template get<1>(1).template get<0>(2) = 1700;
    tt2.template get<1>(1).template hostToDevice<0>();

    tt2.template get<0>(2) = 40080;
    tt2.template get<1>(2).add();
    tt2.template get<1>(2).template get<0>(0) = 2500;
    tt2.template get<0>(2) = 50080;
    tt2.template get<1>(2).add();
    tt2.template get<1>(2).template get<0>(1) = 2600;
    tt2.template get<0>(2) = 60080;
    tt2.template get<1>(2).add();
    tt2.template get<1>(2).template get<0>(2) = 2700;
    tt2.template get<1>(2).template hostToDevice<0>();

    tt2.template hostToDevice<1>();

    // ... (declaration of the output vector vg elided) ...
    vg.resize(9);

    CUDA_LAUNCH_DIM3(kernel_recursive_check,1,1,tt2.toKernel(),vg.toKernel());

    vg.template deviceToHost<0>();

    BOOST_REQUIRE_EQUAL(vg.template get<0>(0),500);
    BOOST_REQUIRE_EQUAL(vg.template get<0>(1),600);
    BOOST_REQUIRE_EQUAL(vg.template get<0>(2),700);
    BOOST_REQUIRE_EQUAL(vg.template get<0>(3),1500);
    BOOST_REQUIRE_EQUAL(vg.template get<0>(4),1600);
    BOOST_REQUIRE_EQUAL(vg.template get<0>(5),1700);
    BOOST_REQUIRE_EQUAL(vg.template get<0>(6),2500);
    BOOST_REQUIRE_EQUAL(vg.template get<0>(7),2600);
    BOOST_REQUIRE_EQUAL(vg.template get<0>(8),2700);
}

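// Check that operator= copies the elements correctly between the different vector
// instances v1, v2, v3 and v4.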
BOOST_AUTO_TEST_CASE( vector_cuda_to_cpu_operator_equal )
{
    // ... (declarations of v1, v2, v3 and v4 elided) ...

    v2.resize(3000);

    for (size_t i = 0 ; i < 3000 ; i++)
    {
        v2.template get<0>(i) = i;
        v2.template get<1>(i) = i+300;
        v2.template get<2>(i) = i+6123.0;
    }

    v1 = v2;
    v3 = v2;
    v4 = v1;

    for (size_t i = 0 ; i < v2.size() ; i++)
    {
        BOOST_REQUIRE_EQUAL(v2.template get<0>(i),v1.template get<0>(i));
        BOOST_REQUIRE_EQUAL(v2.template get<0>(i),v3.template get<0>(i));
        BOOST_REQUIRE_EQUAL(v2.template get<0>(i),v4.template get<0>(i));

        BOOST_REQUIRE_EQUAL(v2.template get<1>(i),v1.template get<1>(i));
        BOOST_REQUIRE_EQUAL(v2.template get<1>(i),v3.template get<1>(i));
        BOOST_REQUIRE_EQUAL(v2.template get<1>(i),v4.template get<1>(i));

        BOOST_REQUIRE_EQUAL(v2.template get<2>(i),v1.template get<2>(i));
        BOOST_REQUIRE_EQUAL(v2.template get<2>(i),v3.template get<2>(i));
        BOOST_REQUIRE_EQUAL(v2.template get<2>(i),v4.template get<2>(i));
    }
}

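// Check the ranged hostToDevice: after a full host-to-device copy the first two elements
// are zeroed on the host, only the last element is copied again, and a deviceToHost must
// restore the original values everywhere.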
BOOST_AUTO_TEST_CASE( vector_cuda_host_to_device_check )
{
    // ... (declaration of v1 elided) ...

    v1.resize(3);

    for (size_t i = 0 ; i < v1.size() ; i++)
    {
        v1.template get<0>(i) = i;
        v1.template get<1>(i) = i+300;
        v1.template get<2>(i) = i+6123.0;
    }

    v1.hostToDevice<0,1,2>();

    // Now we reset the elements 0 and 1

    for (size_t i = 0 ; i < v1.size()-1 ; i++)
    {
        v1.template get<0>(i) = 0;
        v1.template get<1>(i) = 0;
        v1.template get<2>(i) = 0;
    }

    v1.hostToDevice<0,1,2>(v1.size()-1,v1.size()-1);

    v1.deviceToHost<0,1,2>();

    for (size_t i = 0 ; i < v1.size() ; i++)
    {
        BOOST_REQUIRE_EQUAL(v1.template get<0>(i),i);
        BOOST_REQUIRE_EQUAL(v1.template get<1>(i),i+300);
        BOOST_REQUIRE_EQUAL(v1.template get<2>(i),i+6123.0);
    }
}

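// Same check as above, using hostToDeviceNUMA for the host-to-device copies.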
BOOST_AUTO_TEST_CASE( vector_cuda_host_to_device_check_NUMA )
{
    // ... (declaration of v1 elided) ...

    v1.resize(3);

    for (size_t i = 0 ; i < v1.size() ; i++)
    {
        v1.template get<0>(i) = i;
        v1.template get<1>(i) = i+300;
        v1.template get<2>(i) = i+6123.0;
    }

    v1.hostToDeviceNUMA<0,1,2>();

    // Now we reset the elements 0 and 1

    for (size_t i = 0 ; i < v1.size()-1 ; i++)
    {
        v1.template get<0>(i) = 0;
        v1.template get<1>(i) = 0;
        v1.template get<2>(i) = 0;
    }

    v1.hostToDeviceNUMA<0,1,2>(v1.size()-1,v1.size()-1);

    v1.deviceToHost<0,1,2>();

    for (size_t i = 0 ; i < v1.size() ; i++)
    {
        BOOST_REQUIRE_EQUAL(v1.template get<0>(i),i);
        BOOST_REQUIRE_EQUAL(v1.template get<1>(i),i+300);
        BOOST_REQUIRE_EQUAL(v1.template get<2>(i),i+6123.0);
    }
}

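// Check hostToDevice on a 3-component vector property and a 3x3 tensor property,
// copying the two halves of the container with two separate ranged calls.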
BOOST_AUTO_TEST_CASE( vector_cuda_host_to_device_vector_and_point_tensor )
{
    // ... (declaration of v1 elided) ...

    v1.resize(100);

    for (size_t i = 0 ; i < 50 ; i++)
    {
        v1.template get<0>(i)[0] = i+1500;
        v1.template get<0>(i)[1] = i+2200;
        v1.template get<0>(i)[2] = i+2600;

        v1.template get<1>(i)[0][0] = i+6000;
        v1.template get<1>(i)[0][1] = i+7200;
        v1.template get<1>(i)[0][2] = i+8600;
        v1.template get<1>(i)[1][0] = i+9000;
        v1.template get<1>(i)[1][1] = i+10200;
        v1.template get<1>(i)[1][2] = i+11600;
        v1.template get<1>(i)[2][0] = i+12800;
        v1.template get<1>(i)[2][1] = i+22200;
        v1.template get<1>(i)[2][2] = i+23600;
    }

    v1.hostToDevice<0,1>(0,50);

    for (size_t i = 50 ; i < 100 ; i++)
    {
        v1.template get<0>(i)[0] = i+1500;
        v1.template get<0>(i)[1] = i+2200;
        v1.template get<0>(i)[2] = i+2600;

        v1.template get<1>(i)[0][0] = i+6000;
        v1.template get<1>(i)[0][1] = i+7200;
        v1.template get<1>(i)[0][2] = i+8600;
        v1.template get<1>(i)[1][0] = i+9000;
        v1.template get<1>(i)[1][1] = i+10200;
        v1.template get<1>(i)[1][2] = i+11600;
        v1.template get<1>(i)[2][0] = i+12800;
        v1.template get<1>(i)[2][1] = i+22200;
        v1.template get<1>(i)[2][2] = i+23600;
    }

    v1.hostToDevice<0,1>(50,99);

    v1.deviceToHost<0,1>();

    for (size_t i = 0 ; i < 100 ; i++)
    {
        BOOST_REQUIRE_EQUAL(v1.template get<0>(i)[0],i+1500);
        BOOST_REQUIRE_EQUAL(v1.template get<0>(i)[1],i+2200);
        BOOST_REQUIRE_EQUAL(v1.template get<0>(i)[2],i+2600);

        BOOST_REQUIRE_EQUAL(v1.template get<1>(i)[0][0],i+6000);
        BOOST_REQUIRE_EQUAL(v1.template get<1>(i)[0][1],i+7200);
        BOOST_REQUIRE_EQUAL(v1.template get<1>(i)[0][2],i+8600);
        BOOST_REQUIRE_EQUAL(v1.template get<1>(i)[1][0],i+9000);
        BOOST_REQUIRE_EQUAL(v1.template get<1>(i)[1][1],i+10200);
        BOOST_REQUIRE_EQUAL(v1.template get<1>(i)[1][2],i+11600);
        BOOST_REQUIRE_EQUAL(v1.template get<1>(i)[2][0],i+12800);
        BOOST_REQUIRE_EQUAL(v1.template get<1>(i)[2][1],i+22200);
        BOOST_REQUIRE_EQUAL(v1.template get<1>(i)[2][2],i+23600);
    }
}

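// Check that copying a vector with operator= duplicates both the host and the device
// buffer: the host copy must show the second set of values, while deviceToHost must bring
// back the first set, which was transferred to the device before the host was overwritten.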
BOOST_AUTO_TEST_CASE( vector_cuda_copy )
{
    // ... (declarations of v1 and v2 elided) ...

    v1.resize(100);

    auto ite = v1.getIterator();

    while (ite.isNext())
    {
        auto p = ite.get();

        v1.template get<0>(p) = p + 100;

        v1.template get<0>(p) = p + 2000;
        v1.template get<0>(p) = p + 3000;
        v1.template get<0>(p) = p + 4000;

        v1.template get<1>(p)[0] = p + 5000;
        v1.template get<1>(p)[1] = p + 6000;
        v1.template get<1>(p)[2] = p + 7000;

        v1.template get<2>(p)[0][0] = p + 8000;
        v1.template get<2>(p)[0][1] = p + 9000;
        v1.template get<2>(p)[0][2] = p + 10000;

        v1.template get<2>(p)[1][0] = p + 11000;
        v1.template get<2>(p)[1][1] = p + 12000;
        v1.template get<2>(p)[2][2] = p + 13000;

        v1.template get<2>(p)[2][0] = p + 14000;
        v1.template get<2>(p)[2][1] = p + 15000;
        v1.template get<2>(p)[2][2] = p + 16000;

        ++ite;
    }

    v1.hostToDevice<0,1,2>();

    ite = v1.getIterator();

    while (ite.isNext())
    {
        auto p = ite.get();

        v1.template get<0>(p) = p + 6100;

        v1.template get<0>(p) = p + 62000;
        v1.template get<0>(p) = p + 63000;
        v1.template get<0>(p) = p + 64000;

        v1.template get<1>(p)[0] = p + 65000;
        v1.template get<1>(p)[1] = p + 66000;
        v1.template get<1>(p)[2] = p + 67000;

        v1.template get<2>(p)[0][0] = p + 68000;
        v1.template get<2>(p)[0][1] = p + 69000;
        v1.template get<2>(p)[0][2] = p + 610000;

        v1.template get<2>(p)[1][0] = p + 611000;
        v1.template get<2>(p)[1][1] = p + 612000;
        v1.template get<2>(p)[2][2] = p + 613000;

        v1.template get<2>(p)[2][0] = p + 614000;
        v1.template get<2>(p)[2][1] = p + 615000;
        v1.template get<2>(p)[2][2] = p + 616000;

        ++ite;
    }

    v2 = v1;

    // first check the CPU

    bool match = true;

    ite = v2.getIterator();

    while (ite.isNext())
    {
        auto p = ite.get();

        match = v2.template get<0>(p) == p + 6100;

        match = v2.template get<0>(p) == p + 62000;
        match = v2.template get<0>(p) == p + 63000;
        match = v2.template get<0>(p) == p + 64000;

        match = v2.template get<1>(p)[0] == p + 65000;
        match = v2.template get<1>(p)[1] == p + 66000;
        match = v2.template get<1>(p)[2] == p + 67000;

        match = v2.template get<2>(p)[0][0] == p + 68000;
        match = v2.template get<2>(p)[0][1] == p + 69000;
        match = v2.template get<2>(p)[0][2] == p + 610000;

        match = v2.template get<2>(p)[1][0] == p + 611000;
        match = v2.template get<2>(p)[1][1] == p + 612000;
        match = v2.template get<2>(p)[2][2] == p + 613000;

        match = v2.template get<2>(p)[2][0] == p + 614000;
        match = v2.template get<2>(p)[2][1] == p + 615000;
        match = v2.template get<2>(p)[2][2] == p + 616000;

        ++ite;
    }

    BOOST_REQUIRE_EQUAL(match,true);

    v2.deviceToHost<0,1,2>();

    ite = v2.getIterator();

    while (ite.isNext())
    {
        auto p = ite.get();

        match = v2.template get<0>(p) == p + 100;

        match = v2.template get<0>(p) == p + 2000;
        match = v2.template get<0>(p) == p + 3000;
        match = v2.template get<0>(p) == p + 4000;

        match = v2.template get<1>(p)[0] == p + 5000;
        match = v2.template get<1>(p)[1] == p + 6000;
        match = v2.template get<1>(p)[2] == p + 7000;

        match = v2.template get<2>(p)[0][0] == p + 8000;
        match = v2.template get<2>(p)[0][1] == p + 9000;
        match = v2.template get<2>(p)[0][2] == p + 10000;

        match = v2.template get<2>(p)[1][0] == p + 11000;
        match = v2.template get<2>(p)[1][1] == p + 12000;
        match = v2.template get<2>(p)[2][2] == p + 13000;

        match = v2.template get<2>(p)[2][0] == p + 14000;
        match = v2.template get<2>(p)[2][1] == p + 15000;
        match = v2.template get<2>(p)[2][2] == p + 16000;

        if (match == false)
        {
            std::cout << v2.template get<0>(p) << std::endl;
        }

        ++ite;
    }

    BOOST_REQUIRE_EQUAL(match,true);
}

BOOST_AUTO_TEST_SUITE_END()