9#define BOOST_TEST_DYN_LINK
10#include <boost/test/unit_test.hpp>
11#include "Grid/map_grid.hpp"
12#include "Point_test.hpp"
13#include "Grid/grid_util_test.hpp"
14#include "cuda_grid_unit_tests_func.cuh"
15#include "util/cuda_launch.hpp"
16#include "Grid/grid_test_utils.hpp"
18BOOST_AUTO_TEST_SUITE( grid_gpu_func_test )
21BOOST_AUTO_TEST_CASE (gpu_computation_func)
25 size_t sz[3] = {64,64,64};
33 auto gcf = c3.getGPUIterator(k1,k2);
37 BOOST_REQUIRE_EQUAL(gcf.thr.x,8ul);
38 BOOST_REQUIRE_EQUAL(gcf.thr.y,8ul);
39 BOOST_REQUIRE_EQUAL(gcf.thr.z,4ul);
41 BOOST_REQUIRE_EQUAL(gcf.wthr.x,8ul);
42 BOOST_REQUIRE_EQUAL(gcf.wthr.y,8ul);
43 BOOST_REQUIRE_EQUAL(gcf.wthr.z,16ul);
47 BOOST_REQUIRE_EQUAL(gcf.thr.x,16ul);
48 BOOST_REQUIRE_EQUAL(gcf.thr.y,8ul);
49 BOOST_REQUIRE_EQUAL(gcf.thr.z,8ul);
51 BOOST_REQUIRE_EQUAL(gcf.wthr.x,4ul);
52 BOOST_REQUIRE_EQUAL(gcf.wthr.y,8ul);
53 BOOST_REQUIRE_EQUAL(gcf.wthr.z,8ul);
61 auto gcf2 = c3.getGPUIterator(k3,k4);
65 BOOST_REQUIRE_EQUAL(gcf2.thr.x,8ul);
66 BOOST_REQUIRE_EQUAL(gcf2.thr.y,8ul);
67 BOOST_REQUIRE_EQUAL(gcf2.thr.z,4ul);
69 BOOST_REQUIRE_EQUAL(gcf2.wthr.x,2ul);
70 BOOST_REQUIRE_EQUAL(gcf2.wthr.y,2ul);
71 BOOST_REQUIRE_EQUAL(gcf2.wthr.z,4ul);
75 BOOST_REQUIRE_EQUAL(gcf2.thr.x,13ul);
76 BOOST_REQUIRE_EQUAL(gcf2.thr.y,8ul);
77 BOOST_REQUIRE_EQUAL(gcf2.thr.z,8ul);
79 BOOST_REQUIRE_EQUAL(gcf2.wthr.x,1ul);
80 BOOST_REQUIRE_EQUAL(gcf2.wthr.y,2ul);
81 BOOST_REQUIRE_EQUAL(gcf2.wthr.z,2ul);
85 gcf2 = c3.getGPUIterator(k3,k4,511);
87 BOOST_REQUIRE_EQUAL(gcf2.thr.x,8ul);
88 BOOST_REQUIRE_EQUAL(gcf2.thr.y,8ul);
89 BOOST_REQUIRE_EQUAL(gcf2.thr.z,4ul);
91 BOOST_REQUIRE_EQUAL(gcf2.wthr.x,2ul);
92 BOOST_REQUIRE_EQUAL(gcf2.wthr.y,2ul);
93 BOOST_REQUIRE_EQUAL(gcf2.wthr.z,4ul);
95 gcf2 = c3.getGPUIterator(k3,k4,1);
97 BOOST_REQUIRE_EQUAL(gcf2.thr.x,1ul);
98 BOOST_REQUIRE_EQUAL(gcf2.thr.y,1ul);
99 BOOST_REQUIRE_EQUAL(gcf2.thr.z,1ul);
101 BOOST_REQUIRE_EQUAL(gcf2.wthr.x,13ul);
102 BOOST_REQUIRE_EQUAL(gcf2.wthr.y,13ul);
103 BOOST_REQUIRE_EQUAL(gcf2.wthr.z,13ul);
105 gcf2 = c3.getGPUIterator(k3,k5,32);
107 BOOST_REQUIRE_EQUAL(gcf2.thr.x,4ul);
108 BOOST_REQUIRE_EQUAL(gcf2.thr.y,4ul);
109 BOOST_REQUIRE_EQUAL(gcf2.thr.z,2ul);
111 BOOST_REQUIRE_EQUAL(gcf2.wthr.x,3ul);
112 BOOST_REQUIRE_EQUAL(gcf2.wthr.y,3ul);
113 BOOST_REQUIRE_EQUAL(gcf2.wthr.z,7ul);
115 gcf2 = c3.getGPUIterator(k3,k5,1);
117 BOOST_REQUIRE_EQUAL(gcf2.thr.x,1ul);
118 BOOST_REQUIRE_EQUAL(gcf2.thr.y,1ul);
119 BOOST_REQUIRE_EQUAL(gcf2.thr.z,1ul);
121 BOOST_REQUIRE_EQUAL(gcf2.wthr.x,11ul);
122 BOOST_REQUIRE_EQUAL(gcf2.wthr.y,12ul);
123 BOOST_REQUIRE_EQUAL(gcf2.wthr.z,13ul);
129BOOST_AUTO_TEST_CASE (gpu_computation)
134 size_t sz[3] = {64,64,64};
138 test_layout_gridNd<3>(c3,sz[0]);
140 gpu_grid_3D_compute(c3);
142 c3.deviceToHost<0>();
144 auto it = c3.getIterator();
151 good &= c3.getGrid().LinId(key) == c3.template get<0>(key);
156 BOOST_REQUIRE_EQUAL(good,
true);
163BOOST_AUTO_TEST_CASE (gpu_computation_lambda)
168 size_t sz[3] = {64,64,64};
175 auto c3_k = c3.toKernel();
177 auto lamb = [c3_k] __device__ (dim3 & blockIdx, dim3 & threadIdx)
180 blockIdx.y * blockDim.y + threadIdx.y,
181 blockIdx.z * blockDim.z + threadIdx.z});
183 c3_k.template get<0>(p) = 5.0;
185 c3_k.template get<1>(p)[0] = 5.0;
186 c3_k.template get<1>(p)[1] = 5.0;
188 c3_k.template get<2>(p)[0][0] = 5.0;
189 c3_k.template get<2>(p)[0][1] = 5.0;
190 c3_k.template get<2>(p)[1][0] = 5.0;
191 c3_k.template get<2>(p)[1][1] = 5.0;
194 auto ite = c3.getGPUIterator({0,0,0},{63,63,63});
196 CUDA_LAUNCH_LAMBDA(ite,lamb);
198 c3.deviceToHost<0,1,2>();
200 auto it = c3.getIterator();
207 good &= c3.template get<0>(key) == 5.0;
209 good &= c3.template get<1>(key)[0] == 5.0;
210 good &= c3.template get<1>(key)[1] == 5.0;
212 good &= c3.template get<2>(key)[0][0] == 5.0;
213 good &= c3.template get<2>(key)[0][1] == 5.0;
214 good &= c3.template get<2>(key)[1][0] == 5.0;
215 good &= c3.template get<2>(key)[1][1] == 5.0;
220 BOOST_REQUIRE_EQUAL(good,
true);
227BOOST_AUTO_TEST_CASE (gpu_computation_stencil)
232 size_t sz[3] = {64,64,64};
241 test_layout_gridNd<3>(c3,sz[0]);
242 test_layout_gridNd<3>(c2,sz[0]);
247 c2.deviceToHost<0>();
250 auto it = c2.getIterator();
257 good &= c2.get<0>(key) == 1.0;
262 BOOST_REQUIRE_EQUAL(good,
true);
265 gpu_grid_3D_compute(c3);
266 c3.deviceToHost<0>();
269 auto it = c3.getIterator();
276 good &= c3.getGrid().LinId(key) == c3.get<0>(key);
281 BOOST_REQUIRE_EQUAL(good,
true);
284 gpu_grid_3D_compute_stencil(c3,c2,key1,key2);
286 c2.deviceToHost<0>();
288 auto it = c2.getIterator(key1,key2);
295 good &= c2.get<0>(key) == 0;
300 BOOST_REQUIRE_EQUAL(good,
true);
307BOOST_AUTO_TEST_CASE (gpu_computation_grid_stencil)
312 size_t sz[3] = {64,64,64};
323 test_layout_gridNd<3>(c3,sz[0]);
324 test_layout_gridNd<3>(c2,sz[0]);
329 c2.deviceToHost<0>();
332 auto it = c2.getIterator();
339 good &= c2.get<0>(key) == 1.0;
344 BOOST_REQUIRE_EQUAL(good,
true);
347 gpu_grid_3D_compute(c3);
348 c3.deviceToHost<0>();
351 auto it = c3.getIterator();
358 good &= c3.getGrid().LinId(key) == c3.get<0>(key);
363 BOOST_REQUIRE_EQUAL(good,
true);
366 gpu_grid_3D_compute_grid_stencil(c3,c2,key1,key2);
368 c2.deviceToHost<0>();
370 auto it = c2.getIterator(key1,key2);
376 good &= c2.get<0>(key) == 0;
381 BOOST_REQUIRE_EQUAL(good,
true);
385 gpu_grid_fill_vector(c3,zero,keyl);
392BOOST_AUTO_TEST_CASE (gpu_computation_grid_stencil_vector)
397 size_t sz[3] = {64,64,64};
409 gpu_grid_fill_vector(c3,zero,keyl);
412 c3.deviceToHost<4>();
415 auto it = c3.getIterator(key1,key2);
422 good &= c3.get<4>(key)[0] == 1.0;
423 good &= c3.get<4>(key)[1] == 2.0;
424 good &= c3.get<4>(key)[2] == 3.0;
429 BOOST_REQUIRE_EQUAL(good,
true);
434 gpu_grid_3D_compute(c3);
435 gpu_grid_gradient_vector(c3,c2,key1,key2);
438 c2.deviceToHost<4>();
441 auto it = c2.getIterator(key1,key2);
448 good &= c2.get<4>(key)[0] == 1.0;
449 good &= c2.get<4>(key)[1] == 64.0;
450 good &= c2.get<4>(key)[2] == 4096.0;
455 BOOST_REQUIRE_EQUAL(good,
true);
463BOOST_AUTO_TEST_CASE (gpu_swap_vector)
468 size_t sz[3] = {64,64,64};
480 gpu_grid_fill_vector(c2,zero,keyl);
481 gpu_grid_fill_vector2(c3,zero,keyl);
483 auto it4 = c3.getIterator(zero,keyl);
488 auto key = it4.get();
490 c2.get<4>(key)[0] = 1.0;
491 c2.get<4>(key)[1] = 2.0;
492 c2.get<4>(key)[2] = 3.0;
494 c3.get<4>(key)[0] = 1001.0;
495 c3.get<4>(key)[1] = 1002.0;
496 c3.get<4>(key)[2] = 1003.0;
507 auto it = c3.getIterator(zero,keyl);
514 good &= c3.get<4>(key)[0] == 1.0;
515 good &= c3.get<4>(key)[1] == 2.0;
516 good &= c3.get<4>(key)[2] == 3.0;
518 good &= c2.get<4>(key)[0] == 1001.0;
519 good &= c2.get<4>(key)[1] == 1002.0;
520 good &= c2.get<4>(key)[2] == 1003.0;
522 if (good ==
false) {
break;}
526 c3.get<4>(key)[0] = 0.0;
527 c3.get<4>(key)[1] = 0.0;
528 c3.get<4>(key)[2] = 0.0;
530 c2.get<4>(key)[0] = 0.0;
531 c2.get<4>(key)[1] = 0.0;
532 c2.get<4>(key)[2] = 0.0;
537 BOOST_REQUIRE_EQUAL(good,
true);
539 c2.template deviceToHost<4>();
540 c3.template deviceToHost<4>();
542 auto it2 = c3.getIterator(zero,keyl);
547 auto key = it2.get();
549 good &= c3.get<4>(key)[0] == 1.0;
550 good &= c3.get<4>(key)[1] == 2.0;
551 good &= c3.get<4>(key)[2] == 3.0;
553 good &= c2.get<4>(key)[0] == 1001.0;
554 good &= c2.get<4>(key)[1] == 1002.0;
555 good &= c2.get<4>(key)[2] == 1003.0;
557 if (good ==
false) {
break;}
562 BOOST_REQUIRE_EQUAL(good,
true);
571template<
unsigned int dim>
572void gpu_copy_device_test()
576 for (
size_t i = 0 ; i < dim ; i++)
584 auto it4 = c3.getIterator();
587 auto key = it4.get();
589 c3.template get<0>(key) = g.LinId(key);
591 c3.template get<4>(key)[0] = g.LinId(key) + 2000;
592 c3.template get<4>(key)[1] = g.LinId(key) + 6000;
593 c3.template get<4>(key)[2] = g.LinId(key) + 56000;
598 c3.template hostToDevice<0>();
602 for (
size_t i = 0 ; i < dim ; i++)
607 auto it = c3.getIterator();
614 bool to_check =
true;
615 for (
size_t j = 0 ; j < dim ; j++)
617 if (key.get(j) >= (
unsigned int)sz[j])
621 if (to_check ==
true)
623 match &= c3.template get<0>(key) == g.LinId(key);
625 match &= c3.template get<4>(key)[0] == g.LinId(key) + 2000;
626 match &= c3.template get<4>(key)[1] == g.LinId(key) + 6000;
627 match &= c3.template get<4>(key)[2] == g.LinId(key) + 56000;
633 BOOST_REQUIRE_EQUAL(match,
true);
637 auto it2 = c3.getIterator();
642 auto key = it2.get();
644 c3.template get<0>(key) = 0;
651 c3.template deviceToHost<0>();
653 auto it3 = c3.getIterator();
658 auto key = it3.get();
660 bool to_check =
true;
661 for (
size_t j = 0 ; j < dim ; j++)
663 if (key.get(j) >= (
unsigned int)sz[j])
667 if (to_check ==
true)
669 match = c3.template get<0>(key) == g.LinId(key);
671 match &= c3.template get<4>(key)[0] == g.LinId(key) + 2000;
672 match &= c3.template get<4>(key)[1] == g.LinId(key) + 6000;
673 match &= c3.template get<4>(key)[2] == g.LinId(key) + 56000;
679 BOOST_REQUIRE_EQUAL(match,
true);
682BOOST_AUTO_TEST_CASE (gpu_copy_device)
684 gpu_copy_device_test<4>();
685 gpu_copy_device_test<3>();
686 gpu_copy_device_test<2>();
687 gpu_copy_device_test<1>();
690template<
typename gr
id_type>
693 int p = blockIdx.x * blockDim.x + threadIdx.x;
699 gt1.template get<1>(k)[2] = 6.0;
703template<
typename gr
id_type>
708 gt1.template get<2>(k)[2][2] = 6.0;
711BOOST_AUTO_TEST_CASE (gpu_grid_test_se_class1)
713#if defined(SE_CLASS1) && !defined(__clang__)
715 size_t sz[2] = {5,5};
723 int dev_mem[] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
734 CUDA_LAUNCH_DIM3_DEBUG_SE1(test_se1_crash_gt2,wthr,thr,c3.toKernel(),c2.toKernel());
735 cudaDeviceSynchronize();
737 cudaMemcpyFromSymbol(dev_mem,global_cuda_error_array,
sizeof(dev_mem));
739 BOOST_REQUIRE_EQUAL(dev_mem[0],1);
740 BOOST_REQUIRE_EQUAL(*(
size_t *)(&dev_mem[1]),(
size_t)(c3.toKernel().template getPointer<1>()));
741 BOOST_REQUIRE_EQUAL(dev_mem[3],1);
742 BOOST_REQUIRE_EQUAL(dev_mem[4],2);
743 BOOST_REQUIRE_EQUAL(dev_mem[5],10000);
744 BOOST_REQUIRE_EQUAL(dev_mem[6],12345);
746 BOOST_REQUIRE_EQUAL(dev_mem[7],17);
747 BOOST_REQUIRE_EQUAL(dev_mem[8],0);
748 BOOST_REQUIRE_EQUAL(dev_mem[9],0);
750 BOOST_REQUIRE_EQUAL(dev_mem[10],16);
751 BOOST_REQUIRE_EQUAL(dev_mem[11],1);
752 BOOST_REQUIRE_EQUAL(dev_mem[12],1);
754 BOOST_REQUIRE_EQUAL(dev_mem[13],7);
755 BOOST_REQUIRE_EQUAL(dev_mem[14],0);
756 BOOST_REQUIRE_EQUAL(dev_mem[15],0);
758 int dev_mem2[] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
770 CUDA_LAUNCH_DIM3_DEBUG_SE1(test_se1_crash_gt3,wthr,thr,c2.toKernel(),c3.toKernel());
771 cudaDeviceSynchronize();
774 cudaMemcpyFromSymbol(dev_mem2,global_cuda_error_array,
sizeof(dev_mem2));
776 BOOST_REQUIRE_EQUAL(dev_mem2[0],1);
777 BOOST_REQUIRE_EQUAL(*(
size_t *)(&dev_mem2[1]),(
size_t)(c2.toKernel().template getPointer<2>()));
778 BOOST_REQUIRE_EQUAL(dev_mem2[3],2);
779 BOOST_REQUIRE_EQUAL(dev_mem2[4],2);
781 std::cout <<
"######### Testing error message #########" << std::endl;
791 CUDA_LAUNCH(test_se1_crash_gt2,gr,c3.toKernel(),c2.toKernel());
792 std::cout <<
"######### End Testing error message #########" << std::endl;
797BOOST_AUTO_TEST_CASE(grid_test_copy_to_gpu_2d)
799 size_t sz_dst[] = {5,5};
800 size_t sz_src[] = {3,2};
807 copy_test(g_src,g_dst,box_src,box_dst);
810BOOST_AUTO_TEST_CASE(grid_test_copy_to_gpu_3d)
812 size_t sz_dst[] = {5,5,5};
813 size_t sz_src[] = {3,2,2};
820 copy_test(g_src,g_dst,box_src,box_dst);
824BOOST_AUTO_TEST_SUITE_END()
This class represent an N-dimensional box.
This is a distributed grid.
grid_key_dx is the key to access any element in the grid