#define BOOST_TEST_DYN_LINK
#include <boost/test/unit_test.hpp>
#include "Grid/map_grid.hpp"
#include "Point_test.hpp"
#include "Grid/grid_util_test.hpp"
#include "cuda_grid_unit_tests_func.cuh"
#include "util/cuda_util.hpp"
#include "Grid/grid_test_utils.hpp"

BOOST_AUTO_TEST_SUITE( grid_gpu_func_test )
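
// Check that getGPUIterator decomposes a key range into the expected number of
// blocks (wthr) and threads per block (thr).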
BOOST_AUTO_TEST_CASE (gpu_computation_func)

size_t sz[3] = {64,64,64};

auto gcf = c3.getGPUIterator(k1,k2);

BOOST_REQUIRE_EQUAL(gcf.thr.x,8ul);
BOOST_REQUIRE_EQUAL(gcf.thr.y,8ul);
BOOST_REQUIRE_EQUAL(gcf.thr.z,4ul);

BOOST_REQUIRE_EQUAL(gcf.wthr.x,8ul);
BOOST_REQUIRE_EQUAL(gcf.wthr.y,8ul);
BOOST_REQUIRE_EQUAL(gcf.wthr.z,16ul);
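
// Expected decomposition when the default kernel work-group size is 1024 threads.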
if (default_kernel_wg_threads_ == 1024) {
BOOST_REQUIRE_EQUAL(gcf.thr.x,16ul);
BOOST_REQUIRE_EQUAL(gcf.thr.y,8ul);
BOOST_REQUIRE_EQUAL(gcf.thr.z,8ul);

BOOST_REQUIRE_EQUAL(gcf.wthr.x,4ul);
BOOST_REQUIRE_EQUAL(gcf.wthr.y,8ul);
BOOST_REQUIRE_EQUAL(gcf.wthr.z,8ul);

auto gcf2 = c3.getGPUIterator(k3,k4);

BOOST_REQUIRE_EQUAL(gcf2.thr.x,8ul);
BOOST_REQUIRE_EQUAL(gcf2.thr.y,8ul);
BOOST_REQUIRE_EQUAL(gcf2.thr.z,4ul);

BOOST_REQUIRE_EQUAL(gcf2.wthr.x,2ul);
BOOST_REQUIRE_EQUAL(gcf2.wthr.y,2ul);
BOOST_REQUIRE_EQUAL(gcf2.wthr.z,4ul);

if (default_kernel_wg_threads_ == 1024) {
BOOST_REQUIRE_EQUAL(gcf2.thr.x,13ul);
BOOST_REQUIRE_EQUAL(gcf2.thr.y,8ul);
BOOST_REQUIRE_EQUAL(gcf2.thr.z,8ul);

BOOST_REQUIRE_EQUAL(gcf2.wthr.x,1ul);
BOOST_REQUIRE_EQUAL(gcf2.wthr.y,2ul);
BOOST_REQUIRE_EQUAL(gcf2.wthr.z,2ul);
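
// Same range, but with an explicit limit on the number of threads per block.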
gcf2 = c3.getGPUIterator(k3,k4,511);

BOOST_REQUIRE_EQUAL(gcf2.thr.x,8ul);
BOOST_REQUIRE_EQUAL(gcf2.thr.y,8ul);
BOOST_REQUIRE_EQUAL(gcf2.thr.z,4ul);

BOOST_REQUIRE_EQUAL(gcf2.wthr.x,2ul);
BOOST_REQUIRE_EQUAL(gcf2.wthr.y,2ul);
BOOST_REQUIRE_EQUAL(gcf2.wthr.z,4ul);

gcf2 = c3.getGPUIterator(k3,k4,1);

BOOST_REQUIRE_EQUAL(gcf2.thr.x,1ul);
BOOST_REQUIRE_EQUAL(gcf2.thr.y,1ul);
BOOST_REQUIRE_EQUAL(gcf2.thr.z,1ul);

BOOST_REQUIRE_EQUAL(gcf2.wthr.x,13ul);
BOOST_REQUIRE_EQUAL(gcf2.wthr.y,13ul);
BOOST_REQUIRE_EQUAL(gcf2.wthr.z,13ul);

gcf2 = c3.getGPUIterator(k3,k5,32);

BOOST_REQUIRE_EQUAL(gcf2.thr.x,4ul);
BOOST_REQUIRE_EQUAL(gcf2.thr.y,4ul);
BOOST_REQUIRE_EQUAL(gcf2.thr.z,2ul);

BOOST_REQUIRE_EQUAL(gcf2.wthr.x,3ul);
BOOST_REQUIRE_EQUAL(gcf2.wthr.y,3ul);
BOOST_REQUIRE_EQUAL(gcf2.wthr.z,7ul);

gcf2 = c3.getGPUIterator(k3,k5,1);

BOOST_REQUIRE_EQUAL(gcf2.thr.x,1ul);
BOOST_REQUIRE_EQUAL(gcf2.thr.y,1ul);
BOOST_REQUIRE_EQUAL(gcf2.thr.z,1ul);

BOOST_REQUIRE_EQUAL(gcf2.wthr.x,11ul);
BOOST_REQUIRE_EQUAL(gcf2.wthr.y,12ul);
BOOST_REQUIRE_EQUAL(gcf2.wthr.z,13ul);
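
// Fill the grid on the device (gpu_grid_3D_compute) and check on the host that
// every element contains its own linearized index.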
BOOST_AUTO_TEST_CASE (gpu_computation)

size_t sz[3] = {64,64,64};

test_layout_gridNd<3>(c3,sz[0]);

gpu_grid_3D_compute(c3);

c3.deviceToHost<0>();

auto it = c3.getIterator();

good &= c3.getGrid().LinId(key) == c3.template get<0>(key);

BOOST_REQUIRE_EQUAL(good,true);
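
// Launch a device lambda via CUDA_LAUNCH_LAMBDA that writes 5.0 into the
// scalar, vector and tensor properties, then verify the values on the host.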
BOOST_AUTO_TEST_CASE (gpu_computation_lambda)

size_t sz[3] = {64,64,64};

auto c3_k = c3.toKernel();

auto lamb = [c3_k] __device__ (dim3 & blockIdx, dim3 & threadIdx)

blockIdx.y * blockDim.y + threadIdx.y,
blockIdx.z * blockDim.z + threadIdx.z});

c3_k.template get<0>(p) = 5.0;

c3_k.template get<1>(p)[0] = 5.0;
c3_k.template get<1>(p)[1] = 5.0;

c3_k.template get<2>(p)[0][0] = 5.0;
c3_k.template get<2>(p)[0][1] = 5.0;
c3_k.template get<2>(p)[1][0] = 5.0;
c3_k.template get<2>(p)[1][1] = 5.0;

auto ite = c3.getGPUIterator({0,0,0},{63,63,63});

CUDA_LAUNCH_LAMBDA(ite,lamb);

c3.deviceToHost<0,1,2>();

auto it = c3.getIterator();

good &= c3.template get<0>(key) == 5.0;

good &= c3.template get<1>(key)[0] == 5.0;
good &= c3.template get<1>(key)[1] == 5.0;

good &= c3.template get<2>(key)[0][0] == 5.0;
good &= c3.template get<2>(key)[0][1] == 5.0;
good &= c3.template get<2>(key)[1][0] == 5.0;
good &= c3.template get<2>(key)[1][1] == 5.0;

BOOST_REQUIRE_EQUAL(good,true);
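
// Stencil test: c2 is pre-filled with 1.0 and c3 with its linearized indices;
// after gpu_grid_3D_compute_stencil the values of c2 inside [key1,key2] must be 0.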
BOOST_AUTO_TEST_CASE (gpu_computation_stencil)

size_t sz[3] = {64,64,64};

test_layout_gridNd<3>(c3,sz[0]);
test_layout_gridNd<3>(c2,sz[0]);

c2.deviceToHost<0>();

auto it = c2.getIterator();

good &= c2.get<0>(key) == 1.0;

BOOST_REQUIRE_EQUAL(good,true);

gpu_grid_3D_compute(c3);
c3.deviceToHost<0>();

auto it = c3.getIterator();

good &= c3.getGrid().LinId(key) == c3.get<0>(key);

BOOST_REQUIRE_EQUAL(good,true);

gpu_grid_3D_compute_stencil(c3,c2,key1,key2);

c2.deviceToHost<0>();

auto it = c2.getIterator(key1,key2);

good &= c2.get<0>(key) == 0;

BOOST_REQUIRE_EQUAL(good,true);
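
// Same check as above, but the stencil is computed with
// gpu_grid_3D_compute_grid_stencil.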
BOOST_AUTO_TEST_CASE (gpu_computation_grid_stencil)

size_t sz[3] = {64,64,64};

test_layout_gridNd<3>(c3,sz[0]);
test_layout_gridNd<3>(c2,sz[0]);

c2.deviceToHost<0>();

auto it = c2.getIterator();

good &= c2.get<0>(key) == 1.0;

BOOST_REQUIRE_EQUAL(good,true);

gpu_grid_3D_compute(c3);
c3.deviceToHost<0>();

auto it = c3.getIterator();

good &= c3.getGrid().LinId(key) == c3.get<0>(key);

BOOST_REQUIRE_EQUAL(good,true);

gpu_grid_3D_compute_grid_stencil(c3,c2,key1,key2);

c2.deviceToHost<0>();

auto it = c2.getIterator(key1,key2);

good &= c2.get<0>(key) == 0;

BOOST_REQUIRE_EQUAL(good,true);

gpu_grid_fill_vector(c3,zero,keyl);
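
// Fill the vector property (property 4) with {1,2,3}, then compute the gradient
// of the linearized index: the components must be 1, 64 and 4096 along x, y and z.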
BOOST_AUTO_TEST_CASE (gpu_computation_grid_stencil_vector)

size_t sz[3] = {64,64,64};

gpu_grid_fill_vector(c3,zero,keyl);

c3.deviceToHost<4>();

auto it = c3.getIterator(key1,key2);

good &= c3.get<4>(key)[0] == 1.0;
good &= c3.get<4>(key)[1] == 2.0;
good &= c3.get<4>(key)[2] == 3.0;

BOOST_REQUIRE_EQUAL(good,true);

gpu_grid_3D_compute(c3);
gpu_grid_gradient_vector(c3,c2,key1,key2);

c2.deviceToHost<4>();

auto it = c2.getIterator(key1,key2);

good &= c2.get<4>(key)[0] == 1.0;
good &= c2.get<4>(key)[1] == 64.0;
good &= c2.get<4>(key)[2] == 4096.0;

BOOST_REQUIRE_EQUAL(good,true);
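
// Fill property 4 of c2 and c3 with different values (on the device and on the
// host), swap the two grids and verify that both the host and the device
// buffers have been exchanged.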
BOOST_AUTO_TEST_CASE (gpu_swap_vector)

size_t sz[3] = {64,64,64};

gpu_grid_fill_vector(c2,zero,keyl);
gpu_grid_fill_vector2(c3,zero,keyl);

auto it4 = c3.getIterator(zero,keyl);

auto key = it4.get();

c2.get<4>(key)[0] = 1.0;
c2.get<4>(key)[1] = 2.0;
c2.get<4>(key)[2] = 3.0;

c3.get<4>(key)[0] = 1001.0;
c3.get<4>(key)[1] = 1002.0;
c3.get<4>(key)[2] = 1003.0;

auto it = c3.getIterator(zero,keyl);

good &= c3.get<4>(key)[0] == 1.0;
good &= c3.get<4>(key)[1] == 2.0;
good &= c3.get<4>(key)[2] == 3.0;

good &= c2.get<4>(key)[0] == 1001.0;
good &= c2.get<4>(key)[1] == 1002.0;
good &= c2.get<4>(key)[2] == 1003.0;

if (good == false) {break;}

c3.get<4>(key)[0] = 0.0;
c3.get<4>(key)[1] = 0.0;
c3.get<4>(key)[2] = 0.0;

c2.get<4>(key)[0] = 0.0;
c2.get<4>(key)[1] = 0.0;
c2.get<4>(key)[2] = 0.0;

BOOST_REQUIRE_EQUAL(good,true);
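
// Copy the swapped device buffers back to the host and check them as well.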
c2.template deviceToHost<4>();
c3.template deviceToHost<4>();

auto it2 = c3.getIterator(zero,keyl);

auto key = it2.get();

good &= c3.get<4>(key)[0] == 1.0;
good &= c3.get<4>(key)[1] == 2.0;
good &= c3.get<4>(key)[2] == 3.0;

good &= c2.get<4>(key)[0] == 1001.0;
good &= c2.get<4>(key)[1] == 1002.0;
good &= c2.get<4>(key)[2] == 1003.0;

if (good == false) {break;}

BOOST_REQUIRE_EQUAL(good,true);
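
// Fill a dim-dimensional grid on the host, move it to the device and verify
// that the data survives the round trip.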
template<unsigned int dim>
void gpu_copy_device_test()

for (size_t i = 0 ; i < dim ; i++)

auto it4 = c3.getIterator();

auto key = it4.get();

c3.template get<0>(key) = g.LinId(key);

c3.template get<4>(key)[0] = g.LinId(key) + 2000;
c3.template get<4>(key)[1] = g.LinId(key) + 6000;
c3.template get<4>(key)[2] = g.LinId(key) + 56000;
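
// Upload the data to the device; later the host copy of property 0 is wiped
// and restored from the device.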
c3.template hostToDevice<0>();

for (size_t i = 0 ; i < dim ; i++)

auto it = c3.getIterator();

bool to_check = true;
for (size_t j = 0 ; j < dim ; j++)

if (key.get(j) >= (unsigned int)sz[j])

if (to_check == true)

match &= c3.template get<0>(key) == g.LinId(key);

match &= c3.template get<4>(key)[0] == g.LinId(key) + 2000;
match &= c3.template get<4>(key)[1] == g.LinId(key) + 6000;
match &= c3.template get<4>(key)[2] == g.LinId(key) + 56000;

BOOST_REQUIRE_EQUAL(match,true);
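
// Wipe the host copy of property 0, then restore it from the device and check
// that the original values come back.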
auto it2 = c3.getIterator();

auto key = it2.get();

c3.template get<0>(key) = 0;

c3.template deviceToHost<0>();

auto it3 = c3.getIterator();

auto key = it3.get();

bool to_check = true;
for (size_t j = 0 ; j < dim ; j++)

if (key.get(j) >= (unsigned int)sz[j])

if (to_check == true)

match = c3.template get<0>(key) == g.LinId(key);

match &= c3.template get<4>(key)[0] == g.LinId(key) + 2000;
match &= c3.template get<4>(key)[1] == g.LinId(key) + 6000;
match &= c3.template get<4>(key)[2] == g.LinId(key) + 56000;

BOOST_REQUIRE_EQUAL(match,true);
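
// Run the device round-trip test for 1, 2, 3 and 4 dimensions.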
BOOST_AUTO_TEST_CASE (gpu_copy_device)

gpu_copy_device_test<4>();
gpu_copy_device_test<3>();
gpu_copy_device_test<2>();
gpu_copy_device_test<1>();
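
// Kernels used below to trigger an out-of-bounds access and exercise the
// SE_CLASS1 error-reporting machinery.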
template<typename grid_type>

int p = blockIdx.x * blockDim.x + threadIdx.x;

gt1.template get<1>(k)[2] = 6.0;

template<typename grid_type>

gt1.template get<2>(k)[2][2] = 6.0;
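
// With SE_CLASS1 enabled the faulty accesses above must be detected: the error
// report is copied back from global_cuda_error_array and its fields are checked.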
BOOST_AUTO_TEST_CASE (gpu_grid_test_se_class1)

#if defined(SE_CLASS1) && !defined(__clang__)

size_t sz[2] = {5,5};

int dev_mem[] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};

CUDA_LAUNCH_DIM3_DEBUG_SE1(test_se1_crash_gt2,wthr,thr,c3.toKernel(),c2.toKernel());
cudaDeviceSynchronize();
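
// Retrieve the error report written by the kernel into global_cuda_error_array.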
cudaMemcpyFromSymbol(dev_mem,global_cuda_error_array,sizeof(dev_mem));

BOOST_REQUIRE_EQUAL(dev_mem[0],1);
BOOST_REQUIRE_EQUAL(*(size_t *)(&dev_mem[1]),(size_t)(c3.toKernel().template getPointer<1>()));
BOOST_REQUIRE_EQUAL(dev_mem[3],1);
BOOST_REQUIRE_EQUAL(dev_mem[4],2);
BOOST_REQUIRE_EQUAL(dev_mem[5],10000);
BOOST_REQUIRE_EQUAL(dev_mem[6],12345);

BOOST_REQUIRE_EQUAL(dev_mem[7],17);
BOOST_REQUIRE_EQUAL(dev_mem[8],0);
BOOST_REQUIRE_EQUAL(dev_mem[9],0);

BOOST_REQUIRE_EQUAL(dev_mem[10],16);
BOOST_REQUIRE_EQUAL(dev_mem[11],1);
BOOST_REQUIRE_EQUAL(dev_mem[12],1);

BOOST_REQUIRE_EQUAL(dev_mem[13],7);
BOOST_REQUIRE_EQUAL(dev_mem[14],0);
BOOST_REQUIRE_EQUAL(dev_mem[15],0);
int dev_mem2[] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};

CUDA_LAUNCH_DIM3_DEBUG_SE1(test_se1_crash_gt3,wthr,thr,c2.toKernel(),c3.toKernel());
cudaDeviceSynchronize();

cudaMemcpyFromSymbol(dev_mem2,global_cuda_error_array,sizeof(dev_mem2));

BOOST_REQUIRE_EQUAL(dev_mem2[0],1);
BOOST_REQUIRE_EQUAL(*(size_t *)(&dev_mem2[1]),(size_t)(c2.toKernel().template getPointer<2>()));
BOOST_REQUIRE_EQUAL(dev_mem2[3],2);
BOOST_REQUIRE_EQUAL(dev_mem2[4],2);
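
// Launch once more with the standard macro so that the human-readable error
// message is printed between the two marker lines.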
std::cout << "######### Testing error message #########" << std::endl;

CUDA_LAUNCH(test_se1_crash_gt2,gr,c3.toKernel(),c2.toKernel());

std::cout << "######### End Testing error message #########" << std::endl;
BOOST_AUTO_TEST_CASE(grid_test_copy_to_gpu_2d)

size_t sz_dst[] = {5,5};
size_t sz_src[] = {3,2};

copy_test(g_src,g_dst,box_src,box_dst);

BOOST_AUTO_TEST_CASE(grid_test_copy_to_gpu_3d)

size_t sz_dst[] = {5,5,5};
size_t sz_src[] = {3,2,2};

copy_test(g_src,g_dst,box_src,box_dst);

BOOST_AUTO_TEST_SUITE_END()