cuda_grid_gpu_tests.cu
/*
 * cuda_gpu_func.cpp
 *
 * Created on: Jun 3, 2018
 * Author: i-bird
 */

#include "config.h"
#define BOOST_TEST_DYN_LINK
#include <boost/test/unit_test.hpp>
#include "Grid/map_grid.hpp"
#include "Point_test.hpp"
#include "Grid/grid_util_test.hpp"
#include "cuda_grid_unit_tests_func.cuh"
#include "util/cuda_util.hpp"
#include "Grid/grid_test_utils.hpp"

BOOST_AUTO_TEST_SUITE( grid_gpu_func_test )


BOOST_AUTO_TEST_CASE (gpu_computation_func)
{
#ifdef CUDA_GPU

    size_t sz[3] = {64,64,64};
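    // c3: device test grid built on sz (its declaration is not shown in this listing)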

    grid_key_dx<3> k1({1,1,1});
    grid_key_dx<3> k2({62,62,62});

    c3.setMemory();

    auto gcf = c3.getGPUIterator(k1,k2);

#ifdef __HIP__

    BOOST_REQUIRE_EQUAL(gcf.thr.x,8ul);
    BOOST_REQUIRE_EQUAL(gcf.thr.y,8ul);
    BOOST_REQUIRE_EQUAL(gcf.thr.z,4ul);

    BOOST_REQUIRE_EQUAL(gcf.wthr.x,8ul);
    BOOST_REQUIRE_EQUAL(gcf.wthr.y,8ul);
    BOOST_REQUIRE_EQUAL(gcf.wthr.z,16ul);

#else

    if (default_kernel_wg_threads_ == 1024) {
        BOOST_REQUIRE_EQUAL(gcf.thr.x,16ul);
        BOOST_REQUIRE_EQUAL(gcf.thr.y,8ul);
        BOOST_REQUIRE_EQUAL(gcf.thr.z,8ul);

        BOOST_REQUIRE_EQUAL(gcf.wthr.x,4ul);
        BOOST_REQUIRE_EQUAL(gcf.wthr.y,8ul);
        BOOST_REQUIRE_EQUAL(gcf.wthr.z,8ul);
    }

#endif

    grid_key_dx<3> k3({50,50,50});
    grid_key_dx<3> k4({62,62,62});
    grid_key_dx<3> k5({60,61,62});

    auto gcf2 = c3.getGPUIterator(k3,k4);

#ifdef __HIP__

    BOOST_REQUIRE_EQUAL(gcf2.thr.x,8ul);
    BOOST_REQUIRE_EQUAL(gcf2.thr.y,8ul);
    BOOST_REQUIRE_EQUAL(gcf2.thr.z,4ul);

    BOOST_REQUIRE_EQUAL(gcf2.wthr.x,2ul);
    BOOST_REQUIRE_EQUAL(gcf2.wthr.y,2ul);
    BOOST_REQUIRE_EQUAL(gcf2.wthr.z,4ul);

#else

    if (default_kernel_wg_threads_ == 1024) {
        BOOST_REQUIRE_EQUAL(gcf2.thr.x,13ul);
        BOOST_REQUIRE_EQUAL(gcf2.thr.y,8ul);
        BOOST_REQUIRE_EQUAL(gcf2.thr.z,8ul);

        BOOST_REQUIRE_EQUAL(gcf2.wthr.x,1ul);
        BOOST_REQUIRE_EQUAL(gcf2.wthr.y,2ul);
        BOOST_REQUIRE_EQUAL(gcf2.wthr.z,2ul);
    }

#endif

    gcf2 = c3.getGPUIterator(k3,k4,511);

    BOOST_REQUIRE_EQUAL(gcf2.thr.x,8ul);
    BOOST_REQUIRE_EQUAL(gcf2.thr.y,8ul);
    BOOST_REQUIRE_EQUAL(gcf2.thr.z,4ul);

    BOOST_REQUIRE_EQUAL(gcf2.wthr.x,2ul);
    BOOST_REQUIRE_EQUAL(gcf2.wthr.y,2ul);
    BOOST_REQUIRE_EQUAL(gcf2.wthr.z,4ul);

    gcf2 = c3.getGPUIterator(k3,k4,1);

    BOOST_REQUIRE_EQUAL(gcf2.thr.x,1ul);
    BOOST_REQUIRE_EQUAL(gcf2.thr.y,1ul);
    BOOST_REQUIRE_EQUAL(gcf2.thr.z,1ul);

    BOOST_REQUIRE_EQUAL(gcf2.wthr.x,13ul);
    BOOST_REQUIRE_EQUAL(gcf2.wthr.y,13ul);
    BOOST_REQUIRE_EQUAL(gcf2.wthr.z,13ul);

    gcf2 = c3.getGPUIterator(k3,k5,32);

    BOOST_REQUIRE_EQUAL(gcf2.thr.x,4ul);
    BOOST_REQUIRE_EQUAL(gcf2.thr.y,4ul);
    BOOST_REQUIRE_EQUAL(gcf2.thr.z,2ul);

    BOOST_REQUIRE_EQUAL(gcf2.wthr.x,3ul);
    BOOST_REQUIRE_EQUAL(gcf2.wthr.y,3ul);
    BOOST_REQUIRE_EQUAL(gcf2.wthr.z,7ul);

    gcf2 = c3.getGPUIterator(k3,k5,1);

    BOOST_REQUIRE_EQUAL(gcf2.thr.x,1ul);
    BOOST_REQUIRE_EQUAL(gcf2.thr.y,1ul);
    BOOST_REQUIRE_EQUAL(gcf2.thr.z,1ul);

    BOOST_REQUIRE_EQUAL(gcf2.wthr.x,11ul);
    BOOST_REQUIRE_EQUAL(gcf2.wthr.y,12ul);
    BOOST_REQUIRE_EQUAL(gcf2.wthr.z,13ul);

#endif
}


BOOST_AUTO_TEST_CASE (gpu_computation)
{
    #ifdef CUDA_GPU

    {
        size_t sz[3] = {64,64,64};
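        // c3: device test grid built on sz (declaration not shown in this listing)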

        c3.setMemory();
        test_layout_gridNd<3>(c3,sz[0]);

        gpu_grid_3D_compute(c3);

        c3.deviceToHost<0>();

        auto it = c3.getIterator();

        bool good = true;
        while(it.isNext())
        {
            auto key = it.get();

            good &= c3.getGrid().LinId(key) == c3.template get<0>(key);

            ++it;
        }

        BOOST_REQUIRE_EQUAL(good,true);

    }

    #endif
}

BOOST_AUTO_TEST_CASE (gpu_computation_lambda)
{
    #ifdef CUDA_GPU

    {
        size_t sz[3] = {64,64,64};
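        // c3: device test grid built on sz (declaration not shown in this listing)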

        c3.setMemory();

        // Assign

        auto c3_k = c3.toKernel();

        auto lamb = [c3_k] __device__ (dim3 & blockIdx, dim3 & threadIdx)
        {
            grid_key_dx<3,int> p({blockIdx.x * blockDim.x + threadIdx.x,
                                  blockIdx.y * blockDim.y + threadIdx.y,
                                  blockIdx.z * blockDim.z + threadIdx.z});

            c3_k.template get<0>(p) = 5.0;

            c3_k.template get<1>(p)[0] = 5.0;
            c3_k.template get<1>(p)[1] = 5.0;

            c3_k.template get<2>(p)[0][0] = 5.0;
            c3_k.template get<2>(p)[0][1] = 5.0;
            c3_k.template get<2>(p)[1][0] = 5.0;
            c3_k.template get<2>(p)[1][1] = 5.0;
        };

        auto ite = c3.getGPUIterator({0,0,0},{63,63,63});

        CUDA_LAUNCH_LAMBDA(ite,lamb);

        c3.deviceToHost<0,1,2>();

        auto it = c3.getIterator();

        bool good = true;
        while(it.isNext())
        {
            auto key = it.get();

            good &= c3.template get<0>(key) == 5.0;

            good &= c3.template get<1>(key)[0] == 5.0;
            good &= c3.template get<1>(key)[1] == 5.0;

            good &= c3.template get<2>(key)[0][0] == 5.0;
            good &= c3.template get<2>(key)[0][1] == 5.0;
            good &= c3.template get<2>(key)[1][0] == 5.0;
            good &= c3.template get<2>(key)[1][1] == 5.0;

            ++it;
        }

        BOOST_REQUIRE_EQUAL(good,true);

    }

    #endif
}

BOOST_AUTO_TEST_CASE (gpu_computation_stencil)
{
    #ifdef CUDA_GPU

    {
        size_t sz[3] = {64,64,64};
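        // c3, c2: device test grids built on sz (declarations not shown in this listing)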
        grid_key_dx<3> key1({1,1,1});
        grid_key_dx<3> key2({62,62,62});


        c3.setMemory();
        c2.setMemory();
        test_layout_gridNd<3>(c3,sz[0]);
        test_layout_gridNd<3>(c2,sz[0]);

        gpu_grid_3D_one(c2);

        // Check that property 0 is 1.0
        c2.deviceToHost<0>();

        {
            auto it = c2.getIterator();

            bool good = true;
            while(it.isNext())
            {
                auto key = it.get();

                good &= c2.get<0>(key) == 1.0;

                ++it;
            }

            BOOST_REQUIRE_EQUAL(good,true);
        }

        gpu_grid_3D_compute(c3);
        c3.deviceToHost<0>();

        {
            auto it = c3.getIterator();

            bool good = true;
            while(it.isNext())
            {
                auto key = it.get();

                good &= c3.getGrid().LinId(key) == c3.get<0>(key);

                ++it;
            }

            BOOST_REQUIRE_EQUAL(good,true);
        }

        gpu_grid_3D_compute_stencil(c3,c2,key1,key2);

        c2.deviceToHost<0>();

        auto it = c2.getIterator(key1,key2);

        bool good = true;
        while(it.isNext())
        {
            auto key = it.get();

            good &= c2.get<0>(key) == 0;

            ++it;
        }

        BOOST_REQUIRE_EQUAL(good,true);

    }

    #endif
}

BOOST_AUTO_TEST_CASE (gpu_computation_grid_stencil)
{
    #ifdef CUDA_GPU

    {
        size_t sz[3] = {64,64,64};
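        // c3, c2: device test grids built on sz (declarations not shown in this listing)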
        grid_key_dx<3> key1({1,1,1});
        grid_key_dx<3> zero({0,0,0});
        grid_key_dx<3> key2({62,62,62});
        grid_key_dx<3> keyl({63,63,63});


        c3.setMemory();
        c2.setMemory();
        test_layout_gridNd<3>(c3,sz[0]);
        test_layout_gridNd<3>(c2,sz[0]);

        gpu_grid_3D_one(c2);

        // Check that property 0 is 1.0
        c2.deviceToHost<0>();

        {
            auto it = c2.getIterator();

            bool good = true;
            while(it.isNext())
            {
                auto key = it.get();

                good &= c2.get<0>(key) == 1.0;

                ++it;
            }

            BOOST_REQUIRE_EQUAL(good,true);
        }

        gpu_grid_3D_compute(c3);
        c3.deviceToHost<0>();

        {
            auto it = c3.getIterator();

            bool good = true;
            while(it.isNext())
            {
                auto key = it.get();

                good &= c3.getGrid().LinId(key) == c3.get<0>(key);

                ++it;
            }

            BOOST_REQUIRE_EQUAL(good,true);
        }

        gpu_grid_3D_compute_grid_stencil(c3,c2,key1,key2);

        c2.deviceToHost<0>();

        auto it = c2.getIterator(key1,key2);

        bool good = true;
        while(it.isNext())
        {
            auto key = it.get();
            good &= c2.get<0>(key) == 0;

            ++it;
        }

        BOOST_REQUIRE_EQUAL(good,true);

        // We also try to fill a vectorial quantity

        gpu_grid_fill_vector(c3,zero,keyl);

    }

    #endif
}

BOOST_AUTO_TEST_CASE (gpu_computation_grid_stencil_vector)
{
    #ifdef CUDA_GPU

    {
        size_t sz[3] = {64,64,64};
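        // c3, c2: device test grids built on sz (declarations not shown in this listing)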
        grid_key_dx<3> key1({1,1,1});
        grid_key_dx<3> zero({0,0,0});
        grid_key_dx<3> key2({62,62,62});
        grid_key_dx<3> keyl({63,63,63});


        c3.setMemory();
        c2.setMemory();

        gpu_grid_fill_vector(c3,zero,keyl);

        // Check that the vector property 4 has been filled with {1,2,3}
        c3.deviceToHost<4>();

        {
            auto it = c3.getIterator(key1,key2);

            bool good = true;
            while(it.isNext())
            {
                auto key = it.get();

                good &= c3.get<4>(key)[0] == 1.0;
                good &= c3.get<4>(key)[1] == 2.0;
                good &= c3.get<4>(key)[2] == 3.0;

                ++it;
            }

            BOOST_REQUIRE_EQUAL(good,true);
        }

        // Fill c3

        gpu_grid_3D_compute(c3);
        gpu_grid_gradient_vector(c3,c2,key1,key2);

        // Check the gradient stored in the vector property 4 of c2
        c2.deviceToHost<4>();

        {
            auto it = c2.getIterator(key1,key2);

            bool good = true;
            while(it.isNext())
            {
                auto key = it.get();

                good &= c2.get<4>(key)[0] == 1.0;
                good &= c2.get<4>(key)[1] == 64.0;
                good &= c2.get<4>(key)[2] == 4096.0;

                ++it;
            }

            BOOST_REQUIRE_EQUAL(good,true);
        }

    }

    #endif
}

BOOST_AUTO_TEST_CASE (gpu_swap_vector)
{
    #ifdef CUDA_GPU

    {
        size_t sz[3] = {64,64,64};
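        // c3, c2: device test grids built on sz (declarations not shown in this listing)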
        grid_key_dx<3> key1({1,1,1});
        grid_key_dx<3> zero({0,0,0});
        grid_key_dx<3> key2({62,62,62});
        grid_key_dx<3> keyl({63,63,63});


        c3.setMemory();
        c2.setMemory();

        gpu_grid_fill_vector(c2,zero,keyl);
        gpu_grid_fill_vector2(c3,zero,keyl);

        auto it4 = c3.getIterator(zero,keyl);

        // fill CPU
        while(it4.isNext())
        {
            auto key = it4.get();

            c2.get<4>(key)[0] = 1.0;
            c2.get<4>(key)[1] = 2.0;
            c2.get<4>(key)[2] = 3.0;

            c3.get<4>(key)[0] = 1001.0;
            c3.get<4>(key)[1] = 1002.0;
            c3.get<4>(key)[2] = 1003.0;

            ++it4;
        }

        // now we swap the two grids

        c3.swap(c2);

        {
            auto it = c3.getIterator(zero,keyl);

            bool good = true;
            while(it.isNext())
            {
                auto key = it.get();

                good &= c3.get<4>(key)[0] == 1.0;
                good &= c3.get<4>(key)[1] == 2.0;
                good &= c3.get<4>(key)[2] == 3.0;

                good &= c2.get<4>(key)[0] == 1001.0;
                good &= c2.get<4>(key)[1] == 1002.0;
                good &= c2.get<4>(key)[2] == 1003.0;

                if (good == false) {break;}

                // Set to zero

                c3.get<4>(key)[0] = 0.0;
                c3.get<4>(key)[1] = 0.0;
                c3.get<4>(key)[2] = 0.0;

                c2.get<4>(key)[0] = 0.0;
                c2.get<4>(key)[1] = 0.0;
                c2.get<4>(key)[2] = 0.0;

                ++it;
            }

            BOOST_REQUIRE_EQUAL(good,true);

            c2.template deviceToHost<4>();
            c3.template deviceToHost<4>();

            auto it2 = c3.getIterator(zero,keyl);

            good = true;
            while(it2.isNext())
            {
                auto key = it2.get();

                good &= c3.get<4>(key)[0] == 1.0;
                good &= c3.get<4>(key)[1] == 2.0;
                good &= c3.get<4>(key)[2] == 3.0;

                good &= c2.get<4>(key)[0] == 1001.0;
                good &= c2.get<4>(key)[1] == 1002.0;
                good &= c2.get<4>(key)[2] == 1003.0;

                if (good == false) {break;}

                ++it2;
            }

            BOOST_REQUIRE_EQUAL(good,true);
        }


    }

    #endif
}

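// Fill a dim-dimensional grid on the host, copy property 0 to the device, resize the grid,
// and check that the data inside the original extent survives both the resize and a later
// copy back from the device.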
template<unsigned int dim>
void gpu_copy_device_test()
{
    size_t sz[dim];

    for (size_t i = 0 ; i < dim ; i++)
    {sz[i] = 13;}

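    // c3: device test grid built on sz (declaration not shown in this listing)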

    grid_sm<dim,void> g(sz);
    c3.setMemory();

    auto it4 = c3.getIterator();
    while (it4.isNext())
    {
        auto key = it4.get();

        c3.template get<0>(key) = g.LinId(key);

        c3.template get<4>(key)[0] = g.LinId(key) + 2000;
        c3.template get<4>(key)[1] = g.LinId(key) + 6000;
        c3.template get<4>(key)[2] = g.LinId(key) + 56000;

        ++it4;
    }

    c3.template hostToDevice<0>();

    size_t sz2[dim];

    for (size_t i = 0 ; i < dim ; i++)
    {sz2[i] = 17;}

    c3.resize(sz2);

    auto it = c3.getIterator();

    bool match = true;
    while (it.isNext())
    {
        auto key = it.get();

        bool to_check = true;
        for (size_t j = 0 ; j < dim ; j++)
        {
            if (key.get(j) >= (unsigned int)sz[j])
            {to_check = false;}
        }

        if (to_check == true)
        {
            match &= c3.template get<0>(key) == g.LinId(key);

            match &= c3.template get<4>(key)[0] == g.LinId(key) + 2000;
            match &= c3.template get<4>(key)[1] == g.LinId(key) + 6000;
            match &= c3.template get<4>(key)[2] == g.LinId(key) + 56000;
        }

        ++it;
    }

    BOOST_REQUIRE_EQUAL(match,true);

    // reset the memory

    auto it2 = c3.getIterator();

    match = true;
    while (it2.isNext())
    {
        auto key = it2.get();

        c3.template get<0>(key) = 0;

        ++it2;
    }

    // bring the data back from the device to the CPU

    c3.template deviceToHost<0>();

    auto it3 = c3.getIterator();

    match = true;
    while (it3.isNext())
    {
        auto key = it3.get();

        bool to_check = true;
        for (size_t j = 0 ; j < dim ; j++)
        {
            if (key.get(j) >= (unsigned int)sz[j])
            {to_check = false;}
        }

        if (to_check == true)
        {
            match &= c3.template get<0>(key) == g.LinId(key);

            match &= c3.template get<4>(key)[0] == g.LinId(key) + 2000;
            match &= c3.template get<4>(key)[1] == g.LinId(key) + 6000;
            match &= c3.template get<4>(key)[2] == g.LinId(key) + 56000;
        }

        ++it3;
    }

    BOOST_REQUIRE_EQUAL(match,true);
}

BOOST_AUTO_TEST_CASE (gpu_copy_device)
{
    gpu_copy_device_test<4>();
    gpu_copy_device_test<3>();
    gpu_copy_device_test<2>();
    gpu_copy_device_test<1>();
}

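// The two kernels below deliberately write to an out-of-bounds grid point so that the
// SE_CLASS1 error-checking machinery records the faulty access for the test that follows.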
template<typename grid_type>
__global__ void test_se1_crash_gt2(grid_type gt1, grid_type gt2)
{
    int p = blockIdx.x * blockDim.x + threadIdx.x;

    if (p == 279)
    {
        grid_key_dx<2> k({10000,12345});

        gt1.template get<1>(k)[2] = 6.0;
    }
}

template<typename grid_type>
__global__ void test_se1_crash_gt3(grid_type gt1, grid_type gt2)
{
    grid_key_dx<2> k({10000,12345});

    gt1.template get<2>(k)[2][2] = 6.0;
}

BOOST_AUTO_TEST_CASE (gpu_grid_test_se_class1)
{
#if defined(SE_CLASS1) && !defined(__clang__)

    size_t sz[2] = {5,5};

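    // c3: 5x5 device test grid (declaration not shown in this listing)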
    c3.setMemory();

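    // c2: 5x5 device test grid (declaration not shown in this listing)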
    c2.setMemory();

    int dev_mem[] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};

    dim3 wthr;
    wthr.x = 32;
    wthr.y = 1;
    wthr.z = 1;
    dim3 thr;
    thr.x = 16;
    thr.y = 1;
    thr.z = 1;

    CUDA_LAUNCH_DIM3_DEBUG_SE1(test_se1_crash_gt2,wthr,thr,c3.toKernel(),c2.toKernel());
    cudaDeviceSynchronize();

    cudaMemcpyFromSymbol(dev_mem,global_cuda_error_array,sizeof(dev_mem));

    BOOST_REQUIRE_EQUAL(dev_mem[0],1);
    BOOST_REQUIRE_EQUAL(*(size_t *)(&dev_mem[1]),(size_t)(c3.toKernel().template getPointer<1>()));
    BOOST_REQUIRE_EQUAL(dev_mem[3],1);
    BOOST_REQUIRE_EQUAL(dev_mem[4],2);
    BOOST_REQUIRE_EQUAL(dev_mem[5],10000);
    BOOST_REQUIRE_EQUAL(dev_mem[6],12345);

    BOOST_REQUIRE_EQUAL(dev_mem[7],17);
    BOOST_REQUIRE_EQUAL(dev_mem[8],0);
    BOOST_REQUIRE_EQUAL(dev_mem[9],0);

    BOOST_REQUIRE_EQUAL(dev_mem[10],16);
    BOOST_REQUIRE_EQUAL(dev_mem[11],1);
    BOOST_REQUIRE_EQUAL(dev_mem[12],1);

    BOOST_REQUIRE_EQUAL(dev_mem[13],7);
    BOOST_REQUIRE_EQUAL(dev_mem[14],0);
    BOOST_REQUIRE_EQUAL(dev_mem[15],0);

    int dev_mem2[] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};

    {
        dim3 wthr;
        wthr.x = 32;
        wthr.y = 1;
        wthr.z = 1;
        dim3 thr;
        thr.x = 16;
        thr.y = 1;
        thr.z = 1;

        CUDA_LAUNCH_DIM3_DEBUG_SE1(test_se1_crash_gt3,wthr,thr,c2.toKernel(),c3.toKernel());
        cudaDeviceSynchronize();
    }

    cudaMemcpyFromSymbol(dev_mem2,global_cuda_error_array,sizeof(dev_mem2));

    BOOST_REQUIRE_EQUAL(dev_mem2[0],1);
    BOOST_REQUIRE_EQUAL(*(size_t *)(&dev_mem2[1]),(size_t)(c2.toKernel().template getPointer<2>()));
    BOOST_REQUIRE_EQUAL(dev_mem2[3],2);
    BOOST_REQUIRE_EQUAL(dev_mem2[4],2);

    std::cout << "######### Testing error message #########" << std::endl;

    ite_gpu<3> gr;

    gr.wthr.x = 32;
    gr.wthr.y = 1;
    gr.wthr.z = 1;
    gr.thr.x = 16;
    gr.thr.y = 1;
    gr.thr.z = 1;
    CUDA_LAUNCH(test_se1_crash_gt2,gr,c3.toKernel(),c2.toKernel());
    std::cout << "######### End Testing error message #########" << std::endl;

#endif
}

BOOST_AUTO_TEST_CASE(grid_test_copy_to_gpu_2d)
{
    size_t sz_dst[] = {5,5};
    size_t sz_src[] = {3,2};
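    // g_src, g_dst: device grids built on sz_src and sz_dst (declarations not shown in this listing)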

    Box<2,size_t> box_dst({1,2},{2,3});
    Box<2,size_t> box_src({1,0},{2,1});

    copy_test(g_src,g_dst,box_src,box_dst);
}

BOOST_AUTO_TEST_CASE(grid_test_copy_to_gpu_3d)
{
    size_t sz_dst[] = {5,5,5};
    size_t sz_src[] = {3,2,2};
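    // g_src, g_dst: device grids built on sz_src and sz_dst (declarations not shown in this listing)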

    Box<3,size_t> box_dst({1,2,2},{2,3,3});
    Box<3,size_t> box_src({1,0,0},{2,1,1});

    copy_test(g_src,g_dst,box_src,box_dst);
}


BOOST_AUTO_TEST_SUITE_END()