OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
cuda_grid_gpu_tests.cu
/*
 * cuda_gpu_func.cpp
 *
 *  Created on: Jun 3, 2018
 *      Author: i-bird
 */

#include "config.h"
#define BOOST_TEST_DYN_LINK
#include <boost/test/unit_test.hpp>
#include "Grid/map_grid.hpp"
#include "Point_test.hpp"
#include "Grid/grid_util_test.hpp"
#include "cuda_grid_unit_tests_func.cuh"
#include "util/cuda_launch.hpp"
#include "Grid/grid_test_utils.hpp"

BOOST_AUTO_TEST_SUITE( grid_gpu_func_test )


BOOST_AUTO_TEST_CASE (gpu_computation_func)
{
#ifdef CUDA_GPU

    size_t sz[3] = {64,64,64};
    /* (declaration of the GPU grid c3 over sz omitted in this listing) */

    grid_key_dx<3> k1({1,1,1});
    grid_key_dx<3> k2({62,62,62});

    c3.setMemory();

    auto gcf = c3.getGPUIterator(k1,k2);

#ifdef __HIP__

    BOOST_REQUIRE_EQUAL(gcf.thr.x,8ul);
    BOOST_REQUIRE_EQUAL(gcf.thr.y,8ul);
    BOOST_REQUIRE_EQUAL(gcf.thr.z,4ul);

    BOOST_REQUIRE_EQUAL(gcf.wthr.x,8ul);
    BOOST_REQUIRE_EQUAL(gcf.wthr.y,8ul);
    BOOST_REQUIRE_EQUAL(gcf.wthr.z,16ul);

#else

    BOOST_REQUIRE_EQUAL(gcf.thr.x,16ul);
    BOOST_REQUIRE_EQUAL(gcf.thr.y,8ul);
    BOOST_REQUIRE_EQUAL(gcf.thr.z,8ul);

    BOOST_REQUIRE_EQUAL(gcf.wthr.x,4ul);
    BOOST_REQUIRE_EQUAL(gcf.wthr.y,8ul);
    BOOST_REQUIRE_EQUAL(gcf.wthr.z,8ul);

#endif

    grid_key_dx<3> k3({50,50,50});
    grid_key_dx<3> k4({62,62,62});
    grid_key_dx<3> k5({60,61,62});

    auto gcf2 = c3.getGPUIterator(k3,k4);

#ifdef __HIP__

    BOOST_REQUIRE_EQUAL(gcf2.thr.x,8ul);
    BOOST_REQUIRE_EQUAL(gcf2.thr.y,8ul);
    BOOST_REQUIRE_EQUAL(gcf2.thr.z,4ul);

    BOOST_REQUIRE_EQUAL(gcf2.wthr.x,2ul);
    BOOST_REQUIRE_EQUAL(gcf2.wthr.y,2ul);
    BOOST_REQUIRE_EQUAL(gcf2.wthr.z,4ul);

#else

    BOOST_REQUIRE_EQUAL(gcf2.thr.x,13ul);
    BOOST_REQUIRE_EQUAL(gcf2.thr.y,8ul);
    BOOST_REQUIRE_EQUAL(gcf2.thr.z,8ul);

    BOOST_REQUIRE_EQUAL(gcf2.wthr.x,1ul);
    BOOST_REQUIRE_EQUAL(gcf2.wthr.y,2ul);
    BOOST_REQUIRE_EQUAL(gcf2.wthr.z,2ul);

#endif

    gcf2 = c3.getGPUIterator(k3,k4,511);

    BOOST_REQUIRE_EQUAL(gcf2.thr.x,8ul);
    BOOST_REQUIRE_EQUAL(gcf2.thr.y,8ul);
    BOOST_REQUIRE_EQUAL(gcf2.thr.z,4ul);

    BOOST_REQUIRE_EQUAL(gcf2.wthr.x,2ul);
    BOOST_REQUIRE_EQUAL(gcf2.wthr.y,2ul);
    BOOST_REQUIRE_EQUAL(gcf2.wthr.z,4ul);

    gcf2 = c3.getGPUIterator(k3,k4,1);

    BOOST_REQUIRE_EQUAL(gcf2.thr.x,1ul);
    BOOST_REQUIRE_EQUAL(gcf2.thr.y,1ul);
    BOOST_REQUIRE_EQUAL(gcf2.thr.z,1ul);

    BOOST_REQUIRE_EQUAL(gcf2.wthr.x,13ul);
    BOOST_REQUIRE_EQUAL(gcf2.wthr.y,13ul);
    BOOST_REQUIRE_EQUAL(gcf2.wthr.z,13ul);

    gcf2 = c3.getGPUIterator(k3,k5,32);

    BOOST_REQUIRE_EQUAL(gcf2.thr.x,4ul);
    BOOST_REQUIRE_EQUAL(gcf2.thr.y,4ul);
    BOOST_REQUIRE_EQUAL(gcf2.thr.z,2ul);

    BOOST_REQUIRE_EQUAL(gcf2.wthr.x,3ul);
    BOOST_REQUIRE_EQUAL(gcf2.wthr.y,3ul);
    BOOST_REQUIRE_EQUAL(gcf2.wthr.z,7ul);

    gcf2 = c3.getGPUIterator(k3,k5,1);

    BOOST_REQUIRE_EQUAL(gcf2.thr.x,1ul);
    BOOST_REQUIRE_EQUAL(gcf2.thr.y,1ul);
    BOOST_REQUIRE_EQUAL(gcf2.thr.z,1ul);

    BOOST_REQUIRE_EQUAL(gcf2.wthr.x,11ul);
    BOOST_REQUIRE_EQUAL(gcf2.wthr.y,12ul);
    BOOST_REQUIRE_EQUAL(gcf2.wthr.z,13ul);

#endif
}

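A note on the values asserted above: in every call, the block count wthr along a dimension equals the inclusive key range divided by the block size thr along that dimension, rounded up. The helper below is a minimal sketch of that arithmetic only; blocks_per_dim is a hypothetical name and the sketch does not claim to reproduce how getGPUIterator chooses thr.

#include <cstddef>

// Hypothetical helper (not part of OpenFPM): number of blocks needed to cover
// an inclusive key range [start,stop] with blocks of size thr in one dimension.
inline std::size_t blocks_per_dim(std::size_t start, std::size_t stop, std::size_t thr)
{
    std::size_t extent = stop - start + 1;     // e.g. keys 1..62 -> 62 grid points
    return (extent + thr - 1) / thr;           // ceiling division
}

// blocks_per_dim(1,62,16) == 4   -> gcf.wthr.x in the CUDA branch
// blocks_per_dim(50,62,8) == 2   -> gcf2.wthr.x with the 511-thread cap
// blocks_per_dim(50,60,4) == 3   -> gcf2.wthr.x with the 32-thread cap
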
BOOST_AUTO_TEST_CASE (gpu_computation)
{
#ifdef CUDA_GPU

    {
        size_t sz[3] = {64,64,64};
        /* (declaration of the GPU grid c3 over sz omitted in this listing) */

        c3.setMemory();
        test_layout_gridNd<3>(c3,sz[0]);

        gpu_grid_3D_compute(c3);

        c3.deviceToHost<0>();

        auto it = c3.getIterator();

        bool good = true;
        while(it.isNext())
        {
            auto key = it.get();

            good &= c3.getGrid().LinId(key) == c3.template get<0>(key);

            ++it;
        }

        BOOST_REQUIRE_EQUAL(good,true);

    }

#endif
}

BOOST_AUTO_TEST_CASE (gpu_computation_lambda)
{
#ifdef CUDA_GPU

    {
        size_t sz[3] = {64,64,64};
        /* (declaration of the GPU grid c3 over sz omitted in this listing) */

        c3.setMemory();

        // Assign

        auto c3_k = c3.toKernel();

        auto lamb = [c3_k] __device__ (dim3 & blockIdx, dim3 & threadIdx)
        {
            grid_key_dx<3,int> p({blockIdx.x * blockDim.x + threadIdx.x,
                                  blockIdx.y * blockDim.y + threadIdx.y,
                                  blockIdx.z * blockDim.z + threadIdx.z});

            c3_k.template get<0>(p) = 5.0;

            c3_k.template get<1>(p)[0] = 5.0;
            c3_k.template get<1>(p)[1] = 5.0;

            c3_k.template get<2>(p)[0][0] = 5.0;
            c3_k.template get<2>(p)[0][1] = 5.0;
            c3_k.template get<2>(p)[1][0] = 5.0;
            c3_k.template get<2>(p)[1][1] = 5.0;
        };

        auto ite = c3.getGPUIterator({0,0,0},{63,63,63});

        CUDA_LAUNCH_LAMBDA(ite,lamb);

        c3.deviceToHost<0,1,2>();

        auto it = c3.getIterator();

        bool good = true;
        while(it.isNext())
        {
            auto key = it.get();

            good &= c3.template get<0>(key) == 5.0;

            good &= c3.template get<1>(key)[0] == 5.0;
            good &= c3.template get<1>(key)[1] == 5.0;

            good &= c3.template get<2>(key)[0][0] == 5.0;
            good &= c3.template get<2>(key)[0][1] == 5.0;
            good &= c3.template get<2>(key)[1][0] == 5.0;
            good &= c3.template get<2>(key)[1][1] == 5.0;

            ++it;
        }

        BOOST_REQUIRE_EQUAL(good,true);

    }

#endif
}

BOOST_AUTO_TEST_CASE (gpu_computation_stencil)
{
#ifdef CUDA_GPU

    {
        size_t sz[3] = {64,64,64};
        /* (declarations of the GPU grids c3 and c2 over sz omitted in this listing) */
        grid_key_dx<3> key1({1,1,1});
        grid_key_dx<3> key2({62,62,62});

        c3.setMemory();
        c2.setMemory();
        test_layout_gridNd<3>(c3,sz[0]);
        test_layout_gridNd<3>(c2,sz[0]);

        gpu_grid_3D_one(c2);

        // Check property 0 is 1.0
        c2.deviceToHost<0>();

        {
            auto it = c2.getIterator();

            bool good = true;
            while(it.isNext())
            {
                auto key = it.get();

                good &= c2.get<0>(key) == 1.0;

                ++it;
            }

            BOOST_REQUIRE_EQUAL(good,true);
        }

        gpu_grid_3D_compute(c3);
        c3.deviceToHost<0>();

        {
            auto it = c3.getIterator();

            bool good = true;
            while(it.isNext())
            {
                auto key = it.get();

                good &= c3.getGrid().LinId(key) == c3.get<0>(key);

                ++it;
            }

            BOOST_REQUIRE_EQUAL(good,true);
        }

        gpu_grid_3D_compute_stencil(c3,c2,key1,key2);

        c2.deviceToHost<0>();

        auto it = c2.getIterator(key1,key2);

        bool good = true;
        while(it.isNext())
        {
            auto key = it.get();

            good &= c2.get<0>(key) == 0;

            ++it;
        }

        BOOST_REQUIRE_EQUAL(good,true);

    }

#endif
}

BOOST_AUTO_TEST_CASE (gpu_computation_grid_stencil)
{
#ifdef CUDA_GPU

    {
        size_t sz[3] = {64,64,64};
        /* (declarations of the GPU grids c3 and c2 over sz omitted in this listing) */
        grid_key_dx<3> key1({1,1,1});
        grid_key_dx<3> zero({0,0,0});
        grid_key_dx<3> key2({62,62,62});
        grid_key_dx<3> keyl({63,63,63});

        c3.setMemory();
        c2.setMemory();
        test_layout_gridNd<3>(c3,sz[0]);
        test_layout_gridNd<3>(c2,sz[0]);

        gpu_grid_3D_one(c2);

        // Check property 0 is 1.0
        c2.deviceToHost<0>();

        {
            auto it = c2.getIterator();

            bool good = true;
            while(it.isNext())
            {
                auto key = it.get();

                good &= c2.get<0>(key) == 1.0;

                ++it;
            }

            BOOST_REQUIRE_EQUAL(good,true);
        }

        gpu_grid_3D_compute(c3);
        c3.deviceToHost<0>();

        {
            auto it = c3.getIterator();

            bool good = true;
            while(it.isNext())
            {
                auto key = it.get();

                good &= c3.getGrid().LinId(key) == c3.get<0>(key);

                ++it;
            }

            BOOST_REQUIRE_EQUAL(good,true);
        }

        gpu_grid_3D_compute_grid_stencil(c3,c2,key1,key2);

        c2.deviceToHost<0>();

        auto it = c2.getIterator(key1,key2);

        bool good = true;
        while(it.isNext())
        {
            auto key = it.get();
            good &= c2.get<0>(key) == 0;

            ++it;
        }

        BOOST_REQUIRE_EQUAL(good,true);

        // We also try to fill a vectorial quantity

        gpu_grid_fill_vector(c3,zero,keyl);

    }

#endif
}

BOOST_AUTO_TEST_CASE (gpu_computation_grid_stencil_vector)
{
#ifdef CUDA_GPU

    {
        size_t sz[3] = {64,64,64};
        /* (declarations of the GPU grids c3 and c2 over sz omitted in this listing) */
        grid_key_dx<3> key1({1,1,1});
        grid_key_dx<3> zero({0,0,0});
        grid_key_dx<3> key2({62,62,62});
        grid_key_dx<3> keyl({63,63,63});

        c3.setMemory();
        c2.setMemory();

        gpu_grid_fill_vector(c3,zero,keyl);

        // Check that property 4 has been filled with (1,2,3)
        c3.deviceToHost<4>();

        {
            auto it = c3.getIterator(key1,key2);

            bool good = true;
            while(it.isNext())
            {
                auto key = it.get();

                good &= c3.get<4>(key)[0] == 1.0;
                good &= c3.get<4>(key)[1] == 2.0;
                good &= c3.get<4>(key)[2] == 3.0;

                ++it;
            }

            BOOST_REQUIRE_EQUAL(good,true);
        }

        // Fill c3

        gpu_grid_3D_compute(c3);
        gpu_grid_gradient_vector(c3,c2,key1,key2);

        // Check the gradient stored in property 4
        c2.deviceToHost<4>();

        {
            auto it = c2.getIterator(key1,key2);

            bool good = true;
            while(it.isNext())
            {
                auto key = it.get();

                good &= c2.get<4>(key)[0] == 1.0;
                good &= c2.get<4>(key)[1] == 64.0;
                good &= c2.get<4>(key)[2] == 4096.0;

                ++it;
            }

            BOOST_REQUIRE_EQUAL(good,true);
        }

    }

#endif
}

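The expected gradient components (1, 64, 4096) checked above are consistent with property 0 holding the linearized index of a 64x64x64 grid, where a unit step along each axis changes the index by that axis' stride. The sketch below only illustrates that arithmetic; lin_id_64 is a hypothetical name, and the x-fastest layout is an assumption about LinId, not something shown in this listing.

#include <cstddef>

// Assumed x-fastest linearization of a 64x64x64 grid (illustrative only).
inline std::size_t lin_id_64(std::size_t x, std::size_t y, std::size_t z)
{
    return x + 64*y + 64*64*z;
}

// lin_id_64(x+1,y,z) - lin_id_64(x,y,z) == 1      -> expected c2.get<4>(key)[0]
// lin_id_64(x,y+1,z) - lin_id_64(x,y,z) == 64     -> expected c2.get<4>(key)[1]
// lin_id_64(x,y,z+1) - lin_id_64(x,y,z) == 4096   -> expected c2.get<4>(key)[2]
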
BOOST_AUTO_TEST_CASE (gpu_swap_vector)
{
#ifdef CUDA_GPU

    {
        size_t sz[3] = {64,64,64};
        /* (declarations of the GPU grids c3 and c2 over sz omitted in this listing) */
        grid_key_dx<3> key1({1,1,1});
        grid_key_dx<3> zero({0,0,0});
        grid_key_dx<3> key2({62,62,62});
        grid_key_dx<3> keyl({63,63,63});

        c3.setMemory();
        c2.setMemory();

        gpu_grid_fill_vector(c2,zero,keyl);
        gpu_grid_fill_vector2(c3,zero,keyl);

        auto it4 = c3.getIterator(zero,keyl);

        // fill CPU
        while(it4.isNext())
        {
            auto key = it4.get();

            c2.get<4>(key)[0] = 1.0;
            c2.get<4>(key)[1] = 2.0;
            c2.get<4>(key)[2] = 3.0;

            c3.get<4>(key)[0] = 1001.0;
            c3.get<4>(key)[1] = 1002.0;
            c3.get<4>(key)[2] = 1003.0;

            ++it4;
        }

        // now we swap the two grids (host and device buffers)
        c3.swap(c2);

        {
            auto it = c3.getIterator(zero,keyl);

            bool good = true;
            while(it.isNext())
            {
                auto key = it.get();

                good &= c3.get<4>(key)[0] == 1.0;
                good &= c3.get<4>(key)[1] == 2.0;
                good &= c3.get<4>(key)[2] == 3.0;

                good &= c2.get<4>(key)[0] == 1001.0;
                good &= c2.get<4>(key)[1] == 1002.0;
                good &= c2.get<4>(key)[2] == 1003.0;

                if (good == false) {break;}

                // Set to zero

                c3.get<4>(key)[0] = 0.0;
                c3.get<4>(key)[1] = 0.0;
                c3.get<4>(key)[2] = 0.0;

                c2.get<4>(key)[0] = 0.0;
                c2.get<4>(key)[1] = 0.0;
                c2.get<4>(key)[2] = 0.0;

                ++it;
            }

            BOOST_REQUIRE_EQUAL(good,true);

            c2.template deviceToHost<4>();
            c3.template deviceToHost<4>();

            auto it2 = c3.getIterator(zero,keyl);

            good = true;
            while(it2.isNext())
            {
                auto key = it2.get();

                good &= c3.get<4>(key)[0] == 1.0;
                good &= c3.get<4>(key)[1] == 2.0;
                good &= c3.get<4>(key)[2] == 3.0;

                good &= c2.get<4>(key)[0] == 1001.0;
                good &= c2.get<4>(key)[1] == 1002.0;
                good &= c2.get<4>(key)[2] == 1003.0;

                if (good == false) {break;}

                ++it2;
            }

            BOOST_REQUIRE_EQUAL(good,true);
        }

    }

#endif
}

template<unsigned int dim>
void gpu_copy_device_test()
{
    size_t sz[dim];

    for (size_t i = 0 ; i < dim ; i++)
    {sz[i] = 13;}

    /* (declaration of the GPU grid c3 over sz omitted in this listing) */

    grid_sm<dim,void> g(sz);
    c3.setMemory();

    auto it4 = c3.getIterator();
    while (it4.isNext())
    {
        auto key = it4.get();

        c3.template get<0>(key) = g.LinId(key);

        c3.template get<4>(key)[0] = g.LinId(key) + 2000;
        c3.template get<4>(key)[1] = g.LinId(key) + 6000;
        c3.template get<4>(key)[2] = g.LinId(key) + 56000;

        ++it4;
    }

    c3.template hostToDevice<0>();

    size_t sz2[dim];

    for (size_t i = 0 ; i < dim ; i++)
    {sz2[i] = 17;}

    c3.resize(sz2);

    auto it = c3.getIterator();

    bool match = true;
    while (it.isNext())
    {
        auto key = it.get();

        bool to_check = true;
        for (size_t j = 0 ; j < dim ; j++)
        {
            if (key.get(j) >= (unsigned int)sz[j])
            {to_check = false;}
        }

        if (to_check == true)
        {
            match &= c3.template get<0>(key) == g.LinId(key);

            match &= c3.template get<4>(key)[0] == g.LinId(key) + 2000;
            match &= c3.template get<4>(key)[1] == g.LinId(key) + 6000;
            match &= c3.template get<4>(key)[2] == g.LinId(key) + 56000;
        }

        ++it;
    }

    BOOST_REQUIRE_EQUAL(match,true);

    // reset the host memory

    auto it2 = c3.getIterator();

    match = true;
    while (it2.isNext())
    {
        auto key = it2.get();

        c3.template get<0>(key) = 0;

        ++it2;
    }

    // bring it back to the CPU

    c3.template deviceToHost<0>();

    auto it3 = c3.getIterator();

    match = true;
    while (it3.isNext())
    {
        auto key = it3.get();

        bool to_check = true;
        for (size_t j = 0 ; j < dim ; j++)
        {
            if (key.get(j) >= (unsigned int)sz[j])
            {to_check = false;}
        }

        if (to_check == true)
        {
            match &= c3.template get<0>(key) == g.LinId(key);

            match &= c3.template get<4>(key)[0] == g.LinId(key) + 2000;
            match &= c3.template get<4>(key)[1] == g.LinId(key) + 6000;
            match &= c3.template get<4>(key)[2] == g.LinId(key) + 56000;
        }

        ++it3;
    }

    BOOST_REQUIRE_EQUAL(match,true);
}

template<typename grid_type>
__global__ void test_se1_crash_gt2(grid_type gt1, grid_type gt2)
{
    int p = blockIdx.x * blockDim.x + threadIdx.x;

    if (p == 279)
    {
        grid_key_dx<2> k({10000,12345});

        gt1.template get<1>(k)[2] = 6.0;
    }
}

template<typename grid_type>
__global__ void test_se1_crash_gt3(grid_type gt1, grid_type gt2)
{
    grid_key_dx<2> k({10000,12345});

    gt1.template get<2>(k)[2][2] = 6.0;
}

BOOST_AUTO_TEST_CASE (gpu_grid_test_se_class1)
{
#if defined(SE_CLASS1) && !defined(__clang__)

    size_t sz[2] = {5,5};

    /* (declaration of the 2D GPU grid c3 over sz omitted in this listing) */
    c3.setMemory();

    /* (declaration of the 2D GPU grid c2 over sz omitted in this listing) */
    c2.setMemory();

    int dev_mem[] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};

    dim3 wthr;
    wthr.x = 32;
    wthr.y = 1;
    wthr.z = 1;
    dim3 thr;
    thr.x = 16;
    thr.y = 1;
    thr.z = 1;

    CUDA_LAUNCH_DIM3_DEBUG_SE1(test_se1_crash_gt2,wthr,thr,c3.toKernel(),c2.toKernel());
    cudaDeviceSynchronize();

    cudaMemcpyFromSymbol(dev_mem,global_cuda_error_array,sizeof(dev_mem));

    BOOST_REQUIRE_EQUAL(dev_mem[0],1);
    BOOST_REQUIRE_EQUAL(*(size_t *)(&dev_mem[1]),(size_t)(c3.toKernel().template getPointer<1>()));
    BOOST_REQUIRE_EQUAL(dev_mem[3],1);
    BOOST_REQUIRE_EQUAL(dev_mem[4],2);
    BOOST_REQUIRE_EQUAL(dev_mem[5],10000);
    BOOST_REQUIRE_EQUAL(dev_mem[6],12345);

    BOOST_REQUIRE_EQUAL(dev_mem[7],17);
    BOOST_REQUIRE_EQUAL(dev_mem[8],0);
    BOOST_REQUIRE_EQUAL(dev_mem[9],0);

    BOOST_REQUIRE_EQUAL(dev_mem[10],16);
    BOOST_REQUIRE_EQUAL(dev_mem[11],1);
    BOOST_REQUIRE_EQUAL(dev_mem[12],1);

    BOOST_REQUIRE_EQUAL(dev_mem[13],7);
    BOOST_REQUIRE_EQUAL(dev_mem[14],0);
    BOOST_REQUIRE_EQUAL(dev_mem[15],0);

    int dev_mem2[] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};

    {
        dim3 wthr;
        wthr.x = 32;
        wthr.y = 1;
        wthr.z = 1;
        dim3 thr;
        thr.x = 16;
        thr.y = 1;
        thr.z = 1;

        CUDA_LAUNCH_DIM3_DEBUG_SE1(test_se1_crash_gt3,wthr,thr,c2.toKernel(),c3.toKernel());
        cudaDeviceSynchronize();
    }

    cudaMemcpyFromSymbol(dev_mem2,global_cuda_error_array,sizeof(dev_mem2));

    BOOST_REQUIRE_EQUAL(dev_mem2[0],1);
    BOOST_REQUIRE_EQUAL(*(size_t *)(&dev_mem2[1]),(size_t)(c2.toKernel().template getPointer<2>()));
    BOOST_REQUIRE_EQUAL(dev_mem2[3],2);
    BOOST_REQUIRE_EQUAL(dev_mem2[4],2);

    std::cout << "######### Testing error message #########" << std::endl;

    ite_gpu<3> gr;

    gr.wthr.x = 32;
    gr.wthr.y = 1;
    gr.wthr.z = 1;
    gr.thr.x = 16;
    gr.thr.y = 1;
    gr.thr.z = 1;
    CUDA_LAUNCH(test_se1_crash_gt2,gr,c3.toKernel(),c2.toKernel());
    std::cout << "######### End Testing error message #########" << std::endl;

#endif
}

BOOST_AUTO_TEST_CASE(grid_test_copy_to_gpu_2d)
{
    size_t sz_dst[] = {5,5};
    size_t sz_src[] = {3,2};
    /* (declarations of the grids g_src and g_dst over sz_src and sz_dst omitted in this listing) */

    Box<2,size_t> box_dst({1,2},{2,3});
    Box<2,size_t> box_src({1,0},{2,1});

    copy_test(g_src,g_dst,box_src,box_dst);
}

BOOST_AUTO_TEST_CASE(grid_test_copy_to_gpu_3d)
{
    size_t sz_dst[] = {5,5,5};
    size_t sz_src[] = {3,2,2};
    /* (declarations of the grids g_src and g_dst over sz_src and sz_dst omitted in this listing) */

    Box<3,size_t> box_dst({1,2,2},{2,3,3});
    Box<3,size_t> box_src({1,0,0},{2,1,1});

    copy_test(g_src,g_dst,box_src,box_dst);
}


BOOST_AUTO_TEST_SUITE_END()