OpenFPM  5.2.0
Project that contain the implementation of distributed structures
sgrid_dist_id_gpu_unit_tests.cu
1 #include <ostream>
2 #define BOOST_TEST_DYN_LINK
3 
4 #include "config.h"
5 #include <boost/test/unit_test.hpp>
6 #include "Grid/grid_dist_id.hpp"
7 
8 
9 BOOST_AUTO_TEST_SUITE( sgrid_gpu_test_suite )
10 
11 template<unsigned int p>
13 {
14  template<typename SparseGridGpu_type, typename ite_type>
15  __device__ void operator()(SparseGridGpu_type & sg, ite_type & ite, float c)
16  {
17  GRID_ID_2_GLOBAL(ite);
18 
19  sg.init();
20 
21  if (inactive == false)
22  {sg.template insert<p>(key) = c + keyg.get(0) + keyg.get(1);}
23 
24  __syncthreads();
25 
26  sg.flush_block_insert();
27  }
28 };
29 
30 
31 
32 template<unsigned int p>
34 {
35  template<typename SparseGridGpu_type, typename ite_type>
36  __device__ void operator()(SparseGridGpu_type & sg, ite_type & ite, float c)
37  {
38  GRID_ID_3_GLOBAL(ite);
39 
40  sg.init();
41 
42  if (inactive == false)
43  {sg.template insert<p>(key) = c + keyg.get(0) + keyg.get(1) + keyg.get(2);}
44 
45  __syncthreads();
46 
47  sg.flush_block_insert();
48  }
49 };
50 
51 
52 BOOST_AUTO_TEST_CASE( sgrid_gpu_test_base )
53 {
54  size_t sz[2] = {17,17};
55  periodicity<2> bc = {PERIODIC,PERIODIC};
56 
57  Ghost<2,long int> g(1);
58 
59  Box<2,float> domain({0.0,0.0},{1.0,1.0});
60 
61  sgrid_dist_id_gpu<2,float,aggregate<float>> gdist(sz,domain,g,bc);
62 
63  gdist.template setBackgroundValue<0>(666);
64 
66 
67  Box<2,size_t> box({1,1},{1,1});
68  auto it = gdist.getGridIterator(box.getKP1(),box.getKP2());
69 
71 
72  float c = 5.0;
73 
74  gdist.template iterateGridGPU<insert_kernel2D<0>>(it,c);
75  gdist.template flush<smax_<0>>(flush_type::FLUSH_ON_DEVICE);
76 
77  gdist.template deviceToHost<0>();
78 
79  {
80  Box<2,size_t> box2({0,0},{15,15});
81 
82  auto it = gdist.getGridIterator(box2.getKP1(),box2.getKP2());
83 
84  while (it.isNext())
85  {
86  auto p = it.get_dist();
87  auto p2 = it.get();
88 
89  if (p2.get(0) == box.getLow(0) && p2.get(1) == box.getLow(1))
90  {
91  BOOST_REQUIRE_EQUAL(gdist.template get<0>(p), 7.0);
92  }
93  else
94  {
95  BOOST_REQUIRE_EQUAL(gdist.template get<0>(p), 666.0);
96  }
97 
98  ++it;
99  }
100  }
101 
102  //
103 
104  c = 3.0;
105 
106  Box<2,size_t> box3({3,3},{11,11});
107 
108  auto it3 = gdist.getGridIterator(box3.getKP1(),box3.getKP2());
109 
110  gdist.template iterateGridGPU<insert_kernel2D<0>>(it3,c);
111  gdist.template flush<smax_<0>>(flush_type::FLUSH_ON_DEVICE);
112  gdist.template deviceToHost<0>();
113 
114  {
115  Box<2,size_t> box2({0,0},{15,15});
116 
117  auto it = gdist.getGridIterator(box2.getKP1(),box2.getKP2());
118 
119  while (it.isNext())
120  {
121  auto p = it.get_dist();
122  auto p2 = it.get();
123 
124  Point<2,size_t> p2_ = p2.toPoint();
125 
126  if (box.isInside(p2_))
127  {
128  BOOST_REQUIRE_EQUAL(gdist.template get<0>(p), 7.0);
129  }
130  else if (box3.isInside(p2_))
131  {
132  float tst = c + p2.get(0) + p2.get(1);
133 
134  BOOST_REQUIRE_EQUAL(gdist.template get<0>(p), tst);
135  }
136  else
137  {
138  BOOST_REQUIRE_EQUAL(gdist.template get<0>(p), 666.0);
139  }
140 
141  ++it;
142  }
143  }
144 }
145 
146 
147 BOOST_AUTO_TEST_CASE( sgrid_gpu_test_output )
148 {
149  auto & v_cl = create_vcluster();
150 
151  if (v_cl.size() > 3){return;}
152 
153  size_t sz[2] = {17,17};
154  periodicity<2> bc = {PERIODIC,PERIODIC};
155 
156  Ghost<2,long int> g(1);
157 
158  Box<2,float> domain({0.0,0.0},{1.0,1.0});
159 
160  sgrid_dist_id_gpu<2,float,aggregate<float>> gdist(sz,domain,g,bc);
161 
162  gdist.template setBackgroundValue<0>(666);
163 
165 
166  Box<2,size_t> box({1,1},{15,15});
167  auto it = gdist.getGridIterator(box.getKP1(),box.getKP2());
168 
170 
171  float c = 5.0;
172 
173  gdist.template iterateGridGPU<insert_kernel2D<0>>(it,c);
174  gdist.template flush<smax_<0>>(flush_type::FLUSH_ON_DEVICE);
175 
176  gdist.template deviceToHost<0>();
177 
178  gdist.write("sgrid_gpu_output");
179 
180  std::string file_test("sgrid_gpu_output_" + std::to_string(v_cl.size()) + "_" + std::to_string(v_cl.rank()) + ".vtk");
181  std::string file("sgrid_gpu_output_" + std::to_string(v_cl.rank()) + ".vtk");
182 
183  #ifndef HAVE_OSX
184  bool test = compare(file,"test_data/" + file_test);
185 
186  BOOST_REQUIRE_EQUAL(true,test);
187  #endif
188 }
189 
190 template<typename grid, typename box_type>
191 void check_sgrid(grid & gdist2, box_type & box, float c)
192 {
193  size_t n_point = 0;
194  bool match = true;
195  auto it2 = gdist2.getDomainIterator();
196 
197  while (it2.isNext())
198  {
199  auto p = it2.get();
200 
201  auto key = it2.getGKey(p);
202 
203  auto p_xp1 = p.move(0,1);
204  auto p_xm1 = p.move(0,-1);
205  auto p_yp1 = p.move(1,1);
206  auto p_ym1 = p.move(1,-1);
207 
208  auto key_xp1 = key.move(0,1);
209  auto key_xm1 = key.move(0,-1);
210  auto key_yp1 = key.move(1,1);
211  auto key_ym1 = key.move(1,-1);
212 
213  if (box.isInside(key_xp1.toPoint()))
214  {
215  match &= gdist2.template get<0>(p_xp1) == c + key_xp1.get(0) + key_xp1.get(1);
216 
217  if (match == false)
218  {
219  std::cout << gdist2.template get<0>(p_xp1) << " " << c + key_xp1.get(0) + key_xp1.get(1) << std::endl;
220  std::cout << "1 " << key_xp1.to_string() << " " << p_xp1.getKey().toPoint().to_string() << " " << &gdist2.template get<0>(p_xp1) << std::endl;
221  break;
222  }
223  }
224 
225  if (box.isInside(key_xm1.toPoint()))
226  {
227  match &= gdist2.template get<0>(p_xm1) == c + key_xm1.get(0) + key_xm1.get(1);
228 
229  if (match == false)
230  {
231  std::cout << gdist2.template get<0>(p_xm1) << " " << c + key_xm1.get(0) + key_xm1.get(1) << std::endl;
232  std::cout << "2 " << key_xm1.to_string() << std::endl;
233  break;
234  }
235  }
236 
237  if (box.isInside(key_yp1.toPoint()))
238  {
239  match &= gdist2.template get<0>(p_yp1) == c + key_yp1.get(0) + key_yp1.get(1);
240 
241  if (match == false)
242  {
243  std::cout << gdist2.template get<0>(p_yp1) << " " << c + key_yp1.get(0) + key_yp1.get(1) << std::endl;
244  std::cout << "3 " << key_yp1.to_string() << std::endl;
245  break;
246  }
247  }
248 
249  if (box.isInside(key_ym1.toPoint()))
250  {
251  match &= gdist2.template get<0>(p_ym1) == c + key_ym1.get(0) + key_ym1.get(1);
252 
253  if (match == false)
254  {
255  std::cout << gdist2.template get<0>(p_ym1) << " " << c + key_ym1.get(0) + key_ym1.get(1) << std::endl;
256  std::cout << "4 " << key_ym1.to_string() << std::endl;
257  break;
258  }
259  }
260 
261  n_point++;
262 
263  ++it2;
264  }
265 
266  auto & v_cl = create_vcluster();
267 
268  v_cl.sum(n_point);
269  v_cl.execute();
270 
271  BOOST_REQUIRE_EQUAL(match,true);
272  BOOST_REQUIRE_EQUAL(n_point,350*350);
273 }
274 
275 template<typename grid, typename box_type>
276 void check_sgrid_no_ghost(grid & gdist2, box_type & box, float c)
277 {
278  size_t n_point = 0;
279  bool match = true;
280  auto it2 = gdist2.getDomainIterator();
281 
282  while (it2.isNext())
283  {
284  auto p = it2.get();
285 
286  auto key = it2.getGKey(p);
287 
288  match &= gdist2.template get<0>(p) == c + key.get(0) + key.get(1);
289 
290  if (match == false)
291  {
292  std::cout << gdist2.template get<0>(p) << " " << c + key.get(0) + key.get(1) << std::endl;
293  std::cout << "1 " << key.to_string() << " " << p.getKey().toPoint().to_string() << " " << &gdist2.template get<0>(p) << std::endl;
294  break;
295  }
296 
297  n_point++;
298 
299  ++it2;
300  }
301 
302  auto & v_cl = create_vcluster();
303 
304  v_cl.sum(n_point);
305  v_cl.execute();
306 
307  BOOST_REQUIRE_EQUAL(match,true);
308  BOOST_REQUIRE_EQUAL(n_point,350*350);
309 }
310 
311 BOOST_AUTO_TEST_CASE( sgrid_gpu_test_load_from_file )
312 {
313  auto & v_cl = create_vcluster();
314 
315  float c = 5.0;
316 
317  if (v_cl.size() > 8){return;}
318 
319  size_t sz[2] = {370,370};
320  periodicity<2> bc = {PERIODIC,PERIODIC};
321 
322  Ghost<2,long int> g(1);
323 
324  Box<2,float> domain({0.0,0.0},{1.0,1.0});
325 
327 
328  Box<2,size_t> box({1,1},{350,350});
329 
330  // Now load
331 
332  sgrid_dist_id_gpu<2,float,aggregate<float,float>> gdist2(sz,domain,g,bc);
333 
334  gdist2.load("test_data/sgrid_gpu_output_hdf5");
335  gdist2.deviceToHost<0,1>();
336  check_sgrid_no_ghost(gdist2,box,c);
337  gdist2.template hostToDevice<0>();
338  gdist2.template ghost_get<0,1>(RUN_ON_DEVICE);
339 
340  gdist2.deviceToHost<0,1>();
341  check_sgrid(gdist2,box,c);
342 }
343 
344 BOOST_AUTO_TEST_CASE( sgrid_gpu_test_save_and_load )
345 {
346  auto & v_cl = create_vcluster();
347 
348  if (v_cl.size() > 3){return;}
349 
350  size_t sz[2] = {370,370};
351  periodicity<2> bc = {PERIODIC,PERIODIC};
352 
353  Ghost<2,long int> g(1);
354 
355  Box<2,float> domain({0.0,0.0},{1.0,1.0});
356 
357  sgrid_dist_id_gpu<2,float,aggregate<float,float>> gdist(sz,domain,g,bc);
358 
359  gdist.template setBackgroundValue<0>(666);
360 
362 
363  Box<2,size_t> box({1,1},{350,350});
364  auto it = gdist.getGridIterator(box.getKP1(),box.getKP2());
365 
367 
368  typedef typename GetAddBlockType<decltype(gdist)>::type InsertBlockT;
369 
370  float c = 5.0;
371 
372  gdist.addPoints(box.getKP1(),box.getKP2(),[] __device__ (int i, int j)
373  {
374  return true;
375  },
376  [c] __device__ (InsertBlockT & data, int i, int j)
377  {
378  data.template get<0>() = c + i + j;
379  data.template get<1>() = c + 1000 + i + j;
380  }
381  );
382 
383  gdist.template flush<smax_<0>,smax_<1>>(flush_type::FLUSH_ON_DEVICE);
384 
385  gdist.template deviceToHost<0>();
386  gdist.save("sgrid_gpu_output_hdf5");
387  gdist.write("sgrid_conf");
388 
389  // Now load
390 
391  sgrid_dist_id_gpu<2,float,aggregate<float,float>> gdist2(sz,domain,g,bc);
392 
393  gdist2.load("sgrid_gpu_output_hdf5");
394  gdist2.deviceToHost<0,1>();
395  check_sgrid_no_ghost(gdist2,box,c);
396  gdist2.template hostToDevice<0>();
397  gdist2.template ghost_get<0,1>(RUN_ON_DEVICE);
398 
399  gdist2.deviceToHost<0,1>();
400  gdist.deviceToHost<0,1>();
401  check_sgrid(gdist2,box,c);
402 }
403 
404 void sgrid_ghost_get(size_t (& sz)[2],size_t (& sz2)[2])
405 {
406  periodicity<2> bc = {PERIODIC,PERIODIC};
407 
408  Ghost<2,long int> g(1);
409 
410  Box<2,float> domain({0.0,0.0},{1.0,1.0});
411 
412  sgrid_dist_id_gpu<2,float,aggregate<float>> gdist(sz,domain,g,bc);
413 
414  gdist.template setBackgroundValue<0>(666);
415 
417 
418  Box<2,size_t> box({1,1},{sz2[0],sz2[1]});
419  auto it = gdist.getGridIterator(box.getKP1(),box.getKP2());
420 
422 
423  float c = 5.0;
424 
425  gdist.template iterateGridGPU<insert_kernel2D<0>>(it,c);
426  gdist.template flush<smax_<0>>(flush_type::FLUSH_ON_DEVICE);
427 
428  gdist.template deviceToHost<0>();
429 
430  gdist.template ghost_get<0>(RUN_ON_DEVICE);
431 
432  gdist.template deviceToHost<0>();
433 
434  // Now we check that ghost is correct
435 
436  auto it2 = gdist.getDomainIterator();
437 
438  bool match = true;
439 
440  while (it2.isNext())
441  {
442  auto p = it2.get();
443 
444  auto key = it2.getGKey(p);
445 
446  auto p_xp1 = p.move(0,1);
447  auto p_xm1 = p.move(0,-1);
448  auto p_yp1 = p.move(1,1);
449  auto p_ym1 = p.move(1,-1);
450 
451  auto key_xp1 = key.move(0,1);
452  auto key_xm1 = key.move(0,-1);
453  auto key_yp1 = key.move(1,1);
454  auto key_ym1 = key.move(1,-1);
455 
456  if (box.isInside(key_xp1.toPoint()))
457  {
458  match &= gdist.template get<0>(p_xp1) == c + key_xp1.get(0) + key_xp1.get(1);
459 
460  if (match == false)
461  {
462  std::cout << gdist.template get<0>(p_xp1) << " " << c + key_xp1.get(0) + key_xp1.get(1) << std::endl;
463  break;
464  }
465  }
466 
467  if (box.isInside(key_xm1.toPoint()))
468  {
469  match &= gdist.template get<0>(p_xm1) == c + key_xm1.get(0) + key_xm1.get(1);
470 
471  if (match == false)
472  {
473  std::cout << gdist.template get<0>(p_xm1) << " " << c + key_xm1.get(0) + key_xm1.get(1) << std::endl;
474  break;
475  }
476  }
477 
478  if (box.isInside(key_yp1.toPoint()))
479  {
480  match &= gdist.template get<0>(p_yp1) == c + key_yp1.get(0) + key_yp1.get(1);
481 
482  if (match == false)
483  {
484  std::cout << gdist.template get<0>(p_yp1) << " " << c + key_yp1.get(0) + key_yp1.get(1) << std::endl;
485  break;
486  }
487  }
488 
489  if (box.isInside(key_ym1.toPoint()))
490  {
491  match &= gdist.template get<0>(p_ym1) == c + key_ym1.get(0) + key_ym1.get(1);
492 
493  if (match == false)
494  {
495  std::cout << gdist.template get<0>(p_ym1) << " " << c + key_ym1.get(0) + key_ym1.get(1) << std::endl;
496  break;
497  }
498  }
499 
500  ++it2;
501  }
502 
503  BOOST_REQUIRE_EQUAL(match,true);
504 }
505 
506 BOOST_AUTO_TEST_CASE( sgrid_gpu_test_ghost_get )
507 {
508  size_t sz[2] = {17,17};
509  size_t sz6[2] = {15,15};
510  sgrid_ghost_get(sz,sz6);
511 
512  return;
513 
514  size_t sz2[2] = {170,170};
515  size_t sz3[2] = {15,15};
516  sgrid_ghost_get(sz2,sz3);
517 
518  size_t sz4[2] = {168,168};
519  sgrid_ghost_get(sz2,sz4);
520 }
521 
522 BOOST_AUTO_TEST_CASE( sgrid_gpu_app_point_test_no_box )
523 {
524  size_t sz[3] = {75,75,75};
525  periodicity<3> bc = {PERIODIC,PERIODIC,PERIODIC};
526 
527  Ghost<3,long int> g(1);
528 
529  Box<3,float> domain({0.0,0.0,0.0},{1.0,1.0,1.0});
530 
531  sgrid_dist_id_gpu<3,float,aggregate<float,float,float,float>> gdist(sz,domain,g,bc);
532 
533  gdist.template setBackgroundValue<0>(666);
534  gdist.template setBackgroundValue<1>(666);
535  gdist.template setBackgroundValue<2>(666);
536  gdist.template setBackgroundValue<3>(666);
537 
539 
540  Box<3,size_t> box({1,1,1},{sz[0],sz[1],sz[2]});
541 
543 
544  float c = 5.0;
545 
546  typedef typename GetAddBlockType<decltype(gdist)>::type InsertBlockT;
547 
548  CudaMemory cmem;
549  cmem.allocate(sizeof(int));
550 
551  *(int *)cmem.getPointer() = 0.0;
552 
553  cmem.hostToDevice();
554 
555  int * cnt = (int *)cmem.getDevicePointer();
556 
557  gdist.addPoints([cnt] __device__ (int i, int j, int k)
558  {
559  atomicAdd(cnt,1);
560 
561  return true;
562  },
563  [c] __device__ (InsertBlockT & data, int i, int j, int k)
564  {
565  data.template get<0>() = c + i + j;
566  data.template get<1>() = c + 1000 + i + j;
567  }
568  );
569 
570  gdist.template flush<smax_<0>,smax_<1>>(flush_type::FLUSH_ON_DEVICE);
571  gdist.template ghost_get<0,1>(RUN_ON_DEVICE);
572 
573  cmem.deviceToHost();
574 
575  int cnt_host = *(int *)cmem.getPointer();
576 
577  auto & v_cl = create_vcluster();
578 
579  v_cl.sum(cnt_host);
580  v_cl.execute();
581 
582  BOOST_REQUIRE_EQUAL(cnt_host,75*75*75);
583 }
584 
585 
586 BOOST_AUTO_TEST_CASE( sgrid_gpu_app_point_test )
587 {
588  size_t sz[3] = {75,75,75};
589  periodicity<3> bc = {PERIODIC,PERIODIC,PERIODIC};
590 
591  Ghost<3,long int> g(1);
592 
593  Box<3,float> domain({0.0,0.0,0.0},{1.0,1.0,1.0});
594 
595  sgrid_dist_id_gpu<3,float,aggregate<float,float,float,float>> gdist(sz,domain,g,bc);
596 
597  gdist.template setBackgroundValue<0>(666);
598  gdist.template setBackgroundValue<1>(666);
599  gdist.template setBackgroundValue<2>(666);
600  gdist.template setBackgroundValue<3>(666);
601 
603 
604  Box<3,size_t> box({1,1,1},{sz[0],sz[1],sz[2]});
605 
607 
608  float c = 5.0;
609 
610  typedef typename GetAddBlockType<decltype(gdist)>::type InsertBlockT;
611 
612  CudaMemory cmem;
613  cmem.allocate(sizeof(int));
614  CudaMemory cmem_out;
615  cmem_out.allocate(sizeof(int));
616 
617  *(int *)cmem.getPointer() = 0.0;
618  *(int *)cmem_out.getPointer() = 0.0;
619 
620  cmem.hostToDevice();
621  cmem_out.hostToDevice();
622 
623  int * cnt = (int *)cmem.getDevicePointer();
624  int * cnt_out = (int *)cmem_out.getDevicePointer();
625 
626  Box<3,size_t> bx({23,23,23},{70,70,70});
627 
628  gdist.addPoints(bx.getKP1(),bx.getKP2(),
629  [cnt,cnt_out,bx] __device__ (int i, int j, int k)
630  {
631  Point<3,int> p({i,j,k});
632 
633  if (bx.isInside(p))
634  {atomicAdd(cnt,1);}
635  else
636  {
637  atomicAdd(cnt_out,1);
638  }
639 
640  return true;
641  },
642  [c] __device__ (InsertBlockT & data, int i, int j, int k)
643  {
644  data.template get<0>() = c + i + j;
645  data.template get<1>() = c + 1000 + i + j;
646  }
647  );
648 
649  gdist.template flush<smax_<0>,smax_<1>>(flush_type::FLUSH_ON_DEVICE);
650  gdist.template ghost_get<0,1>(RUN_ON_DEVICE);
651 
652  cmem.deviceToHost();
653  cmem_out.deviceToHost();
654 
655  int cnt_host = *(int *)cmem.getPointer();
656  int cnt_host_out = *(int *)cmem_out.getPointer();
657 
658  auto & v_cl = create_vcluster();
659 
660  v_cl.sum(cnt_host_out);
661  v_cl.sum(cnt_host);
662  v_cl.execute();
663 
664  BOOST_REQUIRE_EQUAL(cnt_host_out,0);
665  BOOST_REQUIRE_EQUAL(cnt_host,bx.getVolumeKey());
666 }
667 
668 
669 BOOST_AUTO_TEST_CASE( sgrid_gpu_test_conv2_test )
670 {
671  size_t sz[2] = {164,164};
672  periodicity<2> bc = {PERIODIC,PERIODIC};
673 
674  Ghost<2,long int> g(1);
675 
676  Box<2,float> domain({0.0,0.0},{1.0,1.0});
677 
678  sgrid_dist_id_gpu<2,float,aggregate<float,float,float,float>> gdist(sz,domain,g,bc);
679 
680  gdist.template setBackgroundValue<0>(666);
681  gdist.template setBackgroundValue<1>(666);
682  gdist.template setBackgroundValue<2>(666);
683  gdist.template setBackgroundValue<3>(666);
684 
686 
687  Box<2,size_t> box({1,1},{sz[0],sz[1]});
688 
690 
691  float c = 5.0;
692 
693  typedef typename GetAddBlockType<decltype(gdist)>::type InsertBlockT;
694 
695  gdist.addPoints(box.getKP1(),box.getKP2(),
696  [] __device__ (int i, int j)
697  {
698  return true;
699  },
700  [c] __device__ (InsertBlockT & data, int i, int j)
701  {
702  data.template get<0>() = c + i + j;
703  data.template get<1>() = c + 1000 + i + j;
704  }
705  );
706 
707  gdist.template flush<smax_<0>,smax_<1>>(flush_type::FLUSH_ON_DEVICE);
708  gdist.template ghost_get<0,1>(RUN_ON_DEVICE);
709 
710 
711  // Now run the convolution
712 
713  typedef typename GetCpBlockType<decltype(gdist),0,1>::type CpBlockType;
714 
715  gdist.template conv2<0,1,2,3,1>({2,2},{(int)sz[0]-2,(int)sz[1]-2},[] __device__ (float & u_out, float & v_out, CpBlockType & u, CpBlockType & v,int i, int j){
716  u_out = u(i+1,j) - u(i-1,j) + u(i,j+1) - u(i,j-1);
717  v_out = v(i+1,j) - v(i-1,j) + v(i,j+1) - v(i,j-1);
718  });
719 
720  gdist.deviceToHost<0,1,2,3>();
721 
722  // Now we check that ghost is correct
723 
724  auto it3 = gdist.getSubDomainIterator({2,2},{(int)sz[0]-2,(int)sz[1]-2});
725 
726  bool match = true;
727 
728  while (it3.isNext())
729  {
730  auto p = it3.get();
731 
732  auto p_xp1 = p.move(0,1);
733  auto p_xm1 = p.move(0,-1);
734  auto p_yp1 = p.move(1,1);
735  auto p_ym1 = p.move(1,-1);
736 
737  float sub1 = gdist.template get<2>(p);
738  float sub2 = gdist.template get<3>(p);
739 
740  if (sub1 != 4.0 || sub2 != 4.0)
741  {
742  std::cout << sub1 << " " << sub2 << std::endl;
743  std::cout << gdist.template get<0>(p_xp1) << " " << gdist.template get<0>(p_xm1) << std::endl;
744  std::cout << gdist.template get<1>(p_xp1) << " " << gdist.template get<1>(p_xm1) << std::endl;
745  match = false;
746  break;
747  }
748 
749  ++it3;
750  }
751 
752  BOOST_REQUIRE_EQUAL(match,true);
753 }
754 
755 
756 BOOST_AUTO_TEST_CASE( sgrid_gpu_test_conv2_test_3d )
757 {
758  #ifdef CUDA_ON_CPU
759  size_t sz[3] = {20,20,20};
760  #else
761  size_t sz[3] = {60,60,60};
762  #endif
763  periodicity<3> bc = {PERIODIC,PERIODIC,PERIODIC};
764 
765  Ghost<3,long int> g(1);
766 
767  Box<3,float> domain({0.0,0.0,0.0},{1.0,1.0,1.0});
768 
769  sgrid_dist_id_gpu<3,float,aggregate<float,float,float,float>> gdist(sz,domain,g,bc);
770 
771  gdist.template setBackgroundValue<0>(666);
772  gdist.template setBackgroundValue<1>(666);
773  gdist.template setBackgroundValue<2>(666);
774  gdist.template setBackgroundValue<3>(666);
775 
777 
778  Box<3,size_t> box({1,1,1},{sz[0]-1,sz[1]-1,sz[2]-1});
779 
781 
782  float c = 5.0;
783 
784  typedef typename GetAddBlockType<decltype(gdist)>::type InsertBlockT;
785 
786  gdist.addPoints(box.getKP1(),box.getKP2(),
787  [] __device__ (int i, int j, int k)
788  {
789  return true;
790  },
791  [c] __device__ (InsertBlockT & data, int i, int j, int k)
792  {
793  data.template get<0>() = c + i + j + k;
794  data.template get<1>() = c + 1000 + i + j + k;
795  }
796  );
797 
798  gdist.template flush<smax_<0>,smax_<1>>(flush_type::FLUSH_ON_DEVICE);
799 
800  gdist.template ghost_get<0,1>(RUN_ON_DEVICE);
801 
802  for (int i = 0 ; i < 10 ; i++)
803  {
804  gdist.template ghost_get<0,1>(RUN_ON_DEVICE);
805  }
806 
807  // Now run the convolution
808 
809  typedef typename GetCpBlockType<decltype(gdist),0,1>::type CpBlockType;
810 
811  gdist.template conv2<0,1,2,3,1>({2,2,2},{(int)sz[0]-2,(int)sz[1]-2,(int)sz[2]-2},[] __device__ (float & u_out, float & v_out, CpBlockType & u, CpBlockType & v,int i, int j, int k){
812  u_out = u(i+1,j,k) - u(i-1,j,k) + u(i,j+1,k) - u(i,j-1,k) + u(i,j,k+1) - u(i,j,k-1);
813  v_out = v(i+1,j,k) - v(i-1,j,k) + v(i,j+1,k) - v(i,j-1,k) + v(i,j,k+1) - v(i,j,k-1);
814  });
815 
816  gdist.deviceToHost<0,1,2,3>();
817 
818  // Now we check that ghost is correct
819 
820  auto it3 = gdist.getSubDomainIterator({2,2,2},{(int)sz[0]-2,(int)sz[1]-2,(int)sz[2]-2});
821 
822  bool match = true;
823 
824  while (it3.isNext())
825  {
826  auto p = it3.get();
827 
828  auto p_xp1 = p.move(0,1);
829  auto p_xm1 = p.move(0,-1);
830  auto p_yp1 = p.move(1,1);
831  auto p_ym1 = p.move(1,-1);
832  auto p_zp1 = p.move(2,1);
833  auto p_zm1 = p.move(2,-1);
834 
835  float sub1 = gdist.template get<2>(p);
836  float sub2 = gdist.template get<3>(p);
837 
838  if (sub1 != 6.0 || sub2 != 6.0)
839  {
840  std::cout << sub1 << " " << sub2 << std::endl;
841  std::cout << gdist.template get<0>(p_xp1) << " " << gdist.template get<0>(p_xm1) << std::endl;
842  std::cout << gdist.template get<1>(p_xp1) << " " << gdist.template get<1>(p_xm1) << std::endl;
843  match = false;
844  break;
845  }
846 
847  ++it3;
848  }
849 
850  BOOST_REQUIRE_EQUAL(match,true);
851 }
852 
853 BOOST_AUTO_TEST_CASE( sgrid_gpu_test_conv_cross_block_test_3d )
854 {
855  #ifdef CUDA_ON_CPU
856  size_t sz[3] = {20,20,20};
857  #else
858  size_t sz[3] = {60,60,60};
859  #endif
860  periodicity<3> bc = {PERIODIC,PERIODIC,PERIODIC};
861 
862  Ghost<3,long int> g(1);
863 
864  Box<3,float> domain({0.0,0.0,0.0},{1.0,1.0,1.0});
865 
866  sgrid_dist_id_gpu<3,float,aggregate<float,float,float,float>> gdist(sz,domain,g,bc);
867 
868  gdist.template setBackgroundValue<0>(666);
869  gdist.template setBackgroundValue<1>(666);
870  gdist.template setBackgroundValue<2>(666);
871  gdist.template setBackgroundValue<3>(666);
872 
874 
875  Box<3,size_t> box({1,1,1},{sz[0]-1,sz[1]-1,sz[2]-1});
876 
878 
879  float c = 5.0;
880 
881  typedef typename GetAddBlockType<decltype(gdist)>::type InsertBlockT;
882 
883  gdist.addPoints(box.getKP1(),box.getKP2(),
884  [] __device__ (int i, int j, int k)
885  {
886  return true;
887  },
888  [c] __device__ (InsertBlockT & data, int i, int j, int k)
889  {
890  data.template get<0>() = c + i + j + k;
891  data.template get<1>() = c + 1000 + i + j + k;
892  }
893  );
894 
895  gdist.template flush<smax_<0>,smax_<1>>(flush_type::FLUSH_ON_DEVICE);
896 
897  gdist.template ghost_get<0,1>(RUN_ON_DEVICE);
898 
899  for (int i = 0 ; i < 10 ; i++)
900  {
901  gdist.template ghost_get<0,1>(RUN_ON_DEVICE);
902  }
903 
904  // Now run the convolution
905 
906  typedef typename GetCpBlockType<decltype(gdist),0,1>::type CpBlockType;
907 
908  gdist.template conv_cross_b<0,1,1>({2,2,2},{(int)sz[0]-2,(int)sz[1]-2,(int)sz[2]-2},[] __device__ (CpBlockType & u, auto & block, int offset,int i, int j, int k){
909  return u(i+1,j,k) - u(i-1,j,k) + u(i,j+1,k) - u(i,j-1,k) + u(i,j,k+1) - u(i,j,k-1) + block.template get<0>()[offset];
910  });
911 
912  gdist.deviceToHost<0,1,2,3>();
913 
914  // Now we check that ghost is correct
915 
916  auto it3 = gdist.getSubDomainIterator({2,2,2},{(int)sz[0]-2,(int)sz[1]-2,(int)sz[2]-2});
917 
918  bool match = true;
919 
920  while (it3.isNext())
921  {
922  auto p = it3.get();
923 
924  auto p_xp1 = p.move(0,1);
925  auto p_xm1 = p.move(0,-1);
926  auto p_yp1 = p.move(1,1);
927  auto p_ym1 = p.move(1,-1);
928  auto p_zp1 = p.move(2,1);
929  auto p_zm1 = p.move(2,-1);
930 
931  float sub1 = gdist.template get<1>(p);
932 
933  if (sub1 != 6.0 + gdist.template get<0>(p))
934  {
935  std::cout << sub1 << std::endl;
936  std::cout << gdist.template get<0>(p_xp1) << " " << gdist.template get<0>(p_xm1) << std::endl;
937  std::cout << gdist.template get<1>(p_xp1) << " " << gdist.template get<1>(p_xm1) << std::endl;
938  match = false;
939  break;
940  }
941 
942  ++it3;
943  }
944 
945  BOOST_REQUIRE_EQUAL(match,true);
946 }
947 
948 BOOST_AUTO_TEST_CASE( sgrid_gpu_test_conv2_b_test_3d )
949 {
950  #ifdef CUDA_ON_CPU
951  size_t sz[3] = {20,20,20};
952  #else
953  size_t sz[3] = {60,60,60};
954  #endif
955  periodicity<3> bc = {PERIODIC,PERIODIC,PERIODIC};
956 
957  Ghost<3,long int> g(1);
958 
959  Box<3,float> domain({0.0,0.0,0.0},{1.0,1.0,1.0});
960 
961  sgrid_dist_id_gpu<3,float,aggregate<float,float,float,float>> gdist(sz,domain,g,bc);
962 
963  gdist.template setBackgroundValue<0>(666);
964  gdist.template setBackgroundValue<1>(666);
965  gdist.template setBackgroundValue<2>(666);
966  gdist.template setBackgroundValue<3>(666);
967 
969 
970  Box<3,size_t> box({1,1,1},{sz[0]-1,sz[1]-1,sz[2]-1});
971 
973 
974  float c = 5.0;
975 
976  typedef typename GetAddBlockType<decltype(gdist)>::type InsertBlockT;
977 
978  gdist.addPoints(box.getKP1(),box.getKP2(),
979  [] __device__ (int i, int j, int k)
980  {
981  return true;
982  },
983  [c] __device__ (InsertBlockT & data, int i, int j, int k)
984  {
985  data.template get<0>() = c + i + j + k;
986  data.template get<1>() = c + 1000 + i + j + k;
987  }
988  );
989 
990  gdist.template flush<smax_<0>,smax_<1>>(flush_type::FLUSH_ON_DEVICE);
991 
992  gdist.template ghost_get<0,1>(RUN_ON_DEVICE);
993 
994  for (int i = 0 ; i < 10 ; i++)
995  {
996  gdist.template ghost_get<0,1>(RUN_ON_DEVICE);
997  }
998 
999  // Now run the convolution
1000 
1001  typedef typename GetCpBlockType<decltype(gdist),0,1>::type CpBlockType;
1002 
1003  gdist.template conv2_b<0,1,2,3,1>({2,2,2},{(int)sz[0]-2,(int)sz[1]-2,(int)sz[2]-2},[] __device__ (float & u_out, float & v_out, CpBlockType & u, CpBlockType & v, auto & block, int offset,int i, int j, int k){
1004  u_out = u(i+1,j,k) - u(i-1,j,k) + u(i,j+1,k) - u(i,j-1,k) + u(i,j,k+1) - u(i,j,k-1) + block.template get<0>()[offset];
1005  v_out = v(i+1,j,k) - v(i-1,j,k) + v(i,j+1,k) - v(i,j-1,k) + v(i,j,k+1) - v(i,j,k-1) + block.template get<1>()[offset];
1006  });
1007 
1008  gdist.deviceToHost<0,1,2,3>();
1009 
1010  // Now we check that ghost is correct
1011 
1012  auto it3 = gdist.getSubDomainIterator({2,2,2},{(int)sz[0]-2,(int)sz[1]-2,(int)sz[2]-2});
1013 
1014  bool match = true;
1015 
1016  while (it3.isNext())
1017  {
1018  auto p = it3.get();
1019 
1020  auto p_xp1 = p.move(0,1);
1021  auto p_xm1 = p.move(0,-1);
1022  auto p_yp1 = p.move(1,1);
1023  auto p_ym1 = p.move(1,-1);
1024  auto p_zp1 = p.move(2,1);
1025  auto p_zm1 = p.move(2,-1);
1026 
1027  float sub1 = gdist.template get<2>(p) ;
1028  float sub2 = gdist.template get<3>(p);
1029 
1030  if (sub1 != 6.0 + gdist.template get<0>(p) || sub2 != 6.0 + gdist.template get<1>(p))
1031  {
1032  std::cout << sub1 << " " << sub2 << std::endl;
1033  std::cout << gdist.template get<0>(p_xp1) << " " << gdist.template get<0>(p_xm1) << std::endl;
1034  std::cout << gdist.template get<1>(p_xp1) << " " << gdist.template get<1>(p_xm1) << std::endl;
1035  match = false;
1036  break;
1037  }
1038 
1039  ++it3;
1040  }
1041 
1042  BOOST_REQUIRE_EQUAL(match,true);
1043 }
1044 
1045 BOOST_AUTO_TEST_CASE( sgrid_gpu_test_conv3_b_test_3d )
1046 {
1047  #ifdef CUDA_ON_CPU
1048  size_t sz[3] = {20,20,20};
1049  #else
1050  size_t sz[3] = {60,60,60};
1051  #endif
1052  periodicity<3> bc = {PERIODIC,PERIODIC,PERIODIC};
1053 
1054  Ghost<3,long int> g(1);
1055 
1056  Box<3,float> domain({0.0,0.0,0.0},{1.0,1.0,1.0});
1057 
1058  sgrid_dist_id_gpu<3,float,aggregate<float,float,float,float,float,float>> gdist(sz,domain,g,bc);
1059 
1060  gdist.template setBackgroundValue<0>(666);
1061  gdist.template setBackgroundValue<1>(666);
1062  gdist.template setBackgroundValue<2>(666);
1063  gdist.template setBackgroundValue<3>(666);
1064  gdist.template setBackgroundValue<4>(666);
1065  gdist.template setBackgroundValue<5>(666);
1066 
1068 
1069  Box<3,size_t> box({1,1,1},{sz[0]-1,sz[1]-1,sz[2]-1});
1070 
1072 
1073  float c = 5.0;
1074 
1075  typedef typename GetAddBlockType<decltype(gdist)>::type InsertBlockT;
1076 
1077  gdist.addPoints(box.getKP1(),box.getKP2(),
1078  [] __device__ (int i, int j, int k)
1079  {
1080  return true;
1081  },
1082  [c] __device__ (InsertBlockT & data, int i, int j, int k)
1083  {
1084  data.template get<0>() = c + i + j + k;
1085  data.template get<1>() = c + 1000 + i + j + k;
1086  data.template get<2>() = c + 10000 + i + j + k;
1087  }
1088  );
1089 
1090  gdist.template flush<smax_<0>,smax_<1>,smax_<2>>(flush_type::FLUSH_ON_DEVICE);
1091 
1092  gdist.template ghost_get<0,1,2>(RUN_ON_DEVICE);
1093 
1094  for (int i = 0 ; i < 10 ; i++)
1095  {
1096  gdist.template ghost_get<0,1,2>(RUN_ON_DEVICE);
1097  }
1098 
1099  // Now run the convolution
1100 
1101  typedef typename GetCpBlockType<decltype(gdist),0,1>::type CpBlockType;
1102 
1103  gdist.template conv3_b<0,1,2,3,4,5,1>({2,2,2},{(int)sz[0]-2,(int)sz[1]-2,(int)sz[2]-2},[] __device__ (float & u_out, float & v_out, float & m_out, CpBlockType & u, CpBlockType & v , CpBlockType & m, auto & block, int offset,int i, int j, int k){
1104  u_out = u(i+1,j,k) - u(i-1,j,k) + u(i,j+1,k) - u(i,j-1,k) + u(i,j,k+1) - u(i,j,k-1) + block.template get<0>()[offset];
1105  v_out = v(i+1,j,k) - v(i-1,j,k) + v(i,j+1,k) - v(i,j-1,k) + v(i,j,k+1) - v(i,j,k-1) + block.template get<1>()[offset];
1106  m_out = m(i+1,j,k) - m(i-1,j,k) + m(i,j+1,k) - m(i,j-1,k) + m(i,j,k+1) - m(i,j,k-1) + block.template get<2>()[offset];
1107  });
1108 
1109  gdist.deviceToHost<0,1,2,3,4,5>();
1110 
1111  // Now we check that ghost is correct
1112 
1113  auto it3 = gdist.getSubDomainIterator({2,2,2},{(int)sz[0]-2,(int)sz[1]-2,(int)sz[2]-2});
1114 
1115  bool match = true;
1116 
1117  while (it3.isNext())
1118  {
1119  auto p = it3.get();
1120 
1121  auto p_xp1 = p.move(0,1);
1122  auto p_xm1 = p.move(0,-1);
1123  auto p_yp1 = p.move(1,1);
1124  auto p_ym1 = p.move(1,-1);
1125  auto p_zp1 = p.move(2,1);
1126  auto p_zm1 = p.move(2,-1);
1127 
1128  float sub1 = gdist.template get<3>(p);
1129  float sub2 = gdist.template get<4>(p);
1130  float sub3 = gdist.template get<5>(p);
1131 
1132  if (sub1 != 6.0 + gdist.template get<0>(p) || sub2 != 6.0 + gdist.template get<1>(p) || sub3 != 6.0 + gdist.template get<2>(p))
1133  {
1134  std::cout << sub1 << " " << sub2 << " " << sub3 << std::endl;
1135  std::cout << gdist.template get<0>(p_xp1) << " " << gdist.template get<0>(p_xm1) << std::endl;
1136  std::cout << gdist.template get<1>(p_xp1) << " " << gdist.template get<1>(p_xm1) << std::endl;
1137  std::cout << gdist.template get<2>(p_xp1) << " " << gdist.template get<2>(p_xm1) << std::endl;
1138  match = false;
1139  break;
1140  }
1141 
1142  ++it3;
1143  }
1144 
1145  BOOST_REQUIRE_EQUAL(match,true);
1146 }
1147 
1148 BOOST_AUTO_TEST_CASE( sgrid_gpu_test_ghost_point_remove )
1149 {
1150  size_t sz[3] = {60,60,60};
1151  periodicity<3> bc = {PERIODIC,PERIODIC,PERIODIC};
1152 
1153  Ghost<3,long int> g(1);
1154 
1155  Box<3,float> domain({0.0,0.0,0.0},{1.0,1.0,1.0});
1156 
1157  sgrid_dist_id_gpu<3,float,aggregate<float,float,float,float>> gdist(sz,domain,g,bc);
1158 
1159  gdist.template setBackgroundValue<0>(666);
1160  gdist.template setBackgroundValue<1>(666);
1161  gdist.template setBackgroundValue<2>(666);
1162  gdist.template setBackgroundValue<3>(666);
1163 
1165 
1166  Box<3,size_t> box({1,1,1},{sz[0]-1,sz[1]-1,sz[2]-1});
1167 
1169 
1170  float c = 5.0;
1171 
1172  typedef typename GetAddBlockType<decltype(gdist)>::type InsertBlockT;
1173 
1174  gdist.addPoints(box.getKP1(),box.getKP2(),
1175  [] __device__ (int i, int j, int k)
1176  {
1177  return true;
1178  },
1179  [c] __device__ (InsertBlockT & data, int i, int j, int k)
1180  {
1181  data.template get<0>() = c + i + j + k;
1182  data.template get<1>() = c + 1000 + i + j + k;
1183  }
1184  );
1185 
1186  gdist.template flush<smax_<0>,smax_<1>>(flush_type::FLUSH_ON_DEVICE);
1187 
1188  gdist.template ghost_get<0,1>(RUN_ON_DEVICE);
1189 
1190  // Remove the right side of the points
1191  Box<3,size_t> bxR({59,0,0},{59,59,59});
1192  gdist.removePoints(bxR);
1193 
1194  // Remove the right side of the points
1195  Box<3,size_t> bxT({0,0,59},{59,59,59});
1196  gdist.removePoints(bxT);
1197 
1198  // Remove the right side of the points
1199  Box<3,size_t> bxD({0,59,0},{59,59,59});
1200  gdist.removePoints(bxD);
1201 
1202  gdist.template ghost_get<0,1>(RUN_ON_DEVICE);
1203 
1204  for (int i = 0 ; i < 10 ; i++)
1205  {
1206  gdist.template ghost_get<0,1>(RUN_ON_DEVICE);
1207  }
1208 
1209  // Now run the convolution
1210 
1211  typedef typename GetCpBlockType<decltype(gdist),0,1>::type CpBlockType;
1212 
1213  gdist.template conv2<0,1,2,3,1>({2,2,2},{(int)sz[0]-3,(int)sz[1]-3,(int)sz[2]-3},[] __device__ (float & u_out, float & v_out, CpBlockType & u, CpBlockType & v,int i, int j, int k){
1214  u_out = u(i+1,j,k) - u(i-1,j,k) + u(i,j+1,k) - u(i,j-1,k) + u(i,j,k+1) - u(i,j,k-1);
1215  v_out = v(i+1,j,k) - v(i-1,j,k) + v(i,j+1,k) - v(i,j-1,k) + v(i,j,k+1) - v(i,j,k-1);
1216  });
1217 
1218  gdist.deviceToHost<0,1,2,3>();
1219 
1220  // Now we check that ghost is correct
1221 
1222  auto it3 = gdist.getSubDomainIterator({2,2,2},{(int)sz[0]-3,(int)sz[1]-3,(int)sz[2]-3});
1223 
1224  bool match = true;
1225 
1226  while (it3.isNext())
1227  {
1228  auto p = it3.get();
1229 
1230  auto p_xp1 = p.move(0,1);
1231  auto p_xm1 = p.move(0,-1);
1232  auto p_yp1 = p.move(1,1);
1233  auto p_ym1 = p.move(1,-1);
1234  auto p_zp1 = p.move(2,1);
1235  auto p_zm1 = p.move(2,-1);
1236 
1237  float sub1 = gdist.template get<2>(p);
1238  float sub2 = gdist.template get<3>(p);
1239 
1240  if (sub1 != 6.0 || sub2 != 6.0)
1241  {
1242  std::cout << sub1 << " " << sub2 << std::endl;
1243  std::cout << gdist.template get<0>(p_xp1) << " " << gdist.template get<0>(p_xm1) << std::endl;
1244  std::cout << gdist.template get<1>(p_xp1) << " " << gdist.template get<1>(p_xm1) << std::endl;
1245  match = false;
1246  break;
1247  }
1248 
1249  ++it3;
1250  }
1251 
1252  BOOST_REQUIRE_EQUAL(match,true);
1253 
1254  gdist.template deviceToHost<0,1,2,3>();
1255 
1256  auto it4 = gdist.getDomainGhostIterator();
1257  Box<3,long int> bin({0,0,0},{59,59,59});
1258 
1259  match = true;
1260 
1261  while (it4.isNext())
1262  {
1263  auto p = it4.get();
1264 
1265  // We have to check we have no point in the ghost area
1266  auto gkey = it4.getGKey(p);
1267 
1268  if (bin.isInside(gkey.toPoint()) == false)
1269  {match = false;}
1270 
1271  ++it4;
1272  }
1273 
1274  BOOST_REQUIRE_EQUAL(match,true);
1275 }
1276 
1277 BOOST_AUTO_TEST_CASE( sgrid_gpu_test_skip_labelling )
1278 {
1279  size_t sz[3] = {60,60,60};
1280  periodicity<3> bc = {PERIODIC,PERIODIC,PERIODIC};
1281 
1282  Ghost<3,long int> g(1);
1283 
1284  Box<3,float> domain({0.0,0.0,0.0},{1.0,1.0,1.0});
1285 
1286  sgrid_dist_id_gpu<3,float,aggregate<float,float,float,float>> gdist(sz,domain,g,bc);
1287 
1288  gdist.template setBackgroundValue<0>(666);
1289  gdist.template setBackgroundValue<1>(666);
1290  gdist.template setBackgroundValue<2>(666);
1291  gdist.template setBackgroundValue<3>(666);
1292 
1294 
1295  Box<3,size_t> box({1,1,1},{sz[0]-1,sz[1]-1,sz[2]-1});
1296 
1298 
1299  float c = 5.0;
1300 
1301  typedef typename GetAddBlockType<decltype(gdist)>::type InsertBlockT;
1302 
1303  gdist.addPoints(box.getKP1(),box.getKP2(),
1304  [] __device__ (int i, int j, int k)
1305  {
1306  return true;
1307  },
1308  [c] __device__ (InsertBlockT & data, int i, int j, int k)
1309  {
1310  data.template get<0>() = c + i + j + k;
1311  data.template get<1>() = c + 1000 + i + j + k;
1312  }
1313  );
1314 
1315  gdist.template flush<smax_<0>,smax_<1>>(flush_type::FLUSH_ON_DEVICE);
1316 
1317  gdist.template ghost_get<0,1>(RUN_ON_DEVICE);
1318 
1319  // Now run the convolution
1320 
1321  typedef typename GetCpBlockType<decltype(gdist),0,1>::type CpBlockType;
1322 
1323  gdist.template conv2<0,1,0,1,1>({0,0,0},{(int)sz[0]-1,(int)sz[1]-1,(int)sz[2]-1},[] __device__ (float & u_out, float & v_out, CpBlockType & u, CpBlockType & v,int i, int j, int k){
1324  u_out = 1*u(i,j,k);
1325  v_out = 1*v(i,j,k);
1326  });
1327 
1328  gdist.template ghost_get<0,1>(RUN_ON_DEVICE | SKIP_LABELLING);
1329 
1330  gdist.template conv2<0,1,0,1,1>({0,0,0},{(int)sz[0]-1,(int)sz[1]-1,(int)sz[2]-1},[] __device__ (float & u_out, float & v_out, CpBlockType & u, CpBlockType & v,int i, int j, int k){
1331  u_out = 5*u(i,j,k);
1332  v_out = 5*v(i,j,k);
1333  });
1334 
1335  gdist.template ghost_get<0,1>(RUN_ON_DEVICE | SKIP_LABELLING);
1336 
1337  gdist.template conv2<0,1,0,1,1>({0,0,0},{(int)sz[0]-1,(int)sz[1]-1,(int)sz[2]-1},[] __device__ (float & u_out, float & v_out, CpBlockType & u, CpBlockType & v,int i, int j, int k){
1338  u_out = 2*u(i,j,k);
1339  v_out = 2*v(i,j,k);
1340  });
1341 
1342  gdist.template ghost_get<0,1>(RUN_ON_DEVICE | SKIP_LABELLING);
1343 
1344  gdist.template conv2<0,1,2,3,1>({2,2,2},{(int)sz[0]-3,(int)sz[1]-3,(int)sz[2]-3},[] __device__ (float & u_out, float & v_out, CpBlockType & u, CpBlockType & v,int i, int j, int k){
1345  u_out = u(i+1,j,k) - u(i-1,j,k) + u(i,j+1,k) - u(i,j-1,k) + u(i,j,k+1) - u(i,j,k-1);
1346  v_out = v(i+1,j,k) - v(i-1,j,k) + v(i,j+1,k) - v(i,j-1,k) + v(i,j,k+1) - v(i,j,k-1);
1347  });
1348 
1349 
1350  gdist.deviceToHost<0,1,2,3>();
1351 
1352  // Now we check that ghost is correct
1353 
1354  auto it3 = gdist.getSubDomainIterator({2,2,2},{(int)sz[0]-3,(int)sz[1]-3,(int)sz[2]-3});
1355 
1356  bool match = true;
1357 
1358  while (it3.isNext())
1359  {
1360  auto p = it3.get();
1361 
1362  auto p_xp1 = p.move(0,1);
1363  auto p_xm1 = p.move(0,-1);
1364  auto p_yp1 = p.move(1,1);
1365  auto p_ym1 = p.move(1,-1);
1366  auto p_zp1 = p.move(2,1);
1367  auto p_zm1 = p.move(2,-1);
1368 
1369  float sub1 = gdist.template get<2>(p);
1370  float sub2 = gdist.template get<3>(p);
1371 
1372  if (sub1 != 6.0*10.0 || sub2 != 6.0*10.0)
1373  {
1374  std::cout << sub1 << " " << sub2 << std::endl;
1375  std::cout << gdist.template get<0>(p_xp1) << " " << gdist.template get<0>(p_xm1) << std::endl;
1376  std::cout << gdist.template get<1>(p_xp1) << " " << gdist.template get<1>(p_xm1) << std::endl;
1377  match = false;
1378  break;
1379  }
1380 
1381  ++it3;
1382  }
1383 
1384  BOOST_REQUIRE_EQUAL(match,true);
1385 }
1386 
1387 BOOST_AUTO_TEST_CASE( sgrid_gpu_test_conv_background )
1388 {
1389  size_t sz[3] = {60,60,60};
1390  periodicity<3> bc = {PERIODIC,PERIODIC,PERIODIC};
1391 
1392  Ghost<3,long int> g(1);
1393 
1394  Box<3,float> domain({0.0,0.0,0.0},{1.0,1.0,1.0});
1395 
1396  sgrid_dist_id_gpu<3,float,aggregate<float,float,float,float>> gdist(sz,domain,g,bc);
1397 
1398  gdist.template setBackgroundValue<0>(666);
1399  gdist.template setBackgroundValue<1>(666);
1400  gdist.template setBackgroundValue<2>(666);
1401  gdist.template setBackgroundValue<3>(666);
1402 
1404 
1405  Box<3,size_t> box({1,1,1},{sz[0]-1,sz[1]-1,sz[2]-1});
1406 
1408 
1409  float c = 5.0;
1410 
1411  typedef typename GetAddBlockType<decltype(gdist)>::type InsertBlockT;
1412 
1413  gdist.addPoints(box.getKP1(),box.getKP2(),
1414  [] __device__ (int i, int j, int k)
1415  {
1416  return (i == 30 && j == 30 && k == 30);
1417  },
1418  [c] __device__ (InsertBlockT & data, int i, int j, int k)
1419  {
1420  data.template get<0>() = 0;
1421  data.template get<1>() = 0;
1422  }
1423  );
1424 
1425  gdist.template flush<smax_<0>,smax_<1>>(flush_type::FLUSH_ON_DEVICE);
1426 
1427  gdist.template ghost_get<0,1>(RUN_ON_DEVICE);
1428 
1429  // Now run the convolution
1430 
1431  typedef typename GetCpBlockType<decltype(gdist),0,1>::type CpBlockType;
1432 
1433  gdist.template conv2<0,1,2,3,1>({0,0,0},{(int)sz[0]-1,(int)sz[1]-1,(int)sz[2]-1},[] __device__ (float & u_out, float & v_out, CpBlockType & u, CpBlockType & v,int i, int j, int k){
1434  u_out = u(i+1,j,k) + u(i,j+1,k) + u(i,j,k+1);
1435  v_out = v(i+1,j,k) + v(i,j+1,k) + v(i,j,k+1);
1436  });
1437 
1438  gdist.deviceToHost<0,1,2,3>();
1439 
1440  // Now we check that ghost is correct
1441 
1442  auto it3 = gdist.getDomainIterator();
1443 
1444  bool match = true;
1445 
1446  int count = 0;
1447 
1448  while (it3.isNext())
1449  {
1450  auto p = it3.get();
1451 
1452  float sub1 = gdist.template get<2>(p);
1453  float sub2 = gdist.template get<3>(p);
1454 
1455  if (sub1 != 3*666.0 || sub2 != 3*666.0)
1456  {
1457  std::cout << sub1 << " " << sub2 << std::endl;
1458  match = false;
1459  break;
1460  }
1461 
1462  count++;
1463 
1464  ++it3;
1465  }
1466 
1467  BOOST_REQUIRE(count == 0 || count == 1);
1468  BOOST_REQUIRE_EQUAL(match,true);
1469 }
1470 
1471 BOOST_AUTO_TEST_CASE( grid_dense_to_sparse_conversion )
1472 {
1473  Box<3,float> domain({0.0,0.0,0.0},{1.0,1.0,1.0});
1474 
1475  // grid size
1476  size_t sz[3];
1477  sz[0] = 32;
1478  sz[1] = 32;
1479  sz[2] = 32;
1480 
1481  // Ghost
1482  Ghost<3,long int> g(1);
1483 
1484  periodicity<3> pr = {PERIODIC,PERIODIC,PERIODIC};
1485 
1486  // Distributed grid with id decomposition
1487  grid_dist_id<3, float, aggregate<float,float>> g_dist(sz,domain,g,pr);
1488 
1489  auto it = g_dist.getDomainIterator();
1490 
1491  while (it.isNext())
1492  {
1493  auto p = it.get();
1494  auto gkey = it.getGKey(p);
1495 
1496  g_dist.template getProp<0>(p) = gkey.get(0) + gkey.get(1) + gkey.get(2);
1497  g_dist.template getProp<1>(p) = 3.0*gkey.get(0) + gkey.get(1) + gkey.get(2);
1498 
1499  ++it;
1500  }
1501 
1502  sgrid_dist_id_gpu<3,float,aggregate<float,float>> sgdist(g_dist.getDecomposition(),sz,g);
1503 
1504  while (it.isNext())
1505  {
1506  auto p = it.get();
1507  auto gkey = it.getGKey(p);
1508 
1509  sgdist.template insertFlush<0>(p) = g_dist.template get<0>(p);
1510 
1511  ++it;
1512  }
1513 
1514 
1515  bool check = true;
1516 
1517  while (it.isNext())
1518  {
1519  auto p = it.get();
1520  auto gkey = it.getGKey(p);
1521 
1522  check &= sgdist.template getProp<0>(p) == g_dist.template get<0>(p);
1523 
1524  ++it;
1525  }
1526 
1527  BOOST_REQUIRE_EQUAL(check,true);
1528 }
1529 
1530 BOOST_AUTO_TEST_SUITE_END()
This class represent an N-dimensional box.
Definition: Box.hpp:60
virtual void * getDevicePointer()
get a readable pointer with the data
Definition: CudaMemory.cu:503
virtual void deviceToHost()
Move memory from device to host.
Definition: CudaMemory.cu:369
virtual void hostToDevice()
Move memory from host to device.
Definition: CudaMemory.cu:514
virtual void * getPointer()
get a readable pointer with the data
Definition: CudaMemory.cu:354
virtual bool allocate(size_t sz)
allocate memory
Definition: CudaMemory.cu:38
Definition: Ghost.hpp:40
This class implement the point shape in an N-dimensional space.
Definition: Point.hpp:28
This is a distributed grid.
KeyT const ValueT ValueT OffsetIteratorT OffsetIteratorT int
[in] The number of segments that comprise the sorting data
get the type of the insertBlock
get the type of the block
Boundary conditions.
Definition: common.hpp:22