OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
grid_dist_id_comm.hpp
1 /*
2  * grid_dist_id_comm.hpp
3  *
4  * Created on: Nov 13, 2016
5  * Author: yaroslav
6  */
7 
8 #ifndef SRC_GRID_GRID_DIST_ID_COMM_HPP_
9 #define SRC_GRID_GRID_DIST_ID_COMM_HPP_
10 
11 #include "Vector/vector_dist_ofb.hpp"
12 #include "Grid/copy_grid_fast.hpp"
13 #include "grid_dist_util.hpp"
14 #include "util/common_pdata.hpp"
15 #include "lib/pdata.hpp"
16 
17 
22 template<bool result,typename T, typename device_grid, typename Memory>
23 struct grid_unpack_selector_with_prp
24 {
33  template<template<typename,typename> class op, typename sub_it_type, int ... prp> static void call_unpack(ExtPreAlloc<Memory> & recv_buf, sub_it_type & sub2, device_grid & gd, Unpack_stat & ps)
34  {
35  std::cerr << __FILE__ << ":" << __LINE__ << " Error: complex properties on grids are not supported yet" << std::endl;
36  }
37 };
38 
43 template<typename T, typename device_grid, typename Memory>
44 struct grid_unpack_selector_with_prp<true,T,device_grid,Memory>
45 {
46 
55  template<template<typename,typename> class op, typename sub_it_type, unsigned int ... prp>
56  static void call_unpack(ExtPreAlloc<Memory> & recv_buf,
57  sub_it_type & sub2,
58  device_grid & gd,
59  Unpack_stat & ps)
60  {
61  gd.template unpack_with_op<op,Memory,prp ...>(recv_buf,sub2,ps);
62  }
63 };
64 
70 template<typename device_grid, typename Memory, typename T>
71 struct grid_call_serialize_variadic;
72 
78 template<typename device_grid, typename Memory , int ... prp>
79 struct grid_call_serialize_variadic<device_grid,Memory,index_tuple<prp...>>
80 {
81 
90  template<template<typename,typename> class op, typename sub_it_type, typename T>
91  inline static void call_unpack(ExtPreAlloc<Memory> & recv_buf,
92  sub_it_type & sub2,
93  device_grid & dg,
94  Unpack_stat & ps)
95  {
96  const bool result = has_pack_gen<typename T::type>::value == false;
97 
98  grid_unpack_selector_with_prp<result,T,device_grid,Memory>::template call_unpack<op,sub_it_type,prp...>(recv_buf,sub2,dg,ps);
99  }
100 };
101 
107 template<template<typename,typename> class op, typename T, typename device_grid, typename Memory>
108 struct grid_unpack_with_prp
109 {
110 
119  template<typename sub_it_type, unsigned int ... prp> static void unpacking(ExtPreAlloc<Memory> & recv_buf, sub_it_type & sub2, device_grid & dg, Unpack_stat & ps)
120  {
121  typedef index_tuple<prp...> ind_prop_to_pack;
122  grid_call_serialize_variadic<device_grid,Memory,ind_prop_to_pack>::template call_unpack<op,sub_it_type,T>(recv_buf, sub2, dg, ps);
123  }
124 };
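
The three helpers above form a compile-time dispatch chain: a bool computed from a type trait selects either the "unsupported" fallback or the real unpack routine. A minimal standalone sketch of the same pattern (all names here are invented for illustration, not OpenFPM API):

#include <iostream>
#include <string>
#include <type_traits>

template<bool is_simple>
struct unpack_selector_sketch
{
    // fallback: mirrors the "complex properties are not supported" error above
    template<typename T> static void call(const T &)
    { std::cerr << "Error: complex properties are not supported yet" << std::endl; }
};

template<>
struct unpack_selector_sketch<true>
{
    // working path: mirrors the call to gd.template unpack_with_op<...>
    template<typename T> static void call(const T & v)
    { std::cout << "unpacked " << v << std::endl; }
};

template<typename T>
void unpack_sketch(const T & v)
{
    // mirrors: const bool result = has_pack_gen<typename T::type>::value == false
    unpack_selector_sketch<std::is_trivially_copyable<T>::value>::call(v);
}

int main()
{
    unpack_sketch(42);                // trivially copyable: real unpack path
    unpack_sketch(std::string("x"));  // not trivially copyable: error path
}
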
125 
139 template<unsigned int dim, typename St, typename T, typename Decomposition = CartDecomposition<dim,St>,typename Memory=HeapMemory , typename device_grid=grid_cpu<dim,T> >
140 class grid_dist_id_comm
141 {
144  Vcluster<Memory> & v_cl;
147  openfpm::vector<size_t> p_map_req;
150  openfpm::vector<size_t> prc_recv_map;
153  openfpm::vector<size_t> recv_sz_map;
156  openfpm::vector<size_t> send_prc_queue;
159  openfpm::vector<void *> send_pointer;
162  openfpm::vector<size_t> send_size;
165  openfpm::vector_fr<BMemory<Memory>> recv_buffers;
166  struct rp_id
167  {
168  int p_id;
169  int i;
170 
171  bool operator<(const rp_id & tmp) const
172  {
173  return p_id < tmp.p_id;
174  }
175  };
176 
179  openfpm::vector<rp_id> recv_proc;
185  openfpm::vector<openfpm::vector<aggregate<device_grid,SpaceBox<dim,long int>>>> m_oGrid;
188  Memory g_send_prp_mem;
191  Memory g_recv_prp_mem;
193  openfpm::vector<void *> pointers;
194  openfpm::vector<void *> pointers2;
195 
197  openfpm::vector_gpu<aggregate<void *,void *,int>> pointers_h;
198  int n_headers_slot = 1;
199  openfpm::vector_gpu<aggregate<size_t,size_t,unsigned int>> headers;
200 
202  size_t opt;
203 
215  template<int... prp> void ghost_get_local(const openfpm::vector<i_lbox_grid<dim>> & loc_ig_box,
216  const openfpm::vector<e_lbox_grid<dim>> & loc_eg_box,
217  const openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext,
218  openfpm::vector<device_grid> & loc_grid,
219  std::unordered_map<size_t,size_t> & g_id_to_external_ghost_box,
220  const grid_sm<dim,void> & ginfo,
221  bool use_bx_def,
222  size_t opt)
223  {
224  rem_copy_opt opt_ = rem_copy_opt::NONE_OPT;
225  if (opt & SKIP_LABELLING)
226  {opt_ = rem_copy_opt::KEEP_GEOMETRY;}
227 
228  if (opt_ != rem_copy_opt::KEEP_GEOMETRY)
229  {
230  for (size_t i = 0 ; i < loc_grid.size() ; i++)
231  {loc_grid.get(i).copyRemoveReset();}
232  }
233 
234  grid_key_dx<dim> cnt[1];
235  cnt[0].zero();
236 
238  for (size_t i = 0 ; i < loc_ig_box.size() ; i++)
239  {
241  for (size_t j = 0 ; j < loc_ig_box.get(i).bid.size() ; j++)
242  {
243  size_t sub_id_src_gdb_ext = loc_ig_box.get(i).bid.get(j).sub_gdb_ext;
244 
245  // sub domain connected with external box
246  size_t sub_id_dst = loc_ig_box.get(i).bid.get(j).sub;
247 
248  // local internal ghost box connected
249  for (size_t v = 0 ; v < loc_ig_box.get(i).bid.get(j).k.size() ; v++)
250  {
251  size_t k = loc_ig_box.get(i).bid.get(j).k.get(v);
252 
253  Box<dim,long int> bx_dst = loc_eg_box.get(sub_id_dst).bid.get(k).ebox;
254 
255  // convert into local
256  size_t sub_id_dst_gdb_ext = loc_eg_box.get(sub_id_dst).bid.get(k).sub_gdb_ext;
257  bx_dst -= gdb_ext.get(sub_id_dst_gdb_ext).origin;
258 
259  // create 2 sub grid iterators
260 
261  if (bx_dst.isValid() == false)
262  {continue;}
263 
264  Box<dim,long int> bx_src = flip_box(loc_eg_box.get(sub_id_dst).bid.get(k).ebox,loc_eg_box.get(sub_id_dst).bid.get(k).cmb,ginfo);
265  bx_src -= gdb_ext.get(sub_id_src_gdb_ext).origin;
266 
267  #ifdef SE_CLASS1
268 
269  if (use_bx_def == false)
270  {
271  if (loc_eg_box.get(sub_id_dst).bid.get(k).sub != i)
272  {std::cerr << "Error " << __FILE__ << ":" << __LINE__ << " source and destination are not correctly linked" << "\n";}
273  }
274 
275  if (bx_src.getVolumeKey() != bx_dst.getVolumeKey())
276  {std::cerr << "Error " << __FILE__ << ":" << __LINE__ << " source and destination do not match in size" << "\n";}
277 
278  #endif
279 
280  auto & gd = loc_grid.get(sub_id_dst_gdb_ext);
281 
282  gd.remove(bx_dst);
283  gd.copy_to(loc_grid.get(sub_id_src_gdb_ext),bx_src,bx_dst);
284  }
285  }
286  }
287 
288  for (size_t i = 0 ; i < loc_grid.size() ; i++)
289  {
290  loc_grid.get(i).template removeCopyToFinalize<prp ...>(v_cl.getmgpuContext(), rem_copy_opt::PHASE1 | opt_);
291  }
292 
293  for (size_t i = 0 ; i < loc_grid.size() ; i++)
294  {
295  loc_grid.get(i).template removeCopyToFinalize<prp ...>(v_cl.getmgpuContext(), rem_copy_opt::PHASE2 | opt_);
296  }
297 
298  for (size_t i = 0 ; i < loc_grid.size() ; i++)
299  {
300  loc_grid.get(i).template removeCopyToFinalize<prp ...>(v_cl.getmgpuContext(), rem_copy_opt::PHASE3 | opt_);
301  }
302  }
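
Reduced to one dimension, the local ghost synchronisation above amounts to copying the interior border cells of each patch into the ghost cells of its neighbour, with flip_box handling the periodic wrap. A self-contained sketch (two invented in-process patches, no MPI):

#include <array>
#include <cstdio>

int main()
{
    // two patches of 4 interior cells each, one ghost cell on both sides
    std::array<double,6> a{}, b{};
    for (int i = 1; i <= 4; i++) { a[i] = i; b[i] = 10 + i; }

    // interior border of one patch -> external ghost of the neighbour
    a[5] = b[1];  b[0] = a[4];
    // periodic wrap-around, the 1-D analogue of what flip_box resolves
    b[5] = a[1];  a[0] = b[4];

    for (double v : a) printf("%g ", v);
    printf("\n");
    for (double v : b) printf("%g ", v);
    printf("\n");
}
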
303 
315  template<template<typename,typename> class op, int... prp> void ghost_put_local(const openfpm::vector<i_lbox_grid<dim>> & loc_ig_box,
316  const openfpm::vector<e_lbox_grid<dim>> & loc_eg_box,
317  const openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext,
318  openfpm::vector<device_grid> & loc_grid,
319  openfpm::vector<std::unordered_map<size_t,size_t>> & g_id_to_external_ghost_box)
320  {
322  for (size_t i = 0 ; i < loc_eg_box.size() ; i++)
323  {
325  for (size_t j = 0 ; j < loc_eg_box.get(i).bid.size() ; j++)
326  {
327  if (loc_eg_box.get(i).bid.get(j).initialized == false)
328  continue;
329 
330  Box<dim,long int> bx_src = loc_eg_box.get(i).bid.get(j).ebox;
331  // convert into local
332  bx_src -= gdb_ext.get(i).origin;
333 
334  // sub domain connected with external box
335  size_t sub_id_dst = loc_eg_box.get(i).bid.get(j).sub;
336 
337  // local external ghost box connected
338  size_t k = loc_eg_box.get(i).bid.get(j).k;
339 
340  Box<dim,long int> bx_dst = loc_ig_box.get(sub_id_dst).bid.get(k).box;
341 
342  // convert into local
343  bx_dst -= gdb_ext.get(sub_id_dst).origin;
344 
345  // create 2 sub grid iterators
346 
347  if (bx_dst.isValid() == false)
348  {continue;}
349 
350 #ifdef SE_CLASS1
351 
352  if (loc_ig_box.get(sub_id_dst).bid.get(k).sub != i)
353  std::cerr << "Error " << __FILE__ << ":" << __LINE__ << " source and destination are not correctly linked" << "\n";
354 
355  if (bx_src.getVolume() != bx_dst.getVolume())
356  {std::cerr << "Error " << __FILE__ << ":" << __LINE__ << " source and destination do not match in size" << "\n";}
357 
358 #endif
359 
360  auto & gd2 = loc_grid.get(sub_id_dst);
361  gd2.template copy_to_op<op,prp...>(loc_grid.get(i),bx_src,bx_dst);
362 
363  }
364  }
365  }
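
ghost_put_local is the reverse operation: the op template parameter decides how a ghost contribution is merged into the owning cell instead of overwriting it. A one-cell sketch with invented op types:

#include <cstdio>

template<typename T> struct replace_sketch { static void go(T & dst, const T & src) { dst = src; } };
template<typename T> struct add_sketch     { static void go(T & dst, const T & src) { dst += src; } };

int main()
{
    double owner_cell = 1.0;   // border cell on the owning patch
    double ghost_cell = 0.5;   // contribution accumulated in a neighbour's ghost

    add_sketch<double>::go(owner_cell, ghost_cell);  // a put merges, it does not overwrite
    printf("merged value: %g\n", owner_cell);        // prints 1.5
}
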
366 
367  /* Send or queue the information
368  *
369  * This function sends or queues the information to the other processor. If the
370  * device grid is a compressed format, as in multi-resolution, the communication is
371  * queued because the other side does not know the size of the communication. If it
372  * is not compressed, the other side knows the size, so a direct send is done.
373  *
374  */
375  void send_or_queue(size_t prc, char * pointer, char * pointer2)
376  {
377  if (device_grid::isCompressed() == false)
378  {v_cl.send(prc,0,pointer,(char *)pointer2 - (char *)pointer);}
379  else
380  {
381  send_prc_queue.add(prc);
382  send_pointer.add(pointer);
383  send_size.add(pointer2-pointer);
384  }
385  }
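
The branch above in miniature: fixed-size formats are sent immediately because the receiver can pre-compute a matching receive, while compressed formats are queued for the dynamic exchange. A sketch with invented names:

#include <cstddef>
#include <cstdio>
#include <vector>

struct queued_msg { int prc; const char * ptr; size_t sz; };

template<bool is_compressed>
void send_or_queue_sketch(int prc, const char * p, const char * p2, std::vector<queued_msg> & q)
{
    if (!is_compressed)
        printf("direct send of %zu bytes to processor %d\n", (size_t)(p2 - p), prc);
    else
        q.push_back({prc, p, (size_t)(p2 - p)});  // deferred: size unknown to the receiver
}

int main()
{
    std::vector<queued_msg> q;
    char buf[64];
    send_or_queue_sketch<false>(1, buf, buf + 32, q);
    send_or_queue_sketch<true>(2, buf, buf + 16, q);
    printf("queued messages: %zu\n", q.size());
}
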
386 
387  static void * receive_dynamic(size_t msg_i ,size_t total_msg, size_t total_p, size_t i, size_t ri, size_t tag, void * ptr)
388  {
389  grid_dist_id_comm * gd = static_cast<grid_dist_id_comm *>(ptr);
390 
391  gd->recv_buffers.add();
392 
393  gd->recv_buffers.last().resize(msg_i);
394  gd->recv_proc.add();
395  gd->recv_proc.last().p_id = i;
396  gd->recv_proc.last().i = gd->recv_proc.size()-1;
397 
398  if (gd->opt & RUN_ON_DEVICE)
399  {return gd->recv_buffers.last().getDevicePointer();}
400 
401  return gd->recv_buffers.last().getPointer();
402  }
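
receive_dynamic follows the message-allocator callback protocol of sendrecvMultipleMessagesNBX: the communication layer calls it once per incoming message, and it returns the buffer where the payload should land. A simplified stand-in (names invented):

#include <cstddef>
#include <cstdio>
#include <vector>

struct comm_state
{
    std::vector<std::vector<char>> bufs;  // plays the role of recv_buffers
    std::vector<int> from;                // plays the role of recv_proc
};

void * receive_dynamic_sketch(size_t msg_sz, size_t from_rank, void * ptr)
{
    comm_state * st = static_cast<comm_state *>(ptr);
    st->bufs.emplace_back(msg_sz);        // allocate one buffer per message
    st->from.push_back((int)from_rank);   // remember who sent it
    return st->bufs.back().data();        // the transport writes the payload here
}

int main()
{
    comm_state st;
    // the transport would invoke this once per message; simulate two arrivals
    receive_dynamic_sketch(128, 3, &st);
    receive_dynamic_sketch(64, 1, &st);
    printf("received %zu buffers\n", st.bufs.size());
}
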
403 
404  /* Queue the receives for a ghost_get
405  *
406  * This function queues the receives from the other processors. If the device
407  * grid is a compressed format, as in multi-resolution, the sizes are unknown
408  * in advance and a dynamic receive is used. If it is not compressed, the sizes
409  * can be calculated, so the receives are queued directly.
410  *
411  */
412  template <typename prp_object>
413  void queue_recv_data_get(const openfpm::vector<ep_box_grid<dim>> & eg_box,
414  std::vector<size_t> & prp_recv,
415  ExtPreAlloc<Memory> & prRecv_prp)
416  {
417 #ifdef __NVCC__
418  cudaDeviceSynchronize();
419 #endif
420 
421  if (device_grid::isCompressed() == false)
422  {
424  for ( size_t i = 0 ; i < eg_box.size() ; i++ )
425  {
426  prp_recv.push_back(eg_box.get(i).recv_pnt * sizeof(prp_object) + sizeof(size_t)*eg_box.get(i).n_r_box);
427  }
428 
429  size_t tot_recv = ExtPreAlloc<Memory>::calculateMem(prp_recv);
430 
432  g_recv_prp_mem.resize(tot_recv);
433 
434  // queue the receives
435  for ( size_t i = 0 ; i < eg_box.size() ; i++ )
436  {
437  prRecv_prp.allocate(prp_recv[i]);
438  v_cl.recv(eg_box.get(i).prc,0,prRecv_prp.getPointer(),prp_recv[i]);
439  }
440  }
441  else
442  {
443  // It is not possible to calculate the total information in advance, so we receive dynamically
444 
445  if (send_prc_queue.size() == 0)
446  {
447  v_cl.sendrecvMultipleMessagesNBX(send_prc_queue.size(),NULL,
448  NULL,NULL,
449  receive_dynamic,this);
450  }
451  else
452  {
453  v_cl.sendrecvMultipleMessagesNBX(send_prc_queue.size(),&send_size.get(0),
454  &send_prc_queue.get(0),&send_pointer.get(0),
455  receive_dynamic,this);
456  }
457 
458  // Reorder what we received
459 
460  recv_proc.sort();
461 
462  openfpm::vector_fr<BMemory<Memory>> tmp;
463  tmp.resize(recv_proc.size());
464 
465  for (int i = 0 ; i < recv_proc.size() ; i++)
466  {
467  tmp.get(i).swap(recv_buffers.get(recv_proc.get(i).i));
468  }
469 
470  recv_buffers.swap(tmp);
471  }
472  }
473 
474  /* Queue the receives for a ghost_put
475  *
476  * This function queues the receives from the other processors. If the device
477  * grid is a compressed format, as in multi-resolution, the sizes are unknown
478  * in advance and a dynamic receive is used. If it is not compressed, the sizes
479  * can be calculated, so the receives are queued directly.
480  *
481  */
482  template <typename prp_object>
483  void queue_recv_data_put(const openfpm::vector<ip_box_grid<dim>> & ig_box,
484  std::vector<size_t> & prp_recv,
485  ExtPreAlloc<Memory> & prRecv_prp)
486  {
487  if (device_grid::isCompressed() == false)
488  {
489  // Receive the information from each processors
490  for ( size_t i = 0 ; i < ig_box.size() ; i++ )
491  {
492  prp_recv.push_back(0);
493 
494  // for each external ghost box
495  for (size_t j = 0 ; j < ig_box.get(i).bid.size() ; j++)
496  {
497  // External ghost box
498  Box<dim,size_t> g_ig_box = ig_box.get(i).bid.get(j).box;
499  prp_recv[prp_recv.size()-1] += g_ig_box.getVolumeKey() * sizeof(prp_object) + sizeof(size_t);
500  }
501  }
502 
503  size_t tot_recv = ExtPreAlloc<Memory>::calculateMem(prp_recv);
504 
506  g_recv_prp_mem.resize(tot_recv);
507 
508  prRecv_prp.incRef();
509 
510  // queue the receives
511  for ( size_t i = 0 ; i < ig_box.size() ; i++ )
512  {
513  prRecv_prp.allocate(prp_recv[i]);
514  v_cl.recv(ig_box.get(i).prc,0,prRecv_prp.getPointer(),prp_recv[i]);
515  }
516 
517  prRecv_prp.decRef();
518  }
519  else
520  {
521  // It is not possible to calculate the total information in advance, so we receive dynamically
522 
523  if (send_prc_queue.size() == 0)
524  {
525  v_cl.sendrecvMultipleMessagesNBX(send_prc_queue.size(),NULL,
526  NULL,NULL,
527  receive_dynamic,this);
528  }
529  else
530  {
531  v_cl.sendrecvMultipleMessagesNBX(send_prc_queue.size(),&send_size.get(0),
532  &send_prc_queue.get(0),&send_pointer.get(0),
533  receive_dynamic,this);
534  }
535  }
536  }
537 
538  template<typename mem,unsigned ... prp>
539  void unpack_data_to_ext_ghost(ExtPreAlloc<mem> & emem,
540  openfpm::vector<device_grid> & loc_grid,
541  size_t i,
542  const openfpm::vector<ep_box_grid<dim>> & eg_box,
543  const std::unordered_map<size_t,size_t> & g_id_to_external_ghost_box,
544  const openfpm::vector<e_box_multi<dim>> & eb_gid_list,
545  Unpack_stat & ps,
546  size_t opt)
547  {
548  // Unpack the ghost box global-id
549 
550  size_t g_id;
551  // we move from device to host the gid
552  if (opt & RUN_ON_DEVICE)
553  {emem.deviceToHost(ps.getOffset(),ps.getOffset()+sizeof(size_t));}
554  Unpacker<size_t,mem>::unpack(emem,g_id,ps);
555 
556  size_t l_id = 0;
557  // convert the global id into local id
558  auto key = g_id_to_external_ghost_box.find(g_id);
559 
560  if (key != g_id_to_external_ghost_box.end()) // FOUND
561  {l_id = key->second;}
562  else
563  {
564  // NOT FOUND
565 
566  // It must always be found; if not, it means that the processor has no idea
567  // of what is stored and consequently does not know how to unpack it. Print a
568  // critical error and return
569 
570  std::cerr << "Error: " << __FILE__ << ":" << __LINE__ << " Critical, cannot unpack object, because received data cannot be interpreted\n";
571 
572  return;
573  }
574 
575 
576  // we unpack into the last eb_gid_list that is always big enough to
577  // unpack the information
578 
579  size_t le_id = eb_gid_list.get(l_id).full_match;
580  size_t ei = eb_gid_list.get(l_id).e_id;
581 
582  // Get the external ghost box associated with the packed information
583  Box<dim,long int> box = eg_box.get(ei).bid.get(le_id).l_e_box;
584  size_t sub_id = eg_box.get(ei).bid.get(le_id).sub;
585 
586  // sub-grid where to unpack
587  auto sub2 = loc_grid.get(sub_id).getIterator(box.getKP1(),box.getKP2(),false);
588 
589  rem_copy_opt opt_ = rem_copy_opt::NONE_OPT;
590  if (opt & SKIP_LABELLING)
591  {opt_ = rem_copy_opt::KEEP_GEOMETRY;}
592 
593  // Unpack
594  loc_grid.get(sub_id).remove(box);
595  Unpacker<device_grid,mem>::template unpack<decltype(sub2),decltype(v_cl.getmgpuContext()),prp...>(emem,sub2,loc_grid.get(sub_id),ps,v_cl.getmgpuContext(),opt_);
596 
597  // Copy the information on the other grid
598  for (long int j = 0 ; j < (long int)eb_gid_list.get(l_id).eb_list.size() ; j++)
599  {
600  size_t nle_id = eb_gid_list.get(l_id).eb_list.get(j);
601  if (nle_id != le_id)
602  {
603 // size_t nle_id = eb_gid_list.get(l_id).eb_list.get(j);
604  size_t n_sub_id = eg_box.get(ei).bid.get(nle_id).sub;
605 
606  Box<dim,long int> box = eg_box.get(ei).bid.get(nle_id).l_e_box;
607  Box<dim,long int> rbox = eg_box.get(ei).bid.get(nle_id).lr_e_box;
608 
609  loc_grid.get(n_sub_id).remove(box);
610  loc_grid.get(n_sub_id).copy_to(loc_grid.get(sub_id),rbox,box);
611  }
612  }
613  }
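
The critical step in the function above is translating the packed global ghost-box id into a local index; if the lookup fails, the payload cannot be interpreted. Its essence, as a sketch:

#include <cstdio>
#include <unordered_map>

int main()
{
    std::unordered_map<size_t,size_t> g_id_to_external_ghost_box = {{42,0},{77,1}};

    size_t g_id = 77;   // in the real code this is unpacked from the message
    auto key = g_id_to_external_ghost_box.find(g_id);
    if (key == g_id_to_external_ghost_box.end())
    {
        fprintf(stderr, "Critical: received data cannot be interpreted\n");
        return 1;
    }
    printf("global id %zu -> local ghost box %zu\n", g_id, key->second);
}
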
614 
615  template<typename mem, typename header_type,unsigned ... prp>
616  void unpack_data_to_ext_ghost_with_header(ExtPreAlloc<mem> & emem,
617  openfpm::vector<device_grid> & loc_grid,
618  header_type & headers,
619  size_t i,
620  const openfpm::vector<ep_box_grid<dim>> & eg_box,
621  const std::unordered_map<size_t,size_t> & g_id_to_external_ghost_box,
622  const openfpm::vector<e_box_multi<dim>> & eb_gid_list,
623  Unpack_stat & ps,
624  size_t opt)
625  {
626  // Unpack the ghost box global-id
627 
628  size_t g_id;
629  // the gid was already moved from device to host by fill_headers
630  g_id = headers.template get<0>(i);
631  ps.addOffset(sizeof(size_t));
632 
633  size_t l_id = 0;
634  // convert the global id into local id
635  auto key = g_id_to_external_ghost_box.find(g_id);
636 
637  if (key != g_id_to_external_ghost_box.end()) // FOUND
638  {l_id = key->second;}
639  else
640  {
641  // NOT FOUND
642 
643  // It must always be found; if not, it means that the processor has no idea
644  // of what is stored and consequently does not know how to unpack it. Print a
645  // critical error and return
646 
647  std::cerr << "Error: " << __FILE__ << ":" << __LINE__ << " Critical, cannot unpack object, because received data cannot be interpreted\n";
648 
649  return;
650  }
651 
652 
653  // we unpack into the last eb_gid_list that is always big enough to
654  // unpack the information
655 
656  size_t le_id = eb_gid_list.get(l_id).full_match;
657  size_t ei = eb_gid_list.get(l_id).e_id;
658 
659  // Get the external ghost box associated with the packed information
660  Box<dim,long int> box = eg_box.get(ei).bid.get(le_id).l_e_box;
661  size_t sub_id = eg_box.get(ei).bid.get(le_id).sub;
662 
663  // sub-grid where to unpack
664  auto sub2 = loc_grid.get(sub_id).getIterator(box.getKP1(),box.getKP2(),false);
665 
666  rem_copy_opt opt_ = rem_copy_opt::NONE_OPT;
667  if (opt & SKIP_LABELLING)
668  {opt_ = rem_copy_opt::KEEP_GEOMETRY;}
669 
670  // Unpack
671  loc_grid.get(sub_id).remove(box);
672  Unpacker<device_grid,mem>::template unpack_with_header<decltype(sub2),decltype(headers),decltype(v_cl.getmgpuContext()),prp...>
673  (emem,
674  sub2,
675  loc_grid.get(sub_id),
676  headers,
677  i,
678  ps,
680  opt_);
681 
682  // Copy the information on the other grid
683  for (long int j = 0 ; j < (long int)eb_gid_list.get(l_id).eb_list.size() ; j++)
684  {
685  size_t nle_id = eb_gid_list.get(l_id).eb_list.get(j);
686  if (nle_id != le_id)
687  {
688 // size_t nle_id = eb_gid_list.get(l_id).eb_list.get(j);
689  size_t n_sub_id = eg_box.get(ei).bid.get(nle_id).sub;
690 
691  Box<dim,long int> box = eg_box.get(ei).bid.get(nle_id).l_e_box;
692  Box<dim,long int> rbox = eg_box.get(ei).bid.get(nle_id).lr_e_box;
693 
694  loc_grid.get(n_sub_id).remove(box);
695  loc_grid.get(n_sub_id).copy_to(loc_grid.get(sub_id),rbox,box);
696  }
697  }
698  }
699 
700  template<unsigned int ... prp>
701  void fill_headers(size_t opt)
702  {
703  if ((opt & KEEP_PROPERTIES) == 0 && device_grid::is_unpack_header_supported())
704  {
705  headers.resize(n_headers_slot * recv_buffers.size());
706 
707  Memory result;
708  result.allocate(sizeof(int));
709 
710  pointers_h.resize(recv_buffers.size());
711 
712  for ( size_t i = 0 ; i < recv_buffers.size() ; i++ )
713  {
714  pointers_h.template get<0>(i) = recv_buffers.get(i).getDevicePointer();
715  pointers_h.template get<1>(i) = (unsigned char *)recv_buffers.get(i).getDevicePointer() + recv_buffers.get(i).size();
716  }
717 
718  pointers_h.template hostToDevice<0,1>();
719 
720  while(1)
721  {
722  for ( size_t i = 0 ; i < recv_buffers.size() ; i++ )
723  {pointers_h.template get<2>(i) = 0;}
724  pointers_h.template hostToDevice<2>();
725  *(int *)result.getPointer() = 0;
726  result.hostToDevice();
727 
728  device_grid::template unpack_headers<decltype(pointers_h),decltype(headers),decltype(result),prp ...>(pointers_h,headers,result,n_headers_slot);
729  result.deviceToHost();
730 
731  if (*(int *)result.getPointer() == 0) {break;}
732 
733  n_headers_slot *= 2;
734  headers.resize(n_headers_slot * recv_buffers.size());
735 
736  }
737 
738  headers.template deviceToHost<0,1,2>();
739  }
740  }
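
fill_headers uses a grow-and-retry handshake with the device: when the per-buffer header slots are insufficient, the kernel reports failure through a flag and the host doubles n_headers_slot and retries. A host-only sketch of the same loop (invented names):

#include <cstdio>
#include <vector>

bool decode_headers_sketch(std::vector<int> & headers, int slots, int needed)
{
    if (needed > slots) return false;  // the kernel would set the failure flag
    headers.assign(needed, 0);         // success: header table filled
    return true;
}

int main()
{
    int n_headers_slot = 1;
    std::vector<int> headers;
    while (!decode_headers_sketch(headers, n_headers_slot, 5))
        n_headers_slot *= 2;           // same doubling as in fill_headers
    printf("converged with %d slots\n", n_headers_slot);
}
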
741 
742  template<unsigned ... prp>
743  void merge_received_data_get(openfpm::vector<device_grid> & loc_grid,
744  const openfpm::vector<ep_box_grid<dim>> & eg_box,
745  const std::vector<size_t> & prp_recv,
746  ExtPreAlloc<Memory> & prRecv_prp,
747  const std::unordered_map<size_t,size_t> & g_id_to_external_ghost_box,
748  const openfpm::vector<e_box_multi<dim>> & eb_gid_list,
749  size_t opt)
750  {
751  if (device_grid::isCompressed() == false)
752  {
753  // wait to receive communication
754  v_cl.execute();
755 
756  Unpack_stat ps;
757 
758  // Unpack the object
759  for ( size_t i = 0 ; i < eg_box.size() ; i++ )
760  {
761  size_t mark_here = ps.getOffset();
762 
763  // for each external ghost box
764  while (ps.getOffset() - mark_here < prp_recv[i])
765  {
766  // Unpack the ghost box global-id
767 
768 
769  unpack_data_to_ext_ghost<Memory,prp ...>(prRecv_prp,loc_grid,i,
770  eg_box,g_id_to_external_ghost_box,eb_gid_list,
771  ps,opt);
772  }
773  }
774  }
775  else
776  {
777  fill_headers<prp ...>(opt);
778 
779  if (headers.size() != 0)
780  {
781  // Unpack the object
782  for ( size_t i = 0 ; i < recv_buffers.size() ; i++ )
783  {
784  Unpack_stat ps;
785  size_t mark_here = ps.getOffset();
786 
787  BMemory<Memory> & mem = recv_buffers.get(i);
788 
789  int j = 0;
790 
791  // for each external ghost box
792  while (ps.getOffset() - mark_here < recv_buffers.get(i).size())
793  {
794  // Unpack the ghost box global-id
795 
796  unpack_data_to_ext_ghost_with_header<BMemory<Memory>,decltype(headers),prp ...>(mem,loc_grid,headers,i*n_headers_slot+j,
797  eg_box,g_id_to_external_ghost_box,eb_gid_list,
798  ps,opt);
799 
800  j++;
801  }
802  }
803  }
804  else
805  {
806  // Unpack the object
807  for ( size_t i = 0 ; i < recv_buffers.size() ; i++ )
808  {
809  Unpack_stat ps;
810  size_t mark_here = ps.getOffset();
811 
812  BMemory<Memory> & mem = recv_buffers.get(i);
813 
814  // for each external ghost box
815  while (ps.getOffset() - mark_here < recv_buffers.get(i).size())
816  {
817  // Unpack the ghost box global-id
818 
819  unpack_data_to_ext_ghost<BMemory<Memory>,prp ...>(mem,loc_grid,i,
820  eg_box,g_id_to_external_ghost_box,eb_gid_list,
821  ps,opt);
822  }
823  }
824  }
825  }
826  }
827 
828 
829  template<template<typename,typename> class op, unsigned ... prp>
830  void merge_received_data_put(Decomposition & dec, openfpm::vector<device_grid> & loc_grid,
831  const openfpm::vector<ip_box_grid<dim>> & ig_box,
832  const std::vector<size_t> & prp_recv,
833  ExtPreAlloc<Memory> & prRecv_prp,
834  const openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext,
835  const openfpm::vector<std::unordered_map<size_t,size_t>> & g_id_to_internal_ghost_box)
836  {
837  typedef object<typename object_creator<typename T::type,prp...>::type> prp_object;
838 
839  if (device_grid::isCompressed() == false)
840  {
841  v_cl.execute();
842 
843  Unpack_stat ps;
844 
845  // Unpack the object
846  for ( size_t i = 0 ; i < ig_box.size() ; i++ )
847  {
848  // for each external ghost box
849  for (size_t j = 0 ; j < ig_box.get(i).bid.size() ; j++)
850  {
851  // Unpack the ghost box global-id
852 
853  size_t g_id;
854  Unpacker<size_t,HeapMemory>::unpack(prRecv_prp,g_id,ps);
855 
856  size_t l_id = 0;
857  // convert the global id into local id
858  auto key = g_id_to_internal_ghost_box.get(i).find(g_id);
859  if (key != g_id_to_internal_ghost_box.get(i).end()) // FOUND
860  {l_id = key->second;}
861  else
862  {
863  // NOT FOUND
864 
865  // It must always be found; if not, it means that the processor has no idea
866  // of what is stored and consequently does not know how to unpack it. Print a
867  // critical error and return
868 
869  std::cerr << "Error: " << __FILE__ << ":" << __LINE__ << " Critical, cannot unpack object, because received data cannot be interpreted\n";
870 
871  return;
872  }
873 
874  // Get the internal ghost box associated with the packed information
875  Box<dim,size_t> box = ig_box.get(i).bid.get(l_id).box;
876  size_t sub_id = ig_box.get(i).bid.get(l_id).sub;
877  box -= gdb_ext.get(sub_id).origin.template convertPoint<size_t>();
878 
879  // sub-grid where to unpack
880  auto sub2 = loc_grid.get(sub_id).getIterator(box.getKP1(),box.getKP2());
881  grid_unpack_with_prp<op,prp_object,device_grid,Memory>::template unpacking<decltype(sub2),prp...>(prRecv_prp,sub2,loc_grid.get(sub_id),ps);
882  }
883  }
884  }
885  else
886  {
887  // Unpack the object
888  for ( size_t i = 0 ; i < recv_buffers.size() ; i++ )
889  {
890  Unpack_stat ps;
891  size_t mark_here = ps.getOffset();
892 
893  BMemory<HeapMemory> & mem = recv_buffers.get(i);
894 
895  // for each external ghost box
896  while (ps.getOffset() - mark_here < recv_buffers.get(i).size())
897  {
898  // Unpack the ghost box global-id
899 
902  size_t g_id;
903  Unpacker<size_t,BMemory<HeapMemory>>::unpack(mem,g_id,ps);
904 
905  size_t pid = dec.ProctoID(recv_proc.get(i).p_id);
906 
907  size_t l_id = 0;
908  // convert the global id into local id
909  auto key = g_id_to_internal_ghost_box.get(pid).find(g_id);
910  if (key != g_id_to_internal_ghost_box.get(pid).end()) // FOUND
911  {l_id = key->second;}
912  else
913  {
914  // NOT FOUND
915 
916  // It must always be found; if not, it means that the processor has no idea
917  // of what is stored and consequently does not know how to unpack it. Print a
918  // critical error and return
919 
920  std::cerr << "Error: " << __FILE__ << ":" << __LINE__ << " Critical, cannot unpack object, because received data cannot be interpreted\n";
921 
922  return;
923  }
924 
925  // Get the internal ghost box associated with the packed information
926  Box<dim,size_t> box = ig_box.get(pid).bid.get(l_id).box;
927  size_t sub_id = ig_box.get(pid).bid.get(l_id).sub;
928  box -= gdb_ext.get(sub_id).origin.template convertPoint<size_t>();
929 
930  // sub-grid where to unpack
931  auto sub2 = loc_grid.get(sub_id).getIterator(box.getKP1(),box.getKP2());
932  grid_unpack_with_prp<op,prp_object,device_grid,BMemory<HeapMemory>>::template unpacking<decltype(sub2),prp...>(mem,sub2,loc_grid.get(sub_id),ps);
933  }
934  }
935  }
936  }
937 
938 public:
939 
948  void grids_reconstruct(openfpm::vector<openfpm::vector<aggregate<device_grid,SpaceBox<dim,long int>>>> & m_oGrid_recv,
949  openfpm::vector<device_grid> & loc_grid,
950  openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext,
951  CellDecomposer_sm<dim,St,shift<dim,St>> & cd_sm)
952  {
953  // Clear the information of the grid
954  for (size_t i = 0 ; i < loc_grid.size() ; i++)
955  {loc_grid.get(i).clear();}
956 
957  for (size_t a = 0; a < m_oGrid_recv.size(); a++)
958  {
959  for (size_t k = 0; k < m_oGrid_recv.get(a).size(); k++)
960  {
961  device_grid & g = m_oGrid_recv.get(a).template get<0>(k);
962 
963  SpaceBox<dim,long int> b = m_oGrid_recv.get(a).template get<1>(k);
964 
965  Point<dim,St> p;
966  for (size_t n = 0; n < dim; n++)
967  {p.get(n) = g.getGrid().getBox().getHigh(n);}
968 
969  Point<dim,St> point;
970  for (size_t n = 0; n < dim; n++)
971  {point.get(n) = (b.getHigh(n) + b.getLow(n))/2;}
972 
973  for (size_t j = 0; j < gdb_ext.size(); j++)
974  {
975  // Local sub-domain
976  SpaceBox<dim,long int> sub = gdb_ext.get(j).Dbox;
977  sub += gdb_ext.get(j).origin;
978 
979  if (sub.isInside(point) == true)
980  {
981 
982 
983  grid_key_dx<dim> start = b.getKP1() - grid_key_dx<dim>(gdb_ext.get(j).origin.asArray());
984  grid_key_dx<dim> stop = b.getKP2() - grid_key_dx<dim>(gdb_ext.get(j).origin.asArray());
985 
986  Box<dim,size_t> box_src;
987  Box<dim,size_t> box_dst;
988 
989  for(size_t i = 0 ; i < dim ; i++)
990  {
991  box_dst.setLow(i,start.get(i));
992  box_dst.setHigh(i,stop.get(i));
993  box_src.setLow(i,0);
994  box_src.setHigh(i,stop.get(i)-start.get(i));
995  }
996 
997  loc_grid.get(j).copy_to(g,box_src,box_dst);
998  }
999  }
1000  }
1001  }
1002  }
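
The placement rule in grids_reconstruct assigns a received piece to the sub-domain that contains the centre of the piece's bounding box. In one dimension, with invented extents:

#include <cstdio>

struct seg { long lo, hi; bool inside(double p) const { return p >= lo && p <= hi; } };

int main()
{
    seg subs[2] = {{0,49},{50,99}};       // two local sub-domains
    long b_lo = 40, b_hi = 60;            // bounding box of a received piece
    double centre = (b_lo + b_hi) / 2.0;  // same rule as (getHigh(n)+getLow(n))/2

    for (int j = 0; j < 2; j++)
    {
        if (subs[j].inside(centre))
            printf("piece [%ld,%ld] goes into sub-domain %d\n", b_lo, b_hi, j);
    }
}
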
1003 
1016  void labelIntersectionGridsProcessor(Decomposition & dec,
1017  CellDecomposer_sm<dim,St,shift<dim,St>> & cd_sm,
1018  openfpm::vector<device_grid> & loc_grid_old,
1019  openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext,
1020  openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext_old,
1021  openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext_global,
1022  openfpm::vector<openfpm::vector<aggregate<device_grid,SpaceBox<dim,long int>>>> & lbl_b,
1023  openfpm::vector<size_t> & prc_sz)
1024  {
1025  lbl_b.clear();
1026 
1027  // resize the label buffer
1028  lbl_b.resize(v_cl.getProcessingUnits());
1029 
1030  // Label all the intersection grids with the processor id where they should go
1031 
1032  for (size_t i = 0; i < gdb_ext_old.size(); i++)
1033  {
1034  // Local old sub-domain in global coordinates
1035  SpaceBox<dim,long int> sub_dom = gdb_ext_old.get(i).Dbox;
1036  sub_dom += gdb_ext_old.get(i).origin;
1037 
1038  for (size_t j = 0; j < gdb_ext_global.size(); j++)
1039  {
1040  size_t p_id = 0;
1041 
1042  // Intersection box
1043  SpaceBox<dim,long int> inte_box;
1044 
1045  // Global new sub-domain in global coordinates
1046  SpaceBox<dim,long int> sub_dom_new = gdb_ext_global.get(j).Dbox;
1047  sub_dom_new += gdb_ext_global.get(j).origin;
1048 
1049  bool intersect = false;
1050 
1051  if (sub_dom.isValid() == true && sub_dom_new.isValid() == true)
1052  intersect = sub_dom.Intersect(sub_dom_new, inte_box);
1053 
1054  if (intersect == true)
1055  {
1056  auto inte_box_cont = cd_sm.convertCellUnitsIntoDomainSpace(inte_box);
1057 
1058  // Get processor ID that store intersection box
1059  Point<dim,St> p;
1060  for (size_t n = 0; n < dim; n++)
1061  p.get(n) = (inte_box_cont.getHigh(n) + inte_box_cont.getLow(n))/2;
1062 
1063  p_id = dec.processorID(p);
1064  prc_sz.get(p_id)++;
1065 
1066  // Transform coordinates to local
1067  auto inte_box_local = inte_box;
1068 
1069  inte_box_local -= gdb_ext_old.get(i).origin;
1070 
1071  // Grid corresponding for gdb_ext_old.get(i) box
1072  device_grid & gr = loc_grid_old.get(i);
1073 
1074  // Size of the grid to send
1075  size_t sz[dim];
1076  for (size_t l = 0; l < dim; l++)
1077  {
1078  sz[l] = inte_box_local.getHigh(l) - inte_box_local.getLow(l) + 1;
1079  //std::cout << "GR_send size on " << l << " dimension: " << sz[l] << std::endl;
1080  }
1081 
1082  // Grid to send
1083  device_grid gr_send(sz);
1084  gr_send.setMemory();
1085 
1086  // Sub iterator across intersection box inside local grid
1087  grid_key_dx<dim> start = inte_box_local.getKP1();
1088  grid_key_dx<dim> stop = inte_box_local.getKP2();
1089 
1090  Box<dim,long int> box_src;
1091  Box<dim,long int> box_dst;
1092 
1093  for(size_t i = 0 ; i < dim ; i++)
1094  {
1095  box_src.setLow(i,start.get(i));
1096  box_src.setHigh(i,stop.get(i));
1097  box_dst.setLow(i,0);
1098  box_dst.setHigh(i,stop.get(i)-start.get(i));
1099  }
1100 
1101  gr_send.copy_to(gr,box_src,box_dst);
1102 
1103  aggregate<device_grid,SpaceBox<dim,long int>> aggr;
1104 
1105  aggr.template get<0>() = gr_send;
1106  aggr.template get<1>() = inte_box;
1107 
1108  // Add to the labeling vector
1109  lbl_b.get(p_id).add(aggr);
1110  }
1111  }
1112  }
1113  }
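
Shrunk to one dimension, the labelling above intersects every old sub-domain with every new one and tags each non-empty intersection with its new owner. A sketch (extents and the ownership rule are invented):

#include <algorithm>
#include <cstdio>

int main()
{
    long old_dom[2][2] = {{0,49},{50,99}};  // old decomposition
    long new_dom[2][2] = {{0,29},{30,99}};  // new decomposition
    // stand-in for dec.processorID(p): rank 0 owns [0,29], rank 1 owns [30,99]
    auto processorID = [](double p) { return p < 30 ? 0 : 1; };

    for (int i = 0; i < 2; i++)
    {
        for (int j = 0; j < 2; j++)
        {
            long lo = std::max(old_dom[i][0], new_dom[j][0]);
            long hi = std::min(old_dom[i][1], new_dom[j][1]);
            if (lo > hi) continue;                    // no intersection
            int p_id = processorID((lo + hi) / 2.0);  // label with the new owner
            printf("old %d ^ new %d = [%ld,%ld] -> rank %d\n", i, j, lo, hi, p_id);
        }
    }
}
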
1114 
1128  void map_(Decomposition & dec,
1129  CellDecomposer_sm<dim,St,shift<dim,St>> & cd_sm,
1130  openfpm::vector<device_grid> & loc_grid,
1131  openfpm::vector<device_grid> & loc_grid_old,
1132  openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext,
1133  openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext_old,
1134  openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext_global)
1135  {
1136  // Processor communication size
1137  openfpm::vector<size_t> prc_sz(v_cl.getProcessingUnits());
1138 
1139  // Contains the processor id of each box (basically where they have to go)
1140  labelIntersectionGridsProcessor(dec,cd_sm,loc_grid_old,gdb_ext,gdb_ext_old,gdb_ext_global,m_oGrid,prc_sz);
1141 
1142  // Calculate the sending buffer size for each processor, put this information in
1143  // a contiguous buffer
1144  p_map_req.resize(v_cl.getProcessingUnits());
1145 
1146  // Vector of number of sending grids for each involved processor
1147  openfpm::vector<size_t> prc_sz_r;
1148  // Vector of ranks of involved processors
1149  openfpm::vector<size_t> prc_r;
1150 
1151  for (size_t i = 0; i < v_cl.getProcessingUnits(); i++)
1152  {
1153  if (m_oGrid.get(i).size() != 0)
1154  {
1155  p_map_req.get(i) = prc_r.size();
1156  prc_r.add(i);
1157  prc_sz_r.add(m_oGrid.get(i).size());
1158  }
1159  }
1160 
1161  decltype(m_oGrid) m_oGrid_new;
1162  for (size_t i = 0; i < v_cl.getProcessingUnits(); i++)
1163  {
1164  if (m_oGrid.get(i).size() != 0)
1165  {m_oGrid_new.add(m_oGrid.get(i));}
1166  }
1167 
1168  // Vector for receiving the intersection grids
1169  openfpm::vector<openfpm::vector<aggregate<device_grid,SpaceBox<dim,long int>>>> m_oGrid_recv;
1170 
1171  // Send and receive the intersection grids
1172  v_cl.SSendRecv(m_oGrid_new,m_oGrid_recv,prc_r,prc_recv_map,recv_sz_map);
1173 
1174  // Reconstruct the new local grids
1175  grids_reconstruct(m_oGrid_recv,loc_grid,gdb_ext,cd_sm);
1176  }
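
From user code this machinery is reached through grid_dist_id::map(); a sketch, assuming grid is a grid_dist_id instance:

template<typename grid_type>
void rebalance_sketch(grid_type & grid)
{
    // after the decomposition has been recomputed, ship every grid piece to its
    // new owner; internally this runs labelIntersectionGridsProcessor, SSendRecv
    // and grids_reconstruct in sequence
    grid.map();
}
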
1177 
1189  template<int... prp> void ghost_get_(const openfpm::vector<ip_box_grid<dim>> & ig_box,
1190  const openfpm::vector<ep_box_grid<dim>> & eg_box,
1191  const openfpm::vector<i_lbox_grid<dim>> & loc_ig_box,
1192  const openfpm::vector<e_lbox_grid<dim>> & loc_eg_box,
1193  const openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext,
1194  const openfpm::vector<e_box_multi<dim>> & eb_gid_list,
1195  bool use_bx_def,
1196  openfpm::vector<device_grid> & loc_grid,
1197  const grid_sm<dim,void> & ginfo,
1198  std::unordered_map<size_t,size_t> & g_id_to_external_ghost_box,
1199  size_t opt)
1200  {
1201 #ifdef PROFILE_SCOREP
1202  SCOREP_USER_REGION("ghost_get",SCOREP_USER_REGION_TYPE_FUNCTION)
1203 #endif
1204 
1205  // Sending property object
1206  typedef object<typename object_creator<typename T::type,prp...>::type> prp_object;
1207 
1208  recv_buffers.clear();
1209  recv_proc.clear();
1210  send_prc_queue.clear();
1211  send_pointer.clear();
1212  send_size.clear();
1213 
1214  this->opt = opt;
1215 
1216  size_t req = 0;
1217 
1218  // Pack information
1219  Pack_stat sts;
1220 
1221  // We check if skip labelling is possible in this condition
1222  for (int i = 0 ; i < loc_grid.size() ; i++)
1223  {opt &= (loc_grid.get(i).isSkipLabellingPossible())?(int)-1:~SKIP_LABELLING;}
1224 
1225  #ifdef ENABLE_GRID_DIST_ID_PERF_STATS
1226  timer packing_time;
1227  packing_time.start();
1228  #endif
1229 
1230  if (!(opt & SKIP_LABELLING))
1231  {
1232  // first we initialize the pack buffer on all internal grids
1233 
1234  for (size_t i = 0 ; i < loc_grid.size() ; i++)
1235  {loc_grid.get(i).packReset();}
1236 
1237  // Calculating the size to pack all the data to send
1238  for ( size_t i = 0 ; i < ig_box.size() ; i++ )
1239  {
1240  // for each ghost box
1241  for (size_t j = 0 ; j < ig_box.get(i).bid.size() ; j++)
1242  {
1243  // And linked sub-domain
1244  size_t sub_id = ig_box.get(i).bid.get(j).sub;
1245  // Internal ghost box
1246  Box<dim,long int> g_ig_box = ig_box.get(i).bid.get(j).box;
1247 
1248  if (g_ig_box.isValid() == false)
1249  {continue;}
1250 
1251  g_ig_box -= gdb_ext.get(sub_id).origin.template convertPoint<size_t>();
1252 
1253  // Pack a size_t for the internal ghost id
1254  Packer<size_t,Memory>::packRequest(req);
1255  // Create a sub grid iterator spanning the internal ghost layer
1256  auto sub_it = loc_grid.get(sub_id).getIterator(g_ig_box.getKP1(),g_ig_box.getKP2(),false);
1257 
1258  // get the size to pack
1259  Packer<device_grid,Memory>::template packRequest<decltype(sub_it),prp...>(loc_grid.get(sub_id),sub_it,req);
1260  }
1261  }
1262 
1263  // Finalize calculation
1264  for (size_t i = 0 ; i < loc_grid.size() ; i++)
1265  {loc_grid.get(i).template packCalculate<prp ...>(req,v_cl.getmgpuContext());}
1266 
1267  // resize the property buffer memory
1268  g_send_prp_mem.resize(req);
1269 
1270  // Create an object of preallocated memory for properties
1271  ExtPreAlloc<Memory> & prAlloc_prp = *(new ExtPreAlloc<Memory>(req,g_send_prp_mem));
1272  // Necessary. We do not want this memory to be destroyed until it goes out of scope.
1273  // P.S. The Packer shares this memory with the data structures, and if they see the
1274  // reference counter reach zero they destroy this memory
1275  prAlloc_prp.incRef();
1276 
1277  pointers.clear();
1278  pointers2.clear();
1279 
1280  // Pack the information for each processor and send it
1281  for ( size_t i = 0 ; i < ig_box.size() ; i++ )
1282  {
1283 
1284  sts.mark();
1285 
1286  void * pointer;
1287 
1288  if (opt & RUN_ON_DEVICE)
1289  {pointer = prAlloc_prp.getDevicePointerEnd();}
1290  else
1291  {pointer = prAlloc_prp.getPointerEnd();}
1292 
1293  // for each ghost box
1294  for (size_t j = 0 ; j < ig_box.get(i).bid.size() ; j++)
1295  {
1296  // we pack only if it is valid
1297  if (ig_box.get(i).bid.get(j).box.isValid() == false)
1298  continue;
1299 
1300  // And linked sub-domain
1301  size_t sub_id = ig_box.get(i).bid.get(j).sub;
1302  // Internal ghost box
1303  Box<dim,size_t> g_ig_box = ig_box.get(i).bid.get(j).box;
1304  g_ig_box -= gdb_ext.get(sub_id).origin.template convertPoint<size_t>();
1305  // Ghost box global id
1306  size_t g_id = ig_box.get(i).bid.get(j).g_id;
1307 
1308  // Pack a size_t for the internal ghost id
1309  Packer<size_t,Memory>::pack(prAlloc_prp,g_id,sts);
1310  prAlloc_prp.hostToDevice(prAlloc_prp.getOffset(),prAlloc_prp.getOffsetEnd());
1311  // Create a sub grid iterator spanning the internal ghost layer
1312  auto sub_it = loc_grid.get(sub_id).getIterator(g_ig_box.getKP1(),g_ig_box.getKP2(),false);
1313  // and pack the internal ghost grid
1314  Packer<device_grid,Memory>::template pack<decltype(sub_it),prp...>(prAlloc_prp,loc_grid.get(sub_id),sub_it,sts);
1315  }
1316  // send the request
1317 
1318  void * pointer2;
1319 
1320  if (opt & RUN_ON_DEVICE)
1321  {pointer2 = prAlloc_prp.getDevicePointerEnd();}
1322  else
1323  {pointer2 = prAlloc_prp.getPointerEnd();}
1324 
1325  pointers.add(pointer);
1326  pointers2.add(pointer2);
1327  }
1328 
1329  for (size_t i = 0 ; i < loc_grid.size() ; i++)
1330  {
1331  rem_copy_opt opt_ = rem_copy_opt::NONE_OPT;
1332  if (opt & SKIP_LABELLING)
1333  {opt_ = rem_copy_opt::KEEP_GEOMETRY;}
1334 
1335  loc_grid.get(i).template packFinalize<prp ...>(prAlloc_prp,sts,opt_,true);
1336  }
1337 
1338  prAlloc_prp.decRef();
1339  delete &prAlloc_prp;
1340  }
1341  else
1342  {
1343  req = g_send_prp_mem.size();
1344 
1345  // Create an object of preallocated memory for properties
1346  ExtPreAlloc<Memory> & prAlloc_prp = *(new ExtPreAlloc<Memory>(req,g_send_prp_mem));
1347  prAlloc_prp.incRef();
1348 
1349  for (size_t i = 0 ; i < loc_grid.size() ; i++)
1350  {
1351  rem_copy_opt opt_ = rem_copy_opt::NONE_OPT;
1352  if (opt & SKIP_LABELLING)
1353  {opt_ = rem_copy_opt::KEEP_GEOMETRY;}
1354 
1355  loc_grid.get(i).template packFinalize<prp ...>(prAlloc_prp,sts,opt_,true);
1356  }
1357 
1358  prAlloc_prp.decRef();
1359  delete &prAlloc_prp;
1360  }
1361 
1362  #ifdef ENABLE_GRID_DIST_ID_PERF_STATS
1363  packing_time.stop();
1364  tot_pack += packing_time.getwct();
1365  timer sendrecv_time;
1366  sendrecv_time.start();
1367  #endif
1368 
1369  for ( size_t i = 0 ; i < ig_box.size() ; i++ )
1370  {
1371  // This function sends (or queues for sending) the information
1372  send_or_queue(ig_box.get(i).prc,(char *)pointers.get(i),(char *)pointers2.get(i));
1373  }
1374 
1375  // Calculate the total information to receive from each processor
1376  std::vector<size_t> prp_recv;
1377 
1378  // Create an object of preallocated memory for properties
1379  ExtPreAlloc<Memory> & prRecv_prp = *(new ExtPreAlloc<Memory>(0,g_recv_prp_mem));
1380  prRecv_prp.incRef();
1381 
1382  // Before waiting for the communication to complete we sync the local ghost
1383  // in order to overlap it with the communication
1384 
1385  queue_recv_data_get<prp_object>(eg_box,prp_recv,prRecv_prp);
1386 
1387  #ifdef ENABLE_GRID_DIST_ID_PERF_STATS
1388  sendrecv_time.stop();
1389  tot_sendrecv += sendrecv_time.getwct();
1390  timer merge_loc_time;
1391  merge_loc_time.start();
1392  #endif
1393 
1394  ghost_get_local<prp...>(loc_ig_box,loc_eg_box,gdb_ext,loc_grid,g_id_to_external_ghost_box,ginfo,use_bx_def,opt);
1395 
1396  #ifdef ENABLE_GRID_DIST_ID_PERF_STATS
1397  merge_loc_time.stop();
1398  tot_loc_merge += merge_loc_time.getwct();
1399  timer merge_time;
1400  merge_time.start();
1401  #endif
1402 
1403  for (size_t i = 0 ; i < loc_grid.size() ; i++)
1404  {loc_grid.get(i).removeAddUnpackReset();}
1405 
1406  merge_received_data_get<prp ...>(loc_grid,eg_box,prp_recv,prRecv_prp,g_id_to_external_ghost_box,eb_gid_list,opt);
1407 
1408  rem_copy_opt opt_ = rem_copy_opt::NONE_OPT;
1409  if (opt & SKIP_LABELLING)
1410  {opt_ = rem_copy_opt::KEEP_GEOMETRY;}
1411 
1412  for (size_t i = 0 ; i < loc_grid.size() ; i++)
1413  {loc_grid.get(i).template removeAddUnpackFinalize<prp ...>(v_cl.getmgpuContext(),opt_);}
1414 
1415  #ifdef ENABLE_GRID_DIST_ID_PERF_STATS
1416  merge_time.stop();
1417  tot_merge += merge_time.getwct();
1418  #endif
1419 
1420  prRecv_prp.decRef();
1421  delete &prRecv_prp;
1422  }
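
How ghost_get_ is reached from user code; a sketch modelled on the OpenFPM examples (domain extent, ghost width and the single-float property layout are invented):

#include "Grid/grid_dist_id.hpp"

int main(int argc, char* argv[])
{
    openfpm_init(&argc,&argv);
    {
        Box<2,float> domain({0.0,0.0},{1.0,1.0});
        Ghost<2,float> g(0.05);
        size_t sz[2] = {128,128};

        grid_dist_id<2,float,aggregate<float>> g_dist(sz,domain,g);

        // fill the domain part of the distributed grid
        auto it = g_dist.getDomainIterator();
        while (it.isNext())
        {
            auto key = it.get();
            g_dist.template get<0>(key) = 1.0f;
            ++it;
        }

        g_dist.template ghost_get<0>();   // ends up in ghost_get_ for property 0
    }
    openfpm_finalize();
}
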
1423 
1438  template<template<typename,typename> class op,int... prp>
1439  void ghost_put_(Decomposition & dec,
1440  const openfpm::vector<ip_box_grid<dim>> & ig_box,
1441  const openfpm::vector<ep_box_grid<dim>> & eg_box,
1442  const openfpm::vector<i_lbox_grid<dim>> & loc_ig_box,
1443  const openfpm::vector<e_lbox_grid<dim>> & loc_eg_box,
1444  const openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext,
1445  openfpm::vector<device_grid> & loc_grid,
1446  openfpm::vector<std::unordered_map<size_t,size_t>> & g_id_to_internal_ghost_box)
1447  {
1448  // Sending property object
1449  typedef object<typename object_creator<typename T::type,prp...>::type> prp_object;
1450 
1451  recv_buffers.clear();
1452  recv_proc.clear();
1453  send_prc_queue.clear();
1454  send_pointer.clear();
1455  send_size.clear();
1456 
1457  size_t req = 0;
1458 
1459  // Create a packing request vector
1460  for ( size_t i = 0 ; i < eg_box.size() ; i++ )
1461  {
1462  // for each ghost box
1463  for (size_t j = 0 ; j < eg_box.get(i).bid.size() ; j++)
1464  {
1465  // And linked sub-domain
1466  size_t sub_id = eg_box.get(i).bid.get(j).sub;
1467  // Internal ghost box
1468  Box<dim,long int> g_eg_box = eg_box.get(i).bid.get(j).g_e_box;
1469 
1470  if (g_eg_box.isValid() == false)
1471  continue;
1472 
1473  g_eg_box -= gdb_ext.get(sub_id).origin.template convertPoint<size_t>();
1474 
1475  // Pack a size_t for the internal ghost id
1476  Packer<size_t,HeapMemory>::packRequest(req);
1477 
1478  // Create a sub grid iterator spanning the external ghost layer
1479  auto sub_it = loc_grid.get(sub_id).getIterator(g_eg_box.getKP1(),g_eg_box.getKP2());
1480 
1481  // and pack the internal ghost grid
1482  Packer<device_grid,HeapMemory>::template packRequest<decltype(sub_it),prp...>(loc_grid.get(sub_id),sub_it,req);
1483  }
1484  }
1485 
1486  // resize the property buffer memory
1487  g_send_prp_mem.resize(req);
1488 
1489  // Create an object of preallocated memory for properties
1490  ExtPreAlloc<Memory> & prAlloc_prp = *(new ExtPreAlloc<Memory>(req,g_send_prp_mem));
1491 
1492  prAlloc_prp.incRef();
1493 
1494  // Pack information
1495  Pack_stat sts;
1496 
1497  // Pack the information for each processor and send it
1498  for ( size_t i = 0 ; i < eg_box.size() ; i++ )
1499  {
1500 
1501  sts.mark();
1502  void * pointer = prAlloc_prp.getPointerEnd();
1503 
1504  // for each ghost box
1505  for (size_t j = 0 ; j < eg_box.get(i).bid.size() ; j++)
1506  {
1507  // we pack only if it is valid
1508  if (eg_box.get(i).bid.get(j).g_e_box.isValid() == false)
1509  continue;
1510 
1511  // And linked sub-domain
1512  size_t sub_id = eg_box.get(i).bid.get(j).sub;
1513  // Internal ghost box
1514  Box<dim,size_t> g_eg_box = eg_box.get(i).bid.get(j).g_e_box;
1515  g_eg_box -= gdb_ext.get(sub_id).origin.template convertPoint<size_t>();
1516  // Ghost box global id
1517  size_t g_id = eg_box.get(i).bid.get(j).g_id;
1518 
1519  // Pack a size_t for the internal ghost id
1520  Packer<size_t,HeapMemory>::pack(prAlloc_prp,g_id,sts);
1521  // Create a sub grid iterator spanning the external ghost layer
1522  auto sub_it = loc_grid.get(sub_id).getIterator(g_eg_box.getKP1(),g_eg_box.getKP2());
1523  // and pack the internal ghost grid
1524  Packer<device_grid,HeapMemory>::template pack<decltype(sub_it),prp...>(prAlloc_prp,loc_grid.get(sub_id),sub_it,sts);
1525  }
1526  // send the request
1527 
1528  void * pointer2 = prAlloc_prp.getPointerEnd();
1529 
1530  // This function sends (or queues for sending) the information
1531  send_or_queue(ig_box.get(i).prc,(char *)pointer,(char *)pointer2);
1532  }
1533 
1534  // Calculate the total information to receive from each processor
1535  std::vector<size_t> prp_recv;
1536 
1537  // Create an object of preallocated memory for properties
1538  ExtPreAlloc<Memory> & prRecv_prp = *(new ExtPreAlloc<Memory>(0,g_recv_prp_mem));
1539  prRecv_prp.incRef();
1540 
1541  queue_recv_data_put<prp_object>(ig_box,prp_recv,prRecv_prp);
1542 
1543  // Before waiting for the communication to complete we sync the local ghost
1544  // in order to overlap it with the communication
1545 
1546  ghost_put_local<op,prp...>(loc_ig_box,loc_eg_box,gdb_ext,loc_grid,g_id_to_internal_ghost_box);
1547 
1548  merge_received_data_put<op,prp ...>(dec,loc_grid,ig_box,prp_recv,prRecv_prp,gdb_ext,g_id_to_internal_ghost_box);
1549 
1550  prRecv_prp.decRef();
1551  prAlloc_prp.decRef();
1552  delete &prAlloc_prp;
1553  delete &prRecv_prp;
1554  }
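
The user-level counterpart of ghost_put_; a sketch assuming a grid built as in the previous example, with contributions already accumulated into its ghost layer:

#include "Grid/grid_dist_id.hpp"

// merge the ghost contributions of property 0 back into the owning cells
template<typename grid_type>
void merge_ghost_contributions_sketch(grid_type & g_dist)
{
    g_dist.template ghost_put<add_,0>();  // sum ghost values into the owners
    g_dist.template ghost_get<0>();       // optionally refresh the ghosts afterwards
}
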
1555 
1560  grid_dist_id_comm()
1561  :v_cl(create_vcluster<Memory>())
1562  {
1563 
1564  }
1565 
1572  grid_dist_id_comm(const grid_dist_id_comm<dim,St,T,Decomposition,Memory,device_grid> & gc)
1573  :v_cl(gc.v_cl)
1574  {
1575 
1576  }
1577 };
1578 
1579 
1580 #endif /* SRC_GRID_GRID_DIST_ID_COMM_HPP_ */