OpenFPM 5.2.0
Project that contains the implementation of distributed structures
grid_dist_id_comm.hpp
/*
 * grid_dist_id_comm.hpp
 *
 *  Created on: Nov 13, 2016
 *      Author: yaroslav
 */

#ifndef SRC_GRID_GRID_DIST_ID_COMM_HPP_
#define SRC_GRID_GRID_DIST_ID_COMM_HPP_

#include "Vector/vector_dist_ofb.hpp"
#include "Grid/copy_grid_fast.hpp"
#include "grid_dist_util.hpp"
#include "util/common_pdata.hpp"
#include "lib/pdata.hpp"
#include "Grid/grid_common.hpp"

/*! \brief Unpack selector: error path, complex properties on grids are not supported yet
 */
template<bool result,typename T, typename device_grid, typename Memory>
struct grid_unpack_selector_with_prp
{
    template<template<typename,typename> class op, typename sub_it_type, int ... prp> static void call_unpack(ExtPreAlloc<Memory> & recv_buf, sub_it_type & sub2, device_grid & gd, Unpack_stat & ps)
    {
        std::cerr << __FILE__ << ":" << __LINE__ << " Error: complex properties on grids are not supported yet" << std::endl;
    }
};

/*! \brief Unpack selector: unpack the properties into the grid, applying the operation op
 *
 * (specialization parameter reconstructed: result == true, i.e. the type has no
 * custom pack generator and can be unpacked directly)
 */
template<typename T, typename device_grid, typename Memory>
struct grid_unpack_selector_with_prp<true,T,device_grid,Memory>
{

    template<template<typename,typename> class op, typename sub_it_type, unsigned int ... prp>
    static void call_unpack(ExtPreAlloc<Memory> & recv_buf,
                            sub_it_type & sub2,
                            device_grid & gd,
                            Unpack_stat & ps)
    {
        gd.template unpack_with_op<op,Memory,prp ...>(recv_buf,sub2,ps);
    }
};

/*! \brief Variadic unpack dispatcher (primary declaration)
 */
template<typename device_grid, typename Memory, typename T>
struct grid_call_serialize_variadic;

template<typename device_grid, typename Memory , int ... prp>
struct grid_call_serialize_variadic<device_grid,Memory,index_tuple<prp...>>
{

    template<template<typename,typename> class op, typename sub_it_type, typename T>
    inline static void call_unpack(ExtPreAlloc<Memory> & recv_buf,
                                   sub_it_type & sub2,
                                   device_grid & dg,
                                   Unpack_stat & ps)
    {
        const bool result = has_pack_gen<typename T::type>::value == false;

        grid_unpack_selector_with_prp<result,T,device_grid,Memory>::template call_unpack<op,sub_it_type,prp...>(recv_buf,sub2,dg,ps);
    }
};

/*! \brief Unpack a grid property set into a device grid, applying the operation op
 */
template<template<typename,typename> class op, typename T, typename device_grid, typename Memory>
struct grid_unpack_with_prp
{

    template<typename sub_it_type, unsigned int ... prp> static void unpacking(ExtPreAlloc<Memory> & recv_buf, sub_it_type & sub2, device_grid & dg, Unpack_stat & ps)
    {
        typedef index_tuple<prp...> ind_prop_to_pack;
        grid_call_serialize_variadic<device_grid,Memory,ind_prop_to_pack>::template call_unpack<op,sub_it_type,T>(recv_buf, sub2, dg, ps);
    }
};
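
// Example (illustrative sketch, not part of the original header): how the
// dispatch chain above is typically driven; `prp_object` and the op functor
// (here a hypothetical replace-like `op_t`) stand in for whatever the caller
// uses. The same pattern appears in merge_received_data_put() below.
//
//   auto sub2 = dg.getIterator(box.getKP1(),box.getKP2());
//   grid_unpack_with_prp<op_t,prp_object,device_grid,Memory>
//       ::template unpacking<decltype(sub2),0,1>(recv_buf,sub2,dg,ps);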

/*! \brief This class is a helper for the communication of grid_dist_id
 */
template<unsigned int dim, typename St, typename T, typename Decomposition = CartDecomposition<dim,St>,typename Memory=HeapMemory , typename device_grid=grid_cpu<dim,T> >
class grid_dist_id_comm
{
    //! VCluster
    Vcluster<Memory> & v_cl;

    //! Maps the processor id with the communication request into the map procedure
    openfpm::vector<size_t> p_map_req;

    //! Stores the list of processors that communicate with us (local processor)
    openfpm::vector<size_t> prc_recv_map;

    //! Stores the size of the elements added for each processor that communicates with us
    openfpm::vector<size_t> recv_sz_map;

    //! List of processors to send to
    openfpm::vector<size_t> send_prc_queue;

    //! Pointers to the memory to send
    openfpm::vector<void *> send_pointer;

    //! sizes to send
    openfpm::vector<size_t> send_size;

    //! receiving buffers in case of dynamic (NBX) communication
    openfpm::vector_fr<BMemory<Memory>> recv_buffers;

    //! receiving-processor metadata: processor id, message size, arrival order
    struct rp_id
    {
        int p_id;
        int size;
        int i;

        bool operator<(const rp_id & tmp) const
        {
            return p_id < tmp.p_id;
        }
    };

    //! receiving processors
    openfpm::vector<rp_id> recv_proc;

    //! For each near processor, the grid pieces to send together with their destination boxes
    openfpm::vector<openfpm::vector<aggregate<device_grid,Box<dim,long int>>>> m_oGrid;

    openfpm::vector<int> m_oGrid_c;

    //! Memory for the ghost sending buffer
    Memory g_send_prp_mem;

    //! Memory for the ghost receiving buffer
    Memory g_recv_prp_mem;

    //! send pointers
    openfpm::vector<void *> pointers;
    openfpm::vector<void *> pointers2;

    //! header unpacker info
    openfpm::vector_gpu<aggregate<void *,void *,int>> pointers_h;
    int n_headers_slot = 1;
    //! unpacked headers (layout reconstructed: the fields are accessed as <0>,<1>,<2> below)
    openfpm::vector_gpu<aggregate<size_t,size_t,unsigned int>> headers;

    //! Receiving option
    size_t opt;

    /*! \brief Sync the local ghost part
     */
    template<int... prp> void ghost_get_local(const openfpm::vector<i_lbox_grid<dim>> & loc_ig_box,
            const openfpm::vector<e_lbox_grid<dim>> & loc_eg_box,
            const openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext,
            openfpm::vector<device_grid> & loc_grid,
            std::unordered_map<size_t,size_t> & g_id_to_external_ghost_box,
            const grid_sm<dim,void> & ginfo,
            bool use_bx_def,
            size_t opt)
    {
        rem_copy_opt opt_ = rem_copy_opt::NONE_OPT;
        if (opt & SKIP_LABELLING)
        {opt_ = rem_copy_opt::KEEP_GEOMETRY;}

        if (opt_ != rem_copy_opt::KEEP_GEOMETRY)
        {
            for (size_t i = 0 ; i < loc_grid.size() ; i++)
            {loc_grid.get(i).copyRemoveReset();}
        }

        grid_key_dx<dim> cnt[1];
        cnt[0].zero();

        for (size_t i = 0 ; i < loc_ig_box.size() ; i++)
        {
            for (size_t j = 0 ; j < loc_ig_box.get(i).bid.size() ; j++)
            {
                size_t sub_id_src_gdb_ext = loc_ig_box.get(i).bid.get(j).sub_gdb_ext;

                // sub-domain connected with the external box
                size_t sub_id_dst = loc_ig_box.get(i).bid.get(j).sub;

                // local internal ghost box connected
                for (size_t v = 0 ; v < loc_ig_box.get(i).bid.get(j).k.size() ; v++)
                {
                    size_t k = loc_ig_box.get(i).bid.get(j).k.get(v);

                    Box<dim,long int> bx_dst = loc_eg_box.get(sub_id_dst).bid.get(k).ebox;

                    // convert into local coordinates
                    size_t sub_id_dst_gdb_ext = loc_eg_box.get(sub_id_dst).bid.get(k).sub_gdb_ext;
                    bx_dst -= gdb_ext.get(sub_id_dst_gdb_ext).origin;

                    // create 2 sub-grid iterators

                    if (bx_dst.isValid() == false)
                    {continue;}

                    Box<dim,long int> bx_src = flip_box(loc_eg_box.get(sub_id_dst).bid.get(k).ebox,loc_eg_box.get(sub_id_dst).bid.get(k).cmb,ginfo);
                    bx_src -= gdb_ext.get(sub_id_src_gdb_ext).origin;

                    #ifdef SE_CLASS1

                    if (use_bx_def == false)
                    {
                        if (loc_eg_box.get(sub_id_dst).bid.get(k).sub != i)
                        {std::cerr << "Error " << __FILE__ << ":" << __LINE__ << " source and destination are not correctly linked" << "\n";}
                    }

                    if (bx_src.getVolumeKey() != bx_dst.getVolumeKey())
                    {std::cerr << "Error " << __FILE__ << ":" << __LINE__ << " source and destination do not match in size" << "\n";}

                    #endif

                    auto & gd = loc_grid.get(sub_id_dst_gdb_ext);

                    gd.remove(bx_dst);
                    gd.copy_to(loc_grid.get(sub_id_src_gdb_ext),bx_src,bx_dst);
                }
            }
        }

        for (size_t i = 0 ; i < loc_grid.size() ; i++)
        {
            loc_grid.get(i).template removeCopyToFinalize<prp ...>(v_cl.getGpuContext(), rem_copy_opt::PHASE1 | opt_);
        }

        for (size_t i = 0 ; i < loc_grid.size() ; i++)
        {
            loc_grid.get(i).template removeCopyToFinalize<prp ...>(v_cl.getGpuContext(), rem_copy_opt::PHASE2 | opt_);
        }

        for (size_t i = 0 ; i < loc_grid.size() ; i++)
        {
            loc_grid.get(i).template removeCopyToFinalize<prp ...>(v_cl.getGpuContext(), rem_copy_opt::PHASE3 | opt_);
        }
    }

    /*! \brief Sync the local ghost part, applying the operation op
     */
    template<template<typename,typename> class op, int... prp> void ghost_put_local(const openfpm::vector<i_lbox_grid<dim>> & loc_ig_box,
            const openfpm::vector<e_lbox_grid<dim>> & loc_eg_box,
            const openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext,
            openfpm::vector<device_grid> & loc_grid,
            openfpm::vector<std::unordered_map<size_t,size_t>> & g_id_to_external_ghost_box)
    {
        for (size_t i = 0 ; i < loc_eg_box.size() ; i++)
        {
            for (size_t j = 0 ; j < loc_eg_box.get(i).bid.size() ; j++)
            {
                if (loc_eg_box.get(i).bid.get(j).initialized == false)
                    continue;

                Box<dim,long int> bx_src = loc_eg_box.get(i).bid.get(j).ebox;
                // convert into local coordinates
                bx_src -= gdb_ext.get(i).origin;

                // sub-domain connected with the external box
                size_t sub_id_dst = loc_eg_box.get(i).bid.get(j).sub;

                // local external ghost box connected
                size_t k = loc_eg_box.get(i).bid.get(j).k;

                Box<dim,long int> bx_dst = loc_ig_box.get(sub_id_dst).bid.get(k).box;

                // convert into local coordinates
                bx_dst -= gdb_ext.get(sub_id_dst).origin;

                // create 2 sub-grid iterators

                if (bx_dst.isValid() == false)
                {continue;}

#ifdef SE_CLASS1

                if (loc_ig_box.get(sub_id_dst).bid.get(k).sub != i)
                    std::cerr << "Error " << __FILE__ << ":" << __LINE__ << " source and destination are not correctly linked" << "\n";

                if (bx_src.getVolume() != bx_dst.getVolume())
                {std::cerr << "Error " << __FILE__ << ":" << __LINE__ << " source and destination do not match in size" << "\n";}

#endif

                auto & gd2 = loc_grid.get(sub_id_dst);
                gd2.template copy_to_op<op,prp...>(loc_grid.get(i),bx_src,bx_dst);

            }
        }
    }

    /*! \brief Send or queue the information
     *
     * This function sends or queues the information to the other processor. If the
     * device grid is a compressed format, as in multi-resolution, the communication is
     * queued because the other side does not know the size of the message. If it is
     * not compressed, the other side knows the size, so a direct send is done.
     *
     */
    void send_or_queue(size_t prc, char * pointer, char * pointer2)
    {
        if (device_grid::isCompressed() == false)
        {v_cl.send(prc,0,pointer,(char *)pointer2 - (char *)pointer);}
        else
        {
            send_prc_queue.add(prc);
            send_pointer.add(pointer);
            send_size.add(pointer2-pointer);
        }
    }

    //! NBX callback: allocate a receive buffer for an incoming message and record its origin
    static void * receive_dynamic(size_t msg_i ,size_t total_msg, size_t total_p, size_t i, size_t ri, size_t tag, void * ptr)
    {
        grid_dist_id_comm * gd = static_cast<grid_dist_id_comm *>(ptr);

        gd->recv_buffers.add();

        gd->recv_buffers.last().resize(msg_i);
        gd->recv_proc.add();
        gd->recv_proc.last().p_id = i;
        gd->recv_proc.last().size = msg_i;
        gd->recv_proc.last().i = gd->recv_proc.size()-1;

        if (gd->opt & RUN_ON_DEVICE)
        {
            return gd->recv_buffers.last().getDevicePointer();
        }

        return gd->recv_buffers.last().getPointer();
    }
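
    // Sketch (illustrative): receive_dynamic matches the callback signature
    // expected by Vcluster::sendrecvMultipleMessagesNBX and is registered with
    // `this` as the user pointer, exactly as done in queue_recv_data_get()
    // and queue_recv_data_put() below:
    //
    //   v_cl.sendrecvMultipleMessagesNBX(send_prc_queue.size(),&send_size.get(0),
    //                                    &send_prc_queue.get(0),&send_pointer.get(0),
    //                                    receive_dynamic,this);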

    /*! \brief Queue the receives for a ghost_get
     *
     * If the device grid is not compressed the receive sizes can be computed from
     * eg_box, so the receives are queued directly. If it is compressed, the sizes are
     * unknown on this side, so a dynamic NBX exchange is used and the received
     * buffers are reordered by source processor afterwards.
     *
     */
    template <typename prp_object>
    void queue_recv_data_get(const openfpm::vector<ep_box_grid<dim>> & eg_box,
            std::vector<size_t> & prp_recv,
            ExtPreAlloc<Memory> & prRecv_prp)
    {
#ifdef __NVCC__
        cudaDeviceSynchronize();
#endif

        if (device_grid::isCompressed() == false)
        {
            for ( size_t i = 0 ; i < eg_box.size() ; i++ )
            {
                prp_recv.push_back(eg_box.get(i).recv_pnt * sizeof(prp_object) + sizeof(size_t)*eg_box.get(i).n_r_box);
            }

            size_t tot_recv = ExtPreAlloc<Memory>::calculateMem(prp_recv);

            g_recv_prp_mem.resize(tot_recv);

            // queue the receives
            for ( size_t i = 0 ; i < eg_box.size() ; i++ )
            {
                prRecv_prp.allocate(prp_recv[i]);
                v_cl.recv(eg_box.get(i).prc,0,prRecv_prp.getPointer(),prp_recv[i]);
            }
        }
        else
        {
            // It is not possible to calculate the total information, so we have to receive dynamically

            if (send_prc_queue.size() == 0)
            {
                v_cl.sendrecvMultipleMessagesNBX(send_prc_queue.size(),NULL,
                                                 NULL,NULL,
                                                 receive_dynamic,this);
            }
            else
            {
                v_cl.sendrecvMultipleMessagesNBX(send_prc_queue.size(),&send_size.get(0),
                                                 &send_prc_queue.get(0),&send_pointer.get(0),
                                                 receive_dynamic,this);
            }

            // Reorder what we received

            recv_proc.sort();

            openfpm::vector_fr<BMemory<Memory>> tmp;
            tmp.resize(recv_proc.size());

            for (int i = 0 ; i < recv_proc.size() ; i++)
            {
                tmp.get(i).swap(recv_buffers.get(recv_proc.get(i).i));
            }

            recv_buffers.swap(tmp);
        }
    }
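
    // Note (added comment): in the compressed branch above, the NBX callback
    // fills recv_buffers in arrival order; recv_proc.sort() orders the entries
    // by source processor (rp_id::operator<) and the swap loop permutes the
    // buffers to match, so later unpack loops can index them deterministically.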

    /*! \brief Queue the receives for a ghost_put
     *
     * Same logic as queue_recv_data_get(): with a non-compressed device grid the
     * receive sizes are computed from ig_box and queued directly; with a compressed
     * one a dynamic NBX exchange is used.
     *
     */
    template <typename prp_object>
    void queue_recv_data_put(const openfpm::vector<ip_box_grid<dim>> & ig_box,
            std::vector<size_t> & prp_recv,
            ExtPreAlloc<Memory> & prRecv_prp)
    {
        if (device_grid::isCompressed() == false)
        {
            // Receive the information from each processor
            for ( size_t i = 0 ; i < ig_box.size() ; i++ )
            {
                prp_recv.push_back(0);

                // for each internal ghost box
                for (size_t j = 0 ; j < ig_box.get(i).bid.size() ; j++)
                {
                    // Internal ghost box
                    Box<dim,size_t> g_ig_box = ig_box.get(i).bid.get(j).box;
                    prp_recv[prp_recv.size()-1] += g_ig_box.getVolumeKey() * sizeof(prp_object) + sizeof(size_t);
                }
            }

            size_t tot_recv = ExtPreAlloc<Memory>::calculateMem(prp_recv);

            g_recv_prp_mem.resize(tot_recv);

            prRecv_prp.incRef();

            // queue the receives
            for ( size_t i = 0 ; i < ig_box.size() ; i++ )
            {
                prRecv_prp.allocate(prp_recv[i]);
                v_cl.recv(ig_box.get(i).prc,0,prRecv_prp.getPointer(),prp_recv[i]);
            }

            prRecv_prp.decRef();
        }
        else
        {
            // It is not possible to calculate the total information, so we have to receive dynamically

            if (send_prc_queue.size() == 0)
            {
                v_cl.sendrecvMultipleMessagesNBX(send_prc_queue.size(),NULL,
                                                 NULL,NULL,
                                                 receive_dynamic,this);
            }
            else
            {
                v_cl.sendrecvMultipleMessagesNBX(send_prc_queue.size(),&send_size.get(0),
                                                 &send_prc_queue.get(0),&send_pointer.get(0),
                                                 receive_dynamic,this);
            }
        }
    }

    template<typename mem,unsigned ... prp>
    void unpack_data_to_ext_ghost(ExtPreAlloc<mem> & emem,
                                  openfpm::vector<device_grid> & loc_grid,
                                  size_t i,
                                  const openfpm::vector<ep_box_grid<dim>> & eg_box,
                                  const std::unordered_map<size_t,size_t> & g_id_to_external_ghost_box,
                                  const openfpm::vector<e_box_multi<dim>> & eb_gid_list,
                                  Unpack_stat & ps,
                                  size_t opt)
    {
        // Unpack the ghost box global-id

        size_t g_id;
        // we move the gid from device to host
        if (opt & RUN_ON_DEVICE)
        {emem.deviceToHost(ps.getOffset(),ps.getOffset()+sizeof(size_t));}
        Unpacker<size_t,mem>::unpack(emem,g_id,ps);

        size_t l_id = 0;
        // convert the global id into a local id
        auto key = g_id_to_external_ghost_box.find(g_id);

        if (key != g_id_to_external_ghost_box.end()) // FOUND
        {l_id = key->second;}
        else
        {
            // NOT FOUND

            // It must always be found; if not, it means this processor has no idea of
            // what is stored and consequently does not know how to unpack. Print a
            // critical error and return.

            std::cerr << "Error: " << __FILE__ << ":" << __LINE__ << " Critical, cannot unpack object, because received data cannot be interpreted\n";

            return;
        }


        // we unpack into the last eb_gid_list entry, which is always big enough to
        // hold the information

        size_t le_id = eb_gid_list.get(l_id).full_match;
        size_t ei = eb_gid_list.get(l_id).e_id;

        // Get the external ghost box associated with the packed information
        Box<dim,long int> box = eg_box.get(ei).bid.get(le_id).l_e_box;
        size_t sub_id = eg_box.get(ei).bid.get(le_id).sub;

        // sub-grid where to unpack
        auto sub2 = loc_grid.get(sub_id).getIterator(box.getKP1(),box.getKP2(),false);

        rem_copy_opt opt_ = rem_copy_opt::NONE_OPT;
        if (opt & SKIP_LABELLING)
        {opt_ = rem_copy_opt::KEEP_GEOMETRY;}

        // Unpack
        loc_grid.get(sub_id).remove(box);
        Unpacker<device_grid,mem>::template unpack<decltype(sub2),decltype(v_cl.getGpuContext()),prp...>(emem,sub2,loc_grid.get(sub_id),ps,v_cl.getGpuContext(),opt_);

        // Copy the information to the other linked grids
        for (long int j = 0 ; j < (long int)eb_gid_list.get(l_id).eb_list.size() ; j++)
        {
            size_t nle_id = eb_gid_list.get(l_id).eb_list.get(j);
            if (nle_id != le_id)
            {
                size_t n_sub_id = eg_box.get(ei).bid.get(nle_id).sub;

                Box<dim,long int> box = eg_box.get(ei).bid.get(nle_id).l_e_box;
                Box<dim,long int> rbox = eg_box.get(ei).bid.get(nle_id).lr_e_box;

                loc_grid.get(n_sub_id).remove(box);
                loc_grid.get(n_sub_id).copy_to(loc_grid.get(sub_id),rbox,box);
            }
        }
    }

    template<typename mem, typename header_type,unsigned ... prp>
    void unpack_data_to_ext_ghost_with_header(ExtPreAlloc<mem> & emem,
                                  openfpm::vector<device_grid> & loc_grid,
                                  header_type & headers,
                                  size_t i,
                                  const openfpm::vector<ep_box_grid<dim>> & eg_box,
                                  const std::unordered_map<size_t,size_t> & g_id_to_external_ghost_box,
                                  const openfpm::vector<e_box_multi<dim>> & eb_gid_list,
                                  Unpack_stat & ps,
                                  size_t opt)
    {
        // Unpack the ghost box global-id

        size_t g_id;
        // the gid comes from the pre-parsed headers
        g_id = headers.template get<0>(i);
        ps.addOffset(sizeof(size_t));

        size_t l_id = 0;
        // convert the global id into a local id
        auto key = g_id_to_external_ghost_box.find(g_id);

        if (key != g_id_to_external_ghost_box.end()) // FOUND
        {l_id = key->second;}
        else
        {
            // NOT FOUND

            // It must always be found; if not, it means this processor has no idea of
            // what is stored and consequently does not know how to unpack. Print a
            // critical error and return.

            std::cerr << "Error: " << __FILE__ << ":" << __LINE__ << " Critical, cannot unpack object, because received data cannot be interpreted\n";

            return;
        }


        // we unpack into the last eb_gid_list entry, which is always big enough to
        // hold the information

        size_t le_id = eb_gid_list.get(l_id).full_match;
        size_t ei = eb_gid_list.get(l_id).e_id;

        // Get the external ghost box associated with the packed information
        Box<dim,long int> box = eg_box.get(ei).bid.get(le_id).l_e_box;
        size_t sub_id = eg_box.get(ei).bid.get(le_id).sub;

        // sub-grid where to unpack
        auto sub2 = loc_grid.get(sub_id).getIterator(box.getKP1(),box.getKP2(),false);

        rem_copy_opt opt_ = rem_copy_opt::NONE_OPT;
        if (opt & SKIP_LABELLING)
        {opt_ = rem_copy_opt::KEEP_GEOMETRY;}

        // Unpack
        loc_grid.get(sub_id).remove(box);
        Unpacker<device_grid,mem>::template unpack_with_header<decltype(sub2),decltype(headers),decltype(v_cl.getGpuContext()),prp...>
                                                            (emem,
                                                             sub2,
                                                             loc_grid.get(sub_id),
                                                             headers,
                                                             i,
                                                             ps,
                                                             v_cl.getGpuContext(),
                                                             opt_);

        // Copy the information to the other linked grids
        for (long int j = 0 ; j < (long int)eb_gid_list.get(l_id).eb_list.size() ; j++)
        {
            size_t nle_id = eb_gid_list.get(l_id).eb_list.get(j);
            if (nle_id != le_id)
            {
                size_t n_sub_id = eg_box.get(ei).bid.get(nle_id).sub;

                Box<dim,long int> box = eg_box.get(ei).bid.get(nle_id).l_e_box;
                Box<dim,long int> rbox = eg_box.get(ei).bid.get(nle_id).lr_e_box;

                loc_grid.get(n_sub_id).remove(box);
                loc_grid.get(n_sub_id).copy_to(loc_grid.get(sub_id),rbox,box);
            }
        }
    }

    template<unsigned int ... prp>
    void fill_headers(size_t opt)
    {
        if ((opt & KEEP_PROPERTIES) == 0 && device_grid::is_unpack_header_supported())
        {
            headers.resize(n_headers_slot * recv_buffers.size());

            Memory result;
            result.allocate(sizeof(int));

            pointers_h.resize(recv_buffers.size());

            for ( size_t i = 0 ; i < recv_buffers.size() ; i++ )
            {
                pointers_h.template get<0>(i) = recv_buffers.get(i).getDevicePointer();
                pointers_h.template get<1>(i) = (unsigned char *)recv_buffers.get(i).getDevicePointer() + recv_buffers.get(i).size();
            }

            pointers_h.template hostToDevice<0,1>();

            while(1)
            {
                for ( size_t i = 0 ; i < recv_buffers.size() ; i++ )
                {pointers_h.template get<2>(i) = 0;}
                pointers_h.template hostToDevice<2>();
                *(int *)result.getPointer() = 0;
                result.hostToDevice();

                device_grid::template unpack_headers<decltype(pointers_h),decltype(headers),decltype(result),prp ...>(pointers_h,headers,result,n_headers_slot);
                result.deviceToHost();

                if (*(int *)result.getPointer() == 0) {break;}

                n_headers_slot *= 2;
                headers.resize(n_headers_slot * recv_buffers.size());

            }

            headers.template deviceToHost<0,1,2>();
        }
    }
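
    // Note (added comment): unpack_headers() reports failure through `result`
    // when the preallocated header slots are exhausted; the loop above then
    // doubles n_headers_slot, resizes the headers vector, and retries until
    // every received buffer's headers fit.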

    template<unsigned ... prp>
    void merge_received_data_get(openfpm::vector<device_grid> & loc_grid,
                                 const openfpm::vector<ep_box_grid<dim>> & eg_box,
                                 const std::vector<size_t> & prp_recv,
                                 ExtPreAlloc<Memory> & prRecv_prp,
                                 const std::unordered_map<size_t,size_t> & g_id_to_external_ghost_box,
                                 const openfpm::vector<e_box_multi<dim>> & eb_gid_list,
                                 size_t opt)
    {
        if (device_grid::isCompressed() == false)
        {
            // wait for the communication to complete
            v_cl.execute();

            Unpack_stat ps;

            // Unpack the object
            for ( size_t i = 0 ; i < eg_box.size() ; i++ )
            {
                size_t mark_here = ps.getOffset();

                // for each external ghost box
                while (ps.getOffset() - mark_here < prp_recv[i])
                {
                    // Unpack the ghost box global-id

                    unpack_data_to_ext_ghost<Memory,prp ...>(prRecv_prp,loc_grid,i,
                                                             eg_box,g_id_to_external_ghost_box,eb_gid_list,
                                                             ps,opt);
                }
            }
        }
        else
        {
            fill_headers<prp ...>(opt);

            if (headers.size() != 0)
            {
                // Unpack the object
                for ( size_t i = 0 ; i < recv_buffers.size() ; i++ )
                {
                    Unpack_stat ps;
                    size_t mark_here = ps.getOffset();

                    ExtPreAlloc<BMemory<Memory>> mem(recv_buffers.get(i).size(),recv_buffers.get(i));

                    int j = 0;

                    // for each external ghost box
                    while (ps.getOffset() - mark_here < recv_buffers.get(i).size())
                    {
                        // Unpack the ghost box global-id

                        unpack_data_to_ext_ghost_with_header<BMemory<Memory>,decltype(headers),prp ...>(mem,loc_grid,headers,i*n_headers_slot+j,
                                                             eg_box,g_id_to_external_ghost_box,eb_gid_list,
                                                             ps,opt);

                        j++;
                    }
                }
            }
            else
            {
                // Unpack the object
                for ( size_t i = 0 ; i < recv_buffers.size() ; i++ )
                {
                    Unpack_stat ps;
                    size_t mark_here = ps.getOffset();

                    ExtPreAlloc<BMemory<Memory>> mem(recv_buffers.get(i).size(),recv_buffers.get(i));

                    // for each external ghost box
                    while (ps.getOffset() - mark_here < recv_buffers.get(i).size())
                    {
                        // Unpack the ghost box global-id

                        unpack_data_to_ext_ghost<BMemory<Memory>,prp ...>(mem,loc_grid,i,
                                                        eg_box,g_id_to_external_ghost_box,eb_gid_list,
                                                        ps,opt);
                    }
                }
            }
        }
    }


    template<template<typename,typename> class op, unsigned ... prp>
    void merge_received_data_put(Decomposition & dec, openfpm::vector<device_grid> & loc_grid,
                                 const openfpm::vector<ip_box_grid<dim>> & ig_box,
                                 const std::vector<size_t> & prp_recv,
                                 ExtPreAlloc<Memory> & prRecv_prp,
                                 const openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext,
                                 const openfpm::vector<std::unordered_map<size_t,size_t>> & g_id_to_internal_ghost_box)
    {
        typedef object<typename object_creator<typename T::type,prp...>::type> prp_object;

        if (device_grid::isCompressed() == false)
        {
            v_cl.execute();

            Unpack_stat ps;

            // Unpack the object
            for ( size_t i = 0 ; i < ig_box.size() ; i++ )
            {
                // for each internal ghost box
                for (size_t j = 0 ; j < ig_box.get(i).bid.size() ; j++)
                {
                    // Unpack the ghost box global-id

                    size_t g_id;
                    Unpacker<size_t,HeapMemory>::unpack(prRecv_prp,g_id,ps);

                    size_t l_id = 0;
                    // convert the global id into a local id
                    auto key = g_id_to_internal_ghost_box.get(i).find(g_id);
                    if (key != g_id_to_internal_ghost_box.get(i).end()) // FOUND
                    {l_id = key->second;}
                    else
                    {
                        // NOT FOUND

                        // It must always be found; if not, it means this processor has no idea of
                        // what is stored and consequently does not know how to unpack. Print a
                        // critical error and return.

                        std::cerr << "Error: " << __FILE__ << ":" << __LINE__ << " Critical, cannot unpack object, because received data cannot be interpreted\n";

                        return;
                    }

                    // Get the internal ghost box associated with the packed information
                    Box<dim,size_t> box = ig_box.get(i).bid.get(l_id).box;
                    size_t sub_id = ig_box.get(i).bid.get(l_id).sub;
                    box -= gdb_ext.get(sub_id).origin.template convertPoint<size_t>();

                    // sub-grid where to unpack
                    auto sub2 = loc_grid.get(sub_id).getIterator(box.getKP1(),box.getKP2());
                    grid_unpack_with_prp<op,prp_object,device_grid,Memory>::template unpacking<decltype(sub2),prp...>(prRecv_prp,sub2,loc_grid.get(sub_id),ps);
                }
            }
        }
        else
        {
            // Unpack the object
            for ( size_t i = 0 ; i < recv_buffers.size() ; i++ )
            {
                Unpack_stat ps;
                size_t mark_here = ps.getOffset();

                ExtPreAlloc<BMemory<HeapMemory>> mem(recv_buffers.get(i).size(),recv_buffers.get(i));

                // for each internal ghost box
                while (ps.getOffset() - mark_here < recv_buffers.get(i).size())
                {
                    // Unpack the ghost box global-id

                    size_t g_id;
                    Unpacker<size_t,BMemory<HeapMemory>>::unpack(mem,g_id,ps);

                    size_t pid = dec.ProctoID(recv_proc.get(i).p_id);

                    size_t l_id = 0;
                    // convert the global id into a local id
                    auto key = g_id_to_internal_ghost_box.get(pid).find(g_id);
                    if (key != g_id_to_internal_ghost_box.get(pid).end()) // FOUND
                    {l_id = key->second;}
                    else
                    {
                        // NOT FOUND

                        // It must always be found; if not, it means this processor has no idea of
                        // what is stored and consequently does not know how to unpack. Print a
                        // critical error and return.

                        std::cerr << "Error: " << __FILE__ << ":" << __LINE__ << " Critical, cannot unpack object, because received data cannot be interpreted\n";

                        return;
                    }

                    // Get the internal ghost box associated with the packed information
                    Box<dim,size_t> box = ig_box.get(pid).bid.get(l_id).box;
                    size_t sub_id = ig_box.get(pid).bid.get(l_id).sub;
                    box -= gdb_ext.get(sub_id).origin.template convertPoint<size_t>();

                    // sub-grid where to unpack
                    auto sub2 = loc_grid.get(sub_id).getIterator(box.getKP1(),box.getKP2());
                    grid_unpack_with_prp<op,prp_object,device_grid,BMemory<HeapMemory>>::template unpacking<decltype(sub2),prp...>(mem,sub2,loc_grid.get(sub_id),ps);
                }
            }
        }
    }

    //! Find the local sub-domain that contains the center of box_dst; return -1 if none does
    int find_local_sub(Box<dim, long int> & box_dst, openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext)
    {
        Point<dim,long int> point;
        for (size_t n = 0; n < dim; n++)
        {point.get(n) = (box_dst.getHigh(n) + box_dst.getLow(n))/2;}

        for (size_t j = 0; j < gdb_ext.size(); j++)
        {
            // Local sub-domain
            Box<dim,long int> sub = gdb_ext.get(j).Dbox;
            sub += gdb_ext.get(j).origin;

            if (sub.isInside(point) == true)
            {
                return j;
            }
        }
        return -1;
    }
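
    // Example (sketch): used when unpacking mapped data to locate the owning
    // local grid piece, as in unpack_buffer_to_local_grid() below:
    //
    //   int s = find_local_sub(box_dst,gdb_ext);
    //   if (s == -1)
    //   { /* the box center falls in no local sub-domain: report an error */ }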

public:

    /*! \brief Reconstruct the local grids
     *
     * \param m_oGrid_recv received grid pieces with their destination boxes
     * \param loc_grid local grids
     * \param gdb_ext information of the local grids
     * \param cd_sm cell decomposer
     */
    void grids_reconstruct(openfpm::vector<openfpm::vector<aggregate<device_grid,Box<dim,long int>>>> & m_oGrid_recv,
                           openfpm::vector<device_grid> & loc_grid,
                           openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext,
                           CellDecomposer_sm<dim,St,shift<dim,St>> & cd_sm)
    {
        // Clear the information of the grid
        for (size_t i = 0 ; i < loc_grid.size() ; i++)
        {loc_grid.get(i).clear();}

        for (size_t a = 0; a < m_oGrid_recv.size(); a++)
        {
            for (size_t k = 0; k < m_oGrid_recv.get(a).size(); k++)
            {
                device_grid & g = m_oGrid_recv.get(a).template get<0>(k);

                Box<dim,long int> b = m_oGrid_recv.get(a).template get<1>(k);

                Point<dim,St> p;
                for (size_t n = 0; n < dim; n++)
                {p.get(n) = g.getGrid().getBox().getHigh(n);}

                Point<dim,St> point;
                for (size_t n = 0; n < dim; n++)
                {point.get(n) = (b.getHigh(n) + b.getLow(n))/2;}

                for (size_t j = 0; j < gdb_ext.size(); j++)
                {
                    // Local sub-domain
                    Box<dim,long int> sub = gdb_ext.get(j).Dbox;
                    sub += gdb_ext.get(j).origin;

                    if (sub.isInside(point) == true)
                    {

                        grid_key_dx<dim> start = b.getKP1() - grid_key_dx<dim>(gdb_ext.get(j).origin.asArray());
                        grid_key_dx<dim> stop = b.getKP2() - grid_key_dx<dim>(gdb_ext.get(j).origin.asArray());

                        Box<dim,size_t> box_src;
                        Box<dim,size_t> box_dst;

                        for(size_t i = 0 ; i < dim ; i++)
                        {
                            box_dst.setLow(i,start.get(i));
                            box_dst.setHigh(i,stop.get(i));
                            box_src.setLow(i,0);
                            box_src.setHigh(i,stop.get(i)-start.get(i));
                        }

                        loc_grid.get(j).copy_to(g,box_src,box_dst);
                    }
                }
            }
        }

        std::cout << "UNPACKING " << std::endl;

        for (size_t i = 0 ; i < m_oGrid_recv.size() ; i++)
        {
            for (size_t j = 0 ; j < m_oGrid_recv.get(i).size() ; j++)
            {
                m_oGrid_recv.get(i).template get<0>(j).template deviceToHost<0>();
                std::cout << "UNPACKING POINTS: " << m_oGrid_recv.get(i).template get<0>(j).size() << std::endl;
                m_oGrid_recv.get(i).template get<0>(j).template removeCopyToFinalize<0>(v_cl.getGpuContext(), rem_copy_opt::PHASE1);
            }
        }

        for (size_t i = 0 ; i < m_oGrid_recv.size() ; i++)
        {
            for (size_t j = 0 ; j < m_oGrid_recv.get(i).size() ; j++)
            {m_oGrid_recv.get(i).template get<0>(j).template removeCopyToFinalize<0>(v_cl.getGpuContext(), rem_copy_opt::PHASE2);}
        }

        for (size_t i = 0 ; i < m_oGrid_recv.size() ; i++)
        {
            for (size_t j = 0 ; j < m_oGrid_recv.get(i).size() ; j++)
            {m_oGrid_recv.get(i).template get<0>(j).template removeCopyToFinalize<0>(v_cl.getGpuContext(), rem_copy_opt::PHASE3);}
        }
    }

    /*! \brief Label intersection grids for mappings
     *
     * Visit every intersection between an old local sub-domain and a new global
     * sub-domain belonging to processor p_id_cur, and invoke the lambda f on it.
     * The same routine drives both the size-calculation pass and the pack pass of map_().
     */
    template<typename lambda_t>
    void labelIntersectionGridsProcessor_and_pack(Decomposition & dec,
            CellDecomposer_sm<dim,St,shift<dim,St>> & cd_sm,
            openfpm::vector<device_grid> & loc_grid_old,
            openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext,
            openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext_old,
            openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext_global,
            size_t p_id_cur,
            lambda_t f)
    {
//        lbl_bc.clear();
//        lbl_bc.resize(v_cl.getProcessingUnits());

//        for (int i = 0 ; i < lbl_bc.size() ; i++)
//        {lbl_bc.get(i) = 0;}

//        // count

//        for (size_t i = 0; i < gdb_ext_old.size(); i++)
//        {
//            // Local old sub-domain in global coordinates
//            Box<dim,long int> sub_dom = gdb_ext_old.get(i).Dbox;
//            sub_dom += gdb_ext_old.get(i).origin;

//            for (size_t j = 0; j < gdb_ext_global.size(); j++)
//            {
//                size_t p_id = 0;

//                // Intersection box
//                Box<dim,long int> inte_box;

//                // Global new sub-domain in global coordinates
//                Box<dim,long int> sub_dom_new = gdb_ext_global.get(j).Dbox;
//                sub_dom_new += gdb_ext_global.get(j).origin;

//                bool intersect = false;

//                if (sub_dom.isValid() == true && sub_dom_new.isValid() == true)
//                    intersect = sub_dom.Intersect(sub_dom_new, inte_box);

//                if (intersect == true)
//                {
//                    auto inte_box_cont = cd_sm.convertCellUnitsIntoDomainSpace(inte_box);

//                    // Get the processor ID that stores the intersection box
//                    Point<dim,St> p;
//                    for (size_t n = 0; n < dim; n++)
//                        p.get(n) = (inte_box_cont.getHigh(n) + inte_box_cont.getLow(n))/2;

//                    p_id = dec.processorID(p);

//                    lbl_bc.get(p_id) += 1;
//                }
//            }
//        }

//        // reserve
//        for (int i = 0 ; i < lbl_b.size() ; i++)
//        {lbl_b.get(i).reserve(lbl_bc.get(i));}


        // Label all the intersection grids with the processor id where they should go

        for (size_t i = 0; i < gdb_ext_old.size(); i++)
        {
            // Local old sub-domain in global coordinates
            Box<dim,long int> sub_dom = gdb_ext_old.get(i).Dbox;
            sub_dom += gdb_ext_old.get(i).origin;

            for (size_t j = 0; j < gdb_ext_global.size(); j++)
            {
                size_t p_id = 0;

                // Intersection box
                Box<dim,long int> inte_box;

                // Global new sub-domain in global coordinates
                Box<dim,long int> sub_dom_new = gdb_ext_global.get(j).Dbox;
                sub_dom_new += gdb_ext_global.get(j).origin;

                bool intersect = false;

                if (sub_dom.isValid() == true && sub_dom_new.isValid() == true)
                    intersect = sub_dom.Intersect(sub_dom_new, inte_box);

                if (intersect == true)
                {
                    auto inte_box_cont = cd_sm.convertCellUnitsIntoDomainSpace(inte_box);

                    // Get the processor ID that stores the intersection box
                    Point<dim,St> p;
                    for (size_t n = 0; n < dim; n++)
                        p.get(n) = (inte_box_cont.getHigh(n) + inte_box_cont.getLow(n))/2;

                    p_id = dec.processorID(p);
                    if (p_id != p_id_cur)
                    {continue;}
//                    prc_sz.get(p_id)++;

                    // Transform the coordinates to local
                    auto inte_box_local = inte_box;

                    inte_box_local -= gdb_ext_old.get(i).origin;

                    // Grid corresponding to the gdb_ext_old.get(i) box
                    device_grid & gr = loc_grid_old.get(i);

                    // Size of the grid to send
                    size_t sz[dim];
                    for (size_t l = 0; l < dim; l++)
                    {
                        sz[l] = inte_box_local.getHigh(l) - inte_box_local.getLow(l) + 1;
                        //std::cout << "GR_send size on " << l << " dimension: " << sz[l] << std::endl;
                    }

                    // Grid to send
                    //device_grid gr_send(sz);
                    //gr_send.setMemory();
//                    lbl_b.get(p_id).add();
//                    device_grid & gr_send = lbl_b.get(p_id).last().template get<0>();
//                    Box<dim,long int> & box_send = lbl_b.get(p_id).last().template get<1>();
//                    gr_send.setMemory();

                    // Sub iterator across the intersection box inside the local grid
                    grid_key_dx<dim> start = inte_box_local.getKP1();
                    grid_key_dx<dim> stop = inte_box_local.getKP2();

                    Box<dim,long int> box_src;
                    Box<dim,long int> box_dst;

                    for(size_t i = 0 ; i < dim ; i++)
                    {
                        box_src.setLow(i,start.get(i));
                        box_src.setHigh(i,stop.get(i));
                        box_dst.setLow(i,inte_box.getLow(i));
                        box_dst.setHigh(i,inte_box.getHigh(i));
                    }

                    f(box_src,box_dst,gr,p_id);
                }
            }
        }

/*        for (size_t i = 0 ; i < loc_grid_old.size() ; i++)
        {
            loc_grid_old.get(i).template removeCopyToFinalize<0>(v_cl.getGpuContext(), rem_copy_opt::PHASE1);
        }

        for (size_t i = 0 ; i < loc_grid_old.size() ; i++)
        {
            loc_grid_old.get(i).template removeCopyToFinalize<0>(v_cl.getGpuContext(), rem_copy_opt::PHASE2);
        }

        for (size_t i = 0 ; i < loc_grid_old.size() ; i++)
        {
            loc_grid_old.get(i).template removeCopyToFinalize<0>(v_cl.getGpuContext(), rem_copy_opt::PHASE3);
        }*/
    }

    /*! \brief Unpack a buffer of mapped grid pieces into the local grids
     */
    template<int ... prp>
    void unpack_buffer_to_local_grid(openfpm::vector<device_grid> & loc_grid,
                                     openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext,
                                     ExtPreAlloc<Memory> & send_buffer,
                                     size_t sz)
    {
        // unpack local
        Unpack_stat ps;

        while (ps.getOffset() < sz)
        {
            send_buffer.reset();

            Box<dim,long int> box_dst;
            send_buffer.deviceToHost(ps.getOffset(),ps.getOffset()+sizeof(Box<dim,long int>));
            Unpacker<Box<dim,long int>,Memory>::unpack(send_buffer,box_dst,ps);

            int s = find_local_sub(box_dst,gdb_ext);
            if (s == -1)
            {std::cout << __FILE__ << ":" << __LINE__ << " map, error non-local subdomain " << std::endl;}

            // convert box_dst to local coordinates
            for (int d = 0 ; d < dim ; d++ )
            {
                box_dst.setLow(d, box_dst.getLow(d) - gdb_ext.get(s).origin.get(d));
                box_dst.setHigh(d, box_dst.getHigh(d) - gdb_ext.get(s).origin.get(d));
            }

            loc_grid.get(s).remove(box_dst);
            auto sub2 = loc_grid.get(s).getIterator(box_dst.getKP1(),box_dst.getKP2(),0);
            Unpacker<device_grid,Memory>::template unpack<decltype(sub2),decltype(v_cl.getGpuContext()),prp ...>(send_buffer,sub2,loc_grid.get(s),ps,v_cl.getGpuContext(),NONE_OPT);
        }

        for (int s = 0 ; s < loc_grid.size() ; s++)
        {loc_grid.get(s).template removeAddUnpackFinalize<prp ...>(v_cl.getGpuContext(),0);}
    }
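
    // Note (added comment): unpack_buffer_to_local_grid() is invoked twice in
    // map_() below: once on this rank's own send buffer (self-communication)
    // and once per received buffer, so local and remote pieces follow the
    // same unpack path.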

    /*! \brief Move all the grid pieces that do not belong to the local processor
     *         to the respective processor
     */
    template<int ... prp>
    void map_(Decomposition & dec,
              CellDecomposer_sm<dim,St,shift<dim,St>> & cd_sm,
              openfpm::vector<device_grid> & loc_grid,
              openfpm::vector<device_grid> & loc_grid_old,
              openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext,
              openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext_old,
              openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext_global,
              size_t opt)
    {
        this->opt = opt;

        openfpm::vector<size_t> send_buffer_sizes(v_cl.getProcessingUnits());
        openfpm::vector<Memory> send_buffers_;
        openfpm::vector<ExtPreAlloc<Memory>> send_buffers;
        send_buffers_.resize(v_cl.getProcessingUnits());
        send_buffers.resize(v_cl.getProcessingUnits());

        send_prc_queue.clear();
        send_pointer.clear();
        send_size.clear();

        for (int p_id = 0 ; p_id < v_cl.getProcessingUnits() ; p_id++)
        {
            for (int i = 0 ; i < loc_grid_old.size() ; i++)
            {loc_grid_old.get(i).packReset();}

            auto l = [&](Box<dim,long int> & box_src,
                         Box<dim,long int> & box_dst,
                         device_grid & gr,
                         size_t p_id){
                //gr_send.copy_to(gr,box_src,box_dst);

                Packer<Box<dim,long int>,BMemory<Memory>>::packRequest(box_dst,send_buffer_sizes.get(p_id));

                auto sub_it = gr.getIterator(box_src.getKP1(),box_src.getKP2(),0);
                gr.template packRequest<prp ...>(sub_it,send_buffer_sizes.get(p_id));

                //box_send = inte_box;
            };

            // First pass: label each box with the processor id it has to go to and
            // accumulate the required buffer sizes
            labelIntersectionGridsProcessor_and_pack(dec,cd_sm,loc_grid_old,gdb_ext,gdb_ext_old,gdb_ext_global,p_id,l);

            for (int i = 0 ; i < loc_grid_old.size(); i++)
            {
                loc_grid_old.get(i).template packCalculate<prp ...>(send_buffer_sizes.get(p_id),v_cl.getGpuContext());
            }

            send_buffers_.get(p_id).resize(send_buffer_sizes.get(p_id));
            send_buffers.get(p_id).setMemory(send_buffer_sizes.get(p_id),send_buffers_.get(p_id));
            send_buffers.get(p_id).incRef();

            // we now pack
            Pack_stat sts;

            auto lp = [&](Box<dim,long int> & box_src,
                          Box<dim,long int> & box_dst,
                          device_grid & gr,
                          size_t p_id){

                size_t offset = send_buffers.get(p_id).getOffsetEnd();
                Packer<Box<dim,long int>,Memory>::pack(send_buffers.get(p_id),box_dst,sts);
                size_t offset2 = send_buffers.get(p_id).getOffsetEnd();

                send_buffers.get(p_id).hostToDevice(offset,offset2);

                auto sub_it = gr.getIterator(box_src.getKP1(),box_src.getKP2(),0);

                Packer<device_grid,Memory>::template pack<decltype(sub_it),prp ...>(send_buffers.get(p_id),gr,sub_it,sts);
            };

            // Second pass: pack each box into the buffer of the processor it has to go to
            labelIntersectionGridsProcessor_and_pack(dec,cd_sm,loc_grid_old,gdb_ext,gdb_ext_old,gdb_ext_global,p_id,lp);

            for (int i = 0 ; i < loc_grid_old.size() ; i++)
            {
                loc_grid_old.get(i).template packFinalize<prp ...>(send_buffers.get(p_id),sts,0,false);
            }
        }

//        std::cout << "Local buffer: " << send_buffers.get(v_cl.rank()).size() << std::endl;
//        int sz = send_buffers.get(v_cl.rank()).size();
        //send_buffers.get(v_cl.rank()).reset();

//        // Print all the bytes in send_buffers_
//        for (int j = 0 ; j < 16 && j < sz ; j++) {
//            std::cout << "Local buffer " << v_cl.rank() << " " << ((long int *)send_buffers.get(v_cl.rank()).getPointer())[j] << " " << &((long int *)send_buffers.get(v_cl.rank()).getPointer())[j] << std::endl;
//        }

        unpack_buffer_to_local_grid<prp ...>(loc_grid,gdb_ext,send_buffers.get(v_cl.rank()),send_buffers.get(v_cl.rank()).size());

        //openfpm::vector<void *> send_pointer;
        //openfpm::vector<int> send_size;
        for (int i = 0 ; i < send_buffers.size() ; i++)
        {
            if (i != v_cl.rank())
            {
                send_pointer.add(send_buffers_.get(i).getDevicePointer());
                send_size.add(send_buffers_.get(i).size());
                send_prc_queue.add(i);
            }
        }

        size_t * send_size_ptr = NULL;
        size_t * send_prc_queue_ptr = NULL;
        void ** send_pointer_ptr = NULL;

        if (send_size.size() != 0)
        {
            send_size_ptr = &send_size.get(0);
            send_pointer_ptr = &send_pointer.get(0);
            send_prc_queue_ptr = &send_prc_queue.get(0);
        }

        recv_buffers.clear();
        recv_proc.clear();

        v_cl.sendrecvMultipleMessagesNBX(send_pointer.size(),send_size_ptr,
                                         send_prc_queue_ptr,send_pointer_ptr,
                                         receive_dynamic,this);


        for (int i = 0 ; i < recv_buffers.size() ; i++)
        {
            ExtPreAlloc<Memory> prAlloc_;
            prAlloc_.setMemory(recv_buffers.get(i).size(),recv_buffers.get(i));
            unpack_buffer_to_local_grid<prp ...>(loc_grid,gdb_ext,prAlloc_,recv_proc.get(i).size);
        }

        for (int i = 0 ; i < send_buffers.size() ; i++)
        {send_buffers.get(i).decRef();}
    }
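
    // Example (sketch, hypothetical caller): a grid_dist_id would typically
    // invoke map_() after a re-decomposition, roughly as
    //
    //   gcomm.template map_<0,1>(dec,cd_sm,loc_grid,loc_grid_old,
    //                            gdb_ext,gdb_ext_old,gdb_ext_global,opt);
    //
    // moving every grid piece that no longer belongs to this processor.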

    /*! \brief It fills the ghost part of the grids
     */
    template<int... prp> void ghost_get_(const openfpm::vector<ip_box_grid<dim>> & ig_box,
                                         const openfpm::vector<ep_box_grid<dim>> & eg_box,
                                         const openfpm::vector<i_lbox_grid<dim>> & loc_ig_box,
                                         const openfpm::vector<e_lbox_grid<dim>> & loc_eg_box,
                                         const openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext,
                                         const openfpm::vector<e_box_multi<dim>> & eb_gid_list,
                                         bool use_bx_def,
                                         openfpm::vector<device_grid> & loc_grid,
                                         const grid_sm<dim,void> & ginfo,
                                         std::unordered_map<size_t,size_t> & g_id_to_external_ghost_box,
                                         size_t opt)
    {
#ifdef PROFILE_SCOREP
        SCOREP_USER_REGION("ghost_get",SCOREP_USER_REGION_TYPE_FUNCTION)
#endif

        // Sending property object
        typedef object<typename object_creator<typename T::type,prp...>::type> prp_object;

        recv_buffers.clear();
        recv_proc.clear();
        send_prc_queue.clear();
        send_pointer.clear();
        send_size.clear();

        this->opt = opt;

        size_t req = 0;

        // Pack information
        Pack_stat sts;

        // We check if skip labelling is possible in this condition
        for (int i = 0 ; i < loc_grid.size() ; i++)
        {opt &= (loc_grid.get(i).isSkipLabellingPossible())?(int)-1:~SKIP_LABELLING;}

        #ifdef ENABLE_GRID_DIST_ID_PERF_STATS
        timer packing_time;
        packing_time.start();
        #endif

        if (!(opt & SKIP_LABELLING))
        {
            // first we initialize the pack buffer on all internal grids

            for (size_t i = 0 ; i < loc_grid.size() ; i++)
            {loc_grid.get(i).packReset();}

            // Calculate the total size required to pack the data to send
            for ( size_t i = 0 ; i < ig_box.size() ; i++ )
            {
                // for each ghost box
                for (size_t j = 0 ; j < ig_box.get(i).bid.size() ; j++)
                {
                    // And linked sub-domain
                    size_t sub_id = ig_box.get(i).bid.get(j).sub;
                    // Internal ghost box
                    Box<dim,long int> g_ig_box = ig_box.get(i).bid.get(j).box;

                    if (g_ig_box.isValid() == false)
                    {continue;}

                    g_ig_box -= gdb_ext.get(sub_id).origin.template convertPoint<size_t>();

                    // Pack a size_t for the internal ghost id
                    // Create a sub-grid iterator spanning the internal ghost layer
                    auto sub_it = loc_grid.get(sub_id).getIterator(g_ig_box.getKP1(),g_ig_box.getKP2(),false);

                    // get the size to pack
                    Packer<device_grid,Memory>::template packRequest<decltype(sub_it),prp...>(loc_grid.get(sub_id),sub_it,req);
                }
            }

            // Finalize the calculation
            for (size_t i = 0 ; i < loc_grid.size() ; i++)
            {loc_grid.get(i).template packCalculate<prp ...>(req,v_cl.getGpuContext());}

            // resize the property buffer memory
            g_send_prp_mem.resize(req);

            // Create an object of preallocated memory for properties
            ExtPreAlloc<Memory> & prAlloc_prp = *(new ExtPreAlloc<Memory>(req,g_send_prp_mem));
            // Necessary: we do not want this memory to be destroyed until it goes out of scope.
            // P.S. The Packer shares this memory with data structures, and they destroy it
            // when they see the reference counter reach zero.
            prAlloc_prp.incRef();

            pointers.clear();
            pointers2.clear();

            // Pack the information for each processor and send it
            for ( size_t i = 0 ; i < ig_box.size() ; i++ )
            {

                sts.mark();

                void * pointer;

                if (opt & RUN_ON_DEVICE)
                {pointer = prAlloc_prp.getDevicePointerEnd();}
                else
                {pointer = prAlloc_prp.getPointerEnd();}

                // for each ghost box
                for (size_t j = 0 ; j < ig_box.get(i).bid.size() ; j++)
                {
                    // we pack only if it is valid
                    if (ig_box.get(i).bid.get(j).box.isValid() == false)
                        continue;

                    // And linked sub-domain
                    size_t sub_id = ig_box.get(i).bid.get(j).sub;
                    // Internal ghost box
                    Box<dim,size_t> g_ig_box = ig_box.get(i).bid.get(j).box;
                    g_ig_box -= gdb_ext.get(sub_id).origin.template convertPoint<size_t>();
                    // Ghost box global id
                    size_t g_id = ig_box.get(i).bid.get(j).g_id;

                    // Pack a size_t for the internal ghost id
                    Packer<size_t,Memory>::pack(prAlloc_prp,g_id,sts);
                    prAlloc_prp.hostToDevice(prAlloc_prp.getOffset(),prAlloc_prp.getOffsetEnd());
                    // Create a sub-grid iterator spanning the internal ghost layer
                    auto sub_it = loc_grid.get(sub_id).getIterator(g_ig_box.getKP1(),g_ig_box.getKP2(),false);
                    // and pack the internal ghost grid
                    Packer<device_grid,Memory>::template pack<decltype(sub_it),prp...>(prAlloc_prp,loc_grid.get(sub_id),sub_it,sts);
                }
                // send the request

                void * pointer2;

                if (opt & RUN_ON_DEVICE)
                {pointer2 = prAlloc_prp.getDevicePointerEnd();}
                else
                {pointer2 = prAlloc_prp.getPointerEnd();}

                pointers.add(pointer);
                pointers2.add(pointer2);
            }

            for (size_t i = 0 ; i < loc_grid.size() ; i++)
            {
                rem_copy_opt opt_ = rem_copy_opt::NONE_OPT;
                if (opt & SKIP_LABELLING)
                {opt_ = rem_copy_opt::KEEP_GEOMETRY;}

                loc_grid.get(i).template packFinalize<prp ...>(prAlloc_prp,sts,opt_,true);
            }

            prAlloc_prp.decRef();
            delete &prAlloc_prp;
        }
        else
        {
            req = g_send_prp_mem.size();

            // Create an object of preallocated memory for properties
            ExtPreAlloc<Memory> & prAlloc_prp = *(new ExtPreAlloc<Memory>(req,g_send_prp_mem));
            prAlloc_prp.incRef();

            for (size_t i = 0 ; i < loc_grid.size() ; i++)
            {
                rem_copy_opt opt_ = rem_copy_opt::NONE_OPT;
                if (opt & SKIP_LABELLING)
                {opt_ = rem_copy_opt::KEEP_GEOMETRY;}

                loc_grid.get(i).template packFinalize<prp ...>(prAlloc_prp,sts,opt_,true);
            }

            prAlloc_prp.decRef();
            delete &prAlloc_prp;
        }

        #ifdef ENABLE_GRID_DIST_ID_PERF_STATS
        packing_time.stop();
        tot_pack += packing_time.getwct();
        timer sendrecv_time;
        sendrecv_time.start();
        #endif

        for ( size_t i = 0 ; i < ig_box.size() ; i++ )
        {
            // This function sends (or queues for sending) the information
            send_or_queue(ig_box.get(i).prc,(char *)pointers.get(i),(char *)pointers2.get(i));
        }

        // Calculate the total information to receive from each processor
        std::vector<size_t> prp_recv;

        // Create an object of preallocated memory for properties
        ExtPreAlloc<Memory> & prRecv_prp = *(new ExtPreAlloc<Memory>(0,g_recv_prp_mem));
        prRecv_prp.incRef();

        // Before waiting for the communication to complete we sync the local ghost
        // in order to overlap with the communication

        queue_recv_data_get<prp_object>(eg_box,prp_recv,prRecv_prp);

        #ifdef ENABLE_GRID_DIST_ID_PERF_STATS
        sendrecv_time.stop();
        tot_sendrecv += sendrecv_time.getwct();
        timer merge_loc_time;
        merge_loc_time.start();
        #endif

        ghost_get_local<prp...>(loc_ig_box,loc_eg_box,gdb_ext,loc_grid,g_id_to_external_ghost_box,ginfo,use_bx_def,opt);

        #ifdef ENABLE_GRID_DIST_ID_PERF_STATS
        merge_loc_time.stop();
        tot_loc_merge += merge_loc_time.getwct();
        timer merge_time;
        merge_time.start();
        #endif

        for (size_t i = 0 ; i < loc_grid.size() ; i++)
        {loc_grid.get(i).removeAddUnpackReset();}

        merge_received_data_get<prp ...>(loc_grid,eg_box,prp_recv,prRecv_prp,g_id_to_external_ghost_box,eb_gid_list,opt);

        rem_copy_opt opt_ = rem_copy_opt::NONE_OPT;
        if (opt & SKIP_LABELLING)
        {opt_ = rem_copy_opt::KEEP_GEOMETRY;}

        for (size_t i = 0 ; i < loc_grid.size() ; i++)
        {loc_grid.get(i).template removeAddUnpackFinalize<prp ...>(v_cl.getGpuContext(),opt_);}

        #ifdef ENABLE_GRID_DIST_ID_PERF_STATS
        merge_time.stop();
        tot_merge += merge_time.getwct();
        #endif

        prRecv_prp.decRef();
        delete &prRecv_prp;
    }
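
    // Note (added comment): ghost_get_() proceeds in phases: pack the internal
    // ghost layers (skipped when SKIP_LABELLING applies), send or queue the
    // per-processor buffers, overlap the communication with the local ghost
    // sync (ghost_get_local), then merge the received external ghost data.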

    /*! \brief It merges the information in the ghost with the real information
     *
     * \tparam op merge operation
     */
    template<template<typename,typename> class op,int... prp>
    void ghost_put_(Decomposition & dec,
                    const openfpm::vector<ip_box_grid<dim>> & ig_box,
                    const openfpm::vector<ep_box_grid<dim>> & eg_box,
                    const openfpm::vector<i_lbox_grid<dim>> & loc_ig_box,
                    const openfpm::vector<e_lbox_grid<dim>> & loc_eg_box,
                    const openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext,
                    openfpm::vector<device_grid> & loc_grid,
                    openfpm::vector<std::unordered_map<size_t,size_t>> & g_id_to_internal_ghost_box)
    {
        // Sending property object
        typedef object<typename object_creator<typename T::type,prp...>::type> prp_object;

        recv_buffers.clear();
        recv_proc.clear();
        send_prc_queue.clear();
        send_pointer.clear();
        send_size.clear();

        size_t req = 0;

        // Create a packing request vector
        for ( size_t i = 0 ; i < eg_box.size() ; i++ )
        {
            // for each ghost box
            for (size_t j = 0 ; j < eg_box.get(i).bid.size() ; j++)
            {
                // And linked sub-domain
                size_t sub_id = eg_box.get(i).bid.get(j).sub;
                // External ghost box
                Box<dim,long int> g_eg_box = eg_box.get(i).bid.get(j).g_e_box;

                if (g_eg_box.isValid() == false)
                    continue;

                g_eg_box -= gdb_ext.get(sub_id).origin.template convertPoint<size_t>();

                // Pack a size_t for the external ghost id

                // Create a sub-grid iterator spanning the external ghost layer
                auto sub_it = loc_grid.get(sub_id).getIterator(g_eg_box.getKP1(),g_eg_box.getKP2());

                // and pack the external ghost grid
                Packer<device_grid,HeapMemory>::template packRequest<decltype(sub_it),prp...>(loc_grid.get(sub_id),sub_it,req);
            }
        }

        // resize the property buffer memory
        g_send_prp_mem.resize(req);

        // Create an object of preallocated memory for properties
        ExtPreAlloc<Memory> & prAlloc_prp = *(new ExtPreAlloc<Memory>(req,g_send_prp_mem));

        prAlloc_prp.incRef();

        // Pack information
        Pack_stat sts;

        // Pack the information for each processor and send it
        for ( size_t i = 0 ; i < eg_box.size() ; i++ )
        {

            sts.mark();
            void * pointer = prAlloc_prp.getPointerEnd();

            // for each ghost box
            for (size_t j = 0 ; j < eg_box.get(i).bid.size() ; j++)
            {
                // we pack only if it is valid
                if (eg_box.get(i).bid.get(j).g_e_box.isValid() == false)
                    continue;

                // And linked sub-domain
                size_t sub_id = eg_box.get(i).bid.get(j).sub;
                // External ghost box
                Box<dim,size_t> g_eg_box = eg_box.get(i).bid.get(j).g_e_box;
                g_eg_box -= gdb_ext.get(sub_id).origin.template convertPoint<size_t>();
                // Ghost box global id
                size_t g_id = eg_box.get(i).bid.get(j).g_id;

                // Pack a size_t for the external ghost id
                Packer<size_t,HeapMemory>::pack(prAlloc_prp,g_id,sts);
                // Create a sub-grid iterator spanning the external ghost layer
                auto sub_it = loc_grid.get(sub_id).getIterator(g_eg_box.getKP1(),g_eg_box.getKP2());
                // and pack the external ghost grid
                Packer<device_grid,HeapMemory>::template pack<decltype(sub_it),prp...>(prAlloc_prp,loc_grid.get(sub_id),sub_it,sts);
            }
            // send the request

            void * pointer2 = prAlloc_prp.getPointerEnd();

            // This function sends (or queues for sending) the information
            send_or_queue(ig_box.get(i).prc,(char *)pointer,(char *)pointer2);
        }

        // Calculate the total information to receive from each processor
        std::vector<size_t> prp_recv;

        // Create an object of preallocated memory for properties
        ExtPreAlloc<Memory> & prRecv_prp = *(new ExtPreAlloc<Memory>(0,g_recv_prp_mem));
        prRecv_prp.incRef();

        queue_recv_data_put<prp_object>(ig_box,prp_recv,prRecv_prp);

        // Before waiting for the communication to complete we sync the local ghost
        // in order to overlap with the communication

        ghost_put_local<op,prp...>(loc_ig_box,loc_eg_box,gdb_ext,loc_grid,g_id_to_internal_ghost_box);

        merge_received_data_put<op,prp ...>(dec,loc_grid,ig_box,prp_recv,prRecv_prp,gdb_ext,g_id_to_internal_ghost_box);

        prRecv_prp.decRef();
        prAlloc_prp.decRef();
        delete &prAlloc_prp;
        delete &prRecv_prp;
    }

    /*! \brief Constructor
     */
    grid_dist_id_comm()
    :v_cl(create_vcluster<Memory>())
    {

    }

    /*! \brief Copy constructor
     */
    grid_dist_id_comm(const grid_dist_id_comm<dim,St,T,Decomposition,Memory,device_grid> & gc)
    :v_cl(gc.v_cl)
    {

    }
};


#endif /* SRC_GRID_GRID_DIST_ID_COMM_HPP_ */