OpenFPM  5.2.0
Project that contains the implementation of distributed structures
vector_dist_comm.hpp
1 /*
2  * vector_dist_comm.hpp
3  *
4  * Created on: Aug 18, 2016
5  * Author: i-bird
6  */
7 
8 #ifndef SRC_VECTOR_VECTOR_DIST_COMM_HPP_
9 #define SRC_VECTOR_VECTOR_DIST_COMM_HPP_
10 
11 #define TEST1
12 
13 #if defined(CUDA_GPU) && defined(__NVCC__)
14 #include "Vector/cuda/vector_dist_cuda_funcs.cuh"
15 #include "util/cuda/kernels.cuh"
16 #endif
17 
18 #include "Vector/util/vector_dist_funcs.hpp"
19 #include "cuda/vector_dist_comm_util_funcs.cuh"
20 #include "util/cuda/scan_ofp.cuh"
21 
22 template<typename T>
23 struct DEBUG
24 {
25  static float ret(T & tmp)
26  {
27  return 0.0;
28  }
29 };
30 
31 template<>
32 struct DEBUG<float &>
33 {
34  static float ret(float & tmp)
35  {
36  return tmp;
37  }
38 };
39 
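40 /*! \brief Translate the ghost_get/ghost_put options into the matching
41  * options for the cluster send/receive calls
42  *
43  */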
44 inline static size_t compute_options(size_t opt)
45 {
46  size_t opt_ = NONE;
47  if ((opt & NO_CHANGE_ELEMENTS) && (opt & SKIP_LABELLING))
48  {opt_ = RECEIVE_KNOWN | KNOWN_ELEMENT_OR_BYTE;}
49 
50  if (opt & RUN_ON_DEVICE)
51  {
52 #if defined(CUDA_GPU) && defined(__NVCC__)
53  // On RUN_ON_DEVICE the buffers live on the device, so enable CUDA-aware MPI
54  opt_ |= MPI_GPU_DIRECT;
55 #else
56  std::cout << __FILE__ << ":" << __LINE__ << " error: to use the option RUN_ON_DEVICE you must compile with NVCC" << std::endl;
57 #endif
58  }
59 
60  return opt_;
61 }
62 
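63 /*! \brief Implementation of the ghost exchange communication
64  *
65  * It dispatches the send/receive of ghost particle positions and
66  * properties, synchronously here and asynchronously in the
67  * GHOST_ASYNC specialization below
68  */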
69 template<unsigned int impl, template<typename> class layout_base, unsigned int ... prp>
70 struct ghost_exchange_comm_impl
71 {
72  template<typename Vcluster_type, typename vector_prop_type,
73  typename vector_pos_type, typename send_vector,
74  typename prc_recv_get_type, typename prc_g_opart_type,
75  typename recv_sz_get_type, typename recv_sz_get_byte_type,
76  typename g_opart_sz_type>
77  static inline void sendrecv_prp(Vcluster_type & v_cl,
78  openfpm::vector<send_vector> & g_send_prp,
79  vector_prop_type & v_prp,
80  vector_pos_type & v_pos,
81  prc_g_opart_type & prc_g_opart,
82  prc_recv_get_type & prc_recv_get,
83  recv_sz_get_type & recv_sz_get,
84  recv_sz_get_byte_type & recv_sz_get_byte,
85  g_opart_sz_type & g_opart_sz,
86  size_t ghostMarker,
87  size_t opt)
88  {
89  // if there are no properties skip
90  // SSendRecvP sends everything when we do not give properties
91 
92  if (sizeof...(prp) != 0)
93  {
94  size_t opt_ = compute_options(opt);
95  if (opt & SKIP_LABELLING)
96  {
97  if (opt & RUN_ON_DEVICE)
98  {
99  op_ssend_gg_recv_merge_run_device opm(ghostMarker);
100  v_cl.template SSendRecvP_op<op_ssend_gg_recv_merge_run_device,send_vector,decltype(v_prp),layout_base,prp...>(g_send_prp,v_prp,prc_g_opart,opm,prc_recv_get,recv_sz_get,opt_);
101  }
102  else
103  {
104  op_ssend_gg_recv_merge opm(ghostMarker);
105  v_cl.template SSendRecvP_op<op_ssend_gg_recv_merge,send_vector,decltype(v_prp),layout_base,prp...>(g_send_prp,v_prp,prc_g_opart,opm,prc_recv_get,recv_sz_get,opt_);
106  }
107  }
108  else
109  {v_cl.template SSendRecvP<send_vector,decltype(v_prp),layout_base,prp...>(g_send_prp,v_prp,prc_g_opart,prc_recv_get,recv_sz_get,recv_sz_get_byte,opt_);}
110 
111  // fill g_opart_sz
112  g_opart_sz.resize(prc_g_opart.size());
113 
114  for (size_t i = 0 ; i < prc_g_opart.size() ; i++)
115  g_opart_sz.get(i) = g_send_prp.get(i).size();
116  }
117  }
118 
119  template<typename Vcluster_type, typename vector_prop_type,
120  typename vector_pos_type, typename send_pos_vector,
121  typename prc_recv_get_type, typename prc_g_opart_type,
122  typename recv_sz_get_type>
123  static inline void sendrecv_pos(Vcluster_type & v_cl,
124  openfpm::vector<send_pos_vector> & g_pos_send,
125  vector_prop_type & v_prp,
126  vector_pos_type & v_pos,
127  prc_recv_get_type & prc_recv_get,
128  recv_sz_get_type & recv_sz_get,
129  prc_g_opart_type & prc_g_opart,
130  size_t opt)
131  {
132  size_t opt_ = compute_options(opt);
133  if (opt & SKIP_LABELLING)
134  {
135  v_cl.template SSendRecv<send_pos_vector,decltype(v_pos),layout_base>(g_pos_send,v_pos,prc_g_opart,prc_recv_get,recv_sz_get,opt_);
136  }
137  else
138  {
139  prc_recv_get.clear();
140  recv_sz_get.clear();
141  v_cl.template SSendRecv<send_pos_vector,decltype(v_pos),layout_base>(g_pos_send,v_pos,prc_g_opart,prc_recv_get,recv_sz_get,opt_);
142  }
143  }
144 
145  template<typename Vcluster_type, typename vector_prop_type,
146  typename vector_pos_type, typename send_pos_vector,
147  typename prc_recv_get_type, typename prc_g_opart_type,
148  typename recv_sz_get_type>
149  static inline void sendrecv_pos_wait(Vcluster_type & v_cl,
150  openfpm::vector<send_pos_vector> & g_pos_send,
151  vector_prop_type & v_prp,
152  vector_pos_type & v_pos,
153  prc_recv_get_type & prc_recv_get,
154  recv_sz_get_type & recv_sz_get,
155  prc_g_opart_type & prc_g_opart,
156  size_t opt)
157  {}
158 
159  template<typename Vcluster_type, typename vector_prop_type,
160  typename vector_pos_type, typename send_vector,
161  typename prc_recv_get_type, typename prc_g_opart_type,
162  typename recv_sz_get_type, typename recv_sz_get_byte_type,
163  typename g_opart_sz_type>
164  static inline void sendrecv_prp_wait(Vcluster_type & v_cl,
165  openfpm::vector<send_vector> & g_send_prp,
166  vector_prop_type & v_prp,
167  vector_pos_type & v_pos,
168  prc_g_opart_type & prc_g_opart,
169  prc_recv_get_type & prc_recv_get,
170  recv_sz_get_type & recv_sz_get,
171  recv_sz_get_byte_type & recv_sz_get_byte,
172  g_opart_sz_type & g_opart_sz,
173  size_t ghostMarker,
174  size_t opt)
175  {}
176 };
177 
178 
179 template<template<typename> class layout_base, unsigned int ... prp>
180 struct ghost_exchange_comm_impl<GHOST_ASYNC,layout_base, prp ... >
181 {
182  template<typename Vcluster_type, typename vector_prop_type,
183  typename vector_pos_type, typename send_vector,
184  typename prc_recv_get_type, typename prc_g_opart_type,
185  typename recv_sz_get_type, typename recv_sz_get_byte_type,
186  typename g_opart_sz_type>
187  static inline void sendrecv_prp(Vcluster_type & v_cl,
188  openfpm::vector<send_vector> & g_send_prp,
189  vector_prop_type & v_prp,
190  vector_pos_type & v_pos,
191  prc_g_opart_type & prc_g_opart,
192  prc_recv_get_type & prc_recv_get,
193  recv_sz_get_type & recv_sz_get,
194  recv_sz_get_byte_type & recv_sz_get_byte,
195  g_opart_sz_type & g_opart_sz,
196  size_t ghostMarker,
197  size_t opt)
198  {
199  prc_recv_get.clear();
200  recv_sz_get.clear();
201 
202  // if there are no properties skip
203  // SSendRecvP sends everything when we do not give properties
204 
205  if (sizeof...(prp) != 0)
206  {
207  size_t opt_ = compute_options(opt);
208  if (opt & SKIP_LABELLING)
209  {
210  if (opt & RUN_ON_DEVICE)
211  {
212  op_ssend_gg_recv_merge_run_device opm(ghostMarker);
213  v_cl.template SSendRecvP_opAsync<op_ssend_gg_recv_merge_run_device,send_vector,decltype(v_prp),layout_base,prp...>(g_send_prp,v_prp,prc_g_opart,opm,prc_recv_get,recv_sz_get,opt_);
214  }
215  else
216  {
217  op_ssend_gg_recv_merge opm(ghostMarker);
218  v_cl.template SSendRecvP_opAsync<op_ssend_gg_recv_merge,send_vector,decltype(v_prp),layout_base,prp...>(g_send_prp,v_prp,prc_g_opart,opm,prc_recv_get,recv_sz_get,opt_);
219  }
220  }
221  else
222  {v_cl.template SSendRecvPAsync<send_vector,decltype(v_prp),layout_base,prp...>(g_send_prp,v_prp,prc_g_opart,prc_recv_get,recv_sz_get,recv_sz_get_byte,opt_);}
223  }
224 
225  // fill g_opart_sz
226  g_opart_sz.resize(prc_g_opart.size());
227 
228  for (size_t i = 0 ; i < prc_g_opart.size() ; i++)
229  {g_opart_sz.get(i) = g_send_prp.get(i).size();}
230  }
231 
232  template<typename Vcluster_type, typename vector_prop_type,
233  typename vector_pos_type, typename send_pos_vector,
234  typename prc_recv_get_type, typename prc_g_opart_type,
235  typename recv_sz_get_type>
236  static inline void sendrecv_pos(Vcluster_type & v_cl,
237  openfpm::vector<send_pos_vector> & g_pos_send,
238  vector_prop_type & v_prp,
239  vector_pos_type & v_pos,
240  prc_recv_get_type & prc_recv_get,
241  recv_sz_get_type & recv_sz_get,
242  prc_g_opart_type & prc_g_opart,
243  size_t opt)
244  {
245  prc_recv_get.clear();
246  recv_sz_get.clear();
247 
248  size_t opt_ = compute_options(opt);
249  if (opt & SKIP_LABELLING)
250  {
251  v_cl.template SSendRecvAsync<send_pos_vector,decltype(v_pos),layout_base>(g_pos_send,v_pos,prc_g_opart,prc_recv_get,recv_sz_get,opt_);
252  }
253  else
254  {
255  prc_recv_get.clear();
256  recv_sz_get.clear();
257  v_cl.template SSendRecvAsync<send_pos_vector,decltype(v_pos),layout_base>(g_pos_send,v_pos,prc_g_opart,prc_recv_get,recv_sz_get,opt_);
258  }
259  }
260 
261  template<typename Vcluster_type, typename vector_prop_type,
262  typename vector_pos_type, typename send_pos_vector,
263  typename prc_recv_get_type, typename prc_g_opart_type,
264  typename recv_sz_get_type>
265  static inline void sendrecv_pos_wait(Vcluster_type & v_cl,
266  openfpm::vector<send_pos_vector> & g_pos_send,
267  vector_prop_type & v_prp,
268  vector_pos_type & v_pos,
269  prc_recv_get_type & prc_recv_get,
270  recv_sz_get_type & recv_sz_get,
271  prc_g_opart_type & prc_g_opart,
272  size_t opt)
273  {
274  size_t opt_ = compute_options(opt);
275  if (opt & SKIP_LABELLING)
276  {
277  v_cl.template SSendRecvWait<send_pos_vector,decltype(v_pos),layout_base>(g_pos_send,v_pos,prc_g_opart,prc_recv_get,recv_sz_get,opt_);
278  }
279  else
280  {
281  v_cl.template SSendRecvWait<send_pos_vector,decltype(v_pos),layout_base>(g_pos_send,v_pos,prc_g_opart,prc_recv_get,recv_sz_get,opt_);
282  }
283  }
284 
285  template<typename Vcluster_type, typename vector_prop_type,
286  typename vector_pos_type, typename send_vector,
287  typename prc_recv_get_type, typename prc_g_opart_type,
288  typename recv_sz_get_type, typename recv_sz_get_byte_type,
289  typename g_opart_sz_type>
290  static inline void sendrecv_prp_wait(Vcluster_type & v_cl,
291  openfpm::vector<send_vector> & g_send_prp,
292  vector_prop_type & v_prp,
293  vector_pos_type & v_pos,
294  prc_g_opart_type & prc_g_opart,
295  prc_recv_get_type & prc_recv_get,
296  recv_sz_get_type & recv_sz_get,
297  recv_sz_get_byte_type & recv_sz_get_byte,
298  g_opart_sz_type & g_opart_sz,
299  size_t ghostMarker,
300  size_t opt)
301  {
302  // if there are no properties skip
303  // SSendRecvP sends everything when we do not give properties
304 
305  if (sizeof...(prp) != 0)
306  {
307  size_t opt_ = compute_options(opt);
308  if (opt & SKIP_LABELLING)
309  {
310  if (opt & RUN_ON_DEVICE)
311  {
312  op_ssend_gg_recv_merge_run_device opm(ghostMarker);
313  v_cl.template SSendRecvP_opWait<op_ssend_gg_recv_merge_run_device,send_vector,decltype(v_prp),layout_base,prp...>(g_send_prp,v_prp,prc_g_opart,opm,prc_recv_get,recv_sz_get,opt_);
314  }
315  else
316  {
317  op_ssend_gg_recv_merge opm(ghostMarker);
318  v_cl.template SSendRecvP_opWait<op_ssend_gg_recv_merge,send_vector,decltype(v_prp),layout_base,prp...>(g_send_prp,v_prp,prc_g_opart,opm,prc_recv_get,recv_sz_get,opt_);
319  }
320  }
321  else
322  {v_cl.template SSendRecvPWait<send_vector,decltype(v_prp),layout_base,prp...>(g_send_prp,v_prp,prc_g_opart,prc_recv_get,recv_sz_get,recv_sz_get_byte,opt_);}
323  }
324  }
325 };
326 
327 
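328 /*! \brief This class is an helper for the communication of vector_dist
329  *
330  * \tparam dim dimensionality of the space where the elements live
331  * \tparam St type of space, float, double ...
332  * \tparam prop properties the vector element store
333  * \tparam Decomposition decomposition strategy to use, CartDecomposition ...
334  * \tparam Memory memory pool where to store the information, HeapMemory ...
335  * \tparam layout_base memory layout for the internal vectors
336  *
337  * \see vector_dist
338  *
339  */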
340 template<unsigned int dim,
341  typename St,
342  typename prop,
343  typename Decomposition = CartDecomposition<dim,St>,
344  typename Memory = HeapMemory,
345  template<typename> class layout_base = memory_traits_lin>
346 class vector_dist_comm
347 {
349  size_t v_sub_unit_factor = 64;
350 
351  //! definition of the send vector for position
352 
353  typedef openfpm::vector<Point<dim, St>,Memory,layout_base,openfpm::grow_policy_identity> send_pos_vector;
354 
355  //! VCluster
356  Vcluster<Memory> & v_cl;
357 
358  //! Domain decomposition
359  Decomposition dec;
360 
361  //! It map the processor id with the communication request into map procedure
362  openfpm::vector<size_t> p_map_req;
363 
364  //! For each near processor, outgoing particle id
365  //! - first id: particle id
366  //! - second id: shift id
367  //! - third id: processor id
368  openfpm::vector<aggregate<int, int, int>,
369  Memory,
370  layout_base > m_opart;
371 
372  //! For each near processor, particle id to send
373  //! - first id: particle id
374  //! - second id: shift id
375 
376  openfpm::vector<openfpm::vector<aggregate<size_t,size_t>>> g_opart;
377  //! Same as g_opart but on device, the vector of vector is flatten into a single vector
378  openfpm::vector<aggregate<unsigned int,unsigned long int>,
379  CudaMemory,
380  memory_traits_inte> g_opart_device;
381 
382 
383  //! Per processor number of particle g_opart_sz.get(i) = g_opart.get(i).size()
384  openfpm::vector<size_t> g_opart_sz;
385 
386  //! processor rank list of g_opart
387  openfpm::vector<size_t> prc_g_opart;
388 
389  //! Helper buffer for computation (on GPU) of local particles (position)
390  openfpm::vector<Point<dim, St>,Memory,layout_base> v_pos_tmp;
391 
392  //! Helper buffer for computation (on GPU) of local particles (properties)
393  openfpm::vector<prop,Memory,layout_base> v_prp_tmp;
394 
395  //! processors involved in the last ghost_get (position)
396  openfpm::vector<size_t> prc_recv_get_pos;
397  openfpm::vector<size_t> prc_recv_get_prp;
398 
399  //! the same as prc_recv_get but for put
400 
401  openfpm::vector<size_t> prc_recv_put;
402 
403  //! the same as prc_recv_get but for map
404  openfpm::vector<size_t> prc_recv_map;
405 
406  //! sizes received in the last ghost_get (position)
407  openfpm::vector<size_t> recv_sz_get_pos;
408  openfpm::vector<size_t> recv_sz_get_prp;
409  //! Conversion to byte of recv_sz_get
410 
411  openfpm::vector<size_t> recv_sz_get_byte;
412 
413  //! The same as recv_sz_get but for put
414 
415  openfpm::vector<size_t> recv_sz_put;
416 
417  //! The same as recv_sz_get but for map
418  openfpm::vector<size_t> recv_sz_map;
419 
420  //! elements sent for each processors (ghost_get)
421  openfpm::vector<size_t> prc_sz_gg;
422  //! temporary buffer to processors ids
423  openfpm::vector<aggregate<unsigned int>,
424  Memory,
425  layout_base> proc_id_out;
426 
427  //! temporary buffer for the scan result
428  openfpm::vector<aggregate<unsigned int>,
429  Memory,
430  layout_base> starts;
431 
432  //! Processor communication size
433 
434  openfpm::vector<aggregate<unsigned int,unsigned int>,Memory,layout_base> prc_offset;
435  openfpm::vector<aggregate<unsigned int,unsigned int>,Memory,layout_base> prc_sz;
436 
437  //! Temporary CudaMemory to do stuff
438  CudaMemory mem;
439 
440  //! Marker in v_prp from where the locally replicated ghost particles start
441  size_t lg_m;
442 
443  //! Sending buffer
444  openfpm::vector_fr<Memory> hsmem;
445 
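446  //! process the particle with properties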
447  template<typename prp_object, int ... prp>
448  struct proc_with_prp
449  {
450  //! process the particle
451  template<typename T1, typename T2> inline static void proc(size_t lbl, size_t cnt, size_t id, T1 & v_prp, T2 & m_prp)
452  {
453  // source object type
454  typedef decltype(v_prp.get(id)) encap_src;
455  // destination object type
456  typedef decltype(m_prp.get(lbl).get(cnt)) encap_dst;
457 
458  // Copy only the selected properties
459  object_si_d<encap_src, encap_dst, OBJ_ENCAP, prp...>(v_prp.get(id), m_prp.get(lbl).get(cnt));
460  }
461  };
462 
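463  /*! \brief Get the number of particles received from each processor during the last ghost_get
464  *
465  * \param i processor (sequential id)
466  *
467  * \return the number of particles
468  */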
469  size_t get_last_ghost_get_received_parts(size_t i)
470  {
471  // If the last ghost_get did not have properties the information about the number of particles
472  // received is in recv_sz_get_pos
473  if (recv_sz_get_prp.size() != 0)
474  {return recv_sz_get_prp.get(i);}
475  else
476  {return recv_sz_get_pos.get(i);}
477  }
478 
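479  /*! \brief Get the number of processor involved during the last ghost_get
480  *
481  * \return the number of processors
482  */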
483  size_t get_last_ghost_get_num_proc()
484  {
485  if (prc_recv_get_prp.size() != 0)
486  {return prc_recv_get_prp.size();}
487  else
488  {return prc_recv_get_pos.size();}
489  }
490 
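491  /*! \brief Get the processors involved during the last ghost_get
492  *
493  * \return the list of processors
494  */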
495  openfpm::vector<size_t> & get_last_ghost_get_num_proc_vector()
496  {
497  if (prc_recv_get_prp.size() != 0)
498  {return prc_recv_get_prp;}
499  else
500  {return prc_recv_get_pos;}
501  }
502 
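503  /*! \brief Calculate sending buffer size for each processor
504  *
505  * \param prc_sz number of particles to send for each processor
506  * \param prc_sz_r number of particles to send for each processor (reduced list)
507  * \param prc_r list of the processors we communicate with
508  */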
509  void calc_send_buffers(openfpm::vector<aggregate<unsigned int,unsigned int>,Memory,layout_base> & prc_sz,
510  openfpm::vector<size_t> & prc_sz_r,
511  openfpm::vector<size_t> & prc_r,
512  size_t opt)
513  {
514  if (opt & RUN_ON_DEVICE)
515  {
516 #ifndef TEST1
517  size_t prev_off = 0;
518  for (size_t i = 0; i < prc_sz.size() ; i++)
519  {
520  if (prc_sz.template get<1>(i) != (unsigned int)-1)
521  {
522  prc_r.add(prc_sz.template get<1>(i));
523  prc_sz_r.add(prc_sz.template get<0>(i) - prev_off);
524  }
525  prev_off = prc_sz.template get<0>(i);
526  }
527 #else
528 
529  // Calculate the sending buffer size for each processor, put this information in
530  // a contiguous buffer
531 
532  for (size_t i = 0; i < v_cl.getProcessingUnits(); i++)
533  {
534  if (prc_sz.template get<0>(i) != 0 && v_cl.rank() != i)
535  {
536  prc_r.add(i);
537  prc_sz_r.add(prc_sz.template get<0>(i));
538  }
539  }
540 
541 #endif
542  }
543  else
544  {
545  // Calculate the sending buffer size for each processor, put this information in
546  // a contiguous buffer
547 
548  p_map_req.resize(v_cl.getProcessingUnits());
549  for (size_t i = 0; i < v_cl.getProcessingUnits(); i++)
550  {
551  if (prc_sz.template get<0>(i) != 0)
552  {
553  p_map_req.get(i) = prc_r.size();
554  prc_r.add(i);
555  prc_sz_r.add(prc_sz.template get<0>(i));
556  }
557  }
558  }
559  }
560 
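561  //! From which decomposition the shift boxes are calculated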
562  long int shift_box_ndec = -1;
563 
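564  //! this map is used to check if a combination is already present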
565  std::unordered_map<size_t, size_t> map_cmb;
566 
567 
568  //! The boxes touching the border of the domain + shift vector linearized from where they come from
569 
570  openfpm::vector<Box<dim, St>, Memory, layout_base> box_f_dev;
571 
572  //! shift vector id for each box in box_f_dev
573  openfpm::vector<aggregate<unsigned int>,Memory,layout_base> box_f_sv;
574 
575 
576  //! The boxes touching the border of the domain, grouped by shift combination
577  openfpm::vector_std<openfpm::vector_std<Box<dim, St>>> box_f;
578 
579  //! Store the sector for each group (previous vector)
580  openfpm::vector_std<comb<dim>> box_cmb;
581 
582  //! Id of the local particle to replicate for ghost_get
583  openfpm::vector<aggregate<unsigned int,unsigned int>,Memory,layout_base> o_part_loc;
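584 
585  /*! \brief For every internal ghost box we create a structure that order such internal
586  * local ghost boxes in shift vectors
587  */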
588  void createShiftBox()
589  {
590  if (shift_box_ndec == (long int)dec.get_ndec())
591  {return;}
592 
593  struct sh_box
594  {
595  size_t shift_id;
596 
597  unsigned int box_f_sv;
598  Box<dim,St> box_f_dev;
599 
600  bool operator<(const sh_box & tmp) const
601  {
602  return shift_id < tmp.shift_id;
603  }
604 
605  };
606  openfpm::vector<sh_box> reord_shift;
607  box_f.clear();
608  map_cmb.clear();
609  box_cmb.clear();
610 
611  // Add local particles coming from periodic boundary, the only boxes that count are the ones
612  // touching the border
613  for (size_t i = 0; i < dec.getNLocalSub(); i++)
614  {
615  size_t Nl = dec.getLocalNIGhost(i);
616 
617  for (size_t j = 0; j < Nl; j++)
618  {
619  // If the ghost does not come from the intersection with an out of
620  // border sub-domain the combination is all zero and n_zero returns dim
621  if (dec.getLocalIGhostPos(i, j).n_zero() == dim)
622  continue;
623 
624  // Check if we already have boxes with such combination
625  auto it = map_cmb.find(dec.getLocalIGhostPos(i, j).lin());
626  if (it == map_cmb.end())
627  {
628  // we do not have it
629  box_f.add();
630  box_f.last().add(dec.getLocalIGhostBox(i, j));
631  box_cmb.add(dec.getLocalIGhostPos(i, j));
632  map_cmb[dec.getLocalIGhostPos(i, j).lin()] = box_f.size() - 1;
633  }
634  else
635  {
636  // we have it
637  box_f.get(it->second).add(dec.getLocalIGhostBox(i, j));
638  }
639 
640  reord_shift.add();
641  reord_shift.last().shift_id = dec.getLocalIGhostPos(i, j).lin();
642  reord_shift.last().box_f_dev = dec.getLocalIGhostBox(i, j);
643  reord_shift.last().box_f_sv = dec.convertShift(dec.getLocalIGhostPos(i, j));
644  }
645  }
646 
647  // now we sort box_f by shift_id, the reason is that we have to avoid duplicated particles
648  reord_shift.sort();
649 
650  box_f_dev.resize(reord_shift.size());
651  box_f_sv.resize(reord_shift.size());
652 
653  for (size_t i = 0 ; i < reord_shift.size() ; i++)
654  {
655  box_f_dev.get(i) = reord_shift.get(i).box_f_dev;
656  box_f_sv.template get<0>(i) = reord_shift.get(i).box_f_sv;
657  }
658 
659 #ifdef CUDA_GPU
660 
661  // move box_f_dev and box_f_sv to device
662  box_f_dev.template hostToDevice<0,1>();
663  box_f_sv.template hostToDevice<0>();
664 
665 #endif
666 
667  shift_box_ndec = dec.get_ndec();
668  }
669 
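670  /*! \brief Local ghost from labeled particles
671  *
672  * \param v_pos vector of particle positions
673  * \param v_prp vector of particle properties
674  * \param opt options
675  *
676  */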
677  void local_ghost_from_opart(openfpm::vector<Point<dim, St>,Memory,layout_base> & v_pos,
678  openfpm::vector<prop,Memory,layout_base> & v_prp,
679  size_t opt)
680  {
681  // get the shift vectors
682  const openfpm::vector<Point<dim, St>,Memory,layout_base> & shifts = dec.getShiftVectors();
683 
684  if (!(opt & NO_POSITION))
685  {
686  if (opt & RUN_ON_DEVICE)
687  {
688  local_ghost_from_opart_impl<true,dim,St,prop,Memory,layout_base,std::is_same<Memory,CudaMemory>::value>
689  ::run(o_part_loc,shifts,v_pos,v_prp,opt);
690  }
691  else
692  {
693  for (size_t i = 0 ; i < o_part_loc.size() ; i++)
694  {
695  size_t lin_id = o_part_loc.template get<1>(i);
696  size_t key = o_part_loc.template get<0>(i);
697 
698  Point<dim, St> p = v_pos.get(key);
699  // shift
700  p -= shifts.get(lin_id);
701 
702  // add this particle shifting its position
703  v_pos.add(p);
704  v_prp.get(lg_m+i) = v_prp.get(key);
705  }
706  }
707  }
708  else
709  {
710  if (opt & RUN_ON_DEVICE)
711  {
712  local_ghost_from_opart_impl<false,dim,St,prop,Memory,layout_base,std::is_same<Memory,CudaMemory>::value>
713  ::run(o_part_loc,shifts,v_pos,v_prp,opt);
714  }
715  else
716  {
717  for (size_t i = 0 ; i < o_part_loc.size() ; i++)
718  {
719  size_t key = o_part_loc.template get<0>(i);
720 
721  v_prp.get(lg_m+i) = v_prp.get(key);
722  }
723  }
724  }
725  }
726 
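727  /*! \brief Local ghost from decomposition
728  *
729  * \param v_pos vector of particle positions
730  * \param v_prp vector of particle properties
731  * \param ghostMarker ghost marker
732  * \param opt options
733  */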
734  void local_ghost_from_dec(openfpm::vector<Point<dim, St>,Memory,layout_base> & v_pos,
735  openfpm::vector<prop,Memory,layout_base> & v_prp,
736  size_t ghostMarker,size_t opt)
737  {
738  o_part_loc.clear();
739 
740  // get the shift vectors
741  const openfpm::vector<Point<dim,St>,Memory,layout_base> & shifts = dec.getShiftVectors();
742 
743  if (opt & RUN_ON_DEVICE)
744  {
745  local_ghost_from_dec_impl<dim,St,prop,Memory,layout_base,std::is_same<Memory,CudaMemory>::value>
746  ::run(o_part_loc,shifts,box_f_dev,box_f_sv,v_cl,starts,v_pos,v_prp,ghostMarker,opt);
747  }
748  else
749  {
750  // Label the internal (assigned) particles
751  auto it = v_pos.getIteratorTo(ghostMarker);
752 
753  while (it.isNext())
754  {
755  auto key = it.get();
756 
757  // If particles are inside these boxes
758  for (size_t i = 0; i < box_f.size(); i++)
759  {
760  for (size_t j = 0; j < box_f.get(i).size(); j++)
761  {
762  if (box_f.get(i).get(j).isInsideNP(v_pos.get(key)) == true)
763  {
764  size_t lin_id = dec.convertShift(box_cmb.get(i));
765 
766  o_part_loc.add();
767  o_part_loc.template get<0>(o_part_loc.size()-1) = key;
768  o_part_loc.template get<1>(o_part_loc.size()-1) = lin_id;
769 
770  Point<dim, St> p = v_pos.get(key);
771  // shift
772  p -= shifts.get(lin_id);
773 
774  // add this particle shifting its position
775  v_pos.add(p);
776  v_prp.add();
777  v_prp.last() = v_prp.get(key);
778 
779  // boxes in one group can overlap; we must not search the other
780  // boxes of the same group, otherwise we would produce
781  // duplicate particles
782  //
783  // A small note: overlap of boxes across groups is fine
784  // (and needed) because each group has a different shift,
785  // producing non-overlapping particles
786  //
787  break;
788  }
789  }
790  }
791 
792  ++it;
793  }
794  }
795  }
796 
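841  /*! \brief Add local particles based on the boundary conditions
842  *
843  * Particles near a periodic border are replicated on the opposite side of the domain
844  */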
845  void add_loc_particles_bc(openfpm::vector<Point<dim, St>,Memory,layout_base> & v_pos,
846  openfpm::vector<prop,Memory,layout_base> & v_prp,
847  size_t & ghostMarker,
848  size_t opt)
849  {
850  // Create the shift boxes
851  createShiftBox();
852 
853  if (!(opt & SKIP_LABELLING))
854  lg_m = v_prp.size();
855 
856  if (box_f.size() == 0)
857  return;
858  else
859  {
860  if (opt & SKIP_LABELLING)
861  {local_ghost_from_opart(v_pos,v_prp,opt);}
862  else
863  {local_ghost_from_dec(v_pos,v_prp,ghostMarker,opt);}
864  }
865  }
866 
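867  /*! \brief This function fill the send buffer for the particle position after the particles
868  * have been labeled with labelParticlesGhost
869  *
870  * \param v_pos vector of particle positions
871  * \param g_pos_send Send buffer to fill
872  */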
873  void fill_send_ghost_pos_buf(openfpm::vector<Point<dim, St>,Memory,layout_base> & v_pos,
874  openfpm::vector<size_t> & prc_sz,
875  openfpm::vector<send_pos_vector> & g_pos_send,
876  size_t opt,
877  bool async)
878  {
879  // get the shift vectors
880  const openfpm::vector<Point<dim,St>,Memory,layout_base> & shifts = dec.getShiftVectors();
881 
882  // create a number of send buffers equal to the near processors
883  g_pos_send.resize(prc_sz.size());
884 
885  size_t old_hsmem_size = 0;
886 
887  // if we do async
888  if (async == true)
889  {
890  old_hsmem_size = hsmem.size();
891  resize_retained_buffer(hsmem,g_pos_send.size() + hsmem.size());
892  }
893  else
894  {resize_retained_buffer(hsmem,g_pos_send.size());}
895 
896  for (size_t i = 0; i < g_pos_send.size(); i++)
897  {
898  // Buffer must be retained and must survive the destruction of the
899  // vector
900  if (hsmem.get(i+old_hsmem_size).ref() == 0)
901  {hsmem.get(i+old_hsmem_size).incRef();}
902 
903  // Set the memory for retain the send buffer
904  g_pos_send.get(i).setMemory(hsmem.get(i+old_hsmem_size));
905 
906  // resize the sending vector (No allocation is produced)
907  g_pos_send.get(i).resize(prc_sz.get(i));
908  }
909 
910  if (opt & RUN_ON_DEVICE)
911  {
912 #if defined(CUDA_GPU) && defined(__NVCC__)
913 
914  size_t offset = 0;
915 
916  // Fill the sending buffers
917  for (size_t i = 0 ; i < g_pos_send.size() ; i++)
918  {
919  auto ite = g_pos_send.get(i).getGPUIterator();
920 
921  CUDA_LAUNCH((process_ghost_particles_pos<dim,decltype(g_opart_device.toKernel()),decltype(g_pos_send.get(i).toKernel()),decltype(v_pos.toKernel()),decltype(shifts.toKernel())>),
922  ite,
923  g_opart_device.toKernel(), g_pos_send.get(i).toKernel(),
924  v_pos.toKernel(),shifts.toKernel(),(unsigned int)offset);
925 
926  offset += prc_sz.get(i);
927  }
928 
929 #else
930 
931  std::cout << __FILE__ << ":" << __LINE__ << " error: RUN_ON_DEVICE requires compiling with NVCC, but this appears to have been compiled with a normal compiler" << std::endl;
932 
933 #endif
934  }
935  else
936  {
937  // Fill the send buffer
938  for (size_t i = 0; i < g_opart.size(); i++)
939  {
940  for (size_t j = 0; j < g_opart.get(i).size(); j++)
941  {
942  Point<dim, St> s = v_pos.get(g_opart.get(i).template get<0>(j));
943  s -= shifts.get(g_opart.get(i).template get<1>(j));
944  g_pos_send.get(i).set(j, s);
945  }
946  }
947  }
948  }
949 
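956  /*! \brief This function fill the send buffer for ghost_put
957  *
958  * \param v_prp vector of particle properties
959  * \param g_send_prp Send buffer to fill
960  */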
961  template<typename send_vector, typename prp_object, int ... prp>
962  void fill_send_ghost_put_prp_buf(openfpm::vector<prop,Memory,layout_base> & v_prp,
963  openfpm::vector<send_vector> & g_send_prp,
964  size_t & ghostMarker,
965  size_t opt)
966  {
967  // create a number of send buffers equal to the near processors
968  // from which we received
969 
970  // NOTE in some cases the information can be in prc_recv_get_pos
971 
972  size_t nproc = get_last_ghost_get_num_proc();
973 
974  g_send_prp.resize(nproc);
975 
976  resize_retained_buffer(hsmem,g_send_prp.size());
977 
978  for (size_t i = 0; i < g_send_prp.size(); i++)
979  {
980  // Buffer must be retained and must survive the destruction of the
981  // vector
982  if (hsmem.get(i).ref() == 0)
983  hsmem.get(i).incRef();
984 
985  // Set the memory for retain the send buffer
986  g_send_prp.get(i).setMemory(hsmem.get(i));
987 
988  size_t n_part_recv = get_last_ghost_get_received_parts(i);
989 
990  // resize the sending vector (No allocation is produced)
991  g_send_prp.get(i).resize(n_part_recv);
992  }
993 
994  size_t accum = ghostMarker;
995 
996  if (opt & RUN_ON_DEVICE)
997  {
998 #if defined(CUDA_GPU) && defined(__NVCC__)
999 
1000  if (sizeof...(prp) != 0)
1001  {
1002  // Fill the sending buffers
1003  for (size_t i = 0 ; i < g_send_prp.size() ; i++)
1004  {
1005  size_t n_part_recv = get_last_ghost_get_received_parts(i);
1006 
1007  auto ite = g_send_prp.get(i).getGPUIterator();
1008 
1009  if (ite.nblocks() == 0) {continue;}
1010 
1011  CUDA_LAUNCH((process_ghost_particles_prp_put<decltype(g_send_prp.get(i).toKernel()),decltype(v_prp.toKernel()),prp...>),
1012  ite,
1013  g_send_prp.get(i).toKernel(),
1014  v_prp.toKernel(),(unsigned int)accum);
1015 
1016  accum = accum + n_part_recv;
1017  }
1018  }
1019 
1020 #else
1021 
1022  std::cout << __FILE__ << ":" << __LINE__ << " error: RUN_ON_DEVICE requires compiling with NVCC, but this appears to have been compiled with a normal compiler" << std::endl;
1023 
1024 #endif
1025  }
1026  else
1027  {
1028  // Fill the send buffer
1029  for (size_t i = 0; i < g_send_prp.size(); i++)
1030  {
1031  size_t j2 = 0;
1032  size_t n_part_recv = get_last_ghost_get_received_parts(i);
1033 
1034  for (size_t j = accum; j < accum + n_part_recv; j++)
1035  {
1036  // source object type
1037  typedef decltype(v_prp.get(j)) encap_src;
1038  // destination object type
1039  typedef decltype(g_send_prp.get(i).get(j2)) encap_dst;
1040 
1041  // Copy only the selected properties
1042  object_si_d<encap_src, encap_dst, OBJ_ENCAP, prp...>(v_prp.get(j), g_send_prp.get(i).get(j2));
1043 
1044  j2++;
1045  }
1046 
1047  accum = accum + n_part_recv;
1048  }
1049  }
1050  }
1051 
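1052  /*! \brief resize the retained buffer to nbf buffers, releasing the dropped ones
1053  *
1054  * \param nbf new number of retained buffers
1055  */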
1056  void resize_retained_buffer(openfpm::vector_fr<Memory> & rt_buf, size_t nbf)
1057  {
1058  // Release all the buffer that are going to be deleted
1059  for (size_t i = nbf ; i < rt_buf.size() ; i++)
1060  {
1061  rt_buf.get(i).decRef();
1062  }
1063 
1064  hsmem.resize(nbf);
1065  }
1066 
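1067 
1068  /*! \brief Functor that set the memory of each property buffer
1069  * for the interleaved memory layout
1070  */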
1071  template<typename send_vector, typename v_mpl>
1072  struct set_mem_retained_buffers_inte
1073  {
1074  openfpm::vector<send_vector> & g_send_prp;
1075 
1076  size_t i;
1077 
1078  openfpm::vector_fr<Memory> & hsmem;
1079 
1080  size_t j;
1081 
1081 
1082  set_mem_retained_buffers_inte(openfpm::vector<send_vector> & g_send_prp, size_t i,
1083  openfpm::vector_fr<Memory> & hsmem, size_t j)
1084  :g_send_prp(g_send_prp),i(i),hsmem(hsmem),j(j)
1085  {}
1086 
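1087  //! It calls the setMemory function for each property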
1088  template<typename T>
1089  inline void operator()(T& t)
1090  {
1091  g_send_prp.get(i).template setMemory<T::value>(hsmem.get(j));
1092 
1093  j++;
1094  }
1095  };
1096 
1097  template<bool inte_or_lin,typename send_vector, typename v_mpl>
1098  struct set_mem_retained_buffers
1099  {
1100  static inline size_t set_mem_retained_buffers_(openfpm::vector<send_vector> & g_send_prp,
1101  openfpm::vector<size_t> & prc_sz,
1102  size_t i,
1103  openfpm::vector_fr<Memory> & hsmem,
1104  size_t j)
1105  {
1106  // Set the memory for retain the send buffer
1107  g_send_prp.get(i).setMemory(hsmem.get(j));
1108 
1109  // resize the sending vector (No allocation is produced)
1110  g_send_prp.get(i).resize(prc_sz.get(i));
1111 
1112  return j+1;
1113  }
1114  };
1115 
1116  template<typename send_vector, typename v_mpl>
1117  struct set_mem_retained_buffers<true,send_vector,v_mpl>
1118  {
1119  static inline size_t set_mem_retained_buffers_(openfpm::vector<send_vector> & g_send_prp,
1120  openfpm::vector<size_t> & prc_sz,
1121  size_t i,
1122  openfpm::vector_fr<Memory> & hsmem,
1123  size_t j)
1124  {
1125  set_mem_retained_buffers_inte<send_vector,v_mpl> smrbi(g_send_prp,i,hsmem,j);
1126 
1127  boost::mpl::for_each_ref<boost::mpl::range_c<int,0,boost::mpl::size<v_mpl>::type::value>>(smrbi);
1128 
1129  // if we do not send properties do not reallocate
1130  if (boost::mpl::size<v_mpl>::type::value != 0)
1131  {
1132  // resize the sending vector (No allocation is produced)
1133  g_send_prp.get(i).resize(prc_sz.get(i));
1134  }
1135 
1136  return smrbi.j;
1137  }
1138  };
1139 
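1140  /*! \brief This function fill the send buffer for properties after the particles have been
1141  * labeled with labelParticlesGhost
1142  *
1143  * \param v_prp vector of particle properties
1144  * \param prc_sz number of particles to send for each processor
1145  * \param g_send_prp Send buffer to fill
1146  * \param opt options
1147  *
1148  */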
1150  template<typename send_vector, typename prp_object, int ... prp>
1151  void fill_send_ghost_prp_buf(openfpm::vector<prop,Memory,layout_base> & v_prp,
1152  openfpm::vector<size_t> & prc_sz,
1153  openfpm::vector<send_vector> & g_send_prp,
1154  size_t opt)
1155  {
1156  size_t factor = 1;
1157 
1158  typedef typename to_boost_vmpl<prp...>::type v_mpl;
1159 
1160  if (is_layout_inte<layout_base<prop>>::value == true) {factor *= sizeof...(prp);}
1161 
1162  // create a number of send buffers equal to the near processors
1163  g_send_prp.resize(prc_sz.size());
1164 
1165  resize_retained_buffer(hsmem,g_send_prp.size()*factor);
1166 
1167  for (size_t i = 0; i < hsmem.size(); i++)
1168  {
1169  // Buffer must be retained and must survive the destruction of the
1170  // vector
1171  if (hsmem.get(i).ref() == 0)
1172  {hsmem.get(i).incRef();}
1173  }
1174 
1175  size_t j = 0;
1176  for (size_t i = 0; i < g_send_prp.size(); i++)
1177  {
1178  j = set_mem_retained_buffers<is_layout_inte<layout_base<prop>>::value,send_vector,v_mpl>::set_mem_retained_buffers_(g_send_prp,prc_sz,i,hsmem,j);
1179  }
1180 
1181  if (opt & RUN_ON_DEVICE)
1182  {
1183 #if defined(CUDA_GPU) && defined(__NVCC__)
1184 
1185  size_t offset = 0;
1186 
1187  if (sizeof...(prp) != 0)
1188  {
1189  // Fill the sending buffers
1190  for (size_t i = 0 ; i < g_send_prp.size() ; i++)
1191  {
1192  auto ite = g_send_prp.get(i).getGPUIterator();
1193 
1194  CUDA_LAUNCH((process_ghost_particles_prp<decltype(g_opart_device.toKernel()),decltype(g_send_prp.get(i).toKernel()),decltype(v_prp.toKernel()),prp...>),
1195  ite,
1196  g_opart_device.toKernel(), g_send_prp.get(i).toKernel(),
1197  v_prp.toKernel(),(unsigned int)offset);
1198 
1199  offset += prc_sz.get(i);
1200  }
1201  }
1202 
1203 #else
1204 
1205  std::cout << __FILE__ << ":" << __LINE__ << " error: RUN_ON_DEVICE requires compiling with NVCC, but this appears to have been compiled with a normal compiler" << std::endl;
1206 
1207 #endif
1208  }
1209  else
1210  {
1211  // if no properties must be sent skip this step
1212  if (sizeof...(prp) == 0) {return;}
1213 
1214  // Fill the send buffer
1215  for (size_t i = 0; i < g_opart.size(); i++)
1216  {
1217  for (size_t j = 0; j < g_opart.get(i).size(); j++)
1218  {
1219  // source object type
1220  typedef decltype(v_prp.get(g_opart.get(i).template get<0>(j))) encap_src;
1221  // destination object type
1222  typedef decltype(g_send_prp.get(i).get(j)) encap_dst;
1223 
1224  // Copy only the selected properties
1225  object_si_d<encap_src, encap_dst, OBJ_ENCAP, prp...>(v_prp.get(g_opart.get(i).template get<0>(j)), g_send_prp.get(i).get(j));
1226  }
1227  }
1228  }
1229  }
1230 
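1231  /*! \brief allocate and fill the send buffer for the map function
1232  *
1233  * \param v_pos vector of particle positions
1234  * \param v_prp vector of particle properties
1235  * \param prc_sz_r number of particles to send for each processor
1236  * \param prc_r list of the processors we communicate with
1237  * \param m_pos sending buffer for position
1238  * \param m_prp sending buffer for properties
1239  * \param prc_sz processor communication size
1240  * \param opt options
1241  */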
1242  void fill_send_map_buf(openfpm::vector<Point<dim, St>,Memory,layout_base> & v_pos,
1243  openfpm::vector<prop,Memory,layout_base> & v_prp,
1244  openfpm::vector<size_t> & prc_sz_r,
1245  openfpm::vector<size_t> & prc_r,
1246  openfpm::vector<openfpm::vector<Point<dim,St>,Memory,layout_base,openfpm::grow_policy_identity>> & m_pos,
1247  openfpm::vector<openfpm::vector<prop,Memory,layout_base,openfpm::grow_policy_identity>> & m_prp,
1248  openfpm::vector<aggregate<unsigned int,unsigned int>,Memory,layout_base> & prc_sz,
1249  size_t opt)
1250  {
1251  m_prp.resize(prc_sz_r.size());
1252  m_pos.resize(prc_sz_r.size());
1253  openfpm::vector<size_t> cnt(prc_sz_r.size());
1254 
1255  for (size_t i = 0; i < prc_sz_r.size() ; i++)
1256  {
1257  // set the size and allocate, using mem guarantees that pos and prp are contiguous
1258  m_pos.get(i).resize(prc_sz_r.get(i));
1259  m_prp.get(i).resize(prc_sz_r.get(i));
1260  cnt.get(i) = 0;
1261  }
1262 
1263  if (opt & RUN_ON_DEVICE)
1264  {
1265  if (v_cl.size() == 1)
1266  {return;}
1267 
1268 #if defined(CUDA_GPU) && defined(__NVCC__)
1269 
1270  // The first part of m_opart and prc_sz contain the local particles
1271 
1272  int rank = v_cl.rank();
1273 
1274  v_pos_tmp.resize(prc_sz.template get<0>(rank));
1275  v_prp_tmp.resize(prc_sz.template get<0>(rank));
1276 
1277  auto ite = v_pos_tmp.getGPUIterator();
1278 
1279  starts.template deviceToHost<0>();
1280  size_t offset = starts.template get<0>(rank);
1281 
1282  // skip the launch if there is no work to do
1283  if (ite.wthr.x != 0)
1284  {
1285  // fill v_pos_tmp and v_prp_tmp with local particles
1286  CUDA_LAUNCH((process_map_particles<decltype(m_opart.toKernel()),decltype(v_pos_tmp.toKernel()),decltype(v_prp_tmp.toKernel()),
1287  decltype(v_pos.toKernel()),decltype(v_prp.toKernel())>),
1288  ite,
1289  m_opart.toKernel(),v_pos_tmp.toKernel(), v_prp_tmp.toKernel(),
1290  v_pos.toKernel(),v_prp.toKernel(),(unsigned int)offset);
1291  }
1292 
1293  // Fill the sending buffers
1294  for (size_t i = 0 ; i < m_pos.size() ; i++)
1295  {
1296  size_t offset = starts.template get<0>(prc_r.template get<0>(i));
1297 
1298  auto ite = m_pos.get(i).getGPUIterator();
1299 
1300  // skip the launch if there is no work to do
1301  if (ite.wthr.x != 0)
1302  {
1303 
1304  CUDA_LAUNCH((process_map_particles<decltype(m_opart.toKernel()),decltype(m_pos.get(i).toKernel()),decltype(m_prp.get(i).toKernel()),
1305  decltype(v_pos.toKernel()),decltype(v_prp.toKernel())>),
1306  ite,
1307  m_opart.toKernel(),m_pos.get(i).toKernel(), m_prp.get(i).toKernel(),
1308  v_pos.toKernel(),v_prp.toKernel(),(unsigned int)offset);
1309 
1310  }
1311  }
1312 
1313  // swap the old local particles with the actual local particles
1314  v_pos_tmp.swap(v_pos);
1315  v_prp_tmp.swap(v_prp);
1316 
1317 #else
1318 
1319  std::cout << __FILE__ << ":" << __LINE__ << " error: RUN_ON_DEVICE requires compiling with NVCC, but this appears to have been compiled with a normal compiler" << std::endl;
1320 
1321 #endif
1322  }
1323  else
1324  {
1325  // end vector point
1326  long int id_end = v_pos.size();
1327 
1328  // end opart point
1329  long int end = m_opart.size()-1;
1330 
1331  // Run through all the particles and fill the sending buffer
1332  for (size_t i = 0; i < m_opart.size(); i++)
1333  {
1334  process_map_particle<proc_without_prp>(i,end,id_end,m_opart,p_map_req,m_pos,m_prp,v_pos,v_prp,cnt);
1335  }
1336 
1337  v_pos.resize(v_pos.size() - m_opart.size());
1338  v_prp.resize(v_prp.size() - m_opart.size());
1339  }
1340  }
1341 
1342 
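1343 
1344  /*! \brief allocate and fill the send buffer for the map function
1345  *
1346  * \param v_pos vector of particle positions
1347  * \param v_prp vector of particle properties
1348  * \param prc_sz_r number of particles to send for each processor
1349  * \param m_pos sending buffer for position
1350  * \param m_prp sending buffer for properties
1351  *
1352  */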
1355  template<typename prp_object,int ... prp>
1356  void fill_send_map_buf_list(openfpm::vector<Point<dim, St>> & v_pos,
1357  openfpm::vector<prop,Memory,layout_base> & v_prp,
1358  openfpm::vector<size_t> & prc_sz_r,
1359  openfpm::vector<openfpm::vector<Point<dim,St>>> & m_pos,
1360  openfpm::vector<openfpm::vector<prp_object>> & m_prp)
1361  {
1362  m_prp.resize(prc_sz_r.size());
1363  m_pos.resize(prc_sz_r.size());
1364  openfpm::vector<size_t> cnt(prc_sz_r.size());
1365 
1366  for (size_t i = 0; i < prc_sz_r.size(); i++)
1367  {
1368  // set the size and allocate, using mem guarantees that pos and prp are contiguous
1369  m_pos.get(i).resize(prc_sz_r.get(i));
1370  m_prp.get(i).resize(prc_sz_r.get(i));
1371  cnt.get(i) = 0;
1372  }
1373 
1374  // end vector point
1375  long int id_end = v_pos.size();
1376 
1377  // end opart point
1378  long int end = m_opart.size()-1;
1379 
1380  // Run through all the particles and fill the sending buffer
1381  for (size_t i = 0; i < m_opart.size(); i++)
1382  {
1383  process_map_particle<proc_with_prp<prp_object,prp...>>(i,end,id_end,m_opart,p_map_req,m_pos,m_prp,v_pos,v_prp,cnt);
1384  }
1385 
1386  v_pos.resize(v_pos.size() - m_opart.size());
1387  v_prp.resize(v_prp.size() - m_opart.size());
1388  }
1389 
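1390  /*! \brief Label particles for mappings
1391  *
1392  * \param v_pos vector of particle positions
1393  * \param lbl_p labeled particles
1394  * \param prc_sz processor communication size
1395  * \param opt options
1396  *
1397  */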
1398  template<typename obp> void labelParticleProcessor(openfpm::vector<Point<dim, St>,Memory,layout_base> & v_pos,
1399  openfpm::vector<aggregate<int,int,int>,
1400  Memory,
1401  layout_base> & lbl_p,
1402  openfpm::vector<aggregate<unsigned int,unsigned int>,Memory,layout_base> & prc_sz,
1403  size_t opt)
1404  {
1405  if (opt == RUN_ON_DEVICE)
1406  {
1407 #ifdef __NVCC__
1408 
1409  // Map directly on gpu
1410 
1411  lbl_p.resize(v_pos.size());
1412 
1413  // labelling kernel
1414 
1415  prc_sz.template fill<0>(0);
1416 
1417  auto ite = v_pos.getGPUIterator();
1418  if (ite.wthr.x == 0)
1419  {
1420  starts.resize(v_cl.size());
1421  starts.template fill<0>(0);
1422  return;
1423  }
1424 
1425  // with one process we can skip the labelling ...
1426  if (v_cl.size() == 1)
1427  {
1428  // ... but we have to apply the boundary conditions
1429 
1429 
1430  periodicity_int<dim> bc;
1431 
1432  for (size_t i = 0 ; i < dim ; i++) {bc.bc[i] = dec.periodicity(i);}
1433 
1434  CUDA_LAUNCH((apply_bc_each_part<dim,St,decltype(v_pos.toKernel())>),ite,dec.getDomain(),bc,v_pos.toKernel());
1435 
1436  return;
1437  }
1438 
1439  // label particle processor
1440  CUDA_LAUNCH((process_id_proc_each_part<dim,St,decltype(dec.toKernel()),decltype(v_pos.toKernel()),decltype(lbl_p.toKernel()),decltype(prc_sz.toKernel())>),
1441  ite,
1442  dec.toKernel(),v_pos.toKernel(),lbl_p.toKernel(),prc_sz.toKernel(),(int)v_cl.rank());
1443 
1444  starts.resize(v_cl.size());
1445  openfpm::scan((unsigned int *)prc_sz.template getDeviceBuffer<0>(), prc_sz.size(), (unsigned int *)starts.template getDeviceBuffer<0>() , v_cl.getGpuContext());
1446 
1447  // move prc_sz to host
1448  prc_sz.template deviceToHost<0>();
1449 
1450  ite = lbl_p.getGPUIterator();
1451 
1452  // we order lbl_p
1453  CUDA_LAUNCH((reorder_lbl<decltype(lbl_p.toKernel()),decltype(starts.toKernel())>),ite,lbl_p.toKernel(),starts.toKernel());
1454 
1455 
1456 #else
1457 
1458  std::cout << __FILE__ << ":" << __LINE__ << " error: it seems you tried to call map with the RUN_ON_DEVICE option, this requires compiling the program with NVCC" << std::endl;
1459 
1460 #endif
1461  }
1462  else
1463  {
1464  // reset lbl_p
1465  lbl_p.clear();
1466  prc_sz_gg.clear();
1467  o_part_loc.clear();
1468  g_opart.clear();
1469  prc_g_opart.clear();
1470 
1471  // resize the label buffer
1472  prc_sz.template fill<0>(0);
1473 
1474  auto it = v_pos.getIterator();
1475 
1476  // Label all the particles with the processor id where they should go
1477  while (it.isNext())
1478  {
1479  auto key = it.get();
1480 
1481  // Apply the boundary conditions
1482  dec.applyPointBC(v_pos.get(key));
1483 
1484  size_t p_id = 0;
1485 
1486  // Check if the particle is inside the domain
1487  if (dec.getDomain().isInside(v_pos.get(key)) == true)
1488  {p_id = dec.processorID(v_pos.get(key));}
1489  else
1490  {p_id = obp::out(key, v_cl.getProcessUnitID());}
1491 
1492  // Particle to move
1493  if (p_id != v_cl.getProcessUnitID())
1494  {
1495  if ((long int) p_id != -1)
1496  {
1497  prc_sz.template get<0>(p_id)++;
1498  lbl_p.add();
1499  lbl_p.last().template get<0>() = key;
1500  lbl_p.last().template get<2>() = p_id;
1501  }
1502  else
1503  {
1504  lbl_p.add();
1505  lbl_p.last().template get<0>() = key;
1506  lbl_p.last().template get<2>() = p_id;
1507  }
1508  }
1509 
1510  // Add processors and add size
1511 
1512  ++it;
1513  }
1514  }
1515  }
1516 
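1517  /*! \brief Label the particles
1518  *
1519  * It counts the number of particles to send to each processor and saves their ids
1520  *
1521  * \param v_pos vector of particle positions
1522  * \param v_prp vector of particle properties
1523  * \param prc list of the sending processors
1524  * \param prc_sz number of particles to send for each processor
1525  * \param prc_offset processor offsets
1526  * \param ghostMarker ghost marker
1527  * \param opt options
1528  *
1529  */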
1530  void labelParticlesGhost(openfpm::vector<Point<dim, St>,Memory,layout_base> & v_pos,
1531  openfpm::vector<prop,Memory,layout_base> & v_prp,
1532  openfpm::vector<size_t> & prc,
1533  openfpm::vector<size_t> & prc_sz,
1534  openfpm::vector<aggregate<unsigned int,unsigned int>,Memory,layout_base> & prc_offset,
1535  size_t & ghostMarker,
1536  size_t opt)
1537  {
1538  // Buffer that contain for each processor the id of the particle to send
1539  prc_sz.clear();
1540  g_opart.clear();
1541  g_opart.resize(dec.getNNProcessors());
1542  prc_g_opart.clear();
1543 
1544  if (opt & RUN_ON_DEVICE)
1545  {
1546  labelParticlesGhost_impl<dim,St,prop,Memory,layout_base,
1547  Decomposition,std::is_same<Memory,CudaMemory>::value>
1548  ::run(mem,dec,g_opart_device,proc_id_out,starts,v_cl,v_pos,v_prp,prc,prc_sz,prc_offset,ghostMarker,opt);
1549  }
1550  else
1551  {
1552  // Iterate over all particles
1553  auto it = v_pos.getIteratorTo(ghostMarker);
1554  while (it.isNext())
1555  {
1556  auto key = it.get();
1557 
1558  // Given a particle, it returns which processors require it (first id) and the shift id (second id)
1559  // For an explanation about shifts vectors please consult getShiftVector in ie_ghost
1560  const openfpm::vector<std::pair<size_t, size_t>> & vp_id = dec.template ghost_processorID_pair<typename Decomposition::lc_processor_id, typename Decomposition::shift_id>(v_pos.get(key), UNIQUE);
1561 
1562  for (size_t i = 0; i < vp_id.size(); i++)
1563  {
1564  // processor id
1565  size_t p_id = vp_id.get(i).first;
1566 
1567  // add particle to communicate
1568  g_opart.get(p_id).add();
1569  g_opart.get(p_id).last().template get<0>() = key;
1570  g_opart.get(p_id).last().template get<1>() = vp_id.get(i).second;
1571  }
1572 
1573  ++it;
1574  }
1575 
1576  // remove all zero entry and construct prc (the list of the sending processors)
1577  openfpm::vector<openfpm::vector<aggregate<size_t,size_t>>> g_opart_f;
1578 
1579  // count the non zero element
1580  for (size_t i = 0 ; i < g_opart.size() ; i++)
1581  {
1582  if (g_opart.get(i).size() != 0)
1583  {
1584  prc_sz.add(g_opart.get(i).size());
1585  g_opart_f.add();
1586  g_opart.get(i).swap(g_opart_f.last());
1587  prc.add(dec.IDtoProc(i));
1588  }
1589  }
1590 
1591  g_opart.swap(g_opart_f);
1592  }
1593 #ifdef EXTREA_TRACE_PRE_COMM
1594  Extrae_user_function (0);
1595 #endif
1596  }
1597 
1598 public:
1599 
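1600  /*! \brief Copy Constructor
1601  *
1602  * \param v vector to copy
1603  *
1604  */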
1605  vector_dist_comm(const vector_dist_comm<dim,St,prop,Decomposition,Memory,layout_base> & v)
1606  :v_cl(create_vcluster<Memory>()),dec(create_vcluster()),lg_m(0)
1607  {
1608  this->operator=(v);
1609  }
1610 
1611 
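1612  /*! \brief Constructor
1613  *
1614  * \param dec Domain decomposition
1615  *
1616  */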
1617  vector_dist_comm(const Decomposition & dec)
1618  :v_cl(create_vcluster<Memory>()),dec(dec),lg_m(0)
1619  {
1620 
1621  }
1622 
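1623  /*! \brief Constructor
1624  *
1625  * \param dec Domain decomposition (temporary)
1626  *
1627  */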
1628  vector_dist_comm(Decomposition && dec)
1629  :v_cl(create_vcluster<Memory>()),dec(dec),lg_m(0)
1630  {
1631 
1632  }
1633 
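1634 
1635  //! Constructor
1636 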
1637  vector_dist_comm()
1638  :v_cl(create_vcluster<Memory>()),dec(create_vcluster()),lg_m(0)
1639  {
1640  }
1641 
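1642  /*! \brief Destructor
1643  *
1644  * Release the retained buffers
1645  *
1646  */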
1647  ~vector_dist_comm()
1648  {
1649  for (size_t i = 0 ; i < hsmem.size() ; i++)
1650  {
1651  if (hsmem.get(i).ref() == 1)
1652  hsmem.get(i).decRef();
1653  else
1654  std::cout << __FILE__ << ":" << __LINE__ << " internal error memory is in an invalid state " << std::endl;
1655  }
1656 
1657  }
1658 
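1659  /*! \brief Get the number of minimum sub-domain per processor
1660  *
1661  * \return the number of minimum sub-domain per processor
1662  *
1663  */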
1664  size_t getDecompositionGranularity()
1665  {
1666  return v_sub_unit_factor;
1667  }
1668 
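1669  /*! \brief Set the minimum number of sub-domain per processor
1670  *
1671  * \param n_sub number of sub-domains
1672  *
1673  */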
1674  void setDecompositionGranularity(size_t n_sub)
1675  {
1676  this->v_sub_unit_factor = n_sub;
1677  }
1678 
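1679  /*! \brief Initialize the decomposition
1680  *
1681  * \param box domain
1682  * \param bc boundary conditions
1683  * \param g ghost extension
1684  * \param opt options
1685  * \param gdist grid information
1686  */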
1687  void init_decomposition(Box<dim,St> & box,
1688  const size_t (& bc)[dim],
1689  const Ghost<dim,St> & g,
1690  size_t opt,
1691  const grid_sm<dim,void> & gdist)
1692  {
1693  size_t div[dim];
1694 
1695  if (opt & BIND_DEC_TO_GHOST)
1696  {
1697  // padding
1698  size_t pad = 0;
1699 
1700  // CellDecomposer
1701  CellDecomposer_sm<dim,St,shift<dim,St>> cd_sm;
1702 
1703  // Calculate the divisions for the symmetric Cell-lists
1704  cl_param_calculateSym<dim,St>(box,cd_sm,g,pad);
1705 
1706  for (size_t i = 0 ; i < dim ; i++)
1707  {div[i] = cd_sm.getDiv()[i] - 2*pad;}
1708 
1709  // Create the sub-domains
1710  dec.setParameters(div, box, bc, g, gdist);
1711  }
1712  else
1713  {
1714  dec.setGoodParameters(box, bc, g, getDecompositionGranularity(), gdist);
1715  }
1716  dec.decompose();
1717  }
1718 
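1719  /*! \brief Initialize the decomposition, taking the divisions from a grid
1720  *
1721  * \param box domain
1722  * \param bc boundary conditions
1723  * \param g ghost extension
1724  * \param opt options
1725  * \param gdist grid information
1726  */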
1727  void init_decomposition_gr_cell(Box<dim,St> & box,
1728  const size_t (& bc)[dim],
1729  const Ghost<dim,St> & g,
1730  size_t opt,
1731  const grid_sm<dim,void> & gdist)
1732  {
1733  size_t div[dim];
1734 
1735  for (size_t i = 0 ; i < dim ; i++)
1736  {div[i] = gdist.size(i);}
1737 
1738  // Create the sub-domains
1739  dec.setParameters(div, box, bc, g);
1740 
1741  dec.decompose();
1742  }
1743 
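1744  /*! \brief It synchronize the properties and position of the ghost particles
1745  *
1746  * \tparam prp list of properties to get synchronized
1747  *
1748  * \param v_pos vector of particle positions
1749  * \param v_prp vector of particle properties
1750  * \param ghostMarker marker between real and ghost particles
1751  * \param opt options, WITH_POSITION also sends the positional information of the particles
1752  *
1753  */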
1754  template<unsigned int impl, int ... prp> inline void ghost_get_(openfpm::vector<Point<dim, St>,Memory,layout_base> & v_pos,
1755  openfpm::vector<prop,Memory,layout_base> & v_prp,
1756  size_t & ghostMarker,
1757  size_t opt = WITH_POSITION)
1758  {
1759 #ifdef PROFILE_SCOREP
1760  SCOREP_USER_REGION("ghost_get",SCOREP_USER_REGION_TYPE_FUNCTION)
1761 #endif
1762 
1763  // Sending property object
1764  typedef object<typename object_creator<typename prop::type, prp...>::type> prp_object;
1765 
1766  // send vector for each processor
1767  typedef openfpm::vector<prp_object,Memory,layout_base,openfpm::grow_policy_identity> send_vector;
1768 
1769  if (!(opt & NO_POSITION))
1770  {v_pos.resize(ghostMarker);}
1771 
1772  // reset the ghost part
1773 
1774  if (!(opt & SKIP_LABELLING))
1775  {v_prp.resize(ghostMarker);}
1776 
1777  // Label all the particles
1778  if ((opt & SKIP_LABELLING) == false)
1779  {labelParticlesGhost(v_pos,v_prp,prc_g_opart,prc_sz_gg,prc_offset,ghostMarker,opt);}
1780 
1781  {
1782  // Send and receive ghost particle information
1783  openfpm::vector<send_vector> g_send_prp;
1784 
1785  fill_send_ghost_prp_buf<send_vector, prp_object, prp...>(v_prp,prc_sz_gg,g_send_prp,opt);
1786 
1787  #if defined(CUDA_GPU) && defined(__NVCC__)
1788  cudaDeviceSynchronize();
1789  #endif
1790 
1791  // if there are no properties skip
1792  // SSendRecvP sends everything when we do not give properties
1793 
1794  ghost_exchange_comm_impl<impl,layout_base,prp ...>::
1795  sendrecv_prp(v_cl,g_send_prp,v_prp,v_pos,prc_g_opart,
1796  prc_recv_get_prp,recv_sz_get_prp,recv_sz_get_byte,g_opart_sz,ghostMarker,opt);
1797  }
1798 
1799  if (!(opt & NO_POSITION))
1800  {
1801  // Sending buffer for the ghost particles position
1802  openfpm::vector<send_pos_vector> g_pos_send;
1803 
1804  fill_send_ghost_pos_buf(v_pos,prc_sz_gg,g_pos_send,opt,impl == GHOST_ASYNC);
1805 
1806 #if defined(CUDA_GPU) && defined(__NVCC__)
1807  cudaDeviceSynchronize();
1808 #endif
1809 
1810  ghost_exchange_comm_impl<impl,layout_base,prp ...>::
1811  sendrecv_pos(v_cl,g_pos_send,v_prp,v_pos,prc_recv_get_pos,recv_sz_get_pos,prc_g_opart,opt);
1812 
1813  // fill g_opart_sz
1814  g_opart_sz.resize(prc_g_opart.size());
1815 
1816  for (size_t i = 0 ; i < prc_g_opart.size() ; i++)
1817  g_opart_sz.get(i) = g_pos_send.get(i).size();
1818  }
1819 
1820  // Important: ensure that the number of particles in v_prp is equal to v_pos.
1821  // Note that if we do not give properties, sizeof...(prp) == 0 and in general at this point
1822  // v_prp.size() != v_pos.size()
1823  if (!(opt & SKIP_LABELLING))
1824  {
1825  v_prp.resize(v_pos.size());
1826  }
1827 
1828  add_loc_particles_bc(v_pos,v_prp,ghostMarker,opt);
1829  }
1830 
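1831  /*! \brief It synchronize the properties and position of the ghost particles,
1832  * waiting for the asynchronous communication started by ghost_get_ with GHOST_ASYNC
1833  *
1834  * \tparam prp list of properties to get synchronized
1835  *
1836  * \param v_pos vector of particle positions
1837  * \param v_prp vector of particle properties
1838  * \param ghostMarker marker between real and ghost particles
1839  * \param opt options
1840  */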
1841  template<int ... prp> inline void ghost_wait_(openfpm::vector<Point<dim, St>,Memory,layout_base> & v_pos,
1842  openfpm::vector<prop,Memory,layout_base> & v_prp,
1843  size_t & ghostMarker,
1844  size_t opt = WITH_POSITION)
1845  {
1846  // Sending property object
1847  typedef object<typename object_creator<typename prop::type, prp...>::type> prp_object;
1848 
1849  // send vector for each processor
1850  typedef openfpm::vector<prp_object,Memory,layout_base,openfpm::grow_policy_identity> send_vector;
1851 
1852  // Send and receive ghost particle information
1853  openfpm::vector<send_vector> g_send_prp;
1854  openfpm::vector<send_pos_vector> g_pos_send;
1855 
1856  ghost_exchange_comm_impl<GHOST_ASYNC,layout_base,prp ...>::
1857  sendrecv_prp_wait(v_cl,g_send_prp,v_prp,v_pos,prc_g_opart,
1858  prc_recv_get_prp,recv_sz_get_prp,recv_sz_get_byte,g_opart_sz,ghostMarker,opt);
1859 
1860  ghost_exchange_comm_impl<GHOST_ASYNC,layout_base,prp ...>::
1861  sendrecv_pos_wait(v_cl,g_pos_send,v_prp,v_pos,prc_recv_get_pos,recv_sz_get_pos,prc_g_opart,opt);
1862 
1863  }
1864 
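1865  /*! \brief It moves all the particles that do not belong to the local processor to the respective processor
1866  *
1867  * \tparam prp properties to communicate
1868  *
1869  * \param v_pos vector of particle positions
1870  * \param v_prp vector of particle properties
1871  * \param ghostMarker ghost marker
1872  * \param opt options
1873  *
1874  */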
1881  template<unsigned int ... prp> void map_list_(openfpm::vector<Point<dim, St>> & v_pos, openfpm::vector<prop> & v_prp, size_t & ghostMarker, size_t opt)
1882  {
1883  if (opt & RUN_ON_DEVICE)
1884  {
1885  std::cout << "Error: " << __FILE__ << ":" << __LINE__ << " map_list is unsupported on device (coming soon)" << std::endl;
1886  return;
1887  }
1888 
1889  typedef KillParticle obp;
1890 
1891  // Processor communication size
1892  openfpm::vector<aggregate<unsigned int,unsigned int>,Memory,layout_base> prc_sz(v_cl.getProcessingUnits());
1893 
1894  // map completely reset the ghost part
1895  v_pos.resize(ghostMarker);
1896  v_prp.resize(ghostMarker);
1897 
1898  // m_opart, Contain the processor id of each particle (basically where they have to go)
1899  labelParticleProcessor<obp>(v_pos,m_opart, prc_sz,opt);
1900 
1901  // Calculate the sending buffer size for each processor, put this information in
1902  // a contiguous buffer
1903  p_map_req.resize(v_cl.getProcessingUnits());
1904  openfpm::vector<size_t> prc_sz_r;
1905  openfpm::vector<size_t> prc_r;
1906 
1907  for (size_t i = 0; i < v_cl.getProcessingUnits(); i++)
1908  {
1909  if (prc_sz.template get<0>(i) != 0)
1910  {
1911  p_map_req.get(i) = prc_r.size();
1912  prc_r.add(i);
1913  prc_sz_r.add(prc_sz.template get<0>(i));
1914  }
1915  }
1916 
1917  if (opt & MAP_LOCAL)
1918  {
1919  // if the map is local we indicate that we receive only from the neighborhood processors
1920 
1921  prc_recv_map.clear();
1922  for (size_t i = 0 ; i < dec.getNNProcessors() ; i++)
1923  {prc_recv_map.add(dec.IDtoProc(i));}
1924  }
1925 
1926  // Sending property object
1927  typedef object<typename object_creator<typename prop::type, prp...>::type> prp_object;
1928 
1929  //! position send buffer
1930  openfpm::vector<openfpm::vector<Point<dim,St>>> m_pos;
1931  //! properties send buffer
1932  openfpm::vector<openfpm::vector<prp_object>> m_prp;
1933 
1934  fill_send_map_buf_list<prp_object,prp...>(v_pos,v_prp,prc_sz_r, m_pos, m_prp);
1935 
1936  v_cl.SSendRecv(m_pos,v_pos,prc_r,prc_recv_map,recv_sz_map,opt);
1937  v_cl.template SSendRecvP<openfpm::vector<prp_object>,decltype(v_prp),layout_base,prp...>(m_prp,v_prp,prc_r,prc_recv_map,recv_sz_map,opt);
1938 
1939  // mark the ghost part
1940 
1941  ghostMarker = v_pos.size();
1942  }
1943 
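1944  /*! \brief It moves all the particles that do not belong to the local processor to the respective processor
1945  *
1946  * \tparam obp out-of-bound policy, what to do with the out-of-bound particles
1947  *
1948  * \param v_pos vector of particle positions
1949  * \param v_prp vector of particle properties
1950  * \param ghostMarker ghost marker
1951  * \param opt options
1952  *
1953  */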
1957  template<typename obp = KillParticle>
1958  void map_(openfpm::vector<Point<dim, St>,Memory,layout_base> & v_pos,
1959  openfpm::vector<prop,Memory,layout_base> & v_prp, size_t & ghostMarker,
1960  size_t opt)
1961  {
1962 #ifdef PROFILE_SCOREP
1963  SCOREP_USER_REGION("map",SCOREP_USER_REGION_TYPE_FUNCTION)
1964 #endif
1965 
1966  prc_sz.resize(v_cl.getProcessingUnits());
1967 
1968  // map completely reset the ghost part
1969  v_pos.resize(ghostMarker);
1970  v_prp.resize(ghostMarker);
1971 
1972  // Contain the processor id of each particle (basically where they have to go)
1973  labelParticleProcessor<obp>(v_pos,m_opart, prc_sz,opt);
1974 
1975  openfpm::vector<size_t> prc_sz_r;
1976  openfpm::vector<size_t> prc_r;
1977 
1978  // Calculate the sending buffer size for each processor, put this information in
1979  // a contiguous buffer
1980  calc_send_buffers(prc_sz,prc_sz_r,prc_r,opt);
1981 
1982  //! position send buffers
1983  openfpm::vector<openfpm::vector<Point<dim,St>,Memory,layout_base,openfpm::grow_policy_identity>> m_pos;
1984  //! properties send buffers
1985  openfpm::vector<openfpm::vector<prop,Memory,layout_base,openfpm::grow_policy_identity>> m_prp;
1986 
1987  fill_send_map_buf(v_pos,v_prp, prc_sz_r,prc_r, m_pos, m_prp,prc_sz,opt);
1988 
1989  size_t opt_ = 0;
1990  if (opt & RUN_ON_DEVICE)
1991  {
1992 #if defined(CUDA_GPU) && defined(__NVCC__)
1993  // Before doing the communication on RUN_ON_DEVICE we have to be sure that the previous kernels complete
1994  cudaDeviceSynchronize();
1995  opt_ |= MPI_GPU_DIRECT;
1996 #else
1997  std::cout << __FILE__ << ":" << __LINE__ << " error: to use the option RUN_ON_DEVICE you must compile with NVCC" << std::endl;
1998 #endif
1999  }
2000 
2001  v_cl.template SSendRecv<openfpm::vector<Point<dim, St>,Memory,layout_base,openfpm::grow_policy_identity>,
2002  openfpm::vector<Point<dim, St>,Memory,layout_base>,
2003  layout_base>
2004  (m_pos,v_pos,prc_r,prc_recv_map,recv_sz_map,opt_);
2005 
2006  v_cl.template SSendRecv<openfpm::vector<prop,Memory,layout_base,openfpm::grow_policy_identity>,
2007  openfpm::vector<prop,Memory,layout_base>,
2008  layout_base>
2009  (m_prp,v_prp,prc_r,prc_recv_map,recv_sz_map,opt_);
2010 
2011  // mark the ghost part
2012 
2013  ghostMarker = v_pos.size();
2014  }
2015 
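2016  /*! \brief Set the decomposition
2017  *
2018  * \param dec2 decomposition to copy
2019  *
2020  */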
2021  void setDecomposition(Decomposition & dec2)
2022  {
2023  dec = dec2;
2024  }
2025 
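2026  /*! \brief Get the decomposition
2027  *
2028  * \return the decomposition
2029  *
2030  */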
2031  inline Decomposition & getDecomposition()
2032  {
2033  return dec;
2034  }
2035 
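2036  /*! \brief Get the decomposition
2037  *
2038  * \return the decomposition
2039  *
2040  */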
2041  inline const Decomposition & getDecomposition() const
2042  {
2043  return dec;
2044  }
2045 
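2046  /*! \brief Copy a vector
2047  *
2048  * \param vc vector to copy
2049  *
2050  * \return itself
2051  *
2052  */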
2053  vector_dist_comm<dim,St,prop,Decomposition,Memory,layout_base> & operator=(vector_dist_comm<dim,St,prop,Decomposition,Memory,layout_base> && vc)
2054  {
2055  dec = vc.dec;
2056 
2057  return *this;
2058  }
2059 
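2060  /*! \brief Copy a vector
2061  *
2062  * \param vc vector to copy
2063  *
2064  * \return itself
2065  *
2066  */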
2067  vector_dist_comm<dim,St,prop,Decomposition,Memory,layout_base> & operator=(const vector_dist_comm<dim,St,prop,Decomposition,Memory,layout_base> & vc)
2068  {
2069  dec = vc.dec;
2070 
2071  return *this;
2072  }
2073 
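2074  /*! \brief Ghost put
2075  *
2076  * It merges the properties of the ghost particles back into the corresponding real particles
2077  *
2078  * \tparam op merge operation to apply
2079  * \tparam prp properties to merge
2080  *
2081  * \param v_pos vector of particle positions
2082  * \param v_prp vector of particle properties
2083  * \param ghostMarker ghost marker
2084  */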
2085  template<template<typename,typename> class op, int ... prp>
2086  void ghost_put_(openfpm::vector<Point<dim, St>,Memory,layout_base> & v_pos,
2087  openfpm::vector<prop,Memory,layout_base> & v_prp,
2088  size_t & ghostMarker,
2089  size_t opt)
2090  {
2091  // Sending property object
2092  typedef object<typename object_creator<typename prop::type, prp...>::type> prp_object;
2093 
2094  // send vector for each processor
2095  typedef openfpm::vector<prp_object,Memory,layout_base,openfpm::grow_policy_identity> send_vector;
2096 
2097  openfpm::vector<send_vector> g_send_prp;
2098  fill_send_ghost_put_prp_buf<send_vector, prp_object, prp...>(v_prp,g_send_prp,ghostMarker,opt);
2099 
2100  if (opt & RUN_ON_DEVICE)
2101  {
2102 #if defined(CUDA_GPU) && defined(__NVCC__)
2103  // Before doing the communication on RUN_ON_DEVICE we have to be sure that the previous kernels complete
2104  cudaDeviceSynchronize();
2105 #else
2106  std::cout << __FILE__ << ":" << __LINE__ << " error: to use the option RUN_ON_DEVICE you must compile with NVCC" << std::endl;
2107 #endif
2108  }
2109 
2110  // Send and receive ghost particle information
2111  if (opt & NO_CHANGE_ELEMENTS)
2112  {
2113  size_t opt_ = compute_options(opt);
2114 
2115  if (opt & RUN_ON_DEVICE)
2116  {
2117  op_ssend_recv_merge_gpu<op,decltype(g_opart_device),decltype(prc_offset)> opm(g_opart_device,prc_offset);
2118  v_cl.template SSendRecvP_op<op_ssend_recv_merge_gpu<op,decltype(g_opart_device),decltype(prc_offset)>,
2119  send_vector,
2120  decltype(v_prp),
2121  layout_base,
2122  prp...>(g_send_prp,v_prp,prc_recv_get_prp,opm,prc_g_opart,g_opart_sz,opt_);
2123  }
2124  else
2125  {
2126  op_ssend_recv_merge<op,decltype(g_opart)> opm(g_opart);
2127  v_cl.template SSendRecvP_op<op_ssend_recv_merge<op,decltype(g_opart)>,
2128  send_vector,
2129  decltype(v_prp),
2130  layout_base,
2131  prp...>(g_send_prp,v_prp,prc_recv_get_prp,opm,prc_g_opart,g_opart_sz,opt_);
2132  }
2133  }
2134  else
2135  {
2136  size_t opt_ = compute_options(opt);
2137 
2138  if (opt & RUN_ON_DEVICE)
2139  {
2140  op_ssend_recv_merge_gpu<op,decltype(g_opart_device),decltype(prc_offset)> opm(g_opart_device,prc_offset);
2141  v_cl.template SSendRecvP_op<op_ssend_recv_merge_gpu<op,decltype(g_opart_device),decltype(prc_offset)>,
2142  send_vector,
2143  decltype(v_prp),
2144  layout_base,
2145  prp...>(g_send_prp,v_prp,get_last_ghost_get_num_proc_vector(),opm,prc_recv_put,recv_sz_put,opt_);
2146  }
2147  else
2148  {
2149  op_ssend_recv_merge<op,decltype(g_opart)> opm(g_opart);
2150  v_cl.template SSendRecvP_op<op_ssend_recv_merge<op,decltype(g_opart)>,
2151  send_vector,
2152  decltype(v_prp),
2153  layout_base,
2154  prp...>(g_send_prp,v_prp,get_last_ghost_get_num_proc_vector(),opm,prc_recv_put,recv_sz_put,opt_);
2155  }
2156  }
2157 
2158  // process also the local replicated particles
2159 
2160  if (lg_m < v_prp.size() && v_prp.size() - lg_m != o_part_loc.size())
2161  {
2162  std::cerr << "Error: " << __FILE__ << ":" << __LINE__ << " Local ghost particles = " << v_prp.size() - lg_m << " != " << o_part_loc.size() << std::endl;
2163  std::cerr << "Error: " << __FILE__ << ":" << __LINE__ << " Check that you did a ghost_get before a ghost_put" << std::endl;
2164  }
2165 
2166 
2167  if (opt & RUN_ON_DEVICE)
2168  {
2169  v_prp.template merge_prp_v_device<op,prop,Memory,
2170  typename layout_base<prop>::type,
2171  layout_base,
2172  decltype(o_part_loc),prp ...>(v_prp,lg_m,o_part_loc);
2173  }
2174  else
2175  {
2176  v_prp.template merge_prp_v<op,prop,Memory,
2177  typename layout_base<prop>::type,
2178  layout_base,
2179  decltype(o_part_loc),prp ...>(v_prp,lg_m,o_part_loc);
2180  }
2181  }
2182 };
2183 
2184 
2185 #endif /* SRC_VECTOR_VECTOR_DIST_COMM_HPP_ */