OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
 
vector_dist_comm.hpp
/*
 * vector_dist_comm.hpp
 *
 *  Created on: Aug 18, 2016
 *      Author: i-bird
 */

#ifndef SRC_VECTOR_VECTOR_DIST_COMM_HPP_
#define SRC_VECTOR_VECTOR_DIST_COMM_HPP_

#define TEST1

#if defined(CUDA_GPU) && defined(__NVCC__)
#include "Vector/cuda/vector_dist_cuda_funcs.cuh"
#include "util/cuda/kernels.cuh"
#endif

#include "Vector/util/vector_dist_funcs.hpp"
#include "cuda/vector_dist_comm_util_funcs.cuh"
#include "util/cuda/scan_ofp.cuh"

template<typename T>
struct DEBUG
{
    static float ret(T & tmp)
    {
        return 0.0;
    }
};

template<>
struct DEBUG<float &>
{
    static float ret(float & tmp)
    {
        return tmp;
    }
};

/*! \brief Compute the S(Send)Recv communication options from the ghost_get/ghost_put options
 *
 */
inline static size_t compute_options(size_t opt)
{
    size_t opt_ = NONE;
    if ((opt & NO_CHANGE_ELEMENTS) && (opt & SKIP_LABELLING))
    {opt_ = RECEIVE_KNOWN | KNOWN_ELEMENT_OR_BYTE;}

    if (opt & RUN_ON_DEVICE)
    {
#if defined(CUDA_GPU) && defined(__NVCC__)
        // Before doing the communication on RUN_ON_DEVICE we have to be sure that the previous kernels complete
        opt_ |= MPI_GPU_DIRECT;
#else
        std::cout << __FILE__ << ":" << __LINE__ << " error: to use the option RUN_ON_DEVICE you must compile with NVCC" << std::endl;
#endif
    }

    return opt_;
}
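/* Illustrative note (not part of the original header): how the ghost options map to
 * S(Send)Recv options. For example, a ghost_get that skips labelling and promises
 * unchanged element counts enables the "receive known" fast path:
 *
 *     size_t opt_ = compute_options(SKIP_LABELLING | NO_CHANGE_ELEMENTS);
 *     // opt_ == (RECEIVE_KNOWN | KNOWN_ELEMENT_OR_BYTE): receive sizes are already
 *     // known, so no size-negotiation round is needed
 */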

/*! \brief Template selector for asynchronous or not asynchronous communication
 *
 */
template<unsigned int impl, template<typename> class layout_base, unsigned int ... prp>
struct ghost_exchange_comm_impl
{
    template<typename Vcluster_type, typename vector_prop_type,
             typename vector_pos_type, typename send_vector,
             typename prc_recv_get_type, typename prc_g_opart_type,
             typename recv_sz_get_type, typename recv_sz_get_byte_type,
             typename g_opart_sz_type>
    static inline void sendrecv_prp(Vcluster_type & v_cl,
                                    openfpm::vector<send_vector> & g_send_prp,
                                    vector_prop_type & v_prp,
                                    vector_pos_type & v_pos,
                                    prc_g_opart_type & prc_g_opart,
                                    prc_recv_get_type & prc_recv_get,
                                    recv_sz_get_type & recv_sz_get,
                                    recv_sz_get_byte_type & recv_sz_get_byte,
                                    g_opart_sz_type & g_opart_sz,
                                    size_t g_m,
                                    size_t opt)
    {
        // if there are no properties skip
        // SSendRecvP send everything when we do not give properties

        if (sizeof...(prp) != 0)
        {
            size_t opt_ = compute_options(opt);
            if (opt & SKIP_LABELLING)
            {
                if (opt & RUN_ON_DEVICE)
                {
                    op_ssend_gg_recv_merge_run_device opm(g_m);
                    v_cl.template SSendRecvP_op<op_ssend_gg_recv_merge_run_device,send_vector,decltype(v_prp),layout_base,prp...>(g_send_prp,v_prp,prc_g_opart,opm,prc_recv_get,recv_sz_get,opt_);
                }
                else
                {
                    op_ssend_gg_recv_merge opm(g_m);
                    v_cl.template SSendRecvP_op<op_ssend_gg_recv_merge,send_vector,decltype(v_prp),layout_base,prp...>(g_send_prp,v_prp,prc_g_opart,opm,prc_recv_get,recv_sz_get,opt_);
                }
            }
            else
            {v_cl.template SSendRecvP<send_vector,decltype(v_prp),layout_base,prp...>(g_send_prp,v_prp,prc_g_opart,prc_recv_get,recv_sz_get,recv_sz_get_byte,opt_);}

            // fill g_opart_sz
            g_opart_sz.resize(prc_g_opart.size());

            for (size_t i = 0 ; i < prc_g_opart.size() ; i++)
                g_opart_sz.get(i) = g_send_prp.get(i).size();
        }
    }

    template<typename Vcluster_type, typename vector_prop_type,
             typename vector_pos_type, typename send_pos_vector,
             typename prc_recv_get_type, typename prc_g_opart_type,
             typename recv_sz_get_type>
    static inline void sendrecv_pos(Vcluster_type & v_cl,
                                    openfpm::vector<send_pos_vector> & g_pos_send,
                                    vector_prop_type & v_prp,
                                    vector_pos_type & v_pos,
                                    prc_recv_get_type & prc_recv_get,
                                    recv_sz_get_type & recv_sz_get,
                                    prc_g_opart_type & prc_g_opart,
                                    size_t opt)
    {
        size_t opt_ = compute_options(opt);
        if (opt & SKIP_LABELLING)
        {
            v_cl.template SSendRecv<send_pos_vector,decltype(v_pos),layout_base>(g_pos_send,v_pos,prc_g_opart,prc_recv_get,recv_sz_get,opt_);
        }
        else
        {
            prc_recv_get.clear();
            recv_sz_get.clear();
            v_cl.template SSendRecv<send_pos_vector,decltype(v_pos),layout_base>(g_pos_send,v_pos,prc_g_opart,prc_recv_get,recv_sz_get,opt_);
        }
    }

    template<typename Vcluster_type, typename vector_prop_type,
             typename vector_pos_type, typename send_pos_vector,
             typename prc_recv_get_type, typename prc_g_opart_type,
             typename recv_sz_get_type>
    static inline void sendrecv_pos_wait(Vcluster_type & v_cl,
                                         openfpm::vector<send_pos_vector> & g_pos_send,
                                         vector_prop_type & v_prp,
                                         vector_pos_type & v_pos,
                                         prc_recv_get_type & prc_recv_get,
                                         recv_sz_get_type & recv_sz_get,
                                         prc_g_opart_type & prc_g_opart,
                                         size_t opt)
    {}

    template<typename Vcluster_type, typename vector_prop_type,
             typename vector_pos_type, typename send_vector,
             typename prc_recv_get_type, typename prc_g_opart_type,
             typename recv_sz_get_type, typename recv_sz_get_byte_type,
             typename g_opart_sz_type>
    static inline void sendrecv_prp_wait(Vcluster_type & v_cl,
                                         openfpm::vector<send_vector> & g_send_prp,
                                         vector_prop_type & v_prp,
                                         vector_pos_type & v_pos,
                                         prc_g_opart_type & prc_g_opart,
                                         prc_recv_get_type & prc_recv_get,
                                         recv_sz_get_type & recv_sz_get,
                                         recv_sz_get_byte_type & recv_sz_get_byte,
                                         g_opart_sz_type & g_opart_sz,
                                         size_t g_m,
                                         size_t opt)
    {}
};


template<template<typename> class layout_base, unsigned int ... prp>
struct ghost_exchange_comm_impl<GHOST_ASYNC,layout_base, prp ... >
{
    template<typename Vcluster_type, typename vector_prop_type,
             typename vector_pos_type, typename send_vector,
             typename prc_recv_get_type, typename prc_g_opart_type,
             typename recv_sz_get_type, typename recv_sz_get_byte_type,
             typename g_opart_sz_type>
    static inline void sendrecv_prp(Vcluster_type & v_cl,
                                    openfpm::vector<send_vector> & g_send_prp,
                                    vector_prop_type & v_prp,
                                    vector_pos_type & v_pos,
                                    prc_g_opart_type & prc_g_opart,
                                    prc_recv_get_type & prc_recv_get,
                                    recv_sz_get_type & recv_sz_get,
                                    recv_sz_get_byte_type & recv_sz_get_byte,
                                    g_opart_sz_type & g_opart_sz,
                                    size_t g_m,
                                    size_t opt)
    {
        prc_recv_get.clear();
        recv_sz_get.clear();

        // if there are no properties skip
        // SSendRecvP send everything when we do not give properties

        if (sizeof...(prp) != 0)
        {
            size_t opt_ = compute_options(opt);
            if (opt & SKIP_LABELLING)
            {
                if (opt & RUN_ON_DEVICE)
                {
                    op_ssend_gg_recv_merge_run_device opm(g_m);
                    v_cl.template SSendRecvP_opAsync<op_ssend_gg_recv_merge_run_device,send_vector,decltype(v_prp),layout_base,prp...>(g_send_prp,v_prp,prc_g_opart,opm,prc_recv_get,recv_sz_get,opt_);
                }
                else
                {
                    op_ssend_gg_recv_merge opm(g_m);
                    v_cl.template SSendRecvP_opAsync<op_ssend_gg_recv_merge,send_vector,decltype(v_prp),layout_base,prp...>(g_send_prp,v_prp,prc_g_opart,opm,prc_recv_get,recv_sz_get,opt_);
                }
            }
            else
            {v_cl.template SSendRecvPAsync<send_vector,decltype(v_prp),layout_base,prp...>(g_send_prp,v_prp,prc_g_opart,prc_recv_get,recv_sz_get,recv_sz_get_byte,opt_);}
        }

        // fill g_opart_sz
        g_opart_sz.resize(prc_g_opart.size());

        for (size_t i = 0 ; i < prc_g_opart.size() ; i++)
        {g_opart_sz.get(i) = g_send_prp.get(i).size();}
    }

    template<typename Vcluster_type, typename vector_prop_type,
             typename vector_pos_type, typename send_pos_vector,
             typename prc_recv_get_type, typename prc_g_opart_type,
             typename recv_sz_get_type>
    static inline void sendrecv_pos(Vcluster_type & v_cl,
                                    openfpm::vector<send_pos_vector> & g_pos_send,
                                    vector_prop_type & v_prp,
                                    vector_pos_type & v_pos,
                                    prc_recv_get_type & prc_recv_get,
                                    recv_sz_get_type & recv_sz_get,
                                    prc_g_opart_type & prc_g_opart,
                                    size_t opt)
    {
        prc_recv_get.clear();
        recv_sz_get.clear();

        size_t opt_ = compute_options(opt);
        if (opt & SKIP_LABELLING)
        {
            v_cl.template SSendRecvAsync<send_pos_vector,decltype(v_pos),layout_base>(g_pos_send,v_pos,prc_g_opart,prc_recv_get,recv_sz_get,opt_);
        }
        else
        {
            prc_recv_get.clear();
            recv_sz_get.clear();
            v_cl.template SSendRecvAsync<send_pos_vector,decltype(v_pos),layout_base>(g_pos_send,v_pos,prc_g_opart,prc_recv_get,recv_sz_get,opt_);
        }
    }

    template<typename Vcluster_type, typename vector_prop_type,
             typename vector_pos_type, typename send_pos_vector,
             typename prc_recv_get_type, typename prc_g_opart_type,
             typename recv_sz_get_type>
    static inline void sendrecv_pos_wait(Vcluster_type & v_cl,
                                         openfpm::vector<send_pos_vector> & g_pos_send,
                                         vector_prop_type & v_prp,
                                         vector_pos_type & v_pos,
                                         prc_recv_get_type & prc_recv_get,
                                         recv_sz_get_type & recv_sz_get,
                                         prc_g_opart_type & prc_g_opart,
                                         size_t opt)
    {
        size_t opt_ = compute_options(opt);
        if (opt & SKIP_LABELLING)
        {
            v_cl.template SSendRecvWait<send_pos_vector,decltype(v_pos),layout_base>(g_pos_send,v_pos,prc_g_opart,prc_recv_get,recv_sz_get,opt_);
        }
        else
        {
            v_cl.template SSendRecvWait<send_pos_vector,decltype(v_pos),layout_base>(g_pos_send,v_pos,prc_g_opart,prc_recv_get,recv_sz_get,opt_);
        }
    }

    template<typename Vcluster_type, typename vector_prop_type,
             typename vector_pos_type, typename send_vector,
             typename prc_recv_get_type, typename prc_g_opart_type,
             typename recv_sz_get_type, typename recv_sz_get_byte_type,
             typename g_opart_sz_type>
    static inline void sendrecv_prp_wait(Vcluster_type & v_cl,
                                         openfpm::vector<send_vector> & g_send_prp,
                                         vector_prop_type & v_prp,
                                         vector_pos_type & v_pos,
                                         prc_g_opart_type & prc_g_opart,
                                         prc_recv_get_type & prc_recv_get,
                                         recv_sz_get_type & recv_sz_get,
                                         recv_sz_get_byte_type & recv_sz_get_byte,
                                         g_opart_sz_type & g_opart_sz,
                                         size_t g_m,
                                         size_t opt)
    {
        // if there are no properties skip
        // SSendRecvP send everything when we do not give properties

        if (sizeof...(prp) != 0)
        {
            size_t opt_ = compute_options(opt);
            if (opt & SKIP_LABELLING)
            {
                if (opt & RUN_ON_DEVICE)
                {
                    op_ssend_gg_recv_merge_run_device opm(g_m);
                    v_cl.template SSendRecvP_opWait<op_ssend_gg_recv_merge_run_device,send_vector,decltype(v_prp),layout_base,prp...>(g_send_prp,v_prp,prc_g_opart,opm,prc_recv_get,recv_sz_get,opt_);
                }
                else
                {
                    op_ssend_gg_recv_merge opm(g_m);
                    v_cl.template SSendRecvP_opWait<op_ssend_gg_recv_merge,send_vector,decltype(v_prp),layout_base,prp...>(g_send_prp,v_prp,prc_g_opart,opm,prc_recv_get,recv_sz_get,opt_);
                }
            }
            else
            {v_cl.template SSendRecvPWait<send_vector,decltype(v_prp),layout_base,prp...>(g_send_prp,v_prp,prc_g_opart,prc_recv_get,recv_sz_get,recv_sz_get_byte,opt_);}
        }
    }
};
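/* Sketch (illustrative): the primary template implements the blocking exchange and the
 * GHOST_ASYNC specialization the split start/wait exchange, so ghost_get_ below can
 * overlap communication with computation. GHOST_SYNC is assumed to be the synchronous
 * selector value defined elsewhere in OpenFPM:
 *
 *     vdc.template ghost_get_<GHOST_ASYNC,0,1>(v_pos,v_prp,g_m); // start the exchange
 *     // ... compute on purely local particles while messages are in flight ...
 *     vdc.template ghost_wait_<0,1>(v_pos,v_prp,g_m);            // complete the exchange
 */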


/*! \brief This class is an helper for the communication of vector_dist
 *
 */
template<unsigned int dim,
         typename St,
         typename prop,
         typename Decomposition = CartDecomposition<dim,St>,
         typename Memory = HeapMemory,
         template<typename> class layout_base = memory_traits_lin>
class vector_dist_comm
{
    //! Number of units for each sub-domain
    size_t v_sub_unit_factor = 64;

    //! definition of the send vector for position
    typedef openfpm::vector<Point<dim, St>,Memory,layout_base,openfpm::grow_policy_identity> send_pos_vector;

    //! VCluster
    Vcluster<Memory> & v_cl;

    //! Domain decomposition
    Decomposition dec;

    //! It map the processor id with the communication request into map procedure
    openfpm::vector<size_t> p_map_req;

    //! Particles labeled for the map: local id, ordered id and destination processor
    openfpm::vector<aggregate<int, int, int>,
                    Memory,
                    layout_base > m_opart;

    //! Per processor ordered particle ids to send (ghost_get); processor ranks are in prc_g_opart
    openfpm::vector<openfpm::vector<aggregate<size_t, size_t>>> g_opart;

    //! Same as g_opart but on device, the vector of vector is flatten into a single vector
    openfpm::vector<aggregate<unsigned int, unsigned long int>,CudaMemory,memory_traits_inte> g_opart_device;

    //! Per processor number of particle g_opart_sz.get(i) = g_opart.get(i).size()
    openfpm::vector<size_t> g_opart_sz;

    //! processor rank list of g_opart
    openfpm::vector<size_t> prc_g_opart;

    //! processors that sent to us (local processor) during the last ghost_get
    openfpm::vector<size_t> prc_recv_get_pos;
    openfpm::vector<size_t> prc_recv_get_prp;

    //! the same as prc_recv_get but for map
    openfpm::vector<size_t> prc_recv_map;

    //! the same as prc_recv_get but for put
    openfpm::vector<size_t> prc_recv_put;

    //! sizes received from each processor during the last ghost_get
    openfpm::vector<size_t> recv_sz_get_pos;
    openfpm::vector<size_t> recv_sz_get_prp;

    //! Conversion to byte of recv_sz_get
    openfpm::vector<size_t> recv_sz_get_byte;

    //! The same as recv_sz_get but for map
    openfpm::vector<size_t> recv_sz_map;

    //! The same as recv_sz_get but for put
    openfpm::vector<size_t> recv_sz_put;

    //! elements sent for each processors (ghost_get)
    openfpm::vector<size_t> prc_sz_gg;

    //! Processor communication size
    openfpm::vector<aggregate<unsigned int, unsigned int>,Memory,layout_base> prc_offset;

    //! Processor communication size
    openfpm::vector<aggregate<unsigned int, unsigned int>,Memory,layout_base> prc_sz;

    //! Temporary CudaMemory to do stuff
    CudaMemory mem;

    //! temporary buffer to processors ids
    openfpm::vector<aggregate<unsigned int>,
                    Memory,
                    layout_base> proc_id_out;

    //! temporary buffer for the scan result
    openfpm::vector<aggregate<unsigned int>,
                    Memory,
                    layout_base> starts;

    //! Helper buffer for computation (on GPU) of local particles (position)
    openfpm::vector<Point<dim, St>,Memory,layout_base> v_pos_tmp;

    //! Helper buffer for computation (on GPU) of local particles (properties)
    openfpm::vector<prop,Memory,layout_base> v_prp_tmp;

    //! Receive buffers for message_alloc_map (declaration reconstructed from its use below)
    openfpm::vector<HeapMemory> recv_mem_gm;

    //! Marker that indicates where the locally replicated (periodic) ghost particles start in v_prp
    size_t lg_m;

    //! Sending buffer
    openfpm::vector_fr<Memory> hsmem;

    //! process the particle with properties
    template<typename prp_object, int ... prp>
    struct proc_with_prp
    {
        //! process the particle
        template<typename T1, typename T2> inline static void proc(size_t lbl, size_t cnt, size_t id, T1 & v_prp, T2 & m_prp)
        {
            // source object type
            typedef decltype(v_prp.get(id)) encap_src;
            // destination object type
            typedef decltype(m_prp.get(lbl).get(cnt)) encap_dst;

            // Copy only the selected properties
            object_si_d<encap_src, encap_dst, OBJ_ENCAP, prp...>(v_prp.get(id), m_prp.get(lbl).get(cnt));
        }
    };

    /*! \brief Get the number of particles received from each processor during the last ghost_get
     *
     * \param i processor (seen as near-processor id)
     *
     * \return the number of particles
     */
    size_t get_last_ghost_get_received_parts(size_t i)
    {
        // If the last ghost_get did not communicate properties, the information about the number
        // of particles received is in recv_sz_get_pos
        if (recv_sz_get_prp.size() != 0)
        {return recv_sz_get_prp.get(i);}
        else
        {return recv_sz_get_pos.get(i);}
    }

    /*! \brief Get the number of processors involved during the last ghost_get
     *
     * \return the number of processors
     */
    size_t get_last_ghost_get_num_proc()
    {
        if (prc_recv_get_prp.size() != 0)
        {return prc_recv_get_prp.size();}
        else
        {return prc_recv_get_pos.size();}
    }

    /*! \brief Get the list of processors involved during the last ghost_get
     *
     * \return the processor list
     */
    openfpm::vector<size_t> & get_last_ghost_get_num_proc_vector()
    {
        if (prc_recv_get_prp.size() != 0)
        {return prc_recv_get_prp;}
        else
        {return prc_recv_get_pos;}
    }

    /*! \brief Calculate sending buffer size for each processor
     *
     */
    void calc_send_buffers(openfpm::vector<aggregate<unsigned int, unsigned int>,Memory,layout_base> & prc_sz,
                           openfpm::vector<size_t> & prc_sz_r,
                           openfpm::vector<size_t> & prc_r,
                           size_t opt)
    {
        if (opt & RUN_ON_DEVICE)
        {
#ifndef TEST1
            size_t prev_off = 0;
            for (size_t i = 0; i < prc_sz.size() ; i++)
            {
                if (prc_sz.template get<1>(i) != (unsigned int)-1)
                {
                    prc_r.add(prc_sz.template get<1>(i));
                    prc_sz_r.add(prc_sz.template get<0>(i) - prev_off);
                }
                prev_off = prc_sz.template get<0>(i);
            }
#else

            // Calculate the sending buffer size for each processor, put this information in
            // a contiguous buffer

            for (size_t i = 0; i < v_cl.getProcessingUnits(); i++)
            {
                if (prc_sz.template get<0>(i) != 0 && v_cl.rank() != i)
                {
                    prc_r.add(i);
                    prc_sz_r.add(prc_sz.template get<0>(i));
                }
            }

#endif
        }
        else
        {
            // Calculate the sending buffer size for each processor, put this information in
            // a contiguous buffer

            p_map_req.resize(v_cl.getProcessingUnits());
            for (size_t i = 0; i < v_cl.getProcessingUnits(); i++)
            {
                if (prc_sz.template get<0>(i) != 0)
                {
                    p_map_req.get(i) = prc_r.size();
                    prc_r.add(i);
                    prc_sz_r.add(prc_sz.template get<0>(i));
                }
            }
        }
    }
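    /* Worked example (illustrative): on 4 processors, with local rank 2 and per-rank
     * send counts prc_sz = {5, 0, 7, 2}, the compacted buffers become
     *
     *     prc_r    = {0, 3}   // ranks we actually send to (zero-size entries dropped,
     *                         // the local rank is skipped on the device path)
     *     prc_sz_r = {5, 2}   // matching send counts
     *
     * so the subsequent SSendRecv calls operate on dense arrays without empty messages.
     */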

    //! From which decomposition the shift boxes are calculated
    long int shift_box_ndec = -1;

    //! this map is used to check if a combination is already present
    std::unordered_map<size_t, size_t> map_cmb;

    //! The boxes touching the border of the domain, divided in groups (one group per shift combination)
    openfpm::vector_std<openfpm::vector_std<Box<dim, St>>> box_f;

    //! The boxes touching the border of the domain + shift vector linearized from where they come from
    openfpm::vector<Box<dim, St>,Memory,layout_base> box_f_dev;
    openfpm::vector<aggregate<unsigned int>,Memory,layout_base> box_f_sv;

    //! Store the sector for each group (previous vector)
    openfpm::vector_std<comb<dim>> box_cmb;

    //! Id of the local particle to replicate for ghost_get
    openfpm::vector<aggregate<unsigned int,unsigned int>,Memory,layout_base> o_part_loc;

    /*! \brief For every internal ghost box we create a structure that orders such internal local ghost boxes by shift vector
     *
     */
    void createShiftBox()
    {
        if (shift_box_ndec == (long int)dec.get_ndec())
        {return;}

        struct sh_box
        {
            size_t shift_id;

            unsigned int box_f_sv;
            Box<dim,St> box_f_dev;

            bool operator<(const sh_box & tmp) const
            {
                return shift_id < tmp.shift_id;
            }

        };
        openfpm::vector<sh_box> reord_shift;
        box_f.clear();
        map_cmb.clear();
        box_cmb.clear();

        // Add local particles coming from periodic boundary; the only boxes that count are the ones
        // touching the border
        for (size_t i = 0; i < dec.getNLocalSub(); i++)
        {
            size_t Nl = dec.getLocalNIGhost(i);

            for (size_t j = 0; j < Nl; j++)
            {
                // If the ghost does not come from the intersection with an out of
                // border sub-domain, the combination is all zero and n_zero returns dim
                if (dec.getLocalIGhostPos(i, j).n_zero() == dim)
                    continue;

                // Check if we already have boxes with such a combination
                auto it = map_cmb.find(dec.getLocalIGhostPos(i, j).lin());
                if (it == map_cmb.end())
                {
                    // we do not have it
                    box_f.add();
                    box_f.last().add(dec.getLocalIGhostBox(i, j));
                    box_cmb.add(dec.getLocalIGhostPos(i, j));
                    map_cmb[dec.getLocalIGhostPos(i, j).lin()] = box_f.size() - 1;
                }
                else
                {
                    // we have it
                    box_f.get(it->second).add(dec.getLocalIGhostBox(i, j));
                }

                reord_shift.add();
                reord_shift.last().shift_id = dec.getLocalIGhostPos(i, j).lin();
                reord_shift.last().box_f_dev = dec.getLocalIGhostBox(i, j);
                reord_shift.last().box_f_sv = dec.convertShift(dec.getLocalIGhostPos(i, j));
            }
        }

        // now we sort box_f by shift_id; the reason is that we have to avoid duplicated particles
        reord_shift.sort();

        box_f_dev.resize(reord_shift.size());
        box_f_sv.resize(reord_shift.size());

        for (size_t i = 0 ; i < reord_shift.size() ; i++)
        {
            box_f_dev.get(i) = reord_shift.get(i).box_f_dev;
            box_f_sv.template get<0>(i) = reord_shift.get(i).box_f_sv;
        }

#ifdef CUDA_GPU

        // move box_f_dev and box_f_sv to the device
        box_f_dev.template hostToDevice<0,1>();
        box_f_sv.template hostToDevice<0>();

#endif

        shift_box_ndec = dec.get_ndec();
    }

    /*! \brief Local ghost from labeled particles
     *
     */
    void local_ghost_from_opart(openfpm::vector<Point<dim, St>,Memory,layout_base> & v_pos,
                                openfpm::vector<prop,Memory,layout_base> & v_prp,
                                size_t opt)
    {
        // get the shift vectors
        const openfpm::vector<Point<dim, St>,Memory,layout_base> & shifts = dec.getShiftVectors();

        if (!(opt & NO_POSITION))
        {
            if (opt & RUN_ON_DEVICE)
            {
                local_ghost_from_opart_impl<true,dim,St,prop,Memory,layout_base,std::is_same<Memory,CudaMemory>::value>
                ::run(o_part_loc,shifts,v_pos,v_prp,opt);
            }
            else
            {
                for (size_t i = 0 ; i < o_part_loc.size() ; i++)
                {
                    size_t lin_id = o_part_loc.template get<1>(i);
                    size_t key = o_part_loc.template get<0>(i);

                    Point<dim, St> p = v_pos.get(key);
                    // shift
                    p -= shifts.get(lin_id);

                    // add this particle shifting its position
                    v_pos.add(p);
                    v_prp.get(lg_m+i) = v_prp.get(key);
                }
            }
        }
        else
        {
            if (opt & RUN_ON_DEVICE)
            {
                local_ghost_from_opart_impl<false,dim,St,prop,Memory,layout_base,std::is_same<Memory,CudaMemory>::value>
                ::run(o_part_loc,shifts,v_pos,v_prp,opt);
            }
            else
            {
                for (size_t i = 0 ; i < o_part_loc.size() ; i++)
                {
                    size_t key = o_part_loc.template get<0>(i);

                    v_prp.get(lg_m+i) = v_prp.get(key);
                }
            }
        }
    }

    /*! \brief Local ghost from decomposition
     *
     */
    void local_ghost_from_dec(openfpm::vector<Point<dim, St>,Memory,layout_base> & v_pos,
                              openfpm::vector<prop,Memory,layout_base> & v_prp,
                              size_t g_m,size_t opt)
    {
        o_part_loc.clear();

        // get the shift vectors
        const openfpm::vector<Point<dim,St>,Memory,layout_base> & shifts = dec.getShiftVectors();

        if (opt & RUN_ON_DEVICE)
        {
            local_ghost_from_dec_impl<dim,St,prop,Memory,layout_base,std::is_same<Memory,CudaMemory>::value>
            ::run(o_part_loc,shifts,box_f_dev,box_f_sv,v_cl,starts,v_pos,v_prp,g_m,opt);
        }
        else
        {
            // Label the internal (assigned) particles
            auto it = v_pos.getIteratorTo(g_m);

            while (it.isNext())
            {
                auto key = it.get();

                // If particles are inside these boxes
                for (size_t i = 0; i < box_f.size(); i++)
                {
                    for (size_t j = 0; j < box_f.get(i).size(); j++)
                    {
                        if (box_f.get(i).get(j).isInsideNP(v_pos.get(key)) == true)
                        {
                            size_t lin_id = dec.convertShift(box_cmb.get(i));

                            o_part_loc.add();
                            o_part_loc.template get<0>(o_part_loc.size()-1) = key;
                            o_part_loc.template get<1>(o_part_loc.size()-1) = lin_id;

                            Point<dim, St> p = v_pos.get(key);
                            // shift
                            p -= shifts.get(lin_id);

                            // add this particle shifting its position
                            v_pos.add(p);
                            v_prp.add();
                            v_prp.last() = v_prp.get(key);

                            // boxes in one group can be overlapping;
                            // we do not have to search the other
                            // boxes otherwise we will have duplicate particles
                            //
                            // A small note: overlap of boxes across groups is fine
                            // (and needed) because each group has a different shift,
                            // producing non overlapping particles
                            //
                            break;
                        }
                    }
                }

                ++it;
            }
        }
    }

    /*! \brief Add local particles based on the boundary conditions
     *
     * Particles that touch a periodic border are replicated on the opposite
     * side of the domain, so that local interactions across the periodic
     * boundary can be computed.
     *
     */
    void add_loc_particles_bc(openfpm::vector<Point<dim, St>,Memory,layout_base> & v_pos,
                              openfpm::vector<prop,Memory,layout_base> & v_prp,
                              size_t & g_m,
                              size_t opt)
    {
        // Create the shift boxes
        createShiftBox();

        if (!(opt & SKIP_LABELLING))
            lg_m = v_prp.size();

        if (box_f.size() == 0)
            return;
        else
        {
            if (opt & SKIP_LABELLING)
            {local_ghost_from_opart(v_pos,v_prp,opt);}
            else
            {local_ghost_from_dec(v_pos,v_prp,g_m,opt);}
        }
    }
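    /* Illustrative picture: on a domain [0,1) periodic along x with ghost width 0.1,
     * a real particle at x = 0.05 intersects one of the shift boxes, so a replica
     * shifted by the corresponding shift vector (here +1 along x, giving x = 1.05) is
     * appended after the ghost marker. The first ghost_get builds the o_part_loc labels
     * (local_ghost_from_dec); later calls with SKIP_LABELLING only refresh the already
     * labeled replicas (local_ghost_from_opart).
     */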

    /*! \brief This function fill the send buffer for the particle positions after the particles have been labeled with labelParticlesGhost
     *
     */
    void fill_send_ghost_pos_buf(openfpm::vector<Point<dim, St>,Memory,layout_base> & v_pos,
                                 openfpm::vector<size_t> & prc_sz,
                                 openfpm::vector<send_pos_vector> & g_pos_send,
                                 size_t opt,
                                 bool async)
    {
        // get the shift vectors
        const openfpm::vector<Point<dim,St>,Memory,layout_base> & shifts = dec.getShiftVectors();

        // create a number of send buffers equal to the near processors
        g_pos_send.resize(prc_sz.size());

        size_t old_hsmem_size = 0;

        // if we do async
        if (async == true)
        {
            old_hsmem_size = hsmem.size();
            resize_retained_buffer(hsmem,g_pos_send.size() + hsmem.size());
        }
        else
        {resize_retained_buffer(hsmem,g_pos_send.size());}

        for (size_t i = 0; i < g_pos_send.size(); i++)
        {
            // The buffer must be retained and survive the destruction of the
            // vector
            if (hsmem.get(i+old_hsmem_size).ref() == 0)
            {hsmem.get(i+old_hsmem_size).incRef();}

            // Set the memory for retaining the send buffer
            g_pos_send.get(i).setMemory(hsmem.get(i+old_hsmem_size));

            // resize the sending vector (No allocation is produced)
            g_pos_send.get(i).resize(prc_sz.get(i));
        }

        if (opt & RUN_ON_DEVICE)
        {
#if defined(CUDA_GPU) && defined(__NVCC__)

            size_t offset = 0;

            // Fill the sending buffers
            for (size_t i = 0 ; i < g_pos_send.size() ; i++)
            {
                auto ite = g_pos_send.get(i).getGPUIterator();

                CUDA_LAUNCH((process_ghost_particles_pos<dim,decltype(g_opart_device.toKernel()),decltype(g_pos_send.get(i).toKernel()),decltype(v_pos.toKernel()),decltype(shifts.toKernel())>),
                            ite,
                            g_opart_device.toKernel(), g_pos_send.get(i).toKernel(),
                            v_pos.toKernel(),shifts.toKernel(),offset);

                offset += prc_sz.get(i);
            }

#else

            std::cout << __FILE__ << ":" << __LINE__ << " error: RUN_ON_DEVICE requires compilation with NVCC, but this seems to be compiled with a normal compiler" << std::endl;

#endif
        }
        else
        {
            // Fill the send buffer
            for (size_t i = 0; i < g_opart.size(); i++)
            {
                for (size_t j = 0; j < g_opart.get(i).size(); j++)
                {
                    Point<dim, St> s = v_pos.get(g_opart.get(i).template get<0>(j));
                    s -= shifts.get(g_opart.get(i).template get<1>(j));
                    g_pos_send.get(i).set(j, s);
                }
            }
        }
    }

    /*! \brief This function fill the send buffer for ghost_put
     *
     */
    template<typename send_vector, typename prp_object, int ... prp>
    void fill_send_ghost_put_prp_buf(openfpm::vector<prop,Memory,layout_base> & v_prp,
                                     openfpm::vector<send_vector> & g_send_prp,
                                     size_t & g_m,
                                     size_t opt)
    {
        // create a number of send buffers equal to the near processors
        // from which we received

        // NOTE in some cases the information can be in prc_recv_get_pos

        size_t nproc = get_last_ghost_get_num_proc();

        g_send_prp.resize(nproc);

        resize_retained_buffer(hsmem,g_send_prp.size());

        for (size_t i = 0; i < g_send_prp.size(); i++)
        {
            // The buffer must be retained and survive the destruction of the
            // vector
            if (hsmem.get(i).ref() == 0)
                hsmem.get(i).incRef();

            // Set the memory for retaining the send buffer
            g_send_prp.get(i).setMemory(hsmem.get(i));

            size_t n_part_recv = get_last_ghost_get_received_parts(i);

            // resize the sending vector (No allocation is produced)
            g_send_prp.get(i).resize(n_part_recv);
        }

        size_t accum = g_m;

        if (opt & RUN_ON_DEVICE)
        {
#if defined(CUDA_GPU) && defined(__NVCC__)

            if (sizeof...(prp) != 0)
            {
                // Fill the sending buffers
                for (size_t i = 0 ; i < g_send_prp.size() ; i++)
                {
                    size_t n_part_recv = get_last_ghost_get_received_parts(i);

                    auto ite = g_send_prp.get(i).getGPUIterator();

                    if (ite.nblocks() == 0) {continue;}

                    CUDA_LAUNCH((process_ghost_particles_prp_put<decltype(g_send_prp.get(i).toKernel()),decltype(v_prp.toKernel()),prp...>),
                                ite,
                                g_send_prp.get(i).toKernel(),
                                v_prp.toKernel(),accum);

                    accum = accum + n_part_recv;
                }
            }

#else

            std::cout << __FILE__ << ":" << __LINE__ << " error: RUN_ON_DEVICE requires compilation with NVCC, but this seems to be compiled with a normal compiler" << std::endl;

#endif
        }
        else
        {
            // Fill the send buffer
            for (size_t i = 0; i < g_send_prp.size(); i++)
            {
                size_t j2 = 0;
                size_t n_part_recv = get_last_ghost_get_received_parts(i);

                for (size_t j = accum; j < accum + n_part_recv; j++)
                {
                    // source object type
                    typedef decltype(v_prp.get(j)) encap_src;
                    // destination object type
                    typedef decltype(g_send_prp.get(i).get(j2)) encap_dst;

                    // Copy only the selected properties
                    object_si_d<encap_src, encap_dst, OBJ_ENCAP, prp...>(v_prp.get(j), g_send_prp.get(i).get(j2));

                    j2++;
                }

                accum = accum + n_part_recv;
            }
        }
    }

    /*! \brief resize the retained buffer by nbf
     *
     */
    void resize_retained_buffer(openfpm::vector_fr<Memory> & rt_buf, size_t nbf)
    {
        // Release all the buffers that are going to be deleted
        for (size_t i = nbf ; i < rt_buf.size() ; i++)
        {
            rt_buf.get(i).decRef();
        }

        rt_buf.resize(nbf);
    }
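    /* Note (illustrative): the buffers are reference-counted because the memory handed
     * to the send vectors must survive the destruction of those vectors, in particular
     * on the GHOST_ASYNC path where the send may still be in flight when the filling
     * function returns. Growing the pool keeps the old buffers alive:
     *
     *     resize_retained_buffer(hsmem, hsmem.size() + n_new); // retain old, add new
     *     resize_retained_buffer(hsmem, n);                    // decRef() the tail
     */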

    /*! \brief It calls the setMemory function for each property
     *
     */
    template<typename send_vector, typename v_mpl>
    struct set_mem_retained_buffers_inte
    {
        openfpm::vector<send_vector> & g_send_prp;

        size_t i;

        openfpm::vector_fr<Memory> & hsmem;

        size_t j;

        set_mem_retained_buffers_inte(openfpm::vector<send_vector> & g_send_prp, size_t i,
                                      openfpm::vector_fr<Memory> & hsmem, size_t j)
        :g_send_prp(g_send_prp),i(i),hsmem(hsmem),j(j)
        {}

        //! It calls the setMemory function for each property
        template<typename T>
        inline void operator()(T& t)
        {
            g_send_prp.get(i).template setMemory<T::value>(hsmem.get(j));

            j++;
        }
    };

    template<bool inte_or_lin,typename send_vector, typename v_mpl>
    struct set_mem_retained_buffers
    {
        static inline size_t set_mem_retained_buffers_(openfpm::vector<send_vector> & g_send_prp,
                                                       openfpm::vector<size_t> & prc_sz,
                                                       size_t i,
                                                       openfpm::vector_fr<Memory> & hsmem,
                                                       size_t j)
        {
            // Set the memory for retaining the send buffer
            g_send_prp.get(i).setMemory(hsmem.get(j));

            // resize the sending vector (No allocation is produced)
            g_send_prp.get(i).resize(prc_sz.get(i));

            return j+1;
        }
    };

    template<typename send_vector, typename v_mpl>
    struct set_mem_retained_buffers<true,send_vector,v_mpl>
    {
        static inline size_t set_mem_retained_buffers_(openfpm::vector<send_vector> & g_send_prp,
                                                       openfpm::vector<size_t> & prc_sz,
                                                       size_t i,
                                                       openfpm::vector_fr<Memory> & hsmem,
                                                       size_t j)
        {
            set_mem_retained_buffers_inte<send_vector,v_mpl> smrbi(g_send_prp,i,hsmem,j);

            boost::mpl::for_each_ref<boost::mpl::range_c<int,0,boost::mpl::size<v_mpl>::type::value>>(smrbi);

            // if we do not send properties do not reallocate
            if (boost::mpl::size<v_mpl>::type::value != 0)
            {
                // resize the sending vector (No allocation is produced)
                g_send_prp.get(i).resize(prc_sz.get(i));
            }

            return smrbi.j;
        }
    };

    /*! \brief This function fill the send buffers for the properties after the particles have been labeled with labelParticlesGhost
     *
     */
    template<typename send_vector, typename prp_object, int ... prp>
    void fill_send_ghost_prp_buf(openfpm::vector<prop,Memory,layout_base> & v_prp,
                                 openfpm::vector<size_t> & prc_sz,
                                 openfpm::vector<send_vector> & g_send_prp,
                                 size_t opt)
    {
        size_t factor = 1;

        typedef typename to_boost_vmpl<prp...>::type v_mpl;

        if (is_layout_inte<layout_base<prop>>::value == true) {factor *= sizeof...(prp);}

        // create a number of send buffers equal to the near processors
        g_send_prp.resize(prc_sz.size());

        resize_retained_buffer(hsmem,g_send_prp.size()*factor);

        for (size_t i = 0; i < hsmem.size(); i++)
        {
            // The buffer must be retained and survive the destruction of the
            // vector
            if (hsmem.get(i).ref() == 0)
            {hsmem.get(i).incRef();}
        }

        size_t j = 0;
        for (size_t i = 0; i < g_send_prp.size(); i++)
        {
            j = set_mem_retained_buffers<is_layout_inte<layout_base<prop>>::value,send_vector,v_mpl>::set_mem_retained_buffers_(g_send_prp,prc_sz,i,hsmem,j);
        }

        if (opt & RUN_ON_DEVICE)
        {
#if defined(CUDA_GPU) && defined(__NVCC__)

            size_t offset = 0;

            if (sizeof...(prp) != 0)
            {
                // Fill the sending buffers
                for (size_t i = 0 ; i < g_send_prp.size() ; i++)
                {
                    auto ite = g_send_prp.get(i).getGPUIterator();

                    CUDA_LAUNCH((process_ghost_particles_prp<decltype(g_opart_device.toKernel()),decltype(g_send_prp.get(i).toKernel()),decltype(v_prp.toKernel()),prp...>),
                                ite,
                                g_opart_device.toKernel(), g_send_prp.get(i).toKernel(),
                                v_prp.toKernel(),offset);

                    offset += prc_sz.get(i);
                }
            }

#else

            std::cout << __FILE__ << ":" << __LINE__ << " error: RUN_ON_DEVICE requires compilation with NVCC, but this seems to be compiled with a normal compiler" << std::endl;

#endif
        }
        else
        {
            // if no properties must be sent skip this step
            if (sizeof...(prp) == 0) {return;}

            // Fill the send buffer
            for (size_t i = 0; i < g_opart.size(); i++)
            {
                for (size_t j = 0; j < g_opart.get(i).size(); j++)
                {
                    // source object type
                    typedef decltype(v_prp.get(g_opart.get(i).template get<0>(j))) encap_src;
                    // destination object type
                    typedef decltype(g_send_prp.get(i).get(j)) encap_dst;

                    // Copy only the selected properties
                    object_si_d<encap_src, encap_dst, OBJ_ENCAP, prp...>(v_prp.get(g_opart.get(i).template get<0>(j)), g_send_prp.get(i).get(j));
                }
            }
        }
    }
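    /* Note on the buffer count (illustrative): with an interleaved layout
     * (memory_traits_inte) every property lives in its own memory area, so one retained
     * buffer is needed per property and per destination processor:
     *
     *     factor = sizeof...(prp);                  // e.g. 3 properties -> factor 3
     *     hsmem.size() == g_send_prp.size()*factor; // linear layout: factor == 1
     */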

    /*! \brief Allocate and fill the send buffer for the map function
     *
     */
    void fill_send_map_buf(openfpm::vector<Point<dim, St>,Memory,layout_base> & v_pos,
                           openfpm::vector<prop,Memory,layout_base> & v_prp,
                           openfpm::vector<size_t> & prc_sz_r,
                           openfpm::vector<size_t> & prc_r,
                           openfpm::vector<openfpm::vector<Point<dim,St>,Memory,layout_base,openfpm::grow_policy_identity>> & m_pos,
                           openfpm::vector<openfpm::vector<prop,Memory,layout_base,openfpm::grow_policy_identity>> & m_prp,
                           openfpm::vector<aggregate<unsigned int,unsigned int>,Memory,layout_base> & prc_sz,
                           size_t opt)
    {
        m_prp.resize(prc_sz_r.size());
        m_pos.resize(prc_sz_r.size());
        openfpm::vector<size_t> cnt(prc_sz_r.size());

        for (size_t i = 0; i < prc_sz_r.size() ; i++)
        {
            // set the size and allocate; using mem warrants that pos and prp are contiguous
            m_pos.get(i).resize(prc_sz_r.get(i));
            m_prp.get(i).resize(prc_sz_r.get(i));
            cnt.get(i) = 0;
        }

        if (opt & RUN_ON_DEVICE)
        {
            if (v_cl.size() == 1)
            {return;}

#if defined(CUDA_GPU) && defined(__NVCC__)

            // The first part of m_opart and prc_sz contain the local particles

            int rank = v_cl.rank();

            v_pos_tmp.resize(prc_sz.template get<0>(rank));
            v_prp_tmp.resize(prc_sz.template get<0>(rank));

            auto ite = v_pos_tmp.getGPUIterator();

            starts.template deviceToHost<0>();
            size_t offset = starts.template get<0>(rank);

            // launch only if there is work to do
            if (ite.wthr.x != 0)
            {
                // fill v_pos_tmp and v_prp_tmp with local particles
                CUDA_LAUNCH((process_map_particles<decltype(m_opart.toKernel()),decltype(v_pos_tmp.toKernel()),decltype(v_prp_tmp.toKernel()),
                                                   decltype(v_pos.toKernel()),decltype(v_prp.toKernel())>),
                            ite,
                            m_opart.toKernel(),v_pos_tmp.toKernel(), v_prp_tmp.toKernel(),
                            v_pos.toKernel(),v_prp.toKernel(),offset);
            }

            // Fill the sending buffers
            for (size_t i = 0 ; i < m_pos.size() ; i++)
            {
                size_t offset = starts.template get<0>(prc_r.template get<0>(i));

                auto ite = m_pos.get(i).getGPUIterator();

                // launch only if there is work to do
                if (ite.wthr.x != 0)
                {

                    CUDA_LAUNCH((process_map_particles<decltype(m_opart.toKernel()),decltype(m_pos.get(i).toKernel()),decltype(m_prp.get(i).toKernel()),
                                                       decltype(v_pos.toKernel()),decltype(v_prp.toKernel())>),
                                ite,
                                m_opart.toKernel(),m_pos.get(i).toKernel(), m_prp.get(i).toKernel(),
                                v_pos.toKernel(),v_prp.toKernel(),offset);

                }
            }

            // swap the old local particles with the actual local particles
            v_pos_tmp.swap(v_pos);
            v_prp_tmp.swap(v_prp);

#else

            std::cout << __FILE__ << ":" << __LINE__ << " error: RUN_ON_DEVICE requires compilation with NVCC, but this seems to be compiled with a normal compiler" << std::endl;

#endif
        }
        else
        {
            // end vector point
            long int id_end = v_pos.size();

            // end opart point
            long int end = m_opart.size()-1;

            // Run through all the particles and fill the sending buffer
            for (size_t i = 0; i < m_opart.size(); i++)
            {
                process_map_particle<proc_without_prp>(i,end,id_end,m_opart,p_map_req,m_pos,m_prp,v_pos,v_prp,cnt);
            }

            v_pos.resize(v_pos.size() - m_opart.size());
            v_prp.resize(v_prp.size() - m_opart.size());
        }
    }


    /*! \brief Allocate and fill the send buffer for the map function (variant with an explicit property list)
     *
     */
    template<typename prp_object,int ... prp>
    void fill_send_map_buf_list(openfpm::vector<Point<dim, St>> & v_pos,
                                openfpm::vector<prop,Memory,layout_base> & v_prp,
                                openfpm::vector<size_t> & prc_sz_r,
                                openfpm::vector<openfpm::vector<Point<dim,St>>> & m_pos,
                                openfpm::vector<openfpm::vector<prp_object>> & m_prp)
    {
        m_prp.resize(prc_sz_r.size());
        m_pos.resize(prc_sz_r.size());
        openfpm::vector<size_t> cnt(prc_sz_r.size());

        for (size_t i = 0; i < prc_sz_r.size(); i++)
        {
            // set the size and allocate; using mem warrants that pos and prp are contiguous
            m_pos.get(i).resize(prc_sz_r.get(i));
            m_prp.get(i).resize(prc_sz_r.get(i));
            cnt.get(i) = 0;
        }

        // end vector point
        long int id_end = v_pos.size();

        // end opart point
        long int end = m_opart.size()-1;

        // Run through all the particles and fill the sending buffer
        for (size_t i = 0; i < m_opart.size(); i++)
        {
            process_map_particle<proc_with_prp<prp_object,prp...>>(i,end,id_end,m_opart,p_map_req,m_pos,m_prp,v_pos,v_prp,cnt);
        }

        v_pos.resize(v_pos.size() - m_opart.size());
        v_prp.resize(v_prp.size() - m_opart.size());
    }

    /*! \brief Label particles for mappings
    *
    */
    template<typename obp> void labelParticleProcessor(openfpm::vector<Point<dim, St>,Memory,layout_base> & v_pos,
                                                       openfpm::vector<aggregate<int,int,int>,
                                                                       Memory,
                                                                       layout_base> & lbl_p,
                                                       openfpm::vector<aggregate<unsigned int,unsigned int>,Memory,layout_base> & prc_sz,
                                                       size_t opt)
    {
        if (opt == RUN_ON_DEVICE)
        {
#ifdef __NVCC__

            // Map directly on gpu

            lbl_p.resize(v_pos.size());

            // labelling kernel

            prc_sz.template fill<0>(0);

            auto ite = v_pos.getGPUIterator();
            if (ite.wthr.x == 0)
            {
                starts.resize(v_cl.size());
                starts.template fill<0>(0);
                return;
            }

            // we have one process, so we can skip the labelling ...
            if (v_cl.size() == 1)
            {
                // ... but we have to apply the boundary conditions
                periodicity_int<dim> bc;

                for (size_t i = 0 ; i < dim ; i++) {bc.bc[i] = dec.periodicity(i);}

                CUDA_LAUNCH((apply_bc_each_part<dim,St,decltype(v_pos.toKernel())>),ite,dec.getDomain(),bc,v_pos.toKernel());

                return;
            }

            // label particle processor
            CUDA_LAUNCH((process_id_proc_each_part<dim,St,decltype(dec.toKernel()),decltype(v_pos.toKernel()),decltype(lbl_p.toKernel()),decltype(prc_sz.toKernel())>),
                        ite,
                        dec.toKernel(),v_pos.toKernel(),lbl_p.toKernel(),prc_sz.toKernel(),v_cl.rank());

            starts.resize(v_cl.size());
            openfpm::scan((unsigned int *)prc_sz.template getDeviceBuffer<0>(), prc_sz.size(), (unsigned int *)starts.template getDeviceBuffer<0>() , v_cl.getgpuContext());

            // move prc_sz to host
            prc_sz.template deviceToHost<0>();

            ite = lbl_p.getGPUIterator();

            // we order lbl_p
            CUDA_LAUNCH((reorder_lbl<decltype(lbl_p.toKernel()),decltype(starts.toKernel())>),ite,lbl_p.toKernel(),starts.toKernel());


#else

            std::cout << __FILE__ << ":" << __LINE__ << " error: it seems you tried to call map with the RUN_ON_DEVICE option, this requires compiling the program with NVCC" << std::endl;

#endif
        }
        else
        {
            // reset lbl_p
            lbl_p.clear();
            prc_sz_gg.clear();
            o_part_loc.clear();
            g_opart.clear();
            prc_g_opart.clear();

            // resize the label buffer
            prc_sz.template fill<0>(0);

            auto it = v_pos.getIterator();

            // Label all the particles with the processor id where they should go
            while (it.isNext())
            {
                auto key = it.get();

                // Apply the boundary conditions
                dec.applyPointBC(v_pos.get(key));

                size_t p_id = 0;

                // Check if the particle is inside the domain
                if (dec.getDomain().isInside(v_pos.get(key)) == true)
                {p_id = dec.processorID(v_pos.get(key));}
                else
                {p_id = obp::out(key, v_cl.getProcessUnitID());}

                // Particle to move
                if (p_id != v_cl.getProcessUnitID())
                {
                    if ((long int) p_id != -1)
                    {
                        prc_sz.template get<0>(p_id)++;
                        lbl_p.add();
                        lbl_p.last().template get<0>() = key;
                        lbl_p.last().template get<2>() = p_id;
                    }
                    else
                    {
                        lbl_p.add();
                        lbl_p.last().template get<0>() = key;
                        lbl_p.last().template get<2>() = p_id;
                    }
                }

                // Add processors and add size

                ++it;
            }
        }
    }
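    /* Labeling example (illustrative): with KillParticle as the out-of-bound policy obp,
     * a particle that left the global domain gets p_id == (size_t)-1 and is recorded in
     * lbl_p without incrementing any send counter, so it is dropped while filling the
     * send buffers; a particle now owned by rank 3 is recorded as
     *
     *     lbl_p.last().template get<0>() = key; // local particle index
     *     lbl_p.last().template get<2>() = 3;   // destination processor
     */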

    /*! \brief Label the particles
     *
     * It counts how many particles must be sent to each processor and saves their ids
     *
     */
    void labelParticlesGhost(openfpm::vector<Point<dim, St>,Memory,layout_base> & v_pos,
                             openfpm::vector<prop,Memory,layout_base> & v_prp,
                             openfpm::vector<size_t> & prc,
                             openfpm::vector<size_t> & prc_sz,
                             openfpm::vector<aggregate<unsigned int,unsigned int>,Memory,layout_base> & prc_offset,
                             size_t & g_m,
                             size_t opt)
    {
        // Buffer that contains for each processor the id of the particles to send
        prc_sz.clear();
        g_opart.clear();
        g_opart.resize(dec.getNNProcessors());
        prc_g_opart.clear();

        if (opt & RUN_ON_DEVICE)
        {
            labelParticlesGhost_impl<dim,St,prop,Memory,layout_base,
                                     Decomposition,std::is_same<Memory,CudaMemory>::value>
                                     ::run(mem,dec,g_opart_device,proc_id_out,starts,v_cl,v_pos,v_prp,prc,prc_sz,prc_offset,g_m,opt);
        }
        else
        {
            // Iterate over all particles
            auto it = v_pos.getIteratorTo(g_m);
            while (it.isNext())
            {
                auto key = it.get();

                // Given a particle, it returns which processors require it (first id) and the shift id (second id)
                // For an explanation about shift vectors please consult getShiftVector in ie_ghost
                const openfpm::vector<std::pair<size_t, size_t>> & vp_id = dec.template ghost_processorID_pair<typename Decomposition::lc_processor_id, typename Decomposition::shift_id>(v_pos.get(key), UNIQUE);

                for (size_t i = 0; i < vp_id.size(); i++)
                {
                    // processor id
                    size_t p_id = vp_id.get(i).first;

                    // add particle to communicate
                    g_opart.get(p_id).add();
                    g_opart.get(p_id).last().template get<0>() = key;
                    g_opart.get(p_id).last().template get<1>() = vp_id.get(i).second;
                }

                ++it;
            }

            // remove all the zero entries and construct prc (the list of the sending processors)
            openfpm::vector<openfpm::vector<aggregate<size_t, size_t>>> g_opart_f;

            // count the non zero elements
            for (size_t i = 0 ; i < g_opart.size() ; i++)
            {
                if (g_opart.get(i).size() != 0)
                {
                    prc_sz.add(g_opart.get(i).size());
                    g_opart_f.add();
                    g_opart.get(i).swap(g_opart_f.last());
                    prc.add(dec.IDtoProc(i));
                }
            }

            g_opart.swap(g_opart_f);
        }
#ifdef EXTREA_TRACE_PRE_COMM
        Extrae_user_function (0);
#endif
    }

    /*! \brief Call-back to allocate buffer to receive incoming elements (particles)
     *
     */
    static void * message_alloc_map(size_t msg_i, size_t total_msg, size_t total_p, size_t i, size_t ri, void * ptr)
    {
        // cast the pointer
        vector_dist_comm<dim, St, prop, Decomposition, Memory, layout_base> * vd = static_cast<vector_dist_comm<dim, St, prop, Decomposition, Memory, layout_base> *>(ptr);

        vd->recv_mem_gm.resize(vd->v_cl.getProcessingUnits());
        vd->recv_mem_gm.get(i).resize(msg_i);

        return vd->recv_mem_gm.get(i).getPointer();
    }
public:

    /*! \brief Copy Constructor
     *
     */
    vector_dist_comm(const vector_dist_comm<dim,St,prop,Decomposition,Memory,layout_base> & v)
    :v_cl(create_vcluster<Memory>()),dec(create_vcluster()),lg_m(0)
    {
        this->operator=(v);
    }


    /*! \brief Constructor
     *
     */
    vector_dist_comm(const Decomposition & dec)
    :v_cl(create_vcluster<Memory>()),dec(dec),lg_m(0)
    {

    }

    /*! \brief Constructor
     *
     */
    vector_dist_comm(Decomposition && dec)
    :v_cl(create_vcluster<Memory>()),dec(dec),lg_m(0)
    {

    }

    /*! \brief Constructor
     *
     */
    vector_dist_comm()
    :v_cl(create_vcluster<Memory>()),dec(create_vcluster()),lg_m(0)
    {
    }

    /*! \brief Destructor
     *
     */
    ~vector_dist_comm()
    {
        for (size_t i = 0 ; i < hsmem.size() ; i++)
        {
            if (hsmem.get(i).ref() == 1)
                hsmem.get(i).decRef();
            else
                std::cout << __FILE__ << ":" << __LINE__ << " internal error: memory is in an invalid state " << std::endl;
        }

    }

    /*! \brief Get the number of minimum sub-domain per processor
     *
     */
    size_t getDecompositionGranularity()
    {
        return v_sub_unit_factor;
    }

    /*! \brief Set the minimum number of sub-domain per processor
     *
     */
    void setDecompositionGranularity(size_t n_sub)
    {
        this->v_sub_unit_factor = n_sub;
    }

    /*! \brief Initialize the decomposition
     *
     */
    void init_decomposition(Box<dim,St> & box,
                            const size_t (& bc)[dim],
                            const Ghost<dim,St> & g,
                            size_t opt,
                            const grid_sm<dim,void> & gdist)
    {
        size_t div[dim];

        if (opt & BIND_DEC_TO_GHOST)
        {
            // padding
            size_t pad = 0;

            // CellDecomposer
            CellDecomposer_sm<dim,St,shift<dim,St>> cd_sm;

            // Calculate the divisions for the symmetric Cell-lists
            cl_param_calculateSym<dim,St>(box,cd_sm,g,pad);

            for (size_t i = 0 ; i < dim ; i++)
            {div[i] = cd_sm.getDiv()[i] - 2*pad;}

            // Create the sub-domains
            dec.setParameters(div, box, bc, g, gdist);
        }
        else
        {
            dec.setGoodParameters(box, bc, g, getDecompositionGranularity(), gdist);
        }
        dec.decompose();
    }

    /*! \brief Initialize the decomposition, taking the divisions from an existing grid
     *
     */
    void init_decomposition_gr_cell(Box<dim,St> & box,
                                    const size_t (& bc)[dim],
                                    const Ghost<dim,St> & g,
                                    size_t opt,
                                    const grid_sm<dim,void> & gdist)
    {
        size_t div[dim];

        for (size_t i = 0 ; i < dim ; i++)
        {div[i] = gdist.size(i);}

        // Create the sub-domains
        dec.setParameters(div, box, bc, g);

        dec.decompose();
    }
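    /* Usage sketch (illustrative; PERIODIC/NON_PERIODIC and the constructors are the
     * usual OpenFPM ones, assumed from the rest of the library):
     *
     *     Box<3,float> box({0.0,0.0,0.0},{1.0,1.0,1.0});
     *     size_t bc[3] = {PERIODIC,PERIODIC,NON_PERIODIC};
     *     Ghost<3,float> g(0.05);
     *     grid_sm<3,void> gdist;
     *     vdc.init_decomposition(box,bc,g,NONE,gdist); // decompose the domain
     */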

    /*! \brief It synchronizes the properties and positions of the ghost particles
     *
     */
    template<unsigned int impl, int ... prp> inline void ghost_get_(openfpm::vector<Point<dim, St>,Memory,layout_base> & v_pos,
                                                                    openfpm::vector<prop,Memory,layout_base> & v_prp,
                                                                    size_t & g_m,
                                                                    size_t opt = WITH_POSITION)
    {
#ifdef PROFILE_SCOREP
        SCOREP_USER_REGION("ghost_get",SCOREP_USER_REGION_TYPE_FUNCTION)
#endif

        // Sending property object
        typedef object<typename object_creator<typename prop::type, prp...>::type> prp_object;

        // send vector for each processor
        typedef openfpm::vector<prp_object,Memory,layout_base,openfpm::grow_policy_identity> send_vector;

        if (!(opt & NO_POSITION))
        {v_pos.resize(g_m);}

        // reset the ghost part

        if (!(opt & SKIP_LABELLING))
        {v_prp.resize(g_m);}

        // Label all the particles
        if ((opt & SKIP_LABELLING) == false)
        {labelParticlesGhost(v_pos,v_prp,prc_g_opart,prc_sz_gg,prc_offset,g_m,opt);}

        {
            // Send and receive ghost particle information
            openfpm::vector<send_vector> g_send_prp;

            fill_send_ghost_prp_buf<send_vector, prp_object, prp...>(v_prp,prc_sz_gg,g_send_prp,opt);

#if defined(CUDA_GPU) && defined(__NVCC__)
            cudaDeviceSynchronize();
#endif

            // if there are no properties skip
            // SSendRecvP send everything when we do not give properties

            ghost_exchange_comm_impl<impl,layout_base,prp ...>::template
                    sendrecv_prp(v_cl,g_send_prp,v_prp,v_pos,prc_g_opart,
                                 prc_recv_get_prp,recv_sz_get_prp,recv_sz_get_byte,g_opart_sz,g_m,opt);
        }

        if (!(opt & NO_POSITION))
        {
            // Sending buffer for the ghost particle positions
            openfpm::vector<send_pos_vector> g_pos_send;

            fill_send_ghost_pos_buf(v_pos,prc_sz_gg,g_pos_send,opt,impl == GHOST_ASYNC);

#if defined(CUDA_GPU) && defined(__NVCC__)
            cudaDeviceSynchronize();
#endif

            ghost_exchange_comm_impl<impl,layout_base,prp ...>::template
                    sendrecv_pos(v_cl,g_pos_send,v_prp,v_pos,prc_recv_get_pos,recv_sz_get_pos,prc_g_opart,opt);

            // fill g_opart_sz
            g_opart_sz.resize(prc_g_opart.size());

            for (size_t i = 0 ; i < prc_g_opart.size() ; i++)
                g_opart_sz.get(i) = g_pos_send.get(i).size();
        }

        // Important: ensure that the number of particles in v_prp equals that of v_pos.
        // Note that if we do not give properties, sizeof...(prp) == 0 and in general at this point
        // v_prp.size() != v_pos.size()
        if (!(opt & SKIP_LABELLING))
        {
            v_prp.resize(v_pos.size());
        }

        add_loc_particles_bc(v_pos,v_prp,g_m,opt);
    }
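    /* Usage sketch (illustrative): synchronize ghost positions plus properties 0 and 1,
     * reusing the previous labeling when particles have not moved. GHOST_SYNC is assumed
     * to be the synchronous selector defined elsewhere in OpenFPM:
     *
     *     vdc.template ghost_get_<GHOST_SYNC,0,1>(v_pos,v_prp,g_m);
     *     vdc.template ghost_get_<GHOST_SYNC,0,1>(v_pos,v_prp,g_m,SKIP_LABELLING);
     */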

    /*! \brief It completes an asynchronous (GHOST_ASYNC) synchronization of the ghost particle positions and properties
     *
     */
    template<int ... prp> inline void ghost_wait_(openfpm::vector<Point<dim, St>,Memory,layout_base> & v_pos,
                                                  openfpm::vector<prop,Memory,layout_base> & v_prp,
                                                  size_t & g_m,
                                                  size_t opt = WITH_POSITION)
    {
        // Sending property object
        typedef object<typename object_creator<typename prop::type, prp...>::type> prp_object;

        // send vector for each processor
        typedef openfpm::vector<prp_object,Memory,layout_base,openfpm::grow_policy_identity> send_vector;

        // Send and receive ghost particle information
        openfpm::vector<send_vector> g_send_prp;
        openfpm::vector<send_pos_vector> g_pos_send;

        ghost_exchange_comm_impl<GHOST_ASYNC,layout_base,prp ...>::template
                sendrecv_prp_wait(v_cl,g_send_prp,v_prp,v_pos,prc_g_opart,
                                  prc_recv_get_prp,recv_sz_get_prp,recv_sz_get_byte,g_opart_sz,g_m,opt);


        ghost_exchange_comm_impl<GHOST_ASYNC,layout_base,prp ...>::template
                sendrecv_pos_wait(v_cl,g_pos_send,v_prp,v_pos,prc_recv_get_pos,recv_sz_get_pos,prc_g_opart,opt);
    }

    /*! \brief It moves all the particles that do not belong to the local processor to the respective processor
     *
     */
    template<unsigned int ... prp> void map_list_(openfpm::vector<Point<dim, St>> & v_pos, openfpm::vector<prop> & v_prp, size_t & g_m, size_t opt)
    {
        if (opt & RUN_ON_DEVICE)
        {
            std::cout << "Error: " << __FILE__ << ":" << __LINE__ << " map_list is unsupported on device (coming soon)" << std::endl;
            return;
        }

        typedef KillParticle obp;

        // Processor communication size
        openfpm::vector<aggregate<unsigned int,unsigned int>,Memory,layout_base> prc_sz(v_cl.getProcessingUnits());

        // map completely resets the ghost part
        v_pos.resize(g_m);
        v_prp.resize(g_m);

        // m_opart contains the processor id of each particle (basically where they have to go)
        labelParticleProcessor<obp>(v_pos,m_opart, prc_sz,opt);

        // Calculate the sending buffer size for each processor, put this information in
        // a contiguous buffer
        p_map_req.resize(v_cl.getProcessingUnits());
        openfpm::vector<size_t> prc_sz_r;
        openfpm::vector<size_t> prc_r;

        for (size_t i = 0; i < v_cl.getProcessingUnits(); i++)
        {
            if (prc_sz.template get<0>(i) != 0)
            {
                p_map_req.get(i) = prc_r.size();
                prc_r.add(i);
                prc_sz_r.add(prc_sz.template get<0>(i));
            }
        }

        if (opt & MAP_LOCAL)
        {
            // if the map is local we indicate that we receive only from the neighborhood processors

            prc_recv_map.clear();
            for (size_t i = 0 ; i < dec.getNNProcessors() ; i++)
            {prc_recv_map.add(dec.IDtoProc(i));}
        }

        // Sending property object
        typedef object<typename object_creator<typename prop::type, prp...>::type> prp_object;

        //! position send buffers
        openfpm::vector<openfpm::vector<Point<dim, St>>> m_pos;
        //! property send buffers
        openfpm::vector<openfpm::vector<prp_object>> m_prp;

        fill_send_map_buf_list<prp_object,prp...>(v_pos,v_prp,prc_sz_r, m_pos, m_prp);

        v_cl.SSendRecv(m_pos,v_pos,prc_r,prc_recv_map,recv_sz_map,opt);
        v_cl.template SSendRecvP<openfpm::vector<prp_object>,decltype(v_prp),layout_base,prp...>(m_prp,v_prp,prc_r,prc_recv_map,recv_sz_map,opt);

        // mark the ghost part

        g_m = v_pos.size();
    }
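    /* Usage sketch (illustrative): migrate particles communicating only properties
     * 0 and 2, leaving the remaining properties untouched:
     *
     *     vdc.template map_list_<0,2>(v_pos,v_prp,g_m,NONE);
     */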

    /*! \brief It moves all the particles that do not belong to the local processor to the respective processor
     *
     */
    template<typename obp = KillParticle>
    void map_(openfpm::vector<Point<dim, St>,Memory,layout_base> & v_pos,
              openfpm::vector<prop,Memory,layout_base> & v_prp, size_t & g_m,
              size_t opt)
    {
#ifdef PROFILE_SCOREP
        SCOREP_USER_REGION("map",SCOREP_USER_REGION_TYPE_FUNCTION)
#endif

        prc_sz.resize(v_cl.getProcessingUnits());

        // map completely resets the ghost part
        v_pos.resize(g_m);
        v_prp.resize(g_m);

        // Contains the processor id of each particle (basically where they have to go)
        labelParticleProcessor<obp>(v_pos,m_opart, prc_sz,opt);

        openfpm::vector<size_t> prc_sz_r;
        openfpm::vector<size_t> prc_r;

        // Calculate the sending buffer size for each processor, put this information in
        // a contiguous buffer
        calc_send_buffers(prc_sz,prc_sz_r,prc_r,opt);

        //! position send buffers
        openfpm::vector<openfpm::vector<Point<dim, St>,Memory,layout_base,openfpm::grow_policy_identity>> m_pos;
        //! property send buffers
        openfpm::vector<openfpm::vector<prop,Memory,layout_base,openfpm::grow_policy_identity>> m_prp;

        fill_send_map_buf(v_pos,v_prp, prc_sz_r,prc_r, m_pos, m_prp,prc_sz,opt);

        size_t opt_ = 0;
        if (opt & RUN_ON_DEVICE)
        {
#if defined(CUDA_GPU) && defined(__NVCC__)
            // Before doing the communication on RUN_ON_DEVICE we have to be sure that the previous kernels complete
            cudaDeviceSynchronize();
            opt_ |= MPI_GPU_DIRECT;
#else
            std::cout << __FILE__ << ":" << __LINE__ << " error: to use the option RUN_ON_DEVICE you must compile with NVCC" << std::endl;
#endif
        }

        v_cl.template SSendRecv<openfpm::vector<Point<dim, St>,Memory,layout_base,openfpm::grow_policy_identity>,
                                openfpm::vector<Point<dim, St>,Memory,layout_base>,
                                layout_base>
                                (m_pos,v_pos,prc_r,prc_recv_map,recv_sz_map,opt_);

        v_cl.template SSendRecv<openfpm::vector<prop,Memory,layout_base,openfpm::grow_policy_identity>,
                                openfpm::vector<prop,Memory,layout_base>,
                                layout_base>
                                (m_prp,v_prp,prc_r,prc_recv_map,recv_sz_map,opt_);

        // mark the ghost part

        g_m = v_pos.size();
    }
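    /* Usage sketch (illustrative): after the particles have moved, send each one to the
     * processor owning its new position; out-of-domain particles are removed by the
     * default KillParticle policy:
     *
     *     vdc.template map_<KillParticle>(v_pos,v_prp,g_m,NONE);
     *     vdc.template map_<KillParticle>(v_pos,v_prp,g_m,RUN_ON_DEVICE); // GPU-resident data
     */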

    /*! \brief Get the decomposition
     *
     */
    Decomposition & getDecomposition()
    {
        return dec;
    }

    /*! \brief Get the decomposition
     *
     */
    inline const Decomposition & getDecomposition() const
    {
        return dec;
    }

    /*! \brief Copy a vector
     *
     */
    vector_dist_comm<dim,St,prop,Decomposition,Memory,layout_base> &
    operator=(const vector_dist_comm<dim,St,prop,Decomposition,Memory,layout_base> & vc)
    {
        dec = vc.dec;

        return *this;
    }

    /*! \brief Copy a vector
     *
     */
    vector_dist_comm<dim,St,prop,Decomposition,Memory,layout_base> &
    operator=(vector_dist_comm<dim,St,prop,Decomposition,Memory,layout_base> && vc)
    {
        dec = vc.dec;

        return *this;
    }

    /*! \brief Ghost put
     *
     * Send the ghost particle contributions back to their owners and merge them with the operation op
     *
     */
    template<template<typename,typename> class op, int ... prp>
    void ghost_put_(openfpm::vector<Point<dim, St>,Memory,layout_base> & v_pos,
                    openfpm::vector<prop,Memory,layout_base> & v_prp,
                    size_t & g_m,
                    size_t opt)
    {
        // Sending property object
        typedef object<typename object_creator<typename prop::type, prp...>::type> prp_object;

        // send vector for each processor
        typedef openfpm::vector<prp_object,Memory,layout_base,openfpm::grow_policy_identity> send_vector;

        openfpm::vector<send_vector> g_send_prp;
        fill_send_ghost_put_prp_buf<send_vector, prp_object, prp...>(v_prp,g_send_prp,g_m,opt);

        if (opt & RUN_ON_DEVICE)
        {
#if defined(CUDA_GPU) && defined(__NVCC__)
            // Before doing the communication on RUN_ON_DEVICE we have to be sure that the previous kernels complete
            cudaDeviceSynchronize();
#else
            std::cout << __FILE__ << ":" << __LINE__ << " error: to use the option RUN_ON_DEVICE you must compile with NVCC" << std::endl;
#endif
        }

        // Send and receive ghost particle information
        if (opt & NO_CHANGE_ELEMENTS)
        {
            size_t opt_ = compute_options(opt);

            if (opt & RUN_ON_DEVICE)
            {
                op_ssend_recv_merge_gpu<op,decltype(g_opart_device),decltype(prc_offset)> opm(g_opart_device,prc_offset);
                v_cl.template SSendRecvP_op<op_ssend_recv_merge_gpu<op,decltype(g_opart_device),decltype(prc_offset)>,
                                            send_vector,
                                            decltype(v_prp),
                                            layout_base,
                                            prp...>(g_send_prp,v_prp,prc_recv_get_prp,opm,prc_g_opart,g_opart_sz,opt_);
            }
            else
            {
                op_ssend_recv_merge<op,decltype(g_opart)> opm(g_opart);
                v_cl.template SSendRecvP_op<op_ssend_recv_merge<op,decltype(g_opart)>,
                                            send_vector,
                                            decltype(v_prp),
                                            layout_base,
                                            prp...>(g_send_prp,v_prp,prc_recv_get_prp,opm,prc_g_opart,g_opart_sz,opt_);
            }
        }
        else
        {
            size_t opt_ = compute_options(opt);

            if (opt & RUN_ON_DEVICE)
            {
                op_ssend_recv_merge_gpu<op,decltype(g_opart_device),decltype(prc_offset)> opm(g_opart_device,prc_offset);
                v_cl.template SSendRecvP_op<op_ssend_recv_merge_gpu<op,decltype(g_opart_device),decltype(prc_offset)>,
                                            send_vector,
                                            decltype(v_prp),
                                            layout_base,
                                            prp...>(g_send_prp,v_prp,get_last_ghost_get_num_proc_vector(),opm,prc_recv_put,recv_sz_put,opt_);
            }
            else
            {
                op_ssend_recv_merge<op,decltype(g_opart)> opm(g_opart);
                v_cl.template SSendRecvP_op<op_ssend_recv_merge<op,decltype(g_opart)>,
                                            send_vector,
                                            decltype(v_prp),
                                            layout_base,
                                            prp...>(g_send_prp,v_prp,get_last_ghost_get_num_proc_vector(),opm,prc_recv_put,recv_sz_put,opt_);
            }
        }

        // process also the local replicated particles

        if (lg_m < v_prp.size() && v_prp.size() - lg_m != o_part_loc.size())
        {
            std::cerr << "Error: " << __FILE__ << ":" << __LINE__ << " Local ghost particles = " << v_prp.size() - lg_m << " != " << o_part_loc.size() << std::endl;
            std::cerr << "Error: " << __FILE__ << ":" << __LINE__ << " Check that you did a ghost_get before a ghost_put" << std::endl;
        }


        if (opt & RUN_ON_DEVICE)
        {
            v_prp.template merge_prp_v_device<op,prop,Memory,
                                              openfpm::grow_policy_double,
                                              layout_base,
                                              decltype(o_part_loc),prp ...>(v_prp,lg_m,o_part_loc);
        }
        else
        {
            v_prp.template merge_prp_v<op,prop,Memory,
                                       openfpm::grow_policy_double,
                                       layout_base,
                                       decltype(o_part_loc),prp ...>(v_prp,lg_m,o_part_loc);
        }
    }
};
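/* Usage sketch (illustrative): a typical ghost_get / compute / ghost_put cycle. add_ is
 * assumed to be the additive merge functor used elsewhere in OpenFPM for ghost_put:
 *
 *     vdc.template ghost_get_<GHOST_SYNC,0>(v_pos,v_prp,g_m); // pull ghost values
 *     // ... accumulate contributions into the ghost particles ...
 *     vdc.template ghost_put_<add_,0>(v_pos,v_prp,g_m,NONE);  // push them back and merge
 */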


#endif /* SRC_VECTOR_VECTOR_DIST_COMM_HPP_ */