OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
 
grid_dist_id_comm.hpp
1/*
2 * grid_dist_id_comm.hpp
3 *
4 * Created on: Nov 13, 2016
5 * Author: yaroslav
6 */
7
8#ifndef SRC_GRID_GRID_DIST_ID_COMM_HPP_
9#define SRC_GRID_GRID_DIST_ID_COMM_HPP_
10
11#include "Vector/vector_dist_ofb.hpp"
12#include "Grid/copy_grid_fast.hpp"
13#include "grid_dist_util.hpp"
14#include "util/common_pdata.hpp"
15#include "lib/pdata.hpp"
16#include "Grid/grid_common.hpp"
17
18
23template<bool result,typename T, typename device_grid, typename Memory>
24struct grid_unpack_selector_with_prp
25{
34 template<template<typename,typename> class op, typename sub_it_type, int ... prp> static void call_unpack(ExtPreAlloc<Memory> & recv_buf, sub_it_type & sub2, device_grid & gd, Unpack_stat & ps)
35 {
36 std::cerr << __FILE__ << ":" << __LINE__ << " Error: complex properties on grids are not supported yet" << std::endl;
37 }
38};
39
44template<typename T, typename device_grid, typename Memory>
45struct grid_unpack_selector_with_prp<true,T,device_grid,Memory>
46{
47
56 template<template<typename,typename> class op, typename sub_it_type, unsigned int ... prp>
57 static void call_unpack(ExtPreAlloc<Memory> & recv_buf,
58 sub_it_type & sub2,
59 device_grid & gd,
60 Unpack_stat & ps)
61 {
62 gd.template unpack_with_op<op,Memory,prp ...>(recv_buf,sub2,ps);
63 }
64};
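// NOTE (illustrative sketch, not in the original source): the primary template above
// and this specialization implement a compile-time dispatch driven by has_pack_gen.
// The caller computes the flag and selects the implementation, conceptually:
//
//   const bool simple = has_pack_gen<typename T::type>::value == false;
//   grid_unpack_selector_with_prp<simple,T,device_grid,Memory>
//       ::template call_unpack<op,sub_it_type,prp...>(recv_buf,sub2,gd,ps);
//
// simple == true routes to the grid's unpack_with_op; simple == false hits the
// primary template, which only reports that complex properties are not supported.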
65
71template<typename device_grid, typename Memory, typename T>
72struct grid_call_serialize_variadic;
73
79template<typename device_grid, typename Memory , int ... prp>
80struct grid_call_serialize_variadic<device_grid,Memory,index_tuple<prp...>>
81{
82
91 template<template<typename,typename> class op, typename sub_it_type, typename T>
92 inline static void call_unpack(ExtPreAlloc<Memory> & recv_buf,
93 sub_it_type & sub2,
94 device_grid & dg,
95 Unpack_stat & ps)
96 {
97 const bool result = has_pack_gen<typename T::type>::value == false;
98
99 grid_unpack_selector_with_prp<result,T,device_grid,Memory>::template call_unpack<op,sub_it_type,prp...>(recv_buf,sub2,dg,ps);
100 }
101};
102
108template<template<typename,typename> class op, typename T, typename device_grid, typename Memory>
109struct grid_unpack_with_prp
110{
111
120 template<typename sub_it_type, unsigned int ... prp> static void unpacking(ExtPreAlloc<Memory> & recv_buf, sub_it_type & sub2, device_grid & dg, Unpack_stat & ps)
121 {
122 typedef index_tuple<prp...> ind_prop_to_pack;
123 grid_call_serialize_variadic<device_grid,Memory,ind_prop_to_pack>::template call_unpack<op,sub_it_type,T>(recv_buf, sub2, dg, ps);
124 }
125};
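// Dispatch chain sketch (illustrative, not in the original source):
// grid_unpack_with_prp::unpacking folds the property indices into an index_tuple and
// grid_call_serialize_variadic re-expands them before invoking the selector above.
// A call such as the one used later in merge_received_data_put looks like:
//
//   grid_unpack_with_prp<op,prp_object,device_grid,Memory>
//       ::template unpacking<decltype(sub2),prp...>(recv_buf,sub2,loc_grid.get(sub_id),ps);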
126
140template<unsigned int dim, typename St, typename T, typename Decomposition = CartDecomposition<dim,St>,typename Memory=HeapMemory , typename device_grid=grid_cpu<dim,T> >
141class grid_dist_id_comm
142{
144 //! VCluster
145 Vcluster<Memory> & v_cl;
146
147 //! Maps the processor id with the communication request into map procedure
148 openfpm::vector<size_t> p_map_req;
149
150 //! Stores the list of processors that communicate with us (local processor)
151 openfpm::vector<size_t> prc_recv_map;
152
153 //! Stores the size of the elements added for each processor that communicate with us (local processor)
154 openfpm::vector<size_t> recv_sz_map;
155
156 //! List of processor to send to
157 openfpm::vector<size_t> send_prc_queue;
158
159 //! Pointer to the memory to send
160 openfpm::vector<void *> send_pointer;
161
162 //! size to send
163 openfpm::vector<size_t> send_size;
164 //! receiving buffers in case of dynamic
165 openfpm::vector_fr<BMemory<Memory>> recv_buffers;
166
167 struct rp_id
168 {
169 int p_id;
170 int size;
171 int i;
172
173 bool operator<(const rp_id & tmp) const
174 {
175 return p_id < tmp.p_id;
176 }
177 };
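// rp_id records, one per dynamically received message, allow the receive buffers to
// be restored to processor order: operator< sorts by p_id while the field i keeps the
// arrival slot. Sketch of the reordering done later in queue_recv_data_get:
//
//   recv_proc.sort();                                        // order by sending rank
//   tmp.get(k).swap(recv_buffers.get(recv_proc.get(k).i));   // permute the buffers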
178
180 //! receiving processors
181 openfpm::vector<rp_id> recv_proc;
182
183 //! Intersection grids to be moved during map_, grouped per target processor
184 openfpm::vector<openfpm::vector<aggregate<device_grid,SpaceBox<dim,long int>>>> m_oGrid;
185
187 openfpm::vector<int> m_oGrid_c;
188
190 //! Memory for the ghost sending buffer
191 Memory g_send_prp_mem;
192
193 //! Memory for the ghost receiving buffer
194 Memory g_recv_prp_mem;
195
196 //! send pointers
197 openfpm::vector<void *> pointers;
198 openfpm::vector<void *> pointers2;
199 //! header unpacker info
200 openfpm::vector_gpu<aggregate<void *,void *,int>> pointers_h;
201 int n_headers_slot = 1;
202
203 openfpm::vector_gpu<aggregate<size_t,size_t,unsigned int>> headers;
204 //! Receiving option
205 size_t opt;
206
218 template<int... prp> void ghost_get_local(const openfpm::vector<i_lbox_grid<dim>> & loc_ig_box,
219 const openfpm::vector<e_lbox_grid<dim>> & loc_eg_box,
220 const openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext,
221 openfpm::vector<device_grid> & loc_grid,
222 std::unordered_map<size_t,size_t> & g_id_to_external_ghost_box,
223 const grid_sm<dim,void> & ginfo,
224 bool use_bx_def,
225 size_t opt)
226 {
227 rem_copy_opt opt_ = rem_copy_opt::NONE_OPT;
228 if (opt & SKIP_LABELLING)
229 {opt_ = rem_copy_opt::KEEP_GEOMETRY;}
230
231 if (opt_ != rem_copy_opt::KEEP_GEOMETRY)
232 {
233 for (size_t i = 0 ; i < loc_grid.size() ; i++)
234 {loc_grid.get(i).copyRemoveReset();}
235 }
236
237 grid_key_dx<dim> cnt[1];
238 cnt[0].zero();
239
241 for (size_t i = 0 ; i < loc_ig_box.size() ; i++)
242 {
244 for (size_t j = 0 ; j < loc_ig_box.get(i).bid.size() ; j++)
245 {
246 size_t sub_id_src_gdb_ext = loc_ig_box.get(i).bid.get(j).sub_gdb_ext;
247
248 // sub domain connected with external box
249 size_t sub_id_dst = loc_ig_box.get(i).bid.get(j).sub;
250
251 // local internal ghost box connected
252 for (size_t v = 0 ; v < loc_ig_box.get(i).bid.get(j).k.size() ; v++)
253 {
254 size_t k = loc_ig_box.get(i).bid.get(j).k.get(v);
255
256 Box<dim,long int> bx_dst = loc_eg_box.get(sub_id_dst).bid.get(k).ebox;
257
258 // convert into local
259 size_t sub_id_dst_gdb_ext = loc_eg_box.get(sub_id_dst).bid.get(k).sub_gdb_ext;
260 bx_dst -= gdb_ext.get(sub_id_dst_gdb_ext).origin;
261
262 // create 2 sub grid iterator
263
264 if (bx_dst.isValid() == false)
265 {continue;}
266
267 Box<dim,long int> bx_src = flip_box(loc_eg_box.get(sub_id_dst).bid.get(k).ebox,loc_eg_box.get(sub_id_dst).bid.get(k).cmb,ginfo);
268 bx_src -= gdb_ext.get(sub_id_src_gdb_ext).origin;
269
270 #ifdef SE_CLASS1
271
272 if (use_bx_def == false)
273 {
274 if (loc_eg_box.get(sub_id_dst).bid.get(k).sub != i)
275 {std::cerr << "Error " << __FILE__ << ":" << __LINE__ << " source and destination are not correctly linked" << "\n";}
276 }
277
278 if (bx_src.getVolumeKey() != bx_dst.getVolumeKey())
279 {std::cerr << "Error " << __FILE__ << ":" << __LINE__ << " source and destination do not match in size" << "\n";}
280
281 #endif
282
283 auto & gd = loc_grid.get(sub_id_dst_gdb_ext);
284
285 gd.remove(bx_dst);
286 gd.copy_to(loc_grid.get(sub_id_src_gdb_ext),bx_src,bx_dst);
287 }
288 }
289 }
290
291 for (size_t i = 0 ; i < loc_grid.size() ; i++)
292 {
293 loc_grid.get(i).template removeCopyToFinalize<prp ...>(v_cl.getgpuContext(), rem_copy_opt::PHASE1 | opt_);
294 }
295
296 for (size_t i = 0 ; i < loc_grid.size() ; i++)
297 {
298 loc_grid.get(i).template removeCopyToFinalize<prp ...>(v_cl.getgpuContext(), rem_copy_opt::PHASE2 | opt_);
299 }
300
301 for (size_t i = 0 ; i < loc_grid.size() ; i++)
302 {
303 loc_grid.get(i).template removeCopyToFinalize<prp ...>(v_cl.getgpuContext(), rem_copy_opt::PHASE3 | opt_);
304 }
305 }
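// The three removeCopyToFinalize passes above execute the remove/copy lists collected
// by copy_to in bulk (PHASE1, PHASE2, PHASE3); on GPU backends each phase groups its
// kernel launches, and with SKIP_LABELLING the geometry cached by a previous call is
// reused (KEEP_GEOMETRY). Sketch of a repeated call on an unchanged decomposition:
//
//   ghost_get_local<0,1>(loc_ig_box,loc_eg_box,gdb_ext,loc_grid,
//                        g_id_to_external_ghost_box,ginfo,use_bx_def,SKIP_LABELLING);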
306
318 template<template<typename,typename> class op, int... prp> void ghost_put_local(const openfpm::vector<i_lbox_grid<dim>> & loc_ig_box,
319 const openfpm::vector<e_lbox_grid<dim>> & loc_eg_box,
320 const openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext,
321 openfpm::vector<device_grid> & loc_grid,
322 openfpm::vector<std::unordered_map<size_t,size_t>> & g_id_to_external_ghost_box)
323 {
325 for (size_t i = 0 ; i < loc_eg_box.size() ; i++)
326 {
328 for (size_t j = 0 ; j < loc_eg_box.get(i).bid.size() ; j++)
329 {
330 if (loc_eg_box.get(i).bid.get(j).initialized == false)
331 continue;
332
333 Box<dim,long int> bx_src = loc_eg_box.get(i).bid.get(j).ebox;
334 // convert into local
335 bx_src -= gdb_ext.get(i).origin;
336
337 // sub domain connected with external box
338 size_t sub_id_dst = loc_eg_box.get(i).bid.get(j).sub;
339
340 // local external ghost box connected
341 size_t k = loc_eg_box.get(i).bid.get(j).k;
342
343 Box<dim,long int> bx_dst = loc_ig_box.get(sub_id_dst).bid.get(k).box;
344
345 // convert into local
346 bx_dst -= gdb_ext.get(sub_id_dst).origin;
347
348 // create 2 sub grid iterator
349
350 if (bx_dst.isValid() == false)
351 {continue;}
352
353#ifdef SE_CLASS1
354
355 if (loc_ig_box.get(sub_id_dst).bid.get(k).sub != i)
356 std::cerr << "Error " << __FILE__ << ":" << __LINE__ << " source and destination are not correctly linked" << "\n";
357
358 if (bx_src.getVolume() != bx_dst.getVolume())
359 {std::cerr << "Error " << __FILE__ << ":" << __LINE__ << " source and destination do not match in size" << "\n";}
360
361#endif
362
363 auto & gd2 = loc_grid.get(sub_id_dst);
364 gd2.template copy_to_op<op,prp...>(loc_grid.get(i),bx_src,bx_dst);
365
366 }
367 }
368 }
369
370 /* Send or queue the information
371 *
372 * This function sends or queues the information directed to another processor. If the
373 * device grid is a compressed format, like in multi-resolution, the communication is
374 * queued, because the other side does not know the size of the message. If it is
375 * not compressed the other side knows the size, so a direct send is done.
376 *
377 */
378 void send_or_queue(size_t prc, char * pointer, char * pointer2)
379 {
380 if (device_grid::isCompressed() == false)
381 {v_cl.send(prc,0,pointer,(char *)pointer2 - (char *)pointer);}
382 else
383 {
384 send_prc_queue.add(prc);
385 send_pointer.add(pointer);
386 send_size.add(pointer2-pointer);
387 }
388 }
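// Usage sketch (values are hypothetical): pointer and pointer2 delimit the packed
// region produced by the Packer for one destination processor.
//
//   send_or_queue(dest_rank, pointer, pointer2);   // [pointer,pointer2) packed bytes
//
// For a dense device_grid this degenerates to a direct v_cl.send; for a compressed
// one the triple (processor, pointer, size) is queued for sendrecvMultipleMessagesNBX.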
389
390 static void * receive_dynamic(size_t msg_i ,size_t total_msg, size_t total_p, size_t i, size_t ri, size_t tag, void * ptr)
391 {
392 grid_dist_id_comm * gd = static_cast<grid_dist_id_comm *>(ptr);
393
394 gd->recv_buffers.add();
395
396 gd->recv_buffers.last().resize(msg_i);
397 gd->recv_proc.add();
398 gd->recv_proc.last().p_id = i;
399 gd->recv_proc.last().size = msg_i;
400 gd->recv_proc.last().i = gd->recv_proc.size()-1;
401
402 if (gd->opt & RUN_ON_DEVICE)
403 {
404 return gd->recv_buffers.last().getDevicePointer();
405 }
406
407 return gd->recv_buffers.last().getPointer();
408 }
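// receive_dynamic is the allocation callback of the NBX pattern: for every incoming
// message VCluster asks it for a landing buffer of msg_i bytes coming from processor
// i, and this class grows recv_buffers/recv_proc accordingly. It is wired up as in
// the call used below:
//
//   v_cl.sendrecvMultipleMessagesNBX(send_prc_queue.size(),&send_size.get(0),
//                                    &send_prc_queue.get(0),&send_pointer.get(0),
//                                    receive_dynamic,this);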
409
410 /* Queue the receives for a ghost_get
411 *
412 * This function queues the receives from the other processors. If the device grid
413 * is a compressed format, like in multi-resolution, the sizes of the incoming
414 * messages are unknown and a dynamic (NBX) receive is used. If it is not
415 * compressed the sizes are known and the receives are queued directly.
416 *
417 */
418 template <typename prp_object>
419 void queue_recv_data_get(const openfpm::vector<ep_box_grid<dim>> & eg_box,
420 std::vector<size_t> & prp_recv,
421 ExtPreAlloc<Memory> & prRecv_prp)
422 {
423#ifdef __NVCC__
424 cudaDeviceSynchronize();
425#endif
426
427 if (device_grid::isCompressed() == false)
428 {
430 for ( size_t i = 0 ; i < eg_box.size() ; i++ )
431 {
432 prp_recv.push_back(eg_box.get(i).recv_pnt * sizeof(prp_object) + sizeof(size_t)*eg_box.get(i).n_r_box);
433 }
434
435 size_t tot_recv = ExtPreAlloc<Memory>::calculateMem(prp_recv);
436
438 g_recv_prp_mem.resize(tot_recv);
439
440 // queue the receives
441 for ( size_t i = 0 ; i < eg_box.size() ; i++ )
442 {
443 prRecv_prp.allocate(prp_recv[i]);
444 v_cl.recv(eg_box.get(i).prc,0,prRecv_prp.getPointer(),prp_recv[i]);
445 }
446 }
447 else
448 {
449 // The total amount of incoming information cannot be calculated in advance, so we have to receive dynamically
450
451 if (send_prc_queue.size() == 0)
452 {
453 v_cl.sendrecvMultipleMessagesNBX(send_prc_queue.size(),NULL,
454 NULL,NULL,
455 receive_dynamic,this);
456 }
457 else
458 {
459 v_cl.sendrecvMultipleMessagesNBX(send_prc_queue.size(),&send_size.get(0),
460 &send_prc_queue.get(0),&send_pointer.get(0),
461 receive_dynamic,this);
462 }
463
464 // Reorder what we received
465
466 recv_proc.sort();
467
468 openfpm::vector_fr<BMemory<Memory>> tmp;
469 tmp.resize(recv_proc.size());
470
471 for (int i = 0 ; i < recv_proc.size() ; i++)
472 {
473 tmp.get(i).swap(recv_buffers.get(recv_proc.get(i).i));
474 }
475
476 recv_buffers.swap(tmp);
477 }
478 }
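// Worked example for the size pre-computation above (non-compressed path): with a
// 12-byte prp_object and a neighbor sending recv_pnt = 1000 ghost points spread over
// n_r_box = 4 internal ghost boxes,
//
//   prp_recv[i] = 1000*12 + 4*sizeof(size_t)   // payload + one g_id header per box
//
// and ExtPreAlloc<Memory>::calculateMem sums these entries (plus alignment) to size
// g_recv_prp_mem once for all neighbors.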
479
480 /* Queue the receives for a ghost_put
481 *
482 * This function queues the receives from the other processors. If the device grid
483 * is a compressed format, like in multi-resolution, the sizes of the incoming
484 * messages are unknown and a dynamic (NBX) receive is used. If it is not
485 * compressed the sizes are known and the receives are queued directly.
486 *
487 */
488 template <typename prp_object>
489 void queue_recv_data_put(const openfpm::vector<ip_box_grid<dim>> & ig_box,
490 std::vector<size_t> & prp_recv,
491 ExtPreAlloc<Memory> & prRecv_prp)
492 {
493 if (device_grid::isCompressed() == false)
494 {
495 // Receive the information from each processor
496 for ( size_t i = 0 ; i < ig_box.size() ; i++ )
497 {
498 prp_recv.push_back(0);
499
500 // for each internal ghost box
501 for (size_t j = 0 ; j < ig_box.get(i).bid.size() ; j++)
502 {
503 // Internal ghost box
504 Box<dim,size_t> g_ig_box = ig_box.get(i).bid.get(j).box;
505 prp_recv[prp_recv.size()-1] += g_ig_box.getVolumeKey() * sizeof(prp_object) + sizeof(size_t);
506 }
507 }
508
509 size_t tot_recv = ExtPreAlloc<Memory>::calculateMem(prp_recv);
510
512 g_recv_prp_mem.resize(tot_recv);
513
514 prRecv_prp.incRef();
515
516 // queue the receives
517 for ( size_t i = 0 ; i < ig_box.size() ; i++ )
518 {
519 prRecv_prp.allocate(prp_recv[i]);
520 v_cl.recv(ig_box.get(i).prc,0,prRecv_prp.getPointer(),prp_recv[i]);
521 }
522
523 prRecv_prp.decRef();
524 }
525 else
526 {
527 // The total amount of incoming information cannot be calculated in advance, so we have to receive dynamically
528
529 if (send_prc_queue.size() == 0)
530 {
531 v_cl.sendrecvMultipleMessagesNBX(send_prc_queue.size(),NULL,
532 NULL,NULL,
533 receive_dynamic,this);
534 }
535 else
536 {
537 v_cl.sendrecvMultipleMessagesNBX(send_prc_queue.size(),&send_size.get(0),
538 &send_prc_queue.get(0),&send_pointer.get(0),
539 receive_dynamic,this);
540 }
541 }
542 }
543
544 template<typename mem,unsigned ... prp>
545 void unpack_data_to_ext_ghost(ExtPreAlloc<mem> & emem,
546 openfpm::vector<device_grid> & loc_grid,
547 size_t i,
548 const openfpm::vector<ep_box_grid<dim>> & eg_box,
549 const std::unordered_map<size_t,size_t> & g_id_to_external_ghost_box,
550 const openfpm::vector<e_box_multi<dim>> & eb_gid_list,
551 Unpack_stat & ps,
552 size_t opt)
553 {
554 // Unpack the ghost box global-id
555
556 size_t g_id;
557 // we move from device to host the gid
558 if (opt & RUN_ON_DEVICE)
559 {emem.deviceToHost(ps.getOffset(),ps.getOffset()+sizeof(size_t));}
560 Unpacker<size_t,mem>::unpack(emem,g_id,ps);
561
562 size_t l_id = 0;
563 // convert the global id into local id
564 auto key = g_id_to_external_ghost_box.find(g_id);
565
566 if (key != g_id_to_external_ghost_box.end()) // FOUND
567 {l_id = key->second;}
568 else
569 {
570 // NOT FOUND
571
572 // It must always be found; if not, it means that this processor has no idea of
573 // what is stored and consequently does not know how to unpack it. Print a
574 // critical error and return.
575
576 std::cerr << "Error: " << __FILE__ << ":" << __LINE__ << " Critical, cannot unpack object, because received data cannot be interpreted\n";
577
578 return;
579 }
580
581
582 // we unpack into the last eb_gid_list entry, which is always big enough to
583 // hold the unpacked information
584
585 size_t le_id = eb_gid_list.get(l_id).full_match;
586 size_t ei = eb_gid_list.get(l_id).e_id;
587
588 // Get the external ghost box associated with the packed information
589 Box<dim,long int> box = eg_box.get(ei).bid.get(le_id).l_e_box;
590 size_t sub_id = eg_box.get(ei).bid.get(le_id).sub;
591
592 // sub-grid where to unpack
593 auto sub2 = loc_grid.get(sub_id).getIterator(box.getKP1(),box.getKP2(),false);
594
595 rem_copy_opt opt_ = rem_copy_opt::NONE_OPT;
596 if (opt & SKIP_LABELLING)
597 {opt_ = rem_copy_opt::KEEP_GEOMETRY;}
598
599 // Unpack
600 loc_grid.get(sub_id).remove(box);
601 Unpacker<device_grid,mem>::template unpack<decltype(sub2),decltype(v_cl.getgpuContext()),prp...>(emem,sub2,loc_grid.get(sub_id),ps,v_cl.getgpuContext(),opt_);
602
603 // Copy the information on the other grid
604 for (long int j = 0 ; j < (long int)eb_gid_list.get(l_id).eb_list.size() ; j++)
605 {
606 size_t nle_id = eb_gid_list.get(l_id).eb_list.get(j);
607 if (nle_id != le_id)
608 {
609// size_t nle_id = eb_gid_list.get(l_id).eb_list.get(j);
610 size_t n_sub_id = eg_box.get(ei).bid.get(nle_id).sub;
611
612 Box<dim,long int> box = eg_box.get(ei).bid.get(nle_id).l_e_box;
613 Box<dim,long int> rbox = eg_box.get(ei).bid.get(nle_id).lr_e_box;
614
615 loc_grid.get(n_sub_id).remove(box);
616 loc_grid.get(n_sub_id).copy_to(loc_grid.get(sub_id),rbox,box);
617 }
618 }
619 }
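// Resolution chain used above (sketch): each packed fragment starts with the global
// ghost-box id, which is mapped back to local data structures before unpacking:
//
//   g_id -> g_id_to_external_ghost_box -> l_id
//   eb_gid_list.get(l_id).full_match   -> external box actually unpacked into
//   eb_gid_list.get(l_id).eb_list      -> sibling boxes filled by a local copy_to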
620
621 template<typename mem, typename header_type,unsigned ... prp>
622 void unpack_data_to_ext_ghost_with_header(ExtPreAlloc<mem> & emem,
623 openfpm::vector<device_grid> & loc_grid,
624 header_type & headers,
625 size_t i,
626 const openfpm::vector<ep_box_grid<dim>> & eg_box,
627 const std::unordered_map<size_t,size_t> & g_id_to_external_ghost_box,
628 const openfpm::vector<e_box_multi<dim>> & eb_gid_list,
629 Unpack_stat & ps,
630 size_t opt)
631 {
632 // Unpack the ghost box global-id
633
634 size_t g_id;
635 // the gid comes from the pre-unpacked headers
636 g_id = headers.template get<0>(i);
637 ps.addOffset(sizeof(size_t));
638
639 size_t l_id = 0;
640 // convert the global id into local id
641 auto key = g_id_to_external_ghost_box.find(g_id);
642
643 if (key != g_id_to_external_ghost_box.end()) // FOUND
644 {l_id = key->second;}
645 else
646 {
647 // NOT FOUND
648
649 // It must always be found; if not, it means that this processor has no idea of
650 // what is stored and consequently does not know how to unpack it. Print a
651 // critical error and return.
652
653 std::cerr << "Error: " << __FILE__ << ":" << __LINE__ << " Critical, cannot unpack object, because received data cannot be interpreted\n";
654
655 return;
656 }
657
658
659 // we unpack into the last eb_gid_list entry, which is always big enough to
660 // hold the unpacked information
661
662 size_t le_id = eb_gid_list.get(l_id).full_match;
663 size_t ei = eb_gid_list.get(l_id).e_id;
664
665 // Get the external ghost box associated with the packed information
666 Box<dim,long int> box = eg_box.get(ei).bid.get(le_id).l_e_box;
667 size_t sub_id = eg_box.get(ei).bid.get(le_id).sub;
668
669 // sub-grid where to unpack
670 auto sub2 = loc_grid.get(sub_id).getIterator(box.getKP1(),box.getKP2(),false);
671
672 rem_copy_opt opt_ = rem_copy_opt::NONE_OPT;
673 if (opt & SKIP_LABELLING)
674 {opt_ = rem_copy_opt::KEEP_GEOMETRY;}
675
676 // Unpack
677 loc_grid.get(sub_id).remove(box);
678 Unpacker<device_grid,mem>::template unpack_with_header<decltype(sub2),decltype(headers),decltype(v_cl.getgpuContext()),prp...>
679 (emem,
680 sub2,
681 loc_grid.get(sub_id),
682 headers,
683 i,
684 ps,
685 v_cl.getgpuContext(),
686 opt_);
687
688 // Copy the information on the other grid
689 for (long int j = 0 ; j < (long int)eb_gid_list.get(l_id).eb_list.size() ; j++)
690 {
691 size_t nle_id = eb_gid_list.get(l_id).eb_list.get(j);
692 if (nle_id != le_id)
693 {
694// size_t nle_id = eb_gid_list.get(l_id).eb_list.get(j);
695 size_t n_sub_id = eg_box.get(ei).bid.get(nle_id).sub;
696
697 Box<dim,long int> box = eg_box.get(ei).bid.get(nle_id).l_e_box;
698 Box<dim,long int> rbox = eg_box.get(ei).bid.get(nle_id).lr_e_box;
699
700 loc_grid.get(n_sub_id).remove(box);
701 loc_grid.get(n_sub_id).copy_to(loc_grid.get(sub_id),rbox,box);
702 }
703 }
704 }
705
706 template<unsigned int ... prp>
707 void fill_headers(size_t opt)
708 {
709 if ((opt & KEEP_PROPERTIES) == 0 && device_grid::is_unpack_header_supported())
710 {
711 headers.resize(n_headers_slot * recv_buffers.size());
712
713 Memory result;
714 result.allocate(sizeof(int));
715
716 pointers_h.resize(recv_buffers.size());
717
718 for ( size_t i = 0 ; i < recv_buffers.size() ; i++ )
719 {
720 pointers_h.template get<0>(i) = recv_buffers.get(i).getDevicePointer();
721 pointers_h.template get<1>(i) = (unsigned char *)recv_buffers.get(i).getDevicePointer() + recv_buffers.get(i).size();
722 }
723
724 pointers_h.template hostToDevice<0,1>();
725
726 while(1)
727 {
728 for ( size_t i = 0 ; i < recv_buffers.size() ; i++ )
729 {pointers_h.template get<2>(i) = 0;}
730 pointers_h.template hostToDevice<2>();
731 *(int *)result.getPointer() = 0;
732 result.hostToDevice();
733
734 device_grid::template unpack_headers<decltype(pointers_h),decltype(headers),decltype(result),prp ...>(pointers_h,headers,result,n_headers_slot);
735 result.deviceToHost();
736
737 if (*(int *)result.getPointer() == 0) {break;}
738
739 n_headers_slot *= 2;
740 headers.resize(n_headers_slot * recv_buffers.size());
741
742 }
743
744 headers.template deviceToHost<0,1,2>();
745 }
746 }
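// The while(1) loop above sizes the header scratch space by trial: unpack_headers
// flags an overflow through `result` when some buffer holds more fragment headers
// than n_headers_slot allows, and the slots are doubled before retrying, i.e.
//
//   1 -> 2 -> 4 -> 8 slots for a buffer carrying 5 fragments
//
// so the cost is logarithmic in the final number of fragments per buffer.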
747
748 template<unsigned ... prp>
749 void merge_received_data_get(openfpm::vector<device_grid> & loc_grid,
750 const openfpm::vector<ep_box_grid<dim>> & eg_box,
751 const std::vector<size_t> & prp_recv,
752 ExtPreAlloc<Memory> & prRecv_prp,
753 const std::unordered_map<size_t,size_t> & g_id_to_external_ghost_box,
754 const openfpm::vector<e_box_multi<dim>> & eb_gid_list,
755 size_t opt)
756 {
757 if (device_grid::isCompressed() == false)
758 {
759 // wait to receive communication
760 v_cl.execute();
761
762 Unpack_stat ps;
763
764 // Unpack the object
765 for ( size_t i = 0 ; i < eg_box.size() ; i++ )
766 {
767 size_t mark_here = ps.getOffset();
768
769 // for each external ghost box
770 while (ps.getOffset() - mark_here < prp_recv[i])
771 {
772 // Unpack the ghost box global-id
773
774
775 unpack_data_to_ext_ghost<Memory,prp ...>(prRecv_prp,loc_grid,i,
776 eg_box,g_id_to_external_ghost_box,eb_gid_list,
777 ps,opt);
778 }
779 }
780 }
781 else
782 {
783 fill_headers<prp ...>(opt);
784
785 if (headers.size() != 0)
786 {
787 // Unpack the object
788 for ( size_t i = 0 ; i < recv_buffers.size() ; i++ )
789 {
790 Unpack_stat ps;
791 size_t mark_here = ps.getOffset();
792
793 ExtPreAlloc<BMemory<Memory>> mem(recv_buffers.get(i).size(),recv_buffers.get(i));
794
795 int j = 0;
796
797 // for each external ghost box
798 while (ps.getOffset() - mark_here < recv_buffers.get(i).size())
799 {
800 // Unpack the ghost box global-id
801
802 unpack_data_to_ext_ghost_with_header<BMemory<Memory>,decltype(headers),prp ...>(mem,loc_grid,headers,i*n_headers_slot+j,
803 eg_box,g_id_to_external_ghost_box,eb_gid_list,
804 ps,opt);
805
806 j++;
807 }
808 }
809 }
810 else
811 {
812 // Unpack the object
813 for ( size_t i = 0 ; i < recv_buffers.size() ; i++ )
814 {
815 Unpack_stat ps;
816 size_t mark_here = ps.getOffset();
817
818 ExtPreAlloc<BMemory<Memory>> mem(recv_buffers.get(i).size(),recv_buffers.get(i));
819
820 // for each external ghost box
821 while (ps.getOffset() - mark_here < recv_buffers.get(i).size())
822 {
823 // Unpack the ghost box global-id
824
825 unpack_data_to_ext_ghost<BMemory<Memory>,prp ...>(mem,loc_grid,i,
826 eg_box,g_id_to_external_ghost_box,eb_gid_list,
827 ps,opt);
828 }
829 }
830 }
831 }
832 }
833
834
835 template<template<typename,typename> class op, unsigned ... prp>
836 void merge_received_data_put(Decomposition & dec, openfpm::vector<device_grid> & loc_grid,
837 const openfpm::vector<ip_box_grid<dim>> & ig_box,
838 const std::vector<size_t> & prp_recv,
839 ExtPreAlloc<Memory> & prRecv_prp,
840 const openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext,
841 const openfpm::vector<std::unordered_map<size_t,size_t>> & g_id_to_internal_ghost_box)
842 {
843 typedef object<typename object_creator<typename T::type,prp...>::type> prp_object;
844
845 if (device_grid::isCompressed() == false)
846 {
847 v_cl.execute();
848
849 Unpack_stat ps;
850
851 // Unpack the object
852 for ( size_t i = 0 ; i < ig_box.size() ; i++ )
853 {
854 // for each internal ghost box
855 for (size_t j = 0 ; j < ig_box.get(i).bid.size() ; j++)
856 {
857 // Unpack the ghost box global-id
858
859 size_t g_id;
860 Unpacker<size_t,HeapMemory>::unpack(prRecv_prp,g_id,ps);
861
862 size_t l_id = 0;
863 // convert the global id into local id
864 auto key = g_id_to_internal_ghost_box.get(i).find(g_id);
865 if (key != g_id_to_internal_ghost_box.get(i).end()) // FOUND
866 {l_id = key->second;}
867 else
868 {
869 // NOT FOUND
870
871 // It must always be found; if not, it means that this processor has no idea of
872 // what is stored and consequently does not know how to unpack it. Print a
873 // critical error and return.
874
875 std::cerr << "Error: " << __FILE__ << ":" << __LINE__ << " Critical, cannot unpack object, because received data cannot be interpreted\n";
876
877 return;
878 }
879
880 // Get the internal ghost box associated with the packed information
881 Box<dim,size_t> box = ig_box.get(i).bid.get(l_id).box;
882 size_t sub_id = ig_box.get(i).bid.get(l_id).sub;
883 box -= gdb_ext.get(sub_id).origin.template convertPoint<size_t>();
884
885 // sub-grid where to unpack
886 auto sub2 = loc_grid.get(sub_id).getIterator(box.getKP1(),box.getKP2());
887 grid_unpack_with_prp<op,prp_object,device_grid,Memory>::template unpacking<decltype(sub2),prp...>(prRecv_prp,sub2,loc_grid.get(sub_id),ps);
888 }
889 }
890 }
891 else
892 {
893 // Unpack the object
894 for ( size_t i = 0 ; i < recv_buffers.size() ; i++ )
895 {
896 Unpack_stat ps;
897 size_t mark_here = ps.getOffset();
898
900 ExtPreAlloc<BMemory<HeapMemory>> mem(recv_buffers.get(i).size(),recv_buffers.get(i));
901 // for each packed internal ghost box
902 while (ps.getOffset() - mark_here < recv_buffers.get(i).size())
903 {
904 // Unpack the ghost box global-id
905
907
908 size_t g_id;
909 Unpacker<size_t,BMemory<HeapMemory>>::unpack(mem,g_id,ps);
910
911 size_t pid = dec.ProctoID(recv_proc.get(i).p_id);
912
913 size_t l_id = 0;
914 // convert the global id into local id
915 auto key = g_id_to_internal_ghost_box.get(pid).find(g_id);
916 if (key != g_id_to_internal_ghost_box.get(pid).end()) // FOUND
917 {l_id = key->second;}
918 else
919 {
920 // NOT FOUND
921
922 // It must always be found; if not, it means that this processor has no idea of
923 // what is stored and consequently does not know how to unpack it. Print a
924 // critical error and return.
925
926 std::cerr << "Error: " << __FILE__ << ":" << __LINE__ << " Critical, cannot unpack object, because received data cannot be interpreted\n";
927
928 return;
929 }
930
931 // Get the internal ghost box associated with the packed information
932 Box<dim,size_t> box = ig_box.get(pid).bid.get(l_id).box;
933 size_t sub_id = ig_box.get(pid).bid.get(l_id).sub;
934 box -= gdb_ext.get(sub_id).origin.template convertPoint<size_t>();
935
936 // sub-grid where to unpack
937 auto sub2 = loc_grid.get(sub_id).getIterator(box.getKP1(),box.getKP2());
938 grid_unpack_with_prp<op,prp_object,device_grid,BMemory<HeapMemory>>::template unpacking<decltype(sub2),prp...>(mem,sub2,loc_grid.get(sub_id),ps);
939 }
940 }
941 }
942 }
943
944 int find_local_sub(Box<dim, long int> & box_dst, openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext)
945 {
946 Point<dim,long int> point;
947 for (size_t n = 0; n < dim; n++)
948 {point.get(n) = (box_dst.getHigh(n) + box_dst.getLow(n))/2;}
949
950 for (size_t j = 0; j < gdb_ext.size(); j++)
951 {
952 // Local sub-domain
953 SpaceBox<dim,long int> sub = gdb_ext.get(j).Dbox;
954 sub += gdb_ext.get(j).origin;
955
956 if (sub.isInside(point) == true)
957 {
958 return j;
959 }
960 }
961 return -1;
962 }
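// Usage sketch: find_local_sub resolves the sub-domain whose extended box contains
// the center of a destination box expressed in global grid coordinates:
//
//   Box<dim,long int> box_dst = ...;          // e.g. read back from a map_ buffer
//   int s = find_local_sub(box_dst,gdb_ext);
//   if (s == -1) { /* the fragment does not belong to this processor */ }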
963
964public:
965
974 void grids_reconstruct(openfpm::vector<openfpm::vector<aggregate<device_grid,SpaceBox<dim,long int>>>> & m_oGrid_recv,
975 openfpm::vector<device_grid> & loc_grid,
976 openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext,
977 CellDecomposer_sm<dim,St,shift<dim,St>> & cd_sm)
978 {
979 // Clear the information of the grid
980 for (size_t i = 0 ; i < loc_grid.size() ; i++)
981 {loc_grid.get(i).clear();}
982
983 for (size_t a = 0; a < m_oGrid_recv.size(); a++)
984 {
985 for (size_t k = 0; k < m_oGrid_recv.get(a).size(); k++)
986 {
987 device_grid & g = m_oGrid_recv.get(a).template get<0>(k);
988
989 SpaceBox<dim,long int> b = m_oGrid_recv.get(a).template get<1>(k);
990
991 Point<dim,St> p;
992 for (size_t n = 0; n < dim; n++)
993 {p.get(n) = g.getGrid().getBox().getHigh(n);}
994
995 Point<dim,St> point;
996 for (size_t n = 0; n < dim; n++)
997 {point.get(n) = (b.getHigh(n) + b.getLow(n))/2;}
998
999 for (size_t j = 0; j < gdb_ext.size(); j++)
1000 {
1001 // Local sub-domain
1002 SpaceBox<dim,long int> sub = gdb_ext.get(j).Dbox;
1003 sub += gdb_ext.get(j).origin;
1004
1005 if (sub.isInside(point) == true)
1006 {
1007
1008
1009 grid_key_dx<dim> start = b.getKP1() - grid_key_dx<dim>(gdb_ext.get(j).origin.asArray());
1010 grid_key_dx<dim> stop = b.getKP2() - grid_key_dx<dim>(gdb_ext.get(j).origin.asArray());
1011
1012 Box<dim,size_t> box_src;
1013 Box<dim,size_t> box_dst;
1014
1015 for(size_t i = 0 ; i < dim ; i++)
1016 {
1017 box_dst.setLow(i,start.get(i));
1018 box_dst.setHigh(i,stop.get(i));
1019 box_src.setLow(i,0);
1020 box_src.setHigh(i,stop.get(i)-start.get(i));
1021 }
1022
1023 loc_grid.get(j).copy_to(g,box_src,box_dst);
1024 }
1025 }
1026 }
1027 }
1028
1029 std::cout << "UNPACKING " << std::endl;
1030
1031 for (size_t i = 0 ; i < m_oGrid_recv.size() ; i++)
1032 {
1033 for (size_t j = 0 ; j < m_oGrid_recv.get(i).size() ; j++)
1034 {
1035 m_oGrid_recv.get(i).template get<0>(j).template deviceToHost<0>();
1036 std::cout << "UNPACKING POINTS: " << m_oGrid_recv.get(i).template get<0>(j).size() << std::endl;
1037 m_oGrid_recv.get(i).template get<0>(j).template removeCopyToFinalize<0>(v_cl.getgpuContext(), rem_copy_opt::PHASE1);
1038 }
1039 }
1040
1041 for (size_t i = 0 ; i < m_oGrid_recv.size() ; i++)
1042 {
1043 for (size_t j = 0 ; j < m_oGrid_recv.get(i).size() ; j++)
1044 {m_oGrid_recv.get(i).template get<0>(j).template removeCopyToFinalize<0>(v_cl.getgpuContext(), rem_copy_opt::PHASE2);}
1045 }
1046
1047 for (size_t i = 0 ; i < m_oGrid_recv.size() ; i++)
1048 {
1049 for (size_t j = 0 ; j < m_oGrid_recv.get(i).size() ; j++)
1050 {m_oGrid_recv.get(i).template get<0>(j).template removeCopyToFinalize<0>(v_cl.getgpuContext(), rem_copy_opt::PHASE3);}
1051 }
1052 }
1053
1066 template<typename lambda_t>
1067 void labelIntersectionGridsProcessor_and_pack(Decomposition & dec,
1068 CellDecomposer_sm<dim,St,shift<dim,St>> & cd_sm,
1069 openfpm::vector<device_grid> & loc_grid_old,
1070 openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext,
1071 openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext_old,
1072 openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext_global,
1073 size_t p_id_cur,
1074 lambda_t f)
1075 {
1076 // lbl_bc.clear();
1077 // lbl_bc.resize(v_cl.getProcessingUnits());
1078
1079 // for (int i = 0 ; i < lbl_bc.size() ; i++)
1080 // {lbl_bc.get(i) = 0;}
1081
1082 // // count
1083
1084 // for (size_t i = 0; i < gdb_ext_old.size(); i++)
1085 // {
1086 // // Local old sub-domain in global coordinates
1087 // SpaceBox<dim,long int> sub_dom = gdb_ext_old.get(i).Dbox;
1088 // sub_dom += gdb_ext_old.get(i).origin;
1089
1090 // for (size_t j = 0; j < gdb_ext_global.size(); j++)
1091 // {
1092 // size_t p_id = 0;
1093
1094 // // Intersection box
1095 // SpaceBox<dim,long int> inte_box;
1096
1097 // // Global new sub-domain in global coordinates
1098 // SpaceBox<dim,long int> sub_dom_new = gdb_ext_global.get(j).Dbox;
1099 // sub_dom_new += gdb_ext_global.get(j).origin;
1100
1101 // bool intersect = false;
1102
1103 // if (sub_dom.isValid() == true && sub_dom_new.isValid() == true)
1104 // intersect = sub_dom.Intersect(sub_dom_new, inte_box);
1105
1106 // if (intersect == true)
1107 // {
1108 // auto inte_box_cont = cd_sm.convertCellUnitsIntoDomainSpace(inte_box);
1109
1110 // // Get processor ID that store intersection box
1111 // Point<dim,St> p;
1112 // for (size_t n = 0; n < dim; n++)
1113 // p.get(n) = (inte_box_cont.getHigh(n) + inte_box_cont.getLow(n))/2;
1114
1115 // p_id = dec.processorID(p);
1116
1117 // lbl_bc.get(p_id) += 1;
1118 // }
1119 // }
1120 // }
1121
1122 // // reserve
1123 // for (int i = 0 ; i < lbl_b.size() ; i++)
1124 // {lbl_b.get(i).reserve(lbl_bc.get(i));}
1125
1126
1127 // Label all the intersection grids with the processor id where they should go
1128
1129 for (size_t i = 0; i < gdb_ext_old.size(); i++)
1130 {
1131 // Local old sub-domain in global coordinates
1132 SpaceBox<dim,long int> sub_dom = gdb_ext_old.get(i).Dbox;
1133 sub_dom += gdb_ext_old.get(i).origin;
1134
1135 for (size_t j = 0; j < gdb_ext_global.size(); j++)
1136 {
1137 size_t p_id = 0;
1138
1139 // Intersection box
1140 SpaceBox<dim,long int> inte_box;
1141
1142 // Global new sub-domain in global coordinates
1143 SpaceBox<dim,long int> sub_dom_new = gdb_ext_global.get(j).Dbox;
1144 sub_dom_new += gdb_ext_global.get(j).origin;
1145
1146 bool intersect = false;
1147
1148 if (sub_dom.isValid() == true && sub_dom_new.isValid() == true)
1149 intersect = sub_dom.Intersect(sub_dom_new, inte_box);
1150
1151 if (intersect == true)
1152 {
1153 auto inte_box_cont = cd_sm.convertCellUnitsIntoDomainSpace(inte_box);
1154
1155 // Get processor ID that store intersection box
1156 Point<dim,St> p;
1157 for (size_t n = 0; n < dim; n++)
1158 p.get(n) = (inte_box_cont.getHigh(n) + inte_box_cont.getLow(n))/2;
1159
1160 p_id = dec.processorID(p);
1161 if (p_id != p_id_cur)
1162 {continue;}
1163// prc_sz.get(p_id)++;
1164
1165 // Transform coordinates to local
1166 auto inte_box_local = inte_box;
1167
1168 inte_box_local -= gdb_ext_old.get(i).origin;
1169
1170 // Grid corresponding for gdb_ext_old.get(i) box
1171 device_grid & gr = loc_grid_old.get(i);
1172
1173 // Size of the grid to send
1174 size_t sz[dim];
1175 for (size_t l = 0; l < dim; l++)
1176 {
1177 sz[l] = inte_box_local.getHigh(l) - inte_box_local.getLow(l) + 1;
1178 //std::cout << "GR_send size on " << l << " dimension: " << sz[l] << std::endl;
1179 }
1180
1181 // Grid to send
1182 //device_grid gr_send(sz);
1183 //gr_send.setMemory();
1184 // lbl_b.get(p_id).add();
1185 // device_grid & gr_send = lbl_b.get(p_id).last().template get<0>();
1186 // SpaceBox<dim,long int> & box_send = lbl_b.get(p_id).last().template get<1>();
1187 // gr_send.setMemory();
1188
1189 // Sub iterator across intersection box inside local grid
1190 grid_key_dx<dim> start = inte_box_local.getKP1();
1191 grid_key_dx<dim> stop = inte_box_local.getKP2();
1192
1193 Box<dim,long int> box_src;
1194 Box<dim,long int> box_dst;
1195
1196 for(size_t i = 0 ; i < dim ; i++)
1197 {
1198 box_src.setLow(i,start.get(i));
1199 box_src.setHigh(i,stop.get(i));
1200 box_dst.setLow(i,inte_box.getLow(i));
1201 box_dst.setHigh(i,inte_box.getHigh(i));
1202 }
1203
1204 f(box_src,box_dst,gr,p_id);
1205 }
1206 }
1207 }
1208
1209/* for (size_t i = 0 ; i < loc_grid_old.size() ; i++)
1210 {
1211 loc_grid_old.get(i).template removeCopyToFinalize<0>(v_cl.getgpuContext(), rem_copy_opt::PHASE1);
1212 }
1213
1214 for (size_t i = 0 ; i < loc_grid_old.size() ; i++)
1215 {
1216 loc_grid_old.get(i).template removeCopyToFinalize<0>(v_cl.getgpuContext(), rem_copy_opt::PHASE2);
1217 }
1218
1219 for (size_t i = 0 ; i < loc_grid_old.size() ; i++)
1220 {
1221 loc_grid_old.get(i).template removeCopyToFinalize<0>(v_cl.getgpuContext(), rem_copy_opt::PHASE3);
1222 }*/
1223 }
1224
1230 template<int ... prp>
1231 void unpack_buffer_to_local_grid(openfpm::vector<device_grid> & loc_grid,
1232 openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext,
1233 ExtPreAlloc<Memory> & send_buffer,
1234 size_t sz)
1235 {
1236 // unpack local
1237 Unpack_stat ps;
1238
1239 while (ps.getOffset() < sz)
1240 {
1241 send_buffer.reset();
1242
1243 Box<dim,long int> box_dst;
1244 send_buffer.deviceToHost(ps.getOffset(),ps.getOffset()+sizeof(Box<dim,long int>));
1245 Unpacker<Box<dim,long int>,Memory>::unpack(send_buffer,box_dst,ps);
1246
1247 int s = find_local_sub(box_dst,gdb_ext);
1248 if (s == -1)
1249 {std::cout << __FILE__ << ":" << __LINE__ << " map, error non-local subdomain " << std::endl;}
1250
1251 // convert box_dst to local
1252 for (int d = 0 ; d < dim ; d++ )
1253 {
1254 box_dst.setLow(d, box_dst.getLow(d) - gdb_ext.get(s).origin.get(d));
1255 box_dst.setHigh(d, box_dst.getHigh(d) - gdb_ext.get(s).origin.get(d));
1256 }
1257
1258 loc_grid.get(s).remove(box_dst);
1259 auto sub2 = loc_grid.get(s).getIterator(box_dst.getKP1(),box_dst.getKP2(),0);
1260 Unpacker<device_grid,Memory>::template unpack<decltype(sub2),decltype(v_cl.getgpuContext()),prp ...>(send_buffer,sub2,loc_grid.get(s),ps,v_cl.getgpuContext(),NONE_OPT);
1261 }
1262
1263 for (int s = 0 ; s < loc_grid.size() ; s++)
1264 {loc_grid.get(s).template removeAddUnpackFinalize<prp ...>(v_cl.getgpuContext(),0);}
1265 }
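// Buffer layout consumed above, as produced by map_ below: a sequence of records
//
//   [ Box<dim,long int> box_dst | packed grid data covering box_dst ] ...
//
// read until ps.getOffset() reaches sz; every record is routed to the sub-domain
// returned by find_local_sub and unpacked in place.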
1266
1280 template<int ... prp>
1281 void map_(Decomposition & dec,
1282 CellDecomposer_sm<dim,St,shift<dim,St>> & cd_sm,
1283 openfpm::vector<device_grid> & loc_grid,
1284 openfpm::vector<device_grid> & loc_grid_old,
1285 openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext,
1286 openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext_old,
1287 openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext_global,
1288 size_t opt)
1289 {
1290 this->opt = opt;
1291
1292 openfpm::vector<size_t> send_buffer_sizes(v_cl.getProcessingUnits());
1293 openfpm::vector<Memory> send_buffers_;
1294 openfpm::vector<ExtPreAlloc<Memory>> send_buffers;
1295 send_buffers_.resize(v_cl.getProcessingUnits());
1296 send_buffers.resize(v_cl.getProcessingUnits());
1297
1298 send_prc_queue.clear();
1299 send_pointer.clear();
1300 send_size.clear();
1301
1302 for (int p_id = 0 ; p_id < v_cl.getProcessingUnits() ; p_id++)
1303 {
1304 for (int i = 0 ; i < loc_grid_old.size() ; i++)
1305 {loc_grid_old.get(i).packReset();}
1306
1307 auto l = [&](Box<dim,long int> & box_src,
1308 Box<dim,long int> & box_dst,
1309 device_grid & gr,
1310 size_t p_id){
1311 //gr_send.copy_to(gr,box_src,box_dst);
1312
1313
1314 Packer<SpaceBox<dim,long int>,BMemory<Memory>>::packRequest(box_dst,send_buffer_sizes.get(p_id));
1315
1316 auto sub_it = gr.getIterator(box_src.getKP1(),box_src.getKP2(),0);
1317 gr.template packRequest<prp ...>(sub_it,send_buffer_sizes.get(p_id));
1318
1319 //box_send = inte_box;
1320 };
1321
1322 // Contains the processor id of each box (basically where they have to go)
1323 labelIntersectionGridsProcessor_and_pack(dec,cd_sm,loc_grid_old,gdb_ext,gdb_ext_old,gdb_ext_global,p_id,l);
1324
1325 for (int i = 0 ; i < loc_grid_old.size(); i++)
1326 {
1327 loc_grid_old.get(i).template packCalculate<prp ...>(send_buffer_sizes.get(p_id),v_cl.getgpuContext());
1328 }
1329
1330 send_buffers_.get(p_id).resize(send_buffer_sizes.get(p_id));
1331 send_buffers.get(p_id).setMemory(send_buffer_sizes.get(p_id),send_buffers_.get(p_id));
1332 send_buffers.get(p_id).incRef();
1333
1334 // we now pack
1335 Pack_stat sts;
1336
1337 auto lp = [&](Box<dim,long int> & box_src,
1338 Box<dim,long int> & box_dst,
1339 device_grid & gr,
1340 size_t p_id){
1341
1342 size_t offset = send_buffers.get(p_id).getOffsetEnd();
1343 Packer<Box<dim,long int>,Memory>::pack(send_buffers.get(p_id),box_dst,sts);
1344 size_t offset2 = send_buffers.get(p_id).getOffsetEnd();
1345
1346 send_buffers.get(p_id).hostToDevice(offset,offset2);
1347
1348 auto sub_it = gr.getIterator(box_src.getKP1(),box_src.getKP2(),0);
1349
1350 Packer<device_grid,Memory>::template pack<decltype(sub_it),prp ...>(send_buffers.get(p_id),gr,sub_it,sts);
1351 };
1352
1353 // Contains the processor id of each box (basically where they have to go)
1354 labelIntersectionGridsProcessor_and_pack(dec,cd_sm,loc_grid_old,gdb_ext,gdb_ext_old,gdb_ext_global,p_id,lp);
1355
1356 for (int i = 0 ; i < loc_grid_old.size() ; i++)
1357 {
1358 loc_grid_old.get(i).template packFinalize<prp ...>(send_buffers.get(p_id),sts,0,false);
1359 }
1360 }
1361
1362 // std::cout << "Local buffer: " << send_buffers.get(v_cl.rank()).size() << std::endl;
1363 // int sz = send_buffers.get(v_cl.rank()).size();
1364 //send_buffers.get(v_cl.rank()).reset();
1365
1366 // // Print all the byte in send_buffers_
1367 // for (int j = 0 ; j < 16 && j < sz ; j++) {
1368 // std::cout << "Local buffer " << v_cl.rank() << " " << ((long int *)send_buffers.get(v_cl.rank()).getPointer())[j] << " " << &((long int *)send_buffers.get(v_cl.rank()).getPointer())[j] << std::endl;
1369 // }
1370
1371 unpack_buffer_to_local_grid<prp ...>(loc_grid,gdb_ext,send_buffers.get(v_cl.rank()),send_buffers.get(v_cl.rank()).size());
1372
1373 //openfpm::vector<void *> send_pointer;
1374 //openfpm::vector<int> send_size;
1375 for (int i = 0 ; i < send_buffers.size() ; i++)
1376 {
1377 if (i != v_cl.rank())
1378 {
1379 send_pointer.add(send_buffers_.get(i).getDevicePointer());
1380 send_size.add(send_buffers_.get(i).size());
1381 send_prc_queue.add(i);
1382 }
1383 }
1384
1385 size_t * send_size_ptr = NULL;
1386 size_t * send_prc_queue_ptr = NULL;
1387 void ** send_pointer_ptr = NULL;
1388
1389 if (send_size.size() != 0)
1390 {
1391 send_size_ptr = &send_size.get(0);
1392 send_pointer_ptr = &send_pointer.get(0);
1393 send_prc_queue_ptr = &send_prc_queue.get(0);
1394 }
1395
1396 recv_buffers.clear();
1397 recv_proc.clear();
1398
1399 v_cl.sendrecvMultipleMessagesNBX(send_pointer.size(),send_size_ptr,
1400 send_prc_queue_ptr,send_pointer_ptr,
1401 receive_dynamic,this);
1402
1403
1404 for (int i = 0 ; i < recv_buffers.size() ; i++)
1405 {
1406 ExtPreAlloc<Memory> prAlloc_;
1407 prAlloc_.setMemory(recv_buffers.get(i).size(),recv_buffers.get(i));
1408 unpack_buffer_to_local_grid<prp ...>(loc_grid,gdb_ext,prAlloc_,recv_proc.get(i).size);
1409 }
1410
1411 for (int i = 0 ; i < send_buffers.size() ; i++)
1412 {send_buffers.get(i).decRef();}
1413 }
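// map_ protocol summary (sketch): for every destination processor p_id the labeling
// runs twice with different lambdas,
//
//   1. packRequest lambda  -> accumulate send_buffer_sizes.get(p_id)
//   2. pack lambda         -> serialize the [box_dst | grid data] records
//
// then the local buffer is unpacked directly while the remote ones travel through
// sendrecvMultipleMessagesNBX with receive_dynamic as the allocation callback.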
1414
1426 template<int... prp> void ghost_get_(const openfpm::vector<ip_box_grid<dim>> & ig_box,
1427 const openfpm::vector<ep_box_grid<dim>> & eg_box,
1428 const openfpm::vector<i_lbox_grid<dim>> & loc_ig_box,
1429 const openfpm::vector<e_lbox_grid<dim>> & loc_eg_box,
1430 const openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext,
1431 const openfpm::vector<e_box_multi<dim>> & eb_gid_list,
1432 bool use_bx_def,
1433 openfpm::vector<device_grid> & loc_grid,
1434 const grid_sm<dim,void> & ginfo,
1435 std::unordered_map<size_t,size_t> & g_id_to_external_ghost_box,
1436 size_t opt)
1437 {
1438#ifdef PROFILE_SCOREP
1439 SCOREP_USER_REGION("ghost_get",SCOREP_USER_REGION_TYPE_FUNCTION)
1440#endif
1441
1442 // Sending property object
1443 typedef object<typename object_creator<typename T::type,prp...>::type> prp_object;
1444
1445 recv_buffers.clear();
1446 recv_proc.clear();
1447 send_prc_queue.clear();
1448 send_pointer.clear();
1449 send_size.clear();
1450
1451 this->opt = opt;
1452
1453 size_t req = 0;
1454
1455 // Pack information
1456 Pack_stat sts;
1457
1458 // We check if skip labelling is possible in this condition
1459 for (int i = 0 ; i < loc_grid.size() ; i++)
1460 {opt &= (loc_grid.get(i).isSkipLabellingPossible())?(int)-1:~SKIP_LABELLING;}
1461
1462 #ifdef ENABLE_GRID_DIST_ID_PERF_STATS
1463 timer packing_time;
1464 packing_time.start();
1465 #endif
1466
1467 if (!(opt & SKIP_LABELLING))
1468 {
1469 // first we initialize the pack buffer on all internal grids
1470
1471 for (size_t i = 0 ; i < loc_grid.size() ; i++)
1472 {loc_grid.get(i).packReset();}
1473
1474 // Calculating the size to pack all the data to send
1475 for ( size_t i = 0 ; i < ig_box.size() ; i++ )
1476 {
1477 // for each ghost box
1478 for (size_t j = 0 ; j < ig_box.get(i).bid.size() ; j++)
1479 {
1480 // And linked sub-domain
1481 size_t sub_id = ig_box.get(i).bid.get(j).sub;
1482 // Internal ghost box
1483 Box<dim,long int> g_ig_box = ig_box.get(i).bid.get(j).box;
1484
1485 if (g_ig_box.isValid() == false)
1486 {continue;}
1487
1488 g_ig_box -= gdb_ext.get(sub_id).origin.template convertPoint<size_t>();
1489
1490 // Pack a size_t for the internal ghost id
1492 // Create a sub grid iterator spanning the internal ghost layer
1493 auto sub_it = loc_grid.get(sub_id).getIterator(g_ig_box.getKP1(),g_ig_box.getKP2(),false);
1494
1495 // get the size to pack
1496 Packer<device_grid,Memory>::template packRequest<decltype(sub_it),prp...>(loc_grid.get(sub_id),sub_it,req);
1497 }
1498 }
1499
1500 // Finalize calculation
1501 for (size_t i = 0 ; i < loc_grid.size() ; i++)
1502 {loc_grid.get(i).template packCalculate<prp ...>(req,v_cl.getgpuContext());}
1503
1504 // resize the property buffer memory
1505 g_send_prp_mem.resize(req);
1506
1507 // Create an object of preallocated memory for properties
1508 ExtPreAlloc<Memory> & prAlloc_prp = *(new ExtPreAlloc<Memory>(req,g_send_prp_mem));
1509 // Necessary. We do not want this memory to be destroyed until it goes out of scope.
1510 // P.S. The Packer shares this memory with data-structures, and if those see the
1511 // reference counter drop to zero they destroy this memory.
1512 prAlloc_prp.incRef();
1513
1514 pointers.clear();
1515 pointers2.clear();
1516
1517 // Pack the information for each processor and send it
1518 for ( size_t i = 0 ; i < ig_box.size() ; i++ )
1519 {
1520
1521 sts.mark();
1522
1523 void * pointer;
1524
1525 if (opt & RUN_ON_DEVICE)
1526 {pointer = prAlloc_prp.getDevicePointerEnd();}
1527 else
1528 {pointer = prAlloc_prp.getPointerEnd();}
1529
1530 // for each ghost box
1531 for (size_t j = 0 ; j < ig_box.get(i).bid.size() ; j++)
1532 {
1533 // we pack only if it is valid
1534 if (ig_box.get(i).bid.get(j).box.isValid() == false)
1535 continue;
1536
1537 // And linked sub-domain
1538 size_t sub_id = ig_box.get(i).bid.get(j).sub;
1539 // Internal ghost box
1540 Box<dim,size_t> g_ig_box = ig_box.get(i).bid.get(j).box;
1541 g_ig_box -= gdb_ext.get(sub_id).origin.template convertPoint<size_t>();
1542 // Ghost box global id
1543 size_t g_id = ig_box.get(i).bid.get(j).g_id;
1544
1545 // Pack a size_t for the internal ghost id
1546 Packer<size_t,Memory>::pack(prAlloc_prp,g_id,sts);
1547 prAlloc_prp.hostToDevice(prAlloc_prp.getOffset(),prAlloc_prp.getOffsetEnd());
1548 // Create a sub grid iterator spanning the internal ghost layer
1549 auto sub_it = loc_grid.get(sub_id).getIterator(g_ig_box.getKP1(),g_ig_box.getKP2(),false);
1550 // and pack the internal ghost grid
1551 Packer<device_grid,Memory>::template pack<decltype(sub_it),prp...>(prAlloc_prp,loc_grid.get(sub_id),sub_it,sts);
1552 }
1553 // send the request
1554
1555 void * pointer2;
1556
1557 if (opt & RUN_ON_DEVICE)
1558 {pointer2 = prAlloc_prp.getDevicePointerEnd();}
1559 else
1560 {pointer2 = prAlloc_prp.getPointerEnd();}
1561
1562 pointers.add(pointer);
1563 pointers2.add(pointer2);
1564 }
1565
1566 for (size_t i = 0 ; i < loc_grid.size() ; i++)
1567 {
1568 rem_copy_opt opt_ = rem_copy_opt::NONE_OPT;
1569 if (opt & SKIP_LABELLING)
1570 {opt_ = rem_copy_opt::KEEP_GEOMETRY;}
1571
1572 loc_grid.get(i).template packFinalize<prp ...>(prAlloc_prp,sts,opt_,true);
1573 }
1574
1575 prAlloc_prp.decRef();
1576 delete &prAlloc_prp;
1577 }
1578 else
1579 {
1580 req = g_send_prp_mem.size();
1581
1582 // Create an object of preallocated memory for properties
1583 ExtPreAlloc<Memory> & prAlloc_prp = *(new ExtPreAlloc<Memory>(req,g_send_prp_mem));
1584 prAlloc_prp.incRef();
1585
1586 for (size_t i = 0 ; i < loc_grid.size() ; i++)
1587 {
1588 rem_copy_opt opt_ = rem_copy_opt::NONE_OPT;
1589 if (opt & SKIP_LABELLING)
1590 {opt_ = rem_copy_opt::KEEP_GEOMETRY;}
1591
1592 loc_grid.get(i).template packFinalize<prp ...>(prAlloc_prp,sts,opt_,true);
1593 }
1594
1595 prAlloc_prp.decRef();
1596 delete &prAlloc_prp;
1597 }
1598
1599 #ifdef ENABLE_GRID_DIST_ID_PERF_STATS
1600 packing_time.stop();
1601 tot_pack += packing_time.getwct();
1602 timer sendrecv_time;
1603 sendrecv_time.start();
1604 #endif
1605
1606 for ( size_t i = 0 ; i < ig_box.size() ; i++ )
1607 {
1608 // This function sends (or queues for sending) the information
1609 send_or_queue(ig_box.get(i).prc,(char *)pointers.get(i),(char *)pointers2.get(i));
1610 }
1611
1612 // Calculate the total information to receive from each processor
1613 std::vector<size_t> prp_recv;
1614
1615 // Create an object of preallocated memory for properties
1616 ExtPreAlloc<Memory> & prRecv_prp = *(new ExtPreAlloc<Memory>(g_recv_prp_mem.size(),g_recv_prp_mem));
1617 prRecv_prp.incRef();
1618
1619 // Before waiting for the communication to complete we sync the local ghost
1620 // in order to overlap it with the communication
1621
1622 queue_recv_data_get<prp_object>(eg_box,prp_recv,prRecv_prp);
1623
1624 #ifdef ENABLE_GRID_DIST_ID_PERF_STATS
1625 sendrecv_time.stop();
1626 tot_sendrecv += sendrecv_time.getwct();
1627 timer merge_loc_time;
1628 merge_loc_time.start();
1629 #endif
1630
1631 ghost_get_local<prp...>(loc_ig_box,loc_eg_box,gdb_ext,loc_grid,g_id_to_external_ghost_box,ginfo,use_bx_def,opt);
1632
1633 #ifdef ENABLE_GRID_DIST_ID_PERF_STATS
1634 merge_loc_time.stop();
1635 tot_loc_merge += merge_loc_time.getwct();
1636 timer merge_time;
1637 merge_time.start();
1638 #endif
1639
1640 for (size_t i = 0 ; i < loc_grid.size() ; i++)
1641 {loc_grid.get(i).removeAddUnpackReset();}
1642
1643 merge_received_data_get<prp ...>(loc_grid,eg_box,prp_recv,prRecv_prp,g_id_to_external_ghost_box,eb_gid_list,opt);
1644
1645 rem_copy_opt opt_ = rem_copy_opt::NONE_OPT;
1646 if (opt & SKIP_LABELLING)
1647 {opt_ = rem_copy_opt::KEEP_GEOMETRY;}
1648
1649 for (size_t i = 0 ; i < loc_grid.size() ; i++)
1650 {loc_grid.get(i).template removeAddUnpackFinalize<prp ...>(v_cl.getgpuContext(),opt_);}
1651
1652 #ifdef ENABLE_GRID_DIST_ID_PERF_STATS
1653 merge_time.stop();
1654 tot_merge += merge_time.getwct();
1655 #endif
1656
1657 prRecv_prp.decRef();
1658 delete &prRecv_prp;
1659 }
1660
1675 template<template<typename,typename> class op,int... prp>
1676 void ghost_put_(Decomposition & dec,
1677 const openfpm::vector<ip_box_grid<dim>> & ig_box,
1678 const openfpm::vector<ep_box_grid<dim>> & eg_box,
1679 const openfpm::vector<i_lbox_grid<dim>> & loc_ig_box,
1680 const openfpm::vector<e_lbox_grid<dim>> & loc_eg_box,
1681 const openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext,
1682 openfpm::vector<device_grid> & loc_grid,
1683 openfpm::vector<std::unordered_map<size_t,size_t>> & g_id_to_internal_ghost_box)
1684 {
1685 // Sending property object
1686 typedef object<typename object_creator<typename T::type,prp...>::type> prp_object;
1687
1688 recv_buffers.clear();
1689 recv_proc.clear();
1690 send_prc_queue.clear();
1691 send_pointer.clear();
1692 send_size.clear();
1693
1694 size_t req = 0;
1695
1696 // Create a packing request vector
1697 for ( size_t i = 0 ; i < eg_box.size() ; i++ )
1698 {
1699 // for each ghost box
1700 for (size_t j = 0 ; j < eg_box.get(i).bid.size() ; j++)
1701 {
1702 // And linked sub-domain
1703 size_t sub_id = eg_box.get(i).bid.get(j).sub;
1704 // External ghost box
1705 Box<dim,long int> g_eg_box = eg_box.get(i).bid.get(j).g_e_box;
1706
1707 if (g_eg_box.isValid() == false)
1708 continue;
1709
1710 g_eg_box -= gdb_ext.get(sub_id).origin.template convertPoint<size_t>();
1711
1712 // Pack a size_t for the internal ghost id
1714
1715 // Create a sub grid iterator spanning the external ghost layer
1716 auto sub_it = loc_grid.get(sub_id).getIterator(g_eg_box.getKP1(),g_eg_box.getKP2());
1717
1718 // and pack the internal ghost grid
1719 Packer<device_grid,HeapMemory>::template packRequest<decltype(sub_it),prp...>(loc_grid.get(sub_id),sub_it,req);
1720 }
1721 }
1722
1723 // resize the property buffer memory
1724 g_send_prp_mem.resize(req);
1725
1726 // Create an object of preallocated memory for properties
1727 ExtPreAlloc<Memory> & prAlloc_prp = *(new ExtPreAlloc<Memory>(req,g_send_prp_mem));
1728
1729 prAlloc_prp.incRef();
1730
1731 // Pack information
1732 Pack_stat sts;
1733
1734 // Pack the information for each processor and send it
1735 for ( size_t i = 0 ; i < eg_box.size() ; i++ )
1736 {
1737
1738 sts.mark();
1739 void * pointer = prAlloc_prp.getPointerEnd();
1740
1741 // for each ghost box
1742 for (size_t j = 0 ; j < eg_box.get(i).bid.size() ; j++)
1743 {
1744 // we pack only if it is valid
1745 if (eg_box.get(i).bid.get(j).g_e_box.isValid() == false)
1746 continue;
1747
1748 // And linked sub-domain
1749 size_t sub_id = eg_box.get(i).bid.get(j).sub;
1751 // External ghost box
1751 Box<dim,size_t> g_eg_box = eg_box.get(i).bid.get(j).g_e_box;
1752 g_eg_box -= gdb_ext.get(sub_id).origin.template convertPoint<size_t>();
1753 // Ghost box global id
1754 size_t g_id = eg_box.get(i).bid.get(j).g_id;
1755
1756 // Pack a size_t for the internal ghost id
1757 Packer<size_t,HeapMemory>::pack(prAlloc_prp,g_id,sts);
1758 // Create a sub grid iterator spanning the external ghost layer
1759 auto sub_it = loc_grid.get(sub_id).getIterator(g_eg_box.getKP1(),g_eg_box.getKP2());
1760 // and pack the external ghost grid
1761 Packer<device_grid,HeapMemory>::template pack<decltype(sub_it),prp...>(prAlloc_prp,loc_grid.get(sub_id),sub_it,sts);
1762 }
1763 // send the request
1764
1765 void * pointer2 = prAlloc_prp.getPointerEnd();
1766
1767 // This function sends (or queues for sending) the information
1768 send_or_queue(ig_box.get(i).prc,(char *)pointer,(char *)pointer2);
1769 }
1770
1771 // Calculate the total information to receive from each processor
1772 std::vector<size_t> prp_recv;
1773
1774 // Create an object of preallocated memory for properties
1775 ExtPreAlloc<Memory> & prRecv_prp = *(new ExtPreAlloc<Memory>(g_recv_prp_mem.size(),g_recv_prp_mem));
1776 prRecv_prp.incRef();
1777
1778 queue_recv_data_put<prp_object>(ig_box,prp_recv,prRecv_prp);
1779
1780 // Before waiting for the communication to complete we sync the local ghost
1781 // in order to overlap it with the communication
1782
1783 ghost_put_local<op,prp...>(loc_ig_box,loc_eg_box,gdb_ext,loc_grid,g_id_to_internal_ghost_box);
1784
1785 merge_received_data_put<op,prp ...>(dec,loc_grid,ig_box,prp_recv,prRecv_prp,gdb_ext,g_id_to_internal_ghost_box);
1786
1787 prRecv_prp.decRef();
1788 prAlloc_prp.decRef();
1789 delete &prAlloc_prp;
1790 delete &prRecv_prp;
1791 }
1792
1796 //! Constructor
1797 grid_dist_id_comm()
1798 :v_cl(create_vcluster<Memory>())
1799 {
1800
1801 }
1802
1808 //! Copy constructor
1809 grid_dist_id_comm(const grid_dist_id_comm<dim,St,T,Decomposition,Memory,device_grid> & gc)
1810 :v_cl(gc.v_cl)
1811 {
1812
1813 }
1814};
1815
1816
1817#endif /* SRC_GRID_GRID_DIST_ID_COMM_HPP_ */