#ifndef SRC_GRID_GRID_DIST_ID_COMM_HPP_
#define SRC_GRID_GRID_DIST_ID_COMM_HPP_

#include "Vector/vector_dist_ofb.hpp"
#include "Grid/copy_grid_fast.hpp"
#include "grid_dist_util.hpp"
#include "util/common_pdata.hpp"
#include "lib/pdata.hpp"
#include "Grid/grid_common.hpp"
template<bool result, typename T, typename device_grid, typename Memory>

std::cerr << __FILE__ << ":" << __LINE__ << " Error: complex properties on grids are not supported yet" << std::endl;
template<typename T, typename device_grid, typename Memory>

template<template<typename,typename> class op, typename sub_it_type, unsigned int ... prp>

gd.template unpack_with_op<op,Memory,prp ...>(recv_buf,sub2,ps);
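
/*! \brief Illustrative sketch (not part of the original header).
 *
 * unpack_with_op merges received ghost data into the local grid through the
 * template-template parameter op. A minimal sketch of the two-argument functor
 * shape such an op parameter implies (hypothetical names, not the actual OpenFPM
 * operators):
 *
 * \code
 * template<typename Tsrc, typename Tdst>
 * struct copy_op
 * {
 *     static inline void operation(const Tsrc & src, Tdst & dst) {dst = src;}
 * };
 *
 * template<typename Tsrc, typename Tdst>
 * struct add_op
 * {
 *     static inline void operation(const Tsrc & src, Tdst & dst) {dst += src;}
 * };
 * \endcode
 *
 * A ghost_put-style merge would select add_op-like behavior, a ghost_get-style
 * update a plain copy.
 */
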
template<typename device_grid, typename Memory, typename T>

template<typename device_grid, typename Memory, int ... prp>

template<template<typename,typename> class op, typename sub_it_type, typename T>

template<template<typename,typename> class op, typename T, typename device_grid, typename Memory>
template<unsigned int dim, typename St, typename T, typename Decomposition = CartDecomposition<dim,St>, typename Memory = HeapMemory, typename device_grid = grid_cpu<dim,T>>

bool operator<(const rp_id & tmp) const
{return p_id < tmp.p_id;}

int n_headers_slot = 1;
std::unordered_map<size_t,size_t> & g_id_to_external_ghost_box,

rem_copy_opt opt_ = rem_copy_opt::NONE_OPT;
if (opt & SKIP_LABELLING)
{opt_ = rem_copy_opt::KEEP_GEOMETRY;}

if (opt_ != rem_copy_opt::KEEP_GEOMETRY)
for (size_t i = 0 ; i < loc_grid.size() ; i++)
{loc_grid.get(i).copyRemoveReset();}

for (size_t i = 0 ; i < loc_ig_box.size() ; i++)
for (size_t j = 0 ; j < loc_ig_box.get(i).bid.size() ; j++)
size_t sub_id_src_gdb_ext = loc_ig_box.get(i).bid.get(j).sub_gdb_ext;
size_t sub_id_dst = loc_ig_box.get(i).bid.get(j).sub;
for (size_t v = 0 ; v < loc_ig_box.get(i).bid.get(j).k.size() ; v++)
size_t k = loc_ig_box.get(i).bid.get(j).k.get(v);
size_t sub_id_dst_gdb_ext = loc_eg_box.get(sub_id_dst).bid.get(k).sub_gdb_ext;
bx_dst -= gdb_ext.get(sub_id_dst_gdb_ext).origin;
Box<dim,long int> bx_src = flip_box(loc_eg_box.get(sub_id_dst).bid.get(k).ebox,loc_eg_box.get(sub_id_dst).bid.get(k).cmb,ginfo);
bx_src -= gdb_ext.get(sub_id_src_gdb_ext).origin;

if (use_bx_def == false)
if (loc_eg_box.get(sub_id_dst).bid.get(k).sub != i)
{std::cerr << "Error " << __FILE__ << ":" << __LINE__ << " source and destination are not correctly linked" << "\n";}
{std::cerr << "Error " << __FILE__ << ":" << __LINE__ << " source and destination do not match in size" << "\n";}
283 auto & gd = loc_grid.get(sub_id_dst_gdb_ext);
286 gd.copy_to(loc_grid.get(sub_id_src_gdb_ext),bx_src,bx_dst);
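
/*! \brief Illustrative sketch (not part of the original header).
 *
 * flip_box above maps an external ghost box back across the periodic boundary
 * identified by the combination cmb (one entry in {-1,0,+1} per dimension), so
 * that the local source and destination regions line up before copy_to. One
 * plausible form of that shift, with plain arrays standing in for
 * Box/comb/grid_sm (assumed, simplified types; the exact convention of flip_box
 * lives in grid_dist_util.hpp):
 *
 * \code
 * #include <array>
 *
 * constexpr int DIM = 2;
 *
 * struct simple_box {std::array<long,DIM> low, high;};
 *
 * // shift the box by cmb[d] * sz[d] in every dimension of the periodic domain
 * inline simple_box flip_box_sketch(simple_box b, const std::array<int,DIM> & cmb,
 *                                   const std::array<long,DIM> & sz)
 * {
 *     for (int d = 0 ; d < DIM ; d++)
 *     {
 *         b.low[d]  += (long)cmb[d] * sz[d];
 *         b.high[d] += (long)cmb[d] * sz[d];
 *     }
 *     return b;
 * }
 * \endcode
 */
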
for (size_t i = 0 ; i < loc_grid.size() ; i++)
loc_grid.get(i).template removeCopyToFinalize<prp ...>(v_cl.getgpuContext(), rem_copy_opt::PHASE1 | opt_);

for (size_t i = 0 ; i < loc_grid.size() ; i++)
loc_grid.get(i).template removeCopyToFinalize<prp ...>(v_cl.getgpuContext(), rem_copy_opt::PHASE2 | opt_);

for (size_t i = 0 ; i < loc_grid.size() ; i++)
loc_grid.get(i).template removeCopyToFinalize<prp ...>(v_cl.getgpuContext(), rem_copy_opt::PHASE3 | opt_);
openfpm::vector<std::unordered_map<size_t,size_t>> & g_id_to_external_ghost_box)

for (size_t i = 0 ; i < loc_eg_box.size() ; i++)
for (size_t j = 0 ; j < loc_eg_box.get(i).bid.size() ; j++)
if (loc_eg_box.get(i).bid.get(j).initialized == false)
bx_src -= gdb_ext.get(i).origin;
size_t sub_id_dst = loc_eg_box.get(i).bid.get(j).sub;
size_t k = loc_eg_box.get(i).bid.get(j).k;
bx_dst -= gdb_ext.get(sub_id_dst).origin;

if (loc_ig_box.get(sub_id_dst).bid.get(k).sub != i)
std::cerr << "Error " << __FILE__ << ":" << __LINE__ << " source and destination are not correctly linked" << "\n";
{std::cerr << "Error " << __FILE__ << ":" << __LINE__ << " source and destination do not match in size" << "\n";}
363 auto & gd2 = loc_grid.get(sub_id_dst);
364 gd2.template copy_to_op<op,prp...>(loc_grid.get(i),bx_src,bx_dst);
void send_or_queue(size_t prc, char * pointer, char * pointer2)

if (device_grid::isCompressed() == false)
{v_cl.send(prc,0,pointer,(char *)pointer2 - (char *)pointer);}
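
/*! \brief Illustrative sketch (not part of the original header).
 *
 * When the device grid is not compressed the message is sent directly with a
 * known size. Otherwise the send is deferred: pointer, size and target processor
 * appear to be recorded in the queues documented for this class (send_pointer,
 * send_size, send_prc_queue) and exchanged later through the dynamic-receive path
 * (receive_dynamic), which allocates receive buffers on arrival. A hedged,
 * self-contained sketch of such bookkeeping with std::vector standing in for the
 * openfpm::vector members:
 *
 * \code
 * #include <cstddef>
 * #include <vector>
 *
 * struct send_queues_sketch
 * {
 *     std::vector<void *> send_pointer;    // pointer to the memory to send
 *     std::vector<size_t> send_size;       // size to send
 *     std::vector<size_t> send_prc_queue;  // list of processors to send to
 *
 *     // defer the send; a later NBX-style exchange picks these up
 *     void queue(size_t prc, char * pointer, char * pointer2)
 *     {
 *         send_pointer.push_back(pointer);
 *         send_size.push_back((size_t)(pointer2 - pointer));
 *         send_prc_queue.push_back(prc);
 *     }
 * };
 * \endcode
 */
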
static void * receive_dynamic(size_t msg_i, size_t total_msg, size_t total_p, size_t i, size_t ri, size_t tag, void * ptr)

if (gd->opt & RUN_ON_DEVICE)

template <typename prp_object>

std::vector<size_t> & prp_recv,

cudaDeviceSynchronize();

if (device_grid::isCompressed() == false)
for (size_t i = 0 ; i < eg_box.size() ; i++)
prp_recv.push_back(eg_box.get(i).recv_pnt * sizeof(prp_object) + sizeof(size_t)*eg_box.get(i).n_r_box);

for (size_t i = 0 ; i < eg_box.size() ; i++)
v_cl.recv(eg_box.get(i).prc,0,prRecv_prp.getPointer(),prp_recv[i]);

receive_dynamic,this);

receive_dynamic,this);

openfpm::vector_fr<BMemory<Memory>> tmp;

for (int i = 0 ; i < recv_proc.size() ; i++)
template <typename prp_object>

std::vector<size_t> & prp_recv,

if (device_grid::isCompressed() == false)
for (size_t i = 0 ; i < ig_box.size() ; i++)
prp_recv.push_back(0);

for (size_t j = 0 ; j < ig_box.get(i).bid.size() ; j++)
prp_recv[prp_recv.size()-1] += g_ig_box.getVolumeKey() * sizeof(prp_object) + sizeof(size_t);

for (size_t i = 0 ; i < ig_box.size() ; i++)
v_cl.recv(ig_box.get(i).prc,0,prRecv_prp.getPointer(),prp_recv[i]);

receive_dynamic,this);

receive_dynamic,this);
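
/*! \brief Illustrative sketch (not part of the original header).
 *
 * The receive sizes computed in queue_recv_data_get / queue_recv_data_put assume
 * that every ghost sub-message is a size_t header (the global ghost box id)
 * followed by the packed property payload; the merge routines below walk the
 * received buffer by offset until prp_recv[i] bytes have been consumed. A
 * self-contained sketch of walking such a size_t-prefixed stream (hypothetical
 * fixed-size payload; the real payloads are packed prp_object data):
 *
 * \code
 * #include <cstddef>
 * #include <cstring>
 * #include <vector>
 *
 * struct record_sketch {size_t g_id; const unsigned char * payload;};
 *
 * // walk "len" bytes of buf, where each record is [size_t g_id][payload_sz bytes]
 * inline std::vector<record_sketch> walk_stream(const unsigned char * buf, size_t len, size_t payload_sz)
 * {
 *     std::vector<record_sketch> out;
 *     size_t off = 0;
 *     while (off < len)
 *     {
 *         record_sketch r;
 *         std::memcpy(&r.g_id,buf + off,sizeof(size_t));
 *         r.payload = buf + off + sizeof(size_t);
 *         out.push_back(r);
 *         off += sizeof(size_t) + payload_sz;
 *     }
 *     return out;
 * }
 * \endcode
 */
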
template<typename mem, unsigned ... prp>

const std::unordered_map<size_t,size_t> & g_id_to_external_ghost_box,

if (opt & RUN_ON_DEVICE)

auto key = g_id_to_external_ghost_box.find(g_id);
if (key != g_id_to_external_ghost_box.end())
{l_id = key->second;}

std::cerr << "Error: " << __FILE__ << ":" << __LINE__ << " Critical, cannot unpack object, because received data cannot be interpreted\n";

size_t le_id = eb_gid_list.get(l_id).full_match;
size_t ei = eb_gid_list.get(l_id).e_id;
size_t sub_id = eg_box.get(ei).bid.get(le_id).sub;
auto sub2 = loc_grid.get(sub_id).getIterator(box.getKP1(),box.getKP2(),false);

rem_copy_opt opt_ = rem_copy_opt::NONE_OPT;
if (opt & SKIP_LABELLING)
{opt_ = rem_copy_opt::KEEP_GEOMETRY;}

loc_grid.get(sub_id).remove(box);

for (long int j = 0 ; j < (long int)eb_gid_list.get(l_id).eb_list.size() ; j++)
size_t nle_id = eb_gid_list.get(l_id).eb_list.get(j);
size_t n_sub_id = eg_box.get(ei).bid.get(nle_id).sub;
loc_grid.get(n_sub_id).remove(box);
loc_grid.get(n_sub_id).copy_to(loc_grid.get(sub_id),rbox,box);
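
/*! \brief Illustrative sketch (not part of the original header).
 *
 * Each received ghost sub-message carries its global id g_id; the unordered_map
 * g_id_to_external_ghost_box translates it into the local index of the external
 * ghost box (and, through eb_gid_list, into the owning sub-domain). When the id
 * is not found the data cannot be interpreted, the error above is reported and the
 * sub-message is skipped. A minimal self-contained sketch of that lookup
 * (hypothetical helper, simplified error handling):
 *
 * \code
 * #include <cstdio>
 * #include <unordered_map>
 *
 * inline long lookup_external_ghost(const std::unordered_map<size_t,size_t> & g2l, size_t g_id)
 * {
 *     auto key = g2l.find(g_id);
 *     if (key == g2l.end())
 *     {
 *         std::fprintf(stderr,"cannot interpret received ghost box with global id %zu\n",g_id);
 *         return -1;   // caller skips this sub-message
 *     }
 *     return (long)key->second;
 * }
 * \endcode
 */
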
template<typename mem, typename header_type, unsigned ... prp>

header_type & headers,
const std::unordered_map<size_t,size_t> & g_id_to_external_ghost_box,

g_id = headers.template get<0>(i);

auto key = g_id_to_external_ghost_box.find(g_id);
if (key != g_id_to_external_ghost_box.end())
{l_id = key->second;}

std::cerr << "Error: " << __FILE__ << ":" << __LINE__ << " Critical, cannot unpack object, because received data cannot be interpreted\n";

size_t le_id = eb_gid_list.get(l_id).full_match;
size_t ei = eb_gid_list.get(l_id).e_id;
size_t sub_id = eg_box.get(ei).bid.get(le_id).sub;
auto sub2 = loc_grid.get(sub_id).getIterator(box.getKP1(),box.getKP2(),false);

rem_copy_opt opt_ = rem_copy_opt::NONE_OPT;
if (opt & SKIP_LABELLING)
{opt_ = rem_copy_opt::KEEP_GEOMETRY;}

loc_grid.get(sub_id).remove(box);
loc_grid.get(sub_id),
v_cl.getgpuContext(),

for (long int j = 0 ; j < (long int)eb_gid_list.get(l_id).eb_list.size() ; j++)
size_t nle_id = eb_gid_list.get(l_id).eb_list.get(j);
size_t n_sub_id = eg_box.get(ei).bid.get(nle_id).sub;
loc_grid.get(n_sub_id).remove(box);
loc_grid.get(n_sub_id).copy_to(loc_grid.get(sub_id),rbox,box);
template<unsigned int ... prp>
void fill_headers(size_t opt)

if ((opt & KEEP_PROPERTIES) == 0 && device_grid::is_unpack_header_supported())
result.allocate(sizeof(int));

*(int *)result.getPointer() = 0;
result.hostToDevice();
device_grid::template unpack_headers<decltype(pointers_h),decltype(headers),decltype(result),prp ...>(pointers_h,headers,result,n_headers_slot);
result.deviceToHost();

if (*(int *)result.getPointer() == 0) {break;}

headers.template deviceToHost<0,1,2>();
template<unsigned ... prp>

const std::vector<size_t> & prp_recv,
const std::unordered_map<size_t,size_t> & g_id_to_external_ghost_box,

if (device_grid::isCompressed() == false)
for (size_t i = 0 ; i < eg_box.size() ; i++)
while (ps.getOffset() - mark_here < prp_recv[i])
unpack_data_to_ext_ghost<Memory,prp ...>(prRecv_prp,loc_grid,i,
eg_box,g_id_to_external_ghost_box,eb_gid_list,

fill_headers<prp ...>(opt);

if (headers.size() != 0)
unpack_data_to_ext_ghost_with_header<BMemory<Memory>,decltype(headers),prp ...>(mem,loc_grid,headers,i*n_headers_slot+j,
eg_box,g_id_to_external_ghost_box,eb_gid_list,

unpack_data_to_ext_ghost<BMemory<Memory>,prp ...>(mem,loc_grid,i,
eg_box,g_id_to_external_ghost_box,eb_gid_list,
template<template<typename,typename> class op, unsigned ... prp>

const std::vector<size_t> & prp_recv,
const openfpm::vector<std::unordered_map<size_t,size_t>> & g_id_to_internal_ghost_box)

typedef object<typename object_creator<typename T::type,prp...>::type> prp_object;

if (device_grid::isCompressed() == false)
for (size_t i = 0 ; i < ig_box.size() ; i++)
for (size_t j = 0 ; j < ig_box.get(i).bid.size() ; j++)

auto key = g_id_to_internal_ghost_box.get(i).find(g_id);
if (key != g_id_to_internal_ghost_box.get(i).end())
{l_id = key->second;}

std::cerr << "Error: " << __FILE__ << ":" << __LINE__ << " Critical, cannot unpack object, because received data cannot be interpreted\n";

size_t sub_id = ig_box.get(i).bid.get(l_id).sub;
box -= gdb_ext.get(sub_id).origin.template convertPoint<size_t>();
auto sub2 = loc_grid.get(sub_id).getIterator(box.getKP1(),box.getKP2());
size_t pid = dec.ProctoID(recv_proc.get(i).p_id);

auto key = g_id_to_internal_ghost_box.get(pid).find(g_id);
if (key != g_id_to_internal_ghost_box.get(pid).end())
{l_id = key->second;}

std::cerr << "Error: " << __FILE__ << ":" << __LINE__ << " Critical, cannot unpack object, because received data cannot be interpreted\n";

size_t sub_id = ig_box.get(pid).bid.get(l_id).sub;
box -= gdb_ext.get(sub_id).origin.template convertPoint<size_t>();
auto sub2 = loc_grid.get(sub_id).getIterator(box.getKP1(),box.getKP2());

for (size_t n = 0; n < dim; n++)
for (size_t j = 0; j < gdb_ext.size(); j++)
sub += gdb_ext.get(j).origin;
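
/*! \brief Illustrative sketch (not part of the original header).
 *
 * Several places below locate the local sub-domain a received grid patch belongs
 * to: a representative point of its box is computed, every gdb_ext entry is
 * shifted to global coordinates by its origin, and the first box that contains
 * the point wins. A self-contained sketch of that search with plain structs
 * standing in for Box/Point/GBoxes (assumed, simplified types):
 *
 * \code
 * #include <array>
 * #include <cstddef>
 * #include <vector>
 *
 * constexpr int DIM = 2;
 *
 * struct sbox_sketch {std::array<long,DIM> low, high;};
 * struct gbox_ext_sketch {sbox_sketch GDbox; std::array<long,DIM> origin;};
 *
 * inline long find_sub_sketch(const std::vector<gbox_ext_sketch> & gdb_ext, const std::array<long,DIM> & p)
 * {
 *     for (size_t j = 0; j < gdb_ext.size(); j++)
 *     {
 *         bool inside = true;
 *         for (int n = 0; n < DIM; n++)
 *         {
 *             long lo = gdb_ext[j].GDbox.low[n]  + gdb_ext[j].origin[n];
 *             long hi = gdb_ext[j].GDbox.high[n] + gdb_ext[j].origin[n];
 *             inside = inside && (p[n] >= lo && p[n] <= hi);
 *         }
 *         if (inside) {return (long)j;}
 *     }
 *     return -1;   // not a local sub-domain
 * }
 * \endcode
 */
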
for (size_t i = 0 ; i < loc_grid.size() ; i++)
{loc_grid.get(i).clear();}

for (size_t a = 0; a < m_oGrid_recv.size(); a++)
for (size_t k = 0; k < m_oGrid_recv.get(a).size(); k++)
device_grid & g = m_oGrid_recv.get(a).template get<0>(k);

for (size_t n = 0; n < dim; n++)
{p.get(n) = g.getGrid().getBox().getHigh(n);}

for (size_t n = 0; n < dim; n++)
{point.get(n) = (b.getHigh(n) + b.getLow(n))/2;}

for (size_t j = 0; j < gdb_ext.size(); j++)
sub += gdb_ext.get(j).origin;

for (size_t i = 0 ; i < dim ; i++)

loc_grid.get(j).copy_to(g,box_src,box_dst);
std::cout << "UNPACKING " << std::endl;

for (size_t i = 0 ; i < m_oGrid_recv.size() ; i++)
for (size_t j = 0 ; j < m_oGrid_recv.get(i).size() ; j++)
m_oGrid_recv.get(i).template get<0>(j).template deviceToHost<0>();
std::cout << "UNPACKING POINTS: " << m_oGrid_recv.get(i).template get<0>(j).size() << std::endl;
m_oGrid_recv.get(i).template get<0>(j).template removeCopyToFinalize<0>(v_cl.getgpuContext(), rem_copy_opt::PHASE1);

for (size_t i = 0 ; i < m_oGrid_recv.size() ; i++)
for (size_t j = 0 ; j < m_oGrid_recv.get(i).size() ; j++)
{m_oGrid_recv.get(i).template get<0>(j).template removeCopyToFinalize<0>(v_cl.getgpuContext(), rem_copy_opt::PHASE2);}

for (size_t i = 0 ; i < m_oGrid_recv.size() ; i++)
for (size_t j = 0 ; j < m_oGrid_recv.get(i).size() ; j++)
{m_oGrid_recv.get(i).template get<0>(j).template removeCopyToFinalize<0>(v_cl.getgpuContext(), rem_copy_opt::PHASE3);}
template<typename lambda_t>

for (size_t i = 0; i < gdb_ext_old.size(); i++)
sub_dom += gdb_ext_old.get(i).origin;
for (size_t j = 0; j < gdb_ext_global.size(); j++)
sub_dom_new += gdb_ext_global.get(j).origin;

bool intersect = false;
intersect = sub_dom.Intersect(sub_dom_new, inte_box);

if (intersect == true)
auto inte_box_cont = cd_sm.convertCellUnitsIntoDomainSpace(inte_box);

for (size_t n = 0; n < dim; n++)
p.get(n) = (inte_box_cont.getHigh(n) + inte_box_cont.getLow(n))/2;

p_id = dec.processorID(p);
if (p_id != p_id_cur)

auto inte_box_local = inte_box;
inte_box_local -= gdb_ext_old.get(i).origin;

for (size_t l = 0; l < dim; l++)
sz[l] = inte_box_local.getHigh(l) - inte_box_local.getLow(l) + 1;

for (size_t i = 0 ; i < dim ; i++)

f(box_src,box_dst,gr,p_id);
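
/*! \brief Illustrative sketch (not part of the original header).
 *
 * labelIntersectionGridsProcessor_and_pack does not pack by itself: for every
 * intersection between an old local sub-domain and a global sub-domain owned by
 * processor p_id it invokes the user callback f(box_src,box_dst,gr,p_id), so the
 * caller (map_) can first size the send buffers and then actually pack with two
 * different lambdas. A sketch of the callback shape (the lambda and its wrapper
 * are hypothetical, only the argument shape follows the call above):
 *
 * \code
 * #include <cstddef>
 *
 * void example_map_pack()
 * {
 *     auto on_region = [&](auto & box_src, auto & box_dst, auto & gr, size_t p_id)
 *     {
 *         // e.g. accumulate a pack request for the region box_src of gr into the
 *         // send-buffer bookkeeping of processor p_id, or pack it into its buffer
 *     };
 *     // on_region would be passed as the last argument f of
 *     // labelIntersectionGridsProcessor_and_pack(dec,cd_sm,loc_grid_old,
 *     //         gdb_ext,gdb_ext_old,gdb_ext_global,p_id_cur,on_region);
 *     (void)on_region;
 * }
 * \endcode
 */
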
template<int ... prp>

send_buffer.reset();

int s = find_local_sub(box_dst,gdb_ext);
{std::cout << __FILE__ << ":" << __LINE__ << " map, error non-local subdomain " << std::endl;}

for (int d = 0 ; d < dim ; d++)
box_dst.setLow(d, box_dst.getLow(d) - gdb_ext.get(s).origin.get(d));
box_dst.setHigh(d, box_dst.getHigh(d) - gdb_ext.get(s).origin.get(d));

loc_grid.get(s).remove(box_dst);
auto sub2 = loc_grid.get(s).getIterator(box_dst.getKP1(),box_dst.getKP2(),0);

for (int s = 0 ; s < loc_grid.size() ; s++)
{loc_grid.get(s).template removeAddUnpackFinalize<prp ...>(v_cl.getgpuContext(),0);}
template<int ... prp>

send_buffers_.resize(v_cl.getProcessingUnits());
send_buffers.resize(v_cl.getProcessingUnits());

for (int p_id = 0 ; p_id < v_cl.getProcessingUnits() ; p_id++)
for (int i = 0 ; i < loc_grid_old.size() ; i++)
{loc_grid_old.get(i).packReset();}

auto sub_it = gr.getIterator(box_src.getKP1(),box_src.getKP2(),0);
gr.template packRequest<prp ...>(sub_it,send_buffer_sizes.get(p_id));

for (int i = 0 ; i < loc_grid_old.size(); i++)
loc_grid_old.get(i).template packCalculate<prp ...>(send_buffer_sizes.get(p_id),v_cl.getgpuContext());

send_buffers_.get(p_id).resize(send_buffer_sizes.get(p_id));
send_buffers.get(p_id).setMemory(send_buffer_sizes.get(p_id),send_buffers_.get(p_id));
send_buffers.get(p_id).incRef();

size_t offset = send_buffers.get(p_id).getOffsetEnd();
size_t offset2 = send_buffers.get(p_id).getOffsetEnd();
send_buffers.get(p_id).hostToDevice(offset,offset2);

auto sub_it = gr.getIterator(box_src.getKP1(),box_src.getKP2(),0);

for (int i = 0 ; i < loc_grid_old.size() ; i++)
loc_grid_old.get(i).template packFinalize<prp ...>(send_buffers.get(p_id),sts,0,false);
for (int i = 0 ; i < send_buffers.size() ; i++)
if (i != v_cl.rank())
send_pointer.add(send_buffers_.get(i).getDevicePointer());

size_t * send_size_ptr = NULL;
size_t * send_prc_queue_ptr = NULL;
void ** send_pointer_ptr = NULL;

send_prc_queue_ptr,send_pointer_ptr,
receive_dynamic,this);

for (int i = 0 ; i < send_buffers.size() ; i++)
{send_buffers.get(i).decRef();}
std::unordered_map<size_t,size_t> & g_id_to_external_ghost_box,

#ifdef PROFILE_SCOREP
SCOREP_USER_REGION("ghost_get",SCOREP_USER_REGION_TYPE_FUNCTION)

typedef object<typename object_creator<typename T::type,prp...>::type> prp_object;

for (int i = 0 ; i < loc_grid.size() ; i++)
{opt &= (loc_grid.get(i).isSkipLabellingPossible())?(int)-1:~SKIP_LABELLING;}
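
/*! \brief Illustrative sketch (not part of the original header).
 *
 * The masking above clears SKIP_LABELLING from opt as soon as one local grid
 * cannot reuse the ghost labelling of the previous call, while leaving every
 * other option bit untouched. A self-contained sketch of the same bit trick
 * (the flag values here are made up for the example; the real ones live in the
 * OpenFPM headers):
 *
 * \code
 * #include <cstddef>
 *
 * constexpr size_t EX_SKIP_LABELLING = 1 << 1;   // hypothetical values
 * constexpr size_t EX_RUN_ON_DEVICE  = 1 << 2;
 *
 * inline size_t mask_opt(size_t opt, bool can_skip)
 * {
 *     // all-ones keeps opt unchanged, ~EX_SKIP_LABELLING clears only that bit
 *     opt &= can_skip ? (size_t)-1 : ~EX_SKIP_LABELLING;
 *     return opt;
 * }
 *
 * // mask_opt(EX_SKIP_LABELLING | EX_RUN_ON_DEVICE, false) == EX_RUN_ON_DEVICE
 * \endcode
 */
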
#ifdef ENABLE_GRID_DIST_ID_PERF_STATS
packing_time.start();

if (!(opt & SKIP_LABELLING))
for (size_t i = 0 ; i < loc_grid.size() ; i++)
{loc_grid.get(i).packReset();}

for (size_t i = 0 ; i < ig_box.size() ; i++)
for (size_t j = 0 ; j < ig_box.get(i).bid.size() ; j++)
size_t sub_id = ig_box.get(i).bid.get(j).sub;

if (g_ig_box.isValid() == false)
g_ig_box -= gdb_ext.get(sub_id).origin.template convertPoint<size_t>();
auto sub_it = loc_grid.get(sub_id).getIterator(g_ig_box.getKP1(),g_ig_box.getKP2(),false);

for (size_t i = 0 ; i < loc_grid.size() ; i++)
{loc_grid.get(i).template packCalculate<prp ...>(req,v_cl.getgpuContext());}
for (size_t i = 0 ; i < ig_box.size() ; i++)

if (opt & RUN_ON_DEVICE)

for (size_t j = 0 ; j < ig_box.get(i).bid.size() ; j++)
if (ig_box.get(i).bid.get(j).box.isValid() == false)

size_t sub_id = ig_box.get(i).bid.get(j).sub;
g_ig_box -= gdb_ext.get(sub_id).origin.template convertPoint<size_t>();
size_t g_id = ig_box.get(i).bid.get(j).g_id;
auto sub_it = loc_grid.get(sub_id).getIterator(g_ig_box.getKP1(),g_ig_box.getKP2(),false);

if (opt & RUN_ON_DEVICE)
pointers2.add(pointer2);

for (size_t i = 0 ; i < loc_grid.size() ; i++)
rem_copy_opt opt_ = rem_copy_opt::NONE_OPT;
if (opt & SKIP_LABELLING)
{opt_ = rem_copy_opt::KEEP_GEOMETRY;}

loc_grid.get(i).template packFinalize<prp ...>(prAlloc_prp,sts,opt_,true);

delete &prAlloc_prp;

for (size_t i = 0 ; i < loc_grid.size() ; i++)
rem_copy_opt opt_ = rem_copy_opt::NONE_OPT;
if (opt & SKIP_LABELLING)
{opt_ = rem_copy_opt::KEEP_GEOMETRY;}

loc_grid.get(i).template packFinalize<prp ...>(prAlloc_prp,sts,opt_,true);

delete &prAlloc_prp;
#ifdef ENABLE_GRID_DIST_ID_PERF_STATS
packing_time.stop();
tot_pack += packing_time.getwct();
timer sendrecv_time;
sendrecv_time.start();

for (size_t i = 0 ; i < ig_box.size() ; i++)
send_or_queue(ig_box.get(i).prc,(char *)pointers.get(i),(char *)pointers2.get(i));

std::vector<size_t> prp_recv;

queue_recv_data_get<prp_object>(eg_box,prp_recv,prRecv_prp);

#ifdef ENABLE_GRID_DIST_ID_PERF_STATS
sendrecv_time.stop();
tot_sendrecv += sendrecv_time.getwct();
timer merge_loc_time;
merge_loc_time.start();

ghost_get_local<prp...>(loc_ig_box,loc_eg_box,gdb_ext,loc_grid,g_id_to_external_ghost_box,ginfo,use_bx_def,opt);

#ifdef ENABLE_GRID_DIST_ID_PERF_STATS
merge_loc_time.stop();
tot_loc_merge += merge_loc_time.getwct();

for (size_t i = 0 ; i < loc_grid.size() ; i++)
{loc_grid.get(i).removeAddUnpackReset();}

merge_received_data_get<prp ...>(loc_grid,eg_box,prp_recv,prRecv_prp,g_id_to_external_ghost_box,eb_gid_list,opt);

rem_copy_opt opt_ = rem_copy_opt::NONE_OPT;
if (opt & SKIP_LABELLING)
{opt_ = rem_copy_opt::KEEP_GEOMETRY;}

for (size_t i = 0 ; i < loc_grid.size() ; i++)
{loc_grid.get(i).template removeAddUnpackFinalize<prp ...>(v_cl.getgpuContext(),opt_);}

#ifdef ENABLE_GRID_DIST_ID_PERF_STATS
tot_merge += merge_time.getwct();
template<template<typename,typename> class op, int ... prp>

openfpm::vector<std::unordered_map<size_t,size_t>> & g_id_to_internal_ghost_box)

typedef object<typename object_creator<typename T::type,prp...>::type> prp_object;

for (size_t i = 0 ; i < eg_box.size() ; i++)
for (size_t j = 0 ; j < eg_box.get(i).bid.size() ; j++)
size_t sub_id = eg_box.get(i).bid.get(j).sub;

if (g_eg_box.isValid() == false)
g_eg_box -= gdb_ext.get(sub_id).origin.template convertPoint<size_t>();
auto sub_it = loc_grid.get(sub_id).getIterator(g_eg_box.getKP1(),g_eg_box.getKP2());

for (size_t i = 0 ; i < eg_box.size() ; i++)
for (size_t j = 0 ; j < eg_box.get(i).bid.size() ; j++)
if (eg_box.get(i).bid.get(j).g_e_box.isValid() == false)

size_t sub_id = eg_box.get(i).bid.get(j).sub;
g_eg_box -= gdb_ext.get(sub_id).origin.template convertPoint<size_t>();
size_t g_id = eg_box.get(i).bid.get(j).g_id;
auto sub_it = loc_grid.get(sub_id).getIterator(g_eg_box.getKP1(),g_eg_box.getKP2());

send_or_queue(ig_box.get(i).prc,(char *)pointer,(char *)pointer2);

std::vector<size_t> prp_recv;

queue_recv_data_put<prp_object>(ig_box,prp_recv,prRecv_prp);

ghost_put_local<op,prp...>(loc_ig_box,loc_eg_box,gdb_ext,loc_grid,g_id_to_internal_ghost_box);

merge_received_data_put<op,prp ...>(dec,loc_grid,ig_box,prp_recv,prRecv_prp,gdb_ext,g_id_to_internal_ghost_box);

delete &prAlloc_prp;

:v_cl(create_vcluster<Memory>())
/* grid_dist_id_comm: helper class for the communication of grid_dist_id.
 *
 * Communication entry points:
 *   ghost_get_()    It fills the ghost part of the grids.
 *   ghost_put_()    It merges the information in the ghost with the real information.
 *   map_()          Moves all the grids that do not belong to the local processor to the respective processor.
 *
 * Internal helpers:
 *   ghost_get_local() / ghost_put_local()          Sync the local ghost part.
 *   queue_recv_data_get() / queue_recv_data_put()  Queue the receives of the ghost data (get/put path).
 *   unpack_buffer_to_local_grid()                  Unpack a received buffer into the local grids.
 *   grids_reconstruct()                            Reconstruct the local grids.
 *   labelIntersectionGridsProcessor_and_pack()     Label intersection grids for mappings.
 *   grid_dist_id_comm() / grid_dist_id_comm(const grid_dist_id_comm &)  Constructor and copy constructor.
 *
 * Data members:
 *   v_cl             VCluster.
 *   m_oGrid          Per-processor vector of grids with their boxes, used in the map procedure.
 *   opt              Receiving option.
 *   pointers         Send pointers.
 *   pointers_h       Header unpacker info.
 *   send_pointer     Pointer to the memory to send.
 *   send_size        Size to send.
 *   send_prc_queue   List of processors to send to.
 *   recv_proc        Receiving processors.
 *   recv_buffers     Receiving buffers in case of dynamic receive.
 *   recv_sz_map      Stores the size of the elements added for each processor that communicates with us (local processor).
 *   prc_recv_map     Stores the list of processors that communicate with us (local processor).
 *   p_map_req        Maps the processor id with the communication request in the map procedure.
 *   g_send_prp_mem   Memory for the ghost sending buffer.
 *   g_recv_prp_mem   Memory for the ghost receiving buffer.
 */
/* Supporting types referenced above:
 *   GBoxes           Stores the boxes that define the domain and the domain + ghost box of a local grid.
 *   e_box_multi      For each external ghost id, the set of sub-domains to which this external box is linked.
 *   ep_box_grid      Per-processor external ghost box.
 *   ip_box_grid      Per-processor internal ghost box.
 *   aggregate        Aggregate of properties: from a list of types it creates a struct that follows the OpenFPM native structure.
 *   object_creator   Creates a boost::fusion vector with the selected properties.
 *
 * The unpack-selector helpers at the top of the file dispatch the unpack call
 * (call_unpack / unpacking): the grid specializations unpack, optionally through an
 * op, while the fallback reports that it does not know how to unpack; the boolean
 * "result" trait selects the path depending on whether T requires complex
 * serialization.
 */