#ifndef SRC_GRID_GRID_DIST_ID_COMM_HPP_
#define SRC_GRID_GRID_DIST_ID_COMM_HPP_

#include "Vector/vector_dist_ofb.hpp"
#include "Grid/copy_grid_fast.hpp"
#include "grid_dist_util.hpp"
#include "util/common_pdata.hpp"
#include "lib/pdata.hpp"
#include "Grid/grid_common.hpp"
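// grid_dist_id_comm is the communication helper behind grid_dist_id: it implements the ghost
// synchronization (ghost_get_ / ghost_put_), the redistribution of the grids across processors
// (map_) and the pack/unpack machinery these operations rely on.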
// ...

template<bool result, typename T, typename device_grid, typename Memory>
// ...
        std::cerr << __FILE__ << ":" << __LINE__ << " Error: complex properties on grids are not supported yet" << std::endl;
// ...

template<typename T, typename device_grid, typename Memory>
// ...
    template<template<typename,typename> class op, typename sub_it_type, unsigned int ... prp>
// ...
        gd.template unpack_with_op<op,Memory,prp ...>(recv_buf,sub2,ps);
// ...

template<typename device_grid, typename Memory, typename T>
// ...

template<typename device_grid, typename Memory, int ... prp>
// ...

template<template<typename,typename> class op, typename sub_it_type, typename T>
// ...

template<template<typename,typename> class op, typename T, typename device_grid, typename Memory>
// ...

template<unsigned int dim,
         typename St,
         typename T,
         typename Decomposition = CartDecomposition<dim,St>,
         typename Memory = HeapMemory,
         typename device_grid = grid_cpu<dim,T>>
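// The class below is not used directly by application code: grid_dist_id (which inherits from it)
// forwards its ghost_get<...>() / ghost_put<...>() / map() calls to ghost_get_(), ghost_put_() and
// map_(). A minimal front-end sketch using the standard grid_dist_id API (illustrative only):
//
//   Box<3,float> domain({0.0,0.0,0.0},{1.0,1.0,1.0});
//   size_t sz[3] = {64,64,64};
//   Ghost<3,float> g(0.05);
//   grid_dist_id<3,float,aggregate<float>> g_dist(sz,domain,g);
//   g_dist.ghost_get<0>();   // eventually reaches grid_dist_id_comm::ghost_get_()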
// ...

    bool operator<(const rp_id & tmp) const
    {
        return p_id < tmp.p_id;
    }
// ...

    int n_headers_slot = 1;

// ...
                         std::unordered_map<size_t,size_t> & g_id_to_external_ghost_box,
// ...
        rem_copy_opt opt_ = rem_copy_opt::NONE_OPT;
        if (opt & SKIP_LABELLING)
        {opt_ = rem_copy_opt::KEEP_GEOMETRY;}

        if (opt_ != rem_copy_opt::KEEP_GEOMETRY)
        {
            for (size_t i = 0 ; i < loc_grid.size() ; i++)
            {loc_grid.get(i).copyRemoveReset();}
        }
// ...
        for (size_t i = 0 ; i < loc_ig_box.size() ; i++)
        {
            for (size_t j = 0 ; j < loc_ig_box.get(i).bid.size() ; j++)
            {
                size_t sub_id_src_gdb_ext = loc_ig_box.get(i).bid.get(j).sub_gdb_ext;
// ...
                size_t sub_id_dst = loc_ig_box.get(i).bid.get(j).sub;
// ...
                for (size_t v = 0 ; v < loc_ig_box.get(i).bid.get(j).k.size() ; v++)
                {
                    size_t k = loc_ig_box.get(i).bid.get(j).k.get(v);
// ...
                    size_t sub_id_dst_gdb_ext = loc_eg_box.get(sub_id_dst).bid.get(k).sub_gdb_ext;
                    bx_dst -= gdb_ext.get(sub_id_dst_gdb_ext).origin;
// ...
                    Box<dim,long int> bx_src = flip_box(loc_eg_box.get(sub_id_dst).bid.get(k).ebox,loc_eg_box.get(sub_id_dst).bid.get(k).cmb,ginfo);
                    bx_src -= gdb_ext.get(sub_id_src_gdb_ext).origin;
// ...
                    if (use_bx_def == false)
                    {
                        if (loc_eg_box.get(sub_id_dst).bid.get(k).sub != i)
                        {std::cerr << "Error " << __FILE__ << ":" << __LINE__ << " source and destination are not correctly linked" << "\n";}
// ...
                        {std::cerr << "Error " << __FILE__ << ":" << __LINE__ << " source and destination does not match in size" << "\n";}
                    }

                    auto & gd = loc_grid.get(sub_id_dst_gdb_ext);
// ...
                    gd.copy_to(loc_grid.get(sub_id_src_gdb_ext),bx_src,bx_dst);
                }
            }
        }
// ...
        for (size_t i = 0 ; i < loc_grid.size() ; i++)
        {loc_grid.get(i).template removeCopyToFinalize<prp ...>(v_cl.getGpuContext(), rem_copy_opt::PHASE1 | opt_);}

        for (size_t i = 0 ; i < loc_grid.size() ; i++)
        {loc_grid.get(i).template removeCopyToFinalize<prp ...>(v_cl.getGpuContext(), rem_copy_opt::PHASE2 | opt_);}

        for (size_t i = 0 ; i < loc_grid.size() ; i++)
        {loc_grid.get(i).template removeCopyToFinalize<prp ...>(v_cl.getGpuContext(), rem_copy_opt::PHASE3 | opt_);}
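// The queued copy/remove operations are finalized in three passes (PHASE1, PHASE2, PHASE3) over all
// local grids rather than grid by grid; presumably this lets sparse/GPU grid implementations batch
// the work of each phase across grids before moving to the next one.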
// ...
                         openfpm::vector<std::unordered_map<size_t,size_t>> & g_id_to_external_ghost_box)
    {
// ...
        for (size_t i = 0 ; i < loc_eg_box.size() ; i++)
        {
            for (size_t j = 0 ; j < loc_eg_box.get(i).bid.size() ; j++)
            {
                if (loc_eg_box.get(i).bid.get(j).initialized == false)
                    continue;
// ...
                bx_src -= gdb_ext.get(i).origin;
// ...
                size_t sub_id_dst = loc_eg_box.get(i).bid.get(j).sub;
// ...
                size_t k = loc_eg_box.get(i).bid.get(j).k;
// ...
                bx_dst -= gdb_ext.get(sub_id_dst).origin;
// ...
                if (loc_ig_box.get(sub_id_dst).bid.get(k).sub != i)
                    std::cerr << "Error " << __FILE__ << ":" << __LINE__ << " source and destination are not correctly linked" << "\n";
// ...
                {std::cerr << "Error " << __FILE__ << ":" << __LINE__ << " source and destination does not match in size" << "\n";}
// ...
                auto & gd2 = loc_grid.get(sub_id_dst);
                gd2.template copy_to_op<op,prp...>(loc_grid.get(i),bx_src,bx_dst);
            }
        }
    }
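// Unlike ghost_get_local(), which overwrites the external ghost with copy_to(), the put path uses
// copy_to_op<op,...>() so the ghost values are combined into the owning sub-domain through the
// caller-selected operation op (e.g. an additive merge).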
// ...
    void send_or_queue(size_t prc, char * pointer, char * pointer2)
    {
        if (device_grid::isCompressed() == false)
        {v_cl.send(prc,0,pointer,(char *)pointer2 - (char *)pointer);}
// ...
    static void * receive_dynamic(size_t msg_i, size_t total_msg, size_t total_p, size_t i, size_t ri, size_t tag, void * ptr)
    {
// ...
        if (gd->opt & RUN_ON_DEVICE)
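// receive_dynamic() is the callback handed (together with `this`) to the VCluster dynamic
// send/receive calls below: for each incoming message it adds a BMemory<Memory> chunk to
// recv_buffers, records the sending processor in recv_proc, and returns the pointer where the
// message has to be written (a device pointer when RUN_ON_DEVICE is set).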
// ...
    template <typename prp_object>
    void queue_recv_data_get(const openfpm::vector<ep_box_grid<dim>> & eg_box,
                             std::vector<size_t> & prp_recv,
                             ExtPreAlloc<Memory> & prRecv_prp)
    {
// ...
        cudaDeviceSynchronize();
// ...
        if (device_grid::isCompressed() == false)
        {
            // the size of each receive is known in advance from the external ghost boxes
            for (size_t i = 0 ; i < eg_box.size() ; i++ )
            {prp_recv.push_back(eg_box.get(i).recv_pnt * sizeof(prp_object) + sizeof(size_t)*eg_box.get(i).n_r_box);}
// ...
            for (size_t i = 0 ; i < eg_box.size() ; i++ )
            {v_cl.recv(eg_box.get(i).prc,0,prRecv_prp.getPointer(),prp_recv[i]);}
// ...
                receive_dynamic,this);
// ...
                receive_dynamic,this);
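// For plain (non-compressed) device_grid types the number of bytes coming from each neighbour is
// computed directly from eg_box (points to receive plus one size_t header per box), so matching
// v_cl.recv() calls can be posted. For compressed/sparse grids the sizes are not known up front and
// the data is received through the dynamic path (receive_dynamic) instead.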
// ...
        openfpm::vector_fr<BMemory<Memory>> tmp;
// ...
        for (int i = 0 ; i < recv_proc.size() ; i++)
// ...
    template <typename prp_object>
    void queue_recv_data_put(const openfpm::vector<ip_box_grid<dim>> & ig_box,
                             std::vector<size_t> & prp_recv,
                             ExtPreAlloc<Memory> & prRecv_prp)
    {
        if (device_grid::isCompressed() == false)
        {
            for (size_t i = 0 ; i < ig_box.size() ; i++ )
            {
                prp_recv.push_back(0);
// ...
                for (size_t j = 0 ; j < ig_box.get(i).bid.size() ; j++)
                {
// ...
                    prp_recv[prp_recv.size()-1] += g_ig_box.getVolumeKey() * sizeof(prp_object) + sizeof(size_t);
                }
            }
// ...
            for (size_t i = 0 ; i < ig_box.size() ; i++ )
            {v_cl.recv(ig_box.get(i).prc,0,prRecv_prp.getPointer(),prp_recv[i]);}
// ...
                receive_dynamic,this);
// ...
                receive_dynamic,this);
// ...
    template<typename mem, unsigned ... prp>
// ...
                                  const std::unordered_map<size_t,size_t> & g_id_to_external_ghost_box,
// ...
        if (opt & RUN_ON_DEVICE)
// ...
        auto key = g_id_to_external_ghost_box.find(g_id);

        if (key != g_id_to_external_ghost_box.end())
        {l_id = key->second;}
// ...
            std::cerr << "Error: " << __FILE__ << ":" << __LINE__ << " Critical, cannot unpack object, because received data cannot be interpreted\n";
// ...
        size_t le_id = eb_gid_list.get(l_id).full_match;
        size_t ei = eb_gid_list.get(l_id).e_id;
// ...
        size_t sub_id = eg_box.get(ei).bid.get(le_id).sub;
// ...
        auto sub2 = loc_grid.get(sub_id).getIterator(box.getKP1(),box.getKP2(),false);

        rem_copy_opt opt_ = rem_copy_opt::NONE_OPT;
        if (opt & SKIP_LABELLING)
        {opt_ = rem_copy_opt::KEEP_GEOMETRY;}
// ...
        loc_grid.get(sub_id).remove(box);
// ...
        for (long int j = 0 ; j < (long int)eb_gid_list.get(l_id).eb_list.size() ; j++)
        {
            size_t nle_id = eb_gid_list.get(l_id).eb_list.get(j);
// ...
            size_t n_sub_id = eg_box.get(ei).bid.get(nle_id).sub;
// ...
            loc_grid.get(n_sub_id).remove(box);
            loc_grid.get(n_sub_id).copy_to(loc_grid.get(sub_id),rbox,box);
        }
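// The g_id carried in front of every packed box selects, through g_id_to_external_ghost_box and
// eb_gid_list, the "full match" external ghost box that receives the unpacked data; the loop above
// then replicates that region into every other sub-domain listed in eb_list for the same ghost id.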
// ...
    template<typename mem, typename header_type, unsigned ... prp>
// ...
                                              header_type & headers,
// ...
                                              const std::unordered_map<size_t,size_t> & g_id_to_external_ghost_box,
// ...
        // with the header path the ghost id has already been extracted into the headers buffer
        g_id = headers.template get<0>(i);
// ...
        auto key = g_id_to_external_ghost_box.find(g_id);

        if (key != g_id_to_external_ghost_box.end())
        {l_id = key->second;}
// ...
            std::cerr << "Error: " << __FILE__ << ":" << __LINE__ << " Critical, cannot unpack object, because received data cannot be interpreted\n";
// ...
        size_t le_id = eb_gid_list.get(l_id).full_match;
        size_t ei = eb_gid_list.get(l_id).e_id;
// ...
        size_t sub_id = eg_box.get(ei).bid.get(le_id).sub;
// ...
        auto sub2 = loc_grid.get(sub_id).getIterator(box.getKP1(),box.getKP2(),false);

        rem_copy_opt opt_ = rem_copy_opt::NONE_OPT;
        if (opt & SKIP_LABELLING)
        {opt_ = rem_copy_opt::KEEP_GEOMETRY;}
// ...
        loc_grid.get(sub_id).remove(box);
// ...
                                    loc_grid.get(sub_id),
// ...
                                    v_cl.getGpuContext(),
// ...
        for (long int j = 0 ; j < (long int)eb_gid_list.get(l_id).eb_list.size() ; j++)
        {
            size_t nle_id = eb_gid_list.get(l_id).eb_list.get(j);
// ...
            size_t n_sub_id = eg_box.get(ei).bid.get(nle_id).sub;
// ...
            loc_grid.get(n_sub_id).remove(box);
            loc_grid.get(n_sub_id).copy_to(loc_grid.get(sub_id),rbox,box);
        }
// ...
    template<unsigned int ... prp>
    void fill_headers(size_t opt)
    {
        if ((opt & KEEP_PROPERTIES) == 0 && device_grid::is_unpack_header_supported())
        {
// ...
            result.allocate(sizeof(int));
// ...
            *(int *)result.getPointer() = 0;
            result.hostToDevice();

            device_grid::template unpack_headers<decltype(pointers_h),decltype(headers),decltype(result),prp ...>(pointers_h,headers,result,n_headers_slot);
            result.deviceToHost();
// ...
            if (*(int *)result.getPointer() == 0) {break;}
// ...
            headers.template deviceToHost<0,1,2>();
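// When the device grid supports it, the per-box headers (the ghost id plus box metadata, components
// 0..2 of `headers`) are decoded directly on the device; the single int in `result` acts as a status
// flag that is zeroed, uploaded, and checked after unpack_headers(): the loop exits once it reads
// back as zero, otherwise it retries (presumably with more header slots), and finally the headers
// are copied back to the host.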
// ...
    template<unsigned ... prp>
// ...
                                 const std::vector<size_t> & prp_recv,
// ...
                                 const std::unordered_map<size_t,size_t> & g_id_to_external_ghost_box,
// ...
        if (device_grid::isCompressed() == false)
        {
// ...
            for (size_t i = 0 ; i < eg_box.size() ; i++ )
            {
// ...
                while (ps.getOffset() - mark_here < prp_recv[i])
                {
// ...
                    unpack_data_to_ext_ghost<Memory,prp ...>(prRecv_prp,loc_grid,i,
                                                             eg_box,g_id_to_external_ghost_box,eb_gid_list,
// ...
                }
            }
        }
// ...
        fill_headers<prp ...>(opt);

        if (headers.size() != 0)
        {
// ...
            unpack_data_to_ext_ghost_with_header<BMemory<Memory>,decltype(headers),prp ...>(mem,loc_grid,headers,i*n_headers_slot+j,
                                                                                            eg_box,g_id_to_external_ghost_box,eb_gid_list,
// ...
        }
// ...
        unpack_data_to_ext_ghost<BMemory<Memory>,prp ...>(mem,loc_grid,i,
                                                          eg_box,g_id_to_external_ghost_box,eb_gid_list,
// ...
    template<template<typename,typename> class op, unsigned ... prp>
// ...
                                 const std::vector<size_t> & prp_recv,
// ...
                                 const openfpm::vector<std::unordered_map<size_t,size_t>> & g_id_to_internal_ghost_box)
    {
        typedef object<typename object_creator<typename T::type,prp...>::type> prp_object;

        if (device_grid::isCompressed() == false)
        {
// ...
            for (size_t i = 0 ; i < ig_box.size() ; i++ )
            {
// ...
                for (size_t j = 0 ; j < ig_box.get(i).bid.size() ; j++)
                {
// ...
                    auto key = g_id_to_internal_ghost_box.get(i).find(g_id);
                    if (key != g_id_to_internal_ghost_box.get(i).end())
                    {l_id = key->second;}
// ...
                        std::cerr << "Error: " << __FILE__ << ":" << __LINE__ << " Critical, cannot unpack object, because received data cannot be interpreted\n";
// ...
                    size_t sub_id = ig_box.get(i).bid.get(l_id).sub;
                    box -= gdb_ext.get(sub_id).origin.template convertPoint<size_t>();
// ...
                    auto sub2 = loc_grid.get(sub_id).getIterator(box.getKP1(),box.getKP2());
// ...
                }
            }
        }
// ...
            size_t pid = dec.ProctoID(recv_proc.get(i).p_id);
// ...
            auto key = g_id_to_internal_ghost_box.get(pid).find(g_id);
            if (key != g_id_to_internal_ghost_box.get(pid).end())
            {l_id = key->second;}
// ...
                std::cerr << "Error: " << __FILE__ << ":" << __LINE__ << " Critical, cannot unpack object, because received data cannot be interpreted\n";
// ...
            size_t sub_id = ig_box.get(pid).bid.get(l_id).sub;
            box -= gdb_ext.get(sub_id).origin.template convertPoint<size_t>();
// ...
            auto sub2 = loc_grid.get(sub_id).getIterator(box.getKP1(),box.getKP2());
// ...
        for (size_t n = 0; n < dim; n++)
// ...
        for (size_t j = 0; j < gdb_ext.size(); j++)
        {
// ...
            sub += gdb_ext.get(j).origin;
// ...
        }
// ...
        // reconstruct the local grids from the received grid pieces
        for (size_t i = 0 ; i < loc_grid.size() ; i++)
        {loc_grid.get(i).clear();}

        for (size_t a = 0; a < m_oGrid_recv.size(); a++)
        {
            for (size_t k = 0; k < m_oGrid_recv.get(a).size(); k++)
            {
                device_grid & g = m_oGrid_recv.get(a).template get<0>(k);
// ...
                for (size_t n = 0; n < dim; n++)
                {p.get(n) = g.getGrid().getBox().getHigh(n);}
// ...
                // take the center of the received box to locate the target sub-domain
                for (size_t n = 0; n < dim; n++)
                {point.get(n) = (b.getHigh(n) + b.getLow(n))/2;}
// ...
                for (size_t j = 0; j < gdb_ext.size(); j++)
                {
// ...
                    sub += gdb_ext.get(j).origin;
// ...
                    for (size_t i = 0 ; i < dim ; i++)
// ...
                    loc_grid.get(j).copy_to(g,box_src,box_dst);
                }
            }
        }
// ...
        std::cout << "UNPACKING " << std::endl;

        for (size_t i = 0 ; i < m_oGrid_recv.size() ; i++)
        {
            for (size_t j = 0 ; j < m_oGrid_recv.get(i).size() ; j++)
            {
                m_oGrid_recv.get(i).template get<0>(j).template deviceToHost<0>();
                std::cout << "UNPACKING POINTS: " << m_oGrid_recv.get(i).template get<0>(j).size() << std::endl;
                m_oGrid_recv.get(i).template get<0>(j).template removeCopyToFinalize<0>(v_cl.getGpuContext(), rem_copy_opt::PHASE1);
            }
        }

        for (size_t i = 0 ; i < m_oGrid_recv.size() ; i++)
        {
            for (size_t j = 0 ; j < m_oGrid_recv.get(i).size() ; j++)
            {m_oGrid_recv.get(i).template get<0>(j).template removeCopyToFinalize<0>(v_cl.getGpuContext(), rem_copy_opt::PHASE2);}
        }

        for (size_t i = 0 ; i < m_oGrid_recv.size() ; i++)
        {
            for (size_t j = 0 ; j < m_oGrid_recv.get(i).size() ; j++)
            {m_oGrid_recv.get(i).template get<0>(j).template removeCopyToFinalize<0>(v_cl.getGpuContext(), rem_copy_opt::PHASE3);}
        }
// ...
    template<typename lambda_t>
// ...
        for (size_t i = 0; i < gdb_ext_old.size(); i++)
        {
// ...
            sub_dom += gdb_ext_old.get(i).origin;

            for (size_t j = 0; j < gdb_ext_global.size(); j++)
            {
// ...
                sub_dom_new += gdb_ext_global.get(j).origin;

                bool intersect = false;
// ...
                intersect = sub_dom.Intersect(sub_dom_new, inte_box);

                if (intersect == true)
                {
                    auto inte_box_cont = cd_sm.convertCellUnitsIntoDomainSpace(inte_box);

                    // assign the intersection to the processor that owns its center point
                    for (size_t n = 0; n < dim; n++)
                        p.get(n) = (inte_box_cont.getHigh(n) + inte_box_cont.getLow(n))/2;

                    p_id = dec.processorID(p);
                    if (p_id != p_id_cur)
// ...
                    auto inte_box_local = inte_box;

                    inte_box_local -= gdb_ext_old.get(i).origin;
// ...
                    for (size_t l = 0; l < dim; l++)
                        sz[l] = inte_box_local.getHigh(l) - inte_box_local.getLow(l) + 1;
// ...
                    for (size_t i = 0 ; i < dim ; i++)
// ...
                    f(box_src,box_dst,gr,p_id);
                }
            }
        }
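// Re-decomposition labelling: every sub-domain of the old decomposition (gdb_ext_old) is intersected
// with every sub-domain of the new global decomposition (gdb_ext_global); each non-empty intersection
// that the new decomposition assigns to processor p_id_cur (judged by its center point) is converted
// into a source/destination box pair and handed to the caller-supplied lambda f(box_src,box_dst,gr,p_id).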
// ...
    template<int ... prp>
    void unpack_buffer_to_local_grid(openfpm::vector<device_grid> & loc_grid,
                                     openfpm::vector<GBoxes<device_grid::dims>> & gdb_ext,
                                     ExtPreAlloc<Memory> & send_buffer,
                                     size_t sz)
    {
// ...
        send_buffer.reset();
// ...
            // find the local sub-domain (gdb_ext entry) that contains the destination box
            int s = find_local_sub(box_dst,gdb_ext);
            if (s == -1)
            {std::cout << __FILE__ << ":" << __LINE__ << " map, error non-local subdomain " << std::endl;}

            // bring the destination box into the local coordinates of that sub-domain
            for (int d = 0 ; d < dim ; d++)
            {
                box_dst.setLow(d, box_dst.getLow(d) - gdb_ext.get(s).origin.get(d));
                box_dst.setHigh(d, box_dst.getHigh(d) - gdb_ext.get(s).origin.get(d));
            }

            loc_grid.get(s).remove(box_dst);
            auto sub2 = loc_grid.get(s).getIterator(box_dst.getKP1(),box_dst.getKP2(),0);
// ...
        for (int s = 0 ; s < loc_grid.size() ; s++)
        {loc_grid.get(s).template removeAddUnpackFinalize<prp ...>(v_cl.getGpuContext(),0);}
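// After map(), every received buffer is walked box by box: each destination box is matched to the
// local sub-domain that contains it, shifted into that sub-domain's coordinates, the region is
// cleared with remove() and then unpacked over it; removeAddUnpackFinalize() flushes the queued
// remove/add operations on every local grid once all buffers have been consumed.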
// ...
    template<int ... prp>
// ...
        send_buffers_.resize(v_cl.getProcessingUnits());
        send_buffers.resize(v_cl.getProcessingUnits());
// ...
        // pack, for every destination processor, the pieces of the old local grids that now belong to it
        for (int p_id = 0 ; p_id < v_cl.getProcessingUnits() ; p_id++)
        {
            for (int i = 0 ; i < loc_grid_old.size() ; i++)
            {loc_grid_old.get(i).packReset();}
// ...
                auto sub_it = gr.getIterator(box_src.getKP1(),box_src.getKP2(),0);
                gr.template packRequest<prp ...>(sub_it,send_buffer_sizes.get(p_id));
// ...
            for (int i = 0 ; i < loc_grid_old.size(); i++)
            {loc_grid_old.get(i).template packCalculate<prp ...>(send_buffer_sizes.get(p_id),v_cl.getGpuContext());}

            send_buffers_.get(p_id).resize(send_buffer_sizes.get(p_id));
            send_buffers.get(p_id).setMemory(send_buffer_sizes.get(p_id),send_buffers_.get(p_id));
            send_buffers.get(p_id).incRef();
// ...
            size_t offset = send_buffers.get(p_id).getOffsetEnd();
// ...
            size_t offset2 = send_buffers.get(p_id).getOffsetEnd();

            send_buffers.get(p_id).hostToDevice(offset,offset2);
// ...
                auto sub_it = gr.getIterator(box_src.getKP1(),box_src.getKP2(),0);
// ...
            for (int i = 0 ; i < loc_grid_old.size() ; i++)
            {loc_grid_old.get(i).template packFinalize<prp ...>(send_buffers.get(p_id),sts,0,false);}
        }
// ...
        // queue for sending every buffer that is not addressed to the local processor
        for (int i = 0 ; i < send_buffers.size() ; i++)
        {
            if (i != v_cl.rank())
            {
// ...
                send_pointer.add(send_buffers_.get(i).getDevicePointer());
            }
        }

        size_t * send_size_ptr = NULL;
        size_t * send_prc_queue_ptr = NULL;
        void ** send_pointer_ptr = NULL;
// ...
                                          send_prc_queue_ptr,send_pointer_ptr,
                                          receive_dynamic,this);
// ...
        for (int i = 0 ; i < send_buffers.size() ; i++)
        {send_buffers.get(i).decRef();}
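// The per-processor ExtPreAlloc send buffers are reference counted (incRef() while they are being
// packed and queued, decRef() once the communication has completed), and only buffers destined to
// other ranks are queued; presumably the buffer addressed to the local rank is unpacked directly
// rather than sent.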
// ...
                    std::unordered_map<size_t,size_t> & g_id_to_external_ghost_box,
// ...
    {
#ifdef PROFILE_SCOREP
        SCOREP_USER_REGION("ghost_get",SCOREP_USER_REGION_TYPE_FUNCTION)
#endif
// ...
        typedef object<typename object_creator<typename T::type,prp...>::type> prp_object;
// ...
        // SKIP_LABELLING can only be honoured if every local grid allows it
        for (int i = 0 ; i < loc_grid.size() ; i++)
        {opt &= (loc_grid.get(i).isSkipLabellingPossible())?(int)-1:~SKIP_LABELLING;}

#ifdef ENABLE_GRID_DIST_ID_PERF_STATS
        timer packing_time;
        packing_time.start();
#endif

        if (!(opt & SKIP_LABELLING))
        {
            for (size_t i = 0 ; i < loc_grid.size() ; i++)
            {loc_grid.get(i).packReset();}

            // calculate the pack-buffer size for every internal ghost box to send
            for (size_t i = 0 ; i < ig_box.size() ; i++ )
            {
                for (size_t j = 0 ; j < ig_box.get(i).bid.size() ; j++)
                {
                    size_t sub_id = ig_box.get(i).bid.get(j).sub;
// ...
                    if (g_ig_box.isValid() == false)
                        continue;

                    g_ig_box -= gdb_ext.get(sub_id).origin.template convertPoint<size_t>();
// ...
                    auto sub_it = loc_grid.get(sub_id).getIterator(g_ig_box.getKP1(),g_ig_box.getKP2(),false);
// ...
                }
            }

            for (size_t i = 0 ; i < loc_grid.size() ; i++)
            {loc_grid.get(i).template packCalculate<prp ...>(req,v_cl.getGpuContext());}
// ...
            // second pass: pack, for every neighbour processor, the ghost id and the data of each
            // internal ghost box into prAlloc_prp
            for (size_t i = 0 ; i < ig_box.size() ; i++ )
            {
                if (opt & RUN_ON_DEVICE)
// ...
                for (size_t j = 0 ; j < ig_box.get(i).bid.size() ; j++)
                {
                    if (ig_box.get(i).bid.get(j).box.isValid() == false)
                        continue;

                    size_t sub_id = ig_box.get(i).bid.get(j).sub;
// ...
                    g_ig_box -= gdb_ext.get(sub_id).origin.template convertPoint<size_t>();

                    size_t g_id = ig_box.get(i).bid.get(j).g_id;
// ...
                    auto sub_it = loc_grid.get(sub_id).getIterator(g_ig_box.getKP1(),g_ig_box.getKP2(),false);
// ...
                }

                if (opt & RUN_ON_DEVICE)
// ...
                pointers2.add(pointer2);
            }

            for (size_t i = 0 ; i < loc_grid.size() ; i++)
            {
                rem_copy_opt opt_ = rem_copy_opt::NONE_OPT;
                if (opt & SKIP_LABELLING)
                {opt_ = rem_copy_opt::KEEP_GEOMETRY;}

                loc_grid.get(i).template packFinalize<prp ...>(prAlloc_prp,sts,opt_,true);
            }
// ...
            delete &prAlloc_prp;
// ...
            for (size_t i = 0 ; i < loc_grid.size() ; i++)
            {
                rem_copy_opt opt_ = rem_copy_opt::NONE_OPT;
                if (opt & SKIP_LABELLING)
                {opt_ = rem_copy_opt::KEEP_GEOMETRY;}

                loc_grid.get(i).template packFinalize<prp ...>(prAlloc_prp,sts,opt_,true);
            }
// ...
            delete &prAlloc_prp;

#ifdef ENABLE_GRID_DIST_ID_PERF_STATS
        packing_time.stop();
        tot_pack += packing_time.getwct();
        timer sendrecv_time;
        sendrecv_time.start();
#endif

        // send (or queue, for compressed grids) one message per neighbour processor
        for (size_t i = 0 ; i < ig_box.size() ; i++ )
        {
// ...
            send_or_queue(ig_box.get(i).prc,(char *)pointers.get(i),(char *)pointers2.get(i));
        }
        std::vector<size_t> prp_recv;
// ...
        queue_recv_data_get<prp_object>(eg_box,prp_recv,prRecv_prp);

#ifdef ENABLE_GRID_DIST_ID_PERF_STATS
        sendrecv_time.stop();
        tot_sendrecv += sendrecv_time.getwct();
        timer merge_loc_time;
        merge_loc_time.start();
#endif

        // merge the ghost regions that stay on this processor
        ghost_get_local<prp...>(loc_ig_box,loc_eg_box,gdb_ext,loc_grid,g_id_to_external_ghost_box,ginfo,use_bx_def,opt);

#ifdef ENABLE_GRID_DIST_ID_PERF_STATS
        merge_loc_time.stop();
        tot_loc_merge += merge_loc_time.getwct();
#endif
// ...
        for (size_t i = 0 ; i < loc_grid.size() ; i++)
        {loc_grid.get(i).removeAddUnpackReset();}

        // merge the data received from the other processors into the external ghost boxes
        merge_received_data_get<prp ...>(loc_grid,eg_box,prp_recv,prRecv_prp,g_id_to_external_ghost_box,eb_gid_list,opt);

        rem_copy_opt opt_ = rem_copy_opt::NONE_OPT;
        if (opt & SKIP_LABELLING)
        {opt_ = rem_copy_opt::KEEP_GEOMETRY;}

        for (size_t i = 0 ; i < loc_grid.size() ; i++)
        {loc_grid.get(i).template removeAddUnpackFinalize<prp ...>(v_cl.getGpuContext(),opt_);}

#ifdef ENABLE_GRID_DIST_ID_PERF_STATS
// ...
        tot_merge += merge_time.getwct();
#endif
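// Overall ghost_get_ flow: (1) pack every internal ghost box of the local sub-domains (skipped when
// SKIP_LABELLING allows reusing the previous geometry), (2) send one buffer per neighbour processor
// and post the matching receives, (3) synchronize the purely local ghost boxes, (4) unpack the
// received buffers into the external ghost boxes, and (5) finalize the queued remove/add operations
// on every local grid. The optional timers accumulate the cost of each phase into tot_pack,
// tot_sendrecv, tot_loc_merge and tot_merge.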
// ...
    template<template<typename,typename> class op, int... prp>
// ...
                    openfpm::vector<std::unordered_map<size_t,size_t>> & g_id_to_internal_ghost_box)
    {
// ...
        typedef object<typename object_creator<typename T::type,prp...>::type> prp_object;
// ...
        // in ghost_put the roles are inverted: the external ghost boxes are packed and sent back
        // to the processors that own the corresponding internal ghost boxes
        for (size_t i = 0 ; i < eg_box.size() ; i++ )
        {
// ...
            for (size_t j = 0 ; j < eg_box.get(i).bid.size() ; j++)
            {
// ...
                size_t sub_id = eg_box.get(i).bid.get(j).sub;
// ...
                if (g_eg_box.isValid() == false)
                    continue;

                g_eg_box -= gdb_ext.get(sub_id).origin.template convertPoint<size_t>();
// ...
                auto sub_it = loc_grid.get(sub_id).getIterator(g_eg_box.getKP1(),g_eg_box.getKP2());
// ...
            }
        }
// ...
        for (size_t i = 0 ; i < eg_box.size() ; i++ )
        {
// ...
            for (size_t j = 0 ; j < eg_box.get(i).bid.size() ; j++)
            {
// ...
                if (eg_box.get(i).bid.get(j).g_e_box.isValid() == false)
                    continue;

                size_t sub_id = eg_box.get(i).bid.get(j).sub;
// ...
                g_eg_box -= gdb_ext.get(sub_id).origin.template convertPoint<size_t>();

                size_t g_id = eg_box.get(i).bid.get(j).g_id;
// ...
                auto sub_it = loc_grid.get(sub_id).getIterator(g_eg_box.getKP1(),g_eg_box.getKP2());
// ...
            }
// ...
            send_or_queue(ig_box.get(i).prc,(char *)pointer,(char *)pointer2);
        }

        std::vector<size_t> prp_recv;
// ...
        queue_recv_data_put<prp_object>(ig_box,prp_recv,prRecv_prp);
// ...
        ghost_put_local<op,prp...>(loc_ig_box,loc_eg_box,gdb_ext,loc_grid,g_id_to_internal_ghost_box);

        merge_received_data_put<op,prp ...>(dec,loc_grid,ig_box,prp_recv,prRecv_prp,gdb_ext,g_id_to_internal_ghost_box);
// ...
        delete &prAlloc_prp;
    }
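// ghost_put_ mirrors ghost_get_ with the data flowing the other way: the first pass over eg_box sizes
// the pack request, the second pass packs each external ghost box together with its ghost id and
// queues the sends; the received internal-ghost data is then combined into the local sub-domains by
// ghost_put_local() and merge_received_data_put(), using the reduction operation op.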
// ...
    grid_dist_id_comm()
    :v_cl(create_vcluster<Memory>())
/* grid_dist_id_comm is a helper class for the communication of grid_dist_id.
 *
 * Members referenced above (brief descriptions from the class documentation):
 *
 *   Vcluster<Memory> & v_cl                               - the VCluster used for all communications
 *   Memory g_send_prp_mem, g_recv_prp_mem                 - memory for the ghost sending / receiving buffers
 *   openfpm::vector<void *> pointers, send_pointer        - send pointers / pointers to the memory to send
 *   openfpm::vector<size_t> send_prc_queue, send_size     - list of processors to send to and the sizes to send
 *   openfpm::vector_fr<BMemory<Memory>> recv_buffers      - receiving buffers used by the dynamic path
 *   openfpm::vector<rp_id> recv_proc                      - processors we received from
 *   openfpm::vector_gpu<aggregate<void *,void *,int>> pointers_h - header-unpacker information
 *   openfpm::vector<size_t> prc_recv_map, recv_sz_map     - processors that communicate with us in the map
 *                                                           procedure and the number of elements added per processor
 *   openfpm::vector<size_t> p_map_req                     - maps a processor id to its communication request in map()
 *   size_t opt                                            - receiving options
 *   openfpm::vector<openfpm::vector<aggregate<device_grid,Box<dim,long int>>>> m_oGrid - grid pieces to redistribute
 *
 *   ghost_get_()                                          - fills the ghost part of the grids
 *   ghost_get_local() / ghost_put_local()                 - synchronize the purely local ghost part
 *   ghost_put_()                                          - merges the ghost information into the real information
 *   map_()                                                - moves grids that do not belong to the local processor
 *                                                           to the owning processor
 *   labelIntersectionGridsProcessor_and_pack()            - labels the intersection grids for the mapping
 *   unpack_buffer_to_local_grid(), grids_reconstruct()    - unpack received pieces / reconstruct the local grids
 *   queue_recv_data_get(), queue_recv_data_put()          - post the receives for ghost_get / ghost_put
 *   grid_dist_id_comm(), grid_dist_id_comm(const grid_dist_id_comm &) - constructor and copy constructor
 *
 *   Helper types used throughout: ip_box_grid / ep_box_grid (per-processor internal / external ghost
 *   boxes), i_lbox_grid / e_lbox_grid (their local counterparts), GBoxes (the box defining a sub-domain
 *   plus its ghost inside the global grid), e_box_multi (for each external ghost id, the set of
 *   sub-domains that external box is linked to).
 */