8 #ifndef OPENFPM_VCLUSTER_SRC_VCLUSTER_VCLUSTER_META_FUNCTION_HPP_
9 #define OPENFPM_VCLUSTER_SRC_VCLUSTER_VCLUSTER_META_FUNCTION_HPP_
11 #include "memory/BHeapMemory.hpp"
12 #include "Packer_Unpacker/has_max_prop.hpp"
// Returns a bool; which answer is compiled in depends on the MPIX_CUDA_AWARE_SUPPORT check below.
18 static inline bool is_mpi_rdma_cuda_active()
// True only when the MPI headers define MPIX_CUDA_AWARE_SUPPORT and its value is non-zero.
20 #if defined(MPIX_CUDA_AWARE_SUPPORT) && MPIX_CUDA_AWARE_SUPPORT
// Generic unpack path: call_unpack walks every receive buffer, passes the per-buffer
// object `unp` to op_param.execute and records per-buffer sizes in sz / sz_byte.
27 template<
bool result,
typename T,
typename S,
template<
typename>
class layout_base,
typename Memory>
32 static void call_unpack(S & recv,
// One sz_byte slot per receive buffer.
40 sz_byte->resize(recv_buf.size());
42 for (
size_t i = 0 ; i < recv_buf.size() ; i++)
// Size of recv before the operation, so its growth can be measured afterwards.
53 size_t recv_size_old = recv.size();
// Hand buffer i's object to the caller-supplied operation together with the option flags.
56 op_param.template execute<
true,T,decltype(recv),decltype(unp),layout_base,prp...>(recv,unp,i,opt);
58 size_t recv_size_new = recv.size();
// sz_byte[i]: size reported by receive buffer i.
61 sz_byte->get(i) = recv_buf.get(i).
size();
// sz[i]: how much recv.size() grew while buffer i was processed.
63 sz->get(i) = recv_size_new - recv_size_old;
// Per-property unpack step: wraps receive buffer i as the storage of one property of v2,
// passes v2 to op_param.execute and records per-buffer sizes.
71 template<
typename op,
typename Vt,
typename S,
template<
typename>
class layout_base,
typename v_mpl>
// Held by reference: the receive buffers are not copied into this object.
76 openfpm::vector_fr<BMemory<HeapMemory>> & recv_buf;
// Every constructor argument is stored in the member of the same name.
97 :recv(recv),recv_buf(recv_buf),op_param(op_param),i(i),sz(sz),sz_byte(sz_byte)
// Type found at position T::value of the type sequence T::value_type::type.
105 typedef typename boost::mpl::at<typename T::value_type::type,boost::mpl::int_<T::value> >::type prp_type;
// Entry of v_mpl at position T::value; its ::value selects the property used below.
108 typedef typename boost::mpl::at<v_mpl,boost::mpl::int_<T::value>>::type prp_num;
// Whole prp_type elements contained in buffer i (integer division, remainder dropped).
111 size_t n_ele = recv_buf.get(i).size() /
sizeof(prp_type);
// Wrap buffer i's pointer and size in a PtrMemory object.
// NOTE(review): allocated with a raw new — confirm setMemory (or later code) takes ownership and releases it.
114 PtrMemory * ptr1 =
new PtrMemory(recv_buf.get(i).getPointer(),recv_buf.get(i).size());
// Use that memory as the storage of property prp_num::value of v2.
119 v2.template setMemory<prp_num::value>(*ptr1);
// Size of recv before the operation, so its growth can be measured afterwards.
126 size_t recv_size_old = recv.
size();
// Unlike the other execute call sites in this file, no opt argument is passed here.
128 op_param.template execute<false,T,decltype(recv),decltype(v2),layout_base,prp_num::value>(recv,v2,i);
130 size_t recv_size_new = recv.size();
// sz_byte[i]: size reported by receive buffer i.
133 sz_byte->get(i) = recv_buf.get(i).
size();
// sz[i]: how much recv.size() grew while buffer i was processed.
135 sz->get(i) = recv_size_new - recv_size_old;
// Per-property receive step: computes how many elements buffer i holds for property T
// and attaches memory to property T::value of v2.
150 template<
typename sT,
template<
typename>
class layout_base,
typename Memory>
// Type stored at key T in the type sequence sT::value_type::type.
183 typedef typename boost::mpl::at<typename sT::value_type::type,T>::type type_prp;
// Whole type_prp elements contained in buffer i (integer division, remainder dropped).
186 this->n_ele =
recv_buf.get(
i).size() /
sizeof(type_prp);
// Branch taken when the MPI_GPU_DIRECT bit is set in opt.
190 if (opt & MPI_GPU_DIRECT)
// The code that follows is compiled only when MPIX_CUDA_AWARE_SUPPORT is defined and non-zero.
192 #if defined(MPIX_CUDA_AWARE_SUPPORT) && MPIX_CUDA_AWARE_SUPPORT
// Use *ptr1 as the storage of property T::value of v2.
206 v2.template setMemory<T::value>(*ptr1);
// Unpack implementation selected by the inte_or_lin flag (primary template).
// call_unpack_impl processes the buffers starting at index i and returns how many
// buffer indices the caller must advance by.
212 template<
bool inte_or_lin,
typename T,
typename S,
template<
typename>
class layout_base,
typename Memory>
215 template<
typename op,
unsigned int ... prp>
static int call_unpack_impl(S & recv,
// Invoke prmti once for every property index in [0, T::value_type::max_prop).
228 boost::mpl::for_each_ref<boost::mpl::range_c<int,0,T::value_type::max_prop>>(prmti);
// Size v2 to the element count stored in prmti.
230 v2.resize(prmti.n_ele);
// Size of recv before the operation, so its growth can be measured afterwards.
234 size_t recv_size_old = recv.
size();
236 op_param.template execute<
false,T,decltype(recv),decltype(v2),layout_base,prp...>(recv,v2,i,opt);
238 size_t recv_size_new = recv.size();
// sz_byte[i]: size reported by receive buffer i.
241 sz_byte->get(i) = recv_buf.get(i).
size();
// sz[i]: how much recv.size() grew during execute.
243 sz->get(i) = recv_size_new - recv_size_old;
// The caller adds this to its loop index: one step per entry of the prp pack.
245 return sizeof...(prp);
// Variant of the unpack implementation without the inte_or_lin parameter (a specialization):
// buffer i is treated as a flat array of T::value_type.
249 template<
typename T,
typename S,
template<
typename>
class layout_base,
typename Memory>
252 template<
typename op,
unsigned int ... prp>
static int call_unpack_impl(S & recv,
// Whole T::value_type elements contained in buffer i (integer division, remainder dropped).
261 size_t n_ele = recv_buf.get(i).size() /
sizeof(
typename T::value_type);
// Wrap buffer i's pointer and size in a PtrMemory object.
// NOTE(review): allocated with a raw new — confirm the consumer of ptr1 takes ownership and releases it.
264 PtrMemory * ptr1 =
new PtrMemory(recv_buf.get(i).getPointer(),recv_buf.get(i).size());
// Size of recv before the operation, so its growth can be measured afterwards.
278 size_t recv_size_old = recv.
size();
280 op_param.template execute<
false,T,decltype(recv),decltype(v2),layout_base,prp...>(recv,v2,i,opt);
282 size_t recv_size_new = recv.size();
// sz_byte[i]: size reported by receive buffer i.
285 sz_byte->get(i) = recv_buf.get(i).
size();
// sz[i]: how much recv.size() grew during execute.
287 sz->get(i) = recv_size_new - recv_size_old;
// Unpack selector variant that delegates each step to unpack_selector_with_prp_lin,
// choosing the implementation from is_layout_mlin<layout_base<dummy_type>>::value.
299 template<
typename T,
typename S,
template<
typename>
class layout_base,
typename Memory>
302 template<
typename op,
unsigned int ... prp>
static void call_unpack(S & recv,
// One sz_byte slot per receive buffer.
310 sz_byte->resize(recv_buf.size());
// No increment in the loop header: i is advanced by the value call_unpack_impl returns.
312 for (
size_t i = 0 ; i < recv_buf.size() ; )
314 i +=
unpack_selector_with_prp_lin<is_layout_mlin<layout_base<dummy_type>>::value,T,S,layout_base,Memory>::template call_unpack_impl<op,prp...>(recv,recv_buf,sz,sz_byte,op_param,i,opt);
// Helpers parameterised on the property pack prp: call_pr works on the send side
// (send, tot_size) and call_unpack forwards to unpack_selector_with_prp.
323 template<
int ... prp>
// tot_size is taken by non-const reference, so the callee can update the caller's value.
326 template<
typename T>
inline static void call_pr(T & send,
size_t & tot_size)
336 template<
typename op,
typename T,
typename S,
template<
typename>
class layout_base,
typename Memory>
337 inline static void call_unpack(S & recv,
// Pure forwarding: every argument is passed through unchanged, with prp... appended to the template arguments.
346 unpack_selector_with_prp<result, T, S,layout_base,Memory>::template call_unpack<op,prp...>(recv, recv_buf, sz, sz_byte, op_param,opt);
// Per-property step that appends one pointer for property T::value of v to send_buf.
360 template<
typename sT>
// Stores v, send_buf and opt in the members of the same name.
376 :
v(
v),send_buf(send_buf),opt(opt)
// Branch taken when the MPI_GPU_DIRECT bit is set in opt.
384 if (opt & MPI_GPU_DIRECT)
// With MPIX_CUDA_AWARE_SUPPORT defined and non-zero, the device buffer itself is registered.
386 #if defined(MPIX_CUDA_AWARE_SUPPORT) && MPIX_CUDA_AWARE_SUPPORT
387 send_buf.add(
v.template getDeviceBuffer<T::value>());
// Otherwise property T::value goes through deviceToHost first and the getPointer result is registered.
389 v.template deviceToHost<T::value>();
390 send_buf.add(
v.template getPointer<T::value>());
// Without MPI_GPU_DIRECT the getPointer result is registered directly.
395 send_buf.add(
v.template getPointer<T::value>());
// Per-property step that appends the byte size of one property of v to sz.
411 template<
typename sT>
// Type stored at key T in the type sequence sT::value_type::type.
432 typedef typename boost::mpl::at<typename sT::value_type::type,T>::type type_prp;
// Size in bytes: sizeof one element times the number of elements in v.
434 sz.add(
sizeof(type_prp)*
v.size());
// Primary template: impl defaults to is_multiple_buffer_each_prp<T>::value.
// The statements below handle `send` as one single buffer.
438 template<typename T, bool impl = is_multiple_buffer_each_prp<T>::value >
// One pointer for the whole of send.
443 send_buf.add(send.getPointer());
// Matching size in bytes: element count times sizeof(T::value_type).
448 sz.add(send.size()*
sizeof(
typename T::value_type));
// Every entry of prc_send is appended once, in order, to prc_send_.
453 for (
size_t i = 0 ; i < prc_send.
size() ; i++)
455 prc_send_.add(prc_send.get(i));
// Invoke sbp once for every property index in [0, T::value_type::max_prop).
468 boost::mpl::for_each_ref<boost::mpl::range_c<int,0,T::value_type::max_prop>>(sbp);
// Same iteration range, applied to the sbp in scope at this point.
475 boost::mpl::for_each_ref<boost::mpl::range_c<int,0,T::value_type::max_prop>>(sbp);
// Each entry of prc_send is appended max_prop times to prc_send_ (once per property index).
480 for (
size_t i = 0 ; i < prc_send.
size() ; i++)
482 for (
size_t j = 0 ; j < T::value_type::max_prop ; j++)
483 {prc_send_.add(prc_send.get(i));}
// Pack/unpack helpers parameterised on layout_base and the property pack prp.
493 template <
typename>
class layout_base,
494 unsigned int ... prp>
// ind_prop_to_pack: result of generate_indexes for has_max_prop<...>::number entries using MetaFuncOrd.
499 typedef typename ::generate_indexes<int, has_max_prop<T, has_value_type_ofp<T>::value>::number,
MetaFuncOrd>::result ind_prop_to_pack;
// Same typedef, declared again for a second scope.
514 typedef typename ::generate_indexes<int, has_max_prop<T, has_value_type_ofp<T>::value>::number,
MetaFuncOrd>::result ind_prop_to_pack;
// unpacking is additionally parameterised on the Memory type.
526 template<
typename Memory>
527 static void unpacking(S & recv,
// execute: adds the properties of v2 to recv; the MPI_GPU_DIRECT bit of opt selects the path.
550 template <
typename>
class layout_base,
551 int ... prp>
static void execute(
D & recv,S & v2,
size_t i,
size_t opt)
// Branch taken when the MPI_GPU_DIRECT bit is set in opt.
553 if (opt & MPI_GPU_DIRECT)
// With MPIX_CUDA_AWARE_SUPPORT defined and non-zero, add_prp_device is used.
555 #if defined(MPIX_CUDA_AWARE_SUPPORT) && MPIX_CUDA_AWARE_SUPPORT
558 recv.template add_prp_device<
typename T::value_type,
// Otherwise: remember the size of recv before add_prp ...
565 size_t old_size = recv.size();
568 recv.template add_prp<
typename T::value_type,
// ... and call hostToDevice on [old_size, old_size+v2.size()-1] for the properties in prp.
// NOTE(review): with an empty v2 and old_size == 0 the upper bound wraps around as size_t — confirm v2 is never empty here.
575 recv.template hostToDevice<prp...>(old_size,old_size+v2.size()-1);
// Path without MPI_GPU_DIRECT: add_prp only.
583 recv.template add_prp<
typename T::value_type,
// execute variant that calls recv.add_prp with T::grow_policy among its template arguments.
601 template <
typename>
class layout_base,
603 static void execute(
D & recv,S & v2,
size_t i,
size_t opt)
606 recv.template add_prp<
typename T::value_type,
608 typename T::grow_policy,
// Receive operation parameterised on op. Its execute has the same signature as the
// other receive operations in this file: recv, v2, the buffer index i and the flags opt.
616 template<
typename op>
624 template <
typename>
class layout_base,
626 static void execute(
D & recv,S & v2,
size_t i,
size_t opt)
// Merge implementation selected by the sr flag (primary template).
634 template<
bool sr,
template<
typename,
typename>
class op,
typename vector_type_opart>
641 template <
typename>
class layout_base,
643 inline static void execute(
D & recv,S & v2,
size_t i,vector_type_opart & opart)
// Merge v2 into recv with operation op for the properties prp; opart.get(i) is the entry of opart for buffer i.
646 recv.template merge_prp_v<op,
647 typename T::value_type,
651 typename vector_type_opart::value_type,
652 prp...>(v2,opart.get(i));
// Merge implementation without the sr parameter (a specialization of the template above it).
657 template<
template<
typename,
typename>
class op,
typename vector_type_opart>
664 template <
typename>
class layout_base,
666 inline static void execute(
D & recv,S & v2,
size_t i,vector_type_opart & opart)
// Merge v2 into recv with operation op for the properties prp; opart.get(i) is the entry of opart for buffer i.
669 recv.template merge_prp_v<op,
670 typename T::value_type,
674 typename vector_type_opart::value_type,
675 prp...>(v2,opart.get(i));
680 template<
template<
typename,
typename>
class op,
typename vector_type_opart>
696 template <
typename>
class layout_base,
// Device merge implementation selected by the sr flag (primary template).
// execute merges v2 into recv over the range [start, stop) read from prc_off.
705 template<
bool sr,
template<
typename,
typename>
class op,
typename vector_type_opart,
typename vector_type_prc_offset>
712 template <
typename>
class layout_base,
714 inline static void execute(
D & recv,S & v2,
size_t i,vector_type_opart & opart, vector_type_prc_offset & prc_off)
// Property 0 of prc_off is read with get<0> below, after this deviceToHost call.
716 prc_off.template deviceToHost<0>();
// start stays 0 unless it is overwritten below.
718 unsigned int start = 0;
// stop is entry (i / sizeof...(prp)) of prc_off.
719 unsigned int stop = prc_off.template get<0>(i /
sizeof...(prp));
// Precedence: the index is (i / sizeof...(prp)) - 1, i.e. the entry before the one used for stop.
// NOTE(review): that index wraps around when i < sizeof...(prp) — confirm the condition guarding this block excludes it.
722 {start = prc_off.template get<0>(i /
sizeof...(prp)-1);}
725 recv.template merge_prp_v_device<op,
726 typename T::value_type,
731 prp...>(v2,opart,start,stop);
// Device merge implementation without the sr parameter (a specialization): not implemented.
736 template<
template<
typename,
typename>
class op,
typename vector_type_opart,
typename vector_type_prc_offset>
743 template <
typename>
class layout_base,
745 inline static void execute(
D & recv,S & v2,
size_t i,vector_type_opart & opart, vector_type_prc_offset & prc_off)
// Reports file and line on std::cout (not std::cerr); nothing is thrown or returned to signal the failure.
747 std::cout << __FILE__ <<
":" << __LINE__ <<
" Error: not implemented" << std::endl;
// Device merge operation: keeps references to its inputs and forwards execute to
// op_ssend_recv_merge_gpu_impl.
752 template<
template<
typename,
typename>
class op,
typename vector_type_opart,
typename vector_type_prc_offset>
// Held by reference, not copied.
758 vector_type_prc_offset & prc_offset;
770 template <
typename>
class layout_base,
// Forward to the implementation chosen by sr, adding the stored opart and prc_offset.
774 op_ssend_recv_merge_gpu_impl<sr,op,vector_type_opart,vector_type_prc_offset>::template execute<T,
D,S,layout_base,prp...>(recv,v2,i,
opart,prc_offset);
// execute variant that places v2 into recv at offset start, growing recv first if needed.
786 template <
typename>
class layout_base,
788 inline static void execute(
D & recv,S & v2,
size_t i,
size_t & start)
// Grow recv only when the range [start, start + v2.size()) does not fit; recv is never shrunk here.
791 if ((start + v2.size()) > recv.size())
792 recv.resize(start + v2.size());
794 typename T::value_type,
// execute variant that finishes with a hostToDevice call on the range starting at start.
812 template <
typename>
class layout_base,
814 inline static void execute(
D & recv,S & v2,
size_t i,
size_t & start)
818 typename T::value_type,
819 typename S::Memory_type,
// hostToDevice on [start, start+v2.size()-1] for the properties in prp.
// NOTE(review): with an empty v2 and start == 0 the upper bound wraps around as size_t — confirm v2 is never empty here.
824 recv.template hostToDevice<prp ...>(start,start+v2.size()-1);
// execute variant that calls recv.merge_prp_device with replace_ as the merge operation.
838 template <
typename>
class layout_base,
840 inline static void execute(
D & recv,S & v2,
size_t i,
size_t & start)
843 recv.template merge_prp_device<
replace_,
844 typename T::value_type,
845 typename S::Memory_type,
// execute variant whose template arguments include S::grow_policy; start is taken by
// non-const reference.
861 template <
typename>
class layout_base,
862 int ... prp>
inline static void execute(
D & recv,S & v2,
size_t i,
size_t & start)
866 typename T::value_type,
868 typename S::grow_policy,
// Non-static execute with the common receive-operation signature: recv, v2, the buffer
// index i and the flags opt; sr, T, D, S, layout_base and prp are template parameters.
889 template<
bool sr,
typename T,
typename D,
typename S,
template<
typename>
class layout_base,
int ... prp>
void execute(
D & recv,S & v2,
size_t i,
size_t opt)
// Non-static execute with the common receive-operation signature; this variant reads
// is_mpi_rdma_cuda_active() at run time.
907 template<
bool sr,
typename T,
typename D,
typename S,
template<
typename>
class layout_base,
int ... prp>
void execute(
D & recv,S & v2,
size_t i,
size_t opt)
// Result of the MPIX_CUDA_AWARE_SUPPORT check performed by is_mpi_rdma_cuda_active().
909 bool active = is_mpi_rdma_cuda_active();