8 #ifndef OPENFPM_VCLUSTER_SRC_VCLUSTER_VCLUSTER_META_FUNCTION_HPP_ 9 #define OPENFPM_VCLUSTER_SRC_VCLUSTER_VCLUSTER_META_FUNCTION_HPP_ 11 #include "memory/BHeapMemory.hpp" 12 #include "Packer_Unpacker/has_max_prop.hpp" 18 static inline bool is_mpi_rdma_cuda_active()
20 #if defined(MPIX_CUDA_AWARE_SUPPORT) && MPIX_CUDA_AWARE_SUPPORT 27 template<
bool result,
typename T,
typename S,
template<
typename>
class layout_base,
typename Memory>
32 static void call_unpack(S & recv,
40 sz_byte->resize(recv_buf.size());
42 for (
size_t i = 0 ; i < recv_buf.size() ; i++)
53 size_t recv_size_old = recv.size();
56 op_param.template execute<
true,T,decltype(recv),decltype(unp),layout_base,prp...>(recv,unp,i,opt);
58 size_t recv_size_new = recv.size();
61 sz_byte->get(i) = recv_buf.get(i).
size();
63 sz->get(i) = recv_size_new - recv_size_old;
71 template<
typename op,
typename Vt,
typename S,
template<
typename>
class layout_base,
typename v_mpl>
97 :recv(recv),recv_buf(recv_buf),op_param(op_param),i(i),sz(sz),sz_byte(sz_byte)
105 typedef typename boost::mpl::at<typename T::value_type::type,boost::mpl::int_<T::value> >::type prp_type;
108 typedef typename boost::mpl::at<v_mpl,boost::mpl::int_<T::value>>::type prp_num;
111 size_t n_ele = recv_buf.
get(i).size() /
sizeof(prp_type);
119 v2.template setMemory<prp_num::value>(*ptr1);
126 size_t recv_size_old = recv.size();
128 op_param.template execute<false,T,decltype(recv),decltype(v2),layout_base,prp_num::value>(recv,v2,i);
130 size_t recv_size_new = recv.size();
133 sz_byte->get(i) = recv_buf.
get(i).size();
135 sz->get(i) = recv_size_new - recv_size_old;
150 template<
typename sT,
template<
typename>
class layout_base,
typename Memory>
183 typedef typename boost::mpl::at<typename sT::value_type::type,T>::type type_prp;
186 this->n_ele =
recv_buf.
get(
i).size() /
sizeof(type_prp);
190 if (opt & MPI_GPU_DIRECT)
192 #if defined(MPIX_CUDA_AWARE_SUPPORT) && MPIX_CUDA_AWARE_SUPPORT 206 v2.template setMemory<T::value>(*ptr1);
212 template<
bool inte_or_lin,
typename T,
typename S,
template<
typename>
class layout_base,
typename Memory>
215 template<
typename op,
unsigned int ... prp>
static int call_unpack_impl(S & recv,
228 boost::mpl::for_each_ref<boost::mpl::range_c<int,0,T::value_type::max_prop>>(prmti);
230 v2.resize(prmti.n_ele);
234 size_t recv_size_old = recv.
size();
236 op_param.template execute<
false,T,decltype(recv),decltype(v2),layout_base,prp...>(recv,v2,i,opt);
238 size_t recv_size_new = recv.size();
241 sz_byte->get(i) = recv_buf.get(i).
size();
243 sz->get(i) = recv_size_new - recv_size_old;
245 return sizeof...(prp);
249 template<
typename T,
typename S,
template<
typename>
class layout_base,
typename Memory>
252 template<
typename op,
unsigned int ... prp>
static int call_unpack_impl(S & recv,
261 size_t n_ele = recv_buf.get(i).size() /
sizeof(
typename T::value_type);
264 PtrMemory * ptr1 =
new PtrMemory(recv_buf.get(i).getPointer(),recv_buf.get(i).size());
278 size_t recv_size_old = recv.
size();
280 op_param.template execute<
false,T,decltype(recv),decltype(v2),layout_base,prp...>(recv,v2,i,opt);
282 size_t recv_size_new = recv.size();
285 sz_byte->get(i) = recv_buf.get(i).
size();
287 sz->get(i) = recv_size_new - recv_size_old;
299 template<
typename T,
typename S,
template<
typename>
class layout_base,
typename Memory>
302 template<
typename op,
unsigned int ... prp>
static void call_unpack(S & recv,
310 sz_byte->resize(recv_buf.size());
312 for (
size_t i = 0 ; i < recv_buf.size() ; )
314 i +=
unpack_selector_with_prp_lin<is_layout_mlin<layout_base<dummy_type>>::value,T,S,layout_base,Memory>::template call_unpack_impl<op,prp...>(recv,recv_buf,sz,sz_byte,op_param,i,opt);
323 template<
int ... prp>
326 template<
typename T>
inline static void call_pr(T & send,
size_t & tot_size)
336 template<
typename op,
typename T,
typename S,
template<
typename>
class layout_base,
typename Memory>
337 inline static void call_unpack(S & recv,
346 unpack_selector_with_prp<result, T, S,layout_base,Memory>::template call_unpack<op,prp...>(recv, recv_buf, sz, sz_byte, op_param,opt);
360 template<
typename sT>
376 :
v(
v),send_buf(send_buf),opt(opt)
384 if (opt & MPI_GPU_DIRECT)
386 #if defined(MPIX_CUDA_AWARE_SUPPORT) && MPIX_CUDA_AWARE_SUPPORT 387 send_buf.add(
v.template getDeviceBuffer<T::value>());
389 v.template deviceToHost<T::value>();
390 send_buf.add(
v.template getPointer<T::value>());
395 send_buf.add(
v.template getPointer<T::value>());
411 template<
typename sT>
432 typedef typename boost::mpl::at<typename sT::value_type::type,T>::type type_prp;
434 sz.add(
sizeof(type_prp)*
v.size());
438 template<typename T, bool impl = is_multiple_buffer_each_prp<T>::value >
443 send_buf.add(send.getPointer());
448 sz.add(send.size()*
sizeof(
typename T::value_type));
453 for (
size_t i = 0 ; i < prc_send.
size() ; i++)
455 prc_send_.add(prc_send.get(i));
468 boost::mpl::for_each_ref<boost::mpl::range_c<int,0,T::value_type::max_prop>>(sbp);
475 boost::mpl::for_each_ref<boost::mpl::range_c<int,0,T::value_type::max_prop>>(sbp);
480 for (
size_t i = 0 ; i < prc_send.
size() ; i++)
482 for (
size_t j = 0 ; j < T::value_type::max_prop ; j++)
483 {prc_send_.add(prc_send.get(i));}
493 template <
typename>
class layout_base,
494 unsigned int ... prp>
499 typedef typename ::generate_indexes<int, has_max_prop<T, has_value_type_ofp<T>::value>::number,
MetaFuncOrd>::result ind_prop_to_pack;
514 typedef typename ::generate_indexes<int, has_max_prop<T, has_value_type_ofp<T>::value>::number,
MetaFuncOrd>::result ind_prop_to_pack;
526 template<
typename Memory>
527 static void unpacking(S & recv,
550 template <
typename>
class layout_base,
551 int ... prp>
static void execute(
D & recv,S & v2,
size_t i,
size_t opt)
553 if (opt & MPI_GPU_DIRECT)
555 #if defined(MPIX_CUDA_AWARE_SUPPORT) && MPIX_CUDA_AWARE_SUPPORT 558 recv.template add_prp_device<
typename T::value_type,
565 size_t old_size = recv.size();
568 recv.template add_prp<
typename T::value_type,
575 recv.template hostToDevice<prp...>(old_size,old_size+v2.size()-1);
583 recv.template add_prp<
typename T::value_type,
601 template <
typename>
class layout_base,
603 static void execute(
D & recv,S & v2,
size_t i,
size_t opt)
606 recv.template add_prp<
typename T::value_type,
608 typename T::grow_policy,
616 template<
typename op>
624 template <
typename>
class layout_base,
626 static void execute(
D & recv,S & v2,
size_t i,
size_t opt)
634 template<
bool sr,
template<
typename,
typename>
class op,
typename vector_type_opart>
641 template <
typename>
class layout_base,
643 inline static void execute(
D & recv,S & v2,
size_t i,vector_type_opart & opart)
646 recv.template merge_prp_v<op,
647 typename T::value_type,
651 typename vector_type_opart::value_type,
652 prp...>(v2,opart.get(i));
657 template<
template<
typename,
typename>
class op,
typename vector_type_opart>
664 template <
typename>
class layout_base,
666 inline static void execute(
D & recv,S & v2,
size_t i,vector_type_opart & opart)
669 recv.template merge_prp_v<op,
670 typename T::value_type,
674 typename vector_type_opart::value_type,
675 prp...>(v2,opart.get(i));
680 template<
template<
typename,
typename>
class op,
typename vector_type_opart>
696 template <
typename>
class layout_base,
705 template<
bool sr,
template<
typename,
typename>
class op,
typename vector_type_opart,
typename vector_type_prc_offset>
712 template <
typename>
class layout_base,
714 inline static void execute(
D & recv,S & v2,
size_t i,vector_type_opart & opart, vector_type_prc_offset & prc_off)
716 prc_off.template deviceToHost<0>();
718 unsigned int start = 0;
719 unsigned int stop = prc_off.template get<0>(i /
sizeof...(prp));
722 {start = prc_off.template get<0>(i /
sizeof...(prp)-1);}
725 recv.template merge_prp_v_device<op,
726 typename T::value_type,
731 prp...>(v2,opart,start,stop);
736 template<
template<
typename,
typename>
class op,
typename vector_type_opart,
typename vector_type_prc_offset>
743 template <
typename>
class layout_base,
745 inline static void execute(
D & recv,S & v2,
size_t i,vector_type_opart & opart, vector_type_prc_offset & prc_off)
747 std::cout << __FILE__ <<
":" << __LINE__ <<
" Error: not implemented" << std::endl;
752 template<
template<
typename,
typename>
class op,
typename vector_type_opart,
typename vector_type_prc_offset>
758 vector_type_prc_offset & prc_offset;
770 template <
typename>
class layout_base,
774 op_ssend_recv_merge_gpu_impl<sr,op,vector_type_opart,vector_type_prc_offset>::template execute<T,
D,S,layout_base,prp...>(recv,v2,i,
opart,prc_offset);
786 template <
typename>
class layout_base,
788 inline static void execute(
D & recv,S & v2,
size_t i,
size_t & start)
792 typename T::value_type,
810 template <
typename>
class layout_base,
812 inline static void execute(
D & recv,S & v2,
size_t i,
size_t & start)
816 typename T::value_type,
817 typename S::Memory_type,
822 recv.template hostToDevice<prp ...>(start,start+v2.size()-1);
836 template <
typename>
class layout_base,
838 inline static void execute(
D & recv,S & v2,
size_t i,
size_t & start)
841 recv.template merge_prp_device<
replace_,
842 typename T::value_type,
843 typename S::Memory_type,
859 template <
typename>
class layout_base,
860 int ... prp>
inline static void execute(
D & recv,S & v2,
size_t i,
size_t & start)
864 typename T::value_type,
866 typename S::grow_policy,
887 template<
bool sr,
typename T,
typename D,
typename S,
template<
typename>
class layout_base,
int ... prp>
void execute(
D & recv,S & v2,
size_t i,
size_t opt)
905 template<
bool sr,
typename T,
typename D,
typename S,
template<
typename>
class layout_base,
int ... prp>
void execute(
D & recv,S & v2,
size_t i,
size_t opt)
907 bool active = is_mpi_rdma_cuda_active();
size_t start
starting marker
vector_type_opart & opart
For each processor, contains the list of the particles with which the information must be merged.
Derivative second order on h (spacing)
It overrides the behavior of size()
static void execute(D &recv, S &v2, size_t i, size_t opt)
Add data.
static void execute(D &recv, S &v2, size_t i, size_t opt)
Add data.
Helper class to merge data.
Grow policy defines how the vector should grow every time we exceed the size.
op_ssend_gg_recv_merge_run_device(size_t start)
constructor
static void execute(D &recv, S &v2, size_t i, vector_type_opart &opart)
Merge the data.
There is max_prop inside.
size_t start
starting marker
Helper class to merge data without serialization.
Helper class to merge data.
static void execute(D &recv, S &v2, size_t i, vector_type_opart &opart, vector_type_prc_offset &prc_off)
merge the data
openfpm::vector_fr< BMemory< Memory > > & recv_buf
Receive buffer.
Helper class to merge data without serialization direct transfer to CUDA buffer.
This class allocates and destroys CPU memory.
op_ssend_gg_recv_merge(size_t start)
constructor
vector_type_opart & opart
For each processor, contains the list of the particles with which the information must be merged.
Helper class to merge data without serialization, using host memory.
Implementation of 1-D std::vector like structure.
void execute(D &recv, S &v2, size_t i, size_t opt)
execute the merge
op_ssend_recv_merge_gpu(vector_type_opart &opart, vector_type_prc_offset &prc_offset)
constructor
Helper class to merge data without serialization.
static void execute(D &recv, S &v2, size_t i, size_t &start)
Merge the data.
Grow policy defines how the vector should grow every time we exceed the size.
static void execute(D &recv, S &v2, size_t i, vector_type_opart &opart)
merge the data
set_buf_size_for_each_prop(sT &v, openfpm::vector< size_t > &sz)
constructor
set_buf_pointer_for_each_prop(sT &v, openfpm::vector< const void * > &send_buf, size_t opt)
constructor
virtual size_t size() const
the size of the allocated memory
void operator()(T &t) const
It calls the copy function for each property.
It analyzes the given type and selects the correct implementation for vector.
void execute(D &recv, S &v2, size_t i, size_t opt)
execute the merge
virtual void decRef()
Decrement the reference counter.
static void execute(D &recv, S &v2, size_t i, size_t &start)
Merge the data.
Helper class to merge data.
void execute(D &recv, S &v2, size_t i, size_t opt)
execute the merge
virtual void incRef()
Increment the reference counter.
static void execute(D &recv, S &v2, size_t i, size_t &start)
merge the data
virtual void incRef()
Increment the reference counter.
This set of classes generates an array definition at compile time.
void operator()(T &t)
It calls the copy function for each property.
static void execute(D &recv, S &v2, size_t i, vector_type_opart &opart, vector_type_prc_offset &prc_off)
Merge the data.
void operator()(T &t) const
It calls the copy function for each property.
openfpm::vector< typename sT::value_type, PtrMemory, layout_base, openfpm::grow_policy_identity > & v2
Fake vector that map over received memory.
static void execute(D &recv, S &v2, size_t i, size_t opt)
Add data.
This class is a functor for the "for_each" algorithm.
static void execute(D &recv, S &v2, size_t i, size_t &start)
Merge the data.
It returns true if the object T requires complex serialization.
process_receive_mem_traits_inte(openfpm::vector< typename sT::value_type, PtrMemory, layout_base, openfpm::grow_policy_identity > &v2, openfpm::vector_fr< BMemory< Memory >> &recv_buf, size_t i, size_t opt)
constructor
This class is a functor for the "for_each" algorithm.
void operator()(T &t) const
It calls the copy function for each property.
This class is a functor for the "for_each" algorithm.
virtual void decRef()
Decrement the reference counter.
Helper class to merge data without serialization.
Helper class to add data.
Helper class to merge data.
Helper class to add data without serialization.
Aggregate of properties: from a list of objects it creates a struct that follows the OPENFPM native stru...
This structure defines the add operation to use with copy general.
void execute(D &recv, S &v2, size_t i, size_t opt)
execute the merge
op_ssend_recv_merge(vector_type_opart &opart)
constructor
void * getPointerEnd()
Return the end pointer of the previous allocated memory.
T & get(size_t id)
Get an element of the vector.
unpack_each_prop_buffer(S &recv, openfpm::vector_fr< BMemory< HeapMemory >> &recv_buf, op &op_param, size_t i, openfpm::vector< size_t > *sz, openfpm::vector< size_t > *sz_byte)
constructor
This class gives memory from preallocated memory; memory destruction is not performed.