8#ifndef OPENFPM_VCLUSTER_SRC_VCLUSTER_VCLUSTER_META_FUNCTION_HPP_
9#define OPENFPM_VCLUSTER_SRC_VCLUSTER_VCLUSTER_META_FUNCTION_HPP_
11#include "memory/BHeapMemory.hpp"
12#include "Packer_Unpacker/has_max_prop.hpp"
18static inline bool is_mpi_rdma_cuda_active()
20#if defined(MPIX_CUDA_AWARE_SUPPORT) && MPIX_CUDA_AWARE_SUPPORT
27template<
bool result,
typename T,
typename S,
template<
typename>
class layout_base,
typename Memory>
32 static void call_unpack(S & recv,
40 sz_byte->resize(recv_buf.size());
42 for (
size_t i = 0 ; i < recv_buf.size() ; i++)
53 size_t recv_size_old = recv.size();
56 op_param.template execute<
true,T,
decltype(recv),
decltype(unp),layout_base,prp...>(recv,unp,i,opt);
58 size_t recv_size_new = recv.size();
61 sz_byte->get(i) = recv_buf.get(i).
size();
63 sz->get(i) = recv_size_new - recv_size_old;
71template<
typename op,
typename Vt,
typename S,
template<
typename>
class layout_base,
typename v_mpl>
76 openfpm::vector_fr<BMemory<HeapMemory>> & recv_buf;
97 :recv(recv),recv_buf(recv_buf),op_param(op_param),i(i),sz(sz),sz_byte(sz_byte)
105 typedef typename boost::mpl::at<typename T::value_type::type,boost::mpl::int_<T::value> >::type prp_type;
108 typedef typename boost::mpl::at<v_mpl,boost::mpl::int_<T::value>>::type prp_num;
111 size_t n_ele = recv_buf.get(i).size() /
sizeof(prp_type);
114 PtrMemory * ptr1 =
new PtrMemory(recv_buf.get(i).getPointer(),recv_buf.get(i).size());
119 v2.template setMemory<prp_num::value>(*ptr1);
126 size_t recv_size_old = recv.
size();
128 op_param.template execute<false,T,decltype(recv),decltype(v2),layout_base,prp_num::value>(recv,v2,i);
130 size_t recv_size_new = recv.size();
133 sz_byte->get(i) = recv_buf.get(i).
size();
135 sz->get(i) = recv_size_new - recv_size_old;
150template<
typename sT,
template<
typename>
class layout_base,
typename Memory>
183 typedef typename boost::mpl::at<typename sT::value_type::type,T>::type type_prp;
186 this->n_ele =
recv_buf.get(
i).size() /
sizeof(type_prp);
190 if (opt & MPI_GPU_DIRECT)
192#if defined(MPIX_CUDA_AWARE_SUPPORT) && MPIX_CUDA_AWARE_SUPPORT
206 v2.template setMemory<T::value>(*ptr1);
212template<
bool inte_or_lin,
typename T,
typename S,
template<
typename>
class layout_base,
typename Memory>
215 template<
typename op,
unsigned int ... prp>
static int call_unpack_impl(S & recv,
228 boost::mpl::for_each_ref<boost::mpl::range_c<int,0,T::value_type::max_prop>>(prmti);
230 v2.resize(prmti.n_ele);
234 size_t recv_size_old = recv.
size();
236 op_param.template execute<
false,T,
decltype(recv),
decltype(v2),layout_base,prp...>(recv,v2,i,opt);
238 size_t recv_size_new = recv.size();
241 sz_byte->get(i) = recv_buf.get(i).
size();
243 sz->get(i) = recv_size_new - recv_size_old;
245 return sizeof...(prp);
249template<
typename T,
typename S,
template<
typename>
class layout_base,
typename Memory>
252 template<
typename op,
unsigned int ... prp>
static int call_unpack_impl(S & recv,
261 size_t n_ele = recv_buf.get(i).size() /
sizeof(
typename T::value_type);
264 PtrMemory * ptr1 =
new PtrMemory(recv_buf.get(i).getPointer(),recv_buf.get(i).size());
278 size_t recv_size_old = recv.
size();
280 op_param.template execute<
false,T,
decltype(recv),
decltype(v2),layout_base,prp...>(recv,v2,i,opt);
282 size_t recv_size_new = recv.size();
285 sz_byte->get(i) = recv_buf.get(i).
size();
287 sz->get(i) = recv_size_new - recv_size_old;
299template<
typename T,
typename S,
template<
typename>
class layout_base,
typename Memory>
302 template<
typename op,
unsigned int ... prp>
static void call_unpack(S & recv,
310 sz_byte->resize(recv_buf.size());
312 for (
size_t i = 0 ; i < recv_buf.size() ; )
314 i +=
unpack_selector_with_prp_lin<is_layout_mlin<layout_base<dummy_type>>::value,T,S,layout_base,Memory>::template call_unpack_impl<op,prp...>(recv,recv_buf,sz,sz_byte,op_param,i,opt);
326 template<
typename T>
inline static void call_pr(T & send,
size_t & tot_size)
336 template<
typename op,
typename T,
typename S,
template<
typename>
class layout_base,
typename Memory>
337 inline static void call_unpack(S & recv,
346 unpack_selector_with_prp<result, T, S,layout_base,Memory>::template call_unpack<op,prp...>(recv, recv_buf, sz, sz_byte, op_param,opt);
376 :
v(
v),send_buf(send_buf),opt(opt)
384 if (opt & MPI_GPU_DIRECT)
386#if defined(MPIX_CUDA_AWARE_SUPPORT) && MPIX_CUDA_AWARE_SUPPORT
387 send_buf.add(
v.template getDeviceBuffer<T::value>());
389 v.template deviceToHost<T::value>();
390 send_buf.add(
v.template getPointer<T::value>());
395 send_buf.add(
v.template getPointer<T::value>());
432 typedef typename boost::mpl::at<typename sT::value_type::type,T>::type type_prp;
434 sz.add(
sizeof(type_prp)*
v.size());
438template<typename T, bool impl = is_multiple_buffer_each_prp<T>::value >
443 send_buf.add(send.getPointer());
448 sz.add(send.size()*
sizeof(
typename T::value_type));
453 for (
size_t i = 0 ; i < prc_send.
size() ; i++)
455 prc_send_.add(prc_send.get(i));
468 boost::mpl::for_each_ref<boost::mpl::range_c<int,0,T::value_type::max_prop>>(sbp);
475 boost::mpl::for_each_ref<boost::mpl::range_c<int,0,T::value_type::max_prop>>(sbp);
480 for (
size_t i = 0 ; i < prc_send.
size() ; i++)
482 for (
size_t j = 0 ; j < T::value_type::max_prop ; j++)
483 {prc_send_.add(prc_send.get(i));}
493 template <
typename>
class layout_base,
494 unsigned int ... prp>
499 typedef typename ::generate_indexes<int, has_max_prop<T, has_value_type_ofp<T>::value>::number,
MetaFuncOrd>::result ind_prop_to_pack;
514 typedef typename ::generate_indexes<int, has_max_prop<T, has_value_type_ofp<T>::value>::number,
MetaFuncOrd>::result ind_prop_to_pack;
526 template<
typename Memory>
527 static void unpacking(S & recv,
550 template <
typename>
class layout_base,
551 int ... prp>
static void execute(
D & recv,S & v2,
size_t i,
size_t opt)
553 if (opt & MPI_GPU_DIRECT)
555#if defined(MPIX_CUDA_AWARE_SUPPORT) && MPIX_CUDA_AWARE_SUPPORT
558 recv.template add_prp_device<
typename T::value_type,
565 size_t old_size = recv.size();
568 recv.template add_prp<
typename T::value_type,
575 recv.template hostToDevice<prp...>(old_size,old_size+v2.size()-1);
583 recv.template add_prp<
typename T::value_type,
601 template <
typename>
class layout_base,
603 static void execute(
D & recv,S & v2,
size_t i,
size_t opt)
606 recv.template add_prp<
typename T::value_type,
608 typename T::grow_policy,
624 template <
typename>
class layout_base,
626 static void execute(
D & recv,S & v2,
size_t i,
size_t opt)
634template<
bool sr,
template<
typename,
typename>
class op,
typename vector_type_opart>
641 template <
typename>
class layout_base,
643 inline static void execute(
D & recv,S & v2,
size_t i,vector_type_opart & opart)
646 recv.template merge_prp_v<op,
647 typename T::value_type,
651 typename vector_type_opart::value_type,
652 prp...>(v2,opart.get(i));
657template<
template<
typename,
typename>
class op,
typename vector_type_opart>
664 template <
typename>
class layout_base,
666 inline static void execute(
D & recv,S & v2,
size_t i,vector_type_opart & opart)
669 recv.template merge_prp_v<op,
670 typename T::value_type,
674 typename vector_type_opart::value_type,
675 prp...>(v2,opart.get(i));
680template<
template<
typename,
typename>
class op,
typename vector_type_opart>
696 template <
typename>
class layout_base,
705template<
bool sr,
template<
typename,
typename>
class op,
typename vector_type_opart,
typename vector_type_prc_offset>
712 template <
typename>
class layout_base,
714 inline static void execute(
D & recv,S & v2,
size_t i,vector_type_opart & opart, vector_type_prc_offset & prc_off)
716 prc_off.template deviceToHost<0>();
718 unsigned int start = 0;
719 unsigned int stop = prc_off.template get<0>(i /
sizeof...(prp));
722 {start = prc_off.template get<0>(i /
sizeof...(prp)-1);}
725 recv.template merge_prp_v_device<op,
726 typename T::value_type,
731 prp...>(v2,opart,start,stop);
736template<
template<
typename,
typename>
class op,
typename vector_type_opart,
typename vector_type_prc_offset>
743 template <
typename>
class layout_base,
745 inline static void execute(
D & recv,S & v2,
size_t i,vector_type_opart & opart, vector_type_prc_offset & prc_off)
747 std::cout << __FILE__ <<
":" << __LINE__ <<
" Error: not implemented" << std::endl;
752template<
template<
typename,
typename>
class op,
typename vector_type_opart,
typename vector_type_prc_offset>
758 vector_type_prc_offset & prc_offset;
770 template <
typename>
class layout_base,
774 op_ssend_recv_merge_gpu_impl<sr,op,vector_type_opart,vector_type_prc_offset>::template execute<T,
D,S,layout_base,prp...>(recv,v2,i,
opart,prc_offset);
786 template <
typename>
class layout_base,
788 inline static void execute(
D & recv,S & v2,
size_t i,
size_t & start)
792 typename T::value_type,
810 template <
typename>
class layout_base,
812 inline static void execute(
D & recv,S & v2,
size_t i,
size_t & start)
816 typename T::value_type,
817 typename S::Memory_type,
822 recv.template hostToDevice<prp ...>(start,start+v2.size()-1);
836 template <
typename>
class layout_base,
838 inline static void execute(
D & recv,S & v2,
size_t i,
size_t & start)
841 recv.template merge_prp_device<
replace_,
842 typename T::value_type,
843 typename S::Memory_type,
859 template <
typename>
class layout_base,
860 int ... prp>
inline static void execute(
D & recv,S & v2,
size_t i,
size_t & start)
864 typename T::value_type,
866 typename S::grow_policy,
887 template<
bool sr,
typename T,
typename D,
typename S,
template<
typename>
class layout_base,
int ... prp>
void execute(
D & recv,S & v2,
size_t i,
size_t opt)
905 template<
bool sr,
typename T,
typename D,
typename S,
template<
typename>
class layout_base,
int ... prp>
void execute(
D & recv,S & v2,
size_t i,
size_t opt)
907 bool active = is_mpi_rdma_cuda_active();
It overrides the behavior of size().
Derivative second order on h (spacing)
virtual void decRef()
Decrement the reference counter.
void * getPointerEnd()
Return the end pointer of the previous allocated memory.
virtual void incRef()
Increment the reference counter.
This class allocate, and destroy CPU memory.
This class give memory from a preallocated memory, memory destruction is not performed.
virtual void decRef()
Decrement the reference counter.
virtual void incRef()
Increment the reference counter.
Grow policy: defines how the vector should grow every time we exceed its size.
Grow policy: defines how the vector should grow every time we exceed its size.
Implementation of 1-D std::vector like structure.
Aggregate of properties: from a list of objects it creates a struct that follows the OpenFPM native structure...
It returns true if the object T requires complex serialization.
This set of classes generates an array definition at compile-time.
static void execute(D &recv, S &v2, size_t i, size_t &start)
merge the data
Helper class to merge data without serialization direct transfer to CUDA buffer.
static void execute(D &recv, S &v2, size_t i, size_t &start)
Merge the data.
Helper class to merge data without serialization, using host memory.
static void execute(D &recv, S &v2, size_t i, size_t &start)
Merge the data.
Helper class to merge data without serialization.
static void execute(D &recv, S &v2, size_t i, size_t &start)
Merge the data.
Helper class to merge data.
op_ssend_gg_recv_merge_run_device(size_t start)
constructor
size_t start
starting marker
void execute(D &recv, S &v2, size_t i, size_t opt)
execute the merge
Helper class to merge data.
size_t start
starting marker
void execute(D &recv, S &v2, size_t i, size_t opt)
execute the merge
op_ssend_gg_recv_merge(size_t start)
constructor
static void execute(D &recv, S &v2, size_t i, size_t opt)
Add data.
Helper class to add data without serialization.
static void execute(D &recv, S &v2, size_t i, size_t opt)
Add data.
Helper class to add data.
static void execute(D &recv, S &v2, size_t i, size_t opt)
Add data.
static void execute(D &recv, S &v2, size_t i, vector_type_opart &opart, vector_type_prc_offset &prc_off)
merge the data
Helper class to merge data without serialization.
static void execute(D &recv, S &v2, size_t i, vector_type_opart &opart, vector_type_prc_offset &prc_off)
Merge the data.
Helper class to merge data.
void execute(D &recv, S &v2, size_t i, size_t opt)
execute the merge
op_ssend_recv_merge_gpu(vector_type_opart &opart, vector_type_prc_offset &prc_offset)
constructor
vector_type_opart & opart
For each processor contain the list of the particles with which I must merge the information.
static void execute(D &recv, S &v2, size_t i, vector_type_opart &opart)
merge the data
Helper class to merge data without serialization.
static void execute(D &recv, S &v2, size_t i, vector_type_opart &opart)
Merge the data.
Helper class to merge data.
vector_type_opart & opart
For each processor contain the list of the particles with which I must merge the information.
op_ssend_recv_merge(vector_type_opart &opart)
constructor
void execute(D &recv, S &v2, size_t i, size_t opt)
execute the merge
It analyze the type given and it select correctly the implementation for vector.
There is max_prop inside.
This class is a functor for the "for_each" algorithm.
process_receive_mem_traits_inte(openfpm::vector< typename sT::value_type, PtrMemory, layout_base, openfpm::grow_policy_identity > &v2, openfpm::vector_fr< BMemory< Memory > > &recv_buf, size_t i, size_t opt)
constructor
openfpm::vector< typename sT::value_type, PtrMemory, layout_base, openfpm::grow_policy_identity > & v2
Fake vector that map over received memory.
openfpm::vector_fr< BMemory< Memory > > & recv_buf
Receive buffer.
void operator()(T &t)
It call the copy function for each property.
This structure define the operation add to use with copy general.
This class is a functor for the "for_each" algorithm.
set_buf_pointer_for_each_prop(sT &v, openfpm::vector< const void * > &send_buf, size_t opt)
constructor
void operator()(T &t) const
It call the copy function for each property.
This class is a functor for the "for_each" algorithm.
void operator()(T &t) const
It call the copy function for each property.
set_buf_size_for_each_prop(sT &v, openfpm::vector< size_t > &sz)
constructor
unpack_each_prop_buffer(S &recv, openfpm::vector_fr< BMemory< HeapMemory > > &recv_buf, op &op_param, size_t i, openfpm::vector< size_t > *sz, openfpm::vector< size_t > *sz_byte)
constructor
void operator()(T &t) const
It call the copy function for each property.