This class virtualizes a cluster of PCs as a set of communicating processes. More...

Detailed Description

template<typename InternalMemory>
class Vcluster_base< InternalMemory >

This class virtualizes a cluster of PCs as a set of communicating processes.

At the moment it is an MPI-like interface, but simpler and more type-aware. It also provides some more complex communication functionality, such as Dynamic Sparse Data Exchange.

Currently VCluster exposes computation-driven parallelism (MPI-like), with a plan to extend it to communication-driven parallelism.

In computation-driven parallelism, the program computes and then communicates with the other processors
In communication-driven parallelism (Charm++ or HPX), the program receives messages, and receiving these messages triggers computation

An example of sending and receiving plain buffers

 
    // Send to 8 processors
    for (size_t i = 0 ; i < 8 ; i++)
        vcl.send( mod(vcl.getProcessUnitID() + i * P_STRIDE, vcl.getProcessingUnits()) ,i,v_send.getPointer(),v_send.size()*sizeof(T));
 
    openfpm::vector<openfpm::vector<T> > pt_buf;
    pt_buf.resize(8);
 
    // Recv from 8 processors
    for (size_t i = 0 ; i < 8 ; i++)
    {
        pt_buf.get(i).resize(n);
        vcl.recv( mod( (vcl.getProcessUnitID() - i * P_STRIDE), vcl.getProcessingUnits()) ,i,pt_buf.get(i).getPointer(),pt_buf.get(i).size()*sizeof(T));
    }
 
    vcl.execute();
 

An example of sending vectors of primitives (T=float,double,long int,...)

 
    // Send to 8 processors
    for (size_t i = 0 ; i < 8 ; i++)
        vcl.send( mod(vcl.getProcessUnitID() + i * P_STRIDE, vcl.getProcessingUnits()) ,i,v_send);
 
    openfpm::vector<openfpm::vector<T> > pt_buf;
    pt_buf.resize(8);
 
    // Recv from 8 processors
    for (size_t i = 0 ; i < 8 ; i++)
    {
        pt_buf.get(i).resize(n);
        vcl.recv( mod( (vcl.getProcessUnitID() - i * P_STRIDE), vcl.getProcessingUnits()) ,i,pt_buf.get(i));
    }
 
    vcl.execute();
 

An example of sending vectors of complex objects

 
    // Point test typedef
    typedef Point_test<float> p;
 
    openfpm::vector<Point_test<float>> v_send = allocate_openfpm_fill(n,vcl.getProcessUnitID());
 
    // Send to 8 processors
    for (size_t i = 0 ; i < 8 ; i++)
        vcl.send( mod(vcl.getProcessUnitID() + i * P_STRIDE, vcl.getProcessingUnits()) ,i,v_send);
 
    openfpm::vector<openfpm::vector<Point_test<float>> > pt_buf;
    pt_buf.resize(8);
 
    // Recv from 8 processors
    for (size_t i = 0 ; i < 8 ; i++)
    {
        pt_buf.get(i).resize(n);
        vcl.recv( mod( (vcl.getProcessUnitID() - i * P_STRIDE), vcl.getProcessingUnits()) ,i,pt_buf.get(i));
    }
 
    vcl.execute();
 

An example of gathering numbers from all processors

 
    openfpm::vector<T> clt;
    T data = vcl.getProcessUnitID();
 
    vcl.allGather(data,clt);
    vcl.execute();
 
    for (size_t i = 0 ; i < vcl.getProcessingUnits() ; i++)
        BOOST_REQUIRE_EQUAL(i,(size_t)clt.get(i));
 

Definition at line 125 of file VCluster_base.hpp.

#include <VCluster_base.hpp>

Inheritance diagram for Vcluster_base< InternalMemory >:

Public Member Functions
	Vcluster_base (int *argc, char ***argv)
	Virtual cluster constructor.

gpu::ofp_context_t &	getgpuContext (bool iw=true)
	If nvidia cuda is activated return a gpu context.

MPI_Comm	getMPIComm ()
	Get the MPI_Communicator (or processor group) this VCluster is using.

size_t	getProcessingUnits ()
	Get the total number of processors.

size_t	size ()
	Get the total number of processors.

void	print_stats ()

void	clear_stats ()

size_t	getProcessUnitID ()
	Get the process unit id.

size_t	rank ()
	Get the process unit id.

template<typename T >
void	sum (T &num)
	Sum the numbers across all processors and get the result.

template<typename T >
void	max (T &num)
	Get the maximum number across all processors (or reduction with infinity norm)

template<typename T >
void	min (T &num)
	Get the minimum number across all processors (or reduction with infinity norm)

void	progressCommunication ()
	For asynchronous communications such as sendrecvMultipleMessagesNBXAsync, this function progresses the communication.

template<typename T >
void	sendrecvMultipleMessagesNBX (openfpm::vector< size_t > &prc, openfpm::vector< T > &data, openfpm::vector< size_t > &prc_recv, openfpm::vector< size_t > &recv_sz, void *(*msg_alloc)(size_t, size_t, size_t, size_t, size_t, size_t, void *), void *ptr_arg, long int opt=NONE)
	Send and receive multiple messages.

template<typename T >
void	sendrecvMultipleMessagesNBXAsync (openfpm::vector< size_t > &prc, openfpm::vector< T > &data, openfpm::vector< size_t > &prc_recv, openfpm::vector< size_t > &recv_sz, void *(*msg_alloc)(size_t, size_t, size_t, size_t, size_t, size_t, void *), void *ptr_arg, long int opt=NONE)
	Send and receive multiple messages asynchronous version.

template<typename T >
void	sendrecvMultipleMessagesNBX (openfpm::vector< size_t > &prc, openfpm::vector< T > &data, void *(*msg_alloc)(size_t, size_t, size_t, size_t, size_t, size_t, void *), void *ptr_arg, long int opt=NONE)
	Send and receive multiple messages.

template<typename T >
void	sendrecvMultipleMessagesNBXAsync (openfpm::vector< size_t > &prc, openfpm::vector< T > &data, void *(*msg_alloc)(size_t, size_t, size_t, size_t, size_t, size_t, void *), void *ptr_arg, long int opt=NONE)
	Send and receive multiple messages asynchronous version.

void	sendrecvMultipleMessagesNBX (size_t n_send, size_t sz[], size_t prc[], void *ptr[], size_t n_recv, size_t prc_recv[], size_t sz_recv[], void *(*msg_alloc)(size_t, size_t, size_t, size_t, size_t, size_t, void *), void *ptr_arg, long int opt=NONE)
	Send and receive multiple messages.

void	sendrecvMultipleMessagesNBXAsync (size_t n_send, size_t sz[], size_t prc[], void *ptr[], size_t n_recv, size_t prc_recv[], size_t sz_recv[], void *(*msg_alloc)(size_t, size_t, size_t, size_t, size_t, size_t, void *), void *ptr_arg, long int opt=NONE)
	Send and receive multiple messages asynchronous version.

void	sendrecvMultipleMessagesNBX (size_t n_send, size_t sz[], size_t prc[], void *ptr[], size_t n_recv, size_t prc_recv[], void *(*msg_alloc)(size_t, size_t, size_t, size_t, size_t, size_t, void *), void *ptr_arg, long int opt=NONE)
	Send and receive multiple messages.

void	sendrecvMultipleMessagesNBXAsync (size_t n_send, size_t sz[], size_t prc[], void *ptr[], size_t n_recv, size_t prc_recv[], void *(*msg_alloc)(size_t, size_t, size_t, size_t, size_t, size_t, void *), void *ptr_arg, long int opt=NONE)
	Send and receive multiple messages asynchronous version.

void	sendrecvMultipleMessagesNBX (size_t n_send, size_t sz[], size_t prc[], void *ptr[], void *(*msg_alloc)(size_t, size_t, size_t, size_t, size_t, size_t, void *), void *ptr_arg, long int opt=NONE)
	Send and receive multiple messages.

void	sendrecvMultipleMessagesNBXAsync (size_t n_send, size_t sz[], size_t prc[], void *ptr[], void *(*msg_alloc)(size_t, size_t, size_t, size_t, size_t, size_t, void *), void *ptr_arg, long int opt=NONE)
	Send and receive multiple messages asynchronous version.

void	sendrecvMultipleMessagesNBXWait ()
	Send and receive multiple messages: wait for the NBX communication to complete.

bool	send (size_t proc, size_t tag, const void *mem, size_t sz)
	Send data to a processor.

template<typename T , typename Mem , template< typename > class gr>
bool	send (size_t proc, size_t tag, openfpm::vector< T, Mem, gr > &v)
	Send data to a processor.

bool	recv (size_t proc, size_t tag, void *v, size_t sz)
	Recv data from a processor.

template<typename T , typename Mem , template< typename > class gr>
bool	recv (size_t proc, size_t tag, openfpm::vector< T, Mem, gr > &v)
	Recv data from a processor.

template<typename T , typename Mem , template< typename > class gr>
bool	allGather (T &send, openfpm::vector< T, Mem, gr > &v)
	Gather the data from all processors.

template<typename T , typename Mem , template< typename > class layout_base>
bool	Bcast (openfpm::vector< T, Mem, layout_base > &v, size_t root)
	Broadcast the data to all processors.

void	execute ()
	Execute all the requests.

void	clear ()
	Release the buffer used for communication.

Data Fields
openfpm::vector< size_t >	sz_recv_tmp

Protected Attributes
openfpm::vector_fr< BMemory< InternalMemory > >	recv_buf [NQUEUE]
	Receive buffers.

openfpm::vector< size_t >	tags [NQUEUE]
	tags receiving

Private Member Functions
Vcluster_base &	operator= (const Vcluster_base &)
	disable operator=

	Vcluster_base (const Vcluster_base &)
	disable copy constructor

void	queue_all_sends (size_t n_send, size_t sz[], size_t prc[], void *ptr[])

Private Attributes
Vcluster_log	log
	log file

openfpm::vector< size_t >	proc_com

openfpm::vector< int >	map_scatter
	vector that contains the scatter map (it is basically an array of ones)

openfpm::vector< MPI_Request >	req
	vector of MPI requests

openfpm::vector< MPI_Status >	stat
	vector of MPI status

std::vector< int >	post_exe
	vector of functions to execute after all the requests have been performed

gpu::ofp_context_t *	context
	standard context for gpu (if cuda is detected otherwise is unused)

int	m_size
	number of processes

int	m_rank
	actual rank

int	numPE = 1
	number of processing unit per process

NBX_Type	NBX_active [NQUEUE]

size_t	rid [NQUEUE]
	request id

int	NBX_prc_qcnt = -1
	NBX communication on queue (-1 means 0, 0 means 1, 1 means 2, ...)

bool	NBX_prc_reached_bar_req [NQUEUE]
	Is the barrier request reached.

int	NBX_prc_cnt_base = 0

size_t	NBX_prc_n_send [NQUEUE]

size_t *	NBX_prc_prc [NQUEUE]

void **	NBX_prc_ptr [NQUEUE]

size_t *	NBX_prc_sz [NQUEUE]

size_t	NBX_prc_n_recv [NQUEUE]

void *(*	NBX_prc_msg_alloc [NQUEUE])(size_t, size_t, size_t, size_t, size_t, size_t, void *)

size_t *	NBX_prc_prc_recv [NQUEUE]

void *	NBX_prc_ptr_arg [NQUEUE]

std::vector< red >	r

openfpm::vector< void * >	ptr_send [NQUEUE]
	vector of pointers of send buffers

openfpm::vector< size_t >	sz_send [NQUEUE]
	vector of the size of send buffers

MPI_Request	bar_req
	barrier request

MPI_Status	bar_stat
	barrier status

int	shmrank
	rank within the node

int	nbx_cycle
	NBX_cycle.

Constructor & Destructor Documentation

◆ Vcluster_base() [1/2]

template<typename InternalMemory >

Vcluster_base< InternalMemory >::Vcluster_base ( const Vcluster_base< InternalMemory > & )

inline private

disable copy constructor

Definition at line 217 of file VCluster_base.hpp.

◆ ~Vcluster_base()

template<typename InternalMemory >

Vcluster_base< InternalMemory >::~Vcluster_base ( )

inline

Definition at line 268 of file VCluster_base.hpp.

◆ Vcluster_base() [2/2]

template<typename InternalMemory >

Vcluster_base< InternalMemory >::Vcluster_base	(	int *	argc,
		char ***	argv
	)

inline

Virtual cluster constructor.

Parameters

argc	pointer to arguments counts passed to the program
argv	pointer to arguments vector passed to the program

Definition at line 299 of file VCluster_base.hpp.

Member Function Documentation

◆ allGather()

template<typename InternalMemory >

template<typename T , typename Mem , template< typename > class gr>

bool Vcluster_base< InternalMemory >::allGather	(	T &	send,
		openfpm::vector< T, Mem, gr > &	v
	)

inline

Gather the data from all processors.

Send a primitive datum T and receive the same primitive T from all the other processors

Warning: the operation is asynchronous; execute() must be called to ensure it is carried out

Parameters

v	vector to receive (automatically resized)
send	data to send

Returns: true if succeed false otherwise

Definition at line 1705 of file VCluster_base.hpp.

◆ Bcast()

template<typename InternalMemory >

template<typename T , typename Mem , template< typename > class layout_base>

bool Vcluster_base< InternalMemory >::Bcast	(	openfpm::vector< T, Mem, layout_base > &	v,
		size_t	root
	)

inline

Broadcast the data to all processors.

broadcast a vector of primitives.

Warning: the operation is asynchronous; execute() must be called to ensure it is carried out. The non-root processors must resize the vector to the exact receive size. This means that each processor must know the receive size a priori

Parameters

v	vector to send in the case of the root processor and vector where to receive in the case of non-root
root	processor (who broadcast)

Returns: true if succeed false otherwise

Definition at line 1740 of file VCluster_base.hpp.

◆ clear()

template<typename InternalMemory >

void Vcluster_base< InternalMemory >::clear ( )

inline

Release the buffer used for communication.

Definition at line 1774 of file VCluster_base.hpp.

◆ clear_stats()

template<typename InternalMemory >

void Vcluster_base< InternalMemory >::clear_stats ( )

inline

Definition at line 515 of file VCluster_base.hpp.

◆ execute()

template<typename InternalMemory >

void Vcluster_base< InternalMemory >::execute ( )

inline

Execute all the requests.

Definition at line 1754 of file VCluster_base.hpp.

◆ getgpuContext()

template<typename InternalMemory >

gpu::ofp_context_t & Vcluster_base< InternalMemory >::getgpuContext ( bool iw = true )

inline

If nvidia cuda is activated return a gpu context.

Parameters

iw	ignore warning

Definition at line 453 of file VCluster_base.hpp.

◆ getMPIComm()

template<typename InternalMemory >

MPI_Comm Vcluster_base< InternalMemory >::getMPIComm ( )

inline

Get the MPI_Communicator (or processor group) this VCluster is using.

Returns: MPI communicator

Definition at line 469 of file VCluster_base.hpp.

◆ getProcessingUnits()

template<typename InternalMemory >

size_t Vcluster_base< InternalMemory >::getProcessingUnits ( )

inline

Get the total number of processors.

Returns: the total number of processors

Definition at line 479 of file VCluster_base.hpp.

◆ getProcessUnitID()

template<typename InternalMemory >

size_t Vcluster_base< InternalMemory >::getProcessUnitID ( )

inline

Get the process unit id.

Returns: the process ID (rank in MPI)

Definition at line 535 of file VCluster_base.hpp.

◆ max()

template<typename InternalMemory >

template<typename T >

void Vcluster_base< InternalMemory >::max ( T & num )

inline

Get the maximum number across all processors (or reduction with infinity norm)

Parameters

num to reduce

Definition at line 581 of file VCluster_base.hpp.

◆ min()

template<typename InternalMemory >

template<typename T >

void Vcluster_base< InternalMemory >::min ( T & num )

inline

Get the minimum number across all processors (or reduction with infinity norm)

Parameters

num to reduce

Definition at line 601 of file VCluster_base.hpp.

◆ operator=()

template<typename InternalMemory >

Vcluster_base & Vcluster_base< InternalMemory >::operator= ( const Vcluster_base< InternalMemory > & )

inline private

disable operator=

Definition at line 208 of file VCluster_base.hpp.

◆ print_stats()

template<typename InternalMemory >

void Vcluster_base< InternalMemory >::print_stats ( )

inline

Definition at line 498 of file VCluster_base.hpp.

◆ progressCommunication()

template<typename InternalMemory >

void Vcluster_base< InternalMemory >::progressCommunication ( )

inline

For asynchronous communications such as sendrecvMultipleMessagesNBXAsync, this function progresses the communication.

Definition at line 620 of file VCluster_base.hpp.

◆ queue_all_sends()

template<typename InternalMemory >

void Vcluster_base< InternalMemory >::queue_all_sends	(	size_t	n_send,
		size_t	sz[],
		size_t	prc[],
		void *	ptr[]
	)

inline private

Definition at line 220 of file VCluster_base.hpp.

◆ rank()

template<typename InternalMemory >

size_t Vcluster_base< InternalMemory >::rank ( )

inline

Get the process unit id.

It is the same as getProcessUnitID()

See also: getProcessUnitID()

Returns: the process ID

Definition at line 549 of file VCluster_base.hpp.

◆ recv() [1/2]

template<typename InternalMemory >

template<typename T , typename Mem , template< typename > class gr>

bool Vcluster_base< InternalMemory >::recv	(	size_t	proc,
		size_t	tag,
		openfpm::vector< T, Mem, gr > &	v
	)

inline

Recv data from a processor.

Warning: To avoid deadlock, every recv must be coupled with a send. If you want to send data without the other side knowing about it in advance, consider using sendrecvMultipleMessagesNBX. The operation is asynchronous; execute() must be called to ensure it is carried out

See also: sendrecvMultipleMessagesNBX

Parameters

proc	processor id
tag	id
v	vector in which to receive the data

Returns: true if succeed false otherwise

Definition at line 1676 of file VCluster_base.hpp.

◆ recv() [2/2]

template<typename InternalMemory >

bool Vcluster_base< InternalMemory >::recv	(	size_t	proc,
		size_t	tag,
		void *	v,
		size_t	sz
	)

inline

Recv data from a processor.

Warning: To avoid deadlock, every recv must be coupled with a send. If you want to send data without the other side knowing about it in advance, consider using sendrecvMultipleMessagesNBX. The operation is asynchronous; execute() must be called to ensure it is carried out

See also: sendrecvMultipleMessagesNBX

Parameters

proc	processor id
tag	id
v	buffer in which to receive the data
sz	size of the buffer

Returns: true if succeed false otherwise

Definition at line 1646 of file VCluster_base.hpp.

◆ send() [1/2]

template<typename InternalMemory >

bool Vcluster_base< InternalMemory >::send	(	size_t	proc,
		size_t	tag,
		const void *	mem,
		size_t	sz
	)

inline

Send data to a processor.

Warning: To avoid deadlock, every send must be coupled with a recv. If you want to send data without the other side knowing about it in advance, consider using sendrecvMultipleMessagesNBX. The operation is asynchronous; execute() must be called to ensure it is carried out

See also: sendrecvMultipleMessagesNBX

Parameters

proc	processor id
tag	id
mem	buffer with the data to send
sz	size

Returns: true if succeed false otherwise

Definition at line 1580 of file VCluster_base.hpp.

◆ send() [2/2]

template<typename InternalMemory >

template<typename T , typename Mem , template< typename > class gr>

bool Vcluster_base< InternalMemory >::send	(	size_t	proc,
		size_t	tag,
		openfpm::vector< T, Mem, gr > &	v
	)

inline

Send data to a processor.

Warning: To avoid deadlock, every send must be coupled with a recv. If you want to send data without the other side knowing about it in advance, consider using sendrecvMultipleMessagesNBX. The operation is asynchronous; execute() must be called to ensure it is carried out

See also: sendrecvMultipleMessagesNBX

Parameters

proc	processor id
tag	id
v	buffer to send

Returns: true if succeed false otherwise

Definition at line 1611 of file VCluster_base.hpp.

◆ sendrecvMultipleMessagesNBX() [1/5]

template<typename InternalMemory >

template<typename T >

void Vcluster_base< InternalMemory >::sendrecvMultipleMessagesNBX	(	openfpm::vector< size_t > &	prc,
		openfpm::vector< T > &	data,
		openfpm::vector< size_t > &	prc_recv,
		openfpm::vector< size_t > &	recv_sz,
		void *(*)(size_t, size_t, size_t, size_t, size_t, size_t, void *)	msg_alloc,
		void *	ptr_arg,
		long int	opt = `NONE`
	)

inline

Send and receive multiple messages.

It sends multiple messages to a set of processors and receives multiple messages from another set of processors; all the processors must call this function. In this particular case the receiver knows from which processors it is going to receive.

Suppose the following situation: the calling processor wants to communicate

2 messages of size 100 byte to processor 1
1 message of size 50 byte to processor 6
1 message of size 48 byte to processor 7
1 message of size 70 byte to processor 8

Parameters

prc	list of processors with which it should communicate [1,1,6,7,8]
data	data to send to each processor; it contains a pointer to some type T, and this type T must have a method size() that returns the size of the data structure
prc_recv	list of processors from which data is received
recv_sz	for each processor in prc_recv, the size of the data received
msg_alloc	Call-back whose purpose is to allocate space for an incoming message and give back a valid pointer. Supposing the call-back has been triggered by the processor with id 5, which wants to send me a message of 100 bytes, the call-back receives the following 6 parameters, in order: (1) the size required to receive the message [100]; (2) the total message size to receive from all the processors (NBX does not provide this information); (3) the total number of processors that want to communicate with you (NBX does not provide this information); (4) the processor id [5]; (5) ri, the request id (an id that goes from 0 to total_p and is incremented every time the call-back is called); (6) a void pointer, a parameter for passing additional data to the call-back
ptr_arg	data passed to the specified call-back function
opt	options, NONE (ignored at the moment)

Definition at line 763 of file VCluster_base.hpp.

◆ sendrecvMultipleMessagesNBX() [2/5]

template<typename InternalMemory >

template<typename T >

void Vcluster_base< InternalMemory >::sendrecvMultipleMessagesNBX	(	openfpm::vector< size_t > &	prc,
		openfpm::vector< T > &	data,
		void *(*)(size_t, size_t, size_t, size_t, size_t, size_t, void *)	msg_alloc,
		void *	ptr_arg,
		long int	opt = `NONE`
	)

inline

Send and receive multiple messages.

It sends multiple messages to a set of processors and receives multiple messages from another set of processors; all the processors must call this function

Suppose the following situation: the calling processor wants to communicate

2 vector of 100 integers to processor 1
1 vector of 50 integers to processor 6
1 vector of 48 integers to processor 7
1 vector of 70 integers to processor 8

Parameters

prc	list of processors you should communicate with [1,1,6,7,8]
data	vector containing the data to send [v=vector<vector<int>>, v.size()=5, T=vector<int>], T at the moment is only tested for vectors of 0 or more generic elements (without pointers)
msg_alloc	Call-back whose purpose is to allocate space for the incoming messages and give back a valid pointer. Supposing the call-back has been triggered by the processor with id 5, which wants to send me a message of 100 bytes, the call-back receives the following 6 parameters, in order: (1) the size required to receive the message (100); (2) the total message size to receive from all the processors (NBX does not provide this information); (3) the total number of processors that want to communicate with you (NBX does not provide this information); (4) the processor id (5); (5) ri, the request id (an id that goes from 0 to total_p and is incremented every time the call-back is called); (6) a void pointer, a parameter for passing additional data to the call-back
ptr_arg	data passed to the call-back function specified
opt	options, only NONE supported

Definition at line 911 of file VCluster_base.hpp.

◆ sendrecvMultipleMessagesNBX() [3/5]

template<typename InternalMemory >

void Vcluster_base< InternalMemory >::sendrecvMultipleMessagesNBX	(	size_t	n_send,
		size_t	sz[],
		size_t	prc[],
		void *	ptr[],
		size_t	n_recv,
		size_t	prc_recv[],
		size_t	sz_recv[],
		void *(*)(size_t, size_t, size_t, size_t, size_t, size_t, void *)	msg_alloc,
		void *	ptr_arg,
		long int	opt = `NONE`
	)

inline

Send and receive multiple messages.

It sends multiple messages to a set of processors and receives multiple messages from another set of processors; all the processors must call this function. In this particular case the receiver knows from which processors it is going to receive.

Warning: this function only works with one send for each processor

Suppose the following situation: the calling processor wants to communicate

2 messages of size 100 byte to processor 1
1 message of size 50 byte to processor 6
1 message of size 48 byte to processor 7
1 message of size 70 byte to processor 8

Parameters

n_send	number of sends for this processor [5]
prc	list of processors with which it should communicate [1,1,6,7,8]
sz	array containing the size of the message for each processor (zeros must not be present) [100,100,50,48,70]
ptr	array containing the pointers to the messages to send
msg_alloc	Call-back whose purpose is to allocate space for an incoming message and give back a valid pointer. Supposing the call-back has been triggered by the processor with id 5, which wants to send me a message of 100 bytes, the call-back receives the following 6 parameters, in order: (1) the size required to receive the message [100]; (2) the total message size to receive from all the processors (NBX does not provide this information); (3) the total number of processors that want to communicate with you (NBX does not provide this information); (4) the processor id [5]; (5) ri, the request id (an id that goes from 0 to total_p and is incremented every time the call-back is called); (6) a void pointer, a parameter for passing additional data to the call-back
ptr_arg	data passed to the specified call-back function
opt	options, NONE (ignored at the moment)

Definition at line 1037 of file VCluster_base.hpp.

◆ sendrecvMultipleMessagesNBX() [4/5]

template<typename InternalMemory >

void Vcluster_base< InternalMemory >::sendrecvMultipleMessagesNBX	(	size_t	n_send,
		size_t	sz[],
		size_t	prc[],
		void *	ptr[],
		size_t	n_recv,
		size_t	prc_recv[],
		void *(*)(size_t, size_t, size_t, size_t, size_t, size_t, void *)	msg_alloc,
		void *	ptr_arg,
		long int	opt = `NONE`
	)

inline

Send and receive multiple messages.

It sends multiple messages to a set of processors and receives multiple messages from another set of processors; all the processors must call this function. In this particular case the receiver knows from which processors it is going to receive, but does not know the sizes.

Suppose the following situation: the calling processor wants to communicate

2 messages of size 100 byte to processor 1
1 message of size 50 byte to processor 6
1 message of size 48 byte to processor 7
1 message of size 70 byte to processor 8

Parameters

n_send	number of sends for this processor [5]
prc	list of processors with which it should communicate [1,1,6,7,8]
sz	array containing the size of the message for each processor (zeros must not be present) [100,100,50,48,70]
ptr	array containing the pointers to the messages to send
msg_alloc	Call-back whose purpose is to allocate space for an incoming message and give back a valid pointer. Supposing the call-back has been triggered by the processor with id 5, which wants to send me a message of 100 bytes, the call-back receives the following 6 parameters, in order: (1) the size required to receive the message [100]; (2) the total message size to receive from all the processors (NBX does not provide this information); (3) the total number of processors that want to communicate with you (NBX does not provide this information); (4) the processor id [5]; (5) ri, the request id (an id that goes from 0 to total_p and is incremented every time the call-back is called); (6) a void pointer, a parameter for passing additional data to the call-back
ptr_arg	data passed to the specified call-back function
opt	options, NONE (ignored at the moment)

Definition at line 1189 of file VCluster_base.hpp.

◆ sendrecvMultipleMessagesNBX() [5/5]

template<typename InternalMemory >

void Vcluster_base< InternalMemory >::sendrecvMultipleMessagesNBX	(	size_t	n_send,
		size_t	sz[],
		size_t	prc[],
		void *	ptr[],
		void *(*)(size_t, size_t, size_t, size_t, size_t, size_t, void *)	msg_alloc,
		void *	ptr_arg,
		long int	opt = `NONE`
	)

inline

Send and receive multiple messages.

It sends multiple messages to a set of processors and receives multiple messages from another set of processors; all the processors must call this function

Suppose the following situation: the calling processor wants to communicate

2 messages of size 100 byte to processor 1
1 message of size 50 byte to processor 6
1 message of size 48 byte to processor 7
1 message of size 70 byte to processor 8

Parameters

n_send	number of sends for this processor [5]
prc	list of processors with which it should communicate [1,1,6,7,8]
sz	array containing the size of the message for each processor (zeros must not be present) [100,100,50,48,70]
ptr	array containing the pointers to the messages to send
msg_alloc	Call-back whose purpose is to allocate space for an incoming message and give back a valid pointer. Supposing the call-back has been triggered by the processor with id 5, which wants to send me a message of 100 bytes, the call-back receives the following 6 parameters, in order: (1) the size required to receive the message [100]; (2) the total message size to receive from all the processors (NBX does not provide this information); (3) the total number of processors that want to communicate with you (NBX does not provide this information); (4) the processor id [5]; (5) ri, the request id (an id that goes from 0 to total_p and is incremented every time the call-back is called); (6) a void pointer, a parameter for passing additional data to the call-back
ptr_arg	data passed to the specified call-back function
opt	options, NONE (ignored at the moment)

Definition at line 1357 of file VCluster_base.hpp.

◆ sendrecvMultipleMessagesNBXAsync() [1/5]

template<typename InternalMemory >

template<typename T >

void Vcluster_base< InternalMemory >::sendrecvMultipleMessagesNBXAsync	(	openfpm::vector< size_t > &	prc,
		openfpm::vector< T > &	data,
		openfpm::vector< size_t > &	prc_recv,
		openfpm::vector< size_t > &	recv_sz,
		void *(*)(size_t, size_t, size_t, size_t, size_t, size_t, void *)	msg_alloc,
		void *	ptr_arg,
		long int	opt = `NONE`
	)

inline

Send and receive multiple messages asynchronous version.

It sends multiple messages to a set of processors and receives multiple messages from another set of processors; all the processors must call this function. In this particular case the receiver knows from which processors it is going to receive.

Suppose the following situation: the calling processor wants to communicate

2 messages of size 100 byte to processor 1
1 message of size 50 byte to processor 6
1 message of size 48 byte to processor 7
1 message of size 70 byte to processor 8

Parameters

prc	list of processors with which it should communicate [1,1,6,7,8]
data	data to send to each processor; it contains a pointer to some type T, and this type T must have a method size() that returns the size of the data structure
prc_recv	list of processors from which data is received
recv_sz	for each processor in prc_recv, the size of the data received
msg_alloc	Call-back whose purpose is to allocate space for an incoming message and give back a valid pointer. Supposing the call-back has been triggered by the processor with id 5, which wants to send me a message of 100 bytes, the call-back receives the following 6 parameters, in order: (1) the size required to receive the message [100]; (2) the total message size to receive from all the processors (NBX does not provide this information); (3) the total number of processors that want to communicate with you (NBX does not provide this information); (4) the processor id [5]; (5) ri, the request id (an id that goes from 0 to total_p and is incremented every time the call-back is called); (6) a void pointer, a parameter for passing additional data to the call-back
ptr_arg	data passed to the specified call-back function
opt	options, NONE (ignored at the moment)

Definition at line 843 of file VCluster_base.hpp.

◆ sendrecvMultipleMessagesNBXAsync() [2/5]

template<typename InternalMemory >

template<typename T >

void Vcluster_base< InternalMemory >::sendrecvMultipleMessagesNBXAsync	(	openfpm::vector< size_t > &	prc,
		openfpm::vector< T > &	data,
		void *(*)(size_t, size_t, size_t, size_t, size_t, size_t, void *)	msg_alloc,
		void *	ptr_arg,
		long int	opt = `NONE`
	)

inline

Send and receive multiple messages asynchronous version.

This is the asynchronous version of Send and receive NBX. This call returns immediately; use sendrecvMultipleMessagesNBXWait to synchronize. Optionally you can use the function progressCommunication to advance the communication.

It sends multiple messages to a set of processors and receives multiple messages from another set of processors; all the processors must call this function.

Suppose the following situation: the calling processor wants to communicate

2 vectors of 100 integers to processor 1
1 vector of 50 integers to processor 6
1 vector of 48 integers to processor 7
1 vector of 70 integers to processor 8

Parameters

prc	list of processors you should communicate with [1,1,6,7,8]
data	vector containing the data to send [v=vector<vector<int>>, v.size()=5, T=vector<int>]; at the moment T is only tested for vectors of 0 or more generic elements (without pointers)
msg_alloc	This is a call-back whose purpose is to allocate space for the incoming messages and give back a valid pointer. Supposing that this call-back has been triggered by the processor of id 5 that wants to communicate to me a message of size 100 bytes, the call-back will receive the following 6 parameters, in order: (1) message size required to receive the message (100); (2) total message size to receive from all the processors (NBX does not provide this information); (3) the total number of processors that want to communicate with you (NBX does not provide this information); (4) processor id (5); (5) ri, request id (it is an id that goes from 0 to total_p, and is incremented every time message_alloc is called); (6) void pointer, parameter for additional data to pass to the call-back
ptr_arg	data passed to the call-back function specified
opt	options, only NONE supported

Definition at line 973 of file VCluster_base.hpp.

◆ sendrecvMultipleMessagesNBXAsync() [3/5]

template<typename InternalMemory >

void Vcluster_base< InternalMemory >::sendrecvMultipleMessagesNBXAsync	(	size_t	n_send,
		size_t	sz[],
		size_t	prc[],
		void *	ptr[],
		size_t	n_recv,
		size_t	prc_recv[],
		size_t	sz_recv[],
		void *(*)(size_t, size_t, size_t, size_t, size_t, size_t, void *)	msg_alloc,
		void *	ptr_arg,
		long int	opt = `NONE`
	)

inline

Send and receive multiple messages asynchronous version.

It sends multiple messages to a set of processors and receives multiple messages from another set of processors; all the processors must call this function. In this particular case the receiver knows from which processors it is going to receive.

Warning: this function only works with one send for each processor

Suppose the following situation: the calling processor wants to communicate

2 messages of size 100 bytes to processor 1
1 message of size 50 bytes to processor 6
1 message of size 48 bytes to processor 7
1 message of size 70 bytes to processor 8

Parameters

n_send	number of sends for this processor [5]
prc	list of processors with which it should communicate [1,1,6,7,8]
sz	the array contains the size of the message for each processor (zeros must not be present) [100,100,50,48,70]
ptr	array that contains the pointers to the messages to send
msg_alloc	This is a call-back whose purpose is to allocate space for the incoming message and give back a valid pointer. Supposing that this call-back has been triggered by the processor of id 5 that wants to communicate to me a message of size 100 bytes, the call-back will receive the following 6 parameters, in order: (1) message size required to receive the message [100]; (2) total message size to receive from all the processors (NBX does not provide this information); (3) the total number of processors that want to communicate with you (NBX does not provide this information); (4) processor id [5]; (5) ri, request id (it is an id that goes from 0 to total_p, and is incremented every time message_alloc is called); (6) void pointer, parameter for additional data to pass to the call-back
ptr_arg	data passed to the call-back function specified
opt	options, NONE (ignored at the moment)

Definition at line 1115 of file VCluster_base.hpp.

◆ sendrecvMultipleMessagesNBXAsync() [4/5]

template<typename InternalMemory >

void Vcluster_base< InternalMemory >::sendrecvMultipleMessagesNBXAsync	(	size_t	n_send,
		size_t	sz[],
		size_t	prc[],
		void *	ptr[],
		size_t	n_recv,
		size_t	prc_recv[],
		void *(*)(size_t, size_t, size_t, size_t, size_t, size_t, void *)	msg_alloc,
		void *	ptr_arg,
		long int	opt = `NONE`
	)

inline

Send and receive multiple messages asynchronous version.

It sends multiple messages to a set of processors and receives multiple messages from another set of processors; all the processors must call this function. In this particular case the receiver knows from which processors it is going to receive, but does not know the size.

Suppose the following situation: the calling processor wants to communicate

2 messages of size 100 bytes to processor 1
1 message of size 50 bytes to processor 6
1 message of size 48 bytes to processor 7
1 message of size 70 bytes to processor 8

Parameters

n_send	number of sends for this processor [5]
prc	list of processors with which it should communicate [1,1,6,7,8]
sz	the array contains the size of the message for each processor (zeros must not be present) [100,100,50,48,70]
ptr	array that contains the pointers to the messages to send
msg_alloc	This is a call-back whose purpose is to allocate space for the incoming message and give back a valid pointer. Supposing that this call-back has been triggered by the processor of id 5 that wants to communicate to me a message of size 100 bytes, the call-back will receive the following 6 parameters, in order: (1) message size required to receive the message [100]; (2) total message size to receive from all the processors (NBX does not provide this information); (3) the total number of processors that want to communicate with you (NBX does not provide this information); (4) processor id [5]; (5) ri, request id (it is an id that goes from 0 to total_p, and is incremented every time message_alloc is called); (6) void pointer, parameter for additional data to pass to the call-back
ptr_arg	data passed to the call-back function specified
opt	options, NONE (ignored at the moment)

Definition at line 1279 of file VCluster_base.hpp.

◆ sendrecvMultipleMessagesNBXAsync() [5/5]

template<typename InternalMemory >

void Vcluster_base< InternalMemory >::sendrecvMultipleMessagesNBXAsync	(	size_t	n_send,
		size_t	sz[],
		size_t	prc[],
		void *	ptr[],
		void *(*)(size_t, size_t, size_t, size_t, size_t, size_t, void *)	msg_alloc,
		void *	ptr_arg,
		long int	opt = `NONE`
	)

inline

Send and receive multiple messages Asynchronous version.

This is the asynchronous version of Send and receive NBX. This call returns immediately; use sendrecvMultipleMessagesNBXWait to synchronize. Optionally you can use the function progressCommunication to advance the communication.

It sends multiple messages to a set of processors and receives multiple messages from another set of processors; all the processors must call this function.

Suppose the following situation: the calling processor wants to communicate

2 messages of size 100 bytes to processor 1
1 message of size 50 bytes to processor 6
1 message of size 48 bytes to processor 7
1 message of size 70 bytes to processor 8

Parameters

n_send	number of sends for this processor [5]
prc	list of processors with which it should communicate [1,1,6,7,8]
sz	the array contains the size of the message for each processor (zeros must not be present) [100,100,50,48,70]
ptr	array that contains the pointers to the messages to send
msg_alloc	This is a call-back whose purpose is to allocate space for the incoming message and give back a valid pointer. Supposing that this call-back has been triggered by the processor of id 5 that wants to communicate to me a message of size 100 bytes, the call-back will receive the following 6 parameters, in order: (1) message size required to receive the message [100]; (2) total message size to receive from all the processors (NBX does not provide this information); (3) the total number of processors that want to communicate with you (NBX does not provide this information); (4) processor id [5]; (5) ri, request id (it is an id that goes from 0 to total_p, and is incremented every time message_alloc is called); (6) void pointer, parameter for additional data to pass to the call-back
ptr_arg	data passed to the call-back function specified
opt	options, NONE (ignored at the moment)

Definition at line 1463 of file VCluster_base.hpp.

◆ sendrecvMultipleMessagesNBXWait()

template<typename InternalMemory >

void Vcluster_base< InternalMemory >::sendrecvMultipleMessagesNBXWait ( )

inline

Send and receive multiple messages: wait for the NBX communication to complete.

Definition at line 1490 of file VCluster_base.hpp.

◆ size()

template<typename InternalMemory >

size_t Vcluster_base< InternalMemory >::size ( )

inline

Get the total number of processors.

It is the same as getProcessingUnits()

See also: getProcessingUnits()

Returns: the total number of processors

Definition at line 493 of file VCluster_base.hpp.

◆ sum()

template<typename InternalMemory >

template<typename T >

void Vcluster_base< InternalMemory >::sum ( T & num )

inline

Sum the numbers across all processors and get the result.

Parameters

num	to reduce, input and output

Definition at line 561 of file VCluster_base.hpp.

Field Documentation

◆ bar_req

template<typename InternalMemory >

MPI_Request Vcluster_base< InternalMemory >::bar_req

private

barrier request

Definition at line 202 of file VCluster_base.hpp.

◆ bar_stat

template<typename InternalMemory >

MPI_Status Vcluster_base< InternalMemory >::bar_stat

private

barrier status

Definition at line 205 of file VCluster_base.hpp.

◆ context

template<typename InternalMemory >

gpu::ofp_context_t* Vcluster_base< InternalMemory >::context

private

standard context for GPU (used if CUDA is detected, otherwise it is unused)

Definition at line 147 of file VCluster_base.hpp.

◆ log

template<typename InternalMemory >

Vcluster_log Vcluster_base< InternalMemory >::log

private

log file

Definition at line 128 of file VCluster_base.hpp.

◆ m_rank

template<typename InternalMemory >

int Vcluster_base< InternalMemory >::m_rank

private

actual rank

Definition at line 154 of file VCluster_base.hpp.

◆ m_size

template<typename InternalMemory >

int Vcluster_base< InternalMemory >::m_size

private

number of processes

Definition at line 152 of file VCluster_base.hpp.

◆ map_scatter

template<typename InternalMemory >

openfpm::vector<int> Vcluster_base< InternalMemory >::map_scatter

private

vector that contains the scatter map (it is basically an array of ones)

Definition at line 135 of file VCluster_base.hpp.

◆ NBX_active

template<typename InternalMemory >

NBX_Type Vcluster_base< InternalMemory >::NBX_active[NQUEUE]

private

Definition at line 161 of file VCluster_base.hpp.

◆ nbx_cycle

template<typename InternalMemory >

int Vcluster_base< InternalMemory >::nbx_cycle

private

NBX_cycle.

Definition at line 214 of file VCluster_base.hpp.

◆ NBX_prc_cnt_base

template<typename InternalMemory >

int Vcluster_base< InternalMemory >::NBX_prc_cnt_base = 0

private

Definition at line 174 of file VCluster_base.hpp.

◆ NBX_prc_msg_alloc

template<typename InternalMemory >

void *(* Vcluster_base< InternalMemory >::NBX_prc_msg_alloc[NQUEUE])(size_t, size_t, size_t, size_t, size_t, size_t, void *)

private

Definition at line 180 of file VCluster_base.hpp.

◆ NBX_prc_n_recv

template<typename InternalMemory >

size_t Vcluster_base< InternalMemory >::NBX_prc_n_recv[NQUEUE]

private

Definition at line 179 of file VCluster_base.hpp.

◆ NBX_prc_n_send

template<typename InternalMemory >

size_t Vcluster_base< InternalMemory >::NBX_prc_n_send[NQUEUE]

private

Definition at line 175 of file VCluster_base.hpp.

◆ NBX_prc_prc

template<typename InternalMemory >

size_t* Vcluster_base< InternalMemory >::NBX_prc_prc[NQUEUE]

private

Definition at line 176 of file VCluster_base.hpp.

◆ NBX_prc_prc_recv

template<typename InternalMemory >

size_t* Vcluster_base< InternalMemory >::NBX_prc_prc_recv[NQUEUE]

private

Definition at line 181 of file VCluster_base.hpp.

◆ NBX_prc_ptr

template<typename InternalMemory >

void** Vcluster_base< InternalMemory >::NBX_prc_ptr[NQUEUE]

private

Definition at line 177 of file VCluster_base.hpp.

◆ NBX_prc_ptr_arg

template<typename InternalMemory >

void* Vcluster_base< InternalMemory >::NBX_prc_ptr_arg[NQUEUE]

private

Definition at line 182 of file VCluster_base.hpp.

◆ NBX_prc_qcnt

template<typename InternalMemory >

int Vcluster_base< InternalMemory >::NBX_prc_qcnt = -1

private

NBX communication on queue (-1 means 0, 0 means 1, 1 means 2, ...)

Definition at line 167 of file VCluster_base.hpp.

◆ NBX_prc_reached_bar_req

template<typename InternalMemory >

bool Vcluster_base< InternalMemory >::NBX_prc_reached_bar_req[NQUEUE]

private

Is the barrier request reached.

Definition at line 170 of file VCluster_base.hpp.

◆ NBX_prc_sz

template<typename InternalMemory >

size_t* Vcluster_base< InternalMemory >::NBX_prc_sz[NQUEUE]

private

Definition at line 178 of file VCluster_base.hpp.

◆ numPE

template<typename InternalMemory >

int Vcluster_base< InternalMemory >::numPE = 1

private

number of processing unit per process

Definition at line 157 of file VCluster_base.hpp.

◆ post_exe

template<typename InternalMemory >

std::vector<int> Vcluster_base< InternalMemory >::post_exe

private

vector of functions to execute after all the requests have been performed

Definition at line 144 of file VCluster_base.hpp.

◆ proc_com

template<typename InternalMemory >

openfpm::vector<size_t> Vcluster_base< InternalMemory >::proc_com

private

temporary vector used for meta-communication (or meta-data before the real communication)

Definition at line 132 of file VCluster_base.hpp.

◆ ptr_send

template<typename InternalMemory >

openfpm::vector<void *> Vcluster_base< InternalMemory >::ptr_send[NQUEUE]

private

vector of pointers of send buffers

Definition at line 196 of file VCluster_base.hpp.

◆ r

template<typename InternalMemory >

std::vector<red> Vcluster_base< InternalMemory >::r

private

This buffer is a temporary buffer for reductions

MPI_Iallreduce does not accept the recv and send buffers to be the same; r is used to overcome this problem (it is given as second parameter), and after the execution the data is copied back

Definition at line 193 of file VCluster_base.hpp.

◆ recv_buf

template<typename InternalMemory >

openfpm::vector_fr<BMemory<InternalMemory> > Vcluster_base< InternalMemory >::recv_buf[NQUEUE]

protected

Receive buffers.

Definition at line 260 of file VCluster_base.hpp.

◆ req

template<typename InternalMemory >

openfpm::vector<MPI_Request> Vcluster_base< InternalMemory >::req

private

vector of MPI requests

Definition at line 138 of file VCluster_base.hpp.

◆ rid

template<typename InternalMemory >

size_t Vcluster_base< InternalMemory >::rid[NQUEUE]

private

request id

Definition at line 164 of file VCluster_base.hpp.

◆ shmrank

template<typename InternalMemory >

int Vcluster_base< InternalMemory >::shmrank

private

rank within the node

Definition at line 211 of file VCluster_base.hpp.

◆ stat

template<typename InternalMemory >

openfpm::vector<MPI_Status> Vcluster_base< InternalMemory >::stat

private

vector of MPI status

Definition at line 141 of file VCluster_base.hpp.

◆ sz_recv_tmp

template<typename InternalMemory >

openfpm::vector<size_t> Vcluster_base< InternalMemory >::sz_recv_tmp

Definition at line 1145 of file VCluster_base.hpp.

◆ sz_send

template<typename InternalMemory >

openfpm::vector<size_t> Vcluster_base< InternalMemory >::sz_send[NQUEUE]

private

vector of the size of send buffers

Definition at line 199 of file VCluster_base.hpp.

◆ tags

template<typename InternalMemory >

openfpm::vector<size_t> Vcluster_base< InternalMemory >::tags[NQUEUE]

protected

tags receiving

Definition at line 263 of file VCluster_base.hpp.

The documentation for this class was generated from the following file:

openfpm_vcluster/src/VCluster/VCluster_base.hpp

Detailed Description

An example of sending and receiving plain buffers

An example of sending vectors of primitives with (T=float,double,long int,...)

An example of sending vectors of complex objects

An example of gathering numbers from all processors

Public Member Functions

Data Fields

Protected Attributes

Private Member Functions

Private Attributes

Constructor & Destructor Documentation

◆ Vcluster_base() [1/2]

◆ ~Vcluster_base()

◆ Vcluster_base() [2/2]

Member Function Documentation

◆ allGather()

◆ Bcast()

◆ clear()

◆ clear_stats()

◆ execute()

◆ getgpuContext()

◆ getMPIComm()

◆ getProcessingUnits()

◆ getProcessUnitID()

◆ max()

◆ min()

◆ operator=()

◆ print_stats()

◆ progressCommunication()

◆ queue_all_sends()

◆ rank()

◆ recv() [1/2]

◆ recv() [2/2]

◆ send() [1/2]

◆ send() [2/2]

◆ sendrecvMultipleMessagesNBX() [1/5]

◆ sendrecvMultipleMessagesNBX() [2/5]

◆ sendrecvMultipleMessagesNBX() [3/5]

◆ sendrecvMultipleMessagesNBX() [4/5]

◆ sendrecvMultipleMessagesNBX() [5/5]

◆ sendrecvMultipleMessagesNBXAsync() [1/5]

◆ sendrecvMultipleMessagesNBXAsync() [2/5]

◆ sendrecvMultipleMessagesNBXAsync() [3/5]

◆ sendrecvMultipleMessagesNBXAsync() [4/5]

◆ sendrecvMultipleMessagesNBXAsync() [5/5]

◆ sendrecvMultipleMessagesNBXWait()

◆ size()

◆ sum()

Field Documentation

◆ bar_req

◆ bar_stat

◆ context

◆ log

◆ m_rank

◆ m_size

◆ map_scatter

◆ NBX_active

◆ nbx_cycle

◆ NBX_prc_cnt_base

◆ NBX_prc_msg_alloc

◆ NBX_prc_n_recv

◆ NBX_prc_n_send

◆ NBX_prc_prc

◆ NBX_prc_prc_recv

◆ NBX_prc_ptr

◆ NBX_prc_ptr_arg

◆ NBX_prc_qcnt

◆ NBX_prc_reached_bar_req

◆ NBX_prc_sz

◆ numPE

◆ post_exe

◆ proc_com

◆ ptr_send

◆ r

◆ recv_buf

◆ req

◆ rid

◆ shmrank

◆ stat

◆ sz_recv_tmp