OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
 
ParMetisDistribution.hpp
/*
 * ParMetisDistribution.hpp
 *
 *  Created on: Nov 19, 2015
 *      Author: Antonio Leo
 */

#ifndef SRC_DECOMPOSITION_PARMETISDISTRIBUTION_HPP_
#define SRC_DECOMPOSITION_PARMETISDISTRIBUTION_HPP_

#include "SubdomainGraphNodes.hpp"
#include "parmetis_util.hpp"
#include "Graph/ids.hpp"
#include "Graph/CartesianGraphFactory.hpp"

#define PARMETIS_DISTRIBUTION_ERROR 100002

/*! \brief Class that distributes sub-sub-domains across processors using the ParMetis library
 *
 */
template<unsigned int dim, typename T>
class ParMetisDistribution
{
    //! Is distributed
    bool is_distributed = false;

    //! Vcluster communication object
    Vcluster<> & v_cl;

    //! Structure that stores the cartesian grid information
    grid_sm<dim, void> gr;

    //! Rectangular domain to decompose
    Box<dim, T> domain;

    //! Global sub-sub-domain graph
    Graph_CSR<nm_v<dim>, nm_e> gp;

    //! Convert the graph to parmetis format
    Parmetis<Graph_CSR<nm_v<dim>, nm_e>> parmetis_graph;

    //! Global ids of the sub-sub-domains owned by this processor (where we set the costs)
    openfpm::vector<size_t> sub_sub_owner;

    //! Init vtxdist needed for Parmetis
    //
    // vtxdist is a common array across processors; it indicates how
    // vertices are distributed across them.
    //
    // Example: we have 3 processors
    //
    // processor 0 has 3 vertices
    // processor 1 has 5 vertices
    // processor 2 has 4 vertices
    //
    // vtxdist contains 0,3,8,12
    //
    // so the range vtxdist.get(i) .. vtxdist.get(i+1) gives the unique
    // global ids of the vertices owned by processor i
    //
    openfpm::vector<rid> vtxdist;

    //! Partition vectors, one per processor
    openfpm::vector<openfpm::vector<idx_t>> partitions;

    //! Data structure to keep track of the new vertex distribution across
    //! processors (needed to update the main graph)
    openfpm::vector<openfpm::vector<gid>> v_per_proc;

    //! Hashmap to access the global vertex id given the re-mapped one
    std::unordered_map<rid, gid> m2g;

    //! Flag to check if weights are used on vertices
    bool verticesGotWeights = false;

    /*! \brief Update the main graph with the received data of the partitions from the other processors
     *
     */
    void updateGraphs()
    {
        sub_sub_owner.clear();

        size_t Np = v_cl.getProcessingUnits();

        // Init n_vtxdist to gather information about the new decomposition
        openfpm::vector<rid> n_vtxdist(Np + 1);
        for (size_t i = 0; i <= Np; i++)
            n_vtxdist.get(i).id = 0;

        // Update the main graph with the data received from each processor i
        for (size_t i = 0; i < Np; i++)
        {
            size_t ndata = partitions.get(i).size();
            size_t k = 0;

            // Update the main graph with the received information
            for (rid l = vtxdist.get(i); k < ndata && l < vtxdist.get(i + 1); k++, ++l)
            {
                // Create the new n_vtxdist (just count the vertices assigned to each processor)
                ++n_vtxdist.get(partitions.get(i).get(k) + 1);

                // Map the vertex id from re-mapped id to global id
                auto v_id = m2g.find(l)->second.id;

                // Update the processor id in the vertex (using the old map)
                gp.template vertex_p<nm_v_proc_id>(v_id) = partitions.get(i).get(k);

                if (partitions.get(i).get(k) == (long int)v_cl.getProcessUnitID())
                {sub_sub_owner.add(v_id);}

                // Add the vertex to the temporary distribution structure (needed to update the main graph)
                v_per_proc.get(partitions.get(i).get(k)).add(getVertexGlobalId(l));
            }
        }

        // Create the new n_vtxdist (accumulate the counters)
        for (size_t i = 2; i <= Np; i++)
            n_vtxdist.get(i) += n_vtxdist.get(i - 1);

        // Copy the new decomposition into the main vtxdist
        for (size_t i = 0; i <= Np; i++)
            vtxdist.get(i) = n_vtxdist.get(i);

        // Renumber the vertices: give each vertex a new re-mapped id
        openfpm::vector<size_t> cnt;
        cnt.resize(Np);

        for (size_t i = 0 ; i < gp.getNVertex(); ++i)
        {
            size_t pid = gp.template vertex_p<nm_v_proc_id>(i);

            rid j = rid(vtxdist.get(pid).id + cnt.get(pid));
            gid gi = gid(i);

            gp.template vertex_p<nm_v_id>(i) = j.id;
            cnt.get(pid)++;

            setMapId(j,gi);
        }
    }
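
    // A worked example of the rebuild above (illustration only): with 3
    // processors and a new partition assigning 3, 5 and 4 vertices to
    // processors 0, 1 and 2, the counting loop fills
    // n_vtxdist = {0, 3, 5, 4} (each processor counts into slot p + 1),
    // and the prefix-sum loop turns it into vtxdist = {0, 3, 8, 12},
    // matching the example in the comment on the vtxdist member.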

    /*! \brief Operator to access a vertex by its re-mapped id
     *
     * \param id re-mapped id of the vertex
     *
     * \return a reference to the vertex
     */
    inline auto vertexByMapId(rid id) -> decltype( gp.vertex(m2g.find(id)->second.id) )
    {
        return gp.vertex(m2g.find(id)->second.id);
    }

    /*! \brief Operator to remap a vertex to a new position
     *
     * \param n re-mapped position
     * \param g global position
     */
    inline void setMapId(rid n, gid g)
    {
        m2g[n] = g;
    }

    /*! \brief Get the global id of the vertex given the re-mapped one
     *
     * \param n re-mapped id
     *
     * \return the global id of the vertex
     */
    inline gid getVertexGlobalId(rid n)
    {
        return m2g.find(n)->second;
    }

    /*! \brief Operator to initialize the local-to-global ids map
     *
     * Initially each vertex maps to itself (re-mapped id == global id)
     */
    void initLocalToGlobalMap()
    {
        gid g;
        rid i;
        i.id = 0;

        m2g.clear();
        for ( ; (size_t)i.id < gp.getNVertex(); ++i)
        {
            g.id = i.id;

            m2g.insert( { i, g });
        }
    }

    /*! \brief Callback of the sendrecv, it sets the size of the receive buffer of processor i
     *
     * \param msg_i size of the message to receive from processor i
     * \param i processor id
     * \param ptr pointer to the vector of partition buffers
     *
     * \return the buffer where the message will be written
     */
    static void * message_receive(size_t msg_i, size_t total_msg, size_t total_p, size_t i, size_t ri, size_t tag, void * ptr)
    {
        openfpm::vector<openfpm::vector<idx_t>> * v = static_cast<openfpm::vector<openfpm::vector<idx_t>> *>(ptr);

        v->get(i).resize(msg_i / sizeof(idx_t));

        return &(v->get(i).get(0));
    }
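
    // Note on the callback contract (an illustration, not extra API): the
    // communicator invokes message_receive for each incoming message with the
    // message size msg_i and the sender id i; resizing partitions[i] to
    // msg_i / sizeof(idx_t) entries and returning its internal buffer lets
    // the communication layer write processor i's partition vector in place,
    // avoiding an intermediate copy.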

    /*! \brief It updates the full decomposition
     *
     */
    void postDecomposition()
    {
        // Processor id
        size_t p_id = v_cl.getProcessUnitID();

        // Number of processors
        size_t Np = v_cl.getProcessingUnits();

        // Number of local vertices
        size_t nl_vertex = vtxdist.get(p_id+1).id - vtxdist.get(p_id).id;

        // Get the resulting partition for this processor
        idx_t * partition = parmetis_graph.getPartition();

        // Prepare the vector of arrays to contain all partitions
        partitions.get(p_id).resize(nl_vertex);
        if (nl_vertex != 0)
        {std::copy(partition, partition + nl_vertex, &partitions.get(p_id).get(0));}

        // Reset the data structure that keeps track of the new vertex
        // distribution across processors (needed to update the main graph)
        for (size_t i = 0; i < Np; ++i)
        {
            v_per_proc.get(i).clear();
        }

        // Communicate the local distribution to the other processors
        // to reconstruct individually the global graph
        openfpm::vector<size_t> prc;
        openfpm::vector<size_t> sz;
        openfpm::vector<void *> ptr;

        for (size_t i = 0; i < Np; i++)
        {
            if (i != v_cl.getProcessUnitID())
            {
                partitions.get(i).clear();
                prc.add(i);
                sz.add(nl_vertex * sizeof(idx_t));
                ptr.add(partitions.get(p_id).getPointer());
            }
        }

        if (prc.size() == 0)
            v_cl.sendrecvMultipleMessagesNBX(0, NULL, NULL, NULL, message_receive, &partitions,NONE);
        else
            v_cl.sendrecvMultipleMessagesNBX(prc.size(), &sz.get(0), &prc.get(0), &ptr.get(0), message_receive, &partitions,NONE);

        // Update the graphs with the received data
        updateGraphs();
    }
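
    // After this exchange every processor holds the complete partition:
    // entry i of 'partitions' contains the ParMetis assignment of the
    // vertices owned by processor i, which is exactly what updateGraphs()
    // needs to rebuild the global graph locally.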

public:

    /*! \brief Constructor
     *
     * \param v_cl Vcluster to use as the communication object in this class
     */
    ParMetisDistribution(Vcluster<> & v_cl)
    :is_distributed(false),v_cl(v_cl), parmetis_graph(v_cl, v_cl.getProcessingUnits()), vtxdist(v_cl.getProcessingUnits() + 1), partitions(v_cl.getProcessingUnits()), v_per_proc(v_cl.getProcessingUnits())
    {
    }

    /*! \brief Copy constructor
     *
     * \param pm distribution to copy
     */
    ParMetisDistribution(const ParMetisDistribution<dim,T> & pm)
    :v_cl(pm.v_cl),parmetis_graph(v_cl, v_cl.getProcessingUnits())
    {
        this->operator=(pm);
    }

    /*! \brief Move constructor
     *
     * \param pm distribution to move
     */
    ParMetisDistribution(ParMetisDistribution<dim,T> && pm)
    :v_cl(pm.v_cl)
    {
        this->operator=(pm);
    }

    /*! \brief Create the Cartesian graph
     *
     * \param grid grid info
     * \param dom domain to decompose
     */
    void createCartGraph(grid_sm<dim, void> & grid, Box<dim, T> & dom)
    {
        size_t bc[dim];

        for (size_t i = 0 ; i < dim ; i++)
            bc[i] = NON_PERIODIC;

        // Set grid and domain
        gr = grid;
        domain = dom;

        // Create a cartesian grid graph
        CartesianGraphFactory<dim, Graph_CSR<nm_v<dim>, nm_e>> g_factory_part;
        gp = g_factory_part.template construct<NO_EDGE, nm_v_id, T, dim - 1, 0>(gr.getSize(), domain, bc);
        initLocalToGlobalMap();

        size_t Np = v_cl.getProcessingUnits();

        // Divide the vertices among the Np processors:
        // the first (gr.size() % Np) processors get (div_v + 1) vertices,
        // the remaining ones get div_v vertices
        size_t mod_v = gr.size() % Np;
        size_t div_v = gr.size() / Np;

        for (size_t i = 0; i <= Np; i++)
        {
            if (i < mod_v)
                vtxdist.get(i).id = (div_v + 1) * i;
            else
                vtxdist.get(i).id = (div_v) * i + mod_v;
        }

        // Init the z axis to 0.0 (to fix in graphFactory)
        if (dim < 3)
        {
            for (size_t i = 0; i < gp.getNVertex(); i++)
            {
                gp.vertex(i).template get<nm_v_x>()[2] = 0.0;
            }
        }
        for (size_t i = 0; i < gp.getNVertex(); i++)
        {
            gp.vertex(i).template get<nm_v_global_id>() = i;
        }
    }
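
    // Example of the initial block distribution above (illustration only):
    // gr.size() = 10 vertices on Np = 3 processors gives mod_v = 1 and
    // div_v = 3, hence vtxdist = {0, 4, 7, 10}: processor 0 owns 4 vertices
    // and processors 1 and 2 own 3 each.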

    /*! \brief Get the current graph (main)
     *
     * \return the main sub-sub-domain graph
     */
    Graph_CSR<nm_v<dim>, nm_e> & getGraph()
    {
        return gp;
    }

    /*! \brief Create the decomposition
     *
     */
    void decompose()
    {
        if (is_distributed == false)
            parmetis_graph.initSubGraph(gp, vtxdist, m2g, verticesGotWeights);
        else
            parmetis_graph.reset(gp, vtxdist, m2g, verticesGotWeights);

        // Decompose
        parmetis_graph.decompose(vtxdist);

        // update after decomposition
        postDecomposition();

        is_distributed = true;
    }

    /*! \brief Refine the current decomposition
     *
     */
    void refine()
    {
        // Reset the parmetis graph and reconstruct it
        parmetis_graph.reset(gp, vtxdist, m2g, verticesGotWeights);

        // Refine
        parmetis_graph.refine(vtxdist);

        postDecomposition();
    }

    /*! \brief Redecompose the current decomposition
     *
     */
    void redecompose()
    {
        // Reset the parmetis graph and reconstruct it
        parmetis_graph.reset(gp, vtxdist, m2g, verticesGotWeights);

        // Redecompose
        parmetis_graph.redecompose(vtxdist);

        postDecomposition();
    }

    /*! \brief Compute the unbalance of this processor compared to the optimal balance
     *
     * \return the unbalance percentage
     */
    float getUnbalance()
    {
        long t_cost = 0;

        long min, max, sum;
        float unbalance;

        t_cost = getProcessorLoad();

        min = t_cost;
        max = t_cost;
        sum = t_cost;

        v_cl.min(min);
        v_cl.max(max);
        v_cl.sum(sum);
        v_cl.execute();

        unbalance = ((float) (max - min)) / (float) (sum / v_cl.getProcessingUnits());

        return unbalance * 100;
    }
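
    // Example of the formula above (illustration only): processor loads
    // {90, 100, 110} give min = 90, max = 110, sum = 300, so the returned
    // unbalance is ((110 - 90) / (300 / 3)) * 100 = 20, i.e. a spread of
    // 20% of the average load.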

    /*! \brief Function that returns the position of the vertex in space
     *
     * \param id vertex id
     * \param pos array that will contain x, y (and z in 3D)
     */
    void getSubSubDomainPosition(size_t id, T (&pos)[dim])
    {
#ifdef SE_CLASS1
        if (id >= gp.getNVertex())
            std::cerr << __FILE__ << ":" << __LINE__ << " Such vertex doesn't exist (id = " << id << ", " << "total size = " << gp.getNVertex() << ")\n";
#endif

        // Copy the geometrical information into the pos array
        pos[0] = gp.vertex(id).template get<nm_v_x>()[0];
        pos[1] = gp.vertex(id).template get<nm_v_x>()[1];
        if (dim == 3)
            pos[2] = gp.vertex(id).template get<nm_v_x>()[2];
    }

    /*! \brief Function that sets the weight of the vertex
     *
     * \param id vertex id
     * \param weight weight to give to the vertex
     */
    inline void setComputationCost(size_t id, size_t weight)
    {
        if (!verticesGotWeights)
        {verticesGotWeights = true;}

#ifdef SE_CLASS1
        if (id >= gp.getNVertex())
        {std::cerr << __FILE__ << ":" << __LINE__ << " Such vertex doesn't exist (id = " << id << ", " << "total size = " << gp.getNVertex() << ")\n";}
#endif

        // Update the vertex in the main graph
        gp.vertex(id).template get<nm_v_computation>() = weight;
    }

    /*! \brief Check if weights are used on the vertices
     *
     * \return true if weights are used in the decomposition
     */
    bool weightsAreUsed()
    {
        return verticesGotWeights;
    }

    /*! \brief Function that gets the weight of the vertex
     *
     * \param id vertex id
     *
     * \return the weight of the vertex
     */
    size_t getSubSubDomainComputationCost(size_t id)
    {
#ifdef SE_CLASS1
        if (id >= gp.getNVertex())
            std::cerr << __FILE__ << ":" << __LINE__ << " Such vertex doesn't exist (id = " << id << ", " << "total size = " << gp.getNVertex() << ")\n";
#endif

        return gp.vertex(id).template get<nm_v_computation>();
    }

    /*! \brief Compute the processor load counting the total weight of its vertices
     *
     * \return the computational load of the local graph
     */
    size_t getProcessorLoad()
    {
        size_t load = 0;

        // Processor id
        size_t p_id = v_cl.getProcessUnitID();

        for (rid i = vtxdist.get(p_id); i < vtxdist.get(p_id+1) ; ++i)
            load += gp.vertex(m2g.find(i)->second.id).template get<nm_v_computation>();

        return load;
    }

    /*! \brief Set the migration cost of the vertex id
     *
     * \param id of the vertex to update
     * \param migration cost of the migration
     */
    void setMigrationCost(size_t id, size_t migration)
    {
#ifdef SE_CLASS1
        if (id >= gp.getNVertex())
            std::cerr << __FILE__ << ":" << __LINE__ << " Such vertex doesn't exist (id = " << id << ", " << "total size = " << gp.getNVertex() << ")\n";
#endif

        gp.vertex(id).template get<nm_v_migration>() = migration;
    }

    /*! \brief Set the communication cost of the edge
     *
     * \param v_id id of the source vertex of the edge
     * \param e i-th child edge of the vertex
     * \param communication communication cost
     */
    void setCommunicationCost(size_t v_id, size_t e, size_t communication)
    {
#ifdef SE_CLASS1
        size_t e_id = v_id + e;

        if (e_id >= gp.getNEdge())
            std::cerr << "Such edge doesn't exist (id = " << e_id << ", " << "total size = " << gp.getNEdge() << ")\n";
#endif

        gp.getChildEdge(v_id, e).template get<nm_e::communication>() = communication;
    }

    /*! \brief Returns the total number of sub-sub-domains in the distribution graph
     *
     * \return the number of sub-sub-domains
     */
    size_t getNSubSubDomains() const
    {
        return gp.getNVertex();
    }

    /*! \brief Return the total number of sub-sub-domains this processor owns
     *
     * \return the number of owned sub-sub-domains
     */
    size_t getNOwnerSubSubDomains() const
    {
        return sub_sub_owner.size();
    }

    /*! \brief Return the global id of the owned sub-sub-domain
     *
     * \param id index in the list of owned sub-sub-domains
     *
     * \return the global id
     */
    size_t getOwnerSubSubDomain(size_t id) const
    {
        return sub_sub_owner.get(id);
    }

    /*! \brief Returns the total number of neighbors of the sub-sub-domain id
     *
     * \param id of the sub-sub-domain
     *
     * \return the number of neighbors
     */
    size_t getNSubSubDomainNeighbors(size_t id)
    {
#ifdef SE_CLASS1
        if (id >= gp.getNVertex())
            std::cerr << __FILE__ << ":" << __LINE__ << " Such vertex doesn't exist (id = " << id << ", " << "total size = " << gp.getNVertex() << ")\n";
#endif

        return gp.getNChilds(id);
    }

    /*! \brief In case we do not do dynamic load balancing, it is safe to
     *         eliminate the full internal graph and its support structures
     */
    void destroy_internal_graph()
    {
        gp.destroy();
        partitions.clear();
        partitions.shrink_to_fit();
        v_per_proc.clear();
        v_per_proc.shrink_to_fit();
        m2g.clear();
        m2g.rehash(0);
    }

    /*! \brief Print the current distribution and save it to a VTK file
     *
     * \param file output filename
     */
    void write(const std::string & file)
    {
        VTKWriter<Graph_CSR<nm_v<dim>, nm_e>, VTK_GRAPH> gv2(gp);
        gv2.write(std::to_string(v_cl.getProcessUnitID()) + "_" + file + ".vtk");
    }

    const ParMetisDistribution<dim,T> & operator=(const ParMetisDistribution<dim,T> & dist)
    {
        is_distributed = dist.is_distributed;
        gr = dist.gr;
        domain = dist.domain;
        gp = dist.gp;
        vtxdist = dist.vtxdist;
        partitions = dist.partitions;
        v_per_proc = dist.v_per_proc;
        verticesGotWeights = dist.verticesGotWeights;
        sub_sub_owner = dist.sub_sub_owner;
        m2g = dist.m2g;
        parmetis_graph = dist.parmetis_graph;

        return *this;
    }

    const ParMetisDistribution<dim,T> & operator=(ParMetisDistribution<dim,T> && dist)
    {
        is_distributed = dist.is_distributed;
        gr = dist.gr;
        domain = dist.domain;
        gp.swap(dist.gp);
        vtxdist.swap(dist.vtxdist);
        partitions.swap(dist.partitions);
        v_per_proc.swap(dist.v_per_proc);
        verticesGotWeights = dist.verticesGotWeights;
        sub_sub_owner.swap(dist.sub_sub_owner);
        m2g.swap(dist.m2g);
        parmetis_graph = dist.parmetis_graph;

        return *this;
    }

    /*! \brief Return the position of the sub-sub-domain
     *
     * \param j index in the list of owned sub-sub-domains
     * \param p point that will contain the position
     */
    void getSubSubDomainPos(size_t j, Point<dim,T> & p)
    {
        for (size_t i = 0 ; i < dim ; i++)
        {p.get(i) = gp.template vertex_p<0>(sub_sub_owner.get(j))[i];}
    }

    /*! \brief Get the decomposition counter
     *
     * \return the decomposition counter
     */
    size_t get_ndec()
    {
        return parmetis_graph.get_ndec();
    }

    /*! \brief Set the tolerance for each partition
     *
     * \param tol tolerance
     */
    void setDistTol(double tol)
    {
        parmetis_graph.setDistTol(tol);
    }
};

#endif /* SRC_DECOMPOSITION_PARMETISDISTRIBUTION_HPP_ */
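
A minimal usage sketch (not part of the header) of how this class is typically driven. It assumes an initialized OpenFPM runtime providing a Vcluster<>, 2D float coordinates, and an include path matching your build setup; the grid size, domain bounds, tolerance and cost values are illustrative only.

#include "ParMetisDistribution.hpp"

void distribute_example(Vcluster<> & v_cl)
{
    // 16x16 grid of sub-sub-domains over the unit square
    size_t sz[2] = {16, 16};
    grid_sm<2, void> info(sz);
    Box<2, float> box({0.0f, 0.0f}, {1.0f, 1.0f});

    ParMetisDistribution<2, float> dist(v_cl);
    dist.createCartGraph(info, box);

    // Optionally bias the partition with per-vertex computation costs
    for (size_t i = 0; i < dist.getNSubSubDomains(); i++)
    {dist.setComputationCost(i, 1 + (i % 4));}

    dist.setDistTol(1.05);  // allow ~5% imbalance (ParMetis-style tolerance)
    dist.decompose();       // first decomposition

    // ... update the costs as the simulation evolves, then rebalance
    dist.refine();

    dist.write("dist");     // writes one VTK file per processor: <rank>_dist.vtk
}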