OpenFPM  5.2.0
Project that contain the implementation of distributed structures
ParMetisDistribution.hpp
1 /*
2  * ParMetisDistribution.hpp
3  *
4  * Created on: Nov 19, 2015
5  * Author: Antonio Leo
6  */
7 
8 
9 #ifndef SRC_DECOMPOSITION_PARMETISDISTRIBUTION_HPP_
10 #define SRC_DECOMPOSITION_PARMETISDISTRIBUTION_HPP_
11 
12 
13 #include "SubdomainGraphNodes.hpp"
14 #include "parmetis_util.hpp"
15 #include "Graph/ids.hpp"
16 #include "Graph/CartesianGraphFactory.hpp"
17 
18 #define PARMETIS_DISTRIBUTION_ERROR 100002
19 
35 template<unsigned int dim, typename T>
37 {
39  bool is_distributed = false;
40 
43 
46 
49 
52 
55 
58 
60  //
61  // vtxdist is a common array across processor, it indicate how
62  // vertex are distributed across processors
63  //
64  // Example we have 3 processors
65  //
66  // processor 0 has 3 vertices
67  // processor 1 has 5 vertices
68  // processor 2 has 4 vertices
69  //
70  // vtxdist contain, 0,3,8,12
71  //
72  // vtx dist is the unique global-id of the vertices
73  //
75 
78 
81 
83  std::unordered_map<rid, gid> m2g;
84 
86  bool verticesGotWeights = false;
87 
91  void updateGraphs()
92  {
93  sub_sub_owner.clear();
94 
95  size_t Np = v_cl.getProcessingUnits();
96 
97  // Init n_vtxdist to gather informations about the new decomposition
98  openfpm::vector<rid> n_vtxdist(Np + 1);
99  for (size_t i = 0; i <= Np; i++)
100  n_vtxdist.get(i).id = 0;
101 
102  // Update the main graph with received data from processor i
103  for (size_t i = 0; i < Np; i++)
104  {
105  size_t ndata = partitions.get(i).size();
106  size_t k = 0;
107 
108  // Update the main graph with the received informations
109  for (rid l = vtxdist.get(i); k < ndata && l < vtxdist.get(i + 1); k++, ++l)
110  {
111  // Create new n_vtxdist (just count processors vertices)
112  ++n_vtxdist.get(partitions.get(i).get(k) + 1);
113 
114  // vertex id from vtx to grobal id
115  auto v_id = m2g.find(l)->second.id;
116 
117  // Update proc id in the vertex (using the old map)
118  gp.template vertex_p<nm_v_proc_id>(v_id) = partitions.get(i).get(k);
119 
120  if (partitions.get(i).get(k) == (long int)v_cl.getProcessUnitID())
121  {sub_sub_owner.add(v_id);}
122 
123  // Add vertex to temporary structure of distribution (needed to update main graph)
124  v_per_proc.get(partitions.get(i).get(k)).add(getVertexGlobalId(l));
125  }
126  }
127 
128  // Create new n_vtxdist (accumulate the counters)
129  for (size_t i = 2; i <= Np; i++)
130  n_vtxdist.get(i) += n_vtxdist.get(i - 1);
131 
132  // Copy the new decomposition in the main vtxdist
133  for (size_t i = 0; i <= Np; i++)
134  vtxdist.get(i) = n_vtxdist.get(i);
135 
137  cnt.resize(Np);
138 
139  for (size_t i = 0 ; i < gp.getNVertex(); ++i)
140  {
141  size_t pid = gp.template vertex_p<nm_v_proc_id>(i);
142 
143  rid j = rid(vtxdist.get(pid).id + cnt.get(pid));
144  gid gi = gid(i);
145 
146  gp.template vertex_p<nm_v_id>(i) = j.id;
147  cnt.get(pid)++;
148 
149  setMapId(j,gi);
150  }
151  }
152 
160  inline auto vertexByMapId(rid id) -> decltype( gp.vertex(m2g.find(id)->second.id) )
161  {
162  return gp.vertex(m2g.find(id)->second.id);
163  }
164 
171  inline void setMapId(rid n, gid g)
172  {
173  m2g[n] = g;
174  }
175 
183  {
184  return m2g.find(n)->second;
185  }
186 
193  {
194  gid g;
195  rid i;
196  i.id = 0;
197 
198  m2g.clear();
199  for ( ; (size_t)i.id < gp.getNVertex(); ++i)
200  {
201  g.id = i.id;
202 
203  m2g.insert( { i, g });
204  }
205  }
206 
216  static void * message_receive(size_t msg_i, size_t total_msg, size_t total_p, size_t i, size_t ri, size_t tag, void * ptr)
217  {
219 
220  v->get(i).resize(msg_i / sizeof(idx_t));
221 
222  return &(v->get(i).get(0));
223  }
224 
230  {
232  size_t p_id = v_cl.getProcessUnitID();
233 
235  size_t Np = v_cl.getProcessingUnits();
236 
237  // Number of local vertex
238  size_t nl_vertex = vtxdist.get(p_id+1).id - vtxdist.get(p_id).id;
239 
241  idx_t * partition = parmetis_graph.getPartition();
242 
244  partitions.get(p_id).resize(nl_vertex);
245  if (nl_vertex != 0)
246  {std::copy(partition, partition + nl_vertex, &partitions.get(p_id).get(0));}
247 
248  // Reset data structure to keep trace of new vertices distribution in processors (needed to update main graph)
249  for (size_t i = 0; i < Np; ++i)
250  {
251  v_per_proc.get(i).clear();
252  }
253 
254  // Communicate the local distribution to the other processors
255  // to reconstruct individually the global graph
259 
260  for (size_t i = 0; i < Np; i++)
261  {
262  if (i != v_cl.getProcessUnitID())
263  {
264  partitions.get(i).clear();
265  prc.add(i);
266  sz.add(nl_vertex * sizeof(idx_t));
267  ptr.add(partitions.get(p_id).getPointer());
268  }
269  }
270 
271  if (prc.size() == 0)
272  v_cl.sendrecvMultipleMessagesNBX(0, NULL, NULL, NULL, message_receive, &partitions,NONE);
273  else
274  v_cl.sendrecvMultipleMessagesNBX(prc.size(), &sz.get(0), &prc.get(0), &ptr.get(0), message_receive, &partitions,NONE);
275 
276  // Update graphs with the received data
277  updateGraphs();
278  }
279 
280 
281 public:
282 
288  :is_distributed(false),v_cl(v_cl), parmetis_graph(v_cl, v_cl.getProcessingUnits()), vtxdist(v_cl.getProcessingUnits() + 1), partitions(v_cl.getProcessingUnits()), v_per_proc(v_cl.getProcessingUnits())
289  {
290  }
291 
298  :v_cl(pm.v_cl),parmetis_graph(v_cl, v_cl.getProcessingUnits())
299  {
300  this->operator=(pm);
301  }
302 
309  :v_cl(pm.v_cl)
310  {
311  this->operator=(pm);
312  }
313 
320  {
321  size_t bc[dim];
322 
323  for (size_t i = 0 ; i < dim ; i++)
324  bc[i] = NON_PERIODIC;
325 
326  // Set grid and domain
327  gr = grid;
328  domain = dom;
329 
330  // Create a cartesian grid graph
332  gp = g_factory_part.template construct<NO_EDGE, nm_v_id, T, dim - 1, 0>(gr.getSize(), domain, bc);
334 
336  size_t Np = v_cl.getProcessingUnits();
337 
341  size_t mod_v = gr.size() % Np;
342  size_t div_v = gr.size() / Np;
343 
344  for (size_t i = 0; i <= Np; i++)
345  {
346  if (i < mod_v)
347  vtxdist.get(i).id = (div_v + 1) * i;
348  else
349  vtxdist.get(i).id = (div_v) * i + mod_v;
350  }
351 
352  // Init to 0.0 axis z (to fix in graphFactory)
353  if (dim < 3)
354  {
355  for (size_t i = 0; i < gp.getNVertex(); i++)
356  {
357  gp.vertex(i).template get<nm_v_x>()[2] = 0.0;
358  }
359  }
360  for (size_t i = 0; i < gp.getNVertex(); i++)
361  {
362  gp.vertex(i).template get<nm_v_global_id>() = i;
363  }
364 
365  }
366 
371  {
372  return gp;
373  }
374 
378  void decompose()
379  {
380  if (is_distributed == false)
382  else
384 
387 
388  // update after decomposition
390 
391  is_distributed = true;
392  }
393 
400  void refine()
401  {
402  // Reset parmetis graph and reconstruct it
404 
405  // Refine
407 
409  }
410 
417  void redecompose()
418  {
419  // Reset parmetis graph and reconstruct it
421 
422  // Refine
424 
426  }
427 
432  float getUnbalance()
433  {
434  long t_cost = 0;
435 
436  long min, max, sum;
437  float unbalance;
438 
439  t_cost = getProcessorLoad();
440 
441  min = t_cost;
442  max = t_cost;
443  sum = t_cost;
444 
445  v_cl.min(min);
446  v_cl.max(max);
447  v_cl.sum(sum);
448  v_cl.execute();
449 
450  unbalance = ((float) (max - min)) / (float) (sum / v_cl.getProcessingUnits());
451 
452  return unbalance * 100;
453  }
454 
461  void getSubSubDomainPosition(size_t id, T (&pos)[dim])
462  {
463 #ifdef SE_CLASS1
464  if (id >= gp.getNVertex())
465  std::cerr << __FILE__ << ":" << __LINE__ << "Such vertex doesn't exist (id = " << id << ", " << "total size = " << gp.getNVertex() << ")\n";
466 #endif
467 
468  // Copy the geometrical informations inside the pos vector
469  pos[0] = gp.vertex(id).template get<nm_v_x>()[0];
470  pos[1] = gp.vertex(id).template get<nm_v_x>()[1];
471  if (dim == 3)
472  pos[2] = gp.vertex(id).template get<nm_v_x>()[2];
473  }
474 
481  inline void setComputationCost(size_t id, size_t weight)
482  {
483  if (!verticesGotWeights)
484  {verticesGotWeights = true;}
485 
486 #ifdef SE_CLASS1
487  if (id >= gp.getNVertex())
488  {std::cerr << __FILE__ << ":" << __LINE__ << "Such vertex doesn't exist (id = " << id << ", " << "total size = " << gp.getNVertex() << ")\n";}
489 #endif
490 
491  // Update vertex in main graph
492  gp.vertex(id).template get<nm_v_computation>() = weight;
493  }
494 
500  {
501  return verticesGotWeights;
502  }
503 
510  {
511 #ifdef SE_CLASS1
512  if (id >= gp.getNVertex())
513  std::cerr << __FILE__ << ":" << __LINE__ << "Such vertex doesn't exist (id = " << id << ", " << "total size = " << gp.getNVertex() << ")\n";
514 #endif
515 
516  return gp.vertex(id).template get<nm_v_computation>();
517  }
518 
524  {
525  size_t load = 0;
526 
527  // Processor id
528  size_t p_id = v_cl.getProcessUnitID();
529 
530 
531  for (rid i = vtxdist.get(p_id); i < vtxdist.get(p_id+1) ; ++i)
532  load += gp.vertex(m2g.find(i)->second.id).template get<nm_v_computation>();
533 
534  //std::cout << v_cl.getProcessUnitID() << " weight " << load << " size " << sub_g.getNVertex() << "\n";
535  return load;
536  }
537 
543  void setMigrationCost(size_t id, size_t migration)
544  {
545 #ifdef SE_CLASS1
546  if (id >= gp.getNVertex())
547  std::cerr << __FILE__ << ":" << __LINE__ << "Such vertex doesn't exist (id = " << id << ", " << "total size = " << gp.getNVertex() << ")\n";
548 #endif
549 
550  gp.vertex(id).template get<nm_v_migration>() = migration;
551  }
552 
559  void setCommunicationCost(size_t v_id, size_t e, size_t communication)
560  {
561 #ifdef SE_CLASS1
562 
563  size_t e_id = v_id + e;
564 
565  if (e_id >= gp.getNEdge())
566  std::cerr << "Such edge doesn't exist (id = " << e_id << ", " << "total size = " << gp.getNEdge() << ")\n";
567 #endif
568 
569  gp.getChildEdge(v_id, e).template get<nm_e::communication>() = communication;
570  }
571 
577  size_t getNSubSubDomains() const
578  {
579  return gp.getNVertex();
580  }
581 
587  size_t getNOwnerSubSubDomains() const
588  {
589  return sub_sub_owner.size();
590  }
591 
599  size_t getOwnerSubSubDomain(size_t id) const
600  {
601  return sub_sub_owner.get(id);
602  }
603 
611  size_t getNSubSubDomainNeighbors(size_t id)
612  {
613 #ifdef SE_CLASS1
614  if (id >= gp.getNVertex())
615  std::cerr << __FILE__ << ":" << __LINE__ << "Such vertex doesn't exist (id = " << id << ", " << "total size = " << gp.getNVertex() << ")\n";
616 #endif
617 
618  return gp.getNChilds(id);
619  }
620 
627  {
628  gp.destroy();
629  partitions.clear();
630  partitions.shrink_to_fit();
631  v_per_proc.clear();
632  v_per_proc.shrink_to_fit();
633  m2g.clear();
634  m2g.rehash(0);
635  }
636 
642  void write(const std::string & file)
643  {
644  VTKWriter<Graph_CSR<nm_v<dim>, nm_e>, VTK_GRAPH> gv2(gp);
645  gv2.write(std::to_string(v_cl.getProcessUnitID()) + "_" + file + ".vtk");
646  }
647 
648  const ParMetisDistribution<dim,T> & operator=(const ParMetisDistribution<dim,T> & dist)
649  {
650  is_distributed = dist.is_distributed;
651  gr = dist.gr;
652  domain = dist.domain;
653  gp = dist.gp;
654  vtxdist = dist.vtxdist;
655  partitions = dist.partitions;
656  v_per_proc = dist.v_per_proc;
657  verticesGotWeights = dist.verticesGotWeights;
658  sub_sub_owner = dist.sub_sub_owner;
659  m2g = dist.m2g;
660  parmetis_graph = dist.parmetis_graph;
661 
662  return *this;
663  }
664 
666  {
667  is_distributed = dist.is_distributed;
668  gr = dist.gr;
669  domain = dist.domain;
670  gp.swap(dist.gp);
671  vtxdist.swap(dist.vtxdist);
672  partitions.swap(dist.partitions);
673  v_per_proc.swap(dist.v_per_proc);
674  verticesGotWeights = dist.verticesGotWeights;
675  sub_sub_owner.swap(dist.sub_sub_owner);
676  m2g.swap(dist.m2g);
677  parmetis_graph = dist.parmetis_graph;
678 
679  return *this;
680  }
681 
688  void getSubSubDomainPos(size_t j, Point<dim,T> & p)
689  {
690  for (size_t i = 0 ; i < dim ; i++)
691  {p.get(i) = gp.template vertex_p<0>(sub_sub_owner.get(j))[i];}
692  }
693 
699  size_t get_ndec()
700  {
701  return parmetis_graph.get_ndec();
702  }
703 
709  void setDistTol(double tol)
710  {
712  }
713 };
714 
715 #endif /* SRC_DECOMPOSITION_PARMETISDISTRIBUTION_HPP_ */
This class represent an N-dimensional box.
Definition: Box.hpp:60
This class construct a cartesian graph.
Structure that store a graph in CSR format or basically in compressed adjacency matrix format.
Definition: map_graph.hpp:305
auto vertex(size_t id) -> decltype(v.get(id))
Function to access the vertex.
Definition: map_graph.hpp:596
size_t getNEdge() const
Return the number of edges.
Definition: map_graph.hpp:1056
auto getChildEdge(size_t v, size_t v_e) -> decltype(e.get(0))
Get the vertex edge.
Definition: map_graph.hpp:797
void destroy()
operator to clear the whole graph
Definition: map_graph.hpp:683
size_t getNChilds(size_t c) const
Return the number of childs of a vertex.
Definition: map_graph.hpp:768
void swap(Graph_CSR< V, E > &g)
swap the memory of g with this graph
Definition: map_graph.hpp:976
size_t getNVertex() const
Return the number of the vertex.
Definition: map_graph.hpp:1045
Class that distribute sub-sub-domains across processors using ParMetis Library.
void redecompose()
Redecompose current decomposition.
void refine()
Refine current decomposition.
ParMetisDistribution(ParMetisDistribution< dim, T > &&pm)
gid getVertexGlobalId(rid n)
Get the global id of the vertex given the re-mapped one.
grid_sm< dim, void > gr
Structure that store the cartesian grid information.
openfpm::vector< rid > vtxdist
Init vtxdist needed for Parmetis.
ParMetisDistribution(const ParMetisDistribution< dim, T > &pm)
void postDecomposition()
It update the full decomposition.
Parmetis< Graph_CSR< nm_v< dim >, nm_e > > parmetis_graph
Convert the graph to parmetis format.
size_t getOwnerSubSubDomain(size_t id) const
Return the global id of the owned sub-sub-domain.
void setDistTol(double tol)
Set the tolerance for each partition.
std::unordered_map< rid, gid > m2g
Hashmap to access to the global position given the re-mapped one (needed for access the map)
float getUnbalance()
Compute the unbalance of the processor compared to the optimal balance.
void setMigrationCost(size_t id, size_t migration)
Set migration cost of the vertex id.
openfpm::vector< openfpm::vector< idx_t > > partitions
partitions
void destroy_internal_graph()
In case we do not do Dynamic load balancing this this data-structure it is safe to eliminate the full...
Graph_CSR< nm_v< dim >, nm_e > gp
Global sub-sub-domain graph.
void updateGraphs()
Update main graph ad subgraph with the received data of the partitions from the other processors.
void initLocalToGlobalMap()
operator to init ids vector
Box< dim, T > domain
rectangular domain to decompose
bool is_distributed
Is distributed.
openfpm::vector< openfpm::vector< gid > > v_per_proc
Init data structure to keep trace of new vertices distribution in processors (needed to update main g...
void write(const std::string &file)
Print the current distribution and save it to VTK file.
size_t getProcessorLoad()
Compute the processor load counting the total weights of its vertices.
void decompose()
Create the decomposition.
void setCommunicationCost(size_t v_id, size_t e, size_t communication)
Set communication cost of the edge id.
size_t get_ndec()
Get the decomposition counter.
bool weightsAreUsed()
Checks if weights are used on the vertices.
size_t getNOwnerSubSubDomains() const
Return the total number of sub-sub-domains this processor own.
static void * message_receive(size_t msg_i, size_t total_msg, size_t total_p, size_t i, size_t ri, size_t tag, void *ptr)
Callback of the sendrecv to set the size of the array received.
size_t getNSubSubDomains() const
Returns total number of sub-sub-domains in the distribution graph.
auto vertexByMapId(rid id) -> decltype(gp.vertex(m2g.find(id) ->second.id))
operator to access the vertex by mapped position
Vcluster & v_cl
Vcluster.
void setComputationCost(size_t id, size_t weight)
Function that set the weight of the vertex.
size_t getNSubSubDomainNeighbors(size_t id)
Returns total number of neighbors of the sub-sub-domain id.
void getSubSubDomainPos(size_t j, Point< dim, T > &p)
return the the position of the sub-sub-domain
void setMapId(rid n, gid g)
operator to remap vertex to a new position
ParMetisDistribution(Vcluster<> &v_cl)
void getSubSubDomainPosition(size_t id, T(&pos)[dim])
function that return the position of the vertex in the space
size_t getSubSubDomainComputationCost(size_t id)
function that get the weight of the vertex
openfpm::vector< size_t > sub_sub_owner
Id of the sub-sub-domain where we set the costs.
void createCartGraph(grid_sm< dim, void > &grid, Box< dim, T > &dom)
Create the Cartesian graph.
Graph_CSR< nm_v< dim >, nm_e > & getGraph()
Get the current graph (main)
bool verticesGotWeights
Flag to check if weights are used on vertices.
Helper class to define Metis graph.
idx_t * getPartition()
Get graph partition vector.
size_t get_ndec()
Get the decomposition counter.
void redecompose(openfpm::vector< rid > &vtxdist)
Redecompose the graph.
void reset(Graph &g, const openfpm::vector< rid > &vtxdist, const std::unordered_map< rid, gid > &m2g, bool vgw)
Reset graph and reconstruct it.
void setDistTol(real_t tol)
Distribution tolerance.
void refine(openfpm::vector< rid > &vtxdist)
Refine the graph.
void initSubGraph(Graph &g, const openfpm::vector< rid > &vtxdist, const std::unordered_map< rid, gid > &m2g, bool w)
Set the Sub-graph.
void decompose(const openfpm::vector< rid > &vtxdist)
Decompose the graph.
This class implement the point shape in an N-dimensional space.
Definition: Point.hpp:28
void execute()
Execute all the requests.
void sum(T &num)
Sum the numbers across all processors and get the result.
void sendrecvMultipleMessagesNBX(openfpm::vector< size_t > &prc, openfpm::vector< T > &data, openfpm::vector< size_t > &prc_recv, openfpm::vector< size_t > &recv_sz, void *(*msg_alloc)(size_t, size_t, size_t, size_t, size_t, size_t, void *), void *ptr_arg, long int opt=NONE)
Send and receive multiple messages.
size_t getProcessUnitID()
Get the process unit id.
void min(T &num)
Get the minimum number across all processors (or reduction with insinity norm)
size_t getProcessingUnits()
Get the total number of processors.
void max(T &num)
Get the maximum number across all processors (or reduction with infinity norm)
Implementation of VCluster class.
Definition: VCluster.hpp:59
__device__ __host__ const size_t(& getSize() const)[N]
Return the size of the grid as an array.
Definition: grid_sm.hpp:760
__device__ __host__ size_t size() const
Return the size of the grid.
Definition: grid_sm.hpp:657
size_t size()
Stub size.
Definition: map_vector.hpp:212
Definition: ids.hpp:149
sub-domain edge graph node
Definition: ids.hpp:19
idx_t id
id
Definition: ids.hpp:21
It model an expression expr1 + ... exprn.
Definition: sum.hpp:93