OpenFPM_pdata  4.1.0
Project that contain the implementation of distributed structures
ParMetisDistribution.hpp
1 /*
2  * ParMetisDistribution.hpp
3  *
4  * Created on: Nov 19, 2015
5  * Author: Antonio Leo
6  */
7 
8 
9 #ifndef SRC_DECOMPOSITION_PARMETISDISTRIBUTION_HPP_
10 #define SRC_DECOMPOSITION_PARMETISDISTRIBUTION_HPP_
11 
12 
13 #include "SubdomainGraphNodes.hpp"
14 #include "parmetis_util.hpp"
15 #include "Graph/ids.hpp"
16 #include "Graph/CartesianGraphFactory.hpp"
17 
18 #define PARMETIS_DISTRIBUTION_ERROR 100002
19 
35 template<unsigned int dim, typename T>
37 {
39  bool is_distributed = false;
40 
43 
46 
49 
52 
55 
58 
60  //
61  // vtxdist is a common array across processor, it indicate how
62  // vertex are distributed across processors
63  //
64  // Example we have 3 processors
65  //
66  // processor 0 has 3 vertices
67  // processor 1 has 5 vertices
68  // processor 2 has 4 vertices
69  //
70  // vtxdist contain, 0,3,8,12
71  //
72  // vtx dist is the unique global-id of the vertices
73  //
75 
78 
81 
83  std::unordered_map<rid, gid> m2g;
84 
86  bool verticesGotWeights = false;
87 
91  void updateGraphs()
92  {
93  sub_sub_owner.clear();
94 
95  size_t Np = v_cl.getProcessingUnits();
96 
97  // Init n_vtxdist to gather informations about the new decomposition
98  openfpm::vector<rid> n_vtxdist(Np + 1);
99  for (size_t i = 0; i <= Np; i++)
100  n_vtxdist.get(i).id = 0;
101 
102  // Update the main graph with received data from processor i
103  for (size_t i = 0; i < Np; i++)
104  {
105  size_t ndata = partitions.get(i).size();
106  size_t k = 0;
107 
108  // Update the main graph with the received informations
109  for (rid l = vtxdist.get(i); k < ndata && l < vtxdist.get(i + 1); k++, ++l)
110  {
111  // Create new n_vtxdist (just count processors vertices)
112  ++n_vtxdist.get(partitions.get(i).get(k) + 1);
113 
114  // vertex id from vtx to grobal id
115  auto v_id = m2g.find(l)->second.id;
116 
117  // Update proc id in the vertex (using the old map)
118  gp.template vertex_p<nm_v_proc_id>(v_id) = partitions.get(i).get(k);
119 
120  if (partitions.get(i).get(k) == (long int)v_cl.getProcessUnitID())
121  {sub_sub_owner.add(v_id);}
122 
123  // Add vertex to temporary structure of distribution (needed to update main graph)
124  v_per_proc.get(partitions.get(i).get(k)).add(getVertexGlobalId(l));
125  }
126  }
127 
128  // Create new n_vtxdist (accumulate the counters)
129  for (size_t i = 2; i <= Np; i++)
130  n_vtxdist.get(i) += n_vtxdist.get(i - 1);
131 
132  // Copy the new decomposition in the main vtxdist
133  for (size_t i = 0; i <= Np; i++)
134  vtxdist.get(i) = n_vtxdist.get(i);
135 
137  cnt.resize(Np);
138 
139  for (size_t i = 0 ; i < gp.getNVertex(); ++i)
140  {
141  size_t pid = gp.template vertex_p<nm_v_proc_id>(i);
142 
143  rid j = rid(vtxdist.get(pid).id + cnt.get(pid));
144  gid gi = gid(i);
145 
146  gp.template vertex_p<nm_v_id>(i) = j.id;
147  cnt.get(pid)++;
148 
149  setMapId(j,gi);
150  }
151  }
152 
160  inline auto vertexByMapId(rid id) -> decltype( gp.vertex(m2g.find(id)->second.id) )
161  {
162  return gp.vertex(m2g.find(id)->second.id);
163  }
164 
171  inline void setMapId(rid n, gid g)
172  {
173  m2g[n] = g;
174  }
175 
183  {
184  return m2g.find(n)->second;
185  }
186 
193  {
194  gid g;
195  rid i;
196  i.id = 0;
197 
198  m2g.clear();
199  for ( ; (size_t)i.id < gp.getNVertex(); ++i)
200  {
201  g.id = i.id;
202 
203  m2g.insert( { i, g });
204  }
205  }
206 
216  static void * message_receive(size_t msg_i, size_t total_msg, size_t total_p, size_t i, size_t ri, size_t tag, void * ptr)
217  {
219 
220  v->get(i).resize(msg_i / sizeof(idx_t));
221 
222  return &(v->get(i).get(0));
223  }
224 
230  {
232  size_t p_id = v_cl.getProcessUnitID();
233 
235  size_t Np = v_cl.getProcessingUnits();
236 
237  // Number of local vertex
238  size_t nl_vertex = vtxdist.get(p_id+1).id - vtxdist.get(p_id).id;
239 
241  idx_t * partition = parmetis_graph.getPartition();
242 
244  partitions.get(p_id).resize(nl_vertex);
245  if (nl_vertex != 0)
246  {std::copy(partition, partition + nl_vertex, &partitions.get(p_id).get(0));}
247 
248  // Reset data structure to keep trace of new vertices distribution in processors (needed to update main graph)
249  for (size_t i = 0; i < Np; ++i)
250  {
251  v_per_proc.get(i).clear();
252  }
253 
254  // Communicate the local distribution to the other processors
255  // to reconstruct individually the global graph
259 
260  for (size_t i = 0; i < Np; i++)
261  {
262  if (i != v_cl.getProcessUnitID())
263  {
264  partitions.get(i).clear();
265  prc.add(i);
266  sz.add(nl_vertex * sizeof(idx_t));
267  ptr.add(partitions.get(p_id).getPointer());
268  }
269  }
270 
271  if (prc.size() == 0)
272  v_cl.sendrecvMultipleMessagesNBX(0, NULL, NULL, NULL, message_receive, &partitions,NONE);
273  else
274  v_cl.sendrecvMultipleMessagesNBX(prc.size(), &sz.get(0), &prc.get(0), &ptr.get(0), message_receive, &partitions,NONE);
275 
276  // Update graphs with the received data
277  updateGraphs();
278  }
279 
280 
281 public:
282 
288  :is_distributed(false),v_cl(v_cl), parmetis_graph(v_cl, v_cl.getProcessingUnits()), vtxdist(v_cl.getProcessingUnits() + 1), partitions(v_cl.getProcessingUnits()), v_per_proc(v_cl.getProcessingUnits())
289  {
290  }
291 
298  :v_cl(pm.v_cl),parmetis_graph(v_cl, v_cl.getProcessingUnits())
299  {
300  this->operator=(pm);
301  }
302 
309  :v_cl(pm.v_cl)
310  {
311  this->operator=(pm);
312  }
313 
320  {
321  size_t bc[dim];
322 
323  for (size_t i = 0 ; i < dim ; i++)
324  bc[i] = NON_PERIODIC;
325 
326  // Set grid and domain
327  gr = grid;
328  domain = dom;
329 
330  // Create a cartesian grid graph
332  gp = g_factory_part.template construct<NO_EDGE, nm_v_id, T, dim - 1, 0>(gr.getSize(), domain, bc);
334 
336  size_t Np = v_cl.getProcessingUnits();
337 
341  size_t mod_v = gr.size() % Np;
342  size_t div_v = gr.size() / Np;
343 
344  for (size_t i = 0; i <= Np; i++)
345  {
346  if (i < mod_v)
347  vtxdist.get(i).id = (div_v + 1) * i;
348  else
349  vtxdist.get(i).id = (div_v) * i + mod_v;
350  }
351 
352  // Init to 0.0 axis z (to fix in graphFactory)
353  if (dim < 3)
354  {
355  for (size_t i = 0; i < gp.getNVertex(); i++)
356  {
357  gp.vertex(i).template get<nm_v_x>()[2] = 0.0;
358  }
359  }
360  for (size_t i = 0; i < gp.getNVertex(); i++)
361  {
362  gp.vertex(i).template get<nm_v_global_id>() = i;
363  }
364 
365  }
366 
371  {
372  return gp;
373  }
374 
378  void decompose()
379  {
380  if (is_distributed == false)
382  else
384 
387 
388  // update after decomposition
390 
391  is_distributed = true;
392  }
393 
400  void refine()
401  {
402  // Reset parmetis graph and reconstruct it
404 
405  // Refine
407 
409  }
410 
417  void redecompose()
418  {
419  // Reset parmetis graph and reconstruct it
421 
422  // Refine
424 
426  }
427 
432  float getUnbalance()
433  {
434  long t_cost = 0;
435 
436  long min, max, sum;
437  float unbalance;
438 
439  t_cost = getProcessorLoad();
440 
441  min = t_cost;
442  max = t_cost;
443  sum = t_cost;
444 
445  v_cl.min(min);
446  v_cl.max(max);
447  v_cl.sum(sum);
448  v_cl.execute();
449 
450  unbalance = ((float) (max - min)) / (float) (sum / v_cl.getProcessingUnits());
451 
452  return unbalance * 100;
453  }
454 
461  void getSubSubDomainPosition(size_t id, T (&pos)[dim])
462  {
463 #ifdef SE_CLASS1
464  if (id >= gp.getNVertex())
465  std::cerr << __FILE__ << ":" << __LINE__ << "Such vertex doesn't exist (id = " << id << ", " << "total size = " << gp.getNVertex() << ")\n";
466 #endif
467 
468  // Copy the geometrical informations inside the pos vector
469  pos[0] = gp.vertex(id).template get<nm_v_x>()[0];
470  pos[1] = gp.vertex(id).template get<nm_v_x>()[1];
471  if (dim == 3)
472  pos[2] = gp.vertex(id).template get<nm_v_x>()[2];
473  }
474 
481  inline void setComputationCost(size_t id, size_t weight)
482  {
483  if (!verticesGotWeights)
484  {verticesGotWeights = true;}
485 
486 #ifdef SE_CLASS1
487  if (id >= gp.getNVertex())
488  {std::cerr << __FILE__ << ":" << __LINE__ << "Such vertex doesn't exist (id = " << id << ", " << "total size = " << gp.getNVertex() << ")\n";}
489 #endif
490 
491  // Update vertex in main graph
492  gp.vertex(id).template get<nm_v_computation>() = weight;
493  }
494 
500  {
501  return verticesGotWeights;
502  }
503 
510  {
511 #ifdef SE_CLASS1
512  if (id >= gp.getNVertex())
513  std::cerr << __FILE__ << ":" << __LINE__ << "Such vertex doesn't exist (id = " << id << ", " << "total size = " << gp.getNVertex() << ")\n";
514 #endif
515 
516  return gp.vertex(id).template get<nm_v_computation>();
517  }
518 
524  {
525  size_t load = 0;
526 
527  // Processor id
528  size_t p_id = v_cl.getProcessUnitID();
529 
530 
531  for (rid i = vtxdist.get(p_id); i < vtxdist.get(p_id+1) ; ++i)
532  load += gp.vertex(m2g.find(i)->second.id).template get<nm_v_computation>();
533 
534  //std::cout << v_cl.getProcessUnitID() << " weight " << load << " size " << sub_g.getNVertex() << "\n";
535  return load;
536  }
537 
543  void setMigrationCost(size_t id, size_t migration)
544  {
545 #ifdef SE_CLASS1
546  if (id >= gp.getNVertex())
547  std::cerr << __FILE__ << ":" << __LINE__ << "Such vertex doesn't exist (id = " << id << ", " << "total size = " << gp.getNVertex() << ")\n";
548 #endif
549 
550  gp.vertex(id).template get<nm_v_migration>() = migration;
551  }
552 
559  void setCommunicationCost(size_t v_id, size_t e, size_t communication)
560  {
561 #ifdef SE_CLASS1
562 
563  size_t e_id = v_id + e;
564 
565  if (e_id >= gp.getNEdge())
566  std::cerr << "Such edge doesn't exist (id = " << e_id << ", " << "total size = " << gp.getNEdge() << ")\n";
567 #endif
568 
569  gp.getChildEdge(v_id, e).template get<nm_e::communication>() = communication;
570  }
571 
577  size_t getNSubSubDomains() const
578  {
579  return gp.getNVertex();
580  }
581 
587  size_t getNOwnerSubSubDomains() const
588  {
589  return sub_sub_owner.size();
590  }
591 
599  size_t getOwnerSubSubDomain(size_t id) const
600  {
601  return sub_sub_owner.get(id);
602  }
603 
611  size_t getNSubSubDomainNeighbors(size_t id)
612  {
613 #ifdef SE_CLASS1
614  if (id >= gp.getNVertex())
615  std::cerr << __FILE__ << ":" << __LINE__ << "Such vertex doesn't exist (id = " << id << ", " << "total size = " << gp.getNVertex() << ")\n";
616 #endif
617 
618  return gp.getNChilds(id);
619  }
620 
627  {
628  gp.destroy();
629  partitions.clear();
630  partitions.shrink_to_fit();
631  v_per_proc.clear();
632  v_per_proc.shrink_to_fit();
633  m2g.clear();
634  m2g.rehash(0);
635  }
636 
642  void write(const std::string & file)
643  {
644  VTKWriter<Graph_CSR<nm_v<dim>, nm_e>, VTK_GRAPH> gv2(gp);
645  gv2.write(std::to_string(v_cl.getProcessUnitID()) + "_" + file + ".vtk");
646  }
647 
648  const ParMetisDistribution<dim,T> & operator=(const ParMetisDistribution<dim,T> & dist)
649  {
650  is_distributed = dist.is_distributed;
651  gr = dist.gr;
652  domain = dist.domain;
653  gp = dist.gp;
654  vtxdist = dist.vtxdist;
655  partitions = dist.partitions;
656  v_per_proc = dist.v_per_proc;
657  verticesGotWeights = dist.verticesGotWeights;
658  sub_sub_owner = dist.sub_sub_owner;
659  m2g = dist.m2g;
660  parmetis_graph = dist.parmetis_graph;
661 
662  return *this;
663  }
664 
666  {
667  is_distributed = dist.is_distributed;
668  gr = dist.gr;
669  domain = dist.domain;
670  gp.swap(dist.gp);
671  vtxdist.swap(dist.vtxdist);
672  partitions.swap(dist.partitions);
673  v_per_proc.swap(dist.v_per_proc);
674  verticesGotWeights = dist.verticesGotWeights;
675  sub_sub_owner.swap(dist.sub_sub_owner);
676  m2g.swap(dist.m2g);
677  parmetis_graph = dist.parmetis_graph;
678 
679  return *this;
680  }
681 
688  void getSubSubDomainPos(size_t j, Point<dim,T> & p)
689  {
690  for (size_t i = 0 ; i < dim ; i++)
691  {p.get(i) = gp.template vertex_p<0>(sub_sub_owner.get(j))[i];}
692  }
693 
699  size_t get_ndec()
700  {
701  return parmetis_graph.get_ndec();
702  }
703 
709  void setDistTol(double tol)
710  {
712  }
713 };
714 
715 #endif /* SRC_DECOMPOSITION_PARMETISDISTRIBUTION_HPP_ */
void refine(openfpm::vector< rid > &vtxdist)
Refine the graph.
void destroy_internal_graph()
In case we do not do Dynamic load balancing this this data-structure it is safe to eliminate the full...
void decompose(const openfpm::vector< rid > &vtxdist)
Decompose the graph.
Box< dim, T > domain
rectangular domain to decompose
void refine()
Refine current decomposition.
Graph_CSR< nm_v< dim >, nm_e > & getGraph()
Get the current graph (main)
auto vertex(size_t id) -> decltype(v.get(id))
Function to access the vertex.
Definition: map_graph.hpp:596
static void * message_receive(size_t msg_i, size_t total_msg, size_t total_p, size_t i, size_t ri, size_t tag, void *ptr)
Callback of the sendrecv to set the size of the array received.
size_t getProcessUnitID()
Get the process unit id.
void setComputationCost(size_t id, size_t weight)
Function that set the weight of the vertex.
ParMetisDistribution(Vcluster<> &v_cl)
void sendrecvMultipleMessagesNBX(openfpm::vector< size_t > &prc, openfpm::vector< T > &data, openfpm::vector< size_t > &prc_recv, openfpm::vector< size_t > &recv_sz, void *(*msg_alloc)(size_t, size_t, size_t, size_t, size_t, size_t, void *), void *ptr_arg, long int opt=NONE)
Send and receive multiple messages.
void postDecomposition()
It update the full decomposition.
void setMapId(rid n, gid g)
operator to remap vertex to a new position
void decompose()
Create the decomposition.
__device__ __host__ size_t size() const
Return the size of the grid.
Definition: grid_sm.hpp:637
size_t getNOwnerSubSubDomains() const
Return the total number of sub-sub-domains this processor own.
void setDistTol(real_t tol)
Distribution tolerance.
void initSubGraph(Graph &g, const openfpm::vector< rid > &vtxdist, const std::unordered_map< rid, gid > &m2g, bool w)
Set the Sub-graph.
grid_sm< dim, void > gr
Structure that store the cartesian grid information.
ParMetisDistribution(ParMetisDistribution< dim, T > &&pm)
void getSubSubDomainPos(size_t j, Point< dim, T > &p)
return the the position of the sub-sub-domain
size_t get_ndec()
Get the decomposition counter.
gid getVertexGlobalId(rid n)
Get the global id of the vertex given the re-mapped one.
void setMigrationCost(size_t id, size_t migration)
Set migration cost of the vertex id.
auto vertexByMapId(rid id) -> decltype(gp.vertex(m2g.find(id) ->second.id))
operator to access the vertex by mapped position
float getUnbalance()
Compute the unbalance of the processor compared to the optimal balance.
void setDistTol(double tol)
Set the tolerance for each partition.
void redecompose()
Redecompose current decomposition.
void getSubSubDomainPosition(size_t id, T(&pos)[dim])
function that return the position of the vertex in the space
Helper class to define Metis graph.
void redecompose(openfpm::vector< rid > &vtxdist)
Redecompose the graph.
size_t get_ndec()
Get the decomposition counter.
This class implement the point shape in an N-dimensional space.
Definition: Point.hpp:27
bool verticesGotWeights
Flag to check if weights are used on vertices.
size_t size()
Stub size.
Definition: map_vector.hpp:211
void createCartGraph(grid_sm< dim, void > &grid, Box< dim, T > &dom)
Create the Cartesian graph.
bool is_distributed
Is distributed.
void setCommunicationCost(size_t v_id, size_t e, size_t communication)
Set communication cost of the edge id.
Implementation of VCluster class.
Definition: VCluster.hpp:58
Vcluster & v_cl
Vcluster.
void execute()
Execute all the requests.
openfpm::vector< openfpm::vector< idx_t > > partitions
partitions
This class construct a cartesian graph.
Parmetis< Graph_CSR< nm_v< dim >, nm_e > > parmetis_graph
Convert the graph to parmetis format.
ParMetisDistribution(const ParMetisDistribution< dim, T > &pm)
Class that distribute sub-sub-domains across processors using ParMetis Library.
__device__ __host__ const T & get(unsigned int i) const
Get coordinate.
Definition: Point.hpp:172
sub-domain edge graph node
size_t getProcessorLoad()
Compute the processor load counting the total weights of its vertices.
void write(const std::string &file)
Print the current distribution and save it to VTK file.
void reset(Graph &g, const openfpm::vector< rid > &vtxdist, const std::unordered_map< rid, gid > &m2g, bool vgw)
Reset graph and reconstruct it.
openfpm::vector< rid > vtxdist
Init vtxdist needed for Parmetis.
const size_t(& getSize() const)[N]
Return the size of the grid as an array.
Definition: grid_sm.hpp:740
size_t getNSubSubDomains() const
Returns total number of sub-sub-domains in the distribution graph.
size_t getOwnerSubSubDomain(size_t id) const
Return the global id of the owned sub-sub-domain.
void initLocalToGlobalMap()
operator to init ids vector
KeyT const ValueT ValueT OffsetIteratorT OffsetIteratorT int
[in] The number of segments that comprise the sorting data
size_t getProcessingUnits()
Get the total number of processors.
Structure that store a graph in CSR format or basically in compressed adjacency matrix format.
Definition: map_graph.hpp:81
size_t getNVertex() const
Return the number of the vertex.
Definition: map_graph.hpp:1045
Graph_CSR< nm_v< dim >, nm_e > gp
Global sub-sub-domain graph.
This class represent an N-dimensional box.
Definition: Box.hpp:60
void sum(T &num)
Sum the numbers across all processors and get the result.
size_t getSubSubDomainComputationCost(size_t id)
function that get the weight of the vertex
size_t getNChilds(size_t c) const
Return the number of childs of a vertex.
Definition: map_graph.hpp:768
std::unordered_map< rid, gid > m2g
Hashmap to access to the global position given the re-mapped one (needed for access the map)
void destroy()
operator to clear the whole graph
Definition: map_graph.hpp:683
void swap(Graph_CSR< V, E > &g)
swap the memory of g with this graph
Definition: map_graph.hpp:976
size_t getNEdge() const
Return the number of edges.
Definition: map_graph.hpp:1056
void max(T &num)
Get the maximum number across all processors (or reduction with infinity norm)
It model an expression expr1 + ... exprn.
Definition: sum.hpp:92
idx_t id
id
Definition: ids.hpp:21
Definition: ids.hpp:18
auto getChildEdge(size_t v, size_t v_e) -> decltype(e.get(0))
Get the vertex edge.
Definition: map_graph.hpp:797
Definition: ids.hpp:148
openfpm::vector< size_t > sub_sub_owner
Id of the sub-sub-domain where we set the costs.
openfpm::vector< openfpm::vector< gid > > v_per_proc
Init data structure to keep trace of new vertices distribution in processors (needed to update main g...
size_t getNSubSubDomainNeighbors(size_t id)
Returns total number of neighbors of the sub-sub-domain id.
idx_t * getPartition()
Get graph partition vector.
bool weightsAreUsed()
Checks if weights are used on the vertices.
void min(T &num)
Get the minimum number across all processors (or reduction with insinity norm)
void updateGraphs()
Update main graph ad subgraph with the received data of the partitions from the other processors.