OpenFPM_pdata  4.1.0 Project that contain the implementation of distributed structures
main_float128.cpp
1 /*
2  * ### WIKI 1 ###
3  *
4  * ## Simple example
5  *
6  * In this example we show 1D PSE derivative function approximation
7  * in the case of floating points 128bit 2 time better than float128 precision
8  *
9  * ### WIKI END ###
10  *
11  */
12
13 #include "config/config.h"
14 #include "Vector/vector_dist.hpp"
15 #include "Decomposition/CartDecomposition.hpp"
16 #include "PSE/Kernels.hpp"
17 #include "data_type/aggregate.hpp"
18 #include <cmath>
19
20 // This example only work if there is HAVE_LIBQUADMATH
22
23 #include <boost/multiprecision/float128.hpp>
24
25 typedef boost::multiprecision::float128 float128;
26
27 /*
28  * ### WIKI 2 ###
29  *
30  * Here we define the function x*e^(-x*x) and its
31  * second derivative in analytic form
32  *
33  * 2x*(2*x-3)*e^(-x^2)
34  *
35  */
36
37 float128 f_xex2(float128 x)
38 {
39  return x*exp(-x*x);
40 }
41
42 float128 f_xex2(Point<1,float128> & x)
43 {
44  return x.get(0)*exp(-x.get(0)*x.get(0));
45 }
46
47 float128 Lapf_xex2(Point<1,float128> & x)
48 {
49  return 2.0*x.get(0)*(2.0*x.get(0)*x.get(0) - 3.0)*exp(-x.get(0)*x.get(0));
50 }
51
52 /*
53  *
54  * ### WIKI END ###
55  *
56  */
57
58 int main(int argc, char* argv[])
59 {
60  //
61  // ### WIKI 3 ###
62  //
63  // Some useful parameters. Like
64  //
65  // * Number of particles
66  // * Minimum number of padding particles
67  // * The computational domain
68  // * The spacing
69  // * The mollification length
70  // * Second order Laplacian kernel in 1D
71  //
72
73  // Number of particles
74  const size_t Npart = 125;
75
76  // Number of padding particles (At least)
77  const size_t Npad = 40;
78
79  // The domain
80  Box<1,float128> box({0.0},{4.0});
81
82  // Calculated spacing
83  float128 spacing = box.getHigh(0) / Npart;
84
85  // Epsilon of the particle kernel
86  const float128 eps = 2*spacing;
87
88  // Laplacian PSE kernel 1 dimension, on float128, second order
89  Lap_PSE<1,float128,2> lker(eps);
90
91  //
92  // ### WIKI 2 ###
93  //
94  // Here we Initialize the library and we define Ghost size
95  // and non-periodic boundary conditions
96  //
97  openfpm_init(&argc,&argv);
98  Vcluster & v_cl = create_vcluster();
99
100  size_t bc[1]={NON_PERIODIC};
101  Ghost<1,float128> g(12*eps);
102
103  //
104  // ### WIKI 3 ###
105  //
106  // Here we are creating a distributed vector defined by the following parameters
107  //
108  // we create a set of N+1 particles to have a fully covered domain of particles between 0.0 and 4.0
109  // Suppose we have a spacing given by 1.0 you need 4 +1 particles to cover your domain
110  //
112
113  //
114  // ### WIKI 4 ###
115  //
116  // We assign the position to the particles, the scalar property is set to
117  // the function x*e^(-x*x) value.
118  // Each processor has parts of the particles and fill part of the space, the
119  // position is assigned independently from the decomposition.
120  //
121  // In this case, if there are 1001 particles and 3 processors the in the
122  // domain from 0.0 to 4.0
123  //
124  // * processor 0 place particles from 0.0 to 1.332 (334 particles)
125  // * processor 1 place particles from 1.336 to 2.668 (334 particles)
126  // * processor 2 place particles from 2.672 to 4.0 (333 particles)
127  //
128
129  // It return how many particles the processors with id < rank has in total
130  size_t base = vd.init_size_accum(Npart+1);
131  auto it2 = vd.getIterator();
132
133  while (it2.isNext())
134  {
135  auto key = it2.get();
136
137  // set the position of the particles
138  vd.getPos(key)[0] = (key.getKey() + base) * spacing;
139  //set the property of the particles
140  vd.template getProp<0>(key) = f_xex2((key.getKey() + base) * spacing);
141
142  ++it2;
143  }
144
145  //
146  // ### WIKI 5 ###
147  //
148  // Once defined the position, we distribute them across the processors
149  // following the decomposition, finally we get the ghost part
150  //
151  vd.map();
152  vd.ghost_get<0>();
153
154  //
155  // ### WIKI 6 ###
156  //
157  // near the boundary we have two options, or we use one sided kernels,
158  // or we add additional particles, it is required that such particles
159  // produces a 2 time differentiable function. In order to obtain such
160  // result we extend for x < 0.0 and x > 4.0 with the test function xe^(-x*x).
161  //
162  // Note that for x < 0.0 such extension is equivalent to mirror the
163  // particles changing the sign of the strength
164  //
165  // \verbatim
166  //
167  // 0.6 -0.5 0.5 0.6 Strength
168  // +----+----*----*----*-
169  // 0.0 Position
170  //
171  // with * = real particle
172  // + = mirror particle
173  //
174  // \endverbatim
175  //
176  //
179  float128 enlarge = 0.1;
180
181  // Create a box
182  if (Npad * spacing > 0.1)
183  {
186  enlarge = Npad * spacing;
187  }
188
189  auto it = vd.getDomainIterator();
190
191  while (it.isNext())
192  {
193  auto key = it.get();
194
195  // set the position of the particles
197  {
199  vd.getLastPos()[0] = - vd.getPos(key)[0];
200  vd.template getLastProp<0>() = - vd.template getProp<0>(key);
201  }
202
203  // set the position of the particles
205  {
207  vd.getLastPos()[0] = 2.0 * box.getHigh(0) - vd.getPos(key)[0];
208  vd.template getLastProp<0>() = f_xex2(vd.getLastPos()[0]);
209  }
210
211  ++it;
212  }
213
214  //
215  // ### WIKI 7 ###
216  //
217  // We create a CellList with cell spacing 12 sigma
218  //
219
220  // get and construct the Cell list
221
222  Ghost<1,float128> gp(enlarge);
223  auto cl = vd.getCellList(12*eps,gp);
224
225  // Maximum infinity norm
226  double linf = 0.0;
227
228  //
229  // ### WIKI 8 ###
230  //
231  // For each particle get the neighborhood of each particle
232  //
233  // This cycle is literally the formula from PSE operator approximation
234  //
235  // \$\frac{1}{\epsilon^{2}} h (u_{q} - u_{p}) \eta_{\epsilon}(x_q - x_p) \$
236  //
237  //
238
239  auto it_p = vd.getDomainIterator();
240  while (it_p.isNext())
241  {
242  // float128 PSE integration accumulator
243  float128 pse = 0;
244
245  // key
246  vect_dist_key_dx key = it_p.get();
247
248  // Get the position of the particles
249  Point<1,float128> p = vd.getPos(key);
250
251  // We are not interested in calculating out the domain
253  if (p.get(0) < 0.0 || p.get(0) >= 4.0)
254  {
255  ++it_p;
256  continue;
257  }
258
259  // Get f(x) at the position of the particle
260  float128 prp_x = vd.template getProp<0>(key);
261
262  // Get the neighborhood of the particles
263  auto NN = cl.template getNNIterator<NO_CHECK>(cl.getCell(p));
264  while(NN.isNext())
265  {
266  auto nnp = NN.get();
267
268  // Calculate contribution given by the kernel value at position p,
269  // given by the Near particle
270  if (nnp != key.getKey())
271  {
272  // W(x-y)
273  float128 ker = lker.value(p,vd.getPos(nnp));
274
275  // f(y)
276  float128 prp_y = vd.template getProp<0>(nnp);
277
278  // 1.0/(eps)^2 [f(y)-f(x)]*W(x,y)*V_q
279  float128 prp = 1.0/eps/eps * (prp_y - prp_x) * spacing;
280  pse += prp * ker;
281  }
282
283  // Next particle
284  ++NN;
285  }
286
287  // Here we calculate the L_infinity norm or the maximum difference
288  // of the analytic solution from the PSE calculated
289
290  float128 sol = Lapf_xex2(p);
291
292  if (fabs(pse - sol) > linf)
293  linf = static_cast<double>(fabs(pse - sol));
294
295  ++it_p;
296  }
297
298  //
299  // ### WIKI 9 ###
300  //
301  // Calculate the maximum infinity norm across processors and
302  // print it
303  //
304
305  v_cl.max(linf);
306  v_cl.execute();
307
308  if (v_cl.getProcessUnitID() == 0)
309  std::cout << "Norm infinity: " << linf << "\n";
310
311  //
312  // ### WIKI 10 ###
313  //
314  // Deinitialize the library
315  //
316  openfpm_finalize();
317 }
318
319 #else
320
321 int main(int argc, char* argv[])
322 {
323 }
324
325 #endif
size_t getProcessUnitID()
Get the process unit id.
This class implement the point shape in an N-dimensional space.
Definition: Point.hpp:27
Grid key for a distributed grid.
Definition: Ghost.hpp:39
Implementation of the Laplacian kernels for PSE.
Definition: Kernels.hpp:25
Implementation of VCluster class.
Definition: VCluster.hpp:58
__device__ __host__ void setHigh(int i, T val)
set the high interval of the box
Definition: Box.hpp:544
void execute()
Execute all the requests.
This class decompose a space into sub-sub-domains and distribute them across processors.
__device__ __host__ const T & get(unsigned int i) const
Get coordinate.
Definition: Point.hpp:172
This class represent an N-dimensional box.
Definition: Box.hpp:60
__device__ __host__ size_t getKey() const
Get the key.
Distributed vector.
void max(T &num)
Get the maximum number across all processors (or reduction with infinity norm)
__device__ __host__ T getHigh(int i) const
get the high interval of the box
Definition: Box.hpp:567