OpenFPM  5.2.0
Project that contain the implementation of distributed structures
main.cu
1 
40 #ifdef __NVCC__
41 
42 #define PRINT_STACKTRACE
43 //#define STOP_ON_ERROR
44 #define OPENMPI
45 //#define SE_CLASS1
46 
47 //#define EXTERNAL_SET_GPU <----- In case you want to distribute the GPUs differently from the default
48 
49 #include "Vector/vector_dist.hpp"
50 #include <math.h>
51 #include "Draw/DrawParticles.hpp"
52 
53 
54 
55 typedef float real_number;
56 
57 // A constant to indicate boundary particles
58 #define BOUNDARY 0
59 
60 // A constant to indicate fluid particles
61 #define FLUID 1
62 
63 // initial spacing between particles dp in the formulas
64 const real_number dp = 0.0085;
65 // Maximum height of the fluid water
66 // is going to be calculated and filled later on
67 real_number h_swl = 0.0;
68 
69 // c_s in the formulas (constant used to calculate the sound speed)
70 const real_number coeff_sound = 20.0;
71 
72 // gamma in the formulas
73 const real_number gamma_ = 7.0;
74 
75 // sqrt(3.0*dp*dp) support of the kernel
76 const real_number H = 0.0147224318643;
77 
78 // Eta in the formulas
79 const real_number Eta2 = 0.01 * H*H;
80 
81 const real_number FourH2 = 4.0 * H*H;
82 
83 // alpha in the formula
84 const real_number visco = 0.1;
85 
86 // cbar in the formula (calculated later)
87 real_number cbar = 0.0;
88 
89 // Mass of the fluid particles
90 const real_number MassFluid = 0.000614125;
91 
92 // Mass of the boundary particles
93 const real_number MassBound = 0.000614125;
94 
95 //
96 
97 // End simulation time
98 #ifdef TEST_RUN
99 const real_number t_end = 0.001;
100 #else
101 const real_number t_end = 1.5;
102 #endif
103 
104 // Gravity acceleration
105 const real_number gravity = 9.81;
106 
107 // Reference densitu 1000Kg/m^3
108 const real_number rho_zero = 1000.0;
109 
110 // Filled later require h_swl, it is b in the formulas
111 real_number B = 0.0;
112 
113 // Constant used to define time integration
114 const real_number CFLnumber = 0.2;
115 
116 // Minimum T
117 const real_number DtMin = 0.00001;
118 
119 // Minimum Rho allowed
120 const real_number RhoMin = 700.0;
121 
122 // Maximum Rho allowed
123 const real_number RhoMax = 1300.0;
124 
125 // Filled in initialization
126 real_number max_fluid_height = 0.0;
127 
128 // Properties
129 
130 // FLUID or BOUNDARY
131 const size_t type = 0;
132 
133 // Density
134 const int rho = 1;
135 
136 // Density at step n-1
137 const int rho_prev = 2;
138 
139 // Pressure
140 const int Pressure = 3;
141 
142 // Delta rho calculated in the force calculation
143 const int drho = 4;
144 
145 // calculated force
146 const int force = 5;
147 
148 // velocity
149 const int velocity = 6;
150 
151 // velocity at previous step
152 const int velocity_prev = 7;
153 
154 const int red = 8;
155 
156 const int red2 = 9;
157 
158 // Type of the vector containing particles
160 // | | | | | | | | | |
161 // | | | | | | | | | |
162 // type density density Pressure delta force velocity velocity reduction another
163 // at n-1 density at n - 1 buffer reduction buffer
164 
165 
166 struct ModelCustom
167 {
168  template<typename Decomposition, typename vector> inline void addComputation(Decomposition & dec,
169  vector & vd,
170  size_t v,
171  size_t p)
172  {
173  if (vd.template getProp<type>(p) == FLUID)
174  dec.addComputationCost(v,4);
175  else
176  dec.addComputationCost(v,3);
177  }
178 
179  template<typename Decomposition> inline void applyModel(Decomposition & dec, size_t v)
180  {
181  dec.setSubSubDomainComputationCost(v, dec.getSubSubDomainComputationCost(v) * dec.getSubSubDomainComputationCost(v));
182  }
183 
184  real_number distributionTol()
185  {
186  return 1.01;
187  }
188 };
189 
190 template<typename vd_type>
191 __global__ void EqState_gpu(vd_type vd, real_number B)
192 {
193  auto a = GET_PARTICLE(vd);
194 
195  real_number rho_a = vd.template getProp<rho>(a);
196  real_number rho_frac = rho_a / rho_zero;
197 
198  vd.template getProp<Pressure>(a) = B*( rho_frac*rho_frac*rho_frac*rho_frac*rho_frac*rho_frac*rho_frac - 1.0);
199 }
200 
201 inline void EqState(particles & vd)
202 {
203  auto it = vd.getDomainIteratorGPU();
204 
205  CUDA_LAUNCH(EqState_gpu,it,vd.toKernel(),B);
206 }
207 
208 
209 const real_number a2 = 1.0/M_PI/H/H/H;
210 
211 inline __device__ __host__ real_number Wab(real_number r)
212 {
213  r /= H;
214 
215  if (r < 1.0)
216  return (1.0 - 3.0/2.0*r*r + 3.0/4.0*r*r*r)*a2;
217  else if (r < 2.0)
218  return (1.0/4.0*(2.0 - r)*(2.0 - r)*(2.0 - r))*a2;
219  else
220  return 0.0;
221 }
222 
223 
224 const real_number c1 = -3.0/M_PI/H/H/H/H;
225 const real_number d1 = 9.0/4.0/M_PI/H/H/H/H;
226 const real_number c2 = -3.0/4.0/M_PI/H/H/H/H;
227 const real_number a2_4 = 0.25*a2;
228 // Filled later
229 real_number W_dap = 0.0;
230 
231 inline __device__ __host__ void DWab(Point<3,real_number> & dx, Point<3,real_number> & DW, real_number r)
232 {
233  const real_number qq=r/H;
234 
235  real_number qq2 = qq * qq;
236  real_number fac1 = (c1*qq + d1*qq2)/r;
237  real_number b1 = (qq < 1.0f)?1.0f:0.0f;
238 
239  real_number wqq = (2.0f - qq);
240  real_number fac2 = c2 * wqq * wqq / r;
241  real_number b2 = (qq >= 1.0f && qq < 2.0f)?1.0f:0.0f;
242 
243  real_number factor = (b1*fac1 + b2*fac2);
244 
245  DW.get(0) = factor * dx.get(0);
246  DW.get(1) = factor * dx.get(1);
247  DW.get(2) = factor * dx.get(2);
248 }
249 
250 
251 // Tensile correction
252 inline __device__ __host__ real_number Tensile(real_number r, real_number rhoa, real_number rhob, real_number prs1, real_number prs2, real_number W_dap)
253 {
254  const real_number qq=r/H;
255  //-Cubic Spline kernel
256  real_number wab;
257  if(r>H)
258  {
259  real_number wqq1=2.0f-qq;
260  real_number wqq2=wqq1*wqq1;
261 
262  wab=a2_4*(wqq2*wqq1);
263  }
264  else
265  {
266  real_number wqq2=qq*qq;
267  real_number wqq3=wqq2*qq;
268 
269  wab=a2*(1.0f-1.5f*wqq2+0.75f*wqq3);
270  }
271 
272  //-Tensile correction.
273  real_number fab=wab*W_dap;
274  fab*=fab; fab*=fab; //fab=fab^4
275  const real_number tensilp1=(prs1/(rhoa*rhoa))*(prs1>0.0f? 0.01f: -0.2f);
276  const real_number tensilp2=(prs2/(rhob*rhob))*(prs2>0.0f? 0.01f: -0.2f);
277 
278  return (fab*(tensilp1+tensilp2));
279 }
280 
281 
282 inline __device__ __host__ real_number Pi(const Point<3,real_number> & dr, real_number rr2, Point<3,real_number> & dv, real_number rhoa, real_number rhob, real_number massb, real_number cbar, real_number & visc)
283 {
284  const real_number dot = dr.get(0)*dv.get(0) + dr.get(1)*dv.get(1) + dr.get(2)*dv.get(2);
285  const real_number dot_rr2 = dot/(rr2+Eta2);
286  visc=(dot_rr2 < visc)?visc:dot_rr2;
287 
288  if(dot < 0)
289  {
290  const float amubar=H*dot_rr2;
291  const float robar=(rhoa+rhob)*0.5f;
292  const float pi_visc=(-visco*cbar*amubar/robar);
293 
294  return pi_visc;
295  }
296  else
297  return 0.0f;
298 }
299 
300 template<typename particles_type, typename NN_type>
301 __global__ void calc_forces_gpu(particles_type vd, NN_type NN, real_number W_dap, real_number cbar)
302 {
303  unsigned int a;
304  GET_PARTICLE_SORT(a,NN);
305 
306  real_number max_visc = 0.0f;
307 
308  // Get the position xp of the particle
309  Point<3,real_number> xa = vd.getPos(a);
310 
311  // Type of the particle
312  unsigned int typea = vd.template getProp<type>(a);
313 
314  // Get the density of the of the particle a
315  real_number rhoa = vd.template getProp<rho>(a);
316 
317  // Get the pressure of the particle a
318  real_number Pa = vd.template getProp<Pressure>(a);
319 
320  // Get the Velocity of the particle a
321  Point<3,real_number> va = vd.template getProp<velocity>(a);
322 
323  Point<3,real_number> force_;
324  force_.get(0) = 0.0f;
325  force_.get(1) = 0.0f;
326  force_.get(2) = -gravity;
327  real_number drho_ = 0.0f;
328 
329  // Get an iterator over the neighborhood particles of p
330  auto Np = NN.getNNIteratorBox(NN.getCell(xa));
331 
332  // For each neighborhood particle
333  while (Np.isNext() == true)
334  {
335  // ... q
336  auto b = Np.get_sort();
337 
338  // Get the position xp of the particle
339  Point<3,real_number> xb = vd.getPos(b);
340 
341  // if (p == q) skip this particle this condition should be done in the r^2 = 0
342  if (a == b) {++Np; continue;};
343 
344  unsigned int typeb = vd.template getProp<type>(b);
345 
346  real_number massb = (typeb == FLUID)?MassFluid:MassBound;
347  Point<3,real_number> vb = vd.template getProp<velocity>(b);
348  real_number Pb = vd.template getProp<Pressure>(b);
349  real_number rhob = vd.template getProp<rho>(b);
350 
351  // Get the distance between p and q
352  Point<3,real_number> dr = xa - xb;
353  // take the norm of this vector
354  real_number r2 = norm2(dr);
355 
356  // if they interact
357  if (r2 < FourH2 && r2 >= 1e-16)
358  {
359  real_number r = sqrtf(r2);
360 
361  Point<3,real_number> v_rel = va - vb;
362 
364  DWab(dr,DW,r);
365 
366  real_number factor = - massb*((Pa + Pb) / (rhoa * rhob) + Tensile(r,rhoa,rhob,Pa,Pb,W_dap) + Pi(dr,r2,v_rel,rhoa,rhob,massb,cbar,max_visc));
367 
368  // Bound - Bound does not produce any change
369  factor = (typea == BOUNDARY && typeb == BOUNDARY)?0.0f:factor;
370 
371  force_.get(0) += factor * DW.get(0);
372  force_.get(1) += factor * DW.get(1);
373  force_.get(2) += factor * DW.get(2);
374 
375  real_number scal = massb*(v_rel.get(0)*DW.get(0)+v_rel.get(1)*DW.get(1)+v_rel.get(2)*DW.get(2));
376  scal = (typea == BOUNDARY && typeb == BOUNDARY)?0.0f:scal;
377 
378  drho_ += scal;
379  }
380 
381  ++Np;
382  }
383 
384  vd.template getProp<red>(a) = max_visc;
385 
386  vd.template getProp<force>(a)[0] = force_.get(0);
387  vd.template getProp<force>(a)[1] = force_.get(1);
388  vd.template getProp<force>(a)[2] = force_.get(2);
389  vd.template getProp<drho>(a) = drho_;
390 }
391 
392 template<typename CellList> inline void calc_forces(particles & vd, CellList & NN, real_number & max_visc, size_t cnt)
393 {
394  auto part = vd.getDomainIteratorGPU(96);
395 
396  // Update the cell-list
397  vd.updateCellListGPU<type,rho,Pressure,velocity>(NN);
398 
399  CUDA_LAUNCH(calc_forces_gpu,part,vd.toKernel(),NN.toKernel(),W_dap,cbar);
400 
401  vd.restoreOrder<drho,force,red>(NN);
402 
403  max_visc = reduce_local<red,_max_>(vd);
404 }
405 
406 template<typename vector_type>
407 __global__ void max_acceleration_and_velocity_gpu(vector_type vd)
408 {
409  auto a = GET_PARTICLE(vd);
410 
411  Point<3,real_number> acc(vd.template getProp<force>(a));
412  vd.template getProp<red>(a) = norm(acc);
413 
414  Point<3,real_number> vel(vd.template getProp<velocity>(a));
415  vd.template getProp<red2>(a) = norm(vel);
416 }
417 
418 void max_acceleration_and_velocity(particles & vd, real_number & max_acc, real_number & max_vel)
419 {
420  // Calculate the maximum acceleration
421  auto part = vd.getDomainIteratorGPU();
422 
423  CUDA_LAUNCH(max_acceleration_and_velocity_gpu,part,vd.toKernel());
424 
425  max_acc = reduce_local<red,_max_>(vd);
426  max_vel = reduce_local<red2,_max_>(vd);
427 
428  Vcluster<> & v_cl = create_vcluster();
429  v_cl.max(max_acc);
430  v_cl.max(max_vel);
431  v_cl.execute();
432 }
433 
434 
435 real_number calc_deltaT(particles & vd, real_number ViscDtMax)
436 {
437  real_number Maxacc = 0.0;
438  real_number Maxvel = 0.0;
439  max_acceleration_and_velocity(vd,Maxacc,Maxvel);
440 
441  //-dt1 depends on force per unit mass.
442  const real_number dt_f = (Maxacc)?sqrt(H/Maxacc):std::numeric_limits<float>::max();
443 
444  //-dt2 combines the Courant and the viscous time-step controls.
445  const real_number dt_cv = H/(std::max(cbar,Maxvel*10.f) + H*ViscDtMax);
446 
447  //-dt new value of time step.
448  real_number dt=real_number(CFLnumber)*std::min(dt_f,dt_cv);
449  if(dt<real_number(DtMin))
450  {dt=real_number(DtMin);}
451 
452  return dt;
453 }
454 
455 template<typename vector_dist_type>
456 __global__ void verlet_int_gpu(vector_dist_type vd, real_number dt, real_number dt2, real_number dt205)
457 {
458  // ... a
459  auto a = GET_PARTICLE(vd);
460 
461  // if the particle is boundary
462  if (vd.template getProp<type>(a) == BOUNDARY)
463  {
464  // Update rho
465  real_number rhop = vd.template getProp<rho>(a);
466 
467  // Update only the density
468  vd.template getProp<velocity>(a)[0] = 0.0;
469  vd.template getProp<velocity>(a)[1] = 0.0;
470  vd.template getProp<velocity>(a)[2] = 0.0;
471  real_number rhonew = vd.template getProp<rho_prev>(a) + dt2*vd.template getProp<drho>(a);
472  vd.template getProp<rho>(a) = (rhonew < rho_zero)?rho_zero:rhonew;
473 
474  vd.template getProp<rho_prev>(a) = rhop;
475 
476  vd.template getProp<red>(a) = 0;
477 
478  return;
479  }
480 
481  //-Calculate displacement and update position
482  real_number dx = vd.template getProp<velocity>(a)[0]*dt + vd.template getProp<force>(a)[0]*dt205;
483  real_number dy = vd.template getProp<velocity>(a)[1]*dt + vd.template getProp<force>(a)[1]*dt205;
484  real_number dz = vd.template getProp<velocity>(a)[2]*dt + vd.template getProp<force>(a)[2]*dt205;
485 
486  vd.getPos(a)[0] += dx;
487  vd.getPos(a)[1] += dy;
488  vd.getPos(a)[2] += dz;
489 
490  real_number velX = vd.template getProp<velocity>(a)[0];
491  real_number velY = vd.template getProp<velocity>(a)[1];
492  real_number velZ = vd.template getProp<velocity>(a)[2];
493 
494  real_number rhop = vd.template getProp<rho>(a);
495 
496  vd.template getProp<velocity>(a)[0] = vd.template getProp<velocity_prev>(a)[0] + vd.template getProp<force>(a)[0]*dt2;
497  vd.template getProp<velocity>(a)[1] = vd.template getProp<velocity_prev>(a)[1] + vd.template getProp<force>(a)[1]*dt2;
498  vd.template getProp<velocity>(a)[2] = vd.template getProp<velocity_prev>(a)[2] + vd.template getProp<force>(a)[2]*dt2;
499  vd.template getProp<rho>(a) = vd.template getProp<rho_prev>(a) + dt2*vd.template getProp<drho>(a);
500 
501  // Check if the particle go out of range in space and in density, if they do mark them to remove it later
502  if (vd.getPos(a)[0] < 0.0 || vd.getPos(a)[1] < 0.0 || vd.getPos(a)[2] < 0.0 ||
503  vd.getPos(a)[0] > 1.61 || vd.getPos(a)[1] > 0.68 || vd.getPos(a)[2] > 0.50 ||
504  vd.template getProp<rho>(a) < RhoMin || vd.template getProp<rho>(a) > RhoMax)
505  {vd.template getProp<red>(a) = 1;}
506  else
507  {vd.template getProp<red>(a) = 0;}
508 
509 
510  vd.template getProp<velocity_prev>(a)[0] = velX;
511  vd.template getProp<velocity_prev>(a)[1] = velY;
512  vd.template getProp<velocity_prev>(a)[2] = velZ;
513  vd.template getProp<rho_prev>(a) = rhop;
514 }
515 
516 size_t cnt = 0;
517 
518 void verlet_int(particles & vd, real_number dt)
519 {
520  // particle iterator
521  auto part = vd.getDomainIteratorGPU();
522 
523  real_number dt205 = dt*dt*0.5;
524  real_number dt2 = dt*2.0;
525 
526  CUDA_LAUNCH(verlet_int_gpu,part,vd.toKernel(),dt,dt2,dt205);
527 
528  // remove the particles marked
529  remove_marked<red>(vd);
530 
531  // increment the iteration counter
532  cnt++;
533 }
534 
535 template<typename vector_type>
536 __global__ void euler_int_gpu(vector_type vd,real_number dt, real_number dt205)
537 {
538  // ... a
539  auto a = GET_PARTICLE(vd);
540 
541  // if the particle is boundary
542  if (vd.template getProp<type>(a) == BOUNDARY)
543  {
544  // Update rho
545  real_number rhop = vd.template getProp<rho>(a);
546 
547  // Update only the density
548  vd.template getProp<velocity>(a)[0] = 0.0;
549  vd.template getProp<velocity>(a)[1] = 0.0;
550  vd.template getProp<velocity>(a)[2] = 0.0;
551  real_number rhonew = vd.template getProp<rho>(a) + dt*vd.template getProp<drho>(a);
552  vd.template getProp<rho>(a) = (rhonew < rho_zero)?rho_zero:rhonew;
553 
554  vd.template getProp<rho_prev>(a) = rhop;
555 
556  vd.template getProp<red>(a) = 0;
557 
558  return;
559  }
560 
561  //-Calculate displacement and update position
562  real_number dx = vd.template getProp<velocity>(a)[0]*dt + vd.template getProp<force>(a)[0]*dt205;
563  real_number dy = vd.template getProp<velocity>(a)[1]*dt + vd.template getProp<force>(a)[1]*dt205;
564  real_number dz = vd.template getProp<velocity>(a)[2]*dt + vd.template getProp<force>(a)[2]*dt205;
565 
566  vd.getPos(a)[0] += dx;
567  vd.getPos(a)[1] += dy;
568  vd.getPos(a)[2] += dz;
569 
570  real_number velX = vd.template getProp<velocity>(a)[0];
571  real_number velY = vd.template getProp<velocity>(a)[1];
572  real_number velZ = vd.template getProp<velocity>(a)[2];
573  real_number rhop = vd.template getProp<rho>(a);
574 
575  vd.template getProp<velocity>(a)[0] = vd.template getProp<velocity>(a)[0] + vd.template getProp<force>(a)[0]*dt;
576  vd.template getProp<velocity>(a)[1] = vd.template getProp<velocity>(a)[1] + vd.template getProp<force>(a)[1]*dt;
577  vd.template getProp<velocity>(a)[2] = vd.template getProp<velocity>(a)[2] + vd.template getProp<force>(a)[2]*dt;
578  vd.template getProp<rho>(a) = vd.template getProp<rho>(a) + dt*vd.template getProp<drho>(a);
579 
580  // Check if the particle go out of range in space and in density
581  if (vd.getPos(a)[0] < 0.0 || vd.getPos(a)[1] < 0.0 || vd.getPos(a)[2] < 0.0 ||
582  vd.getPos(a)[0] > 1.61 || vd.getPos(a)[1] > 0.68 || vd.getPos(a)[2] > 0.50 ||
583  vd.template getProp<rho>(a) < RhoMin || vd.template getProp<rho>(a) > RhoMax)
584  {vd.template getProp<red>(a) = 1;}
585  else
586  {vd.template getProp<red>(a) = 0;}
587 
588  vd.template getProp<velocity_prev>(a)[0] = velX;
589  vd.template getProp<velocity_prev>(a)[1] = velY;
590  vd.template getProp<velocity_prev>(a)[2] = velZ;
591  vd.template getProp<rho_prev>(a) = rhop;
592 }
593 
594 void euler_int(particles & vd, real_number dt)
595 {
596 
597  // particle iterator
598  auto part = vd.getDomainIteratorGPU();
599 
600  real_number dt205 = dt*dt*0.5;
601 
602  CUDA_LAUNCH(euler_int_gpu,part,vd.toKernel(),dt,dt205);
603 
604  // remove the particles
605  remove_marked<red>(vd);
606 
607  cnt++;
608 }
609 
610 template<typename vector_type, typename NN_type>
611 __global__ void sensor_pressure_gpu(vector_type vd, NN_type NN, Point<3,real_number> probe, real_number * press_tmp)
612 {
613  real_number tot_ker = 0.0;
614 
615  // Get the position of the probe i
616  Point<3,real_number> xp = probe;
617 
618  // get the iterator over the neighbohood particles of the probes position
619  auto itg = NN.getNNIteratorBox(NN.getCell(xp));
620  while (itg.isNext())
621  {
622  auto q = itg.get_sort();
623 
624  // Only the fluid particles are importants
625  if (vd.template getProp<type>(q) != FLUID)
626  {
627  ++itg;
628  continue;
629  }
630 
631  // Get the position of the neighborhood particle q
632  Point<3,real_number> xq = vd.getPos(q);
633 
634  // Calculate the contribution of the particle to the pressure
635  // of the probe
636  real_number r = sqrt(norm2(xp - xq));
637 
638  real_number ker = Wab(r) * (MassFluid / rho_zero);
639 
640  // Also keep track of the calculation of the summed
641  // kernel
642  tot_ker += ker;
643 
644  // Add the total pressure contribution
645  *press_tmp += vd.template getProp<Pressure>(q) * ker;
646 
647  // next neighborhood particle
648  ++itg;
649  }
650 
651  // We calculate the pressure normalizing the
652  // sum over all kernels
653  if (tot_ker == 0.0)
654  {*press_tmp = 0.0;}
655  else
656  {*press_tmp = 1.0 / tot_ker * *press_tmp;}
657 }
658 
659 template<typename Vector, typename CellList>
660 inline void sensor_pressure(Vector & vd,
661  CellList & NN,
664 {
665  Vcluster<> & v_cl = create_vcluster();
666 
667  press_t.add();
668 
669  for (size_t i = 0 ; i < probes.size() ; i++)
670  {
671  // A float variable to calculate the pressure of the problem
672  CudaMemory press_tmp_(sizeof(real_number));
673  real_number press_tmp;
674 
675  // if the probe is inside the processor domain
676  if (vd.getDecomposition().isLocal(probes.get(i)) == true)
677  {
678  vd.template updateCellListGPU<type,Pressure>(NN);
679 
680  Point<3,real_number> probe = probes.get(i);
681  CUDA_LAUNCH_DIM3(sensor_pressure_gpu,1,1,vd.toKernel(),NN.toKernel(),probe,(real_number *)press_tmp_.toKernel());
682 
683  vd.template restoreOrder<>(NN);
684  // move calculated pressure on
685  press_tmp_.deviceToHost();
686  press_tmp = *(real_number *)press_tmp_.getPointer();
687  }
688 
689  // This is not necessary in principle, but if you
690  // want to make all processor aware of the history of the calculated
691  // pressure we have to execute this
692  v_cl.sum(press_tmp);
693  v_cl.execute();
694 
695  // We add the calculated pressure into the history
696  press_t.last().add(press_tmp);
697  }
698 }
699 
700 int main(int argc, char* argv[])
701 {
702  // OpenFPM GPU distribution
703 
704  // OpenFPM by default select GPU 0 for process 0, gpu 1 for process 1 and so on ... . In case of multi-node is the same each node has
705  // has a group of processes and these group of processes are distributed across the available GPU on that node.
706 
707  // If you want to override this behaviour use #define EXTERNAL_SET_GPU at the very beginning of the program and use
708  // cudaSetDevice to select the GPU for that particular process before openfpm_init
709  // Note: To get the process number do MPI_Init and and use the MPI_Comm_rank. VCluster is not available before openfpm_init
710  // A code snippet in case we want to skip GPU 0
711  // MPI_Init(&argc,&argv);
712  // int rank;
713  // MPI_Comm_rank(MPI_COMM_WORLD,&rank);
714  // cudaSetDevice(1+rank);
715 
716  // initialize the library
717  openfpm_init(&argc,&argv);
718 
719 #if !defined(CUDA_ON_CPU) && !defined(__HIP__)
720  cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
721 #endif
722 
723  // It contain for each time-step the value detected by the probes
726 
727  probes.add({0.8779f,0.3f,0.02f});
728  probes.add({0.754f,0.31f,0.02f});
729 
730  // Here we define our domain a 2D box with internals from 0 to 1.0 for x and y
731  Box<3,real_number> domain({-0.05f,-0.05f,-0.05f},{1.7010f,0.7065f,0.511f});
732  size_t sz[3] = {207,90,66};
733 
734  // Fill W_dap
735  W_dap = 1.0/Wab(H/1.5);
736 
737  // Here we define the boundary conditions of our problem
738  size_t bc[3]={NON_PERIODIC,NON_PERIODIC,NON_PERIODIC};
739 
740  // extended boundary around the domain, and the processor domain
741  Ghost<3,real_number> g(2*H);
742 
743  particles vd(0,domain,bc,g,DEC_GRAN(128));
744 
746 
747  // You can ignore all these dp/2.0 is a trick to reach the same initialization
748  // of Dual-SPH that use a different criteria to draw particles
749  Box<3,real_number> fluid_box({dp/2.0f,dp/2.0f,dp/2.0f},{0.4f+dp/2.0f,0.67f-dp/2.0f,0.3f+dp/2.0f});
750 
751  // return an iterator to the fluid particles to add to vd
752  auto fluid_it = DrawParticles::DrawBox(vd,sz,domain,fluid_box);
753 
754  // here we fill some of the constants needed by the simulation
755  max_fluid_height = fluid_it.getBoxMargins().getHigh(2);
756  h_swl = fluid_it.getBoxMargins().getHigh(2) - fluid_it.getBoxMargins().getLow(2);
757  B = (coeff_sound)*(coeff_sound)*gravity*h_swl*rho_zero / gamma_;
758  cbar = coeff_sound * sqrt(gravity * h_swl);
759 
760  // for each particle inside the fluid box ...
761  while (fluid_it.isNext())
762  {
763  // ... add a particle ...
764  vd.add();
765 
766  // ... and set it position ...
767  vd.getLastPos()[0] = fluid_it.get().get(0);
768  vd.getLastPos()[1] = fluid_it.get().get(1);
769  vd.getLastPos()[2] = fluid_it.get().get(2);
770 
771  // and its type.
772  vd.template getLastProp<type>() = FLUID;
773 
774  // We also initialize the density of the particle and the hydro-static pressure given by
775  //
776  // rho_zero*g*h = P
777  //
778  // rho_p = (P/B + 1)^(1/Gamma) * rho_zero
779  //
780 
781  vd.template getLastProp<Pressure>() = rho_zero * gravity * (max_fluid_height - fluid_it.get().get(2));
782 
783  vd.template getLastProp<rho>() = pow(vd.template getLastProp<Pressure>() / B + 1, 1.0/gamma_) * rho_zero;
784  vd.template getLastProp<rho_prev>() = vd.template getLastProp<rho>();
785  vd.template getLastProp<velocity>()[0] = 0.0;
786  vd.template getLastProp<velocity>()[1] = 0.0;
787  vd.template getLastProp<velocity>()[2] = 0.0;
788 
789  vd.template getLastProp<velocity_prev>()[0] = 0.0;
790  vd.template getLastProp<velocity_prev>()[1] = 0.0;
791  vd.template getLastProp<velocity_prev>()[2] = 0.0;
792 
793  // next fluid particle
794  ++fluid_it;
795  }
796 
797  // Recipient
798  Box<3,real_number> recipient1({0.0f,0.0f,0.0f},{1.6f+dp/2.0f,0.67f+dp/2.0f,0.4f+dp/2.0f});
799  Box<3,real_number> recipient2({dp,dp,dp},{1.6f-dp/2.0f,0.67f-dp/2.0f,0.4f+dp/2.0f});
800 
801  Box<3,real_number> obstacle1({0.9f,0.24f-dp/2.0f,0.0f},{1.02f+dp/2.0f,0.36f,0.45f+dp/2.0f});
802  Box<3,real_number> obstacle2({0.9f+dp,0.24f+dp/2.0f,0.0f},{1.02f-dp/2.0f,0.36f-dp,0.45f-dp/2.0f});
803  Box<3,real_number> obstacle3({0.9f+dp,0.24f,0.0f},{1.02f,0.36f,0.45f});
804 
806  holes.add(recipient2);
807  holes.add(obstacle1);
808  auto bound_box = DrawParticles::DrawSkin(vd,sz,domain,holes,recipient1);
809 
810  while (bound_box.isNext())
811  {
812  vd.add();
813 
814  vd.getLastPos()[0] = bound_box.get().get(0);
815  vd.getLastPos()[1] = bound_box.get().get(1);
816  vd.getLastPos()[2] = bound_box.get().get(2);
817 
818  vd.template getLastProp<type>() = BOUNDARY;
819  vd.template getLastProp<rho>() = rho_zero;
820  vd.template getLastProp<rho_prev>() = rho_zero;
821  vd.template getLastProp<velocity>()[0] = 0.0;
822  vd.template getLastProp<velocity>()[1] = 0.0;
823  vd.template getLastProp<velocity>()[2] = 0.0;
824 
825  vd.template getLastProp<velocity_prev>()[0] = 0.0;
826  vd.template getLastProp<velocity_prev>()[1] = 0.0;
827  vd.template getLastProp<velocity_prev>()[2] = 0.0;
828 
829  ++bound_box;
830  }
831 
832  auto obstacle_box = DrawParticles::DrawSkin(vd,sz,domain,obstacle2,obstacle1);
833 
834  while (obstacle_box.isNext())
835  {
836  vd.add();
837 
838  vd.getLastPos()[0] = obstacle_box.get().get(0);
839  vd.getLastPos()[1] = obstacle_box.get().get(1);
840  vd.getLastPos()[2] = obstacle_box.get().get(2);
841 
842  vd.template getLastProp<type>() = BOUNDARY;
843  vd.template getLastProp<rho>() = rho_zero;
844  vd.template getLastProp<rho_prev>() = rho_zero;
845  vd.template getLastProp<velocity>()[0] = 0.0;
846  vd.template getLastProp<velocity>()[1] = 0.0;
847  vd.template getLastProp<velocity>()[2] = 0.0;
848 
849  vd.template getLastProp<velocity_prev>()[0] = 0.0;
850  vd.template getLastProp<velocity_prev>()[1] = 0.0;
851  vd.template getLastProp<velocity_prev>()[2] = 0.0;
852 
853  ++obstacle_box;
854  }
855 
856  vd.map();
857 
858  // Now that we fill the vector with particles
859  ModelCustom md;
860 
861  vd.addComputationCosts(md);
862  vd.getDecomposition().decompose();
863  vd.map();
864 
866 
867  // Ok the initialization is done on CPU on GPU we are doing the main loop, so first we offload all properties on GPU
868 
869  vd.hostToDevicePos();
870  vd.template hostToDeviceProp<type,rho,rho_prev,Pressure,velocity,velocity_prev>();
871 
872  vd.ghost_get<type,rho,Pressure,velocity>(RUN_ON_DEVICE);
873 
874  auto NN = vd.getCellListGPU(2*H / 2.0, CL_NON_SYMMETRIC | CL_GPU_REORDER_POSITION | CL_GPU_REORDER_PROPERTY | CL_GPU_RESTORE_PROPERTY, 2);
875 
876  timer tot_sim;
877  tot_sim.start();
878 
879  size_t write = 0;
880  size_t it = 0;
881  size_t it_reb = 0;
882  real_number t = 0.0;
883  while (t <= t_end)
884  {
885  Vcluster<> & v_cl = create_vcluster();
886  timer it_time;
887  it_time.start();
888 
890  it_reb++;
891  if (it_reb == 300)
892  {
893  vd.map(RUN_ON_DEVICE);
894 
895  // Rebalancer for now work on CPU , so move to CPU
896  vd.deviceToHostPos();
897  vd.template deviceToHostProp<type>();
898 
899  it_reb = 0;
900  ModelCustom md;
901  vd.addComputationCosts(md);
902  vd.getDecomposition().decompose();
903 
904  if (v_cl.getProcessUnitID() == 0)
905  {std::cout << "REBALANCED " << it_reb << std::endl;}
906  }
907 
908  vd.map(RUN_ON_DEVICE);
909 
910  // Calculate pressure from the density
911  EqState(vd);
912 
913  real_number max_visc = 0.0;
914 
915  vd.ghost_get<type,rho,Pressure,velocity>(RUN_ON_DEVICE);
916 
917  // Calc forces
918  calc_forces(vd,NN,max_visc,cnt);
919 
920 
921  // Get the maximum viscosity term across processors
922  v_cl.max(max_visc);
923  v_cl.execute();
924 
925  // Calculate delta t integration
926  real_number dt = calc_deltaT(vd,max_visc);
927 
928  // VerletStep or euler step
929  it++;
930  if (it < 40)
931  verlet_int(vd,dt);
932  else
933  {
934  euler_int(vd,dt);
935  it = 0;
936  }
937 
938  t += dt;
939 
940  if (write < t*100)
941  {
942  // Sensor pressure require update ghost, so we ensure that particles are distributed correctly
943  // and ghost are updated
944  vd.map(RUN_ON_DEVICE);
945  vd.ghost_get<type,rho,Pressure,velocity>(RUN_ON_DEVICE);
946 
947  // calculate the pressure at the sensor points
948  //sensor_pressure(vd,NN,press_t,probes);
949 
950  std::cout << "OUTPUT " << dt << std::endl;
951 
952  // When we write we have move all the particles information back to CPU
953 
954  vd.deviceToHostPos();
955  vd.deviceToHostProp<type,rho,rho_prev,Pressure,drho,force,velocity,velocity_prev,red,red2>();
956 
957  // We copy on another vector with less properties to reduce the size of the output
959 
960  auto ito = vd.getDomainIterator();
961 
962  while(ito.isNext())
963  {
964  auto p = ito.get();
965 
966  vd_out.add();
967 
968  vd_out.getLastPos()[0] = vd.getPos(p)[0];
969  vd_out.getLastPos()[1] = vd.getPos(p)[1];
970  vd_out.getLastPos()[2] = vd.getPos(p)[2];
971 
972  vd_out.template getLastProp<0>() = vd.template getProp<type>(p);
973 
974  vd_out.template getLastProp<1>()[0] = vd.template getProp<velocity>(p)[0];
975  vd_out.template getLastProp<1>()[1] = vd.template getProp<velocity>(p)[1];
976  vd_out.template getLastProp<1>()[2] = vd.template getProp<velocity>(p)[2];
977 
978  ++ito;
979  }
980 
981  vd_out.write_frame("Particles",write,VTK_WRITER | FORMAT_BINARY);
982  write++;
983 
984  if (v_cl.getProcessUnitID() == 0)
985  {std::cout << "TIME: " << t << " write " << it_time.getwct() << " " << it_reb << " " << cnt << " Max visc: " << max_visc << " " << vd.size_local() << std::endl;}
986  }
987  else
988  {
989  if (v_cl.getProcessUnitID() == 0)
990  {std::cout << "TIME: " << t << " " << it_time.getwct() << " " << it_reb << " " << cnt << " Max visc: " << max_visc << " " << vd.size_local() << std::endl;}
991  }
992  }
993 
994  tot_sim.stop();
995  std::cout << "Time to complete: " << tot_sim.getwct() << " seconds" << std::endl;
996 
997 
998  openfpm_finalize();
999 }
1000 
1001 #else
1002 
1003 int main(int argc, char* argv[])
1004 {
1005  return 0;
1006 }
1007 
1008 #endif
This class represent an N-dimensional box.
Definition: Box.hpp:60
__device__ __host__ T getLow(int i) const
get the i-coordinate of the low bound interval of the box
Definition: Box.hpp:555
__device__ __host__ T getHigh(int i) const
get the high interval of the box
Definition: Box.hpp:566
Class for FAST cell list implementation.
Definition: CellList.hpp:558
This class define the domain decomposition interface.
static PointIteratorSkin< dim, T, typename vd_type::Decomposition_type > DrawSkin(vd_type &vd, size_t(&sz)[dim], Box< dim, T > &domain, Box< dim, T > &sub_A, Box< dim, T > &sub_B)
Draw particles in a box B excluding the area of a second box A (B - A)
static PointIterator< dim, T, typename vd_type::Decomposition_type > DrawBox(vd_type &vd, size_t(&sz)[dim], Box< dim, T > &domain, Box< dim, T > &sub)
Draw particles in a box.
Definition: Ghost.hpp:40
This class implement the point shape in an N-dimensional space.
Definition: Point.hpp:28
__device__ __host__ const T & get(unsigned int i) const
Get coordinate.
Definition: Point.hpp:172
void execute()
Execute all the requests.
void sum(T &num)
Sum the numbers across all processors and get the result.
size_t getProcessUnitID()
Get the process unit id.
void max(T &num)
Get the maximum number across all processors (or reduction with infinity norm)
Implementation of VCluster class.
Definition: VCluster.hpp:59
Sparse Matrix implementation stub object when OpenFPM is compiled with no linear algebra support.
Definition: Vector.hpp:40
Implementation of 1-D std::vector like structure.
Definition: map_vector.hpp:204
Class for cpu time benchmarking.
Definition: timer.hpp:28
void stop()
Stop the timer.
Definition: timer.hpp:119
void start()
Start the timer.
Definition: timer.hpp:90
double getwct()
Return the elapsed real time.
Definition: timer.hpp:130
Distributed vector.
auto getPos(vect_dist_key_dx vec_key) -> decltype(vPos.template get< 0 >(vec_key.getKey()))
Get the position of an element.
Model for Dynamic load balancing.
Definition: main.cpp:232
temporal buffer for reductions