OpenFPM_pdata  4.1.0
Project that contain the implementation of distributed structures
main.cu
1 
40 #ifdef __NVCC__
41 
42 #define PRINT_STACKTRACE
43 //#define STOP_ON_ERROR
44 #define OPENMPI
45 //#define SE_CLASS1
46 
47 //#define USE_LOW_REGISTER_ITERATOR
48 #define SCAN_WITH_CUB //<------ In case you want to use CUB for scan operations
49 #define SORT_WITH_CUB
50 //#define EXTERNAL_SET_GPU <----- In case you want to distribute the GPUs differently from the default
51 
52 #include "Vector/vector_dist.hpp"
53 #include <math.h>
54 #include "Draw/DrawParticles.hpp"
55 
56 
57 
58 typedef float real_number;
59 
60 // A constant to indicate boundary particles
61 #define BOUNDARY 0
62 
63 // A constant to indicate fluid particles
64 #define FLUID 1
65 
66 // initial spacing between particles dp in the formulas
67 const real_number dp = 0.00425;
68 // Maximum height of the fluid water
69 // is going to be calculated and filled later on
70 real_number h_swl = 0.0;
71 
72 // c_s in the formulas (constant used to calculate the sound speed)
73 const real_number coeff_sound = 20.0;
74 
75 // gamma in the formulas
76 const real_number gamma_ = 7.0;
77 
78 // sqrt(3.0*dp*dp) support of the kernel
79 const real_number H = 0.00736121593217;
80 
81 // Eta in the formulas
82 const real_number Eta2 = 0.01 * H*H;
83 
84 const real_number FourH2 = 4.0 * H*H;
85 
86 // alpha in the formula
87 const real_number visco = 0.1;
88 
89 // cbar in the formula (calculated later)
90 real_number cbar = 0.0;
91 
92 // Mass of the fluid particles
93 const real_number MassFluid = 0.0000767656;
94 
95 // Mass of the boundary particles
96 const real_number MassBound = 0.0000767656;
97 
98 //
99 
100 // End simulation time
101 #ifdef TEST_RUN
102 const real_number t_end = 0.001;
103 #else
104 const real_number t_end = 1.5;
105 #endif
106 
107 // Gravity acceleration
108 const real_number gravity = 9.81;
109 
110 // Reference densitu 1000Kg/m^3
111 const real_number rho_zero = 1000.0;
112 
113 // Filled later require h_swl, it is b in the formulas
114 real_number B = 0.0;
115 
116 // Constant used to define time integration
117 const real_number CFLnumber = 0.2;
118 
119 // Minimum T
120 const real_number DtMin = 0.00001;
121 
122 // Minimum Rho allowed
123 const real_number RhoMin = 700.0;
124 
125 // Maximum Rho allowed
126 const real_number RhoMax = 1300.0;
127 
128 // Filled in initialization
129 real_number max_fluid_height = 0.0;
130 
131 // Properties
132 
133 // FLUID or BOUNDARY
134 const size_t type = 0;
135 
136 // Density
137 const int rho = 1;
138 
139 // Density at step n-1
140 const int rho_prev = 2;
141 
142 // Pressure
143 const int Pressure = 3;
144 
145 // Delta rho calculated in the force calculation
146 const int drho = 4;
147 
148 // calculated force
149 const int force = 5;
150 
151 // velocity
152 const int velocity = 6;
153 
154 // velocity at previous step
155 const int velocity_prev = 7;
156 
157 const int red = 8;
158 
159 const int red2 = 9;
160 
161 // Type of the vector containing particles
163 // | | | | | | | | | |
164 // | | | | | | | | | |
165 // type density density Pressure delta force velocity velocity reduction another
166 // at n-1 density at n - 1 buffer reduction buffer
167 
168 
169 struct ModelCustom
170 {
171  template<typename Decomposition, typename vector> inline void addComputation(Decomposition & dec,
172  vector & vd,
173  size_t v,
174  size_t p)
175  {
176  if (vd.template getProp<type>(p) == FLUID)
177  dec.addComputationCost(v,4);
178  else
179  dec.addComputationCost(v,3);
180  }
181 
182  template<typename Decomposition> inline void applyModel(Decomposition & dec, size_t v)
183  {
184  dec.setSubSubDomainComputationCost(v, dec.getSubSubDomainComputationCost(v) * dec.getSubSubDomainComputationCost(v));
185  }
186 
187  real_number distributionTol()
188  {
189  return 1.01;
190  }
191 };
192 
193 template<typename vd_type>
194 __global__ void EqState_gpu(vd_type vd, real_number B)
195 {
196  auto a = GET_PARTICLE(vd);
197 
198  real_number rho_a = vd.template getProp<rho>(a);
199  real_number rho_frac = rho_a / rho_zero;
200 
201  vd.template getProp<Pressure>(a) = B*( rho_frac*rho_frac*rho_frac*rho_frac*rho_frac*rho_frac*rho_frac - 1.0);
202 }
203 
204 inline void EqState(particles & vd)
205 {
206  auto it = vd.getDomainIteratorGPU();
207 
208  CUDA_LAUNCH(EqState_gpu,it,vd.toKernel(),B);
209 }
210 
211 
212 const real_number a2 = 1.0/M_PI/H/H/H;
213 
214 inline __device__ __host__ real_number Wab(real_number r)
215 {
216  r /= H;
217 
218  if (r < 1.0)
219  return (1.0 - 3.0/2.0*r*r + 3.0/4.0*r*r*r)*a2;
220  else if (r < 2.0)
221  return (1.0/4.0*(2.0 - r*r)*(2.0 - r*r)*(2.0 - r*r))*a2;
222  else
223  return 0.0;
224 }
225 
226 
227 const real_number c1 = -3.0/M_PI/H/H/H/H;
228 const real_number d1 = 9.0/4.0/M_PI/H/H/H/H;
229 const real_number c2 = -3.0/4.0/M_PI/H/H/H/H;
230 const real_number a2_4 = 0.25*a2;
231 // Filled later
232 real_number W_dap = 0.0;
233 
234 inline __device__ __host__ void DWab(Point<3,real_number> & dx, Point<3,real_number> & DW, real_number r)
235 {
236  const real_number qq=r/H;
237 
238  real_number qq2 = qq * qq;
239  real_number fac1 = (c1*qq + d1*qq2)/r;
240  real_number b1 = (qq < 1.0f)?1.0f:0.0f;
241 
242  real_number wqq = (2.0f - qq);
243  real_number fac2 = c2 * wqq * wqq / r;
244  real_number b2 = (qq >= 1.0f && qq < 2.0f)?1.0f:0.0f;
245 
246  real_number factor = (b1*fac1 + b2*fac2);
247 
248  DW.get(0) = factor * dx.get(0);
249  DW.get(1) = factor * dx.get(1);
250  DW.get(2) = factor * dx.get(2);
251 }
252 
253 
254 // Tensile correction
255 inline __device__ __host__ real_number Tensile(real_number r, real_number rhoa, real_number rhob, real_number prs1, real_number prs2, real_number W_dap)
256 {
257  const real_number qq=r/H;
258  //-Cubic Spline kernel
259  real_number wab;
260  if(r>H)
261  {
262  real_number wqq1=2.0f-qq;
263  real_number wqq2=wqq1*wqq1;
264 
265  wab=a2_4*(wqq2*wqq1);
266  }
267  else
268  {
269  real_number wqq2=qq*qq;
270  real_number wqq3=wqq2*qq;
271 
272  wab=a2*(1.0f-1.5f*wqq2+0.75f*wqq3);
273  }
274 
275  //-Tensile correction.
276  real_number fab=wab*W_dap;
277  fab*=fab; fab*=fab; //fab=fab^4
278  const real_number tensilp1=(prs1/(rhoa*rhoa))*(prs1>0.0f? 0.01f: -0.2f);
279  const real_number tensilp2=(prs2/(rhob*rhob))*(prs2>0.0f? 0.01f: -0.2f);
280 
281  return (fab*(tensilp1+tensilp2));
282 }
283 
284 
285 inline __device__ __host__ real_number Pi(const Point<3,real_number> & dr, real_number rr2, Point<3,real_number> & dv, real_number rhoa, real_number rhob, real_number massb, real_number cbar, real_number & visc)
286 {
287  const real_number dot = dr.get(0)*dv.get(0) + dr.get(1)*dv.get(1) + dr.get(2)*dv.get(2);
288  const real_number dot_rr2 = dot/(rr2+Eta2);
289  visc=(dot_rr2 < visc)?visc:dot_rr2;
290 
291  if(dot < 0)
292  {
293  const float amubar=H*dot_rr2;
294  const float robar=(rhoa+rhob)*0.5f;
295  const float pi_visc=(-visco*cbar*amubar/robar);
296 
297  return pi_visc;
298  }
299  else
300  return 0.0f;
301 }
302 
303 template<typename particles_type, typename NN_type>
304 __global__ void calc_forces_gpu(particles_type vd, NN_type NN, real_number W_dap, real_number cbar)
305 {
306  // ... a
307  unsigned int a;
308 
309  GET_PARTICLE_SORT(a,NN);
310 
311  real_number max_visc = 0.0f;
312 
313  // Get the position xp of the particle
314  Point<3,real_number> xa = vd.getPos(a);
315 
316  // Type of the particle
317  unsigned int typea = vd.template getProp<type>(a);
318 
319  // Take the mass of the particle dependently if it is FLUID or BOUNDARY
320  //real_number massa = (typea == FLUID)?MassFluid:MassBound;
321 
322  // Get the density of the of the particle a
323  real_number rhoa = vd.template getProp<rho>(a);
324 
325  // Get the pressure of the particle a
326  real_number Pa = vd.template getProp<Pressure>(a);
327 
328  // Get the Velocity of the particle a
329  Point<3,real_number> va = vd.template getProp<velocity>(a);
330 
331  Point<3,real_number> force_;
332  force_.get(0) = 0.0f;
333  force_.get(1) = 0.0f;
334  force_.get(2) = -gravity;
335  real_number drho_ = 0.0f;
336 
337  // Get an iterator over the neighborhood particles of p
338  auto Np = NN.getNNIteratorBox(NN.getCell(xa));
339 
340  // For each neighborhood particle
341  while (Np.isNext() == true)
342  {
343  // ... q
344  auto b = Np.get_sort();
345 
346  // Get the position xp of the particle
347  Point<3,real_number> xb = vd.getPos(b);
348 
349  // if (p == q) skip this particle this condition should be done in the r^2 = 0
350  if (a == b) {++Np; continue;};
351 
352  unsigned int typeb = vd.template getProp<type>(b);
353 
354  real_number massb = (typeb == FLUID)?MassFluid:MassBound;
355  Point<3,real_number> vb = vd.template getProp<velocity>(b);
356  real_number Pb = vd.template getProp<Pressure>(b);
357  real_number rhob = vd.template getProp<rho>(b);
358 
359  // Get the distance between p and q
360  Point<3,real_number> dr = xa - xb;
361  Point<3,real_number> v_rel = va - vb;
362  // take the norm of this vector
363  real_number r2 = norm2(dr);
364 
365  // if they interact
366  if (r2 < FourH2 && r2 >= 1e-16)
367  {
368  real_number r = sqrtf(r2);
369 
371  DWab(dr,DW,r);
372 
373  real_number factor = - massb*((Pa + Pb) / (rhoa * rhob) + Tensile(r,rhoa,rhob,Pa,Pb,W_dap) + Pi(dr,r2,v_rel,rhoa,rhob,massb,cbar,max_visc));
374 
375  // Bound - Bound does not produce any change
376  factor = (typea == BOUNDARY && typeb == BOUNDARY)?0.0f:factor;
377 
378  force_.get(0) += factor * DW.get(0);
379  force_.get(1) += factor * DW.get(1);
380  force_.get(2) += factor * DW.get(2);
381 
382  real_number scal = massb*(v_rel.get(0)*DW.get(0)+v_rel.get(1)*DW.get(1)+v_rel.get(2)*DW.get(2));
383  scal = (typea == BOUNDARY && typeb == BOUNDARY)?0.0f:scal;
384 
385  drho_ += scal;
386  }
387 
388  ++Np;
389  }
390 
391  vd.template getProp<red>(a) = max_visc;
392 
393  vd.template getProp<force>(a)[0] = force_.get(0);
394  vd.template getProp<force>(a)[1] = force_.get(1);
395  vd.template getProp<force>(a)[2] = force_.get(2);
396  vd.template getProp<drho>(a) = drho_;
397 }
398 
399 template<typename CellList> inline void calc_forces(particles & vd, CellList & NN, real_number & max_visc, size_t cnt)
400 {
401  auto part = vd.getDomainIteratorGPU(96);
402 
403  // Update the cell-list
404  vd.updateCellList(NN);
405 
406  CUDA_LAUNCH(calc_forces_gpu,part,vd.toKernel_sorted(),NN.toKernel(),W_dap,cbar);
407 
408  vd.merge_sort<force,drho,red>(NN);
409 
410  max_visc = reduce_local<red,_max_>(vd);
411 }
412 
413 template<typename vector_type>
414 __global__ void max_acceleration_and_velocity_gpu(vector_type vd)
415 {
416  auto a = GET_PARTICLE(vd);
417 
418  Point<3,real_number> acc(vd.template getProp<force>(a));
419  vd.template getProp<red>(a) = norm(acc);
420 
421  Point<3,real_number> vel(vd.template getProp<velocity>(a));
422  vd.template getProp<red2>(a) = norm(vel);
423 }
424 
425 void max_acceleration_and_velocity(particles & vd, real_number & max_acc, real_number & max_vel)
426 {
427  // Calculate the maximum acceleration
428  auto part = vd.getDomainIteratorGPU();
429 
430  CUDA_LAUNCH(max_acceleration_and_velocity_gpu,part,vd.toKernel());
431 
432  max_acc = reduce_local<red,_max_>(vd);
433  max_vel = reduce_local<red2,_max_>(vd);
434 
435  Vcluster<> & v_cl = create_vcluster();
436  v_cl.max(max_acc);
437  v_cl.max(max_vel);
438  v_cl.execute();
439 }
440 
441 
442 real_number calc_deltaT(particles & vd, real_number ViscDtMax)
443 {
444  real_number Maxacc = 0.0;
445  real_number Maxvel = 0.0;
446  max_acceleration_and_velocity(vd,Maxacc,Maxvel);
447 
448  //-dt1 depends on force per unit mass.
449  const real_number dt_f = (Maxacc)?sqrt(H/Maxacc):std::numeric_limits<float>::max();
450 
451  //-dt2 combines the Courant and the viscous time-step controls.
452  const real_number dt_cv = H/(std::max(cbar,Maxvel*10.f) + H*ViscDtMax);
453 
454  //-dt new value of time step.
455  real_number dt=real_number(CFLnumber)*std::min(dt_f,dt_cv);
456  if(dt<real_number(DtMin))
457  {dt=real_number(DtMin);}
458 
459  return dt;
460 }
461 
462 template<typename vector_dist_type>
463 __global__ void verlet_int_gpu(vector_dist_type vd, real_number dt, real_number dt2, real_number dt205)
464 {
465  // ... a
466  auto a = GET_PARTICLE(vd);
467 
468  // if the particle is boundary
469  if (vd.template getProp<type>(a) == BOUNDARY)
470  {
471  // Update rho
472  real_number rhop = vd.template getProp<rho>(a);
473 
474  // Update only the density
475  vd.template getProp<velocity>(a)[0] = 0.0;
476  vd.template getProp<velocity>(a)[1] = 0.0;
477  vd.template getProp<velocity>(a)[2] = 0.0;
478  real_number rhonew = vd.template getProp<rho_prev>(a) + dt2*vd.template getProp<drho>(a);
479  vd.template getProp<rho>(a) = (rhonew < rho_zero)?rho_zero:rhonew;
480 
481  vd.template getProp<rho_prev>(a) = rhop;
482 
483  vd.template getProp<red>(a) = 0;
484 
485  return;
486  }
487 
488  //-Calculate displacement and update position
489  real_number dx = vd.template getProp<velocity>(a)[0]*dt + vd.template getProp<force>(a)[0]*dt205;
490  real_number dy = vd.template getProp<velocity>(a)[1]*dt + vd.template getProp<force>(a)[1]*dt205;
491  real_number dz = vd.template getProp<velocity>(a)[2]*dt + vd.template getProp<force>(a)[2]*dt205;
492 
493  vd.getPos(a)[0] += dx;
494  vd.getPos(a)[1] += dy;
495  vd.getPos(a)[2] += dz;
496 
497  real_number velX = vd.template getProp<velocity>(a)[0];
498  real_number velY = vd.template getProp<velocity>(a)[1];
499  real_number velZ = vd.template getProp<velocity>(a)[2];
500 
501  real_number rhop = vd.template getProp<rho>(a);
502 
503  vd.template getProp<velocity>(a)[0] = vd.template getProp<velocity_prev>(a)[0] + vd.template getProp<force>(a)[0]*dt2;
504  vd.template getProp<velocity>(a)[1] = vd.template getProp<velocity_prev>(a)[1] + vd.template getProp<force>(a)[1]*dt2;
505  vd.template getProp<velocity>(a)[2] = vd.template getProp<velocity_prev>(a)[2] + vd.template getProp<force>(a)[2]*dt2;
506  vd.template getProp<rho>(a) = vd.template getProp<rho_prev>(a) + dt2*vd.template getProp<drho>(a);
507 
508  // Check if the particle go out of range in space and in density
509  if (vd.getPos(a)[0] < 0.0 || vd.getPos(a)[1] < 0.0 || vd.getPos(a)[2] < 0.0 ||
510  vd.getPos(a)[0] > 1.61 || vd.getPos(a)[1] > 0.68 || vd.getPos(a)[2] > 0.50 ||
511  vd.template getProp<rho>(a) < RhoMin || vd.template getProp<rho>(a) > RhoMax)
512  {vd.template getProp<red>(a) = 1;}
513  else
514  {vd.template getProp<red>(a) = 0;}
515 
516 
517  vd.template getProp<velocity_prev>(a)[0] = velX;
518  vd.template getProp<velocity_prev>(a)[1] = velY;
519  vd.template getProp<velocity_prev>(a)[2] = velZ;
520  vd.template getProp<rho_prev>(a) = rhop;
521 }
522 
523 size_t cnt = 0;
524 
525 void verlet_int(particles & vd, real_number dt)
526 {
527  // particle iterator
528  auto part = vd.getDomainIteratorGPU();
529 
530  real_number dt205 = dt*dt*0.5;
531  real_number dt2 = dt*2.0;
532 
533  CUDA_LAUNCH(verlet_int_gpu,part,vd.toKernel(),dt,dt2,dt205);
534 
535  // remove the particles marked
536  remove_marked<red>(vd);
537 
538  // increment the iteration counter
539  cnt++;
540 }
541 
542 template<typename vector_type>
543 __global__ void euler_int_gpu(vector_type vd,real_number dt, real_number dt205)
544 {
545  // ... a
546  auto a = GET_PARTICLE(vd);
547 
548  // if the particle is boundary
549  if (vd.template getProp<type>(a) == BOUNDARY)
550  {
551  // Update rho
552  real_number rhop = vd.template getProp<rho>(a);
553 
554  // Update only the density
555  vd.template getProp<velocity>(a)[0] = 0.0;
556  vd.template getProp<velocity>(a)[1] = 0.0;
557  vd.template getProp<velocity>(a)[2] = 0.0;
558  real_number rhonew = vd.template getProp<rho>(a) + dt*vd.template getProp<drho>(a);
559  vd.template getProp<rho>(a) = (rhonew < rho_zero)?rho_zero:rhonew;
560 
561  vd.template getProp<rho_prev>(a) = rhop;
562 
563  vd.template getProp<red>(a) = 0;
564 
565  return;
566  }
567 
568  //-Calculate displacement and update position / Calcula desplazamiento y actualiza posicion.
569  real_number dx = vd.template getProp<velocity>(a)[0]*dt + vd.template getProp<force>(a)[0]*dt205;
570  real_number dy = vd.template getProp<velocity>(a)[1]*dt + vd.template getProp<force>(a)[1]*dt205;
571  real_number dz = vd.template getProp<velocity>(a)[2]*dt + vd.template getProp<force>(a)[2]*dt205;
572 
573  vd.getPos(a)[0] += dx;
574  vd.getPos(a)[1] += dy;
575  vd.getPos(a)[2] += dz;
576 
577  real_number velX = vd.template getProp<velocity>(a)[0];
578  real_number velY = vd.template getProp<velocity>(a)[1];
579  real_number velZ = vd.template getProp<velocity>(a)[2];
580  real_number rhop = vd.template getProp<rho>(a);
581 
582  vd.template getProp<velocity>(a)[0] = vd.template getProp<velocity>(a)[0] + vd.template getProp<force>(a)[0]*dt;
583  vd.template getProp<velocity>(a)[1] = vd.template getProp<velocity>(a)[1] + vd.template getProp<force>(a)[1]*dt;
584  vd.template getProp<velocity>(a)[2] = vd.template getProp<velocity>(a)[2] + vd.template getProp<force>(a)[2]*dt;
585  vd.template getProp<rho>(a) = vd.template getProp<rho>(a) + dt*vd.template getProp<drho>(a);
586 
587  // Check if the particle go out of range in space and in density
588  if (vd.getPos(a)[0] < 0.0 || vd.getPos(a)[1] < 0.0 || vd.getPos(a)[2] < 0.0 ||
589  vd.getPos(a)[0] > 1.61 || vd.getPos(a)[1] > 0.68 || vd.getPos(a)[2] > 0.50 ||
590  vd.template getProp<rho>(a) < RhoMin || vd.template getProp<rho>(a) > RhoMax)
591  {vd.template getProp<red>(a) = 1;}
592  else
593  {vd.template getProp<red>(a) = 0;}
594 
595  vd.template getProp<velocity_prev>(a)[0] = velX;
596  vd.template getProp<velocity_prev>(a)[1] = velY;
597  vd.template getProp<velocity_prev>(a)[2] = velZ;
598  vd.template getProp<rho_prev>(a) = rhop;
599 }
600 
601 void euler_int(particles & vd, real_number dt)
602 {
603 
604  // particle iterator
605  auto part = vd.getDomainIteratorGPU();
606 
607  real_number dt205 = dt*dt*0.5;
608 
609  CUDA_LAUNCH(euler_int_gpu,part,vd.toKernel(),dt,dt205);
610 
611  // remove the particles
612  remove_marked<red>(vd);
613 
614  cnt++;
615 }
616 
617 template<typename vector_type, typename NN_type>
618 __global__ void sensor_pressure_gpu(vector_type vd, NN_type NN, Point<3,real_number> probe, real_number * press_tmp)
619 {
620  real_number tot_ker = 0.0;
621 
622  // Get the position of the probe i
623  Point<3,real_number> xp = probe;
624 
625  // get the iterator over the neighbohood particles of the probes position
626  auto itg = NN.getNNIteratorBox(NN.getCell(xp));
627  while (itg.isNext())
628  {
629  auto q = itg.get_sort();
630 
631  // Only the fluid particles are importants
632  if (vd.template getProp<type>(q) != FLUID)
633  {
634  ++itg;
635  continue;
636  }
637 
638  // Get the position of the neighborhood particle q
639  Point<3,real_number> xq = vd.getPos(q);
640 
641  // Calculate the contribution of the particle to the pressure
642  // of the probe
643  real_number r = sqrt(norm2(xp - xq));
644 
645  real_number ker = Wab(r) * (MassFluid / rho_zero);
646 
647  // Also keep track of the calculation of the summed
648  // kernel
649  tot_ker += ker;
650 
651  // Add the total pressure contribution
652  *press_tmp += vd.template getProp<Pressure>(q) * ker;
653 
654  // next neighborhood particle
655  ++itg;
656  }
657 
658  // We calculate the pressure normalizing the
659  // sum over all kernels
660  if (tot_ker == 0.0)
661  {*press_tmp = 0.0;}
662  else
663  {*press_tmp = 1.0 / tot_ker * *press_tmp;}
664 }
665 
666 template<typename Vector, typename CellList>
667 inline void sensor_pressure(Vector & vd,
668  CellList & NN,
671 {
672  Vcluster<> & v_cl = create_vcluster();
673 
674  press_t.add();
675 
676  for (size_t i = 0 ; i < probes.size() ; i++)
677  {
678  // A float variable to calculate the pressure of the problem
679  CudaMemory press_tmp_(sizeof(real_number));
680  real_number press_tmp;
681 
682  // if the probe is inside the processor domain
683  if (vd.getDecomposition().isLocal(probes.get(i)) == true)
684  {
685  CUDA_LAUNCH_DIM3(sensor_pressure_gpu,1,1,vd.toKernel_sorted(),NN.toKernel(),probes.get(i),(real_number *)press_tmp_.toKernel());
686 
687  vd.merge<Pressure>(NN);
688 
689  // move calculated pressure on
690  press_tmp_.deviceToHost();
691  press_tmp = *(real_number *)press_tmp_.getPointer();
692  }
693 
694  // This is not necessary in principle, but if you
695  // want to make all processor aware of the history of the calculated
696  // pressure we have to execute this
697  v_cl.sum(press_tmp);
698  v_cl.execute();
699 
700  // We add the calculated pressure into the history
701  press_t.last().add(press_tmp);
702  }
703 }
704 
705 int main(int argc, char* argv[])
706 {
707  // OpenFPM GPU distribution
708 
709  // OpenFPM by default select GPU 0 for process 0, gpu 1 for process 1 and so on ... . In case of multi-node is the same each node has
710  // has a group of processes and these group of processes are distributed across the available GPU on that node.
711 
712  // If you want to override this behaviour use #define EXTERNAL_SET_GPU at the very beginning of the program and use
713  // cudaSetDevice to select the GPU for that particular process before openfpm_init
714  // Note: To get the process number do MPI_Init and and use the MPI_Comm_rank. VCluster is not available before openfpm_init
715  // A code snippet in case we want to skip GPU 0
716  // MPI_Init(&argc,&argv);
717  // int rank;
718  // MPI_Comm_rank(MPI_COMM_WORLD,&rank);
719  // cudaSetDevice(1+rank);
720 
721  // initialize the library
722  openfpm_init(&argc,&argv);
723 
724 #if !defined(CUDA_ON_CPU) && !defined(__HIP__)
725  cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
726 #endif
727 
728  // It contain for each time-step the value detected by the probes
731 
732  probes.add({0.8779f,0.3f,0.02f});
733  probes.add({0.754f,0.31f,0.02f});
734 
735  // Here we define our domain a 2D box with internals from 0 to 1.0 for x and y
736  Box<3,real_number> domain({-0.05f,-0.05f,-0.05f},{1.7010f,0.7065f,0.511f});
737  size_t sz[3] = {413,179,133};
738 
739  // Fill W_dap
740  W_dap = 1.0/Wab(H/1.5);
741 
742  // Here we define the boundary conditions of our problem
743  size_t bc[3]={NON_PERIODIC,NON_PERIODIC,NON_PERIODIC};
744 
745  // extended boundary around the domain, and the processor domain
746  Ghost<3,real_number> g(2*H);
747 
748  particles vd(0,domain,bc,g,DEC_GRAN(128));
749 
751 
752  // You can ignore all these dp/2.0 is a trick to reach the same initialization
753  // of Dual-SPH that use a different criteria to draw particles
754  Box<3,real_number> fluid_box({dp/2.0f,dp/2.0f,dp/2.0f},{0.4f+dp/2.0f,0.67f-dp/2.0f,0.3f+dp/2.0f});
755 
756  // return an iterator to the fluid particles to add to vd
757  auto fluid_it = DrawParticles::DrawBox(vd,sz,domain,fluid_box);
758 
759  // here we fill some of the constants needed by the simulation
760  max_fluid_height = fluid_it.getBoxMargins().getHigh(2);
761  h_swl = fluid_it.getBoxMargins().getHigh(2) - fluid_it.getBoxMargins().getLow(2);
762  B = (coeff_sound)*(coeff_sound)*gravity*h_swl*rho_zero / gamma_;
763  cbar = coeff_sound * sqrt(gravity * h_swl);
764 
765  // for each particle inside the fluid box ...
766  while (fluid_it.isNext())
767  {
768  // ... add a particle ...
769  vd.add();
770 
771  // ... and set it position ...
772  vd.getLastPos()[0] = fluid_it.get().get(0);
773  vd.getLastPos()[1] = fluid_it.get().get(1);
774  vd.getLastPos()[2] = fluid_it.get().get(2);
775 
776  // and its type.
777  vd.template getLastProp<type>() = FLUID;
778 
779  // We also initialize the density of the particle and the hydro-static pressure given by
780  //
781  // rho_zero*g*h = P
782  //
783  // rho_p = (P/B + 1)^(1/Gamma) * rho_zero
784  //
785 
786  vd.template getLastProp<Pressure>() = rho_zero * gravity * (max_fluid_height - fluid_it.get().get(2));
787 
788  vd.template getLastProp<rho>() = pow(vd.template getLastProp<Pressure>() / B + 1, 1.0/gamma_) * rho_zero;
789  vd.template getLastProp<rho_prev>() = vd.template getLastProp<rho>();
790  vd.template getLastProp<velocity>()[0] = 0.0;
791  vd.template getLastProp<velocity>()[1] = 0.0;
792  vd.template getLastProp<velocity>()[2] = 0.0;
793 
794  vd.template getLastProp<velocity_prev>()[0] = 0.0;
795  vd.template getLastProp<velocity_prev>()[1] = 0.0;
796  vd.template getLastProp<velocity_prev>()[2] = 0.0;
797 
798  // next fluid particle
799  ++fluid_it;
800  }
801 
802  // Recipient
803  Box<3,real_number> recipient1({0.0f,0.0f,0.0f},{1.6f+dp/2.0f,0.67f+dp/2.0f,0.4f+dp/2.0f});
804  Box<3,real_number> recipient2({dp,dp,dp},{1.6f-dp/2.0f,0.67f-dp/2.0f,0.4f+dp/2.0f});
805 
806  Box<3,real_number> obstacle1({0.9f,0.24f-dp/2.0f,0.0f},{1.02f+dp/2.0f,0.36f,0.45f+dp/2.0f});
807  Box<3,real_number> obstacle2({0.9f+dp,0.24f+dp/2.0f,0.0f},{1.02f-dp/2.0f,0.36f-dp,0.45f-dp/2.0f});
808  Box<3,real_number> obstacle3({0.9f+dp,0.24f,0.0f},{1.02f,0.36f,0.45f});
809 
811  holes.add(recipient2);
812  holes.add(obstacle1);
813  auto bound_box = DrawParticles::DrawSkin(vd,sz,domain,holes,recipient1);
814 
815  while (bound_box.isNext())
816  {
817  vd.add();
818 
819  vd.getLastPos()[0] = bound_box.get().get(0);
820  vd.getLastPos()[1] = bound_box.get().get(1);
821  vd.getLastPos()[2] = bound_box.get().get(2);
822 
823  vd.template getLastProp<type>() = BOUNDARY;
824  vd.template getLastProp<rho>() = rho_zero;
825  vd.template getLastProp<rho_prev>() = rho_zero;
826  vd.template getLastProp<velocity>()[0] = 0.0;
827  vd.template getLastProp<velocity>()[1] = 0.0;
828  vd.template getLastProp<velocity>()[2] = 0.0;
829 
830  vd.template getLastProp<velocity_prev>()[0] = 0.0;
831  vd.template getLastProp<velocity_prev>()[1] = 0.0;
832  vd.template getLastProp<velocity_prev>()[2] = 0.0;
833 
834  ++bound_box;
835  }
836 
837  auto obstacle_box = DrawParticles::DrawSkin(vd,sz,domain,obstacle2,obstacle1);
838 
839  while (obstacle_box.isNext())
840  {
841  vd.add();
842 
843  vd.getLastPos()[0] = obstacle_box.get().get(0);
844  vd.getLastPos()[1] = obstacle_box.get().get(1);
845  vd.getLastPos()[2] = obstacle_box.get().get(2);
846 
847  vd.template getLastProp<type>() = BOUNDARY;
848  vd.template getLastProp<rho>() = rho_zero;
849  vd.template getLastProp<rho_prev>() = rho_zero;
850  vd.template getLastProp<velocity>()[0] = 0.0;
851  vd.template getLastProp<velocity>()[1] = 0.0;
852  vd.template getLastProp<velocity>()[2] = 0.0;
853 
854  vd.template getLastProp<velocity_prev>()[0] = 0.0;
855  vd.template getLastProp<velocity_prev>()[1] = 0.0;
856  vd.template getLastProp<velocity_prev>()[2] = 0.0;
857 
858  ++obstacle_box;
859  }
860 
861  vd.map();
862 
863  // Now that we fill the vector with particles
864  ModelCustom md;
865 
866  vd.addComputationCosts(md);
867  vd.getDecomposition().decompose();
868  vd.map();
869 
871 
872  // Ok the initialization is done on CPU on GPU we are doing the main loop, so first we offload all properties on GPU
873 
874  vd.hostToDevicePos();
875  vd.template hostToDeviceProp<type,rho,rho_prev,Pressure,velocity,velocity_prev>();
876 
877  vd.ghost_get<type,rho,Pressure,velocity>(RUN_ON_DEVICE);
878 
879  auto NN = vd.getCellListGPU/*<CELLLIST_GPU_SPARSE<3,float>>*/(2*H / 2.0);
880  //NN.setBoxNN(2);
881 
882  timer tot_sim;
883  tot_sim.start();
884 
885  size_t write = 0;
886  size_t it = 0;
887  size_t it_reb = 0;
888  real_number t = 0.0;
889  while (t <= t_end)
890  {
891  Vcluster<> & v_cl = create_vcluster();
892  timer it_time;
893  it_time.start();
894 
896  it_reb++;
897  if (it_reb == 300)
898  {
899  vd.map(RUN_ON_DEVICE);
900 
901  // Rebalancer for now work on CPU , so move to CPU
902  vd.deviceToHostPos();
903  vd.template deviceToHostProp<type>();
904 
905  it_reb = 0;
906  ModelCustom md;
907  vd.addComputationCosts(md);
908  vd.getDecomposition().decompose();
909 
910  if (v_cl.getProcessUnitID() == 0)
911  {std::cout << "REBALANCED " << it_reb << std::endl;}
912  }
913 
914  vd.map(RUN_ON_DEVICE);
915 
916  // it sort the vector (doesn not seem to produce some advantage)
917  // note force calculation is anyway sorted calculation
918  //vd.make_sort(NN);
919 
920  // Calculate pressure from the density
921  EqState(vd);
922 
923  real_number max_visc = 0.0;
924 
925  vd.ghost_get<type,rho,Pressure,velocity>(RUN_ON_DEVICE);
926 
927  // Calc forces
928  calc_forces(vd,NN,max_visc,cnt);
929 
930 
931  // Get the maximum viscosity term across processors
932  v_cl.max(max_visc);
933  v_cl.execute();
934 
935  // Calculate delta t integration
936  real_number dt = calc_deltaT(vd,max_visc);
937 
938  // VerletStep or euler step
939  it++;
940  if (it < 40)
941  verlet_int(vd,dt);
942  else
943  {
944  euler_int(vd,dt);
945  it = 0;
946  }
947 
948  t += dt;
949 
950  if (write < t*10)
951  {
952  // Sensor pressure require update ghost, so we ensure that particles are distributed correctly
953  // and ghost are updated
954  vd.map(RUN_ON_DEVICE);
955  vd.ghost_get<type,rho,Pressure,velocity>(RUN_ON_DEVICE);
956  vd.updateCellList(NN);
957 
958  // calculate the pressure at the sensor points
959  //sensor_pressure(vd,NN,press_t,probes);
960 
961  std::cout << "OUTPUT " << dt << std::endl;
962 
963  // When we write we have move all the particles information back to CPU
964 
965  vd.deviceToHostPos();
966  vd.deviceToHostProp<type,rho,rho_prev,Pressure,drho,force,velocity,velocity_prev,red,red2>();
967 
968  // We copy on another vector with less properties to reduce the size of the output
970 
971  auto ito = vd.getDomainIterator();
972 
973  while(ito.isNext())
974  {
975  auto p = ito.get();
976 
977  vd_out.add();
978 
979  vd_out.getLastPos()[0] = vd.getPos(p)[0];
980  vd_out.getLastPos()[1] = vd.getPos(p)[1];
981  vd_out.getLastPos()[2] = vd.getPos(p)[2];
982 
983  vd_out.template getLastProp<0>() = vd.template getProp<type>(p);
984 
985  vd_out.template getLastProp<1>()[0] = vd.template getProp<velocity>(p)[0];
986  vd_out.template getLastProp<1>()[1] = vd.template getProp<velocity>(p)[1];
987  vd_out.template getLastProp<1>()[2] = vd.template getProp<velocity>(p)[2];
988 
989  ++ito;
990  }
991 
992  vd_out.write_frame("Particles",write,VTK_WRITER | FORMAT_BINARY);
993  write++;
994 
995  if (v_cl.getProcessUnitID() == 0)
996  {std::cout << "TIME: " << t << " write " << it_time.getwct() << " " << it_reb << " " << cnt << " Max visc: " << max_visc << " " << vd.size_local() << std::endl;}
997  }
998  else
999  {
1000  if (v_cl.getProcessUnitID() == 0)
1001  {std::cout << "TIME: " << t << " " << it_time.getwct() << " " << it_reb << " " << cnt << " Max visc: " << max_visc << " " << vd.size_local() << std::endl;}
1002  }
1003  }
1004 
1005  tot_sim.stop();
1006  std::cout << "Time to complete: " << tot_sim.getwct() << " seconds" << std::endl;
1007 
1008 
1009  openfpm_finalize();
1010 }
1011 
1012 #else
1013 
1014 int main(int argc, char* argv[])
1015 {
1016  return 0;
1017 }
1018 
1019 #endif
size_t getProcessUnitID()
Get the process unit id.
Sparse Matrix implementation stub object when OpenFPM is compiled with no linear algebra support.
Definition: Vector.hpp:39
__device__ __host__ T getLow(int i) const
get the i-coordinate of the low bound interval of the box
Definition: Box.hpp:556
auto getPos(vect_dist_key_dx vec_key) -> decltype(v_pos.template get< 0 >(vec_key.getKey()))
Get the position of an element.
static PointIteratorSkin< dim, T, typename vd_type::Decomposition_type > DrawSkin(vd_type &vd, size_t(&sz)[dim], Box< dim, T > &domain, Box< dim, T > &sub_A, Box< dim, T > &sub_B)
Draw particles in a box B excluding the area of a second box A (B - A)
This class implement the point shape in an N-dimensional space.
Definition: Point.hpp:27
double getwct()
Return the elapsed real time.
Definition: timer.hpp:130
Definition: Ghost.hpp:39
Implementation of VCluster class.
Definition: VCluster.hpp:58
void execute()
Execute all the requests.
This class define the domain decomposition interface.
__device__ __host__ const T & get(unsigned int i) const
Get coordinate.
Definition: Point.hpp:172
void start()
Start the timer.
Definition: timer.hpp:90
This class represent an N-dimensional box.
Definition: Box.hpp:60
void sum(T &num)
Sum the numbers across all processors and get the result.
void updateCellList(CellL &cell_list, bool no_se3=false, cl_construct_opt opt=cl_construct_opt::Full)
Update a cell list using the stored particles.
static PointIterator< dim, T, typename vd_type::Decomposition_type > DrawBox(vd_type &vd, size_t(&sz)[dim], Box< dim, T > &domain, Box< dim, T > &sub)
Draw particles in a box.
Distributed vector.
void max(T &num)
Get the maximum number across all processors (or reduction with infinity norm)
temporal buffer for reductions
Model for Dynamic load balancing.
Definition: main.cpp:221
Implementation of 1-D std::vector like structure.
Definition: map_vector.hpp:202
Class for FAST cell list implementation.
Definition: CellList.hpp:356
__device__ __host__ T getHigh(int i) const
get the high interval of the box
Definition: Box.hpp:567
auto get(size_t cell, size_t ele) -> decltype(this->Mem_type::get(cell, ele))
Get an element in the cell.
Definition: CellList.hpp:867
Class for cpu time benchmarking.
Definition: timer.hpp:27
void stop()
Stop the timer.
Definition: timer.hpp:119