OpenFPM  5.2.0
Project that contain the implementation of distributed structures
main.cu
1 
61 #ifdef __NVCC__
62 
63 #include "Vector/vector_dist.hpp"
64 #include <math.h>
65 #include "Draw/DrawParticles.hpp"
66 
67 typedef float real_number;
68 
69 // A constant to indicate boundary particles
70 #define BOUNDARY 0
71 
72 // A constant to indicate fluid particles
73 #define FLUID 1
74 
75 // initial spacing between particles dp in the formulas
76 const real_number dp = 0.0085;
77 // Maximum height of the fluid water
78 // is going to be calculated and filled later on
79 real_number h_swl = 0.0;
80 
81 // c_s in the formulas (constant used to calculate the sound speed)
82 const real_number coeff_sound = 20.0;
83 
84 // gamma in the formulas
85 const real_number gamma_ = 7.0;
86 
87 // sqrt(3.0*dp*dp) support of the kernel
88 const real_number H = 0.0147224318643;
89 
90 // Eta in the formulas
91 const real_number Eta2 = 0.01 * H*H;
92 
93 // alpha in the formula
94 const real_number visco = 0.1;
95 
96 // cbar in the formula (calculated later)
97 real_number cbar = 0.0;
98 
99 // Mass of the fluid particles
100 const real_number MassFluid = 0.000614125;
101 
102 // Mass of the boundary particles
103 const real_number MassBound = 0.000614125;
104 
105 // End simulation time
106 #ifdef TEST_RUN
107 const real_number t_end = 0.001;
108 #else
109 const real_number t_end = 1.5;
110 #endif
111 
112 // Gravity acceleration
113 const real_number gravity = 9.81;
114 
115 // Reference densitu 1000Kg/m^3
116 const real_number rho_zero = 1000.0;
117 
118 // Filled later require h_swl, it is b in the formulas
119 real_number B = 0.0;
120 
121 // Constant used to define time integration
122 const real_number CFLnumber = 0.2;
123 
124 // Minimum T
125 const real_number DtMin = 0.00001;
126 
127 // Minimum Rho allowed
128 const real_number RhoMin = 700.0;
129 
130 // Maximum Rho allowed
131 const real_number RhoMax = 1300.0;
132 
133 // Filled in initialization
134 real_number max_fluid_height = 0.0;
135 
136 // Properties
137 
138 // FLUID or BOUNDARY
139 const size_t type = 0;
140 
141 // Density
142 const int rho = 1;
143 
144 // Density at step n-1
145 const int rho_prev = 2;
146 
147 // Pressure
148 const int Pressure = 3;
149 
150 // Delta rho calculated in the force calculation
151 const int drho = 4;
152 
153 // calculated force
154 const int force = 5;
155 
156 // velocity
157 const int velocity = 6;
158 
159 // velocity at previous step
160 const int velocity_prev = 7;
161 
162 const int red = 8;
163 
164 const int red2 = 9;
165 
166 // Type of the vector containing particles
168 // | | | | | | | | | |
169 // | | | | | | | | | |
170 // type density density Pressure delta force velocity velocity reduction another
171 // at n-1 density at n - 1 buffer reduction buffer
172 
173 
174 struct ModelCustom
175 {
176  template<typename Decomposition, typename vector> inline void addComputation(Decomposition & dec,
177  vector & vd,
178  size_t v,
179  size_t p)
180  {
181  if (vd.template getProp<type>(p) == FLUID)
182  dec.addComputationCost(v,4);
183  else
184  dec.addComputationCost(v,3);
185  }
186 
187  template<typename Decomposition> inline void applyModel(Decomposition & dec, size_t v)
188  {
189  dec.setSubSubDomainComputationCost(v, dec.getSubSubDomainComputationCost(v) * dec.getSubSubDomainComputationCost(v));
190  }
191 
192  real_number distributionTol()
193  {
194  return 1.01;
195  }
196 };
197 
198 template<typename vd_type>
199 __global__ void EqState_gpu(vd_type vd, real_number B)
200 {
201  auto a = GET_PARTICLE(vd);
202 
203  real_number rho_a = vd.template getProp<rho>(a);
204  real_number rho_frac = rho_a / rho_zero;
205 
206  vd.template getProp<Pressure>(a) = B*( rho_frac*rho_frac*rho_frac*rho_frac*rho_frac*rho_frac*rho_frac - 1.0);
207 }
208 
209 inline void EqState(particles & vd)
210 {
211  auto it = vd.getDomainIteratorGPU();
212 
213  CUDA_LAUNCH(EqState_gpu,it,vd.toKernel(),B);
214 }
215 
216 
217 const real_number a2 = 1.0/M_PI/H/H/H;
218 
219 inline __device__ __host__ real_number Wab(real_number r)
220 {
221  r /= H;
222 
223  if (r < 1.0)
224  return (1.0 - 3.0/2.0*r*r + 3.0/4.0*r*r*r)*a2;
225  else if (r < 2.0)
226  return (1.0/4.0*(2.0 - r)*(2.0 - r)*(2.0 - r))*a2;
227  else
228  return 0.0;
229 }
230 
231 
232 const real_number c1 = -3.0/M_PI/H/H/H/H;
233 const real_number d1 = 9.0/4.0/M_PI/H/H/H/H;
234 const real_number c2 = -3.0/4.0/M_PI/H/H/H/H;
235 const real_number a2_4 = 0.25*a2;
236 // Filled later
237 real_number W_dap = 0.0;
238 
239 inline __device__ __host__ void DWab(Point<3,real_number> & dx, Point<3,real_number> & DW, real_number r)
240 {
241  const real_number qq=r/H;
242 
243  real_number qq2 = qq * qq;
244  real_number fac1 = (c1*qq + d1*qq2)/r;
245  real_number b1 = (qq < 1.0f)?1.0f:0.0f;
246 
247  real_number wqq = (2.0f - qq);
248  real_number fac2 = c2 * wqq * wqq / r;
249  real_number b2 = (qq >= 1.0f && qq < 2.0f)?1.0f:0.0f;
250 
251  real_number factor = (b1*fac1 + b2*fac2);
252 
253  DW.get(0) = factor * dx.get(0);
254  DW.get(1) = factor * dx.get(1);
255  DW.get(2) = factor * dx.get(2);
256 }
257 
258 // Tensile correction
259 inline __device__ __host__ real_number Tensile(real_number r, real_number rhoa, real_number rhob, real_number prs1, real_number prs2, real_number W_dap)
260 {
261  const real_number qq=r/H;
262  //-Cubic Spline kernel
263  real_number wab;
264  if(r>H)
265  {
266  real_number wqq1=2.0f-qq;
267  real_number wqq2=wqq1*wqq1;
268 
269  wab=a2_4*(wqq2*wqq1);
270  }
271  else
272  {
273  real_number wqq2=qq*qq;
274  real_number wqq3=wqq2*qq;
275 
276  wab=a2*(1.0f-1.5f*wqq2+0.75f*wqq3);
277  }
278 
279  //-Tensile correction.
280  real_number fab=wab*W_dap;
281  fab*=fab; fab*=fab; //fab=fab^4
282  const real_number tensilp1=(prs1/(rhoa*rhoa))*(prs1>0.0f? 0.01f: -0.2f);
283  const real_number tensilp2=(prs2/(rhob*rhob))*(prs2>0.0f? 0.01f: -0.2f);
284 
285  return (fab*(tensilp1+tensilp2));
286 }
287 
288 
289 inline __device__ __host__ real_number Pi(const Point<3,real_number> & dr, real_number rr2, Point<3,real_number> & dv, real_number rhoa, real_number rhob, real_number massb, real_number cbar, real_number & visc)
290 {
291  const real_number dot = dr.get(0)*dv.get(0) + dr.get(1)*dv.get(1) + dr.get(2)*dv.get(2);
292  const real_number dot_rr2 = dot/(rr2+Eta2);
293  visc=(dot_rr2 < visc)?visc:dot_rr2;
294 
295  if(dot < 0)
296  {
297  const float amubar=H*dot_rr2;
298  const float robar=(rhoa+rhob)*0.5f;
299  const float pi_visc=(-visco*cbar*amubar/robar);
300 
301  return pi_visc;
302  }
303  else
304  return 0.0f;
305 }
306 
307 template<typename particles_type, typename NN_type>
308 __global__ void calc_forces_gpu(particles_type vd, NN_type NN, real_number W_dap, real_number cbar)
309 {
310  auto a = GET_PARTICLE(vd);
311 
312  real_number max_visc = 0.0f;
313 
314  // Get the position xp of the particle
315  Point<3,real_number> xa = vd.getPos(a);
316 
317  // Type of the particle
318  unsigned int typea = vd.template getProp<type>(a);
319 
320  // Get the density of the of the particle a
321  real_number rhoa = vd.template getProp<rho>(a);
322 
323  // Get the pressure of the particle a
324  real_number Pa = vd.template getProp<Pressure>(a);
325 
326  // Get the Velocity of the particle a
327  Point<3,real_number> va = vd.template getProp<velocity>(a);
328 
329  // Reset the force counter (- gravity on zeta direction)
330  Point<3,real_number> force_;
331  force_.get(0) = 0.0f;
332  force_.get(1) = 0.0f;
333  force_.get(2) = -gravity;
334  real_number drho_ = 0.0f;
335 
336  // Get an iterator over the neighborhood particles of p
337  auto Np = NN.getNNIteratorBox(NN.getCell(xa));
338 
339  // For each neighborhood particle
340  while (Np.isNext() == true)
341  {
342  // ... q
343  auto b = Np.get();
344 
345  // Get the position xp of the particle
346  Point<3,real_number> xb = vd.getPos(b);
347 
348  if (a == b) {++Np; continue;};
349 
350  unsigned int typeb = vd.template getProp<type>(b);
351 
352  real_number massb = (typeb == FLUID)?MassFluid:MassBound;
353  Point<3,real_number> vb = vd.template getProp<velocity>(b);
354  real_number Pb = vd.template getProp<Pressure>(b);
355  real_number rhob = vd.template getProp<rho>(b);
356 
357  // Get the distance between p and q
358  Point<3,real_number> dr = xa - xb;
359  // take the norm of this vector
360  real_number r2 = norm2(dr);
361 
362  // if they interact
363  if (r2 < 4.0*H*H && r2 >= 1e-16)
364  {
365  real_number r = sqrt(r2);
366 
367  Point<3,real_number> v_rel = va - vb;
368 
370  DWab(dr,DW,r);
371 
372  real_number factor = - massb*((Pa + Pb) / (rhoa * rhob) + Tensile(r,rhoa,rhob,Pa,Pb,W_dap) + Pi(dr,r2,v_rel,rhoa,rhob,massb,cbar,max_visc));
373 
374  // Bound - Bound does not produce any change
375  // factor = (typea == BOUNDARY && typeb == BOUNDARY)?0.0f:factor;
376  factor = (typea != FLUID)?0.0f:factor;
377 
378  force_.get(0) += factor * DW.get(0);
379  force_.get(1) += factor * DW.get(1);
380  force_.get(2) += factor * DW.get(2);
381 
382  real_number scal = massb*(v_rel.get(0)*DW.get(0)+v_rel.get(1)*DW.get(1)+v_rel.get(2)*DW.get(2));
383  scal = (typea == BOUNDARY && typeb == BOUNDARY)?0.0f:scal;
384 
385  drho_ += scal;
386  }
387 
388  ++Np;
389  }
390 
391  vd.template getProp<red>(a) = max_visc;
392 
393  vd.template getProp<force>(a)[0] = force_.get(0);
394  vd.template getProp<force>(a)[1] = force_.get(1);
395  vd.template getProp<force>(a)[2] = force_.get(2);
396  vd.template getProp<drho>(a) = drho_;
397 }
398 
399 template<typename CellList> inline void calc_forces(particles & vd, CellList & NN, real_number & max_visc, size_t cnt)
400 {
401  auto part = vd.getDomainIteratorGPU(32);
402 
403  // Update the cell-list
404  vd.updateCellListGPU(NN);
405 
406  CUDA_LAUNCH(calc_forces_gpu,part,vd.toKernel(),NN.toKernel(),W_dap,cbar);
407 
408  max_visc = reduce_local<red,_max_>(vd);
409 }
410 
411 template<typename vector_type>
412 __global__ void max_acceleration_and_velocity_gpu(vector_type vd)
413 {
414  auto a = GET_PARTICLE(vd);
415 
416  Point<3,real_number> acc(vd.template getProp<force>(a));
417  vd.template getProp<red>(a) = norm(acc);
418 
419  Point<3,real_number> vel(vd.template getProp<velocity>(a));
420  vd.template getProp<red2>(a) = norm(vel);
421 }
422 
423 void max_acceleration_and_velocity(particles & vd, real_number & max_acc, real_number & max_vel)
424 {
425  // Calculate the maximum acceleration
426  auto part = vd.getDomainIteratorGPU();
427 
428  CUDA_LAUNCH(max_acceleration_and_velocity_gpu,part,vd.toKernel());
429 
430  max_acc = reduce_local<red,_max_>(vd);
431  max_vel = reduce_local<red2,_max_>(vd);
432 
433  Vcluster<> & v_cl = create_vcluster();
434  v_cl.max(max_acc);
435  v_cl.max(max_vel);
436  v_cl.execute();
437 }
438 
439 
440 real_number calc_deltaT(particles & vd, real_number ViscDtMax)
441 {
442  real_number Maxacc = 0.0;
443  real_number Maxvel = 0.0;
444  max_acceleration_and_velocity(vd,Maxacc,Maxvel);
445 
446  //-dt1 depends on force per unit mass.
447  const real_number dt_f = (Maxacc)?sqrt(H/Maxacc):std::numeric_limits<float>::max();
448 
449  //-dt2 combines the Courant and the viscous time-step controls.
450  const real_number dt_cv = H/(std::max(cbar,Maxvel*10.f) + H*ViscDtMax);
451 
452  //-dt new value of time step.
453  real_number dt=real_number(CFLnumber)*std::min(dt_f,dt_cv);
454  if(dt<real_number(DtMin))
455  {dt=real_number(DtMin);}
456 
457  return dt;
458 }
459 
460 template<typename vector_dist_type>
461 __global__ void verlet_int_gpu(vector_dist_type vd, real_number dt, real_number dt2, real_number dt205)
462 {
463  // ... a
464  auto a = GET_PARTICLE(vd);
465 
466  // if the particle is boundary
467  if (vd.template getProp<type>(a) == BOUNDARY)
468  {
469  // Update rho
470  real_number rhop = vd.template getProp<rho>(a);
471 
472  // Update only the density
473  vd.template getProp<velocity>(a)[0] = 0.0;
474  vd.template getProp<velocity>(a)[1] = 0.0;
475  vd.template getProp<velocity>(a)[2] = 0.0;
476  real_number rhonew = vd.template getProp<rho_prev>(a) + dt2*vd.template getProp<drho>(a);
477  vd.template getProp<rho>(a) = (rhonew < rho_zero)?rho_zero:rhonew;
478 
479  vd.template getProp<rho_prev>(a) = rhop;
480 
481  vd.template getProp<red>(a) = 0;
482 
483  return;
484  }
485 
486  //-Calculate displacement and update position
487  real_number dx = vd.template getProp<velocity>(a)[0]*dt + vd.template getProp<force>(a)[0]*dt205;
488  real_number dy = vd.template getProp<velocity>(a)[1]*dt + vd.template getProp<force>(a)[1]*dt205;
489  real_number dz = vd.template getProp<velocity>(a)[2]*dt + vd.template getProp<force>(a)[2]*dt205;
490 
491  vd.getPos(a)[0] += dx;
492  vd.getPos(a)[1] += dy;
493  vd.getPos(a)[2] += dz;
494 
495  real_number velX = vd.template getProp<velocity>(a)[0];
496  real_number velY = vd.template getProp<velocity>(a)[1];
497  real_number velZ = vd.template getProp<velocity>(a)[2];
498 
499  real_number rhop = vd.template getProp<rho>(a);
500 
501  vd.template getProp<velocity>(a)[0] = vd.template getProp<velocity_prev>(a)[0] + vd.template getProp<force>(a)[0]*dt2;
502  vd.template getProp<velocity>(a)[1] = vd.template getProp<velocity_prev>(a)[1] + vd.template getProp<force>(a)[1]*dt2;
503  vd.template getProp<velocity>(a)[2] = vd.template getProp<velocity_prev>(a)[2] + vd.template getProp<force>(a)[2]*dt2;
504  vd.template getProp<rho>(a) = vd.template getProp<rho_prev>(a) + dt2*vd.template getProp<drho>(a);
505 
506  // Check if the particle go out of range in space and in density, if they do mark them to remove it later
507  if (vd.getPos(a)[0] < 0.0 || vd.getPos(a)[1] < 0.0 || vd.getPos(a)[2] < 0.0 ||
508  vd.getPos(a)[0] > 1.61 || vd.getPos(a)[1] > 0.68 || vd.getPos(a)[2] > 0.50 ||
509  vd.template getProp<rho>(a) < RhoMin || vd.template getProp<rho>(a) > RhoMax)
510  {vd.template getProp<red>(a) = 1;}
511  else
512  {vd.template getProp<red>(a) = 0;}
513 
514 
515  vd.template getProp<velocity_prev>(a)[0] = velX;
516  vd.template getProp<velocity_prev>(a)[1] = velY;
517  vd.template getProp<velocity_prev>(a)[2] = velZ;
518  vd.template getProp<rho_prev>(a) = rhop;
519 }
520 
521 size_t cnt = 0;
522 
523 void verlet_int(particles & vd, real_number dt)
524 {
525  // particle iterator
526  auto part = vd.getDomainIteratorGPU();
527 
528  real_number dt205 = dt*dt*0.5;
529  real_number dt2 = dt*2.0;
530 
531  CUDA_LAUNCH(verlet_int_gpu,part,vd.toKernel(),dt,dt2,dt205);
532 
533  // remove the particles marked
534  remove_marked<red>(vd);
535 
536  // increment the iteration counter
537  cnt++;
538 }
539 
540 template<typename vector_type>
541 __global__ void euler_int_gpu(vector_type vd,real_number dt, real_number dt205)
542 {
543  // ... a
544  auto a = GET_PARTICLE(vd);
545 
546  // if the particle is boundary
547  if (vd.template getProp<type>(a) == BOUNDARY)
548  {
549  // Update rho
550  real_number rhop = vd.template getProp<rho>(a);
551 
552  // Update only the density
553  vd.template getProp<velocity>(a)[0] = 0.0;
554  vd.template getProp<velocity>(a)[1] = 0.0;
555  vd.template getProp<velocity>(a)[2] = 0.0;
556  real_number rhonew = vd.template getProp<rho>(a) + dt*vd.template getProp<drho>(a);
557  vd.template getProp<rho>(a) = (rhonew < rho_zero)?rho_zero:rhonew;
558 
559  vd.template getProp<rho_prev>(a) = rhop;
560 
561  vd.template getProp<red>(a) = 0;
562 
563  return;
564  }
565 
566  //-Calculate displacement and update position
567  real_number dx = vd.template getProp<velocity>(a)[0]*dt + vd.template getProp<force>(a)[0]*dt205;
568  real_number dy = vd.template getProp<velocity>(a)[1]*dt + vd.template getProp<force>(a)[1]*dt205;
569  real_number dz = vd.template getProp<velocity>(a)[2]*dt + vd.template getProp<force>(a)[2]*dt205;
570 
571  vd.getPos(a)[0] += dx;
572  vd.getPos(a)[1] += dy;
573  vd.getPos(a)[2] += dz;
574 
575  real_number velX = vd.template getProp<velocity>(a)[0];
576  real_number velY = vd.template getProp<velocity>(a)[1];
577  real_number velZ = vd.template getProp<velocity>(a)[2];
578  real_number rhop = vd.template getProp<rho>(a);
579 
580  vd.template getProp<velocity>(a)[0] = vd.template getProp<velocity>(a)[0] + vd.template getProp<force>(a)[0]*dt;
581  vd.template getProp<velocity>(a)[1] = vd.template getProp<velocity>(a)[1] + vd.template getProp<force>(a)[1]*dt;
582  vd.template getProp<velocity>(a)[2] = vd.template getProp<velocity>(a)[2] + vd.template getProp<force>(a)[2]*dt;
583  vd.template getProp<rho>(a) = vd.template getProp<rho>(a) + dt*vd.template getProp<drho>(a);
584 
585  // Check if the particle go out of range in space and in density
586  if (vd.getPos(a)[0] < 0.0 || vd.getPos(a)[1] < 0.0 || vd.getPos(a)[2] < 0.0 ||
587  vd.getPos(a)[0] > 1.61 || vd.getPos(a)[1] > 0.68 || vd.getPos(a)[2] > 0.50 ||
588  vd.template getProp<rho>(a) < RhoMin || vd.template getProp<rho>(a) > RhoMax)
589  {vd.template getProp<red>(a) = 1;}
590  else
591  {vd.template getProp<red>(a) = 0;}
592 
593  vd.template getProp<velocity_prev>(a)[0] = velX;
594  vd.template getProp<velocity_prev>(a)[1] = velY;
595  vd.template getProp<velocity_prev>(a)[2] = velZ;
596  vd.template getProp<rho_prev>(a) = rhop;
597 }
598 
599 void euler_int(particles & vd, real_number dt)
600 {
601 
602  // particle iterator
603  auto part = vd.getDomainIteratorGPU();
604 
605  real_number dt205 = dt*dt*0.5;
606 
607  // euler_int_gpu<<<part.wthr,part.thr>>>(vd.toKernel(),dt,dt205);
608  CUDA_LAUNCH(euler_int_gpu,part,vd.toKernel(),dt,dt205);
609 
610  // remove the particles
611  remove_marked<red>(vd);
612 
613  cnt++;
614 }
615 
616 template<typename vector_type, typename NN_type>
617 __global__ void sensor_pressure_gpu(vector_type vd, NN_type NN, Point<3,real_number> probe, real_number * press_tmp)
618 {
619  real_number tot_ker = 0.0;
620 
621  // Get the position of the probe i
622  Point<3,real_number> xp = probe;
623 
624  // get the iterator over the neighbohood particles of the probes position
625  auto itg = NN.getNNIteratorBox(NN.getCell(xp));
626  while (itg.isNext())
627  {
628  auto q = itg.get();
629 
630  // Only the fluid particles are importants
631  if (vd.template getProp<type>(q) != FLUID)
632  {
633  ++itg;
634  continue;
635  }
636 
637  // Get the position of the neighborhood particle q
638  Point<3,real_number> xq = vd.getPos(q);
639 
640  // Calculate the contribution of the particle to the pressure
641  // of the probe
642  real_number r = sqrt(norm2(xp - xq));
643 
644  real_number ker = Wab(r) * (MassFluid / rho_zero);
645 
646  // Also keep track of the calculation of the summed
647  // kernel
648  tot_ker += ker;
649 
650  // Add the total pressure contribution
651  *press_tmp += vd.template getProp<Pressure>(q) * ker;
652 
653  // next neighborhood particle
654  ++itg;
655  }
656 
657  // We calculate the pressure normalizing the
658  // sum over all kernels
659  if (tot_ker == 0.0)
660  {*press_tmp = 0.0;}
661  else
662  {*press_tmp = 1.0 / tot_ker * *press_tmp;}
663 }
664 
665 template<typename Vector, typename CellList>
666 inline void sensor_pressure(Vector & vd,
667  CellList & NN,
670 {
671  Vcluster<> & v_cl = create_vcluster();
672 
673  press_t.add();
674 
675  for (size_t i = 0 ; i < probes.size() ; i++)
676  {
677  // A float variable to calculate the pressure of the problem
678  CudaMemory press_tmp_(sizeof(real_number));
679  real_number press_tmp;
680 
681  // if the probe is inside the processor domain
682  if (vd.getDecomposition().isLocal(probes.get(i)) == true)
683  {
684  vd.updateCellListGPU(NN);
685 
686  Point<3,real_number> probe = probes.get(i);
687  CUDA_LAUNCH_DIM3(sensor_pressure_gpu,1,1,vd.toKernel(),NN.toKernel(),probe,(real_number *)press_tmp_.toKernel());
688 
689  // move calculated pressure on
690  press_tmp_.deviceToHost();
691  press_tmp = *(real_number *)press_tmp_.getPointer();
692  }
693 
694  // This is not necessary in principle, but if you
695  // want to make all processor aware of the history of the calculated
696  // pressure we have to execute this
697  v_cl.sum(press_tmp);
698  v_cl.execute();
699 
700  // We add the calculated pressure into the history
701  press_t.last().add(press_tmp);
702  }
703 }
704 
705 int main(int argc, char* argv[])
706 {
707  // initialize the library
708  openfpm_init(&argc,&argv);
709 
710  // It contain for each time-step the value detected by the probes
713 
714  probes.add({0.8779f,0.3f,0.02f});
715  probes.add({0.754f,0.31f,0.02f});
716 
717  // Here we define our domain a 2D box with internals from 0 to 1.0 for x and y
718  Box<3,real_number> domain({-0.05f,-0.05f,-0.05f},{1.7010f,0.7065f,0.511f});
719  size_t sz[3] = {207,90,66};
720 
721  // Fill W_dap
722  W_dap = 1.0/Wab(H/1.5);
723 
724  // Here we define the boundary conditions of our problem
725  size_t bc[3]={NON_PERIODIC,NON_PERIODIC,NON_PERIODIC};
726 
727  // extended boundary around the domain, and the processor domain
728  Ghost<3,real_number> g(2*H);
729 
730  particles vd(0,domain,bc,g,DEC_GRAN(512));
731 
733 
734  // You can ignore all these dp/2.0 is a trick to reach the same initialization
735  // of Dual-SPH that use a different criteria to draw particles
736  Box<3,real_number> fluid_box({dp/2.0f,dp/2.0f,dp/2.0f},{0.4f+dp/2.0f,0.67f-dp/2.0f,0.3f+dp/2.0f});
737 
738  // return an iterator to the fluid particles to add to vd
739  auto fluid_it = DrawParticles::DrawBox(vd,sz,domain,fluid_box);
740 
741  // here we fill some of the constants needed by the simulation
742  max_fluid_height = fluid_it.getBoxMargins().getHigh(2);
743  h_swl = fluid_it.getBoxMargins().getHigh(2) - fluid_it.getBoxMargins().getLow(2);
744  B = (coeff_sound)*(coeff_sound)*gravity*h_swl*rho_zero / gamma_;
745  cbar = coeff_sound * sqrt(gravity * h_swl);
746 
747  // for each particle inside the fluid box ...
748  while (fluid_it.isNext())
749  {
750  // ... add a particle ...
751  vd.add();
752 
753  // ... and set it position ...
754  vd.getLastPos()[0] = fluid_it.get().get(0);
755  vd.getLastPos()[1] = fluid_it.get().get(1);
756  vd.getLastPos()[2] = fluid_it.get().get(2);
757 
758  // and its type.
759  vd.template getLastProp<type>() = FLUID;
760 
761  // We also initialize the density of the particle and the hydro-static pressure given by
762  //
763  // rho_zero*g*h = P
764  //
765  // rho_p = (P/B + 1)^(1/Gamma) * rho_zero
766  //
767 
768  vd.template getLastProp<Pressure>() = rho_zero * gravity * (max_fluid_height - fluid_it.get().get(2));
769 
770  vd.template getLastProp<rho>() = pow(vd.template getLastProp<Pressure>() / B + 1, 1.0/gamma_) * rho_zero;
771  vd.template getLastProp<rho_prev>() = vd.template getLastProp<rho>();
772  vd.template getLastProp<velocity>()[0] = 0.0;
773  vd.template getLastProp<velocity>()[1] = 0.0;
774  vd.template getLastProp<velocity>()[2] = 0.0;
775 
776  vd.template getLastProp<velocity_prev>()[0] = 0.0;
777  vd.template getLastProp<velocity_prev>()[1] = 0.0;
778  vd.template getLastProp<velocity_prev>()[2] = 0.0;
779 
780  // next fluid particle
781  ++fluid_it;
782  }
783 
784  // Recipient
785  Box<3,real_number> recipient1({0.0f,0.0f,0.0f},{1.6f+dp/2.0f,0.67f+dp/2.0f,0.4f+dp/2.0f});
786  Box<3,real_number> recipient2({dp,dp,dp},{1.6f-dp/2.0f,0.67f-dp/2.0f,0.4f+dp/2.0f});
787 
788  Box<3,real_number> obstacle1({0.9f,0.24f-dp/2.0f,0.0f},{1.02f+dp/2.0f,0.36f,0.45f+dp/2.0f});
789  Box<3,real_number> obstacle2({0.9f+dp,0.24f+dp/2.0f,0.0f},{1.02f-dp/2.0f,0.36f-dp,0.45f-dp/2.0f});
790  Box<3,real_number> obstacle3({0.9f+dp,0.24f,0.0f},{1.02f,0.36f,0.45f});
791 
793  holes.add(recipient2);
794  holes.add(obstacle1);
795  auto bound_box = DrawParticles::DrawSkin(vd,sz,domain,holes,recipient1);
796 
797  while (bound_box.isNext())
798  {
799  vd.add();
800 
801  vd.getLastPos()[0] = bound_box.get().get(0);
802  vd.getLastPos()[1] = bound_box.get().get(1);
803  vd.getLastPos()[2] = bound_box.get().get(2);
804 
805  vd.template getLastProp<type>() = BOUNDARY;
806  vd.template getLastProp<rho>() = rho_zero;
807  vd.template getLastProp<rho_prev>() = rho_zero;
808  vd.template getLastProp<velocity>()[0] = 0.0;
809  vd.template getLastProp<velocity>()[1] = 0.0;
810  vd.template getLastProp<velocity>()[2] = 0.0;
811 
812  vd.template getLastProp<velocity_prev>()[0] = 0.0;
813  vd.template getLastProp<velocity_prev>()[1] = 0.0;
814  vd.template getLastProp<velocity_prev>()[2] = 0.0;
815 
816  ++bound_box;
817  }
818 
819  auto obstacle_box = DrawParticles::DrawSkin(vd,sz,domain,obstacle2,obstacle1);
820 
821  while (obstacle_box.isNext())
822  {
823  vd.add();
824 
825  vd.getLastPos()[0] = obstacle_box.get().get(0);
826  vd.getLastPos()[1] = obstacle_box.get().get(1);
827  vd.getLastPos()[2] = obstacle_box.get().get(2);
828 
829  vd.template getLastProp<type>() = BOUNDARY;
830  vd.template getLastProp<rho>() = rho_zero;
831  vd.template getLastProp<rho_prev>() = rho_zero;
832  vd.template getLastProp<velocity>()[0] = 0.0;
833  vd.template getLastProp<velocity>()[1] = 0.0;
834  vd.template getLastProp<velocity>()[2] = 0.0;
835 
836  vd.template getLastProp<velocity_prev>()[0] = 0.0;
837  vd.template getLastProp<velocity_prev>()[1] = 0.0;
838  vd.template getLastProp<velocity_prev>()[2] = 0.0;
839 
840  ++obstacle_box;
841  }
842 
843  vd.map();
844 
845  // Now that we fill the vector with particles
846  ModelCustom md;
847 
848  vd.addComputationCosts(md);
849  vd.getDecomposition().decompose();
850  vd.map();
851 
853 
854  // Ok the initialization is done on CPU on GPU we are doing the main loop, so first we offload all properties on GPU
855 
856  vd.hostToDevicePos();
857  vd.template hostToDeviceProp<type,rho,rho_prev,Pressure,velocity,velocity_prev>();
858 
859  vd.ghost_get<type,rho,Pressure,velocity>(RUN_ON_DEVICE);
860 
861  auto NN = vd.getCellListGPU(2*H, CL_NON_SYMMETRIC, 2);
862 
863  timer tot_sim;
864  tot_sim.start();
865 
866  size_t write = 0;
867  size_t it = 0;
868  size_t it_reb = 0;
869  real_number t = 0.0;
870  while (t <= t_end)
871  {
872  Vcluster<> & v_cl = create_vcluster();
873  timer it_time;
874  it_time.start();
875 
877  it_reb++;
878  if (it_reb == 300)
879  {
880  vd.map(RUN_ON_DEVICE);
881 
882  // Rebalancer for now work on CPU , so move to CPU
883  vd.deviceToHostPos();
884  vd.template deviceToHostProp<type>();
885 
886  it_reb = 0;
887  ModelCustom md;
888  vd.addComputationCosts(md);
889  vd.getDecomposition().decompose();
890 
891  if (v_cl.getProcessUnitID() == 0)
892  {std::cout << "REBALANCED " << it_reb << std::endl;}
893  }
894 
895  vd.map(RUN_ON_DEVICE);
896 
897  // Calculate pressure from the density
898  EqState(vd);
899 
900  real_number max_visc = 0.0;
901 
902  vd.ghost_get<type,rho,Pressure,velocity>(RUN_ON_DEVICE);
903 
904  // Calc forces
905  calc_forces(vd,NN,max_visc,cnt);
906 
907  // Get the maximum viscosity term across processors
908  v_cl.max(max_visc);
909  v_cl.execute();
910 
911  // Calculate delta t integration
912  real_number dt = calc_deltaT(vd,max_visc);
913 
914  // VerletStep or euler step
915  it++;
916  if (it < 40)
917  verlet_int(vd,dt);
918  else
919  {
920  euler_int(vd,dt);
921  it = 0;
922  }
923 
924  t += dt;
925 
926  if (write < t*100)
927  {
928  // Sensor pressure require update ghost, so we ensure that particles are distributed correctly
929  // and ghost are updated
930  vd.map(RUN_ON_DEVICE);
931  vd.ghost_get<type,rho,Pressure,velocity>(RUN_ON_DEVICE);
932 
933  // calculate the pressure at the sensor points
934  //sensor_pressure(vd,NN,press_t,probes);
935 
936  std::cout << "OUTPUT " << dt << std::endl;
937 
938  // When we write we have move all the particles information back to CPU
939 
940  vd.deviceToHostPos();
941  vd.deviceToHostProp<type,rho,rho_prev,Pressure,drho,force,velocity,velocity_prev,red,red2>();
942 
943  vd.write_frame("Geometry",write);
944  write++;
945 
946  if (v_cl.getProcessUnitID() == 0)
947  {std::cout << "TIME: " << t << " write " << it_time.getwct() << " " << it_reb << " " << cnt << " Max visc: " << max_visc << " " << vd.size_local() << std::endl;}
948  }
949  else
950  {
951  if (v_cl.getProcessUnitID() == 0)
952  {std::cout << "TIME: " << t << " " << it_time.getwct() << " " << it_reb << " " << cnt << " Max visc: " << max_visc << " " << vd.size_local() << std::endl;}
953  }
954  }
955 
956  tot_sim.stop();
957  std::cout << "Time to complete: " << tot_sim.getwct() << " seconds" << std::endl;
958 
959  openfpm_finalize();
960 }
961 
962 #else
963 
964 int main(int argc, char* argv[])
965 {
966  return 0;
967 }
968 
969 #endif
This class represent an N-dimensional box.
Definition: Box.hpp:60
__device__ __host__ T getLow(int i) const
get the i-coordinate of the low bound interval of the box
Definition: Box.hpp:555
__device__ __host__ T getHigh(int i) const
get the high interval of the box
Definition: Box.hpp:566
Class for FAST cell list implementation.
Definition: CellList.hpp:558
This class define the domain decomposition interface.
static PointIteratorSkin< dim, T, typename vd_type::Decomposition_type > DrawSkin(vd_type &vd, size_t(&sz)[dim], Box< dim, T > &domain, Box< dim, T > &sub_A, Box< dim, T > &sub_B)
Draw particles in a box B excluding the area of a second box A (B - A)
static PointIterator< dim, T, typename vd_type::Decomposition_type > DrawBox(vd_type &vd, size_t(&sz)[dim], Box< dim, T > &domain, Box< dim, T > &sub)
Draw particles in a box.
Definition: Ghost.hpp:40
This class implement the point shape in an N-dimensional space.
Definition: Point.hpp:28
__device__ __host__ const T & get(unsigned int i) const
Get coordinate.
Definition: Point.hpp:172
void execute()
Execute all the requests.
void sum(T &num)
Sum the numbers across all processors and get the result.
size_t getProcessUnitID()
Get the process unit id.
void max(T &num)
Get the maximum number across all processors (or reduction with infinity norm)
Implementation of VCluster class.
Definition: VCluster.hpp:59
Sparse Matrix implementation stub object when OpenFPM is compiled with no linear algebra support.
Definition: Vector.hpp:40
Implementation of 1-D std::vector like structure.
Definition: map_vector.hpp:204
Class for cpu time benchmarking.
Definition: timer.hpp:28
void stop()
Stop the timer.
Definition: timer.hpp:119
void start()
Start the timer.
Definition: timer.hpp:90
double getwct()
Return the elapsed real time.
Definition: timer.hpp:130
Distributed vector.
auto getPos(vect_dist_key_dx vec_key) -> decltype(vPos.template get< 0 >(vec_key.getKey()))
Get the position of an element.
Model for Dynamic load balancing.
Definition: main.cpp:232
temporal buffer for reductions