OpenFPM_pdata  4.1.0
Project that contain the implementation of distributed structures
main.cu
1 
61 #ifdef __NVCC__
62 
63 #include "Vector/vector_dist.hpp"
64 #include <math.h>
65 #include "Draw/DrawParticles.hpp"
66 
67 typedef float real_number;
68 
69 // A constant to indicate boundary particles
70 #define BOUNDARY 0
71 
72 // A constant to indicate fluid particles
73 #define FLUID 1
74 
75 // initial spacing between particles dp in the formulas
76 const real_number dp = 0.0085;
77 // Maximum height of the fluid water
78 // is going to be calculated and filled later on
79 real_number h_swl = 0.0;
80 
81 // c_s in the formulas (constant used to calculate the sound speed)
82 const real_number coeff_sound = 20.0;
83 
84 // gamma in the formulas
85 const real_number gamma_ = 7.0;
86 
87 // sqrt(3.0*dp*dp) support of the kernel
88 const real_number H = 0.0147224318643;
89 
90 // Eta in the formulas
91 const real_number Eta2 = 0.01 * H*H;
92 
93 // alpha in the formula
94 const real_number visco = 0.1;
95 
96 // cbar in the formula (calculated later)
97 real_number cbar = 0.0;
98 
99 // Mass of the fluid particles
100 const real_number MassFluid = 0.000614125;
101 
102 // Mass of the boundary particles
103 const real_number MassBound = 0.000614125;
104 
105 // End simulation time
106 #ifdef TEST_RUN
107 const real_number t_end = 0.001;
108 #else
109 const real_number t_end = 1.5;
110 #endif
111 
112 // Gravity acceleration
113 const real_number gravity = 9.81;
114 
115 // Reference densitu 1000Kg/m^3
116 const real_number rho_zero = 1000.0;
117 
118 // Filled later require h_swl, it is b in the formulas
119 real_number B = 0.0;
120 
121 // Constant used to define time integration
122 const real_number CFLnumber = 0.2;
123 
124 // Minimum T
125 const real_number DtMin = 0.00001;
126 
127 // Minimum Rho allowed
128 const real_number RhoMin = 700.0;
129 
130 // Maximum Rho allowed
131 const real_number RhoMax = 1300.0;
132 
133 // Filled in initialization
134 real_number max_fluid_height = 0.0;
135 
136 // Properties
137 
138 // FLUID or BOUNDARY
139 const size_t type = 0;
140 
141 // Density
142 const int rho = 1;
143 
144 // Density at step n-1
145 const int rho_prev = 2;
146 
147 // Pressure
148 const int Pressure = 3;
149 
150 // Delta rho calculated in the force calculation
151 const int drho = 4;
152 
153 // calculated force
154 const int force = 5;
155 
156 // velocity
157 const int velocity = 6;
158 
159 // velocity at previous step
160 const int velocity_prev = 7;
161 
162 const int red = 8;
163 
164 const int red2 = 9;
165 
166 // Type of the vector containing particles
168 // | | | | | | | | | |
169 // | | | | | | | | | |
170 // type density density Pressure delta force velocity velocity reduction another
171 // at n-1 density at n - 1 buffer reduction buffer
172 
173 
174 struct ModelCustom
175 {
176  template<typename Decomposition, typename vector> inline void addComputation(Decomposition & dec,
177  vector & vd,
178  size_t v,
179  size_t p)
180  {
181  if (vd.template getProp<type>(p) == FLUID)
182  dec.addComputationCost(v,4);
183  else
184  dec.addComputationCost(v,3);
185  }
186 
187  template<typename Decomposition> inline void applyModel(Decomposition & dec, size_t v)
188  {
189  dec.setSubSubDomainComputationCost(v, dec.getSubSubDomainComputationCost(v) * dec.getSubSubDomainComputationCost(v));
190  }
191 
192  real_number distributionTol()
193  {
194  return 1.01;
195  }
196 };
197 
198 template<typename vd_type>
199 __global__ void EqState_gpu(vd_type vd, real_number B)
200 {
201  auto a = GET_PARTICLE(vd);
202 
203  real_number rho_a = vd.template getProp<rho>(a);
204  real_number rho_frac = rho_a / rho_zero;
205 
206  vd.template getProp<Pressure>(a) = B*( rho_frac*rho_frac*rho_frac*rho_frac*rho_frac*rho_frac*rho_frac - 1.0);
207 }
208 
209 inline void EqState(particles & vd)
210 {
211  auto it = vd.getDomainIteratorGPU();
212 
213  // You can use standard CUDA kernel launch or the macro CUDA_LAUNCH
214 
215  //EqState_gpuning<<<it.wthr,it.thr>>>(vd.toKernel(),B);
216  CUDA_LAUNCH(EqState_gpu,it,vd.toKernel(),B)
217 }
218 
219 
220 const real_number a2 = 1.0/M_PI/H/H/H;
221 
222 inline __device__ __host__ real_number Wab(real_number r)
223 {
224  r /= H;
225 
226  if (r < 1.0)
227  return (1.0 - 3.0/2.0*r*r + 3.0/4.0*r*r*r)*a2;
228  else if (r < 2.0)
229  return (1.0/4.0*(2.0 - r*r)*(2.0 - r*r)*(2.0 - r*r))*a2;
230  else
231  return 0.0;
232 }
233 
234 
235 const real_number c1 = -3.0/M_PI/H/H/H/H;
236 const real_number d1 = 9.0/4.0/M_PI/H/H/H/H;
237 const real_number c2 = -3.0/4.0/M_PI/H/H/H/H;
238 const real_number a2_4 = 0.25*a2;
239 // Filled later
240 real_number W_dap = 0.0;
241 
242 inline __device__ __host__ void DWab(Point<3,real_number> & dx, Point<3,real_number> & DW, real_number r, bool print)
243 {
244  const real_number qq=r/H;
245 
246  real_number qq2 = qq * qq;
247  real_number fac1 = (c1*qq + d1*qq2)/r;
248  real_number b1 = (qq < 1.0)?1.0f:0.0f;
249 
250  real_number wqq = (2.0 - qq);
251  real_number fac2 = c2 * wqq * wqq / r;
252  real_number b2 = (qq >= 1.0 && qq < 2.0)?1.0f:0.0f;
253 
254  real_number factor = (b1*fac1 + b2*fac2);
255 
256  DW.get(0) = factor * dx.get(0);
257  DW.get(1) = factor * dx.get(1);
258  DW.get(2) = factor * dx.get(2);
259 }
260 
261 // Tensile correction
262 inline __device__ __host__ real_number Tensile(real_number r, real_number rhoa, real_number rhob, real_number prs1, real_number prs2, real_number W_dap)
263 {
264  const real_number qq=r/H;
265  //-Cubic Spline kernel
266  real_number wab;
267  if(r>H)
268  {
269  real_number wqq1=2.0f-qq;
270  real_number wqq2=wqq1*wqq1;
271 
272  wab=a2_4*(wqq2*wqq1);
273  }
274  else
275  {
276  real_number wqq2=qq*qq;
277  real_number wqq3=wqq2*qq;
278 
279  wab=a2*(1.0f-1.5f*wqq2+0.75f*wqq3);
280  }
281 
282  //-Tensile correction.
283  real_number fab=wab*W_dap;
284  fab*=fab; fab*=fab; //fab=fab^4
285  const real_number tensilp1=(prs1/(rhoa*rhoa))*(prs1>0? 0.01: -0.2);
286  const real_number tensilp2=(prs2/(rhob*rhob))*(prs2>0? 0.01: -0.2);
287 
288  return (fab*(tensilp1+tensilp2));
289 }
290 
291 
292 inline __device__ __host__ real_number Pi(const Point<3,real_number> & dr, real_number rr2, Point<3,real_number> & dv, real_number rhoa, real_number rhob, real_number massb, real_number cbar, real_number & visc)
293 {
294  const real_number dot = dr.get(0)*dv.get(0) + dr.get(1)*dv.get(1) + dr.get(2)*dv.get(2);
295  const real_number dot_rr2 = dot/(rr2+Eta2);
296  visc=(dot_rr2 < visc)?visc:dot_rr2;
297 
298  if(dot < 0)
299  {
300  const float amubar=H*dot_rr2;
301  const float robar=(rhoa+rhob)*0.5f;
302  const float pi_visc=(-visco*cbar*amubar/robar);
303 
304  return pi_visc;
305  }
306  else
307  return 0.0;
308 }
309 
310 template<typename particles_type, typename NN_type>
311 __global__ void calc_forces_gpu(particles_type vd, NN_type NN, real_number W_dap, real_number cbar)
312 {
313  // ... a
314  auto a = GET_PARTICLE(vd);
315 
316  real_number max_visc = 0.0;
317 
318  // Get the position xp of the particle
319  Point<3,real_number> xa = vd.getPos(a);
320 
321  // Take the mass of the particle dependently if it is FLUID or BOUNDARY
322  real_number massa = (vd.template getProp<type>(a) == FLUID)?MassFluid:MassBound;
323 
324  // Get the density of the of the particle a
325  real_number rhoa = vd.template getProp<rho>(a);
326 
327  // Get the pressure of the particle a
328  real_number Pa = vd.template getProp<Pressure>(a);
329 
330  // Get the Velocity of the particle a
331  Point<3,real_number> va = vd.template getProp<velocity>(a);
332 
333  // Reset the force counter (- gravity on zeta direction)
334  vd.template getProp<force>(a)[0] = 0.0;
335  vd.template getProp<force>(a)[1] = 0.0;
336  vd.template getProp<force>(a)[2] = -gravity;
337  vd.template getProp<drho>(a) = 0.0;
338 
339  // We threat FLUID particle differently from BOUNDARY PARTICLES ...
340  if (vd.template getProp<type>(a) != FLUID)
341  {
342 
343  // If it is a boundary particle calculate the delta rho based on equation 2
344  // This require to run across the neighborhoods particles of a
345  auto Np = NN.getNNIterator(NN.getCell(vd.getPos(a)));
346 
347  // For each neighborhood particle
348  while (Np.isNext() == true)
349  {
350  // ... q
351  auto b = Np.get();
352 
353  // Get the position xp of the particle
354  Point<3,real_number> xb = vd.getPos(b);
355 
356  // if (p == q) skip this particle
357  if (a == b) {++Np; continue;};
358 
359  // get the mass of the particle
360  real_number massb = (vd.template getProp<type>(b) == FLUID)?MassFluid:MassBound;
361 
362  // Get the velocity of the particle b
363  Point<3,real_number> vb = vd.template getProp<velocity>(b);
364 
365  // Get the pressure and density of particle b
366  real_number Pb = vd.template getProp<Pressure>(b);
367  real_number rhob = vd.template getProp<rho>(b);
368 
369  // Get the distance between p and q
370  Point<3,real_number> dr = xa - xb;
371  // take the norm of this vector
372  real_number r2 = norm2(dr);
373 
374  // If the particles interact ...
375  if (r2 < 4.0*H*H)
376  {
377  // ... calculate delta rho
378  real_number r = sqrt(r2);
379 
380  Point<3,real_number> dv = va - vb;
381 
383  DWab(dr,DW,r,false);
384 
385  const real_number dot = dr.get(0)*dv.get(0) + dr.get(1)*dv.get(1) + dr.get(2)*dv.get(2);
386  const real_number dot_rr2 = dot/(r2+Eta2);
387  max_visc = (dot_rr2 < max_visc)?max_visc:dot_rr2;
388 
389  vd.template getProp<drho>(a) += massb*(dv.get(0)*DW.get(0)+dv.get(1)*DW.get(1)+dv.get(2)*DW.get(2));
390  }
391 
392  ++Np;
393  }
394 
395  vd.template getProp<red>(a) = max_visc;
396  }
397  else
398  {
399  // If it is a fluid particle calculate based on equation 1 and 2
400 
401  // Get an iterator over the neighborhood particles of p
402  auto Np = NN.getNNIterator(NN.getCell(vd.getPos(a)));
403 
404  // For each neighborhood particle
405  while (Np.isNext() == true)
406  {
407  // ... q
408  auto b = Np.get();
409 
410  // Get the position xp of the particle
411  Point<3,real_number> xb = vd.getPos(b);
412 
413  // if (p == q) skip this particle
414  if (a == b) {++Np; continue;};
415 
416  real_number massb = (vd.template getProp<type>(b) == FLUID)?MassFluid:MassBound;
417  Point<3,real_number> vb = vd.template getProp<velocity>(b);
418  real_number Pb = vd.template getProp<Pressure>(b);
419  real_number rhob = vd.template getProp<rho>(b);
420 
421  // Get the distance between p and q
422  Point<3,real_number> dr = xa - xb;
423  // take the norm of this vector
424  real_number r2 = norm2(dr);
425 
426  // if they interact
427  if (r2 < 4.0*H*H)
428  {
429  real_number r = sqrt(r2);
430 
431  Point<3,real_number> v_rel = va - vb;
432 
434  DWab(dr,DW,r,false);
435 
436  real_number factor = - massb*((vd.template getProp<Pressure>(a) + vd.template getProp<Pressure>(b)) / (rhoa * rhob) + Tensile(r,rhoa,rhob,Pa,Pb,W_dap) + Pi(dr,r2,v_rel,rhoa,rhob,massb,cbar,max_visc));
437 
438  vd.template getProp<force>(a)[0] += factor * DW.get(0);
439  vd.template getProp<force>(a)[1] += factor * DW.get(1);
440  vd.template getProp<force>(a)[2] += factor * DW.get(2);
441 
442  vd.template getProp<drho>(a) += massb*(v_rel.get(0)*DW.get(0)+v_rel.get(1)*DW.get(1)+v_rel.get(2)*DW.get(2));
443  }
444 
445  ++Np;
446  }
447 
448  vd.template getProp<red>(a) = max_visc;
449  }
450 }
451 
452 template<typename CellList> inline void calc_forces(particles & vd, CellList & NN, real_number & max_visc, size_t cnt)
453 {
454  auto part = vd.getDomainIteratorGPU(32);
455 
456  // Update the cell-list
457  vd.updateCellList(NN);
458 
459  //calc_forces_gpu<<<part.wthr,part.thr>>>(vd.toKernel(),NN.toKernel(),W_dap,cbar);
460  CUDA_LAUNCH(calc_forces_gpu,part,vd.toKernel(),NN.toKernel(),W_dap,cbar)
461 
462  max_visc = reduce_local<red,_max_>(vd);
463 }
464 
465 template<typename vector_type>
466 __global__ void max_acceleration_and_velocity_gpu(vector_type vd)
467 {
468  auto a = GET_PARTICLE(vd);
469 
470  Point<3,real_number> acc(vd.template getProp<force>(a));
471  vd.template getProp<red>(a) = norm(acc);
472 
473  Point<3,real_number> vel(vd.template getProp<velocity>(a));
474  vd.template getProp<red2>(a) = norm(vel);
475 }
476 
477 void max_acceleration_and_velocity(particles & vd, real_number & max_acc, real_number & max_vel)
478 {
479  // Calculate the maximum acceleration
480  auto part = vd.getDomainIteratorGPU();
481 
482  // max_acceleration_and_velocity_gpu<<<part.wthr,part.thr>>>(vd.toKernel());
483  CUDA_LAUNCH(max_acceleration_and_velocity_gpu,part,vd.toKernel());
484 
485  max_acc = reduce_local<red,_max_>(vd);
486  max_vel = reduce_local<red2,_max_>(vd);
487 
488  Vcluster<> & v_cl = create_vcluster();
489  v_cl.max(max_acc);
490  v_cl.max(max_vel);
491  v_cl.execute();
492 }
493 
494 
495 real_number calc_deltaT(particles & vd, real_number ViscDtMax)
496 {
497  real_number Maxacc = 0.0;
498  real_number Maxvel = 0.0;
499  max_acceleration_and_velocity(vd,Maxacc,Maxvel);
500 
501  //-dt1 depends on force per unit mass.
502  const real_number dt_f = (Maxacc)?sqrt(H/Maxacc):std::numeric_limits<int>::max();
503 
504  //-dt2 combines the Courant and the viscous time-step controls.
505  const real_number dt_cv = H/(std::max(cbar,Maxvel*10.f) + H*ViscDtMax);
506 
507  //-dt new value of time step.
508  real_number dt=real_number(CFLnumber)*std::min(dt_f,dt_cv);
509  if(dt<real_number(DtMin))
510  {dt=real_number(DtMin);}
511 
512  return dt;
513 }
514 
515 template<typename vector_dist_type>
516 __global__ void verlet_int_gpu(vector_dist_type vd, real_number dt, real_number dt2, real_number dt205)
517 {
518  // ... a
519  auto a = GET_PARTICLE(vd);
520 
521  // if the particle is boundary
522  if (vd.template getProp<type>(a) == BOUNDARY)
523  {
524  // Update rho
525  real_number rhop = vd.template getProp<rho>(a);
526 
527  // Update only the density
528  vd.template getProp<velocity>(a)[0] = 0.0;
529  vd.template getProp<velocity>(a)[1] = 0.0;
530  vd.template getProp<velocity>(a)[2] = 0.0;
531  real_number rhonew = vd.template getProp<rho_prev>(a) + dt2*vd.template getProp<drho>(a);
532  vd.template getProp<rho>(a) = (rhonew < rho_zero)?rho_zero:rhonew;
533 
534  vd.template getProp<rho_prev>(a) = rhop;
535 
536  vd.template getProp<red>(a) = 0;
537 
538  return;
539  }
540 
541  //-Calculate displacement and update position / Calcula desplazamiento y actualiza posicion.
542  real_number dx = vd.template getProp<velocity>(a)[0]*dt + vd.template getProp<force>(a)[0]*dt205;
543  real_number dy = vd.template getProp<velocity>(a)[1]*dt + vd.template getProp<force>(a)[1]*dt205;
544  real_number dz = vd.template getProp<velocity>(a)[2]*dt + vd.template getProp<force>(a)[2]*dt205;
545 
546  vd.getPos(a)[0] += dx;
547  vd.getPos(a)[1] += dy;
548  vd.getPos(a)[2] += dz;
549 
550  real_number velX = vd.template getProp<velocity>(a)[0];
551  real_number velY = vd.template getProp<velocity>(a)[1];
552  real_number velZ = vd.template getProp<velocity>(a)[2];
553 
554  real_number rhop = vd.template getProp<rho>(a);
555 
556  vd.template getProp<velocity>(a)[0] = vd.template getProp<velocity_prev>(a)[0] + vd.template getProp<force>(a)[0]*dt2;
557  vd.template getProp<velocity>(a)[1] = vd.template getProp<velocity_prev>(a)[1] + vd.template getProp<force>(a)[1]*dt2;
558  vd.template getProp<velocity>(a)[2] = vd.template getProp<velocity_prev>(a)[2] + vd.template getProp<force>(a)[2]*dt2;
559  vd.template getProp<rho>(a) = vd.template getProp<rho_prev>(a) + dt2*vd.template getProp<drho>(a);
560 
562 
563  // Check if the particle go out of range in space and in density, if they do mark them to remove it later
564  if (vd.getPos(a)[0] < 0.000263878 || vd.getPos(a)[1] < 0.000263878 || vd.getPos(a)[2] < 0.000263878 ||
565  vd.getPos(a)[0] > 0.000263878+1.59947 || vd.getPos(a)[1] > 0.000263878+0.672972 || vd.getPos(a)[2] > 0.50 ||
566  vd.template getProp<rho>(a) < RhoMin || vd.template getProp<rho>(a) > RhoMax)
567  {vd.template getProp<red>(a) = 1;}
568  else
569  {vd.template getProp<red>(a) = 0;}
570 
572 
573  vd.template getProp<velocity_prev>(a)[0] = velX;
574  vd.template getProp<velocity_prev>(a)[1] = velY;
575  vd.template getProp<velocity_prev>(a)[2] = velZ;
576  vd.template getProp<rho_prev>(a) = rhop;
577 }
578 
579 size_t cnt = 0;
580 
581 void verlet_int(particles & vd, real_number dt)
582 {
583  // particle iterator
584  auto part = vd.getDomainIteratorGPU();
585 
586  real_number dt205 = dt*dt*0.5;
587  real_number dt2 = dt*2.0;
588 
589  // verlet_int_gpu<<<part.wthr,part.thr>>>(vd.toKernel(),dt,dt2,dt205);
590  CUDA_LAUNCH(verlet_int_gpu,part,vd.toKernel(),dt,dt2,dt205);
591 
593 
594  // remove the particles marked
595  remove_marked<red>(vd);
596 
598 
599  // increment the iteration counter
600  cnt++;
601 }
602 
603 template<typename vector_type>
604 __global__ void euler_int_gpu(vector_type vd,real_number dt, real_number dt205)
605 {
606  // ... a
607  auto a = GET_PARTICLE(vd);
608 
609  // if the particle is boundary
610  if (vd.template getProp<type>(a) == BOUNDARY)
611  {
612  // Update rho
613  real_number rhop = vd.template getProp<rho>(a);
614 
615  // Update only the density
616  vd.template getProp<velocity>(a)[0] = 0.0;
617  vd.template getProp<velocity>(a)[1] = 0.0;
618  vd.template getProp<velocity>(a)[2] = 0.0;
619  real_number rhonew = vd.template getProp<rho>(a) + dt*vd.template getProp<drho>(a);
620  vd.template getProp<rho>(a) = (rhonew < rho_zero)?rho_zero:rhonew;
621 
622  vd.template getProp<rho_prev>(a) = rhop;
623 
624  vd.template getProp<red>(a) = 0;
625 
626  return;
627  }
628 
629  //-Calculate displacement and update position / Calcula desplazamiento y actualiza posicion.
630  real_number dx = vd.template getProp<velocity>(a)[0]*dt + vd.template getProp<force>(a)[0]*dt205;
631  real_number dy = vd.template getProp<velocity>(a)[1]*dt + vd.template getProp<force>(a)[1]*dt205;
632  real_number dz = vd.template getProp<velocity>(a)[2]*dt + vd.template getProp<force>(a)[2]*dt205;
633 
634  vd.getPos(a)[0] += dx;
635  vd.getPos(a)[1] += dy;
636  vd.getPos(a)[2] += dz;
637 
638  real_number velX = vd.template getProp<velocity>(a)[0];
639  real_number velY = vd.template getProp<velocity>(a)[1];
640  real_number velZ = vd.template getProp<velocity>(a)[2];
641  real_number rhop = vd.template getProp<rho>(a);
642 
643  vd.template getProp<velocity>(a)[0] = vd.template getProp<velocity>(a)[0] + vd.template getProp<force>(a)[0]*dt;
644  vd.template getProp<velocity>(a)[1] = vd.template getProp<velocity>(a)[1] + vd.template getProp<force>(a)[1]*dt;
645  vd.template getProp<velocity>(a)[2] = vd.template getProp<velocity>(a)[2] + vd.template getProp<force>(a)[2]*dt;
646  vd.template getProp<rho>(a) = vd.template getProp<rho>(a) + dt*vd.template getProp<drho>(a);
647 
648  // Check if the particle go out of range in space and in density
649  if (vd.getPos(a)[0] < 0.000263878 || vd.getPos(a)[1] < 0.000263878 || vd.getPos(a)[2] < 0.000263878 ||
650  vd.getPos(a)[0] > 0.000263878+1.59947 || vd.getPos(a)[1] > 0.000263878+0.672972 || vd.getPos(a)[2] > 0.50 ||
651  vd.template getProp<rho>(a) < RhoMin || vd.template getProp<rho>(a) > RhoMax)
652  {vd.template getProp<red>(a) = 1;}
653  else
654  {vd.template getProp<red>(a) = 0;}
655 
656  vd.template getProp<velocity_prev>(a)[0] = velX;
657  vd.template getProp<velocity_prev>(a)[1] = velY;
658  vd.template getProp<velocity_prev>(a)[2] = velZ;
659  vd.template getProp<rho_prev>(a) = rhop;
660 }
661 
662 void euler_int(particles & vd, real_number dt)
663 {
664 
665  // particle iterator
666  auto part = vd.getDomainIteratorGPU();
667 
668  real_number dt205 = dt*dt*0.5;
669 
670  // euler_int_gpu<<<part.wthr,part.thr>>>(vd.toKernel(),dt,dt205);
671  CUDA_LAUNCH(euler_int_gpu,part,vd.toKernel(),dt,dt205);
672 
673  // remove the particles
674  remove_marked<red>(vd);
675 
676  cnt++;
677 }
678 
679 template<typename vector_type, typename NN_type>
680 __global__ void sensor_pressure_gpu(vector_type vd, NN_type NN, Point<3,real_number> probe, real_number * press_tmp)
681 {
682  real_number tot_ker = 0.0;
683 
684  // Get the position of the probe i
685  Point<3,real_number> xp = probe;
686 
687  // get the iterator over the neighbohood particles of the probes position
688  auto itg = NN.getNNIterator(NN.getCell(xp));
689  while (itg.isNext())
690  {
691  auto q = itg.get();
692 
693  // Only the fluid particles are importants
694  if (vd.template getProp<type>(q) != FLUID)
695  {
696  ++itg;
697  continue;
698  }
699 
700  // Get the position of the neighborhood particle q
701  Point<3,real_number> xq = vd.getPos(q);
702 
703  // Calculate the contribution of the particle to the pressure
704  // of the probe
705  real_number r = sqrt(norm2(xp - xq));
706 
707  real_number ker = Wab(r) * (MassFluid / rho_zero);
708 
709  // Also keep track of the calculation of the summed
710  // kernel
711  tot_ker += ker;
712 
713  // Add the total pressure contribution
714  *press_tmp += vd.template getProp<Pressure>(q) * ker;
715 
716  // next neighborhood particle
717  ++itg;
718  }
719 
720  // We calculate the pressure normalizing the
721  // sum over all kernels
722  if (tot_ker == 0.0)
723  {*press_tmp = 0.0;}
724  else
725  {*press_tmp = 1.0 / tot_ker * *press_tmp;}
726 }
727 
728 template<typename Vector, typename CellList>
729 inline void sensor_pressure(Vector & vd,
730  CellList & NN,
733 {
734  Vcluster<> & v_cl = create_vcluster();
735 
736  press_t.add();
737 
738  for (size_t i = 0 ; i < probes.size() ; i++)
739  {
740  // A float variable to calculate the pressure of the problem
741  CudaMemory press_tmp_(sizeof(real_number));
742  real_number press_tmp;
743 
744  // if the probe is inside the processor domain
745  if (vd.getDecomposition().isLocal(probes.get(i)) == true)
746  {
747  // sensor_pressure_gpu<<<1,1>>>(vd.toKernel(),NN.toKernel(),probes.get(i),(real_number *)press_tmp_.toKernel());
748  CUDA_LAUNCH_DIM3(sensor_pressure_gpu,1,1,vd.toKernel(),NN.toKernel(),probes.get(i),(real_number *)press_tmp_.toKernel());
749 
750  // move calculated pressure on
751  press_tmp_.deviceToHost();
752  press_tmp = *(real_number *)press_tmp_.getPointer();
753  }
754 
755  // This is not necessary in principle, but if you
756  // want to make all processor aware of the history of the calculated
757  // pressure we have to execute this
758  v_cl.sum(press_tmp);
759  v_cl.execute();
760 
761  // We add the calculated pressure into the history
762  press_t.last().add(press_tmp);
763  }
764 }
765 
766 int main(int argc, char* argv[])
767 {
768  // initialize the library
769  openfpm_init(&argc,&argv);
770 
771  // It contain for each time-step the value detected by the probes
774 
775  probes.add({0.8779,0.3,0.02});
776  probes.add({0.754,0.31,0.02});
777 
778  // Here we define our domain a 2D box with internals from 0 to 1.0 for x and y
779  Box<3,real_number> domain({-0.05,-0.05,-0.05},{1.7010,0.7065,0.5025});
780  size_t sz[3] = {207,90,66};
781 
782  // Fill W_dap
783  W_dap = 1.0/Wab(H/1.5);
784 
785  // Here we define the boundary conditions of our problem
786  size_t bc[3]={NON_PERIODIC,NON_PERIODIC,NON_PERIODIC};
787 
788  // extended boundary around the domain, and the processor domain
789  Ghost<3,real_number> g(2*H);
790 
791  particles vd(0,domain,bc,g,DEC_GRAN(512));
792 
794 
795  // You can ignore all these dp/2.0 is a trick to reach the same initialization
796  // of Dual-SPH that use a different criteria to draw particles
797  Box<3,real_number> fluid_box({dp/2.0f,dp/2.0f,dp/2.0f},{0.4f+dp/2.0f,0.67f-dp/2.0f,0.3f+dp/2.0f});
798 
799  // return an iterator to the fluid particles to add to vd
800  auto fluid_it = DrawParticles::DrawBox(vd,sz,domain,fluid_box);
801 
802  // here we fill some of the constants needed by the simulation
803  max_fluid_height = fluid_it.getBoxMargins().getHigh(2);
804  h_swl = fluid_it.getBoxMargins().getHigh(2) - fluid_it.getBoxMargins().getLow(2);
805  B = (coeff_sound)*(coeff_sound)*gravity*h_swl*rho_zero / gamma_;
806  cbar = coeff_sound * sqrt(gravity * h_swl);
807 
808  // for each particle inside the fluid box ...
809  while (fluid_it.isNext())
810  {
811  // ... add a particle ...
812  vd.add();
813 
814  // ... and set it position ...
815  vd.getLastPos()[0] = fluid_it.get().get(0);
816  vd.getLastPos()[1] = fluid_it.get().get(1);
817  vd.getLastPos()[2] = fluid_it.get().get(2);
818 
819  // and its type.
820  vd.template getLastProp<type>() = FLUID;
821 
822  // We also initialize the density of the particle and the hydro-static pressure given by
823  //
824  // rho_zero*g*h = P
825  //
826  // rho_p = (P/B + 1)^(1/Gamma) * rho_zero
827  //
828 
829  vd.template getLastProp<Pressure>() = rho_zero * gravity * (max_fluid_height - fluid_it.get().get(2));
830 
831  vd.template getLastProp<rho>() = pow(vd.template getLastProp<Pressure>() / B + 1, 1.0/gamma_) * rho_zero;
832  vd.template getLastProp<rho_prev>() = vd.template getLastProp<rho>();
833  vd.template getLastProp<velocity>()[0] = 0.0;
834  vd.template getLastProp<velocity>()[1] = 0.0;
835  vd.template getLastProp<velocity>()[2] = 0.0;
836 
837  vd.template getLastProp<velocity_prev>()[0] = 0.0;
838  vd.template getLastProp<velocity_prev>()[1] = 0.0;
839  vd.template getLastProp<velocity_prev>()[2] = 0.0;
840 
841  // next fluid particle
842  ++fluid_it;
843  }
844 
845  // Recipient
846  Box<3,real_number> recipient1({0.0f,0.0f,0.0f},{1.6f+dp/2.0f,0.67f+dp/2.0f,0.4f+dp/2.0f});
847  Box<3,real_number> recipient2({dp,dp,dp},{1.6f-dp/2.0f,0.67f-dp/2.0f,0.4f+dp/2.0f});
848 
849  Box<3,real_number> obstacle1({0.9f,0.24f-dp/2.0f,0.0f},{1.02f+dp/2.0f,0.36f,0.45f+dp/2.0f});
850  Box<3,real_number> obstacle2({0.9f+dp,0.24f+dp/2.0f,0.0f},{1.02f-dp/2.0f,0.36f-dp,0.45f-dp/2.0f});
851  Box<3,real_number> obstacle3({0.9f+dp,0.24f,0.0f},{1.02f,0.36f,0.45f});
852 
854  holes.add(recipient2);
855  holes.add(obstacle1);
856  auto bound_box = DrawParticles::DrawSkin(vd,sz,domain,holes,recipient1);
857 
858  while (bound_box.isNext())
859  {
860  vd.add();
861 
862  vd.getLastPos()[0] = bound_box.get().get(0);
863  vd.getLastPos()[1] = bound_box.get().get(1);
864  vd.getLastPos()[2] = bound_box.get().get(2);
865 
866  vd.template getLastProp<type>() = BOUNDARY;
867  vd.template getLastProp<rho>() = rho_zero;
868  vd.template getLastProp<rho_prev>() = rho_zero;
869  vd.template getLastProp<velocity>()[0] = 0.0;
870  vd.template getLastProp<velocity>()[1] = 0.0;
871  vd.template getLastProp<velocity>()[2] = 0.0;
872 
873  vd.template getLastProp<velocity_prev>()[0] = 0.0;
874  vd.template getLastProp<velocity_prev>()[1] = 0.0;
875  vd.template getLastProp<velocity_prev>()[2] = 0.0;
876 
877  ++bound_box;
878  }
879 
880  auto obstacle_box = DrawParticles::DrawSkin(vd,sz,domain,obstacle2,obstacle1);
881 
882  while (obstacle_box.isNext())
883  {
884  vd.add();
885 
886  vd.getLastPos()[0] = obstacle_box.get().get(0);
887  vd.getLastPos()[1] = obstacle_box.get().get(1);
888  vd.getLastPos()[2] = obstacle_box.get().get(2);
889 
890  vd.template getLastProp<type>() = BOUNDARY;
891  vd.template getLastProp<rho>() = rho_zero;
892  vd.template getLastProp<rho_prev>() = rho_zero;
893  vd.template getLastProp<velocity>()[0] = 0.0;
894  vd.template getLastProp<velocity>()[1] = 0.0;
895  vd.template getLastProp<velocity>()[2] = 0.0;
896 
897  vd.template getLastProp<velocity_prev>()[0] = 0.0;
898  vd.template getLastProp<velocity_prev>()[1] = 0.0;
899  vd.template getLastProp<velocity_prev>()[2] = 0.0;
900 
901  ++obstacle_box;
902  }
903 
904  vd.map();
905 
906  // Now that we fill the vector with particles
907  ModelCustom md;
908 
909  vd.addComputationCosts(md);
910  vd.getDecomposition().decompose();
911  vd.map();
912 
914 
915  // Ok the initialization is done on CPU on GPU we are doing the main loop, so first we offload all properties on GPU
916 
917  vd.hostToDevicePos();
918  vd.template hostToDeviceProp<type,rho,rho_prev,Pressure,velocity,velocity_prev>();
919 
920  vd.ghost_get<type,rho,Pressure,velocity>(RUN_ON_DEVICE);
921 
922  auto NN = vd.getCellListGPU(2*H);
923 
924  timer tot_sim;
925  tot_sim.start();
926 
927  size_t write = 0;
928  size_t it = 0;
929  size_t it_reb = 0;
930  real_number t = 0.0;
931  while (t <= t_end)
932  {
933  Vcluster<> & v_cl = create_vcluster();
934  timer it_time;
935 
937  it_reb++;
938  if (it_reb == 300)
939  {
940  vd.map(RUN_ON_DEVICE);
941 
942  it_reb = 0;
943  ModelCustom md;
944  vd.addComputationCosts(md);
945  vd.getDecomposition().decompose();
946 
947  if (v_cl.getProcessUnitID() == 0)
948  {std::cout << "REBALANCED " << it_reb << std::endl;}
949  }
950 
951  vd.map(RUN_ON_DEVICE);
952 
953  // Calculate pressure from the density
954  EqState(vd);
955 
956  real_number max_visc = 0.0;
957 
958  vd.ghost_get<type,rho,Pressure,velocity>(RUN_ON_DEVICE);
959 
960  // Calc forces
961  calc_forces(vd,NN,max_visc,cnt);
962 
963  // Get the maximum viscosity term across processors
964  v_cl.max(max_visc);
965  v_cl.execute();
966 
967  // Calculate delta t integration
968  real_number dt = calc_deltaT(vd,max_visc);
969 
970  // VerletStep or euler step
971  it++;
972  if (it < 40)
973  verlet_int(vd,dt);
974  else
975  {
976  euler_int(vd,dt);
977  it = 0;
978  }
979 
980  t += dt;
981 
982  if (write < t*100)
983  {
984  // Sensor pressure require update ghost, so we ensure that particles are distributed correctly
985  // and ghost are updated
986  vd.map(RUN_ON_DEVICE);
987  vd.ghost_get<type,rho,Pressure,velocity>(RUN_ON_DEVICE);
988  vd.updateCellList(NN);
989 
990  // calculate the pressure at the sensor points
991  //sensor_pressure(vd,NN,press_t,probes);
992 
993  std::cout << "OUTPUT " << dt << std::endl;
994 
995  // When we write we have move all the particles information back to CPU
996 
997  vd.deviceToHostPos();
998  vd.deviceToHostProp<type,rho,rho_prev,Pressure,drho,force,velocity,velocity_prev,red,red2>();
999 
1000  vd.write_frame("Geometry",write);
1001  write++;
1002 
1003  if (v_cl.getProcessUnitID() == 0)
1004  {std::cout << "TIME: " << t << " write " << it_time.getwct() << " " << it_reb << " " << cnt << " Max visc: " << max_visc << " " << vd.size_local() << std::endl;}
1005  }
1006  else
1007  {
1008  if (v_cl.getProcessUnitID() == 0)
1009  {std::cout << "TIME: " << t << " " << it_time.getwct() << " " << it_reb << " " << cnt << " Max visc: " << max_visc << " " << vd.size_local() << std::endl;}
1010  }
1011  }
1012 
1013  tot_sim.stop();
1014  std::cout << "Time to complete: " << tot_sim.getwct() << " seconds" << std::endl;
1015 
1016  openfpm_finalize();
1017 }
1018 
1019 #else
1020 
1021 int main(int argc, char* argv[])
1022 {
1023  return 0;
1024 }
1025 
1026 #endif
size_t getProcessUnitID()
Get the process unit id.
Sparse Matrix implementation stub object when OpenFPM is compiled with no linear algebra support.
Definition: Vector.hpp:39
__device__ __host__ T getLow(int i) const
get the i-coordinate of the low bound interval of the box
Definition: Box.hpp:556
auto getPos(vect_dist_key_dx vec_key) -> decltype(v_pos.template get< 0 >(vec_key.getKey()))
Get the position of an element.
static PointIteratorSkin< dim, T, typename vd_type::Decomposition_type > DrawSkin(vd_type &vd, size_t(&sz)[dim], Box< dim, T > &domain, Box< dim, T > &sub_A, Box< dim, T > &sub_B)
Draw particles in a box B excluding the area of a second box A (B - A)
This class implement the point shape in an N-dimensional space.
Definition: Point.hpp:27
double getwct()
Return the elapsed real time.
Definition: timer.hpp:130
Definition: Ghost.hpp:39
Implementation of VCluster class.
Definition: VCluster.hpp:58
void execute()
Execute all the requests.
This class define the domain decomposition interface.
__device__ __host__ const T & get(unsigned int i) const
Get coordinate.
Definition: Point.hpp:172
void start()
Start the timer.
Definition: timer.hpp:90
This class represent an N-dimensional box.
Definition: Box.hpp:60
void sum(T &num)
Sum the numbers across all processors and get the result.
void updateCellList(CellL &cell_list, bool no_se3=false, cl_construct_opt opt=cl_construct_opt::Full)
Update a cell list using the stored particles.
static PointIterator< dim, T, typename vd_type::Decomposition_type > DrawBox(vd_type &vd, size_t(&sz)[dim], Box< dim, T > &domain, Box< dim, T > &sub)
Draw particles in a box.
Distributed vector.
void max(T &num)
Get the maximum number across all processors (or reduction with infinity norm)
temporal buffer for reductions
Model for Dynamic load balancing.
Definition: main.cpp:221
Implementation of 1-D std::vector like structure.
Definition: map_vector.hpp:202
Class for FAST cell list implementation.
Definition: CellList.hpp:356
__device__ __host__ T getHigh(int i) const
get the high interval of the box
Definition: Box.hpp:567
auto get(size_t cell, size_t ele) -> decltype(this->Mem_type::get(cell, ele))
Get an element in the cell.
Definition: CellList.hpp:867
Class for cpu time benchmarking.
Definition: timer.hpp:27
void stop()
Stop the timer.
Definition: timer.hpp:119