63 #include "Vector/vector_dist.hpp"
65 #include "Draw/DrawParticles.hpp"
67 typedef float real_number;
76 const real_number dp = 0.0085;
79 real_number h_swl = 0.0;
82 const real_number coeff_sound = 20.0;
85 const real_number gamma_ = 7.0;
88 const real_number H = 0.0147224318643;
91 const real_number Eta2 = 0.01 * H*H;
94 const real_number visco = 0.1;
97 real_number cbar = 0.0;
100 const real_number MassFluid = 0.000614125;
103 const real_number MassBound = 0.000614125;
107 const real_number t_end = 0.001;
109 const real_number t_end = 1.5;
113 const real_number gravity = 9.81;
116 const real_number rho_zero = 1000.0;
122 const real_number CFLnumber = 0.2;
125 const real_number DtMin = 0.00001;
128 const real_number RhoMin = 700.0;
131 const real_number RhoMax = 1300.0;
134 real_number max_fluid_height = 0.0;
139 const size_t type = 0;
145 const int rho_prev = 2;
148 const int Pressure = 3;
157 const int velocity = 6;
160 const int velocity_prev = 7;
167 typedef vector_dist_gpu<3,real_number,aggregate<size_t,real_number, real_number, real_number, real_number, real_number[3], real_number[3], real_number[3], real_number, real_number>>
particles;
176 template<
typename Decomposition,
typename vector>
inline void addComputation(
Decomposition & dec,
181 if (vd.template getProp<type>(p) == FLUID)
182 dec.addComputationCost(v,4);
184 dec.addComputationCost(v,3);
187 template<
typename Decomposition>
inline void applyModel(
Decomposition & dec,
size_t v)
189 dec.setSubSubDomainComputationCost(v, dec.getSubSubDomainComputationCost(v) * dec.getSubSubDomainComputationCost(v));
192 real_number distributionTol()
198 template<
typename vd_type>
199 __global__
void EqState_gpu(vd_type vd, real_number B)
201 auto a = GET_PARTICLE(vd);
203 real_number rho_a = vd.template getProp<rho>(a);
204 real_number rho_frac = rho_a / rho_zero;
206 vd.template getProp<Pressure>(a) = B*( rho_frac*rho_frac*rho_frac*rho_frac*rho_frac*rho_frac*rho_frac - 1.0);
211 auto it = vd.getDomainIteratorGPU();
213 CUDA_LAUNCH(EqState_gpu,it,vd.toKernel(),B);
217 const real_number a2 = 1.0/M_PI/H/H/H;
219 inline __device__ __host__ real_number Wab(real_number r)
224 return (1.0 - 3.0/2.0*r*r + 3.0/4.0*r*r*r)*a2;
226 return (1.0/4.0*(2.0 - r)*(2.0 - r)*(2.0 - r))*a2;
232 const real_number c1 = -3.0/M_PI/H/H/H/H;
233 const real_number d1 = 9.0/4.0/M_PI/H/H/H/H;
234 const real_number c2 = -3.0/4.0/M_PI/H/H/H/H;
235 const real_number a2_4 = 0.25*a2;
237 real_number W_dap = 0.0;
241 const real_number qq=r/H;
243 real_number qq2 = qq * qq;
244 real_number fac1 = (c1*qq + d1*qq2)/r;
245 real_number b1 = (qq < 1.0f)?1.0f:0.0f;
247 real_number wqq = (2.0f - qq);
248 real_number fac2 = c2 * wqq * wqq / r;
249 real_number b2 = (qq >= 1.0f && qq < 2.0f)?1.0f:0.0f;
251 real_number factor = (b1*fac1 + b2*fac2);
253 DW.
get(0) = factor * dx.
get(0);
254 DW.
get(1) = factor * dx.
get(1);
255 DW.
get(2) = factor * dx.
get(2);
259 inline __device__ __host__ real_number Tensile(real_number r, real_number rhoa, real_number rhob, real_number prs1, real_number prs2, real_number W_dap)
261 const real_number qq=r/H;
266 real_number wqq1=2.0f-qq;
267 real_number wqq2=wqq1*wqq1;
269 wab=a2_4*(wqq2*wqq1);
273 real_number wqq2=qq*qq;
274 real_number wqq3=wqq2*qq;
276 wab=a2*(1.0f-1.5f*wqq2+0.75f*wqq3);
280 real_number fab=wab*W_dap;
282 const real_number tensilp1=(prs1/(rhoa*rhoa))*(prs1>0.0f? 0.01f: -0.2f);
283 const real_number tensilp2=(prs2/(rhob*rhob))*(prs2>0.0f? 0.01f: -0.2f);
285 return (fab*(tensilp1+tensilp2));
289 inline __device__ __host__ real_number Pi(
const Point<3,real_number> & dr, real_number rr2,
Point<3,real_number> & dv, real_number rhoa, real_number rhob, real_number massb, real_number cbar, real_number & visc)
291 const real_number dot = dr.
get(0)*dv.
get(0) + dr.
get(1)*dv.
get(1) + dr.
get(2)*dv.
get(2);
292 const real_number dot_rr2 = dot/(rr2+Eta2);
293 visc=(dot_rr2 < visc)?visc:dot_rr2;
297 const float amubar=H*dot_rr2;
298 const float robar=(rhoa+rhob)*0.5f;
299 const float pi_visc=(-visco*cbar*amubar/robar);
307 template<
typename particles_type,
typename NN_type>
308 __global__
void calc_forces_gpu(
particles_type vd, NN_type NN, real_number W_dap, real_number cbar)
310 auto a = GET_PARTICLE(vd);
312 real_number max_visc = 0.0f;
318 unsigned int typea = vd.template getProp<type>(a);
321 real_number rhoa = vd.template getProp<rho>(a);
324 real_number Pa = vd.template getProp<Pressure>(a);
331 force_.
get(0) = 0.0f;
332 force_.
get(1) = 0.0f;
333 force_.
get(2) = -gravity;
334 real_number drho_ = 0.0f;
337 auto Np = NN.getNNIteratorBox(NN.getCell(xa));
340 while (Np.isNext() ==
true)
348 if (a == b) {++Np;
continue;};
350 unsigned int typeb = vd.template getProp<type>(b);
352 real_number massb = (typeb == FLUID)?MassFluid:MassBound;
354 real_number Pb = vd.template getProp<Pressure>(b);
355 real_number rhob = vd.template getProp<rho>(b);
360 real_number r2 = norm2(dr);
363 if (r2 < 4.0*H*H && r2 >= 1e-16)
365 real_number r = sqrt(r2);
372 real_number factor = - massb*((Pa + Pb) / (rhoa * rhob) + Tensile(r,rhoa,rhob,Pa,Pb,W_dap) + Pi(dr,r2,v_rel,rhoa,rhob,massb,cbar,max_visc));
376 factor = (typea != FLUID)?0.0f:factor;
378 force_.
get(0) += factor * DW.
get(0);
379 force_.
get(1) += factor * DW.
get(1);
380 force_.
get(2) += factor * DW.
get(2);
382 real_number scal = massb*(v_rel.
get(0)*DW.
get(0)+v_rel.
get(1)*DW.
get(1)+v_rel.
get(2)*DW.
get(2));
383 scal = (typea == BOUNDARY && typeb == BOUNDARY)?0.0f:scal;
391 vd.template getProp<red>(a) = max_visc;
393 vd.template getProp<force>(a)[0] = force_.
get(0);
394 vd.template getProp<force>(a)[1] = force_.
get(1);
395 vd.template getProp<force>(a)[2] = force_.
get(2);
396 vd.template getProp<drho>(a) = drho_;
399 template<
typename CellList>
inline void calc_forces(
particles & vd,
CellList & NN, real_number & max_visc,
size_t cnt)
401 auto part = vd.getDomainIteratorGPU(32);
404 vd.updateCellListGPU(NN);
406 CUDA_LAUNCH(calc_forces_gpu,part,vd.toKernel(),NN.toKernel(),W_dap,cbar);
408 max_visc = reduce_local<red,_max_>(vd);
411 template<
typename vector_type>
412 __global__
void max_acceleration_and_velocity_gpu(
vector_type vd)
414 auto a = GET_PARTICLE(vd);
417 vd.template getProp<red>(a) = norm(acc);
420 vd.template getProp<red2>(a) = norm(vel);
423 void max_acceleration_and_velocity(
particles & vd, real_number & max_acc, real_number & max_vel)
426 auto part = vd.getDomainIteratorGPU();
428 CUDA_LAUNCH(max_acceleration_and_velocity_gpu,part,vd.toKernel());
430 max_acc = reduce_local<red,_max_>(vd);
431 max_vel = reduce_local<red2,_max_>(vd);
440 real_number calc_deltaT(
particles & vd, real_number ViscDtMax)
442 real_number Maxacc = 0.0;
443 real_number Maxvel = 0.0;
444 max_acceleration_and_velocity(vd,Maxacc,Maxvel);
447 const real_number dt_f = (Maxacc)?sqrt(H/Maxacc):std::numeric_limits<float>::max();
450 const real_number dt_cv = H/(std::max(cbar,Maxvel*10.f) + H*ViscDtMax);
453 real_number dt=real_number(CFLnumber)*std::min(dt_f,dt_cv);
454 if(dt<real_number(DtMin))
455 {dt=real_number(DtMin);}
460 template<
typename vector_dist_type>
461 __global__
void verlet_int_gpu(
vector_dist_type vd, real_number dt, real_number dt2, real_number dt205)
464 auto a = GET_PARTICLE(vd);
467 if (vd.template getProp<type>(a) == BOUNDARY)
470 real_number rhop = vd.template getProp<rho>(a);
473 vd.template getProp<velocity>(a)[0] = 0.0;
474 vd.template getProp<velocity>(a)[1] = 0.0;
475 vd.template getProp<velocity>(a)[2] = 0.0;
476 real_number rhonew = vd.template getProp<rho_prev>(a) + dt2*vd.template getProp<drho>(a);
477 vd.template getProp<rho>(a) = (rhonew < rho_zero)?rho_zero:rhonew;
479 vd.template getProp<rho_prev>(a) = rhop;
481 vd.template getProp<red>(a) = 0;
487 real_number dx = vd.template getProp<velocity>(a)[0]*dt + vd.template getProp<force>(a)[0]*dt205;
488 real_number dy = vd.template getProp<velocity>(a)[1]*dt + vd.template getProp<force>(a)[1]*dt205;
489 real_number dz = vd.template getProp<velocity>(a)[2]*dt + vd.template getProp<force>(a)[2]*dt205;
495 real_number velX = vd.template getProp<velocity>(a)[0];
496 real_number velY = vd.template getProp<velocity>(a)[1];
497 real_number velZ = vd.template getProp<velocity>(a)[2];
499 real_number rhop = vd.template getProp<rho>(a);
501 vd.template getProp<velocity>(a)[0] = vd.template getProp<velocity_prev>(a)[0] + vd.template getProp<force>(a)[0]*dt2;
502 vd.template getProp<velocity>(a)[1] = vd.template getProp<velocity_prev>(a)[1] + vd.template getProp<force>(a)[1]*dt2;
503 vd.template getProp<velocity>(a)[2] = vd.template getProp<velocity_prev>(a)[2] + vd.template getProp<force>(a)[2]*dt2;
504 vd.template getProp<rho>(a) = vd.template getProp<rho_prev>(a) + dt2*vd.template getProp<drho>(a);
509 vd.template getProp<rho>(a) < RhoMin || vd.template getProp<rho>(a) > RhoMax)
510 {vd.template getProp<red>(a) = 1;}
512 {vd.template getProp<red>(a) = 0;}
515 vd.template getProp<velocity_prev>(a)[0] = velX;
516 vd.template getProp<velocity_prev>(a)[1] = velY;
517 vd.template getProp<velocity_prev>(a)[2] = velZ;
518 vd.template getProp<rho_prev>(a) = rhop;
523 void verlet_int(
particles & vd, real_number dt)
526 auto part = vd.getDomainIteratorGPU();
528 real_number dt205 = dt*dt*0.5;
529 real_number dt2 = dt*2.0;
531 CUDA_LAUNCH(verlet_int_gpu,part,vd.toKernel(),dt,dt2,dt205);
534 remove_marked<red>(vd);
540 template<
typename vector_type>
541 __global__
void euler_int_gpu(
vector_type vd,real_number dt, real_number dt205)
544 auto a = GET_PARTICLE(vd);
547 if (vd.template getProp<type>(a) == BOUNDARY)
550 real_number rhop = vd.template getProp<rho>(a);
553 vd.template getProp<velocity>(a)[0] = 0.0;
554 vd.template getProp<velocity>(a)[1] = 0.0;
555 vd.template getProp<velocity>(a)[2] = 0.0;
556 real_number rhonew = vd.template getProp<rho>(a) + dt*vd.template getProp<drho>(a);
557 vd.template getProp<rho>(a) = (rhonew < rho_zero)?rho_zero:rhonew;
559 vd.template getProp<rho_prev>(a) = rhop;
561 vd.template getProp<red>(a) = 0;
567 real_number dx = vd.template getProp<velocity>(a)[0]*dt + vd.template getProp<force>(a)[0]*dt205;
568 real_number dy = vd.template getProp<velocity>(a)[1]*dt + vd.template getProp<force>(a)[1]*dt205;
569 real_number dz = vd.template getProp<velocity>(a)[2]*dt + vd.template getProp<force>(a)[2]*dt205;
575 real_number velX = vd.template getProp<velocity>(a)[0];
576 real_number velY = vd.template getProp<velocity>(a)[1];
577 real_number velZ = vd.template getProp<velocity>(a)[2];
578 real_number rhop = vd.template getProp<rho>(a);
580 vd.template getProp<velocity>(a)[0] = vd.template getProp<velocity>(a)[0] + vd.template getProp<force>(a)[0]*dt;
581 vd.template getProp<velocity>(a)[1] = vd.template getProp<velocity>(a)[1] + vd.template getProp<force>(a)[1]*dt;
582 vd.template getProp<velocity>(a)[2] = vd.template getProp<velocity>(a)[2] + vd.template getProp<force>(a)[2]*dt;
583 vd.template getProp<rho>(a) = vd.template getProp<rho>(a) + dt*vd.template getProp<drho>(a);
588 vd.template getProp<rho>(a) < RhoMin || vd.template getProp<rho>(a) > RhoMax)
589 {vd.template getProp<red>(a) = 1;}
591 {vd.template getProp<red>(a) = 0;}
593 vd.template getProp<velocity_prev>(a)[0] = velX;
594 vd.template getProp<velocity_prev>(a)[1] = velY;
595 vd.template getProp<velocity_prev>(a)[2] = velZ;
596 vd.template getProp<rho_prev>(a) = rhop;
599 void euler_int(
particles & vd, real_number dt)
603 auto part = vd.getDomainIteratorGPU();
605 real_number dt205 = dt*dt*0.5;
608 CUDA_LAUNCH(euler_int_gpu,part,vd.toKernel(),dt,dt205);
611 remove_marked<red>(vd);
616 template<
typename vector_type,
typename NN_type>
619 real_number tot_ker = 0.0;
625 auto itg = NN.getNNIteratorBox(NN.getCell(xp));
631 if (vd.template getProp<type>(q) != FLUID)
642 real_number r = sqrt(norm2(xp - xq));
644 real_number ker = Wab(r) * (MassFluid / rho_zero);
651 *press_tmp += vd.template getProp<Pressure>(q) * ker;
662 {*press_tmp = 1.0 / tot_ker * *press_tmp;}
665 template<
typename Vector,
typename CellList>
666 inline void sensor_pressure(
Vector & vd,
675 for (
size_t i = 0 ; i < probes.size() ; i++)
679 real_number press_tmp;
682 if (vd.getDecomposition().isLocal(probes.get(i)) ==
true)
684 vd.updateCellListGPU(NN);
687 CUDA_LAUNCH_DIM3(sensor_pressure_gpu,1,1,vd.toKernel(),NN.toKernel(),probe,(real_number *)press_tmp_.toKernel());
690 press_tmp_.deviceToHost();
691 press_tmp = *(real_number *)press_tmp_.getPointer();
701 press_t.last().add(press_tmp);
705 int main(
int argc,
char* argv[])
708 openfpm_init(&argc,&argv);
714 probes.add({0.8779f,0.3f,0.02f});
715 probes.add({0.754f,0.31f,0.02f});
719 size_t sz[3] = {207,90,66};
722 W_dap = 1.0/Wab(H/1.5);
725 size_t bc[3]={NON_PERIODIC,NON_PERIODIC,NON_PERIODIC};
730 particles vd(0,domain,bc,g,DEC_GRAN(512));
736 Box<3,real_number> fluid_box({dp/2.0f,dp/2.0f,dp/2.0f},{0.4f+dp/2.0f,0.67f-dp/2.0f,0.3f+dp/2.0f});
742 max_fluid_height = fluid_it.getBoxMargins().
getHigh(2);
743 h_swl = fluid_it.getBoxMargins().
getHigh(2) - fluid_it.getBoxMargins().
getLow(2);
744 B = (coeff_sound)*(coeff_sound)*gravity*h_swl*rho_zero / gamma_;
745 cbar = coeff_sound * sqrt(gravity * h_swl);
748 while (fluid_it.isNext())
754 vd.getLastPos()[0] = fluid_it.get().get(0);
755 vd.getLastPos()[1] = fluid_it.get().get(1);
756 vd.getLastPos()[2] = fluid_it.get().get(2);
759 vd.template getLastProp<type>() = FLUID;
768 vd.template getLastProp<Pressure>() = rho_zero * gravity * (max_fluid_height - fluid_it.get().get(2));
770 vd.template getLastProp<rho>() = pow(vd.template getLastProp<Pressure>() / B + 1, 1.0/gamma_) * rho_zero;
771 vd.template getLastProp<rho_prev>() = vd.template getLastProp<rho>();
772 vd.template getLastProp<velocity>()[0] = 0.0;
773 vd.template getLastProp<velocity>()[1] = 0.0;
774 vd.template getLastProp<velocity>()[2] = 0.0;
776 vd.template getLastProp<velocity_prev>()[0] = 0.0;
777 vd.template getLastProp<velocity_prev>()[1] = 0.0;
778 vd.template getLastProp<velocity_prev>()[2] = 0.0;
785 Box<3,real_number> recipient1({0.0f,0.0f,0.0f},{1.6f+dp/2.0f,0.67f+dp/2.0f,0.4f+dp/2.0f});
788 Box<3,real_number> obstacle1({0.9f,0.24f-dp/2.0f,0.0f},{1.02f+dp/2.0f,0.36f,0.45f+dp/2.0f});
789 Box<3,real_number> obstacle2({0.9f+dp,0.24f+dp/2.0f,0.0f},{1.02f-dp/2.0f,0.36f-dp,0.45f-dp/2.0f});
793 holes.add(recipient2);
794 holes.add(obstacle1);
797 while (bound_box.isNext())
801 vd.getLastPos()[0] = bound_box.get().get(0);
802 vd.getLastPos()[1] = bound_box.get().get(1);
803 vd.getLastPos()[2] = bound_box.get().get(2);
805 vd.template getLastProp<type>() = BOUNDARY;
806 vd.template getLastProp<rho>() = rho_zero;
807 vd.template getLastProp<rho_prev>() = rho_zero;
808 vd.template getLastProp<velocity>()[0] = 0.0;
809 vd.template getLastProp<velocity>()[1] = 0.0;
810 vd.template getLastProp<velocity>()[2] = 0.0;
812 vd.template getLastProp<velocity_prev>()[0] = 0.0;
813 vd.template getLastProp<velocity_prev>()[1] = 0.0;
814 vd.template getLastProp<velocity_prev>()[2] = 0.0;
821 while (obstacle_box.isNext())
825 vd.getLastPos()[0] = obstacle_box.get().get(0);
826 vd.getLastPos()[1] = obstacle_box.get().get(1);
827 vd.getLastPos()[2] = obstacle_box.get().get(2);
829 vd.template getLastProp<type>() = BOUNDARY;
830 vd.template getLastProp<rho>() = rho_zero;
831 vd.template getLastProp<rho_prev>() = rho_zero;
832 vd.template getLastProp<velocity>()[0] = 0.0;
833 vd.template getLastProp<velocity>()[1] = 0.0;
834 vd.template getLastProp<velocity>()[2] = 0.0;
836 vd.template getLastProp<velocity_prev>()[0] = 0.0;
837 vd.template getLastProp<velocity_prev>()[1] = 0.0;
838 vd.template getLastProp<velocity_prev>()[2] = 0.0;
848 vd.addComputationCosts(md);
849 vd.getDecomposition().decompose();
856 vd.hostToDevicePos();
857 vd.template hostToDeviceProp<type,rho,rho_prev,Pressure,velocity,velocity_prev>();
859 vd.ghost_get<type,rho,Pressure,velocity>(RUN_ON_DEVICE);
861 auto NN = vd.getCellListGPU(2*H, CL_NON_SYMMETRIC, 2);
880 vd.map(RUN_ON_DEVICE);
883 vd.deviceToHostPos();
884 vd.template deviceToHostProp<type>();
888 vd.addComputationCosts(md);
889 vd.getDecomposition().decompose();
892 {std::cout <<
"REBALANCED " << it_reb << std::endl;}
895 vd.map(RUN_ON_DEVICE);
900 real_number max_visc = 0.0;
902 vd.ghost_get<type,rho,Pressure,velocity>(RUN_ON_DEVICE);
905 calc_forces(vd,NN,max_visc,cnt);
912 real_number dt = calc_deltaT(vd,max_visc);
930 vd.map(RUN_ON_DEVICE);
931 vd.ghost_get<type,rho,Pressure,velocity>(RUN_ON_DEVICE);
936 std::cout <<
"OUTPUT " << dt << std::endl;
940 vd.deviceToHostPos();
941 vd.deviceToHostProp<type,rho,rho_prev,Pressure,drho,force,velocity,velocity_prev,
red,red2>();
943 vd.write_frame(
"Geometry",write);
947 {std::cout <<
"TIME: " << t <<
" write " << it_time.
getwct() <<
" " << it_reb <<
" " << cnt <<
" Max visc: " << max_visc <<
" " << vd.size_local() << std::endl;}
952 {std::cout <<
"TIME: " << t <<
" " << it_time.
getwct() <<
" " << it_reb <<
" " << cnt <<
" Max visc: " << max_visc <<
" " << vd.size_local() << std::endl;}
957 std::cout <<
"Time to complete: " << tot_sim.
getwct() <<
" seconds" << std::endl;
964 int main(
int argc,
char* argv[])
This class represent an N-dimensional box.
__device__ __host__ T getLow(int i) const
get the i-coordinate of the low bound interval of the box
__device__ __host__ T getHigh(int i) const
get the high interval of the box
Class for FAST cell list implementation.
This class define the domain decomposition interface.
static PointIteratorSkin< dim, T, typename vd_type::Decomposition_type > DrawSkin(vd_type &vd, size_t(&sz)[dim], Box< dim, T > &domain, Box< dim, T > &sub_A, Box< dim, T > &sub_B)
Draw particles in a box B excluding the area of a second box A (B - A)
static PointIterator< dim, T, typename vd_type::Decomposition_type > DrawBox(vd_type &vd, size_t(&sz)[dim], Box< dim, T > &domain, Box< dim, T > &sub)
Draw particles in a box.
This class implement the point shape in an N-dimensional space.
__device__ __host__ const T & get(unsigned int i) const
Get coordinate.
void execute()
Execute all the requests.
void sum(T &num)
Sum the numbers across all processors and get the result.
size_t getProcessUnitID()
Get the process unit id.
void max(T &num)
Get the maximum number across all processors (or reduction with infinity norm)
Implementation of VCluster class.
Sparse Matrix implementation stub object when OpenFPM is compiled with no linear algebra support.
Implementation of 1-D std::vector like structure.
Class for cpu time benchmarking.
void stop()
Stop the timer.
void start()
Start the timer.
double getwct()
Return the elapsed real time.
auto getPos(vect_dist_key_dx vec_key) -> decltype(vPos.template get< 0 >(vec_key.getKey()))
Get the position of an element.
Model for Dynamic load balancing.
temporal buffer for reductions