64 #include "Vector/vector_dist.hpp"
65 #include "Draw/DrawParticles.hpp"
66 #include "OdeIntegrators/OdeIntegrators.hpp"
67 #include "Operators/Vector/vector_dist_operators.hpp"
69 typedef float real_number;
78 const real_number dp = 0.0085;
81 real_number h_swl = 0.0;
84 const real_number coeff_sound = 20.0;
87 const real_number gamma_ = 7.0;
90 const real_number H = 0.0147224318643;
93 const real_number Eta2 = 0.01 * H*H;
96 const real_number visco = 0.1;
99 real_number cbar = 0.0;
102 const real_number MassFluid = 0.000614125;
105 const real_number MassBound = 0.000614125;
109 const real_number t_end = 0.001;
111 const real_number t_end = 1.5;
115 const real_number gravity = 9.81;
118 const real_number RhoZero = 1000.0;
124 const real_number CFLnumber = 0.2;
127 const real_number DtMin = 0.00001;
130 const real_number RhoMin = 700.0;
133 const real_number RhoMax = 1300.0;
136 real_number max_fluid_height = 0.0;
141 const size_t TYPE = 0;
147 const int RHO_PREV = 2;
150 const int PRESSURE = 3;
159 const int VELOCITY = 6;
162 const int VELOCITY_PREV = 7;
165 const int VELOCITY_TMP = 11;
168 const int RHO_TMP = 10;
175 typedef vector_dist_gpu<3,real_number,aggregate<size_t,real_number, real_number, real_number, real_number, VectorS<3, real_number>,
VectorS<3, real_number>,
VectorS<3, real_number>, real_number, real_number, real_number,
VectorS<3, real_number>>>
particles;
184 template<
typename Decomposition,
typename vector>
185 inline void addComputation(
191 if (vectorDist.template getProp<TYPE>(p) == FLUID)
192 dec.addComputationCost(v,4);
194 dec.addComputationCost(v,3);
197 template<
typename Decomposition>
inline void applyModel(
Decomposition & dec,
size_t v)
199 dec.setSubSubDomainComputationCost(v, dec.getSubSubDomainComputationCost(v) * dec.getSubSubDomainComputationCost(v));
202 real_number distributionTol()
208 template<
typename vd_type>
209 __global__
void EqState_gpu(vd_type vectorDist, real_number B)
211 auto a = GET_PARTICLE(vectorDist);
213 real_number rho_a = vectorDist.template getProp<RHO>(a);
214 real_number rho_frac = rho_a / RhoZero;
216 vectorDist.template getProp<PRESSURE>(a) = B*( rho_frac*rho_frac*rho_frac*rho_frac*rho_frac*rho_frac*rho_frac - 1.0);
219 inline void EqState(
particles & vectorDist)
221 auto it = vectorDist.getDomainIteratorGPU();
223 CUDA_LAUNCH(EqState_gpu,it,vectorDist.toKernel(),B);
227 const real_number a2 = 1.0/M_PI/H/H/H;
229 inline __device__ __host__ real_number Wab(real_number r)
234 return (1.0 - 3.0/2.0*r*r + 3.0/4.0*r*r*r)*a2;
236 return (1.0/4.0*(2.0 - r)*(2.0 - r)*(2.0 - r))*a2;
242 const real_number c1 = -3.0/M_PI/H/H/H/H;
243 const real_number d1 = 9.0/4.0/M_PI/H/H/H/H;
244 const real_number c2 = -3.0/4.0/M_PI/H/H/H/H;
245 const real_number a2_4 = 0.25*a2;
247 real_number W_dap = 0.0;
251 const real_number qq=r/H;
253 real_number qq2 = qq * qq;
254 real_number fac1 = (c1*qq + d1*qq2)/r;
255 real_number b1 = (qq < 1.0f)?1.0f:0.0f;
257 real_number wqq = (2.0f - qq);
258 real_number fac2 = c2 * wqq * wqq / r;
259 real_number b2 = (qq >= 1.0f && qq < 2.0f)?1.0f:0.0f;
261 real_number factor = (b1*fac1 + b2*fac2);
263 DW.
get(0) = factor * dx.
get(0);
264 DW.
get(1) = factor * dx.
get(1);
265 DW.
get(2) = factor * dx.
get(2);
269 inline __device__ __host__ real_number Tensile(real_number r, real_number rhoa, real_number rhob, real_number prs1, real_number prs2, real_number W_dap)
271 const real_number qq=r/H;
276 real_number wqq1=2.0f-qq;
277 real_number wqq2=wqq1*wqq1;
279 wab=a2_4*(wqq2*wqq1);
283 real_number wqq2=qq*qq;
284 real_number wqq3=wqq2*qq;
286 wab=a2*(1.0f-1.5f*wqq2+0.75f*wqq3);
290 real_number fab=wab*W_dap;
292 const real_number tensilp1=(prs1/(rhoa*rhoa))*(prs1>0.0f? 0.01f: -0.2f);
293 const real_number tensilp2=(prs2/(rhob*rhob))*(prs2>0.0f? 0.01f: -0.2f);
295 return (fab*(tensilp1+tensilp2));
299 inline __device__ __host__ real_number Pi(
const Point<3,real_number> & dr, real_number rr2,
Point<3,real_number> & dv, real_number rhoa, real_number rhob, real_number massb, real_number cbar, real_number & visc)
301 const real_number dot = dr.
get(0)*dv.
get(0) + dr.
get(1)*dv.
get(1) + dr.
get(2)*dv.
get(2);
302 const real_number dot_rr2 = dot/(rr2+Eta2);
303 visc=(dot_rr2 < visc)?visc:dot_rr2;
307 const float amubar=H*dot_rr2;
308 const float robar=(rhoa+rhob)*0.5f;
309 const float pi_visc=(-visco*cbar*amubar/robar);
317 template<
typename particles_type,
typename CellList_type>
318 __global__
void calc_forces_gpu(
particles_type vectorDist, CellList_type cellList, real_number W_dap, real_number cbar)
320 auto a = GET_PARTICLE(vectorDist);
322 real_number max_visc = 0.0f;
328 unsigned int typea = vectorDist.template getProp<TYPE>(a);
331 real_number rhoa = vectorDist.template getProp<RHO>(a);
334 real_number Pa = vectorDist.template getProp<PRESSURE>(a);
341 force_.
get(0) = 0.0f;
342 force_.
get(1) = 0.0f;
343 force_.
get(2) = -gravity;
344 real_number drho_ = 0.0f;
347 auto Np = cellList.getNNIteratorBox(cellList.getCell(xa));
350 while (Np.isNext() ==
true)
358 if (a == b) {++Np;
continue;};
360 unsigned int typeb = vectorDist.template getProp<TYPE>(b);
362 real_number massb = (typeb == FLUID)?MassFluid:MassBound;
364 real_number Pb = vectorDist.template getProp<PRESSURE>(b);
365 real_number rhob = vectorDist.template getProp<RHO>(b);
370 real_number r2 = norm2(dr);
373 if (r2 < 4.0*H*H && r2 >= 1e-16)
375 real_number r = sqrt(r2);
382 real_number factor = - massb*((Pa + Pb) / (rhoa * rhob) + Tensile(r,rhoa,rhob,Pa,Pb,W_dap) + Pi(dr,r2,v_rel,rhoa,rhob,massb,cbar,max_visc));
386 factor = (typea != FLUID)?0.0f:factor;
388 force_.
get(0) += factor * DW.
get(0);
389 force_.
get(1) += factor * DW.
get(1);
390 force_.
get(2) += factor * DW.
get(2);
392 real_number scal = massb*(v_rel.
get(0)*DW.
get(0)+v_rel.
get(1)*DW.
get(1)+v_rel.
get(2)*DW.
get(2));
393 scal = (typea == BOUNDARY && typeb == BOUNDARY)?0.0f:scal;
401 vectorDist.template getProp<RED>(a) = max_visc;
403 vectorDist.template getProp<FORCE>(a)[0] = force_.
get(0);
404 vectorDist.template getProp<FORCE>(a)[1] = force_.
get(1);
405 vectorDist.template getProp<FORCE>(a)[2] = force_.
get(2);
406 vectorDist.template getProp<DRHO>(a) = drho_;
409 template<
typename CellList>
inline void calc_forces(
particles & vectorDist,
CellList & cellList, real_number & max_visc,
size_t cnt)
411 auto part = vectorDist.getDomainIteratorGPU(32);
414 vectorDist.updateCellListGPU(cellList);
416 CUDA_LAUNCH(calc_forces_gpu,part,vectorDist.toKernel(),cellList.toKernel(),W_dap,cbar);
418 max_visc = reduce_local<RED,_max_>(vectorDist);
421 template<
typename vector_type>
422 __global__
void max_acceleration_and_velocity_gpu(
vector_type vectorDist)
424 auto a = GET_PARTICLE(vectorDist);
427 vectorDist.template getProp<RED>(a) = norm(acc);
430 vectorDist.template getProp<RED2>(a) = norm(vel);
433 template<
typename vector_type>
437 printf(
"Check GPU %d %p %f\n", i, &vector.get<0>(i), vector.get<0>(i));
442 void max_acceleration_and_velocity(
particles & vectorDist, real_number & max_acc, real_number & max_vel)
445 auto part = vectorDist.getDomainIteratorGPU();
447 CUDA_LAUNCH(max_acceleration_and_velocity_gpu,part,vectorDist.toKernel());
449 max_acc = reduce_local<RED,_max_>(vectorDist);
450 max_vel = reduce_local<RED2,_max_>(vectorDist);
459 real_number calc_deltaT(
particles & vectorDist, real_number ViscDtMax)
461 real_number Maxacc = 0.0;
462 real_number Maxvel = 0.0;
463 max_acceleration_and_velocity(vectorDist,Maxacc,Maxvel);
466 const real_number dt_f = (Maxacc)?sqrt(H/Maxacc):std::numeric_limits<float>::max();
469 const real_number dt_cv = H/(std::max(cbar,Maxvel*10.f) + H*ViscDtMax);
472 real_number dt=real_number(CFLnumber)*std::min(dt_f,dt_cv);
473 if(dt<real_number(DtMin))
474 {dt=real_number(DtMin);}
480 template<
typename vector_dist_type>
483 auto p = GET_PARTICLE(vectorDist);
486 if (vectorDist.template getProp<TYPE>(p) == BOUNDARY)
488 real_number rho = vectorDist.template getProp<RHO>(p);
490 vectorDist.template getProp<RHO>(p) = RhoZero;
496 if (vectorDist.
getPos(p)[0] < 0.0 || vectorDist.
getPos(p)[1] < 0.0 || vectorDist.
getPos(p)[2] < 0.0 ||
497 vectorDist.
getPos(p)[0] > 1.61 || vectorDist.
getPos(p)[1] > 0.68 || vectorDist.
getPos(p)[2] > 0.50 ||
498 vectorDist.template getProp<RHO>(p) < RhoMin || vectorDist.template getProp<RHO>(p) > RhoMax)
499 {vectorDist.template getProp<RED>(p) = 1;}
505 void verlet_int(
particles & vectorDist, real_number dt)
508 auto part = vectorDist.getDomainIteratorGPU();
510 real_number dt205 = dt*dt*0.5;
511 real_number dt2 = dt*2.0;
513 auto posExpression = getV<POS_PROP, comp_dev>(vectorDist);
514 auto posExpression2 = getV<POS_PROP>(vectorDist);
515 auto forceExpression = getV<FORCE, comp_dev>(vectorDist);
516 auto drhoExpression = getV<DRHO, comp_dev>(vectorDist);
517 auto typeExpression = getV<TYPE, comp_dev>(vectorDist);
518 auto velocityExpression = getV<VELOCITY, comp_dev>(vectorDist);
520 auto rho_tmpExpression = getV<RHO_TMP, comp_dev>(vectorDist);
521 auto rhoExpression = getV<RHO, comp_dev>(vectorDist);
522 auto rho_prevExpression = getV<RHO_PREV, comp_dev>(vectorDist);
524 auto velocity_prevExpression = getV<VELOCITY_PREV, comp_dev>(vectorDist);
525 auto velocity_tmpExpression = getV<VELOCITY_TMP, comp_dev>(vectorDist);
526 auto redExpression = getV<RED, comp_dev>(vectorDist);
528 rho_tmpExpression = rhoExpression;
529 rhoExpression = rho_prevExpression + dt2*drhoExpression;
530 rho_prevExpression = rho_tmpExpression;
532 posExpression = posExpression + velocityExpression*dt + forceExpression*dt205 * typeExpression;
534 velocity_tmpExpression = velocityExpression;
535 velocityExpression = velocity_prevExpression + forceExpression*dt2 * typeExpression;
536 velocity_prevExpression = velocity_tmpExpression;
540 CUDA_LAUNCH(checkPosPrpLimits,part,vectorDist.toKernel());
544 remove_marked<RED>(vectorDist);
552 void euler_int(
particles & vectorDist, real_number dt)
556 auto part = vectorDist.getDomainIteratorGPU();
558 real_number dt205 = dt*dt*0.5;
560 auto posExpression = getV<POS_PROP, comp_dev>(vectorDist);
561 auto forceExpression = getV<FORCE, comp_dev>(vectorDist);
562 auto drhoExpression = getV<DRHO, comp_dev>(vectorDist);
563 auto typeExpression = getV<TYPE, comp_dev>(vectorDist);
565 auto rhoExpression = getV<RHO, comp_dev>(vectorDist);
566 auto rho_prevExpression = getV<RHO_PREV, comp_dev>(vectorDist);
568 auto velocityExpression = getV<VELOCITY, comp_dev>(vectorDist);
569 auto velocity_prevExpression = getV<VELOCITY_PREV, comp_dev>(vectorDist);
571 auto redExpression = getV<RED, comp_dev>(vectorDist);
573 rho_prevExpression = rhoExpression;
574 rhoExpression = rhoExpression + dt*drhoExpression;
576 posExpression = posExpression + velocityExpression*dt + forceExpression*dt205 * typeExpression;
578 velocity_prevExpression = velocityExpression;
579 velocityExpression = velocityExpression + forceExpression*dt * typeExpression;
583 CUDA_LAUNCH(checkPosPrpLimits,part,vectorDist.toKernel());
586 remove_marked<RED>(vectorDist);
591 template<
typename vector_type,
typename CellList_type>
594 real_number tot_ker = 0.0;
600 auto itg = cellList.getNNIteratorBox(cellList.getCell(xp));
606 if (vectorDist.template getProp<TYPE>(q) != FLUID)
617 real_number r = sqrt(norm2(xp - xq));
619 real_number ker = Wab(r) * (MassFluid / RhoZero);
626 *press_tmp += vectorDist.template getProp<PRESSURE>(q) * ker;
637 {*press_tmp = 1.0 / tot_ker * *press_tmp;}
640 template<
typename Vector,
typename CellList>
641 inline void sensor_pressure(
Vector & vectorDist,
650 for (
size_t i = 0 ; i < probes.size() ; i++)
654 real_number press_tmp;
657 if (vectorDist.getDecomposition().isLocal(probes.get(i)) ==
true)
659 vectorDist.updateCellListGPU(cellList);
662 CUDA_LAUNCH_DIM3(sensor_pressure_gpu,1,1,vectorDist.toKernel(),cellList.toKernel(),probe,(real_number *)press_tmp_.toKernel());
665 press_tmp_.deviceToHost();
666 press_tmp = *(real_number *)press_tmp_.getPointer();
676 press_t.last().add(press_tmp);
680 int main(
int argc,
char* argv[])
683 openfpm_init(&argc,&argv);
689 probes.add({0.8779f,0.3f,0.02f});
690 probes.add({0.754f,0.31f,0.02f});
694 size_t sz[3] = {207,90,66};
697 W_dap = 1.0/Wab(H/1.5);
700 size_t bc[3]={NON_PERIODIC,NON_PERIODIC,NON_PERIODIC};
705 particles vectorDist(0,domain,bc,g,DEC_GRAN(512));
711 Box<3,real_number> fluid_box({dp/2.0f,dp/2.0f,dp/2.0f},{0.4f+dp/2.0f,0.67f-dp/2.0f,0.3f+dp/2.0f});
717 max_fluid_height = fluid_it.getBoxMargins().
getHigh(2);
718 h_swl = fluid_it.getBoxMargins().
getHigh(2) - fluid_it.getBoxMargins().
getLow(2);
719 B = (coeff_sound)*(coeff_sound)*gravity*h_swl*RhoZero / gamma_;
720 cbar = coeff_sound * sqrt(gravity * h_swl);
723 while (fluid_it.isNext())
729 vectorDist.getLastPos()[0] = fluid_it.get().get(0);
730 vectorDist.getLastPos()[1] = fluid_it.get().get(1);
731 vectorDist.getLastPos()[2] = fluid_it.get().get(2);
734 vectorDist.template getLastProp<TYPE>() = FLUID;
743 vectorDist.template getLastProp<PRESSURE>() = RhoZero * gravity * (max_fluid_height - fluid_it.get().get(2));
745 vectorDist.template getLastProp<RHO>() = pow(vectorDist.template getLastProp<PRESSURE>() / B + 1, 1.0/gamma_) * RhoZero;
746 vectorDist.template getLastProp<RHO_PREV>() = vectorDist.template getLastProp<RHO>();
747 vectorDist.template getLastProp<VELOCITY>()[0] = 0.0;
748 vectorDist.template getLastProp<VELOCITY>()[1] = 0.0;
749 vectorDist.template getLastProp<VELOCITY>()[2] = 0.0;
751 vectorDist.template getLastProp<VELOCITY_PREV>()[0] = 0.0;
752 vectorDist.template getLastProp<VELOCITY_PREV>()[1] = 0.0;
753 vectorDist.template getLastProp<VELOCITY_PREV>()[2] = 0.0;
760 Box<3,real_number> recipient1({0.0f,0.0f,0.0f},{1.6f+dp/2.0f,0.67f+dp/2.0f,0.4f+dp/2.0f});
763 Box<3,real_number> obstacle1({0.9f,0.24f-dp/2.0f,0.0f},{1.02f+dp/2.0f,0.36f,0.45f+dp/2.0f});
764 Box<3,real_number> obstacle2({0.9f+dp,0.24f+dp/2.0f,0.0f},{1.02f-dp/2.0f,0.36f-dp,0.45f-dp/2.0f});
768 holes.add(recipient2);
769 holes.add(obstacle1);
772 while (bound_box.isNext())
776 vectorDist.getLastPos()[0] = bound_box.get().get(0);
777 vectorDist.getLastPos()[1] = bound_box.get().get(1);
778 vectorDist.getLastPos()[2] = bound_box.get().get(2);
780 vectorDist.template getLastProp<TYPE>() = BOUNDARY;
781 vectorDist.template getLastProp<RHO>() = RhoZero;
782 vectorDist.template getLastProp<RHO_PREV>() = RhoZero;
783 vectorDist.template getLastProp<VELOCITY>()[0] = 0.0;
784 vectorDist.template getLastProp<VELOCITY>()[1] = 0.0;
785 vectorDist.template getLastProp<VELOCITY>()[2] = 0.0;
787 vectorDist.template getLastProp<VELOCITY_PREV>()[0] = 0.0;
788 vectorDist.template getLastProp<VELOCITY_PREV>()[1] = 0.0;
789 vectorDist.template getLastProp<VELOCITY_PREV>()[2] = 0.0;
796 while (obstacle_box.isNext())
800 vectorDist.getLastPos()[0] = obstacle_box.get().get(0);
801 vectorDist.getLastPos()[1] = obstacle_box.get().get(1);
802 vectorDist.getLastPos()[2] = obstacle_box.get().get(2);
804 vectorDist.template getLastProp<TYPE>() = BOUNDARY;
805 vectorDist.template getLastProp<RHO>() = RhoZero;
806 vectorDist.template getLastProp<RHO_PREV>() = RhoZero;
807 vectorDist.template getLastProp<VELOCITY>()[0] = 0.0;
808 vectorDist.template getLastProp<VELOCITY>()[1] = 0.0;
809 vectorDist.template getLastProp<VELOCITY>()[2] = 0.0;
811 vectorDist.template getLastProp<VELOCITY_PREV>()[0] = 0.0;
812 vectorDist.template getLastProp<VELOCITY_PREV>()[1] = 0.0;
813 vectorDist.template getLastProp<VELOCITY_PREV>()[2] = 0.0;
823 vectorDist.addComputationCosts(md);
824 vectorDist.getDecomposition().decompose();
831 vectorDist.hostToDevicePos();
832 vectorDist.template hostToDeviceProp<TYPE,RHO,RHO_PREV,PRESSURE,VELOCITY,VELOCITY_PREV>();
834 vectorDist.ghost_get<TYPE,RHO,PRESSURE,VELOCITY>(RUN_ON_DEVICE);
836 auto cellList = vectorDist.getCellListGPU(2*H, CL_NON_SYMMETRIC, 2);
855 vectorDist.map(RUN_ON_DEVICE);
858 vectorDist.deviceToHostPos();
859 vectorDist.template deviceToHostProp<TYPE>();
863 vectorDist.addComputationCosts(md);
864 vectorDist.getDecomposition().decompose();
867 {std::cout <<
"REBALANCED " << it_reb << std::endl;}
870 vectorDist.map(RUN_ON_DEVICE);
875 real_number max_visc = 0.0;
877 vectorDist.ghost_get<TYPE,RHO,PRESSURE,VELOCITY>(RUN_ON_DEVICE);
880 calc_forces(vectorDist,cellList,max_visc,cnt);
887 real_number dt = calc_deltaT(vectorDist,max_visc);
892 verlet_int(vectorDist,dt);
895 euler_int(vectorDist,dt);
905 vectorDist.map(RUN_ON_DEVICE);
906 vectorDist.ghost_get<TYPE,RHO,PRESSURE,VELOCITY>(RUN_ON_DEVICE);
911 std::cout <<
"OUTPUT " << dt << std::endl;
915 vectorDist.deviceToHostPos();
916 vectorDist.deviceToHostProp<TYPE,RHO,RHO_PREV,PRESSURE,DRHO,FORCE,VELOCITY,VELOCITY_PREV,RED,RED2>();
918 vectorDist.write_frame(
"Geometry",write);
922 {std::cout <<
"TIME: " << t <<
" write " << it_time.
getwct() <<
" " << it_reb <<
" " << cnt <<
" Max visc: " << max_visc <<
" " << vectorDist.size_local() << std::endl;}
927 {std::cout <<
"TIME: " << t <<
" " << it_time.
getwct() <<
" " << it_reb <<
" " << cnt <<
" Max visc: " << max_visc <<
" " << vectorDist.size_local() << std::endl;}
932 std::cout <<
"Time to complete: " << tot_sim.
getwct() <<
" seconds" << std::endl;
939 int main(
int argc,
char* argv[])
This class represent an N-dimensional box.
__device__ __host__ T getLow(int i) const
get the i-coordinate of the low bound interval of the box
__device__ __host__ T getHigh(int i) const
get the high interval of the box
Class for FAST cell list implementation.
This class define the domain decomposition interface.
static PointIteratorSkin< dim, T, typename vd_type::Decomposition_type > DrawSkin(vd_type &vd, size_t(&sz)[dim], Box< dim, T > &domain, Box< dim, T > &sub_A, Box< dim, T > &sub_B)
Draw particles in a box B excluding the area of a second box A (B - A)
static PointIterator< dim, T, typename vd_type::Decomposition_type > DrawBox(vd_type &vd, size_t(&sz)[dim], Box< dim, T > &domain, Box< dim, T > &sub)
Draw particles in a box.
This class implement the point shape in an N-dimensional space.
__device__ __host__ const T & get(unsigned int i) const
Get coordinate.
void execute()
Execute all the requests.
void sum(T &num)
Sum the numbers across all processors and get the result.
size_t getProcessUnitID()
Get the process unit id.
void max(T &num)
Get the maximum number across all processors (or reduction with infinity norm)
Implementation of VCluster class.
Sparse Matrix implementation stub object when OpenFPM is compiled with no linear algebra support.
Implementation of 1-D std::vector like structure.
Class for cpu time benchmarking.
void stop()
Stop the timer.
void start()
Start the timer.
double getwct()
Return the elapsed real time.
auto getPos(vect_dist_key_dx vec_key) -> decltype(vPos.template get< 0 >(vec_key.getKey()))
Get the position of an element.
Model for Dynamic load balancing.