OpenFPM  5.2.0
Project that contain the implementation of distributed structures
main2.cu
1 
60 #ifdef __NVCC__
61 
62 #include <math.h>
63 
64 #include "Vector/vector_dist.hpp"
65 #include "Draw/DrawParticles.hpp"
66 #include "OdeIntegrators/OdeIntegrators.hpp"
67 #include "Operators/Vector/vector_dist_operators.hpp"
68 
69 typedef float real_number;
70 
71 // A constant to indicate boundary particles
72 #define BOUNDARY 0
73 
74 // A constant to indicate fluid particles
75 #define FLUID 1
76 
77 // initial spacing between particles dp in the formulas
78 const real_number dp = 0.0085;
79 // Maximum height of the fluid water
80 // is going to be calculated and filled later on
81 real_number h_swl = 0.0;
82 
83 // c_s in the formulas (constant used to calculate the sound speed)
84 const real_number coeff_sound = 20.0;
85 
86 // gamma in the formulas
87 const real_number gamma_ = 7.0;
88 
89 // sqrt(3.0*dp*dp) support of the kernel
90 const real_number H = 0.0147224318643;
91 
92 // Eta in the formulas
93 const real_number Eta2 = 0.01 * H*H;
94 
95 // alpha in the formula
96 const real_number visco = 0.1;
97 
98 // cbar in the formula (calculated later)
99 real_number cbar = 0.0;
100 
101 // Mass of the fluid particles
102 const real_number MassFluid = 0.000614125;
103 
104 // Mass of the boundary particles
105 const real_number MassBound = 0.000614125;
106 
107 // End simulation time
108 #ifdef TEST_RUN
109 const real_number t_end = 0.001;
110 #else
111 const real_number t_end = 1.5;
112 #endif
113 
114 // Gravity acceleration
115 const real_number gravity = 9.81;
116 
117 // Reference densitu 1000Kg/m^3
118 const real_number RhoZero = 1000.0;
119 
120 // Filled later require h_swl, it is b in the formulas
121 real_number B = 0.0;
122 
123 // Constant used to define time integration
124 const real_number CFLnumber = 0.2;
125 
126 // Minimum T
127 const real_number DtMin = 0.00001;
128 
129 // Minimum Rho allowed
130 const real_number RhoMin = 700.0;
131 
132 // Maximum Rho allowed
133 const real_number RhoMax = 1300.0;
134 
135 // Filled in initialization
136 real_number max_fluid_height = 0.0;
137 
138 // Properties
139 
140 // FLUID or BOUNDARY
141 const size_t TYPE = 0;
142 
143 // Density
144 const int RHO = 1;
145 
146 // Density at step n-1
147 const int RHO_PREV = 2;
148 
149 // Pressure
150 const int PRESSURE = 3;
151 
152 // Delta rho calculated in the force calculation
153 const int DRHO = 4;
154 
155 // calculated force
156 const int FORCE = 5;
157 
158 // velocity
159 const int VELOCITY = 6;
160 
161 // velocity at previous step
162 const int VELOCITY_PREV = 7;
163 
164 // temporal variable to store velocity
165 const int VELOCITY_TMP = 11;
166 
167 // temporal variable to store density
168 const int RHO_TMP = 10;
169 
170 const int RED = 8;
171 
172 const int RED2 = 9;
173 
174 // Type of the vector containing particles
176 // | | | | | | | | | | | |
177 // | | | | | | | | | | | |
178 // type density density Pressure delta force velocity velocity reduction another temp density temp velocity
179 // at n-1 density at n - 1 buffer reduction buffer
180 
181 // global variable dt to be accessible in RHSFunctor
182 double dt;
183 
184 // global odeint iteration variable to be accessible in RHSFunctor
185 size_t odeintIteration;
186 
192 struct sph_state_type_ofp_ker{
193  sph_state_type_ofp_ker(){
194  }
195  typedef decltype(std::declval<vector_dist_expression<0,openfpm::vector_gpu<aggregate<float>>>>().getVector().toKernel()) scalar_kernel;
196  typedef decltype(std::declval<vector_dist_expression<0,openfpm::vector_gpu<aggregate<VectorS<3, float>>>>>().getVector().toKernel()) vector_kernel;
197  typedef size_t size_type;
198  typedef int is_state_vector;
199  aggregate<scalar_kernel, vector_kernel, vector_kernel> data;
200 
201  __host__ __device__ size_t size() const
202  { return data.get<0>().size(); }
203 };
204 
205 
211 struct sph_state_type_ofp_gpu{
212  sph_state_type_ofp_gpu(){
213  }
214  typedef size_t size_type;
215  typedef int is_state_vector;
216  aggregate<
220  > data;
221 
222  size_t size() const
223  { return data.get<0>().size(); }
224 
225  void resize(size_t n)
226  {
227  data.get<0>().resize(n);
228  data.get<1>().resize(n);
229  data.get<2>().resize(n);
230  }
231  sph_state_type_ofp_ker toKernel() const
232  {
233  sph_state_type_ofp_ker s2_ker;
234  s2_ker.data.get<0>()=data.get<0>().getVector().toKernel();
235  s2_ker.data.get<1>()=data.get<1>().getVector().toKernel();
236  s2_ker.data.get<2>()=data.get<2>().getVector().toKernel();
237  return s2_ker;
238  }
239 };
240 
241 
242 template<> struct has_vector_kernel< sph_state_type_ofp_gpu >
243  : std::true_type { };
244 
245 
268 template<typename vector_dist_type, typename CellList_type>
269 struct RHSFunctor
270 {
271  CellList_type &cellList;
272  vector_dist_type &vectorDist;
273 
274  RHSFunctor(vector_dist_type &vectorDist, CellList_type &cellList) :
275  cellList(cellList),
276  vectorDist(vectorDist)
277  {}
278 
279  void operator()( sph_state_type_ofp_gpu &X , sph_state_type_ofp_gpu &dxdt , const double t)
280  {
281  double dt05 = dt*0.5;
282  double dt2 = dt*2.0;
283 
284  auto posExpression = getV<POS_PROP, comp_dev>(vectorDist);
285  auto forceExpression = getV<FORCE, comp_dev>(vectorDist);
286  auto drhoExpression = getV<DRHO, comp_dev>(vectorDist);
287  auto typeExpression = getV<TYPE, comp_dev>(vectorDist);
288 
289  auto rhoExpression = getV<RHO, comp_dev>(vectorDist);
290  auto rho_prevExpression = getV<RHO_PREV, comp_dev>(vectorDist);
291 
292  auto velocityExpression = getV<VELOCITY, comp_dev>(vectorDist);
293  auto velocity_prevExpression = getV<VELOCITY_PREV, comp_dev>(vectorDist);
294 
295  if (odeintIteration < 40) {
296  X.data.get<0>() = rho_prevExpression;
297  X.data.get<2>() = velocity_prevExpression;
298 
299  dxdt.data.get<0>() = 2*drhoExpression;
300  dxdt.data.get<1>() = velocityExpression + forceExpression*dt05 * typeExpression;
301  dxdt.data.get<2>() = forceExpression*2 * typeExpression;
302  }
303 
304  else
305  {
306  dxdt.data.get<0>() = drhoExpression;
307  dxdt.data.get<1>() = velocityExpression + forceExpression*dt05 * typeExpression;
308  dxdt.data.get<2>() = forceExpression * typeExpression;
309  }
310  }
311 };
313 
314 
315 struct ModelCustom
316 {
317  template<typename Decomposition, typename vector>
318  inline void addComputation(
319  Decomposition & dec,
320  vector & vectorDist,
321  size_t v,
322  size_t p)
323  {
324  if (vectorDist.template getProp<TYPE>(p) == FLUID)
325  dec.addComputationCost(v,4);
326  else
327  dec.addComputationCost(v,3);
328  }
329 
330  template<typename Decomposition> inline void applyModel(Decomposition & dec, size_t v)
331  {
332  dec.setSubSubDomainComputationCost(v, dec.getSubSubDomainComputationCost(v) * dec.getSubSubDomainComputationCost(v));
333  }
334 
335  real_number distributionTol()
336  {
337  return 1.01;
338  }
339 };
340 
341 template<typename vd_type>
342 __global__ void EqState_gpu(vd_type vectorDist, real_number B)
343 {
344  auto a = GET_PARTICLE(vectorDist);
345 
346  real_number rho_a = vectorDist.template getProp<RHO>(a);
347  real_number rho_frac = rho_a / RhoZero;
348 
349  vectorDist.template getProp<PRESSURE>(a) = B*( rho_frac*rho_frac*rho_frac*rho_frac*rho_frac*rho_frac*rho_frac - 1.0);
350 }
351 
352 inline void EqState(particles & vectorDist)
353 {
354  auto it = vectorDist.getDomainIteratorGPU();
355 
356  CUDA_LAUNCH(EqState_gpu,it,vectorDist.toKernel(),B);
357 }
358 
359 
360 const real_number a2 = 1.0/M_PI/H/H/H;
361 
362 inline __device__ __host__ real_number Wab(real_number r)
363 {
364  r /= H;
365 
366  if (r < 1.0)
367  return (1.0 - 3.0/2.0*r*r + 3.0/4.0*r*r*r)*a2;
368  else if (r < 2.0)
369  return (1.0/4.0*(2.0 - r)*(2.0 - r)*(2.0 - r))*a2;
370  else
371  return 0.0;
372 }
373 
374 
375 const real_number c1 = -3.0/M_PI/H/H/H/H;
376 const real_number d1 = 9.0/4.0/M_PI/H/H/H/H;
377 const real_number c2 = -3.0/4.0/M_PI/H/H/H/H;
378 const real_number a2_4 = 0.25*a2;
379 // Filled later
380 real_number W_dap = 0.0;
381 
382 inline __device__ __host__ void DWab(Point<3,real_number> & dx, Point<3,real_number> & DW, real_number r)
383 {
384  const real_number qq=r/H;
385 
386  real_number qq2 = qq * qq;
387  real_number fac1 = (c1*qq + d1*qq2)/r;
388  real_number b1 = (qq < 1.0f)?1.0f:0.0f;
389 
390  real_number wqq = (2.0f - qq);
391  real_number fac2 = c2 * wqq * wqq / r;
392  real_number b2 = (qq >= 1.0f && qq < 2.0f)?1.0f:0.0f;
393 
394  real_number factor = (b1*fac1 + b2*fac2);
395 
396  DW.get(0) = factor * dx.get(0);
397  DW.get(1) = factor * dx.get(1);
398  DW.get(2) = factor * dx.get(2);
399 }
400 
401 // Tensile correction
402 inline __device__ __host__ real_number Tensile(real_number r, real_number rhoa, real_number rhob, real_number prs1, real_number prs2, real_number W_dap)
403 {
404  const real_number qq=r/H;
405  //-Cubic Spline kernel
406  real_number wab;
407  if(r>H)
408  {
409  real_number wqq1=2.0f-qq;
410  real_number wqq2=wqq1*wqq1;
411 
412  wab=a2_4*(wqq2*wqq1);
413  }
414  else
415  {
416  real_number wqq2=qq*qq;
417  real_number wqq3=wqq2*qq;
418 
419  wab=a2*(1.0f-1.5f*wqq2+0.75f*wqq3);
420  }
421 
422  //-Tensile correction.
423  real_number fab=wab*W_dap;
424  fab*=fab; fab*=fab; //fab=fab^4
425  const real_number tensilp1=(prs1/(rhoa*rhoa))*(prs1>0.0f? 0.01f: -0.2f);
426  const real_number tensilp2=(prs2/(rhob*rhob))*(prs2>0.0f? 0.01f: -0.2f);
427 
428  return (fab*(tensilp1+tensilp2));
429 }
430 
431 
432 inline __device__ __host__ real_number Pi(const Point<3,real_number> & dr, real_number rr2, Point<3,real_number> & dv, real_number rhoa, real_number rhob, real_number massb, real_number cbar, real_number & visc)
433 {
434  const real_number dot = dr.get(0)*dv.get(0) + dr.get(1)*dv.get(1) + dr.get(2)*dv.get(2);
435  const real_number dot_rr2 = dot/(rr2+Eta2);
436  visc=(dot_rr2 < visc)?visc:dot_rr2;
437 
438  if(dot < 0)
439  {
440  const float amubar=H*dot_rr2;
441  const float robar=(rhoa+rhob)*0.5f;
442  const float pi_visc=(-visco*cbar*amubar/robar);
443 
444  return pi_visc;
445  }
446  else
447  return 0.0f;
448 }
449 
450 template<typename particles_type, typename CellList_type>
451 __global__ void calc_forces_gpu(particles_type vectorDist, CellList_type cellList, real_number W_dap, real_number cbar)
452 {
453  auto a = GET_PARTICLE(vectorDist);
454 
455  real_number max_visc = 0.0f;
456 
457  // Get the position xp of the particle
458  Point<3,real_number> xa = vectorDist.getPos(a);
459 
460  // Type of the particle
461  unsigned int typea = vectorDist.template getProp<TYPE>(a);
462 
463  // Get the density of the of the particle a
464  real_number rhoa = vectorDist.template getProp<RHO>(a);
465 
466  // Get the pressure of the particle a
467  real_number Pa = vectorDist.template getProp<PRESSURE>(a);
468 
469  // Get the Velocity of the particle a
470  Point<3,real_number> va = vectorDist.template getProp<VELOCITY>(a);
471 
472  // Reset the force counter (- gravity on zeta direction)
473  Point<3,real_number> force_;
474  force_.get(0) = 0.0f;
475  force_.get(1) = 0.0f;
476  force_.get(2) = -gravity;
477  real_number drho_ = 0.0f;
478 
479  // Get an iterator over the neighborhood particles of p
480  auto Np = cellList.getNNIteratorBox(cellList.getCell(xa));
481 
482  // For each neighborhood particle
483  while (Np.isNext() == true)
484  {
485  // ... q
486  auto b = Np.get();
487 
488  // Get the position xp of the particle
489  Point<3,real_number> xb = vectorDist.getPos(b);
490 
491  if (a == b) {++Np; continue;};
492 
493  unsigned int typeb = vectorDist.template getProp<TYPE>(b);
494 
495  real_number massb = (typeb == FLUID)?MassFluid:MassBound;
496  Point<3,real_number> vb = vectorDist.template getProp<VELOCITY>(b);
497  real_number Pb = vectorDist.template getProp<PRESSURE>(b);
498  real_number rhob = vectorDist.template getProp<RHO>(b);
499 
500  // Get the distance between p and q
501  Point<3,real_number> dr = xa - xb;
502  // take the norm of this vector
503  real_number r2 = norm2(dr);
504 
505  // if they interact
506  if (r2 < 4.0*H*H && r2 >= 1e-16)
507  {
508  real_number r = sqrt(r2);
509 
510  Point<3,real_number> v_rel = va - vb;
511 
513  DWab(dr,DW,r);
514 
515  real_number factor = - massb*((Pa + Pb) / (rhoa * rhob) + Tensile(r,rhoa,rhob,Pa,Pb,W_dap) + Pi(dr,r2,v_rel,rhoa,rhob,massb,cbar,max_visc));
516 
517  // Bound - Bound does not produce any change
518  // factor = (typea == BOUNDARY && typeb == BOUNDARY)?0.0f:factor;
519  factor = (typea != FLUID)?0.0f:factor;
520 
521  force_.get(0) += factor * DW.get(0);
522  force_.get(1) += factor * DW.get(1);
523  force_.get(2) += factor * DW.get(2);
524 
525  real_number scal = massb*(v_rel.get(0)*DW.get(0)+v_rel.get(1)*DW.get(1)+v_rel.get(2)*DW.get(2));
526  scal = (typea == BOUNDARY && typeb == BOUNDARY)?0.0f:scal;
527 
528  drho_ += scal;
529  }
530 
531  ++Np;
532  }
533 
534  vectorDist.template getProp<RED>(a) = max_visc;
535 
536  vectorDist.template getProp<FORCE>(a)[0] = force_.get(0);
537  vectorDist.template getProp<FORCE>(a)[1] = force_.get(1);
538  vectorDist.template getProp<FORCE>(a)[2] = force_.get(2);
539  vectorDist.template getProp<DRHO>(a) = drho_;
540 }
541 
542 template<typename CellList> inline void calc_forces(particles & vectorDist, CellList & cellList, real_number & max_visc, size_t cnt)
543 {
544  auto part = vectorDist.getDomainIteratorGPU(32);
545 
546  // Update the cell-list
547  vectorDist.updateCellListGPU(cellList);
548 
549  CUDA_LAUNCH(calc_forces_gpu,part,vectorDist.toKernel(),cellList.toKernel(),W_dap,cbar);
550 
551  max_visc = reduce_local<RED,_max_>(vectorDist);
552 }
553 
554 template<typename vector_type>
555 __global__ void max_acceleration_and_velocity_gpu(vector_type vectorDist)
556 {
557  auto a = GET_PARTICLE(vectorDist);
558 
559  Point<3,real_number> acc(vectorDist.template getProp<FORCE>(a));
560  vectorDist.template getProp<RED>(a) = norm(acc);
561 
562  Point<3,real_number> vel(vectorDist.template getProp<VELOCITY>(a));
563  vectorDist.template getProp<RED2>(a) = norm(vel);
564 }
565 
566 template<typename vector_type>
567 __global__ void checkGPU(vector_type vector)
568 {
569  int i = threadIdx.x;
570  printf("Check GPU %d %p %f\n", i, &vector.get<0>(i), vector.get<0>(i));
571 }
572 
573 
574 
575 void max_acceleration_and_velocity(particles & vectorDist, real_number & max_acc, real_number & max_vel)
576 {
577  // Calculate the maximum acceleration
578  auto part = vectorDist.getDomainIteratorGPU();
579 
580  CUDA_LAUNCH(max_acceleration_and_velocity_gpu,part,vectorDist.toKernel());
581 
582  max_acc = reduce_local<RED,_max_>(vectorDist);
583  max_vel = reduce_local<RED2,_max_>(vectorDist);
584 
585  Vcluster<> & v_cl = create_vcluster();
586  v_cl.max(max_acc);
587  v_cl.max(max_vel);
588  v_cl.execute();
589 }
590 
591 
592 real_number calc_deltaT(particles & vectorDist, real_number ViscDtMax)
593 {
594  real_number Maxacc = 0.0;
595  real_number Maxvel = 0.0;
596  max_acceleration_and_velocity(vectorDist,Maxacc,Maxvel);
597 
598  //-dt1 depends on force per unit mass.
599  const real_number dt_f = (Maxacc)?sqrt(H/Maxacc):std::numeric_limits<float>::max();
600 
601  //-dt2 combines the Courant and the viscous time-step controls.
602  const real_number dt_cv = H/(std::max(cbar,Maxvel*10.f) + H*ViscDtMax);
603 
604  //-dt new value of time step.
605  real_number dt=real_number(CFLnumber)*std::min(dt_f,dt_cv);
606  if(dt<real_number(DtMin))
607  {dt=real_number(DtMin);}
608 
609  return dt;
610 }
611 
613 template<typename vector_dist_type>
614 __global__ void checkPosPrpLimits_ker(vector_dist_type vectorDist)
615 {
616  auto p = GET_PARTICLE(vectorDist);
617 
618  // if the particle type is boundary
619  if (vectorDist.template getProp<TYPE>(p) == BOUNDARY)
620  {
621  real_number rho = vectorDist.template getProp<RHO>(p);
622  if (rho < RhoZero)
623  vectorDist.template getProp<RHO>(p) = RhoZero;
624 
625  return;
626  }
627 
628  // Check if the particle go out of range in space and in density, if they do mark them to remove it later
629  if (vectorDist.getPos(p)[0] < 0.0 || vectorDist.getPos(p)[1] < 0.0 || vectorDist.getPos(p)[2] < 0.0 ||
630  vectorDist.getPos(p)[0] > 1.61 || vectorDist.getPos(p)[1] > 0.68 || vectorDist.getPos(p)[2] > 0.50 ||
631  vectorDist.template getProp<RHO>(p) < RhoMin || vectorDist.template getProp<RHO>(p) > RhoMax)
632  {vectorDist.template getProp<RED>(p) = 1;}
633 }
635 
636 size_t cnt = 0;
637 
638 void checkPosPrpLimits(particles & vectorDist)
639 {
640  auto redExpression = getV<RED, comp_dev>(vectorDist);
641  redExpression = 0;
642 
643  // particle iterator
644  auto part = vectorDist.getDomainIteratorGPU();
645  CUDA_LAUNCH(checkPosPrpLimits_ker,part,vectorDist.toKernel());
646 
647  // remove the particles marked
649  remove_marked<RED>(vectorDist);
651  // increment the iteration counter
652  cnt++;
653 }
654 
655 
656 template<typename vector_type, typename CellList_type>
657 __global__ void sensor_pressure_gpu(vector_type vectorDist, CellList_type cellList, Point<3,real_number> probe, real_number * press_tmp)
658 {
659  real_number tot_ker = 0.0;
660 
661  // Get the position of the probe i
662  Point<3,real_number> xp = probe;
663 
664  // get the iterator over the neighbohood particles of the probes position
665  auto itg = cellList.getNNIteratorBox(cellList.getCell(xp));
666  while (itg.isNext())
667  {
668  auto q = itg.get();
669 
670  // Only the fluid particles are importants
671  if (vectorDist.template getProp<TYPE>(q) != FLUID)
672  {
673  ++itg;
674  continue;
675  }
676 
677  // Get the position of the neighborhood particle q
678  Point<3,real_number> xq = vectorDist.getPos(q);
679 
680  // Calculate the contribution of the particle to the pressure
681  // of the probe
682  real_number r = sqrt(norm2(xp - xq));
683 
684  real_number ker = Wab(r) * (MassFluid / RhoZero);
685 
686  // Also keep track of the calculation of the summed
687  // kernel
688  tot_ker += ker;
689 
690  // Add the total pressure contribution
691  *press_tmp += vectorDist.template getProp<PRESSURE>(q) * ker;
692 
693  // next neighborhood particle
694  ++itg;
695  }
696 
697  // We calculate the pressure normalizing the
698  // sum over all kernels
699  if (tot_ker == 0.0)
700  {*press_tmp = 0.0;}
701  else
702  {*press_tmp = 1.0 / tot_ker * *press_tmp;}
703 }
704 
705 template<typename Vector, typename CellList>
706 inline void sensor_pressure(Vector & vectorDist,
707  CellList & cellList,
710 {
711  Vcluster<> & v_cl = create_vcluster();
712 
713  press_t.add();
714 
715  for (size_t i = 0 ; i < probes.size() ; i++)
716  {
717  // A float variable to calculate the pressure of the problem
718  CudaMemory press_tmp_(sizeof(real_number));
719  real_number press_tmp;
720 
721  // if the probe is inside the processor domain
722  if (vectorDist.getDecomposition().isLocal(probes.get(i)) == true)
723  {
724  vectorDist.updateCellListGPU(cellList);
725 
726  Point<3,real_number> probe = probes.get(i);
727  CUDA_LAUNCH_DIM3(sensor_pressure_gpu,1,1,vectorDist.toKernel(),cellList.toKernel(),probe,(real_number *)press_tmp_.toKernel());
728 
729  // move calculated pressure on
730  press_tmp_.deviceToHost();
731  press_tmp = *(real_number *)press_tmp_.getPointer();
732  }
733 
734  // This is not necessary in principle, but if you
735  // want to make all processor aware of the history of the calculated
736  // pressure we have to execute this
737  v_cl.sum(press_tmp);
738  v_cl.execute();
739 
740  // We add the calculated pressure into the history
741  press_t.last().add(press_tmp);
742  }
743 }
744 
745 int main(int argc, char* argv[])
746 {
747  // initialize the library
748  openfpm_init(&argc,&argv);
749 
750  // It contain for each time-step the value detected by the probes
753 
754  probes.add({0.8779f,0.3f,0.02f});
755  probes.add({0.754f,0.31f,0.02f});
756 
757  // Here we define our domain a 2D box with internals from 0 to 1.0 for x and y
758  Box<3,real_number> domain({-0.05f,-0.05f,-0.05f},{1.7010f,0.7065f,0.511f});
759  size_t sz[3] = {207,90,66};
760 
761  // Fill W_dap
762  W_dap = 1.0/Wab(H/1.5);
763 
764  // Here we define the boundary conditions of our problem
765  size_t bc[3]={NON_PERIODIC,NON_PERIODIC,NON_PERIODIC};
766 
767  // extended boundary around the domain, and the processor domain
768  Ghost<3,real_number> g(2*H);
769 
770  particles vectorDist(0,domain,bc,g,DEC_GRAN(512));
771 
773 
774  // You can ignore all these dp/2.0 is a trick to reach the same initialization
775  // of Dual-SPH that use a different criteria to draw particles
776  Box<3,real_number> fluid_box({dp/2.0f,dp/2.0f,dp/2.0f},{0.4f+dp/2.0f,0.67f-dp/2.0f,0.3f+dp/2.0f});
777 
778  // return an iterator to the fluid particles to add to vectorDist
779  auto fluid_it = DrawParticles::DrawBox(vectorDist,sz,domain,fluid_box);
780 
781  // here we fill some of the constants needed by the simulation
782  max_fluid_height = fluid_it.getBoxMargins().getHigh(2);
783  h_swl = fluid_it.getBoxMargins().getHigh(2) - fluid_it.getBoxMargins().getLow(2);
784  B = (coeff_sound)*(coeff_sound)*gravity*h_swl*RhoZero / gamma_;
785  cbar = coeff_sound * sqrt(gravity * h_swl);
786 
787  // for each particle inside the fluid box ...
788  while (fluid_it.isNext())
789  {
790  // ... add a particle ...
791  vectorDist.add();
792 
793  // ... and set it position ...
794  vectorDist.getLastPos()[0] = fluid_it.get().get(0);
795  vectorDist.getLastPos()[1] = fluid_it.get().get(1);
796  vectorDist.getLastPos()[2] = fluid_it.get().get(2);
797 
798  // and its type.
799  vectorDist.template getLastProp<TYPE>() = FLUID;
800 
801  // We also initialize the density of the particle and the hydro-static pressure given by
802  //
803  // RhoZero*g*h = P
804  //
805  // rho_p = (P/B + 1)^(1/Gamma) * RhoZero
806  //
807 
808  vectorDist.template getLastProp<PRESSURE>() = RhoZero * gravity * (max_fluid_height - fluid_it.get().get(2));
809 
810  vectorDist.template getLastProp<RHO>() = pow(vectorDist.template getLastProp<PRESSURE>() / B + 1, 1.0/gamma_) * RhoZero;
811  vectorDist.template getLastProp<RHO_PREV>() = vectorDist.template getLastProp<RHO>();
812  vectorDist.template getLastProp<VELOCITY>()[0] = 0.0;
813  vectorDist.template getLastProp<VELOCITY>()[1] = 0.0;
814  vectorDist.template getLastProp<VELOCITY>()[2] = 0.0;
815 
816  vectorDist.template getLastProp<VELOCITY_PREV>()[0] = 0.0;
817  vectorDist.template getLastProp<VELOCITY_PREV>()[1] = 0.0;
818  vectorDist.template getLastProp<VELOCITY_PREV>()[2] = 0.0;
819 
820  // next fluid particle
821  ++fluid_it;
822  }
823 
824  // Recipient
825  Box<3,real_number> recipient1({0.0f,0.0f,0.0f},{1.6f+dp/2.0f,0.67f+dp/2.0f,0.4f+dp/2.0f});
826  Box<3,real_number> recipient2({dp,dp,dp},{1.6f-dp/2.0f,0.67f-dp/2.0f,0.4f+dp/2.0f});
827 
828  Box<3,real_number> obstacle1({0.9f,0.24f-dp/2.0f,0.0f},{1.02f+dp/2.0f,0.36f,0.45f+dp/2.0f});
829  Box<3,real_number> obstacle2({0.9f+dp,0.24f+dp/2.0f,0.0f},{1.02f-dp/2.0f,0.36f-dp,0.45f-dp/2.0f});
830  Box<3,real_number> obstacle3({0.9f+dp,0.24f,0.0f},{1.02f,0.36f,0.45f});
831 
833  holes.add(recipient2);
834  holes.add(obstacle1);
835  auto bound_box = DrawParticles::DrawSkin(vectorDist,sz,domain,holes,recipient1);
836 
837  while (bound_box.isNext())
838  {
839  vectorDist.add();
840 
841  vectorDist.getLastPos()[0] = bound_box.get().get(0);
842  vectorDist.getLastPos()[1] = bound_box.get().get(1);
843  vectorDist.getLastPos()[2] = bound_box.get().get(2);
844 
845  vectorDist.template getLastProp<TYPE>() = BOUNDARY;
846  vectorDist.template getLastProp<RHO>() = RhoZero;
847  vectorDist.template getLastProp<RHO_PREV>() = RhoZero;
848  vectorDist.template getLastProp<VELOCITY>()[0] = 0.0;
849  vectorDist.template getLastProp<VELOCITY>()[1] = 0.0;
850  vectorDist.template getLastProp<VELOCITY>()[2] = 0.0;
851 
852  vectorDist.template getLastProp<VELOCITY_PREV>()[0] = 0.0;
853  vectorDist.template getLastProp<VELOCITY_PREV>()[1] = 0.0;
854  vectorDist.template getLastProp<VELOCITY_PREV>()[2] = 0.0;
855 
856  ++bound_box;
857  }
858 
859  auto obstacle_box = DrawParticles::DrawSkin(vectorDist,sz,domain,obstacle2,obstacle1);
860 
861  while (obstacle_box.isNext())
862  {
863  vectorDist.add();
864 
865  vectorDist.getLastPos()[0] = obstacle_box.get().get(0);
866  vectorDist.getLastPos()[1] = obstacle_box.get().get(1);
867  vectorDist.getLastPos()[2] = obstacle_box.get().get(2);
868 
869  vectorDist.template getLastProp<TYPE>() = BOUNDARY;
870  vectorDist.template getLastProp<RHO>() = RhoZero;
871  vectorDist.template getLastProp<RHO_PREV>() = RhoZero;
872  vectorDist.template getLastProp<VELOCITY>()[0] = 0.0;
873  vectorDist.template getLastProp<VELOCITY>()[1] = 0.0;
874  vectorDist.template getLastProp<VELOCITY>()[2] = 0.0;
875 
876  vectorDist.template getLastProp<VELOCITY_PREV>()[0] = 0.0;
877  vectorDist.template getLastProp<VELOCITY_PREV>()[1] = 0.0;
878  vectorDist.template getLastProp<VELOCITY_PREV>()[2] = 0.0;
879 
880  ++obstacle_box;
881  }
882 
883  vectorDist.map();
884 
885  // Now that we fill the vector with particles
886  ModelCustom md;
887 
888  vectorDist.addComputationCosts(md);
889  vectorDist.getDecomposition().decompose();
890  vectorDist.map();
891 
893 
894  // Ok the initialization is done on CPU on GPU we are doing the main loop, so first we offload all properties on GPU
895 
896  vectorDist.hostToDevicePos();
897  vectorDist.template hostToDeviceProp<TYPE,RHO,RHO_PREV,PRESSURE,VELOCITY,VELOCITY_PREV>();
898 
899  vectorDist.ghost_get<TYPE,RHO,PRESSURE,VELOCITY>(RUN_ON_DEVICE);
900 
901  auto cellList = vectorDist.getCellListGPU(2*H, CL_NON_SYMMETRIC, 2);
902 
903  // Odeint time stepper with GPU backend
904  boost::numeric::odeint::euler<sph_state_type_ofp_gpu, double, sph_state_type_ofp_gpu, double, boost::numeric::odeint::vector_space_algebra_ofp_gpu,boost::numeric::odeint::ofp_operations> eulerOdeint;
905 
906  // RHS functor for odeint
907  RHSFunctor<decltype(vectorDist), decltype(cellList)> rhsFunctor(vectorDist, cellList);
908 
909  //Declare Odeint state object
910  sph_state_type_ofp_gpu X;
911 
912  auto posExpression = getV<POS_PROP, comp_dev>(vectorDist);
913  auto forceExpression = getV<FORCE, comp_dev>(vectorDist);
914  auto drhoExpression = getV<DRHO, comp_dev>(vectorDist);
915  auto typeExpression = getV<TYPE, comp_dev>(vectorDist);
916 
917  auto rhoExpression = getV<RHO, comp_dev>(vectorDist);
918  auto rho_tmpExpression = getV<RHO_TMP, comp_dev>(vectorDist);
919  auto rho_prevExpression = getV<RHO_PREV, comp_dev>(vectorDist);
920 
921  auto velocityExpression = getV<VELOCITY, comp_dev>(vectorDist);
922  auto velocity_tmpExpression = getV<VELOCITY_TMP, comp_dev>(vectorDist);
923  auto velocity_prevExpression = getV<VELOCITY_PREV, comp_dev>(vectorDist);
924 
925  //Fill Odeint state object with the initial condition
926  X.data.get<0>() = rhoExpression;
927  X.data.get<1>() = posExpression;
928  X.data.get<2>() = velocityExpression;
929 
930  // global variable
931  odeintIteration = 0;
932 
933  timer tot_sim;
934  tot_sim.start();
935 
936  size_t write = 0;
937  size_t it = 0;
938  size_t it_reb = 0;
939  real_number t = 0.0;
940  while (t <= t_end)
941  {
942  Vcluster<> & v_cl = create_vcluster();
943  timer it_time;
944  it_time.start();
945 
947  it_reb++;
948  if (it_reb == 300)
949  {
950  vectorDist.map(RUN_ON_DEVICE);
951 
952  // Rebalancer for now work on CPU , so move to CPU
953  vectorDist.deviceToHostPos();
954  vectorDist.template deviceToHostProp<TYPE>();
955 
956  it_reb = 0;
957  ModelCustom md;
958  vectorDist.addComputationCosts(md);
959  vectorDist.getDecomposition().decompose();
960 
961  if (v_cl.getProcessUnitID() == 0)
962  {std::cout << "REBALANCED " << it_reb << std::endl;}
963  }
964 
965  vectorDist.map(RUN_ON_DEVICE);
966 
967  // Calculate pressure from the density
968  EqState(vectorDist);
969 
970  real_number max_visc = 0.0;
971 
972  vectorDist.ghost_get<TYPE,RHO,PRESSURE,VELOCITY>(RUN_ON_DEVICE);
973 
974  // Calc forces
975  calc_forces(vectorDist,cellList,max_visc,cnt);
976 
977  // Get the maximum viscosity term across processors
978  v_cl.max(max_visc);
979  v_cl.execute();
980 
981  // Calculate delta t integration
982  // global variable
983  dt = calc_deltaT(vectorDist,max_visc);
984 
985  // global odeint iteration variable
986  odeintIteration++;
987 
988  // Reinitialize Odeint state vector as the particle
989  // distribution could have changed after map()
990  if (odeintIteration < 40) {
991  X.data.get<0>() = rho_prevExpression;
992  X.data.get<2>() = velocity_prevExpression;
993  } else {
994  X.data.get<0>() = rhoExpression;
995  X.data.get<2>() = velocityExpression;
996  }
997 
998  X.data.get<1>() = posExpression;
999 
1000  rho_tmpExpression = rhoExpression;
1001  velocity_tmpExpression = velocityExpression;
1002 
1003  eulerOdeint.do_step(rhsFunctor, X, t, dt);
1004 
1005  rhoExpression=X.data.get<0>();
1006  posExpression = X.data.get<1>();
1007  velocityExpression = X.data.get<2>();
1008 
1009  rho_prevExpression = rho_tmpExpression;
1010  velocity_prevExpression = velocity_tmpExpression;
1011 
1012  if (odeintIteration == 40) {
1013  // Once in 40 iterations
1014  // euler time stepping is done in RHSFunctor
1015  odeintIteration = 0;
1016  }
1017 
1018  checkPosPrpLimits(vectorDist);
1019 
1020  t += dt;
1021 
1022  if (write < t*100)
1023  {
1024  // Sensor pressure require update ghost, so we ensure that particles are distributed correctly
1025  // and ghost are updated
1026  vectorDist.map(RUN_ON_DEVICE);
1027  vectorDist.ghost_get<TYPE,RHO,PRESSURE,VELOCITY>(RUN_ON_DEVICE);
1028 
1029  // calculate the pressure at the sensor points
1030  //sensor_pressure(vectorDist,cellList,press_t,probes);
1031 
1032  std::cout << "OUTPUT " << dt << std::endl;
1033 
1034  // When we write we have move all the particles information back to CPU
1035 
1036  vectorDist.deviceToHostPos();
1037  vectorDist.deviceToHostProp<TYPE,RHO,RHO_PREV,PRESSURE,DRHO,FORCE,VELOCITY,VELOCITY_PREV,RED,RED2>();
1038 
1039  vectorDist.write_frame("Geometry",write);
1040  write++;
1041 
1042  if (v_cl.getProcessUnitID() == 0)
1043  {std::cout << "TIME: " << t << " write " << it_time.getwct() << " " << it_reb << " " << cnt << " Max visc: " << max_visc << " " << vectorDist.size_local() << std::endl;}
1044  }
1045  else
1046  {
1047  if (v_cl.getProcessUnitID() == 0)
1048  {std::cout << "TIME: " << t << " " << it_time.getwct() << " " << it_reb << " " << cnt << " Max visc: " << max_visc << " " << vectorDist.size_local() << std::endl;}
1049  }
1050  }
1051 
1052  tot_sim.stop();
1053  std::cout << "Time to complete: " << tot_sim.getwct() << " seconds" << std::endl;
1054 
1055  openfpm_finalize();
1056 }
1057 
1058 #else
1059 
1060 int main(int argc, char* argv[])
1061 {
1062  return 0;
1063 }
1064 
1065 #endif
This class represent an N-dimensional box.
Definition: Box.hpp:60
__device__ __host__ T getLow(int i) const
get the i-coordinate of the low bound interval of the box
Definition: Box.hpp:555
__device__ __host__ T getHigh(int i) const
get the high interval of the box
Definition: Box.hpp:566
Class for FAST cell list implementation.
Definition: CellList.hpp:558
This class define the domain decomposition interface.
static PointIteratorSkin< dim, T, typename vd_type::Decomposition_type > DrawSkin(vd_type &vd, size_t(&sz)[dim], Box< dim, T > &domain, Box< dim, T > &sub_A, Box< dim, T > &sub_B)
Draw particles in a box B excluding the area of a second box A (B - A)
static PointIterator< dim, T, typename vd_type::Decomposition_type > DrawBox(vd_type &vd, size_t(&sz)[dim], Box< dim, T > &domain, Box< dim, T > &sub)
Draw particles in a box.
Definition: Ghost.hpp:40
This class implement the point shape in an N-dimensional space.
Definition: Point.hpp:28
__device__ __host__ const T & get(unsigned int i) const
Get coordinate.
Definition: Point.hpp:172
void execute()
Execute all the requests.
void sum(T &num)
Sum the numbers across all processors and get the result.
size_t getProcessUnitID()
Get the process unit id.
void max(T &num)
Get the maximum number across all processors (or reduction with infinity norm)
Implementation of VCluster class.
Definition: VCluster.hpp:59
Sparse Matrix implementation stub object when OpenFPM is compiled with no linear algebra support.
Definition: Vector.hpp:40
Implementation of 1-D std::vector like structure.
Definition: map_vector.hpp:204
Class for cpu time benchmarking.
Definition: timer.hpp:28
void stop()
Stop the timer.
Definition: timer.hpp:119
void start()
Start the timer.
Definition: timer.hpp:90
double getwct()
Return the elapsed real time.
Definition: timer.hpp:130
Main class that encapsulate a vector properties operand to be used for expressions construction.
Distributed vector.
auto getPos(vect_dist_key_dx vec_key) -> decltype(vPos.template get< 0 >(vec_key.getKey()))
Get the position of an element.
convert a type into constant type
Definition: aggregate.hpp:302
Model for Dynamic load balancing.
Definition: main.cpp:232
aggregate of properties, from a list of object if create a struct that follow the OPENFPM native stru...
Definition: aggregate.hpp:221
__device__ __host__ boost::mpl::at< type, boost::mpl::int_< i > >::type & get()
get the properties i
Definition: aggregate.hpp:249