OpenFPM  5.2.0
Project that contain the implementation of distributed structures
main.cu
1 
60 #ifdef __NVCC__
61 
62 #include <math.h>
63 
64 #include "Vector/vector_dist.hpp"
65 #include "Draw/DrawParticles.hpp"
66 #include "OdeIntegrators/OdeIntegrators.hpp"
67 #include "Operators/Vector/vector_dist_operators.hpp"
68 
69 typedef float real_number;
70 
71 // A constant to indicate boundary particles
72 #define BOUNDARY 0
73 
74 // A constant to indicate fluid particles
75 #define FLUID 1
76 
77 // initial spacing between particles dp in the formulas
78 const real_number dp = 0.0085;
79 // Maximum height of the fluid water
80 // is going to be calculated and filled later on
81 real_number h_swl = 0.0;
82 
83 // c_s in the formulas (constant used to calculate the sound speed)
84 const real_number coeff_sound = 20.0;
85 
86 // gamma in the formulas
87 const real_number gamma_ = 7.0;
88 
89 // sqrt(3.0*dp*dp) support of the kernel
90 const real_number H = 0.0147224318643;
91 
92 // Eta in the formulas
93 const real_number Eta2 = 0.01 * H*H;
94 
95 // alpha in the formula
96 const real_number visco = 0.1;
97 
98 // cbar in the formula (calculated later)
99 real_number cbar = 0.0;
100 
101 // Mass of the fluid particles
102 const real_number MassFluid = 0.000614125;
103 
104 // Mass of the boundary particles
105 const real_number MassBound = 0.000614125;
106 
107 // End simulation time
108 #ifdef TEST_RUN
109 const real_number t_end = 0.001;
110 #else
111 const real_number t_end = 1.5;
112 #endif
113 
114 // Gravity acceleration
115 const real_number gravity = 9.81;
116 
117 // Reference densitu 1000Kg/m^3
118 const real_number RhoZero = 1000.0;
119 
120 // Filled later require h_swl, it is b in the formulas
121 real_number B = 0.0;
122 
123 // Constant used to define time integration
124 const real_number CFLnumber = 0.2;
125 
126 // Minimum T
127 const real_number DtMin = 0.00001;
128 
129 // Minimum Rho allowed
130 const real_number RhoMin = 700.0;
131 
132 // Maximum Rho allowed
133 const real_number RhoMax = 1300.0;
134 
135 // Filled in initialization
136 real_number max_fluid_height = 0.0;
137 
138 // Properties
139 
140 // FLUID or BOUNDARY
141 const size_t TYPE = 0;
142 
143 // Density
144 const int RHO = 1;
145 
146 // Density at step n-1
147 const int RHO_PREV = 2;
148 
149 // Pressure
150 const int PRESSURE = 3;
151 
152 // Delta rho calculated in the force calculation
153 const int DRHO = 4;
154 
155 // calculated force
156 const int FORCE = 5;
157 
158 // velocity
159 const int VELOCITY = 6;
160 
161 // velocity at previous step
162 const int VELOCITY_PREV = 7;
163 
164 // temporal variable to store velocity
165 const int VELOCITY_TMP = 11;
166 
167 // temporal variable to store density
168 const int RHO_TMP = 10;
169 
170 const int RED = 8;
171 
172 const int RED2 = 9;
173 
174 // Type of the vector containing particles
176 // | | | | | | | | | | | |
177 // | | | | | | | | | | | |
178 // type density density Pressure delta force velocity velocity reduction another temp density temp velocity
179 // at n-1 density at n - 1 buffer reduction buffer
180 
181 
182 struct ModelCustom
183 {
184  template<typename Decomposition, typename vector>
185  inline void addComputation(
186  Decomposition & dec,
187  vector & vectorDist,
188  size_t v,
189  size_t p)
190  {
191  if (vectorDist.template getProp<TYPE>(p) == FLUID)
192  dec.addComputationCost(v,4);
193  else
194  dec.addComputationCost(v,3);
195  }
196 
197  template<typename Decomposition> inline void applyModel(Decomposition & dec, size_t v)
198  {
199  dec.setSubSubDomainComputationCost(v, dec.getSubSubDomainComputationCost(v) * dec.getSubSubDomainComputationCost(v));
200  }
201 
202  real_number distributionTol()
203  {
204  return 1.01;
205  }
206 };
207 
208 template<typename vd_type>
209 __global__ void EqState_gpu(vd_type vectorDist, real_number B)
210 {
211  auto a = GET_PARTICLE(vectorDist);
212 
213  real_number rho_a = vectorDist.template getProp<RHO>(a);
214  real_number rho_frac = rho_a / RhoZero;
215 
216  vectorDist.template getProp<PRESSURE>(a) = B*( rho_frac*rho_frac*rho_frac*rho_frac*rho_frac*rho_frac*rho_frac - 1.0);
217 }
218 
219 inline void EqState(particles & vectorDist)
220 {
221  auto it = vectorDist.getDomainIteratorGPU();
222 
223  CUDA_LAUNCH(EqState_gpu,it,vectorDist.toKernel(),B);
224 }
225 
226 
227 const real_number a2 = 1.0/M_PI/H/H/H;
228 
229 inline __device__ __host__ real_number Wab(real_number r)
230 {
231  r /= H;
232 
233  if (r < 1.0)
234  return (1.0 - 3.0/2.0*r*r + 3.0/4.0*r*r*r)*a2;
235  else if (r < 2.0)
236  return (1.0/4.0*(2.0 - r)*(2.0 - r)*(2.0 - r))*a2;
237  else
238  return 0.0;
239 }
240 
241 
242 const real_number c1 = -3.0/M_PI/H/H/H/H;
243 const real_number d1 = 9.0/4.0/M_PI/H/H/H/H;
244 const real_number c2 = -3.0/4.0/M_PI/H/H/H/H;
245 const real_number a2_4 = 0.25*a2;
246 // Filled later
247 real_number W_dap = 0.0;
248 
249 inline __device__ __host__ void DWab(Point<3,real_number> & dx, Point<3,real_number> & DW, real_number r)
250 {
251  const real_number qq=r/H;
252 
253  real_number qq2 = qq * qq;
254  real_number fac1 = (c1*qq + d1*qq2)/r;
255  real_number b1 = (qq < 1.0f)?1.0f:0.0f;
256 
257  real_number wqq = (2.0f - qq);
258  real_number fac2 = c2 * wqq * wqq / r;
259  real_number b2 = (qq >= 1.0f && qq < 2.0f)?1.0f:0.0f;
260 
261  real_number factor = (b1*fac1 + b2*fac2);
262 
263  DW.get(0) = factor * dx.get(0);
264  DW.get(1) = factor * dx.get(1);
265  DW.get(2) = factor * dx.get(2);
266 }
267 
268 // Tensile correction
269 inline __device__ __host__ real_number Tensile(real_number r, real_number rhoa, real_number rhob, real_number prs1, real_number prs2, real_number W_dap)
270 {
271  const real_number qq=r/H;
272  //-Cubic Spline kernel
273  real_number wab;
274  if(r>H)
275  {
276  real_number wqq1=2.0f-qq;
277  real_number wqq2=wqq1*wqq1;
278 
279  wab=a2_4*(wqq2*wqq1);
280  }
281  else
282  {
283  real_number wqq2=qq*qq;
284  real_number wqq3=wqq2*qq;
285 
286  wab=a2*(1.0f-1.5f*wqq2+0.75f*wqq3);
287  }
288 
289  //-Tensile correction.
290  real_number fab=wab*W_dap;
291  fab*=fab; fab*=fab; //fab=fab^4
292  const real_number tensilp1=(prs1/(rhoa*rhoa))*(prs1>0.0f? 0.01f: -0.2f);
293  const real_number tensilp2=(prs2/(rhob*rhob))*(prs2>0.0f? 0.01f: -0.2f);
294 
295  return (fab*(tensilp1+tensilp2));
296 }
297 
298 
299 inline __device__ __host__ real_number Pi(const Point<3,real_number> & dr, real_number rr2, Point<3,real_number> & dv, real_number rhoa, real_number rhob, real_number massb, real_number cbar, real_number & visc)
300 {
301  const real_number dot = dr.get(0)*dv.get(0) + dr.get(1)*dv.get(1) + dr.get(2)*dv.get(2);
302  const real_number dot_rr2 = dot/(rr2+Eta2);
303  visc=(dot_rr2 < visc)?visc:dot_rr2;
304 
305  if(dot < 0)
306  {
307  const float amubar=H*dot_rr2;
308  const float robar=(rhoa+rhob)*0.5f;
309  const float pi_visc=(-visco*cbar*amubar/robar);
310 
311  return pi_visc;
312  }
313  else
314  return 0.0f;
315 }
316 
317 template<typename particles_type, typename CellList_type>
318 __global__ void calc_forces_gpu(particles_type vectorDist, CellList_type cellList, real_number W_dap, real_number cbar)
319 {
320  auto a = GET_PARTICLE(vectorDist);
321 
322  real_number max_visc = 0.0f;
323 
324  // Get the position xp of the particle
325  Point<3,real_number> xa = vectorDist.getPos(a);
326 
327  // Type of the particle
328  unsigned int typea = vectorDist.template getProp<TYPE>(a);
329 
330  // Get the density of the of the particle a
331  real_number rhoa = vectorDist.template getProp<RHO>(a);
332 
333  // Get the pressure of the particle a
334  real_number Pa = vectorDist.template getProp<PRESSURE>(a);
335 
336  // Get the Velocity of the particle a
337  Point<3,real_number> va = vectorDist.template getProp<VELOCITY>(a);
338 
339  // Reset the force counter (- gravity on zeta direction)
340  Point<3,real_number> force_;
341  force_.get(0) = 0.0f;
342  force_.get(1) = 0.0f;
343  force_.get(2) = -gravity;
344  real_number drho_ = 0.0f;
345 
346  // Get an iterator over the neighborhood particles of p
347  auto Np = cellList.getNNIteratorBox(cellList.getCell(xa));
348 
349  // For each neighborhood particle
350  while (Np.isNext() == true)
351  {
352  // ... q
353  auto b = Np.get();
354 
355  // Get the position xp of the particle
356  Point<3,real_number> xb = vectorDist.getPos(b);
357 
358  if (a == b) {++Np; continue;};
359 
360  unsigned int typeb = vectorDist.template getProp<TYPE>(b);
361 
362  real_number massb = (typeb == FLUID)?MassFluid:MassBound;
363  Point<3,real_number> vb = vectorDist.template getProp<VELOCITY>(b);
364  real_number Pb = vectorDist.template getProp<PRESSURE>(b);
365  real_number rhob = vectorDist.template getProp<RHO>(b);
366 
367  // Get the distance between p and q
368  Point<3,real_number> dr = xa - xb;
369  // take the norm of this vector
370  real_number r2 = norm2(dr);
371 
372  // if they interact
373  if (r2 < 4.0*H*H && r2 >= 1e-16)
374  {
375  real_number r = sqrt(r2);
376 
377  Point<3,real_number> v_rel = va - vb;
378 
380  DWab(dr,DW,r);
381 
382  real_number factor = - massb*((Pa + Pb) / (rhoa * rhob) + Tensile(r,rhoa,rhob,Pa,Pb,W_dap) + Pi(dr,r2,v_rel,rhoa,rhob,massb,cbar,max_visc));
383 
384  // Bound - Bound does not produce any change
385  // factor = (typea == BOUNDARY && typeb == BOUNDARY)?0.0f:factor;
386  factor = (typea != FLUID)?0.0f:factor;
387 
388  force_.get(0) += factor * DW.get(0);
389  force_.get(1) += factor * DW.get(1);
390  force_.get(2) += factor * DW.get(2);
391 
392  real_number scal = massb*(v_rel.get(0)*DW.get(0)+v_rel.get(1)*DW.get(1)+v_rel.get(2)*DW.get(2));
393  scal = (typea == BOUNDARY && typeb == BOUNDARY)?0.0f:scal;
394 
395  drho_ += scal;
396  }
397 
398  ++Np;
399  }
400 
401  vectorDist.template getProp<RED>(a) = max_visc;
402 
403  vectorDist.template getProp<FORCE>(a)[0] = force_.get(0);
404  vectorDist.template getProp<FORCE>(a)[1] = force_.get(1);
405  vectorDist.template getProp<FORCE>(a)[2] = force_.get(2);
406  vectorDist.template getProp<DRHO>(a) = drho_;
407 }
408 
409 template<typename CellList> inline void calc_forces(particles & vectorDist, CellList & cellList, real_number & max_visc, size_t cnt)
410 {
411  auto part = vectorDist.getDomainIteratorGPU(32);
412 
413  // Update the cell-list
414  vectorDist.updateCellListGPU(cellList);
415 
416  CUDA_LAUNCH(calc_forces_gpu,part,vectorDist.toKernel(),cellList.toKernel(),W_dap,cbar);
417 
418  max_visc = reduce_local<RED,_max_>(vectorDist);
419 }
420 
421 template<typename vector_type>
422 __global__ void max_acceleration_and_velocity_gpu(vector_type vectorDist)
423 {
424  auto a = GET_PARTICLE(vectorDist);
425 
426  Point<3,real_number> acc(vectorDist.template getProp<FORCE>(a));
427  vectorDist.template getProp<RED>(a) = norm(acc);
428 
429  Point<3,real_number> vel(vectorDist.template getProp<VELOCITY>(a));
430  vectorDist.template getProp<RED2>(a) = norm(vel);
431 }
432 
433 template<typename vector_type>
434 __global__ void checkGPU(vector_type vector)
435 {
436  int i = threadIdx.x;
437  printf("Check GPU %d %p %f\n", i, &vector.get<0>(i), vector.get<0>(i));
438 }
439 
440 
441 
442 void max_acceleration_and_velocity(particles & vectorDist, real_number & max_acc, real_number & max_vel)
443 {
444  // Calculate the maximum acceleration
445  auto part = vectorDist.getDomainIteratorGPU();
446 
447  CUDA_LAUNCH(max_acceleration_and_velocity_gpu,part,vectorDist.toKernel());
448 
449  max_acc = reduce_local<RED,_max_>(vectorDist);
450  max_vel = reduce_local<RED2,_max_>(vectorDist);
451 
452  Vcluster<> & v_cl = create_vcluster();
453  v_cl.max(max_acc);
454  v_cl.max(max_vel);
455  v_cl.execute();
456 }
457 
458 
459 real_number calc_deltaT(particles & vectorDist, real_number ViscDtMax)
460 {
461  real_number Maxacc = 0.0;
462  real_number Maxvel = 0.0;
463  max_acceleration_and_velocity(vectorDist,Maxacc,Maxvel);
464 
465  //-dt1 depends on force per unit mass.
466  const real_number dt_f = (Maxacc)?sqrt(H/Maxacc):std::numeric_limits<float>::max();
467 
468  //-dt2 combines the Courant and the viscous time-step controls.
469  const real_number dt_cv = H/(std::max(cbar,Maxvel*10.f) + H*ViscDtMax);
470 
471  //-dt new value of time step.
472  real_number dt=real_number(CFLnumber)*std::min(dt_f,dt_cv);
473  if(dt<real_number(DtMin))
474  {dt=real_number(DtMin);}
475 
476  return dt;
477 }
478 
480 template<typename vector_dist_type>
481 __global__ void checkPosPrpLimits(vector_dist_type vectorDist)
482 {
483  auto p = GET_PARTICLE(vectorDist);
484 
485  // if the particle type is boundary
486  if (vectorDist.template getProp<TYPE>(p) == BOUNDARY)
487  {
488  real_number rho = vectorDist.template getProp<RHO>(p);
489  if (rho < RhoZero)
490  vectorDist.template getProp<RHO>(p) = RhoZero;
491 
492  return;
493  }
494 
495  // Check if the particle go out of range in space and in density, if they do mark them to remove it later
496  if (vectorDist.getPos(p)[0] < 0.0 || vectorDist.getPos(p)[1] < 0.0 || vectorDist.getPos(p)[2] < 0.0 ||
497  vectorDist.getPos(p)[0] > 1.61 || vectorDist.getPos(p)[1] > 0.68 || vectorDist.getPos(p)[2] > 0.50 ||
498  vectorDist.template getProp<RHO>(p) < RhoMin || vectorDist.template getProp<RHO>(p) > RhoMax)
499  {vectorDist.template getProp<RED>(p) = 1;}
500 }
502 
503 size_t cnt = 0;
504 
505 void verlet_int(particles & vectorDist, real_number dt)
506 {
507  // particle iterator
508  auto part = vectorDist.getDomainIteratorGPU();
509 
510  real_number dt205 = dt*dt*0.5;
511  real_number dt2 = dt*2.0;
512 
513  auto posExpression = getV<POS_PROP, comp_dev>(vectorDist);
514  auto posExpression2 = getV<POS_PROP>(vectorDist);
515  auto forceExpression = getV<FORCE, comp_dev>(vectorDist);
516  auto drhoExpression = getV<DRHO, comp_dev>(vectorDist);
517  auto typeExpression = getV<TYPE, comp_dev>(vectorDist);
518  auto velocityExpression = getV<VELOCITY, comp_dev>(vectorDist);
519 
520  auto rho_tmpExpression = getV<RHO_TMP, comp_dev>(vectorDist);
521  auto rhoExpression = getV<RHO, comp_dev>(vectorDist);
522  auto rho_prevExpression = getV<RHO_PREV, comp_dev>(vectorDist);
523 
524  auto velocity_prevExpression = getV<VELOCITY_PREV, comp_dev>(vectorDist);
525  auto velocity_tmpExpression = getV<VELOCITY_TMP, comp_dev>(vectorDist);
526  auto redExpression = getV<RED, comp_dev>(vectorDist);
527 
528  rho_tmpExpression = rhoExpression;
529  rhoExpression = rho_prevExpression + dt2*drhoExpression;
530  rho_prevExpression = rho_tmpExpression;
531 
532  posExpression = posExpression + velocityExpression*dt + forceExpression*dt205 * typeExpression;
533 
534  velocity_tmpExpression = velocityExpression;
535  velocityExpression = velocity_prevExpression + forceExpression*dt2 * typeExpression;
536  velocity_prevExpression = velocity_tmpExpression;
537 
538  redExpression = 0;
539 
540  CUDA_LAUNCH(checkPosPrpLimits,part,vectorDist.toKernel());
541 
542  // remove the particles marked
544  remove_marked<RED>(vectorDist);
546 
547  // increment the iteration counter
548  cnt++;
549 }
550 
551 
552 void euler_int(particles & vectorDist, real_number dt)
553 {
554 
555  // particle iterator
556  auto part = vectorDist.getDomainIteratorGPU();
557 
558  real_number dt205 = dt*dt*0.5;
559 
560  auto posExpression = getV<POS_PROP, comp_dev>(vectorDist);
561  auto forceExpression = getV<FORCE, comp_dev>(vectorDist);
562  auto drhoExpression = getV<DRHO, comp_dev>(vectorDist);
563  auto typeExpression = getV<TYPE, comp_dev>(vectorDist);
564 
565  auto rhoExpression = getV<RHO, comp_dev>(vectorDist);
566  auto rho_prevExpression = getV<RHO_PREV, comp_dev>(vectorDist);
567 
568  auto velocityExpression = getV<VELOCITY, comp_dev>(vectorDist);
569  auto velocity_prevExpression = getV<VELOCITY_PREV, comp_dev>(vectorDist);
570 
571  auto redExpression = getV<RED, comp_dev>(vectorDist);
572 
573  rho_prevExpression = rhoExpression;
574  rhoExpression = rhoExpression + dt*drhoExpression;
575 
576  posExpression = posExpression + velocityExpression*dt + forceExpression*dt205 * typeExpression;
577 
578  velocity_prevExpression = velocityExpression;
579  velocityExpression = velocityExpression + forceExpression*dt * typeExpression;
580 
581  redExpression = 0;
582 
583  CUDA_LAUNCH(checkPosPrpLimits,part,vectorDist.toKernel());
584 
585  // remove the particles
586  remove_marked<RED>(vectorDist);
587 
588  cnt++;
589 }
590 
591 template<typename vector_type, typename CellList_type>
592 __global__ void sensor_pressure_gpu(vector_type vectorDist, CellList_type cellList, Point<3,real_number> probe, real_number * press_tmp)
593 {
594  real_number tot_ker = 0.0;
595 
596  // Get the position of the probe i
597  Point<3,real_number> xp = probe;
598 
599  // get the iterator over the neighbohood particles of the probes position
600  auto itg = cellList.getNNIteratorBox(cellList.getCell(xp));
601  while (itg.isNext())
602  {
603  auto q = itg.get();
604 
605  // Only the fluid particles are importants
606  if (vectorDist.template getProp<TYPE>(q) != FLUID)
607  {
608  ++itg;
609  continue;
610  }
611 
612  // Get the position of the neighborhood particle q
613  Point<3,real_number> xq = vectorDist.getPos(q);
614 
615  // Calculate the contribution of the particle to the pressure
616  // of the probe
617  real_number r = sqrt(norm2(xp - xq));
618 
619  real_number ker = Wab(r) * (MassFluid / RhoZero);
620 
621  // Also keep track of the calculation of the summed
622  // kernel
623  tot_ker += ker;
624 
625  // Add the total pressure contribution
626  *press_tmp += vectorDist.template getProp<PRESSURE>(q) * ker;
627 
628  // next neighborhood particle
629  ++itg;
630  }
631 
632  // We calculate the pressure normalizing the
633  // sum over all kernels
634  if (tot_ker == 0.0)
635  {*press_tmp = 0.0;}
636  else
637  {*press_tmp = 1.0 / tot_ker * *press_tmp;}
638 }
639 
640 template<typename Vector, typename CellList>
641 inline void sensor_pressure(Vector & vectorDist,
642  CellList & cellList,
645 {
646  Vcluster<> & v_cl = create_vcluster();
647 
648  press_t.add();
649 
650  for (size_t i = 0 ; i < probes.size() ; i++)
651  {
652  // A float variable to calculate the pressure of the problem
653  CudaMemory press_tmp_(sizeof(real_number));
654  real_number press_tmp;
655 
656  // if the probe is inside the processor domain
657  if (vectorDist.getDecomposition().isLocal(probes.get(i)) == true)
658  {
659  vectorDist.updateCellListGPU(cellList);
660 
661  Point<3,real_number> probe = probes.get(i);
662  CUDA_LAUNCH_DIM3(sensor_pressure_gpu,1,1,vectorDist.toKernel(),cellList.toKernel(),probe,(real_number *)press_tmp_.toKernel());
663 
664  // move calculated pressure on
665  press_tmp_.deviceToHost();
666  press_tmp = *(real_number *)press_tmp_.getPointer();
667  }
668 
669  // This is not necessary in principle, but if you
670  // want to make all processor aware of the history of the calculated
671  // pressure we have to execute this
672  v_cl.sum(press_tmp);
673  v_cl.execute();
674 
675  // We add the calculated pressure into the history
676  press_t.last().add(press_tmp);
677  }
678 }
679 
680 int main(int argc, char* argv[])
681 {
682  // initialize the library
683  openfpm_init(&argc,&argv);
684 
685  // It contain for each time-step the value detected by the probes
688 
689  probes.add({0.8779f,0.3f,0.02f});
690  probes.add({0.754f,0.31f,0.02f});
691 
692  // Here we define our domain a 2D box with internals from 0 to 1.0 for x and y
693  Box<3,real_number> domain({-0.05f,-0.05f,-0.05f},{1.7010f,0.7065f,0.511f});
694  size_t sz[3] = {207,90,66};
695 
696  // Fill W_dap
697  W_dap = 1.0/Wab(H/1.5);
698 
699  // Here we define the boundary conditions of our problem
700  size_t bc[3]={NON_PERIODIC,NON_PERIODIC,NON_PERIODIC};
701 
702  // extended boundary around the domain, and the processor domain
703  Ghost<3,real_number> g(2*H);
704 
705  particles vectorDist(0,domain,bc,g,DEC_GRAN(512));
706 
708 
709  // You can ignore all these dp/2.0 is a trick to reach the same initialization
710  // of Dual-SPH that use a different criteria to draw particles
711  Box<3,real_number> fluid_box({dp/2.0f,dp/2.0f,dp/2.0f},{0.4f+dp/2.0f,0.67f-dp/2.0f,0.3f+dp/2.0f});
712 
713  // return an iterator to the fluid particles to add to vectorDist
714  auto fluid_it = DrawParticles::DrawBox(vectorDist,sz,domain,fluid_box);
715 
716  // here we fill some of the constants needed by the simulation
717  max_fluid_height = fluid_it.getBoxMargins().getHigh(2);
718  h_swl = fluid_it.getBoxMargins().getHigh(2) - fluid_it.getBoxMargins().getLow(2);
719  B = (coeff_sound)*(coeff_sound)*gravity*h_swl*RhoZero / gamma_;
720  cbar = coeff_sound * sqrt(gravity * h_swl);
721 
722  // for each particle inside the fluid box ...
723  while (fluid_it.isNext())
724  {
725  // ... add a particle ...
726  vectorDist.add();
727 
728  // ... and set it position ...
729  vectorDist.getLastPos()[0] = fluid_it.get().get(0);
730  vectorDist.getLastPos()[1] = fluid_it.get().get(1);
731  vectorDist.getLastPos()[2] = fluid_it.get().get(2);
732 
733  // and its type.
734  vectorDist.template getLastProp<TYPE>() = FLUID;
735 
736  // We also initialize the density of the particle and the hydro-static pressure given by
737  //
738  // RhoZero*g*h = P
739  //
740  // rho_p = (P/B + 1)^(1/Gamma) * RhoZero
741  //
742 
743  vectorDist.template getLastProp<PRESSURE>() = RhoZero * gravity * (max_fluid_height - fluid_it.get().get(2));
744 
745  vectorDist.template getLastProp<RHO>() = pow(vectorDist.template getLastProp<PRESSURE>() / B + 1, 1.0/gamma_) * RhoZero;
746  vectorDist.template getLastProp<RHO_PREV>() = vectorDist.template getLastProp<RHO>();
747  vectorDist.template getLastProp<VELOCITY>()[0] = 0.0;
748  vectorDist.template getLastProp<VELOCITY>()[1] = 0.0;
749  vectorDist.template getLastProp<VELOCITY>()[2] = 0.0;
750 
751  vectorDist.template getLastProp<VELOCITY_PREV>()[0] = 0.0;
752  vectorDist.template getLastProp<VELOCITY_PREV>()[1] = 0.0;
753  vectorDist.template getLastProp<VELOCITY_PREV>()[2] = 0.0;
754 
755  // next fluid particle
756  ++fluid_it;
757  }
758 
759  // Recipient
760  Box<3,real_number> recipient1({0.0f,0.0f,0.0f},{1.6f+dp/2.0f,0.67f+dp/2.0f,0.4f+dp/2.0f});
761  Box<3,real_number> recipient2({dp,dp,dp},{1.6f-dp/2.0f,0.67f-dp/2.0f,0.4f+dp/2.0f});
762 
763  Box<3,real_number> obstacle1({0.9f,0.24f-dp/2.0f,0.0f},{1.02f+dp/2.0f,0.36f,0.45f+dp/2.0f});
764  Box<3,real_number> obstacle2({0.9f+dp,0.24f+dp/2.0f,0.0f},{1.02f-dp/2.0f,0.36f-dp,0.45f-dp/2.0f});
765  Box<3,real_number> obstacle3({0.9f+dp,0.24f,0.0f},{1.02f,0.36f,0.45f});
766 
768  holes.add(recipient2);
769  holes.add(obstacle1);
770  auto bound_box = DrawParticles::DrawSkin(vectorDist,sz,domain,holes,recipient1);
771 
772  while (bound_box.isNext())
773  {
774  vectorDist.add();
775 
776  vectorDist.getLastPos()[0] = bound_box.get().get(0);
777  vectorDist.getLastPos()[1] = bound_box.get().get(1);
778  vectorDist.getLastPos()[2] = bound_box.get().get(2);
779 
780  vectorDist.template getLastProp<TYPE>() = BOUNDARY;
781  vectorDist.template getLastProp<RHO>() = RhoZero;
782  vectorDist.template getLastProp<RHO_PREV>() = RhoZero;
783  vectorDist.template getLastProp<VELOCITY>()[0] = 0.0;
784  vectorDist.template getLastProp<VELOCITY>()[1] = 0.0;
785  vectorDist.template getLastProp<VELOCITY>()[2] = 0.0;
786 
787  vectorDist.template getLastProp<VELOCITY_PREV>()[0] = 0.0;
788  vectorDist.template getLastProp<VELOCITY_PREV>()[1] = 0.0;
789  vectorDist.template getLastProp<VELOCITY_PREV>()[2] = 0.0;
790 
791  ++bound_box;
792  }
793 
794  auto obstacle_box = DrawParticles::DrawSkin(vectorDist,sz,domain,obstacle2,obstacle1);
795 
796  while (obstacle_box.isNext())
797  {
798  vectorDist.add();
799 
800  vectorDist.getLastPos()[0] = obstacle_box.get().get(0);
801  vectorDist.getLastPos()[1] = obstacle_box.get().get(1);
802  vectorDist.getLastPos()[2] = obstacle_box.get().get(2);
803 
804  vectorDist.template getLastProp<TYPE>() = BOUNDARY;
805  vectorDist.template getLastProp<RHO>() = RhoZero;
806  vectorDist.template getLastProp<RHO_PREV>() = RhoZero;
807  vectorDist.template getLastProp<VELOCITY>()[0] = 0.0;
808  vectorDist.template getLastProp<VELOCITY>()[1] = 0.0;
809  vectorDist.template getLastProp<VELOCITY>()[2] = 0.0;
810 
811  vectorDist.template getLastProp<VELOCITY_PREV>()[0] = 0.0;
812  vectorDist.template getLastProp<VELOCITY_PREV>()[1] = 0.0;
813  vectorDist.template getLastProp<VELOCITY_PREV>()[2] = 0.0;
814 
815  ++obstacle_box;
816  }
817 
818  vectorDist.map();
819 
820  // Now that we fill the vector with particles
821  ModelCustom md;
822 
823  vectorDist.addComputationCosts(md);
824  vectorDist.getDecomposition().decompose();
825  vectorDist.map();
826 
828 
829  // Ok the initialization is done on CPU on GPU we are doing the main loop, so first we offload all properties on GPU
830 
831  vectorDist.hostToDevicePos();
832  vectorDist.template hostToDeviceProp<TYPE,RHO,RHO_PREV,PRESSURE,VELOCITY,VELOCITY_PREV>();
833 
834  vectorDist.ghost_get<TYPE,RHO,PRESSURE,VELOCITY>(RUN_ON_DEVICE);
835 
836  auto cellList = vectorDist.getCellListGPU(2*H, CL_NON_SYMMETRIC, 2);
837 
838  timer tot_sim;
839  tot_sim.start();
840 
841  size_t write = 0;
842  size_t it = 0;
843  size_t it_reb = 0;
844  real_number t = 0.0;
845  while (t <= t_end)
846  {
847  Vcluster<> & v_cl = create_vcluster();
848  timer it_time;
849  it_time.start();
850 
852  it_reb++;
853  if (it_reb == 300)
854  {
855  vectorDist.map(RUN_ON_DEVICE);
856 
857  // Rebalancer for now work on CPU , so move to CPU
858  vectorDist.deviceToHostPos();
859  vectorDist.template deviceToHostProp<TYPE>();
860 
861  it_reb = 0;
862  ModelCustom md;
863  vectorDist.addComputationCosts(md);
864  vectorDist.getDecomposition().decompose();
865 
866  if (v_cl.getProcessUnitID() == 0)
867  {std::cout << "REBALANCED " << it_reb << std::endl;}
868  }
869 
870  vectorDist.map(RUN_ON_DEVICE);
871 
872  // Calculate pressure from the density
873  EqState(vectorDist);
874 
875  real_number max_visc = 0.0;
876 
877  vectorDist.ghost_get<TYPE,RHO,PRESSURE,VELOCITY>(RUN_ON_DEVICE);
878 
879  // Calc forces
880  calc_forces(vectorDist,cellList,max_visc,cnt);
881 
882  // Get the maximum viscosity term across processors
883  v_cl.max(max_visc);
884  v_cl.execute();
885 
886  // Calculate delta t integration
887  real_number dt = calc_deltaT(vectorDist,max_visc);
888 
889  // VerletStep or euler step
890  it++;
891  if (it < 40)
892  verlet_int(vectorDist,dt);
893  else
894  {
895  euler_int(vectorDist,dt);
896  it = 0;
897  }
898 
899  t += dt;
900 
901  if (write < t*100)
902  {
903  // Sensor pressure require update ghost, so we ensure that particles are distributed correctly
904  // and ghost are updated
905  vectorDist.map(RUN_ON_DEVICE);
906  vectorDist.ghost_get<TYPE,RHO,PRESSURE,VELOCITY>(RUN_ON_DEVICE);
907 
908  // calculate the pressure at the sensor points
909  //sensor_pressure(vectorDist,cellList,press_t,probes);
910 
911  std::cout << "OUTPUT " << dt << std::endl;
912 
913  // When we write we have move all the particles information back to CPU
914 
915  vectorDist.deviceToHostPos();
916  vectorDist.deviceToHostProp<TYPE,RHO,RHO_PREV,PRESSURE,DRHO,FORCE,VELOCITY,VELOCITY_PREV,RED,RED2>();
917 
918  vectorDist.write_frame("Geometry",write);
919  write++;
920 
921  if (v_cl.getProcessUnitID() == 0)
922  {std::cout << "TIME: " << t << " write " << it_time.getwct() << " " << it_reb << " " << cnt << " Max visc: " << max_visc << " " << vectorDist.size_local() << std::endl;}
923  }
924  else
925  {
926  if (v_cl.getProcessUnitID() == 0)
927  {std::cout << "TIME: " << t << " " << it_time.getwct() << " " << it_reb << " " << cnt << " Max visc: " << max_visc << " " << vectorDist.size_local() << std::endl;}
928  }
929  }
930 
931  tot_sim.stop();
932  std::cout << "Time to complete: " << tot_sim.getwct() << " seconds" << std::endl;
933 
934  openfpm_finalize();
935 }
936 
937 #else
938 
939 int main(int argc, char* argv[])
940 {
941  return 0;
942 }
943 
944 #endif
This class represent an N-dimensional box.
Definition: Box.hpp:60
__device__ __host__ T getLow(int i) const
get the i-coordinate of the low bound interval of the box
Definition: Box.hpp:555
__device__ __host__ T getHigh(int i) const
get the high interval of the box
Definition: Box.hpp:566
Class for FAST cell list implementation.
Definition: CellList.hpp:558
This class define the domain decomposition interface.
static PointIteratorSkin< dim, T, typename vd_type::Decomposition_type > DrawSkin(vd_type &vd, size_t(&sz)[dim], Box< dim, T > &domain, Box< dim, T > &sub_A, Box< dim, T > &sub_B)
Draw particles in a box B excluding the area of a second box A (B - A)
static PointIterator< dim, T, typename vd_type::Decomposition_type > DrawBox(vd_type &vd, size_t(&sz)[dim], Box< dim, T > &domain, Box< dim, T > &sub)
Draw particles in a box.
Definition: Ghost.hpp:40
This class implement the point shape in an N-dimensional space.
Definition: Point.hpp:28
__device__ __host__ const T & get(unsigned int i) const
Get coordinate.
Definition: Point.hpp:172
void execute()
Execute all the requests.
void sum(T &num)
Sum the numbers across all processors and get the result.
size_t getProcessUnitID()
Get the process unit id.
void max(T &num)
Get the maximum number across all processors (or reduction with infinity norm)
Implementation of VCluster class.
Definition: VCluster.hpp:59
Sparse Matrix implementation stub object when OpenFPM is compiled with no linear algebra support.
Definition: Vector.hpp:40
Implementation of 1-D std::vector like structure.
Definition: map_vector.hpp:204
Class for cpu time benchmarking.
Definition: timer.hpp:28
void stop()
Stop the timer.
Definition: timer.hpp:119
void start()
Start the timer.
Definition: timer.hpp:90
double getwct()
Return the elapsed real time.
Definition: timer.hpp:130
Distributed vector.
auto getPos(vect_dist_key_dx vec_key) -> decltype(vPos.template get< 0 >(vec_key.getKey()))
Get the position of an element.
Model for Dynamic load balancing.
Definition: main.cpp:232