OpenFPM  5.2.0
Project that contain the implementation of distributed structures
main.cu
1 
51 #ifdef __NVCC__
52 
53 #define PRINT_STACKTRACE
54 #define STOP_ON_ERROR
55 #define OPENMPI
56 //#define SE_CLASS1
57 
58 #include "Vector/vector_dist.hpp"
59 #include <math.h>
60 #include "Draw/DrawParticles.hpp"
61 
62 
63 
64 typedef float real_number;
65 
66 // A constant to indicate boundary particles
67 #define BOUNDARY 0
68 
69 // A constant to indicate fluid particles
70 #define FLUID 1
71 
72 // initial spacing between particles dp in the formulas
73 const real_number dp = 0.0085;
74 // Maximum height of the fluid water
75 // is going to be calculated and filled later on
76 real_number h_swl = 0.0;
77 
78 // c_s in the formulas (constant used to calculate the sound speed)
79 const real_number coeff_sound = 20.0;
80 
81 // gamma in the formulas
82 const real_number gamma_ = 7.0;
83 
84 // sqrt(3.0*dp*dp) support of the kernel
85 const real_number H = 0.0147224318643;
86 
87 // Eta in the formulas
88 const real_number Eta2 = 0.01 * H*H;
89 
90 const real_number FourH2 = 4.0 * H*H;
91 
92 // alpha in the formula
93 const real_number visco = 0.1;
94 
95 // cbar in the formula (calculated later)
96 real_number cbar = 0.0;
97 
98 // Mass of the fluid particles
99 const real_number MassFluid = 0.000614125;
100 
101 // Mass of the boundary particles
102 const real_number MassBound = 0.000614125;
103 
104 //
105 
106 // End simulation time
107 #ifdef TEST_RUN
108 const real_number t_end = 0.001;
109 #else
110 const real_number t_end = 1.5;
111 #endif
112 
113 // Gravity acceleration
114 const real_number gravity = 9.81;
115 
116 // Reference densitu 1000Kg/m^3
117 const real_number rho_zero = 1000.0;
118 
119 // Filled later require h_swl, it is b in the formulas
120 real_number B = 0.0;
121 
122 // Constant used to define time integration
123 const real_number CFLnumber = 0.2;
124 
125 // Minimum T
126 const real_number DtMin = 0.00001;
127 
128 // Minimum Rho allowed
129 const real_number RhoMin = 700.0;
130 
131 // Maximum Rho allowed
132 const real_number RhoMax = 1300.0;
133 
134 // Filled in initialization
135 real_number max_fluid_height = 0.0;
136 
137 // Properties
138 
139 // FLUID or BOUNDARY
140 const size_t type = 0;
141 
142 // Density
143 const int rho = 1;
144 
145 // Density at step n-1
146 const int rho_prev = 2;
147 
148 // Pressure
149 const int Pressure = 3;
150 
151 // Delta rho calculated in the force calculation
152 const int drho = 4;
153 
154 // calculated force
155 const int force = 5;
156 
157 // velocity
158 const int velocity = 6;
159 
160 // velocity at previous step
161 const int velocity_prev = 7;
162 
163 const int red = 8;
164 
165 const int red2 = 9;
166 
167 // Type of the vector containing particles
169 // | | | | | | | | | |
170 // | | | | | | | | | |
171 // type density density Pressure delta force velocity velocity reduction another
172 // at n-1 density at n - 1 buffer reduction buffer
173 
174 
175 struct ModelCustom
176 {
177  template<typename Decomposition, typename vector> inline void addComputation(
178  Decomposition & dec,
179  vector & vd,
180  size_t v,
181  size_t p)
182  {
183  if (vd.template getProp<type>(p) == FLUID)
184  dec.addComputationCost(v,4);
185  else
186  dec.addComputationCost(v,3);
187  }
188 
189  template<typename Decomposition> inline void applyModel(Decomposition & dec, size_t v)
190  {
191  dec.setSubSubDomainComputationCost(v, dec.getSubSubDomainComputationCost(v) * dec.getSubSubDomainComputationCost(v));
192  }
193 
194  real_number distributionTol()
195  {
196  return 1.01;
197  }
198 };
199 
200 template<typename vd_type>
201 __global__ void EqState_gpu(vd_type vd, real_number B)
202 {
203  auto a = GET_PARTICLE(vd);
204 
205  real_number rho_a = vd.template getProp<rho>(a);
206  real_number rho_frac = rho_a / rho_zero;
207 
208  vd.template getProp<Pressure>(a) = B*( rho_frac*rho_frac*rho_frac*rho_frac*rho_frac*rho_frac*rho_frac - 1.0);
209 }
210 
211 inline void EqState(particles & vd)
212 {
213  auto it = vd.getDomainIteratorGPU();
214 
215  CUDA_LAUNCH(EqState_gpu,it,vd.toKernel(),B);
216 }
217 
218 
219 const real_number a2 = 1.0/M_PI/H/H/H;
220 
221 inline __device__ __host__ real_number Wab(real_number r)
222 {
223  r /= H;
224 
225  if (r < 1.0)
226  return (1.0 - 3.0/2.0*r*r + 3.0/4.0*r*r*r)*a2;
227  else if (r < 2.0)
228  return (1.0/4.0*(2.0 - r)*(2.0 - r)*(2.0 - r))*a2;
229  else
230  return 0.0;
231 }
232 
233 
234 const real_number c1 = -3.0/M_PI/H/H/H/H;
235 const real_number d1 = 9.0/4.0/M_PI/H/H/H/H;
236 const real_number c2 = -3.0/4.0/M_PI/H/H/H/H;
237 const real_number a2_4 = 0.25*a2;
238 // Filled later
239 real_number W_dap = 0.0;
240 
241 inline __device__ __host__ void DWab(Point<3,real_number> & dx, Point<3,real_number> & DW, real_number r)
242 {
243  const real_number qq=r/H;
244 
245  real_number qq2 = qq * qq;
246  real_number fac1 = (c1*qq + d1*qq2)/r;
247  real_number b1 = (qq < 1.0f)?1.0f:0.0f;
248 
249  real_number wqq = (2.0f - qq);
250  real_number fac2 = c2 * wqq * wqq / r;
251  real_number b2 = (qq >= 1.0f && qq < 2.0f)?1.0f:0.0f;
252 
253  real_number factor = (b1*fac1 + b2*fac2);
254 
255  DW.get(0) = factor * dx.get(0);
256  DW.get(1) = factor * dx.get(1);
257  DW.get(2) = factor * dx.get(2);
258 }
259 
260 // Tensile correction
261 inline __device__ __host__ real_number Tensile(real_number r, real_number rhoa, real_number rhob, real_number prs1, real_number prs2, real_number W_dap)
262 {
263  const real_number qq=r/H;
264  //-Cubic Spline kernel
265  real_number wab;
266  if(r>H)
267  {
268  real_number wqq1=2.0f-qq;
269  real_number wqq2=wqq1*wqq1;
270 
271  wab=a2_4*(wqq2*wqq1);
272  }
273  else
274  {
275  real_number wqq2=qq*qq;
276  real_number wqq3=wqq2*qq;
277 
278  wab=a2*(1.0f-1.5f*wqq2+0.75f*wqq3);
279  }
280 
281  //-Tensile correction.
282  real_number fab=wab*W_dap;
283  fab*=fab; fab*=fab; //fab=fab^4
284  const real_number tensilp1=(prs1/(rhoa*rhoa))*(prs1>0.0f? 0.01f: -0.2f);
285  const real_number tensilp2=(prs2/(rhob*rhob))*(prs2>0.0f? 0.01f: -0.2f);
286 
287  return (fab*(tensilp1+tensilp2));
288 }
289 
290 
291 inline __device__ __host__ real_number Pi(const Point<3,real_number> & dr, real_number rr2, Point<3,real_number> & dv, real_number rhoa, real_number rhob, real_number massb, real_number cbar, real_number & visc)
292 {
293  const real_number dot = dr.get(0)*dv.get(0) + dr.get(1)*dv.get(1) + dr.get(2)*dv.get(2);
294  const real_number dot_rr2 = dot/(rr2+Eta2);
295  visc=(dot_rr2 < visc)?visc:dot_rr2;
296 
297  if(dot < 0)
298  {
299  const float amubar=H*dot_rr2;
300  const float robar=(rhoa+rhob)*0.5f;
301  const float pi_visc=(-visco*cbar*amubar/robar);
302 
303  return pi_visc;
304  }
305  else
306  return 0.0f;
307 }
308 
309 template<typename particles_type, typename fluid_ids_type,typename NN_type>
310 __global__ void calc_forces_fluid_gpu(particles_type vd, fluid_ids_type fids, NN_type NN, real_number W_dap, real_number cbar)
311 {
312  // ... a
313  unsigned int a;
314 
315  GET_PARTICLE_BY_ID(a,fids);
316 
317  real_number max_visc = 0.0f;
318 
319  // Get the position xp of the particle
320  Point<3,real_number> xa = vd.getPos(a);
321 
322  // Type of the particle
323  unsigned int typea = vd.template getProp<type>(a);
324 
325  // Get the density of the of the particle a
326  real_number rhoa = vd.template getProp<rho>(a);
327 
328  // Get the pressure of the particle a
329  real_number Pa = vd.template getProp<Pressure>(a);
330 
331  // Get the Velocity of the particle a
332  Point<3,real_number> va = vd.template getProp<velocity>(a);
333 
334  Point<3,real_number> force_;
335  force_.get(0) = 0.0f;
336  force_.get(1) = 0.0f;
337  force_.get(2) = -gravity;
338  real_number drho_ = 0.0f;
339 
340  // Get an iterator over the neighborhood particles of p
341  auto Np = NN.getNNIteratorBox(NN.getCell(xa));
342 
343  // For each neighborhood particle
344  while (Np.isNext() == true)
345  {
346  // ... q
347  auto b = Np.get_sort();
348 
349  // Get the position xp of the particle
350  Point<3,real_number> xb = vd.getPos(b);
351 
352  // if (p == q) skip this particle this condition should be done in the r^2 = 0
353  //if (a == b) {++Np; continue;};
354 
355  unsigned int typeb = vd.template getProp<type>(b);
356 
357  real_number massb = (typeb == FLUID)?MassFluid:MassBound;
358  Point<3,real_number> vb = vd.template getProp<velocity>(b);
359  real_number Pb = vd.template getProp<Pressure>(b);
360  real_number rhob = vd.template getProp<rho>(b);
361 
362  // Get the distance between p and q
363  Point<3,real_number> dr = xa - xb;
364  // take the norm of this vector
365  real_number r2 = norm2(dr);
366 
367  // if they interact
368  if (r2 < FourH2 && r2 >= 1e-16)
369  {
370  real_number r = sqrtf(r2);
371 
372  Point<3,real_number> v_rel = va - vb;
373 
375  DWab(dr,DW,r);
376 
377  real_number factor = - massb*((Pa + Pb) / (rhoa * rhob) + Tensile(r,rhoa,rhob,Pa,Pb,W_dap) + Pi(dr,r2,v_rel,rhoa,rhob,massb,cbar,max_visc));
378 
379  // Bound - Bound does not produce any change
380  factor = (typea == BOUNDARY && typeb == BOUNDARY)?0.0f:factor;
381 
382  force_.get(0) += factor * DW.get(0);
383  force_.get(1) += factor * DW.get(1);
384  force_.get(2) += factor * DW.get(2);
385 
386  real_number scal = massb*(v_rel.get(0)*DW.get(0)+v_rel.get(1)*DW.get(1)+v_rel.get(2)*DW.get(2));
387  scal = (typea == BOUNDARY && typeb == BOUNDARY)?0.0f:scal;
388 
389  drho_ += scal;
390  }
391 
392  ++Np;
393  }
394 
395  vd.template getProp<red>(a) = max_visc;
396 
397  vd.template getProp<force>(a)[0] = force_.get(0);
398  vd.template getProp<force>(a)[1] = force_.get(1);
399  vd.template getProp<force>(a)[2] = force_.get(2);
400  vd.template getProp<drho>(a) = drho_;
401 }
402 
403 template<typename particles_type, typename fluid_ids_type,typename NN_type>
404 __global__ void calc_forces_border_gpu(particles_type vd, fluid_ids_type fbord, NN_type NN, real_number W_dap, real_number cbar)
405 {
406  // ... a
407  unsigned int a;
408 
409  GET_PARTICLE_BY_ID(a,fbord);
410 
411  real_number max_visc = 0.0f;
412 
413  // Get the position xp of the particle
414  Point<3,real_number> xa = vd.getPos(a);
415 
416  // Type of the particle
417  unsigned int typea = vd.template getProp<type>(a);
418 
419  // Get the Velocity of the particle a
420  Point<3,real_number> va = vd.template getProp<velocity>(a);
421 
422  real_number drho_ = 0.0f;
423 
424  // Get an iterator over the neighborhood particles of p
425  auto Np = NN.getNNIteratorBox(NN.getCell(xa));
426 
427  // For each neighborhood particle
428  while (Np.isNext() == true)
429  {
430  // ... q
431  auto b = Np.get_sort();
432 
433  // Get the position xp of the particle
434  Point<3,real_number> xb = vd.getPos(b);
435 
436  // if (p == q) skip this particle this condition should be done in the r^2 = 0
437  //if (a == b) {++Np; continue;};
438 
439  unsigned int typeb = vd.template getProp<type>(b);
440 
441  real_number massb = (typeb == FLUID)?MassFluid:MassBound;
442  Point<3,real_number> vb = vd.template getProp<velocity>(b);
443 
444  // Get the distance between p and q
445  Point<3,real_number> dr = xa - xb;
446  Point<3,real_number> v_rel = va - vb;
447  // take the norm of this vector
448  real_number r2 = norm2(dr);
449 
450  // if they interact
451  if (r2 < FourH2 && r2 >= 1e-16)
452  {
453  real_number r = sqrtf(r2);
454 
456  DWab(dr,DW,r);
457 
458  real_number scal = massb*(v_rel.get(0)*DW.get(0)+v_rel.get(1)*DW.get(1)+v_rel.get(2)*DW.get(2));
459  scal = (typea == BOUNDARY && typeb == BOUNDARY)?0.0f:scal;
460 
461  drho_ += scal;
462  }
463 
464  ++Np;
465  }
466 
467  vd.template getProp<red>(a) = max_visc;
468 
469  vd.template getProp<drho>(a) = drho_;
470 }
471 
472 struct type_is_fluid
473 {
474  __device__ static bool check(int c)
475  {
476  return c == FLUID;
477  }
478 };
479 
480 struct type_is_border
481 {
482  __device__ static bool check(int c)
483  {
484  return c == BOUNDARY;
485  }
486 };
487 
488 template<typename CellList> inline void calc_forces(particles & vd, CellList & NN, real_number & max_visc, size_t cnt, openfpm::vector_gpu<aggregate<int>> & fluid_ids, openfpm::vector_gpu<aggregate<int>> & border_ids)
489 {
490  // Update the cell-list
491  vd.template updateCellListGPU<type,rho,Pressure,velocity>(NN);
492 
494 
495  // get the particles fluid ids
496  get_indexes_by_type<type,type_is_fluid>(vd.getPropVector(),fluid_ids,vd.size_local(),vd.getVC().getGpuContext());
497 
498  // get the particles fluid ids
499  get_indexes_by_type<type,type_is_border>(vd.getPropVector(),border_ids,vd.size_local(),vd.getVC().getGpuContext());
500 
501  auto part = fluid_ids.getGPUIterator(96);
502  CUDA_LAUNCH(calc_forces_fluid_gpu,part,vd.toKernel(),fluid_ids.toKernel(),NN.toKernel(),W_dap,cbar);
503 
504  part = border_ids.getGPUIterator(96);
505  CUDA_LAUNCH(calc_forces_border_gpu,part,vd.toKernel(),border_ids.toKernel(),NN.toKernel(),W_dap,cbar);
506 
508 
509  vd.template restoreOrder<drho,force,red>(NN);
510 
511  max_visc = reduce_local<red,_max_>(vd);
512 }
513 
514 template<typename vector_type>
515 __global__ void max_acceleration_and_velocity_gpu(vector_type vd)
516 {
517  auto a = GET_PARTICLE(vd);
518 
519  Point<3,real_number> acc(vd.template getProp<force>(a));
520  vd.template getProp<red>(a) = norm(acc);
521 
522  Point<3,real_number> vel(vd.template getProp<velocity>(a));
523  vd.template getProp<red2>(a) = norm(vel);
524 }
525 
526 void max_acceleration_and_velocity(particles & vd, real_number & max_acc, real_number & max_vel)
527 {
528  // Calculate the maximum acceleration
529  auto part = vd.getDomainIteratorGPU();
530 
531  CUDA_LAUNCH(max_acceleration_and_velocity_gpu,part,vd.toKernel());
532 
533  max_acc = reduce_local<red,_max_>(vd);
534  max_vel = reduce_local<red2,_max_>(vd);
535 
536  Vcluster<> & v_cl = create_vcluster();
537  v_cl.max(max_acc);
538  v_cl.max(max_vel);
539  v_cl.execute();
540 }
541 
542 
543 real_number calc_deltaT(particles & vd, real_number ViscDtMax)
544 {
545  real_number Maxacc = 0.0;
546  real_number Maxvel = 0.0;
547  max_acceleration_and_velocity(vd,Maxacc,Maxvel);
548 
549  //-dt1 depends on force per unit mass.
550  const real_number dt_f = (Maxacc)?sqrt(H/Maxacc):std::numeric_limits<float>::max();
551 
552  //-dt2 combines the Courant and the viscous time-step controls.
553  const real_number dt_cv = H/(std::max(cbar,Maxvel*10.f) + H*ViscDtMax);
554 
555  //-dt new value of time step.
556  real_number dt=real_number(CFLnumber)*std::min(dt_f,dt_cv);
557  if(dt<real_number(DtMin))
558  {dt=real_number(DtMin);}
559 
560  return dt;
561 }
562 
563 template<typename vector_dist_type>
564 __global__ void verlet_int_gpu(vector_dist_type vd, real_number dt, real_number dt2, real_number dt205)
565 {
566  // ... a
567  auto a = GET_PARTICLE(vd);
568 
569  // if the particle is boundary
570  if (vd.template getProp<type>(a) == BOUNDARY)
571  {
572  // Update rho
573  real_number rhop = vd.template getProp<rho>(a);
574 
575  // Update only the density
576  vd.template getProp<velocity>(a)[0] = 0.0;
577  vd.template getProp<velocity>(a)[1] = 0.0;
578  vd.template getProp<velocity>(a)[2] = 0.0;
579  real_number rhonew = vd.template getProp<rho_prev>(a) + dt2*vd.template getProp<drho>(a);
580  vd.template getProp<rho>(a) = (rhonew < rho_zero)?rho_zero:rhonew;
581 
582  vd.template getProp<rho_prev>(a) = rhop;
583 
584  vd.template getProp<red>(a) = 0;
585 
586  return;
587  }
588 
589  //-Calculate displacement and update position
590  real_number dx = vd.template getProp<velocity>(a)[0]*dt + vd.template getProp<force>(a)[0]*dt205;
591  real_number dy = vd.template getProp<velocity>(a)[1]*dt + vd.template getProp<force>(a)[1]*dt205;
592  real_number dz = vd.template getProp<velocity>(a)[2]*dt + vd.template getProp<force>(a)[2]*dt205;
593 
594  vd.getPos(a)[0] += dx;
595  vd.getPos(a)[1] += dy;
596  vd.getPos(a)[2] += dz;
597 
598  real_number velX = vd.template getProp<velocity>(a)[0];
599  real_number velY = vd.template getProp<velocity>(a)[1];
600  real_number velZ = vd.template getProp<velocity>(a)[2];
601 
602  real_number rhop = vd.template getProp<rho>(a);
603 
604  vd.template getProp<velocity>(a)[0] = vd.template getProp<velocity_prev>(a)[0] + vd.template getProp<force>(a)[0]*dt2;
605  vd.template getProp<velocity>(a)[1] = vd.template getProp<velocity_prev>(a)[1] + vd.template getProp<force>(a)[1]*dt2;
606  vd.template getProp<velocity>(a)[2] = vd.template getProp<velocity_prev>(a)[2] + vd.template getProp<force>(a)[2]*dt2;
607  vd.template getProp<rho>(a) = vd.template getProp<rho_prev>(a) + dt2*vd.template getProp<drho>(a);
608 
609  // Check if the particle go out of range in space and in density
610  if (vd.getPos(a)[0] < 0.0 || vd.getPos(a)[1] < 0.0 || vd.getPos(a)[2] < 0.0 ||
611  vd.getPos(a)[0] > 1.61 || vd.getPos(a)[1] > 0.68 || vd.getPos(a)[2] > 0.50 ||
612  vd.template getProp<rho>(a) < RhoMin || vd.template getProp<rho>(a) > RhoMax)
613  {
614  vd.template getProp<red>(a) = 1;
615  }
616 
617  else
618  {
619  vd.template getProp<red>(a) = 0;
620  }
621 
622 
623  vd.template getProp<velocity_prev>(a)[0] = velX;
624  vd.template getProp<velocity_prev>(a)[1] = velY;
625  vd.template getProp<velocity_prev>(a)[2] = velZ;
626  vd.template getProp<rho_prev>(a) = rhop;
627 }
628 
629 size_t cnt = 0;
630 
631 void verlet_int(particles & vd, real_number dt)
632 {
633  // particle iterator
634  auto part = vd.getDomainIteratorGPU();
635 
636  real_number dt205 = dt*dt*0.5;
637  real_number dt2 = dt*2.0;
638 
639  CUDA_LAUNCH(verlet_int_gpu,part,vd.toKernel(),dt,dt2,dt205);
640 
641  // remove the particles marked
642  remove_marked<red>(vd);
643 
644  // increment the iteration counter
645  cnt++;
646 }
647 
648 template<typename vector_type>
649 __global__ void euler_int_gpu(vector_type vd,real_number dt, real_number dt205)
650 {
651  // ... a
652  auto a = GET_PARTICLE(vd);
653 
654  // if the particle is boundary
655  if (vd.template getProp<type>(a) == BOUNDARY)
656  {
657  // Update rho
658  real_number rhop = vd.template getProp<rho>(a);
659 
660  // Update only the density
661  vd.template getProp<velocity>(a)[0] = 0.0;
662  vd.template getProp<velocity>(a)[1] = 0.0;
663  vd.template getProp<velocity>(a)[2] = 0.0;
664  real_number rhonew = vd.template getProp<rho>(a) + dt*vd.template getProp<drho>(a);
665  vd.template getProp<rho>(a) = (rhonew < rho_zero)?rho_zero:rhonew;
666 
667  vd.template getProp<rho_prev>(a) = rhop;
668 
669  vd.template getProp<red>(a) = 0;
670 
671  return;
672  }
673 
674  //-Calculate displacement and update position / Calcula desplazamiento y actualiza posicion.
675  real_number dx = vd.template getProp<velocity>(a)[0]*dt + vd.template getProp<force>(a)[0]*dt205;
676  real_number dy = vd.template getProp<velocity>(a)[1]*dt + vd.template getProp<force>(a)[1]*dt205;
677  real_number dz = vd.template getProp<velocity>(a)[2]*dt + vd.template getProp<force>(a)[2]*dt205;
678 
679  vd.getPos(a)[0] += dx;
680  vd.getPos(a)[1] += dy;
681  vd.getPos(a)[2] += dz;
682 
683  real_number velX = vd.template getProp<velocity>(a)[0];
684  real_number velY = vd.template getProp<velocity>(a)[1];
685  real_number velZ = vd.template getProp<velocity>(a)[2];
686  real_number rhop = vd.template getProp<rho>(a);
687 
688  vd.template getProp<velocity>(a)[0] = vd.template getProp<velocity>(a)[0] + vd.template getProp<force>(a)[0]*dt;
689  vd.template getProp<velocity>(a)[1] = vd.template getProp<velocity>(a)[1] + vd.template getProp<force>(a)[1]*dt;
690  vd.template getProp<velocity>(a)[2] = vd.template getProp<velocity>(a)[2] + vd.template getProp<force>(a)[2]*dt;
691  vd.template getProp<rho>(a) = vd.template getProp<rho>(a) + dt*vd.template getProp<drho>(a);
692 
693  // Check if the particle go out of range in space and in density
694  if (vd.getPos(a)[0] < 0.0 || vd.getPos(a)[1] < 0.0 || vd.getPos(a)[2] < 0.0 ||
695  vd.getPos(a)[0] > 1.61 || vd.getPos(a)[1] > 0.68 || vd.getPos(a)[2] > 0.50 ||
696  vd.template getProp<rho>(a) < RhoMin || vd.template getProp<rho>(a) > RhoMax)
697  {vd.template getProp<red>(a) = 1;}
698  else
699  {vd.template getProp<red>(a) = 0;}
700 
701  vd.template getProp<velocity_prev>(a)[0] = velX;
702  vd.template getProp<velocity_prev>(a)[1] = velY;
703  vd.template getProp<velocity_prev>(a)[2] = velZ;
704  vd.template getProp<rho_prev>(a) = rhop;
705 }
706 
707 void euler_int(particles & vd, real_number dt)
708 {
709 
710  // particle iterator
711  auto part = vd.getDomainIteratorGPU();
712 
713  real_number dt205 = dt*dt*0.5;
714 
715  CUDA_LAUNCH(euler_int_gpu,part,vd.toKernel(),dt,dt205);
716 
717  // remove the particles
718  remove_marked<red>(vd);
719 
720  cnt++;
721 }
722 
723 template<typename vector_type, typename NN_type>
724 __global__ void sensor_pressure_gpu(vector_type vd, NN_type NN, Point<3,real_number> probe, real_number * press_tmp)
725 {
726  real_number tot_ker = 0.0;
727 
728  // Get the position of the probe i
729  Point<3,real_number> xp = probe;
730 
731  // get the iterator over the neighbohood particles of the probes position
732  auto itg = NN.getNNIteratorBox(NN.getCell(xp));
733  while (itg.isNext())
734  {
735  auto q = itg.get_sort();
736 
737  // Only the fluid particles are importants
738  if (vd.template getProp<type>(q) != FLUID)
739  {
740  ++itg;
741  continue;
742  }
743 
744  // Get the position of the neighborhood particle q
745  Point<3,real_number> xq = vd.getPos(q);
746 
747  // Calculate the contribution of the particle to the pressure
748  // of the probe
749  real_number r = sqrt(norm2(xp - xq));
750 
751  real_number ker = Wab(r) * (MassFluid / rho_zero);
752 
753  // Also keep track of the calculation of the summed
754  // kernel
755  tot_ker += ker;
756 
757  // Add the total pressure contribution
758  *press_tmp += vd.template getProp<Pressure>(q) * ker;
759 
760  // next neighborhood particle
761  ++itg;
762  }
763 
764  // We calculate the pressure normalizing the
765  // sum over all kernels
766  if (tot_ker == 0.0)
767  {*press_tmp = 0.0;}
768  else
769  {*press_tmp = 1.0 / tot_ker * *press_tmp;}
770 }
771 
772 template<typename Vector, typename CellList>
773 inline void sensor_pressure(Vector & vd,
774  CellList & NN,
777 {
778  Vcluster<> & v_cl = create_vcluster();
779 
780  press_t.add();
781 
782  for (size_t i = 0 ; i < probes.size() ; i++)
783  {
784  // A float variable to calculate the pressure of the problem
785  CudaMemory press_tmp_(sizeof(real_number));
786  real_number press_tmp;
787 
788  // if the probe is inside the processor domain
789  if (vd.getDecomposition().isLocal(probes.get(i)) == true)
790  {
791  vd.template updateCellListGPU<type,Pressure>(NN);
792 
793  Point<3,real_number> probe = probes.get(i);
794  CUDA_LAUNCH_DIM3(sensor_pressure_gpu,1,1,vd.toKernel(),NN.toKernel(),probe,(real_number *)press_tmp_.toKernel());
795 
796  vd.template restoreOrder<>(NN);
797 
798  // move calculated pressure on
799  press_tmp_.deviceToHost();
800  press_tmp = *(real_number *)press_tmp_.getPointer();
801  }
802 
803  // This is not necessary in principle, but if you
804  // want to make all processor aware of the history of the calculated
805  // pressure we have to execute this
806  v_cl.sum(press_tmp);
807  v_cl.execute();
808 
809  // We add the calculated pressure into the history
810  press_t.last().add(press_tmp);
811  }
812 }
813 
814 int main(int argc, char* argv[])
815 {
816  // initialize the library
817  openfpm_init(&argc,&argv);
818 
821 
822 #ifdef CUDIFY_USE_CUDA
823  cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
824 #endif
825 
826  // It contain for each time-step the value detected by the probes
829 
830  probes.add({0.8779f,0.3f,0.02f});
831  probes.add({0.754f,0.31f,0.02f});
832 
833  // Here we define our domain a 2D box with internals from 0 to 1.0 for x and y
834  Box<3,real_number> domain({-0.05f,-0.05f,-0.05f},{1.7010f,0.7065f,0.511f});
835  size_t sz[3] = {207,90,66};
836 
837  // Fill W_dap
838  W_dap = 1.0/Wab(H/1.5);
839 
840  // Here we define the boundary conditions of our problem
841  size_t bc[3]={NON_PERIODIC,NON_PERIODIC,NON_PERIODIC};
842 
843  // extended boundary around the domain, and the processor domain
844  Ghost<3,real_number> g(2*H);
845 
846  particles vd(0,domain,bc,g,DEC_GRAN(128));
847 
849 
850  // You can ignore all these dp/2.0 is a trick to reach the same initialization
851  // of Dual-SPH that use a different criteria to draw particles
852  Box<3,real_number> fluid_box({dp/2.0f,dp/2.0f,dp/2.0f},{0.4f+dp/2.0f,0.67f-dp/2.0f,0.3f+dp/2.0f});
853 
854  // return an iterator to the fluid particles to add to vd
855  auto fluid_it = DrawParticles::DrawBox(vd,sz,domain,fluid_box);
856 
857  // here we fill some of the constants needed by the simulation
858  max_fluid_height = fluid_it.getBoxMargins().getHigh(2);
859  h_swl = fluid_it.getBoxMargins().getHigh(2) - fluid_it.getBoxMargins().getLow(2);
860  B = (coeff_sound)*(coeff_sound)*gravity*h_swl*rho_zero / gamma_;
861  cbar = coeff_sound * sqrt(gravity * h_swl);
862 
863  // for each particle inside the fluid box ...
864  while (fluid_it.isNext())
865  {
866  // ... add a particle ...
867  vd.add();
868 
869  // ... and set it position ...
870  vd.getLastPos()[0] = fluid_it.get().get(0);
871  vd.getLastPos()[1] = fluid_it.get().get(1);
872  vd.getLastPos()[2] = fluid_it.get().get(2);
873 
874  // and its type.
875  vd.template getLastProp<type>() = FLUID;
876 
877  // We also initialize the density of the particle and the hydro-static pressure given by
878  //
879  // rho_zero*g*h = P
880  //
881  // rho_p = (P/B + 1)^(1/Gamma) * rho_zero
882  //
883 
884  vd.template getLastProp<Pressure>() = rho_zero * gravity * (max_fluid_height - fluid_it.get().get(2));
885 
886  vd.template getLastProp<rho>() = pow(vd.template getLastProp<Pressure>() / B + 1, 1.0/gamma_) * rho_zero;
887  vd.template getLastProp<rho_prev>() = vd.template getLastProp<rho>();
888  vd.template getLastProp<velocity>()[0] = 0.0;
889  vd.template getLastProp<velocity>()[1] = 0.0;
890  vd.template getLastProp<velocity>()[2] = 0.0;
891 
892  vd.template getLastProp<velocity_prev>()[0] = 0.0;
893  vd.template getLastProp<velocity_prev>()[1] = 0.0;
894  vd.template getLastProp<velocity_prev>()[2] = 0.0;
895 
896  // next fluid particle
897  ++fluid_it;
898  }
899 
900  // Recipient
901  Box<3,real_number> recipient1({0.0f,0.0f,0.0f},{1.6f+dp/2.0f,0.67f+dp/2.0f,0.4f+dp/2.0f});
902  Box<3,real_number> recipient2({dp,dp,dp},{1.6f-dp/2.0f,0.67f-dp/2.0f,0.4f+dp/2.0f});
903 
904  Box<3,real_number> obstacle1({0.9f,0.24f-dp/2.0f,0.0f},{1.02f+dp/2.0f,0.36f,0.45f+dp/2.0f});
905  Box<3,real_number> obstacle2({0.9f+dp,0.24f+dp/2.0f,0.0f},{1.02f-dp/2.0f,0.36f-dp,0.45f-dp/2.0f});
906  Box<3,real_number> obstacle3({0.9f+dp,0.24f,0.0f},{1.02f,0.36f,0.45f});
907 
909  holes.add(recipient2);
910  holes.add(obstacle1);
911  auto bound_box = DrawParticles::DrawSkin(vd,sz,domain,holes,recipient1);
912 
913  while (bound_box.isNext())
914  {
915  vd.add();
916 
917  vd.getLastPos()[0] = bound_box.get().get(0);
918  vd.getLastPos()[1] = bound_box.get().get(1);
919  vd.getLastPos()[2] = bound_box.get().get(2);
920 
921  vd.template getLastProp<type>() = BOUNDARY;
922  vd.template getLastProp<rho>() = rho_zero;
923  vd.template getLastProp<rho_prev>() = rho_zero;
924  vd.template getLastProp<velocity>()[0] = 0.0;
925  vd.template getLastProp<velocity>()[1] = 0.0;
926  vd.template getLastProp<velocity>()[2] = 0.0;
927 
928  vd.template getLastProp<velocity_prev>()[0] = 0.0;
929  vd.template getLastProp<velocity_prev>()[1] = 0.0;
930  vd.template getLastProp<velocity_prev>()[2] = 0.0;
931 
932  ++bound_box;
933  }
934 
935  auto obstacle_box = DrawParticles::DrawSkin(vd,sz,domain,obstacle2,obstacle1);
936 
937  while (obstacle_box.isNext())
938  {
939  vd.add();
940 
941  vd.getLastPos()[0] = obstacle_box.get().get(0);
942  vd.getLastPos()[1] = obstacle_box.get().get(1);
943  vd.getLastPos()[2] = obstacle_box.get().get(2);
944 
945  vd.template getLastProp<type>() = BOUNDARY;
946  vd.template getLastProp<rho>() = rho_zero;
947  vd.template getLastProp<rho_prev>() = rho_zero;
948  vd.template getLastProp<velocity>()[0] = 0.0;
949  vd.template getLastProp<velocity>()[1] = 0.0;
950  vd.template getLastProp<velocity>()[2] = 0.0;
951 
952  vd.template getLastProp<velocity_prev>()[0] = 0.0;
953  vd.template getLastProp<velocity_prev>()[1] = 0.0;
954  vd.template getLastProp<velocity_prev>()[2] = 0.0;
955 
956  ++obstacle_box;
957  }
958 
959  vd.map();
960 
961  // Now that we fill the vector with particles
962  ModelCustom md;
963 
964  vd.addComputationCosts(md);
965  vd.getDecomposition().decompose();
966  vd.map();
967 
969 
970  // Ok the initialization is done on CPU on GPU we are doing the main loop, so first we offload all properties on GPU
971 
972  vd.hostToDevicePos();
973  vd.template hostToDeviceProp<type,rho,rho_prev,Pressure,velocity>();
974 
975 
976  vd.ghost_get<type,rho,Pressure,velocity>(RUN_ON_DEVICE);
977 
978  auto NN = vd.getCellListGPU/*<CELLLIST_GPU_SPARSE<3,float>>*/(2*H / 2.0, CL_NON_SYMMETRIC | CL_GPU_REORDER, 2);
979 
980  timer tot_sim;
981  tot_sim.start();
982 
983  size_t write = 0;
984  size_t it = 0;
985  size_t it_reb = 0;
986  real_number t = 0.0;
987  while (t <= t_end)
988  {
989  Vcluster<> & v_cl = create_vcluster();
990  timer it_time;
991  it_time.start();
992 
994  it_reb++;
995  if (it_reb == 300)
996  {
997  vd.map(RUN_ON_DEVICE);
998 
999  // Rebalancer for now work on CPU , so move to CPU
1000  vd.deviceToHostPos();
1001  vd.template deviceToHostProp<type>();
1002 
1003  it_reb = 0;
1004  ModelCustom md;
1005  vd.addComputationCosts(md);
1006  vd.getDecomposition().decompose();
1007 
1008  if (v_cl.getProcessUnitID() == 0)
1009  {std::cout << "REBALANCED " << it_reb << std::endl;}
1010  }
1011 
1012  vd.map(RUN_ON_DEVICE);
1013 
1014  // Calculate pressure from the density
1015  EqState(vd);
1016 
1017  real_number max_visc = 0.0;
1018 
1019  vd.ghost_get<type,rho,Pressure,velocity>(RUN_ON_DEVICE);
1020 
1021 
1022  // Calc forces
1023  calc_forces(vd,NN,max_visc,cnt,fluid_ids,border_ids);
1024 
1025  // Get the maximum viscosity term across processors
1026  v_cl.max(max_visc);
1027  v_cl.execute();
1028 
1029  // Calculate delta t integration
1030  real_number dt = calc_deltaT(vd,max_visc);
1031 
1032  // VerletStep or euler step
1033  it++;
1034  if (it < 40)
1035  verlet_int(vd,dt);
1036  else
1037  {
1038  euler_int(vd,dt);
1039  it = 0;
1040  }
1041 
1042  t += dt;
1043 
1044  if (write < t*100)
1045  {
1046  // Sensor pressure require update ghost, so we ensure that particles are distributed correctly
1047  // and ghost are updated
1048  vd.map(RUN_ON_DEVICE);
1049  vd.ghost_get<type,rho,Pressure,velocity>(RUN_ON_DEVICE);
1050 
1051  // calculate the pressure at the sensor points
1052  //sensor_pressure(vd,NN,press_t,probes);
1053 
1054  std::cout << "OUTPUT " << dt << std::endl;
1055 
1056  // When we write we have move all the particles information back to CPU
1057 
1058  vd.deviceToHostPos();
1059  vd.deviceToHostProp<type,rho,rho_prev,Pressure,drho,force,velocity,velocity_prev,red,red2>();
1060 
1061  // We copy on another vector with less properties to reduce the size of the output
1063 
1064  auto ito = vd.getDomainIterator();
1065 
1066  while(ito.isNext())
1067  {
1068  auto p = ito.get();
1069 
1070  vd_out.add();
1071 
1072  vd_out.getLastPos()[0] = vd.getPos(p)[0];
1073  vd_out.getLastPos()[1] = vd.getPos(p)[1];
1074  vd_out.getLastPos()[2] = vd.getPos(p)[2];
1075 
1076  vd_out.template getLastProp<0>() = vd.template getProp<type>(p);
1077 
1078  vd_out.template getLastProp<1>()[0] = vd.template getProp<velocity>(p)[0];
1079  vd_out.template getLastProp<1>()[1] = vd.template getProp<velocity>(p)[1];
1080  vd_out.template getLastProp<1>()[2] = vd.template getProp<velocity>(p)[2];
1081 
1082  ++ito;
1083  }
1084 
1085  vd_out.write_frame("Particles",write,VTK_WRITER | FORMAT_BINARY);
1086  write++;
1087 
1088  if (v_cl.getProcessUnitID() == 0)
1089  {std::cout << "TIME: " << t << " write " << it_time.getwct() << " " << it_reb << " " << cnt << " Max visc: " << max_visc << " " << vd.size_local() << std::endl;}
1090  }
1091  else
1092  {
1093  if (v_cl.getProcessUnitID() == 0)
1094  {std::cout << "TIME: " << t << " " << it_time.getwct() << " " << it_reb << " " << cnt << " Max visc: " << max_visc << " " << vd.size_local() << std::endl;}
1095  }
1096  }
1097 
1098  tot_sim.stop();
1099  std::cout << "Time to complete: " << tot_sim.getwct() << " seconds" << std::endl;
1100 
1101 
1102  openfpm_finalize();
1103 }
1104 
1105 #else
1106 
1107 int main(int argc, char* argv[])
1108 {
1109  return 0;
1110 }
1111 
1112 #endif
This class represent an N-dimensional box.
Definition: Box.hpp:60
__device__ __host__ T getLow(int i) const
get the i-coordinate of the low bound interval of the box
Definition: Box.hpp:555
__device__ __host__ T getHigh(int i) const
get the high interval of the box
Definition: Box.hpp:566
Class for FAST cell list implementation.
Definition: CellList.hpp:558
This class define the domain decomposition interface.
static PointIteratorSkin< dim, T, typename vd_type::Decomposition_type > DrawSkin(vd_type &vd, size_t(&sz)[dim], Box< dim, T > &domain, Box< dim, T > &sub_A, Box< dim, T > &sub_B)
Draw particles in a box B excluding the area of a second box A (B - A)
static PointIterator< dim, T, typename vd_type::Decomposition_type > DrawBox(vd_type &vd, size_t(&sz)[dim], Box< dim, T > &domain, Box< dim, T > &sub)
Draw particles in a box.
Definition: Ghost.hpp:40
This class implement the point shape in an N-dimensional space.
Definition: Point.hpp:28
__device__ __host__ const T & get(unsigned int i) const
Get coordinate.
Definition: Point.hpp:172
void execute()
Execute all the requests.
void sum(T &num)
Sum the numbers across all processors and get the result.
size_t getProcessUnitID()
Get the process unit id.
void max(T &num)
Get the maximum number across all processors (or reduction with infinity norm)
Implementation of VCluster class.
Definition: VCluster.hpp:59
Sparse Matrix implementation stub object when OpenFPM is compiled with no linear algebra support.
Definition: Vector.hpp:40
Implementation of 1-D std::vector like structure.
Definition: map_vector.hpp:204
Class for cpu time benchmarking.
Definition: timer.hpp:28
void stop()
Stop the timer.
Definition: timer.hpp:119
void start()
Start the timer.
Definition: timer.hpp:90
double getwct()
Return the elapsed real time.
Definition: timer.hpp:130
Distributed vector.
Vcluster< Memory > & getVC()
Get the Virtual Cluster machine.
size_t size_local() const
return the local size of the vector
const vector_dist_prop & getPropVector() const
return the property vector of all the particles
auto getPos(vect_dist_key_dx vec_key) -> decltype(vPos.template get< 0 >(vec_key.getKey()))
Get the position of an element.
Model for Dynamic load balancing.
Definition: main.cpp:232
aggregate of properties, from a list of object if create a struct that follow the OPENFPM native stru...
Definition: aggregate.hpp:221
temporal buffer for reductions