OpenFPM_pdata  4.1.0
Project that contain the implementation of distributed structures
main.cu
1 
51 #ifdef __NVCC__
52 
53 #define PRINT_STACKTRACE
54 #define STOP_ON_ERROR
55 #define OPENMPI
56 #define SCAN_WITH_CUB
57 #define SORT_WITH_CUB
58 //#define SE_CLASS1
59 
60 //#define USE_LOW_REGISTER_ITERATOR
61 
62 #include "Vector/vector_dist.hpp"
63 #include <math.h>
64 #include "Draw/DrawParticles.hpp"
65 
66 
67 
68 typedef float real_number;
69 
70 // A constant to indicate boundary particles
71 #define BOUNDARY 0
72 
73 // A constant to indicate fluid particles
74 #define FLUID 1
75 
76 // initial spacing between particles dp in the formulas
77 const real_number dp = 0.00425;
78 // Maximum height of the fluid water
79 // is going to be calculated and filled later on
80 real_number h_swl = 0.0;
81 
82 // c_s in the formulas (constant used to calculate the sound speed)
83 const real_number coeff_sound = 20.0;
84 
85 // gamma in the formulas
86 const real_number gamma_ = 7.0;
87 
88 // sqrt(3.0*dp*dp) support of the kernel
89 const real_number H = 0.00736121593217;
90 
91 // Eta in the formulas
92 const real_number Eta2 = 0.01 * H*H;
93 
94 const real_number FourH2 = 4.0 * H*H;
95 
96 // alpha in the formula
97 const real_number visco = 0.1;
98 
99 // cbar in the formula (calculated later)
100 real_number cbar = 0.0;
101 
102 // Mass of the fluid particles
103 const real_number MassFluid = 0.0000767656;
104 
105 // Mass of the boundary particles
106 const real_number MassBound = 0.0000767656;
107 
108 //
109 
110 // End simulation time
111 #ifdef TEST_RUN
112 const real_number t_end = 0.001;
113 #else
114 const real_number t_end = 1.5;
115 #endif
116 
117 // Gravity acceleration
118 const real_number gravity = 9.81;
119 
120 // Reference densitu 1000Kg/m^3
121 const real_number rho_zero = 1000.0;
122 
123 // Filled later require h_swl, it is b in the formulas
124 real_number B = 0.0;
125 
126 // Constant used to define time integration
127 const real_number CFLnumber = 0.2;
128 
129 // Minimum T
130 const real_number DtMin = 0.00001;
131 
132 // Minimum Rho allowed
133 const real_number RhoMin = 700.0;
134 
135 // Maximum Rho allowed
136 const real_number RhoMax = 1300.0;
137 
138 // Filled in initialization
139 real_number max_fluid_height = 0.0;
140 
141 // Properties
142 
143 // FLUID or BOUNDARY
144 const size_t type = 0;
145 
146 // Density
147 const int rho = 1;
148 
149 // Density at step n-1
150 const int rho_prev = 2;
151 
152 // Pressure
153 const int Pressure = 3;
154 
155 // Delta rho calculated in the force calculation
156 const int drho = 4;
157 
158 // calculated force
159 const int force = 5;
160 
161 // velocity
162 const int velocity = 6;
163 
164 // velocity at previous step
165 const int velocity_prev = 7;
166 
167 const int red = 8;
168 
169 const int red2 = 9;
170 
171 // Type of the vector containing particles
173 // | | | | | | | | | |
174 // | | | | | | | | | |
175 // type density density Pressure delta force velocity velocity reduction another
176 // at n-1 density at n - 1 buffer reduction buffer
177 
178 
179 struct ModelCustom
180 {
181  template<typename Decomposition, typename vector> inline void addComputation(Decomposition & dec,
182  vector & vd,
183  size_t v,
184  size_t p)
185  {
186  if (vd.template getProp<type>(p) == FLUID)
187  dec.addComputationCost(v,4);
188  else
189  dec.addComputationCost(v,3);
190  }
191 
192  template<typename Decomposition> inline void applyModel(Decomposition & dec, size_t v)
193  {
194  dec.setSubSubDomainComputationCost(v, dec.getSubSubDomainComputationCost(v) * dec.getSubSubDomainComputationCost(v));
195  }
196 
197  real_number distributionTol()
198  {
199  return 1.01;
200  }
201 };
202 
203 template<typename vd_type>
204 __global__ void EqState_gpu(vd_type vd, real_number B)
205 {
206  auto a = GET_PARTICLE(vd);
207 
208  real_number rho_a = vd.template getProp<rho>(a);
209  real_number rho_frac = rho_a / rho_zero;
210 
211  vd.template getProp<Pressure>(a) = B*( rho_frac*rho_frac*rho_frac*rho_frac*rho_frac*rho_frac*rho_frac - 1.0);
212 }
213 
214 inline void EqState(particles & vd)
215 {
216  auto it = vd.getDomainIteratorGPU();
217 
218  CUDA_LAUNCH(EqState_gpu,it,vd.toKernel(),B);
219 }
220 
221 
222 const real_number a2 = 1.0/M_PI/H/H/H;
223 
224 inline __device__ __host__ real_number Wab(real_number r)
225 {
226  r /= H;
227 
228  if (r < 1.0)
229  return (1.0 - 3.0/2.0*r*r + 3.0/4.0*r*r*r)*a2;
230  else if (r < 2.0)
231  return (1.0/4.0*(2.0 - r*r)*(2.0 - r*r)*(2.0 - r*r))*a2;
232  else
233  return 0.0;
234 }
235 
236 
237 const real_number c1 = -3.0/M_PI/H/H/H/H;
238 const real_number d1 = 9.0/4.0/M_PI/H/H/H/H;
239 const real_number c2 = -3.0/4.0/M_PI/H/H/H/H;
240 const real_number a2_4 = 0.25*a2;
241 // Filled later
242 real_number W_dap = 0.0;
243 
244 inline __device__ __host__ void DWab(Point<3,real_number> & dx, Point<3,real_number> & DW, real_number r)
245 {
246  const real_number qq=r/H;
247 
248  real_number qq2 = qq * qq;
249  real_number fac1 = (c1*qq + d1*qq2)/r;
250  real_number b1 = (qq < 1.0f)?1.0f:0.0f;
251 
252  real_number wqq = (2.0f - qq);
253  real_number fac2 = c2 * wqq * wqq / r;
254  real_number b2 = (qq >= 1.0f && qq < 2.0f)?1.0f:0.0f;
255 
256  real_number factor = (b1*fac1 + b2*fac2);
257 
258  DW.get(0) = factor * dx.get(0);
259  DW.get(1) = factor * dx.get(1);
260  DW.get(2) = factor * dx.get(2);
261 }
262 
263 // Tensile correction
264 inline __device__ __host__ real_number Tensile(real_number r, real_number rhoa, real_number rhob, real_number prs1, real_number prs2, real_number W_dap)
265 {
266  const real_number qq=r/H;
267  //-Cubic Spline kernel
268  real_number wab;
269  if(r>H)
270  {
271  real_number wqq1=2.0f-qq;
272  real_number wqq2=wqq1*wqq1;
273 
274  wab=a2_4*(wqq2*wqq1);
275  }
276  else
277  {
278  real_number wqq2=qq*qq;
279  real_number wqq3=wqq2*qq;
280 
281  wab=a2*(1.0f-1.5f*wqq2+0.75f*wqq3);
282  }
283 
284  //-Tensile correction.
285  real_number fab=wab*W_dap;
286  fab*=fab; fab*=fab; //fab=fab^4
287  const real_number tensilp1=(prs1/(rhoa*rhoa))*(prs1>0.0f? 0.01f: -0.2f);
288  const real_number tensilp2=(prs2/(rhob*rhob))*(prs2>0.0f? 0.01f: -0.2f);
289 
290  return (fab*(tensilp1+tensilp2));
291 }
292 
293 
294 inline __device__ __host__ real_number Pi(const Point<3,real_number> & dr, real_number rr2, Point<3,real_number> & dv, real_number rhoa, real_number rhob, real_number massb, real_number cbar, real_number & visc)
295 {
296  const real_number dot = dr.get(0)*dv.get(0) + dr.get(1)*dv.get(1) + dr.get(2)*dv.get(2);
297  const real_number dot_rr2 = dot/(rr2+Eta2);
298  visc=(dot_rr2 < visc)?visc:dot_rr2;
299 
300  if(dot < 0)
301  {
302  const float amubar=H*dot_rr2;
303  const float robar=(rhoa+rhob)*0.5f;
304  const float pi_visc=(-visco*cbar*amubar/robar);
305 
306  return pi_visc;
307  }
308  else
309  return 0.0f;
310 }
311 
312 template<typename particles_type, typename fluid_ids_type,typename NN_type>
313 __global__ void calc_forces_fluid_gpu(particles_type vd, fluid_ids_type fids, NN_type NN, real_number W_dap, real_number cbar)
314 {
315  // ... a
316  unsigned int a;
317 
318  GET_PARTICLE_BY_ID(a,fids);
319 
320  real_number max_visc = 0.0f;
321 
322  // Get the position xp of the particle
323  Point<3,real_number> xa = vd.getPos(a);
324 
325  // Type of the particle
326  unsigned int typea = vd.template getProp<type>(a);
327 
328  // Get the density of the of the particle a
329  real_number rhoa = vd.template getProp<rho>(a);
330 
331  // Get the pressure of the particle a
332  real_number Pa = vd.template getProp<Pressure>(a);
333 
334  // Get the Velocity of the particle a
335  Point<3,real_number> va = vd.template getProp<velocity>(a);
336 
337  Point<3,real_number> force_;
338  force_.get(0) = 0.0f;
339  force_.get(1) = 0.0f;
340  force_.get(2) = -gravity;
341  real_number drho_ = 0.0f;
342 
343  // Get an iterator over the neighborhood particles of p
344  auto Np = NN.getNNIteratorBox(NN.getCell(xa));
345 
346  // For each neighborhood particle
347  while (Np.isNext() == true)
348  {
349  // ... q
350  auto b = Np.get_sort();
351 
352  // Get the position xp of the particle
353  Point<3,real_number> xb = vd.getPos(b);
354 
355  // if (p == q) skip this particle this condition should be done in the r^2 = 0
356  //if (a == b) {++Np; continue;};
357 
358  unsigned int typeb = vd.template getProp<type>(b);
359 
360  real_number massb = (typeb == FLUID)?MassFluid:MassBound;
361  Point<3,real_number> vb = vd.template getProp<velocity>(b);
362  real_number Pb = vd.template getProp<Pressure>(b);
363  real_number rhob = vd.template getProp<rho>(b);
364 
365  // Get the distance between p and q
366  Point<3,real_number> dr = xa - xb;
367  Point<3,real_number> v_rel = va - vb;
368  // take the norm of this vector
369  real_number r2 = norm2(dr);
370 
371  // if they interact
372  if (r2 < FourH2 && r2 >= 1e-16)
373  {
374  real_number r = sqrtf(r2);
375 
377  DWab(dr,DW,r);
378 
379  real_number factor = - massb*((Pa + Pb) / (rhoa * rhob) + Tensile(r,rhoa,rhob,Pa,Pb,W_dap) + Pi(dr,r2,v_rel,rhoa,rhob,massb,cbar,max_visc));
380 
381  // Bound - Bound does not produce any change
382  factor = (typea == BOUNDARY && typeb == BOUNDARY)?0.0f:factor;
383 
384  force_.get(0) += factor * DW.get(0);
385  force_.get(1) += factor * DW.get(1);
386  force_.get(2) += factor * DW.get(2);
387 
388  real_number scal = massb*(v_rel.get(0)*DW.get(0)+v_rel.get(1)*DW.get(1)+v_rel.get(2)*DW.get(2));
389  scal = (typea == BOUNDARY && typeb == BOUNDARY)?0.0f:scal;
390 
391  drho_ += scal;
392  }
393 
394  ++Np;
395  }
396 
397  vd.template getProp<red>(a) = max_visc;
398 
399  vd.template getProp<force>(a)[0] = force_.get(0);
400  vd.template getProp<force>(a)[1] = force_.get(1);
401  vd.template getProp<force>(a)[2] = force_.get(2);
402  vd.template getProp<drho>(a) = drho_;
403 }
404 
405 template<typename particles_type, typename fluid_ids_type,typename NN_type>
406 __global__ void calc_forces_border_gpu(particles_type vd, fluid_ids_type fbord, NN_type NN, real_number W_dap, real_number cbar)
407 {
408  // ... a
409  unsigned int a;
410 
411  GET_PARTICLE_BY_ID(a,fbord);
412 
413  real_number max_visc = 0.0f;
414 
415  // Get the position xp of the particle
416  Point<3,real_number> xa = vd.getPos(a);
417 
418  // Type of the particle
419  unsigned int typea = vd.template getProp<type>(a);
420 
421  // Get the Velocity of the particle a
422  Point<3,real_number> va = vd.template getProp<velocity>(a);
423 
424  real_number drho_ = 0.0f;
425 
426  // Get an iterator over the neighborhood particles of p
427  auto Np = NN.getNNIteratorBox(NN.getCell(xa));
428 
429  // For each neighborhood particle
430  while (Np.isNext() == true)
431  {
432  // ... q
433  auto b = Np.get_sort();
434 
435  // Get the position xp of the particle
436  Point<3,real_number> xb = vd.getPos(b);
437 
438  // if (p == q) skip this particle this condition should be done in the r^2 = 0
439  //if (a == b) {++Np; continue;};
440 
441  unsigned int typeb = vd.template getProp<type>(b);
442 
443  real_number massb = (typeb == FLUID)?MassFluid:MassBound;
444  Point<3,real_number> vb = vd.template getProp<velocity>(b);
445 
446  // Get the distance between p and q
447  Point<3,real_number> dr = xa - xb;
448  Point<3,real_number> v_rel = va - vb;
449  // take the norm of this vector
450  real_number r2 = norm2(dr);
451 
452  // if they interact
453  if (r2 < FourH2 && r2 >= 1e-16)
454  {
455  real_number r = sqrtf(r2);
456 
458  DWab(dr,DW,r);
459 
460  real_number scal = massb*(v_rel.get(0)*DW.get(0)+v_rel.get(1)*DW.get(1)+v_rel.get(2)*DW.get(2));
461  scal = (typea == BOUNDARY && typeb == BOUNDARY)?0.0f:scal;
462 
463  drho_ += scal;
464  }
465 
466  ++Np;
467  }
468 
469  vd.template getProp<red>(a) = max_visc;
470 
471  vd.template getProp<drho>(a) = drho_;
472 }
473 
474 struct type_is_fluid
475 {
476  __device__ static bool check(int c)
477  {
478  return c == FLUID;
479  }
480 };
481 
482 struct type_is_border
483 {
484  __device__ static bool check(int c)
485  {
486  return c == BOUNDARY;
487  }
488 };
489 
490 template<typename CellList> inline void calc_forces(particles & vd, CellList & NN, real_number & max_visc, size_t cnt, openfpm::vector_gpu<aggregate<int>> & fluid_ids, openfpm::vector_gpu<aggregate<int>> & border_ids)
491 {
492  // Update the cell-list
493  vd.updateCellList<type,rho,Pressure,velocity>(NN);
494 
496 
497  // get the particles fluid ids
498  get_indexes_by_type<type,type_is_fluid>(vd.getPropVectorSort(),fluid_ids,vd.size_local(),vd.getVC().getmgpuContext());
499 
500  // get the particles fluid ids
501  get_indexes_by_type<type,type_is_border>(vd.getPropVectorSort(),border_ids,vd.size_local(),vd.getVC().getmgpuContext());
502 
503  auto part = fluid_ids.getGPUIterator(96);
504  CUDA_LAUNCH(calc_forces_fluid_gpu,part,vd.toKernel_sorted(),fluid_ids.toKernel(),NN.toKernel(),W_dap,cbar);
505 
506  part = border_ids.getGPUIterator(96);
507  CUDA_LAUNCH(calc_forces_border_gpu,part,vd.toKernel_sorted(),border_ids.toKernel(),NN.toKernel(),W_dap,cbar);
508 
510 
511  vd.merge_sort<force,drho,red>(NN);
512 
513  max_visc = reduce_local<red,_max_>(vd);
514 }
515 
516 template<typename vector_type>
517 __global__ void max_acceleration_and_velocity_gpu(vector_type vd)
518 {
519  auto a = GET_PARTICLE(vd);
520 
521  Point<3,real_number> acc(vd.template getProp<force>(a));
522  vd.template getProp<red>(a) = norm(acc);
523 
524  Point<3,real_number> vel(vd.template getProp<velocity>(a));
525  vd.template getProp<red2>(a) = norm(vel);
526 }
527 
528 void max_acceleration_and_velocity(particles & vd, real_number & max_acc, real_number & max_vel)
529 {
530  // Calculate the maximum acceleration
531  auto part = vd.getDomainIteratorGPU();
532 
533  CUDA_LAUNCH(max_acceleration_and_velocity_gpu,part,vd.toKernel());
534 
535  max_acc = reduce_local<red,_max_>(vd);
536  max_vel = reduce_local<red2,_max_>(vd);
537 
538  Vcluster<> & v_cl = create_vcluster();
539  v_cl.max(max_acc);
540  v_cl.max(max_vel);
541  v_cl.execute();
542 }
543 
544 
545 real_number calc_deltaT(particles & vd, real_number ViscDtMax)
546 {
547  real_number Maxacc = 0.0;
548  real_number Maxvel = 0.0;
549  max_acceleration_and_velocity(vd,Maxacc,Maxvel);
550 
551  //-dt1 depends on force per unit mass.
552  const real_number dt_f = (Maxacc)?sqrt(H/Maxacc):std::numeric_limits<float>::max();
553 
554  //-dt2 combines the Courant and the viscous time-step controls.
555  const real_number dt_cv = H/(std::max(cbar,Maxvel*10.f) + H*ViscDtMax);
556 
557  //-dt new value of time step.
558  real_number dt=real_number(CFLnumber)*std::min(dt_f,dt_cv);
559  if(dt<real_number(DtMin))
560  {dt=real_number(DtMin);}
561 
562  return dt;
563 }
564 
565 template<typename vector_dist_type>
566 __global__ void verlet_int_gpu(vector_dist_type vd, real_number dt, real_number dt2, real_number dt205)
567 {
568  // ... a
569  auto a = GET_PARTICLE(vd);
570 
571  // if the particle is boundary
572  if (vd.template getProp<type>(a) == BOUNDARY)
573  {
574  // Update rho
575  real_number rhop = vd.template getProp<rho>(a);
576 
577  // Update only the density
578  vd.template getProp<velocity>(a)[0] = 0.0;
579  vd.template getProp<velocity>(a)[1] = 0.0;
580  vd.template getProp<velocity>(a)[2] = 0.0;
581  real_number rhonew = vd.template getProp<rho_prev>(a) + dt2*vd.template getProp<drho>(a);
582  vd.template getProp<rho>(a) = (rhonew < rho_zero)?rho_zero:rhonew;
583 
584  vd.template getProp<rho_prev>(a) = rhop;
585 
586  vd.template getProp<red>(a) = 0;
587 
588  return;
589  }
590 
591  //-Calculate displacement and update position
592  real_number dx = vd.template getProp<velocity>(a)[0]*dt + vd.template getProp<force>(a)[0]*dt205;
593  real_number dy = vd.template getProp<velocity>(a)[1]*dt + vd.template getProp<force>(a)[1]*dt205;
594  real_number dz = vd.template getProp<velocity>(a)[2]*dt + vd.template getProp<force>(a)[2]*dt205;
595 
596  vd.getPos(a)[0] += dx;
597  vd.getPos(a)[1] += dy;
598  vd.getPos(a)[2] += dz;
599 
600  real_number velX = vd.template getProp<velocity>(a)[0];
601  real_number velY = vd.template getProp<velocity>(a)[1];
602  real_number velZ = vd.template getProp<velocity>(a)[2];
603 
604  real_number rhop = vd.template getProp<rho>(a);
605 
606  vd.template getProp<velocity>(a)[0] = vd.template getProp<velocity_prev>(a)[0] + vd.template getProp<force>(a)[0]*dt2;
607  vd.template getProp<velocity>(a)[1] = vd.template getProp<velocity_prev>(a)[1] + vd.template getProp<force>(a)[1]*dt2;
608  vd.template getProp<velocity>(a)[2] = vd.template getProp<velocity_prev>(a)[2] + vd.template getProp<force>(a)[2]*dt2;
609  vd.template getProp<rho>(a) = vd.template getProp<rho_prev>(a) + dt2*vd.template getProp<drho>(a);
610 
611  // Check if the particle go out of range in space and in density
612  if (vd.getPos(a)[0] < 0.0 || vd.getPos(a)[1] < 0.0 || vd.getPos(a)[2] < 0.0 ||
613  vd.getPos(a)[0] > 1.61 || vd.getPos(a)[1] > 0.68 || vd.getPos(a)[2] > 0.50 ||
614  vd.template getProp<rho>(a) < RhoMin || vd.template getProp<rho>(a) > RhoMax)
615  {
616  vd.template getProp<red>(a) = 1;
617  }
618  else
619  {
620  vd.template getProp<red>(a) = 0;
621  }
622 
623 
624  vd.template getProp<velocity_prev>(a)[0] = velX;
625  vd.template getProp<velocity_prev>(a)[1] = velY;
626  vd.template getProp<velocity_prev>(a)[2] = velZ;
627  vd.template getProp<rho_prev>(a) = rhop;
628 }
629 
630 size_t cnt = 0;
631 
632 void verlet_int(particles & vd, real_number dt)
633 {
634  // particle iterator
635  auto part = vd.getDomainIteratorGPU();
636 
637  real_number dt205 = dt*dt*0.5;
638  real_number dt2 = dt*2.0;
639 
640  CUDA_LAUNCH(verlet_int_gpu,part,vd.toKernel(),dt,dt2,dt205);
641 
642  // remove the particles marked
643  remove_marked<red>(vd);
644 
645  // increment the iteration counter
646  cnt++;
647 }
648 
649 template<typename vector_type>
650 __global__ void euler_int_gpu(vector_type vd,real_number dt, real_number dt205)
651 {
652  // ... a
653  auto a = GET_PARTICLE(vd);
654 
655  // if the particle is boundary
656  if (vd.template getProp<type>(a) == BOUNDARY)
657  {
658  // Update rho
659  real_number rhop = vd.template getProp<rho>(a);
660 
661  // Update only the density
662  vd.template getProp<velocity>(a)[0] = 0.0;
663  vd.template getProp<velocity>(a)[1] = 0.0;
664  vd.template getProp<velocity>(a)[2] = 0.0;
665  real_number rhonew = vd.template getProp<rho>(a) + dt*vd.template getProp<drho>(a);
666  vd.template getProp<rho>(a) = (rhonew < rho_zero)?rho_zero:rhonew;
667 
668  vd.template getProp<rho_prev>(a) = rhop;
669 
670  vd.template getProp<red>(a) = 0;
671 
672  return;
673  }
674 
675  //-Calculate displacement and update position / Calcula desplazamiento y actualiza posicion.
676  real_number dx = vd.template getProp<velocity>(a)[0]*dt + vd.template getProp<force>(a)[0]*dt205;
677  real_number dy = vd.template getProp<velocity>(a)[1]*dt + vd.template getProp<force>(a)[1]*dt205;
678  real_number dz = vd.template getProp<velocity>(a)[2]*dt + vd.template getProp<force>(a)[2]*dt205;
679 
680  vd.getPos(a)[0] += dx;
681  vd.getPos(a)[1] += dy;
682  vd.getPos(a)[2] += dz;
683 
684  real_number velX = vd.template getProp<velocity>(a)[0];
685  real_number velY = vd.template getProp<velocity>(a)[1];
686  real_number velZ = vd.template getProp<velocity>(a)[2];
687  real_number rhop = vd.template getProp<rho>(a);
688 
689  vd.template getProp<velocity>(a)[0] = vd.template getProp<velocity>(a)[0] + vd.template getProp<force>(a)[0]*dt;
690  vd.template getProp<velocity>(a)[1] = vd.template getProp<velocity>(a)[1] + vd.template getProp<force>(a)[1]*dt;
691  vd.template getProp<velocity>(a)[2] = vd.template getProp<velocity>(a)[2] + vd.template getProp<force>(a)[2]*dt;
692  vd.template getProp<rho>(a) = vd.template getProp<rho>(a) + dt*vd.template getProp<drho>(a);
693 
694  // Check if the particle go out of range in space and in density
695  if (vd.getPos(a)[0] < 0.0 || vd.getPos(a)[1] < 0.0 || vd.getPos(a)[2] < 0.0 ||
696  vd.getPos(a)[0] > 1.61 || vd.getPos(a)[1] > 0.68 || vd.getPos(a)[2] > 0.50 ||
697  vd.template getProp<rho>(a) < RhoMin || vd.template getProp<rho>(a) > RhoMax)
698  {vd.template getProp<red>(a) = 1;}
699  else
700  {vd.template getProp<red>(a) = 0;}
701 
702  vd.template getProp<velocity_prev>(a)[0] = velX;
703  vd.template getProp<velocity_prev>(a)[1] = velY;
704  vd.template getProp<velocity_prev>(a)[2] = velZ;
705  vd.template getProp<rho_prev>(a) = rhop;
706 }
707 
708 void euler_int(particles & vd, real_number dt)
709 {
710 
711  // particle iterator
712  auto part = vd.getDomainIteratorGPU();
713 
714  real_number dt205 = dt*dt*0.5;
715 
716  CUDA_LAUNCH(euler_int_gpu,part,vd.toKernel(),dt,dt205);
717 
718  // remove the particles
719  remove_marked<red>(vd);
720 
721  cnt++;
722 }
723 
724 template<typename vector_type, typename NN_type>
725 __global__ void sensor_pressure_gpu(vector_type vd, NN_type NN, Point<3,real_number> probe, real_number * press_tmp)
726 {
727  real_number tot_ker = 0.0;
728 
729  // Get the position of the probe i
730  Point<3,real_number> xp = probe;
731 
732  // get the iterator over the neighbohood particles of the probes position
733  auto itg = NN.getNNIteratorBox(NN.getCell(xp));
734  while (itg.isNext())
735  {
736  auto q = itg.get_sort();
737 
738  // Only the fluid particles are importants
739  if (vd.template getProp<type>(q) != FLUID)
740  {
741  ++itg;
742  continue;
743  }
744 
745  // Get the position of the neighborhood particle q
746  Point<3,real_number> xq = vd.getPos(q);
747 
748  // Calculate the contribution of the particle to the pressure
749  // of the probe
750  real_number r = sqrt(norm2(xp - xq));
751 
752  real_number ker = Wab(r) * (MassFluid / rho_zero);
753 
754  // Also keep track of the calculation of the summed
755  // kernel
756  tot_ker += ker;
757 
758  // Add the total pressure contribution
759  *press_tmp += vd.template getProp<Pressure>(q) * ker;
760 
761  // next neighborhood particle
762  ++itg;
763  }
764 
765  // We calculate the pressure normalizing the
766  // sum over all kernels
767  if (tot_ker == 0.0)
768  {*press_tmp = 0.0;}
769  else
770  {*press_tmp = 1.0 / tot_ker * *press_tmp;}
771 }
772 
773 template<typename Vector, typename CellList>
774 inline void sensor_pressure(Vector & vd,
775  CellList & NN,
778 {
779  Vcluster<> & v_cl = create_vcluster();
780 
781  press_t.add();
782 
783  for (size_t i = 0 ; i < probes.size() ; i++)
784  {
785  // A float variable to calculate the pressure of the problem
786  CudaMemory press_tmp_(sizeof(real_number));
787  real_number press_tmp;
788 
789  // if the probe is inside the processor domain
790  if (vd.getDecomposition().isLocal(probes.get(i)) == true)
791  {
792  CUDA_LAUNCH_DIM3(sensor_pressure_gpu,1,1,vd.toKernel_sorted(),NN.toKernel(),probes.get(i),(real_number *)press_tmp_.toKernel());
793 
794  vd.merge<Pressure>(NN);
795 
796  // move calculated pressure on
797  press_tmp_.deviceToHost();
798  press_tmp = *(real_number *)press_tmp_.getPointer();
799  }
800 
801  // This is not necessary in principle, but if you
802  // want to make all processor aware of the history of the calculated
803  // pressure we have to execute this
804  v_cl.sum(press_tmp);
805  v_cl.execute();
806 
807  // We add the calculated pressure into the history
808  press_t.last().add(press_tmp);
809  }
810 }
811 
812 int main(int argc, char* argv[])
813 {
814  // initialize the library
815  openfpm_init(&argc,&argv);
816 
819 
820 #ifdef CUDIFY_USE_CUDA
821  cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
822 #endif
823 
824  // It contain for each time-step the value detected by the probes
827 
828  probes.add({0.8779,0.3,0.02});
829  probes.add({0.754,0.31,0.02});
830 
831  // Here we define our domain a 2D box with internals from 0 to 1.0 for x and y
832  Box<3,real_number> domain({-0.05,-0.05,-0.05},{1.7010,0.7065,0.511});
833  size_t sz[3] = {413,179,133};
834 
835  // Fill W_dap
836  W_dap = 1.0/Wab(H/1.5);
837 
838  // Here we define the boundary conditions of our problem
839  size_t bc[3]={NON_PERIODIC,NON_PERIODIC,NON_PERIODIC};
840 
841  // extended boundary around the domain, and the processor domain
842  Ghost<3,real_number> g(2*H);
843 
844  particles vd(0,domain,bc,g,DEC_GRAN(128));
845 
847 
848  // You can ignore all these dp/2.0 is a trick to reach the same initialization
849  // of Dual-SPH that use a different criteria to draw particles
850  Box<3,real_number> fluid_box({dp/2.0,dp/2.0,dp/2.0},{0.4+dp/2.0,0.67-dp/2.0,0.3+dp/2.0});
851 
852  // return an iterator to the fluid particles to add to vd
853  auto fluid_it = DrawParticles::DrawBox(vd,sz,domain,fluid_box);
854 
855  // here we fill some of the constants needed by the simulation
856  max_fluid_height = fluid_it.getBoxMargins().getHigh(2);
857  h_swl = fluid_it.getBoxMargins().getHigh(2) - fluid_it.getBoxMargins().getLow(2);
858  B = (coeff_sound)*(coeff_sound)*gravity*h_swl*rho_zero / gamma_;
859  cbar = coeff_sound * sqrt(gravity * h_swl);
860 
861  // for each particle inside the fluid box ...
862  while (fluid_it.isNext())
863  {
864  // ... add a particle ...
865  vd.add();
866 
867  // ... and set it position ...
868  vd.getLastPos()[0] = fluid_it.get().get(0);
869  vd.getLastPos()[1] = fluid_it.get().get(1);
870  vd.getLastPos()[2] = fluid_it.get().get(2);
871 
872  // and its type.
873  vd.template getLastProp<type>() = FLUID;
874 
875  // We also initialize the density of the particle and the hydro-static pressure given by
876  //
877  // rho_zero*g*h = P
878  //
879  // rho_p = (P/B + 1)^(1/Gamma) * rho_zero
880  //
881 
882  vd.template getLastProp<Pressure>() = rho_zero * gravity * (max_fluid_height - fluid_it.get().get(2));
883 
884  vd.template getLastProp<rho>() = pow(vd.template getLastProp<Pressure>() / B + 1, 1.0/gamma_) * rho_zero;
885  vd.template getLastProp<rho_prev>() = vd.template getLastProp<rho>();
886  vd.template getLastProp<velocity>()[0] = 0.0;
887  vd.template getLastProp<velocity>()[1] = 0.0;
888  vd.template getLastProp<velocity>()[2] = 0.0;
889 
890  vd.template getLastProp<velocity_prev>()[0] = 0.0;
891  vd.template getLastProp<velocity_prev>()[1] = 0.0;
892  vd.template getLastProp<velocity_prev>()[2] = 0.0;
893 
894  // next fluid particle
895  ++fluid_it;
896  }
897 
898  // Recipient
899  Box<3,real_number> recipient1({0.0,0.0,0.0},{1.6+dp/2.0,0.67+dp/2.0,0.4+dp/2.0});
900  Box<3,real_number> recipient2({dp,dp,dp},{1.6-dp/2.0,0.67-dp/2.0,0.4+dp/2.0});
901 
902  Box<3,real_number> obstacle1({0.9,0.24-dp/2.0,0.0},{1.02+dp/2.0,0.36,0.45+dp/2.0});
903  Box<3,real_number> obstacle2({0.9+dp,0.24+dp/2.0,0.0},{1.02-dp/2.0,0.36-dp,0.45-dp/2.0});
904  Box<3,real_number> obstacle3({0.9+dp,0.24,0.0},{1.02,0.36,0.45});
905 
907  holes.add(recipient2);
908  holes.add(obstacle1);
909  auto bound_box = DrawParticles::DrawSkin(vd,sz,domain,holes,recipient1);
910 
911  while (bound_box.isNext())
912  {
913  vd.add();
914 
915  vd.getLastPos()[0] = bound_box.get().get(0);
916  vd.getLastPos()[1] = bound_box.get().get(1);
917  vd.getLastPos()[2] = bound_box.get().get(2);
918 
919  vd.template getLastProp<type>() = BOUNDARY;
920  vd.template getLastProp<rho>() = rho_zero;
921  vd.template getLastProp<rho_prev>() = rho_zero;
922  vd.template getLastProp<velocity>()[0] = 0.0;
923  vd.template getLastProp<velocity>()[1] = 0.0;
924  vd.template getLastProp<velocity>()[2] = 0.0;
925 
926  vd.template getLastProp<velocity_prev>()[0] = 0.0;
927  vd.template getLastProp<velocity_prev>()[1] = 0.0;
928  vd.template getLastProp<velocity_prev>()[2] = 0.0;
929 
930  ++bound_box;
931  }
932 
933  auto obstacle_box = DrawParticles::DrawSkin(vd,sz,domain,obstacle2,obstacle1);
934 
935  while (obstacle_box.isNext())
936  {
937  vd.add();
938 
939  vd.getLastPos()[0] = obstacle_box.get().get(0);
940  vd.getLastPos()[1] = obstacle_box.get().get(1);
941  vd.getLastPos()[2] = obstacle_box.get().get(2);
942 
943  vd.template getLastProp<type>() = BOUNDARY;
944  vd.template getLastProp<rho>() = rho_zero;
945  vd.template getLastProp<rho_prev>() = rho_zero;
946  vd.template getLastProp<velocity>()[0] = 0.0;
947  vd.template getLastProp<velocity>()[1] = 0.0;
948  vd.template getLastProp<velocity>()[2] = 0.0;
949 
950  vd.template getLastProp<velocity_prev>()[0] = 0.0;
951  vd.template getLastProp<velocity_prev>()[1] = 0.0;
952  vd.template getLastProp<velocity_prev>()[2] = 0.0;
953 
954  ++obstacle_box;
955  }
956 
957  vd.map();
958 
959  // Now that we fill the vector with particles
960  ModelCustom md;
961 
962  vd.addComputationCosts(md);
963  vd.getDecomposition().decompose();
964  vd.map();
965 
967 
968  // Ok the initialization is done on CPU on GPU we are doing the main loop, so first we offload all properties on GPU
969 
970  vd.hostToDevicePos();
971  vd.template hostToDeviceProp<type,rho,rho_prev,Pressure,velocity>();
972 
973 
974  vd.ghost_get<type,rho,Pressure,velocity>(RUN_ON_DEVICE);
975 
976  auto NN = vd.getCellListGPU/*<CELLLIST_GPU_SPARSE<3,float>>*/(2*H / 2.0);
977  NN.setBoxNN(2);
978 
979  timer tot_sim;
980  tot_sim.start();
981 
982  size_t write = 0;
983  size_t it = 0;
984  size_t it_reb = 0;
985  real_number t = 0.0;
986  while (t <= t_end)
987  {
988  Vcluster<> & v_cl = create_vcluster();
989  timer it_time;
990  it_time.start();
991 
993  it_reb++;
994  if (it_reb == 300)
995  {
996  vd.map(RUN_ON_DEVICE);
997 
998  // Rebalancer for now work on CPU , so move to CPU
999  vd.deviceToHostPos();
1000  vd.template deviceToHostProp<type>();
1001 
1002  it_reb = 0;
1003  ModelCustom md;
1004  vd.addComputationCosts(md);
1005  vd.getDecomposition().decompose();
1006 
1007  if (v_cl.getProcessUnitID() == 0)
1008  {std::cout << "REBALANCED " << it_reb << std::endl;}
1009  }
1010 
1011  vd.map(RUN_ON_DEVICE);
1012 
1013  // Calculate pressure from the density
1014  EqState(vd);
1015 
1016  real_number max_visc = 0.0;
1017 
1018  vd.ghost_get<type,rho,Pressure,velocity>(RUN_ON_DEVICE);
1019 
1020 
1021  // Calc forces
1022  calc_forces(vd,NN,max_visc,cnt,fluid_ids,border_ids);
1023 
1024  // Get the maximum viscosity term across processors
1025  v_cl.max(max_visc);
1026  v_cl.execute();
1027 
1028  // Calculate delta t integration
1029  real_number dt = calc_deltaT(vd,max_visc);
1030 
1031  // VerletStep or euler step
1032  it++;
1033  if (it < 40)
1034  verlet_int(vd,dt);
1035  else
1036  {
1037  euler_int(vd,dt);
1038  it = 0;
1039  }
1040 
1041  t += dt;
1042 
1043  if (write < t*10)
1044  {
1045  // Sensor pressure require update ghost, so we ensure that particles are distributed correctly
1046  // and ghost are updated
1047  vd.map(RUN_ON_DEVICE);
1048  vd.ghost_get<type,rho,Pressure,velocity>(RUN_ON_DEVICE);
1049  vd.updateCellList(NN);
1050 
1051  // calculate the pressure at the sensor points
1052  //sensor_pressure(vd,NN,press_t,probes);
1053 
1054  std::cout << "OUTPUT " << dt << std::endl;
1055 
1056  // When we write we have move all the particles information back to CPU
1057 
1058  vd.deviceToHostPos();
1059  vd.deviceToHostProp<type,rho,rho_prev,Pressure,drho,force,velocity,velocity_prev,red,red2>();
1060 
1061  // We copy on another vector with less properties to reduce the size of the output
1063 
1064  auto ito = vd.getDomainIterator();
1065 
1066  while(ito.isNext())
1067  {
1068  auto p = ito.get();
1069 
1070  vd_out.add();
1071 
1072  vd_out.getLastPos()[0] = vd.getPos(p)[0];
1073  vd_out.getLastPos()[1] = vd.getPos(p)[1];
1074  vd_out.getLastPos()[2] = vd.getPos(p)[2];
1075 
1076  vd_out.template getLastProp<0>() = vd.template getProp<type>(p);
1077 
1078  vd_out.template getLastProp<1>()[0] = vd.template getProp<velocity>(p)[0];
1079  vd_out.template getLastProp<1>()[1] = vd.template getProp<velocity>(p)[1];
1080  vd_out.template getLastProp<1>()[2] = vd.template getProp<velocity>(p)[2];
1081 
1082  ++ito;
1083  }
1084 
1085  vd_out.write_frame("Particles",write,VTK_WRITER | FORMAT_BINARY);
1086  write++;
1087 
1088  if (v_cl.getProcessUnitID() == 0)
1089  {std::cout << "TIME: " << t << " write " << it_time.getwct() << " " << it_reb << " " << cnt << " Max visc: " << max_visc << " " << vd.size_local() << std::endl;}
1090  }
1091  else
1092  {
1093  if (v_cl.getProcessUnitID() == 0)
1094  {std::cout << "TIME: " << t << " " << it_time.getwct() << " " << it_reb << " " << cnt << " Max visc: " << max_visc << " " << vd.size_local() << std::endl;}
1095  }
1096  }
1097 
1098  tot_sim.stop();
1099  std::cout << "Time to complete: " << tot_sim.getwct() << " seconds" << std::endl;
1100 
1101 
1102  openfpm_finalize();
1103 }
1104 
1105 #else
1106 
1107 int main(int argc, char* argv[])
1108 {
1109  return 0;
1110 }
1111 
1112 #endif
size_t getProcessUnitID()
Get the process unit id.
Sparse Matrix implementation stub object when OpenFPM is compiled with no linear algebra support.
Definition: Vector.hpp:39
__device__ __host__ T getLow(int i) const
get the i-coordinate of the low bound interval of the box
Definition: Box.hpp:556
auto getPos(vect_dist_key_dx vec_key) -> decltype(v_pos.template get< 0 >(vec_key.getKey()))
Get the position of an element.
static PointIteratorSkin< dim, T, typename vd_type::Decomposition_type > DrawSkin(vd_type &vd, size_t(&sz)[dim], Box< dim, T > &domain, Box< dim, T > &sub_A, Box< dim, T > &sub_B)
Draw particles in a box B excluding the area of a second box A (B - A)
This class implement the point shape in an N-dimensional space.
Definition: Point.hpp:27
double getwct()
Return the elapsed real time.
Definition: timer.hpp:130
Definition: Ghost.hpp:39
Implementation of VCluster class.
Definition: VCluster.hpp:58
void execute()
Execute all the requests.
This class define the domain decomposition interface.
const vector_dist_prop & getPropVectorSort() const
return the property vector of all the particles
__device__ __host__ const T & get(unsigned int i) const
Get coordinate.
Definition: Point.hpp:172
size_t size_local() const
return the local size of the vector
Vcluster< Memory > & getVC()
Get the Virtual Cluster machine.
void start()
Start the timer.
Definition: timer.hpp:90
This class represent an N-dimensional box.
Definition: Box.hpp:60
void sum(T &num)
Sum the numbers across all processors and get the result.
mgpu::ofp_context_t & getmgpuContext(bool iw=true)
If nvidia cuda is activated return a mgpu context.
void updateCellList(CellL &cell_list, bool no_se3=false, cl_construct_opt opt=cl_construct_opt::Full)
Update a cell list using the stored particles.
static PointIterator< dim, T, typename vd_type::Decomposition_type > DrawBox(vd_type &vd, size_t(&sz)[dim], Box< dim, T > &domain, Box< dim, T > &sub)
Draw particles in a box.
Distributed vector.
void max(T &num)
Get the maximum number across all processors (or reduction with infinity norm)
aggregate of properties, from a list of object if create a struct that follow the OPENFPM native stru...
Definition: aggregate.hpp:214
temporal buffer for reductions
Model for Dynamic load balancing.
Definition: main.cpp:221
Implementation of 1-D std::vector like structure.
Definition: map_vector.hpp:202
Class for FAST cell list implementation.
Definition: CellList.hpp:356
__device__ __host__ T getHigh(int i) const
get the high interval of the box
Definition: Box.hpp:567
auto get(size_t cell, size_t ele) -> decltype(this->Mem_type::get(cell, ele))
Get an element in the cell.
Definition: CellList.hpp:867
Class for cpu time benchmarking.
Definition: timer.hpp:27
void stop()
Stop the timer.
Definition: timer.hpp:119