OpenFPM_pdata  4.1.0
Project that contain the implementation of distributed structures
 
Loading...
Searching...
No Matches
main.cu
1
51#ifdef __NVCC__
52
53#define PRINT_STACKTRACE
54#define STOP_ON_ERROR
55#define OPENMPI
56#define SCAN_WITH_CUB
57#define SORT_WITH_CUB
58//#define SE_CLASS1
59
60//#define USE_LOW_REGISTER_ITERATOR
61
62#include "Vector/vector_dist.hpp"
63#include <math.h>
64#include "Draw/DrawParticles.hpp"
65
66
67
68typedef float real_number;
69
70// A constant to indicate boundary particles
71#define BOUNDARY 0
72
73// A constant to indicate fluid particles
74#define FLUID 1
75
76// initial spacing between particles dp in the formulas
77const real_number dp = 0.00425;
78// Maximum height of the fluid water
79// is going to be calculated and filled later on
80real_number h_swl = 0.0;
81
82// c_s in the formulas (constant used to calculate the sound speed)
83const real_number coeff_sound = 20.0;
84
85// gamma in the formulas
86const real_number gamma_ = 7.0;
87
88// sqrt(3.0*dp*dp) support of the kernel
89const real_number H = 0.00736121593217;
90
91// Eta in the formulas
92const real_number Eta2 = 0.01 * H*H;
93
94const real_number FourH2 = 4.0 * H*H;
95
96// alpha in the formula
97const real_number visco = 0.1;
98
99// cbar in the formula (calculated later)
100real_number cbar = 0.0;
101
102// Mass of the fluid particles
103const real_number MassFluid = 0.0000767656;
104
105// Mass of the boundary particles
106const real_number MassBound = 0.0000767656;
107
108//
109
110// End simulation time
111#ifdef TEST_RUN
112const real_number t_end = 0.001;
113#else
114const real_number t_end = 1.5;
115#endif
116
117// Gravity acceleration
118const real_number gravity = 9.81;
119
120// Reference densitu 1000Kg/m^3
121const real_number rho_zero = 1000.0;
122
123// Filled later require h_swl, it is b in the formulas
124real_number B = 0.0;
125
126// Constant used to define time integration
127const real_number CFLnumber = 0.2;
128
129// Minimum T
130const real_number DtMin = 0.00001;
131
132// Minimum Rho allowed
133const real_number RhoMin = 700.0;
134
135// Maximum Rho allowed
136const real_number RhoMax = 1300.0;
137
138// Filled in initialization
139real_number max_fluid_height = 0.0;
140
141// Properties
142
143// FLUID or BOUNDARY
144const size_t type = 0;
145
146// Density
147const int rho = 1;
148
149// Density at step n-1
150const int rho_prev = 2;
151
152// Pressure
153const int Pressure = 3;
154
155// Delta rho calculated in the force calculation
156const int drho = 4;
157
158// calculated force
159const int force = 5;
160
161// velocity
162const int velocity = 6;
163
164// velocity at previous step
165const int velocity_prev = 7;
166
167const int red = 8;
168
169const int red2 = 9;
170
171// Type of the vector containing particles
173// | | | | | | | | | |
174// | | | | | | | | | |
175// type density density Pressure delta force velocity velocity reduction another
176// at n-1 density at n - 1 buffer reduction buffer
177
178
179struct ModelCustom
180{
181 template<typename Decomposition, typename vector> inline void addComputation(Decomposition & dec,
182 vector & vd,
183 size_t v,
184 size_t p)
185 {
186 if (vd.template getProp<type>(p) == FLUID)
187 dec.addComputationCost(v,4);
188 else
189 dec.addComputationCost(v,3);
190 }
191
192 template<typename Decomposition> inline void applyModel(Decomposition & dec, size_t v)
193 {
194 dec.setSubSubDomainComputationCost(v, dec.getSubSubDomainComputationCost(v) * dec.getSubSubDomainComputationCost(v));
195 }
196
197 real_number distributionTol()
198 {
199 return 1.01;
200 }
201};
202
203template<typename vd_type>
204__global__ void EqState_gpu(vd_type vd, real_number B)
205{
206 auto a = GET_PARTICLE(vd);
207
208 real_number rho_a = vd.template getProp<rho>(a);
209 real_number rho_frac = rho_a / rho_zero;
210
211 vd.template getProp<Pressure>(a) = B*( rho_frac*rho_frac*rho_frac*rho_frac*rho_frac*rho_frac*rho_frac - 1.0);
212}
213
214inline void EqState(particles & vd)
215{
216 auto it = vd.getDomainIteratorGPU();
217
218 CUDA_LAUNCH(EqState_gpu,it,vd.toKernel(),B);
219}
220
221
222const real_number a2 = 1.0/M_PI/H/H/H;
223
224inline __device__ __host__ real_number Wab(real_number r)
225{
226 r /= H;
227
228 if (r < 1.0)
229 return (1.0 - 3.0/2.0*r*r + 3.0/4.0*r*r*r)*a2;
230 else if (r < 2.0)
231 return (1.0/4.0*(2.0 - r*r)*(2.0 - r*r)*(2.0 - r*r))*a2;
232 else
233 return 0.0;
234}
235
236
237const real_number c1 = -3.0/M_PI/H/H/H/H;
238const real_number d1 = 9.0/4.0/M_PI/H/H/H/H;
239const real_number c2 = -3.0/4.0/M_PI/H/H/H/H;
240const real_number a2_4 = 0.25*a2;
241// Filled later
242real_number W_dap = 0.0;
243
244inline __device__ __host__ void DWab(Point<3,real_number> & dx, Point<3,real_number> & DW, real_number r)
245{
246 const real_number qq=r/H;
247
248 real_number qq2 = qq * qq;
249 real_number fac1 = (c1*qq + d1*qq2)/r;
250 real_number b1 = (qq < 1.0f)?1.0f:0.0f;
251
252 real_number wqq = (2.0f - qq);
253 real_number fac2 = c2 * wqq * wqq / r;
254 real_number b2 = (qq >= 1.0f && qq < 2.0f)?1.0f:0.0f;
255
256 real_number factor = (b1*fac1 + b2*fac2);
257
258 DW.get(0) = factor * dx.get(0);
259 DW.get(1) = factor * dx.get(1);
260 DW.get(2) = factor * dx.get(2);
261}
262
263// Tensile correction
264inline __device__ __host__ real_number Tensile(real_number r, real_number rhoa, real_number rhob, real_number prs1, real_number prs2, real_number W_dap)
265{
266 const real_number qq=r/H;
267 //-Cubic Spline kernel
268 real_number wab;
269 if(r>H)
270 {
271 real_number wqq1=2.0f-qq;
272 real_number wqq2=wqq1*wqq1;
273
274 wab=a2_4*(wqq2*wqq1);
275 }
276 else
277 {
278 real_number wqq2=qq*qq;
279 real_number wqq3=wqq2*qq;
280
281 wab=a2*(1.0f-1.5f*wqq2+0.75f*wqq3);
282 }
283
284 //-Tensile correction.
285 real_number fab=wab*W_dap;
286 fab*=fab; fab*=fab; //fab=fab^4
287 const real_number tensilp1=(prs1/(rhoa*rhoa))*(prs1>0.0f? 0.01f: -0.2f);
288 const real_number tensilp2=(prs2/(rhob*rhob))*(prs2>0.0f? 0.01f: -0.2f);
289
290 return (fab*(tensilp1+tensilp2));
291}
292
293
294inline __device__ __host__ real_number Pi(const Point<3,real_number> & dr, real_number rr2, Point<3,real_number> & dv, real_number rhoa, real_number rhob, real_number massb, real_number cbar, real_number & visc)
295{
296 const real_number dot = dr.get(0)*dv.get(0) + dr.get(1)*dv.get(1) + dr.get(2)*dv.get(2);
297 const real_number dot_rr2 = dot/(rr2+Eta2);
298 visc=(dot_rr2 < visc)?visc:dot_rr2;
299
300 if(dot < 0)
301 {
302 const float amubar=H*dot_rr2;
303 const float robar=(rhoa+rhob)*0.5f;
304 const float pi_visc=(-visco*cbar*amubar/robar);
305
306 return pi_visc;
307 }
308 else
309 return 0.0f;
310}
311
312template<typename particles_type, typename fluid_ids_type,typename NN_type>
313__global__ void calc_forces_fluid_gpu(particles_type vd, fluid_ids_type fids, NN_type NN, real_number W_dap, real_number cbar)
314{
315 // ... a
316 unsigned int a;
317
318 GET_PARTICLE_BY_ID(a,fids);
319
320 real_number max_visc = 0.0f;
321
322 // Get the position xp of the particle
323 Point<3,real_number> xa = vd.getPos(a);
324
325 // Type of the particle
326 unsigned int typea = vd.template getProp<type>(a);
327
328 // Get the density of the of the particle a
329 real_number rhoa = vd.template getProp<rho>(a);
330
331 // Get the pressure of the particle a
332 real_number Pa = vd.template getProp<Pressure>(a);
333
334 // Get the Velocity of the particle a
335 Point<3,real_number> va = vd.template getProp<velocity>(a);
336
338 force_.get(0) = 0.0f;
339 force_.get(1) = 0.0f;
340 force_.get(2) = -gravity;
341 real_number drho_ = 0.0f;
342
343 // Get an iterator over the neighborhood particles of p
344 auto Np = NN.getNNIteratorBox(NN.getCell(xa));
345
346 // For each neighborhood particle
347 while (Np.isNext() == true)
348 {
349 // ... q
350 auto b = Np.get_sort();
351
352 // Get the position xp of the particle
353 Point<3,real_number> xb = vd.getPos(b);
354
355 // if (p == q) skip this particle this condition should be done in the r^2 = 0
356 //if (a == b) {++Np; continue;};
357
358 unsigned int typeb = vd.template getProp<type>(b);
359
360 real_number massb = (typeb == FLUID)?MassFluid:MassBound;
361 Point<3,real_number> vb = vd.template getProp<velocity>(b);
362 real_number Pb = vd.template getProp<Pressure>(b);
363 real_number rhob = vd.template getProp<rho>(b);
364
365 // Get the distance between p and q
366 Point<3,real_number> dr = xa - xb;
367 Point<3,real_number> v_rel = va - vb;
368 // take the norm of this vector
369 real_number r2 = norm2(dr);
370
371 // if they interact
372 if (r2 < FourH2 && r2 >= 1e-16)
373 {
374 real_number r = sqrtf(r2);
375
377 DWab(dr,DW,r);
378
379 real_number factor = - massb*((Pa + Pb) / (rhoa * rhob) + Tensile(r,rhoa,rhob,Pa,Pb,W_dap) + Pi(dr,r2,v_rel,rhoa,rhob,massb,cbar,max_visc));
380
381 // Bound - Bound does not produce any change
382 factor = (typea == BOUNDARY && typeb == BOUNDARY)?0.0f:factor;
383
384 force_.get(0) += factor * DW.get(0);
385 force_.get(1) += factor * DW.get(1);
386 force_.get(2) += factor * DW.get(2);
387
388 real_number scal = massb*(v_rel.get(0)*DW.get(0)+v_rel.get(1)*DW.get(1)+v_rel.get(2)*DW.get(2));
389 scal = (typea == BOUNDARY && typeb == BOUNDARY)?0.0f:scal;
390
391 drho_ += scal;
392 }
393
394 ++Np;
395 }
396
397 vd.template getProp<red>(a) = max_visc;
398
399 vd.template getProp<force>(a)[0] = force_.get(0);
400 vd.template getProp<force>(a)[1] = force_.get(1);
401 vd.template getProp<force>(a)[2] = force_.get(2);
402 vd.template getProp<drho>(a) = drho_;
403}
404
405template<typename particles_type, typename fluid_ids_type,typename NN_type>
406__global__ void calc_forces_border_gpu(particles_type vd, fluid_ids_type fbord, NN_type NN, real_number W_dap, real_number cbar)
407{
408 // ... a
409 unsigned int a;
410
411 GET_PARTICLE_BY_ID(a,fbord);
412
413 real_number max_visc = 0.0f;
414
415 // Get the position xp of the particle
416 Point<3,real_number> xa = vd.getPos(a);
417
418 // Type of the particle
419 unsigned int typea = vd.template getProp<type>(a);
420
421 // Get the Velocity of the particle a
422 Point<3,real_number> va = vd.template getProp<velocity>(a);
423
424 real_number drho_ = 0.0f;
425
426 // Get an iterator over the neighborhood particles of p
427 auto Np = NN.getNNIteratorBox(NN.getCell(xa));
428
429 // For each neighborhood particle
430 while (Np.isNext() == true)
431 {
432 // ... q
433 auto b = Np.get_sort();
434
435 // Get the position xp of the particle
436 Point<3,real_number> xb = vd.getPos(b);
437
438 // if (p == q) skip this particle this condition should be done in the r^2 = 0
439 //if (a == b) {++Np; continue;};
440
441 unsigned int typeb = vd.template getProp<type>(b);
442
443 real_number massb = (typeb == FLUID)?MassFluid:MassBound;
444 Point<3,real_number> vb = vd.template getProp<velocity>(b);
445
446 // Get the distance between p and q
447 Point<3,real_number> dr = xa - xb;
448 Point<3,real_number> v_rel = va - vb;
449 // take the norm of this vector
450 real_number r2 = norm2(dr);
451
452 // if they interact
453 if (r2 < FourH2 && r2 >= 1e-16)
454 {
455 real_number r = sqrtf(r2);
456
458 DWab(dr,DW,r);
459
460 real_number scal = massb*(v_rel.get(0)*DW.get(0)+v_rel.get(1)*DW.get(1)+v_rel.get(2)*DW.get(2));
461 scal = (typea == BOUNDARY && typeb == BOUNDARY)?0.0f:scal;
462
463 drho_ += scal;
464 }
465
466 ++Np;
467 }
468
469 vd.template getProp<red>(a) = max_visc;
470
471 vd.template getProp<drho>(a) = drho_;
472}
473
474struct type_is_fluid
475{
476 __device__ static bool check(int c)
477 {
478 return c == FLUID;
479 }
480};
481
482struct type_is_border
483{
484 __device__ static bool check(int c)
485 {
486 return c == BOUNDARY;
487 }
488};
489
490template<typename CellList> inline void calc_forces(particles & vd, CellList & NN, real_number & max_visc, size_t cnt, openfpm::vector_gpu<aggregate<int>> & fluid_ids, openfpm::vector_gpu<aggregate<int>> & border_ids)
491{
492 // Update the cell-list
493 vd.updateCellList<type,rho,Pressure,velocity>(NN);
494
496
497 // get the particles fluid ids
498 get_indexes_by_type<type,type_is_fluid>(vd.getPropVectorSort(),fluid_ids,vd.size_local(),vd.getVC().getgpuContext());
499
500 // get the particles fluid ids
501 get_indexes_by_type<type,type_is_border>(vd.getPropVectorSort(),border_ids,vd.size_local(),vd.getVC().getgpuContext());
502
503 auto part = fluid_ids.getGPUIterator(96);
504 CUDA_LAUNCH(calc_forces_fluid_gpu,part,vd.toKernel_sorted(),fluid_ids.toKernel(),NN.toKernel(),W_dap,cbar);
505
506 part = border_ids.getGPUIterator(96);
507 CUDA_LAUNCH(calc_forces_border_gpu,part,vd.toKernel_sorted(),border_ids.toKernel(),NN.toKernel(),W_dap,cbar);
508
510
511 vd.merge_sort<force,drho,red>(NN);
512
513 max_visc = reduce_local<red,_max_>(vd);
514}
515
516template<typename vector_type>
517__global__ void max_acceleration_and_velocity_gpu(vector_type vd)
518{
519 auto a = GET_PARTICLE(vd);
520
521 Point<3,real_number> acc(vd.template getProp<force>(a));
522 vd.template getProp<red>(a) = norm(acc);
523
524 Point<3,real_number> vel(vd.template getProp<velocity>(a));
525 vd.template getProp<red2>(a) = norm(vel);
526}
527
528void max_acceleration_and_velocity(particles & vd, real_number & max_acc, real_number & max_vel)
529{
530 // Calculate the maximum acceleration
531 auto part = vd.getDomainIteratorGPU();
532
533 CUDA_LAUNCH(max_acceleration_and_velocity_gpu,part,vd.toKernel());
534
535 max_acc = reduce_local<red,_max_>(vd);
536 max_vel = reduce_local<red2,_max_>(vd);
537
538 Vcluster<> & v_cl = create_vcluster();
539 v_cl.max(max_acc);
540 v_cl.max(max_vel);
541 v_cl.execute();
542}
543
544
545real_number calc_deltaT(particles & vd, real_number ViscDtMax)
546{
547 real_number Maxacc = 0.0;
548 real_number Maxvel = 0.0;
549 max_acceleration_and_velocity(vd,Maxacc,Maxvel);
550
551 //-dt1 depends on force per unit mass.
552 const real_number dt_f = (Maxacc)?sqrt(H/Maxacc):std::numeric_limits<float>::max();
553
554 //-dt2 combines the Courant and the viscous time-step controls.
555 const real_number dt_cv = H/(std::max(cbar,Maxvel*10.f) + H*ViscDtMax);
556
557 //-dt new value of time step.
558 real_number dt=real_number(CFLnumber)*std::min(dt_f,dt_cv);
559 if(dt<real_number(DtMin))
560 {dt=real_number(DtMin);}
561
562 return dt;
563}
564
565template<typename vector_dist_type>
566__global__ void verlet_int_gpu(vector_dist_type vd, real_number dt, real_number dt2, real_number dt205)
567{
568 // ... a
569 auto a = GET_PARTICLE(vd);
570
571 // if the particle is boundary
572 if (vd.template getProp<type>(a) == BOUNDARY)
573 {
574 // Update rho
575 real_number rhop = vd.template getProp<rho>(a);
576
577 // Update only the density
578 vd.template getProp<velocity>(a)[0] = 0.0;
579 vd.template getProp<velocity>(a)[1] = 0.0;
580 vd.template getProp<velocity>(a)[2] = 0.0;
581 real_number rhonew = vd.template getProp<rho_prev>(a) + dt2*vd.template getProp<drho>(a);
582 vd.template getProp<rho>(a) = (rhonew < rho_zero)?rho_zero:rhonew;
583
584 vd.template getProp<rho_prev>(a) = rhop;
585
586 vd.template getProp<red>(a) = 0;
587
588 return;
589 }
590
591 //-Calculate displacement and update position
592 real_number dx = vd.template getProp<velocity>(a)[0]*dt + vd.template getProp<force>(a)[0]*dt205;
593 real_number dy = vd.template getProp<velocity>(a)[1]*dt + vd.template getProp<force>(a)[1]*dt205;
594 real_number dz = vd.template getProp<velocity>(a)[2]*dt + vd.template getProp<force>(a)[2]*dt205;
595
596 vd.getPos(a)[0] += dx;
597 vd.getPos(a)[1] += dy;
598 vd.getPos(a)[2] += dz;
599
600 real_number velX = vd.template getProp<velocity>(a)[0];
601 real_number velY = vd.template getProp<velocity>(a)[1];
602 real_number velZ = vd.template getProp<velocity>(a)[2];
603
604 real_number rhop = vd.template getProp<rho>(a);
605
606 vd.template getProp<velocity>(a)[0] = vd.template getProp<velocity_prev>(a)[0] + vd.template getProp<force>(a)[0]*dt2;
607 vd.template getProp<velocity>(a)[1] = vd.template getProp<velocity_prev>(a)[1] + vd.template getProp<force>(a)[1]*dt2;
608 vd.template getProp<velocity>(a)[2] = vd.template getProp<velocity_prev>(a)[2] + vd.template getProp<force>(a)[2]*dt2;
609 vd.template getProp<rho>(a) = vd.template getProp<rho_prev>(a) + dt2*vd.template getProp<drho>(a);
610
611 // Check if the particle go out of range in space and in density
612 if (vd.getPos(a)[0] < 0.0 || vd.getPos(a)[1] < 0.0 || vd.getPos(a)[2] < 0.0 ||
613 vd.getPos(a)[0] > 1.61 || vd.getPos(a)[1] > 0.68 || vd.getPos(a)[2] > 0.50 ||
614 vd.template getProp<rho>(a) < RhoMin || vd.template getProp<rho>(a) > RhoMax)
615 {
616 vd.template getProp<red>(a) = 1;
617 }
618 else
619 {
620 vd.template getProp<red>(a) = 0;
621 }
622
623
624 vd.template getProp<velocity_prev>(a)[0] = velX;
625 vd.template getProp<velocity_prev>(a)[1] = velY;
626 vd.template getProp<velocity_prev>(a)[2] = velZ;
627 vd.template getProp<rho_prev>(a) = rhop;
628}
629
630size_t cnt = 0;
631
632void verlet_int(particles & vd, real_number dt)
633{
634 // particle iterator
635 auto part = vd.getDomainIteratorGPU();
636
637 real_number dt205 = dt*dt*0.5;
638 real_number dt2 = dt*2.0;
639
640 CUDA_LAUNCH(verlet_int_gpu,part,vd.toKernel(),dt,dt2,dt205);
641
642 // remove the particles marked
643 remove_marked<red>(vd);
644
645 // increment the iteration counter
646 cnt++;
647}
648
649template<typename vector_type>
650__global__ void euler_int_gpu(vector_type vd,real_number dt, real_number dt205)
651{
652 // ... a
653 auto a = GET_PARTICLE(vd);
654
655 // if the particle is boundary
656 if (vd.template getProp<type>(a) == BOUNDARY)
657 {
658 // Update rho
659 real_number rhop = vd.template getProp<rho>(a);
660
661 // Update only the density
662 vd.template getProp<velocity>(a)[0] = 0.0;
663 vd.template getProp<velocity>(a)[1] = 0.0;
664 vd.template getProp<velocity>(a)[2] = 0.0;
665 real_number rhonew = vd.template getProp<rho>(a) + dt*vd.template getProp<drho>(a);
666 vd.template getProp<rho>(a) = (rhonew < rho_zero)?rho_zero:rhonew;
667
668 vd.template getProp<rho_prev>(a) = rhop;
669
670 vd.template getProp<red>(a) = 0;
671
672 return;
673 }
674
675 //-Calculate displacement and update position / Calcula desplazamiento y actualiza posicion.
676 real_number dx = vd.template getProp<velocity>(a)[0]*dt + vd.template getProp<force>(a)[0]*dt205;
677 real_number dy = vd.template getProp<velocity>(a)[1]*dt + vd.template getProp<force>(a)[1]*dt205;
678 real_number dz = vd.template getProp<velocity>(a)[2]*dt + vd.template getProp<force>(a)[2]*dt205;
679
680 vd.getPos(a)[0] += dx;
681 vd.getPos(a)[1] += dy;
682 vd.getPos(a)[2] += dz;
683
684 real_number velX = vd.template getProp<velocity>(a)[0];
685 real_number velY = vd.template getProp<velocity>(a)[1];
686 real_number velZ = vd.template getProp<velocity>(a)[2];
687 real_number rhop = vd.template getProp<rho>(a);
688
689 vd.template getProp<velocity>(a)[0] = vd.template getProp<velocity>(a)[0] + vd.template getProp<force>(a)[0]*dt;
690 vd.template getProp<velocity>(a)[1] = vd.template getProp<velocity>(a)[1] + vd.template getProp<force>(a)[1]*dt;
691 vd.template getProp<velocity>(a)[2] = vd.template getProp<velocity>(a)[2] + vd.template getProp<force>(a)[2]*dt;
692 vd.template getProp<rho>(a) = vd.template getProp<rho>(a) + dt*vd.template getProp<drho>(a);
693
694 // Check if the particle go out of range in space and in density
695 if (vd.getPos(a)[0] < 0.0 || vd.getPos(a)[1] < 0.0 || vd.getPos(a)[2] < 0.0 ||
696 vd.getPos(a)[0] > 1.61 || vd.getPos(a)[1] > 0.68 || vd.getPos(a)[2] > 0.50 ||
697 vd.template getProp<rho>(a) < RhoMin || vd.template getProp<rho>(a) > RhoMax)
698 {vd.template getProp<red>(a) = 1;}
699 else
700 {vd.template getProp<red>(a) = 0;}
701
702 vd.template getProp<velocity_prev>(a)[0] = velX;
703 vd.template getProp<velocity_prev>(a)[1] = velY;
704 vd.template getProp<velocity_prev>(a)[2] = velZ;
705 vd.template getProp<rho_prev>(a) = rhop;
706}
707
708void euler_int(particles & vd, real_number dt)
709{
710
711 // particle iterator
712 auto part = vd.getDomainIteratorGPU();
713
714 real_number dt205 = dt*dt*0.5;
715
716 CUDA_LAUNCH(euler_int_gpu,part,vd.toKernel(),dt,dt205);
717
718 // remove the particles
719 remove_marked<red>(vd);
720
721 cnt++;
722}
723
724template<typename vector_type, typename NN_type>
725__global__ void sensor_pressure_gpu(vector_type vd, NN_type NN, Point<3,real_number> probe, real_number * press_tmp)
726{
727 real_number tot_ker = 0.0;
728
729 // Get the position of the probe i
730 Point<3,real_number> xp = probe;
731
732 // get the iterator over the neighbohood particles of the probes position
733 auto itg = NN.getNNIteratorBox(NN.getCell(xp));
734 while (itg.isNext())
735 {
736 auto q = itg.get_sort();
737
738 // Only the fluid particles are importants
739 if (vd.template getProp<type>(q) != FLUID)
740 {
741 ++itg;
742 continue;
743 }
744
745 // Get the position of the neighborhood particle q
746 Point<3,real_number> xq = vd.getPos(q);
747
748 // Calculate the contribution of the particle to the pressure
749 // of the probe
750 real_number r = sqrt(norm2(xp - xq));
751
752 real_number ker = Wab(r) * (MassFluid / rho_zero);
753
754 // Also keep track of the calculation of the summed
755 // kernel
756 tot_ker += ker;
757
758 // Add the total pressure contribution
759 *press_tmp += vd.template getProp<Pressure>(q) * ker;
760
761 // next neighborhood particle
762 ++itg;
763 }
764
765 // We calculate the pressure normalizing the
766 // sum over all kernels
767 if (tot_ker == 0.0)
768 {*press_tmp = 0.0;}
769 else
770 {*press_tmp = 1.0 / tot_ker * *press_tmp;}
771}
772
773template<typename Vector, typename CellList>
774inline void sensor_pressure(Vector & vd,
775 CellList & NN,
778{
779 Vcluster<> & v_cl = create_vcluster();
780
781 press_t.add();
782
783 for (size_t i = 0 ; i < probes.size() ; i++)
784 {
785 // A float variable to calculate the pressure of the problem
786 CudaMemory press_tmp_(sizeof(real_number));
787 real_number press_tmp;
788
789 // if the probe is inside the processor domain
790 if (vd.getDecomposition().isLocal(probes.get(i)) == true)
791 {
792 CUDA_LAUNCH_DIM3(sensor_pressure_gpu,1,1,vd.toKernel_sorted(),NN.toKernel(),probes.get(i),(real_number *)press_tmp_.toKernel());
793
794 vd.merge<Pressure>(NN);
795
796 // move calculated pressure on
797 press_tmp_.deviceToHost();
798 press_tmp = *(real_number *)press_tmp_.getPointer();
799 }
800
801 // This is not necessary in principle, but if you
802 // want to make all processor aware of the history of the calculated
803 // pressure we have to execute this
804 v_cl.sum(press_tmp);
805 v_cl.execute();
806
807 // We add the calculated pressure into the history
808 press_t.last().add(press_tmp);
809 }
810}
811
812int main(int argc, char* argv[])
813{
814 // initialize the library
815 openfpm_init(&argc,&argv);
816
819
820#ifdef CUDIFY_USE_CUDA
821 cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
822#endif
823
824 // It contain for each time-step the value detected by the probes
827
828 probes.add({0.8779,0.3,0.02});
829 probes.add({0.754,0.31,0.02});
830
831 // Here we define our domain a 2D box with internals from 0 to 1.0 for x and y
832 Box<3,real_number> domain({-0.05,-0.05,-0.05},{1.7010,0.7065,0.511});
833 size_t sz[3] = {413,179,133};
834
835 // Fill W_dap
836 W_dap = 1.0/Wab(H/1.5);
837
838 // Here we define the boundary conditions of our problem
839 size_t bc[3]={NON_PERIODIC,NON_PERIODIC,NON_PERIODIC};
840
841 // extended boundary around the domain, and the processor domain
843
844 particles vd(0,domain,bc,g,DEC_GRAN(128));
845
847
848 // You can ignore all these dp/2.0 is a trick to reach the same initialization
849 // of Dual-SPH that use a different criteria to draw particles
850 Box<3,real_number> fluid_box({dp/2.0,dp/2.0,dp/2.0},{0.4+dp/2.0,0.67-dp/2.0,0.3+dp/2.0});
851
852 // return an iterator to the fluid particles to add to vd
853 auto fluid_it = DrawParticles::DrawBox(vd,sz,domain,fluid_box);
854
855 // here we fill some of the constants needed by the simulation
856 max_fluid_height = fluid_it.getBoxMargins().getHigh(2);
857 h_swl = fluid_it.getBoxMargins().getHigh(2) - fluid_it.getBoxMargins().getLow(2);
858 B = (coeff_sound)*(coeff_sound)*gravity*h_swl*rho_zero / gamma_;
859 cbar = coeff_sound * sqrt(gravity * h_swl);
860
861 // for each particle inside the fluid box ...
862 while (fluid_it.isNext())
863 {
864 // ... add a particle ...
865 vd.add();
866
867 // ... and set it position ...
868 vd.getLastPos()[0] = fluid_it.get().get(0);
869 vd.getLastPos()[1] = fluid_it.get().get(1);
870 vd.getLastPos()[2] = fluid_it.get().get(2);
871
872 // and its type.
873 vd.template getLastProp<type>() = FLUID;
874
875 // We also initialize the density of the particle and the hydro-static pressure given by
876 //
877 // rho_zero*g*h = P
878 //
879 // rho_p = (P/B + 1)^(1/Gamma) * rho_zero
880 //
881
882 vd.template getLastProp<Pressure>() = rho_zero * gravity * (max_fluid_height - fluid_it.get().get(2));
883
884 vd.template getLastProp<rho>() = pow(vd.template getLastProp<Pressure>() / B + 1, 1.0/gamma_) * rho_zero;
885 vd.template getLastProp<rho_prev>() = vd.template getLastProp<rho>();
886 vd.template getLastProp<velocity>()[0] = 0.0;
887 vd.template getLastProp<velocity>()[1] = 0.0;
888 vd.template getLastProp<velocity>()[2] = 0.0;
889
890 vd.template getLastProp<velocity_prev>()[0] = 0.0;
891 vd.template getLastProp<velocity_prev>()[1] = 0.0;
892 vd.template getLastProp<velocity_prev>()[2] = 0.0;
893
894 // next fluid particle
895 ++fluid_it;
896 }
897
898 // Recipient
899 Box<3,real_number> recipient1({0.0,0.0,0.0},{1.6+dp/2.0,0.67+dp/2.0,0.4+dp/2.0});
900 Box<3,real_number> recipient2({dp,dp,dp},{1.6-dp/2.0,0.67-dp/2.0,0.4+dp/2.0});
901
902 Box<3,real_number> obstacle1({0.9,0.24-dp/2.0,0.0},{1.02+dp/2.0,0.36,0.45+dp/2.0});
903 Box<3,real_number> obstacle2({0.9+dp,0.24+dp/2.0,0.0},{1.02-dp/2.0,0.36-dp,0.45-dp/2.0});
904 Box<3,real_number> obstacle3({0.9+dp,0.24,0.0},{1.02,0.36,0.45});
905
907 holes.add(recipient2);
908 holes.add(obstacle1);
909 auto bound_box = DrawParticles::DrawSkin(vd,sz,domain,holes,recipient1);
910
911 while (bound_box.isNext())
912 {
913 vd.add();
914
915 vd.getLastPos()[0] = bound_box.get().get(0);
916 vd.getLastPos()[1] = bound_box.get().get(1);
917 vd.getLastPos()[2] = bound_box.get().get(2);
918
919 vd.template getLastProp<type>() = BOUNDARY;
920 vd.template getLastProp<rho>() = rho_zero;
921 vd.template getLastProp<rho_prev>() = rho_zero;
922 vd.template getLastProp<velocity>()[0] = 0.0;
923 vd.template getLastProp<velocity>()[1] = 0.0;
924 vd.template getLastProp<velocity>()[2] = 0.0;
925
926 vd.template getLastProp<velocity_prev>()[0] = 0.0;
927 vd.template getLastProp<velocity_prev>()[1] = 0.0;
928 vd.template getLastProp<velocity_prev>()[2] = 0.0;
929
930 ++bound_box;
931 }
932
933 auto obstacle_box = DrawParticles::DrawSkin(vd,sz,domain,obstacle2,obstacle1);
934
935 while (obstacle_box.isNext())
936 {
937 vd.add();
938
939 vd.getLastPos()[0] = obstacle_box.get().get(0);
940 vd.getLastPos()[1] = obstacle_box.get().get(1);
941 vd.getLastPos()[2] = obstacle_box.get().get(2);
942
943 vd.template getLastProp<type>() = BOUNDARY;
944 vd.template getLastProp<rho>() = rho_zero;
945 vd.template getLastProp<rho_prev>() = rho_zero;
946 vd.template getLastProp<velocity>()[0] = 0.0;
947 vd.template getLastProp<velocity>()[1] = 0.0;
948 vd.template getLastProp<velocity>()[2] = 0.0;
949
950 vd.template getLastProp<velocity_prev>()[0] = 0.0;
951 vd.template getLastProp<velocity_prev>()[1] = 0.0;
952 vd.template getLastProp<velocity_prev>()[2] = 0.0;
953
954 ++obstacle_box;
955 }
956
957 vd.map();
958
959 // Now that we fill the vector with particles
960 ModelCustom md;
961
962 vd.addComputationCosts(md);
963 vd.getDecomposition().decompose();
964 vd.map();
965
967
968 // Ok the initialization is done on CPU on GPU we are doing the main loop, so first we offload all properties on GPU
969
970 vd.hostToDevicePos();
971 vd.template hostToDeviceProp<type,rho,rho_prev,Pressure,velocity>();
972
973
974 vd.ghost_get<type,rho,Pressure,velocity>(RUN_ON_DEVICE);
975
976 auto NN = vd.getCellListGPU/*<CELLLIST_GPU_SPARSE<3,float>>*/(2*H / 2.0);
977 NN.setBoxNN(2);
978
979 timer tot_sim;
980 tot_sim.start();
981
982 size_t write = 0;
983 size_t it = 0;
984 size_t it_reb = 0;
985 real_number t = 0.0;
986 while (t <= t_end)
987 {
988 Vcluster<> & v_cl = create_vcluster();
989 timer it_time;
990 it_time.start();
991
993 it_reb++;
994 if (it_reb == 300)
995 {
996 vd.map(RUN_ON_DEVICE);
997
998 // Rebalancer for now work on CPU , so move to CPU
999 vd.deviceToHostPos();
1000 vd.template deviceToHostProp<type>();
1001
1002 it_reb = 0;
1003 ModelCustom md;
1004 vd.addComputationCosts(md);
1005 vd.getDecomposition().decompose();
1006
1007 if (v_cl.getProcessUnitID() == 0)
1008 {std::cout << "REBALANCED " << it_reb << std::endl;}
1009 }
1010
1011 vd.map(RUN_ON_DEVICE);
1012
1013 // Calculate pressure from the density
1014 EqState(vd);
1015
1016 real_number max_visc = 0.0;
1017
1018 vd.ghost_get<type,rho,Pressure,velocity>(RUN_ON_DEVICE);
1019
1020
1021 // Calc forces
1022 calc_forces(vd,NN,max_visc,cnt,fluid_ids,border_ids);
1023
1024 // Get the maximum viscosity term across processors
1025 v_cl.max(max_visc);
1026 v_cl.execute();
1027
1028 // Calculate delta t integration
1029 real_number dt = calc_deltaT(vd,max_visc);
1030
1031 // VerletStep or euler step
1032 it++;
1033 if (it < 40)
1034 verlet_int(vd,dt);
1035 else
1036 {
1037 euler_int(vd,dt);
1038 it = 0;
1039 }
1040
1041 t += dt;
1042
1043 if (write < t*10)
1044 {
1045 // Sensor pressure require update ghost, so we ensure that particles are distributed correctly
1046 // and ghost are updated
1047 vd.map(RUN_ON_DEVICE);
1048 vd.ghost_get<type,rho,Pressure,velocity>(RUN_ON_DEVICE);
1049 vd.updateCellList(NN);
1050
1051 // calculate the pressure at the sensor points
1052 //sensor_pressure(vd,NN,press_t,probes);
1053
1054 std::cout << "OUTPUT " << dt << std::endl;
1055
1056 // When we write we have move all the particles information back to CPU
1057
1058 vd.deviceToHostPos();
1059 vd.deviceToHostProp<type,rho,rho_prev,Pressure,drho,force,velocity,velocity_prev,red,red2>();
1060
1061 // We copy on another vector with less properties to reduce the size of the output
1063
1064 auto ito = vd.getDomainIterator();
1065
1066 while(ito.isNext())
1067 {
1068 auto p = ito.get();
1069
1070 vd_out.add();
1071
1072 vd_out.getLastPos()[0] = vd.getPos(p)[0];
1073 vd_out.getLastPos()[1] = vd.getPos(p)[1];
1074 vd_out.getLastPos()[2] = vd.getPos(p)[2];
1075
1076 vd_out.template getLastProp<0>() = vd.template getProp<type>(p);
1077
1078 vd_out.template getLastProp<1>()[0] = vd.template getProp<velocity>(p)[0];
1079 vd_out.template getLastProp<1>()[1] = vd.template getProp<velocity>(p)[1];
1080 vd_out.template getLastProp<1>()[2] = vd.template getProp<velocity>(p)[2];
1081
1082 ++ito;
1083 }
1084
1085 vd_out.write_frame("Particles",write,VTK_WRITER | FORMAT_BINARY);
1086 write++;
1087
1088 if (v_cl.getProcessUnitID() == 0)
1089 {std::cout << "TIME: " << t << " write " << it_time.getwct() << " " << it_reb << " " << cnt << " Max visc: " << max_visc << " " << vd.size_local() << std::endl;}
1090 }
1091 else
1092 {
1093 if (v_cl.getProcessUnitID() == 0)
1094 {std::cout << "TIME: " << t << " " << it_time.getwct() << " " << it_reb << " " << cnt << " Max visc: " << max_visc << " " << vd.size_local() << std::endl;}
1095 }
1096 }
1097
1098 tot_sim.stop();
1099 std::cout << "Time to complete: " << tot_sim.getwct() << " seconds" << std::endl;
1100
1101
1102 openfpm_finalize();
1103}
1104
1105#else
1106
1107int main(int argc, char* argv[])
1108{
1109 return 0;
1110}
1111
1112#endif
This class represent an N-dimensional box.
Definition Box.hpp:61
__device__ __host__ T getLow(int i) const
get the i-coordinate of the low bound interval of the box
Definition Box.hpp:556
__device__ __host__ T getHigh(int i) const
get the high interval of the box
Definition Box.hpp:567
Class for FAST cell list implementation.
Definition CellList.hpp:357
auto get(size_t cell, size_t ele) -> decltype(this->Mem_type::get(cell, ele))
Get an element in the cell.
Definition CellList.hpp:867
This class define the domain decomposition interface.
static PointIteratorSkin< dim, T, typename vd_type::Decomposition_type > DrawSkin(vd_type &vd, size_t(&sz)[dim], Box< dim, T > &domain, Box< dim, T > &sub_A, Box< dim, T > &sub_B)
Draw particles in a box B excluding the area of a second box A (B - A)
static PointIterator< dim, T, typename vd_type::Decomposition_type > DrawBox(vd_type &vd, size_t(&sz)[dim], Box< dim, T > &domain, Box< dim, T > &sub)
Draw particles in a box.
This class implement the point shape in an N-dimensional space.
Definition Point.hpp:28
__device__ __host__ const T & get(unsigned int i) const
Get coordinate.
Definition Point.hpp:172
void execute()
Execute all the requests.
void sum(T &num)
Sum the numbers across all processors and get the result.
size_t getProcessUnitID()
Get the process unit id.
void max(T &num)
Get the maximum number across all processors (or reduction with infinity norm)
Implementation of VCluster class.
Definition VCluster.hpp:59
Sparse Matrix implementation stub object when OpenFPM is compiled with no linear algebra support.
Definition Vector.hpp:40
Implementation of 1-D std::vector like structure.
Class for cpu time benchmarking.
Definition timer.hpp:28
void stop()
Stop the timer.
Definition timer.hpp:119
void start()
Start the timer.
Definition timer.hpp:90
double getwct()
Return the elapsed real time.
Definition timer.hpp:130
Distributed vector.
size_t size_local() const
return the local size of the vector
void updateCellList(CellL &cell_list, bool no_se3=false, cl_construct_opt opt=cl_construct_opt::Full)
Update a cell list using the stored particles.
auto getPos(vect_dist_key_dx vec_key) -> decltype(v_pos.template get< 0 >(vec_key.getKey()))
Get the position of an element.
const vector_dist_prop & getPropVectorSort() const
return the property vector of all the particles
Vcluster< Memory > & getVC()
Get the Virtual Cluster machine.
Model for Dynamic load balancing.
Definition main.cpp:222
aggregate of properties, from a list of object if create a struct that follow the OPENFPM native stru...
temporal buffer for reductions