OpenFPM_pdata  4.1.0
Project that contain the implementation of distributed structures
 
Loading...
Searching...
No Matches
main.cu
1
40#ifdef __NVCC__
41
42#define PRINT_STACKTRACE
43//#define STOP_ON_ERROR
44#define OPENMPI
45//#define SE_CLASS1
46
47//#define USE_LOW_REGISTER_ITERATOR
48#define SCAN_WITH_CUB //<------ In case you want to use CUB for scan operations
49#define SORT_WITH_CUB
50//#define EXTERNAL_SET_GPU <----- In case you want to distribute the GPUs differently from the default
51
52#include "Vector/vector_dist.hpp"
53#include <math.h>
54#include "Draw/DrawParticles.hpp"
55
56
57
58typedef float real_number;
59
60// A constant to indicate boundary particles
61#define BOUNDARY 0
62
63// A constant to indicate fluid particles
64#define FLUID 1
65
66// initial spacing between particles dp in the formulas
67const real_number dp = 0.00425;
68// Maximum height of the fluid water
69// is going to be calculated and filled later on
70real_number h_swl = 0.0;
71
72// c_s in the formulas (constant used to calculate the sound speed)
73const real_number coeff_sound = 20.0;
74
75// gamma in the formulas
76const real_number gamma_ = 7.0;
77
78// sqrt(3.0*dp*dp) support of the kernel
79const real_number H = 0.00736121593217;
80
81// Eta in the formulas
82const real_number Eta2 = 0.01 * H*H;
83
84const real_number FourH2 = 4.0 * H*H;
85
86// alpha in the formula
87const real_number visco = 0.1;
88
89// cbar in the formula (calculated later)
90real_number cbar = 0.0;
91
92// Mass of the fluid particles
93const real_number MassFluid = 0.0000767656;
94
95// Mass of the boundary particles
96const real_number MassBound = 0.0000767656;
97
98//
99
100// End simulation time
101#ifdef TEST_RUN
102const real_number t_end = 0.001;
103#else
104const real_number t_end = 1.5;
105#endif
106
107// Gravity acceleration
108const real_number gravity = 9.81;
109
110// Reference densitu 1000Kg/m^3
111const real_number rho_zero = 1000.0;
112
113// Filled later require h_swl, it is b in the formulas
114real_number B = 0.0;
115
116// Constant used to define time integration
117const real_number CFLnumber = 0.2;
118
119// Minimum T
120const real_number DtMin = 0.00001;
121
122// Minimum Rho allowed
123const real_number RhoMin = 700.0;
124
125// Maximum Rho allowed
126const real_number RhoMax = 1300.0;
127
128// Filled in initialization
129real_number max_fluid_height = 0.0;
130
131// Properties
132
133// FLUID or BOUNDARY
134const size_t type = 0;
135
136// Density
137const int rho = 1;
138
139// Density at step n-1
140const int rho_prev = 2;
141
142// Pressure
143const int Pressure = 3;
144
145// Delta rho calculated in the force calculation
146const int drho = 4;
147
148// calculated force
149const int force = 5;
150
151// velocity
152const int velocity = 6;
153
154// velocity at previous step
155const int velocity_prev = 7;
156
157const int red = 8;
158
159const int red2 = 9;
160
161// Type of the vector containing particles
163// | | | | | | | | | |
164// | | | | | | | | | |
165// type density density Pressure delta force velocity velocity reduction another
166// at n-1 density at n - 1 buffer reduction buffer
167
168
169struct ModelCustom
170{
171 template<typename Decomposition, typename vector> inline void addComputation(Decomposition & dec,
172 vector & vd,
173 size_t v,
174 size_t p)
175 {
176 if (vd.template getProp<type>(p) == FLUID)
177 dec.addComputationCost(v,4);
178 else
179 dec.addComputationCost(v,3);
180 }
181
182 template<typename Decomposition> inline void applyModel(Decomposition & dec, size_t v)
183 {
184 dec.setSubSubDomainComputationCost(v, dec.getSubSubDomainComputationCost(v) * dec.getSubSubDomainComputationCost(v));
185 }
186
187 real_number distributionTol()
188 {
189 return 1.01;
190 }
191};
192
193template<typename vd_type>
194__global__ void EqState_gpu(vd_type vd, real_number B)
195{
196 auto a = GET_PARTICLE(vd);
197
198 real_number rho_a = vd.template getProp<rho>(a);
199 real_number rho_frac = rho_a / rho_zero;
200
201 vd.template getProp<Pressure>(a) = B*( rho_frac*rho_frac*rho_frac*rho_frac*rho_frac*rho_frac*rho_frac - 1.0);
202}
203
204inline void EqState(particles & vd)
205{
206 auto it = vd.getDomainIteratorGPU();
207
208 CUDA_LAUNCH(EqState_gpu,it,vd.toKernel(),B);
209}
210
211
212const real_number a2 = 1.0/M_PI/H/H/H;
213
214inline __device__ __host__ real_number Wab(real_number r)
215{
216 r /= H;
217
218 if (r < 1.0)
219 return (1.0 - 3.0/2.0*r*r + 3.0/4.0*r*r*r)*a2;
220 else if (r < 2.0)
221 return (1.0/4.0*(2.0 - r*r)*(2.0 - r*r)*(2.0 - r*r))*a2;
222 else
223 return 0.0;
224}
225
226
227const real_number c1 = -3.0/M_PI/H/H/H/H;
228const real_number d1 = 9.0/4.0/M_PI/H/H/H/H;
229const real_number c2 = -3.0/4.0/M_PI/H/H/H/H;
230const real_number a2_4 = 0.25*a2;
231// Filled later
232real_number W_dap = 0.0;
233
234inline __device__ __host__ void DWab(Point<3,real_number> & dx, Point<3,real_number> & DW, real_number r)
235{
236 const real_number qq=r/H;
237
238 real_number qq2 = qq * qq;
239 real_number fac1 = (c1*qq + d1*qq2)/r;
240 real_number b1 = (qq < 1.0f)?1.0f:0.0f;
241
242 real_number wqq = (2.0f - qq);
243 real_number fac2 = c2 * wqq * wqq / r;
244 real_number b2 = (qq >= 1.0f && qq < 2.0f)?1.0f:0.0f;
245
246 real_number factor = (b1*fac1 + b2*fac2);
247
248 DW.get(0) = factor * dx.get(0);
249 DW.get(1) = factor * dx.get(1);
250 DW.get(2) = factor * dx.get(2);
251}
252
253
254// Tensile correction
255inline __device__ __host__ real_number Tensile(real_number r, real_number rhoa, real_number rhob, real_number prs1, real_number prs2, real_number W_dap)
256{
257 const real_number qq=r/H;
258 //-Cubic Spline kernel
259 real_number wab;
260 if(r>H)
261 {
262 real_number wqq1=2.0f-qq;
263 real_number wqq2=wqq1*wqq1;
264
265 wab=a2_4*(wqq2*wqq1);
266 }
267 else
268 {
269 real_number wqq2=qq*qq;
270 real_number wqq3=wqq2*qq;
271
272 wab=a2*(1.0f-1.5f*wqq2+0.75f*wqq3);
273 }
274
275 //-Tensile correction.
276 real_number fab=wab*W_dap;
277 fab*=fab; fab*=fab; //fab=fab^4
278 const real_number tensilp1=(prs1/(rhoa*rhoa))*(prs1>0.0f? 0.01f: -0.2f);
279 const real_number tensilp2=(prs2/(rhob*rhob))*(prs2>0.0f? 0.01f: -0.2f);
280
281 return (fab*(tensilp1+tensilp2));
282}
283
284
285inline __device__ __host__ real_number Pi(const Point<3,real_number> & dr, real_number rr2, Point<3,real_number> & dv, real_number rhoa, real_number rhob, real_number massb, real_number cbar, real_number & visc)
286{
287 const real_number dot = dr.get(0)*dv.get(0) + dr.get(1)*dv.get(1) + dr.get(2)*dv.get(2);
288 const real_number dot_rr2 = dot/(rr2+Eta2);
289 visc=(dot_rr2 < visc)?visc:dot_rr2;
290
291 if(dot < 0)
292 {
293 const float amubar=H*dot_rr2;
294 const float robar=(rhoa+rhob)*0.5f;
295 const float pi_visc=(-visco*cbar*amubar/robar);
296
297 return pi_visc;
298 }
299 else
300 return 0.0f;
301}
302
303template<typename particles_type, typename NN_type>
304__global__ void calc_forces_gpu(particles_type vd, NN_type NN, real_number W_dap, real_number cbar)
305{
306 // ... a
307 unsigned int a;
308
309 GET_PARTICLE_SORT(a,NN);
310
311 real_number max_visc = 0.0f;
312
313 // Get the position xp of the particle
314 Point<3,real_number> xa = vd.getPos(a);
315
316 // Type of the particle
317 unsigned int typea = vd.template getProp<type>(a);
318
319 // Take the mass of the particle dependently if it is FLUID or BOUNDARY
320 //real_number massa = (typea == FLUID)?MassFluid:MassBound;
321
322 // Get the density of the of the particle a
323 real_number rhoa = vd.template getProp<rho>(a);
324
325 // Get the pressure of the particle a
326 real_number Pa = vd.template getProp<Pressure>(a);
327
328 // Get the Velocity of the particle a
329 Point<3,real_number> va = vd.template getProp<velocity>(a);
330
332 force_.get(0) = 0.0f;
333 force_.get(1) = 0.0f;
334 force_.get(2) = -gravity;
335 real_number drho_ = 0.0f;
336
337 // Get an iterator over the neighborhood particles of p
338 auto Np = NN.getNNIteratorBox(NN.getCell(xa));
339
340 // For each neighborhood particle
341 while (Np.isNext() == true)
342 {
343 // ... q
344 auto b = Np.get_sort();
345
346 // Get the position xp of the particle
347 Point<3,real_number> xb = vd.getPos(b);
348
349 // if (p == q) skip this particle this condition should be done in the r^2 = 0
350 if (a == b) {++Np; continue;};
351
352 unsigned int typeb = vd.template getProp<type>(b);
353
354 real_number massb = (typeb == FLUID)?MassFluid:MassBound;
355 Point<3,real_number> vb = vd.template getProp<velocity>(b);
356 real_number Pb = vd.template getProp<Pressure>(b);
357 real_number rhob = vd.template getProp<rho>(b);
358
359 // Get the distance between p and q
360 Point<3,real_number> dr = xa - xb;
361 Point<3,real_number> v_rel = va - vb;
362 // take the norm of this vector
363 real_number r2 = norm2(dr);
364
365 // if they interact
366 if (r2 < FourH2 && r2 >= 1e-16)
367 {
368 real_number r = sqrtf(r2);
369
371 DWab(dr,DW,r);
372
373 real_number factor = - massb*((Pa + Pb) / (rhoa * rhob) + Tensile(r,rhoa,rhob,Pa,Pb,W_dap) + Pi(dr,r2,v_rel,rhoa,rhob,massb,cbar,max_visc));
374
375 // Bound - Bound does not produce any change
376 factor = (typea == BOUNDARY && typeb == BOUNDARY)?0.0f:factor;
377
378 force_.get(0) += factor * DW.get(0);
379 force_.get(1) += factor * DW.get(1);
380 force_.get(2) += factor * DW.get(2);
381
382 real_number scal = massb*(v_rel.get(0)*DW.get(0)+v_rel.get(1)*DW.get(1)+v_rel.get(2)*DW.get(2));
383 scal = (typea == BOUNDARY && typeb == BOUNDARY)?0.0f:scal;
384
385 drho_ += scal;
386 }
387
388 ++Np;
389 }
390
391 vd.template getProp<red>(a) = max_visc;
392
393 vd.template getProp<force>(a)[0] = force_.get(0);
394 vd.template getProp<force>(a)[1] = force_.get(1);
395 vd.template getProp<force>(a)[2] = force_.get(2);
396 vd.template getProp<drho>(a) = drho_;
397}
398
399template<typename CellList> inline void calc_forces(particles & vd, CellList & NN, real_number & max_visc, size_t cnt)
400{
401 auto part = vd.getDomainIteratorGPU(96);
402
403 // Update the cell-list
404 vd.updateCellList(NN);
405
406 CUDA_LAUNCH(calc_forces_gpu,part,vd.toKernel_sorted(),NN.toKernel(),W_dap,cbar);
407
408 vd.merge_sort<force,drho,red>(NN);
409
410 max_visc = reduce_local<red,_max_>(vd);
411}
412
413template<typename vector_type>
414__global__ void max_acceleration_and_velocity_gpu(vector_type vd)
415{
416 auto a = GET_PARTICLE(vd);
417
418 Point<3,real_number> acc(vd.template getProp<force>(a));
419 vd.template getProp<red>(a) = norm(acc);
420
421 Point<3,real_number> vel(vd.template getProp<velocity>(a));
422 vd.template getProp<red2>(a) = norm(vel);
423}
424
425void max_acceleration_and_velocity(particles & vd, real_number & max_acc, real_number & max_vel)
426{
427 // Calculate the maximum acceleration
428 auto part = vd.getDomainIteratorGPU();
429
430 CUDA_LAUNCH(max_acceleration_and_velocity_gpu,part,vd.toKernel());
431
432 max_acc = reduce_local<red,_max_>(vd);
433 max_vel = reduce_local<red2,_max_>(vd);
434
435 Vcluster<> & v_cl = create_vcluster();
436 v_cl.max(max_acc);
437 v_cl.max(max_vel);
438 v_cl.execute();
439}
440
441
442real_number calc_deltaT(particles & vd, real_number ViscDtMax)
443{
444 real_number Maxacc = 0.0;
445 real_number Maxvel = 0.0;
446 max_acceleration_and_velocity(vd,Maxacc,Maxvel);
447
448 //-dt1 depends on force per unit mass.
449 const real_number dt_f = (Maxacc)?sqrt(H/Maxacc):std::numeric_limits<float>::max();
450
451 //-dt2 combines the Courant and the viscous time-step controls.
452 const real_number dt_cv = H/(std::max(cbar,Maxvel*10.f) + H*ViscDtMax);
453
454 //-dt new value of time step.
455 real_number dt=real_number(CFLnumber)*std::min(dt_f,dt_cv);
456 if(dt<real_number(DtMin))
457 {dt=real_number(DtMin);}
458
459 return dt;
460}
461
462template<typename vector_dist_type>
463__global__ void verlet_int_gpu(vector_dist_type vd, real_number dt, real_number dt2, real_number dt205)
464{
465 // ... a
466 auto a = GET_PARTICLE(vd);
467
468 // if the particle is boundary
469 if (vd.template getProp<type>(a) == BOUNDARY)
470 {
471 // Update rho
472 real_number rhop = vd.template getProp<rho>(a);
473
474 // Update only the density
475 vd.template getProp<velocity>(a)[0] = 0.0;
476 vd.template getProp<velocity>(a)[1] = 0.0;
477 vd.template getProp<velocity>(a)[2] = 0.0;
478 real_number rhonew = vd.template getProp<rho_prev>(a) + dt2*vd.template getProp<drho>(a);
479 vd.template getProp<rho>(a) = (rhonew < rho_zero)?rho_zero:rhonew;
480
481 vd.template getProp<rho_prev>(a) = rhop;
482
483 vd.template getProp<red>(a) = 0;
484
485 return;
486 }
487
488 //-Calculate displacement and update position
489 real_number dx = vd.template getProp<velocity>(a)[0]*dt + vd.template getProp<force>(a)[0]*dt205;
490 real_number dy = vd.template getProp<velocity>(a)[1]*dt + vd.template getProp<force>(a)[1]*dt205;
491 real_number dz = vd.template getProp<velocity>(a)[2]*dt + vd.template getProp<force>(a)[2]*dt205;
492
493 vd.getPos(a)[0] += dx;
494 vd.getPos(a)[1] += dy;
495 vd.getPos(a)[2] += dz;
496
497 real_number velX = vd.template getProp<velocity>(a)[0];
498 real_number velY = vd.template getProp<velocity>(a)[1];
499 real_number velZ = vd.template getProp<velocity>(a)[2];
500
501 real_number rhop = vd.template getProp<rho>(a);
502
503 vd.template getProp<velocity>(a)[0] = vd.template getProp<velocity_prev>(a)[0] + vd.template getProp<force>(a)[0]*dt2;
504 vd.template getProp<velocity>(a)[1] = vd.template getProp<velocity_prev>(a)[1] + vd.template getProp<force>(a)[1]*dt2;
505 vd.template getProp<velocity>(a)[2] = vd.template getProp<velocity_prev>(a)[2] + vd.template getProp<force>(a)[2]*dt2;
506 vd.template getProp<rho>(a) = vd.template getProp<rho_prev>(a) + dt2*vd.template getProp<drho>(a);
507
508 // Check if the particle go out of range in space and in density
509 if (vd.getPos(a)[0] < 0.0 || vd.getPos(a)[1] < 0.0 || vd.getPos(a)[2] < 0.0 ||
510 vd.getPos(a)[0] > 1.61 || vd.getPos(a)[1] > 0.68 || vd.getPos(a)[2] > 0.50 ||
511 vd.template getProp<rho>(a) < RhoMin || vd.template getProp<rho>(a) > RhoMax)
512 {vd.template getProp<red>(a) = 1;}
513 else
514 {vd.template getProp<red>(a) = 0;}
515
516
517 vd.template getProp<velocity_prev>(a)[0] = velX;
518 vd.template getProp<velocity_prev>(a)[1] = velY;
519 vd.template getProp<velocity_prev>(a)[2] = velZ;
520 vd.template getProp<rho_prev>(a) = rhop;
521}
522
523size_t cnt = 0;
524
525void verlet_int(particles & vd, real_number dt)
526{
527 // particle iterator
528 auto part = vd.getDomainIteratorGPU();
529
530 real_number dt205 = dt*dt*0.5;
531 real_number dt2 = dt*2.0;
532
533 CUDA_LAUNCH(verlet_int_gpu,part,vd.toKernel(),dt,dt2,dt205);
534
535 // remove the particles marked
536 remove_marked<red>(vd);
537
538 // increment the iteration counter
539 cnt++;
540}
541
542template<typename vector_type>
543__global__ void euler_int_gpu(vector_type vd,real_number dt, real_number dt205)
544{
545 // ... a
546 auto a = GET_PARTICLE(vd);
547
548 // if the particle is boundary
549 if (vd.template getProp<type>(a) == BOUNDARY)
550 {
551 // Update rho
552 real_number rhop = vd.template getProp<rho>(a);
553
554 // Update only the density
555 vd.template getProp<velocity>(a)[0] = 0.0;
556 vd.template getProp<velocity>(a)[1] = 0.0;
557 vd.template getProp<velocity>(a)[2] = 0.0;
558 real_number rhonew = vd.template getProp<rho>(a) + dt*vd.template getProp<drho>(a);
559 vd.template getProp<rho>(a) = (rhonew < rho_zero)?rho_zero:rhonew;
560
561 vd.template getProp<rho_prev>(a) = rhop;
562
563 vd.template getProp<red>(a) = 0;
564
565 return;
566 }
567
568 //-Calculate displacement and update position / Calcula desplazamiento y actualiza posicion.
569 real_number dx = vd.template getProp<velocity>(a)[0]*dt + vd.template getProp<force>(a)[0]*dt205;
570 real_number dy = vd.template getProp<velocity>(a)[1]*dt + vd.template getProp<force>(a)[1]*dt205;
571 real_number dz = vd.template getProp<velocity>(a)[2]*dt + vd.template getProp<force>(a)[2]*dt205;
572
573 vd.getPos(a)[0] += dx;
574 vd.getPos(a)[1] += dy;
575 vd.getPos(a)[2] += dz;
576
577 real_number velX = vd.template getProp<velocity>(a)[0];
578 real_number velY = vd.template getProp<velocity>(a)[1];
579 real_number velZ = vd.template getProp<velocity>(a)[2];
580 real_number rhop = vd.template getProp<rho>(a);
581
582 vd.template getProp<velocity>(a)[0] = vd.template getProp<velocity>(a)[0] + vd.template getProp<force>(a)[0]*dt;
583 vd.template getProp<velocity>(a)[1] = vd.template getProp<velocity>(a)[1] + vd.template getProp<force>(a)[1]*dt;
584 vd.template getProp<velocity>(a)[2] = vd.template getProp<velocity>(a)[2] + vd.template getProp<force>(a)[2]*dt;
585 vd.template getProp<rho>(a) = vd.template getProp<rho>(a) + dt*vd.template getProp<drho>(a);
586
587 // Check if the particle go out of range in space and in density
588 if (vd.getPos(a)[0] < 0.0 || vd.getPos(a)[1] < 0.0 || vd.getPos(a)[2] < 0.0 ||
589 vd.getPos(a)[0] > 1.61 || vd.getPos(a)[1] > 0.68 || vd.getPos(a)[2] > 0.50 ||
590 vd.template getProp<rho>(a) < RhoMin || vd.template getProp<rho>(a) > RhoMax)
591 {vd.template getProp<red>(a) = 1;}
592 else
593 {vd.template getProp<red>(a) = 0;}
594
595 vd.template getProp<velocity_prev>(a)[0] = velX;
596 vd.template getProp<velocity_prev>(a)[1] = velY;
597 vd.template getProp<velocity_prev>(a)[2] = velZ;
598 vd.template getProp<rho_prev>(a) = rhop;
599}
600
601void euler_int(particles & vd, real_number dt)
602{
603
604 // particle iterator
605 auto part = vd.getDomainIteratorGPU();
606
607 real_number dt205 = dt*dt*0.5;
608
609 CUDA_LAUNCH(euler_int_gpu,part,vd.toKernel(),dt,dt205);
610
611 // remove the particles
612 remove_marked<red>(vd);
613
614 cnt++;
615}
616
617template<typename vector_type, typename NN_type>
618__global__ void sensor_pressure_gpu(vector_type vd, NN_type NN, Point<3,real_number> probe, real_number * press_tmp)
619{
620 real_number tot_ker = 0.0;
621
622 // Get the position of the probe i
623 Point<3,real_number> xp = probe;
624
625 // get the iterator over the neighbohood particles of the probes position
626 auto itg = NN.getNNIteratorBox(NN.getCell(xp));
627 while (itg.isNext())
628 {
629 auto q = itg.get_sort();
630
631 // Only the fluid particles are importants
632 if (vd.template getProp<type>(q) != FLUID)
633 {
634 ++itg;
635 continue;
636 }
637
638 // Get the position of the neighborhood particle q
639 Point<3,real_number> xq = vd.getPos(q);
640
641 // Calculate the contribution of the particle to the pressure
642 // of the probe
643 real_number r = sqrt(norm2(xp - xq));
644
645 real_number ker = Wab(r) * (MassFluid / rho_zero);
646
647 // Also keep track of the calculation of the summed
648 // kernel
649 tot_ker += ker;
650
651 // Add the total pressure contribution
652 *press_tmp += vd.template getProp<Pressure>(q) * ker;
653
654 // next neighborhood particle
655 ++itg;
656 }
657
658 // We calculate the pressure normalizing the
659 // sum over all kernels
660 if (tot_ker == 0.0)
661 {*press_tmp = 0.0;}
662 else
663 {*press_tmp = 1.0 / tot_ker * *press_tmp;}
664}
665
666template<typename Vector, typename CellList>
667inline void sensor_pressure(Vector & vd,
668 CellList & NN,
671{
672 Vcluster<> & v_cl = create_vcluster();
673
674 press_t.add();
675
676 for (size_t i = 0 ; i < probes.size() ; i++)
677 {
678 // A float variable to calculate the pressure of the problem
679 CudaMemory press_tmp_(sizeof(real_number));
680 real_number press_tmp;
681
682 // if the probe is inside the processor domain
683 if (vd.getDecomposition().isLocal(probes.get(i)) == true)
684 {
685 CUDA_LAUNCH_DIM3(sensor_pressure_gpu,1,1,vd.toKernel_sorted(),NN.toKernel(),probes.get(i),(real_number *)press_tmp_.toKernel());
686
687 vd.merge<Pressure>(NN);
688
689 // move calculated pressure on
690 press_tmp_.deviceToHost();
691 press_tmp = *(real_number *)press_tmp_.getPointer();
692 }
693
694 // This is not necessary in principle, but if you
695 // want to make all processor aware of the history of the calculated
696 // pressure we have to execute this
697 v_cl.sum(press_tmp);
698 v_cl.execute();
699
700 // We add the calculated pressure into the history
701 press_t.last().add(press_tmp);
702 }
703}
704
705int main(int argc, char* argv[])
706{
707 // OpenFPM GPU distribution
708
709 // OpenFPM by default select GPU 0 for process 0, gpu 1 for process 1 and so on ... . In case of multi-node is the same each node has
710 // has a group of processes and these group of processes are distributed across the available GPU on that node.
711
712 // If you want to override this behaviour use #define EXTERNAL_SET_GPU at the very beginning of the program and use
713 // cudaSetDevice to select the GPU for that particular process before openfpm_init
714 // Note: To get the process number do MPI_Init and and use the MPI_Comm_rank. VCluster is not available before openfpm_init
715 // A code snippet in case we want to skip GPU 0
716 // MPI_Init(&argc,&argv);
717 // int rank;
718 // MPI_Comm_rank(MPI_COMM_WORLD,&rank);
719 // cudaSetDevice(1+rank);
720
721 // initialize the library
722 openfpm_init(&argc,&argv);
723
724#if !defined(CUDA_ON_CPU) && !defined(__HIP__)
725 cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
726#endif
727
728 // It contain for each time-step the value detected by the probes
731
732 probes.add({0.8779f,0.3f,0.02f});
733 probes.add({0.754f,0.31f,0.02f});
734
735 // Here we define our domain a 2D box with internals from 0 to 1.0 for x and y
736 Box<3,real_number> domain({-0.05f,-0.05f,-0.05f},{1.7010f,0.7065f,0.511f});
737 size_t sz[3] = {413,179,133};
738
739 // Fill W_dap
740 W_dap = 1.0/Wab(H/1.5);
741
742 // Here we define the boundary conditions of our problem
743 size_t bc[3]={NON_PERIODIC,NON_PERIODIC,NON_PERIODIC};
744
745 // extended boundary around the domain, and the processor domain
747
748 particles vd(0,domain,bc,g,DEC_GRAN(128));
749
751
752 // You can ignore all these dp/2.0 is a trick to reach the same initialization
753 // of Dual-SPH that use a different criteria to draw particles
754 Box<3,real_number> fluid_box({dp/2.0f,dp/2.0f,dp/2.0f},{0.4f+dp/2.0f,0.67f-dp/2.0f,0.3f+dp/2.0f});
755
756 // return an iterator to the fluid particles to add to vd
757 auto fluid_it = DrawParticles::DrawBox(vd,sz,domain,fluid_box);
758
759 // here we fill some of the constants needed by the simulation
760 max_fluid_height = fluid_it.getBoxMargins().getHigh(2);
761 h_swl = fluid_it.getBoxMargins().getHigh(2) - fluid_it.getBoxMargins().getLow(2);
762 B = (coeff_sound)*(coeff_sound)*gravity*h_swl*rho_zero / gamma_;
763 cbar = coeff_sound * sqrt(gravity * h_swl);
764
765 // for each particle inside the fluid box ...
766 while (fluid_it.isNext())
767 {
768 // ... add a particle ...
769 vd.add();
770
771 // ... and set it position ...
772 vd.getLastPos()[0] = fluid_it.get().get(0);
773 vd.getLastPos()[1] = fluid_it.get().get(1);
774 vd.getLastPos()[2] = fluid_it.get().get(2);
775
776 // and its type.
777 vd.template getLastProp<type>() = FLUID;
778
779 // We also initialize the density of the particle and the hydro-static pressure given by
780 //
781 // rho_zero*g*h = P
782 //
783 // rho_p = (P/B + 1)^(1/Gamma) * rho_zero
784 //
785
786 vd.template getLastProp<Pressure>() = rho_zero * gravity * (max_fluid_height - fluid_it.get().get(2));
787
788 vd.template getLastProp<rho>() = pow(vd.template getLastProp<Pressure>() / B + 1, 1.0/gamma_) * rho_zero;
789 vd.template getLastProp<rho_prev>() = vd.template getLastProp<rho>();
790 vd.template getLastProp<velocity>()[0] = 0.0;
791 vd.template getLastProp<velocity>()[1] = 0.0;
792 vd.template getLastProp<velocity>()[2] = 0.0;
793
794 vd.template getLastProp<velocity_prev>()[0] = 0.0;
795 vd.template getLastProp<velocity_prev>()[1] = 0.0;
796 vd.template getLastProp<velocity_prev>()[2] = 0.0;
797
798 // next fluid particle
799 ++fluid_it;
800 }
801
802 // Recipient
803 Box<3,real_number> recipient1({0.0f,0.0f,0.0f},{1.6f+dp/2.0f,0.67f+dp/2.0f,0.4f+dp/2.0f});
804 Box<3,real_number> recipient2({dp,dp,dp},{1.6f-dp/2.0f,0.67f-dp/2.0f,0.4f+dp/2.0f});
805
806 Box<3,real_number> obstacle1({0.9f,0.24f-dp/2.0f,0.0f},{1.02f+dp/2.0f,0.36f,0.45f+dp/2.0f});
807 Box<3,real_number> obstacle2({0.9f+dp,0.24f+dp/2.0f,0.0f},{1.02f-dp/2.0f,0.36f-dp,0.45f-dp/2.0f});
808 Box<3,real_number> obstacle3({0.9f+dp,0.24f,0.0f},{1.02f,0.36f,0.45f});
809
811 holes.add(recipient2);
812 holes.add(obstacle1);
813 auto bound_box = DrawParticles::DrawSkin(vd,sz,domain,holes,recipient1);
814
815 while (bound_box.isNext())
816 {
817 vd.add();
818
819 vd.getLastPos()[0] = bound_box.get().get(0);
820 vd.getLastPos()[1] = bound_box.get().get(1);
821 vd.getLastPos()[2] = bound_box.get().get(2);
822
823 vd.template getLastProp<type>() = BOUNDARY;
824 vd.template getLastProp<rho>() = rho_zero;
825 vd.template getLastProp<rho_prev>() = rho_zero;
826 vd.template getLastProp<velocity>()[0] = 0.0;
827 vd.template getLastProp<velocity>()[1] = 0.0;
828 vd.template getLastProp<velocity>()[2] = 0.0;
829
830 vd.template getLastProp<velocity_prev>()[0] = 0.0;
831 vd.template getLastProp<velocity_prev>()[1] = 0.0;
832 vd.template getLastProp<velocity_prev>()[2] = 0.0;
833
834 ++bound_box;
835 }
836
837 auto obstacle_box = DrawParticles::DrawSkin(vd,sz,domain,obstacle2,obstacle1);
838
839 while (obstacle_box.isNext())
840 {
841 vd.add();
842
843 vd.getLastPos()[0] = obstacle_box.get().get(0);
844 vd.getLastPos()[1] = obstacle_box.get().get(1);
845 vd.getLastPos()[2] = obstacle_box.get().get(2);
846
847 vd.template getLastProp<type>() = BOUNDARY;
848 vd.template getLastProp<rho>() = rho_zero;
849 vd.template getLastProp<rho_prev>() = rho_zero;
850 vd.template getLastProp<velocity>()[0] = 0.0;
851 vd.template getLastProp<velocity>()[1] = 0.0;
852 vd.template getLastProp<velocity>()[2] = 0.0;
853
854 vd.template getLastProp<velocity_prev>()[0] = 0.0;
855 vd.template getLastProp<velocity_prev>()[1] = 0.0;
856 vd.template getLastProp<velocity_prev>()[2] = 0.0;
857
858 ++obstacle_box;
859 }
860
861 vd.map();
862
863 // Now that we fill the vector with particles
864 ModelCustom md;
865
866 vd.addComputationCosts(md);
867 vd.getDecomposition().decompose();
868 vd.map();
869
871
872 // Ok the initialization is done on CPU on GPU we are doing the main loop, so first we offload all properties on GPU
873
874 vd.hostToDevicePos();
875 vd.template hostToDeviceProp<type,rho,rho_prev,Pressure,velocity,velocity_prev>();
876
877 vd.ghost_get<type,rho,Pressure,velocity>(RUN_ON_DEVICE);
878
879 auto NN = vd.getCellListGPU/*<CELLLIST_GPU_SPARSE<3,float>>*/(2*H / 2.0);
880 //NN.setBoxNN(2);
881
882 timer tot_sim;
883 tot_sim.start();
884
885 size_t write = 0;
886 size_t it = 0;
887 size_t it_reb = 0;
888 real_number t = 0.0;
889 while (t <= t_end)
890 {
891 Vcluster<> & v_cl = create_vcluster();
892 timer it_time;
893 it_time.start();
894
896 it_reb++;
897 if (it_reb == 300)
898 {
899 vd.map(RUN_ON_DEVICE);
900
901 // Rebalancer for now work on CPU , so move to CPU
902 vd.deviceToHostPos();
903 vd.template deviceToHostProp<type>();
904
905 it_reb = 0;
906 ModelCustom md;
907 vd.addComputationCosts(md);
908 vd.getDecomposition().decompose();
909
910 if (v_cl.getProcessUnitID() == 0)
911 {std::cout << "REBALANCED " << it_reb << std::endl;}
912 }
913
914 vd.map(RUN_ON_DEVICE);
915
916 // it sort the vector (doesn not seem to produce some advantage)
917 // note force calculation is anyway sorted calculation
918 //vd.make_sort(NN);
919
920 // Calculate pressure from the density
921 EqState(vd);
922
923 real_number max_visc = 0.0;
924
925 vd.ghost_get<type,rho,Pressure,velocity>(RUN_ON_DEVICE);
926
927 // Calc forces
928 calc_forces(vd,NN,max_visc,cnt);
929
930
931 // Get the maximum viscosity term across processors
932 v_cl.max(max_visc);
933 v_cl.execute();
934
935 // Calculate delta t integration
936 real_number dt = calc_deltaT(vd,max_visc);
937
938 // VerletStep or euler step
939 it++;
940 if (it < 40)
941 verlet_int(vd,dt);
942 else
943 {
944 euler_int(vd,dt);
945 it = 0;
946 }
947
948 t += dt;
949
950 if (write < t*10)
951 {
952 // Sensor pressure require update ghost, so we ensure that particles are distributed correctly
953 // and ghost are updated
954 vd.map(RUN_ON_DEVICE);
955 vd.ghost_get<type,rho,Pressure,velocity>(RUN_ON_DEVICE);
956 vd.updateCellList(NN);
957
958 // calculate the pressure at the sensor points
959 //sensor_pressure(vd,NN,press_t,probes);
960
961 std::cout << "OUTPUT " << dt << std::endl;
962
963 // When we write we have move all the particles information back to CPU
964
965 vd.deviceToHostPos();
966 vd.deviceToHostProp<type,rho,rho_prev,Pressure,drho,force,velocity,velocity_prev,red,red2>();
967
968 // We copy on another vector with less properties to reduce the size of the output
970
971 auto ito = vd.getDomainIterator();
972
973 while(ito.isNext())
974 {
975 auto p = ito.get();
976
977 vd_out.add();
978
979 vd_out.getLastPos()[0] = vd.getPos(p)[0];
980 vd_out.getLastPos()[1] = vd.getPos(p)[1];
981 vd_out.getLastPos()[2] = vd.getPos(p)[2];
982
983 vd_out.template getLastProp<0>() = vd.template getProp<type>(p);
984
985 vd_out.template getLastProp<1>()[0] = vd.template getProp<velocity>(p)[0];
986 vd_out.template getLastProp<1>()[1] = vd.template getProp<velocity>(p)[1];
987 vd_out.template getLastProp<1>()[2] = vd.template getProp<velocity>(p)[2];
988
989 ++ito;
990 }
991
992 vd_out.write_frame("Particles",write,VTK_WRITER | FORMAT_BINARY);
993 write++;
994
995 if (v_cl.getProcessUnitID() == 0)
996 {std::cout << "TIME: " << t << " write " << it_time.getwct() << " " << it_reb << " " << cnt << " Max visc: " << max_visc << " " << vd.size_local() << std::endl;}
997 }
998 else
999 {
1000 if (v_cl.getProcessUnitID() == 0)
1001 {std::cout << "TIME: " << t << " " << it_time.getwct() << " " << it_reb << " " << cnt << " Max visc: " << max_visc << " " << vd.size_local() << std::endl;}
1002 }
1003 }
1004
1005 tot_sim.stop();
1006 std::cout << "Time to complete: " << tot_sim.getwct() << " seconds" << std::endl;
1007
1008
1009 openfpm_finalize();
1010}
1011
1012#else
1013
1014int main(int argc, char* argv[])
1015{
1016 return 0;
1017}
1018
1019#endif
This class represent an N-dimensional box.
Definition Box.hpp:61
__device__ __host__ T getLow(int i) const
get the i-coordinate of the low bound interval of the box
Definition Box.hpp:556
__device__ __host__ T getHigh(int i) const
get the high interval of the box
Definition Box.hpp:567
Class for FAST cell list implementation.
Definition CellList.hpp:357
auto get(size_t cell, size_t ele) -> decltype(this->Mem_type::get(cell, ele))
Get an element in the cell.
Definition CellList.hpp:867
This class define the domain decomposition interface.
static PointIteratorSkin< dim, T, typename vd_type::Decomposition_type > DrawSkin(vd_type &vd, size_t(&sz)[dim], Box< dim, T > &domain, Box< dim, T > &sub_A, Box< dim, T > &sub_B)
Draw particles in a box B excluding the area of a second box A (B - A)
static PointIterator< dim, T, typename vd_type::Decomposition_type > DrawBox(vd_type &vd, size_t(&sz)[dim], Box< dim, T > &domain, Box< dim, T > &sub)
Draw particles in a box.
This class implement the point shape in an N-dimensional space.
Definition Point.hpp:28
__device__ __host__ const T & get(unsigned int i) const
Get coordinate.
Definition Point.hpp:172
void execute()
Execute all the requests.
void sum(T &num)
Sum the numbers across all processors and get the result.
size_t getProcessUnitID()
Get the process unit id.
void max(T &num)
Get the maximum number across all processors (or reduction with infinity norm)
Implementation of VCluster class.
Definition VCluster.hpp:59
Sparse Matrix implementation stub object when OpenFPM is compiled with no linear algebra support.
Definition Vector.hpp:40
Implementation of 1-D std::vector like structure.
Class for cpu time benchmarking.
Definition timer.hpp:28
void stop()
Stop the timer.
Definition timer.hpp:119
void start()
Start the timer.
Definition timer.hpp:90
double getwct()
Return the elapsed real time.
Definition timer.hpp:130
Distributed vector.
void updateCellList(CellL &cell_list, bool no_se3=false, cl_construct_opt opt=cl_construct_opt::Full)
Update a cell list using the stored particles.
auto getPos(vect_dist_key_dx vec_key) -> decltype(v_pos.template get< 0 >(vec_key.getKey()))
Get the position of an element.
Model for Dynamic load balancing.
Definition main.cpp:222
temporal buffer for reductions