OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
 
Loading...
Searching...
No Matches
main.cu
1//#define VCLUSTER_PERF_REPORT <------ Activate telemetry for the VCluster data-structure
2//#define SYNC_BEFORE_TAKE_TIME <------ Force synchronization of the kernels everytime we take the time with the structure timer.
3// Use this option for telemetry on GPU, otherwise the results are unreliable
4//#define ENABLE_GRID_DIST_ID_PERF_STATS <------ Activate telemetry for the grid data-structure
5
6#include "Decomposition/Distribution/BoxDistribution.hpp"
7#include "Grid/grid_dist_id.hpp"
8#include "data_type/aggregate.hpp"
9#include "timer.hpp"
10
43#ifdef __NVCC__
44
// Field indices inside the aggregate: current and next concentrations of the
// two chemical species. Two buffers per species allow the ping-pong update
// scheme used in the time loop (read U,V / write U_next,V_next and vice versa).
constexpr int U = 0;
constexpr int V = 1;

constexpr int U_next = 2;
constexpr int V_next = 3;

// Readable aliases for the three spatial axes
constexpr int x = 0;
constexpr int y = 1;
constexpr int z = 2;

// Decomposition type used by the sparse grid.
// NOTE(review): this typedef was missing from the reviewed snippet while `Dec`
// is used just below — restored to match the BoxDistribution header included
// at the top of the file; confirm against the original source.
typedef CartDecomposition<3,float,CudaMemory,memory_traits_inte,BoxDistribution<3,float>> Dec;

// GPU sparse grid: 3 dimensions, float coordinates, 4 float fields
// (U, V, U_next, V_next), device memory, box-based decomposition.
typedef sgrid_dist_id_gpu<3,float,aggregate<float,float,float,float>,CudaMemory, Dec> SparseGridType;
62
/*! \brief Initialize the two species on the sparse grid.
 *
 * Fills the whole grid with the steady state (U = 1, V = 0), then seeds a
 * perturbation (U = 0.5, V = 0.24) inside the box [1.55,1.85]^3 (physical
 * units) converted to grid indices.
 *
 * \param grid   sparse grid to initialize (insertions flushed on device)
 * \param domain physical simulation domain, used to convert physical
 *               coordinates into grid indices
 */
void init(SparseGridType & grid, Box<3,float> & domain)
{
	typedef typename GetAddBlockType<SparseGridType>::type InsertBlockT;

	// Insert every point of the grid with the steady-state values
	grid.addPoints([] __device__ (int i, int j, int k)
	        {
	            return true;
	        },
	        [] __device__ (InsertBlockT & data, int i, int j, int k)
	        {
	            data.template get<U>() = 1.0f;
	            data.template get<V>() = 0.0f;
	        }
	        );

	grid.template flush<smax_<U>,smax_<V>>(flush_type::FLUSH_ON_DEVICE);

	// Perturbation box in grid indices.
	// BUG FIX: the z extents previously used grid.size(1) (copy-paste error);
	// harmless only because the example grid is cubic — use grid.size(2).
	long int x_start = grid.size(0)*1.55f/domain.getHigh(0);
	long int y_start = grid.size(1)*1.55f/domain.getHigh(1);
	long int z_start = grid.size(2)*1.55f/domain.getHigh(2);

	long int x_stop = grid.size(0)*1.85f/domain.getHigh(0);
	long int y_stop = grid.size(1)*1.85f/domain.getHigh(1);
	long int z_stop = grid.size(2)*1.85f/domain.getHigh(2);

	grid_key_dx<3> start({x_start,y_start,z_start});
	grid_key_dx<3> stop ({x_stop,y_stop,z_stop});

	// Overwrite the perturbation region with the seed values
	grid.addPoints(start,stop,[] __device__ (int i, int j, int k)
	        {
	            return true;
	        },
	        [] __device__ (InsertBlockT & data, int i, int j, int k)
	        {
	            data.template get<U>() = 0.5f;
	            data.template get<V>() = 0.24f;
	        }
	        );

	grid.template flush<smax_<U>,smax_<V>>(flush_type::FLUSH_ON_DEVICE);
}
113
114
/*! \brief Reaction-diffusion simulation on a GPU sparse grid.
 *
 * Sets up a periodic 256^3 sparse grid over [0,2.5]^3, initializes the two
 * species via init(), then time-steps an explicit 7-point-stencil update,
 * ping-ponging between (U,V) and (U_next,V_next) to avoid a copy each step.
 */
int main(int argc, char* argv[])
{
	openfpm_init(&argc,&argv);

	// Physical simulation domain
	Box<3,float> domain({0.0,0.0,0.0},{2.5,2.5,2.5});

	// grid size
	size_t sz[3] = {256,256,256};

	// Define periodicity of the grid
	periodicity<3> bc = {PERIODIC,PERIODIC,PERIODIC};

	// Ghost in grid unit: one layer, as required by the 7-point stencil below.
	// NOTE(review): this declaration was missing from the reviewed snippet
	// while `g` is used in the grid constructor — restored; confirm the ghost
	// extent (1) against the original source.
	Ghost<3,long int> g(1);

	// deltaT
	float deltaT = 0.25;

	// Diffusion constant for specie U
	float du = 2*1e-5;

	// Diffusion constant for specie V
	float dv = 1*1e-5;

	// Number of timesteps
#ifdef TEST_RUN
	size_t timeSteps = 300;
#else
	size_t timeSteps = 15000;
#endif

	// K and F (physical constants in the reaction terms)
	float K = 0.053;
	float F = 0.014;

	SparseGridType grid(sz,domain,g,bc);

	// spacing of the grid on x, y and z
	float spacing[3] = {grid.spacing(0),grid.spacing(1),grid.spacing(2)};

	init(grid,domain);

	// sync the ghost
	grid.template ghost_get<U,V>(RUN_ON_DEVICE);

	// because we assume that spacing[x] == spacing[y] we use formula 2
	// and we calculate the prefactor of Eq 2
	float uFactor = deltaT * du/(spacing[x]*spacing[x]);
	float vFactor = deltaT * dv/(spacing[x]*spacing[x]);

	auto & v_cl = create_vcluster();

	timer tot_sim;
	tot_sim.start();

	for (size_t i = 0; i < timeSteps ; ++i)
	{
		// Progress report from the master rank only
		if (v_cl.rank() == 0)
		{std::cout << "STEP: " << i << std::endl;}

		typedef typename GetCpBlockType<decltype(grid),0,1>::type CpBlockType;

		// One explicit update step: 7-point Laplacian (diffusion) plus the
		// u*v^2 reaction terms; all literals kept in single precision.
		auto func = [uFactor,vFactor,deltaT,F,K] __device__ (float & u_out, float & v_out,
		                                                     CpBlockType & u, CpBlockType & v,
		                                                     int i, int j, int k){

			float uc = u(i,j,k);
			float vc = v(i,j,k);

			u_out = uc + uFactor *(u(i-1,j,k) + u(i+1,j,k) +
			                       u(i,j-1,k) + u(i,j+1,k) +
			                       u(i,j,k-1) + u(i,j,k+1) - 6.0f*uc) - deltaT * uc*vc*vc
			           - deltaT * F * (uc - 1.0f);

			v_out = vc + vFactor *(v(i-1,j,k) + v(i+1,j,k) +
			                       v(i,j+1,k) + v(i,j-1,k) +
			                       v(i,j,k-1) + v(i,j,k+1) - 6.0f*vc) + deltaT * uc*vc*vc
			           - deltaT * (F+K) * vc;
			};

		// Ping-pong: even steps read U,V and write U_next,V_next; odd steps
		// do the reverse, so no buffer copy is ever needed.
		if (i % 2 == 0)
		{
			// The device syncs bracket conv2 only to make the wall-clock
			// measurement meaningful; they deliberately serialize this branch.
			cudaDeviceSynchronize();
			timer tconv;
			tconv.start();
			grid.conv2<U,V,U_next,V_next,1>({0,0,0},{(long int)sz[0]-1,(long int)sz[1]-1,(long int)sz[2]-1},func);
			cudaDeviceSynchronize();
			tconv.stop();
			std::cout << "Conv " << tconv.getwct() << std::endl;

			// After the stencil we synchronize again the ghost of the
			// freshly written fields
			grid.ghost_get<U_next,V_next>(RUN_ON_DEVICE | SKIP_LABELLING);
		}
		else
		{
			grid.conv2<U_next,V_next,U,V,1>({0,0,0},{(long int)sz[0]-1,(long int)sz[1]-1,(long int)sz[2]-1},func);

			// After the stencil we synchronize again the ghost part U and V
			grid.ghost_get<U,V>(RUN_ON_DEVICE | SKIP_LABELLING);
		}

		// Periodic visualization output could be enabled here, e.g.:
		// if (i % 500 == 0)
		// {	grid.write_frame("out",i,VTK_WRITER);	}
	}

	tot_sim.stop();
	std::cout << "Total simulation: " << tot_sim.getwct() << std::endl;

	// Copy the result back to the host and dump the final configuration
	grid.deviceToHost<U,V,U_next,V_next>();
	grid.write("final");

	openfpm_finalize();
}
277
278#else
279
// Fallback entry point when the file is compiled without nvcc (__NVCC__ not
// defined): the GPU example cannot run, so do nothing and exit successfully.
int main(int argc, char* argv[])
{
	(void)argc;
	(void)argv;
	return 0;
}
284
285#endif
286
This class represent an N-dimensional box.
Definition Box.hpp:61
This class decompose a space into sub-sub-domains and distribute them across processors.
grid_key_dx is the key to access any element in the grid
Definition grid_key.hpp:19
Class for cpu time benchmarking.
Definition timer.hpp:28
void stop()
Stop the timer.
Definition timer.hpp:119
void start()
Start the timer.
Definition timer.hpp:90
double getwct()
Return the elapsed real time.
Definition timer.hpp:130
KeyT const ValueT ValueT OffsetIteratorT OffsetIteratorT int
[in] The number of segments that comprise the sorting data
OutputIteratorT OffsetT ReductionOpT OuputT init
< [in] The initial value of the reduction
[v_transform metafunction]
get the type of the block
Boundary conditions.
Definition common.hpp:22