OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
main.cu
//#define VCLUSTER_PERF_REPORT <------ Activate telemetry for the VCluster data-structure
//#define SYNC_BEFORE_TAKE_TIME <------ Force synchronization of the kernels every time we take a timing with the timer structure.
// Use this option for telemetry on the GPU, otherwise the results are unreliable
//#define ENABLE_GRID_DIST_ID_PERF_STATS <------ Activate telemetry for the grid data-structure

#include "Decomposition/Distribution/BoxDistribution.hpp"
#include "Grid/grid_dist_id.hpp"
#include "data_type/aggregate.hpp"
#include "timer.hpp"
#ifdef __NVCC__

constexpr int U = 0;
constexpr int V = 1;

constexpr int U_next = 2;
constexpr int V_next = 3;

constexpr int x = 0;
constexpr int y = 1;
constexpr int z = 2;

typedef sgrid_dist_id_gpu<3,float,aggregate<float,float,float,float>,CudaMemory, Dec> SparseGridType;
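
// SparseGridType is a 3D sparse grid on the GPU: four float properties per point
// (U, V, U_next and V_next, indexed by the constexpr values above), stored in CudaMemory
// and distributed with the decomposition type Dec (not shown in this listing).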

void init(SparseGridType & grid, Box<3,float> & domain)
{
    typedef typename GetAddBlockType<SparseGridType>::type InsertBlockT;

    // Fill the whole grid with the steady state U = 1, V = 0
    grid.addPoints([] __device__ (int i, int j, int k)
            {
                return true;
            },
            [] __device__ (InsertBlockT & data, int i, int j, int k)
            {
                data.template get<U>() = 1.0;
                data.template get<V>() = 0.0;
            }
            );

    // Flush the queued insertions into the sparse grid on the device
    grid.template flush<smax_<U>,smax_<V>>(flush_type::FLUSH_ON_DEVICE);

    // Convert the physical coordinates 1.55 and 1.85 into grid indices
    long int x_start = grid.size(0)*1.55f/domain.getHigh(0);
    long int y_start = grid.size(1)*1.55f/domain.getHigh(1);
    long int z_start = grid.size(2)*1.55f/domain.getHigh(2);

    long int x_stop = grid.size(0)*1.85f/domain.getHigh(0);
    long int y_stop = grid.size(1)*1.85f/domain.getHigh(1);
    long int z_stop = grid.size(2)*1.85f/domain.getHigh(2);

    grid_key_dx<3> start({x_start,y_start,z_start});
    grid_key_dx<3> stop ({x_stop,y_stop,z_stop});

    // Seed the perturbation U = 0.5, V = 0.24 inside the box [start,stop]
    grid.addPoints(start,stop,[] __device__ (int i, int j, int k)
            {
                return true;
            },
            [] __device__ (InsertBlockT & data, int i, int j, int k)
            {
                data.template get<U>() = 0.5;
                data.template get<V>() = 0.24;
            }
            );

    grid.template flush<smax_<U>,smax_<V>>(flush_type::FLUSH_ON_DEVICE);
}
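
// For the sz = {256,256,256} grid and the 2.5 x 2.5 x 2.5 domain used in main() below,
// the perturbed box spans roughly indices 256*1.55/2.5 ≈ 158 to 256*1.85/2.5 ≈ 189 along
// each axis, i.e. a small cube in the middle of the domain on top of the U = 1, V = 0 background.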

int main(int argc, char* argv[])
{
    openfpm_init(&argc,&argv);

    // domain
    Box<3,float> domain({0.0,0.0,0.0},{2.5,2.5,2.5});

    // grid size
    size_t sz[3] = {256,256,256};

    // Define periodicity of the grid
    periodicity<3> bc = {PERIODIC,PERIODIC,PERIODIC};

    // Ghost in grid units
    Ghost<3,long int> g(1);

    // deltaT
    float deltaT = 0.25;

    // Diffusion constant for species U
    float du = 2*1e-5;

    // Diffusion constant for species V
    float dv = 1*1e-5;

    // Number of timesteps
#ifdef TEST_RUN
    size_t timeSteps = 300;
#else
    size_t timeSteps = 15000;
#endif

    // K and F (physical constants in the equation)
    float K = 0.053;
    float F = 0.014;

    SparseGridType grid(sz,domain,g,bc);

    // spacing of the grid on x, y and z
    float spacing[3] = {grid.spacing(0),grid.spacing(1),grid.spacing(2)};

    init(grid,domain);

    // sync the ghost
    grid.template ghost_get<U,V>(RUN_ON_DEVICE);

    // Because we assume that spacing[x] == spacing[y] == spacing[z] we use formula (2)
    // and calculate the prefactor of Eq. (2)
    float uFactor = deltaT * du/(spacing[x]*spacing[x]);
    float vFactor = deltaT * dv/(spacing[x]*spacing[x]);
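
    // uFactor and vFactor are the prefactors deltaT*D/h^2 of the discretized Laplacian used
    // in the stencil below. Assuming a spacing of roughly 2.5/256 ~ 0.0098, uFactor is about
    // 0.052 and vFactor about 0.026.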

    auto & v_cl = create_vcluster();

    timer tot_sim;
    tot_sim.start();

    for (size_t i = 0; i < timeSteps ; ++i)
    {
        if (v_cl.rank() == 0)
        {std::cout << "STEP: " << i << std::endl;}
/*      if (i % 300 == 0)
        {
            std::cout << "STEP: " << i << std::endl;
            grid.write_frame("out",i,VTK_WRITER);
        }*/

        typedef typename GetCpBlockType<decltype(grid),0,1>::type CpBlockType;

        auto func = [uFactor,vFactor,deltaT,F,K] __device__ (float & u_out, float & v_out,
                                                             CpBlockType & u, CpBlockType & v,
                                                             int i, int j, int k){

            float uc = u(i,j,k);
            float vc = v(i,j,k);

            u_out = uc + uFactor *(u(i-1,j,k) + u(i+1,j,k) +
                                   u(i,j-1,k) + u(i,j+1,k) +
                                   u(i,j,k-1) + u(i,j,k+1) - 6.0f*uc) - deltaT * uc*vc*vc
                                   - deltaT * F * (uc - 1.0f);

            v_out = vc + vFactor *(v(i-1,j,k) + v(i+1,j,k) +
                                   v(i,j+1,k) + v(i,j-1,k) +
                                   v(i,j,k-1) + v(i,j,k+1) - 6.0f*vc) + deltaT * uc*vc*vc
                                   - deltaT * (F+K) * vc;
        };
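
        // The stencil above is the explicit Euler update of the Gray-Scott model:
        //   u_new = u + dt*Du/h^2 * (sum of 6 neighbours - 6*u) - dt*u*v^2 + dt*F*(1 - u)
        //   v_new = v + dt*Dv/h^2 * (sum of 6 neighbours - 6*v) + dt*u*v^2 - dt*(F + K)*v
        // where uFactor = dt*Du/h^2 and vFactor = dt*Dv/h^2 are computed above.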

        if (i % 2 == 0)
        {
            cudaDeviceSynchronize();
            timer tconv;
            tconv.start();
            grid.conv2<U,V,U_next,V_next,1>({0,0,0},{(long int)sz[0]-1,(long int)sz[1]-1,(long int)sz[2]-1},func);
            cudaDeviceSynchronize();
            tconv.stop();
            std::cout << "Conv " << tconv.getwct() << std::endl;

            // After the convolution we synchronize again the ghost parts of U_next and V_next
            grid.ghost_get<U_next,V_next>(RUN_ON_DEVICE | SKIP_LABELLING);
        }
        else
        {
            grid.conv2<U_next,V_next,U,V,1>({0,0,0},{(long int)sz[0]-1,(long int)sz[1]-1,(long int)sz[2]-1},func);

            // After the convolution we synchronize again the ghost parts of U and V
            grid.ghost_get<U,V>(RUN_ON_DEVICE | SKIP_LABELLING);
        }
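
        // The even/odd branches implement double buffering: on even steps the stencil reads
        // <U,V> and writes <U_next,V_next>, on odd steps the roles are swapped, so no extra
        // copy of the fields is needed between iterations.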

        // Every 500 time steps we output the configuration for
        // visualization
//      if (i % 500 == 0)
//      {
//          grid.save("output_" + std::to_string(count));
//          count++;
//      }
    }

    tot_sim.stop();
    std::cout << "Total simulation: " << tot_sim.getwct() << std::endl;

    // Copy the properties back from the device and write the final configuration
    grid.deviceToHost<U,V,U_next,V_next>();
    grid.write("final");

    openfpm_finalize();
}

#else

int main(int argc, char* argv[])
{
    return 0;
}

#endif