OpenFPM_pdata  4.1.0
Project that contain the implementation of distributed structures
main_gpu.cu
1 
104 #ifdef __NVCC__
105 
106 #include "Vector/vector_dist.hpp"
107 #include "Plot/GoogleChart.hpp"
108 #include "Plot/util.hpp"
109 #include "timer.hpp"
110 
111 #ifdef TEST_RUN
112 size_t nstep = 100;
113 #else
114 size_t nstep = 1000;
115 #endif
116 
117 typedef float real_number;
118 
119 constexpr int velocity = 0;
120 constexpr int force = 1;
121 constexpr int energy = 2;
122 
123 template<typename vector_dist_type,typename NN_type>
124 __global__ void calc_force_gpu(vector_dist_type vd, NN_type NN, real_number sigma12, real_number sigma6, real_number r_cut2)
125 {
126  unsigned int p;
127  GET_PARTICLE_SORT(p,NN);
128 
129  // Get the position xp of the particle
130  Point<3,real_number> xp = vd.getPos(p);
131 
132  // Reset the force counter
133  vd.template getProp<force>(p)[0] = 0.0;
134  vd.template getProp<force>(p)[1] = 0.0;
135  vd.template getProp<force>(p)[2] = 0.0;
136 
137  Point<3,real_number> force_;
138  force_.get(0) = 0.0;
139  force_.get(1) = 0.0;
140  force_.get(2) = 0.0;
141 
142  // Get an iterator over the neighborhood particles of p
143  auto Np = NN.getNNIteratorBox(NN.getCell(vd.getPos(p)));
144 
145  // For each neighborhood particle ...
146  while (Np.isNext())
147  {
149 
150  // ... q
151  auto q = Np.get_sort();
152 
154 
155  // if (p == q) skip this particle
156  if (q == p) {++Np; continue;};
157 
158  // Get the position of p
159  Point<3,real_number> xq = vd.getPos(q);
160 
161  // Get the distance between p and q
162  Point<3,real_number> r = xp - xq;
163 
164  // take the norm of this vector
165  real_number rn = norm2(r);
166 
167  if (rn > r_cut2)
168  {++Np; continue;};
169 
170  // Calculate the force, using pow is slower
171  Point<3,real_number> f = 24.0*(2.0 *sigma12 / (rn*rn*rn*rn*rn*rn*rn) - sigma6 / (rn*rn*rn*rn)) * r;
172  force_ += f;
173 
174  // Next neighborhood
175  ++Np;
176  }
177 
178  // we sum the force produced by q on p
179  vd.template getProp<force>(p)[0] = force_.get(0);
180  vd.template getProp<force>(p)[1] = force_.get(1);
181  vd.template getProp<force>(p)[2] = force_.get(2);
182 }
183 
184 template<typename vector_dist_type>
185 __global__ void update_velocity_position(vector_dist_type vd, real_number dt)
186 {
187  auto p = GET_PARTICLE(vd);
188 
189  // here we calculate v(tn + 0.5)
190  vd.template getProp<velocity>(p)[0] += 0.5*dt*vd.template getProp<force>(p)[0];
191  vd.template getProp<velocity>(p)[1] += 0.5*dt*vd.template getProp<force>(p)[1];
192  vd.template getProp<velocity>(p)[2] += 0.5*dt*vd.template getProp<force>(p)[2];
193 
194  // here we calculate x(tn + 1)
195  vd.getPos(p)[0] += vd.template getProp<velocity>(p)[0]*dt;
196  vd.getPos(p)[1] += vd.template getProp<velocity>(p)[1]*dt;
197  vd.getPos(p)[2] += vd.template getProp<velocity>(p)[2]*dt;
198 }
199 
200 template<typename vector_dist_type>
201 __global__ void update_velocity(vector_dist_type vd, real_number dt)
202 {
203  auto p = GET_PARTICLE(vd);
204 
205  // here we calculate v(tn + 1)
206  vd.template getProp<velocity>(p)[0] += 0.5*dt*vd.template getProp<force>(p)[0];
207  vd.template getProp<velocity>(p)[1] += 0.5*dt*vd.template getProp<force>(p)[1];
208  vd.template getProp<velocity>(p)[2] += 0.5*dt*vd.template getProp<force>(p)[2];
209 }
210 
211 template<typename vector_dist_type,typename NN_type>
212 __global__ void particle_energy(vector_dist_type vd, NN_type NN, real_number sigma12, real_number sigma6, real_number shift, real_number r_cut2)
213 {
214  unsigned int p;
215  GET_PARTICLE_SORT(p,NN);
216 
217  // Get the position of the particle p
218  Point<3,real_number> xp = vd.getPos(p);
219 
220  // Get an iterator over the neighborhood of the particle p
221  auto Np = NN.getNNIteratorBox(NN.getCell(vd.getPos(p)));
222 
223  real_number E = 0;
224 
225  // For each neighborhood of the particle p
226  while (Np.isNext())
227  {
228  // Neighborhood particle q
229  auto q = Np.get_sort();
230 
231  // if p == q skip this particle
232  if (q == p) {++Np; continue;};
233 
234  // Get position of the particle q
235  Point<3,real_number> xq = vd.getPos(q);
236 
237  // take the normalized direction
238  real_number rn = norm2(xp - xq);
239 
240  if (rn > r_cut2)
241  {++Np;continue;}
242 
243  // potential energy (using pow is slower)
244  E += 2.0 * ( sigma12 / (rn*rn*rn*rn*rn*rn) - sigma6 / ( rn*rn*rn) ) - shift;
245 
246  // Next neighborhood
247  ++Np;
248  }
249 
250  // Kinetic energy of the particle given by its actual speed
251  vd.template getProp<energy>(p) = E + (vd.template getProp<velocity>(p)[0]*vd.template getProp<velocity>(p)[0] +
252  vd.template getProp<velocity>(p)[1]*vd.template getProp<velocity>(p)[1] +
253  vd.template getProp<velocity>(p)[2]*vd.template getProp<velocity>(p)[2]) / 2;
254 }
255 
256 template<typename CellList> void calc_forces(vector_dist_gpu<3,real_number, aggregate<real_number[3],real_number[3],real_number> > & vd, CellList & NN, real_number sigma12, real_number sigma6, real_number r_cut2)
257 {
258  vd.updateCellList(NN);
259 
260  // Get an iterator over particles
261  auto it2 = vd.getDomainIteratorGPU();
262 
264 
265  CUDA_LAUNCH(calc_force_gpu,it2,vd.toKernel_sorted(),NN.toKernel(),sigma12,sigma6,r_cut2);
266 
268 
270 
271  vd.merge_sort<force>(NN);
272 
274 }
275 
276 template<typename CellList> real_number calc_energy(vector_dist_gpu<3,real_number, aggregate<real_number[3],real_number[3],real_number> > & vd, CellList & NN, real_number sigma12, real_number sigma6, real_number r_cut2)
277 {
278  real_number rc = r_cut2;
279  real_number shift = 2.0 * ( sigma12 / (rc*rc*rc*rc*rc*rc) - sigma6 / ( rc*rc*rc) );
280 
281  vd.updateCellList(NN);
282 
283  auto it2 = vd.getDomainIteratorGPU();
284 
285  CUDA_LAUNCH(particle_energy,it2,vd.toKernel_sorted(),NN.toKernel(),sigma12,sigma6,shift,r_cut2);
286 
287  vd.merge_sort<energy>(NN);
288 
289  // Calculated energy
290  return reduce_local<energy,_add_>(vd);
291 }
292 
293 int main(int argc, char* argv[])
294 {
295  openfpm_init(&argc,&argv);
296 
297  real_number sigma = 0.01;
298  real_number r_cut =3.0*sigma;
299 
300  // we will use it do place particles on a 10x10x10 Grid like
301  size_t sz[3] = {100,100,100};
302 
303  // domain
304  Box<3,float> box({0.0,0.0,0.0},{1.0,1.0,1.0});
305 
306  // Boundary conditions
307  size_t bc[3]={PERIODIC,PERIODIC,PERIODIC};
308 
309  // ghost, big enough to contain the interaction radius
310  Ghost<3,float> ghost(r_cut);
311 
312  real_number dt = 0.00005;
313  real_number sigma12 = pow(sigma,12);
314  real_number sigma6 = pow(sigma,6);
315 
318 
320 
321  // We create the grid iterator
322  auto it = vd.getGridIterator(sz);
323 
324  while (it.isNext())
325  {
326  // Create a new particle
327  vd.add();
328 
329  // key contain (i,j,k) index of the grid
330  auto key = it.get();
331 
332  // The index of the grid can be accessed with key.get(0) == i, key.get(1) == j ...
333  // We use getLastPos to set the position of the last particle added
334  vd.getLastPos()[0] = key.get(0) * it.getSpacing(0);
335  vd.getLastPos()[1] = key.get(1) * it.getSpacing(1);
336  vd.getLastPos()[2] = key.get(2) * it.getSpacing(2);
337 
338  // We use getLastProp to set the property value of the last particle we added
339  vd.template getLastProp<velocity>()[0] = 0.0;
340  vd.template getLastProp<velocity>()[1] = 0.0;
341  vd.template getLastProp<velocity>()[2] = 0.0;
342 
343  vd.template getLastProp<force>()[0] = 0.0;
344  vd.template getLastProp<force>()[1] = 0.0;
345  vd.template getLastProp<force>()[2] = 0.0;
346 
347  ++it;
348  }
349 
350  vd.hostToDevicePos();
351  vd.hostToDeviceProp<velocity,force>();
352 
353  vd.map(RUN_ON_DEVICE);
354  vd.ghost_get<>(RUN_ON_DEVICE);
355 
356  timer tsim;
357  tsim.start();
358 
360 
362 
363  // Get the Cell list structure
364  auto NN = vd.getCellListGPU(r_cut / 2.0);
365 
367 
368  // The standard
369  // auto NN = vd.getCellList(r_cut);
370 
371  // calculate forces
372  calc_forces(vd,NN,sigma12,sigma6,r_cut*r_cut);
373  unsigned long int f = 0;
374 
375  // MD time stepping
376  for (size_t i = 0; i < nstep ; i++)
377  {
378  // Get the iterator
379  auto it3 = vd.getDomainIteratorGPU();
380 
381  CUDA_LAUNCH(update_velocity_position,it3,vd.toKernel(),dt);
382 
383  // Because we moved the particles in space we have to map them and re-sync the ghost
384  vd.map(RUN_ON_DEVICE);
385  vd.template ghost_get<>(RUN_ON_DEVICE);
386 
387  // calculate forces or a(tn + 1) Step 2
388  calc_forces(vd,NN,sigma12,sigma6,r_cut*r_cut);
389 
390  // Integrate the velocity Step 3
391  auto it4 = vd.getDomainIteratorGPU();
392 
393  CUDA_LAUNCH(update_velocity,it4,vd.toKernel(),dt);
394 
395  // After every iteration collect some statistic about the configuration
396  if (i % 1000 == 0)
397  {
398  vd.deviceToHostPos();
399  vd.deviceToHostProp<0,1,2>();
400 
401  // We write the particle position for visualization (Without ghost)
402  vd.deleteGhost();
403  vd.write_frame("particles_",f);
404 
405  // we resync the ghost
406  vd.ghost_get<>(RUN_ON_DEVICE);
407 
408  // We calculate the energy
409  real_number energy = calc_energy(vd,NN,sigma12,sigma6,r_cut*r_cut);
410  auto & vcl = create_vcluster();
411  vcl.sum(energy);
412  vcl.execute();
413 
414  // we save the energy calculated at time step i c contain the time-step y contain the energy
415  x.add(i);
416  y.add({energy});
417 
418  // We also print on terminal the value of the energy
419  // only one processor (master) write on terminal
420  if (vcl.getProcessUnitID() == 0)
421  std::cout << "Energy: " << energy << std::endl;
422 
423  f++;
424  }
425  }
426 
427  tsim.stop();
428  std::cout << "Time: " << tsim.getwct() << std::endl;
429 
430  // Google charts options, it store the options to draw the X Y graph
431  GCoptions options;
432 
433  // Title of the graph
434  options.title = std::string("Energy with time");
435 
436  // Y axis name
437  options.yAxis = std::string("Energy");
438 
439  // X axis name
440  options.xAxis = std::string("iteration");
441 
442  // width of the line
443  options.lineWidth = 1.0;
444 
445  // Resolution in x
446  options.width = 1280;
447 
448  // Resolution in y
449  options.heigh = 720;
450 
451  // Add zoom capability
452  options.more = GC_ZOOM;
453 
454  // Object that draw the X Y graph
455  GoogleChart cg;
456 
457  // Add the graph
458  // The graph that it produce is in svg format that can be opened on browser
459  cg.AddLinesGraph(x,y,options);
460 
461  // Write into html format
462  cg.write("gc_plot2_out.html");
463 
464  openfpm_finalize();
465 }
466 
467 #else
468 
469 int main(int argc, char* argv[])
470 {
471  return 0;
472 }
473 
474 #endif
475 
476 
size_t heigh
height of the graph in pixels
Definition: GoogleChart.hpp:49
void AddLinesGraph(openfpm::vector< X > &x, openfpm::vector< Y > &y, const GCoptions &opt)
Add a simple lines graph.
std::string title
Title of the chart.
Definition: GoogleChart.hpp:28
std::string more
more
Definition: GoogleChart.hpp:67
This class implement the point shape in an N-dimensional space.
Definition: Point.hpp:27
double getwct()
Return the elapsed real time.
Definition: timer.hpp:130
Definition: Ghost.hpp:39
std::string yAxis
Y axis name.
Definition: GoogleChart.hpp:30
Small class to produce graph with Google chart in HTML.
__device__ __host__ const T & get(unsigned int i) const
Get coordinate.
Definition: Point.hpp:172
void start()
Start the timer.
Definition: timer.hpp:90
size_t lineWidth
Width of the line.
Definition: GoogleChart.hpp:56
void write(std::string file)
It write the graphs on file in html format using Google charts.
size_t width
width of the graph in pixels
Definition: GoogleChart.hpp:46
Distributed vector.
aggregate of properties, from a list of object if create a struct that follow the OPENFPM native stru...
Definition: aggregate.hpp:214
Implementation of 1-D std::vector like structure.
Definition: map_vector.hpp:202
Class for FAST cell list implementation.
Definition: CellList.hpp:356
std::string xAxis
X axis name.
Definition: GoogleChart.hpp:32
Google chart options.
Definition: GoogleChart.hpp:25
Class for cpu time benchmarking.
Definition: timer.hpp:27
void stop()
Stop the timer.
Definition: timer.hpp:119