OpenFPM  5.2.0
Project that contain the implementation of distributed structures
main_gpu.cu
1 
103 #ifdef __NVCC__
104 
105 #include "Vector/vector_dist.hpp"
106 #include "Plot/GoogleChart.hpp"
107 #include "Plot/util.hpp"
108 #include "timer.hpp"
109 
110 #ifdef TEST_RUN
111 size_t nstep = 100;
112 #else
113 size_t nstep = 1000;
114 #endif
115 
116 typedef float real_number;
117 
118 constexpr int velocity = 0;
119 constexpr int force = 1;
120 constexpr int energy = 2;
121 
122 template<typename vector_dist_type,typename NN_type>
123 __global__ void calc_force_gpu(vector_dist_type vd, NN_type NN, real_number sigma12, real_number sigma6, real_number r_cut2)
124 {
125  unsigned int p;
126  GET_PARTICLE_SORT(p,NN);
127 
128  // Get the position xp of the particle
129  Point<3,real_number> xp = vd.getPos(p);
130 
131  // Reset the force counter
132  vd.template getProp<force>(p)[0] = 0.0;
133  vd.template getProp<force>(p)[1] = 0.0;
134  vd.template getProp<force>(p)[2] = 0.0;
135 
136  Point<3,real_number> force_;
137  force_.get(0) = 0.0;
138  force_.get(1) = 0.0;
139  force_.get(2) = 0.0;
140 
141  // Get an iterator over the neighborhood particles of p
142  auto Np = NN.getNNIteratorBox(NN.getCell(vd.getPos(p)));
143 
144  // For each neighborhood particle ...
145  while (Np.isNext())
146  {
148 
149  // ... q
150  auto q = Np.get_sort();
151 
153 
154  // if (p == q) skip this particle
155  if (q == p) {++Np; continue;};
156 
157  // Get the position of p
158  Point<3,real_number> xq = vd.getPos(q);
159 
160  // Get the distance between p and q
161  Point<3,real_number> r = xp - xq;
162 
163  // take the norm of this vector
164  real_number rn = norm2(r);
165 
166  if (rn > r_cut2)
167  {++Np; continue;};
168 
169  // Calculate the force, using pow is slower
170  Point<3,real_number> f = 24.0*(2.0 *sigma12 / (rn*rn*rn*rn*rn*rn*rn) - sigma6 / (rn*rn*rn*rn)) * r;
171  force_ += f;
172 
173  // Next neighborhood
174  ++Np;
175  }
176 
177  // we sum the force produced by q on p
178  vd.template getProp<force>(p)[0] = force_.get(0);
179  vd.template getProp<force>(p)[1] = force_.get(1);
180  vd.template getProp<force>(p)[2] = force_.get(2);
181 }
182 
183 template<typename vector_dist_type>
184 __global__ void update_velocity_position(vector_dist_type vd, real_number dt)
185 {
186  auto p = GET_PARTICLE(vd);
187 
188  // here we calculate v(tn + 0.5)
189  vd.template getProp<velocity>(p)[0] += 0.5*dt*vd.template getProp<force>(p)[0];
190  vd.template getProp<velocity>(p)[1] += 0.5*dt*vd.template getProp<force>(p)[1];
191  vd.template getProp<velocity>(p)[2] += 0.5*dt*vd.template getProp<force>(p)[2];
192 
193  // here we calculate x(tn + 1)
194  vd.getPos(p)[0] += vd.template getProp<velocity>(p)[0]*dt;
195  vd.getPos(p)[1] += vd.template getProp<velocity>(p)[1]*dt;
196  vd.getPos(p)[2] += vd.template getProp<velocity>(p)[2]*dt;
197 }
198 
199 template<typename vector_dist_type>
200 __global__ void update_velocity(vector_dist_type vd, real_number dt)
201 {
202  auto p = GET_PARTICLE(vd);
203 
204  // here we calculate v(tn + 1)
205  vd.template getProp<velocity>(p)[0] += 0.5*dt*vd.template getProp<force>(p)[0];
206  vd.template getProp<velocity>(p)[1] += 0.5*dt*vd.template getProp<force>(p)[1];
207  vd.template getProp<velocity>(p)[2] += 0.5*dt*vd.template getProp<force>(p)[2];
208 }
209 
210 template<typename vector_dist_type,typename NN_type>
211 __global__ void particle_energy(vector_dist_type vd, NN_type NN, real_number sigma12, real_number sigma6, real_number shift, real_number r_cut2)
212 {
213  unsigned int p;
214  GET_PARTICLE_SORT(p,NN);
215 
216  // Get the position of the particle p
217  Point<3,real_number> xp = vd.getPos(p);
218 
219  // Get an iterator over the neighborhood of the particle p
220  auto Np = NN.getNNIteratorBox(NN.getCell(vd.getPos(p)));
221 
222  real_number E = 0;
223 
224  // For each neighborhood of the particle p
225  while (Np.isNext())
226  {
227  // Neighborhood particle q
228  auto q = Np.get_sort();
229 
230  // if p == q skip this particle
231  if (q == p) {++Np; continue;};
232 
233  // Get position of the particle q
234  Point<3,real_number> xq = vd.getPos(q);
235 
236  // take the normalized direction
237  real_number rn = norm2(xp - xq);
238 
239  if (rn > r_cut2)
240  {++Np;continue;}
241 
242  // potential energy (using pow is slower)
243  E += 2.0 * ( sigma12 / (rn*rn*rn*rn*rn*rn) - sigma6 / ( rn*rn*rn) ) - shift;
244 
245  // Next neighborhood
246  ++Np;
247  }
248 
249  // Kinetic energy of the particle given by its actual speed
250  vd.template getProp<energy>(p) = E + (vd.template getProp<velocity>(p)[0]*vd.template getProp<velocity>(p)[0] +
251  vd.template getProp<velocity>(p)[1]*vd.template getProp<velocity>(p)[1] +
252  vd.template getProp<velocity>(p)[2]*vd.template getProp<velocity>(p)[2]) / 2;
253 }
254 
255 template<typename CellList> void calc_forces(vector_dist_gpu<3,real_number, aggregate<real_number[3],real_number[3],real_number> > & vd, CellList & NN, real_number sigma12, real_number sigma6, real_number r_cut2)
256 {
257  // Get an iterator over particles
258  auto it2 = vd.getDomainIteratorGPU();
259 
261 
262  // reorder positions only (no properties)
263  // as nothing else is needed to be read in calc_force_gpu
264  vd.template updateCellListGPU<>(NN);
265 
266  CUDA_LAUNCH(calc_force_gpu,it2,vd.toKernel(),NN.toKernel(),sigma12,sigma6,r_cut2);
267 
269  vd.template restoreOrder<force>(NN);
270 }
271 
272 template<typename CellList> real_number calc_energy(vector_dist_gpu<3,real_number, aggregate<real_number[3],real_number[3],real_number> > & vd, CellList & NN, real_number sigma12, real_number sigma6, real_number r_cut2)
273 {
274  real_number rc = r_cut2;
275  real_number shift = 2.0 * ( sigma12 / (rc*rc*rc*rc*rc*rc) - sigma6 / ( rc*rc*rc) );
276 
277  vd.template updateCellListGPU<velocity>(NN);
278 
279  auto it2 = vd.getDomainIteratorGPU();
280 
281  CUDA_LAUNCH(particle_energy,it2,vd.toKernel(),NN.toKernel(),sigma12,sigma6,shift,r_cut2);
282 
283  vd.template restoreOrder<energy>(NN);
284 
285  // Calculated energy
286  return reduce_local<energy,_add_>(vd);
287 }
288 
289 int main(int argc, char* argv[])
290 {
291  openfpm_init(&argc,&argv);
292 
293  real_number sigma = 0.01;
294  real_number r_cut =3.0*sigma;
295 
296  // we will use it do place particles on a 10x10x10 Grid like
297  size_t sz[3] = {100,100,100};
298 
299  // domain
300  Box<3,float> box({0.0,0.0,0.0},{1.0,1.0,1.0});
301 
302  // Boundary conditions
303  size_t bc[3]={PERIODIC,PERIODIC,PERIODIC};
304 
305  // ghost, big enough to contain the interaction radius
306  Ghost<3,float> ghost(r_cut);
307 
308  real_number dt = 0.00005;
309  real_number sigma12 = pow(sigma,12);
310  real_number sigma6 = pow(sigma,6);
311 
314 
316 
317  // We create the grid iterator
318  auto it = vd.getGridIterator(sz);
319 
320  while (it.isNext())
321  {
322  // Create a new particle
323  vd.add();
324 
325  // key contain (i,j,k) index of the grid
326  auto key = it.get();
327 
328  // The index of the grid can be accessed with key.get(0) == i, key.get(1) == j ...
329  // We use getLastPos to set the position of the last particle added
330  vd.getLastPos()[0] = key.get(0) * it.getSpacing(0);
331  vd.getLastPos()[1] = key.get(1) * it.getSpacing(1);
332  vd.getLastPos()[2] = key.get(2) * it.getSpacing(2);
333 
334  // We use getLastProp to set the property value of the last particle we added
335  vd.template getLastProp<velocity>()[0] = 0.0;
336  vd.template getLastProp<velocity>()[1] = 0.0;
337  vd.template getLastProp<velocity>()[2] = 0.0;
338 
339  vd.template getLastProp<force>()[0] = 0.0;
340  vd.template getLastProp<force>()[1] = 0.0;
341  vd.template getLastProp<force>()[2] = 0.0;
342 
343  ++it;
344  }
345 
346  vd.hostToDevicePos();
347  vd.hostToDeviceProp<velocity,force>();
348 
349  vd.map(RUN_ON_DEVICE);
350  vd.ghost_get<>(RUN_ON_DEVICE);
351 
352  timer tsim;
353  tsim.start();
354 
356 
358 
359  // Get the Cell list structure
360  auto NN = vd.getCellListGPU(r_cut / 2.0, CL_NON_SYMMETRIC | CL_GPU_REORDER, 2);
361 
363 
364  // The standard
365  // auto NN = vd.getCellList(r_cut);
366 
367  // calculate forces
368  calc_forces(vd,NN,sigma12,sigma6,r_cut*r_cut);
369  unsigned long int f = 0;
370 
371  // MD time stepping
372  for (size_t i = 0; i < nstep ; i++)
373  {
374  // Get the iterator
375  auto it3 = vd.getDomainIteratorGPU();
376  CUDA_LAUNCH(update_velocity_position,it3,vd.toKernel(),dt);
377 
378  // Because we moved the particles in space we have to map them and re-sync the ghost
379  vd.map(RUN_ON_DEVICE);
380  vd.template ghost_get<>(RUN_ON_DEVICE);
381 
382  // calculate forces or a(tn + 1) Step 2
383  calc_forces(vd,NN,sigma12,sigma6,r_cut*r_cut);
384 
385  // Integrate the velocity Step 3
386  auto it4 = vd.getDomainIteratorGPU();
387 
388  CUDA_LAUNCH(update_velocity,it4,vd.toKernel(),dt);
389 
390  // After every iteration collect some statistic about the configuration
391  if (i % 100 == 0)
392  {
393  vd.deviceToHostPos();
394  vd.deviceToHostProp<0,1,2>();
395 
396  // We write the particle position for visualization (Without ghost)
397  vd.deleteGhost();
398  vd.write_frame("particles_",f);
399 
400  // we resync the ghost
401  vd.ghost_get<>(RUN_ON_DEVICE);
402 
403  // We calculate the energy
404  real_number energy = calc_energy(vd,NN,sigma12,sigma6,r_cut*r_cut);
405  auto & vcl = create_vcluster();
406  vcl.sum(energy);
407  vcl.execute();
408 
409  // we save the energy calculated at time step i c contain the time-step y contain the energy
410  x.add(i);
411  y.add({energy});
412 
413  // We also print on terminal the value of the energy
414  // only one processor (master) write on terminal
415  if (vcl.getProcessUnitID() == 0)
416  std::cout << "Energy: " << energy << std::endl;
417 
418  f++;
419  }
420  }
421 
422  tsim.stop();
423  std::cout << "Time: " << tsim.getwct() << std::endl;
424 
425  // Google charts options, it store the options to draw the X Y graph
426  GCoptions options;
427 
428  // Title of the graph
429  options.title = std::string("Energy with time");
430 
431  // Y axis name
432  options.yAxis = std::string("Energy");
433 
434  // X axis name
435  options.xAxis = std::string("iteration");
436 
437  // width of the line
438  options.lineWidth = 1.0;
439 
440  // Resolution in x
441  options.width = 1280;
442 
443  // Resolution in y
444  options.heigh = 720;
445 
446  // Add zoom capability
447  options.more = GC_ZOOM;
448 
449  // Object that draw the X Y graph
450  GoogleChart cg;
451 
452  // Add the graph
453  // The graph that it produce is in svg format that can be opened on browser
454  cg.AddLinesGraph(x,y,options);
455 
456  // Write into html format
457  cg.write("gc_plot2_out.html");
458 
459  openfpm_finalize();
460 }
461 
462 #else
463 
464 int main(int argc, char* argv[])
465 {
466  return 0;
467 }
468 
469 #endif
470 
471 
Class for FAST cell list implementation.
Definition: CellList.hpp:558
Definition: Ghost.hpp:40
Small class to produce graph with Google chart in HTML.
void write(std::string file)
It write the graphs on file in html format using Google charts.
void AddLinesGraph(openfpm::vector< X > &x, openfpm::vector< Y > &y, const GCoptions &opt)
Add a simple lines graph.
This class implement the point shape in an N-dimensional space.
Definition: Point.hpp:28
__device__ __host__ const T & get(unsigned int i) const
Get coordinate.
Definition: Point.hpp:172
Implementation of 1-D std::vector like structure.
Definition: map_vector.hpp:204
Class for cpu time benchmarking.
Definition: timer.hpp:28
void stop()
Stop the timer.
Definition: timer.hpp:119
void start()
Start the timer.
Definition: timer.hpp:90
double getwct()
Return the elapsed real time.
Definition: timer.hpp:130
Distributed vector.
bool write_frame(std::string out, size_t iteration, int opt=VTK_WRITER)
Output particle position and properties.
void deviceToHostPos()
Move the memory from the device to host memory.
grid_dist_id_iterator_dec< Decomposition > getGridIterator(const size_t(&sz)[dim])
auto getPos(vect_dist_key_dx vec_key) -> decltype(vPos.template get< 0 >(vec_key.getKey()))
Get the position of an element.
void ghost_get(size_t opt=WITH_POSITION)
It synchronize the properties and position of the ghost particles.
void hostToDevicePos()
Move the memory from the device to host memory.
void map(size_t opt=NONE)
It move all the particles that does not belong to the local processor to the respective processor.
void deviceToHostProp()
Move the memory from the device to host memory.
void add()
Add local particle.
void hostToDeviceProp()
Move the memory from the device to host memory.
void deleteGhost()
Delete the particles on the ghost.
auto getLastPos() -> decltype(vPos.template get< 0 >(0))
Get the position of the last element.
Google chart options.
Definition: GoogleChart.hpp:26
size_t width
width of the graph in pixels
Definition: GoogleChart.hpp:46
size_t heigh
height of the graph in pixels
Definition: GoogleChart.hpp:49
std::string xAxis
X axis name.
Definition: GoogleChart.hpp:32
size_t lineWidth
Width of the line.
Definition: GoogleChart.hpp:56
std::string more
more
Definition: GoogleChart.hpp:67
std::string title
Title of the chart.
Definition: GoogleChart.hpp:28
std::string yAxis
Y axis name.
Definition: GoogleChart.hpp:30
aggregate of properties, from a list of object if create a struct that follow the OPENFPM native stru...
Definition: aggregate.hpp:221