OpenFPM_pdata  4.1.0
Project that contain the implementation of distributed structures
 
Loading...
Searching...
No Matches
main_gpu.cu
1
104#ifdef __NVCC__
105
106#include "Vector/vector_dist.hpp"
107#include "Plot/GoogleChart.hpp"
108#include "Plot/util.hpp"
109#include "timer.hpp"
110
111#ifdef TEST_RUN
112size_t nstep = 100;
113#else
114size_t nstep = 1000;
115#endif
116
117typedef float real_number;
118
119constexpr int velocity = 0;
120constexpr int force = 1;
121constexpr int energy = 2;
122
123template<typename vector_dist_type,typename NN_type>
124__global__ void calc_force_gpu(vector_dist_type vd, NN_type NN, real_number sigma12, real_number sigma6, real_number r_cut2)
125{
126 unsigned int p;
127 GET_PARTICLE_SORT(p,NN);
128
129 // Get the position xp of the particle
130 Point<3,real_number> xp = vd.getPos(p);
131
132 // Reset the force counter
133 vd.template getProp<force>(p)[0] = 0.0;
134 vd.template getProp<force>(p)[1] = 0.0;
135 vd.template getProp<force>(p)[2] = 0.0;
136
138 force_.get(0) = 0.0;
139 force_.get(1) = 0.0;
140 force_.get(2) = 0.0;
141
142 // Get an iterator over the neighborhood particles of p
143 auto Np = NN.getNNIteratorBox(NN.getCell(vd.getPos(p)));
144
145 // For each neighborhood particle ...
146 while (Np.isNext())
147 {
149
150 // ... q
151 auto q = Np.get_sort();
152
154
155 // if (p == q) skip this particle
156 if (q == p) {++Np; continue;};
157
158 // Get the position of p
159 Point<3,real_number> xq = vd.getPos(q);
160
161 // Get the distance between p and q
162 Point<3,real_number> r = xp - xq;
163
164 // take the norm of this vector
165 real_number rn = norm2(r);
166
167 if (rn > r_cut2)
168 {++Np; continue;};
169
170 // Calculate the force, using pow is slower
171 Point<3,real_number> f = 24.0*(2.0 *sigma12 / (rn*rn*rn*rn*rn*rn*rn) - sigma6 / (rn*rn*rn*rn)) * r;
172 force_ += f;
173
174 // Next neighborhood
175 ++Np;
176 }
177
178 // we sum the force produced by q on p
179 vd.template getProp<force>(p)[0] = force_.get(0);
180 vd.template getProp<force>(p)[1] = force_.get(1);
181 vd.template getProp<force>(p)[2] = force_.get(2);
182}
183
184template<typename vector_dist_type>
185__global__ void update_velocity_position(vector_dist_type vd, real_number dt)
186{
187 auto p = GET_PARTICLE(vd);
188
189 // here we calculate v(tn + 0.5)
190 vd.template getProp<velocity>(p)[0] += 0.5*dt*vd.template getProp<force>(p)[0];
191 vd.template getProp<velocity>(p)[1] += 0.5*dt*vd.template getProp<force>(p)[1];
192 vd.template getProp<velocity>(p)[2] += 0.5*dt*vd.template getProp<force>(p)[2];
193
194 // here we calculate x(tn + 1)
195 vd.getPos(p)[0] += vd.template getProp<velocity>(p)[0]*dt;
196 vd.getPos(p)[1] += vd.template getProp<velocity>(p)[1]*dt;
197 vd.getPos(p)[2] += vd.template getProp<velocity>(p)[2]*dt;
198}
199
200template<typename vector_dist_type>
201__global__ void update_velocity(vector_dist_type vd, real_number dt)
202{
203 auto p = GET_PARTICLE(vd);
204
205 // here we calculate v(tn + 1)
206 vd.template getProp<velocity>(p)[0] += 0.5*dt*vd.template getProp<force>(p)[0];
207 vd.template getProp<velocity>(p)[1] += 0.5*dt*vd.template getProp<force>(p)[1];
208 vd.template getProp<velocity>(p)[2] += 0.5*dt*vd.template getProp<force>(p)[2];
209}
210
211template<typename vector_dist_type,typename NN_type>
212__global__ void particle_energy(vector_dist_type vd, NN_type NN, real_number sigma12, real_number sigma6, real_number shift, real_number r_cut2)
213{
214 unsigned int p;
215 GET_PARTICLE_SORT(p,NN);
216
217 // Get the position of the particle p
218 Point<3,real_number> xp = vd.getPos(p);
219
220 // Get an iterator over the neighborhood of the particle p
221 auto Np = NN.getNNIteratorBox(NN.getCell(vd.getPos(p)));
222
223 real_number E = 0;
224
225 // For each neighborhood of the particle p
226 while (Np.isNext())
227 {
228 // Neighborhood particle q
229 auto q = Np.get_sort();
230
231 // if p == q skip this particle
232 if (q == p) {++Np; continue;};
233
234 // Get position of the particle q
235 Point<3,real_number> xq = vd.getPos(q);
236
237 // take the normalized direction
238 real_number rn = norm2(xp - xq);
239
240 if (rn > r_cut2)
241 {++Np;continue;}
242
243 // potential energy (using pow is slower)
244 E += 2.0 * ( sigma12 / (rn*rn*rn*rn*rn*rn) - sigma6 / ( rn*rn*rn) ) - shift;
245
246 // Next neighborhood
247 ++Np;
248 }
249
250 // Kinetic energy of the particle given by its actual speed
251 vd.template getProp<energy>(p) = E + (vd.template getProp<velocity>(p)[0]*vd.template getProp<velocity>(p)[0] +
252 vd.template getProp<velocity>(p)[1]*vd.template getProp<velocity>(p)[1] +
253 vd.template getProp<velocity>(p)[2]*vd.template getProp<velocity>(p)[2]) / 2;
254}
255
256template<typename CellList> void calc_forces(vector_dist_gpu<3,real_number, aggregate<real_number[3],real_number[3],real_number> > & vd, CellList & NN, real_number sigma12, real_number sigma6, real_number r_cut2)
257{
258 vd.updateCellList(NN);
259
260 // Get an iterator over particles
261 auto it2 = vd.getDomainIteratorGPU();
262
264
265 CUDA_LAUNCH(calc_force_gpu,it2,vd.toKernel_sorted(),NN.toKernel(),sigma12,sigma6,r_cut2);
266
268
270
271 vd.merge_sort<force>(NN);
272
274}
275
276template<typename CellList> real_number calc_energy(vector_dist_gpu<3,real_number, aggregate<real_number[3],real_number[3],real_number> > & vd, CellList & NN, real_number sigma12, real_number sigma6, real_number r_cut2)
277{
278 real_number rc = r_cut2;
279 real_number shift = 2.0 * ( sigma12 / (rc*rc*rc*rc*rc*rc) - sigma6 / ( rc*rc*rc) );
280
281 vd.updateCellList(NN);
282
283 auto it2 = vd.getDomainIteratorGPU();
284
285 CUDA_LAUNCH(particle_energy,it2,vd.toKernel_sorted(),NN.toKernel(),sigma12,sigma6,shift,r_cut2);
286
287 vd.merge_sort<energy>(NN);
288
289 // Calculated energy
290 return reduce_local<energy,_add_>(vd);
291}
292
293int main(int argc, char* argv[])
294{
295 openfpm_init(&argc,&argv);
296
297 real_number sigma = 0.01;
298 real_number r_cut =3.0*sigma;
299
300 // we will use it do place particles on a 10x10x10 Grid like
301 size_t sz[3] = {100,100,100};
302
303 // domain
304 Box<3,float> box({0.0,0.0,0.0},{1.0,1.0,1.0});
305
306 // Boundary conditions
307 size_t bc[3]={PERIODIC,PERIODIC,PERIODIC};
308
309 // ghost, big enough to contain the interaction radius
310 Ghost<3,float> ghost(r_cut);
311
312 real_number dt = 0.00005;
313 real_number sigma12 = pow(sigma,12);
314 real_number sigma6 = pow(sigma,6);
315
318
320
321 // We create the grid iterator
322 auto it = vd.getGridIterator(sz);
323
324 while (it.isNext())
325 {
326 // Create a new particle
327 vd.add();
328
329 // key contain (i,j,k) index of the grid
330 auto key = it.get();
331
332 // The index of the grid can be accessed with key.get(0) == i, key.get(1) == j ...
333 // We use getLastPos to set the position of the last particle added
334 vd.getLastPos()[0] = key.get(0) * it.getSpacing(0);
335 vd.getLastPos()[1] = key.get(1) * it.getSpacing(1);
336 vd.getLastPos()[2] = key.get(2) * it.getSpacing(2);
337
338 // We use getLastProp to set the property value of the last particle we added
339 vd.template getLastProp<velocity>()[0] = 0.0;
340 vd.template getLastProp<velocity>()[1] = 0.0;
341 vd.template getLastProp<velocity>()[2] = 0.0;
342
343 vd.template getLastProp<force>()[0] = 0.0;
344 vd.template getLastProp<force>()[1] = 0.0;
345 vd.template getLastProp<force>()[2] = 0.0;
346
347 ++it;
348 }
349
350 vd.hostToDevicePos();
351 vd.hostToDeviceProp<velocity,force>();
352
353 vd.map(RUN_ON_DEVICE);
354 vd.ghost_get<>(RUN_ON_DEVICE);
355
356 timer tsim;
357 tsim.start();
358
360
362
363 // Get the Cell list structure
364 auto NN = vd.getCellListGPU(r_cut / 2.0);
365
367
368 // The standard
369 // auto NN = vd.getCellList(r_cut);
370
371 // calculate forces
372 calc_forces(vd,NN,sigma12,sigma6,r_cut*r_cut);
373 unsigned long int f = 0;
374
375 // MD time stepping
376 for (size_t i = 0; i < nstep ; i++)
377 {
378 // Get the iterator
379 auto it3 = vd.getDomainIteratorGPU();
380
381 CUDA_LAUNCH(update_velocity_position,it3,vd.toKernel(),dt);
382
383 // Because we moved the particles in space we have to map them and re-sync the ghost
384 vd.map(RUN_ON_DEVICE);
385 vd.template ghost_get<>(RUN_ON_DEVICE);
386
387 // calculate forces or a(tn + 1) Step 2
388 calc_forces(vd,NN,sigma12,sigma6,r_cut*r_cut);
389
390 // Integrate the velocity Step 3
391 auto it4 = vd.getDomainIteratorGPU();
392
393 CUDA_LAUNCH(update_velocity,it4,vd.toKernel(),dt);
394
395 // After every iteration collect some statistic about the configuration
396 if (i % 1000 == 0)
397 {
398 vd.deviceToHostPos();
399 vd.deviceToHostProp<0,1,2>();
400
401 // We write the particle position for visualization (Without ghost)
402 vd.deleteGhost();
403 vd.write_frame("particles_",f);
404
405 // we resync the ghost
406 vd.ghost_get<>(RUN_ON_DEVICE);
407
408 // We calculate the energy
409 real_number energy = calc_energy(vd,NN,sigma12,sigma6,r_cut*r_cut);
410 auto & vcl = create_vcluster();
411 vcl.sum(energy);
412 vcl.execute();
413
414 // we save the energy calculated at time step i c contain the time-step y contain the energy
415 x.add(i);
416 y.add({energy});
417
418 // We also print on terminal the value of the energy
419 // only one processor (master) write on terminal
420 if (vcl.getProcessUnitID() == 0)
421 std::cout << "Energy: " << energy << std::endl;
422
423 f++;
424 }
425 }
426
427 tsim.stop();
428 std::cout << "Time: " << tsim.getwct() << std::endl;
429
430 // Google charts options, it store the options to draw the X Y graph
431 GCoptions options;
432
433 // Title of the graph
434 options.title = std::string("Energy with time");
435
436 // Y axis name
437 options.yAxis = std::string("Energy");
438
439 // X axis name
440 options.xAxis = std::string("iteration");
441
442 // width of the line
443 options.lineWidth = 1.0;
444
445 // Resolution in x
446 options.width = 1280;
447
448 // Resolution in y
449 options.heigh = 720;
450
451 // Add zoom capability
452 options.more = GC_ZOOM;
453
454 // Object that draw the X Y graph
455 GoogleChart cg;
456
457 // Add the graph
458 // The graph that it produce is in svg format that can be opened on browser
459 cg.AddLinesGraph(x,y,options);
460
461 // Write into html format
462 cg.write("gc_plot2_out.html");
463
464 openfpm_finalize();
465}
466
467#else
468
469int main(int argc, char* argv[])
470{
471 return 0;
472}
473
474#endif
475
476
This class represent an N-dimensional box.
Definition Box.hpp:61
Class for FAST cell list implementation.
Definition CellList.hpp:357
Small class to produce graph with Google chart in HTML.
void write(std::string file)
It write the graphs on file in html format using Google charts.
void AddLinesGraph(openfpm::vector< X > &x, openfpm::vector< Y > &y, const GCoptions &opt)
Add a simple lines graph.
This class implement the point shape in an N-dimensional space.
Definition Point.hpp:28
__device__ __host__ const T & get(unsigned int i) const
Get coordinate.
Definition Point.hpp:172
Implementation of 1-D std::vector like structure.
Class for cpu time benchmarking.
Definition timer.hpp:28
void stop()
Stop the timer.
Definition timer.hpp:119
void start()
Start the timer.
Definition timer.hpp:90
double getwct()
Return the elapsed real time.
Definition timer.hpp:130
Distributed vector.
Google chart options.
size_t width
width of the graph in pixels
size_t heigh
height of the graph in pixels
std::string xAxis
X axis name.
size_t lineWidth
Width of the line.
std::string more
more
std::string title
Title of the chart.
std::string yAxis
Y axis name.
aggregate of properties, from a list of object if create a struct that follow the OPENFPM native stru...