doxygen/openfpm/main__gpu_8cu_source.html

 #ifdef __NVCC__


 #include "Vector/vector_dist.hpp"

 #include "Plot/GoogleChart.hpp"

 #include "Plot/util.hpp"

 #include "timer.hpp"


 // Number of time steps executed by the main loop; a shorter run is used
 // when TEST_RUN is defined
 #ifdef TEST_RUN
 size_t nstep = 100;
 #else
 size_t nstep = 1000;
 #endif

 // Floating point type used for positions, properties and kernel arguments
 using real_number = float;

 // Indices of the particle properties inside the aggregate
 constexpr int velocity = 0;
 constexpr int force = 1;
 constexpr int energy = 2;


 // Kernel: compute the force acting on each particle.
 //
 // Each thread handles one particle p (obtained through GET_PARTICLE_SORT),
 // visits the particles q returned by the cell-list neighborhood iterator and
 // accumulates
 //
 //     f = 24 * (2*sigma12 / r^14 - sigma6 / r^8) * (xp - xq)
 //
 // for every q != p whose squared distance from p does not exceed r_cut2.
 // The sum is stored in the force property of p.
 //
 // vd       particle set, as passed to the kernel
 // NN       cell-list, as passed to the kernel
 // sigma12  sigma^12 coefficient of the potential
 // sigma6   sigma^6 coefficient of the potential
 // r_cut2   squared cut-off radius
 //
 // NOTE(review): two distinct particles at the same position give rn == 0 and
 // a division by zero; no guard is applied here -- confirm this cannot happen.
 template<typename vector_dist_type,typename NN_type>
 __global__ void calc_force_gpu(vector_dist_type vd, NN_type NN, real_number sigma12, real_number sigma6, real_number r_cut2)
 {
     unsigned int p;
     GET_PARTICLE_SORT(p,NN);

     // Position xp of the particle p
     Point<3,real_number> xp = vd.getPos(p);

     // The force is accumulated in a local variable and written once at the
     // end, so the force property does not need to be zeroed beforehand
     Point<3,real_number> force_;
     for (unsigned int i = 0 ; i < 3 ; i++)
     {force_.get(i) = real_number(0);}

     // Iterator over the neighborhood particles of p
     auto Np = NN.getNNIteratorBox(NN.getCell(vd.getPos(p)));

     // For each neighborhood particle q
     for ( ; Np.isNext() ; ++Np)
     {
         auto q = Np.get_sort();

         // no self interaction
         if (q == p) {continue;}

         // Position of q
         Point<3,real_number> xq = vd.getPos(q);

         // Vector from q to p
         Point<3,real_number> r = xp - xq;

         // Squared distance between p and q
         real_number rn = norm2(r);

         // outside the cut-off radius
         if (rn > r_cut2) {continue;}

         // Powers of the squared distance: rn^4 = r^8, rn^7 = r^14
         // (explicit products, using pow is slower)
         real_number rn4 = rn*rn*rn*rn;
         real_number rn7 = rn4*rn*rn*rn;

         // Scalar prefactor, evaluated entirely in real_number so that no
         // double precision arithmetic is generated when real_number is float
         real_number coef = real_number(24.0)*(real_number(2.0)*sigma12 / rn7 - sigma6 / rn4);

         // Sum the force produced by q on p
         for (unsigned int i = 0 ; i < 3 ; i++)
         {force_.get(i) += coef * r.get(i);}
     }

     // Store the total force on p
     for (unsigned int i = 0 ; i < 3 ; i++)
     {vd.template getProp<force>(p)[i] = force_.get(i);}
 }


 // Kernel: advance the velocity by half a time step using the current force,
 // then advance the position by a full time step using the updated velocity.
 //
 // vd  particle set, as passed to the kernel
 // dt  time step
 template<typename vector_dist_type>
 __global__ void update_velocity_position(vector_dist_type vd, real_number dt)
 {
     auto p = GET_PARTICLE(vd);

     // Kept in real_number: a bare 0.5 literal would promote the whole
     // update to double precision when real_number is float
     const real_number half_dt = real_number(0.5)*dt;

     for (int i = 0 ; i < 3 ; i++)
     {
         // here we calculate v(tn + 0.5)
         vd.template getProp<velocity>(p)[i] += half_dt*vd.template getProp<force>(p)[i];

         // here we calculate x(tn + 1)
         vd.getPos(p)[i] += vd.template getProp<velocity>(p)[i]*dt;
     }
 }


 // Kernel: advance the velocity by half a time step using the current force.
 //
 // vd  particle set, as passed to the kernel
 // dt  time step
 template<typename vector_dist_type>
 __global__ void update_velocity(vector_dist_type vd, real_number dt)
 {
     auto p = GET_PARTICLE(vd);

     // Kept in real_number: a bare 0.5 literal would promote the whole
     // update to double precision when real_number is float
     const real_number half_dt = real_number(0.5)*dt;

     // here we calculate v(tn + 1)
     for (int i = 0 ; i < 3 ; i++)
     {vd.template getProp<velocity>(p)[i] += half_dt*vd.template getProp<force>(p)[i];}
 }


 // Kernel: compute the energy of each particle.
 //
 // Each thread handles one particle p (obtained through GET_PARTICLE_SORT).
 // For every neighbor q != p whose squared distance rn from p does not exceed
 // r_cut2 it adds
 //
 //     2 * (sigma12 / rn^6 - sigma6 / rn^3) - shift
 //
 // and finally stores that sum plus |v|^2 / 2 in the energy property of p.
 //
 // vd       particle set, as passed to the kernel
 // NN       cell-list, as passed to the kernel
 // sigma12  sigma^12 coefficient of the potential
 // sigma6   sigma^6 coefficient of the potential
 // shift    constant subtracted from every pair contribution
 // r_cut2   squared cut-off radius
 //
 // NOTE(review): the factor 2 is presumably half of the usual factor 4 of the
 // pair potential because each pair is visited from both particles -- confirm.
 template<typename vector_dist_type,typename NN_type>
 __global__ void particle_energy(vector_dist_type vd, NN_type NN, real_number sigma12, real_number sigma6, real_number shift, real_number r_cut2)
 {
     unsigned int p;
     GET_PARTICLE_SORT(p,NN);

     // Position of the particle p
     Point<3,real_number> xp = vd.getPos(p);

     // Iterator over the neighborhood of the particle p
     auto Np = NN.getNNIteratorBox(NN.getCell(vd.getPos(p)));

     // Potential energy accumulated for p
     real_number E = 0;

     // For each neighborhood particle q
     for ( ; Np.isNext() ; ++Np)
     {
         auto q = Np.get_sort();

         // no self interaction
         if (q == p) {continue;}

         // Position of the particle q
         Point<3,real_number> xq = vd.getPos(q);

         // Squared distance between p and q
         real_number rn = norm2(xp - xq);

         // outside the cut-off radius
         if (rn > r_cut2) {continue;}

         // Powers of the squared distance: rn^3 = r^6, rn^6 = r^12
         // (explicit products, using pow is slower)
         real_number rn3 = rn*rn*rn;
         real_number rn6 = rn3*rn*rn*rn;

         // Pair contribution, evaluated entirely in real_number so that no
         // double precision arithmetic is generated when real_number is float
         E += real_number(2.0) * ( sigma12 / rn6 - sigma6 / rn3 ) - shift;
     }

     // Velocity of p, read once
     real_number vx = vd.template getProp<velocity>(p)[0];
     real_number vy = vd.template getProp<velocity>(p)[1];
     real_number vz = vd.template getProp<velocity>(p)[2];

     // Potential energy plus the kinetic energy given by the actual speed
     vd.template getProp<energy>(p) = E + (vx*vx + vy*vy + vz*vz) / 2;
 }


 // Host-side driver of the force computation: refreshes the cell-list,
 // launches calc_force_gpu over the domain particles and then calls
 // restoreOrder for the force property.
 //
 // vd       particle set
 // NN       cell-list
 // sigma12  forwarded to calc_force_gpu
 // sigma6   forwarded to calc_force_gpu
 // r_cut2   forwarded to calc_force_gpu
 template<typename CellList>
 void calc_forces(vector_dist_gpu<3,real_number, aggregate<real_number[3],real_number[3],real_number> > & vd,
                  CellList & NN,
                  real_number sigma12,
                  real_number sigma6,
                  real_number r_cut2)
 {
     // Iterator over the domain particles, used to launch the kernel
     auto launch_it = vd.getDomainIteratorGPU();

     // Update the cell-list without listing any property: calc_force_gpu
     // reads only the positions
     vd.template updateCellListGPU<>(NN);

     CUDA_LAUNCH(calc_force_gpu,launch_it,vd.toKernel(),NN.toKernel(),sigma12,sigma6,r_cut2);

     // Bring the computed force back to the original particle order
     vd.template restoreOrder<force>(NN);
 }


 // Host-side driver of the energy computation: refreshes the cell-list
 // (listing the velocity property), launches particle_energy over the domain
 // particles, calls restoreOrder for the energy property and returns the
 // local sum of the energy property.
 //
 // vd       particle set
 // NN       cell-list
 // sigma12  forwarded to particle_energy
 // sigma6   forwarded to particle_energy
 // r_cut2   squared cut-off radius
 //
 // Returns the result of reduce_local<energy,_add_> on vd.
 template<typename CellList>
 real_number calc_energy(vector_dist_gpu<3,real_number, aggregate<real_number[3],real_number[3],real_number> > & vd,
                         CellList & NN,
                         real_number sigma12,
                         real_number sigma6,
                         real_number r_cut2)
 {
     // Pair contribution evaluated at the cut-off; the input is already the
     // squared radius, hence the third and sixth powers
     real_number rc2 = r_cut2;
     real_number rc2_pow3 = rc2*rc2*rc2;
     real_number rc2_pow6 = rc2_pow3*rc2*rc2*rc2;
     real_number shift = 2.0 * ( sigma12 / rc2_pow6 - sigma6 / rc2_pow3 );

     vd.template updateCellListGPU<velocity>(NN);

     // Iterator over the domain particles, used to launch the kernel
     auto launch_it = vd.getDomainIteratorGPU();

     CUDA_LAUNCH(particle_energy,launch_it,vd.toKernel(),NN.toKernel(),sigma12,sigma6,shift,r_cut2);

     vd.template restoreOrder<energy>(NN);

     // Sum of the per-particle energies
     real_number local_energy = reduce_local<energy,_add_>(vd);
     return local_energy;
 }


 int main(int argc, char* argv[])

 {

     openfpm_init(&argc,&argv);


     real_number sigma = 0.01;

     real_number r_cut =3.0*sigma;


     // we will use it do place particles on a 10x10x10 Grid like

     size_t sz[3] = {100,100,100};


     // domain

     Box<3,float> box({0.0,0.0,0.0},{1.0,1.0,1.0});


     // Boundary conditions

     size_t bc[3]={PERIODIC,PERIODIC,PERIODIC};


     // ghost, big enough to contain the interaction radius

     Ghost<3,float> ghost(r_cut);


     real_number dt = 0.00005;

     real_number sigma12 = pow(sigma,12);

     real_number sigma6 = pow(sigma,6);


     openfpm::vector<real_number> x;

     openfpm::vector<openfpm::vector<real_number>> y;


     vector_dist_gpu<3,real_number, aggregate<real_number[3],real_number[3],real_number> > vd(0,box,bc,ghost);


     // We create the grid iterator

     auto it = vd.getGridIterator(sz);


     while (it.isNext())

     {

         // Create a new particle

         vd.add();


         // key contain (i,j,k) index of the grid

         auto key = it.get();


         // The index of the grid can be accessed with key.get(0) == i, key.get(1) == j ...

         // We use getLastPos to set the position of the last particle added

         vd.getLastPos()[0] = key.get(0) * it.getSpacing(0);

         vd.getLastPos()[1] = key.get(1) * it.getSpacing(1);

         vd.getLastPos()[2] = key.get(2) * it.getSpacing(2);


         // We use getLastProp to set the property value of the last particle we added

         vd.template getLastProp<velocity>()[0] = 0.0;

         vd.template getLastProp<velocity>()[1] = 0.0;

         vd.template getLastProp<velocity>()[2] = 0.0;


         vd.template getLastProp<force>()[0] = 0.0;

         vd.template getLastProp<force>()[1] = 0.0;

         vd.template getLastProp<force>()[2] = 0.0;


         ++it;

     }


     vd.hostToDevicePos();

     vd.hostToDeviceProp<velocity,force>();


     vd.map(RUN_ON_DEVICE);

     vd.ghost_get<>(RUN_ON_DEVICE);


     timer tsim;

     tsim.start();


     // Get the Cell list structure

     auto NN = vd.getCellListGPU(r_cut / 2.0, CL_NON_SYMMETRIC | CL_GPU_REORDER, 2);


     // The standard

     // auto NN = vd.getCellList(r_cut);


     // calculate forces

     calc_forces(vd,NN,sigma12,sigma6,r_cut*r_cut);

     unsigned long int f = 0;


     // MD time stepping

     for (size_t i = 0; i < nstep ; i++)

     {

         // Get the iterator

         auto it3 = vd.getDomainIteratorGPU();

         CUDA_LAUNCH(update_velocity_position,it3,vd.toKernel(),dt);


         // Because we moved the particles in space we have to map them and re-sync the ghost

         vd.map(RUN_ON_DEVICE);

         vd.template ghost_get<>(RUN_ON_DEVICE);


         // calculate forces or a(tn + 1) Step 2

         calc_forces(vd,NN,sigma12,sigma6,r_cut*r_cut);


         // Integrate the velocity Step 3

         auto it4 = vd.getDomainIteratorGPU();


         CUDA_LAUNCH(update_velocity,it4,vd.toKernel(),dt);


         // After every iteration collect some statistic about the configuration

         if (i % 100 == 0)

         {

             vd.deviceToHostPos();

             vd.deviceToHostProp<0,1,2>();


             // We write the particle position for visualization (Without ghost)

             vd.deleteGhost();

             vd.write_frame("particles_",f);


             // we resync the ghost

             vd.ghost_get<>(RUN_ON_DEVICE);


             // We calculate the energy

             real_number energy = calc_energy(vd,NN,sigma12,sigma6,r_cut*r_cut);

             auto & vcl = create_vcluster();

             vcl.sum(energy);

             vcl.execute();


             // we save the energy calculated at time step i c contain the time-step y contain the energy

             x.add(i);

             y.add({energy});


             // We also print on terminal the value of the energy

             // only one processor (master) write on terminal

             if (vcl.getProcessUnitID() == 0)

                 std::cout << "Energy: " << energy << std::endl;


             f++;

         }

     }


     tsim.stop();

     std::cout << "Time: " << tsim.getwct() << std::endl;


     // Google charts options, it store the options to draw the X Y graph

     GCoptions options;


     // Title of the graph

     options.title = std::string("Energy with time");


     // Y axis name

     options.yAxis = std::string("Energy");


     // X axis name

     options.xAxis = std::string("iteration");


     // width of the line

     options.lineWidth = 1.0;


     // Resolution in x

     options.width = 1280;


     // Resolution in y

     options.heigh = 720;


     // Add zoom capability

     options.more = GC_ZOOM;


     // Object that draw the X Y graph

     GoogleChart cg;


     // Add the graph

     // The graph that it produce is in svg format that can be opened on browser

     cg.AddLinesGraph(x,y,options);


     // Write into html format

     cg.write("gc_plot2_out.html");


     openfpm_finalize();

 }


 #else


 // Fallback entry point used when the file is not compiled with NVCC:
 // nothing is executed and success is reported.
 int main(int argc, char* argv[])
 {
         // the command line is not used
         (void)argc;
         (void)argv;

         return 0;
 }


 #endif


Box< 3, float >

CellList
Class for FAST cell list implementation.
Definition: CellList.hpp:558

Ghost
Definition: Ghost.hpp:40

GoogleChart
Small class to produce graph with Google chart in HTML.
Definition: GoogleChart.hpp:216

GoogleChart::write
void write(std::string file)
Writes the graphs to a file in HTML format using Google Charts.
Definition: GoogleChart.hpp:959

GoogleChart::AddLinesGraph
void AddLinesGraph(openfpm::vector< X > &x, openfpm::vector< Y > &y, const GCoptions &opt)
Add a simple lines graph.
Definition: GoogleChart.hpp:886

Point
This class implements the point shape in an N-dimensional space.
Definition: Point.hpp:28

Point::get
__device__ __host__ const T & get(unsigned int i) const
Get coordinate.
Definition: Point.hpp:172

openfpm::vector
Implementation of 1-D std::vector like structure.
Definition: map_vector.hpp:204

shift
Definition: CellDecomposer.hpp:28

timer
Class for cpu time benchmarking.
Definition: timer.hpp:28

timer::stop
void stop()
Stop the timer.
Definition: timer.hpp:119

timer::start
void start()
Start the timer.
Definition: timer.hpp:90

timer::getwct
double getwct()
Return the elapsed real time.
Definition: timer.hpp:130

vector_dist
Distributed vector.
Definition: vector_dist.hpp:176

vector_dist::write_frame
bool write_frame(std::string out, size_t iteration, int opt=VTK_WRITER)
Output particle position and properties.
Definition: vector_dist.hpp:2612

vector_dist::deviceToHostPos
void deviceToHostPos()
Move the memory from the device to host memory.
Definition: vector_dist.hpp:3093

vector_dist::getGridIterator
grid_dist_id_iterator_dec< Decomposition > getGridIterator(const size_t(&sz)[dim])
Definition: vector_dist.hpp:1870

vector_dist::getPos
auto getPos(vect_dist_key_dx vec_key) -> decltype(vPos.template get< 0 >(vec_key.getKey()))
Get the position of an element.
Definition: vector_dist.hpp:585

vector_dist::ghost_get
void ghost_get(size_t opt=WITH_POSITION)
Synchronizes the properties and positions of the ghost particles.
Definition: vector_dist.hpp:2203

vector_dist::hostToDevicePos
void hostToDevicePos()
Move the memory from the host to device memory.
Definition: vector_dist.hpp:3109

vector_dist::map
void map(size_t opt=NONE)
Moves all the particles that do not belong to the local processor to their respective processors.
Definition: vector_dist.hpp:2158

vector_dist::deviceToHostProp
void deviceToHostProp()
Move the memory from the device to host memory.
Definition: vector_dist.hpp:3085

vector_dist::add
void add()
Add local particle.
Definition: vector_dist.hpp:923

vector_dist::hostToDeviceProp
void hostToDeviceProp()
Move the memory from the host to device memory.
Definition: vector_dist.hpp:3101

vector_dist::deleteGhost
void deleteGhost()
Delete the particles on the ghost.
Definition: vector_dist.hpp:2559

vector_dist::getLastPos
auto getLastPos() -> decltype(vPos.template get< 0 >(0))
Get the position of the last element.
Definition: vector_dist.hpp:958

GCoptions
Google chart options.
Definition: GoogleChart.hpp:26

GCoptions::width
size_t width
width of the graph in pixels
Definition: GoogleChart.hpp:46

GCoptions::heigh
size_t heigh
height of the graph in pixels
Definition: GoogleChart.hpp:49

GCoptions::xAxis
std::string xAxis
X axis name.
Definition: GoogleChart.hpp:32

GCoptions::lineWidth
size_t lineWidth
Width of the line.
Definition: GoogleChart.hpp:56

GCoptions::more
std::string more
more
Definition: GoogleChart.hpp:67

GCoptions::title
std::string title
Title of the chart.
Definition: GoogleChart.hpp:28

GCoptions::yAxis
std::string yAxis
Y axis name.
Definition: GoogleChart.hpp:30

aggregate
Aggregate of properties: from a list of objects it creates a struct that follows the OPENFPM native stru...
Definition: aggregate.hpp:221