#include "Grid/grid_dist_id.hpp"
#include "data_type/aggregate.hpp"
#include "timer.hpp" // needed for the wall-clock timer used after the loop
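// Property indices of the aggregate. U and V are referenced below but their
// definitions fall outside this excerpt; the values here follow the usual
// layout of this example (assumed): U, V first, then U_next, V_next, with
// x/y/z used to index the spacing array.
constexpr int U = 0;
constexpr int V = 1;

constexpr int x = 0;
constexpr int y = 1;
constexpr int z = 2;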
constexpr int U_next = 2;
constexpr int V_next = 3;
typedef sgrid_dist_id_gpu<3,float,aggregate<float,float,float,float>> SparseGridType;
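// In the full example the insertion code below lives in an init() helper;
// its signature and the InsertBlockT typedef are reconstructed here
// (assumed) so the fragment reads as a complete unit. InsertBlockT is the
// block type handed to the insertion lambda of addPoints().
void init(SparseGridType & grid, Box<3,float> & domain)
{
    typedef typename GetAddBlockType<SparseGridType>::type InsertBlockT;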
    // Insert points over the whole grid: the first lambda selects which
    // points to add (all of them), the second initializes the inserted data.
    grid.addPoints([] __device__ (int i, int j, int k)
                   {
                       return true;
                   },
                   [] __device__ (InsertBlockT & data, int i, int j, int k)
                   {
                       data.template get<U>() = 1.0;
                       data.template get<V>() = 0.0;
                   });

    // Insertions are buffered: flush them on the device. smax_ keeps the
    // maximum when the same point was inserted more than once.
    grid.template flush<smax_<U>,smax_<V>>(flush_type::FLUSH_ON_DEVICE);
    // Corners of the perturbed region, in grid coordinates (the z extents
    // must use grid.size(2), not grid.size(1))
    long int x_start = grid.size(0)*1.55f/domain.getHigh(0);
    long int y_start = grid.size(1)*1.55f/domain.getHigh(1);
    long int z_start = grid.size(2)*1.55f/domain.getHigh(2);

    long int x_stop = grid.size(0)*1.85f/domain.getHigh(0);
    long int y_stop = grid.size(1)*1.85f/domain.getHigh(1);
    long int z_stop = grid.size(2)*1.85f/domain.getHigh(2);
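    // Keys delimiting the box of points to perturb; used by addPoints()
    // below but missing from the excerpt, so reconstructed here
    grid_key_dx<3> start({x_start,y_start,z_start});
    grid_key_dx<3> stop({x_stop,y_stop,z_stop});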
    // Perturb U and V inside the box [start,stop]
    grid.addPoints(start,stop,[] __device__ (int i, int j, int k)
                   {
                       return true;
                   },
                   [] __device__ (InsertBlockT & data, int i, int j, int k)
                   {
                       data.template get<U>() = 0.5;
                       data.template get<V>() = 0.24;
                   });

    grid.template flush<smax_<U>,smax_<V>>(flush_type::FLUSH_ON_DEVICE);
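} // end of init(); brace reconstructed to match the opening above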
int main(int argc, char* argv[])
{
    // Initialize the OpenFPM library
    openfpm_init(&argc,&argv);
    // Grid size
    size_t sz[3] = {256,256,256};
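    // The physical domain, boundary conditions, ghost width, time step, and
    // diffusion constants are used below but elided from the excerpt; the
    // values here are the ones this Gray-Scott example conventionally uses
    // (assumed).
    Box<3,float> domain({0.0,0.0,0.0},{2.5,2.5,2.5});
    periodicity<3> bc = {PERIODIC,PERIODIC,PERIODIC};
    Ghost<3,long int> g(1);

    float deltaT = 0.25;
    float du = 2*1e-5; // diffusion constant of species U
    float dv = 1*1e-5; // diffusion constant of species V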
#ifdef TEST_RUN
    // Shortened run for testing
    size_t timeSteps = 300;
#else
    // Number of time steps
    size_t timeSteps = 15000;
#endif
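    // Reaction constants, captured by the stencil lambda below but elided
    // from the excerpt; standard values for this example (assumed)
    float K = 0.053;
    float F = 0.014;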
    SparseGridType grid(sz,domain,g,bc); // the typedef defined above
    // Grid spacing in each dimension
    float spacing[3] = {grid.spacing(0),grid.spacing(1),grid.spacing(2)};
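    // Populate the sparse grid; the call itself is elided from the excerpt
    // and reconstructed here assuming the init() helper sketched above
    init(grid,domain);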
    // Synchronize the ghost layers of U and V on the device
    grid.template ghost_get<U,V>(RUN_ON_DEVICE);
    // Precompute the diffusion prefactors; the spacing is assumed uniform,
    // so spacing[x] stands in for all three directions
    float uFactor = deltaT * du/(spacing[x]*spacing[x]);
    float vFactor = deltaT * dv/(spacing[x]*spacing[x]);
    auto & v_cl = create_vcluster();
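    // Wall-clock timer for the whole simulation; its declaration and start
    // are elided from the excerpt, but getwct() is read after the loop
    timer tot_sim;
    tot_sim.start();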
    for (size_t i = 0; i < timeSteps ; ++i)
    {
        if (v_cl.rank() == 0)
        {std::cout << "STEP: " << i << std::endl;}
        // Gray-Scott update: explicit Euler step with a 6-point Laplacian
        auto func = [uFactor,vFactor,deltaT,F,K] __device__ (float & u_out, float & v_out,
                                                             CpBlockType & u, CpBlockType & v,
                                                             int i, int j, int k){
            // center values; their definitions are elided in the excerpt
            float uc = u(i,j,k);
            float vc = v(i,j,k);

            u_out = uc + uFactor *(u(i-1,j,k) + u(i+1,j,k) +
                                   u(i,j-1,k) + u(i,j+1,k) +
                                   u(i,j,k-1) + u(i,j,k+1) - 6.0*uc) - deltaT * uc*vc*vc
                                   - deltaT * F * (uc - 1.0);

            v_out = vc + vFactor *(v(i-1,j,k) + v(i+1,j,k) +
                                   v(i,j-1,k) + v(i,j+1,k) +
                                   v(i,j,k-1) + v(i,j,k+1) - 6.0*vc) + deltaT * uc*vc*vc
                                   - deltaT * (F+K) * vc;
        };
        // Ping-pong between (U,V) and (U_next,V_next): even steps read U,V
        // and write U_next,V_next; odd steps do the reverse. The even/odd
        // branch is reconstructed here (assumed); the excerpt showed only
        // the two conv2 calls.
        if (i % 2 == 0)
        {
            grid.conv2<U,V,U_next,V_next,1>({0,0,0},{(long int)sz[0]-1,(long int)sz[1]-1,(long int)sz[2]-1},func);

            // Refresh the ghost layers of the freshly written properties
            grid.ghost_get<U_next,V_next>(RUN_ON_DEVICE | SKIP_LABELLING);
        }
        else
        {
            grid.conv2<U_next,V_next,U,V,1>({0,0,0},{(long int)sz[0]-1,(long int)sz[1]-1,(long int)sz[2]-1},func);

            grid.ghost_get<U,V>(RUN_ON_DEVICE | SKIP_LABELLING);
        }
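    } // closing brace of the time loop, elided from the excerpt

    tot_sim.stop();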
    std::cout << "Total simulation: " << tot_sim.getwct() << std::endl;

    // Copy the results back from the device before any host-side output
    grid.deviceToHost<U,V,U_next,V_next>();
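    // The excerpt ends here; the full example writes the final state and
    // shuts the library down, roughly as follows (file name assumed)
    grid.write("final");

    openfpm_finalize();
}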