#include "Decomposition/Distribution/BoxDistribution.hpp"
#include "Grid/grid_dist_id.hpp"
#include "data_type/aggregate.hpp"

constexpr int U_next = 2;
constexpr int V_next = 3;

typedef sgrid_dist_id_gpu<3,float,aggregate<float,float,float,float>,CudaMemory,Dec> SparseGridType;
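The aggregate packs four float properties per grid point. Since U_next and V_next occupy slots 2 and 3, the current concentrations U and V are presumably defined just above this excerpt as the first two slots. A minimal sketch of the assumed convention:

// Assumed property indices for the current concentrations; not part of this
// excerpt, but implied by U_next = 2 and V_next = 3.
constexpr int U = 0;
constexpr int V = 1;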
grid.addPoints([] __device__ (int i, int j, int k)
               {
                   // insert every point of the domain
                   return true;
               },
               [] __device__ (InsertBlockT & data, int i, int j, int k)
               {
                   // initial state: U = 1, V = 0 everywhere
                   data.template get<U>() = 1.0;
                   data.template get<V>() = 0.0;
               });

grid.template flush<smax_<U>,smax_<V>>(flush_type::FLUSH_ON_DEVICE);
long int x_start = grid.size(0)*1.55f/domain.getHigh(0);
long int y_start = grid.size(1)*1.55f/domain.getHigh(1);
long int z_start = grid.size(2)*1.55f/domain.getHigh(2);

long int x_stop = grid.size(0)*1.85f/domain.getHigh(0);
long int y_stop = grid.size(1)*1.85f/domain.getHigh(1);
long int z_stop = grid.size(2)*1.85f/domain.getHigh(2);
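These expressions map the physical coordinates 1.55 and 1.85 to grid indices, assuming the domain starts at 0 so that index = grid size * coordinate / domain extent. For example, with the 256-point grid used below and an assumed domain extent of 2.5 per axis (the domain box itself is defined outside this excerpt):

\[ x_{\mathrm{start}} = \left\lfloor 256 \cdot \tfrac{1.55}{2.5} \right\rfloor = 158, \qquad x_{\mathrm{stop}} = \left\lfloor 256 \cdot \tfrac{1.85}{2.5} \right\rfloor = 189, \]

so the seeded region is a box of roughly 31 points per side near the middle of the domain.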
grid.addPoints(start,stop,[] __device__ (int i, int j, int k)
               {
                   // insert every point of the box [start,stop]
                   return true;
               },
               [] __device__ (InsertBlockT & data, int i, int j, int k)
               {
                   // perturb the seeded region: U = 0.5, V = 0.24
                   data.template get<U>() = 0.5;
                   data.template get<V>() = 0.24;
               });

grid.template flush<smax_<U>,smax_<V>>(flush_type::FLUSH_ON_DEVICE);
int main(int argc, char* argv[])
{
    openfpm_init(&argc,&argv);

    size_t sz[3] = {256,256,256};

#ifdef TEST_RUN
    size_t timeSteps = 300;      // shorter run used for testing
#else
    size_t timeSteps = 15000;    // full-length simulation
#endif

    SparseGridType grid(sz,domain,g,bc);
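The constructor also takes a simulation domain, a ghost layer, and boundary conditions, which are set up in lines not shown in this excerpt. A minimal sketch of that setup; the domain extent, the ghost width of one point (enough for the 7-point stencil), and the periodic boundaries are assumptions:

// assumed setup, not part of the excerpt
Box<3,float> domain({0.0,0.0,0.0},{2.5,2.5,2.5});   // physical domain
Ghost<3,long int> g(1);                             // one-point ghost layer
size_t bc[3] = {PERIODIC,PERIODIC,PERIODIC};        // periodic in all directions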
float spacing[3] = {grid.spacing(0),grid.spacing(1),grid.spacing(2)};

grid.template ghost_get<U,V>(RUN_ON_DEVICE);

float uFactor = deltaT * du/(spacing[x]*spacing[x]);
float vFactor = deltaT * dv/(spacing[x]*spacing[x]);
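uFactor and vFactor fold the diffusion constants du and dv, the time step deltaT (all defined in lines not shown here), and the uniform grid spacing h into the dimensionless factors used by the stencil below:

\[ \text{uFactor} = \frac{D_u\,\Delta t}{h^2}, \qquad \text{vFactor} = \frac{D_v\,\Delta t}{h^2}. \]

Since the update is explicit, these factors must stay small; for pure diffusion on the 3D 7-point stencil the usual stability bound is \( D\,\Delta t / h^2 \le 1/6 \).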
auto & v_cl = create_vcluster();

for (size_t i = 0; i < timeSteps; ++i)
{
    if (v_cl.rank() == 0)
    {std::cout << "STEP: " << i << std::endl;}
auto func = [uFactor,vFactor,deltaT,F,K] __device__ (float & u_out, float & v_out,
                                                     CpBlockType & u, CpBlockType & v,
                                                     int i, int j, int k)
{
    // values of u and v at the centre of the stencil
    float uc = u(i,j,k);
    float vc = v(i,j,k);

    u_out = uc + uFactor *(u(i-1,j,k) + u(i+1,j,k) +
                           u(i,j-1,k) + u(i,j+1,k) +
                           u(i,j,k-1) + u(i,j,k+1) - 6.0f*uc) - deltaT * uc*vc*vc
                 - deltaT * F * (uc - 1.0f);

    v_out = vc + vFactor *(v(i-1,j,k) + v(i+1,j,k) +
                           v(i,j+1,k) + v(i,j-1,k) +
                           v(i,j,k-1) + v(i,j,k+1) - 6.0f*vc) + deltaT * uc*vc*vc
                 - deltaT * (F+K) * vc;
};
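This is an explicit (forward Euler) update of the Gray-Scott reaction-diffusion system: the bracketed sums are the 7-point finite-difference Laplacian, already scaled through uFactor and vFactor, and the remaining terms are the reaction part of

\[ \partial_t u = D_u \nabla^2 u - u v^2 + F\,(1-u), \qquad \partial_t v = D_v \nabla^2 v + u v^2 - (F+K)\,v, \]

so that, per point, \( u_{\mathrm{out}} = u_c + \Delta t\,[\,D_u \nabla_h^2 u - u_c v_c^2 + F(1-u_c)\,] \) and analogously for \( v_{\mathrm{out}} \).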
cudaDeviceSynchronize();

grid.conv2<U,V,U_next,V_next,1>({0,0,0},{(long int)sz[0]-1,(long int)sz[1]-1,(long int)sz[2]-1},func);

cudaDeviceSynchronize();

std::cout << "Conv " << tconv.getwct() << std::endl;
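tconv measures the wall-clock time of one convolution; the timer object itself is created, started, and stopped in lines not shown here. A minimal sketch of the assumed pattern, using OpenFPM's timer class:

// assumed timing pattern, not part of the excerpt
timer tconv;
tconv.start();
// ... conv2 call, bracketed by cudaDeviceSynchronize() ...
tconv.stop();          // getwct() then returns the elapsed wall-clock time in seconds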
grid.ghost_get<U_next,V_next>(RUN_ON_DEVICE | SKIP_LABELLING);

grid.conv2<U_next,V_next,U,V,1>({0,0,0},{(long int)sz[0]-1,(long int)sz[1]-1,(long int)sz[2]-1},func);

grid.ghost_get<U,V>(RUN_ON_DEVICE | SKIP_LABELLING);
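Each loop iteration therefore advances two time steps: the first conv2 reads U,V and writes U_next,V_next, the second reads U_next,V_next and writes back into U,V, so the two property pairs act as a ping-pong double buffer and no copy between them is needed. The same idea in a stripped-down, library-free form (a conceptual sketch, not OpenFPM code):

#include <cstdio>
#include <vector>

// Conceptual ping-pong sketch: two buffers alternate roles, so every loop
// iteration advances two time steps without copying data back.
int main()
{
    std::vector<float> a(8, 1.0f), b(8, 1.0f);
    a[4] = 2.0f;                                          // initial bump to diffuse
    for (int step = 0; step < 10; ++step)
    {
        for (std::size_t i = 1; i + 1 < a.size(); ++i)    // step 1: read a, write b
            b[i] = a[i] + 0.1f * (a[i-1] - 2.0f*a[i] + a[i+1]);
        for (std::size_t i = 1; i + 1 < b.size(); ++i)    // step 2: read b, write a
            a[i] = b[i] + 0.1f * (b[i-1] - 2.0f*b[i] + b[i+1]);
    }
    std::printf("a[4] = %f\n", a[4]);                     // bump has been smoothed out
    return 0;
}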
std::cout << "Total simulation: " << tot_sim.getwct() << std::endl;

grid.deviceToHost<U,V,U_next,V_next>();
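deviceToHost copies the four properties back to host memory so that the final state can be inspected or written to disk. A sketch of what typically follows, assuming the usual OpenFPM grid writer is used (the file name is illustrative):

// assumed epilogue, not part of the excerpt
grid.write("final");     // dump the grid to a VTK file for visualisation
openfpm_finalize();      // counterpart of openfpm_init()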