#include "Decomposition/Distribution/BoxDistribution.hpp"
#include "Grid/grid_dist_id.hpp"
#include "data_type/aggregate.hpp"
constexpr int U = 0;
constexpr int V = 1;
constexpr int U_next = 2;
constexpr int V_next = 3;
typedef sgrid_dist_id_gpu<3,float,aggregate<float,float,float,float>,CudaMemory,Dec> SparseGridType;
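// Template arguments of the sparse GPU grid: 3 spatial dimensions, float coordinates,
// four float properties per point (U, V, U_next, V_next), CUDA device memory, and the
// decomposition type Dec defined for this example.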
void init(SparseGridType & grid, Box<3,float> & domain)
{
    grid.addPoints([] __device__ (int i, int j, int k)
                   {
                       // insertion predicate: create a point at every grid location
                       return true;
                   },
                   [] __device__ (InsertBlockT & data, int i, int j, int k)
                   {
                       // background initial condition: U = 1, V = 0
                       data.template get<U>() = 1.0;
                       data.template get<V>() = 0.0;
                   });

    grid.template flush<smax_<U>,smax_<V>>(flush_type::FLUSH_ON_DEVICE);
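    // addPoints only stages the insertions in device-side insert buffers; flush merges
    // them into the sparse-grid data structure. The smax_ functors resolve duplicate
    // insertions of the same point by keeping the maximum value.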
    long int x_start = grid.size(0)*1.55f/domain.getHigh(0);
    long int y_start = grid.size(1)*1.55f/domain.getHigh(1);
    long int z_start = grid.size(2)*1.55f/domain.getHigh(2);

    long int x_stop = grid.size(0)*1.85f/domain.getHigh(0);
    long int y_stop = grid.size(1)*1.85f/domain.getHigh(1);
    long int z_stop = grid.size(2)*1.85f/domain.getHigh(2);
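    // These indices map the physical interval [1.55,1.85] along each axis onto grid
    // coordinates; the resulting small box is where the perturbation is seeded below.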
    grid_key_dx<3> start({x_start,y_start,z_start});
    grid_key_dx<3> stop({x_stop,y_stop,z_stop});

    grid.addPoints(start,stop,[] __device__ (int i, int j, int k)
                   {
                       // insert every point of the perturbation box
                       return true;
                   },
                   [] __device__ (InsertBlockT & data, int i, int j, int k)
                   {
                       // perturbed values that trigger pattern formation
                       data.template get<U>() = 0.5;
                       data.template get<V>() = 0.24;
                   });

    grid.template flush<smax_<U>,smax_<V>>(flush_type::FLUSH_ON_DEVICE);
}
int main(int argc, char* argv[])
{
    // initialize the OpenFPM library
    openfpm_init(&argc,&argv);

    // grid resolution
    size_t sz[3] = {256,256,256};
#ifdef TEST_RUN
    size_t timeSteps = 300;   // short run for tests
#else
    size_t timeSteps = 15000; // full-length simulation
#endif
    SparseGridType grid(sz,domain,g,bc);
    float spacing[3] = {grid.spacing(0),grid.spacing(1),grid.spacing(2)};
    grid.template ghost_get<U,V>(RUN_ON_DEVICE);
    float uFactor = deltaT * du/(spacing[x]*spacing[x]);
    float vFactor = deltaT * dv/(spacing[x]*spacing[x]);
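    // uFactor and vFactor fold the time step and the diffusion constants into the
    // prefactor deltaT*D/h^2 of the discrete Laplacian; using spacing[x] in all terms
    // assumes an isotropic grid. deltaT, du, dv (and F, K below) are the model
    // parameters defined earlier in the full example.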
    auto & v_cl = create_vcluster();
    // time the whole simulation loop (tot_sim is read after the loop)
    timer tot_sim;
    tot_sim.start();

    for (size_t i = 0 ; i < timeSteps ; ++i)
    {
        if (v_cl.rank() == 0)
        {std::cout << "STEP: " << i << std::endl;}
        auto func = [uFactor,vFactor,deltaT,F,K] __device__ (float & u_out, float & v_out,
                                                             CpBlockType & u, CpBlockType & v,
                                                             int i, int j, int k)
        {
            // values of U and V at the stencil center
            float uc = u(i,j,k);
            float vc = v(i,j,k);

            u_out = uc + uFactor *(u(i-1,j,k) + u(i+1,j,k) +
                                   u(i,j-1,k) + u(i,j+1,k) +
                                   u(i,j,k-1) + u(i,j,k+1) - 6.0f*uc) - deltaT * uc*vc*vc
                       - deltaT * F * (uc - 1.0f);

            v_out = vc + vFactor *(v(i-1,j,k) + v(i+1,j,k) +
                                   v(i,j+1,k) + v(i,j-1,k) +
                                   v(i,j,k-1) + v(i,j,k+1) - 6.0f*vc) + deltaT * uc*vc*vc
                       - deltaT * (F+K) * vc;
        };
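        // The functor above performs one forward-Euler step of the Gray-Scott
        // reaction-diffusion system, using a 7-point stencil for the Laplacian:
        //   du/dt = Du*lap(u) - u*v^2 + F*(1 - u)
        //   dv/dt = Dv*lap(v) + u*v^2 - (F + K)*v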
        cudaDeviceSynchronize();
        timer tconv;
        tconv.start();

        grid.conv2<U,V,U_next,V_next,1>({0,0,0},
                                        {(long int)sz[0]-1,(long int)sz[1]-1,(long int)sz[2]-1},
                                        func);

        cudaDeviceSynchronize();
        tconv.stop();
        std::cout << "Conv " << tconv.getwct() << std::endl;
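        // conv2 evaluates the functor on every existing point of the sparse grid,
        // reading two source properties (U,V) and writing two destinations
        // (U_next,V_next); the trailing template argument 1 is the stencil extension.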
        grid.ghost_get<U_next,V_next>(RUN_ON_DEVICE | SKIP_LABELLING);
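        // RUN_ON_DEVICE exchanges the ghost layers using device-resident buffers;
        // SKIP_LABELLING reuses the labelling computed by the previous ghost_get, which
        // is valid here because no points are added or removed inside the time loop.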
        grid.conv2<U_next,V_next,U,V,1>({0,0,0},
                                        {(long int)sz[0]-1,(long int)sz[1]-1,(long int)sz[2]-1},
                                        func);
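        // The two conv2 calls ping-pong between (U,V) and (U_next,V_next): the second
        // half-step writes back into U and V, so no copy of the new solution is needed.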
        grid.ghost_get<U,V>(RUN_ON_DEVICE | SKIP_LABELLING);
    }
    tot_sim.stop();
    std::cout << "Total simulation: " << tot_sim.getwct() << std::endl;
    grid.deviceToHost<U,V,U_next,V_next>();
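    // transfer all four fields back to host memory before any host-side
    // post-processing or output of the final state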