1 #include "Decomposition/Distribution/BoxDistribution.hpp" 2 #include "Grid/grid_dist_id.hpp" 3 #include "data_type/aggregate.hpp" 69 constexpr
int U_next = 2;
70 constexpr
int V_next = 3;
78 typedef sgrid_dist_id_gpu<3,float,aggregate<float,float,float,float>,
CudaMemory, Dec> SparseGridType;
84 double spacing_x =
grid.spacing(0);
85 double spacing_y =
grid.spacing(1);
86 double spacing_z =
grid.spacing(2);
92 for (
int i = 0 ; i < div[0] ; i++)
94 for (
int j = 0 ; j < div[1] ; j++)
96 for (
int k = 0 ; k < div[2] ; k++)
103 for (
int s = 0 ; s < 3 ; s++)
105 bx.
setLow(s,(
size_t)((sph.center(s) - 0.31)/
grid.spacing(s)));
106 bx.
setHigh(s,(
size_t)((sph.center(s) + 0.31)/
grid.spacing(s)));
109 grid.addPoints([spacing_x,spacing_y,spacing_z,sph] __device__ (
int i,
int j,
int k)
114 if (sph.isInside(pc) )
118 [] __device__ (InsertBlockT & data,
int i,
int j,
int k)
120 data.template get<U>() = 1.0;
121 data.template get<V>() = 0.0;
125 grid.template flush<smax_<U>,
smax_<V>>(flush_type::FLUSH_ON_DEVICE);
126 grid.removeUnusedBuffers();
131 for (
int i = 0 ; i < div[0] ; i++)
133 for (
int j = 0 ; j < div[1] ; j++)
140 bx.setLow(0,(0.4+i)/spacing_x);
141 bx.setHigh(0,(0.6+i)/spacing_x);
143 bx.setLow(1,(0.4+j)/spacing_y);
144 bx.setHigh(1,(0.6+j)/spacing_y);
147 bx.setHigh(2,(
size_t)
grid.size(2));
149 grid.addPoints(bx.getKP1(),bx.getKP2(),[spacing_x,spacing_y,spacing_z,u,c] __device__ (
int i,
int j,
int k)
159 vp.
get(0) = pc.get(1)*u.get(2) - pc.get(2)*u.get(1);
160 vp.
get(1) = pc.get(2)*u.get(0) - pc.get(0)*u.get(2);
161 vp.
get(2) = pc.get(0)*u.get(1) - pc.get(1)*u.get(0);
163 double distance = vp.
norm();
171 [] __device__ (InsertBlockT & data,
int i,
int j,
int k)
173 data.template get<U>() = 1.0;
174 data.template get<V>() = 0.0;
178 grid.template flush<smax_<U>,
smax_<V>>(flush_type::FLUSH_ON_DEVICE);
179 grid.removeUnusedBuffers();
183 for (
int i = 0 ; i < div[0] ; i++)
185 for (
int k = 0 ; k < div[2] ; k++)
192 bx.setLow(0,(0.4+i)/spacing_x);
193 bx.setHigh(0,(0.6+i)/spacing_x);
195 bx.setLow(2,(0.4+k)/spacing_z);
196 bx.setHigh(2,(0.6+k)/spacing_z);
199 bx.setHigh(1,(
size_t)
grid.size(1));
201 grid.addPoints(bx.getKP1(),bx.getKP2(),[spacing_x,spacing_y,spacing_z,u,c] __device__ (
int i,
int j,
int k)
211 vp.
get(0) = pc.get(1)*u.get(2) - pc.get(2)*u.get(1);
212 vp.
get(1) = pc.get(2)*u.get(0) - pc.get(0)*u.get(2);
213 vp.
get(2) = pc.get(0)*u.get(1) - pc.get(1)*u.get(0);
215 double distance = vp.
norm();
223 [] __device__ (InsertBlockT & data,
int i,
int j,
int k)
225 data.template get<U>() = 1.0;
226 data.template get<V>() = 0.0;
230 grid.template flush<smax_<U>,
smax_<V>>(flush_type::FLUSH_ON_DEVICE);
231 grid.removeUnusedBuffers();
235 for (
int j = 0 ; j < div[1] ; j++)
237 for (
int k = 0 ; k < div[2] ; k++)
244 bx.setLow(1,(0.4+j)/spacing_y);
245 bx.setHigh(1,(0.6+j)/spacing_y);
247 bx.setLow(2,(0.4+k)/spacing_z);
248 bx.setHigh(2,(0.6+k)/spacing_z);
251 bx.setHigh(0,(
size_t)
grid.size(0));
253 grid.addPoints(bx.getKP1(),bx.getKP2(),[spacing_x,spacing_y,spacing_z,u,c] __device__ (
int i,
int j,
int k)
263 vp.
get(0) = pc.get(1)*u.get(2) - pc.get(2)*u.get(1);
264 vp.
get(1) = pc.get(2)*u.get(0) - pc.get(0)*u.get(2);
265 vp.
get(2) = pc.get(0)*u.get(1) - pc.get(1)*u.get(0);
267 double distance = vp.
norm();
275 [] __device__ (InsertBlockT & data,
int i,
int j,
int k)
277 data.template get<U>() = 1.0;
278 data.template get<V>() = 0.0;
282 grid.template flush<smax_<U>,
smax_<V>>(flush_type::FLUSH_ON_DEVICE);
283 grid.removeUnusedBuffers();
289 long int x_start =
grid.size(0)*0.4f/domain.
getHigh(0);
290 long int y_start =
grid.size(1)*0.4f/domain.
getHigh(1);
291 long int z_start =
grid.size(1)*0.4f/domain.
getHigh(2);
293 long int x_stop =
grid.size(0)*0.6f/domain.
getHigh(0);
294 long int y_stop =
grid.size(1)*0.6f/domain.
getHigh(1);
295 long int z_stop =
grid.size(1)*0.6f/domain.
getHigh(2);
302 grid.addPoints(start,stop,[] __device__ (
int i,
int j,
int k)
306 [] __device__ (InsertBlockT & data,
int i,
int j,
int k)
308 data.template get<U>() = 0.5;
309 data.template get<V>() = 0.24;
313 grid.template flush<smin_<U>,
smax_<V>>(flush_type::FLUSH_ON_DEVICE);
319 int main(
int argc,
char* argv[])
321 openfpm_init(&argc,&argv);
324 auto & v_cl = create_vcluster();
327 getPrimeFactors(v_cl.size(),facts);
331 for (
int i = 0 ; i < 3 ; i++)
334 for (
int i = 0 ; i < facts.
size() ; i++)
335 {div[i % 3] *= facts.get(i);}
340 Box<3,float> domain({0.0,0.0,0.0},{div[0]*1.0f,div[1]*1.0f,div[2]*1.0f});
343 size_t sz[3] = {64*div[0],64*div[1],64*div[2]};
362 size_t timeSteps = 300;
364 size_t timeSteps = 15000;
371 SparseGridType
grid(sz,domain,g,bc,0,gdist);
374 float spacing[3] = {
grid.spacing(0),
grid.spacing(1),
grid.spacing(2)};
378 grid.deviceToHost<U,V,U_next,V_next>();
385 grid.template ghost_get<U,V>(RUN_ON_DEVICE);
389 float uFactor = deltaT * du/(spacing[x]*spacing[x]);
390 float vFactor = deltaT * dv/(spacing[x]*spacing[x]);
395 for (
size_t i = 0; i < timeSteps ; ++i)
397 if (v_cl.rank() == 0)
398 {std::cout <<
"STEP: " << i << std::endl;}
411 auto func = [uFactor,vFactor,deltaT,
F,K] __device__ (
float & u_out,
float & v_out,
412 CpBlockType & u, CpBlockType & v,
413 int i,
int j,
int k){
418 u_out = uc + uFactor *(u(i-1,j,k) + u(i+1,j,k) +
419 u(i,j-1,k) + u(i,j+1,k) +
420 u(i,j,k-1) + u(i,j,k+1) - 6.0f*uc) - deltaT * uc*vc*vc
421 - deltaT *
F * (uc - 1.0f);
424 v_out = vc + vFactor *(v(i-1,j,k) + v(i+1,j,k) +
425 v(i,j+1,k) + v(i,j-1,k) +
426 v(i,j,k-1) + v(i,j,k+1) - 6.0f*vc) + deltaT * uc*vc*vc
427 - deltaT * (
F+K) * vc;
436 grid.conv2<U,V,U_next,V_next,1>({0,0,0},{(
long int)sz[0]-1,(
long int)sz[1]-1,(
long int)sz[2]-1},func);
440 grid.ghost_get<U_next,V_next>(RUN_ON_DEVICE | SKIP_LABELLING);
444 grid.conv2<U_next,V_next,U,V,1>({0,0,0},{(
long int)sz[0]-1,(
long int)sz[1]-1,(
long int)sz[2]-1},func);
447 grid.ghost_get<U,V>(RUN_ON_DEVICE | SKIP_LABELLING);
462 std::cout <<
"Total simulation: " << tot_sim.
getwct() << std::endl;
464 grid.deviceToHost<U,V,U_next,V_next>();
498 int main(
int argc,
char* argv[])
grid_key_dx is the key to access any element in the grid
This class implement the point shape in an N-dimensional space.
double getwct()
Return the elapsed real time.
__device__ __host__ T norm()
norm of the vector
__device__ __host__ void setHigh(int i, T val)
set the high interval of the box
This class decompose a space into sub-sub-domains and distribute them across processors.
__device__ __host__ const T & get(unsigned int i) const
Get coordinate.
__device__ __host__ void setLow(int i, T val)
set the low interval of the box
KeyT const ValueT ValueT OffsetIteratorT OffsetIteratorT int
[in] The number of segments that comprise the sorting data
void start()
Start the timer.
get the type of the block
This class implement the Sphere concept in an N-dimensional space.
OutputIteratorT OffsetT ReductionOpT OuputT init
< [in] The initial value of the reduction
__device__ __host__ T getHigh(int i) const
get the high interval of the box
Class for cpu time benchmarking.
void stop()
Stop the timer.
[v_transform metafunction]