1#include "Decomposition/Distribution/BoxDistribution.hpp"
2#include "Grid/grid_dist_id.hpp"
3#include "data_type/aggregate.hpp"
69constexpr int U_next = 2;
70constexpr int V_next = 3;
78typedef sgrid_dist_id_gpu<3,float,aggregate<float,float,float,float>,
CudaMemory, Dec> SparseGridType;
84 double spacing_x =
grid.spacing(0);
85 double spacing_y =
grid.spacing(1);
86 double spacing_z =
grid.spacing(2);
92 for (
int i = 0 ; i < div[0] ; i++)
94 for (
int j = 0 ; j < div[1] ; j++)
96 for (
int k = 0 ; k < div[2] ; k++)
103 for (
int s = 0 ; s < 3 ; s++)
105 bx.
setLow(s,(
size_t)((sph.center(s) - 0.31)/
grid.spacing(s)));
106 bx.
setHigh(s,(
size_t)((sph.center(s) + 0.31)/
grid.spacing(s)));
109 grid.addPoints([spacing_x,spacing_y,spacing_z,sph] __device__ (
int i,
int j,
int k)
114 if (sph.isInside(pc) )
118 [] __device__ (InsertBlockT & data,
int i,
int j,
int k)
120 data.template get<U>() = 1.0;
121 data.template get<V>() = 0.0;
125 grid.template flush<smax_<U>,
smax_<V>>(flush_type::FLUSH_ON_DEVICE);
126 grid.removeUnusedBuffers();
131 for (
int i = 0 ; i < div[0] ; i++)
133 for (
int j = 0 ; j < div[1] ; j++)
140 bx.setLow(0,(0.4+i)/spacing_x);
141 bx.setHigh(0,(0.6+i)/spacing_x);
143 bx.setLow(1,(0.4+j)/spacing_y);
144 bx.setHigh(1,(0.6+j)/spacing_y);
147 bx.setHigh(2,(
size_t)
grid.size(2));
149 grid.addPoints(bx.getKP1(),bx.getKP2(),[spacing_x,spacing_y,spacing_z,u,c] __device__ (
int i,
int j,
int k)
159 vp.
get(0) = pc.get(1)*u.get(2) - pc.get(2)*u.get(1);
160 vp.
get(1) = pc.get(2)*u.get(0) - pc.get(0)*u.get(2);
161 vp.
get(2) = pc.get(0)*u.get(1) - pc.get(1)*u.get(0);
163 double distance = vp.
norm();
171 [] __device__ (InsertBlockT & data,
int i,
int j,
int k)
173 data.template get<U>() = 1.0;
174 data.template get<V>() = 0.0;
178 grid.template flush<smax_<U>,
smax_<V>>(flush_type::FLUSH_ON_DEVICE);
179 grid.removeUnusedBuffers();
183 for (
int i = 0 ; i < div[0] ; i++)
185 for (
int k = 0 ; k < div[2] ; k++)
192 bx.setLow(0,(0.4+i)/spacing_x);
193 bx.setHigh(0,(0.6+i)/spacing_x);
195 bx.setLow(2,(0.4+k)/spacing_z);
196 bx.setHigh(2,(0.6+k)/spacing_z);
199 bx.setHigh(1,(
size_t)
grid.size(1));
201 grid.addPoints(bx.getKP1(),bx.getKP2(),[spacing_x,spacing_y,spacing_z,u,c] __device__ (
int i,
int j,
int k)
211 vp.
get(0) = pc.get(1)*u.get(2) - pc.get(2)*u.get(1);
212 vp.
get(1) = pc.get(2)*u.get(0) - pc.get(0)*u.get(2);
213 vp.
get(2) = pc.get(0)*u.get(1) - pc.get(1)*u.get(0);
215 double distance = vp.
norm();
223 [] __device__ (InsertBlockT & data,
int i,
int j,
int k)
225 data.template get<U>() = 1.0;
226 data.template get<V>() = 0.0;
230 grid.template flush<smax_<U>,
smax_<V>>(flush_type::FLUSH_ON_DEVICE);
231 grid.removeUnusedBuffers();
235 for (
int j = 0 ; j < div[1] ; j++)
237 for (
int k = 0 ; k < div[2] ; k++)
244 bx.setLow(1,(0.4+j)/spacing_y);
245 bx.setHigh(1,(0.6+j)/spacing_y);
247 bx.setLow(2,(0.4+k)/spacing_z);
248 bx.setHigh(2,(0.6+k)/spacing_z);
251 bx.setHigh(0,(
size_t)
grid.size(0));
253 grid.addPoints(bx.getKP1(),bx.getKP2(),[spacing_x,spacing_y,spacing_z,u,c] __device__ (
int i,
int j,
int k)
263 vp.
get(0) = pc.get(1)*u.get(2) - pc.get(2)*u.get(1);
264 vp.
get(1) = pc.get(2)*u.get(0) - pc.get(0)*u.get(2);
265 vp.
get(2) = pc.get(0)*u.get(1) - pc.get(1)*u.get(0);
267 double distance = vp.
norm();
275 [] __device__ (InsertBlockT & data,
int i,
int j,
int k)
277 data.template get<U>() = 1.0;
278 data.template get<V>() = 0.0;
282 grid.template flush<smax_<U>,
smax_<V>>(flush_type::FLUSH_ON_DEVICE);
283 grid.removeUnusedBuffers();
289 long int x_start =
grid.size(0)*0.4f/domain.getHigh(0);
290 long int y_start =
grid.size(1)*0.4f/domain.getHigh(1);
291 long int z_start =
grid.size(1)*0.4f/domain.getHigh(2);
293 long int x_stop =
grid.size(0)*0.6f/domain.getHigh(0);
294 long int y_stop =
grid.size(1)*0.6f/domain.getHigh(1);
295 long int z_stop =
grid.size(1)*0.6f/domain.getHigh(2);
302 grid.addPoints(start,stop,[] __device__ (
int i,
int j,
int k)
306 [] __device__ (InsertBlockT & data,
int i,
int j,
int k)
308 data.template get<U>() = 0.5;
309 data.template get<V>() = 0.24;
313 grid.template flush<smin_<U>,
smax_<V>>(flush_type::FLUSH_ON_DEVICE);
319int main(
int argc,
char* argv[])
321 openfpm_init(&argc,&argv);
324 auto & v_cl = create_vcluster();
327 getPrimeFactors(v_cl.size(),facts);
331 for (
int i = 0 ; i < 3 ; i++)
334 for (
int i = 0 ; i < facts.
size() ; i++)
335 {div[i % 3] *= facts.get(i);}
340 Box<3,float> domain({0.0,0.0,0.0},{div[0]*1.0f,div[1]*1.0f,div[2]*1.0f});
343 size_t sz[3] = {64*div[0],64*div[1],64*div[2]};
362 size_t timeSteps = 300;
364 size_t timeSteps = 15000;
371 SparseGridType
grid(sz,domain,g,bc,0,gdist);
374 float spacing[3] = {
grid.spacing(0),
grid.spacing(1),
grid.spacing(2)};
378 grid.deviceToHost<U,V,U_next,V_next>();
385 grid.template ghost_get<U,V>(RUN_ON_DEVICE);
389 float uFactor = deltaT * du/(spacing[x]*spacing[x]);
390 float vFactor = deltaT * dv/(spacing[x]*spacing[x]);
395 for (
size_t i = 0; i < timeSteps ; ++i)
397 if (v_cl.rank() == 0)
398 {std::cout <<
"STEP: " << i << std::endl;}
411 auto func = [uFactor,vFactor,deltaT,
F,K] __device__ (
float & u_out,
float & v_out,
412 CpBlockType & u, CpBlockType & v,
413 int i,
int j,
int k){
418 u_out = uc + uFactor *(u(i-1,j,k) + u(i+1,j,k) +
419 u(i,j-1,k) + u(i,j+1,k) +
420 u(i,j,k-1) + u(i,j,k+1) - 6.0f*uc) - deltaT * uc*vc*vc
421 - deltaT *
F * (uc - 1.0f);
424 v_out = vc + vFactor *(v(i-1,j,k) + v(i+1,j,k) +
425 v(i,j+1,k) + v(i,j-1,k) +
426 v(i,j,k-1) + v(i,j,k+1) - 6.0f*vc) + deltaT * uc*vc*vc
427 - deltaT * (
F+K) * vc;
436 grid.conv2<U,V,U_next,V_next,1>({0,0,0},{(
long int)sz[0]-1,(
long int)sz[1]-1,(
long int)sz[2]-1},func);
440 grid.ghost_get<U_next,V_next>(RUN_ON_DEVICE | SKIP_LABELLING);
444 grid.conv2<U_next,V_next,U,V,1>({0,0,0},{(
long int)sz[0]-1,(
long int)sz[1]-1,(
long int)sz[2]-1},func);
447 grid.ghost_get<U,V>(RUN_ON_DEVICE | SKIP_LABELLING);
462 std::cout <<
"Total simulation: " << tot_sim.
getwct() << std::endl;
464 grid.deviceToHost<U,V,U_next,V_next>();
498int main(
int argc,
char* argv[])
This class represent an N-dimensional box.
__device__ __host__ void setHigh(int i, T val)
set the high interval of the box
__device__ __host__ void setLow(int i, T val)
set the low interval of the box
This class decompose a space into sub-sub-domains and distribute them across processors.
This class implement the point shape in an N-dimensional space.
__device__ __host__ const T & get(unsigned int i) const
Get coordinate.
__device__ __host__ T norm() const
norm of the vector
This class implement the Sphere concept in an N-dimensional space.
grid_key_dx is the key to access any element in the grid
Implementation of 1-D std::vector like structure.
Class for cpu time benchmarking.
void stop()
Stop the timer.
void start()
Start the timer.
double getwct()
Return the elapsed real time.
KeyT const ValueT ValueT OffsetIteratorT OffsetIteratorT int
[in] The number of segments that comprise the sorting data
OutputIteratorT OffsetT ReductionOpT OuputT init
< [in] The initial value of the reduction
[v_transform metafunction]
get the type of the block