#ifndef GRID_DIST_ID_ITERATOR_DEC_GPU_CUH_
#define GRID_DIST_ID_ITERATOR_DEC_GPU_CUH_

#include "Grid/Iterators/grid_dist_id_iterator.hpp"
#include "Grid/grid_dist_util.hpp"
#include "Grid/Iterators/grid_dist_id_iterator_util.hpp"
#include "Grid/cuda/grid_dist_id_kernels.cuh"
//! Select, through the index impl, how the functor kernel is launched
template<unsigned int impl>
struct launch_call_impl
{
    template<typename loc_grid_type, typename ite_type, typename itd_type, typename functor_type, typename ... argsT>
    inline static void call(loc_grid_type & loc_grid, ite_type & ite, itd_type & itd, functor_type functor, argsT ... args)
    {
        CUDA_LAUNCH(grid_apply_functor,ite,loc_grid.toKernel(),itd,functor,args...);
    }
};
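// For reference: grid_apply_functor (from grid_dist_id_kernels.cuh) is in
// essence a __global__ trampoline that hands the grid, the iterator info and
// the extra arguments to the functor. A minimal sketch, assuming this shape:
//
// template<typename grid_type, typename ite_type, typename func_t, typename ... args_t>
// __global__ void grid_apply_functor(grid_type g, ite_type ite, func_t f, args_t ... args)
// {
//     f(g,ite,args...);
// }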
//! Specialization that additionally hands the functor a per-block shared flag
template<>
struct launch_call_impl<1>
{
    template<typename loc_grid_type, typename ite_type, typename itd_type, typename functor_type, typename ... argsT>
    inline static void call(loc_grid_type & loc_grid, ite_type & ite, itd_type & itd, functor_type f, argsT ... args)
    {
#ifdef CUDIFY_USE_CUDA
        CUDA_LAUNCH(grid_apply_functor_shared_bool,ite,loc_grid.toKernel(),itd,f,args...);
#else
        // Fallback for back-ends without the dedicated kernel: wrap the call
        // in a device lambda that allocates the shared flag itself
        auto g = loc_grid.toKernel();

        auto lamb = [g,itd,f,args ...] __device__ () mutable
        {
            __shared__ bool is_empty_block;

            f(g,itd,is_empty_block,args...);
        };

        CUDA_LAUNCH_LAMBDA_TLS(ite,lamb);
#endif
    }
};
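// A functor launched through this specialization must accept the extra
// per-block shared flag. A minimal sketch of a compatible functor; the name
// and body are hypothetical, only the signature follows the call above:
//
// struct skip_empty_functor
// {
//     template<typename grid_type, typename itd_type, typename ... args_t>
//     __device__ void operator()(grid_type & g, itd_type & itd, bool & is_empty_block, args_t ... args)
//     {
//         // is_empty_block lives in shared memory (one flag per CUDA block),
//         // so the threads of a block can cooperatively mark the block empty
//         // and return early without touching the grid
//     }
// };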
//! Given the decomposition it creates an iterator on the device grids
template<typename Decomposition, typename deviceGrids, bool ghost_or_domain = false>
class grid_dist_id_iterator_gpu
{
    // ...

    //! Spacing
    typename Decomposition::stype spacing[Decomposition::dims];
    // ... (constructor bodies, partially elided)

    for (size_t i = 0 ; i < Decomposition::dims ; i++)
    { /* ... */ }

    // Build gdb_ext, the extension of each local grid (domain and
    // domain + ghost boxes), together with the grid spacing
    create_gdb_ext<Decomposition::dims,Decomposition>(gdb_ext,dec,sz,dec.getDomain(),spacing);

    // ... (the second constructor sets up gdb_ext the same way)

    create_gdb_ext<Decomposition::dims,Decomposition>(gdb_ext,dec,sz,dec.getDomain(),spacing);
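    // For illustration: gdb_ext holds one GBoxes entry per local grid. A
    // hypothetical inspection loop (the field names Dbox, GDbox and origin
    // follow the GBoxes documentation below; treat this as a sketch):
    //
    // for (size_t i = 0 ; i < gdb_ext.size() ; i++)
    // {
    //     auto & gb = gdb_ext.get(i);
    //     // gb.origin : position of the local grid inside the global grid
    //     // gb.Dbox   : domain box, gb.GDbox : domain + ghost box
    // }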
    /*! \brief Launch a functor with a particular kernel
     *
     */
    template<unsigned int impl = 0, typename func_t, typename ... argsType>
    inline void launch(func_t functor, argsType ... args)
    {
        // ... (loop over the local grids; g_c is the current grid index)

        ite_gpu_dist<Decomposition::dims> itd;

        // ... (kbox is the intersection between the iteration range and the
        //      current local grid; intersect is false when they are disjoint)

        if (intersect == false) {continue;}
        // Snap the launch start down to a block-edge-aligned point
        for (int i = 0 ; i < Decomposition::dims ; i++)
        {
            ite.start.set_d(i,(kbox.getKP1().get(i) / lg.getBlockEdgeSize())*lg.getBlockEdgeSize());
        }
        // Record the grid origin and the true (unaligned) start inside the block
        for (int i = 0 ; i < Decomposition::dims ; i++)
        {
            itd.origin.set_d(i,gdb_ext.get(g_c).origin.get(i));
            itd.start_base.set_d(i,kbox.getKP1().get(i) % lg.getBlockEdgeSize() + ite.start.get(i));
        }
        // One CUDA block covers one grid block edge; the number of blocks per
        // dimension is a ceiling division of the extent by the block edge size
        ite.thr.x = lg.getBlockEdgeSize();
        ite.wthr.x = (ite.stop.get(0) - ite.start.get(0) + 1) / lg.getBlockEdgeSize() + ((ite.stop.get(0) - ite.start.get(0) + 1) % lg.getBlockEdgeSize() != 0);

        ite.thr.y = lg.getBlockEdgeSize();
        ite.wthr.y = (ite.stop.get(1) - ite.start.get(1) + 1) / lg.getBlockEdgeSize() + ((ite.stop.get(1) - ite.start.get(1) + 1) % lg.getBlockEdgeSize() != 0);

        if (Decomposition::dims > 2)
        {
            ite.thr.z = lg.getBlockEdgeSize();
            ite.wthr.z = (ite.stop.get(2) - ite.start.get(2) + 1) / lg.getBlockEdgeSize() + ((ite.stop.get(2) - ite.start.get(2) + 1) % lg.getBlockEdgeSize() != 0);
        }
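        // Worked example with illustrative numbers: for an extent of
        // stop - start + 1 = 130 points and a block edge of 8,
        // wthr = 130 / 8 + (130 % 8 != 0) = 16 + 1 = 17, i.e. a ceiling
        // division so that the trailing partial block is also covered.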
        // ...

        itd.start = ite.start;

        // ...

        // Reserve nSlot insertion slots for each CUDA block about to run
        loc_grids.get(g_c).setGPUInsertBuffer((unsigned int)ite.nblocks(),(unsigned int)nSlot);

        // ...

        if (ite.nblocks() != 0)
        { /* ... dispatch the functor kernel ... */ }
Referenced symbols:

Box< dim, T >
    This class represents an N-dimensional box.
    __device__ __host__ bool Intersect(const Box< dim, T > & b, Box< dim, T > & b_out) const
        Intersect two boxes; the intersection is returned in b_out.
    grid_key_dx< dim > getKP1() const
        Get the point p1 as grid_key_dx.
    grid_key_dx< dim > getKP2() const
        Get the point p2 as grid_key_dx.

Decomposition
    This class defines the domain decomposition interface.

grid_dist_id_iterator_gpu
    Given the decomposition it creates an iterator.
    grid_dist_id_iterator_gpu(deviceGrids & loc_grids, Decomposition & dec, const size_t (& sz)[Decomposition::dims], grid_key_dx< Decomposition::dims > start, grid_key_dx< Decomposition::dims > stop)
        Constructor of the distributed grid iterator over a start/stop range.
    grid_dist_id_iterator_gpu(deviceGrids & loc_grids, Decomposition & dec, const size_t (& sz)[Decomposition::dims])
        Constructor of the distributed grid iterator over the full grid.
    void launch(func_t functor, argsType ... args)
        Launch a functor with a particular kernel.
    void setBlockThreads(size_t nthr)
        Set the number of threads for each block.
    void setGPUInsertBuffer(int nSlot)
        Set the maximum number of inserts each GPU block can do.
    bool isNextGrid()
        Return true if we point to a valid grid.
    size_t getGridId()
        Return the index of the grid in which we are iterating.
    grid_key_dx< Decomposition::dims > getStart()
        Get the starting point of the sub-grid we are iterating.
    grid_key_dx< Decomposition::dims > getStop()
        Get the stop point of the sub-grid we are iterating.
    Decomposition::stype getSpacing(size_t i)
        Get the spacing of the grid.
    size_t g_c
        Grid list counter.
    grid_key_dx< Decomposition::dims > start
        Start key.
    grid_key_dx< Decomposition::dims > stop
        Stop key.
    size_t n_thr
        Number of threads used to launch the kernels.
    int nSlot
        Maximum number of insertions for each GPU block.
    Decomposition::stype spacing[Decomposition::dims]
        Spacing.
    openfpm::vector< GBoxes< Decomposition::dims > > gdb_ext
        Extension of each grid: domain and ghost + domain.
    deviceGrids & loc_grids
        Local device grids.

grid_key_dx
    grid_key_dx is the key to access any element in the grid.
    void zero()
        Set the key to zero.
    __device__ __host__ void set_d(index_type i, index_type id)
        Set the i index.
    __device__ __host__ index_type get(index_type i) const
        Get the i index.

openfpm::vector
    Implementation of a 1-D std::vector-like structure.
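A minimal usage sketch of the iterator (assuming an existing decomposition dec,
a device grid container loc_grids, a size array sz and a device functor f; all
names here are placeholders, not part of this header):

    grid_dist_id_iterator_gpu<decltype(dec),decltype(loc_grids)> it(loc_grids,dec,sz);
    it.setBlockThreads(128);   // threads per block for the launched kernels
    it.launch(f);              // apply f on every intersecting local grid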