#ifndef GRID_DIST_ID_ITERATOR_DEC_GPU_CUH_
#define GRID_DIST_ID_ITERATOR_DEC_GPU_CUH_

#include "Grid/Iterators/grid_dist_id_iterator.hpp"
#include "Grid/grid_dist_util.hpp"
#include "Grid/Iterators/grid_dist_id_iterator_util.hpp"
#include "Grid/cuda/grid_dist_id_kernels.cuh"

template<unsigned int impl>
struct launch_call_impl /* wrapper name not shown in the extract; assumed */
{
	template<typename loc_grid_type, typename ite_type, typename itd_type, typename functor_type, typename ... argsT>
	inline static void call(loc_grid_type & loc_grid, ite_type & ite, itd_type & itd, functor_type functor, argsT ... args)
	{
		// Generic path: run the functor over the local grid through the
		// grid_apply_functor kernel
		CUDA_LAUNCH(grid_apply_functor,ite,loc_grid.toKernel(),itd,functor,args...);
	}
};
//! Specialization that hands the functor a __shared__ "empty block" flag
//! (the specialization index is not visible in the extract; 1 is assumed)
template<>
struct launch_call_impl<1>
{
	template<typename loc_grid_type, typename ite_type, typename itd_type, typename functor_type, typename ... argsT>
	inline static void call(loc_grid_type & loc_grid, ite_type & ite, itd_type & itd, functor_type f, argsT ... args)
	{
#ifdef CUDIFY_USE_CUDA
		// Native CUDA backend: a dedicated kernel allocates the shared flag
		CUDA_LAUNCH(grid_apply_functor_shared_bool,ite,loc_grid.toKernel(),itd,f,args...);
#else
		auto g = loc_grid.toKernel();

		// Other backends: emulate the kernel with a device lambda that
		// declares the shared flag and forwards it to the functor
		auto lamb = [g,itd,f,args ...] __device__ () mutable
		{
			__shared__ bool is_empty_block;

			f(g,itd,is_empty_block,args...);
		};

		CUDA_LAUNCH_LAMBDA_TLS(ite,lamb);
#endif
	}
};
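/* A minimal sketch (not part of the original header) of the functor shape the
 * dispatchers above expect: the lambda in the non-CUDA branch calls
 * f(g,itd,is_empty_block,args...), so a functor passed through the
 * specialization receives the grid kernel view, the iteration descriptor and
 * the shared flag. The functor name and body here are hypothetical.
 *
 *   struct example_functor
 *   {
 *       template<typename grid_type, typename itd_type>
 *       __device__ void operator()(grid_type & g, itd_type & itd, bool & is_empty_block)
 *       {
 *           // compute this thread's grid point from itd (origin, start_base, ...)
 *           // and read/write properties of g at that point
 *       }
 *   };
 */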
/*! \brief Given the decomposition it creates an iterator
 *
 */
template<typename Decomposition, typename deviceGrids, bool ghost_or_domain = false>
class grid_dist_id_iterator_gpu
{
	// ... (members such as g_c, gdb_ext, start, stop, loc_grids, n_thr elided)

	//! Spacing
	typename Decomposition::stype spacing[Decomposition::dims];
public:

	//! Constructor of the distributed grid iterator
	grid_dist_id_iterator_gpu(deviceGrids & loc_grids, Decomposition & dec, const size_t (& sz)[Decomposition::dims])
	:loc_grids(loc_grids)
	{
		for (size_t i = 0 ; i < Decomposition::dims ; i++)
		{ /* ... */ }

		// Construct gdb_ext, the extension of each grid (domain and ghost + domain)
		create_gdb_ext<Decomposition::dims,Decomposition>(gdb_ext,dec,sz,dec.getDomain(),spacing);
		// ...
	}

	//! Constructor of the distributed grid iterator over the range [start,stop]
	grid_dist_id_iterator_gpu(deviceGrids & loc_grids, Decomposition & dec, const size_t (& sz)[Decomposition::dims],
			grid_key_dx<Decomposition::dims> start, grid_key_dx<Decomposition::dims> stop)
	:loc_grids(loc_grids)
	{
		// ...
		create_gdb_ext<Decomposition::dims,Decomposition>(gdb_ext,dec,sz,dec.getDomain(),spacing);
		// ...
	}
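	/* Construction sketch (illustrative, not from the header): the iterator is
	 * built from the local device grids, the decomposition and the grid size;
	 * dec, loc_grids and the sizes below are hypothetical placeholders.
	 *
	 *   size_t sz[2] = {128,128};
	 *   grid_dist_id_iterator_gpu<decltype(dec),decltype(loc_grids)> it(loc_grids,dec,sz);
	 *   it.setBlockThreads(8);   // threads for each block (see setBlockThreads)
	 */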
	/*! \brief Launch a functor with a particular kernel
	 *
	 * \param functor to launch
	 * \param args additional arguments forwarded to the functor
	 */
	template<unsigned int impl = 0, typename func_t, typename ... argsType>
	inline void launch(func_t functor, argsType ... args)
	{
		// ...
		ite_gpu_dist<Decomposition::dims> itd;

		// ... (for each local grid g_c: take the local grid "lg", build the
		// launch box "ite" and intersect the iteration box with the grid box
		// into "kbox"; skip the grid if there is no intersection)
		if (intersect == false) {continue;}

		// Align the start point of the launch box down to a multiple of the
		// block edge size
		for (int i = 0 ; i < Decomposition::dims ; i++)
		{
			ite.start.set_d(i,(kbox.getKP1().get(i) / lg.getBlockEdgeSize())*lg.getBlockEdgeSize());
			// ...
		}

		// Fill the distributed-iterator descriptor: the origin of the local
		// grid and the offset of the true start inside the aligned box
		for (int i = 0 ; i < Decomposition::dims ; i++)
		{
			itd.origin.set_d(i,gdb_ext.get(g_c).origin.get(i));
			itd.start_base.set_d(i,kbox.getKP1().get(i) % lg.getBlockEdgeSize() + ite.start.get(i));
		}

		// Threads per block and number of blocks per direction; the wthr
		// expression is a ceiling division of the extent by the block edge
		ite.thr.x = lg.getBlockEdgeSize();
		ite.wthr.x = (ite.stop.get(0) - ite.start.get(0) + 1) / lg.getBlockEdgeSize() + ((ite.stop.get(0) - ite.start.get(0) + 1) % lg.getBlockEdgeSize() != 0);

		ite.thr.y = lg.getBlockEdgeSize();
		ite.wthr.y = (ite.stop.get(1) - ite.start.get(1) + 1) / lg.getBlockEdgeSize() + ((ite.stop.get(1) - ite.start.get(1) + 1) % lg.getBlockEdgeSize() != 0);

		if (Decomposition::dims > 2)
		{
			ite.thr.z = lg.getBlockEdgeSize();
			ite.wthr.z = (ite.stop.get(2) - ite.start.get(2) + 1) / lg.getBlockEdgeSize() + ((ite.stop.get(2) - ite.start.get(2) + 1) % lg.getBlockEdgeSize() != 0);
		}

		// ...
		itd.start = ite.start;
		// ...

		// Launch only if there is at least one block
		if (ite.nblocks() != 0)
		{ /* ... */ }
	}

	// ... (further members: setGPUInsertBuffer, setBlockThreads, getSpacing,
	// getStart, getStop, getGridId, isNextGrid, ... elided)
};
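/* Usage sketch (illustrative): launching a functor over the local grids.
 * example_functor follows the convention sketched after launch_call_impl<1>;
 * all names here are hypothetical.
 *
 *   example_functor f;
 *   it.launch<1>(f);   // impl = 1: the functor also receives the
 *                      // __shared__ is_empty_block flag
 */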
#endif /* GRID_DIST_ID_ITERATOR_DEC_GPU_CUH_ */