OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
grid_dist_id_iterator_gpu.cuh
/*
 * grid_dist_id_iterator_dec_gpu.cuh
 *
 *  Created on: Sep 1, 2019
 *      Author: i-bird
 */

#ifndef GRID_DIST_ID_ITERATOR_DEC_GPU_CUH_
#define GRID_DIST_ID_ITERATOR_DEC_GPU_CUH_

#include "config.h"
#include "Grid/Iterators/grid_dist_id_iterator.hpp"
#include "Grid/grid_dist_util.hpp"
#include "Grid/Iterators/grid_dist_id_iterator_util.hpp"
#include "Grid/cuda/grid_dist_id_kernels.cuh"

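// launch_call_impl dispatches the actual kernel launch for grid_dist_id_iterator_gpu::launch().
// The generic version launches grid_apply_functor directly; the specialization below additionally
// hands the functor a block-shared boolean (is_empty_block), either through the dedicated
// grid_apply_functor_shared_bool kernel (CUDA backend) or through a device lambda that
// allocates the __shared__ flag itself (other backends).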
template<unsigned int impl>
struct launch_call_impl
{
    template<typename loc_grid_type, typename ite_type, typename itd_type, typename functor_type, typename ... argsT>
    inline static void call(loc_grid_type & loc_grid, ite_type & ite, itd_type & itd, functor_type functor, argsT ... args)
    {
        CUDA_LAUNCH(grid_apply_functor,ite,loc_grid.toKernel(), itd, functor, args... );
    }
};

template<>
struct launch_call_impl<1>
{
    template<typename loc_grid_type, typename ite_type, typename itd_type, typename functor_type,typename ... argsT>
    inline static void call(loc_grid_type & loc_grid, ite_type & ite, itd_type & itd, functor_type f, argsT ... args)
    {
#ifdef CUDIFY_USE_CUDA

        CUDA_LAUNCH(grid_apply_functor_shared_bool,ite,loc_grid.toKernel(), itd, f, args... );

#else
        auto g = loc_grid.toKernel();

        auto lamb = [g,itd,f,args ...] __device__ () mutable
        {
            __shared__ bool is_empty_block;

            f(g,itd,is_empty_block,args...);
        };

        CUDA_LAUNCH_LAMBDA_TLS(ite,lamb);
#endif
    }
};

/*! \brief Given the decomposition it creates an iterator
 *
 */
template<typename Decomposition, typename deviceGrids, bool ghost_or_domain = false>
class grid_dist_id_iterator_gpu
{
    //! counter selecting the local grid currently iterated
    size_t g_c;

    //! Extension of each grid: domain and ghost + domain
    openfpm::vector<GBoxes<Decomposition::dims>> gdb_ext;

    //! start key
    grid_key_dx<Decomposition::dims> start;

    //! stop key
    grid_key_dx<Decomposition::dims> stop;

    //! Local device grids
    deviceGrids & loc_grids;

    //! number of threads to launch the kernels
    size_t n_thr;

    //! Maximum number of insertions for each GPU block
    int nSlot = -1;

    //! Spacing
    typename Decomposition::stype spacing[Decomposition::dims];

    public:

//	grid_dist_id_iterator_gpu<Decomposition,deviceGrids> & operator=(const grid_dist_id_iterator_gpu<Decomposition,deviceGrids> & tmp)
//	{
//		g_c = tmp.g_c;
//		gdb_ext = tmp.gdb_ext;
//
//		start = tmp.start;
//		stop = tmp.stop;
//		loc_grids = tmp.loc_grids;
//
//		return *this;
//	}

//	grid_dist_id_iterator_gpu(const grid_dist_id_iterator_gpu<Decomposition,deviceGrids> & tmp)
//	:loc_grids(tmp.loc_grids)
//	{
//		this->operator=(tmp);
//	}

    /*! \brief Constructor of the distributed grid iterator
     *
     */
    grid_dist_id_iterator_gpu(deviceGrids & loc_grids,Decomposition & dec, const size_t (& sz)[Decomposition::dims])
    :loc_grids(loc_grids),g_c(0)
    {
        // Initialize start and stop
        start.zero();
        for (size_t i = 0 ; i < Decomposition::dims ; i++)
            stop.set_d(i,sz[i]-1);

        // From the decomposition construct gdb_ext
        create_gdb_ext<Decomposition::dims,Decomposition>(gdb_ext,dec,sz,dec.getDomain(),spacing);

        g_c = 0;
    }

    /*! \brief Constructor of the distributed grid iterator
     *
     */
    grid_dist_id_iterator_gpu(deviceGrids & loc_grids,
                              Decomposition & dec,
                              const size_t (& sz)[Decomposition::dims],
                              grid_key_dx<Decomposition::dims> start,
                              grid_key_dx<Decomposition::dims> stop)
    :loc_grids(loc_grids),g_c(0),start(start),stop(stop)
    {
        // From the decomposition construct gdb_ext
        create_gdb_ext<Decomposition::dims,Decomposition>(gdb_ext,dec,sz,dec.getDomain(),spacing);

        g_c = 0;
    }

    // Destructor
    ~grid_dist_id_iterator_gpu()
    {
    }

    /*! \brief Set the maximum number of insertions each GPU block can do
     *
     * \param nSlot maximum number of insertions per block
     *
     */
    void setGPUInsertBuffer(int nSlot)
    {
        this->nSlot = nSlot;
    }

    /*! \brief Set the number of threads for each block
     *
     * \param nthr number of threads per block
     *
     */
    void setBlockThreads(size_t nthr)
    {
        this->n_thr = nthr;
    }

    /*! \brief Return true if we point to a valid grid
     *
     */
    inline bool isNextGrid()
    {
        return g_c < gdb_ext.size();
    }

    /*! \brief Return the index of the grid in which we are iterating
     *
     */
    inline size_t getGridId()
    {
        return g_c;
    }

    /*! \brief Move to the next grid
     *
     */
    inline void nextGrid()
    {
        g_c++;
    }

    /*! \brief Get the spacing of the grid
     *
     * \param i dimension
     *
     */
    inline typename Decomposition::stype getSpacing(size_t i)
    {
        return spacing[i];
    }

    /*! \brief Launch a functor with a particular kernel
     *
     * \param functor functor to launch
     * \param args additional arguments forwarded to the functor
     *
     */
    template<unsigned int impl = 0, typename func_t, typename ... argsType >
    inline void launch(func_t functor,argsType ... args)
    {
        for (g_c = 0 ; g_c < gdb_ext.size() ; g_c++)
        {
            ite_gpu_dist<Decomposition::dims> itd;
            ite_gpu<Decomposition::dims> ite;

            // intersect the requested range with this local grid

            Box<Decomposition::dims,long int> range_box(start,stop);
            Box<Decomposition::dims,long int> kbox;
            range_box -= gdb_ext.get(g_c).origin;
            bool intersect = range_box.Intersect(gdb_ext.get(g_c).Dbox,kbox);

            if (intersect == false) {continue;}

            auto & lg = loc_grids.get(g_c);

            // align the launch start down to a multiple of the block edge,
            // the stop stays on the requested point
            for (int i = 0 ; i < Decomposition::dims ; i++)
            {
                ite.start.set_d(i,(kbox.getKP1().get(i) / lg.getBlockEdgeSize())*lg.getBlockEdgeSize() );
                ite.stop.set_d(i, kbox.getKP2().get(i));
            }

            // the thread extensions are

            for (int i = 0 ; i < Decomposition::dims ; i++)
            {
                itd.origin.set_d(i,gdb_ext.get(g_c).origin.get(i));
                itd.start_base.set_d(i,kbox.getKP1().get(i) % lg.getBlockEdgeSize() + ite.start.get(i));
            }

            ite.thr.x = lg.getBlockEdgeSize();
            ite.wthr.x = (ite.stop.get(0) - ite.start.get(0) + 1) / lg.getBlockEdgeSize() + ((ite.stop.get(0) - ite.start.get(0) + 1) % lg.getBlockEdgeSize() != 0);

            ite.thr.y = lg.getBlockEdgeSize();
            ite.wthr.y = (ite.stop.get(1) - ite.start.get(1) + 1) / lg.getBlockEdgeSize() + ((ite.stop.get(1) - ite.start.get(1) + 1) % lg.getBlockEdgeSize() != 0);

            if (Decomposition::dims > 2)
            {
                ite.thr.z = lg.getBlockEdgeSize();
                ite.wthr.z = (ite.stop.get(2) - ite.start.get(2) + 1) / lg.getBlockEdgeSize() + ((ite.stop.get(2) - ite.start.get(2) + 1) % lg.getBlockEdgeSize() != 0);
            }

            itd.wthr = ite.wthr;
            itd.thr = ite.thr;
            itd.start = ite.start;
            itd.stop = ite.stop;

            if (nSlot != -1)
            {
                loc_grids.get(g_c).setGPUInsertBuffer((unsigned int)ite.nblocks(),(unsigned int)nSlot);
            }

            if (ite.nblocks() != 0)
            {
                launch_call_impl<impl>::call(loc_grids.get(g_c),ite,itd,functor,args...);
            }
        }
    }

    /*! \brief Get the starting point of the sub-grid we are iterating
     *
     * \return the starting point
     *
     */
    inline grid_key_dx<Decomposition::dims> getStart()
    {
        return start;
    }

    /*! \brief Get the stop point of the sub-grid we are iterating
     *
     * \return the stop point
     *
     */
    inline grid_key_dx<Decomposition::dims> getStop()
    {
        return stop;
    }
};


#endif /* GRID_DIST_ID_ITERATOR_DEC_GPU_CUH_ */
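
Usage sketch (not part of the header): a minimal, hypothetical example of how this iterator is typically driven. Only grid_dist_id_iterator_gpu, setGPUInsertBuffer() and launch() come from the class above; my_functor, dec, loc_grids, sz, start and stop are illustrative placeholders, and the snippet assumes a CUDA-capable build.

    // A device functor with the call shape expected by launch_call_impl<0>::call:
    // it receives the local grid in kernel form, the distributed iterator descriptor
    // and any extra arguments forwarded by launch().
    struct my_functor
    {
        template<typename grid_type, typename itd_type>
        __device__ void operator()(grid_type & g, itd_type & itd, float value)
        {
            // the per-thread index math (and the actual write into g) is omitted in
            // this sketch; it would normally combine itd.origin / itd.start_base with
            // the block/thread indices produced by the launch configuration built in launch()
        }
    };

    // ... given a decomposition dec, the device grids loc_grids, a size array sz and
    // two keys start/stop delimiting the region to iterate (all defined elsewhere):
    grid_dist_id_iterator_gpu<decltype(dec),decltype(loc_grids)> it(loc_grids,dec,sz,start,stop);

    it.setGPUInsertBuffer(128);    // allow at most 128 insertions per GPU block
    it.launch(my_functor(),1.0f);  // impl = 0: plain grid_apply_functor launch on every intersecting grid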