OpenFPM  5.2.0
Project that contains the implementation of distributed structures
grid_dist_id_iterator_gpu.cuh
/*
 * grid_dist_id_iterator_dec_gpu.cuh
 *
 *  Created on: Sep 1, 2019
 *      Author: i-bird
 */

#ifndef GRID_DIST_ID_ITERATOR_DEC_GPU_CUH_
#define GRID_DIST_ID_ITERATOR_DEC_GPU_CUH_

#include "config.h"
#include "Grid/Iterators/grid_dist_id_iterator.hpp"
#include "Grid/grid_dist_util.hpp"
#include "Grid/Iterators/grid_dist_id_iterator_util.hpp"
#include "Grid/cuda/grid_dist_id_kernels.cuh"
template<unsigned int impl>
struct launch_call_impl
{
    template<typename loc_grid_type, typename ite_type, typename itd_type, typename functor_type, typename ... argsT>
    inline static void call(loc_grid_type & loc_grid, ite_type & ite, itd_type & itd, functor_type functor, argsT ... args)
    {
        CUDA_LAUNCH(grid_apply_functor,ite,loc_grid.toKernel(), itd, functor, args... );
    }
};

template<>
struct launch_call_impl<1>
{
    template<typename loc_grid_type, typename ite_type, typename itd_type, typename functor_type,typename ... argsT>
    inline static void call(loc_grid_type & loc_grid, ite_type & ite, itd_type & itd, functor_type f, argsT ... args)
    {
#ifdef CUDIFY_USE_CUDA

        CUDA_LAUNCH(grid_apply_functor_shared_bool,ite,loc_grid.toKernel(), itd, f, args... );

#else
        auto g = loc_grid.toKernel();

        auto lamb = [g,itd,f,args ...] __device__ () mutable
        {
            __shared__ bool is_empty_block;

            f(g,itd,is_empty_block,args...);
        };

        CUDA_LAUNCH_LAMBDA_TLS(ite,lamb);
#endif
    }
};

template<typename Decomposition, typename deviceGrids, bool ghost_or_domain = false>
class grid_dist_id_iterator_gpu
{
    size_t g_c;

    //! Extension of each grid: domain and ghost + domain
    openfpm::vector<GBoxes<Decomposition::dims>> gdb_ext;

    //! start key
    grid_key_dx<Decomposition::dims> start;

    //! stop key
    grid_key_dx<Decomposition::dims> stop;

    //! Local device grids
    deviceGrids & loc_grids;

    //! number of threads to launch the kernels
    size_t n_thr;

    //! Maximum number of insertions for each GPU block
    int nSlot = -1;

    //! Spacing
    typename Decomposition::stype spacing[Decomposition::dims];

    public:

//    grid_dist_id_iterator_gpu<Decomposition,deviceGrids> & operator=(const grid_dist_id_iterator_gpu<Decomposition,deviceGrids> & tmp)
//    {
//        g_c = tmp.g_c;
//        gdb_ext = tmp.gdb_ext;

//        start = tmp.start;
//        stop = tmp.stop;
//        loc_grids = tmp.loc_grids;

//        return *this;
//    }

//    grid_dist_id_iterator_gpu(const grid_dist_id_iterator_gpu<Decomposition,deviceGrids> & tmp)
//    :loc_grids(tmp.loc_grids)
//    {
//        this->operator=(tmp);
//    }

    //! Constructor of the distributed grid iterator (iterates the whole grid of size sz)
    grid_dist_id_iterator_gpu(deviceGrids & loc_grids,Decomposition & dec, const size_t (& sz)[Decomposition::dims])
    :loc_grids(loc_grids),g_c(0)
    {
        // Initialize start and stop
        start.zero();
        for (size_t i = 0 ; i < Decomposition::dims ; i++)
            stop.set_d(i,sz[i]-1);

        // From the decomposition construct gdb_ext
        create_gdb_ext<Decomposition::dims,Decomposition>(gdb_ext,dec,sz,dec.getDomain(),spacing);

        g_c = 0;
    }

    //! Constructor of the distributed grid iterator, iterating only the box [start,stop]
    grid_dist_id_iterator_gpu(deviceGrids & loc_grids,
                              Decomposition & dec,
                              const size_t (& sz)[Decomposition::dims],
                              grid_key_dx<Decomposition::dims> start,
                              grid_key_dx<Decomposition::dims> stop)
    :loc_grids(loc_grids),g_c(0),start(start),stop(stop)
    {
        // From the decomposition construct gdb_ext
        create_gdb_ext<Decomposition::dims,Decomposition>(gdb_ext,dec,sz,dec.getDomain(),spacing);

        g_c = 0;
    }

    // Destructor
    ~grid_dist_id_iterator_gpu()
    {
    }

    //! Set the maximum number of insertions each GPU block can do
    void setGPUInsertBuffer(int nSlot)
    {
        this->nSlot = nSlot;
    }

    //! Set the number of threads for each block
    void setBlockThreads(size_t nthr)
    {
        this->n_thr = nthr;
    }

    //! Return true if we point to a valid grid
    inline bool isNextGrid()
    {
        return g_c < gdb_ext.size();
    }

    //! Return the index of the grid in which we are iterating
    inline size_t getGridId()
    {
        return g_c;
    }

    //! Move to the next grid
    inline void nextGrid()
    {
        g_c++;
    }


    //! Get the spacing of the grid along direction i
    inline typename Decomposition::stype getSpacing(size_t i)
    {
        return spacing[i];
    }

    //! Launch a functor with a particular kernel
    template<unsigned int impl = 0, typename func_t, typename ... argsType >
    inline void launch(func_t functor,argsType ... args)
    {
        for (g_c = 0 ; g_c < gdb_ext.size() ; g_c++)
        {
            ite_gpu_dist<Decomposition::dims> itd;
            ite_gpu<Decomposition::dims> ite;

            // intersect the requested range [start,stop] with the local grid box

            Box<Decomposition::dims,long int> range_box;
            Box<Decomposition::dims,long int> kbox;
            for (int i = 0 ; i < Decomposition::dims ; i++)
            {
                range_box.setLow(i,start.get(i));
                range_box.setHigh(i,stop.get(i));
            }
            range_box -= gdb_ext.get(g_c).origin;
            bool intersect = range_box.Intersect(gdb_ext.get(g_c).Dbox,kbox);

            if (intersect == false) {continue;}

            auto & lg = loc_grids.get(g_c);

            for (int i = 0 ; i < Decomposition::dims ; i++)
            {
                ite.start.set_d(i,(kbox.getKP1().get(i) / lg.getBlockEdgeSize())*lg.getBlockEdgeSize() );
                ite.stop.set_d(i, kbox.getKP2().get(i));
            }

            // the thread extensions are

            for (int i = 0 ; i < Decomposition::dims ; i++)
            {
                itd.origin.set_d(i,gdb_ext.get(g_c).origin.get(i));
                itd.start_base.set_d(i,kbox.getKP1().get(i) % lg.getBlockEdgeSize() + ite.start.get(i));
            }

            ite.thr.x = lg.getBlockEdgeSize();
            ite.wthr.x = (ite.stop.get(0) - ite.start.get(0) + 1) / lg.getBlockEdgeSize() + ((ite.stop.get(0) - ite.start.get(0) + 1) % lg.getBlockEdgeSize() != 0);

            ite.thr.y = lg.getBlockEdgeSize();
            ite.wthr.y = (ite.stop.get(1) - ite.start.get(1) + 1) / lg.getBlockEdgeSize() + ((ite.stop.get(1) - ite.start.get(1) + 1) % lg.getBlockEdgeSize() != 0);

            if (Decomposition::dims > 2)
            {
                ite.thr.z = lg.getBlockEdgeSize();
                ite.wthr.z = (ite.stop.get(2) - ite.start.get(2) + 1) / lg.getBlockEdgeSize() + ((ite.stop.get(2) - ite.start.get(2) + 1) % lg.getBlockEdgeSize() != 0);
            }

            itd.wthr = ite.wthr;
            itd.thr = ite.thr;
            itd.start = ite.start;
            itd.stop = ite.stop;

            if (nSlot != -1)
            {
                loc_grids.get(g_c).setGPUInsertBuffer((unsigned int)ite.nblocks(),(unsigned int)nSlot);
            }

            if (ite.nblocks() != 0)
            {
                launch_call_impl<impl>::call(loc_grids.get(g_c),ite,itd,functor,args...);
            }
        }
    }

    //! Get the starting point of the sub-grid we are iterating
    inline grid_key_dx<Decomposition::dims> getStart()
    {
        return start;
    }

    //! Get the stop point of the sub-grid we are iterating
    inline grid_key_dx<Decomposition::dims> getStop()
    {
        return stop;
    }
};


#endif /* GRID_DIST_ID_ITERATOR_DEC_GPU_CUH_ */
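The launch_call_impl dispatchers above fix the calling convention of the user functor: with the default impl the functor is presumably invoked by grid_apply_functor as functor(grid, itd, args...), while the specialized path additionally receives a block-shared boolean, functor(grid, itd, is_empty_block, args...), exactly as the CUDIFY fallback lambda shows. Below is a minimal, hypothetical functor for the default contract; the 3D layout, the range guard and the use of property 0 are assumptions made for illustration and are not part of this header.

struct init_to_one_functor
{
    // grid_t is the kernel view returned by loc_grid.toKernel(),
    // itd_t is the ite_gpu_dist<dims> descriptor filled by launch()
    template<typename grid_t, typename itd_t>
    __device__ void operator()(grid_t & g, itd_t & itd)
    {
        // grid point handled by this thread, in local grid coordinates
        grid_key_dx<3> key;
        key.set_d(0,itd.start.get(0) + blockIdx.x * blockDim.x + threadIdx.x);
        key.set_d(1,itd.start.get(1) + blockIdx.y * blockDim.y + threadIdx.y);
        key.set_d(2,itd.start.get(2) + blockIdx.z * blockDim.z + threadIdx.z);

        // itd.start is rounded down to the block edge, so skip the padding
        // threads before start_base and the ones past stop
        for (int i = 0 ; i < 3 ; i++)
        {
            if (key.get(i) < itd.start_base.get(i) || key.get(i) > itd.stop.get(i))
            {return;}
        }

        g.template get<0>(key) = 1.0;   // property 0 is an assumption
    }
};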
Decomposition
This class defines the domain decomposition interface.
grid_dist_id_iterator_gpu
Given the decomposition it creates an iterator.
void setGPUInsertBuffer(int nSlot)
Set the maximum number of insertions each GPU block can do.
int nSlot
Maximum number of insertions for each GPU block.
bool isNextGrid()
Return true if we point to a valid grid.
void launch(func_t functor, argsType ... args)
Launch a functor with a particular kernel.
size_t getGridId()
Return the index of the grid in which we are iterating.
grid_key_dx< Decomposition::dims > getStart()
Get the starting point of the sub-grid we are iterating.
grid_dist_id_iterator_gpu(deviceGrids &loc_grids, Decomposition &dec, const size_t(&sz)[Decomposition::dims], grid_key_dx< Decomposition::dims > start, grid_key_dx< Decomposition::dims > stop)
Constructor of the distributed grid iterator.
openfpm::vector< GBoxes< Decomposition::dims > > gdb_ext
Extension of each grid: domain and ghost + domain.
grid_key_dx< Decomposition::dims > start
start key
grid_dist_id_iterator_gpu(deviceGrids &loc_grids, Decomposition &dec, const size_t(&sz)[Decomposition::dims])
Constructor of the distributed grid iterator.
grid_key_dx< Decomposition::dims > getStop()
Get the stop point of the sub-grid we are iterating.
grid_key_dx< Decomposition::dims > stop
stop key
Decomposition::stype getSpacing(size_t i)
Get the spacing of the grid.
size_t n_thr
number of threads to launch the kernels
Decomposition::stype spacing[Decomposition::dims]
Spacing.
void setBlockThreads(size_t nthr)
Set the number of threads for each block.
deviceGrids & loc_grids
Local device grids.
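A minimal usage sketch of the iterator documented above, assuming an already constructed distributed grid from which the decomposition dec and the local device grids dev_grids can be obtained; DecompositionType, DeviceGridsType, the 3D size and init_to_one_functor (from the sketch after the listing) are illustrative placeholders, not part of this header.

// dec and dev_grids are placeholders for objects normally provided by a
// distributed grid; sz is the global grid size
size_t sz[3] = {128,128,128};

grid_dist_id_iterator_gpu<DecompositionType,DeviceGridsType> it(dev_grids,dec,sz);

// optional, and only meaningful for sparse device grids: reserve up to
// 1024 insertion slots per GPU block before the launch
it.setGPUInsertBuffer(1024);

// run the functor on every local sub-grid; the three-argument constructor
// covers the whole grid (start = 0, stop = sz - 1)
it.launch(init_to_one_functor());

The five-argument constructor restricts the same launch to the sub-box [start, stop] instead of the whole domain.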