OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
 
grid_dist_id_iterator_gpu.cuh
/*
 * grid_dist_id_iterator_dec_gpu.cuh
 *
 *  Created on: Sep 1, 2019
 *      Author: i-bird
 */

#ifndef GRID_DIST_ID_ITERATOR_DEC_GPU_CUH_
#define GRID_DIST_ID_ITERATOR_DEC_GPU_CUH_

#include "config.h"
#include "Grid/Iterators/grid_dist_id_iterator.hpp"
#include "Grid/grid_dist_util.hpp"
#include "Grid/Iterators/grid_dist_id_iterator_util.hpp"
#include "Grid/cuda/grid_dist_id_kernels.cuh"
//! Launch a functor on a local grid (generic implementation)
template<unsigned int impl>
struct launch_call_impl
{
    template<typename loc_grid_type, typename ite_type, typename itd_type, typename functor_type, typename ... argsT>
    inline static void call(loc_grid_type & loc_grid, ite_type & ite, itd_type & itd, functor_type functor, argsT ... args)
    {
        CUDA_LAUNCH(grid_apply_functor,ite,loc_grid.toKernel(), itd, functor, args... );
    }
};

//! Specialization that additionally passes a block-shared empty-block flag to the functor
template<>
struct launch_call_impl<1>
{
    template<typename loc_grid_type, typename ite_type, typename itd_type, typename functor_type,typename ... argsT>
    inline static void call(loc_grid_type & loc_grid, ite_type & ite, itd_type & itd, functor_type f, argsT ... args)
    {
#ifdef CUDIFY_USE_CUDA

        CUDA_LAUNCH(grid_apply_functor_shared_bool,ite,loc_grid.toKernel(), itd, f, args... );

#else
        auto g = loc_grid.toKernel();

        auto lamb = [g,itd,f,args ...] __device__ () mutable
        {
            __shared__ bool is_empty_block;

            f(g,itd,is_empty_block,args...);
        };

        CUDA_LAUNCH_LAMBDA_TLS(ite,lamb);
#endif
    }
};

/*! \brief Given the decomposition it creates an iterator
 *
 */
template<typename Decomposition, typename deviceGrids, bool ghost_or_domain = false>
class grid_dist_id_iterator_gpu
{
    //! grid counter
    size_t g_c;

    //! Extension of each grid: domain and ghost + domain
    openfpm::vector<GBoxes<Decomposition::dims>> gdb_ext;

    //! start key
    grid_key_dx<Decomposition::dims> start;

    //! stop key
    grid_key_dx<Decomposition::dims> stop;

    //! Local device grids
    deviceGrids & loc_grids;

    //! number of threads to launch the kernels
    size_t n_thr;

    //! Maximum number of insertions for each GPU block
    int nSlot = -1;

    //! Spacing
    typename Decomposition::stype spacing[Decomposition::dims];

    public:

//  grid_dist_id_iterator_gpu<Decomposition,deviceGrids> & operator=(const grid_dist_id_iterator_gpu<Decomposition,deviceGrids> & tmp)
//  {
//      g_c = tmp.g_c;
//      gdb_ext = tmp.gdb_ext;
//
//      start = tmp.start;
//      stop = tmp.stop;
//      loc_grids = tmp.loc_grids;
//
//      return *this;
//  }

//  grid_dist_id_iterator_gpu(const grid_dist_id_iterator_gpu<Decomposition,deviceGrids> & tmp)
//  :loc_grids(tmp.loc_grids)
//  {
//      this->operator=(tmp);
//  }

/*! \brief Constructor of the distributed grid iterator
 *
 * \param loc_grids local device grids
 * \param dec decomposition
 * \param sz size of the grid
 *
 */
grid_dist_id_iterator_gpu(deviceGrids & loc_grids, Decomposition & dec, const size_t (& sz)[Decomposition::dims])
:loc_grids(loc_grids)
{
    // Initialize start and stop
    start.zero();
    for (size_t i = 0 ; i < Decomposition::dims ; i++)
        stop.set_d(i,sz[i]-1);

    // From the decomposition construct gdb_ext
    create_gdb_ext<Decomposition::dims,Decomposition>(gdb_ext,dec,sz,dec.getDomain(),spacing);

    g_c = 0;
}

/*! \brief Constructor of the distributed grid iterator, restricted to the [start,stop] range
 *
 */
grid_dist_id_iterator_gpu(deviceGrids & loc_grids, Decomposition & dec, const size_t (& sz)[Decomposition::dims], grid_key_dx<Decomposition::dims> start, grid_key_dx<Decomposition::dims> stop)
:loc_grids(loc_grids),start(start),stop(stop)
{
    // From the decomposition construct gdb_ext
    create_gdb_ext<Decomposition::dims,Decomposition>(gdb_ext,dec,sz,dec.getDomain(),spacing);

    g_c = 0;
}

// Destructor
~grid_dist_id_iterator_gpu()
{
}

/*! \brief Set the maximum number of insertions each GPU block can do
 *
 */
void setGPUInsertBuffer(int nSlot)
{
    this->nSlot = nSlot;
}

/*! \brief Set the number of threads for each block
 *
 */
void setBlockThreads(size_t nthr)
{
    this->n_thr = nthr;
}

/*! \brief Return true if we point to a valid grid
 *
 */
inline bool isNextGrid()
{
    return g_c < gdb_ext.size();
}

/*! \brief Return the index of the grid in which we are iterating
 *
 */
inline size_t getGridId()
{
    return g_c;
}

/*! \brief Move to the next grid
 *
 */
inline void nextGrid()
{
    g_c++;
}

/*! \brief Get the spacing of the grid along direction i
 *
 */
inline typename Decomposition::stype getSpacing(size_t i)
{
    return spacing[i];
}

/*! \brief Launch a functor with a particular kernel
 *
 * \param functor function kernel
 * \param args additional parameters
 *
 */
template<unsigned int impl = 0, typename func_t, typename ... argsType >
inline void launch(func_t functor,argsType ... args)
{
    for (g_c = 0 ; g_c < gdb_ext.size() ; g_c++)
    {
        ite_gpu_dist<Decomposition::dims> itd;
        ite_gpu<Decomposition::dims> ite;

        // intersect the requested [start,stop] range with this local grid

        Box<Decomposition::dims,long int> range_box(start,stop);
        Box<Decomposition::dims,long int> kbox;
        range_box -= gdb_ext.get(g_c).origin;
        bool intersect = range_box.Intersect(gdb_ext.get(g_c).Dbox,kbox);

        if (intersect == false) {continue;}

        auto & lg = loc_grids.get(g_c);

        for (int i = 0 ; i < Decomposition::dims ; i++)
        {
            ite.start.set_d(i,(kbox.getKP1().get(i) / lg.getBlockEdgeSize())*lg.getBlockEdgeSize() );
            ite.stop.set_d(i, kbox.getKP2().get(i));
        }

        // the thread extensions are

        for (int i = 0 ; i < Decomposition::dims ; i++)
        {
            itd.origin.set_d(i,gdb_ext.get(g_c).origin.get(i));
            itd.start_base.set_d(i,kbox.getKP1().get(i) % lg.getBlockEdgeSize() + ite.start.get(i));
        }

        // blocks per direction: ceiling of the range size divided by the block edge
        ite.thr.x = lg.getBlockEdgeSize();
        ite.wthr.x = (ite.stop.get(0) - ite.start.get(0) + 1) / lg.getBlockEdgeSize() + ((ite.stop.get(0) - ite.start.get(0) + 1) % lg.getBlockEdgeSize() != 0);

        ite.thr.y = lg.getBlockEdgeSize();
        ite.wthr.y = (ite.stop.get(1) - ite.start.get(1) + 1) / lg.getBlockEdgeSize() + ((ite.stop.get(1) - ite.start.get(1) + 1) % lg.getBlockEdgeSize() != 0);

        if (Decomposition::dims > 2)
        {
            ite.thr.z = lg.getBlockEdgeSize();
            ite.wthr.z = (ite.stop.get(2) - ite.start.get(2) + 1) / lg.getBlockEdgeSize() + ((ite.stop.get(2) - ite.start.get(2) + 1) % lg.getBlockEdgeSize() != 0);
        }

        itd.wthr = ite.wthr;
        itd.thr = ite.thr;
        itd.start = ite.start;
        itd.stop = ite.stop;

        if (nSlot != -1)
        {
            loc_grids.get(g_c).setGPUInsertBuffer((unsigned int)ite.nblocks(),(unsigned int)nSlot);
        }

        if (ite.nblocks() != 0)
        {
            launch_call_impl<impl>::call(loc_grids.get(g_c),ite,itd,functor,args...);
        }
    }
}

/*! \brief Get the starting point of the sub-grid we are iterating
 *
 */
inline grid_key_dx<Decomposition::dims> getStart()
{
    return start;
}

/*! \brief Get the stop point of the sub-grid we are iterating
 *
 */
inline grid_key_dx<Decomposition::dims> getStop()
{
    return stop;
}
};

#endif /* GRID_DIST_ID_ITERATOR_DEC_GPU_CUH_ */
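
For orientation, a minimal usage sketch of launch() follows, in the style of the OpenFPM GPU unit tests. It is not part of this header: the sparse-grid type, the getGridIterator accessor, the GRID_ID_3_GLOBAL helper, and the functor and property names are assumptions for illustration. What the header itself guarantees is only the functor signature: launch() forwards the local grid in kernel form, the iteration descriptor itd, and then the extra launch arguments (plus a shared empty-block flag when impl selects launch_call_impl<1>).

// Hypothetical device functor: called once per GPU thread with the local
// grid (kernel view), the distributed iteration descriptor, and the extra
// arguments passed to launch(). Names here are illustrative, not OpenFPM API.
template<unsigned int p>
struct insert_value_functor
{
    template<typename grid_type, typename ite_type>
    __device__ void operator()(grid_type & g, ite_type & itd, float c)
    {
        // Assumed OpenFPM helper macro: defines key (local grid key),
        // keyg (global key, using itd.origin) and inactive (out-of-range flag)
        GRID_ID_3_GLOBAL(itd);

        g.init();   // sparse-grid block insert setup (as in the OpenFPM tests)

        if (inactive == false)
        {g.template insert<p>(key) = c + keyg.get(0);}

        g.flush_block_insert();
    }
};

// On the host, assuming a GPU-enabled sparse distributed grid gdist and a
// Box<3,size_t> box selecting the region to touch (accessor names assumed):
//
//   auto it = gdist.getGridIterator(box.getKP1(),box.getKP2());
//   it.setGPUInsertBuffer(1);              // one insert slot per GPU block
//   it.launch(insert_value_functor<0>{},5.0f);

The design point the sketch illustrates: launch() never exposes per-element iteration on the host. It computes, per local grid, the intersection of [start,stop] with that grid, rounds the launch range to block-edge boundaries, and hands the whole range to one kernel launch, so the functor must derive its own key from the iteration descriptor and discard out-of-range threads.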