OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
BlockMapGpu_ker.cuh
1 #ifndef BLOCK_MAP_GPU_KER_CUH_
2 #define BLOCK_MAP_GPU_KER_CUH_
3 
4 #include "util/cuda_util.hpp"
5 #include <cstdlib>
6 #include "Vector/map_vector_sparse.hpp"
7 #include "DataBlock.cuh"
8 #include "TemplateUtils/encap_shmem.hpp"
9 
// Convenience alias: the block type stored for property p of AggregateT
// (the p-th element of the aggregate's boost::fusion type list, references stripped).
10 template<typename AggregateT, unsigned int p>
11 using BlockTypeOf = typename std::remove_reference<typename boost::fusion::result_of::at_c<typename AggregateT::type, p>::type>::type;
12 
// Convenience alias: the scalar element type held inside the block of property p
// (the block type exposes it as scalarType).
13 template<typename AggregateT, unsigned int p>
14 using ScalarTypeOf = typename std::remove_reference<typename boost::fusion::result_of::at_c<typename AggregateT::type, p>::type>::type::scalarType;
15 
// Meta-function giving the index of the last real property of AggregateT;
// by the convention used in this file (see pMask below) that is the mask property.
16 template <typename AggregateT>
17 struct LastPOf
18 {
19  static const unsigned int value = AggregateT::max_prop_real - 1;
20 };
21 
// Wrapper around one aggregate of data blocks used during insert operations.
// NOTE(review): the struct declaration line ("struct InsertBlockWrapper" on original
// line 23) was lost in extraction between the template header and the opening brace —
// restore it when repairing this file.
22 template <typename AggregateT, unsigned int pMask>
24 {
 // The wrapped aggregate holding one block per property; pMask selects the mask property.
25  AggregateT aggregate;
26 
27  InsertBlockWrapper() = default;
28 
 // Copy constructor (signature line, original line 31, missing in this extraction):
 // copies the wrapped aggregate.
30 
32  {
33 #ifdef __NVCC__
34  aggregate = other.aggregate;
35 #else // __NVCC__
36  std::cout << __FILE__ << ":" << __LINE__ << " error: you are supposed to compile this file with nvcc, if you want to use it with gpu" << std::endl;
37 #endif // __NVCC__
38  }
39 
 // Copy assignment (signature line, original line 40, missing in this extraction):
 // copies the wrapped aggregate and returns *this.
41  {
42 #ifdef __NVCC__
43  aggregate = other.aggregate;
44  return *this;
45 #else // __NVCC__
46  std::cout << __FILE__ << ":" << __LINE__ << " error: you are supposed to compile this file with nvcc, if you want to use it with gpu" << std::endl;
47 #endif // __NVCC__
48  }
49 
 // Access the block for property p of the wrapped aggregate.
50  template <unsigned int p>
51  inline auto get() -> decltype(aggregate.template get<p>())
52  {
53 #ifdef __NVCC__
54  return aggregate.template get<p>();
55 #else // __NVCC__
56  std::cout << __FILE__ << ":" << __LINE__ << " error: you are supposed to compile this file with nvcc, if you want to use it with gpu" << std::endl;
57 #endif // __NVCC__
58  }
59 
 // Access the mask block (property pMask) of the wrapped aggregate.
60  inline auto getMask() -> decltype(aggregate.template get<pMask>())
61  {
62 #ifdef __NVCC__
63  return aggregate.template get<pMask>();
64 #else // __NVCC__
65  std::cout << __FILE__ << ":" << __LINE__ << " error: you are supposed to compile this file with nvcc, if you want to use it with gpu" << std::endl;
66 #endif // __NVCC__
67  }
68 };
69 
70 template<typename AggregateBlockT=aggregate<DataBlock<float, 64>>, typename indexT=int, template<typename> class layout_base=memory_traits_inte>
72 {
// --- BlockMapGpu_ker data members (the class declaration line, original line 71,
// is missing from this extraction) ---
73 protected:
 // Bit position inside each per-element mask word that marks "element exists".
75  const static unsigned char EXIST_BIT = 0;
76 
77 public:
 // Index of the mask property: by convention the last real property of the aggregate.
78  static const unsigned int pMask = AggregateBlockT::max_prop_real - 1;
79  typedef AggregateBlockT AggregateType;
 // NOTE(review): the declaration of the sparse-map member `blockMap` (original line 80)
 // is missing in this extraction; all methods below operate on it.
81 
82 public:
/*! \brief Test one bit of a bit-mask word
 *
 * \param bitMask mask word to read
 * \param pos zero-based bit position
 *
 * \return true when the bit at position pos is set
 */
template<typename BitMaskT>
inline static __device__ __host__ bool getBit(const BitMaskT &bitMask, unsigned char pos)
{
	return ((bitMask >> pos) & 1U) != 0U;
}
88 
/*! \brief Turn on one bit of a bit-mask word
 *
 * \param bitMask mask word to modify (updated in place)
 * \param pos zero-based bit position to set
 */
template<typename BitMaskT>
inline static __device__ __host__ void setBit(BitMaskT &bitMask, unsigned char pos)
{
	bitMask |= (1U << pos);
}
94 
/*! \brief Turn off one bit of a bit-mask word
 *
 * \param bitMask mask word to modify (updated in place)
 * \param pos zero-based bit position to clear
 */
template<typename BitMaskT>
inline static __device__ __host__ void unsetBit(BitMaskT &bitMask, unsigned char pos)
{
	bitMask &= ~(1U << pos);
}
100 
101 public:
 // Constructor (signature line, original line 102, missing in this extraction):
 // stores the device-side sparse-vector handle that all accessors below operate on.
103  : blockMap(blockMap) {};
104 
/*! \brief Read property p at the global linearized position linId
 *
 * The linear id is split into (block id, offset inside the block) using the
 * block size of property p, then the two-argument get() overload does the work.
 *
 * \param linId linearized element position
 *
 * \return the value of property p for that element
 */
template<unsigned int p>
inline __device__ auto get(unsigned int linId) const -> ScalarTypeOf<AggregateBlockT, p>
{
#ifdef __NVCC__
	typedef BlockTypeOf<AggregateBlockT, p> BlockT;
	const unsigned int blk = linId / BlockT::size;
	const unsigned int off = linId % BlockT::size;
	return this->template get<p>(blk, off);
#else // __NVCC__
	std::cout << __FILE__ << ":" << __LINE__ << " error: you are supposed to compile this file with nvcc, if you want to use it with gpu" << std::endl;
#endif // __NVCC__
}
117 
118 
/*! \brief Read property p of the element at (blockId, offset)
 *
 * The logical block id is translated to a data-block slot via the sparse index;
 * if the element's mask word says it does not exist, the background value of
 * property p is returned instead of the stored data.
 *
 * \param blockId logical block id
 * \param offset position inside the block
 *
 * \return the stored value, or the background value for absent elements
 */
template<unsigned int p>
inline __device__ auto get(unsigned int blockId, unsigned int offset) const -> ScalarTypeOf<AggregateBlockT, p>
{
#ifdef __NVCC__
	const auto slot = blockMap.get_sparse(blockId);
	const auto msk = blockMap.template get_ele<pMask>(slot.id)[offset];

	if (exist(msk))
	{return blockMap.template get_ele<p>(slot.id)[offset];}

	// element not present: fall back to the background value
	return blockMap.template getBackground<p>()[offset];
#else // __NVCC__
	std::cout << __FILE__ << ":" << __LINE__ << " error: you are supposed to compile this file with nvcc, if you want to use it with gpu" << std::endl;
#endif // __NVCC__
}
136 
 // Return the encapsulated block at position blockId of the underlying storage.
 // NOTE(review): unlike get(), no get_sparse() translation is applied here —
 // confirm callers pass a data-block position rather than a logical block id.
137  inline __device__ auto getBlock(unsigned int blockId) -> decltype(blockMap.get(0))
138  {
139  #ifdef __NVCC__
140  return blockMap.get(blockId);
141  #else // __NVCC__
142  std::cout << __FILE__ << ":" << __LINE__ << " error: you are supposed to compile this file with nvcc, if you want to use it with gpu" << std::endl;
143  #endif // __NVCC__
144  }
145 
/*! \brief Get a mutable reference to property p at linearized position linId
 *
 * Splits linId into (block id, offset) and delegates to the two-argument
 * getReference() overload. No existence check is performed.
 *
 * \param linId linearized element position
 *
 * \return writable reference to the element's property p
 */
template<unsigned int p>
inline __device__ ScalarTypeOf<AggregateBlockT, p> & getReference(unsigned int linId)
{
	// Only call this if you are TOTALLY SURE the element exists! Otherwise KABOOOOOM! :D
#ifdef __NVCC__
	typedef BlockTypeOf<AggregateBlockT, p> BlockT;
	return getReference<p>(linId / BlockT::size, linId % BlockT::size);
#else // __NVCC__
	std::cout << __FILE__ << ":" << __LINE__ << " error: you are supposed to compile this file with nvcc, if you want to use it with gpu" << std::endl;
#endif // __NVCC__
}
159 
 // Mutable reference to property p of element (blockId, offset); no existence check.
 // NOTE(review): blockId is passed straight to blockMap.get<p>() without the
 // get_sparse() translation used by get() — verify which id space callers use.
160  template<unsigned int p>
161  inline __device__ ScalarTypeOf<AggregateBlockT, p> & getReference(unsigned int blockId, unsigned int offset)
162  {
163  // Only call this if you are TOTALLY SURE the element exists! Otherwise KABOOOOOM! :D
164  #ifdef __NVCC__
165  return blockMap.template get<p>(blockId)[offset];
166  #else // __NVCC__
167  std::cout << __FILE__ << ":" << __LINE__ << " error: you are supposed to compile this file with nvcc, if you want to use it with gpu" << std::endl;
168  #endif // __NVCC__
169  }
170 
/*! \brief Insert the element at linearized position linId and return a reference to its property p
 *
 * Splits linId into (block id, offset) using the block size of property p and
 * delegates to the two-argument insert() overload.
 *
 * \param linId linearized element position
 *
 * \return writable reference to the freshly inserted element's property p
 */
template<unsigned int p>
inline __device__ auto insert(unsigned int linId) -> ScalarTypeOf<AggregateBlockT, p>&
{
#ifdef __NVCC__
	typedef BlockTypeOf<AggregateBlockT, p> BlockT;
	const unsigned int blk = linId / BlockT::size;
	const unsigned int off = linId % BlockT::size;
	return insert<p>(blk, off);
#else // __NVCC__
	std::cout << __FILE__ << ":" << __LINE__ << " error: you are supposed to compile this file with nvcc, if you want to use it with gpu" << std::endl;
#endif // __NVCC__
}
183 
/*! \brief Insert element (blockId, offset): mark it existing and return a reference to its property p
 *
 * \param blockId logical block id to insert into
 * \param offset position inside the block
 *
 * \return writable reference to the element's property p
 */
template<unsigned int p>
inline __device__ auto insert(unsigned int blockId, unsigned int offset) -> ScalarTypeOf<AggregateBlockT, p>&
{
#ifdef __NVCC__
	auto wrapper = blockMap.insert(blockId);
	// flag the element as existing in the mask property ...
	setExist(wrapper.template get<pMask>()[offset]);
	// ... then hand back a writable reference to its data
	return wrapper.template get<p>()[offset];
#else // __NVCC__
	std::cout << __FILE__ << ":" << __LINE__ << " error: you are supposed to compile this file with nvcc, if you want to use it with gpu" << std::endl;
#endif // __NVCC__
}
198 
/*! \brief Cooperative per-CUDA-block insert of the data block for blockId
 *
 * One leader thread per group of `stride` threads performs the sparse insert and
 * publishes the obtained slot index through shared memory; after a block-wide
 * barrier every thread binds a local encap object to that slot and returns it.
 *
 * \tparam nChunksPerBlocks number of independent chunks handled per CUDA block
 *         (size of the shared broadcast buffer)
 * \param blockId id of the block to insert
 * \param stride number of consecutive threads cooperating on the same chunk;
 *        assumes blockDim.x / stride < nChunksPerBlocks — TODO confirm callers
 *
 * \return an encap object referring to the inserted block
 */
template<unsigned int nChunksPerBlocks = 1>
inline __device__ auto insertBlock(indexT blockId, unsigned int stride = 8192) -> decltype(blockMap.insert(0))
{
#ifdef __NVCC__
	// Fix: threadIdx, __shared__ and the encap construction are device-only and must
	// live inside the __NVCC__ guard, consistently with every other method here;
	// previously they sat before the #ifdef and broke the non-nvcc diagnostic path.
	int chunk = threadIdx.x / stride;
	__shared__ int mem_[nChunksPerBlocks];

	decltype(blockMap.insert(0)) ec_(blockMap.private_get_data(),0);

	// the leader thread of each stride-group performs the actual sparse insert ...
	if (threadIdx.x % stride == 0 && threadIdx.y == 0 && threadIdx.z == 0)
	{
		auto ec = blockMap.insert(blockId);

		// ... and broadcasts the obtained slot index via shared memory
		mem_[chunk] = ec.private_get_k();
	}

	__syncthreads();

	// every thread rebinds its local encap to the broadcast slot
	ec_.private_set_k(mem_[chunk]);

	return ec_;
#else // __NVCC__
	std::cout << __FILE__ << ":" << __LINE__ << " error: you are supposed to compile this file with nvcc, if you want to use it with gpu" << std::endl;
#endif // __NVCC__
}
228 
 // Accessor returning the underlying sparse map
 // (signature line, original line 229, missing in this extraction).
230  {
231  return blockMap;
232  }
233 
/*! \brief Resolve linId into its data-block slot and in-block offset
 *
 * \param linId linearized element position (input)
 * \param dataBlockPos filled with the slot of the data block in the sparse storage
 * \param offset filled with the position inside the block
 */
inline __device__ void get_sparse(unsigned int linId, unsigned int & dataBlockPos , unsigned int & offset) const
{
#ifdef __NVCC__
	typedef BlockTypeOf<AggregateBlockT, pMask> BlockT;

	offset = linId % BlockT::size;
	dataBlockPos = blockMap.get_sparse(linId / BlockT::size).id;
#else // __NVCC__
	std::cout << __FILE__ << ":" << __LINE__ << " error: you are supposed to compile this file with nvcc, if you want to use it with gpu" << std::endl;
#endif // __NVCC__
}
250 
 // Static helper: block id containing linId (computed from the block size of
 // property 0; assumes all properties share the same block size — TODO confirm).
251  inline static __device__ unsigned int getBlockId(unsigned int linId)
252  {
253 #ifdef __NVCC__
254  return linId / BlockTypeOf<AggregateBlockT, 0>::size;
255 #else // __NVCC__
256  std::cout << __FILE__ << ":" << __LINE__ << " error: you are supposed to compile this file with nvcc, if you want to use it with gpu" << std::endl;
257 #endif // __NVCC__
258  }
259 
 // Static helper: position of linId inside its block (block size of property 0;
 // assumes all properties share the same block size — TODO confirm).
260  inline static __device__ unsigned int getOffset(unsigned int linId)
261  {
262 #ifdef __NVCC__
263  return linId % BlockTypeOf<AggregateBlockT, 0>::size;
264 #else // __NVCC__
265  std::cout << __FILE__ << ":" << __LINE__ << " error: you are supposed to compile this file with nvcc, if you want to use it with gpu" << std::endl;
266 #endif // __NVCC__
267  }
268 
 // Forward init() to the underlying sparse map; per the sparse-vector interface
 // this must be called (in-kernel) before performing insertions.
269  inline __device__ void init()
270  {
271 #ifdef __NVCC__
272  blockMap.init();
273 #else // __NVCC__
274  std::cout << __FILE__ << ":" << __LINE__ << " error: you are supposed to compile this file with nvcc, if you want to use it with gpu" << std::endl;
275 #endif // __NVCC__
276  }
277 
 // Forward flush_block_insert() to the underlying sparse map
 // (presumably finalizes the insertions queued by this CUDA block — see map_vector_sparse).
278  inline __device__ void flush_block_insert()
279  {
280 #ifdef __NVCC__
281  blockMap.flush_block_insert();
282 #else // __NVCC__
283  std::cout << __FILE__ << ":" << __LINE__ << " error: you are supposed to compile this file with nvcc, if you want to use it with gpu" << std::endl;
284 #endif // __NVCC__
285  }
286 
 // True when the EXIST bit of the given mask word is set, i.e. the element is present.
287  template<typename BitMaskT>
288  inline static __device__ bool exist(const BitMaskT &bitMask)
289  {
290  return getBit(bitMask, EXIST_BIT);
291  }
292 
 // Mark the element as existing by setting the EXIST bit in its mask word.
293  template<typename BitMaskT>
294  inline static __device__ void setExist(BitMaskT &bitMask)
295  {
296  setBit(bitMask, EXIST_BIT);
297  }
298 
 // Mark the element as absent by clearing the EXIST bit in its mask word.
299  template<typename BitMaskT>
300  inline static __device__ void unsetExist(BitMaskT &bitMask)
301  {
302  unsetBit(bitMask, EXIST_BIT);
303  }
304 
 // Return the raw mask word of the element at linId
 // (via get<pMask>, so the background mask is returned for absent elements).
305  inline __device__ ScalarTypeOf<AggregateBlockT, pMask> getMask(unsigned int linId) const
306  {
307  return get<pMask>(linId);
308  }
309 
/*! \brief Remove the element at linearized position linId
 *
 * Splits linId into (block id, offset) using the mask property's block size and
 * delegates to the two-argument remove() overload.
 *
 * \param linId linearized element position
 */
inline __device__ void remove(unsigned int linId)
{
#ifdef __NVCC__
	typedef BlockTypeOf<AggregateBlockT, pMask> BlockT;
	remove(linId / BlockT::size, linId % BlockT::size);
#else // __NVCC__
	std::cout << __FILE__ << ":" << __LINE__ << " error: you are supposed to compile this file with nvcc, if you want to use it with gpu" << std::endl;
#endif // __NVCC__
}
321 
 // Remove element (blockId, offset) by zeroing its whole mask word (clears EXIST).
 // NOTE(review): this indexes with get<pMask>(sid) where sid is a sparse_index,
 // while get() above uses get_ele<pMask>(sid.id) — confirm both overloads exist
 // in the sparse vector and agree.
322  inline __device__ void remove(unsigned int blockId, unsigned int offset)
323  {
324  #ifdef __NVCC__
325 
326  const auto sid = blockMap.get_sparse(blockId);
327  blockMap.template get<pMask>(sid)[offset] = 0;
328 
329  #else // __NVCC__
330  std::cout << __FILE__ << ":" << __LINE__ << " error: you are supposed to compile this file with nvcc, if you want to use it with gpu" << std::endl;
331  #endif // __NVCC__
332  }
333 
 // Return the index buffer of the underlying sparse vector.
339  inline __device__ auto getIndexBuffer() -> decltype(blockMap.getIndexBuffer())
340  {
341  return blockMap.getIndexBuffer();
342  }
343 
 // Return the data buffer of the underlying sparse vector.
349  inline __device__ auto getDataBuffer() -> decltype(blockMap.getDataBuffer())
350  {
351  return blockMap.getDataBuffer();
352  }
353 
354 #ifdef SE_CLASS1
355 
/*! \brief Check whether ptr falls inside one of the device buffers of this structure
 *
 * \param ptr device pointer to test
 *
 * \return a pointer_check report; when it matches, the report string is
 *         prefixed with the owning sub-structure
 */
pointer_check check_device_pointer(void * ptr)
{
	pointer_check pc = blockMap.check_device_pointer(ptr);

	if (pc.match == true)
	{pc.match_str = std::string("blockMap overflow : ") + "\n" + pc.match_str;}

	return pc;
}
375 
376 #endif
377 
378 };
379 
380 
381 
382 
383 #endif /* BLOCK_MAP_GPU_KER_CUH_ */
__device__ auto getDataBuffer() -> decltype(vct_data)&
Get the data buffer.
__device__ auto get(Ti id) const -> decltype(vct_data.template get< p >(id))
Get an element of the vector.
bool match
Indicate if the pointer match.
__device__ auto getDataBuffer() -> decltype(blockMap.getDataBuffer())
Return the data buffer for the sparse vector.
__device__ void init()
This function must be called.
Transform the boost::fusion::vector into memory specification (memory_traits)
Definition: memory_conf.hpp:83
__device__ void flush_block_insert()
It inserts an element into the sparse vector.
__device__ openfpm::sparse_index< Ti > get_sparse(Ti id) const
Get the sparse index.
std::string match_str
match string
aggregate of properties: from a list of objects it creates a struct that follows the OPENFPM native stru...
Definition: aggregate.hpp:214
__device__ auto getIndexBuffer() const -> const decltype(vct_index)&
Get the indices buffer.
__device__ auto getIndexBuffer() -> decltype(blockMap.getIndexBuffer())
Return the index buffer for the sparse vector.
__device__ auto insert(Ti ele) -> decltype(vct_data.template get< p >(0))
It inserts an element into the sparse vector.