OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
BlockMapGpu.hpp
#ifndef BLOCK_MAP_GPU_HPP_
#define BLOCK_MAP_GPU_HPP_

#include "Vector/map_vector_sparse.hpp"
#include "BlockMapGpu_ker.cuh"
#include "BlockMapGpu_kernels.cuh"
#include "DataBlock.cuh"
#include <set>
#include "util/sparsegrid_util_common.hpp"

//! Type of the block associated with property p of the aggregate of blocks
template<typename AggregateT, unsigned int p>
using BlockTypeOf = typename std::remove_reference<typename boost::fusion::result_of::at_c<typename AggregateT::type, p>::type>::type;

//! Scalar type stored inside the block associated with property p of the aggregate of blocks
template<typename AggregateT, unsigned int p>
using ScalarTypeOf = typename std::remove_reference<typename boost::fusion::result_of::at_c<typename AggregateT::type, p>::type>::type::scalarType;

//! Copy the background value into element j of the destination block (scalar case)
template<typename T>
struct meta_copy_set_bck
{
    template<typename destType>
    inline static void set(destType & bP, T & backgroundValue, int j)
    {
        bP[j] = backgroundValue;
    }
};

//! Copy the background value into element j of the destination block (array case)
template<unsigned int N, typename T>
struct meta_copy_set_bck<T[N]>
{
    template<typename destType>
    inline static void set(destType & bP, T * backgroundValue, int j)
    {
        for (int i = 0 ; i < N ; i++)
        {
            bP[i][j] = backgroundValue[i];
        }
    }
};

template<typename AggregateBlockT, unsigned int threadBlockSize=128, typename indexT=long int, template<typename> class layout_base=memory_traits_inte>
class BlockMapGpu
{
private:

    typedef BlockMapGpu<AggregateBlockT, threadBlockSize, indexT, layout_base> self;

    typedef BlockTypeOf<AggregateBlockT, 0> BlockT0;

    bool is_new;

#ifdef SE_CLASS1

    //! Indicate if setGPUInsertBuffer has been called
    bool is_setGPUInsertBuffer = false;

    //! Indicate if initializeGPUInsertBuffer has been called
    bool is_initializeGPUInsertBuffer = false;

#endif

protected:
    const static unsigned char EXIST_BIT = 0;
    typedef typename AggregateAppend<DataBlock<unsigned char, BlockT0::size>, AggregateBlockT>::type AggregateInternalT;
    static const unsigned int pMask = AggregateInternalT::max_prop_real - 1;
    openfpm::vector_sparse_gpu_block<
            AggregateInternalT,
            BlockMapGpuFunctors::BlockFunctor<threadBlockSize>,
            indexT
            > blockMap;

public:
    typedef AggregateBlockT AggregateType;

    BlockMapGpu() = default;

    //! Clear the container
    void clear()
    {
        blockMap.clear();
    }

    //! Swap the content with another BlockMapGpu
    void swap(self & bm)
    {
        blockMap.swap(bm.blockMap);
    }

//    auto getBackgroundValue() -> decltype(blockMap.getBackground())
//    {
//        return blockMap.getBackground();
//    }

    //! Get the background value
    sparse_grid_bck_value<typename std::remove_reference<decltype(blockMap.getBackground())>::type> getBackgroundValue()
    {
        return sparse_grid_bck_value<typename std::remove_reference<decltype(blockMap.getBackground())>::type>(blockMap.getBackground());
    }

//    auto get(unsigned int linId) const -> decltype(blockMap.get(0));

    //! Get the value of property p at the linearized index linId (host version)
    template<unsigned int p>
    auto get(unsigned int linId) const -> const ScalarTypeOf<AggregateBlockT, p> &
    {
        typedef BlockTypeOf<AggregateBlockT, p> BlockT;
        unsigned int blockId = linId / BlockT::size;
        unsigned int offset = linId % BlockT::size;
        auto aggregate = blockMap.get(blockId);
        auto &block = aggregate.template get<p>();
        auto &mask = aggregate.template get<pMask>();
        // Now check if the element actually exists
        if (exist(mask[offset]))
        {
            return block[offset];
        }
        else
        {
            return blockMap.template getBackground<p>()[offset];
        }
    }

    //! Get the aggregate (block) containing the linearized index linId (host version)
    auto get(unsigned int linId) const -> const decltype(blockMap.get(0)) &
    {
        typedef BlockTypeOf<AggregateBlockT, 0> BlockT;
        unsigned int blockId = linId / BlockT::size;
        unsigned int offset = linId % BlockT::size;
        auto & aggregate = blockMap.get(blockId);
        return aggregate;
    }

    //! Insert data for property p at the linearized index linId, host version
    template<unsigned int p>
    auto insert(unsigned int linId) -> ScalarTypeOf<AggregateBlockT, p> &
    {
        typedef BlockTypeOf<AggregateBlockT, p> BlockT;
        unsigned int blockId = linId / BlockT::size;
        unsigned int offset = linId % BlockT::size;
        auto aggregate = blockMap.insert(blockId);
        auto &block = aggregate.template get<p>();
        auto &mask = aggregate.template get<pMask>();
        setExist(mask[offset]);
        return block[offset];
    }

    //! Insert data (the whole aggregate) at the linearized index linId, host version
    auto insert_o(unsigned int linId) -> decltype(blockMap.insert(0))
    {
        typedef BlockTypeOf<AggregateBlockT, 0> BlockT;
        unsigned int blockId = linId / BlockT::size;
        unsigned int offset = linId % BlockT::size;
        auto aggregate = blockMap.insert(blockId);
        return aggregate;
    }

    //! Insert a block + flush, host version: return the block of property p for block index blockId
    template<unsigned int p>
    auto insertBlockFlush(size_t blockId) -> decltype(blockMap.insertFlush(blockId,is_new).template get<p>())
    {
        typedef BlockTypeOf<AggregateBlockT, p> BlockT;

        auto aggregate = blockMap.insertFlush(blockId,is_new);
        auto &block = aggregate.template get<p>();

        if (is_new == true)
        {
            for (int i = 0 ; i < BlockT::size ; i++)
            {aggregate.template get<pMask>()[i] = 0;}
        }

        return block;
    }

    //! Insert a block + flush, host version: return the whole aggregate for block index blockId
    auto insertBlockFlush(size_t blockId) -> decltype(blockMap.insertFlush(blockId,is_new))
    {
        typedef BlockTypeOf<AggregateBlockT, 0> BlockT;
        auto b = blockMap.insertFlush(blockId,is_new);

        if (is_new == true)
        {
            for (int i = 0 ; i < BlockT::size ; i++)
            {b.template get<pMask>()[i] = 0;}
        }

        return b;
    }

    //! Return a kernel-accessible (device) view of this data structure
    BlockMapGpu_ker<AggregateInternalT, indexT, layout_base> toKernel()
    {
        BlockMapGpu_ker<AggregateInternalT, indexT, layout_base> toKer(blockMap.toKernel());
        return toKer;
    }

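    // Usage sketch (illustration only): toKernel() is meant to be passed by value to a CUDA kernel,
    // where elements are inserted through the returned BlockMapGpu_ker object. The kernel below is a
    // hypothetical example written for this comment; its name and index mapping are assumptions.
    //
    //   template<unsigned int p, typename BlockMapKerT>
    //   __global__ void fill_values(BlockMapKerT bmKer, unsigned int n)
    //   {
    //       unsigned int linId = blockIdx.x * blockDim.x + threadIdx.x;
    //       if (linId >= n) {return;}
    //       bmKer.template insert<p>(linId) = (float)linId;  // marks the element as existing and writes it
    //   }
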
    //! Copy the given properties (plus the mask) from device to host
    template<unsigned int ... prp>
    void deviceToHost()
    {
        blockMap.template deviceToHost<prp..., pMask>();
    }

    void deviceToHost();

    template<unsigned int ... prp>
    void hostToDevice();

    void hostToDevice();

    //! Preallocate the GPU insert buffer: nBlock insertion blocks with nSlot slots each
    void setGPUInsertBuffer(int nBlock, int nSlot)
    {
        // Prealloc the insert buffer on the underlying sparse vector
        blockMap.setGPUInsertBuffer(nBlock, nSlot);
        initializeGPUInsertBuffer();

#ifdef SE_CLASS1
        is_setGPUInsertBuffer = true;
#endif
    }

    //! In case we manually set the added index buffer and the add data buffer, this function must be called before flush
    void preFlush()
    {
        blockMap.preFlush();
    }

    //! Initialize the GPU insert buffer (all masks set to 0, i.e. non-existing)
    void initializeGPUInsertBuffer()
    {
        //todo: Test if it's enough to just initialize masks to 0, without any background value
        // Initialize the blocks to background
        auto & insertBuffer = blockMap.getGPUInsertBuffer();
        typedef BlockTypeOf<AggregateInternalT, pMask> BlockType; // Here assuming that all block types in the aggregate have the same size!
        constexpr unsigned int chunksPerBlock = 1; // Floor is good here...

        if (insertBuffer.size() != 0)
        {
            CUDA_LAUNCH_DIM3((BlockMapGpuKernels::initializeInsertBuffer<pMask, chunksPerBlock>),
                             insertBuffer.size()/chunksPerBlock,
                             chunksPerBlock*BlockType::size,
                             insertBuffer.toKernel());
        }

#ifdef SE_CLASS1
        is_initializeGPUInsertBuffer = true;
#endif
    }

    //! Merge the insert buffer into the main data structure, applying the v_reduce functors to duplicate entries
    template<typename ... v_reduce>
    void flush(mgpu::ofp_context_t &context, flush_type opt = FLUSH_ON_HOST)
    {
#ifdef SE_CLASS1
        if (is_setGPUInsertBuffer == false || is_initializeGPUInsertBuffer == false)
        {std::cout << __FILE__ << ":" << __LINE__ << " error: setGPUInsertBuffer() must be called before doing any insertion" << std::endl;}
#endif

        blockMap.template flush<v_reduce ... >(context, opt);
    }

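    // Usage sketch (illustration only): a typical insertion cycle seen from the host. The aggregate
    // type, buffer sizes, the context variable and the sadd_ reduction functor are assumptions chosen
    // for the example, not requirements of this class.
    //
    //   BlockMapGpu<aggregate<DataBlock<float, 64>>> bm;
    //
    //   bm.setGPUInsertBuffer(128, 1024);              // preallocate + initialize the insert buffer
    //   // ... launch a kernel that inserts through bm.toKernel() ...
    //   bm.flush<sadd_<0>>(context, FLUSH_ON_DEVICE);  // merge the inserts, summing duplicates on property 0
    //   bm.deviceToHost<0>();                          // copy property 0 (and the mask) back to the host
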
    //! Set the background value for property p
    template<unsigned int p, typename TypeBck>
    void setBackgroundValue(TypeBck backgroundValue)
    {
        // NOTE: Here we assume the user only passes Blocks and not scalars in the templated aggregate type
        typedef BlockTypeOf<AggregateInternalT, p> BlockT;
        typedef typename std::remove_all_extents<BlockTypeOf<AggregateInternalT, p>>::type BlockT_noarr;
        typedef BlockTypeOf<AggregateInternalT, pMask> BlockM;

        BlockT bP;
        BlockM bM;

        for (unsigned int i = 0; i < BlockT_noarr::size; ++i)
        {
            meta_copy_set_bck<TypeBck>::set(bP, backgroundValue, i);
            //meta_copy<TypeBck>::meta_copy_(backgroundValue,bP[][i]);
            bM[i] = 0;
        }

        blockMap.template setBackground<p>(bP);
        blockMap.template setBackground<pMask>(bM);
    }

    //! Return the value of bit pos in bitMask
    template<typename BitMaskT>
    inline static bool getBit(const BitMaskT &bitMask, unsigned char pos)
    {
        return (bitMask>>pos)&1U;
    }

    //! Set bit pos in bitMask
    template<typename BitMaskT>
    inline static bool setBit(BitMaskT &bitMask, unsigned char pos)
    {
        return bitMask |= 1U<<pos;
    }

    //! Clear bit pos in bitMask (bitwise NOT, so only the selected bit is cleared)
    template<typename BitMaskT>
    inline static bool unsetBit(BitMaskT &bitMask, unsigned char pos)
    {
        return bitMask &= ~(1U<<pos);
    }

    //! Return true if the EXIST bit of the mask is set
    template<typename BitMaskT>
    inline static bool exist(BitMaskT &bitMask)
    {
        return getBit(bitMask, EXIST_BIT);
    }

    //! Mark the element as existing
    template<typename BitMaskT>
    inline static void setExist(BitMaskT &bitMask)
    {
        setBit(bitMask, EXIST_BIT);
    }

    //! Mark the element as non-existing
    template<typename BitMaskT>
    inline static void unsetExist(BitMaskT &bitMask)
    {
        unsetBit(bitMask, EXIST_BIT);
    }

    //! Eliminate many internal temporary buffers; you can use this between flushes if you run out of memory
    void removeUnusedBuffers()
    {
        blockMap.removeUnusedBuffers();
    }

    //! Return the internal block map structure
    decltype(blockMap) & private_get_blockMap_non_const()
    {
        return blockMap;
    }

    //! Return the internal block map structure
    decltype(blockMap) & private_get_blockMap()
    {
        return blockMap;
    }

    //! Return the internal block map structure (const version)
    const decltype(blockMap) & private_get_blockMap() const
    {
        return blockMap;
    }
};

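// Usage sketch (illustration only): host-side insertion and background values. The aggregate type,
// the context variable and the sadd_ reduction functor are assumptions chosen for the example.
//
//   BlockMapGpu<aggregate<DataBlock<float, 64>>> bm;
//   bm.setBackgroundValue<0>(0.0f);              // value returned when reading non-existing elements
//
//   bm.insert<0>(5 * 64 + 3) = 1.5f;             // host insert: sets the EXIST bit and stores the value
//   bm.flush<sadd_<0>>(context, FLUSH_ON_HOST);  // merge the pending insertions on the host
//
//   float v   = bm.get<0>(5 * 64 + 3);           // existing element -> stored value (1.5f)
//   float bck = bm.get<0>(0);                    // non-existing element -> background value (0.0f)
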
template<typename AggregateBlockT, unsigned int threadBlockSize, typename indexT, template<typename> class layout_base>
void BlockMapGpu<AggregateBlockT, threadBlockSize, indexT, layout_base>::deviceToHost()
{
    blockMap.template deviceToHost<pMask>();
}

template<typename AggregateBlockT, unsigned int threadBlockSize, typename indexT, template<typename> class layout_base>
template<unsigned int ... prp>
void BlockMapGpu<AggregateBlockT, threadBlockSize, indexT, layout_base>::hostToDevice()
{
    blockMap.template hostToDevice<prp..., pMask>();
}

template<typename AggregateBlockT, unsigned int threadBlockSize, typename indexT, template<typename> class layout_base>
void BlockMapGpu<AggregateBlockT, threadBlockSize, indexT, layout_base>::hostToDevice()
{
    blockMap.template hostToDevice<pMask>();
}

//template<typename AggregateBlockT, unsigned int threadBlockSize, typename indexT, template<typename> class layout_base>
//template<unsigned int p>
//void BlockMapGpu<AggregateBlockT, threadBlockSize, indexT, layout_base>::setBackgroundValue(
//        ScalarTypeOf<AggregateBlockT, p> backgroundValue)


#endif /* BLOCK_MAP_GPU_HPP_ */