OpenFPM_pdata  4.1.0
Project that contain the implementation of distributed structures
 
Loading...
Searching...
No Matches
BlockMapGpu.hpp
1#ifndef BLOCK_MAP_GPU_HPP_
2#define BLOCK_MAP_GPU_HPP_
3
4#include "Vector/map_vector_sparse.hpp"
5#include "BlockMapGpu_ker.cuh"
6#include "BlockMapGpu_kernels.cuh"
7#include "DataBlock.cuh"
8#include <set>
9#include "util/sparsegrid_util_common.hpp"
10
11template<typename AggregateT, unsigned int p>
12using BlockTypeOf = typename std::remove_reference<typename boost::fusion::result_of::at_c<typename AggregateT::type, p>::type>::type;
13
14template<typename AggregateT, unsigned int p>
15using ScalarTypeOf = typename std::remove_reference<typename boost::fusion::result_of::at_c<typename AggregateT::type, p>::type>::type::scalarType;
16
/*! \brief Copy a scalar background value into slot j of a destination block
 *
 * Primary template: the background value is a single scalar, written into
 * position j of the destination block bP.
 * (Name restored from its use in setBackgroundValue(); the declaration line
 * was lost in the doxygen extraction.)
 */
template<typename T>
struct meta_copy_set_bck
{
	template<typename destType>
	inline static void set(destType & bP, T & backgroundValue, int j)
	{
		bP[j] = backgroundValue;
	}
};
26
// Forward declaration of the primary template (defined earlier in this file).
template<typename T>
struct meta_copy_set_bck;

/*! \brief Copy an array background value into slot j of each component block
 *
 * Partial specialization for array-valued properties T[N]: component i of the
 * background array is written into position j of component block i.
 * (Specialization head restored; the declaration line was lost in the doxygen
 * extraction, but the body — `T *` argument, loop over N writing bP[i][j] —
 * pins it to T[N].)
 */
template<unsigned int N, typename T>
struct meta_copy_set_bck<T[N]>
{
	template<typename destType>
	inline static void set(destType & bP, T * backgroundValue, int j)
	{
		for (unsigned int i = 0 ; i < N ; i++)
		{
			bP[i][j] = backgroundValue[i];
		}
	}
};
39
// ---------------------------------------------------------------------------
// NOTE(review): this header was recovered from a doxygen HTML dump. The
// original source line numbers are fused to the front of each line, and lines
// that were hyperlinks in the dump are missing entirely. Confirm the dropped
// lines below against the original BlockMapGpu.hpp before compiling.
// ---------------------------------------------------------------------------
// Template parameters: user block-aggregate type, CUDA thread-block size,
// index type of the sparse map, and memory layout policy.
40template<typename AggregateBlockT, unsigned int threadBlockSize=128, typename indexT=long int, template<typename> class layout_base=memory_traits_inte>
// NOTE(review): original line 41 (the "class BlockMapGpu ..." declaration
// itself) was dropped by the extraction — TODO restore from the original.
42{
43private:
44
// NOTE(review): original line 45 was dropped; presumably a
// "typedef BlockMapGpu<...> self;" since swap() below takes "self &" — verify.
46
// Block type of property 0, used to size the per-element mask block.
47 typedef BlockTypeOf<AggregateBlockT, 0> BlockT0;
48
// Output flag of insertFlush(): set true when the requested block was created.
49 bool is_new;
50
51#ifdef SE_CLASS1
52
// Debug-only (SE_CLASS1) flags: record that the GPU insert buffer has been
// sized and initialized; flush() warns if either step was skipped.
54 bool is_setGPUInsertBuffer = false;
55
57 bool is_initializeGPUInsertBuffer = false;
58
59#endif
60
61protected:
// Bit position inside each mask byte that marks an element as existing.
62 const static unsigned char EXIST_BIT = 0;
// Internal aggregate = user aggregate + one extra unsigned-char block (mask).
63 typedef typename AggregateAppend<DataBlock<unsigned char, BlockT0::size>, AggregateBlockT>::type AggregateInternalT;
// Index of the appended mask property (last real property of the aggregate).
64 static const unsigned int pMask = AggregateInternalT::max_prop_real - 1;
// NOTE(review): original line 65 was dropped; it opened the template type of
// this member (presumably "openfpm::vector_sparse_gpu<" or similar) — verify.
66 AggregateInternalT,
67 BlockMapGpuFunctors::BlockFunctor<threadBlockSize>,
68 indexT
69 > blockMap;
70
71public:
// Expose the user-visible aggregate type (without the internal mask block).
72 typedef AggregateBlockT AggregateType;
73
74 BlockMapGpu() = default;
75
76 void clear()
77 {
78 blockMap.clear();
79 }
80
81 void swap(self & bm)
82 {
83 blockMap.swap(bm.blockMap);
84 }
85
91// auto getBackgroundValue() -> decltype(blockMap.getBackground())
92// {
93// return blockMap.getBackground();
94// }
95
101 sparse_grid_bck_value<typename std::remove_reference<decltype(blockMap.getBackground())>::type> getBackgroundValue()
102 {
103 return sparse_grid_bck_value<typename std::remove_reference<decltype(blockMap.getBackground())>::type>(blockMap.getBackground());
104 }
105
106// auto get(unsigned int linId) const -> decltype(blockMap.get(0));
107
108 template<unsigned int p>
109 auto get(unsigned int linId) const -> const ScalarTypeOf<AggregateBlockT, p> &
110 {
111 typedef BlockTypeOf<AggregateBlockT, p> BlockT;
112 unsigned int blockId = linId / BlockT::size;
113 unsigned int offset = linId % BlockT::size;
114 auto aggregate = blockMap.get(blockId);
115 auto &block = aggregate.template get<p>();
116 auto &mask = aggregate.template get<pMask>();
117 // Now check if the element actually exists
118 if (exist(mask[offset]))
119 {
120 return block[offset];
121 }
122 else
123 {
124 return blockMap.template getBackground<p>()[offset];
125 }
126 }
127
128 auto get(unsigned int linId) const -> const decltype(blockMap.get(0)) &
129 {
130 typedef BlockTypeOf<AggregateBlockT, 0> BlockT;
131 unsigned int blockId = linId / BlockT::size;
132 unsigned int offset = linId % BlockT::size;
133 auto & aggregate = blockMap.get(blockId);
134 return aggregate;
135 }
136
146 template<unsigned int p>
147 auto insert(unsigned int linId) -> ScalarTypeOf<AggregateBlockT, p> &
148 {
149 typedef BlockTypeOf<AggregateBlockT, p> BlockT;
150 unsigned int blockId = linId / BlockT::size;
151 unsigned int offset = linId % BlockT::size;
152 auto aggregate = blockMap.insert(blockId);
153 auto &block = aggregate.template get<p>();
154 auto &mask = aggregate.template get<pMask>();
155 setExist(mask[offset]);
156 return block[offset];
157 }
158
168 auto insert_o(unsigned int linId) -> decltype(blockMap.insert(0))
169 {
170 typedef BlockTypeOf<AggregateBlockT, 0> BlockT;
171 unsigned int blockId = linId / BlockT::size;
172 unsigned int offset = linId % BlockT::size;
173 auto aggregate = blockMap.insert(blockId);
174 return aggregate;
175 }
176
186 template<unsigned int p>
187 auto insertBlockFlush(size_t blockId) -> decltype(blockMap.insertFlush(blockId,is_new).template get<p>())
188 {
189 typedef BlockTypeOf<AggregateBlockT, p> BlockT;
190
191 auto aggregate = blockMap.insertFlush(blockId,is_new);
192 auto &block = aggregate.template get<p>();
193
194 if (is_new == true)
195 {
196 for (int i = 0 ; i < BlockT::size ; i++)
197 {aggregate.template get<pMask>()[i] = 0;}
198 }
199
200 return block;
201 }
202
210 auto insertBlockFlush(size_t blockId) -> decltype(blockMap.insertFlush(blockId,is_new))
211 {
212 typedef BlockTypeOf<AggregateBlockT, 0> BlockT;
213 auto b = blockMap.insertFlush(blockId,is_new);
214
215 if (is_new == true)
216 {
217 for (int i = 0 ; i < BlockT::size ; i++)
218 {b.template get<pMask>()[i] = 0;}
219 }
220
221 return b;
222 }
223
// NOTE(review): the doxygen extraction dropped the signature of this member
// and the declaration of "toKer" (original lines 224 and 226). From the body,
// this presumably builds and returns a kernel-side view of blockMap (a
// BlockMapGpu_ker, per the BlockMapGpu_ker.cuh include) — confirm against the
// original header before editing.
225 {
227 return toKer;
228 }
229
230 template<unsigned int ... prp>
231 void deviceToHost()
232 {
233 blockMap.template deviceToHost<prp..., pMask>();
234 }
235
// Mask-only device-to-host transfer (defined out-of-class below).
236 void deviceToHost();
237
// Host-to-device transfer of the selected properties plus the mask
// (defined out-of-class below).
238 template<unsigned int ... prp>
239 void hostToDevice();
240
// Mask-only host-to-device transfer (defined out-of-class below).
241 void hostToDevice();
242
249 void setGPUInsertBuffer(int nBlock, int nSlot)
250 {
251 // Prealloc the insert buffer on the underlying sparse vector
252 blockMap.setGPUInsertBuffer(nBlock, nSlot);
253 initializeGPUInsertBuffer();
254
255#ifdef SE_CLASS1
256 is_setGPUInsertBuffer = true;
257#endif
258 }
259
265 void preFlush()
266 {
267 blockMap.preFlush();
268 }
269
270 void initializeGPUInsertBuffer()
271 {
272 //todo: Test if it's enough to just initialize masks to 0, without any background value
273 // Initialize the blocks to background
274 auto & insertBuffer = blockMap.getGPUInsertBuffer();
275 typedef BlockTypeOf<AggregateInternalT, pMask> BlockType; // Here assuming that all block types in the aggregate have the same size!
276 constexpr unsigned int chunksPerBlock = 1; // Floor is good here...
277
278 if (insertBuffer.size() != 0)
279 {
280 CUDA_LAUNCH_DIM3((BlockMapGpuKernels::initializeInsertBuffer<pMask, chunksPerBlock>),insertBuffer.size()/chunksPerBlock, chunksPerBlock*BlockType::size,
281 insertBuffer.toKernel());
282 }
283
284 #ifdef SE_CLASS1
285 is_initializeGPUInsertBuffer = true;
286 #endif
287 }
288
289 template<typename ... v_reduce>
290 void flush(gpu::ofp_context_t &context, flush_type opt = FLUSH_ON_HOST)
291 {
292#ifdef SE_CLASS1
293
294 if (is_setGPUInsertBuffer == false || is_initializeGPUInsertBuffer == false)
295 {std::cout << __FILE__ << ":" << __LINE__ << " error setGPUInsertBuffer you must call before doing any insertion " << std::endl;}
296#endif
297
298 blockMap.template flush<v_reduce ... >(context, opt);
299 }
300
306 template<unsigned int p, typename TypeBck>
307 void setBackgroundValue(TypeBck backgroundValue)
308 {
309 // NOTE: Here we assume user only passes Blocks and not scalars in the templated aggregate type
310 typedef BlockTypeOf<AggregateInternalT, p> BlockT;
311 typedef typename std::remove_all_extents<BlockTypeOf<AggregateInternalT, p>>::type BlockT_noarr;
312 typedef BlockTypeOf<AggregateInternalT, pMask> BlockM;
313
314 BlockT bP;
315 BlockM bM;
316
317 for (unsigned int i = 0; i < BlockT_noarr::size; ++i)
318 {
319 meta_copy_set_bck<TypeBck>::set(bP,backgroundValue,i);
320 //meta_copy<TypeBck>::meta_copy_(backgroundValue,bP[][i]);
321 bM[i] = 0;
322 }
323
324 blockMap.template setBackground<p>(bP);
325 blockMap.template setBackground<pMask>(bM);
326 }
327
/*! \brief Read the bit at position pos of bitMask
 *
 * \return true if the bit is set
 */
template<typename BitMaskT>
inline static bool getBit(const BitMaskT &bitMask, unsigned char pos)
{
	const auto shifted = bitMask >> pos;
	return (shifted & 1U) != 0;
}
333
/*! \brief Set the bit at position pos of bitMask
 *
 * \return true if the mask is non-zero after the update
 */
template<typename BitMaskT>
inline static bool setBit(BitMaskT &bitMask, unsigned char pos)
{
	bitMask |= 1U << pos;
	return bitMask != 0;
}
339
/*! \brief Clear the bit at position pos of bitMask
 *
 * \return true if the mask is still non-zero after clearing
 */
template<typename BitMaskT>
inline static bool unsetBit(BitMaskT &bitMask, unsigned char pos)
{
	// BUG FIX: was "bitMask &= !(1U<<pos)" — logical NOT of a non-zero value
	// is 0, so the whole mask was wiped. Bitwise ~ clears only bit pos.
	return bitMask &= ~(1U<<pos);
}
345
346 template<typename BitMaskT>
347 inline static bool exist(BitMaskT &bitMask)
348 {
349 return getBit(bitMask, EXIST_BIT);
350 }
351
352 template<typename BitMaskT>
353 inline static void setExist(BitMaskT &bitMask)
354 {
355 setBit(bitMask, EXIST_BIT);
356 }
357
358 template<typename BitMaskT>
359 inline static void unsetExist(BitMaskT &bitMask)
360 {
361 unsetBit(bitMask, EXIST_BIT);
362 }
363
369 {
370 blockMap.removeUnusedBuffers();
371 }
372
378 decltype(blockMap) & private_get_blockMap_non_const()
379 {
380 return blockMap;
381 }
382
388 decltype(blockMap) & private_get_blockMap()
389 {
390 return blockMap;
391 }
392
398 const decltype(blockMap) & private_get_blockMap() const
399 {
400 return blockMap;
401 }
402};
403
404template<typename AggregateBlockT, unsigned int threadBlockSize, typename indexT, template<typename> class layout_base>
406{
407 blockMap.template deviceToHost<pMask>();
408}
409
410template<typename AggregateBlockT, unsigned int threadBlockSize, typename indexT, template<typename> class layout_base>
411template<unsigned int ... prp>
413{
414 blockMap.template hostToDevice<prp..., pMask>();
415}
416
417template<typename AggregateBlockT, unsigned int threadBlockSize, typename indexT, template<typename> class layout_base>
419{
420 blockMap.template hostToDevice<pMask>();
421}
422
423//template<typename AggregateBlockT, unsigned int threadBlockSize, typename indexT, template<typename> class layout_base>
424//template<unsigned int p>
425//void BlockMapGpu<AggregateBlockT, threadBlockSize, indexT, layout_base>::setBackgroundValue(
426// ScalarTypeOf<AggregateBlockT, p> backgroundValue)
427
428
429#endif /* BLOCK_MAP_GPU_HPP_ */
void setBackgroundValue(TypeBck backgroundValue)
set the background for property p
void removeUnusedBuffers()
Eliminate many internal temporary buffers; you can use this between flushes if you run out of memory.
auto insert_o(unsigned int linId) -> decltype(blockMap.insert(0))
insert data, host version
auto insert(unsigned int linId) -> ScalarTypeOf< AggregateBlockT, p > &
insert data, host version
const decltype(blockMap) & private_get_blockMap() const
Return internal structure block map.
auto insertBlockFlush(size_t blockId) -> decltype(blockMap.insertFlush(blockId, is_new).template get< p >())
insert a block + flush, host version
sparse_grid_bck_value< typename std::remove_reference< decltype(blockMap.getBackground())>::type > getBackgroundValue()
Get the background value.
void preFlush()
In case we manually set the added index buffer and the add data buffer we have to call this function ...
void setGPUInsertBuffer(int nBlock, int nSlot)
decltype(blockMap) & private_get_blockMap_non_const()
Return internal structure block map.
decltype(blockMap) & private_get_blockMap()
Return internal structure block map.
auto insertBlockFlush(size_t blockId) -> decltype(blockMap.insertFlush(blockId, is_new))
insert a block + flush, host version
aggregate of properties, from a list of object if create a struct that follow the OPENFPM native stru...
__device__ __host__ boost::mpl::at< type, boost::mpl::int_< i > >::type & get()
get the properties i
Transform the boost::fusion::vector into memory specification (memory_traits)