1#ifndef BLOCK_MAP_GPU_HPP_
2#define BLOCK_MAP_GPU_HPP_
4#include "Vector/map_vector_sparse.hpp"
5#include "BlockMapGpu_ker.cuh"
6#include "BlockMapGpu_kernels.cuh"
7#include "DataBlock.cuh"
9#include "util/sparsegrid_util_common.hpp"
11template<
typename AggregateT,
unsigned int p>
12using BlockTypeOf =
typename std::remove_reference<typename boost::fusion::result_of::at_c<typename AggregateT::type, p>::type>::type;
14template<
typename AggregateT,
unsigned int p>
15using ScalarTypeOf =
typename std::remove_reference<typename boost::fusion::result_of::at_c<typename AggregateT::type, p>::type>::type::scalarType;
20 template<
typename destType>
21 inline static void set(destType & bP ,T & backgroundValue,
int j)
23 bP[j] = backgroundValue;
27template<
unsigned int N,
typename T>
30 template<
typename destType>
31 inline static void set(destType & bP ,T * backgroundValue,
int j)
33 for (
int i = 0 ; i < N ; i++)
35 bP[i][j] = backgroundValue[i];
40template<
typename AggregateBlockT,
unsigned int threadBlockSize=128,
typename indexT=
long int,
template<
typename>
class layout_base=
memory_traits_inte>
47 typedef BlockTypeOf<AggregateBlockT, 0> BlockT0;
54 bool is_setGPUInsertBuffer =
false;
57 bool is_initializeGPUInsertBuffer =
false;
62 const static unsigned char EXIST_BIT = 0;
64 static const unsigned int pMask = AggregateInternalT::max_prop_real - 1;
67 BlockMapGpuFunctors::BlockFunctor<threadBlockSize>,
72 typedef AggregateBlockT AggregateType;
83 blockMap.swap(bm.blockMap);
103 return sparse_grid_bck_value<
typename std::remove_reference<
decltype(blockMap.getBackground())>::type>(blockMap.getBackground());
108 template<
unsigned int p>
109 auto get(
unsigned int linId)
const ->
const ScalarTypeOf<AggregateBlockT, p> &
111 typedef BlockTypeOf<AggregateBlockT, p> BlockT;
112 unsigned int blockId = linId / BlockT::size;
113 unsigned int offset = linId % BlockT::size;
115 auto &block =
aggregate.template get<p>();
116 auto &mask =
aggregate.template get<pMask>();
118 if (exist(mask[offset]))
120 return block[offset];
124 return blockMap.template getBackground<p>()[offset];
128 auto get(
unsigned int linId)
const ->
const decltype(blockMap.get(0)) &
130 typedef BlockTypeOf<AggregateBlockT, 0> BlockT;
131 unsigned int blockId = linId / BlockT::size;
132 unsigned int offset = linId % BlockT::size;
146 template<
unsigned int p>
147 auto insert(
unsigned int linId) -> ScalarTypeOf<AggregateBlockT, p> &
149 typedef BlockTypeOf<AggregateBlockT, p> BlockT;
150 unsigned int blockId = linId / BlockT::size;
151 unsigned int offset = linId % BlockT::size;
152 auto aggregate = blockMap.insert(blockId);
153 auto &block =
aggregate.template get<p>();
154 auto &mask =
aggregate.template get<pMask>();
155 setExist(mask[offset]);
156 return block[offset];
168 auto insert_o(
unsigned int linId) ->
decltype(blockMap.insert(0))
170 typedef BlockTypeOf<AggregateBlockT, 0> BlockT;
171 unsigned int blockId = linId / BlockT::size;
172 unsigned int offset = linId % BlockT::size;
173 auto aggregate = blockMap.insert(blockId);
186 template<
unsigned int p>
187 auto insertBlockFlush(
size_t blockId) ->
decltype(blockMap.insertFlush(blockId,is_new).template get<p>())
189 typedef BlockTypeOf<AggregateBlockT, p> BlockT;
191 auto aggregate = blockMap.insertFlush(blockId,is_new);
192 auto &block =
aggregate.template get<p>();
196 for (
int i = 0 ; i < BlockT::size ; i++)
197 {
aggregate.template get<pMask>()[i] = 0;}
212 typedef BlockTypeOf<AggregateBlockT, 0> BlockT;
213 auto b = blockMap.insertFlush(blockId,is_new);
217 for (
int i = 0 ; i < BlockT::size ; i++)
218 {b.template get<pMask>()[i] = 0;}
230 template<
unsigned int ... prp>
233 blockMap.template deviceToHost<prp..., pMask>();
238 template<
unsigned int ... prp>
252 blockMap.setGPUInsertBuffer(nBlock, nSlot);
253 initializeGPUInsertBuffer();
256 is_setGPUInsertBuffer =
true;
270 void initializeGPUInsertBuffer()
274 auto & insertBuffer = blockMap.getGPUInsertBuffer();
275 typedef BlockTypeOf<AggregateInternalT, pMask> BlockType;
276 constexpr unsigned int chunksPerBlock = 1;
278 if (insertBuffer.size() != 0)
280 CUDA_LAUNCH_DIM3((BlockMapGpuKernels::initializeInsertBuffer<pMask, chunksPerBlock>),insertBuffer.size()/chunksPerBlock, chunksPerBlock*BlockType::size,
281 insertBuffer.toKernel());
285 is_initializeGPUInsertBuffer =
true;
289 template<
typename ... v_reduce>
294 if (is_setGPUInsertBuffer ==
false || is_initializeGPUInsertBuffer ==
false)
295 {std::cout << __FILE__ <<
":" << __LINE__ <<
" error setGPUInsertBuffer you must call before doing any insertion " << std::endl;}
298 blockMap.template flush<v_reduce ... >(context, opt);
306 template<
unsigned int p,
typename TypeBck>
310 typedef BlockTypeOf<AggregateInternalT, p> BlockT;
311 typedef typename std::remove_all_extents<BlockTypeOf<AggregateInternalT, p>>::type BlockT_noarr;
312 typedef BlockTypeOf<AggregateInternalT, pMask> BlockM;
317 for (
unsigned int i = 0; i < BlockT_noarr::size; ++i)
324 blockMap.template setBackground<p>(bP);
325 blockMap.template setBackground<pMask>(bM);
/*! \brief Read one bit of a bit mask
 *
 * \tparam BitMaskT type of the mask
 *
 * \param bitMask mask to inspect
 * \param pos position of the bit, counted from the least significant one
 *
 * \return true if the bit at position pos is set
 */
template<typename BitMaskT>
inline static bool getBit(const BitMaskT &bitMask, unsigned char pos)
{
    const auto lowest = (bitMask >> pos) & 1U;
    return lowest;
}
/*! \brief Set one bit of a bit mask
 *
 * \tparam BitMaskT type of the mask
 *
 * \param bitMask mask to modify in place
 * \param pos position of the bit, counted from the least significant one
 *
 * \return the updated mask converted to bool
 */
template<typename BitMaskT>
inline static bool setBit(BitMaskT &bitMask, unsigned char pos)
{
    bitMask |= (1U << pos);
    return bitMask;
}
/*! \brief Clear one bit of a bit mask
 *
 * Only the bit at position pos is cleared; every other bit keeps its value.
 *
 * \tparam BitMaskT type of the mask (an integral type)
 *
 * \param bitMask mask to modify in place
 * \param pos position of the bit, counted from the least significant one
 *
 * \return the updated mask converted to bool (true if any bit is still set)
 */
template<typename BitMaskT>
inline static bool unsetBit(BitMaskT &bitMask, unsigned char pos)
{
    // Bitwise complement (~), not logical negation (!): !(1U<<pos) evaluates
    // to 0 and would wipe the whole mask. The single-bit pattern is built in
    // BitMaskT so that masks wider than unsigned int keep their upper bits.
    bitMask &= ~(static_cast<BitMaskT>(1) << pos);
    return bitMask;
}
346 template<
typename BitMaskT>
347 inline static bool exist(BitMaskT &bitMask)
349 return getBit(bitMask, EXIST_BIT);
352 template<
typename BitMaskT>
353 inline static void setExist(BitMaskT &bitMask)
355 setBit(bitMask, EXIST_BIT);
358 template<
typename BitMaskT>
359 inline static void unsetExist(BitMaskT &bitMask)
361 unsetBit(bitMask, EXIST_BIT);
370 blockMap.removeUnusedBuffers();
404template<
typename AggregateBlockT,
unsigned int threadBlockSize,
typename indexT,
template<
typename>
class layout_base>
407 blockMap.template deviceToHost<pMask>();
410template<
typename AggregateBlockT,
unsigned int threadBlockSize,
typename indexT,
template<
typename>
class layout_base>
411template<
unsigned int ... prp>
414 blockMap.template hostToDevice<prp..., pMask>();
417template<
typename AggregateBlockT,
unsigned int threadBlockSize,
typename indexT,
template<
typename>
class layout_base>
420 blockMap.template hostToDevice<pMask>();
void setBackgroundValue(TypeBck backgroundValue)
set the background for property p
void removeUnusedBuffers()
Eliminate many internal temporary buffers; you can use this between flushes if you get some out of memo...
auto insert_o(unsigned int linId) -> decltype(blockMap.insert(0))
insert data, host version
auto insert(unsigned int linId) -> ScalarTypeOf< AggregateBlockT, p > &
insert data, host version
const decltype(blockMap) & private_get_blockMap() const
Return internal structure block map.
auto insertBlockFlush(size_t blockId) -> decltype(blockMap.insertFlush(blockId, is_new).template get< p >())
insert a block + flush, host version
sparse_grid_bck_value< typename std::remove_reference< decltype(blockMap.getBackground())>::type > getBackgroundValue()
Get the background value.
void preFlush()
In case we manually set the added index buffer and the add data buffer we have to call this function ...
void setGPUInsertBuffer(int nBlock, int nSlot)
decltype(blockMap) & private_get_blockMap_non_const()
Return internal structure block map.
decltype(blockMap) & private_get_blockMap()
Return internal structure block map.
auto insertBlockFlush(size_t blockId) -> decltype(blockMap.insertFlush(blockId, is_new))
insert a block + flush, host version
Aggregate of properties: from a list of objects it creates a struct that follows the OPENFPM native stru...
__device__ __host__ boost::mpl::at< type, boost::mpl::int_< i > >::type & get()
get the properties i
Transform the boost::fusion::vector into memory specification (memory_traits)