OpenFPM_pdata  4.1.0
Project that contain the implementation of distributed structures
 
Loading...
Searching...
No Matches
BlockMapGpu_ker.cuh
1#ifndef BLOCK_MAP_GPU_KER_CUH_
2#define BLOCK_MAP_GPU_KER_CUH_
3
4#include "util/cuda_util.hpp"
5#include <cstdlib>
6#include "Vector/map_vector_sparse.hpp"
7#include "DataBlock.cuh"
8#include "TemplateUtils/encap_shmem.hpp"
9
// Block type stored at property index p of an OpenFPM aggregate
// (std::remove_reference strips the reference that
// boost::fusion::result_of::at_c yields).
template<typename AggregateT, unsigned int p>
using BlockTypeOf = typename std::remove_reference<typename boost::fusion::result_of::at_c<typename AggregateT::type, p>::type>::type;

// Scalar element type held inside the block at property index p
// (the block type exposes it as scalarType, see DataBlock.cuh).
template<typename AggregateT, unsigned int p>
using ScalarTypeOf = typename std::remove_reference<typename boost::fusion::result_of::at_c<typename AggregateT::type, p>::type>::type::scalarType;
15
// Metafunction: index of the last real property of AggregateT.
// By convention in BlockMapGpu this last slot holds the existence bit-mask.
template <typename AggregateT>
struct LastPOf
{
	// max_prop_real counts the aggregate's properties; the mask is the last one
	static const unsigned int value = AggregateT::max_prop_real - 1;
};
21
/*! \brief Wrapper pairing an aggregate of data blocks with the mask property
 *         that flags which elements exist.
 *
 * \tparam AggregateT aggregate holding the data blocks plus the mask block
 * \tparam pMask index of the mask property inside AggregateT
 *
 * NOTE(review): the constructor and operator= signatures were dropped by the
 * documentation extractor and have been reconstructed from the surviving
 * bodies — confirm against the repository.
 *
 * Fix vs original: operator=, get() and getMask() printed a diagnostic and
 * then fell off the end of a non-void function when compiled without nvcc
 * (undefined behaviour). The diagnostic is kept, but the actual operation and
 * the return now happen unconditionally; the __NVCC__ path is unchanged.
 */
template <typename AggregateT, unsigned int pMask>
struct InsertBlockWrapper
{
	//! The wrapped aggregate (data blocks + mask block)
	AggregateT aggregate;

	InsertBlockWrapper() = default;

	//! Wrap (copy) an existing aggregate
	InsertBlockWrapper(AggregateT &aggregate) : aggregate(aggregate) {}

	//! Copy constructor
	InsertBlockWrapper(const InsertBlockWrapper<AggregateT, pMask> &other)
	{
#ifndef __NVCC__
		std::cout << __FILE__ << ":" << __LINE__ << " error: you are supposed to compile this file with nvcc, if you want to use it with gpu" << std::endl;
#endif // __NVCC__
		aggregate = other.aggregate;
	}

	//! Copy assignment; always returns *this (the original omitted the
	//! return on the non-NVCC path)
	InsertBlockWrapper<AggregateT, pMask> &operator=(const InsertBlockWrapper<AggregateT, pMask> &other)
	{
#ifndef __NVCC__
		std::cout << __FILE__ << ":" << __LINE__ << " error: you are supposed to compile this file with nvcc, if you want to use it with gpu" << std::endl;
#endif // __NVCC__
		aggregate = other.aggregate;
		return *this;
	}

	//! Access property p of the wrapped aggregate
	template <unsigned int p>
	inline auto get() -> decltype(aggregate.template get<p>())
	{
#ifndef __NVCC__
		std::cout << __FILE__ << ":" << __LINE__ << " error: you are supposed to compile this file with nvcc, if you want to use it with gpu" << std::endl;
#endif // __NVCC__
		return aggregate.template get<p>();
	}

	//! Access the mask property of the wrapped aggregate
	inline auto getMask() -> decltype(aggregate.template get<pMask>())
	{
#ifndef __NVCC__
		std::cout << __FILE__ << ":" << __LINE__ << " error: you are supposed to compile this file with nvcc, if you want to use it with gpu" << std::endl;
#endif // __NVCC__
		return aggregate.template get<pMask>();
	}
};
69
/*! \brief Device-side (kernel) accessor over a sparse collection of data blocks.
 *
 * A linear element index linId is decomposed as
 *   blockId = linId / BlockT::size,  offset = linId % BlockT::size.
 * The last property of the aggregate (pMask) stores, per element, a bit mask
 * whose bit EXIST_BIT flags whether the element exists; get() falls back to
 * the background value for non-existing elements.
 *
 * NOTE(review): the class head, the blockMap data member, the constructor and
 * the getblockMap() signature were dropped by the documentation extractor and
 * have been reconstructed from usage — confirm against the repository.
 *
 * Fixes vs original: in insertBlock() the threadIdx read, the __shared__
 * buffer and the ec_ construction sat outside the #ifdef __NVCC__ guard,
 * defeating the host-compile diagnostics used everywhere else in this file;
 * they are now inside the guard. A stray ";;" after __syncthreads() removed.
 */
template<typename AggregateBlockT=aggregate<DataBlock<float, 64>>, typename indexT=int, template<typename> class layout_base=memory_traits_inte>
class BlockMapGpu_ker
{
protected:
	//! Kernel view of the sparse vector that stores the blocks
	//! NOTE(review): type reconstructed from usage — confirm against the repo
	openfpm::vector_sparse_gpu_ker<AggregateBlockT, indexT, layout_base> blockMap;

	//! Bit position (inside the mask scalar) that flags an existing element
	const static unsigned char EXIST_BIT = 0;

public:
	//! Property index of the existence mask (by convention the last real property)
	static const unsigned int pMask = AggregateBlockT::max_prop_real - 1;

	typedef AggregateBlockT AggregateType;

public:
	//! Return bit pos of bitMask
	template<typename BitMaskT>
	inline static __device__ __host__ bool getBit(const BitMaskT &bitMask, unsigned char pos)
	{
		return (bitMask>>pos)&1U;
	}

	//! Set bit pos of bitMask to 1
	template<typename BitMaskT>
	inline static __device__ __host__ void setBit(BitMaskT &bitMask, unsigned char pos)
	{
		bitMask = bitMask | (1U<<pos);
	}

	//! Clear bit pos of bitMask to 0
	template<typename BitMaskT>
	inline static __device__ __host__ void unsetBit(BitMaskT &bitMask, unsigned char pos)
	{
		bitMask = bitMask & ~(1U<<pos);
	}

public:
	//! Construct the kernel-side wrapper around a sparse-vector kernel view
	BlockMapGpu_ker(openfpm::vector_sparse_gpu_ker<AggregateBlockT, indexT, layout_base> blockMap)
	: blockMap(blockMap) {};

	/*! \brief Value of property p at linear index linId
	 *         (background value if the element does not exist)
	 */
	template<unsigned int p>
	inline __device__ auto get(unsigned int linId) const -> ScalarTypeOf<AggregateBlockT, p>
	{
#ifdef __NVCC__
		typedef BlockTypeOf<AggregateBlockT, p> BlockT;
		unsigned int blockId = linId / BlockT::size;
		unsigned int offset = linId % BlockT::size;
		return get<p>(blockId, offset);
#else // __NVCC__
		std::cout << __FILE__ << ":" << __LINE__ << " error: you are supposed to compile this file with nvcc, if you want to use it with gpu" << std::endl;
#endif // __NVCC__
	}

	/*! \brief Value of property p at (blockId, offset); checks the mask and
	 *         returns the background value for non-existing elements
	 */
	template<unsigned int p>
	inline __device__ auto get(unsigned int blockId, unsigned int offset) const -> ScalarTypeOf<AggregateBlockT, p>
	{
#ifdef __NVCC__
		const auto sid = blockMap.get_sparse(blockId);
		const auto & block = blockMap.template get_ele<p>(sid.id)[offset];
		const auto mask = blockMap.template get_ele<pMask>(sid.id)[offset];
		// only hand out the stored value if the element actually exists
		return exist(mask)
			? block
			: blockMap.template getBackground<p>()[offset];
#else // __NVCC__
		std::cout << __FILE__ << ":" << __LINE__ << " error: you are supposed to compile this file with nvcc, if you want to use it with gpu" << std::endl;
#endif // __NVCC__
	}

	//! Return the whole block (all properties) stored for blockId
	inline __device__ auto getBlock(unsigned int blockId) -> decltype(blockMap.get(0))
	{
#ifdef __NVCC__
		return blockMap.get(blockId);
#else // __NVCC__
		std::cout << __FILE__ << ":" << __LINE__ << " error: you are supposed to compile this file with nvcc, if you want to use it with gpu" << std::endl;
#endif // __NVCC__
	}

	/*! \brief Mutable reference to property p at linear index linId.
	 *         Precondition: the element must exist (no mask check is done).
	 */
	template<unsigned int p>
	inline __device__ ScalarTypeOf<AggregateBlockT, p> & getReference(unsigned int linId)
	{
		// Only call this if you are TOTALLY SURE the element exists! Otherwise KABOOOOOM! :D
#ifdef __NVCC__
		typedef BlockTypeOf<AggregateBlockT, p> BlockT;
		unsigned int blockId = linId / BlockT::size;
		unsigned int offset = linId % BlockT::size;
		return getReference<p>(blockId, offset);
#else // __NVCC__
		std::cout << __FILE__ << ":" << __LINE__ << " error: you are supposed to compile this file with nvcc, if you want to use it with gpu" << std::endl;
#endif // __NVCC__
	}

	/*! \brief Mutable reference to property p at (blockId, offset).
	 *         Precondition: the element must exist (no mask check is done).
	 */
	template<unsigned int p>
	inline __device__ ScalarTypeOf<AggregateBlockT, p> & getReference(unsigned int blockId, unsigned int offset)
	{
		// Only call this if you are TOTALLY SURE the element exists! Otherwise KABOOOOOM! :D
#ifdef __NVCC__
		return blockMap.template get<p>(blockId)[offset];
#else // __NVCC__
		std::cout << __FILE__ << ":" << __LINE__ << " error: you are supposed to compile this file with nvcc, if you want to use it with gpu" << std::endl;
#endif // __NVCC__
	}

	/*! \brief Insert the element at linear index linId and return a reference
	 *         to its property p (the existence bit is set as a side effect)
	 */
	template<unsigned int p>
	inline __device__ auto insert(unsigned int linId) -> ScalarTypeOf<AggregateBlockT, p>&
	{
#ifdef __NVCC__
		typedef BlockTypeOf<AggregateBlockT, p> BlockT;
		unsigned int blockId = linId / BlockT::size;
		unsigned int offset = linId % BlockT::size;
		return insert<p>(blockId, offset);
#else // __NVCC__
		std::cout << __FILE__ << ":" << __LINE__ << " error: you are supposed to compile this file with nvcc, if you want to use it with gpu" << std::endl;
#endif // __NVCC__
	}

	/*! \brief Insert the element at (blockId, offset): marks it existing in
	 *         the mask and returns a reference to its property p
	 */
	template<unsigned int p>
	inline __device__ auto insert(unsigned int blockId, unsigned int offset) -> ScalarTypeOf<AggregateBlockT, p>&
	{
#ifdef __NVCC__
		auto aggregate = blockMap.insert(blockId);
		auto &block = aggregate.template get<p>();
		auto &mask = aggregate.template get<pMask>();
		setExist(mask[offset]);

		return block[offset];
#else // __NVCC__
		std::cout << __FILE__ << ":" << __LINE__ << " error: you are supposed to compile this file with nvcc, if you want to use it with gpu" << std::endl;
#endif // __NVCC__
	}

	/*! \brief Cooperative block insertion: one thread per stride of threadIdx.x
	 *         performs the sparse insert, then the inserted slot index is
	 *         broadcast to the rest of the CUDA block through shared memory.
	 *
	 * Must be reached by ALL threads of the CUDA block (contains __syncthreads()).
	 *
	 * \tparam nChunksPerBlocks number of independent chunks (shared slots)
	 * \param blockId sparse block index to insert
	 * \param stride  threadIdx.x stride between inserting threads
	 */
	template<unsigned int nChunksPerBlocks = 1>
	inline __device__ auto insertBlock(indexT blockId, unsigned int stride = 8192) -> decltype(blockMap.insert(0))
	{
#ifdef __NVCC__
		int offset = threadIdx.x / stride;
		// one slot per chunk: the sparse-vector slot index to broadcast
		__shared__ int mem_[nChunksPerBlocks];

		decltype(blockMap.insert(0)) ec_(blockMap.private_get_data(),0);

		if (threadIdx.x % stride == 0 && threadIdx.y == 0 && threadIdx.z == 0)
		{
			auto ec = blockMap.insert(blockId);

			// publish the inserted slot so every thread can rebuild the handle
			mem_[offset] = ec.private_get_k();
		}

		__syncthreads();

		ec_.private_set_k(mem_[offset]);

		return ec_;
#else // __NVCC__
		std::cout << __FILE__ << ":" << __LINE__ << " error: you are supposed to compile this file with nvcc, if you want to use it with gpu" << std::endl;
#endif // __NVCC__
	}

	//! Return the internal sparse block map
	//! NOTE(review): signature reconstructed — confirm against the repository
	inline __device__ auto getblockMap() -> decltype(blockMap) &
	{
		return blockMap;
	}

	/*! \brief Decompose linId and resolve the sparse position of its block
	 *
	 * \param linId        linear element index (input)
	 * \param dataBlockPos resolved position of the block in the data buffer (output)
	 * \param offset       offset of the element inside the block (output)
	 */
	inline __device__ void get_sparse(unsigned int linId, unsigned int & dataBlockPos , unsigned int & offset) const
	{
#ifdef __NVCC__
		typedef BlockTypeOf<AggregateBlockT, pMask> BlockT;
		unsigned int blockId = linId / BlockT::size;
		offset = linId % BlockT::size;

		const auto sid = blockMap.get_sparse(blockId);

		dataBlockPos = sid.id;
#else // __NVCC__
		std::cout << __FILE__ << ":" << __LINE__ << " error: you are supposed to compile this file with nvcc, if you want to use it with gpu" << std::endl;
#endif // __NVCC__
	}

	//! Block index containing the linear element index linId
	inline static __device__ unsigned int getBlockId(unsigned int linId)
	{
#ifdef __NVCC__
		return linId / BlockTypeOf<AggregateBlockT, 0>::size;
#else // __NVCC__
		std::cout << __FILE__ << ":" << __LINE__ << " error: you are supposed to compile this file with nvcc, if you want to use it with gpu" << std::endl;
#endif // __NVCC__
	}

	//! Offset of the linear element index linId inside its block
	inline static __device__ unsigned int getOffset(unsigned int linId)
	{
#ifdef __NVCC__
		return linId % BlockTypeOf<AggregateBlockT, 0>::size;
#else // __NVCC__
		std::cout << __FILE__ << ":" << __LINE__ << " error: you are supposed to compile this file with nvcc, if you want to use it with gpu" << std::endl;
#endif // __NVCC__
	}

	//! Initialize the underlying sparse vector (delegates to blockMap)
	inline __device__ void init()
	{
#ifdef __NVCC__
		blockMap.init();
#else // __NVCC__
		std::cout << __FILE__ << ":" << __LINE__ << " error: you are supposed to compile this file with nvcc, if you want to use it with gpu" << std::endl;
#endif // __NVCC__
	}

	//! Flush pending block insertions (delegates to blockMap)
	inline __device__ void flush_block_insert()
	{
#ifdef __NVCC__
		blockMap.flush_block_insert();
#else // __NVCC__
		std::cout << __FILE__ << ":" << __LINE__ << " error: you are supposed to compile this file with nvcc, if you want to use it with gpu" << std::endl;
#endif // __NVCC__
	}

	//! True if the mask flags the element as existing (EXIST_BIT set)
	template<typename BitMaskT>
	inline static __device__ bool exist(const BitMaskT &bitMask)
	{
		return getBit(bitMask, EXIST_BIT);
	}

	//! Mark the element as existing in the mask
	template<typename BitMaskT>
	inline static __device__ void setExist(BitMaskT &bitMask)
	{
		setBit(bitMask, EXIST_BIT);
	}

	//! Mark the element as non-existing in the mask
	template<typename BitMaskT>
	inline static __device__ void unsetExist(BitMaskT &bitMask)
	{
		unsetBit(bitMask, EXIST_BIT);
	}

	//! Mask scalar stored for the element at linear index linId
	inline __device__ ScalarTypeOf<AggregateBlockT, pMask> getMask(unsigned int linId) const
	{
		return get<pMask>(linId);
	}

	//! Remove the element at linear index linId (clears its whole mask)
	inline __device__ void remove(unsigned int linId)
	{
#ifdef __NVCC__
		typedef BlockTypeOf<AggregateBlockT, pMask> BlockT;
		unsigned int blockId = linId / BlockT::size;
		unsigned int offset = linId % BlockT::size;
		remove(blockId, offset);
#else // __NVCC__
		std::cout << __FILE__ << ":" << __LINE__ << " error: you are supposed to compile this file with nvcc, if you want to use it with gpu" << std::endl;
#endif // __NVCC__
	}

	//! Remove the element at (blockId, offset) by zeroing its mask
	inline __device__ void remove(unsigned int blockId, unsigned int offset)
	{
#ifdef __NVCC__
		const auto sid = blockMap.get_sparse(blockId);
		blockMap.template get<pMask>(sid)[offset] = 0;
#else // __NVCC__
		std::cout << __FILE__ << ":" << __LINE__ << " error: you are supposed to compile this file with nvcc, if you want to use it with gpu" << std::endl;
#endif // __NVCC__
	}

	//! Return the index buffer of the underlying sparse vector
	inline __device__ auto getIndexBuffer() -> decltype(blockMap.getIndexBuffer())
	{
		return blockMap.getIndexBuffer();
	}

	//! Return the data buffer of the underlying sparse vector
	inline __device__ auto getDataBuffer() -> decltype(blockMap.getDataBuffer())
	{
		return blockMap.getDataBuffer();
	}

#ifdef SE_CLASS1

	/*! \brief Check whether ptr falls inside one of the device buffers
	 *         owned by blockMap (debug/safety builds only)
	 *
	 * \param ptr device pointer to check
	 * \return a pointer_check; match_str is prefixed with context on a match
	 */
	pointer_check check_device_pointer(void * ptr)
	{
		pointer_check pc;

		pc = blockMap.check_device_pointer(ptr);

		if (pc.match == true)
		{
			pc.match_str = std::string("blockMap overflow : ") + "\n" + pc.match_str;
			return pc;
		}

		return pc;
	}

#endif

};
379
380
381
382
383#endif /* BLOCK_MAP_GPU_KER_CUH_ */
__device__ auto getIndexBuffer() -> decltype(blockMap.getIndexBuffer())
Return the index buffer for the sparse vector.
__device__ auto getDataBuffer() -> decltype(blockMap.getDataBuffer())
Return the data buffer for the sparse vector.
__device__ void init()
This function must be called.
__device__ openfpm::sparse_index< Ti > get_sparse(Ti id) const
Get the sparse index.
__device__ void flush_block_insert()
It insert an element in the sparse vector.
__device__ auto get(Ti id) const -> decltype(vct_data.template get< p >(id))
Get an element of the vector.
__device__ auto insert(Ti ele) -> decltype(vct_data.template get< p >(0))
It insert an element in the sparse vector.
__device__ auto getIndexBuffer() const -> const decltype(vct_index)&
Get the indices buffer.
__device__ auto getDataBuffer() -> decltype(vct_data)&
Get the data buffer.
aggregate of properties: from a list of objects it creates a struct that follows the OPENFPM native structure.
Transform the boost::fusion::vector into memory specification (memory_traits)
std::string match_str
match string
bool match
Indicate if the pointer match.