5#define BOOST_TEST_DYN_LINK
7#include <boost/test/unit_test.hpp>
8#include "SparseGridGpu/BlockMapGpu.hpp"
9#include "SparseGridGpu/BlockMapGpu_ker.cuh"
10#include "SparseGridGpu/BlockMapGpu_kernels.cuh"
11#include "SparseGridGpu/DataBlock.cuh"
15template<
unsigned int p,
typename SparseGr
idType,
typename VectorOutType>
16__global__
void copyBlocksToOutput(SparseGridType sparseGrid, VectorOutType output)
18 int pos = blockIdx.x * blockDim.x + threadIdx.x;
19 output.template get<p>(pos) = sparseGrid.template get<p>(pos);
22template<
unsigned int p,
typename SparseGr
idType>
23__global__
void insertValues(SparseGridType sparseGrid)
27 int pos = blockIdx.x * blockDim.x + threadIdx.x;
29 sparseGrid.template insert<p>(pos) = pos;
33 sparseGrid.flush_block_insert();
36template<
unsigned int p,
unsigned int chunksPerBlock,
typename SparseGr
idType>
37__global__
void insertValuesBlocked(SparseGridType sparseGrid)
39 constexpr unsigned int pMask = SparseGridType::pMask;
40 typedef BlockTypeOf<typename SparseGridType::AggregateType, p> BlockT;
41 typedef BlockTypeOf<typename SparseGridType::AggregateType, pMask> MaskBlockT;
46 int pos = blockIdx.x * blockDim.x + threadIdx.x;
47 unsigned int dataBlockId = pos / BlockT::size;
48 unsigned int offset = pos % BlockT::size;
50 auto encap = sparseGrid.template insertBlock<chunksPerBlock>(dataBlockId,BlockT::size);
53 encap.template get<p>()[offset] = pos;
58 sparseGrid.flush_block_insert();
61template<
unsigned int p,
typename SparseGr
idType>
62__global__
void insertValuesHalfBlock(SparseGridType sparseGrid)
66 int pos = blockIdx.x * blockDim.x + threadIdx.x;
68 constexpr unsigned int dataChunkSize = BlockTypeOf<typename SparseGridType::AggregateType, p>::size;
69 if (threadIdx.x % dataChunkSize < dataChunkSize/ 2)
71 sparseGrid.template insert<p>(pos) = pos;
76 sparseGrid.flush_block_insert();
79BOOST_AUTO_TEST_SUITE(BlockMapGpu_tests)
81BOOST_AUTO_TEST_CASE(testBitwiseOps)
96BOOST_AUTO_TEST_CASE(testBackground)
101 sparseGrid.template setBackgroundValue<0>(666);
103 const unsigned int gridSize = 10;
104 const unsigned int blockSize = 128;
108 output.resize(gridSize * blockSize);
109 CUDA_LAUNCH_DIM3((copyBlocksToOutput<0>), gridSize, blockSize, sparseGrid.toKernel(), output.toKernel());
111 output.template deviceToHost<0>();
112 sparseGrid.template deviceToHost<0>();
116 for (
size_t i = 0; i < output.
size(); i++)
118 match &= output.template get<0>(i) == 666;
119 match &= output.template get<0>(i) == sparseGrid.template get<0>(i);
122 BOOST_REQUIRE_EQUAL(match,
true);
126BOOST_AUTO_TEST_CASE(testInsert)
131 blockMap.template setBackgroundValue<0>(666);
133 const unsigned int gridSize = 3;
134 const unsigned int bufferPoolSize = 128;
135 const unsigned int blockSizeInsert = 128;
136 const unsigned int gridSizeRead = gridSize + 1;
137 const unsigned int blockSizeRead = 128;
143 CUDA_LAUNCH_DIM3((insertValues<0>), gridSize, blockSizeInsert ,blockMap.toKernel());
147 blockMap.flush<
smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);
151 output.resize(gridSizeRead * blockSizeRead);
153 CUDA_LAUNCH_DIM3((copyBlocksToOutput<0>), gridSizeRead, blockSizeRead,blockMap.toKernel(), output.toKernel());
155 output.template deviceToHost<0>();
156 blockMap.template deviceToHost<0>();
160 for (
size_t i = 0; i < output.size(); i++)
162 auto expectedValue = (i < gridSize * blockSizeInsert) ? i : 666;
164 match &= output.template get<0>(i) == blockMap.template get<0>(i);
165 match &= output.template get<0>(i) == expectedValue;
168 BOOST_REQUIRE_EQUAL(match,
true);
171BOOST_AUTO_TEST_CASE(testInsert_halfBlock)
176 blockMap.template setBackgroundValue<0>(666);
178 const unsigned int gridSize = 3;
179 const unsigned int bufferPoolSize = 128;
180 const unsigned int blockSizeInsert = 128;
181 const unsigned int gridSizeRead = gridSize + 1;
182 const unsigned int blockSizeRead = 128;
188 CUDA_LAUNCH_DIM3((insertValuesHalfBlock<0>), gridSize, blockSizeInsert, blockMap.toKernel());
192 blockMap.flush<
smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);
196 output.resize(gridSizeRead * blockSizeRead);
198 CUDA_LAUNCH_DIM3((copyBlocksToOutput<0>), gridSizeRead, blockSizeRead ,blockMap.toKernel(), output.toKernel());
200 output.template deviceToHost<0>();
201 blockMap.template deviceToHost<0>();
205 for (
size_t i = 0; i < output.size(); i++)
207 auto expectedValue = (i < gridSize * blockSizeInsert) ? i : 666;
208 constexpr unsigned int dataChunkSize = BlockTypeOf<AggregateT, 0>::size;
209 int offset = i % dataChunkSize;
210 if (! (offset < dataChunkSize / 2))
215 match &= output.template get<0>(i) == blockMap.template get<0>(i);
216 match &= output.template get<0>(i) == expectedValue;
219 BOOST_REQUIRE_EQUAL(match,
true);
222BOOST_AUTO_TEST_CASE(testInsert_blocked)
227 sparseGrid.template setBackgroundValue<0>(666);
229 const unsigned int gridSize = 3;
230 const unsigned int bufferPoolSize = 4;
231 const unsigned int blockSizeInsert = 128;
232 const unsigned int gridSizeRead = gridSize + 1;
233 const unsigned int blockSizeRead = 128;
239 CUDA_LAUNCH_DIM3((insertValuesBlocked<0, 2>), gridSize, blockSizeInsert,sparseGrid.toKernel());
243 sparseGrid.flush<
smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE);
247 output.resize(gridSizeRead * blockSizeRead);
249 CUDA_LAUNCH_DIM3((copyBlocksToOutput<0>), gridSizeRead, blockSizeRead, sparseGrid.toKernel(), output.toKernel());
251 output.template deviceToHost<0>();
252 sparseGrid.template deviceToHost<0>();
256 for (
size_t i = 0; i < output.size(); i++)
258 auto expectedValue = (i < gridSize * blockSizeInsert) ? i : 666;
259 match &= output.template get<0>(i) == sparseGrid.template get<0>(i);
260 match &= output.template get<0>(i) == expectedValue;
263 BOOST_REQUIRE_EQUAL(match,
true);
266BOOST_AUTO_TEST_SUITE_END()
void setGPUInsertBuffer(int nBlock, int nSlot)
Implementation of a 1-D std::vector-like structure.
Aggregate of properties: from a list of objects it creates a struct that follows the OpenFPM native structure.