7#define BOOST_TEST_DYN_LINK
9#include <boost/test/unit_test.hpp>
10#include "Vector/map_vector.hpp"
11#include "SparseGridGpu/BlockMapGpu.hpp"
14template<
typename op,
typename ScalarT>
15__device__
inline ScalarT applyOp(ScalarT a, ScalarT b,
bool aExist,
bool bExist)
32template<
unsigned int chunksPerBlock,
typename op,
typename SegType,
typename DataType,
typename MaskType>
41 unsigned int segmentId = blockIdx.x;
42 int segmentSize = segments[segmentId + 1] - segments[segmentId];
44 unsigned int start = segments[segmentId];
46 unsigned int chunkId = threadIdx.x / DataType::size;
47 unsigned int offset = threadIdx.x % DataType::size;
49 __shared__ DataType A[chunksPerBlock];
50 __shared__ MaskType AMask[chunksPerBlock];
51 typename DataType::scalarType bReg;
52 typename MaskType::scalarType aMask, bMask;
55 if (chunkId < segmentSize)
57 A[chunkId][offset] = data[start + chunkId][offset];
58 aMask = masks[start + chunkId][offset];
61 int i = chunksPerBlock;
62 for ( ; i < segmentSize - (
int) (chunksPerBlock); i += chunksPerBlock)
64 bReg = data[start + i + chunkId][offset];
65 bMask = masks[start + i + chunkId][offset];
67 A[chunkId][offset] = applyOp<op>(A[chunkId][offset],
71 aMask = aMask | bMask;
74 if (i + chunkId < segmentSize)
76 bReg = data[start + i + chunkId][offset];
77 bMask = masks[start + i + chunkId][offset];
79 A[chunkId][offset] = applyOp<op>(A[chunkId][offset],
83 aMask = aMask | bMask;
86 AMask[chunkId][offset] = aMask;
92 for (
int j = 2; j <= chunksPerBlock && j <= segmentSize; j *= 2)
94 if (chunkId % j == 0 && chunkId < segmentSize)
96 unsigned int otherChunkId = chunkId + (j / 2);
97 if (otherChunkId < segmentSize)
99 aMask = AMask[chunkId][offset];
100 bMask = AMask[otherChunkId][offset];
101 A[chunkId][offset] = applyOp<op>(A[chunkId][offset],
102 A[otherChunkId][offset],
105 AMask[chunkId][offset] = aMask | bMask;
114 output[segmentId][offset] = A[chunkId][offset];
118BOOST_AUTO_TEST_SUITE(segreduce_block_cuda_tests)
120 BOOST_AUTO_TEST_CASE (segreduce_block_test)
122 typedef float ScalarT;
127 segments.template get<0>(0) = 0;
128 segments.template get<0>(1) = 4;
129 segments.template get<0>(2) = 5;
130 segments.template get<0>(3) = 7;
131 segments.template get<0>(4) = 8;
132 segments.template get<0>(5) = 11;
133 segments.template get<0>(6) = 17;
134 segments.template get<0>(7) = 18;
136 segments.template hostToDevice<0>();
138 const unsigned int BITMASK = 0, BLOCK = 1;
141 for (
int i = 0; i < 32; ++i)
146 for (
int i = 32; i < 64; ++i)
154 for (
int i = 0; i < 18; ++i)
156 data.template get<BITMASK>(i) = mask;
157 data.template get<BLOCK>(i) = block;
160 data.template hostToDevice<BITMASK, BLOCK>();
164 outputData.resize(segments.
size()-1);
170 (BlockT *) data.template getDeviceBuffer<BLOCK>(),
171 (
int *) segments.template getDeviceBuffer<0>(),
172 (MaskBlockT *) data.template getDeviceBuffer<BITMASK>(),
173 (BlockT *) outputData.template getDeviceBuffer<BLOCK>()
178 (MaskBlockT *) data.template getDeviceBuffer<BITMASK>(),
179 (
int *) segments.template getDeviceBuffer<0>(),
180 (MaskBlockT *) data.template getDeviceBuffer<BITMASK>(),
181 (MaskBlockT *) outputData.template getDeviceBuffer<BITMASK>()
184 outputData.template deviceToHost<BITMASK, BLOCK>();
186 for (
int j = 0; j < outputData.
size(); ++j)
188 BlockT outBlock = outputData.template get<BLOCK>(j);
189 MaskBlockT outMask = outputData.template get<BITMASK>(j);
191 int seg_sz = segments.template get<0>(j+1) - segments.template get<0>(j);
193 for (
int i = 0; i < BlockT::size; ++i)
197 BOOST_REQUIRE_EQUAL(outBlock[i],seg_sz*(i+1));
198 BOOST_REQUIRE_EQUAL(outMask[i],1);
202 BOOST_REQUIRE_EQUAL(outMask[i],0);
208BOOST_AUTO_TEST_SUITE_END()
Implementation of 1-D std::vector like structure.
KeyT const ValueT ValueT OffsetIteratorT OffsetIteratorT int
[in] The number of segments that comprise the sorting data