1 #define BOOST_GPU_ENABLED __host__ __device__ 4 #define BOOST_TEST_DYN_LINK 5 #include "util/cuda_launch.hpp" 6 #include <boost/test/unit_test.hpp> 8 #include "util/cuda_util.hpp" 9 #include "Vector/map_vector.hpp" 13 #include "sort_ofp.cuh" 14 #include "scan_ofp.cuh" 15 #include "segreduce_ofp.cuh" 17 BOOST_AUTO_TEST_SUITE( scan_tests )
// Test case: runs openfpm::scan on device data and verifies each output element
// on the host against a running sum of the input.
// NOTE(review): this listing omits several original lines — the braces and the
// declarations of `input`, `output` and `cnt` are not visible here; confirm
// their types and initial values against the full file.
19 BOOST_AUTO_TEST_CASE( test_scan_cub_wrapper )
// Progress message marking the start of the test.
21 std::cout <<
"Test scan CUB" <<
"\n";
// Fill the first 10000 entries of property 0 with rand()-derived values scaled
// by 10.0 / RAND_MAX. rand() is not seeded anywhere in the visible lines.
33 for (
size_t i = 0 ; i < 10000; i++)
35 input.template get<0>(i) = 10.0*(float)rand() / RAND_MAX;
// Copy property 0 of the input from host to device before calling the scan.
38 input.template hostToDevice<0>();
// Context object handed to openfpm::scan as its last argument.
40 mgpu::ofp_context_t context;
// Scan input.size() elements; both device buffers are passed as `unsigned int *`.
41 openfpm::scan((
unsigned int *)input.template getDeviceBuffer<0>(),input.
size(),(
unsigned int *)output.template getDeviceBuffer<0>(),context);
// Bring the scan result back to the host so it can be checked.
43 output.template deviceToHost<0>();
// Host-side verification: output[i] must equal the sum of all inputs before i.
// `cnt` is compared first and only then advanced by input[i], so — assuming
// `cnt` starts at 0 (declaration not visible, TODO confirm) — the expected
// result is an exclusive prefix sum.
46 for (
size_t i = 0 ; i < input.
size() ; i++)
48 BOOST_REQUIRE_EQUAL(cnt,output.template get<0>(i));
49 cnt += input.template get<0>(i);
// Progress message marking the end of the test.
52 std::cout <<
"End scan CUB" <<
"\n";
// Test case: sorts keys (with an accompanying id buffer) through openfpm::sort,
// first with a less-than comparator and then with a greater-than comparator,
// and checks the key ordering on the host after each sort.
// NOTE(review): this listing omits several original lines — the braces and the
// declarations (and any resize) of `input` and `input_id` are not visible here;
// confirm against the full file.
57 BOOST_AUTO_TEST_CASE( test_sort_cub_wrapper )
// Progress message marking the start of the test.
59 std::cout <<
"Test sort CUB" <<
"\n";
// Size the id vector to 100000 entries, matching the fill loop below.
67 input_id.resize(100000);
// Fill keys with rand()-derived values scaled by 10000.0 / RAND_MAX and set
// each id to its own index. rand() is not seeded in the visible lines.
72 for (
size_t i = 0 ; i < 100000; i++)
74 input.template get<0>(i) = 10000.0*(float)rand() / RAND_MAX;
75 input_id.template get<0>(i) = i;
// Copy keys and ids from host to device before sorting.
78 input.template hostToDevice<0>();
79 input_id.template hostToDevice<0>();
// Context object handed to openfpm::sort as its last argument.
81 mgpu::ofp_context_t context;
// First sort: comparator is mgpu::less_t<unsigned int>; both device buffers
// are passed as `unsigned int *`.
83 openfpm::sort((
unsigned int *)input.template getDeviceBuffer<0>(),
84 (
unsigned int *)input_id.template getDeviceBuffer<0>(),
85 input.
size(),mgpu::template less_t<unsigned int>(),context);
// Copy both buffers back to the host. Only the keys are checked in the visible
// lines; the contents of input_id are not verified.
87 input.template deviceToHost<0>();
88 input_id.template deviceToHost<0>();
// Every adjacent pair of keys must be in non-decreasing order.
// NOTE(review): `size() - 1` would wrap around if size() is unsigned and the
// vector were empty; presumably not reachable here since 100000 entries are
// filled above — confirm.
90 for (
size_t i = 0 ; i < input.
size() - 1 ; i++)
92 BOOST_REQUIRE(input.template get<0>(i) <= input.template get<0>(i+1));
// Second sort on the same device buffers, now with mgpu::greater_t<unsigned int>.
95 openfpm::sort((
unsigned int *)input.template getDeviceBuffer<0>(),
96 (
unsigned int *)input_id.template getDeviceBuffer<0>(),
97 input.
size(),mgpu::template greater_t<unsigned int>(),context);
// Copy the re-sorted buffers back to the host.
99 input.template deviceToHost<0>();
100 input_id.template deviceToHost<0>();
// Every adjacent pair of keys must now be in non-increasing order.
102 for (
size_t i = 0 ; i < input.
size() - 1 ; i++)
104 BOOST_REQUIRE(input.template get<0>(i) >= input.template get<0>(i+1));
// Progress message marking the end of the test.
107 std::cout <<
"End sort CUB" <<
"\n";
// Test case: builds random values and random segment start offsets, runs
// openfpm::segreduce on the device, and compares every per-segment result with
// a sum computed on the host.
// NOTE(review): this listing omits several original lines — the braces, the
// declarations of `count`, `vgpu`, `segment_offset`, `output`, `base`, `match`,
// `i`, `red` and `red2`, the header of the offset-generation loop, and the
// trailing arguments of the segreduce call are not visible here; confirm
// against the full file.
112 BOOST_AUTO_TEST_CASE( test_seg_reduce_wrapper )
// Progress message marking the start of the test.
114 std::cout <<
"Test gpu segmented reduce" <<
"\n";
// Context object; presumably passed to openfpm::segreduce among the arguments
// that are cut off in this listing — confirm.
116 mgpu::ofp_context_t context;
// Fill `count` values, each derived from rand() and scaled into the range 0..17.
127 for (
size_t i = 0 ; i < count ; i++)
129 vgpu.template get<0>(i) = ((float)rand() / (float)RAND_MAX) * 17;
// The first segment always starts at offset 0.
132 segment_offset.add();
133 segment_offset.template get<0>(0) = 0;
// Draw a random segment length `c` in 0..17 (truncated to int).
137 int c = ((float)rand() / (float)RAND_MAX) * 17;
// NOTE(review): the statement guarded by this condition is not visible;
// presumably it stops generating offsets once the next segment would reach or
// pass `count` — confirm.
139 if (c + base >= count)
// Append a new segment whose start is the previous segment's start plus `c`.
142 segment_offset.add();
143 segment_offset.template get<0>(segment_offset.
size() - 1) = c + segment_offset.template get<0>(segment_offset.
size() - 2);
// Copy values and segment offsets from host to device.
148 vgpu.hostToDevice<0>();
150 segment_offset.hostToDevice<0>();
// One output slot per segment offset.
151 output.resize(segment_offset.
size());
// Segmented reduction over the values; all device buffers are passed as `int *`.
// The remaining arguments of this call are not visible in this listing.
153 openfpm::segreduce((
int *)vgpu.template getDeviceBuffer<0>(), vgpu.
size(),
154 (
int *)segment_offset.template getDeviceBuffer<0>(), segment_offset.
size(),
155 (
int *)output.template getDeviceBuffer<0>(),
// Bring the per-segment results back to the host for checking.
159 output.template deviceToHost<0>();
// Check every segment except the last: segment i spans the values from
// offset[i] up to (but not including) offset[i+1]. `i` lives outside the loop
// because it is reused below to address the last segment.
163 for ( ; i < segment_offset.
size()-1 ; i++)
// Accumulate the host-side sum of the values belonging to segment i.
166 for (
size_t j = 0 ; j < segment_offset.template get<0>(i+1) - segment_offset.template get<0>(i) ; j++)
168 red += vgpu.template get<0>(segment_offset.template get<0>(i) + j);
// Fold the comparison into `match`, so a single mismatch fails the whole check.
170 match &=
red == output.template get<0>(i);
173 BOOST_REQUIRE_EQUAL(match,
true);
// Last segment: it has no following offset, so it runs from offset[i] to the
// end of the value vector. Here `i` is the index left over from the loop above.
176 for (
size_t j = 0 ; j < vgpu.
size() - segment_offset.template get<0>(i) ; j++)
178 red2 += vgpu.template get<0>(segment_offset.template get<0>(i) + j);
180 match &= red2 == output.template get<0>(i);
182 BOOST_REQUIRE_EQUAL(match,
true);
// Progress message marking the end of the test.
184 std::cout <<
"End test modern gpu test reduce" <<
"\n";
189 BOOST_AUTO_TEST_SUITE_END()
Temporary buffer for reductions
OutputIteratorT OffsetT ReductionOpT OuputT init
[in] The initial value of the reduction
Implementation of 1-D std::vector like structure.