1 #include "util/cuda_util.hpp"
3 #define BOOST_TEST_DYN_LINK
4 #include <boost/test/unit_test.hpp>
6 #include "Vector/map_vector.hpp"
7 #include "sort_ofp.cuh"
8 #include "scan_ofp.cuh"
9 #include "segreduce_ofp.cuh"
// Opens the Boost.Test suite named scan_tests; the test cases that follow
// exercise the openfpm::scan, openfpm::sort and openfpm::segreduce wrappers.
11 BOOST_AUTO_TEST_SUITE( scan_tests )
// Test case: calls openfpm::scan on 10000 rand()-derived values and verifies
// every output element on the host against a running sum of the inputs.
// NOTE(review): the declarations of `input`, `output`, `gpuContext` and `cnt`
// (and the braces of this body) are not visible in this excerpt -- confirm
// their types, sizes and initial values against the full file.
13 BOOST_AUTO_TEST_CASE( test_scan_cub_wrapper )
15 std::cout <<
"Test scan CUB" <<
"\n";
// Fill property 0 of the first 10000 entries with rand() scaled by 10.0 / RAND_MAX.
// NOTE(review): the buffer is later cast to unsigned int*, so the element type is
// presumably unsigned int and the fractional part is dropped -- confirm.
27 for (
size_t i = 0 ; i < 10000; i++)
29 input.template get<0>(i) = 10.0*(float)rand() / RAND_MAX;
// Push property 0 to the device side before the scan (presumably a host -> device copy).
32 input.template hostToDevice<0>();
// Scan input.size() elements from the input device buffer into the output device buffer.
35 openfpm::scan((
unsigned int *)input.template getDeviceBuffer<0>(),input.
size(),(
unsigned int *)output.template getDeviceBuffer<0>(),gpuContext);
// Bring the scan result back so it can be inspected with host-side get<0>().
37 output.template deviceToHost<0>();
// output[i] is compared with `cnt` BEFORE input[i] is added to it, i.e. the test
// expects output[i] to equal the sum of the inputs that precede index i
// (assuming cnt starts at 0 -- its initialisation is not shown here).
40 for (
size_t i = 0 ; i < input.
size() ; i++)
42 BOOST_REQUIRE_EQUAL(cnt,output.template get<0>(i));
43 cnt += input.template get<0>(i);
46 std::cout <<
"End scan CUB" <<
"\n";
// Test case: sorts 100000 rand()-derived keys with openfpm::sort, first using
// gpu::less_t and then gpu::greater_t, and checks the resulting key order on the host.
// NOTE(review): the declarations of `input`, `input_id` and `gpuContext`, the
// sizing of `input`, and the braces of this body are not visible in this excerpt.
51 BOOST_AUTO_TEST_CASE( test_sort_cub_wrapper )
53 std::cout <<
"Test sort CUB" <<
"\n";
61 input_id.resize(100000);
// Keys: rand() scaled by 10000.0 / RAND_MAX; companion entries: the original index i.
66 for (
size_t i = 0 ; i < 100000; i++)
68 input.template get<0>(i) = 10000.0*(float)rand() / RAND_MAX;
69 input_id.template get<0>(i) = i;
// Push both properties to the device side before sorting (presumably host -> device copies).
72 input.template hostToDevice<0>();
73 input_id.template hostToDevice<0>();
// First pass: sort with the less_t comparator. Both device buffers are handed to
// openfpm::sort (presumably keys plus values permuted alongside them -- confirm
// against the wrapper's declaration).
77 openfpm::sort((
unsigned int *)input.template getDeviceBuffer<0>(),
78 (
unsigned int *)input_id.template getDeviceBuffer<0>(),
79 input.
size(),gpu::template less_t<unsigned int>(),gpuContext);
81 input.template deviceToHost<0>();
82 input_id.template deviceToHost<0>();
// After the less_t sort every key must be <= its successor. Only `input` is
// asserted on; `input_id` is copied back but not inspected in the visible code.
84 for (
size_t i = 0 ; i < input.
size() - 1 ; i++)
86 BOOST_REQUIRE(input.template get<0>(i) <= input.template get<0>(i+1));
// Second pass: re-sort the same device buffers with the greater_t comparator.
89 openfpm::sort((
unsigned int *)input.template getDeviceBuffer<0>(),
90 (
unsigned int *)input_id.template getDeviceBuffer<0>(),
91 input.
size(),gpu::template greater_t<unsigned int>(),gpuContext);
93 input.template deviceToHost<0>();
94 input_id.template deviceToHost<0>();
// After the greater_t sort every key must be >= its successor.
96 for (
size_t i = 0 ; i < input.
size() - 1 ; i++)
98 BOOST_REQUIRE(input.template get<0>(i) >= input.template get<0>(i+1));
101 std::cout <<
"End sort CUB" <<
"\n";
// Test case: builds randomly sized segments over `vgpu`, runs openfpm::segreduce,
// and compares every per-segment result with a sum accumulated on the host.
// NOTE(review): the declarations of `vgpu`, `segment_offset`, `output`, `count`,
// `base`, `i`, `red`, `red2` and `match`, the loop around the offset generation,
// and the braces of this body are not visible in this excerpt.
106 BOOST_AUTO_TEST_CASE( test_seg_reduce_wrapper )
108 std::cout <<
"Test gpu segmented reduce" <<
"\n";
// Fill `count` entries with rand() / RAND_MAX scaled by 17.
121 for (
size_t i = 0 ; i < count ; i++)
123 vgpu.template get<0>(i) = ((float)rand() / (float)RAND_MAX) * 17;
// The first segment offset is 0.
126 segment_offset.add();
127 segment_offset.template get<0>(0) = 0;
// Draw a random segment length c (float expression truncated to int).
131 int c = ((float)rand() / (float)RAND_MAX) * 17;
// NOTE(review): the body of this `if` is not shown; presumably it stops the offset
// generation once the next segment would reach or pass `count` -- confirm.
133 if (c + base >= count)
// Append a new offset equal to the previous offset plus c.
136 segment_offset.add();
137 segment_offset.template get<0>(segment_offset.
size() - 1) = c + segment_offset.template get<0>(segment_offset.
size() - 2);
// Append a final entry equal to vgpu.size(), so every segment i (including the
// last one) is delimited by entries i and i+1.
141 segment_offset.add();
142 segment_offset.template get<0>(segment_offset.
size() - 1) = vgpu.
size();
// Push the data and the offsets to the device side (presumably host -> device copies).
144 vgpu.hostToDevice<0>();
146 segment_offset.hostToDevice<0>();
// One output slot per segment: number of offset entries minus one.
147 output.resize(segment_offset.
size()-1);
// Segmented reduce over vgpu using segment_offset.size()-1 segments.
// NOTE(review): the trailing arguments and the closing of this call are not
// visible in this excerpt.
149 openfpm::segreduce((
int *)vgpu.template getDeviceBuffer<0>(), vgpu.
size(),
150 (
int *)segment_offset.template getDeviceBuffer<0>(), segment_offset.
size()-1,
151 (
int *)output.template getDeviceBuffer<0>(),
155 output.template deviceToHost<0>();
// Check every segment except the last one (indices below size()-2).
159 for ( ; i < segment_offset.
size()-2 ; i++)
// Host reference: add up the elements in [offset[i], offset[i+1]).
162 for (
size_t j = 0 ; j < segment_offset.template get<0>(i+1) - segment_offset.template get<0>(i) ; j++)
164 red += vgpu.template get<0>(segment_offset.template get<0>(i) + j);
// Fold the comparison into `match`, which is asserted after the accumulation.
166 match &=
red == output.template get<0>(i);
169 BOOST_REQUIRE_EQUAL(match,
true);
// Same check for the segment at the index `i` holds after the loop above
// (the remaining, last segment), accumulated into red2.
172 for (
size_t j = 0 ; j < segment_offset.template get<0>(i+1) - segment_offset.template get<0>(i) ; j++)
174 red2 += vgpu.template get<0>(segment_offset.template get<0>(i) + j);
176 match &= red2 == output.template get<0>(i);
178 BOOST_REQUIRE_EQUAL(match,
true);
180 std::cout <<
"End test gpu seg reduce test" <<
"\n";
// Closes the Boost.Test suite.
185 BOOST_AUTO_TEST_SUITE_END()