2 #define BOOST_TEST_DYN_LINK 3 #include <boost/test/unit_test.hpp> 5 #include "util/cuda_util.hpp" 6 #include "Vector/map_vector.hpp" 11 #include "util/cuda/moderngpu/kernel_load_balance.hxx" 12 #include "util/cuda/moderngpu/kernel_mergesort.hxx" 13 #include "util/cuda/moderngpu/kernel_reduce.hxx" 14 #include "util/cuda/moderngpu/kernel_segreduce.hxx" 17 BOOST_AUTO_TEST_SUITE( modern_gpu_tests )
// Test case: runs mgpu::load_balance_search over equally spaced segment
// offsets and verifies the result on the host.
// NOTE(review): this listing is fragmentary - the braces and the declarations
// of `count`, `spacing`, `segments`, `lbs` and `check` are on lines not shown.
19 BOOST_AUTO_TEST_CASE( modern_gpu_loadbalance_lbs )
// Start banner.
// NOTE(review): "tansform" looks like a typo for "transform" - confirm and fix.
21 std::cout <<
"Test modern gpu test tansform_lbs" <<
"\n";
// moderngpu context handed to the kernel launch below.
// Presumably `false` disables the device-properties printout - TODO confirm
// against the standard_context_t constructor.
23 mgpu::standard_context_t context(
false);
// Number of segments needed for `count` items at `spacing` items per segment
// (div_up is presumably a ceiling division - confirm).
28 int num_segments = mgpu::div_up(count, spacing);
// Segment i begins at item offset i * spacing.
30 for(
int i = 0; i < num_segments; ++i)
31 {segments.template get<0>(i) = i * spacing;}
// Make the segment offsets available on the device (per the method name).
35 segments.template hostToDevice<0>();
// Device buffers are passed as raw int pointers; the result goes into `lbs`.
// Judging from the check below, each of the `count` items receives the index
// of the segment that contains it.
37 mgpu::load_balance_search(count, (
int *)segments.template getDeviceBuffer<0>(), num_segments, (
int *)lbs.template getDeviceBuffer<0>(),context);
// Bring the result back to the host for verification.
39 lbs.deviceToHost<0>();
// With equally spaced segments, item i is expected in segment i / spacing.
// NOTE(review): `i` is size_t; if the stored value and `spacing` are int this
// is a mixed signed/unsigned comparison - verify the element types.
42 for(
size_t i = 0; i < lbs.size(); ++i)
44 check &= lbs.template get<0>(i) == i / spacing;
// A single mismatch clears `check` and aborts the test case here.
47 BOOST_REQUIRE_EQUAL(check,
true);
// End banner (same "tansform" spelling as the start banner).
49 std::cout <<
"End test modern gpu test tansform_lbs" <<
"\n";
// Test case: sorts random keys together with their original indices using
// mergesort on the device and verifies ordering and key/index pairing.
// NOTE(review): this listing is fragmentary - the braces and the declarations
// of `count`, `vgpu`, `gpu_ns` and `match` are on lines not shown.
54 BOOST_AUTO_TEST_CASE( modern_gpu_sort )
// Start banner.
// NOTE(review): the text says "tansform_lbs" although this is the sort test;
// it looks copy-pasted from the load-balance test - confirm and fix.
56 std::cout <<
"Test modern gpu test tansform_lbs" <<
"\n";
// moderngpu context handed to the sort below.
// Presumably `false` disables the device-properties printout - TODO confirm.
58 mgpu::standard_context_t context(
false);
// Property 0 is the key (random value scaled into [0, 17]), property 1 the
// original index. gpu_ns keeps an unsorted copy of the keys for the check.
68 for (
size_t i = 0 ; i < count ; i++)
70 vgpu.template get<0>(i) = ((float)rand() / (float)RAND_MAX) * 17;
71 vgpu.template get<1>(i) = i;
73 gpu_ns.template get<0>(i) = vgpu.template get<0>(i);
// Make keys and indices available on the device (per the method name).
76 vgpu.hostToDevice<0,1>();
// Key/value sort: both buffers are reinterpreted as unsigned int and compared
// with less_t<unsigned int>.
// NOTE(review): the element types of vgpu are not visible here. If property 0
// is float, the keys are ordered by bit pattern; that matches numeric order
// only for non-negative values (which the generator above produces) - confirm
// this is intentional.
78 mergesort((
unsigned int *)vgpu.getDeviceBuffer<0>(),(
unsigned int *)vgpu.getDeviceBuffer<1>(), count,
mgpu::less_t<unsigned int>(), context);
// Bring sorted keys and permuted indices back to the host.
80 vgpu.deviceToHost<0,1>();
// Two checks per element: keys are non-decreasing, and the index that
// travelled with each key still points at that key in the unsorted copy.
// The last element (count - 1) is only checked as the right-hand neighbour.
// NOTE(review): `i` is int here but size_t in the fill loop; if `count` is an
// unsigned type, `count - 1` wraps when count == 0 - verify.
85 for (
int i = 0 ; i < count - 1 ; i++)
87 match &= vgpu.template get<0>(i) <= vgpu.template get<0>(i+1);
88 match &= gpu_ns.template get<0>(vgpu.template get<1>(i)) == vgpu.template get<0>(i);
// A single failed comparison clears `match` and aborts the test case here.
91 BOOST_REQUIRE_EQUAL(match,
true);
// End banner (same mislabelled "tansform_lbs" text as the start banner).
93 std::cout <<
"End test modern gpu test tansform_lbs" <<
"\n";
// Test case: compares a host-side sum of random values with `red_p`.
// NOTE(review): this listing is fragmentary - the braces, the declarations of
// `count`, `vgpu`, `red`, `red_p` and the statements between the upload and
// the host loop are on lines not shown. Presumably those lines run the device
// reduction that fills `red_p` - TODO confirm.
98 BOOST_AUTO_TEST_CASE( modern_gpu_reduce )
// Start banner.
100 std::cout <<
"Test modern gpu reduce" <<
"\n";
// moderngpu context; presumably `false` disables the device-properties
// printout - TODO confirm.
102 mgpu::standard_context_t context(
false);
// Fill property 0 with random values scaled into [0, 17].
110 for (
size_t i = 0 ; i < count ; i++)
112 vgpu.template get<0>(i) = ((float)rand() / (float)RAND_MAX) * 17;
// Make the input available on the device (per the method name).
115 vgpu.hostToDevice<0>();
// Host reference: plain sequential sum of the same values.
127 for (
int i = 0 ; i < count ; i++)
129 red += vgpu.template get<0>(i);
// Exact equality between the host sum and `red_p`.
// NOTE(review): if the element type is floating point, a device reduction may
// add in a different order and differ by rounding; the types are not visible
// here - verify, and use a tolerance if they are floats.
132 BOOST_REQUIRE_EQUAL(
red,red_p);
// End banner.
134 std::cout <<
"End test modern gpu test reduce" <<
"\n";
// Test case: builds random segment offsets over random data, runs
// mgpu::segreduce and compares every segment's result with a host-side sum.
// NOTE(review): this listing is fragmentary - the braces, the declarations of
// `count`, `base`, `vgpu`, `segment_offset`, `output`, `i`, `red`, `red2`,
// `match`, the header of the offset-generation loop and the tail of the
// segreduce argument list are on lines not shown.
140 BOOST_AUTO_TEST_CASE( modern_gpu_seg_reduce )
// Start banner.
142 std::cout <<
"Test modern gpu segmented reduce" <<
"\n";
// moderngpu context; presumably `false` disables the device-properties
// printout - TODO confirm.
144 mgpu::standard_context_t context(
false);
// Fill property 0 with random values scaled into [0, 17].
155 for (
size_t i = 0 ; i < count ; i++)
157 vgpu.template get<0>(i) = ((float)rand() / (float)RAND_MAX) * 17;
// The first segment always starts at offset 0.
160 segment_offset.add();
161 segment_offset.template get<0>(0) = 0;
// Random segment length in [0, 17], truncated to int.
// NOTE(review): c can be 0, which yields a zero-length segment; what segreduce
// writes for it depends on arguments not visible here - confirm the host
// check below expects the same value.
165 int c = ((float)rand() / (float)RAND_MAX) * 17;
// Guard for a segment that would reach or pass the end of the data. The body
// of this branch is not shown; presumably it stops the generation - TODO confirm.
167 if (c + base >= count)
// Append the next segment start: previous start plus the length c.
170 segment_offset.add();
171 segment_offset.template get<0>(segment_offset.
size() - 1) = c + segment_offset.template get<0>(segment_offset.
size() - 2);
// Make data and offsets available on the device; one output slot per segment.
176 vgpu.hostToDevice<0>();
177 segment_offset.hostToDevice<0>();
178 output.resize(segment_offset.
size());
// Segmented reduction: all three device buffers are passed as raw int
// pointers. The remaining arguments of this call are not shown.
180 mgpu::segreduce((
int *)vgpu.template getDeviceBuffer<0>(), vgpu.
size(),
181 (
int *)segment_offset.template getDeviceBuffer<0>(), segment_offset.
size(),
182 (
int *)output.template getDeviceBuffer<0>(),
// Bring the per-segment results back to the host.
186 output.template deviceToHost<0>();
// Every segment except the last: its length is the difference between two
// consecutive offsets.
190 for ( ; i < segment_offset.
size()-1 ; i++)
// Host reference sum over the elements of segment i.
// NOTE(review): where `red` is reset for each segment is not visible - verify.
193 for (
size_t j = 0 ; j < segment_offset.template get<0>(i+1) - segment_offset.template get<0>(i) ; j++)
195 red += vgpu.template get<0>(segment_offset.template get<0>(i) + j);
197 match &=
red == output.template get<0>(i);
// Abort if any of the inner segments disagreed.
200 BOOST_REQUIRE_EQUAL(match,
true);
// Last segment: `i` now indexes the final offset, and the segment extends to
// the end of the data, so its length is vgpu.size() minus that offset.
203 for (
size_t j = 0 ; j < vgpu.
size() - segment_offset.template get<0>(i) ; j++)
205 red2 += vgpu.template get<0>(segment_offset.template get<0>(i) + j);
207 match &= red2 == output.template get<0>(i);
// Abort if the last segment disagreed.
209 BOOST_REQUIRE_EQUAL(match,
true);
// End banner.
// NOTE(review): says "reduce" while the start banner says "segmented reduce".
211 std::cout <<
"End test modern gpu test reduce" <<
"\n";
217 BOOST_AUTO_TEST_SUITE_END()
virtual bool allocate(size_t sz)
allocate memory
virtual void * getPointer()
get a readable pointer with the data
virtual void * getDevicePointer()
get a readable pointer with the data
virtual void deviceToHost()
Move memory from device to host.
temporary buffer for reductions
OutputIteratorT OffsetT ReductionOpT OuputT init
[in] The initial value of the reduction
Implementation of 1-D std::vector like structure.