1#define BOOST_GPU_ENABLED __host__ __device__
4#define BOOST_TEST_DYN_LINK
5#include "util/cuda_launch.hpp"
6#include <boost/test/unit_test.hpp>
8#include "util/cuda_util.hpp"
9#include "Vector/map_vector.hpp"
11#include "sort_ofp.cuh"
12#include "scan_ofp.cuh"
13#include "segreduce_ofp.cuh"
// Opens the Boost.Test suite named scan_tests; the test cases that follow are
// registered under it until the matching BOOST_AUTO_TEST_SUITE_END().
15BOOST_AUTO_TEST_SUITE( scan_tests )
// Test case: runs openfpm::scan over a device buffer and verifies the result on
// the host against a running sum of the input values.
// NOTE(review): the declarations and sizing of `input`, `output`, `context` and
// `cnt` are not visible in this excerpt — confirm them against the full file.
17BOOST_AUTO_TEST_CASE( test_scan_cub_wrapper )
19 std::cout <<
"Test scan CUB" <<
"\n";
// Fill the first 10000 entries of property 0 with rand()-derived values scaled
// by 10. The buffer is later handed to the scan as unsigned int*, so presumably
// the fractional part is dropped on assignment — TODO confirm the element type.
31 for (
size_t i = 0 ; i < 10000; i++)
33 input.template get<0>(i) = 10.0*(float)rand() / RAND_MAX;
// Going by its name, this pushes property 0 to the device before the scan reads
// the device buffer.
36 input.template hostToDevice<0>();
// Arguments: input device buffer, element count, output device buffer, context.
39 openfpm::scan((
unsigned int *)input.template getDeviceBuffer<0>(),input.
size(),(
unsigned int *)output.template getDeviceBuffer<0>(),context);
// Bring the scan result back so it can be checked on the host.
41 output.template deviceToHost<0>();
// Host-side reference: output(i) is compared with `cnt` BEFORE input(i) is
// added to it, so each output entry must equal the sum of all input entries
// that precede index i (assuming `cnt` starts at 0 — its initialisation is not
// visible here).
44 for (
size_t i = 0 ; i < input.
size() ; i++)
46 BOOST_REQUIRE_EQUAL(cnt,output.template get<0>(i));
47 cnt += input.template get<0>(i);
50 std::cout <<
"End scan CUB" <<
"\n";
// Test case: sorts a key buffer together with an id buffer through
// openfpm::sort, first with a less_t comparator and then with a greater_t
// comparator, and checks the key ordering on the host after each pass.
// NOTE(review): the declarations of `input`, `input_id` and `context`, and the
// sizing of `input`, are not visible in this excerpt.
55BOOST_AUTO_TEST_CASE( test_sort_cub_wrapper )
57 std::cout <<
"Test sort CUB" <<
"\n";
65 input_id.resize(100000);
// Keys: rand()-derived values scaled by 10000. Ids: the original index of each
// element.
70 for (
size_t i = 0 ; i < 100000; i++)
72 input.template get<0>(i) = 10000.0*(float)rand() / RAND_MAX;
73 input_id.template get<0>(i) = i;
76 input.template hostToDevice<0>();
77 input_id.template hostToDevice<0>();
// First pass. Arguments: key device buffer, id device buffer, element count,
// comparator (less_t), context.
81 openfpm::sort((
unsigned int *)input.template getDeviceBuffer<0>(),
82 (
unsigned int *)input_id.template getDeviceBuffer<0>(),
83 input.
size(),gpu::template less_t<unsigned int>(),context);
// Both buffers are copied back, but only the keys are asserted on below; the
// ids are never checked in the visible code.
85 input.template deviceToHost<0>();
86 input_id.template deviceToHost<0>();
// Adjacent-pair check: after the less_t pass the keys must be non-decreasing.
// NOTE(review): the bound `size() - 1` presumably relies on the vector being
// non-empty — confirm size() cannot be 0 here.
88 for (
size_t i = 0 ; i < input.
size() - 1 ; i++)
90 BOOST_REQUIRE(input.template get<0>(i) <= input.template get<0>(i+1));
// Second pass on the same device buffers, this time with greater_t.
93 openfpm::sort((
unsigned int *)input.template getDeviceBuffer<0>(),
94 (
unsigned int *)input_id.template getDeviceBuffer<0>(),
95 input.
size(),gpu::template greater_t<unsigned int>(),context);
97 input.template deviceToHost<0>();
98 input_id.template deviceToHost<0>();
// Adjacent-pair check: after the greater_t pass the keys must be non-increasing.
100 for (
size_t i = 0 ; i < input.
size() - 1 ; i++)
102 BOOST_REQUIRE(input.template get<0>(i) >= input.template get<0>(i+1));
105 std::cout <<
"End sort CUB" <<
"\n";
// Test case: fills a value vector and a list of segment start offsets with
// rand()-derived data, runs openfpm::segreduce on the device, and compares each
// segment's result with a sum computed on the host.
// NOTE(review): the declarations of `vgpu`, `segment_offset`, `output`, `count`,
// `base`, `match`, `i`, `red` and `red2` are not visible in this excerpt.
110BOOST_AUTO_TEST_CASE( test_seg_reduce_wrapper )
112 std::cout <<
"Test gpu segmented reduce" <<
"\n";
// Values to reduce: rand()-derived numbers scaled by 17.
125 for (
size_t i = 0 ; i < count ; i++)
127 vgpu.template get<0>(i) = ((float)rand() / (float)RAND_MAX) * 17;
// The first segment always starts at offset 0.
130 segment_offset.add();
131 segment_offset.template get<0>(0) = 0;
// `c` is a random segment length, truncated to int.
// NOTE(review): the loop around the next statements and the body of the `if`
// are not visible here — presumably generation stops once the next segment
// would reach `count`; confirm against the full file.
135 int c = ((float)rand() / (float)RAND_MAX) * 17;
137 if (c + base >= count)
// Append a new offset equal to the previous offset plus `c`.
140 segment_offset.add();
141 segment_offset.template get<0>(segment_offset.
size() - 1) = c + segment_offset.template get<0>(segment_offset.
size() - 2);
// Final entry: the total number of values, which marks the end of the last
// segment.
145 segment_offset.add();
146 segment_offset.template get<0>(segment_offset.
size() - 1) = vgpu.
size();
148 vgpu.hostToDevice<0>();
150 segment_offset.hostToDevice<0>();
// One output slot per segment (number of offsets minus the end marker).
151 output.resize(segment_offset.
size()-1);
// Arguments visible here: value buffer and its length, offset buffer and the
// segment count, output buffer. The remaining arguments of the call are not
// visible in this excerpt.
153 openfpm::segreduce((
int *)vgpu.template getDeviceBuffer<0>(), vgpu.
size(),
154 (
int *)segment_offset.template getDeviceBuffer<0>(), segment_offset.
size()-1,
155 (
int *)output.template getDeviceBuffer<0>(),
159 output.template deviceToHost<0>();
// Host reference for every segment except the last: sum the values between
// consecutive offsets and compare with the device result. `i` is declared
// outside the loop because it is reused below for the last segment.
163 for ( ; i < segment_offset.
size()-2 ; i++)
166 for (
size_t j = 0 ; j < segment_offset.template get<0>(i+1) - segment_offset.template get<0>(i) ; j++)
168 red += vgpu.template get<0>(segment_offset.template get<0>(i) + j);
170 match &=
red == output.template get<0>(i);
173 BOOST_REQUIRE_EQUAL(match,
true);
// Last segment: same summation, using the value `i` holds after the loop above.
176 for (
size_t j = 0 ; j < segment_offset.template get<0>(i+1) - segment_offset.template get<0>(i) ; j++)
178 red2 += vgpu.template get<0>(segment_offset.template get<0>(i) + j);
180 match &= red2 == output.template get<0>(i);
182 BOOST_REQUIRE_EQUAL(match,
true);
// NOTE(review): this closing message does not mirror the opening
// "Test gpu segmented reduce" text.
184 std::cout <<
"End test modern gpu test reduce" <<
"\n";
// Closes the Boost.Test suite opened with BOOST_AUTO_TEST_SUITE.
189BOOST_AUTO_TEST_SUITE_END()
Implementation of a 1-D std::vector-like structure.
OutputIteratorT OffsetT ReductionOpT OuputT init
< [in] The initial value of the reduction
temporal buffer for reductions