OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
 
Loading...
Searching...
No Matches
scan_sort_cuda_unit_tests.cu
1#define BOOST_GPU_ENABLED __host__ __device__
2
3#include "config.h"
4#define BOOST_TEST_DYN_LINK
5#include "util/cuda_launch.hpp"
6#include <boost/test/unit_test.hpp>
7
8#include "util/cuda_util.hpp"
9#include "Vector/map_vector.hpp"
10
11#include "sort_ofp.cuh"
12#include "scan_ofp.cuh"
13#include "segreduce_ofp.cuh"
14
15BOOST_AUTO_TEST_SUITE( scan_tests )
16
// Unit test for openfpm::scan.
// Fills a 10000-element host container with random values, uploads it, runs
// openfpm::scan on the device buffers, downloads the result and requires that
// output[i] equals the sum of all inputs strictly before index i.
// NOTE(review): the declarations of `input` and `output` are not present in this
// listing (the embedded numbering jumps 20 -> 23 -> 25). The casts below suggest
// property 0 holds unsigned int — confirm against the original file.
17BOOST_AUTO_TEST_CASE( test_scan_cub_wrapper )
18{
19 std::cout << "Test scan CUB" << "\n";
20
23
25
26 input.resize(10000);
27 output.resize(10000);
28
29 // fill input
30
   // rand() is not seeded anywhere in this block; the factor 10.0 scales the
   // random value into [0,10] before it is stored into property 0.
31 for (size_t i = 0 ; i < 10000; i++)
32 {
33 input.template get<0>(i) = 10.0*(float)rand() / RAND_MAX;
34 }
35
   // Upload property 0 of the input before launching the device scan.
36 input.template hostToDevice<0>();
37
38 gpu::ofp_context_t context;
   // Device pointers of property 0 are passed as unsigned int*; the element
   // count is input.size().
39 openfpm::scan((unsigned int *)input.template getDeviceBuffer<0>(),input.size(),(unsigned int *)output.template getDeviceBuffer<0>(),context);
40
   // Bring the scan result back to the host for verification.
41 output.template deviceToHost<0>();
42
   // Host reference: cnt is the running sum of the inputs seen so far. The
   // comparison happens BEFORE input[i] is added, so output[i] is expected to
   // exclude element i itself.
43 size_t cnt = 0;
44 for (size_t i = 0 ; i < input.size() ; i++)
45 {
46 BOOST_REQUIRE_EQUAL(cnt,output.template get<0>(i));
47 cnt += input.template get<0>(i);
48 }
49
50 std::cout << "End scan CUB" << "\n";
51
   // NOTE(review): the comment below looks like copy-paste residue; nothing
   // related to a cell list is visible in this test.
52 // Test the cell list
53}
54
// Unit test for openfpm::sort (key/value sort on device buffers).
// Fills 100000 keys with random values and the companion array with the
// element index, sorts with gpu::less_t and checks the keys are non-decreasing,
// then sorts with gpu::greater_t and checks the keys are non-increasing.
// NOTE(review): the declarations of `input` and `input_id` are not present in
// this listing (the embedded numbering jumps 58 -> 61 -> 63). The casts below
// suggest property 0 holds unsigned int — confirm against the original file.
55BOOST_AUTO_TEST_CASE( test_sort_cub_wrapper )
56{
57 std::cout << "Test sort CUB" << "\n";
58
61
63
64 input.resize(100000);
65 input_id.resize(100000);
66
67
68 // fill input
69
   // Keys: random value scaled into [0,10000]; values: the original index i.
   // rand() is not seeded anywhere in this block.
70 for (size_t i = 0 ; i < 100000; i++)
71 {
72 input.template get<0>(i) = 10000.0*(float)rand() / RAND_MAX;
73 input_id.template get<0>(i) = i;
74 }
75
   // Upload keys and ids before sorting on the device.
76 input.template hostToDevice<0>();
77 input_id.template hostToDevice<0>();
78
79 gpu::ofp_context_t context;
80
   // First pass: sort with the less-than comparator.
81 openfpm::sort((unsigned int *)input.template getDeviceBuffer<0>(),
82 (unsigned int *)input_id.template getDeviceBuffer<0>(),
83 input.size(),gpu::template less_t<unsigned int>(),context);
84
85 input.template deviceToHost<0>();
86 input_id.template deviceToHost<0>();
87
   // Only the key order is verified; input_id is copied back but never
   // inspected in this test.
88 for (size_t i = 0 ; i < input.size() - 1 ; i++)
89 {
90 BOOST_REQUIRE(input.template get<0>(i) <= input.template get<0>(i+1));
91 }
92
   // Second pass: sort with the greater-than comparator. There is no
   // hostToDevice call between the two sorts, so this operates on whatever
   // the device buffers hold after the first sort.
93 openfpm::sort((unsigned int *)input.template getDeviceBuffer<0>(),
94 (unsigned int *)input_id.template getDeviceBuffer<0>(),
95 input.size(),gpu::template greater_t<unsigned int>(),context);
96
97 input.template deviceToHost<0>();
98 input_id.template deviceToHost<0>();
99
   // Keys must now be non-increasing.
100 for (size_t i = 0 ; i < input.size() - 1 ; i++)
101 {
102 BOOST_REQUIRE(input.template get<0>(i) >= input.template get<0>(i+1));
103 }
104
105 std::cout << "End sort CUB" << "\n";
106
   // NOTE(review): the comment below looks like copy-paste residue; nothing
   // related to a cell list is visible in this test.
107 // Test the cell list
108}
109
// Unit test for openfpm::segreduce with gpu::plus_t<int>.
// Builds 130 random values and a list of random segment offsets over them,
// runs the segmented reduction on the device, and compares every segment's
// result with a sum computed on the host.
// NOTE(review): the declarations of `vgpu`, `segment_offset` and `output` are
// not present in this listing (the embedded numbering jumps 117 -> 121). The
// casts below suggest property 0 holds int — confirm against the original file.
110BOOST_AUTO_TEST_CASE( test_seg_reduce_wrapper )
111{
112 std::cout << "Test gpu segmented reduce" << "\n";
113
114 gpu::ofp_context_t context;
115
116 int count = 130;
117
   // Initial value handed to segreduce.
121 int init = 0;
122
123 vgpu.resize(count);
124
   // Values are random numbers scaled into [0,17]; rand() is not seeded in
   // this block.
125 for (size_t i = 0 ; i < count ; i++)
126 {
127 vgpu.template get<0>(i) = ((float)rand() / (float)RAND_MAX) * 17;
128 }
129
   // Build the offsets: the first entry is 0, each following entry is the
   // previous entry plus a random length c. `base` tracks the running total
   // and the loop stops as soon as the next length would reach or pass count.
   // c may be 0, in which case two consecutive offsets are equal.
130 segment_offset.add();
131 segment_offset.template get<0>(0) = 0;
132 size_t base = 0;
133 while (1)
134 {
135 int c = ((float)rand() / (float)RAND_MAX) * 17;
136
137 if (c + base >= count)
138 {break;}
139
140 segment_offset.add();
141 segment_offset.template get<0>(segment_offset.size() - 1) = c + segment_offset.template get<0>(segment_offset.size() - 2);
142
143 base += c;
144 }
   // Closing entry: the total number of values, used below as the end of the
   // last segment.
145 segment_offset.add();
146 segment_offset.template get<0>(segment_offset.size() - 1) = vgpu.size();
147
148 vgpu.hostToDevice<0>();
149
150 segment_offset.hostToDevice<0>();
   // One output slot per segment: segment_offset.size()-1 of them, because
   // the final offset entry only marks the end.
151 output.resize(segment_offset.size()-1);
152
153 openfpm::segreduce((int *)vgpu.template getDeviceBuffer<0>(), vgpu.size(),
154 (int *)segment_offset.template getDeviceBuffer<0>(), segment_offset.size()-1,
155 (int *)output.template getDeviceBuffer<0>(),
156 gpu::plus_t<int>(), init, context);
157
158
159 output.template deviceToHost<0>();
160
   // Host verification, part 1: every segment except the last. Segment i
   // covers indices [offset[i], offset[i+1]). `i` is declared outside the
   // loop because its final value is reused for the last segment below.
161 bool match = true;
162 size_t i = 0;
163 for ( ; i < segment_offset.size()-2 ; i++)
164 {
165 size_t red = 0;
166 for (size_t j = 0 ; j < segment_offset.template get<0>(i+1) - segment_offset.template get<0>(i) ; j++)
167 {
168 red += vgpu.template get<0>(segment_offset.template get<0>(i) + j);
169 }
170 match &= red == output.template get<0>(i);
171 }
172
173 BOOST_REQUIRE_EQUAL(match,true);
174
   // Host verification, part 2: the last segment. Here i equals
   // segment_offset.size()-2, so the segment ends at the closing offset entry.
175 size_t red2 = 0;
176 for (size_t j = 0 ; j < segment_offset.template get<0>(i+1) - segment_offset.template get<0>(i) ; j++)
177 {
178 red2 += vgpu.template get<0>(segment_offset.template get<0>(i) + j);
179 }
180 match &= red2 == output.template get<0>(i);
181
182 BOOST_REQUIRE_EQUAL(match,true);
183
   // NOTE(review): this closing banner does not mirror the opening one
   // ("Test gpu segmented reduce"); it is a runtime string and is left as is.
184 std::cout << "End test modern gpu test reduce" << "\n";
185
   // NOTE(review): the comment below looks like copy-paste residue; nothing
   // related to a cell list is visible in this test.
186 // Test the cell list
187}
188
189BOOST_AUTO_TEST_SUITE_END()
Implementation of 1-D std::vector like structure.
size_t size()
Stub size.
OutputIteratorT OffsetT ReductionOpT OuputT init
[in] The initial value of the reduction
temporal buffer for reductions