OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
scan_sort_cuda_unit_tests.cu
1 #define BOOST_GPU_ENABLED __host__ __device__
2 
3 #include "config.h"
4 #define BOOST_TEST_DYN_LINK
5 #include "util/cuda_launch.hpp"
6 #include <boost/test/unit_test.hpp>
7 
8 #include "util/cuda_util.hpp"
9 #include "Vector/map_vector.hpp"
10 
11 #define SORT_WITH_CUB
12 
13 #include "sort_ofp.cuh"
14 #include "scan_ofp.cuh"
15 #include "segreduce_ofp.cuh"
16 
17 BOOST_AUTO_TEST_SUITE( scan_tests )
18 
19 BOOST_AUTO_TEST_CASE( test_scan_cub_wrapper )
20 {
21  std::cout << "Test scan CUB" << "\n";
22 
25 
27 
28  input.resize(10000);
29  output.resize(10000);
30 
31  // fill input
32 
33  for (size_t i = 0 ; i < 10000; i++)
34  {
35  input.template get<0>(i) = 10.0*(float)rand() / RAND_MAX;
36  }
37 
38  input.template hostToDevice<0>();
39 
40  mgpu::ofp_context_t context;
41  openfpm::scan((unsigned int *)input.template getDeviceBuffer<0>(),input.size(),(unsigned int *)output.template getDeviceBuffer<0>(),context);
42 
43  output.template deviceToHost<0>();
44 
45  size_t cnt = 0;
46  for (size_t i = 0 ; i < input.size() ; i++)
47  {
48  BOOST_REQUIRE_EQUAL(cnt,output.template get<0>(i));
49  cnt += input.template get<0>(i);
50  }
51 
52  std::cout << "End scan CUB" << "\n";
53 
54  // Test the cell list
55 }
56 
57 BOOST_AUTO_TEST_CASE( test_sort_cub_wrapper )
58 {
59  std::cout << "Test sort CUB" << "\n";
60 
63 
65 
66  input.resize(100000);
67  input_id.resize(100000);
68 
69 
70  // fill input
71 
72  for (size_t i = 0 ; i < 100000; i++)
73  {
74  input.template get<0>(i) = 10000.0*(float)rand() / RAND_MAX;
75  input_id.template get<0>(i) = i;
76  }
77 
78  input.template hostToDevice<0>();
79  input_id.template hostToDevice<0>();
80 
81  mgpu::ofp_context_t context;
82 
83  openfpm::sort((unsigned int *)input.template getDeviceBuffer<0>(),
84  (unsigned int *)input_id.template getDeviceBuffer<0>(),
85  input.size(),mgpu::template less_t<unsigned int>(),context);
86 
87  input.template deviceToHost<0>();
88  input_id.template deviceToHost<0>();
89 
90  for (size_t i = 0 ; i < input.size() - 1 ; i++)
91  {
92  BOOST_REQUIRE(input.template get<0>(i) <= input.template get<0>(i+1));
93  }
94 
95  openfpm::sort((unsigned int *)input.template getDeviceBuffer<0>(),
96  (unsigned int *)input_id.template getDeviceBuffer<0>(),
97  input.size(),mgpu::template greater_t<unsigned int>(),context);
98 
99  input.template deviceToHost<0>();
100  input_id.template deviceToHost<0>();
101 
102  for (size_t i = 0 ; i < input.size() - 1 ; i++)
103  {
104  BOOST_REQUIRE(input.template get<0>(i) >= input.template get<0>(i+1));
105  }
106 
107  std::cout << "End sort CUB" << "\n";
108 
109  // Test the cell list
110 }
111 
112 BOOST_AUTO_TEST_CASE( test_seg_reduce_wrapper )
113 {
114  std::cout << "Test gpu segmented reduce" << "\n";
115 
116  mgpu::ofp_context_t context;
117 
118  int count = 130;
119 
121  openfpm::vector_gpu<aggregate<int>> segment_offset;
123  int init = 0;
124 
125  vgpu.resize(count);
126 
127  for (size_t i = 0 ; i < count ; i++)
128  {
129  vgpu.template get<0>(i) = ((float)rand() / (float)RAND_MAX) * 17;
130  }
131 
132  segment_offset.add();
133  segment_offset.template get<0>(0) = 0;
134  size_t base = 0;
135  while (1)
136  {
137  int c = ((float)rand() / (float)RAND_MAX) * 17;
138 
139  if (c + base >= count)
140  {break;}
141 
142  segment_offset.add();
143  segment_offset.template get<0>(segment_offset.size() - 1) = c + segment_offset.template get<0>(segment_offset.size() - 2);
144 
145  base += c;
146  }
147 
148  vgpu.hostToDevice<0>();
149 
150  segment_offset.hostToDevice<0>();
151  output.resize(segment_offset.size());
152 
153  openfpm::segreduce((int *)vgpu.template getDeviceBuffer<0>(), vgpu.size(),
154  (int *)segment_offset.template getDeviceBuffer<0>(), segment_offset.size(),
155  (int *)output.template getDeviceBuffer<0>(),
156  mgpu::plus_t<int>(), init, context);
157 
158 
159  output.template deviceToHost<0>();
160 
161  bool match = true;
162  size_t i = 0;
163  for ( ; i < segment_offset.size()-1 ; i++)
164  {
165  size_t red = 0;
166  for (size_t j = 0 ; j < segment_offset.template get<0>(i+1) - segment_offset.template get<0>(i) ; j++)
167  {
168  red += vgpu.template get<0>(segment_offset.template get<0>(i) + j);
169  }
170  match &= red == output.template get<0>(i);
171  }
172 
173  BOOST_REQUIRE_EQUAL(match,true);
174 
175  size_t red2 = 0;
176  for (size_t j = 0 ; j < vgpu.size() - segment_offset.template get<0>(i) ; j++)
177  {
178  red2 += vgpu.template get<0>(segment_offset.template get<0>(i) + j);
179  }
180  match &= red2 == output.template get<0>(i);
181 
182  BOOST_REQUIRE_EQUAL(match,true);
183 
184  std::cout << "End test modern gpu test reduce" << "\n";
185 
186  // Test the cell list
187 }
188 
189 BOOST_AUTO_TEST_SUITE_END()
size_t size()
Stub size.
Definition: map_vector.hpp:211
temporal buffer for reductions
OutputIteratorT OffsetT ReductionOpT OuputT init
< [in] The initial value of the reduction
Implementation of a 1-D std::vector-like structure.
Definition: map_vector.hpp:202