OpenFPM  5.2.0
Project that contains the implementation of distributed structures
scan_sort_cuda_unit_tests.cu
1 #include "util/cuda_util.hpp"
2 #include "config.h"
3 #define BOOST_TEST_DYN_LINK
4 #include <boost/test/unit_test.hpp>
5 
6 #include "Vector/map_vector.hpp"
7 #include "sort_ofp.cuh"
8 #include "scan_ofp.cuh"
9 #include "segreduce_ofp.cuh"
10 
11 BOOST_AUTO_TEST_SUITE( scan_tests )
12 
13 BOOST_AUTO_TEST_CASE( test_scan_cub_wrapper )
14 {
15  std::cout << "Test scan CUB" << "\n";
16 
19 
21 
22  input.resize(10000);
23  output.resize(10000);
24 
25  // fill input
26 
27  for (size_t i = 0 ; i < 10000; i++)
28  {
29  input.template get<0>(i) = 10.0*(float)rand() / RAND_MAX;
30  }
31 
32  input.template hostToDevice<0>();
33 
34  gpu::ofp_context_t gpuContext;
35  openfpm::scan((unsigned int *)input.template getDeviceBuffer<0>(),input.size(),(unsigned int *)output.template getDeviceBuffer<0>(),gpuContext);
36 
37  output.template deviceToHost<0>();
38 
39  size_t cnt = 0;
40  for (size_t i = 0 ; i < input.size() ; i++)
41  {
42  BOOST_REQUIRE_EQUAL(cnt,output.template get<0>(i));
43  cnt += input.template get<0>(i);
44  }
45 
46  std::cout << "End scan CUB" << "\n";
47 
48  // Test the cell list
49 }
50 
51 BOOST_AUTO_TEST_CASE( test_sort_cub_wrapper )
52 {
53  std::cout << "Test sort CUB" << "\n";
54 
57 
59 
60  input.resize(100000);
61  input_id.resize(100000);
62 
63 
64  // fill input
65 
66  for (size_t i = 0 ; i < 100000; i++)
67  {
68  input.template get<0>(i) = 10000.0*(float)rand() / RAND_MAX;
69  input_id.template get<0>(i) = i;
70  }
71 
72  input.template hostToDevice<0>();
73  input_id.template hostToDevice<0>();
74 
75  gpu::ofp_context_t gpuContext;
76 
77  openfpm::sort((unsigned int *)input.template getDeviceBuffer<0>(),
78  (unsigned int *)input_id.template getDeviceBuffer<0>(),
79  input.size(),gpu::template less_t<unsigned int>(),gpuContext);
80 
81  input.template deviceToHost<0>();
82  input_id.template deviceToHost<0>();
83 
84  for (size_t i = 0 ; i < input.size() - 1 ; i++)
85  {
86  BOOST_REQUIRE(input.template get<0>(i) <= input.template get<0>(i+1));
87  }
88 
89  openfpm::sort((unsigned int *)input.template getDeviceBuffer<0>(),
90  (unsigned int *)input_id.template getDeviceBuffer<0>(),
91  input.size(),gpu::template greater_t<unsigned int>(),gpuContext);
92 
93  input.template deviceToHost<0>();
94  input_id.template deviceToHost<0>();
95 
96  for (size_t i = 0 ; i < input.size() - 1 ; i++)
97  {
98  BOOST_REQUIRE(input.template get<0>(i) >= input.template get<0>(i+1));
99  }
100 
101  std::cout << "End sort CUB" << "\n";
102 
103  // Test the cell list
104 }
105 
106 BOOST_AUTO_TEST_CASE( test_seg_reduce_wrapper )
107 {
108  std::cout << "Test gpu segmented reduce" << "\n";
109 
110  gpu::ofp_context_t gpuContext;
111 
112  int count = 130;
113 
115  openfpm::vector_gpu<aggregate<int>> segment_offset;
117  int init = 0;
118 
119  vgpu.resize(count);
120 
121  for (size_t i = 0 ; i < count ; i++)
122  {
123  vgpu.template get<0>(i) = ((float)rand() / (float)RAND_MAX) * 17;
124  }
125 
126  segment_offset.add();
127  segment_offset.template get<0>(0) = 0;
128  size_t base = 0;
129  while (1)
130  {
131  int c = ((float)rand() / (float)RAND_MAX) * 17;
132 
133  if (c + base >= count)
134  {break;}
135 
136  segment_offset.add();
137  segment_offset.template get<0>(segment_offset.size() - 1) = c + segment_offset.template get<0>(segment_offset.size() - 2);
138 
139  base += c;
140  }
141  segment_offset.add();
142  segment_offset.template get<0>(segment_offset.size() - 1) = vgpu.size();
143 
144  vgpu.hostToDevice<0>();
145 
146  segment_offset.hostToDevice<0>();
147  output.resize(segment_offset.size()-1);
148 
149  openfpm::segreduce((int *)vgpu.template getDeviceBuffer<0>(), vgpu.size(),
150  (int *)segment_offset.template getDeviceBuffer<0>(), segment_offset.size()-1,
151  (int *)output.template getDeviceBuffer<0>(),
152  gpu::plus_t<int>(), init, gpuContext);
153 
154 
155  output.template deviceToHost<0>();
156 
157  bool match = true;
158  size_t i = 0;
159  for ( ; i < segment_offset.size()-2 ; i++)
160  {
161  size_t red = 0;
162  for (size_t j = 0 ; j < segment_offset.template get<0>(i+1) - segment_offset.template get<0>(i) ; j++)
163  {
164  red += vgpu.template get<0>(segment_offset.template get<0>(i) + j);
165  }
166  match &= red == output.template get<0>(i);
167  }
168 
169  BOOST_REQUIRE_EQUAL(match,true);
170 
171  size_t red2 = 0;
172  for (size_t j = 0 ; j < segment_offset.template get<0>(i+1) - segment_offset.template get<0>(i) ; j++)
173  {
174  red2 += vgpu.template get<0>(segment_offset.template get<0>(i) + j);
175  }
176  match &= red2 == output.template get<0>(i);
177 
178  BOOST_REQUIRE_EQUAL(match,true);
179 
180  std::cout << "End test gpu seg reduce test" << "\n";
181 
182  // Test the cell list
183 }
184 
185 BOOST_AUTO_TEST_SUITE_END()
cub::init
OutputIteratorT OffsetT ReductionOpT OuputT init
[in] The initial value of the reduction
Definition: dispatch_reduce.cuh:119
openfpm::vector::size
size_t size()
Stub size.
Definition: map_vector.hpp:212
openfpm::vector
Implementation of 1-D std::vector like structure.
Definition: map_vector.hpp:203
gpu::plus_t
Definition: cudify_alpaka.hpp:180
gpu::ofp_context_t
Definition: ofp_context.hpp:302
red
temporary buffer for reductions
Definition: VCluster_base.hpp:81