OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
modern_gpu_tests.cu
1 #include "config.h"
2 #define BOOST_TEST_DYN_LINK
3 #include <boost/test/unit_test.hpp>
4 
5 #include "util/cuda_util.hpp"
6 #include "Vector/map_vector.hpp"
7 
8 #ifndef CUDA_ON_CPU
9 
10 #ifndef __HIP__
11 #include "util/cuda/moderngpu/kernel_load_balance.hxx"
12 #include "util/cuda/moderngpu/kernel_mergesort.hxx"
13 #include "util/cuda/moderngpu/kernel_reduce.hxx"
14 #include "util/cuda/moderngpu/kernel_segreduce.hxx"
15 
16 
17 BOOST_AUTO_TEST_SUITE( modern_gpu_tests )
18 
19 BOOST_AUTO_TEST_CASE( modern_gpu_loadbalance_lbs )
20 {
21  std::cout << "Test modern gpu test tansform_lbs" << "\n";
22 
23  mgpu::standard_context_t context(false);
24 
25  int count = 200030;
26  int spacing = 100;
27 
28  int num_segments = mgpu::div_up(count, spacing);
29  openfpm::vector_gpu<aggregate<int>> segments(num_segments);
30  for(int i = 0; i < num_segments; ++i)
31  {segments.template get<0>(i) = i * spacing;}
32 
34 
35  segments.template hostToDevice<0>();
36 
37  mgpu::load_balance_search(count, (int *)segments.template getDeviceBuffer<0>(), num_segments, (int *)lbs.template getDeviceBuffer<0>(),context);
38 
39  lbs.deviceToHost<0>();
40 
41  bool check = true;
42  for(size_t i = 0; i < lbs.size(); ++i)
43  {
44  check &= lbs.template get<0>(i) == i / spacing;
45  }
46 
47  BOOST_REQUIRE_EQUAL(check,true);
48 
49  std::cout << "End test modern gpu test tansform_lbs" << "\n";
50 
51  // Test the cell list
52 }
53 
54 BOOST_AUTO_TEST_CASE( modern_gpu_sort )
55 {
56  std::cout << "Test modern gpu test tansform_lbs" << "\n";
57 
58  mgpu::standard_context_t context(false);
59 
60  int count = 200030;
61 
64 
65  vgpu.resize(count);
66  gpu_ns.resize(count);
67 
68  for (size_t i = 0 ; i < count ; i++)
69  {
70  vgpu.template get<0>(i) = ((float)rand() / (float)RAND_MAX) * 17;
71  vgpu.template get<1>(i) = i;
72 
73  gpu_ns.template get<0>(i) = vgpu.template get<0>(i);
74  }
75 
76  vgpu.hostToDevice<0,1>();
77 
78  mergesort((unsigned int *)vgpu.getDeviceBuffer<0>(),(unsigned int *)vgpu.getDeviceBuffer<1>(), count, mgpu::less_t<unsigned int>(), context);
79 
80  vgpu.deviceToHost<0,1>();
81 
82  // print
83 
84  bool match = true;
85  for (int i = 0 ; i < count - 1 ; i++)
86  {
87  match &= vgpu.template get<0>(i) <= vgpu.template get<0>(i+1);
88  match &= gpu_ns.template get<0>(vgpu.template get<1>(i)) == vgpu.template get<0>(i);
89  }
90 
91  BOOST_REQUIRE_EQUAL(match,true);
92 
93  std::cout << "End test modern gpu test tansform_lbs" << "\n";
94 
95  // Test the cell list
96 }
97 
98 BOOST_AUTO_TEST_CASE( modern_gpu_reduce )
99 {
100  std::cout << "Test modern gpu reduce" << "\n";
101 
102  mgpu::standard_context_t context(false);
103 
104  int count = 200030;
105 
107 
108  vgpu.resize(count);
109 
110  for (size_t i = 0 ; i < count ; i++)
111  {
112  vgpu.template get<0>(i) = ((float)rand() / (float)RAND_MAX) * 17;
113  }
114 
115  vgpu.hostToDevice<0>();
116 
117  CudaMemory mem;
118  mem.allocate(sizeof(int));
119  mgpu::reduce((int *)vgpu.template getDeviceBuffer<0>(), count, (int *)mem.getDevicePointer(), mgpu::plus_t<int>(), context);
120 
121  mem.deviceToHost();
122  int red_p = *(int *)mem.getPointer();
123 
124  // print
125 
126  int red = 0;
127  for (int i = 0 ; i < count ; i++)
128  {
129  red += vgpu.template get<0>(i);
130  }
131 
132  BOOST_REQUIRE_EQUAL(red,red_p);
133 
134  std::cout << "End test modern gpu test reduce" << "\n";
135 
136  // Test the cell list
137 }
138 
139 
140 BOOST_AUTO_TEST_CASE( modern_gpu_seg_reduce )
141 {
142  std::cout << "Test modern gpu segmented reduce" << "\n";
143 
144  mgpu::standard_context_t context(false);
145 
146  int count = 130;
147 
149  openfpm::vector_gpu<aggregate<int>> segment_offset;
151  int init = 0;
152 
153  vgpu.resize(count);
154 
155  for (size_t i = 0 ; i < count ; i++)
156  {
157  vgpu.template get<0>(i) = ((float)rand() / (float)RAND_MAX) * 17;
158  }
159 
160  segment_offset.add();
161  segment_offset.template get<0>(0) = 0;
162  size_t base = 0;
163  while (1)
164  {
165  int c = ((float)rand() / (float)RAND_MAX) * 17;
166 
167  if (c + base >= count)
168  {break;}
169 
170  segment_offset.add();
171  segment_offset.template get<0>(segment_offset.size() - 1) = c + segment_offset.template get<0>(segment_offset.size() - 2);
172 
173  base += c;
174  }
175 
176  vgpu.hostToDevice<0>();
177  segment_offset.hostToDevice<0>();
178  output.resize(segment_offset.size());
179 
180  mgpu::segreduce((int *)vgpu.template getDeviceBuffer<0>(), vgpu.size(),
181  (int *)segment_offset.template getDeviceBuffer<0>(), segment_offset.size(),
182  (int *)output.template getDeviceBuffer<0>(),
183  mgpu::plus_t<int>(), init, context);
184 
185 
186  output.template deviceToHost<0>();
187 
188  bool match = true;
189  size_t i = 0;
190  for ( ; i < segment_offset.size()-1 ; i++)
191  {
192  size_t red = 0;
193  for (size_t j = 0 ; j < segment_offset.template get<0>(i+1) - segment_offset.template get<0>(i) ; j++)
194  {
195  red += vgpu.template get<0>(segment_offset.template get<0>(i) + j);
196  }
197  match &= red == output.template get<0>(i);
198  }
199 
200  BOOST_REQUIRE_EQUAL(match,true);
201 
202  size_t red2 = 0;
203  for (size_t j = 0 ; j < vgpu.size() - segment_offset.template get<0>(i) ; j++)
204  {
205  red2 += vgpu.template get<0>(segment_offset.template get<0>(i) + j);
206  }
207  match &= red2 == output.template get<0>(i);
208 
209  BOOST_REQUIRE_EQUAL(match,true);
210 
211  std::cout << "End test modern gpu test reduce" << "\n";
212 
213  // Test the cell list
214 }
215 
216 
217 BOOST_AUTO_TEST_SUITE_END()
218 
219 #endif
220 
221 #endif
222 
virtual bool allocate(size_t sz)
allocate memory
Definition: CudaMemory.cu:38
virtual void * getPointer()
get a readable pointer with the data
Definition: CudaMemory.cu:352
size_t size()
Stub size.
Definition: map_vector.hpp:211
virtual void * getDevicePointer()
get a readable pointer with the data
Definition: CudaMemory.cu:497
virtual void deviceToHost()
Move memory from device to host.
Definition: CudaMemory.cu:367
temporary buffer for reductions
OutputIteratorT OffsetT ReductionOpT OuputT init
< [in] The initial value of the reduction
Implementation of 1-D std::vector like structure.
Definition: map_vector.hpp:202