OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
 
cudify_unit_test.cu
#include "config.h"
#define BOOST_TEST_DYN_LINK
#include <boost/test/unit_test.hpp>

#include <type_traits>

// Stray global trait instance (presumably a leftover compile-time check); unused by the tests.
std::is_trivially_copyable<int> b;

#include "util/cuda_launch.hpp"
#include "memory/CudaMemory.cuh"

#ifdef CUDIFY_USE_OPENMP
BOOST_AUTO_TEST_SUITE( cudify_tests_openmp )
#elif defined(CUDIFY_USE_CUDA)
BOOST_AUTO_TEST_SUITE( cudify_tests_cuda )
#elif defined(CUDIFY_USE_HIP)
BOOST_AUTO_TEST_SUITE( cudify_tests_hip )
#else
BOOST_AUTO_TEST_SUITE( cudify_tests_sequential )
#endif
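
// The suite name reflects the cudify backend selected at configure time (OpenMP, CUDA, HIP or
// the sequential fallback); the same tests are compiled against whichever backend implements
// the CUDA-style launch macros used below.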

struct par_struct
{
    float * ptr;
};

struct ite_g
{
    dim3 wthr;
    dim3 thr;

    size_t nblocks()
    {
        return wthr.x * wthr.y * wthr.z;
    }

    size_t nthrs()
    {
        return thr.x * thr.y * thr.z;
    }
};

template<typename T>
__global__ void test1(float * array, T p)
{
    size_t idx_x = blockIdx.x * blockDim.x + threadIdx.x;
    size_t idx_y = blockIdx.y * blockDim.y + threadIdx.y;
    size_t idx_z = blockIdx.z * blockDim.z + threadIdx.z;

    array[idx_z*gridDim.x*gridDim.y*blockDim.x*blockDim.y + idx_y*gridDim.x*blockDim.x + idx_x] = 5.0;

    p.ptr[idx_z*gridDim.x*gridDim.y*blockDim.x*blockDim.y + idx_y*gridDim.x*blockDim.x + idx_x] = 17.0;
}
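
// The index above flattens the 3D thread coordinate; with a 4x4x4 grid of 4x4x4 blocks (as used
// in the test cases below) it enumerates all 16*16*16 elements of the two buffers exactly once.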

template<typename T>
__global__ void test1_syncthreads(T p, float * array)
{
    __shared__ int cnt;

    cnt = 0;

    __syncthreads();

    size_t idx_x = blockIdx.x * blockDim.x + threadIdx.x;
    size_t idx_y = blockIdx.y * blockDim.y + threadIdx.y;
    size_t idx_z = blockIdx.z * blockDim.z + threadIdx.z;

    array[idx_z*gridDim.x*gridDim.y*blockDim.x*blockDim.y + idx_y*gridDim.x*blockDim.x + idx_x] = 5.0;

    p.ptr[idx_z*gridDim.x*gridDim.y*blockDim.x*blockDim.y + idx_y*gridDim.x*blockDim.x + idx_x] = 17.0;

    atomicAdd(&cnt,1);

    __syncthreads();

    array[idx_z*gridDim.x*gridDim.y*blockDim.x*blockDim.y + idx_y*gridDim.x*blockDim.x + idx_x] = cnt;

    __syncthreads();

    atomicAdd(&cnt,1);

    __syncthreads();

    p.ptr[idx_z*gridDim.x*gridDim.y*blockDim.x*blockDim.y + idx_y*gridDim.x*blockDim.x + idx_x] = cnt;
}
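
// Each block has 4*4*4 = 64 threads, so after the first atomicAdd/__syncthreads round cnt is 64
// and after the second it is 128; the cudify_on_test_test2 cases below require exactly these
// values in every element.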

BOOST_AUTO_TEST_CASE( cudify_on_test_test )
{
    init_wrappers();

    CudaMemory mem;
    mem.allocate(16*16*16*sizeof(float));

    CudaMemory mem2;
    mem2.allocate(16*16*16*sizeof(float));

    float * array_ptr = (float *)mem.getDevicePointer();

    par_struct p;
    p.ptr = (float *)mem2.getDevicePointer();

    ite_g g;

    g.wthr = dim3(4,4,4);
    g.thr = dim3(4,4,4);

    CUDA_LAUNCH(test1,g,array_ptr,p);

    mem.deviceToHost();
    mem2.deviceToHost();

    float * ptr1 = (float *)mem.getPointer();
    float * ptr2 = (float *)mem2.getPointer();

    bool check = true;
    for (int i = 0 ; i < 16*16*16; i++)
    {
        check &= ptr1[i] == 5.0;
        check &= ptr2[i] == 17.0;
    }

    BOOST_REQUIRE_EQUAL(check,true);
}

BOOST_AUTO_TEST_CASE( cudify_on_test_test2 )
{
    init_wrappers();

    CudaMemory mem;
    mem.allocate(16*16*16*sizeof(float));

    CudaMemory mem2;
    mem2.allocate(16*16*16*sizeof(float));

    float * array_ptr = (float *)mem.getDevicePointer();

    par_struct p;
    p.ptr = (float *)mem2.getDevicePointer();

    ite_g g;

    g.wthr = dim3(4,4,4);
    g.thr = dim3(4,4,4);

    CUDA_LAUNCH(test1_syncthreads,g,p,array_ptr);

    mem.deviceToHost();
    mem2.deviceToHost();

    float * ptr1 = (float *)mem.getPointer();
    float * ptr2 = (float *)mem2.getPointer();

    bool check = true;
    for (int i = 0 ; i < 16*16*16; i++)
    {
        //std::cout << i << " " << ptr1[i] << " " << ptr2[i] << std::endl;

        check &= ptr1[i] == 64.0;
        check &= ptr2[i] == 128.0;
    }

    BOOST_REQUIRE_EQUAL(check,true);
}
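
// The next case repeats the __syncthreads test with a device lambda launched through
// CUDA_LAUNCH_LAMBDA; the lambda receives blockIdx and threadIdx as dim3 references,
// presumably so that the non-CUDA backends can supply the coordinates explicitly.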

BOOST_AUTO_TEST_CASE( cudify_on_test_test2_lambda )
{
    init_wrappers();

    CudaMemory mem;
    mem.allocate(16*16*16*sizeof(float));

    CudaMemory mem2;
    mem2.allocate(16*16*16*sizeof(float));

    float * array_ptr = (float *)mem.getDevicePointer();

    par_struct p;
    p.ptr = (float *)mem2.getDevicePointer();

    ite_g g;

    float * array = array_ptr;

    g.wthr = dim3(4,4,4);
    g.thr = dim3(4,4,4);

    auto lambda_f = [array,p] __device__ (dim3 & blockIdx, dim3 & threadIdx){
        __shared__ int cnt;

        cnt = 0;

        __syncthreads();

        size_t idx_x = blockIdx.x * blockDim.x + threadIdx.x;
        size_t idx_y = blockIdx.y * blockDim.y + threadIdx.y;
        size_t idx_z = blockIdx.z * blockDim.z + threadIdx.z;

        array[idx_z*gridDim.x*gridDim.y*blockDim.x*blockDim.y + idx_y*gridDim.x*blockDim.x + idx_x] = 5.0;

        p.ptr[idx_z*gridDim.x*gridDim.y*blockDim.x*blockDim.y + idx_y*gridDim.x*blockDim.x + idx_x] = 17.0;

        atomicAdd(&cnt,1);

        __syncthreads();

        array[idx_z*gridDim.x*gridDim.y*blockDim.x*blockDim.y + idx_y*gridDim.x*blockDim.x + idx_x] = cnt;

        __syncthreads();

        atomicAdd(&cnt,1);

        __syncthreads();

        p.ptr[idx_z*gridDim.x*gridDim.y*blockDim.x*blockDim.y + idx_y*gridDim.x*blockDim.x + idx_x] = cnt;
    };

    CUDA_LAUNCH_LAMBDA(g, lambda_f);

    mem.deviceToHost();
    mem2.deviceToHost();

    float * ptr1 = (float *)mem.getPointer();
    float * ptr2 = (float *)mem2.getPointer();

    bool check = true;
    for (int i = 0 ; i < 16*16*16; i++)
    {
        //std::cout << i << " " << ptr1[i] << " " << ptr2[i] << std::endl;

        check &= ptr1[i] == 64.0;
        check &= ptr2[i] == 128.0;
    }

    BOOST_REQUIRE_EQUAL(check,true);
}
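
// The last case checks that the vector types used throughout OpenFPM (float3, float4 and
// make_float4) behave like the CUDA built-ins; on the non-CUDA backends they are presumably
// provided by the cudify compatibility headers.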

BOOST_AUTO_TEST_CASE( cudify_type_chack )
{
    float3 f3;

    f3.x = 0.0;
    f3.y = 1.0;
    f3.z = 2.0;

    BOOST_REQUIRE_EQUAL(f3.x,0.0);
    BOOST_REQUIRE_EQUAL(f3.y,1.0);
    BOOST_REQUIRE_EQUAL(f3.z,2.0);

    float4 f4 = make_float4(0.0,1.0,2.0,3.0);

    BOOST_REQUIRE_EQUAL(f4.x,0.0);
    BOOST_REQUIRE_EQUAL(f4.y,1.0);
    BOOST_REQUIRE_EQUAL(f4.z,2.0);
    BOOST_REQUIRE_EQUAL(f4.w,3.0);
}

BOOST_AUTO_TEST_SUITE_END()