2 #define BOOST_TEST_DYN_LINK
3 #include <boost/test/unit_test.hpp>
5 #include "util/cuda_util.hpp"
6 #include "memory/CudaMemory.cuh"
// Opens the Boost test suite under a name that depends on which CUDIFY_USE_*
// macro is defined at compile time: OpenMP, CUDA or HIP.
8 #ifdef CUDIFY_USE_OPENMP
9 BOOST_AUTO_TEST_SUITE( cudify_tests_openmp )
10 #elif defined(CUDIFY_USE_CUDA)
11 BOOST_AUTO_TEST_SUITE( cudify_tests_cuda )
12 #elif defined(CUDIFY_USE_HIP)
13 BOOST_AUTO_TEST_SUITE( cudify_tests_hip )
// NOTE(review): presumably the fallback used when none of the macros above is
// defined; no #else / #endif is visible in this view -- confirm.
15 BOOST_AUTO_TEST_SUITE( cudify_tests_sequential )
// Returns the product of the x, y and z components of wthr.
// NOTE(review): the tests in this file assign a dim3 to g.wthr, so this is
// presumably the total number of blocks of a launch configuration -- confirm
// against the enclosing type, which is not visible here.
28 size_t nblocks()
const
30 return wthr.x * wthr.y * wthr.z;
35 return thr.x * thr.y * thr.z;
// Kernel: every thread computes its global (x, y, z) coordinate, flattens it
// into a linear offset and stores 5.0 into array and 17.0 into p.ptr there.
//
// array  buffer that receives 5.0 at the thread's linear offset
// p      object whose ptr member receives 17.0 at the same offset
//
// NOTE(review): T is presumably a template parameter declared on a line not
// visible here -- confirm.
// NOTE(review): no bounds check is visible; both buffers are assumed to hold
// at least one element per launched thread -- confirm against the callers.
40 __global__
void test1(
float * array,T p)
// Global coordinate of this thread along each axis.
42 size_t idx_x = blockIdx.x * blockDim.x + threadIdx.x;
43 size_t idx_y = blockIdx.y * blockDim.y + threadIdx.y;
44 size_t idx_z = blockIdx.z * blockDim.z + threadIdx.z;
// Linear offset with x varying fastest: a row spans gridDim.x*blockDim.x
// elements and a z-plane spans (gridDim.x*blockDim.x)*(gridDim.y*blockDim.y).
46 array[idx_z*gridDim.x*gridDim.y*blockDim.x*blockDim.y + idx_y*gridDim.x*blockDim.x + idx_x] = 5.0;
// Same offset, written through the pointer carried by p.
48 p.ptr[idx_z*gridDim.x*gridDim.y*blockDim.x*blockDim.y + idx_y*gridDim.x*blockDim.x + idx_x] = 17.0;
// Kernel: like test1 it first stores 5.0 into array and 17.0 into p.ptr at the
// thread's linear offset, then overwrites both locations with the value of cnt.
//
// p      object whose ptr member is written at the thread's linear offset
// array  buffer written at the same offset
//
// Note the parameter order (p first, array second) is the reverse of test1.
//
// NOTE(review): cnt is neither declared nor modified in the visible lines.
// Judging from the kernel name and the host-side checks for 64.0 and 128.0 it
// is presumably a per-block counter updated between barriers -- confirm
// against the full kernel body.
// NOTE(review): T is presumably a template parameter declared on a line not
// visible here -- confirm.
52 __global__
void test1_syncthreads(T p,
float * array)
// Global coordinate of this thread along each axis.
60 size_t idx_x = blockIdx.x * blockDim.x + threadIdx.x;
61 size_t idx_y = blockIdx.y * blockDim.y + threadIdx.y;
62 size_t idx_z = blockIdx.z * blockDim.z + threadIdx.z;
// Initial stores; linear offset has x varying fastest, then y, then z.
64 array[idx_z*gridDim.x*gridDim.y*blockDim.x*blockDim.y + idx_y*gridDim.x*blockDim.x + idx_x] = 5.0;
66 p.ptr[idx_z*gridDim.x*gridDim.y*blockDim.x*blockDim.y + idx_y*gridDim.x*blockDim.x + idx_x] = 17.0;
// Overwrite the array element with the current value of cnt.
72 array[idx_z*gridDim.x*gridDim.y*blockDim.x*blockDim.y + idx_y*gridDim.x*blockDim.x + idx_x] = cnt;
// Overwrite the p.ptr element with the (later) value of cnt.
80 p.ptr[idx_z*gridDim.x*gridDim.y*blockDim.x*blockDim.y + idx_y*gridDim.x*blockDim.x + idx_x] = cnt;
// Test: launches test1 through CUDA_LAUNCH and requires that all 16*16*16
// floats read through ptr1 equal 5.0 and all read through ptr2 equal 17.0.
// NOTE(review): the declarations of mem, mem2, array_ptr, p, g, ptr1, ptr2 and
// check are not visible here; how ptr1/ptr2 map onto mem/mem2 should be
// confirmed against the full test body.
84 BOOST_AUTO_TEST_CASE( cudify_on_test_test )
// Each buffer is sized for 16*16*16 floats.
89 mem.
allocate(16*16*16*
sizeof(
float));
92 mem2.
allocate(16*16*16*
sizeof(
float));
// wthr is set to 4 x 4 x 4.
101 g.wthr = dim3(4,4,4);
// Argument order matches test1(array, p).
104 CUDA_LAUNCH(test1,g,array_ptr,p);
// Accumulate a single pass/fail flag over every element of both buffers.
113 for (
int i = 0 ; i < 16*16*16; i++)
115 check &= ptr1[i] == 5.0;
116 check &= ptr2[i] == 17.0;
119 BOOST_REQUIRE_EQUAL(check,
true);
// Test: launches test1_syncthreads through CUDA_LAUNCH and requires that all
// 16*16*16 floats read through ptr1 equal 64.0 and all read through ptr2
// equal 128.0.
// NOTE(review): the declarations of mem, mem2, array_ptr, p, g, ptr1, ptr2 and
// check are not visible here. 64 and 128 are presumably one and two times the
// number of threads per block -- confirm against the kernel and g.thr.
122 BOOST_AUTO_TEST_CASE( cudify_on_test_test2)
// Each buffer is sized for 16*16*16 floats.
127 mem.
allocate(16*16*16*
sizeof(
float));
130 mem2.
allocate(16*16*16*
sizeof(
float));
// wthr is set to 4 x 4 x 4.
139 g.wthr = dim3(4,4,4);
// Argument order matches test1_syncthreads(p, array).
142 CUDA_LAUNCH(test1_syncthreads,g,p,array_ptr);
// Accumulate a single pass/fail flag over every element of both buffers.
151 for (
int i = 0 ; i < 16*16*16; i++)
155 check &= ptr1[i] == 64.0;
156 check &= ptr2[i] == 128.0;
159 BOOST_REQUIRE_EQUAL(check,
true);
// Test: same stores and same expected results (64.0 / 128.0) as the
// test1_syncthreads case, but the device code is a __device__ lambda launched
// through CUDA_LAUNCH_LAMBDA instead of a named kernel.
// NOTE(review): the declarations of mem, mem2, array_ptr, p, g, ptr1, ptr2,
// check and cnt are not visible here -- confirm against the full test body.
163 BOOST_AUTO_TEST_CASE( cudify_on_test_test2_lambda)
// Each buffer is sized for 16*16*16 floats.
168 mem.
allocate(16*16*16*
sizeof(
float));
171 mem2.
allocate(16*16*16*
sizeof(
float));
// Local copy of the pointer so the lambda can capture it by value.
180 float * array = array_ptr;
// wthr is set to 4 x 4 x 4.
182 g.wthr = dim3(4,4,4);
// The lambda captures array and p by value and receives blockIdx / threadIdx
// as explicit dim3 reference parameters.
185 auto lambda_f = [array,p] __device__ (dim3 & blockIdx, dim3 & threadIdx){
// Global coordinate of this thread along each axis.
192 size_t idx_x = blockIdx.x * blockDim.x + threadIdx.x;
193 size_t idx_y = blockIdx.y * blockDim.y + threadIdx.y;
194 size_t idx_z = blockIdx.z * blockDim.z + threadIdx.z;
// Initial stores; linear offset has x varying fastest, then y, then z.
196 array[idx_z*gridDim.x*gridDim.y*blockDim.x*blockDim.y + idx_y*gridDim.x*blockDim.x + idx_x] = 5.0;
198 p.ptr[idx_z*gridDim.x*gridDim.y*blockDim.x*blockDim.y + idx_y*gridDim.x*blockDim.x + idx_x] = 17.0;
// Both locations are then overwritten with the value of cnt.
204 array[idx_z*gridDim.x*gridDim.y*blockDim.x*blockDim.y + idx_y*gridDim.x*blockDim.x + idx_x] = cnt;
212 p.ptr[idx_z*gridDim.x*gridDim.y*blockDim.x*blockDim.y + idx_y*gridDim.x*blockDim.x + idx_x] = cnt;
215 CUDA_LAUNCH_LAMBDA(g, lambda_f);
// Accumulate a single pass/fail flag over every element of both buffers.
224 for (
int i = 0 ; i < 16*16*16; i++)
228 check &= ptr1[i] == 64.0;
229 check &= ptr2[i] == 128.0;
232 BOOST_REQUIRE_EQUAL(check,
true);
// Test: checks that the components of f3 read back as 0.0, 1.0, 2.0 and that
// a float4 built with make_float4(0.0,1.0,2.0,3.0) reads back as
// 0.0, 1.0, 2.0, 3.0 through its x, y, z, w members.
// NOTE(review): "chack" looks like a typo for "check"; it is left unchanged
// because it is the registered test-case name.
// NOTE(review): the declaration of f3 is not visible here; presumably a
// float3 built analogously to f4 -- confirm.
235 BOOST_AUTO_TEST_CASE( cudify_type_chack)
243 BOOST_REQUIRE_EQUAL(f3.x,0.0);
244 BOOST_REQUIRE_EQUAL(f3.y,1.0);
245 BOOST_REQUIRE_EQUAL(f3.z,2.0);
247 float4 f4 = make_float4(0.0,1.0,2.0,3.0);
250 BOOST_REQUIRE_EQUAL(f4.x,0.0);
251 BOOST_REQUIRE_EQUAL(f4.y,1.0);
252 BOOST_REQUIRE_EQUAL(f4.z,2.0);
253 BOOST_REQUIRE_EQUAL(f4.w,3.0);
// Closes the Boost test suite opened at the top of this file.
256 BOOST_AUTO_TEST_SUITE_END()