#define BOOST_TEST_DYN_LINK
#include <boost/test/unit_test.hpp>

#include <type_traits>

// unused global; instantiating the trait here just verifies that the
// <type_traits> machinery is usable in this translation unit
std::is_trivially_copyable<int> b;

#include "util/cuda_launch.hpp"
#include "memory/CudaMemory.cuh"

#ifdef CUDIFY_USE_OPENMP
BOOST_AUTO_TEST_SUITE( cudify_tests_openmp )
#elif defined(CUDIFY_USE_CUDA)
BOOST_AUTO_TEST_SUITE( cudify_tests_cuda )
#elif defined(CUDIFY_USE_HIP)
BOOST_AUTO_TEST_SUITE( cudify_tests_hip )
#else
BOOST_AUTO_TEST_SUITE( cudify_tests_sequential )
#endif
// Launch descriptor consumed by CUDA_LAUNCH: wthr is the grid of blocks and thr
// the threads per block (the struct and method names are reconstructed around the
// two surviving return statements)
struct ite_g
{
    dim3 wthr;
    dim3 thr;

    size_t nblocks()
    {
        return wthr.x * wthr.y * wthr.z;
    }

    size_t nthrs()
    {
        return thr.x * thr.y * thr.z;
    }
};

// Parameter box passed by value to the kernels below; they are templated on T and
// only require a trivially copyable type with a float * member named ptr
// (the type name ptr_box is reconstructed, the original definition is not visible)
struct ptr_box
{
    float * ptr;
};
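// Sanity check (added here, not part of the original test): CUDA_LAUNCH forwards
// its arguments to the kernels by value, so the parameter box has to stay
// trivially copyable; this pins the requirement down at compile time.
static_assert(std::is_trivially_copyable<ptr_box>::value,
              "kernel parameter box must be trivially copyable");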
template<typename T>
__global__ void test1(float * array, T p)
{
    size_t idx_x = blockIdx.x * blockDim.x + threadIdx.x;
    size_t idx_y = blockIdx.y * blockDim.y + threadIdx.y;
    size_t idx_z = blockIdx.z * blockDim.z + threadIdx.z;

    // every thread writes its own cell of the linearized 3D thread space
    array[idx_z*gridDim.x*gridDim.y*blockDim.x*blockDim.y + idx_y*gridDim.x*blockDim.x + idx_x] = 5.0;
    p.ptr[idx_z*gridDim.x*gridDim.y*blockDim.x*blockDim.y + idx_y*gridDim.x*blockDim.x + idx_x] = 17.0;
}
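// Indexing note: with wthr = thr = dim3(4,4,4) (as launched below),
// gridDim.x*blockDim.x = 16 and gridDim.x*gridDim.y*blockDim.x*blockDim.y = 256,
// so the expression above linearizes the 16x16x16 global thread space; e.g.
// (idx_x,idx_y,idx_z) = (1,2,3) lands at 3*256 + 2*16 + 1 = 801, and every one of
// the 16*16*16 cells is written exactly once.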
// Same write pattern as test1, plus a per-block counter that exercises
// __syncthreads(); the atomic/barrier sequence between the surviving lines is
// reconstructed from the values the tests expect (64 and 128)
template<typename T>
__global__ void test1_syncthreads(T p, float * array)
{
    __shared__ int cnt;
    if (threadIdx.x == 0 && threadIdx.y == 0 && threadIdx.z == 0)
    {cnt = 0;}
    __syncthreads();

    size_t idx_x = blockIdx.x * blockDim.x + threadIdx.x;
    size_t idx_y = blockIdx.y * blockDim.y + threadIdx.y;
    size_t idx_z = blockIdx.z * blockDim.z + threadIdx.z;

    array[idx_z*gridDim.x*gridDim.y*blockDim.x*blockDim.y + idx_y*gridDim.x*blockDim.x + idx_x] = 5.0;
    p.ptr[idx_z*gridDim.x*gridDim.y*blockDim.x*blockDim.y + idx_y*gridDim.x*blockDim.x + idx_x] = 17.0;

    atomicAdd(&cnt,1);
    __syncthreads();
    array[idx_z*gridDim.x*gridDim.y*blockDim.x*blockDim.y + idx_y*gridDim.x*blockDim.x + idx_x] = cnt;   // 64

    atomicAdd(&cnt,1);
    __syncthreads();
    p.ptr[idx_z*gridDim.x*gridDim.y*blockDim.x*blockDim.y + idx_y*gridDim.x*blockDim.x + idx_x] = cnt;   // 128
}
BOOST_AUTO_TEST_CASE( cudify_on_test_test )
{
    // allocation/transfer sequence reconstructed from the CudaMemory interface
    // (allocate / getDevicePointer / deviceToHost / getPointer)
    CudaMemory mem;
    mem.allocate(16*16*16*sizeof(float));

    CudaMemory mem2;
    mem2.allocate(16*16*16*sizeof(float));

    float * array_ptr = (float *)mem.getDevicePointer();

    ptr_box p;
    p.ptr = (float *)mem2.getDevicePointer();

    ite_g g;
    g.wthr = dim3(4,4,4);
    g.thr = dim3(4,4,4);

    CUDA_LAUNCH(test1,g,array_ptr,p);

    mem.deviceToHost();
    mem2.deviceToHost();

    float * ptr1 = (float *)mem.getPointer();
    float * ptr2 = (float *)mem2.getPointer();

    bool check = true;
    for (int i = 0 ; i < 16*16*16 ; i++)
    {
        check &= ptr1[i] == 5.0;
        check &= ptr2[i] == 17.0;
    }

    BOOST_REQUIRE_EQUAL(check,true);
}
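// What the check above relies on (a sketch of the expected behaviour, not the
// actual macro expansion): on the CUDA/HIP backends CUDA_LAUNCH(test1,g,array_ptr,p)
// amounts to
//
//   test1<<<g.wthr,g.thr>>>(array_ptr,p);
//
// plus error checking, while the sequential and OpenMP backends iterate the same
// blockIdx/threadIdx space on the host, so the 5.0/17.0 pattern must come out of
// every backend selected by the test-suite #ifdefs at the top of the file.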
BOOST_AUTO_TEST_CASE( cudify_on_test_test2)
{
    CudaMemory mem;
    mem.allocate(16*16*16*sizeof(float));

    CudaMemory mem2;
    mem2.allocate(16*16*16*sizeof(float));

    float * array_ptr = (float *)mem.getDevicePointer();

    ptr_box p;
    p.ptr = (float *)mem2.getDevicePointer();

    ite_g g;
    g.wthr = dim3(4,4,4);
    g.thr = dim3(4,4,4);

    CUDA_LAUNCH(test1_syncthreads,g,p,array_ptr);

    mem.deviceToHost();
    mem2.deviceToHost();

    float * ptr1 = (float *)mem.getPointer();
    float * ptr2 = (float *)mem2.getPointer();

    bool check = true;
    for (int i = 0 ; i < 16*16*16 ; i++)
    {
        check &= ptr1[i] == 64.0;
        check &= ptr2[i] == 128.0;
    }

    BOOST_REQUIRE_EQUAL(check,true);
}
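// Why 64 and 128: each 4x4x4 block runs 64 threads, so after the first
// atomicAdd/__syncthreads pair every thread reads cnt == 64 and after the second
// pair cnt == 128; a backend with a broken __syncthreads would leave non-uniform
// values in the two arrays and the check above would fail.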
BOOST_AUTO_TEST_CASE( cudify_on_test_test2_lambda)
{
    CudaMemory mem;
    mem.allocate(16*16*16*sizeof(float));

    CudaMemory mem2;
    mem2.allocate(16*16*16*sizeof(float));

    float * array_ptr = (float *)mem.getDevicePointer();

    ptr_box p;
    p.ptr = (float *)mem2.getDevicePointer();

    // raw device pointer copied into a local so the lambda can capture it by value
    float * array = array_ptr;

    ite_g g;
    g.wthr = dim3(4,4,4);
    g.thr = dim3(4,4,4);

    // same work as test1_syncthreads, expressed as a __device__ lambda; the
    // counter/barrier sequence is reconstructed to match the 64/128 checks below
    auto lambda_f = [array,p] __device__ (dim3 & blockIdx, dim3 & threadIdx){
        __shared__ int cnt;
        if (threadIdx.x == 0 && threadIdx.y == 0 && threadIdx.z == 0)
        {cnt = 0;}
        __syncthreads();

        size_t idx_x = blockIdx.x * blockDim.x + threadIdx.x;
        size_t idx_y = blockIdx.y * blockDim.y + threadIdx.y;
        size_t idx_z = blockIdx.z * blockDim.z + threadIdx.z;

        array[idx_z*gridDim.x*gridDim.y*blockDim.x*blockDim.y + idx_y*gridDim.x*blockDim.x + idx_x] = 5.0;
        p.ptr[idx_z*gridDim.x*gridDim.y*blockDim.x*blockDim.y + idx_y*gridDim.x*blockDim.x + idx_x] = 17.0;

        atomicAdd(&cnt,1);
        __syncthreads();
        array[idx_z*gridDim.x*gridDim.y*blockDim.x*blockDim.y + idx_y*gridDim.x*blockDim.x + idx_x] = cnt;

        atomicAdd(&cnt,1);
        __syncthreads();
        p.ptr[idx_z*gridDim.x*gridDim.y*blockDim.x*blockDim.y + idx_y*gridDim.x*blockDim.x + idx_x] = cnt;
    };

    CUDA_LAUNCH_LAMBDA(g, lambda_f);

    mem.deviceToHost();
    mem2.deviceToHost();

    float * ptr1 = (float *)mem.getPointer();
    float * ptr2 = (float *)mem2.getPointer();

    bool check = true;
    for (int i = 0 ; i < 16*16*16 ; i++)
    {
        check &= ptr1[i] == 64.0;
        check &= ptr2[i] == 128.0;
    }

    BOOST_REQUIRE_EQUAL(check,true);
}
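// Note on the lambda variant: the functor receives blockIdx/threadIdx explicitly
// as dim3& parameters and captures the raw device pointers by value ([array,p]),
// which is why they are copied into plain locals before the lambda is built;
// CUDA_LAUNCH_LAMBDA takes the same launch descriptor g as CUDA_LAUNCH.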
BOOST_AUTO_TEST_CASE( cudify_type_check )
{
    float3 f3 = make_float3(0.0,1.0,2.0);

    BOOST_REQUIRE_EQUAL(f3.x,0.0);
    BOOST_REQUIRE_EQUAL(f3.y,1.0);
    BOOST_REQUIRE_EQUAL(f3.z,2.0);

    float4 f4 = make_float4(0.0,1.0,2.0,3.0);

    BOOST_REQUIRE_EQUAL(f4.x,0.0);
    BOOST_REQUIRE_EQUAL(f4.y,1.0);
    BOOST_REQUIRE_EQUAL(f4.z,2.0);
    BOOST_REQUIRE_EQUAL(f4.w,3.0);
}
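// The exact == comparisons in the test above are safe: 0.0, 1.0, 2.0 and 3.0 are
// exactly representable as float, so BOOST_REQUIRE_EQUAL needs no tolerance here.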
BOOST_AUTO_TEST_SUITE_END()