OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
 
Loading...
Searching...
No Matches
cub_gpu_tests.cu
1/*
2 * cub_gpu_tests.cu
3 *
4 * Created on: May 15, 2019
5 * Author: i-bird
6 */
7
8#ifndef CUB_GPU_TESTS_CU_
9#define CUB_GPU_TESTS_CU_
10
11#include "config.h"
12#define BOOST_TEST_DYN_LINK
13#include <boost/test/unit_test.hpp>
14#include "cub/cub.cuh"
15#include "Vector/map_vector.hpp"
16
17BOOST_AUTO_TEST_SUITE( cub_gpu_tests )
18
// Test case: runs cub::DeviceScan::ExclusiveSum over 10000 elements and checks
// every output element against a running sum accumulated on the host.
//
// NOTE(review): the declarations of `input`, `output` and `temporal` are not
// visible in this listing (original source lines 21-25 are missing). The
// (unsigned int *) casts below suggest that property 0 of `input`/`output`
// holds unsigned int values -- confirm against the original file.
19BOOST_AUTO_TEST_CASE( cub_gpu_scan_test )
20{
23
25
26 input.resize(10000);
27 output.resize(10000);
28
29 // fill input
30
31 for (size_t i = 0 ; i < 10000; i++)
32 {
 // The right-hand side is a value in [0, 10] computed in double precision;
 // how it is converted on store depends on the element type of `input`,
 // which is not visible here.
 // NOTE(review): rand() is used without a visible srand() call in this block.
33 input.template get<0>(i) = 10.0*(float)rand() / RAND_MAX;
34 }
35
 // presumably uploads property 0 of `input` to the device -- verify against
 // the vector implementation
36 input.template hostToDevice<0>();
37
 // First call: size query. With a NULL temp-storage pointer CUB only writes
 // the required scratch size into temp_storage_bytes (passed by reference)
 // and does not perform the scan.
 // NOTE(review): ExclusiveSum returns a cudaError_t that is discarded here.
 // NOTE(review): num_items is declared as int, while input.size() returns
 // size_t, so the argument is narrowed implicitly.
38 void *d_temp_storage = NULL;
39 size_t temp_storage_bytes = 0;
40 cub::DeviceScan::ExclusiveSum(d_temp_storage, temp_storage_bytes,(unsigned int *)input.template getDeviceBuffer<0>(),
41 (unsigned int *)output.template getDeviceBuffer<0>(),
42 input.size());
43
 // Allocate the scratch buffer: `temporal` is resized to temp_storage_bytes
 // elements. NOTE(review): this equals the requested byte count only if the
 // element type of `temporal` is one byte wide -- confirm.
44 temporal.resize(temp_storage_bytes);
45
46 // Run
 // Second call: same arguments, but with the device buffer of `temporal` as
 // temp storage, so the scan is actually executed. The returned cudaError_t
 // is again discarded.
47 cub::DeviceScan::ExclusiveSum(temporal.template getDeviceBuffer<0>(), temp_storage_bytes,(unsigned int *)input.template getDeviceBuffer<0>(),
48 (unsigned int *)output.template getDeviceBuffer<0>(),
49 input.size());
50
51 // Check
52
 // presumably copies property 0 of `output` back to the host -- verify
 // against the vector implementation
53 output.template deviceToHost<0>();
54
 // Host reference: before each comparison `cnt` holds the sum of all input
 // elements with index < i, i.e. the expected exclusive prefix sum with an
 // initial value of 0. The comparison happens before input[i] is added.
55 size_t cnt = 0;
56 for (size_t i = 0 ; i < input.size() ; i++)
57 {
58 BOOST_REQUIRE_EQUAL(cnt,output.template get<0>(i));
59 cnt += input.template get<0>(i);
60 }
61}
62
63
64BOOST_AUTO_TEST_SUITE_END()
65
66#endif /* CUB_GPU_TESTS_CU_ */
Implementation of 1-D std::vector like structure.
size_t size()
Stub size.
static CUB_RUNTIME_FUNCTION cudaError_t ExclusiveSum(void *d_temp_storage, size_t &temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, int num_items, cudaStream_t stream=0, bool debug_synchronous=false)
Computes a device-wide exclusive prefix sum. The value of 0 is applied as the initial value,...