OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
 
Loading...
Searching...
No Matches
scan_ofp.cuh
1/*
2 * scan_ofp.cuh
3 *
4 * Created on: May 15, 2019
5 * Author: i-bird
6 */
7
8#ifndef SCAN_OFP_HPP_
9#define SCAN_OFP_HPP_
10
11#ifdef __NVCC__
12
13#include "util/cuda_launch.hpp"
14#include "util/ofp_context.hpp"
15
16#if CUDART_VERSION >= 11000
17 // Here we have for sure CUDA >= 11
18 #ifndef CUDA_ON_CPU
19 #ifdef __HIP__
20 #include "hipcub/hipcub.hpp"
21 #else
22 #include "cub/cub.cuh"
23 #endif
24 #endif
25#else
26 #include "cub_old/cub.cuh"
27#endif
28
29
namespace openfpm
{
	/*! \brief Exclusive prefix sum (scan) of a sequence
	 *
	 * Writes into output[i] the sum of input[0] ... input[i-1]; output[0] is 0.
	 *
	 * With CUDA_ON_CPU defined the scan is computed by a sequential host loop.
	 * Otherwise it is delegated to DeviceScan::ExclusiveSum of hipCUB (when
	 * __HIP__ is defined) or CUB. No stream argument is passed to the library,
	 * so its default stream argument is used.
	 *
	 * \tparam input_it iterator type of the input sequence
	 * \tparam output_it iterator type of the output sequence
	 *
	 * \param input first element of the input sequence
	 * \param count number of elements to scan; nothing is done when count <= 0
	 * \param output first element of the output sequence (at least count elements)
	 * \param context context providing the temporary buffer used by the device
	 *        scan (context.getTemporalCUB()); not used in the CUDA_ON_CPU path
	 *
	 */
	template<typename input_it, typename output_it>
	void scan(input_it input, int count, output_it output, gpu::ofp_context_t& context)
	{
		// Empty (or invalid, negative) range: there is nothing to scan
		if (count <= 0) {return;}

#ifdef CUDA_ON_CPU

		// input[i] is always read before output[i] is written, so the loop
		// also works when input and output refer to the same storage
		auto prec = input[0];
		output[0] = 0;
		for (int i = 1 ; i < count ; i++)
		{
			auto next = prec + output[i-1];
			prec = input[i];
			output[i] = next;
		}

#else

		// Both back-ends expose the same DeviceScan interface: select the
		// namespace once so that the two-step call below exists in one copy only
	#ifdef __HIP__
		namespace scan_backend = hipcub;
	#else
		namespace scan_backend = cub;
	#endif

		// Step 1: with a null temporary buffer the call only reports, in
		// temp_storage_bytes, how much temporary storage the scan requires
		size_t temp_storage_bytes = 0;
		scan_backend::DeviceScan::ExclusiveSum(nullptr,
				temp_storage_bytes, input, output, count);

		// Make the temporary buffer owned by the context large enough
		auto & temporal = context.getTemporalCUB();
		temporal.resize(temp_storage_bytes);

		// Step 2: run the actual scan using the temporary buffer
		scan_backend::DeviceScan::ExclusiveSum(temporal.template getDeviceBuffer<0>(),
				temp_storage_bytes, input, output, count);

#endif
	}
}
79
80#endif /* __NVCC__ */
81
82#endif /* SCAN_OFP_HPP_ */
convert a type into constant type
static CUB_RUNTIME_FUNCTION cudaError_t ExclusiveSum(void *d_temp_storage, size_t &temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, int num_items, cudaStream_t stream=0, bool debug_synchronous=false)
Computes a device-wide exclusive prefix sum. The value of 0 is applied as the initial value,...