template<typename InputIteratorT, typename OutputIteratorT, typename OffsetIteratorT, typename OffsetT, typename ReductionOpT>
struct cub::DispatchSegmentedReduce< InputIteratorT, OutputIteratorT, OffsetIteratorT, OffsetT, ReductionOpT >
ReductionOpT: Binary reduction functor type having member T operator()(const T &a, const T &b)
Utility class for dispatching the appropriately-tuned kernels for device-wide segmented reduction
Definition at line 681 of file dispatch_reduce.cuh.
|
CUB_RUNTIME_FUNCTION __forceinline__ | DispatchSegmentedReduce (void *d_temp_storage, size_t &temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, OffsetT num_segments, OffsetIteratorT d_begin_offsets, OffsetIteratorT d_end_offsets, ReductionOpT reduction_op, OutputT init, cudaStream_t stream, bool debug_synchronous, int ptx_version) |
| Constructor.
|
|
template<typename ActivePolicyT , typename DeviceSegmentedReduceKernelT > |
CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t | InvokePasses (DeviceSegmentedReduceKernelT segmented_reduce_kernel) |
| Invocation. More...
|
|
template<typename ActivePolicyT > |
CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t | Invoke () |
| Invocation.
|
|
|
void * | d_temp_storage |
| [in] Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to temp_storage_bytes and no work is done.
|
|
size_t & | temp_storage_bytes |
| [in,out] Reference to size in bytes of d_temp_storage allocation
|
|
InputIteratorT | d_in |
| [in] Pointer to the input sequence of data items
|
|
OutputIteratorT | d_out |
| [out] Pointer to the output aggregate
|
|
OffsetT | num_segments |
| [in] The number of segments that comprise the input data
|
|
OffsetIteratorT | d_begin_offsets |
| [in] Pointer to the sequence of beginning offsets of length num_segments , such that d_begin_offsets[i] is the first element of the ith data segment in d_in
|
|
OffsetIteratorT | d_end_offsets |
| [in] Pointer to the sequence of ending offsets of length num_segments , such that d_end_offsets[i]-1 is the last element of the ith data segment in d_in . If d_end_offsets[i]-1 < d_begin_offsets[i] , the ith segment is considered empty.
|
|
ReductionOpT | reduction_op |
| [in] Binary reduction functor
|
|
OutputT | init |
| [in] The initial value of the reduction
|
|
cudaStream_t | stream |
| [in] CUDA stream to launch kernels within. Default is stream 0.
|
|
bool | debug_synchronous |
| [in] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is false .
|
|
int | ptx_version |
| [in] PTX version
|
|
template<typename InputIteratorT , typename OutputIteratorT , typename OffsetIteratorT , typename OffsetT , typename ReductionOpT >
CUB_RUNTIME_FUNCTION static __forceinline__ cudaError_t cub::DispatchSegmentedReduce< InputIteratorT, OutputIteratorT, OffsetIteratorT, OffsetT, ReductionOpT >::Dispatch |
( |
void * |
d_temp_storage, |
|
|
size_t & |
temp_storage_bytes, |
|
|
InputIteratorT |
d_in, |
|
|
OutputIteratorT |
d_out, |
|
|
int |
num_segments, |
|
|
OffsetIteratorT |
d_begin_offsets, |
|
|
OffsetIteratorT |
d_end_offsets, |
|
|
ReductionOpT |
reduction_op, |
|
|
OutputT |
init, |
|
|
cudaStream_t |
stream, |
|
|
bool |
debug_synchronous |
|
) |
| |
|
inline static
Internal dispatch routine for computing a device-wide segmented reduction
- Parameters
-
[in] | d_temp_storage | Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to temp_storage_bytes and no work is done. |
[in,out] | temp_storage_bytes | Reference to size in bytes of d_temp_storage allocation |
[in] | d_in | Pointer to the input sequence of data items |
[out] | d_out | Pointer to the output aggregate |
[in] | num_segments | The number of segments that comprise the input data |
[in] | d_begin_offsets | Pointer to the sequence of beginning offsets of length num_segments , such that d_begin_offsets[i] is the first element of the ith data segment in d_in |
[in] | d_end_offsets | Pointer to the sequence of ending offsets of length num_segments , such that d_end_offsets[i]-1 is the last element of the ith data segment in d_in . If d_end_offsets[i]-1 < d_begin_offsets[i] , the ith segment is considered empty. |
[in] | reduction_op | Binary reduction functor |
[in] | init | The initial value of the reduction |
[in] | stream | [optional] CUDA stream to launch kernels within. Default is stream 0. |
[in] | debug_synchronous | [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is false . |
Definition at line 835 of file dispatch_reduce.cuh.
template<typename InputIteratorT , typename OutputIteratorT , typename OffsetIteratorT , typename OffsetT , typename ReductionOpT >
template<typename ActivePolicyT , typename DeviceSegmentedReduceKernelT >
CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t cub::DispatchSegmentedReduce< InputIteratorT, OutputIteratorT, OffsetIteratorT, OffsetT, ReductionOpT >::InvokePasses |
( |
DeviceSegmentedReduceKernelT |
segmented_reduce_kernel | ) |
|
|
inline |
Invocation.
DeviceSegmentedReduceKernelT: Function type of cub::DeviceSegmentedReduceKernel
- Parameters
-
[in] | segmented_reduce_kernel | Kernel function pointer to parameterization of cub::DeviceSegmentedReduceKernel |
Definition at line 759 of file dispatch_reduce.cuh.