ReductionOpT: Binary reduction functor type having member T operator()(const T &a, const T &b)
More...
ReductionOpT: Binary reduction functor type having member T operator()(const T &a, const T &b)
Utility class for dispatching the appropriately-tuned kernels for device-wide segmented reduction
Definition at line 681 of file dispatch_reduce.cuh.
Public Types | |
typedef If<(Equals< typename std::iterator_traits< OutputIteratorT >::value_type, void >::VALUE), typename std::iterator_traits< InputIteratorT >::value_type, typename std::iterator_traits< OutputIteratorT >::value_type >::Type | OutputT |
The output value type. | |
Public Types inherited from cub::DeviceReducePolicy< std::iterator_traits< InputIteratorT >::value_type, OffsetT, ReductionOpT > | |
typedef Policy600 | MaxPolicy |
MaxPolicy. | |
Public Member Functions | |
CUB_RUNTIME_FUNCTION __forceinline__ | DispatchSegmentedReduce (void *d_temp_storage, size_t &temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, OffsetT num_segments, OffsetIteratorT d_begin_offsets, OffsetIteratorT d_end_offsets, ReductionOpT reduction_op, OutputT init, cudaStream_t stream, bool debug_synchronous, int ptx_version) |
Constructor. | |
template<typename ActivePolicyT , typename DeviceSegmentedReduceKernelT > | |
CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t | InvokePasses (DeviceSegmentedReduceKernelT segmented_reduce_kernel) |
Invocation. | |
template<typename ActivePolicyT > | |
CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t | Invoke () |
Invocation. | |
Static Public Member Functions | |
CUB_RUNTIME_FUNCTION static __forceinline__ cudaError_t | Dispatch (void *d_temp_storage, size_t &temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, int num_segments, OffsetIteratorT d_begin_offsets, OffsetIteratorT d_end_offsets, ReductionOpT reduction_op, OutputT init, cudaStream_t stream, bool debug_synchronous) |
Data Fields | |
void * | d_temp_storage |
[in] Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to temp_storage_bytes and no work is done. | |
size_t & | temp_storage_bytes |
[in,out] Reference to size in bytes of d_temp_storage allocation | |
InputIteratorT | d_in |
[in] Pointer to the input sequence of data items | |
OutputIteratorT | d_out |
[out] Pointer to the output aggregate | |
OffsetT | num_segments |
[in] The number of segments that comprise the input data | |
OffsetIteratorT | d_begin_offsets |
[in] Pointer to the sequence of beginning offsets of length num_segments , such that d_begin_offsets[i] is the first element of the ith data segment in d_in | |
OffsetIteratorT | d_end_offsets |
[in] Pointer to the sequence of ending offsets of length num_segments , such that d_end_offsets[i]-1 is the last element of the ith data segment in d_in . If d_end_offsets[i]-1 <= d_begin_offsets[i] , the ith segment is considered empty. | |
ReductionOpT | reduction_op |
[in] Binary reduction functor | |
OutputT | init |
[in] The initial value of the reduction | |
cudaStream_t | stream |
[in] CUDA stream to launch kernels within. Default is stream0. | |
bool | debug_synchronous |
[in] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is false . | |
int | ptx_version |
[in] PTX version | |
typedef If<(Equals<typename std::iterator_traits<OutputIteratorT>::value_type,void>::VALUE),typename std::iterator_traits<InputIteratorT>::value_type,typename std::iterator_traits<OutputIteratorT>::value_type>::Type cub::DispatchSegmentedReduce< InputIteratorT, OutputIteratorT, OffsetIteratorT, OffsetT, ReductionOpT >::OutputT |
The output value type.
Definition at line 694 of file dispatch_reduce.cuh.
|
inline |
Constructor.
Definition at line 720 of file dispatch_reduce.cuh.
|
inline static |
Internal dispatch routine for computing a device-wide segmented reduction
[in] | d_temp_storage | Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to temp_storage_bytes and no work is done. |
[in,out] | temp_storage_bytes | Reference to size in bytes of d_temp_storage allocation |
[in] | d_in | Pointer to the input sequence of data items |
[out] | d_out | Pointer to the output aggregate |
[in] | num_segments | The number of segments that comprise the input data |
[in] | d_begin_offsets | Pointer to the sequence of beginning offsets of length num_segments , such that d_begin_offsets[i] is the first element of the ith data segment in d_in |
[in] | d_end_offsets | Pointer to the sequence of ending offsets of length num_segments , such that d_end_offsets[i]-1 is the last element of the ith data segment in d_in . If d_end_offsets[i]-1 <= d_begin_offsets[i] , the ith segment is considered empty. |
[in] | reduction_op | Binary reduction functor |
[in] | init | The initial value of the reduction |
[in] | stream | [optional] CUDA stream to launch kernels within. Default is stream0. |
[in] | debug_synchronous | [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is false . |
Definition at line 835 of file dispatch_reduce.cuh.
|
inline |
Invocation.
Definition at line 817 of file dispatch_reduce.cuh.
|
inline |
Invocation.
DeviceSegmentedReduceKernelT: Function type of cub::DeviceSegmentedReduceKernel
[in] | segmented_reduce_kernel | Kernel function pointer to parameterization of cub::DeviceSegmentedReduceKernel |
Definition at line 759 of file dispatch_reduce.cuh.
OffsetIteratorT cub::DispatchSegmentedReduce< InputIteratorT, OutputIteratorT, OffsetIteratorT, OffsetT, ReductionOpT >::d_begin_offsets |
[in] Pointer to the sequence of beginning offsets of length num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_in
Definition at line 706 of file dispatch_reduce.cuh.
OffsetIteratorT cub::DispatchSegmentedReduce< InputIteratorT, OutputIteratorT, OffsetIteratorT, OffsetT, ReductionOpT >::d_end_offsets |
[in] Pointer to the sequence of ending offsets of length num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_in. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith segment is considered empty.
Definition at line 707 of file dispatch_reduce.cuh.
InputIteratorT cub::DispatchSegmentedReduce< InputIteratorT, OutputIteratorT, OffsetIteratorT, OffsetT, ReductionOpT >::d_in |
[in] Pointer to the input sequence of data items
Definition at line 703 of file dispatch_reduce.cuh.
OutputIteratorT cub::DispatchSegmentedReduce< InputIteratorT, OutputIteratorT, OffsetIteratorT, OffsetT, ReductionOpT >::d_out |
[out] Pointer to the output aggregate
Definition at line 704 of file dispatch_reduce.cuh.
void* cub::DispatchSegmentedReduce< InputIteratorT, OutputIteratorT, OffsetIteratorT, OffsetT, ReductionOpT >::d_temp_storage |
[in] Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to temp_storage_bytes and no work is done.
Definition at line 701 of file dispatch_reduce.cuh.
bool cub::DispatchSegmentedReduce< InputIteratorT, OutputIteratorT, OffsetIteratorT, OffsetT, ReductionOpT >::debug_synchronous |
[in] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is false.
Definition at line 711 of file dispatch_reduce.cuh.
OutputT cub::DispatchSegmentedReduce< InputIteratorT, OutputIteratorT, OffsetIteratorT, OffsetT, ReductionOpT >::init |
[in] The initial value of the reduction
Definition at line 709 of file dispatch_reduce.cuh.
OffsetT cub::DispatchSegmentedReduce< InputIteratorT, OutputIteratorT, OffsetIteratorT, OffsetT, ReductionOpT >::num_segments |
[in] The number of segments that comprise the input data
Definition at line 705 of file dispatch_reduce.cuh.
int cub::DispatchSegmentedReduce< InputIteratorT, OutputIteratorT, OffsetIteratorT, OffsetT, ReductionOpT >::ptx_version |
[in] PTX version
Definition at line 712 of file dispatch_reduce.cuh.
ReductionOpT cub::DispatchSegmentedReduce< InputIteratorT, OutputIteratorT, OffsetIteratorT, OffsetT, ReductionOpT >::reduction_op |
[in] Binary reduction functor
Definition at line 708 of file dispatch_reduce.cuh.
cudaStream_t cub::DispatchSegmentedReduce< InputIteratorT, OutputIteratorT, OffsetIteratorT, OffsetT, ReductionOpT >::stream |
[in] CUDA stream to launch kernels within. Default is stream0.
Definition at line 710 of file dispatch_reduce.cuh.
size_t& cub::DispatchSegmentedReduce< InputIteratorT, OutputIteratorT, OffsetIteratorT, OffsetT, ReductionOpT >::temp_storage_bytes |
[in,out] Reference to size in bytes of d_temp_storage allocation
Definition at line 702 of file dispatch_reduce.cuh.