Signed integer type for global offsets More...
Signed integer type for global offsets
Utility class for dispatching the appropriately-tuned kernels for DeviceReduceByKey
Definition at line 126 of file dispatch_reduce_by_key.cuh.
Data Structures | |
struct | KernelConfig |
struct | Policy110 |
SM11. More... | |
struct | Policy130 |
SM13. More... | |
struct | Policy200 |
SM20. More... | |
struct | Policy300 |
SM30. More... | |
struct | Policy350 |
SM35. More... | |
struct | PtxReduceByKeyPolicy |
Public Types | |
enum | { INIT_KERNEL_THREADS = 128 , MAX_INPUT_BYTES = CUB_MAX(sizeof(KeyOutputT), sizeof(ValueOutputT)) , COMBINED_INPUT_BYTES = sizeof(KeyOutputT) + sizeof(ValueOutputT) } |
typedef std::iterator_traits< KeysInputIteratorT >::value_type | KeyInputT |
typedef If<(Equals< typename std::iterator_traits< UniqueOutputIteratorT >::value_type, void >::VALUE), typename std::iterator_traits< KeysInputIteratorT >::value_type, typename std::iterator_traits< UniqueOutputIteratorT >::value_type >::Type | KeyOutputT |
typedef std::iterator_traits< ValuesInputIteratorT >::value_type | ValueInputT |
typedef If<(Equals< typename std::iterator_traits< AggregatesOutputIteratorT >::value_type, void >::VALUE), typename std::iterator_traits< ValuesInputIteratorT >::value_type, typename std::iterator_traits< AggregatesOutputIteratorT >::value_type >::Type | ValueOutputT |
typedef ReduceByKeyScanTileState< ValueOutputT, OffsetT > | ScanTileStateT |
typedef Policy110 | PtxPolicy |
Static Public Member Functions | |
template<typename KernelConfig > | |
CUB_RUNTIME_FUNCTION static __forceinline__ void | InitConfigs (int ptx_version, KernelConfig &reduce_by_key_config) |
template<typename ScanInitKernelT , typename ReduceByKeyKernelT > | |
CUB_RUNTIME_FUNCTION static __forceinline__ cudaError_t | Dispatch (void *d_temp_storage, size_t &temp_storage_bytes, KeysInputIteratorT d_keys_in, UniqueOutputIteratorT d_unique_out, ValuesInputIteratorT d_values_in, AggregatesOutputIteratorT d_aggregates_out, NumRunsOutputIteratorT d_num_runs_out, EqualityOpT equality_op, ReductionOpT reduction_op, OffsetT num_items, cudaStream_t stream, bool debug_synchronous, int, ScanInitKernelT init_kernel, ReduceByKeyKernelT reduce_by_key_kernel, KernelConfig reduce_by_key_config) |
Function type of cub::DeviceReduceByKeyKernelT | |
CUB_RUNTIME_FUNCTION static __forceinline__ cudaError_t | Dispatch (void *d_temp_storage, size_t &temp_storage_bytes, KeysInputIteratorT d_keys_in, UniqueOutputIteratorT d_unique_out, ValuesInputIteratorT d_values_in, AggregatesOutputIteratorT d_aggregates_out, NumRunsOutputIteratorT d_num_runs_out, EqualityOpT equality_op, ReductionOpT reduction_op, OffsetT num_items, cudaStream_t stream, bool debug_synchronous) |
typedef std::iterator_traits<KeysInputIteratorT>::value_type cub::DispatchReduceByKey< KeysInputIteratorT, UniqueOutputIteratorT, ValuesInputIteratorT, AggregatesOutputIteratorT, NumRunsOutputIteratorT, EqualityOpT, ReductionOpT, OffsetT >::KeyInputT |
Definition at line 133 of file dispatch_reduce_by_key.cuh.
typedef If<(Equals<typename std::iterator_traits<UniqueOutputIteratorT>::value_type,void>::VALUE),typename std::iterator_traits<KeysInputIteratorT>::value_type,typename std::iterator_traits<UniqueOutputIteratorT>::value_type>::Type cub::DispatchReduceByKey< KeysInputIteratorT, UniqueOutputIteratorT, ValuesInputIteratorT, AggregatesOutputIteratorT, NumRunsOutputIteratorT, EqualityOpT, ReductionOpT, OffsetT >::KeyOutputT |
Definition at line 138 of file dispatch_reduce_by_key.cuh.
typedef Policy110 cub::DispatchReduceByKey< KeysInputIteratorT, UniqueOutputIteratorT, ValuesInputIteratorT, AggregatesOutputIteratorT, NumRunsOutputIteratorT, EqualityOpT, ReductionOpT, OffsetT >::PtxPolicy |
Definition at line 266 of file dispatch_reduce_by_key.cuh.
typedef ReduceByKeyScanTileState<ValueOutputT, OffsetT> cub::DispatchReduceByKey< KeysInputIteratorT, UniqueOutputIteratorT, ValuesInputIteratorT, AggregatesOutputIteratorT, NumRunsOutputIteratorT, EqualityOpT, ReductionOpT, OffsetT >::ScanTileStateT |
Definition at line 156 of file dispatch_reduce_by_key.cuh.
typedef std::iterator_traits<ValuesInputIteratorT>::value_type cub::DispatchReduceByKey< KeysInputIteratorT, UniqueOutputIteratorT, ValuesInputIteratorT, AggregatesOutputIteratorT, NumRunsOutputIteratorT, EqualityOpT, ReductionOpT, OffsetT >::ValueInputT |
Definition at line 141 of file dispatch_reduce_by_key.cuh.
typedef If<(Equals<typename std::iterator_traits<AggregatesOutputIteratorT>::value_type,void>::VALUE),typename std::iterator_traits<ValuesInputIteratorT>::value_type,typename std::iterator_traits<AggregatesOutputIteratorT>::value_type>::Type cub::DispatchReduceByKey< KeysInputIteratorT, UniqueOutputIteratorT, ValuesInputIteratorT, AggregatesOutputIteratorT, NumRunsOutputIteratorT, EqualityOpT, ReductionOpT, OffsetT >::ValueOutputT |
Definition at line 146 of file dispatch_reduce_by_key.cuh.
anonymous enum |
Definition at line 148 of file dispatch_reduce_by_key.cuh.
|
inline static |
Internal dispatch routine
[in] | d_temp_storage | Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to temp_storage_bytes and no work is done. |
[in,out] | temp_storage_bytes | Reference to size in bytes of d_temp_storage allocation |
[in] | d_keys_in | Pointer to the input sequence of keys |
[out] | d_unique_out | Pointer to the output sequence of unique keys (one key per run) |
[in] | d_values_in | Pointer to the input sequence of corresponding values |
[out] | d_aggregates_out | Pointer to the output sequence of value aggregates (one aggregate per run) |
[out] | d_num_runs_out | Pointer to total number of runs encountered (i.e., the length of d_unique_out) |
[in] | equality_op | KeyT equality operator |
[in] | reduction_op | ValueT reduction operator |
[in] | num_items | Total number of input items (i.e., the length of d_keys_in and d_values_in) |
[in] | stream | CUDA stream to launch kernels within. Default is stream 0. |
[in] | debug_synchronous | Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is false. |
Definition at line 497 of file dispatch_reduce_by_key.cuh.
|
inline static |
Function type of cub::DeviceReduceByKeyKernelT
Internal dispatch routine for computing a device-wide reduce-by-key using the specified kernel functions.
[in] | d_temp_storage | Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to temp_storage_bytes and no work is done. |
[in,out] | temp_storage_bytes | Reference to size in bytes of d_temp_storage allocation |
[in] | d_keys_in | Pointer to the input sequence of keys |
[out] | d_unique_out | Pointer to the output sequence of unique keys (one key per run) |
[in] | d_values_in | Pointer to the input sequence of corresponding values |
[out] | d_aggregates_out | Pointer to the output sequence of value aggregates (one aggregate per run) |
[out] | d_num_runs_out | Pointer to total number of runs encountered (i.e., the length of d_unique_out) |
[in] | equality_op | KeyT equality operator |
[in] | reduction_op | ValueT reduction operator |
[in] | num_items | Total number of input items (i.e., the length of d_keys_in and d_values_in) |
[in] | stream | CUDA stream to launch kernels within. Default is stream 0. |
[in] | debug_synchronous | Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is false. |
[in] | init_kernel | Kernel function pointer to parameterization of cub::DeviceScanInitKernel |
[in] | reduce_by_key_kernel | Kernel function pointer to parameterization of cub::DeviceReduceByKeyKernel |
[in] | reduce_by_key_config | Dispatch parameters that match the policy that reduce_by_key_kernel was compiled for |
Definition at line 353 of file dispatch_reduce_by_key.cuh.
|
inline static |
Initialize kernel dispatch configurations with the policies corresponding to the PTX assembly we will use
Definition at line 283 of file dispatch_reduce_by_key.cuh.