WarpScanShfl provides SHFL-based variants of parallel prefix scan of items partitioned across a CUDA thread warp. More...
WarpScanShfl provides SHFL-based variants of parallel prefix scan of items partitioned across a CUDA thread warp.
LOGICAL_WARP_THREADS must be a power-of-two. PTX_ARCH is the PTX compute capability for which to specialize this collective.
Definition at line 56 of file warp_scan_shfl.cuh.
Data Structures | |
| struct | IntegerTraits |
| struct | TempStorage |
| Shared memory storage layout type. More... | |
Public Types | |
| enum | { IS_ARCH_WARP = (LOGICAL_WARP_THREADS == CUB_WARP_THREADS(PTX_ARCH)) , STEPS = Log2<LOGICAL_WARP_THREADS>::VALUE , SHFL_C = (CUB_WARP_THREADS(PTX_ARCH) - LOGICAL_WARP_THREADS) << 8 } |
Public Member Functions | |
| __device__ __forceinline__ | WarpScanShfl (TempStorage &) |
| Constructor. | |
| __device__ __forceinline__ int | InclusiveScanStep (int input, cub::Sum, int first_lane, int offset) |
| Inclusive prefix scan step (specialized for summation across int32 types) | |
| __device__ __forceinline__ unsigned int | InclusiveScanStep (unsigned int input, cub::Sum, int first_lane, int offset) |
| Inclusive prefix scan step (specialized for summation across uint32 types) | |
| __device__ __forceinline__ float | InclusiveScanStep (float input, cub::Sum, int first_lane, int offset) |
| Inclusive prefix scan step (specialized for summation across fp32 types) | |
| __device__ __forceinline__ unsigned long long | InclusiveScanStep (unsigned long long input, cub::Sum, int first_lane, int offset) |
| Inclusive prefix scan step (specialized for summation across unsigned long long types) | |
| __device__ __forceinline__ long long | InclusiveScanStep (long long input, cub::Sum, int first_lane, int offset) |
| Inclusive prefix scan step (specialized for summation across long long types) | |
| __device__ __forceinline__ double | InclusiveScanStep (double input, cub::Sum, int first_lane, int offset) |
| Inclusive prefix scan step (specialized for summation across fp64 types) | |
| template<typename _T , typename ScanOpT > | |
| __device__ __forceinline__ _T | InclusiveScanStep (_T input, ScanOpT scan_op, int first_lane, int offset) |
| Inclusive prefix scan step (generic) | |
| template<typename _T , typename ScanOpT > | |
| __device__ __forceinline__ _T | InclusiveScanStep (_T input, ScanOpT scan_op, int first_lane, int offset, Int2Type< true >) |
| Inclusive prefix scan step (specialized for small integers size 32b or less) | |
| template<typename _T , typename ScanOpT > | |
| __device__ __forceinline__ _T | InclusiveScanStep (_T input, ScanOpT scan_op, int first_lane, int offset, Int2Type< false >) |
| Inclusive prefix scan step (specialized for types other than small integers size 32b or less) | |
| __device__ __forceinline__ T | Broadcast (T input, int src_lane) |
| Broadcast. | |
| template<typename _T , typename ScanOpT > | |
| __device__ __forceinline__ void | InclusiveScan (_T input, _T &inclusive_output, ScanOpT scan_op) |
| Inclusive scan. | |
| template<typename KeyT , typename ValueT , typename ReductionOpT > | |
| __device__ __forceinline__ void | InclusiveScan (KeyValuePair< KeyT, ValueT > input, KeyValuePair< KeyT, ValueT > &inclusive_output, ReduceByKeyOp< ReductionOpT > scan_op) |
| Inclusive scan, specialized for reduce-value-by-key. | |
| template<typename ScanOpT > | |
| __device__ __forceinline__ void | InclusiveScan (T input, T &inclusive_output, ScanOpT scan_op, T &warp_aggregate) |
| Inclusive scan with aggregate. | |
| template<typename ScanOpT , typename IsIntegerT > | |
| __device__ __forceinline__ void | Update (T, T &inclusive, T &exclusive, ScanOpT, IsIntegerT) |
| Update inclusive and exclusive using input and inclusive. | |
| __device__ __forceinline__ void | Update (T input, T &inclusive, T &exclusive, cub::Sum, Int2Type< true >) |
| Update inclusive and exclusive using input and inclusive (specialized for summation of integer types) | |
| template<typename ScanOpT , typename IsIntegerT > | |
| __device__ __forceinline__ void | Update (T, T &inclusive, T &exclusive, ScanOpT scan_op, T initial_value, IsIntegerT) |
| Update inclusive and exclusive using input, inclusive, and initial value. | |
| __device__ __forceinline__ void | Update (T input, T &inclusive, T &exclusive, cub::Sum scan_op, T initial_value, Int2Type< true >) |
| Update inclusive and exclusive using input, inclusive, and initial value (specialized for summation of integer types) | |
| template<typename ScanOpT , typename IsIntegerT > | |
| __device__ __forceinline__ void | Update (T input, T &inclusive, T &exclusive, T &warp_aggregate, ScanOpT scan_op, IsIntegerT is_integer) |
| Update inclusive, exclusive, and warp aggregate using input and inclusive. | |
| template<typename ScanOpT , typename IsIntegerT > | |
| __device__ __forceinline__ void | Update (T input, T &inclusive, T &exclusive, T &warp_aggregate, ScanOpT scan_op, T initial_value, IsIntegerT is_integer) |
| Update inclusive, exclusive, and warp aggregate using input, inclusive, and initial value. | |
Data Fields | |
| unsigned int | lane_id |
| Lane index in logical warp. | |
| unsigned int | warp_id |
| Logical warp index in 32-thread physical warp. | |
| unsigned int | member_mask |
| 32-thread physical warp member mask of logical warp | |
Definition at line 62 of file warp_scan_shfl.cuh.
|
inline |
Constructor.
Definition at line 105 of file warp_scan_shfl.cuh.
|
inline |
Broadcast.
| [in] | input | The value to broadcast |
| [in] | src_lane | Which warp lane is to do the broadcasting |
Definition at line 451 of file warp_scan_shfl.cuh.
|
inline |
Inclusive scan.
| [in] | input | Calling thread's input item. |
| [out] | inclusive_output | Calling thread's output item. May be aliased with input. |
| [in] | scan_op | Binary scan operator |
Definition at line 465 of file warp_scan_shfl.cuh.
|
inline |
Inclusive scan, specialized for reduce-value-by-key.
| [in] | input | Calling thread's input item. |
| [out] | inclusive_output | Calling thread's output item. May be aliased with input. |
| [in] | scan_op | Binary scan operator |
Definition at line 491 of file warp_scan_shfl.cuh.
|
inline |
Inclusive scan with aggregate.
| [in] | input | Calling thread's input item. |
| [out] | inclusive_output | Calling thread's output item. May be aliased with input. |
| [in] | scan_op | Binary scan operator |
| [out] | warp_aggregate | Warp-wide aggregate reduction of input items. |
Definition at line 524 of file warp_scan_shfl.cuh.
|
inline |
Inclusive prefix scan step (generic)
| [in] | input | Calling thread's input item. |
| [in] | scan_op | Binary scan operator |
| [in] | first_lane | Index of first lane in segment |
| [in] | offset | Up-offset to pull from |
Definition at line 399 of file warp_scan_shfl.cuh.
|
inline |
Inclusive prefix scan step (specialized for types other than small integers size 32b or less)
| [in] | input | Calling thread's input item. |
| [in] | scan_op | Binary scan operator |
| [in] | first_lane | Index of first lane in segment |
| [in] | offset | Up-offset to pull from |
Definition at line 431 of file warp_scan_shfl.cuh.
|
inline |
Inclusive prefix scan step (specialized for small integers size 32b or less)
| [in] | input | Calling thread's input item. |
| [in] | scan_op | Binary scan operator |
| [in] | first_lane | Index of first lane in segment |
| [in] | offset | Up-offset to pull from |
Definition at line 418 of file warp_scan_shfl.cuh.
|
inline |
Inclusive prefix scan step (specialized for summation across fp64 types)
| [in] | input | Calling thread's input item. |
| [in] | first_lane | Index of first lane in segment |
| [in] | offset | Up-offset to pull from |
Definition at line 330 of file warp_scan_shfl.cuh.
|
inline |
Inclusive prefix scan step (specialized for summation across fp32 types)
| [in] | input | Calling thread's input item. |
| [in] | first_lane | Index of first lane in segment |
| [in] | offset | Up-offset to pull from |
Definition at line 199 of file warp_scan_shfl.cuh.
|
inline |
Inclusive prefix scan step (specialized for summation across int32 types)
| [in] | input | Calling thread's input item. |
| [in] | first_lane | Index of first lane in segment |
| [in] | offset | Up-offset to pull from |
Definition at line 126 of file warp_scan_shfl.cuh.
|
inline |
Inclusive prefix scan step (specialized for summation across long long types)
| [in] | input | Calling thread's input item. |
| [in] | first_lane | Index of first lane in segment |
| [in] | offset | Up-offset to pull from |
Definition at line 283 of file warp_scan_shfl.cuh.
|
inline |
Inclusive prefix scan step (specialized for summation across uint32 types)
| [in] | input | Calling thread's input item. |
| [in] | first_lane | Index of first lane in segment |
| [in] | offset | Up-offset to pull from |
Definition at line 162 of file warp_scan_shfl.cuh.
|
inline |
Inclusive prefix scan step (specialized for summation across unsigned long long types)
| [in] | input | Calling thread's input item. |
| [in] | first_lane | Index of first lane in segment |
| [in] | offset | Up-offset to pull from |
Definition at line 236 of file warp_scan_shfl.cuh.
|
inline |
Update inclusive and exclusive using input, inclusive, and initial value (specialized for summation of integer types)
Definition at line 584 of file warp_scan_shfl.cuh.
|
inline |
Update inclusive and exclusive using input and inclusive (specialized for summation of integer types)
Definition at line 555 of file warp_scan_shfl.cuh.
|
inline |
Update inclusive, exclusive, and warp aggregate using input and inclusive.
Definition at line 599 of file warp_scan_shfl.cuh.
|
inline |
Update inclusive, exclusive, and warp aggregate using input, inclusive, and initial value.
Definition at line 613 of file warp_scan_shfl.cuh.
|
inline |
Update inclusive and exclusive using input, inclusive, and initial value.
Definition at line 568 of file warp_scan_shfl.cuh.
|
inline |
Update inclusive and exclusive using input and inclusive.
| [in,out] | inclusive | |
| [out] | exclusive |
Definition at line 543 of file warp_scan_shfl.cuh.
| unsigned int cub::WarpScanShfl< T, LOGICAL_WARP_THREADS, PTX_ARCH >::lane_id |
Lane index in logical warp.
Definition at line 92 of file warp_scan_shfl.cuh.
| unsigned int cub::WarpScanShfl< T, LOGICAL_WARP_THREADS, PTX_ARCH >::member_mask |
32-thread physical warp member mask of logical warp
Definition at line 98 of file warp_scan_shfl.cuh.
| unsigned int cub::WarpScanShfl< T, LOGICAL_WARP_THREADS, PTX_ARCH >::warp_id |
Logical warp index in 32-thread physical warp.
Definition at line 95 of file warp_scan_shfl.cuh.