WarpScanShfl provides SHFL-based variants of parallel prefix scan of items partitioned across a CUDA thread warp. More...
WarpScanShfl provides SHFL-based variants of parallel prefix scan of items partitioned across a CUDA thread warp.
LOGICAL_WARP_THREADS must be a power-of-two. PTX_ARCH is the PTX compute capability for which to specialize this collective.
Definition at line 56 of file warp_scan_shfl.cuh.
Data Structures | |
struct | IntegerTraits |
struct | TempStorage |
Shared memory storage layout type. More... | |
Public Types | |
enum | { IS_ARCH_WARP = (LOGICAL_WARP_THREADS == CUB_WARP_THREADS(PTX_ARCH)) , STEPS = Log2<LOGICAL_WARP_THREADS>::VALUE , SHFL_C = (CUB_WARP_THREADS(PTX_ARCH) - LOGICAL_WARP_THREADS) << 8 } |
Public Member Functions | |
__device__ __forceinline__ | WarpScanShfl (TempStorage &) |
Constructor. | |
__device__ __forceinline__ int | InclusiveScanStep (int input, cub::Sum, int first_lane, int offset) |
Inclusive prefix scan step (specialized for summation across int32 types) | |
__device__ __forceinline__ unsigned int | InclusiveScanStep (unsigned int input, cub::Sum, int first_lane, int offset) |
Inclusive prefix scan step (specialized for summation across uint32 types) | |
__device__ __forceinline__ float | InclusiveScanStep (float input, cub::Sum, int first_lane, int offset) |
Inclusive prefix scan step (specialized for summation across fp32 types) | |
__device__ __forceinline__ unsigned long long | InclusiveScanStep (unsigned long long input, cub::Sum, int first_lane, int offset) |
Inclusive prefix scan step (specialized for summation across unsigned long long types) | |
__device__ __forceinline__ long long | InclusiveScanStep (long long input, cub::Sum, int first_lane, int offset) |
Inclusive prefix scan step (specialized for summation across long long types) | |
__device__ __forceinline__ double | InclusiveScanStep (double input, cub::Sum, int first_lane, int offset) |
Inclusive prefix scan step (specialized for summation across fp64 types) | |
template<typename _T , typename ScanOpT > | |
__device__ __forceinline__ _T | InclusiveScanStep (_T input, ScanOpT scan_op, int first_lane, int offset) |
Inclusive prefix scan step (generic) | |
template<typename _T , typename ScanOpT > | |
__device__ __forceinline__ _T | InclusiveScanStep (_T input, ScanOpT scan_op, int first_lane, int offset, Int2Type< true >) |
Inclusive prefix scan step (specialized for small integers size 32b or less) | |
template<typename _T , typename ScanOpT > | |
__device__ __forceinline__ _T | InclusiveScanStep (_T input, ScanOpT scan_op, int first_lane, int offset, Int2Type< false >) |
Inclusive prefix scan step (specialized for types other than small integers size 32b or less) | |
__device__ __forceinline__ T | Broadcast (T input, int src_lane) |
Broadcast. | |
template<typename _T , typename ScanOpT > | |
__device__ __forceinline__ void | InclusiveScan (_T input, _T &inclusive_output, ScanOpT scan_op) |
Inclusive scan. | |
template<typename KeyT , typename ValueT , typename ReductionOpT > | |
__device__ __forceinline__ void | InclusiveScan (KeyValuePair< KeyT, ValueT > input, KeyValuePair< KeyT, ValueT > &inclusive_output, ReduceByKeyOp< ReductionOpT > scan_op) |
Inclusive scan, specialized for reduce-value-by-key. | |
template<typename ScanOpT > | |
__device__ __forceinline__ void | InclusiveScan (T input, T &inclusive_output, ScanOpT scan_op, T &warp_aggregate) |
Inclusive scan with aggregate. | |
template<typename ScanOpT , typename IsIntegerT > | |
__device__ __forceinline__ void | Update (T, T &inclusive, T &exclusive, ScanOpT, IsIntegerT) |
Update inclusive and exclusive using input and inclusive. | |
__device__ __forceinline__ void | Update (T input, T &inclusive, T &exclusive, cub::Sum, Int2Type< true >) |
Update inclusive and exclusive using input and inclusive (specialized for summation of integer types) | |
template<typename ScanOpT , typename IsIntegerT > | |
__device__ __forceinline__ void | Update (T, T &inclusive, T &exclusive, ScanOpT scan_op, T initial_value, IsIntegerT) |
Update inclusive and exclusive using input, inclusive, and initial value. | |
__device__ __forceinline__ void | Update (T input, T &inclusive, T &exclusive, cub::Sum scan_op, T initial_value, Int2Type< true >) |
Update inclusive and exclusive using input, inclusive, and initial value (specialized for summation of integer types) | |
template<typename ScanOpT , typename IsIntegerT > | |
__device__ __forceinline__ void | Update (T input, T &inclusive, T &exclusive, T &warp_aggregate, ScanOpT scan_op, IsIntegerT is_integer) |
Update inclusive, exclusive, and warp aggregate using input and inclusive. | |
template<typename ScanOpT , typename IsIntegerT > | |
__device__ __forceinline__ void | Update (T input, T &inclusive, T &exclusive, T &warp_aggregate, ScanOpT scan_op, T initial_value, IsIntegerT is_integer) |
Update inclusive, exclusive, and warp aggregate using input, inclusive, and initial value. | |
Data Fields | |
unsigned int | lane_id |
Lane index in logical warp. | |
unsigned int | warp_id |
Logical warp index in 32-thread physical warp. | |
unsigned int | member_mask |
32-thread physical warp member mask of logical warp | |
Definition at line 62 of file warp_scan_shfl.cuh.
|
inline |
Constructor.
Definition at line 105 of file warp_scan_shfl.cuh.
|
inline |
Broadcast.
[in] | input | The value to broadcast |
[in] | src_lane | Which warp lane is to do the broadcasting |
Definition at line 451 of file warp_scan_shfl.cuh.
|
inline |
Inclusive scan.
[in] | input | Calling thread's input item. |
[out] | inclusive_output | Calling thread's output item. May be aliased with input. |
[in] | scan_op | Binary scan operator |
Definition at line 465 of file warp_scan_shfl.cuh.
|
inline |
Inclusive scan, specialized for reduce-value-by-key.
[in] | input | Calling thread's input item. |
[out] | inclusive_output | Calling thread's output item. May be aliased with input. |
[in] | scan_op | Binary scan operator |
Definition at line 491 of file warp_scan_shfl.cuh.
|
inline |
Inclusive scan with aggregate.
[in] | input | Calling thread's input item. |
[out] | inclusive_output | Calling thread's output item. May be aliased with input. |
[in] | scan_op | Binary scan operator |
[out] | warp_aggregate | Warp-wide aggregate reduction of input items. |
Definition at line 524 of file warp_scan_shfl.cuh.
|
inline |
Inclusive prefix scan step (generic)
[in] | input | Calling thread's input item. |
[in] | scan_op | Binary scan operator |
[in] | first_lane | Index of first lane in segment |
[in] | offset | Up-offset to pull from |
Definition at line 399 of file warp_scan_shfl.cuh.
|
inline |
Inclusive prefix scan step (specialized for types other than small integers size 32b or less)
[in] | input | Calling thread's input item. |
[in] | scan_op | Binary scan operator |
[in] | first_lane | Index of first lane in segment |
[in] | offset | Up-offset to pull from |
Definition at line 431 of file warp_scan_shfl.cuh.
|
inline |
Inclusive prefix scan step (specialized for small integers size 32b or less)
[in] | input | Calling thread's input item. |
[in] | scan_op | Binary scan operator |
[in] | first_lane | Index of first lane in segment |
[in] | offset | Up-offset to pull from |
Definition at line 418 of file warp_scan_shfl.cuh.
|
inline |
Inclusive prefix scan step (specialized for summation across fp64 types)
[in] | input | Calling thread's input item. |
[in] | first_lane | Index of first lane in segment |
[in] | offset | Up-offset to pull from |
Definition at line 330 of file warp_scan_shfl.cuh.
|
inline |
Inclusive prefix scan step (specialized for summation across fp32 types)
[in] | input | Calling thread's input item. |
[in] | first_lane | Index of first lane in segment |
[in] | offset | Up-offset to pull from |
Definition at line 199 of file warp_scan_shfl.cuh.
|
inline |
Inclusive prefix scan step (specialized for summation across int32 types)
[in] | input | Calling thread's input item. |
[in] | first_lane | Index of first lane in segment |
[in] | offset | Up-offset to pull from |
Definition at line 126 of file warp_scan_shfl.cuh.
|
inline |
Inclusive prefix scan step (specialized for summation across long long types)
[in] | input | Calling thread's input item. |
[in] | first_lane | Index of first lane in segment |
[in] | offset | Up-offset to pull from |
Definition at line 283 of file warp_scan_shfl.cuh.
|
inline |
Inclusive prefix scan step (specialized for summation across uint32 types)
[in] | input | Calling thread's input item. |
[in] | first_lane | Index of first lane in segment |
[in] | offset | Up-offset to pull from |
Definition at line 162 of file warp_scan_shfl.cuh.
|
inline |
Inclusive prefix scan step (specialized for summation across unsigned long long types)
[in] | input | Calling thread's input item. |
[in] | first_lane | Index of first lane in segment |
[in] | offset | Up-offset to pull from |
Definition at line 236 of file warp_scan_shfl.cuh.
|
inline |
Update inclusive and exclusive using input, inclusive, and initial value (specialized for summation of integer types)
Definition at line 584 of file warp_scan_shfl.cuh.
|
inline |
Update inclusive and exclusive using input and inclusive (specialized for summation of integer types)
Definition at line 555 of file warp_scan_shfl.cuh.
|
inline |
Update inclusive, exclusive, and warp aggregate using input and inclusive.
Definition at line 599 of file warp_scan_shfl.cuh.
|
inline |
Update inclusive, exclusive, and warp aggregate using input, inclusive, and initial value.
Definition at line 613 of file warp_scan_shfl.cuh.
|
inline |
Update inclusive and exclusive using input, inclusive, and initial value.
Definition at line 568 of file warp_scan_shfl.cuh.
|
inline |
Update inclusive and exclusive using input and inclusive.
[in,out] | inclusive | |
[out] | exclusive |
Definition at line 543 of file warp_scan_shfl.cuh.
unsigned int cub::WarpScanShfl< T, LOGICAL_WARP_THREADS, PTX_ARCH >::lane_id |
Lane index in logical warp.
Definition at line 92 of file warp_scan_shfl.cuh.
unsigned int cub::WarpScanShfl< T, LOGICAL_WARP_THREADS, PTX_ARCH >::member_mask |
32-thread physical warp member mask of logical warp
Definition at line 98 of file warp_scan_shfl.cuh.
unsigned int cub::WarpScanShfl< T, LOGICAL_WARP_THREADS, PTX_ARCH >::warp_id |
Logical warp index in 32-thread physical warp.
Definition at line 95 of file warp_scan_shfl.cuh.