WarpReduceSmem provides smem-based variants of parallel reduction of items partitioned across a CUDA thread warp.
More...
template<typename T, int LOGICAL_WARP_THREADS, int PTX_ARCH>
struct cub::WarpReduceSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH >
WarpReduceSmem provides smem-based variants of parallel reduction of items partitioned across a CUDA thread warp.
PTX_ARCH: The PTX compute capability for which to specialize this collective.
Definition at line 55 of file warp_reduce_smem.cuh.
|
__device__ __forceinline__ | WarpReduceSmem (TempStorage &temp_storage) |
| Constructor.
|
|
template<bool ALL_LANES_VALID, typename ReductionOp , int STEP> |
__device__ __forceinline__ T | ReduceStep (T input, int valid_items, ReductionOp reduction_op, Int2Type< STEP >) |
|
template<bool ALL_LANES_VALID, typename ReductionOp > |
__device__ __forceinline__ T | ReduceStep (T input, int valid_items, ReductionOp, Int2Type< STEPS >) |
|
template<bool HEAD_SEGMENTED, typename FlagT , typename ReductionOp > |
__device__ __forceinline__ T | SegmentedReduce (T input, FlagT flag, ReductionOp reduction_op, Int2Type< true >) |
|
template<bool HEAD_SEGMENTED, typename FlagT , typename ReductionOp > |
__device__ __forceinline__ T | SegmentedReduce (T input, FlagT flag, ReductionOp reduction_op, Int2Type< false >) |
|
template<bool ALL_LANES_VALID, typename ReductionOp > |
__device__ __forceinline__ T | Reduce (T input, int valid_items, ReductionOp reduction_op) |
|
template<bool HEAD_SEGMENTED, typename FlagT , typename ReductionOp > |
__device__ __forceinline__ T | SegmentedReduce (T input, FlagT flag, ReductionOp reduction_op) |
|
◆ anonymous enum
template<typename T, int LOGICAL_WARP_THREADS, int PTX_ARCH>
Enumerator |
---|
IS_ARCH_WARP | Whether the logical warp size and the PTX warp size coincide.
|
IS_POW_OF_TWO | Whether the logical warp size is a power-of-two.
|
STEPS | The number of warp reduction steps.
|
HALF_WARP_THREADS | The number of threads in half a warp.
|
WARP_SMEM_ELEMENTS | The number of shared memory elements per warp.
|
UNSET | FlagT status (when not using ballot)
|
Definition at line 61 of file warp_reduce_smem.cuh.
◆ Reduce()
template<typename T, int LOGICAL_WARP_THREADS, int PTX_ARCH>
template<bool ALL_LANES_VALID, typename ReductionOp >
__device__ __forceinline__ T cub::WarpReduceSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH >::Reduce |
( |
T |
input, |
|
|
int |
valid_items, |
|
|
ReductionOp |
reduction_op |
|
) |
| |
|
inline |
Reduction
- Parameters
-
[in] | input | Calling thread's input |
[in] | valid_items | Total number of valid items across the logical warp |
[in] | reduction_op | Reduction operator |
Definition at line 343 of file warp_reduce_smem.cuh.
◆ ReduceStep() [1/2]
template<typename T, int LOGICAL_WARP_THREADS, int PTX_ARCH>
template<bool ALL_LANES_VALID, typename ReductionOp , int STEP>
__device__ __forceinline__ T cub::WarpReduceSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH >::ReduceStep |
( |
T |
input, |
|
|
int |
valid_items, |
|
|
ReductionOp |
reduction_op, |
|
|
Int2Type< STEP > |
|
|
) |
| |
|
inline |
Reduction step
- Parameters
-
[in] | input | Calling thread's input |
[in] | valid_items | Total number of valid items across the logical warp |
[in] | reduction_op | Reduction operator |
Definition at line 141 of file warp_reduce_smem.cuh.
◆ ReduceStep() [2/2]
template<typename T, int LOGICAL_WARP_THREADS, int PTX_ARCH>
template<bool ALL_LANES_VALID, typename ReductionOp >
Reduction step (terminate)
- Parameters
-
[in] | input | Calling thread's input |
[in] | valid_items | Total number of valid items across the logical warp |
Definition at line 173 of file warp_reduce_smem.cuh.
◆ SegmentedReduce() [1/3]
template<typename T, int LOGICAL_WARP_THREADS, int PTX_ARCH>
template<bool HEAD_SEGMENTED, typename FlagT , typename ReductionOp >
__device__ __forceinline__ T cub::WarpReduceSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH >::SegmentedReduce |
( |
T |
input, |
|
|
FlagT |
flag, |
|
|
ReductionOp |
reduction_op, |
|
|
Int2Type< true > |
|
|
) |
| |
|
inline |
Ballot-based segmented reduce
- Parameters
-
[in] | input | Calling thread's input |
[in] | flag | Whether or not the current lane is a segment head/tail |
[in] | reduction_op | Reduction operator |
Definition at line 195 of file warp_reduce_smem.cuh.
◆ SegmentedReduce() [2/3]
template<typename T, int LOGICAL_WARP_THREADS, int PTX_ARCH>
template<bool HEAD_SEGMENTED, typename FlagT , typename ReductionOp >
__device__ __forceinline__ T cub::WarpReduceSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH >::SegmentedReduce |
( |
T |
input, |
|
|
FlagT |
flag, |
|
|
ReductionOp |
reduction_op, |
|
|
Int2Type< false > |
|
|
) |
| |
|
inline |
Smem-based segmented reduce
- Parameters
-
[in] | input | Calling thread's input |
[in] | flag | Whether or not the current lane is a segment head/tail |
[in] | reduction_op | Reduction operator |
Definition at line 254 of file warp_reduce_smem.cuh.
◆ SegmentedReduce() [3/3]
template<typename T, int LOGICAL_WARP_THREADS, int PTX_ARCH>
template<bool HEAD_SEGMENTED, typename FlagT , typename ReductionOp >
__device__ __forceinline__ T cub::WarpReduceSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH >::SegmentedReduce |
( |
T |
input, |
|
|
FlagT |
flag, |
|
|
ReductionOp |
reduction_op |
|
) |
| |
|
inline |
Segmented reduction
- Parameters
-
[in] | input | Calling thread's input |
[in] | flag | Whether or not the current lane is a segment head/tail |
[in] | reduction_op | Reduction operator |
Definition at line 359 of file warp_reduce_smem.cuh.
The documentation for this struct was generated from the following file: