OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
cub::WarpReduceSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH > Struct Template Reference

WarpReduceSmem provides smem-based variants of parallel reduction of items partitioned across a CUDA thread warp. More...

Detailed Description

template<typename T, int LOGICAL_WARP_THREADS, int PTX_ARCH>
struct cub::WarpReduceSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH >

WarpReduceSmem provides smem-based variants of parallel reduction of items partitioned across a CUDA thread warp.

Template parameter PTX_ARCH: the PTX compute capability for which to specialize this collective.

Definition at line 55 of file warp_reduce_smem.cuh.

Data Structures

struct  _TempStorage
 Shared memory storage layout type (1.5 warps-worth of elements for each warp) More...
 
struct  TempStorage
 

Public Types

enum  {
  IS_ARCH_WARP = (LOGICAL_WARP_THREADS == CUB_WARP_THREADS(PTX_ARCH)), IS_POW_OF_TWO = PowerOfTwo<LOGICAL_WARP_THREADS>::VALUE, STEPS = Log2<LOGICAL_WARP_THREADS>::VALUE, HALF_WARP_THREADS = 1 << (STEPS - 1),
  WARP_SMEM_ELEMENTS = LOGICAL_WARP_THREADS + HALF_WARP_THREADS, UNSET = 0x0, SET = 0x1, SEEN = 0x2
}
 
typedef unsigned char SmemFlag
 Shared memory flag type.
 

Public Member Functions

__device__ __forceinline__ WarpReduceSmem (TempStorage &temp_storage)
 Constructor.
 
template<bool ALL_LANES_VALID, typename ReductionOp , int STEP>
__device__ __forceinline__ T ReduceStep (T input, int valid_items, ReductionOp reduction_op, Int2Type< STEP >)
 
template<bool ALL_LANES_VALID, typename ReductionOp >
__device__ __forceinline__ T ReduceStep (T input, int valid_items, ReductionOp, Int2Type< STEPS >)
 
template<bool HEAD_SEGMENTED, typename FlagT , typename ReductionOp >
__device__ __forceinline__ T SegmentedReduce (T input, FlagT flag, ReductionOp reduction_op, Int2Type< true >)
 
template<bool HEAD_SEGMENTED, typename FlagT , typename ReductionOp >
__device__ __forceinline__ T SegmentedReduce (T input, FlagT flag, ReductionOp reduction_op, Int2Type< false >)
 
template<bool ALL_LANES_VALID, typename ReductionOp >
__device__ __forceinline__ T Reduce (T input, int valid_items, ReductionOp reduction_op)
 
template<bool HEAD_SEGMENTED, typename FlagT , typename ReductionOp >
__device__ __forceinline__ T SegmentedReduce (T input, FlagT flag, ReductionOp reduction_op)
 

Data Fields

_TempStorage & temp_storage
 
unsigned int lane_id
 
unsigned int member_mask
 

Member Enumeration Documentation

◆ anonymous enum

template<typename T, int LOGICAL_WARP_THREADS, int PTX_ARCH>
anonymous enum
Enumerator
IS_ARCH_WARP 

Whether the logical warp size and the PTX warp size coincide.

IS_POW_OF_TWO 

Whether the logical warp size is a power-of-two.

STEPS 

The number of warp reduction steps.

HALF_WARP_THREADS 

The number of threads in half a warp.

WARP_SMEM_ELEMENTS 

The number of shared memory elements per warp.

UNSET 

FlagT status (when not using ballot)

Definition at line 61 of file warp_reduce_smem.cuh.

Member Function Documentation

◆ Reduce()

template<typename T, int LOGICAL_WARP_THREADS, int PTX_ARCH>
template<bool ALL_LANES_VALID, typename ReductionOp >
__device__ __forceinline__ T cub::WarpReduceSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH >::Reduce (T input,
int  valid_items,
ReductionOp  reduction_op 
)
inline

Reduction

Parameters
[in] input: Calling thread's input
[in] valid_items: Total number of valid items across the logical warp
[in] reduction_op: Reduction operator

Definition at line 343 of file warp_reduce_smem.cuh.

◆ ReduceStep() [1/2]

template<typename T, int LOGICAL_WARP_THREADS, int PTX_ARCH>
template<bool ALL_LANES_VALID, typename ReductionOp , int STEP>
__device__ __forceinline__ T cub::WarpReduceSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH >::ReduceStep (T input,
int  valid_items,
ReductionOp  reduction_op,
Int2Type< STEP >   
)
inline

Reduction step

Parameters
[in] input: Calling thread's input
[in] valid_items: Total number of valid items across the logical warp
[in] reduction_op: Reduction operator

Definition at line 141 of file warp_reduce_smem.cuh.

◆ ReduceStep() [2/2]

template<typename T, int LOGICAL_WARP_THREADS, int PTX_ARCH>
template<bool ALL_LANES_VALID, typename ReductionOp >
__device__ __forceinline__ T cub::WarpReduceSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH >::ReduceStep (T input,
int  valid_items,
ReductionOp  ,
Int2Type< STEPS >
)
inline

Reduction step (terminate)

Parameters
[in] input: Calling thread's input
[in] valid_items: Total number of valid items across the logical warp

Definition at line 173 of file warp_reduce_smem.cuh.

◆ SegmentedReduce() [1/3]

template<typename T, int LOGICAL_WARP_THREADS, int PTX_ARCH>
template<bool HEAD_SEGMENTED, typename FlagT , typename ReductionOp >
__device__ __forceinline__ T cub::WarpReduceSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH >::SegmentedReduce (T input,
FlagT  flag,
ReductionOp  reduction_op,
Int2Type< true >   
)
inline

Ballot-based segmented reduce

Parameters
[in] input: Calling thread's input
[in] flag: Whether or not the current lane is a segment head/tail
[in] reduction_op: Reduction operator

Definition at line 195 of file warp_reduce_smem.cuh.

◆ SegmentedReduce() [2/3]

template<typename T, int LOGICAL_WARP_THREADS, int PTX_ARCH>
template<bool HEAD_SEGMENTED, typename FlagT , typename ReductionOp >
__device__ __forceinline__ T cub::WarpReduceSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH >::SegmentedReduce (T input,
FlagT  flag,
ReductionOp  reduction_op,
Int2Type< false >   
)
inline

Smem-based segmented reduce

Parameters
[in] input: Calling thread's input
[in] flag: Whether or not the current lane is a segment head/tail
[in] reduction_op: Reduction operator

Definition at line 254 of file warp_reduce_smem.cuh.

◆ SegmentedReduce() [3/3]

template<typename T, int LOGICAL_WARP_THREADS, int PTX_ARCH>
template<bool HEAD_SEGMENTED, typename FlagT , typename ReductionOp >
__device__ __forceinline__ T cub::WarpReduceSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH >::SegmentedReduce (T input,
FlagT  flag,
ReductionOp  reduction_op 
)
inline

Segmented reduction

Parameters
[in] input: Calling thread's input
[in] flag: Whether or not the current lane is a segment head/tail
[in] reduction_op: Reduction operator

Definition at line 359 of file warp_reduce_smem.cuh.


The documentation for this struct was generated from the following file: