OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
 
Loading...
Searching...
No Matches
cub::WarpReduceSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH > Struct Template Reference

WarpReduceSmem provides smem-based variants of parallel reduction of items partitioned across a CUDA thread warp. More...

Detailed Description

template<typename T, int LOGICAL_WARP_THREADS, int PTX_ARCH>
struct cub::WarpReduceSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH >

WarpReduceSmem provides smem-based variants of parallel reduction of items partitioned across a CUDA thread warp.

Template parameter PTX_ARCH: The PTX compute capability for which to specialize this collective

Definition at line 55 of file warp_reduce_smem.cuh.

Data Structures

struct  _TempStorage
 Shared memory storage layout type (1.5 warps-worth of elements for each warp) More...
 
struct  TempStorage
 

Public Types

enum  {
  IS_ARCH_WARP = (LOGICAL_WARP_THREADS == CUB_WARP_THREADS(PTX_ARCH)) , IS_POW_OF_TWO = PowerOfTwo<LOGICAL_WARP_THREADS>::VALUE , STEPS = Log2<LOGICAL_WARP_THREADS>::VALUE , HALF_WARP_THREADS = 1 << (STEPS - 1) ,
  WARP_SMEM_ELEMENTS = LOGICAL_WARP_THREADS + HALF_WARP_THREADS , UNSET = 0x0 , SET = 0x1 , SEEN = 0x2
}
 
typedef unsigned char SmemFlag
 Shared memory flag type.
 

Public Member Functions

__device__ __forceinline__ WarpReduceSmem (TempStorage &temp_storage)
 Constructor.
 
template<bool ALL_LANES_VALID, typename ReductionOp , int STEP>
__device__ __forceinline__ T ReduceStep (T input, int valid_items, ReductionOp reduction_op, Int2Type< STEP >)
 
template<bool ALL_LANES_VALID, typename ReductionOp >
__device__ __forceinline__ T ReduceStep (T input, int valid_items, ReductionOp, Int2Type< STEPS >)
 
template<bool HEAD_SEGMENTED, typename FlagT , typename ReductionOp >
__device__ __forceinline__ T SegmentedReduce (T input, FlagT flag, ReductionOp reduction_op, Int2Type< true >)
 
template<bool HEAD_SEGMENTED, typename FlagT , typename ReductionOp >
__device__ __forceinline__ T SegmentedReduce (T input, FlagT flag, ReductionOp reduction_op, Int2Type< false >)
 
template<bool ALL_LANES_VALID, typename ReductionOp >
__device__ __forceinline__ T Reduce (T input, int valid_items, ReductionOp reduction_op)
 
template<bool HEAD_SEGMENTED, typename FlagT , typename ReductionOp >
__device__ __forceinline__ T SegmentedReduce (T input, FlagT flag, ReductionOp reduction_op)
 

Data Fields

_TempStorage & temp_storage
 
unsigned int lane_id
 
unsigned int member_mask
 

Member Typedef Documentation

◆ SmemFlag

template<typename T , int LOGICAL_WARP_THREADS, int PTX_ARCH>
typedef unsigned char cub::WarpReduceSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH >::SmemFlag

Shared memory flag type.

Definition at line 85 of file warp_reduce_smem.cuh.

Member Enumeration Documentation

◆ anonymous enum

template<typename T , int LOGICAL_WARP_THREADS, int PTX_ARCH>
anonymous enum
Enumerator
IS_ARCH_WARP 

Whether the logical warp size and the PTX warp size coincide.

IS_POW_OF_TWO 

Whether the logical warp size is a power-of-two.

STEPS 

The number of warp reduction steps.

HALF_WARP_THREADS 

The number of threads in half a warp.

WARP_SMEM_ELEMENTS 

The number of shared memory elements per warp.

UNSET 

FlagT status (when not using ballot)

Definition at line 61 of file warp_reduce_smem.cuh.

Constructor & Destructor Documentation

◆ WarpReduceSmem()

template<typename T , int LOGICAL_WARP_THREADS, int PTX_ARCH>
__device__ __forceinline__ cub::WarpReduceSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH >::WarpReduceSmem ( TempStorage & temp_storage)
inline

Constructor.

Definition at line 112 of file warp_reduce_smem.cuh.

Member Function Documentation

◆ Reduce()

template<typename T , int LOGICAL_WARP_THREADS, int PTX_ARCH>
template<bool ALL_LANES_VALID, typename ReductionOp >
__device__ __forceinline__ T cub::WarpReduceSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH >::Reduce ( T input,
int  valid_items,
ReductionOp  reduction_op 
)
inline

Reduction

Parameters
[in] input — Calling thread's input
[in] valid_items — Total number of valid items across the logical warp
[in] reduction_op — Reduction operator

Definition at line 343 of file warp_reduce_smem.cuh.

◆ ReduceStep() [1/2]

template<typename T , int LOGICAL_WARP_THREADS, int PTX_ARCH>
template<bool ALL_LANES_VALID, typename ReductionOp , int STEP>
__device__ __forceinline__ T cub::WarpReduceSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH >::ReduceStep ( T input,
int  valid_items,
ReductionOp  reduction_op,
Int2Type< STEP >   
)
inline

Reduction step

Parameters
[in] input — Calling thread's input
[in] valid_items — Total number of valid items across the logical warp
[in] reduction_op — Reduction operator

Definition at line 141 of file warp_reduce_smem.cuh.

◆ ReduceStep() [2/2]

template<typename T , int LOGICAL_WARP_THREADS, int PTX_ARCH>
template<bool ALL_LANES_VALID, typename ReductionOp >
__device__ __forceinline__ T cub::WarpReduceSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH >::ReduceStep ( T input,
int  valid_items,
ReductionOp  ,
Int2Type< STEPS >   
)
inline

Reduction step (terminate)

Parameters
[in] input — Calling thread's input
[in] valid_items — Total number of valid items across the logical warp

Definition at line 173 of file warp_reduce_smem.cuh.

◆ SegmentedReduce() [1/3]

template<typename T , int LOGICAL_WARP_THREADS, int PTX_ARCH>
template<bool HEAD_SEGMENTED, typename FlagT , typename ReductionOp >
__device__ __forceinline__ T cub::WarpReduceSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH >::SegmentedReduce ( T input,
FlagT  flag,
ReductionOp  reduction_op 
)
inline

Segmented reduction

Parameters
[in] input — Calling thread's input
[in] flag — Whether or not the current lane is a segment head/tail
[in] reduction_op — Reduction operator

Definition at line 359 of file warp_reduce_smem.cuh.

◆ SegmentedReduce() [2/3]

template<typename T , int LOGICAL_WARP_THREADS, int PTX_ARCH>
template<bool HEAD_SEGMENTED, typename FlagT , typename ReductionOp >
__device__ __forceinline__ T cub::WarpReduceSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH >::SegmentedReduce ( T input,
FlagT  flag,
ReductionOp  reduction_op,
Int2Type< false >   
)
inline

Smem-based segmented reduce

Parameters
[in] input — Calling thread's input
[in] flag — Whether or not the current lane is a segment head/tail
[in] reduction_op — Reduction operator

Definition at line 254 of file warp_reduce_smem.cuh.

◆ SegmentedReduce() [3/3]

template<typename T , int LOGICAL_WARP_THREADS, int PTX_ARCH>
template<bool HEAD_SEGMENTED, typename FlagT , typename ReductionOp >
__device__ __forceinline__ T cub::WarpReduceSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH >::SegmentedReduce ( T input,
FlagT  flag,
ReductionOp  reduction_op,
Int2Type< true >   
)
inline

Ballot-based segmented reduce

Parameters
[in] input — Calling thread's input
[in] flag — Whether or not the current lane is a segment head/tail
[in] reduction_op — Reduction operator

Definition at line 195 of file warp_reduce_smem.cuh.

Field Documentation

◆ lane_id

template<typename T , int LOGICAL_WARP_THREADS, int PTX_ARCH>
unsigned int cub::WarpReduceSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH >::lane_id

Definition at line 103 of file warp_reduce_smem.cuh.

◆ member_mask

template<typename T , int LOGICAL_WARP_THREADS, int PTX_ARCH>
unsigned int cub::WarpReduceSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH >::member_mask

Definition at line 104 of file warp_reduce_smem.cuh.

◆ temp_storage

template<typename T , int LOGICAL_WARP_THREADS, int PTX_ARCH>
_TempStorage& cub::WarpReduceSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH >::temp_storage

Definition at line 102 of file warp_reduce_smem.cuh.


The documentation for this struct was generated from the following file: