OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
 
Loading...
Searching...
No Matches
cub::BlockReduceWarpReductions< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH > Struct Template Reference

BlockReduceWarpReductions provides variants of warp-reduction-based parallel reduction across a CUDA thread block. Supports non-commutative reduction operators. More...

Detailed Description

template<typename T, int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
struct cub::BlockReduceWarpReductions< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >

BlockReduceWarpReductions provides variants of warp-reduction-based parallel reduction across a CUDA thread block. Supports non-commutative reduction operators.

PTX_ARCH: The PTX compute capability for which to specialize this collective

Definition at line 57 of file block_reduce_warp_reductions.cuh.

Data Structures

struct  _TempStorage
 Shared memory storage layout type. More...
 
struct  TempStorage
 Alias wrapper allowing storage to be unioned. More...
 

Public Types

enum  {
  BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z , WARP_THREADS = CUB_WARP_THREADS(PTX_ARCH) , WARPS = (BLOCK_THREADS + WARP_THREADS - 1) / WARP_THREADS , LOGICAL_WARP_SIZE = CUB_MIN(BLOCK_THREADS, WARP_THREADS) ,
  EVEN_WARP_MULTIPLE = (BLOCK_THREADS % LOGICAL_WARP_SIZE == 0)
}
 Constants. More...
 
typedef WarpReduce< T, LOGICAL_WARP_SIZE, PTX_ARCH >::InternalWarpReduce WarpReduce
 WarpReduce utility type.
 

Public Member Functions

__device__ __forceinline__ BlockReduceWarpReductions (TempStorage &temp_storage)
 Constructor.
 
template<bool FULL_TILE, typename ReductionOp , int SUCCESSOR_WARP>
__device__ __forceinline__ T ApplyWarpAggregates (ReductionOp reduction_op, T warp_aggregate, int num_valid, Int2Type< SUCCESSOR_WARP >)
 
template<bool FULL_TILE, typename ReductionOp >
__device__ __forceinline__ T ApplyWarpAggregates (ReductionOp, T warp_aggregate, int, Int2Type< WARPS >)
 
template<bool FULL_TILE, typename ReductionOp >
__device__ __forceinline__ T ApplyWarpAggregates (ReductionOp reduction_op, T warp_aggregate, int num_valid)
 Returns block-wide aggregate in thread0.
 
template<bool FULL_TILE>
__device__ __forceinline__ T Sum (T input, int num_valid)
 Computes a thread block-wide reduction using addition (+) as the reduction operator. The first num_valid threads each contribute one reduction partial. The return value is only valid for thread0.
 
template<bool FULL_TILE, typename ReductionOp >
__device__ __forceinline__ T Reduce (T input, int num_valid, ReductionOp reduction_op)
 Computes a thread block-wide reduction using the specified reduction operator. The first num_valid threads each contribute one reduction partial. The return value is only valid for thread0.
 

Data Fields

_TempStorage & temp_storage
 
int linear_tid
 
int warp_id
 
int lane_id
 

Member Typedef Documentation

◆ WarpReduce

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
typedef WarpReduce<T,LOGICAL_WARP_SIZE,PTX_ARCH>::InternalWarpReduce cub::BlockReduceWarpReductions< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::WarpReduce

WarpReduce utility type.

Definition at line 80 of file block_reduce_warp_reductions.cuh.

Member Enumeration Documentation

◆ anonymous enum

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
anonymous enum

Constants.

Enumerator
BLOCK_THREADS 

The thread block size in threads.

WARP_THREADS 

Number of warp threads.

WARPS 

Number of active warps.

LOGICAL_WARP_SIZE 

The logical warp size for warp reductions.

EVEN_WARP_MULTIPLE 

Whether or not the logical warp size evenly divides the thread block size.

Definition at line 60 of file block_reduce_warp_reductions.cuh.

Constructor & Destructor Documentation

◆ BlockReduceWarpReductions()

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
__device__ __forceinline__ cub::BlockReduceWarpReductions< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::BlockReduceWarpReductions ( TempStorage & temp_storage)
inline

Constructor.

Definition at line 103 of file block_reduce_warp_reductions.cuh.

Member Function Documentation

◆ ApplyWarpAggregates() [1/3]

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
template<bool FULL_TILE, typename ReductionOp >
__device__ __forceinline__ T cub::BlockReduceWarpReductions< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::ApplyWarpAggregates ( ReductionOp  reduction_op,
T  warp_aggregate,
int  num_valid 
)
inline

Returns block-wide aggregate in thread0.

Parameters
[in] reduction_op: Binary reduction operator
[in] warp_aggregate: [lane0 only] Warp-wide aggregate reduction of input items
[in] num_valid: Number of valid elements (may be less than BLOCK_THREADS)

Definition at line 143 of file block_reduce_warp_reductions.cuh.

◆ ApplyWarpAggregates() [2/3]

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
template<bool FULL_TILE, typename ReductionOp , int SUCCESSOR_WARP>
__device__ __forceinline__ T cub::BlockReduceWarpReductions< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::ApplyWarpAggregates ( ReductionOp  reduction_op,
T  warp_aggregate,
int  num_valid,
Int2Type< SUCCESSOR_WARP >   
)
inline
Parameters
[in] reduction_op: Binary reduction operator
[in] warp_aggregate: [lane0 only] Warp-wide aggregate reduction of input items
[in] num_valid: Number of valid elements (may be less than BLOCK_THREADS)

Definition at line 114 of file block_reduce_warp_reductions.cuh.

◆ ApplyWarpAggregates() [3/3]

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
template<bool FULL_TILE, typename ReductionOp >
__device__ __forceinline__ T cub::BlockReduceWarpReductions< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::ApplyWarpAggregates ( ReductionOp  ,
T  warp_aggregate,
int  ,
Int2Type< WARPS >  
)
inline
Parameters
[in] warp_aggregate: [lane0 only] Warp-wide aggregate reduction of input items

Definition at line 129 of file block_reduce_warp_reductions.cuh.

◆ Reduce()

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
template<bool FULL_TILE, typename ReductionOp >
__device__ __forceinline__ T cub::BlockReduceWarpReductions< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::Reduce ( T  input,
int  num_valid,
ReductionOp  reduction_op 
)
inline

Computes a thread block-wide reduction using the specified reduction operator. The first num_valid threads each contribute one reduction partial. The return value is only valid for thread0.

Parameters
[in] input: Calling thread's input partial reductions
[in] num_valid: Number of valid elements (may be less than BLOCK_THREADS)
[in] reduction_op: Binary reduction operator

Definition at line 193 of file block_reduce_warp_reductions.cuh.

◆ Sum()

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
template<bool FULL_TILE>
__device__ __forceinline__ T cub::BlockReduceWarpReductions< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::Sum ( T  input,
int  num_valid 
)
inline

Computes a thread block-wide reduction using addition (+) as the reduction operator. The first num_valid threads each contribute one reduction partial. The return value is only valid for thread0.

Parameters
[in] input: Calling thread's input partial reductions
[in] num_valid: Number of valid elements (may be less than BLOCK_THREADS)

Definition at line 168 of file block_reduce_warp_reductions.cuh.

Field Documentation

◆ lane_id

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
int cub::BlockReduceWarpReductions< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::lane_id

Definition at line 99 of file block_reduce_warp_reductions.cuh.

◆ linear_tid

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
int cub::BlockReduceWarpReductions< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::linear_tid

Definition at line 97 of file block_reduce_warp_reductions.cuh.

◆ temp_storage

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
_TempStorage& cub::BlockReduceWarpReductions< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::temp_storage

Definition at line 96 of file block_reduce_warp_reductions.cuh.

◆ warp_id

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
int cub::BlockReduceWarpReductions< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::warp_id

Definition at line 98 of file block_reduce_warp_reductions.cuh.


The documentation for this struct was generated from the following file: