BlockReduceWarpReductions provides variants of warp-reduction-based parallel reduction across a CUDA thread block. Supports non-commutative reduction operators. More...

Detailed Description

template<typename T, int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
struct cub::BlockReduceWarpReductions< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >

BlockReduceWarpReductions provides variants of warp-reduction-based parallel reduction across a CUDA thread block. Supports non-commutative reduction operators.

PTX_ARCH: The PTX compute capability for which to specialize this collective.

Definition at line 57 of file block_reduce_warp_reductions.cuh.

Data Structures
struct	_TempStorage
	Shared memory storage layout type. More...

struct	TempStorage
	Alias wrapper allowing storage to be unioned. More...

Public Types
enum	{ BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z , WARP_THREADS = CUB_WARP_THREADS(PTX_ARCH) , WARPS = (BLOCK_THREADS + WARP_THREADS - 1) / WARP_THREADS , LOGICAL_WARP_SIZE = CUB_MIN(BLOCK_THREADS, WARP_THREADS) , EVEN_WARP_MULTIPLE = (BLOCK_THREADS % LOGICAL_WARP_SIZE == 0) }
	Constants. More...

typedef WarpReduce< T, LOGICAL_WARP_SIZE, PTX_ARCH >::InternalWarpReduce	WarpReduce
	WarpReduce utility type.

Public Member Functions
__device__ __forceinline__	BlockReduceWarpReductions (TempStorage &temp_storage)
	Constructor.

template<bool FULL_TILE, typename ReductionOp , int SUCCESSOR_WARP>
__device__ __forceinline__ T	ApplyWarpAggregates (ReductionOp reduction_op, T warp_aggregate, int num_valid, Int2Type< SUCCESSOR_WARP >)

template<bool FULL_TILE, typename ReductionOp >
__device__ __forceinline__ T	ApplyWarpAggregates (ReductionOp, T warp_aggregate, int, Int2Type< WARPS >)

template<bool FULL_TILE, typename ReductionOp >
__device__ __forceinline__ T	ApplyWarpAggregates (ReductionOp reduction_op, T warp_aggregate, int num_valid)
	Returns block-wide aggregate in thread₀.

template<bool FULL_TILE>
__device__ __forceinline__ T	Sum (T input, int num_valid)
	Computes a thread block-wide reduction using addition (+) as the reduction operator. The first num_valid threads each contribute one reduction partial. The return value is only valid for thread₀.

template<bool FULL_TILE, typename ReductionOp >
__device__ __forceinline__ T	Reduce (T input, int num_valid, ReductionOp reduction_op)
	Computes a thread block-wide reduction using the specified reduction operator. The first num_valid threads each contribute one reduction partial. The return value is only valid for thread₀.

Data Fields
_TempStorage &	temp_storage

int	linear_tid

int	warp_id

int	lane_id

Member Typedef Documentation

◆ WarpReduce

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>

typedef WarpReduce<T,LOGICAL_WARP_SIZE,PTX_ARCH>::InternalWarpReduce cub::BlockReduceWarpReductions< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::WarpReduce

WarpReduce utility type.

Definition at line 80 of file block_reduce_warp_reductions.cuh.

Member Enumeration Documentation

◆ anonymous enum

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>

anonymous enum

Constants.

Enumerator
BLOCK_THREADS	The thread block size in threads.
WARP_THREADS	Number of warp threads.
WARPS	Number of active warps.
LOGICAL_WARP_SIZE	The logical warp size for warp reductions.
EVEN_WARP_MULTIPLE	Whether or not the logical warp size evenly divides the thread block size.

Definition at line 60 of file block_reduce_warp_reductions.cuh.

Constructor & Destructor Documentation

◆ BlockReduceWarpReductions()

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>

__device__ __forceinline__ cub::BlockReduceWarpReductions< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::BlockReduceWarpReductions ( TempStorage & temp_storage )

inline

Constructor.

Definition at line 103 of file block_reduce_warp_reductions.cuh.

Member Function Documentation

◆ ApplyWarpAggregates() [1/3]

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>

template<bool FULL_TILE, typename ReductionOp >

__device__ __forceinline__ T cub::BlockReduceWarpReductions< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::ApplyWarpAggregates	(	ReductionOp	reduction_op,
		T	warp_aggregate,
		int	num_valid
	)

inline

Returns block-wide aggregate in thread₀.

Parameters

[in]	reduction_op	Binary reduction operator
[in]	warp_aggregate	*[lane₀ only]* Warp-wide aggregate reduction of input items
[in]	num_valid	Number of valid elements (may be less than BLOCK_THREADS)

Definition at line 143 of file block_reduce_warp_reductions.cuh.

◆ ApplyWarpAggregates() [2/3]

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>

template<bool FULL_TILE, typename ReductionOp , int SUCCESSOR_WARP>

__device__ __forceinline__ T cub::BlockReduceWarpReductions< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::ApplyWarpAggregates	(	ReductionOp	reduction_op,
		T	warp_aggregate,
		int	num_valid,
		Int2Type< SUCCESSOR_WARP >
	)

inline

Parameters

[in]	reduction_op	Binary reduction operator
[in]	warp_aggregate	*[lane₀ only]* Warp-wide aggregate reduction of input items
[in]	num_valid	Number of valid elements (may be less than BLOCK_THREADS)

Definition at line 114 of file block_reduce_warp_reductions.cuh.

◆ ApplyWarpAggregates() [3/3]

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>

template<bool FULL_TILE, typename ReductionOp >

__device__ __forceinline__ T cub::BlockReduceWarpReductions< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::ApplyWarpAggregates	(	ReductionOp	,
		T	warp_aggregate,
		int	,
		Int2Type< WARPS >
	)

inline

Parameters

[in]	warp_aggregate	*[lane₀ only]* Warp-wide aggregate reduction of input items

Definition at line 129 of file block_reduce_warp_reductions.cuh.

◆ Reduce()

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>

template<bool FULL_TILE, typename ReductionOp >

__device__ __forceinline__ T cub::BlockReduceWarpReductions< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::Reduce	(	T	input,
		int	num_valid,
		ReductionOp	reduction_op
	)

inline

Computes a thread block-wide reduction using the specified reduction operator. The first num_valid threads each contribute one reduction partial. The return value is only valid for thread₀.

Parameters

[in]	input	Calling thread's input partial reductions
[in]	num_valid	Number of valid elements (may be less than BLOCK_THREADS)
[in]	reduction_op	Binary reduction operator

Definition at line 193 of file block_reduce_warp_reductions.cuh.

◆ Sum()

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>

template<bool FULL_TILE>

__device__ __forceinline__ T cub::BlockReduceWarpReductions< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::Sum	(	T	input,
		int	num_valid
	)

inline

Computes a thread block-wide reduction using addition (+) as the reduction operator. The first num_valid threads each contribute one reduction partial. The return value is only valid for thread₀.

Parameters

[in]	input	Calling thread's input partial reductions
[in]	num_valid	Number of valid elements (may be less than BLOCK_THREADS)

Definition at line 168 of file block_reduce_warp_reductions.cuh.

Field Documentation

◆ lane_id

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>

int cub::BlockReduceWarpReductions< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::lane_id

Definition at line 99 of file block_reduce_warp_reductions.cuh.

◆ linear_tid

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>

int cub::BlockReduceWarpReductions< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::linear_tid

Definition at line 97 of file block_reduce_warp_reductions.cuh.

◆ temp_storage

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>

_TempStorage& cub::BlockReduceWarpReductions< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::temp_storage

Definition at line 96 of file block_reduce_warp_reductions.cuh.

◆ warp_id

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>

int cub::BlockReduceWarpReductions< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::warp_id

Definition at line 98 of file block_reduce_warp_reductions.cuh.

The documentation for this struct was generated from the following file:

openfpm_data/src/util/cuda/cub_old/block/specializations/block_reduce_warp_reductions.cuh

Detailed Description

Data Structures

Public Types

Public Member Functions

Data Fields

Member Typedef Documentation

◆ WarpReduce

Member Enumeration Documentation

◆ anonymous enum

Constructor & Destructor Documentation

◆ BlockReduceWarpReductions()

Member Function Documentation

◆ ApplyWarpAggregates() [1/3]

◆ ApplyWarpAggregates() [2/3]

◆ ApplyWarpAggregates() [3/3]

◆ Reduce()

◆ Sum()

Field Documentation

◆ lane_id

◆ linear_tid

◆ temp_storage

◆ warp_id