AgentReduce implements a stateful abstraction of CUDA thread blocks for participating in device-wide reduction. More...

Detailed Description

template<typename AgentReducePolicy, typename InputIteratorT, typename OutputIteratorT, typename OffsetT, typename ReductionOp>
struct cub::AgentReduce< AgentReducePolicy, InputIteratorT, OutputIteratorT, OffsetT, ReductionOp >

AgentReduce implements a stateful abstraction of CUDA thread blocks for participating in device-wide reduction.

Each thread reduces only the values it loads. If IS_FIRST_TILE, this partial reduction is stored into thread_aggregate. Otherwise it is accumulated into thread_aggregate. ReductionOp is the binary reduction operator type having member T operator()(const T &a, const T &b).

Definition at line 99 of file agent_reduce.cuh.

Data Structures
struct	_TempStorage
	Shared memory type required by this thread block. More...

struct	TempStorage
	Alias wrapper allowing storage to be unioned. More...

Public Types
enum	{ BLOCK_THREADS = AgentReducePolicy::BLOCK_THREADS , ITEMS_PER_THREAD = AgentReducePolicy::ITEMS_PER_THREAD , VECTOR_LOAD_LENGTH = CUB_MIN(ITEMS_PER_THREAD, AgentReducePolicy::VECTOR_LOAD_LENGTH) , TILE_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD , ATTEMPT_VECTORIZATION }
	Constants. More...

typedef std::iterator_traits< InputIteratorT >::value_type	InputT
	The input value type.

typedef If<(Equals< typename std::iterator_traits< OutputIteratorT >::value_type, void >::VALUE), typename std::iterator_traits< InputIteratorT >::value_type, typename std::iterator_traits< OutputIteratorT >::value_type >::Type	OutputT
	The output value type.

typedef CubVector< InputT, AgentReducePolicy::VECTOR_LOAD_LENGTH >::Type	VectorT
	Vector type of InputT for data movement.

typedef If< IsPointer< InputIteratorT >::VALUE, CacheModifiedInputIterator< AgentReducePolicy::LOAD_MODIFIER, InputT, OffsetT >, InputIteratorT >::Type	WrappedInputIteratorT
	Input iterator wrapper type (for applying cache modifier)

typedef BlockReduce< OutputT, BLOCK_THREADS, AgentReducePolicy::BLOCK_ALGORITHM >	BlockReduceT
	Parameterized BlockReduce primitive.

Public Member Functions
__device__ __forceinline__	AgentReduce (TempStorage &temp_storage, InputIteratorT d_in, ReductionOp reduction_op)

template<int IS_FIRST_TILE>
__device__ __forceinline__ void	ConsumeTile (OutputT &thread_aggregate, OffsetT block_offset, int, Int2Type< true >, Int2Type< false >)

template<int IS_FIRST_TILE>
__device__ __forceinline__ void	ConsumeTile (OutputT &thread_aggregate, OffsetT block_offset, int, Int2Type< true >, Int2Type< true >)

template<int IS_FIRST_TILE, int CAN_VECTORIZE>
__device__ __forceinline__ void	ConsumeTile (OutputT &thread_aggregate, OffsetT block_offset, int valid_items, Int2Type< false >, Int2Type< CAN_VECTORIZE >)

template<int CAN_VECTORIZE>
__device__ __forceinline__ OutputT	ConsumeRange (GridEvenShare< OffsetT > &even_share, Int2Type< CAN_VECTORIZE > can_vectorize)
	Reduce a contiguous segment of input tiles.

__device__ __forceinline__ OutputT	ConsumeRange (OffsetT block_offset, OffsetT block_end)
	Reduce a contiguous segment of input tiles.

__device__ __forceinline__ OutputT	ConsumeTiles (GridEvenShare< OffsetT > &even_share)

Static Public Member Functions
template<typename Iterator >
static __device__ __forceinline__ bool	IsAligned (Iterator d_in, Int2Type< true >)

template<typename Iterator >
static __device__ __forceinline__ bool	IsAligned (Iterator, Int2Type< false >)

Data Fields
_TempStorage &	temp_storage
	Reference to temp_storage.

InputIteratorT	d_in
	Input data to reduce.

WrappedInputIteratorT	d_wrapped_in
	Wrapped input data to reduce.

ReductionOp	reduction_op
	Binary reduction operator.

Static Public Attributes
static const CacheLoadModifier	LOAD_MODIFIER = AgentReducePolicy::LOAD_MODIFIER

static const BlockReduceAlgorithm	BLOCK_ALGORITHM = AgentReducePolicy::BLOCK_ALGORITHM

Member Typedef Documentation

◆ BlockReduceT

template<typename AgentReducePolicy , typename InputIteratorT , typename OutputIteratorT , typename OffsetT , typename ReductionOp >

typedef BlockReduce<OutputT, BLOCK_THREADS, AgentReducePolicy::BLOCK_ALGORITHM> cub::AgentReduce< AgentReducePolicy, InputIteratorT, OutputIteratorT, OffsetT, ReductionOp >::BlockReduceT

Parameterized BlockReduce primitive.

Definition at line 142 of file agent_reduce.cuh.

◆ InputT

template<typename AgentReducePolicy , typename InputIteratorT , typename OutputIteratorT , typename OffsetT , typename ReductionOp >

typedef std::iterator_traits<InputIteratorT>::value_type cub::AgentReduce< AgentReducePolicy, InputIteratorT, OutputIteratorT, OffsetT, ReductionOp >::InputT

The input value type.

Definition at line 107 of file agent_reduce.cuh.

◆ OutputT

template<typename AgentReducePolicy , typename InputIteratorT , typename OutputIteratorT , typename OffsetT , typename ReductionOp >

typedef If<(Equals<typename std::iterator_traits<OutputIteratorT>::value_type,void>::VALUE),typename std::iterator_traits<InputIteratorT>::value_type,typename std::iterator_traits<OutputIteratorT>::value_type>::Type cub::AgentReduce< AgentReducePolicy, InputIteratorT, OutputIteratorT, OffsetT, ReductionOp >::OutputT

The output value type.

Definition at line 112 of file agent_reduce.cuh.

◆ VectorT

template<typename AgentReducePolicy , typename InputIteratorT , typename OutputIteratorT , typename OffsetT , typename ReductionOp >

typedef CubVector<InputT,AgentReducePolicy::VECTOR_LOAD_LENGTH>::Type cub::AgentReduce< AgentReducePolicy, InputIteratorT, OutputIteratorT, OffsetT, ReductionOp >::VectorT

Vector type of InputT for data movement.

Definition at line 115 of file agent_reduce.cuh.

◆ WrappedInputIteratorT

template<typename AgentReducePolicy , typename InputIteratorT , typename OutputIteratorT , typename OffsetT , typename ReductionOp >

typedef If<IsPointer<InputIteratorT>::VALUE,CacheModifiedInputIterator<AgentReducePolicy::LOAD_MODIFIER,InputT,OffsetT>,InputIteratorT>::Type cub::AgentReduce< AgentReducePolicy, InputIteratorT, OutputIteratorT, OffsetT, ReductionOp >::WrappedInputIteratorT

Input iterator wrapper type (for applying cache modifier)

Definition at line 121 of file agent_reduce.cuh.

Member Enumeration Documentation

◆ anonymous enum

template<typename AgentReducePolicy , typename InputIteratorT , typename OutputIteratorT , typename OffsetT , typename ReductionOp >

anonymous enum

Constants.

Definition at line 124 of file agent_reduce.cuh.

Constructor & Destructor Documentation

◆ AgentReduce()

template<typename AgentReducePolicy , typename InputIteratorT , typename OutputIteratorT , typename OffsetT , typename ReductionOp >

__device__ __forceinline__ cub::AgentReduce< AgentReducePolicy, InputIteratorT, OutputIteratorT, OffsetT, ReductionOp >::AgentReduce	(	TempStorage &	temp_storage,
		InputIteratorT	d_in,
		ReductionOp	reduction_op
	)

inline

Constructor

Parameters

temp_storage	Reference to temp_storage
d_in	Input data to reduce
reduction_op	Binary reduction operator

Definition at line 195 of file agent_reduce.cuh.

Member Function Documentation

◆ ConsumeRange() [1/2]

template<typename AgentReducePolicy , typename InputIteratorT , typename OutputIteratorT , typename OffsetT , typename ReductionOp >

template<int CAN_VECTORIZE>

__device__ __forceinline__ OutputT cub::AgentReduce< AgentReducePolicy, InputIteratorT, OutputIteratorT, OffsetT, ReductionOp >::ConsumeRange	(	GridEvenShare< OffsetT > &	even_share,
		Int2Type< CAN_VECTORIZE >	can_vectorize
	)

inline

Reduce a contiguous segment of input tiles.

Parameters

even_share	GridEvenShare descriptor
can_vectorize	Whether or not we can vectorize loads

Definition at line 312 of file agent_reduce.cuh.

◆ ConsumeRange() [2/2]

template<typename AgentReducePolicy , typename InputIteratorT , typename OutputIteratorT , typename OffsetT , typename ReductionOp >

__device__ __forceinline__ OutputT cub::AgentReduce< AgentReducePolicy, InputIteratorT, OutputIteratorT, OffsetT, ReductionOp >::ConsumeRange	(	OffsetT	block_offset,
		OffsetT	block_end
	)

inline

Reduce a contiguous segment of input tiles.

Parameters

[in]	block_offset	Threadblock begin offset (inclusive)
[in]	block_end	Threadblock end offset (exclusive)

Definition at line 352 of file agent_reduce.cuh.

◆ ConsumeTile() [1/3]

template<typename AgentReducePolicy , typename InputIteratorT , typename OutputIteratorT , typename OffsetT , typename ReductionOp >

template<int IS_FIRST_TILE, int CAN_VECTORIZE>

__device__ __forceinline__ void cub::AgentReduce< AgentReducePolicy, InputIteratorT, OutputIteratorT, OffsetT, ReductionOp >::ConsumeTile	(	OutputT &	thread_aggregate,
		OffsetT	block_offset,
		int	valid_items,
		Int2Type< false >	,
		Int2Type< CAN_VECTORIZE >
	)

inline

Consume a partial tile of input

Parameters

block_offset	The offset of the tile to consume
valid_items	The number of valid items in the tile

Definition at line 277 of file agent_reduce.cuh.

◆ ConsumeTile() [2/3]

template<typename AgentReducePolicy , typename InputIteratorT , typename OutputIteratorT , typename OffsetT , typename ReductionOp >

template<int IS_FIRST_TILE>

__device__ __forceinline__ void cub::AgentReduce< AgentReducePolicy, InputIteratorT, OutputIteratorT, OffsetT, ReductionOp >::ConsumeTile	(	OutputT &	thread_aggregate,
		OffsetT	block_offset,
		int	,
		Int2Type< true >	,
		Int2Type< false >
	)

inline

Consume a full tile of input (non-vectorized)

Parameters

block_offset The offset of the tile to consume

Definition at line 215 of file agent_reduce.cuh.

◆ ConsumeTile() [3/3]

template<typename AgentReducePolicy , typename InputIteratorT , typename OutputIteratorT , typename OffsetT , typename ReductionOp >

template<int IS_FIRST_TILE>

__device__ __forceinline__ void cub::AgentReduce< AgentReducePolicy, InputIteratorT, OutputIteratorT, OffsetT, ReductionOp >::ConsumeTile	(	OutputT &	thread_aggregate,
		OffsetT	block_offset,
		int	,
		Int2Type< true >	,
		Int2Type< true >
	)

inline

Consume a full tile of input (vectorized)

Parameters

block_offset The offset of the tile to consume

Definition at line 238 of file agent_reduce.cuh.

◆ ConsumeTiles()

template<typename AgentReducePolicy , typename InputIteratorT , typename OutputIteratorT , typename OffsetT , typename ReductionOp >

__device__ __forceinline__ OutputT cub::AgentReduce< AgentReducePolicy, InputIteratorT, OutputIteratorT, OffsetT, ReductionOp >::ConsumeTiles ( GridEvenShare< OffsetT > & even_share )

inline

Reduce a contiguous segment of input tiles

Parameters

[in] even_share GridEvenShare descriptor

Definition at line 368 of file agent_reduce.cuh.

◆ IsAligned() [1/2]

template<typename AgentReducePolicy , typename InputIteratorT , typename OutputIteratorT , typename OffsetT , typename ReductionOp >

template<typename Iterator >

static __device__ __forceinline__ bool cub::AgentReduce< AgentReducePolicy, InputIteratorT, OutputIteratorT, OffsetT, ReductionOp >::IsAligned	(	Iterator	d_in,
		Int2Type< true >
	)

inline static

Definition at line 171 of file agent_reduce.cuh.

◆ IsAligned() [2/2]

template<typename AgentReducePolicy , typename InputIteratorT , typename OutputIteratorT , typename OffsetT , typename ReductionOp >

template<typename Iterator >

static __device__ __forceinline__ bool cub::AgentReduce< AgentReducePolicy, InputIteratorT, OutputIteratorT, OffsetT, ReductionOp >::IsAligned	(	Iterator	,
		Int2Type< false >
	)

inline static

Definition at line 180 of file agent_reduce.cuh.

Field Documentation

◆ BLOCK_ALGORITHM

template<typename AgentReducePolicy , typename InputIteratorT , typename OutputIteratorT , typename OffsetT , typename ReductionOp >

const BlockReduceAlgorithm cub::AgentReduce< AgentReducePolicy, InputIteratorT, OutputIteratorT, OffsetT, ReductionOp >::BLOCK_ALGORITHM = AgentReducePolicy::BLOCK_ALGORITHM

static

Definition at line 139 of file agent_reduce.cuh.

◆ d_in

template<typename AgentReducePolicy , typename InputIteratorT , typename OutputIteratorT , typename OffsetT , typename ReductionOp >

InputIteratorT cub::AgentReduce< AgentReducePolicy, InputIteratorT, OutputIteratorT, OffsetT, ReductionOp >::d_in

Input data to reduce.

Definition at line 159 of file agent_reduce.cuh.

◆ d_wrapped_in

template<typename AgentReducePolicy , typename InputIteratorT , typename OutputIteratorT , typename OffsetT , typename ReductionOp >

WrappedInputIteratorT cub::AgentReduce< AgentReducePolicy, InputIteratorT, OutputIteratorT, OffsetT, ReductionOp >::d_wrapped_in

Wrapped input data to reduce.

Definition at line 160 of file agent_reduce.cuh.

◆ LOAD_MODIFIER

template<typename AgentReducePolicy , typename InputIteratorT , typename OutputIteratorT , typename OffsetT , typename ReductionOp >

const CacheLoadModifier cub::AgentReduce< AgentReducePolicy, InputIteratorT, OutputIteratorT, OffsetT, ReductionOp >::LOAD_MODIFIER = AgentReducePolicy::LOAD_MODIFIER

static

Definition at line 138 of file agent_reduce.cuh.

◆ reduction_op

template<typename AgentReducePolicy , typename InputIteratorT , typename OutputIteratorT , typename OffsetT , typename ReductionOp >

ReductionOp cub::AgentReduce< AgentReducePolicy, InputIteratorT, OutputIteratorT, OffsetT, ReductionOp >::reduction_op

Binary reduction operator.

Definition at line 161 of file agent_reduce.cuh.

◆ temp_storage

template<typename AgentReducePolicy , typename InputIteratorT , typename OutputIteratorT , typename OffsetT , typename ReductionOp >

_TempStorage& cub::AgentReduce< AgentReducePolicy, InputIteratorT, OutputIteratorT, OffsetT, ReductionOp >::temp_storage

Reference to temp_storage.

Definition at line 158 of file agent_reduce.cuh.

The documentation for this struct was generated from the following file:

openfpm_data/src/util/cuda/cub_old/agent/agent_reduce.cuh

Detailed Description

Data Structures

Public Types

Public Member Functions

Static Public Member Functions

Data Fields

Static Public Attributes

Member Typedef Documentation

◆ BlockReduceT

◆ InputT

◆ OutputT

◆ VectorT

◆ WrappedInputIteratorT

Member Enumeration Documentation

◆ anonymous enum

Constructor & Destructor Documentation

◆ AgentReduce()

Member Function Documentation

◆ ConsumeRange() [1/2]

◆ ConsumeRange() [2/2]

◆ ConsumeTile() [1/3]

◆ ConsumeTile() [2/3]

◆ ConsumeTile() [3/3]

◆ ConsumeTiles()

◆ IsAligned() [1/2]

◆ IsAligned() [2/2]

Field Documentation

◆ BLOCK_ALGORITHM

◆ d_in

◆ d_wrapped_in

◆ LOAD_MODIFIER

◆ reduction_op

◆ temp_storage