|
| enum | {
BLOCK_THREADS = AgentReducePolicy::BLOCK_THREADS
, ITEMS_PER_THREAD = AgentReducePolicy::ITEMS_PER_THREAD
, VECTOR_LOAD_LENGTH = CUB_MIN(ITEMS_PER_THREAD, AgentReducePolicy::VECTOR_LOAD_LENGTH)
, TILE_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD
,
ATTEMPT_VECTORIZATION
} |
| | Constants.
|
| |
|
typedef std::iterator_traits< InputIteratorT >::value_type | InputT |
| | The input value type.
|
| |
|
typedef If<(Equals< typename std::iterator_traits< OutputIteratorT >::value_type, void >::VALUE), typename std::iterator_traits< InputIteratorT >::value_type, typename std::iterator_traits< OutputIteratorT >::value_type >::Type | OutputT |
| | The output value type.
|
| |
|
typedef CubVector< InputT, AgentReducePolicy::VECTOR_LOAD_LENGTH >::Type | VectorT |
| | Vector type of InputT for data movement.
|
| |
|
typedef If< IsPointer< InputIteratorT >::VALUE, CacheModifiedInputIterator< AgentReducePolicy::LOAD_MODIFIER, InputT, OffsetT >, InputIteratorT >::Type | WrappedInputIteratorT |
| | Input iterator wrapper type (for applying cache modifier)
|
| |
|
typedef BlockReduce< OutputT, BLOCK_THREADS, AgentReducePolicy::BLOCK_ALGORITHM > | BlockReduceT |
| | Parameterized BlockReduce primitive.
|
| |
|
| __device__ __forceinline__ | AgentReduce (TempStorage &temp_storage, InputIteratorT d_in, ReductionOp reduction_op) |
| |
| template<int IS_FIRST_TILE> |
| __device__ __forceinline__ void | ConsumeTile (OutputT &thread_aggregate, OffsetT block_offset, int, Int2Type< true >, Int2Type< false >) |
| |
| template<int IS_FIRST_TILE> |
| __device__ __forceinline__ void | ConsumeTile (OutputT &thread_aggregate, OffsetT block_offset, int, Int2Type< true >, Int2Type< true >) |
| |
| template<int IS_FIRST_TILE, int CAN_VECTORIZE> |
| __device__ __forceinline__ void | ConsumeTile (OutputT &thread_aggregate, OffsetT block_offset, int valid_items, Int2Type< false >, Int2Type< CAN_VECTORIZE >) |
| |
| template<int CAN_VECTORIZE> |
| __device__ __forceinline__ OutputT | ConsumeRange (GridEvenShare< OffsetT > &even_share, Int2Type< CAN_VECTORIZE > can_vectorize) |
| | Reduce a contiguous segment of input tiles. More...
|
| |
| __device__ __forceinline__ OutputT | ConsumeRange (OffsetT block_offset, OffsetT block_end) |
| | Reduce a contiguous segment of input tiles. More...
|
| |
| __device__ __forceinline__ OutputT | ConsumeTiles (GridEvenShare< OffsetT > &even_share) |
| |
template<typename AgentReducePolicy, typename InputIteratorT, typename OutputIteratorT, typename OffsetT, typename ReductionOp>
struct cub::AgentReduce< AgentReducePolicy, InputIteratorT, OutputIteratorT, OffsetT, ReductionOp >
AgentReduce implements a stateful abstraction of CUDA thread blocks for participating in device-wide reduction.
Each thread reduces only the values it loads. If IS_FIRST_TILE, this partial reduction is stored into thread_aggregate. Otherwise it is accumulated into thread_aggregate. Template parameter ReductionOp: binary reduction operator type having member T operator()(const T &a, const T &b).
Definition at line 99 of file agent_reduce.cuh.
template<typename AgentReducePolicy , typename InputIteratorT , typename OutputIteratorT , typename OffsetT , typename ReductionOp >
template<int IS_FIRST_TILE, int CAN_VECTORIZE>
Consume a partial tile of input
- Parameters
-
| block_offset | The offset of the tile to consume |
| valid_items | The number of valid items in the tile |
Definition at line 277 of file agent_reduce.cuh.