AgentReduce implements a stateful abstraction of CUDA thread blocks for participating in device-wide reduction. More...
AgentReduce implements a stateful abstraction of CUDA thread blocks for participating in device-wide reduction.
Each thread reduces only the values it loads. If IS_FIRST_TILE, this partial reduction is stored into thread_aggregate. Otherwise it is accumulated into thread_aggregate. Template parameter ReductionOp: binary reduction operator type having member T operator()(const T &a, const T &b).
Definition at line 99 of file agent_reduce.cuh.
| Data Structures | |
| struct | _TempStorage | 
| Shared memory type required by this thread block.  More... | |
| struct | TempStorage | 
| Alias wrapper allowing storage to be unioned.  More... | |
| Public Types | |
| enum | { BLOCK_THREADS = AgentReducePolicy::BLOCK_THREADS , ITEMS_PER_THREAD = AgentReducePolicy::ITEMS_PER_THREAD , VECTOR_LOAD_LENGTH = CUB_MIN(ITEMS_PER_THREAD, AgentReducePolicy::VECTOR_LOAD_LENGTH) , TILE_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD , ATTEMPT_VECTORIZATION } | 
| Constants.  More... | |
| typedef std::iterator_traits< InputIteratorT >::value_type | InputT | 
| The input value type. | |
| typedef If<(Equals< typename std::iterator_traits< OutputIteratorT >::value_type, void >::VALUE), typename std::iterator_traits< InputIteratorT >::value_type, typename std::iterator_traits< OutputIteratorT >::value_type >::Type | OutputT | 
| The output value type. | |
| typedef CubVector< InputT, AgentReducePolicy::VECTOR_LOAD_LENGTH >::Type | VectorT | 
| Vector type of InputT for data movement. | |
| typedef If< IsPointer< InputIteratorT >::VALUE, CacheModifiedInputIterator< AgentReducePolicy::LOAD_MODIFIER, InputT, OffsetT >, InputIteratorT >::Type | WrappedInputIteratorT | 
| Input iterator wrapper type (for applying cache modifier) | |
| typedef BlockReduce< OutputT, BLOCK_THREADS, AgentReducePolicy::BLOCK_ALGORITHM > | BlockReduceT | 
| Parameterized BlockReduce primitive. | |
| Public Member Functions | |
| __device__ __forceinline__ | AgentReduce (TempStorage &temp_storage, InputIteratorT d_in, ReductionOp reduction_op) | 
| template<int IS_FIRST_TILE> | |
| __device__ __forceinline__ void | ConsumeTile (OutputT &thread_aggregate, OffsetT block_offset, int, Int2Type< true >, Int2Type< false >) | 
| template<int IS_FIRST_TILE> | |
| __device__ __forceinline__ void | ConsumeTile (OutputT &thread_aggregate, OffsetT block_offset, int, Int2Type< true >, Int2Type< true >) | 
| template<int IS_FIRST_TILE, int CAN_VECTORIZE> | |
| __device__ __forceinline__ void | ConsumeTile (OutputT &thread_aggregate, OffsetT block_offset, int valid_items, Int2Type< false >, Int2Type< CAN_VECTORIZE >) | 
| template<int CAN_VECTORIZE> | |
| __device__ __forceinline__ OutputT | ConsumeRange (GridEvenShare< OffsetT > &even_share, Int2Type< CAN_VECTORIZE > can_vectorize) | 
| Reduce a contiguous segment of input tiles. | |
| __device__ __forceinline__ OutputT | ConsumeRange (OffsetT block_offset, OffsetT block_end) | 
| Reduce a contiguous segment of input tiles. | |
| __device__ __forceinline__ OutputT | ConsumeTiles (GridEvenShare< OffsetT > &even_share) | 
| Static Public Member Functions | |
| template<typename Iterator > | |
| static __device__ __forceinline__ bool | IsAligned (Iterator d_in, Int2Type< true >) | 
| template<typename Iterator > | |
| static __device__ __forceinline__ bool | IsAligned (Iterator, Int2Type< false >) | 
| Data Fields | |
| _TempStorage & | temp_storage | 
| Reference to temp_storage. | |
| InputIteratorT | d_in | 
| Input data to reduce. | |
| WrappedInputIteratorT | d_wrapped_in | 
| Wrapped input data to reduce. | |
| ReductionOp | reduction_op | 
| Binary reduction operator. | |
| Static Public Attributes | |
| static const CacheLoadModifier | LOAD_MODIFIER = AgentReducePolicy::LOAD_MODIFIER | 
| static const BlockReduceAlgorithm | BLOCK_ALGORITHM = AgentReducePolicy::BLOCK_ALGORITHM | 
| typedef BlockReduce<OutputT, BLOCK_THREADS, AgentReducePolicy::BLOCK_ALGORITHM> cub::AgentReduce< AgentReducePolicy, InputIteratorT, OutputIteratorT, OffsetT, ReductionOp >::BlockReduceT | 
Parameterized BlockReduce primitive.
Definition at line 142 of file agent_reduce.cuh.
| typedef std::iterator_traits<InputIteratorT>::value_type cub::AgentReduce< AgentReducePolicy, InputIteratorT, OutputIteratorT, OffsetT, ReductionOp >::InputT | 
The input value type.
Definition at line 107 of file agent_reduce.cuh.
| typedef If<(Equals<typename std::iterator_traits<OutputIteratorT>::value_type,void>::VALUE),typename std::iterator_traits<InputIteratorT>::value_type,typename std::iterator_traits<OutputIteratorT>::value_type>::Type cub::AgentReduce< AgentReducePolicy, InputIteratorT, OutputIteratorT, OffsetT, ReductionOp >::OutputT | 
The output value type.
Definition at line 112 of file agent_reduce.cuh.
| typedef CubVector<InputT,AgentReducePolicy::VECTOR_LOAD_LENGTH>::Type cub::AgentReduce< AgentReducePolicy, InputIteratorT, OutputIteratorT, OffsetT, ReductionOp >::VectorT | 
Vector type of InputT for data movement.
Definition at line 115 of file agent_reduce.cuh.
| typedef If<IsPointer<InputIteratorT>::VALUE,CacheModifiedInputIterator<AgentReducePolicy::LOAD_MODIFIER,InputT,OffsetT>,InputIteratorT>::Type cub::AgentReduce< AgentReducePolicy, InputIteratorT, OutputIteratorT, OffsetT, ReductionOp >::WrappedInputIteratorT | 
Input iterator wrapper type (for applying cache modifier)
Definition at line 121 of file agent_reduce.cuh.
| anonymous enum | 
Constants.
Definition at line 124 of file agent_reduce.cuh.
| 
 | inline | 
Constructor
| temp_storage | Reference to temp_storage | 
| d_in | Input data to reduce | 
| reduction_op | Binary reduction operator | 
Definition at line 195 of file agent_reduce.cuh.
| 
 | inline | 
Reduce a contiguous segment of input tiles.
| even_share | GridEvenShare descriptor | 
| can_vectorize | Whether or not we can vectorize loads | 
Definition at line 312 of file agent_reduce.cuh.
| 
 | inline | 
Reduce a contiguous segment of input tiles.
| [in] | block_offset | Threadblock begin offset (inclusive) | 
| [in] | block_end | Threadblock end offset (exclusive) | 
Definition at line 352 of file agent_reduce.cuh.
| 
 | inline | 
Consume a partial tile of input
| block_offset | The offset of the tile to consume | 
| valid_items | The number of valid items in the tile | 
Definition at line 277 of file agent_reduce.cuh.
| 
 | inline | 
Consume a full tile of input (non-vectorized)
| block_offset | The offset of the tile to consume | 
Definition at line 215 of file agent_reduce.cuh.
| 
 | inline | 
Consume a full tile of input (vectorized)
| block_offset | The offset of the tile to consume | 
Definition at line 238 of file agent_reduce.cuh.
| 
 | inline | 
Reduce a contiguous segment of input tiles
| [in] | even_share | GridEvenShare descriptor | 
Definition at line 368 of file agent_reduce.cuh.
| 
 | inline static | 
Definition at line 171 of file agent_reduce.cuh.
| 
 | inline static | 
Definition at line 180 of file agent_reduce.cuh.
| 
 | static | 
Definition at line 139 of file agent_reduce.cuh.
| InputIteratorT cub::AgentReduce< AgentReducePolicy, InputIteratorT, OutputIteratorT, OffsetT, ReductionOp >::d_in | 
Input data to reduce.
Definition at line 159 of file agent_reduce.cuh.
| WrappedInputIteratorT cub::AgentReduce< AgentReducePolicy, InputIteratorT, OutputIteratorT, OffsetT, ReductionOp >::d_wrapped_in | 
Wrapped input data to reduce.
Definition at line 160 of file agent_reduce.cuh.
| 
 | static | 
Definition at line 138 of file agent_reduce.cuh.
| ReductionOp cub::AgentReduce< AgentReducePolicy, InputIteratorT, OutputIteratorT, OffsetT, ReductionOp >::reduction_op | 
Binary reduction operator.
Definition at line 161 of file agent_reduce.cuh.
| _TempStorage& cub::AgentReduce< AgentReducePolicy, InputIteratorT, OutputIteratorT, OffsetT, ReductionOp >::temp_storage | 
Reference to temp_storage.
Definition at line 158 of file agent_reduce.cuh.