AgentHistogram implements a stateful abstraction of CUDA thread blocks for participating in device-wide histogram. More...
AgentHistogram implements a stateful abstraction of CUDA thread blocks for participating in device-wide histogram.
Template parameter PTX_ARCH: PTX compute capability
Definition at line 111 of file agent_histogram.cuh.
Data Structures | |
struct | _TempStorage |
Shared memory type required by this thread block. More... | |
struct | TempStorage |
Temporary storage type (unionable) More... | |
Public Types | |
enum | { BLOCK_THREADS = AgentHistogramPolicyT::BLOCK_THREADS , PIXELS_PER_THREAD = AgentHistogramPolicyT::PIXELS_PER_THREAD , SAMPLES_PER_THREAD = PIXELS_PER_THREAD * NUM_CHANNELS , QUADS_PER_THREAD = SAMPLES_PER_THREAD / 4 , TILE_PIXELS = PIXELS_PER_THREAD * BLOCK_THREADS , TILE_SAMPLES = SAMPLES_PER_THREAD * BLOCK_THREADS , IS_RLE_COMPRESS = AgentHistogramPolicyT::IS_RLE_COMPRESS , MEM_PREFERENCE , IS_WORK_STEALING = AgentHistogramPolicyT::IS_WORK_STEALING } |
Constants. More... | |
typedef std::iterator_traits< SampleIteratorT >::value_type | SampleT |
The sample type of the input iterator. | |
typedef CubVector< SampleT, NUM_CHANNELS >::Type | PixelT |
The pixel type of SampleT. | |
typedef CubVector< SampleT, 4 >::Type | QuadT |
The quad type of SampleT. | |
typedef If< IsPointer< SampleIteratorT >::VALUE, CacheModifiedInputIterator< LOAD_MODIFIER, SampleT, OffsetT >, SampleIteratorT >::Type | WrappedSampleIteratorT |
Input iterator wrapper type (for applying cache modifier) | |
typedef CacheModifiedInputIterator< LOAD_MODIFIER, PixelT, OffsetT > | WrappedPixelIteratorT |
Pixel input iterator type (for applying cache modifier) | |
typedef CacheModifiedInputIterator< LOAD_MODIFIER, QuadT, OffsetT > | WrappedQuadIteratorT |
Quad input iterator type (for applying cache modifier) | |
typedef BlockLoad< SampleT, BLOCK_THREADS, SAMPLES_PER_THREAD, AgentHistogramPolicyT::LOAD_ALGORITHM > | BlockLoadSampleT |
Parameterized BlockLoad type for samples. | |
typedef BlockLoad< PixelT, BLOCK_THREADS, PIXELS_PER_THREAD, AgentHistogramPolicyT::LOAD_ALGORITHM > | BlockLoadPixelT |
Parameterized BlockLoad type for pixels. | |
typedef BlockLoad< QuadT, BLOCK_THREADS, QUADS_PER_THREAD, AgentHistogramPolicyT::LOAD_ALGORITHM > | BlockLoadQuadT |
Parameterized BlockLoad type for quads. | |
Public Member Functions | |
__device__ __forceinline__ void | InitBinCounters (CounterT *privatized_histograms[NUM_ACTIVE_CHANNELS]) |
__device__ __forceinline__ void | InitSmemBinCounters () |
__device__ __forceinline__ void | InitGmemBinCounters () |
__device__ __forceinline__ void | StoreOutput (CounterT *privatized_histograms[NUM_ACTIVE_CHANNELS]) |
__device__ __forceinline__ void | StoreSmemOutput () |
__device__ __forceinline__ void | StoreGmemOutput () |
__device__ __forceinline__ void | AccumulatePixels (SampleT samples[PIXELS_PER_THREAD][NUM_CHANNELS], bool is_valid[PIXELS_PER_THREAD], CounterT *privatized_histograms[NUM_ACTIVE_CHANNELS], Int2Type< true > is_rle_compress) |
__device__ __forceinline__ void | AccumulatePixels (SampleT samples[PIXELS_PER_THREAD][NUM_CHANNELS], bool is_valid[PIXELS_PER_THREAD], CounterT *privatized_histograms[NUM_ACTIVE_CHANNELS], Int2Type< false > is_rle_compress) |
__device__ __forceinline__ void | AccumulateSmemPixels (SampleT samples[PIXELS_PER_THREAD][NUM_CHANNELS], bool is_valid[PIXELS_PER_THREAD]) |
__device__ __forceinline__ void | AccumulateGmemPixels (SampleT samples[PIXELS_PER_THREAD][NUM_CHANNELS], bool is_valid[PIXELS_PER_THREAD]) |
template<int _NUM_ACTIVE_CHANNELS> | |
__device__ __forceinline__ void | LoadFullAlignedTile (OffsetT block_offset, int valid_samples, SampleT(&samples)[PIXELS_PER_THREAD][NUM_CHANNELS], Int2Type< _NUM_ACTIVE_CHANNELS > num_active_channels) |
__device__ __forceinline__ void | LoadFullAlignedTile (OffsetT block_offset, int valid_samples, SampleT(&samples)[PIXELS_PER_THREAD][NUM_CHANNELS], Int2Type< 1 > num_active_channels) |
__device__ __forceinline__ void | LoadTile (OffsetT block_offset, int valid_samples, SampleT(&samples)[PIXELS_PER_THREAD][NUM_CHANNELS], Int2Type< true > is_full_tile, Int2Type< true > is_aligned) |
__device__ __forceinline__ void | LoadTile (OffsetT block_offset, int valid_samples, SampleT(&samples)[PIXELS_PER_THREAD][NUM_CHANNELS], Int2Type< true > is_full_tile, Int2Type< false > is_aligned) |
__device__ __forceinline__ void | LoadTile (OffsetT block_offset, int valid_samples, SampleT(&samples)[PIXELS_PER_THREAD][NUM_CHANNELS], Int2Type< false > is_full_tile, Int2Type< true > is_aligned) |
__device__ __forceinline__ void | LoadTile (OffsetT block_offset, int valid_samples, SampleT(&samples)[PIXELS_PER_THREAD][NUM_CHANNELS], Int2Type< false > is_full_tile, Int2Type< false > is_aligned) |
template<bool IS_ALIGNED, bool IS_FULL_TILE> | |
__device__ __forceinline__ void | ConsumeTile (OffsetT block_offset, int valid_samples) |
template<bool IS_ALIGNED> | |
__device__ __forceinline__ void | ConsumeTiles (OffsetT num_row_pixels, OffsetT num_rows, OffsetT row_stride_samples, int tiles_per_row, GridQueue< int > tile_queue, Int2Type< true > is_work_stealing) |
template<bool IS_ALIGNED> | |
__device__ __forceinline__ void | ConsumeTiles (OffsetT num_row_pixels, OffsetT num_rows, OffsetT row_stride_samples, int tiles_per_row, GridQueue< int > tile_queue, Int2Type< false > is_work_stealing) |
template<CacheLoadModifier _MODIFIER, typename _ValueT , typename _OffsetT > | |
__device__ __forceinline__ SampleT * | NativePointer (CacheModifiedInputIterator< _MODIFIER, _ValueT, _OffsetT > itr) |
template<typename IteratorT > | |
__device__ __forceinline__ SampleT * | NativePointer (IteratorT itr) |
__device__ __forceinline__ | AgentHistogram (TempStorage &temp_storage, SampleIteratorT d_samples, int(&num_output_bins)[NUM_ACTIVE_CHANNELS], int(&num_privatized_bins)[NUM_ACTIVE_CHANNELS], CounterT *(&d_output_histograms)[NUM_ACTIVE_CHANNELS], CounterT *(&d_privatized_histograms)[NUM_ACTIVE_CHANNELS], OutputDecodeOpT(&output_decode_op)[NUM_ACTIVE_CHANNELS], PrivatizedDecodeOpT(&privatized_decode_op)[NUM_ACTIVE_CHANNELS]) |
__device__ __forceinline__ void | ConsumeTiles (OffsetT num_row_pixels, OffsetT num_rows, OffsetT row_stride_samples, int tiles_per_row, GridQueue< int > tile_queue) |
__device__ __forceinline__ void | InitBinCounters () |
__device__ __forceinline__ void | StoreOutput () |
Data Fields | |
_TempStorage & | temp_storage |
Reference to temp_storage. | |
WrappedSampleIteratorT | d_wrapped_samples |
Sample input iterator (with cache modifier applied, if possible) | |
SampleT * | d_native_samples |
Native pointer for input samples (possibly NULL if unavailable) | |
int(& | num_output_bins )[NUM_ACTIVE_CHANNELS] |
The number of output bins for each channel. | |
int(& | num_privatized_bins )[NUM_ACTIVE_CHANNELS] |
The number of privatized bins for each channel. | |
CounterT * | d_privatized_histograms [NUM_ACTIVE_CHANNELS] |
Reference to gmem privatized histograms for each channel. | |
CounterT *(& | d_output_histograms )[NUM_ACTIVE_CHANNELS] |
Reference to final output histograms (gmem) | |
OutputDecodeOpT(& | output_decode_op )[NUM_ACTIVE_CHANNELS] |
The transform operator for determining output bin-ids from privatized counter indices, one for each channel. | |
PrivatizedDecodeOpT(& | privatized_decode_op )[NUM_ACTIVE_CHANNELS] |
The transform operator for determining privatized counter indices from samples, one for each channel. | |
bool | prefer_smem |
Whether to prefer privatized smem counters vs privatized global counters. | |
Static Public Attributes | |
static const CacheLoadModifier | LOAD_MODIFIER = AgentHistogramPolicyT::LOAD_MODIFIER |
Cache load modifier for reading input elements. | |
typedef BlockLoad< PixelT, BLOCK_THREADS, PIXELS_PER_THREAD, AgentHistogramPolicyT::LOAD_ALGORITHM> cub::AgentHistogram< AgentHistogramPolicyT, PRIVATIZED_SMEM_BINS, NUM_CHANNELS, NUM_ACTIVE_CHANNELS, SampleIteratorT, CounterT, PrivatizedDecodeOpT, OutputDecodeOpT, OffsetT, PTX_ARCH >::BlockLoadPixelT |
Parameterized BlockLoad type for pixels.
Definition at line 179 of file agent_histogram.cuh.
typedef BlockLoad< QuadT, BLOCK_THREADS, QUADS_PER_THREAD, AgentHistogramPolicyT::LOAD_ALGORITHM> cub::AgentHistogram< AgentHistogramPolicyT, PRIVATIZED_SMEM_BINS, NUM_CHANNELS, NUM_ACTIVE_CHANNELS, SampleIteratorT, CounterT, PrivatizedDecodeOpT, OutputDecodeOpT, OffsetT, PTX_ARCH >::BlockLoadQuadT |
Parameterized BlockLoad type for quads.
Definition at line 187 of file agent_histogram.cuh.
typedef BlockLoad< SampleT, BLOCK_THREADS, SAMPLES_PER_THREAD, AgentHistogramPolicyT::LOAD_ALGORITHM> cub::AgentHistogram< AgentHistogramPolicyT, PRIVATIZED_SMEM_BINS, NUM_CHANNELS, NUM_ACTIVE_CHANNELS, SampleIteratorT, CounterT, PrivatizedDecodeOpT, OutputDecodeOpT, OffsetT, PTX_ARCH >::BlockLoadSampleT |
Parameterized BlockLoad type for samples.
Definition at line 171 of file agent_histogram.cuh.
typedef CubVector<SampleT,NUM_CHANNELS>::Type cub::AgentHistogram< AgentHistogramPolicyT, PRIVATIZED_SMEM_BINS, NUM_CHANNELS, NUM_ACTIVE_CHANNELS, SampleIteratorT, CounterT, PrivatizedDecodeOpT, OutputDecodeOpT, OffsetT, PTX_ARCH >::PixelT |
The pixel type of SampleT.
Definition at line 121 of file agent_histogram.cuh.
typedef CubVector<SampleT,4>::Type cub::AgentHistogram< AgentHistogramPolicyT, PRIVATIZED_SMEM_BINS, NUM_CHANNELS, NUM_ACTIVE_CHANNELS, SampleIteratorT, CounterT, PrivatizedDecodeOpT, OutputDecodeOpT, OffsetT, PTX_ARCH >::QuadT |
The quad type of SampleT.
Definition at line 124 of file agent_histogram.cuh.
typedef std::iterator_traits<SampleIteratorT>::value_type cub::AgentHistogram< AgentHistogramPolicyT, PRIVATIZED_SMEM_BINS, NUM_CHANNELS, NUM_ACTIVE_CHANNELS, SampleIteratorT, CounterT, PrivatizedDecodeOpT, OutputDecodeOpT, OffsetT, PTX_ARCH >::SampleT |
The sample type of the input iterator.
Definition at line 118 of file agent_histogram.cuh.
typedef CacheModifiedInputIterator<LOAD_MODIFIER, PixelT, OffsetT> cub::AgentHistogram< AgentHistogramPolicyT, PRIVATIZED_SMEM_BINS, NUM_CHANNELS, NUM_ACTIVE_CHANNELS, SampleIteratorT, CounterT, PrivatizedDecodeOpT, OutputDecodeOpT, OffsetT, PTX_ARCH >::WrappedPixelIteratorT |
Pixel input iterator type (for applying cache modifier)
Definition at line 159 of file agent_histogram.cuh.
typedef CacheModifiedInputIterator<LOAD_MODIFIER, QuadT, OffsetT> cub::AgentHistogram< AgentHistogramPolicyT, PRIVATIZED_SMEM_BINS, NUM_CHANNELS, NUM_ACTIVE_CHANNELS, SampleIteratorT, CounterT, PrivatizedDecodeOpT, OutputDecodeOpT, OffsetT, PTX_ARCH >::WrappedQuadIteratorT |
Quad input iterator type (for applying cache modifier)
Definition at line 163 of file agent_histogram.cuh.
typedef If<IsPointer<SampleIteratorT>::VALUE,CacheModifiedInputIterator<LOAD_MODIFIER,SampleT,OffsetT>,SampleIteratorT>::Type cub::AgentHistogram< AgentHistogramPolicyT, PRIVATIZED_SMEM_BINS, NUM_CHANNELS, NUM_ACTIVE_CHANNELS, SampleIteratorT, CounterT, PrivatizedDecodeOpT, OutputDecodeOpT, OffsetT, PTX_ARCH >::WrappedSampleIteratorT |
Input iterator wrapper type (for applying cache modifier)
Definition at line 155 of file agent_histogram.cuh.
anonymous enum |
Constants.
Definition at line 127 of file agent_histogram.cuh.
|
inline |
Constructor
temp_storage | Reference to temp_storage |
d_samples | Input data to reduce |
num_output_bins | The number of bins per final output histogram |
num_privatized_bins | The number of bins per privatized histogram |
d_output_histograms | Reference to final output histograms |
d_privatized_histograms | Reference to privatized histograms |
output_decode_op | The transform operator for determining output bin-ids from privatized counter indices, one for each channel |
privatized_decode_op | The transform operator for determining privatized counter indices from samples, one for each channel |
Definition at line 693 of file agent_histogram.cuh.
|
inline |
Accumulate pixel, specialized for gmem privatized histogram
Definition at line 427 of file agent_histogram.cuh.
|
inline |
Definition at line 387 of file agent_histogram.cuh.
|
inline |
Definition at line 345 of file agent_histogram.cuh.
|
inline |
Accumulate pixel, specialized for smem privatized histogram
Definition at line 411 of file agent_histogram.cuh.
|
inline |
Definition at line 548 of file agent_histogram.cuh.
|
inline |
Consume image
num_row_pixels | The number of multi-channel pixels per row in the region of interest |
num_rows | The number of rows in the region of interest |
row_stride_samples | The number of samples between starts of consecutive rows in the region of interest |
tiles_per_row | Number of image tiles per row |
tile_queue | Queue descriptor for assigning tiles of work to thread blocks |
Definition at line 728 of file agent_histogram.cuh.
|
inline |
num_row_pixels | The number of multi-channel pixels per row in the region of interest |
num_rows | The number of rows in the region of interest |
row_stride_samples | The number of samples between starts of consecutive rows in the region of interest |
tiles_per_row | Number of image tiles per row |
Definition at line 629 of file agent_histogram.cuh.
|
inline |
num_row_pixels | The number of multi-channel pixels per row in the region of interest |
num_rows | The number of rows in the region of interest |
row_stride_samples | The number of samples between starts of consecutive rows in the region of interest |
tiles_per_row | Number of image tiles per row |
Definition at line 581 of file agent_histogram.cuh.
|
inline |
Initialize privatized bin counters. Specialized for privatized shared-memory counters
Definition at line 759 of file agent_histogram.cuh.
|
inline |
Definition at line 251 of file agent_histogram.cuh.
|
inline |
Definition at line 281 of file agent_histogram.cuh.
|
inline |
Definition at line 269 of file agent_histogram.cuh.
|
inline |
Definition at line 459 of file agent_histogram.cuh.
|
inline |
Definition at line 442 of file agent_histogram.cuh.
|
inline |
Definition at line 524 of file agent_histogram.cuh.
|
inline |
Definition at line 503 of file agent_histogram.cuh.
|
inline |
Definition at line 487 of file agent_histogram.cuh.
|
inline |
Definition at line 476 of file agent_histogram.cuh.
|
inline |
Definition at line 671 of file agent_histogram.cuh.
|
inline |
Definition at line 678 of file agent_histogram.cuh.
|
inline |
Definition at line 334 of file agent_histogram.cuh.
|
inline |
Store privatized histogram to device-accessible memory. Specialized for privatized shared-memory counters
Definition at line 771 of file agent_histogram.cuh.
|
inline |
Definition at line 292 of file agent_histogram.cuh.
|
inline |
Definition at line 323 of file agent_histogram.cuh.
SampleT* cub::AgentHistogram< AgentHistogramPolicyT, PRIVATIZED_SMEM_BINS, NUM_CHANNELS, NUM_ACTIVE_CHANNELS, SampleIteratorT, CounterT, PrivatizedDecodeOpT, OutputDecodeOpT, OffsetT, PTX_ARCH >::d_native_samples |
Native pointer for input samples (possibly NULL if unavailable)
Definition at line 222 of file agent_histogram.cuh.
CounterT*(& cub::AgentHistogram< AgentHistogramPolicyT, PRIVATIZED_SMEM_BINS, NUM_CHANNELS, NUM_ACTIVE_CHANNELS, SampleIteratorT, CounterT, PrivatizedDecodeOpT, OutputDecodeOpT, OffsetT, PTX_ARCH >::d_output_histograms)[NUM_ACTIVE_CHANNELS] |
Reference to final output histograms (gmem)
Definition at line 234 of file agent_histogram.cuh.
CounterT* cub::AgentHistogram< AgentHistogramPolicyT, PRIVATIZED_SMEM_BINS, NUM_CHANNELS, NUM_ACTIVE_CHANNELS, SampleIteratorT, CounterT, PrivatizedDecodeOpT, OutputDecodeOpT, OffsetT, PTX_ARCH >::d_privatized_histograms[NUM_ACTIVE_CHANNELS] |
Reference to gmem privatized histograms for each channel.
Definition at line 231 of file agent_histogram.cuh.
WrappedSampleIteratorT cub::AgentHistogram< AgentHistogramPolicyT, PRIVATIZED_SMEM_BINS, NUM_CHANNELS, NUM_ACTIVE_CHANNELS, SampleIteratorT, CounterT, PrivatizedDecodeOpT, OutputDecodeOpT, OffsetT, PTX_ARCH >::d_wrapped_samples |
Sample input iterator (with cache modifier applied, if possible)
Definition at line 219 of file agent_histogram.cuh.
|
static |
Cache load modifier for reading input elements.
Definition at line 148 of file agent_histogram.cuh.
int(& cub::AgentHistogram< AgentHistogramPolicyT, PRIVATIZED_SMEM_BINS, NUM_CHANNELS, NUM_ACTIVE_CHANNELS, SampleIteratorT, CounterT, PrivatizedDecodeOpT, OutputDecodeOpT, OffsetT, PTX_ARCH >::num_output_bins)[NUM_ACTIVE_CHANNELS] |
The number of output bins for each channel.
Definition at line 225 of file agent_histogram.cuh.
int(& cub::AgentHistogram< AgentHistogramPolicyT, PRIVATIZED_SMEM_BINS, NUM_CHANNELS, NUM_ACTIVE_CHANNELS, SampleIteratorT, CounterT, PrivatizedDecodeOpT, OutputDecodeOpT, OffsetT, PTX_ARCH >::num_privatized_bins)[NUM_ACTIVE_CHANNELS] |
The number of privatized bins for each channel.
Definition at line 228 of file agent_histogram.cuh.
OutputDecodeOpT(& cub::AgentHistogram< AgentHistogramPolicyT, PRIVATIZED_SMEM_BINS, NUM_CHANNELS, NUM_ACTIVE_CHANNELS, SampleIteratorT, CounterT, PrivatizedDecodeOpT, OutputDecodeOpT, OffsetT, PTX_ARCH >::output_decode_op)[NUM_ACTIVE_CHANNELS] |
The transform operator for determining output bin-ids from privatized counter indices, one for each channel.
Definition at line 237 of file agent_histogram.cuh.
bool cub::AgentHistogram< AgentHistogramPolicyT, PRIVATIZED_SMEM_BINS, NUM_CHANNELS, NUM_ACTIVE_CHANNELS, SampleIteratorT, CounterT, PrivatizedDecodeOpT, OutputDecodeOpT, OffsetT, PTX_ARCH >::prefer_smem |
Whether to prefer privatized smem counters vs privatized global counters.
Definition at line 243 of file agent_histogram.cuh.
PrivatizedDecodeOpT(& cub::AgentHistogram< AgentHistogramPolicyT, PRIVATIZED_SMEM_BINS, NUM_CHANNELS, NUM_ACTIVE_CHANNELS, SampleIteratorT, CounterT, PrivatizedDecodeOpT, OutputDecodeOpT, OffsetT, PTX_ARCH >::privatized_decode_op)[NUM_ACTIVE_CHANNELS] |
The transform operator for determining privatized counter indices from samples, one for each channel.
Definition at line 240 of file agent_histogram.cuh.
_TempStorage& cub::AgentHistogram< AgentHistogramPolicyT, PRIVATIZED_SMEM_BINS, NUM_CHANNELS, NUM_ACTIVE_CHANNELS, SampleIteratorT, CounterT, PrivatizedDecodeOpT, OutputDecodeOpT, OffsetT, PTX_ARCH >::temp_storage |
Reference to temp_storage.
Definition at line 216 of file agent_histogram.cuh.