OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
cub::AgentHistogram< AgentHistogramPolicyT, PRIVATIZED_SMEM_BINS, NUM_CHANNELS, NUM_ACTIVE_CHANNELS, SampleIteratorT, CounterT, PrivatizedDecodeOpT, OutputDecodeOpT, OffsetT, PTX_ARCH > Struct Template Reference

AgentHistogram implements a stateful abstraction of CUDA thread blocks for participating in device-wide histogram computation. More...

Detailed Description

template<typename AgentHistogramPolicyT, int PRIVATIZED_SMEM_BINS, int NUM_CHANNELS, int NUM_ACTIVE_CHANNELS, typename SampleIteratorT, typename CounterT, typename PrivatizedDecodeOpT, typename OutputDecodeOpT, typename OffsetT, int PTX_ARCH = CUB_PTX_ARCH>
struct cub::AgentHistogram< AgentHistogramPolicyT, PRIVATIZED_SMEM_BINS, NUM_CHANNELS, NUM_ACTIVE_CHANNELS, SampleIteratorT, CounterT, PrivatizedDecodeOpT, OutputDecodeOpT, OffsetT, PTX_ARCH >

AgentHistogram implements a stateful abstraction of CUDA thread blocks for participating in device-wide histogram computation.

Template parameter PTX_ARCH: PTX compute capability

Definition at line 111 of file agent_histogram.cuh.

Data Structures

struct  _TempStorage
 Shared memory type required by this thread block. More...
 
struct  TempStorage
 Temporary storage type (unionable) More...
 

Public Types

enum  {
  BLOCK_THREADS = AgentHistogramPolicyT::BLOCK_THREADS, PIXELS_PER_THREAD = AgentHistogramPolicyT::PIXELS_PER_THREAD, SAMPLES_PER_THREAD = PIXELS_PER_THREAD * NUM_CHANNELS, QUADS_PER_THREAD = SAMPLES_PER_THREAD / 4,
  TILE_PIXELS = PIXELS_PER_THREAD * BLOCK_THREADS, TILE_SAMPLES = SAMPLES_PER_THREAD * BLOCK_THREADS, IS_RLE_COMPRESS = AgentHistogramPolicyT::IS_RLE_COMPRESS, MEM_PREFERENCE,
  IS_WORK_STEALING = AgentHistogramPolicyT::IS_WORK_STEALING
}
 Constants.
 
typedef std::iterator_traits< SampleIteratorT >::value_type SampleT
 The sample type of the input iterator.
 
typedef CubVector< SampleT, NUM_CHANNELS >::Type PixelT
 The pixel type of SampleT.
 
typedef CubVector< SampleT, 4 >::Type QuadT
 The quad type of SampleT.
 
typedef If< IsPointer< SampleIteratorT >::VALUE, CacheModifiedInputIterator< LOAD_MODIFIER, SampleT, OffsetT >, SampleIteratorT >::Type WrappedSampleIteratorT
 Input iterator wrapper type (for applying cache modifier)
 
typedef CacheModifiedInputIterator< LOAD_MODIFIER, PixelT, OffsetT > WrappedPixelIteratorT
 Pixel input iterator type (for applying cache modifier)
 
typedef CacheModifiedInputIterator< LOAD_MODIFIER, QuadT, OffsetT > WrappedQuadIteratorT
 Quad input iterator type (for applying cache modifier)
 
typedef BlockLoad< SampleT, BLOCK_THREADS, SAMPLES_PER_THREAD, AgentHistogramPolicyT::LOAD_ALGORITHM > BlockLoadSampleT
 Parameterized BlockLoad type for samples.
 
typedef BlockLoad< PixelT, BLOCK_THREADS, PIXELS_PER_THREAD, AgentHistogramPolicyT::LOAD_ALGORITHM > BlockLoadPixelT
 Parameterized BlockLoad type for pixels.
 
typedef BlockLoad< QuadT, BLOCK_THREADS, QUADS_PER_THREAD, AgentHistogramPolicyT::LOAD_ALGORITHM > BlockLoadQuadT
 Parameterized BlockLoad type for quads.
 

Public Member Functions

__device__ __forceinline__ void InitBinCounters (CounterT *privatized_histograms[NUM_ACTIVE_CHANNELS])
 
__device__ __forceinline__ void InitSmemBinCounters ()
 
__device__ __forceinline__ void InitGmemBinCounters ()
 
__device__ __forceinline__ void StoreOutput (CounterT *privatized_histograms[NUM_ACTIVE_CHANNELS])
 
__device__ __forceinline__ void StoreSmemOutput ()
 
__device__ __forceinline__ void StoreGmemOutput ()
 
__device__ __forceinline__ void AccumulatePixels (SampleT samples[PIXELS_PER_THREAD][NUM_CHANNELS], bool is_valid[PIXELS_PER_THREAD], CounterT *privatized_histograms[NUM_ACTIVE_CHANNELS], Int2Type< true > is_rle_compress)
 
__device__ __forceinline__ void AccumulatePixels (SampleT samples[PIXELS_PER_THREAD][NUM_CHANNELS], bool is_valid[PIXELS_PER_THREAD], CounterT *privatized_histograms[NUM_ACTIVE_CHANNELS], Int2Type< false > is_rle_compress)
 
__device__ __forceinline__ void AccumulateSmemPixels (SampleT samples[PIXELS_PER_THREAD][NUM_CHANNELS], bool is_valid[PIXELS_PER_THREAD])
 
__device__ __forceinline__ void AccumulateGmemPixels (SampleT samples[PIXELS_PER_THREAD][NUM_CHANNELS], bool is_valid[PIXELS_PER_THREAD])
 
template<int _NUM_ACTIVE_CHANNELS>
__device__ __forceinline__ void LoadFullAlignedTile (OffsetT block_offset, int valid_samples, SampleT(&samples)[PIXELS_PER_THREAD][NUM_CHANNELS], Int2Type< _NUM_ACTIVE_CHANNELS > num_active_channels)
 
__device__ __forceinline__ void LoadFullAlignedTile (OffsetT block_offset, int valid_samples, SampleT(&samples)[PIXELS_PER_THREAD][NUM_CHANNELS], Int2Type< 1 > num_active_channels)
 
__device__ __forceinline__ void LoadTile (OffsetT block_offset, int valid_samples, SampleT(&samples)[PIXELS_PER_THREAD][NUM_CHANNELS], Int2Type< true > is_full_tile, Int2Type< true > is_aligned)
 
__device__ __forceinline__ void LoadTile (OffsetT block_offset, int valid_samples, SampleT(&samples)[PIXELS_PER_THREAD][NUM_CHANNELS], Int2Type< true > is_full_tile, Int2Type< false > is_aligned)
 
__device__ __forceinline__ void LoadTile (OffsetT block_offset, int valid_samples, SampleT(&samples)[PIXELS_PER_THREAD][NUM_CHANNELS], Int2Type< false > is_full_tile, Int2Type< true > is_aligned)
 
__device__ __forceinline__ void LoadTile (OffsetT block_offset, int valid_samples, SampleT(&samples)[PIXELS_PER_THREAD][NUM_CHANNELS], Int2Type< false > is_full_tile, Int2Type< false > is_aligned)
 
template<bool IS_ALIGNED, bool IS_FULL_TILE>
__device__ __forceinline__ void ConsumeTile (OffsetT block_offset, int valid_samples)
 
template<bool IS_ALIGNED>
__device__ __forceinline__ void ConsumeTiles (OffsetT num_row_pixels, OffsetT num_rows, OffsetT row_stride_samples, int tiles_per_row, GridQueue< int > tile_queue, Int2Type< true > is_work_stealing)
 
template<bool IS_ALIGNED>
__device__ __forceinline__ void ConsumeTiles (OffsetT num_row_pixels, OffsetT num_rows, OffsetT row_stride_samples, int tiles_per_row, GridQueue< int > tile_queue, Int2Type< false > is_work_stealing)
 
template<CacheLoadModifier _MODIFIER, typename _ValueT , typename _OffsetT >
__device__ __forceinline__ SampleT * NativePointer (CacheModifiedInputIterator< _MODIFIER, _ValueT, _OffsetT > itr)
 
template<typename IteratorT >
__device__ __forceinline__ SampleT * NativePointer (IteratorT itr)
 
__device__ __forceinline__ AgentHistogram (TempStorage &temp_storage, SampleIteratorT d_samples, int(&num_output_bins)[NUM_ACTIVE_CHANNELS], int(&num_privatized_bins)[NUM_ACTIVE_CHANNELS], CounterT *(&d_output_histograms)[NUM_ACTIVE_CHANNELS], CounterT *(&d_privatized_histograms)[NUM_ACTIVE_CHANNELS], OutputDecodeOpT(&output_decode_op)[NUM_ACTIVE_CHANNELS], PrivatizedDecodeOpT(&privatized_decode_op)[NUM_ACTIVE_CHANNELS])
 
__device__ __forceinline__ void ConsumeTiles (OffsetT num_row_pixels, OffsetT num_rows, OffsetT row_stride_samples, int tiles_per_row, GridQueue< int > tile_queue)
 
__device__ __forceinline__ void InitBinCounters ()
 
__device__ __forceinline__ void StoreOutput ()
 

Data Fields

_TempStorage & temp_storage
 Reference to temp_storage.
 
WrappedSampleIteratorT d_wrapped_samples
 Sample input iterator (with cache modifier applied, if possible)
 
SampleT * d_native_samples
 Native pointer for input samples (possibly NULL if unavailable)
 
int(& num_output_bins )[NUM_ACTIVE_CHANNELS]
 The number of output bins for each channel.
 
int(& num_privatized_bins )[NUM_ACTIVE_CHANNELS]
 The number of privatized bins for each channel.
 
CounterT * d_privatized_histograms [NUM_ACTIVE_CHANNELS]
 Reference to gmem privatized histograms for each channel.
 
CounterT *(& d_output_histograms )[NUM_ACTIVE_CHANNELS]
 Reference to final output histograms (gmem)
 
OutputDecodeOpT(& output_decode_op )[NUM_ACTIVE_CHANNELS]
 The transform operator for determining output bin-ids from privatized counter indices, one for each channel.
 
PrivatizedDecodeOpT(& privatized_decode_op )[NUM_ACTIVE_CHANNELS]
 The transform operator for determining privatized counter indices from samples, one for each channel.
 
bool prefer_smem
 Whether to prefer privatized smem counters vs privatized global counters.
 

Static Public Attributes

static const CacheLoadModifier LOAD_MODIFIER = AgentHistogramPolicyT::LOAD_MODIFIER
 Cache load modifier for reading input elements.
 

Constructor & Destructor Documentation

◆ AgentHistogram()

template<typename AgentHistogramPolicyT , int PRIVATIZED_SMEM_BINS, int NUM_CHANNELS, int NUM_ACTIVE_CHANNELS, typename SampleIteratorT , typename CounterT , typename PrivatizedDecodeOpT , typename OutputDecodeOpT , typename OffsetT , int PTX_ARCH = CUB_PTX_ARCH>
__device__ __forceinline__ cub::AgentHistogram< AgentHistogramPolicyT, PRIVATIZED_SMEM_BINS, NUM_CHANNELS, NUM_ACTIVE_CHANNELS, SampleIteratorT, CounterT, PrivatizedDecodeOpT, OutputDecodeOpT, OffsetT, PTX_ARCH >::AgentHistogram ( TempStorage & temp_storage,
SampleIteratorT  d_samples,
int(&)  num_output_bins[NUM_ACTIVE_CHANNELS],
int(&)  num_privatized_bins[NUM_ACTIVE_CHANNELS],
CounterT *(&)  d_output_histograms[NUM_ACTIVE_CHANNELS],
CounterT *(&)  d_privatized_histograms[NUM_ACTIVE_CHANNELS],
OutputDecodeOpT(&)  output_decode_op[NUM_ACTIVE_CHANNELS],
PrivatizedDecodeOpT(&)  privatized_decode_op[NUM_ACTIVE_CHANNELS] 
)
inline

Constructor

Parameters
temp_storage: Reference to temp_storage
d_samples: Input data to reduce
num_output_bins: The number of bins per final output histogram
num_privatized_bins: The number of bins per privatized histogram
d_output_histograms: Reference to final output histograms
d_privatized_histograms: Reference to privatized histograms
output_decode_op: The transform operator for determining output bin-ids from privatized counter indices, one for each channel
privatized_decode_op: The transform operator for determining privatized counter indices from samples, one for each channel

Definition at line 693 of file agent_histogram.cuh.

Member Function Documentation

◆ AccumulateGmemPixels()

template<typename AgentHistogramPolicyT , int PRIVATIZED_SMEM_BINS, int NUM_CHANNELS, int NUM_ACTIVE_CHANNELS, typename SampleIteratorT , typename CounterT , typename PrivatizedDecodeOpT , typename OutputDecodeOpT , typename OffsetT , int PTX_ARCH = CUB_PTX_ARCH>
__device__ __forceinline__ void cub::AgentHistogram< AgentHistogramPolicyT, PRIVATIZED_SMEM_BINS, NUM_CHANNELS, NUM_ACTIVE_CHANNELS, SampleIteratorT, CounterT, PrivatizedDecodeOpT, OutputDecodeOpT, OffsetT, PTX_ARCH >::AccumulateGmemPixels ( SampleT  samples[PIXELS_PER_THREAD][NUM_CHANNELS],
bool  is_valid[PIXELS_PER_THREAD] 
)
inline

Accumulate pixel, specialized for gmem privatized histogram

Definition at line 427 of file agent_histogram.cuh.

◆ AccumulateSmemPixels()

template<typename AgentHistogramPolicyT , int PRIVATIZED_SMEM_BINS, int NUM_CHANNELS, int NUM_ACTIVE_CHANNELS, typename SampleIteratorT , typename CounterT , typename PrivatizedDecodeOpT , typename OutputDecodeOpT , typename OffsetT , int PTX_ARCH = CUB_PTX_ARCH>
__device__ __forceinline__ void cub::AgentHistogram< AgentHistogramPolicyT, PRIVATIZED_SMEM_BINS, NUM_CHANNELS, NUM_ACTIVE_CHANNELS, SampleIteratorT, CounterT, PrivatizedDecodeOpT, OutputDecodeOpT, OffsetT, PTX_ARCH >::AccumulateSmemPixels ( SampleT  samples[PIXELS_PER_THREAD][NUM_CHANNELS],
bool  is_valid[PIXELS_PER_THREAD] 
)
inline

Accumulate pixel, specialized for smem privatized histogram

Definition at line 411 of file agent_histogram.cuh.

◆ ConsumeTiles() [1/3]

template<typename AgentHistogramPolicyT , int PRIVATIZED_SMEM_BINS, int NUM_CHANNELS, int NUM_ACTIVE_CHANNELS, typename SampleIteratorT , typename CounterT , typename PrivatizedDecodeOpT , typename OutputDecodeOpT , typename OffsetT , int PTX_ARCH = CUB_PTX_ARCH>
template<bool IS_ALIGNED>
__device__ __forceinline__ void cub::AgentHistogram< AgentHistogramPolicyT, PRIVATIZED_SMEM_BINS, NUM_CHANNELS, NUM_ACTIVE_CHANNELS, SampleIteratorT, CounterT, PrivatizedDecodeOpT, OutputDecodeOpT, OffsetT, PTX_ARCH >::ConsumeTiles ( OffsetT  num_row_pixels,
OffsetT  num_rows,
OffsetT  row_stride_samples,
int  tiles_per_row,
GridQueue< int >  tile_queue,
Int2Type< true >  is_work_stealing 
)
inline
Parameters
num_row_pixels: The number of multi-channel pixels per row in the region of interest
num_rows: The number of rows in the region of interest
row_stride_samples: The number of samples between starts of consecutive rows in the region of interest
tiles_per_row: Number of image tiles per row

Definition at line 581 of file agent_histogram.cuh.

◆ ConsumeTiles() [2/3]

template<typename AgentHistogramPolicyT , int PRIVATIZED_SMEM_BINS, int NUM_CHANNELS, int NUM_ACTIVE_CHANNELS, typename SampleIteratorT , typename CounterT , typename PrivatizedDecodeOpT , typename OutputDecodeOpT , typename OffsetT , int PTX_ARCH = CUB_PTX_ARCH>
template<bool IS_ALIGNED>
__device__ __forceinline__ void cub::AgentHistogram< AgentHistogramPolicyT, PRIVATIZED_SMEM_BINS, NUM_CHANNELS, NUM_ACTIVE_CHANNELS, SampleIteratorT, CounterT, PrivatizedDecodeOpT, OutputDecodeOpT, OffsetT, PTX_ARCH >::ConsumeTiles ( OffsetT  num_row_pixels,
OffsetT  num_rows,
OffsetT  row_stride_samples,
int  tiles_per_row,
GridQueue< int >  tile_queue,
Int2Type< false >  is_work_stealing 
)
inline
Parameters
num_row_pixels: The number of multi-channel pixels per row in the region of interest
num_rows: The number of rows in the region of interest
row_stride_samples: The number of samples between starts of consecutive rows in the region of interest
tiles_per_row: Number of image tiles per row

Definition at line 629 of file agent_histogram.cuh.

◆ ConsumeTiles() [3/3]

template<typename AgentHistogramPolicyT , int PRIVATIZED_SMEM_BINS, int NUM_CHANNELS, int NUM_ACTIVE_CHANNELS, typename SampleIteratorT , typename CounterT , typename PrivatizedDecodeOpT , typename OutputDecodeOpT , typename OffsetT , int PTX_ARCH = CUB_PTX_ARCH>
__device__ __forceinline__ void cub::AgentHistogram< AgentHistogramPolicyT, PRIVATIZED_SMEM_BINS, NUM_CHANNELS, NUM_ACTIVE_CHANNELS, SampleIteratorT, CounterT, PrivatizedDecodeOpT, OutputDecodeOpT, OffsetT, PTX_ARCH >::ConsumeTiles ( OffsetT  num_row_pixels,
OffsetT  num_rows,
OffsetT  row_stride_samples,
int  tiles_per_row,
GridQueue< int >  tile_queue 
)
inline

Consume image

Parameters
num_row_pixels: The number of multi-channel pixels per row in the region of interest
num_rows: The number of rows in the region of interest
row_stride_samples: The number of samples between starts of consecutive rows in the region of interest
tiles_per_row: Number of image tiles per row
tile_queue: Queue descriptor for assigning tiles of work to thread blocks

Definition at line 728 of file agent_histogram.cuh.

◆ InitBinCounters()

template<typename AgentHistogramPolicyT , int PRIVATIZED_SMEM_BINS, int NUM_CHANNELS, int NUM_ACTIVE_CHANNELS, typename SampleIteratorT , typename CounterT , typename PrivatizedDecodeOpT , typename OutputDecodeOpT , typename OffsetT , int PTX_ARCH = CUB_PTX_ARCH>
__device__ __forceinline__ void cub::AgentHistogram< AgentHistogramPolicyT, PRIVATIZED_SMEM_BINS, NUM_CHANNELS, NUM_ACTIVE_CHANNELS, SampleIteratorT, CounterT, PrivatizedDecodeOpT, OutputDecodeOpT, OffsetT, PTX_ARCH >::InitBinCounters ( )
inline

Initialize privatized bin counters. Specialized for privatized shared-memory counters

Definition at line 759 of file agent_histogram.cuh.

◆ StoreOutput()

template<typename AgentHistogramPolicyT , int PRIVATIZED_SMEM_BINS, int NUM_CHANNELS, int NUM_ACTIVE_CHANNELS, typename SampleIteratorT , typename CounterT , typename PrivatizedDecodeOpT , typename OutputDecodeOpT , typename OffsetT , int PTX_ARCH = CUB_PTX_ARCH>
__device__ __forceinline__ void cub::AgentHistogram< AgentHistogramPolicyT, PRIVATIZED_SMEM_BINS, NUM_CHANNELS, NUM_ACTIVE_CHANNELS, SampleIteratorT, CounterT, PrivatizedDecodeOpT, OutputDecodeOpT, OffsetT, PTX_ARCH >::StoreOutput ( )
inline

Store privatized histogram to device-accessible memory. Specialized for privatized shared-memory counters

Definition at line 771 of file agent_histogram.cuh.


The documentation for this struct was generated from the following file: