OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
 
Loading...
Searching...
No Matches
cub::BlockHistogram< T, BLOCK_DIM_X, ITEMS_PER_THREAD, BINS, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH > Class Template Reference

The BlockHistogram class provides collective methods for constructing block-wide histograms from data samples partitioned across a CUDA thread block. More...

Detailed Description

template<typename T, int BLOCK_DIM_X, int ITEMS_PER_THREAD, int BINS, BlockHistogramAlgorithm ALGORITHM = BLOCK_HISTO_SORT, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int PTX_ARCH = CUB_PTX_ARCH>
class cub::BlockHistogram< T, BLOCK_DIM_X, ITEMS_PER_THREAD, BINS, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >

The BlockHistogram class provides collective methods for constructing block-wide histograms from data samples partitioned across a CUDA thread block.

Template Parameters
T: The sample type being histogrammed (must be castable to an integer bin identifier)
BLOCK_DIM_X: The thread block length in threads along the X dimension
ITEMS_PER_THREAD: The number of items per thread
BINS: The number of bins within the histogram
ALGORITHM: [optional] cub::BlockHistogramAlgorithm enumerator specifying the underlying algorithm to use (default: cub::BLOCK_HISTO_SORT)
BLOCK_DIM_Y: [optional] The thread block length in threads along the Y dimension (default: 1)
BLOCK_DIM_Z: [optional] The thread block length in threads along the Z dimension (default: 1)
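PTX_ARCH: [optional] The PTX compute capability for which to specialize this collective, formatted as per the __CUDA_ARCH__ macro (e.g., 350 for sm_35). Useful for determining the collective's storage requirements for a given device from the host. (default: CUB_PTX_ARCH)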
Overview
Performance Considerations
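  • Efficiency is increased with increased granularity ITEMS_PER_THREAD. Performance is also typically increased until the additional register pressure or shared memory allocation size causes SM occupancy to fall too low. Consider variants of cub::BlockLoad for efficiently gathering a blocked arrangement of elements across threads.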
A Simple Example
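Every thread in the block uses the BlockHistogram class by first specializing the BlockHistogram type, then instantiating an instance with parameters for communication, and finally invoking one or more collective member functions.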
The code snippet below illustrates a 256-bin histogram of 512 integer samples that are partitioned across 128 threads where each thread owns 4 samples.
#include <cub/cub.cuh> // or equivalently <cub/block/block_histogram.cuh>
__global__ void ExampleKernel(...)
{
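// Specialize a 256-bin BlockHistogram type for a 1D block of 128 threads having 4 character samples each
typedef cub::BlockHistogram<unsigned char, 128, 4, 256> BlockHistogram;
// Allocate shared memory for BlockHistogram
__shared__ typename BlockHistogram::TempStorage temp_storage;
// Allocate shared memory for block-wide histogram bin counts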
__shared__ unsigned int smem_histogram[256];
// Obtain input samples per thread
unsigned char data[4];
...
// Compute the block-wide histogram
BlockHistogram(temp_storage).Histogram(data, smem_histogram);
The BlockHistogram class provides collective methods for constructing block-wide histograms from data...
__device__ __forceinline__ BlockHistogram()
Collective constructor using a private static allocation of shared memory as temporary storage.
_TempStorage & temp_storage
Shared storage reference.
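The operations exposed by BlockHistogram require a temporary memory allocation of this nested type (TempStorage) for thread communication. This opaque storage can be allocated directly using the __shared__ keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or union'd with other storage allocation types to facilitate memory reuse.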
Performance and Usage Considerations
  • The histogram output can be constructed in shared or device-accessible memory
  • See cub::BlockHistogramAlgorithm for performance details regarding algorithmic alternatives

Definition at line 157 of file block_histogram.cuh.

Data Structures

struct  TempStorage
 The operations exposed by BlockHistogram require a temporary memory allocation of this nested type for thread communication. More...
 

Public Member Functions

Collective constructors
__device__ __forceinline__ BlockHistogram ()
 Collective constructor using a private static allocation of shared memory as temporary storage.
 
__device__ __forceinline__ BlockHistogram (TempStorage &temp_storage)
 Collective constructor using the specified memory allocation as temporary storage.
 
Histogram operations
template<typename CounterT >
__device__ __forceinline__ void InitHistogram (CounterT histogram[BINS])
 Initialize the shared histogram counters to zero.
 
template<typename CounterT >
__device__ __forceinline__ void Histogram (T(&items)[ITEMS_PER_THREAD], CounterT histogram[BINS])
 Constructs a block-wide histogram in shared/device-accessible memory. Each thread contributes an array of input elements.
 
template<typename CounterT >
__device__ __forceinline__ void Composite (T(&items)[ITEMS_PER_THREAD], CounterT histogram[BINS])
 Updates an existing block-wide histogram in shared/device-accessible memory. Each thread composites an array of input elements.
 

Private Types

enum  { BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z }
 Constants. More...
 
typedef If<(SAFE_ALGORITHM==BLOCK_HISTO_SORT), BlockHistogramSort< T, BLOCK_DIM_X, ITEMS_PER_THREAD, BINS, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >, BlockHistogramAtomic< BINS > >::Type InternalBlockHistogram
 Internal specialization.
 
typedef InternalBlockHistogram::TempStorage _TempStorage
 Shared memory storage layout type for BlockHistogram.
 

Private Member Functions

__device__ __forceinline__ _TempStorage & PrivateStorage ()
 Internal storage allocator.
 

Private Attributes

_TempStorage & temp_storage
 Shared storage reference.
 
unsigned int linear_tid
 Linear thread-id.
 

Static Private Attributes

static const BlockHistogramAlgorithm SAFE_ALGORITHM
 

Member Typedef Documentation

◆ _TempStorage

template<typename T , int BLOCK_DIM_X, int ITEMS_PER_THREAD, int BINS, BlockHistogramAlgorithm ALGORITHM = BLOCK_HISTO_SORT, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int PTX_ARCH = CUB_PTX_ARCH>
typedef InternalBlockHistogram::TempStorage cub::BlockHistogram< T, BLOCK_DIM_X, ITEMS_PER_THREAD, BINS, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::_TempStorage
private

Shared memory storage layout type for BlockHistogram.

Definition at line 189 of file block_histogram.cuh.

◆ InternalBlockHistogram

template<typename T , int BLOCK_DIM_X, int ITEMS_PER_THREAD, int BINS, BlockHistogramAlgorithm ALGORITHM = BLOCK_HISTO_SORT, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int PTX_ARCH = CUB_PTX_ARCH>
typedef If<(SAFE_ALGORITHM==BLOCK_HISTO_SORT),BlockHistogramSort<T,BLOCK_DIM_X,ITEMS_PER_THREAD,BINS,BLOCK_DIM_Y,BLOCK_DIM_Z,PTX_ARCH>,BlockHistogramAtomic<BINS>>::Type cub::BlockHistogram< T, BLOCK_DIM_X, ITEMS_PER_THREAD, BINS, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::InternalBlockHistogram
private

Internal specialization.

Definition at line 186 of file block_histogram.cuh.

Member Enumeration Documentation

◆ anonymous enum

template<typename T , int BLOCK_DIM_X, int ITEMS_PER_THREAD, int BINS, BlockHistogramAlgorithm ALGORITHM = BLOCK_HISTO_SORT, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int PTX_ARCH = CUB_PTX_ARCH>
anonymous enum
private

Constants.

Enumerator
BLOCK_THREADS 

The thread block size in threads.

Definition at line 166 of file block_histogram.cuh.

Constructor & Destructor Documentation

◆ BlockHistogram() [1/2]

template<typename T , int BLOCK_DIM_X, int ITEMS_PER_THREAD, int BINS, BlockHistogramAlgorithm ALGORITHM = BLOCK_HISTO_SORT, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int PTX_ARCH = CUB_PTX_ARCH>
__device__ __forceinline__ cub::BlockHistogram< T, BLOCK_DIM_X, ITEMS_PER_THREAD, BINS, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::BlockHistogram ( )
inline

Collective constructor using a private static allocation of shared memory as temporary storage.

Definition at line 229 of file block_histogram.cuh.

◆ BlockHistogram() [2/2]

template<typename T , int BLOCK_DIM_X, int ITEMS_PER_THREAD, int BINS, BlockHistogramAlgorithm ALGORITHM = BLOCK_HISTO_SORT, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int PTX_ARCH = CUB_PTX_ARCH>
__device__ __forceinline__ cub::BlockHistogram< T, BLOCK_DIM_X, ITEMS_PER_THREAD, BINS, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::BlockHistogram ( TempStorage & temp_storage)
inline

Collective constructor using the specified memory allocation as temporary storage.

Parameters
[in] temp_storage: Reference to memory allocation having layout type TempStorage

Definition at line 239 of file block_histogram.cuh.

Member Function Documentation

◆ Composite()

template<typename T , int BLOCK_DIM_X, int ITEMS_PER_THREAD, int BINS, BlockHistogramAlgorithm ALGORITHM = BLOCK_HISTO_SORT, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int PTX_ARCH = CUB_PTX_ARCH>
template<typename CounterT >
__device__ __forceinline__ void cub::BlockHistogram< T, BLOCK_DIM_X, ITEMS_PER_THREAD, BINS, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::Composite ( T(&)  items[ITEMS_PER_THREAD],
CounterT  histogram[BINS] 
)
inline

Updates an existing block-wide histogram in shared/device-accessible memory. Each thread composites an array of input elements.

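  • Efficiency is increased with increased granularity ITEMS_PER_THREAD. Performance is also typically increased until the additional register pressure or shared memory allocation size causes SM occupancy to fall too low. Consider variants of cub::BlockLoad for efficiently gathering a blocked arrangement of elements across threads.
  • A subsequent __syncthreads() threadblock barrier should be invoked after calling this method if the collective's temporary storage (e.g., temp_storage) is to be reused or repurposed.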
Snippet
The code snippet below illustrates the initialization and update of a histogram of 512 integer samples that are partitioned across 128 threads where each thread owns 4 samples.
#include <cub/cub.cuh> // or equivalently <cub/block/block_histogram.cuh>
__global__ void ExampleKernel(...)
{
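// Specialize a 256-bin BlockHistogram type for a 1D block of 128 threads having 4 character samples each
typedef cub::BlockHistogram<unsigned char, 128, 4, 256> BlockHistogram;
// Allocate shared memory for BlockHistogram
__shared__ typename BlockHistogram::TempStorage temp_storage;
// Allocate shared memory for block-wide histogram bin counts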
__shared__ unsigned int smem_histogram[256];
// Obtain input samples per thread
unsigned char thread_samples[4];
...
// Initialize the block-wide histogram
BlockHistogram(temp_storage).InitHistogram(smem_histogram);
// Update the block-wide histogram
BlockHistogram(temp_storage).Composite(thread_samples, smem_histogram);
Template Parameters
CounterT: [inferred] Histogram counter type
Parameters
[in] items: Calling thread's input values to histogram
[out] histogram: Reference to shared/device-accessible memory histogram

Definition at line 404 of file block_histogram.cuh.

◆ Histogram()

template<typename T , int BLOCK_DIM_X, int ITEMS_PER_THREAD, int BINS, BlockHistogramAlgorithm ALGORITHM = BLOCK_HISTO_SORT, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int PTX_ARCH = CUB_PTX_ARCH>
template<typename CounterT >
__device__ __forceinline__ void cub::BlockHistogram< T, BLOCK_DIM_X, ITEMS_PER_THREAD, BINS, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::Histogram ( T(&)  items[ITEMS_PER_THREAD],
CounterT  histogram[BINS] 
)
inline

Constructs a block-wide histogram in shared/device-accessible memory. Each thread contributes an array of input elements.

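  • Efficiency is increased with increased granularity ITEMS_PER_THREAD. Performance is also typically increased until the additional register pressure or shared memory allocation size causes SM occupancy to fall too low. Consider variants of cub::BlockLoad for efficiently gathering a blocked arrangement of elements across threads.
  • A subsequent __syncthreads() threadblock barrier should be invoked after calling this method if the collective's temporary storage (e.g., temp_storage) is to be reused or repurposed.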
Snippet
The code snippet below illustrates a 256-bin histogram of 512 integer samples that are partitioned across 128 threads where each thread owns 4 samples.
#include <cub/cub.cuh> // or equivalently <cub/block/block_histogram.cuh>
__global__ void ExampleKernel(...)
{
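// Specialize a 256-bin BlockHistogram type for a 1D block of 128 threads having 4 character samples each
typedef cub::BlockHistogram<unsigned char, 128, 4, 256> BlockHistogram;
// Allocate shared memory for BlockHistogram
__shared__ typename BlockHistogram::TempStorage temp_storage;
// Allocate shared memory for block-wide histogram bin counts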
__shared__ unsigned int smem_histogram[256];
// Obtain input samples per thread
unsigned char thread_samples[4];
...
// Compute the block-wide histogram
BlockHistogram(temp_storage).Histogram(thread_samples, smem_histogram);
Template Parameters
CounterT: [inferred] Histogram counter type
Parameters
[in] items: Calling thread's input values to histogram
[out] histogram: Reference to shared/device-accessible memory histogram

Definition at line 347 of file block_histogram.cuh.

◆ InitHistogram()

template<typename T , int BLOCK_DIM_X, int ITEMS_PER_THREAD, int BINS, BlockHistogramAlgorithm ALGORITHM = BLOCK_HISTO_SORT, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int PTX_ARCH = CUB_PTX_ARCH>
template<typename CounterT >
__device__ __forceinline__ void cub::BlockHistogram< T, BLOCK_DIM_X, ITEMS_PER_THREAD, BINS, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::InitHistogram ( CounterT  histogram[BINS])
inline

Initialize the shared histogram counters to zero.

Snippet
The code snippet below illustrates the initialization and update of a histogram of 512 integer samples that are partitioned across 128 threads where each thread owns 4 samples.
#include <cub/cub.cuh> // or equivalently <cub/block/block_histogram.cuh>
__global__ void ExampleKernel(...)
{
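// Specialize a 256-bin BlockHistogram type for a 1D block of 128 threads having 4 character samples each
typedef cub::BlockHistogram<unsigned char, 128, 4, 256> BlockHistogram;
// Allocate shared memory for BlockHistogram
__shared__ typename BlockHistogram::TempStorage temp_storage;
// Allocate shared memory for block-wide histogram bin counts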
__shared__ unsigned int smem_histogram[256];
// Obtain input samples per thread
unsigned char thread_samples[4];
...
// Initialize the block-wide histogram
BlockHistogram(temp_storage).InitHistogram(smem_histogram);
// Update the block-wide histogram
BlockHistogram(temp_storage).Composite(thread_samples, smem_histogram);
Template Parameters
CounterT: [inferred] Histogram counter type

Definition at line 291 of file block_histogram.cuh.

◆ PrivateStorage()

template<typename T , int BLOCK_DIM_X, int ITEMS_PER_THREAD, int BINS, BlockHistogramAlgorithm ALGORITHM = BLOCK_HISTO_SORT, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int PTX_ARCH = CUB_PTX_ARCH>
__device__ __forceinline__ _TempStorage & cub::BlockHistogram< T, BLOCK_DIM_X, ITEMS_PER_THREAD, BINS, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::PrivateStorage ( )
inline private

Internal storage allocator.

Definition at line 208 of file block_histogram.cuh.

Field Documentation

◆ linear_tid

template<typename T , int BLOCK_DIM_X, int ITEMS_PER_THREAD, int BINS, BlockHistogramAlgorithm ALGORITHM = BLOCK_HISTO_SORT, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int PTX_ARCH = CUB_PTX_ARCH>
unsigned int cub::BlockHistogram< T, BLOCK_DIM_X, ITEMS_PER_THREAD, BINS, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::linear_tid
private

Linear thread-id.

Definition at line 200 of file block_histogram.cuh.

◆ SAFE_ALGORITHM

template<typename T , int BLOCK_DIM_X, int ITEMS_PER_THREAD, int BINS, BlockHistogramAlgorithm ALGORITHM = BLOCK_HISTO_SORT, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int PTX_ARCH = CUB_PTX_ARCH>
const BlockHistogramAlgorithm cub::BlockHistogram< T, BLOCK_DIM_X, ITEMS_PER_THREAD, BINS, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::SAFE_ALGORITHM
static private
Initial value:
=
((ALGORITHM == BLOCK_HISTO_ATOMIC) && (PTX_ARCH < 120)) ?
BLOCK_HISTO_SORT :
ALGORITHM

Ensure the template parameterization meets the requirements of the targeted device architecture. BLOCK_HISTO_ATOMIC can only be used on version SM120 or later. Otherwise BLOCK_HISTO_SORT is used regardless.

Definition at line 178 of file block_histogram.cuh.

◆ temp_storage

template<typename T , int BLOCK_DIM_X, int ITEMS_PER_THREAD, int BINS, BlockHistogramAlgorithm ALGORITHM = BLOCK_HISTO_SORT, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int PTX_ARCH = CUB_PTX_ARCH>
_TempStorage& cub::BlockHistogram< T, BLOCK_DIM_X, ITEMS_PER_THREAD, BINS, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::temp_storage
private

Shared storage reference.

Definition at line 197 of file block_histogram.cuh.


The documentation for this class was generated from the following file: