The BlockHistogram class provides collective methods for constructing block-wide histograms from data samples partitioned across a CUDA thread block.
More...
template<typename T, int BLOCK_DIM_X, int ITEMS_PER_THREAD, int BINS, BlockHistogramAlgorithm ALGORITHM = BLOCK_HISTO_SORT, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int PTX_ARCH = CUB_PTX_ARCH>
class cub::BlockHistogram< T, BLOCK_DIM_X, ITEMS_PER_THREAD, BINS, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >
The BlockHistogram class provides collective methods for constructing block-wide histograms from data samples partitioned across a CUDA thread block.
- Template Parameters
-
T | The sample type being histogrammed (must be castable to an integer bin identifier) |
BLOCK_DIM_X | The thread block length in threads along the X dimension |
ITEMS_PER_THREAD | The number of items per thread |
BINS | The number of bins within the histogram |
ALGORITHM | [optional] cub::BlockHistogramAlgorithm enumerator specifying the underlying algorithm to use (default: cub::BLOCK_HISTO_SORT) |
BLOCK_DIM_Y | [optional] The thread block length in threads along the Y dimension (default: 1) |
BLOCK_DIM_Z | [optional] The thread block length in threads along the Z dimension (default: 1) |
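PTX_ARCH | [optional] The PTX compute capability for which to specialize this collective, formatted as per the __CUDA_ARCH__ macro (e.g., 350 for sm_35). Useful for determining the collective's storage requirements for a given device from the host. (default: CUB_PTX_ARCH) |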
- Overview
-
- Performance Considerations
-
- A Simple Example
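- Every thread in the block uses the BlockHistogram class by first specializing the BlockHistogram type, then instantiating an instance with parameters for communication, and finally invoking one or more collective member functions.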
- The code snippet below illustrates a 256-bin histogram of 512 integer samples that are partitioned across 128 threads where each thread owns 4 samples.
#include <cub/cub.cuh>
__global__ void ExampleKernel(...)
{
__shared__
typename BlockHistogram::TempStorage
temp_storage;
__shared__ unsigned int smem_histogram[256];
unsigned char data[4];
...
- Performance and Usage Considerations
- The histogram output can be constructed in shared or device-accessible memory
- See cub::BlockHistogramAlgorithm for performance details regarding algorithmic alternatives
Definition at line 157 of file block_histogram.cuh.
|
|
__device__ __forceinline__ | BlockHistogram () |
| Collective constructor using a private static allocation of shared memory as temporary storage.
|
|
__device__ __forceinline__ | BlockHistogram (TempStorage &temp_storage) |
| Collective constructor using the specified memory allocation as temporary storage. More...
|
|
|
template<typename CounterT > |
__device__ __forceinline__ void | InitHistogram (CounterT histogram[BINS]) |
| Initialize the shared histogram counters to zero. More...
|
|
template<typename CounterT > |
__device__ __forceinline__ void | Histogram (T(&items)[ITEMS_PER_THREAD], CounterT histogram[BINS]) |
| Constructs a block-wide histogram in shared/device-accessible memory. Each thread contributes an array of input elements. More...
|
|
template<typename CounterT > |
__device__ __forceinline__ void | Composite (T(&items)[ITEMS_PER_THREAD], CounterT histogram[BINS]) |
| Updates an existing block-wide histogram in shared/device-accessible memory. Each thread composites an array of input elements. More...
|
|
◆ anonymous enum
template<typename T , int BLOCK_DIM_X, int ITEMS_PER_THREAD, int BINS, BlockHistogramAlgorithm ALGORITHM = BLOCK_HISTO_SORT, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int PTX_ARCH = CUB_PTX_ARCH>
Constants.
Enumerator |
---|
BLOCK_THREADS | The thread block size in threads.
|
Definition at line 166 of file block_histogram.cuh.
◆ BlockHistogram()
template<typename T , int BLOCK_DIM_X, int ITEMS_PER_THREAD, int BINS, BlockHistogramAlgorithm ALGORITHM = BLOCK_HISTO_SORT, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int PTX_ARCH = CUB_PTX_ARCH>
Collective constructor using the specified memory allocation as temporary storage.
- Parameters
-
[in] | temp_storage | Reference to memory allocation having layout type TempStorage |
Definition at line 239 of file block_histogram.cuh.
◆ Composite()
template<typename T , int BLOCK_DIM_X, int ITEMS_PER_THREAD, int BINS, BlockHistogramAlgorithm ALGORITHM = BLOCK_HISTO_SORT, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int PTX_ARCH = CUB_PTX_ARCH>
template<typename CounterT >
__device__ __forceinline__ void cub::BlockHistogram< T, BLOCK_DIM_X, ITEMS_PER_THREAD, BINS, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::Composite |
( |
T(&) |
items[ITEMS_PER_THREAD], |
|
|
CounterT |
histogram[BINS] |
|
) |
| |
|
inline |
Updates an existing block-wide histogram in shared/device-accessible memory. Each thread composites an array of input elements.
-
- Snippet
- The code snippet below illustrates the initialization and update of a histogram of 512 integer samples that are partitioned across 128 threads where each thread owns 4 samples.
#include <cub/cub.cuh>
__global__ void ExampleKernel(...)
{
__shared__
typename BlockHistogram::TempStorage
temp_storage;
__shared__ unsigned int smem_histogram[256];
unsigned char thread_samples[4];
...
- Template Parameters
-
CounterT | [inferred] Histogram counter type |
- Parameters
-
[in] | items | Calling thread's input values to histogram |
[out] | histogram | Reference to shared/device-accessible memory histogram |
Definition at line 404 of file block_histogram.cuh.
◆ Histogram()
template<typename T , int BLOCK_DIM_X, int ITEMS_PER_THREAD, int BINS, BlockHistogramAlgorithm ALGORITHM = BLOCK_HISTO_SORT, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int PTX_ARCH = CUB_PTX_ARCH>
template<typename CounterT >
__device__ __forceinline__ void cub::BlockHistogram< T, BLOCK_DIM_X, ITEMS_PER_THREAD, BINS, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::Histogram |
( |
T(&) |
items[ITEMS_PER_THREAD], |
|
|
CounterT |
histogram[BINS] |
|
) |
| |
|
inline |
Constructs a block-wide histogram in shared/device-accessible memory. Each thread contributes an array of input elements.
-
- Snippet
- The code snippet below illustrates a 256-bin histogram of 512 integer samples that are partitioned across 128 threads where each thread owns 4 samples.
#include <cub/cub.cuh>
__global__ void ExampleKernel(...)
{
__shared__
typename BlockHistogram::TempStorage
temp_storage;
__shared__ unsigned int smem_histogram[256];
unsigned char thread_samples[4];
...
- Template Parameters
-
CounterT | [inferred] Histogram counter type |
- Parameters
-
[in] | items | Calling thread's input values to histogram |
[out] | histogram | Reference to shared/device-accessible memory histogram |
Definition at line 347 of file block_histogram.cuh.
◆ InitHistogram()
template<typename T , int BLOCK_DIM_X, int ITEMS_PER_THREAD, int BINS, BlockHistogramAlgorithm ALGORITHM = BLOCK_HISTO_SORT, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int PTX_ARCH = CUB_PTX_ARCH>
template<typename CounterT >
__device__ __forceinline__ void cub::BlockHistogram< T, BLOCK_DIM_X, ITEMS_PER_THREAD, BINS, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::InitHistogram |
( |
CounterT |
histogram[BINS] | ) |
|
|
inline |
Initialize the shared histogram counters to zero.
- Snippet
- The code snippet below illustrates the initialization and update of a histogram of 512 integer samples that are partitioned across 128 threads where each thread owns 4 samples.
#include <cub/cub.cuh>
__global__ void ExampleKernel(...)
{
__shared__
typename BlockHistogram::TempStorage
temp_storage;
__shared__ unsigned int smem_histogram[256];
unsigned char thread_samples[4];
...
- Template Parameters
-
CounterT | [inferred] Histogram counter type |
Definition at line 291 of file block_histogram.cuh.
◆ SAFE_ALGORITHM
template<typename T , int BLOCK_DIM_X, int ITEMS_PER_THREAD, int BINS, BlockHistogramAlgorithm ALGORITHM = BLOCK_HISTO_SORT, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int PTX_ARCH = CUB_PTX_ARCH>
Initial value: (expression not shown here). Ensures that the template parameterization meets the requirements of the targeted device architecture. BLOCK_HISTO_ATOMIC can only be used on version SM120 or later; otherwise, BLOCK_HISTO_SORT is used regardless of the requested ALGORITHM.
Definition at line 178 of file block_histogram.cuh.
The documentation for this class was generated from the following file: