template<typename T,
int BLOCK_DIM_X,
int ITEMS_PER_THREAD,
int BINS,
BlockHistogramAlgorithm ALGORITHM = BLOCK_HISTO_SORT,
int BLOCK_DIM_Y = 1,
int BLOCK_DIM_Z = 1,
int PTX_ARCH = CUB_PTX_ARCH>
class cub::BlockHistogram< T, BLOCK_DIM_X, ITEMS_PER_THREAD, BINS, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >
The BlockHistogram class provides collective methods for constructing block-wide histograms from data samples partitioned across a CUDA thread block.
- Template Parameters
-
T | The sample type being histogrammed (must be castable to an integer bin identifier) |
BLOCK_DIM_X | The thread block length in threads along the X dimension |
ITEMS_PER_THREAD | The number of items per thread |
BINS | The number of bins within the histogram |
ALGORITHM | [optional] cub::BlockHistogramAlgorithm enumerator specifying the underlying algorithm to use (default: cub::BLOCK_HISTO_SORT) |
BLOCK_DIM_Y | [optional] The thread block length in threads along the Y dimension (default: 1) |
BLOCK_DIM_Z | [optional] The thread block length in threads along the Z dimension (default: 1) |
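PTX_ARCH | [optional] The PTX compute capability for which to specialize this collective, formatted as per the __CUDA_ARCH__ macro (e.g., 350 for sm_35). Useful for determining the collective's storage requirements for a given device from the host. (default: CUB_PTX_ARCH) |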
- Overview
-
- Performance Considerations
-
- A Simple Example
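- Every thread in the block uses the BlockHistogram class by first specializing the BlockHistogram type, then instantiating an instance with parameters for communication, and finally invoking one or more collective member functions.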
- The code snippet below illustrates a 256-bin histogram of 512 integer samples that are partitioned across 128 threads where each thread owns 4 samples.
#include <cub/cub.cuh>
__global__ void ExampleKernel(...)
{
__shared__ unsigned int smem_histogram[256];
unsigned char data[4];
...
The BlockHistogram class provides collective methods for constructing block-wide histograms from data samples partitioned across a CUDA thread block.
__device__ __forceinline__ BlockHistogram()
Collective constructor using a private static allocation of shared memory as temporary storage.
_TempStorage & temp_storage
Shared storage reference.
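The operations exposed by BlockHistogram require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the __shared__ keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or union'd with other storage allocation types to facilitate memory reuse.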
- Performance and Usage Considerations
- The histogram output can be constructed in shared or device-accessible memory
- See cub::BlockHistogramAlgorithm for performance details regarding algorithmic alternatives
Definition at line 157 of file block_histogram.cuh.
|
|
__device__ __forceinline__ | BlockHistogram () |
| Collective constructor using a private static allocation of shared memory as temporary storage.
|
|
__device__ __forceinline__ | BlockHistogram (TempStorage &temp_storage) |
| Collective constructor using the specified memory allocation as temporary storage.
|
|
|
template<typename CounterT > |
__device__ __forceinline__ void | InitHistogram (CounterT histogram[BINS]) |
| Initialize the shared histogram counters to zero.
|
|
template<typename CounterT > |
__device__ __forceinline__ void | Histogram (T(&items)[ITEMS_PER_THREAD], CounterT histogram[BINS]) |
| Constructs a block-wide histogram in shared/device-accessible memory. Each thread contributes an array of input elements.
|
|
template<typename CounterT > |
__device__ __forceinline__ void | Composite (T(&items)[ITEMS_PER_THREAD], CounterT histogram[BINS]) |
| Updates an existing block-wide histogram in shared/device-accessible memory. Each thread composites an array of input elements.
|
|