OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
 
Loading...
Searching...
No Matches
cub::DeviceHistogram Struct Reference

DeviceHistogram provides device-wide parallel operations for constructing histogram(s) from a sequence of sample data residing within device-accessible memory. More...

Detailed Description

DeviceHistogram provides device-wide parallel operations for constructing histogram(s) from a sequence of sample data residing within device-accessible memory.

Overview
A histogram counts the number of observations that fall into each of the disjoint categories (known as bins).
Usage Considerations
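Dynamic parallelism: DeviceHistogram methods can be called within kernel code on devices in which CUDA dynamic parallelism is supported.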

Definition at line 63 of file device_histogram.cuh.

Static Public Member Functions

Evenly-segmented bin ranges
template<typename SampleIteratorT , typename CounterT , typename LevelT , typename OffsetT >
static CUB_RUNTIME_FUNCTION cudaError_t HistogramEven (void *d_temp_storage, size_t &temp_storage_bytes, SampleIteratorT d_samples, CounterT *d_histogram, int num_levels, LevelT lower_level, LevelT upper_level, OffsetT num_samples, cudaStream_t stream=0, bool debug_synchronous=false)
 Computes an intensity histogram from a sequence of data samples using equal-width bins.
 
template<typename SampleIteratorT , typename CounterT , typename LevelT , typename OffsetT >
static CUB_RUNTIME_FUNCTION cudaError_t HistogramEven (void *d_temp_storage, size_t &temp_storage_bytes, SampleIteratorT d_samples, CounterT *d_histogram, int num_levels, LevelT lower_level, LevelT upper_level, OffsetT num_row_samples, OffsetT num_rows, size_t row_stride_bytes, cudaStream_t stream=0, bool debug_synchronous=false)
 Computes an intensity histogram from a sequence of data samples using equal-width bins.
 
template<int NUM_CHANNELS, int NUM_ACTIVE_CHANNELS, typename SampleIteratorT , typename CounterT , typename LevelT , typename OffsetT >
static CUB_RUNTIME_FUNCTION cudaError_t MultiHistogramEven (void *d_temp_storage, size_t &temp_storage_bytes, SampleIteratorT d_samples, CounterT *d_histogram[NUM_ACTIVE_CHANNELS], int num_levels[NUM_ACTIVE_CHANNELS], LevelT lower_level[NUM_ACTIVE_CHANNELS], LevelT upper_level[NUM_ACTIVE_CHANNELS], OffsetT num_pixels, cudaStream_t stream=0, bool debug_synchronous=false)
 Computes per-channel intensity histograms from a sequence of multi-channel "pixel" data samples using equal-width bins.
 
template<int NUM_CHANNELS, int NUM_ACTIVE_CHANNELS, typename SampleIteratorT , typename CounterT , typename LevelT , typename OffsetT >
static CUB_RUNTIME_FUNCTION cudaError_t MultiHistogramEven (void *d_temp_storage, size_t &temp_storage_bytes, SampleIteratorT d_samples, CounterT *d_histogram[NUM_ACTIVE_CHANNELS], int num_levels[NUM_ACTIVE_CHANNELS], LevelT lower_level[NUM_ACTIVE_CHANNELS], LevelT upper_level[NUM_ACTIVE_CHANNELS], OffsetT num_row_pixels, OffsetT num_rows, size_t row_stride_bytes, cudaStream_t stream=0, bool debug_synchronous=false)
 Computes per-channel intensity histograms from a sequence of multi-channel "pixel" data samples using equal-width bins.
 
Custom bin ranges
template<typename SampleIteratorT , typename CounterT , typename LevelT , typename OffsetT >
static CUB_RUNTIME_FUNCTION cudaError_t HistogramRange (void *d_temp_storage, size_t &temp_storage_bytes, SampleIteratorT d_samples, CounterT *d_histogram, int num_levels, LevelT *d_levels, OffsetT num_samples, cudaStream_t stream=0, bool debug_synchronous=false)
 Computes an intensity histogram from a sequence of data samples using the specified bin boundary levels.
 
template<typename SampleIteratorT , typename CounterT , typename LevelT , typename OffsetT >
static CUB_RUNTIME_FUNCTION cudaError_t HistogramRange (void *d_temp_storage, size_t &temp_storage_bytes, SampleIteratorT d_samples, CounterT *d_histogram, int num_levels, LevelT *d_levels, OffsetT num_row_samples, OffsetT num_rows, size_t row_stride_bytes, cudaStream_t stream=0, bool debug_synchronous=false)
 Computes an intensity histogram from a sequence of data samples using the specified bin boundary levels.
 
template<int NUM_CHANNELS, int NUM_ACTIVE_CHANNELS, typename SampleIteratorT , typename CounterT , typename LevelT , typename OffsetT >
static CUB_RUNTIME_FUNCTION cudaError_t MultiHistogramRange (void *d_temp_storage, size_t &temp_storage_bytes, SampleIteratorT d_samples, CounterT *d_histogram[NUM_ACTIVE_CHANNELS], int num_levels[NUM_ACTIVE_CHANNELS], LevelT *d_levels[NUM_ACTIVE_CHANNELS], OffsetT num_pixels, cudaStream_t stream=0, bool debug_synchronous=false)
 Computes per-channel intensity histograms from a sequence of multi-channel "pixel" data samples using the specified bin boundary levels.
 
template<int NUM_CHANNELS, int NUM_ACTIVE_CHANNELS, typename SampleIteratorT , typename CounterT , typename LevelT , typename OffsetT >
static CUB_RUNTIME_FUNCTION cudaError_t MultiHistogramRange (void *d_temp_storage, size_t &temp_storage_bytes, SampleIteratorT d_samples, CounterT *d_histogram[NUM_ACTIVE_CHANNELS], int num_levels[NUM_ACTIVE_CHANNELS], LevelT *d_levels[NUM_ACTIVE_CHANNELS], OffsetT num_row_pixels, OffsetT num_rows, size_t row_stride_bytes, cudaStream_t stream=0, bool debug_synchronous=false)
 Computes per-channel intensity histograms from a sequence of multi-channel "pixel" data samples using the specified bin boundary levels.
 

Member Function Documentation

◆ HistogramEven() [1/2]

template<typename SampleIteratorT , typename CounterT , typename LevelT , typename OffsetT >
static CUB_RUNTIME_FUNCTION cudaError_t cub::DeviceHistogram::HistogramEven ( void *  d_temp_storage,
size_t &  temp_storage_bytes,
SampleIteratorT  d_samples,
CounterT *  d_histogram,
int  num_levels,
LevelT  lower_level,
LevelT  upper_level,
OffsetT  num_row_samples,
OffsetT  num_rows,
size_t  row_stride_bytes,
cudaStream_t  stream = 0,
bool  debug_synchronous = false 
)
inlinestatic

Computes an intensity histogram from a sequence of data samples using equal-width bins.

  • A two-dimensional region of interest within d_samples can be specified using the num_row_samples, num_rows, and row_stride_bytes parameters.
  • The row stride must be a whole multiple of the sample data type size, i.e., (row_stride_bytes % sizeof(SampleT)) == 0.
  • The number of histogram bins is (num_levels - 1)
  • All bins comprise the same width of sample values: (upper_level - lower_level) / (num_levels - 1)
  • When d_temp_storage is NULL, no work is done and the required allocation size is returned in temp_storage_bytes.
Snippet
The code snippet below illustrates the computation of a six-bin histogram from a 2x5 region of interest within a flattened 2x7 array of float samples.
#include <cub/cub.cuh> // or equivalently <cub/device/device_histogram.cuh>
// Declare, allocate, and initialize device-accessible pointers for input samples and
// output histogram
int num_row_samples; // e.g., 5
int num_rows; // e.g., 2;
size_t row_stride_bytes; // e.g., 7 * sizeof(float)
float* d_samples; // e.g., [2.2, 6.0, 7.1, 2.9, 3.5, -, -,
// 0.3, 2.9, 2.0, 6.1, 999.5, -, -]
int* d_histogram; // e.g., [ -, -, -, -, -, -]
int num_levels; // e.g., 7 (seven level boundaries for six bins)
float lower_level; // e.g., 0.0 (lower sample value boundary of lowest bin)
float upper_level; // e.g., 12.0 (upper sample value boundary of upper bin)
...
// Determine temporary device storage requirements
void* d_temp_storage = NULL;
size_t temp_storage_bytes = 0;
cub::DeviceHistogram::HistogramEven(d_temp_storage, temp_storage_bytes,
d_samples, d_histogram, num_levels, lower_level, upper_level,
num_row_samples, num_rows, row_stride_bytes);
// Allocate temporary storage
cudaMalloc(&d_temp_storage, temp_storage_bytes);
// Compute histograms
cub::DeviceHistogram::HistogramEven(d_temp_storage, temp_storage_bytes,
d_samples, d_histogram, num_levels, lower_level, upper_level,
num_row_samples, num_rows, row_stride_bytes);
// d_histogram <-- [1, 5, 0, 3, 0, 0];
ArrayWrapper< int, NUM_ACTIVE_CHANNELS > ArrayWrapper< int, NUM_ACTIVE_CHANNELS > ArrayWrapper< CounterT *, NUM_ACTIVE_CHANNELS > ArrayWrapper< CounterT *, NUM_ACTIVE_CHANNELS > ArrayWrapper< OutputDecodeOpT, NUM_ACTIVE_CHANNELS > ArrayWrapper< PrivatizedDecodeOpT, NUM_ACTIVE_CHANNELS > OffsetT OffsetT num_rows
The number of rows in the region of interest.
static CUB_RUNTIME_FUNCTION cudaError_t HistogramEven(void *d_temp_storage, size_t &temp_storage_bytes, SampleIteratorT d_samples, CounterT *d_histogram, int num_levels, LevelT lower_level, LevelT upper_level, OffsetT num_samples, cudaStream_t stream=0, bool debug_synchronous=false)
Computes an intensity histogram from a sequence of data samples using equal-width bins.
Template Parameters
SampleIteratorT [inferred] Random-access input iterator type for reading input samples (may be a simple pointer type).
CounterT [inferred] Integer type for histogram bin counters
LevelT [inferred] Type for specifying boundaries (levels)
OffsetT [inferred] Signed integer type for sequence offsets, list lengths, pointer differences, etc. (Consider using 32-bit values as offsets/lengths/etc. For example, int will typically yield better performance than size_t in 64-bit memory mode.)
Parameters
[in]d_temp_storageDevice-accessible allocation of temporary storage. When NULL, the required allocation size is written to temp_storage_bytes and no work is done.
[in,out]temp_storage_bytesReference to size in bytes of d_temp_storage allocation
[in]d_samplesThe pointer to the input sequence of data samples.
[out]d_histogramThe pointer to the histogram counter output array of length num_levels - 1.
[in]num_levelsThe number of boundaries (levels) for delineating histogram samples. Implies that the number of bins is num_levels - 1.
[in]lower_levelThe lower sample value bound (inclusive) for the lowest histogram bin.
[in]upper_levelThe upper sample value bound (exclusive) for the highest histogram bin.
[in]num_row_samplesThe number of data samples per row in the region of interest
[in]num_rowsThe number of rows in the region of interest
[in]row_stride_bytesThe number of bytes between starts of consecutive rows in the region of interest
[in]stream[optional] CUDA stream to launch kernels within. Default is stream 0.
[in]debug_synchronous[optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is false.

Definition at line 223 of file device_histogram.cuh.

◆ HistogramEven() [2/2]

template<typename SampleIteratorT , typename CounterT , typename LevelT , typename OffsetT >
static CUB_RUNTIME_FUNCTION cudaError_t cub::DeviceHistogram::HistogramEven ( void *  d_temp_storage,
size_t &  temp_storage_bytes,
SampleIteratorT  d_samples,
CounterT *  d_histogram,
int  num_levels,
LevelT  lower_level,
LevelT  upper_level,
OffsetT  num_samples,
cudaStream_t  stream = 0,
bool  debug_synchronous = false 
)
inlinestatic

Computes an intensity histogram from a sequence of data samples using equal-width bins.

  • The number of histogram bins is (num_levels - 1)
  • All bins comprise the same width of sample values: (upper_level - lower_level) / (num_levels - 1)
  • When d_temp_storage is NULL, no work is done and the required allocation size is returned in temp_storage_bytes.
Snippet
The code snippet below illustrates the computation of a six-bin histogram from a sequence of float samples
#include <cub/cub.cuh> // or equivalently <cub/device/device_histogram.cuh>
// Declare, allocate, and initialize device-accessible pointers for input samples and
// output histogram
int num_samples; // e.g., 10
float* d_samples; // e.g., [2.2, 6.0, 7.1, 2.9, 3.5, 0.3, 2.9, 2.0, 6.1, 999.5]
int* d_histogram; // e.g., [ -, -, -, -, -, -]
int num_levels; // e.g., 7 (seven level boundaries for six bins)
float lower_level; // e.g., 0.0 (lower sample value boundary of lowest bin)
float upper_level; // e.g., 12.0 (upper sample value boundary of upper bin)
...
// Determine temporary device storage requirements
void* d_temp_storage = NULL;
size_t temp_storage_bytes = 0;
cub::DeviceHistogram::HistogramEven(d_temp_storage, temp_storage_bytes,
d_samples, d_histogram, num_levels, lower_level, upper_level, num_samples);
// Allocate temporary storage
cudaMalloc(&d_temp_storage, temp_storage_bytes);
// Compute histograms
cub::DeviceHistogram::HistogramEven(d_temp_storage, temp_storage_bytes,
d_samples, d_histogram, num_levels, lower_level, upper_level, num_samples);
// d_histogram <-- [1, 5, 0, 3, 0, 0];
Template Parameters
SampleIteratorT [inferred] Random-access input iterator type for reading input samples (may be a simple pointer type).
CounterT [inferred] Integer type for histogram bin counters
LevelT [inferred] Type for specifying boundaries (levels)
OffsetT [inferred] Signed integer type for sequence offsets, list lengths, pointer differences, etc. (Consider using 32-bit values as offsets/lengths/etc. For example, int will typically yield better performance than size_t in 64-bit memory mode.)

The sample value type of the input iterator

Parameters
[in]d_temp_storageDevice-accessible allocation of temporary storage. When NULL, the required allocation size is written to temp_storage_bytes and no work is done.
[in,out]temp_storage_bytesReference to size in bytes of d_temp_storage allocation
[in]d_samplesThe pointer to the input sequence of data samples.
[out]d_histogramThe pointer to the histogram counter output array of length num_levels - 1.
[in]num_levelsThe number of boundaries (levels) for delineating histogram samples. Implies that the number of bins is num_levels - 1.
[in]lower_levelThe lower sample value bound (inclusive) for the lowest histogram bin.
[in]upper_levelThe upper sample value bound (exclusive) for the highest histogram bin.
[in]num_samplesThe number of input samples (i.e., the length of d_samples)
[in]stream[optional] CUDA stream to launch kernels within. Default is stream 0.
[in]debug_synchronous[optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is false.

Definition at line 124 of file device_histogram.cuh.

◆ HistogramRange() [1/2]

template<typename SampleIteratorT , typename CounterT , typename LevelT , typename OffsetT >
static CUB_RUNTIME_FUNCTION cudaError_t cub::DeviceHistogram::HistogramRange ( void *  d_temp_storage,
size_t &  temp_storage_bytes,
SampleIteratorT  d_samples,
CounterT *  d_histogram,
int  num_levels,
LevelT *  d_levels,
OffsetT  num_row_samples,
OffsetT  num_rows,
size_t  row_stride_bytes,
cudaStream_t  stream = 0,
bool  debug_synchronous = false 
)
inlinestatic

Computes an intensity histogram from a sequence of data samples using the specified bin boundary levels.

  • A two-dimensional region of interest within d_samples can be specified using the num_row_samples, num_rows, and row_stride_bytes parameters.
  • The row stride must be a whole multiple of the sample data type size, i.e., (row_stride_bytes % sizeof(SampleT)) == 0.
  • The number of histogram bins is (num_levels - 1)
  • The value range for bin i is [level[i], level[i+1])
  • When d_temp_storage is NULL, no work is done and the required allocation size is returned in temp_storage_bytes.
Snippet
The code snippet below illustrates the computation of a six-bin histogram from a 2x5 region of interest within a flattened 2x7 array of float samples.
#include <cub/cub.cuh> // or equivalently <cub/device/device_histogram.cuh>
// Declare, allocate, and initialize device-accessible pointers for input samples and
// output histogram
int num_row_samples; // e.g., 5
int num_rows; // e.g., 2;
size_t row_stride_bytes; // e.g., 7 * sizeof(float)
float* d_samples; // e.g., [2.2, 6.0, 7.1, 2.9, 3.5, -, -,
// 0.3, 2.9, 2.0, 6.1, 999.5, -, -]
int* d_histogram; // e.g., [ -, -, -, -, -, -]
int num_levels; // e.g., 7 (seven level boundaries for six bins)
float *d_levels; // e.g., [0.0, 2.0, 4.0, 6.0, 8.0, 12.0, 16.0]
...
// Determine temporary device storage requirements
void* d_temp_storage = NULL;
size_t temp_storage_bytes = 0;
cub::DeviceHistogram::HistogramRange(d_temp_storage, temp_storage_bytes,
d_samples, d_histogram, num_levels, d_levels,
num_row_samples, num_rows, row_stride_bytes);
// Allocate temporary storage
cudaMalloc(&d_temp_storage, temp_storage_bytes);
// Compute histograms
cub::DeviceHistogram::HistogramRange(d_temp_storage, temp_storage_bytes,
d_samples, d_histogram, num_levels, d_levels,
num_row_samples, num_rows, row_stride_bytes);
// d_histogram <-- [1, 5, 0, 3, 0, 0];
static CUB_RUNTIME_FUNCTION cudaError_t HistogramRange(void *d_temp_storage, size_t &temp_storage_bytes, SampleIteratorT d_samples, CounterT *d_histogram, int num_levels, LevelT *d_levels, OffsetT num_samples, cudaStream_t stream=0, bool debug_synchronous=false)
Computes an intensity histogram from a sequence of data samples using the specified bin boundary leve...
Template Parameters
SampleIteratorT [inferred] Random-access input iterator type for reading input samples (may be a simple pointer type).
CounterT [inferred] Integer type for histogram bin counters
LevelT [inferred] Type for specifying boundaries (levels)
OffsetT [inferred] Signed integer type for sequence offsets, list lengths, pointer differences, etc. (Consider using 32-bit values as offsets/lengths/etc. For example, int will typically yield better performance than size_t in 64-bit memory mode.)
Parameters
[in]d_temp_storageDevice-accessible allocation of temporary storage. When NULL, the required allocation size is written to temp_storage_bytes and no work is done.
[in,out]temp_storage_bytesReference to size in bytes of d_temp_storage allocation
[in]d_samplesThe pointer to the input sequence of data samples.
[out]d_histogramThe pointer to the histogram counter output array of length num_levels - 1.
[in]num_levelsThe number of boundaries (levels) for delineating histogram samples. Implies that the number of bins is num_levels - 1.
[in]d_levelsThe pointer to the array of boundaries (levels). Bin ranges are defined by consecutive boundary pairings: lower sample value boundaries are inclusive and upper sample value boundaries are exclusive.
[in]num_row_samplesThe number of data samples per row in the region of interest
[in]num_rowsThe number of rows in the region of interest
[in]row_stride_bytesThe number of bytes between starts of consecutive rows in the region of interest
[in]stream[optional] CUDA stream to launch kernels within. Default is stream 0.
[in]debug_synchronous[optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is false.

Definition at line 623 of file device_histogram.cuh.

◆ HistogramRange() [2/2]

template<typename SampleIteratorT , typename CounterT , typename LevelT , typename OffsetT >
static CUB_RUNTIME_FUNCTION cudaError_t cub::DeviceHistogram::HistogramRange ( void *  d_temp_storage,
size_t &  temp_storage_bytes,
SampleIteratorT  d_samples,
CounterT *  d_histogram,
int  num_levels,
LevelT *  d_levels,
OffsetT  num_samples,
cudaStream_t  stream = 0,
bool  debug_synchronous = false 
)
inlinestatic

Computes an intensity histogram from a sequence of data samples using the specified bin boundary levels.

  • The number of histogram bins is (num_levels - 1)
  • The value range for bin i is [level[i], level[i+1])
  • When d_temp_storage is NULL, no work is done and the required allocation size is returned in temp_storage_bytes.
Snippet
The code snippet below illustrates the computation of a six-bin histogram from a sequence of float samples
#include <cub/cub.cuh> // or equivalently <cub/device/device_histogram.cuh>
// Declare, allocate, and initialize device-accessible pointers for input samples and
// output histogram
int num_samples; // e.g., 10
float* d_samples; // e.g., [2.2, 6.0, 7.1, 2.9, 3.5, 0.3, 2.9, 2.0, 6.1, 999.5]
int* d_histogram; // e.g., [ -, -, -, -, -, -]
int num_levels; // e.g., 7 (seven level boundaries for six bins)
float* d_levels; // e.g., [0.0, 2.0, 4.0, 6.0, 8.0, 12.0, 16.0]
...
// Determine temporary device storage requirements
void* d_temp_storage = NULL;
size_t temp_storage_bytes = 0;
cub::DeviceHistogram::HistogramRange(d_temp_storage, temp_storage_bytes,
d_samples, d_histogram, num_levels, d_levels, num_samples);
// Allocate temporary storage
cudaMalloc(&d_temp_storage, temp_storage_bytes);
// Compute histograms
cub::DeviceHistogram::HistogramRange(d_temp_storage, temp_storage_bytes,
d_samples, d_histogram, num_levels, d_levels, num_samples);
// d_histogram <-- [1, 5, 0, 3, 0, 0];
Template Parameters
SampleIteratorT [inferred] Random-access input iterator type for reading input samples (may be a simple pointer type).
CounterT [inferred] Integer type for histogram bin counters
LevelT [inferred] Type for specifying boundaries (levels)
OffsetT [inferred] Signed integer type for sequence offsets, list lengths, pointer differences, etc. (Consider using 32-bit values as offsets/lengths/etc. For example, int will typically yield better performance than size_t in 64-bit memory mode.)

The sample value type of the input iterator

Parameters
[in]d_temp_storageDevice-accessible allocation of temporary storage. When NULL, the required allocation size is written to temp_storage_bytes and no work is done.
[in,out]temp_storage_bytesReference to size in bytes of d_temp_storage allocation
[in]d_samplesThe pointer to the input sequence of data samples.
[out]d_histogramThe pointer to the histogram counter output array of length num_levels - 1.
[in]num_levelsThe number of boundaries (levels) for delineating histogram samples. Implies that the number of bins is num_levels - 1.
[in]d_levelsThe pointer to the array of boundaries (levels). Bin ranges are defined by consecutive boundary pairings: lower sample value boundaries are inclusive and upper sample value boundaries are exclusive.
[in]num_samplesThe number of input samples (i.e., the length of d_samples)
[in]stream[optional] CUDA stream to launch kernels within. Default is stream 0.
[in]debug_synchronous[optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is false.

Definition at line 528 of file device_histogram.cuh.

◆ MultiHistogramEven() [1/2]

template<int NUM_CHANNELS, int NUM_ACTIVE_CHANNELS, typename SampleIteratorT , typename CounterT , typename LevelT , typename OffsetT >
static CUB_RUNTIME_FUNCTION cudaError_t cub::DeviceHistogram::MultiHistogramEven ( void *  d_temp_storage,
size_t &  temp_storage_bytes,
SampleIteratorT  d_samples,
CounterT *  d_histogram[NUM_ACTIVE_CHANNELS],
int  num_levels[NUM_ACTIVE_CHANNELS],
LevelT  lower_level[NUM_ACTIVE_CHANNELS],
LevelT  upper_level[NUM_ACTIVE_CHANNELS],
OffsetT  num_pixels,
cudaStream_t  stream = 0,
bool  debug_synchronous = false 
)
inlinestatic

Computes per-channel intensity histograms from a sequence of multi-channel "pixel" data samples using equal-width bins.

  • The input is a sequence of pixel structures, where each pixel comprises a record of NUM_CHANNELS consecutive data samples (e.g., an RGBA pixel).
  • Of the NUM_CHANNELS specified, the function will only compute histograms for the first NUM_ACTIVE_CHANNELS (e.g., only RGB histograms from RGBA pixel samples).
  • The number of histogram bins for channel i is num_levels[i] - 1.
  • For channel i, the ranges of values for all histogram bins have the same width: (upper_level[i] - lower_level[i]) / (num_levels[i] - 1)
  • When d_temp_storage is NULL, no work is done and the required allocation size is returned in temp_storage_bytes.
Snippet
The code snippet below illustrates the computation of three 256-bin RGB histograms from a quad-channel sequence of RGBA pixels (8 bits per channel per pixel)
#include <cub/cub.cuh> // or equivalently <cub/device/device_histogram.cuh>
// Declare, allocate, and initialize device-accessible pointers for input samples
// and output histograms
int num_pixels; // e.g., 5
unsigned char* d_samples; // e.g., [(2, 6, 7, 5), (3, 0, 2, 1), (7, 0, 6, 2),
// (0, 6, 7, 5), (3, 0, 2, 6)]
int* d_histogram[3]; // e.g., three device pointers to three device buffers,
// each allocated with 256 integer counters
int num_levels[3]; // e.g., {257, 257, 257};
unsigned int lower_level[3]; // e.g., {0, 0, 0};
unsigned int upper_level[3]; // e.g., {256, 256, 256};
...
// Determine temporary device storage requirements
void* d_temp_storage = NULL;
size_t temp_storage_bytes = 0;
cub::DeviceHistogram::MultiHistogramEven<4, 3>(d_temp_storage, temp_storage_bytes,
d_samples, d_histogram, num_levels, lower_level, upper_level, num_pixels);
// Allocate temporary storage
cudaMalloc(&d_temp_storage, temp_storage_bytes);
// Compute histograms
cub::DeviceHistogram::MultiHistogramEven<4, 3>(d_temp_storage, temp_storage_bytes,
d_samples, d_histogram, num_levels, lower_level, upper_level, num_pixels);
// d_histogram <-- [ [1, 0, 1, 2, 0, 0, 0, 1, 0, 0, 0, ..., 0],
// [3, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, ..., 0],
// [0, 0, 2, 0, 0, 0, 1, 2, 0, 0, 0, ..., 0] ]
Template Parameters
NUM_CHANNELS Number of channels interleaved in the input data (may be greater than the number of channels being actively histogrammed)
NUM_ACTIVE_CHANNELS [inferred] Number of channels actively being histogrammed
SampleIteratorT [inferred] Random-access input iterator type for reading input samples (may be a simple pointer type).
CounterT [inferred] Integer type for histogram bin counters
LevelT [inferred] Type for specifying boundaries (levels)
OffsetT [inferred] Signed integer type for sequence offsets, list lengths, pointer differences, etc. (Consider using 32-bit values as offsets/lengths/etc. For example, int will typically yield better performance than size_t in 64-bit memory mode.)

The sample value type of the input iterator

Parameters
[in]d_temp_storageDevice-accessible allocation of temporary storage. When NULL, the required allocation size is written to temp_storage_bytes and no work is done.
[in,out]temp_storage_bytesReference to size in bytes of d_temp_storage allocation
[in]d_samplesThe pointer to the multi-channel input sequence of data samples. The samples from different channels are assumed to be interleaved (e.g., an array of 32-bit pixels where each pixel consists of four RGBA 8-bit samples).
[out]d_histogramThe pointers to the histogram counter output arrays, one for each active channel. For channel i, the allocation length of d_histogram[i] should be num_levels[i] - 1.
[in]num_levelsThe number of boundaries (levels) for delineating histogram samples in each active channel. Implies that the number of bins for channel i is num_levels[i] - 1.
[in]lower_levelThe lower sample value bound (inclusive) for the lowest histogram bin in each active channel.
[in]upper_levelThe upper sample value bound (exclusive) for the highest histogram bin in each active channel.
[in]num_pixelsThe number of multi-channel pixels (i.e., the length of d_samples / NUM_CHANNELS)
[in]stream[optional] CUDA stream to launch kernels within. Default is stream 0.
[in]debug_synchronous[optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is false.

Definition at line 325 of file device_histogram.cuh.

◆ MultiHistogramEven() [2/2]

template<int NUM_CHANNELS, int NUM_ACTIVE_CHANNELS, typename SampleIteratorT , typename CounterT , typename LevelT , typename OffsetT >
static CUB_RUNTIME_FUNCTION cudaError_t cub::DeviceHistogram::MultiHistogramEven ( void *  d_temp_storage,
size_t &  temp_storage_bytes,
SampleIteratorT  d_samples,
CounterT *  d_histogram[NUM_ACTIVE_CHANNELS],
int  num_levels[NUM_ACTIVE_CHANNELS],
LevelT  lower_level[NUM_ACTIVE_CHANNELS],
LevelT  upper_level[NUM_ACTIVE_CHANNELS],
OffsetT  num_row_pixels,
OffsetT  num_rows,
size_t  row_stride_bytes,
cudaStream_t  stream = 0,
bool  debug_synchronous = false 
)
inlinestatic

Computes per-channel intensity histograms from a sequence of multi-channel "pixel" data samples using equal-width bins.

  • The input is a sequence of pixel structures, where each pixel comprises a record of NUM_CHANNELS consecutive data samples (e.g., an RGBA pixel).
  • Of the NUM_CHANNELS specified, the function will only compute histograms for the first NUM_ACTIVE_CHANNELS (e.g., only RGB histograms from RGBA pixel samples).
  • A two-dimensional region of interest within d_samples can be specified using the num_row_pixels, num_rows, and row_stride_bytes parameters.
  • The row stride must be a whole multiple of the sample data type size, i.e., (row_stride_bytes % sizeof(SampleT)) == 0.
  • The number of histogram bins for channel i is num_levels[i] - 1.
  • For channel i, the ranges of values for all histogram bins have the same width: (upper_level[i] - lower_level[i]) / (num_levels[i] - 1)
  • When d_temp_storage is NULL, no work is done and the required allocation size is returned in temp_storage_bytes.
Snippet
The code snippet below illustrates the computation of three 256-bin RGB histograms from a 2x3 region of interest within a flattened 2x4 array of quad-channel RGBA pixels (8 bits per channel per pixel).
#include <cub/cub.cuh> // or equivalently <cub/device/device_histogram.cuh>
// Declare, allocate, and initialize device-accessible pointers for input samples
// and output histograms
int num_row_pixels; // e.g., 3
int num_rows; // e.g., 2
size_t row_stride_bytes; // e.g., 4 * sizeof(unsigned char) * NUM_CHANNELS
unsigned char* d_samples; // e.g., [(2, 6, 7, 5), (3, 0, 2, 1), (7, 0, 6, 2), (-, -, -, -),
// (0, 6, 7, 5), (3, 0, 2, 6), (1, 1, 1, 1), (-, -, -, -)]
int* d_histogram[3]; // e.g., three device pointers to three device buffers,
// each allocated with 256 integer counters
int num_levels[3]; // e.g., {257, 257, 257};
unsigned int lower_level[3]; // e.g., {0, 0, 0};
unsigned int upper_level[3]; // e.g., {256, 256, 256};
...
// Determine temporary device storage requirements
void* d_temp_storage = NULL;
size_t temp_storage_bytes = 0;
cub::DeviceHistogram::MultiHistogramEven<4, 3>(d_temp_storage, temp_storage_bytes,
d_samples, d_histogram, num_levels, lower_level, upper_level,
num_row_pixels, num_rows, row_stride_bytes);
// Allocate temporary storage
cudaMalloc(&d_temp_storage, temp_storage_bytes);
// Compute histograms
cub::DeviceHistogram::MultiHistogramEven<4, 3>(d_temp_storage, temp_storage_bytes,
d_samples, d_histogram, num_levels, lower_level, upper_level,
num_row_pixels, num_rows, row_stride_bytes);
// d_histogram <-- [ [1, 1, 1, 2, 0, 0, 0, 1, 0, 0, 0, ..., 0],
// [3, 1, 0, 0, 0, 0, 2, 0, 0, 0, 0, ..., 0],
// [0, 1, 2, 0, 0, 0, 1, 2, 0, 0, 0, ..., 0] ]
ArrayWrapper< int, NUM_ACTIVE_CHANNELS > ArrayWrapper< int, NUM_ACTIVE_CHANNELS > ArrayWrapper< CounterT *, NUM_ACTIVE_CHANNELS > ArrayWrapper< CounterT *, NUM_ACTIVE_CHANNELS > ArrayWrapper< OutputDecodeOpT, NUM_ACTIVE_CHANNELS > ArrayWrapper< PrivatizedDecodeOpT, NUM_ACTIVE_CHANNELS > OffsetT num_row_pixels
The number of multi-channel pixels per row in the region of interest.
Template Parameters
NUM_CHANNELS Number of channels interleaved in the input data (may be greater than the number of channels being actively histogrammed)
NUM_ACTIVE_CHANNELS [inferred] Number of channels actively being histogrammed
SampleIteratorT [inferred] Random-access input iterator type for reading input samples (may be a simple pointer type).
CounterT [inferred] Integer type for histogram bin counters
LevelT [inferred] Type for specifying boundaries (levels)
OffsetT [inferred] Signed integer type for sequence offsets, list lengths, pointer differences, etc. (Consider using 32-bit values as offsets/lengths/etc. For example, int will typically yield better performance than size_t in 64-bit memory mode.)

The sample value type of the input iterator

Parameters
[in]d_temp_storageDevice-accessible allocation of temporary storage. When NULL, the required allocation size is written to temp_storage_bytes and no work is done.
[in,out]temp_storage_bytesReference to size in bytes of d_temp_storage allocation
[in]d_samplesThe pointer to the multi-channel input sequence of data samples. The samples from different channels are assumed to be interleaved (e.g., an array of 32-bit pixels where each pixel consists of four RGBA 8-bit samples).
[out]d_histogramThe pointers to the histogram counter output arrays, one for each active channel. For channel i, the allocation length of d_histogram[i] should be num_levels[i] - 1.
[in]num_levelsThe number of boundaries (levels) for delineating histogram samples in each active channel. Implies that the number of bins for channel i is num_levels[i] - 1.
[in]lower_levelThe lower sample value bound (inclusive) for the lowest histogram bin in each active channel.
[in]upper_levelThe upper sample value bound (exclusive) for the highest histogram bin in each active channel.
[in]num_row_pixelsThe number of multi-channel pixels per row in the region of interest
[in]num_rowsThe number of rows in the region of interest
[in]row_stride_bytesThe number of bytes between starts of consecutive rows in the region of interest
[in]stream[optional] CUDA stream to launch kernels within. Default is stream 0.
[in]debug_synchronous[optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is false.

Definition at line 432 of file device_histogram.cuh.

◆ MultiHistogramRange() [1/2]

template<int NUM_CHANNELS, int NUM_ACTIVE_CHANNELS, typename SampleIteratorT , typename CounterT , typename LevelT , typename OffsetT >
static CUB_RUNTIME_FUNCTION cudaError_t cub::DeviceHistogram::MultiHistogramRange ( void *  d_temp_storage,
size_t &  temp_storage_bytes,
SampleIteratorT  d_samples,
CounterT *  d_histogram[NUM_ACTIVE_CHANNELS],
int  num_levels[NUM_ACTIVE_CHANNELS],
LevelT *  d_levels[NUM_ACTIVE_CHANNELS],
OffsetT  num_pixels,
cudaStream_t  stream = 0,
bool  debug_synchronous = false 
)
inline static

Computes per-channel intensity histograms from a sequence of multi-channel "pixel" data samples using the specified bin boundary levels.

  • The input is a sequence of pixel structures, where each pixel comprises a record of NUM_CHANNELS consecutive data samples (e.g., an RGBA pixel).
  • Of the NUM_CHANNELS specified, the function will only compute histograms for the first NUM_ACTIVE_CHANNELS (e.g., RGB histograms from RGBA pixel samples).
  • The number of histogram bins for channel i is num_levels[i] - 1.
  • For channel i, the bin boundaries are given by d_levels[i]: bin j covers sample values in the half-open range [d_levels[i][j], d_levels[i][j+1]), so the bins need not have equal width.
  • When d_temp_storage is NULL, no work is done and the required allocation size is returned in temp_storage_bytes.
Snippet
The code snippet below illustrates the computation of three 4-bin RGB histograms from a quad-channel sequence of RGBA pixels (8 bits per channel per pixel)
#include <cub/cub.cuh> // or equivalently <cub/device/device_histogram.cuh>
// Declare, allocate, and initialize device-accessible pointers for input samples
// and output histograms
int num_pixels; // e.g., 5
unsigned char *d_samples; // e.g., [(2, 6, 7, 5),(3, 0, 2, 1),(7, 0, 6, 2),
// (0, 6, 7, 5),(3, 0, 2, 6)]
unsigned int *d_histogram[3]; // e.g., [[ -, -, -, -],[ -, -, -, -],[ -, -, -, -]];
int num_levels[3]; // e.g., {5, 5, 5};
unsigned int *d_levels[3]; // e.g., [ [0, 2, 4, 6, 8],
// [0, 2, 4, 6, 8],
// [0, 2, 4, 6, 8] ];
...
// Determine temporary device storage requirements
void* d_temp_storage = NULL;
size_t temp_storage_bytes = 0;
cub::DeviceHistogram::MultiHistogramRange<4, 3>(d_temp_storage, temp_storage_bytes,
d_samples, d_histogram, num_levels, d_levels, num_pixels);
// Allocate temporary storage
cudaMalloc(&d_temp_storage, temp_storage_bytes);
// Compute histograms
cub::DeviceHistogram::MultiHistogramRange<4, 3>(d_temp_storage, temp_storage_bytes,
d_samples, d_histogram, num_levels, d_levels, num_pixels);
// d_histogram <-- [ [1, 3, 0, 1],
// [3, 0, 0, 2],
// [0, 2, 0, 3] ]
Template Parameters
NUM_CHANNELSNumber of channels interleaved in the input data (may be greater than the number of channels being actively histogrammed)
NUM_ACTIVE_CHANNELS[inferred] Number of channels actively being histogrammed
SampleIteratorT[inferred] Random-access input iterator type for reading input samples. \iterator
CounterT[inferred] Integer type for histogram bin counters
LevelT[inferred] Type for specifying boundaries (levels)
OffsetT[inferred] Signed integer type for sequence offsets, list lengths, pointer differences, etc. \offset_size1

The sample value type of the input iterator

Parameters
[in]d_temp_storageDevice-accessible allocation of temporary storage. When NULL, the required allocation size is written to temp_storage_bytes and no work is done.
[in,out]temp_storage_bytesReference to size in bytes of d_temp_storage allocation
[in]d_samplesThe pointer to the multi-channel input sequence of data samples. The samples from different channels are assumed to be interleaved (e.g., an array of 32-bit pixels where each pixel consists of four RGBA 8-bit samples).
[out]d_histogramThe pointers to the histogram counter output arrays, one for each active channel. For channel i, the allocation length of d_histogram[i] should be num_levels[i] - 1.
[in]num_levelsThe number of boundaries (levels) for delineating histogram samples in each active channel. Implies that the number of bins for channel i is num_levels[i] - 1.
[in]d_levelsThe pointers to the arrays of boundaries (levels), one for each active channel. Bin ranges are defined by consecutive boundary pairings: lower sample value boundaries are inclusive and upper sample value boundaries are exclusive.
[in]num_pixelsThe number of multi-channel pixels (i.e., the length of d_samples / NUM_CHANNELS)
[in]stream[optional] CUDA stream to launch kernels within. Default is stream 0.
[in]debug_synchronous[optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is false.

Definition at line 722 of file device_histogram.cuh.

◆ MultiHistogramRange() [2/2]

template<int NUM_CHANNELS, int NUM_ACTIVE_CHANNELS, typename SampleIteratorT , typename CounterT , typename LevelT , typename OffsetT >
static CUB_RUNTIME_FUNCTION cudaError_t cub::DeviceHistogram::MultiHistogramRange ( void *  d_temp_storage,
size_t &  temp_storage_bytes,
SampleIteratorT  d_samples,
CounterT *  d_histogram[NUM_ACTIVE_CHANNELS],
int  num_levels[NUM_ACTIVE_CHANNELS],
LevelT *  d_levels[NUM_ACTIVE_CHANNELS],
OffsetT  num_row_pixels,
OffsetT  num_rows,
size_t  row_stride_bytes,
cudaStream_t  stream = 0,
bool  debug_synchronous = false 
)
inline static

Computes per-channel intensity histograms from a sequence of multi-channel "pixel" data samples using the specified bin boundary levels.

  • The input is a sequence of pixel structures, where each pixel comprises a record of NUM_CHANNELS consecutive data samples (e.g., an RGBA pixel).
  • Of the NUM_CHANNELS specified, the function will only compute histograms for the first NUM_ACTIVE_CHANNELS (e.g., RGB histograms from RGBA pixel samples).
  • A two-dimensional region of interest within d_samples can be specified using the num_row_pixels, num_rows, and row_stride_bytes parameters.
  • The row stride must be a whole multiple of the sample data type size, i.e., (row_stride_bytes % sizeof(SampleT)) == 0.
  • The number of histogram bins for channel i is num_levels[i] - 1.
  • For channel i, the bin boundaries are given by d_levels[i]: bin j covers sample values in the half-open range [d_levels[i][j], d_levels[i][j+1]), so the bins need not have equal width.
  • When d_temp_storage is NULL, no work is done and the required allocation size is returned in temp_storage_bytes.
Snippet
The code snippet below illustrates the computation of three 4-bin RGB histograms from a 2x3 region of interest within a flattened 2x4 array of quad-channel RGBA pixels (8 bits per channel per pixel).
#include <cub/cub.cuh> // or equivalently <cub/device/device_histogram.cuh>
// Declare, allocate, and initialize device-accessible pointers for input samples
// and output histograms
int num_row_pixels; // e.g., 3
int num_rows; // e.g., 2
size_t row_stride_bytes; // e.g., 4 * sizeof(unsigned char) * NUM_CHANNELS
unsigned char* d_samples; // e.g., [(2, 6, 7, 5),(3, 0, 2, 1),(1, 1, 1, 1),(-, -, -, -),
// (7, 0, 6, 2),(0, 6, 7, 5),(3, 0, 2, 6),(-, -, -, -)]
int* d_histogram[3]; // e.g., [[ -, -, -, -],[ -, -, -, -],[ -, -, -, -]];
int num_levels[3]; // e.g., {5, 5, 5};
unsigned int* d_levels[3]; // e.g., [ [0, 2, 4, 6, 8],
// [0, 2, 4, 6, 8],
// [0, 2, 4, 6, 8] ];
...
// Determine temporary device storage requirements
void* d_temp_storage = NULL;
size_t temp_storage_bytes = 0;
cub::DeviceHistogram::MultiHistogramRange<4, 3>(d_temp_storage, temp_storage_bytes,
d_samples, d_histogram, num_levels, d_levels, num_row_pixels, num_rows, row_stride_bytes);
// Allocate temporary storage
cudaMalloc(&d_temp_storage, temp_storage_bytes);
// Compute histograms
cub::DeviceHistogram::MultiHistogramRange<4, 3>(d_temp_storage, temp_storage_bytes,
d_samples, d_histogram, num_levels, d_levels, num_row_pixels, num_rows, row_stride_bytes);
// d_histogram <-- [ [2, 3, 0, 1],
// [4, 0, 0, 2],
// [1, 2, 0, 3] ]
Template Parameters
NUM_CHANNELSNumber of channels interleaved in the input data (may be greater than the number of channels being actively histogrammed)
NUM_ACTIVE_CHANNELS[inferred] Number of channels actively being histogrammed
SampleIteratorT[inferred] Random-access input iterator type for reading input samples. \iterator
CounterT[inferred] Integer type for histogram bin counters
LevelT[inferred] Type for specifying boundaries (levels)
OffsetT[inferred] Signed integer type for sequence offsets, list lengths, pointer differences, etc. \offset_size1

The sample value type of the input iterator

Parameters
[in]d_temp_storageDevice-accessible allocation of temporary storage. When NULL, the required allocation size is written to temp_storage_bytes and no work is done.
[in,out]temp_storage_bytesReference to size in bytes of d_temp_storage allocation
[in]d_samplesThe pointer to the multi-channel input sequence of data samples. The samples from different channels are assumed to be interleaved (e.g., an array of 32-bit pixels where each pixel consists of four RGBA 8-bit samples).
[out]d_histogramThe pointers to the histogram counter output arrays, one for each active channel. For channel i, the allocation length of d_histogram[i] should be num_levels[i] - 1.
[in]num_levelsThe number of boundaries (levels) for delineating histogram samples in each active channel. Implies that the number of bins for channel i is num_levels[i] - 1.
[in]d_levelsThe pointers to the arrays of boundaries (levels), one for each active channel. Bin ranges are defined by consecutive boundary pairings: lower sample value boundaries are inclusive and upper sample value boundaries are exclusive.
[in]num_row_pixelsThe number of multi-channel pixels per row in the region of interest
[in]num_rowsThe number of rows in the region of interest
[in]row_stride_bytesThe number of bytes between starts of consecutive rows in the region of interest
[in]stream[optional] CUDA stream to launch kernels within. Default is stream 0.
[in]debug_synchronous[optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is false.

Definition at line 825 of file device_histogram.cuh.


The documentation for this struct was generated from the following file: