template<int BLOCK_DIM_X, int RADIX_BITS, bool IS_DESCENDING, BlockScanAlgorithm INNER_SCAN_ALGORITHM = BLOCK_SCAN_WARP_SCANS, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int PTX_ARCH = CUB_PTX_ARCH>
class cub::BlockRadixRankMatch< BLOCK_DIM_X, RADIX_BITS, IS_DESCENDING, INNER_SCAN_ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >
Radix ranking of keys using the warp-level match.any operation.
Definition at line 460 of file block_radix_rank.cuh.
|
|
__device__ __forceinline__ | BlockRadixRankMatch (TempStorage &temp_storage) |
| Collective constructor using the specified memory allocation as temporary storage. More...
|
|
|
template<typename UnsignedBits , int KEYS_PER_THREAD> |
__device__ __forceinline__ void | RankKeys (UnsignedBits(&keys)[KEYS_PER_THREAD], int(&ranks)[KEYS_PER_THREAD], int current_bit, int num_bits) |
| Rank keys. More...
|
|
template<typename UnsignedBits , int KEYS_PER_THREAD> |
__device__ __forceinline__ void | RankKeys (UnsignedBits(&keys)[KEYS_PER_THREAD], int(&ranks)[KEYS_PER_THREAD], int current_bit, int num_bits, int(&exclusive_digit_prefix)[BINS_TRACKED_PER_THREAD]) |
| Rank keys. For the lower RADIX_DIGITS threads, the exclusive prefix sum of the digit counts is also provided for each digit tracked by the corresponding thread. More...
|
|
|
enum | {
BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z,
RADIX_DIGITS = 1 << RADIX_BITS,
LOG_WARP_THREADS = CUB_LOG_WARP_THREADS(PTX_ARCH),
WARP_THREADS = 1 << LOG_WARP_THREADS,
WARPS = (BLOCK_THREADS + WARP_THREADS - 1) / WARP_THREADS,
PADDED_WARPS,
COUNTERS = PADDED_WARPS * RADIX_DIGITS,
RAKING_SEGMENT = (COUNTERS + BLOCK_THREADS - 1) / BLOCK_THREADS,
PADDED_RAKING_SEGMENT
} |
|
typedef int32_t | RankT |
|
typedef int32_t | DigitCounterT |
|
typedef BlockScan< DigitCounterT, BLOCK_THREADS, INNER_SCAN_ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH > | BlockScanT |
| BlockScan type.
|
|
template<int BLOCK_DIM_X, int RADIX_BITS, bool IS_DESCENDING, BlockScanAlgorithm INNER_SCAN_ALGORITHM = BLOCK_SCAN_WARP_SCANS, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int PTX_ARCH = CUB_PTX_ARCH>
Enumerator |
---|
BINS_TRACKED_PER_THREAD | Number of bin-starting offsets tracked per thread.
|
Definition at line 495 of file block_radix_rank.cuh.
template<int BLOCK_DIM_X, int RADIX_BITS, bool IS_DESCENDING, BlockScanAlgorithm INNER_SCAN_ALGORITHM = BLOCK_SCAN_WARP_SCANS, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int PTX_ARCH = CUB_PTX_ARCH>
Collective constructor using the specified memory allocation as temporary storage.
- Parameters
-
[in] | temp_storage | Reference to memory allocation having layout type TempStorage |
Definition at line 555 of file block_radix_rank.cuh.
template<int BLOCK_DIM_X, int RADIX_BITS, bool IS_DESCENDING, BlockScanAlgorithm INNER_SCAN_ALGORITHM = BLOCK_SCAN_WARP_SCANS, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int PTX_ARCH = CUB_PTX_ARCH>
template<typename UnsignedBits , int KEYS_PER_THREAD>
__device__ __forceinline__ void cub::BlockRadixRankMatch< BLOCK_DIM_X, RADIX_BITS, IS_DESCENDING, INNER_SCAN_ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::RankKeys |
( |
UnsignedBits(&) |
keys[KEYS_PER_THREAD], |
|
|
int(&) |
ranks[KEYS_PER_THREAD], |
|
|
int |
current_bit, |
|
|
int |
num_bits |
|
) |
| |
|
inline |
Rank keys.
- Parameters
-
[in] | keys | Keys for this tile |
[out] | ranks | For each key, the local rank within the tile |
[in] | current_bit | The least-significant bit position of the current digit to extract |
[in] | num_bits | The number of bits in the current digit |
Definition at line 575 of file block_radix_rank.cuh.
template<int BLOCK_DIM_X, int RADIX_BITS, bool IS_DESCENDING, BlockScanAlgorithm INNER_SCAN_ALGORITHM = BLOCK_SCAN_WARP_SCANS, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int PTX_ARCH = CUB_PTX_ARCH>
template<typename UnsignedBits , int KEYS_PER_THREAD>
__device__ __forceinline__ void cub::BlockRadixRankMatch< BLOCK_DIM_X, RADIX_BITS, IS_DESCENDING, INNER_SCAN_ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::RankKeys |
( |
UnsignedBits(&) |
keys[KEYS_PER_THREAD], |
|
|
int(&) |
ranks[KEYS_PER_THREAD], |
|
|
int |
current_bit, |
|
|
int |
num_bits, |
|
|
int(&) |
exclusive_digit_prefix[BINS_TRACKED_PER_THREAD] |
|
) |
| |
|
inline |
Rank keys. For the lower RADIX_DIGITS
threads, the exclusive prefix sum of the digit counts is also provided, via exclusive_digit_prefix, for each digit tracked by the corresponding thread.
- Parameters
-
[in] | keys | Keys for this tile |
[out] | ranks | For each key, the local rank within the tile |
[in] | current_bit | The least-significant bit position of the current digit to extract |
[in] | num_bits | The number of bits in the current digit |
[out] | exclusive_digit_prefix | The exclusive prefix sum for the digits [(threadIdx.x * BINS_TRACKED_PER_THREAD) ... (threadIdx.x * BINS_TRACKED_PER_THREAD) + BINS_TRACKED_PER_THREAD - 1] |
Definition at line 666 of file block_radix_rank.cuh.