Radix-rank using match.any
Definition at line 460 of file block_radix_rank.cuh.
Data Structures | |
struct | TempStorage |
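Temporary storage type whose allocation is passed to the collective constructor (unexpanded Doxygen alias: \smemstorage{BlockScan}). More... | |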
Public Types | |
enum | { BINS_TRACKED_PER_THREAD = CUB_MAX(1, (RADIX_DIGITS + BLOCK_THREADS - 1) / BLOCK_THREADS) } |
Public Member Functions | |
Collective constructors | |
__device__ __forceinline__ | BlockRadixRankMatch (TempStorage &temp_storage) |
Collective constructor using the specified memory allocation as temporary storage. | |
Ranking | |
template<typename UnsignedBits , int KEYS_PER_THREAD> | |
__device__ __forceinline__ void | RankKeys (UnsignedBits(&keys)[KEYS_PER_THREAD], int(&ranks)[KEYS_PER_THREAD], int current_bit, int num_bits) |
Rank keys. | |
template<typename UnsignedBits , int KEYS_PER_THREAD> | |
__device__ __forceinline__ void | RankKeys (UnsignedBits(&keys)[KEYS_PER_THREAD], int(&ranks)[KEYS_PER_THREAD], int current_bit, int num_bits, int(&exclusive_digit_prefix)[BINS_TRACKED_PER_THREAD]) |
Rank keys. For the lower RADIX_DIGITS threads, the exclusive prefix sum of the digit counts is also provided to the corresponding thread for each digit it tracks. | |
Private Types | |
enum | { BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z , RADIX_DIGITS = 1 << RADIX_BITS , LOG_WARP_THREADS = CUB_LOG_WARP_THREADS(PTX_ARCH) , WARP_THREADS = 1 << LOG_WARP_THREADS , WARPS = (BLOCK_THREADS + WARP_THREADS - 1) / WARP_THREADS , PADDED_WARPS , COUNTERS = PADDED_WARPS * RADIX_DIGITS , RAKING_SEGMENT = (COUNTERS + BLOCK_THREADS - 1) / BLOCK_THREADS , PADDED_RAKING_SEGMENT } |
typedef int32_t | RankT |
typedef int32_t | DigitCounterT |
typedef BlockScan< DigitCounterT, BLOCK_THREADS, INNER_SCAN_ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH > | BlockScanT |
BlockScan type. | |
Private Member Functions | |
struct | __align__ (16) _TempStorage |
Shared memory storage layout type for BlockRadixRankMatch. | |
Private Attributes | |
_TempStorage & | temp_storage |
Shared storage reference. | |
unsigned int | linear_tid |
Linear thread-id. | |
|
private |
BlockScan type.
Definition at line 511 of file block_radix_rank.cuh.
|
private |
Definition at line 469 of file block_radix_rank.cuh.
|
private |
Definition at line 468 of file block_radix_rank.cuh.
|
private |
Definition at line 471 of file block_radix_rank.cuh.
anonymous enum |
Enumerator | |
---|---|
BINS_TRACKED_PER_THREAD | Number of bin-starting offsets tracked per thread. |
Definition at line 495 of file block_radix_rank.cuh.
|
inline |
Collective constructor using the specified memory allocation as temporary storage.
[in] | temp_storage | Reference to memory allocation having layout type TempStorage |
Definition at line 555 of file block_radix_rank.cuh.
|
inline private |
Shared memory storage layout type for BlockRadixRankMatch.
Definition at line 511 of file block_radix_rank.cuh.
|
inline |
Rank keys.
[in] | keys | Keys for this tile |
[out] | ranks | For each key, the local rank within the tile |
[in] | current_bit | The least-significant bit position of the current digit to extract |
[in] | num_bits | The number of bits in the current digit |
Definition at line 575 of file block_radix_rank.cuh.
|
inline |
Rank keys. For the lower RADIX_DIGITS threads, the exclusive prefix sum of the digit counts is also provided to the corresponding thread for each digit it tracks.
[in] | keys | Keys for this tile |
[out] | ranks | For each key, the local rank within the tile (out parameter) |
[in] | current_bit | The least-significant bit position of the current digit to extract |
[in] | num_bits | The number of bits in the current digit |
[out] | exclusive_digit_prefix | The exclusive prefix sum for the digits [(threadIdx.x * BINS_TRACKED_PER_THREAD) ... (threadIdx.x * BINS_TRACKED_PER_THREAD) + BINS_TRACKED_PER_THREAD - 1] |
Definition at line 666 of file block_radix_rank.cuh.
|
private |
Linear thread-id.
Definition at line 536 of file block_radix_rank.cuh.
|
private |
Shared storage reference.
Definition at line 533 of file block_radix_rank.cuh.