AgentRadixSortUpsweep implements a stateful abstraction of CUDA thread blocks for participating in device-wide radix sort upsweep. More...
AgentRadixSortUpsweep implements a stateful abstraction of CUDA thread blocks for participating in device-wide radix sort upsweep.
Template parameter OffsetT: signed integer type for global offsets.
Definition at line 86 of file agent_radix_sort_upsweep.cuh.
Data Structures | |
struct | Iterate |
struct | Iterate< MAX, MAX > |
struct | TempStorage |
Alias wrapper allowing storage to be unioned. More... | |
Public Types | |
enum | { RADIX_BITS = AgentRadixSortUpsweepPolicy::RADIX_BITS , BLOCK_THREADS = AgentRadixSortUpsweepPolicy::BLOCK_THREADS , KEYS_PER_THREAD = AgentRadixSortUpsweepPolicy::ITEMS_PER_THREAD , RADIX_DIGITS = 1 << RADIX_BITS , LOG_WARP_THREADS = CUB_PTX_LOG_WARP_THREADS , WARP_THREADS = 1 << LOG_WARP_THREADS , WARPS = (BLOCK_THREADS + WARP_THREADS - 1) / WARP_THREADS , TILE_ITEMS = BLOCK_THREADS * KEYS_PER_THREAD , BYTES_PER_COUNTER = sizeof(DigitCounter) , LOG_BYTES_PER_COUNTER = Log2<BYTES_PER_COUNTER>::VALUE , PACKING_RATIO = sizeof(PackedCounter) / sizeof(DigitCounter) , LOG_PACKING_RATIO = Log2<PACKING_RATIO>::VALUE , LOG_COUNTER_LANES = CUB_MAX(0, RADIX_BITS - LOG_PACKING_RATIO) , COUNTER_LANES = 1 << LOG_COUNTER_LANES , LANES_PER_WARP = CUB_MAX(1, (COUNTER_LANES + WARPS - 1) / WARPS) , UNROLL_COUNT = CUB_MIN(64, 255 / KEYS_PER_THREAD) , UNROLLED_ELEMENTS = UNROLL_COUNT * TILE_ITEMS } |
typedef Traits< KeyT >::UnsignedBits | UnsignedBits |
typedef unsigned char | DigitCounter |
typedef unsigned int | PackedCounter |
typedef CacheModifiedInputIterator< LOAD_MODIFIER, UnsignedBits, OffsetT > | KeysItr |
Public Member Functions | |
union | __align__ (16) _TempStorage |
__device__ __forceinline__ void | Bucket (UnsignedBits key) |
__device__ __forceinline__ void | ResetDigitCounters () |
__device__ __forceinline__ void | ResetUnpackedCounters () |
__device__ __forceinline__ void | UnpackDigitCounts () |
__device__ __forceinline__ void | ProcessFullTile (OffsetT block_offset) |
__device__ __forceinline__ void | ProcessPartialTile (OffsetT block_offset, const OffsetT &block_end) |
__device__ __forceinline__ | AgentRadixSortUpsweep (TempStorage &temp_storage, const KeyT *d_keys_in, int current_bit, int num_bits) |
__device__ __forceinline__ void | ProcessRegion (OffsetT block_offset, const OffsetT &block_end) |
template<bool IS_DESCENDING> | |
__device__ __forceinline__ void | ExtractCounts (OffsetT *counters, int bin_stride=1, int bin_offset=0) |
template<int BINS_TRACKED_PER_THREAD> | |
__device__ __forceinline__ void | ExtractCounts (OffsetT(&bin_count)[BINS_TRACKED_PER_THREAD]) |
Data Fields | |
_TempStorage & | temp_storage |
OffsetT | local_counts [LANES_PER_WARP][PACKING_RATIO] |
KeysItr | d_keys_in |
int | current_bit |
int | num_bits |
Static Public Attributes | |
static const CacheLoadModifier | LOAD_MODIFIER = AgentRadixSortUpsweepPolicy::LOAD_MODIFIER |
typedef unsigned char cub::AgentRadixSortUpsweep< AgentRadixSortUpsweepPolicy, KeyT, OffsetT >::DigitCounter |
Definition at line 96 of file agent_radix_sort_upsweep.cuh.
typedef CacheModifiedInputIterator<LOAD_MODIFIER, UnsignedBits, OffsetT> cub::AgentRadixSortUpsweep< AgentRadixSortUpsweepPolicy, KeyT, OffsetT >::KeysItr |
Definition at line 139 of file agent_radix_sort_upsweep.cuh.
typedef unsigned int cub::AgentRadixSortUpsweep< AgentRadixSortUpsweepPolicy, KeyT, OffsetT >::PackedCounter |
Definition at line 99 of file agent_radix_sort_upsweep.cuh.
typedef Traits<KeyT>::UnsignedBits cub::AgentRadixSortUpsweep< AgentRadixSortUpsweepPolicy, KeyT, OffsetT >::UnsignedBits |
Definition at line 93 of file agent_radix_sort_upsweep.cuh.
anonymous enum |
Definition at line 103 of file agent_radix_sort_upsweep.cuh.
|
inline |
Constructor
Definition at line 336 of file agent_radix_sort_upsweep.cuh.
|
inline |
Shared memory storage layout
Definition at line 139 of file agent_radix_sort_upsweep.cuh.
|
inline |
Decode a key and increment corresponding smem digit counter
Definition at line 213 of file agent_radix_sort_upsweep.cuh.
|
inline |
Extract counts (saving them to the external array)
Definition at line 403 of file agent_radix_sort_upsweep.cuh.
|
inline |
Extract counts
[out] | bin_count | The exclusive prefix sum for the digits [(threadIdx.x * BINS_TRACKED_PER_THREAD) ... (threadIdx.x * BINS_TRACKED_PER_THREAD) + BINS_TRACKED_PER_THREAD - 1] |
Definition at line 476 of file agent_radix_sort_upsweep.cuh.
|
inline |
Processes a single, full tile
Definition at line 295 of file agent_radix_sort_upsweep.cuh.
|
inline |
Processes a single load (may have some threads masked off)
Definition at line 313 of file agent_radix_sort_upsweep.cuh.
|
inline |
Compute radix digit histograms from a segment of input tiles.
Definition at line 352 of file agent_radix_sort_upsweep.cuh.
|
inline |
Reset composite counters
Definition at line 235 of file agent_radix_sort_upsweep.cuh.
|
inline |
Reset the unpacked counters in each thread
Definition at line 248 of file agent_radix_sort_upsweep.cuh.
|
inline |
Extracts and aggregates the digit counters for each counter lane owned by this warp
Definition at line 266 of file agent_radix_sort_upsweep.cuh.
int cub::AgentRadixSortUpsweep< AgentRadixSortUpsweepPolicy, KeyT, OffsetT >::current_bit |
Definition at line 170 of file agent_radix_sort_upsweep.cuh.
KeysItr cub::AgentRadixSortUpsweep< AgentRadixSortUpsweepPolicy, KeyT, OffsetT >::d_keys_in |
Definition at line 167 of file agent_radix_sort_upsweep.cuh.
|
static |
Definition at line 101 of file agent_radix_sort_upsweep.cuh.
OffsetT cub::AgentRadixSortUpsweep< AgentRadixSortUpsweepPolicy, KeyT, OffsetT >::local_counts[LANES_PER_WARP][PACKING_RATIO] |
Definition at line 164 of file agent_radix_sort_upsweep.cuh.
int cub::AgentRadixSortUpsweep< AgentRadixSortUpsweepPolicy, KeyT, OffsetT >::num_bits |
Definition at line 173 of file agent_radix_sort_upsweep.cuh.
_TempStorage& cub::AgentRadixSortUpsweep< AgentRadixSortUpsweepPolicy, KeyT, OffsetT >::temp_storage |
Definition at line 161 of file agent_radix_sort_upsweep.cuh.