#include <stdio.h>
#include <iterator>
#include "../../agent/agent_radix_sort_upsweep.cuh"
#include "../../agent/agent_radix_sort_downsweep.cuh"
#include "../../agent/agent_scan.cuh"
#include "../../block/block_radix_sort.cuh"
#include "../../grid/grid_even_share.cuh"
#include "../../util_type.cuh"
#include "../../util_debug.cuh"
#include "../../util_device.cuh"
#include "../../util_namespace.cuh"
Go to the source code of this file.
Namespaces | |
namespace | cub |
Optional outer namespace(s) | |
Typedefs | |
typedef AgentRadixSortUpsweep< typename If<(ALT_DIGIT_BITS), typename ChainedPolicyT::ActivePolicy::AltUpsweepPolicy, typename ChainedPolicyT::ActivePolicy::UpsweepPolicy >::Type, KeyT, OffsetT > | cub::AgentRadixSortUpsweepT |
typedef AgentRadixSortDownsweep< typename If<(ALT_DIGIT_BITS), typename ChainedPolicyT::ActivePolicy::AltDownsweepPolicy, typename ChainedPolicyT::ActivePolicy::DownsweepPolicy >::Type, IS_DESCENDING, KeyT, ValueT, OffsetT > | cub::AgentRadixSortDownsweepT |
typedef BlockRadixSort< KeyT, BLOCK_THREADS, ITEMS_PER_THREAD, ValueT, ChainedPolicyT::ActivePolicy::SingleTilePolicy::RADIX_BITS,(ChainedPolicyT::ActivePolicy::SingleTilePolicy::RANK_ALGORITHM==RADIX_RANK_MEMOIZE), ChainedPolicyT::ActivePolicy::SingleTilePolicy::SCAN_ALGORITHM > | cub::BlockRadixSortT |
typedef BlockLoad< KeyT, BLOCK_THREADS, ITEMS_PER_THREAD, ChainedPolicyT::ActivePolicy::SingleTilePolicy::LOAD_ALGORITHM > | cub::BlockLoadKeys |
typedef BlockLoad< ValueT, BLOCK_THREADS, ITEMS_PER_THREAD, ChainedPolicyT::ActivePolicy::SingleTilePolicy::LOAD_ALGORITHM > | cub::BlockLoadValues |
typedef Traits< KeyT >::UnsignedBits | cub::UnsignedBitsT |
typedef AgentRadixSortUpsweep< AgentRadixSortUpsweepPolicy< BLOCK_THREADS, ITEMS_PER_THREAD, SegmentedPolicyT::LOAD_MODIFIER, RADIX_BITS >, KeyT, OffsetT > | cub::BlockUpsweepT |
typedef BlockScan< OffsetT, BLOCK_THREADS > | cub::DigitScanT |
typedef AgentRadixSortDownsweep< SegmentedPolicyT, IS_DESCENDING, KeyT, ValueT, OffsetT > | cub::BlockDownsweepT |
Enumerations | |
enum | { BLOCK_THREADS = SegmentedPolicyT::BLOCK_THREADS , ITEMS_PER_THREAD = SegmentedPolicyT::ITEMS_PER_THREAD , RADIX_BITS = SegmentedPolicyT::RADIX_BITS , TILE_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD , RADIX_DIGITS = 1 << RADIX_BITS , KEYS_ONLY = Equals<ValueT, NullType>::VALUE } |
enum | { cub::BINS_TRACKED_PER_THREAD = BlockDownsweepT::BINS_TRACKED_PER_THREAD } |
Functions | |
template<typename ChainedPolicyT , bool ALT_DIGIT_BITS, bool IS_DESCENDING, typename KeyT , typename OffsetT > | |
cub::__launch_bounds__ (int((ALT_DIGIT_BITS) ? ChainedPolicyT::ActivePolicy::AltUpsweepPolicy::BLOCK_THREADS :ChainedPolicyT::ActivePolicy::UpsweepPolicy::BLOCK_THREADS)) __global__ void DeviceRadixSortUpsweepKernel(const KeyT *d_keys | |
< Signed integer type for global offsets | |
even_share template | cub::BlockInit< TILE_ITEMS, GRID_MAPPING_RAKE > () |
upsweep | cub::ProcessRegion (even_share.block_offset, even_share.block_end) |
cub::CTA_SYNC () | |
upsweep template | cub::ExtractCounts< IS_DESCENDING > (d_spine, gridDim.x, blockIdx.x) |
template<typename ChainedPolicyT , typename OffsetT > | |
cub::__launch_bounds__ (int(ChainedPolicyT::ActivePolicy::ScanPolicy::BLOCK_THREADS), 1) __global__ void RadixSortScanBinsKernel(OffsetT *d_spine | |
< Signed integer type for global offsets | |
AgentScanT | cub::block_scan (temp_storage, d_spine, d_spine, cub::Sum(), OffsetT(0)) |
cub::while (block_offset+AgentScanT::TILE_ITEMS<=num_counts) | |
template<typename ChainedPolicyT , bool ALT_DIGIT_BITS, bool IS_DESCENDING, typename KeyT , typename ValueT , typename OffsetT > | |
cub::__launch_bounds__ (int((ALT_DIGIT_BITS) ? ChainedPolicyT::ActivePolicy::AltDownsweepPolicy::BLOCK_THREADS :ChainedPolicyT::ActivePolicy::DownsweepPolicy::BLOCK_THREADS)) __global__ void DeviceRadixSortDownsweepKernel(const KeyT *d_keys_in | |
< Signed integer type for global offsets | |
cub::AgentRadixSortDownsweepT (temp_storage, num_items, d_spine, d_keys_in, d_keys_out, d_values_in, d_values_out, current_bit, num_bits).ProcessRegion(even_share.block_offset | |
template<typename ChainedPolicyT , bool IS_DESCENDING, typename KeyT , typename ValueT , typename OffsetT > | |
cub::__launch_bounds__ (int(ChainedPolicyT::ActivePolicy::SingleTilePolicy::BLOCK_THREADS), 1) __global__ void DeviceRadixSortSingleTileKernel(const KeyT *d_keys_in | |
< Signed integer type for global offsets | |
cub::BlockLoadKeys (temp_storage.load_keys).Load(d_keys_in | |
cub::if (!KEYS_ONLY) | |
cub::BlockRadixSortT (temp_storage.sort).SortBlockedToStriped(keys | |
cub::Int2Type< IS_DESCENDING > () | |
cub::Int2Type< KEYS_ONLY > ()) | |
cub::for (int ITEM=0;ITEM< ITEMS_PER_THREAD;++ITEM) | |
template<typename ChainedPolicyT , bool ALT_DIGIT_BITS, bool IS_DESCENDING, typename KeyT , typename ValueT , typename OffsetIteratorT , typename OffsetT > | |
cub::__launch_bounds__ (int((ALT_DIGIT_BITS) ? ChainedPolicyT::ActivePolicy::AltSegmentedPolicy::BLOCK_THREADS :ChainedPolicyT::ActivePolicy::SegmentedPolicy::BLOCK_THREADS)) __global__ void DeviceSegmentedRadixSortKernel(const KeyT *d_keys_in | |
< Signed integer type for global offsets | |
cub::if (num_items<=0) return | |
BlockUpsweepT | cub::upsweep (temp_storage.upsweep, d_keys_in, current_bit, pass_bits) |
upsweep | cub::ProcessRegion (segment_begin, segment_end) |
upsweep | cub::ExtractCounts (bin_count) |
cub::if (IS_DESCENDING) | |
cub::DigitScanT (temp_storage.scan).ExclusiveSum(bin_count | |
BlockDownsweepT | cub::downsweep (temp_storage.downsweep, bin_offset, num_items, d_keys_in, d_keys_out, d_values_in, d_values_out, current_bit, pass_bits) |
Variables | |
OffsetT * | cub::d_spine |
< [in] Input keys buffer | |
OffsetT | cub::OffsetT |
[in] Total number of input data items | |
OffsetT int | cub::current_bit |
[in] Bit position of current radix digit | |
OffsetT int int | cub::num_bits |
[in] Number of bits of current radix digit | |
OffsetT int int GridEvenShare< OffsetT > | cub::even_share |
< [in] Even-share descriptor for mapping an equal number of tiles onto each thread block | |
AgentRadixSortUpsweepT | cub::upsweep (temp_storage, d_keys, current_bit, num_bits) |
int | cub::num_counts |
< [in,out] Privatized (per block) digit histograms (striped, i.e., 0s counts from each block, then 1s counts from each block, etc.) | |
int | cub::block_offset = 0 |
BlockScanRunningPrefixOp< OffsetT, Sum > | cub::prefix_op (0, Sum()) |
KeyT * | cub::d_keys_out |
< [in] Output keys buffer | |
KeyT const ValueT * | cub::d_values_in |
[in] Input values buffer | |
KeyT const ValueT ValueT * | cub::d_values_out |
[in] Output values buffer | |
KeyT const ValueT ValueT OffsetT OffsetT | cub::num_items = segment_end - segment_begin |
[in] Total number of input data items | |
even_share | cub::block_end |
KeyT const ValueT ValueT OffsetT int int | cub::end_bit |
< [in] The past-the-end (most-significant) bit index needed for key comparison | |
KeyT | cub::keys [ITEMS_PER_THREAD] |
ValueT | cub::values [ITEMS_PER_THREAD] |
UnsignedBitsT | cub::default_key_bits = (IS_DESCENDING) ? Traits<KeyT>::LOWEST_KEY : Traits<KeyT>::MAX_KEY |
KeyT | cub::default_key = reinterpret_cast<KeyT&>(default_key_bits) |
KeyT const ValueT ValueT OffsetIteratorT | cub::d_begin_offsets |
[in] Pointer to the sequence of beginning offsets of length num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* | |
KeyT const ValueT ValueT OffsetIteratorT OffsetIteratorT | cub::d_end_offsets |
[in] Pointer to the sequence of ending offsets of length num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith segment is considered empty. | |
KeyT const ValueT ValueT OffsetIteratorT OffsetIteratorT | cub::int |
[in] The number of segments that comprise the sorting data | |
KeyT const ValueT ValueT OffsetIteratorT OffsetIteratorT int int | cub::pass_bits |
< [in] Number of bits of current radix digit | |
OffsetT | cub::segment_begin = d_begin_offsets[blockIdx.x] |
OffsetT | cub::segment_end = d_end_offsets[blockIdx.x] |
OffsetT | cub::bin_count [BINS_TRACKED_PER_THREAD] |
OffsetT | cub::bin_offset [BINS_TRACKED_PER_THREAD] |
cub::DeviceRadixSort provides device-wide, parallel operations for computing a radix sort across a sequence of data items residing within device-accessible memory.
Definition in file dispatch_radix_sort.cuh.