#include <stdio.h>
#include <iterator>
#include "../../agent/agent_radix_sort_upsweep.cuh"
#include "../../agent/agent_radix_sort_downsweep.cuh"
#include "../../agent/agent_scan.cuh"
#include "../../block/block_radix_sort.cuh"
#include "../../grid/grid_even_share.cuh"
#include "../../util_type.cuh"
#include "../../util_debug.cuh"
#include "../../util_device.cuh"
#include "../../util_namespace.cuh"
Go to the source code of this file.
Namespaces | |
| namespace | cub |
| Optional outer namespace(s) | |
Typedefs | |
| typedef AgentRadixSortUpsweep< typename If<(ALT_DIGIT_BITS), typename ChainedPolicyT::ActivePolicy::AltUpsweepPolicy, typename ChainedPolicyT::ActivePolicy::UpsweepPolicy >::Type, KeyT, OffsetT > | cub::AgentRadixSortUpsweepT |
| typedef AgentRadixSortDownsweep< typename If<(ALT_DIGIT_BITS), typename ChainedPolicyT::ActivePolicy::AltDownsweepPolicy, typename ChainedPolicyT::ActivePolicy::DownsweepPolicy >::Type, IS_DESCENDING, KeyT, ValueT, OffsetT > | cub::AgentRadixSortDownsweepT |
| typedef BlockRadixSort< KeyT, BLOCK_THREADS, ITEMS_PER_THREAD, ValueT, ChainedPolicyT::ActivePolicy::SingleTilePolicy::RADIX_BITS,(ChainedPolicyT::ActivePolicy::SingleTilePolicy::RANK_ALGORITHM==RADIX_RANK_MEMOIZE), ChainedPolicyT::ActivePolicy::SingleTilePolicy::SCAN_ALGORITHM > | cub::BlockRadixSortT |
| typedef BlockLoad< KeyT, BLOCK_THREADS, ITEMS_PER_THREAD, ChainedPolicyT::ActivePolicy::SingleTilePolicy::LOAD_ALGORITHM > | cub::BlockLoadKeys |
| typedef BlockLoad< ValueT, BLOCK_THREADS, ITEMS_PER_THREAD, ChainedPolicyT::ActivePolicy::SingleTilePolicy::LOAD_ALGORITHM > | cub::BlockLoadValues |
| typedef Traits< KeyT >::UnsignedBits | cub::UnsignedBitsT |
| typedef AgentRadixSortUpsweep< AgentRadixSortUpsweepPolicy< BLOCK_THREADS, ITEMS_PER_THREAD, SegmentedPolicyT::LOAD_MODIFIER, RADIX_BITS >, KeyT, OffsetT > | cub::BlockUpsweepT |
| typedef BlockScan< OffsetT, BLOCK_THREADS > | cub::DigitScanT |
| typedef AgentRadixSortDownsweep< SegmentedPolicyT, IS_DESCENDING, KeyT, ValueT, OffsetT > | cub::BlockDownsweepT |
Enumerations | |
| enum | { BLOCK_THREADS = SegmentedPolicyT::BLOCK_THREADS , ITEMS_PER_THREAD = SegmentedPolicyT::ITEMS_PER_THREAD , RADIX_BITS = SegmentedPolicyT::RADIX_BITS , TILE_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD , RADIX_DIGITS = 1 << RADIX_BITS , KEYS_ONLY = Equals<ValueT, NullType>::VALUE } |
| enum | { cub::BINS_TRACKED_PER_THREAD = BlockDownsweepT::BINS_TRACKED_PER_THREAD } |
Functions | |
| template<typename ChainedPolicyT , bool ALT_DIGIT_BITS, bool IS_DESCENDING, typename KeyT , typename OffsetT > | |
| cub::__launch_bounds__ (int((ALT_DIGIT_BITS) ? ChainedPolicyT::ActivePolicy::AltUpsweepPolicy::BLOCK_THREADS :ChainedPolicyT::ActivePolicy::UpsweepPolicy::BLOCK_THREADS)) __global__ void DeviceRadixSortUpsweepKernel(const KeyT *d_keys | |
| < Signed integer type for global offsets | |
| even_share template | cub::BlockInit< TILE_ITEMS, GRID_MAPPING_RAKE > () |
| upsweep | cub::ProcessRegion (even_share.block_offset, even_share.block_end) |
| cub::CTA_SYNC () | |
| upsweep template | cub::ExtractCounts< IS_DESCENDING > (d_spine, gridDim.x, blockIdx.x) |
| template<typename ChainedPolicyT , typename OffsetT > | |
| cub::__launch_bounds__ (int(ChainedPolicyT::ActivePolicy::ScanPolicy::BLOCK_THREADS), 1) __global__ void RadixSortScanBinsKernel(OffsetT *d_spine | |
| < Signed integer type for global offsets | |
| AgentScanT | cub::block_scan (temp_storage, d_spine, d_spine, cub::Sum(), OffsetT(0)) |
| cub::while (block_offset+AgentScanT::TILE_ITEMS<=num_counts) | |
| template<typename ChainedPolicyT , bool ALT_DIGIT_BITS, bool IS_DESCENDING, typename KeyT , typename ValueT , typename OffsetT > | |
| cub::__launch_bounds__ (int((ALT_DIGIT_BITS) ? ChainedPolicyT::ActivePolicy::AltDownsweepPolicy::BLOCK_THREADS :ChainedPolicyT::ActivePolicy::DownsweepPolicy::BLOCK_THREADS)) __global__ void DeviceRadixSortDownsweepKernel(const KeyT *d_keys_in | |
| < Signed integer type for global offsets | |
| cub::AgentRadixSortDownsweepT (temp_storage, num_items, d_spine, d_keys_in, d_keys_out, d_values_in, d_values_out, current_bit, num_bits).ProcessRegion(even_share.block_offset | |
| template<typename ChainedPolicyT , bool IS_DESCENDING, typename KeyT , typename ValueT , typename OffsetT > | |
| cub::__launch_bounds__ (int(ChainedPolicyT::ActivePolicy::SingleTilePolicy::BLOCK_THREADS), 1) __global__ void DeviceRadixSortSingleTileKernel(const KeyT *d_keys_in | |
| < Signed integer type for global offsets | |
| cub::BlockLoadKeys (temp_storage.load_keys).Load(d_keys_in | |
| cub::if (!KEYS_ONLY) | |
| cub::BlockRadixSortT (temp_storage.sort).SortBlockedToStriped(keys | |
| cub::Int2Type< IS_DESCENDING > () | |
| cub::Int2Type< KEYS_ONLY > ()) | |
| cub::for (int ITEM=0;ITEM< ITEMS_PER_THREAD;++ITEM) | |
| template<typename ChainedPolicyT , bool ALT_DIGIT_BITS, bool IS_DESCENDING, typename KeyT , typename ValueT , typename OffsetIteratorT , typename OffsetT > | |
| cub::__launch_bounds__ (int((ALT_DIGIT_BITS) ? ChainedPolicyT::ActivePolicy::AltSegmentedPolicy::BLOCK_THREADS :ChainedPolicyT::ActivePolicy::SegmentedPolicy::BLOCK_THREADS)) __global__ void DeviceSegmentedRadixSortKernel(const KeyT *d_keys_in | |
| < Signed integer type for global offsets | |
| cub::if (num_items<=0) return | |
| BlockUpsweepT | cub::upsweep (temp_storage.upsweep, d_keys_in, current_bit, pass_bits) |
| upsweep | cub::ProcessRegion (segment_begin, segment_end) |
| upsweep | cub::ExtractCounts (bin_count) |
| cub::if (IS_DESCENDING) | |
| cub::DigitScanT (temp_storage.scan).ExclusiveSum(bin_count | |
| BlockDownsweepT | cub::downsweep (temp_storage.downsweep, bin_offset, num_items, d_keys_in, d_keys_out, d_values_in, d_values_out, current_bit, pass_bits) |
Variables | |
| OffsetT * | cub::d_spine |
| < [in] Input keys buffer | |
| OffsetT | cub::OffsetT |
| [in] Total number of input data items | |
| OffsetT int | cub::current_bit |
| [in] Bit position of current radix digit | |
| OffsetT int int | cub::num_bits |
| [in] Number of bits of current radix digit | |
| OffsetT int int GridEvenShare< OffsetT > | cub::even_share |
| < [in] Even-share descriptor for mapping an equal number of tiles onto each thread block | |
| AgentRadixSortUpsweepT | cub::upsweep (temp_storage, d_keys, current_bit, num_bits) |
| int | cub::num_counts |
| < [in,out] Privatized (per block) digit histograms (striped, i.e., 0s counts from each block, then 1s counts from each block, etc.) | |
| int | cub::block_offset = 0 |
| BlockScanRunningPrefixOp< OffsetT, Sum > | cub::prefix_op (0, Sum()) |
| KeyT * | cub::d_keys_out |
| < [in] Input keys buffer | |
| KeyT const ValueT * | cub::d_values_in |
| [in] Input values buffer | |
| KeyT const ValueT ValueT * | cub::d_values_out |
| [in] Output values buffer | |
| KeyT const ValueT ValueT OffsetT OffsetT | cub::num_items = segment_end - segment_begin |
| [in] Total number of input data items | |
| even_share | cub::block_end |
| KeyT const ValueT ValueT OffsetT int int | cub::end_bit |
| < [in] The past-the-end (most-significant) bit index needed for key comparison | |
| KeyT | cub::keys [ITEMS_PER_THREAD] |
| ValueT | cub::values [ITEMS_PER_THREAD] |
| UnsignedBitsT | cub::default_key_bits = (IS_DESCENDING) ? Traits<KeyT>::LOWEST_KEY : Traits<KeyT>::MAX_KEY |
| KeyT | cub::default_key = reinterpret_cast<KeyT&>(default_key_bits) |
| KeyT const ValueT ValueT OffsetIteratorT | cub::d_begin_offsets |
[in] Pointer to the sequence of beginning offsets of length num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* | |
| KeyT const ValueT ValueT OffsetIteratorT OffsetIteratorT | cub::d_end_offsets |
[in] Pointer to the sequence of ending offsets of length num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i] <= d_begin_offsets[i], the ith segment is considered empty. | |
| KeyT const ValueT ValueT OffsetIteratorT OffsetIteratorT | cub::int |
| [in] The number of segments that comprise the sorting data | |
| KeyT const ValueT ValueT OffsetIteratorT OffsetIteratorT int int | cub::pass_bits |
| < [in] Number of bits of current radix digit | |
| OffsetT | cub::segment_begin = d_begin_offsets[blockIdx.x] |
| OffsetT | cub::segment_end = d_end_offsets[blockIdx.x] |
| OffsetT | cub::bin_count [BINS_TRACKED_PER_THREAD] |
| OffsetT | cub::bin_offset [BINS_TRACKED_PER_THREAD] |
cub::DeviceRadixSort provides device-wide, parallel operations for computing a radix sort across a sequence of data items residing within device-accessible memory.
Definition in file dispatch_radix_sort.cuh.