OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
cub::AgentRadixSortUpsweep< AgentRadixSortUpsweepPolicy, KeyT, OffsetT > Struct Template Reference

AgentRadixSortUpsweep implements a stateful abstraction of CUDA thread blocks for participating in device-wide radix sort upsweep. More...

Detailed Description

template<typename AgentRadixSortUpsweepPolicy, typename KeyT, typename OffsetT>
struct cub::AgentRadixSortUpsweep< AgentRadixSortUpsweepPolicy, KeyT, OffsetT >

AgentRadixSortUpsweep implements a stateful abstraction of CUDA thread blocks for participating in device-wide radix sort upsweep.

Template parameter OffsetT: signed integer type for global offsets.

Definition at line 86 of file agent_radix_sort_upsweep.cuh.

Data Structures

struct  Iterate
 
struct  Iterate< MAX, MAX >
 
struct  TempStorage
 Alias wrapper allowing storage to be unioned. More...
 

Public Types

enum  {
  RADIX_BITS = AgentRadixSortUpsweepPolicy::RADIX_BITS, BLOCK_THREADS = AgentRadixSortUpsweepPolicy::BLOCK_THREADS, KEYS_PER_THREAD = AgentRadixSortUpsweepPolicy::ITEMS_PER_THREAD, RADIX_DIGITS = 1 << RADIX_BITS,
  LOG_WARP_THREADS = CUB_PTX_LOG_WARP_THREADS, WARP_THREADS = 1 << LOG_WARP_THREADS, WARPS = (BLOCK_THREADS + WARP_THREADS - 1) / WARP_THREADS, TILE_ITEMS = BLOCK_THREADS * KEYS_PER_THREAD,
  BYTES_PER_COUNTER = sizeof(DigitCounter), LOG_BYTES_PER_COUNTER = Log2<BYTES_PER_COUNTER>::VALUE, PACKING_RATIO = sizeof(PackedCounter) / sizeof(DigitCounter), LOG_PACKING_RATIO = Log2<PACKING_RATIO>::VALUE,
  LOG_COUNTER_LANES = CUB_MAX(0, RADIX_BITS - LOG_PACKING_RATIO), COUNTER_LANES = 1 << LOG_COUNTER_LANES, LANES_PER_WARP = CUB_MAX(1, (COUNTER_LANES + WARPS - 1) / WARPS), UNROLL_COUNT = CUB_MIN(64, 255 / KEYS_PER_THREAD),
  UNROLLED_ELEMENTS = UNROLL_COUNT * TILE_ITEMS
}
 
typedef Traits< KeyT >::UnsignedBits UnsignedBits
 
typedef unsigned char DigitCounter
 
typedef unsigned int PackedCounter
 
typedef CacheModifiedInputIterator< LOAD_MODIFIER, UnsignedBits, OffsetT > KeysItr
 

Public Member Functions

union __align__ (16) _TempStorage
 
__device__ __forceinline__ void Bucket (UnsignedBits key)
 
__device__ __forceinline__ void ResetDigitCounters ()
 
__device__ __forceinline__ void ResetUnpackedCounters ()
 
__device__ __forceinline__ void UnpackDigitCounts ()
 
__device__ __forceinline__ void ProcessFullTile (OffsetT block_offset)
 
__device__ __forceinline__ void ProcessPartialTile (OffsetT block_offset, const OffsetT &block_end)
 
__device__ __forceinline__ AgentRadixSortUpsweep (TempStorage &temp_storage, const KeyT *d_keys_in, int current_bit, int num_bits)
 
__device__ __forceinline__ void ProcessRegion (OffsetT block_offset, const OffsetT &block_end)
 
template<bool IS_DESCENDING>
__device__ __forceinline__ void ExtractCounts (OffsetT *counters, int bin_stride=1, int bin_offset=0)
 
template<int BINS_TRACKED_PER_THREAD>
__device__ __forceinline__ void ExtractCounts (OffsetT(&bin_count)[BINS_TRACKED_PER_THREAD])
 

Data Fields

_TempStorage & temp_storage
 
OffsetT local_counts [LANES_PER_WARP][PACKING_RATIO]
 
KeysItr d_keys_in
 
int current_bit
 
int num_bits
 

Static Public Attributes

static const CacheLoadModifier LOAD_MODIFIER = AgentRadixSortUpsweepPolicy::LOAD_MODIFIER
 

Constructor & Destructor Documentation

◆ AgentRadixSortUpsweep()

template<typename AgentRadixSortUpsweepPolicy , typename KeyT , typename OffsetT >
__device__ __forceinline__ cub::AgentRadixSortUpsweep< AgentRadixSortUpsweepPolicy, KeyT, OffsetT >::AgentRadixSortUpsweep ( TempStorage & temp_storage,
const KeyT *  d_keys_in,
int  current_bit,
int  num_bits 
)
inline

Constructor

Definition at line 336 of file agent_radix_sort_upsweep.cuh.

Member Function Documentation

◆ __align__()

template<typename AgentRadixSortUpsweepPolicy , typename KeyT , typename OffsetT >
union cub::AgentRadixSortUpsweep< AgentRadixSortUpsweepPolicy, KeyT, OffsetT >::__align__ ( 16  )
inline

Shared memory storage layout

Definition at line 144 of file agent_radix_sort_upsweep.cuh.

◆ Bucket()

template<typename AgentRadixSortUpsweepPolicy , typename KeyT , typename OffsetT >
__device__ __forceinline__ void cub::AgentRadixSortUpsweep< AgentRadixSortUpsweepPolicy, KeyT, OffsetT >::Bucket ( UnsignedBits  key)
inline

Decode a key and increment corresponding smem digit counter

Definition at line 213 of file agent_radix_sort_upsweep.cuh.

◆ ExtractCounts() [1/2]

template<typename AgentRadixSortUpsweepPolicy , typename KeyT , typename OffsetT >
template<bool IS_DESCENDING>
__device__ __forceinline__ void cub::AgentRadixSortUpsweep< AgentRadixSortUpsweepPolicy, KeyT, OffsetT >::ExtractCounts ( OffsetT * counters,
int  bin_stride = 1,
int  bin_offset = 0 
)
inline

Extract counts (saving them to the external array)

Definition at line 403 of file agent_radix_sort_upsweep.cuh.

◆ ExtractCounts() [2/2]

template<typename AgentRadixSortUpsweepPolicy , typename KeyT , typename OffsetT >
template<int BINS_TRACKED_PER_THREAD>
__device__ __forceinline__ void cub::AgentRadixSortUpsweep< AgentRadixSortUpsweepPolicy, KeyT, OffsetT >::ExtractCounts ( OffsetT(&)  bin_count[BINS_TRACKED_PER_THREAD])
inline

Extract counts

Parameters
[out] bin_count: The exclusive prefix sum for the digits [(threadIdx.x * BINS_TRACKED_PER_THREAD) ... (threadIdx.x * BINS_TRACKED_PER_THREAD) + BINS_TRACKED_PER_THREAD - 1]

Definition at line 476 of file agent_radix_sort_upsweep.cuh.

◆ ProcessFullTile()

template<typename AgentRadixSortUpsweepPolicy , typename KeyT , typename OffsetT >
__device__ __forceinline__ void cub::AgentRadixSortUpsweep< AgentRadixSortUpsweepPolicy, KeyT, OffsetT >::ProcessFullTile ( OffsetT  block_offset)
inline

Processes a single, full tile

Definition at line 295 of file agent_radix_sort_upsweep.cuh.

◆ ProcessPartialTile()

template<typename AgentRadixSortUpsweepPolicy , typename KeyT , typename OffsetT >
__device__ __forceinline__ void cub::AgentRadixSortUpsweep< AgentRadixSortUpsweepPolicy, KeyT, OffsetT >::ProcessPartialTile ( OffsetT  block_offset,
const OffsetT & block_end 
)
inline

Processes a single load (may have some threads masked off)

Definition at line 313 of file agent_radix_sort_upsweep.cuh.

◆ ProcessRegion()

template<typename AgentRadixSortUpsweepPolicy , typename KeyT , typename OffsetT >
__device__ __forceinline__ void cub::AgentRadixSortUpsweep< AgentRadixSortUpsweepPolicy, KeyT, OffsetT >::ProcessRegion ( OffsetT  block_offset,
const OffsetT & block_end 
)
inline

Compute radix digit histograms from a segment of input tiles.

Definition at line 352 of file agent_radix_sort_upsweep.cuh.

◆ ResetDigitCounters()

template<typename AgentRadixSortUpsweepPolicy , typename KeyT , typename OffsetT >
__device__ __forceinline__ void cub::AgentRadixSortUpsweep< AgentRadixSortUpsweepPolicy, KeyT, OffsetT >::ResetDigitCounters ( )
inline

Reset composite counters

Definition at line 235 of file agent_radix_sort_upsweep.cuh.

◆ ResetUnpackedCounters()

template<typename AgentRadixSortUpsweepPolicy , typename KeyT , typename OffsetT >
__device__ __forceinline__ void cub::AgentRadixSortUpsweep< AgentRadixSortUpsweepPolicy, KeyT, OffsetT >::ResetUnpackedCounters ( )
inline

Reset the unpacked counters in each thread

Definition at line 248 of file agent_radix_sort_upsweep.cuh.

◆ UnpackDigitCounts()

template<typename AgentRadixSortUpsweepPolicy , typename KeyT , typename OffsetT >
__device__ __forceinline__ void cub::AgentRadixSortUpsweep< AgentRadixSortUpsweepPolicy, KeyT, OffsetT >::UnpackDigitCounts ( )
inline

Extracts and aggregates the digit counters for each counter lane owned by this warp

Definition at line 266 of file agent_radix_sort_upsweep.cuh.


The documentation for this struct was generated from the following file: