OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
cub::AgentRadixSortDownsweep< AgentRadixSortDownsweepPolicy, IS_DESCENDING, KeyT, ValueT, OffsetT > Struct Template Reference

AgentRadixSortDownsweep implements a stateful abstraction of CUDA thread blocks for participating in device-wide radix sort downsweep. More...

Detailed Description

template<typename AgentRadixSortDownsweepPolicy, bool IS_DESCENDING, typename KeyT, typename ValueT, typename OffsetT>
struct cub::AgentRadixSortDownsweep< AgentRadixSortDownsweepPolicy, IS_DESCENDING, KeyT, ValueT, OffsetT >

AgentRadixSortDownsweep implements a stateful abstraction of CUDA thread blocks for participating in device-wide radix sort downsweep.

OffsetT: Signed integer type for global offsets

Definition at line 113 of file agent_radix_sort_downsweep.cuh.

Data Structures

struct  TempStorage
 Alias wrapper allowing storage to be unioned. More...
 

Public Types

enum  {
  BLOCK_THREADS = AgentRadixSortDownsweepPolicy::BLOCK_THREADS, ITEMS_PER_THREAD = AgentRadixSortDownsweepPolicy::ITEMS_PER_THREAD, RADIX_BITS = AgentRadixSortDownsweepPolicy::RADIX_BITS, TILE_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD,
  RADIX_DIGITS = 1 << RADIX_BITS, KEYS_ONLY = Equals<ValueT, NullType>::VALUE
}
 
enum  { BINS_TRACKED_PER_THREAD = BlockRadixRankT::BINS_TRACKED_PER_THREAD }
 
typedef Traits< KeyT >::UnsignedBits UnsignedBits
 
typedef CacheModifiedInputIterator< LOAD_MODIFIER, UnsignedBits, OffsetT > KeysItr
 
typedef CacheModifiedInputIterator< LOAD_MODIFIER, ValueT, OffsetT > ValuesItr
 
typedef If<(RANK_ALGORITHM==RADIX_RANK_BASIC), BlockRadixRank< BLOCK_THREADS, RADIX_BITS, IS_DESCENDING, false, SCAN_ALGORITHM >, typename If<(RANK_ALGORITHM==RADIX_RANK_MEMOIZE), BlockRadixRank< BLOCK_THREADS, RADIX_BITS, IS_DESCENDING, true, SCAN_ALGORITHM >, BlockRadixRankMatch< BLOCK_THREADS, RADIX_BITS, IS_DESCENDING, SCAN_ALGORITHM > >::Type >::Type BlockRadixRankT
 
typedef BlockLoad< UnsignedBits, BLOCK_THREADS, ITEMS_PER_THREAD, LOAD_ALGORITHM > BlockLoadKeysT
 
typedef BlockLoad< ValueT, BLOCK_THREADS, ITEMS_PER_THREAD, LOAD_ALGORITHM > BlockLoadValuesT
 
typedef ValueT ValueExchangeT[TILE_ITEMS]
 

Public Member Functions

union __align__ (16) _TempStorage
 
template<bool FULL_TILE>
__device__ __forceinline__ void ScatterKeys (UnsignedBits(&twiddled_keys)[ITEMS_PER_THREAD], OffsetT(&relative_bin_offsets)[ITEMS_PER_THREAD], int(&ranks)[ITEMS_PER_THREAD], OffsetT valid_items)
 
template<bool FULL_TILE>
__device__ __forceinline__ void ScatterValues (ValueT(&values)[ITEMS_PER_THREAD], OffsetT(&relative_bin_offsets)[ITEMS_PER_THREAD], int(&ranks)[ITEMS_PER_THREAD], OffsetT valid_items)
 
template<int _RANK_ALGORITHM>
__device__ __forceinline__ void LoadKeys (UnsignedBits(&keys)[ITEMS_PER_THREAD], OffsetT block_offset, OffsetT valid_items, UnsignedBits oob_item, Int2Type< true > is_full_tile, Int2Type< _RANK_ALGORITHM > rank_algorithm)
 
template<int _RANK_ALGORITHM>
__device__ __forceinline__ void LoadKeys (UnsignedBits(&keys)[ITEMS_PER_THREAD], OffsetT block_offset, OffsetT valid_items, UnsignedBits oob_item, Int2Type< false > is_full_tile, Int2Type< _RANK_ALGORITHM > rank_algorithm)
 
__device__ __forceinline__ void LoadKeys (UnsignedBits(&keys)[ITEMS_PER_THREAD], OffsetT block_offset, OffsetT valid_items, UnsignedBits oob_item, Int2Type< true > is_full_tile, Int2Type< RADIX_RANK_MATCH > rank_algorithm)
 
__device__ __forceinline__ void LoadKeys (UnsignedBits(&keys)[ITEMS_PER_THREAD], OffsetT block_offset, OffsetT valid_items, UnsignedBits oob_item, Int2Type< false > is_full_tile, Int2Type< RADIX_RANK_MATCH > rank_algorithm)
 
template<int _RANK_ALGORITHM>
__device__ __forceinline__ void LoadValues (ValueT(&values)[ITEMS_PER_THREAD], OffsetT block_offset, OffsetT valid_items, Int2Type< true > is_full_tile, Int2Type< _RANK_ALGORITHM > rank_algorithm)
 
template<int _RANK_ALGORITHM>
__device__ __forceinline__ void LoadValues (ValueT(&values)[ITEMS_PER_THREAD], OffsetT block_offset, OffsetT valid_items, Int2Type< false > is_full_tile, Int2Type< _RANK_ALGORITHM > rank_algorithm)
 
__device__ __forceinline__ void LoadValues (ValueT(&values)[ITEMS_PER_THREAD], OffsetT block_offset, OffsetT valid_items, Int2Type< true > is_full_tile, Int2Type< RADIX_RANK_MATCH > rank_algorithm)
 
__device__ __forceinline__ void LoadValues (ValueT(&values)[ITEMS_PER_THREAD], OffsetT block_offset, OffsetT valid_items, Int2Type< false > is_full_tile, Int2Type< RADIX_RANK_MATCH > rank_algorithm)
 
template<bool FULL_TILE>
__device__ __forceinline__ void GatherScatterValues (OffsetT(&relative_bin_offsets)[ITEMS_PER_THREAD], int(&ranks)[ITEMS_PER_THREAD], OffsetT block_offset, OffsetT valid_items, Int2Type< false >)
 
template<bool FULL_TILE>
__device__ __forceinline__ void GatherScatterValues (OffsetT(&)[ITEMS_PER_THREAD], int(&)[ITEMS_PER_THREAD], OffsetT, OffsetT, Int2Type< true >)
 
template<bool FULL_TILE>
__device__ __forceinline__ void ProcessTile (OffsetT block_offset, const OffsetT &valid_items=TILE_ITEMS)
 
template<typename InputIteratorT , typename T >
__device__ __forceinline__ void Copy (InputIteratorT d_in, T *d_out, OffsetT block_offset, OffsetT block_end)
 
template<typename InputIteratorT >
__device__ __forceinline__ void Copy (InputIteratorT, NullType *, OffsetT, OffsetT)
 
__device__ __forceinline__ AgentRadixSortDownsweep (TempStorage &temp_storage, OffsetT(&bin_offset)[BINS_TRACKED_PER_THREAD], OffsetT num_items, const KeyT *d_keys_in, KeyT *d_keys_out, const ValueT *d_values_in, ValueT *d_values_out, int current_bit, int num_bits)
 
__device__ __forceinline__ AgentRadixSortDownsweep (TempStorage &temp_storage, OffsetT num_items, OffsetT *d_spine, const KeyT *d_keys_in, KeyT *d_keys_out, const ValueT *d_values_in, ValueT *d_values_out, int current_bit, int num_bits)
 
__device__ __forceinline__ void ProcessRegion (OffsetT block_offset, OffsetT block_end)
 

Data Fields

_TempStorage & temp_storage
 
KeysItr d_keys_in
 
ValuesItr d_values_in
 
UnsignedBits * d_keys_out
 
ValueT * d_values_out
 
OffsetT bin_offset [BINS_TRACKED_PER_THREAD]
 
int current_bit
 
int num_bits
 
int short_circuit
 

Static Public Attributes

static const UnsignedBits LOWEST_KEY = Traits<KeyT>::LOWEST_KEY
 
static const UnsignedBits MAX_KEY = Traits<KeyT>::MAX_KEY
 
static const BlockLoadAlgorithm LOAD_ALGORITHM = AgentRadixSortDownsweepPolicy::LOAD_ALGORITHM
 
static const CacheLoadModifier LOAD_MODIFIER = AgentRadixSortDownsweepPolicy::LOAD_MODIFIER
 
static const RadixRankAlgorithm RANK_ALGORITHM = AgentRadixSortDownsweepPolicy::RANK_ALGORITHM
 
static const BlockScanAlgorithm SCAN_ALGORITHM = AgentRadixSortDownsweepPolicy::SCAN_ALGORITHM
 

Member Enumeration Documentation

◆ anonymous enum

template<typename AgentRadixSortDownsweepPolicy , bool IS_DESCENDING, typename KeyT , typename ValueT , typename OffsetT >
anonymous enum
Enumerator
BINS_TRACKED_PER_THREAD 

Number of bin-starting offsets tracked per thread.

Definition at line 154 of file agent_radix_sort_downsweep.cuh.

Constructor & Destructor Documentation

◆ AgentRadixSortDownsweep() [1/2]

template<typename AgentRadixSortDownsweepPolicy , bool IS_DESCENDING, typename KeyT , typename ValueT , typename OffsetT >
__device__ __forceinline__ cub::AgentRadixSortDownsweep< AgentRadixSortDownsweepPolicy, IS_DESCENDING, KeyT, ValueT, OffsetT >::AgentRadixSortDownsweep ( TempStorage &  temp_storage,
OffsetT(&)  bin_offset[BINS_TRACKED_PER_THREAD],
OffsetT  num_items,
const KeyT *  d_keys_in,
KeyT *  d_keys_out,
const ValueT *  d_values_in,
ValueT *  d_values_out,
int  current_bit,
int  num_bits 
)
inline

Constructor

Definition at line 663 of file agent_radix_sort_downsweep.cuh.

◆ AgentRadixSortDownsweep() [2/2]

template<typename AgentRadixSortDownsweepPolicy , bool IS_DESCENDING, typename KeyT , typename ValueT , typename OffsetT >
__device__ __forceinline__ cub::AgentRadixSortDownsweep< AgentRadixSortDownsweepPolicy, IS_DESCENDING, KeyT, ValueT, OffsetT >::AgentRadixSortDownsweep ( TempStorage &  temp_storage,
OffsetT  num_items,
OffsetT *  d_spine,
const KeyT *  d_keys_in,
KeyT *  d_keys_out,
const ValueT *  d_values_in,
ValueT *  d_values_out,
int  current_bit,
int  num_bits 
)
inline

Constructor

Definition at line 703 of file agent_radix_sort_downsweep.cuh.

Member Function Documentation

◆ __align__()

template<typename AgentRadixSortDownsweepPolicy , bool IS_DESCENDING, typename KeyT , typename ValueT , typename OffsetT >
union cub::AgentRadixSortDownsweep< AgentRadixSortDownsweepPolicy, IS_DESCENDING, KeyT, ValueT, OffsetT >::__align__ ( 16  )
inline

Shared memory storage layout

Definition at line 180 of file agent_radix_sort_downsweep.cuh.

◆ Copy() [1/2]

template<typename AgentRadixSortDownsweepPolicy , bool IS_DESCENDING, typename KeyT , typename ValueT , typename OffsetT >
template<typename InputIteratorT , typename T >
__device__ __forceinline__ void cub::AgentRadixSortDownsweep< AgentRadixSortDownsweepPolicy, IS_DESCENDING, KeyT, ValueT, OffsetT >::Copy ( InputIteratorT  d_in,
T *  d_out,
OffsetT  block_offset,
OffsetT  block_end 
)
inline

Copy tiles within the range of input

Definition at line 612 of file agent_radix_sort_downsweep.cuh.

◆ Copy() [2/2]

template<typename AgentRadixSortDownsweepPolicy , bool IS_DESCENDING, typename KeyT , typename ValueT , typename OffsetT >
template<typename InputIteratorT >
__device__ __forceinline__ void cub::AgentRadixSortDownsweep< AgentRadixSortDownsweepPolicy, IS_DESCENDING, KeyT, ValueT, OffsetT >::Copy ( InputIteratorT  ,
NullType *  ,
OffsetT  ,
OffsetT   
)
inline

Copy tiles within the range of input (specialized for NullType)

Definition at line 648 of file agent_radix_sort_downsweep.cuh.

◆ GatherScatterValues() [1/2]

template<typename AgentRadixSortDownsweepPolicy , bool IS_DESCENDING, typename KeyT , typename ValueT , typename OffsetT >
template<bool FULL_TILE>
__device__ __forceinline__ void cub::AgentRadixSortDownsweep< AgentRadixSortDownsweepPolicy, IS_DESCENDING, KeyT, ValueT, OffsetT >::GatherScatterValues ( OffsetT(&)  relative_bin_offsets[ITEMS_PER_THREAD],
int(&)  ranks[ITEMS_PER_THREAD],
OffsetT  block_offset,
OffsetT  valid_items,
Int2Type< false >   
)
inline

Truck along associated values

Definition at line 456 of file agent_radix_sort_downsweep.cuh.

◆ GatherScatterValues() [2/2]

template<typename AgentRadixSortDownsweepPolicy , bool IS_DESCENDING, typename KeyT , typename ValueT , typename OffsetT >
template<bool FULL_TILE>
__device__ __forceinline__ void cub::AgentRadixSortDownsweep< AgentRadixSortDownsweepPolicy, IS_DESCENDING, KeyT, ValueT, OffsetT >::GatherScatterValues ( OffsetT(&)  [ITEMS_PER_THREAD],
int(&)  [ITEMS_PER_THREAD],
OffsetT  ,
OffsetT  ,
Int2Type< true >   
)
inline

Truck along associated values (specialized for key-only sorting)

Definition at line 486 of file agent_radix_sort_downsweep.cuh.

◆ LoadKeys() [1/4]

template<typename AgentRadixSortDownsweepPolicy , bool IS_DESCENDING, typename KeyT , typename ValueT , typename OffsetT >
template<int _RANK_ALGORITHM>
__device__ __forceinline__ void cub::AgentRadixSortDownsweep< AgentRadixSortDownsweepPolicy, IS_DESCENDING, KeyT, ValueT, OffsetT >::LoadKeys ( UnsignedBits(&)  keys[ITEMS_PER_THREAD],
OffsetT  block_offset,
OffsetT  valid_items,
UnsignedBits  oob_item,
Int2Type< true >  is_full_tile,
Int2Type< _RANK_ALGORITHM >  rank_algorithm 
)
inline

Load a tile of keys (specialized for full tile, any ranking algorithm)

Definition at line 308 of file agent_radix_sort_downsweep.cuh.

◆ LoadKeys() [2/4]

template<typename AgentRadixSortDownsweepPolicy , bool IS_DESCENDING, typename KeyT , typename ValueT , typename OffsetT >
template<int _RANK_ALGORITHM>
__device__ __forceinline__ void cub::AgentRadixSortDownsweep< AgentRadixSortDownsweepPolicy, IS_DESCENDING, KeyT, ValueT, OffsetT >::LoadKeys ( UnsignedBits(&)  keys[ITEMS_PER_THREAD],
OffsetT  block_offset,
OffsetT  valid_items,
UnsignedBits  oob_item,
Int2Type< false >  is_full_tile,
Int2Type< _RANK_ALGORITHM >  rank_algorithm 
)
inline

Load a tile of keys (specialized for partial tile, any ranking algorithm)

Definition at line 327 of file agent_radix_sort_downsweep.cuh.

◆ LoadKeys() [3/4]

template<typename AgentRadixSortDownsweepPolicy , bool IS_DESCENDING, typename KeyT , typename ValueT , typename OffsetT >
__device__ __forceinline__ void cub::AgentRadixSortDownsweep< AgentRadixSortDownsweepPolicy, IS_DESCENDING, KeyT, ValueT, OffsetT >::LoadKeys ( UnsignedBits(&)  keys[ITEMS_PER_THREAD],
OffsetT  block_offset,
OffsetT  valid_items,
UnsignedBits  oob_item,
Int2Type< true >  is_full_tile,
Int2Type< RADIX_RANK_MATCH >  rank_algorithm 
)
inline

Load a tile of keys (specialized for full tile, match ranking algorithm)

Definition at line 349 of file agent_radix_sort_downsweep.cuh.

◆ LoadKeys() [4/4]

template<typename AgentRadixSortDownsweepPolicy , bool IS_DESCENDING, typename KeyT , typename ValueT , typename OffsetT >
__device__ __forceinline__ void cub::AgentRadixSortDownsweep< AgentRadixSortDownsweepPolicy, IS_DESCENDING, KeyT, ValueT, OffsetT >::LoadKeys ( UnsignedBits(&)  keys[ITEMS_PER_THREAD],
OffsetT  block_offset,
OffsetT  valid_items,
UnsignedBits  oob_item,
Int2Type< false >  is_full_tile,
Int2Type< RADIX_RANK_MATCH >  rank_algorithm 
)
inline

Load a tile of keys (specialized for partial tile, match ranking algorithm)

Definition at line 364 of file agent_radix_sort_downsweep.cuh.

◆ LoadValues() [1/4]

template<typename AgentRadixSortDownsweepPolicy , bool IS_DESCENDING, typename KeyT , typename ValueT , typename OffsetT >
template<int _RANK_ALGORITHM>
__device__ __forceinline__ void cub::AgentRadixSortDownsweep< AgentRadixSortDownsweepPolicy, IS_DESCENDING, KeyT, ValueT, OffsetT >::LoadValues ( ValueT(&)  values[ITEMS_PER_THREAD],
OffsetT  block_offset,
OffsetT  valid_items,
Int2Type< true >  is_full_tile,
Int2Type< _RANK_ALGORITHM >  rank_algorithm 
)
inline

Load a tile of values (specialized for full tile, any ranking algorithm)

Definition at line 384 of file agent_radix_sort_downsweep.cuh.

◆ LoadValues() [2/4]

template<typename AgentRadixSortDownsweepPolicy , bool IS_DESCENDING, typename KeyT , typename ValueT , typename OffsetT >
template<int _RANK_ALGORITHM>
__device__ __forceinline__ void cub::AgentRadixSortDownsweep< AgentRadixSortDownsweepPolicy, IS_DESCENDING, KeyT, ValueT, OffsetT >::LoadValues ( ValueT(&)  values[ITEMS_PER_THREAD],
OffsetT  block_offset,
OffsetT  valid_items,
Int2Type< false >  is_full_tile,
Int2Type< _RANK_ALGORITHM >  rank_algorithm 
)
inline

Load a tile of values (specialized for partial tile, any ranking algorithm)

Definition at line 402 of file agent_radix_sort_downsweep.cuh.

◆ LoadValues() [3/4]

template<typename AgentRadixSortDownsweepPolicy , bool IS_DESCENDING, typename KeyT , typename ValueT , typename OffsetT >
__device__ __forceinline__ void cub::AgentRadixSortDownsweep< AgentRadixSortDownsweepPolicy, IS_DESCENDING, KeyT, ValueT, OffsetT >::LoadValues ( ValueT(&)  values[ITEMS_PER_THREAD],
OffsetT  block_offset,
OffsetT  valid_items,
Int2Type< true >  is_full_tile,
Int2Type< RADIX_RANK_MATCH >  rank_algorithm 
)
inline

Load a tile of values (specialized for full tile, match ranking algorithm)

Definition at line 423 of file agent_radix_sort_downsweep.cuh.

◆ LoadValues() [4/4]

template<typename AgentRadixSortDownsweepPolicy , bool IS_DESCENDING, typename KeyT , typename ValueT , typename OffsetT >
__device__ __forceinline__ void cub::AgentRadixSortDownsweep< AgentRadixSortDownsweepPolicy, IS_DESCENDING, KeyT, ValueT, OffsetT >::LoadValues ( ValueT(&)  values[ITEMS_PER_THREAD],
OffsetT  block_offset,
OffsetT  valid_items,
Int2Type< false >  is_full_tile,
Int2Type< RADIX_RANK_MATCH >  rank_algorithm 
)
inline

Load a tile of values (specialized for partial tile, match ranking algorithm)

Definition at line 437 of file agent_radix_sort_downsweep.cuh.

◆ ProcessRegion()

template<typename AgentRadixSortDownsweepPolicy , bool IS_DESCENDING, typename KeyT , typename ValueT , typename OffsetT >
__device__ __forceinline__ void cub::AgentRadixSortDownsweep< AgentRadixSortDownsweepPolicy, IS_DESCENDING, KeyT, ValueT, OffsetT >::ProcessRegion ( OffsetT  block_offset,
OffsetT  block_end 
)
inline

Distribute keys from a segment of input tiles.

Definition at line 750 of file agent_radix_sort_downsweep.cuh.

◆ ProcessTile()

template<typename AgentRadixSortDownsweepPolicy , bool IS_DESCENDING, typename KeyT , typename ValueT , typename OffsetT >
template<bool FULL_TILE>
__device__ __forceinline__ void cub::AgentRadixSortDownsweep< AgentRadixSortDownsweepPolicy, IS_DESCENDING, KeyT, ValueT, OffsetT >::ProcessTile ( OffsetT  block_offset,
const OffsetT &  valid_items = TILE_ITEMS 
)
inline

Process tile

Definition at line 499 of file agent_radix_sort_downsweep.cuh.

◆ ScatterKeys()

template<typename AgentRadixSortDownsweepPolicy , bool IS_DESCENDING, typename KeyT , typename ValueT , typename OffsetT >
template<bool FULL_TILE>
__device__ __forceinline__ void cub::AgentRadixSortDownsweep< AgentRadixSortDownsweepPolicy, IS_DESCENDING, KeyT, ValueT, OffsetT >::ScatterKeys ( UnsignedBits(&)  twiddled_keys[ITEMS_PER_THREAD],
OffsetT(&)  relative_bin_offsets[ITEMS_PER_THREAD],
int(&)  ranks[ITEMS_PER_THREAD],
OffsetT  valid_items 
)
inline

Scatter ranked keys through shared memory, then to device-accessible memory

Definition at line 236 of file agent_radix_sort_downsweep.cuh.

◆ ScatterValues()

template<typename AgentRadixSortDownsweepPolicy , bool IS_DESCENDING, typename KeyT , typename ValueT , typename OffsetT >
template<bool FULL_TILE>
__device__ __forceinline__ void cub::AgentRadixSortDownsweep< AgentRadixSortDownsweepPolicy, IS_DESCENDING, KeyT, ValueT, OffsetT >::ScatterValues ( ValueT(&)  values[ITEMS_PER_THREAD],
OffsetT(&)  relative_bin_offsets[ITEMS_PER_THREAD],
int(&)  ranks[ITEMS_PER_THREAD],
OffsetT  valid_items 
)
inline

Scatter ranked values through shared memory, then to device-accessible memory

Definition at line 273 of file agent_radix_sort_downsweep.cuh.


The documentation for this struct was generated from the following file: