OpenFPM_pdata  3.0.0
Project that contain the implementation of distributed structures
cub::BlockRadixRankMatch< BLOCK_DIM_X, RADIX_BITS, IS_DESCENDING, INNER_SCAN_ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH > Class Template Reference

Detailed Description

template<int BLOCK_DIM_X, int RADIX_BITS, bool IS_DESCENDING, BlockScanAlgorithm INNER_SCAN_ALGORITHM = BLOCK_SCAN_WARP_SCANS, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int PTX_ARCH = CUB_PTX_ARCH>
class cub::BlockRadixRankMatch< BLOCK_DIM_X, RADIX_BITS, IS_DESCENDING, INNER_SCAN_ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >

Radix-rank using match.any

Definition at line 460 of file block_radix_rank.cuh.

Data Structures

struct  TempStorage
 \smemstorage{BlockScan} More...
 

Public Types

enum  { BINS_TRACKED_PER_THREAD = CUB_MAX(1, (RADIX_DIGITS + BLOCK_THREADS - 1) / BLOCK_THREADS) }
 

Public Member Functions

Collective constructors
__device__ __forceinline__ BlockRadixRankMatch (TempStorage &temp_storage)
 Collective constructor using the specified memory allocation as temporary storage. More...
 
Raking
template<typename UnsignedBits , int KEYS_PER_THREAD>
__device__ __forceinline__ void RankKeys (UnsignedBits(&keys)[KEYS_PER_THREAD], int(&ranks)[KEYS_PER_THREAD], int current_bit, int num_bits)
 Rank keys. More...
 
template<typename UnsignedBits , int KEYS_PER_THREAD>
__device__ __forceinline__ void RankKeys (UnsignedBits(&keys)[KEYS_PER_THREAD], int(&ranks)[KEYS_PER_THREAD], int current_bit, int num_bits, int(&exclusive_digit_prefix)[BINS_TRACKED_PER_THREAD])
 Rank keys. For the lower RADIX_DIGITS threads, digit counts for each digit are provided for the corresponding thread. More...
 

Private Types

enum  {
  BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, RADIX_DIGITS = 1 << RADIX_BITS, LOG_WARP_THREADS = CUB_LOG_WARP_THREADS(PTX_ARCH), WARP_THREADS = 1 << LOG_WARP_THREADS,
  WARPS = (BLOCK_THREADS + WARP_THREADS - 1) / WARP_THREADS, PADDED_WARPS, COUNTERS = PADDED_WARPS * RADIX_DIGITS, RAKING_SEGMENT = (COUNTERS + BLOCK_THREADS - 1) / BLOCK_THREADS,
  PADDED_RAKING_SEGMENT
}
 
typedef int32_t RankT
 
typedef int32_t DigitCounterT
 
typedef BlockScan< DigitCounterT, BLOCK_THREADS, INNER_SCAN_ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH > BlockScanT
 BlockScan type.
 

Private Member Functions

struct __align__ (16) _TempStorage
 Shared memory storage layout type for BlockRadixRank.
 

Private Attributes

_TempStorage & temp_storage
 Shared storage reference.
 
unsigned int linear_tid
 Linear thread-id.
 

Member Enumeration Documentation

◆ anonymous enum

template<int BLOCK_DIM_X, int RADIX_BITS, bool IS_DESCENDING, BlockScanAlgorithm INNER_SCAN_ALGORITHM = BLOCK_SCAN_WARP_SCANS, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int PTX_ARCH = CUB_PTX_ARCH>
anonymous enum
Enumerator
BINS_TRACKED_PER_THREAD 

Number of bin-starting offsets tracked per thread.

Definition at line 495 of file block_radix_rank.cuh.

Constructor & Destructor Documentation

◆ BlockRadixRankMatch()

template<int BLOCK_DIM_X, int RADIX_BITS, bool IS_DESCENDING, BlockScanAlgorithm INNER_SCAN_ALGORITHM = BLOCK_SCAN_WARP_SCANS, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int PTX_ARCH = CUB_PTX_ARCH>
__device__ __forceinline__ cub::BlockRadixRankMatch< BLOCK_DIM_X, RADIX_BITS, IS_DESCENDING, INNER_SCAN_ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::BlockRadixRankMatch ( TempStorage temp_storage)
inline

Collective constructor using the specified memory allocation as temporary storage.

Parameters
[in]temp_storageReference to memory allocation having layout type TempStorage

Definition at line 555 of file block_radix_rank.cuh.

Member Function Documentation

◆ RankKeys() [1/2]

template<int BLOCK_DIM_X, int RADIX_BITS, bool IS_DESCENDING, BlockScanAlgorithm INNER_SCAN_ALGORITHM = BLOCK_SCAN_WARP_SCANS, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int PTX_ARCH = CUB_PTX_ARCH>
template<typename UnsignedBits , int KEYS_PER_THREAD>
__device__ __forceinline__ void cub::BlockRadixRankMatch< BLOCK_DIM_X, RADIX_BITS, IS_DESCENDING, INNER_SCAN_ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::RankKeys ( UnsignedBits(&)  keys[KEYS_PER_THREAD],
int(&)  ranks[KEYS_PER_THREAD],
int  current_bit,
int  num_bits 
)
inline

Rank keys.

Parameters
[in]keysKeys for this tile
[out]ranksFor each key, the local rank within the tile
[in]current_bitThe least-significant bit position of the current digit to extract
[in]num_bitsThe number of bits in the current digit

Definition at line 575 of file block_radix_rank.cuh.

◆ RankKeys() [2/2]

template<int BLOCK_DIM_X, int RADIX_BITS, bool IS_DESCENDING, BlockScanAlgorithm INNER_SCAN_ALGORITHM = BLOCK_SCAN_WARP_SCANS, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int PTX_ARCH = CUB_PTX_ARCH>
template<typename UnsignedBits , int KEYS_PER_THREAD>
__device__ __forceinline__ void cub::BlockRadixRankMatch< BLOCK_DIM_X, RADIX_BITS, IS_DESCENDING, INNER_SCAN_ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::RankKeys ( UnsignedBits(&)  keys[KEYS_PER_THREAD],
int(&)  ranks[KEYS_PER_THREAD],
int  current_bit,
int  num_bits,
int(&)  exclusive_digit_prefix[BINS_TRACKED_PER_THREAD] 
)
inline

Rank keys. For the lower RADIX_DIGITS threads, digit counts for each digit are provided for the corresponding thread.

Parameters
[in]keysKeys for this tile
[out]ranksFor each key, the local rank within the tile (out parameter)
[in]current_bitThe least-significant bit position of the current digit to extract
[in]num_bitsThe number of bits in the current digit
[out]exclusive_digit_prefixThe exclusive prefix sum for the digits [(threadIdx.x * BINS_TRACKED_PER_THREAD) ... (threadIdx.x * BINS_TRACKED_PER_THREAD) + BINS_TRACKED_PER_THREAD - 1]

Definition at line 666 of file block_radix_rank.cuh.


The documentation for this class was generated from the following file: