OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
 
Loading...
Searching...
No Matches
cub::WarpScanShfl< T, LOGICAL_WARP_THREADS, PTX_ARCH > Struct Template Reference

WarpScanShfl provides SHFL-based variants of parallel prefix scan of items partitioned across a CUDA thread warp. More...

Detailed Description

template<typename T, int LOGICAL_WARP_THREADS, int PTX_ARCH>
struct cub::WarpScanShfl< T, LOGICAL_WARP_THREADS, PTX_ARCH >

WarpScanShfl provides SHFL-based variants of parallel prefix scan of items partitioned across a CUDA thread warp.

LOGICAL_WARP_THREADS must be a power of two. PTX_ARCH is the PTX compute capability for which to specialize this collective.

Definition at line 56 of file warp_scan_shfl.cuh.

Data Structures

struct  IntegerTraits
 
struct  TempStorage
 Shared memory storage layout type. More...
 

Public Types

enum  { IS_ARCH_WARP = (LOGICAL_WARP_THREADS == CUB_WARP_THREADS(PTX_ARCH)) , STEPS = Log2<LOGICAL_WARP_THREADS>::VALUE , SHFL_C = (CUB_WARP_THREADS(PTX_ARCH) - LOGICAL_WARP_THREADS) << 8 }
 

Public Member Functions

__device__ __forceinline__ WarpScanShfl (TempStorage &)
 Constructor.
 
__device__ __forceinline__ int InclusiveScanStep (int input, cub::Sum, int first_lane, int offset)
 Inclusive prefix scan step (specialized for summation across int32 types)
 
__device__ __forceinline__ unsigned int InclusiveScanStep (unsigned int input, cub::Sum, int first_lane, int offset)
 Inclusive prefix scan step (specialized for summation across uint32 types)
 
__device__ __forceinline__ float InclusiveScanStep (float input, cub::Sum, int first_lane, int offset)
 Inclusive prefix scan step (specialized for summation across fp32 types)
 
__device__ __forceinline__ unsigned long long InclusiveScanStep (unsigned long long input, cub::Sum, int first_lane, int offset)
 Inclusive prefix scan step (specialized for summation across unsigned long long types)
 
__device__ __forceinline__ long long InclusiveScanStep (long long input, cub::Sum, int first_lane, int offset)
 Inclusive prefix scan step (specialized for summation across long long types)
 
__device__ __forceinline__ double InclusiveScanStep (double input, cub::Sum, int first_lane, int offset)
 Inclusive prefix scan step (specialized for summation across fp64 types)
 
template<typename _T , typename ScanOpT >
__device__ __forceinline__ _T InclusiveScanStep (_T input, ScanOpT scan_op, int first_lane, int offset)
 Inclusive prefix scan step (generic)
 
template<typename _T , typename ScanOpT >
__device__ __forceinline__ _T InclusiveScanStep (_T input, ScanOpT scan_op, int first_lane, int offset, Int2Type< true >)
 Inclusive prefix scan step (specialized for small integers size 32b or less)
 
template<typename _T , typename ScanOpT >
__device__ __forceinline__ _T InclusiveScanStep (_T input, ScanOpT scan_op, int first_lane, int offset, Int2Type< false >)
 Inclusive prefix scan step (specialized for types other than small integers size 32b or less)
 
__device__ __forceinline__ T Broadcast (T input, int src_lane)
 Broadcast.
 
template<typename _T , typename ScanOpT >
__device__ __forceinline__ void InclusiveScan (_T input, _T &inclusive_output, ScanOpT scan_op)
 Inclusive scan.
 
template<typename KeyT , typename ValueT , typename ReductionOpT >
__device__ __forceinline__ void InclusiveScan (KeyValuePair< KeyT, ValueT > input, KeyValuePair< KeyT, ValueT > &inclusive_output, ReduceByKeyOp< ReductionOpT > scan_op)
 Inclusive scan, specialized for reduce-value-by-key.
 
template<typename ScanOpT >
__device__ __forceinline__ void InclusiveScan (T input, T &inclusive_output, ScanOpT scan_op, T &warp_aggregate)
 Inclusive scan with aggregate.
 
template<typename ScanOpT , typename IsIntegerT >
__device__ __forceinline__ void Update (T, T &inclusive, T &exclusive, ScanOpT, IsIntegerT)
 Update inclusive and exclusive using input and inclusive.
 
__device__ __forceinline__ void Update (T input, T &inclusive, T &exclusive, cub::Sum, Int2Type< true >)
 Update inclusive and exclusive using input and inclusive (specialized for summation of integer types)
 
template<typename ScanOpT , typename IsIntegerT >
__device__ __forceinline__ void Update (T, T &inclusive, T &exclusive, ScanOpT scan_op, T initial_value, IsIntegerT)
 Update inclusive and exclusive using input, inclusive, and initial value.
 
__device__ __forceinline__ void Update (T input, T &inclusive, T &exclusive, cub::Sum scan_op, T initial_value, Int2Type< true >)
 Update inclusive and exclusive using input, inclusive, and initial value (specialized for summation of integer types)
 
template<typename ScanOpT , typename IsIntegerT >
__device__ __forceinline__ void Update (T input, T &inclusive, T &exclusive, T &warp_aggregate, ScanOpT scan_op, IsIntegerT is_integer)
 Update inclusive, exclusive, and warp aggregate using input and inclusive.
 
template<typename ScanOpT , typename IsIntegerT >
__device__ __forceinline__ void Update (T input, T &inclusive, T &exclusive, T &warp_aggregate, ScanOpT scan_op, T initial_value, IsIntegerT is_integer)
 Update inclusive, exclusive, and warp aggregate using input, inclusive, and initial value.
 

Data Fields

unsigned int lane_id
 Lane index in logical warp.
 
unsigned int warp_id
 Logical warp index in 32-thread physical warp.
 
unsigned int member_mask
 32-thread physical warp member mask of logical warp
 

Member Enumeration Documentation

◆ anonymous enum

template<typename T , int LOGICAL_WARP_THREADS, int PTX_ARCH>
anonymous enum
Enumerator
IS_ARCH_WARP 

Whether the logical warp size and the PTX warp size coincide.

STEPS 

The number of warp scan steps.

SHFL_C 

The 5-bit SHFL mask for logically splitting warps into sub-segments starts 8-bits up.

Definition at line 62 of file warp_scan_shfl.cuh.

Constructor & Destructor Documentation

◆ WarpScanShfl()

template<typename T , int LOGICAL_WARP_THREADS, int PTX_ARCH>
__device__ __forceinline__ cub::WarpScanShfl< T, LOGICAL_WARP_THREADS, PTX_ARCH >::WarpScanShfl ( TempStorage & )
inline

Constructor.

Definition at line 105 of file warp_scan_shfl.cuh.

Member Function Documentation

◆ Broadcast()

template<typename T , int LOGICAL_WARP_THREADS, int PTX_ARCH>
__device__ __forceinline__ T cub::WarpScanShfl< T, LOGICAL_WARP_THREADS, PTX_ARCH >::Broadcast ( T  input,
int  src_lane 
)
inline

Broadcast.

Parameters
[in] input: The value to broadcast
[in] src_lane: Which warp lane is to do the broadcasting

Definition at line 451 of file warp_scan_shfl.cuh.

◆ InclusiveScan() [1/3]

template<typename T , int LOGICAL_WARP_THREADS, int PTX_ARCH>
template<typename _T , typename ScanOpT >
__device__ __forceinline__ void cub::WarpScanShfl< T, LOGICAL_WARP_THREADS, PTX_ARCH >::InclusiveScan ( _T  input,
_T &  inclusive_output,
ScanOpT  scan_op 
)
inline

Inclusive scan.

Parameters
[in] input: Calling thread's input item.
[out] inclusive_output: Calling thread's output item. May be aliased with input.
[in] scan_op: Binary scan operator

Definition at line 465 of file warp_scan_shfl.cuh.

◆ InclusiveScan() [2/3]

template<typename T , int LOGICAL_WARP_THREADS, int PTX_ARCH>
template<typename KeyT , typename ValueT , typename ReductionOpT >
__device__ __forceinline__ void cub::WarpScanShfl< T, LOGICAL_WARP_THREADS, PTX_ARCH >::InclusiveScan ( KeyValuePair< KeyT, ValueT >  input,
KeyValuePair< KeyT, ValueT > &  inclusive_output,
ReduceByKeyOp< ReductionOpT >  scan_op 
)
inline

Inclusive scan, specialized for reduce-value-by-key.

Parameters
[in] input: Calling thread's input item.
[out] inclusive_output: Calling thread's output item. May be aliased with input.
[in] scan_op: Binary scan operator

Definition at line 491 of file warp_scan_shfl.cuh.

◆ InclusiveScan() [3/3]

template<typename T , int LOGICAL_WARP_THREADS, int PTX_ARCH>
template<typename ScanOpT >
__device__ __forceinline__ void cub::WarpScanShfl< T, LOGICAL_WARP_THREADS, PTX_ARCH >::InclusiveScan ( T  input,
T &  inclusive_output,
ScanOpT  scan_op,
T &  warp_aggregate 
)
inline

Inclusive scan with aggregate.

Parameters
[in] input: Calling thread's input item.
[out] inclusive_output: Calling thread's output item. May be aliased with input.
[in] scan_op: Binary scan operator
[out] warp_aggregate: Warp-wide aggregate reduction of input items.

Definition at line 524 of file warp_scan_shfl.cuh.

◆ InclusiveScanStep() [1/9]

template<typename T , int LOGICAL_WARP_THREADS, int PTX_ARCH>
template<typename _T , typename ScanOpT >
__device__ __forceinline__ _T cub::WarpScanShfl< T, LOGICAL_WARP_THREADS, PTX_ARCH >::InclusiveScanStep ( _T  input,
ScanOpT  scan_op,
int  first_lane,
int  offset 
)
inline

Inclusive prefix scan step (generic)

Parameters
[in] input: Calling thread's input item.
[in] scan_op: Binary scan operator
[in] first_lane: Index of first lane in segment
[in] offset: Up-offset to pull from

Definition at line 399 of file warp_scan_shfl.cuh.

◆ InclusiveScanStep() [2/9]

template<typename T , int LOGICAL_WARP_THREADS, int PTX_ARCH>
template<typename _T , typename ScanOpT >
__device__ __forceinline__ _T cub::WarpScanShfl< T, LOGICAL_WARP_THREADS, PTX_ARCH >::InclusiveScanStep ( _T  input,
ScanOpT  scan_op,
int  first_lane,
int  offset,
Int2Type< false >   
)
inline

Inclusive prefix scan step (specialized for types other than small integers size 32b or less)

Parameters
[in] input: Calling thread's input item.
[in] scan_op: Binary scan operator
[in] first_lane: Index of first lane in segment
[in] offset: Up-offset to pull from

Definition at line 431 of file warp_scan_shfl.cuh.

◆ InclusiveScanStep() [3/9]

template<typename T , int LOGICAL_WARP_THREADS, int PTX_ARCH>
template<typename _T , typename ScanOpT >
__device__ __forceinline__ _T cub::WarpScanShfl< T, LOGICAL_WARP_THREADS, PTX_ARCH >::InclusiveScanStep ( _T  input,
ScanOpT  scan_op,
int  first_lane,
int  offset,
Int2Type< true >   
)
inline

Inclusive prefix scan step (specialized for small integers size 32b or less)

Parameters
[in] input: Calling thread's input item.
[in] scan_op: Binary scan operator
[in] first_lane: Index of first lane in segment
[in] offset: Up-offset to pull from

Definition at line 418 of file warp_scan_shfl.cuh.

◆ InclusiveScanStep() [4/9]

template<typename T , int LOGICAL_WARP_THREADS, int PTX_ARCH>
__device__ __forceinline__ double cub::WarpScanShfl< T, LOGICAL_WARP_THREADS, PTX_ARCH >::InclusiveScanStep ( double  input,
cub::Sum  ,
int  first_lane,
int  offset 
)
inline

Inclusive prefix scan step (specialized for summation across fp64 types)

Parameters
[in] input: Calling thread's input item.
[in] first_lane: Index of first lane in segment
[in] offset: Up-offset to pull from

Definition at line 330 of file warp_scan_shfl.cuh.

◆ InclusiveScanStep() [5/9]

template<typename T , int LOGICAL_WARP_THREADS, int PTX_ARCH>
__device__ __forceinline__ float cub::WarpScanShfl< T, LOGICAL_WARP_THREADS, PTX_ARCH >::InclusiveScanStep ( float  input,
cub::Sum  ,
int  first_lane,
int  offset 
)
inline

Inclusive prefix scan step (specialized for summation across fp32 types)

Parameters
[in] input: Calling thread's input item.
[in] first_lane: Index of first lane in segment
[in] offset: Up-offset to pull from

Definition at line 199 of file warp_scan_shfl.cuh.

◆ InclusiveScanStep() [6/9]

template<typename T , int LOGICAL_WARP_THREADS, int PTX_ARCH>
__device__ __forceinline__ int cub::WarpScanShfl< T, LOGICAL_WARP_THREADS, PTX_ARCH >::InclusiveScanStep ( int  input,
cub::Sum  ,
int  first_lane,
int  offset 
)
inline

Inclusive prefix scan step (specialized for summation across int32 types)

Parameters
[in] input: Calling thread's input item.
[in] first_lane: Index of first lane in segment
[in] offset: Up-offset to pull from

Definition at line 126 of file warp_scan_shfl.cuh.

◆ InclusiveScanStep() [7/9]

template<typename T , int LOGICAL_WARP_THREADS, int PTX_ARCH>
__device__ __forceinline__ long long cub::WarpScanShfl< T, LOGICAL_WARP_THREADS, PTX_ARCH >::InclusiveScanStep ( long long  input,
cub::Sum  ,
int  first_lane,
int  offset 
)
inline

Inclusive prefix scan step (specialized for summation across long long types)

Parameters
[in] input: Calling thread's input item.
[in] first_lane: Index of first lane in segment
[in] offset: Up-offset to pull from

Definition at line 283 of file warp_scan_shfl.cuh.

◆ InclusiveScanStep() [8/9]

template<typename T , int LOGICAL_WARP_THREADS, int PTX_ARCH>
__device__ __forceinline__ unsigned int cub::WarpScanShfl< T, LOGICAL_WARP_THREADS, PTX_ARCH >::InclusiveScanStep ( unsigned int  input,
cub::Sum  ,
int  first_lane,
int  offset 
)
inline

Inclusive prefix scan step (specialized for summation across uint32 types)

Parameters
[in] input: Calling thread's input item.
[in] first_lane: Index of first lane in segment
[in] offset: Up-offset to pull from

Definition at line 162 of file warp_scan_shfl.cuh.

◆ InclusiveScanStep() [9/9]

template<typename T , int LOGICAL_WARP_THREADS, int PTX_ARCH>
__device__ __forceinline__ unsigned long long cub::WarpScanShfl< T, LOGICAL_WARP_THREADS, PTX_ARCH >::InclusiveScanStep ( unsigned long long  input,
cub::Sum  ,
int  first_lane,
int  offset 
)
inline

Inclusive prefix scan step (specialized for summation across unsigned long long types)

Parameters
[in] input: Calling thread's input item.
[in] first_lane: Index of first lane in segment
[in] offset: Up-offset to pull from

Definition at line 236 of file warp_scan_shfl.cuh.

◆ Update() [1/6]

template<typename T , int LOGICAL_WARP_THREADS, int PTX_ARCH>
__device__ __forceinline__ void cub::WarpScanShfl< T, LOGICAL_WARP_THREADS, PTX_ARCH >::Update ( T  input,
T &  inclusive,
T &  exclusive,
cub::Sum  scan_op,
T  initial_value,
Int2Type< true >   
)
inline

Update inclusive and exclusive using input, inclusive, and initial value (specialized for summation of integer types)

Definition at line 584 of file warp_scan_shfl.cuh.

◆ Update() [2/6]

template<typename T , int LOGICAL_WARP_THREADS, int PTX_ARCH>
__device__ __forceinline__ void cub::WarpScanShfl< T, LOGICAL_WARP_THREADS, PTX_ARCH >::Update ( T  input,
T &  inclusive,
T &  exclusive,
cub::Sum  ,
Int2Type< true >   
)
inline

Update inclusive and exclusive using input and inclusive (specialized for summation of integer types)

Definition at line 555 of file warp_scan_shfl.cuh.

◆ Update() [3/6]

template<typename T , int LOGICAL_WARP_THREADS, int PTX_ARCH>
template<typename ScanOpT , typename IsIntegerT >
__device__ __forceinline__ void cub::WarpScanShfl< T, LOGICAL_WARP_THREADS, PTX_ARCH >::Update ( T  input,
T &  inclusive,
T &  exclusive,
T &  warp_aggregate,
ScanOpT  scan_op,
IsIntegerT  is_integer 
)
inline

Update inclusive, exclusive, and warp aggregate using input and inclusive.

Definition at line 599 of file warp_scan_shfl.cuh.

◆ Update() [4/6]

template<typename T , int LOGICAL_WARP_THREADS, int PTX_ARCH>
template<typename ScanOpT , typename IsIntegerT >
__device__ __forceinline__ void cub::WarpScanShfl< T, LOGICAL_WARP_THREADS, PTX_ARCH >::Update ( T  input,
T &  inclusive,
T &  exclusive,
T &  warp_aggregate,
ScanOpT  scan_op,
T  initial_value,
IsIntegerT  is_integer 
)
inline

Update inclusive, exclusive, and warp aggregate using input, inclusive, and initial value.

Definition at line 613 of file warp_scan_shfl.cuh.

◆ Update() [5/6]

template<typename T , int LOGICAL_WARP_THREADS, int PTX_ARCH>
template<typename ScanOpT , typename IsIntegerT >
__device__ __forceinline__ void cub::WarpScanShfl< T, LOGICAL_WARP_THREADS, PTX_ARCH >::Update ( T  ,
T &  inclusive,
T &  exclusive,
ScanOpT  scan_op,
T  initial_value,
IsIntegerT   
)
inline

Update inclusive and exclusive using input, inclusive, and initial value.

Definition at line 568 of file warp_scan_shfl.cuh.

◆ Update() [6/6]

template<typename T , int LOGICAL_WARP_THREADS, int PTX_ARCH>
template<typename ScanOpT , typename IsIntegerT >
__device__ __forceinline__ void cub::WarpScanShfl< T, LOGICAL_WARP_THREADS, PTX_ARCH >::Update ( T  ,
T &  inclusive,
T &  exclusive,
ScanOpT  ,
IsIntegerT   
)
inline

Update inclusive and exclusive using input and inclusive.

Parameters
[in,out] inclusive
[out] exclusive

Definition at line 543 of file warp_scan_shfl.cuh.

Field Documentation

◆ lane_id

template<typename T , int LOGICAL_WARP_THREADS, int PTX_ARCH>
unsigned int cub::WarpScanShfl< T, LOGICAL_WARP_THREADS, PTX_ARCH >::lane_id

Lane index in logical warp.

Definition at line 92 of file warp_scan_shfl.cuh.

◆ member_mask

template<typename T , int LOGICAL_WARP_THREADS, int PTX_ARCH>
unsigned int cub::WarpScanShfl< T, LOGICAL_WARP_THREADS, PTX_ARCH >::member_mask

32-thread physical warp member mask of logical warp

Definition at line 98 of file warp_scan_shfl.cuh.

◆ warp_id

template<typename T , int LOGICAL_WARP_THREADS, int PTX_ARCH>
unsigned int cub::WarpScanShfl< T, LOGICAL_WARP_THREADS, PTX_ARCH >::warp_id

Logical warp index in 32-thread physical warp.

Definition at line 95 of file warp_scan_shfl.cuh.


The documentation for this struct was generated from the following file: