OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
 
Loading...
Searching...
No Matches
cub::WarpScanSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH > Struct Template Reference

WarpScanSmem provides smem-based variants of parallel prefix scan of items partitioned across a CUDA thread warp. More...

Detailed Description

template<typename T, int LOGICAL_WARP_THREADS, int PTX_ARCH>
struct cub::WarpScanSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH >

WarpScanSmem provides smem-based variants of parallel prefix scan of items partitioned across a CUDA thread warp.

PTX_ARCH: The PTX compute capability for which to specialize this collective

Definition at line 55 of file warp_scan_smem.cuh.

Data Structures

struct  TempStorage
 

Public Types

enum  {
  IS_ARCH_WARP = (LOGICAL_WARP_THREADS == CUB_WARP_THREADS(PTX_ARCH)) , IS_POW_OF_TWO = PowerOfTwo<LOGICAL_WARP_THREADS>::VALUE , STEPS = Log2<LOGICAL_WARP_THREADS>::VALUE , HALF_WARP_THREADS = 1 << (STEPS - 1) ,
  WARP_SMEM_ELEMENTS = LOGICAL_WARP_THREADS + HALF_WARP_THREADS
}
 
typedef If<((Equals< T, char >::VALUE||Equals< T, signed char >::VALUE)&&(PTX_ARCH < 200)), int, T >::Type CellT
 Storage cell type (workaround for SM1x compiler bugs with custom-ops like Max() on signed chars)
 
typedef CellT _TempStorage[WARP_SMEM_ELEMENTS]
 Shared memory storage layout type (1.5 warps-worth of elements for each warp)
 

Public Member Functions

__device__ __forceinline__ WarpScanSmem (TempStorage &temp_storage)
 Constructor.
 
template<bool HAS_IDENTITY, int STEP, typename ScanOp >
__device__ __forceinline__ void ScanStep (T &partial, ScanOp scan_op, Int2Type< STEP >)
 Basic inclusive scan iteration (template unrolled, inductive-case specialization)
 
template<bool HAS_IDENTITY, typename ScanOp >
__device__ __forceinline__ void ScanStep (T &, ScanOp, Int2Type< STEPS >)
 Basic inclusive scan iteration (template unrolled, base-case specialization)
 
__device__ __forceinline__ void InclusiveScan (T input, T &output, Sum scan_op, Int2Type< true >)
 Inclusive prefix scan (specialized for summation across primitive types)
 
template<typename ScanOp , int IS_PRIMITIVE>
__device__ __forceinline__ void InclusiveScan (T input, T &output, ScanOp scan_op, Int2Type< IS_PRIMITIVE >)
 Inclusive prefix scan.
 
__device__ __forceinline__ T Broadcast (T input, unsigned int src_lane)
 Broadcast.
 
template<typename ScanOp >
__device__ __forceinline__ void InclusiveScan (T input, T &inclusive_output, ScanOp scan_op)
 Inclusive scan.
 
template<typename ScanOp >
__device__ __forceinline__ void InclusiveScan (T input, T &inclusive_output, ScanOp scan_op, T &warp_aggregate)
 Inclusive scan with aggregate.
 
template<typename ScanOpT , typename IsIntegerT >
__device__ __forceinline__ void Update (T, T &inclusive, T &exclusive, ScanOpT, IsIntegerT)
 Update inclusive and exclusive using input and inclusive.
 
__device__ __forceinline__ void Update (T input, T &inclusive, T &exclusive, cub::Sum, Int2Type< true >)
 Update inclusive and exclusive using input and inclusive (specialized for summation of integer types)
 
template<typename ScanOpT , typename IsIntegerT >
__device__ __forceinline__ void Update (T, T &inclusive, T &exclusive, ScanOpT scan_op, T initial_value, IsIntegerT)
 Update inclusive and exclusive using input, inclusive, and initial value.
 
__device__ __forceinline__ void Update (T input, T &inclusive, T &exclusive, cub::Sum scan_op, T initial_value, Int2Type< true >)
 Update inclusive and exclusive using input, inclusive, and initial value (specialized for summation of integer types)
 
template<typename ScanOpT , typename IsIntegerT >
__device__ __forceinline__ void Update (T, T &inclusive, T &exclusive, T &warp_aggregate, ScanOpT, IsIntegerT)
 Update inclusive, exclusive, and warp aggregate using input and inclusive.
 
__device__ __forceinline__ void Update (T input, T &inclusive, T &exclusive, T &warp_aggregate, cub::Sum, Int2Type< true >)
 Update inclusive, exclusive, and warp aggregate using input and inclusive (specialized for summation of integer types)
 
template<typename ScanOpT , typename IsIntegerT >
__device__ __forceinline__ void Update (T, T &inclusive, T &exclusive, T &warp_aggregate, ScanOpT scan_op, T initial_value, IsIntegerT)
 Update inclusive, exclusive, and warp aggregate using input, inclusive, and initial value.
 

Data Fields

_TempStorage & temp_storage
 
unsigned int lane_id
 
unsigned int member_mask
 

Member Typedef Documentation

◆ _TempStorage

template<typename T , int LOGICAL_WARP_THREADS, int PTX_ARCH>
typedef CellT cub::WarpScanSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH >::_TempStorage[WARP_SMEM_ELEMENTS]

Shared memory storage layout type (1.5 warps-worth of elements for each warp)

Definition at line 83 of file warp_scan_smem.cuh.

◆ CellT

template<typename T , int LOGICAL_WARP_THREADS, int PTX_ARCH>
typedef If<((Equals<T,char>::VALUE||Equals<T,signed char>::VALUE)&&(PTX_ARCH<200)),int,T>::Type cub::WarpScanSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH >::CellT

Storage cell type (workaround for SM1x compiler bugs with custom-ops like Max() on signed chars)

Definition at line 80 of file warp_scan_smem.cuh.

Member Enumeration Documentation

◆ anonymous enum

template<typename T , int LOGICAL_WARP_THREADS, int PTX_ARCH>
anonymous enum
Enumerator
IS_ARCH_WARP 

Whether the logical warp size and the PTX warp size coincide.

IS_POW_OF_TWO 

Whether the logical warp size is a power-of-two.

STEPS 

The number of warp scan steps.

HALF_WARP_THREADS 

The number of threads in half a warp.

WARP_SMEM_ELEMENTS 

The number of shared memory elements per warp.

Definition at line 61 of file warp_scan_smem.cuh.

Constructor & Destructor Documentation

◆ WarpScanSmem()

template<typename T , int LOGICAL_WARP_THREADS, int PTX_ARCH>
__device__ __forceinline__ cub::WarpScanSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH >::WarpScanSmem ( TempStorage &  temp_storage)
inline

Constructor.

Definition at line 103 of file warp_scan_smem.cuh.

Member Function Documentation

◆ Broadcast()

template<typename T , int LOGICAL_WARP_THREADS, int PTX_ARCH>
__device__ __forceinline__ T cub::WarpScanSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH >::Broadcast ( T  input,
unsigned int  src_lane 
)
inline

Broadcast.

Parameters
[in] input: The value to broadcast
[in] src_lane: Which warp lane is to do the broadcasting

Definition at line 203 of file warp_scan_smem.cuh.

◆ InclusiveScan() [1/4]

template<typename T , int LOGICAL_WARP_THREADS, int PTX_ARCH>
template<typename ScanOp >
__device__ __forceinline__ void cub::WarpScanSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH >::InclusiveScan ( T  input,
T &  inclusive_output,
ScanOp  scan_op 
)
inline

Inclusive scan.

Parameters
[in] input: Calling thread's input item.
[out] inclusive_output: Calling thread's output item. May be aliased with input.
[in] scan_op: Binary scan operator

Definition at line 224 of file warp_scan_smem.cuh.

◆ InclusiveScan() [2/4]

template<typename T , int LOGICAL_WARP_THREADS, int PTX_ARCH>
template<typename ScanOp >
__device__ __forceinline__ void cub::WarpScanSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH >::InclusiveScan ( T  input,
T &  inclusive_output,
ScanOp  scan_op,
T &  warp_aggregate 
)
inline

Inclusive scan with aggregate.

Parameters
[in] input: Calling thread's input item.
[out] inclusive_output: Calling thread's output item. May be aliased with input.
[in] scan_op: Binary scan operator
[out] warp_aggregate: Warp-wide aggregate reduction of input items.

Definition at line 235 of file warp_scan_smem.cuh.

◆ InclusiveScan() [3/4]

template<typename T , int LOGICAL_WARP_THREADS, int PTX_ARCH>
template<typename ScanOp , int IS_PRIMITIVE>
__device__ __forceinline__ void cub::WarpScanSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH >::InclusiveScan ( T  input,
T &  output,
ScanOp  scan_op,
Int2Type< IS_PRIMITIVE >   
)
inline

Inclusive prefix scan.

Parameters
[in] input: Calling thread's input item.
[out] output: Calling thread's output item. May be aliased with input.
[in] scan_op: Binary scan operator

Definition at line 182 of file warp_scan_smem.cuh.

◆ InclusiveScan() [4/4]

template<typename T , int LOGICAL_WARP_THREADS, int PTX_ARCH>
__device__ __forceinline__ void cub::WarpScanSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH >::InclusiveScan ( T  input,
T &  output,
Sum  scan_op,
Int2Type< true >   
)
inline

Inclusive prefix scan (specialized for summation across primitive types)

Parameters
[in] input: Calling thread's input item.
[out] output: Calling thread's output item. May be aliased with input.
[in] scan_op: Binary scan operator

Definition at line 163 of file warp_scan_smem.cuh.

◆ ScanStep() [1/2]

template<typename T , int LOGICAL_WARP_THREADS, int PTX_ARCH>
template<bool HAS_IDENTITY, typename ScanOp >
__device__ __forceinline__ void cub::WarpScanSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH >::ScanStep ( T &  ,
ScanOp  ,
Int2Type< STEPS >  
)
inline

Basic inclusive scan iteration (template unrolled, base-case specialization)

Definition at line 155 of file warp_scan_smem.cuh.

◆ ScanStep() [2/2]

template<typename T , int LOGICAL_WARP_THREADS, int PTX_ARCH>
template<bool HAS_IDENTITY, int STEP, typename ScanOp >
__device__ __forceinline__ void cub::WarpScanSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH >::ScanStep ( T &  partial,
ScanOp  scan_op,
Int2Type< STEP >   
)
inline

Basic inclusive scan iteration (template unrolled, inductive-case specialization)

Definition at line 127 of file warp_scan_smem.cuh.

◆ Update() [1/7]

template<typename T , int LOGICAL_WARP_THREADS, int PTX_ARCH>
__device__ __forceinline__ void cub::WarpScanSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH >::Update ( T  input,
T &  inclusive,
T &  exclusive,
cub::Sum  scan_op,
T  initial_value,
Int2Type< true >   
)
inline

Update inclusive and exclusive using input, inclusive, and initial value (specialized for summation of integer types)

Definition at line 308 of file warp_scan_smem.cuh.

◆ Update() [2/7]

template<typename T , int LOGICAL_WARP_THREADS, int PTX_ARCH>
__device__ __forceinline__ void cub::WarpScanSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH >::Update ( T  input,
T &  inclusive,
T &  exclusive,
cub::Sum  ,
Int2Type< true >   
)
inline

Update inclusive and exclusive using input and inclusive (specialized for summation of integer types)

Definition at line 276 of file warp_scan_smem.cuh.

◆ Update() [3/7]

template<typename T , int LOGICAL_WARP_THREADS, int PTX_ARCH>
__device__ __forceinline__ void cub::WarpScanSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH >::Update ( T  input,
T &  inclusive,
T &  exclusive,
T &  warp_aggregate,
cub::Sum  ,
Int2Type< true >   
)
inline

Update inclusive, exclusive, and warp aggregate using input and inclusive (specialized for summation of integer types)

Definition at line 341 of file warp_scan_smem.cuh.

◆ Update() [4/7]

template<typename T , int LOGICAL_WARP_THREADS, int PTX_ARCH>
template<typename ScanOpT , typename IsIntegerT >
__device__ __forceinline__ void cub::WarpScanSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH >::Update ( T  ,
T &  inclusive,
T &  exclusive,
ScanOpT  scan_op,
T  initial_value,
IsIntegerT   
)
inline

Update inclusive and exclusive using input, inclusive, and initial value.

Definition at line 289 of file warp_scan_smem.cuh.

◆ Update() [5/7]

template<typename T , int LOGICAL_WARP_THREADS, int PTX_ARCH>
template<typename ScanOpT , typename IsIntegerT >
__device__ __forceinline__ void cub::WarpScanSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH >::Update ( T  ,
T &  inclusive,
T &  exclusive,
ScanOpT  ,
IsIntegerT   
)
inline

Update inclusive and exclusive using input and inclusive.

Parameters
[in,out] inclusive
[out] exclusive

Definition at line 260 of file warp_scan_smem.cuh.

◆ Update() [6/7]

template<typename T , int LOGICAL_WARP_THREADS, int PTX_ARCH>
template<typename ScanOpT , typename IsIntegerT >
__device__ __forceinline__ void cub::WarpScanSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH >::Update ( T  ,
T &  inclusive,
T &  exclusive,
T &  warp_aggregate,
ScanOpT  scan_op,
T  initial_value,
IsIntegerT   
)
inline

Update inclusive, exclusive, and warp aggregate using input, inclusive, and initial value.

Definition at line 360 of file warp_scan_smem.cuh.

◆ Update() [7/7]

template<typename T , int LOGICAL_WARP_THREADS, int PTX_ARCH>
template<typename ScanOpT , typename IsIntegerT >
__device__ __forceinline__ void cub::WarpScanSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH >::Update ( T  ,
T &  inclusive,
T &  exclusive,
T &  warp_aggregate,
ScanOpT  ,
IsIntegerT   
)
inline

Update inclusive, exclusive, and warp aggregate using input and inclusive.

Definition at line 323 of file warp_scan_smem.cuh.

Field Documentation

◆ lane_id

template<typename T , int LOGICAL_WARP_THREADS, int PTX_ARCH>
unsigned int cub::WarpScanSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH >::lane_id

Definition at line 94 of file warp_scan_smem.cuh.

◆ member_mask

template<typename T , int LOGICAL_WARP_THREADS, int PTX_ARCH>
unsigned int cub::WarpScanSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH >::member_mask

Definition at line 95 of file warp_scan_smem.cuh.

◆ temp_storage

template<typename T , int LOGICAL_WARP_THREADS, int PTX_ARCH>
_TempStorage& cub::WarpScanSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH >::temp_storage

Definition at line 93 of file warp_scan_smem.cuh.


The documentation for this struct was generated from the following file: