OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
cub::WarpScanSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH > Struct Template Reference

WarpScanSmem provides smem-based variants of parallel prefix scan of items partitioned across a CUDA thread warp. More...

Detailed Description

template<typename T, int LOGICAL_WARP_THREADS, int PTX_ARCH>
struct cub::WarpScanSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH >

WarpScanSmem provides smem-based variants of parallel prefix scan of items partitioned across a CUDA thread warp.

Template parameter PTX_ARCH: the PTX compute capability for which to specialize this collective.

Definition at line 55 of file warp_scan_smem.cuh.

Data Structures

struct  TempStorage
 

Public Types

enum  {
  IS_ARCH_WARP = (LOGICAL_WARP_THREADS == CUB_WARP_THREADS(PTX_ARCH)), IS_POW_OF_TWO = PowerOfTwo<LOGICAL_WARP_THREADS>::VALUE, STEPS = Log2<LOGICAL_WARP_THREADS>::VALUE, HALF_WARP_THREADS = 1 << (STEPS - 1),
  WARP_SMEM_ELEMENTS = LOGICAL_WARP_THREADS + HALF_WARP_THREADS
}
 
typedef If<((Equals< T, char >::VALUE||Equals< T, signed char >::VALUE) &&(PTX_ARCH< 200)), int, T >::Type CellT
 Storage cell type (workaround for SM1x compiler bugs with custom-ops like Max() on signed chars)
 
typedef CellT _TempStorage[WARP_SMEM_ELEMENTS]
 Shared memory storage layout type (1.5 warps-worth of elements for each warp)
 

Public Member Functions

__device__ __forceinline__ WarpScanSmem (TempStorage &temp_storage)
 Constructor.
 
template<bool HAS_IDENTITY, int STEP, typename ScanOp >
__device__ __forceinline__ void ScanStep (T &partial, ScanOp scan_op, Int2Type< STEP >)
 Basic inclusive scan iteration (template unrolled, inductive-case specialization)
 
template<bool HAS_IDENTITY, typename ScanOp >
__device__ __forceinline__ void ScanStep (T &, ScanOp, Int2Type< STEPS >)
 Basic inclusive scan iteration (template unrolled, base-case specialization)
 
__device__ __forceinline__ void InclusiveScan (T input, T &output, Sum scan_op, Int2Type< true >)
 Inclusive prefix scan (specialized for summation across primitive types) More...
 
template<typename ScanOp , int IS_PRIMITIVE>
__device__ __forceinline__ void InclusiveScan (T input, T &output, ScanOp scan_op, Int2Type< IS_PRIMITIVE >)
 Inclusive prefix scan. More...
 
__device__ __forceinline__ T Broadcast (T input, unsigned int src_lane)
 Broadcast. More...
 
template<typename ScanOp >
__device__ __forceinline__ void InclusiveScan (T input, T &inclusive_output, ScanOp scan_op)
 Inclusive scan. More...
 
template<typename ScanOp >
__device__ __forceinline__ void InclusiveScan (T input, T &inclusive_output, ScanOp scan_op, T &warp_aggregate)
 Inclusive scan with aggregate. More...
 
template<typename ScanOpT , typename IsIntegerT >
__device__ __forceinline__ void Update (T, T &inclusive, T &exclusive, ScanOpT, IsIntegerT)
 Update inclusive and exclusive using input and inclusive. More...
 
__device__ __forceinline__ void Update (T input, T &inclusive, T &exclusive, cub::Sum, Int2Type< true >)
 Update inclusive and exclusive using input and inclusive (specialized for summation of integer types)
 
template<typename ScanOpT , typename IsIntegerT >
__device__ __forceinline__ void Update (T, T &inclusive, T &exclusive, ScanOpT scan_op, T initial_value, IsIntegerT)
 Update inclusive and exclusive using input, inclusive, and initial value.
 
__device__ __forceinline__ void Update (T input, T &inclusive, T &exclusive, cub::Sum scan_op, T initial_value, Int2Type< true >)
 Update inclusive and exclusive using input, inclusive, and initial value (specialized for summation of integer types)
 
template<typename ScanOpT , typename IsIntegerT >
__device__ __forceinline__ void Update (T, T &inclusive, T &exclusive, T &warp_aggregate, ScanOpT, IsIntegerT)
 Update inclusive, exclusive, and warp aggregate using input and inclusive.
 
__device__ __forceinline__ void Update (T input, T &inclusive, T &exclusive, T &warp_aggregate, cub::Sum, Int2Type< true >)
 Update inclusive, exclusive, and warp aggregate using input and inclusive (specialized for summation of integer types)
 
template<typename ScanOpT , typename IsIntegerT >
__device__ __forceinline__ void Update (T, T &inclusive, T &exclusive, T &warp_aggregate, ScanOpT scan_op, T initial_value, IsIntegerT)
 Update inclusive, exclusive, and warp aggregate using input, inclusive, and initial value.
 

Data Fields

_TempStorage & temp_storage
 
unsigned int lane_id
 
unsigned int member_mask
 

Member Enumeration Documentation

◆ anonymous enum

template<typename T, int LOGICAL_WARP_THREADS, int PTX_ARCH>
anonymous enum
Enumerator
IS_ARCH_WARP 

Whether the logical warp size and the PTX warp size coincide.

IS_POW_OF_TWO 

Whether the logical warp size is a power-of-two.

STEPS 

The number of warp scan steps.

HALF_WARP_THREADS 

The number of threads in half a warp.

WARP_SMEM_ELEMENTS 

The number of shared memory elements per warp.

Definition at line 61 of file warp_scan_smem.cuh.

Member Function Documentation

◆ Broadcast()

template<typename T, int LOGICAL_WARP_THREADS, int PTX_ARCH>
__device__ __forceinline__ T cub::WarpScanSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH >::Broadcast ( T  input,
unsigned int  src_lane 
)
inline

Broadcast.

Parameters
[in] input: The value to broadcast
[in] src_lane: Which warp lane is to do the broadcasting

Definition at line 203 of file warp_scan_smem.cuh.

◆ InclusiveScan() [1/4]

template<typename T, int LOGICAL_WARP_THREADS, int PTX_ARCH>
__device__ __forceinline__ void cub::WarpScanSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH >::InclusiveScan ( T  input,
T &  output,
Sum  scan_op,
Int2Type< true >   
)
inline

Inclusive prefix scan (specialized for summation across primitive types)

Parameters
[in] input: Calling thread's input item.
[out] output: Calling thread's output item. May be aliased with input.
[in] scan_op: Binary scan operator

Definition at line 163 of file warp_scan_smem.cuh.

◆ InclusiveScan() [2/4]

template<typename T, int LOGICAL_WARP_THREADS, int PTX_ARCH>
template<typename ScanOp , int IS_PRIMITIVE>
__device__ __forceinline__ void cub::WarpScanSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH >::InclusiveScan ( T  input,
T &  output,
ScanOp  scan_op,
Int2Type< IS_PRIMITIVE >   
)
inline

Inclusive prefix scan.

Parameters
[in] input: Calling thread's input item.
[out] output: Calling thread's output item. May be aliased with input.
[in] scan_op: Binary scan operator

Definition at line 182 of file warp_scan_smem.cuh.

◆ InclusiveScan() [3/4]

template<typename T, int LOGICAL_WARP_THREADS, int PTX_ARCH>
template<typename ScanOp >
__device__ __forceinline__ void cub::WarpScanSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH >::InclusiveScan ( T  input,
T &  inclusive_output,
ScanOp  scan_op 
)
inline

Inclusive scan.

Parameters
[in] input: Calling thread's input item.
[out] inclusive_output: Calling thread's output item. May be aliased with input.
[in] scan_op: Binary scan operator

Definition at line 224 of file warp_scan_smem.cuh.

◆ InclusiveScan() [4/4]

template<typename T, int LOGICAL_WARP_THREADS, int PTX_ARCH>
template<typename ScanOp >
__device__ __forceinline__ void cub::WarpScanSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH >::InclusiveScan ( T  input,
T &  inclusive_output,
ScanOp  scan_op,
T &  warp_aggregate 
)
inline

Inclusive scan with aggregate.

Parameters
[in] input: Calling thread's input item.
[out] inclusive_output: Calling thread's output item. May be aliased with input.
[in] scan_op: Binary scan operator
[out] warp_aggregate: Warp-wide aggregate reduction of input items.

Definition at line 235 of file warp_scan_smem.cuh.

◆ Update()

template<typename T, int LOGICAL_WARP_THREADS, int PTX_ARCH>
template<typename ScanOpT , typename IsIntegerT >
__device__ __forceinline__ void cub::WarpScanSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH >::Update ( T  ,
T &  inclusive,
T &  exclusive,
ScanOpT  ,
IsIntegerT   
)
inline

Update inclusive and exclusive using input and inclusive.

Parameters
[in,out] inclusive
[out] exclusive

Definition at line 260 of file warp_scan_smem.cuh.


The documentation for this struct was generated from the following file: