WarpScanSmem provides smem-based variants of parallel prefix scan of items partitioned across a CUDA thread warp. More...
WarpScanSmem provides smem-based variants of parallel prefix scan of items partitioned across a CUDA thread warp.
Template parameter PTX_ARCH: the PTX compute capability for which to specialize this collective.
Definition at line 55 of file warp_scan_smem.cuh.
Data Structures | |
struct | TempStorage |
Public Types | |
enum | { IS_ARCH_WARP = (LOGICAL_WARP_THREADS == CUB_WARP_THREADS(PTX_ARCH)) , IS_POW_OF_TWO = PowerOfTwo<LOGICAL_WARP_THREADS>::VALUE , STEPS = Log2<LOGICAL_WARP_THREADS>::VALUE , HALF_WARP_THREADS = 1 << (STEPS - 1) , WARP_SMEM_ELEMENTS = LOGICAL_WARP_THREADS + HALF_WARP_THREADS } |
typedef If<((Equals< T, char >::VALUE || Equals< T, signed char >::VALUE) && (PTX_ARCH < 200)), int, T >::Type | CellT |
Storage cell type (workaround for SM1x compiler bugs with custom-ops like Max() on signed chars) | |
typedef CellT | _TempStorage[WARP_SMEM_ELEMENTS] |
Shared memory storage layout type (1.5 warps-worth of elements for each warp) | |
Public Member Functions | |
__device__ __forceinline__ | WarpScanSmem (TempStorage &temp_storage) |
Constructor. | |
template<bool HAS_IDENTITY, int STEP, typename ScanOp > | |
__device__ __forceinline__ void | ScanStep (T &partial, ScanOp scan_op, Int2Type< STEP >) |
Basic inclusive scan iteration (template unrolled, inductive-case specialization) | |
template<bool HAS_IDENTITY, typename ScanOp > | |
__device__ __forceinline__ void | ScanStep (T &, ScanOp, Int2Type< STEPS >) |
Basic inclusive scan iteration (template unrolled, base-case specialization) | |
__device__ __forceinline__ void | InclusiveScan (T input, T &output, Sum scan_op, Int2Type< true >) |
Inclusive prefix scan (specialized for summation across primitive types) | |
template<typename ScanOp , int IS_PRIMITIVE> | |
__device__ __forceinline__ void | InclusiveScan (T input, T &output, ScanOp scan_op, Int2Type< IS_PRIMITIVE >) |
Inclusive prefix scan. | |
__device__ __forceinline__ T | Broadcast (T input, unsigned int src_lane) |
Broadcast. | |
template<typename ScanOp > | |
__device__ __forceinline__ void | InclusiveScan (T input, T &inclusive_output, ScanOp scan_op) |
Inclusive scan. | |
template<typename ScanOp > | |
__device__ __forceinline__ void | InclusiveScan (T input, T &inclusive_output, ScanOp scan_op, T &warp_aggregate) |
Inclusive scan with aggregate. | |
template<typename ScanOpT , typename IsIntegerT > | |
__device__ __forceinline__ void | Update (T, T &inclusive, T &exclusive, ScanOpT, IsIntegerT) |
Update inclusive and exclusive using input and inclusive. | |
__device__ __forceinline__ void | Update (T input, T &inclusive, T &exclusive, cub::Sum, Int2Type< true >) |
Update inclusive and exclusive using input and inclusive (specialized for summation of integer types) | |
template<typename ScanOpT , typename IsIntegerT > | |
__device__ __forceinline__ void | Update (T, T &inclusive, T &exclusive, ScanOpT scan_op, T initial_value, IsIntegerT) |
Update inclusive and exclusive using input, inclusive, and initial value. | |
__device__ __forceinline__ void | Update (T input, T &inclusive, T &exclusive, cub::Sum scan_op, T initial_value, Int2Type< true >) |
Update inclusive and exclusive using input, inclusive, and initial value (specialized for summation of integer types) | |
template<typename ScanOpT , typename IsIntegerT > | |
__device__ __forceinline__ void | Update (T, T &inclusive, T &exclusive, T &warp_aggregate, ScanOpT, IsIntegerT) |
Update inclusive, exclusive, and warp aggregate using input and inclusive. | |
__device__ __forceinline__ void | Update (T input, T &inclusive, T &exclusive, T &warp_aggregate, cub::Sum, Int2Type< true >) |
Update inclusive, exclusive, and warp aggregate using input and inclusive (specialized for summation of integer types) | |
template<typename ScanOpT , typename IsIntegerT > | |
__device__ __forceinline__ void | Update (T, T &inclusive, T &exclusive, T &warp_aggregate, ScanOpT scan_op, T initial_value, IsIntegerT) |
Update inclusive, exclusive, and warp aggregate using input, inclusive, and initial value. | |
Data Fields | |
_TempStorage & | temp_storage |
unsigned int | lane_id |
unsigned int | member_mask |
typedef CellT cub::WarpScanSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH >::_TempStorage[WARP_SMEM_ELEMENTS] |
Shared memory storage layout type (1.5 warps-worth of elements for each warp)
Definition at line 83 of file warp_scan_smem.cuh.
typedef If<((Equals<T, char>::VALUE || Equals<T, signed char>::VALUE) && (PTX_ARCH < 200)), int, T>::Type cub::WarpScanSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH >::CellT |
Storage cell type (workaround for SM1x compiler bugs with custom-ops like Max() on signed chars)
Definition at line 80 of file warp_scan_smem.cuh.
Definition at line 61 of file warp_scan_smem.cuh.
|
inline |
Constructor.
Definition at line 103 of file warp_scan_smem.cuh.
|
inline |
Broadcast.
[in] | input | The value to broadcast |
[in] | src_lane | Which warp lane is to do the broadcasting |
Definition at line 203 of file warp_scan_smem.cuh.
|
inline |
Inclusive scan.
[in] | input | Calling thread's input item. |
[out] | inclusive_output | Calling thread's output item. May be aliased with input. |
[in] | scan_op | Binary scan operator |
Definition at line 224 of file warp_scan_smem.cuh.
|
inline |
Inclusive scan with aggregate.
[in] | input | Calling thread's input item. |
[out] | inclusive_output | Calling thread's output item. May be aliased with input. |
[in] | scan_op | Binary scan operator |
[out] | warp_aggregate | Warp-wide aggregate reduction of input items. |
Definition at line 235 of file warp_scan_smem.cuh.
|
inline |
Inclusive prefix scan.
[in] | input | Calling thread's input item. |
[out] | output | Calling thread's output item. May be aliased with input. |
[in] | scan_op | Binary scan operator |
Definition at line 182 of file warp_scan_smem.cuh.
|
inline |
Inclusive prefix scan (specialized for summation across primitive types)
[in] | input | Calling thread's input item. |
[out] | output | Calling thread's output item. May be aliased with input. |
[in] | scan_op | Binary scan operator |
Definition at line 163 of file warp_scan_smem.cuh.
|
inline |
Basic inclusive scan iteration (template unrolled, base-case specialization)
Definition at line 155 of file warp_scan_smem.cuh.
|
inline |
Basic inclusive scan iteration (template unrolled, inductive-case specialization)
Definition at line 127 of file warp_scan_smem.cuh.
|
inline |
Update inclusive and exclusive using input, inclusive, and initial value (specialized for summation of integer types)
Definition at line 308 of file warp_scan_smem.cuh.
|
inline |
Update inclusive and exclusive using input and inclusive (specialized for summation of integer types)
Definition at line 276 of file warp_scan_smem.cuh.
|
inline |
Update inclusive, exclusive, and warp aggregate using input and inclusive (specialized for summation of integer types)
Definition at line 341 of file warp_scan_smem.cuh.
|
inline |
Update inclusive and exclusive using input, inclusive, and initial value.
Definition at line 289 of file warp_scan_smem.cuh.
|
inline |
Update inclusive and exclusive using input and inclusive.
[in,out] | inclusive | |
[out] | exclusive |
Definition at line 260 of file warp_scan_smem.cuh.
|
inline |
Update inclusive, exclusive, and warp aggregate using input, inclusive, and initial value.
Definition at line 360 of file warp_scan_smem.cuh.
|
inline |
Update inclusive, exclusive, and warp aggregate using input and inclusive.
Definition at line 323 of file warp_scan_smem.cuh.
unsigned int cub::WarpScanSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH >::lane_id |
Definition at line 94 of file warp_scan_smem.cuh.
unsigned int cub::WarpScanSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH >::member_mask |
Definition at line 95 of file warp_scan_smem.cuh.
_TempStorage& cub::WarpScanSmem< T, LOGICAL_WARP_THREADS, PTX_ARCH >::temp_storage |
Definition at line 93 of file warp_scan_smem.cuh.