OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
 
Loading...
Searching...
No Matches
cub::BlockScanWarpScans< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH > Struct Template Reference

BlockScanWarpScans provides warpscan-based variants of parallel prefix scan across a CUDA thread block. More...

Detailed Description

template<typename T, int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
struct cub::BlockScanWarpScans< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >

BlockScanWarpScans provides warpscan-based variants of parallel prefix scan across a CUDA thread block.

The PTX compute capability for which to specialize this collective

Definition at line 56 of file block_scan_warp_scans.cuh.

Data Structures

struct  _TempStorage
 Shared memory storage layout type. More...
 
struct  TempStorage
 Alias wrapper allowing storage to be unioned. More...
 

Public Types

enum  { WARP_THREADS = CUB_WARP_THREADS(PTX_ARCH) , BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z , WARPS = (BLOCK_THREADS + WARP_THREADS - 1) / WARP_THREADS }
 Constants. More...
 
enum  { WARP_THREADS = CUB_WARP_THREADS(PTX_ARCH) , BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z , WARPS = (BLOCK_THREADS + WARP_THREADS - 1) / WARP_THREADS }
 Constants. More...
 
enum  { BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z , INNER_WARP_THREADS = CUB_WARP_THREADS(PTX_ARCH) , OUTER_WARP_THREADS = BLOCK_THREADS / INNER_WARP_THREADS , OUTER_WARPS = INNER_WARP_THREADS }
 Constants. More...
 
typedef WarpScan< T, WARP_THREADS, PTX_ARCH > WarpScanT
 WarpScan utility type.
 
typedef WarpScan< T, WARPS, PTX_ARCH > WarpAggregateScan
 WarpScan utility type.
 
typedef WarpScan< T, WARP_THREADS, PTX_ARCH > WarpScanT
 WarpScan utility type.
 
typedef WarpScan< T, WARPS, PTX_ARCH > WarpAggregateScanT
 WarpScan utility type.
 
typedef WarpScan< T, OUTER_WARP_THREADS, PTX_ARCH > OuterWarpScanT
 Outer WarpScan utility type.
 
typedef WarpScan< T, INNER_WARP_THREADS, PTX_ARCH > InnerWarpScanT
 Inner WarpScan utility type.
 
typedef OuterWarpScanT::TempStorage OuterScanArray[OUTER_WARPS]
 

Public Member Functions

struct __align__ (32) _TempStorage
 Shared memory storage layout type.
 
__device__ __forceinline__ BlockScanWarpScans (TempStorage &temp_storage)
 Constructor.
 
template<typename ScanOp , int WARP>
__device__ __forceinline__ void ApplyWarpAggregates (T &warp_prefix, ScanOp scan_op, T &block_aggregate, Int2Type< WARP >)
 
template<typename ScanOp >
__device__ __forceinline__ void ApplyWarpAggregates (T &, ScanOp, T &, Int2Type< WARPS >)
 
template<typename ScanOp >
__device__ __forceinline__ T ComputeWarpPrefix (ScanOp scan_op, T warp_aggregate, T &block_aggregate)
 Use the warp-wide aggregates to compute the calling warp's prefix. Also returns block-wide aggregate in all threads.
 
template<typename ScanOp >
__device__ __forceinline__ T ComputeWarpPrefix (ScanOp scan_op, T warp_aggregate, T &block_aggregate, const T &initial_value)
 Use the warp-wide aggregates and initial-value to compute the calling warp's prefix. Also returns block-wide aggregate in all threads.
 
template<typename ScanOp >
__device__ __forceinline__ void ExclusiveScan (T input, T &exclusive_output, ScanOp scan_op)
 Computes an exclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element. With no initial value, the output computed for thread0 is undefined.
 
template<typename ScanOp >
__device__ __forceinline__ void ExclusiveScan (T input, T &exclusive_output, const T &initial_value, ScanOp scan_op)
 Computes an exclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element.
 
template<typename ScanOp >
__device__ __forceinline__ void ExclusiveScan (T input, T &exclusive_output, ScanOp scan_op, T &block_aggregate)
 Computes an exclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide block_aggregate of all inputs. With no initial value, the output computed for thread0 is undefined.
 
template<typename ScanOp >
__device__ __forceinline__ void ExclusiveScan (T input, T &exclusive_output, const T &initial_value, ScanOp scan_op, T &block_aggregate)
 Computes an exclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide block_aggregate of all inputs.
 
template<typename ScanOp , typename BlockPrefixCallbackOp >
__device__ __forceinline__ void ExclusiveScan (T input, T &exclusive_output, ScanOp scan_op, BlockPrefixCallbackOp &block_prefix_callback_op)
 Computes an exclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element. The call-back functor block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. Also provides every thread with the block-wide block_aggregate of all inputs.
 
template<typename ScanOp >
__device__ __forceinline__ void InclusiveScan (T input, T &inclusive_output, ScanOp scan_op)
 Computes an inclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element.
 
template<typename ScanOp >
__device__ __forceinline__ void InclusiveScan (T input, T &inclusive_output, ScanOp scan_op, T &block_aggregate)
 Computes an inclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide block_aggregate of all inputs.
 
template<typename ScanOp , typename BlockPrefixCallbackOp >
__device__ __forceinline__ void InclusiveScan (T input, T &exclusive_output, ScanOp scan_op, BlockPrefixCallbackOp &block_prefix_callback_op)
 Computes an inclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element. The call-back functor block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. Also provides every thread with the block-wide block_aggregate of all inputs.
 
__device__ __forceinline__ BlockScanWarpScans (TempStorage &temp_storage)
 Constructor.
 
template<typename ScanOp , int WARP>
__device__ __forceinline__ void ApplyWarpAggregates (T &warp_prefix, ScanOp scan_op, T &block_aggregate, Int2Type< WARP > addend_warp)
 
template<typename ScanOp >
__device__ __forceinline__ void ApplyWarpAggregates (T &warp_prefix, ScanOp scan_op, T &block_aggregate, Int2Type< WARPS > addend_warp)
 
template<typename ScanOp >
__device__ __forceinline__ T ComputeWarpPrefix (ScanOp scan_op, T warp_aggregate, T &block_aggregate)
 Use the warp-wide aggregates to compute the calling warp's prefix. Also returns block-wide aggregate in all threads.
 
template<typename ScanOp >
__device__ __forceinline__ T ComputeWarpPrefix (ScanOp scan_op, T warp_aggregate, T &block_aggregate, const T &initial_value)
 Use the warp-wide aggregates and initial-value to compute the calling warp's prefix. Also returns block-wide aggregate in all threads.
 
template<typename ScanOp >
__device__ __forceinline__ void ExclusiveScan (T input, T &exclusive_output, ScanOp scan_op)
 Computes an exclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element. With no initial value, the output computed for thread0 is undefined.
 
template<typename ScanOp >
__device__ __forceinline__ void ExclusiveScan (T input, T &exclusive_output, const T &initial_value, ScanOp scan_op)
 Computes an exclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element.
 
template<typename ScanOp >
__device__ __forceinline__ void ExclusiveScan (T input, T &exclusive_output, ScanOp scan_op, T &block_aggregate)
 Computes an exclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide block_aggregate of all inputs. With no initial value, the output computed for thread0 is undefined.
 
template<typename ScanOp >
__device__ __forceinline__ void ExclusiveScan (T input, T &exclusive_output, const T &initial_value, ScanOp scan_op, T &block_aggregate)
 Computes an exclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide block_aggregate of all inputs.
 
template<typename ScanOp , typename BlockPrefixCallbackOp >
__device__ __forceinline__ void ExclusiveScan (T input, T &exclusive_output, ScanOp scan_op, BlockPrefixCallbackOp &block_prefix_callback_op)
 Computes an exclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element. The call-back functor block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. Also provides every thread with the block-wide block_aggregate of all inputs.
 
template<typename ScanOp >
__device__ __forceinline__ void InclusiveScan (T input, T &inclusive_output, ScanOp scan_op)
 Computes an inclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element.
 
template<typename ScanOp >
__device__ __forceinline__ void InclusiveScan (T input, T &inclusive_output, ScanOp scan_op, T &block_aggregate)
 Computes an inclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide block_aggregate of all inputs.
 
template<typename ScanOp , typename BlockPrefixCallbackOp >
__device__ __forceinline__ void InclusiveScan (T input, T &exclusive_output, ScanOp scan_op, BlockPrefixCallbackOp &block_prefix_callback_op)
 Computes an inclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element. The call-back functor block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. Also provides every thread with the block-wide block_aggregate of all inputs.
 
__device__ __forceinline__ BlockScanWarpScans (TempStorage &temp_storage)
 Constructor.
 
template<typename ScanOp >
__device__ __forceinline__ void ExclusiveScan (T input, T &exclusive_output, ScanOp scan_op)
 Computes an exclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element. With no initial value, the output computed for thread0 is undefined.
 
template<typename ScanOp >
__device__ __forceinline__ void ExclusiveScan (T input, T &exclusive_output, const T &initial_value, ScanOp scan_op)
 Computes an exclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element.
 
template<typename ScanOp >
__device__ __forceinline__ void ExclusiveScan (T input, T &exclusive_output, ScanOp scan_op, T &block_aggregate)
 Computes an exclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide block_aggregate of all inputs. With no initial value, the output computed for thread0 is undefined.
 
template<typename ScanOp >
__device__ __forceinline__ void ExclusiveScan (T input, T &exclusive_output, const T &initial_value, ScanOp scan_op, T &block_aggregate)
 Computes an exclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide block_aggregate of all inputs.
 
template<typename ScanOp , typename BlockPrefixCallbackOp >
__device__ __forceinline__ void ExclusiveScan (T input, T &exclusive_output, ScanOp scan_op, BlockPrefixCallbackOp &block_prefix_callback_op)
 Computes an exclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element. The call-back functor block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs.
 
template<typename ScanOp >
__device__ __forceinline__ void InclusiveScan (T input, T &inclusive_output, ScanOp scan_op)
 Computes an inclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element.
 
template<typename ScanOp >
__device__ __forceinline__ void InclusiveScan (T input, T &inclusive_output, ScanOp scan_op, T &block_aggregate)
 Computes an inclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide block_aggregate of all inputs.
 
template<typename ScanOp , typename BlockPrefixCallbackOp >
__device__ __forceinline__ void InclusiveScan (T input, T &inclusive_output, ScanOp scan_op, BlockPrefixCallbackOp &block_prefix_callback_op)
 Computes an inclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element. The call-back functor block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs.
 

Data Fields

_TempStorage & temp_storage
 
unsigned int linear_tid
 
unsigned int warp_id
 
unsigned int lane_id
 

Member Typedef Documentation

◆ InnerWarpScanT

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
typedef WarpScan<T, INNER_WARP_THREADS, PTX_ARCH> cub::BlockScanWarpScans< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::InnerWarpScanT

Inner WarpScan utility type.

Definition at line 80 of file block_scan_warp_scans3.cuh.

◆ OuterScanArray

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
typedef OuterWarpScanT::TempStorage cub::BlockScanWarpScans< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::OuterScanArray[OUTER_WARPS]

Definition at line 82 of file block_scan_warp_scans3.cuh.

◆ OuterWarpScanT

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
typedef WarpScan<T, OUTER_WARP_THREADS, PTX_ARCH> cub::BlockScanWarpScans< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::OuterWarpScanT

Outer WarpScan utility type.

Definition at line 77 of file block_scan_warp_scans3.cuh.

◆ WarpAggregateScan

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
typedef WarpScan<T, WARPS, PTX_ARCH> cub::BlockScanWarpScans< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::WarpAggregateScan

WarpScan utility type.

Definition at line 79 of file block_scan_warp_scans.cuh.

◆ WarpAggregateScanT

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
typedef WarpScan<T, WARPS, PTX_ARCH> cub::BlockScanWarpScans< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::WarpAggregateScanT

WarpScan utility type.

Definition at line 79 of file block_scan_warp_scans2.cuh.

◆ WarpScanT [1/2]

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
typedef WarpScan<T, WARP_THREADS, PTX_ARCH> cub::BlockScanWarpScans< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::WarpScanT

WarpScan utility type.

Definition at line 76 of file block_scan_warp_scans.cuh.

◆ WarpScanT [2/2]

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
typedef WarpScan<T, WARP_THREADS, PTX_ARCH> cub::BlockScanWarpScans< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::WarpScanT

WarpScan utility type.

Definition at line 76 of file block_scan_warp_scans2.cuh.

Member Enumeration Documentation

◆ anonymous enum

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
anonymous enum

Constants.

Enumerator
WARP_THREADS 

Number of warp threads.

BLOCK_THREADS 

The thread block size in threads.

WARPS 

Number of active warps.

Definition at line 63 of file block_scan_warp_scans.cuh.

◆ anonymous enum

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
anonymous enum

Constants.

Enumerator
WARP_THREADS 

Number of warp threads.

BLOCK_THREADS 

The thread block size in threads.

WARPS 

Number of active warps.

Definition at line 63 of file block_scan_warp_scans2.cuh.

◆ anonymous enum

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
anonymous enum

Constants.

Enumerator
BLOCK_THREADS 

The thread block size in threads.

INNER_WARP_THREADS 

Number of warp threads.

OUTER_WARPS 

Number of outer scan warps.

Definition at line 63 of file block_scan_warp_scans3.cuh.

Constructor & Destructor Documentation

◆ BlockScanWarpScans() [1/3]

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
__device__ __forceinline__ cub::BlockScanWarpScans< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::BlockScanWarpScans ( TempStorage &  temp_storage )
inline

Constructor.

Definition at line 111 of file block_scan_warp_scans.cuh.

◆ BlockScanWarpScans() [2/3]

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
__device__ __forceinline__ cub::BlockScanWarpScans< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::BlockScanWarpScans ( TempStorage &  temp_storage )
inline

Constructor.

Definition at line 111 of file block_scan_warp_scans2.cuh.

◆ BlockScanWarpScans() [3/3]

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
__device__ __forceinline__ cub::BlockScanWarpScans< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::BlockScanWarpScans ( TempStorage &  temp_storage )
inline

Constructor.

Definition at line 121 of file block_scan_warp_scans3.cuh.

Member Function Documentation

◆ __align__()

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
struct cub::BlockScanWarpScans< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::__align__ ( 32  )
inline

Shared memory storage layout type.

Buffer for warp-synchronous scans

Shared prefix for the entire thread block

Definition at line 79 of file block_scan_warp_scans.cuh.

◆ ApplyWarpAggregates() [1/4]

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
template<typename ScanOp >
__device__ __forceinline__ void cub::BlockScanWarpScans< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::ApplyWarpAggregates ( T &  ,
ScanOp  ,
T &  ,
Int2Type< WARPS >  
)
inline

Definition at line 142 of file block_scan_warp_scans.cuh.

◆ ApplyWarpAggregates() [2/4]

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
template<typename ScanOp , int WARP>
__device__ __forceinline__ void cub::BlockScanWarpScans< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::ApplyWarpAggregates ( T &  warp_prefix,
ScanOp  scan_op,
T &  block_aggregate,
Int2Type< WARP >  addend_warp 
)
inline
Parameters
[out] warp_prefix: The calling thread's partial reduction
[in] scan_op: Binary scan operator
[out] block_aggregate: Threadblock-wide aggregate reduction of input items

Definition at line 126 of file block_scan_warp_scans2.cuh.

◆ ApplyWarpAggregates() [3/4]

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
template<typename ScanOp , int WARP>
__device__ __forceinline__ void cub::BlockScanWarpScans< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::ApplyWarpAggregates ( T &  warp_prefix,
ScanOp  scan_op,
T &  block_aggregate,
Int2Type< WARP >   
)
inline
Parameters
[out] warp_prefix: The calling thread's partial reduction
[in] scan_op: Binary scan operator
[out] block_aggregate: Threadblock-wide aggregate reduction of input items

Definition at line 126 of file block_scan_warp_scans.cuh.

◆ ApplyWarpAggregates() [4/4]

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
template<typename ScanOp >
__device__ __forceinline__ void cub::BlockScanWarpScans< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::ApplyWarpAggregates ( T &  warp_prefix,
ScanOp  scan_op,
T &  block_aggregate,
Int2Type< WARPS >  addend_warp 
)
inline
Parameters
[out] warp_prefix: The calling thread's partial reduction
[in] scan_op: Binary scan operator
[out] block_aggregate: Threadblock-wide aggregate reduction of input items

Definition at line 142 of file block_scan_warp_scans2.cuh.

◆ ComputeWarpPrefix() [1/4]

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
template<typename ScanOp >
__device__ __forceinline__ T cub::BlockScanWarpScans< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::ComputeWarpPrefix ( ScanOp  scan_op,
T  warp_aggregate,
T &  block_aggregate 
)
inline

Use the warp-wide aggregates to compute the calling warp's prefix. Also returns block-wide aggregate in all threads.

Parameters
[in] scan_op: Binary scan operator
[in] warp_aggregate: [lane (WARP_THREADS - 1) only] Warp-wide aggregate reduction of input items
[out] block_aggregate: Threadblock-wide aggregate reduction of input items

Definition at line 152 of file block_scan_warp_scans.cuh.

◆ ComputeWarpPrefix() [2/4]

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
template<typename ScanOp >
__device__ __forceinline__ T cub::BlockScanWarpScans< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::ComputeWarpPrefix ( ScanOp  scan_op,
T  warp_aggregate,
T &  block_aggregate 
)
inline

Use the warp-wide aggregates to compute the calling warp's prefix. Also returns block-wide aggregate in all threads.

Parameters
[in] scan_op: Binary scan operator
[in] warp_aggregate: [lane (WARP_THREADS - 1) only] Warp-wide aggregate reduction of input items
[out] block_aggregate: Threadblock-wide aggregate reduction of input items

Definition at line 152 of file block_scan_warp_scans2.cuh.

◆ ComputeWarpPrefix() [3/4]

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
template<typename ScanOp >
__device__ __forceinline__ T cub::BlockScanWarpScans< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::ComputeWarpPrefix ( ScanOp  scan_op,
T  warp_aggregate,
T &  block_aggregate,
const T &  initial_value 
)
inline

Use the warp-wide aggregates and initial-value to compute the calling warp's prefix. Also returns block-wide aggregate in all threads.

Parameters
[in] scan_op: Binary scan operator
[in] warp_aggregate: [lane (WARP_THREADS - 1) only] Warp-wide aggregate reduction of input items
[out] block_aggregate: Threadblock-wide aggregate reduction of input items
[in] initial_value: Initial value to seed the exclusive scan

Definition at line 186 of file block_scan_warp_scans.cuh.

◆ ComputeWarpPrefix() [4/4]

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
template<typename ScanOp >
__device__ __forceinline__ T cub::BlockScanWarpScans< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::ComputeWarpPrefix ( ScanOp  scan_op,
T  warp_aggregate,
T &  block_aggregate,
const T &  initial_value 
)
inline

Use the warp-wide aggregates and initial-value to compute the calling warp's prefix. Also returns block-wide aggregate in all threads.

Parameters
[in] scan_op: Binary scan operator
[in] warp_aggregate: [lane (WARP_THREADS - 1) only] Warp-wide aggregate reduction of input items
[out] block_aggregate: Threadblock-wide aggregate reduction of input items
[in] initial_value: Initial value to seed the exclusive scan

Definition at line 186 of file block_scan_warp_scans2.cuh.

◆ ExclusiveScan() [1/15]

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
template<typename ScanOp >
__device__ __forceinline__ void cub::BlockScanWarpScans< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::ExclusiveScan ( T  input,
T &  exclusive_output,
const T &  initial_value,
ScanOp  scan_op 
)
inline

Computes an exclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element.

Parameters
[in] input: Calling thread's input items
[out] exclusive_output: Calling thread's output items (may be aliased to input)
[in] initial_value: Initial value to seed the exclusive scan
[in] scan_op: Binary scan operator

Definition at line 221 of file block_scan_warp_scans.cuh.

◆ ExclusiveScan() [2/15]

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
template<typename ScanOp >
__device__ __forceinline__ void cub::BlockScanWarpScans< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::ExclusiveScan ( T  input,
T &  exclusive_output,
const T &  initial_value,
ScanOp  scan_op 
)
inline

Computes an exclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element.

Parameters
[in] input: Calling thread's input items
[out] exclusive_output: Calling thread's output items (may be aliased to input)
[in] initial_value: Initial value to seed the exclusive scan
[in] scan_op: Binary scan operator

Definition at line 221 of file block_scan_warp_scans2.cuh.

◆ ExclusiveScan() [3/15]

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
template<typename ScanOp >
__device__ __forceinline__ void cub::BlockScanWarpScans< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::ExclusiveScan ( T  input,
T &  exclusive_output,
const T &  initial_value,
ScanOp  scan_op 
)
inline

Computes an exclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element.

Parameters
[in] input: Calling thread's input items
[out] exclusive_output: Calling thread's output items (may be aliased to input)
[in] initial_value: Initial value to seed the exclusive scan
[in] scan_op: Binary scan operator

Definition at line 150 of file block_scan_warp_scans3.cuh.

◆ ExclusiveScan() [4/15]

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
template<typename ScanOp >
__device__ __forceinline__ void cub::BlockScanWarpScans< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::ExclusiveScan ( T  input,
T &  exclusive_output,
const T &  initial_value,
ScanOp  scan_op,
T &  block_aggregate 
)
inline

Computes an exclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide block_aggregate of all inputs.

Parameters
[in] input: Calling thread's input items
[out] exclusive_output: Calling thread's output items (may be aliased to input)
[in] initial_value: Initial value to seed the exclusive scan
[in] scan_op: Binary scan operator
[out] block_aggregate: Threadblock-wide aggregate reduction of input items

Definition at line 259 of file block_scan_warp_scans.cuh.

◆ ExclusiveScan() [5/15]

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
template<typename ScanOp >
__device__ __forceinline__ void cub::BlockScanWarpScans< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::ExclusiveScan ( T  input,
T &  exclusive_output,
const T &  initial_value,
ScanOp  scan_op,
T &  block_aggregate 
)
inline

Computes an exclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide block_aggregate of all inputs.

Parameters
[in] input: Calling thread's input items
[out] exclusive_output: Calling thread's output items (may be aliased to input)
[in] initial_value: Initial value to seed the exclusive scan
[in] scan_op: Binary scan operator
[out] block_aggregate: Threadblock-wide aggregate reduction of input items

Definition at line 281 of file block_scan_warp_scans2.cuh.

◆ ExclusiveScan() [6/15]

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
template<typename ScanOp >
__device__ __forceinline__ void cub::BlockScanWarpScans< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::ExclusiveScan ( T  input,
T &  exclusive_output,
const T &  initial_value,
ScanOp  scan_op,
T &  block_aggregate 
)
inline

Computes an exclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide block_aggregate of all inputs.

Parameters
[in] input: Calling thread's input items
[out] exclusive_output: Calling thread's output items (may be aliased to input)
[in] initial_value: Initial value to seed the exclusive scan
[in] scan_op: Binary scan operator
[out] block_aggregate: Threadblock-wide aggregate reduction of input items

Definition at line 210 of file block_scan_warp_scans3.cuh.

◆ ExclusiveScan() [7/15]

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
template<typename ScanOp >
__device__ __forceinline__ void cub::BlockScanWarpScans< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::ExclusiveScan ( T  input,
T &  exclusive_output,
ScanOp  scan_op 
)
inline

Computes an exclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element. With no initial value, the output computed for thread0 is undefined.

Parameters
[in] input: Calling thread's input item
[out] exclusive_output: Calling thread's output item (may be aliased to input)
[in] scan_op: Binary scan operator

Definition at line 208 of file block_scan_warp_scans.cuh.

◆ ExclusiveScan() [8/15]

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
template<typename ScanOp >
__device__ __forceinline__ void cub::BlockScanWarpScans< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::ExclusiveScan ( T  input,
T &  exclusive_output,
ScanOp  scan_op 
)
inline

Computes an exclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element. With no initial value, the output computed for thread0 is undefined.

Parameters
[in] input: Calling thread's input item
[out] exclusive_output: Calling thread's output item (may be aliased to input)
[in] scan_op: Binary scan operator

Definition at line 208 of file block_scan_warp_scans2.cuh.

◆ ExclusiveScan() [9/15]

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
template<typename ScanOp >
__device__ __forceinline__ void cub::BlockScanWarpScans< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::ExclusiveScan ( T  input,
T &  exclusive_output,
ScanOp  scan_op 
)
inline

Computes an exclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element. With no initial value, the output computed for thread0 is undefined.

Parameters
[in] input: Calling thread's input item
[out] exclusive_output: Calling thread's output item (may be aliased to input)
[in] scan_op: Binary scan operator

Definition at line 137 of file block_scan_warp_scans3.cuh.

◆ ExclusiveScan() [10/15]

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
template<typename ScanOp , typename BlockPrefixCallbackOp >
__device__ __forceinline__ void cub::BlockScanWarpScans< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::ExclusiveScan ( T input,
T &  exclusive_output,
ScanOp  scan_op,
BlockPrefixCallbackOp &  block_prefix_callback_op 
)
inline

Computes an exclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element. The call-back functor block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. Also provides every thread with the block-wide block_aggregate of all inputs.

Parameters
[in]inputCalling thread's input item
[out]exclusive_outputCalling thread's output item (may be aliased to input)
[in]scan_opBinary scan operator
[in,out]block_prefix_callback_op[warp0 only] Call-back functor for specifying a thread block-wide prefix to be applied to all inputs.

Definition at line 284 of file block_scan_warp_scans.cuh.

◆ ExclusiveScan() [11/15]

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
template<typename ScanOp , typename BlockPrefixCallbackOp >
__device__ __forceinline__ void cub::BlockScanWarpScans< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::ExclusiveScan ( T input,
T &  exclusive_output,
ScanOp  scan_op,
BlockPrefixCallbackOp &  block_prefix_callback_op 
)
inline

Computes an exclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element. The call-back functor block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. Also provides every thread with the block-wide block_aggregate of all inputs.

Parameters
[in]inputCalling thread's input item
[out]exclusive_outputCalling thread's output item (may be aliased to input)
[in]scan_opBinary scan operator
[in,out]block_prefix_callback_op[warp0 only] Call-back functor for specifying a thread block-wide prefix to be applied to all inputs.

Definition at line 328 of file block_scan_warp_scans2.cuh.

◆ ExclusiveScan() [12/15]

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
template<typename ScanOp , typename BlockPrefixCallbackOp >
__device__ __forceinline__ void cub::BlockScanWarpScans< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::ExclusiveScan ( T input,
T &  exclusive_output,
ScanOp  scan_op,
BlockPrefixCallbackOp &  block_prefix_callback_op 
)
inline

Computes an exclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element. The call-back functor block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs.

Parameters
[in]inputCalling thread's input item
[out]exclusive_outputCalling thread's output item (may be aliased to input)
[in]scan_opBinary scan operator
[in,out]block_prefix_callback_op[warp0 only] Call-back functor for specifying a thread block-wide prefix to be applied to all inputs.

Definition at line 259 of file block_scan_warp_scans3.cuh.

◆ ExclusiveScan() [13/15]

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
template<typename ScanOp >
__device__ __forceinline__ void cub::BlockScanWarpScans< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::ExclusiveScan ( T input,
T &  exclusive_output,
ScanOp  scan_op,
T &  block_aggregate 
)
inline

Computes an exclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide block_aggregate of all inputs. With no initial value, the output computed for thread0 is undefined.

Parameters
[in]inputCalling thread's input item
[out]exclusive_outputCalling thread's output item (may be aliased to input)
[in]scan_opBinary scan operator
[out]block_aggregateThreadblock-wide aggregate reduction of input items

Definition at line 234 of file block_scan_warp_scans.cuh.

◆ ExclusiveScan() [14/15]

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
template<typename ScanOp >
__device__ __forceinline__ void cub::BlockScanWarpScans< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::ExclusiveScan ( T input,
T &  exclusive_output,
ScanOp  scan_op,
T &  block_aggregate 
)
inline

Computes an exclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide block_aggregate of all inputs. With no initial value, the output computed for thread0 is undefined.

Parameters
[in]inputCalling thread's input item
[out]exclusive_outputCalling thread's output item (may be aliased to input)
[in]scan_opBinary scan operator
[out]block_aggregateThreadblock-wide aggregate reduction of input items

Definition at line 234 of file block_scan_warp_scans2.cuh.

◆ ExclusiveScan() [15/15]

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
template<typename ScanOp >
__device__ __forceinline__ void cub::BlockScanWarpScans< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::ExclusiveScan ( T input,
T &  exclusive_output,
ScanOp  scan_op,
T &  block_aggregate 
)
inline

Computes an exclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide block_aggregate of all inputs. With no initial value, the output computed for thread0 is undefined.

Parameters
[in]inputCalling thread's input item
[out]exclusive_outputCalling thread's output item (may be aliased to input)
[in]scan_opBinary scan operator
[out]block_aggregateThreadblock-wide aggregate reduction of input items

Definition at line 163 of file block_scan_warp_scans3.cuh.

◆ InclusiveScan() [1/9]

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
template<typename ScanOp , typename BlockPrefixCallbackOp >
__device__ __forceinline__ void cub::BlockScanWarpScans< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::InclusiveScan ( T input,
T &  exclusive_output,
ScanOp  scan_op,
BlockPrefixCallbackOp &  block_prefix_callback_op 
)
inline

Computes an inclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element. The call-back functor block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. Also provides every thread with the block-wide block_aggregate of all inputs.

Parameters
[in]inputCalling thread's input item
[out]exclusive_outputCalling thread's output item (may be aliased to input)
[in]scan_opBinary scan operator
[in,out]block_prefix_callback_op[warp0 only] Call-back functor for specifying a thread block-wide prefix to be applied to all inputs.

Definition at line 358 of file block_scan_warp_scans.cuh.

◆ InclusiveScan() [2/9]

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
template<typename ScanOp , typename BlockPrefixCallbackOp >
__device__ __forceinline__ void cub::BlockScanWarpScans< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::InclusiveScan ( T input,
T &  exclusive_output,
ScanOp  scan_op,
BlockPrefixCallbackOp &  block_prefix_callback_op 
)
inline

Computes an inclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element. The call-back functor block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. Also provides every thread with the block-wide block_aggregate of all inputs.

Parameters
[in]inputCalling thread's input item
[out]exclusive_outputCalling thread's output item (may be aliased to input)
[in]scan_opBinary scan operator
[in,out]block_prefix_callback_op[warp0 only] Call-back functor for specifying a thread block-wide prefix to be applied to all inputs.

Definition at line 402 of file block_scan_warp_scans2.cuh.

◆ InclusiveScan() [3/9]

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
template<typename ScanOp >
__device__ __forceinline__ void cub::BlockScanWarpScans< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::InclusiveScan ( T input,
T &  inclusive_output,
ScanOp  scan_op 
)
inline

Computes an inclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element.

Parameters
[in]inputCalling thread's input item
[out]inclusive_outputCalling thread's output item (may be aliased to input)
[in]scan_opBinary scan operator

Definition at line 323 of file block_scan_warp_scans.cuh.

◆ InclusiveScan() [4/9]

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
template<typename ScanOp >
__device__ __forceinline__ void cub::BlockScanWarpScans< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::InclusiveScan ( T input,
T &  inclusive_output,
ScanOp  scan_op 
)
inline

Computes an inclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element.

Parameters
[in]inputCalling thread's input item
[out]inclusive_outputCalling thread's output item (may be aliased to input)
[in]scan_opBinary scan operator

Definition at line 367 of file block_scan_warp_scans2.cuh.

◆ InclusiveScan() [5/9]

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
template<typename ScanOp >
__device__ __forceinline__ void cub::BlockScanWarpScans< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::InclusiveScan ( T input,
T &  inclusive_output,
ScanOp  scan_op 
)
inline

Computes an inclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element.

Parameters
[in]inputCalling thread's input item
[out]inclusive_outputCalling thread's output item (may be aliased to input)
[in]scan_opBinary scan operator

Definition at line 312 of file block_scan_warp_scans3.cuh.

◆ InclusiveScan() [6/9]

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
template<typename ScanOp , typename BlockPrefixCallbackOp >
__device__ __forceinline__ void cub::BlockScanWarpScans< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::InclusiveScan ( T input,
T &  inclusive_output,
ScanOp  scan_op,
BlockPrefixCallbackOp &  block_prefix_callback_op 
)
inline

Computes an inclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element. The call-back functor block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs.

Parameters
[in]inputCalling thread's input item
[out]inclusive_outputCalling thread's output item (may be aliased to input)
[in]scan_opBinary scan operator
[in,out]block_prefix_callback_op[warp0 only] Call-back functor for specifying a thread block-wide prefix to be applied to all inputs.

Definition at line 370 of file block_scan_warp_scans3.cuh.

◆ InclusiveScan() [7/9]

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
template<typename ScanOp >
__device__ __forceinline__ void cub::BlockScanWarpScans< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::InclusiveScan ( T input,
T &  inclusive_output,
ScanOp  scan_op,
T &  block_aggregate 
)
inline

Computes an inclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide block_aggregate of all inputs.

Parameters
[in]inputCalling thread's input item
[out]inclusive_outputCalling thread's output item (may be aliased to input)
[in]scan_opBinary scan operator
[out]block_aggregateThreadblock-wide aggregate reduction of input items

Definition at line 335 of file block_scan_warp_scans.cuh.

◆ InclusiveScan() [8/9]

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
template<typename ScanOp >
__device__ __forceinline__ void cub::BlockScanWarpScans< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::InclusiveScan ( T input,
T &  inclusive_output,
ScanOp  scan_op,
T &  block_aggregate 
)
inline

Computes an inclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide block_aggregate of all inputs.

Parameters
[in]inputCalling thread's input item
[out]inclusive_outputCalling thread's output item (may be aliased to input)
[in]scan_opBinary scan operator
[out]block_aggregateThreadblock-wide aggregate reduction of input items

Definition at line 379 of file block_scan_warp_scans2.cuh.

◆ InclusiveScan() [9/9]

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
template<typename ScanOp >
__device__ __forceinline__ void cub::BlockScanWarpScans< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::InclusiveScan ( T input,
T &  inclusive_output,
ScanOp  scan_op,
T &  block_aggregate 
)
inline

Computes an inclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide block_aggregate of all inputs.

Parameters
[in]inputCalling thread's input item
[out]inclusive_outputCalling thread's output item (may be aliased to input)
[in]scan_opBinary scan operator
[out]block_aggregateThreadblock-wide aggregate reduction of input items

Definition at line 324 of file block_scan_warp_scans3.cuh.

Field Documentation

◆ lane_id

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
unsigned int cub::BlockScanWarpScans< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::lane_id

Definition at line 103 of file block_scan_warp_scans.cuh.

◆ linear_tid

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
unsigned int cub::BlockScanWarpScans< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::linear_tid

Definition at line 101 of file block_scan_warp_scans.cuh.

◆ temp_storage

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
_TempStorage & cub::BlockScanWarpScans< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::temp_storage

Definition at line 100 of file block_scan_warp_scans.cuh.

◆ warp_id

template<typename T , int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int PTX_ARCH>
unsigned int cub::BlockScanWarpScans< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::warp_id

Definition at line 102 of file block_scan_warp_scans.cuh.


The documentation for this struct was generated from the following files: