BlockScanWarpScans provides warpscan-based variants of parallel prefix scan across a CUDA thread block. More...
BlockScanWarpScans provides warpscan-based variants of parallel prefix scan across a CUDA thread block.
The PTX compute capability for which to specialize this collective
Definition at line 56 of file block_scan_warp_scans.cuh.
Data Structures | |
struct | _TempStorage |
Shared memory storage layout type. More... | |
struct | TempStorage |
Alias wrapper allowing storage to be unioned. More... | |
Public Types | |
enum | { WARP_THREADS = CUB_WARP_THREADS(PTX_ARCH) , BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z , WARPS = (BLOCK_THREADS + WARP_THREADS - 1) / WARP_THREADS } |
Constants. More... | |
enum | { WARP_THREADS = CUB_WARP_THREADS(PTX_ARCH) , BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z , WARPS = (BLOCK_THREADS + WARP_THREADS - 1) / WARP_THREADS } |
Constants. More... | |
enum | { BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z , INNER_WARP_THREADS = CUB_WARP_THREADS(PTX_ARCH) , OUTER_WARP_THREADS = BLOCK_THREADS / INNER_WARP_THREADS , OUTER_WARPS = INNER_WARP_THREADS } |
Constants. More... | |
typedef WarpScan< T, WARP_THREADS, PTX_ARCH > | WarpScanT |
WarpScan utility type. | |
typedef WarpScan< T, WARPS, PTX_ARCH > | WarpAggregateScan |
WarpScan utility type. | |
typedef WarpScan< T, WARP_THREADS, PTX_ARCH > | WarpScanT |
WarpScan utility type. | |
typedef WarpScan< T, WARPS, PTX_ARCH > | WarpAggregateScanT |
WarpScan utility type. | |
typedef WarpScan< T, OUTER_WARP_THREADS, PTX_ARCH > | OuterWarpScanT |
Outer WarpScan utility type. | |
typedef WarpScan< T, INNER_WARP_THREADS, PTX_ARCH > | InnerWarpScanT |
Inner WarpScan utility type. | |
typedef OuterWarpScanT::TempStorage | OuterScanArray[OUTER_WARPS] |
Public Member Functions | |
struct | __align__ (32) _TempStorage |
Shared memory storage layout type. | |
__device__ __forceinline__ | BlockScanWarpScans (TempStorage &temp_storage) |
Constructor. | |
template<typename ScanOp , int WARP> | |
__device__ __forceinline__ void | ApplyWarpAggregates (T &warp_prefix, ScanOp scan_op, T &block_aggregate, Int2Type< WARP >) |
template<typename ScanOp > | |
__device__ __forceinline__ void | ApplyWarpAggregates (T &, ScanOp, T &, Int2Type< WARPS >) |
template<typename ScanOp > | |
__device__ __forceinline__ T | ComputeWarpPrefix (ScanOp scan_op, T warp_aggregate, T &block_aggregate) |
Use the warp-wide aggregates to compute the calling warp's prefix. Also returns block-wide aggregate in all threads. | |
template<typename ScanOp > | |
__device__ __forceinline__ T | ComputeWarpPrefix (ScanOp scan_op, T warp_aggregate, T &block_aggregate, const T &initial_value) |
Use the warp-wide aggregates and initial-value to compute the calling warp's prefix. Also returns block-wide aggregate in all threads. | |
template<typename ScanOp > | |
__device__ __forceinline__ void | ExclusiveScan (T input, T &exclusive_output, ScanOp scan_op) |
Computes an exclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element. With no initial value, the output computed for thread0 is undefined. | |
template<typename ScanOp > | |
__device__ __forceinline__ void | ExclusiveScan (T input, T &exclusive_output, const T &initial_value, ScanOp scan_op) |
Computes an exclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element. | |
template<typename ScanOp > | |
__device__ __forceinline__ void | ExclusiveScan (T input, T &exclusive_output, ScanOp scan_op, T &block_aggregate) |
Computes an exclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide block_aggregate of all inputs. With no initial value, the output computed for thread0 is undefined. | |
template<typename ScanOp > | |
__device__ __forceinline__ void | ExclusiveScan (T input, T &exclusive_output, const T &initial_value, ScanOp scan_op, T &block_aggregate) |
Computes an exclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide block_aggregate of all inputs. | |
template<typename ScanOp , typename BlockPrefixCallbackOp > | |
__device__ __forceinline__ void | ExclusiveScan (T input, T &exclusive_output, ScanOp scan_op, BlockPrefixCallbackOp &block_prefix_callback_op) |
Computes an exclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element. The call-back functor block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. Also provides every thread with the block-wide block_aggregate of all inputs. | |
template<typename ScanOp > | |
__device__ __forceinline__ void | InclusiveScan (T input, T &inclusive_output, ScanOp scan_op) |
Computes an inclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element. | |
template<typename ScanOp > | |
__device__ __forceinline__ void | InclusiveScan (T input, T &inclusive_output, ScanOp scan_op, T &block_aggregate) |
Computes an inclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide block_aggregate of all inputs. | |
template<typename ScanOp , typename BlockPrefixCallbackOp > | |
__device__ __forceinline__ void | InclusiveScan (T input, T &exclusive_output, ScanOp scan_op, BlockPrefixCallbackOp &block_prefix_callback_op) |
Computes an inclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element. The call-back functor block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. Also provides every thread with the block-wide block_aggregate of all inputs. | |
__device__ __forceinline__ | BlockScanWarpScans (TempStorage &temp_storage) |
Constructor. | |
template<typename ScanOp , int WARP> | |
__device__ __forceinline__ void | ApplyWarpAggregates (T &warp_prefix, ScanOp scan_op, T &block_aggregate, Int2Type< WARP > addend_warp) |
template<typename ScanOp > | |
__device__ __forceinline__ void | ApplyWarpAggregates (T &warp_prefix, ScanOp scan_op, T &block_aggregate, Int2Type< WARPS > addend_warp) |
template<typename ScanOp > | |
__device__ __forceinline__ T | ComputeWarpPrefix (ScanOp scan_op, T warp_aggregate, T &block_aggregate) |
Use the warp-wide aggregates to compute the calling warp's prefix. Also returns block-wide aggregate in all threads. | |
template<typename ScanOp > | |
__device__ __forceinline__ T | ComputeWarpPrefix (ScanOp scan_op, T warp_aggregate, T &block_aggregate, const T &initial_value) |
Use the warp-wide aggregates and initial-value to compute the calling warp's prefix. Also returns block-wide aggregate in all threads. | |
template<typename ScanOp > | |
__device__ __forceinline__ void | ExclusiveScan (T input, T &exclusive_output, ScanOp scan_op) |
Computes an exclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element. With no initial value, the output computed for thread0 is undefined. | |
template<typename ScanOp > | |
__device__ __forceinline__ void | ExclusiveScan (T input, T &exclusive_output, const T &initial_value, ScanOp scan_op) |
Computes an exclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element. | |
template<typename ScanOp > | |
__device__ __forceinline__ void | ExclusiveScan (T input, T &exclusive_output, ScanOp scan_op, T &block_aggregate) |
Computes an exclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide block_aggregate of all inputs. With no initial value, the output computed for thread0 is undefined. | |
template<typename ScanOp > | |
__device__ __forceinline__ void | ExclusiveScan (T input, T &exclusive_output, const T &initial_value, ScanOp scan_op, T &block_aggregate) |
Computes an exclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide block_aggregate of all inputs. | |
template<typename ScanOp , typename BlockPrefixCallbackOp > | |
__device__ __forceinline__ void | ExclusiveScan (T input, T &exclusive_output, ScanOp scan_op, BlockPrefixCallbackOp &block_prefix_callback_op) |
Computes an exclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element. The call-back functor block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. Also provides every thread with the block-wide block_aggregate of all inputs. | |
template<typename ScanOp > | |
__device__ __forceinline__ void | InclusiveScan (T input, T &inclusive_output, ScanOp scan_op) |
Computes an inclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element. | |
template<typename ScanOp > | |
__device__ __forceinline__ void | InclusiveScan (T input, T &inclusive_output, ScanOp scan_op, T &block_aggregate) |
Computes an inclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide block_aggregate of all inputs. | |
template<typename ScanOp , typename BlockPrefixCallbackOp > | |
__device__ __forceinline__ void | InclusiveScan (T input, T &exclusive_output, ScanOp scan_op, BlockPrefixCallbackOp &block_prefix_callback_op) |
Computes an inclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element. The call-back functor block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. Also provides every thread with the block-wide block_aggregate of all inputs. | |
__device__ __forceinline__ | BlockScanWarpScans (TempStorage &temp_storage) |
Constructor. | |
template<typename ScanOp > | |
__device__ __forceinline__ void | ExclusiveScan (T input, T &exclusive_output, ScanOp scan_op) |
Computes an exclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element. With no initial value, the output computed for thread0 is undefined. | |
template<typename ScanOp > | |
__device__ __forceinline__ void | ExclusiveScan (T input, T &exclusive_output, const T &initial_value, ScanOp scan_op) |
Computes an exclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element. | |
template<typename ScanOp > | |
__device__ __forceinline__ void | ExclusiveScan (T input, T &exclusive_output, ScanOp scan_op, T &block_aggregate) |
Computes an exclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide block_aggregate of all inputs. With no initial value, the output computed for thread0 is undefined. | |
template<typename ScanOp > | |
__device__ __forceinline__ void | ExclusiveScan (T input, T &exclusive_output, const T &initial_value, ScanOp scan_op, T &block_aggregate) |
Computes an exclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide block_aggregate of all inputs. | |
template<typename ScanOp , typename BlockPrefixCallbackOp > | |
__device__ __forceinline__ void | ExclusiveScan (T input, T &exclusive_output, ScanOp scan_op, BlockPrefixCallbackOp &block_prefix_callback_op) |
Computes an exclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element. The call-back functor block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. | |
template<typename ScanOp > | |
__device__ __forceinline__ void | InclusiveScan (T input, T &inclusive_output, ScanOp scan_op) |
Computes an inclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element. | |
template<typename ScanOp > | |
__device__ __forceinline__ void | InclusiveScan (T input, T &inclusive_output, ScanOp scan_op, T &block_aggregate) |
Computes an inclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide block_aggregate of all inputs. | |
template<typename ScanOp , typename BlockPrefixCallbackOp > | |
__device__ __forceinline__ void | InclusiveScan (T input, T &inclusive_output, ScanOp scan_op, BlockPrefixCallbackOp &block_prefix_callback_op) |
Computes an inclusive thread block-wide prefix scan using the specified binary scan_op functor. Each thread contributes one input element. The call-back functor block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. | |
Data Fields | |
_TempStorage & | temp_storage |
unsigned int | linear_tid |
unsigned int | warp_id |
unsigned int | lane_id |
typedef WarpScan<T, INNER_WARP_THREADS, PTX_ARCH> cub::BlockScanWarpScans< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::InnerWarpScanT |
Inner WarpScan utility type.
Definition at line 80 of file block_scan_warp_scans3.cuh.
typedef OuterWarpScanT::TempStorage cub::BlockScanWarpScans< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::OuterScanArray[OUTER_WARPS] |
Definition at line 82 of file block_scan_warp_scans3.cuh.
typedef WarpScan<T, OUTER_WARP_THREADS, PTX_ARCH> cub::BlockScanWarpScans< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::OuterWarpScanT |
Outer WarpScan utility type.
Definition at line 77 of file block_scan_warp_scans3.cuh.
typedef WarpScan<T, WARPS, PTX_ARCH> cub::BlockScanWarpScans< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::WarpAggregateScan |
WarpScan utility type.
Definition at line 79 of file block_scan_warp_scans.cuh.
typedef WarpScan<T, WARPS, PTX_ARCH> cub::BlockScanWarpScans< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::WarpAggregateScanT |
WarpScan utility type.
Definition at line 79 of file block_scan_warp_scans2.cuh.
typedef WarpScan<T, WARP_THREADS, PTX_ARCH> cub::BlockScanWarpScans< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::WarpScanT |
WarpScan utility type.
Definition at line 76 of file block_scan_warp_scans.cuh.
typedef WarpScan<T, WARP_THREADS, PTX_ARCH> cub::BlockScanWarpScans< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::WarpScanT |
WarpScan utility type.
Definition at line 76 of file block_scan_warp_scans2.cuh.
anonymous enum |
Constants.
Enumerator | |
---|---|
WARP_THREADS | Number of warp threads. |
BLOCK_THREADS | The thread block size in threads. |
WARPS | Number of active warps. |
Definition at line 63 of file block_scan_warp_scans.cuh.
anonymous enum |
Constants.
Enumerator | |
---|---|
WARP_THREADS | Number of warp threads. |
BLOCK_THREADS | The thread block size in threads. |
WARPS | Number of active warps. |
Definition at line 63 of file block_scan_warp_scans2.cuh.
anonymous enum |
Constants.
Enumerator | |
---|---|
BLOCK_THREADS | The thread block size in threads. |
INNER_WARP_THREADS | Number of warp threads. |
OUTER_WARPS | Number of outer scan warps. |
Definition at line 63 of file block_scan_warp_scans3.cuh.
|
inline |
Constructor.
Definition at line 111 of file block_scan_warp_scans.cuh.
|
inline |
Constructor.
Definition at line 111 of file block_scan_warp_scans2.cuh.
|
inline |
Constructor.
Definition at line 121 of file block_scan_warp_scans3.cuh.
|
inline |
Shared memory storage layout type.
Buffer for warp-synchronous scans
Shared prefix for the entire thread block
Definition at line 79 of file block_scan_warp_scans.cuh.
|
inline |
Definition at line 142 of file block_scan_warp_scans.cuh.
|
inline |
[out] | warp_prefix | The calling thread's partial reduction |
[in] | scan_op | Binary scan operator |
[out] | block_aggregate | Threadblock-wide aggregate reduction of input items |
Definition at line 126 of file block_scan_warp_scans2.cuh.
|
inline |
[out] | warp_prefix | The calling thread's partial reduction |
[in] | scan_op | Binary scan operator |
[out] | block_aggregate | Threadblock-wide aggregate reduction of input items |
Definition at line 126 of file block_scan_warp_scans.cuh.
|
inline |
[out] | warp_prefix | The calling thread's partial reduction |
[in] | scan_op | Binary scan operator |
[out] | block_aggregate | Threadblock-wide aggregate reduction of input items |
Definition at line 142 of file block_scan_warp_scans2.cuh.
|
inline |
Use the warp-wide aggregates to compute the calling warp's prefix. Also returns block-wide aggregate in all threads.
[in] | scan_op | Binary scan operator |
[in] | warp_aggregate | [lane (WARP_THREADS - 1) only] Warp-wide aggregate reduction of input items |
[out] | block_aggregate | Threadblock-wide aggregate reduction of input items |
Definition at line 152 of file block_scan_warp_scans.cuh.
|
inline |
Use the warp-wide aggregates to compute the calling warp's prefix. Also returns block-wide aggregate in all threads.
[in] | scan_op | Binary scan operator |
[in] | warp_aggregate | [lane (WARP_THREADS - 1) only] Warp-wide aggregate reduction of input items |
[out] | block_aggregate | Threadblock-wide aggregate reduction of input items |
Definition at line 152 of file block_scan_warp_scans2.cuh.
|
inline |
Use the warp-wide aggregates and initial-value to compute the calling warp's prefix. Also returns block-wide aggregate in all threads.
[in] | scan_op | Binary scan operator |
[in] | warp_aggregate | [lane (WARP_THREADS - 1) only] Warp-wide aggregate reduction of input items |
[out] | block_aggregate | Threadblock-wide aggregate reduction of input items |
[in] | initial_value | Initial value to seed the exclusive scan |
Definition at line 186 of file block_scan_warp_scans.cuh.
|
inline |
Use the warp-wide aggregates and initial-value to compute the calling warp's prefix. Also returns block-wide aggregate in all threads.
[in] | scan_op | Binary scan operator |
[in] | warp_aggregate | [lane (WARP_THREADS - 1) only] Warp-wide aggregate reduction of input items |
[out] | block_aggregate | Threadblock-wide aggregate reduction of input items |
[in] | initial_value | Initial value to seed the exclusive scan |
Definition at line 186 of file block_scan_warp_scans2.cuh.
|
inline |
Computes an exclusive thread block-wide prefix scan using the specified binary scan_op
functor. Each thread contributes one input element.
[in] | input | Calling thread's input items |
[out] | exclusive_output | Calling thread's output items (may be aliased to input ) |
[in] | initial_value | Initial value to seed the exclusive scan |
[in] | scan_op | Binary scan operator |
Definition at line 221 of file block_scan_warp_scans.cuh.
|
inline |
Computes an exclusive thread block-wide prefix scan using the specified binary scan_op
functor. Each thread contributes one input element.
[in] | input | Calling thread's input items |
[out] | exclusive_output | Calling thread's output items (may be aliased to input ) |
[in] | initial_value | Initial value to seed the exclusive scan |
[in] | scan_op | Binary scan operator |
Definition at line 221 of file block_scan_warp_scans2.cuh.
|
inline |
Computes an exclusive thread block-wide prefix scan using the specified binary scan_op
functor. Each thread contributes one input element.
[in] | input | Calling thread's input items |
[out] | exclusive_output | Calling thread's output items (may be aliased to input ) |
[in] | initial_value | Initial value to seed the exclusive scan |
[in] | scan_op | Binary scan operator |
Definition at line 150 of file block_scan_warp_scans3.cuh.
|
inline |
Computes an exclusive thread block-wide prefix scan using the specified binary scan_op
functor. Each thread contributes one input element. Also provides every thread with the block-wide block_aggregate
of all inputs.
[in] | input | Calling thread's input items |
[out] | exclusive_output | Calling thread's output items (may be aliased to input ) |
[in] | initial_value | Initial value to seed the exclusive scan |
[in] | scan_op | Binary scan operator |
[out] | block_aggregate | Threadblock-wide aggregate reduction of input items |
Definition at line 259 of file block_scan_warp_scans.cuh.
|
inline |
Computes an exclusive thread block-wide prefix scan using the specified binary scan_op
functor. Each thread contributes one input element. Also provides every thread with the block-wide block_aggregate
of all inputs.
[in] | input | Calling thread's input items |
[out] | exclusive_output | Calling thread's output items (may be aliased to input ) |
[in] | initial_value | Initial value to seed the exclusive scan |
[in] | scan_op | Binary scan operator |
[out] | block_aggregate | Threadblock-wide aggregate reduction of input items |
Definition at line 281 of file block_scan_warp_scans2.cuh.
|
inline |
Computes an exclusive thread block-wide prefix scan using the specified binary scan_op
functor. Each thread contributes one input element. Also provides every thread with the block-wide block_aggregate
of all inputs.
[in] | input | Calling thread's input items |
[out] | exclusive_output | Calling thread's output items (may be aliased to input ) |
[in] | initial_value | Initial value to seed the exclusive scan |
[in] | scan_op | Binary scan operator |
[out] | block_aggregate | Threadblock-wide aggregate reduction of input items |
Definition at line 210 of file block_scan_warp_scans3.cuh.
|
inline |
Computes an exclusive thread block-wide prefix scan using the specified binary scan_op
functor. Each thread contributes one input element. With no initial value, the output computed for thread0 is undefined.
[in] | input | Calling thread's input item |
[out] | exclusive_output | Calling thread's output item (may be aliased to input ) |
[in] | scan_op | Binary scan operator |
Definition at line 208 of file block_scan_warp_scans.cuh.
|
inline |
Computes an exclusive thread block-wide prefix scan using the specified binary scan_op
functor. Each thread contributes one input element. With no initial value, the output computed for thread0 is undefined.
[in] | input | Calling thread's input item |
[out] | exclusive_output | Calling thread's output item (may be aliased to input ) |
[in] | scan_op | Binary scan operator |
Definition at line 208 of file block_scan_warp_scans2.cuh.
|
inline |
Computes an exclusive thread block-wide prefix scan using the specified binary scan_op
functor. Each thread contributes one input element. With no initial value, the output computed for thread0 is undefined.
[in] | input | Calling thread's input item |
[out] | exclusive_output | Calling thread's output item (may be aliased to input ) |
[in] | scan_op | Binary scan operator |
Definition at line 137 of file block_scan_warp_scans3.cuh.
|
inline |
Computes an exclusive thread block-wide prefix scan using the specified binary scan_op
functor. Each thread contributes one input element. The call-back functor block_prefix_callback_op
is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. Also provides every thread with the block-wide block_aggregate
of all inputs.
[in] | input | Calling thread's input item |
[out] | exclusive_output | Calling thread's output item (may be aliased to input ) |
[in] | scan_op | Binary scan operator |
[in,out] | block_prefix_callback_op | [warp0 only] Call-back functor for specifying a thread block-wide prefix to be applied to all inputs. |
Definition at line 284 of file block_scan_warp_scans.cuh.
|
inline |
Computes an exclusive thread block-wide prefix scan using the specified binary scan_op
functor. Each thread contributes one input element. The call-back functor block_prefix_callback_op
is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. Also provides every thread with the block-wide block_aggregate
of all inputs.
[in] | input | Calling thread's input item |
[out] | exclusive_output | Calling thread's output item (may be aliased to input ) |
[in] | scan_op | Binary scan operator |
[in,out] | block_prefix_callback_op | [warp0 only] Call-back functor for specifying a thread block-wide prefix to be applied to all inputs. |
Definition at line 328 of file block_scan_warp_scans2.cuh.
|
inline |
Computes an exclusive thread block-wide prefix scan using the specified binary scan_op
functor. Each thread contributes one input element. The call-back functor block_prefix_callback_op
is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs.
[in] | input | Calling thread's input item |
[out] | exclusive_output | Calling thread's output item (may be aliased to input ) |
[in] | scan_op | Binary scan operator |
[in,out] | block_prefix_callback_op | [warp0 only] Call-back functor for specifying a thread block-wide prefix to be applied to all inputs. |
Definition at line 259 of file block_scan_warp_scans3.cuh.
|
inline |
Computes an exclusive thread block-wide prefix scan using the specified binary scan_op
functor. Each thread contributes one input element. Also provides every thread with the block-wide block_aggregate
of all inputs. With no initial value, the output computed for thread0 is undefined.
[in] | input | Calling thread's input item |
[out] | exclusive_output | Calling thread's output item (may be aliased to input ) |
[in] | scan_op | Binary scan operator |
[out] | block_aggregate | Threadblock-wide aggregate reduction of input items |
Definition at line 234 of file block_scan_warp_scans.cuh.
|
inline |
Computes an exclusive thread block-wide prefix scan using the specified binary scan_op
functor. Each thread contributes one input element. Also provides every thread with the block-wide block_aggregate
of all inputs. With no initial value, the output computed for thread0 is undefined.
[in] | input | Calling thread's input item |
[out] | exclusive_output | Calling thread's output item (may be aliased to input ) |
[in] | scan_op | Binary scan operator |
[out] | block_aggregate | Threadblock-wide aggregate reduction of input items |
Definition at line 234 of file block_scan_warp_scans2.cuh.
|
inline |
Computes an exclusive thread block-wide prefix scan using the specified binary scan_op
functor. Each thread contributes one input element. Also provides every thread with the block-wide block_aggregate
of all inputs. With no initial value, the output computed for thread0 is undefined.
[in] | input | Calling thread's input item |
[out] | exclusive_output | Calling thread's output item (may be aliased to input ) |
[in] | scan_op | Binary scan operator |
[out] | block_aggregate | Threadblock-wide aggregate reduction of input items |
Definition at line 163 of file block_scan_warp_scans3.cuh.
|
inline |
Computes an inclusive thread block-wide prefix scan using the specified binary scan_op
functor. Each thread contributes one input element. The call-back functor block_prefix_callback_op
is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. Also provides every thread with the block-wide block_aggregate
of all inputs.
[in] | input | Calling thread's input item |
[out] | exclusive_output | Calling thread's output item (may be aliased to input ) |
[in] | scan_op | Binary scan operator |
[in,out] | block_prefix_callback_op | [warp0 only] Call-back functor for specifying a thread block-wide prefix to be applied to all inputs. |
Definition at line 358 of file block_scan_warp_scans.cuh.
|
inline |
Computes an inclusive thread block-wide prefix scan using the specified binary scan_op
functor. Each thread contributes one input element. The call-back functor block_prefix_callback_op
is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. Also provides every thread with the block-wide block_aggregate
of all inputs.
[in] | input | Calling thread's input item |
[out] | exclusive_output | Calling thread's output item (may be aliased to input ) |
[in] | scan_op | Binary scan operator |
[in,out] | block_prefix_callback_op | [warp0 only] Call-back functor for specifying a thread block-wide prefix to be applied to all inputs. |
Definition at line 402 of file block_scan_warp_scans2.cuh.
|
inline |
Computes an inclusive thread block-wide prefix scan using the specified binary scan_op
functor. Each thread contributes one input element.
[in] | input | Calling thread's input item |
[out] | inclusive_output | Calling thread's output item (may be aliased to input ) |
[in] | scan_op | Binary scan operator |
Definition at line 323 of file block_scan_warp_scans.cuh.
|
inline |
Computes an inclusive thread block-wide prefix scan using the specified binary scan_op
functor. Each thread contributes one input element.
[in] | input | Calling thread's input item |
[out] | inclusive_output | Calling thread's output item (may be aliased to input ) |
[in] | scan_op | Binary scan operator |
Definition at line 367 of file block_scan_warp_scans2.cuh.
|
inline |
Computes an inclusive thread block-wide prefix scan using the specified binary scan_op
functor. Each thread contributes one input element.
[in] | input | Calling thread's input item |
[out] | inclusive_output | Calling thread's output item (may be aliased to input ) |
[in] | scan_op | Binary scan operator |
Definition at line 312 of file block_scan_warp_scans3.cuh.
|
inline |
Computes an inclusive thread block-wide prefix scan using the specified binary scan_op
functor. Each thread contributes one input element. The call-back functor block_prefix_callback_op
is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs.
[in] | input | Calling thread's input item |
[out] | inclusive_output | Calling thread's output item (may be aliased to input ) |
[in] | scan_op | Binary scan operator |
[in,out] | block_prefix_callback_op | [warp0 only] Call-back functor for specifying a thread block-wide prefix to be applied to all inputs. |
Definition at line 370 of file block_scan_warp_scans3.cuh.
|
inline |
Computes an inclusive thread block-wide prefix scan using the specified binary scan_op
functor. Each thread contributes one input element. Also provides every thread with the block-wide block_aggregate
of all inputs.
[in] | input | Calling thread's input item |
[out] | inclusive_output | Calling thread's output item (may be aliased to input ) |
[in] | scan_op | Binary scan operator |
[out] | block_aggregate | Threadblock-wide aggregate reduction of input items |
Definition at line 335 of file block_scan_warp_scans.cuh.
|
inline |
Computes an inclusive thread block-wide prefix scan using the specified binary scan_op
functor. Each thread contributes one input element. Also provides every thread with the block-wide block_aggregate
of all inputs.
[in] | input | Calling thread's input item |
[out] | inclusive_output | Calling thread's output item (may be aliased to input ) |
[in] | scan_op | Binary scan operator |
[out] | block_aggregate | Threadblock-wide aggregate reduction of input items |
Definition at line 379 of file block_scan_warp_scans2.cuh.
|
inline |
Computes an inclusive thread block-wide prefix scan using the specified binary scan_op
functor. Each thread contributes one input element. Also provides every thread with the block-wide block_aggregate
of all inputs.
[in] | input | Calling thread's input item |
[out] | inclusive_output | Calling thread's output item (may be aliased to input ) |
[in] | scan_op | Binary scan operator |
[out] | block_aggregate | Threadblock-wide aggregate reduction of input items |
Definition at line 324 of file block_scan_warp_scans3.cuh.
unsigned int cub::BlockScanWarpScans< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::lane_id |
Definition at line 103 of file block_scan_warp_scans.cuh.
unsigned int cub::BlockScanWarpScans< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::linear_tid |
Definition at line 101 of file block_scan_warp_scans.cuh.
_TempStorage & cub::BlockScanWarpScans< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::temp_storage |
Definition at line 100 of file block_scan_warp_scans.cuh.
unsigned int cub::BlockScanWarpScans< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::warp_id |
Definition at line 102 of file block_scan_warp_scans.cuh.