Internal namespace (to prevent ADL mishaps between static functions when mixing different CUB installations)
More...
|
template<int LENGTH, typename T , typename ReductionOp > |
__device__ __forceinline__ T | ThreadReduce (T *input, ReductionOp reduction_op, T prefix, Int2Type< LENGTH >) |
|
template<int LENGTH, typename T , typename ReductionOp > |
__device__ __forceinline__ T | ThreadReduce (T *input, ReductionOp reduction_op, T prefix) |
| Perform a sequential reduction over LENGTH elements of the input array, seeded with the specified prefix. The aggregate is returned.
|
|
template<int LENGTH, typename T , typename ReductionOp > |
__device__ __forceinline__ T | ThreadReduce (T *input, ReductionOp reduction_op) |
| Perform a sequential reduction over LENGTH elements of the input array. The aggregate is returned.
|
|
template<int LENGTH, typename T , typename ReductionOp > |
__device__ __forceinline__ T | ThreadReduce (T(&input)[LENGTH], ReductionOp reduction_op, T prefix) |
| Perform a sequential reduction over the statically-sized input array, seeded with the specified prefix. The aggregate is returned.
|
|
template<int LENGTH, typename T , typename ReductionOp > |
__device__ __forceinline__ T | ThreadReduce (T(&input)[LENGTH], ReductionOp reduction_op) |
| Perform a sequential reduction over the statically-sized input array using the specified reduction operator. The aggregate is returned.
|
|
|
template<int LENGTH, typename T , typename ScanOp > |
__device__ __forceinline__ T | ThreadScanExclusive (T inclusive, T exclusive, T *input, T *output, ScanOp scan_op, Int2Type< LENGTH >) |
|
template<int LENGTH, typename T , typename ScanOp > |
__device__ __forceinline__ T | ThreadScanExclusive (T *input, T *output, ScanOp scan_op, T prefix, bool apply_prefix=true) |
| Perform a sequential exclusive prefix scan over LENGTH elements of the input array, seeded with the specified prefix. The aggregate is returned.
|
|
template<int LENGTH, typename T , typename ScanOp > |
__device__ __forceinline__ T | ThreadScanExclusive (T(&input)[LENGTH], T(&output)[LENGTH], ScanOp scan_op, T prefix, bool apply_prefix=true) |
| Perform a sequential exclusive prefix scan over the statically-sized input array, seeded with the specified prefix. The aggregate is returned.
|
|
template<int LENGTH, typename T , typename ScanOp > |
__device__ __forceinline__ T | ThreadScanInclusive (T inclusive, T *input, T *output, ScanOp scan_op, Int2Type< LENGTH >) |
|
template<int LENGTH, typename T , typename ScanOp > |
__device__ __forceinline__ T | ThreadScanInclusive (T *input, T *output, ScanOp scan_op) |
| Perform a sequential inclusive prefix scan over LENGTH elements of the input array. The aggregate is returned.
|
|
template<int LENGTH, typename T , typename ScanOp > |
__device__ __forceinline__ T | ThreadScanInclusive (T(&input)[LENGTH], T(&output)[LENGTH], ScanOp scan_op) |
| Perform a sequential inclusive prefix scan over the statically-sized input array. The aggregate is returned.
|
|
template<int LENGTH, typename T , typename ScanOp > |
__device__ __forceinline__ T | ThreadScanInclusive (T *input, T *output, ScanOp scan_op, T prefix, bool apply_prefix=true) |
| Perform a sequential inclusive prefix scan over LENGTH elements of the input array, seeded with the specified prefix. The aggregate is returned.
|
|
template<int LENGTH, typename T , typename ScanOp > |
__device__ __forceinline__ T | ThreadScanInclusive (T(&input)[LENGTH], T(&output)[LENGTH], ScanOp scan_op, T prefix, bool apply_prefix=true) |
| Perform a sequential inclusive prefix scan over the statically-sized input array, seeded with the specified prefix. The aggregate is returned.
|
|
Internal namespace (to prevent ADL mishaps between static functions when mixing different CUB installations)