OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
 
Loading...
Searching...
No Matches
cub::internal Namespace Reference

Internal namespace (to prevent ADL mishaps between static functions when mixing different CUB installations) More...

Functions

template<int LENGTH, typename T , typename ReductionOp >
__device__ __forceinline__ T ThreadReduce (T *input, ReductionOp reduction_op, T prefix, Int2Type< LENGTH >)
 
template<int LENGTH, typename T , typename ReductionOp >
__device__ __forceinline__ T ThreadReduce (T *input, ReductionOp reduction_op, T prefix)
 Perform a sequential reduction over LENGTH elements of the input array, seeded with the specified prefix. The aggregate is returned.
 
template<int LENGTH, typename T , typename ReductionOp >
__device__ __forceinline__ T ThreadReduce (T *input, ReductionOp reduction_op)
 Perform a sequential reduction over LENGTH elements of the input array. The aggregate is returned.
 
template<int LENGTH, typename T , typename ReductionOp >
__device__ __forceinline__ T ThreadReduce (T(&input)[LENGTH], ReductionOp reduction_op, T prefix)
 Perform a sequential reduction over the statically-sized input array, seeded with the specified prefix. The aggregate is returned.
 
template<int LENGTH, typename T , typename ReductionOp >
__device__ __forceinline__ T ThreadReduce (T(&input)[LENGTH], ReductionOp reduction_op)
 Serial reduction with the specified operator.
 
Sequential prefix scan over statically-sized array types
template<int LENGTH, typename T , typename ScanOp >
__device__ __forceinline__ T ThreadScanExclusive (T inclusive, T exclusive, T *input, T *output, ScanOp scan_op, Int2Type< LENGTH >)
 
template<int LENGTH, typename T , typename ScanOp >
__device__ __forceinline__ T ThreadScanExclusive (T *input, T *output, ScanOp scan_op, T prefix, bool apply_prefix=true)
 Perform a sequential exclusive prefix scan over LENGTH elements of the input array, seeded with the specified prefix. The aggregate is returned.
 
template<int LENGTH, typename T , typename ScanOp >
__device__ __forceinline__ T ThreadScanExclusive (T(&input)[LENGTH], T(&output)[LENGTH], ScanOp scan_op, T prefix, bool apply_prefix=true)
 Perform a sequential exclusive prefix scan over the statically-sized input array, seeded with the specified prefix. The aggregate is returned.
 
template<int LENGTH, typename T , typename ScanOp >
__device__ __forceinline__ T ThreadScanInclusive (T inclusive, T *input, T *output, ScanOp scan_op, Int2Type< LENGTH >)
 
template<int LENGTH, typename T , typename ScanOp >
__device__ __forceinline__ T ThreadScanInclusive (T *input, T *output, ScanOp scan_op)
 Perform a sequential inclusive prefix scan over LENGTH elements of the input array. The aggregate is returned.
 
template<int LENGTH, typename T , typename ScanOp >
__device__ __forceinline__ T ThreadScanInclusive (T(&input)[LENGTH], T(&output)[LENGTH], ScanOp scan_op)
 Perform a sequential inclusive prefix scan over the statically-sized input array. The aggregate is returned.
 
template<int LENGTH, typename T , typename ScanOp >
__device__ __forceinline__ T ThreadScanInclusive (T *input, T *output, ScanOp scan_op, T prefix, bool apply_prefix=true)
 Perform a sequential inclusive prefix scan over LENGTH elements of the input array, seeded with the specified prefix. The aggregate is returned.
 
template<int LENGTH, typename T , typename ScanOp >
__device__ __forceinline__ T ThreadScanInclusive (T(&input)[LENGTH], T(&output)[LENGTH], ScanOp scan_op, T prefix, bool apply_prefix=true)
 Perform a sequential inclusive prefix scan over the statically-sized input array, seeded with the specified prefix. The aggregate is returned.
 

Detailed Description

Internal namespace (to prevent ADL mishaps between static functions when mixing different CUB installations)

Function Documentation

◆ ThreadReduce() [1/5]

template<int LENGTH, typename T , typename ReductionOp >
__device__ __forceinline__ T cub::internal::ThreadReduce ( T *  input,
ReductionOp  reduction_op 
)

Perform a sequential reduction over LENGTH elements of the input array. The aggregate is returned.

Template Parameters
LENGTH: Length of input array
T: [inferred] The data type to be reduced.
ReductionOp: [inferred] Binary reduction operator type having member T operator()(const T &a, const T &b)
Parameters
[in] input: Input array
[in] reduction_op: Binary reduction operator

Definition at line 102 of file thread_reduce.cuh.

◆ ThreadReduce() [2/5]

template<int LENGTH, typename T , typename ReductionOp >
__device__ __forceinline__ T cub::internal::ThreadReduce ( T *  input,
ReductionOp  reduction_op,
T  prefix 
)

Perform a sequential reduction over LENGTH elements of the input array, seeded with the specified prefix. The aggregate is returned.

Template Parameters
LENGTH: Length of input array
T: [inferred] The data type to be reduced.
ReductionOp: [inferred] Binary reduction operator type having member T operator()(const T &a, const T &b)
Parameters
[in] input: Input array
[in] reduction_op: Binary reduction operator
[in] prefix: Prefix to seed reduction with

Definition at line 82 of file thread_reduce.cuh.

◆ ThreadReduce() [3/5]

template<int LENGTH, typename T , typename ReductionOp >
__device__ __forceinline__ T cub::internal::ThreadReduce ( T *  input,
ReductionOp  reduction_op,
T  prefix,
Int2Type< LENGTH >   
)

Sequential reduction over statically-sized array types

Parameters
[in] input: Input array
[in] reduction_op: Binary reduction operator
[in] prefix: Prefix to seed reduction with

Definition at line 55 of file thread_reduce.cuh.

◆ ThreadReduce() [4/5]

template<int LENGTH, typename T , typename ReductionOp >
__device__ __forceinline__ T cub::internal::ThreadReduce ( T(&input)[LENGTH],
ReductionOp  reduction_op 
)

Serial reduction with the specified operator.

Template Parameters
LENGTH: [inferred] Length of input array
T: [inferred] The data type to be reduced.
ReductionOp: [inferred] Binary reduction operator type having member T operator()(const T &a, const T &b)
Parameters
[in] input: Input array
[in] reduction_op: Binary reduction operator

Definition at line 142 of file thread_reduce.cuh.

◆ ThreadReduce() [5/5]

template<int LENGTH, typename T , typename ReductionOp >
__device__ __forceinline__ T cub::internal::ThreadReduce ( T(&input)[LENGTH],
ReductionOp  reduction_op,
T  prefix 
)

Perform a sequential reduction over the statically-sized input array, seeded with the specified prefix. The aggregate is returned.

Template Parameters
LENGTH: [inferred] Length of input array
T: [inferred] The data type to be reduced.
ReductionOp: [inferred] Binary reduction operator type having member T operator()(const T &a, const T &b)
Parameters
[in] input: Input array
[in] reduction_op: Binary reduction operator
[in] prefix: Prefix to seed reduction with

Definition at line 122 of file thread_reduce.cuh.