OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
 
Loading...
Searching...
No Matches
cub::DispatchReduce< InputIteratorT, OutputIteratorT, OffsetT, ReductionOpT > Struct Template Reference

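Utility class for dispatching the appropriately-tuned kernels for device-wide reduction (ReductionOpT: binary reduction functor type having member T operator()(const T &a, const T &b)). More...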

Detailed Description

template<typename InputIteratorT, typename OutputIteratorT, typename OffsetT, typename ReductionOpT>
struct cub::DispatchReduce< InputIteratorT, OutputIteratorT, OffsetT, ReductionOpT >

ReductionOpT: Binary reduction functor type having member T operator()(const T &a, const T &b)

Utility class for dispatching the appropriately-tuned kernels for device-wide reduction

Definition at line 359 of file dispatch_reduce.cuh.

+ Inheritance diagram for cub::DispatchReduce< InputIteratorT, OutputIteratorT, OffsetT, ReductionOpT >:

Public Types

typedef If<(Equals< typename std::iterator_traits< OutputIteratorT >::value_type, void >::VALUE), typename std::iterator_traits< InputIteratorT >::value_type, typename std::iterator_traits< OutputIteratorT >::value_type >::Type OutputT
 
- Public Types inherited from cub::DeviceReducePolicy< If<(Equals< std::iterator_traits< OutputIteratorT >::value_type, void >::VALUE), std::iterator_traits< InputIteratorT >::value_type, std::iterator_traits< OutputIteratorT >::value_type >::Type, OffsetT, ReductionOpT >
typedef Policy600 MaxPolicy
 MaxPolicy.
 

Public Member Functions

CUB_RUNTIME_FUNCTION __forceinline__ DispatchReduce (void *d_temp_storage, size_t &temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, OffsetT num_items, ReductionOpT reduction_op, OutputT init, cudaStream_t stream, bool debug_synchronous, int ptx_version)
 Constructor.
 
template<typename ActivePolicyT , typename SingleTileKernelT >
CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t InvokeSingleTile (SingleTileKernelT single_tile_kernel)
 Invoke a single block to reduce in-core.
 
template<typename ActivePolicyT , typename ReduceKernelT , typename SingleTileKernelT >
CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t InvokePasses (ReduceKernelT reduce_kernel, SingleTileKernelT single_tile_kernel)
 Invoke two passes to reduce.
 
template<typename ActivePolicyT >
CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t Invoke ()
 Invocation.
 

Static Public Member Functions

CUB_RUNTIME_FUNCTION static __forceinline__ cudaError_t Dispatch (void *d_temp_storage, size_t &temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, OffsetT num_items, ReductionOpT reduction_op, OutputT init, cudaStream_t stream, bool debug_synchronous)
 

Data Fields

void * d_temp_storage
 [in] Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to temp_storage_bytes and no work is done.
 
size_t & temp_storage_bytes
 [in,out] Reference to size in bytes of d_temp_storage allocation
 
InputIteratorT d_in
 [in] Pointer to the input sequence of data items
 
OutputIteratorT d_out
 [out] Pointer to the output aggregate
 
OffsetT num_items
 [in] Total number of input items (i.e., length of d_in)
 
ReductionOpT reduction_op
 [in] Binary reduction functor
 
OutputT init
 [in] The initial value of the reduction
 
cudaStream_t stream
 [in] CUDA stream to launch kernels within. Default is stream 0.
 
bool debug_synchronous
 [in] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is false.
 
int ptx_version
 [in] PTX version
 

Member Typedef Documentation

◆ OutputT

template<typename InputIteratorT , typename OutputIteratorT , typename OffsetT , typename ReductionOpT >
typedef If<(Equals<typename std::iterator_traits<OutputIteratorT>::value_type,void>::VALUE),typename std::iterator_traits<InputIteratorT>::value_type,typename std::iterator_traits<OutputIteratorT>::value_type>::Type cub::DispatchReduce< InputIteratorT, OutputIteratorT, OffsetT, ReductionOpT >::OutputT

Definition at line 374 of file dispatch_reduce.cuh.

Constructor & Destructor Documentation

◆ DispatchReduce()

template<typename InputIteratorT , typename OutputIteratorT , typename OffsetT , typename ReductionOpT >
CUB_RUNTIME_FUNCTION __forceinline__ cub::DispatchReduce< InputIteratorT, OutputIteratorT, OffsetT, ReductionOpT >::DispatchReduce ( void *  d_temp_storage,
size_t &  temp_storage_bytes,
InputIteratorT  d_in,
OutputIteratorT  d_out,
OffsetT  num_items,
ReductionOpT  reduction_op,
OutputT  init,
cudaStream_t  stream,
bool  debug_synchronous,
int  ptx_version 
)
inline

Constructor.

Definition at line 398 of file dispatch_reduce.cuh.

Member Function Documentation

◆ Dispatch()

template<typename InputIteratorT , typename OutputIteratorT , typename OffsetT , typename ReductionOpT >
CUB_RUNTIME_FUNCTION static __forceinline__ cudaError_t cub::DispatchReduce< InputIteratorT, OutputIteratorT, OffsetT, ReductionOpT >::Dispatch ( void *  d_temp_storage,
size_t &  temp_storage_bytes,
InputIteratorT  d_in,
OutputIteratorT  d_out,
OffsetT  num_items,
ReductionOpT  reduction_op,
OutputT  init,
cudaStream_t  stream,
bool  debug_synchronous 
)
inline static

Internal dispatch routine for computing a device-wide reduction

Parameters
[in] d_temp_storage: Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to temp_storage_bytes and no work is done.
[in,out] temp_storage_bytes: Reference to size in bytes of d_temp_storage allocation
[in] d_in: Pointer to the input sequence of data items
[out] d_out: Pointer to the output aggregate
[in] num_items: Total number of input items (i.e., length of d_in)
[in] reduction_op: Binary reduction functor
[in] init: The initial value of the reduction
[in] stream: [optional] CUDA stream to launch kernels within. Default is stream 0.
[in] debug_synchronous: [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is false.

Definition at line 631 of file dispatch_reduce.cuh.

◆ Invoke()

template<typename InputIteratorT , typename OutputIteratorT , typename OffsetT , typename ReductionOpT >
template<typename ActivePolicyT >
CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t cub::DispatchReduce< InputIteratorT, OutputIteratorT, OffsetT, ReductionOpT >::Invoke ( )
inline

Invocation.

Definition at line 601 of file dispatch_reduce.cuh.

◆ InvokePasses()

template<typename InputIteratorT , typename OutputIteratorT , typename OffsetT , typename ReductionOpT >
template<typename ActivePolicyT , typename ReduceKernelT , typename SingleTileKernelT >
CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t cub::DispatchReduce< InputIteratorT, OutputIteratorT, OffsetT, ReductionOpT >::InvokePasses ( ReduceKernelT  reduce_kernel,
SingleTileKernelT  single_tile_kernel 
)
inline

Invoke two passes to reduce.

SingleTileKernelT: Function type of cub::DeviceReduceSingleTileKernel

Parameters
[in] reduce_kernel: Kernel function pointer to parameterization of cub::DeviceReduceKernel
[in] single_tile_kernel: Kernel function pointer to parameterization of cub::DeviceReduceSingleTileKernel

Definition at line 489 of file dispatch_reduce.cuh.

◆ InvokeSingleTile()

template<typename InputIteratorT , typename OutputIteratorT , typename OffsetT , typename ReductionOpT >
template<typename ActivePolicyT , typename SingleTileKernelT >
CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t cub::DispatchReduce< InputIteratorT, OutputIteratorT, OffsetT, ReductionOpT >::InvokeSingleTile ( SingleTileKernelT  single_tile_kernel)
inline

Invoke a single block to reduce in-core.

SingleTileKernelT: Function type of cub::DeviceReduceSingleTileKernel

Parameters
[in] single_tile_kernel: Kernel function pointer to parameterization of cub::DeviceReduceSingleTileKernel

Definition at line 432 of file dispatch_reduce.cuh.

Field Documentation

◆ d_in

template<typename InputIteratorT , typename OutputIteratorT , typename OffsetT , typename ReductionOpT >
InputIteratorT cub::DispatchReduce< InputIteratorT, OutputIteratorT, OffsetT, ReductionOpT >::d_in

[in] Pointer to the input sequence of data items

Definition at line 383 of file dispatch_reduce.cuh.

◆ d_out

template<typename InputIteratorT , typename OutputIteratorT , typename OffsetT , typename ReductionOpT >
OutputIteratorT cub::DispatchReduce< InputIteratorT, OutputIteratorT, OffsetT, ReductionOpT >::d_out

[out] Pointer to the output aggregate

Definition at line 384 of file dispatch_reduce.cuh.

◆ d_temp_storage

template<typename InputIteratorT , typename OutputIteratorT , typename OffsetT , typename ReductionOpT >
void* cub::DispatchReduce< InputIteratorT, OutputIteratorT, OffsetT, ReductionOpT >::d_temp_storage

[in] Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to temp_storage_bytes and no work is done.

Definition at line 381 of file dispatch_reduce.cuh.

◆ debug_synchronous

template<typename InputIteratorT , typename OutputIteratorT , typename OffsetT , typename ReductionOpT >
bool cub::DispatchReduce< InputIteratorT, OutputIteratorT, OffsetT, ReductionOpT >::debug_synchronous

[in] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is false.

Definition at line 389 of file dispatch_reduce.cuh.

◆ init

template<typename InputIteratorT , typename OutputIteratorT , typename OffsetT , typename ReductionOpT >
OutputT cub::DispatchReduce< InputIteratorT, OutputIteratorT, OffsetT, ReductionOpT >::init

[in] The initial value of the reduction

Definition at line 387 of file dispatch_reduce.cuh.

◆ num_items

template<typename InputIteratorT , typename OutputIteratorT , typename OffsetT , typename ReductionOpT >
OffsetT cub::DispatchReduce< InputIteratorT, OutputIteratorT, OffsetT, ReductionOpT >::num_items

[in] Total number of input items (i.e., length of d_in)

Definition at line 385 of file dispatch_reduce.cuh.

◆ ptx_version

template<typename InputIteratorT , typename OutputIteratorT , typename OffsetT , typename ReductionOpT >
int cub::DispatchReduce< InputIteratorT, OutputIteratorT, OffsetT, ReductionOpT >::ptx_version

[in] PTX version

Definition at line 390 of file dispatch_reduce.cuh.

◆ reduction_op

template<typename InputIteratorT , typename OutputIteratorT , typename OffsetT , typename ReductionOpT >
ReductionOpT cub::DispatchReduce< InputIteratorT, OutputIteratorT, OffsetT, ReductionOpT >::reduction_op

[in] Binary reduction functor

Definition at line 386 of file dispatch_reduce.cuh.

◆ stream

template<typename InputIteratorT , typename OutputIteratorT , typename OffsetT , typename ReductionOpT >
cudaStream_t cub::DispatchReduce< InputIteratorT, OutputIteratorT, OffsetT, ReductionOpT >::stream

[in] CUDA stream to launch kernels within. Default is stream 0.

Definition at line 388 of file dispatch_reduce.cuh.

◆ temp_storage_bytes

template<typename InputIteratorT , typename OutputIteratorT , typename OffsetT , typename ReductionOpT >
size_t& cub::DispatchReduce< InputIteratorT, OutputIteratorT, OffsetT, ReductionOpT >::temp_storage_bytes

[in,out] Reference to size in bytes of d_temp_storage allocation

Definition at line 382 of file dispatch_reduce.cuh.


The documentation for this struct was generated from the following file: