OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
cub::DeviceRleDispatch< InputIteratorT, OffsetsOutputIteratorT, LengthsOutputIteratorT, NumRunsOutputIteratorT, EqualityOpT, OffsetT > Struct Template Reference

Signed integer type for global offsets. More...

Detailed Description

template<typename InputIteratorT, typename OffsetsOutputIteratorT, typename LengthsOutputIteratorT, typename NumRunsOutputIteratorT, typename EqualityOpT, typename OffsetT>
struct cub::DeviceRleDispatch< InputIteratorT, OffsetsOutputIteratorT, LengthsOutputIteratorT, NumRunsOutputIteratorT, EqualityOpT, OffsetT >

Signed integer type for global offsets

Utility class for dispatching the appropriately-tuned kernels for DeviceRle

Definition at line 121 of file dispatch_rle.cuh.

Data Structures

struct  KernelConfig
 
struct  Policy100
 SM10. More...
 
struct  Policy130
 SM13. More...
 
struct  Policy200
 SM20. More...
 
struct  Policy300
 SM30. More...
 
struct  Policy350
 SM35. More...
 
struct  PtxRleSweepPolicy
 

Public Types

enum  { INIT_KERNEL_THREADS = 128 }
 
typedef std::iterator_traits< InputIteratorT >::value_type T
 
typedef If<(Equals< typename std::iterator_traits< LengthsOutputIteratorT >::value_type, void >::VALUE), OffsetT, typename std::iterator_traits< LengthsOutputIteratorT >::value_type >::Type LengthT
 
typedef ReduceByKeyScanTileState< LengthT, OffsetT > ScanTileStateT
 
typedef Policy100 PtxPolicy
 

Static Public Member Functions

template<typename KernelConfig >
CUB_RUNTIME_FUNCTION static __forceinline__ void InitConfigs (int ptx_version, KernelConfig &device_rle_config)
 
template<typename DeviceScanInitKernelPtr , typename DeviceRleSweepKernelPtr >
CUB_RUNTIME_FUNCTION static __forceinline__ cudaError_t Dispatch (void *d_temp_storage, size_t &temp_storage_bytes, InputIteratorT d_in, OffsetsOutputIteratorT d_offsets_out, LengthsOutputIteratorT d_lengths_out, NumRunsOutputIteratorT d_num_runs_out, EqualityOpT equality_op, OffsetT num_items, cudaStream_t stream, bool debug_synchronous, int ptx_version, DeviceScanInitKernelPtr device_scan_init_kernel, DeviceRleSweepKernelPtr device_rle_sweep_kernel, KernelConfig device_rle_config)
 Function type of cub::DeviceRleSweepKernelPtr. More...
 
CUB_RUNTIME_FUNCTION static __forceinline__ cudaError_t Dispatch (void *d_temp_storage, size_t &temp_storage_bytes, InputIteratorT d_in, OffsetsOutputIteratorT d_offsets_out, LengthsOutputIteratorT d_lengths_out, NumRunsOutputIteratorT d_num_runs_out, EqualityOpT equality_op, OffsetT num_items, cudaStream_t stream, bool debug_synchronous)
 

Member Function Documentation

◆ Dispatch() [1/2]

template<typename InputIteratorT , typename OffsetsOutputIteratorT , typename LengthsOutputIteratorT , typename NumRunsOutputIteratorT , typename EqualityOpT , typename OffsetT >
template<typename DeviceScanInitKernelPtr , typename DeviceRleSweepKernelPtr >
CUB_RUNTIME_FUNCTION static __forceinline__ cudaError_t cub::DeviceRleDispatch< InputIteratorT, OffsetsOutputIteratorT, LengthsOutputIteratorT, NumRunsOutputIteratorT, EqualityOpT, OffsetT >::Dispatch ( void *  d_temp_storage,
size_t &  temp_storage_bytes,
InputIteratorT  d_in,
OffsetsOutputIteratorT  d_offsets_out,
LengthsOutputIteratorT  d_lengths_out,
NumRunsOutputIteratorT  d_num_runs_out,
EqualityOpT  equality_op,
OffsetT  num_items,
cudaStream_t  stream,
bool  debug_synchronous,
int  ptx_version,
DeviceScanInitKernelPtr  device_scan_init_kernel,
DeviceRleSweepKernelPtr  device_rle_sweep_kernel,
KernelConfig  device_rle_config 
)
inline static

Function type of cub::DeviceRleSweepKernelPtr

Internal dispatch routine for computing a device-wide run-length-encode using the specified kernel functions.

Parameters
[in] d_temp_storage: Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to temp_storage_bytes and no work is done.
[in,out] temp_storage_bytes: Reference to size in bytes of d_temp_storage allocation
[in] d_in: Pointer to the input sequence of data items
[out] d_offsets_out: Pointer to the output sequence of run-offsets
[out] d_lengths_out: Pointer to the output sequence of run-lengths
[out] d_num_runs_out: Pointer to the total number of runs encountered (i.e., length of d_offsets_out)
[in] equality_op: Equality operator for input items
[in] num_items: Total number of input items (i.e., length of d_in)
[in] stream: CUDA stream to launch kernels within. Default is stream0.
[in] debug_synchronous: Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is false.
[in] ptx_version: PTX version of dispatch kernels
[in] device_scan_init_kernel: Kernel function pointer to parameterization of cub::DeviceScanInitKernel
[in] device_rle_sweep_kernel: Kernel function pointer to parameterization of cub::DeviceRleSweepKernel
[in] device_rle_config: Dispatch parameters that match the policy that device_rle_sweep_kernel was compiled for

Definition at line 357 of file dispatch_rle.cuh.

◆ Dispatch() [2/2]

template<typename InputIteratorT , typename OffsetsOutputIteratorT , typename LengthsOutputIteratorT , typename NumRunsOutputIteratorT , typename EqualityOpT , typename OffsetT >
CUB_RUNTIME_FUNCTION static __forceinline__ cudaError_t cub::DeviceRleDispatch< InputIteratorT, OffsetsOutputIteratorT, LengthsOutputIteratorT, NumRunsOutputIteratorT, EqualityOpT, OffsetT >::Dispatch ( void *  d_temp_storage,
size_t &  temp_storage_bytes,
InputIteratorT  d_in,
OffsetsOutputIteratorT  d_offsets_out,
LengthsOutputIteratorT  d_lengths_out,
NumRunsOutputIteratorT  d_num_runs_out,
EqualityOpT  equality_op,
OffsetT  num_items,
cudaStream_t  stream,
bool  debug_synchronous 
)
inline static

Internal dispatch routine

Parameters
[in] d_temp_storage: Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to temp_storage_bytes and no work is done.
[in,out] temp_storage_bytes: Reference to size in bytes of d_temp_storage allocation
[in] d_in: Pointer to input sequence of data items
[out] d_offsets_out: Pointer to output sequence of run-offsets
[out] d_lengths_out: Pointer to output sequence of run-lengths
[out] d_num_runs_out: Pointer to total number of runs (i.e., length of d_offsets_out)
[in] equality_op: Equality operator for input items
[in] num_items: Total number of input items (i.e., length of d_in)
[in] stream: [optional] CUDA stream to launch kernels within. Default is stream0.
[in] debug_synchronous: [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is false.

Definition at line 484 of file dispatch_rle.cuh.

◆ InitConfigs()

template<typename InputIteratorT , typename OffsetsOutputIteratorT , typename LengthsOutputIteratorT , typename NumRunsOutputIteratorT , typename EqualityOpT , typename OffsetT >
template<typename KernelConfig >
CUB_RUNTIME_FUNCTION static __forceinline__ void cub::DeviceRleDispatch< InputIteratorT, OffsetsOutputIteratorT, LengthsOutputIteratorT, NumRunsOutputIteratorT, EqualityOpT, OffsetT >::InitConfigs ( int  ptx_version,
KernelConfig &  device_rle_config 
)
inline static

Initialize kernel dispatch configurations with the policies corresponding to the PTX assembly we will use

Definition at line 273 of file dispatch_rle.cuh.


The documentation for this struct was generated from the following file: