OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
cub::BlockLoad< InputT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::LoadInternal< BLOCK_LOAD_WARP_TRANSPOSE_TIMESLICED, DUMMY > Struct Template Reference

Detailed Description

template<typename InputT, int BLOCK_DIM_X, int ITEMS_PER_THREAD, BlockLoadAlgorithm ALGORITHM = BLOCK_LOAD_DIRECT, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int PTX_ARCH = CUB_PTX_ARCH>
template<int DUMMY>
struct cub::BlockLoad< InputT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::LoadInternal< BLOCK_LOAD_WARP_TRANSPOSE_TIMESLICED, DUMMY >

BLOCK_LOAD_WARP_TRANSPOSE_TIMESLICED specialization of load helper

Definition at line 947 of file block_load.cuh.

Data Structures

struct  _TempStorage
 Shared memory storage layout type. More...
 
struct  TempStorage
 Alias wrapper allowing storage to be unioned. More...
 

Public Types

enum  { WARP_THREADS = CUB_WARP_THREADS(PTX_ARCH) }
 
typedef BlockExchange< InputT, BLOCK_DIM_X, ITEMS_PER_THREAD, true, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH > BlockExchange
 

Public Member Functions

 CUB_STATIC_ASSERT ((BLOCK_THREADS % WARP_THREADS==0), "BLOCK_THREADS must be a multiple of WARP_THREADS")
 
__device__ __forceinline__ LoadInternal (TempStorage &temp_storage, int linear_tid)
 Constructor.
 
template<typename InputIteratorT >
__device__ __forceinline__ void Load (InputIteratorT block_itr, InputT(&items)[ITEMS_PER_THREAD])
 Load a linear segment of items from memory. More...
 
template<typename InputIteratorT >
__device__ __forceinline__ void Load (InputIteratorT block_itr, InputT(&items)[ITEMS_PER_THREAD], int valid_items)
 Load a linear segment of items from memory, guarded by range. More...
 
template<typename InputIteratorT , typename DefaultT >
__device__ __forceinline__ void Load (InputIteratorT block_itr, InputT(&items)[ITEMS_PER_THREAD], int valid_items, DefaultT oob_default)
 Load a linear segment of items from memory, guarded by range, with a fall-back assignment of out-of-bound elements. More...
 

Data Fields

_TempStorage & temp_storage
 Thread reference to shared storage.
 
int linear_tid
 Linear thread-id.
 

Member Function Documentation

◆ Load() [1/3]

template<typename InputT , int BLOCK_DIM_X, int ITEMS_PER_THREAD, BlockLoadAlgorithm ALGORITHM = BLOCK_LOAD_DIRECT, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int PTX_ARCH = CUB_PTX_ARCH>
template<int DUMMY>
template<typename InputIteratorT >
__device__ __forceinline__ void cub::BlockLoad< InputT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::LoadInternal< BLOCK_LOAD_WARP_TRANSPOSE_TIMESLICED, DUMMY >::Load ( InputIteratorT  block_itr,
InputT(&)  items[ITEMS_PER_THREAD] 
)
inline

Load a linear segment of items from memory.

Parameters
[in] block_itr: The thread block's base input iterator for loading from
[out] items: Data to load

Definition at line 984 of file block_load.cuh.

◆ Load() [2/3]

template<typename InputT , int BLOCK_DIM_X, int ITEMS_PER_THREAD, BlockLoadAlgorithm ALGORITHM = BLOCK_LOAD_DIRECT, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int PTX_ARCH = CUB_PTX_ARCH>
template<int DUMMY>
template<typename InputIteratorT >
__device__ __forceinline__ void cub::BlockLoad< InputT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::LoadInternal< BLOCK_LOAD_WARP_TRANSPOSE_TIMESLICED, DUMMY >::Load ( InputIteratorT  block_itr,
InputT(&)  items[ITEMS_PER_THREAD],
int  valid_items 
)
inline

Load a linear segment of items from memory, guarded by range.

Parameters
[in] block_itr: The thread block's base input iterator for loading from
[out] items: Data to load
[in] valid_items: Number of valid items to load

Definition at line 994 of file block_load.cuh.

◆ Load() [3/3]

template<typename InputT , int BLOCK_DIM_X, int ITEMS_PER_THREAD, BlockLoadAlgorithm ALGORITHM = BLOCK_LOAD_DIRECT, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int PTX_ARCH = CUB_PTX_ARCH>
template<int DUMMY>
template<typename InputIteratorT , typename DefaultT >
__device__ __forceinline__ void cub::BlockLoad< InputT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::LoadInternal< BLOCK_LOAD_WARP_TRANSPOSE_TIMESLICED, DUMMY >::Load ( InputIteratorT  block_itr,
InputT(&)  items[ITEMS_PER_THREAD],
int  valid_items,
DefaultT  oob_default 
)
inline

Load a linear segment of items from memory, guarded by range, with a fall-back assignment of out-of-bound elements.

Parameters
[in] block_itr: The thread block's base input iterator for loading from
[out] items: Data to load
[in] valid_items: Number of valid items to load
[in] oob_default: Default value to assign out-of-bound items

Definition at line 1006 of file block_load.cuh.


The documentation for this struct was generated from the following file: