OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
 
Loading...
Searching...
No Matches
block_store.cuh File Reference
#include <iterator>
#include "block_exchange.cuh"
#include "../util_ptx.cuh"
#include "../util_macro.cuh"
#include "../util_type.cuh"
#include "../util_namespace.cuh"

Go to the source code of this file.

Data Structures

class  cub::BlockStore< T, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >
 The BlockStore class provides collective data movement methods for writing a blocked arrangement of items partitioned across a CUDA thread block to a linear segment of memory. More...
 
struct  cub::BlockStore< T, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::StoreInternal< BLOCK_STORE_DIRECT, DUMMY >
 
struct  cub::BlockStore< T, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::StoreInternal< BLOCK_STORE_VECTORIZE, DUMMY >
 
struct  cub::BlockStore< T, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::StoreInternal< BLOCK_STORE_TRANSPOSE, DUMMY >
 
struct  cub::BlockStore< T, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::StoreInternal< BLOCK_STORE_TRANSPOSE, DUMMY >::_TempStorage
 Shared memory storage layout type. More...
 
struct  cub::BlockStore< T, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::StoreInternal< BLOCK_STORE_TRANSPOSE, DUMMY >::TempStorage
 Alias wrapper allowing storage to be unioned. More...
 
struct  cub::BlockStore< T, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::StoreInternal< BLOCK_STORE_WARP_TRANSPOSE, DUMMY >
 
struct  cub::BlockStore< T, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::StoreInternal< BLOCK_STORE_WARP_TRANSPOSE, DUMMY >::_TempStorage
 Shared memory storage layout type. More...
 
struct  cub::BlockStore< T, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::StoreInternal< BLOCK_STORE_WARP_TRANSPOSE, DUMMY >::TempStorage
 Alias wrapper allowing storage to be unioned. More...
 
struct  cub::BlockStore< T, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::StoreInternal< BLOCK_STORE_WARP_TRANSPOSE_TIMESLICED, DUMMY >
 
struct  cub::BlockStore< T, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::StoreInternal< BLOCK_STORE_WARP_TRANSPOSE_TIMESLICED, DUMMY >::_TempStorage
 Shared memory storage layout type. More...
 
struct  cub::BlockStore< T, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::StoreInternal< BLOCK_STORE_WARP_TRANSPOSE_TIMESLICED, DUMMY >::TempStorage
 Alias wrapper allowing storage to be unioned. More...
 
struct  cub::BlockStore< T, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::TempStorage
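 The operations exposed by BlockStore require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the __shared__ keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or union'd with other storage allocation types to facilitate memory reuse. More...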
 

Namespaces

namespace  cub
 Optional outer namespace(s)
 

Enumerations

enum  cub::BlockStoreAlgorithm {
  cub::BLOCK_STORE_DIRECT , cub::BLOCK_STORE_VECTORIZE , cub::BLOCK_STORE_TRANSPOSE , cub::BLOCK_STORE_WARP_TRANSPOSE ,
  cub::BLOCK_STORE_WARP_TRANSPOSE_TIMESLICED
}
 cub::BlockStoreAlgorithm enumerates alternative algorithms for cub::BlockStore to write a blocked arrangement of items across a CUDA thread block to a linear segment of memory. More...
 

Functions

Blocked arrangement I/O (direct)
template<typename T , int ITEMS_PER_THREAD, typename OutputIteratorT >
__device__ __forceinline__ void cub::StoreDirectBlocked (int linear_tid, OutputIteratorT block_itr, T(&items)[ITEMS_PER_THREAD])
 Store a blocked arrangement of items across a thread block into a linear segment of items.
 
template<typename T , int ITEMS_PER_THREAD, typename OutputIteratorT >
__device__ __forceinline__ void cub::StoreDirectBlocked (int linear_tid, OutputIteratorT block_itr, T(&items)[ITEMS_PER_THREAD], int valid_items)
 Store a blocked arrangement of items across a thread block into a linear segment of items, guarded by range.
 
template<typename T , int ITEMS_PER_THREAD>
__device__ __forceinline__ void cub::StoreDirectBlockedVectorized (int linear_tid, T *block_ptr, T(&items)[ITEMS_PER_THREAD])
 Store a blocked arrangement of items across a thread block into a linear segment of items using vectorized writes.
 
Striped arrangement I/O (direct)
template<int BLOCK_THREADS, typename T , int ITEMS_PER_THREAD, typename OutputIteratorT >
__device__ __forceinline__ void cub::StoreDirectStriped (int linear_tid, OutputIteratorT block_itr, T(&items)[ITEMS_PER_THREAD])
 Store a striped arrangement of data across the thread block into a linear segment of items.
 
template<int BLOCK_THREADS, typename T , int ITEMS_PER_THREAD, typename OutputIteratorT >
__device__ __forceinline__ void cub::StoreDirectStriped (int linear_tid, OutputIteratorT block_itr, T(&items)[ITEMS_PER_THREAD], int valid_items)
 Store a striped arrangement of data across the thread block into a linear segment of items, guarded by range.
 
Warp-striped arrangement I/O (direct)
template<typename T , int ITEMS_PER_THREAD, typename OutputIteratorT >
__device__ __forceinline__ void cub::StoreDirectWarpStriped (int linear_tid, OutputIteratorT block_itr, T(&items)[ITEMS_PER_THREAD])
 Store a warp-striped arrangement of data across the thread block into a linear segment of items.
 
template<typename T , int ITEMS_PER_THREAD, typename OutputIteratorT >
__device__ __forceinline__ void cub::StoreDirectWarpStriped (int linear_tid, OutputIteratorT block_itr, T(&items)[ITEMS_PER_THREAD], int valid_items)
 Store a warp-striped arrangement of data across the thread block into a linear segment of items, guarded by range.
 

Detailed Description

Operations for writing linear segments of data from the CUDA thread block

Definition in file block_store.cuh.