#include <iterator>
#include "block_exchange.cuh"
#include "../util_ptx.cuh"
#include "../util_macro.cuh"
#include "../util_type.cuh"
#include "../util_namespace.cuh"

Data Structures
class	cub::BlockStore< T, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >
	The BlockStore class provides collective data movement methods for writing a blocked arrangement of items partitioned across a CUDA thread block to a linear segment of memory. More...

struct	cub::BlockStore< T, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::StoreInternal< BLOCK_STORE_DIRECT, DUMMY >

struct	cub::BlockStore< T, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::StoreInternal< BLOCK_STORE_VECTORIZE, DUMMY >

struct	cub::BlockStore< T, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::StoreInternal< BLOCK_STORE_TRANSPOSE, DUMMY >

struct	cub::BlockStore< T, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::StoreInternal< BLOCK_STORE_TRANSPOSE, DUMMY >::_TempStorage
	Shared memory storage layout type. More...

struct	cub::BlockStore< T, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::StoreInternal< BLOCK_STORE_TRANSPOSE, DUMMY >::TempStorage
	Alias wrapper allowing storage to be unioned. More...

struct	cub::BlockStore< T, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::StoreInternal< BLOCK_STORE_WARP_TRANSPOSE, DUMMY >

struct	cub::BlockStore< T, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::StoreInternal< BLOCK_STORE_WARP_TRANSPOSE, DUMMY >::_TempStorage
	Shared memory storage layout type. More...

struct	cub::BlockStore< T, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::StoreInternal< BLOCK_STORE_WARP_TRANSPOSE, DUMMY >::TempStorage
	Alias wrapper allowing storage to be unioned. More...

struct	cub::BlockStore< T, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::StoreInternal< BLOCK_STORE_WARP_TRANSPOSE_TIMESLICED, DUMMY >

struct	cub::BlockStore< T, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::StoreInternal< BLOCK_STORE_WARP_TRANSPOSE_TIMESLICED, DUMMY >::_TempStorage
	Shared memory storage layout type. More...

struct	cub::BlockStore< T, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::StoreInternal< BLOCK_STORE_WARP_TRANSPOSE_TIMESLICED, DUMMY >::TempStorage
	Alias wrapper allowing storage to be unioned. More...

struct	cub::BlockStore< T, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::TempStorage
	The operations exposed by BlockStore require a temporary memory allocation of this nested type for thread communication. More...

Namespaces
namespace	cub
	CUB namespace (may optionally be nested inside user-specified outer namespace(s))

Enumerations
enum	cub::BlockStoreAlgorithm { cub::BLOCK_STORE_DIRECT , cub::BLOCK_STORE_VECTORIZE , cub::BLOCK_STORE_TRANSPOSE , cub::BLOCK_STORE_WARP_TRANSPOSE , cub::BLOCK_STORE_WARP_TRANSPOSE_TIMESLICED }
	cub::BlockStoreAlgorithm enumerates alternative algorithms for cub::BlockStore to write a blocked arrangement of items across a CUDA thread block to a linear segment of memory. More...

Functions
Blocked arrangement I/O (direct)
template<typename T , int ITEMS_PER_THREAD, typename OutputIteratorT >
__device__ __forceinline__ void	cub::StoreDirectBlocked (int linear_tid, OutputIteratorT block_itr, T(&items)[ITEMS_PER_THREAD])
	Store a blocked arrangement of items across a thread block into a linear segment of items.

template<typename T , int ITEMS_PER_THREAD, typename OutputIteratorT >
__device__ __forceinline__ void	cub::StoreDirectBlocked (int linear_tid, OutputIteratorT block_itr, T(&items)[ITEMS_PER_THREAD], int valid_items)
	Store a blocked arrangement of items across a thread block into a linear segment of items, guarded by range.

template<typename T , int ITEMS_PER_THREAD>
__device__ __forceinline__ void	cub::StoreDirectBlockedVectorized (int linear_tid, T *block_ptr, T(&items)[ITEMS_PER_THREAD])
	Store a blocked arrangement of items across a thread block into a linear segment of items.

Striped arrangement I/O (direct)
template<int BLOCK_THREADS, typename T , int ITEMS_PER_THREAD, typename OutputIteratorT >
__device__ __forceinline__ void	cub::StoreDirectStriped (int linear_tid, OutputIteratorT block_itr, T(&items)[ITEMS_PER_THREAD])
	Store a striped arrangement of data across the thread block into a linear segment of items.

template<int BLOCK_THREADS, typename T , int ITEMS_PER_THREAD, typename OutputIteratorT >
__device__ __forceinline__ void	cub::StoreDirectStriped (int linear_tid, OutputIteratorT block_itr, T(&items)[ITEMS_PER_THREAD], int valid_items)
	Store a striped arrangement of data across the thread block into a linear segment of items, guarded by range.

Warp-striped arrangement I/O (direct)
template<typename T , int ITEMS_PER_THREAD, typename OutputIteratorT >
__device__ __forceinline__ void	cub::StoreDirectWarpStriped (int linear_tid, OutputIteratorT block_itr, T(&items)[ITEMS_PER_THREAD])
	Store a warp-striped arrangement of data across the thread block into a linear segment of items.

template<typename T , int ITEMS_PER_THREAD, typename OutputIteratorT >
__device__ __forceinline__ void	cub::StoreDirectWarpStriped (int linear_tid, OutputIteratorT block_itr, T(&items)[ITEMS_PER_THREAD], int valid_items)
	Store a warp-striped arrangement of data across the thread block into a linear segment of items, guarded by range.

Detailed Description

Operations for writing linear segments of data from a CUDA thread block.

Definition in file block_store.cuh.

Data Structures

Namespaces

Enumerations

Functions

Detailed Description