#include <iterator>
#include "block_exchange.cuh"
#include "../util_ptx.cuh"
#include "../util_macro.cuh"
#include "../util_type.cuh"
#include "../util_namespace.cuh"
Go to the source code of this file.
Namespaces | |
namespace | cub |
Optional outer namespace(s) | |
Enumerations | |
enum | cub::BlockStoreAlgorithm { cub::BLOCK_STORE_DIRECT , cub::BLOCK_STORE_VECTORIZE , cub::BLOCK_STORE_TRANSPOSE , cub::BLOCK_STORE_WARP_TRANSPOSE , cub::BLOCK_STORE_WARP_TRANSPOSE_TIMESLICED } |
cub::BlockStoreAlgorithm enumerates alternative algorithms for cub::BlockStore to write a blocked arrangement of items across a CUDA thread block to a linear segment of memory. More... | |
Functions | |
Blocked arrangement I/O (direct) | |
template<typename T , int ITEMS_PER_THREAD, typename OutputIteratorT > | |
__device__ __forceinline__ void | cub::StoreDirectBlocked (int linear_tid, OutputIteratorT block_itr, T(&items)[ITEMS_PER_THREAD]) |
Store a blocked arrangement of items across a thread block into a linear segment of items. | |
template<typename T , int ITEMS_PER_THREAD, typename OutputIteratorT > | |
__device__ __forceinline__ void | cub::StoreDirectBlocked (int linear_tid, OutputIteratorT block_itr, T(&items)[ITEMS_PER_THREAD], int valid_items) |
Store a blocked arrangement of items across a thread block into a linear segment of items, guarded by range. | |
template<typename T , int ITEMS_PER_THREAD> | |
__device__ __forceinline__ void | cub::StoreDirectBlockedVectorized (int linear_tid, T *block_ptr, T(&items)[ITEMS_PER_THREAD]) |
Store a blocked arrangement of items across a thread block into a linear segment of items, using vectorized stores through the raw pointer block_ptr. | |
Striped arrangement I/O (direct) | |
template<int BLOCK_THREADS, typename T , int ITEMS_PER_THREAD, typename OutputIteratorT > | |
__device__ __forceinline__ void | cub::StoreDirectStriped (int linear_tid, OutputIteratorT block_itr, T(&items)[ITEMS_PER_THREAD]) |
Store a striped arrangement of data across the thread block into a linear segment of items. | |
template<int BLOCK_THREADS, typename T , int ITEMS_PER_THREAD, typename OutputIteratorT > | |
__device__ __forceinline__ void | cub::StoreDirectStriped (int linear_tid, OutputIteratorT block_itr, T(&items)[ITEMS_PER_THREAD], int valid_items) |
Store a striped arrangement of data across the thread block into a linear segment of items, guarded by range. | |
Warp-striped arrangement I/O (direct) | |
template<typename T , int ITEMS_PER_THREAD, typename OutputIteratorT > | |
__device__ __forceinline__ void | cub::StoreDirectWarpStriped (int linear_tid, OutputIteratorT block_itr, T(&items)[ITEMS_PER_THREAD]) |
Store a warp-striped arrangement of data across the thread block into a linear segment of items. | |
template<typename T , int ITEMS_PER_THREAD, typename OutputIteratorT > | |
__device__ __forceinline__ void | cub::StoreDirectWarpStriped (int linear_tid, OutputIteratorT block_itr, T(&items)[ITEMS_PER_THREAD], int valid_items) |
Store a warp-striped arrangement of data across the thread block into a linear segment of items, guarded by range. | |
Operations for writing linear segments of data from the CUDA thread block.
Definition in file block_store.cuh.