Data Structures | |
| struct | cub::CachingDeviceAllocator |
| A simple caching allocator for device memory allocations. More... | |
| struct | cub::KernelConfig |
| struct | cub::ChainedPolicy< PTX_VERSION, PolicyT, PrevPolicyT > |
| Helper for dispatching into a policy chain. More... | |
| struct | cub::ChainedPolicy< PTX_VERSION, PolicyT, PolicyT > |
| Helper for dispatching into a policy chain (end-of-chain specialization) More... | |
Macros | |
| #define | CubDebug(e) cub::Debug((cudaError_t) (e), __FILE__, __LINE__) |
| Debug macro. | |
| #define | CubDebugExit(e) if (cub::Debug((cudaError_t) (e), __FILE__, __LINE__)) { exit(1); } |
| Debug macro with exit. | |
| #define | _CubLog(format, ...) printf(format,__VA_ARGS__); |
| Log macro for printf statements. | |
Functions | |
| __host__ __device__ __forceinline__ cudaError_t | cub::Debug (cudaError_t error, const char *filename, int line) |
| CUB error reporting function (prints error messages to stderr) | |
| template<int ALLOCATIONS> | |
| __host__ __device__ __forceinline__ cudaError_t | cub::AliasTemporaries (void *d_temp_storage, size_t &temp_storage_bytes, void *(&allocations)[ALLOCATIONS], size_t(&allocation_sizes)[ALLOCATIONS]) |
| template<typename T > | |
| __global__ void | cub::EmptyKernel (void) |
| CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t | cub::PtxVersion (int &ptx_version) |
| Retrieves the PTX version that will be used on the current device (major * 100 + minor * 10) | |
| CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t | cub::SmVersion (int &sm_version, int device_ordinal) |
| Retrieves the SM version (major * 100 + minor * 10) | |
| CUB_RUNTIME_FUNCTION static __forceinline__ cudaError_t | cub::SyncStream (cudaStream_t stream) |
| template<typename KernelPtr > | |
| CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t | cub::MaxSmOccupancy (int &max_sm_occupancy, KernelPtr kernel_ptr, int block_threads, int dynamic_smem_bytes=0) |
| Computes maximum SM occupancy in thread blocks for executing the given kernel function pointer kernel_ptr on the current device with block_threads per thread block. | |
| #define _CubLog | ( | format, | |
| ... | |||
| ) | printf(format,__VA_ARGS__); |
Log macro for printf statements.
Definition at line 112 of file util_debug.cuh.
| #define CubDebug | ( | e | ) | cub::Debug((cudaError_t) (e), __FILE__, __LINE__) |
Debug macro.
Definition at line 94 of file util_debug.cuh.
| #define CubDebugExit | ( | e | ) | if (cub::Debug((cudaError_t) (e), __FILE__, __LINE__)) { exit(1); } |
Debug macro with exit.
Definition at line 102 of file util_debug.cuh.
| __host__ __device__ __forceinline__ cudaError_t cub::AliasTemporaries | ( | void * | d_temp_storage, |
| size_t & | temp_storage_bytes, | ||
| void *(&) | allocations[ALLOCATIONS], | ||
| size_t(&) | allocation_sizes[ALLOCATIONS] | ||
| ) |
Alias temporaries to externally-allocated device storage (or simply return the amount of storage needed).
| [in] | d_temp_storage | Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to temp_storage_bytes and no work is done. |
| [in,out] | temp_storage_bytes | Size in bytes of the d_temp_storage allocation |
| [in,out] | allocations | Pointers to device allocations needed |
| [in] | allocation_sizes | Sizes in bytes of device allocations needed |
Definition at line 62 of file util_device.cuh.
| __host__ __device__ __forceinline__ cudaError_t cub::Debug | ( | cudaError_t | error, |
| const char * | filename, | ||
| int | line | ||
| ) |
CUB error reporting function (prints error messages to stderr)
If CUB_STDERR is defined and error is not cudaSuccess, the corresponding error message is printed to stderr (or stdout in device code) along with the supplied source context.
Definition at line 68 of file util_debug.cuh.
| __global__ void cub::EmptyKernel | ( | void | ) |
Empty kernel for querying PTX manifest metadata (e.g., version) for the current device
Definition at line 110 of file util_device.cuh.
| CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t cub::MaxSmOccupancy | ( | int & | max_sm_occupancy, |
| KernelPtr | kernel_ptr, | ||
| int | block_threads, | ||
| int | dynamic_smem_bytes = 0 | ||
| ) |
Computes maximum SM occupancy in thread blocks for executing the given kernel function pointer kernel_ptr on the current device with block_threads per thread block.
| [out] | max_sm_occupancy | Maximum number of thread blocks that can reside on a single SM |
| [in] | kernel_ptr | Kernel pointer for which to compute SM occupancy |
| [in] | block_threads | Number of threads per thread block |
| [in] | dynamic_smem_bytes | Dynamically allocated shared memory in bytes per thread block (defaults to 0) |
Definition at line 244 of file util_device.cuh.
| CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t cub::PtxVersion | ( | int & | ptx_version | ) |
Retrieves the PTX version that will be used on the current device (major * 100 + minor * 10)
Type definition of the EmptyKernel kernel entry point
Force EmptyKernel<void> to be generated if this class is used
Definition at line 118 of file util_device.cuh.
| CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t cub::SmVersion | ( | int & | sm_version, |
| int | device_ordinal | ||
| ) |
Retrieves the SM version (major * 100 + minor * 10)
Definition at line 165 of file util_device.cuh.
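| CUB_RUNTIME_FUNCTION static __forceinline__ cudaError_t cub::SyncStream | ( | cudaStream_t | stream | ) | static |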
Synchronize the stream if specified
Definition at line 199 of file util_device.cuh.