Data Structures | |
struct | cub::CachingDeviceAllocator |
A simple caching allocator for device memory allocations. More... | |
struct | cub::KernelConfig |
struct | cub::ChainedPolicy< PTX_VERSION, PolicyT, PrevPolicyT > |
Helper for dispatching into a policy chain. More... | |
struct | cub::ChainedPolicy< PTX_VERSION, PolicyT, PolicyT > |
Helper for dispatching into a policy chain (end-of-chain specialization) More... | |
Macros | |
#define | CubDebug(e) cub::Debug((cudaError_t) (e), __FILE__, __LINE__) |
Debug macro. | |
#define | CubDebugExit(e) if (cub::Debug((cudaError_t) (e), __FILE__, __LINE__)) { exit(1); } |
Debug macro with exit. | |
#define | _CubLog(format, ...) printf(format,__VA_ARGS__); |
Log macro for printf statements. | |
Functions | |
__host__ __device__ __forceinline__ cudaError_t | cub::Debug (cudaError_t error, const char *filename, int line) |
CUB error reporting function (prints error messages to stderr) | |
template<int ALLOCATIONS> | |
__host__ __device__ __forceinline__ cudaError_t | cub::AliasTemporaries (void *d_temp_storage, size_t &temp_storage_bytes, void *(&allocations)[ALLOCATIONS], size_t(&allocation_sizes)[ALLOCATIONS]) |
template<typename T > | |
__global__ void | cub::EmptyKernel (void) |
CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t | cub::PtxVersion (int &ptx_version) |
Retrieves the PTX version that will be used on the current device (major * 100 + minor * 10) | |
CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t | cub::SmVersion (int &sm_version, int device_ordinal) |
Retrieves the SM version (major * 100 + minor * 10) | |
CUB_RUNTIME_FUNCTION static __forceinline__ cudaError_t | cub::SyncStream (cudaStream_t stream) |
template<typename KernelPtr > | |
CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t | cub::MaxSmOccupancy (int &max_sm_occupancy, KernelPtr kernel_ptr, int block_threads, int dynamic_smem_bytes=0) |
Computes maximum SM occupancy in thread blocks for executing the given kernel function pointer kernel_ptr on the current device with block_threads per thread block. | |
#define _CubLog | ( | format, | |
... | |||
) | printf(format,__VA_ARGS__); |
Log macro for printf statements.
Definition at line 112 of file util_debug.cuh.
#define CubDebug | ( | e | ) | cub::Debug((cudaError_t) (e), __FILE__, __LINE__) |
Debug macro.
Definition at line 94 of file util_debug.cuh.
#define CubDebugExit | ( | e | ) | if (cub::Debug((cudaError_t) (e), __FILE__, __LINE__)) { exit(1); } |
Debug macro with exit.
Definition at line 102 of file util_debug.cuh.
__host__ __device__ __forceinline__ cudaError_t cub::AliasTemporaries | ( | void * | d_temp_storage, |
size_t & | temp_storage_bytes, | ||
void *(&) | allocations[ALLOCATIONS], | ||
size_t(&) | allocation_sizes[ALLOCATIONS] | ||
) |
Alias temporaries to externally-allocated device storage (or simply return the amount of storage needed).
[in] | d_temp_storage | Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to temp_storage_bytes and no work is done. |
[in,out] | temp_storage_bytes | Size in bytes of the d_temp_storage allocation |
[in,out] | allocations | Pointers to device allocations needed |
[in] | allocation_sizes | Sizes in bytes of device allocations needed |
Definition at line 62 of file util_device.cuh.
__host__ __device__ __forceinline__ cudaError_t cub::Debug | ( | cudaError_t | error, |
const char * | filename, | ||
int | line | ||
) |
CUB error reporting function (prints error messages to stderr)
If CUB_STDERR is defined and error is not cudaSuccess, the corresponding error message is printed to stderr (or stdout in device code) along with the supplied source context.
Definition at line 68 of file util_debug.cuh.
__global__ void cub::EmptyKernel | ( | void | ) |
Empty kernel for querying PTX manifest metadata (e.g., version) for the current device
Definition at line 110 of file util_device.cuh.
CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t cub::MaxSmOccupancy | ( | int & | max_sm_occupancy, |
KernelPtr | kernel_ptr, | ||
int | block_threads, | ||
int | dynamic_smem_bytes = 0 | ||
) |
Computes maximum SM occupancy in thread blocks for executing the given kernel function pointer kernel_ptr on the current device with block_threads per thread block.
[out] | max_sm_occupancy | maximum number of thread blocks that can reside on a single SM |
[in] | kernel_ptr | Kernel pointer for which to compute SM occupancy |
[in] | block_threads | Number of threads per thread block |
Definition at line 244 of file util_device.cuh.
CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t cub::PtxVersion | ( | int & | ptx_version | ) |
Retrieves the PTX version that will be used on the current device (major * 100 + minor * 10)
Type definition of the EmptyKernel kernel entry point
Force EmptyKernel<void> to be generated if this class is used
Definition at line 118 of file util_device.cuh.
CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t cub::SmVersion | ( | int & | sm_version, |
int | device_ordinal | ||
) |
Retrieves the SM version (major * 100 + minor * 10)
Definition at line 165 of file util_device.cuh.
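CUB_RUNTIME_FUNCTION static __forceinline__ cudaError_t cub::SyncStream | ( | cudaStream_t | stream | ) |
static |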
Synchronize the stream if specified
Definition at line 199 of file util_device.cuh.