Data Structures
struct	cub::CachingDeviceAllocator
	A simple caching allocator for device memory allocations. More...

struct	cub::KernelConfig

struct	cub::ChainedPolicy< PTX_VERSION, PolicyT, PrevPolicyT >
	Helper for dispatching into a policy chain. More...

struct	cub::ChainedPolicy< PTX_VERSION, PolicyT, PolicyT >
	Helper for dispatching into a policy chain (end-of-chain specialization) More...

Macros
#define	CubDebug(e) cub::Debug((cudaError_t) (e), __FILE__, __LINE__)
	Debug macro.

#define	CubDebugExit(e) if (cub::Debug((cudaError_t) (e), __FILE__, __LINE__)) { exit(1); }
	Debug macro with exit.

#define	_CubLog(format, ...) printf(format,__VA_ARGS__);
	Log macro for printf statements.

Functions
__host__ __device__ __forceinline__ cudaError_t	cub::Debug (cudaError_t error, const char *filename, int line)
	CUB error reporting function (prints error messages to stderr)

template<int ALLOCATIONS>
__host__ __device__ __forceinline__ cudaError_t	cub::AliasTemporaries (void *d_temp_storage, size_t &temp_storage_bytes, void *(&allocations)[ALLOCATIONS], size_t(&allocation_sizes)[ALLOCATIONS])

template<typename T >
__global__ void	cub::EmptyKernel (void)

CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t	cub::PtxVersion (int &ptx_version)
	Retrieves the PTX version that will be used on the current device (major * 100 + minor * 10)

CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t	cub::SmVersion (int &sm_version, int device_ordinal)
	Retrieves the SM version (major * 100 + minor * 10)

CUB_RUNTIME_FUNCTION static __forceinline__ cudaError_t	cub::SyncStream (cudaStream_t stream)

template<typename KernelPtr >
CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t	cub::MaxSmOccupancy (int &max_sm_occupancy, KernelPtr kernel_ptr, int block_threads, int dynamic_smem_bytes=0)
	Computes maximum SM occupancy in thread blocks for executing the given kernel function pointer `kernel_ptr` on the current device with `block_threads` per thread block.

Detailed Description

Macro Definition Documentation

◆ _CubLog

#define _CubLog	(	format,
		...
	)	printf(format,__VA_ARGS__);

Log macro for printf statements.

Definition at line 112 of file util_debug.cuh.

◆ CubDebug

#define CubDebug ( e ) cub::Debug((cudaError_t) (e), __FILE__, __LINE__)

Debug macro.

Definition at line 94 of file util_debug.cuh.

◆ CubDebugExit

#define CubDebugExit ( e ) if (cub::Debug((cudaError_t) (e), __FILE__, __LINE__)) { exit(1); }

Debug macro with exit.

Definition at line 102 of file util_debug.cuh.

Function Documentation

◆ AliasTemporaries()

template<int ALLOCATIONS>

__host__ __device__ __forceinline__ cudaError_t cub::AliasTemporaries	(	void *	d_temp_storage,
		size_t &	temp_storage_bytes,
		void *(&)	allocations[ALLOCATIONS],
		size_t(&)	allocation_sizes[ALLOCATIONS]
	)

Alias temporaries to externally-allocated device storage (or simply return the amount of storage needed).

Parameters

[in]	d_temp_storage	Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to `temp_storage_bytes` and no work is done.
[in,out]	temp_storage_bytes	Size in bytes of the `d_temp_storage` allocation
[in,out]	allocations	Pointers to device allocations needed
[in]	allocation_sizes	Sizes in bytes of device allocations needed

Definition at line 62 of file util_device.cuh.

◆ Debug()

__host__ __device__ __forceinline__ cudaError_t cub::Debug	(	cudaError_t	error,
		const char *	filename,
		int	line
	)

CUB error reporting function (prints error messages to stderr)

If CUB_STDERR is defined and error is not cudaSuccess, the corresponding error message is printed to stderr (or stdout in device code) along with the supplied source context.

Returns: The CUDA error.

Definition at line 68 of file util_debug.cuh.

◆ EmptyKernel()

template<typename T >

__global__ void cub::EmptyKernel ( void )

Empty kernel for querying PTX manifest metadata (e.g., version) for the current device

Definition at line 110 of file util_device.cuh.

◆ MaxSmOccupancy()

template<typename KernelPtr >

CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t cub::MaxSmOccupancy	(	int &	max_sm_occupancy,
		KernelPtr	kernel_ptr,
		int	block_threads,
		int	dynamic_smem_bytes = 0
	)

Computes maximum SM occupancy in thread blocks for executing the given kernel function pointer `kernel_ptr` on the current device with `block_threads` per thread block.

Snippet: The code snippet below illustrates the use of the MaxSmOccupancy function.

#include <cub/cub.cuh> // or equivalently <cub/util_device.cuh>

template <typename T>

__global__ void ExampleKernel()

{

// Allocate shared memory for BlockScan

__shared__ volatile T buffer[4096];

...

}

...

// Determine SM occupancy for ExampleKernel specialized for unsigned char

int max_sm_occupancy;

MaxSmOccupancy(max_sm_occupancy, ExampleKernel<unsigned char>, 64);

// max_sm_occupancy <-- 4 on SM10

// max_sm_occupancy <-- 8 on SM20

// max_sm_occupancy <-- 12 on SM35

cub::MaxSmOccupancy
CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t MaxSmOccupancy(int &max_sm_occupancy, KernelPtr kernel_ptr, int block_threads, int dynamic_smem_bytes=0)
Computes maximum SM occupancy in thread blocks for executing the given kernel function pointer kernel_ptr on the current device with block_threads per thread block.
Definition util_device.cuh:244

Parameters

[out]	max_sm_occupancy	Maximum number of thread blocks that can reside on a single SM
[in]	kernel_ptr	Kernel pointer for which to compute SM occupancy
[in]	block_threads	Number of threads per thread block
[in]	dynamic_smem_bytes	Dynamic shared memory size in bytes (defaults to 0)

Definition at line 244 of file util_device.cuh.

◆ PtxVersion()

CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t cub::PtxVersion ( int & ptx_version )

Retrieves the PTX version that will be used on the current device (major * 100 + minor * 10)

Type definition of the EmptyKernel kernel entry point

Force EmptyKernel<void> to be generated if this class is used

Definition at line 118 of file util_device.cuh.

◆ SmVersion()

CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t cub::SmVersion	(	int &	sm_version,
		int	device_ordinal
	)

Retrieves the SM version (major * 100 + minor * 10)

Definition at line 165 of file util_device.cuh.

◆ SyncStream()

CUB_RUNTIME_FUNCTION static __forceinline__ cudaError_t cub::SyncStream ( cudaStream_t stream )

static

Synchronize the stream if specified

Definition at line 199 of file util_device.cuh.

Data Structures

Macros

Functions