OpenFPM_pdata  3.0.0
Project that contains the implementation of distributed structures
UtilMgmt

Data Structures

struct  cub::CachingDeviceAllocator
 A simple caching allocator for device memory allocations. More...
 
struct  cub::KernelConfig
 
struct  cub::ChainedPolicy< PTX_VERSION, PolicyT, PrevPolicyT >
 Helper for dispatching into a policy chain. More...
 
struct  cub::ChainedPolicy< PTX_VERSION, PolicyT, PolicyT >
 Helper for dispatching into a policy chain (end-of-chain specialization) More...
 

Macros

#define CubDebug(e)   cub::Debug((cudaError_t) (e), __FILE__, __LINE__)
 Debug macro.
 
#define CubDebugExit(e)   if (cub::Debug((cudaError_t) (e), __FILE__, __LINE__)) { exit(1); }
 Debug macro with exit.
 
#define _CubLog(format, ...)   printf(format,__VA_ARGS__);
 Log macro for printf statements.
 

Functions

__host__ __device__ __forceinline__ cudaError_t cub::Debug (cudaError_t error, const char *filename, int line)
 CUB error reporting macro (prints error messages to stderr) More...
 
template<int ALLOCATIONS>
__host__ __device__ __forceinline__ cudaError_t cub::AliasTemporaries (void *d_temp_storage, size_t &temp_storage_bytes, void *(&allocations)[ALLOCATIONS], size_t(&allocation_sizes)[ALLOCATIONS])
 
template<typename T >
__global__ void cub::EmptyKernel (void)
 
CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t cub::PtxVersion (int &ptx_version)
 Retrieves the PTX version that will be used on the current device (major * 100 + minor * 10) More...
 
CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t cub::SmVersion (int &sm_version, int device_ordinal)
 Retrieves the SM version (major * 100 + minor * 10)
 
CUB_RUNTIME_FUNCTION static __forceinline__ cudaError_t cub::SyncStream (cudaStream_t stream)
 
template<typename KernelPtr >
CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t cub::MaxSmOccupancy (int &max_sm_occupancy, KernelPtr kernel_ptr, int block_threads, int dynamic_smem_bytes=0)
 Computes maximum SM occupancy in thread blocks for executing the given kernel function pointer kernel_ptr on the current device with block_threads per thread block. More...
 

Detailed Description

Function Documentation

◆ AliasTemporaries()

template<int ALLOCATIONS>
__host__ __device__ __forceinline__ cudaError_t cub::AliasTemporaries ( void *  d_temp_storage,
size_t &  temp_storage_bytes,
void *(&)  allocations[ALLOCATIONS],
size_t(&)  allocation_sizes[ALLOCATIONS] 
)

Alias temporaries to externally-allocated device storage (or simply return the amount of storage needed).

Parameters
[in] d_temp_storage: Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to temp_storage_bytes and no work is done.
[in,out] temp_storage_bytes: Size in bytes of the d_temp_storage allocation
[in,out] allocations: Pointers to device allocations needed
[in] allocation_sizes: Sizes in bytes of device allocations needed

Definition at line 62 of file util_device.cuh.

◆ Debug()

__host__ __device__ __forceinline__ cudaError_t cub::Debug ( cudaError_t  error,
const char *  filename,
int  line 
)

CUB error reporting macro (prints error messages to stderr)

If CUB_STDERR is defined and error is not cudaSuccess, the corresponding error message is printed to stderr (or stdout in device code) along with the supplied source context.

Returns
The CUDA error.

Definition at line 68 of file util_debug.cuh.

◆ EmptyKernel()

template<typename T >
__global__ void cub::EmptyKernel ( void  )

Empty kernel for querying PTX manifest metadata (e.g., version) for the current device

Definition at line 110 of file util_device.cuh.

◆ MaxSmOccupancy()

template<typename KernelPtr >
CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t cub::MaxSmOccupancy ( int &  max_sm_occupancy,
KernelPtr  kernel_ptr,
int  block_threads,
int  dynamic_smem_bytes = 0 
)

Computes maximum SM occupancy in thread blocks for executing the given kernel function pointer kernel_ptr on the current device with block_threads per thread block.

Snippet
The code snippet below illustrates the use of the MaxSmOccupancy function.
#include <cub/cub.cuh> // or equivalently <cub/util_device.cuh>
template <typename T>
__global__ void ExampleKernel()
{
// Allocate shared memory for BlockScan
__shared__ volatile T buffer[4096];
...
}
...
// Determine SM occupancy for ExampleKernel specialized for unsigned char
int max_sm_occupancy;
MaxSmOccupancy(max_sm_occupancy, ExampleKernel<unsigned char>, 64);
// max_sm_occupancy <-- 4 on SM10
// max_sm_occupancy <-- 8 on SM20
// max_sm_occupancy <-- 12 on SM35
Parameters
[out] max_sm_occupancy: Maximum number of thread blocks that can reside on a single SM
[in] kernel_ptr: Kernel pointer for which to compute SM occupancy
[in] block_threads: Number of threads per thread block

Definition at line 244 of file util_device.cuh.

◆ PtxVersion()

CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t cub::PtxVersion ( int &  ptx_version)

Retrieves the PTX version that will be used on the current device (major * 100 + minor * 10)

Type definition of the EmptyKernel kernel entry point

Force EmptyKernel<void> to be generated if this class is used

Definition at line 118 of file util_device.cuh.

◆ SyncStream()

CUB_RUNTIME_FUNCTION static __forceinline__ cudaError_t cub::SyncStream ( cudaStream_t  stream)
static

Synchronize the stream if specified

Definition at line 199 of file util_device.cuh.