#include "util_namespace.cuh"
Go to the source code of this file.
Namespaces | |
namespace | cub |
Optional outer namespace(s) | |
Macros | |
#define | CUB_PTX_ARCH 0 |
CUB_PTX_ARCH reflects the PTX version targeted by the active compiler pass (or zero during the host pass). | |
#define | CUB_RUNTIME_ENABLED |
Whether or not the source targeted by the active compiler pass is allowed to invoke device kernels or methods from the CUDA runtime API. | |
#define | CUB_RUNTIME_FUNCTION __host__ __device__ |
#define | CUB_LOG_WARP_THREADS(arch) (5) |
Base-2 logarithm of the number of threads per warp (CUB_WARP_THREADS(arch) expands to the actual thread count). | |
#define | CUB_WARP_THREADS(arch) (1 << CUB_LOG_WARP_THREADS(arch)) |
#define | CUB_PTX_WARP_THREADS CUB_WARP_THREADS(CUB_PTX_ARCH) |
#define | CUB_PTX_LOG_WARP_THREADS CUB_LOG_WARP_THREADS(CUB_PTX_ARCH) |
#define | CUB_LOG_SMEM_BANKS(arch) |
Base-2 logarithm of the number of shared-memory (smem) banks (CUB_SMEM_BANKS(arch) expands to the actual bank count). | |
#define | CUB_SMEM_BANKS(arch) (1 << CUB_LOG_SMEM_BANKS(arch)) |
#define | CUB_PTX_LOG_SMEM_BANKS CUB_LOG_SMEM_BANKS(CUB_PTX_ARCH) |
#define | CUB_PTX_SMEM_BANKS CUB_SMEM_BANKS(CUB_PTX_ARCH) |
#define | CUB_SUBSCRIPTION_FACTOR(arch) |
Oversubscription factor. | |
#define | CUB_PTX_SUBSCRIPTION_FACTOR CUB_SUBSCRIPTION_FACTOR(CUB_PTX_ARCH) |
#define | CUB_PREFER_CONFLICT_OVER_PADDING(arch) |
Conflict threshold: padding overhead is preferred over X-way conflicts greater than this value. | |
#define | CUB_PTX_PREFER_CONFLICT_OVER_PADDING CUB_PREFER_CONFLICT_OVER_PADDING(CUB_PTX_ARCH) |
#define | CUB_SCALED_BLOCK_THREADS(NOMINAL_4B_BLOCK_THREADS, T, PTX_ARCH) |
Scale down the number of threads to keep the same amount of scratch storage as the nominal configuration for 4B data. Minimum of two warps. | |
#define | CUB_SCALED_ITEMS_PER_THREAD(NOMINAL_4B_ITEMS_PER_THREAD, NOMINAL_4B_BLOCK_THREADS, T, PTX_ARCH) |
Scale down the number of items per thread to keep the same amount of register storage as the nominal configuration for 4B data. Minimum of one item per thread. | |
#define | CUB_SCALED_GRANULARITIES(NOMINAL_4B_BLOCK_THREADS, NOMINAL_4B_ITEMS_PER_THREAD, T) |
Define both nominal threads-per-block and items-per-thread. | |
Static architectural properties by SM version.
Definition in file util_arch.cuh.
#define CUB_LOG_SMEM_BANKS(arch)
Base-2 logarithm of the number of shared-memory (smem) banks (CUB_SMEM_BANKS(arch) expands to the actual bank count).
Definition at line 85 of file util_arch.cuh.
#define CUB_LOG_WARP_THREADS(arch) (5)
Base-2 logarithm of the number of threads per warp (CUB_WARP_THREADS(arch) expands to the actual thread count).
Definition at line 73 of file util_arch.cuh.
#define CUB_PREFER_CONFLICT_OVER_PADDING(arch)
Conflict threshold: padding overhead is preferred over X-way conflicts greater than this value.
Definition at line 111 of file util_arch.cuh.
#define CUB_PTX_ARCH 0 |
CUB_PTX_ARCH reflects the PTX version targeted by the active compiler pass (or zero during the host pass).
Definition at line 53 of file util_arch.cuh.
#define CUB_PTX_LOG_SMEM_BANKS CUB_LOG_SMEM_BANKS(CUB_PTX_ARCH) |
Definition at line 92 of file util_arch.cuh.
#define CUB_PTX_LOG_WARP_THREADS CUB_LOG_WARP_THREADS(CUB_PTX_ARCH) |
Definition at line 79 of file util_arch.cuh.
#define CUB_PTX_PREFER_CONFLICT_OVER_PADDING CUB_PREFER_CONFLICT_OVER_PADDING(CUB_PTX_ARCH) |
Definition at line 115 of file util_arch.cuh.
#define CUB_PTX_SMEM_BANKS CUB_SMEM_BANKS(CUB_PTX_ARCH) |
Definition at line 93 of file util_arch.cuh.
#define CUB_PTX_SUBSCRIPTION_FACTOR CUB_SUBSCRIPTION_FACTOR(CUB_PTX_ARCH) |
Definition at line 105 of file util_arch.cuh.
#define CUB_PTX_WARP_THREADS CUB_WARP_THREADS(CUB_PTX_ARCH) |
Definition at line 78 of file util_arch.cuh.
#define CUB_RUNTIME_ENABLED |
Whether or not the source targeted by the active compiler pass is allowed to invoke device kernels or methods from the CUDA runtime API.
Definition at line 63 of file util_arch.cuh.
#define CUB_RUNTIME_FUNCTION __host__ __device__ |
Definition at line 64 of file util_arch.cuh.
#define CUB_SCALED_BLOCK_THREADS(NOMINAL_4B_BLOCK_THREADS, T, PTX_ARCH)
Scale down the number of threads to keep the same amount of scratch storage as the nominal configuration for 4B data. Minimum of two warps.
Definition at line 121 of file util_arch.cuh.
#define CUB_SCALED_GRANULARITIES(NOMINAL_4B_BLOCK_THREADS, NOMINAL_4B_ITEMS_PER_THREAD, T)
Define both nominal threads-per-block and items-per-thread.
Definition at line 141 of file util_arch.cuh.
#define CUB_SCALED_ITEMS_PER_THREAD(NOMINAL_4B_ITEMS_PER_THREAD, NOMINAL_4B_BLOCK_THREADS, T, PTX_ARCH)
Scale down the number of items per thread to keep the same amount of register storage as the nominal configuration for 4B data. Minimum of one item per thread.
Definition at line 131 of file util_arch.cuh.
#define CUB_SMEM_BANKS(arch) (1 << CUB_LOG_SMEM_BANKS(arch))
Definition at line 89 of file util_arch.cuh.
#define CUB_SUBSCRIPTION_FACTOR(arch)
Oversubscription factor.
Definition at line 99 of file util_arch.cuh.
#define CUB_WARP_THREADS(arch) (1 << CUB_LOG_WARP_THREADS(arch))
Definition at line 75 of file util_arch.cuh.