/*
 * Architecture- and tuning-related preprocessor macros (CUB_* family).
 *
 * NOTE(review): this text does not preprocess as it stands. The bare numbers
 * in front of each directive (44, 46, 47, 53, ...) look like line numbers of
 * an original listing that were fused into the content, and the gaps in that
 * numbering coincide with missing pieces: several function-like macros end in
 * a continuation backslash with no body after it (CUB_LOG_WARP_THREADS,
 * CUB_LOG_SMEM_BANKS, CUB_SUBSCRIPTION_FACTOR,
 * CUB_PREFER_CONFLICT_OVER_PADDING), and most #else / #endif lines are
 * absent. Restore the section from the upstream header before relying on it.
 *
 * What the visible directives establish:
 *  - The section opens with #ifndef DOXYGEN_SHOULD_SKIP_THIS.
 *  - CUB_USE_COOPERATIVE_GROUPS is defined when __CUDACC_VER_MAJOR__ >= 9 and
 *    the macro has not been defined already.
 *  - CUB_PTX_ARCH is defined either as 0 or as __CUDA_ARCH__. The condition
 *    selecting between the two is not visible here (presumably whether
 *    __CUDA_ARCH__ is defined -- TODO confirm).
 *  - Unless CUB_RUNTIME_FUNCTION is already defined: when __CUDA_ARCH__ is
 *    undefined, or is >= 350 with __CUDACC_RDC__ defined, CUB_RUNTIME_ENABLED
 *    is defined and CUB_RUNTIME_FUNCTION becomes "__host__ __device__". A
 *    second definition as "__host__" follows, presumably the #else branch
 *    -- TODO confirm.
 *  - CUB_WARP_THREADS(arch) is 1 << CUB_LOG_WARP_THREADS(arch), and
 *    CUB_SMEM_BANKS(arch) is 1 << CUB_LOG_SMEM_BANKS(arch).
 *  - Each CUB_PTX_* macro is the corresponding (arch) macro applied to
 *    CUB_PTX_ARCH.
 *  - CUB_SCALED_BLOCK_THREADS(NOMINAL_4B_BLOCK_THREADS, T, PTX_ARCH): only
 *    fragments survive -- NOMINAL_4B_BLOCK_THREADS as one operand, and
 *    CUB_WARP_THREADS(PTX_ARCH) * CUB_MAX(<missing>,
 *    (NOMINAL_4B_BLOCK_THREADS / CUB_WARP_THREADS(PTX_ARCH)) * 4 / sizeof(T)).
 *    The enclosing call and the first CUB_MAX operand are missing.
 *  - CUB_SCALED_ITEMS_PER_THREAD(...): a conditional whose two arms both
 *    divide (NOMINAL_4B_ITEMS_PER_THREAD * NOMINAL_4B_BLOCK_THREADS * 4) /
 *    CUB_MAX(4, sizeof(T)) by CUB_SCALED_BLOCK_THREADS(...); the first arm is
 *    additionally divided by 2. The condition itself is missing.
 *  - CUB_SCALED_GRANULARITIES(NOMINAL_4B_BLOCK_THREADS,
 *    NOMINAL_4B_ITEMS_PER_THREAD, T) expands to two comma-separated
 *    expressions, CUB_SCALED_BLOCK_THREADS(...) and
 *    CUB_SCALED_ITEMS_PER_THREAD(...), both with the arch argument fixed
 *    at 200.
 *  - CUB_MAX is used but not defined anywhere in this text.
 *  - Each macro family is introduced by its own #ifndef, so a definition made
 *    before this point is not replaced.
 */
44 #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document 46 #if (__CUDACC_VER_MAJOR__ >= 9) && !defined(CUB_USE_COOPERATIVE_GROUPS) 47 #define CUB_USE_COOPERATIVE_GROUPS 53 #define CUB_PTX_ARCH 0 55 #define CUB_PTX_ARCH __CUDA_ARCH__ 61 #ifndef CUB_RUNTIME_FUNCTION 62 #if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__>= 350 && defined(__CUDACC_RDC__)) 63 #define CUB_RUNTIME_ENABLED 64 #define CUB_RUNTIME_FUNCTION __host__ __device__ 66 #define CUB_RUNTIME_FUNCTION __host__ 72 #ifndef CUB_LOG_WARP_THREADS 73 #define CUB_LOG_WARP_THREADS(arch) \ 75 #define CUB_WARP_THREADS(arch) \ 76 (1 << CUB_LOG_WARP_THREADS(arch)) 78 #define CUB_PTX_WARP_THREADS CUB_WARP_THREADS(CUB_PTX_ARCH) 79 #define CUB_PTX_LOG_WARP_THREADS CUB_LOG_WARP_THREADS(CUB_PTX_ARCH) 84 #ifndef CUB_LOG_SMEM_BANKS 85 #define CUB_LOG_SMEM_BANKS(arch) \ 89 #define CUB_SMEM_BANKS(arch) \ 90 (1 << CUB_LOG_SMEM_BANKS(arch)) 92 #define CUB_PTX_LOG_SMEM_BANKS CUB_LOG_SMEM_BANKS(CUB_PTX_ARCH) 93 #define CUB_PTX_SMEM_BANKS CUB_SMEM_BANKS(CUB_PTX_ARCH) 98 #ifndef CUB_SUBSCRIPTION_FACTOR 99 #define CUB_SUBSCRIPTION_FACTOR(arch) \ 105 #define CUB_PTX_SUBSCRIPTION_FACTOR CUB_SUBSCRIPTION_FACTOR(CUB_PTX_ARCH) 110 #ifndef CUB_PREFER_CONFLICT_OVER_PADDING 111 #define CUB_PREFER_CONFLICT_OVER_PADDING(arch) \ 115 #define CUB_PTX_PREFER_CONFLICT_OVER_PADDING CUB_PREFER_CONFLICT_OVER_PADDING(CUB_PTX_ARCH) 120 #ifndef CUB_SCALED_BLOCK_THREADS 121 #define CUB_SCALED_BLOCK_THREADS(NOMINAL_4B_BLOCK_THREADS, T, PTX_ARCH) \ 123 NOMINAL_4B_BLOCK_THREADS, \ 124 CUB_WARP_THREADS(PTX_ARCH) * CUB_MAX( \ 126 (NOMINAL_4B_BLOCK_THREADS / CUB_WARP_THREADS(PTX_ARCH)) * 4 / sizeof(T)))) 130 #ifndef CUB_SCALED_ITEMS_PER_THREAD 131 #define CUB_SCALED_ITEMS_PER_THREAD(NOMINAL_4B_ITEMS_PER_THREAD, NOMINAL_4B_BLOCK_THREADS, T, PTX_ARCH) \ 135 ((NOMINAL_4B_ITEMS_PER_THREAD * NOMINAL_4B_BLOCK_THREADS * 4) / CUB_MAX(4, sizeof(T))) / CUB_SCALED_BLOCK_THREADS(NOMINAL_4B_BLOCK_THREADS, T, PTX_ARCH) / 2 : \ 136 ((NOMINAL_4B_ITEMS_PER_THREAD * 
NOMINAL_4B_BLOCK_THREADS * 4) / CUB_MAX(4, sizeof(T))) / CUB_SCALED_BLOCK_THREADS(NOMINAL_4B_BLOCK_THREADS, T, PTX_ARCH)) 140 #ifndef CUB_SCALED_GRANULARITIES 141 #define CUB_SCALED_GRANULARITIES(NOMINAL_4B_BLOCK_THREADS, NOMINAL_4B_ITEMS_PER_THREAD, T) \ 142 CUB_SCALED_BLOCK_THREADS(NOMINAL_4B_BLOCK_THREADS, T, 200), \ 143 CUB_SCALED_ITEMS_PER_THREAD(NOMINAL_4B_ITEMS_PER_THREAD, NOMINAL_4B_BLOCK_THREADS, T, 200) 148 #endif // Do not document Optional outer namespace(s)