OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
 
Loading...
Searching...
No Matches
util_arch.cuh File Reference
#include "util_namespace.cuh"

Go to the source code of this file.

Namespaces

namespace  cub
 Optional outer namespace(s)
 

Macros

#define CUB_PTX_ARCH   0
 CUB_PTX_ARCH reflects the PTX version targeted by the active compiler pass (or zero during the host pass).
 
#define CUB_RUNTIME_ENABLED
 Whether or not the source targeted by the active compiler pass is allowed to invoke device kernels or methods from the CUDA runtime API.
 
#define CUB_RUNTIME_FUNCTION   __host__ __device__
 
#define CUB_LOG_WARP_THREADS(arch)    (5)
 Number of threads per warp.
 
#define CUB_WARP_THREADS(arch)    (1 << CUB_LOG_WARP_THREADS(arch))
 
#define CUB_PTX_WARP_THREADS   CUB_WARP_THREADS(CUB_PTX_ARCH)
 
#define CUB_PTX_LOG_WARP_THREADS   CUB_LOG_WARP_THREADS(CUB_PTX_ARCH)
 
#define CUB_LOG_SMEM_BANKS(arch)
 Number of smem banks.
 
#define CUB_SMEM_BANKS(arch)    (1 << CUB_LOG_SMEM_BANKS(arch))
 
#define CUB_PTX_LOG_SMEM_BANKS   CUB_LOG_SMEM_BANKS(CUB_PTX_ARCH)
 
#define CUB_PTX_SMEM_BANKS   CUB_SMEM_BANKS(CUB_PTX_ARCH)
 
#define CUB_SUBSCRIPTION_FACTOR(arch)
 Oversubscription factor.
 
#define CUB_PTX_SUBSCRIPTION_FACTOR   CUB_SUBSCRIPTION_FACTOR(CUB_PTX_ARCH)
 
#define CUB_PREFER_CONFLICT_OVER_PADDING(arch)
 Prefer padding overhead vs X-way conflicts greater than this threshold.
 
#define CUB_PTX_PREFER_CONFLICT_OVER_PADDING   CUB_PREFER_CONFLICT_OVER_PADDING(CUB_PTX_ARCH)
 
#define CUB_SCALED_BLOCK_THREADS(NOMINAL_4B_BLOCK_THREADS, T, PTX_ARCH)
 Scale down the number of threads to keep the same amount of scratch storage as the nominal configuration for 4B data. Minimum of two warps.
 
#define CUB_SCALED_ITEMS_PER_THREAD(NOMINAL_4B_ITEMS_PER_THREAD, NOMINAL_4B_BLOCK_THREADS, T, PTX_ARCH)
 Scale down number of items per thread to keep the same amount of register storage as the nominal configuration for 4B data. Minimum 1 item per thread.
 
#define CUB_SCALED_GRANULARITIES(NOMINAL_4B_BLOCK_THREADS, NOMINAL_4B_ITEMS_PER_THREAD, T)
 Define both nominal threads-per-block and items-per-thread.
 

Detailed Description

Static architectural properties by SM version.

Definition in file util_arch.cuh.

Macro Definition Documentation

◆ CUB_LOG_SMEM_BANKS

#define CUB_LOG_SMEM_BANKS (   arch)
Value:
((arch >= 200) ? \
(5) : \
(4))

Number of smem banks.

Definition at line 85 of file util_arch.cuh.

◆ CUB_LOG_WARP_THREADS

#define CUB_LOG_WARP_THREADS (   arch)     (5)

Number of threads per warp.

Definition at line 73 of file util_arch.cuh.

◆ CUB_PREFER_CONFLICT_OVER_PADDING

#define CUB_PREFER_CONFLICT_OVER_PADDING (   arch)
Value:
((arch >= 300) ? \
(1) : \
(4))

Prefer padding overhead vs X-way conflicts greater than this threshold.

Definition at line 111 of file util_arch.cuh.

◆ CUB_PTX_ARCH

#define CUB_PTX_ARCH   0

CUB_PTX_ARCH reflects the PTX version targeted by the active compiler pass (or zero during the host pass).

Definition at line 53 of file util_arch.cuh.

◆ CUB_PTX_LOG_SMEM_BANKS

#define CUB_PTX_LOG_SMEM_BANKS   CUB_LOG_SMEM_BANKS(CUB_PTX_ARCH)

Definition at line 92 of file util_arch.cuh.

◆ CUB_PTX_LOG_WARP_THREADS

#define CUB_PTX_LOG_WARP_THREADS   CUB_LOG_WARP_THREADS(CUB_PTX_ARCH)

Definition at line 79 of file util_arch.cuh.

◆ CUB_PTX_PREFER_CONFLICT_OVER_PADDING

#define CUB_PTX_PREFER_CONFLICT_OVER_PADDING   CUB_PREFER_CONFLICT_OVER_PADDING(CUB_PTX_ARCH)

Definition at line 115 of file util_arch.cuh.

◆ CUB_PTX_SMEM_BANKS

#define CUB_PTX_SMEM_BANKS   CUB_SMEM_BANKS(CUB_PTX_ARCH)

Definition at line 93 of file util_arch.cuh.

◆ CUB_PTX_SUBSCRIPTION_FACTOR

#define CUB_PTX_SUBSCRIPTION_FACTOR   CUB_SUBSCRIPTION_FACTOR(CUB_PTX_ARCH)

Definition at line 105 of file util_arch.cuh.

◆ CUB_PTX_WARP_THREADS

#define CUB_PTX_WARP_THREADS   CUB_WARP_THREADS(CUB_PTX_ARCH)

Definition at line 78 of file util_arch.cuh.

◆ CUB_RUNTIME_ENABLED

#define CUB_RUNTIME_ENABLED

Whether or not the source targeted by the active compiler pass is allowed to invoke device kernels or methods from the CUDA runtime API.

Definition at line 63 of file util_arch.cuh.

◆ CUB_RUNTIME_FUNCTION

#define CUB_RUNTIME_FUNCTION   __host__ __device__

Definition at line 64 of file util_arch.cuh.

◆ CUB_SCALED_BLOCK_THREADS

#define CUB_SCALED_BLOCK_THREADS (   NOMINAL_4B_BLOCK_THREADS,
  T,
  PTX_ARCH 
)
Value:
(CUB_MIN( \
NOMINAL_4B_BLOCK_THREADS, \
CUB_WARP_THREADS(PTX_ARCH) * CUB_MAX( \
2, \
(NOMINAL_4B_BLOCK_THREADS / CUB_WARP_THREADS(PTX_ARCH)) * 4 / sizeof(T))))
#define CUB_MAX(a, b)
Select maximum(a, b)
#define CUB_MIN(a, b)
Select minimum(a, b)

Scale down the number of threads to keep the same amount of scratch storage as the nominal configuration for 4B data. Minimum of two warps.

Definition at line 121 of file util_arch.cuh.

◆ CUB_SCALED_GRANULARITIES

#define CUB_SCALED_GRANULARITIES (   NOMINAL_4B_BLOCK_THREADS,
  NOMINAL_4B_ITEMS_PER_THREAD,
  T 
)
Value:
CUB_SCALED_BLOCK_THREADS(NOMINAL_4B_BLOCK_THREADS, T, 200), \
CUB_SCALED_ITEMS_PER_THREAD(NOMINAL_4B_ITEMS_PER_THREAD, NOMINAL_4B_BLOCK_THREADS, T, 200)
#define CUB_SCALED_BLOCK_THREADS(NOMINAL_4B_BLOCK_THREADS, T, PTX_ARCH)
Scale down the number of threads to keep the same amount of scratch storage as the nominal configuration for 4B data. Minimum of two warps.

Define both nominal threads-per-block and items-per-thread.

Definition at line 141 of file util_arch.cuh.

◆ CUB_SCALED_ITEMS_PER_THREAD

#define CUB_SCALED_ITEMS_PER_THREAD (   NOMINAL_4B_ITEMS_PER_THREAD,
  NOMINAL_4B_BLOCK_THREADS,
  T,
  PTX_ARCH 
)
Value:
CUB_MAX( \
1, \
(sizeof(T) < 4) ? \
((NOMINAL_4B_ITEMS_PER_THREAD * NOMINAL_4B_BLOCK_THREADS * 4) / CUB_MAX(4, sizeof(T))) / CUB_SCALED_BLOCK_THREADS(NOMINAL_4B_BLOCK_THREADS, T, PTX_ARCH) / 2 : \
((NOMINAL_4B_ITEMS_PER_THREAD * NOMINAL_4B_BLOCK_THREADS * 4) / CUB_MAX(4, sizeof(T))) / CUB_SCALED_BLOCK_THREADS(NOMINAL_4B_BLOCK_THREADS, T, PTX_ARCH))

Scale down number of items per thread to keep the same amount of register storage as the nominal configuration for 4B data. Minimum 1 item per thread.

Definition at line 131 of file util_arch.cuh.

◆ CUB_SMEM_BANKS

#define CUB_SMEM_BANKS (   arch)     (1 << CUB_LOG_SMEM_BANKS(arch))

Definition at line 89 of file util_arch.cuh.

◆ CUB_SUBSCRIPTION_FACTOR

#define CUB_SUBSCRIPTION_FACTOR (   arch)
Value:
((arch >= 300) ? \
(5) : \
((arch >= 200) ? \
(3) : \
(10)))

Oversubscription factor.

Definition at line 99 of file util_arch.cuh.

◆ CUB_WARP_THREADS

#define CUB_WARP_THREADS (   arch)     (1 << CUB_LOG_WARP_THREADS(arch))

Definition at line 75 of file util_arch.cuh.