37#include "../util_macro.cuh"
38#include "../util_arch.cuh"
39#include "../util_type.cuh"
40#include "../util_namespace.cuh"
123 unsigned int linear_tid)
126 unsigned int offset = linear_tid;
135 return temp_storage.Alias().buff + offset;
144 unsigned int linear_tid)
#define CUB_MIN(a, b)
Select minimum(a, b)
Optional outer namespace(s)
Alias wrapper allowing storage to be unioned.
BlockRakingLayout provides a conflict-free shared memory layout abstraction for 1D raking across thread block data.
struct __align__(16) _TempStorage
Shared memory storage type.
@ USE_SEGMENT_PADDING
Pad each segment length with one element if segment length is not relatively prime to warp size and can't be optimized as a vector load.
@ UNGUARDED
Whether or not we need bounds checking during raking (the number of reduction elements is not a multiple of the number of raking threads)
@ HAS_CONFLICTS
Whether we will have bank conflicts (technically we should find out if the GCD is > 1)
@ MAX_RAKING_THREADS
Maximum number of warp-synchronous raking threads.
@ SHARED_ELEMENTS
The total number of elements that need to be cooperatively reduced.
@ SEGMENT_LENGTH
Number of raking elements per warp-synchronous raking thread (rounded up)
@ CONFLICT_DEGREE
Degree of bank conflicts (e.g., 4-way)
@ RAKING_THREADS
Never use a raking thread that will have no valid data (e.g., when BLOCK_THREADS is 62 and SEGMENT_LENGTH is 2, we should only use 31 raking threads)
@ GRID_ELEMENTS
Total number of elements in the raking grid.
static __device__ __forceinline__ T * PlacementPtr(TempStorage &temp_storage, unsigned int linear_tid)
Returns the location for the calling thread to place data into the grid.
static __device__ __forceinline__ T * RakingPtr(TempStorage &temp_storage, unsigned int linear_tid)
Returns the location for the calling thread to begin sequential raking.
A storage-backing wrapper that allows types with non-trivial constructors to be aliased in unions.
#define CUB_PTX_ARCH
CUB_PTX_ARCH reflects the PTX version targeted by the active compiler pass (or zero during the host pass).