37#include "../util_namespace.cuh"
38#include "../util_macro.cuh"
73template <
typename OffsetT>
109 normal_share_items(0),
110 normal_base_offset(0),
130 this->total_tiles = (
num_items + tile_items - 1) / tile_items;
131 this->grid_size =
CUB_MIN(total_tiles, max_grid_size);
133 this->big_shares = total_tiles - (avg_tiles_per_block *
grid_size);
134 this->normal_share_items = avg_tiles_per_block * tile_items;
135 this->normal_base_offset = big_shares * tile_items;
136 this->big_share_items = normal_share_items + tile_items;
145 template <
int TILE_ITEMS>
151 if (block_id < big_shares)
157 else if (block_id < total_tiles)
160 block_offset = normal_base_offset + (block_id * normal_share_items);
172 template <
int TILE_ITEMS>
202 template <
int TILE_ITEMS>
209 this->block_stride = TILE_ITEMS;
GridMappingStrategy
cub::GridMappingStrategy enumerates alternative strategies for mapping constant-sized tiles of device...
#define CUB_MIN(a, b)
Select minimum(a, b)
Optional outer namespace(s)
OffsetT OffsetT
[in] Total number of input data items
GridEvenShare is a descriptor utility for distributing input among CUDA thread blocks in an "even-sha...
__host__ __device__ __forceinline__ GridEvenShare()
Constructor.
int grid_size
Grid size in thread blocks.
__device__ __forceinline__ void BlockInit(int block_id, Int2Type< GRID_MAPPING_STRIP_MINE >)
Block-initialization, specialized for a "strip mining" access pattern in which the input tiles assigned...
OffsetT block_end
OffsetT into input marking the end (one-past) of the owning thread block's segment of input tiles.
OffsetT block_offset
OffsetT into input marking the beginning of the owning thread block's segment of input tiles.
OffsetT block_stride
Stride between input tiles.
__device__ __forceinline__ void BlockInit()
Block-initialization, specialized for a "strip mining" access pattern in which the input tiles assigned...
__device__ __forceinline__ void BlockInit(OffsetT block_offset, OffsetT block_end)
Block-initialization, specialized for a "raking" access pattern in which each thread block is assigne...
OffsetT num_items
Total number of input items.
__device__ __forceinline__ void BlockInit(int block_id, Int2Type< GRID_MAPPING_RAKE >)
Initializes ranges for the specified thread block index. Specialized for a "raking" access pattern in...
__host__ __device__ __forceinline__ void DispatchInit(OffsetT num_items, int max_grid_size, int tile_items)
Dispatch initializer. To be called prior to kernel launch.
Allows for the treatment of an integral constant as a type at compile-time (e.g., to achieve static c...