OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
grid_even_share.cuh
/******************************************************************************
 * Copyright (c) 2011, Duane Merrill. All rights reserved.
 * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * Neither the name of the NVIDIA CORPORATION nor the
 *       names of its contributors may be used to endorse or promote products
 *       derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 ******************************************************************************/

#pragma once

#include "../util_namespace.cuh"
#include "../util_macro.cuh"
#include "grid_mapping.cuh"

/// Optional outer namespace(s)
CUB_NS_PREFIX

/// CUB namespace
namespace cub {

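/**
 * \addtogroup GridModule
 * @{
 */

/**
 * \brief GridEvenShare is a descriptor utility for distributing input among CUDA
 * thread blocks in an "even-share" fashion.  Each thread block gets roughly the
 * same number of input tiles.
 */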
template <typename OffsetT>
struct GridEvenShare
{
private:

    OffsetT     total_tiles;
    int         big_shares;
    OffsetT     big_share_items;
    OffsetT     normal_share_items;
    OffsetT     normal_base_offset;

public:

    /// Total number of input items
    OffsetT     num_items;

    /// Grid size in thread blocks
    int         grid_size;

    /// OffsetT into input marking the beginning of the owning thread block's segment of input tiles
    OffsetT     block_offset;

    /// OffsetT into input marking the end (one-past) of the owning thread block's segment of input tiles
    OffsetT     block_end;

    /// Stride between input tiles
    OffsetT     block_stride;


    /**
     * \brief Constructor.
     */
    __host__ __device__ __forceinline__ GridEvenShare() :
        total_tiles(0),
        big_shares(0),
        big_share_items(0),
        normal_share_items(0),
        normal_base_offset(0),
        num_items(0),
        grid_size(0),
        block_offset(0),
        block_end(0),
        block_stride(0)
    {}


    /**
     * \brief Dispatch initializer.  To be called prior to kernel launch.
     */
    __host__ __device__ __forceinline__ void DispatchInit(
        OffsetT num_items,      ///< [in] Total number of input data items
        int     max_grid_size,  ///< [in] Maximum grid size allowable (the actual grid size may be smaller if the problem does not warrant it)
        int     tile_items)     ///< [in] Number of data items per input tile
    {
        this->block_offset       = num_items;    // Initialize past-the-end
        this->block_end          = num_items;    // Initialize past-the-end
        this->num_items          = num_items;
        this->total_tiles        = (num_items + tile_items - 1) / tile_items;
        this->grid_size          = CUB_MIN(total_tiles, max_grid_size);
        OffsetT avg_tiles_per_block = total_tiles / grid_size;
        this->big_shares         = total_tiles - (avg_tiles_per_block * grid_size);  // leftover grains go to big blocks
        this->normal_share_items = avg_tiles_per_block * tile_items;
        this->normal_base_offset = big_shares * tile_items;
        this->big_share_items    = normal_share_items + tile_items;
    }
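
    // Worked example (illustrative numbers, not from the original source):
    // DispatchInit(1234, 4, 128) yields total_tiles = 10, grid_size = 4,
    // big_shares = 2, big_share_items = 384, normal_share_items = 256 and
    // normal_base_offset = 256.  Under the "rake" mapping, blocks 0-1 each
    // own 3 tiles ([0,384) and [384,768)); blocks 2-3 each own 2 tiles
    // ([768,1024) and [1024,1234), the last range truncated to num_items).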


    /**
     * \brief Initializes ranges for the specified thread block index.  Specialized
     * for a "raking" access pattern in which each thread block is assigned a
     * consecutive sequence of input tiles.
     */
    template <int TILE_ITEMS>
    __device__ __forceinline__ void BlockInit(
        int block_id,
        Int2Type<GRID_MAPPING_RAKE> /*strategy_tag*/)
    {
        block_stride = TILE_ITEMS;
        if (block_id < big_shares)
        {
            // This thread block gets a big share of grains (avg_tiles_per_block + 1)
            block_offset = (block_id * big_share_items);
            block_end = block_offset + big_share_items;
        }
        else if (block_id < total_tiles)
        {
            // This thread block gets a normal share of grains (avg_tiles_per_block)
            block_offset = normal_base_offset + (block_id * normal_share_items);
            block_end = CUB_MIN(num_items, block_offset + normal_share_items);
        }
        // Else default past-the-end
    }


    /**
     * \brief Initializes ranges for the specified thread block index.  Specialized
     * for a "strip mining" access pattern in which the input tiles assigned to
     * each thread block are separated by a stride equal to the extent of the grid.
     */
    template <int TILE_ITEMS>
    __device__ __forceinline__ void BlockInit(
        int block_id,
        Int2Type<GRID_MAPPING_STRIP_MINE> /*strategy_tag*/)
    {
        block_stride = grid_size * TILE_ITEMS;
        block_offset = (block_id * TILE_ITEMS);
        block_end = num_items;      // every block strides to the end of the input
    }


    /**
     * \brief Block-initialization, specialized for the given grid mapping strategy.
     */
    template <
        int                 TILE_ITEMS,
        GridMappingStrategy STRATEGY>
    __device__ __forceinline__ void BlockInit()
    {
        BlockInit<TILE_ITEMS>(blockIdx.x, Int2Type<STRATEGY>());
    }


    /**
     * \brief Block-initialization, specialized for a "raking" access pattern over
     * an explicitly specified [block_offset, block_end) range of input.
     */
    template <int TILE_ITEMS>
    __device__ __forceinline__ void BlockInit(
        OffsetT block_offset,
        OffsetT block_end)
    {
        this->block_offset = block_offset;
        this->block_end    = block_end;
        this->block_stride = TILE_ITEMS;
    }

};


/** @} */       // end group GridModule

}               // CUB namespace
CUB_NS_POSTFIX  // Optional outer namespace(s)
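
For orientation, a minimal usage sketch follows.  The kernel name SumKernel, the constants BLOCK_THREADS and TILE_ITEMS, and the pointer d_in are illustrative assumptions, not part of this header; only DispatchInit, BlockInit, and the public members come from the code above.

#include "grid_even_share.cuh"

enum { BLOCK_THREADS = 128, TILE_ITEMS = 128 };

template <int ITEMS, typename OffsetT>
__global__ void SumKernel(int *d_in, cub::GridEvenShare<OffsetT> even_share)
{
    // Each thread block computes its own [block_offset, block_end) range
    even_share.template BlockInit<ITEMS, cub::GRID_MAPPING_RAKE>();

    // Visit the owned tiles, advancing by block_stride
    for (OffsetT tile = even_share.block_offset;
         tile < even_share.block_end;
         tile += even_share.block_stride)
    {
        // ... consume up to ITEMS items starting at d_in + tile ...
    }
}

// Host side: size the descriptor once, then launch with the computed grid size
void LaunchSum(int *d_in, int num_items, int max_grid_size)
{
    cub::GridEvenShare<int> even_share;
    even_share.DispatchInit(num_items, max_grid_size, TILE_ITEMS);
    SumKernel<TILE_ITEMS><<<even_share.grid_size, BLOCK_THREADS>>>(d_in, even_share);
}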