OpenFPM_pdata  4.1.0 Project that contain the implementation of distributed structures
compute_optimal_device_grid.hpp
1 /*
2  * compute_optimal_device_grid.hpp
3  *
4  * Created on: Oct 1, 2017
5  * Author: i-bird
6  */
7
8 #ifndef OPENFPM_DATA_SRC_UTIL_COMPUTE_OPTIMAL_DEVICE_GRID_HPP_
9 #define OPENFPM_DATA_SRC_UTIL_COMPUTE_OPTIMAL_DEVICE_GRID_HPP_
10
11
36 template<unsigned int dim>
37 void calculate_optimal_device_grid(device_grid<dim> & dg,
38  size_t (& sz)[dim],
39  size_t max_block_size,
40  size_t min_block_size)
41 {
42
43  if (dim == 0) {return ;}
44
45  size_t tot_block = 1;
46
47  // Here we calculate the factors for each grid dimension and prioritize the
48  // factors by 2 for the blocks
49
50  // Get the factors for x
51  std::vector<unsigned long int> x;
52  openfpm::math::getFactorization(sz[0],x);
53
55  size_t jx = 0;
56
57  while(jx < x.size() && x[jx] == 2 && tot_block < max_block_size)
58  {
59  if (tot_block * 2 > max_block_size)
60  {break;}
61
63  tot_block *= 2;
64
65  jx++;
66  }
67
68  // if we already reach the maximum block size we finished
69  if (tot_block * 2 > max_block_size)
70  {
73
74  dg.grids.x = 1;
75  for (; jx < x.size() ; jx++)
76  {dg.grids.x *= x[jx];}
77
78  if (dim >= 2)
79  {dg.grids.y = sz[1];}
80  else
81  {dg.grids.y = 1;}
82
83  dg.grids.z = 1;
84  for (size_t k = 2 ; k < dim ; k++)
86  {dg.grids.z *= sz[k];}
87
88  return;
89  }
90
91
92  // Get the factors for y
93  std::vector<unsigned long int> y;
94  size_t jy = 0;
96
97  if (dim >= 2)
98  {
99  openfpm::math::getFactorization(sz[1],y);
100
101  while(jy < y.size() && y[jy] == 2 && tot_block < max_block_size)
102  {
103  if (tot_block * 2 > max_block_size)
104  {break;}
105
107  tot_block *= 2;
108
109  jy++;
110  }
111
112  // if we already reach the maximum block size we finished
113  if (tot_block * 2 > max_block_size)
114  {
116
117  dg.grids.x = 1;
118  for (; jx < x.size() ; jx++)
119  {dg.grids.x *= x[jx];}
120
121  dg.grids.y = 1;
122  for (; jy < y.size() ; jy++)
123  {dg.grids.y *= y[jy];}
124
125  dg.grids.z = 1;
126  for (size_t k = 2 ; k < dim ; k++)
127  {dg.grids.z *= sz[k];}
128
129  return;
130  }
131  }
132
133  // Get the factors for z
134  std::vector<unsigned long int> z;
135
136  size_t jz = 0;
138
139  if (dim >= 3)
140  {
141  openfpm::math::getFactorization(sz[2],z);
142
143  while(jz < z.size() && z[jz] == 2 && tot_block < max_block_size)
144  {
145  if (tot_block * 2 > max_block_size)
146  {break;}
147
149  tot_block *= 2;
150
151  jz++;
152  }
153
154  // if we already reach the maximum block size we finished
155  if (tot_block * 2 > max_block_size)
156  {
157  dg.grids.x = 1;
158  for (; jx < x.size() ; jx++)
159  {dg.grids.x *= x[jx];}
160
161  dg.grids.y = 1;
162  for (; jy < y.size() ; jy++)
163  {dg.grids.y *= y[jy];}
164
165  dg.grids.z = 1;
166  for (; jz < z.size() ; jz++)
167  {dg.grids.z *= z[jz];}
168
169  for (size_t k = 3 ; k < dim ; k++)
171  {dg.grids.z *= sz[k];}
172
173  return;
174  }
175  }
176
177  if (tot_block >= min_block_size)
178  {return;}
179
180  // Calculate the grids from the threads configuration
181
182  dg.grids.x = 1;
183  for (size_t k = jx ; k < x.size() ; k++)
184  {dg.grids.x *= x[k];}
185
186  dg.grids.y = 1;
187  for (size_t k = jy ; k < y.size() ; k++)
188  {dg.grids.y *= y[k];}
189
190  dg.grids.z = 1;
191  for (size_t k = jz ; k < z.size() ; k++)
192  {dg.grids.z *= z[k];}
193
194  std::vector<unsigned long int> * ptr_xyz[3];
195  ptr_xyz[0] = &x;
196  ptr_xyz[1] = &y;
197  ptr_xyz[2] = &z;
198
199  size_t * jj[3];
200  jj[0] = &jx;
201  jj[1] = &jy;
202  jj[2] = &jz;
203
204  while (tot_block < min_block_size && (jx < x.size() || jy < y.size() || jz < z.size() ) )
205  {
206  size_t best_fact = std::numeric_limits<size_t>::max();
207  size_t k_best = 0;
208
209  for (size_t k = 0 ; k < dim ; k++)
210  {
211  if (*jj[k] < ptr_xyz[k]->size() && ptr_xyz[k]->operator[](*jj[k]) < best_fact )
212  {
213  best_fact = ptr_xyz[k]->operator[](*jj[k]);
214  k_best = k;
215  }
216  }
217
218  // The maximum block size cannot be passed
219  if (tot_block * best_fact > max_block_size)
220  {break;}
221
222  if (k_best == 0)
223  {
225  dg.grids.x /= best_fact;
226  }
227  else if (k_best == 1)
228  {
230  dg.grids.y /= best_fact;
231  }
233  else if (k_best == 2)
234  {
236  dg.grids.z /= best_fact;
237  }
238
239  tot_block *= best_fact;
240
241  (*jj[k_best])++;
242  }
243 }
244
245
246 #endif /* OPENFPM_DATA_SRC_UTIL_COMPUTE_OPTIMAL_DEVICE_GRID_HPP_ */
unsigned int x
size in x dimension
Definition: map_grid.hpp:465
dim3_ grids
number of grid for the kernel execution
Definition: map_grid.hpp:481