OpenFPM_pdata  4.1.0
Project that contain the implementation of distributed structures
compute_optimal_device_grid.hpp
1 /*
2  * compute_optimal_device_grid.hpp
3  *
4  * Created on: Oct 1, 2017
5  * Author: i-bird
6  */
7 
8 #ifndef OPENFPM_DATA_SRC_UTIL_COMPUTE_OPTIMAL_DEVICE_GRID_HPP_
9 #define OPENFPM_DATA_SRC_UTIL_COMPUTE_OPTIMAL_DEVICE_GRID_HPP_
10 
11 
36 template<unsigned int dim>
37 void calculate_optimal_device_grid(device_grid<dim> & dg,
38  size_t (& sz)[dim],
39  size_t max_block_size,
40  size_t min_block_size)
41 {
42 
43  if (dim == 0) {return ;}
44 
45  size_t tot_block = 1;
46 
47  // Here we calculate the factors for each grid dimension and prioritize the
48  // factors by 2 for the blocks
49 
50  // Get the factors for x
51  std::vector<unsigned long int> x;
52  openfpm::math::getFactorization(sz[0],x);
53 
54  dg.threads.x = 1;
55  size_t jx = 0;
56 
57  while(jx < x.size() && x[jx] == 2 && tot_block < max_block_size)
58  {
59  if (tot_block * 2 > max_block_size)
60  {break;}
61 
62  dg.threads.x *= 2;
63  tot_block *= 2;
64 
65  jx++;
66  }
67 
68  // if we already reach the maximum block size we finished
69  if (tot_block * 2 > max_block_size)
70  {
71  dg.threads.y = 1;
72  dg.threads.z = 1;
73 
74  dg.grids.x = 1;
75  for (; jx < x.size() ; jx++)
76  {dg.grids.x *= x[jx];}
77 
78  if (dim >= 2)
79  {dg.grids.y = sz[1];}
80  else
81  {dg.grids.y = 1;}
82 
83  dg.grids.z = 1;
84  for (size_t k = 2 ; k < dim ; k++)
85  // coverty[dead_error_line]
86  {dg.grids.z *= sz[k];}
87 
88  return;
89  }
90 
91 
92  // Get the factors for y
93  std::vector<unsigned long int> y;
94  size_t jy = 0;
95  dg.threads.y = 1;
96 
97  if (dim >= 2)
98  {
99  openfpm::math::getFactorization(sz[1],y);
100 
101  while(jy < y.size() && y[jy] == 2 && tot_block < max_block_size)
102  {
103  if (tot_block * 2 > max_block_size)
104  {break;}
105 
106  dg.threads.y *= 2;
107  tot_block *= 2;
108 
109  jy++;
110  }
111 
112  // if we already reach the maximum block size we finished
113  if (tot_block * 2 > max_block_size)
114  {
115  dg.threads.z = 1;
116 
117  dg.grids.x = 1;
118  for (; jx < x.size() ; jx++)
119  {dg.grids.x *= x[jx];}
120 
121  dg.grids.y = 1;
122  for (; jy < y.size() ; jy++)
123  {dg.grids.y *= y[jy];}
124 
125  dg.grids.z = 1;
126  for (size_t k = 2 ; k < dim ; k++)
127  {dg.grids.z *= sz[k];}
128 
129  return;
130  }
131  }
132 
133  // Get the factors for z
134  std::vector<unsigned long int> z;
135 
136  size_t jz = 0;
137  dg.threads.z = 1;
138 
139  if (dim >= 3)
140  {
141  openfpm::math::getFactorization(sz[2],z);
142 
143  while(jz < z.size() && z[jz] == 2 && tot_block < max_block_size)
144  {
145  if (tot_block * 2 > max_block_size)
146  {break;}
147 
148  dg.threads.z *= 2;
149  tot_block *= 2;
150 
151  jz++;
152  }
153 
154  // if we already reach the maximum block size we finished
155  if (tot_block * 2 > max_block_size)
156  {
157  dg.grids.x = 1;
158  for (; jx < x.size() ; jx++)
159  {dg.grids.x *= x[jx];}
160 
161  dg.grids.y = 1;
162  for (; jy < y.size() ; jy++)
163  {dg.grids.y *= y[jy];}
164 
165  dg.grids.z = 1;
166  for (; jz < z.size() ; jz++)
167  {dg.grids.z *= z[jz];}
168 
169  for (size_t k = 3 ; k < dim ; k++)
170  // coverty[dead_error_line]
171  {dg.grids.z *= sz[k];}
172 
173  return;
174  }
175  }
176 
177  if (tot_block >= min_block_size)
178  {return;}
179 
180  // Calculate the grids from the threads configuration
181 
182  dg.grids.x = 1;
183  for (size_t k = jx ; k < x.size() ; k++)
184  {dg.grids.x *= x[k];}
185 
186  dg.grids.y = 1;
187  for (size_t k = jy ; k < y.size() ; k++)
188  {dg.grids.y *= y[k];}
189 
190  dg.grids.z = 1;
191  for (size_t k = jz ; k < z.size() ; k++)
192  {dg.grids.z *= z[k];}
193 
194  std::vector<unsigned long int> * ptr_xyz[3];
195  ptr_xyz[0] = &x;
196  ptr_xyz[1] = &y;
197  ptr_xyz[2] = &z;
198 
199  size_t * jj[3];
200  jj[0] = &jx;
201  jj[1] = &jy;
202  jj[2] = &jz;
203 
204  while (tot_block < min_block_size && (jx < x.size() || jy < y.size() || jz < z.size() ) )
205  {
206  size_t best_fact = std::numeric_limits<size_t>::max();
207  size_t k_best = 0;
208 
209  for (size_t k = 0 ; k < dim ; k++)
210  {
211  if (*jj[k] < ptr_xyz[k]->size() && ptr_xyz[k]->operator[](*jj[k]) < best_fact )
212  {
213  best_fact = ptr_xyz[k]->operator[](*jj[k]);
214  k_best = k;
215  }
216  }
217 
218  // The maximum block size cannot be passed
219  if (tot_block * best_fact > max_block_size)
220  {break;}
221 
222  if (k_best == 0)
223  {
224  dg.threads.x *= best_fact;
225  dg.grids.x /= best_fact;
226  }
227  else if (k_best == 1)
228  {
229  dg.threads.y *= best_fact;
230  dg.grids.y /= best_fact;
231  }
232  /* coverty[dead_error_line] */
233  else if (k_best == 2)
234  {
235  dg.threads.z *= best_fact;
236  dg.grids.z /= best_fact;
237  }
238 
239  tot_block *= best_fact;
240 
241  (*jj[k_best])++;
242  }
243 }
244 
245 
246 #endif /* OPENFPM_DATA_SRC_UTIL_COMPUTE_OPTIMAL_DEVICE_GRID_HPP_ */
unsigned int x
size in x dimension
Definition: map_grid.hpp:465
dim3_ grids
number of grid for the kernel execution
Definition: map_grid.hpp:481
dim3_ threads
number of treads in each block
Definition: map_grid.hpp:478
unsigned int y
size in y dimension
Definition: map_grid.hpp:468
unsigned int z
size in z dimension
Definition: map_grid.hpp:471