OpenFPM_pdata  4.1.0
Project that contain the implementation of distributed structures
SparseGrid_conv_opt.hpp
1 /*
2  * SparseGrid_conv_opt.hpp
3  *
4  * Created on: Jul 19, 2020
5  * Author: i-bird
6  */
7 
8 #ifndef SPARSEGRID_CONV_OPT_HPP_
9 #define SPARSEGRID_CONV_OPT_HPP_
10 
11 template<unsigned int l>
12 union data_il
13 {
14 };
15 
16 template<>
17 union data_il<8>
18 {
19  typedef long int type;
20 
21  unsigned char uc[8];
22  long int i;
23 };
24 
25 template<>
26 union data_il<4>
27 {
28  typedef int type;
29 
30  unsigned char uc[4];
31  int i;
32 };
33 
34 template<>
35 union data_il<2>
36 {
37  typedef short int type;
38 
39  unsigned char uc[2];
40  short int i;
41 };
42 
43 template<>
44 union data_il<1>
45 {
46  typedef char type;
47 
48  unsigned char uc[4];
49  char i;
50 };
51 
52 
53 template<unsigned int dim, unsigned int sz>
54 struct ids_crs
55 {
56  long int sumdm[dim];
57  long int sumdp[dim];
58 
59  long int s2;
60  bool mask_row[sz];
61  int k;
62 };
63 
64 
65 
66 template<unsigned int dim>
67 struct conv_impl
68 {
69  template<unsigned int prop_src, unsigned int prop_dst, unsigned int stencil_size , unsigned int N, typename SparseGridType, typename lambda_f, typename ... ArgsT >
70  void conv(int (& stencil)[N][3], grid_key_dx<3> & start, grid_key_dx<3> & stop, SparseGridType & grid , lambda_f func, ArgsT ... args)
71  {
72 #ifndef __NVCC__
73  std::cout << __FILE__ << ":" << __LINE__ << " error conv operation not implemented for this dimension " << std::endl;
74 #else
75  std::cout << __FILE__ << ":" << __LINE__ << " error conv is unsupported when compiled on NVCC " << std::endl;
76 #endif
77  }
78 
79  template<bool findNN, unsigned int prop_src, unsigned int prop_dst, unsigned int stencil_size, typename SparseGridType, typename lambda_f, typename ... ArgsT >
80  static void conv_cross(grid_key_dx<3> & start, grid_key_dx<3> & stop, SparseGridType & grid , lambda_f func, ArgsT ... args)
81  {
82 #ifndef __NVCC__
83  std::cout << __FILE__ << ":" << __LINE__ << " error conv_cross operation not implemented for this dimension " << std::endl;
84 #else
85  std::cout << __FILE__ << ":" << __LINE__ << " error conv_cross is unsupported when compiled on NVCC " << std::endl;
86 #endif
87  }
88 
89  template<bool findNN, typename NNType, unsigned int prop_src1, unsigned int prop_src2,
90  unsigned int prop_dst1, unsigned int prop_dst2,
91  unsigned int stencil_size , unsigned int N,
92  typename SparseGridType, typename lambda_f, typename ... ArgsT >
93  static void conv2(int (& stencil)[N][3], grid_key_dx<3> & start, grid_key_dx<3> & stop, SparseGridType & grid , lambda_f func, ArgsT ... args)
94  {
95 #ifndef __NVCC__
96  std::cout << __FILE__ << ":" << __LINE__ << " error conv2 operation not implemented for this dimension " << std::endl;
97 #else
98  std::cout << __FILE__ << ":" << __LINE__ << " error conv2 is unsupported when compiled on NVCC " << std::endl;
99 #endif
100  }
101 
102  template<bool findNN, unsigned int prop_src1, unsigned int prop_src2, unsigned int prop_dst1, unsigned int prop_dst2, unsigned int stencil_size, typename SparseGridType, typename lambda_f, typename ... ArgsT >
103  static void conv_cross2(grid_key_dx<3> & start, grid_key_dx<3> & stop, SparseGridType & grid , lambda_f func, ArgsT ... args)
104  {
105 #ifndef __NVCC__
106  std::cout << __FILE__ << ":" << __LINE__ << " error conv_cross2 operation not implemented for this dimension " << std::endl;
107 #else
108  std::cout << __FILE__ << ":" << __LINE__ << " error conv_cross2 is unsupported when compiled on NVCC " << std::endl;
109 #endif
110  }
111 };
112 
113 #if !defined(__NVCC__) || defined(CUDA_ON_CPU) || defined(__HIP__)
114 
115 
116 template<unsigned int dir,int p, unsigned int prop_src1,typename chunk_type, typename vect_type, typename ids_type>
117 void load_crs(vect_type & cs1, chunk_type & chunk, ids_type & ids)
118 {
119  if (dir == 0 && p < 0)
120  {
121  Vc::Vector<typename vect_type::EntryType> cmd1(&chunk.template get<prop_src1>()[ids.s2]);
122 
123  cs1 = cmd1;
124  cs1 = cs1.shifted(-1);
125  cs1[0] = chunk.template get<prop_src1>()[ids.sumdm[dir]];
126  }
127  else if (dir == 0 && p > 0)
128  {
129  Vc::Vector<typename vect_type::EntryType> cmd1(&chunk.template get<prop_src1>()[ids.s2]);
130 
131  cs1 = cmd1;
132  cs1 = cs1.shifted(1);
133  cs1[Vc::Vector<typename vect_type::EntryType>::Size - 1] = chunk.template get<prop_src1>()[ids.sumdp[0]];
134  }
135  else if (p < 0)
136  {
137  cs1.load(&chunk.template get<prop_src1>()[ids.sumdm[dir]],Vc::Aligned);
138  }
139  else if (p > 0)
140  {
141  cs1.load(&chunk.template get<prop_src1>()[ids.sumdp[dir]],Vc::Aligned);
142  }
143  else
144  {
145  Vc::Vector<typename vect_type::EntryType> cmd1(&chunk.template get<prop_src1>()[ids.s2]);
146 
147  cs1 = cmd1;
148  }
149 }
150 
151 template<unsigned int prop_dst1,typename chunk_type, typename vect_type, typename ids_type>
152 void store_crs(chunk_type & chunk, vect_type & res, ids_type & ids)
153 {
154  Vc::Mask<typename vect_type::EntryType> m(&ids.mask_row[ids.k]);
155 
156  res.store(&chunk.template get<prop_dst1>()[ids.s2],m,Vc::Aligned);
157 }
158 
159 template<unsigned int prop_dst1,unsigned int comp, typename chunk_type, typename vect_type, typename ids_type>
160 void store_crs_v(chunk_type & chunk, vect_type & res, ids_type & ids)
161 {
162  Vc::Mask<typename vect_type::EntryType> m(&ids.mask_row[ids.k]);
163 
164  res.store(&chunk.template get<prop_dst1>()[comp][ids.s2],m,Vc::Aligned);
165 }
166 
167 template<unsigned int dir,int p, unsigned int comp, unsigned int prop_src1,typename chunk_type, typename vect_type, typename ids_type>
168 void load_crs_v(vect_type & cs1, chunk_type & chunk, ids_type & ids)
169 {
170  if (dir == 0 && p < 0)
171  {
172  Vc::Vector<typename vect_type::EntryType> cmd1(&chunk.template get<prop_src1>()[comp][ids.s2]);
173 
174  cs1 = cmd1;
175  cs1 = cs1.shifted(-1);
176  cs1[0] = chunk.template get<prop_src1>()[comp][ids.sumdm[dir]];
177  }
178  else if (dir == 0 && p > 0)
179  {
180  Vc::Vector<typename vect_type::EntryType> cmd1(&chunk.template get<prop_src1>()[comp][ids.s2]);
181 
182  cs1 = cmd1;
183  cs1 = cs1.shifted(1);
184  cs1[Vc::Vector<typename vect_type::EntryType>::Size - 1] = chunk.template get<prop_src1>()[comp][ids.sumdp[dir]];
185  }
186  else if (p < 0)
187  {
188  cs1.load(&chunk.template get<prop_src1>()[comp][ids.sumdm[dir]],Vc::Aligned);
189  }
190  else if (p > 0)
191  {
192  cs1.load(&chunk.template get<prop_src1>()[comp][ids.sumdp[dir]],Vc::Aligned);
193  }
194  else
195  {
196  Vc::Vector<typename vect_type::EntryType> cmd1(&chunk.template get<prop_src1>()[comp][ids.s2]);
197 
198  cs1 = cmd1;
199  }
200 }
201 
202 
203 template<typename prop_type>
205 {
206  Vc::Vector<prop_type> xm;
207  Vc::Vector<prop_type> xp;
208  Vc::Vector<prop_type> ym;
209  Vc::Vector<prop_type> yp;
210  Vc::Vector<prop_type> zm;
211  Vc::Vector<prop_type> zp;
212 };
213 
214 template<>
215 struct conv_impl<3>
216 {
217  template<bool findNN, typename NNtype, unsigned int prop_src, unsigned int prop_dst, unsigned int stencil_size , unsigned int N, typename SparseGridType, typename lambda_f, typename ... ArgsT >
218  static void conv(int (& stencil)[N][3], grid_key_dx<3> & start, grid_key_dx<3> & stop, SparseGridType & grid , lambda_f func, ArgsT ... args)
219  {
220  auto it = grid.template getBlockIterator<stencil_size>(start,stop);
221 
222  typedef typename boost::mpl::at<typename SparseGridType::value_type::type, boost::mpl::int_<prop_src>>::type prop_type;
223 
224  unsigned char mask[decltype(it)::sizeBlockBord];
225  unsigned char mask_sum[decltype(it)::sizeBlockBord];
226  unsigned char mask_unused[decltype(it)::sizeBlock];
227  __attribute__ ((aligned (32))) prop_type block_bord_src[decltype(it)::sizeBlockBord];
228  __attribute__ ((aligned (32))) prop_type block_bord_dst[decltype(it)::sizeBlock];
229 
230  typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<0>>::type sz0;
231  typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<1>>::type sz1;
232  typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<2>>::type sz2;
233 
234  while (it.isNext())
235  {
236  it.template loadBlockBorder<prop_src,NNtype,findNN>(block_bord_src,mask);
237 
238  if (it.start_b(2) != stencil_size || it.start_b(1) != stencil_size || it.start_b(0) != stencil_size ||
239  it.stop_b(2) != sz2::value+stencil_size || it.stop_b(1) != sz1::value+stencil_size || it.stop_b(0) != sz0::value+stencil_size)
240  {
241  auto & header_mask = grid.private_get_header_mask();
242  auto & header_inf = grid.private_get_header_inf();
243 
244  loadBlock_impl<prop_dst,0,3,typename decltype(it)::vector_blocks_exts_type, typename decltype(it)::vector_ext_type>::template loadBlock<decltype(it)::sizeBlock>(block_bord_dst,grid,it.getChunkId(),mask_unused);
245  }
246 
247  // Sum the mask
248  for (int k = it.start_b(2) ; k < it.stop_b(2) ; k++)
249  {
250  for (int j = it.start_b(1) ; j < it.stop_b(1) ; j++)
251  {
252  int cc = it.LinB(it.start_b(0),j,k);
253  int c[N];
254 
255  for (int s = 0 ; s < N ; s++)
256  {
257  c[s] = it.LinB(it.start_b(0)+stencil[s][0],j+stencil[s][1],k+stencil[s][2]);
258  }
259 
260  for (int i = it.start_b(0) ; i < it.stop_b(0) ; i += sizeof(size_t))
261  {
262  size_t cmd = *(size_t *)&mask[cc];
263 
264  if (cmd != 0)
265  {
266  size_t xm[N];
267 
268  for (int s = 0 ; s < N ; s++)
269  {
270  xm[s] = *(size_t *)&mask[c[s]];
271  }
272 
273  size_t sum = 0;
274  for (int s = 0 ; s < N ; s++)
275  {
276  sum += xm[s];
277  }
278 
279  *(size_t *)&mask_sum[cc] = sum;
280  }
281 
282  cc += sizeof(size_t);
283  for (int s = 0 ; s < N ; s++)
284  {
285  c[s] += sizeof(size_t);
286  }
287  }
288  }
289  }
290 
291  for (int k = it.start_b(2) ; k < it.stop_b(2) ; k++)
292  {
293  for (int j = it.start_b(1) ; j < it.stop_b(1) ; j++)
294  {
295  int cc = it.LinB(it.start_b(0),j,k);
296  int c[N];
297 
298  int cd = it.LinB_off(it.start_b(0),j,k);
299 
300  for (int s = 0 ; s < N ; s++)
301  {
302  c[s] = it.LinB(it.start_b(0)+stencil[s][0],j+stencil[s][1],k+stencil[s][2]);
303  }
304 
305  for (int i = it.start_b(0) ; i < it.stop_b(0) ; i += Vc::Vector<prop_type>::Size)
306  {
307  Vc::Mask<prop_type> cmp;
308 
309  for (int s = 0 ; s < Vc::Vector<prop_type>::Size ; s++)
310  {
311  cmp[s] = (mask[cc+s] == true && i+s < it.stop_b(0));
312  }
313 
314  // we do only if exist the point
315  if (Vc::none_of(cmp) == false)
316  {
317  Vc::Mask<prop_type> surround;
318 
319  Vc::Vector<prop_type> xs[N+1];
320 
321  xs[0] = Vc::Vector<prop_type>(&block_bord_src[cc],Vc::Unaligned);
322 
323  for (int s = 1 ; s < N+1 ; s++)
324  {
325  xs[s] = Vc::Vector<prop_type>(&block_bord_src[c[s-1]],Vc::Unaligned);
326  }
327 
328  auto res = func(xs, &mask_sum[cc], args ...);
329 
330  res.store(&block_bord_dst[cd],cmp,Vc::Aligned);
331  }
332 
333  cc += Vc::Vector<prop_type>::Size;
334  for (int s = 0 ; s < N ; s++)
335  {
336  c[s] += Vc::Vector<prop_type>::Size;
337  }
338  cd += Vc::Vector<prop_type>::Size;
339  }
340  }
341  }
342 
343  it.template storeBlock<prop_dst>(block_bord_dst);
344 
345  ++it;
346  }
347  }
348 
349  template<bool findNN, unsigned int prop_src, unsigned int prop_dst, unsigned int stencil_size, typename SparseGridType, typename lambda_f, typename ... ArgsT >
350  static void conv_cross(grid_key_dx<3> & start, grid_key_dx<3> & stop, SparseGridType & grid , lambda_f func, ArgsT ... args)
351  {
352  auto it = grid.template getBlockIterator<1>(start,stop);
353 
354  auto & datas = grid.private_get_data();
355  auto & headers = grid.private_get_header_mask();
356 
357  typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<0>>::type sz0;
358  typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<1>>::type sz1;
359  typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<2>>::type sz2;
360 
361  typedef typename SparseGridType::chunking_type chunking;
362 
363  typedef typename boost::mpl::at<typename SparseGridType::value_type::type, boost::mpl::int_<prop_src>>::type prop_type;
364 
365  while (it.isNext())
366  {
367  // Load
368  long int offset_jump[6];
369 
370  size_t cid = it.getChunkId();
371 
372  auto chunk = datas.get(cid);
373  auto & mask = headers.get(cid);
374 
375  bool exist;
376  grid_key_dx<3> p = grid.getChunkPos(cid) + grid_key_dx<3>({-1,0,0});
377  long int r = grid.getChunk(p,exist);
378  offset_jump[0] = (r-cid)*decltype(it)::sizeBlock;
379 
380  p = grid.getChunkPos(cid) + grid_key_dx<3>({1,0,0});
381  r = grid.getChunk(p,exist);
382  offset_jump[1] = (r-cid)*decltype(it)::sizeBlock;
383 
384  p = grid.getChunkPos(cid) + grid_key_dx<3>({0,-1,0});
385  r = grid.getChunk(p,exist);
386  offset_jump[2] = (r-cid)*decltype(it)::sizeBlock;
387 
388  p = grid.getChunkPos(cid) + grid_key_dx<3>({0,1,0});
389  r = grid.getChunk(p,exist);
390  offset_jump[3] = (r-cid)*decltype(it)::sizeBlock;
391 
392  p = grid.getChunkPos(cid) + grid_key_dx<3>({0,0,-1});
393  r = grid.getChunk(p,exist);
394  offset_jump[4] = (r-cid)*decltype(it)::sizeBlock;
395 
396  p = grid.getChunkPos(cid) + grid_key_dx<3>({0,0,1});
397  r = grid.getChunk(p,exist);
398  offset_jump[5] = (r-cid)*decltype(it)::sizeBlock;
399 
400  // Load offset jumps
401 
402  // construct a row mask
403 
404  long int s2 = 0;
405 
406  typedef typename boost::mpl::at<typename chunking::type,boost::mpl::int_<2>>::type sz;
407  typedef typename boost::mpl::at<typename chunking::type,boost::mpl::int_<1>>::type sy;
408  typedef typename boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type sx;
409 
410 
411  bool mask_row[sx::value];
412 
413  for (int k = 0 ; k < sx::value ; k++)
414  {
415  mask_row[k] = (k >= it.start(0) && k < it.stop(0))?true:false;
416  }
417 
418  for (int v = it.start(2) ; v < it.stop(2) ; v++)
419  {
420  for (int j = it.start(1) ; j < it.stop(1) ; j++)
421  {
422  s2 = it.Lin(0,j,v);
423  for (int k = 0 ; k < sx::value ; k += Vc::Vector<prop_type>::Size)
424  {
425  // we do only id exist the point
426  if (*(typename data_il<Vc::Vector<prop_type>::Size>::type *)&mask.mask[s2] == 0) {s2 += Vc::Vector<prop_type>::Size; continue;}
427 
434 
436 
437  Vc::Vector<prop_type> cmd(&chunk.template get<prop_src>()[s2]);
438 
439  // Load x-1
440  long int sumxm = s2-1;
441  sumxm += (k==0)?offset_jump[0] + sx::value:0;
442 
443  // Load x+1
444  long int sumxp = s2+Vc::Vector<prop_type>::Size;
445  sumxp += (k+Vc::Vector<prop_type>::Size == sx::value)?offset_jump[1] - sx::value:0;
446 
447  long int sumym = (j == 0)?offset_jump[2] + (sy::value-1)*sx::value:-sx::value;
448  sumym += s2;
449  long int sumyp = (j == sy::value-1)?offset_jump[3] - (sy::value - 1)*sx::value:sx::value;
450  sumyp += s2;
451  long int sumzm = (v == 0)?offset_jump[4] + (sz::value-1)*sx::value*sy::value:-sx::value*sy::value;
452  sumzm += s2;
453  long int sumzp = (v == sz::value-1)?offset_jump[5] - (sz::value - 1)*sx::value*sy::value:sx::value*sy::value;
454  sumzp += s2;
455 
456  if (Vc::Vector<prop_type>::Size == 2 || Vc::Vector<prop_type>::Size == 4 || Vc::Vector<prop_type>::Size == 8)
457  {
458  mxm.i = *(typename data_il<Vc::Vector<prop_type>::Size>::type *)&mask.mask[s2];
459  mxm.i = mxm.i << 8;
460  mxm.i |= (typename data_il<Vc::Vector<prop_type>::Size>::type)mask.mask[sumxm];
461 
462  mxp.i = *(typename data_il<Vc::Vector<prop_type>::Size>::type *)&mask.mask[s2];
463  mxp.i = mxp.i >> 8;
464  mxp.i |= ((typename data_il<Vc::Vector<prop_type>::Size>::type)mask.mask[sumxp]) << (Vc::Vector<prop_type>::Size - 1)*8;
465 
466  mym.i = *(typename data_il<Vc::Vector<prop_type>::Size>::type *)&mask.mask[sumym];
467  myp.i = *(typename data_il<Vc::Vector<prop_type>::Size>::type *)&mask.mask[sumyp];
468 
469  mzm.i = *(typename data_il<Vc::Vector<prop_type>::Size>::type *)&mask.mask[sumzm];
470  mzp.i = *(typename data_il<Vc::Vector<prop_type>::Size>::type *)&mask.mask[sumzp];
471  }
472  else
473  {
474  std::cout << __FILE__ << ":" << __LINE__ << " UNSUPPORTED" << std::endl;
475  }
476 
477  cs.xm = cmd;
478  cs.xm = cs.xm.shifted(-1);
479  cs.xm[0] = chunk.template get<prop_src>()[sumxm];
480 
481 
482  cs.xp = cmd;
483  cs.xp = cs.xp.shifted(1);
484  cs.xp[Vc::Vector<prop_type>::Size - 1] = chunk.template get<prop_src>()[sumxp];
485 
486  // Load y and z direction
487 
488  cs.ym.load(&chunk.template get<prop_src>()[sumym],Vc::Aligned);
489  cs.yp.load(&chunk.template get<prop_src>()[sumyp],Vc::Aligned);
490  cs.zm.load(&chunk.template get<prop_src>()[sumzm],Vc::Aligned);
491  cs.zp.load(&chunk.template get<prop_src>()[sumzp],Vc::Aligned);
492 
493  // Calculate
494 
496  tot_m.i = mxm.i + mxp.i + mym.i + myp.i + mzm.i + mzp.i;
497 
498  Vc::Vector<prop_type> res = func(cmd,cs,tot_m.uc,args ... );
499 
500  Vc::Mask<prop_type> m(&mask_row[k]);
501 
502  res.store(&chunk.template get<prop_dst>()[s2],m,Vc::Aligned);
503 
504  s2 += Vc::Vector<prop_type>::Size;
505  }
506  }
507  }
508 
509  ++it;
510  }
511  }
512 
513 
514  template<bool findNN, typename NNType, unsigned int prop_src1, unsigned int prop_src2,
515  unsigned int prop_dst1, unsigned int prop_dst2,
516  unsigned int stencil_size , unsigned int N,
517  typename SparseGridType, typename lambda_f, typename ... ArgsT >
518  static void conv2(int (& stencil)[N][3], grid_key_dx<3> & start, grid_key_dx<3> & stop, SparseGridType & grid , lambda_f func, ArgsT ... args)
519  {
520  auto it = grid.template getBlockIterator<stencil_size>(start,stop);
521 
522  typedef typename boost::mpl::at<typename SparseGridType::value_type::type, boost::mpl::int_<prop_src1>>::type prop_type;
523 
524  unsigned char mask[decltype(it)::sizeBlockBord];
525  unsigned char mask_sum[decltype(it)::sizeBlockBord];
526  __attribute__ ((aligned (64))) prop_type block_bord_src1[decltype(it)::sizeBlockBord];
527  __attribute__ ((aligned (64))) prop_type block_bord_dst1[decltype(it)::sizeBlock+16];
528  __attribute__ ((aligned (64))) prop_type block_bord_src2[decltype(it)::sizeBlockBord];
529  __attribute__ ((aligned (64))) prop_type block_bord_dst2[decltype(it)::sizeBlock+16];
530 
531  typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<0>>::type sz0;
532  typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<1>>::type sz1;
533  typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<2>>::type sz2;
534 
535  while (it.isNext())
536  {
537  it.template loadBlockBorder<prop_src1,NNType,findNN>(block_bord_src1,mask);
538  it.template loadBlockBorder<prop_src2,NNType,findNN>(block_bord_src2,mask);
539 
540  // Sum the mask
541  for (int k = it.start_b(2) ; k < it.stop_b(2) ; k++)
542  {
543  for (int j = it.start_b(1) ; j < it.stop_b(1) ; j++)
544  {
545  int cc = it.LinB(it.start_b(0),j,k);
546  int c[N];
547 
548  for (int s = 0 ; s < N ; s++)
549  {
550  c[s] = it.LinB(it.start_b(0)+stencil[s][0],j+stencil[s][1],k+stencil[s][2]);
551  }
552 
553  for (int i = it.start_b(0) ; i < it.stop_b(0) ; i += sizeof(size_t))
554  {
555  size_t cmd = *(size_t *)&mask[cc];
556 
557  if (cmd == 0) {continue;}
558 
559 
560  size_t xm[N];
561 
562  for (int s = 0 ; s < N ; s++)
563  {
564  xm[s] = *(size_t *)&mask[c[s]];
565  }
566 
567  size_t sum = 0;
568  for (int s = 0 ; s < N ; s++)
569  {
570  sum += xm[s];
571  }
572 
573  *(size_t *)&mask_sum[cc] = sum;
574 
575  cc += sizeof(size_t);
576  for (int s = 0 ; s < N ; s++)
577  {
578  c[s] += sizeof(size_t);
579  }
580  }
581  }
582  }
583 
584  for (int k = it.start_b(2) ; k < it.stop_b(2) ; k++)
585  {
586  for (int j = it.start_b(1) ; j < it.stop_b(1) ; j++)
587  {
588  int cc = it.LinB(it.start_b(0),j,k);
589  int c[N];
590 
591  int cd = it.LinB_off(it.start_b(0),j,k);
592 
593  for (int s = 0 ; s < N ; s++)
594  {
595  c[s] = it.LinB(it.start_b(0)+stencil[s][0],j+stencil[s][1],k+stencil[s][2]);
596  }
597 
598  for (int i = it.start_b(0) ; i < it.stop_b(0) ; i += Vc::Vector<prop_type>::Size)
599  {
600  Vc::Mask<prop_type> cmp;
601 
602  for (int s = 0 ; s < Vc::Vector<prop_type>::Size ; s++)
603  {
604  cmp[s] = (mask[cc+s] == true);
605  }
606 
607  // we do only id exist the point
608  if (Vc::none_of(cmp) == true) {continue;}
609 
610  Vc::Mask<prop_type> surround;
611 
612  Vc::Vector<prop_type> xs1[N+1];
613  Vc::Vector<prop_type> xs2[N+1];
614 
615  xs1[0] = Vc::Vector<prop_type>(&block_bord_src1[cc],Vc::Unaligned);
616  xs2[0] = Vc::Vector<prop_type>(&block_bord_src2[cc],Vc::Unaligned);
617 
618  for (int s = 1 ; s < N+1 ; s++)
619  {
620  xs1[s] = Vc::Vector<prop_type>(&block_bord_src1[c[s-1]],Vc::Unaligned);
621  xs2[s] = Vc::Vector<prop_type>(&block_bord_src2[c[s-1]],Vc::Unaligned);
622  }
623 
624  Vc::Vector<prop_type> vo1;
625  Vc::Vector<prop_type> vo2;
626 
627  func(vo1, vo2, xs1, xs2, &mask_sum[cc], args ...);
628 
629  vo1.store(&block_bord_dst1[cd],cmp,Vc::Unaligned);
630  vo2.store(&block_bord_dst2[cd],cmp,Vc::Unaligned);
631 
632  cc += Vc::Vector<prop_type>::Size;
633  for (int s = 0 ; s < N ; s++)
634  {
635  c[s] += Vc::Vector<prop_type>::Size;
636  }
637  cd += Vc::Vector<prop_type>::Size;
638  }
639  }
640  }
641 
642  it.template storeBlock<prop_dst1>(block_bord_dst1);
643  it.template storeBlock<prop_dst2>(block_bord_dst2);
644 
645  ++it;
646  }
647  }
648 
649  template<bool findNN, unsigned int prop_src1, unsigned int prop_src2, unsigned int prop_dst1, unsigned int prop_dst2, unsigned int stencil_size, typename SparseGridType, typename lambda_f, typename ... ArgsT >
650  static void conv_cross2(grid_key_dx<3> & start, grid_key_dx<3> & stop, SparseGridType & grid , lambda_f func, ArgsT ... args)
651  {
652  auto it = grid.template getBlockIterator<stencil_size>(start,stop);
653 
654  auto & datas = grid.private_get_data();
655  auto & headers = grid.private_get_header_mask();
656 
657  typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<0>>::type sz0;
658  typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<1>>::type sz1;
659  typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<2>>::type sz2;
660 
661  typedef typename SparseGridType::chunking_type chunking;
662 
663  typedef typename boost::mpl::at<typename SparseGridType::value_type::type, boost::mpl::int_<prop_src1>>::type prop_type;
664 
665  while (it.isNext())
666  {
667  // Load
668  long int offset_jump[6];
669 
670  size_t cid = it.getChunkId();
671 
672  auto chunk = datas.get(cid);
673  auto & mask = headers.get(cid);
674 
675  bool exist;
676  grid_key_dx<3> p = grid.getChunkPos(cid) + grid_key_dx<3>({-1,0,0});
677  long int r = grid.getChunk(p,exist);
678  offset_jump[0] = (r-cid)*decltype(it)::sizeBlock;
679 
680  p = grid.getChunkPos(cid) + grid_key_dx<3>({1,0,0});
681  r = grid.getChunk(p,exist);
682  offset_jump[1] = (r-cid)*decltype(it)::sizeBlock;
683 
684  p = grid.getChunkPos(cid) + grid_key_dx<3>({0,-1,0});
685  r = grid.getChunk(p,exist);
686  offset_jump[2] = (r-cid)*decltype(it)::sizeBlock;
687 
688  p = grid.getChunkPos(cid) + grid_key_dx<3>({0,1,0});
689  r = grid.getChunk(p,exist);
690  offset_jump[3] = (r-cid)*decltype(it)::sizeBlock;
691 
692  p = grid.getChunkPos(cid) + grid_key_dx<3>({0,0,-1});
693  r = grid.getChunk(p,exist);
694  offset_jump[4] = (r-cid)*decltype(it)::sizeBlock;
695 
696  p = grid.getChunkPos(cid) + grid_key_dx<3>({0,0,1});
697  r = grid.getChunk(p,exist);
698  offset_jump[5] = (r-cid)*decltype(it)::sizeBlock;
699 
700  // Load offset jumps
701 
702  // construct a row mask
703 
704  long int s2 = 0;
705 
706  typedef typename boost::mpl::at<typename chunking::type,boost::mpl::int_<2>>::type sz;
707  typedef typename boost::mpl::at<typename chunking::type,boost::mpl::int_<1>>::type sy;
708  typedef typename boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type sx;
709 
710 
711  bool mask_row[sx::value];
712 
713  for (int k = 0 ; k < sx::value ; k++)
714  {
715  mask_row[k] = (k >= it.start(0) && k < it.stop(0))?true:false;
716  }
717 
718  for (int v = it.start(2) ; v < it.stop(2) ; v++)
719  {
720  for (int j = it.start(1) ; j < it.stop(1) ; j++)
721  {
722  s2 = it.Lin(0,j,v);
723  for (int k = 0 ; k < sx::value ; k += Vc::Vector<prop_type>::Size)
724  {
725  // we do only id exist the point
726  if (*(typename data_il<Vc::Vector<prop_type>::Size>::type *)&mask.mask[s2] == 0) {s2 += Vc::Vector<prop_type>::Size; continue;}
727 
728  data_il<4> mxm;
729  data_il<4> mxp;
730  data_il<4> mym;
731  data_il<4> myp;
732  data_il<4> mzm;
733  data_il<4> mzp;
734 
737 
738  Vc::Vector<prop_type> cmd1(&chunk.template get<prop_src1>()[s2]);
739  Vc::Vector<prop_type> cmd2(&chunk.template get<prop_src2>()[s2]);
740 
741  // Load x-1
742  long int sumxm = s2-1;
743  sumxm += (k==0)?offset_jump[0] + sx::value:0;
744 
745  // Load x+1
746  long int sumxp = s2+Vc::Vector<prop_type>::Size;
747  sumxp += (k+Vc::Vector<prop_type>::Size == sx::value)?offset_jump[1] - sx::value:0;
748 
749  long int sumym = (j == 0)?offset_jump[2] + (sy::value-1)*sx::value:-sx::value;
750  sumym += s2;
751  long int sumyp = (j == sy::value-1)?offset_jump[3] - (sy::value - 1)*sx::value:sx::value;
752  sumyp += s2;
753  long int sumzm = (v == 0)?offset_jump[4] + (sz::value-1)*sx::value*sy::value:-sx::value*sy::value;
754  sumzm += s2;
755  long int sumzp = (v == sz::value-1)?offset_jump[5] - (sz::value - 1)*sx::value*sy::value:sx::value*sy::value;
756  sumzp += s2;
757 
758  if (Vc::Vector<prop_type>::Size == 1 || Vc::Vector<prop_type>::Size == 2 || Vc::Vector<prop_type>::Size == 4 || Vc::Vector<prop_type>::Size == 8)
759  {
760  mxm.i = *(typename data_il<Vc::Vector<prop_type>::Size>::type *)&mask.mask[s2];
761  mxm.i = mxm.i << 8;
762  mxm.i |= (typename data_il<Vc::Vector<prop_type>::Size>::type)mask.mask[sumxm];
763 
764  mxp.i = *(typename data_il<Vc::Vector<prop_type>::Size>::type *)&mask.mask[s2];
765  mxp.i = mxp.i >> 8;
766  mxp.i |= ((typename data_il<Vc::Vector<prop_type>::Size>::type)mask.mask[sumxp]) << (Vc::Vector<prop_type>::Size - 1)*8;
767 
768  mym.i = *(typename data_il<Vc::Vector<prop_type>::Size>::type *)&mask.mask[sumym];
769  myp.i = *(typename data_il<Vc::Vector<prop_type>::Size>::type *)&mask.mask[sumyp];
770 
771  mzm.i = *(typename data_il<Vc::Vector<prop_type>::Size>::type *)&mask.mask[sumzm];
772  mzp.i = *(typename data_il<Vc::Vector<prop_type>::Size>::type *)&mask.mask[sumzp];
773  }
774 
775  cs1.xm = cmd1;
776  cs1.xm = cs1.xm.shifted(-1);
777  cs1.xm[0] = chunk.template get<prop_src1>()[sumxm];
778 
779  cs2.xm = cmd2;
780  cs2.xm = cs2.xm.shifted(-1);
781  cs2.xm[0] = chunk.template get<prop_src2>()[sumxm];
782 
783  cs1.xp = cmd1;
784  cs1.xp = cs1.xp.shifted(1);
785  cs1.xp[Vc::Vector<prop_type>::Size - 1] = chunk.template get<prop_src1>()[sumxp];
786 
787  cs2.xp = cmd2;
788  cs2.xp = cs2.xp.shifted(1);
789  cs2.xp[Vc::Vector<prop_type>::Size - 1] = chunk.template get<prop_src2>()[sumxp];
790 
791  // Load y and z direction
792 
793  cs1.ym.load(&chunk.template get<prop_src1>()[sumym],Vc::Aligned);
794  cs1.yp.load(&chunk.template get<prop_src1>()[sumyp],Vc::Aligned);
795  cs1.zm.load(&chunk.template get<prop_src1>()[sumzm],Vc::Aligned);
796  cs1.zp.load(&chunk.template get<prop_src1>()[sumzp],Vc::Aligned);
797 
798  cs2.ym.load(&chunk.template get<prop_src2>()[sumym],Vc::Aligned);
799  cs2.yp.load(&chunk.template get<prop_src2>()[sumyp],Vc::Aligned);
800  cs2.zm.load(&chunk.template get<prop_src2>()[sumzm],Vc::Aligned);
801  cs2.zp.load(&chunk.template get<prop_src2>()[sumzp],Vc::Aligned);
802 
803  // Calculate
804 
805  data_il<4> tot_m;
806  tot_m.i = mxm.i + mxp.i + mym.i + myp.i + mzm.i + mzp.i;
807 
808  Vc::Vector<prop_type> res1;
809  Vc::Vector<prop_type> res2;
810 
811  func(res1,res2,cmd1,cmd2,cs1,cs2,tot_m.uc,args ... );
812 
813  Vc::Mask<prop_type> m(&mask_row[k]);
814 
815  res1.store(&chunk.template get<prop_dst1>()[s2],m,Vc::Aligned);
816  res2.store(&chunk.template get<prop_dst2>()[s2],m,Vc::Aligned);
817 
818  s2 += Vc::Vector<prop_type>::Size;
819  }
820  }
821  }
822 
823  ++it;
824  }
825  }
826 
827  template<bool findNN, unsigned int stencil_size, typename prop_type, typename SparseGridType, typename lambda_f, typename ... ArgsT >
828  static void conv_cross_ids(grid_key_dx<3> & start, grid_key_dx<3> & stop, SparseGridType & grid , lambda_f func, ArgsT ... args)
829  {
830  auto it = grid.template getBlockIterator<stencil_size>(start,stop);
831 
832  auto & datas = grid.private_get_data();
833  auto & headers = grid.private_get_header_mask();
834 
835  typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<0>>::type sz0;
836  typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<1>>::type sz1;
837  typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<2>>::type sz2;
838 
839  typedef typename SparseGridType::chunking_type chunking;
840 
841  while (it.isNext())
842  {
843  // Load
844  long int offset_jump[6];
845 
846  size_t cid = it.getChunkId();
847 
848  auto chunk = datas.get(cid);
849  auto & mask = headers.get(cid);
850 
851  bool exist;
852  grid_key_dx<3> p = grid.getChunkPos(cid) + grid_key_dx<3>({-1,0,0});
853  long int r = grid.getChunk(p,exist);
854  offset_jump[0] = (r-cid)*decltype(it)::sizeBlock;
855 
856  p = grid.getChunkPos(cid) + grid_key_dx<3>({1,0,0});
857  r = grid.getChunk(p,exist);
858  offset_jump[1] = (r-cid)*decltype(it)::sizeBlock;
859 
860  p = grid.getChunkPos(cid) + grid_key_dx<3>({0,-1,0});
861  r = grid.getChunk(p,exist);
862  offset_jump[2] = (r-cid)*decltype(it)::sizeBlock;
863 
864  p = grid.getChunkPos(cid) + grid_key_dx<3>({0,1,0});
865  r = grid.getChunk(p,exist);
866  offset_jump[3] = (r-cid)*decltype(it)::sizeBlock;
867 
868  p = grid.getChunkPos(cid) + grid_key_dx<3>({0,0,-1});
869  r = grid.getChunk(p,exist);
870  offset_jump[4] = (r-cid)*decltype(it)::sizeBlock;
871 
872  p = grid.getChunkPos(cid) + grid_key_dx<3>({0,0,1});
873  r = grid.getChunk(p,exist);
874  offset_jump[5] = (r-cid)*decltype(it)::sizeBlock;
875 
876  // Load offset jumps
877 
878  // construct a row mask
879 
880  long int s2 = 0;
881 
882  typedef typename boost::mpl::at<typename chunking::type,boost::mpl::int_<2>>::type sz;
883  typedef typename boost::mpl::at<typename chunking::type,boost::mpl::int_<1>>::type sy;
884  typedef typename boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type sx;
885 
887 
888  for (int k = 0 ; k < sx::value ; k++)
889  {
890  ids.mask_row[k] = (k >= it.start(0) && k < it.stop(0))?true:false;
891  }
892 
893  for (int v = it.start(2) ; v < it.stop(2) ; v++)
894  {
895  for (int j = it.start(1) ; j < it.stop(1) ; j++)
896  {
897  s2 = it.Lin(0,j,v);
898  for (int k = 0 ; k < sx::value ; k += Vc::Vector<prop_type>::Size)
899  {
900  // we do only id exist the point
901  if (*(int *)&mask.mask[s2] == 0) {s2 += Vc::Vector<prop_type>::Size; continue;}
902 
903  data_il<4> mxm;
904  data_il<4> mxp;
905  data_il<4> mym;
906  data_il<4> myp;
907  data_il<4> mzm;
908  data_il<4> mzp;
909 
910  ids.k = k;
911 
912  // Load x-1
913  ids.sumdm[0] = s2-1;
914  ids.sumdm[0] += (k==0)?offset_jump[0] + sx::value:0;
915 
916  // Load x+1
917  ids.sumdp[0] = s2+Vc::Vector<prop_type>::Size;
918  ids.sumdp[0] += (k+Vc::Vector<prop_type>::Size == sx::value)?offset_jump[1] - sx::value:0;
919 
920  ids.sumdm[1] = (j == 0)?offset_jump[2] + (sy::value-1)*sx::value:-sx::value;
921  ids.sumdm[1] += s2;
922  ids.sumdp[1] = (j == sy::value-1)?offset_jump[3] - (sy::value - 1)*sx::value:sx::value;
923  ids.sumdp[1] += s2;
924  ids.sumdm[2] = (v == 0)?offset_jump[4] + (sz::value-1)*sx::value*sy::value:-sx::value*sy::value;
925  ids.sumdm[2] += s2;
926  ids.sumdp[2] = (v == sz::value-1)?offset_jump[5] - (sz::value - 1)*sx::value*sy::value:sx::value*sy::value;
927  ids.sumdp[2] += s2;
928 
929  ids.s2 = s2;
930 
931  if (Vc::Vector<prop_type>::Size == 2)
932  {
933  mxm.i = *(short int *)&mask.mask[s2];
934  mxm.i = mxm.i << 8;
935  mxm.i |= (short int)mask.mask[ids.sumdm[0]];
936 
937  mxp.i = *(short int *)&mask.mask[s2];
938  mxp.i = mxp.i >> 8;
939  mxp.i |= ((short int)mask.mask[ids.sumdp[0]]) << (Vc::Vector<prop_type>::Size - 1)*8;
940 
941  mym.i = *(short int *)&mask.mask[ids.sumdm[1]];
942  myp.i = *(short int *)&mask.mask[ids.sumdp[1]];
943 
944  mzm.i = *(short int *)&mask.mask[ids.sumdm[2]];
945  mzp.i = *(short int *)&mask.mask[ids.sumdp[2]];
946  }
947  else if (Vc::Vector<prop_type>::Size == 4)
948  {
949  mxm.i = *(int *)&mask.mask[s2];
950  mxm.i = mxm.i << 8;
951  mxm.i |= (int)mask.mask[ids.sumdm[0]];
952 
953  mxp.i = *(int *)&mask.mask[s2];
954  mxp.i = mxp.i >> 8;
955  mxp.i |= ((int)mask.mask[ids.sumdp[0]]) << (Vc::Vector<prop_type>::Size - 1)*8;
956 
957  mym.i = *(int *)&mask.mask[ids.sumdm[1]];
958  myp.i = *(int *)&mask.mask[ids.sumdp[1]];
959 
960  mzm.i = *(int *)&mask.mask[ids.sumdm[2]];
961  mzp.i = *(int *)&mask.mask[ids.sumdp[2]];
962  }
963  else
964  {
965  std::cout << __FILE__ << ":" << __LINE__ << " UNSUPPORTED" << std::endl;
966  }
967 
968  // Calculate
969 
970  data_il<4> tot_m;
971  tot_m.i = mxm.i + mxp.i + mym.i + myp.i + mzm.i + mzp.i;
972 
973  func(chunk,ids,tot_m.uc,args ... );
974 
975  s2 += Vc::Vector<prop_type>::Size;
976  }
977  }
978  }
979 
980  ++it;
981  }
982  }
983 
984 };
985 
986 #endif
987 
988 
989 #endif /* SPARSEGRID_CONV_OPT_HPP_ */
grid_key_dx is the key to access any element in the grid
Definition: grid_key.hpp:18
Get the neighborhood iterator based on type.
It model an expression expr1 + ... exprn.
Definition: sum.hpp:92