OpenFPM_pdata  4.1.0
Project that contain the implementation of distributed structures
 
Loading...
Searching...
No Matches
SparseGrid_conv_opt.hpp
1/*
2 * SparseGrid_conv_opt.hpp
3 *
4 * Created on: Jul 19, 2020
5 * Author: i-bird
6 */
7
8#ifndef SPARSEGRID_CONV_OPT_HPP_
9#define SPARSEGRID_CONV_OPT_HPP_
10
11template<unsigned int l>
13{
14};
15
16template<>
17union data_il<8>
18{
19 typedef long int type;
20
21 unsigned char uc[8];
22 long int i;
23};
24
25template<>
26union data_il<4>
27{
28 typedef int type;
29
30 unsigned char uc[4];
31 int i;
32};
33
34template<>
35union data_il<2>
36{
37 typedef short int type;
38
39 unsigned char uc[2];
40 short int i;
41};
42
43template<>
44union data_il<1>
45{
46 typedef char type;
47
48 unsigned char uc[4];
49 char i;
50};
51
52
53template<unsigned int dim, unsigned int sz>
54struct ids_crs
55{
56 long int sumdm[dim];
57 long int sumdp[dim];
58
59 long int s2;
60 bool mask_row[sz];
61 int k;
62};
63
64
65
66template<unsigned int dim>
68{
69 template<unsigned int prop_src, unsigned int prop_dst, unsigned int stencil_size , unsigned int N, typename SparseGridType, typename lambda_f, typename ... ArgsT >
70 void conv(int (& stencil)[N][3], grid_key_dx<3> & start, grid_key_dx<3> & stop, SparseGridType & grid , lambda_f func, ArgsT ... args)
71 {
72#ifndef __NVCC__
73 std::cout << __FILE__ << ":" << __LINE__ << " error conv operation not implemented for this dimension " << std::endl;
74#else
75 std::cout << __FILE__ << ":" << __LINE__ << " error conv is unsupported when compiled on NVCC " << std::endl;
76#endif
77 }
78
79 template<bool findNN, unsigned int prop_src, unsigned int prop_dst, unsigned int stencil_size, typename SparseGridType, typename lambda_f, typename ... ArgsT >
80 static void conv_cross(grid_key_dx<3> & start, grid_key_dx<3> & stop, SparseGridType & grid , lambda_f func, ArgsT ... args)
81 {
82#ifndef __NVCC__
83 std::cout << __FILE__ << ":" << __LINE__ << " error conv_cross operation not implemented for this dimension " << std::endl;
84#else
85 std::cout << __FILE__ << ":" << __LINE__ << " error conv_cross is unsupported when compiled on NVCC " << std::endl;
86#endif
87 }
88
89 template<bool findNN, typename NNType, unsigned int prop_src1, unsigned int prop_src2,
90 unsigned int prop_dst1, unsigned int prop_dst2,
91 unsigned int stencil_size , unsigned int N,
92 typename SparseGridType, typename lambda_f, typename ... ArgsT >
93 static void conv2(int (& stencil)[N][3], grid_key_dx<3> & start, grid_key_dx<3> & stop, SparseGridType & grid , lambda_f func, ArgsT ... args)
94 {
95#ifndef __NVCC__
96 std::cout << __FILE__ << ":" << __LINE__ << " error conv2 operation not implemented for this dimension " << std::endl;
97#else
98 std::cout << __FILE__ << ":" << __LINE__ << " error conv2 is unsupported when compiled on NVCC " << std::endl;
99#endif
100 }
101
102 template<bool findNN, unsigned int prop_src1, unsigned int prop_src2, unsigned int prop_dst1, unsigned int prop_dst2, unsigned int stencil_size, typename SparseGridType, typename lambda_f, typename ... ArgsT >
103 static void conv_cross2(grid_key_dx<3> & start, grid_key_dx<3> & stop, SparseGridType & grid , lambda_f func, ArgsT ... args)
104 {
105#ifndef __NVCC__
106 std::cout << __FILE__ << ":" << __LINE__ << " error conv_cross2 operation not implemented for this dimension " << std::endl;
107#else
108 std::cout << __FILE__ << ":" << __LINE__ << " error conv_cross2 is unsupported when compiled on NVCC " << std::endl;
109#endif
110 }
111};
112
113#if !defined(__NVCC__) || defined(CUDA_ON_CPU) || defined(__HIP__)
114
115
116template<unsigned int dir,int p, unsigned int prop_src1,typename chunk_type, typename vect_type, typename ids_type>
117void load_crs(vect_type & cs1, chunk_type & chunk, ids_type & ids)
118{
119 if (dir == 0 && p < 0)
120 {
121 Vc::Vector<typename vect_type::EntryType> cmd1(&chunk.template get<prop_src1>()[ids.s2]);
122
123 cs1 = cmd1;
124 cs1 = cs1.shifted(-1);
125 cs1[0] = chunk.template get<prop_src1>()[ids.sumdm[dir]];
126 }
127 else if (dir == 0 && p > 0)
128 {
129 Vc::Vector<typename vect_type::EntryType> cmd1(&chunk.template get<prop_src1>()[ids.s2]);
130
131 cs1 = cmd1;
132 cs1 = cs1.shifted(1);
133 cs1[Vc::Vector<typename vect_type::EntryType>::Size - 1] = chunk.template get<prop_src1>()[ids.sumdp[0]];
134 }
135 else if (p < 0)
136 {
137 cs1.load(&chunk.template get<prop_src1>()[ids.sumdm[dir]],Vc::Aligned);
138 }
139 else if (p > 0)
140 {
141 cs1.load(&chunk.template get<prop_src1>()[ids.sumdp[dir]],Vc::Aligned);
142 }
143 else
144 {
145 Vc::Vector<typename vect_type::EntryType> cmd1(&chunk.template get<prop_src1>()[ids.s2]);
146
147 cs1 = cmd1;
148 }
149}
150
151template<unsigned int prop_dst1,typename chunk_type, typename vect_type, typename ids_type>
152void store_crs(chunk_type & chunk, vect_type & res, ids_type & ids)
153{
154 Vc::Mask<typename vect_type::EntryType> m(&ids.mask_row[ids.k]);
155
156 res.store(&chunk.template get<prop_dst1>()[ids.s2],m,Vc::Aligned);
157}
158
159template<unsigned int prop_dst1,unsigned int comp, typename chunk_type, typename vect_type, typename ids_type>
160void store_crs_v(chunk_type & chunk, vect_type & res, ids_type & ids)
161{
162 Vc::Mask<typename vect_type::EntryType> m(&ids.mask_row[ids.k]);
163
164 res.store(&chunk.template get<prop_dst1>()[comp][ids.s2],m,Vc::Aligned);
165}
166
167template<unsigned int dir,int p, unsigned int comp, unsigned int prop_src1,typename chunk_type, typename vect_type, typename ids_type>
168void load_crs_v(vect_type & cs1, chunk_type & chunk, ids_type & ids)
169{
170 if (dir == 0 && p < 0)
171 {
172 Vc::Vector<typename vect_type::EntryType> cmd1(&chunk.template get<prop_src1>()[comp][ids.s2]);
173
174 cs1 = cmd1;
175 cs1 = cs1.shifted(-1);
176 cs1[0] = chunk.template get<prop_src1>()[comp][ids.sumdm[dir]];
177 }
178 else if (dir == 0 && p > 0)
179 {
180 Vc::Vector<typename vect_type::EntryType> cmd1(&chunk.template get<prop_src1>()[comp][ids.s2]);
181
182 cs1 = cmd1;
183 cs1 = cs1.shifted(1);
184 cs1[Vc::Vector<typename vect_type::EntryType>::Size - 1] = chunk.template get<prop_src1>()[comp][ids.sumdp[dir]];
185 }
186 else if (p < 0)
187 {
188 cs1.load(&chunk.template get<prop_src1>()[comp][ids.sumdm[dir]],Vc::Aligned);
189 }
190 else if (p > 0)
191 {
192 cs1.load(&chunk.template get<prop_src1>()[comp][ids.sumdp[dir]],Vc::Aligned);
193 }
194 else
195 {
196 Vc::Vector<typename vect_type::EntryType> cmd1(&chunk.template get<prop_src1>()[comp][ids.s2]);
197
198 cs1 = cmd1;
199 }
200}
201
202
203template<typename prop_type>
205{
206 Vc::Vector<prop_type> xm;
207 Vc::Vector<prop_type> xp;
208 Vc::Vector<prop_type> ym;
209 Vc::Vector<prop_type> yp;
210 Vc::Vector<prop_type> zm;
211 Vc::Vector<prop_type> zp;
212};
213
214template<>
215struct conv_impl<3>
216{
217 template<bool findNN, typename NNtype, unsigned int prop_src, unsigned int prop_dst, unsigned int stencil_size , unsigned int N, typename SparseGridType, typename lambda_f, typename ... ArgsT >
218 static void conv(int (& stencil)[N][3], grid_key_dx<3> & start, grid_key_dx<3> & stop, SparseGridType & grid , lambda_f func, ArgsT ... args)
219 {
220 auto it = grid.template getBlockIterator<stencil_size>(start,stop);
221
222 typedef typename boost::mpl::at<typename SparseGridType::value_type::type, boost::mpl::int_<prop_src>>::type prop_type;
223
224 unsigned char mask[decltype(it)::sizeBlockBord];
225 unsigned char mask_sum[decltype(it)::sizeBlockBord];
226 unsigned char mask_unused[decltype(it)::sizeBlock];
227 __attribute__ ((aligned (32))) prop_type block_bord_src[decltype(it)::sizeBlockBord];
228 __attribute__ ((aligned (32))) prop_type block_bord_dst[decltype(it)::sizeBlock];
229
230 typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<0>>::type sz0;
231 typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<1>>::type sz1;
232 typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<2>>::type sz2;
233
234 while (it.isNext())
235 {
236 it.template loadBlockBorder<prop_src,NNtype,findNN>(block_bord_src,mask);
237
238 if (it.start_b(2) != stencil_size || it.start_b(1) != stencil_size || it.start_b(0) != stencil_size ||
239 it.stop_b(2) != sz2::value+stencil_size || it.stop_b(1) != sz1::value+stencil_size || it.stop_b(0) != sz0::value+stencil_size)
240 {
241 auto & header_mask = grid.private_get_header_mask();
242 auto & header_inf = grid.private_get_header_inf();
243
244 loadBlock_impl<prop_dst,0,3,typename decltype(it)::vector_blocks_exts_type, typename decltype(it)::vector_ext_type>::template loadBlock<decltype(it)::sizeBlock>(block_bord_dst,grid,it.getChunkId(),mask_unused);
245 }
246
247 // Sum the mask
248 for (int k = it.start_b(2) ; k < it.stop_b(2) ; k++)
249 {
250 for (int j = it.start_b(1) ; j < it.stop_b(1) ; j++)
251 {
252 int cc = it.LinB(it.start_b(0),j,k);
253 int c[N];
254
255 for (int s = 0 ; s < N ; s++)
256 {
257 c[s] = it.LinB(it.start_b(0)+stencil[s][0],j+stencil[s][1],k+stencil[s][2]);
258 }
259
260 for (int i = it.start_b(0) ; i < it.stop_b(0) ; i += sizeof(size_t))
261 {
262 size_t cmd = *(size_t *)&mask[cc];
263
264 if (cmd != 0)
265 {
266 size_t xm[N];
267
268 for (int s = 0 ; s < N ; s++)
269 {
270 xm[s] = *(size_t *)&mask[c[s]];
271 }
272
273 size_t sum = 0;
274 for (int s = 0 ; s < N ; s++)
275 {
276 sum += xm[s];
277 }
278
279 *(size_t *)&mask_sum[cc] = sum;
280 }
281
282 cc += sizeof(size_t);
283 for (int s = 0 ; s < N ; s++)
284 {
285 c[s] += sizeof(size_t);
286 }
287 }
288 }
289 }
290
291 for (int k = it.start_b(2) ; k < it.stop_b(2) ; k++)
292 {
293 for (int j = it.start_b(1) ; j < it.stop_b(1) ; j++)
294 {
295 int cc = it.LinB(it.start_b(0),j,k);
296 int c[N];
297
298 int cd = it.LinB_off(it.start_b(0),j,k);
299
300 for (int s = 0 ; s < N ; s++)
301 {
302 c[s] = it.LinB(it.start_b(0)+stencil[s][0],j+stencil[s][1],k+stencil[s][2]);
303 }
304
305 for (int i = it.start_b(0) ; i < it.stop_b(0) ; i += Vc::Vector<prop_type>::Size)
306 {
307 Vc::Mask<prop_type> cmp;
308
309 for (int s = 0 ; s < Vc::Vector<prop_type>::Size ; s++)
310 {
311 cmp[s] = (mask[cc+s] == true && i+s < it.stop_b(0));
312 }
313
314 // we do only if exist the point
315 if (Vc::none_of(cmp) == false)
316 {
317 Vc::Mask<prop_type> surround;
318
319 Vc::Vector<prop_type> xs[N+1];
320
321 xs[0] = Vc::Vector<prop_type>(&block_bord_src[cc],Vc::Unaligned);
322
323 for (int s = 1 ; s < N+1 ; s++)
324 {
325 xs[s] = Vc::Vector<prop_type>(&block_bord_src[c[s-1]],Vc::Unaligned);
326 }
327
328 auto res = func(xs, &mask_sum[cc], args ...);
329
330 res.store(&block_bord_dst[cd],cmp,Vc::Aligned);
331 }
332
333 cc += Vc::Vector<prop_type>::Size;
334 for (int s = 0 ; s < N ; s++)
335 {
336 c[s] += Vc::Vector<prop_type>::Size;
337 }
338 cd += Vc::Vector<prop_type>::Size;
339 }
340 }
341 }
342
343 it.template storeBlock<prop_dst>(block_bord_dst);
344
345 ++it;
346 }
347 }
348
349 template<bool findNN, unsigned int prop_src, unsigned int prop_dst, unsigned int stencil_size, typename SparseGridType, typename lambda_f, typename ... ArgsT >
350 static void conv_cross(grid_key_dx<3> & start, grid_key_dx<3> & stop, SparseGridType & grid , lambda_f func, ArgsT ... args)
351 {
352 auto it = grid.template getBlockIterator<1>(start,stop);
353
354 auto & datas = grid.private_get_data();
355 auto & headers = grid.private_get_header_mask();
356
357 typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<0>>::type sz0;
358 typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<1>>::type sz1;
359 typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<2>>::type sz2;
360
361 typedef typename SparseGridType::chunking_type chunking;
362
363 typedef typename boost::mpl::at<typename SparseGridType::value_type::type, boost::mpl::int_<prop_src>>::type prop_type;
364
365 while (it.isNext())
366 {
367 // Load
368 long int offset_jump[6];
369
370 size_t cid = it.getChunkId();
371
372 auto chunk = datas.get(cid);
373 auto & mask = headers.get(cid);
374
375 bool exist;
376 grid_key_dx<3> p = grid.getChunkPos(cid) + grid_key_dx<3>({-1,0,0});
377 long int r = grid.getChunk(p,exist);
378 offset_jump[0] = (r-cid)*decltype(it)::sizeBlock;
379
380 p = grid.getChunkPos(cid) + grid_key_dx<3>({1,0,0});
381 r = grid.getChunk(p,exist);
382 offset_jump[1] = (r-cid)*decltype(it)::sizeBlock;
383
384 p = grid.getChunkPos(cid) + grid_key_dx<3>({0,-1,0});
385 r = grid.getChunk(p,exist);
386 offset_jump[2] = (r-cid)*decltype(it)::sizeBlock;
387
388 p = grid.getChunkPos(cid) + grid_key_dx<3>({0,1,0});
389 r = grid.getChunk(p,exist);
390 offset_jump[3] = (r-cid)*decltype(it)::sizeBlock;
391
392 p = grid.getChunkPos(cid) + grid_key_dx<3>({0,0,-1});
393 r = grid.getChunk(p,exist);
394 offset_jump[4] = (r-cid)*decltype(it)::sizeBlock;
395
396 p = grid.getChunkPos(cid) + grid_key_dx<3>({0,0,1});
397 r = grid.getChunk(p,exist);
398 offset_jump[5] = (r-cid)*decltype(it)::sizeBlock;
399
400 // Load offset jumps
401
402 // construct a row mask
403
404 long int s2 = 0;
405
406 typedef typename boost::mpl::at<typename chunking::type,boost::mpl::int_<2>>::type sz;
407 typedef typename boost::mpl::at<typename chunking::type,boost::mpl::int_<1>>::type sy;
408 typedef typename boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type sx;
409
410
411 bool mask_row[sx::value];
412
413 for (int k = 0 ; k < sx::value ; k++)
414 {
415 mask_row[k] = (k >= it.start(0) && k < it.stop(0))?true:false;
416 }
417
418 for (int v = it.start(2) ; v < it.stop(2) ; v++)
419 {
420 for (int j = it.start(1) ; j < it.stop(1) ; j++)
421 {
422 s2 = it.Lin(0,j,v);
423 for (int k = 0 ; k < sx::value ; k += Vc::Vector<prop_type>::Size)
424 {
425 // we do only id exist the point
426 if (*(typename data_il<Vc::Vector<prop_type>::Size>::type *)&mask.mask[s2] == 0) {s2 += Vc::Vector<prop_type>::Size; continue;}
427
434
436
437 Vc::Vector<prop_type> cmd(&chunk.template get<prop_src>()[s2]);
438
439 // Load x-1
440 long int sumxm = s2-1;
441 sumxm += (k==0)?offset_jump[0] + sx::value:0;
442
443 // Load x+1
444 long int sumxp = s2+Vc::Vector<prop_type>::Size;
445 sumxp += (k+Vc::Vector<prop_type>::Size == sx::value)?offset_jump[1] - sx::value:0;
446
447 long int sumym = (j == 0)?offset_jump[2] + (sy::value-1)*sx::value:-sx::value;
448 sumym += s2;
449 long int sumyp = (j == sy::value-1)?offset_jump[3] - (sy::value - 1)*sx::value:sx::value;
450 sumyp += s2;
451 long int sumzm = (v == 0)?offset_jump[4] + (sz::value-1)*sx::value*sy::value:-sx::value*sy::value;
452 sumzm += s2;
453 long int sumzp = (v == sz::value-1)?offset_jump[5] - (sz::value - 1)*sx::value*sy::value:sx::value*sy::value;
454 sumzp += s2;
455
456 if (Vc::Vector<prop_type>::Size == 2 || Vc::Vector<prop_type>::Size == 4 || Vc::Vector<prop_type>::Size == 8)
457 {
458 mxm.i = *(typename data_il<Vc::Vector<prop_type>::Size>::type *)&mask.mask[s2];
459 mxm.i = mxm.i << 8;
460 mxm.i |= (typename data_il<Vc::Vector<prop_type>::Size>::type)mask.mask[sumxm];
461
462 mxp.i = *(typename data_il<Vc::Vector<prop_type>::Size>::type *)&mask.mask[s2];
463 mxp.i = mxp.i >> 8;
464 mxp.i |= ((typename data_il<Vc::Vector<prop_type>::Size>::type)mask.mask[sumxp]) << (Vc::Vector<prop_type>::Size - 1)*8;
465
466 mym.i = *(typename data_il<Vc::Vector<prop_type>::Size>::type *)&mask.mask[sumym];
467 myp.i = *(typename data_il<Vc::Vector<prop_type>::Size>::type *)&mask.mask[sumyp];
468
469 mzm.i = *(typename data_il<Vc::Vector<prop_type>::Size>::type *)&mask.mask[sumzm];
470 mzp.i = *(typename data_il<Vc::Vector<prop_type>::Size>::type *)&mask.mask[sumzp];
471 }
472 else
473 {
474 std::cout << __FILE__ << ":" << __LINE__ << " UNSUPPORTED" << std::endl;
475 }
476
477 cs.xm = cmd;
478 cs.xm = cs.xm.shifted(-1);
479 cs.xm[0] = chunk.template get<prop_src>()[sumxm];
480
481
482 cs.xp = cmd;
483 cs.xp = cs.xp.shifted(1);
484 cs.xp[Vc::Vector<prop_type>::Size - 1] = chunk.template get<prop_src>()[sumxp];
485
486 // Load y and z direction
487
488 cs.ym.load(&chunk.template get<prop_src>()[sumym],Vc::Aligned);
489 cs.yp.load(&chunk.template get<prop_src>()[sumyp],Vc::Aligned);
490 cs.zm.load(&chunk.template get<prop_src>()[sumzm],Vc::Aligned);
491 cs.zp.load(&chunk.template get<prop_src>()[sumzp],Vc::Aligned);
492
493 // Calculate
494
496 tot_m.i = mxm.i + mxp.i + mym.i + myp.i + mzm.i + mzp.i;
497
498 Vc::Vector<prop_type> res = func(cmd,cs,tot_m.uc,args ... );
499
500 Vc::Mask<prop_type> m(&mask_row[k]);
501
502 res.store(&chunk.template get<prop_dst>()[s2],m,Vc::Aligned);
503
504 s2 += Vc::Vector<prop_type>::Size;
505 }
506 }
507 }
508
509 ++it;
510 }
511 }
512
513
514 template<bool findNN, typename NNType, unsigned int prop_src1, unsigned int prop_src2,
515 unsigned int prop_dst1, unsigned int prop_dst2,
516 unsigned int stencil_size , unsigned int N,
517 typename SparseGridType, typename lambda_f, typename ... ArgsT >
518 static void conv2(int (& stencil)[N][3], grid_key_dx<3> & start, grid_key_dx<3> & stop, SparseGridType & grid , lambda_f func, ArgsT ... args)
519 {
520 auto it = grid.template getBlockIterator<stencil_size>(start,stop);
521
522 typedef typename boost::mpl::at<typename SparseGridType::value_type::type, boost::mpl::int_<prop_src1>>::type prop_type;
523
524 unsigned char mask[decltype(it)::sizeBlockBord];
525 unsigned char mask_sum[decltype(it)::sizeBlockBord];
526 __attribute__ ((aligned (64))) prop_type block_bord_src1[decltype(it)::sizeBlockBord];
527 __attribute__ ((aligned (64))) prop_type block_bord_dst1[decltype(it)::sizeBlock+16];
528 __attribute__ ((aligned (64))) prop_type block_bord_src2[decltype(it)::sizeBlockBord];
529 __attribute__ ((aligned (64))) prop_type block_bord_dst2[decltype(it)::sizeBlock+16];
530
531 typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<0>>::type sz0;
532 typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<1>>::type sz1;
533 typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<2>>::type sz2;
534
535 while (it.isNext())
536 {
537 it.template loadBlockBorder<prop_src1,NNType,findNN>(block_bord_src1,mask);
538 it.template loadBlockBorder<prop_src2,NNType,findNN>(block_bord_src2,mask);
539
540 // Sum the mask
541 for (int k = it.start_b(2) ; k < it.stop_b(2) ; k++)
542 {
543 for (int j = it.start_b(1) ; j < it.stop_b(1) ; j++)
544 {
545 int cc = it.LinB(it.start_b(0),j,k);
546 int c[N];
547
548 for (int s = 0 ; s < N ; s++)
549 {
550 c[s] = it.LinB(it.start_b(0)+stencil[s][0],j+stencil[s][1],k+stencil[s][2]);
551 }
552
553 for (int i = it.start_b(0) ; i < it.stop_b(0) ; i += sizeof(size_t))
554 {
555 size_t cmd = *(size_t *)&mask[cc];
556
557 if (cmd == 0) {continue;}
558
559
560 size_t xm[N];
561
562 for (int s = 0 ; s < N ; s++)
563 {
564 xm[s] = *(size_t *)&mask[c[s]];
565 }
566
567 size_t sum = 0;
568 for (int s = 0 ; s < N ; s++)
569 {
570 sum += xm[s];
571 }
572
573 *(size_t *)&mask_sum[cc] = sum;
574
575 cc += sizeof(size_t);
576 for (int s = 0 ; s < N ; s++)
577 {
578 c[s] += sizeof(size_t);
579 }
580 }
581 }
582 }
583
584 for (int k = it.start_b(2) ; k < it.stop_b(2) ; k++)
585 {
586 for (int j = it.start_b(1) ; j < it.stop_b(1) ; j++)
587 {
588 int cc = it.LinB(it.start_b(0),j,k);
589 int c[N];
590
591 int cd = it.LinB_off(it.start_b(0),j,k);
592
593 for (int s = 0 ; s < N ; s++)
594 {
595 c[s] = it.LinB(it.start_b(0)+stencil[s][0],j+stencil[s][1],k+stencil[s][2]);
596 }
597
598 for (int i = it.start_b(0) ; i < it.stop_b(0) ; i += Vc::Vector<prop_type>::Size)
599 {
600 Vc::Mask<prop_type> cmp;
601
602 for (int s = 0 ; s < Vc::Vector<prop_type>::Size ; s++)
603 {
604 cmp[s] = (mask[cc+s] == true);
605 }
606
607 // we do only id exist the point
608 if (Vc::none_of(cmp) == true) {continue;}
609
610 Vc::Mask<prop_type> surround;
611
612 Vc::Vector<prop_type> xs1[N+1];
613 Vc::Vector<prop_type> xs2[N+1];
614
615 xs1[0] = Vc::Vector<prop_type>(&block_bord_src1[cc],Vc::Unaligned);
616 xs2[0] = Vc::Vector<prop_type>(&block_bord_src2[cc],Vc::Unaligned);
617
618 for (int s = 1 ; s < N+1 ; s++)
619 {
620 xs1[s] = Vc::Vector<prop_type>(&block_bord_src1[c[s-1]],Vc::Unaligned);
621 xs2[s] = Vc::Vector<prop_type>(&block_bord_src2[c[s-1]],Vc::Unaligned);
622 }
623
624 Vc::Vector<prop_type> vo1;
625 Vc::Vector<prop_type> vo2;
626
627 func(vo1, vo2, xs1, xs2, &mask_sum[cc], args ...);
628
629 vo1.store(&block_bord_dst1[cd],cmp,Vc::Unaligned);
630 vo2.store(&block_bord_dst2[cd],cmp,Vc::Unaligned);
631
632 cc += Vc::Vector<prop_type>::Size;
633 for (int s = 0 ; s < N ; s++)
634 {
635 c[s] += Vc::Vector<prop_type>::Size;
636 }
637 cd += Vc::Vector<prop_type>::Size;
638 }
639 }
640 }
641
642 it.template storeBlock<prop_dst1>(block_bord_dst1);
643 it.template storeBlock<prop_dst2>(block_bord_dst2);
644
645 ++it;
646 }
647 }
648
649 template<bool findNN, unsigned int prop_src1, unsigned int prop_src2, unsigned int prop_dst1, unsigned int prop_dst2, unsigned int stencil_size, typename SparseGridType, typename lambda_f, typename ... ArgsT >
650 static void conv_cross2(grid_key_dx<3> & start, grid_key_dx<3> & stop, SparseGridType & grid , lambda_f func, ArgsT ... args)
651 {
652 auto it = grid.template getBlockIterator<stencil_size>(start,stop);
653
654 auto & datas = grid.private_get_data();
655 auto & headers = grid.private_get_header_mask();
656
657 typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<0>>::type sz0;
658 typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<1>>::type sz1;
659 typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<2>>::type sz2;
660
661 typedef typename SparseGridType::chunking_type chunking;
662
663 typedef typename boost::mpl::at<typename SparseGridType::value_type::type, boost::mpl::int_<prop_src1>>::type prop_type;
664
665 while (it.isNext())
666 {
667 // Load
668 long int offset_jump[6];
669
670 size_t cid = it.getChunkId();
671
672 auto chunk = datas.get(cid);
673 auto & mask = headers.get(cid);
674
675 bool exist;
676 grid_key_dx<3> p = grid.getChunkPos(cid) + grid_key_dx<3>({-1,0,0});
677 long int r = grid.getChunk(p,exist);
678 offset_jump[0] = (r-cid)*decltype(it)::sizeBlock;
679
680 p = grid.getChunkPos(cid) + grid_key_dx<3>({1,0,0});
681 r = grid.getChunk(p,exist);
682 offset_jump[1] = (r-cid)*decltype(it)::sizeBlock;
683
684 p = grid.getChunkPos(cid) + grid_key_dx<3>({0,-1,0});
685 r = grid.getChunk(p,exist);
686 offset_jump[2] = (r-cid)*decltype(it)::sizeBlock;
687
688 p = grid.getChunkPos(cid) + grid_key_dx<3>({0,1,0});
689 r = grid.getChunk(p,exist);
690 offset_jump[3] = (r-cid)*decltype(it)::sizeBlock;
691
692 p = grid.getChunkPos(cid) + grid_key_dx<3>({0,0,-1});
693 r = grid.getChunk(p,exist);
694 offset_jump[4] = (r-cid)*decltype(it)::sizeBlock;
695
696 p = grid.getChunkPos(cid) + grid_key_dx<3>({0,0,1});
697 r = grid.getChunk(p,exist);
698 offset_jump[5] = (r-cid)*decltype(it)::sizeBlock;
699
700 // Load offset jumps
701
702 // construct a row mask
703
704 long int s2 = 0;
705
706 typedef typename boost::mpl::at<typename chunking::type,boost::mpl::int_<2>>::type sz;
707 typedef typename boost::mpl::at<typename chunking::type,boost::mpl::int_<1>>::type sy;
708 typedef typename boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type sx;
709
710
711 bool mask_row[sx::value];
712
713 for (int k = 0 ; k < sx::value ; k++)
714 {
715 mask_row[k] = (k >= it.start(0) && k < it.stop(0))?true:false;
716 }
717
718 for (int v = it.start(2) ; v < it.stop(2) ; v++)
719 {
720 for (int j = it.start(1) ; j < it.stop(1) ; j++)
721 {
722 s2 = it.Lin(0,j,v);
723 for (int k = 0 ; k < sx::value ; k += Vc::Vector<prop_type>::Size)
724 {
725 // we do only id exist the point
726 if (*(typename data_il<Vc::Vector<prop_type>::Size>::type *)&mask.mask[s2] == 0) {s2 += Vc::Vector<prop_type>::Size; continue;}
727
728 data_il<4> mxm;
729 data_il<4> mxp;
730 data_il<4> mym;
731 data_il<4> myp;
732 data_il<4> mzm;
733 data_il<4> mzp;
734
737
738 Vc::Vector<prop_type> cmd1(&chunk.template get<prop_src1>()[s2]);
739 Vc::Vector<prop_type> cmd2(&chunk.template get<prop_src2>()[s2]);
740
741 // Load x-1
742 long int sumxm = s2-1;
743 sumxm += (k==0)?offset_jump[0] + sx::value:0;
744
745 // Load x+1
746 long int sumxp = s2+Vc::Vector<prop_type>::Size;
747 sumxp += (k+Vc::Vector<prop_type>::Size == sx::value)?offset_jump[1] - sx::value:0;
748
749 long int sumym = (j == 0)?offset_jump[2] + (sy::value-1)*sx::value:-sx::value;
750 sumym += s2;
751 long int sumyp = (j == sy::value-1)?offset_jump[3] - (sy::value - 1)*sx::value:sx::value;
752 sumyp += s2;
753 long int sumzm = (v == 0)?offset_jump[4] + (sz::value-1)*sx::value*sy::value:-sx::value*sy::value;
754 sumzm += s2;
755 long int sumzp = (v == sz::value-1)?offset_jump[5] - (sz::value - 1)*sx::value*sy::value:sx::value*sy::value;
756 sumzp += s2;
757
758 if (Vc::Vector<prop_type>::Size == 1 || Vc::Vector<prop_type>::Size == 2 || Vc::Vector<prop_type>::Size == 4 || Vc::Vector<prop_type>::Size == 8)
759 {
760 mxm.i = *(typename data_il<Vc::Vector<prop_type>::Size>::type *)&mask.mask[s2];
761 mxm.i = mxm.i << 8;
762 mxm.i |= (typename data_il<Vc::Vector<prop_type>::Size>::type)mask.mask[sumxm];
763
764 mxp.i = *(typename data_il<Vc::Vector<prop_type>::Size>::type *)&mask.mask[s2];
765 mxp.i = mxp.i >> 8;
766 mxp.i |= ((typename data_il<Vc::Vector<prop_type>::Size>::type)mask.mask[sumxp]) << (Vc::Vector<prop_type>::Size - 1)*8;
767
768 mym.i = *(typename data_il<Vc::Vector<prop_type>::Size>::type *)&mask.mask[sumym];
769 myp.i = *(typename data_il<Vc::Vector<prop_type>::Size>::type *)&mask.mask[sumyp];
770
771 mzm.i = *(typename data_il<Vc::Vector<prop_type>::Size>::type *)&mask.mask[sumzm];
772 mzp.i = *(typename data_il<Vc::Vector<prop_type>::Size>::type *)&mask.mask[sumzp];
773 }
774
775 cs1.xm = cmd1;
776 cs1.xm = cs1.xm.shifted(-1);
777 cs1.xm[0] = chunk.template get<prop_src1>()[sumxm];
778
779 cs2.xm = cmd2;
780 cs2.xm = cs2.xm.shifted(-1);
781 cs2.xm[0] = chunk.template get<prop_src2>()[sumxm];
782
783 cs1.xp = cmd1;
784 cs1.xp = cs1.xp.shifted(1);
785 cs1.xp[Vc::Vector<prop_type>::Size - 1] = chunk.template get<prop_src1>()[sumxp];
786
787 cs2.xp = cmd2;
788 cs2.xp = cs2.xp.shifted(1);
789 cs2.xp[Vc::Vector<prop_type>::Size - 1] = chunk.template get<prop_src2>()[sumxp];
790
791 // Load y and z direction
792
793 cs1.ym.load(&chunk.template get<prop_src1>()[sumym],Vc::Aligned);
794 cs1.yp.load(&chunk.template get<prop_src1>()[sumyp],Vc::Aligned);
795 cs1.zm.load(&chunk.template get<prop_src1>()[sumzm],Vc::Aligned);
796 cs1.zp.load(&chunk.template get<prop_src1>()[sumzp],Vc::Aligned);
797
798 cs2.ym.load(&chunk.template get<prop_src2>()[sumym],Vc::Aligned);
799 cs2.yp.load(&chunk.template get<prop_src2>()[sumyp],Vc::Aligned);
800 cs2.zm.load(&chunk.template get<prop_src2>()[sumzm],Vc::Aligned);
801 cs2.zp.load(&chunk.template get<prop_src2>()[sumzp],Vc::Aligned);
802
803 // Calculate
804
805 data_il<4> tot_m;
806 tot_m.i = mxm.i + mxp.i + mym.i + myp.i + mzm.i + mzp.i;
807
808 Vc::Vector<prop_type> res1;
809 Vc::Vector<prop_type> res2;
810
811 func(res1,res2,cmd1,cmd2,cs1,cs2,tot_m.uc,args ... );
812
813 Vc::Mask<prop_type> m(&mask_row[k]);
814
815 res1.store(&chunk.template get<prop_dst1>()[s2],m,Vc::Aligned);
816 res2.store(&chunk.template get<prop_dst2>()[s2],m,Vc::Aligned);
817
818 s2 += Vc::Vector<prop_type>::Size;
819 }
820 }
821 }
822
823 ++it;
824 }
825 }
826
827 template<bool findNN, unsigned int stencil_size, typename prop_type, typename SparseGridType, typename lambda_f, typename ... ArgsT >
828 static void conv_cross_ids(grid_key_dx<3> & start, grid_key_dx<3> & stop, SparseGridType & grid , lambda_f func, ArgsT ... args)
829 {
830 auto it = grid.template getBlockIterator<stencil_size>(start,stop);
831
832 auto & datas = grid.private_get_data();
833 auto & headers = grid.private_get_header_mask();
834
835 typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<0>>::type sz0;
836 typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<1>>::type sz1;
837 typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<2>>::type sz2;
838
839 typedef typename SparseGridType::chunking_type chunking;
840
841 while (it.isNext())
842 {
843 // Load
844 long int offset_jump[6];
845
846 size_t cid = it.getChunkId();
847
848 auto chunk = datas.get(cid);
849 auto & mask = headers.get(cid);
850
851 bool exist;
852 grid_key_dx<3> p = grid.getChunkPos(cid) + grid_key_dx<3>({-1,0,0});
853 long int r = grid.getChunk(p,exist);
854 offset_jump[0] = (r-cid)*decltype(it)::sizeBlock;
855
856 p = grid.getChunkPos(cid) + grid_key_dx<3>({1,0,0});
857 r = grid.getChunk(p,exist);
858 offset_jump[1] = (r-cid)*decltype(it)::sizeBlock;
859
860 p = grid.getChunkPos(cid) + grid_key_dx<3>({0,-1,0});
861 r = grid.getChunk(p,exist);
862 offset_jump[2] = (r-cid)*decltype(it)::sizeBlock;
863
864 p = grid.getChunkPos(cid) + grid_key_dx<3>({0,1,0});
865 r = grid.getChunk(p,exist);
866 offset_jump[3] = (r-cid)*decltype(it)::sizeBlock;
867
868 p = grid.getChunkPos(cid) + grid_key_dx<3>({0,0,-1});
869 r = grid.getChunk(p,exist);
870 offset_jump[4] = (r-cid)*decltype(it)::sizeBlock;
871
872 p = grid.getChunkPos(cid) + grid_key_dx<3>({0,0,1});
873 r = grid.getChunk(p,exist);
874 offset_jump[5] = (r-cid)*decltype(it)::sizeBlock;
875
876 // Load offset jumps
877
878 // construct a row mask
879
880 long int s2 = 0;
881
882 typedef typename boost::mpl::at<typename chunking::type,boost::mpl::int_<2>>::type sz;
883 typedef typename boost::mpl::at<typename chunking::type,boost::mpl::int_<1>>::type sy;
884 typedef typename boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type sx;
885
887
888 for (int k = 0 ; k < sx::value ; k++)
889 {
890 ids.mask_row[k] = (k >= it.start(0) && k < it.stop(0))?true:false;
891 }
892
893 for (int v = it.start(2) ; v < it.stop(2) ; v++)
894 {
895 for (int j = it.start(1) ; j < it.stop(1) ; j++)
896 {
897 s2 = it.Lin(0,j,v);
898 for (int k = 0 ; k < sx::value ; k += Vc::Vector<prop_type>::Size)
899 {
900 // we do only id exist the point
901 if (*(int *)&mask.mask[s2] == 0) {s2 += Vc::Vector<prop_type>::Size; continue;}
902
903 data_il<4> mxm;
904 data_il<4> mxp;
905 data_il<4> mym;
906 data_il<4> myp;
907 data_il<4> mzm;
908 data_il<4> mzp;
909
910 ids.k = k;
911
912 // Load x-1
913 ids.sumdm[0] = s2-1;
914 ids.sumdm[0] += (k==0)?offset_jump[0] + sx::value:0;
915
916 // Load x+1
917 ids.sumdp[0] = s2+Vc::Vector<prop_type>::Size;
918 ids.sumdp[0] += (k+Vc::Vector<prop_type>::Size == sx::value)?offset_jump[1] - sx::value:0;
919
920 ids.sumdm[1] = (j == 0)?offset_jump[2] + (sy::value-1)*sx::value:-sx::value;
921 ids.sumdm[1] += s2;
922 ids.sumdp[1] = (j == sy::value-1)?offset_jump[3] - (sy::value - 1)*sx::value:sx::value;
923 ids.sumdp[1] += s2;
924 ids.sumdm[2] = (v == 0)?offset_jump[4] + (sz::value-1)*sx::value*sy::value:-sx::value*sy::value;
925 ids.sumdm[2] += s2;
926 ids.sumdp[2] = (v == sz::value-1)?offset_jump[5] - (sz::value - 1)*sx::value*sy::value:sx::value*sy::value;
927 ids.sumdp[2] += s2;
928
929 ids.s2 = s2;
930
931 if (Vc::Vector<prop_type>::Size == 2)
932 {
933 mxm.i = *(short int *)&mask.mask[s2];
934 mxm.i = mxm.i << 8;
935 mxm.i |= (short int)mask.mask[ids.sumdm[0]];
936
937 mxp.i = *(short int *)&mask.mask[s2];
938 mxp.i = mxp.i >> 8;
939 mxp.i |= ((short int)mask.mask[ids.sumdp[0]]) << (Vc::Vector<prop_type>::Size - 1)*8;
940
941 mym.i = *(short int *)&mask.mask[ids.sumdm[1]];
942 myp.i = *(short int *)&mask.mask[ids.sumdp[1]];
943
944 mzm.i = *(short int *)&mask.mask[ids.sumdm[2]];
945 mzp.i = *(short int *)&mask.mask[ids.sumdp[2]];
946 }
947 else if (Vc::Vector<prop_type>::Size == 4)
948 {
949 mxm.i = *(int *)&mask.mask[s2];
950 mxm.i = mxm.i << 8;
951 mxm.i |= (int)mask.mask[ids.sumdm[0]];
952
953 mxp.i = *(int *)&mask.mask[s2];
954 mxp.i = mxp.i >> 8;
955 mxp.i |= ((int)mask.mask[ids.sumdp[0]]) << (Vc::Vector<prop_type>::Size - 1)*8;
956
957 mym.i = *(int *)&mask.mask[ids.sumdm[1]];
958 myp.i = *(int *)&mask.mask[ids.sumdp[1]];
959
960 mzm.i = *(int *)&mask.mask[ids.sumdm[2]];
961 mzp.i = *(int *)&mask.mask[ids.sumdp[2]];
962 }
963 else
964 {
965 std::cout << __FILE__ << ":" << __LINE__ << " UNSUPPORTED" << std::endl;
966 }
967
968 // Calculate
969
970 data_il<4> tot_m;
971 tot_m.i = mxm.i + mxp.i + mym.i + myp.i + mzm.i + mzp.i;
972
973 func(chunk,ids,tot_m.uc,args ... );
974
975 s2 += Vc::Vector<prop_type>::Size;
976 }
977 }
978 }
979
980 ++it;
981 }
982 }
983
984};
985
986#endif
987
988
989#endif /* SPARSEGRID_CONV_OPT_HPP_ */
grid_key_dx is the key to access any element in the grid
Definition grid_key.hpp:19
Get the neighborhood iterator based on type.
It model an expression expr1 + ... exprn.
Definition sum.hpp:93