8#ifndef SPARSEGRID_CONV_OPT_HPP_
9#define SPARSEGRID_CONV_OPT_HPP_
11template<
unsigned int l>
19 typedef long int type;
37 typedef short int type;
53template<
unsigned int dim,
unsigned int sz>
66template<
unsigned int dim>
69 template<
unsigned int prop_src,
unsigned int prop_dst,
unsigned int stencil_size ,
unsigned int N,
typename SparseGridType,
typename lambda_f,
typename ... ArgsT >
73 std::cout << __FILE__ <<
":" << __LINE__ <<
" error conv operation not implemented for this dimension " << std::endl;
75 std::cout << __FILE__ <<
":" << __LINE__ <<
" error conv is unsupported when compiled on NVCC " << std::endl;
79 template<
bool findNN,
unsigned int prop_src,
unsigned int prop_dst,
unsigned int stencil_size,
typename SparseGridType,
typename lambda_f,
typename ... ArgsT >
83 std::cout << __FILE__ <<
":" << __LINE__ <<
" error conv_cross operation not implemented for this dimension " << std::endl;
85 std::cout << __FILE__ <<
":" << __LINE__ <<
" error conv_cross is unsupported when compiled on NVCC " << std::endl;
89 template<
bool findNN,
typename NNType,
unsigned int prop_src1,
unsigned int prop_src2,
90 unsigned int prop_dst1,
unsigned int prop_dst2,
91 unsigned int stencil_size ,
unsigned int N,
92 typename SparseGridType,
typename lambda_f,
typename ... ArgsT >
96 std::cout << __FILE__ <<
":" << __LINE__ <<
" error conv2 operation not implemented for this dimension " << std::endl;
98 std::cout << __FILE__ <<
":" << __LINE__ <<
" error conv2 is unsupported when compiled on NVCC " << std::endl;
102 template<
bool findNN,
unsigned int prop_src1,
unsigned int prop_src2,
unsigned int prop_dst1,
unsigned int prop_dst2,
unsigned int stencil_size,
typename SparseGridType,
typename lambda_f,
typename ... ArgsT >
106 std::cout << __FILE__ <<
":" << __LINE__ <<
" error conv_cross2 operation not implemented for this dimension " << std::endl;
108 std::cout << __FILE__ <<
":" << __LINE__ <<
" error conv_cross2 is unsupported when compiled on NVCC " << std::endl;
113#if !defined(__NVCC__) || defined(CUDA_ON_CPU) || defined(__HIP__)
116template<
unsigned int dir,
int p,
unsigned int prop_src1,
typename chunk_type,
typename vect_type,
typename ids_type>
117void load_crs(vect_type & cs1, chunk_type & chunk, ids_type & ids)
119 if (dir == 0 && p < 0)
121 Vc::Vector<typename vect_type::EntryType> cmd1(&chunk.template get<prop_src1>()[ids.s2]);
124 cs1 = cs1.shifted(-1);
125 cs1[0] = chunk.template get<prop_src1>()[ids.sumdm[dir]];
127 else if (dir == 0 && p > 0)
129 Vc::Vector<typename vect_type::EntryType> cmd1(&chunk.template get<prop_src1>()[ids.s2]);
132 cs1 = cs1.shifted(1);
133 cs1[Vc::Vector<typename vect_type::EntryType>::Size - 1] = chunk.template get<prop_src1>()[ids.sumdp[0]];
137 cs1.load(&chunk.template get<prop_src1>()[ids.sumdm[dir]],Vc::Aligned);
141 cs1.load(&chunk.template get<prop_src1>()[ids.sumdp[dir]],Vc::Aligned);
145 Vc::Vector<typename vect_type::EntryType> cmd1(&chunk.template get<prop_src1>()[ids.s2]);
151template<
unsigned int prop_dst1,
typename chunk_type,
typename vect_type,
typename ids_type>
152void store_crs(chunk_type & chunk, vect_type & res, ids_type & ids)
154 Vc::Mask<typename vect_type::EntryType> m(&ids.mask_row[ids.k]);
156 res.store(&chunk.template get<prop_dst1>()[ids.s2],m,Vc::Aligned);
159template<
unsigned int prop_dst1,
unsigned int comp,
typename chunk_type,
typename vect_type,
typename ids_type>
160void store_crs_v(chunk_type & chunk, vect_type & res, ids_type & ids)
162 Vc::Mask<typename vect_type::EntryType> m(&ids.mask_row[ids.k]);
164 res.store(&chunk.template get<prop_dst1>()[comp][ids.s2],m,Vc::Aligned);
167template<
unsigned int dir,
int p,
unsigned int comp,
unsigned int prop_src1,
typename chunk_type,
typename vect_type,
typename ids_type>
168void load_crs_v(vect_type & cs1, chunk_type & chunk, ids_type & ids)
170 if (dir == 0 && p < 0)
172 Vc::Vector<typename vect_type::EntryType> cmd1(&chunk.template get<prop_src1>()[comp][ids.s2]);
175 cs1 = cs1.shifted(-1);
176 cs1[0] = chunk.template get<prop_src1>()[comp][ids.sumdm[dir]];
178 else if (dir == 0 && p > 0)
180 Vc::Vector<typename vect_type::EntryType> cmd1(&chunk.template get<prop_src1>()[comp][ids.s2]);
183 cs1 = cs1.shifted(1);
184 cs1[Vc::Vector<typename vect_type::EntryType>::Size - 1] = chunk.template get<prop_src1>()[comp][ids.sumdp[dir]];
188 cs1.load(&chunk.template get<prop_src1>()[comp][ids.sumdm[dir]],Vc::Aligned);
192 cs1.load(&chunk.template get<prop_src1>()[comp][ids.sumdp[dir]],Vc::Aligned);
196 Vc::Vector<typename vect_type::EntryType> cmd1(&chunk.template get<prop_src1>()[comp][ids.s2]);
203template<
typename prop_type>
206 Vc::Vector<prop_type> xm;
207 Vc::Vector<prop_type> xp;
208 Vc::Vector<prop_type> ym;
209 Vc::Vector<prop_type> yp;
210 Vc::Vector<prop_type> zm;
211 Vc::Vector<prop_type> zp;
217 template<
bool findNN,
typename NNtype,
unsigned int prop_src,
unsigned int prop_dst,
unsigned int stencil_size ,
unsigned int N,
typename SparseGridType,
typename lambda_f,
typename ... ArgsT >
220 auto it =
grid.template getBlockIterator<stencil_size>(start,stop);
222 typedef typename boost::mpl::at<typename SparseGridType::value_type::type, boost::mpl::int_<prop_src>>::type prop_type;
224 unsigned char mask[
decltype(it)::sizeBlockBord];
225 unsigned char mask_sum[
decltype(it)::sizeBlockBord];
226 unsigned char mask_unused[
decltype(it)::sizeBlock];
227 __attribute__ ((aligned (32))) prop_type block_bord_src[
decltype(it)::sizeBlockBord];
228 __attribute__ ((aligned (32))) prop_type block_bord_dst[
decltype(it)::sizeBlock];
230 typedef typename boost::mpl::at<
typename decltype(it)::stop_border_vmpl,boost::mpl::int_<0>>::type sz0;
231 typedef typename boost::mpl::at<
typename decltype(it)::stop_border_vmpl,boost::mpl::int_<1>>::type sz1;
232 typedef typename boost::mpl::at<
typename decltype(it)::stop_border_vmpl,boost::mpl::int_<2>>::type sz2;
236 it.template loadBlockBorder<prop_src,NNtype,findNN>(block_bord_src,mask);
238 if (it.start_b(2) != stencil_size || it.start_b(1) != stencil_size || it.start_b(0) != stencil_size ||
239 it.stop_b(2) != sz2::value+stencil_size || it.stop_b(1) != sz1::value+stencil_size || it.stop_b(0) != sz0::value+stencil_size)
241 auto & header_mask =
grid.private_get_header_mask();
242 auto & header_inf =
grid.private_get_header_inf();
244 loadBlock_impl<prop_dst,0,3,
typename decltype(it)::vector_blocks_exts_type,
typename decltype(it)::vector_ext_type>::template loadBlock<decltype(it)::sizeBlock>(block_bord_dst,
grid,it.getChunkId(),mask_unused);
248 for (
int k = it.start_b(2) ; k < it.stop_b(2) ; k++)
250 for (
int j = it.start_b(1) ; j < it.stop_b(1) ; j++)
252 int cc = it.LinB(it.start_b(0),j,k);
255 for (
int s = 0 ; s < N ; s++)
257 c[s] = it.LinB(it.start_b(0)+stencil[s][0],j+stencil[s][1],k+stencil[s][2]);
260 for (
int i = it.start_b(0) ; i < it.stop_b(0) ; i +=
sizeof(size_t))
262 size_t cmd = *(
size_t *)&mask[cc];
268 for (
int s = 0 ; s < N ; s++)
270 xm[s] = *(
size_t *)&mask[c[s]];
274 for (
int s = 0 ; s < N ; s++)
279 *(
size_t *)&mask_sum[cc] =
sum;
282 cc +=
sizeof(size_t);
283 for (
int s = 0 ; s < N ; s++)
285 c[s] +=
sizeof(size_t);
291 for (
int k = it.start_b(2) ; k < it.stop_b(2) ; k++)
293 for (
int j = it.start_b(1) ; j < it.stop_b(1) ; j++)
295 int cc = it.LinB(it.start_b(0),j,k);
298 int cd = it.LinB_off(it.start_b(0),j,k);
300 for (
int s = 0 ; s < N ; s++)
302 c[s] = it.LinB(it.start_b(0)+stencil[s][0],j+stencil[s][1],k+stencil[s][2]);
305 for (
int i = it.start_b(0) ; i < it.stop_b(0) ; i += Vc::Vector<prop_type>::Size)
307 Vc::Mask<prop_type> cmp;
309 for (
int s = 0 ; s < Vc::Vector<prop_type>::Size ; s++)
311 cmp[s] = (mask[cc+s] ==
true && i+s < it.stop_b(0));
315 if (Vc::none_of(cmp) ==
false)
317 Vc::Mask<prop_type> surround;
319 Vc::Vector<prop_type> xs[N+1];
321 xs[0] = Vc::Vector<prop_type>(&block_bord_src[cc],Vc::Unaligned);
323 for (
int s = 1 ; s < N+1 ; s++)
325 xs[s] = Vc::Vector<prop_type>(&block_bord_src[c[s-1]],Vc::Unaligned);
328 auto res = func(xs, &mask_sum[cc], args ...);
330 res.store(&block_bord_dst[cd],cmp,Vc::Aligned);
333 cc += Vc::Vector<prop_type>::Size;
334 for (
int s = 0 ; s < N ; s++)
336 c[s] += Vc::Vector<prop_type>::Size;
338 cd += Vc::Vector<prop_type>::Size;
343 it.template storeBlock<prop_dst>(block_bord_dst);
349 template<
bool findNN,
unsigned int prop_src,
unsigned int prop_dst,
unsigned int stencil_size,
typename SparseGridType,
typename lambda_f,
typename ... ArgsT >
352 auto it =
grid.template getBlockIterator<1>(start,stop);
354 auto & datas =
grid.private_get_data();
355 auto & headers =
grid.private_get_header_mask();
357 typedef typename boost::mpl::at<
typename decltype(it)::stop_border_vmpl,boost::mpl::int_<0>>::type sz0;
358 typedef typename boost::mpl::at<
typename decltype(it)::stop_border_vmpl,boost::mpl::int_<1>>::type sz1;
359 typedef typename boost::mpl::at<
typename decltype(it)::stop_border_vmpl,boost::mpl::int_<2>>::type sz2;
361 typedef typename SparseGridType::chunking_type chunking;
363 typedef typename boost::mpl::at<typename SparseGridType::value_type::type, boost::mpl::int_<prop_src>>::type prop_type;
368 long int offset_jump[6];
370 size_t cid = it.getChunkId();
372 auto chunk = datas.get(cid);
373 auto & mask = headers.get(cid);
377 long int r =
grid.getChunk(p,exist);
378 offset_jump[0] = (r-cid)*
decltype(it)::sizeBlock;
381 r =
grid.getChunk(p,exist);
382 offset_jump[1] = (r-cid)*
decltype(it)::sizeBlock;
385 r =
grid.getChunk(p,exist);
386 offset_jump[2] = (r-cid)*
decltype(it)::sizeBlock;
389 r =
grid.getChunk(p,exist);
390 offset_jump[3] = (r-cid)*
decltype(it)::sizeBlock;
393 r =
grid.getChunk(p,exist);
394 offset_jump[4] = (r-cid)*
decltype(it)::sizeBlock;
397 r =
grid.getChunk(p,exist);
398 offset_jump[5] = (r-cid)*
decltype(it)::sizeBlock;
406 typedef typename boost::mpl::at<typename chunking::type,boost::mpl::int_<2>>::type sz;
407 typedef typename boost::mpl::at<typename chunking::type,boost::mpl::int_<1>>::type sy;
408 typedef typename boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type sx;
411 bool mask_row[sx::value];
413 for (
int k = 0 ; k < sx::value ; k++)
415 mask_row[k] = (k >= it.start(0) && k < it.stop(0))?
true:
false;
418 for (
int v = it.start(2) ; v < it.stop(2) ; v++)
420 for (
int j = it.start(1) ; j < it.stop(1) ; j++)
423 for (
int k = 0 ; k < sx::value ; k += Vc::Vector<prop_type>::Size)
426 if (*(
typename data_il<Vc::Vector<prop_type>::Size>::type *)&mask.mask[s2] == 0) {s2 += Vc::Vector<prop_type>::Size;
continue;}
437 Vc::Vector<prop_type> cmd(&chunk.template get<prop_src>()[s2]);
440 long int sumxm = s2-1;
441 sumxm += (k==0)?offset_jump[0] + sx::value:0;
444 long int sumxp = s2+Vc::Vector<prop_type>::Size;
445 sumxp += (k+Vc::Vector<prop_type>::Size == sx::value)?offset_jump[1] - sx::value:0;
447 long int sumym = (j == 0)?offset_jump[2] + (sy::value-1)*sx::value:-sx::value;
449 long int sumyp = (j == sy::value-1)?offset_jump[3] - (sy::value - 1)*sx::value:sx::value;
451 long int sumzm = (v == 0)?offset_jump[4] + (sz::value-1)*sx::value*sy::value:-sx::value*sy::value;
453 long int sumzp = (v == sz::value-1)?offset_jump[5] - (sz::value - 1)*sx::value*sy::value:sx::value*sy::value;
456 if (Vc::Vector<prop_type>::Size == 2 || Vc::Vector<prop_type>::Size == 4 || Vc::Vector<prop_type>::Size == 8)
474 std::cout << __FILE__ <<
":" << __LINE__ <<
" UNSUPPORTED" << std::endl;
478 cs.xm = cs.xm.shifted(-1);
479 cs.xm[0] = chunk.template get<prop_src>()[sumxm];
483 cs.xp = cs.xp.shifted(1);
484 cs.xp[Vc::Vector<prop_type>::Size - 1] = chunk.template get<prop_src>()[sumxp];
488 cs.ym.load(&chunk.template get<prop_src>()[sumym],Vc::Aligned);
489 cs.yp.load(&chunk.template get<prop_src>()[sumyp],Vc::Aligned);
490 cs.zm.load(&chunk.template get<prop_src>()[sumzm],Vc::Aligned);
491 cs.zp.load(&chunk.template get<prop_src>()[sumzp],Vc::Aligned);
496 tot_m.i = mxm.i + mxp.i + mym.i + myp.i + mzm.i + mzp.i;
498 Vc::Vector<prop_type> res = func(cmd,cs,tot_m.uc,args ... );
500 Vc::Mask<prop_type> m(&mask_row[k]);
502 res.store(&chunk.template get<prop_dst>()[s2],m,Vc::Aligned);
504 s2 += Vc::Vector<prop_type>::Size;
514 template<
bool findNN,
typename NNType,
unsigned int prop_src1,
unsigned int prop_src2,
515 unsigned int prop_dst1,
unsigned int prop_dst2,
516 unsigned int stencil_size ,
unsigned int N,
517 typename SparseGridType,
typename lambda_f,
typename ... ArgsT >
520 auto it =
grid.template getBlockIterator<stencil_size>(start,stop);
522 typedef typename boost::mpl::at<typename SparseGridType::value_type::type, boost::mpl::int_<prop_src1>>::type prop_type;
524 unsigned char mask[
decltype(it)::sizeBlockBord];
525 unsigned char mask_sum[
decltype(it)::sizeBlockBord];
526 __attribute__ ((aligned (64))) prop_type block_bord_src1[
decltype(it)::sizeBlockBord];
527 __attribute__ ((aligned (64))) prop_type block_bord_dst1[
decltype(it)::sizeBlock+16];
528 __attribute__ ((aligned (64))) prop_type block_bord_src2[
decltype(it)::sizeBlockBord];
529 __attribute__ ((aligned (64))) prop_type block_bord_dst2[
decltype(it)::sizeBlock+16];
531 typedef typename boost::mpl::at<
typename decltype(it)::stop_border_vmpl,boost::mpl::int_<0>>::type sz0;
532 typedef typename boost::mpl::at<
typename decltype(it)::stop_border_vmpl,boost::mpl::int_<1>>::type sz1;
533 typedef typename boost::mpl::at<
typename decltype(it)::stop_border_vmpl,boost::mpl::int_<2>>::type sz2;
537 it.template loadBlockBorder<prop_src1,NNType,findNN>(block_bord_src1,mask);
538 it.template loadBlockBorder<prop_src2,NNType,findNN>(block_bord_src2,mask);
541 for (
int k = it.start_b(2) ; k < it.stop_b(2) ; k++)
543 for (
int j = it.start_b(1) ; j < it.stop_b(1) ; j++)
545 int cc = it.LinB(it.start_b(0),j,k);
548 for (
int s = 0 ; s < N ; s++)
550 c[s] = it.LinB(it.start_b(0)+stencil[s][0],j+stencil[s][1],k+stencil[s][2]);
553 for (
int i = it.start_b(0) ; i < it.stop_b(0) ; i +=
sizeof(size_t))
555 size_t cmd = *(
size_t *)&mask[cc];
557 if (cmd == 0) {
continue;}
562 for (
int s = 0 ; s < N ; s++)
564 xm[s] = *(
size_t *)&mask[c[s]];
568 for (
int s = 0 ; s < N ; s++)
573 *(
size_t *)&mask_sum[cc] =
sum;
575 cc +=
sizeof(size_t);
576 for (
int s = 0 ; s < N ; s++)
578 c[s] +=
sizeof(size_t);
584 for (
int k = it.start_b(2) ; k < it.stop_b(2) ; k++)
586 for (
int j = it.start_b(1) ; j < it.stop_b(1) ; j++)
588 int cc = it.LinB(it.start_b(0),j,k);
591 int cd = it.LinB_off(it.start_b(0),j,k);
593 for (
int s = 0 ; s < N ; s++)
595 c[s] = it.LinB(it.start_b(0)+stencil[s][0],j+stencil[s][1],k+stencil[s][2]);
598 for (
int i = it.start_b(0) ; i < it.stop_b(0) ; i += Vc::Vector<prop_type>::Size)
600 Vc::Mask<prop_type> cmp;
602 for (
int s = 0 ; s < Vc::Vector<prop_type>::Size ; s++)
604 cmp[s] = (mask[cc+s] ==
true);
608 if (Vc::none_of(cmp) ==
true) {
continue;}
610 Vc::Mask<prop_type> surround;
612 Vc::Vector<prop_type> xs1[N+1];
613 Vc::Vector<prop_type> xs2[N+1];
615 xs1[0] = Vc::Vector<prop_type>(&block_bord_src1[cc],Vc::Unaligned);
616 xs2[0] = Vc::Vector<prop_type>(&block_bord_src2[cc],Vc::Unaligned);
618 for (
int s = 1 ; s < N+1 ; s++)
620 xs1[s] = Vc::Vector<prop_type>(&block_bord_src1[c[s-1]],Vc::Unaligned);
621 xs2[s] = Vc::Vector<prop_type>(&block_bord_src2[c[s-1]],Vc::Unaligned);
624 Vc::Vector<prop_type> vo1;
625 Vc::Vector<prop_type> vo2;
627 func(vo1, vo2, xs1, xs2, &mask_sum[cc], args ...);
629 vo1.store(&block_bord_dst1[cd],cmp,Vc::Unaligned);
630 vo2.store(&block_bord_dst2[cd],cmp,Vc::Unaligned);
632 cc += Vc::Vector<prop_type>::Size;
633 for (
int s = 0 ; s < N ; s++)
635 c[s] += Vc::Vector<prop_type>::Size;
637 cd += Vc::Vector<prop_type>::Size;
642 it.template storeBlock<prop_dst1>(block_bord_dst1);
643 it.template storeBlock<prop_dst2>(block_bord_dst2);
649 template<
bool findNN,
unsigned int prop_src1,
unsigned int prop_src2,
unsigned int prop_dst1,
unsigned int prop_dst2,
unsigned int stencil_size,
typename SparseGridType,
typename lambda_f,
typename ... ArgsT >
652 auto it =
grid.template getBlockIterator<stencil_size>(start,stop);
654 auto & datas =
grid.private_get_data();
655 auto & headers =
grid.private_get_header_mask();
657 typedef typename boost::mpl::at<
typename decltype(it)::stop_border_vmpl,boost::mpl::int_<0>>::type sz0;
658 typedef typename boost::mpl::at<
typename decltype(it)::stop_border_vmpl,boost::mpl::int_<1>>::type sz1;
659 typedef typename boost::mpl::at<
typename decltype(it)::stop_border_vmpl,boost::mpl::int_<2>>::type sz2;
661 typedef typename SparseGridType::chunking_type chunking;
663 typedef typename boost::mpl::at<typename SparseGridType::value_type::type, boost::mpl::int_<prop_src1>>::type prop_type;
668 long int offset_jump[6];
670 size_t cid = it.getChunkId();
672 auto chunk = datas.get(cid);
673 auto & mask = headers.get(cid);
677 long int r =
grid.getChunk(p,exist);
678 offset_jump[0] = (r-cid)*
decltype(it)::sizeBlock;
681 r =
grid.getChunk(p,exist);
682 offset_jump[1] = (r-cid)*
decltype(it)::sizeBlock;
685 r =
grid.getChunk(p,exist);
686 offset_jump[2] = (r-cid)*
decltype(it)::sizeBlock;
689 r =
grid.getChunk(p,exist);
690 offset_jump[3] = (r-cid)*
decltype(it)::sizeBlock;
693 r =
grid.getChunk(p,exist);
694 offset_jump[4] = (r-cid)*
decltype(it)::sizeBlock;
697 r =
grid.getChunk(p,exist);
698 offset_jump[5] = (r-cid)*
decltype(it)::sizeBlock;
706 typedef typename boost::mpl::at<typename chunking::type,boost::mpl::int_<2>>::type sz;
707 typedef typename boost::mpl::at<typename chunking::type,boost::mpl::int_<1>>::type sy;
708 typedef typename boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type sx;
711 bool mask_row[sx::value];
713 for (
int k = 0 ; k < sx::value ; k++)
715 mask_row[k] = (k >= it.start(0) && k < it.stop(0))?
true:
false;
718 for (
int v = it.start(2) ; v < it.stop(2) ; v++)
720 for (
int j = it.start(1) ; j < it.stop(1) ; j++)
723 for (
int k = 0 ; k < sx::value ; k += Vc::Vector<prop_type>::Size)
726 if (*(
typename data_il<Vc::Vector<prop_type>::Size>::type *)&mask.mask[s2] == 0) {s2 += Vc::Vector<prop_type>::Size;
continue;}
738 Vc::Vector<prop_type> cmd1(&chunk.template get<prop_src1>()[s2]);
739 Vc::Vector<prop_type> cmd2(&chunk.template get<prop_src2>()[s2]);
742 long int sumxm = s2-1;
743 sumxm += (k==0)?offset_jump[0] + sx::value:0;
746 long int sumxp = s2+Vc::Vector<prop_type>::Size;
747 sumxp += (k+Vc::Vector<prop_type>::Size == sx::value)?offset_jump[1] - sx::value:0;
749 long int sumym = (j == 0)?offset_jump[2] + (sy::value-1)*sx::value:-sx::value;
751 long int sumyp = (j == sy::value-1)?offset_jump[3] - (sy::value - 1)*sx::value:sx::value;
753 long int sumzm = (v == 0)?offset_jump[4] + (sz::value-1)*sx::value*sy::value:-sx::value*sy::value;
755 long int sumzp = (v == sz::value-1)?offset_jump[5] - (sz::value - 1)*sx::value*sy::value:sx::value*sy::value;
758 if (Vc::Vector<prop_type>::Size == 1 || Vc::Vector<prop_type>::Size == 2 || Vc::Vector<prop_type>::Size == 4 || Vc::Vector<prop_type>::Size == 8)
776 cs1.xm = cs1.xm.shifted(-1);
777 cs1.xm[0] = chunk.template get<prop_src1>()[sumxm];
780 cs2.xm = cs2.xm.shifted(-1);
781 cs2.xm[0] = chunk.template get<prop_src2>()[sumxm];
784 cs1.xp = cs1.xp.shifted(1);
785 cs1.xp[Vc::Vector<prop_type>::Size - 1] = chunk.template get<prop_src1>()[sumxp];
788 cs2.xp = cs2.xp.shifted(1);
789 cs2.xp[Vc::Vector<prop_type>::Size - 1] = chunk.template get<prop_src2>()[sumxp];
793 cs1.ym.load(&chunk.template get<prop_src1>()[sumym],Vc::Aligned);
794 cs1.yp.load(&chunk.template get<prop_src1>()[sumyp],Vc::Aligned);
795 cs1.zm.load(&chunk.template get<prop_src1>()[sumzm],Vc::Aligned);
796 cs1.zp.load(&chunk.template get<prop_src1>()[sumzp],Vc::Aligned);
798 cs2.ym.load(&chunk.template get<prop_src2>()[sumym],Vc::Aligned);
799 cs2.yp.load(&chunk.template get<prop_src2>()[sumyp],Vc::Aligned);
800 cs2.zm.load(&chunk.template get<prop_src2>()[sumzm],Vc::Aligned);
801 cs2.zp.load(&chunk.template get<prop_src2>()[sumzp],Vc::Aligned);
806 tot_m.i = mxm.i + mxp.i + mym.i + myp.i + mzm.i + mzp.i;
808 Vc::Vector<prop_type> res1;
809 Vc::Vector<prop_type> res2;
811 func(res1,res2,cmd1,cmd2,cs1,cs2,tot_m.uc,args ... );
813 Vc::Mask<prop_type> m(&mask_row[k]);
815 res1.store(&chunk.template get<prop_dst1>()[s2],m,Vc::Aligned);
816 res2.store(&chunk.template get<prop_dst2>()[s2],m,Vc::Aligned);
818 s2 += Vc::Vector<prop_type>::Size;
827 template<
bool findNN,
unsigned int stencil_size,
typename prop_type,
typename SparseGridType,
typename lambda_f,
typename ... ArgsT >
830 auto it =
grid.template getBlockIterator<stencil_size>(start,stop);
832 auto & datas =
grid.private_get_data();
833 auto & headers =
grid.private_get_header_mask();
835 typedef typename boost::mpl::at<
typename decltype(it)::stop_border_vmpl,boost::mpl::int_<0>>::type sz0;
836 typedef typename boost::mpl::at<
typename decltype(it)::stop_border_vmpl,boost::mpl::int_<1>>::type sz1;
837 typedef typename boost::mpl::at<
typename decltype(it)::stop_border_vmpl,boost::mpl::int_<2>>::type sz2;
839 typedef typename SparseGridType::chunking_type chunking;
844 long int offset_jump[6];
846 size_t cid = it.getChunkId();
848 auto chunk = datas.get(cid);
849 auto & mask = headers.get(cid);
853 long int r =
grid.getChunk(p,exist);
854 offset_jump[0] = (r-cid)*
decltype(it)::sizeBlock;
857 r =
grid.getChunk(p,exist);
858 offset_jump[1] = (r-cid)*
decltype(it)::sizeBlock;
861 r =
grid.getChunk(p,exist);
862 offset_jump[2] = (r-cid)*
decltype(it)::sizeBlock;
865 r =
grid.getChunk(p,exist);
866 offset_jump[3] = (r-cid)*
decltype(it)::sizeBlock;
869 r =
grid.getChunk(p,exist);
870 offset_jump[4] = (r-cid)*
decltype(it)::sizeBlock;
873 r =
grid.getChunk(p,exist);
874 offset_jump[5] = (r-cid)*
decltype(it)::sizeBlock;
882 typedef typename boost::mpl::at<typename chunking::type,boost::mpl::int_<2>>::type sz;
883 typedef typename boost::mpl::at<typename chunking::type,boost::mpl::int_<1>>::type sy;
884 typedef typename boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type sx;
888 for (
int k = 0 ; k < sx::value ; k++)
890 ids.mask_row[k] = (k >= it.start(0) && k < it.stop(0))?
true:
false;
893 for (
int v = it.start(2) ; v < it.stop(2) ; v++)
895 for (
int j = it.start(1) ; j < it.stop(1) ; j++)
898 for (
int k = 0 ; k < sx::value ; k += Vc::Vector<prop_type>::Size)
901 if (*(
int *)&mask.mask[s2] == 0) {s2 += Vc::Vector<prop_type>::Size;
continue;}
914 ids.sumdm[0] += (k==0)?offset_jump[0] + sx::value:0;
917 ids.sumdp[0] = s2+Vc::Vector<prop_type>::Size;
918 ids.sumdp[0] += (k+Vc::Vector<prop_type>::Size == sx::value)?offset_jump[1] - sx::value:0;
920 ids.sumdm[1] = (j == 0)?offset_jump[2] + (sy::value-1)*sx::value:-sx::value;
922 ids.sumdp[1] = (j == sy::value-1)?offset_jump[3] - (sy::value - 1)*sx::value:sx::value;
924 ids.sumdm[2] = (v == 0)?offset_jump[4] + (sz::value-1)*sx::value*sy::value:-sx::value*sy::value;
926 ids.sumdp[2] = (v == sz::value-1)?offset_jump[5] - (sz::value - 1)*sx::value*sy::value:sx::value*sy::value;
931 if (Vc::Vector<prop_type>::Size == 2)
933 mxm.i = *(
short int *)&mask.mask[s2];
935 mxm.i |= (
short int)mask.mask[ids.sumdm[0]];
937 mxp.i = *(
short int *)&mask.mask[s2];
939 mxp.i |= ((
short int)mask.mask[ids.sumdp[0]]) << (Vc::Vector<prop_type>::Size - 1)*8;
941 mym.i = *(
short int *)&mask.mask[ids.sumdm[1]];
942 myp.i = *(
short int *)&mask.mask[ids.sumdp[1]];
944 mzm.i = *(
short int *)&mask.mask[ids.sumdm[2]];
945 mzp.i = *(
short int *)&mask.mask[ids.sumdp[2]];
947 else if (Vc::Vector<prop_type>::Size == 4)
949 mxm.i = *(
int *)&mask.mask[s2];
951 mxm.i |= (int)mask.mask[ids.sumdm[0]];
953 mxp.i = *(
int *)&mask.mask[s2];
955 mxp.i |= ((int)mask.mask[ids.sumdp[0]]) << (Vc::Vector<prop_type>::Size - 1)*8;
957 mym.i = *(
int *)&mask.mask[ids.sumdm[1]];
958 myp.i = *(
int *)&mask.mask[ids.sumdp[1]];
960 mzm.i = *(
int *)&mask.mask[ids.sumdm[2]];
961 mzp.i = *(
int *)&mask.mask[ids.sumdp[2]];
965 std::cout << __FILE__ <<
":" << __LINE__ <<
" UNSUPPORTED" << std::endl;
971 tot_m.i = mxm.i + mxp.i + mym.i + myp.i + mzm.i + mzp.i;
973 func(chunk,ids,tot_m.uc,args ... );
975 s2 += Vc::Vector<prop_type>::Size;
grid_key_dx is the key to access any element in the grid
Get the neighborhood iterator based on type.
It model an expression expr1 + ... exprn.