8#ifndef SPARSEGRID_CHUNK_COPY_HPP_
9#define SPARSEGRID_CHUNK_COPY_HPP_
11#if !defined(__NVCC__) || defined(CUDA_ON_CPU) || defined(__HIP__)
16#include "util/mathutil.hpp"
/*! \brief Tell whether a sub-element is flagged in a chunk header mask
 *
 * \tparam headerType header type; it must expose a member `mask` that can be
 *         indexed with an int and whose elements convert to bool
 *
 * \param h header whose mask is read (not modified here)
 * \param sub_id index into h.mask; no range check is performed
 *
 * \return h.mask[sub_id] converted to bool
 *
 */
template<typename headerType>
inline bool exist_sub(headerType & h, int sub_id)
{
	return h.mask[sub_id];
}
33template<
unsigned int v>
36 typedef unsigned char type;
38 template<
typename headerType>
39 static inline void exist(headerType & h,
int sub_id,
unsigned char * pmask)
41 pmask[0] = h.mask[sub_id];
48 typedef unsigned short type;
50 template<
typename headerType>
51 static inline void exist(headerType & h,
int sub_id,
unsigned char * pmask)
53 pmask[0] = h.mask[sub_id];
54 pmask[1] = h.mask[sub_id+1];
61 typedef unsigned int type;
63 template<
typename headerType>
64 static inline void exist(headerType & h,
int sub_id,
unsigned char * pmask)
66 pmask[0] = h.mask[sub_id];
67 pmask[1] = h.mask[sub_id+1];
68 pmask[2] = h.mask[sub_id+2];
69 pmask[3] = h.mask[sub_id+3];
76 typedef unsigned long int type;
78 template<
typename headerType>
79 static inline void exist(headerType & h,
int sub_id,
unsigned char * pmask)
81 pmask[0] = h.mask[sub_id];
82 pmask[1] = h.mask[sub_id+1];
83 pmask[2] = h.mask[sub_id+2];
84 pmask[3] = h.mask[sub_id+3];
85 pmask[4] = h.mask[sub_id+4];
86 pmask[5] = h.mask[sub_id+5];
87 pmask[6] = h.mask[sub_id+6];
88 pmask[7] = h.mask[sub_id+7];
100template<
unsigned int v,
typename headerType>
101inline void exist_sub_v(headerType & h,
int sub_id,
unsigned char * pmask)
108template<
int layout_type,
int prop,
int stencil_size ,
typename chunking,
bool is_cross>
111 template<
unsigned int N1,
typename T,
typename headerType,
typename chunkType>
112 inline static void copy(T ptr[N1],
unsigned char mask[N1], headerType & h ,
const chunkType & chunk)
114 int s = stencil_size + stencil_size*(boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value+2*stencil_size) +
115 stencil_size*(boost::mpl::at<
typename chunking::type,boost::mpl::int_<0>>::type::value+2*stencil_size)*(boost::mpl::at<typename chunking::type,boost::mpl::int_<1>>::type::value+2*stencil_size);
119 for (
int v = 0 ; v < boost::mpl::at<typename chunking::type,boost::mpl::int_<2>>::type::value ; v++)
121 for (
int j = 0 ; j < boost::mpl::at<typename chunking::type,boost::mpl::int_<1>>::type::value ; j++)
123 for (
int k = 0 ; k < boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value ; k++)
126 mask[s] = exist_sub(h,s2);
133 s+= 2*stencil_size*(boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value+2*stencil_size);
137 template<
unsigned int N1,
typename T,
typename chunkType>
138 inline static void store(T ptr[N1] , chunkType & chunk)
142 for (
int v = 0 ; v < boost::mpl::at<typename chunking::type,boost::mpl::int_<2>>::type::value ; v++)
144 for (
int j = 0 ; j < boost::mpl::at<typename chunking::type,boost::mpl::int_<1>>::type::value ; j++)
146 for (
int k = 0 ; k < boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value ; k++)
157template<
unsigned int i>
164 typedef unsigned char type;
170 typedef unsigned short int type;
176 typedef unsigned int type;
182 typedef unsigned long int type;
188 typedef unsigned long int type;
194template<
int prop,
int stencil_size ,
typename chunking,
bool is_cross>
197 template<
unsigned int N1,
typename T,
typename headerType,
typename chunkType>
198 inline static void copy(T ptr[N1],
unsigned char mask[N1], headerType & h ,
const chunkType & chunk)
200 int s = stencil_size + stencil_size*(boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value+2*stencil_size) +
201 stencil_size*(boost::mpl::at<
typename chunking::type,boost::mpl::int_<0>>::type::value+2*stencil_size)*(boost::mpl::at<typename chunking::type,boost::mpl::int_<1>>::type::value+2*stencil_size);
203 typedef boost::mpl::int_<boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value *
sizeof(chunk[0]) /
204 Vc::float_v::Size /
sizeof(
float)> n_it_lead;
208 for (
int v = 0 ; v < boost::mpl::at<typename chunking::type,boost::mpl::int_<2>>::type::value ; v++)
210 for (
int j = 0 ; j < boost::mpl::at<typename chunking::type,boost::mpl::int_<1>>::type::value ; j++)
212 for (
int k = 0 ; k < n_it_lead::value ; k+=4)
214 exist_sub_v<Vc::float_v::Size / (
sizeof(chunk[0])/
sizeof(
float))>(h,s2,&mask[s]);
215 exist_sub_v<Vc::float_v::Size / (
sizeof(chunk[0])/
sizeof(
float))>(h,s2+Vc::float_v::Size / (
sizeof(chunk[0])/
sizeof(float)),&mask[s+Vc::float_v::Size / (
sizeof(chunk[0])/
sizeof(
float))]);
216 exist_sub_v<Vc::float_v::Size / (
sizeof(chunk[0])/
sizeof(
float))>(h,s2+2*Vc::float_v::Size / (
sizeof(chunk[0])/
sizeof(float)),&mask[s+2*Vc::float_v::Size / (
sizeof(chunk[0])/
sizeof(
float))]);
217 exist_sub_v<Vc::float_v::Size / (
sizeof(chunk[0])/
sizeof(
float))>(h,s2+3*Vc::float_v::Size / (
sizeof(chunk[0])/
sizeof(float)),&mask[s+3*Vc::float_v::Size / (
sizeof(chunk[0])/
sizeof(
float))]);
219 Vc::float_v tmp = Vc::float_v((
float *)&chunk[s2],Vc::Aligned);
220 Vc::float_v tmp2 = Vc::float_v((
float *)&chunk[s2+Vc::float_v::Size / (
sizeof(chunk[0])/
sizeof(
float))],Vc::Aligned);
221 Vc::float_v tmp3 = Vc::float_v((
float *)&chunk[s2+2*Vc::float_v::Size / (
sizeof(chunk[0])/
sizeof(
float))],Vc::Aligned);
222 Vc::float_v tmp4 = Vc::float_v((
float *)&chunk[s2+3*Vc::float_v::Size / (
sizeof(chunk[0])/
sizeof(
float))],Vc::Aligned);
223 tmp.store((
float *)&ptr[s],Vc::Unaligned);
224 tmp2.store((
float *)&ptr[s+Vc::float_v::Size / (
sizeof(chunk[0])/
sizeof(
float))],Vc::Unaligned);
225 tmp3.store((
float *)&ptr[s+2*Vc::float_v::Size / (
sizeof(chunk[0])/
sizeof(
float))],Vc::Unaligned);
226 tmp4.store((
float *)&ptr[s+3*Vc::float_v::Size / (
sizeof(chunk[0])/
sizeof(
float))],Vc::Unaligned);
228 s += 4*Vc::float_v::Size / (
sizeof(chunk[0])/
sizeof(
float));
229 s2 += 4*Vc::float_v::Size / (
sizeof(chunk[0])/
sizeof(
float));
231 s += 2*stencil_size ;
233 s+= 2*stencil_size*(boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value+2*stencil_size);
237 template<
unsigned int N1,
typename T,
typename chunkType>
238 inline static void store(T ptr[N1] , chunkType & chunk)
240 typedef boost::mpl::int_<boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value *
sizeof(chunk[0]) /
241 Vc::float_v::Size /
sizeof(
float)> n_it_lead;
245 for (
int v = 0 ; v < boost::mpl::at<typename chunking::type,boost::mpl::int_<2>>::type::value ; v++)
247 for (
int j = 0 ; j < boost::mpl::at<typename chunking::type,boost::mpl::int_<1>>::type::value ; j++)
249 for (
int k = 0 ; k < n_it_lead::value ; k += 4)
251 Vc::float_v tmp = Vc::float_v((
float *)&ptr[s2],Vc::Aligned);
252 Vc::float_v tmp1 = Vc::float_v((
float *)&ptr[s2+1*Vc::float_v::Size / (
sizeof(chunk[0])/
sizeof(
float))],Vc::Aligned);
253 Vc::float_v tmp2 = Vc::float_v((
float *)&ptr[s2+2*Vc::float_v::Size / (
sizeof(chunk[0])/
sizeof(
float))],Vc::Aligned);
254 Vc::float_v tmp3 = Vc::float_v((
float *)&ptr[s2+3*Vc::float_v::Size / (
sizeof(chunk[0])/
sizeof(
float))],Vc::Aligned);
256 tmp.store((
float *)&chunk[s2],Vc::Aligned);
257 tmp1.store((
float *)&chunk[s2+1*Vc::float_v::Size / (
sizeof(chunk[0])/
sizeof(
float))],Vc::Aligned);
258 tmp2.store((
float *)&chunk[s2+2*Vc::float_v::Size / (
sizeof(chunk[0])/
sizeof(
float))],Vc::Aligned);
259 tmp3.store((
float *)&chunk[s2+3*Vc::float_v::Size / (
sizeof(chunk[0])/
sizeof(
float))],Vc::Aligned);
261 s2 += 4*Vc::float_v::Size / (
sizeof(chunk[0])/
sizeof(
float));
271template<
int layout_type,
int prop,
int stencil_size,
typename chunking,
bool is_cross>
274 template<
unsigned int i_src,
unsigned int i_dest,
unsigned int N1,
typename T,
typename headerType,
typename chunkType>
275 inline static void copy(T ptr[N1],
unsigned char mask[N1], headerType & h ,
const chunkType & chunk)
277 int s = stencil_size + stencil_size*(boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value+2*stencil_size) +
278 i_dest*(boost::mpl::at<
typename chunking::type,boost::mpl::int_<0>>::type::value+2*stencil_size)*(boost::mpl::at<typename chunking::type,boost::mpl::int_<1>>::type::value+2*stencil_size);
280 for (
int v = 0 ; v < stencil_size ; v++)
282 for (
int j = 0 ; j < boost::mpl::at<typename chunking::type,boost::mpl::int_<1>>::type::value ; j++)
284 for (
int k = 0 ; k < boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value ; k++)
286 const int id = Lin_vmpl<typename chunking::type>(k,j,i_src+v);
288 ptr[s] = chunk.template get<prop>()[id];
289 mask[s] = exist_sub(h,
id);
296 s+= 2*stencil_size*(boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value+2*stencil_size);
300 template<
unsigned int i_dest,
unsigned int N1>
301 inline static void mask_null(
unsigned char mask[N1])
303 int s = stencil_size + stencil_size*(boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value+2*stencil_size) +
304 i_dest*(boost::mpl::at<
typename chunking::type,boost::mpl::int_<0>>::type::value+2*stencil_size)*(boost::mpl::at<typename chunking::type,boost::mpl::int_<1>>::type::value+2*stencil_size);
306 for (
int v = 0 ; v < stencil_size ; v++)
308 for (
int j = 0 ; j < boost::mpl::at<typename chunking::type,boost::mpl::int_<1>>::type::value ; j++)
310 for (
int k = 0 ; k < boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value ; k++)
319 s+= 2*stencil_size*(boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value+2*stencil_size);
327template<
int prop,
int stencil_size,
typename chunking,
bool is_cross>
330 template<
unsigned int i_src,
unsigned int i_dest,
unsigned int N1,
typename T,
typename headerType,
typename chunkType>
331 inline static void copy(T ptr[N1],
unsigned char mask[N1], headerType & h ,
const chunkType & chunk)
333 int s = stencil_size + stencil_size*(boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value+2*stencil_size) +
334 i_dest*(boost::mpl::at<
typename chunking::type,boost::mpl::int_<0>>::type::value+2*stencil_size)*(boost::mpl::at<typename chunking::type,boost::mpl::int_<1>>::type::value+2*stencil_size);
336 int s2 = i_src*(boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value)*(boost::mpl::at<
typename chunking::type,boost::mpl::int_<1>>::type::value);
338 typedef boost::mpl::int_<boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value *
sizeof(chunk.template get<prop>()[0]) /
339 Vc::float_v::Size /
sizeof(
float)> n_it_lead;
341 for (
int v = 0 ; v < stencil_size ; v++)
343 for (
int j = 0 ; j < boost::mpl::at<typename chunking::type,boost::mpl::int_<1>>::type::value ; j++)
345 for (
int k = 0 ; k < n_it_lead::value ; k+=4)
347 exist_sub_v<Vc::float_v::Size / (
sizeof(chunk.template get<prop>()[0])/
sizeof(
float))>(h,s2,&mask[s]);
348 exist_sub_v<Vc::float_v::Size / (
sizeof(chunk.template get<prop>()[0])/
sizeof(
float))>(h,s2+1*Vc::float_v::Size / (
sizeof(chunk.template get<prop>()[0])/
sizeof(float)),&mask[s+1*Vc::float_v::Size / (
sizeof(chunk.template get<prop>()[0])/
sizeof(
float))]);
349 exist_sub_v<Vc::float_v::Size / (
sizeof(chunk.template get<prop>()[0])/
sizeof(
float))>(h,s2+2*Vc::float_v::Size / (
sizeof(chunk.template get<prop>()[0])/
sizeof(float)),&mask[s+2*Vc::float_v::Size / (
sizeof(chunk.template get<prop>()[0])/
sizeof(
float))]);
350 exist_sub_v<Vc::float_v::Size / (
sizeof(chunk.template get<prop>()[0])/
sizeof(
float))>(h,s2+3*Vc::float_v::Size / (
sizeof(chunk.template get<prop>()[0])/
sizeof(float)),&mask[s+3*Vc::float_v::Size / (
sizeof(chunk.template get<prop>()[0])/
sizeof(
float))]);
353 Vc::float_v tmp = Vc::float_v((
float *)&chunk.template get<prop>()[s2],Vc::Unaligned);
354 Vc::float_v tmp1 = Vc::float_v((
float *)&chunk.template get<prop>()[s2+1*Vc::float_v::Size / (
sizeof(chunk.template get<prop>()[0])/
sizeof(
float))],Vc::Unaligned);
355 Vc::float_v tmp2 = Vc::float_v((
float *)&chunk.template get<prop>()[s2+2*Vc::float_v::Size / (
sizeof(chunk.template get<prop>()[0])/
sizeof(
float))],Vc::Unaligned);
356 Vc::float_v tmp3 = Vc::float_v((
float *)&chunk.template get<prop>()[s2+3*Vc::float_v::Size / (
sizeof(chunk.template get<prop>()[0])/
sizeof(
float))],Vc::Unaligned);
357 tmp.store((
float *)&ptr[s],Vc::Unaligned);
358 tmp1.store((
float *)&ptr[s+1*Vc::float_v::Size / (
sizeof(chunk.template get<prop>()[0])/
sizeof(
float))],Vc::Unaligned);
359 tmp2.store((
float *)&ptr[s+2*Vc::float_v::Size / (
sizeof(chunk.template get<prop>()[0])/
sizeof(
float))],Vc::Unaligned);
360 tmp3.store((
float *)&ptr[s+3*Vc::float_v::Size / (
sizeof(chunk.template get<prop>()[0])/
sizeof(
float))],Vc::Unaligned);
362 s += 4*Vc::float_v::Size / (
sizeof(chunk.template get<prop>()[0])/
sizeof(
float));
363 s2 += 4*Vc::float_v::Size / (
sizeof(chunk.template get<prop>()[0])/
sizeof(
float));
369 s+= 2*stencil_size*(boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value+2*stencil_size);
373 template<
unsigned int i_dest,
unsigned int N1>
374 inline static void mask_null(
unsigned char mask[N1])
376 int s = stencil_size + stencil_size*(boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value+2*stencil_size) +
377 i_dest*(boost::mpl::at<
typename chunking::type,boost::mpl::int_<0>>::type::value+2*stencil_size)*(boost::mpl::at<typename chunking::type,boost::mpl::int_<1>>::type::value+2*stencil_size);
379 for (
int v = 0 ; v < stencil_size ; v++)
381 for (
int j = 0 ; j < boost::mpl::at<typename chunking::type,boost::mpl::int_<1>>::type::value ; j++)
383 for (
int k = 0 ; k < boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value ; k++)
392 s+= 2*stencil_size*(boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value+2*stencil_size);
400template<
int layout_type,
int prop,
int stencil_size,
typename chunking,
bool is_cross>
403 template<
unsigned int j_src,
unsigned int j_dest,
unsigned int N1,
typename T,
typename headerType ,
typename chunkType>
404 inline static void copy(T ptr[N1],
unsigned char mask[N1], headerType & h ,
const chunkType & chunk)
406 int s = stencil_size + j_dest*(boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value+2*stencil_size) +
407 stencil_size*(boost::mpl::at<
typename chunking::type,boost::mpl::int_<0>>::type::value+2*stencil_size)*(boost::mpl::at<typename chunking::type,boost::mpl::int_<1>>::type::value+2*stencil_size);
409 for (
int v = 0 ; v < stencil_size ; v++)
411 for (
int i = 0 ; i < boost::mpl::at<typename chunking::type,boost::mpl::int_<2>>::type::value ; i++)
413 for (
int k = 0 ; k < boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value ; k++)
415 const int id = Lin_vmpl<typename chunking::type>(k,j_src+v,i);
417 ptr[s] = chunk.template get<prop>()[id];
418 mask[s] = exist_sub(h,
id);
423 s += (boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value+2*stencil_size)*(boost::mpl::at<
typename chunking::type,boost::mpl::int_<1>>::type::value + 2*stencil_size) - boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value;
426 s += (boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value + 2*stencil_size) - boost::mpl::at<
typename chunking::type,boost::mpl::int_<2>>::type::value*(boost::mpl::at<
typename chunking::type,boost::mpl::int_<0>>::type::value+2*stencil_size)*(boost::mpl::at<typename chunking::type,boost::mpl::int_<1>>::type::value+2*stencil_size);
432template<
int layout_type,
int prop,
typename chunking,
bool is_cross>
435 template<
unsigned int j_src,
unsigned int j_dest,
unsigned int N1,
typename T,
typename headerType ,
typename chunkType>
436 inline static void copy(T ptr[N1],
unsigned char mask[N1], headerType & h ,
const chunkType & chunk)
438 int s = 1 + j_dest*(boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value+2*1) +
439 1*(boost::mpl::at<
typename chunking::type,boost::mpl::int_<0>>::type::value+2*1)*(boost::mpl::at<typename chunking::type,boost::mpl::int_<1>>::type::value+2*1);
441 for (
int i = 0 ; i < boost::mpl::at<typename chunking::type,boost::mpl::int_<2>>::type::value ; i++)
443 for (
int k = 0 ; k < boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value ; k++)
445 const int id = Lin_vmpl<typename chunking::type>(k,j_src,i);
447 ptr[s] = chunk.template get<prop>()[id];
448 mask[s] = exist_sub(h,
id);
453 s += 1 * (boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value+2*1)*(boost::mpl::at<
typename chunking::type,boost::mpl::int_<1>>::type::value + 2*1) - boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value;
457 template<
unsigned int j_dest,
unsigned int N1>
458 inline static void mask_null(
unsigned char mask[N1])
460 int s = 1 + j_dest*(boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value+2*1) +
461 1*(boost::mpl::at<
typename chunking::type,boost::mpl::int_<0>>::type::value+2*1)*(boost::mpl::at<typename chunking::type,boost::mpl::int_<1>>::type::value+2*1);
463 for (
int i = 0 ; i < boost::mpl::at<typename chunking::type,boost::mpl::int_<2>>::type::value ; i++)
465 for (
int k = 0 ; k < boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value ; k++)
472 s += 1 * (boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value+2*1)*(boost::mpl::at<
typename chunking::type,boost::mpl::int_<1>>::type::value + 2*1) - boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value;
478template<
int layout_type,
int prop,
int stencil_size,
typename chunking,
bool is_cross>
481 template<
unsigned int k_src,
unsigned int k_dest,
unsigned int N1,
typename T,
typename headerType ,
typename chunkType>
482 inline static void copy(T ptr[N1],
unsigned char mask[N1], headerType & h ,
const chunkType & chunk)
484 int s = k_dest + stencil_size*(boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value+2*stencil_size) +
485 stencil_size*(boost::mpl::at<
typename chunking::type,boost::mpl::int_<0>>::type::value+2*stencil_size)*(boost::mpl::at<typename chunking::type,boost::mpl::int_<1>>::type::value+2*stencil_size);
487 for (
int v = 0 ; v < stencil_size ; v++)
489 for (
int i = 0 ; i < boost::mpl::at<typename chunking::type,boost::mpl::int_<2>>::type::value ; i++)
491 for (
int j = 0 ; j < boost::mpl::at<typename chunking::type,boost::mpl::int_<1>>::type::value ; j++)
493 const int id = Lin_vmpl<typename chunking::type>(k_src+v,j,i);
495 ptr[s] = chunk.template get<prop>()[id];
496 mask[s] = exist_sub(h,
id);
498 s += (boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value+2*stencil_size);
501 s += (boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value+2*stencil_size)*(boost::mpl::at<
typename chunking::type,boost::mpl::int_<1>>::type::value + 2*stencil_size) - boost::mpl::at<typename chunking::type,boost::mpl::int_<1>>::type::value*(boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value+2*stencil_size);
504 s += 1 - boost::mpl::at<typename chunking::type,boost::mpl::int_<2>>::type::value*(boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value+2*stencil_size)*(boost::mpl::at<
typename chunking::type,boost::mpl::int_<1>>::type::value+2*stencil_size);
511template<
int layout_type,
int prop,
typename chunking,
bool is_cross>
514 template<
unsigned int k_src,
unsigned int k_dest,
unsigned int N1,
typename T,
typename headerType,
typename chunkType>
515 inline static void copy(T ptr[N1],
unsigned char mask[N1], headerType & h ,
const chunkType & chunk)
517 int s = k_dest + 1*(boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value+2*1) +
518 1*(boost::mpl::at<
typename chunking::type,boost::mpl::int_<0>>::type::value+2*1)*(boost::mpl::at<typename chunking::type,boost::mpl::int_<1>>::type::value+2*1);
520 for (
int i = 0 ; i < boost::mpl::at<typename chunking::type,boost::mpl::int_<2>>::type::value ; i++)
522 for (
int j = 0 ; j < boost::mpl::at<typename chunking::type,boost::mpl::int_<1>>::type::value ; j++)
524 const int id = Lin_vmpl<typename chunking::type>(k_src,j,i);
526 ptr[s] = chunk.template get<prop>()[id];
527 mask[s] = exist_sub(h,
id);
529 s += (boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value+2*1);
532 s += 1 * (boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value+2*1)*(boost::mpl::at<
typename chunking::type,boost::mpl::int_<1>>::type::value + 2*1) - boost::mpl::at<typename chunking::type,boost::mpl::int_<1>>::type::value*(boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value+2*1);
536 template<
unsigned int k_dest,
unsigned int N1>
537 inline static void mask_null(
unsigned char mask[N1])
539 int s = k_dest + 1*(boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value+2*1) +
540 1*(boost::mpl::at<
typename chunking::type,boost::mpl::int_<0>>::type::value+2*1)*(boost::mpl::at<typename chunking::type,boost::mpl::int_<1>>::type::value+2*1);
542 for (
int i = 0 ; i < boost::mpl::at<typename chunking::type,boost::mpl::int_<2>>::type::value ; i++)
544 for (
int j = 0 ; j < boost::mpl::at<typename chunking::type,boost::mpl::int_<1>>::type::value ; j++)
548 s += (boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value+2*1);
551 s += 1 * (boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value+2*1)*(boost::mpl::at<
typename chunking::type,boost::mpl::int_<1>>::type::value + 2*1) - boost::mpl::at<typename chunking::type,boost::mpl::int_<1>>::type::value*(boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value+2*1);
557template<
int layout_type,
int prop,
int stencil_size,
typename chunking,
bool is_cross>
560 template<
unsigned int i_src,
unsigned int i_dest,
unsigned int j_src,
unsigned int j_dest ,
unsigned int N1,
typename T,
typename chunkType>
561 inline static void copy(T ptr[N1],
const chunkType & chunk)
563 int s = stencil_size + j_dest*(boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value+2*stencil_size) +
564 i_dest*(boost::mpl::at<
typename chunking::type,boost::mpl::int_<0>>::type::value+2*stencil_size)*(boost::mpl::at<typename chunking::type,boost::mpl::int_<1>>::type::value+2*stencil_size);
566 for (
int v1 = 0 ; v1 < stencil_size ; v1++)
568 for (
int v2 = 0 ; v2 < stencil_size ; v2++)
570 for (
int k = 0 ; k < boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value ; k++)
572 ptr[s] = chunk.template get<prop>()[Lin_vmpl<typename chunking::type>(k,j_src+v2,i_src+v1)];
580 s+= (boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value+2*stencil_size)*(boost::mpl::at<
typename chunking::type,boost::mpl::int_<1>>::type::value+2*stencil_size) - stencil_size*(boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value+2*stencil_size);
586template<
int layout_type,
int prop,
typename chunking,
bool is_cross>
589 template<
unsigned int i_src,
unsigned int i_dest,
unsigned int j_src,
unsigned int j_dest ,
unsigned int N1,
typename T,
typename chunkType>
590 inline static void copy(T ptr[N1],
const chunkType & chunk)
592 int s = 1 + j_dest*(boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value+2*1) +
593 i_dest*(boost::mpl::at<
typename chunking::type,boost::mpl::int_<0>>::type::value+2*1)*(boost::mpl::at<typename chunking::type,boost::mpl::int_<1>>::type::value+2*1);
596 for (
int k = 0 ; k < boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value ; k++)
598 ptr[s] = chunk.template get<prop>()[Lin_vmpl<typename chunking::type>(k,j_src,i_src)];
606template<
int layout_type,
int prop,
int stencil_size,
typename chunking>
609 template<
unsigned int i_src,
unsigned int i_dest,
unsigned int j_src,
unsigned int j_dest ,
unsigned int N1,
typename T,
typename chunkType>
610 inline static void copy(T ptr[N1],
const chunkType & chunk)
615template<
int layout_type,
int prop,
int stencil_size,
typename chunking,
bool is_cross>
618 template<
unsigned int i_src,
unsigned int i_dest,
unsigned int k_src,
unsigned int k_dest ,
unsigned int N1,
typename T,
typename chunkType>
619 inline static void copy(T ptr[N1],
const chunkType & chunk)
621 int s = k_dest + stencil_size*(boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value+2*stencil_size) +
622 i_dest*(boost::mpl::at<
typename chunking::type,boost::mpl::int_<0>>::type::value+2*stencil_size)*(boost::mpl::at<typename chunking::type,boost::mpl::int_<1>>::type::value+2*stencil_size);
624 for (
int v1 = 0 ; v1 < stencil_size ; v1++)
626 for (
int j = 0 ; j < boost::mpl::at<typename chunking::type,boost::mpl::int_<1>>::type::value ; j++)
628 for (
int v2 = 0 ; v2 < stencil_size ; v2++)
630 ptr[s] = chunk.template get<prop>()[Lin_vmpl<typename chunking::type>(k_src+v2,j,i_src+v1)];
634 s += (boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value+2*stencil_size) - stencil_size;
637 s += (boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value+2*stencil_size)*(boost::mpl::at<
typename chunking::type,boost::mpl::int_<1>>::type::value+2*stencil_size) - boost::mpl::at<typename chunking::type,boost::mpl::int_<1>>::type::value*(boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value+2*stencil_size);
// Specialization without a stencil_size parameter: the offsets below use the literal 1
// where the general variant uses stencil_size.
// NOTE(review): the enclosing struct declaration and the braces are not present in this
// listing; the comments describe only the visible statements.
643template<
int layout_type,
int prop,
typename chunking,
bool is_cross>
646 template<
unsigned int i_src,
unsigned int i_dest,
unsigned int k_src,
unsigned int k_dest ,
unsigned int N1,
typename T,
typename chunkType>
647 inline static void copy(T ptr[N1],
const chunkType & chunk)
// Start offset into ptr: k_dest in the first coordinate, one padded row, i_dest padded planes,
// where the padded extents are the chunk extents plus 2*1.
649 int s = k_dest + 1*(boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value+2*1) +
650 i_dest*(boost::mpl::at<
typename chunking::type,boost::mpl::int_<0>>::type::value+2*1)*(boost::mpl::at<typename chunking::type,boost::mpl::int_<1>>::type::value+2*1);
// NOTE(review): j is the second coordinate given to Lin_vmpl, yet the loop bound is the
// chunk extent int_<0>; the stencil_size variant of this copy bounds its j loop with
// int_<1>. The two agree only when both extents are equal -- confirm which is intended.
653 for (
int j = 0 ; j < boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value ; j++)
655 ptr[s] = chunk.template get<prop>()[Lin_vmpl<typename chunking::type>(k_src,j,i_src)];
// Advance the destination by one padded row.
657 s += (boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value+2*1);
663template<
int layout_type,
int prop,
int stencil_size,
typename chunking>
666 template<
unsigned int i_src,
unsigned int i_dest,
unsigned int k_src,
unsigned int k_dest ,
unsigned int N1,
typename T,
typename chunkType>
667 inline static void copy(T ptr[N1],
const chunkType & chunk)
673template<
int layout_type,
int prop,
int stencil_size,
typename chunking,
bool is_cross>
676 template<
unsigned int j_src,
unsigned int j_dest,
unsigned int k_src,
unsigned int k_dest ,
unsigned int N1,
typename T,
typename chunkType>
677 inline static void copy(T ptr[N1],
const chunkType & chunk)
679 int s = k_dest + j_dest*(boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value+2*stencil_size) +
680 stencil_size*(boost::mpl::at<
typename chunking::type,boost::mpl::int_<0>>::type::value+2*stencil_size)*(boost::mpl::at<typename chunking::type,boost::mpl::int_<1>>::type::value+2*stencil_size);
683 for (
int i = 0 ; i < boost::mpl::at<typename chunking::type,boost::mpl::int_<2>>::type::value ; i++)
685 for (
int v1 = 0 ; v1 < stencil_size ; v1++)
687 for (
int v2 = 0 ; v2 < stencil_size ; v2++)
689 ptr[s] = chunk.template get<prop>()[Lin_vmpl<typename chunking::type>(k_src+v2,j_src+v1,i)];
697 s += (boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value+2*stencil_size)*(boost::mpl::at<
typename chunking::type,boost::mpl::int_<1>>::type::value+2*stencil_size);
// Specialization without a stencil_size parameter: the offsets below use the literal 1
// where the general variant uses stencil_size.
// NOTE(review): the enclosing struct declaration and the braces are not present in this
// listing; the comments describe only the visible statements.
703template<
int layout_type,
int prop,
typename chunking,
bool is_cross>
706 template<
unsigned int j_src,
unsigned int j_dest,
unsigned int k_src,
unsigned int k_dest ,
unsigned int N1,
typename T,
typename chunkType>
707 inline static void copy(T ptr[N1],
const chunkType & chunk)
// Start offset into ptr: k_dest in the first coordinate, j_dest padded rows, one padded plane,
// where the padded extents are the chunk extents plus 2*1.
709 int s = k_dest + j_dest*(boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value+2*1) +
710 1*(boost::mpl::at<
typename chunking::type,boost::mpl::int_<0>>::type::value+2*1)*(boost::mpl::at<typename chunking::type,boost::mpl::int_<1>>::type::value+2*1);
// NOTE(review): i is the third coordinate given to Lin_vmpl, yet the loop bound is the
// chunk extent int_<0>; the stencil_size variant of this copy bounds its i loop with
// int_<2>. The two agree only when both extents are equal -- confirm which is intended.
713 for (
int i = 0 ; i < boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value ; i++)
715 ptr[s] = chunk.template get<prop>()[Lin_vmpl<typename chunking::type>(k_src,j_src,i)];
// Advance the destination by one padded plane.
717 s += (boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value+2*1)*(boost::mpl::at<
typename chunking::type,boost::mpl::int_<1>>::type::value+2*1);
723template<
int layout_type,
int prop,
int stencil_size,
typename chunking>
726 template<
unsigned int j_src,
unsigned int j_dest,
unsigned int k_src,
unsigned int k_dest ,
unsigned int N1,
typename T,
typename chunkType>
727 inline static void copy(T ptr[N1],
const chunkType & chunk)
732template<
int layout_type,
int prop,
int stencil_size,
typename chunking,
bool is_cross>
735 template<
unsigned int i_src,
unsigned int i_dest,
unsigned int j_src,
unsigned int j_dest,
unsigned int k_src,
unsigned int k_dest ,
unsigned int N1,
typename T,
typename chunkType>
736 inline static void copy(T ptr[N1],
const chunkType & chunk)
738 int s = k_dest + j_dest*(boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value+2*stencil_size) +
739 i_dest*(boost::mpl::at<
typename chunking::type,boost::mpl::int_<0>>::type::value+2*stencil_size)*(boost::mpl::at<typename chunking::type,boost::mpl::int_<1>>::type::value+2*stencil_size);
742 for (
int i = 0 ; i < stencil_size ; i++)
744 for (
int j = 0 ; j < stencil_size ; j++)
746 for (
int k = 0 ; k < stencil_size ; k++)
748 ptr[s] = chunk.template get<prop>()[Lin_vmpl<typename chunking::type>(k_src+k,j_src+j,i_src+i)];
753 s += (boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value+2*stencil_size) - stencil_size;
756 s += (boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value+2*stencil_size)*(boost::mpl::at<
typename chunking::type,boost::mpl::int_<1>>::type::value+2*stencil_size) - stencil_size*(boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value+2*stencil_size);
762template<
int layout_type,
int prop,
typename chunking,
bool is_cross>
765 template<
unsigned int i_src,
unsigned int i_dest,
unsigned int j_src,
unsigned int j_dest,
unsigned int k_src,
unsigned int k_dest ,
unsigned int N1,
typename T,
typename chunkType>
766 inline static void copy(T ptr[N1],
const chunkType & chunk)
768 int s = k_dest + j_dest*(boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type::value+2*1) +
769 i_dest*(boost::mpl::at<
typename chunking::type,boost::mpl::int_<0>>::type::value+2*1)*(boost::mpl::at<typename chunking::type,boost::mpl::int_<1>>::type::value+2*1);
772 ptr[s] = chunk.template get<prop>()[Lin_vmpl<typename chunking::type>(k_src,j_src,i_src)];
777template<
int layout_type,
int prop,
int stencil_size,
typename chunking>
780 template<
unsigned int i_src,
unsigned int i_dest,
unsigned int j_src,
unsigned int j_dest,
unsigned int k_src,
unsigned int k_dest ,
unsigned int N1,
typename T,
typename chunkType>
781 inline static void copy(T ptr[N1],
const chunkType & chunk)
787template<
unsigned int dim>
819template<
unsigned int dim,
unsigned int N,
typename chunking>
820void construct_chunk_missalign_map(
unsigned char miss_al_map[N],
829 boost::mpl::for_each_ref< boost::mpl::range_c<int,0,dim> >(gbs);
845 for (
int i = 0 ; i < dim ; i++)
850 int c_id[openfpm::math::pow(2,dim)];
853 for (
int i = 0 ; i < dim ; i++)
866 for (
int i = 0 ; i < dim ; i++)
868 if ((
long int)k.get(i) + key_sub.
get(i) >= (
long int)gbs.
sz_block[i])
873 else if ((
long int)k.get(i) + key_sub.
get(i) < 0)
882 off += (openfpm::math::positive_modulo(key_sub.
get(i) + k.get(i),gbs.
sz_block[i]))*stride_off;
887 miss_al_map[gcnk.LinId(k)] = bid;
888 mp_off[gcnk.LinId(k)] = off;
899 for (
int i = 0 ; i < dim ; i++)
918 for (
int i = 0 ; i < dim ; i++)
920 if (key_sub.
get(i) >= 0)
924 if (slice.
getLow(i) < bord)
927 for (
int j = 0 ; j < vk.size() ; j++)
929 vk.get(j).k.set_d(i,0);
933 int n_dup = vk.size();
936 for (
int j = 0 ; j < n_dup ; j++)
950 for (
int j = 0 ; j < vk.size() ; j++)
952 vk.get(j).k.set_d(i,0);
958 size_t bord = -key_sub.
get(i);
960 if (slice.
getLow(i) < bord)
963 for (
int j = 0 ; j < vk.size() ; j++)
965 vk.get(j).k.set_d(i,-1);
969 int n_dup = vk.size();
972 for (
int j = 0 ; j < n_dup ; j++)
986 for (
int j = 0 ; j < vk.size() ; j++)
988 vk.get(j).k.set_d(i,0);
// NOTE(review): this listing is incomplete -- the embedded line numbers jump (for example
// 1032 -> 1039), so parameters, declarations and braces are missing. The comments below
// describe only the statements that are visible.
1030template<
typename SparseGr
idType>
1031void copy_remove_to_impl(
const SparseGridType & grid_src,
1032 SparseGridType & grid_dst,
1039 boost::mpl::for_each_ref< boost::mpl::range_c<int,0,SparseGridType::dims> >(gbs);
// sizeBlock is vmpl_reduce_prod applied to the chunking sizes; it sizes the two tables below.
1041 typedef typename vmpl_reduce_prod<typename SparseGridType::chunking_type::type>::type sizeBlock;
// Per-element lookup tables, filled by construct_chunk_missalign_map and read in the copy
// statement at the end: miss_al_map[id] selects an entry of chunk_pos, mp_off[id] is the
// offset used inside the destination block.
1043 unsigned char miss_al_map[sizeBlock::value];
1044 short int mp_off[sizeBlock::value];
1048 construct_chunk_missalign_map<SparseGridType::dims,
1050 typename SparseGridType::chunking_type>
1051 (miss_al_map,mp_off,
1059 auto & data_src = grid_src.private_get_data();
1060 auto & header_src = grid_src.private_get_header();
// NOTE(review): data_dst and header_dst are taken from grid_src, not grid_dst, so they refer
// to the same objects as data_src/header_src. The final assignment through data_dst therefore
// targets the source grid's storage, while chunk creation below goes through grid_dst.
// This looks like a copy/paste slip -- confirm whether grid_dst was intended.
1062 auto & data_dst = grid_src.private_get_data();
1063 auto & header_dst = grid_src.private_get_header();
// One slot per entry addressed through miss_al_map; sized pow(2, dims).
1067 int chunk_pos[openfpm::math::pow(2,SparseGridType::dims)];
// Loop over the source chunks (i indexes header_src and data_src).
1069 for (
int i = 0 ; i < header_src.size() ; i++)
1071 for (
int j = 0 ; j < SparseGridType::dims ; j++)
1073 b_c.
setLow(j,header_src.get(i).pos.get(j));
// inte records whether the chunk box b_c intersects b_src; the intersection is written to b_inte.
1077 bool inte = b_src.
Intersect(b_c,b_inte);
// Record, for every entry of vk, the result of grid_dst.getChunkCreate().
1083 for (
int s = 0 ; s < vk.
size() ; s++)
1085 chunk_pos[vk.get(s).i] = grid_dst.getChunkCreate();
1090 auto & block_src = data_src.get(i);
1094 auto id = gb.
LinId(it.get());
1096 int dest_bid = chunk_pos[miss_al_map[id]];
// NOTE(review): block_src is indexed with i, the chunk loop counter, whereas id is the
// linearized position just computed from the iterator and used for both lookup tables.
// Confirm whether block_src[id] (or another element index) was intended.
1098 data_dst.get(dest_bid)[mp_off[id]] = block_src[i];
This class represents an N-dimensional box.
__device__ __host__ T getLow(int i) const
get the i-coordinate of the low bound interval of the box
__device__ __host__ bool Intersect(const Box< dim, T > &b, Box< dim, T > &b_out) const
Intersect.
__device__ __host__ T getHigh(int i) const
get the high interval of the box
__device__ __host__ void setHigh(int i, T val)
set the high interval of the box
__device__ __host__ void setLow(int i, T val)
set the low interval of the box
grid_key_dx< dim > getKP1() const
Get the point p1 as grid_key_dx.
Declaration grid_key_dx_iterator_sub.
grid_key_dx is the key to access any element in the grid
void zero()
Set to zero the key.
__device__ __host__ void set_d(index_type i, index_type id)
Set the i index.
__device__ __host__ index_type get(index_type i) const
Get the i index.
mem_id LinId(const grid_key_dx< N, ids_type > &gk, const signed char sum_id[N]) const
Linearization of the grid_key_dx with a specified shift.
Implementation of 1-D std::vector like structure.
Aggregate of properties: from a list of objects it creates a struct that follows the OPENFPM native stru...
This class is a functor for the "for_each" algorithm.
size_t sz_block[dim]
sizes
to_variadic_const_impl< 1, N, M, exit_::value, M >::type type
generate the boost::fusion::vector apply H on each term