#ifndef MAP_VECTOR_CUDA_HPP_
#define MAP_VECTOR_CUDA_HPP_

template<typename vector_src_type, typename vector_dst_type, unsigned int ... args>
__global__ void merge_add_prp_device_impl(vector_src_type v_src, vector_dst_type v_dst, unsigned int old_sz)
{
    int i = threadIdx.x + blockIdx.x * blockDim.x;

    if (i >= v_src.size())
    {return;}

    // copy the selected properties of the i-th source element behind the
    // pre-existing old_sz elements of the destination
    object_s_di<decltype(v_src.get(i)),decltype(v_dst.get(old_sz+i)),OBJ_ENCAP,args...>(v_src.get(i),v_dst.get(old_sz+i));
}
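// Illustrative usage sketch, not part of the original header: launching
// merge_add_prp_device_impl with one thread per source element. The helper
// name example_launch_merge_add and the property list {0,1} are assumptions;
// v_dst is assumed to already have room for old_sz + v_src.size() elements.
template<typename vector_src_type, typename vector_dst_type>
void example_launch_merge_add(vector_src_type v_src, vector_dst_type v_dst, unsigned int old_sz)
{
    const unsigned int threads = 256;
    const unsigned int blocks  = (v_src.size() + threads - 1) / threads;

    merge_add_prp_device_impl<vector_src_type,vector_dst_type,0,1>
        <<<blocks,threads>>>(v_src,v_dst,old_sz);
}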
template<typename vector_src_type, typename vector_dst_type>
__global__ void copy_two_vectors(vector_src_type v_dst, vector_dst_type v_src)
{
    int i = threadIdx.x + blockIdx.x * blockDim.x;

    if (i >= v_src.size())
    {return;}

    // note: the first kernel argument is the destination
    v_dst.get(i) = v_src.get(i);
}
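// Illustrative sketch (assumed helper): copy_two_vectors takes the
// destination wrapper first; both wrappers must refer to device buffers of
// at least v_src.size() elements.
template<typename vector_type>
void example_launch_copy(vector_type v_dst, vector_type v_src)
{
    const unsigned int threads = 128;
    const unsigned int blocks  = (v_src.size() + threads - 1) / threads;

    copy_two_vectors<<<blocks,threads>>>(v_dst,v_src);
}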
template<template<typename,typename> class op,
         typename vector_src_type,
         typename vector_dst_type,
         typename vector_opart_type,
         unsigned int ... args>
__global__ void merge_add_prp_device_impl_src_dst_opar_offset(vector_src_type v_src, vector_dst_type v_dst, vector_opart_type opart, unsigned int start)
{
    int i = threadIdx.x + blockIdx.x * blockDim.x;

    if (i >= v_src.size())
    {return;}

    // merge the i-th source element, through the operation op, into the
    // destination slot stored in the second component of the opart table
    object_s_di_op<op,decltype(v_src.get(0)),decltype(v_dst.get(0)),OBJ_ENCAP,args...>(v_src.get(i),v_dst.get(opart.template get<1>(start + i)));
}
template<template<typename,typename> class op,
         typename vector_src_type,
         typename vector_dst_type,
         typename vector_opart_type,
         unsigned int ... args>
__global__ void merge_add_prp_device_impl_src_offset_dst_opar(vector_src_type v_src, vector_dst_type v_dst, vector_opart_type opart, unsigned int start)
{
    int i = threadIdx.x + blockIdx.x * blockDim.x;

    if (i >= opart.size())
    {return;}

    // here the offset walks the source instead: element start + i is merged
    // into the destination slot stored in the first component of opart
    object_si_di_op<op,decltype(v_src.get(0)),decltype(v_dst.get(0)),OBJ_ENCAP,args...>(v_src.get(start + i),v_dst.get(opart.template get<0>(i)));
}
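// Worked example (illustrative) of the two indexing schemes, for start = 2:
//
//   merge_add_prp_device_impl_src_dst_opar_offset
//     thread i reads  v_src.get(i)                  (i < v_src.size())
//     and writes to   v_dst.get(opart.get<1>(2+i))
//
//   merge_add_prp_device_impl_src_offset_dst_opar
//     thread i reads  v_src.get(2+i)                (i < opart.size())
//     and writes to   v_dst.get(opart.get<0>(i))
//
// In the first kernel the offset walks the opart table, in the second it
// walks the source vector, and the bound check changes accordingly.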
/*! \brief Report an out-of-bound access, prp is the property being accessed
 *         (-1 for the whole element) and key the offending index
 */
template<unsigned int prp>
__device__ void fill_vector_error_array_overflow(const void * sptr, int key)
{
    int * ptr = (int *)&global_cuda_error_array[0];

    ptr[1] = ((size_t)sptr) & 0xFFFFFFFF;
    ptr[2] = (((size_t)sptr) & 0xFFFFFFFF00000000) >> 32;

    // ...

    for (int i = 0 ; i < 1 ; i++)
    {ptr[i+5] = key;}

    ptr[5+1] = blockIdx.x;
    ptr[6+1] = blockIdx.y;
    ptr[7+1] = blockIdx.z;

    ptr[8+1] = blockDim.x;
    ptr[9+1] = blockDim.y;
    ptr[10+1] = blockDim.z;

    ptr[11+1] = threadIdx.x;
    ptr[12+1] = threadIdx.y;
    ptr[13+1] = threadIdx.z;
}
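// Host-side sketch, not part of the original header: reading back the error
// record written by fill_vector_error_array_overflow. It assumes
// global_cuda_error_array is a __device__ int array (its declaration is not
// part of this listing); the field offsets match the writes above.
inline void example_read_overflow_record()
{
    int err[15];
    cudaMemcpyFromSymbol(err,global_cuda_error_array,sizeof(err));

    // err[1]/err[2] hold the low/high half of the buffer address,
    // err[5] the offending key, err[6..8] blockIdx, err[12..14] threadIdx
    size_t sptr = ((size_t)(unsigned int)err[2] << 32) | (unsigned int)err[1];
    printf("overflow on buffer %p, key %d, block (%d,%d,%d), thread (%d,%d,%d)\n",
           (void *)sptr,err[5],err[6],err[7],err[8],err[12],err[13],err[14]);
}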
template<typename T, template <typename> class layout_base>
struct vector_gpu_ker_ref;

/*! \brief grid interface available when on gpu
 */
template<typename T, template <typename> class layout_base>
struct vector_gpu_ker
{
    //! this type
    typedef vector_gpu_ker<T,layout_base> self_type;

    //! the type T transformed by the layout
    typedef typename apply_transform<layout_base,T>::type T_;

    //! Object container for T, it is the return type of get_o
    typedef typename grid_base<1,T_,CudaMemory,typename memory_traits_inte<T_>::type>::container container;

    //! actual number of elements
    unsigned int v_size;

    //! 1-D static grid holding the data
    grid_gpu_ker<1,T_,layout_base,grid_sm<1,void>> base;

    //! Check that the key is inside the grid
    __device__ __host__ bool check_bound(size_t v1) const
    {
        return v1 < size();
    }
    //! Return the size of the vector
    __device__ __host__ unsigned int size() const
    {
        return v_size;
    }

    //! Get the property p of element id
    template <unsigned int p>
    __device__ __host__ inline auto get(unsigned int id) -> decltype(base.template get<p>(grid_key_dx<1>(0)))
    {
        if (check_bound(id) == false)
        {fill_vector_error_array_overflow<p>(this->template getPointer<p>(),id);}

        grid_key_dx<1> key(id);
        return base.template get<p>(key);
    }

    //! Get the whole element id as an encapsulated object
    __device__ __host__ auto get(unsigned int id) -> decltype(base.get_o(grid_key_dx<1>(id)))
    {
        if (check_bound(id) == false)
        {fill_vector_error_array_overflow<-1>(this->template getPointer<0>(),id);}

        return base.get_o(grid_key_dx<1>(id));
    }

    //! Get the whole element id as an encapsulated object (const)
    __device__ __host__ auto get(unsigned int id) const -> const decltype(base.get_o(grid_key_dx<1>(id)))
    {
        if (check_bound(id) == false)
        {fill_vector_error_array_overflow<-1>(this->template getPointer<0>(),id);}

        return base.get_o(grid_key_dx<1>(id));
    }

    // get_o(id), in const and non-const form, repeats the same guard before
    // returning base.get_o(grid_key_dx<1>(id))

    //! Get the property p of element id (const)
    template <unsigned int p>
    __device__ __host__ inline auto get(unsigned int id) const -> decltype(base.template get<p>(grid_key_dx<1>(0)))
    {
        if (check_bound(id) == false)
        {fill_vector_error_array_overflow<p>(this->template getPointer<p>(),id);}

        grid_key_dx<1> key(id);
        return base.template get<p>(key);
    }
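    // Device-side sketch (assumed kernel): reading and writing property 0
    // through the bound-checked accessor; the property index and the float
    // type are assumptions.
    //
    //   template<typename vector_type>
    //   __global__ void example_scale_prop0(vector_type v, float factor)
    //   {
    //       unsigned int i = threadIdx.x + blockIdx.x * blockDim.x;
    //       if (i >= v.size()) {return;}
    //
    //       // with checks enabled, an out-of-bound id is reported through
    //       // fill_vector_error_array_overflow instead of corrupting memory
    //       v.template get<0>(i) *= factor;
    //   }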
    vector_gpu_ker(const vector_gpu_ker_ref<T,layout_base> & vref)
    {
        this->operator=(vref.vref);
    }

    vector_gpu_ker(int v_size, const grid_gpu_ker<1,T_,layout_base,grid_sm<1,void>> & cpy)
    :v_size(v_size)
    {base.constructor_impl(cpy);}

    vector_gpu_ker(int v_size, const grid_gpu_ker_ref<1,T_,layout_base,grid_sm<1,void>> & cpy)
    :v_size(v_size)
    {base.constructor_impl(cpy);}

    //! Set the object id to obj
    __device__ void set(int id, const container & obj)
    {
        if (check_bound(id) == false)
        {fill_vector_error_array_overflow<-1>(this->template getPointer<0>(),id);}

        base.set(id,obj);
    }
    //! Get the pointer for the property p
    template<unsigned int p> __device__ __host__ void * getPointer()
    {
        return base.template getPointer<p>();
    }

    //! Get the pointer for the property p (const)
    template<unsigned int p> __device__ __host__ const void * getPointer() const
    {
        return base.template getPointer<p>();
    }
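    // Sketch (assumed helper): getPointer<p>() exposes the raw device buffer
    // of a single property, usable with plain CUDA calls; here property 0 is
    // assumed to be stored as a contiguous array of float.
    //
    //   template<typename vector_type>
    //   void example_zero_prop0(vector_type & v)
    //   {
    //       cudaMemset(v.template getPointer<0>(),0,v.size() * sizeof(float));
    //   }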
    //! It sets an element of the vector from an object that is a subset of the vector properties
    template <typename encap_S, unsigned int ...args>
    void set_o(unsigned int i, const encap_S & obj)
    {
        if (check_bound(i) == false)
        {fill_vector_error_array_overflow<-1>(this->template getPointer<0>(),i);}

        // copy the listed properties of obj into element i
        object_s_di<encap_S,decltype(get(i)),OBJ_ENCAP,args...>(obj,get(i));
    }

    //! Set the element id from element src of another vector
    __device__ void set(unsigned int id, const vector_gpu_ker<T_,layout_base> & v, unsigned int src)
    {
        if (check_bound(id) == false)
        {fill_vector_error_array_overflow<-1>(this->template getPointer<0>(),id);}

        base.set(id,v.base,src);
    }
    //! Set the listed properties prp of element id from element src of another vector
    template<unsigned int ... prp>
    __device__ void set(unsigned int id, const vector_gpu_ker<T_,layout_base> & v, unsigned int src)
    {
        if (check_bound(id) == false)
        {fill_vector_error_array_overflow<-1>(this->template getPointer<0>(),id);}

        base.template set<prp...>(id,v.base,src);
    }
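    // Device-side sketch (assumed kernel): set<1> copies only property 1 of
    // element i from src to dst, leaving the other properties untouched.
    //
    //   template<typename vector_type>
    //   __global__ void example_copy_prop1(vector_type dst, vector_type src)
    //   {
    //       unsigned int i = threadIdx.x + blockIdx.x * blockDim.x;
    //       if (i >= src.size()) {return;}
    //
    //       dst.template set<1>(i,src,i);
    //   }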
    //! Return the address of the size field
    void * internal_get_size_pointer() {return &v_size;}

    //! Print the sizes of this structure and of its members
    void print_size()
    {
#ifndef DISABLE_ALL_RTTI
        std::cout << "the size of: " << demangle(typeid(self_type).name()) << " is " << sizeof(self_type) << std::endl;
        std::cout << "    " << demangle(typeid(decltype(v_size)).name()) << ":" << sizeof(decltype(v_size)) << std::endl;
        std::cout << "    " << demangle(typeid(decltype(base)).name()) << ":" << sizeof(decltype(base)) << std::endl;
#endif
    }
    //! Return which property (if any) contains the device pointer ptr
    pointer_check check_device_pointer(void * ptr)
    {
        pointer_check pc;
        pc.match = false;

        check_device_ptr<self_type> ptr_chk(ptr,*this);

        boost::mpl::for_each_ref<boost::mpl::range_c<int,0,T::max_prop>>(ptr_chk);

        if (ptr_chk.result == true)
        {
            pc.match = true;
            pc.match_str += std::string("Property: ") + std::to_string(ptr_chk.prp) + "\n";
        }

        return pc;
    }
};
/*! \brief grid interface available when on gpu
 *
 * reference version of vector_gpu_ker
 */
template<typename T, template <typename> class layout_base>
struct vector_gpu_ker_ref
{
    //! the wrapped type
    typedef vector_gpu_ker<T,layout_base> self_type;

    //! the type T transformed by the layout
    typedef typename apply_transform<layout_base,T>::type T_;

    //! Object container for T, it is the return type of get_o
    typedef typename self_type::container container;

    //! vector reference
    vector_gpu_ker<T,layout_base> & vref;

    //! it defines that this is a vector
    typedef int yes_i_am_vector;

    //! Type of the encapsulation memory parameter
    typedef typename layout_base<T_>::type layout_type;

    //! Type of the value the vector is storing
    typedef T_ value_type;

    //! Indicate this structure has a function to check the device pointer
    typedef int yes_has_check_device_pointer;

    //! Return the size of the vector
    __device__ __host__ unsigned int size() const
    {
        return vref.size();
    }

    //! Return the maximum capacity of the vector before reallocation
    __device__ __host__ unsigned int capacity() const
    {
        return vref.capacity();
    }
    //! Get the property p of element id (const)
    template <unsigned int p>
    __device__ __host__ inline auto get(unsigned int id) const -> decltype(vref.template get<p>(id))
    {
        return vref.template get<p>(id);
    }

    //! Get the whole element id
    __device__ __host__ inline auto get(unsigned int id) -> decltype(vref.get(id))
    {
        return vref.get(id);
    }

    //! Get the whole element id (const)
    inline __device__ __host__ auto get(unsigned int id) const -> decltype(vref.get(id))
    {
        return vref.get(id);
    }

    //! Get the element id (const)
    inline __device__ __host__ auto get_o(unsigned int id) const -> decltype(vref.get_o(id))
    {
        return vref.get_o(id);
    }

    //! Get the element id
    inline __device__ __host__ auto get_o(unsigned int id) -> decltype(vref.get_o(id))
    {
        return vref.get_o(id);
    }

    //! Get the last element of the vector (const)
    inline auto last() const -> decltype(vref.last())
    {
        return vref.last();
    }

    //! Get the property p of element id
    template <unsigned int p>
    __device__ __host__ inline auto get(unsigned int id) -> decltype(vref.template get<p>(id))
    {
        return vref.template get<p>(id);
    }

    //! Get the last element of the vector
    inline auto last() -> decltype(vref.last())
    {
        return vref.last();
    }
    vector_gpu_ker_ref(vector_gpu_ker<T,layout_base> & vref)
    :vref(vref)
    {}

    //! Set the object id to obj
    __device__ void set(int id, const container & obj)
    {
        vref.set(id,obj);
    }

    //! Get the pointer for the property p
    template<unsigned int p> __device__ __host__ void * getPointer()
    {
        return vref.template getPointer<p>();
    }

    //! Get the pointer for the property p (const)
    template<unsigned int p> __device__ __host__ const void * getPointer() const
    {
        return vref.template getPointer<p>();
    }
    //! It sets an element of the vector from an object that is a subset of the vector properties
    template <typename encap_S, unsigned int ...args>
    void set_o(unsigned int i, const encap_S & obj)
    {
        vref.template set_o<encap_S,args...>(i,obj);
    }

    //! Set the element id from element src of another vector
    __device__ void set(unsigned int id, const vector_gpu_ker<T_,layout_base> & v, unsigned int src)
    {
        vref.set(id,v,src);
    }

    //! Set the listed properties prp of element id from element src of another vector
    template<unsigned int ... prp>
    __device__ void set(unsigned int id, const vector_gpu_ker<T_,layout_base> & v, unsigned int src)
    {
        vref.template set<prp ...>(id,v,src);
    }

    //! Get an iterator for the GPU
    __host__ ite_gpu<1> getGPUIterator(size_t n_thr = default_kernel_wg_threads_) const
    {
        return vref.getGPUIterator(n_thr);
    }

    //! Get an iterator for the GPU up to the element stop
    ite_gpu<1> getGPUIteratorTo(size_t stop, size_t n_thr = default_kernel_wg_threads_) const
    {
        return vref.getGPUIteratorTo(stop,n_thr);
    }

    //! Return the base
    __device__ grid_gpu_ker<1,T_,layout_base,grid_sm<1,void>> & getBase()
    {
        return vref.getBase();
    }

    //! Return which property (if any) contains the device pointer ptr
    pointer_check check_device_pointer(void * ptr)
    {
        return vref.check_device_pointer(ptr);
    }

    //! Return the address of the size field
    void * internal_get_size_pointer() {return vref.internal_get_size_pointer();}

    //! Print the sizes of this structure and of its members
    void print_size()
    {
        return vref.print_size();
    }
};

#endif /* MAP_VECTOR_CUDA_HPP_ */
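// Sketch (assumed kernel): thanks to the converting constructor of
// vector_gpu_ker, a kernel taking vector_gpu_ker by value also accepts a
// vector_gpu_ker_ref, so host code can forward the wrapper without copying
// it until the launch; property 0 is assumed to be integral.
//
//   template<typename T, template<typename> class layout_base>
//   __global__ void example_fill(vector_gpu_ker<T,layout_base> v)
//   {
//       unsigned int i = threadIdx.x + blockIdx.x * blockDim.x;
//       if (i >= v.size()) {return;}
//
//       v.template get<0>(i) = i;
//   }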
Referenced symbols (brief documentation):

T_ value_type
    Type of the value the vector is storing.
layout_base< T_ >::type layout_type
    Type of the encapsulation memory parameter.
int yes_i_am_vector
    It defines that this is a vector.
int yes_has_check_device_pointer
    Indicates this structure has a function to check the device pointer.
grid_base< 1, T_, CudaMemory, typename memory_traits_inte< T_ >::type >::container container
    Object container for T; it is the return type of get_o.
grid_gpu_ker< 1, T_, layout_base, grid_sm< 1, void > > base
    1-D static grid.
vector_gpu_ker< T, layout_base > & vref
    Vector reference.
bool match
    Indicates whether the pointer matches.
std::string match_str
    Match string.

__device__ __host__ unsigned int size() const
    Return the size of the vector.
__device__ __host__ unsigned int capacity() const
    Return the maximum capacity of the vector before reallocation.
__device__ __host__ bool check_bound(size_t v1) const
    Check that the key is inside the grid.
__device__ __host__ void * getPointer()
    Get the pointer for the property p.
__device__ __host__ const void * getPointer() const
    Get the pointer for the property p.
__device__ __host__ auto get(unsigned int id) -> decltype(base.template get< p >(grid_key_dx< 1 >(0)))
    Get an element of the vector.
__device__ __host__ auto get(unsigned int id) const -> decltype(base.template get< p >(grid_key_dx< 1 >(0)))
    Get an element of the vector.
__device__ __host__ auto get(unsigned int id) -> decltype(base.get_o(grid_key_dx< 1 >(id)))
    Get an element of the vector.
__device__ __host__ auto get(unsigned int id) const -> const decltype(base.get_o(grid_key_dx< 1 >(id)))
    Get an element of the vector.
__device__ __host__ auto get_o(unsigned int id) -> decltype(base.get_o(grid_key_dx< 1 >(id)))
    Get an element of the vector.
__device__ __host__ auto get_o(unsigned int id) const -> decltype(base.get_o(grid_key_dx< 1 >(id)))
    Get an element of the vector.
__device__ encapc< dim, T_, layout > get_o(const grid_key_dx< dim, Tk > &v1)
    Get the selected element as a boost::fusion::vector.
auto last() -> decltype(base.get_o(grid_key_dx< 1 >(0)))
    Get the last element of the vector.
auto last() const -> decltype(base.get_o(grid_key_dx< 1 >(0)))
    Get the last element of the vector.

__device__ void set(int id, const container &obj)
    Set the object id to obj.
__device__ void set(unsigned int id, const vector_gpu_ker< T_, layout_base > &v, unsigned int src)
    Set the element of the vector v from another element of another vector.
void set_o(unsigned int i, const encap_S &obj)
    Set an element of the vector from an object that is a subset of the vector properties.
__host__ vector_gpu_ker< T, layout_base > & operator=(const vector_gpu_ker< T, layout_base > &v)
    This operator absorbs the pointers; consider that this object wraps device pointers.

__host__ ite_gpu< 1 > getGPUIterator(size_t n_thr=default_kernel_wg_threads_) const
    Get an iterator for the GPU.
ite_gpu< 1 > getGPUIteratorTo(size_t stop, size_t n_thr=default_kernel_wg_threads_) const
    Get an iterator for the GPU.
struct ite_gpu< dim > getGPUIterator(grid_key_dx< dim > &key1, grid_key_dx< dim > &key2, size_t n_thr=default_kernel_wg_threads_) const
    Get an iterator for the GPU.
__device__ grid_gpu_ker< 1, T_, layout_base, grid_sm< 1, void > > & getBase()
    Return the base.
void constructor_impl(int v_size, const grid_gpu_ker< 1, T_, layout_base, grid_sm< 1, void >> &cpy)
    Implementation of the constructor.
void constructor_impl(int v_size, const grid_gpu_ker_ref< 1, T_, layout_base, grid_sm< 1, void >> &cpy)
    Implementation of the constructor.

grid_key_dx is the key to access any element in the grid.
grid interface available when on gpu.
It copies the properties from one object to another.
It copies the properties from one object to another, applying an operation.
convert a type into constant type.
this class is a functor for the "for_each" algorithm.