#ifndef MAP_VECTOR_CUDA_HPP_
#define MAP_VECTOR_CUDA_HPP_
template<typename vector_src_type, typename vector_dst_type, unsigned int ... args>
__global__ void merge_add_prp_device_impl(vector_src_type v_src, vector_dst_type v_dst, unsigned int old_sz)
{
	int i = threadIdx.x + blockIdx.x * blockDim.x;

	if (i >= v_src.size())
	{return;}

	// copy the selected properties of the source element i into the destination slot old_sz + i
	object_s_di<decltype(v_src.get(i)),decltype(v_dst.get(old_sz+i)),OBJ_ENCAP,args...>(v_src.get(i),v_dst.get(old_sz+i));
}
template<typename vector_src_type, typename vector_dst_type>
__global__ void copy_two_vectors(vector_src_type v_dst, vector_dst_type v_src)
{
	int i = threadIdx.x + blockIdx.x * blockDim.x;

	if (i >= v_src.size())
	{return;}

	v_dst.get(i) = v_src.get(i);
}
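/* Example launch of the element-wise kernels above (a minimal sketch, not part of the
 * header). It assumes v_src_k and v_dst_k are kernel-side views of two device vectors
 * (for instance vector_gpu_ker objects obtained from their owning containers), and that
 * v_dst_k has already been resized to old_sz + v_src_k.size() elements before the merge.
 *
 *     unsigned int n      = v_src_k.size();
 *     unsigned int old_sz = v_dst_k.size() - n;        // elements already present in the destination
 *
 *     dim3 thr(128);                                   // example block size
 *     dim3 wthr((n + thr.x - 1) / thr.x);              // enough blocks to cover n elements
 *
 *     // append properties 0 and 1 of every source element behind the existing destination elements
 *     merge_add_prp_device_impl<decltype(v_src_k),decltype(v_dst_k),0,1>
 *         <<<wthr,thr>>>(v_src_k,v_dst_k,old_sz);
 *
 *     // plain element-wise copy, note the argument order: destination first
 *     copy_two_vectors<<<wthr,thr>>>(v_dst_k,v_src_k);
 */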
template<template<typename,typename> class op,
         typename vector_src_type,
         typename vector_dst_type,
         typename vector_opart_type,
         unsigned int ... args>
__global__ void merge_add_prp_device_impl_src_dst_opar_offset(vector_src_type v_src,
                                                               vector_dst_type v_dst,
                                                               vector_opart_type opart,
                                                               unsigned int start)
{
	int i = threadIdx.x + blockIdx.x * blockDim.x;

	if (i >= v_src.size())
	{return;}

	// apply op while copying the selected properties of v_src.get(i)
	// into the destination element indexed by opart<1>(start + i)
	object_s_di_op<op,decltype(v_src.get(0)),decltype(v_dst.get(0)),OBJ_ENCAP,args...>(v_src.get(i),v_dst.get(opart.template get<1>(start + i)));
}
template<template<typename,typename> class op,
         typename vector_src_type,
         typename vector_dst_type,
         typename vector_opart_type,
         unsigned int ... args>
__global__ void merge_add_prp_device_impl_src_offset_dst_opar(vector_src_type v_src,
                                                               vector_dst_type v_dst,
                                                               vector_opart_type opart,
                                                               unsigned int start)
{
	int i = threadIdx.x + blockIdx.x * blockDim.x;

	if (i >= opart.size())
	{return;}

	// apply op while copying the selected properties of the contiguous source element
	// v_src.get(start + i) into the destination element indexed by opart<0>(i)
	object_si_di_op<op,decltype(v_src.get(0)),decltype(v_dst.get(0)),OBJ_ENCAP,args...>(v_src.get(start + i),v_dst.get(opart.template get<0>(i)));
}
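/* Reference semantics of the two merge kernels above, written as plain host-style loops
 * (a sketch for clarity only: v_src, v_dst, opart and start are the kernel parameters,
 * and "copy" stands for the object_s_di_op / object_si_di_op property copy with op applied).
 *
 *     // merge_add_prp_device_impl_src_dst_opar_offset:
 *     // the source is read contiguously, the destination index is looked up in opart
 *     // starting from position start
 *     for (size_t i = 0 ; i < v_src.size() ; i++)
 *     {copy(v_src.get(i), v_dst.get(opart.template get<1>(start + i)));}
 *
 *     // merge_add_prp_device_impl_src_offset_dst_opar:
 *     // the source is read from a contiguous window starting at start,
 *     // the destination index is looked up in opart from position 0
 *     for (size_t i = 0 ; i < opart.size() ; i++)
 *     {copy(v_src.get(start + i), v_dst.get(opart.template get<0>(i)));}
 */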
template<unsigned int prp>
__device__ void fill_vector_error_array_overflow(const void * sptr, int key)
{
	int * ptr = (int *)&global_cuda_error_array[0];

	// pointer of the structure that overflowed, split into low and high 32-bit words
	ptr[1] = ((size_t)sptr) & 0xFFFFFFFF;
	ptr[2] = (((size_t)sptr) & 0xFFFFFFFF00000000) >> 32;

	// offending index
	for (int i = 0 ; i < 1 ; i++)
	{ptr[i+5] = key;}

	// coordinates of the block and thread that overflowed
	ptr[5+1] = blockIdx.x;
	ptr[6+1] = blockIdx.y;
	ptr[7+1] = blockIdx.z;

	ptr[8+1] = blockDim.x;
	ptr[9+1] = blockDim.y;
	ptr[10+1] = blockDim.z;

	ptr[11+1] = threadIdx.x;
	ptr[12+1] = threadIdx.y;
	ptr[13+1] = threadIdx.z;
}
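/* Host-side sketch: decoding the record written by fill_vector_error_array_overflow.
 * Illustrative only; it assumes the device buffer global_cuda_error_array has already
 * been copied back into a host int buffer err (how that copy is performed, and the
 * meaning of the slots not written above, are defined elsewhere).
 *
 *     void print_overflow_record(const int * err)
 *     {
 *         // reassemble the 64-bit pointer from the two 32-bit words written above
 *         size_t sptr = ((size_t)(unsigned int)err[2] << 32) | (unsigned int)err[1];
 *
 *         printf("overflow on structure %p key %d block (%d,%d,%d) thread (%d,%d,%d)\n",
 *                (void *)sptr, err[5],
 *                err[6], err[7], err[8],
 *                err[12], err[13], err[14]);
 *     }
 */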
template<typename T, template <typename> class layout_base>
struct vector_gpu_ker_ref;
//! grid interface available when on gpu
template<typename T, template <typename> class layout_base>
struct vector_gpu_ker
{
	typedef typename apply_transform<layout_base,T>::type T_;

	//! typedef of this structure
	typedef vector_gpu_ker<T,layout_base> self_type;

	//! Object container for T, it is the return type of get_o
	typedef typename grid_base<1,T_,CudaMemory,typename layout_base<T_>::type>::container container;

	//! actual number of elements in the vector
	unsigned int v_size;

	//! 1-D static grid holding the data
	grid_gpu_ker<1,T_,layout_base,grid_sm<1,void>> base;
	//! Return the size of the vector
	__device__ __host__ unsigned int size() const
	{
		return v_size;
	}
	//! Return the local size of the vector
	__host__ __device__ size_t size_local() const
	{
		return size();
	}
	//! Get an element of the vector
	template <unsigned int p>
	__device__ __host__ inline auto get(unsigned int id) const -> decltype(base.template get<p>(grid_key_dx<1>(0)))
	{
		if (check_bound(id) == false)
		{fill_vector_error_array_overflow<p>(this->template getPointer<p>(),id);}

		grid_key_dx<1> key(id);
		return base.template get<p>(key);
	}
	//! Get an element of the vector (alias of get<p>)
	template <unsigned int p>
	__device__ __host__ inline auto getProp(unsigned int id) const -> decltype(base.template get<p>(grid_key_dx<1>(0)))
	{
		return this->get<p>(id);
	}
	//! Get an element of the vector, addressed through a key object (alias of get<p>)
	template <unsigned int p, typename key_type>
	__device__ __host__ inline auto getProp(key_type id) const -> decltype(base.template get<p>(grid_key_dx<1>(0)))
	{
		return this->get<p>(id.getKey());
	}
	//! Get an element of the vector (whole encapsulated object)
	__device__ __host__ inline auto get(unsigned int id) -> decltype(base.get_o(grid_key_dx<1>(id)))
	{
		if (check_bound(id) == false)
		{fill_vector_error_array_overflow<-1>(this->template getPointer<0>(),id);}
		return base.get_o(grid_key_dx<1>(id));
	}

	//! Get an element of the vector (whole encapsulated object)
	__device__ __host__ inline auto get(unsigned int id) const -> const decltype(base.get_o(grid_key_dx<1>(id)))
	{
		if (check_bound(id) == false)
		{fill_vector_error_array_overflow<-1>(this->template getPointer<0>(),id);}
		return base.get_o(grid_key_dx<1>(id));
	}

	//! Get the selected element as a boost::fusion::vector
	__device__ __host__ inline auto get_o(unsigned int id) -> decltype(base.get_o(grid_key_dx<1>(id)))
	{
		if (check_bound(id) == false)
		{fill_vector_error_array_overflow<-1>(this->template getPointer<0>(),id);}
		return base.get_o(grid_key_dx<1>(id));
	}

	//! Get the selected element as a boost::fusion::vector
	__device__ __host__ inline auto get_o(unsigned int id) const -> decltype(base.get_o(grid_key_dx<1>(id)))
	{
		if (check_bound(id) == false)
		{fill_vector_error_array_overflow<-1>(this->template getPointer<0>(),id);}
		return base.get_o(grid_key_dx<1>(id));
	}
	//! Get an element of the vector
	template <unsigned int p>
	__device__ __host__ inline auto get(unsigned int id) -> decltype(base.template get<p>(grid_key_dx<1>(0)))
	{
		if (check_bound(id) == false)
		{fill_vector_error_array_overflow<p>(this->template getPointer<p>(),id);}

		grid_key_dx<1> key(id);
		return base.template get<p>(key);
	}
	//! Constructor from a vector_gpu_ker_ref wrapper
	vector_gpu_ker(const vector_gpu_ker_ref<T,layout_base> & vref)
	{
		*this = vref.getVector();
	}
	//! Implementation of the constructor from a size and a 1-D device grid
	void constructor_impl(int v_size, const grid_gpu_ker<1,T_,layout_base,grid_sm<1,void>> & cpy)
	{
		this->v_size = v_size;
		base.constructor_impl(cpy);
	}

	//! Implementation of the constructor from a size and a 1-D device grid reference
	void constructor_impl(int v_size, const grid_gpu_ker_ref<1,T_,layout_base,grid_sm<1,void>> & cpy)
	{
		this->v_size = v_size;
		base.constructor_impl(cpy);
	}
	//! Set the object id to obj
	__device__ void set(int id, const container & obj)
	{
		if (check_bound(id) == false)
		{fill_vector_error_array_overflow<-1>(this->template getPointer<0>(),id);}

		base.set(id,obj);
	}
	//! Get the pointer for the property p
	template<unsigned int p> __device__ __host__ void * getPointer()
	{
		return base.template getPointer<p>();
	}
	//! Get the pointer for the property p
	template<unsigned int p> __device__ __host__ const void * getPointer() const
	{
		return base.template getPointer<p>();
	}
	//! It set an element of the vector from an object that is a subset of the vector properties
	template <typename encap_S, unsigned int ...args>
	void set_o(unsigned int i, const encap_S & obj)
	{
		if (check_bound(i) == false)
		{fill_vector_error_array_overflow<-1>(this->template getPointer<0>(),i);}

		// write the properties args... of obj into the element i
		object_s_di<encap_S,decltype(this->get(i)),OBJ_ENCAP,args...>(obj,this->get(i));
	}

	//! Set the element id of this vector from the element src of the vector v
	__device__ void set(unsigned int id, const vector_gpu_ker<T_,layout_base> & v, unsigned int src)
	{
		if (check_bound(id) == false)
		{fill_vector_error_array_overflow<-1>(this->template getPointer<0>(),id);}

		base.set(id,v.base,src);
	}
	//! Set the properties prp... of the element id of this vector from the element src of the vector v
	template<unsigned int ... prp>
	__device__ void set(unsigned int id, const vector_gpu_ker<T_,layout_base> & v, unsigned int src)
	{
		if (check_bound(id) == false)
		{fill_vector_error_array_overflow<-1>(this->template getPointer<0>(),id);}

		base.template set<prp...>(id,v.base,src);
	}
	__host__ __device__ auto value(unsigned int p) -> decltype(base.template get<0>(grid_key_dx<1>(0)))
	//! Return a reference to this vector
	__device__ __host__ const vector_gpu_ker<T,layout_base> & getVector() const
	{
		return *this;
	}

	//! Return the pointer to the size member (internal use)
	void * internal_get_size_pointer() {return &v_size;}
	//! Print the size of this structure and of its members
	void print_size()
	{
#ifndef DISABLE_ALL_RTTI
		std::cout << "the size of: " << demangle(typeid(self_type).name()) << " is " << sizeof(self_type) << std::endl;
		std::cout << "    " << demangle(typeid(decltype(v_size)).name()) << ":" << sizeof(decltype(v_size)) << std::endl;
		std::cout << "    " << demangle(typeid(decltype(base)).name()) << ":" << sizeof(decltype(base)) << std::endl;
#endif
	}
		// check the given pointer against each property of the vector
		boost::mpl::for_each_ref<boost::mpl::range_c<int,0,T::max_prop>>(ptr_chk);

		if (ptr_chk.result == true)
		{pc.match_str += std::string("Property: ") + std::to_string(ptr_chk.prp) + "\n";}
};
/*! \brief grid interface available when on gpu
 *
 * Reference wrapper around a vector_gpu_ker
 */
template<typename T, template <typename> class layout_base>
struct vector_gpu_ker_ref
{
	typedef typename apply_transform<layout_base,T>::type T_;

	//! it define that it is a vector
	typedef int yes_i_am_vector;

	//! Type of the encapsulation memory parameter
	typedef typename layout_base<T_>::type layout_type;

	//! Type of the value the vector is storing
	typedef T_ value_type;

	//! Indicate this structure has a function to check the device pointer
	typedef int yes_has_check_device_pointer;

	//! vector reference
	vector_gpu_ker<T,layout_base> & vref;
	//! Return the size of the vector
	__device__ __host__ unsigned int size() const
	{
		return vref.size();
	}

	//! Return the local size of the vector
	__host__ __device__ size_t size_local() const
	{
		return vref.size_local();
	}

	//! Return the maximum capacity of the vector before reallocation
	__device__ __host__ unsigned int capacity() const
	{
		return vref.capacity();
	}

	//! Get an element of the vector
	template <unsigned int p>
	__device__ __host__ inline auto get(unsigned int id) const -> decltype(vref.template get<p>(id))
	{
		return vref.template get<p>(id);
	}

	//! Get an element of the vector (whole encapsulated object)
	__device__ __host__ inline auto get(unsigned int id) -> decltype(vref.get(id))
	{
		return vref.get(id);
	}

	//! Get an element of the vector (whole encapsulated object)
	inline __device__ __host__ auto get(unsigned int id) const -> decltype(vref.get(id))
	{
		return vref.get(id);
	}

	//! Get the selected element as a boost::fusion::vector
	inline __device__ __host__ auto get_o(unsigned int id) const -> decltype(vref.get_o(id))
	{
		return vref.get_o(id);
	}

	//! Get the selected element as a boost::fusion::vector
	inline __device__ __host__ auto get_o(unsigned int id) -> decltype(vref.get_o(id))
	{
		return vref.get_o(id);
	}
	//! Get the last element of the vector
	inline auto last() const -> decltype(vref.last())
	{
		return vref.last();
	}

	//! Get an element of the vector
	template <unsigned int p>
	__device__ __host__ inline auto get(unsigned int id) -> decltype(vref.template get<p>(id))
	{
		return vref.template get<p>(id);
	}

	//! Get the last element of the vector
	inline auto last() -> decltype(vref.last())
	{
		return vref.last();
	}

	//! Constructor, it wraps the given vector_gpu_ker
	vector_gpu_ker_ref(vector_gpu_ker<T,layout_base> & vref)
	:vref(vref)
	{}

	//! Set the object id to obj
	__device__ void set(int id, const container & obj)
	{
		vref.set(id,obj);
	}
	//! Get the pointer for the property p
	template<unsigned int p> __device__ __host__ void * getPointer()
	{
		return vref.template getPointer<p>();
	}

	//! Get the pointer for the property p
	template<unsigned int p> __device__ __host__ const void * getPointer() const
	{
		return vref.template getPointer<p>();
	}

	//! It set an element of the vector from an object that is a subset of the vector properties
	template <typename encap_S, unsigned int ...args>
	void set_o(unsigned int i, const encap_S & obj)
	{
		vref.template set_o<encap_S,args...>(i,obj);
	}

	//! Set the element id of this vector from the element src of the vector v
	__device__ void set(unsigned int id, const vector_gpu_ker<T_,layout_base> & v, unsigned int src)
	{
		vref.set(id,v,src);
	}

	//! Set the properties prp... of the element id of this vector from the element src of the vector v
	template<unsigned int ... prp>
	__device__ void set(unsigned int id, const vector_gpu_ker<T_,layout_base> & v, unsigned int src)
	{
		vref.template set<prp ...>(id,v,src);
	}

	//! Get an iterator for the GPU
	__host__ ite_gpu<1> getGPUIterator(size_t n_thr = default_kernel_wg_threads_) const
	{
		return vref.getGPUIterator(n_thr);
	}
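	/* Example: using the GPU iterator to launch a kernel over all elements of this vector
	 * (a sketch only; it assumes ite_gpu<1> exposes the launch configuration as the dim3
	 * members wthr and thr, as it does elsewhere in OpenFPM).
	 *
	 *     ite_gpu<1> ite = v.getGPUIterator();
	 *     some_kernel<<<ite.wthr,ite.thr>>>(v.getVector());
	 */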
	//! Get an iterator for the GPU, from 0 up to the element stop
	ite_gpu<1> getGPUIteratorTo(size_t stop, size_t n_thr = default_kernel_wg_threads_) const
	{
		return vref.getGPUIteratorTo(stop,n_thr);
	}
	//! Return the wrapped vector_gpu_ker
	__device__ __host__ const vector_gpu_ker<T,layout_base> & getVector() const
	{
		return vref;
	}

	//! operator=, this operator absorbs the pointers, consider that this object wraps device pointers
	__host__ vector_gpu_ker_ref<T,layout_base> & operator=(const vector_gpu_ker<T,layout_base> & v)
	{
		vref = v;
		return *this;
	}
	//! Return the base
	__device__ grid_gpu_ker<1,T_,layout_base,grid_sm<1,void>> & getBase()
	{
		return vref.getBase();
	}

	//! Check if the given device pointer belongs to one of the properties of the wrapped vector
	pointer_check check_device_pointer(void * ptr)
	{
		return vref.check_device_pointer(ptr);
	}
	//! Return the pointer to the size member (internal use)
	void * internal_get_size_pointer()
	{
		return vref.internal_get_size_pointer();
	}

	//! Print the size of this structure and of its members
	void print_size()
	{
		return vref.print_size();
	}
};

#endif /* MAP_VECTOR_CUDA_HPP_ */