OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
map_vector_cuda_ker.cuh
/*
 * map_vector_cuda.hpp
 *
 * Created on: Jun 28, 2018
 *     Author: i-bird
 */

#ifndef MAP_VECTOR_CUDA_HPP_
#define MAP_VECTOR_CUDA_HPP_

#ifdef __NVCC__

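/*! \brief Kernel: append the properties args ... of every element of v_src at the tail of v_dst
 *
 * One thread per source element: element i of v_src is written into slot old_sz + i of v_dst.
 * The caller is expected to have already resized v_dst to at least old_sz + v_src.size().
 */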
template<typename vector_src_type, typename vector_dst_type, unsigned int ... args>
__global__ void merge_add_prp_device_impl(vector_src_type v_src, vector_dst_type v_dst, unsigned int old_sz)
{
	int i = threadIdx.x + blockIdx.x * blockDim.x;

	if (i >= v_src.size())
	{return;}

	// write the object in the last element
	object_s_di<decltype(v_src.get(i)),decltype(v_dst.get(old_sz+i)),OBJ_ENCAP,args...>(v_src.get(i),v_dst.get(old_sz+i));
}

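/*! \brief Kernel: element-wise copy of v_src into v_dst
 *
 * One thread per element; v_dst is assumed to hold at least v_src.size() elements.
 */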
template<typename vector_dst_type, typename vector_src_type>
__global__ void copy_two_vectors(vector_dst_type v_dst, vector_src_type v_src)
{
	int i = threadIdx.x + blockIdx.x * blockDim.x;

	if (i >= v_src.size())
	{return;}

	v_dst.get(i) = v_src.get(i);
}

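/*! \brief Kernel: merge the properties args ... of v_src into v_dst at scattered positions
 *
 * Element i of v_src is combined, through the operation op (e.g. a replace or add functor),
 * with the element of v_dst whose index is stored in opart.get<1>(start + i): the source is
 * read linearly, the destination is addressed indirectly.
 */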
template<template<typename,typename> class op,
         typename vector_src_type,
         typename vector_dst_type,
         typename vector_opart_type,
         unsigned int ... args>
__global__ void merge_add_prp_device_impl_src_dst_opar_offset(vector_src_type v_src, vector_dst_type v_dst, vector_opart_type opart, unsigned int start)
{
	int i = threadIdx.x + blockIdx.x * blockDim.x;

	if (i >= v_src.size())
	{return;}

	// write the object in the last element
	object_s_di_op<op,decltype(v_src.get(0)),decltype(v_dst.get(0)),OBJ_ENCAP,args...>(v_src.get(i),v_dst.get(opart.template get<1>(start + i)));
}

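/*! \brief Kernel: merge a contiguous slice of v_src into v_dst at scattered positions
 *
 * The mirror of the kernel above: the source is read with the offset start + i, the
 * destination index comes from opart.get<0>(i), and one thread is launched per entry of opart.
 */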
template<template<typename,typename> class op,
         typename vector_src_type,
         typename vector_dst_type,
         typename vector_opart_type,
         unsigned int ... args>
__global__ void merge_add_prp_device_impl_src_offset_dst_opar(vector_src_type v_src, vector_dst_type v_dst, vector_opart_type opart, unsigned int start)
{
	int i = threadIdx.x + blockIdx.x * blockDim.x;

	if (i >= opart.size())
	{return;}

	// write the object in the last element
	object_si_di_op<op,decltype(v_src.get(0)),decltype(v_dst.get(0)),OBJ_ENCAP,args...>(v_src.get(start + i),v_dst.get(opart.template get<0>(i)));
}

#endif

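/*! \brief Record an out-of-bound access in the global CUDA error array
 *
 * Layout written below: ptr[0] error flag, ptr[1]/ptr[2] low/high 32 bits of the structure
 * pointer, ptr[3] the property index prp (-1 for whole-object accesses), ptr[4] the
 * dimensionality (1 for a vector), ptr[5] the offending key and, when compiled with NVCC,
 * ptr[6..14] the block index, block dimension and thread index of the faulting thread.
 */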
template<int prp>
__device__ void fill_vector_error_array_overflow(const void * sptr,int key)
{
#ifdef CUDA_GPU

	int * ptr = (int *)&global_cuda_error_array[0];

	ptr[0] = 1;
	ptr[1] = ((size_t)sptr) & 0xFFFFFFFF;
	ptr[2] = (((size_t)sptr) & 0xFFFFFFFF00000000) >> 32;
	ptr[3] = prp;
	ptr[4] = 1;

	for (int i = 0 ; i < 1 ; i++)
	{ptr[i+5] = key;}

#ifdef __NVCC__

	ptr[5+1] = blockIdx.x;
	ptr[6+1] = blockIdx.y;
	ptr[7+1] = blockIdx.z;

	ptr[8+1] = blockDim.x;
	ptr[9+1] = blockDim.y;
	ptr[10+1] = blockDim.z;

	ptr[11+1] = threadIdx.x;
	ptr[12+1] = threadIdx.y;
	ptr[13+1] = threadIdx.z;

#endif

#endif
}

namespace openfpm
{
	template<typename T, template <typename> class layout_base>
	struct vector_gpu_ker_ref;

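	/*! \brief Kernel-side view of a vector (grid interface available when on GPU)
	 *
	 * It wraps the device pointers of a vector without owning them (no reallocation, no
	 * deallocation), so that the data can be accessed inside a __global__ function. It is
	 * normally produced on the host by toKernel() and passed to kernels by value.
	 */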
	template<typename T, template <typename> class layout_base>
	struct vector_gpu_ker
	{
		typedef vector_gpu_ker<T,layout_base> self_type;

		typedef typename apply_transform<layout_base,T>::type T_;

		//! actual size of the vector (it is not the allocated space)
		unsigned int v_size;

		//! 1-D grid that holds the data on the device
		grid_gpu_ker<1,T_,layout_base,grid_sm<1,void>> base;

		//! Check that the key is inside the grid
		__device__ __host__ inline bool check_bound(size_t v1) const
		{
			return v1 < size();
		}

	public:

		//! it define that it is a vector
		typedef int yes_i_am_vector;

		//! Type of the encapsulation memory parameter
		typedef typename layout_base<T_>::type layout_type;

		//! Object container for T, it is the return type of get_o; you can access all the properties of T
		typedef typename grid_base<1,T_,CudaMemory,typename layout_base<T_>::type>::container container;

		//! Type of the value the vector is storing
		typedef T_ value_type;

		//! Indicate this structure has a function to check the device pointer
		typedef int yes_has_check_device_pointer;

		//! Return the size of the vector
		__device__ __host__ unsigned int size() const
		{
			return v_size;
		}

		//! Return the maximum capacity of the vector before reallocation
		__device__ __host__ unsigned int capacity() const
		{
			return base.size();
		}

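		/*! \brief Element accessors
		 *
		 * The get<p> overloads return the property p of the element id, while get/get_o
		 * return the whole element as an encapsulated object. With SE_CLASS1 enabled every
		 * access is bound-checked and overflows are reported through
		 * fill_vector_error_array_overflow.
		 */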
		template <unsigned int p>
		__device__ __host__ inline auto get(unsigned int id) const -> decltype(base.template get<p>(grid_key_dx<1>(0)))
		{
#ifdef SE_CLASS1
			if (check_bound(id) == false)
			{fill_vector_error_array_overflow<p>(this->template getPointer<p>(),id);}
#endif
			grid_key_dx<1> key(id);

			return base.template get<p>(key);
		}

207 
217  __device__ __host__ inline auto get(unsigned int id) -> decltype(base.get_o(grid_key_dx<1>(id)))
218  {
219 #ifdef SE_CLASS1
220  if (check_bound(id) == false)
221  {fill_vector_error_array_overflow<-1>(this->template getPointer<0>(),id);}
222 #endif
223 
224  grid_key_dx<1> key(id);
225 
226  return base.get_o(key);
227  }
228 
238  inline __device__ __host__ auto get(unsigned int id) const -> const decltype(base.get_o(grid_key_dx<1>(id)))
239  {
240 #ifdef SE_CLASS1
241  if (check_bound(id) == false)
242  {fill_vector_error_array_overflow<-1>(this->getPointer<0>(),id);}
243 #endif
244 
245  grid_key_dx<1> key(id);
246 
247  return base.get_o(key);
248  }
249 
		inline __device__ __host__ auto get_o(unsigned int id) const -> decltype(base.get_o(grid_key_dx<1>(id)))
		{
#ifdef SE_CLASS1
			if (check_bound(id) == false)
			{fill_vector_error_array_overflow<-1>(this->template getPointer<0>(),id);}
#endif

			grid_key_dx<1> key(id);

			return base.get_o(key);
		}

		inline __device__ __host__ auto get_o(unsigned int id) -> decltype(base.get_o(grid_key_dx<1>(id)))
		{
#ifdef SE_CLASS1
			if (check_bound(id) == false)
			{fill_vector_error_array_overflow<-1>(this->template getPointer<0>(),id);}
#endif

			grid_key_dx<1> key(id);

			return base.get_o(key);
		}

		//! Get the last element of the vector
		inline auto last() const -> decltype(base.get_o(grid_key_dx<1>(0)))
		{
			grid_key_dx<1> key(size()-1);

			return base.get_o(key);
		}

		template <unsigned int p>
		__device__ __host__ inline auto get(unsigned int id) -> decltype(base.template get<p>(grid_key_dx<1>(0)))
		{
#ifdef SE_CLASS1
			if (check_bound(id) == false)
			{fill_vector_error_array_overflow<p>(this->template getPointer<p>(),id);}
#endif

			grid_key_dx<1> key(id);

			return base.template get<p>(key);
		}

		//! Get the last element of the vector
		inline auto last() -> decltype(base.get_o(grid_key_dx<1>(0)))
		{
			grid_key_dx<1> key(size()-1);

			return base.get_o(key);
		}

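		/*! \brief Constructors: the default one creates an empty view, the others absorb the
		 * pointers of an existing device grid or of another kernel view (no data is copied)
		 */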
		vector_gpu_ker()
		:v_size(0)
		{}

		vector_gpu_ker(int v_size, const grid_gpu_ker<1,T_,layout_base,grid_sm<1,void>> & cpy)
		:v_size(v_size),base(cpy)
		{}

		vector_gpu_ker(const vector_gpu_ker_ref<T,layout_base> & vref)
		{
			this->operator=(vref.vref);
		}

		//! implementation of the constructor
		inline void constructor_impl(int v_size, const grid_gpu_ker<1,T_,layout_base,grid_sm<1,void>> & cpy)
		{
			this->v_size = v_size;
			base.constructor_impl(cpy);
		}

		//! implementation of the constructor
		inline void constructor_impl(int v_size, const grid_gpu_ker_ref<1,T_,layout_base,grid_sm<1,void>> & cpy)
		{
			this->v_size = v_size;
			base.constructor_impl(cpy);
		}

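		/*! \brief Setters: set(id,obj) writes an encapsulated object into the slot id, set_o
		 * copies a subset of properties from a compatible encap object, and the
		 * set(id,v,src) overloads copy the element src of another kernel vector, optionally
		 * restricted to the properties prp ...
		 */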
		__device__ void set(int id, const container & obj)
		{
#ifdef SE_CLASS1
			if (check_bound(id) == false)
			{fill_vector_error_array_overflow<-1>(this->template getPointer<0>(),id);}
#endif

			base.set(id,obj);
		}

		//! Get the pointer for the property p
		template<unsigned int p> __device__ __host__ void * getPointer()
		{
			return base.template getPointer<p>();
		}

		//! Get the pointer for the property p
		template<unsigned int p> __device__ __host__ const void * getPointer() const
		{
			return base.template getPointer<p>();
		}

		template <typename encap_S, unsigned int ...args> void set_o(unsigned int i, const encap_S & obj)
		{
#ifdef SE_CLASS1
			if (check_bound(i) == false)
			{fill_vector_error_array_overflow<-1>(this->template getPointer<0>(),i);}
#endif

			// write the object in the last element
			object_s_di<encap_S,decltype(get(i)),OBJ_ENCAP,args...>(obj,get(i));
		}

		__device__ void set(unsigned int id, const vector_gpu_ker<T_,layout_base> & v, unsigned int src)
		{
#ifdef SE_CLASS1
			if (check_bound(id) == false)
			{fill_vector_error_array_overflow<-1>(this->template getPointer<0>(),id);}
#endif

			base.set(id,v.base,src);
		}

		template<unsigned int ... prp>
		__device__ void set(unsigned int id, const vector_gpu_ker<T_,layout_base> & v, unsigned int src)
		{
#ifdef SE_CLASS1
			if (check_bound(id) == false)
			{fill_vector_error_array_overflow<-1>(this->template getPointer<0>(),id);}
#endif

			base.template set<prp...>(id,v.base,src);
		}

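		/*! \brief Get a launch configuration for the GPU covering [0,size()) or [0,stop)
		 *
		 * A typical host-side launch is sketched below (my_kernel and v are hypothetical
		 * names, the wthr/thr members of ite_gpu carry the grid and block dimensions):
		 *
		 *   auto ite = v.toKernel().getGPUIterator();
		 *   my_kernel<<<ite.wthr,ite.thr>>>(v.toKernel());
		 */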
		__host__ ite_gpu<1> getGPUIterator(size_t n_thr = default_kernel_wg_threads_) const
		{
			grid_key_dx<1> start(0);
			grid_key_dx<1> stop(size()-1);

			return base.getGPUIterator(start,stop,n_thr);
		}

		ite_gpu<1> getGPUIteratorTo(size_t stop, size_t n_thr = default_kernel_wg_threads_) const
		{
			grid_key_dx<1> start(0);
			grid_key_dx<1> stop_(stop);

			return base.getGPUIterator(start,stop_,n_thr);
		}

		//! operator=: this operator absorbs the pointers, consider that this object wraps device pointers
		__host__ vector_gpu_ker<T,layout_base> & operator=(const vector_gpu_ker<T,layout_base> & v)
		{
			v_size = v.v_size;
			base = v.base;

			return *this;
		}

		//! Return the base
		__device__ grid_gpu_ker<1,T_,layout_base,grid_sm<1,void>> & getBase()
		{
			return base;
		}

		void * internal_get_size_pointer() {return &v_size;}

		void print_size()
		{
#ifndef DISABLE_ALL_RTTI
			std::cout << "the size of: " << demangle(typeid(self_type).name()) << " is " << sizeof(self_type) << std::endl;
			std::cout << "    " << demangle(typeid(decltype(v_size)).name()) << ":" << sizeof(decltype(v_size)) << std::endl;
			std::cout << "    " << demangle(typeid(decltype(base)).name()) << ":" << sizeof(decltype(base)) << std::endl;
#endif
		}

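		/*! \brief Check if a device pointer belongs to one of the property buffers of this
		 * vector (compiled only with SE_CLASS1): it walks all the properties of T with a
		 * boost::mpl for_each and, on a match, reports the property the pointer falls into
		 */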
#ifdef SE_CLASS1

		pointer_check check_device_pointer(void * ptr)
		{
			pointer_check pc;
			pc.match = false;

			check_device_ptr<self_type> ptr_chk(ptr,*this);

			boost::mpl::for_each_ref<boost::mpl::range_c<int,0,T::max_prop>>(ptr_chk);

			if (ptr_chk.result == true)
			{
				pc.match = true;
				pc.match_str += std::string("Property: ") + std::to_string(ptr_chk.prp) + "\n";
			}

			return pc;
		}

#endif
	};

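	/*! \brief Reference wrapper around a vector_gpu_ker (grid interface available when on GPU)
	 *
	 * It stores a reference to an existing kernel view and forwards every operation to it,
	 * avoiding a copy of the view itself.
	 */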
	template<typename T, template <typename> class layout_base>
	struct vector_gpu_ker_ref
	{
		typedef vector_gpu_ker<T,layout_base> self_type;

		typedef typename apply_transform<layout_base,T>::type T_;

		//! vector reference
		vector_gpu_ker<T,layout_base> & vref;

	public:

		typedef int yes_i_am_vector;

		typedef typename layout_base<T_>::type layout_type;

		typedef typename grid_base<1,T_,CudaMemory,typename layout_base<T_>::type>::container container;

		typedef T_ value_type;

		typedef int yes_has_check_device_pointer;

		__device__ __host__ unsigned int size() const
		{
			return vref.size();
		}

		__device__ __host__ unsigned int capacity() const
		{
			return vref.capacity();
		}

		template <unsigned int p>
		__device__ __host__ inline auto get(unsigned int id) const -> decltype(vref.template get<p>(id))
		{
			return vref.template get<p>(id);
		}

		__device__ __host__ inline auto get(unsigned int id) -> decltype(vref.get(id))
		{
			return vref.get(id);
		}

		inline __device__ __host__ auto get(unsigned int id) const -> decltype(vref.get(id))
		{
			return vref.get(id);
		}

		inline __device__ __host__ auto get_o(unsigned int id) const -> decltype(vref.get_o(id))
		{
			return vref.get_o(id);
		}

		inline __device__ __host__ auto get_o(unsigned int id) -> decltype(vref.get_o(id))
		{
			return vref.get_o(id);
		}

		inline auto last() const -> decltype(vref.last())
		{
			return vref.last();
		}

		template <unsigned int p>
		__device__ __host__ inline auto get(unsigned int id) -> decltype(vref.template get<p>(id))
		{
			return vref.template get<p>(id);
		}

		inline auto last() -> decltype(vref.last())
		{
			return vref.last();
		}

		vector_gpu_ker_ref(vector_gpu_ker<T,layout_base> & vref)
		:vref(vref)
		{}

		__device__ void set(int id, const container & obj)
		{
			vref.set(id,obj);
		}

		template<unsigned int p> __device__ __host__ void * getPointer()
		{
			return vref.template getPointer<p>();
		}

		template<unsigned int p> __device__ __host__ const void * getPointer() const
		{
			return vref.template getPointer<p>();
		}

		template <typename encap_S, unsigned int ...args> void set_o(unsigned int i, const encap_S & obj)
		{
			vref.template set_o<encap_S,args...>(i,obj);
		}

		__device__ void set(unsigned int id, const vector_gpu_ker<T_,layout_base> & v, unsigned int src)
		{
			vref.set(id,v,src);
		}

		template<unsigned int ... prp>
		__device__ void set(unsigned int id, const vector_gpu_ker<T_,layout_base> & v, unsigned int src)
		{
			vref.template set<prp ...>(id,v,src);
		}

		__host__ ite_gpu<1> getGPUIterator(size_t n_thr = default_kernel_wg_threads_) const
		{
			return vref.getGPUIterator(n_thr);
		}

		ite_gpu<1> getGPUIteratorTo(size_t stop, size_t n_thr = default_kernel_wg_threads_) const
		{
			return vref.getGPUIteratorTo(stop,n_thr);
		}

		__host__ vector_gpu_ker_ref<T,layout_base> & operator=(const vector_gpu_ker<T,layout_base> & v)
		{
			vref.operator=(v);
			return *this;
		}

		__device__ grid_gpu_ker<1,T_,layout_base,grid_sm<1,void>> & getBase()
		{
			return vref.getBase();
		}

#ifdef SE_CLASS1

		pointer_check check_device_pointer(void * ptr)
		{
			return vref.check_device_pointer(ptr);
		}

#endif

		void * internal_get_size_pointer() {return vref.internal_get_size_pointer();}

		void print_size()
		{
			return vref.print_size();
		}
	};

}

#endif /* MAP_VECTOR_CUDA_HPP_ */