OpenFPM 5.2.0
Project that contains the implementation of distributed structures
map_vector_cuda_ker.cuh
/*
 * map_vector_cuda.hpp
 *
 *  Created on: Jun 28, 2018
 *      Author: i-bird
 */

#ifndef MAP_VECTOR_CUDA_HPP_
#define MAP_VECTOR_CUDA_HPP_

#ifdef __NVCC__

template<typename vector_src_type, typename vector_dst_type, unsigned int ... args>
__global__ void merge_add_prp_device_impl(vector_src_type v_src, vector_dst_type v_dst, unsigned int old_sz)
{
    int i = threadIdx.x + blockIdx.x * blockDim.x;

    if (i >= v_src.size())
    {return;}

    // write the object into the appended part of the destination (starting at old_sz)
    object_s_di<decltype(v_src.get(i)),decltype(v_dst.get(old_sz+i)),OBJ_ENCAP,args...>(v_src.get(i),v_dst.get(old_sz+i));
}

template<typename vector_dst_type, typename vector_src_type>
__global__ void copy_two_vectors(vector_dst_type v_dst, vector_src_type v_src)
{
    int i = threadIdx.x + blockIdx.x * blockDim.x;

    if (i >= v_src.size())
    {return;}

    v_dst.get(i) = v_src.get(i);
}


template<template<typename,typename> class op,
         typename vector_src_type,
         typename vector_dst_type,
         typename vector_opart_type,
         unsigned int ... args>
__global__ void merge_add_prp_device_impl_src_dst_opar_offset(vector_src_type v_src, vector_dst_type v_dst, vector_opart_type opart, unsigned int start)
{
    int i = threadIdx.x + blockIdx.x * blockDim.x;

    if (i >= v_src.size())
    {return;}

    // write the object at the destination index stored in opart
    object_s_di_op<op,decltype(v_src.get(0)),decltype(v_dst.get(0)),OBJ_ENCAP,args...>(v_src.get(i),v_dst.get(opart.template get<1>(start + i)));
}

template<template<typename,typename> class op,
         typename vector_src_type,
         typename vector_dst_type,
         typename vector_opart_type,
         unsigned int ... args>
__global__ void merge_add_prp_device_impl_src_offset_dst_opar(vector_src_type v_src, vector_dst_type v_dst, vector_opart_type opart, unsigned int start)
{
    int i = threadIdx.x + blockIdx.x * blockDim.x;

    if (i >= opart.size())
    {return;}

    // write the object at the destination index stored in opart
    object_si_di_op<op,decltype(v_src.get(0)),decltype(v_dst.get(0)),OBJ_ENCAP,args...>(v_src.get(start + i),v_dst.get(opart.template get<0>(i)));
}
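
/* Launch pattern (illustrative sketch, not part of this header): these kernels
 * assume one thread per element, so a typical call site builds an iterator from
 * the source vector and launches through OpenFPM's CUDA_LAUNCH macro, e.g.
 *
 *    auto ite = v_src.getGPUIterator();
 *    CUDA_LAUNCH(copy_two_vectors, ite, v_dst.toKernel(), v_src.toKernel());
 *
 * toKernel() produces the vector_gpu_ker view defined below; threads past
 * size() in the last block return immediately through the guard at the top
 * of each kernel.
 */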

#endif


template<int prp>
__device__ void fill_vector_error_array_overflow(const void * sptr,int key)
{
#ifdef CUDA_GPU
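
    // Error record layout written into global_cuda_error_array:
    //   [0] error flag, [1]/[2] low/high 32 bits of the container pointer,
    //   [3] property index (-1 means whole-object access), [4] number of key
    //   components, [5..] the key, then (under nvcc) blockIdx, blockDim and
    //   threadIdx of the offending thread.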

    int * ptr = (int *)&global_cuda_error_array[0];

    ptr[0] = 1;
    ptr[1] = ((size_t)sptr) & 0xFFFFFFFF;
    ptr[2] = (((size_t)sptr) & 0xFFFFFFFF00000000) >> 32;
    ptr[3] = prp;
    ptr[4] = 1;

    for (int i = 0 ; i < 1 ; i++)
    {ptr[i+5] = key;}

#ifdef __NVCC__

    ptr[5+1] = blockIdx.x;
    ptr[6+1] = blockIdx.y;
    ptr[7+1] = blockIdx.z;

    ptr[8+1] = blockDim.x;
    ptr[9+1] = blockDim.y;
    ptr[10+1] = blockDim.z;

    ptr[11+1] = threadIdx.x;
    ptr[12+1] = threadIdx.y;
    ptr[13+1] = threadIdx.z;

#endif

#endif
}


namespace openfpm
{

    template<typename T, template <typename> class layout_base>
    struct vector_gpu_ker_ref;

    template<typename T, template <typename> class layout_base>
    class vector_gpu_ker
    {
        //! self type
        typedef vector_gpu_ker<T,layout_base> self_type;

        typedef typename apply_transform<layout_base,T>::type T_;

        //! actual number of elements (not the allocated capacity)
        unsigned int v_size;

        //! 1-D static grid holding the data
        grid_gpu_ker<1,T_,layout_base,grid_sm<1,void>> base;

        //! Check that the key is inside the grid
        __device__ __host__ inline bool check_bound(size_t v1) const
        {
            return v1 < size();
        }

    public:

        //! it define that it is a vector
        typedef int yes_i_am_vector;

        //! Type of the encapsulation memory parameter
        typedef typename layout_base<T_>::type layout_type;

        //! Object container for T, you can access all the properties of T
        typedef typename grid_base<1,T_,CudaMemory,typename layout_base<T_>::type>::container container;

        //! Type of the value the vector is storing
        typedef T_ value_type;

        //! Indicate this structure has a function to check the device pointer
        typedef int yes_has_check_device_pointer;

        __device__ __host__ unsigned int size() const
        {
            return v_size;
        }

        __host__ __device__ size_t size_local() const
        {
            return size();
        }

        __device__ __host__ unsigned int capacity() const
        {
            return base.size();
        }

        template <unsigned int p>
        __device__ __host__ inline auto get(unsigned int id) const -> decltype(base.template get<p>(grid_key_dx<1>(0)))
        {
#ifdef SE_CLASS1
            if (check_bound(id) == false)
            {fill_vector_error_array_overflow<p>(this->template getPointer<p>(),id);}
#endif
            grid_key_dx<1> key(id);

            return base.template get<p>(key);
        }

        template <unsigned int p>
        __device__ __host__ inline auto getProp(unsigned int id) const -> decltype(base.template get<p>(grid_key_dx<1>(0)))
        {
            return this->get<p>(id);
        }

        template <unsigned int p, typename key_type>
        __device__ __host__ inline auto getProp(key_type id) const -> decltype(base.template get<p>(grid_key_dx<1>(0)))
        {
            return this->get<p>(id.getKey());
        }

        __device__ __host__ inline auto get(unsigned int id) -> decltype(base.get_o(grid_key_dx<1>(id)))
        {
#ifdef SE_CLASS1
            if (check_bound(id) == false)
            {fill_vector_error_array_overflow<-1>(this->template getPointer<0>(),id);}
#endif

            grid_key_dx<1> key(id);

            return base.get_o(key);
        }

        inline __device__ __host__ auto get(unsigned int id) const -> const decltype(base.get_o(grid_key_dx<1>(id)))
        {
#ifdef SE_CLASS1
            if (check_bound(id) == false)
            {fill_vector_error_array_overflow<-1>(this->template getPointer<0>(),id);}
#endif

            grid_key_dx<1> key(id);

            return base.get_o(key);
        }

        inline __device__ __host__ auto get_o(unsigned int id) const -> decltype(base.get_o(grid_key_dx<1>(id)))
        {
#ifdef SE_CLASS1
            if (check_bound(id) == false)
            {fill_vector_error_array_overflow<-1>(this->template getPointer<0>(),id);}
#endif

            grid_key_dx<1> key(id);

            return base.get_o(key);
        }

        inline __device__ __host__ auto get_o(unsigned int id) -> decltype(base.get_o(grid_key_dx<1>(id)))
        {
#ifdef SE_CLASS1
            if (check_bound(id) == false)
            {fill_vector_error_array_overflow<-1>(this->template getPointer<0>(),id);}
#endif

            grid_key_dx<1> key(id);

            return base.get_o(key);
        }

        inline auto last() const -> decltype(base.get_o(grid_key_dx<1>(0)))
        {
            grid_key_dx<1> key(size()-1);

            return base.get_o(key);
        }

        template <unsigned int p>
        __device__ __host__ inline auto get(unsigned int id) -> decltype(base.template get<p>(grid_key_dx<1>(0)))
        {
#ifdef SE_CLASS1
            if (check_bound(id) == false)
            {fill_vector_error_array_overflow<p>(this->template getPointer<p>(),id);}
#endif

            grid_key_dx<1> key(id);

            return base.template get<p>(key);
        }

        inline auto last() -> decltype(base.get_o(grid_key_dx<1>(0)))
        {
            grid_key_dx<1> key(size()-1);

            return base.get_o(key);
        }

        vector_gpu_ker()
        :v_size(0)
        {}

        vector_gpu_ker(int v_size, const grid_gpu_ker<1,T_,layout_base,grid_sm<1,void>> & cpy)
        :v_size(v_size),base(cpy)
        {}

        vector_gpu_ker(const vector_gpu_ker_ref<T,layout_base> & vref)
        {
            this->operator=(vref.vref);
        }

        inline void constructor_impl(int v_size, const grid_gpu_ker<1,T_,layout_base,grid_sm<1,void>> & cpy)
        {
            this->v_size = v_size;
            base.constructor_impl(cpy);
        }

        inline void constructor_impl(int v_size, const grid_gpu_ker_ref<1,T_,layout_base,grid_sm<1,void>> & cpy)
        {
            this->v_size = v_size;
            base.constructor_impl(cpy);
        }

        __device__ void set(int id, const container & obj)
        {
#ifdef SE_CLASS1
            if (check_bound(id) == false)
            {fill_vector_error_array_overflow<-1>(this->template getPointer<0>(),id);}
#endif

            base.set(id,obj);
        }

        template<unsigned int p> __device__ __host__ void * getPointer()
        {
            return base.template getPointer<p>();
        }

        template<unsigned int p> __device__ __host__ const void * getPointer() const
        {
            return base.template getPointer<p>();
        }

        template <typename encap_S, unsigned int ...args> void set_o(unsigned int i, const encap_S & obj)
        {
#ifdef SE_CLASS1
            if (check_bound(i) == false)
            {fill_vector_error_array_overflow<-1>(this->template getPointer<0>(),i);}
#endif

            // write the object at position i
            object_s_di<encap_S,decltype(get(i)),OBJ_ENCAP,args...>(obj,get(i));
        }

        __device__ void set(unsigned int id, const vector_gpu_ker<T_,layout_base> & v, unsigned int src)
        {
#ifdef SE_CLASS1
            if (check_bound(id) == false)
            {fill_vector_error_array_overflow<-1>(this->template getPointer<0>(),id);}
#endif

            base.set(id,v.base,src);
        }

        template<unsigned int ... prp>
        __device__ void set(unsigned int id, const vector_gpu_ker<T_,layout_base> & v, unsigned int src)
        {
#ifdef SE_CLASS1
            if (check_bound(id) == false)
            {fill_vector_error_array_overflow<-1>(this->template getPointer<0>(),id);}
#endif

            base.template set<prp...>(id,v.base,src);
        }

        __host__ ite_gpu<1> getGPUIterator(size_t n_thr = default_kernel_wg_threads_) const
        {
            grid_key_dx<1> start(0);
            grid_key_dx<1> stop(size()-1);

            return base.getGPUIterator(start,stop,n_thr);
        }

        ite_gpu<1> getDomainIteratorGPU(size_t n_thr = default_kernel_wg_threads_) const
        {
            return getGPUIterator(n_thr);
        }

        // Stub for some expression
        void init() const {}

        __host__ __device__ auto value(unsigned int p) -> decltype(base.template get<0>(grid_key_dx<1>(0)))
        {
            return get<0>(p);
        }

        ite_gpu<1> getGPUIteratorTo(size_t stop, size_t n_thr = default_kernel_wg_threads_) const
        {
            grid_key_dx<1> start(0);
            grid_key_dx<1> stop_(stop);

            return base.getGPUIterator(start,stop_,n_thr);
        }

        __host__ vector_gpu_ker<T,layout_base> & operator=(const vector_gpu_ker<T,layout_base> & v)
        {
            v_size = v.v_size;
            base = v.base;

            return *this;
        }

        __device__ __host__ vector_gpu_ker<T,layout_base> & getVector()
        {
            return *this;
        }

        __device__ __host__ const vector_gpu_ker<T,layout_base> & getVector() const
        {
            return *this;
        }

        __device__ grid_gpu_ker<1,T_,layout_base,grid_sm<1,void>> & getBase()
        {
            return base;
        }

        void * internal_get_size_pointer() {return &v_size;}

        void print_size()
        {
#ifndef DISABLE_ALL_RTTI
            std::cout << "the size of: " << demangle(typeid(self_type).name()) << " is " << sizeof(self_type) << std::endl;
            std::cout << "    " << demangle(typeid(decltype(v_size)).name()) << ":" << sizeof(decltype(v_size)) << std::endl;
            std::cout << "    " << demangle(typeid(decltype(base)).name()) << ":" << sizeof(decltype(base)) << std::endl;
#endif
        }

#ifdef SE_CLASS1

        pointer_check check_device_pointer(void * ptr)
        {
            pointer_check pc;
            pc.match = false;

            check_device_ptr<self_type> ptr_chk(ptr,*this);

            boost::mpl::for_each_ref<boost::mpl::range_c<int,0,T::max_prop>>(ptr_chk);

            if (ptr_chk.result == true)
            {
                pc.match = true;
                pc.match_str += std::string("Property: ") + std::to_string(ptr_chk.prp) + "\n";
            }

            return pc;
        }

#endif
    };

    template<typename T, template <typename> class layout_base>
    class vector_gpu_ker_ref
    {
        typedef typename apply_transform<layout_base,T>::type T_;

        //! vector reference
        vector_gpu_ker<T,layout_base> & vref;

    public:

        typedef int yes_i_am_vector;

        typedef typename layout_base<T_>::type layout_type;

        typedef typename grid_base<1,T_,CudaMemory,typename layout_base<T_>::type>::container container;

        typedef T_ value_type;

        typedef int yes_has_check_device_pointer;

        __device__ __host__ unsigned int size() const
        {
            return vref.size();
        }

        __host__ __device__ size_t size_local() const
        {
            return size();
        }

        __device__ __host__ unsigned int capacity() const
        {
            return vref.capacity();
        }

        template <unsigned int p>
        __device__ __host__ inline auto get(unsigned int id) const -> decltype(vref.template get<p>(id))
        {
            return vref.template get<p>(id);
        }

        __device__ __host__ inline auto get(unsigned int id) -> decltype(vref.get(id))
        {
            return vref.get(id);
        }

        inline __device__ __host__ auto get(unsigned int id) const -> decltype(vref.get(id))
        {
            return vref.get(id);
        }

        inline __device__ __host__ auto get_o(unsigned int id) const -> decltype(vref.get_o(id))
        {
            return vref.get_o(id);
        }

        inline __device__ __host__ auto get_o(unsigned int id) -> decltype(vref.get_o(id))
        {
            return vref.get_o(id);
        }

        inline auto last() const -> decltype(vref.last())
        {
            return vref.last();
        }

        template <unsigned int p>
        __device__ __host__ inline auto get(unsigned int id) -> decltype(vref.template get<p>(id))
        {
            return vref.template get<p>(id);
        }

        inline auto last() -> decltype(vref.last())
        {
            return vref.last();
        }

        vector_gpu_ker_ref(vector_gpu_ker<T,layout_base> & vref)
        :vref(vref)
        {}

        __device__ void set(int id, const container & obj)
        {
            vref.set(id,obj);
        }

        template<unsigned int p> __device__ __host__ void * getPointer()
        {
            return vref.template getPointer<p>();
        }

        template<unsigned int p> __device__ __host__ const void * getPointer() const
        {
            return vref.template getPointer<p>();
        }

        template <typename encap_S, unsigned int ...args> void set_o(unsigned int i, const encap_S & obj)
        {
            vref.template set_o<encap_S,args...>(i,obj);
        }

        __device__ void set(unsigned int id, const vector_gpu_ker<T_,layout_base> & v, unsigned int src)
        {
            vref.set(id,v,src);
        }

        template<unsigned int ... prp>
        __device__ void set(unsigned int id, const vector_gpu_ker<T_,layout_base> & v, unsigned int src)
        {
            vref.template set<prp ...>(id,v,src);
        }

        __host__ ite_gpu<1> getGPUIterator(size_t n_thr = default_kernel_wg_threads_) const
        {
            return vref.getGPUIterator(n_thr);
        }

        ite_gpu<1> getGPUIteratorTo(size_t stop, size_t n_thr = default_kernel_wg_threads_) const
        {
            return vref.getGPUIteratorTo(stop,n_thr);
        }

        vector_gpu_ker<T,layout_base> & getVector()
        {
            return vref;
        }

        const vector_gpu_ker<T,layout_base> & getVector() const
        {
            return vref;
        }

        __host__ vector_gpu_ker_ref<T,layout_base> & operator=(const vector_gpu_ker<T,layout_base> & v)
        {
            vref.operator=(v);
            return *this;
        }

        __device__ grid_gpu_ker<1,T_,layout_base, grid_sm<1,void>> & getBase()
        {
            return vref.getBase();
        }

        pointer_check check_device_pointer(void * ptr)
        {
            return vref.check_device_pointer(ptr);
        }

        void * internal_get_size_pointer() {return vref.internal_get_size_pointer();}

        void print_size()
        {
            return vref.print_size();
        }
    };

}

#endif /* MAP_VECTOR_CUDA_HPP_ */
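
For orientation, a minimal host-side usage sketch follows. It assumes OpenFPM's openfpm::vector_gpu container with its toKernel() conversion (which yields the vector_gpu_ker view defined above), the CUDA_LAUNCH macro, and the hostToDevice/deviceToHost transfer members; the kernel name scale_prp0 and the property layout are illustrative only, not part of this file.

#ifdef __NVCC__

#include "Vector/map_vector.hpp"

// Illustrative kernel: scale property 0 of every element by 2.
template<typename vector_type>
__global__ void scale_prp0(vector_type v)
{
    unsigned int i = threadIdx.x + blockIdx.x * blockDim.x;

    if (i >= v.size())
    {return;}

    v.template get<0>(i) *= 2.0f;
}

void example()
{
    // host-side vector with one scalar and one vector property
    openfpm::vector_gpu<aggregate<float,float[3]>> v;
    v.resize(100);

    // ... fill v on the host ..., then move property 0 to the device
    v.hostToDevice<0>();

    // toKernel() wraps the device pointers in a non-owning vector_gpu_ker view;
    // the launch uses one thread per element, guarded by size() in the kernel
    auto ite = v.getGPUIterator();
    CUDA_LAUNCH(scale_prp0, ite, v.toKernel());

    // copy the result back for inspection on the host
    v.deviceToHost<0>();
}

#endif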