OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
 
map_vector_cuda_ker.cuh
/*
 * map_vector_cuda_ker.cuh
 *
 *  Created on: Jun 28, 2018
 *      Author: i-bird
 */

#ifndef MAP_VECTOR_CUDA_HPP_
#define MAP_VECTOR_CUDA_HPP_

#ifdef __NVCC__
template<typename vector_src_type, typename vector_dst_type, unsigned int ... args>
__global__ void merge_add_prp_device_impl(vector_src_type v_src, vector_dst_type v_dst, unsigned int old_sz)
{
	int i = threadIdx.x + blockIdx.x * blockDim.x;

	if (i >= v_src.size())
	{return;}

	// write the object after the old end of the destination vector
	object_s_di<decltype(v_src.get(i)),decltype(v_dst.get(old_sz+i)),OBJ_ENCAP,args...>(v_src.get(i),v_dst.get(old_sz+i));
}

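// Editor's sketch (not part of the original file): one possible host-side
// launch of the kernel above, appending the properties prp... of v_src to the
// tail of v_dst. It assumes v_dst was already grown by v_src.size() elements
// and that old_sz is the size of v_dst before that growth; the helper name is
// illustrative only.
template<unsigned int ... prp, typename vector_src_type, typename vector_dst_type>
static inline void launch_merge_add_prp(vector_src_type v_src, vector_dst_type v_dst, unsigned int old_sz)
{
	if (v_src.size() == 0) {return;}

	// one GPU thread per source element
	auto ite = v_src.getGPUIteratorTo(v_src.size() - 1);
	merge_add_prp_device_impl<vector_src_type,vector_dst_type,prp...><<<ite.wthr,ite.thr>>>(v_src,v_dst,old_sz);
}
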
template<typename vector_dst_type, typename vector_src_type>
__global__ void copy_two_vectors(vector_dst_type v_dst, vector_src_type v_src)
{
	int i = threadIdx.x + blockIdx.x * blockDim.x;

	if (i >= v_src.size())
	{return;}

	v_dst.get(i) = v_src.get(i);
}
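
// Editor's sketch (not part of the original file): an element-wise device
// copy between two vector views of the same size, one thread per element.
// The helper name is illustrative only.
template<typename vector_dst_type, typename vector_src_type>
static inline void launch_copy_two_vectors(vector_dst_type v_dst, vector_src_type v_src)
{
	if (v_src.size() == 0) {return;}

	auto ite = v_src.getGPUIterator();
	copy_two_vectors<<<ite.wthr,ite.thr>>>(v_dst,v_src);
}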


template<template<typename,typename> class op,
         typename vector_src_type,
         typename vector_dst_type,
         typename vector_opart_type,
         unsigned int ... args>
__global__ void merge_add_prp_device_impl_src_dst_opar_offset(vector_src_type v_src, vector_dst_type v_dst, vector_opart_type opart, unsigned int start)
{
	int i = threadIdx.x + blockIdx.x * blockDim.x;

	if (i >= v_src.size())
	{return;}

	// write the object at the destination position given by opart
	object_s_di_op<op,decltype(v_src.get(0)),decltype(v_dst.get(0)),OBJ_ENCAP,args...>(v_src.get(i),v_dst.get(opart.template get<1>(start + i)));
}

template<template<typename,typename> class op,
         typename vector_src_type,
         typename vector_dst_type,
         typename vector_opart_type,
         unsigned int ... args>
__global__ void merge_add_prp_device_impl_src_offset_dst_opar(vector_src_type v_src, vector_dst_type v_dst, vector_opart_type opart, unsigned int start)
{
	int i = threadIdx.x + blockIdx.x * blockDim.x;

	if (i >= opart.size())
	{return;}

	// read the object at offset (start + i) in the source and write it at the
	// destination position given by opart
	object_si_di_op<op,decltype(v_src.get(0)),decltype(v_dst.get(0)),OBJ_ENCAP,args...>(v_src.get(start + i),v_dst.get(opart.template get<0>(i)));
}
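
// Editor's note (added): the two kernels above apply the same operation but
// differ in where the indirection sits. The first reads v_src linearly and
// scatters through opart.get<1>(start + i), launching one thread per source
// element; the second reads v_src at offset (start + i) and scatters through
// opart.get<0>(i), launching one thread per opart entry.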

#endif


template<int prp>
__device__ void fill_vector_error_array_overflow(const void * sptr,int key)
{
#ifdef CUDA_GPU

	int * ptr = (int *)&global_cuda_error_array[0];

	ptr[0] = 1;
	ptr[1] = ((size_t)sptr) & 0xFFFFFFFF;
	ptr[2] = (((size_t)sptr) & 0xFFFFFFFF00000000) >> 32;
	ptr[3] = prp;
	ptr[4] = 1;

	for (int i = 0 ; i < 1 ; i++)
	{ptr[i+5] = key;}

#ifdef __NVCC__

	ptr[6] = blockIdx.x;
	ptr[7] = blockIdx.y;
	ptr[8] = blockIdx.z;

	ptr[9] = blockDim.x;
	ptr[10] = blockDim.y;
	ptr[11] = blockDim.z;

	ptr[12] = threadIdx.x;
	ptr[13] = threadIdx.y;
	ptr[14] = threadIdx.z;

#endif

#endif
}
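
// Editor's note (added): layout of global_cuda_error_array as written above.
//   ptr[0]      error flag (1 = overflow detected)
//   ptr[1..2]   low/high 32 bits of the container base pointer
//   ptr[3]      property index prp (-1 when the whole object was accessed)
//   ptr[4]      number of key components (always 1 for vectors)
//   ptr[5]      the offending key
//   ptr[6..8]   blockIdx.x/y/z   (filled only when compiled with __NVCC__)
//   ptr[9..11]  blockDim.x/y/z
//   ptr[12..14] threadIdx.x/y/z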


namespace openfpm
{

	template<typename T, template <typename> class layout_base>
	struct vector_gpu_ker_ref;

	//! grid interface available when on gpu
	template<typename T, template <typename> class layout_base>
	class vector_gpu_ker
	{
		typedef typename apply_transform<layout_base,T>::type T_;

		//! actual number of elements
		unsigned int v_size;

		//! 1-D static grid
		grid_gpu_ker<1,T_,layout_base,grid_sm<1,void>> base;

		//! Check that the key is inside the grid
		__device__ __host__ inline bool check_bound(size_t v1) const
		{
			return v1 < size();
		}

	public:

		//! it define that it is a vector
		typedef int yes_i_am_vector;

		//! Type of the encapsulation memory parameter
		typedef typename layout_base<T_>::type layout_type;

		//! Object container for T, it is the return type of get_o
		// you can access all the properties of T
		typedef typename grid_base<1,T_,CudaMemory,typename layout_base<T_>::type>::container container;

		//! Type of the value the vector is storing
		typedef T_ value_type;

		//! Indicate this structure has a function to check the device pointer
		typedef int yes_has_check_device_pointer;

		//! self type
		typedef vector_gpu_ker<T,layout_base> self_type;

		//! Return the size of the vector
		__device__ __host__ unsigned int size() const
		{
			return v_size;
		}

		__host__ __device__ size_t size_local() const
		{
			return size();
		}

		//! return the maximum capacity of the vector before reallocation
		__device__ __host__ unsigned int capacity() const
		{
			return base.size();
		}

		//! Get an element of the vector
		template <unsigned int p>
		__device__ __host__ inline auto get(unsigned int id) const -> decltype(base.template get<p>(grid_key_dx<1>(0)))
		{
#ifdef SE_CLASS1
			if (check_bound(id) == false)
			{fill_vector_error_array_overflow<p>(this->template getPointer<p>(),id);}
#endif
			grid_key_dx<1> key(id);

			return base.template get<p>(key);
		}

		//! Get an element of the vector
		template <unsigned int p>
		__device__ __host__ inline auto getProp(unsigned int id) const -> decltype(base.template get<p>(grid_key_dx<1>(0)))
		{
			return this->get<p>(id);
		}

		//! Get an element of the vector
		template <unsigned int p, typename key_type>
		__device__ __host__ inline auto getProp(key_type id) const -> decltype(base.template get<p>(grid_key_dx<1>(0)))
		{
			return this->get<p>(id.getKey());
		}

		//! Get an element of the vector as an encapsulated object
		__device__ __host__ inline auto get(unsigned int id) -> decltype(base.get_o(grid_key_dx<1>(id)))
		{
#ifdef SE_CLASS1
			if (check_bound(id) == false)
			{fill_vector_error_array_overflow<-1>(this->template getPointer<0>(),id);}
#endif

			grid_key_dx<1> key(id);

			return base.get_o(key);
		}

		//! Get an element of the vector as an encapsulated object
		inline __device__ __host__ auto get(unsigned int id) const -> const decltype(base.get_o(grid_key_dx<1>(id)))
		{
#ifdef SE_CLASS1
			if (check_bound(id) == false)
			{fill_vector_error_array_overflow<-1>(this->template getPointer<0>(),id);}
#endif

			grid_key_dx<1> key(id);

			return base.get_o(key);
		}

		//! Get an element of the vector as an encapsulated object
		inline __device__ __host__ auto get_o(unsigned int id) const -> decltype(base.get_o(grid_key_dx<1>(id)))
		{
#ifdef SE_CLASS1
			if (check_bound(id) == false)
			{fill_vector_error_array_overflow<-1>(this->template getPointer<0>(),id);}
#endif

			grid_key_dx<1> key(id);

			return base.get_o(key);
		}

		//! Get an element of the vector as an encapsulated object
		inline __device__ __host__ auto get_o(unsigned int id) -> decltype(base.get_o(grid_key_dx<1>(id)))
		{
#ifdef SE_CLASS1
			if (check_bound(id) == false)
			{fill_vector_error_array_overflow<-1>(this->template getPointer<0>(),id);}
#endif

			grid_key_dx<1> key(id);

			return base.get_o(key);
		}

		//! Get the last element of the vector
		inline auto last() const -> decltype(base.get_o(grid_key_dx<1>(0)))
		{
			grid_key_dx<1> key(size()-1);

			return base.get_o(key);
		}

		//! Get an element of the vector
		template <unsigned int p>
		__device__ __host__ inline auto get(unsigned int id) -> decltype(base.template get<p>(grid_key_dx<1>(0)))
		{
#ifdef SE_CLASS1
			if (check_bound(id) == false)
			{fill_vector_error_array_overflow<p>(this->template getPointer<p>(),id);}
#endif

			grid_key_dx<1> key(id);

			return base.template get<p>(key);
		}

		//! Get the last element of the vector
		inline auto last() -> decltype(base.get_o(grid_key_dx<1>(0)))
		{
			grid_key_dx<1> key(size()-1);

			return base.get_o(key);
		}

		vector_gpu_ker()
		:v_size(0)
		{}

		vector_gpu_ker(int v_size, const grid_gpu_ker<1,T_,layout_base,grid_sm<1,void>> & cpy)
		:v_size(v_size),base(cpy)
		{}

		vector_gpu_ker(const vector_gpu_ker_ref<T,layout_base> & vref)
		{
			this->operator=(vref.vref);
		}

		//! implementation of the constructor
		inline void constructor_impl(int v_size, const grid_gpu_ker<1,T_,layout_base,grid_sm<1,void>> & cpy)
		{
			this->v_size = v_size;
			base.constructor_impl(cpy);
		}

		//! implementation of the constructor
		inline void constructor_impl(int v_size, const grid_gpu_ker_ref<1,T_,layout_base,grid_sm<1,void>> & cpy)
		{
			this->v_size = v_size;
			base.constructor_impl(cpy);
		}

		//! Set the object id to obj
		__device__ void set(int id, const container & obj)
		{
#ifdef SE_CLASS1
			if (check_bound(id) == false)
			{fill_vector_error_array_overflow<-1>(this->template getPointer<0>(),id);}
#endif

			base.set(id,obj);
		}

		//! Get the pointer for the property p
		template<unsigned int p> __device__ __host__ void * getPointer()
		{
			return base.template getPointer<p>();
		}

		//! Get the pointer for the property p
		template<unsigned int p> __device__ __host__ const void * getPointer() const
		{
			return base.template getPointer<p>();
		}

		//! It set an element of the vector from an object that is a subset of the vector properties
		template <typename encap_S, unsigned int ...args> void set_o(unsigned int i, const encap_S & obj)
		{
#ifdef SE_CLASS1
			if (check_bound(i) == false)
			{fill_vector_error_array_overflow<-1>(this->template getPointer<0>(),i);}
#endif

			// write the object into element i
			object_s_di<encap_S,decltype(get(i)),OBJ_ENCAP,args...>(obj,get(i));
		}

		//! Set the element of the vector v from another element of another vector
		__device__ void set(unsigned int id, const vector_gpu_ker<T_,layout_base> & v, unsigned int src)
		{
#ifdef SE_CLASS1
			if (check_bound(id) == false)
			{fill_vector_error_array_overflow<-1>(this->template getPointer<0>(),id);}
#endif

			base.set(id,v.base,src);
		}

		//! Set the element of the vector v (only the properties prp) from another element of another vector
		template<unsigned int ... prp>
		__device__ void set(unsigned int id, const vector_gpu_ker<T_,layout_base> & v, unsigned int src)
		{
#ifdef SE_CLASS1
			if (check_bound(id) == false)
			{fill_vector_error_array_overflow<-1>(this->template getPointer<0>(),id);}
#endif

			base.template set<prp...>(id,v.base,src);
		}

		//! Get an iterator for the GPU
		__host__ ite_gpu<1> getGPUIterator(size_t n_thr = default_kernel_wg_threads_) const
		{
			grid_key_dx<1> start(0);
			grid_key_dx<1> stop(size()-1);

			return base.getGPUIterator(start,stop,n_thr);
		}

		//! Get a domain iterator for the GPU
		ite_gpu<1> getDomainIteratorGPU(size_t n_thr = default_kernel_wg_threads_) const
		{
			return getGPUIterator(n_thr);
		}

		//Stub for some expression
		void init() const {}

		__host__ __device__ auto value(unsigned int p) -> decltype(base.template get<0>(grid_key_dx<1>(0)))
		{
			return get<0>(p);
		}

		//! Get an iterator for the GPU over the range [0,stop]
		ite_gpu<1> getGPUIteratorTo(size_t stop, size_t n_thr = default_kernel_wg_threads_) const
		{
			grid_key_dx<1> start(0);
			grid_key_dx<1> stop_(stop);

			return base.getGPUIterator(start,stop_,n_thr);
		}

		//! operator= this operator absorbs the pointers; consider that this object wraps device pointers
		__host__ vector_gpu_ker<T,layout_base> & operator=(const vector_gpu_ker<T,layout_base> & v)
		{
			v_size = v.v_size;
			base = v.base;

			return *this;
		}

		__device__ __host__ vector_gpu_ker<T,layout_base> & getVector()
		{
			return *this;
		}

		__device__ __host__ const vector_gpu_ker<T,layout_base> & getVector() const
		{
			return *this;
		}

		//! Return the base
		__device__ grid_gpu_ker<1,T_,layout_base,grid_sm<1,void>> & getBase()
		{
			return base;
		}

		void * internal_get_size_pointer() {return &v_size;}

		void print_size()
		{
#ifndef DISABLE_ALL_RTTI
			std::cout << "the size of: " << demangle(typeid(self_type).name()) << " is " << sizeof(self_type) << std::endl;
			std::cout << " " << demangle(typeid(decltype(v_size)).name()) << ":" << sizeof(decltype(v_size)) << std::endl;
			std::cout << " " << demangle(typeid(decltype(base)).name()) << ":" << sizeof(decltype(base)) << std::endl;
#endif
		}
601#ifdef SE_CLASS1
602
608 pointer_check check_device_pointer(void * ptr)
609 {
610 pointer_check pc;
611 pc.match = false;
612
613 check_device_ptr<self_type> ptr_chk(ptr,*this);
614
615 boost::mpl::for_each_ref<boost::mpl::range_c<int,0,T::max_prop>>(ptr_chk);
616
617 if (ptr_chk.result == true)
618 {
619 pc.match = true;
620 pc.match_str += std::string("Property: ") + std::to_string(ptr_chk.prp) + "\n";
621 }
622
623 return pc;
624 }
625
626#endif
627 };
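
#ifdef __NVCC__
	// Editor's sketch (not part of the original file): a minimal kernel that
	// works on a vector_gpu_ker view, scaling property 0 of every element.
	// A scalar (e.g. float) property at position 0 is an assumption made for
	// illustration.
	template<typename vector_type>
	__global__ void scale_prp0_example(vector_type v, float factor)
	{
		unsigned int i = threadIdx.x + blockIdx.x * blockDim.x;

		if (i >= v.size())
		{return;}

		v.template get<0>(i) *= factor;
	}
#endif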

	//! grid interface available when on gpu
	template<typename T, template <typename> class layout_base>
	struct vector_gpu_ker_ref
	{
		typedef typename apply_transform<layout_base,T>::type T_;

		//! vector reference
		vector_gpu_ker<T,layout_base> & vref;

	public:

		//! it define that it is a vector
		typedef int yes_i_am_vector;

		//! Type of the encapsulation memory parameter
		typedef typename layout_base<T_>::type layout_type;

		//! Object container for T, it is the return type of get_o
		typedef typename grid_base<1,T_,CudaMemory,typename layout_base<T_>::type>::container container;

		//! Type of the value the vector is storing
		typedef T_ value_type;

		//! Indicate this structure has a function to check the device pointer
		typedef int yes_has_check_device_pointer;

		__device__ __host__ unsigned int size() const
		{
			return vref.size();
		}

		__host__ __device__ size_t size_local() const
		{
			return size();
		}

		__device__ __host__ unsigned int capacity() const
		{
			return vref.capacity();
		}

		template <unsigned int p>
		__device__ __host__ inline auto get(unsigned int id) const -> decltype(vref.template get<p>(id))
		{
			return vref.template get<p>(id);
		}

		__device__ __host__ inline auto get(unsigned int id) -> decltype(vref.get(id))
		{
			return vref.get(id);
		}

		inline __device__ __host__ auto get(unsigned int id) const -> decltype(vref.get(id))
		{
			return vref.get(id);
		}

		inline __device__ __host__ auto get_o(unsigned int id) const -> decltype(vref.get_o(id))
		{
			return vref.get_o(id);
		}

		inline __device__ __host__ auto get_o(unsigned int id) -> decltype(vref.get_o(id))
		{
			return vref.get_o(id);
		}

		inline auto last() const -> decltype(vref.last())
		{
			return vref.last();
		}

		template <unsigned int p>
		__device__ __host__ inline auto get(unsigned int id) -> decltype(vref.template get<p>(id))
		{
			return vref.template get<p>(id);
		}

		inline auto last() -> decltype(vref.last())
		{
			return vref.last();
		}

		vector_gpu_ker_ref(vector_gpu_ker<T,layout_base> & vref)
		:vref(vref)
		{}

		__device__ void set(int id, const container & obj)
		{
			vref.set(id,obj);
		}

		template<unsigned int p> __device__ __host__ void * getPointer()
		{
			return vref.template getPointer<p>();
		}

		template<unsigned int p> __device__ __host__ const void * getPointer() const
		{
			return vref.template getPointer<p>();
		}

		template <typename encap_S, unsigned int ...args> void set_o(unsigned int i, const encap_S & obj)
		{
			vref.template set_o<encap_S,args...>(i,obj);
		}

		__device__ void set(unsigned int id, const vector_gpu_ker<T_,layout_base> & v, unsigned int src)
		{
			vref.set(id,v,src);
		}

		template<unsigned int ... prp>
		__device__ void set(unsigned int id, const vector_gpu_ker<T_,layout_base> & v, unsigned int src)
		{
			vref.template set<prp ...>(id,v,src);
		}

		__host__ ite_gpu<1> getGPUIterator(size_t n_thr = default_kernel_wg_threads_) const
		{
			return vref.getGPUIterator(n_thr);
		}

		//! Get an iterator for the GPU over the range [0,stop]
		ite_gpu<1> getGPUIteratorTo(size_t stop, size_t n_thr = default_kernel_wg_threads_) const
		{
			return vref.getGPUIteratorTo(stop,n_thr);
		}

		vector_gpu_ker<T,layout_base> & getVector()
		{
			return vref;
		}

		const vector_gpu_ker<T,layout_base> & getVector() const
		{
			return vref;
		}

		__host__ vector_gpu_ker_ref<T,layout_base> & operator=(const vector_gpu_ker<T,layout_base> & v)
		{
			vref.operator=(v);
			return *this;
		}

		//! Return the base
		__device__ grid_gpu_ker<1,T_,layout_base,grid_sm<1,void>> & getBase()
		{
			return vref.getBase();
		}

		pointer_check check_device_pointer(void * ptr)
		{
			return vref.check_device_pointer(ptr);
		}

		void * internal_get_size_pointer() {return vref.internal_get_size_pointer();}

		void print_size()
		{
			return vref.print_size();
		}
	};

}

#endif /* MAP_VECTOR_CUDA_HPP_ */
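
A minimal host-side usage sketch (editor's addition, not part of the file above). It assumes the openfpm::vector_gpu container, toKernel(), hostToDevice()/deviceToHost() and getGPUIterator() from the rest of OpenFPM, plus the illustrative scale_prp0_example kernel inserted above; compile with nvcc.

	#include "Vector/map_vector.hpp"

	int main()
	{
		openfpm::vector_gpu<aggregate<float>> v;
		v.resize(100);

		// fill property 0 on the host, then move it to the device
		for (size_t i = 0 ; i < v.size() ; i++)
		{v.template get<0>(i) = 1.0f;}
		v.template hostToDevice<0>();

		// launch on the kernel view, one thread per element
		auto ite = v.getGPUIterator();
		openfpm::scale_prp0_example<<<ite.wthr,ite.thr>>>(v.toKernel(),2.0f);

		// bring the result back to the host
		v.template deviceToHost<0>();

		return 0;
	}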