OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
kernels.cuh
1 /*
2  * kernels.cuh
3  *
4  * Created on: Jan 26, 2019
5  * Author: i-bird
6  */
7 
8 #ifndef KERNELS_CUH_
9 #define KERNELS_CUH_
10 
11 
/**
 * Record every position of vd at which the value of property prp_off differs
 * from the value at the previous position. Position 0 is always recorded.
 *
 * One thread handles one element of vd (1D launch; threads past vd.size()
 * exit immediately).
 *
 * \tparam prp_off property of vd that is compared between neighbours
 *
 * \param vd   input vector, read through get<prp_off>(i) and size()
 * \param cnt  counter incremented atomically once per recorded position; the
 *             value returned by atomicAdd selects the output slot
 *             (presumably zeroed by the caller before launch - confirm)
 * \param offs output: get<1>(slot) receives the position, get<0>(slot) the
 *             value of property prp_off at that position
 *
 * Slots are handed out by atomicAdd, so the order of the entries in offs
 * depends on the order in which threads reach it.
 */
template<unsigned int prp_off, typename vector_type,typename vector_type_offs>
__global__ void find_buffer_offsets_zero(vector_type vd, int * cnt, vector_type_offs offs)
{
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;

    if (idx >= (int)vd.size())
    {return;}

    // Any element other than the first is recorded only when it differs
    // from its predecessor
    if (idx != 0 && !(vd.template get<prp_off>(idx-1) != vd.template get<prp_off>(idx)))
    {return;}

    const int slot = atomicAdd(cnt, 1);
    offs.template get<1>(slot) = idx;
    offs.template get<0>(slot) = vd.template get<prp_off>(idx);
}
34 
/**
 * For each id stored in vd.get<0>, write into offs (indexed by that id) the
 * position p and the value vd_input.get<prp_off>(p), for every p that is
 * either 0 or whose id differs from the id at p+1.
 *
 * One thread handles one position p in [0, vd.size()-1); the last element of
 * vd is only ever read as the "next" neighbour.
 *
 * \tparam prp_off property of vd_input copied into offs
 *
 * \param vd_input vector the copied value is read from
 * \param vd       vector of ids (get<0>), used to index offs
 * \param offs     output indexed by id: get<0>(id) receives the value,
 *                 get<1>(id) receives the position
 *
 * NOTE(review): thread 0 writes the entry of its id unconditionally. If a
 * later position carries the same id and differs from its successor, both
 * threads write the same entry with different values and the result depends
 * on scheduling - confirm this is intended by the caller.
 */
template<unsigned int prp_off, typename vector_type2, typename vector_type,typename vector_type_offs>
__global__ void construct_index_unique(vector_type2 vd_input, vector_type vd, vector_type_offs offs)
{
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;

    if (idx >= (int)vd.size() - 1)
    {return;}

    const unsigned int cur_id = vd.template get<0>(idx);
    const unsigned int next_id = vd.template get<0>(idx+1);

    // Position 0 always writes; any other position writes only when the id
    // changes at the next element
    const bool do_write = (idx == 0) || (cur_id != next_id);

    if (do_write)
    {
        offs.template get<0>(cur_id) = vd_input.template get<prp_off>(idx);
        offs.template get<1>(cur_id) = idx;
    }
}
57 
/**
 * Fill offs.get<0>(p) with a flag telling whether position p is 0 or holds a
 * value of property prp_off different from the one at position p-1.
 *
 * One thread handles one element of vd; threads past vd.size() exit
 * immediately. No atomics are involved: every thread writes its own entry.
 *
 * \tparam prp_off property of vd that is compared between neighbours
 *
 * \param vd   input vector
 * \param offs output flags, one entry per element of vd
 *             (the function name suggests they feed a scan - confirm)
 */
template<unsigned int prp_off, typename vector_type,typename vector_type_offs>
__global__ void find_buffer_offsets_for_scan(vector_type vd, vector_type_offs offs)
{
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;

    if (idx >= (int)vd.size())
    {return;}

    // Element 0 has no predecessor, it is compared against itself
    const unsigned int prev = (idx != 0) ? idx - 1 : idx;

    const bool is_start = (idx == 0) || (vd.template get<prp_off>(prev) != vd.template get<prp_off>(idx));

    offs.template get<0>(idx) = is_start;
}
71 
72 
/**
 * Record every position p+1 of vd whose value of property prp_off differs
 * from the value at position p, together with the value at p.
 *
 * One thread handles one position p in [0, vd.size()-1).
 *
 * \tparam prp_off property of vd that is compared between neighbours
 *
 * \param vd   input vector
 * \param cnt  counter incremented atomically once per recorded position; the
 *             value returned by atomicAdd selects the output slot
 *             (presumably zeroed by the caller before launch - confirm)
 * \param offs output: get<0>(slot) receives p+1, get<1>(slot) receives the
 *             value of property prp_off at p
 *
 * Slots are handed out by atomicAdd, so the order of the entries in offs
 * depends on the order in which threads reach it.
 */
template<unsigned int prp_off, typename vector_type,typename vector_type_offs>
__global__ void find_buffer_offsets(vector_type vd, int * cnt, vector_type_offs offs)
{
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;

    if (idx >= (int)vd.size() - 1)
    {return;}

    const int next = idx + 1;

    // Nothing to record when the value does not change at the next element
    if (!(vd.template get<prp_off>(idx) != vd.template get<prp_off>(next)))
    {return;}

    const int slot = atomicAdd(cnt, 1);

    offs.template get<0>(slot) = next;
    offs.template get<1>(slot) = vd.template get<prp_off>(idx);
}
88 
/**
 * Record every position p+1 whose value of property prp_off differs from the
 * value at position p, considering only the first g_m elements of vd.
 * Unlike find_buffer_offsets only the position is stored, not the value.
 *
 * One thread handles one position p in [0, g_m-1).
 *
 * \tparam prp_off property of vd that is compared between neighbours
 *
 * \param vd   input vector
 * \param cnt  counter incremented atomically once per recorded position; the
 *             value returned by atomicAdd selects the output slot
 *             (presumably zeroed by the caller before launch - confirm)
 * \param offs output: get<0>(slot) receives p+1
 * \param g_m  number of leading elements of vd to examine
 *             (assumed not larger than vd.size() - confirm against caller)
 */
template<unsigned int prp_off, typename vector_type,typename vector_type_offs>
__global__ void find_buffer_offsets_no_prc(vector_type vd, int * cnt, vector_type_offs offs, int g_m)
{
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;

    if (idx >= g_m - 1)
    {return;}

    const int next = idx + 1;

    // Nothing to record when the value does not change at the next element
    if (!(vd.template get<prp_off>(idx) != vd.template get<prp_off>(next)))
    {return;}

    const int slot = atomicAdd(cnt, 1);
    offs.template get<0>(slot) = next;
}
102 
103 
104 #endif /* KERNELS_CUH_ */
Distributed vector.