OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
 
Loading...
Searching...
No Matches
kernels.cuh
1/*
2 * kernels.cuh
3 *
4 * Created on: Jan 26, 2019
5 * Author: i-bird
6 */
7
8#ifndef KERNELS_CUH_
9#define KERNELS_CUH_
10
11
/*! \brief Record the first index of every run of equal values in property prp_off of vd
 *
 * Each thread handles the element at its global thread index. The thread at
 * index 0 always records an entry; any other thread records an entry when its
 * element differs from the previous one. A recording thread reserves its
 * output slot with atomicAdd on cnt, so entries land in offs in the order the
 * atomics resolve, not necessarily in index order.
 *
 * \tparam prp_off property of vd that is compared between neighbours
 *
 * \param vd vector to inspect (must provide size() and get<prp_off>(i))
 * \param cnt device counter incremented once per recorded entry
 *            (NOTE(review): presumably zeroed by the caller before launch - confirm)
 * \param offs output vector: property 0 receives the value found at the start
 *             of the run, property 1 receives the index where the run starts
 */
template<unsigned int prp_off, typename vector_type,typename vector_type_offs>
__global__ void find_buffer_offsets_zero(vector_type vd, int * cnt, vector_type_offs offs)
{
	const int tid = blockIdx.x * blockDim.x + threadIdx.x;

	// threads past the end of the vector have nothing to do
	if (tid >= (int)vd.size())
	{return;}

	// element 0 opens a run by definition (short-circuit avoids reading index -1),
	// every other element opens a run when it differs from its predecessor
	const bool run_start = (tid == 0) || (vd.template get<prp_off>(tid-1) != vd.template get<prp_off>(tid));

	if (!run_start)
	{return;}

	// reserve one output slot for this run
	const int slot = atomicAdd(cnt, 1);

	offs.template get<1>(slot) = tid;
	offs.template get<0>(slot) = vd.template get<prp_off>(tid);
}
34
/*! \brief Fill offs, indexed by the ids stored in vd, with value and position taken from vd_input
 *
 * Each thread handles the element at its global thread index and reads
 * property 0 of vd at that index and at the following one. Whenever the two
 * ids differ, the entry offs[id] receives property prp_off of vd_input at the
 * thread index (property 0 of offs) and the thread index itself (property 1
 * of offs). The thread at index 0 additionally writes the entry of its own id
 * unconditionally.
 *
 * Threads with index >= vd.size() - 1 exit immediately, so the last element
 * of vd is only ever read as the "next" element of its predecessor.
 *
 * \tparam prp_off property of vd_input copied into property 0 of offs
 *
 * \param vd_input vector the values are taken from
 * \param vd vector whose property 0 is used as an index into offs
 *           (NOTE(review): looks like the result of a scan over change flags - confirm with the caller)
 * \param offs output vector, addressed by the ids read from vd
 */
template<unsigned int prp_off, typename vector_type2, typename vector_type,typename vector_type_offs>
__global__ void construct_index_unique(vector_type2 vd_input, vector_type vd, vector_type_offs offs)
{
	const int tid = blockIdx.x * blockDim.x + threadIdx.x;
	const int last = (int)vd.size() - 1;

	// the last element has no successor to compare against
	if (tid >= last)
	{return;}

	const unsigned int cur_id = vd.template get<0>(tid);
	const unsigned int next_id = vd.template get<0>(tid+1);

	// the first element always produces an entry for its id
	if (tid == 0)
	{
		offs.template get<0>(cur_id) = vd_input.template get<prp_off>(0);
		offs.template get<1>(cur_id) = tid;
	}

	// a change of id between this element and the next produces an entry for cur_id
	if (cur_id != next_id)
	{
		offs.template get<0>(cur_id) = vd_input.template get<prp_off>(tid);
		offs.template get<1>(cur_id) = tid;
	}
}
57
/*! \brief Write, for every element of vd, a flag telling whether it differs from its predecessor
 *
 * Each thread handles the element at its global thread index and stores a
 * boolean into property 0 of offs at the same index. The flag is true when
 * property prp_off of the element differs from that of the previous element;
 * it is always true for index 0.
 *
 * \tparam prp_off property of vd that is compared between neighbours
 *
 * \param vd vector to inspect
 * \param offs output vector, written at indices [0, vd.size())
 */
template<unsigned int prp_off, typename vector_type,typename vector_type_offs>
__global__ void find_buffer_offsets_for_scan(vector_type vd, vector_type_offs offs)
{
	const int tid = blockIdx.x * blockDim.x + threadIdx.x;

	if (tid >= (int)vd.size())
	{return;}

	// index 0 has no predecessor: compare it with itself and force the flag below
	const unsigned int prev = (tid == 0) ? tid : tid - 1;

	const bool differs = vd.template get<prp_off>(prev) != vd.template get<prp_off>(tid);
	const bool flag = differs || (tid == 0);

	offs.template get<0>(tid) = flag;
}
71
72
/*! \brief Record every position where property prp_off of vd changes between neighbours
 *
 * Each thread handles the element at its global thread index and compares it
 * with the following element. When they differ, the thread reserves a slot
 * with atomicAdd on cnt and stores the index of the following element in
 * property 0 of offs and the value of the current element in property 1.
 * Slots are handed out in the order the atomics resolve, so the entries are
 * not necessarily sorted by index.
 *
 * Threads with index >= vd.size() - 1 exit immediately, because the last
 * element has no successor.
 *
 * \tparam prp_off property of vd that is compared between neighbours
 *
 * \param vd vector to inspect
 * \param cnt device counter incremented once per recorded change
 *            (NOTE(review): presumably zeroed by the caller before launch - confirm)
 * \param offs output vector: property 0 = index following the change,
 *             property 1 = value before the change
 */
template<unsigned int prp_off, typename vector_type,typename vector_type_offs>
__global__ void find_buffer_offsets(vector_type vd, int * cnt, vector_type_offs offs)
{
	const int tid = blockIdx.x * blockDim.x + threadIdx.x;

	if (tid >= (int)vd.size() - 1)
	{return;}

	const int next = tid + 1;

	const bool changes = vd.template get<prp_off>(tid) != vd.template get<prp_off>(next);

	if (!changes)
	{return;}

	// reserve one output slot for this change
	const int slot = atomicAdd(cnt, 1);

	offs.template get<0>(slot) = next;
	offs.template get<1>(slot) = vd.template get<prp_off>(tid);
}
88
/*! \brief Record every position in the first g_m elements of vd where property prp_off changes
 *
 * Each thread handles the element at its global thread index and compares it
 * with the following element. When they differ, the thread reserves a slot
 * with atomicAdd on cnt and stores the index of the following element in
 * property 0 of offs. Unlike find_buffer_offsets, the value itself is not
 * stored and the range is bounded by g_m instead of vd.size().
 *
 * Threads with index >= g_m - 1 exit immediately.
 *
 * \tparam prp_off property of vd that is compared between neighbours
 *
 * \param vd vector to inspect
 * \param cnt device counter incremented once per recorded change
 *            (NOTE(review): presumably zeroed by the caller before launch - confirm)
 * \param offs output vector: property 0 = index following the change
 * \param g_m number of leading elements of vd taken into account
 */
template<unsigned int prp_off, typename vector_type,typename vector_type_offs>
__global__ void find_buffer_offsets_no_prc(vector_type vd, int * cnt, vector_type_offs offs, int g_m)
{
	const int tid = blockIdx.x * blockDim.x + threadIdx.x;

	if (tid >= g_m - 1)
	{return;}

	const int next = tid + 1;

	const bool changes = vd.template get<prp_off>(tid) != vd.template get<prp_off>(next);

	if (!changes)
	{return;}

	// reserve one output slot for this change
	const int slot = atomicAdd(cnt, 1);

	offs.template get<0>(slot) = next;
}
102
103
104#endif /* KERNELS_CUH_ */
Distributed vector.