/**
 * Emit one record for every position of vd where the value of property
 * prp_off differs from the previous element, plus one record for element 0.
 *
 * Launch layout: 1D grid of 1D blocks, one thread per element of vd
 * (threads with index >= vd.size() exit immediately).
 *
 * @tparam prp_off  property of vd that is compared between neighbours
 * @param vd        input vector; read through size() and get<prp_off>(i)
 * @param cnt       device counter bumped with atomicAdd to claim an output slot
 *                  (presumably zeroed by the caller before launch -- confirm)
 * @param offs      output records: get<0>(slot) receives the property value,
 *                  get<1>(slot) receives the element index
 *
 * Slots are claimed with atomicAdd, so the order of the records in offs is
 * whatever order the threads reach the atomic; it is not sorted by index.
 * NOTE(review): offs is assumed to be large enough for every emitted record.
 */
template<unsigned int prp_off, typename vector_type, typename vector_type_offs>
__global__ void find_buffer_offsets_zero(vector_type vd, int * cnt, vector_type_offs offs)
{
    const int p = threadIdx.x + blockIdx.x * blockDim.x;

    if (p >= (int)vd.size())
    {return;}

    // Element 0 has no predecessor: it always opens a run, and it must not
    // fall through to the comparison below, which would read index p-1 == -1.
    if (p == 0)
    {
        const int i = atomicAdd(cnt, 1);
        offs.template get<1>(i) = 0;
        offs.template get<0>(i) = vd.template get<prp_off>(0);
        return;
    }

    // Any other element opens a run only when its value differs from the one before it.
    if (vd.template get<prp_off>(p-1) != vd.template get<prp_off>(p))
    {
        const int i = atomicAdd(cnt, 1);
        offs.template get<1>(i) = p;
        offs.template get<0>(i) = vd.template get<prp_off>(p);
    }
}
// NOTE(review): the declaration line of this template (qualifier, function name
// and parameter list) is not present in this listing. The body below uses three
// objects -- vd_input, vd and offs -- which presumably are the parameters typed
// vector_type2 / vector_type / vector_type_offs; confirm against the full file.
35template<
unsigned int prp_off,
typename vector_type2,
typename vector_type,
typename vector_type_offs>
// One flat 1D index per thread.
38 int p = threadIdx.x + blockIdx.x * blockDim.x;
// The last element of vd is excluded because the body reads index p+1.
40 if (p >= (
int)vd.size() - 1)
return;
// Property 0 of vd at p and at p+1. id is used as the destination index into
// offs; id_p1 is not used by any statement visible here.
42 unsigned int id = vd.template get<0>(p);
43 unsigned int id_p1 = vd.template get<0>(p+1);
// First pair of writes: offs[id] receives property prp_off of vd_input[0] and
// the index p.
// NOTE(review): the condition guarding this pair is not visible (presumably
// p == 0) -- confirm.
47 offs.template get<0>(
id) = vd_input.template get<prp_off>(0);
48 offs.template get<1>(
id) = p;
// Second pair of writes: offs[id] receives property prp_off of vd_input[p] and
// the index p.
// NOTE(review): the condition guarding this pair is not visible (presumably
// id != id_p1, which would explain why id_p1 is loaded) -- confirm.
53 offs.template get<0>(
id) = vd_input.template get<prp_off>(p);
54 offs.template get<1>(
id) = p;
/**
 * Write, for every element of vd, a flag telling whether property prp_off
 * changes with respect to the previous element.
 *
 * Launch layout: 1D grid of 1D blocks, one thread per element of vd.
 *
 * @tparam prp_off  property of vd that is compared between neighbours
 * @param vd        input vector; read through size() and get<prp_off>(i)
 * @param offs      output; get<0>(i) is set to the flag of element i.
 *                  Element 0 is always flagged.
 */
template<unsigned int prp_off, typename vector_type, typename vector_type_offs>
__global__ void find_buffer_offsets_for_scan(vector_type vd, vector_type_offs offs)
{
    const int tid = threadIdx.x + blockIdx.x * blockDim.x;

    if (tid < (int)vd.size())
    {
        // Thread 0 has no predecessor, so it compares element 0 with itself
        // and relies on the explicit (tid == 0) term to raise the flag.
        unsigned int prev = tid;
        if (tid != 0)
        {prev = tid - 1;}

        const bool starts_run = vd.template get<prp_off>(prev) != vd.template get<prp_off>(tid) || (tid == 0);

        offs.template get<0>(tid) = starts_run;
    }
}
/**
 * Emit one record for every position p where property prp_off of vd differs
 * between element p and element p+1.
 *
 * Launch layout: 1D grid of 1D blocks, one thread per element of vd; the
 * thread of the last element does nothing because it has no successor.
 *
 * @tparam prp_off  property of vd that is compared between neighbours
 * @param vd        input vector; read through size() and get<prp_off>(i)
 * @param cnt       device counter bumped with atomicAdd to claim an output slot
 * @param offs      output records: get<0>(slot) receives p+1,
 *                  get<1>(slot) receives the property value of element p
 *
 * Slots are claimed with atomicAdd, so records appear in offs in the order the
 * threads reach the atomic.
 */
template<unsigned int prp_off, typename vector_type, typename vector_type_offs>
__global__ void find_buffer_offsets(vector_type vd, int * cnt, vector_type_offs offs)
{
    const int tid = threadIdx.x + blockIdx.x * blockDim.x;
    const int last = (int)vd.size() - 1;

    // Only threads that have a right-hand neighbour take part.
    if (tid < last)
    {
        const int next = tid + 1;

        if (vd.template get<prp_off>(tid) != vd.template get<prp_off>(next))
        {
            const int slot = atomicAdd(cnt, 1);

            offs.template get<0>(slot) = next;
            offs.template get<1>(slot) = vd.template get<prp_off>(tid);
        }
    }
}
// Kernel that scans neighbouring elements of vd and records the positions where
// property prp_off changes. Only indices below g_m - 1 are examined, so the
// range is bounded by the g_m argument rather than by vd.size().
// NOTE(review): this listing ends inside the body of this kernel; statements
// after the last visible line, if any, are not documented here.
89template<
unsigned int prp_off,
typename vector_type,
typename vector_type_offs>
90__global__
void find_buffer_offsets_no_prc(
vector_type vd,
int * cnt, vector_type_offs offs,
int g_m)
// One flat 1D index per thread.
92 int p = threadIdx.x + blockIdx.x * blockDim.x;
// Index g_m - 1 and above are skipped because the comparison below reads p+1.
94 if (p >= (
int)g_m - 1)
return;
// A change between element p and element p+1 marks a boundary.
96 if (vd.template get<prp_off>(p) != vd.template get<prp_off>(p+1))
// Claim an output slot through the shared counter, then store the index of the
// first element after the boundary in property 0 of that slot.
98 int i = atomicAdd(cnt, 1);
99 offs.template get<0>(i) = p+1;