OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
thread_operators.cuh
Go to the documentation of this file.
1 /******************************************************************************
2  * Copyright (c) 2011, Duane Merrill. All rights reserved.
3  * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of the NVIDIA CORPORATION nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  *
27  ******************************************************************************/
28 
34 /******************************************************************************
35  * Simple functor operators
36  ******************************************************************************/
37 
38 #pragma once
39 
40 #include "../util_macro.cuh"
41 #include "../util_type.cuh"
42 #include "../util_namespace.cuh"
43 
45 CUB_NS_PREFIX
46 
48 namespace cub {
49 
50 
/**
 * \brief Default equality functor.
 *
 * Stateless; the comparison is delegated to operator== of the argument type.
 */
struct Equality
{
    /// Boolean equality operator, returns <tt>(a == b)</tt>
    template <typename T>
    __host__ __device__ __forceinline__
    bool operator()(const T &a, const T &b) const
    {
        return (a == b);
    }
};
68 
69 
/**
 * \brief Default inequality functor.
 *
 * Stateless; the comparison is delegated to operator!= of the argument type.
 */
struct Inequality
{
    /// Boolean inequality operator, returns <tt>(a != b)</tt>
    template <typename T>
    __host__ __device__ __forceinline__
    bool operator()(const T &a, const T &b) const
    {
        return (a != b);
    }
};
82 
83 
/**
 * \brief Inequality functor (wraps equality functor)
 *
 * Holds a copy of an equality functor and reports the logical negation of
 * its result.
 *
 * \tparam EqualityOp  Binary functor type callable as <tt>op(a, b)</tt> whose
 *                     result can be logically negated.
 */
template <typename EqualityOp>
struct InequalityWrapper
{
    /// Wrapped equality operator
    EqualityOp op;

    /// Constructor: stores a copy of the equality functor to negate
    __host__ __device__ __forceinline__
    InequalityWrapper(EqualityOp op) : op(op) {}

    /// Boolean inequality operator, returns <tt>!op(a, b)</tt>.
    /// Not const-qualified, so the wrapped functor's call operator is not
    /// required to be const either.
    template <typename T>
    __host__ __device__ __forceinline__ bool operator()(const T &a, const T &b)
    {
        return !op(a, b);
    }
};
104 
105 
/**
 * \brief Default sum functor.
 *
 * Stateless; the addition is delegated to operator+ of the argument type and
 * the result is returned as a T.
 */
struct Sum
{
    /// Sum operator, returns <tt>a + b</tt>
    template <typename T>
    __host__ __device__ __forceinline__
    T operator()(const T &a, const T &b) const
    {
        return (a + b);
    }
};
118 
119 
/**
 * \brief Default max functor.
 *
 * Stateless; selection is delegated to the CUB_MAX macro.
 */
struct Max
{
    /// Max operator, returns <tt>CUB_MAX(a, b)</tt>
    template <typename T>
    __host__ __device__ __forceinline__
    T operator()(const T &a, const T &b) const
    {
        return (CUB_MAX(a, b));
    }
};
132 
133 
/**
 * \brief Arg max functor (keeps the value and offset of the first occurrence
 *        of the larger item)
 */
struct ArgMax
{
    /**
     * \brief Selects between two key-value pairs by value, then by key.
     *
     * Returns \p b when <tt>b.value > a.value</tt>, or when the values compare
     * equal and <tt>b.key < a.key</tt>; otherwise returns \p a.
     */
    template <typename T, typename OffsetT>
    __host__ __device__ __forceinline__
    KeyValuePair<OffsetT, T> operator()(
        const KeyValuePair<OffsetT, T> &a,
        const KeyValuePair<OffsetT, T> &b) const
    {
        // Deliberately written with if-statements rather than one conditional
        // expression. The original note records a bug with the ternary form:
        // "Mooch BUG (device reduce argmax gk110 3.2 million random fp32)"
        //   return ((b.value > a.value) || ((a.value == b.value) && (b.key < a.key))) ? b : a;

        // Strictly larger value wins outright
        if (b.value > a.value)
            return b;

        // Equal values: prefer the pair with the smaller key
        if ((a.value == b.value) && (b.key < a.key))
            return b;

        return a;
    }
};
153 
154 
/**
 * \brief Default min functor.
 *
 * Stateless; selection is delegated to the CUB_MIN macro.
 */
struct Min
{
    /// Min operator, returns <tt>CUB_MIN(a, b)</tt>
    template <typename T>
    __host__ __device__ __forceinline__
    T operator()(const T &a, const T &b) const
    {
        return (CUB_MIN(a, b));
    }
};
167 
168 
/**
 * \brief Arg min functor (keeps the value and offset of the first occurrence
 *        of the smallest item)
 */
struct ArgMin
{
    /**
     * \brief Selects between two key-value pairs by value, then by key.
     *
     * Returns \p b when <tt>b.value < a.value</tt>, or when the values compare
     * equal and <tt>b.key < a.key</tt>; otherwise returns \p a.
     */
    template <typename T, typename OffsetT>
    __host__ __device__ __forceinline__
    KeyValuePair<OffsetT, T> operator()(
        const KeyValuePair<OffsetT, T> &a,
        const KeyValuePair<OffsetT, T> &b) const
    {
        // Deliberately written with if-statements rather than one conditional
        // expression. The original note records a bug with the ternary form:
        // "Mooch BUG (device reduce argmax gk110 3.2 million random fp32)"
        //   return ((b.value < a.value) || ((a.value == b.value) && (b.key < a.key))) ? b : a;

        // Strictly smaller value wins outright
        if (b.value < a.value)
            return b;

        // Equal values: prefer the pair with the smaller key
        if ((a.value == b.value) && (b.key < a.key))
            return b;

        return a;
    }
};
188 
189 
/**
 * \brief Default cast functor.
 *
 * \tparam B  Destination type of the conversion.
 */
template <typename B>
struct CastOp
{
    /// Cast operator, returns <tt>(B) a</tt> (C-style cast of the argument)
    template <typename A>
    __host__ __device__ __forceinline__
    B operator()(const A &a) const
    {
        return ((B) a);
    }
};
203 
204 
/**
 * \brief Binary operator wrapper for switching non-commutative scan arguments.
 *
 * Calling the wrapper with <tt>(a, b)</tt> invokes the wrapped operator with
 * <tt>(b, a)</tt>.
 *
 * \tparam ScanOp  Binary functor type callable on two objects of type T, with
 *                 a result convertible to T.
 */
template <typename ScanOp>
class SwizzleScanOp
{
private:

    /// Wrapped scan operator
    ScanOp scan_op;

public:

    /// Constructor: stores a copy of the scan operator to wrap
    __host__ __device__ __forceinline__
    SwizzleScanOp(ScanOp scan_op) : scan_op(scan_op) {}

    /// Switch the scan arguments: returns <tt>scan_op(b, a)</tt>
    template <typename T>
    __host__ __device__ __forceinline__
    T operator()(const T &a, const T &b)
    {
        // Local copies are handed to the wrapped operator, so it receives
        // non-const lvalues rather than the caller's const references.
        T _a(a);
        T _b(b);

        return scan_op(_b, _a);
    }
};
233 
234 
/**
 * \brief Reduce-by-segment functor.
 *
 * Combines two key-value pairs \p first and \p second into a pair whose
 * \p key is <tt>first.key + second.key</tt> and whose \p value is
 * <tt>second.value</tt> when <tt>second.key</tt> is non-zero, or
 * <tt>op(first.value, second.value)</tt> otherwise.
 *
 * \tparam ReductionOpT  Binary reduction operator to apply to values
 */
template <typename ReductionOpT>
struct ReduceBySegmentOp
{
    /// Wrapped reduction operator
    ReductionOpT op;

    /// Constructor: default-constructs the wrapped reduction operator
    __host__ __device__ __forceinline__ ReduceBySegmentOp() {}

    /// Constructor: stores a copy of the given reduction operator
    __host__ __device__ __forceinline__ ReduceBySegmentOp(ReductionOpT op) : op(op) {}

    /// Scan operator
    template <typename KeyValuePairT>
    __host__ __device__ __forceinline__ KeyValuePairT operator()(
        const KeyValuePairT &first,
        const KeyValuePairT &second)
    {
        KeyValuePairT retval;
        retval.key = first.key + second.key;
        retval.value = (second.key) ?
            second.value :                      // The second partial reduction spans a segment reset, so its value aggregate becomes the running aggregate
            op(first.value, second.value);      // The second partial reduction does not span a reset, so accumulate both into the running aggregate
        return retval;
    }
};
277 
278 
279 
/**
 * \brief Reduce-by-key functor.
 *
 * Combines two key-value pairs \p first and \p second. The result is a copy
 * of \p second; when the two keys compare equal its \p value is replaced by
 * <tt>op(first.value, second.value)</tt>.
 *
 * \tparam ReductionOpT  Binary reduction operator to apply to values
 */
template <typename ReductionOpT>
struct ReduceByKeyOp
{
    /// Wrapped reduction operator
    ReductionOpT op;

    /// Constructor: default-constructs the wrapped reduction operator
    __host__ __device__ __forceinline__ ReduceByKeyOp() {}

    /// Constructor: stores a copy of the given reduction operator
    __host__ __device__ __forceinline__ ReduceByKeyOp(ReductionOpT op) : op(op) {}

    /// Scan operator
    template <typename KeyValuePairT>
    __host__ __device__ __forceinline__ KeyValuePairT operator()(
        const KeyValuePairT &first,
        const KeyValuePairT &second)
    {
        // Start from the second pair; its key is always the key of the result
        KeyValuePairT retval = second;

        // Matching keys: fold the first pair's value into the result
        if (first.key == second.key)
            retval.value = op(first.value, retval.value);

        return retval;
    }
};
306 
307 
308 
309 
310 
311 
312  // end group UtilModule
314 
315 
316 } // CUB namespace
317 CUB_NS_POSTFIX // Optional outer namespace(s)
__host__ __device__ __forceinline__ bool operator()(const T &a, const T &b) const
Boolean inequality operator, returns (a != b)
__host__ __device__ __forceinline__ InequalityWrapper(EqualityOp op)
Constructor.
Binary reduction operator to apply to values
__host__ __device__ __forceinline__ B operator()(const A &a) const
Cast operator, returns (B) a
Optional outer namespace(s)
__host__ __device__ __forceinline__ KeyValuePair< OffsetT, T > operator()(const KeyValuePair< OffsetT, T > &a, const KeyValuePair< OffsetT, T > &b) const
Arg max operator, preferring the item having the smaller offset in case of ties.
Default inequality functor.
__host__ __device__ __forceinline__ SwizzleScanOp(ScanOp scan_op)
Constructor.
A key identifier paired with a corresponding value.
Definition: util_type.cuh:666
Default equality functor.
ReductionOpT op
Wrapped reduction operator.
__host__ __device__ __forceinline__ ReduceByKeyOp(ReductionOpT op)
Constructor.
__host__ __device__ __forceinline__ KeyValuePairT operator()(const KeyValuePairT &first, const KeyValuePairT &second)
Scan operator.
__host__ __device__ __forceinline__ T operator()(const T &a, const T &b) const
Sum operator, returns a + b
Default max functor.
__host__ __device__ __forceinline__ T operator()(const T &a, const T &b)
Switch the scan arguments.
ScanOp scan_op
Wrapped scan operator.
__host__ __device__ __forceinline__ ReduceBySegmentOp()
Constructor.
Arg max functor (keeps the value and offset of the first occurrence of the larger item)
Reduce-by-segment functor.
__host__ __device__ __forceinline__ T operator()(const T &a, const T &b) const
Max operator, returns (a > b) ? a : b
__host__ __device__ __forceinline__ ReduceBySegmentOp(ReductionOpT op)
Constructor.
__host__ __device__ __forceinline__ T operator()(const T &a, const T &b) const
Min operator, returns (a < b) ? a : b
__host__ __device__ __forceinline__ KeyValuePairT operator()(const KeyValuePairT &first, const KeyValuePairT &second)
Scan operator.
Arg min functor (keeps the value and offset of the first occurrence of the smallest item)
#define CUB_MIN(a, b)
Select minimum(a, b)
Definition: util_macro.cuh:66
__host__ __device__ __forceinline__ bool operator()(const T &a, const T &b)
Boolean inequality operator, returns !op(a, b)
ReductionOpT op
Wrapped reduction operator.
Default cast functor.
Inequality functor (wraps equality functor)
Default sum functor.
#define CUB_MAX(a, b)
Select maximum(a, b)
Definition: util_macro.cuh:61
__host__ __device__ __forceinline__ ReduceByKeyOp()
Constructor.
Default min functor.
EqualityOp op
Wrapped equality operator.
__host__ __device__ __forceinline__ bool operator()(const T &a, const T &b) const
Boolean equality operator, returns (a == b)
__host__ __device__ __forceinline__ KeyValuePair< OffsetT, T > operator()(const KeyValuePair< OffsetT, T > &a, const KeyValuePair< OffsetT, T > &b) const
Arg min operator, preferring the item having the smaller offset in case of ties.
Binary operator wrapper for switching non-commutative scan arguments.