OpenFPM  5.2.0
Project that contains the implementation of distributed structures
thread_operators.cuh
Go to the documentation of this file.
1 /******************************************************************************
2  * Copyright (c) 2011, Duane Merrill. All rights reserved.
3  * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of the NVIDIA CORPORATION nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  *
27  ******************************************************************************/
28 
34 /******************************************************************************
35  * Simple functor operators
36  ******************************************************************************/
37 
38 #pragma once
39 
40 #include "../util_macro.cuh"
41 #include "../util_type.cuh"
42 #include "../util_namespace.cuh"
43 
45 CUB_NS_PREFIX
46 
48 namespace cub {
49 
50 
/**
 * \brief Default equality functor
 */
struct Equality
{
    /// Boolean equality operator, returns <tt>(a == b)</tt>
    template <typename T>
    __host__ __device__ __forceinline__
    bool operator()(const T &a, const T &b) const
    {
        // Delegates entirely to T's own operator==
        const bool are_equal = (a == b);
        return are_equal;
    }
};
68 
69 
/**
 * \brief Default inequality functor
 */
struct Inequality
{
    /// Boolean inequality operator, returns <tt>(a != b)</tt>
    template <typename T>
    __host__ __device__ __forceinline__
    bool operator()(const T &a, const T &b) const
    {
        // Uses T's own operator!= (not a negated operator==)
        const bool are_different = (a != b);
        return are_different;
    }
};
82 
83 
/**
 * \brief Inequality functor (wraps equality functor)
 *
 * \tparam EqualityOp  Callable type invoked as <tt>op(a, b)</tt>; its result is
 *                     logically negated by this wrapper.
 */
template <typename EqualityOp>
struct InequalityWrapper
{
    /// Wrapped equality operator
    EqualityOp op;

    /// Constructor: stores a copy of the equality operator to wrap
    __host__ __device__ __forceinline__
    InequalityWrapper(EqualityOp op) : op(op) {}

    /// Boolean inequality operator, returns <tt>!op(a, b)</tt>
    /// (intentionally non-const, so a wrapped operator with a non-const call operator still works)
    template <typename T>
    __host__ __device__ __forceinline__ bool operator()(const T &a, const T &b)
    {
        return !op(a, b);
    }
};
104 
105 
/**
 * \brief Default sum functor
 */
struct Sum
{
    /// Sum operator, returns <tt>a + b</tt> (converted to \p T on return)
    template <typename T>
    __host__ __device__ __forceinline__
    T operator()(const T &a, const T &b) const
    {
        return (a + b);
    }
};
118 
119 
/**
 * \brief Default max functor
 */
struct Max
{
    /// Max operator, returns <tt>CUB_MAX(a, b)</tt>
    template <typename T>
    __host__ __device__ __forceinline__
    T operator()(const T &a, const T &b) const
    {
        // Selection is performed by the CUB_MAX macro
        return (CUB_MAX(a, b));
    }
};
132 
133 
/**
 * \brief Arg max functor (keeps the value and offset of the first occurrence of the larger item)
 */
struct ArgMax
{
    /// Boolean max operator, preferring the item having the smaller offset in case of ties.
    /// Each pair carries its offset in <tt>key</tt> and its item in <tt>value</tt>.
    template <typename T, typename OffsetT>
    __host__ __device__ __forceinline__
    KeyValuePair<OffsetT, T> operator()(
        const KeyValuePair<OffsetT, T> &a,
        const KeyValuePair<OffsetT, T> &b) const
    {
        // Kept as explicit branches on purpose. The single-expression form
        //     return ((b.value > a.value) || ((a.value == b.value) && (b.key < a.key))) ? b : a;
        // was disabled by the original authors with the note:
        // "Mooch BUG (device reduce argmax gk110 3.2 million random fp32)"

        // A strictly larger value in b always wins
        if (b.value > a.value)
            return b;

        // Equal values: b wins only when it has the smaller key (offset)
        if ((a.value == b.value) && (b.key < a.key))
            return b;

        return a;
    }
};
153 
154 
/**
 * \brief Default min functor
 */
struct Min
{
    /// Min operator, returns <tt>CUB_MIN(a, b)</tt>
    template <typename T>
    __host__ __device__ __forceinline__
    T operator()(const T &a, const T &b) const
    {
        // Selection is performed by the CUB_MIN macro
        return (CUB_MIN(a, b));
    }
};
167 
168 
/**
 * \brief Arg min functor (keeps the value and offset of the first occurrence of the smallest item)
 */
struct ArgMin
{
    /// Boolean min operator, preferring the item having the smaller offset in case of ties.
    /// Each pair carries its offset in <tt>key</tt> and its item in <tt>value</tt>.
    template <typename T, typename OffsetT>
    __host__ __device__ __forceinline__
    KeyValuePair<OffsetT, T> operator()(
        const KeyValuePair<OffsetT, T> &a,
        const KeyValuePair<OffsetT, T> &b) const
    {
        // Kept as explicit branches on purpose. The single-expression form
        //     return ((b.value < a.value) || ((a.value == b.value) && (b.key < a.key))) ? b : a;
        // was disabled by the original authors with the note:
        // "Mooch BUG (device reduce argmax gk110 3.2 million random fp32)"

        // A strictly smaller value in b always wins
        if (b.value < a.value)
            return b;

        // Equal values: b wins only when it has the smaller key (offset)
        if ((a.value == b.value) && (b.key < a.key))
            return b;

        return a;
    }
};
188 
189 
/**
 * \brief Default cast functor
 *
 * \tparam B  Destination type of the cast
 */
template <typename B>
struct CastOp
{
    /// Cast operator, returns <tt>(B) a</tt>
    template <typename A>
    __host__ __device__ __forceinline__
    B operator()(const A &a) const
    {
        // C-style cast kept deliberately: it accepts conversions a static_cast would reject
        return (B)(a);
    }
};
203 
204 
/**
 * \brief Binary operator wrapper for switching non-commutative scan arguments
 *
 * \tparam ScanOp  Binary callable type; invoked as <tt>scan_op(b, a)</tt> when
 *                 this wrapper is called as <tt>(a, b)</tt>.
 */
template <typename ScanOp>
struct SwizzleScanOp
{
private:

    /// Wrapped scan operator
    ScanOp scan_op;

public:

    /// Constructor: stores a copy of the scan operator to wrap
    __host__ __device__ __forceinline__
    SwizzleScanOp(ScanOp scan_op) : scan_op(scan_op) {}

    /// Switch the scan arguments: returns <tt>scan_op(b, a)</tt>
    template <typename T>
    __host__ __device__ __forceinline__
    T operator()(const T &a, const T &b)
    {
        // Local (non-const) copies are handed to the wrapped operator
        T _a(a);
        T _b(b);

        return scan_op(_b, _a);
    }
};
233 
234 
/**
 * \brief Reduce-by-segment functor
 *
 * Combines two key-value pairs: the keys are added together, and the value is
 * either the second value alone (when the second key is non-zero) or the
 * wrapped reduction of both values.
 *
 * \tparam ReductionOpT  Binary reduction operator to apply to values
 */
template <typename ReductionOpT>
struct ReduceBySegmentOp
{
    /// Wrapped reduction operator
    ReductionOpT op;

    /// Constructor (leaves \p op default-initialized)
    __host__ __device__ __forceinline__ ReduceBySegmentOp() {}

    /// Constructor: stores a copy of the reduction operator to wrap
    __host__ __device__ __forceinline__ ReduceBySegmentOp(ReductionOpT op) : op(op) {}

    /// Scan operator
    template <typename KeyValuePairT>
    __host__ __device__ __forceinline__ KeyValuePairT operator()(
        const KeyValuePairT &first,     ///< First partial reduction
        const KeyValuePairT &second)    ///< Second partial reduction
    {
        KeyValuePairT retval;
        retval.key = first.key + second.key;
        retval.value = (second.key) ?
                second.value :                          // The second partial reduction spans a segment reset, so its value aggregate becomes the running aggregate
                op(first.value, second.value);          // The second partial reduction does not span a reset, so accumulate both into the running aggregate
        return retval;
    }
};
277 
278 
279 
/**
 * \brief Reduce-by-key functor
 *
 * Combines two key-value pairs: the result is a copy of the second pair, whose
 * value is replaced by the wrapped reduction of both values when the two keys
 * compare equal.
 *
 * \tparam ReductionOpT  Binary reduction operator to apply to values
 */
template <typename ReductionOpT>
struct ReduceByKeyOp
{
    /// Wrapped reduction operator
    ReductionOpT op;

    /// Constructor (leaves \p op default-initialized)
    __host__ __device__ __forceinline__ ReduceByKeyOp() {}

    /// Constructor: stores a copy of the reduction operator to wrap
    __host__ __device__ __forceinline__ ReduceByKeyOp(ReductionOpT op) : op(op) {}

    /// Scan operator
    template <typename KeyValuePairT>
    __host__ __device__ __forceinline__ KeyValuePairT operator()(
        const KeyValuePairT &first,     ///< First partial reduction
        const KeyValuePairT &second)    ///< Second partial reduction
    {
        // Start from the second pair; its key is always the resulting key
        KeyValuePairT retval = second;

        // Same key: fold the first value into the result
        if (first.key == second.key)
            retval.value = op(first.value, retval.value);

        return retval;
    }
};
306 
307 
308 
309 
310 
311 
312  // end group UtilModule
314 
315 
316 } // CUB namespace
317 CUB_NS_POSTFIX // Optional outer namespace(s)
Binary operator wrapper for switching non-commutative scan arguments.
ScanOp scan_op
Wrapped scan operator.
__host__ __device__ __forceinline__ T operator()(const T &a, const T &b)
Switch the scan arguments.
__host__ __device__ __forceinline__ SwizzleScanOp(ScanOp scan_op)
Constructor.
#define CUB_MAX(a, b)
Select maximum(a, b)
Definition: util_macro.cuh:61
#define CUB_MIN(a, b)
Select minimum(a, b)
Definition: util_macro.cuh:66
Optional outer namespace(s)
Arg max functor (keeps the value and offset of the first occurrence of the larger item)
__host__ __device__ __forceinline__ KeyValuePair< OffsetT, T > operator()(const KeyValuePair< OffsetT, T > &a, const KeyValuePair< OffsetT, T > &b) const
Boolean max operator, preferring the item having the smaller offset in case of ties.
Arg min functor (keeps the value and offset of the first occurrence of the smallest item)
__host__ __device__ __forceinline__ KeyValuePair< OffsetT, T > operator()(const KeyValuePair< OffsetT, T > &a, const KeyValuePair< OffsetT, T > &b) const
Boolean min operator, preferring the item having the smaller offset in case of ties.
Default cast functor.
__host__ __device__ __forceinline__ B operator()(const A &a) const
Cast operator, returns (B) a
Default equality functor.
__host__ __device__ __forceinline__ bool operator()(const T &a, const T &b) const
Boolean equality operator, returns (a == b)
Inequality functor (wraps equality functor)
EqualityOp op
Wrapped equality operator.
__host__ __device__ __forceinline__ InequalityWrapper(EqualityOp op)
Constructor.
__host__ __device__ __forceinline__ bool operator()(const T &a, const T &b)
Boolean inequality operator, returns !op(a, b)
Default inequality functor.
__host__ __device__ __forceinline__ bool operator()(const T &a, const T &b) const
Boolean inequality operator, returns (a != b)
A key identifier paired with a corresponding value.
Definition: util_type.cuh:667
Default max functor.
__host__ __device__ __forceinline__ T operator()(const T &a, const T &b) const
Max operator, returns (a > b) ? a : b
Default min functor.
__host__ __device__ __forceinline__ T operator()(const T &a, const T &b) const
Min operator, returns (a < b) ? a : b
Reduce-by-key functor. ReductionOpT is the binary reduction operator to apply to values.
ReductionOpT op
Wrapped reduction operator.
__host__ __device__ __forceinline__ ReduceByKeyOp()
Constructor.
__host__ __device__ __forceinline__ ReduceByKeyOp(ReductionOpT op)
Constructor.
__host__ __device__ __forceinline__ KeyValuePairT operator()(const KeyValuePairT &first, const KeyValuePairT &second)
Scan operator.
Reduce-by-segment functor.
ReductionOpT op
Wrapped reduction operator.
__host__ __device__ __forceinline__ ReduceBySegmentOp()
Constructor.
__host__ __device__ __forceinline__ ReduceBySegmentOp(ReductionOpT op)
Constructor.
__host__ __device__ __forceinline__ KeyValuePairT operator()(const KeyValuePairT &first, const KeyValuePairT &second)
Scan operator.
Default sum functor.
__host__ __device__ __forceinline__ T operator()(const T &a, const T &b) const
Sum operator, returns a + b