OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
thread_reduce.cuh
Go to the documentation of this file.
1 /******************************************************************************
2  * Copyright (c) 2011, Duane Merrill. All rights reserved.
3  * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of the NVIDIA CORPORATION nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  *
27  ******************************************************************************/
28 
34 #pragma once
35 
36 #include "../thread/thread_operators.cuh"
37 #include "../util_namespace.cuh"
38 
40 CUB_NS_PREFIX
41 
43 namespace cub {
44 
46 namespace internal {
47 
/**
 * \brief Sequential reduction of LENGTH items, seeded with a caller-supplied prefix.
 *
 * Items are folded from left to right, so the result is
 * op(...op(op(prefix, input[0]), input[1])..., input[LENGTH - 1]).
 * If LENGTH is not positive the loop body never executes and \p prefix is
 * returned unchanged.
 *
 * \tparam LENGTH       Number of items read from \p input
 * \tparam T            Item type (also the return type)
 * \tparam ReductionOp  Functor type callable as <tt>T op(T, T)</tt>-compatible
 */
template <
    int         LENGTH,
    typename    T,
    typename    ReductionOp>
__device__ __forceinline__ T ThreadReduce(
    T*                  input,          ///< [in] Pointer to the first item to reduce
    ReductionOp         reduction_op,   ///< [in] Binary reduction functor
    T                   prefix,         ///< [in] Value that seeds the reduction
    Int2Type<LENGTH>    /*length*/)     ///< [in] Tag type carrying LENGTH; the object itself is never read
{
    // Running aggregate, starting from the seed
    T aggregate = prefix;

    // LENGTH is a compile-time constant, so the trip count is known statically
    #pragma unroll
    for (int item = 0; item < LENGTH; ++item)
    {
        aggregate = reduction_op(aggregate, input[item]);
    }

    return aggregate;
}
69 
70 
/**
 * \brief Sequential reduction of LENGTH items behind a raw pointer, seeded with a prefix.
 *
 * LENGTH cannot be deduced from a pointer argument, so callers name it
 * explicitly (<tt>ThreadReduce<LENGTH>(...)</tt>). The work is forwarded to
 * the overload that takes an Int2Type<LENGTH> tag.
 *
 * \tparam LENGTH       Number of items read from \p input
 * \tparam T            Item type (also the return type)
 * \tparam ReductionOp  Binary reduction functor type
 */
template <
    int         LENGTH,
    typename    T,
    typename    ReductionOp>
__device__ __forceinline__ T ThreadReduce(
    T*              input,          ///< [in] Pointer to the first item to reduce
    ReductionOp     reduction_op,   ///< [in] Binary reduction functor
    T               prefix)         ///< [in] Value that seeds the reduction
{
    // Encode the explicit LENGTH as a type so the tagged overload can deduce it
    Int2Type<LENGTH> length_tag;
    return ThreadReduce(input, reduction_op, prefix, length_tag);
}
89 
90 
/**
 * \brief Sequential reduction of LENGTH items behind a raw pointer, without a prefix.
 *
 * The first item becomes the seed and the remaining LENGTH - 1 items are
 * folded into it by the prefix-taking overload. \p input[0] is read
 * unconditionally, so \p input must reference at least one item.
 *
 * \tparam LENGTH       Number of items read from \p input
 * \tparam T            Item type (also the return type)
 * \tparam ReductionOp  Binary reduction functor type
 */
template <
    int         LENGTH,
    typename    T,
    typename    ReductionOp>
__device__ __forceinline__ T ThreadReduce(
    T*              input,          ///< [in] Pointer to the first item to reduce
    ReductionOp     reduction_op)   ///< [in] Binary reduction functor
{
    // Seed with the leading item ...
    T seed = *input;

    // ... then reduce the tail (one item shorter, starting one item later)
    return ThreadReduce<LENGTH - 1>(input + 1, reduction_op, seed);
}
109 
110 
/**
 * \brief Sequential reduction of a statically-sized array, seeded with a prefix.
 *
 * LENGTH is deduced from the array type. The array is handed (decayed to a
 * pointer) to the overload that takes an Int2Type<LENGTH> tag.
 *
 * \tparam LENGTH       Number of items in \p input (deduced)
 * \tparam T            Item type (also the return type)
 * \tparam ReductionOp  Binary reduction functor type
 */
template <
    int         LENGTH,
    typename    T,
    typename    ReductionOp>
__device__ __forceinline__ T ThreadReduce(
    T               (&input)[LENGTH],   ///< [in] Array of items to reduce
    ReductionOp     reduction_op,       ///< [in] Binary reduction functor
    T               prefix)             ///< [in] Value that seeds the reduction
{
    // Carry the deduced array extent to the tagged overload as a type
    Int2Type<LENGTH> length_tag;
    return ThreadReduce(input, reduction_op, prefix, length_tag);
}
129 
130 
/**
 * \brief Sequential reduction of a statically-sized array, without a prefix.
 *
 * LENGTH is deduced from the array type. The array is converted to a pointer
 * to its first item and passed, with LENGTH given explicitly, to the
 * two-argument pointer overload.
 *
 * \tparam LENGTH       Number of items in \p input (deduced)
 * \tparam T            Item type (also the return type)
 * \tparam ReductionOp  Binary reduction functor type
 */
template <
    int         LENGTH,
    typename    T,
    typename    ReductionOp>
__device__ __forceinline__ T ThreadReduce(
    T               (&input)[LENGTH],   ///< [in] Array of items to reduce
    ReductionOp     reduction_op)       ///< [in] Binary reduction functor
{
    // Explicit array-to-pointer conversion so the call cannot bind to this
    // array-reference overload again
    return ThreadReduce<LENGTH>(static_cast<T*>(input), reduction_op);
}
148 
149 
150 } // internal namespace
151 } // CUB namespace
152 CUB_NS_POSTFIX // Optional outer namespace(s)
__device__ __forceinline__ T ThreadReduce(T *input, ReductionOp reduction_op, T prefix, Int2Type< LENGTH >)
Optional outer namespace(s)
OutputIteratorT OffsetT GridEvenShare< OffsetT > ReductionOpT reduction_op
< [in] Binary reduction functor
Allows for the treatment of an integral constant as a type at compile-time (e.g., to achieve static call dispatch based on constant integral values)
Definition: util_type.cuh:275