OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
 
Loading...
Searching...
No Matches
thread_reduce.cuh
Go to the documentation of this file.
1/******************************************************************************
2 * Copyright (c) 2011, Duane Merrill. All rights reserved.
3 * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of the NVIDIA CORPORATION nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 *
27 ******************************************************************************/
28
34#pragma once
35
36#include "../thread/thread_operators.cuh"
37#include "../util_namespace.cuh"
38
40CUB_NS_PREFIX
41
43namespace cub {
44
46namespace internal {
47
/**
 * \brief Sequential reduction over a statically-sized run of items, seeded with a prefix.
 *
 * Starting from \p prefix, the items input[0] .. input[LENGTH-1] are folded
 * into a running value in ascending index order; each step evaluates
 * reduction_op(running_value, input[i]). When LENGTH is not positive the loop
 * body never runs and \p prefix is returned unchanged.
 *
 * \tparam LENGTH       Number of items read from \p input
 * \tparam T            Type of the items and of the returned aggregate
 * \tparam ReductionOp  Type of the binary functor, callable as reduction_op(a, b)
 *
 * \param input         Pointer to the first item to reduce
 * \param reduction_op  Binary reduction functor
 * \param prefix        Value used to seed the running aggregate
 * \return              The aggregate after all LENGTH items have been folded in
 */
template <int LENGTH, typename T, typename ReductionOp>
__device__ __forceinline__ T ThreadReduce(
    T*                  input,
    ReductionOp         reduction_op,
    T                   prefix,
    Int2Type<LENGTH>    /*length*/)
{
    T aggregate = prefix;

    // Fixed trip count known at compile time, so request full unrolling.
    #pragma unroll
    for (int item = 0; item < LENGTH; ++item)
    {
        aggregate = reduction_op(aggregate, input[item]);
    }

    return aggregate;
}
69
70
/**
 * \brief Sequential, prefix-seeded reduction over LENGTH items reached through a pointer.
 *
 * Forwards to the ThreadReduce overload that takes an Int2Type<LENGTH> tag,
 * passing along \p input, \p reduction_op and \p prefix unchanged.
 *
 * \tparam LENGTH       Number of items to reduce (must be given explicitly by the caller)
 * \tparam T            Type of the items and of the returned aggregate
 * \tparam ReductionOp  Type of the binary functor
 *
 * \param input         Pointer to the first item to reduce
 * \param reduction_op  Binary reduction functor
 * \param prefix        Value used to seed the aggregate
 * \return              Whatever the tagged overload returns
 */
template <int LENGTH, typename T, typename ReductionOp>
__device__ __forceinline__ T ThreadReduce(
    T*              input,
    ReductionOp     reduction_op,
    T               prefix)
{
    // The tag carries LENGTH as a type so the callee can deduce it.
    Int2Type<LENGTH> length_tag;
    return ThreadReduce(input, reduction_op, prefix, length_tag);
}
89
90
/**
 * \brief Sequential reduction over LENGTH items reached through a pointer, without a prefix.
 *
 * The first item, input[0], is used as the seed, and the remaining
 * LENGTH - 1 items starting at input + 1 are reduced through the
 * prefix-seeded pointer overload. input[0] is read unconditionally, so
 * \p input must refer to at least one readable item.
 *
 * \tparam LENGTH       Number of items to reduce (must be given explicitly by the caller)
 * \tparam T            Type of the items and of the returned aggregate
 * \tparam ReductionOp  Type of the binary functor
 *
 * \param input         Pointer to the first item to reduce
 * \param reduction_op  Binary reduction functor
 * \return              Aggregate of the LENGTH items
 */
template <int LENGTH, typename T, typename ReductionOp>
__device__ __forceinline__ T ThreadReduce(
    T*              input,
    ReductionOp     reduction_op)
{
    T  seed = input[0];         // first item doubles as the prefix
    T* tail = input + 1;        // everything after the first item

    return ThreadReduce<LENGTH - 1>(tail, reduction_op, seed);
}
109
110
/**
 * \brief Sequential, prefix-seeded reduction over a statically-sized array.
 *
 * LENGTH is deduced from the array reference. The array is handed, together
 * with \p reduction_op and \p prefix, to the ThreadReduce overload that takes
 * an Int2Type<LENGTH> tag.
 *
 * \tparam LENGTH       Number of elements in \p input (deduced)
 * \tparam T            Type of the items and of the returned aggregate
 * \tparam ReductionOp  Type of the binary functor
 *
 * \param input         Array of items to reduce
 * \param reduction_op  Binary reduction functor
 * \param prefix        Value used to seed the aggregate
 * \return              Whatever the tagged overload returns
 */
template <int LENGTH, typename T, typename ReductionOp>
__device__ __forceinline__ T ThreadReduce(
    T               (&input)[LENGTH],
    ReductionOp     reduction_op,
    T               prefix)
{
    // The tag carries the array extent as a type for the callee.
    Int2Type<LENGTH> length_tag;
    return ThreadReduce(input, reduction_op, prefix, length_tag);
}
129
130
/**
 * \brief Sequential reduction over a statically-sized array, without a prefix.
 *
 * LENGTH is deduced from the array reference. The array is converted to a
 * pointer to its first element and passed to the pointer-based, prefix-less
 * ThreadReduce<LENGTH> overload.
 *
 * \tparam LENGTH       Number of elements in \p input (deduced)
 * \tparam T            Type of the items and of the returned aggregate
 * \tparam ReductionOp  Type of the binary functor
 *
 * \param input         Array of items to reduce
 * \param reduction_op  Binary reduction functor
 * \return              Whatever the pointer-based overload returns
 */
template <int LENGTH, typename T, typename ReductionOp>
__device__ __forceinline__ T ThreadReduce(
    T               (&input)[LENGTH],
    ReductionOp     reduction_op)
{
    // Decay to a plain pointer so the call selects the pointer overload
    // rather than this array overload again.
    T* first = input;
    return ThreadReduce<LENGTH>(first, reduction_op);
}
148
149
150} // internal namespace
151} // CUB namespace
152CUB_NS_POSTFIX // Optional outer namespace(s)
__device__ __forceinline__ T ThreadReduce(T *input, ReductionOp reduction_op, T prefix, Int2Type< LENGTH >)
Optional outer namespace(s)
OutputIteratorT OffsetT GridEvenShare< OffsetT > ReductionOpT reduction_op
< [in] Binary reduction functor
Allows for the treatment of an integral constant as a type at compile-time (e.g., to achieve static c...