OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
 
Loading...
Searching...
No Matches
thread_scan.cuh
Go to the documentation of this file.
1/******************************************************************************
2 * Copyright (c) 2011, Duane Merrill. All rights reserved.
3 * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of the NVIDIA CORPORATION nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 *
27 ******************************************************************************/
28
34#pragma once
35
36#include "../thread/thread_operators.cuh"
37#include "../util_namespace.cuh"
38
40CUB_NS_PREFIX
41
43namespace cub {
44
46namespace internal {
47
48
/**
 * \brief Sequential exclusive scan over \p LENGTH items, continuing from running values supplied by the caller.
 *
 * For every index i in [0, LENGTH) the current value of \p exclusive is stored
 * to output[i], after which input[i] is folded in through
 * scan_op(exclusive, input[i]).
 *
 * \tparam LENGTH   Number of items to process (carried by the trailing Int2Type tag)
 * \tparam T        Item type
 * \tparam ScanOp   Callable invoked as scan_op(a, b); its result is assigned to a T
 *
 * \param inclusive Returned unchanged when LENGTH is 0; otherwise overwritten on every iteration
 * \param exclusive Running value that is written to \p output before each item is folded in
 * \param input     Pointer to the first item to read
 * \param output    Pointer to the first slot to write
 * \param scan_op   Binary combining operator
 *
 * \return The value produced by the last scan_op call (or \p inclusive as passed in when LENGTH is 0).
 */
template <int LENGTH, typename T, typename ScanOp>
__device__ __forceinline__ T ThreadScanExclusive(
    T                   inclusive,
    T                   exclusive,
    T                   *input,
    T                   *output,
    ScanOp              scan_op,
    Int2Type<LENGTH>    /*length*/)
{
    #pragma unroll
    for (int item = 0; item < LENGTH; ++item)
    {
        // input[item] is consumed before output[item] is stored.
        inclusive    = scan_op(exclusive, input[item]);
        output[item] = exclusive;
        exclusive    = inclusive;
    }

    return inclusive;
}
81
82
83
/**
 * \brief Sequential exclusive scan of \p LENGTH items addressed through raw pointers, seeded with \p prefix.
 *
 * output[0] is always set to \p prefix. The value carried into the remaining
 * items is scan_op(prefix, input[0]) when \p apply_prefix is true, and plain
 * input[0] otherwise. Items 1 .. LENGTH-1 are handled by the Int2Type-tagged
 * ThreadScanExclusive overload.
 *
 * \tparam LENGTH       Number of items; must be given explicitly because it cannot be deduced from pointers
 * \tparam T            Item type
 * \tparam ScanOp       Callable invoked as scan_op(a, b)
 *
 * \param input         Pointer to the first input item (input[0] is read unconditionally)
 * \param output        Pointer to the first output slot (output[0] is written unconditionally)
 * \param scan_op       Binary combining operator
 * \param prefix        Seed value; stored to output[0]
 * \param apply_prefix  Whether \p prefix is folded into the running value (default: true)
 *
 * \return The value returned by the tagged overload, i.e. the running value after the last item.
 */
template <int LENGTH, typename T, typename ScanOp>
__device__ __forceinline__ T ThreadScanExclusive(
    T       *input,
    T       *output,
    ScanOp  scan_op,
    T       prefix,
    bool    apply_prefix = true)
{
    // Running value that covers the first item (and the prefix, when requested).
    T first_aggregate = input[0];
    if (apply_prefix)
        first_aggregate = scan_op(prefix, first_aggregate);

    // Slot 0 receives the prefix regardless of apply_prefix.
    output[0] = prefix;

    // Remaining items: both running values start from the same aggregate.
    return ThreadScanExclusive(
        first_aggregate,
        first_aggregate,
        input + 1,
        output + 1,
        scan_op,
        Int2Type<LENGTH - 1>());
}
112
113
/**
 * \brief Sequential exclusive scan over two statically sized arrays, seeded with \p prefix.
 *
 * Thin forwarding wrapper: \p LENGTH is deduced from the array extents and the
 * work is done by the pointer-based ThreadScanExclusive<LENGTH> overload.
 *
 * \tparam LENGTH       Array extent (deduced)
 * \tparam T            Item type (deduced)
 * \tparam ScanOp       Callable invoked as scan_op(a, b)
 *
 * \param input         Input array
 * \param output        Output array
 * \param scan_op       Binary combining operator
 * \param prefix        Seed value forwarded to the pointer overload
 * \param apply_prefix  Forwarded unchanged (default: true)
 *
 * \return Whatever the pointer-based overload returns.
 */
template <int LENGTH, typename T, typename ScanOp>
__device__ __forceinline__ T ThreadScanExclusive(
    T       (&input)[LENGTH],
    T       (&output)[LENGTH],
    ScanOp  scan_op,
    T       prefix,
    bool    apply_prefix = true)
{
    // Let the array references decay to element pointers, then forward.
    T *input_items  = input;
    T *output_items = output;

    return ThreadScanExclusive<LENGTH>(input_items, output_items, scan_op, prefix, apply_prefix);
}
134
135
136
137
138
139
140
141
142
/**
 * \brief Sequential inclusive scan over \p LENGTH items, continuing from a running value supplied by the caller.
 *
 * For every index i in [0, LENGTH) the running value is updated to
 * scan_op(inclusive, input[i]) and that updated value is stored to output[i].
 *
 * \tparam LENGTH   Number of items to process (carried by the trailing Int2Type tag)
 * \tparam T        Item type
 * \tparam ScanOp   Callable invoked as scan_op(a, b); its result is assigned to a T
 *
 * \param inclusive Running value on entry
 * \param input     Pointer to the first item to read
 * \param output    Pointer to the first slot to write
 * \param scan_op   Binary combining operator
 *
 * \return The running value after the last item (equal to \p inclusive as passed in when LENGTH is 0).
 */
template <int LENGTH, typename T, typename ScanOp>
__device__ __forceinline__ T ThreadScanInclusive(
    T                   inclusive,
    T                   *input,
    T                   *output,
    ScanOp              scan_op,
    Int2Type<LENGTH>    /*length*/)
{
    #pragma unroll
    for (int item = 0; item < LENGTH; ++item)
    {
        // Fold the next item in, then publish the updated running value.
        inclusive    = scan_op(inclusive, input[item]);
        output[item] = inclusive;
    }

    return inclusive;
}
163
164
/**
 * \brief Sequential inclusive scan of \p LENGTH items addressed through raw pointers, without a seed.
 *
 * input[0] is copied to output[0] and becomes the initial running value.
 * Items 1 .. LENGTH-1 are handled by the Int2Type-tagged ThreadScanInclusive
 * overload.
 *
 * \tparam LENGTH   Number of items; must be given explicitly because it cannot be deduced from pointers
 * \tparam T        Item type
 * \tparam ScanOp   Callable invoked as scan_op(a, b)
 *
 * \param input     Pointer to the first input item (input[0] is read unconditionally)
 * \param output    Pointer to the first output slot (output[0] is written unconditionally)
 * \param scan_op   Binary combining operator
 *
 * \return The value returned by the tagged overload, i.e. the running value after the last item.
 */
template <int LENGTH, typename T, typename ScanOp>
__device__ __forceinline__ T ThreadScanInclusive(
    T       *input,
    T       *output,
    ScanOp  scan_op)
{
    // The first item passes through untouched and seeds the running value.
    T running = input[0];
    output[0] = running;

    // Scan whatever is left after the first item.
    return ThreadScanInclusive(
        running,
        input + 1,
        output + 1,
        scan_op,
        Int2Type<LENGTH - 1>());
}
187
188
/**
 * \brief Sequential inclusive scan over two statically sized arrays, without a seed.
 *
 * Thin forwarding wrapper: \p LENGTH is deduced from the array extents and the
 * work is done by the pointer-based ThreadScanInclusive<LENGTH> overload that
 * takes (input, output, scan_op).
 *
 * \tparam LENGTH   Array extent (deduced)
 * \tparam T        Item type (deduced)
 * \tparam ScanOp   Callable invoked as scan_op(a, b)
 *
 * \param input     Input array
 * \param output    Output array
 * \param scan_op   Binary combining operator
 *
 * \return Whatever the pointer-based overload returns.
 */
template <int LENGTH, typename T, typename ScanOp>
__device__ __forceinline__ T ThreadScanInclusive(
    T       (&input)[LENGTH],
    T       (&output)[LENGTH],
    ScanOp  scan_op)
{
    // Let the array references decay to element pointers, then forward.
    T *input_items  = input;
    T *output_items = output;

    return ThreadScanInclusive<LENGTH>(input_items, output_items, scan_op);
}
207
208
/**
 * \brief Sequential inclusive scan of \p LENGTH items addressed through raw pointers, seeded with \p prefix.
 *
 * The initial running value is scan_op(prefix, input[0]) when \p apply_prefix
 * is true, and plain input[0] otherwise; it is stored to output[0]. Items
 * 1 .. LENGTH-1 are handled by the Int2Type-tagged ThreadScanInclusive
 * overload.
 *
 * \tparam LENGTH       Number of items; must be given explicitly because it cannot be deduced from pointers
 * \tparam T            Item type
 * \tparam ScanOp       Callable invoked as scan_op(a, b)
 *
 * \param input         Pointer to the first input item (input[0] is read unconditionally)
 * \param output        Pointer to the first output slot (output[0] is written unconditionally)
 * \param scan_op       Binary combining operator
 * \param prefix        Seed value; only used when \p apply_prefix is true
 * \param apply_prefix  Whether \p prefix is folded into the first item (default: true)
 *
 * \return The value returned by the tagged overload, i.e. the running value after the last item.
 */
template <int LENGTH, typename T, typename ScanOp>
__device__ __forceinline__ T ThreadScanInclusive(
    T       *input,
    T       *output,
    ScanOp  scan_op,
    T       prefix,
    bool    apply_prefix = true)
{
    // Start from the first item, optionally combined with the seed.
    T running = input[0];
    if (apply_prefix)
        running = scan_op(prefix, running);

    output[0] = running;

    // Scan whatever is left after the first item.
    return ThreadScanInclusive(
        running,
        input + 1,
        output + 1,
        scan_op,
        Int2Type<LENGTH - 1>());
}
237
238
/**
 * \brief Sequential inclusive scan over two statically sized arrays, seeded with \p prefix.
 *
 * Thin forwarding wrapper: \p LENGTH is deduced from the array extents and the
 * work is done by the pointer-based ThreadScanInclusive<LENGTH> overload that
 * takes (input, output, scan_op, prefix, apply_prefix).
 *
 * \tparam LENGTH       Array extent (deduced)
 * \tparam T            Item type (deduced)
 * \tparam ScanOp       Callable invoked as scan_op(a, b)
 *
 * \param input         Input array
 * \param output        Output array
 * \param scan_op       Binary combining operator
 * \param prefix        Seed value forwarded to the pointer overload
 * \param apply_prefix  Forwarded unchanged (default: true)
 *
 * \return Whatever the pointer-based overload returns.
 */
template <int LENGTH, typename T, typename ScanOp>
__device__ __forceinline__ T ThreadScanInclusive(
    T       (&input)[LENGTH],
    T       (&output)[LENGTH],
    ScanOp  scan_op,
    T       prefix,
    bool    apply_prefix = true)
{
    // Let the array references decay to element pointers, then forward.
    T *input_items  = input;
    T *output_items = output;

    return ThreadScanInclusive<LENGTH>(input_items, output_items, scan_op, prefix, apply_prefix);
}
259
260
262 // end group UtilModule
264
265
266} // internal namespace
267} // CUB namespace
268CUB_NS_POSTFIX // Optional outer namespace(s)
__device__ __forceinline__ T ThreadScanExclusive(T inclusive, T exclusive, T *input, T *output, ScanOp scan_op, Int2Type< LENGTH >)
__device__ __forceinline__ T ThreadScanInclusive(T inclusive, T *input, T *output, ScanOp scan_op, Int2Type< LENGTH >)
Optional outer namespace(s)
OutputIteratorT ScanTileStateT int ScanOpT scan_op
Binary scan functor.
Allows for the treatment of an integral constant as a type at compile-time (e.g., to achieve static call dispatch based on constant integral values)