OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
thread_scan.cuh
Go to the documentation of this file.
1 /******************************************************************************
2  * Copyright (c) 2011, Duane Merrill. All rights reserved.
3  * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of the NVIDIA CORPORATION nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  *
27  ******************************************************************************/
28 
34 #pragma once
35 
36 #include "../thread/thread_operators.cuh"
37 #include "../util_namespace.cuh"
38 
40 CUB_NS_PREFIX
41 
43 namespace cub {
44 
46 namespace internal {
47 
48 
/**
 * \brief Sequential exclusive scan over \p LENGTH items that continues from an
 *        already-established running total.
 *
 * For each item, \p scan_op is applied to the current exclusive total and the
 * item; the exclusive total (i.e. the total *before* the item) is stored to
 * \p output, and the combined value becomes the exclusive total for the next item.
 *
 * \tparam LENGTH   Number of items to process (carried by the \p Int2Type tag)
 * \tparam T        Item type
 * \tparam ScanOp   Binary functor type invoked as <tt>scan_op(a, b)</tt>
 *
 * \param inclusive Running inclusive total; returned unchanged when the loop body never executes
 * \param exclusive Running exclusive total written in front of the first item
 * \param input     Items to scan
 * \param output    Destination for the exclusive totals
 * \param scan_op   Binary scan operator
 *
 * \return The inclusive total after the last processed item.
 */
template <
    int         LENGTH,
    typename    T,
    typename    ScanOp>
__device__ __forceinline__ T ThreadScanExclusive(
    T                   inclusive,
    T                   exclusive,
    T                   *input,
    T                   *output,
    ScanOp              scan_op,
    Int2Type<LENGTH>    /*length*/)
{
    #pragma unroll
    for (int item = 0; item < LENGTH; item++)
    {
        // input[item] is consumed before output[item] is stored
        inclusive       = scan_op(exclusive, input[item]);
        output[item]    = exclusive;
        exclusive       = inclusive;
    }

    return inclusive;
}
81 
82 
83 
/**
 * \brief Sequential exclusive scan over \p LENGTH items of \p input, seeded with \p prefix.
 *
 * \p prefix is always stored to <tt>output[0]</tt>.  When \p apply_prefix is
 * \p true the running total starts as <tt>scan_op(prefix, input[0])</tt>;
 * otherwise it starts as <tt>input[0]</tt> alone.  The remaining
 * <tt>LENGTH - 1</tt> items are handled by the running-total overload.
 *
 * <tt>input[0]</tt> and <tt>output[0]</tt> are accessed unconditionally, so
 * \p LENGTH is expected to be at least 1.
 *
 * \tparam LENGTH   Number of items in \p input and \p output
 * \tparam T        Item type
 * \tparam ScanOp   Binary functor type invoked as <tt>scan_op(a, b)</tt>
 *
 * \param input         Items to scan
 * \param output        Destination for the exclusive totals
 * \param scan_op       Binary scan operator
 * \param prefix        Seed value written to <tt>output[0]</tt>
 * \param apply_prefix  Whether \p prefix is folded into the running total
 *
 * \return The running total after the last item.
 */
template <
    int         LENGTH,
    typename    T,
    typename    ScanOp>
__device__ __forceinline__ T ThreadScanExclusive(
    T           *input,
    T           *output,
    ScanOp      scan_op,
    T           prefix,
    bool        apply_prefix = true)
{
    // Seed the running total from the first item (read before output[0] is written)
    T running = input[0];
    if (apply_prefix)
        running = scan_op(prefix, running);

    output[0] = prefix;

    // Exclusive total that precedes the second item
    T carried = running;

    // Continue scan over the remaining items
    return ThreadScanExclusive(
        running,
        carried,
        input + 1,
        output + 1,
        scan_op,
        Int2Type<LENGTH - 1>());
}
112 
113 
/**
 * \brief Sequential exclusive scan over statically-sized arrays, seeded with \p prefix.
 *
 * Thin wrapper: decays both array references to element pointers and forwards
 * every argument to the pointer-based overload with \p LENGTH given explicitly.
 *
 * \tparam LENGTH   Array length, deduced from \p input and \p output
 * \tparam T        Item type
 * \tparam ScanOp   Binary functor type
 *
 * \param input         Array of items to scan
 * \param output        Array receiving the scan results
 * \param scan_op       Binary scan operator
 * \param prefix        Seed value
 * \param apply_prefix  Forwarded unchanged to the pointer-based overload
 *
 * \return Whatever the pointer-based overload returns.
 */
template <
    int         LENGTH,
    typename    T,
    typename    ScanOp>
__device__ __forceinline__ T ThreadScanExclusive(
    T           (&input)[LENGTH],
    T           (&output)[LENGTH],
    ScanOp      scan_op,
    T           prefix,
    bool        apply_prefix = true)
{
    // Pass plain pointers so the call selects the pointer overload, not this one
    T *input_items  = input;
    T *output_items = output;

    return ThreadScanExclusive<LENGTH>(input_items, output_items, scan_op, prefix, apply_prefix);
}
134 
135 
136 
137 
138 
139 
140 
141 
142 
/**
 * \brief Sequential inclusive scan over \p LENGTH items that continues from an
 *        already-established running total.
 *
 * Each item is combined into the running total via
 * <tt>scan_op(inclusive, input[i])</tt> and the updated total is stored to the
 * matching \p output slot.
 *
 * \tparam LENGTH   Number of items to process (carried by the \p Int2Type tag)
 * \tparam T        Item type
 * \tparam ScanOp   Binary functor type invoked as <tt>scan_op(a, b)</tt>
 *
 * \param inclusive Running total to continue from; returned unchanged when the loop body never executes
 * \param input     Items to scan
 * \param output    Destination for the inclusive totals
 * \param scan_op   Binary scan operator
 *
 * \return The running total after the last processed item.
 */
template <
    int         LENGTH,
    typename    T,
    typename    ScanOp>
__device__ __forceinline__ T ThreadScanInclusive(
    T                   inclusive,
    T                   *input,
    T                   *output,
    ScanOp              scan_op,
    Int2Type<LENGTH>    /*length*/)
{
    #pragma unroll
    for (int item = 0; item < LENGTH; item++)
    {
        inclusive       = scan_op(inclusive, input[item]);
        output[item]    = inclusive;
    }

    return inclusive;
}
163 
164 
/**
 * \brief Sequential inclusive scan over \p LENGTH items of \p input (no prefix).
 *
 * The first item is copied straight to <tt>output[0]</tt> and becomes the
 * initial running total; the remaining <tt>LENGTH - 1</tt> items are handled by
 * the running-total overload.
 *
 * <tt>input[0]</tt> and <tt>output[0]</tt> are accessed unconditionally, so
 * \p LENGTH is expected to be at least 1.
 *
 * \tparam LENGTH   Number of items in \p input and \p output
 * \tparam T        Item type
 * \tparam ScanOp   Binary functor type invoked as <tt>scan_op(a, b)</tt>
 *
 * \param input     Items to scan
 * \param output    Destination for the inclusive totals
 * \param scan_op   Binary scan operator
 *
 * \return The running total after the last item.
 */
template <
    int         LENGTH,
    typename    T,
    typename    ScanOp>
__device__ __forceinline__ T ThreadScanInclusive(
    T           *input,
    T           *output,
    ScanOp      scan_op)
{
    // The first item is its own inclusive total
    T running = input[0];
    output[0] = running;

    // Continue scan over the remaining items
    return ThreadScanInclusive(
        running,
        input + 1,
        output + 1,
        scan_op,
        Int2Type<LENGTH - 1>());
}
187 
188 
/**
 * \brief Sequential inclusive scan over statically-sized arrays (no prefix).
 *
 * Thin wrapper: decays both array references to element pointers and forwards
 * to the pointer-based overload with \p LENGTH given explicitly.
 *
 * \tparam LENGTH   Array length, deduced from \p input and \p output
 * \tparam T        Item type
 * \tparam ScanOp   Binary functor type
 *
 * \param input     Array of items to scan
 * \param output    Array receiving the scan results
 * \param scan_op   Binary scan operator
 *
 * \return Whatever the pointer-based overload returns.
 */
template <
    int         LENGTH,
    typename    T,
    typename    ScanOp>
__device__ __forceinline__ T ThreadScanInclusive(
    T           (&input)[LENGTH],
    T           (&output)[LENGTH],
    ScanOp      scan_op)
{
    // Pass plain pointers so the call selects the pointer overload, not this one
    T *input_items  = input;
    T *output_items = output;

    return ThreadScanInclusive<LENGTH>(input_items, output_items, scan_op);
}
207 
208 
/**
 * \brief Sequential inclusive scan over \p LENGTH items of \p input, seeded with \p prefix.
 *
 * When \p apply_prefix is \p true the first output is
 * <tt>scan_op(prefix, input[0])</tt>; otherwise it is <tt>input[0]</tt> and
 * \p prefix is not used.  The remaining <tt>LENGTH - 1</tt> items are handled
 * by the running-total overload.
 *
 * <tt>input[0]</tt> and <tt>output[0]</tt> are accessed unconditionally, so
 * \p LENGTH is expected to be at least 1.
 *
 * \tparam LENGTH   Number of items in \p input and \p output
 * \tparam T        Item type
 * \tparam ScanOp   Binary functor type invoked as <tt>scan_op(a, b)</tt>
 *
 * \param input         Items to scan
 * \param output        Destination for the inclusive totals
 * \param scan_op       Binary scan operator
 * \param prefix        Seed value
 * \param apply_prefix  Whether \p prefix is folded into the running total
 *
 * \return The running total after the last item.
 */
template <
    int         LENGTH,
    typename    T,
    typename    ScanOp>
__device__ __forceinline__ T ThreadScanInclusive(
    T           *input,
    T           *output,
    ScanOp      scan_op,
    T           prefix,
    bool        apply_prefix = true)
{
    // Seed the running total from the first item, optionally combined with the prefix
    T running = input[0];
    if (apply_prefix)
        running = scan_op(prefix, running);

    output[0] = running;

    // Continue scan over the remaining items
    return ThreadScanInclusive(
        running,
        input + 1,
        output + 1,
        scan_op,
        Int2Type<LENGTH - 1>());
}
237 
238 
/**
 * \brief Sequential inclusive scan over statically-sized arrays, seeded with \p prefix.
 *
 * Thin wrapper: decays both array references to element pointers and forwards
 * every argument to the pointer-based overload with \p LENGTH given explicitly.
 *
 * \tparam LENGTH   Array length, deduced from \p input and \p output
 * \tparam T        Item type
 * \tparam ScanOp   Binary functor type
 *
 * \param input         Array of items to scan
 * \param output        Array receiving the scan results
 * \param scan_op       Binary scan operator
 * \param prefix        Seed value
 * \param apply_prefix  Forwarded unchanged to the pointer-based overload
 *
 * \return Whatever the pointer-based overload returns.
 */
template <
    int         LENGTH,
    typename    T,
    typename    ScanOp>
__device__ __forceinline__ T ThreadScanInclusive(
    T           (&input)[LENGTH],
    T           (&output)[LENGTH],
    ScanOp      scan_op,
    T           prefix,
    bool        apply_prefix = true)
{
    // Pass plain pointers so the call selects the pointer overload, not this one
    T *input_items  = input;
    T *output_items = output;

    return ThreadScanInclusive<LENGTH>(input_items, output_items, scan_op, prefix, apply_prefix);
}
259 
260 
262  // end group UtilModule
264 
265 
266 } // internal namespace
267 } // CUB namespace
268 CUB_NS_POSTFIX // Optional outer namespace(s)
Optional outer namespace(s)
OutputIteratorT ScanTileStateT int ScanOpT scan_op
Binary scan functor.
__device__ __forceinline__ T ThreadScanExclusive(T inclusive, T exclusive, T *input, T *output, ScanOp scan_op, Int2Type< LENGTH >)
Definition: thread_scan.cuh:63
Allows for the treatment of an integral constant as a type at compile-time (e.g., to achieve static call dispatch based on constant integral values)
Definition: util_type.cuh:275
__device__ __forceinline__ T ThreadScanInclusive(T inclusive, T *input, T *output, ScanOp scan_op, Int2Type< LENGTH >)