OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
 
Loading...
Searching...
No Matches
device_scan.cuh
Go to the documentation of this file.
1
2/******************************************************************************
3 * Copyright (c) 2011, Duane Merrill. All rights reserved.
4 * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of the NVIDIA CORPORATION nor the
14 * names of its contributors may be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
21 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 ******************************************************************************/
29
35#pragma once
36
37#include <stdio.h>
38#include <iterator>
39
41#include "../util_namespace.cuh"
42
44CUB_NS_PREFIX
45
47namespace cub {
48
49
90{
91 /******************************************************************/
95
// ExclusiveSum: computes a device-wide exclusive prefix sum of the num_items
// values read through d_in, writing the results through d_out. A zero of the
// output value type (init_value below) is used as the initial value.
//
// Parameters (as far as this listing shows):
//   d_temp_storage, temp_storage_bytes - forwarded unchanged to the call below.
//       NOTE(review): presumably CUB's usual temporary-storage size query /
//       allocation protocol; that contract is not visible here - confirm.
//   d_in               - input sequence of data items.
//   d_out              - output sequence.
//   num_items          - total number of input data items (int).
//   stream             - defaults to 0.
//   debug_synchronous  - defaults to false.
// Declared to return a cudaError_t.
145 template <
146 typename InputIteratorT,
147 typename OutputIteratorT>
148 CUB_RUNTIME_FUNCTION
149 static cudaError_t ExclusiveSum(
150 void *d_temp_storage,
151 size_t &temp_storage_bytes,
152 InputIteratorT d_in,
153 OutputIteratorT d_out,
154 int num_items,
155 cudaStream_t stream = 0,
156 bool debug_synchronous = false)
157 {
158 // Signed integer type for global offsets
// NOTE(review): OffsetT is not referenced in any visible line of this body;
// presumably it is a template argument of the missing call head - confirm.
159 typedef int OffsetT;
160
161 // The output value type
162 typedef typename If<(Equals<typename std::iterator_traits<OutputIteratorT>::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ?
163 typename std::iterator_traits<InputIteratorT>::value_type, // ... then the input iterator's value type,
164 typename std::iterator_traits<OutputIteratorT>::value_type>::Type OutputT; // ... else the output iterator's value type
165
166 // Initial value
167 OutputT init_value = 0;
168
// NOTE(review): listing lines 169 and 175 are absent from this extract, so the
// callee receiving the arguments below (and the return statement) is not
// visible. Presumably it is the Dispatch(...) routine listed at the end of
// this page, with init_value passed between Sum() and num_items - confirm
// against the original header before relying on this.
170 d_temp_storage,
171 temp_storage_bytes,
172 d_in,
173 d_out,
174 Sum(),
176 num_items,
177 stream,
178 debug_synchronous);
179 }
180
181
// ExclusiveScan: computes a device-wide exclusive prefix scan of the num_items
// values read through d_in using the caller-supplied binary functor scan_op,
// writing the results through d_out.
//
// Parameters (as far as this listing shows):
//   d_temp_storage, temp_storage_bytes - forwarded unchanged to the call below.
//       NOTE(review): presumably CUB's usual temporary-storage size query /
//       allocation protocol; that contract is not visible here - confirm.
//   d_in               - input sequence of data items.
//   d_out              - output sequence.
//   scan_op            - binary scan functor.
//   init_value         - initial value to seed the exclusive scan.
//   num_items          - total number of input data items (int).
//   stream             - defaults to 0.
//   debug_synchronous  - defaults to false.
// Declared to return a cudaError_t.
237 template <
238 typename InputIteratorT,
239 typename OutputIteratorT,
240 typename ScanOpT,
241 typename InitValueT>
242 CUB_RUNTIME_FUNCTION
243 static cudaError_t ExclusiveScan(
244 void *d_temp_storage,
245 size_t &temp_storage_bytes,
246 InputIteratorT d_in,
247 OutputIteratorT d_out,
248 ScanOpT scan_op,
249 InitValueT init_value,
250 int num_items,
251 cudaStream_t stream = 0,
252 bool debug_synchronous = false)
253 {
254 // Signed integer type for global offsets
// NOTE(review): OffsetT is not referenced in any visible line of this body;
// presumably it is a template argument of the missing call head - confirm.
255 typedef int OffsetT;
256
// NOTE(review): listing lines 257 and 263 are absent from this extract, so the
// callee receiving the arguments below (and the return statement) is not
// visible, and init_value does not appear among the visible arguments.
// Presumably the call is to the Dispatch(...) routine listed at the end of
// this page, with init_value passed between scan_op and num_items - confirm
// against the original header before relying on this.
258 d_temp_storage,
259 temp_storage_bytes,
260 d_in,
261 d_out,
262 scan_op,
264 num_items,
265 stream,
266 debug_synchronous);
267 }
268
269
271 /******************************************************************/
275
276
// InclusiveSum: computes a device-wide inclusive prefix sum of the num_items
// values read through d_in, writing the results through d_out.
//
// Parameters (as far as this listing shows):
//   d_temp_storage, temp_storage_bytes - forwarded unchanged to the call below.
//       NOTE(review): presumably CUB's usual temporary-storage size query /
//       allocation protocol; that contract is not visible here - confirm.
//   d_in               - input sequence of data items.
//   d_out              - output sequence.
//   num_items          - total number of input data items (int).
//   stream             - defaults to 0.
//   debug_synchronous  - defaults to false.
// Declared to return a cudaError_t.
319 template <
320 typename InputIteratorT,
321 typename OutputIteratorT>
322 CUB_RUNTIME_FUNCTION
323 static cudaError_t InclusiveSum(
324 void* d_temp_storage,
325 size_t& temp_storage_bytes,
326 InputIteratorT d_in,
327 OutputIteratorT d_out,
328 int num_items,
329 cudaStream_t stream = 0,
330 bool debug_synchronous = false)
331 {
332 // Signed integer type for global offsets
// NOTE(review): OffsetT is not referenced in any visible line of this body;
// presumably it is a template argument of the missing call head - confirm.
333 typedef int OffsetT;
334
// NOTE(review): listing line 335 is absent from this extract, so the callee
// receiving the arguments below (and the return statement) is not visible.
// Presumably it is the Dispatch(...) routine listed at the end of this page.
// Sum() is the default sum functor; NullType() (a "NULL" marker type) sits in
// the argument position where the exclusive variants supply an initial value,
// presumably signalling that no seed is used - confirm.
336 d_temp_storage,
337 temp_storage_bytes,
338 d_in,
339 d_out,
340 Sum(),
341 NullType(),
342 num_items,
343 stream,
344 debug_synchronous);
345 }
346
347
// InclusiveScan: computes a device-wide inclusive prefix scan of the num_items
// values read through d_in using the caller-supplied binary functor scan_op,
// writing the results through d_out.
//
// Parameters (as far as this listing shows):
//   d_temp_storage, temp_storage_bytes - forwarded unchanged to the call below.
//       NOTE(review): presumably CUB's usual temporary-storage size query /
//       allocation protocol; that contract is not visible here - confirm.
//   d_in               - input sequence of data items.
//   d_out              - output sequence.
//   scan_op            - binary scan functor.
//   num_items          - total number of input data items (int).
//   stream             - defaults to 0.
//   debug_synchronous  - defaults to false.
// Declared to return a cudaError_t.
402 template <
403 typename InputIteratorT,
404 typename OutputIteratorT,
405 typename ScanOpT>
406 CUB_RUNTIME_FUNCTION
407 static cudaError_t InclusiveScan(
408 void *d_temp_storage,
409 size_t &temp_storage_bytes,
410 InputIteratorT d_in,
411 OutputIteratorT d_out,
412 ScanOpT scan_op,
413 int num_items,
414 cudaStream_t stream = 0,
415 bool debug_synchronous = false)
416 {
417 // Signed integer type for global offsets
// NOTE(review): OffsetT is not referenced in any visible line of this body;
// presumably it is a template argument of the missing call head - confirm.
418 typedef int OffsetT;
419
// NOTE(review): listing line 420 is absent from this extract, so the callee
// receiving the arguments below (and the return statement) is not visible.
// Presumably it is the Dispatch(...) routine listed at the end of this page.
// NullType() (a "NULL" marker type) sits in the argument position where the
// exclusive variants supply an initial value, presumably signalling that no
// seed is used - confirm.
421 d_temp_storage,
422 temp_storage_bytes,
423 d_in,
424 d_out,
425 scan_op,
426 NullType(),
427 num_items,
428 stream,
429 debug_synchronous);
430 }
431
433
434};
435
440} // CUB namespace
441CUB_NS_POSTFIX // Optional outer namespace(s)
442
443
Optional outer namespace(s)
KeyT const ValueT ValueT OffsetT OffsetT num_items
[in] Total number of input data items
OutputIteratorT ScanTileStateT int ScanOpT InitValueT init_value
Initial value to seed the exclusive scan.
OffsetT OffsetT
[in] Total number of input data items
OutputIteratorT ScanTileStateT int ScanOpT scan_op
Binary scan functor.
OutputIteratorT d_out
[out] Pointer to the output sequence of data items
DeviceScan provides device-wide, parallel operations for computing a prefix scan across a sequence of...
static CUB_RUNTIME_FUNCTION cudaError_t ExclusiveSum(void *d_temp_storage, size_t &temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, int num_items, cudaStream_t stream=0, bool debug_synchronous=false)
Computes a device-wide exclusive prefix sum. The value of 0 is applied as the initial value,...
static CUB_RUNTIME_FUNCTION cudaError_t InclusiveScan(void *d_temp_storage, size_t &temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, ScanOpT scan_op, int num_items, cudaStream_t stream=0, bool debug_synchronous=false)
Computes a device-wide inclusive prefix scan using the specified binary scan_op functor.
static CUB_RUNTIME_FUNCTION cudaError_t ExclusiveScan(void *d_temp_storage, size_t &temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, ScanOpT scan_op, InitValueT init_value, int num_items, cudaStream_t stream=0, bool debug_synchronous=false)
Computes a device-wide exclusive prefix scan using the specified binary scan_op functor....
static CUB_RUNTIME_FUNCTION cudaError_t InclusiveSum(void *d_temp_storage, size_t &temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, int num_items, cudaStream_t stream=0, bool debug_synchronous=false)
Computes a device-wide inclusive prefix sum.
CUB_RUNTIME_FUNCTION static __forceinline__ cudaError_t Dispatch(void *d_temp_storage, size_t &temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, ScanOpT scan_op, InitValueT init_value, OffsetT num_items, cudaStream_t stream, bool debug_synchronous, int, ScanInitKernelPtrT init_kernel, ScanSweepKernelPtrT scan_kernel, KernelConfig scan_kernel_config)
Function type of cub::DeviceScanKernelPtrT
Type equality test.
Definition util_type.cuh:99
Type selection (IF ? ThenType : ElseType)
Definition util_type.cuh:73
A simple "NULL" marker type.
Default sum functor.