OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
device_scan.cuh
Go to the documentation of this file.
1 
2 /******************************************************************************
3  * Copyright (c) 2011, Duane Merrill. All rights reserved.
4  * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  * * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  * * Redistributions in binary form must reproduce the above copyright
11  * notice, this list of conditions and the following disclaimer in the
12  * documentation and/or other materials provided with the distribution.
13  * * Neither the name of the NVIDIA CORPORATION nor the
14  * names of its contributors may be used to endorse or promote products
15  * derived from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  ******************************************************************************/
29 
#pragma once

#include <stdio.h>
#include <iterator>

#include "dispatch/dispatch_scan.cuh"
#include "../util_namespace.cuh"
42 
44 CUB_NS_PREFIX
45 
47 namespace cub {
48 
49 
89 struct DeviceScan
90 {
91  /******************************************************************/
95 
145  template <
146  typename InputIteratorT,
147  typename OutputIteratorT>
148  CUB_RUNTIME_FUNCTION
149  static cudaError_t ExclusiveSum(
150  void *d_temp_storage,
151  size_t &temp_storage_bytes,
152  InputIteratorT d_in,
153  OutputIteratorT d_out,
154  int num_items,
155  cudaStream_t stream = 0,
156  bool debug_synchronous = false)
157  {
158  // Signed integer type for global offsets
159  typedef int OffsetT;
160 
161  // The output value type
162  typedef typename If<(Equals<typename std::iterator_traits<OutputIteratorT>::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ?
163  typename std::iterator_traits<InputIteratorT>::value_type, // ... then the input iterator's value type,
164  typename std::iterator_traits<OutputIteratorT>::value_type>::Type OutputT; // ... else the output iterator's value type
165 
166  // Initial value
167  OutputT init_value = 0;
168 
170  d_temp_storage,
171  temp_storage_bytes,
172  d_in,
173  d_out,
174  Sum(),
175  init_value,
176  num_items,
177  stream,
178  debug_synchronous);
179  }
180 
181 
237  template <
238  typename InputIteratorT,
239  typename OutputIteratorT,
240  typename ScanOpT,
241  typename InitValueT>
242  CUB_RUNTIME_FUNCTION
243  static cudaError_t ExclusiveScan(
244  void *d_temp_storage,
245  size_t &temp_storage_bytes,
246  InputIteratorT d_in,
247  OutputIteratorT d_out,
248  ScanOpT scan_op,
249  InitValueT init_value,
250  int num_items,
251  cudaStream_t stream = 0,
252  bool debug_synchronous = false)
253  {
254  // Signed integer type for global offsets
255  typedef int OffsetT;
256 
258  d_temp_storage,
259  temp_storage_bytes,
260  d_in,
261  d_out,
262  scan_op,
263  init_value,
264  num_items,
265  stream,
266  debug_synchronous);
267  }
268 
269 
271  /******************************************************************/
275 
276 
319  template <
320  typename InputIteratorT,
321  typename OutputIteratorT>
322  CUB_RUNTIME_FUNCTION
323  static cudaError_t InclusiveSum(
324  void* d_temp_storage,
325  size_t& temp_storage_bytes,
326  InputIteratorT d_in,
327  OutputIteratorT d_out,
328  int num_items,
329  cudaStream_t stream = 0,
330  bool debug_synchronous = false)
331  {
332  // Signed integer type for global offsets
333  typedef int OffsetT;
334 
336  d_temp_storage,
337  temp_storage_bytes,
338  d_in,
339  d_out,
340  Sum(),
341  NullType(),
342  num_items,
343  stream,
344  debug_synchronous);
345  }
346 
347 
402  template <
403  typename InputIteratorT,
404  typename OutputIteratorT,
405  typename ScanOpT>
406  CUB_RUNTIME_FUNCTION
407  static cudaError_t InclusiveScan(
408  void *d_temp_storage,
409  size_t &temp_storage_bytes,
410  InputIteratorT d_in,
411  OutputIteratorT d_out,
412  ScanOpT scan_op,
413  int num_items,
414  cudaStream_t stream = 0,
415  bool debug_synchronous = false)
416  {
417  // Signed integer type for global offsets
418  typedef int OffsetT;
419 
421  d_temp_storage,
422  temp_storage_bytes,
423  d_in,
424  d_out,
425  scan_op,
426  NullType(),
427  num_items,
428  stream,
429  debug_synchronous);
430  }
431 
433 
434 };
435 
440 } // CUB namespace
441 CUB_NS_POSTFIX // Optional outer namespace(s)
442 
443 
Type equality test.
Definition: util_type.cuh:98
DeviceScan provides device-wide, parallel operations for computing a prefix scan across a sequence of...
Definition: device_scan.cuh:89
KeyT const ValueT ValueT OffsetT OffsetT num_items
[in] Total number of input data items
Optional outer namespace(s)
static CUB_RUNTIME_FUNCTION cudaError_t ExclusiveScan(void *d_temp_storage, size_t &temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, ScanOpT scan_op, InitValueT init_value, int num_items, cudaStream_t stream=0, bool debug_synchronous=false)
Computes a device-wide exclusive prefix scan using the specified binary scan_op functor....
OffsetT OffsetT
[in] Total number of input data items
OutputIteratorT ScanTileStateT int ScanOpT scan_op
Binary scan functor.
static CUB_RUNTIME_FUNCTION cudaError_t ExclusiveSum(void *d_temp_storage, size_t &temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, int num_items, cudaStream_t stream=0, bool debug_synchronous=false)
Computes a device-wide exclusive prefix sum. The value of 0 is applied as the initial value,...
static CUB_RUNTIME_FUNCTION cudaError_t InclusiveScan(void *d_temp_storage, size_t &temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, ScanOpT scan_op, int num_items, cudaStream_t stream=0, bool debug_synchronous=false)
Computes a device-wide inclusive prefix scan using the specified binary scan_op functor.
CUB_RUNTIME_FUNCTION static __forceinline__ cudaError_t Dispatch(void *d_temp_storage, size_t &temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, ScanOpT scan_op, InitValueT init_value, OffsetT num_items, cudaStream_t stream, bool debug_synchronous, int, ScanInitKernelPtrT init_kernel, ScanSweepKernelPtrT scan_kernel, KernelConfig scan_kernel_config)
< Function type of cub::DeviceScanKernelPtrT
static CUB_RUNTIME_FUNCTION cudaError_t InclusiveSum(void *d_temp_storage, size_t &temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, int num_items, cudaStream_t stream=0, bool debug_synchronous=false)
Computes a device-wide inclusive prefix sum.
Type selection (IF ? ThenType : ElseType)
Definition: util_type.cuh:72
OutputIteratorT ScanTileStateT int ScanOpT InitValueT init_value
Initial value to seed the exclusive scan.
A simple "NULL" marker type.
Definition: util_type.cuh:256
Default sum functor.
OutputIteratorT d_out
[out] Pointer to the output sequence of data items