OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
device_select.cuh
Go to the documentation of this file.
1 
2 /******************************************************************************
3  * Copyright (c) 2011, Duane Merrill. All rights reserved.
4  * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  * * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  * * Redistributions in binary form must reproduce the above copyright
11  * notice, this list of conditions and the following disclaimer in the
12  * documentation and/or other materials provided with the distribution.
13  * * Neither the name of the NVIDIA CORPORATION nor the
14  * names of its contributors may be used to endorse or promote products
15  * derived from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  ******************************************************************************/
29 
35 #pragma once
36 
37 #include <stdio.h>
38 #include <iterator>
39 
40 #include "dispatch/dispatch_select_if.cuh"
41 #include "../util_namespace.cuh"
42 
44 CUB_NS_PREFIX
45 
47 namespace cub {
48 
49 
83 {
/**
 * \brief Uses the \p d_flags sequence to selectively copy the corresponding
 *        items from \p d_in into \p d_out.
 *
 * Thin front-end: it fixes the global offset type to \p int, supplies
 * NullType placeholders for the selection and equality operators (neither is
 * used by flag-based selection), and forwards every argument to
 * DispatchSelectIf::Dispatch.
 *
 * \param[in]     d_temp_storage      Temporary storage buffer forwarded to the dispatch layer.
 *                                    NOTE(review): presumably passing NULL only writes the required
 *                                    size to \p temp_storage_bytes -- confirm against the dispatch layer.
 * \param[in,out] temp_storage_bytes  Size in bytes of \p d_temp_storage (passed by reference)
 * \param[in]     d_in                Input sequence of data items
 * \param[in]     d_flags             Input sequence of selection flags
 * \param[out]    d_out               Output sequence receiving the selected items
 * \param[out]    d_num_selected_out  Pointer to the total number of items selected
 * \param[in]     num_items           Total number of input data items
 * \param[in]     stream              CUDA stream forwarded to the dispatch layer (default: 0)
 * \param[in]     debug_synchronous   Debug flag forwarded to the dispatch layer (default: false)
 *
 * \return The cudaError_t reported by the dispatch call.
 */
template <
    typename InputIteratorT,
    typename FlagIterator,
    typename OutputIteratorT,
    typename NumSelectedIteratorT>
CUB_RUNTIME_FUNCTION __forceinline__
static cudaError_t Flagged(
    void*                   d_temp_storage,
    size_t                  &temp_storage_bytes,
    InputIteratorT          d_in,
    FlagIterator            d_flags,
    OutputIteratorT         d_out,
    NumSelectedIteratorT    d_num_selected_out,
    int                     num_items,
    cudaStream_t            stream             = 0,
    bool                    debug_synchronous  = false)
{
    typedef int         OffsetT;        // Signed integer type for global offsets
    typedef NullType    SelectOp;       // Selection op (not used)
    typedef NullType    EqualityOp;     // Equality operator (not used)

    // Argument order follows Dispatch(d_temp_storage, temp_storage_bytes, d_in, d_flags,
    // d_selected_out, d_num_selected_out, select_op, equality_op, num_items, stream,
    // debug_synchronous).
    // NOTE(review): the trailing `false` is believed to be the dispatcher's
    // keep-rejects switch (selection rather than partitioning) -- confirm.
    return DispatchSelectIf<
            InputIteratorT,
            FlagIterator,
            OutputIteratorT,
            NumSelectedIteratorT,
            SelectOp,
            EqualityOp,
            OffsetT,
            false>::Dispatch(
        d_temp_storage,
        temp_storage_bytes,
        d_in,
        d_flags,
        d_out,
        d_num_selected_out,
        SelectOp(),
        EqualityOp(),
        num_items,
        stream,
        debug_synchronous);
}
161 
162 
/**
 * \brief Uses the \p select_op functor to selectively copy items from
 *        \p d_in into \p d_out.
 *
 * Thin front-end: it fixes the global offset type to \p int, passes a NULL
 * flag iterator and a NullType equality operator (neither is used by
 * predicate-based selection), and forwards every argument to
 * DispatchSelectIf::Dispatch.
 *
 * \param[in]     d_temp_storage      Temporary storage buffer forwarded to the dispatch layer.
 *                                    NOTE(review): presumably passing NULL only writes the required
 *                                    size to \p temp_storage_bytes -- confirm against the dispatch layer.
 * \param[in,out] temp_storage_bytes  Size in bytes of \p d_temp_storage (passed by reference)
 * \param[in]     d_in                Input sequence of data items
 * \param[out]    d_out               Output sequence receiving the selected items
 * \param[out]    d_num_selected_out  Pointer to the total number of items selected
 * \param[in]     num_items           Total number of input data items
 * \param[in]     select_op           Selection operator
 * \param[in]     stream              CUDA stream forwarded to the dispatch layer (default: 0)
 * \param[in]     debug_synchronous   Debug flag forwarded to the dispatch layer (default: false)
 *
 * \return The cudaError_t reported by the dispatch call.
 */
template <
    typename InputIteratorT,
    typename OutputIteratorT,
    typename NumSelectedIteratorT,
    typename SelectOp>
CUB_RUNTIME_FUNCTION __forceinline__
static cudaError_t If(
    void*                   d_temp_storage,
    size_t                  &temp_storage_bytes,
    InputIteratorT          d_in,
    OutputIteratorT         d_out,
    NumSelectedIteratorT    d_num_selected_out,
    int                     num_items,
    SelectOp                select_op,
    cudaStream_t            stream             = 0,
    bool                    debug_synchronous  = false)
{
    typedef int         OffsetT;        // Signed integer type for global offsets
    typedef NullType*   FlagIterator;   // FlagT iterator type (not used)
    typedef NullType    EqualityOp;     // Equality operator (not used)

    // Argument order follows Dispatch(d_temp_storage, temp_storage_bytes, d_in, d_flags,
    // d_selected_out, d_num_selected_out, select_op, equality_op, num_items, stream,
    // debug_synchronous); d_flags is NULL because no flag sequence exists here.
    // NOTE(review): the trailing `false` is believed to be the dispatcher's
    // keep-rejects switch (selection rather than partitioning) -- confirm.
    return DispatchSelectIf<
            InputIteratorT,
            FlagIterator,
            OutputIteratorT,
            NumSelectedIteratorT,
            SelectOp,
            EqualityOp,
            OffsetT,
            false>::Dispatch(
        d_temp_storage,
        temp_storage_bytes,
        d_in,
        NULL,
        d_out,
        d_num_selected_out,
        select_op,
        EqualityOp(),
        num_items,
        stream,
        debug_synchronous);
}
267 
268 
/**
 * \brief Given an input sequence \p d_in having runs of consecutive
 *        equal-valued keys, copies only the first key from each run to
 *        \p d_out.
 *
 * Thin front-end: it fixes the global offset type to \p int, passes a NULL
 * flag iterator and a NullType selection operator (neither is used), selects
 * the default Equality functor for comparing neighbouring keys, and forwards
 * every argument to DispatchSelectIf::Dispatch.
 *
 * \param[in]     d_temp_storage      Temporary storage buffer forwarded to the dispatch layer.
 *                                    NOTE(review): presumably passing NULL only writes the required
 *                                    size to \p temp_storage_bytes -- confirm against the dispatch layer.
 * \param[in,out] temp_storage_bytes  Size in bytes of \p d_temp_storage (passed by reference)
 * \param[in]     d_in                Input sequence of data items
 * \param[out]    d_out               Output sequence receiving the selected items
 * \param[out]    d_num_selected_out  Pointer to the total number of items selected
 * \param[in]     num_items           Total number of input data items
 * \param[in]     stream              CUDA stream forwarded to the dispatch layer (default: 0)
 * \param[in]     debug_synchronous   Debug flag forwarded to the dispatch layer (default: false)
 *
 * \return The cudaError_t reported by the dispatch call.
 */
template <
    typename InputIteratorT,
    typename OutputIteratorT,
    typename NumSelectedIteratorT>
CUB_RUNTIME_FUNCTION __forceinline__
static cudaError_t Unique(
    void*                   d_temp_storage,
    size_t                  &temp_storage_bytes,
    InputIteratorT          d_in,
    OutputIteratorT         d_out,
    NumSelectedIteratorT    d_num_selected_out,
    int                     num_items,
    cudaStream_t            stream             = 0,
    bool                    debug_synchronous  = false)
{
    typedef int         OffsetT;        // Signed integer type for global offsets
    typedef NullType*   FlagIterator;   // FlagT iterator type (not used)
    typedef NullType    SelectOp;       // Selection op (not used)
    typedef Equality    EqualityOp;     // Default == operator

    // Argument order follows Dispatch(d_temp_storage, temp_storage_bytes, d_in, d_flags,
    // d_selected_out, d_num_selected_out, select_op, equality_op, num_items, stream,
    // debug_synchronous); d_flags is NULL because no flag sequence exists here.
    // NOTE(review): the trailing `false` is believed to be the dispatcher's
    // keep-rejects switch (selection rather than partitioning) -- confirm.
    return DispatchSelectIf<
            InputIteratorT,
            FlagIterator,
            OutputIteratorT,
            NumSelectedIteratorT,
            SelectOp,
            EqualityOp,
            OffsetT,
            false>::Dispatch(
        d_temp_storage,
        temp_storage_bytes,
        d_in,
        NULL,
        d_out,
        d_num_selected_out,
        SelectOp(),
        EqualityOp(),
        num_items,
        stream,
        debug_synchronous);
}
357 
358 };
359 
366 } // CUB namespace
367 CUB_NS_POSTFIX // Optional outer namespace(s)
368 
369 
FlagsInputIteratorT SelectedOutputIteratorT NumSelectedIteratorT d_num_selected_out
[out] Pointer to the total number of items selected (i.e., length of d_selected_out)
KeyT const ValueT ValueT OffsetT OffsetT num_items
[in] Total number of input data items
Optional outer namespace(s)
DeviceSelect provides device-wide, parallel operations for compacting selected items from sequences o...
CUB_RUNTIME_FUNCTION static __forceinline__ cudaError_t Dispatch(void *d_temp_storage, size_t &temp_storage_bytes, InputIteratorT d_in, FlagsInputIteratorT d_flags, SelectedOutputIteratorT d_selected_out, NumSelectedIteratorT d_num_selected_out, SelectOpT select_op, EqualityOpT equality_op, OffsetT num_items, cudaStream_t stream, bool debug_synchronous, int, ScanInitKernelPtrT scan_init_kernel, SelectIfKernelPtrT select_if_kernel, KernelConfig select_if_config)
Function type of cub::SelectIfKernelPtrT
Default equality functor.
FlagsInputIteratorT SelectedOutputIteratorT NumSelectedIteratorT ScanTileStateT SelectOpT select_op
[in] Selection operator
OffsetT OffsetT
[in] Total number of input data items
CUB_RUNTIME_FUNCTION static __forceinline__ cudaError_t Unique(void *d_temp_storage, size_t &temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, NumSelectedIteratorT d_num_selected_out, int num_items, cudaStream_t stream=0, bool debug_synchronous=false)
Given an input sequence d_in having runs of consecutive equal-valued keys, only the first key from ea...
CUB_RUNTIME_FUNCTION static __forceinline__ cudaError_t Flagged(void *d_temp_storage, size_t &temp_storage_bytes, InputIteratorT d_in, FlagIterator d_flags, OutputIteratorT d_out, NumSelectedIteratorT d_num_selected_out, int num_items, cudaStream_t stream=0, bool debug_synchronous=false)
Uses the d_flags sequence to selectively copy the corresponding items from d_in into d_out....
A simple "NULL" marker type.
Definition: util_type.cuh:256
FlagsInputIteratorT d_flags
[in] Pointer to the input sequence of selection flags
OutputIteratorT d_out
[out] Pointer to the output sequence of selected data items
CUB_RUNTIME_FUNCTION static __forceinline__ cudaError_t If(void *d_temp_storage, size_t &temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, NumSelectedIteratorT d_num_selected_out, int num_items, SelectOp select_op, cudaStream_t stream=0, bool debug_synchronous=false)
Uses the select_op functor to selectively copy items from d_in into d_out. The total number of items ...