OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
device_partition.cuh
Go to the documentation of this file.
1 
2 /******************************************************************************
3  * Copyright (c) 2011, Duane Merrill. All rights reserved.
4  * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  * * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  * * Redistributions in binary form must reproduce the above copyright
11  * notice, this list of conditions and the following disclaimer in the
12  * documentation and/or other materials provided with the distribution.
13  * * Neither the name of the NVIDIA CORPORATION nor the
14  * names of its contributors may be used to endorse or promote products
15  * derived from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  ******************************************************************************/
29 
35 #pragma once
36 
37 #include <stdio.h>
38 #include <iterator>
39 
41 #include "../util_namespace.cuh"
42 
44 CUB_NS_PREFIX
45 
47 namespace cub {
48 
49 
74 {
// Flagged: static host-side entry point that partitions the items of d_in
// according to the parallel d_flags sequence, writing the partitioned
// sequence to d_out.
//
// Template parameters are the iterator types of the four sequence arguments.
//
// Parameters:
//   d_temp_storage      - caller-provided temporary storage, forwarded unchanged.
//   temp_storage_bytes  - size of that storage; taken by reference, so the
//                         callee may write to it.
//                         NOTE(review): presumably the usual CUB two-phase
//                         size-query protocol applies - confirm against the
//                         dispatch layer.
//   d_in                - input sequence of data items.
//   d_flags             - flag sequence that drives the split.
//   d_out               - destination for the partitioned sequence.
//   d_num_selected_out  - receives the total number of items selected.
//   num_items           - total number of input items. Declared as int, and
//                         OffsetT below is also int, so the count is bounded
//                         by the range of a signed int.
//   stream              - stream forwarded to the dispatch call (default 0).
//   debug_synchronous   - forwarded unchanged (default false).
//
// Returns cudaError_t.
// NOTE(review): the return statement itself is not visible in this listing;
// see the gap noted before the argument list below.
120  template <
121  typename InputIteratorT,
122  typename FlagIterator,
123  typename OutputIteratorT,
124  typename NumSelectedIteratorT>
125  CUB_RUNTIME_FUNCTION __forceinline__
126  static cudaError_t Flagged(
127  void* d_temp_storage,
128  size_t &temp_storage_bytes,
129  InputIteratorT d_in,
130  FlagIterator d_flags,
131  OutputIteratorT d_out,
132  NumSelectedIteratorT d_num_selected_out,
133  int num_items,
134  cudaStream_t stream = 0,
135  bool debug_synchronous = false)
136  {
137  typedef int OffsetT; // Signed integer type for global offsets
138  typedef NullType SelectOp; // Selection op (not used)
139  typedef NullType EqualityOp; // Equality operator (not used)
140 
// NOTE(review): the listing jumps from embedded line 140 to 142. The line
// that opens this call is missing (presumably a
// `return <dispatch class>::Dispatch(` statement - restore from upstream).
// What follows is only the argument list of that call.
142  d_temp_storage,
143  temp_storage_bytes,
144  d_in,
145  d_flags,
146  d_out,
// NOTE(review): embedded line 147 is missing here. Going by the Dispatch
// signature shown in this page's cross-references, the absent argument is
// presumably d_num_selected_out - confirm.
// The two operator slots receive default-constructed NullType placeholders
// because a flag-driven partition needs neither a predicate nor an equality op.
148  SelectOp(),
149  EqualityOp(),
150  num_items,
151  stream,
152  debug_synchronous);
153  }
154 
155 
// If: static host-side entry point that partitions the items of d_in using
// the select_op functor, writing the partitioned sequence to d_out.
//
// Template parameters are the iterator types of the sequence arguments plus
// the type of the selection functor.
//
// Parameters:
//   d_temp_storage      - caller-provided temporary storage, forwarded unchanged.
//   temp_storage_bytes  - size of that storage; taken by reference, so the
//                         callee may write to it.
//                         NOTE(review): presumably the usual CUB two-phase
//                         size-query protocol applies - confirm against the
//                         dispatch layer.
//   d_in                - input sequence of data items.
//   d_out               - destination for the partitioned sequence.
//   d_num_selected_out  - receives the total number of items selected.
//   num_items           - total number of input items. Declared as int, and
//                         OffsetT below is also int, so the count is bounded
//                         by the range of a signed int.
//   select_op           - selection operator, forwarded by value.
//   stream              - stream forwarded to the dispatch call (default 0).
//   debug_synchronous   - forwarded unchanged (default false).
//
// Returns cudaError_t.
// NOTE(review): the return statement itself is not visible in this listing;
// see the gap noted before the argument list below.
228  template <
229  typename InputIteratorT,
230  typename OutputIteratorT,
231  typename NumSelectedIteratorT,
232  typename SelectOp>
233  CUB_RUNTIME_FUNCTION __forceinline__
234  static cudaError_t If(
235  void* d_temp_storage,
236  size_t &temp_storage_bytes,
237  InputIteratorT d_in,
238  OutputIteratorT d_out,
239  NumSelectedIteratorT d_num_selected_out,
240  int num_items,
241  SelectOp select_op,
242  cudaStream_t stream = 0,
243  bool debug_synchronous = false)
244  {
245  typedef int OffsetT; // Signed integer type for global offsets
246  typedef NullType* FlagIterator; // FlagT iterator type (not used)
247  typedef NullType EqualityOp; // Equality operator (not used)
248 
// NOTE(review): the listing jumps from embedded line 248 to 250. The line
// that opens this call is missing (presumably a
// `return <dispatch class>::Dispatch(` statement - restore from upstream).
// What follows is only the argument list of that call.
250  d_temp_storage,
251  temp_storage_bytes,
252  d_in,
// No flag sequence exists for predicate-based partitioning, so NULL fills the
// flag-iterator slot (FlagIterator is typedef'd to NullType* above).
253  NULL,
254  d_out,
// NOTE(review): embedded line 255 is missing here. Going by the Dispatch
// signature shown in this page's cross-references, the absent argument is
// presumably d_num_selected_out - confirm.
256  select_op,
257  EqualityOp(),
258  num_items,
259  stream,
260  debug_synchronous);
261  }
262 
263 };
264 
270 } // CUB namespace
271 CUB_NS_POSTFIX // Optional outer namespace(s)
272 
273 
FlagsInputIteratorT SelectedOutputIteratorT NumSelectedIteratorT d_num_selected_out
[out] Pointer to the total number of items selected (i.e., length of d_selected_out)
KeyT const ValueT ValueT OffsetT OffsetT num_items
[in] Total number of input data items
Optional outer namespace(s)
CUB_RUNTIME_FUNCTION static __forceinline__ cudaError_t Dispatch(void *d_temp_storage, size_t &temp_storage_bytes, InputIteratorT d_in, FlagsInputIteratorT d_flags, SelectedOutputIteratorT d_selected_out, NumSelectedIteratorT d_num_selected_out, SelectOpT select_op, EqualityOpT equality_op, OffsetT num_items, cudaStream_t stream, bool debug_synchronous, int, ScanInitKernelPtrT scan_init_kernel, SelectIfKernelPtrT select_if_kernel, KernelConfig select_if_config)
< Function type of cub::SelectIfKernelPtrT
FlagsInputIteratorT SelectedOutputIteratorT NumSelectedIteratorT ScanTileStateT SelectOpT select_op
[in] Selection operator
CUB_RUNTIME_FUNCTION static __forceinline__ cudaError_t If(void *d_temp_storage, size_t &temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, NumSelectedIteratorT d_num_selected_out, int num_items, SelectOp select_op, cudaStream_t stream=0, bool debug_synchronous=false)
Uses the select_op functor to split the corresponding items from d_in into a partitioned sequence d_o...
OffsetT OffsetT
[in] Total number of input data items
CUB_RUNTIME_FUNCTION static __forceinline__ cudaError_t Flagged(void *d_temp_storage, size_t &temp_storage_bytes, InputIteratorT d_in, FlagIterator d_flags, OutputIteratorT d_out, NumSelectedIteratorT d_num_selected_out, int num_items, cudaStream_t stream=0, bool debug_synchronous=false)
Uses the d_flags sequence to split the corresponding items from d_in into a partitioned sequence d_ou...
DevicePartition provides device-wide, parallel operations for partitioning sequences of data items re...
A simple "NULL" marker type.
Definition: util_type.cuh:256
FlagsInputIteratorT d_flags
[in] Pointer to the input sequence of selection flags
OutputIteratorT d_out
[out] Pointer to the output sequence of partitioned data items