OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
 
Loading...
Searching...
No Matches
device_select.cuh
Go to the documentation of this file.
1
2/******************************************************************************
3 * Copyright (c) 2011, Duane Merrill. All rights reserved.
4 * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of the NVIDIA CORPORATION nor the
14 * names of its contributors may be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
21 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 ******************************************************************************/
29
35#pragma once
36
#include <stdio.h>
#include <iterator>

#include "dispatch/dispatch_select_if.cuh"
#include "../util_namespace.cuh"
42
44CUB_NS_PREFIX
45
47namespace cub {
48
49
83{
/**
 * \brief Uses the \p d_flags sequence to selectively copy the corresponding
 *        items from \p d_in into \p d_out.
 *
 * Thin front-end: it fixes the global offset type to \c int, substitutes
 * \c NullType for the (unused) selection and equality operators, and forwards
 * every argument to DispatchSelectIf::Dispatch.
 *
 * \tparam InputIteratorT        Iterator type of the input items
 * \tparam FlagIterator          Iterator type of the selection flags
 * \tparam OutputIteratorT       Iterator type of the selected output items
 * \tparam NumSelectedIteratorT  Iterator type receiving the number of selected items
 *
 * \param[in]     d_temp_storage      Temporary storage allocation, forwarded unchanged to the dispatch layer
 * \param[in,out] temp_storage_bytes  Size in bytes of \p d_temp_storage (taken by reference;
 *                                    NOTE(review): presumably written by the dispatch layer when
 *                                    querying the required size -- confirm against DispatchSelectIf)
 * \param[in]     d_in                Input sequence of data items
 * \param[in]     d_flags             Input sequence of selection flags
 * \param[out]    d_out               Output sequence receiving the selected items
 * \param[out]    d_num_selected_out  Receives the total number of items selected
 * \param[in]     num_items           Total number of input data items
 * \param[in]     stream              CUDA stream handed to the dispatch layer (defaults to stream 0)
 * \param[in]     debug_synchronous   Debug flag handed to the dispatch layer (defaults to \c false)
 *
 * \return The \c cudaError_t reported by DispatchSelectIf::Dispatch.
 */
template <
    typename InputIteratorT,
    typename FlagIterator,
    typename OutputIteratorT,
    typename NumSelectedIteratorT>
CUB_RUNTIME_FUNCTION __forceinline__
static cudaError_t Flagged(
    void*                   d_temp_storage,
    size_t                  &temp_storage_bytes,
    InputIteratorT          d_in,
    FlagIterator            d_flags,
    OutputIteratorT         d_out,
    NumSelectedIteratorT    d_num_selected_out,
    int                     num_items,
    cudaStream_t            stream = 0,
    bool                    debug_synchronous = false)
{
    typedef int         OffsetT;        // Signed integer type for global offsets
    typedef NullType    SelectOp;       // Selection op (not used)
    typedef NullType    EqualityOp;     // Equality operator (not used)

    // NOTE(review): the trailing `false` is the keep-rejects switch in upstream CUB
    // (plain selection, rejected items are discarded) -- confirm against dispatch_select_if.cuh.
    return DispatchSelectIf<InputIteratorT, FlagIterator, OutputIteratorT, NumSelectedIteratorT, SelectOp, EqualityOp, OffsetT, false>::Dispatch(
        d_temp_storage,
        temp_storage_bytes,
        d_in,
        d_flags,
        d_out,
        d_num_selected_out,
        SelectOp(),
        EqualityOp(),
        num_items,
        stream,
        debug_synchronous);
}
161
162
/**
 * \brief Uses the \p select_op functor to selectively copy items from
 *        \p d_in into \p d_out.
 *
 * Thin front-end: it fixes the global offset type to \c int, uses a
 * \c NullType* placeholder (passed as \c NULL) for the unused flag sequence
 * and \c NullType for the unused equality operator, then forwards every
 * argument to DispatchSelectIf::Dispatch.
 *
 * \tparam InputIteratorT        Iterator type of the input items
 * \tparam OutputIteratorT       Iterator type of the selected output items
 * \tparam NumSelectedIteratorT  Iterator type receiving the number of selected items
 * \tparam SelectOp              Type of the selection functor
 *
 * \param[in]     d_temp_storage      Temporary storage allocation, forwarded unchanged to the dispatch layer
 * \param[in,out] temp_storage_bytes  Size in bytes of \p d_temp_storage (taken by reference;
 *                                    NOTE(review): presumably written by the dispatch layer when
 *                                    querying the required size -- confirm against DispatchSelectIf)
 * \param[in]     d_in                Input sequence of data items
 * \param[out]    d_out               Output sequence receiving the selected items
 * \param[out]    d_num_selected_out  Receives the total number of items selected
 * \param[in]     num_items           Total number of input data items
 * \param[in]     select_op           Selection operator
 * \param[in]     stream              CUDA stream handed to the dispatch layer (defaults to stream 0)
 * \param[in]     debug_synchronous   Debug flag handed to the dispatch layer (defaults to \c false)
 *
 * \return The \c cudaError_t reported by DispatchSelectIf::Dispatch.
 */
template <
    typename InputIteratorT,
    typename OutputIteratorT,
    typename NumSelectedIteratorT,
    typename SelectOp>
CUB_RUNTIME_FUNCTION __forceinline__
static cudaError_t If(
    void*                   d_temp_storage,
    size_t                  &temp_storage_bytes,
    InputIteratorT          d_in,
    OutputIteratorT         d_out,
    NumSelectedIteratorT    d_num_selected_out,
    int                     num_items,
    SelectOp                select_op,
    cudaStream_t            stream = 0,
    bool                    debug_synchronous = false)
{
    typedef int         OffsetT;        // Signed integer type for global offsets
    typedef NullType*   FlagIterator;   // FlagT iterator type (not used)
    typedef NullType    EqualityOp;     // Equality operator (not used)

    // NOTE(review): the trailing `false` is the keep-rejects switch in upstream CUB
    // (plain selection, rejected items are discarded) -- confirm against dispatch_select_if.cuh.
    return DispatchSelectIf<InputIteratorT, FlagIterator, OutputIteratorT, NumSelectedIteratorT, SelectOp, EqualityOp, OffsetT, false>::Dispatch(
        d_temp_storage,
        temp_storage_bytes,
        d_in,
        NULL,                   // no flag sequence: selection is driven by select_op
        d_out,
        d_num_selected_out,
        select_op,
        EqualityOp(),
        num_items,
        stream,
        debug_synchronous);
}
267
268
/**
 * \brief Given an input sequence \p d_in having runs of consecutive
 *        equal-valued keys, copies only the first key from each run into
 *        \p d_out.
 *
 * Thin front-end: it fixes the global offset type to \c int, uses a
 * \c NullType* placeholder (passed as \c NULL) for the unused flag sequence,
 * \c NullType for the unused selection operator and the default \c Equality
 * functor for comparing keys, then forwards every argument to
 * DispatchSelectIf::Dispatch.
 *
 * \tparam InputIteratorT        Iterator type of the input items
 * \tparam OutputIteratorT       Iterator type of the selected output items
 * \tparam NumSelectedIteratorT  Iterator type receiving the number of selected items
 *
 * \param[in]     d_temp_storage      Temporary storage allocation, forwarded unchanged to the dispatch layer
 * \param[in,out] temp_storage_bytes  Size in bytes of \p d_temp_storage (taken by reference;
 *                                    NOTE(review): presumably written by the dispatch layer when
 *                                    querying the required size -- confirm against DispatchSelectIf)
 * \param[in]     d_in                Input sequence of data items
 * \param[out]    d_out               Output sequence receiving the selected items
 * \param[out]    d_num_selected_out  Receives the total number of items selected
 * \param[in]     num_items           Total number of input data items
 * \param[in]     stream              CUDA stream handed to the dispatch layer (defaults to stream 0)
 * \param[in]     debug_synchronous   Debug flag handed to the dispatch layer (defaults to \c false)
 *
 * \return The \c cudaError_t reported by DispatchSelectIf::Dispatch.
 */
template <
    typename InputIteratorT,
    typename OutputIteratorT,
    typename NumSelectedIteratorT>
CUB_RUNTIME_FUNCTION __forceinline__
static cudaError_t Unique(
    void*                   d_temp_storage,
    size_t                  &temp_storage_bytes,
    InputIteratorT          d_in,
    OutputIteratorT         d_out,
    NumSelectedIteratorT    d_num_selected_out,
    int                     num_items,
    cudaStream_t            stream = 0,
    bool                    debug_synchronous = false)
{
    typedef int         OffsetT;        // Signed integer type for global offsets
    typedef NullType*   FlagIterator;   // FlagT iterator type (not used)
    typedef NullType    SelectOp;       // Selection op (not used)
    typedef Equality    EqualityOp;     // Default == operator

    // NOTE(review): the trailing `false` is the keep-rejects switch in upstream CUB
    // (plain selection, rejected items are discarded) -- confirm against dispatch_select_if.cuh.
    return DispatchSelectIf<InputIteratorT, FlagIterator, OutputIteratorT, NumSelectedIteratorT, SelectOp, EqualityOp, OffsetT, false>::Dispatch(
        d_temp_storage,
        temp_storage_bytes,
        d_in,
        NULL,                   // no flag sequence: selection is driven by EqualityOp
        d_out,
        d_num_selected_out,
        SelectOp(),
        EqualityOp(),
        num_items,
        stream,
        debug_synchronous);
}
357
358};
359
366} // CUB namespace
367CUB_NS_POSTFIX // Optional outer namespace(s)
368
369
Optional outer namespace(s)
FlagsInputIteratorT SelectedOutputIteratorT NumSelectedIteratorT ScanTileStateT SelectOpT select_op
[in] Selection operator
KeyT const ValueT ValueT OffsetT OffsetT num_items
[in] Total number of input data items
FlagsInputIteratorT SelectedOutputIteratorT NumSelectedIteratorT d_num_selected_out
[out] Pointer to the total number of items selected (i.e., length of d_selected_out)
FlagsInputIteratorT d_flags
[in] Pointer to the input sequence of selection flags
OffsetT OffsetT
[in] Total number of input data items
OutputIteratorT d_out
[out] Pointer to the output sequence of selected data items
DeviceSelect provides device-wide, parallel operations for compacting selected items from sequences o...
CUB_RUNTIME_FUNCTION static __forceinline__ cudaError_t Unique(void *d_temp_storage, size_t &temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, NumSelectedIteratorT d_num_selected_out, int num_items, cudaStream_t stream=0, bool debug_synchronous=false)
Given an input sequence d_in having runs of consecutive equal-valued keys, only the first key from ea...
CUB_RUNTIME_FUNCTION static __forceinline__ cudaError_t If(void *d_temp_storage, size_t &temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, NumSelectedIteratorT d_num_selected_out, int num_items, SelectOp select_op, cudaStream_t stream=0, bool debug_synchronous=false)
Uses the select_op functor to selectively copy items from d_in into d_out. The total number of items ...
CUB_RUNTIME_FUNCTION static __forceinline__ cudaError_t Flagged(void *d_temp_storage, size_t &temp_storage_bytes, InputIteratorT d_in, FlagIterator d_flags, OutputIteratorT d_out, NumSelectedIteratorT d_num_selected_out, int num_items, cudaStream_t stream=0, bool debug_synchronous=false)
Uses the d_flags sequence to selectively copy the corresponding items from d_in into d_out....
CUB_RUNTIME_FUNCTION static __forceinline__ cudaError_t Dispatch(void *d_temp_storage, size_t &temp_storage_bytes, InputIteratorT d_in, FlagsInputIteratorT d_flags, SelectedOutputIteratorT d_selected_out, NumSelectedIteratorT d_num_selected_out, SelectOpT select_op, EqualityOpT equality_op, OffsetT num_items, cudaStream_t stream, bool debug_synchronous, int, ScanInitKernelPtrT scan_init_kernel, SelectIfKernelPtrT select_if_kernel, KernelConfig select_if_config)
Function type of cub::SelectIfKernelPtrT
Default equality functor.
A simple "NULL" marker type.