OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
device_run_length_encode.cuh
Go to the documentation of this file.
1 
2 /******************************************************************************
3  * Copyright (c) 2011, Duane Merrill. All rights reserved.
4  * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  * * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  * * Redistributions in binary form must reproduce the above copyright
11  * notice, this list of conditions and the following disclaimer in the
12  * documentation and/or other materials provided with the distribution.
13  * * Neither the name of the NVIDIA CORPORATION nor the
14  * names of its contributors may be used to endorse or promote products
15  * derived from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  ******************************************************************************/
29 
35 #pragma once
36 
37 #include <stdio.h>
38 #include <iterator>
39 
42 #include "../util_namespace.cuh"
43 
45 CUB_NS_PREFIX
46 
48 namespace cub {
49 
50 
79 {
80 
// Encode: computes a run-length encoding of the sequence d_in.
//
// Parameters:
//   d_temp_storage, temp_storage_bytes - temporary-storage pointer and its size in
//       bytes (the size is taken by reference); both are forwarded as the first two
//       arguments of the call below.
//   d_in           - input sequence to be encoded.
//   d_unique_out   - output iterator; presumably receives one key per run (its
//       length is the value reported through d_num_runs_out) -- TODO confirm.
//   d_counts_out   - output iterator for the run lengths; its value_type selects
//       LengthT below.
//   d_num_runs_out - pointer to the total number of runs encountered.
//   num_items      - total number of input data items.
//   stream, debug_synchronous - forwarded unchanged as the last two arguments.
//
// NOTE(review): this listing skips its own lines 173 and 180, so the head of the
// call that consumes the argument list (presumably a `return ...::Dispatch(`
// expression producing the cudaError_t result) and one argument (presumably
// d_num_runs_out, between d_counts_out and EqualityOp()) are not visible here.
// Confirm against the original header before relying on this block.
142  template <
143  typename InputIteratorT,
144  typename UniqueOutputIteratorT,
145  typename LengthsOutputIteratorT,
146  typename NumRunsOutputIteratorT>
147  CUB_RUNTIME_FUNCTION __forceinline__
148  static cudaError_t Encode(
149  void* d_temp_storage,
150  size_t &temp_storage_bytes,
151  InputIteratorT d_in,
152  UniqueOutputIteratorT d_unique_out,
153  LengthsOutputIteratorT d_counts_out,
154  NumRunsOutputIteratorT d_num_runs_out,
155  int num_items,
156  cudaStream_t stream = 0,
157  bool debug_synchronous = false)
158  {
159  typedef int OffsetT; // Signed integer type for global offsets
160  typedef NullType* FlagIterator; // FlagT iterator type (not used)
161  typedef NullType SelectOp; // Selection op (not used)
162  typedef Equality EqualityOp; // Default == operator
163  typedef cub::Sum ReductionOp; // Value reduction operator
164 
165  // The lengths output value type
166  typedef typename If<(Equals<typename std::iterator_traits<LengthsOutputIteratorT>::value_type, void>::VALUE), // LengthT = (if output iterator's value type is void) ?
167  OffsetT, // ... then the OffsetT type,
168  typename std::iterator_traits<LengthsOutputIteratorT>::value_type>::Type LengthT; // ... else the output iterator's value type
169 
170  // Generator type for providing 1s values for run-length reduction
171  typedef ConstantInputIterator<LengthT, OffsetT> LengthsInputIteratorT;
172 
// Argument list of the dispatch call (its head is missing from this listing).
// Each input item contributes a constant LengthT(1); reducing those ones with
// ReductionOp (cub::Sum) over items that compare equal under EqualityOp yields
// the length of each run, written through d_counts_out.
174  d_temp_storage,
175  temp_storage_bytes,
176  d_in,
177  d_unique_out,
178  LengthsInputIteratorT((LengthT) 1),
179  d_counts_out,
181  EqualityOp(),
182  ReductionOp(),
183  num_items,
184  stream,
185  debug_synchronous);
186  }
187 
188 
// NonTrivialRuns: enumerates the starting offsets and lengths of all non-trivial
// runs (of length > 1) of same-valued items in d_in.
//
// Parameters:
//   d_temp_storage, temp_storage_bytes - temporary-storage pointer and its size in
//       bytes (the size is taken by reference); both are forwarded as the first two
//       arguments of the call below.
//   d_in           - input sequence of data items.
//   d_offsets_out  - output iterator; presumably receives the starting offset of
//       each non-trivial run -- TODO confirm.
//   d_lengths_out  - pointer to the output sequence of run-lengths.
//   d_num_runs_out - pointer to the total number of runs encountered.
//   num_items      - total number of input data items.
//   stream, debug_synchronous - forwarded unchanged as the last two arguments.
//
// NOTE(review): this listing skips its own lines 258 and 262-264, so the head of
// the call that consumes the argument list (presumably a `return ...::Dispatch(`
// expression producing the cudaError_t result) and three arguments (presumably
// d_offsets_out, d_lengths_out and d_num_runs_out, between d_in and EqualityOp())
// are not visible here. Confirm against the original header before relying on
// this block.
238  template <
239  typename InputIteratorT,
240  typename OffsetsOutputIteratorT,
241  typename LengthsOutputIteratorT,
242  typename NumRunsOutputIteratorT>
243  CUB_RUNTIME_FUNCTION __forceinline__
244  static cudaError_t NonTrivialRuns(
245  void* d_temp_storage,
246  size_t &temp_storage_bytes,
247  InputIteratorT d_in,
248  OffsetsOutputIteratorT d_offsets_out,
249  LengthsOutputIteratorT d_lengths_out,
250  NumRunsOutputIteratorT d_num_runs_out,
251  int num_items,
252  cudaStream_t stream = 0,
253  bool debug_synchronous = false)
254  {
255  typedef int OffsetT; // Signed integer type for global offsets
256  typedef Equality EqualityOp; // Default == operator
257 
// Argument list of the dispatch call (its head is missing from this listing).
// Runs are delimited with the default equality functor; no reduction operator
// is passed on this path.
259  d_temp_storage,
260  temp_storage_bytes,
261  d_in,
265  EqualityOp(),
266  num_items,
267  stream,
268  debug_synchronous);
269  }
270 
271 
272 };
273 
274 
275 } // CUB namespace
276 CUB_NS_POSTFIX // Optional outer namespace(s)
277 
278 
Type equality test.
Definition: util_type.cuh:98
OffsetsOutputIteratorT d_offsets_out
< [in] Pointer to input sequence of data items
KeyT const ValueT ValueT OffsetT OffsetT num_items
[in] Total number of input data items
Optional outer namespace(s)
UniqueOutputIteratorT ValuesInputIteratorT AggregatesOutputIteratorT NumRunsOutputIteratorT d_num_runs_out
Pointer to total number of runs encountered (i.e., the length of d_unique_out)
CUB_RUNTIME_FUNCTION static __forceinline__ cudaError_t NonTrivialRuns(void *d_temp_storage, size_t &temp_storage_bytes, InputIteratorT d_in, OffsetsOutputIteratorT d_offsets_out, LengthsOutputIteratorT d_lengths_out, NumRunsOutputIteratorT d_num_runs_out, int num_items, cudaStream_t stream=0, bool debug_synchronous=false)
Enumerates the starting offsets and lengths of all non-trivial runs (of length > 1) of same-valued keys in the sequence d_in.
Default equality functor.
A random-access input generator for dereferencing a sequence of homogeneous values.
CUB_RUNTIME_FUNCTION static __forceinline__ cudaError_t Dispatch(void *d_temp_storage, size_t &temp_storage_bytes, KeysInputIteratorT d_keys_in, UniqueOutputIteratorT d_unique_out, ValuesInputIteratorT d_values_in, AggregatesOutputIteratorT d_aggregates_out, NumRunsOutputIteratorT d_num_runs_out, EqualityOpT equality_op, ReductionOpT reduction_op, OffsetT num_items, cudaStream_t stream, bool debug_synchronous, int, ScanInitKernelT init_kernel, ReduceByKeyKernelT reduce_by_key_kernel, KernelConfig reduce_by_key_config)
Function type of cub::DeviceReduceByKeyKernelT
OffsetsOutputIteratorT LengthsOutputIteratorT d_lengths_out
[out] Pointer to output sequence of run-lengths
OffsetT OffsetT
[in] Total number of input data items
UniqueOutputIteratorT d_unique_out
< Pointer to the input sequence of keys
Type selection (IF ? ThenType : ElseType)
Definition: util_type.cuh:72
CUB_RUNTIME_FUNCTION static __forceinline__ cudaError_t Encode(void *d_temp_storage, size_t &temp_storage_bytes, InputIteratorT d_in, UniqueOutputIteratorT d_unique_out, LengthsOutputIteratorT d_counts_out, NumRunsOutputIteratorT d_num_runs_out, int num_items, cudaStream_t stream=0, bool debug_synchronous=false)
Computes a run-length encoding of the sequence d_in.
A simple "NULL" marker type.
Definition: util_type.cuh:256
Default sum functor.
CUB_RUNTIME_FUNCTION static __forceinline__ cudaError_t Dispatch(void *d_temp_storage, size_t &temp_storage_bytes, InputIteratorT d_in, OffsetsOutputIteratorT d_offsets_out, LengthsOutputIteratorT d_lengths_out, NumRunsOutputIteratorT d_num_runs_out, EqualityOpT equality_op, OffsetT num_items, cudaStream_t stream, bool debug_synchronous, int ptx_version, DeviceScanInitKernelPtr device_scan_init_kernel, DeviceRleSweepKernelPtr device_rle_sweep_kernel, KernelConfig device_rle_config)
Function type of cub::DeviceRleSweepKernelPtr
DeviceRunLengthEncode provides device-wide, parallel operations for demarcating "runs" of same-valued items within a sequence residing within device-accessible memory.