doxygen/openfpm/device__run__length__encode_8cuh_source.html

 /******************************************************************************

  * Copyright (c) 2011, Duane Merrill.  All rights reserved.

  * Copyright (c) 2011-2018, NVIDIA CORPORATION.  All rights reserved.

  *

  * Redistribution and use in source and binary forms, with or without

  * modification, are permitted provided that the following conditions are met:

  *     * Redistributions of source code must retain the above copyright

  *       notice, this list of conditions and the following disclaimer.

  *     * Redistributions in binary form must reproduce the above copyright

  *       notice, this list of conditions and the following disclaimer in the

  *       documentation and/or other materials provided with the distribution.

  *     * Neither the name of the NVIDIA CORPORATION nor the

  *       names of its contributors may be used to endorse or promote products

  *       derived from this software without specific prior written permission.

  *

  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND

  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED

  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE

  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY

  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES

  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;

  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND

  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS

  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

  *

  ******************************************************************************/


 #pragma once


 #include <stdio.h>

 #include <iterator>


 #include "dispatch/dispatch_rle.cuh"

 #include "dispatch/dispatch_reduce_by_key.cuh"

 #include "../util_namespace.cuh"


 CUB_NS_PREFIX


 namespace cub {


 /**
  * \brief DeviceRunLengthEncode provides device-wide, parallel operations for
  *        demarcating "runs" of same-valued elements of an input sequence.
  *
  * Both entry points are thin front-ends: they fix the offset type and the
  * comparison functor and then forward every argument to the matching
  * dispatch class.  The returned cudaError_t is whatever that Dispatch call
  * returns.
  *
  * NOTE(review): d_temp_storage / temp_storage_bytes are passed through
  * untouched.  Presumably they follow the usual CUB two-phase convention
  * (a NULL d_temp_storage only reports the required size) -- confirm against
  * the Dispatch implementations, which are not visible from this file.
  */
 struct DeviceRunLengthEncode
 {

     /**
      * \brief Computes a run-length encoding of the sequence \p d_in.
      *
      * Implemented as a reduce-by-key: the input items are the keys, a constant
      * iterator yielding the value 1 supplies the values, and the values of each
      * run of equal keys are combined with cub::Sum, so each aggregate is the
      * length of its run.
      *
      * \tparam InputIteratorT          Iterator type of the input items (keys)
      * \tparam UniqueOutputIteratorT   Iterator type for the key output
      * \tparam LengthsOutputIteratorT  Iterator type for the run-length output; its value_type
      *                                 (or int when that value_type is void) is the counting type
      * \tparam NumRunsOutputIteratorT  Iterator type for the run-count output
      *
      * \param d_temp_storage      Temporary storage, forwarded to Dispatch
      * \param temp_storage_bytes  Reference to the size in bytes of \p d_temp_storage, forwarded to Dispatch
      * \param d_in                Input sequence of items
      * \param d_unique_out        Key output of the reduce-by-key (presumably one item per run -- confirm)
      * \param d_counts_out        Aggregate output of the reduce-by-key, i.e. the summed 1s of each run
      * \param d_num_runs_out      Pointer to total number of runs encountered (i.e., the length of \p d_unique_out)
      * \param num_items           Total number of input data items
      * \param stream              CUDA stream forwarded to Dispatch (defaults to stream 0)
      * \param debug_synchronous   Debug flag forwarded to Dispatch (defaults to false)
      *
      * \return The cudaError_t produced by DispatchReduceByKey::Dispatch.
      */
     template <
         typename                    InputIteratorT,
         typename                    UniqueOutputIteratorT,
         typename                    LengthsOutputIteratorT,
         typename                    NumRunsOutputIteratorT>
     CUB_RUNTIME_FUNCTION __forceinline__
     static cudaError_t Encode(
         void*                       d_temp_storage,
         size_t                      &temp_storage_bytes,
         InputIteratorT              d_in,
         UniqueOutputIteratorT       d_unique_out,
         LengthsOutputIteratorT      d_counts_out,
         NumRunsOutputIteratorT      d_num_runs_out,
         int                         num_items,
         cudaStream_t                stream             = 0,
         bool                        debug_synchronous  = false)
     {
         typedef int         OffsetT;                    // Signed integer type for global offsets
         typedef Equality    EqualityOp;                 // Default == operator
         typedef cub::Sum    ReductionOp;                // Value reduction operator

         // Value type advertised by the lengths output iterator (void for some output-only iterators)
         typedef typename std::iterator_traits<LengthsOutputIteratorT>::value_type LengthsValueT;

         // The lengths output value type: OffsetT when the iterator's value type is void,
         // otherwise the iterator's own value type
         typedef typename If<(Equals<LengthsValueT, void>::VALUE),
             OffsetT,
             LengthsValueT>::Type LengthT;

         // Generator type for providing 1s values for run-length reduction
         typedef ConstantInputIterator<LengthT, OffsetT> LengthsInputIteratorT;

         typedef DispatchReduceByKey<
             InputIteratorT,
             UniqueOutputIteratorT,
             LengthsInputIteratorT,
             LengthsOutputIteratorT,
             NumRunsOutputIteratorT,
             EqualityOp,
             ReductionOp,
             OffsetT> DispatchT;

         // Summing a 1 for every item of a run yields that run's length
         return DispatchT::Dispatch(
             d_temp_storage,
             temp_storage_bytes,
             d_in,
             d_unique_out,
             LengthsInputIteratorT((LengthT) 1),
             d_counts_out,
             d_num_runs_out,
             EqualityOp(),
             ReductionOp(),
             num_items,
             stream,
             debug_synchronous);
     }


     /**
      * \brief Enumerates the starting offsets and lengths of all non-trivial runs
      *        (of length > 1) of same-valued items in the sequence \p d_in.
      *
      * Forwards to DeviceRleDispatch using the default equality functor and a
      * signed 32-bit (int) offset type.
      *
      * \tparam InputIteratorT          Iterator type of the input items
      * \tparam OffsetsOutputIteratorT  Iterator type for the run-offset output
      * \tparam LengthsOutputIteratorT  Iterator type for the run-length output
      * \tparam NumRunsOutputIteratorT  Iterator type for the run-count output
      *
      * \param d_temp_storage      Temporary storage, forwarded to Dispatch
      * \param temp_storage_bytes  Reference to the size in bytes of \p d_temp_storage, forwarded to Dispatch
      * \param d_in                Input sequence of data items
      * \param d_offsets_out       Output sequence of run offsets
      * \param d_lengths_out       Output sequence of run-lengths
      * \param d_num_runs_out      Output for the number of runs found
      * \param num_items           Total number of input data items
      * \param stream              CUDA stream forwarded to Dispatch (defaults to stream 0)
      * \param debug_synchronous   Debug flag forwarded to Dispatch (defaults to false)
      *
      * \return The cudaError_t produced by DeviceRleDispatch::Dispatch.
      */
     template <
         typename                InputIteratorT,
         typename                OffsetsOutputIteratorT,
         typename                LengthsOutputIteratorT,
         typename                NumRunsOutputIteratorT>
     CUB_RUNTIME_FUNCTION __forceinline__
     static cudaError_t NonTrivialRuns(
         void*                   d_temp_storage,
         size_t                  &temp_storage_bytes,
         InputIteratorT          d_in,
         OffsetsOutputIteratorT  d_offsets_out,
         LengthsOutputIteratorT  d_lengths_out,
         NumRunsOutputIteratorT  d_num_runs_out,
         int                     num_items,
         cudaStream_t            stream             = 0,
         bool                    debug_synchronous  = false)
     {
         typedef int         OffsetT;                    // Signed integer type for global offsets
         typedef Equality    EqualityOp;                 // Default == operator

         typedef DeviceRleDispatch<
             InputIteratorT,
             OffsetsOutputIteratorT,
             LengthsOutputIteratorT,
             NumRunsOutputIteratorT,
             EqualityOp,
             OffsetT> DispatchT;

         return DispatchT::Dispatch(
             d_temp_storage,
             temp_storage_bytes,
             d_in,
             d_offsets_out,
             d_lengths_out,
             d_num_runs_out,
             EqualityOp(),
             num_items,
             stream,
             debug_synchronous);
     }

 };


 }               // CUB namespace

 CUB_NS_POSTFIX  // Optional outer namespace(s)


cub::ConstantInputIterator
A random-access input generator for dereferencing a sequence of homogeneous values.
Definition: constant_input_iterator.cuh:97

dispatch_reduce_by_key.cuh

dispatch_rle.cuh

cub
CUB namespace
Definition: agent_histogram.cuh:48

cub::d_offsets_out
OffsetsOutputIteratorT d_offsets_out
[out] Pointer to output sequence of run-offsets
Definition: dispatch_rle.cuh:77

cub::d_unique_out
UniqueOutputIteratorT d_unique_out
Pointer to the output sequence of unique keys (one key per run)
Definition: dispatch_reduce_by_key.cuh:74

cub::d_num_runs_out
NumRunsOutputIteratorT d_num_runs_out
Pointer to total number of runs encountered (i.e., the length of d_unique_out)
Definition: dispatch_reduce_by_key.cuh:77

cub::num_items
OffsetT num_items
[in] Total number of input data items
Definition: dispatch_radix_sort.cuh:168

cub::d_lengths_out
LengthsOutputIteratorT d_lengths_out
[out] Pointer to output sequence of run-lengths
Definition: dispatch_rle.cuh:78

cub::OffsetT
OffsetT OffsetT
Signed integer type for global offsets
Definition: dispatch_radix_sort.cuh:75

cub::DeviceRleDispatch::Dispatch
CUB_RUNTIME_FUNCTION static __forceinline__ cudaError_t Dispatch(void *d_temp_storage, size_t &temp_storage_bytes, InputIteratorT d_in, OffsetsOutputIteratorT d_offsets_out, LengthsOutputIteratorT d_lengths_out, NumRunsOutputIteratorT d_num_runs_out, EqualityOpT equality_op, OffsetT num_items, cudaStream_t stream, bool debug_synchronous, int ptx_version, DeviceScanInitKernelPtr device_scan_init_kernel, DeviceRleSweepKernelPtr device_rle_sweep_kernel, KernelConfig device_rle_config)
Internal dispatch routine for computing a device-wide run-length-encode using the specified kernel functions.
Definition: dispatch_rle.cuh:357

cub::DeviceRunLengthEncode
DeviceRunLengthEncode provides device-wide, parallel operations for demarcating "runs" of same-valued items within a sequence residing within device-accessible memory.
Definition: device_run_length_encode.cuh:79

cub::DeviceRunLengthEncode::NonTrivialRuns
CUB_RUNTIME_FUNCTION static __forceinline__ cudaError_t NonTrivialRuns(void *d_temp_storage, size_t &temp_storage_bytes, InputIteratorT d_in, OffsetsOutputIteratorT d_offsets_out, LengthsOutputIteratorT d_lengths_out, NumRunsOutputIteratorT d_num_runs_out, int num_items, cudaStream_t stream=0, bool debug_synchronous=false)
Enumerates the starting offsets and lengths of all non-trivial runs (of length > 1) of same-valued keys in the sequence d_in.
Definition: device_run_length_encode.cuh:244

cub::DeviceRunLengthEncode::Encode
CUB_RUNTIME_FUNCTION static __forceinline__ cudaError_t Encode(void *d_temp_storage, size_t &temp_storage_bytes, InputIteratorT d_in, UniqueOutputIteratorT d_unique_out, LengthsOutputIteratorT d_counts_out, NumRunsOutputIteratorT d_num_runs_out, int num_items, cudaStream_t stream=0, bool debug_synchronous=false)
Computes a run-length encoding of the sequence d_in.
Definition: device_run_length_encode.cuh:148

cub::DispatchReduceByKey::Dispatch
CUB_RUNTIME_FUNCTION static __forceinline__ cudaError_t Dispatch(void *d_temp_storage, size_t &temp_storage_bytes, KeysInputIteratorT d_keys_in, UniqueOutputIteratorT d_unique_out, ValuesInputIteratorT d_values_in, AggregatesOutputIteratorT d_aggregates_out, NumRunsOutputIteratorT d_num_runs_out, EqualityOpT equality_op, ReductionOpT reduction_op, OffsetT num_items, cudaStream_t stream, bool debug_synchronous, int, ScanInitKernelT init_kernel, ReduceByKeyKernelT reduce_by_key_kernel, KernelConfig reduce_by_key_config)
Internal dispatch routine for computing a device-wide reduce-by-key using the specified kernel functions.
Definition: dispatch_reduce_by_key.cuh:353

cub::Equality
Default equality functor.
Definition: thread_operators.cuh:60

cub::Equals
Type equality test.
Definition: util_type.cuh:99

cub::If
Type selection (IF ? ThenType : ElseType)
Definition: util_type.cuh:73

cub::NullType
A simple "NULL" marker type.
Definition: util_type.cuh:257

cub::Sum
Default sum functor.
Definition: thread_operators.cuh:110