doxygen/openfpm/device__segmented__radix__sort_8cuh_source.html

/******************************************************************************

 * Copyright (c) 2011, Duane Merrill.  All rights reserved.

 * Copyright (c) 2011-2018, NVIDIA CORPORATION.  All rights reserved.

 *

 * Redistribution and use in source and binary forms, with or without

 * modification, are permitted provided that the following conditions are met:

 *     * Redistributions of source code must retain the above copyright

 *       notice, this list of conditions and the following disclaimer.

 *     * Redistributions in binary form must reproduce the above copyright

 *       notice, this list of conditions and the following disclaimer in the

 *       documentation and/or other materials provided with the distribution.

 *     * Neither the name of the NVIDIA CORPORATION nor the

 *       names of its contributors may be used to endorse or promote products

 *       derived from this software without specific prior written permission.

 *

 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND

 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED

 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE

 * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY

 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES

 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;

 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND

 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS

 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 *

 ******************************************************************************/


#pragma once


#include <stdio.h>

#include <iterator>


#include "dispatch/dispatch_radix_sort.cuh"

#include "../util_arch.cuh"

#include "../util_namespace.cuh"


CUB_NS_PREFIX


namespace cub {


/**
 * DeviceSegmentedRadixSort provides device-wide, parallel operations for
 * computing a batched radix sort.
 *
 * Every entry point below is a thin wrapper: it packages its buffers into
 * DoubleBuffer objects where necessary and forwards all arguments to
 * DispatchSegmentedRadixSort<...>::Dispatch, returning that call's
 * cudaError_t unchanged.
 *
 * Conventions shared by all overloads:
 *   - The leading boolean template argument handed to
 *     DispatchSegmentedRadixSort is `false` for the ascending variants and
 *     `true` for the *Descending variants.
 *   - Overloads taking raw input/output pointers pass
 *     `is_overwrite_okay = false` (~2N auxiliary storage required);
 *     overloads taking DoubleBuffer references pass
 *     `is_overwrite_okay = true` (~N auxiliary storage required).
 *   - Global offsets are represented with a signed `int`.
 *
 * Parameters common to all overloads:
 *   d_temp_storage, temp_storage_bytes
 *       Forwarded untouched to the dispatch layer.
 *       NOTE(review): presumably a NULL d_temp_storage makes the call report
 *       the required size through temp_storage_bytes, as elsewhere in CUB --
 *       that logic is not visible in this file; confirm in the dispatcher.
 *   num_items        Total number of input data items.
 *   num_segments     Length of the d_begin_offsets / d_end_offsets sequences.
 *   d_begin_offsets  Sequence of beginning offsets, one per segment.
 *   d_end_offsets    Sequence of ending offsets, one per segment.
 *   begin_bit        Defaults to 0. NOTE(review): presumably the
 *                    least-significant bit index used for key comparison.
 *   end_bit          Past-the-end (most-significant) bit index needed for
 *                    key comparison; defaults to sizeof(KeyT) * 8.
 *   stream, debug_synchronous
 *       Forwarded untouched to the dispatch layer.
 */
struct DeviceSegmentedRadixSort
{
    /******************************************************************
     * Key-value pairs
     ******************************************************************/

    /**
     * Sorts segments of key-value pairs into ascending order.
     * (~2N auxiliary storage required)
     *
     * The raw input pointers are wrapped together with the output pointers
     * in DoubleBuffer objects; constness of the inputs is cast away only to
     * build those wrappers, and the dispatcher is told that overwriting is
     * not okay.
     */
    template <
        typename            KeyT,
        typename            ValueT,
        typename            OffsetIteratorT>
    CUB_RUNTIME_FUNCTION
    static cudaError_t SortPairs(
        void                *d_temp_storage,
        size_t              &temp_storage_bytes,
        const KeyT          *d_keys_in,
        KeyT                *d_keys_out,
        const ValueT        *d_values_in,
        ValueT              *d_values_out,
        int                 num_items,
        int                 num_segments,
        OffsetIteratorT     d_begin_offsets,
        OffsetIteratorT     d_end_offsets,
        int                 begin_bit           = 0,
        int                 end_bit             = sizeof(KeyT) * 8,
        cudaStream_t        stream              = 0,
        bool                debug_synchronous   = false)
    {
        typedef int OffsetT;    // signed integer type for global offsets
        typedef DispatchSegmentedRadixSort<false, KeyT, ValueT, OffsetIteratorT, OffsetT> DispatchT;

        // Pair each input buffer with its output buffer.
        DoubleBuffer<KeyT>      keys_buffer(const_cast<KeyT*>(d_keys_in), d_keys_out);
        DoubleBuffer<ValueT>    values_buffer(const_cast<ValueT*>(d_values_in), d_values_out);

        const bool is_overwrite_okay = false;

        return DispatchT::Dispatch(
            d_temp_storage, temp_storage_bytes,
            keys_buffer, values_buffer,
            num_items, num_segments,
            d_begin_offsets, d_end_offsets,
            begin_bit, end_bit,
            is_overwrite_okay,
            stream, debug_synchronous);
    }

    /**
     * Sorts segments of key-value pairs into ascending order.
     * (~N auxiliary storage required)
     *
     * The caller-supplied DoubleBuffer objects are handed straight to the
     * dispatcher, which is told that overwriting is okay.
     */
    template <
        typename                KeyT,
        typename                ValueT,
        typename                OffsetIteratorT>
    CUB_RUNTIME_FUNCTION
    static cudaError_t SortPairs(
        void                    *d_temp_storage,
        size_t                  &temp_storage_bytes,
        DoubleBuffer<KeyT>      &d_keys,
        DoubleBuffer<ValueT>    &d_values,
        int                     num_items,
        int                     num_segments,
        OffsetIteratorT         d_begin_offsets,
        OffsetIteratorT         d_end_offsets,
        int                     begin_bit           = 0,
        int                     end_bit             = sizeof(KeyT) * 8,
        cudaStream_t            stream              = 0,
        bool                    debug_synchronous   = false)
    {
        typedef int OffsetT;    // signed integer type for global offsets
        typedef DispatchSegmentedRadixSort<false, KeyT, ValueT, OffsetIteratorT, OffsetT> DispatchT;

        const bool is_overwrite_okay = true;

        return DispatchT::Dispatch(
            d_temp_storage, temp_storage_bytes,
            d_keys, d_values,
            num_items, num_segments,
            d_begin_offsets, d_end_offsets,
            begin_bit, end_bit,
            is_overwrite_okay,
            stream, debug_synchronous);
    }

    /**
     * Sorts segments of key-value pairs into descending order.
     * (~2N auxiliary storage required)
     *
     * Same buffer handling as the pointer-based SortPairs; only the leading
     * boolean template argument of the dispatcher differs (true).
     */
    template <
        typename            KeyT,
        typename            ValueT,
        typename            OffsetIteratorT>
    CUB_RUNTIME_FUNCTION
    static cudaError_t SortPairsDescending(
        void                *d_temp_storage,
        size_t              &temp_storage_bytes,
        const KeyT          *d_keys_in,
        KeyT                *d_keys_out,
        const ValueT        *d_values_in,
        ValueT              *d_values_out,
        int                 num_items,
        int                 num_segments,
        OffsetIteratorT     d_begin_offsets,
        OffsetIteratorT     d_end_offsets,
        int                 begin_bit           = 0,
        int                 end_bit             = sizeof(KeyT) * 8,
        cudaStream_t        stream              = 0,
        bool                debug_synchronous   = false)
    {
        typedef int OffsetT;    // signed integer type for global offsets
        typedef DispatchSegmentedRadixSort<true, KeyT, ValueT, OffsetIteratorT, OffsetT> DispatchT;

        // Pair each input buffer with its output buffer.
        DoubleBuffer<KeyT>      keys_buffer(const_cast<KeyT*>(d_keys_in), d_keys_out);
        DoubleBuffer<ValueT>    values_buffer(const_cast<ValueT*>(d_values_in), d_values_out);

        const bool is_overwrite_okay = false;

        return DispatchT::Dispatch(
            d_temp_storage, temp_storage_bytes,
            keys_buffer, values_buffer,
            num_items, num_segments,
            d_begin_offsets, d_end_offsets,
            begin_bit, end_bit,
            is_overwrite_okay,
            stream, debug_synchronous);
    }

    /**
     * Sorts segments of key-value pairs into descending order.
     * (~N auxiliary storage required)
     *
     * The caller-supplied DoubleBuffer objects are handed straight to the
     * dispatcher, which is told that overwriting is okay.
     */
    template <
        typename                KeyT,
        typename                ValueT,
        typename                OffsetIteratorT>
    CUB_RUNTIME_FUNCTION
    static cudaError_t SortPairsDescending(
        void                    *d_temp_storage,
        size_t                  &temp_storage_bytes,
        DoubleBuffer<KeyT>      &d_keys,
        DoubleBuffer<ValueT>    &d_values,
        int                     num_items,
        int                     num_segments,
        OffsetIteratorT         d_begin_offsets,
        OffsetIteratorT         d_end_offsets,
        int                     begin_bit           = 0,
        int                     end_bit             = sizeof(KeyT) * 8,
        cudaStream_t            stream              = 0,
        bool                    debug_synchronous   = false)
    {
        typedef int OffsetT;    // signed integer type for global offsets
        typedef DispatchSegmentedRadixSort<true, KeyT, ValueT, OffsetIteratorT, OffsetT> DispatchT;

        const bool is_overwrite_okay = true;

        return DispatchT::Dispatch(
            d_temp_storage, temp_storage_bytes,
            d_keys, d_values,
            num_items, num_segments,
            d_begin_offsets, d_end_offsets,
            begin_bit, end_bit,
            is_overwrite_okay,
            stream, debug_synchronous);
    }

    /******************************************************************
     * Keys-only
     ******************************************************************/

    /**
     * Sorts segments of keys into ascending order.
     * (~2N auxiliary storage required)
     *
     * Keys-only sorting reuses the pair dispatcher with NullType as the
     * value type and a default-constructed DoubleBuffer<NullType>.
     */
    template <
        typename            KeyT,
        typename            OffsetIteratorT>
    CUB_RUNTIME_FUNCTION
    static cudaError_t SortKeys(
        void                *d_temp_storage,
        size_t              &temp_storage_bytes,
        const KeyT          *d_keys_in,
        KeyT                *d_keys_out,
        int                 num_items,
        int                 num_segments,
        OffsetIteratorT     d_begin_offsets,
        OffsetIteratorT     d_end_offsets,
        int                 begin_bit           = 0,
        int                 end_bit             = sizeof(KeyT) * 8,
        cudaStream_t        stream              = 0,
        bool                debug_synchronous   = false)
    {
        typedef int OffsetT;    // signed integer type for global offsets
        typedef DispatchSegmentedRadixSort<false, KeyT, NullType, OffsetIteratorT, OffsetT> DispatchT;

        // Pair the input keys with the output keys; values are a null placeholder.
        DoubleBuffer<KeyT>      keys_buffer(const_cast<KeyT*>(d_keys_in), d_keys_out);
        DoubleBuffer<NullType>  null_values;

        const bool is_overwrite_okay = false;

        return DispatchT::Dispatch(
            d_temp_storage, temp_storage_bytes,
            keys_buffer, null_values,
            num_items, num_segments,
            d_begin_offsets, d_end_offsets,
            begin_bit, end_bit,
            is_overwrite_okay,
            stream, debug_synchronous);
    }

    /**
     * Sorts segments of keys into ascending order.
     * (~N auxiliary storage required)
     *
     * The caller-supplied key DoubleBuffer is handed straight to the
     * dispatcher together with a null value buffer; overwriting is okay.
     */
    template <
        typename            KeyT,
        typename            OffsetIteratorT>
    CUB_RUNTIME_FUNCTION
    static cudaError_t SortKeys(
        void                *d_temp_storage,
        size_t              &temp_storage_bytes,
        DoubleBuffer<KeyT>  &d_keys,
        int                 num_items,
        int                 num_segments,
        OffsetIteratorT     d_begin_offsets,
        OffsetIteratorT     d_end_offsets,
        int                 begin_bit           = 0,
        int                 end_bit             = sizeof(KeyT) * 8,
        cudaStream_t        stream              = 0,
        bool                debug_synchronous   = false)
    {
        typedef int OffsetT;    // signed integer type for global offsets
        typedef DispatchSegmentedRadixSort<false, KeyT, NullType, OffsetIteratorT, OffsetT> DispatchT;

        // Values are a null placeholder.
        DoubleBuffer<NullType>  null_values;

        const bool is_overwrite_okay = true;

        return DispatchT::Dispatch(
            d_temp_storage, temp_storage_bytes,
            d_keys, null_values,
            num_items, num_segments,
            d_begin_offsets, d_end_offsets,
            begin_bit, end_bit,
            is_overwrite_okay,
            stream, debug_synchronous);
    }

    /**
     * Sorts segments of keys into descending order.
     * (~2N auxiliary storage required)
     *
     * Same buffer handling as the pointer-based SortKeys; only the leading
     * boolean template argument of the dispatcher differs (true).
     */
    template <
        typename            KeyT,
        typename            OffsetIteratorT>
    CUB_RUNTIME_FUNCTION
    static cudaError_t SortKeysDescending(
        void                *d_temp_storage,
        size_t              &temp_storage_bytes,
        const KeyT          *d_keys_in,
        KeyT                *d_keys_out,
        int                 num_items,
        int                 num_segments,
        OffsetIteratorT     d_begin_offsets,
        OffsetIteratorT     d_end_offsets,
        int                 begin_bit           = 0,
        int                 end_bit             = sizeof(KeyT) * 8,
        cudaStream_t        stream              = 0,
        bool                debug_synchronous   = false)
    {
        typedef int OffsetT;    // signed integer type for global offsets
        typedef DispatchSegmentedRadixSort<true, KeyT, NullType, OffsetIteratorT, OffsetT> DispatchT;

        // Pair the input keys with the output keys; values are a null placeholder.
        DoubleBuffer<KeyT>      keys_buffer(const_cast<KeyT*>(d_keys_in), d_keys_out);
        DoubleBuffer<NullType>  null_values;

        const bool is_overwrite_okay = false;

        return DispatchT::Dispatch(
            d_temp_storage, temp_storage_bytes,
            keys_buffer, null_values,
            num_items, num_segments,
            d_begin_offsets, d_end_offsets,
            begin_bit, end_bit,
            is_overwrite_okay,
            stream, debug_synchronous);
    }

    /**
     * Sorts segments of keys into descending order.
     * (~N auxiliary storage required)
     *
     * The caller-supplied key DoubleBuffer is handed straight to the
     * dispatcher together with a null value buffer; overwriting is okay.
     */
    template <
        typename            KeyT,
        typename            OffsetIteratorT>
    CUB_RUNTIME_FUNCTION
    static cudaError_t SortKeysDescending(
        void                *d_temp_storage,
        size_t              &temp_storage_bytes,
        DoubleBuffer<KeyT>  &d_keys,
        int                 num_items,
        int                 num_segments,
        OffsetIteratorT     d_begin_offsets,
        OffsetIteratorT     d_end_offsets,
        int                 begin_bit           = 0,
        int                 end_bit             = sizeof(KeyT) * 8,
        cudaStream_t        stream              = 0,
        bool                debug_synchronous   = false)
    {
        typedef int OffsetT;    // signed integer type for global offsets
        typedef DispatchSegmentedRadixSort<true, KeyT, NullType, OffsetIteratorT, OffsetT> DispatchT;

        // Values are a null placeholder.
        DoubleBuffer<NullType>  null_values;

        const bool is_overwrite_okay = true;

        return DispatchT::Dispatch(
            d_temp_storage, temp_storage_bytes,
            d_keys, null_values,
            num_items, num_segments,
            d_begin_offsets, d_end_offsets,
            begin_bit, end_bit,
            is_overwrite_okay,
            stream, debug_synchronous);
    }
};


}               // CUB namespace

CUB_NS_POSTFIX  // Optional outer namespace(s)


dispatch_radix_sort.cuh

cub
Optional outer namespace(s)
Definition agent_histogram.cuh:48

cub::d_values_out
KeyT const ValueT ValueT * d_values_out
[in] Output values buffer
Definition dispatch_radix_sort.cuh:166

cub::end_bit
KeyT const ValueT ValueT OffsetT int int end_bit
[in] The past-the-end (most-significant) bit index needed for key comparison
Definition dispatch_radix_sort.cuh:220

cub::num_items
KeyT const ValueT ValueT OffsetT OffsetT num_items
[in] Total number of input data items
Definition dispatch_radix_sort.cuh:168

cub::d_keys_out
KeyT * d_keys_out
[in] Output keys buffer
Definition dispatch_radix_sort.cuh:164

cub::d_values_in
KeyT const ValueT * d_values_in
[in] Input values buffer
Definition dispatch_radix_sort.cuh:165

cub::d_begin_offsets
KeyT const ValueT ValueT OffsetIteratorT d_begin_offsets
[in] Pointer to the sequence of beginning offsets of length num_segments, such that d_begin_offsets[i...
Definition dispatch_radix_sort.cuh:334

cub::OffsetT
OffsetT OffsetT
[in] Total number of input data items
Definition dispatch_radix_sort.cuh:75

cub::d_end_offsets
KeyT const ValueT ValueT OffsetIteratorT OffsetIteratorT d_end_offsets
[in] Pointer to the sequence of ending offsets of length num_segments, such that d_end_offsets[i]-1 i...
Definition dispatch_radix_sort.cuh:335

cub::DeviceSegmentedRadixSort
DeviceSegmentedRadixSort provides device-wide, parallel operations for computing a batched radix sort...
Definition device_segmented_radix_sort.cuh:77

cub::DeviceSegmentedRadixSort::SortPairs
static CUB_RUNTIME_FUNCTION cudaError_t SortPairs(void *d_temp_storage, size_t &temp_storage_bytes, const KeyT *d_keys_in, KeyT *d_keys_out, const ValueT *d_values_in, ValueT *d_values_out, int num_items, int num_segments, OffsetIteratorT d_begin_offsets, OffsetIteratorT d_end_offsets, int begin_bit=0, int end_bit=sizeof(KeyT) *8, cudaStream_t stream=0, bool debug_synchronous=false)
Sorts segments of key-value pairs into ascending order. (~2N auxiliary storage required)
Definition device_segmented_radix_sort.cuh:143

cub::DeviceSegmentedRadixSort::SortKeys
static CUB_RUNTIME_FUNCTION cudaError_t SortKeys(void *d_temp_storage, size_t &temp_storage_bytes, const KeyT *d_keys_in, KeyT *d_keys_out, int num_items, int num_segments, OffsetIteratorT d_begin_offsets, OffsetIteratorT d_end_offsets, int begin_bit=0, int end_bit=sizeof(KeyT) *8, cudaStream_t stream=0, bool debug_synchronous=false)
Sorts segments of keys into ascending order. (~2N auxiliary storage required)
Definition device_segmented_radix_sort.cuh:546

cub::DeviceSegmentedRadixSort::SortPairs
static CUB_RUNTIME_FUNCTION cudaError_t SortPairs(void *d_temp_storage, size_t &temp_storage_bytes, DoubleBuffer< KeyT > &d_keys, DoubleBuffer< ValueT > &d_values, int num_items, int num_segments, OffsetIteratorT d_begin_offsets, OffsetIteratorT d_end_offsets, int begin_bit=0, int end_bit=sizeof(KeyT) *8, cudaStream_t stream=0, bool debug_synchronous=false)
Sorts segments of key-value pairs into ascending order. (~N auxiliary storage required)
Definition device_segmented_radix_sort.cuh:252

cub::DeviceSegmentedRadixSort::SortKeys
static CUB_RUNTIME_FUNCTION cudaError_t SortKeys(void *d_temp_storage, size_t &temp_storage_bytes, DoubleBuffer< KeyT > &d_keys, int num_items, int num_segments, OffsetIteratorT d_begin_offsets, OffsetIteratorT d_end_offsets, int begin_bit=0, int end_bit=sizeof(KeyT) *8, cudaStream_t stream=0, bool debug_synchronous=false)
Sorts segments of keys into ascending order. (~N auxiliary storage required).
Definition device_segmented_radix_sort.cuh:645

cub::DeviceSegmentedRadixSort::SortPairsDescending
static CUB_RUNTIME_FUNCTION cudaError_t SortPairsDescending(void *d_temp_storage, size_t &temp_storage_bytes, DoubleBuffer< KeyT > &d_keys, DoubleBuffer< ValueT > &d_values, int num_items, int num_segments, OffsetIteratorT d_begin_offsets, OffsetIteratorT d_end_offsets, int begin_bit=0, int end_bit=sizeof(KeyT) *8, cudaStream_t stream=0, bool debug_synchronous=false)
Sorts segments of key-value pairs into descending order. (~N auxiliary storage required).
Definition device_segmented_radix_sort.cuh:454

cub::DeviceSegmentedRadixSort::SortPairsDescending
static CUB_RUNTIME_FUNCTION cudaError_t SortPairsDescending(void *d_temp_storage, size_t &temp_storage_bytes, const KeyT *d_keys_in, KeyT *d_keys_out, const ValueT *d_values_in, ValueT *d_values_out, int num_items, int num_segments, OffsetIteratorT d_begin_offsets, OffsetIteratorT d_end_offsets, int begin_bit=0, int end_bit=sizeof(KeyT) *8, cudaStream_t stream=0, bool debug_synchronous=false)
Sorts segments of key-value pairs into descending order. (~2N auxiliary storage required).
Definition device_segmented_radix_sort.cuh:345

cub::DeviceSegmentedRadixSort::SortKeysDescending
static CUB_RUNTIME_FUNCTION cudaError_t SortKeysDescending(void *d_temp_storage, size_t &temp_storage_bytes, DoubleBuffer< KeyT > &d_keys, int num_items, int num_segments, OffsetIteratorT d_begin_offsets, OffsetIteratorT d_end_offsets, int begin_bit=0, int end_bit=sizeof(KeyT) *8, cudaStream_t stream=0, bool debug_synchronous=false)
Sorts segments of keys into descending order. (~N auxiliary storage required).
Definition device_segmented_radix_sort.cuh:832

cub::DeviceSegmentedRadixSort::SortKeysDescending
static CUB_RUNTIME_FUNCTION cudaError_t SortKeysDescending(void *d_temp_storage, size_t &temp_storage_bytes, const KeyT *d_keys_in, KeyT *d_keys_out, int num_items, int num_segments, OffsetIteratorT d_begin_offsets, OffsetIteratorT d_end_offsets, int begin_bit=0, int end_bit=sizeof(KeyT) *8, cudaStream_t stream=0, bool debug_synchronous=false)
Sorts segments of keys into descending order. (~2N auxiliary storage required).
Definition device_segmented_radix_sort.cuh:734

cub::DispatchSegmentedRadixSort::Dispatch
CUB_RUNTIME_FUNCTION static __forceinline__ cudaError_t Dispatch(void *d_temp_storage, size_t &temp_storage_bytes, DoubleBuffer< KeyT > &d_keys, DoubleBuffer< ValueT > &d_values, int num_items, int num_segments, OffsetIteratorT d_begin_offsets, OffsetIteratorT d_end_offsets, int begin_bit, int end_bit, bool is_overwrite_okay, cudaStream_t stream, bool debug_synchronous)
Internal dispatch routine.
Definition dispatch_radix_sort.cuh:1575

cub::DoubleBuffer
Double-buffer storage wrapper for multi-pass stream transformations that require more than one storag...
Definition util_type.cuh:792