doxygen/openfpm/device__radix__sort_8cuh_source.html

/******************************************************************************

 * Copyright (c) 2011, Duane Merrill.  All rights reserved.

 * Copyright (c) 2011-2018, NVIDIA CORPORATION.  All rights reserved.

 *

 * Redistribution and use in source and binary forms, with or without

 * modification, are permitted provided that the following conditions are met:

 *     * Redistributions of source code must retain the above copyright

 *       notice, this list of conditions and the following disclaimer.

 *     * Redistributions in binary form must reproduce the above copyright

 *       notice, this list of conditions and the following disclaimer in the

 *       documentation and/or other materials provided with the distribution.

 *     * Neither the name of the NVIDIA CORPORATION nor the

 *       names of its contributors may be used to endorse or promote products

 *       derived from this software without specific prior written permission.

 *

 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND

 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED

 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE

 * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY

 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES

 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;

 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND

 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS

 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 *

 ******************************************************************************/


#pragma once


#include <stdio.h>

#include <iterator>


#include "dispatch/dispatch_radix_sort.cuh"

#include "../util_arch.cuh"

#include "../util_namespace.cuh"


CUB_NS_PREFIX


namespace cub {


/**
 * \brief Static entry points for device-wide radix sorting of keys and key-value pairs.
 *
 * Every method is a thin wrapper that selects a DispatchRadixSort specialization
 * and forwards its arguments to DispatchRadixSort::Dispatch.  The wrappers differ
 * only in three choices:
 *   - the first DispatchRadixSort template argument (false = ascending,
 *     true = descending),
 *   - the value type (ValueT for pair sorts, NullType for keys-only sorts),
 *   - the is_overwrite_okay flag (false for the separate in/out pointer overloads,
 *     true for the DoubleBuffer overloads).
 *
 * Global offsets are always of type int.
 *
 * NOTE(review): the meaning of d_temp_storage / temp_storage_bytes (e.g. a size
 * query when d_temp_storage is NULL) is implemented inside Dispatch and is not
 * visible here -- confirm against dispatch_radix_sort.cuh.
 */
struct DeviceRadixSort
{

    /******************************************************************/
    // Key-value pair sorting

    /**
     * \brief Sorts key-value pairs into ascending order. (~2N auxiliary storage required)
     *
     * \param d_temp_storage        Temporary storage pointer, forwarded to Dispatch
     * \param temp_storage_bytes    Reference to the temporary storage size, forwarded to Dispatch
     * \param d_keys_in             Input keys
     * \param d_keys_out            Output keys
     * \param d_values_in           Input values
     * \param d_values_out          Output values
     * \param num_items             Number of items to sort
     * \param begin_bit             First bit of the compared key range (default 0)
     * \param end_bit               Past-the-end (most-significant) bit index needed for key comparison (default: all bits of KeyT)
     * \param stream                CUDA stream forwarded to Dispatch (default stream 0)
     * \param debug_synchronous     Debug flag forwarded to Dispatch
     * \return The cudaError_t produced by DispatchRadixSort::Dispatch
     */
    template <typename KeyT, typename ValueT>
    CUB_RUNTIME_FUNCTION
    static cudaError_t SortPairs(
        void                *d_temp_storage,
        size_t              &temp_storage_bytes,
        const KeyT          *d_keys_in,
        KeyT                *d_keys_out,
        const ValueT        *d_values_in,
        ValueT              *d_values_out,
        int                 num_items,
        int                 begin_bit           = 0,
        int                 end_bit             = sizeof(KeyT) * 8,
        cudaStream_t        stream              = 0,
        bool                debug_synchronous   = false)
    {
        // Ascending dispatcher; int is the signed integer type for global offsets
        typedef DispatchRadixSort<false, KeyT, ValueT, int> DispatchT;

        // Dispatch operates on DoubleBuffer wrappers; constness of the inputs is
        // cast away only so the in/out pointers fit the same wrapper type
        DoubleBuffer<KeyT>      key_buffers(const_cast<KeyT*>(d_keys_in), d_keys_out);
        DoubleBuffer<ValueT>    value_buffers(const_cast<ValueT*>(d_values_in), d_values_out);

        // Separate in/out pointers were supplied, so overwriting is not permitted
        const bool is_overwrite_okay = false;

        return DispatchT::Dispatch(
            d_temp_storage, temp_storage_bytes,
            key_buffers, value_buffers,
            num_items, begin_bit, end_bit,
            is_overwrite_okay, stream, debug_synchronous);
    }


    /**
     * \brief Sorts key-value pairs into ascending order. (~N auxiliary storage required)
     *
     * \param d_temp_storage        Temporary storage pointer, forwarded to Dispatch
     * \param temp_storage_bytes    Reference to the temporary storage size, forwarded to Dispatch
     * \param d_keys                Double-buffer of keys, passed by reference to Dispatch
     * \param d_values              Double-buffer of values, passed by reference to Dispatch
     * \param num_items             Number of items to sort
     * \param begin_bit             First bit of the compared key range (default 0)
     * \param end_bit               Past-the-end (most-significant) bit index needed for key comparison (default: all bits of KeyT)
     * \param stream                CUDA stream forwarded to Dispatch (default stream 0)
     * \param debug_synchronous     Debug flag forwarded to Dispatch
     * \return The cudaError_t produced by DispatchRadixSort::Dispatch
     */
    template <typename KeyT, typename ValueT>
    CUB_RUNTIME_FUNCTION
    static cudaError_t SortPairs(
        void                    *d_temp_storage,
        size_t                  &temp_storage_bytes,
        DoubleBuffer<KeyT>      &d_keys,
        DoubleBuffer<ValueT>    &d_values,
        int                     num_items,
        int                     begin_bit           = 0,
        int                     end_bit             = sizeof(KeyT) * 8,
        cudaStream_t            stream              = 0,
        bool                    debug_synchronous   = false)
    {
        // Ascending dispatcher; int is the signed integer type for global offsets
        typedef DispatchRadixSort<false, KeyT, ValueT, int> DispatchT;

        // Caller handed over double-buffers, so Dispatch is told overwriting is okay
        const bool is_overwrite_okay = true;

        return DispatchT::Dispatch(
            d_temp_storage, temp_storage_bytes,
            d_keys, d_values,
            num_items, begin_bit, end_bit,
            is_overwrite_okay, stream, debug_synchronous);
    }


    /**
     * \brief Sorts key-value pairs into descending order. (~2N auxiliary storage required)
     *
     * \param d_temp_storage        Temporary storage pointer, forwarded to Dispatch
     * \param temp_storage_bytes    Reference to the temporary storage size, forwarded to Dispatch
     * \param d_keys_in             Input keys
     * \param d_keys_out            Output keys
     * \param d_values_in           Input values
     * \param d_values_out          Output values
     * \param num_items             Number of items to sort
     * \param begin_bit             First bit of the compared key range (default 0)
     * \param end_bit               Past-the-end (most-significant) bit index needed for key comparison (default: all bits of KeyT)
     * \param stream                CUDA stream forwarded to Dispatch (default stream 0)
     * \param debug_synchronous     Debug flag forwarded to Dispatch
     * \return The cudaError_t produced by DispatchRadixSort::Dispatch
     */
    template <typename KeyT, typename ValueT>
    CUB_RUNTIME_FUNCTION
    static cudaError_t SortPairsDescending(
        void                *d_temp_storage,
        size_t              &temp_storage_bytes,
        const KeyT          *d_keys_in,
        KeyT                *d_keys_out,
        const ValueT        *d_values_in,
        ValueT              *d_values_out,
        int                 num_items,
        int                 begin_bit           = 0,
        int                 end_bit             = sizeof(KeyT) * 8,
        cudaStream_t        stream              = 0,
        bool                debug_synchronous   = false)
    {
        // Descending dispatcher; int is the signed integer type for global offsets
        typedef DispatchRadixSort<true, KeyT, ValueT, int> DispatchT;

        // Wrap the in/out pointers; const is dropped only to fit the wrapper type
        DoubleBuffer<KeyT>      key_buffers(const_cast<KeyT*>(d_keys_in), d_keys_out);
        DoubleBuffer<ValueT>    value_buffers(const_cast<ValueT*>(d_values_in), d_values_out);

        // Separate in/out pointers were supplied, so overwriting is not permitted
        const bool is_overwrite_okay = false;

        return DispatchT::Dispatch(
            d_temp_storage, temp_storage_bytes,
            key_buffers, value_buffers,
            num_items, begin_bit, end_bit,
            is_overwrite_okay, stream, debug_synchronous);
    }


    /**
     * \brief Sorts key-value pairs into descending order. (~N auxiliary storage required)
     *
     * \param d_temp_storage        Temporary storage pointer, forwarded to Dispatch
     * \param temp_storage_bytes    Reference to the temporary storage size, forwarded to Dispatch
     * \param d_keys                Double-buffer of keys, passed by reference to Dispatch
     * \param d_values              Double-buffer of values, passed by reference to Dispatch
     * \param num_items             Number of items to sort
     * \param begin_bit             First bit of the compared key range (default 0)
     * \param end_bit               Past-the-end (most-significant) bit index needed for key comparison (default: all bits of KeyT)
     * \param stream                CUDA stream forwarded to Dispatch (default stream 0)
     * \param debug_synchronous     Debug flag forwarded to Dispatch
     * \return The cudaError_t produced by DispatchRadixSort::Dispatch
     */
    template <typename KeyT, typename ValueT>
    CUB_RUNTIME_FUNCTION
    static cudaError_t SortPairsDescending(
        void                    *d_temp_storage,
        size_t                  &temp_storage_bytes,
        DoubleBuffer<KeyT>      &d_keys,
        DoubleBuffer<ValueT>    &d_values,
        int                     num_items,
        int                     begin_bit           = 0,
        int                     end_bit             = sizeof(KeyT) * 8,
        cudaStream_t            stream              = 0,
        bool                    debug_synchronous   = false)
    {
        // Descending dispatcher; int is the signed integer type for global offsets
        typedef DispatchRadixSort<true, KeyT, ValueT, int> DispatchT;

        // Caller handed over double-buffers, so Dispatch is told overwriting is okay
        const bool is_overwrite_okay = true;

        return DispatchT::Dispatch(
            d_temp_storage, temp_storage_bytes,
            d_keys, d_values,
            num_items, begin_bit, end_bit,
            is_overwrite_okay, stream, debug_synchronous);
    }


    /******************************************************************/
    // Keys-only sorting

    /**
     * \brief Sorts keys into ascending order. (~2N auxiliary storage required)
     *
     * \param d_temp_storage        Temporary storage pointer, forwarded to Dispatch
     * \param temp_storage_bytes    Reference to the temporary storage size, forwarded to Dispatch
     * \param d_keys_in             Input keys
     * \param d_keys_out            Output keys
     * \param num_items             Number of items to sort
     * \param begin_bit             First bit of the compared key range (default 0)
     * \param end_bit               Past-the-end (most-significant) bit index needed for key comparison (default: all bits of KeyT)
     * \param stream                CUDA stream forwarded to Dispatch (default stream 0)
     * \param debug_synchronous     Debug flag forwarded to Dispatch
     * \return The cudaError_t produced by DispatchRadixSort::Dispatch
     */
    template <typename KeyT>
    CUB_RUNTIME_FUNCTION
    static cudaError_t SortKeys(
        void                *d_temp_storage,
        size_t              &temp_storage_bytes,
        const KeyT          *d_keys_in,
        KeyT                *d_keys_out,
        int                 num_items,
        int                 begin_bit           = 0,
        int                 end_bit             = sizeof(KeyT) * 8,
        cudaStream_t        stream              = 0,
        bool                debug_synchronous   = false)
    {
        // Ascending, keys-only dispatcher (NullType stands in for the value type)
        typedef DispatchRadixSort<false, KeyT, NullType, int> DispatchT;

        // Wrap the in/out key pointers; const is dropped only to fit the wrapper type
        DoubleBuffer<KeyT>      key_buffers(const_cast<KeyT*>(d_keys_in), d_keys_out);

        // Default-constructed placeholder: there are no values to move
        DoubleBuffer<NullType>  no_values;

        // Separate in/out pointers were supplied, so overwriting is not permitted
        const bool is_overwrite_okay = false;

        return DispatchT::Dispatch(
            d_temp_storage, temp_storage_bytes,
            key_buffers, no_values,
            num_items, begin_bit, end_bit,
            is_overwrite_okay, stream, debug_synchronous);
    }


    /**
     * \brief Sorts keys into ascending order. (~N auxiliary storage required)
     *
     * \param d_temp_storage        Temporary storage pointer, forwarded to Dispatch
     * \param temp_storage_bytes    Reference to the temporary storage size, forwarded to Dispatch
     * \param d_keys                Double-buffer of keys, passed by reference to Dispatch
     * \param num_items             Number of items to sort
     * \param begin_bit             First bit of the compared key range (default 0)
     * \param end_bit               Past-the-end (most-significant) bit index needed for key comparison (default: all bits of KeyT)
     * \param stream                CUDA stream forwarded to Dispatch (default stream 0)
     * \param debug_synchronous     Debug flag forwarded to Dispatch
     * \return The cudaError_t produced by DispatchRadixSort::Dispatch
     */
    template <typename KeyT>
    CUB_RUNTIME_FUNCTION
    static cudaError_t SortKeys(
        void                *d_temp_storage,
        size_t              &temp_storage_bytes,
        DoubleBuffer<KeyT>  &d_keys,
        int                 num_items,
        int                 begin_bit           = 0,
        int                 end_bit             = sizeof(KeyT) * 8,
        cudaStream_t        stream              = 0,
        bool                debug_synchronous   = false)
    {
        // Ascending, keys-only dispatcher (NullType stands in for the value type)
        typedef DispatchRadixSort<false, KeyT, NullType, int> DispatchT;

        // Default-constructed placeholder: there are no values to move
        DoubleBuffer<NullType> no_values;

        // Caller handed over a double-buffer, so Dispatch is told overwriting is okay
        const bool is_overwrite_okay = true;

        return DispatchT::Dispatch(
            d_temp_storage, temp_storage_bytes,
            d_keys, no_values,
            num_items, begin_bit, end_bit,
            is_overwrite_okay, stream, debug_synchronous);
    }


    /**
     * \brief Sorts keys into descending order. (~2N auxiliary storage required)
     *
     * \param d_temp_storage        Temporary storage pointer, forwarded to Dispatch
     * \param temp_storage_bytes    Reference to the temporary storage size, forwarded to Dispatch
     * \param d_keys_in             Input keys
     * \param d_keys_out            Output keys
     * \param num_items             Number of items to sort
     * \param begin_bit             First bit of the compared key range (default 0)
     * \param end_bit               Past-the-end (most-significant) bit index needed for key comparison (default: all bits of KeyT)
     * \param stream                CUDA stream forwarded to Dispatch (default stream 0)
     * \param debug_synchronous     Debug flag forwarded to Dispatch
     * \return The cudaError_t produced by DispatchRadixSort::Dispatch
     */
    template <typename KeyT>
    CUB_RUNTIME_FUNCTION
    static cudaError_t SortKeysDescending(
        void                *d_temp_storage,
        size_t              &temp_storage_bytes,
        const KeyT          *d_keys_in,
        KeyT                *d_keys_out,
        int                 num_items,
        int                 begin_bit           = 0,
        int                 end_bit             = sizeof(KeyT) * 8,
        cudaStream_t        stream              = 0,
        bool                debug_synchronous   = false)
    {
        // Descending, keys-only dispatcher (NullType stands in for the value type)
        typedef DispatchRadixSort<true, KeyT, NullType, int> DispatchT;

        // Wrap the in/out key pointers; const is dropped only to fit the wrapper type
        DoubleBuffer<KeyT>      key_buffers(const_cast<KeyT*>(d_keys_in), d_keys_out);

        // Default-constructed placeholder: there are no values to move
        DoubleBuffer<NullType>  no_values;

        // Separate in/out pointers were supplied, so overwriting is not permitted
        const bool is_overwrite_okay = false;

        return DispatchT::Dispatch(
            d_temp_storage, temp_storage_bytes,
            key_buffers, no_values,
            num_items, begin_bit, end_bit,
            is_overwrite_okay, stream, debug_synchronous);
    }


    /**
     * \brief Sorts keys into descending order. (~N auxiliary storage required)
     *
     * \param d_temp_storage        Temporary storage pointer, forwarded to Dispatch
     * \param temp_storage_bytes    Reference to the temporary storage size, forwarded to Dispatch
     * \param d_keys                Double-buffer of keys, passed by reference to Dispatch
     * \param num_items             Number of items to sort
     * \param begin_bit             First bit of the compared key range (default 0)
     * \param end_bit               Past-the-end (most-significant) bit index needed for key comparison (default: all bits of KeyT)
     * \param stream                CUDA stream forwarded to Dispatch (default stream 0)
     * \param debug_synchronous     Debug flag forwarded to Dispatch
     * \return The cudaError_t produced by DispatchRadixSort::Dispatch
     */
    template <typename KeyT>
    CUB_RUNTIME_FUNCTION
    static cudaError_t SortKeysDescending(
        void                *d_temp_storage,
        size_t              &temp_storage_bytes,
        DoubleBuffer<KeyT>  &d_keys,
        int                 num_items,
        int                 begin_bit           = 0,
        int                 end_bit             = sizeof(KeyT) * 8,
        cudaStream_t        stream              = 0,
        bool                debug_synchronous   = false)
    {
        // Descending, keys-only dispatcher (NullType stands in for the value type)
        typedef DispatchRadixSort<true, KeyT, NullType, int> DispatchT;

        // Default-constructed placeholder: there are no values to move
        DoubleBuffer<NullType> no_values;

        // Caller handed over a double-buffer, so Dispatch is told overwriting is okay
        const bool is_overwrite_okay = true;

        return DispatchT::Dispatch(
            d_temp_storage, temp_storage_bytes,
            d_keys, no_values,
            num_items, begin_bit, end_bit,
            is_overwrite_okay, stream, debug_synchronous);
    }

};


}               // CUB namespace

CUB_NS_POSTFIX  // Optional outer namespace(s)


dispatch_radix_sort.cuh

cub
Optional outer namespace(s)
Definition agent_histogram.cuh:48

cub::d_values_out
KeyT const ValueT ValueT * d_values_out
[out] Output values buffer
Definition dispatch_radix_sort.cuh:166

cub::end_bit
KeyT const ValueT ValueT OffsetT int int end_bit
[in] The past-the-end (most-significant) bit index needed for key comparison
Definition dispatch_radix_sort.cuh:220

cub::num_items
KeyT const ValueT ValueT OffsetT OffsetT num_items
[in] Total number of input data items
Definition dispatch_radix_sort.cuh:168

cub::d_keys_out
KeyT * d_keys_out
[out] Output keys buffer
Definition dispatch_radix_sort.cuh:164

cub::d_values_in
KeyT const ValueT * d_values_in
[in] Input values buffer
Definition dispatch_radix_sort.cuh:165

cub::OffsetT
OffsetT OffsetT
[in] Total number of input data items
Definition dispatch_radix_sort.cuh:75

cub::DeviceRadixSort
DeviceRadixSort provides device-wide, parallel operations for computing a radix sort across a sequenc...
Definition device_radix_sort.cuh:84

cub::DeviceRadixSort::SortKeys
static CUB_RUNTIME_FUNCTION cudaError_t SortKeys(void *d_temp_storage, size_t &temp_storage_bytes, DoubleBuffer< KeyT > &d_keys, int num_items, int begin_bit=0, int end_bit=sizeof(KeyT) *8, cudaStream_t stream=0, bool debug_synchronous=false)
Sorts keys into ascending order. (~N auxiliary storage required).
Definition device_radix_sort.cuh:596

cub::DeviceRadixSort::SortKeysDescending
static CUB_RUNTIME_FUNCTION cudaError_t SortKeysDescending(void *d_temp_storage, size_t &temp_storage_bytes, DoubleBuffer< KeyT > &d_keys, int num_items, int begin_bit=0, int end_bit=sizeof(KeyT) *8, cudaStream_t stream=0, bool debug_synchronous=false)
Sorts keys into descending order. (~N auxiliary storage required).
Definition device_radix_sort.cuh:755

cub::DeviceRadixSort::SortPairsDescending
static CUB_RUNTIME_FUNCTION cudaError_t SortPairsDescending(void *d_temp_storage, size_t &temp_storage_bytes, DoubleBuffer< KeyT > &d_keys, DoubleBuffer< ValueT > &d_values, int num_items, int begin_bit=0, int end_bit=sizeof(KeyT) *8, cudaStream_t stream=0, bool debug_synchronous=false)
Sorts key-value pairs into descending order. (~N auxiliary storage required).
Definition device_radix_sort.cuh:425

cub::DeviceRadixSort::SortKeys
static CUB_RUNTIME_FUNCTION cudaError_t SortKeys(void *d_temp_storage, size_t &temp_storage_bytes, const KeyT *d_keys_in, KeyT *d_keys_out, int num_items, int begin_bit=0, int end_bit=sizeof(KeyT) *8, cudaStream_t stream=0, bool debug_synchronous=false)
Sorts keys into ascending order. (~2N auxiliary storage required)
Definition device_radix_sort.cuh:507

cub::DeviceRadixSort::SortPairs
static CUB_RUNTIME_FUNCTION cudaError_t SortPairs(void *d_temp_storage, size_t &temp_storage_bytes, const KeyT *d_keys_in, KeyT *d_keys_out, const ValueT *d_values_in, ValueT *d_values_out, int num_items, int begin_bit=0, int end_bit=sizeof(KeyT) *8, cudaStream_t stream=0, bool debug_synchronous=false)
Sorts key-value pairs into ascending order. (~2N auxiliary storage required)
Definition device_radix_sort.cuh:148

cub::DeviceRadixSort::SortPairsDescending
static CUB_RUNTIME_FUNCTION cudaError_t SortPairsDescending(void *d_temp_storage, size_t &temp_storage_bytes, const KeyT *d_keys_in, KeyT *d_keys_out, const ValueT *d_values_in, ValueT *d_values_out, int num_items, int begin_bit=0, int end_bit=sizeof(KeyT) *8, cudaStream_t stream=0, bool debug_synchronous=false)
Sorts key-value pairs into descending order. (~2N auxiliary storage required).
Definition device_radix_sort.cuh:329

cub::DeviceRadixSort::SortKeysDescending
static CUB_RUNTIME_FUNCTION cudaError_t SortKeysDescending(void *d_temp_storage, size_t &temp_storage_bytes, const KeyT *d_keys_in, KeyT *d_keys_out, int num_items, int begin_bit=0, int end_bit=sizeof(KeyT) *8, cudaStream_t stream=0, bool debug_synchronous=false)
Sorts keys into descending order. (~2N auxiliary storage required).
Definition device_radix_sort.cuh:671

cub::DeviceRadixSort::SortPairs
static CUB_RUNTIME_FUNCTION cudaError_t SortPairs(void *d_temp_storage, size_t &temp_storage_bytes, DoubleBuffer< KeyT > &d_keys, DoubleBuffer< ValueT > &d_values, int num_items, int begin_bit=0, int end_bit=sizeof(KeyT) *8, cudaStream_t stream=0, bool debug_synchronous=false)
Sorts key-value pairs into ascending order. (~N auxiliary storage required)
Definition device_radix_sort.cuh:249

cub::DispatchRadixSort::Dispatch
CUB_RUNTIME_FUNCTION static __forceinline__ cudaError_t Dispatch(void *d_temp_storage, size_t &temp_storage_bytes, DoubleBuffer< KeyT > &d_keys, DoubleBuffer< ValueT > &d_values, OffsetT num_items, int begin_bit, int end_bit, bool is_overwrite_okay, cudaStream_t stream, bool debug_synchronous)
Definition dispatch_radix_sort.cuh:1255

cub::DoubleBuffer
Double-buffer storage wrapper for multi-pass stream transformations that require more than one storag...
Definition util_type.cuh:792