doxygen/openfpm/block__radix__sort_8cuh_source.html

/******************************************************************************

 * Copyright (c) 2011, Duane Merrill.  All rights reserved.

 * Copyright (c) 2011-2018, NVIDIA CORPORATION.  All rights reserved.

 *

 * Redistribution and use in source and binary forms, with or without

 * modification, are permitted provided that the following conditions are met:

 *     * Redistributions of source code must retain the above copyright

 *       notice, this list of conditions and the following disclaimer.

 *     * Redistributions in binary form must reproduce the above copyright

 *       notice, this list of conditions and the following disclaimer in the

 *       documentation and/or other materials provided with the distribution.

 *     * Neither the name of the NVIDIA CORPORATION nor the

 *       names of its contributors may be used to endorse or promote products

 *       derived from this software without specific prior written permission.

 *

 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND

 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED

 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE

 * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY

 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES

 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;

 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND

 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS

 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 *

 ******************************************************************************/


#pragma once


#include "block_exchange.cuh"

#include "block_radix_rank.cuh"

#include "../util_ptx.cuh"

#include "../util_arch.cuh"

#include "../util_type.cuh"

#include "../util_namespace.cuh"


CUB_NS_PREFIX


namespace cub {


template <

    typename                KeyT,

    int                     BLOCK_DIM_X,

    int                     ITEMS_PER_THREAD,

    typename                ValueT                   = NullType,

    int                     RADIX_BITS              = 4,

    bool                    MEMOIZE_OUTER_SCAN      = (CUB_PTX_ARCH >= 350) ? true : false,

    BlockScanAlgorithm      INNER_SCAN_ALGORITHM    = BLOCK_SCAN_WARP_SCANS,

    cudaSharedMemConfig     SMEM_CONFIG             = cudaSharedMemBankSizeFourByte,

    int                     BLOCK_DIM_Y             = 1,

    int                     BLOCK_DIM_Z             = 1,

    int                     PTX_ARCH                = CUB_PTX_ARCH>

class BlockRadixSort

{

private:


    /******************************************************************************

     * Constants and type definitions

     ******************************************************************************/


    enum

    {

        // The thread block size in threads

        BLOCK_THREADS               = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z,


        // Whether or not there are values to be trucked along with keys

        KEYS_ONLY                   = Equals<ValueT, NullType>::VALUE,

    };


    // KeyT traits and unsigned bits type

    typedef Traits<KeyT>                        KeyTraits;

    typedef typename KeyTraits::UnsignedBits    UnsignedBits;


    typedef BlockRadixRank<

            BLOCK_DIM_X,

            RADIX_BITS,

            false,

            MEMOIZE_OUTER_SCAN,

            INNER_SCAN_ALGORITHM,

            SMEM_CONFIG,

            BLOCK_DIM_Y,

            BLOCK_DIM_Z,

            PTX_ARCH>

        AscendingBlockRadixRank;


    typedef BlockRadixRank<

            BLOCK_DIM_X,

            RADIX_BITS,

            true,

            MEMOIZE_OUTER_SCAN,

            INNER_SCAN_ALGORITHM,

            SMEM_CONFIG,

            BLOCK_DIM_Y,

            BLOCK_DIM_Z,

            PTX_ARCH>

        DescendingBlockRadixRank;


    typedef BlockExchange<KeyT, BLOCK_DIM_X, ITEMS_PER_THREAD, false, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH> BlockExchangeKeys;


    typedef BlockExchange<ValueT, BLOCK_DIM_X, ITEMS_PER_THREAD, false, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH> BlockExchangeValues;


    union _TempStorage

    {

        typename AscendingBlockRadixRank::TempStorage  asending_ranking_storage;

        typename DescendingBlockRadixRank::TempStorage descending_ranking_storage;

        typename BlockExchangeKeys::TempStorage        exchange_keys;

        typename BlockExchangeValues::TempStorage      exchange_values;

    };


    /******************************************************************************

     * Thread fields

     ******************************************************************************/


    _TempStorage &temp_storage;


    unsigned int linear_tid;


    /******************************************************************************

     * Utility methods

     ******************************************************************************/


    __device__ __forceinline__ _TempStorage& PrivateStorage()

    {

        __shared__ _TempStorage private_storage;

        return private_storage;

    }


    __device__ __forceinline__ void RankKeys(

        UnsignedBits    (&unsigned_keys)[ITEMS_PER_THREAD],

        int             (&ranks)[ITEMS_PER_THREAD],

        int             begin_bit,

        int             pass_bits,

        Int2Type<false> /*is_descending*/)

    {

        AscendingBlockRadixRank(temp_storage.asending_ranking_storage).RankKeys(

            unsigned_keys,

            ranks,

            begin_bit,

            pass_bits);

    }


    __device__ __forceinline__ void RankKeys(

        UnsignedBits    (&unsigned_keys)[ITEMS_PER_THREAD],

        int             (&ranks)[ITEMS_PER_THREAD],

        int             begin_bit,

        int             pass_bits,

        Int2Type<true>  /*is_descending*/)

    {

        DescendingBlockRadixRank(temp_storage.descending_ranking_storage).RankKeys(

            unsigned_keys,

            ranks,

            begin_bit,

            pass_bits);

    }


    __device__ __forceinline__ void ExchangeValues(

        ValueT          (&values)[ITEMS_PER_THREAD],

        int             (&ranks)[ITEMS_PER_THREAD],

        Int2Type<false> /*is_keys_only*/,

        Int2Type<true>  /*is_blocked*/)

    {

        CTA_SYNC();


        // Exchange values through shared memory in blocked arrangement

        BlockExchangeValues(temp_storage.exchange_values).ScatterToBlocked(values, ranks);

    }


    __device__ __forceinline__ void ExchangeValues(

        ValueT          (&values)[ITEMS_PER_THREAD],

        int             (&ranks)[ITEMS_PER_THREAD],

        Int2Type<false> /*is_keys_only*/,

        Int2Type<false> /*is_blocked*/)

    {

        CTA_SYNC();


        // Exchange values through shared memory in blocked arrangement

        BlockExchangeValues(temp_storage.exchange_values).ScatterToStriped(values, ranks);

    }


    template <int IS_BLOCKED>

    __device__ __forceinline__ void ExchangeValues(

        ValueT                  (&/*values*/)[ITEMS_PER_THREAD],

        int                     (&/*ranks*/)[ITEMS_PER_THREAD],

        Int2Type<true>          /*is_keys_only*/,

        Int2Type<IS_BLOCKED>    /*is_blocked*/)

    {}


    template <int DESCENDING, int KEYS_ONLY>

    __device__ __forceinline__ void SortBlocked(

        KeyT                    (&keys)[ITEMS_PER_THREAD],

        ValueT                  (&values)[ITEMS_PER_THREAD],

        int                     begin_bit,

        int                     end_bit,

        Int2Type<DESCENDING>    is_descending,

        Int2Type<KEYS_ONLY>     is_keys_only)

    {

        UnsignedBits (&unsigned_keys)[ITEMS_PER_THREAD] =

            reinterpret_cast<UnsignedBits (&)[ITEMS_PER_THREAD]>(keys);


        // Twiddle bits if necessary

        #pragma unroll

        for (int KEY = 0; KEY < ITEMS_PER_THREAD; KEY++)

        {

            unsigned_keys[KEY] = KeyTraits::TwiddleIn(unsigned_keys[KEY]);

        }


        // Radix sorting passes

        while (true)

        {

            int pass_bits = CUB_MIN(RADIX_BITS, end_bit - begin_bit);


            // Rank the blocked keys

            int ranks[ITEMS_PER_THREAD];

            RankKeys(unsigned_keys, ranks, begin_bit, pass_bits, is_descending);

            begin_bit += RADIX_BITS;


            CTA_SYNC();


            // Exchange keys through shared memory in blocked arrangement

            BlockExchangeKeys(temp_storage.exchange_keys).ScatterToBlocked(keys, ranks);


            // Exchange values through shared memory in blocked arrangement

            ExchangeValues(values, ranks, is_keys_only, Int2Type<true>());


            // Quit if done

            if (begin_bit >= end_bit) break;


            CTA_SYNC();

        }


        // Untwiddle bits if necessary

        #pragma unroll

        for (int KEY = 0; KEY < ITEMS_PER_THREAD; KEY++)

        {

            unsigned_keys[KEY] = KeyTraits::TwiddleOut(unsigned_keys[KEY]);

        }

    }


public:


#ifndef DOXYGEN_SHOULD_SKIP_THIS    // Do not document


    template <int DESCENDING, int KEYS_ONLY>

    __device__ __forceinline__ void SortBlockedToStriped(

        KeyT                    (&keys)[ITEMS_PER_THREAD],

        ValueT                  (&values)[ITEMS_PER_THREAD],

        int                     begin_bit,

        int                     end_bit,

        Int2Type<DESCENDING>    is_descending,

        Int2Type<KEYS_ONLY>     is_keys_only)

    {

        UnsignedBits (&unsigned_keys)[ITEMS_PER_THREAD] =

            reinterpret_cast<UnsignedBits (&)[ITEMS_PER_THREAD]>(keys);


        // Twiddle bits if necessary

        #pragma unroll

        for (int KEY = 0; KEY < ITEMS_PER_THREAD; KEY++)

        {

            unsigned_keys[KEY] = KeyTraits::TwiddleIn(unsigned_keys[KEY]);

        }


        // Radix sorting passes

        while (true)

        {

            int pass_bits = CUB_MIN(RADIX_BITS, end_bit - begin_bit);


            // Rank the blocked keys

            int ranks[ITEMS_PER_THREAD];

            RankKeys(unsigned_keys, ranks, begin_bit, pass_bits, is_descending);

            begin_bit += RADIX_BITS;


            CTA_SYNC();


            // Check if this is the last pass

            if (begin_bit >= end_bit)

            {

                // Last pass exchanges keys through shared memory in striped arrangement

                BlockExchangeKeys(temp_storage.exchange_keys).ScatterToStriped(keys, ranks);


                // Last pass exchanges through shared memory in striped arrangement

                ExchangeValues(values, ranks, is_keys_only, Int2Type<false>());


                // Quit

                break;

            }


            // Exchange keys through shared memory in blocked arrangement

            BlockExchangeKeys(temp_storage.exchange_keys).ScatterToBlocked(keys, ranks);


            // Exchange values through shared memory in blocked arrangement

            ExchangeValues(values, ranks, is_keys_only, Int2Type<true>());


            CTA_SYNC();

        }


        // Untwiddle bits if necessary

        #pragma unroll

        for (int KEY = 0; KEY < ITEMS_PER_THREAD; KEY++)

        {

            unsigned_keys[KEY] = KeyTraits::TwiddleOut(unsigned_keys[KEY]);

        }

    }


#endif // DOXYGEN_SHOULD_SKIP_THIS


    struct TempStorage : Uninitialized<_TempStorage> {};


    /******************************************************************/


    __device__ __forceinline__ BlockRadixSort()

    :

        temp_storage(PrivateStorage()),

        linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z))

    {}


    __device__ __forceinline__ BlockRadixSort(

        TempStorage &temp_storage)

    :

        temp_storage(temp_storage.Alias()),

        linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z))

    {}


    /******************************************************************/


    __device__ __forceinline__ void Sort(

        KeyT    (&keys)[ITEMS_PER_THREAD],

        int     begin_bit   = 0,

        int     end_bit     = sizeof(KeyT) * 8)

    {

        NullType values[ITEMS_PER_THREAD];


        SortBlocked(keys, values, begin_bit, end_bit, Int2Type<false>(), Int2Type<KEYS_ONLY>());

    }


    __device__ __forceinline__ void Sort(

        KeyT    (&keys)[ITEMS_PER_THREAD],

        ValueT  (&values)[ITEMS_PER_THREAD],

        int     begin_bit   = 0,

        int     end_bit     = sizeof(KeyT) * 8)

    {

        SortBlocked(keys, values, begin_bit, end_bit, Int2Type<false>(), Int2Type<KEYS_ONLY>());

    }


    __device__ __forceinline__ void SortDescending(

        KeyT    (&keys)[ITEMS_PER_THREAD],

        int     begin_bit   = 0,

        int     end_bit     = sizeof(KeyT) * 8)

    {

        NullType values[ITEMS_PER_THREAD];


        SortBlocked(keys, values, begin_bit, end_bit, Int2Type<true>(), Int2Type<KEYS_ONLY>());

    }


    __device__ __forceinline__ void SortDescending(

        KeyT    (&keys)[ITEMS_PER_THREAD],

        ValueT  (&values)[ITEMS_PER_THREAD],

        int     begin_bit   = 0,

        int     end_bit     = sizeof(KeyT) * 8)

    {

        SortBlocked(keys, values, begin_bit, end_bit, Int2Type<true>(), Int2Type<KEYS_ONLY>());

    }


    /******************************************************************/


    __device__ __forceinline__ void SortBlockedToStriped(

        KeyT    (&keys)[ITEMS_PER_THREAD],

        int     begin_bit   = 0,

        int     end_bit     = sizeof(KeyT) * 8)

    {

        NullType values[ITEMS_PER_THREAD];


        SortBlockedToStriped(keys, values, begin_bit, end_bit, Int2Type<false>(), Int2Type<KEYS_ONLY>());

    }


    __device__ __forceinline__ void SortBlockedToStriped(

        KeyT    (&keys)[ITEMS_PER_THREAD],

        ValueT  (&values)[ITEMS_PER_THREAD],

        int     begin_bit   = 0,

        int     end_bit     = sizeof(KeyT) * 8)

    {

        SortBlockedToStriped(keys, values, begin_bit, end_bit, Int2Type<false>(), Int2Type<KEYS_ONLY>());

    }


    __device__ __forceinline__ void SortDescendingBlockedToStriped(

        KeyT    (&keys)[ITEMS_PER_THREAD],

        int     begin_bit   = 0,

        int     end_bit     = sizeof(KeyT) * 8)

    {

        NullType values[ITEMS_PER_THREAD];


        SortBlockedToStriped(keys, values, begin_bit, end_bit, Int2Type<true>(), Int2Type<KEYS_ONLY>());

    }


    __device__ __forceinline__ void SortDescendingBlockedToStriped(

        KeyT    (&keys)[ITEMS_PER_THREAD],

        ValueT  (&values)[ITEMS_PER_THREAD],

        int     begin_bit   = 0,

        int     end_bit     = sizeof(KeyT) * 8)

    {

        SortBlockedToStriped(keys, values, begin_bit, end_bit, Int2Type<true>(), Int2Type<KEYS_ONLY>());

    }


};


}               // CUB namespace

CUB_NS_POSTFIX  // Optional outer namespace(s)


block_exchange.cuh

block_radix_rank.cuh

cub::BlockExchange
The BlockExchange class provides collective methods for rearranging data partitioned across a CUDA th...
Definition block_exchange.cuh:117

cub::BlockExchange::ScatterToBlocked
__device__ __forceinline__ void ScatterToBlocked(InputT input_items[ITEMS_PER_THREAD], OutputT output_items[ITEMS_PER_THREAD], OffsetT ranks[ITEMS_PER_THREAD], Int2Type< false >)
Definition block_exchange.cuh:534

cub::BlockExchange::ScatterToStriped
__device__ __forceinline__ void ScatterToStriped(InputT input_items[ITEMS_PER_THREAD], OutputT output_items[ITEMS_PER_THREAD], OffsetT ranks[ITEMS_PER_THREAD], Int2Type< false >)
Definition block_exchange.cuh:616

cub::BlockRadixRank
BlockRadixRank provides operations for ranking unsigned integer types within a CUDA thread block.
Definition block_radix_rank.cuh:98

cub::BlockRadixRank::RankKeys
__device__ __forceinline__ void RankKeys(UnsignedBits(&keys)[KEYS_PER_THREAD], int(&ranks)[KEYS_PER_THREAD], int current_bit, int num_bits)
Rank keys.
Definition block_radix_rank.cuh:351

cub::BlockRadixSort
The BlockRadixSort class provides collective methods for sorting items partitioned across a CUDA thre...
Definition block_radix_sort.cuh:133

cub::BlockRadixSort::linear_tid
unsigned int linear_tid
Linear thread-id.
Definition block_radix_sort.cuh:203

cub::BlockRadixSort::ExchangeValues
__device__ __forceinline__ void ExchangeValues(ValueT(&values)[ITEMS_PER_THREAD], int(&ranks)[ITEMS_PER_THREAD], Int2Type< false >, Int2Type< true >)
ExchangeValues (specialized for key-value sort, to-blocked arrangement)
Definition block_radix_sort.cuh:247

cub::BlockRadixSort::RankKeys
__device__ __forceinline__ void RankKeys(UnsignedBits(&unsigned_keys)[ITEMS_PER_THREAD], int(&ranks)[ITEMS_PER_THREAD], int begin_bit, int pass_bits, Int2Type< false >)
Rank keys (specialized for ascending sort)
Definition block_radix_sort.cuh:217

cub::BlockRadixSort::temp_storage
_TempStorage & temp_storage
Shared storage reference.
Definition block_radix_sort.cuh:200

cub::BlockRadixSort::SortBlockedToStriped
__device__ __forceinline__ void SortBlockedToStriped(KeyT(&keys)[ITEMS_PER_THREAD], ValueT(&values)[ITEMS_PER_THREAD], int begin_bit, int end_bit, Int2Type< DESCENDING > is_descending, Int2Type< KEYS_ONLY > is_keys_only)
Sort blocked -> striped arrangement.
Definition block_radix_sort.cuh:339

cub::BlockRadixSort::PrivateStorage
__device__ __forceinline__ _TempStorage & PrivateStorage()
Internal storage allocator.
Definition block_radix_sort.cuh:210

cub::BlockRadixSort::SortDescending
__device__ __forceinline__ void SortDescending(KeyT(&keys)[ITEMS_PER_THREAD], ValueT(&values)[ITEMS_PER_THREAD], int begin_bit=0, int end_bit=sizeof(KeyT) *8)
Performs a descending block-wide radix sort across a blocked arrangement of keys and values.
Definition block_radix_sort.cuh:630

cub::BlockRadixSort::ExchangeValues
__device__ __forceinline__ void ExchangeValues(ValueT(&values)[ITEMS_PER_THREAD], int(&ranks)[ITEMS_PER_THREAD], Int2Type< false >, Int2Type< false >)
ExchangeValues (specialized for key-value sort, to-striped arrangement)
Definition block_radix_sort.cuh:260

cub::BlockRadixSort::SortDescending
__device__ __forceinline__ void SortDescending(KeyT(&keys)[ITEMS_PER_THREAD], int begin_bit=0, int end_bit=sizeof(KeyT) *8)
Performs a descending block-wide radix sort over a blocked arrangement of keys.
Definition block_radix_sort.cuh:575

cub::BlockRadixSort::SortDescendingBlockedToStriped
__device__ __forceinline__ void SortDescendingBlockedToStriped(KeyT(&keys)[ITEMS_PER_THREAD], int begin_bit=0, int end_bit=sizeof(KeyT) *8)
Performs a descending radix sort across a blocked arrangement of keys, leaving them in a striped arra...
Definition block_radix_sort.cuh:788

cub::BlockRadixSort::DescendingBlockRadixRank
BlockRadixRank< BLOCK_DIM_X, RADIX_BITS, true, MEMOIZE_OUTER_SCAN, INNER_SCAN_ALGORITHM, SMEM_CONFIG, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH > DescendingBlockRadixRank
Descending BlockRadixRank utility type.
Definition block_radix_sort.cuh:177

cub::BlockRadixSort::Sort
__device__ __forceinline__ void Sort(KeyT(&keys)[ITEMS_PER_THREAD], ValueT(&values)[ITEMS_PER_THREAD], int begin_bit=0, int end_bit=sizeof(KeyT) *8)
Performs an ascending block-wide radix sort across a blocked arrangement of keys and values.
Definition block_radix_sort.cuh:529

cub::BlockRadixSort::ExchangeValues
__device__ __forceinline__ void ExchangeValues(ValueT(&)[ITEMS_PER_THREAD], int(&)[ITEMS_PER_THREAD], Int2Type< true >, Int2Type< IS_BLOCKED >)
ExchangeValues (specialized for keys-only sort)
Definition block_radix_sort.cuh:274

cub::BlockRadixSort::SortBlocked
__device__ __forceinline__ void SortBlocked(KeyT(&keys)[ITEMS_PER_THREAD], ValueT(&values)[ITEMS_PER_THREAD], int begin_bit, int end_bit, Int2Type< DESCENDING > is_descending, Int2Type< KEYS_ONLY > is_keys_only)
Sort blocked arrangement.
Definition block_radix_sort.cuh:283

cub::BlockRadixSort::BlockRadixSort
__device__ __forceinline__ BlockRadixSort()
Collective constructor using a private static allocation of shared memory as temporary storage.
Definition block_radix_sort.cuh:413

cub::BlockRadixSort::RankKeys
__device__ __forceinline__ void RankKeys(UnsignedBits(&unsigned_keys)[ITEMS_PER_THREAD], int(&ranks)[ITEMS_PER_THREAD], int begin_bit, int pass_bits, Int2Type< true >)
Rank keys (specialized for descending sort)
Definition block_radix_sort.cuh:232

cub::BlockRadixSort::BlockExchangeKeys
BlockExchange< KeyT, BLOCK_DIM_X, ITEMS_PER_THREAD, false, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH > BlockExchangeKeys
BlockExchange utility type for keys.
Definition block_radix_sort.cuh:180

cub::BlockRadixSort::SortDescendingBlockedToStriped
__device__ __forceinline__ void SortDescendingBlockedToStriped(KeyT(&keys)[ITEMS_PER_THREAD], ValueT(&values)[ITEMS_PER_THREAD], int begin_bit=0, int end_bit=sizeof(KeyT) *8)
Performs a descending radix sort across a blocked arrangement of keys and values, leaving them in a s...
Definition block_radix_sort.cuh:843

cub::BlockRadixSort::Sort
__device__ __forceinline__ void Sort(KeyT(&keys)[ITEMS_PER_THREAD], int begin_bit=0, int end_bit=sizeof(KeyT) *8)
Performs an ascending block-wide radix sort over a blocked arrangement of keys.
Definition block_radix_sort.cuh:474

cub::BlockRadixSort::SortBlockedToStriped
__device__ __forceinline__ void SortBlockedToStriped(KeyT(&keys)[ITEMS_PER_THREAD], ValueT(&values)[ITEMS_PER_THREAD], int begin_bit=0, int end_bit=sizeof(KeyT) *8)
Performs an ascending radix sort across a blocked arrangement of keys and values, leaving them in a s...
Definition block_radix_sort.cuh:740

cub::BlockRadixSort::SortBlockedToStriped
__device__ __forceinline__ void SortBlockedToStriped(KeyT(&keys)[ITEMS_PER_THREAD], int begin_bit=0, int end_bit=sizeof(KeyT) *8)
Performs an ascending radix sort across a blocked arrangement of keys, leaving them in a striped arra...
Definition block_radix_sort.cuh:685

cub::BlockRadixSort::BlockExchangeValues
BlockExchange< ValueT, BLOCK_DIM_X, ITEMS_PER_THREAD, false, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH > BlockExchangeValues
BlockExchange utility type for values.
Definition block_radix_sort.cuh:183

cub::BlockRadixSort::AscendingBlockRadixRank
BlockRadixRank< BLOCK_DIM_X, RADIX_BITS, false, MEMOIZE_OUTER_SCAN, INNER_SCAN_ALGORITHM, SMEM_CONFIG, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH > AscendingBlockRadixRank
Ascending BlockRadixRank utility type.
Definition block_radix_sort.cuh:164

cub::BlockRadixSort::BlockRadixSort
__device__ __forceinline__ BlockRadixSort(TempStorage &temp_storage)
Collective constructor using the specified memory allocation as temporary storage.
Definition block_radix_sort.cuh:423

CUB_MIN
#define CUB_MIN(a, b)
Select minimum(a, b)
Definition util_macro.cuh:66

cub::RowMajorTid
__device__ __forceinline__ int RowMajorTid(int block_dim_x, int block_dim_y, int block_dim_z)
Returns the row-major linear thread identifier for a multidimensional thread block.
Definition util_ptx.cuh:409

cub::CTA_SYNC
CTA_SYNC()
Definition util_ptx.cuh:255

cub
Optional outer namespace(s)
Definition agent_histogram.cuh:48

cub::end_bit
KeyT const ValueT ValueT OffsetT int int end_bit
< [in] The past-the-end (most-significant) bit index needed for key comparison
Definition dispatch_radix_sort.cuh:220

cub::int
KeyT const ValueT ValueT OffsetIteratorT OffsetIteratorT int
[in] The number of segments that comprise the sorting data
Definition dispatch_radix_sort.cuh:336

cub::BlockScanAlgorithm
BlockScanAlgorithm
BlockScanAlgorithm enumerates alternative algorithms for cub::BlockScan to compute a parallel prefix ...
Definition block_scan.cuh:58

cub::BLOCK_SCAN_WARP_SCANS
@ BLOCK_SCAN_WARP_SCANS
Definition block_scan.cuh:108

cub::pass_bits
KeyT const ValueT ValueT OffsetIteratorT OffsetIteratorT int int pass_bits
< [in] Number of bits of current radix digit
Definition dispatch_radix_sort.cuh:339

cub::BlockExchange::TempStorage
\smemstorage{BlockExchange}
Definition block_exchange.cuh:165

cub::BlockRadixRank::TempStorage
\smemstorage{BlockScan}
Definition block_radix_rank.cuh:310

cub::BlockRadixSort::TempStorage
\smemstorage{BlockRadixSort}
Definition block_radix_sort.cuh:402

cub::Equals
Type equality test.
Definition util_type.cuh:99

cub::Int2Type
Allows for the treatment of an integral constant as a type at compile-time (e.g., to achieve static c...
Definition util_type.cuh:276

cub::NullType
A simple "NULL" marker type.
Definition util_type.cuh:257

cub::Traits
Type traits.
Definition util_type.cuh:1158

cub::Uninitialized
A storage-backing wrapper that allows types with non-trivial constructors to be aliased in unions.
Definition util_type.cuh:635

cub::BlockRadixSort::_TempStorage
Shared memory storage layout type.
Definition block_radix_sort.cuh:187

CUB_PTX_ARCH
#define CUB_PTX_ARCH
CUB_PTX_ARCH reflects the PTX version targeted by the active compiler pass (or zero during the host p...
Definition util_arch.cuh:53