OpenFPM_pdata 4.1.0
Project that contains the implementation of distributed structures
 
grid_even_share.cuh
/******************************************************************************
 * Copyright (c) 2011, Duane Merrill.  All rights reserved.
 * Copyright (c) 2011-2018, NVIDIA CORPORATION.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * Neither the name of the NVIDIA CORPORATION nor the
 *       names of its contributors may be used to endorse or promote products
 *       derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 ******************************************************************************/
28
35#pragma once
36
37#include "../util_namespace.cuh"
38#include "../util_macro.cuh"
39#include "grid_mapping.cuh"
40
42CUB_NS_PREFIX
43
45namespace cub {
46
47
/**
 * \addtogroup GridModule
 * @{
 */

/**
 * \brief GridEvenShare is a descriptor utility for distributing input among
 * CUDA thread blocks in an "even-share" fashion.  Each thread block gets
 * roughly the same number of input tiles.
 */
template <typename OffsetT>
struct GridEvenShare
{
private:

    OffsetT     total_tiles;
    int         big_shares;
    OffsetT     big_share_items;
    OffsetT     normal_share_items;
    OffsetT     normal_base_offset;

public:

    /// Total number of input items
    OffsetT     num_items;

    /// Grid size in thread blocks
    int         grid_size;

    /// OffsetT into input marking the beginning of the owning thread
    /// block's segment of input tiles
    OffsetT     block_offset;

    /// OffsetT into input marking the end (one-past) of the owning thread
    /// block's segment of input tiles
    OffsetT     block_end;

    /// Stride between input tiles
    OffsetT     block_stride;

    /// Constructor.
    __host__ __device__ __forceinline__ GridEvenShare() :
        total_tiles(0),
        big_shares(0),
        big_share_items(0),
        normal_share_items(0),
        normal_base_offset(0),
        num_items(0),
        grid_size(0),
        block_offset(0),
        block_end(0),
        block_stride(0)
    {}

    /**
     * \brief Dispatch initializer.  To be called prior to kernel launch.
     */
    __host__ __device__ __forceinline__ void DispatchInit(
        OffsetT num_items,          ///< [in] Total number of input data items
        int     max_grid_size,      ///< [in] Maximum allowable grid size
        int     tile_items)         ///< [in] Number of data items per input tile
    {
        this->block_offset       = num_items;   // Initialize past-the-end
        this->block_end          = num_items;   // Initialize past-the-end
        this->num_items          = num_items;
        this->total_tiles        = (num_items + tile_items - 1) / tile_items;
        this->grid_size          = CUB_MIN(total_tiles, max_grid_size);
        OffsetT avg_tiles_per_block = total_tiles / grid_size;
        // leftover grains go to big blocks:
        this->big_shares         = total_tiles - (avg_tiles_per_block * grid_size);
        this->normal_share_items = avg_tiles_per_block * tile_items;
        this->normal_base_offset = big_shares * tile_items;
        this->big_share_items    = normal_share_items + tile_items;
    }

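    // Worked example of the even-share arithmetic above (hypothetical
    // numbers, for illustration only):
    //   num_items = 1000, tile_items = 128, max_grid_size = 3
    //   total_tiles         = ceil(1000 / 128) = 8
    //   grid_size           = min(8, 3)        = 3
    //   avg_tiles_per_block = 8 / 3            = 2
    //   big_shares          = 8 - (2 * 3)      = 2    (blocks 0 and 1)
    //   big_share_items     = (2 + 1) * 128    = 384
    //   normal_share_items  = 2 * 128          = 256
    //   normal_base_offset  = 2 * 128          = 256
    // Under the raking mapping, block 0 covers [0, 384), block 1 covers
    // [384, 768), and block 2 covers [768, 1000) after clamping to num_items.
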
    /**
     * \brief Initializes ranges for the specified thread block index.
     * Specialized for a "raking" access pattern in which each thread block
     * is assigned a consecutive sequence of input tiles.
     */
    template <int TILE_ITEMS>
    __device__ __forceinline__ void BlockInit(
        int block_id,
        Int2Type<GRID_MAPPING_RAKE> /*strategy_tag*/)
    {
        block_stride = TILE_ITEMS;
        if (block_id < big_shares)
        {
            // This thread block gets a big share of grains (avg_tiles_per_block + 1)
            block_offset = (block_id * big_share_items);
            block_end = block_offset + big_share_items;
        }
        else if (block_id < total_tiles)
        {
            // This thread block gets a normal share of grains (avg_tiles_per_block)
            block_offset = normal_base_offset + (block_id * normal_share_items);
            block_end = CUB_MIN(num_items, block_offset + normal_share_items);
        }
        // Else default past-the-end
    }

    /**
     * \brief Initializes ranges for the specified thread block index.
     * Specialized for a "strip mining" access pattern in which the input
     * tiles assigned to each thread block are separated by a stride equal
     * to the extent of the grid.
     */
    template <int TILE_ITEMS>
    __device__ __forceinline__ void BlockInit(
        int block_id,
        Int2Type<GRID_MAPPING_STRIP_MINE> /*strategy_tag*/)
    {
        block_stride = grid_size * TILE_ITEMS;
        block_offset = (block_id * TILE_ITEMS);
        block_end = num_items;   // every block iterates to the end of the input
    }

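    // Contrast of the two mappings, as implemented above: under
    // GRID_MAPPING_RAKE each thread block owns one contiguous span of tiles
    // and walks it with block_stride = TILE_ITEMS, while under
    // GRID_MAPPING_STRIP_MINE the blocks interleave, each starting at
    // block_id * TILE_ITEMS and leapfrogging by grid_size * TILE_ITEMS
    // until num_items is reached.
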
    /**
     * \brief Block-initialization for the calling thread block (blockIdx.x),
     * dispatching on the grid-mapping STRATEGY given as a template parameter.
     */
    template <
        int                     TILE_ITEMS,
        GridMappingStrategy     STRATEGY>
    __device__ __forceinline__ void BlockInit()
    {
        BlockInit<TILE_ITEMS>(blockIdx.x, Int2Type<STRATEGY>());
    }

    /**
     * \brief Block-initialization from an explicit range.  Specialized for a
     * "raking" access pattern in which each thread block is assigned a
     * consecutive sequence of input tiles.
     */
    template <int TILE_ITEMS>
    __device__ __forceinline__ void BlockInit(
        OffsetT block_offset,       ///< [in] Thread block begin offset (inclusive)
        OffsetT block_end)          ///< [in] Thread block end offset (exclusive)
    {
        this->block_offset = block_offset;
        this->block_end = block_end;
        this->block_stride = TILE_ITEMS;
    }

};

/** @} */       // end group GridModule

}               // CUB namespace
CUB_NS_POSTFIX  // Optional outer namespace(s)
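
A minimal usage sketch follows. It is not part of the file above: the kernel
name, buffer names, launch sizes, and the include path are illustrative
assumptions. It shows the intended calling pattern: the host fills a
GridEvenShare via DispatchInit() before launch, and each thread block calls
BlockInit() in the kernel to learn its [block_offset, block_end) range.

#include <cstdio>
#include <cuda_runtime.h>
#include <cub/grid/grid_even_share.cuh>   // path may differ in a vendored copy

constexpr int TILE_ITEMS = 256;           // must match tile_items in DispatchInit

// Hypothetical kernel: each block sums its even-share segment of d_in and
// accumulates into *d_total (which starts at zero).
__global__ void SumKernel(const int *d_in, int *d_total,
                          cub::GridEvenShare<int> even_share)
{
    // Resolve this block's contiguous range under the raking mapping
    even_share.BlockInit<TILE_ITEMS, cub::GRID_MAPPING_RAKE>();

    int sum = 0;
    for (int i = even_share.block_offset + (int)threadIdx.x;
         i < even_share.block_end;
         i += blockDim.x)
    {
        sum += d_in[i];
    }
    atomicAdd(d_total, sum);   // crude but correct block-wide accumulation
}

int main()
{
    const int num_items = 1 << 20;
    int *d_in, *d_total;
    cudaMalloc(&d_in, num_items * sizeof(int));
    cudaMalloc(&d_total, sizeof(int));
    cudaMemset(d_in, 0, num_items * sizeof(int));
    cudaMemset(d_total, 0, sizeof(int));

    // Partition the input evenly across at most 128 thread blocks
    cub::GridEvenShare<int> even_share;
    even_share.DispatchInit(num_items, /*max_grid_size*/ 128, TILE_ITEMS);

    SumKernel<<<even_share.grid_size, 256>>>(d_in, d_total, even_share);

    int total = 0;
    cudaMemcpy(&total, d_total, sizeof(int), cudaMemcpyDeviceToHost);
    printf("sum = %d\n", total);
    return 0;
}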