OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
grid_queue.cuh
/******************************************************************************
 * Copyright (c) 2011, Duane Merrill. All rights reserved.
 * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * Neither the name of the NVIDIA CORPORATION nor the
 *       names of its contributors may be used to endorse or promote products
 *       derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 ******************************************************************************/

/**
 * \file
 * cub::GridQueue is a descriptor utility for dynamic queue management.
 */

#pragma once

#include "../util_namespace.cuh"
#include "../util_debug.cuh"

/// Optional outer namespace(s)
CUB_NS_PREFIX

/// CUB namespace
namespace cub {


/**
 * \brief GridQueue is a descriptor utility for dynamic queue management.
 */
template <typename OffsetT>
class GridQueue
{
private:

    /// Counter indices
    enum
    {
        FILL    = 0,
        DRAIN   = 1,
    };

    /// Pair of counters
    OffsetT *d_counters;

public:

    /// Returns the device allocation size in bytes needed to construct a GridQueue instance
    __host__ __device__ __forceinline__
    static size_t AllocationSize()
    {
        return sizeof(OffsetT) * 2;
    }


    /// Constructs an invalid GridQueue descriptor
    __host__ __device__ __forceinline__ GridQueue()
    :
        d_counters(NULL)
    {}


    /// Constructs a GridQueue descriptor around the device storage allocation
    __host__ __device__ __forceinline__ GridQueue(
        void *d_storage)
    :
        d_counters((OffsetT*) d_storage)
    {}


    /// This operation sets the fill-size and resets the drain counter, preparing the GridQueue for draining in the next kernel instance
    __host__ __device__ __forceinline__ cudaError_t FillAndResetDrain(
        OffsetT fill_size,
        cudaStream_t stream = 0)
    {
#if (CUB_PTX_ARCH > 0)
        (void)stream;
        d_counters[FILL] = fill_size;
        d_counters[DRAIN] = 0;
        return cudaSuccess;
#else
        OffsetT counters[2];
        counters[FILL] = fill_size;
        counters[DRAIN] = 0;
        return CubDebug(cudaMemcpyAsync(d_counters, counters, sizeof(OffsetT) * 2, cudaMemcpyHostToDevice, stream));
#endif
    }


    /// This operation resets the drain so that it may advance to meet the existing fill-size
    __host__ __device__ __forceinline__ cudaError_t ResetDrain(cudaStream_t stream = 0)
    {
#if (CUB_PTX_ARCH > 0)
        (void)stream;
        d_counters[DRAIN] = 0;
        return cudaSuccess;
#else
        return CubDebug(cudaMemsetAsync(d_counters + DRAIN, 0, sizeof(OffsetT), stream));
#endif
    }


    /// This operation resets the fill counter. To be called by the host or by a kernel prior to that which will be filling
    __host__ __device__ __forceinline__ cudaError_t ResetFill(cudaStream_t stream = 0)
    {
#if (CUB_PTX_ARCH > 0)
        (void)stream;
        d_counters[FILL] = 0;
        return cudaSuccess;
#else
        return CubDebug(cudaMemsetAsync(d_counters + FILL, 0, sizeof(OffsetT), stream));
#endif
    }


    /// Returns the fill-size established by the parent or by the previous kernel
    __host__ __device__ __forceinline__ cudaError_t FillSize(
        OffsetT &fill_size,
        cudaStream_t stream = 0)
    {
#if (CUB_PTX_ARCH > 0)
        (void)stream;
        fill_size = d_counters[FILL];
        return cudaSuccess;
#else
        return CubDebug(cudaMemcpyAsync(&fill_size, d_counters + FILL, sizeof(OffsetT), cudaMemcpyDeviceToHost, stream));
#endif
    }


    /// Drain num_items from the queue. Returns offset from which to read items. To be called from CUDA kernel
    __device__ __forceinline__ OffsetT Drain(OffsetT num_items)
    {
        return atomicAdd(d_counters + DRAIN, num_items);
    }


    /// Fill num_items into the queue. Returns offset from which to write items. To be called from CUDA kernel
    __device__ __forceinline__ OffsetT Fill(OffsetT num_items)
    {
        return atomicAdd(d_counters + FILL, num_items);
    }
};


#ifndef DOXYGEN_SHOULD_SKIP_THIS    // Do not document


/**
 * Reset grid queue (call with 1 block of 1 thread)
 */
template <typename OffsetT>
__global__ void FillAndResetDrainKernel(
    GridQueue<OffsetT>  grid_queue,
    OffsetT             num_items)      ///< [in] Total number of input data items
{
    grid_queue.FillAndResetDrain(num_items);
}


#endif  // DOXYGEN_SHOULD_SKIP_THIS


// end group GridModule

}               // CUB namespace
CUB_NS_POSTFIX  // Optional outer namespace(s)

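For context, here is a minimal usage sketch, not part of the original header: it backs a GridQueue with AllocationSize() bytes of device storage, establishes a fill-size from the host with FillAndResetDrain(), and then has thread blocks reserve tiles of work with Drain() until the queue is exhausted. The kernel name drain_kernel, the tile size of 128, the launch configuration, the include path, and the plain cudaMalloc/cudaFree storage management are illustrative assumptions, not conventions taken from OpenFPM or CUB.

#include <cstdio>
#include <cuda_runtime.h>
#include "grid_queue.cuh"   // assumed local include path; in CUB proper this is <cub/grid/grid_queue.cuh>

// Hypothetical kernel: each block repeatedly grabs a tile of work from the
// queue until the established fill-size has been fully drained.
template <typename OffsetT>
__global__ void drain_kernel(cub::GridQueue<OffsetT> queue, OffsetT fill_size, int *d_out)
{
    enum { TILE_ITEMS = 128 };              // items reserved per Drain() call (assumption)

    __shared__ OffsetT tile_offset;

    while (true)
    {
        // One thread per block reserves the next tile; Drain() returns the
        // offset at which this block should start reading.
        if (threadIdx.x == 0)
            tile_offset = queue.Drain((OffsetT) TILE_ITEMS);
        __syncthreads();

        if (tile_offset >= fill_size)
            break;                          // nothing left to consume

        OffsetT item = tile_offset + (OffsetT) threadIdx.x;
        if (item < fill_size)
            atomicAdd(d_out, 1);            // stand-in for real per-item work
        __syncthreads();                    // all reads of tile_offset done before the next reservation
    }
}

int main()
{
    typedef int OffsetT;
    const OffsetT num_items = 10000;

    // Back the queue with a device allocation of the required size
    void *d_queue_storage = NULL;
    cudaMalloc(&d_queue_storage, cub::GridQueue<OffsetT>::AllocationSize());
    cub::GridQueue<OffsetT> queue(d_queue_storage);

    // Set the fill-size and reset the drain counter from the host
    queue.FillAndResetDrain(num_items);

    int *d_out = NULL;
    cudaMalloc(&d_out, sizeof(int));
    cudaMemset(d_out, 0, sizeof(int));

    drain_kernel<OffsetT><<<64, 128>>>(queue, num_items, d_out);

    int processed = 0;
    cudaMemcpy(&processed, d_out, sizeof(int), cudaMemcpyDeviceToHost);
    printf("processed %d of %d items\n", processed, (int) num_items);

    cudaFree(d_out);
    cudaFree(d_queue_storage);
    return 0;
}

Having a single thread per block perform the Drain() call keeps atomic traffic on the drain counter to one update per tile rather than one per thread; the returned offset is then shared with the rest of the block through shared memory.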