OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
grid_queue.cuh
/******************************************************************************
 * Copyright (c) 2011, Duane Merrill. All rights reserved.
 * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * Neither the name of the NVIDIA CORPORATION nor the
 *       names of its contributors may be used to endorse or promote products
 *       derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 ******************************************************************************/

/**
 * \file
 * cub::GridQueue is a descriptor utility for dynamic queue management.
 */

#pragma once

#include "../util_namespace.cuh"
#include "../util_debug.cuh"

/// Optional outer namespace(s)
CUB_NS_PREFIX

/// CUB namespace
namespace cub {


/**
 * \brief GridQueue is a descriptor utility for dynamic queue management.
 */
template <typename OffsetT>
class GridQueue
{
private:

    /// Counter indices
    enum
    {
        FILL    = 0,
        DRAIN   = 1,
    };

    /// Pair of counters
    OffsetT *d_counters;

public:

    /// Returns the device allocation size in bytes needed to construct a GridQueue instance
    __host__ __device__ __forceinline__
    static size_t AllocationSize()
    {
        return sizeof(OffsetT) * 2;
    }


    /// Constructs an invalid GridQueue descriptor
    __host__ __device__ __forceinline__ GridQueue()
    :
        d_counters(NULL)
    {}


    /// Constructs a GridQueue descriptor around the device storage allocation
    __host__ __device__ __forceinline__ GridQueue(
        void *d_storage)
    :
        d_counters((OffsetT*) d_storage)
    {}


    /// This operation sets the fill-size and resets the drain counter, preparing the GridQueue for draining in the next kernel instance
    __host__ __device__ __forceinline__ cudaError_t FillAndResetDrain(
        OffsetT fill_size,
        cudaStream_t stream = 0)
    {
#if (CUB_PTX_ARCH > 0)
        (void)stream;
        d_counters[FILL] = fill_size;
        d_counters[DRAIN] = 0;
        return cudaSuccess;
#else
        OffsetT counters[2];
        counters[FILL] = fill_size;
        counters[DRAIN] = 0;
        return CubDebug(cudaMemcpyAsync(d_counters, counters, sizeof(OffsetT) * 2, cudaMemcpyHostToDevice, stream));
#endif
    }


    /// This operation resets the drain so that it may advance to meet the existing fill-size
    __host__ __device__ __forceinline__ cudaError_t ResetDrain(cudaStream_t stream = 0)
    {
#if (CUB_PTX_ARCH > 0)
        (void)stream;
        d_counters[DRAIN] = 0;
        return cudaSuccess;
#else
        return CubDebug(cudaMemsetAsync(d_counters + DRAIN, 0, sizeof(OffsetT), stream));
#endif
    }


    /// This operation resets the fill counter. To be called by the host or by a kernel prior to that which will be filling
    __host__ __device__ __forceinline__ cudaError_t ResetFill(cudaStream_t stream = 0)
    {
#if (CUB_PTX_ARCH > 0)
        (void)stream;
        d_counters[FILL] = 0;
        return cudaSuccess;
#else
        return CubDebug(cudaMemsetAsync(d_counters + FILL, 0, sizeof(OffsetT), stream));
#endif
    }


    /// Returns the fill-size established by the parent or by the previous kernel
    __host__ __device__ __forceinline__ cudaError_t FillSize(
        OffsetT &fill_size,
        cudaStream_t stream = 0)
    {
#if (CUB_PTX_ARCH > 0)
        (void)stream;
        fill_size = d_counters[FILL];
        return cudaSuccess;
#else
        return CubDebug(cudaMemcpyAsync(&fill_size, d_counters + FILL, sizeof(OffsetT), cudaMemcpyDeviceToHost, stream));
#endif
    }


    /// Drain num_items from the queue. Returns offset from which to read items. To be called from CUDA kernel
    __device__ __forceinline__ OffsetT Drain(OffsetT num_items)
    {
        return atomicAdd(d_counters + DRAIN, num_items);
    }


    /// Fill num_items into the queue. Returns offset from which to write items. To be called from CUDA kernel
    __device__ __forceinline__ OffsetT Fill(OffsetT num_items)
    {
        return atomicAdd(d_counters + FILL, num_items);
    }
};


#ifndef DOXYGEN_SHOULD_SKIP_THIS    // Do not document


/**
 * Reset grid queue (call with 1 block of 1 thread)
 */
template <typename OffsetT>
__global__ void FillAndResetDrainKernel(
    GridQueue<OffsetT>  grid_queue,
    OffsetT             num_items)      ///< [in] Total number of input data items
{
    grid_queue.FillAndResetDrain(num_items);
}


#endif  // DOXYGEN_SHOULD_SKIP_THIS


// end group GridModule

}               // CUB namespace
CUB_NS_POSTFIX  // Optional outer namespace(s)

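For context, here is a minimal usage sketch, not part of the original header: it backs a GridQueue with AllocationSize() bytes of device storage, establishes a fill-size from the host with FillAndResetDrain(), and then has thread blocks reserve tiles of work with Drain() until the queue is exhausted. The kernel name drain_kernel, the tile size of 128, the launch configuration, the include path, and the plain cudaMalloc/cudaFree storage management are illustrative assumptions, not conventions taken from OpenFPM or CUB.

#include <cstdio>
#include <cuda_runtime.h>
#include "grid_queue.cuh"   // assumed local include path; in CUB proper this is <cub/grid/grid_queue.cuh>

// Hypothetical kernel: each block repeatedly grabs a tile of work from the
// queue until the established fill-size has been fully drained.
template <typename OffsetT>
__global__ void drain_kernel(cub::GridQueue<OffsetT> queue, OffsetT fill_size, int *d_out)
{
    enum { TILE_ITEMS = 128 };              // items reserved per Drain() call (assumption)

    __shared__ OffsetT tile_offset;

    while (true)
    {
        // One thread per block reserves the next tile; Drain() returns the
        // offset at which this block should start reading.
        if (threadIdx.x == 0)
            tile_offset = queue.Drain((OffsetT) TILE_ITEMS);
        __syncthreads();

        if (tile_offset >= fill_size)
            break;                          // nothing left to consume

        OffsetT item = tile_offset + (OffsetT) threadIdx.x;
        if (item < fill_size)
            atomicAdd(d_out, 1);            // stand-in for real per-item work
        __syncthreads();                    // all reads of tile_offset done before the next reservation
    }
}

int main()
{
    typedef int OffsetT;
    const OffsetT num_items = 10000;

    // Back the queue with a device allocation of the required size
    void *d_queue_storage = NULL;
    cudaMalloc(&d_queue_storage, cub::GridQueue<OffsetT>::AllocationSize());
    cub::GridQueue<OffsetT> queue(d_queue_storage);

    // Set the fill-size and reset the drain counter from the host
    queue.FillAndResetDrain(num_items);

    int *d_out = NULL;
    cudaMalloc(&d_out, sizeof(int));
    cudaMemset(d_out, 0, sizeof(int));

    drain_kernel<OffsetT><<<64, 128>>>(queue, num_items, d_out);

    int processed = 0;
    cudaMemcpy(&processed, d_out, sizeof(int), cudaMemcpyDeviceToHost);
    printf("processed %d of %d items\n", processed, (int) num_items);

    cudaFree(d_out);
    cudaFree(d_queue_storage);
    return 0;
}

Having a single thread per block perform the Drain() call keeps atomic traffic on the drain counter to one update per tile rather than one per thread; the returned offset is then shared with the rest of the block through shared memory.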