OpenFPM_pdata  4.1.0
Project that contain the implementation of distributed structures
 
Loading...
Searching...
No Matches
grid_barrier.cuh
Go to the documentation of this file.
1/******************************************************************************
2 * Copyright (c) 2011, Duane Merrill. All rights reserved.
3 * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of the NVIDIA CORPORATION nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 *
27 ******************************************************************************/
28
34#pragma once
35
36#include "../util_debug.cuh"
37#include "../util_namespace.cuh"
38#include "../thread/thread_load.cuh"
39
41CUB_NS_PREFIX
42
44namespace cub {
45
46
57{
58protected :
59
60 typedef unsigned int SyncFlag;
61
62 // Counters in global device memory
63 SyncFlag* d_sync;
64
65public:
66
70 GridBarrier() : d_sync(NULL) {}
71
72
76 __device__ __forceinline__ void Sync() const
77 {
78 volatile SyncFlag *d_vol_sync = d_sync;
79
80 // Threadfence and syncthreads to make sure global writes are visible before
81 // thread-0 reports in with its sync counter
82 __threadfence();
83 CTA_SYNC();
84
85 if (blockIdx.x == 0)
86 {
87 // Report in ourselves
88 if (threadIdx.x == 0)
89 {
90 d_vol_sync[blockIdx.x] = 1;
91 }
92
93 CTA_SYNC();
94
95 // Wait for everyone else to report in
96 for (int peer_block = threadIdx.x; peer_block < gridDim.x; peer_block += blockDim.x)
97 {
98 while (ThreadLoad<LOAD_CG>(d_sync + peer_block) == 0)
99 {
100 __threadfence_block();
101 }
102 }
103
104 CTA_SYNC();
105
106 // Let everyone know it's safe to proceed
107 for (int peer_block = threadIdx.x; peer_block < gridDim.x; peer_block += blockDim.x)
108 {
109 d_vol_sync[peer_block] = 0;
110 }
111 }
112 else
113 {
114 if (threadIdx.x == 0)
115 {
116 // Report in
117 d_vol_sync[blockIdx.x] = 1;
118
119 // Wait for acknowledgment
120 while (ThreadLoad<LOAD_CG>(d_sync + blockIdx.x) == 1)
121 {
122 __threadfence_block();
123 }
124 }
125
126 CTA_SYNC();
127 }
128 }
129};
130
131
139{
140protected:
141
142 // Number of bytes backed by d_sync
143 size_t sync_bytes;
144
145public:
146
150 GridBarrierLifetime() : GridBarrier(), sync_bytes(0) {}
151
152
156 cudaError_t HostReset()
157 {
158 cudaError_t retval = cudaSuccess;
159 if (d_sync)
160 {
161 CubDebug(retval = cudaFree(d_sync));
162 d_sync = NULL;
163 }
164 sync_bytes = 0;
165 return retval;
166 }
167
168
173 {
174 HostReset();
175 }
176
177
182 cudaError_t Setup(int sweep_grid_size)
183 {
184 cudaError_t retval = cudaSuccess;
185 do {
186 size_t new_sync_bytes = sweep_grid_size * sizeof(SyncFlag);
187 if (new_sync_bytes > sync_bytes)
188 {
189 if (d_sync)
190 {
191 if (CubDebug(retval = cudaFree(d_sync))) break;
192 }
193
194 sync_bytes = new_sync_bytes;
195
196 // Allocate and initialize to zero
197 if (CubDebug(retval = cudaMalloc((void**) &d_sync, sync_bytes))) break;
198 if (CubDebug(retval = cudaMemset(d_sync, 0, new_sync_bytes))) break;
199 }
200 } while (0);
201
202 return retval;
203 }
204};
205
206 // end group GridModule
208
209} // CUB namespace
210CUB_NS_POSTFIX // Optional outer namespace(s)
211
GridBarrierLifetime extends GridBarrier to provide lifetime management of the temporary device storag...
cudaError_t Setup(int sweep_grid_size)
GridBarrier implements a software global barrier among thread blocks within a CUDA grid.
__device__ __forceinline__ void Sync() const
#define CubDebug(e)
Debug macro.
CTA_SYNC()
Definition util_ptx.cuh:255
Optional outer namespace(s)