OpenFPM_pdata  4.1.0
Project that contain the implementation of distributed structures
util_arch.cuh
Go to the documentation of this file.
1 /******************************************************************************
2  * Copyright (c) 2011, Duane Merrill. All rights reserved.
3  * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of the NVIDIA CORPORATION nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  *
27  ******************************************************************************/
28 
34 #pragma once
35 
36 #include "util_namespace.cuh"
37 
39 CUB_NS_PREFIX
40 
42 namespace cub {
43 
44 #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document
45 
46 #if (__CUDACC_VER_MAJOR__ >= 9) && !defined(CUB_USE_COOPERATIVE_GROUPS)
47  #define CUB_USE_COOPERATIVE_GROUPS
48 #endif
49 
51 #ifndef CUB_PTX_ARCH
52  #ifndef __CUDA_ARCH__
53  #define CUB_PTX_ARCH 0
54  #else
55  #define CUB_PTX_ARCH __CUDA_ARCH__
56  #endif
57 #endif
58 
59 
61 #ifndef CUB_RUNTIME_FUNCTION
62  #if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__>= 350 && defined(__CUDACC_RDC__))
63  #define CUB_RUNTIME_ENABLED
64  #define CUB_RUNTIME_FUNCTION __host__ __device__
65  #else
66  #define CUB_RUNTIME_FUNCTION __host__
67  #endif
68 #endif
69 
70 
72 #ifndef CUB_LOG_WARP_THREADS
73  #define CUB_LOG_WARP_THREADS(arch) \
74  (5)
75  #define CUB_WARP_THREADS(arch) \
76  (1 << CUB_LOG_WARP_THREADS(arch))
77 
78  #define CUB_PTX_WARP_THREADS CUB_WARP_THREADS(CUB_PTX_ARCH)
79  #define CUB_PTX_LOG_WARP_THREADS CUB_LOG_WARP_THREADS(CUB_PTX_ARCH)
80 #endif
81 
82 
84 #ifndef CUB_LOG_SMEM_BANKS
85  #define CUB_LOG_SMEM_BANKS(arch) \
86  ((arch >= 200) ? \
87  (5) : \
88  (4))
89  #define CUB_SMEM_BANKS(arch) \
90  (1 << CUB_LOG_SMEM_BANKS(arch))
91 
92  #define CUB_PTX_LOG_SMEM_BANKS CUB_LOG_SMEM_BANKS(CUB_PTX_ARCH)
93  #define CUB_PTX_SMEM_BANKS CUB_SMEM_BANKS(CUB_PTX_ARCH)
94 #endif
95 
96 
98 #ifndef CUB_SUBSCRIPTION_FACTOR
99  #define CUB_SUBSCRIPTION_FACTOR(arch) \
100  ((arch >= 300) ? \
101  (5) : \
102  ((arch >= 200) ? \
103  (3) : \
104  (10)))
105  #define CUB_PTX_SUBSCRIPTION_FACTOR CUB_SUBSCRIPTION_FACTOR(CUB_PTX_ARCH)
106 #endif
107 
108 
110 #ifndef CUB_PREFER_CONFLICT_OVER_PADDING
111  #define CUB_PREFER_CONFLICT_OVER_PADDING(arch) \
112  ((arch >= 300) ? \
113  (1) : \
114  (4))
115  #define CUB_PTX_PREFER_CONFLICT_OVER_PADDING CUB_PREFER_CONFLICT_OVER_PADDING(CUB_PTX_ARCH)
116 #endif
117 
118 
120 #ifndef CUB_SCALED_BLOCK_THREADS
121  #define CUB_SCALED_BLOCK_THREADS(NOMINAL_4B_BLOCK_THREADS, T, PTX_ARCH) \
122  (CUB_MIN( \
123  NOMINAL_4B_BLOCK_THREADS, \
124  CUB_WARP_THREADS(PTX_ARCH) * CUB_MAX( \
125  2, \
126  (NOMINAL_4B_BLOCK_THREADS / CUB_WARP_THREADS(PTX_ARCH)) * 4 / sizeof(T))))
127 #endif
128 
130 #ifndef CUB_SCALED_ITEMS_PER_THREAD
131  #define CUB_SCALED_ITEMS_PER_THREAD(NOMINAL_4B_ITEMS_PER_THREAD, NOMINAL_4B_BLOCK_THREADS, T, PTX_ARCH) \
132  CUB_MAX( \
133  1, \
134  (sizeof(T) < 4) ? \
135  ((NOMINAL_4B_ITEMS_PER_THREAD * NOMINAL_4B_BLOCK_THREADS * 4) / CUB_MAX(4, sizeof(T))) / CUB_SCALED_BLOCK_THREADS(NOMINAL_4B_BLOCK_THREADS, T, PTX_ARCH) / 2 : \
136  ((NOMINAL_4B_ITEMS_PER_THREAD * NOMINAL_4B_BLOCK_THREADS * 4) / CUB_MAX(4, sizeof(T))) / CUB_SCALED_BLOCK_THREADS(NOMINAL_4B_BLOCK_THREADS, T, PTX_ARCH))
137 #endif
138 
140 #ifndef CUB_SCALED_GRANULARITIES
141  #define CUB_SCALED_GRANULARITIES(NOMINAL_4B_BLOCK_THREADS, NOMINAL_4B_ITEMS_PER_THREAD, T) \
142  CUB_SCALED_BLOCK_THREADS(NOMINAL_4B_BLOCK_THREADS, T, 200), \
143  CUB_SCALED_ITEMS_PER_THREAD(NOMINAL_4B_ITEMS_PER_THREAD, NOMINAL_4B_BLOCK_THREADS, T, 200)
144 #endif
145 
146 
147 
148 #endif // Do not document
149 
150 } // CUB namespace
151 CUB_NS_POSTFIX // Optional outer namespace(s)
Optional outer namespace(s)