OpenFPM_pdata  4.1.0
Project that contain the implementation of distributed structures
 
Loading...
Searching...
No Matches
cudify_cuda.hpp
1#ifndef __CUDIFY_CUDA_HPP__
2#define __CUDIFY_CUDA_HPP__
3
4#define CUDA_ON_BACKEND CUDA_BACKEND_CUDA
5
6constexpr int default_kernel_wg_threads_ = 1024;
7
8#if CUDART_VERSION >= 11000 && defined(__NVCC__)
9 #include "cub/util_type.cuh"
10 #include "cub/block/block_scan.cuh"
11#endif
12
13#ifdef __NVCC__
14#include "operators.hpp"
15
16#ifndef GPU_HOST_DEVICE
17 #define GPU_HOST_DEVICE __forceinline__ __device__ __host__
18#endif
19
20template<typename lambda_f>
21__global__ void kernel_launch_lambda(lambda_f f)
22{
23 dim3 bid = blockIdx;
24 dim3 tid = threadIdx;
25 f(bid,tid);
26}
27
28template<typename lambda_f>
29__global__ void kernel_launch_lambda_tls(lambda_f f)
30{
31 f();
32}
33
34
35#endif
36
37static void init_wrappers()
38{}
39
40#if defined(SE_CLASS1) || defined(CUDA_CHECK_LAUNCH)
41
42#define CUDA_LAUNCH(cuda_call,ite, ...) \
43 {\
44 cudaDeviceSynchronize(); \
45 {\
46 cudaError_t e = cudaGetLastError();\
47 if (e != cudaSuccess)\
48 {\
49 std::string error = cudaGetErrorString(e);\
50 std::cout << "Cuda an error has occurred before this CUDA_LAUNCH, detected in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
51 }\
52 }\
53 CHECK_SE_CLASS1_PRE\
54 if (ite.wthr.x != 0)\
55 {cuda_call<<<ite.wthr,ite.thr>>>(__VA_ARGS__);}\
56 cudaDeviceSynchronize(); \
57 {\
58 cudaError_t e = cudaGetLastError();\
59 if (e != cudaSuccess)\
60 {\
61 std::string error = cudaGetErrorString(e);\
62 std::cout << "Cuda Error in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
63 }\
64 CHECK_SE_CLASS1_POST(#cuda_call,__VA_ARGS__)\
65 }\
66 }
67
68#define CUDA_LAUNCH_DIM3(cuda_call,wthr,thr, ...) \
69 {\
70 cudaDeviceSynchronize(); \
71 {\
72 cudaError_t e = cudaGetLastError();\
73 if (e != cudaSuccess)\
74 {\
75 std::string error = cudaGetErrorString(e);\
76 std::cout << "Cuda an error has occurred before this CUDA_LAUNCH, detected in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
77 }\
78 }\
79 CHECK_SE_CLASS1_PRE\
80 cuda_call<<<wthr,thr>>>(__VA_ARGS__);\
81 cudaDeviceSynchronize(); \
82 {\
83 cudaError_t e = cudaGetLastError();\
84 if (e != cudaSuccess)\
85 {\
86 std::string error = cudaGetErrorString(e);\
87 std::cout << "Cuda Error in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
88 }\
89 CHECK_SE_CLASS1_POST(#cuda_call,__VA_ARGS__)\
90 }\
91 }
92
93#define CUDA_LAUNCH_DIM3_DEBUG_SE1(cuda_call,wthr,thr, ...) \
94 {\
95 cudaDeviceSynchronize(); \
96 {\
97 cudaError_t e = cudaGetLastError();\
98 if (e != cudaSuccess)\
99 {\
100 std::string error = cudaGetErrorString(e);\
101 std::cout << "Cuda an error has occurred before this CUDA_LAUNCH, detected in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
102 }\
103 }\
104 CHECK_SE_CLASS1_PRE\
105 cuda_call<<<wthr,thr>>>(__VA_ARGS__);\
106 cudaDeviceSynchronize(); \
107 {\
108 cudaError_t e = cudaGetLastError();\
109 if (e != cudaSuccess)\
110 {\
111 std::string error = cudaGetErrorString(e);\
112 std::cout << "Cuda Error in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
113 }\
114 }\
115 }
116
117#define CUDA_LAUNCH_LAMBDA(ite, lambda_f, ...) \
118 {\
119 cudaDeviceSynchronize(); \
120 {\
121 cudaError_t e = cudaGetLastError();\
122 if (e != cudaSuccess)\
123 {\
124 std::string error = cudaGetErrorString(e);\
125 std::cout << "Cuda an error has occurred before this CUDA_LAUNCH, detected in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
126 }\
127 }\
128 CHECK_SE_CLASS1_PRE\
129 if (ite.wthr.x != 0)\
130 {kernel_launch_lambda<<<ite.wthr,ite.thr>>>(lambda_f);}\
131 cudaDeviceSynchronize(); \
132 {\
133 cudaError_t e = cudaGetLastError();\
134 if (e != cudaSuccess)\
135 {\
136 std::string error = cudaGetErrorString(e);\
137 std::cout << "Cuda Error in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
138 }\
139 CHECK_SE_CLASS1_POST("lambda",0)\
140 }\
141 }
142
143#define CUDA_LAUNCH_LAMBDA_TLS(ite, lambda_f, ...) \
144 {\
145 cudaDeviceSynchronize(); \
146 {\
147 cudaError_t e = cudaGetLastError();\
148 if (e != cudaSuccess)\
149 {\
150 std::string error = cudaGetErrorString(e);\
151 std::cout << "Cuda an error has occurred before this CUDA_LAUNCH, detected in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
152 }\
153 }\
154 CHECK_SE_CLASS1_PRE\
155 if (ite.wthr.x != 0)\
156 {kernel_launch_lambda<<<ite.wthr,ite.thr>>>(lambda_f);}\
157 cudaDeviceSynchronize(); \
158 {\
159 cudaError_t e = cudaGetLastError();\
160 if (e != cudaSuccess)\
161 {\
162 std::string error = cudaGetErrorString(e);\
163 std::cout << "Cuda Error in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
164 }\
165 CHECK_SE_CLASS1_POST("lambda",0)\
166 }\
167 }
168
169#define CUDA_LAUNCH_LAMBDA_DIM3_TLS(wthr_,thr_, lambda_f, ...) \
170 {\
171 cudaDeviceSynchronize(); \
172 {\
173 cudaError_t e = cudaGetLastError();\
174 if (e != cudaSuccess)\
175 {\
176 std::string error = cudaGetErrorString(e);\
177 std::cout << "Cuda an error has occurred before this CUDA_LAUNCH, detected in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
178 }\
179 }\
180 CHECK_SE_CLASS1_PRE\
181 if (ite.wthr.x != 0)\
182 {kernel_launch_lambda<<<wthr_,thr_>>>(lambda_f);}\
183 cudaDeviceSynchronize(); \
184 {\
185 cudaError_t e = cudaGetLastError();\
186 if (e != cudaSuccess)\
187 {\
188 std::string error = cudaGetErrorString(e);\
189 std::cout << "Cuda Error in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
190 }\
191 CHECK_SE_CLASS1_POST("lambda",0)\
192 }\
193 }
194
195#define CUDA_CHECK() \
196 {\
197 cudaDeviceSynchronize(); \
198 {\
199 cudaError_t e = cudaGetLastError();\
200 if (e != cudaSuccess)\
201 {\
202 std::string error = cudaGetErrorString(e);\
203 std::cout << "Cuda an error has occurred before, detected in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
204 }\
205 }\
206 CHECK_SE_CLASS1_PRE\
207 cudaDeviceSynchronize(); \
208 {\
209 cudaError_t e = cudaGetLastError();\
210 if (e != cudaSuccess)\
211 {\
212 std::string error = cudaGetErrorString(e);\
213 std::cout << "Cuda Error in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
214 }\
215 CHECK_SE_CLASS1_POST("no call","no args")\
216 }\
217 }
218
219#else
220
221#define CUDA_LAUNCH(cuda_call,ite, ...) \
222 if (ite.wthr.x != 0)\
223 {cuda_call<<<ite.wthr,ite.thr>>>(__VA_ARGS__);}
224
225#define CUDA_LAUNCH_DIM3(cuda_call,wthr,thr, ...) \
226 cuda_call<<<wthr,thr>>>(__VA_ARGS__);
227
228#define CUDA_LAUNCH_LAMBDA(ite,lambda_f, ...) \
229 kernel_launch_lambda<<<ite.wthr,ite.thr>>>(lambda_f);
230
231#define CUDA_CHECK()
232
233#define CUDA_LAUNCH_LAMBDA_TLS(ite, lambda_f, ...) \
234 {\
235 if (ite.wthr.x != 0)\
236 {kernel_launch_lambda<<<ite.wthr,ite.thr>>>(lambda_f);}\
237 }
238
239#define CUDA_LAUNCH_LAMBDA_DIM3(wthr_,thr_, lambda_f, ...) \
240 {\
241 dim3 wthr__(wthr_);\
242 dim3 thr__(thr_);\
243 if (ite.wthr.x != 0)\
244 {kernel_launch_lambda<<<wthr__,thr__>>>(lambda_f);}\
245 }
246
247#define CUDA_LAUNCH_LAMBDA_DIM3_TLS(wthr_,thr_, lambda_f, ...) \
248 {\
249 dim3 wthr__(wthr_);\
250 dim3 thr__(thr_);\
251 if (ite.wthr.x != 0)\
252 {kernel_launch_lambda_tls<<<wthr__,thr__>>>(lambda_f);}\
253 }
254
255#endif
256
257#endif