OpenFPM_pdata  4.1.0
Project that contain the implementation of distributed structures
cudify_cuda.hpp
1 #ifndef __CUDIFY_CUDA_HPP__
2 #define __CUDIFY_CUDA_HPP__
3 
4 #define CUDA_ON_BACKEND CUDA_BACKEND_CUDA
5 
6 constexpr int default_kernel_wg_threads_ = 1024;
7 
8 #if CUDART_VERSION >= 11000 && defined(__NVCC__)
9  #include "cub/util_type.cuh"
10  #include "cub/block/block_scan.cuh"
11 #endif
12 
13 #ifdef __NVCC__
14 
15 template<typename lambda_f>
16 __global__ void kernel_launch_lambda(lambda_f f)
17 {
18  dim3 bid = blockIdx;
19  dim3 tid = threadIdx;
20  f(bid,tid);
21 }
22 
23 template<typename lambda_f>
24 __global__ void kernel_launch_lambda_tls(lambda_f f)
25 {
26  f();
27 }
28 
29 
30 #endif
31 
32 static void init_wrappers()
33 {}
34 
35 #if defined(SE_CLASS1) || defined(CUDA_CHECK_LAUNCH)
36 
37 #define CUDA_LAUNCH(cuda_call,ite, ...) \
38  {\
39  cudaDeviceSynchronize(); \
40  {\
41  cudaError_t e = cudaGetLastError();\
42  if (e != cudaSuccess)\
43  {\
44  std::string error = cudaGetErrorString(e);\
45  std::cout << "Cuda an error has occurred before this CUDA_LAUNCH, detected in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
46  }\
47  }\
48  CHECK_SE_CLASS1_PRE\
49  if (ite.wthr.x != 0)\
50  {cuda_call<<<ite.wthr,ite.thr>>>(__VA_ARGS__);}\
51  cudaDeviceSynchronize(); \
52  {\
53  cudaError_t e = cudaGetLastError();\
54  if (e != cudaSuccess)\
55  {\
56  std::string error = cudaGetErrorString(e);\
57  std::cout << "Cuda Error in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
58  }\
59  CHECK_SE_CLASS1_POST(#cuda_call,__VA_ARGS__)\
60  }\
61  }
62 
63 #define CUDA_LAUNCH_DIM3(cuda_call,wthr,thr, ...) \
64  {\
65  cudaDeviceSynchronize(); \
66  {\
67  cudaError_t e = cudaGetLastError();\
68  if (e != cudaSuccess)\
69  {\
70  std::string error = cudaGetErrorString(e);\
71  std::cout << "Cuda an error has occurred before this CUDA_LAUNCH, detected in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
72  }\
73  }\
74  CHECK_SE_CLASS1_PRE\
75  cuda_call<<<wthr,thr>>>(__VA_ARGS__);\
76  cudaDeviceSynchronize(); \
77  {\
78  cudaError_t e = cudaGetLastError();\
79  if (e != cudaSuccess)\
80  {\
81  std::string error = cudaGetErrorString(e);\
82  std::cout << "Cuda Error in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
83  }\
84  CHECK_SE_CLASS1_POST(#cuda_call,__VA_ARGS__)\
85  }\
86  }
87 
88 #define CUDA_LAUNCH_DIM3_DEBUG_SE1(cuda_call,wthr,thr, ...) \
89  {\
90  cudaDeviceSynchronize(); \
91  {\
92  cudaError_t e = cudaGetLastError();\
93  if (e != cudaSuccess)\
94  {\
95  std::string error = cudaGetErrorString(e);\
96  std::cout << "Cuda an error has occurred before this CUDA_LAUNCH, detected in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
97  }\
98  }\
99  CHECK_SE_CLASS1_PRE\
100  cuda_call<<<wthr,thr>>>(__VA_ARGS__);\
101  cudaDeviceSynchronize(); \
102  {\
103  cudaError_t e = cudaGetLastError();\
104  if (e != cudaSuccess)\
105  {\
106  std::string error = cudaGetErrorString(e);\
107  std::cout << "Cuda Error in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
108  }\
109  }\
110  }
111 
112 #define CUDA_LAUNCH_LAMBDA(ite, lambda_f, ...) \
113  {\
114  cudaDeviceSynchronize(); \
115  {\
116  cudaError_t e = cudaGetLastError();\
117  if (e != cudaSuccess)\
118  {\
119  std::string error = cudaGetErrorString(e);\
120  std::cout << "Cuda an error has occurred before this CUDA_LAUNCH, detected in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
121  }\
122  }\
123  CHECK_SE_CLASS1_PRE\
124  if (ite.wthr.x != 0)\
125  {kernel_launch_lambda<<<ite.wthr,ite.thr>>>(lambda_f);}\
126  cudaDeviceSynchronize(); \
127  {\
128  cudaError_t e = cudaGetLastError();\
129  if (e != cudaSuccess)\
130  {\
131  std::string error = cudaGetErrorString(e);\
132  std::cout << "Cuda Error in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
133  }\
134  CHECK_SE_CLASS1_POST("lambda",0)\
135  }\
136  }
137 
138 #define CUDA_LAUNCH_LAMBDA_TLS(ite, lambda_f, ...) \
139  {\
140  cudaDeviceSynchronize(); \
141  {\
142  cudaError_t e = cudaGetLastError();\
143  if (e != cudaSuccess)\
144  {\
145  std::string error = cudaGetErrorString(e);\
146  std::cout << "Cuda an error has occurred before this CUDA_LAUNCH, detected in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
147  }\
148  }\
149  CHECK_SE_CLASS1_PRE\
150  if (ite.wthr.x != 0)\
151  {kernel_launch_lambda<<<ite.wthr,ite.thr>>>(lambda_f);}\
152  cudaDeviceSynchronize(); \
153  {\
154  cudaError_t e = cudaGetLastError();\
155  if (e != cudaSuccess)\
156  {\
157  std::string error = cudaGetErrorString(e);\
158  std::cout << "Cuda Error in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
159  }\
160  CHECK_SE_CLASS1_POST("lambda",0)\
161  }\
162  }
163 
164 #define CUDA_LAUNCH_LAMBDA_DIM3_TLS(wthr_,thr_, lambda_f, ...) \
165  {\
166  cudaDeviceSynchronize(); \
167  {\
168  cudaError_t e = cudaGetLastError();\
169  if (e != cudaSuccess)\
170  {\
171  std::string error = cudaGetErrorString(e);\
172  std::cout << "Cuda an error has occurred before this CUDA_LAUNCH, detected in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
173  }\
174  }\
175  CHECK_SE_CLASS1_PRE\
176  if (ite.wthr.x != 0)\
177  {kernel_launch_lambda<<<wthr_,thr_>>>(lambda_f);}\
178  cudaDeviceSynchronize(); \
179  {\
180  cudaError_t e = cudaGetLastError();\
181  if (e != cudaSuccess)\
182  {\
183  std::string error = cudaGetErrorString(e);\
184  std::cout << "Cuda Error in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
185  }\
186  CHECK_SE_CLASS1_POST("lambda",0)\
187  }\
188  }
189 
190 #define CUDA_CHECK() \
191  {\
192  cudaDeviceSynchronize(); \
193  {\
194  cudaError_t e = cudaGetLastError();\
195  if (e != cudaSuccess)\
196  {\
197  std::string error = cudaGetErrorString(e);\
198  std::cout << "Cuda an error has occurred before, detected in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
199  }\
200  }\
201  CHECK_SE_CLASS1_PRE\
202  cudaDeviceSynchronize(); \
203  {\
204  cudaError_t e = cudaGetLastError();\
205  if (e != cudaSuccess)\
206  {\
207  std::string error = cudaGetErrorString(e);\
208  std::cout << "Cuda Error in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
209  }\
210  CHECK_SE_CLASS1_POST("no call","no args")\
211  }\
212  }
213 
214 #else
215 
216 #define CUDA_LAUNCH(cuda_call,ite, ...) \
217  if (ite.wthr.x != 0)\
218  {cuda_call<<<ite.wthr,ite.thr>>>(__VA_ARGS__);}
219 
220 #define CUDA_LAUNCH_DIM3(cuda_call,wthr,thr, ...) \
221  cuda_call<<<wthr,thr>>>(__VA_ARGS__);
222 
223 #define CUDA_LAUNCH_LAMBDA(ite,lambda_f, ...) \
224  kernel_launch_lambda<<<ite.wthr,ite.thr>>>(lambda_f);
225 
226 #define CUDA_CHECK()
227 
228 #define CUDA_LAUNCH_LAMBDA_TLS(ite, lambda_f, ...) \
229  {\
230  if (ite.wthr.x != 0)\
231  {kernel_launch_lambda<<<ite.wthr,ite.thr>>>(lambda_f);}\
232  }
233 
234 #define CUDA_LAUNCH_LAMBDA_DIM3(wthr_,thr_, lambda_f, ...) \
235  {\
236  dim3 wthr__(wthr_);\
237  dim3 thr__(thr_);\
238  if (ite.wthr.x != 0)\
239  {kernel_launch_lambda<<<wthr__,thr__>>>(lambda_f);}\
240  }
241 
242 #define CUDA_LAUNCH_LAMBDA_DIM3_TLS(wthr_,thr_, lambda_f, ...) \
243  {\
244  dim3 wthr__(wthr_);\
245  dim3 thr__(thr_);\
246  if (ite.wthr.x != 0)\
247  {kernel_launch_lambda_tls<<<wthr__,thr__>>>(lambda_f);}\
248  }
249 
250 #endif
251 
252 #endif