1#ifndef __CUDIFY_CUDA_HPP__
2#define __CUDIFY_CUDA_HPP__
4#define CUDA_ON_BACKEND CUDA_BACKEND_CUDA
6constexpr int default_kernel_wg_threads_ = 1024;
8#if CUDART_VERSION >= 11000 && defined(__NVCC__)
9 #include "cub/util_type.cuh"
10 #include "cub/block/block_scan.cuh"
14#include "operators.hpp"
16#ifndef GPU_HOST_DEVICE
17 #define GPU_HOST_DEVICE __forceinline__ __device__ __host__
20template<
typename lambda_f>
21__global__
void kernel_launch_lambda(lambda_f f)
28template<
typename lambda_f>
29__global__
void kernel_launch_lambda_tls(lambda_f f)
37static void init_wrappers()
40#if defined(SE_CLASS1) || defined(CUDA_CHECK_LAUNCH)
42#define CUDA_LAUNCH(cuda_call,ite, ...) \
44 cudaDeviceSynchronize(); \
46 cudaError_t e = cudaGetLastError();\
47 if (e != cudaSuccess)\
49 std::string error = cudaGetErrorString(e);\
50 std::cout << "Cuda an error has occurred before this CUDA_LAUNCH, detected in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
55 {cuda_call<<<ite.wthr,ite.thr>>>(__VA_ARGS__);}\
56 cudaDeviceSynchronize(); \
58 cudaError_t e = cudaGetLastError();\
59 if (e != cudaSuccess)\
61 std::string error = cudaGetErrorString(e);\
62 std::cout << "Cuda Error in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
64 CHECK_SE_CLASS1_POST(#cuda_call,__VA_ARGS__)\
68#define CUDA_LAUNCH_DIM3(cuda_call,wthr,thr, ...) \
70 cudaDeviceSynchronize(); \
72 cudaError_t e = cudaGetLastError();\
73 if (e != cudaSuccess)\
75 std::string error = cudaGetErrorString(e);\
76 std::cout << "Cuda an error has occurred before this CUDA_LAUNCH, detected in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
80 cuda_call<<<wthr,thr>>>(__VA_ARGS__);\
81 cudaDeviceSynchronize(); \
83 cudaError_t e = cudaGetLastError();\
84 if (e != cudaSuccess)\
86 std::string error = cudaGetErrorString(e);\
87 std::cout << "Cuda Error in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
89 CHECK_SE_CLASS1_POST(#cuda_call,__VA_ARGS__)\
93#define CUDA_LAUNCH_DIM3_DEBUG_SE1(cuda_call,wthr,thr, ...) \
95 cudaDeviceSynchronize(); \
97 cudaError_t e = cudaGetLastError();\
98 if (e != cudaSuccess)\
100 std::string error = cudaGetErrorString(e);\
101 std::cout << "Cuda an error has occurred before this CUDA_LAUNCH, detected in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
105 cuda_call<<<wthr,thr>>>(__VA_ARGS__);\
106 cudaDeviceSynchronize(); \
108 cudaError_t e = cudaGetLastError();\
109 if (e != cudaSuccess)\
111 std::string error = cudaGetErrorString(e);\
112 std::cout << "Cuda Error in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
117#define CUDA_LAUNCH_LAMBDA(ite, lambda_f, ...) \
119 cudaDeviceSynchronize(); \
121 cudaError_t e = cudaGetLastError();\
122 if (e != cudaSuccess)\
124 std::string error = cudaGetErrorString(e);\
125 std::cout << "Cuda an error has occurred before this CUDA_LAUNCH, detected in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
129 if (ite.wthr.x != 0)\
130 {kernel_launch_lambda<<<ite.wthr,ite.thr>>>(lambda_f);}\
131 cudaDeviceSynchronize(); \
133 cudaError_t e = cudaGetLastError();\
134 if (e != cudaSuccess)\
136 std::string error = cudaGetErrorString(e);\
137 std::cout << "Cuda Error in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
139 CHECK_SE_CLASS1_POST("lambda",0)\
143#define CUDA_LAUNCH_LAMBDA_TLS(ite, lambda_f, ...) \
145 cudaDeviceSynchronize(); \
147 cudaError_t e = cudaGetLastError();\
148 if (e != cudaSuccess)\
150 std::string error = cudaGetErrorString(e);\
151 std::cout << "Cuda an error has occurred before this CUDA_LAUNCH, detected in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
155 if (ite.wthr.x != 0)\
156 {kernel_launch_lambda<<<ite.wthr,ite.thr>>>(lambda_f);}\
157 cudaDeviceSynchronize(); \
159 cudaError_t e = cudaGetLastError();\
160 if (e != cudaSuccess)\
162 std::string error = cudaGetErrorString(e);\
163 std::cout << "Cuda Error in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
165 CHECK_SE_CLASS1_POST("lambda",0)\
169#define CUDA_LAUNCH_LAMBDA_DIM3_TLS(wthr_,thr_, lambda_f, ...) \
171 cudaDeviceSynchronize(); \
173 cudaError_t e = cudaGetLastError();\
174 if (e != cudaSuccess)\
176 std::string error = cudaGetErrorString(e);\
177 std::cout << "Cuda an error has occurred before this CUDA_LAUNCH, detected in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
181 if (ite.wthr.x != 0)\
182 {kernel_launch_lambda<<<wthr_,thr_>>>(lambda_f);}\
183 cudaDeviceSynchronize(); \
185 cudaError_t e = cudaGetLastError();\
186 if (e != cudaSuccess)\
188 std::string error = cudaGetErrorString(e);\
189 std::cout << "Cuda Error in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
191 CHECK_SE_CLASS1_POST("lambda",0)\
195#define CUDA_CHECK() \
197 cudaDeviceSynchronize(); \
199 cudaError_t e = cudaGetLastError();\
200 if (e != cudaSuccess)\
202 std::string error = cudaGetErrorString(e);\
203 std::cout << "Cuda an error has occurred before, detected in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
207 cudaDeviceSynchronize(); \
209 cudaError_t e = cudaGetLastError();\
210 if (e != cudaSuccess)\
212 std::string error = cudaGetErrorString(e);\
213 std::cout << "Cuda Error in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
215 CHECK_SE_CLASS1_POST("no call","no args")\
221#define CUDA_LAUNCH(cuda_call,ite, ...) \
222 if (ite.wthr.x != 0)\
223 {cuda_call<<<ite.wthr,ite.thr>>>(__VA_ARGS__);}
225#define CUDA_LAUNCH_DIM3(cuda_call,wthr,thr, ...) \
226 cuda_call<<<wthr,thr>>>(__VA_ARGS__);
228#define CUDA_LAUNCH_LAMBDA(ite,lambda_f, ...) \
229 kernel_launch_lambda<<<ite.wthr,ite.thr>>>(lambda_f);
233#define CUDA_LAUNCH_LAMBDA_TLS(ite, lambda_f, ...) \
235 if (ite.wthr.x != 0)\
236 {kernel_launch_lambda<<<ite.wthr,ite.thr>>>(lambda_f);}\
239#define CUDA_LAUNCH_LAMBDA_DIM3(wthr_,thr_, lambda_f, ...) \
243 if (ite.wthr.x != 0)\
244 {kernel_launch_lambda<<<wthr__,thr__>>>(lambda_f);}\
247#define CUDA_LAUNCH_LAMBDA_DIM3_TLS(wthr_,thr_, lambda_f, ...) \
251 if (ite.wthr.x != 0)\
252 {kernel_launch_lambda_tls<<<wthr__,thr__>>>(lambda_f);}\