1 #ifndef CUDIFY_HARDWARE_COMMON_HPP_
2 #define CUDIFY_HARDWARE_COMMON_HPP_
5 #include <initializer_list>
8 #if defined(CUDIFY_USE_SEQUENTIAL) || defined(CUDIFY_USE_OPENMP)
10 #ifndef OPENMP_MAX_NUM_THREADS
11 #define OPENMP_MAX_NUM_THREADS 896
// Enumerator list of the error-code enum (presumably `enum cudaError`, given the
// `typedef cudaError cudaError_t;` that follows — the opening line of the enum is
// not part of this excerpt).
// Every enumerator is given an explicit value and the numbering has gaps
// (e.g. 9 -> 12, 28 -> 31, 224 -> 300), so a new entry must always be given an
// explicit value rather than relying on implicit increment.
// NOTE(review): names and values look like they mirror the CUDA runtime's
// cudaError enum — confirm against the CUDA headers before changing any value.
21 cudaErrorInvalidValue = 1,
22 cudaErrorMemoryAllocation = 2,
23 cudaErrorInitializationError = 3,
24 cudaErrorCudartUnloading = 4,
25 cudaErrorProfilerDisabled = 5,
26 cudaErrorProfilerNotInitialized = 6,
27 cudaErrorProfilerAlreadyStarted = 7,
28 cudaErrorProfilerAlreadyStopped = 8,
29 cudaErrorInvalidConfiguration = 9,
30 cudaErrorInvalidPitchValue = 12,
31 cudaErrorInvalidSymbol = 13,
32 cudaErrorInvalidHostPointer = 16,
33 cudaErrorInvalidDevicePointer = 17,
34 cudaErrorInvalidTexture = 18,
35 cudaErrorInvalidTextureBinding = 19,
36 cudaErrorInvalidChannelDescriptor = 20,
37 cudaErrorInvalidMemcpyDirection = 21,
38 cudaErrorAddressOfConstant = 22,
39 cudaErrorTextureFetchFailed = 23,
40 cudaErrorTextureNotBound = 24,
41 cudaErrorSynchronizationError = 25,
42 cudaErrorInvalidFilterSetting = 26,
43 cudaErrorInvalidNormSetting = 27,
44 cudaErrorMixedDeviceExecution = 28,
45 cudaErrorNotYetImplemented = 31,
46 cudaErrorMemoryValueTooLarge = 32,
47 cudaErrorStubLibrary = 34,
48 cudaErrorInsufficientDriver = 35,
49 cudaErrorCallRequiresNewerDriver = 36,
50 cudaErrorInvalidSurface = 37,
51 cudaErrorDuplicateVariableName = 43,
52 cudaErrorDuplicateTextureName = 44,
53 cudaErrorDuplicateSurfaceName = 45,
54 cudaErrorDevicesUnavailable = 46,
55 cudaErrorIncompatibleDriverContext = 49,
56 cudaErrorMissingConfiguration = 52,
57 cudaErrorPriorLaunchFailure = 53,
58 cudaErrorLaunchMaxDepthExceeded = 65,
59 cudaErrorLaunchFileScopedTex = 66,
60 cudaErrorLaunchFileScopedSurf = 67,
61 cudaErrorSyncDepthExceeded = 68,
62 cudaErrorLaunchPendingCountExceeded = 69,
63 cudaErrorInvalidDeviceFunction = 98,
64 cudaErrorNoDevice = 100,
65 cudaErrorInvalidDevice = 101,
66 cudaErrorDeviceNotLicensed = 102,
67 cudaErrorSoftwareValidityNotEstablished = 103,
68 cudaErrorStartupFailure = 127,
69 cudaErrorInvalidKernelImage = 200,
70 cudaErrorDeviceUninitialized = 201,
71 cudaErrorMapBufferObjectFailed = 205,
72 cudaErrorUnmapBufferObjectFailed = 206,
73 cudaErrorArrayIsMapped = 207,
74 cudaErrorAlreadyMapped = 208,
75 cudaErrorNoKernelImageForDevice = 209,
76 cudaErrorAlreadyAcquired = 210,
77 cudaErrorNotMapped = 211,
78 cudaErrorNotMappedAsArray = 212,
79 cudaErrorNotMappedAsPointer = 213,
80 cudaErrorECCUncorrectable = 214,
81 cudaErrorUnsupportedLimit = 215,
82 cudaErrorDeviceAlreadyInUse = 216,
83 cudaErrorPeerAccessUnsupported = 217,
84 cudaErrorInvalidPtx = 218,
85 cudaErrorInvalidGraphicsContext = 219,
86 cudaErrorNvlinkUncorrectable = 220,
87 cudaErrorJitCompilerNotFound = 221,
88 cudaErrorUnsupportedPtxVersion = 222,
89 cudaErrorJitCompilationDisabled = 223,
90 cudaErrorUnsupportedExecAffinity = 224,
91 cudaErrorInvalidSource = 300,
92 cudaErrorFileNotFound = 301,
93 cudaErrorSharedObjectSymbolNotFound = 302,
94 cudaErrorSharedObjectInitFailed = 303,
95 cudaErrorOperatingSystem = 304,
96 cudaErrorInvalidResourceHandle = 400,
97 cudaErrorIllegalState = 401,
98 cudaErrorSymbolNotFound = 500,
99 cudaErrorNotReady = 600,
100 cudaErrorIllegalAddress = 700,
101 cudaErrorLaunchOutOfResources = 701,
102 cudaErrorLaunchTimeout = 702,
103 cudaErrorLaunchIncompatibleTexturing = 703,
104 cudaErrorPeerAccessAlreadyEnabled = 704,
105 cudaErrorPeerAccessNotEnabled = 705,
106 cudaErrorSetOnActiveProcess = 708,
107 cudaErrorContextIsDestroyed = 709,
108 cudaErrorAssert = 710,
109 cudaErrorTooManyPeers = 711,
110 cudaErrorHostMemoryAlreadyRegistered = 712,
111 cudaErrorHostMemoryNotRegistered = 713,
112 cudaErrorHardwareStackError = 714,
113 cudaErrorIllegalInstruction = 715,
114 cudaErrorMisalignedAddress = 716,
115 cudaErrorInvalidAddressSpace = 717,
116 cudaErrorInvalidPc = 718,
117 cudaErrorLaunchFailure = 719,
118 cudaErrorCooperativeLaunchTooLarge = 720,
119 cudaErrorNotPermitted = 800,
120 cudaErrorNotSupported = 801,
121 cudaErrorSystemNotReady = 802,
122 cudaErrorSystemDriverMismatch = 803,
123 cudaErrorCompatNotSupportedOnDevice = 804,
124 cudaErrorMpsConnectionFailed = 805,
125 cudaErrorMpsRpcFailure = 806,
126 cudaErrorMpsServerNotReady = 807,
127 cudaErrorMpsMaxClientsReached = 808,
128 cudaErrorMpsMaxConnectionsReached = 809,
129 cudaErrorStreamCaptureUnsupported = 900,
130 cudaErrorStreamCaptureInvalidated = 901,
131 cudaErrorStreamCaptureMerge = 902,
132 cudaErrorStreamCaptureUnmatched = 903,
133 cudaErrorStreamCaptureUnjoined = 904,
134 cudaErrorStreamCaptureIsolation = 905,
135 cudaErrorStreamCaptureImplicit = 906,
136 cudaErrorCapturedEvent = 907,
137 cudaErrorStreamCaptureWrongThread = 908,
138 cudaErrorTimeout = 909,
139 cudaErrorGraphExecUpdateFailure = 910,
140 cudaErrorUnknown = 999,
// Last enumerator of the list: intentionally has no trailing comma.
141 cudaErrorApiFailureBase = 10000
144 typedef cudaError cudaError_t;
148 unsigned int x, y, z;
153 unsigned int x, y, z;
155 constexpr dim3(
unsigned int vx = 1,
unsigned int vy = 1,
unsigned int vz = 1) : x(
vx), y(vy), z(vz) {}
156 constexpr dim3(uint3 v) : x(v.x), y(v.y), z(v.z) {}
157 constexpr
operator uint3(
void)
const {
return uint3{x, y, z}; }
159 constexpr dim3(
const dim3 & d) : x(d.x), y(d.y), z(d.z) {}
162 dim3(
const std::initializer_list<T> & list)
164 auto it = list.begin();
// Enumerators describing the direction of a copy (presumably the body of
// `enum cudaMemcpyKind`, the type used by the copy functions below — the enum's
// opening line is not part of this excerpt). Values run contiguously from 0 to 4.
179 cudaMemcpyHostToHost = 0,
180 cudaMemcpyHostToDevice = 1,
181 cudaMemcpyDeviceToHost = 2,
182 cudaMemcpyDeviceToDevice = 3,
// Last enumerator: no trailing comma.
183 cudaMemcpyDefault = 4
// Fixed text "Unknown error", stored as a file-local NUL-terminated character array.
static const char unk_error[] = "Unknown error";
188 static const char* cudaGetErrorName ( cudaError error )
193 static const char* cudaGetErrorString ( cudaError error )
198 static void cudaDeviceSynchronize()
201 static cudaError cudaMemcpyFromSymbol(
void * dev_mem,
const unsigned char * global_cuda_error_array,
size_t sz)
203 memcpy(dev_mem,global_cuda_error_array,sz);
204 return cudaError::cudaSuccess;
207 static cudaError cudaMemcpyToSymbol(
unsigned char * global_cuda_error_array,
const void * dev_mem,
size_t sz,
size_t offset = 0, cudaMemcpyKind kind = cudaMemcpyHostToDevice )
209 memcpy(global_cuda_error_array + offset,dev_mem,sz);
210 return cudaError::cudaSuccess;
213 static cudaError cudaMemcpy(
void* dst,
const void* src,
size_t count, cudaMemcpyKind kind)
215 memcpy(dst,src,count);
217 return cudaError::cudaSuccess;
221 static cudaError cudaHostGetDevicePointer(
void** pDevice,
void* pHost,
unsigned int flags)
225 return cudaError::cudaSuccess;
238 static __inline__ __host__ __device__ float4 make_float4(
float x,
float y,
float z,
float w)
240 float4 t; t.x = x; t.y = y; t.z = z; t.w = w;
return t;