xref: /llvm-project/offload/plugins-nextgen/cuda/dynamic_cuda/cuda.cpp (revision bd8a8181288c9e16eb90fff78cbbc63b4687963a)
//===--- cuda/dynamic_cuda/cuda.cpp ------------------------------ C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Implement subset of cuda api by calling into cuda library via dlopen
10 // Does the dlopen/dlsym calls as part of the call to cuInit
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Support/DynamicLibrary.h"
15 
16 #include "Shared/Debug.h"
17 
18 #include "DLWrap.h"
19 #include "cuda.h"
20 
21 #include <memory>
22 #include <string>
23 #include <unordered_map>
24 
25 DLWRAP_INITIALIZE()
26 
27 DLWRAP_INTERNAL(cuInit, 1)
28 
29 DLWRAP(cuCtxGetDevice, 1)
30 DLWRAP(cuDeviceGet, 2)
31 DLWRAP(cuDeviceGetAttribute, 3)
32 DLWRAP(cuDeviceGetCount, 1)
33 DLWRAP(cuFuncGetAttribute, 3)
34 
35 // Device info
36 DLWRAP(cuDeviceGetName, 3)
37 DLWRAP(cuDeviceTotalMem, 2)
38 DLWRAP(cuDriverGetVersion, 1)
39 
40 DLWRAP(cuGetErrorString, 2)
41 DLWRAP(cuLaunchKernel, 11)
42 DLWRAP(cuLaunchHostFunc, 3)
43 
44 DLWRAP(cuMemAlloc, 2)
45 DLWRAP(cuMemAllocHost, 2)
46 DLWRAP(cuMemAllocManaged, 3)
47 DLWRAP(cuMemAllocAsync, 3)
48 
49 DLWRAP(cuMemcpyDtoDAsync, 4)
50 DLWRAP(cuMemcpyDtoH, 3)
51 DLWRAP(cuMemcpyDtoHAsync, 4)
52 DLWRAP(cuMemcpyHtoD, 3)
53 DLWRAP(cuMemcpyHtoDAsync, 4)
54 
55 DLWRAP(cuMemFree, 1)
56 DLWRAP(cuMemFreeHost, 1)
57 DLWRAP(cuMemFreeAsync, 2)
58 
59 DLWRAP(cuModuleGetFunction, 3)
60 DLWRAP(cuModuleGetGlobal, 4)
61 
62 DLWRAP(cuModuleUnload, 1)
63 DLWRAP(cuStreamCreate, 2)
64 DLWRAP(cuStreamDestroy, 1)
65 DLWRAP(cuStreamSynchronize, 1)
66 DLWRAP(cuStreamQuery, 1)
67 DLWRAP(cuStreamAddCallback, 4)
68 DLWRAP(cuCtxSetCurrent, 1)
69 DLWRAP(cuDevicePrimaryCtxRelease, 1)
70 DLWRAP(cuDevicePrimaryCtxGetState, 3)
71 DLWRAP(cuDevicePrimaryCtxSetFlags, 2)
72 DLWRAP(cuDevicePrimaryCtxRetain, 2)
73 DLWRAP(cuModuleLoadDataEx, 5)
74 
75 DLWRAP(cuDeviceCanAccessPeer, 3)
76 DLWRAP(cuCtxEnablePeerAccess, 2)
77 DLWRAP(cuMemcpyPeerAsync, 6)
78 
79 DLWRAP(cuCtxGetLimit, 2)
80 DLWRAP(cuCtxSetLimit, 2)
81 
82 DLWRAP(cuEventCreate, 2)
83 DLWRAP(cuEventRecord, 2)
84 DLWRAP(cuStreamWaitEvent, 3)
85 DLWRAP(cuEventSynchronize, 1)
86 DLWRAP(cuEventDestroy, 1)
87 
88 DLWRAP_FINALIZE()
89 
90 DLWRAP(cuMemUnmap, 2)
91 DLWRAP(cuMemRelease, 1)
92 DLWRAP(cuMemAddressFree, 2)
93 DLWRAP(cuMemGetInfo, 2)
94 DLWRAP(cuMemAddressReserve, 5)
95 DLWRAP(cuMemMap, 5)
96 DLWRAP(cuMemCreate, 4)
97 DLWRAP(cuMemSetAccess, 4)
98 DLWRAP(cuMemGetAllocationGranularity, 3)
99 
// Library name (or path) handed to the dynamic loader by checkForCUDA(). A
// build can predefine DYNAMIC_CUDA_PATH to load the driver from elsewhere.
#if !defined(DYNAMIC_CUDA_PATH)
#define DYNAMIC_CUDA_PATH "libcuda.so"
#endif

// Fallback target name and message prefix, used only when the build has not
// already provided them. Presumably consumed by the DP() debug macro --
// confirm against Shared/Debug.h.
#if !defined(TARGET_NAME)
#define TARGET_NAME CUDA
#endif
#if !defined(DEBUG_PREFIX)
#define DEBUG_PREFIX "Target " GETNAME(TARGET_NAME) " RTL"
#endif
110 
111 static bool checkForCUDA() {
112   // return true if dlopen succeeded and all functions found
113 
114   // Prefer _v2 versions of functions if found in the library
115   std::unordered_map<std::string, const char *> TryFirst = {
116       {"cuMemAlloc", "cuMemAlloc_v2"},
117       {"cuMemFree", "cuMemFree_v2"},
118       {"cuMemcpyDtoH", "cuMemcpyDtoH_v2"},
119       {"cuMemcpyHtoD", "cuMemcpyHtoD_v2"},
120       {"cuStreamDestroy", "cuStreamDestroy_v2"},
121       {"cuModuleGetGlobal", "cuModuleGetGlobal_v2"},
122       {"cuMemcpyDtoHAsync", "cuMemcpyDtoHAsync_v2"},
123       {"cuMemcpyDtoDAsync", "cuMemcpyDtoDAsync_v2"},
124       {"cuMemcpyHtoDAsync", "cuMemcpyHtoDAsync_v2"},
125       {"cuDevicePrimaryCtxRelease", "cuDevicePrimaryCtxRelease_v2"},
126       {"cuDevicePrimaryCtxSetFlags", "cuDevicePrimaryCtxSetFlags_v2"},
127   };
128 
129   const char *CudaLib = DYNAMIC_CUDA_PATH;
130   std::string ErrMsg;
131   auto DynlibHandle = std::make_unique<llvm::sys::DynamicLibrary>(
132       llvm::sys::DynamicLibrary::getPermanentLibrary(CudaLib, &ErrMsg));
133   if (!DynlibHandle->isValid()) {
134     DP("Unable to load library '%s': %s!\n", CudaLib, ErrMsg.c_str());
135     return false;
136   }
137 
138   for (size_t I = 0; I < dlwrap::size(); I++) {
139     const char *Sym = dlwrap::symbol(I);
140 
141     auto It = TryFirst.find(Sym);
142     if (It != TryFirst.end()) {
143       const char *First = It->second;
144       void *P = DynlibHandle->getAddressOfSymbol(First);
145       if (P) {
146         DP("Implementing %s with dlsym(%s) -> %p\n", Sym, First, P);
147         *dlwrap::pointer(I) = P;
148         continue;
149       }
150     }
151 
152     void *P = DynlibHandle->getAddressOfSymbol(Sym);
153     if (P == nullptr) {
154       DP("Unable to find '%s' in '%s'!\n", Sym, CudaLib);
155       return false;
156     }
157     DP("Implementing %s with dlsym(%s) -> %p\n", Sym, Sym, P);
158 
159     *dlwrap::pointer(I) = P;
160   }
161 
162   return true;
163 }
164 
165 CUresult cuInit(unsigned X) {
166   // Note: Called exactly once from cuda rtl.cpp in a global constructor so
167   // does not need to handle being called repeatedly or concurrently
168   if (!checkForCUDA()) {
169     return CUDA_ERROR_INVALID_HANDLE;
170   }
171   return dlwrap_cuInit(X);
172 }
173