xref: /llvm-project/offload/plugins-nextgen/cuda/dynamic_cuda/cuda.cpp (revision bd8a8181288c9e16eb90fff78cbbc63b4687963a)
//===--- cuda/dynamic_cuda/cuda.cpp ------------------------------- C++ -*-===//
2330d8983SJohannes Doerfert //
3330d8983SJohannes Doerfert // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4330d8983SJohannes Doerfert // See https://llvm.org/LICENSE.txt for license information.
5330d8983SJohannes Doerfert // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6330d8983SJohannes Doerfert //
7330d8983SJohannes Doerfert //===----------------------------------------------------------------------===//
8330d8983SJohannes Doerfert //
9330d8983SJohannes Doerfert // Implement subset of cuda api by calling into cuda library via dlopen
10330d8983SJohannes Doerfert // Does the dlopen/dlsym calls as part of the call to cuInit
11330d8983SJohannes Doerfert //
12330d8983SJohannes Doerfert //===----------------------------------------------------------------------===//
13330d8983SJohannes Doerfert 
14330d8983SJohannes Doerfert #include "llvm/Support/DynamicLibrary.h"
15330d8983SJohannes Doerfert 
16330d8983SJohannes Doerfert #include "Shared/Debug.h"
17330d8983SJohannes Doerfert 
18330d8983SJohannes Doerfert #include "DLWrap.h"
19330d8983SJohannes Doerfert #include "cuda.h"
20330d8983SJohannes Doerfert 
21330d8983SJohannes Doerfert #include <memory>
22330d8983SJohannes Doerfert #include <string>
23330d8983SJohannes Doerfert #include <unordered_map>
24330d8983SJohannes Doerfert 
25330d8983SJohannes Doerfert DLWRAP_INITIALIZE()
26330d8983SJohannes Doerfert 
27330d8983SJohannes Doerfert DLWRAP_INTERNAL(cuInit, 1)
28330d8983SJohannes Doerfert 
29330d8983SJohannes Doerfert DLWRAP(cuCtxGetDevice, 1)
30330d8983SJohannes Doerfert DLWRAP(cuDeviceGet, 2)
31330d8983SJohannes Doerfert DLWRAP(cuDeviceGetAttribute, 3)
32330d8983SJohannes Doerfert DLWRAP(cuDeviceGetCount, 1)
33330d8983SJohannes Doerfert DLWRAP(cuFuncGetAttribute, 3)
34330d8983SJohannes Doerfert 
35330d8983SJohannes Doerfert // Device info
36330d8983SJohannes Doerfert DLWRAP(cuDeviceGetName, 3)
37330d8983SJohannes Doerfert DLWRAP(cuDeviceTotalMem, 2)
38330d8983SJohannes Doerfert DLWRAP(cuDriverGetVersion, 1)
39330d8983SJohannes Doerfert 
40330d8983SJohannes Doerfert DLWRAP(cuGetErrorString, 2)
41330d8983SJohannes Doerfert DLWRAP(cuLaunchKernel, 11)
42*bd8a8181SJoseph Huber DLWRAP(cuLaunchHostFunc, 3)
43330d8983SJohannes Doerfert 
44330d8983SJohannes Doerfert DLWRAP(cuMemAlloc, 2)
45330d8983SJohannes Doerfert DLWRAP(cuMemAllocHost, 2)
46330d8983SJohannes Doerfert DLWRAP(cuMemAllocManaged, 3)
47330d8983SJohannes Doerfert DLWRAP(cuMemAllocAsync, 3)
48330d8983SJohannes Doerfert 
49330d8983SJohannes Doerfert DLWRAP(cuMemcpyDtoDAsync, 4)
50330d8983SJohannes Doerfert DLWRAP(cuMemcpyDtoH, 3)
51330d8983SJohannes Doerfert DLWRAP(cuMemcpyDtoHAsync, 4)
52330d8983SJohannes Doerfert DLWRAP(cuMemcpyHtoD, 3)
53330d8983SJohannes Doerfert DLWRAP(cuMemcpyHtoDAsync, 4)
54330d8983SJohannes Doerfert 
55330d8983SJohannes Doerfert DLWRAP(cuMemFree, 1)
56330d8983SJohannes Doerfert DLWRAP(cuMemFreeHost, 1)
57330d8983SJohannes Doerfert DLWRAP(cuMemFreeAsync, 2)
58330d8983SJohannes Doerfert 
59330d8983SJohannes Doerfert DLWRAP(cuModuleGetFunction, 3)
60330d8983SJohannes Doerfert DLWRAP(cuModuleGetGlobal, 4)
61330d8983SJohannes Doerfert 
62330d8983SJohannes Doerfert DLWRAP(cuModuleUnload, 1)
63330d8983SJohannes Doerfert DLWRAP(cuStreamCreate, 2)
64330d8983SJohannes Doerfert DLWRAP(cuStreamDestroy, 1)
65330d8983SJohannes Doerfert DLWRAP(cuStreamSynchronize, 1)
66330d8983SJohannes Doerfert DLWRAP(cuStreamQuery, 1)
67134401deSJoseph Huber DLWRAP(cuStreamAddCallback, 4)
68330d8983SJohannes Doerfert DLWRAP(cuCtxSetCurrent, 1)
69330d8983SJohannes Doerfert DLWRAP(cuDevicePrimaryCtxRelease, 1)
70330d8983SJohannes Doerfert DLWRAP(cuDevicePrimaryCtxGetState, 3)
71330d8983SJohannes Doerfert DLWRAP(cuDevicePrimaryCtxSetFlags, 2)
72330d8983SJohannes Doerfert DLWRAP(cuDevicePrimaryCtxRetain, 2)
73330d8983SJohannes Doerfert DLWRAP(cuModuleLoadDataEx, 5)
74330d8983SJohannes Doerfert 
75330d8983SJohannes Doerfert DLWRAP(cuDeviceCanAccessPeer, 3)
76330d8983SJohannes Doerfert DLWRAP(cuCtxEnablePeerAccess, 2)
77330d8983SJohannes Doerfert DLWRAP(cuMemcpyPeerAsync, 6)
78330d8983SJohannes Doerfert 
79330d8983SJohannes Doerfert DLWRAP(cuCtxGetLimit, 2)
80330d8983SJohannes Doerfert DLWRAP(cuCtxSetLimit, 2)
81330d8983SJohannes Doerfert 
82330d8983SJohannes Doerfert DLWRAP(cuEventCreate, 2)
83330d8983SJohannes Doerfert DLWRAP(cuEventRecord, 2)
84330d8983SJohannes Doerfert DLWRAP(cuStreamWaitEvent, 3)
85330d8983SJohannes Doerfert DLWRAP(cuEventSynchronize, 1)
86330d8983SJohannes Doerfert DLWRAP(cuEventDestroy, 1)
87330d8983SJohannes Doerfert 
88330d8983SJohannes Doerfert DLWRAP_FINALIZE()
89330d8983SJohannes Doerfert 
90330d8983SJohannes Doerfert DLWRAP(cuMemUnmap, 2)
91330d8983SJohannes Doerfert DLWRAP(cuMemRelease, 1)
92330d8983SJohannes Doerfert DLWRAP(cuMemAddressFree, 2)
93330d8983SJohannes Doerfert DLWRAP(cuMemGetInfo, 2)
94330d8983SJohannes Doerfert DLWRAP(cuMemAddressReserve, 5)
95330d8983SJohannes Doerfert DLWRAP(cuMemMap, 5)
96330d8983SJohannes Doerfert DLWRAP(cuMemCreate, 4)
97330d8983SJohannes Doerfert DLWRAP(cuMemSetAccess, 4)
98330d8983SJohannes Doerfert DLWRAP(cuMemGetAllocationGranularity, 3)
99330d8983SJohannes Doerfert 
// Library name handed to the dynamic loader in checkForCUDA(). A different
// value can be supplied by defining the macro before this point.
#if !defined(DYNAMIC_CUDA_PATH)
#define DYNAMIC_CUDA_PATH "libcuda.so"
#endif

// Fallbacks for the debug-output macros; presumably these are what DP()
// prints in front of each message (confirm against Shared/Debug.h).
#if !defined(TARGET_NAME)
#define TARGET_NAME CUDA
#endif
#if !defined(DEBUG_PREFIX)
#define DEBUG_PREFIX "Target " GETNAME(TARGET_NAME) " RTL"
#endif
110330d8983SJohannes Doerfert 
111330d8983SJohannes Doerfert static bool checkForCUDA() {
112330d8983SJohannes Doerfert   // return true if dlopen succeeded and all functions found
113330d8983SJohannes Doerfert 
114330d8983SJohannes Doerfert   // Prefer _v2 versions of functions if found in the library
115330d8983SJohannes Doerfert   std::unordered_map<std::string, const char *> TryFirst = {
116330d8983SJohannes Doerfert       {"cuMemAlloc", "cuMemAlloc_v2"},
117330d8983SJohannes Doerfert       {"cuMemFree", "cuMemFree_v2"},
118330d8983SJohannes Doerfert       {"cuMemcpyDtoH", "cuMemcpyDtoH_v2"},
119330d8983SJohannes Doerfert       {"cuMemcpyHtoD", "cuMemcpyHtoD_v2"},
120330d8983SJohannes Doerfert       {"cuStreamDestroy", "cuStreamDestroy_v2"},
121330d8983SJohannes Doerfert       {"cuModuleGetGlobal", "cuModuleGetGlobal_v2"},
122330d8983SJohannes Doerfert       {"cuMemcpyDtoHAsync", "cuMemcpyDtoHAsync_v2"},
123330d8983SJohannes Doerfert       {"cuMemcpyDtoDAsync", "cuMemcpyDtoDAsync_v2"},
124330d8983SJohannes Doerfert       {"cuMemcpyHtoDAsync", "cuMemcpyHtoDAsync_v2"},
125330d8983SJohannes Doerfert       {"cuDevicePrimaryCtxRelease", "cuDevicePrimaryCtxRelease_v2"},
126330d8983SJohannes Doerfert       {"cuDevicePrimaryCtxSetFlags", "cuDevicePrimaryCtxSetFlags_v2"},
127330d8983SJohannes Doerfert   };
128330d8983SJohannes Doerfert 
129330d8983SJohannes Doerfert   const char *CudaLib = DYNAMIC_CUDA_PATH;
130330d8983SJohannes Doerfert   std::string ErrMsg;
131330d8983SJohannes Doerfert   auto DynlibHandle = std::make_unique<llvm::sys::DynamicLibrary>(
132330d8983SJohannes Doerfert       llvm::sys::DynamicLibrary::getPermanentLibrary(CudaLib, &ErrMsg));
133330d8983SJohannes Doerfert   if (!DynlibHandle->isValid()) {
134330d8983SJohannes Doerfert     DP("Unable to load library '%s': %s!\n", CudaLib, ErrMsg.c_str());
135330d8983SJohannes Doerfert     return false;
136330d8983SJohannes Doerfert   }
137330d8983SJohannes Doerfert 
138330d8983SJohannes Doerfert   for (size_t I = 0; I < dlwrap::size(); I++) {
139330d8983SJohannes Doerfert     const char *Sym = dlwrap::symbol(I);
140330d8983SJohannes Doerfert 
141330d8983SJohannes Doerfert     auto It = TryFirst.find(Sym);
142330d8983SJohannes Doerfert     if (It != TryFirst.end()) {
143330d8983SJohannes Doerfert       const char *First = It->second;
144330d8983SJohannes Doerfert       void *P = DynlibHandle->getAddressOfSymbol(First);
145330d8983SJohannes Doerfert       if (P) {
146330d8983SJohannes Doerfert         DP("Implementing %s with dlsym(%s) -> %p\n", Sym, First, P);
147330d8983SJohannes Doerfert         *dlwrap::pointer(I) = P;
148330d8983SJohannes Doerfert         continue;
149330d8983SJohannes Doerfert       }
150330d8983SJohannes Doerfert     }
151330d8983SJohannes Doerfert 
152330d8983SJohannes Doerfert     void *P = DynlibHandle->getAddressOfSymbol(Sym);
153330d8983SJohannes Doerfert     if (P == nullptr) {
154330d8983SJohannes Doerfert       DP("Unable to find '%s' in '%s'!\n", Sym, CudaLib);
155330d8983SJohannes Doerfert       return false;
156330d8983SJohannes Doerfert     }
157330d8983SJohannes Doerfert     DP("Implementing %s with dlsym(%s) -> %p\n", Sym, Sym, P);
158330d8983SJohannes Doerfert 
159330d8983SJohannes Doerfert     *dlwrap::pointer(I) = P;
160330d8983SJohannes Doerfert   }
161330d8983SJohannes Doerfert 
162330d8983SJohannes Doerfert   return true;
163330d8983SJohannes Doerfert }
164330d8983SJohannes Doerfert 
165330d8983SJohannes Doerfert CUresult cuInit(unsigned X) {
166330d8983SJohannes Doerfert   // Note: Called exactly once from cuda rtl.cpp in a global constructor so
167330d8983SJohannes Doerfert   // does not need to handle being called repeatedly or concurrently
168330d8983SJohannes Doerfert   if (!checkForCUDA()) {
169330d8983SJohannes Doerfert     return CUDA_ERROR_INVALID_HANDLE;
170330d8983SJohannes Doerfert   }
171330d8983SJohannes Doerfert   return dlwrap_cuInit(X);
172330d8983SJohannes Doerfert }
173