1 //===--- cuda/dynamic_cuda/cuda.pp ------------------------------- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Implement subset of cuda api by calling into cuda library via dlopen 10 // Does the dlopen/dlsym calls as part of the call to cuInit 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Support/DynamicLibrary.h" 15 16 #include "Shared/Debug.h" 17 18 #include "DLWrap.h" 19 #include "cuda.h" 20 21 #include <memory> 22 #include <string> 23 #include <unordered_map> 24 25 DLWRAP_INITIALIZE() 26 27 DLWRAP_INTERNAL(cuInit, 1) 28 29 DLWRAP(cuCtxGetDevice, 1) 30 DLWRAP(cuDeviceGet, 2) 31 DLWRAP(cuDeviceGetAttribute, 3) 32 DLWRAP(cuDeviceGetCount, 1) 33 DLWRAP(cuFuncGetAttribute, 3) 34 35 // Device info 36 DLWRAP(cuDeviceGetName, 3) 37 DLWRAP(cuDeviceTotalMem, 2) 38 DLWRAP(cuDriverGetVersion, 1) 39 40 DLWRAP(cuGetErrorString, 2) 41 DLWRAP(cuLaunchKernel, 11) 42 DLWRAP(cuLaunchHostFunc, 3) 43 44 DLWRAP(cuMemAlloc, 2) 45 DLWRAP(cuMemAllocHost, 2) 46 DLWRAP(cuMemAllocManaged, 3) 47 DLWRAP(cuMemAllocAsync, 3) 48 49 DLWRAP(cuMemcpyDtoDAsync, 4) 50 DLWRAP(cuMemcpyDtoH, 3) 51 DLWRAP(cuMemcpyDtoHAsync, 4) 52 DLWRAP(cuMemcpyHtoD, 3) 53 DLWRAP(cuMemcpyHtoDAsync, 4) 54 55 DLWRAP(cuMemFree, 1) 56 DLWRAP(cuMemFreeHost, 1) 57 DLWRAP(cuMemFreeAsync, 2) 58 59 DLWRAP(cuModuleGetFunction, 3) 60 DLWRAP(cuModuleGetGlobal, 4) 61 62 DLWRAP(cuModuleUnload, 1) 63 DLWRAP(cuStreamCreate, 2) 64 DLWRAP(cuStreamDestroy, 1) 65 DLWRAP(cuStreamSynchronize, 1) 66 DLWRAP(cuStreamQuery, 1) 67 DLWRAP(cuStreamAddCallback, 4) 68 DLWRAP(cuCtxSetCurrent, 1) 69 DLWRAP(cuDevicePrimaryCtxRelease, 1) 70 DLWRAP(cuDevicePrimaryCtxGetState, 3) 71 DLWRAP(cuDevicePrimaryCtxSetFlags, 2) 72 DLWRAP(cuDevicePrimaryCtxRetain, 2) 73 DLWRAP(cuModuleLoadDataEx, 5) 74 75 DLWRAP(cuDeviceCanAccessPeer, 3) 76 DLWRAP(cuCtxEnablePeerAccess, 2) 77 DLWRAP(cuMemcpyPeerAsync, 6) 78 79 DLWRAP(cuCtxGetLimit, 2) 80 DLWRAP(cuCtxSetLimit, 2) 81 82 DLWRAP(cuEventCreate, 2) 83 DLWRAP(cuEventRecord, 2) 84 DLWRAP(cuStreamWaitEvent, 3) 85 DLWRAP(cuEventSynchronize, 1) 86 DLWRAP(cuEventDestroy, 1) 87 88 DLWRAP_FINALIZE() 89 90 DLWRAP(cuMemUnmap, 2) 91 DLWRAP(cuMemRelease, 1) 92 DLWRAP(cuMemAddressFree, 2) 93 DLWRAP(cuMemGetInfo, 2) 94 DLWRAP(cuMemAddressReserve, 5) 95 DLWRAP(cuMemMap, 5) 96 DLWRAP(cuMemCreate, 4) 97 DLWRAP(cuMemSetAccess, 4) 98 DLWRAP(cuMemGetAllocationGranularity, 3) 99 100 #ifndef DYNAMIC_CUDA_PATH 101 #define DYNAMIC_CUDA_PATH "libcuda.so" 102 #endif 103 104 #ifndef TARGET_NAME 105 #define TARGET_NAME CUDA 106 #endif 107 #ifndef DEBUG_PREFIX 108 #define DEBUG_PREFIX "Target " GETNAME(TARGET_NAME) " RTL" 109 #endif 110 111 static bool checkForCUDA() { 112 // return true if dlopen succeeded and all functions found 113 114 // Prefer _v2 versions of functions if found in the library 115 std::unordered_map<std::string, const char *> TryFirst = { 116 {"cuMemAlloc", "cuMemAlloc_v2"}, 117 {"cuMemFree", "cuMemFree_v2"}, 118 {"cuMemcpyDtoH", "cuMemcpyDtoH_v2"}, 119 {"cuMemcpyHtoD", "cuMemcpyHtoD_v2"}, 120 {"cuStreamDestroy", "cuStreamDestroy_v2"}, 121 {"cuModuleGetGlobal", "cuModuleGetGlobal_v2"}, 122 {"cuMemcpyDtoHAsync", "cuMemcpyDtoHAsync_v2"}, 123 {"cuMemcpyDtoDAsync", "cuMemcpyDtoDAsync_v2"}, 124 {"cuMemcpyHtoDAsync", "cuMemcpyHtoDAsync_v2"}, 125 {"cuDevicePrimaryCtxRelease", "cuDevicePrimaryCtxRelease_v2"}, 126 {"cuDevicePrimaryCtxSetFlags", "cuDevicePrimaryCtxSetFlags_v2"}, 127 }; 128 129 const char *CudaLib = DYNAMIC_CUDA_PATH; 130 std::string ErrMsg; 131 auto DynlibHandle = std::make_unique<llvm::sys::DynamicLibrary>( 132 llvm::sys::DynamicLibrary::getPermanentLibrary(CudaLib, &ErrMsg)); 133 if (!DynlibHandle->isValid()) { 134 DP("Unable to load library '%s': %s!\n", CudaLib, ErrMsg.c_str()); 135 return false; 136 } 137 138 for (size_t I = 0; I < dlwrap::size(); I++) { 139 const char *Sym = dlwrap::symbol(I); 140 141 auto It = TryFirst.find(Sym); 142 if (It != TryFirst.end()) { 143 const char *First = It->second; 144 void *P = DynlibHandle->getAddressOfSymbol(First); 145 if (P) { 146 DP("Implementing %s with dlsym(%s) -> %p\n", Sym, First, P); 147 *dlwrap::pointer(I) = P; 148 continue; 149 } 150 } 151 152 void *P = DynlibHandle->getAddressOfSymbol(Sym); 153 if (P == nullptr) { 154 DP("Unable to find '%s' in '%s'!\n", Sym, CudaLib); 155 return false; 156 } 157 DP("Implementing %s with dlsym(%s) -> %p\n", Sym, Sym, P); 158 159 *dlwrap::pointer(I) = P; 160 } 161 162 return true; 163 } 164 165 CUresult cuInit(unsigned X) { 166 // Note: Called exactly once from cuda rtl.cpp in a global constructor so 167 // does not need to handle being called repeatedly or concurrently 168 if (!checkForCUDA()) { 169 return CUDA_ERROR_INVALID_HANDLE; 170 } 171 return dlwrap_cuInit(X); 172 } 173