1 //===- Target.cpp - MLIR LLVM NVVM target compilation -----------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This files defines NVVM target related functions including registration 10 // calls for the `#nvvm.target` compilation attribute. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "mlir/Target/LLVM/NVVM/Target.h" 15 16 #include "mlir/Dialect/GPU/IR/CompilationInterfaces.h" 17 #include "mlir/Dialect/GPU/IR/GPUDialect.h" 18 #include "mlir/Dialect/LLVMIR/NVVMDialect.h" 19 #include "mlir/IR/BuiltinAttributeInterfaces.h" 20 #include "mlir/IR/BuiltinDialect.h" 21 #include "mlir/IR/BuiltinTypes.h" 22 #include "mlir/IR/DialectResourceBlobManager.h" 23 #include "mlir/Target/LLVM/NVVM/Utils.h" 24 #include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h" 25 #include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h" 26 #include "mlir/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.h" 27 #include "mlir/Target/LLVMIR/Export.h" 28 29 #include "llvm/Config/llvm-config.h" 30 #include "llvm/Support/FileSystem.h" 31 #include "llvm/Support/FileUtilities.h" 32 #include "llvm/Support/FormatVariadic.h" 33 #include "llvm/Support/MemoryBuffer.h" 34 #include "llvm/Support/Path.h" 35 #include "llvm/Support/Process.h" 36 #include "llvm/Support/Program.h" 37 #include "llvm/Support/TargetSelect.h" 38 #include "llvm/Support/raw_ostream.h" 39 40 #include <cstdint> 41 #include <cstdlib> 42 43 using namespace mlir; 44 using namespace mlir::NVVM; 45 46 #ifndef __DEFAULT_CUDATOOLKIT_PATH__ 47 #define __DEFAULT_CUDATOOLKIT_PATH__ "" 48 #endif 49 50 extern "C" const char _mlir_embedded_libdevice[]; 51 extern "C" const unsigned 
_mlir_embedded_libdevice_size;

namespace {
// Implementation of the `TargetAttrInterface` model for `#nvvm.target`
// attributes. Instances are attached to the attribute as an external model,
// so GPU compilation passes can serialize NVVM modules through it.
class NVVMTargetAttrImpl
    : public gpu::TargetAttrInterface::FallbackModel<NVVMTargetAttrImpl> {
public:
  /// Serializes the GPU module `module` using the `#nvvm.target` in
  /// `attribute`, returning the object bytes (format chosen by `options`),
  /// or `std::nullopt` on failure.
  std::optional<SmallVector<char, 0>>
  serializeToObject(Attribute attribute, Operation *module,
                    const gpu::TargetOptions &options) const;

  /// Wraps serialized `object` bytes into a `#gpu.object` attribute that
  /// embeds the target, the compilation format, and optional properties.
  Attribute createObject(Attribute attribute, Operation *module,
                         const SmallVector<char, 0> &object,
                         const gpu::TargetOptions &options) const;
};
} // namespace

// Register the NVVM dialect, the NVVM translation & the target interface.
void mlir::NVVM::registerNVVMTargetInterfaceExternalModels(
    DialectRegistry &registry) {
  // The extension runs lazily, once the NVVM dialect is loaded into a context.
  registry.addExtension(+[](MLIRContext *ctx, NVVM::NVVMDialect *dialect) {
    NVVMTargetAttr::attachInterface<NVVMTargetAttrImpl>(*ctx);
  });
}

// Convenience overload: registers the interface directly on a live context by
// funneling through the `DialectRegistry` overload above.
void mlir::NVVM::registerNVVMTargetInterfaceExternalModels(
    MLIRContext &context) {
  DialectRegistry registry;
  registerNVVMTargetInterfaceExternalModels(registry);
  context.appendDialectRegistry(registry);
}

// Search for the CUDA toolkit path.
84 StringRef mlir::NVVM::getCUDAToolkitPath() { 85 if (const char *var = std::getenv("CUDA_ROOT")) 86 return var; 87 if (const char *var = std::getenv("CUDA_HOME")) 88 return var; 89 if (const char *var = std::getenv("CUDA_PATH")) 90 return var; 91 return __DEFAULT_CUDATOOLKIT_PATH__; 92 } 93 94 SerializeGPUModuleBase::SerializeGPUModuleBase( 95 Operation &module, NVVMTargetAttr target, 96 const gpu::TargetOptions &targetOptions) 97 : ModuleToObject(module, target.getTriple(), target.getChip(), 98 target.getFeatures(), target.getO(), 99 targetOptions.getInitialLlvmIRCallback(), 100 targetOptions.getLinkedLlvmIRCallback(), 101 targetOptions.getOptimizedLlvmIRCallback(), 102 targetOptions.getISACallback()), 103 target(target), toolkitPath(targetOptions.getToolkitPath()), 104 librariesToLink(targetOptions.getLibrariesToLink()) { 105 106 // If `targetOptions` have an empty toolkitPath use `getCUDAToolkitPath` 107 if (toolkitPath.empty()) 108 toolkitPath = getCUDAToolkitPath(); 109 110 // Append the files in the target attribute. 111 if (target.getLink()) 112 librariesToLink.append(target.getLink().begin(), target.getLink().end()); 113 114 // Append libdevice to the files to be loaded. 115 (void)appendStandardLibs(); 116 } 117 118 void SerializeGPUModuleBase::init() { 119 static llvm::once_flag initializeBackendOnce; 120 llvm::call_once(initializeBackendOnce, []() { 121 // If the `NVPTX` LLVM target was built, initialize it. 122 #if LLVM_HAS_NVPTX_TARGET 123 LLVMInitializeNVPTXTarget(); 124 LLVMInitializeNVPTXTargetInfo(); 125 LLVMInitializeNVPTXTargetMC(); 126 LLVMInitializeNVPTXAsmPrinter(); 127 #endif 128 }); 129 } 130 131 NVVMTargetAttr SerializeGPUModuleBase::getTarget() const { return target; } 132 133 StringRef SerializeGPUModuleBase::getToolkitPath() const { return toolkitPath; } 134 135 ArrayRef<Attribute> SerializeGPUModuleBase::getLibrariesToLink() const { 136 return librariesToLink; 137 } 138 139 // Try to append `libdevice` from a CUDA toolkit installation. 
// Appends `libdevice` to `librariesToLink`. Two mutually-exclusive strategies:
// either the bitcode is embedded in this binary (MLIR_NVVM_EMBED_LIBDEVICE),
// or it is located on disk inside the CUDA toolkit installation.
LogicalResult SerializeGPUModuleBase::appendStandardLibs() {
#if MLIR_NVVM_EMBED_LIBDEVICE
  // If libdevice is embedded in the binary, we don't look it up on the
  // filesystem.
  MLIRContext *ctx = target.getContext();
  // The blob is exposed as a rank-1 tensor of i8 of the embedded size.
  auto type =
      RankedTensorType::get(ArrayRef<int64_t>{_mlir_embedded_libdevice_size},
                            IntegerType::get(ctx, 8));
  auto resourceManager = DenseResourceElementsHandle::getManagerInterface(ctx);

  // Lookup if we already loaded the resource, otherwise create it.
  DialectResourceBlobManager::BlobEntry *blob =
      resourceManager.getBlobManager().lookup("_mlir_embedded_libdevice");
  if (blob) {
    // Reuse the previously registered blob so the data is shared.
    librariesToLink.push_back(DenseResourceElementsAttr::get(
        type, DenseResourceElementsHandle(
                  blob, ctx->getLoadedDialect<BuiltinDialect>())));
    return success();
  }

  // Allocate a resource using one of the UnManagedResourceBlob method to wrap
  // the embedded data; unmanaged means the blob manager does not own (or
  // free) the underlying storage, which lives in this binary's data segment.
  auto unmanagedBlob = UnmanagedAsmResourceBlob::allocateInferAlign(
      ArrayRef<char>{_mlir_embedded_libdevice, _mlir_embedded_libdevice_size});
  librariesToLink.push_back(DenseResourceElementsAttr::get(
      type, resourceManager.insert("_mlir_embedded_libdevice",
                                   std::move(unmanagedBlob))));
#else
  StringRef pathRef = getToolkitPath();
  if (!pathRef.empty()) {
    // Copy into a mutable buffer so `llvm::sys::path::append` can extend it.
    SmallVector<char, 256> path;
    path.insert(path.begin(), pathRef.begin(), pathRef.end());
    pathRef = StringRef(path.data(), path.size());
    if (!llvm::sys::fs::is_directory(pathRef)) {
      getOperation().emitError() << "CUDA path: " << pathRef
                                 << " does not exist or is not a directory.\n";
      return failure();
    }
    // Standard toolkit layout: <toolkit>/nvvm/libdevice/libdevice.10.bc.
    llvm::sys::path::append(path, "nvvm", "libdevice", "libdevice.10.bc");
    pathRef = StringRef(path.data(), path.size());
    if (!llvm::sys::fs::is_regular_file(pathRef)) {
      getOperation().emitError() << "LibDevice path: " << pathRef
                                 << " does not exist or is not a file.\n";
      return failure();
    }
    librariesToLink.push_back(StringAttr::get(target.getContext(), pathRef));
  }
#endif
  return success();
}

// Loads every library in `librariesToLink` as an LLVM module, returning
// std::nullopt if any of them fails to load.
std::optional<SmallVector<std::unique_ptr<llvm::Module>>>
SerializeGPUModuleBase::loadBitcodeFiles(llvm::Module &module) {
  SmallVector<std::unique_ptr<llvm::Module>> bcFiles;
  if (failed(loadBitcodeFilesFromList(module.getContext(), librariesToLink,
                                      bcFiles, true)))
    return std::nullopt;
  return std::move(bcFiles);
}

namespace {
// Serializer that drives NVVM -> LLVM IR -> PTX -> cubin/fatbin compilation,
// either by invoking the external `ptxas`/`fatbinary` tools or through the
// `nvptxcompiler`/`nvfatbin` libraries when they were enabled at build time.
class NVPTXSerializer : public SerializeGPUModuleBase {
public:
  NVPTXSerializer(Operation &module, NVVMTargetAttr target,
                  const gpu::TargetOptions &targetOptions);

  /// Returns the GPU module op being serialized.
  gpu::GPUModuleOp getOperation();

  /// Compiles PTX to cubin using `ptxas`.
  std::optional<SmallVector<char, 0>>
  compileToBinary(const std::string &ptxCode);

  /// Compiles PTX to cubin using the `nvptxcompiler` library.
  std::optional<SmallVector<char, 0>>
  compileToBinaryNVPTX(const std::string &ptxCode);

  /// Serializes the LLVM module to an object format, depending on the
  /// compilation target selected in target options.
  std::optional<SmallVector<char, 0>>
  moduleToObject(llvm::Module &llvmModule) override;

private:
  /// A temporary file path paired with a remover that deletes the file when
  /// the pair goes out of scope.
  using TmpFile = std::pair<llvm::SmallString<128>, llvm::FileRemover>;

  /// Creates a temp file.
  std::optional<TmpFile> createTemp(StringRef name, StringRef suffix);

  /// Finds the `tool` path, where `tool` is the name of the binary to search,
  /// i.e. `ptxas` or `fatbinary`. The search order is:
  /// 1. The toolkit path in `targetOptions`.
  /// 2. In the system PATH.
  /// 3. The path from `getCUDAToolkitPath()`.
  std::optional<std::string> findTool(StringRef tool);

  /// Target options.
  gpu::TargetOptions targetOptions;
};
} // namespace

NVPTXSerializer::NVPTXSerializer(Operation &module, NVVMTargetAttr target,
                                 const gpu::TargetOptions &targetOptions)
    : SerializeGPUModuleBase(module, target, targetOptions),
      targetOptions(targetOptions) {}

// Creates a uniquely-named temporary file `name`.<random>.`suffix`; the
// returned TmpFile removes the file on destruction. Emits an error and
// returns std::nullopt on failure.
std::optional<NVPTXSerializer::TmpFile>
NVPTXSerializer::createTemp(StringRef name, StringRef suffix) {
  llvm::SmallString<128> filename;
  std::error_code ec =
      llvm::sys::fs::createTemporaryFile(name, suffix, filename);
  if (ec) {
    getOperation().emitError() << "Couldn't create the temp file: `" << filename
                               << "`, error message: " << ec.message();
    return std::nullopt;
  }
  return TmpFile(filename, llvm::FileRemover(filename.c_str()));
}

// NOTE(review): `dyn_cast` yields a null op if the underlying operation is
// not a `gpu.module`; callers appear to rely on `serializeToObject` having
// verified this upfront — confirm before using from new call sites.
gpu::GPUModuleOp NVPTXSerializer::getOperation() {
  return dyn_cast<gpu::GPUModuleOp>(&SerializeGPUModuleBase::getOperation());
}

std::optional<std::string> NVPTXSerializer::findTool(StringRef tool) {
  // Find the `tool` path.
  // 1. Check the toolkit path given in the command line.
  StringRef pathRef = targetOptions.getToolkitPath();
  SmallVector<char, 256> path;
  if (!pathRef.empty()) {
    path.insert(path.begin(), pathRef.begin(), pathRef.end());
    llvm::sys::path::append(path, "bin", tool);
    if (llvm::sys::fs::can_execute(path))
      return StringRef(path.data(), path.size()).str();
  }

  // 2. Check PATH.
  if (std::optional<std::string> toolPath =
          llvm::sys::Process::FindInEnvPath("PATH", tool))
    return *toolPath;

  // 3. Check `getCUDAToolkitPath()`.
  pathRef = getCUDAToolkitPath();
  path.clear();
  if (!pathRef.empty()) {
    path.insert(path.begin(), pathRef.begin(), pathRef.end());
    llvm::sys::path::append(path, "bin", tool);
    if (llvm::sys::fs::can_execute(path))
      return StringRef(path.data(), path.size()).str();
  }
  getOperation().emitError()
      << "Couldn't find the `" << tool
      << "` binary. Please specify the toolkit "
         "path, add the compiler to $PATH, or set one of the environment "
         "variables in `NVVM::getCUDAToolkitPath()`.";
  return std::nullopt;
}

// TODO: clean this method & have a generic tool driver or never emit binaries
// with this mechanism and let another stage take care of it.
std::optional<SmallVector<char, 0>>
NVPTXSerializer::compileToBinary(const std::string &ptxCode) {
  // Determine if the serializer should create a fatbinary with the PTX
  // embedded or a simple CUBIN binary.
  const bool createFatbin =
      targetOptions.getCompilationTarget() == gpu::CompilationTarget::Fatbin;

  // Find the `ptxas` & `fatbinary` tools.
  std::optional<std::string> ptxasCompiler = findTool("ptxas");
  if (!ptxasCompiler)
    return std::nullopt;
  std::optional<std::string> fatbinaryTool;
  if (createFatbin) {
    fatbinaryTool = findTool("fatbinary");
    if (!fatbinaryTool)
      return std::nullopt;
  }
  Location loc = getOperation().getLoc();

  // Base name for all temp files: mlir-<module name>-<target triple>-<chip>.
  std::string basename =
      llvm::formatv("mlir-{0}-{1}-{2}", getOperation().getNameAttr().getValue(),
                    getTarget().getTriple(), getTarget().getChip());

  // Create temp files: the PTX input, the tool log, and the final binary.
  std::optional<TmpFile> ptxFile = createTemp(basename, "ptx");
  if (!ptxFile)
    return std::nullopt;
  std::optional<TmpFile> logFile = createTemp(basename, "log");
  if (!logFile)
    return std::nullopt;
  std::optional<TmpFile> binaryFile = createTemp(basename, "bin");
  if (!binaryFile)
    return std::nullopt;
  // When producing a fatbin, `ptxas` writes to an intermediate `.cubin` file;
  // otherwise the cubin *is* the final binary.
  TmpFile cubinFile;
  if (createFatbin) {
    Twine cubinFilename = ptxFile->first + ".cubin";
    cubinFile = TmpFile(cubinFilename.str(), llvm::FileRemover(cubinFilename));
  } else {
    cubinFile.first = binaryFile->first;
  }

  std::error_code ec;
  // Dump the PTX to a temp file.
  {
    llvm::raw_fd_ostream ptxStream(ptxFile->first, ec);
    if (ec) {
      emitError(loc) << "Couldn't open the file: `" << ptxFile->first
                     << "`, error message: " << ec.message();
      return std::nullopt;
    }
    ptxStream << ptxCode;
    if (ptxStream.has_error()) {
      emitError(loc) << "An error occurred while writing the PTX to: `"
                     << ptxFile->first << "`.";
      return std::nullopt;
    }
    ptxStream.flush();
  }

  // Command redirects: keep stdin untouched, send the tools' stdout and
  // stderr to the log file.
  std::optional<StringRef> redirects[] = {
      std::nullopt,
      logFile->first,
      logFile->first,
  };

  // Get any extra args passed in `targetOptions`. The allocator in `.first`
  // owns the tokenized strings pointed to by `.second`.
  std::pair<llvm::BumpPtrAllocator, SmallVector<const char *>> cmdOpts =
      targetOptions.tokenizeCmdOptions();

  // Create ptxas args.
  std::string optLevel = std::to_string(this->optLevel);
  SmallVector<StringRef, 12> ptxasArgs(
      {StringRef("ptxas"), StringRef("-arch"), getTarget().getChip(),
       StringRef(ptxFile->first), StringRef("-o"), StringRef(cubinFile.first),
       "--opt-level", optLevel});

  bool useFatbin32 = false;
  for (const auto *cArg : cmdOpts.second) {
    // All `cmdOpts` are for `ptxas` except `-32` which passes `-32` to
    // `fatbinary`, indicating a 32-bit target. By default a 64-bit target is
    // assumed.
    if (StringRef arg(cArg); arg != "-32")
      ptxasArgs.push_back(arg);
    else
      useFatbin32 = true;
  }

  // Create the `fatbinary` args.
  StringRef chip = getTarget().getChip();
  // Remove the arch prefix to obtain the compute capability; the comma
  // operator runs both `consume_front` calls (at most one can match).
  chip.consume_front("sm_"), chip.consume_front("compute_");
  // Embed the cubin object.
  std::string cubinArg =
      llvm::formatv("--image3=kind=elf,sm={0},file={1}", chip, cubinFile.first)
          .str();
  // Embed the PTX file so the driver can JIT if needed.
  std::string ptxArg =
      llvm::formatv("--image3=kind=ptx,sm={0},file={1}", chip, ptxFile->first)
          .str();
  SmallVector<StringRef, 6> fatbinArgs({StringRef("fatbinary"),
                                        useFatbin32 ? "-32" : "-64", cubinArg,
                                        ptxArg, "--create", binaryFile->first});

  // Dump tool invocation commands.
#define DEBUG_TYPE "serialize-to-binary"
  LLVM_DEBUG({
    llvm::dbgs() << "Tool invocation for module: "
                 << getOperation().getNameAttr() << "\n";
    llvm::interleave(ptxasArgs, llvm::dbgs(), " ");
    llvm::dbgs() << "\n";
    if (createFatbin) {
      llvm::interleave(fatbinArgs, llvm::dbgs(), " ");
      llvm::dbgs() << "\n";
    }
  });
#undef DEBUG_TYPE

  // Helper function for printing tool error logs. Prefers the error message
  // reported by `ExecuteAndWait`; otherwise falls back to the log file.
  std::string message;
  auto emitLogError =
      [&](StringRef toolName) -> std::optional<SmallVector<char, 0>> {
    if (message.empty()) {
      llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> toolStderr =
          llvm::MemoryBuffer::getFile(logFile->first);
      if (toolStderr)
        emitError(loc) << toolName << " invocation failed. Log:\n"
                       << toolStderr->get()->getBuffer();
      else
        emitError(loc) << toolName << " invocation failed.";
      return std::nullopt;
    }
    emitError(loc) << toolName
                   << " invocation failed, error message: " << message;
    return std::nullopt;
  };

  // Invoke PTXAS. A nonzero return code indicates failure.
  if (llvm::sys::ExecuteAndWait(ptxasCompiler.value(), ptxasArgs,
                                /*Env=*/std::nullopt,
                                /*Redirects=*/redirects,
                                /*SecondsToWait=*/0,
                                /*MemoryLimit=*/0,
                                /*ErrMsg=*/&message))
    return emitLogError("`ptxas`");
// Debug-only SASS dump via `nvdisasm` (enabled with -debug-only=dump-sass).
#define DEBUG_TYPE "dump-sass"
  LLVM_DEBUG({
    // NOTE(review): `nvdisasm.value()` is used without checking that the tool
    // was found; in a debug build with this DEBUG_TYPE enabled and no
    // `nvdisasm` on the system this dereferences an empty optional — confirm.
    std::optional<std::string> nvdisasm = findTool("nvdisasm");
    SmallVector<StringRef> nvdisasmArgs(
        {StringRef("nvdisasm"), StringRef(cubinFile.first)});
    if (llvm::sys::ExecuteAndWait(nvdisasm.value(), nvdisasmArgs,
                                  /*Env=*/std::nullopt,
                                  /*Redirects=*/redirects,
                                  /*SecondsToWait=*/0,
                                  /*MemoryLimit=*/0,
                                  /*ErrMsg=*/&message))
      return emitLogError("`nvdisasm`");
    llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> logBuffer =
        llvm::MemoryBuffer::getFile(logFile->first);
    if (logBuffer && !(*logBuffer)->getBuffer().empty()) {
      llvm::dbgs() << "Output:\n" << (*logBuffer)->getBuffer() << "\n";
      llvm::dbgs().flush();
    }
  });
#undef DEBUG_TYPE

  // Invoke `fatbin`.
  message.clear();
  if (createFatbin && llvm::sys::ExecuteAndWait(*fatbinaryTool, fatbinArgs,
                                                /*Env=*/std::nullopt,
                                                /*Redirects=*/redirects,
                                                /*SecondsToWait=*/0,
                                                /*MemoryLimit=*/0,
                                                /*ErrMsg=*/&message))
    return emitLogError("`fatbinary`");

  // Dump the output of the tools, helpful if the verbose flag was passed.
#define DEBUG_TYPE "serialize-to-binary"
  LLVM_DEBUG({
    llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> logBuffer =
        llvm::MemoryBuffer::getFile(logFile->first);
    if (logBuffer && !(*logBuffer)->getBuffer().empty()) {
      llvm::dbgs() << "Output:\n" << (*logBuffer)->getBuffer() << "\n";
      llvm::dbgs().flush();
    }
  });
#undef DEBUG_TYPE

  // Read the fatbin back from disk and return its bytes.
  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> binaryBuffer =
      llvm::MemoryBuffer::getFile(binaryFile->first);
  if (!binaryBuffer) {
    emitError(loc) << "Couldn't open the file: `" << binaryFile->first
                   << "`, error message: " << binaryBuffer.getError().message();
    return std::nullopt;
  }
  StringRef fatbin = (*binaryBuffer)->getBuffer();
  return SmallVector<char, 0>(fatbin.begin(), fatbin.end());
}

#if MLIR_ENABLE_NVPTXCOMPILER
#include "nvPTXCompiler.h"

// Evaluates `expr`; on a nonzero nvPTXCompiler status code, emits an error
// (using `loc` from the enclosing scope) and returns std::nullopt.
#define RETURN_ON_NVPTXCOMPILER_ERROR(expr)                                    \
  do {                                                                         \
    if (auto status = (expr)) {                                                \
      emitError(loc) << llvm::Twine(#expr).concat(" failed with error code ")  \
                     << status;                                                \
      return std::nullopt;                                                     \
    }                                                                          \
  } while (false)

#include "nvFatbin.h"

// Same as above for the nvFatbin API, which reports errors via
// nvFatbinResult and decodes them with nvFatbinGetErrorString.
#define RETURN_ON_NVFATBIN_ERROR(expr)                                         \
  do {                                                                         \
    auto result = (expr);                                                      \
    if (result != nvFatbinResult::NVFATBIN_SUCCESS) {                          \
      emitError(loc) << llvm::Twine(#expr).concat(" failed with error: ")      \
                     << nvFatbinGetErrorString(result);                        \
      return std::nullopt;                                                     \
    }                                                                          \
  } while (false)

// In-process counterpart of `compileToBinary`: compiles PTX with the
// nvptxcompiler library and, for fatbin targets, packages the result with
// the nvFatbin library — no external tool processes involved.
std::optional<SmallVector<char, 0>>
NVPTXSerializer::compileToBinaryNVPTX(const std::string &ptxCode) {
  Location loc = getOperation().getLoc();
  nvPTXCompilerHandle compiler = nullptr;
  nvPTXCompileResult status;
  size_t logSize;

  // Create the options.
  // NOTE(review): `getChip().data()` assumes the StringRef's underlying
  // storage is null-terminated — TODO confirm for attribute-backed strings.
  std::string optLevel = std::to_string(this->optLevel);
  std::pair<llvm::BumpPtrAllocator, SmallVector<const char *>> cmdOpts =
      targetOptions.tokenizeCmdOptions();
  cmdOpts.second.append(
      {"-arch", getTarget().getChip().data(), "--opt-level", optLevel.c_str()});

  // Create the compiler handle.
  RETURN_ON_NVPTXCOMPILER_ERROR(
      nvPTXCompilerCreate(&compiler, ptxCode.size(), ptxCode.c_str()));

  // Try to compile the binary.
  status = nvPTXCompilerCompile(compiler, cmdOpts.second.size(),
                                cmdOpts.second.data());

  // Check if compilation failed; surface the compiler's error log if there
  // is one, otherwise just the status code.
  if (status != NVPTXCOMPILE_SUCCESS) {
    RETURN_ON_NVPTXCOMPILER_ERROR(
        nvPTXCompilerGetErrorLogSize(compiler, &logSize));
    if (logSize != 0) {
      SmallVector<char> log(logSize + 1, 0);
      RETURN_ON_NVPTXCOMPILER_ERROR(
          nvPTXCompilerGetErrorLog(compiler, log.data()));
      emitError(loc) << "NVPTX compiler invocation failed, error log: "
                     << log.data();
    } else
      emitError(loc) << "NVPTX compiler invocation failed with error code: "
                     << status;
    return std::nullopt;
  }

  // Retrieve the binary.
  size_t elfSize;
  RETURN_ON_NVPTXCOMPILER_ERROR(
      nvPTXCompilerGetCompiledProgramSize(compiler, &elfSize));
  SmallVector<char, 0> binary(elfSize, 0);
  RETURN_ON_NVPTXCOMPILER_ERROR(
      nvPTXCompilerGetCompiledProgram(compiler, (void *)binary.data()));

  // Dump the log of the compiler, helpful if the verbose flag was passed.
#define DEBUG_TYPE "serialize-to-binary"
  LLVM_DEBUG({
    RETURN_ON_NVPTXCOMPILER_ERROR(
        nvPTXCompilerGetInfoLogSize(compiler, &logSize));
    if (logSize != 0) {
      SmallVector<char> log(logSize + 1, 0);
      RETURN_ON_NVPTXCOMPILER_ERROR(
          nvPTXCompilerGetInfoLog(compiler, log.data()));
      llvm::dbgs() << "NVPTX compiler invocation for module: "
                   << getOperation().getNameAttr() << "\n";
      llvm::dbgs() << "Arguments: ";
      llvm::interleave(cmdOpts.second, llvm::dbgs(), " ");
      llvm::dbgs() << "\nOutput\n" << log.data() << "\n";
      llvm::dbgs().flush();
    }
  });
#undef DEBUG_TYPE
  RETURN_ON_NVPTXCOMPILER_ERROR(nvPTXCompilerDestroy(&compiler));

  // For fatbin targets, wrap the cubin plus the original PTX (so the driver
  // can JIT) into a fat binary.
  if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Fatbin) {
    bool useFatbin32 = llvm::any_of(cmdOpts.second, [](const char *option) {
      return llvm::StringRef(option) == "-32";
    });

    const char *cubinOpts[1] = {useFatbin32 ? "-32" : "-64"};
    nvFatbinHandle handle;

    auto chip = getTarget().getChip();
    chip.consume_front("sm_");

    RETURN_ON_NVFATBIN_ERROR(nvFatbinCreate(&handle, cubinOpts, 1));
    RETURN_ON_NVFATBIN_ERROR(nvFatbinAddCubin(
        handle, binary.data(), binary.size(), chip.data(), nullptr));
    RETURN_ON_NVFATBIN_ERROR(nvFatbinAddPTX(
        handle, ptxCode.data(), ptxCode.size(), chip.data(), nullptr, nullptr));

    size_t fatbinSize;
    RETURN_ON_NVFATBIN_ERROR(nvFatbinSize(handle, &fatbinSize));
    SmallVector<char, 0> fatbin(fatbinSize, 0);
    RETURN_ON_NVFATBIN_ERROR(nvFatbinGet(handle, (void *)fatbin.data()));
    RETURN_ON_NVFATBIN_ERROR(nvFatbinDestroy(&handle));
    return fatbin;
  }

  return binary;
}
#endif // MLIR_ENABLE_NVPTXCOMPILER

// Dispatches serialization based on the requested compilation target:
// Offload -> LLVM bitcode, Assembly -> PTX text, Binary/Fatbin -> compiled
// object via `compileToBinary[NVPTX]`.
std::optional<SmallVector<char, 0>>
NVPTXSerializer::moduleToObject(llvm::Module &llvmModule) {
  // Return LLVM IR if the compilation target is `offload`.
#define DEBUG_TYPE "serialize-to-llvm"
  LLVM_DEBUG({
    llvm::dbgs() << "LLVM IR for module: " << getOperation().getNameAttr()
                 << "\n";
    llvm::dbgs() << llvmModule << "\n";
    llvm::dbgs().flush();
  });
#undef DEBUG_TYPE
  if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Offload)
    return SerializeGPUModuleBase::moduleToObject(llvmModule);

#if !LLVM_HAS_NVPTX_TARGET
  // Without the NVPTX backend there is no way to lower past LLVM IR.
  getOperation()->emitError(
      "The `NVPTX` target was not built. Please enable it when building LLVM.");
  return std::nullopt;
#endif // LLVM_HAS_NVPTX_TARGET

  // Emit PTX code.
  std::optional<llvm::TargetMachine *> targetMachine =
      getOrCreateTargetMachine();
  if (!targetMachine) {
    getOperation().emitError() << "Target Machine unavailable for triple "
                               << triple << ", can't optimize with LLVM\n";
    return std::nullopt;
  }
  std::optional<std::string> serializedISA =
      translateToISA(llvmModule, **targetMachine);
  if (!serializedISA) {
    getOperation().emitError() << "Failed translating the module to ISA.";
    return std::nullopt;
  }
  // Let the user-provided callback observe the PTX, if one was registered.
  if (isaCallback)
    isaCallback(serializedISA.value());

#define DEBUG_TYPE "serialize-to-isa"
  LLVM_DEBUG({
    llvm::dbgs() << "PTX for module: " << getOperation().getNameAttr() << "\n";
    llvm::dbgs() << *serializedISA << "\n";
    llvm::dbgs().flush();
  });
#undef DEBUG_TYPE

  // Return PTX if the compilation target is `assembly`.
  if (targetOptions.getCompilationTarget() ==
      gpu::CompilationTarget::Assembly) {
    // Make sure to include the null terminator.
    StringRef bin(serializedISA->c_str(), serializedISA->size() + 1);
    return SmallVector<char, 0>(bin.begin(), bin.end());
  }

  // Compile to binary: in-process when the nvptxcompiler library was enabled
  // at build time, otherwise by shelling out to `ptxas`/`fatbinary`.
#if MLIR_ENABLE_NVPTXCOMPILER
  return compileToBinaryNVPTX(*serializedISA);
#else
  return compileToBinary(*serializedISA);
#endif // MLIR_ENABLE_NVPTXCOMPILER
}

// `TargetAttrInterface` entry point: validates the op and runs the
// serialization pipeline.
std::optional<SmallVector<char, 0>>
NVVMTargetAttrImpl::serializeToObject(Attribute attribute, Operation *module,
                                      const gpu::TargetOptions &options) const {
  // The assert catches null modules in debug builds; the `if` keeps release
  // builds safe as well.
  assert(module && "The module must be non null.");
  if (!module)
    return std::nullopt;
  if (!mlir::isa<gpu::GPUModuleOp>(module)) {
    module->emitError("Module must be a GPU module.");
    return std::nullopt;
  }
  NVPTXSerializer serializer(*module, cast<NVVMTargetAttr>(attribute), options);
  serializer.init();
  return serializer.run();
}

// `TargetAttrInterface` entry point: packages serialized bytes into a
// `#gpu.object`, recording the optimization level for assembly objects and
// the requested ELF section (when any) as object properties.
Attribute
NVVMTargetAttrImpl::createObject(Attribute attribute, Operation *module,
                                 const SmallVector<char, 0> &object,
                                 const gpu::TargetOptions &options) const {
  auto target = cast<NVVMTargetAttr>(attribute);
  gpu::CompilationTarget format = options.getCompilationTarget();
  DictionaryAttr objectProps;
  Builder builder(attribute.getContext());
  SmallVector<NamedAttribute, 2> properties;
  if (format == gpu::CompilationTarget::Assembly)
    properties.push_back(
        builder.getNamedAttr("O", builder.getI32IntegerAttr(target.getO())));

  if (StringRef section = options.getELFSection(); !section.empty())
    properties.push_back(builder.getNamedAttr(gpu::elfSectionName,
                                              builder.getStringAttr(section)));

  if (!properties.empty())
    objectProps = builder.getDictionaryAttr(properties);

  return builder.getAttr<gpu::ObjectAttr>(
      attribute, format,
      builder.getStringAttr(StringRef(object.data(), object.size())),
      objectProps, /*kernels=*/nullptr);
}