1 //=== AMDGPUPrintfRuntimeBinding.cpp - OpenCL printf implementation -------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // \file 9 // 10 // The pass bind printfs to a kernel arg pointer that will be bound to a buffer 11 // later by the runtime. 12 // 13 // This pass traverses the functions in the module and converts 14 // each call to printf to a sequence of operations that 15 // store the following into the printf buffer: 16 // - format string (passed as a module's metadata unique ID) 17 // - bitwise copies of printf arguments 18 // The backend passes will need to store metadata in the kernel 19 //===----------------------------------------------------------------------===// 20 21 #include "AMDGPU.h" 22 #include "llvm/ADT/SmallString.h" 23 #include "llvm/ADT/StringExtras.h" 24 #include "llvm/ADT/Triple.h" 25 #include "llvm/Analysis/InstructionSimplify.h" 26 #include "llvm/Analysis/TargetLibraryInfo.h" 27 #include "llvm/CodeGen/Passes.h" 28 #include "llvm/IR/Constants.h" 29 #include "llvm/IR/DataLayout.h" 30 #include "llvm/IR/Dominators.h" 31 #include "llvm/IR/GlobalVariable.h" 32 #include "llvm/IR/IRBuilder.h" 33 #include "llvm/IR/InstVisitor.h" 34 #include "llvm/IR/Instructions.h" 35 #include "llvm/IR/Module.h" 36 #include "llvm/IR/Type.h" 37 #include "llvm/Support/CommandLine.h" 38 #include "llvm/Support/Debug.h" 39 #include "llvm/Support/raw_ostream.h" 40 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 41 using namespace llvm; 42 43 #define DEBUG_TYPE "printfToRuntime" 44 #define DWORD_ALIGN 4 45 46 namespace { 47 class LLVM_LIBRARY_VISIBILITY AMDGPUPrintfRuntimeBinding final 48 : public ModulePass, 49 public InstVisitor<AMDGPUPrintfRuntimeBinding> { 50 51 public: 52 static char ID; 53 54 explicit AMDGPUPrintfRuntimeBinding(); 55 56 void visitCallSite(CallSite CS) { 57 Function *F = CS.getCalledFunction(); 58 if (F && F->hasName() && F->getName() == "printf") 59 Printfs.push_back(CS.getInstruction()); 60 } 61 62 private: 63 bool runOnModule(Module &M) override; 64 void getConversionSpecifiers(SmallVectorImpl<char> &OpConvSpecifiers, 65 StringRef fmt, size_t num_ops) const; 66 67 bool shouldPrintAsStr(char Specifier, Type *OpType) const; 68 bool 69 lowerPrintfForGpu(Module &M, 70 function_ref<const TargetLibraryInfo &(Function &)> GetTLI); 71 72 void getAnalysisUsage(AnalysisUsage &AU) const override { 73 AU.addRequired<TargetLibraryInfoWrapperPass>(); 74 AU.addRequired<DominatorTreeWrapperPass>(); 75 } 76 77 Value *simplify(Instruction *I, const TargetLibraryInfo *TLI) { 78 return SimplifyInstruction(I, {*TD, TLI, DT}); 79 } 80 81 const DataLayout *TD; 82 const DominatorTree *DT; 83 SmallVector<Value *, 32> Printfs; 84 }; 85 } // namespace 86 87 char AMDGPUPrintfRuntimeBinding::ID = 0; 88 89 INITIALIZE_PASS_BEGIN(AMDGPUPrintfRuntimeBinding, 90 "amdgpu-printf-runtime-binding", "AMDGPU Printf lowering", 91 false, false) 92 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 93 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 94 INITIALIZE_PASS_END(AMDGPUPrintfRuntimeBinding, "amdgpu-printf-runtime-binding", 95 "AMDGPU Printf lowering", false, false) 96 97 char &llvm::AMDGPUPrintfRuntimeBindingID = AMDGPUPrintfRuntimeBinding::ID; 98 99 namespace llvm { 100 ModulePass *createAMDGPUPrintfRuntimeBinding() { 101 return new AMDGPUPrintfRuntimeBinding(); 102 } 103 } // namespace llvm 104 105 AMDGPUPrintfRuntimeBinding::AMDGPUPrintfRuntimeBinding() 106 : ModulePass(ID), TD(nullptr), DT(nullptr) { 107 initializeAMDGPUPrintfRuntimeBindingPass(*PassRegistry::getPassRegistry()); 108 } 109 110 void AMDGPUPrintfRuntimeBinding::getConversionSpecifiers( 111 SmallVectorImpl<char> &OpConvSpecifiers, StringRef Fmt, 112 size_t NumOps) const { 113 // not all format characters are collected. 114 // At this time the format characters of interest 115 // are %p and %s, which use to know if we 116 // are either storing a literal string or a 117 // pointer to the printf buffer. 118 static const char ConvSpecifiers[] = "cdieEfgGaosuxXp"; 119 size_t CurFmtSpecifierIdx = 0; 120 size_t PrevFmtSpecifierIdx = 0; 121 122 while ((CurFmtSpecifierIdx = Fmt.find_first_of( 123 ConvSpecifiers, CurFmtSpecifierIdx)) != StringRef::npos) { 124 bool ArgDump = false; 125 StringRef CurFmt = Fmt.substr(PrevFmtSpecifierIdx, 126 CurFmtSpecifierIdx - PrevFmtSpecifierIdx); 127 size_t pTag = CurFmt.find_last_of("%"); 128 if (pTag != StringRef::npos) { 129 ArgDump = true; 130 while (pTag && CurFmt[--pTag] == '%') { 131 ArgDump = !ArgDump; 132 } 133 } 134 135 if (ArgDump) 136 OpConvSpecifiers.push_back(Fmt[CurFmtSpecifierIdx]); 137 138 PrevFmtSpecifierIdx = ++CurFmtSpecifierIdx; 139 } 140 } 141 142 bool AMDGPUPrintfRuntimeBinding::shouldPrintAsStr(char Specifier, 143 Type *OpType) const { 144 if (Specifier != 's') 145 return false; 146 const PointerType *PT = dyn_cast<PointerType>(OpType); 147 if (!PT || PT->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS) 148 return false; 149 Type *ElemType = PT->getContainedType(0); 150 if (ElemType->getTypeID() != Type::IntegerTyID) 151 return false; 152 IntegerType *ElemIType = cast<IntegerType>(ElemType); 153 return ElemIType->getBitWidth() == 8; 154 } 155 156 bool AMDGPUPrintfRuntimeBinding::lowerPrintfForGpu( 157 Module &M, function_ref<const TargetLibraryInfo &(Function &)> GetTLI) { 158 LLVMContext &Ctx = M.getContext(); 159 IRBuilder<> Builder(Ctx); 160 Type *I32Ty = Type::getInt32Ty(Ctx); 161 unsigned UniqID = 0; 162 // NB: This is important for this string size to be divizable by 4 163 const char NonLiteralStr[4] = "???"; 164 165 for (auto P : Printfs) { 166 CallInst *CI = dyn_cast<CallInst>(P); 167 168 unsigned NumOps = CI->getNumArgOperands(); 169 170 SmallString<16> OpConvSpecifiers; 171 Value *Op = CI->getArgOperand(0); 172 173 if (auto LI = dyn_cast<LoadInst>(Op)) { 174 Op = LI->getPointerOperand(); 175 for (auto Use : Op->users()) { 176 if (auto SI = dyn_cast<StoreInst>(Use)) { 177 Op = SI->getValueOperand(); 178 break; 179 } 180 } 181 } 182 183 if (auto I = dyn_cast<Instruction>(Op)) { 184 Value *Op_simplified = simplify(I, &GetTLI(*I->getFunction())); 185 if (Op_simplified) 186 Op = Op_simplified; 187 } 188 189 ConstantExpr *ConstExpr = dyn_cast<ConstantExpr>(Op); 190 191 if (ConstExpr) { 192 GlobalVariable *GVar = dyn_cast<GlobalVariable>(ConstExpr->getOperand(0)); 193 194 StringRef Str("unknown"); 195 if (GVar && GVar->hasInitializer()) { 196 auto Init = GVar->getInitializer(); 197 if (auto CA = dyn_cast<ConstantDataArray>(Init)) { 198 if (CA->isString()) 199 Str = CA->getAsCString(); 200 } else if (isa<ConstantAggregateZero>(Init)) { 201 Str = ""; 202 } 203 // 204 // we need this call to ascertain 205 // that we are printing a string 206 // or a pointer. It takes out the 207 // specifiers and fills up the first 208 // arg 209 getConversionSpecifiers(OpConvSpecifiers, Str, NumOps - 1); 210 } 211 // Add metadata for the string 212 std::string AStreamHolder; 213 raw_string_ostream Sizes(AStreamHolder); 214 int Sum = DWORD_ALIGN; 215 Sizes << CI->getNumArgOperands() - 1; 216 Sizes << ':'; 217 for (unsigned ArgCount = 1; ArgCount < CI->getNumArgOperands() && 218 ArgCount <= OpConvSpecifiers.size(); 219 ArgCount++) { 220 Value *Arg = CI->getArgOperand(ArgCount); 221 Type *ArgType = Arg->getType(); 222 unsigned ArgSize = TD->getTypeAllocSizeInBits(ArgType); 223 ArgSize = ArgSize / 8; 224 // 225 // ArgSize by design should be a multiple of DWORD_ALIGN, 226 // expand the arguments that do not follow this rule. 227 // 228 if (ArgSize % DWORD_ALIGN != 0) { 229 llvm::Type *ResType = llvm::Type::getInt32Ty(Ctx); 230 VectorType *LLVMVecType = llvm::dyn_cast<llvm::VectorType>(ArgType); 231 int NumElem = LLVMVecType ? LLVMVecType->getNumElements() : 1; 232 if (LLVMVecType && NumElem > 1) 233 ResType = llvm::VectorType::get(ResType, NumElem); 234 Builder.SetInsertPoint(CI); 235 Builder.SetCurrentDebugLocation(CI->getDebugLoc()); 236 if (OpConvSpecifiers[ArgCount - 1] == 'x' || 237 OpConvSpecifiers[ArgCount - 1] == 'X' || 238 OpConvSpecifiers[ArgCount - 1] == 'u' || 239 OpConvSpecifiers[ArgCount - 1] == 'o') 240 Arg = Builder.CreateZExt(Arg, ResType); 241 else 242 Arg = Builder.CreateSExt(Arg, ResType); 243 ArgType = Arg->getType(); 244 ArgSize = TD->getTypeAllocSizeInBits(ArgType); 245 ArgSize = ArgSize / 8; 246 CI->setOperand(ArgCount, Arg); 247 } 248 if (OpConvSpecifiers[ArgCount - 1] == 'f') { 249 ConstantFP *FpCons = dyn_cast<ConstantFP>(Arg); 250 if (FpCons) 251 ArgSize = 4; 252 else { 253 FPExtInst *FpExt = dyn_cast<FPExtInst>(Arg); 254 if (FpExt && FpExt->getType()->isDoubleTy() && 255 FpExt->getOperand(0)->getType()->isFloatTy()) 256 ArgSize = 4; 257 } 258 } 259 if (shouldPrintAsStr(OpConvSpecifiers[ArgCount - 1], ArgType)) { 260 if (ConstantExpr *ConstExpr = dyn_cast<ConstantExpr>(Arg)) { 261 GlobalVariable *GV = 262 dyn_cast<GlobalVariable>(ConstExpr->getOperand(0)); 263 if (GV && GV->hasInitializer()) { 264 Constant *Init = GV->getInitializer(); 265 ConstantDataArray *CA = dyn_cast<ConstantDataArray>(Init); 266 if (Init->isZeroValue() || CA->isString()) { 267 size_t SizeStr = Init->isZeroValue() 268 ? 1 269 : (strlen(CA->getAsCString().data()) + 1); 270 size_t Rem = SizeStr % DWORD_ALIGN; 271 size_t NSizeStr = 0; 272 LLVM_DEBUG(dbgs() << "Printf string original size = " << SizeStr 273 << '\n'); 274 if (Rem) { 275 NSizeStr = SizeStr + (DWORD_ALIGN - Rem); 276 } else { 277 NSizeStr = SizeStr; 278 } 279 ArgSize = NSizeStr; 280 } 281 } else { 282 ArgSize = sizeof(NonLiteralStr); 283 } 284 } else { 285 ArgSize = sizeof(NonLiteralStr); 286 } 287 } 288 LLVM_DEBUG(dbgs() << "Printf ArgSize (in buffer) = " << ArgSize 289 << " for type: " << *ArgType << '\n'); 290 Sizes << ArgSize << ':'; 291 Sum += ArgSize; 292 } 293 LLVM_DEBUG(dbgs() << "Printf format string in source = " << Str.str() 294 << '\n'); 295 for (size_t I = 0; I < Str.size(); ++I) { 296 // Rest of the C escape sequences (e.g. \') are handled correctly 297 // by the MDParser 298 switch (Str[I]) { 299 case '\a': 300 Sizes << "\\a"; 301 break; 302 case '\b': 303 Sizes << "\\b"; 304 break; 305 case '\f': 306 Sizes << "\\f"; 307 break; 308 case '\n': 309 Sizes << "\\n"; 310 break; 311 case '\r': 312 Sizes << "\\r"; 313 break; 314 case '\v': 315 Sizes << "\\v"; 316 break; 317 case ':': 318 // ':' cannot be scanned by Flex, as it is defined as a delimiter 319 // Replace it with it's octal representation \72 320 Sizes << "\\72"; 321 break; 322 default: 323 Sizes << Str[I]; 324 break; 325 } 326 } 327 328 // Insert the printf_alloc call 329 Builder.SetInsertPoint(CI); 330 Builder.SetCurrentDebugLocation(CI->getDebugLoc()); 331 332 AttributeList Attr = AttributeList::get(Ctx, AttributeList::FunctionIndex, 333 Attribute::NoUnwind); 334 335 Type *SizetTy = Type::getInt32Ty(Ctx); 336 337 Type *Tys_alloc[1] = {SizetTy}; 338 Type *I8Ptr = PointerType::get(Type::getInt8Ty(Ctx), 1); 339 FunctionType *FTy_alloc = FunctionType::get(I8Ptr, Tys_alloc, false); 340 FunctionCallee PrintfAllocFn = 341 M.getOrInsertFunction(StringRef("__printf_alloc"), FTy_alloc, Attr); 342 343 LLVM_DEBUG(dbgs() << "Printf metadata = " << Sizes.str() << '\n'); 344 std::string fmtstr = itostr(++UniqID) + ":" + Sizes.str().c_str(); 345 MDString *fmtStrArray = MDString::get(Ctx, fmtstr); 346 347 // Instead of creating global variables, the 348 // printf format strings are extracted 349 // and passed as metadata. This avoids 350 // polluting llvm's symbol tables in this module. 351 // Metadata is going to be extracted 352 // by the backend passes and inserted 353 // into the OpenCL binary as appropriate. 354 StringRef amd("llvm.printf.fmts"); 355 NamedMDNode *metaD = M.getOrInsertNamedMetadata(amd); 356 MDNode *myMD = MDNode::get(Ctx, fmtStrArray); 357 metaD->addOperand(myMD); 358 Value *sumC = ConstantInt::get(SizetTy, Sum, false); 359 SmallVector<Value *, 1> alloc_args; 360 alloc_args.push_back(sumC); 361 CallInst *pcall = 362 CallInst::Create(PrintfAllocFn, alloc_args, "printf_alloc_fn", CI); 363 364 // 365 // Insert code to split basicblock with a 366 // piece of hammock code. 367 // basicblock splits after buffer overflow check 368 // 369 ConstantPointerNull *zeroIntPtr = 370 ConstantPointerNull::get(PointerType::get(Type::getInt8Ty(Ctx), 1)); 371 ICmpInst *cmp = 372 dyn_cast<ICmpInst>(Builder.CreateICmpNE(pcall, zeroIntPtr, "")); 373 if (!CI->use_empty()) { 374 Value *result = 375 Builder.CreateSExt(Builder.CreateNot(cmp), I32Ty, "printf_res"); 376 CI->replaceAllUsesWith(result); 377 } 378 SplitBlock(CI->getParent(), cmp); 379 Instruction *Brnch = 380 SplitBlockAndInsertIfThen(cmp, cmp->getNextNode(), false); 381 382 Builder.SetInsertPoint(Brnch); 383 384 // store unique printf id in the buffer 385 // 386 SmallVector<Value *, 1> ZeroIdxList; 387 ConstantInt *zeroInt = 388 ConstantInt::get(Ctx, APInt(32, StringRef("0"), 10)); 389 ZeroIdxList.push_back(zeroInt); 390 391 GetElementPtrInst *BufferIdx = 392 dyn_cast<GetElementPtrInst>(GetElementPtrInst::Create( 393 nullptr, pcall, ZeroIdxList, "PrintBuffID", Brnch)); 394 395 Type *idPointer = PointerType::get(I32Ty, AMDGPUAS::GLOBAL_ADDRESS); 396 Value *id_gep_cast = 397 new BitCastInst(BufferIdx, idPointer, "PrintBuffIdCast", Brnch); 398 399 StoreInst *stbuff = 400 new StoreInst(ConstantInt::get(I32Ty, UniqID), id_gep_cast); 401 stbuff->insertBefore(Brnch); // to Remove unused variable warning 402 403 SmallVector<Value *, 2> FourthIdxList; 404 ConstantInt *fourInt = 405 ConstantInt::get(Ctx, APInt(32, StringRef("4"), 10)); 406 407 FourthIdxList.push_back(fourInt); // 1st 4 bytes hold the printf_id 408 // the following GEP is the buffer pointer 409 BufferIdx = cast<GetElementPtrInst>(GetElementPtrInst::Create( 410 nullptr, pcall, FourthIdxList, "PrintBuffGep", Brnch)); 411 412 Type *Int32Ty = Type::getInt32Ty(Ctx); 413 Type *Int64Ty = Type::getInt64Ty(Ctx); 414 for (unsigned ArgCount = 1; ArgCount < CI->getNumArgOperands() && 415 ArgCount <= OpConvSpecifiers.size(); 416 ArgCount++) { 417 Value *Arg = CI->getArgOperand(ArgCount); 418 Type *ArgType = Arg->getType(); 419 SmallVector<Value *, 32> WhatToStore; 420 if (ArgType->isFPOrFPVectorTy() && 421 (ArgType->getTypeID() != Type::VectorTyID)) { 422 Type *IType = (ArgType->isFloatTy()) ? Int32Ty : Int64Ty; 423 if (OpConvSpecifiers[ArgCount - 1] == 'f') { 424 ConstantFP *fpCons = dyn_cast<ConstantFP>(Arg); 425 if (fpCons) { 426 APFloat Val(fpCons->getValueAPF()); 427 bool Lost = false; 428 Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, 429 &Lost); 430 Arg = ConstantFP::get(Ctx, Val); 431 IType = Int32Ty; 432 } else { 433 FPExtInst *FpExt = dyn_cast<FPExtInst>(Arg); 434 if (FpExt && FpExt->getType()->isDoubleTy() && 435 FpExt->getOperand(0)->getType()->isFloatTy()) { 436 Arg = FpExt->getOperand(0); 437 IType = Int32Ty; 438 } 439 } 440 } 441 Arg = new BitCastInst(Arg, IType, "PrintArgFP", Brnch); 442 WhatToStore.push_back(Arg); 443 } else if (ArgType->getTypeID() == Type::PointerTyID) { 444 if (shouldPrintAsStr(OpConvSpecifiers[ArgCount - 1], ArgType)) { 445 const char *S = NonLiteralStr; 446 if (ConstantExpr *ConstExpr = dyn_cast<ConstantExpr>(Arg)) { 447 GlobalVariable *GV = 448 dyn_cast<GlobalVariable>(ConstExpr->getOperand(0)); 449 if (GV && GV->hasInitializer()) { 450 Constant *Init = GV->getInitializer(); 451 ConstantDataArray *CA = dyn_cast<ConstantDataArray>(Init); 452 if (Init->isZeroValue() || CA->isString()) { 453 S = Init->isZeroValue() ? "" : CA->getAsCString().data(); 454 } 455 } 456 } 457 size_t SizeStr = strlen(S) + 1; 458 size_t Rem = SizeStr % DWORD_ALIGN; 459 size_t NSizeStr = 0; 460 if (Rem) { 461 NSizeStr = SizeStr + (DWORD_ALIGN - Rem); 462 } else { 463 NSizeStr = SizeStr; 464 } 465 if (S[0]) { 466 char *MyNewStr = new char[NSizeStr](); 467 strcpy(MyNewStr, S); 468 int NumInts = NSizeStr / 4; 469 int CharC = 0; 470 while (NumInts) { 471 int ANum = *(int *)(MyNewStr + CharC); 472 CharC += 4; 473 NumInts--; 474 Value *ANumV = ConstantInt::get(Int32Ty, ANum, false); 475 WhatToStore.push_back(ANumV); 476 } 477 delete[] MyNewStr; 478 } else { 479 // Empty string, give a hint to RT it is no NULL 480 Value *ANumV = ConstantInt::get(Int32Ty, 0xFFFFFF00, false); 481 WhatToStore.push_back(ANumV); 482 } 483 } else { 484 uint64_t Size = TD->getTypeAllocSizeInBits(ArgType); 485 assert((Size == 32 || Size == 64) && "unsupported size"); 486 Type *DstType = (Size == 32) ? Int32Ty : Int64Ty; 487 Arg = new PtrToIntInst(Arg, DstType, "PrintArgPtr", Brnch); 488 WhatToStore.push_back(Arg); 489 } 490 } else if (ArgType->getTypeID() == Type::VectorTyID) { 491 Type *IType = NULL; 492 uint32_t EleCount = cast<VectorType>(ArgType)->getNumElements(); 493 uint32_t EleSize = ArgType->getScalarSizeInBits(); 494 uint32_t TotalSize = EleCount * EleSize; 495 if (EleCount == 3) { 496 IntegerType *Int32Ty = Type::getInt32Ty(ArgType->getContext()); 497 Constant *Indices[4] = { 498 ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 1), 499 ConstantInt::get(Int32Ty, 2), ConstantInt::get(Int32Ty, 2)}; 500 Constant *Mask = ConstantVector::get(Indices); 501 ShuffleVectorInst *Shuffle = new ShuffleVectorInst(Arg, Arg, Mask); 502 Shuffle->insertBefore(Brnch); 503 Arg = Shuffle; 504 ArgType = Arg->getType(); 505 TotalSize += EleSize; 506 } 507 switch (EleSize) { 508 default: 509 EleCount = TotalSize / 64; 510 IType = dyn_cast<Type>(Type::getInt64Ty(ArgType->getContext())); 511 break; 512 case 8: 513 if (EleCount >= 8) { 514 EleCount = TotalSize / 64; 515 IType = dyn_cast<Type>(Type::getInt64Ty(ArgType->getContext())); 516 } else if (EleCount >= 3) { 517 EleCount = 1; 518 IType = dyn_cast<Type>(Type::getInt32Ty(ArgType->getContext())); 519 } else { 520 EleCount = 1; 521 IType = dyn_cast<Type>(Type::getInt16Ty(ArgType->getContext())); 522 } 523 break; 524 case 16: 525 if (EleCount >= 3) { 526 EleCount = TotalSize / 64; 527 IType = dyn_cast<Type>(Type::getInt64Ty(ArgType->getContext())); 528 } else { 529 EleCount = 1; 530 IType = dyn_cast<Type>(Type::getInt32Ty(ArgType->getContext())); 531 } 532 break; 533 } 534 if (EleCount > 1) { 535 IType = dyn_cast<Type>(VectorType::get(IType, EleCount)); 536 } 537 Arg = new BitCastInst(Arg, IType, "PrintArgVect", Brnch); 538 WhatToStore.push_back(Arg); 539 } else { 540 WhatToStore.push_back(Arg); 541 } 542 for (unsigned I = 0, E = WhatToStore.size(); I != E; ++I) { 543 Value *TheBtCast = WhatToStore[I]; 544 unsigned ArgSize = 545 TD->getTypeAllocSizeInBits(TheBtCast->getType()) / 8; 546 SmallVector<Value *, 1> BuffOffset; 547 BuffOffset.push_back(ConstantInt::get(I32Ty, ArgSize)); 548 549 Type *ArgPointer = PointerType::get(TheBtCast->getType(), 1); 550 Value *CastedGEP = 551 new BitCastInst(BufferIdx, ArgPointer, "PrintBuffPtrCast", Brnch); 552 StoreInst *StBuff = new StoreInst(TheBtCast, CastedGEP, Brnch); 553 LLVM_DEBUG(dbgs() << "inserting store to printf buffer:\n" 554 << *StBuff << '\n'); 555 (void)StBuff; 556 if (I + 1 == E && ArgCount + 1 == CI->getNumArgOperands()) 557 break; 558 BufferIdx = dyn_cast<GetElementPtrInst>(GetElementPtrInst::Create( 559 nullptr, BufferIdx, BuffOffset, "PrintBuffNextPtr", Brnch)); 560 LLVM_DEBUG(dbgs() << "inserting gep to the printf buffer:\n" 561 << *BufferIdx << '\n'); 562 } 563 } 564 } 565 } 566 567 // erase the printf calls 568 for (auto P : Printfs) { 569 CallInst *CI = dyn_cast<CallInst>(P); 570 CI->eraseFromParent(); 571 } 572 573 Printfs.clear(); 574 return true; 575 } 576 577 bool AMDGPUPrintfRuntimeBinding::runOnModule(Module &M) { 578 Triple TT(M.getTargetTriple()); 579 if (TT.getArch() == Triple::r600) 580 return false; 581 582 visit(M); 583 584 if (Printfs.empty()) 585 return false; 586 587 TD = &M.getDataLayout(); 588 auto DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); 589 DT = DTWP ? &DTWP->getDomTree() : nullptr; 590 auto GetTLI = [this](Function &F) -> TargetLibraryInfo & { 591 return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); 592 }; 593 594 return lowerPrintfForGpu(M, GetTLI); 595 } 596