1 //=== AMDGPUPrintfRuntimeBinding.cpp - OpenCL printf implementation -------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // \file 9 // 10 // The pass bind printfs to a kernel arg pointer that will be bound to a buffer 11 // later by the runtime. 12 // 13 // This pass traverses the functions in the module and converts 14 // each call to printf to a sequence of operations that 15 // store the following into the printf buffer: 16 // - format string (passed as a module's metadata unique ID) 17 // - bitwise copies of printf arguments 18 // The backend passes will need to store metadata in the kernel 19 //===----------------------------------------------------------------------===// 20 21 #include "AMDGPU.h" 22 #include "llvm/ADT/SmallString.h" 23 #include "llvm/ADT/StringExtras.h" 24 #include "llvm/ADT/Triple.h" 25 #include "llvm/Analysis/InstructionSimplify.h" 26 #include "llvm/Analysis/TargetLibraryInfo.h" 27 #include "llvm/CodeGen/Passes.h" 28 #include "llvm/IR/Constants.h" 29 #include "llvm/IR/DataLayout.h" 30 #include "llvm/IR/Dominators.h" 31 #include "llvm/IR/GlobalVariable.h" 32 #include "llvm/IR/IRBuilder.h" 33 #include "llvm/IR/InstVisitor.h" 34 #include "llvm/IR/Instructions.h" 35 #include "llvm/IR/Module.h" 36 #include "llvm/IR/Type.h" 37 #include "llvm/Support/CommandLine.h" 38 #include "llvm/Support/Debug.h" 39 #include "llvm/Support/raw_ostream.h" 40 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 41 using namespace llvm; 42 43 #define DEBUG_TYPE "printfToRuntime" 44 #define DWORD_ALIGN 4 45 46 namespace { 47 class LLVM_LIBRARY_VISIBILITY AMDGPUPrintfRuntimeBinding final 48 : public ModulePass, 49 public InstVisitor<AMDGPUPrintfRuntimeBinding> { 50 51 public: 52 static char ID; 53 54 explicit AMDGPUPrintfRuntimeBinding(); 55 56 void visitCallSite(CallSite CS) { 57 Function *F = CS.getCalledFunction(); 58 if (F && F->hasName() && F->getName() == "printf") 59 Printfs.push_back(CS.getInstruction()); 60 } 61 62 private: 63 bool runOnModule(Module &M) override; 64 void getConversionSpecifiers(SmallVectorImpl<char> &OpConvSpecifiers, 65 StringRef fmt, size_t num_ops) const; 66 67 bool shouldPrintAsStr(char Specifier, Type *OpType) const; 68 bool confirmSpirModule(Module &M) const; 69 bool confirmOpenCLVersion200(Module &M) const; 70 bool lowerPrintfForGpu(Module &M); 71 72 void getAnalysisUsage(AnalysisUsage &AU) const override { 73 AU.addRequired<TargetLibraryInfoWrapperPass>(); 74 AU.addRequired<DominatorTreeWrapperPass>(); 75 } 76 77 Value *simplify(Instruction *I) { 78 return SimplifyInstruction(I, {*TD, TLI, DT}); 79 } 80 81 const DataLayout *TD; 82 const DominatorTree *DT; 83 const TargetLibraryInfo *TLI; 84 SmallVector<Value *, 32> Printfs; 85 }; 86 } // namespace 87 88 char AMDGPUPrintfRuntimeBinding::ID = 0; 89 90 INITIALIZE_PASS_BEGIN(AMDGPUPrintfRuntimeBinding, 91 "amdgpu-printf-runtime-binding", "AMDGPU Printf lowering", 92 false, false) 93 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 94 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 95 INITIALIZE_PASS_END(AMDGPUPrintfRuntimeBinding, "amdgpu-printf-runtime-binding", 96 "AMDGPU Printf lowering", false, false) 97 98 char &llvm::AMDGPUPrintfRuntimeBindingID = AMDGPUPrintfRuntimeBinding::ID; 99 100 namespace llvm { 101 ModulePass *createAMDGPUPrintfRuntimeBinding() { 102 return new AMDGPUPrintfRuntimeBinding(); 103 } 104 } // namespace llvm 105 106 AMDGPUPrintfRuntimeBinding::AMDGPUPrintfRuntimeBinding() 107 : ModulePass(ID), TD(nullptr), DT(nullptr), TLI(nullptr) { 108 initializeAMDGPUPrintfRuntimeBindingPass(*PassRegistry::getPassRegistry()); 109 } 110 111 bool AMDGPUPrintfRuntimeBinding::confirmOpenCLVersion200(Module &M) const { 112 NamedMDNode *OCLVersion = M.getNamedMetadata("opencl.ocl.version"); 113 if (!OCLVersion || OCLVersion->getNumOperands() != 1) 114 return false; 115 MDNode *Ver = OCLVersion->getOperand(0); 116 if (Ver->getNumOperands() != 2) 117 return false; 118 ConstantInt *Major = mdconst::dyn_extract<ConstantInt>(Ver->getOperand(0)); 119 ConstantInt *Minor = mdconst::dyn_extract<ConstantInt>(Ver->getOperand(1)); 120 if (!Major || !Minor) 121 return false; 122 return Major->getZExtValue() == 2; 123 } 124 125 void AMDGPUPrintfRuntimeBinding::getConversionSpecifiers( 126 SmallVectorImpl<char> &OpConvSpecifiers, StringRef Fmt, 127 size_t NumOps) const { 128 // not all format characters are collected. 129 // At this time the format characters of interest 130 // are %p and %s, which use to know if we 131 // are either storing a literal string or a 132 // pointer to the printf buffer. 133 static const char ConvSpecifiers[] = "cdieEfgGaosuxXp"; 134 size_t CurFmtSpecifierIdx = 0; 135 size_t PrevFmtSpecifierIdx = 0; 136 137 while ((CurFmtSpecifierIdx = Fmt.find_first_of( 138 ConvSpecifiers, CurFmtSpecifierIdx)) != StringRef::npos) { 139 bool ArgDump = false; 140 StringRef CurFmt = Fmt.substr(PrevFmtSpecifierIdx, 141 CurFmtSpecifierIdx - PrevFmtSpecifierIdx); 142 size_t pTag = CurFmt.find_last_of("%"); 143 if (pTag != StringRef::npos) { 144 ArgDump = true; 145 while (pTag && CurFmt[--pTag] == '%') { 146 ArgDump = !ArgDump; 147 } 148 } 149 150 if (ArgDump) 151 OpConvSpecifiers.push_back(Fmt[CurFmtSpecifierIdx]); 152 153 PrevFmtSpecifierIdx = ++CurFmtSpecifierIdx; 154 } 155 } 156 157 bool AMDGPUPrintfRuntimeBinding::shouldPrintAsStr(char Specifier, 158 Type *OpType) const { 159 if (Specifier != 's') 160 return false; 161 const PointerType *PT = dyn_cast<PointerType>(OpType); 162 if (!PT || PT->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS) 163 return false; 164 Type *ElemType = PT->getContainedType(0); 165 if (ElemType->getTypeID() != Type::IntegerTyID) 166 return false; 167 IntegerType *ElemIType = cast<IntegerType>(ElemType); 168 return ElemIType->getBitWidth() == 8; 169 } 170 171 bool AMDGPUPrintfRuntimeBinding::confirmSpirModule(Module &M) const { 172 NamedMDNode *SPIRVersion = M.getNamedMetadata("opencl.spir.version"); 173 return SPIRVersion ? true : false; 174 } 175 176 bool AMDGPUPrintfRuntimeBinding::lowerPrintfForGpu(Module &M) { 177 LLVMContext &Ctx = M.getContext(); 178 IRBuilder<> Builder(Ctx); 179 Type *I32Ty = Type::getInt32Ty(Ctx); 180 unsigned UniqID = 0; 181 // NB: This is important for this string size to be divizable by 4 182 const char NonLiteralStr[4] = "???"; 183 184 for (auto P : Printfs) { 185 CallInst *CI = dyn_cast<CallInst>(P); 186 187 unsigned NumOps = CI->getNumArgOperands(); 188 189 SmallString<16> OpConvSpecifiers; 190 Value *Op = CI->getArgOperand(0); 191 192 if (auto LI = dyn_cast<LoadInst>(Op)) { 193 Op = LI->getPointerOperand(); 194 for (auto Use : Op->users()) { 195 if (auto SI = dyn_cast<StoreInst>(Use)) { 196 Op = SI->getValueOperand(); 197 break; 198 } 199 } 200 } 201 202 if (auto I = dyn_cast<Instruction>(Op)) { 203 Value *Op_simplified = simplify(I); 204 if (Op_simplified) 205 Op = Op_simplified; 206 } 207 208 ConstantExpr *ConstExpr = dyn_cast<ConstantExpr>(Op); 209 210 if (ConstExpr) { 211 GlobalVariable *GVar = dyn_cast<GlobalVariable>(ConstExpr->getOperand(0)); 212 213 StringRef Str("unknown"); 214 if (GVar && GVar->hasInitializer()) { 215 auto Init = GVar->getInitializer(); 216 if (auto CA = dyn_cast<ConstantDataArray>(Init)) { 217 if (CA->isString()) 218 Str = CA->getAsCString(); 219 } else if (isa<ConstantAggregateZero>(Init)) { 220 Str = ""; 221 } 222 // 223 // we need this call to ascertain 224 // that we are printing a string 225 // or a pointer. It takes out the 226 // specifiers and fills up the first 227 // arg 228 getConversionSpecifiers(OpConvSpecifiers, Str, NumOps - 1); 229 } 230 // Add metadata for the string 231 std::string AStreamHolder; 232 raw_string_ostream Sizes(AStreamHolder); 233 int Sum = DWORD_ALIGN; 234 Sizes << CI->getNumArgOperands() - 1; 235 Sizes << ':'; 236 for (unsigned ArgCount = 1; ArgCount < CI->getNumArgOperands() && 237 ArgCount <= OpConvSpecifiers.size(); 238 ArgCount++) { 239 Value *Arg = CI->getArgOperand(ArgCount); 240 Type *ArgType = Arg->getType(); 241 unsigned ArgSize = TD->getTypeAllocSizeInBits(ArgType); 242 ArgSize = ArgSize / 8; 243 // 244 // ArgSize by design should be a multiple of DWORD_ALIGN, 245 // expand the arguments that do not follow this rule. 246 // 247 if (ArgSize % DWORD_ALIGN != 0) { 248 llvm::Type *ResType = llvm::Type::getInt32Ty(Ctx); 249 VectorType *LLVMVecType = llvm::dyn_cast<llvm::VectorType>(ArgType); 250 int NumElem = LLVMVecType ? LLVMVecType->getNumElements() : 1; 251 if (LLVMVecType && NumElem > 1) 252 ResType = llvm::VectorType::get(ResType, NumElem); 253 Builder.SetInsertPoint(CI); 254 Builder.SetCurrentDebugLocation(CI->getDebugLoc()); 255 if (OpConvSpecifiers[ArgCount - 1] == 'x' || 256 OpConvSpecifiers[ArgCount - 1] == 'X' || 257 OpConvSpecifiers[ArgCount - 1] == 'u' || 258 OpConvSpecifiers[ArgCount - 1] == 'o') 259 Arg = Builder.CreateZExt(Arg, ResType); 260 else 261 Arg = Builder.CreateSExt(Arg, ResType); 262 ArgType = Arg->getType(); 263 ArgSize = TD->getTypeAllocSizeInBits(ArgType); 264 ArgSize = ArgSize / 8; 265 CI->setOperand(ArgCount, Arg); 266 } 267 if (OpConvSpecifiers[ArgCount - 1] == 'f') { 268 ConstantFP *FpCons = dyn_cast<ConstantFP>(Arg); 269 if (FpCons) 270 ArgSize = 4; 271 else { 272 FPExtInst *FpExt = dyn_cast<FPExtInst>(Arg); 273 if (FpExt && FpExt->getType()->isDoubleTy() && 274 FpExt->getOperand(0)->getType()->isFloatTy()) 275 ArgSize = 4; 276 } 277 } 278 if (shouldPrintAsStr(OpConvSpecifiers[ArgCount - 1], ArgType)) { 279 if (ConstantExpr *ConstExpr = dyn_cast<ConstantExpr>(Arg)) { 280 GlobalVariable *GV = 281 dyn_cast<GlobalVariable>(ConstExpr->getOperand(0)); 282 if (GV && GV->hasInitializer()) { 283 Constant *Init = GV->getInitializer(); 284 ConstantDataArray *CA = dyn_cast<ConstantDataArray>(Init); 285 if (Init->isZeroValue() || CA->isString()) { 286 size_t SizeStr = Init->isZeroValue() 287 ? 1 288 : (strlen(CA->getAsCString().data()) + 1); 289 size_t Rem = SizeStr % DWORD_ALIGN; 290 size_t NSizeStr = 0; 291 LLVM_DEBUG(dbgs() << "Printf string original size = " << SizeStr 292 << '\n'); 293 if (Rem) { 294 NSizeStr = SizeStr + (DWORD_ALIGN - Rem); 295 } else { 296 NSizeStr = SizeStr; 297 } 298 ArgSize = NSizeStr; 299 } 300 } else { 301 ArgSize = sizeof(NonLiteralStr); 302 } 303 } else { 304 ArgSize = sizeof(NonLiteralStr); 305 } 306 } 307 LLVM_DEBUG(dbgs() << "Printf ArgSize (in buffer) = " << ArgSize 308 << " for type: " << *ArgType << '\n'); 309 Sizes << ArgSize << ':'; 310 Sum += ArgSize; 311 } 312 LLVM_DEBUG(dbgs() << "Printf format string in source = " << Str.str() 313 << '\n'); 314 for (size_t I = 0; I < Str.size(); ++I) { 315 // Rest of the C escape sequences (e.g. \') are handled correctly 316 // by the MDParser 317 switch (Str[I]) { 318 case '\a': 319 Sizes << "\\a"; 320 break; 321 case '\b': 322 Sizes << "\\b"; 323 break; 324 case '\f': 325 Sizes << "\\f"; 326 break; 327 case '\n': 328 Sizes << "\\n"; 329 break; 330 case '\r': 331 Sizes << "\\r"; 332 break; 333 case '\v': 334 Sizes << "\\v"; 335 break; 336 case ':': 337 // ':' cannot be scanned by Flex, as it is defined as a delimiter 338 // Replace it with it's octal representation \72 339 Sizes << "\\72"; 340 break; 341 default: 342 Sizes << Str[I]; 343 break; 344 } 345 } 346 347 // Insert the printf_alloc call 348 Builder.SetInsertPoint(CI); 349 Builder.SetCurrentDebugLocation(CI->getDebugLoc()); 350 351 AttributeList Attr = AttributeList::get(Ctx, AttributeList::FunctionIndex, 352 Attribute::NoUnwind); 353 354 Type *SizetTy = Type::getInt32Ty(Ctx); 355 356 Type *Tys_alloc[1] = {SizetTy}; 357 Type *I8Ptr = PointerType::get(Type::getInt8Ty(Ctx), 1); 358 FunctionType *FTy_alloc = FunctionType::get(I8Ptr, Tys_alloc, false); 359 FunctionCallee PrintfAllocFn = 360 M.getOrInsertFunction(StringRef("__printf_alloc"), FTy_alloc, Attr); 361 362 LLVM_DEBUG(dbgs() << "Printf metadata = " << Sizes.str() << '\n'); 363 std::string fmtstr = itostr(++UniqID) + ":" + Sizes.str().c_str(); 364 MDString *fmtStrArray = MDString::get(Ctx, fmtstr); 365 366 // Instead of creating global variables, the 367 // printf format strings are extracted 368 // and passed as metadata. This avoids 369 // polluting llvm's symbol tables in this module. 370 // Metadata is going to be extracted 371 // by the backend passes and inserted 372 // into the OpenCL binary as appropriate. 373 StringRef amd("llvm.printf.fmts"); 374 NamedMDNode *metaD = M.getOrInsertNamedMetadata(amd); 375 MDNode *myMD = MDNode::get(Ctx, fmtStrArray); 376 metaD->addOperand(myMD); 377 Value *sumC = ConstantInt::get(SizetTy, Sum, false); 378 SmallVector<Value *, 1> alloc_args; 379 alloc_args.push_back(sumC); 380 CallInst *pcall = 381 CallInst::Create(PrintfAllocFn, alloc_args, "printf_alloc_fn", CI); 382 383 // 384 // Insert code to split basicblock with a 385 // piece of hammock code. 386 // basicblock splits after buffer overflow check 387 // 388 ConstantPointerNull *zeroIntPtr = 389 ConstantPointerNull::get(PointerType::get(Type::getInt8Ty(Ctx), 1)); 390 ICmpInst *cmp = 391 dyn_cast<ICmpInst>(Builder.CreateICmpNE(pcall, zeroIntPtr, "")); 392 if (!CI->use_empty()) { 393 Value *result = 394 Builder.CreateSExt(Builder.CreateNot(cmp), I32Ty, "printf_res"); 395 CI->replaceAllUsesWith(result); 396 } 397 SplitBlock(CI->getParent(), cmp); 398 Instruction *Brnch = 399 SplitBlockAndInsertIfThen(cmp, cmp->getNextNode(), false); 400 401 Builder.SetInsertPoint(Brnch); 402 403 // store unique printf id in the buffer 404 // 405 SmallVector<Value *, 1> ZeroIdxList; 406 ConstantInt *zeroInt = 407 ConstantInt::get(Ctx, APInt(32, StringRef("0"), 10)); 408 ZeroIdxList.push_back(zeroInt); 409 410 GetElementPtrInst *BufferIdx = 411 dyn_cast<GetElementPtrInst>(GetElementPtrInst::Create( 412 nullptr, pcall, ZeroIdxList, "PrintBuffID", Brnch)); 413 414 Type *idPointer = PointerType::get(I32Ty, AMDGPUAS::GLOBAL_ADDRESS); 415 Value *id_gep_cast = 416 new BitCastInst(BufferIdx, idPointer, "PrintBuffIdCast", Brnch); 417 418 StoreInst *stbuff = 419 new StoreInst(ConstantInt::get(I32Ty, UniqID), id_gep_cast); 420 stbuff->insertBefore(Brnch); // to Remove unused variable warning 421 422 SmallVector<Value *, 2> FourthIdxList; 423 ConstantInt *fourInt = 424 ConstantInt::get(Ctx, APInt(32, StringRef("4"), 10)); 425 426 FourthIdxList.push_back(fourInt); // 1st 4 bytes hold the printf_id 427 // the following GEP is the buffer pointer 428 BufferIdx = cast<GetElementPtrInst>(GetElementPtrInst::Create( 429 nullptr, pcall, FourthIdxList, "PrintBuffGep", Brnch)); 430 431 Type *Int32Ty = Type::getInt32Ty(Ctx); 432 Type *Int64Ty = Type::getInt64Ty(Ctx); 433 for (unsigned ArgCount = 1; ArgCount < CI->getNumArgOperands() && 434 ArgCount <= OpConvSpecifiers.size(); 435 ArgCount++) { 436 Value *Arg = CI->getArgOperand(ArgCount); 437 Type *ArgType = Arg->getType(); 438 SmallVector<Value *, 32> WhatToStore; 439 if (ArgType->isFPOrFPVectorTy() && 440 (ArgType->getTypeID() != Type::VectorTyID)) { 441 Type *IType = (ArgType->isFloatTy()) ? Int32Ty : Int64Ty; 442 if (OpConvSpecifiers[ArgCount - 1] == 'f') { 443 ConstantFP *fpCons = dyn_cast<ConstantFP>(Arg); 444 if (fpCons) { 445 APFloat Val(fpCons->getValueAPF()); 446 bool Lost = false; 447 Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, 448 &Lost); 449 Arg = ConstantFP::get(Ctx, Val); 450 IType = Int32Ty; 451 } else { 452 FPExtInst *FpExt = dyn_cast<FPExtInst>(Arg); 453 if (FpExt && FpExt->getType()->isDoubleTy() && 454 FpExt->getOperand(0)->getType()->isFloatTy()) { 455 Arg = FpExt->getOperand(0); 456 IType = Int32Ty; 457 } 458 } 459 } 460 Arg = new BitCastInst(Arg, IType, "PrintArgFP", Brnch); 461 WhatToStore.push_back(Arg); 462 } else if (ArgType->getTypeID() == Type::PointerTyID) { 463 if (shouldPrintAsStr(OpConvSpecifiers[ArgCount - 1], ArgType)) { 464 const char *S = NonLiteralStr; 465 if (ConstantExpr *ConstExpr = dyn_cast<ConstantExpr>(Arg)) { 466 GlobalVariable *GV = 467 dyn_cast<GlobalVariable>(ConstExpr->getOperand(0)); 468 if (GV && GV->hasInitializer()) { 469 Constant *Init = GV->getInitializer(); 470 ConstantDataArray *CA = dyn_cast<ConstantDataArray>(Init); 471 if (Init->isZeroValue() || CA->isString()) { 472 S = Init->isZeroValue() ? "" : CA->getAsCString().data(); 473 } 474 } 475 } 476 size_t SizeStr = strlen(S) + 1; 477 size_t Rem = SizeStr % DWORD_ALIGN; 478 size_t NSizeStr = 0; 479 if (Rem) { 480 NSizeStr = SizeStr + (DWORD_ALIGN - Rem); 481 } else { 482 NSizeStr = SizeStr; 483 } 484 if (S[0]) { 485 char *MyNewStr = new char[NSizeStr](); 486 strcpy(MyNewStr, S); 487 int NumInts = NSizeStr / 4; 488 int CharC = 0; 489 while (NumInts) { 490 int ANum = *(int *)(MyNewStr + CharC); 491 CharC += 4; 492 NumInts--; 493 Value *ANumV = ConstantInt::get(Int32Ty, ANum, false); 494 WhatToStore.push_back(ANumV); 495 } 496 delete[] MyNewStr; 497 } else { 498 // Empty string, give a hint to RT it is no NULL 499 Value *ANumV = ConstantInt::get(Int32Ty, 0xFFFFFF00, false); 500 WhatToStore.push_back(ANumV); 501 } 502 } else { 503 uint64_t Size = TD->getTypeAllocSizeInBits(ArgType); 504 assert((Size == 32 || Size == 64) && "unsupported size"); 505 Type *DstType = (Size == 32) ? Int32Ty : Int64Ty; 506 Arg = new PtrToIntInst(Arg, DstType, "PrintArgPtr", Brnch); 507 WhatToStore.push_back(Arg); 508 } 509 } else if (ArgType->getTypeID() == Type::VectorTyID) { 510 Type *IType = NULL; 511 uint32_t EleCount = cast<VectorType>(ArgType)->getNumElements(); 512 uint32_t EleSize = ArgType->getScalarSizeInBits(); 513 uint32_t TotalSize = EleCount * EleSize; 514 if (EleCount == 3) { 515 IntegerType *Int32Ty = Type::getInt32Ty(ArgType->getContext()); 516 Constant *Indices[4] = { 517 ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 1), 518 ConstantInt::get(Int32Ty, 2), ConstantInt::get(Int32Ty, 2)}; 519 Constant *Mask = ConstantVector::get(Indices); 520 ShuffleVectorInst *Shuffle = new ShuffleVectorInst(Arg, Arg, Mask); 521 Shuffle->insertBefore(Brnch); 522 Arg = Shuffle; 523 ArgType = Arg->getType(); 524 TotalSize += EleSize; 525 } 526 switch (EleSize) { 527 default: 528 EleCount = TotalSize / 64; 529 IType = dyn_cast<Type>(Type::getInt64Ty(ArgType->getContext())); 530 break; 531 case 8: 532 if (EleCount >= 8) { 533 EleCount = TotalSize / 64; 534 IType = dyn_cast<Type>(Type::getInt64Ty(ArgType->getContext())); 535 } else if (EleCount >= 3) { 536 EleCount = 1; 537 IType = dyn_cast<Type>(Type::getInt32Ty(ArgType->getContext())); 538 } else { 539 EleCount = 1; 540 IType = dyn_cast<Type>(Type::getInt16Ty(ArgType->getContext())); 541 } 542 break; 543 case 16: 544 if (EleCount >= 3) { 545 EleCount = TotalSize / 64; 546 IType = dyn_cast<Type>(Type::getInt64Ty(ArgType->getContext())); 547 } else { 548 EleCount = 1; 549 IType = dyn_cast<Type>(Type::getInt32Ty(ArgType->getContext())); 550 } 551 break; 552 } 553 if (EleCount > 1) { 554 IType = dyn_cast<Type>(VectorType::get(IType, EleCount)); 555 } 556 Arg = new BitCastInst(Arg, IType, "PrintArgVect", Brnch); 557 WhatToStore.push_back(Arg); 558 } else { 559 WhatToStore.push_back(Arg); 560 } 561 for (auto W : WhatToStore) { 562 Value *TheBtCast = W; 563 unsigned ArgSize = 564 TD->getTypeAllocSizeInBits(TheBtCast->getType()) / 8; 565 SmallVector<Value *, 1> BuffOffset; 566 BuffOffset.push_back(ConstantInt::get(I32Ty, ArgSize)); 567 568 Type *ArgPointer = PointerType::get(TheBtCast->getType(), 1); 569 Value *CastedGEP = 570 new BitCastInst(BufferIdx, ArgPointer, "PrintBuffPtrCast", Brnch); 571 StoreInst *StBuff = new StoreInst(TheBtCast, CastedGEP, Brnch); 572 LLVM_DEBUG(dbgs() << "inserting store to printf buffer:\n" 573 << *StBuff << '\n'); 574 (void)StBuff; 575 ++W; 576 if (W == *WhatToStore.end() && 577 ArgCount + 1 == CI->getNumArgOperands()) 578 break; 579 BufferIdx = dyn_cast<GetElementPtrInst>(GetElementPtrInst::Create( 580 nullptr, BufferIdx, BuffOffset, "PrintBuffNextPtr", Brnch)); 581 LLVM_DEBUG(dbgs() << "inserting gep to the printf buffer:\n" 582 << *BufferIdx << '\n'); 583 } 584 } 585 } 586 } 587 588 // erase the printf calls 589 for (auto P : Printfs) { 590 CallInst *CI = dyn_cast<CallInst>(P); 591 CI->eraseFromParent(); 592 } 593 594 Printfs.clear(); 595 return true; 596 } 597 598 bool AMDGPUPrintfRuntimeBinding::runOnModule(Module &M) { 599 Triple TT(M.getTargetTriple()); 600 if (TT.getArch() == Triple::r600) 601 return false; 602 603 visit(M); 604 605 if (Printfs.empty()) 606 return false; 607 608 TD = &M.getDataLayout(); 609 auto DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); 610 DT = DTWP ? &DTWP->getDomTree() : nullptr; 611 TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); 612 613 return lowerPrintfForGpu(M); 614 } 615