//===- AArch64StackTagging.cpp - Stack tagging in IR --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//

#include "AArch64.h"
#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/StackSafetyAnalysis.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/MemoryTaggingSupport.h"
#include <cassert>
#include <iterator>
#include <memory>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "aarch64-stack-tagging"

static cl::opt<bool> ClMergeInit(
    "stack-tagging-merge-init", cl::Hidden, cl::init(true),
    cl::desc("merge stack variable initializers with tagging when possible"));

static cl::opt<bool>
    ClUseStackSafety("stack-tagging-use-stack-safety", cl::Hidden,
                     cl::init(true),
                     cl::desc("Use Stack Safety analysis results"));

static cl::opt<unsigned> ClScanLimit("stack-tagging-merge-init-scan-limit",
                                     cl::init(40), cl::Hidden);

static cl::opt<unsigned>
    ClMergeInitSizeLimit("stack-tagging-merge-init-size-limit", cl::init(272),
                         cl::Hidden);

static cl::opt<size_t> ClMaxLifetimes(
    "stack-tagging-max-lifetimes-for-alloca", cl::Hidden, cl::init(3),
    cl::ReallyHidden,
    cl::desc("How many lifetime ends to handle for a single alloca."),
    cl::Optional);

// Mode for selecting how to insert frame record info into the stack ring
// buffer.
enum StackTaggingRecordStackHistoryMode {
  // Do not record frame record info.
  none,

  // Insert instructions into the prologue for storing into the stack ring
  // buffer directly.
  instr,
};

static cl::opt<StackTaggingRecordStackHistoryMode> ClRecordStackHistory(
    "stack-tagging-record-stack-history",
    cl::desc("Record stack frames with tagged allocations in a thread-local "
             "ring buffer"),
    cl::values(clEnumVal(none, "Do not record stack ring history"),
               clEnumVal(instr, "Insert instructions into the prologue for "
                                "storing into the stack ring buffer")),
    cl::Hidden, cl::init(none));

static const Align kTagGranuleSize = Align(16);

namespace {

class InitializerBuilder {
  uint64_t Size;
  const DataLayout *DL;
  Value *BasePtr;
  Function *SetTagFn;
  Function *SetTagZeroFn;
  Function *StgpFn;

  // List of initializers sorted by start offset.
  struct Range {
    uint64_t Start, End;
    Instruction *Inst;
  };
  SmallVector<Range, 4> Ranges;
  // 8-aligned offset => 8-byte initializer
  // Missing keys are zero initialized.
  std::map<uint64_t, Value *> Out;

public:
  InitializerBuilder(uint64_t Size, const DataLayout *DL, Value *BasePtr,
                     Function *SetTagFn, Function *SetTagZeroFn,
                     Function *StgpFn)
      : Size(Size), DL(DL), BasePtr(BasePtr), SetTagFn(SetTagFn),
        SetTagZeroFn(SetTagZeroFn), StgpFn(StgpFn) {}

  bool addRange(uint64_t Start, uint64_t End, Instruction *Inst) {
    auto I =
        llvm::lower_bound(Ranges, Start, [](const Range &LHS, uint64_t RHS) {
          return LHS.End <= RHS;
        });
    if (I != Ranges.end() && End > I->Start) {
      // Overlap - bail.
      return false;
    }
    Ranges.insert(I, {Start, End, Inst});
    return true;
  }

  bool addStore(uint64_t Offset, StoreInst *SI, const DataLayout *DL) {
    int64_t StoreSize = DL->getTypeStoreSize(SI->getOperand(0)->getType());
    if (!addRange(Offset, Offset + StoreSize, SI))
      return false;
    IRBuilder<> IRB(SI);
    applyStore(IRB, Offset, Offset + StoreSize, SI->getOperand(0));
    return true;
  }

  bool addMemSet(uint64_t Offset, MemSetInst *MSI) {
    uint64_t StoreSize = cast<ConstantInt>(MSI->getLength())->getZExtValue();
    if (!addRange(Offset, Offset + StoreSize, MSI))
      return false;
    IRBuilder<> IRB(MSI);
    applyMemSet(IRB, Offset, Offset + StoreSize,
                cast<ConstantInt>(MSI->getValue()));
    return true;
  }

  void applyMemSet(IRBuilder<> &IRB, int64_t Start, int64_t End,
                   ConstantInt *V) {
    // Out[] does not distinguish between zero and undef, and we already know
    // that this memset does not overlap with any other initializer. Nothing to
    // do for memset(0).
    if (V->isZero())
      return;
    for (int64_t Offset = Start - Start % 8; Offset < End; Offset += 8) {
      uint64_t Cst = 0x0101010101010101UL;
      int LowBits = Offset < Start ? (Start - Offset) * 8 : 0;
      if (LowBits)
        Cst = (Cst >> LowBits) << LowBits;
      int HighBits = End - Offset < 8 ? (8 - (End - Offset)) * 8 : 0;
      if (HighBits)
        Cst = (Cst << HighBits) >> HighBits;
      ConstantInt *C =
          ConstantInt::get(IRB.getInt64Ty(), Cst * V->getZExtValue());

      Value *&CurrentV = Out[Offset];
      if (!CurrentV) {
        CurrentV = C;
      } else {
        CurrentV = IRB.CreateOr(CurrentV, C);
      }
    }
  }

  // Take a 64-bit slice of the value starting at the given offset (in bytes).
  // Offset can be negative. Pad with zeroes on both sides when necessary.
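  // For example, a 4-byte store at alloca offset 10 is handled by applyStore()
  // below with Start == 10: the single affected 8-byte slot is Out[8], and
  // sliceValue() is called with Offset == -2, shifting the value left by 16
  // bits so its bytes land at positions [2, 6) of the 64-bit slot.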
  Value *sliceValue(IRBuilder<> &IRB, Value *V, int64_t Offset) {
    if (Offset > 0) {
      V = IRB.CreateLShr(V, Offset * 8);
      V = IRB.CreateZExtOrTrunc(V, IRB.getInt64Ty());
    } else if (Offset < 0) {
      V = IRB.CreateZExtOrTrunc(V, IRB.getInt64Ty());
      V = IRB.CreateShl(V, -Offset * 8);
    } else {
      V = IRB.CreateZExtOrTrunc(V, IRB.getInt64Ty());
    }
    return V;
  }

  void applyStore(IRBuilder<> &IRB, int64_t Start, int64_t End,
                  Value *StoredValue) {
    StoredValue = flatten(IRB, StoredValue);
    for (int64_t Offset = Start - Start % 8; Offset < End; Offset += 8) {
      Value *V = sliceValue(IRB, StoredValue, Offset - Start);
      Value *&CurrentV = Out[Offset];
      if (!CurrentV) {
        CurrentV = V;
      } else {
        CurrentV = IRB.CreateOr(CurrentV, V);
      }
    }
  }

  void generate(IRBuilder<> &IRB) {
    LLVM_DEBUG(dbgs() << "Combined initializer\n");
    // No initializers => the entire allocation is undef.
    if (Ranges.empty()) {
      emitUndef(IRB, 0, Size);
      return;
    }

    // Look through the 8-byte initializer list 16 bytes at a time;
    // if one of the two 8-byte halves is non-zero, non-undef, emit STGP.
    // Otherwise, emit zeroes up to the next available item.
    uint64_t LastOffset = 0;
    for (uint64_t Offset = 0; Offset < Size; Offset += 16) {
      auto I1 = Out.find(Offset);
      auto I2 = Out.find(Offset + 8);
      if (I1 == Out.end() && I2 == Out.end())
        continue;

      if (Offset > LastOffset)
        emitZeroes(IRB, LastOffset, Offset - LastOffset);

      Value *Store1 = I1 == Out.end() ? Constant::getNullValue(IRB.getInt64Ty())
                                      : I1->second;
      Value *Store2 = I2 == Out.end() ? Constant::getNullValue(IRB.getInt64Ty())
                                      : I2->second;
      emitPair(IRB, Offset, Store1, Store2);
      LastOffset = Offset + 16;
    }

    // memset(0) does not update Out[], therefore the tail can be either undef
    // or zero.
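    // Emitting zeroes for the tail is correct in both cases: a zero memset
    // wants zero-initialized memory, and for an undef tail zeroes are an
    // equally valid choice.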
    if (LastOffset < Size)
      emitZeroes(IRB, LastOffset, Size - LastOffset);

    for (const auto &R : Ranges) {
      R.Inst->eraseFromParent();
    }
  }

  void emitZeroes(IRBuilder<> &IRB, uint64_t Offset, uint64_t Size) {
    LLVM_DEBUG(dbgs() << "  [" << Offset << ", " << Offset + Size
                      << ") zero\n");
    Value *Ptr = BasePtr;
    if (Offset)
      Ptr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), Ptr, Offset);
    IRB.CreateCall(SetTagZeroFn,
                   {Ptr, ConstantInt::get(IRB.getInt64Ty(), Size)});
  }

  void emitUndef(IRBuilder<> &IRB, uint64_t Offset, uint64_t Size) {
    LLVM_DEBUG(dbgs() << "  [" << Offset << ", " << Offset + Size
                      << ") undef\n");
    Value *Ptr = BasePtr;
    if (Offset)
      Ptr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), Ptr, Offset);
    IRB.CreateCall(SetTagFn, {Ptr, ConstantInt::get(IRB.getInt64Ty(), Size)});
  }

  void emitPair(IRBuilder<> &IRB, uint64_t Offset, Value *A, Value *B) {
    LLVM_DEBUG(dbgs() << "  [" << Offset << ", " << Offset + 16 << "):\n");
    LLVM_DEBUG(dbgs() << "    " << *A << "\n    " << *B << "\n");
    Value *Ptr = BasePtr;
    if (Offset)
      Ptr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), Ptr, Offset);
    IRB.CreateCall(StgpFn, {Ptr, A, B});
  }

  Value *flatten(IRBuilder<> &IRB, Value *V) {
    if (V->getType()->isIntegerTy())
      return V;
    // vector of pointers -> vector of ints
    if (VectorType *VecTy = dyn_cast<VectorType>(V->getType())) {
      LLVMContext &Ctx = IRB.getContext();
      Type *EltTy = VecTy->getElementType();
      if (EltTy->isPointerTy()) {
        uint32_t EltSize = DL->getTypeSizeInBits(EltTy);
        auto *NewTy = FixedVectorType::get(
            IntegerType::get(Ctx, EltSize),
            cast<FixedVectorType>(VecTy)->getNumElements());
        V = IRB.CreatePointerCast(V, NewTy);
      }
    }
    return IRB.CreateBitOrPointerCast(
        V, IRB.getIntNTy(DL->getTypeStoreSize(V->getType()) * 8));
  }
};

class AArch64StackTagging : public FunctionPass {
  const bool MergeInit;
  const bool UseStackSafety;

public:
  static char ID; // Pass ID, replacement for typeid

  AArch64StackTagging(bool IsOptNone = false)
      : FunctionPass(ID),
        MergeInit(ClMergeInit.getNumOccurrences() ? ClMergeInit : !IsOptNone),
        UseStackSafety(ClUseStackSafety.getNumOccurrences() ? ClUseStackSafety
                                                            : !IsOptNone) {
    initializeAArch64StackTaggingPass(*PassRegistry::getPassRegistry());
  }

  void tagAlloca(AllocaInst *AI, Instruction *InsertBefore, Value *Ptr,
                 uint64_t Size);
  void untagAlloca(AllocaInst *AI, Instruction *InsertBefore, uint64_t Size);

  Instruction *collectInitializers(Instruction *StartInst, Value *StartPtr,
                                   uint64_t Size, InitializerBuilder &IB);

  Instruction *insertBaseTaggedPointer(
      const Module &M,
      const MapVector<AllocaInst *, memtag::AllocaInfo> &Allocas,
      const DominatorTree *DT);
  bool runOnFunction(Function &F) override;

  StringRef getPassName() const override { return "AArch64 Stack Tagging"; }

private:
  Function *F = nullptr;
  Function *SetTagFunc = nullptr;
  const DataLayout *DL = nullptr;
  AAResults *AA = nullptr;
  const StackSafetyGlobalInfo *SSI = nullptr;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    if (UseStackSafety)
      AU.addRequired<StackSafetyGlobalInfoWrapperPass>();
    if (MergeInit)
      AU.addRequired<AAResultsWrapperPass>();
  }
};

} // end anonymous namespace

char AArch64StackTagging::ID = 0;

INITIALIZE_PASS_BEGIN(AArch64StackTagging, DEBUG_TYPE, "AArch64 Stack Tagging",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(StackSafetyGlobalInfoWrapperPass)
INITIALIZE_PASS_END(AArch64StackTagging, DEBUG_TYPE, "AArch64 Stack Tagging",
                    false, false)

FunctionPass *llvm::createAArch64StackTaggingPass(bool IsOptNone) {
  return new AArch64StackTagging(IsOptNone);
}

Instruction *AArch64StackTagging::collectInitializers(Instruction *StartInst,
                                                      Value *StartPtr,
                                                      uint64_t Size,
                                                      InitializerBuilder &IB) {
  MemoryLocation AllocaLoc{StartPtr, Size};
  Instruction *LastInst = StartInst;
  BasicBlock::iterator BI(StartInst);

  unsigned Count = 0;
  for (; Count < ClScanLimit && !BI->isTerminator(); ++BI) {
    if (!isa<DbgInfoIntrinsic>(*BI))
      ++Count;

    if (isNoModRef(AA->getModRefInfo(&*BI, AllocaLoc)))
      continue;

    if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
      // If the instruction is readnone, ignore it, otherwise bail out. We
      // don't even allow readonly here because we don't want something like:
      // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
      if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
        break;
      continue;
    }

    if (StoreInst *NextStore = dyn_cast<StoreInst>(BI)) {
      if (!NextStore->isSimple())
        break;

      // Check to see if this store is to a constant offset from the start ptr.
      std::optional<int64_t> Offset =
          NextStore->getPointerOperand()->getPointerOffsetFrom(StartPtr, *DL);
      if (!Offset)
        break;

      if (!IB.addStore(*Offset, NextStore, DL))
        break;
      LastInst = NextStore;
    } else {
      MemSetInst *MSI = cast<MemSetInst>(BI);

      if (MSI->isVolatile() || !isa<ConstantInt>(MSI->getLength()))
        break;

      if (!isa<ConstantInt>(MSI->getValue()))
        break;

      // Check to see if this store is to a constant offset from the start ptr.
      std::optional<int64_t> Offset =
          MSI->getDest()->getPointerOffsetFrom(StartPtr, *DL);
      if (!Offset)
        break;

      if (!IB.addMemSet(*Offset, MSI))
        break;
      LastInst = MSI;
    }
  }
  return LastInst;
}

void AArch64StackTagging::tagAlloca(AllocaInst *AI, Instruction *InsertBefore,
                                    Value *Ptr, uint64_t Size) {
  auto SetTagZeroFunc =
      Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_settag_zero);
  auto StgpFunc =
      Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_stgp);

  InitializerBuilder IB(Size, DL, Ptr, SetTagFunc, SetTagZeroFunc, StgpFunc);
  bool LittleEndian =
      Triple(AI->getModule()->getTargetTriple()).isLittleEndian();
  // Current implementation of initializer merging assumes little endianness.
  if (MergeInit && !F->hasOptNone() && LittleEndian &&
      Size < ClMergeInitSizeLimit) {
    LLVM_DEBUG(dbgs() << "collecting initializers for " << *AI
                      << ", size = " << Size << "\n");
    InsertBefore = collectInitializers(InsertBefore, Ptr, Size, IB);
  }

  IRBuilder<> IRB(InsertBefore);
  IB.generate(IRB);
}

void AArch64StackTagging::untagAlloca(AllocaInst *AI, Instruction *InsertBefore,
                                      uint64_t Size) {
  IRBuilder<> IRB(InsertBefore);
  IRB.CreateCall(SetTagFunc, {IRB.CreatePointerCast(AI, IRB.getPtrTy()),
                              ConstantInt::get(IRB.getInt64Ty(), Size)});
}

Instruction *AArch64StackTagging::insertBaseTaggedPointer(
    const Module &M,
    const MapVector<AllocaInst *, memtag::AllocaInfo> &AllocasToInstrument,
    const DominatorTree *DT) {
  BasicBlock *PrologueBB = nullptr;
  // Try sinking IRG as deep as possible to avoid hurting shrink wrap.
  for (auto &I : AllocasToInstrument) {
    const memtag::AllocaInfo &Info = I.second;
    AllocaInst *AI = Info.AI;
    if (!PrologueBB) {
      PrologueBB = AI->getParent();
      continue;
    }
    PrologueBB = DT->findNearestCommonDominator(PrologueBB, AI->getParent());
  }
  assert(PrologueBB);

  IRBuilder<> IRB(&PrologueBB->front());
  Function *IRG_SP =
      Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_irg_sp);
  Instruction *Base =
      IRB.CreateCall(IRG_SP, {Constant::getNullValue(IRB.getInt64Ty())});
  Base->setName("basetag");
  auto TargetTriple = Triple(M.getTargetTriple());
  // This is not a stable ABI for now, so only allow in dev builds with API
  // level 10000.
  // The ThreadLong format is the same as with HWASan, but the entries for
  // stack MTE take two slots (16 bytes).
  if (ClRecordStackHistory == instr && TargetTriple.isAndroid() &&
      TargetTriple.isAArch64() && !TargetTriple.isAndroidVersionLT(10000) &&
      !AllocasToInstrument.empty()) {
    constexpr int StackMteSlot = -3;
    constexpr uint64_t TagMask = 0xFULL << 56;

    auto *IntptrTy = IRB.getIntPtrTy(M.getDataLayout());
    Value *SlotPtr = memtag::getAndroidSlotPtr(IRB, StackMteSlot);
    auto *ThreadLong = IRB.CreateLoad(IntptrTy, SlotPtr);
    Value *FP = memtag::getFP(IRB);
    Value *Tag = IRB.CreateAnd(IRB.CreatePtrToInt(Base, IntptrTy), TagMask);
    Value *TaggedFP = IRB.CreateOr(FP, Tag);
    Value *PC = memtag::getPC(TargetTriple, IRB);
    Value *RecordPtr = IRB.CreateIntToPtr(ThreadLong, IRB.getPtrTy(0));
    IRB.CreateStore(PC, RecordPtr);
    IRB.CreateStore(TaggedFP, IRB.CreateConstGEP1_64(IntptrTy, RecordPtr, 1));
    // Update the ring buffer. Top byte of ThreadLong defines the size of the
    // buffer in pages; it must be a power of two, and the start of the buffer
    // must be aligned by twice that much. Therefore wrap-around of the ring
    // buffer is simply Addr &= ~((ThreadLong >> 56) << 12).
    // The use of AShr instead of LShr is due to
    // https://bugs.llvm.org/show_bug.cgi?id=39030
    // The runtime library makes sure not to use the highest bit.
    Value *WrapMask = IRB.CreateXor(
        IRB.CreateShl(IRB.CreateAShr(ThreadLong, 56), 12, "", true, true),
        ConstantInt::get(IntptrTy, (uint64_t)-1));
    Value *ThreadLongNew = IRB.CreateAnd(
        IRB.CreateAdd(ThreadLong, ConstantInt::get(IntptrTy, 16)), WrapMask);
    IRB.CreateStore(ThreadLongNew, SlotPtr);
  }
  return Base;
}

// FIXME: check for MTE extension
bool AArch64StackTagging::runOnFunction(Function &Fn) {
  if (!Fn.hasFnAttribute(Attribute::SanitizeMemTag))
    return false;

  if (UseStackSafety)
    SSI = &getAnalysis<StackSafetyGlobalInfoWrapperPass>().getResult();
  F = &Fn;
  DL = &Fn.getDataLayout();
  if (MergeInit)
    AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();

  memtag::StackInfoBuilder SIB(SSI);
  for (Instruction &I : instructions(F))
    SIB.visit(I);
  memtag::StackInfo &SInfo = SIB.get();

  if (SInfo.AllocasToInstrument.empty())
    return false;

  std::unique_ptr<DominatorTree> DeleteDT;
  DominatorTree *DT = nullptr;
  if (auto *P = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
    DT = &P->getDomTree();

  if (DT == nullptr) {
    DeleteDT = std::make_unique<DominatorTree>(*F);
    DT = DeleteDT.get();
  }

  std::unique_ptr<PostDominatorTree> DeletePDT;
  PostDominatorTree *PDT = nullptr;
  if (auto *P = getAnalysisIfAvailable<PostDominatorTreeWrapperPass>())
    PDT = &P->getPostDomTree();

  if (PDT == nullptr) {
    DeletePDT = std::make_unique<PostDominatorTree>(*F);
    PDT = DeletePDT.get();
  }

  std::unique_ptr<LoopInfo> DeleteLI;
  LoopInfo *LI = nullptr;
  if (auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>()) {
    LI = &LIWP->getLoopInfo();
  } else {
    DeleteLI = std::make_unique<LoopInfo>(*DT);
    LI = DeleteLI.get();
  }

  SetTagFunc =
      Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_settag);

  Instruction *Base =
      insertBaseTaggedPointer(*Fn.getParent(), SInfo.AllocasToInstrument, DT);

  int NextTag = 0;
  for (auto &I : SInfo.AllocasToInstrument) {
    memtag::AllocaInfo &Info = I.second;
    assert(Info.AI && SIB.isInterestingAlloca(*Info.AI));
    memtag::alignAndPadAlloca(Info, kTagGranuleSize);
    AllocaInst *AI = Info.AI;
    int Tag = NextTag;
    NextTag = (NextTag + 1) % 16;
    // Replace alloca with tagp(alloca).
    IRBuilder<> IRB(Info.AI->getNextNode());
    Function *TagP = Intrinsic::getDeclaration(
        F->getParent(), Intrinsic::aarch64_tagp, {Info.AI->getType()});
    Instruction *TagPCall =
        IRB.CreateCall(TagP, {Constant::getNullValue(Info.AI->getType()), Base,
                              ConstantInt::get(IRB.getInt64Ty(), Tag)});
    if (Info.AI->hasName())
      TagPCall->setName(Info.AI->getName() + ".tag");
    // Does not replace metadata, so we don't have to handle DbgVariableRecords.
    Info.AI->replaceUsesWithIf(TagPCall, [&](const Use &U) {
      return !memtag::isLifetimeIntrinsic(U.getUser());
    });
    TagPCall->setOperand(0, Info.AI);

    // Calls to functions that may return twice (e.g. setjmp) confuse the
    // postdominator analysis, and would leave memory tagged after the function
    // returns. Work around this by always untagging at every return statement
    // if returns_twice functions are called.
    bool StandardLifetime =
        !SInfo.CallsReturnTwice &&
        SInfo.UnrecognizedLifetimes.empty() &&
        memtag::isStandardLifetime(Info.LifetimeStart, Info.LifetimeEnd, DT, LI,
                                   ClMaxLifetimes);
    if (StandardLifetime) {
      IntrinsicInst *Start = Info.LifetimeStart[0];
      uint64_t Size =
          cast<ConstantInt>(Start->getArgOperand(0))->getZExtValue();
      Size = alignTo(Size, kTagGranuleSize);
      tagAlloca(AI, Start->getNextNode(), TagPCall, Size);

      auto TagEnd = [&](Instruction *Node) { untagAlloca(AI, Node, Size); };
      if (!DT || !PDT ||
          !memtag::forAllReachableExits(*DT, *PDT, *LI, Start, Info.LifetimeEnd,
                                        SInfo.RetVec, TagEnd)) {
        for (auto *End : Info.LifetimeEnd)
          End->eraseFromParent();
      }
    } else {
      uint64_t Size = *Info.AI->getAllocationSize(*DL);
      Value *Ptr = IRB.CreatePointerCast(TagPCall, IRB.getPtrTy());
      tagAlloca(AI, &*IRB.GetInsertPoint(), Ptr, Size);
      for (auto *RI : SInfo.RetVec) {
        untagAlloca(AI, RI, Size);
      }
      // We may have inserted tag/untag outside of any lifetime interval.
      // Remove all lifetime intrinsics for this alloca.
      for (auto *II : Info.LifetimeStart)
        II->eraseFromParent();
      for (auto *II : Info.LifetimeEnd)
        II->eraseFromParent();
    }

    memtag::annotateDebugRecords(Info, static_cast<unsigned long>(Tag));
  }

  // If we have instrumented at least one alloca, all unrecognized lifetime
  // intrinsics have to go.
  for (auto *I : SInfo.UnrecognizedLifetimes)
    I->eraseFromParent();

  return true;
}