xref: /netbsd-src/external/apache2/llvm/dist/llvm/lib/Target/AArch64/AArch64StackTagging.cpp (revision 82d56013d7b633d116a93943de88e08335357a7c)
1 //===- AArch64StackTagging.cpp - Stack tagging in IR --===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //===----------------------------------------------------------------------===//
10 
11 #include "AArch64.h"
12 #include "AArch64InstrInfo.h"
13 #include "AArch64Subtarget.h"
14 #include "AArch64TargetMachine.h"
15 #include "llvm/ADT/DenseMap.h"
16 #include "llvm/ADT/DepthFirstIterator.h"
17 #include "llvm/ADT/MapVector.h"
18 #include "llvm/ADT/None.h"
19 #include "llvm/ADT/Optional.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/Statistic.h"
22 #include "llvm/Analysis/AliasAnalysis.h"
23 #include "llvm/Analysis/CFG.h"
24 #include "llvm/Analysis/LoopInfo.h"
25 #include "llvm/Analysis/PostDominators.h"
26 #include "llvm/Analysis/ScalarEvolution.h"
27 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
28 #include "llvm/Analysis/StackSafetyAnalysis.h"
29 #include "llvm/Analysis/ValueTracking.h"
30 #include "llvm/CodeGen/LiveRegUnits.h"
31 #include "llvm/CodeGen/MachineBasicBlock.h"
32 #include "llvm/CodeGen/MachineFunction.h"
33 #include "llvm/CodeGen/MachineFunctionPass.h"
34 #include "llvm/CodeGen/MachineInstr.h"
35 #include "llvm/CodeGen/MachineInstrBuilder.h"
36 #include "llvm/CodeGen/MachineLoopInfo.h"
37 #include "llvm/CodeGen/MachineOperand.h"
38 #include "llvm/CodeGen/MachineRegisterInfo.h"
39 #include "llvm/CodeGen/TargetPassConfig.h"
40 #include "llvm/CodeGen/TargetRegisterInfo.h"
41 #include "llvm/IR/DebugLoc.h"
42 #include "llvm/IR/Dominators.h"
43 #include "llvm/IR/Function.h"
44 #include "llvm/IR/GetElementPtrTypeIterator.h"
45 #include "llvm/IR/Instruction.h"
46 #include "llvm/IR/Instructions.h"
47 #include "llvm/IR/IntrinsicInst.h"
48 #include "llvm/IR/IntrinsicsAArch64.h"
49 #include "llvm/IR/Metadata.h"
50 #include "llvm/InitializePasses.h"
51 #include "llvm/Pass.h"
52 #include "llvm/Support/Casting.h"
53 #include "llvm/Support/Debug.h"
54 #include "llvm/Support/raw_ostream.h"
55 #include "llvm/Transforms/Utils/Local.h"
56 #include <cassert>
57 #include <iterator>
58 #include <utility>
59 
60 using namespace llvm;
61 
62 #define DEBUG_TYPE "aarch64-stack-tagging"
63 
64 static cl::opt<bool> ClMergeInit(
65     "stack-tagging-merge-init", cl::Hidden, cl::init(true), cl::ZeroOrMore,
66     cl::desc("merge stack variable initializers with tagging when possible"));
67 
68 static cl::opt<bool>
69     ClUseStackSafety("stack-tagging-use-stack-safety", cl::Hidden,
70                      cl::init(true), cl::ZeroOrMore,
71                      cl::desc("Use Stack Safety analysis results"));
72 
73 static cl::opt<unsigned> ClScanLimit("stack-tagging-merge-init-scan-limit",
74                                      cl::init(40), cl::Hidden);
75 
76 static cl::opt<unsigned>
77     ClMergeInitSizeLimit("stack-tagging-merge-init-size-limit", cl::init(272),
78                          cl::Hidden);
79 
80 static const Align kTagGranuleSize = Align(16);
81 
82 namespace {
83 
84 class InitializerBuilder {
85   uint64_t Size;
86   const DataLayout *DL;
87   Value *BasePtr;
88   Function *SetTagFn;
89   Function *SetTagZeroFn;
90   Function *StgpFn;
91 
92   // List of initializers sorted by start offset.
93   struct Range {
94     uint64_t Start, End;
95     Instruction *Inst;
96   };
97   SmallVector<Range, 4> Ranges;
98   // 8-aligned offset => 8-byte initializer
99   // Missing keys are zero initialized.
100   std::map<uint64_t, Value *> Out;
101 
102 public:
InitializerBuilder(uint64_t Size,const DataLayout * DL,Value * BasePtr,Function * SetTagFn,Function * SetTagZeroFn,Function * StgpFn)103   InitializerBuilder(uint64_t Size, const DataLayout *DL, Value *BasePtr,
104                      Function *SetTagFn, Function *SetTagZeroFn,
105                      Function *StgpFn)
106       : Size(Size), DL(DL), BasePtr(BasePtr), SetTagFn(SetTagFn),
107         SetTagZeroFn(SetTagZeroFn), StgpFn(StgpFn) {}
108 
addRange(uint64_t Start,uint64_t End,Instruction * Inst)109   bool addRange(uint64_t Start, uint64_t End, Instruction *Inst) {
110     auto I =
111         llvm::lower_bound(Ranges, Start, [](const Range &LHS, uint64_t RHS) {
112           return LHS.End <= RHS;
113         });
114     if (I != Ranges.end() && End > I->Start) {
115       // Overlap - bail.
116       return false;
117     }
118     Ranges.insert(I, {Start, End, Inst});
119     return true;
120   }
121 
addStore(uint64_t Offset,StoreInst * SI,const DataLayout * DL)122   bool addStore(uint64_t Offset, StoreInst *SI, const DataLayout *DL) {
123     int64_t StoreSize = DL->getTypeStoreSize(SI->getOperand(0)->getType());
124     if (!addRange(Offset, Offset + StoreSize, SI))
125       return false;
126     IRBuilder<> IRB(SI);
127     applyStore(IRB, Offset, Offset + StoreSize, SI->getOperand(0));
128     return true;
129   }
130 
addMemSet(uint64_t Offset,MemSetInst * MSI)131   bool addMemSet(uint64_t Offset, MemSetInst *MSI) {
132     uint64_t StoreSize = cast<ConstantInt>(MSI->getLength())->getZExtValue();
133     if (!addRange(Offset, Offset + StoreSize, MSI))
134       return false;
135     IRBuilder<> IRB(MSI);
136     applyMemSet(IRB, Offset, Offset + StoreSize,
137                 cast<ConstantInt>(MSI->getValue()));
138     return true;
139   }
140 
applyMemSet(IRBuilder<> & IRB,int64_t Start,int64_t End,ConstantInt * V)141   void applyMemSet(IRBuilder<> &IRB, int64_t Start, int64_t End,
142                    ConstantInt *V) {
143     // Out[] does not distinguish between zero and undef, and we already know
144     // that this memset does not overlap with any other initializer. Nothing to
145     // do for memset(0).
146     if (V->isZero())
147       return;
148     for (int64_t Offset = Start - Start % 8; Offset < End; Offset += 8) {
149       uint64_t Cst = 0x0101010101010101UL;
150       int LowBits = Offset < Start ? (Start - Offset) * 8 : 0;
151       if (LowBits)
152         Cst = (Cst >> LowBits) << LowBits;
153       int HighBits = End - Offset < 8 ? (8 - (End - Offset)) * 8 : 0;
154       if (HighBits)
155         Cst = (Cst << HighBits) >> HighBits;
156       ConstantInt *C =
157           ConstantInt::get(IRB.getInt64Ty(), Cst * V->getZExtValue());
158 
159       Value *&CurrentV = Out[Offset];
160       if (!CurrentV) {
161         CurrentV = C;
162       } else {
163         CurrentV = IRB.CreateOr(CurrentV, C);
164       }
165     }
166   }
167 
168   // Take a 64-bit slice of the value starting at the given offset (in bytes).
169   // Offset can be negative. Pad with zeroes on both sides when necessary.
sliceValue(IRBuilder<> & IRB,Value * V,int64_t Offset)170   Value *sliceValue(IRBuilder<> &IRB, Value *V, int64_t Offset) {
171     if (Offset > 0) {
172       V = IRB.CreateLShr(V, Offset * 8);
173       V = IRB.CreateZExtOrTrunc(V, IRB.getInt64Ty());
174     } else if (Offset < 0) {
175       V = IRB.CreateZExtOrTrunc(V, IRB.getInt64Ty());
176       V = IRB.CreateShl(V, -Offset * 8);
177     } else {
178       V = IRB.CreateZExtOrTrunc(V, IRB.getInt64Ty());
179     }
180     return V;
181   }
182 
applyStore(IRBuilder<> & IRB,int64_t Start,int64_t End,Value * StoredValue)183   void applyStore(IRBuilder<> &IRB, int64_t Start, int64_t End,
184                   Value *StoredValue) {
185     StoredValue = flatten(IRB, StoredValue);
186     for (int64_t Offset = Start - Start % 8; Offset < End; Offset += 8) {
187       Value *V = sliceValue(IRB, StoredValue, Offset - Start);
188       Value *&CurrentV = Out[Offset];
189       if (!CurrentV) {
190         CurrentV = V;
191       } else {
192         CurrentV = IRB.CreateOr(CurrentV, V);
193       }
194     }
195   }
196 
generate(IRBuilder<> & IRB)197   void generate(IRBuilder<> &IRB) {
198     LLVM_DEBUG(dbgs() << "Combined initializer\n");
199     // No initializers => the entire allocation is undef.
200     if (Ranges.empty()) {
201       emitUndef(IRB, 0, Size);
202       return;
203     }
204 
205     // Look through 8-byte initializer list 16 bytes at a time;
206     // If one of the two 8-byte halfs is non-zero non-undef, emit STGP.
207     // Otherwise, emit zeroes up to next available item.
208     uint64_t LastOffset = 0;
209     for (uint64_t Offset = 0; Offset < Size; Offset += 16) {
210       auto I1 = Out.find(Offset);
211       auto I2 = Out.find(Offset + 8);
212       if (I1 == Out.end() && I2 == Out.end())
213         continue;
214 
215       if (Offset > LastOffset)
216         emitZeroes(IRB, LastOffset, Offset - LastOffset);
217 
218       Value *Store1 = I1 == Out.end() ? Constant::getNullValue(IRB.getInt64Ty())
219                                       : I1->second;
220       Value *Store2 = I2 == Out.end() ? Constant::getNullValue(IRB.getInt64Ty())
221                                       : I2->second;
222       emitPair(IRB, Offset, Store1, Store2);
223       LastOffset = Offset + 16;
224     }
225 
226     // memset(0) does not update Out[], therefore the tail can be either undef
227     // or zero.
228     if (LastOffset < Size)
229       emitZeroes(IRB, LastOffset, Size - LastOffset);
230 
231     for (const auto &R : Ranges) {
232       R.Inst->eraseFromParent();
233     }
234   }
235 
emitZeroes(IRBuilder<> & IRB,uint64_t Offset,uint64_t Size)236   void emitZeroes(IRBuilder<> &IRB, uint64_t Offset, uint64_t Size) {
237     LLVM_DEBUG(dbgs() << "  [" << Offset << ", " << Offset + Size
238                       << ") zero\n");
239     Value *Ptr = BasePtr;
240     if (Offset)
241       Ptr = IRB.CreateConstGEP1_32(Ptr, Offset);
242     IRB.CreateCall(SetTagZeroFn,
243                    {Ptr, ConstantInt::get(IRB.getInt64Ty(), Size)});
244   }
245 
emitUndef(IRBuilder<> & IRB,uint64_t Offset,uint64_t Size)246   void emitUndef(IRBuilder<> &IRB, uint64_t Offset, uint64_t Size) {
247     LLVM_DEBUG(dbgs() << "  [" << Offset << ", " << Offset + Size
248                       << ") undef\n");
249     Value *Ptr = BasePtr;
250     if (Offset)
251       Ptr = IRB.CreateConstGEP1_32(Ptr, Offset);
252     IRB.CreateCall(SetTagFn, {Ptr, ConstantInt::get(IRB.getInt64Ty(), Size)});
253   }
254 
emitPair(IRBuilder<> & IRB,uint64_t Offset,Value * A,Value * B)255   void emitPair(IRBuilder<> &IRB, uint64_t Offset, Value *A, Value *B) {
256     LLVM_DEBUG(dbgs() << "  [" << Offset << ", " << Offset + 16 << "):\n");
257     LLVM_DEBUG(dbgs() << "    " << *A << "\n    " << *B << "\n");
258     Value *Ptr = BasePtr;
259     if (Offset)
260       Ptr = IRB.CreateConstGEP1_32(Ptr, Offset);
261     IRB.CreateCall(StgpFn, {Ptr, A, B});
262   }
263 
flatten(IRBuilder<> & IRB,Value * V)264   Value *flatten(IRBuilder<> &IRB, Value *V) {
265     if (V->getType()->isIntegerTy())
266       return V;
267     // vector of pointers -> vector of ints
268     if (VectorType *VecTy = dyn_cast<VectorType>(V->getType())) {
269       LLVMContext &Ctx = IRB.getContext();
270       Type *EltTy = VecTy->getElementType();
271       if (EltTy->isPointerTy()) {
272         uint32_t EltSize = DL->getTypeSizeInBits(EltTy);
273         auto *NewTy = FixedVectorType::get(
274             IntegerType::get(Ctx, EltSize),
275             cast<FixedVectorType>(VecTy)->getNumElements());
276         V = IRB.CreatePointerCast(V, NewTy);
277       }
278     }
279     return IRB.CreateBitOrPointerCast(
280         V, IRB.getIntNTy(DL->getTypeStoreSize(V->getType()) * 8));
281   }
282 };
283 
284 class AArch64StackTagging : public FunctionPass {
285   struct AllocaInfo {
286     AllocaInst *AI;
287     TrackingVH<Instruction> OldAI; // Track through RAUW to replace debug uses.
288     SmallVector<IntrinsicInst *, 2> LifetimeStart;
289     SmallVector<IntrinsicInst *, 2> LifetimeEnd;
290     SmallVector<DbgVariableIntrinsic *, 2> DbgVariableIntrinsics;
291     int Tag; // -1 for non-tagged allocations
292   };
293 
294   const bool MergeInit;
295   const bool UseStackSafety;
296 
297 public:
298   static char ID; // Pass ID, replacement for typeid
299 
AArch64StackTagging(bool IsOptNone=false)300   AArch64StackTagging(bool IsOptNone = false)
301       : FunctionPass(ID),
302         MergeInit(ClMergeInit.getNumOccurrences() ? ClMergeInit : !IsOptNone),
303         UseStackSafety(ClUseStackSafety.getNumOccurrences() ? ClUseStackSafety
304                                                             : !IsOptNone) {
305     initializeAArch64StackTaggingPass(*PassRegistry::getPassRegistry());
306   }
307 
308   bool isInterestingAlloca(const AllocaInst &AI);
309   void alignAndPadAlloca(AllocaInfo &Info);
310 
311   void tagAlloca(AllocaInst *AI, Instruction *InsertBefore, Value *Ptr,
312                  uint64_t Size);
313   void untagAlloca(AllocaInst *AI, Instruction *InsertBefore, uint64_t Size);
314 
315   Instruction *collectInitializers(Instruction *StartInst, Value *StartPtr,
316                                    uint64_t Size, InitializerBuilder &IB);
317 
318   Instruction *
319   insertBaseTaggedPointer(const MapVector<AllocaInst *, AllocaInfo> &Allocas,
320                           const DominatorTree *DT);
321   bool runOnFunction(Function &F) override;
322 
getPassName() const323   StringRef getPassName() const override { return "AArch64 Stack Tagging"; }
324 
325 private:
326   Function *F = nullptr;
327   Function *SetTagFunc = nullptr;
328   const DataLayout *DL = nullptr;
329   AAResults *AA = nullptr;
330   const StackSafetyGlobalInfo *SSI = nullptr;
331 
getAnalysisUsage(AnalysisUsage & AU) const332   void getAnalysisUsage(AnalysisUsage &AU) const override {
333     AU.setPreservesCFG();
334     if (UseStackSafety)
335       AU.addRequired<StackSafetyGlobalInfoWrapperPass>();
336     if (MergeInit)
337       AU.addRequired<AAResultsWrapperPass>();
338   }
339 };
340 
341 } // end anonymous namespace
342 
343 char AArch64StackTagging::ID = 0;
344 
345 INITIALIZE_PASS_BEGIN(AArch64StackTagging, DEBUG_TYPE, "AArch64 Stack Tagging",
346                       false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)347 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
348 INITIALIZE_PASS_DEPENDENCY(StackSafetyGlobalInfoWrapperPass)
349 INITIALIZE_PASS_END(AArch64StackTagging, DEBUG_TYPE, "AArch64 Stack Tagging",
350                     false, false)
351 
352 FunctionPass *llvm::createAArch64StackTaggingPass(bool IsOptNone) {
353   return new AArch64StackTagging(IsOptNone);
354 }
355 
collectInitializers(Instruction * StartInst,Value * StartPtr,uint64_t Size,InitializerBuilder & IB)356 Instruction *AArch64StackTagging::collectInitializers(Instruction *StartInst,
357                                                       Value *StartPtr,
358                                                       uint64_t Size,
359                                                       InitializerBuilder &IB) {
360   MemoryLocation AllocaLoc{StartPtr, Size};
361   Instruction *LastInst = StartInst;
362   BasicBlock::iterator BI(StartInst);
363 
364   unsigned Count = 0;
365   for (; Count < ClScanLimit && !BI->isTerminator(); ++BI) {
366     if (!isa<DbgInfoIntrinsic>(*BI))
367       ++Count;
368 
369     if (isNoModRef(AA->getModRefInfo(&*BI, AllocaLoc)))
370       continue;
371 
372     if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
373       // If the instruction is readnone, ignore it, otherwise bail out.  We
374       // don't even allow readonly here because we don't want something like:
375       // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
376       if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
377         break;
378       continue;
379     }
380 
381     if (StoreInst *NextStore = dyn_cast<StoreInst>(BI)) {
382       if (!NextStore->isSimple())
383         break;
384 
385       // Check to see if this store is to a constant offset from the start ptr.
386       Optional<int64_t> Offset =
387           isPointerOffset(StartPtr, NextStore->getPointerOperand(), *DL);
388       if (!Offset)
389         break;
390 
391       if (!IB.addStore(*Offset, NextStore, DL))
392         break;
393       LastInst = NextStore;
394     } else {
395       MemSetInst *MSI = cast<MemSetInst>(BI);
396 
397       if (MSI->isVolatile() || !isa<ConstantInt>(MSI->getLength()))
398         break;
399 
400       if (!isa<ConstantInt>(MSI->getValue()))
401         break;
402 
403       // Check to see if this store is to a constant offset from the start ptr.
404       Optional<int64_t> Offset = isPointerOffset(StartPtr, MSI->getDest(), *DL);
405       if (!Offset)
406         break;
407 
408       if (!IB.addMemSet(*Offset, MSI))
409         break;
410       LastInst = MSI;
411     }
412   }
413   return LastInst;
414 }
415 
isInterestingAlloca(const AllocaInst & AI)416 bool AArch64StackTagging::isInterestingAlloca(const AllocaInst &AI) {
417   // FIXME: support dynamic allocas
418   bool IsInteresting =
419       AI.getAllocatedType()->isSized() && AI.isStaticAlloca() &&
420       // alloca() may be called with 0 size, ignore it.
421       AI.getAllocationSizeInBits(*DL).getValue() > 0 &&
422       // inalloca allocas are not treated as static, and we don't want
423       // dynamic alloca instrumentation for them as well.
424       !AI.isUsedWithInAlloca() &&
425       // swifterror allocas are register promoted by ISel
426       !AI.isSwiftError() &&
427       // safe allocas are not interesting
428       !(SSI && SSI->isSafe(AI));
429   return IsInteresting;
430 }
431 
tagAlloca(AllocaInst * AI,Instruction * InsertBefore,Value * Ptr,uint64_t Size)432 void AArch64StackTagging::tagAlloca(AllocaInst *AI, Instruction *InsertBefore,
433                                     Value *Ptr, uint64_t Size) {
434   auto SetTagZeroFunc =
435       Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_settag_zero);
436   auto StgpFunc =
437       Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_stgp);
438 
439   InitializerBuilder IB(Size, DL, Ptr, SetTagFunc, SetTagZeroFunc, StgpFunc);
440   bool LittleEndian =
441       Triple(AI->getModule()->getTargetTriple()).isLittleEndian();
442   // Current implementation of initializer merging assumes little endianness.
443   if (MergeInit && !F->hasOptNone() && LittleEndian &&
444       Size < ClMergeInitSizeLimit) {
445     LLVM_DEBUG(dbgs() << "collecting initializers for " << *AI
446                       << ", size = " << Size << "\n");
447     InsertBefore = collectInitializers(InsertBefore, Ptr, Size, IB);
448   }
449 
450   IRBuilder<> IRB(InsertBefore);
451   IB.generate(IRB);
452 }
453 
untagAlloca(AllocaInst * AI,Instruction * InsertBefore,uint64_t Size)454 void AArch64StackTagging::untagAlloca(AllocaInst *AI, Instruction *InsertBefore,
455                                       uint64_t Size) {
456   IRBuilder<> IRB(InsertBefore);
457   IRB.CreateCall(SetTagFunc, {IRB.CreatePointerCast(AI, IRB.getInt8PtrTy()),
458                               ConstantInt::get(IRB.getInt64Ty(), Size)});
459 }
460 
insertBaseTaggedPointer(const MapVector<AllocaInst *,AllocaInfo> & Allocas,const DominatorTree * DT)461 Instruction *AArch64StackTagging::insertBaseTaggedPointer(
462     const MapVector<AllocaInst *, AllocaInfo> &Allocas,
463     const DominatorTree *DT) {
464   BasicBlock *PrologueBB = nullptr;
465   // Try sinking IRG as deep as possible to avoid hurting shrink wrap.
466   for (auto &I : Allocas) {
467     const AllocaInfo &Info = I.second;
468     AllocaInst *AI = Info.AI;
469     if (Info.Tag < 0)
470       continue;
471     if (!PrologueBB) {
472       PrologueBB = AI->getParent();
473       continue;
474     }
475     PrologueBB = DT->findNearestCommonDominator(PrologueBB, AI->getParent());
476   }
477   assert(PrologueBB);
478 
479   IRBuilder<> IRB(&PrologueBB->front());
480   Function *IRG_SP =
481       Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_irg_sp);
482   Instruction *Base =
483       IRB.CreateCall(IRG_SP, {Constant::getNullValue(IRB.getInt64Ty())});
484   Base->setName("basetag");
485   return Base;
486 }
487 
alignAndPadAlloca(AllocaInfo & Info)488 void AArch64StackTagging::alignAndPadAlloca(AllocaInfo &Info) {
489   const Align NewAlignment =
490       max(MaybeAlign(Info.AI->getAlignment()), kTagGranuleSize);
491   Info.AI->setAlignment(NewAlignment);
492 
493   uint64_t Size = Info.AI->getAllocationSizeInBits(*DL).getValue() / 8;
494   uint64_t AlignedSize = alignTo(Size, kTagGranuleSize);
495   if (Size == AlignedSize)
496     return;
497 
498   // Add padding to the alloca.
499   Type *AllocatedType =
500       Info.AI->isArrayAllocation()
501           ? ArrayType::get(
502                 Info.AI->getAllocatedType(),
503                 cast<ConstantInt>(Info.AI->getArraySize())->getZExtValue())
504           : Info.AI->getAllocatedType();
505   Type *PaddingType =
506       ArrayType::get(Type::getInt8Ty(F->getContext()), AlignedSize - Size);
507   Type *TypeWithPadding = StructType::get(AllocatedType, PaddingType);
508   auto *NewAI = new AllocaInst(
509       TypeWithPadding, Info.AI->getType()->getAddressSpace(), nullptr, "", Info.AI);
510   NewAI->takeName(Info.AI);
511   NewAI->setAlignment(Info.AI->getAlign());
512   NewAI->setUsedWithInAlloca(Info.AI->isUsedWithInAlloca());
513   NewAI->setSwiftError(Info.AI->isSwiftError());
514   NewAI->copyMetadata(*Info.AI);
515 
516   auto *NewPtr = new BitCastInst(NewAI, Info.AI->getType(), "", Info.AI);
517   Info.AI->replaceAllUsesWith(NewPtr);
518   Info.AI->eraseFromParent();
519   Info.AI = NewAI;
520 }
521 
522 // Helper function to check for post-dominance.
postDominates(const PostDominatorTree * PDT,const IntrinsicInst * A,const IntrinsicInst * B)523 static bool postDominates(const PostDominatorTree *PDT, const IntrinsicInst *A,
524                           const IntrinsicInst *B) {
525   const BasicBlock *ABB = A->getParent();
526   const BasicBlock *BBB = B->getParent();
527 
528   if (ABB != BBB)
529     return PDT->dominates(ABB, BBB);
530 
531   for (const Instruction &I : *ABB) {
532     if (&I == B)
533       return true;
534     if (&I == A)
535       return false;
536   }
537   llvm_unreachable("Corrupt instruction list");
538 }
539 
540 // FIXME: check for MTE extension
runOnFunction(Function & Fn)541 bool AArch64StackTagging::runOnFunction(Function &Fn) {
542   if (!Fn.hasFnAttribute(Attribute::SanitizeMemTag))
543     return false;
544 
545   if (UseStackSafety)
546     SSI = &getAnalysis<StackSafetyGlobalInfoWrapperPass>().getResult();
547   F = &Fn;
548   DL = &Fn.getParent()->getDataLayout();
549   if (MergeInit)
550     AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
551 
552   MapVector<AllocaInst *, AllocaInfo> Allocas; // need stable iteration order
553   SmallVector<Instruction *, 8> RetVec;
554   SmallVector<Instruction *, 4> UnrecognizedLifetimes;
555 
556   for (auto &BB : *F) {
557     for (BasicBlock::iterator IT = BB.begin(); IT != BB.end(); ++IT) {
558       Instruction *I = &*IT;
559       if (auto *AI = dyn_cast<AllocaInst>(I)) {
560         Allocas[AI].AI = AI;
561         Allocas[AI].OldAI = AI;
562         continue;
563       }
564 
565       if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(I)) {
566         for (Value *V : DVI->location_ops())
567           if (auto *AI = dyn_cast_or_null<AllocaInst>(V))
568             Allocas[AI].DbgVariableIntrinsics.push_back(DVI);
569         continue;
570       }
571 
572       auto *II = dyn_cast<IntrinsicInst>(I);
573       if (II && (II->getIntrinsicID() == Intrinsic::lifetime_start ||
574                  II->getIntrinsicID() == Intrinsic::lifetime_end)) {
575         AllocaInst *AI = findAllocaForValue(II->getArgOperand(1));
576         if (!AI) {
577           UnrecognizedLifetimes.push_back(I);
578           continue;
579         }
580         if (II->getIntrinsicID() == Intrinsic::lifetime_start)
581           Allocas[AI].LifetimeStart.push_back(II);
582         else
583           Allocas[AI].LifetimeEnd.push_back(II);
584       }
585 
586       if (isa<ReturnInst>(I) || isa<ResumeInst>(I) || isa<CleanupReturnInst>(I))
587         RetVec.push_back(I);
588     }
589   }
590 
591   if (Allocas.empty())
592     return false;
593 
594   int NextTag = 0;
595   int NumInterestingAllocas = 0;
596   for (auto &I : Allocas) {
597     AllocaInfo &Info = I.second;
598     assert(Info.AI);
599 
600     if (!isInterestingAlloca(*Info.AI)) {
601       Info.Tag = -1;
602       continue;
603     }
604 
605     alignAndPadAlloca(Info);
606     NumInterestingAllocas++;
607     Info.Tag = NextTag;
608     NextTag = (NextTag + 1) % 16;
609   }
610 
611   if (NumInterestingAllocas == 0)
612     return true;
613 
614   std::unique_ptr<DominatorTree> DeleteDT;
615   DominatorTree *DT = nullptr;
616   if (auto *P = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
617     DT = &P->getDomTree();
618 
619   if (DT == nullptr && (NumInterestingAllocas > 1 ||
620                         !F->hasFnAttribute(Attribute::OptimizeNone))) {
621     DeleteDT = std::make_unique<DominatorTree>(*F);
622     DT = DeleteDT.get();
623   }
624 
625   std::unique_ptr<PostDominatorTree> DeletePDT;
626   PostDominatorTree *PDT = nullptr;
627   if (auto *P = getAnalysisIfAvailable<PostDominatorTreeWrapperPass>())
628     PDT = &P->getPostDomTree();
629 
630   if (PDT == nullptr && !F->hasFnAttribute(Attribute::OptimizeNone)) {
631     DeletePDT = std::make_unique<PostDominatorTree>(*F);
632     PDT = DeletePDT.get();
633   }
634 
635   SetTagFunc =
636       Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_settag);
637 
638   Instruction *Base = insertBaseTaggedPointer(Allocas, DT);
639 
640   for (auto &I : Allocas) {
641     const AllocaInfo &Info = I.second;
642     AllocaInst *AI = Info.AI;
643     if (Info.Tag < 0)
644       continue;
645 
646     // Replace alloca with tagp(alloca).
647     IRBuilder<> IRB(Info.AI->getNextNode());
648     Function *TagP = Intrinsic::getDeclaration(
649         F->getParent(), Intrinsic::aarch64_tagp, {Info.AI->getType()});
650     Instruction *TagPCall =
651         IRB.CreateCall(TagP, {Constant::getNullValue(Info.AI->getType()), Base,
652                               ConstantInt::get(IRB.getInt64Ty(), Info.Tag)});
653     if (Info.AI->hasName())
654       TagPCall->setName(Info.AI->getName() + ".tag");
655     Info.AI->replaceAllUsesWith(TagPCall);
656     TagPCall->setOperand(0, Info.AI);
657 
658     if (UnrecognizedLifetimes.empty() && Info.LifetimeStart.size() == 1 &&
659         Info.LifetimeEnd.size() == 1) {
660       IntrinsicInst *Start = Info.LifetimeStart[0];
661       IntrinsicInst *End = Info.LifetimeEnd[0];
662       uint64_t Size =
663           cast<ConstantInt>(Start->getArgOperand(0))->getZExtValue();
664       Size = alignTo(Size, kTagGranuleSize);
665       tagAlloca(AI, Start->getNextNode(), Start->getArgOperand(1), Size);
666       // We need to ensure that if we tag some object, we certainly untag it
667       // before the function exits.
668       if (PDT != nullptr && postDominates(PDT, End, Start)) {
669         untagAlloca(AI, End, Size);
670       } else {
671         SmallVector<Instruction *, 8> ReachableRetVec;
672         unsigned NumCoveredExits = 0;
673         for (auto &RI : RetVec) {
674           if (!isPotentiallyReachable(Start, RI, nullptr, DT))
675             continue;
676           ReachableRetVec.push_back(RI);
677           if (DT != nullptr && DT->dominates(End, RI))
678             ++NumCoveredExits;
679         }
680         // If there's a mix of covered and non-covered exits, just put the untag
681         // on exits, so we avoid the redundancy of untagging twice.
682         if (NumCoveredExits == ReachableRetVec.size()) {
683           untagAlloca(AI, End, Size);
684         } else {
685           for (auto &RI : ReachableRetVec)
686             untagAlloca(AI, RI, Size);
687           // We may have inserted untag outside of the lifetime interval.
688           // Remove the lifetime end call for this alloca.
689           End->eraseFromParent();
690         }
691       }
692     } else {
693       uint64_t Size = Info.AI->getAllocationSizeInBits(*DL).getValue() / 8;
694       Value *Ptr = IRB.CreatePointerCast(TagPCall, IRB.getInt8PtrTy());
695       tagAlloca(AI, &*IRB.GetInsertPoint(), Ptr, Size);
696       for (auto &RI : RetVec) {
697         untagAlloca(AI, RI, Size);
698       }
699       // We may have inserted tag/untag outside of any lifetime interval.
700       // Remove all lifetime intrinsics for this alloca.
701       for (auto &II : Info.LifetimeStart)
702         II->eraseFromParent();
703       for (auto &II : Info.LifetimeEnd)
704         II->eraseFromParent();
705     }
706 
707     // Fixup debug intrinsics to point to the new alloca.
708     for (auto DVI : Info.DbgVariableIntrinsics)
709       DVI->replaceVariableLocationOp(Info.OldAI, Info.AI);
710   }
711 
712   // If we have instrumented at least one alloca, all unrecognized lifetime
713   // instrinsics have to go.
714   for (auto &I : UnrecognizedLifetimes)
715     I->eraseFromParent();
716 
717   return true;
718 }
719