//===- HWAddressSanitizer.cpp - memory access error detector --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file is a part of HWAddressSanitizer, an address basic correctness
/// checker based on tagged addressing.
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/StackSafetyAnalysis.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MD5.h"
#include "llvm/Support/RandomNumberGenerator.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Instrumentation.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/MemoryTaggingSupport.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <optional>
#include <random>

using namespace llvm;

#define DEBUG_TYPE "hwasan"

const char kHwasanModuleCtorName[] = "hwasan.module_ctor";
const char kHwasanNoteName[] = "hwasan.note";
const char kHwasanInitName[] = "__hwasan_init";
const char kHwasanPersonalityThunkName[] = "__hwasan_personality_thunk";

const char kHwasanShadowMemoryDynamicAddress[] =
    "__hwasan_shadow_memory_dynamic_address";

// Access sizes are powers of two: 1, 2, 4, 8, 16.
static const size_t kNumberOfAccessSizes = 5;

static const size_t kDefaultShadowScale = 4;

static const unsigned kShadowBaseAlignment = 32;

namespace {
enum class OffsetKind {
  kFixed = 0,
  kGlobal,
  kIfunc,
  kTls,
};
}

static cl::opt<std::string>
    ClMemoryAccessCallbackPrefix("hwasan-memory-access-callback-prefix",
                                 cl::desc("Prefix for memory access callbacks"),
                                 cl::Hidden, cl::init("__hwasan_"));

static cl::opt<bool> ClKasanMemIntrinCallbackPrefix(
    "hwasan-kernel-mem-intrinsic-prefix",
    cl::desc("Use prefix for memory intrinsics in KASAN mode"), cl::Hidden,
    cl::init(false));

static cl::opt<bool> ClInstrumentWithCalls(
    "hwasan-instrument-with-calls",
    cl::desc("instrument reads and writes with callbacks"), cl::Hidden,
    cl::init(false));

static cl::opt<bool> ClInstrumentReads("hwasan-instrument-reads",
                                       cl::desc("instrument read instructions"),
                                       cl::Hidden, cl::init(true));

static cl::opt<bool>
    ClInstrumentWrites("hwasan-instrument-writes",
                       cl::desc("instrument write instructions"), cl::Hidden,
                       cl::init(true));

static cl::opt<bool> ClInstrumentAtomics(
    "hwasan-instrument-atomics",
    cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden,
    cl::init(true));

static cl::opt<bool> ClInstrumentByval("hwasan-instrument-byval",
                                       cl::desc("instrument byval arguments"),
                                       cl::Hidden, cl::init(true));

static cl::opt<bool>
    ClRecover("hwasan-recover",
              cl::desc("Enable recovery mode (continue-after-error)."),
              cl::Hidden, cl::init(false));

static cl::opt<bool> ClInstrumentStack("hwasan-instrument-stack",
                                       cl::desc("instrument stack (allocas)"),
                                       cl::Hidden, cl::init(true));

static cl::opt<bool>
    ClUseStackSafety("hwasan-use-stack-safety", cl::Hidden, cl::init(true),
                     cl::desc("Use Stack Safety analysis results"),
                     cl::Optional);

static cl::opt<size_t> ClMaxLifetimes(
    "hwasan-max-lifetimes-for-alloca", cl::ReallyHidden, cl::init(3),
    cl::desc("How many lifetime ends to handle for a single alloca."),
    cl::Optional);

static cl::opt<bool>
    ClUseAfterScope("hwasan-use-after-scope",
                    cl::desc("detect use after scope within function"),
                    cl::Hidden, cl::init(true));

static cl::opt<bool> ClGenerateTagsWithCalls(
    "hwasan-generate-tags-with-calls",
    cl::desc("generate new tags with runtime library calls"), cl::Hidden,
    cl::init(false));

static cl::opt<bool> ClGlobals("hwasan-globals", cl::desc("Instrument globals"),
                               cl::Hidden, cl::init(false));

static cl::opt<int> ClMatchAllTag(
    "hwasan-match-all-tag",
    cl::desc("don't report bad accesses via pointers with this tag"),
    cl::Hidden, cl::init(-1));

static cl::opt<bool>
    ClEnableKhwasan("hwasan-kernel",
                    cl::desc("Enable KernelHWAddressSanitizer instrumentation"),
                    cl::Hidden, cl::init(false));

// These flags allow changing the shadow mapping and control how shadow memory
// is accessed. The shadow mapping looks like:
//    Shadow = (Mem >> scale) + offset
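//
// For example, with the default scale of 4 (16-byte granules) and a fixed
// offset of 0, the granule at address 0x1000 is described by the shadow byte
// at address 0x100.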

static cl::opt<uint64_t>
    ClMappingOffset("hwasan-mapping-offset",
                    cl::desc("HWASan shadow mapping offset [EXPERIMENTAL]"),
                    cl::Hidden);

static cl::opt<OffsetKind> ClMappingOffsetDynamic(
    "hwasan-mapping-offset-dynamic",
    cl::desc("HWASan shadow mapping dynamic offset location"), cl::Hidden,
    cl::values(clEnumValN(OffsetKind::kGlobal, "global", "Use global"),
               clEnumValN(OffsetKind::kIfunc, "ifunc", "Use ifunc global"),
               clEnumValN(OffsetKind::kTls, "tls", "Use TLS")));

static cl::opt<bool>
    ClFrameRecords("hwasan-with-frame-record",
                   cl::desc("Use ring buffer for stack allocations"),
                   cl::Hidden);

static cl::opt<int> ClHotPercentileCutoff("hwasan-percentile-cutoff-hot",
                                          cl::desc("Hot percentile cutoff."));

static cl::opt<float>
    ClRandomSkipRate("hwasan-random-rate",
                     cl::desc("Probability value in the range [0.0, 1.0] "
                              "to keep instrumentation of a function."));

STATISTIC(NumTotalFuncs, "Number of total funcs");
STATISTIC(NumInstrumentedFuncs, "Number of instrumented funcs");
STATISTIC(NumNoProfileSummaryFuncs, "Number of funcs without profile summary");

// Mode for selecting how to insert frame record info into the stack ring
// buffer.
enum RecordStackHistoryMode {
  // Do not record frame record info.
  none,

  // Insert instructions into the prologue for storing into the stack ring
  // buffer directly.
  instr,

  // Add a call to __hwasan_add_frame_record in the runtime.
  libcall,
};

static cl::opt<RecordStackHistoryMode> ClRecordStackHistory(
    "hwasan-record-stack-history",
    cl::desc("Record stack frames with tagged allocations in a thread-local "
             "ring buffer"),
    cl::values(clEnumVal(none, "Do not record stack ring history"),
               clEnumVal(instr, "Insert instructions into the prologue for "
                                "storing into the stack ring buffer directly"),
               clEnumVal(libcall, "Add a call to __hwasan_add_frame_record for "
                                  "storing into the stack ring buffer")),
    cl::Hidden, cl::init(instr));

static cl::opt<bool>
    ClInstrumentMemIntrinsics("hwasan-instrument-mem-intrinsics",
                              cl::desc("instrument memory intrinsics"),
                              cl::Hidden, cl::init(true));

static cl::opt<bool>
    ClInstrumentLandingPads("hwasan-instrument-landing-pads",
                            cl::desc("instrument landing pads"), cl::Hidden,
                            cl::init(false));

static cl::opt<bool> ClUseShortGranules(
    "hwasan-use-short-granules",
    cl::desc("use short granules in allocas and outlined checks"), cl::Hidden,
    cl::init(false));

static cl::opt<bool> ClInstrumentPersonalityFunctions(
    "hwasan-instrument-personality-functions",
    cl::desc("instrument personality functions"), cl::Hidden);

static cl::opt<bool> ClInlineAllChecks("hwasan-inline-all-checks",
                                       cl::desc("inline all checks"),
                                       cl::Hidden, cl::init(false));

static cl::opt<bool> ClInlineFastPathChecks("hwasan-inline-fast-path-checks",
                                            cl::desc("inline fast path checks"),
                                            cl::Hidden, cl::init(false));

// Enabled from clang by "-fsanitize-hwaddress-experimental-aliasing".
static cl::opt<bool> ClUsePageAliases("hwasan-experimental-use-page-aliases",
                                      cl::desc("Use page aliasing in HWASan"),
                                      cl::Hidden, cl::init(false));

namespace {

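// Returns the value of Opt if it was set explicitly on the command line, and
// Other otherwise.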
template <typename T> T optOr(cl::opt<T> &Opt, T Other) {
  return Opt.getNumOccurrences() ? Opt : Other;
}

bool shouldUsePageAliases(const Triple &TargetTriple) {
  return ClUsePageAliases && TargetTriple.getArch() == Triple::x86_64;
}

bool shouldInstrumentStack(const Triple &TargetTriple) {
  return !shouldUsePageAliases(TargetTriple) && ClInstrumentStack;
}

bool shouldInstrumentWithCalls(const Triple &TargetTriple) {
  return optOr(ClInstrumentWithCalls, TargetTriple.getArch() == Triple::x86_64);
}

bool mightUseStackSafetyAnalysis(bool DisableOptimization) {
  return optOr(ClUseStackSafety, !DisableOptimization);
}

bool shouldUseStackSafetyAnalysis(const Triple &TargetTriple,
                                  bool DisableOptimization) {
  return shouldInstrumentStack(TargetTriple) &&
         mightUseStackSafetyAnalysis(DisableOptimization);
}

bool shouldDetectUseAfterScope(const Triple &TargetTriple) {
  return ClUseAfterScope && shouldInstrumentStack(TargetTriple);
}

/// An instrumentation pass implementing detection of addressability bugs
/// using tagged pointers.
class HWAddressSanitizer {
public:
  HWAddressSanitizer(Module &M, bool CompileKernel, bool Recover,
                     const StackSafetyGlobalInfo *SSI)
      : M(M), SSI(SSI) {
    this->Recover = optOr(ClRecover, Recover);
    this->CompileKernel = optOr(ClEnableKhwasan, CompileKernel);
    this->Rng = ClRandomSkipRate.getNumOccurrences() ? M.createRNG(DEBUG_TYPE)
                                                     : nullptr;

    initializeModule();
  }

  void sanitizeFunction(Function &F, FunctionAnalysisManager &FAM);

private:
  struct ShadowTagCheckInfo {
    Instruction *TagMismatchTerm = nullptr;
    Value *PtrLong = nullptr;
    Value *AddrLong = nullptr;
    Value *PtrTag = nullptr;
    Value *MemTag = nullptr;
  };

  bool selectiveInstrumentationShouldSkip(Function &F,
                                          FunctionAnalysisManager &FAM) const;
  void initializeModule();
  void createHwasanCtorComdat();
  void removeFnAttributes(Function *F);

  void initializeCallbacks(Module &M);

  Value *getOpaqueNoopCast(IRBuilder<> &IRB, Value *Val);

  Value *getDynamicShadowIfunc(IRBuilder<> &IRB);
  Value *getShadowNonTls(IRBuilder<> &IRB);

  void untagPointerOperand(Instruction *I, Value *Addr);
  Value *memToShadow(Value *Mem, IRBuilder<> &IRB);

  int64_t getAccessInfo(bool IsWrite, unsigned AccessSizeIndex);
  ShadowTagCheckInfo insertShadowTagCheck(Value *Ptr, Instruction *InsertBefore,
                                          DomTreeUpdater &DTU, LoopInfo *LI);
  void instrumentMemAccessOutline(Value *Ptr, bool IsWrite,
                                  unsigned AccessSizeIndex,
                                  Instruction *InsertBefore,
                                  DomTreeUpdater &DTU, LoopInfo *LI);
  void instrumentMemAccessInline(Value *Ptr, bool IsWrite,
                                 unsigned AccessSizeIndex,
                                 Instruction *InsertBefore, DomTreeUpdater &DTU,
                                 LoopInfo *LI);
  bool ignoreMemIntrinsic(OptimizationRemarkEmitter &ORE, MemIntrinsic *MI);
  void instrumentMemIntrinsic(MemIntrinsic *MI);
  bool instrumentMemAccess(InterestingMemoryOperand &O, DomTreeUpdater &DTU,
                           LoopInfo *LI, const DataLayout &DL);
  bool ignoreAccessWithoutRemark(Instruction *Inst, Value *Ptr);
  bool ignoreAccess(OptimizationRemarkEmitter &ORE, Instruction *Inst,
                    Value *Ptr);

  void getInterestingMemoryOperands(
      OptimizationRemarkEmitter &ORE, Instruction *I,
      const TargetLibraryInfo &TLI,
      SmallVectorImpl<InterestingMemoryOperand> &Interesting);

  void tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag, size_t Size);
  Value *tagPointer(IRBuilder<> &IRB, Type *Ty, Value *PtrLong, Value *Tag);
  Value *untagPointer(IRBuilder<> &IRB, Value *PtrLong);
  bool instrumentStack(memtag::StackInfo &Info, Value *StackTag, Value *UARTag,
                       const DominatorTree &DT, const PostDominatorTree &PDT,
                       const LoopInfo &LI);
  bool instrumentLandingPads(SmallVectorImpl<Instruction *> &RetVec);
  Value *getNextTagWithCall(IRBuilder<> &IRB);
  Value *getStackBaseTag(IRBuilder<> &IRB);
  Value *getAllocaTag(IRBuilder<> &IRB, Value *StackTag, unsigned AllocaNo);
  Value *getUARTag(IRBuilder<> &IRB);

  Value *getHwasanThreadSlotPtr(IRBuilder<> &IRB);
  Value *applyTagMask(IRBuilder<> &IRB, Value *OldTag);
  unsigned retagMask(unsigned AllocaNo);

  void emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord);

  void instrumentGlobal(GlobalVariable *GV, uint8_t Tag);
  void instrumentGlobals();

  Value *getCachedFP(IRBuilder<> &IRB);
  Value *getFrameRecordInfo(IRBuilder<> &IRB);

  void instrumentPersonalityFunctions();

  LLVMContext *C;
  Module &M;
  const StackSafetyGlobalInfo *SSI;
  Triple TargetTriple;
  std::unique_ptr<RandomNumberGenerator> Rng;

  /// This struct defines the shadow mapping using the rule:
  /// If `kFixed`, then
  ///   shadow = (mem >> Scale) + Offset.
  /// If `kGlobal`, then
  ///   extern char* __hwasan_shadow_memory_dynamic_address;
  ///   shadow = (mem >> Scale) + __hwasan_shadow_memory_dynamic_address
  /// If `kIfunc`, then
  ///   extern char __hwasan_shadow[];
  ///   shadow = (mem >> Scale) + &__hwasan_shadow
  /// If `kTls`, then
  ///   extern char *__hwasan_tls;
  ///   shadow = (mem >> Scale) + align_up(__hwasan_tls, kShadowBaseAlignment)
  ///
  /// If WithFrameRecord is true, then __hwasan_tls will be used to access the
  /// ring buffer for storing stack allocations on targets that support it.
  class ShadowMapping {
    OffsetKind Kind;
    uint64_t Offset;
    uint8_t Scale;
    bool WithFrameRecord;

    void SetFixed(uint64_t O) {
      Kind = OffsetKind::kFixed;
      Offset = O;
    }

  public:
    void init(Triple &TargetTriple, bool InstrumentWithCalls);
    Align getObjectAlignment() const { return Align(1ULL << Scale); }
    bool isInGlobal() const { return Kind == OffsetKind::kGlobal; }
    bool isInIfunc() const { return Kind == OffsetKind::kIfunc; }
    bool isInTls() const { return Kind == OffsetKind::kTls; }
    bool isFixed() const { return Kind == OffsetKind::kFixed; }
    uint8_t scale() const { return Scale; }
    uint64_t offset() const {
      assert(isFixed());
      return Offset;
    }
    bool withFrameRecord() const { return WithFrameRecord; }
  };

  ShadowMapping Mapping;

  Type *VoidTy = Type::getVoidTy(M.getContext());
  Type *IntptrTy = M.getDataLayout().getIntPtrType(M.getContext());
  PointerType *PtrTy = PointerType::getUnqual(M.getContext());
  Type *Int8Ty = Type::getInt8Ty(M.getContext());
  Type *Int32Ty = Type::getInt32Ty(M.getContext());
  Type *Int64Ty = Type::getInt64Ty(M.getContext());

  bool CompileKernel;
  bool Recover;
  bool OutlinedChecks;
  bool InlineFastPath;
  bool UseShortGranules;
  bool InstrumentLandingPads;
  bool InstrumentWithCalls;
  bool InstrumentStack;
  bool InstrumentGlobals;
  bool DetectUseAfterScope;
  bool UsePageAliases;
  bool UseMatchAllCallback;

  std::optional<uint8_t> MatchAllTag;

  unsigned PointerTagShift;
  uint64_t TagMaskByte;

  Function *HwasanCtorFunction;

  FunctionCallee HwasanMemoryAccessCallback[2][kNumberOfAccessSizes];
  FunctionCallee HwasanMemoryAccessCallbackSized[2];

  FunctionCallee HwasanMemmove, HwasanMemcpy, HwasanMemset;
  FunctionCallee HwasanHandleVfork;

  FunctionCallee HwasanTagMemoryFunc;
  FunctionCallee HwasanGenerateTagFunc;
  FunctionCallee HwasanRecordFrameRecordFunc;

  Constant *ShadowGlobal;

  Value *ShadowBase = nullptr;
  Value *StackBaseTag = nullptr;
  Value *CachedFP = nullptr;
  GlobalValue *ThreadPtrGlobal = nullptr;
};

} // end anonymous namespace

PreservedAnalyses HWAddressSanitizerPass::run(Module &M,
                                              ModuleAnalysisManager &MAM) {
  // Return early if nosanitize_hwaddress module flag is present for the module.
  if (checkIfAlreadyInstrumented(M, "nosanitize_hwaddress"))
    return PreservedAnalyses::all();
  const StackSafetyGlobalInfo *SSI = nullptr;
  auto TargetTriple = llvm::Triple(M.getTargetTriple());
  if (shouldUseStackSafetyAnalysis(TargetTriple, Options.DisableOptimization))
    SSI = &MAM.getResult<StackSafetyGlobalAnalysis>(M);

  HWAddressSanitizer HWASan(M, Options.CompileKernel, Options.Recover, SSI);
  auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
  for (Function &F : M)
    HWASan.sanitizeFunction(F, FAM);

  PreservedAnalyses PA = PreservedAnalyses::none();
  // DominatorTreeAnalysis, PostDominatorTreeAnalysis, and LoopAnalysis
  // are incrementally updated throughout this pass whenever
  // SplitBlockAndInsertIfThen is called.
  PA.preserve<DominatorTreeAnalysis>();
  PA.preserve<PostDominatorTreeAnalysis>();
  PA.preserve<LoopAnalysis>();
  // GlobalsAA is considered stateless and does not get invalidated unless
  // explicitly invalidated; PreservedAnalyses::none() is not enough. Sanitizers
  // make changes that require GlobalsAA to be invalidated.
  PA.abandon<GlobalsAA>();
  return PA;
}

void HWAddressSanitizerPass::printPipeline(
    raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
  static_cast<PassInfoMixin<HWAddressSanitizerPass> *>(this)->printPipeline(
      OS, MapClassName2PassName);
  OS << '<';
  if (Options.CompileKernel)
    OS << "kernel;";
  if (Options.Recover)
    OS << "recover";
  OS << '>';
}

void HWAddressSanitizer::createHwasanCtorComdat() {
  std::tie(HwasanCtorFunction, std::ignore) =
      getOrCreateSanitizerCtorAndInitFunctions(
          M, kHwasanModuleCtorName, kHwasanInitName,
          /*InitArgTypes=*/{},
          /*InitArgs=*/{},
          // This callback is invoked when the functions are created the first
          // time. Hook them into the global ctors list in that case:
          [&](Function *Ctor, FunctionCallee) {
            Comdat *CtorComdat = M.getOrInsertComdat(kHwasanModuleCtorName);
            Ctor->setComdat(CtorComdat);
            appendToGlobalCtors(M, Ctor, 0, Ctor);
          });

  // Create a note that contains pointers to the list of global
  // descriptors. Adding a note to the output file will cause the linker to
  // create a PT_NOTE program header pointing to the note that we can use to
  // find the descriptor list starting from the program headers. A function
  // provided by the runtime initializes the shadow memory for the globals by
  // accessing the descriptor list via the note. The dynamic loader needs to
  // call this function whenever a library is loaded.
  //
  // We use a note for this instead of the more conventional approach of
  // having a global constructor pass a descriptor list pointer to the runtime
  // because of an initialization-order problem. With constructors we can
  // encounter the following problematic scenario:
  //
  // 1) library A depends on library B and also interposes one of B's symbols
  // 2) B's constructors are called before A's (as required for correctness)
  // 3) during construction, B accesses one of its "own" globals (actually
  //    interposed by A) and triggers a HWASAN failure due to the initialization
  //    for A not having happened yet
  //
  // Even without interposition it is possible to run into similar situations in
  // cases where two libraries mutually depend on each other.
  //
  // We only need one note per binary, so put everything for the note in a
  // comdat. This needs to be a comdat with an .init_array section to prevent
  // newer versions of lld from discarding the note.
  //
  // Create the note even if we aren't instrumenting globals. This ensures that
  // binaries linked from object files with both instrumented and
  // non-instrumented globals will end up with a note, even if a comdat from an
  // object file with non-instrumented globals is selected. The note is harmless
  // if the runtime doesn't support it, since it will just be ignored.
  Comdat *NoteComdat = M.getOrInsertComdat(kHwasanModuleCtorName);

  Type *Int8Arr0Ty = ArrayType::get(Int8Ty, 0);
  auto *Start =
      new GlobalVariable(M, Int8Arr0Ty, true, GlobalVariable::ExternalLinkage,
                         nullptr, "__start_hwasan_globals");
  Start->setVisibility(GlobalValue::HiddenVisibility);
  auto *Stop =
      new GlobalVariable(M, Int8Arr0Ty, true, GlobalVariable::ExternalLinkage,
                         nullptr, "__stop_hwasan_globals");
  Stop->setVisibility(GlobalValue::HiddenVisibility);

  // The name is null-terminated and padded out to 8 bytes, which is required
  // to align the note properly.
  auto *Name = ConstantDataArray::get(*C, "LLVM\0\0\0");

  auto *NoteTy = StructType::get(Int32Ty, Int32Ty, Int32Ty, Name->getType(),
                                 Int32Ty, Int32Ty);
  auto *Note =
      new GlobalVariable(M, NoteTy, /*isConstant=*/true,
                         GlobalValue::PrivateLinkage, nullptr, kHwasanNoteName);
  Note->setSection(".note.hwasan.globals");
  Note->setComdat(NoteComdat);
  Note->setAlignment(Align(4));

  // The pointers in the note need to be relative so that the note ends up being
  // placed in rodata, which is the standard location for notes.
  auto CreateRelPtr = [&](Constant *Ptr) {
    return ConstantExpr::getTrunc(
        ConstantExpr::getSub(ConstantExpr::getPtrToInt(Ptr, Int64Ty),
                             ConstantExpr::getPtrToInt(Note, Int64Ty)),
        Int32Ty);
  };
  Note->setInitializer(ConstantStruct::getAnon(
      {ConstantInt::get(Int32Ty, 8),                           // n_namesz
       ConstantInt::get(Int32Ty, 8),                           // n_descsz
       ConstantInt::get(Int32Ty, ELF::NT_LLVM_HWASAN_GLOBALS), // n_type
       Name, CreateRelPtr(Start), CreateRelPtr(Stop)}));
  appendToCompilerUsed(M, Note);

  // Create a zero-length global in hwasan_globals so that the linker will
  // always create start and stop symbols.
  auto *Dummy = new GlobalVariable(
      M, Int8Arr0Ty, /*isConstantGlobal*/ true, GlobalVariable::PrivateLinkage,
      Constant::getNullValue(Int8Arr0Ty), "hwasan.dummy.global");
  Dummy->setSection("hwasan_globals");
  Dummy->setComdat(NoteComdat);
  Dummy->setMetadata(LLVMContext::MD_associated,
                     MDNode::get(*C, ValueAsMetadata::get(Note)));
  appendToCompilerUsed(M, Dummy);
}

void HWAddressSanitizer::removeFnAttributes(Function *F) {
  // Remove memory attributes that are invalid with HWASan.
  // HWASan checks read from shadow, which invalidates memory(argmem: *)
  // Short granule checks on function arguments read from the argument memory
  // (last byte of the granule), which invalidates writeonly.
  //
  // This is not limited to sanitized functions: attribute inference can mark
  // libc functions with these attributes, but that no longer holds once those
  // functions are instrumented (Android) or intercepted.
  //
  // We might want to model HWASan shadow memory more opaquely to get rid of
  // this problem altogether, by hiding the shadow memory write in an
  // intrinsic, essentially like in the AArch64StackTagging pass. But that's
  // for another day.

  // The API is weird. `onlyReadsMemory` actually means "does not write", and
  // `onlyWritesMemory` actually means "does not read". So we reconstruct
  // "accesses memory" && "does not read" <=> "writes".
  bool Changed = false;
  if (!F->doesNotAccessMemory()) {
    bool WritesMemory = !F->onlyReadsMemory();
    bool ReadsMemory = !F->onlyWritesMemory();
    if ((WritesMemory && !ReadsMemory) || F->onlyAccessesArgMemory()) {
      F->removeFnAttr(Attribute::Memory);
      Changed = true;
    }
  }
  for (Argument &A : F->args()) {
    if (A.hasAttribute(Attribute::WriteOnly)) {
      A.removeAttr(Attribute::WriteOnly);
      Changed = true;
    }
  }
  if (Changed) {
    // nobuiltin makes sure later passes don't restore assumptions about
    // the function.
    F->addFnAttr(Attribute::NoBuiltin);
  }
}

/// Module-level initialization.
///
/// Inserts a call to __hwasan_init into the module's constructor list.
void HWAddressSanitizer::initializeModule() {
  LLVM_DEBUG(dbgs() << "Init " << M.getName() << "\n");
  TargetTriple = Triple(M.getTargetTriple());

  for (Function &F : M.functions())
    removeFnAttributes(&F);

  // x86_64 currently has two modes:
  // - Intel LAM (default)
  // - pointer aliasing (heap only)
  bool IsX86_64 = TargetTriple.getArch() == Triple::x86_64;
  UsePageAliases = shouldUsePageAliases(TargetTriple);
  InstrumentWithCalls = shouldInstrumentWithCalls(TargetTriple);
  InstrumentStack = shouldInstrumentStack(TargetTriple);
  DetectUseAfterScope = shouldDetectUseAfterScope(TargetTriple);
  PointerTagShift = IsX86_64 ? 57 : 56;
  TagMaskByte = IsX86_64 ? 0x3F : 0xFF;

  Mapping.init(TargetTriple, InstrumentWithCalls);

  C = &(M.getContext());
  IRBuilder<> IRB(*C);

  HwasanCtorFunction = nullptr;

  // Older versions of Android do not have the required runtime support for
  // short granules, global or personality function instrumentation. On other
  // platforms we currently require using the latest version of the runtime.
  bool NewRuntime =
      !TargetTriple.isAndroid() || !TargetTriple.isAndroidVersionLT(30);

  UseShortGranules = optOr(ClUseShortGranules, NewRuntime);
  OutlinedChecks = (TargetTriple.isAArch64() || TargetTriple.isRISCV64()) &&
                   TargetTriple.isOSBinFormatELF() &&
                   !optOr(ClInlineAllChecks, Recover);

  // These platforms may prefer less inlining to reduce binary size.
  InlineFastPath = optOr(ClInlineFastPathChecks, !(TargetTriple.isAndroid() ||
                                                   TargetTriple.isOSFuchsia()));

  if (ClMatchAllTag.getNumOccurrences()) {
    if (ClMatchAllTag != -1) {
      MatchAllTag = ClMatchAllTag & 0xFF;
    }
  } else if (CompileKernel) {
    MatchAllTag = 0xFF;
  }
  UseMatchAllCallback = !CompileKernel && MatchAllTag.has_value();

  // If we don't have personality function support, fall back to landing pads.
  InstrumentLandingPads = optOr(ClInstrumentLandingPads, !NewRuntime);

  InstrumentGlobals =
      !CompileKernel && !UsePageAliases && optOr(ClGlobals, NewRuntime);

  if (!CompileKernel) {
    createHwasanCtorComdat();

    if (InstrumentGlobals)
      instrumentGlobals();

    bool InstrumentPersonalityFunctions =
        optOr(ClInstrumentPersonalityFunctions, NewRuntime);
    if (InstrumentPersonalityFunctions)
      instrumentPersonalityFunctions();
  }

  if (!TargetTriple.isAndroid()) {
    Constant *C = M.getOrInsertGlobal("__hwasan_tls", IntptrTy, [&] {
      auto *GV = new GlobalVariable(M, IntptrTy, /*isConstant=*/false,
                                    GlobalValue::ExternalLinkage, nullptr,
                                    "__hwasan_tls", nullptr,
                                    GlobalVariable::InitialExecTLSModel);
      appendToCompilerUsed(M, GV);
      return GV;
    });
    ThreadPtrGlobal = cast<GlobalVariable>(C);
  }
}

void HWAddressSanitizer::initializeCallbacks(Module &M) {
  IRBuilder<> IRB(*C);
  const std::string MatchAllStr = UseMatchAllCallback ? "_match_all" : "";
  FunctionType *HwasanMemoryAccessCallbackSizedFnTy,
      *HwasanMemoryAccessCallbackFnTy, *HwasanMemTransferFnTy,
      *HwasanMemsetFnTy;
  if (UseMatchAllCallback) {
    HwasanMemoryAccessCallbackSizedFnTy =
        FunctionType::get(VoidTy, {IntptrTy, IntptrTy, Int8Ty}, false);
    HwasanMemoryAccessCallbackFnTy =
        FunctionType::get(VoidTy, {IntptrTy, Int8Ty}, false);
    HwasanMemTransferFnTy =
        FunctionType::get(PtrTy, {PtrTy, PtrTy, IntptrTy, Int8Ty}, false);
    HwasanMemsetFnTy =
        FunctionType::get(PtrTy, {PtrTy, Int32Ty, IntptrTy, Int8Ty}, false);
  } else {
    HwasanMemoryAccessCallbackSizedFnTy =
        FunctionType::get(VoidTy, {IntptrTy, IntptrTy}, false);
    HwasanMemoryAccessCallbackFnTy =
        FunctionType::get(VoidTy, {IntptrTy}, false);
    HwasanMemTransferFnTy =
        FunctionType::get(PtrTy, {PtrTy, PtrTy, IntptrTy}, false);
    HwasanMemsetFnTy =
        FunctionType::get(PtrTy, {PtrTy, Int32Ty, IntptrTy}, false);
  }

  for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) {
    const std::string TypeStr = AccessIsWrite ? "store" : "load";
    const std::string EndingStr = Recover ? "_noabort" : "";

    HwasanMemoryAccessCallbackSized[AccessIsWrite] = M.getOrInsertFunction(
        ClMemoryAccessCallbackPrefix + TypeStr + "N" + MatchAllStr + EndingStr,
        HwasanMemoryAccessCallbackSizedFnTy);

    for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
         AccessSizeIndex++) {
      HwasanMemoryAccessCallback[AccessIsWrite][AccessSizeIndex] =
          M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + TypeStr +
                                    itostr(1ULL << AccessSizeIndex) +
                                    MatchAllStr + EndingStr,
                                HwasanMemoryAccessCallbackFnTy);
    }
  }

  const std::string MemIntrinCallbackPrefix =
      (CompileKernel && !ClKasanMemIntrinCallbackPrefix)
          ? std::string("")
          : ClMemoryAccessCallbackPrefix;

  HwasanMemmove = M.getOrInsertFunction(
      MemIntrinCallbackPrefix + "memmove" + MatchAllStr, HwasanMemTransferFnTy);
  HwasanMemcpy = M.getOrInsertFunction(
      MemIntrinCallbackPrefix + "memcpy" + MatchAllStr, HwasanMemTransferFnTy);
  HwasanMemset = M.getOrInsertFunction(
      MemIntrinCallbackPrefix + "memset" + MatchAllStr, HwasanMemsetFnTy);

  HwasanTagMemoryFunc = M.getOrInsertFunction("__hwasan_tag_memory", VoidTy,
                                              PtrTy, Int8Ty, IntptrTy);
  HwasanGenerateTagFunc =
      M.getOrInsertFunction("__hwasan_generate_tag", Int8Ty);

  HwasanRecordFrameRecordFunc =
      M.getOrInsertFunction("__hwasan_add_frame_record", VoidTy, Int64Ty);

  ShadowGlobal =
      M.getOrInsertGlobal("__hwasan_shadow", ArrayType::get(Int8Ty, 0));

  HwasanHandleVfork =
      M.getOrInsertFunction("__hwasan_handle_vfork", VoidTy, IntptrTy);
}

Value *HWAddressSanitizer::getOpaqueNoopCast(IRBuilder<> &IRB, Value *Val) {
  // An empty inline asm with input reg == output reg.
  // An opaque no-op cast, basically.
  // This prevents code bloat as a result of rematerializing trivial definitions
  // such as constants or global addresses at every load and store.
  InlineAsm *Asm =
      InlineAsm::get(FunctionType::get(PtrTy, {Val->getType()}, false),
                     StringRef(""), StringRef("=r,0"),
                     /*hasSideEffects=*/false);
  return IRB.CreateCall(Asm, {Val}, ".hwasan.shadow");
}

Value *HWAddressSanitizer::getDynamicShadowIfunc(IRBuilder<> &IRB) {
  return getOpaqueNoopCast(IRB, ShadowGlobal);
}

Value *HWAddressSanitizer::getShadowNonTls(IRBuilder<> &IRB) {
  if (Mapping.isFixed()) {
    return getOpaqueNoopCast(
        IRB, ConstantExpr::getIntToPtr(
                 ConstantInt::get(IntptrTy, Mapping.offset()), PtrTy));
  }

  if (Mapping.isInIfunc())
    return getDynamicShadowIfunc(IRB);

  Value *GlobalDynamicAddress =
      IRB.GetInsertBlock()->getParent()->getParent()->getOrInsertGlobal(
          kHwasanShadowMemoryDynamicAddress, PtrTy);
  return IRB.CreateLoad(PtrTy, GlobalDynamicAddress);
}

bool HWAddressSanitizer::ignoreAccessWithoutRemark(Instruction *Inst,
                                                   Value *Ptr) {
  // Do not instrument accesses from different address spaces; we cannot deal
  // with them.
  Type *PtrTy = cast<PointerType>(Ptr->getType()->getScalarType());
  if (PtrTy->getPointerAddressSpace() != 0)
    return true;

  // Ignore swifterror addresses.
  // swifterror memory addresses are mem2reg promoted by instruction
  // selection. As such they cannot have regular uses like an instrumentation
  // function and it makes no sense to track them as memory.
  if (Ptr->isSwiftError())
    return true;

  if (findAllocaForValue(Ptr)) {
    if (!InstrumentStack)
      return true;
    if (SSI && SSI->stackAccessIsSafe(*Inst))
      return true;
  }

  if (isa<GlobalVariable>(getUnderlyingObject(Ptr))) {
    if (!InstrumentGlobals)
      return true;
    // TODO: Optimize inbound global accesses, like Asan `instrumentMop`.
  }

  return false;
}

bool HWAddressSanitizer::ignoreAccess(OptimizationRemarkEmitter &ORE,
                                      Instruction *Inst, Value *Ptr) {
  bool Ignored = ignoreAccessWithoutRemark(Inst, Ptr);
  if (Ignored) {
    ORE.emit(
        [&]() { return OptimizationRemark(DEBUG_TYPE, "ignoreAccess", Inst); });
  } else {
    ORE.emit([&]() {
      return OptimizationRemarkMissed(DEBUG_TYPE, "ignoreAccess", Inst);
    });
  }
  return Ignored;
}

void HWAddressSanitizer::getInterestingMemoryOperands(
    OptimizationRemarkEmitter &ORE, Instruction *I,
    const TargetLibraryInfo &TLI,
    SmallVectorImpl<InterestingMemoryOperand> &Interesting) {
  // Skip memory accesses inserted by other instrumentation passes.
  if (I->hasMetadata(LLVMContext::MD_nosanitize))
    return;

  // Do not instrument the load fetching the dynamic shadow address.
  if (ShadowBase == I)
    return;

  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
    if (!ClInstrumentReads || ignoreAccess(ORE, I, LI->getPointerOperand()))
      return;
    Interesting.emplace_back(I, LI->getPointerOperandIndex(), false,
                             LI->getType(), LI->getAlign());
  } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
    if (!ClInstrumentWrites || ignoreAccess(ORE, I, SI->getPointerOperand()))
      return;
    Interesting.emplace_back(I, SI->getPointerOperandIndex(), true,
                             SI->getValueOperand()->getType(), SI->getAlign());
  } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
    if (!ClInstrumentAtomics || ignoreAccess(ORE, I, RMW->getPointerOperand()))
      return;
    Interesting.emplace_back(I, RMW->getPointerOperandIndex(), true,
                             RMW->getValOperand()->getType(), std::nullopt);
  } else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
    if (!ClInstrumentAtomics || ignoreAccess(ORE, I, XCHG->getPointerOperand()))
      return;
    Interesting.emplace_back(I, XCHG->getPointerOperandIndex(), true,
                             XCHG->getCompareOperand()->getType(),
                             std::nullopt);
  } else if (auto *CI = dyn_cast<CallInst>(I)) {
    for (unsigned ArgNo = 0; ArgNo < CI->arg_size(); ArgNo++) {
      if (!ClInstrumentByval || !CI->isByValArgument(ArgNo) ||
          ignoreAccess(ORE, I, CI->getArgOperand(ArgNo)))
        continue;
      Type *Ty = CI->getParamByValType(ArgNo);
      Interesting.emplace_back(I, ArgNo, false, Ty, Align(1));
    }
    maybeMarkSanitizerLibraryCallNoBuiltin(CI, &TLI);
  }
}

static unsigned getPointerOperandIndex(Instruction *I) {
  if (LoadInst *LI = dyn_cast<LoadInst>(I))
    return LI->getPointerOperandIndex();
  if (StoreInst *SI = dyn_cast<StoreInst>(I))
    return SI->getPointerOperandIndex();
  if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I))
    return RMW->getPointerOperandIndex();
  if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I))
    return XCHG->getPointerOperandIndex();
  report_fatal_error("Unexpected instruction");
  return -1;
}

static size_t TypeSizeToSizeIndex(uint32_t TypeSize) {
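  // TypeSize is in bits, so e.g. a 64-bit (8-byte) access maps to index
  // countr_zero(64 / 8) == 3, the fourth of the five supported access sizes.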
  size_t Res = llvm::countr_zero(TypeSize / 8);
  assert(Res < kNumberOfAccessSizes);
  return Res;
}

void HWAddressSanitizer::untagPointerOperand(Instruction *I, Value *Addr) {
  if (TargetTriple.isAArch64() || TargetTriple.getArch() == Triple::x86_64 ||
      TargetTriple.isRISCV64())
    return;

  IRBuilder<> IRB(I);
  Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
  Value *UntaggedPtr =
      IRB.CreateIntToPtr(untagPointer(IRB, AddrLong), Addr->getType());
  I->setOperand(getPointerOperandIndex(I), UntaggedPtr);
}

Value *HWAddressSanitizer::memToShadow(Value *Mem, IRBuilder<> &IRB) {
  // Mem >> Scale
  Value *Shadow = IRB.CreateLShr(Mem, Mapping.scale());
  if (Mapping.isFixed() && Mapping.offset() == 0)
    return IRB.CreateIntToPtr(Shadow, PtrTy);
  // (Mem >> Scale) + Offset
  return IRB.CreatePtrAdd(ShadowBase, Shadow);
}

int64_t HWAddressSanitizer::getAccessInfo(bool IsWrite,
                                          unsigned AccessSizeIndex) {
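  // Pack the parameters of the check into a bit field. The low bits
  // (HWASanAccessInfo::RuntimeMask) are encoded into the trap instruction's
  // immediate by instrumentMemAccessInline below, which lets the runtime
  // recover the access type and size at the fault site.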
  return (CompileKernel << HWASanAccessInfo::CompileKernelShift) |
         (MatchAllTag.has_value() << HWASanAccessInfo::HasMatchAllShift) |
         (MatchAllTag.value_or(0) << HWASanAccessInfo::MatchAllShift) |
         (Recover << HWASanAccessInfo::RecoverShift) |
         (IsWrite << HWASanAccessInfo::IsWriteShift) |
         (AccessSizeIndex << HWASanAccessInfo::AccessSizeShift);
}

HWAddressSanitizer::ShadowTagCheckInfo
HWAddressSanitizer::insertShadowTagCheck(Value *Ptr, Instruction *InsertBefore,
                                         DomTreeUpdater &DTU, LoopInfo *LI) {
  ShadowTagCheckInfo R;

  IRBuilder<> IRB(InsertBefore);

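  // Compute the pointer's tag (its top byte) and load the shadow tag for the
  // untagged address; branch to a new block (TagMismatchTerm) if they differ.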
  R.PtrLong = IRB.CreatePointerCast(Ptr, IntptrTy);
  R.PtrTag =
      IRB.CreateTrunc(IRB.CreateLShr(R.PtrLong, PointerTagShift), Int8Ty);
  R.AddrLong = untagPointer(IRB, R.PtrLong);
  Value *Shadow = memToShadow(R.AddrLong, IRB);
  R.MemTag = IRB.CreateLoad(Int8Ty, Shadow);
  Value *TagMismatch = IRB.CreateICmpNE(R.PtrTag, R.MemTag);

  if (MatchAllTag.has_value()) {
    Value *TagNotIgnored = IRB.CreateICmpNE(
        R.PtrTag, ConstantInt::get(R.PtrTag->getType(), *MatchAllTag));
    TagMismatch = IRB.CreateAnd(TagMismatch, TagNotIgnored);
  }

  R.TagMismatchTerm = SplitBlockAndInsertIfThen(
      TagMismatch, InsertBefore, false,
      MDBuilder(*C).createUnlikelyBranchWeights(), &DTU, LI);

  return R;
}

void HWAddressSanitizer::instrumentMemAccessOutline(Value *Ptr, bool IsWrite,
                                                    unsigned AccessSizeIndex,
                                                    Instruction *InsertBefore,
                                                    DomTreeUpdater &DTU,
                                                    LoopInfo *LI) {
  assert(!UsePageAliases);
  const int64_t AccessInfo = getAccessInfo(IsWrite, AccessSizeIndex);

  if (InlineFastPath)
    InsertBefore =
        insertShadowTagCheck(Ptr, InsertBefore, DTU, LI).TagMismatchTerm;

  IRBuilder<> IRB(InsertBefore);
  bool UseFixedShadowIntrinsic = false;
  // The memaccess fixed shadow intrinsic is only supported on AArch64,
  // which allows a 16-bit immediate to be left-shifted by 32.
  // Since kShadowBaseAlignment == 32, and Linux by default will not
  // mmap above 48-bits, practically any valid shadow offset is
  // representable.
  // In particular, an offset of 4TB (1024 << 32) is representable, and
  // ought to be good enough for anybody.
  if (TargetTriple.isAArch64() && Mapping.isFixed()) {
    uint16_t OffsetShifted = Mapping.offset() >> 32;
    UseFixedShadowIntrinsic =
        static_cast<uint64_t>(OffsetShifted) << 32 == Mapping.offset();
  }

  if (UseFixedShadowIntrinsic) {
    IRB.CreateIntrinsic(
        UseShortGranules
            ? Intrinsic::hwasan_check_memaccess_shortgranules_fixedshadow
            : Intrinsic::hwasan_check_memaccess_fixedshadow,
        {},
        {Ptr, ConstantInt::get(Int32Ty, AccessInfo),
         ConstantInt::get(Int64Ty, Mapping.offset())});
  } else {
    IRB.CreateIntrinsic(
        UseShortGranules ? Intrinsic::hwasan_check_memaccess_shortgranules
                         : Intrinsic::hwasan_check_memaccess,
        {}, {ShadowBase, Ptr, ConstantInt::get(Int32Ty, AccessInfo)});
  }
}

void HWAddressSanitizer::instrumentMemAccessInline(Value *Ptr, bool IsWrite,
                                                   unsigned AccessSizeIndex,
                                                   Instruction *InsertBefore,
                                                   DomTreeUpdater &DTU,
                                                   LoopInfo *LI) {
  assert(!UsePageAliases);
  const int64_t AccessInfo = getAccessInfo(IsWrite, AccessSizeIndex);

  ShadowTagCheckInfo TCI = insertShadowTagCheck(Ptr, InsertBefore, DTU, LI);

  IRBuilder<> IRB(TCI.TagMismatchTerm);
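  // Shadow tags in [0, 15] are treated as short granules: the shadow value
  // records how many of the granule's 16 bytes are accessible. A mismatching
  // shadow tag above 15 is an unconditional check failure.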
  Value *OutOfShortGranuleTagRange =
      IRB.CreateICmpUGT(TCI.MemTag, ConstantInt::get(Int8Ty, 15));
  Instruction *CheckFailTerm = SplitBlockAndInsertIfThen(
      OutOfShortGranuleTagRange, TCI.TagMismatchTerm, !Recover,
      MDBuilder(*C).createUnlikelyBranchWeights(), &DTU, LI);

  IRB.SetInsertPoint(TCI.TagMismatchTerm);
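  // For a short granule, the access is in bounds only if the offset of its
  // last byte within the granule, (ptr & 15) + size - 1, is below the number
  // of accessible bytes recorded in the shadow.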
  Value *PtrLowBits = IRB.CreateTrunc(IRB.CreateAnd(TCI.PtrLong, 15), Int8Ty);
  PtrLowBits = IRB.CreateAdd(
      PtrLowBits, ConstantInt::get(Int8Ty, (1 << AccessSizeIndex) - 1));
  Value *PtrLowBitsOOB = IRB.CreateICmpUGE(PtrLowBits, TCI.MemTag);
  SplitBlockAndInsertIfThen(PtrLowBitsOOB, TCI.TagMismatchTerm, false,
                            MDBuilder(*C).createUnlikelyBranchWeights(), &DTU,
                            LI, CheckFailTerm->getParent());

  IRB.SetInsertPoint(TCI.TagMismatchTerm);
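  // The granule's real tag is stored in its last byte; reload it from there
  // and fail only if it still doesn't match the pointer's tag.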
  Value *InlineTagAddr = IRB.CreateOr(TCI.AddrLong, 15);
  InlineTagAddr = IRB.CreateIntToPtr(InlineTagAddr, PtrTy);
  Value *InlineTag = IRB.CreateLoad(Int8Ty, InlineTagAddr);
  Value *InlineTagMismatch = IRB.CreateICmpNE(TCI.PtrTag, InlineTag);
  SplitBlockAndInsertIfThen(InlineTagMismatch, TCI.TagMismatchTerm, false,
                            MDBuilder(*C).createUnlikelyBranchWeights(), &DTU,
                            LI, CheckFailTerm->getParent());

  IRB.SetInsertPoint(CheckFailTerm);
  InlineAsm *Asm;
  switch (TargetTriple.getArch()) {
  case Triple::x86_64:
    // The signal handler will find the data address in rdi.
    Asm = InlineAsm::get(
        FunctionType::get(VoidTy, {TCI.PtrLong->getType()}, false),
        "int3\nnopl " +
            itostr(0x40 + (AccessInfo & HWASanAccessInfo::RuntimeMask)) +
            "(%rax)",
        "{rdi}",
        /*hasSideEffects=*/true);
    break;
  case Triple::aarch64:
  case Triple::aarch64_be:
    // The signal handler will find the data address in x0.
    Asm = InlineAsm::get(
        FunctionType::get(VoidTy, {TCI.PtrLong->getType()}, false),
        "brk #" + itostr(0x900 + (AccessInfo & HWASanAccessInfo::RuntimeMask)),
        "{x0}",
        /*hasSideEffects=*/true);
    break;
  case Triple::riscv64:
    // The signal handler will find the data address in x10.
    Asm = InlineAsm::get(
        FunctionType::get(VoidTy, {TCI.PtrLong->getType()}, false),
        "ebreak\naddiw x0, x11, " +
            itostr(0x40 + (AccessInfo & HWASanAccessInfo::RuntimeMask)),
        "{x10}",
        /*hasSideEffects=*/true);
    break;
  default:
    report_fatal_error("unsupported architecture");
  }
  IRB.CreateCall(Asm, TCI.PtrLong);
  if (Recover)
    cast<BranchInst>(CheckFailTerm)
        ->setSuccessor(0, TCI.TagMismatchTerm->getParent());
}

bool HWAddressSanitizer::ignoreMemIntrinsic(OptimizationRemarkEmitter &ORE,
                                            MemIntrinsic *MI) {
  if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
    return (!ClInstrumentWrites || ignoreAccess(ORE, MTI, MTI->getDest())) &&
           (!ClInstrumentReads || ignoreAccess(ORE, MTI, MTI->getSource()));
  }
  if (isa<MemSetInst>(MI))
    return !ClInstrumentWrites || ignoreAccess(ORE, MI, MI->getDest());
  return false;
}

void HWAddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
  IRBuilder<> IRB(MI);
  if (isa<MemTransferInst>(MI)) {
    SmallVector<Value *, 4> Args{
        MI->getOperand(0), MI->getOperand(1),
        IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)};

    if (UseMatchAllCallback)
      Args.emplace_back(ConstantInt::get(Int8Ty, *MatchAllTag));
    IRB.CreateCall(isa<MemMoveInst>(MI) ? HwasanMemmove : HwasanMemcpy, Args);
  } else if (isa<MemSetInst>(MI)) {
    SmallVector<Value *, 4> Args{
        MI->getOperand(0),
        IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false),
        IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)};
    if (UseMatchAllCallback)
      Args.emplace_back(ConstantInt::get(Int8Ty, *MatchAllTag));
    IRB.CreateCall(HwasanMemset, Args);
  }
  MI->eraseFromParent();
}

bool HWAddressSanitizer::instrumentMemAccess(InterestingMemoryOperand &O,
                                             DomTreeUpdater &DTU, LoopInfo *LI,
                                             const DataLayout &DL) {
  Value *Addr = O.getPtr();

  LLVM_DEBUG(dbgs() << "Instrumenting: " << O.getInsn() << "\n");

  // If the pointer is statically known to be zero, the tag check will pass
  // since:
  // 1) it has a zero tag
  // 2) the shadow memory corresponding to address 0 is initialized to zero and
  //    never updated.
  // We can therefore elide the tag check.
  llvm::KnownBits Known(DL.getPointerTypeSizeInBits(Addr->getType()));
  llvm::computeKnownBits(Addr, Known, DL);
  if (Known.isZero())
    return false;

  if (O.MaybeMask)
    return false; // FIXME

  IRBuilder<> IRB(O.getInsn());
  if (!O.TypeStoreSize.isScalable() && isPowerOf2_64(O.TypeStoreSize) &&
      (O.TypeStoreSize / 8 <= (1ULL << (kNumberOfAccessSizes - 1))) &&
      (!O.Alignment || *O.Alignment >= Mapping.getObjectAlignment() ||
       *O.Alignment >= O.TypeStoreSize / 8)) {
    size_t AccessSizeIndex = TypeSizeToSizeIndex(O.TypeStoreSize);
    if (InstrumentWithCalls) {
      SmallVector<Value *, 2> Args{IRB.CreatePointerCast(Addr, IntptrTy)};
      if (UseMatchAllCallback)
        Args.emplace_back(ConstantInt::get(Int8Ty, *MatchAllTag));
      IRB.CreateCall(HwasanMemoryAccessCallback[O.IsWrite][AccessSizeIndex],
                     Args);
    } else if (OutlinedChecks) {
      instrumentMemAccessOutline(Addr, O.IsWrite, AccessSizeIndex, O.getInsn(),
                                 DTU, LI);
    } else {
      instrumentMemAccessInline(Addr, O.IsWrite, AccessSizeIndex, O.getInsn(),
                                DTU, LI);
    }
  } else {
    SmallVector<Value *, 3> Args{
        IRB.CreatePointerCast(Addr, IntptrTy),
        IRB.CreateUDiv(IRB.CreateTypeSize(IntptrTy, O.TypeStoreSize),
                       ConstantInt::get(IntptrTy, 8))};
    if (UseMatchAllCallback)
      Args.emplace_back(ConstantInt::get(Int8Ty, *MatchAllTag));
    IRB.CreateCall(HwasanMemoryAccessCallbackSized[O.IsWrite], Args);
  }
  untagPointerOperand(O.getInsn(), Addr);

  return true;
}

void HWAddressSanitizer::tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag,
                                   size_t Size) {
  size_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment());
  if (!UseShortGranules)
    Size = AlignedSize;

  Tag = IRB.CreateTrunc(Tag, Int8Ty);
  if (InstrumentWithCalls) {
    IRB.CreateCall(HwasanTagMemoryFunc,
                   {IRB.CreatePointerCast(AI, PtrTy), Tag,
                    ConstantInt::get(IntptrTy, AlignedSize)});
  } else {
    size_t ShadowSize = Size >> Mapping.scale();
    Value *AddrLong = untagPointer(IRB, IRB.CreatePointerCast(AI, IntptrTy));
    Value *ShadowPtr = memToShadow(AddrLong, IRB);
    // If this memset is not inlined, it will be intercepted in the hwasan
    // runtime library. That's OK, because the interceptor skips the checks if
    // the address is in the shadow region.
    // FIXME: the interceptor is not as fast as real memset. Consider lowering
    // llvm.memset right here into either a sequence of stores, or a call to
    // hwasan_tag_memory.
    if (ShadowSize)
      IRB.CreateMemSet(ShadowPtr, Tag, ShadowSize, Align(1));
    if (Size != AlignedSize) {
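      // Short granule: the shadow byte records the number of accessible
      // bytes, and the tag itself is stored in the granule's last byte so
      // that inline checks can verify it.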
      const uint8_t SizeRemainder = Size % Mapping.getObjectAlignment().value();
      IRB.CreateStore(ConstantInt::get(Int8Ty, SizeRemainder),
                      IRB.CreateConstGEP1_32(Int8Ty, ShadowPtr, ShadowSize));
      IRB.CreateStore(
          Tag, IRB.CreateConstGEP1_32(Int8Ty, IRB.CreatePointerCast(AI, PtrTy),
                                      AlignedSize - 1));
    }
  }
}

unsigned HWAddressSanitizer::retagMask(unsigned AllocaNo) {
  if (TargetTriple.getArch() == Triple::x86_64)
    return AllocaNo & TagMaskByte;

  // A list of 8-bit numbers that have at most one run of non-zero bits.
  // x = x ^ (mask << 56) can be encoded as a single armv8 instruction for these
  // masks.
  // The list does not include the value 255, which is used for UAR.
  //
  // Because we are more likely to use earlier elements of this list than later
  // ones, it is sorted in increasing order of probability of collision with a
  // mask allocated (temporally) nearby. The program that generated this list
  // can be found at:
  // https://github.com/google/sanitizers/blob/master/hwaddress-sanitizer/sort_masks.py
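  //
  // For example, 192 (0b11000000) is a single run of set bits, so on AArch64
  // x ^ (192 << 56) can be materialized as one EOR with a logical immediate.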
  static const unsigned FastMasks[] = {
      0,   128, 64, 192, 32,  96,  224, 112, 240, 48, 16,  120,
      248, 56,  24, 8,   124, 252, 60,  28,  12,  4,  126, 254,
      62,  30,  14, 6,   2,   127, 63,  31,  15,  7,  3,   1};
  return FastMasks[AllocaNo % std::size(FastMasks)];
}

Value *HWAddressSanitizer::applyTagMask(IRBuilder<> &IRB, Value *OldTag) {
  if (TagMaskByte == 0xFF)
    return OldTag; // No need to clear the tag byte.
  return IRB.CreateAnd(OldTag,
                       ConstantInt::get(OldTag->getType(), TagMaskByte));
}

Value *HWAddressSanitizer::getNextTagWithCall(IRBuilder<> &IRB) {
  return IRB.CreateZExt(IRB.CreateCall(HwasanGenerateTagFunc), IntptrTy);
}

Value *HWAddressSanitizer::getStackBaseTag(IRBuilder<> &IRB) {
  if (ClGenerateTagsWithCalls)
    return nullptr;
  if (StackBaseTag)
    return StackBaseTag;
1290   // Extract some entropy from the frame pointer for the tags.
1291   // Take bits 20..27 (ASLR entropy) and xor them into bits 0..7 (these
1292   // differ between functions).
1293   Value *FramePointerLong = getCachedFP(IRB);
1294   Value *StackTag =
1295       applyTagMask(IRB, IRB.CreateXor(FramePointerLong,
1296                                       IRB.CreateLShr(FramePointerLong, 20)));
1297   StackTag->setName("hwasan.stack.base.tag");
1298   return StackTag;
1299 }
1300 
1301 Value *HWAddressSanitizer::getAllocaTag(IRBuilder<> &IRB, Value *StackTag,
1302                                         unsigned AllocaNo) {
1303   if (ClGenerateTagsWithCalls)
1304     return getNextTagWithCall(IRB);
1305   return IRB.CreateXor(
1306       StackTag, ConstantInt::get(StackTag->getType(), retagMask(AllocaNo)));
1307 }
1308 
1309 Value *HWAddressSanitizer::getUARTag(IRBuilder<> &IRB) {
1310   Value *FramePointerLong = getCachedFP(IRB);
1311   Value *UARTag =
1312       applyTagMask(IRB, IRB.CreateLShr(FramePointerLong, PointerTagShift));
1313 
1314   UARTag->setName("hwasan.uar.tag");
1315   return UARTag;
1316 }
1317 
1318 // Add a tag to an address.
1319 Value *HWAddressSanitizer::tagPointer(IRBuilder<> &IRB, Type *Ty,
1320                                       Value *PtrLong, Value *Tag) {
1321   assert(!UsePageAliases);
1322   Value *TaggedPtrLong;
1323   if (CompileKernel) {
1324     // Kernel addresses have 0xFF in the most significant byte.
1325     Value *ShiftedTag =
1326         IRB.CreateOr(IRB.CreateShl(Tag, PointerTagShift),
1327                      ConstantInt::get(IntptrTy, (1ULL << PointerTagShift) - 1));
1328     TaggedPtrLong = IRB.CreateAnd(PtrLong, ShiftedTag);
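         // E.g. (illustrative, PointerTagShift == 56, Tag == 0xAB): ShiftedTag
         // is 0xABFFFFFFFFFFFFFF, so ANDing the kernel pointer
         // 0xFF00001234567890 yields 0xAB00001234567890.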
1329   } else {
1330     // Userspace pointers can simply be OR'ed with (tag << PointerTagShift).
1331     Value *ShiftedTag = IRB.CreateShl(Tag, PointerTagShift);
1332     TaggedPtrLong = IRB.CreateOr(PtrLong, ShiftedTag);
1333   }
1334   return IRB.CreateIntToPtr(TaggedPtrLong, Ty);
1335 }
1336 
1337 // Remove tag from an address.
1338 Value *HWAddressSanitizer::untagPointer(IRBuilder<> &IRB, Value *PtrLong) {
1339   assert(!UsePageAliases);
1340   Value *UntaggedPtrLong;
1341   if (CompileKernel) {
1342     // Kernel addresses have 0xFF in the most significant byte.
1343     UntaggedPtrLong =
1344         IRB.CreateOr(PtrLong, ConstantInt::get(PtrLong->getType(),
1345                                                TagMaskByte << PointerTagShift));
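         // E.g. (illustrative): ORing the tagged kernel address
         // 0xAB00001234567890 with 0xFF << 56 restores 0xFF00001234567890.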
1346   } else {
1347     // Userspace addresses have 0x00.
1348     UntaggedPtrLong = IRB.CreateAnd(
1349         PtrLong, ConstantInt::get(PtrLong->getType(),
1350                                   ~(TagMaskByte << PointerTagShift)));
1351   }
1352   return UntaggedPtrLong;
1353 }
1354 
1355 Value *HWAddressSanitizer::getHwasanThreadSlotPtr(IRBuilder<> &IRB) {
1356   // Android provides a fixed TLS slot for sanitizers. See TLS_SLOT_SANITIZER
1357   // in Bionic's libc/platform/bionic/tls_defines.h.
1358   constexpr int SanitizerSlot = 6;
1359   if (TargetTriple.isAArch64() && TargetTriple.isAndroid())
1360     return memtag::getAndroidSlotPtr(IRB, SanitizerSlot);
1361   return ThreadPtrGlobal;
1362 }
1363 
1364 Value *HWAddressSanitizer::getCachedFP(IRBuilder<> &IRB) {
1365   if (!CachedFP)
1366     CachedFP = memtag::getFP(IRB);
1367   return CachedFP;
1368 }
1369 
1370 Value *HWAddressSanitizer::getFrameRecordInfo(IRBuilder<> &IRB) {
1371   // Prepare ring buffer data.
1372   Value *PC = memtag::getPC(TargetTriple, IRB);
1373   Value *FP = getCachedFP(IRB);
1374 
1375   // Mix FP and PC.
1376   // Assumptions:
1377   // PC is 0x0000PPPPPPPPPPPP  (48 bits are meaningful, others are zero)
1378   // FP is 0xfffffffffffFFFF0  (4 lower bits are zero)
1379   // We only really need ~20 lower non-zero bits (FFFF), so we mix like this:
1380   //       0xFFFFPPPPPPPPPPPP
1381   //
1382   // FP works because in AArch64FrameLowering::getFrameIndexReference, we
1383   // prefer FP-relative offsets for functions compiled with HWASan.
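       // Worked example (illustrative values): PC = 0x0000AAAABBBBCCCC and
       // FP = 0xFFFFFFFFFFFF1230 give FP << 44 == 0xF123000000000000, so the
       // frame record is 0xF123AAAABBBBCCCC.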
1384   FP = IRB.CreateShl(FP, 44);
1385   return IRB.CreateOr(PC, FP);
1386 }
1387 
1388 void HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord) {
1389   if (!Mapping.isInTls())
1390     ShadowBase = getShadowNonTls(IRB);
1391   else if (!WithFrameRecord && TargetTriple.isAndroid())
1392     ShadowBase = getDynamicShadowIfunc(IRB);
1393 
1394   if (!WithFrameRecord && ShadowBase)
1395     return;
1396 
1397   Value *SlotPtr = nullptr;
1398   Value *ThreadLong = nullptr;
1399   Value *ThreadLongMaybeUntagged = nullptr;
1400 
1401   auto getThreadLongMaybeUntagged = [&]() {
1402     if (!SlotPtr)
1403       SlotPtr = getHwasanThreadSlotPtr(IRB);
1404     if (!ThreadLong)
1405       ThreadLong = IRB.CreateLoad(IntptrTy, SlotPtr);
1406     // Extract the address field from ThreadLong. Unnecessary on AArch64 with
1407     // TBI.
1408     return TargetTriple.isAArch64() ? ThreadLong
1409                                     : untagPointer(IRB, ThreadLong);
1410   };
1411 
1412   if (WithFrameRecord) {
1413     switch (ClRecordStackHistory) {
1414     case libcall: {
1415       // Emit a runtime call into hwasan rather than emitting instructions for
1416       // recording stack history.
1417       Value *FrameRecordInfo = getFrameRecordInfo(IRB);
1418       IRB.CreateCall(HwasanRecordFrameRecordFunc, {FrameRecordInfo});
1419       break;
1420     }
1421     case instr: {
1422       ThreadLongMaybeUntagged = getThreadLongMaybeUntagged();
1423 
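           // The ring buffer pointer advances by 8 bytes per recorded frame,
           // so dropping the low 3 bits yields a value that changes on every
           // function entry and provides cheap per-frame entropy.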
1424       StackBaseTag = IRB.CreateAShr(ThreadLong, 3);
1425 
1426       // Store data to ring buffer.
1427       Value *FrameRecordInfo = getFrameRecordInfo(IRB);
1428       Value *RecordPtr =
1429           IRB.CreateIntToPtr(ThreadLongMaybeUntagged, IRB.getPtrTy(0));
1430       IRB.CreateStore(FrameRecordInfo, RecordPtr);
1431 
1432       IRB.CreateStore(memtag::incrementThreadLong(IRB, ThreadLong, 8), SlotPtr);
1433       break;
1434     }
1435     case none: {
1436       llvm_unreachable(
1437           "A stack history recording mode should've been selected.");
1438     }
1439     }
1440   }
1441 
1442   if (!ShadowBase) {
1443     if (!ThreadLongMaybeUntagged)
1444       ThreadLongMaybeUntagged = getThreadLongMaybeUntagged();
1445 
1446     // Get the shadow base address by aligning ThreadLongMaybeUntagged up.
1447     // Note: this is not correct if the value is already aligned; the
1448     // runtime library makes sure that never happens.
1449     ShadowBase = IRB.CreateAdd(
1450         IRB.CreateOr(
1451             ThreadLongMaybeUntagged,
1452             ConstantInt::get(IntptrTy, (1ULL << kShadowBaseAlignment) - 1)),
1453         ConstantInt::get(IntptrTy, 1), "hwasan.shadow");
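         // E.g. (illustrative, kShadowBaseAlignment == 32): 0x12345 | 0xFFFFFFFF
         // == 0xFFFFFFFF, and adding 1 gives 0x100000000, the next 2^32-aligned
         // value.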
1454     ShadowBase = IRB.CreateIntToPtr(ShadowBase, PtrTy);
1455   }
1456 }
1457 
1458 bool HWAddressSanitizer::instrumentLandingPads(
1459     SmallVectorImpl<Instruction *> &LandingPadVec) {
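       // Unwinding to a landing pad bypasses the epilogue untagging, so ask the
       // runtime (__hwasan_handle_vfork) to clear tags from the stack region
       // abandoned by the unwind, based on the current stack pointer.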
1460   for (auto *LP : LandingPadVec) {
1461     IRBuilder<> IRB(LP->getNextNonDebugInstruction());
1462     IRB.CreateCall(
1463         HwasanHandleVfork,
1464         {memtag::readRegister(
1465             IRB, (TargetTriple.getArch() == Triple::x86_64) ? "rsp" : "sp")});
1466   }
1467   return true;
1468 }
1469 
1470 bool HWAddressSanitizer::instrumentStack(memtag::StackInfo &SInfo,
1471                                          Value *StackTag, Value *UARTag,
1472                                          const DominatorTree &DT,
1473                                          const PostDominatorTree &PDT,
1474                                          const LoopInfo &LI) {
1475   // Ideally, we want to calculate tagged stack base pointer, and rewrite all
1476   // alloca addresses using that. Unfortunately, offsets are not known yet
1477   // (unless we use ASan-style mega-alloca). Instead we keep the base tag in a
1478   // temp, shift-OR it into each alloca address and xor with the retag mask.
1479   // This generates one extra instruction per alloca use.
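       // Roughly (illustrative pseudo-IR, assuming top-byte tags):
       //   %x.long   = ptrtoint ptr %x to i64
       //   %x.tagged = or i64 %x.long, (BaseTag ^ retagMask(N)) << 56
       //   %x.hwasan = inttoptr i64 %x.tagged to ptr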
1480   unsigned int I = 0;
1481 
1482   for (auto &KV : SInfo.AllocasToInstrument) {
1483     auto N = I++;
1484     auto *AI = KV.first;
1485     memtag::AllocaInfo &Info = KV.second;
1486     IRBuilder<> IRB(AI->getNextNonDebugInstruction());
1487 
1488     // Replace uses of the alloca with tagged address.
1489     Value *Tag = getAllocaTag(IRB, StackTag, N);
1490     Value *AILong = IRB.CreatePointerCast(AI, IntptrTy);
1491     Value *AINoTagLong = untagPointer(IRB, AILong);
1492     Value *Replacement = tagPointer(IRB, AI->getType(), AINoTagLong, Tag);
1493     std::string Name =
1494         AI->hasName() ? AI->getName().str() : "alloca." + itostr(N);
1495     Replacement->setName(Name + ".hwasan");
1496 
1497     size_t Size = memtag::getAllocaSizeInBytes(*AI);
1498     size_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment());
1499 
1500     Value *AICast = IRB.CreatePointerCast(AI, PtrTy);
1501 
1502     auto HandleLifetime = [&](IntrinsicInst *II) {
1503       // Set the lifetime intrinsic to cover the whole alloca. This reduces the
1504       // set of assumptions we need to make about the lifetime. Without this we
1505       // would need to ensure that we can track the lifetime pointer to a
1506       // constant offset from the alloca, and would still need to change the
1507       // size to include the extra alignment we use for the untagging to make
1508       // the size consistent.
1509       //
1510       // The check for standard lifetime below makes sure that we have exactly
1511       // one set of start / end in any execution (i.e. the ends are not
1512       // reachable from each other), so this will not cause any problems.
1513       II->setArgOperand(0, ConstantInt::get(Int64Ty, AlignedSize));
1514       II->setArgOperand(1, AICast);
1515     };
1516     llvm::for_each(Info.LifetimeStart, HandleLifetime);
1517     llvm::for_each(Info.LifetimeEnd, HandleLifetime);
1518 
1519     AI->replaceUsesWithIf(Replacement, [AICast, AILong](const Use &U) {
1520       auto *User = U.getUser();
1521       return User != AILong && User != AICast &&
1522              !memtag::isLifetimeIntrinsic(User);
1523     });
1524 
1525     memtag::annotateDebugRecords(Info, retagMask(N));
1526 
1527     auto TagEnd = [&](Instruction *Node) {
1528       IRB.SetInsertPoint(Node);
1529       // When untagging, use `AlignedSize` because we need to restore the
1530       // tags of the entire alloca to their original value. If we used `Size`
1531       // here, we would keep the last granule tagged and store zero in its
1532       // last byte, due to how short granules are implemented.
1533       tagAlloca(IRB, AI, UARTag, AlignedSize);
1534     };
1535     // Calls to functions that may return twice (e.g. setjmp) confuse the
1536     // postdominator analysis, which can leave memory tagged after the
1537     // function returns. Work around this by always untagging at every return
1538     // statement if return_twice functions are called.
1539     bool StandardLifetime =
1540         !SInfo.CallsReturnTwice &&
1541         SInfo.UnrecognizedLifetimes.empty() &&
1542         memtag::isStandardLifetime(Info.LifetimeStart, Info.LifetimeEnd, &DT,
1543                                    &LI, ClMaxLifetimes);
1544     if (DetectUseAfterScope && StandardLifetime) {
1545       IntrinsicInst *Start = Info.LifetimeStart[0];
1546       IRB.SetInsertPoint(Start->getNextNode());
1547       tagAlloca(IRB, AI, Tag, Size);
1548       if (!memtag::forAllReachableExits(DT, PDT, LI, Start, Info.LifetimeEnd,
1549                                         SInfo.RetVec, TagEnd)) {
1550         for (auto *End : Info.LifetimeEnd)
1551           End->eraseFromParent();
1552       }
1553     } else {
1554       tagAlloca(IRB, AI, Tag, Size);
1555       for (auto *RI : SInfo.RetVec)
1556         TagEnd(RI);
1557       // We inserted tagging outside of the lifetimes, so we have to remove
1558       // the lifetime intrinsics.
1559       for (auto &II : Info.LifetimeStart)
1560         II->eraseFromParent();
1561       for (auto &II : Info.LifetimeEnd)
1562         II->eraseFromParent();
1563     }
1564     memtag::alignAndPadAlloca(Info, Mapping.getObjectAlignment());
1565   }
1566   for (auto &I : SInfo.UnrecognizedLifetimes)
1567     I->eraseFromParent();
1568   return true;
1569 }
1570 
1571 static void emitRemark(const Function &F, OptimizationRemarkEmitter &ORE,
1572                        bool Skip) {
1573   if (Skip) {
1574     ORE.emit([&]() {
1575       return OptimizationRemark(DEBUG_TYPE, "Skip", &F)
1576              << "Skipped: F=" << ore::NV("Function", &F);
1577     });
1578   } else {
1579     ORE.emit([&]() {
1580       return OptimizationRemarkMissed(DEBUG_TYPE, "Sanitize", &F)
1581              << "Sanitized: F=" << ore::NV("Function", &F);
1582     });
1583   }
1584 }
1585 
1586 bool HWAddressSanitizer::selectiveInstrumentationShouldSkip(
1587     Function &F, FunctionAnalysisManager &FAM) const {
1588   auto SkipHot = [&]() {
1589     if (!ClHotPercentileCutoff.getNumOccurrences())
1590       return false;
1591     auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
1592     ProfileSummaryInfo *PSI =
1593         MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
1594     if (!PSI || !PSI->hasProfileSummary()) {
1595       ++NumNoProfileSummaryFuncs;
1596       return false;
1597     }
1598     return PSI->isFunctionHotInCallGraphNthPercentile(
1599         ClHotPercentileCutoff, &F, FAM.getResult<BlockFrequencyAnalysis>(F));
1600   };
1601 
1602   auto SkipRandom = [&]() {
1603     if (!ClRandomSkipRate.getNumOccurrences())
1604       return false;
1605     std::bernoulli_distribution D(ClRandomSkipRate);
1606     return !D(*Rng);
1607   };
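       // Note: ClRandomSkipRate is the probability of *keeping* instrumentation;
       // the function is skipped when the Bernoulli draw fails.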
1608 
1609   bool Skip = SkipRandom() || SkipHot();
1610   emitRemark(F, FAM.getResult<OptimizationRemarkEmitterAnalysis>(F), Skip);
1611   return Skip;
1612 }
1613 
1614 void HWAddressSanitizer::sanitizeFunction(Function &F,
1615                                           FunctionAnalysisManager &FAM) {
1616   if (&F == HwasanCtorFunction)
1617     return;
1618 
1619   // Do not apply any instrumentation for naked functions.
1620   if (F.hasFnAttribute(Attribute::Naked))
1621     return;
1622 
1623   if (!F.hasFnAttribute(Attribute::SanitizeHWAddress))
1624     return;
1625 
1626   if (F.empty())
1627     return;
1628 
1629   NumTotalFuncs++;
1630 
1631   OptimizationRemarkEmitter &ORE =
1632       FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
1633 
1634   if (selectiveInstrumentationShouldSkip(F, FAM))
1635     return;
1636 
1637   NumInstrumentedFuncs++;
1638 
1639   LLVM_DEBUG(dbgs() << "Function: " << F.getName() << "\n");
1640 
1641   SmallVector<InterestingMemoryOperand, 16> OperandsToInstrument;
1642   SmallVector<MemIntrinsic *, 16> IntrinToInstrument;
1643   SmallVector<Instruction *, 8> LandingPadVec;
1644   const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
1645 
1646   memtag::StackInfoBuilder SIB(SSI, DEBUG_TYPE);
1647   for (auto &Inst : instructions(F)) {
1648     if (InstrumentStack) {
1649       SIB.visit(ORE, Inst);
1650     }
1651 
1652     if (InstrumentLandingPads && isa<LandingPadInst>(Inst))
1653       LandingPadVec.push_back(&Inst);
1654 
1655     getInterestingMemoryOperands(ORE, &Inst, TLI, OperandsToInstrument);
1656 
1657     if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(&Inst))
1658       if (!ignoreMemIntrinsic(ORE, MI))
1659         IntrinToInstrument.push_back(MI);
1660   }
1661 
1662   memtag::StackInfo &SInfo = SIB.get();
1663 
1664   initializeCallbacks(*F.getParent());
1665 
1666   if (!LandingPadVec.empty())
1667     instrumentLandingPads(LandingPadVec);
1668 
1669   if (SInfo.AllocasToInstrument.empty() && F.hasPersonalityFn() &&
1670       F.getPersonalityFn()->getName() == kHwasanPersonalityThunkName) {
1671     // __hwasan_personality_thunk is a no-op for functions without an
1672     // instrumented stack, so we can drop it.
1673     F.setPersonalityFn(nullptr);
1674   }
1675 
1676   if (SInfo.AllocasToInstrument.empty() && OperandsToInstrument.empty() &&
1677       IntrinToInstrument.empty())
1678     return;
1679 
1680   assert(!ShadowBase);
1681 
1682   BasicBlock::iterator InsertPt = F.getEntryBlock().begin();
1683   IRBuilder<> EntryIRB(&F.getEntryBlock(), InsertPt);
1684   emitPrologue(EntryIRB,
1685                /*WithFrameRecord*/ ClRecordStackHistory != none &&
1686                    Mapping.withFrameRecord() &&
1687                    !SInfo.AllocasToInstrument.empty());
1688 
1689   if (!SInfo.AllocasToInstrument.empty()) {
1690     const DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
1691     const PostDominatorTree &PDT = FAM.getResult<PostDominatorTreeAnalysis>(F);
1692     const LoopInfo &LI = FAM.getResult<LoopAnalysis>(F);
1693     Value *StackTag = getStackBaseTag(EntryIRB);
1694     Value *UARTag = getUARTag(EntryIRB);
1695     instrumentStack(SInfo, StackTag, UARTag, DT, PDT, LI);
1696   }
1697 
1698   // If we split the entry block, move any allocas that were originally in the
1699   // entry block back into the entry block so that they aren't treated as
1700   // dynamic allocas.
1701   if (EntryIRB.GetInsertBlock() != &F.getEntryBlock()) {
1702     InsertPt = F.getEntryBlock().begin();
1703     for (Instruction &I :
1704          llvm::make_early_inc_range(*EntryIRB.GetInsertBlock())) {
1705       if (auto *AI = dyn_cast<AllocaInst>(&I))
1706         if (isa<ConstantInt>(AI->getArraySize()))
1707           I.moveBefore(F.getEntryBlock(), InsertPt);
1708     }
1709   }
1710 
1711   DominatorTree *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F);
1712   PostDominatorTree *PDT = FAM.getCachedResult<PostDominatorTreeAnalysis>(F);
1713   LoopInfo *LI = FAM.getCachedResult<LoopAnalysis>(F);
1714   DomTreeUpdater DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Lazy);
1715   const DataLayout &DL = F.getDataLayout();
1716   for (auto &Operand : OperandsToInstrument)
1717     instrumentMemAccess(Operand, DTU, LI, DL);
1718   DTU.flush();
1719 
1720   if (ClInstrumentMemIntrinsics && !IntrinToInstrument.empty()) {
1721     for (auto *Inst : IntrinToInstrument)
1722       instrumentMemIntrinsic(Inst);
1723   }
1724 
1725   ShadowBase = nullptr;
1726   StackBaseTag = nullptr;
1727   CachedFP = nullptr;
1728 }
1729 
1730 void HWAddressSanitizer::instrumentGlobal(GlobalVariable *GV, uint8_t Tag) {
1731   assert(!UsePageAliases);
1732   Constant *Initializer = GV->getInitializer();
1733   uint64_t SizeInBytes =
1734       M.getDataLayout().getTypeAllocSize(Initializer->getType());
1735   uint64_t NewSize = alignTo(SizeInBytes, Mapping.getObjectAlignment());
1736   if (SizeInBytes != NewSize) {
1737     // Pad the initializer out to the next multiple of the granule size (16
1738     // bytes by default) and add the required short granule tag.
1739     std::vector<uint8_t> Init(NewSize - SizeInBytes, 0);
1740     Init.back() = Tag;
1741     Constant *Padding = ConstantDataArray::get(*C, Init);
1742     Initializer = ConstantStruct::getAnon({Initializer, Padding});
1743   }
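       // E.g. (illustrative): a 13-byte global is padded to 16 bytes, and the
       // final padding byte holds the tag, since the short-granule scheme keeps
       // the tag in the last byte of the granule.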
1744 
1745   auto *NewGV = new GlobalVariable(M, Initializer->getType(), GV->isConstant(),
1746                                    GlobalValue::ExternalLinkage, Initializer,
1747                                    GV->getName() + ".hwasan");
1748   NewGV->copyAttributesFrom(GV);
1749   NewGV->setLinkage(GlobalValue::PrivateLinkage);
1750   NewGV->copyMetadata(GV, 0);
1751   NewGV->setAlignment(
1752       std::max(GV->getAlign().valueOrOne(), Mapping.getObjectAlignment()));
1753 
1754   // It is invalid to ICF two globals that have different tags. In the case
1755   // where the size of the global is a multiple of the tag granularity the
1756   // contents of the globals may be the same but the tags (i.e. symbol values)
1757   // may be different, and the symbols are not considered during ICF. In the
1758   // case where the size is not a multiple of the granularity, the short granule
1759   // tags would discriminate two globals with different tags, but there would
1760   // otherwise be nothing stopping such a global from being incorrectly ICF'd
1761   // with an uninstrumented (i.e. tag 0) global that happened to have the short
1762   // granule tag in the last byte.
1763   NewGV->setUnnamedAddr(GlobalValue::UnnamedAddr::None);
1764 
1765   // Descriptor format (assuming little-endian):
1766   // bytes 0-3: relative address of global
1767   // bytes 4-6: size of global (16MB ought to be enough for anyone, but in case
1768   // it isn't, we create multiple descriptors)
1769   // byte 7: tag
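       // E.g. (illustrative): a 20 MiB (0x1400000-byte) global gets two
       // descriptors, one covering the first 0xfffff0 bytes and one covering
       // the remainder.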
1770   auto *DescriptorTy = StructType::get(Int32Ty, Int32Ty);
1771   const uint64_t MaxDescriptorSize = 0xfffff0;
1772   for (uint64_t DescriptorPos = 0; DescriptorPos < SizeInBytes;
1773        DescriptorPos += MaxDescriptorSize) {
1774     auto *Descriptor =
1775         new GlobalVariable(M, DescriptorTy, true, GlobalValue::PrivateLinkage,
1776                            nullptr, GV->getName() + ".hwasan.descriptor");
1777     auto *GVRelPtr = ConstantExpr::getTrunc(
1778         ConstantExpr::getAdd(
1779             ConstantExpr::getSub(
1780                 ConstantExpr::getPtrToInt(NewGV, Int64Ty),
1781                 ConstantExpr::getPtrToInt(Descriptor, Int64Ty)),
1782             ConstantInt::get(Int64Ty, DescriptorPos)),
1783         Int32Ty);
1784     uint32_t Size = std::min(SizeInBytes - DescriptorPos, MaxDescriptorSize);
1785     auto *SizeAndTag = ConstantInt::get(Int32Ty, Size | (uint32_t(Tag) << 24));
1786     Descriptor->setComdat(NewGV->getComdat());
1787     Descriptor->setInitializer(ConstantStruct::getAnon({GVRelPtr, SizeAndTag}));
1788     Descriptor->setSection("hwasan_globals");
1789     Descriptor->setMetadata(LLVMContext::MD_associated,
1790                             MDNode::get(*C, ValueAsMetadata::get(NewGV)));
1791     appendToCompilerUsed(M, Descriptor);
1792   }
1793 
1794   Constant *Aliasee = ConstantExpr::getIntToPtr(
1795       ConstantExpr::getAdd(
1796           ConstantExpr::getPtrToInt(NewGV, Int64Ty),
1797           ConstantInt::get(Int64Ty, uint64_t(Tag) << PointerTagShift)),
1798       GV->getType());
1799   auto *Alias = GlobalAlias::create(GV->getValueType(), GV->getAddressSpace(),
1800                                     GV->getLinkage(), "", Aliasee, &M);
1801   Alias->setVisibility(GV->getVisibility());
1802   Alias->takeName(GV);
1803   GV->replaceAllUsesWith(Alias);
1804   GV->eraseFromParent();
1805 }
1806 
1807 void HWAddressSanitizer::instrumentGlobals() {
1808   std::vector<GlobalVariable *> Globals;
1809   for (GlobalVariable &GV : M.globals()) {
1810     if (GV.hasSanitizerMetadata() && GV.getSanitizerMetadata().NoHWAddress)
1811       continue;
1812 
1813     if (GV.isDeclarationForLinker() || GV.getName().starts_with("llvm.") ||
1814         GV.isThreadLocal())
1815       continue;
1816 
1817     // Common symbols can't have aliases point to them, so they can't be tagged.
1818     if (GV.hasCommonLinkage())
1819       continue;
1820 
1821     // Globals with custom sections may be used in __start_/__stop_ enumeration,
1822     // which would be broken both by adding tags and potentially by the extra
1823     // padding/alignment that we insert.
1824     if (GV.hasSection())
1825       continue;
1826 
1827     Globals.push_back(&GV);
1828   }
1829 
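       // Hash the source file name so each translation unit gets a
       // deterministic starting tag, keeping builds reproducible.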
1830   MD5 Hasher;
1831   Hasher.update(M.getSourceFileName());
1832   MD5::MD5Result Hash;
1833   Hasher.final(Hash);
1834   uint8_t Tag = Hash[0];
1835 
1836   assert(TagMaskByte >= 16);
1837 
1838   for (GlobalVariable *GV : Globals) {
1839     // Don't allow globals to be tagged with something that looks like a
1840     // short-granule tag, otherwise we lose inter-granule overflow detection, as
1841     // the fast path shadow-vs-address check succeeds.
1842     if (Tag < 16 || Tag > TagMaskByte)
1843       Tag = 16;
1844     instrumentGlobal(GV, Tag++);
1845   }
1846 }
1847 
1848 void HWAddressSanitizer::instrumentPersonalityFunctions() {
1849   // We need to untag stack frames as we unwind past them. That is the job of
1850   // the personality function wrapper, which either wraps an existing
1851   // personality function or acts as a personality function on its own. Each
1852   // function that has a personality function or that can be unwound past has
1853   // its personality function changed to a thunk that calls the personality
1854   // function wrapper in the runtime.
1855   MapVector<Constant *, std::vector<Function *>> PersonalityFns;
1856   for (Function &F : M) {
1857     if (F.isDeclaration() || !F.hasFnAttribute(Attribute::SanitizeHWAddress))
1858       continue;
1859 
1860     if (F.hasPersonalityFn()) {
1861       PersonalityFns[F.getPersonalityFn()->stripPointerCasts()].push_back(&F);
1862     } else if (!F.hasFnAttribute(Attribute::NoUnwind)) {
1863       PersonalityFns[nullptr].push_back(&F);
1864     }
1865   }
1866 
1867   if (PersonalityFns.empty())
1868     return;
1869 
1870   FunctionCallee HwasanPersonalityWrapper = M.getOrInsertFunction(
1871       "__hwasan_personality_wrapper", Int32Ty, Int32Ty, Int32Ty, Int64Ty, PtrTy,
1872       PtrTy, PtrTy, PtrTy, PtrTy);
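       // _Unwind_GetGR and _Unwind_GetCFA are declared with placeholder types;
       // only their addresses are forwarded to the wrapper below.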
1873   FunctionCallee UnwindGetGR = M.getOrInsertFunction("_Unwind_GetGR", VoidTy);
1874   FunctionCallee UnwindGetCFA = M.getOrInsertFunction("_Unwind_GetCFA", VoidTy);
1875 
1876   for (auto &P : PersonalityFns) {
1877     std::string ThunkName = kHwasanPersonalityThunkName;
1878     if (P.first)
1879       ThunkName += ("." + P.first->getName()).str();
1880     FunctionType *ThunkFnTy = FunctionType::get(
1881         Int32Ty, {Int32Ty, Int32Ty, Int64Ty, PtrTy, PtrTy}, false);
1882     bool IsLocal = P.first && (!isa<GlobalValue>(P.first) ||
1883                                cast<GlobalValue>(P.first)->hasLocalLinkage());
1884     auto *ThunkFn = Function::Create(ThunkFnTy,
1885                                      IsLocal ? GlobalValue::InternalLinkage
1886                                              : GlobalValue::LinkOnceODRLinkage,
1887                                      ThunkName, &M);
1888     if (!IsLocal) {
1889       ThunkFn->setVisibility(GlobalValue::HiddenVisibility);
1890       ThunkFn->setComdat(M.getOrInsertComdat(ThunkName));
1891     }
1892 
1893     auto *BB = BasicBlock::Create(*C, "entry", ThunkFn);
1894     IRBuilder<> IRB(BB);
1895     CallInst *WrapperCall = IRB.CreateCall(
1896         HwasanPersonalityWrapper,
1897         {ThunkFn->getArg(0), ThunkFn->getArg(1), ThunkFn->getArg(2),
1898          ThunkFn->getArg(3), ThunkFn->getArg(4),
1899          P.first ? P.first : Constant::getNullValue(PtrTy),
1900          UnwindGetGR.getCallee(), UnwindGetCFA.getCallee()});
1901     WrapperCall->setTailCall();
1902     IRB.CreateRet(WrapperCall);
1903 
1904     for (Function *F : P.second)
1905       F->setPersonalityFn(ThunkFn);
1906   }
1907 }
1908 
1909 void HWAddressSanitizer::ShadowMapping::init(Triple &TargetTriple,
1910                                              bool InstrumentWithCalls) {
1911   // Start with defaults.
1912   Scale = kDefaultShadowScale;
1913   Kind = OffsetKind::kTls;
1914   WithFrameRecord = true;
1915 
1916   // Tune for the target.
1917   if (TargetTriple.isOSFuchsia()) {
1918     // Fuchsia is always PIE, which means that the beginning of the address
1919     // space is always available.
1920     SetFixed(0);
1921   } else if (ClEnableKhwasan || InstrumentWithCalls) {
1922     SetFixed(0);
1923     WithFrameRecord = false;
1924   }
1925 
1926   WithFrameRecord = optOr(ClFrameRecords, WithFrameRecord);
1927 
1928   // Apply the last of ClMappingOffset and ClMappingOffsetDynamic.
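       // (If both options are given, whichever appears later on the command
       // line wins, per cl::opt's getPosition().)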
1929   Kind = optOr(ClMappingOffsetDynamic, Kind);
1930   if (ClMappingOffset.getNumOccurrences() > 0 &&
1931       !(ClMappingOffsetDynamic.getNumOccurrences() > 0 &&
1932         ClMappingOffsetDynamic.getPosition() > ClMappingOffset.getPosition())) {
1933     SetFixed(ClMappingOffset);
1934   }
1935 }
1936