xref: /llvm-project/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp (revision 416f1c465db62d829283f6902ef35e027e127aa7)
1 //===----- TypeSanitizer.cpp - type-based-aliasing-violation detector -----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file is a part of TypeSanitizer, a type-based-aliasing-violation
10 // detector.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Transforms/Instrumentation/TypeSanitizer.h"
15 #include "llvm/ADT/SetVector.h"
16 #include "llvm/ADT/SmallSet.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/ADT/StringExtras.h"
20 #include "llvm/Analysis/MemoryLocation.h"
21 #include "llvm/Analysis/TargetLibraryInfo.h"
22 #include "llvm/IR/DataLayout.h"
23 #include "llvm/IR/Function.h"
24 #include "llvm/IR/IRBuilder.h"
25 #include "llvm/IR/InstIterator.h"
26 #include "llvm/IR/Instructions.h"
27 #include "llvm/IR/IntrinsicInst.h"
28 #include "llvm/IR/Intrinsics.h"
29 #include "llvm/IR/LLVMContext.h"
30 #include "llvm/IR/MDBuilder.h"
31 #include "llvm/IR/Metadata.h"
32 #include "llvm/IR/Module.h"
33 #include "llvm/IR/Type.h"
34 #include "llvm/ProfileData/InstrProf.h"
35 #include "llvm/Support/CommandLine.h"
36 #include "llvm/Support/Debug.h"
37 #include "llvm/Support/MD5.h"
38 #include "llvm/Support/MathExtras.h"
39 #include "llvm/Support/Regex.h"
40 #include "llvm/Support/raw_ostream.h"
41 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
42 #include "llvm/Transforms/Utils/Local.h"
43 #include "llvm/Transforms/Utils/ModuleUtils.h"
44 
45 #include <cctype>
46 
47 using namespace llvm;
48 
49 #define DEBUG_TYPE "tysan"
50 
51 static const char *const kTysanModuleCtorName = "tysan.module_ctor";
52 static const char *const kTysanInitName = "__tysan_init";
53 static const char *const kTysanCheckName = "__tysan_check";
54 static const char *const kTysanGVNamePrefix = "__tysan_v1_";
55 
56 static const char *const kTysanShadowMemoryAddress =
57     "__tysan_shadow_memory_address";
58 static const char *const kTysanAppMemMask = "__tysan_app_memory_mask";
59 
60 static cl::opt<bool>
61     ClWritesAlwaysSetType("tysan-writes-always-set-type",
62                           cl::desc("Writes always set the type"), cl::Hidden,
63                           cl::init(false));
64 
65 STATISTIC(NumInstrumentedAccesses, "Number of instrumented accesses");
66 
67 namespace {
68 
69 /// TypeSanitizer: instrument the code in module to find type-based aliasing
70 /// violations.
71 struct TypeSanitizer {
72   TypeSanitizer(Module &M);
73   bool sanitizeFunction(Function &F, const TargetLibraryInfo &TLI);
74   void instrumentGlobals(Module &M);
75 
76 private:
77   typedef SmallDenseMap<const MDNode *, GlobalVariable *, 8>
78       TypeDescriptorsMapTy;
79   typedef SmallDenseMap<const MDNode *, std::string, 8> TypeNameMapTy;
80 
81   void initializeCallbacks(Module &M);
82 
83   Instruction *getShadowBase(Function &F);
84   Instruction *getAppMemMask(Function &F);
85 
86   bool instrumentWithShadowUpdate(IRBuilder<> &IRB, const MDNode *TBAAMD,
87                                   Value *Ptr, uint64_t AccessSize, bool IsRead,
88                                   bool IsWrite, Value *ShadowBase,
89                                   Value *AppMemMask, bool ForceSetType,
90                                   bool SanitizeFunction,
91                                   TypeDescriptorsMapTy &TypeDescriptors,
92                                   const DataLayout &DL);
93 
94   /// Memory-related intrinsics/instructions reset the type of the destination
95   /// memory (including allocas and byval arguments).
96   bool instrumentMemInst(Value *I, Instruction *ShadowBase,
97                          Instruction *AppMemMask, const DataLayout &DL);
98 
99   std::string getAnonymousStructIdentifier(const MDNode *MD,
100                                            TypeNameMapTy &TypeNames);
101   bool generateTypeDescriptor(const MDNode *MD,
102                               TypeDescriptorsMapTy &TypeDescriptors,
103                               TypeNameMapTy &TypeNames, Module &M);
104   bool generateBaseTypeDescriptor(const MDNode *MD,
105                                   TypeDescriptorsMapTy &TypeDescriptors,
106                                   TypeNameMapTy &TypeNames, Module &M);
107 
108   const Triple TargetTriple;
109   Regex AnonNameRegex;
110   Type *IntptrTy;
111   uint64_t PtrShift;
112   IntegerType *OrdTy;
113 
114   /// Callbacks to run-time library are computed in initializeCallbacks.
115   FunctionCallee TysanCheck;
116   FunctionCallee TysanCtorFunction;
117 
118   /// Callback to set types for gloabls.
119   Function *TysanGlobalsSetTypeFunction;
120 };
121 } // namespace
122 
123 TypeSanitizer::TypeSanitizer(Module &M)
124     : TargetTriple(Triple(M.getTargetTriple())),
125       AnonNameRegex("^_ZTS.*N[1-9][0-9]*_GLOBAL__N") {
126   const DataLayout &DL = M.getDataLayout();
127   IntptrTy = DL.getIntPtrType(M.getContext());
128   PtrShift = countr_zero(IntptrTy->getPrimitiveSizeInBits() / 8);
129 
130   TysanGlobalsSetTypeFunction = M.getFunction("__tysan_set_globals_types");
131   initializeCallbacks(M);
132 }
133 
134 void TypeSanitizer::initializeCallbacks(Module &M) {
135   IRBuilder<> IRB(M.getContext());
136   OrdTy = IRB.getInt32Ty();
137 
138   AttributeList Attr;
139   Attr = Attr.addFnAttribute(M.getContext(), Attribute::NoUnwind);
140   // Initialize the callbacks.
141   TysanCheck =
142       M.getOrInsertFunction(kTysanCheckName, Attr, IRB.getVoidTy(),
143                             IRB.getPtrTy(), // Pointer to data to be read.
144                             OrdTy,          // Size of the data in bytes.
145                             IRB.getPtrTy(), // Pointer to type descriptor.
146                             OrdTy           // Flags.
147       );
148 
149   TysanCtorFunction =
150       M.getOrInsertFunction(kTysanModuleCtorName, Attr, IRB.getVoidTy());
151 }
152 
153 void TypeSanitizer::instrumentGlobals(Module &M) {
154   TysanGlobalsSetTypeFunction = nullptr;
155 
156   NamedMDNode *Globals = M.getNamedMetadata("llvm.tysan.globals");
157   if (!Globals)
158     return;
159 
160   TysanGlobalsSetTypeFunction = Function::Create(
161       FunctionType::get(Type::getVoidTy(M.getContext()), false),
162       GlobalValue::InternalLinkage, "__tysan_set_globals_types", &M);
163   BasicBlock *BB =
164       BasicBlock::Create(M.getContext(), "", TysanGlobalsSetTypeFunction);
165   ReturnInst::Create(M.getContext(), BB);
166 
167   const DataLayout &DL = M.getDataLayout();
168   Value *ShadowBase = getShadowBase(*TysanGlobalsSetTypeFunction);
169   Value *AppMemMask = getAppMemMask(*TysanGlobalsSetTypeFunction);
170   TypeDescriptorsMapTy TypeDescriptors;
171   TypeNameMapTy TypeNames;
172 
173   for (const auto &GMD : Globals->operands()) {
174     auto *GV = mdconst::dyn_extract_or_null<GlobalVariable>(GMD->getOperand(0));
175     if (!GV)
176       continue;
177     const MDNode *TBAAMD = cast<MDNode>(GMD->getOperand(1));
178     if (!generateBaseTypeDescriptor(TBAAMD, TypeDescriptors, TypeNames, M))
179       continue;
180 
181     IRBuilder<> IRB(
182         TysanGlobalsSetTypeFunction->getEntryBlock().getTerminator());
183     Type *AccessTy = GV->getValueType();
184     assert(AccessTy->isSized());
185     uint64_t AccessSize = DL.getTypeStoreSize(AccessTy);
186     instrumentWithShadowUpdate(IRB, TBAAMD, GV, AccessSize, false, false,
187                                ShadowBase, AppMemMask, true, false,
188                                TypeDescriptors, DL);
189   }
190 
191   if (TysanGlobalsSetTypeFunction) {
192     IRBuilder<> IRB(cast<Function>(TysanCtorFunction.getCallee())
193                         ->getEntryBlock()
194                         .getTerminator());
195     IRB.CreateCall(TysanGlobalsSetTypeFunction, {});
196   }
197 }
198 
199 static const char LUT[] = "0123456789abcdef";
200 
201 static std::string encodeName(StringRef Name) {
202   size_t Length = Name.size();
203   std::string Output = kTysanGVNamePrefix;
204   Output.reserve(Output.size() + 3 * Length);
205   for (size_t i = 0; i < Length; ++i) {
206     const unsigned char c = Name[i];
207     if (isalnum(c)) {
208       Output.push_back(c);
209       continue;
210     }
211 
212     if (c == '_') {
213       Output.append("__");
214       continue;
215     }
216 
217     Output.push_back('_');
218     Output.push_back(LUT[c >> 4]);
219     Output.push_back(LUT[c & 15]);
220   }
221 
222   return Output;
223 }
224 
225 std::string
226 TypeSanitizer::getAnonymousStructIdentifier(const MDNode *MD,
227                                             TypeNameMapTy &TypeNames) {
228   MD5 Hash;
229 
230   for (int i = 1, e = MD->getNumOperands(); i < e; i += 2) {
231     const MDNode *MemberNode = dyn_cast<MDNode>(MD->getOperand(i));
232     if (!MemberNode)
233       return "";
234 
235     auto TNI = TypeNames.find(MemberNode);
236     std::string MemberName;
237     if (TNI != TypeNames.end()) {
238       MemberName = TNI->second;
239     } else {
240       if (MemberNode->getNumOperands() < 1)
241         return "";
242       MDString *MemberNameNode = dyn_cast<MDString>(MemberNode->getOperand(0));
243       if (!MemberNameNode)
244         return "";
245       MemberName = MemberNameNode->getString().str();
246       if (MemberName.empty())
247         MemberName = getAnonymousStructIdentifier(MemberNode, TypeNames);
248       if (MemberName.empty())
249         return "";
250       TypeNames[MemberNode] = MemberName;
251     }
252 
253     Hash.update(MemberName);
254     Hash.update("\0");
255 
256     uint64_t Offset =
257         mdconst::extract<ConstantInt>(MD->getOperand(i + 1))->getZExtValue();
258     Hash.update(utostr(Offset));
259     Hash.update("\0");
260   }
261 
262   MD5::MD5Result HashResult;
263   Hash.final(HashResult);
264   return "__anonymous_" + std::string(HashResult.digest().str());
265 }
266 
267 bool TypeSanitizer::generateBaseTypeDescriptor(
268     const MDNode *MD, TypeDescriptorsMapTy &TypeDescriptors,
269     TypeNameMapTy &TypeNames, Module &M) {
270   if (MD->getNumOperands() < 1)
271     return false;
272 
273   MDString *NameNode = dyn_cast<MDString>(MD->getOperand(0));
274   if (!NameNode)
275     return false;
276 
277   std::string Name = NameNode->getString().str();
278   if (Name.empty())
279     Name = getAnonymousStructIdentifier(MD, TypeNames);
280   if (Name.empty())
281     return false;
282   TypeNames[MD] = Name;
283   std::string EncodedName = encodeName(Name);
284 
285   GlobalVariable *GV =
286       dyn_cast_or_null<GlobalVariable>(M.getNamedValue(EncodedName));
287   if (GV) {
288     TypeDescriptors[MD] = GV;
289     return true;
290   }
291 
292   SmallVector<std::pair<Constant *, uint64_t>> Members;
293   for (int i = 1, e = MD->getNumOperands(); i < e; i += 2) {
294     const MDNode *MemberNode = dyn_cast<MDNode>(MD->getOperand(i));
295     if (!MemberNode)
296       return false;
297 
298     Constant *Member;
299     auto TDI = TypeDescriptors.find(MemberNode);
300     if (TDI != TypeDescriptors.end()) {
301       Member = TDI->second;
302     } else {
303       if (!generateBaseTypeDescriptor(MemberNode, TypeDescriptors, TypeNames,
304                                       M))
305         return false;
306 
307       Member = TypeDescriptors[MemberNode];
308     }
309 
310     uint64_t Offset =
311         mdconst::extract<ConstantInt>(MD->getOperand(i + 1))->getZExtValue();
312 
313     Members.push_back(std::make_pair(Member, Offset));
314   }
315 
316   // The descriptor for a scalar is:
317   //   [2, member count, [type pointer, offset]..., name]
318 
319   LLVMContext &C = MD->getContext();
320   Constant *NameData = ConstantDataArray::getString(C, NameNode->getString());
321   SmallVector<Type *> TDSubTys;
322   SmallVector<Constant *> TDSubData;
323 
324   auto PushTDSub = [&](Constant *C) {
325     TDSubTys.push_back(C->getType());
326     TDSubData.push_back(C);
327   };
328 
329   PushTDSub(ConstantInt::get(IntptrTy, 2));
330   PushTDSub(ConstantInt::get(IntptrTy, Members.size()));
331 
332   // Types that are in an anonymous namespace are local to this module.
333   // FIXME: This should really be marked by the frontend in the metadata
334   // instead of having us guess this from the mangled name. Moreover, the regex
335   // here can pick up (unlikely) names in the non-reserved namespace (because
336   // it needs to search into the type to pick up cases where the type in the
337   // anonymous namespace is a template parameter, etc.).
338   bool ShouldBeComdat = !AnonNameRegex.match(NameNode->getString());
339   for (auto &Member : Members) {
340     PushTDSub(Member.first);
341     PushTDSub(ConstantInt::get(IntptrTy, Member.second));
342   }
343 
344   PushTDSub(NameData);
345 
346   StructType *TDTy = StructType::get(C, TDSubTys);
347   Constant *TD = ConstantStruct::get(TDTy, TDSubData);
348 
349   GlobalVariable *TDGV =
350       new GlobalVariable(TDTy, true,
351                          !ShouldBeComdat ? GlobalValue::InternalLinkage
352                                          : GlobalValue::LinkOnceODRLinkage,
353                          TD, EncodedName);
354   M.insertGlobalVariable(TDGV);
355 
356   if (ShouldBeComdat) {
357     if (TargetTriple.isOSBinFormatELF()) {
358       Comdat *TDComdat = M.getOrInsertComdat(EncodedName);
359       TDGV->setComdat(TDComdat);
360     }
361     appendToUsed(M, TDGV);
362   }
363 
364   TypeDescriptors[MD] = TDGV;
365   return true;
366 }
367 
368 bool TypeSanitizer::generateTypeDescriptor(
369     const MDNode *MD, TypeDescriptorsMapTy &TypeDescriptors,
370     TypeNameMapTy &TypeNames, Module &M) {
371   // Here we need to generate a type descriptor corresponding to this TBAA
372   // metadata node. Under the current scheme there are three kinds of TBAA
373   // metadata nodes: scalar nodes, struct nodes, and struct tag nodes.
374 
375   if (MD->getNumOperands() < 3)
376     return false;
377 
378   const MDNode *BaseNode = dyn_cast<MDNode>(MD->getOperand(0));
379   if (!BaseNode)
380     return false;
381 
382   // This is a struct tag (element-access) node.
383 
384   const MDNode *AccessNode = dyn_cast<MDNode>(MD->getOperand(1));
385   if (!AccessNode)
386     return false;
387 
388   Constant *Base;
389   auto TDI = TypeDescriptors.find(BaseNode);
390   if (TDI != TypeDescriptors.end()) {
391     Base = TDI->second;
392   } else {
393     if (!generateBaseTypeDescriptor(BaseNode, TypeDescriptors, TypeNames, M))
394       return false;
395 
396     Base = TypeDescriptors[BaseNode];
397   }
398 
399   Constant *Access;
400   TDI = TypeDescriptors.find(AccessNode);
401   if (TDI != TypeDescriptors.end()) {
402     Access = TDI->second;
403   } else {
404     if (!generateBaseTypeDescriptor(AccessNode, TypeDescriptors, TypeNames, M))
405       return false;
406 
407     Access = TypeDescriptors[AccessNode];
408   }
409 
410   uint64_t Offset =
411       mdconst::extract<ConstantInt>(MD->getOperand(2))->getZExtValue();
412   std::string EncodedName =
413       std::string(Base->getName()) + "_o_" + utostr(Offset);
414 
415   GlobalVariable *GV =
416       dyn_cast_or_null<GlobalVariable>(M.getNamedValue(EncodedName));
417   if (GV) {
418     TypeDescriptors[MD] = GV;
419     return true;
420   }
421 
422   // The descriptor for a scalar is:
423   //   [1, base-type pointer, access-type pointer, offset]
424 
425   StructType *TDTy =
426       StructType::get(IntptrTy, Base->getType(), Access->getType(), IntptrTy);
427   Constant *TD =
428       ConstantStruct::get(TDTy, ConstantInt::get(IntptrTy, 1), Base, Access,
429                           ConstantInt::get(IntptrTy, Offset));
430 
431   bool ShouldBeComdat = cast<GlobalVariable>(Base)->getLinkage() ==
432                         GlobalValue::LinkOnceODRLinkage;
433 
434   GlobalVariable *TDGV =
435       new GlobalVariable(TDTy, true,
436                          !ShouldBeComdat ? GlobalValue::InternalLinkage
437                                          : GlobalValue::LinkOnceODRLinkage,
438                          TD, EncodedName);
439   M.insertGlobalVariable(TDGV);
440 
441   if (ShouldBeComdat) {
442     if (TargetTriple.isOSBinFormatELF()) {
443       Comdat *TDComdat = M.getOrInsertComdat(EncodedName);
444       TDGV->setComdat(TDComdat);
445     }
446     appendToUsed(M, TDGV);
447   }
448 
449   TypeDescriptors[MD] = TDGV;
450   return true;
451 }
452 
453 Instruction *TypeSanitizer::getShadowBase(Function &F) {
454   IRBuilder<> IRB(&F.front().front());
455   Constant *GlobalShadowAddress =
456       F.getParent()->getOrInsertGlobal(kTysanShadowMemoryAddress, IntptrTy);
457   return IRB.CreateLoad(IntptrTy, GlobalShadowAddress, "shadow.base");
458 }
459 
460 Instruction *TypeSanitizer::getAppMemMask(Function &F) {
461   IRBuilder<> IRB(&F.front().front());
462   Value *GlobalAppMemMask =
463       F.getParent()->getOrInsertGlobal(kTysanAppMemMask, IntptrTy);
464   return IRB.CreateLoad(IntptrTy, GlobalAppMemMask, "app.mem.mask");
465 }
466 
467 /// Collect all loads and stores, and for what TBAA nodes we need to generate
468 /// type descriptors.
469 void collectMemAccessInfo(
470     Function &F, const TargetLibraryInfo &TLI,
471     SmallVectorImpl<std::pair<Instruction *, MemoryLocation>> &MemoryAccesses,
472     SmallSetVector<const MDNode *, 8> &TBAAMetadata,
473     SmallVectorImpl<Value *> &MemTypeResetInsts) {
474   // Traverse all instructions, collect loads/stores/returns, check for calls.
475   for (Instruction &Inst : instructions(F)) {
476     // Skip memory accesses inserted by another instrumentation.
477     if (Inst.getMetadata(LLVMContext::MD_nosanitize))
478       continue;
479 
480     if (isa<LoadInst>(Inst) || isa<StoreInst>(Inst) ||
481         isa<AtomicCmpXchgInst>(Inst) || isa<AtomicRMWInst>(Inst)) {
482       MemoryLocation MLoc = MemoryLocation::get(&Inst);
483 
484       // Swift errors are special (we can't introduce extra uses on them).
485       if (MLoc.Ptr->isSwiftError())
486         continue;
487 
488       // Skip non-address-space-0 pointers; we don't know how to handle them.
489       Type *PtrTy = cast<PointerType>(MLoc.Ptr->getType());
490       if (PtrTy->getPointerAddressSpace() != 0)
491         continue;
492 
493       if (MLoc.AATags.TBAA)
494         TBAAMetadata.insert(MLoc.AATags.TBAA);
495       MemoryAccesses.push_back(std::make_pair(&Inst, MLoc));
496     } else if (isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) {
497       if (CallInst *CI = dyn_cast<CallInst>(&Inst))
498         maybeMarkSanitizerLibraryCallNoBuiltin(CI, &TLI);
499 
500       if (isa<MemIntrinsic>(Inst)) {
501         MemTypeResetInsts.push_back(&Inst);
502       } else if (auto *II = dyn_cast<IntrinsicInst>(&Inst)) {
503         if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
504             II->getIntrinsicID() == Intrinsic::lifetime_end)
505           MemTypeResetInsts.push_back(&Inst);
506       }
507     } else if (isa<AllocaInst>(Inst)) {
508       MemTypeResetInsts.push_back(&Inst);
509     }
510   }
511 }
512 
513 bool TypeSanitizer::sanitizeFunction(Function &F,
514                                      const TargetLibraryInfo &TLI) {
515   if (F.isDeclaration())
516     return false;
517   // This is required to prevent instrumenting call to __tysan_init from within
518   // the module constructor.
519   if (&F == TysanCtorFunction.getCallee() || &F == TysanGlobalsSetTypeFunction)
520     return false;
521   initializeCallbacks(*F.getParent());
522 
523   // We need to collect all loads and stores, and know for what TBAA nodes we
524   // need to generate type descriptors.
525   SmallVector<std::pair<Instruction *, MemoryLocation>> MemoryAccesses;
526   SmallSetVector<const MDNode *, 8> TBAAMetadata;
527   SmallVector<Value *> MemTypeResetInsts;
528   collectMemAccessInfo(F, TLI, MemoryAccesses, TBAAMetadata, MemTypeResetInsts);
529 
530   // byval arguments also need their types reset (they're new stack memory,
531   // just like allocas).
532   for (auto &A : F.args())
533     if (A.hasByValAttr())
534       MemTypeResetInsts.push_back(&A);
535 
536   Module &M = *F.getParent();
537   TypeDescriptorsMapTy TypeDescriptors;
538   TypeNameMapTy TypeNames;
539   bool Res = false;
540   for (const MDNode *MD : TBAAMetadata) {
541     if (TypeDescriptors.count(MD))
542       continue;
543 
544     if (!generateTypeDescriptor(MD, TypeDescriptors, TypeNames, M))
545       return Res; // Giving up.
546 
547     Res = true;
548   }
549 
550   const DataLayout &DL = F.getParent()->getDataLayout();
551   bool SanitizeFunction = F.hasFnAttribute(Attribute::SanitizeType);
552   bool NeedsInstrumentation =
553       MemTypeResetInsts.empty() && MemoryAccesses.empty();
554   Instruction *ShadowBase = NeedsInstrumentation ? nullptr : getShadowBase(F);
555   Instruction *AppMemMask = NeedsInstrumentation ? nullptr : getAppMemMask(F);
556   for (const auto &[I, MLoc] : MemoryAccesses) {
557     IRBuilder<> IRB(I);
558     assert(MLoc.Size.isPrecise());
559     if (instrumentWithShadowUpdate(
560             IRB, MLoc.AATags.TBAA, const_cast<Value *>(MLoc.Ptr),
561             MLoc.Size.getValue(), I->mayReadFromMemory(), I->mayWriteToMemory(),
562             ShadowBase, AppMemMask, false, SanitizeFunction, TypeDescriptors,
563             DL)) {
564       ++NumInstrumentedAccesses;
565       Res = true;
566     }
567   }
568 
569   for (auto Inst : MemTypeResetInsts)
570     Res |= instrumentMemInst(Inst, ShadowBase, AppMemMask, DL);
571 
572   return Res;
573 }
574 
575 static Value *convertToShadowDataInt(IRBuilder<> &IRB, Value *Ptr,
576                                      Type *IntptrTy, uint64_t PtrShift,
577                                      Value *ShadowBase, Value *AppMemMask) {
578   return IRB.CreateAdd(
579       IRB.CreateShl(
580           IRB.CreateAnd(IRB.CreatePtrToInt(Ptr, IntptrTy, "app.ptr.int"),
581                         AppMemMask, "app.ptr.masked"),
582           PtrShift, "app.ptr.shifted"),
583       ShadowBase, "shadow.ptr.int");
584 }
585 
/// Emit, at the insertion point of \p IRB, the shadow-memory instrumentation
/// for one access of \p AccessSize bytes at \p Ptr.
///
/// \param TBAAMD           TBAA tag of the access; when null, a null type
///                         descriptor is used.
/// \param IsRead, IsWrite  encoded into the flags passed to the runtime
///                         check (bit 0 = read, bit 1 = write).
/// \param ShadowBase, AppMemMask  values used to map \p Ptr to its shadow
///                         address.
/// \param ForceSetType     store the descriptor unconditionally, without any
///                         check.
/// \param SanitizeFunction when false, only set the type if the shadow slot
///                         is still null; when true, emit the full check.
/// \param TypeDescriptors  map from TBAA tag to descriptor global.
/// \returns true on every path.
bool TypeSanitizer::instrumentWithShadowUpdate(
    IRBuilder<> &IRB, const MDNode *TBAAMD, Value *Ptr, uint64_t AccessSize,
    bool IsRead, bool IsWrite, Value *ShadowBase, Value *AppMemMask,
    bool ForceSetType, bool SanitizeFunction,
    TypeDescriptorsMapTy &TypeDescriptors, const DataLayout &DL) {
  // Pick the descriptor for this access (null when there is no TBAA tag).
  Constant *TDGV;
  if (TBAAMD)
    TDGV = TypeDescriptors[TBAAMD];
  else
    TDGV = Constant::getNullValue(IRB.getPtrTy());

  Value *TD = IRB.CreateBitCast(TDGV, IRB.getPtrTy());

  Value *ShadowDataInt = convertToShadowDataInt(IRB, Ptr, IntptrTy, PtrShift,
                                                ShadowBase, AppMemMask);
  Type *Int8PtrPtrTy = PointerType::get(IRB.getContext(), 0);
  Value *ShadowData =
      IRB.CreateIntToPtr(ShadowDataInt, Int8PtrPtrTy, "shadow.ptr");

  // Emits the stores that record the type: the descriptor for the first byte
  // and "interior byte" markers for the rest. Uses IRB's insertion point at
  // the time of the call.
  auto SetType = [&]() {
    IRB.CreateStore(TD, ShadowData);

    // Now fill the remainder of the shadow memory corresponding to the
    // remainder of the bytes of the type with a bad type descriptor.
    for (uint64_t i = 1; i < AccessSize; ++i) {
      Value *BadShadowData = IRB.CreateIntToPtr(
          IRB.CreateAdd(ShadowDataInt,
                        ConstantInt::get(IntptrTy, i << PtrShift),
                        "shadow.byte." + Twine(i) + ".offset"),
          Int8PtrPtrTy, "shadow.byte." + Twine(i) + ".ptr");

      // This is the TD value, -i, which is used to indicate that the byte is
      // i bytes after the first byte of the type.
      Value *BadTD =
          IRB.CreateIntToPtr(ConstantInt::getSigned(IntptrTy, -i),
                             IRB.getPtrTy(), "bad.descriptor" + Twine(i));
      IRB.CreateStore(BadTD, BadShadowData);
    }
  };

  if (ForceSetType || (ClWritesAlwaysSetType && IsWrite)) {
    // In the mode where writes always set the type, for a write (which does
    // not also read), we just set the type.
    SetType();
    return true;
  }

  assert((!ClWritesAlwaysSetType || IsRead) &&
         "should have handled case above");
  LLVMContext &C = IRB.getContext();
  // Branch weights marking the slow paths below as unlikely.
  MDNode *UnlikelyBW = MDBuilder(C).createBranchWeights(1, 100000);

  if (!SanitizeFunction) {
    // If we're not sanitizing this function, then we only care whether we
    // need to *set* the type.
    Value *LoadedTD = IRB.CreateLoad(IRB.getPtrTy(), ShadowData, "shadow.desc");
    Value *NullTDCmp = IRB.CreateIsNull(LoadedTD, "desc.set");
    Instruction *NullTDTerm = SplitBlockAndInsertIfThen(
        NullTDCmp, &*IRB.GetInsertPoint(), false, UnlikelyBW);
    IRB.SetInsertPoint(NullTDTerm);
    NullTDTerm->getParent()->setName("set.type");
    SetType();
    return true;
  }
  // We need to check the type here. If the type is unknown, then the read
  // sets the type. If the type is known, then it is checked. If the type
  // doesn't match, then we call the runtime (which may yet determine that
  // the mismatch is okay).
  //
  // The checks generated below have the following structure.
  //
  //   ; First we load the descriptor for the load from shadow memory and
  //   ; compare it against the type descriptor for the current access type.
  //   %shadow.desc = load ptr %shadow.data
  //   %bad.desc = icmp ne %shadow.desc, %td
  //   br %bad.desc, %bad.bb, %good.bb
  //
  // bad.bb:
  //   %shadow.desc.null = icmp eq %shadow.desc, null
  //   br %shadow.desc.null, %null.td.bb, %good.td.bb
  //
  // null.td.bb:
  //   ; The type is unknown, set it if all bytes in the value are also unknown.
  //   ; To check, we load the shadow data for all bytes of the access. For the
  //   ; pseudo code below, assume an access of size 1.
  //   %shadow.data.int = add %shadow.data.int, 0
  //   %l = load (inttoptr %shadow.data.int)
  //   %is.not.null = icmp ne %l, null
  //   %not.all.unknown = %is.not.null
  //   br %not.all.unknown, before.set.type.bb
  //
  // before.set.type.bb:
  //   ; Call runtime to check mismatch.
  //   call void @__tysan_check()
  //   br %set.type.bb
  //
  // set.type.bb:
  //   ; Now fill the remainder of the shadow memory corresponding to the
  //   ; remainder of the bytes of the type with a bad type descriptor.
  //   store %TD, %shadow.data
  //   br %continue.bb
  //
  // good.td.bb:
  //   ; We have a non-trivial mismatch. Call the runtime.
  //   call void @__tysan_check()
  //   br %continue.bb
  //
  // good.bb:
  //  ; We appear to have the right type. Make sure that all other bytes in
  //  ; the type are still marked as interior bytes. If not, call the runtime.
  //   %shadow.data.int = add %shadow.data.int, 0
  //   %l = load (inttoptr %shadow.data.int)
  //   %not.all.interior = icmp sge %l, 0
  //   br %not.all.interior, label %check.rt.bb, label %continue.bb
  //
  //  check.rt.bb:
  //   call void @__tysan_check()
  //   br %continue.bb

  // Flags for the runtime: bit 0 is set for reads, bit 1 for writes.
  Constant *Flags = ConstantInt::get(OrdTy, int(IsRead) | (int(IsWrite) << 1));

  Value *LoadedTD = IRB.CreateLoad(IRB.getPtrTy(), ShadowData, "shadow.desc");
  Value *BadTDCmp = IRB.CreateICmpNE(LoadedTD, TD, "bad.desc");
  Instruction *BadTDTerm, *GoodTDTerm;
  SplitBlockAndInsertIfThenElse(BadTDCmp, &*IRB.GetInsertPoint(), &BadTDTerm,
                                &GoodTDTerm, UnlikelyBW);
  IRB.SetInsertPoint(BadTDTerm);

  // We now know that the types did not match (we're on the slow path). If
  // the type is unknown, then set it.
  Value *NullTDCmp = IRB.CreateIsNull(LoadedTD);
  Instruction *NullTDTerm, *MismatchTerm;
  SplitBlockAndInsertIfThenElse(NullTDCmp, &*IRB.GetInsertPoint(), &NullTDTerm,
                                &MismatchTerm);

  // If the type is unknown, then set the type.
  IRB.SetInsertPoint(NullTDTerm);

  // We're about to set the type. Make sure that all bytes in the value are
  // also of unknown type.
  Value *Size = ConstantInt::get(OrdTy, AccessSize);
  Value *NotAllUnkTD = IRB.getFalse();
  for (uint64_t i = 1; i < AccessSize; ++i) {
    Value *UnkShadowData = IRB.CreateIntToPtr(
        IRB.CreateAdd(ShadowDataInt, ConstantInt::get(IntptrTy, i << PtrShift)),
        Int8PtrPtrTy);
    Value *ILdTD = IRB.CreateLoad(IRB.getPtrTy(), UnkShadowData);
    NotAllUnkTD = IRB.CreateOr(NotAllUnkTD, IRB.CreateIsNotNull(ILdTD));
  }

  // If some trailing byte already has a type, report to the runtime before
  // the type is set.
  Instruction *BeforeSetType = &*IRB.GetInsertPoint();
  Instruction *BadUTDTerm =
      SplitBlockAndInsertIfThen(NotAllUnkTD, BeforeSetType, false, UnlikelyBW);
  IRB.SetInsertPoint(BadUTDTerm);
  IRB.CreateCall(TysanCheck, {IRB.CreateBitCast(Ptr, IRB.getPtrTy()), Size,
                              (Value *)TD, (Value *)Flags});

  IRB.SetInsertPoint(BeforeSetType);
  SetType();

  // We have a non-trivial mismatch. Call the runtime.
  IRB.SetInsertPoint(MismatchTerm);
  IRB.CreateCall(TysanCheck, {IRB.CreateBitCast(Ptr, IRB.getPtrTy()), Size,
                              (Value *)TD, (Value *)Flags});

  // We appear to have the right type. Make sure that all other bytes in
  // the type are still marked as interior bytes. If not, call the runtime.
  IRB.SetInsertPoint(GoodTDTerm);
  Value *NotAllBadTD = IRB.getFalse();
  for (uint64_t i = 1; i < AccessSize; ++i) {
    Value *BadShadowData = IRB.CreateIntToPtr(
        IRB.CreateAdd(ShadowDataInt, ConstantInt::get(IntptrTy, i << PtrShift)),
        Int8PtrPtrTy);
    Value *ILdTD = IRB.CreatePtrToInt(
        IRB.CreateLoad(IRB.getPtrTy(), BadShadowData), IntptrTy);
    // Interior-byte markers are negative; anything >= 0 is not one.
    NotAllBadTD = IRB.CreateOr(
        NotAllBadTD, IRB.CreateICmpSGE(ILdTD, ConstantInt::get(IntptrTy, 0)));
  }

  Instruction *BadITDTerm = SplitBlockAndInsertIfThen(
      NotAllBadTD, &*IRB.GetInsertPoint(), false, UnlikelyBW);
  IRB.SetInsertPoint(BadITDTerm);
  IRB.CreateCall(TysanCheck, {IRB.CreateBitCast(Ptr, IRB.getPtrTy()), Size,
                              (Value *)TD, (Value *)Flags});
  return true;
}
772 
773 bool TypeSanitizer::instrumentMemInst(Value *V, Instruction *ShadowBase,
774                                       Instruction *AppMemMask,
775                                       const DataLayout &DL) {
776   BasicBlock::iterator IP;
777   BasicBlock *BB;
778   Function *F;
779 
780   if (auto *I = dyn_cast<Instruction>(V)) {
781     IP = BasicBlock::iterator(I);
782     BB = I->getParent();
783     F = BB->getParent();
784   } else {
785     auto *A = cast<Argument>(V);
786     F = A->getParent();
787     BB = &F->getEntryBlock();
788     IP = BB->getFirstInsertionPt();
789 
790     // Find the next insert point after both ShadowBase and AppMemMask.
791     if (IP->comesBefore(ShadowBase))
792       IP = ShadowBase->getNextNode()->getIterator();
793     if (IP->comesBefore(AppMemMask))
794       IP = AppMemMask->getNextNode()->getIterator();
795   }
796 
797   Value *Dest, *Size, *Src = nullptr;
798   bool NeedsMemMove = false;
799   IRBuilder<> IRB(BB, IP);
800 
801   if (auto *A = dyn_cast<Argument>(V)) {
802     assert(A->hasByValAttr() && "Type reset for non-byval argument?");
803 
804     Dest = A;
805     Size =
806         ConstantInt::get(IntptrTy, DL.getTypeAllocSize(A->getParamByValType()));
807   } else {
808     auto *I = cast<Instruction>(V);
809     if (auto *MI = dyn_cast<MemIntrinsic>(I)) {
810       if (MI->getDestAddressSpace() != 0)
811         return false;
812 
813       Dest = MI->getDest();
814       Size = MI->getLength();
815 
816       if (auto *MTI = dyn_cast<MemTransferInst>(MI)) {
817         if (MTI->getSourceAddressSpace() == 0) {
818           Src = MTI->getSource();
819           NeedsMemMove = isa<MemMoveInst>(MTI);
820         }
821       }
822     } else if (auto *II = dyn_cast<IntrinsicInst>(I)) {
823       if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
824           II->getIntrinsicID() != Intrinsic::lifetime_end)
825         return false;
826 
827       Size = II->getArgOperand(0);
828       Dest = II->getArgOperand(1);
829     } else if (auto *AI = dyn_cast<AllocaInst>(I)) {
830       // We need to clear the types for new stack allocations (or else we might
831       // read stale type information from a previous function execution).
832 
833       IRB.SetInsertPoint(&*std::next(BasicBlock::iterator(I)));
834       IRB.SetInstDebugLocation(I);
835 
836       Size = IRB.CreateMul(
837           IRB.CreateZExtOrTrunc(AI->getArraySize(), IntptrTy),
838           ConstantInt::get(IntptrTy,
839                            DL.getTypeAllocSize(AI->getAllocatedType())));
840       Dest = I;
841     } else {
842       return false;
843     }
844   }
845 
846   if (!ShadowBase)
847     ShadowBase = getShadowBase(*F);
848   if (!AppMemMask)
849     AppMemMask = getAppMemMask(*F);
850 
851   Value *ShadowDataInt = IRB.CreateAdd(
852       IRB.CreateShl(
853           IRB.CreateAnd(IRB.CreatePtrToInt(Dest, IntptrTy), AppMemMask),
854           PtrShift),
855       ShadowBase);
856   Value *ShadowData = IRB.CreateIntToPtr(ShadowDataInt, IRB.getPtrTy());
857 
858   if (!Src) {
859     IRB.CreateMemSet(ShadowData, IRB.getInt8(0), IRB.CreateShl(Size, PtrShift),
860                      Align(1ull << PtrShift));
861     return true;
862   }
863 
864   Value *SrcShadowDataInt = IRB.CreateAdd(
865       IRB.CreateShl(
866           IRB.CreateAnd(IRB.CreatePtrToInt(Src, IntptrTy), AppMemMask),
867           PtrShift),
868       ShadowBase);
869   Value *SrcShadowData = IRB.CreateIntToPtr(SrcShadowDataInt, IRB.getPtrTy());
870 
871   if (NeedsMemMove) {
872     IRB.CreateMemMove(ShadowData, Align(1ull << PtrShift), SrcShadowData,
873                       Align(1ull << PtrShift), IRB.CreateShl(Size, PtrShift));
874   } else {
875     IRB.CreateMemCpy(ShadowData, Align(1ull << PtrShift), SrcShadowData,
876                      Align(1ull << PtrShift), IRB.CreateShl(Size, PtrShift));
877   }
878 
879   return true;
880 }
881 
882 PreservedAnalyses TypeSanitizerPass::run(Module &M,
883                                          ModuleAnalysisManager &MAM) {
884   Function *TysanCtorFunction;
885   std::tie(TysanCtorFunction, std::ignore) =
886       createSanitizerCtorAndInitFunctions(M, kTysanModuleCtorName,
887                                           kTysanInitName, /*InitArgTypes=*/{},
888                                           /*InitArgs=*/{});
889 
890   TypeSanitizer TySan(M);
891   TySan.instrumentGlobals(M);
892   appendToGlobalCtors(M, TysanCtorFunction, 0);
893 
894   auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
895   for (Function &F : M) {
896     const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
897     TySan.sanitizeFunction(F, TLI);
898   }
899 
900   return PreservedAnalyses::none();
901 }
902