1 //===----- TypeSanitizer.cpp - type-based-aliasing-violation detector -----===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file is a part of TypeSanitizer, a type-based-aliasing-violation 10 // detector. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Transforms/Instrumentation/TypeSanitizer.h" 15 #include "llvm/ADT/SetVector.h" 16 #include "llvm/ADT/SmallSet.h" 17 #include "llvm/ADT/SmallVector.h" 18 #include "llvm/ADT/Statistic.h" 19 #include "llvm/ADT/StringExtras.h" 20 #include "llvm/Analysis/MemoryLocation.h" 21 #include "llvm/Analysis/TargetLibraryInfo.h" 22 #include "llvm/IR/DataLayout.h" 23 #include "llvm/IR/Function.h" 24 #include "llvm/IR/IRBuilder.h" 25 #include "llvm/IR/InstIterator.h" 26 #include "llvm/IR/Instructions.h" 27 #include "llvm/IR/IntrinsicInst.h" 28 #include "llvm/IR/Intrinsics.h" 29 #include "llvm/IR/LLVMContext.h" 30 #include "llvm/IR/MDBuilder.h" 31 #include "llvm/IR/Metadata.h" 32 #include "llvm/IR/Module.h" 33 #include "llvm/IR/Type.h" 34 #include "llvm/ProfileData/InstrProf.h" 35 #include "llvm/Support/CommandLine.h" 36 #include "llvm/Support/Debug.h" 37 #include "llvm/Support/MD5.h" 38 #include "llvm/Support/MathExtras.h" 39 #include "llvm/Support/Regex.h" 40 #include "llvm/Support/raw_ostream.h" 41 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 42 #include "llvm/Transforms/Utils/Local.h" 43 #include "llvm/Transforms/Utils/ModuleUtils.h" 44 45 #include <cctype> 46 47 using namespace llvm; 48 49 #define DEBUG_TYPE "tysan" 50 51 static const char *const kTysanModuleCtorName = "tysan.module_ctor"; 52 static const char *const kTysanInitName = "__tysan_init"; 53 static const char *const kTysanCheckName = "__tysan_check"; 54 static const char *const kTysanGVNamePrefix = "__tysan_v1_"; 55 56 static const char *const kTysanShadowMemoryAddress = 57 "__tysan_shadow_memory_address"; 58 static const char *const kTysanAppMemMask = "__tysan_app_memory_mask"; 59 60 static cl::opt<bool> 61 ClWritesAlwaysSetType("tysan-writes-always-set-type", 62 cl::desc("Writes always set the type"), cl::Hidden, 63 cl::init(false)); 64 65 STATISTIC(NumInstrumentedAccesses, "Number of instrumented accesses"); 66 67 namespace { 68 69 /// TypeSanitizer: instrument the code in module to find type-based aliasing 70 /// violations. 71 struct TypeSanitizer { 72 TypeSanitizer(Module &M); 73 bool sanitizeFunction(Function &F, const TargetLibraryInfo &TLI); 74 void instrumentGlobals(Module &M); 75 76 private: 77 typedef SmallDenseMap<const MDNode *, GlobalVariable *, 8> 78 TypeDescriptorsMapTy; 79 typedef SmallDenseMap<const MDNode *, std::string, 8> TypeNameMapTy; 80 81 void initializeCallbacks(Module &M); 82 83 Instruction *getShadowBase(Function &F); 84 Instruction *getAppMemMask(Function &F); 85 86 bool instrumentWithShadowUpdate(IRBuilder<> &IRB, const MDNode *TBAAMD, 87 Value *Ptr, uint64_t AccessSize, bool IsRead, 88 bool IsWrite, Value *ShadowBase, 89 Value *AppMemMask, bool ForceSetType, 90 bool SanitizeFunction, 91 TypeDescriptorsMapTy &TypeDescriptors, 92 const DataLayout &DL); 93 94 /// Memory-related intrinsics/instructions reset the type of the destination 95 /// memory (including allocas and byval arguments). 96 bool instrumentMemInst(Value *I, Instruction *ShadowBase, 97 Instruction *AppMemMask, const DataLayout &DL); 98 99 std::string getAnonymousStructIdentifier(const MDNode *MD, 100 TypeNameMapTy &TypeNames); 101 bool generateTypeDescriptor(const MDNode *MD, 102 TypeDescriptorsMapTy &TypeDescriptors, 103 TypeNameMapTy &TypeNames, Module &M); 104 bool generateBaseTypeDescriptor(const MDNode *MD, 105 TypeDescriptorsMapTy &TypeDescriptors, 106 TypeNameMapTy &TypeNames, Module &M); 107 108 const Triple TargetTriple; 109 Regex AnonNameRegex; 110 Type *IntptrTy; 111 uint64_t PtrShift; 112 IntegerType *OrdTy; 113 114 /// Callbacks to run-time library are computed in initializeCallbacks. 115 FunctionCallee TysanCheck; 116 FunctionCallee TysanCtorFunction; 117 118 /// Callback to set types for gloabls. 119 Function *TysanGlobalsSetTypeFunction; 120 }; 121 } // namespace 122 123 TypeSanitizer::TypeSanitizer(Module &M) 124 : TargetTriple(Triple(M.getTargetTriple())), 125 AnonNameRegex("^_ZTS.*N[1-9][0-9]*_GLOBAL__N") { 126 const DataLayout &DL = M.getDataLayout(); 127 IntptrTy = DL.getIntPtrType(M.getContext()); 128 PtrShift = countr_zero(IntptrTy->getPrimitiveSizeInBits() / 8); 129 130 TysanGlobalsSetTypeFunction = M.getFunction("__tysan_set_globals_types"); 131 initializeCallbacks(M); 132 } 133 134 void TypeSanitizer::initializeCallbacks(Module &M) { 135 IRBuilder<> IRB(M.getContext()); 136 OrdTy = IRB.getInt32Ty(); 137 138 AttributeList Attr; 139 Attr = Attr.addFnAttribute(M.getContext(), Attribute::NoUnwind); 140 // Initialize the callbacks. 141 TysanCheck = 142 M.getOrInsertFunction(kTysanCheckName, Attr, IRB.getVoidTy(), 143 IRB.getPtrTy(), // Pointer to data to be read. 144 OrdTy, // Size of the data in bytes. 145 IRB.getPtrTy(), // Pointer to type descriptor. 146 OrdTy // Flags. 147 ); 148 149 TysanCtorFunction = 150 M.getOrInsertFunction(kTysanModuleCtorName, Attr, IRB.getVoidTy()); 151 } 152 153 void TypeSanitizer::instrumentGlobals(Module &M) { 154 TysanGlobalsSetTypeFunction = nullptr; 155 156 NamedMDNode *Globals = M.getNamedMetadata("llvm.tysan.globals"); 157 if (!Globals) 158 return; 159 160 TysanGlobalsSetTypeFunction = Function::Create( 161 FunctionType::get(Type::getVoidTy(M.getContext()), false), 162 GlobalValue::InternalLinkage, "__tysan_set_globals_types", &M); 163 BasicBlock *BB = 164 BasicBlock::Create(M.getContext(), "", TysanGlobalsSetTypeFunction); 165 ReturnInst::Create(M.getContext(), BB); 166 167 const DataLayout &DL = M.getDataLayout(); 168 Value *ShadowBase = getShadowBase(*TysanGlobalsSetTypeFunction); 169 Value *AppMemMask = getAppMemMask(*TysanGlobalsSetTypeFunction); 170 TypeDescriptorsMapTy TypeDescriptors; 171 TypeNameMapTy TypeNames; 172 173 for (const auto &GMD : Globals->operands()) { 174 auto *GV = mdconst::dyn_extract_or_null<GlobalVariable>(GMD->getOperand(0)); 175 if (!GV) 176 continue; 177 const MDNode *TBAAMD = cast<MDNode>(GMD->getOperand(1)); 178 if (!generateBaseTypeDescriptor(TBAAMD, TypeDescriptors, TypeNames, M)) 179 continue; 180 181 IRBuilder<> IRB( 182 TysanGlobalsSetTypeFunction->getEntryBlock().getTerminator()); 183 Type *AccessTy = GV->getValueType(); 184 assert(AccessTy->isSized()); 185 uint64_t AccessSize = DL.getTypeStoreSize(AccessTy); 186 instrumentWithShadowUpdate(IRB, TBAAMD, GV, AccessSize, false, false, 187 ShadowBase, AppMemMask, true, false, 188 TypeDescriptors, DL); 189 } 190 191 if (TysanGlobalsSetTypeFunction) { 192 IRBuilder<> IRB(cast<Function>(TysanCtorFunction.getCallee()) 193 ->getEntryBlock() 194 .getTerminator()); 195 IRB.CreateCall(TysanGlobalsSetTypeFunction, {}); 196 } 197 } 198 199 static const char LUT[] = "0123456789abcdef"; 200 201 static std::string encodeName(StringRef Name) { 202 size_t Length = Name.size(); 203 std::string Output = kTysanGVNamePrefix; 204 Output.reserve(Output.size() + 3 * Length); 205 for (size_t i = 0; i < Length; ++i) { 206 const unsigned char c = Name[i]; 207 if (isalnum(c)) { 208 Output.push_back(c); 209 continue; 210 } 211 212 if (c == '_') { 213 Output.append("__"); 214 continue; 215 } 216 217 Output.push_back('_'); 218 Output.push_back(LUT[c >> 4]); 219 Output.push_back(LUT[c & 15]); 220 } 221 222 return Output; 223 } 224 225 std::string 226 TypeSanitizer::getAnonymousStructIdentifier(const MDNode *MD, 227 TypeNameMapTy &TypeNames) { 228 MD5 Hash; 229 230 for (int i = 1, e = MD->getNumOperands(); i < e; i += 2) { 231 const MDNode *MemberNode = dyn_cast<MDNode>(MD->getOperand(i)); 232 if (!MemberNode) 233 return ""; 234 235 auto TNI = TypeNames.find(MemberNode); 236 std::string MemberName; 237 if (TNI != TypeNames.end()) { 238 MemberName = TNI->second; 239 } else { 240 if (MemberNode->getNumOperands() < 1) 241 return ""; 242 MDString *MemberNameNode = dyn_cast<MDString>(MemberNode->getOperand(0)); 243 if (!MemberNameNode) 244 return ""; 245 MemberName = MemberNameNode->getString().str(); 246 if (MemberName.empty()) 247 MemberName = getAnonymousStructIdentifier(MemberNode, TypeNames); 248 if (MemberName.empty()) 249 return ""; 250 TypeNames[MemberNode] = MemberName; 251 } 252 253 Hash.update(MemberName); 254 Hash.update("\0"); 255 256 uint64_t Offset = 257 mdconst::extract<ConstantInt>(MD->getOperand(i + 1))->getZExtValue(); 258 Hash.update(utostr(Offset)); 259 Hash.update("\0"); 260 } 261 262 MD5::MD5Result HashResult; 263 Hash.final(HashResult); 264 return "__anonymous_" + std::string(HashResult.digest().str()); 265 } 266 267 bool TypeSanitizer::generateBaseTypeDescriptor( 268 const MDNode *MD, TypeDescriptorsMapTy &TypeDescriptors, 269 TypeNameMapTy &TypeNames, Module &M) { 270 if (MD->getNumOperands() < 1) 271 return false; 272 273 MDString *NameNode = dyn_cast<MDString>(MD->getOperand(0)); 274 if (!NameNode) 275 return false; 276 277 std::string Name = NameNode->getString().str(); 278 if (Name.empty()) 279 Name = getAnonymousStructIdentifier(MD, TypeNames); 280 if (Name.empty()) 281 return false; 282 TypeNames[MD] = Name; 283 std::string EncodedName = encodeName(Name); 284 285 GlobalVariable *GV = 286 dyn_cast_or_null<GlobalVariable>(M.getNamedValue(EncodedName)); 287 if (GV) { 288 TypeDescriptors[MD] = GV; 289 return true; 290 } 291 292 SmallVector<std::pair<Constant *, uint64_t>> Members; 293 for (int i = 1, e = MD->getNumOperands(); i < e; i += 2) { 294 const MDNode *MemberNode = dyn_cast<MDNode>(MD->getOperand(i)); 295 if (!MemberNode) 296 return false; 297 298 Constant *Member; 299 auto TDI = TypeDescriptors.find(MemberNode); 300 if (TDI != TypeDescriptors.end()) { 301 Member = TDI->second; 302 } else { 303 if (!generateBaseTypeDescriptor(MemberNode, TypeDescriptors, TypeNames, 304 M)) 305 return false; 306 307 Member = TypeDescriptors[MemberNode]; 308 } 309 310 uint64_t Offset = 311 mdconst::extract<ConstantInt>(MD->getOperand(i + 1))->getZExtValue(); 312 313 Members.push_back(std::make_pair(Member, Offset)); 314 } 315 316 // The descriptor for a scalar is: 317 // [2, member count, [type pointer, offset]..., name] 318 319 LLVMContext &C = MD->getContext(); 320 Constant *NameData = ConstantDataArray::getString(C, NameNode->getString()); 321 SmallVector<Type *> TDSubTys; 322 SmallVector<Constant *> TDSubData; 323 324 auto PushTDSub = [&](Constant *C) { 325 TDSubTys.push_back(C->getType()); 326 TDSubData.push_back(C); 327 }; 328 329 PushTDSub(ConstantInt::get(IntptrTy, 2)); 330 PushTDSub(ConstantInt::get(IntptrTy, Members.size())); 331 332 // Types that are in an anonymous namespace are local to this module. 333 // FIXME: This should really be marked by the frontend in the metadata 334 // instead of having us guess this from the mangled name. Moreover, the regex 335 // here can pick up (unlikely) names in the non-reserved namespace (because 336 // it needs to search into the type to pick up cases where the type in the 337 // anonymous namespace is a template parameter, etc.). 338 bool ShouldBeComdat = !AnonNameRegex.match(NameNode->getString()); 339 for (auto &Member : Members) { 340 PushTDSub(Member.first); 341 PushTDSub(ConstantInt::get(IntptrTy, Member.second)); 342 } 343 344 PushTDSub(NameData); 345 346 StructType *TDTy = StructType::get(C, TDSubTys); 347 Constant *TD = ConstantStruct::get(TDTy, TDSubData); 348 349 GlobalVariable *TDGV = 350 new GlobalVariable(TDTy, true, 351 !ShouldBeComdat ? GlobalValue::InternalLinkage 352 : GlobalValue::LinkOnceODRLinkage, 353 TD, EncodedName); 354 M.insertGlobalVariable(TDGV); 355 356 if (ShouldBeComdat) { 357 if (TargetTriple.isOSBinFormatELF()) { 358 Comdat *TDComdat = M.getOrInsertComdat(EncodedName); 359 TDGV->setComdat(TDComdat); 360 } 361 appendToUsed(M, TDGV); 362 } 363 364 TypeDescriptors[MD] = TDGV; 365 return true; 366 } 367 368 bool TypeSanitizer::generateTypeDescriptor( 369 const MDNode *MD, TypeDescriptorsMapTy &TypeDescriptors, 370 TypeNameMapTy &TypeNames, Module &M) { 371 // Here we need to generate a type descriptor corresponding to this TBAA 372 // metadata node. Under the current scheme there are three kinds of TBAA 373 // metadata nodes: scalar nodes, struct nodes, and struct tag nodes. 374 375 if (MD->getNumOperands() < 3) 376 return false; 377 378 const MDNode *BaseNode = dyn_cast<MDNode>(MD->getOperand(0)); 379 if (!BaseNode) 380 return false; 381 382 // This is a struct tag (element-access) node. 383 384 const MDNode *AccessNode = dyn_cast<MDNode>(MD->getOperand(1)); 385 if (!AccessNode) 386 return false; 387 388 Constant *Base; 389 auto TDI = TypeDescriptors.find(BaseNode); 390 if (TDI != TypeDescriptors.end()) { 391 Base = TDI->second; 392 } else { 393 if (!generateBaseTypeDescriptor(BaseNode, TypeDescriptors, TypeNames, M)) 394 return false; 395 396 Base = TypeDescriptors[BaseNode]; 397 } 398 399 Constant *Access; 400 TDI = TypeDescriptors.find(AccessNode); 401 if (TDI != TypeDescriptors.end()) { 402 Access = TDI->second; 403 } else { 404 if (!generateBaseTypeDescriptor(AccessNode, TypeDescriptors, TypeNames, M)) 405 return false; 406 407 Access = TypeDescriptors[AccessNode]; 408 } 409 410 uint64_t Offset = 411 mdconst::extract<ConstantInt>(MD->getOperand(2))->getZExtValue(); 412 std::string EncodedName = 413 std::string(Base->getName()) + "_o_" + utostr(Offset); 414 415 GlobalVariable *GV = 416 dyn_cast_or_null<GlobalVariable>(M.getNamedValue(EncodedName)); 417 if (GV) { 418 TypeDescriptors[MD] = GV; 419 return true; 420 } 421 422 // The descriptor for a scalar is: 423 // [1, base-type pointer, access-type pointer, offset] 424 425 StructType *TDTy = 426 StructType::get(IntptrTy, Base->getType(), Access->getType(), IntptrTy); 427 Constant *TD = 428 ConstantStruct::get(TDTy, ConstantInt::get(IntptrTy, 1), Base, Access, 429 ConstantInt::get(IntptrTy, Offset)); 430 431 bool ShouldBeComdat = cast<GlobalVariable>(Base)->getLinkage() == 432 GlobalValue::LinkOnceODRLinkage; 433 434 GlobalVariable *TDGV = 435 new GlobalVariable(TDTy, true, 436 !ShouldBeComdat ? GlobalValue::InternalLinkage 437 : GlobalValue::LinkOnceODRLinkage, 438 TD, EncodedName); 439 M.insertGlobalVariable(TDGV); 440 441 if (ShouldBeComdat) { 442 if (TargetTriple.isOSBinFormatELF()) { 443 Comdat *TDComdat = M.getOrInsertComdat(EncodedName); 444 TDGV->setComdat(TDComdat); 445 } 446 appendToUsed(M, TDGV); 447 } 448 449 TypeDescriptors[MD] = TDGV; 450 return true; 451 } 452 453 Instruction *TypeSanitizer::getShadowBase(Function &F) { 454 IRBuilder<> IRB(&F.front().front()); 455 Constant *GlobalShadowAddress = 456 F.getParent()->getOrInsertGlobal(kTysanShadowMemoryAddress, IntptrTy); 457 return IRB.CreateLoad(IntptrTy, GlobalShadowAddress, "shadow.base"); 458 } 459 460 Instruction *TypeSanitizer::getAppMemMask(Function &F) { 461 IRBuilder<> IRB(&F.front().front()); 462 Value *GlobalAppMemMask = 463 F.getParent()->getOrInsertGlobal(kTysanAppMemMask, IntptrTy); 464 return IRB.CreateLoad(IntptrTy, GlobalAppMemMask, "app.mem.mask"); 465 } 466 467 /// Collect all loads and stores, and for what TBAA nodes we need to generate 468 /// type descriptors. 469 void collectMemAccessInfo( 470 Function &F, const TargetLibraryInfo &TLI, 471 SmallVectorImpl<std::pair<Instruction *, MemoryLocation>> &MemoryAccesses, 472 SmallSetVector<const MDNode *, 8> &TBAAMetadata, 473 SmallVectorImpl<Value *> &MemTypeResetInsts) { 474 // Traverse all instructions, collect loads/stores/returns, check for calls. 475 for (Instruction &Inst : instructions(F)) { 476 // Skip memory accesses inserted by another instrumentation. 477 if (Inst.getMetadata(LLVMContext::MD_nosanitize)) 478 continue; 479 480 if (isa<LoadInst>(Inst) || isa<StoreInst>(Inst) || 481 isa<AtomicCmpXchgInst>(Inst) || isa<AtomicRMWInst>(Inst)) { 482 MemoryLocation MLoc = MemoryLocation::get(&Inst); 483 484 // Swift errors are special (we can't introduce extra uses on them). 485 if (MLoc.Ptr->isSwiftError()) 486 continue; 487 488 // Skip non-address-space-0 pointers; we don't know how to handle them. 489 Type *PtrTy = cast<PointerType>(MLoc.Ptr->getType()); 490 if (PtrTy->getPointerAddressSpace() != 0) 491 continue; 492 493 if (MLoc.AATags.TBAA) 494 TBAAMetadata.insert(MLoc.AATags.TBAA); 495 MemoryAccesses.push_back(std::make_pair(&Inst, MLoc)); 496 } else if (isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) { 497 if (CallInst *CI = dyn_cast<CallInst>(&Inst)) 498 maybeMarkSanitizerLibraryCallNoBuiltin(CI, &TLI); 499 500 if (isa<MemIntrinsic>(Inst)) { 501 MemTypeResetInsts.push_back(&Inst); 502 } else if (auto *II = dyn_cast<IntrinsicInst>(&Inst)) { 503 if (II->getIntrinsicID() == Intrinsic::lifetime_start || 504 II->getIntrinsicID() == Intrinsic::lifetime_end) 505 MemTypeResetInsts.push_back(&Inst); 506 } 507 } else if (isa<AllocaInst>(Inst)) { 508 MemTypeResetInsts.push_back(&Inst); 509 } 510 } 511 } 512 513 bool TypeSanitizer::sanitizeFunction(Function &F, 514 const TargetLibraryInfo &TLI) { 515 if (F.isDeclaration()) 516 return false; 517 // This is required to prevent instrumenting call to __tysan_init from within 518 // the module constructor. 519 if (&F == TysanCtorFunction.getCallee() || &F == TysanGlobalsSetTypeFunction) 520 return false; 521 initializeCallbacks(*F.getParent()); 522 523 // We need to collect all loads and stores, and know for what TBAA nodes we 524 // need to generate type descriptors. 525 SmallVector<std::pair<Instruction *, MemoryLocation>> MemoryAccesses; 526 SmallSetVector<const MDNode *, 8> TBAAMetadata; 527 SmallVector<Value *> MemTypeResetInsts; 528 collectMemAccessInfo(F, TLI, MemoryAccesses, TBAAMetadata, MemTypeResetInsts); 529 530 // byval arguments also need their types reset (they're new stack memory, 531 // just like allocas). 532 for (auto &A : F.args()) 533 if (A.hasByValAttr()) 534 MemTypeResetInsts.push_back(&A); 535 536 Module &M = *F.getParent(); 537 TypeDescriptorsMapTy TypeDescriptors; 538 TypeNameMapTy TypeNames; 539 bool Res = false; 540 for (const MDNode *MD : TBAAMetadata) { 541 if (TypeDescriptors.count(MD)) 542 continue; 543 544 if (!generateTypeDescriptor(MD, TypeDescriptors, TypeNames, M)) 545 return Res; // Giving up. 546 547 Res = true; 548 } 549 550 const DataLayout &DL = F.getParent()->getDataLayout(); 551 bool SanitizeFunction = F.hasFnAttribute(Attribute::SanitizeType); 552 bool NeedsInstrumentation = 553 MemTypeResetInsts.empty() && MemoryAccesses.empty(); 554 Instruction *ShadowBase = NeedsInstrumentation ? nullptr : getShadowBase(F); 555 Instruction *AppMemMask = NeedsInstrumentation ? nullptr : getAppMemMask(F); 556 for (const auto &[I, MLoc] : MemoryAccesses) { 557 IRBuilder<> IRB(I); 558 assert(MLoc.Size.isPrecise()); 559 if (instrumentWithShadowUpdate( 560 IRB, MLoc.AATags.TBAA, const_cast<Value *>(MLoc.Ptr), 561 MLoc.Size.getValue(), I->mayReadFromMemory(), I->mayWriteToMemory(), 562 ShadowBase, AppMemMask, false, SanitizeFunction, TypeDescriptors, 563 DL)) { 564 ++NumInstrumentedAccesses; 565 Res = true; 566 } 567 } 568 569 for (auto Inst : MemTypeResetInsts) 570 Res |= instrumentMemInst(Inst, ShadowBase, AppMemMask, DL); 571 572 return Res; 573 } 574 575 static Value *convertToShadowDataInt(IRBuilder<> &IRB, Value *Ptr, 576 Type *IntptrTy, uint64_t PtrShift, 577 Value *ShadowBase, Value *AppMemMask) { 578 return IRB.CreateAdd( 579 IRB.CreateShl( 580 IRB.CreateAnd(IRB.CreatePtrToInt(Ptr, IntptrTy, "app.ptr.int"), 581 AppMemMask, "app.ptr.masked"), 582 PtrShift, "app.ptr.shifted"), 583 ShadowBase, "shadow.ptr.int"); 584 } 585 586 bool TypeSanitizer::instrumentWithShadowUpdate( 587 IRBuilder<> &IRB, const MDNode *TBAAMD, Value *Ptr, uint64_t AccessSize, 588 bool IsRead, bool IsWrite, Value *ShadowBase, Value *AppMemMask, 589 bool ForceSetType, bool SanitizeFunction, 590 TypeDescriptorsMapTy &TypeDescriptors, const DataLayout &DL) { 591 Constant *TDGV; 592 if (TBAAMD) 593 TDGV = TypeDescriptors[TBAAMD]; 594 else 595 TDGV = Constant::getNullValue(IRB.getPtrTy()); 596 597 Value *TD = IRB.CreateBitCast(TDGV, IRB.getPtrTy()); 598 599 Value *ShadowDataInt = convertToShadowDataInt(IRB, Ptr, IntptrTy, PtrShift, 600 ShadowBase, AppMemMask); 601 Type *Int8PtrPtrTy = PointerType::get(IRB.getContext(), 0); 602 Value *ShadowData = 603 IRB.CreateIntToPtr(ShadowDataInt, Int8PtrPtrTy, "shadow.ptr"); 604 605 auto SetType = [&]() { 606 IRB.CreateStore(TD, ShadowData); 607 608 // Now fill the remainder of the shadow memory corresponding to the 609 // remainder of the the bytes of the type with a bad type descriptor. 610 for (uint64_t i = 1; i < AccessSize; ++i) { 611 Value *BadShadowData = IRB.CreateIntToPtr( 612 IRB.CreateAdd(ShadowDataInt, 613 ConstantInt::get(IntptrTy, i << PtrShift), 614 "shadow.byte." + Twine(i) + ".offset"), 615 Int8PtrPtrTy, "shadow.byte." + Twine(i) + ".ptr"); 616 617 // This is the TD value, -i, which is used to indicate that the byte is 618 // i bytes after the first byte of the type. 619 Value *BadTD = 620 IRB.CreateIntToPtr(ConstantInt::getSigned(IntptrTy, -i), 621 IRB.getPtrTy(), "bad.descriptor" + Twine(i)); 622 IRB.CreateStore(BadTD, BadShadowData); 623 } 624 }; 625 626 if (ForceSetType || (ClWritesAlwaysSetType && IsWrite)) { 627 // In the mode where writes always set the type, for a write (which does 628 // not also read), we just set the type. 629 SetType(); 630 return true; 631 } 632 633 assert((!ClWritesAlwaysSetType || IsRead) && 634 "should have handled case above"); 635 LLVMContext &C = IRB.getContext(); 636 MDNode *UnlikelyBW = MDBuilder(C).createBranchWeights(1, 100000); 637 638 if (!SanitizeFunction) { 639 // If we're not sanitizing this function, then we only care whether we 640 // need to *set* the type. 641 Value *LoadedTD = IRB.CreateLoad(IRB.getPtrTy(), ShadowData, "shadow.desc"); 642 Value *NullTDCmp = IRB.CreateIsNull(LoadedTD, "desc.set"); 643 Instruction *NullTDTerm = SplitBlockAndInsertIfThen( 644 NullTDCmp, &*IRB.GetInsertPoint(), false, UnlikelyBW); 645 IRB.SetInsertPoint(NullTDTerm); 646 NullTDTerm->getParent()->setName("set.type"); 647 SetType(); 648 return true; 649 } 650 // We need to check the type here. If the type is unknown, then the read 651 // sets the type. If the type is known, then it is checked. If the type 652 // doesn't match, then we call the runtime (which may yet determine that 653 // the mismatch is okay). 654 // 655 // The checks generated below have the following strucutre. 656 // 657 // ; First we load the descriptor for the load from shadow memory and 658 // ; compare it against the type descriptor for the current access type. 659 // %shadow.desc = load ptr %shadow.data 660 // %bad.desc = icmp ne %shadow.desc, %td 661 // br %bad.desc, %bad.bb, %good.bb 662 // 663 // bad.bb: 664 // %shadow.desc.null = icmp eq %shadow.desc, null 665 // br %shadow.desc.null, %null.td.bb, %good.td.bb 666 // 667 // null.td.bb: 668 // ; The typ is unknown, set it if all bytes in the value are also unknown. 669 // ; To check, we load the shadow data for all bytes of the access. For the 670 // ; pseudo code below, assume an access of size 1. 671 // %shadow.data.int = add %shadow.data.int, 0 672 // %l = load (inttoptr %shadow.data.int) 673 // %is.not.null = icmp ne %l, null 674 // %not.all.unknown = %is.not.null 675 // br %no.all.unknown, before.set.type.bb 676 // 677 // before.set.type.bb: 678 // ; Call runtime to check mismatch. 679 // call void @__tysan_check() 680 // br %set.type.bb 681 // 682 // set.type.bb: 683 // ; Now fill the remainder of the shadow memory corresponding to the 684 // ; remainder of the the bytes of the type with a bad type descriptor. 685 // store %TD, %shadow.data 686 // br %continue.bb 687 // 688 // good.td.bb:: 689 // ; We have a non-trivial mismatch. Call the runtime. 690 // call void @__tysan_check() 691 // br %continue.bb 692 // 693 // good.bb: 694 // ; We appear to have the right type. Make sure that all other bytes in 695 // ; the type are still marked as interior bytes. If not, call the runtime. 696 // %shadow.data.int = add %shadow.data.int, 0 697 // %l = load (inttoptr %shadow.data.int) 698 // %not.all.interior = icmp sge %l, 0 699 // br %not.all.interior, label %check.rt.bb, label %continue.bb 700 // 701 // check.rt.bb: 702 // call void @__tysan_check() 703 // br %continue.bb 704 705 Constant *Flags = ConstantInt::get(OrdTy, int(IsRead) | (int(IsWrite) << 1)); 706 707 Value *LoadedTD = IRB.CreateLoad(IRB.getPtrTy(), ShadowData, "shadow.desc"); 708 Value *BadTDCmp = IRB.CreateICmpNE(LoadedTD, TD, "bad.desc"); 709 Instruction *BadTDTerm, *GoodTDTerm; 710 SplitBlockAndInsertIfThenElse(BadTDCmp, &*IRB.GetInsertPoint(), &BadTDTerm, 711 &GoodTDTerm, UnlikelyBW); 712 IRB.SetInsertPoint(BadTDTerm); 713 714 // We now know that the types did not match (we're on the slow path). If 715 // the type is unknown, then set it. 716 Value *NullTDCmp = IRB.CreateIsNull(LoadedTD); 717 Instruction *NullTDTerm, *MismatchTerm; 718 SplitBlockAndInsertIfThenElse(NullTDCmp, &*IRB.GetInsertPoint(), &NullTDTerm, 719 &MismatchTerm); 720 721 // If the type is unknown, then set the type. 722 IRB.SetInsertPoint(NullTDTerm); 723 724 // We're about to set the type. Make sure that all bytes in the value are 725 // also of unknown type. 726 Value *Size = ConstantInt::get(OrdTy, AccessSize); 727 Value *NotAllUnkTD = IRB.getFalse(); 728 for (uint64_t i = 1; i < AccessSize; ++i) { 729 Value *UnkShadowData = IRB.CreateIntToPtr( 730 IRB.CreateAdd(ShadowDataInt, ConstantInt::get(IntptrTy, i << PtrShift)), 731 Int8PtrPtrTy); 732 Value *ILdTD = IRB.CreateLoad(IRB.getPtrTy(), UnkShadowData); 733 NotAllUnkTD = IRB.CreateOr(NotAllUnkTD, IRB.CreateIsNotNull(ILdTD)); 734 } 735 736 Instruction *BeforeSetType = &*IRB.GetInsertPoint(); 737 Instruction *BadUTDTerm = 738 SplitBlockAndInsertIfThen(NotAllUnkTD, BeforeSetType, false, UnlikelyBW); 739 IRB.SetInsertPoint(BadUTDTerm); 740 IRB.CreateCall(TysanCheck, {IRB.CreateBitCast(Ptr, IRB.getPtrTy()), Size, 741 (Value *)TD, (Value *)Flags}); 742 743 IRB.SetInsertPoint(BeforeSetType); 744 SetType(); 745 746 // We have a non-trivial mismatch. Call the runtime. 747 IRB.SetInsertPoint(MismatchTerm); 748 IRB.CreateCall(TysanCheck, {IRB.CreateBitCast(Ptr, IRB.getPtrTy()), Size, 749 (Value *)TD, (Value *)Flags}); 750 751 // We appear to have the right type. Make sure that all other bytes in 752 // the type are still marked as interior bytes. If not, call the runtime. 753 IRB.SetInsertPoint(GoodTDTerm); 754 Value *NotAllBadTD = IRB.getFalse(); 755 for (uint64_t i = 1; i < AccessSize; ++i) { 756 Value *BadShadowData = IRB.CreateIntToPtr( 757 IRB.CreateAdd(ShadowDataInt, ConstantInt::get(IntptrTy, i << PtrShift)), 758 Int8PtrPtrTy); 759 Value *ILdTD = IRB.CreatePtrToInt( 760 IRB.CreateLoad(IRB.getPtrTy(), BadShadowData), IntptrTy); 761 NotAllBadTD = IRB.CreateOr( 762 NotAllBadTD, IRB.CreateICmpSGE(ILdTD, ConstantInt::get(IntptrTy, 0))); 763 } 764 765 Instruction *BadITDTerm = SplitBlockAndInsertIfThen( 766 NotAllBadTD, &*IRB.GetInsertPoint(), false, UnlikelyBW); 767 IRB.SetInsertPoint(BadITDTerm); 768 IRB.CreateCall(TysanCheck, {IRB.CreateBitCast(Ptr, IRB.getPtrTy()), Size, 769 (Value *)TD, (Value *)Flags}); 770 return true; 771 } 772 773 bool TypeSanitizer::instrumentMemInst(Value *V, Instruction *ShadowBase, 774 Instruction *AppMemMask, 775 const DataLayout &DL) { 776 BasicBlock::iterator IP; 777 BasicBlock *BB; 778 Function *F; 779 780 if (auto *I = dyn_cast<Instruction>(V)) { 781 IP = BasicBlock::iterator(I); 782 BB = I->getParent(); 783 F = BB->getParent(); 784 } else { 785 auto *A = cast<Argument>(V); 786 F = A->getParent(); 787 BB = &F->getEntryBlock(); 788 IP = BB->getFirstInsertionPt(); 789 790 // Find the next insert point after both ShadowBase and AppMemMask. 791 if (IP->comesBefore(ShadowBase)) 792 IP = ShadowBase->getNextNode()->getIterator(); 793 if (IP->comesBefore(AppMemMask)) 794 IP = AppMemMask->getNextNode()->getIterator(); 795 } 796 797 Value *Dest, *Size, *Src = nullptr; 798 bool NeedsMemMove = false; 799 IRBuilder<> IRB(BB, IP); 800 801 if (auto *A = dyn_cast<Argument>(V)) { 802 assert(A->hasByValAttr() && "Type reset for non-byval argument?"); 803 804 Dest = A; 805 Size = 806 ConstantInt::get(IntptrTy, DL.getTypeAllocSize(A->getParamByValType())); 807 } else { 808 auto *I = cast<Instruction>(V); 809 if (auto *MI = dyn_cast<MemIntrinsic>(I)) { 810 if (MI->getDestAddressSpace() != 0) 811 return false; 812 813 Dest = MI->getDest(); 814 Size = MI->getLength(); 815 816 if (auto *MTI = dyn_cast<MemTransferInst>(MI)) { 817 if (MTI->getSourceAddressSpace() == 0) { 818 Src = MTI->getSource(); 819 NeedsMemMove = isa<MemMoveInst>(MTI); 820 } 821 } 822 } else if (auto *II = dyn_cast<IntrinsicInst>(I)) { 823 if (II->getIntrinsicID() != Intrinsic::lifetime_start && 824 II->getIntrinsicID() != Intrinsic::lifetime_end) 825 return false; 826 827 Size = II->getArgOperand(0); 828 Dest = II->getArgOperand(1); 829 } else if (auto *AI = dyn_cast<AllocaInst>(I)) { 830 // We need to clear the types for new stack allocations (or else we might 831 // read stale type information from a previous function execution). 832 833 IRB.SetInsertPoint(&*std::next(BasicBlock::iterator(I))); 834 IRB.SetInstDebugLocation(I); 835 836 Size = IRB.CreateMul( 837 IRB.CreateZExtOrTrunc(AI->getArraySize(), IntptrTy), 838 ConstantInt::get(IntptrTy, 839 DL.getTypeAllocSize(AI->getAllocatedType()))); 840 Dest = I; 841 } else { 842 return false; 843 } 844 } 845 846 if (!ShadowBase) 847 ShadowBase = getShadowBase(*F); 848 if (!AppMemMask) 849 AppMemMask = getAppMemMask(*F); 850 851 Value *ShadowDataInt = IRB.CreateAdd( 852 IRB.CreateShl( 853 IRB.CreateAnd(IRB.CreatePtrToInt(Dest, IntptrTy), AppMemMask), 854 PtrShift), 855 ShadowBase); 856 Value *ShadowData = IRB.CreateIntToPtr(ShadowDataInt, IRB.getPtrTy()); 857 858 if (!Src) { 859 IRB.CreateMemSet(ShadowData, IRB.getInt8(0), IRB.CreateShl(Size, PtrShift), 860 Align(1ull << PtrShift)); 861 return true; 862 } 863 864 Value *SrcShadowDataInt = IRB.CreateAdd( 865 IRB.CreateShl( 866 IRB.CreateAnd(IRB.CreatePtrToInt(Src, IntptrTy), AppMemMask), 867 PtrShift), 868 ShadowBase); 869 Value *SrcShadowData = IRB.CreateIntToPtr(SrcShadowDataInt, IRB.getPtrTy()); 870 871 if (NeedsMemMove) { 872 IRB.CreateMemMove(ShadowData, Align(1ull << PtrShift), SrcShadowData, 873 Align(1ull << PtrShift), IRB.CreateShl(Size, PtrShift)); 874 } else { 875 IRB.CreateMemCpy(ShadowData, Align(1ull << PtrShift), SrcShadowData, 876 Align(1ull << PtrShift), IRB.CreateShl(Size, PtrShift)); 877 } 878 879 return true; 880 } 881 882 PreservedAnalyses TypeSanitizerPass::run(Module &M, 883 ModuleAnalysisManager &MAM) { 884 Function *TysanCtorFunction; 885 std::tie(TysanCtorFunction, std::ignore) = 886 createSanitizerCtorAndInitFunctions(M, kTysanModuleCtorName, 887 kTysanInitName, /*InitArgTypes=*/{}, 888 /*InitArgs=*/{}); 889 890 TypeSanitizer TySan(M); 891 TySan.instrumentGlobals(M); 892 appendToGlobalCtors(M, TysanCtorFunction, 0); 893 894 auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 895 for (Function &F : M) { 896 const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F); 897 TySan.sanitizeFunction(F, TLI); 898 } 899 900 return PreservedAnalyses::none(); 901 } 902