1 //===- SanitizerBinaryMetadata.cpp - binary analysis sanitizers metadata --===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file is a part of SanitizerBinaryMetadata. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h" 14 #include "llvm/ADT/SetVector.h" 15 #include "llvm/ADT/SmallVector.h" 16 #include "llvm/ADT/Statistic.h" 17 #include "llvm/ADT/StringExtras.h" 18 #include "llvm/ADT/StringRef.h" 19 #include "llvm/ADT/Twine.h" 20 #include "llvm/Analysis/CaptureTracking.h" 21 #include "llvm/Analysis/ValueTracking.h" 22 #include "llvm/IR/Constant.h" 23 #include "llvm/IR/DerivedTypes.h" 24 #include "llvm/IR/Function.h" 25 #include "llvm/IR/GlobalValue.h" 26 #include "llvm/IR/GlobalVariable.h" 27 #include "llvm/IR/IRBuilder.h" 28 #include "llvm/IR/Instruction.h" 29 #include "llvm/IR/Instructions.h" 30 #include "llvm/IR/LLVMContext.h" 31 #include "llvm/IR/MDBuilder.h" 32 #include "llvm/IR/Metadata.h" 33 #include "llvm/IR/Module.h" 34 #include "llvm/IR/Type.h" 35 #include "llvm/IR/Value.h" 36 #include "llvm/ProfileData/InstrProf.h" 37 #include "llvm/Support/Allocator.h" 38 #include "llvm/Support/CommandLine.h" 39 #include "llvm/Support/SpecialCaseList.h" 40 #include "llvm/Support/StringSaver.h" 41 #include "llvm/Support/VirtualFileSystem.h" 42 #include "llvm/TargetParser/Triple.h" 43 #include "llvm/Transforms/Utils/ModuleUtils.h" 44 45 #include <array> 46 #include <cstdint> 47 #include <memory> 48 49 using namespace llvm; 50 51 #define DEBUG_TYPE "sanmd" 52 53 namespace { 54 55 //===--- Constants --------------------------------------------------------===// 56 57 constexpr uint32_t kVersionBase = 2; // occupies lower 16 bits 58 constexpr uint32_t kVersionPtrSizeRel = (1u << 16); // offsets are pointer-sized 59 constexpr int kCtorDtorPriority = 2; 60 61 // Pairs of names of initialization callback functions and which section 62 // contains the relevant metadata. 63 class MetadataInfo { 64 public: 65 const StringRef FunctionPrefix; 66 const StringRef SectionSuffix; 67 68 static const MetadataInfo Covered; 69 static const MetadataInfo Atomics; 70 71 private: 72 // Forbid construction elsewhere. 73 explicit constexpr MetadataInfo(StringRef FunctionPrefix, 74 StringRef SectionSuffix) 75 : FunctionPrefix(FunctionPrefix), SectionSuffix(SectionSuffix) {} 76 }; 77 const MetadataInfo MetadataInfo::Covered{ 78 "__sanitizer_metadata_covered", kSanitizerBinaryMetadataCoveredSection}; 79 const MetadataInfo MetadataInfo::Atomics{ 80 "__sanitizer_metadata_atomics", kSanitizerBinaryMetadataAtomicsSection}; 81 82 // The only instances of MetadataInfo are the constants above, so a set of 83 // them may simply store pointers to them. To deterministically generate code, 84 // we need to use a set with stable iteration order, such as SetVector. 85 using MetadataInfoSet = SetVector<const MetadataInfo *>; 86 87 //===--- Command-line options ---------------------------------------------===// 88 89 cl::opt<bool> ClWeakCallbacks( 90 "sanitizer-metadata-weak-callbacks", 91 cl::desc("Declare callbacks extern weak, and only call if non-null."), 92 cl::Hidden, cl::init(true)); 93 cl::opt<bool> 94 ClNoSanitize("sanitizer-metadata-nosanitize-attr", 95 cl::desc("Mark some metadata features uncovered in functions " 96 "with associated no_sanitize attributes."), 97 cl::Hidden, cl::init(true)); 98 99 cl::opt<bool> ClEmitCovered("sanitizer-metadata-covered", 100 cl::desc("Emit PCs for covered functions."), 101 cl::Hidden, cl::init(false)); 102 cl::opt<bool> ClEmitAtomics("sanitizer-metadata-atomics", 103 cl::desc("Emit PCs for atomic operations."), 104 cl::Hidden, cl::init(false)); 105 cl::opt<bool> ClEmitUAR("sanitizer-metadata-uar", 106 cl::desc("Emit PCs for start of functions that are " 107 "subject for use-after-return checking"), 108 cl::Hidden, cl::init(false)); 109 110 //===--- Statistics -------------------------------------------------------===// 111 112 STATISTIC(NumMetadataCovered, "Metadata attached to covered functions"); 113 STATISTIC(NumMetadataAtomics, "Metadata attached to atomics"); 114 STATISTIC(NumMetadataUAR, "Metadata attached to UAR functions"); 115 116 //===----------------------------------------------------------------------===// 117 118 // Apply opt overrides. 119 SanitizerBinaryMetadataOptions && 120 transformOptionsFromCl(SanitizerBinaryMetadataOptions &&Opts) { 121 Opts.Covered |= ClEmitCovered; 122 Opts.Atomics |= ClEmitAtomics; 123 Opts.UAR |= ClEmitUAR; 124 return std::move(Opts); 125 } 126 127 class SanitizerBinaryMetadata { 128 public: 129 SanitizerBinaryMetadata(Module &M, SanitizerBinaryMetadataOptions Opts, 130 std::unique_ptr<SpecialCaseList> Ignorelist) 131 : Mod(M), Options(transformOptionsFromCl(std::move(Opts))), 132 Ignorelist(std::move(Ignorelist)), TargetTriple(M.getTargetTriple()), 133 VersionStr(utostr(getVersion())), IRB(M.getContext()) { 134 // FIXME: Make it work with other formats. 135 assert(TargetTriple.isOSBinFormatELF() && "ELF only"); 136 assert(!(TargetTriple.isNVPTX() || TargetTriple.isAMDGPU()) && 137 "Device targets are not supported"); 138 } 139 140 bool run(); 141 142 private: 143 uint32_t getVersion() const { 144 uint32_t Version = kVersionBase; 145 const auto CM = Mod.getCodeModel(); 146 if (CM.has_value() && (*CM == CodeModel::Medium || *CM == CodeModel::Large)) 147 Version |= kVersionPtrSizeRel; 148 return Version; 149 } 150 151 void runOn(Function &F, MetadataInfoSet &MIS); 152 153 // Determines which set of metadata to collect for this instruction. 154 // 155 // Returns true if covered metadata is required to unambiguously interpret 156 // other metadata. For example, if we are interested in atomics metadata, any 157 // function with memory operations (atomic or not) requires covered metadata 158 // to determine if a memory operation is atomic or not in modules compiled 159 // with SanitizerBinaryMetadata. 160 bool runOn(Instruction &I, MetadataInfoSet &MIS, MDBuilder &MDB, 161 uint64_t &FeatureMask); 162 163 // Get start/end section marker pointer. 164 GlobalVariable *getSectionMarker(const Twine &MarkerName, Type *Ty); 165 166 // Returns the target-dependent section name. 167 StringRef getSectionName(StringRef SectionSuffix); 168 169 // Returns the section start marker name. 170 StringRef getSectionStart(StringRef SectionSuffix); 171 172 // Returns the section end marker name. 173 StringRef getSectionEnd(StringRef SectionSuffix); 174 175 // Returns true if the access to the address should be considered "atomic". 176 bool pretendAtomicAccess(const Value *Addr); 177 178 Module &Mod; 179 const SanitizerBinaryMetadataOptions Options; 180 std::unique_ptr<SpecialCaseList> Ignorelist; 181 const Triple TargetTriple; 182 const std::string VersionStr; 183 IRBuilder<> IRB; 184 BumpPtrAllocator Alloc; 185 UniqueStringSaver StringPool{Alloc}; 186 }; 187 188 bool SanitizerBinaryMetadata::run() { 189 MetadataInfoSet MIS; 190 191 for (Function &F : Mod) 192 runOn(F, MIS); 193 194 if (MIS.empty()) 195 return false; 196 197 // 198 // Setup constructors and call all initialization functions for requested 199 // metadata features. 200 // 201 202 auto *PtrTy = IRB.getPtrTy(); 203 auto *Int32Ty = IRB.getInt32Ty(); 204 const std::array<Type *, 3> InitTypes = {Int32Ty, PtrTy, PtrTy}; 205 auto *Version = ConstantInt::get(Int32Ty, getVersion()); 206 207 for (const MetadataInfo *MI : MIS) { 208 const std::array<Value *, InitTypes.size()> InitArgs = { 209 Version, 210 getSectionMarker(getSectionStart(MI->SectionSuffix), PtrTy), 211 getSectionMarker(getSectionEnd(MI->SectionSuffix), PtrTy), 212 }; 213 214 // Calls to the initialization functions with different versions cannot be 215 // merged. Give the structors unique names based on the version, which will 216 // also be used as the COMDAT key. 217 const std::string StructorPrefix = (MI->FunctionPrefix + VersionStr).str(); 218 219 // We declare the _add and _del functions as weak, and only call them if 220 // there is a valid symbol linked. This allows building binaries with 221 // semantic metadata, but without having callbacks. When a tool that wants 222 // the metadata is linked which provides the callbacks, they will be called. 223 Function *Ctor = 224 createSanitizerCtorAndInitFunctions( 225 Mod, StructorPrefix + ".module_ctor", 226 (MI->FunctionPrefix + "_add").str(), InitTypes, InitArgs, 227 /*VersionCheckName=*/StringRef(), /*Weak=*/ClWeakCallbacks) 228 .first; 229 Function *Dtor = 230 createSanitizerCtorAndInitFunctions( 231 Mod, StructorPrefix + ".module_dtor", 232 (MI->FunctionPrefix + "_del").str(), InitTypes, InitArgs, 233 /*VersionCheckName=*/StringRef(), /*Weak=*/ClWeakCallbacks) 234 .first; 235 Constant *CtorComdatKey = nullptr; 236 Constant *DtorComdatKey = nullptr; 237 if (TargetTriple.supportsCOMDAT()) { 238 // Use COMDAT to deduplicate constructor/destructor function. The COMDAT 239 // key needs to be a non-local linkage. 240 Ctor->setComdat(Mod.getOrInsertComdat(Ctor->getName())); 241 Dtor->setComdat(Mod.getOrInsertComdat(Dtor->getName())); 242 Ctor->setLinkage(GlobalValue::ExternalLinkage); 243 Dtor->setLinkage(GlobalValue::ExternalLinkage); 244 // DSOs should _not_ call another constructor/destructor! 245 Ctor->setVisibility(GlobalValue::HiddenVisibility); 246 Dtor->setVisibility(GlobalValue::HiddenVisibility); 247 CtorComdatKey = Ctor; 248 DtorComdatKey = Dtor; 249 } 250 appendToGlobalCtors(Mod, Ctor, kCtorDtorPriority, CtorComdatKey); 251 appendToGlobalDtors(Mod, Dtor, kCtorDtorPriority, DtorComdatKey); 252 } 253 254 return true; 255 } 256 257 void SanitizerBinaryMetadata::runOn(Function &F, MetadataInfoSet &MIS) { 258 if (F.empty()) 259 return; 260 // Do not apply any instrumentation for naked functions. 261 if (F.hasFnAttribute(Attribute::Naked)) 262 return; 263 if (F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation)) 264 return; 265 if (Ignorelist && Ignorelist->inSection("metadata", "fun", F.getName())) 266 return; 267 // Don't touch available_externally functions, their actual body is elsewhere. 268 if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) 269 return; 270 271 MDBuilder MDB(F.getContext()); 272 273 // The metadata features enabled for this function, stored along covered 274 // metadata (if enabled). 275 uint64_t FeatureMask = 0; 276 // Don't emit unnecessary covered metadata for all functions to save space. 277 bool RequiresCovered = false; 278 279 if (Options.Atomics || Options.UAR) { 280 for (BasicBlock &BB : F) 281 for (Instruction &I : BB) 282 RequiresCovered |= runOn(I, MIS, MDB, FeatureMask); 283 } 284 285 if (ClNoSanitize && F.hasFnAttribute("no_sanitize_thread")) 286 FeatureMask &= ~kSanitizerBinaryMetadataAtomics; 287 if (F.isVarArg()) 288 FeatureMask &= ~kSanitizerBinaryMetadataUAR; 289 if (FeatureMask & kSanitizerBinaryMetadataUAR) { 290 RequiresCovered = true; 291 NumMetadataUAR++; 292 } 293 294 // Covered metadata is always emitted if explicitly requested, otherwise only 295 // if some other metadata requires it to unambiguously interpret it for 296 // modules compiled with SanitizerBinaryMetadata. 297 if (Options.Covered || (FeatureMask && RequiresCovered)) { 298 NumMetadataCovered++; 299 const auto *MI = &MetadataInfo::Covered; 300 MIS.insert(MI); 301 const StringRef Section = getSectionName(MI->SectionSuffix); 302 // The feature mask will be placed after the function size. 303 Constant *CFM = IRB.getInt64(FeatureMask); 304 F.setMetadata(LLVMContext::MD_pcsections, 305 MDB.createPCSections({{Section, {CFM}}})); 306 } 307 } 308 309 bool isUARSafeCall(CallInst *CI) { 310 auto *F = CI->getCalledFunction(); 311 // There are no intrinsic functions that leak arguments. 312 // If the called function does not return, the current function 313 // does not return as well, so no possibility of use-after-return. 314 // Sanitizer function also don't leak or don't return. 315 // It's safe to both pass pointers to local variables to them 316 // and to tail-call them. 317 return F && (F->isIntrinsic() || F->doesNotReturn() || 318 F->getName().starts_with("__asan_") || 319 F->getName().starts_with("__hwsan_") || 320 F->getName().starts_with("__ubsan_") || 321 F->getName().starts_with("__msan_") || 322 F->getName().starts_with("__tsan_")); 323 } 324 325 bool hasUseAfterReturnUnsafeUses(Value &V) { 326 for (User *U : V.users()) { 327 if (auto *I = dyn_cast<Instruction>(U)) { 328 if (I->isLifetimeStartOrEnd() || I->isDroppable()) 329 continue; 330 if (auto *CI = dyn_cast<CallInst>(U)) { 331 if (isUARSafeCall(CI)) 332 continue; 333 } 334 if (isa<LoadInst>(U)) 335 continue; 336 if (auto *SI = dyn_cast<StoreInst>(U)) { 337 // If storing TO the alloca, then the address isn't taken. 338 if (SI->getOperand(1) == &V) 339 continue; 340 } 341 if (auto *GEPI = dyn_cast<GetElementPtrInst>(U)) { 342 if (!hasUseAfterReturnUnsafeUses(*GEPI)) 343 continue; 344 } else if (auto *BCI = dyn_cast<BitCastInst>(U)) { 345 if (!hasUseAfterReturnUnsafeUses(*BCI)) 346 continue; 347 } 348 } 349 return true; 350 } 351 return false; 352 } 353 354 bool useAfterReturnUnsafe(Instruction &I) { 355 if (isa<AllocaInst>(I)) 356 return hasUseAfterReturnUnsafeUses(I); 357 // Tail-called functions are not necessary intercepted 358 // at runtime because there is no call instruction. 359 // So conservatively mark the caller as requiring checking. 360 else if (auto *CI = dyn_cast<CallInst>(&I)) 361 return CI->isTailCall() && !isUARSafeCall(CI); 362 return false; 363 } 364 365 bool SanitizerBinaryMetadata::pretendAtomicAccess(const Value *Addr) { 366 if (!Addr) 367 return false; 368 369 Addr = Addr->stripInBoundsOffsets(); 370 auto *GV = dyn_cast<GlobalVariable>(Addr); 371 if (!GV) 372 return false; 373 374 // Some compiler-generated accesses are known racy, to avoid false positives 375 // in data-race analysis pretend they're atomic. 376 if (GV->hasSection()) { 377 const auto OF = Triple(Mod.getTargetTriple()).getObjectFormat(); 378 const auto ProfSec = 379 getInstrProfSectionName(IPSK_cnts, OF, /*AddSegmentInfo=*/false); 380 if (GV->getSection().ends_with(ProfSec)) 381 return true; 382 } 383 if (GV->getName().starts_with("__llvm_gcov") || 384 GV->getName().starts_with("__llvm_gcda")) 385 return true; 386 387 return false; 388 } 389 390 // Returns true if the memory at `Addr` may be shared with other threads. 391 bool maybeSharedMutable(const Value *Addr) { 392 // By default assume memory may be shared. 393 if (!Addr) 394 return true; 395 396 if (isa<AllocaInst>(getUnderlyingObject(Addr)) && 397 !PointerMayBeCaptured(Addr, true, true)) 398 return false; // Object is on stack but does not escape. 399 400 Addr = Addr->stripInBoundsOffsets(); 401 if (auto *GV = dyn_cast<GlobalVariable>(Addr)) { 402 if (GV->isConstant()) 403 return false; // Shared, but not mutable. 404 } 405 406 return true; 407 } 408 409 bool SanitizerBinaryMetadata::runOn(Instruction &I, MetadataInfoSet &MIS, 410 MDBuilder &MDB, uint64_t &FeatureMask) { 411 SmallVector<const MetadataInfo *, 1> InstMetadata; 412 bool RequiresCovered = false; 413 414 // Only call if at least 1 type of metadata is requested. 415 assert(Options.UAR || Options.Atomics); 416 417 if (Options.UAR && !(FeatureMask & kSanitizerBinaryMetadataUAR)) { 418 if (useAfterReturnUnsafe(I)) 419 FeatureMask |= kSanitizerBinaryMetadataUAR; 420 } 421 422 if (Options.Atomics) { 423 const Value *Addr = nullptr; 424 if (auto *SI = dyn_cast<StoreInst>(&I)) 425 Addr = SI->getPointerOperand(); 426 else if (auto *LI = dyn_cast<LoadInst>(&I)) 427 Addr = LI->getPointerOperand(); 428 429 if (I.mayReadOrWriteMemory() && maybeSharedMutable(Addr)) { 430 auto SSID = getAtomicSyncScopeID(&I); 431 if ((SSID.has_value() && *SSID != SyncScope::SingleThread) || 432 pretendAtomicAccess(Addr)) { 433 NumMetadataAtomics++; 434 InstMetadata.push_back(&MetadataInfo::Atomics); 435 } 436 FeatureMask |= kSanitizerBinaryMetadataAtomics; 437 RequiresCovered = true; 438 } 439 } 440 441 // Attach MD_pcsections to instruction. 442 if (!InstMetadata.empty()) { 443 MIS.insert(InstMetadata.begin(), InstMetadata.end()); 444 SmallVector<MDBuilder::PCSection, 1> Sections; 445 for (const auto &MI : InstMetadata) 446 Sections.push_back({getSectionName(MI->SectionSuffix), {}}); 447 I.setMetadata(LLVMContext::MD_pcsections, MDB.createPCSections(Sections)); 448 } 449 450 return RequiresCovered; 451 } 452 453 GlobalVariable * 454 SanitizerBinaryMetadata::getSectionMarker(const Twine &MarkerName, Type *Ty) { 455 // Use ExternalWeak so that if all sections are discarded due to section 456 // garbage collection, the linker will not report undefined symbol errors. 457 auto *Marker = new GlobalVariable(Mod, Ty, /*isConstant=*/false, 458 GlobalVariable::ExternalWeakLinkage, 459 /*Initializer=*/nullptr, MarkerName); 460 Marker->setVisibility(GlobalValue::HiddenVisibility); 461 return Marker; 462 } 463 464 StringRef SanitizerBinaryMetadata::getSectionName(StringRef SectionSuffix) { 465 // FIXME: Other TargetTriples. 466 // Request ULEB128 encoding for all integer constants. 467 return StringPool.save(SectionSuffix + VersionStr + "!C"); 468 } 469 470 StringRef SanitizerBinaryMetadata::getSectionStart(StringRef SectionSuffix) { 471 // Twine only concatenates 2 strings; with >2 strings, concatenating them 472 // creates Twine temporaries, and returning the final Twine no longer works 473 // because we'd end up with a stack-use-after-return. So here we also use the 474 // StringPool to store the new string. 475 return StringPool.save("__start_" + SectionSuffix + VersionStr); 476 } 477 478 StringRef SanitizerBinaryMetadata::getSectionEnd(StringRef SectionSuffix) { 479 return StringPool.save("__stop_" + SectionSuffix + VersionStr); 480 } 481 482 } // namespace 483 484 SanitizerBinaryMetadataPass::SanitizerBinaryMetadataPass( 485 SanitizerBinaryMetadataOptions Opts, ArrayRef<std::string> IgnorelistFiles) 486 : Options(std::move(Opts)), IgnorelistFiles(std::move(IgnorelistFiles)) {} 487 488 PreservedAnalyses 489 SanitizerBinaryMetadataPass::run(Module &M, AnalysisManager<Module> &AM) { 490 std::unique_ptr<SpecialCaseList> Ignorelist; 491 if (!IgnorelistFiles.empty()) { 492 Ignorelist = SpecialCaseList::createOrDie(IgnorelistFiles, 493 *vfs::getRealFileSystem()); 494 if (Ignorelist->inSection("metadata", "src", M.getSourceFileName())) 495 return PreservedAnalyses::all(); 496 } 497 498 SanitizerBinaryMetadata Pass(M, Options, std::move(Ignorelist)); 499 if (Pass.run()) 500 return PreservedAnalyses::none(); 501 return PreservedAnalyses::all(); 502 } 503