197c22205SMarco Elver //===- SanitizerBinaryMetadata.cpp - binary analysis sanitizers metadata --===// 297c22205SMarco Elver // 397c22205SMarco Elver // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 497c22205SMarco Elver // See https://llvm.org/LICENSE.txt for license information. 597c22205SMarco Elver // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 697c22205SMarco Elver // 797c22205SMarco Elver //===----------------------------------------------------------------------===// 897c22205SMarco Elver // 997c22205SMarco Elver // This file is a part of SanitizerBinaryMetadata. 1097c22205SMarco Elver // 1197c22205SMarco Elver //===----------------------------------------------------------------------===// 1297c22205SMarco Elver 1397c22205SMarco Elver #include "llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h" 1497c22205SMarco Elver #include "llvm/ADT/SetVector.h" 1597c22205SMarco Elver #include "llvm/ADT/SmallVector.h" 1697c22205SMarco Elver #include "llvm/ADT/Statistic.h" 17f5b9e11eSMarco Elver #include "llvm/ADT/StringExtras.h" 1897c22205SMarco Elver #include "llvm/ADT/StringRef.h" 1997c22205SMarco Elver #include "llvm/ADT/Twine.h" 20960b4c3bSMarco Elver #include "llvm/Analysis/CaptureTracking.h" 21960b4c3bSMarco Elver #include "llvm/Analysis/ValueTracking.h" 2297c22205SMarco Elver #include "llvm/IR/Constant.h" 2397c22205SMarco Elver #include "llvm/IR/DerivedTypes.h" 2497c22205SMarco Elver #include "llvm/IR/Function.h" 2597c22205SMarco Elver #include "llvm/IR/GlobalValue.h" 2697c22205SMarco Elver #include "llvm/IR/GlobalVariable.h" 2797c22205SMarco Elver #include "llvm/IR/IRBuilder.h" 2897c22205SMarco Elver #include "llvm/IR/Instruction.h" 2997c22205SMarco Elver #include "llvm/IR/Instructions.h" 3097c22205SMarco Elver #include "llvm/IR/LLVMContext.h" 3197c22205SMarco Elver #include "llvm/IR/MDBuilder.h" 3297c22205SMarco Elver #include "llvm/IR/Metadata.h" 3397c22205SMarco Elver #include "llvm/IR/Module.h" 3497c22205SMarco Elver #include "llvm/IR/Type.h" 3597c22205SMarco Elver #include "llvm/IR/Value.h" 36764c88a5SMarco Elver #include "llvm/ProfileData/InstrProf.h" 37bf9814b7SMarco Elver #include "llvm/Support/Allocator.h" 3897c22205SMarco Elver #include "llvm/Support/CommandLine.h" 39421215b9SMarco Elver #include "llvm/Support/SpecialCaseList.h" 40bf9814b7SMarco Elver #include "llvm/Support/StringSaver.h" 41421215b9SMarco Elver #include "llvm/Support/VirtualFileSystem.h" 4262c7f035SArchibald Elliott #include "llvm/TargetParser/Triple.h" 4397c22205SMarco Elver #include "llvm/Transforms/Utils/ModuleUtils.h" 4497c22205SMarco Elver 4597c22205SMarco Elver #include <array> 4697c22205SMarco Elver #include <cstdint> 47421215b9SMarco Elver #include <memory> 4897c22205SMarco Elver 4997c22205SMarco Elver using namespace llvm; 5097c22205SMarco Elver 5197c22205SMarco Elver #define DEBUG_TYPE "sanmd" 5297c22205SMarco Elver 5397c22205SMarco Elver namespace { 5497c22205SMarco Elver 5597c22205SMarco Elver //===--- Constants --------------------------------------------------------===// 5697c22205SMarco Elver 573d53b527SMarco Elver constexpr uint32_t kVersionBase = 2; // occupies lower 16 bits 5897c22205SMarco Elver constexpr uint32_t kVersionPtrSizeRel = (1u << 16); // offsets are pointer-sized 5997c22205SMarco Elver constexpr int kCtorDtorPriority = 2; 6097c22205SMarco Elver 6197c22205SMarco Elver // Pairs of names of initialization callback functions and which section 6297c22205SMarco Elver // contains the relevant metadata. 6397c22205SMarco Elver class MetadataInfo { 6497c22205SMarco Elver public: 6597c22205SMarco Elver const StringRef FunctionPrefix; 6697c22205SMarco Elver const StringRef SectionSuffix; 6797c22205SMarco Elver 6897c22205SMarco Elver static const MetadataInfo Covered; 6997c22205SMarco Elver static const MetadataInfo Atomics; 7097c22205SMarco Elver 7197c22205SMarco Elver private: 7297c22205SMarco Elver // Forbid construction elsewhere. 7397c22205SMarco Elver explicit constexpr MetadataInfo(StringRef FunctionPrefix, 74960b4c3bSMarco Elver StringRef SectionSuffix) 75960b4c3bSMarco Elver : FunctionPrefix(FunctionPrefix), SectionSuffix(SectionSuffix) {} 7697c22205SMarco Elver }; 77960b4c3bSMarco Elver const MetadataInfo MetadataInfo::Covered{ 78960b4c3bSMarco Elver "__sanitizer_metadata_covered", kSanitizerBinaryMetadataCoveredSection}; 79960b4c3bSMarco Elver const MetadataInfo MetadataInfo::Atomics{ 80960b4c3bSMarco Elver "__sanitizer_metadata_atomics", kSanitizerBinaryMetadataAtomicsSection}; 8197c22205SMarco Elver 8297c22205SMarco Elver // The only instances of MetadataInfo are the constants above, so a set of 8397c22205SMarco Elver // them may simply store pointers to them. To deterministically generate code, 8497c22205SMarco Elver // we need to use a set with stable iteration order, such as SetVector. 8597c22205SMarco Elver using MetadataInfoSet = SetVector<const MetadataInfo *>; 8697c22205SMarco Elver 8797c22205SMarco Elver //===--- Command-line options ---------------------------------------------===// 8897c22205SMarco Elver 895265adc7SMarco Elver cl::opt<bool> ClWeakCallbacks( 905265adc7SMarco Elver "sanitizer-metadata-weak-callbacks", 915265adc7SMarco Elver cl::desc("Declare callbacks extern weak, and only call if non-null."), 925265adc7SMarco Elver cl::Hidden, cl::init(true)); 935f605e25SMarco Elver cl::opt<bool> 945f605e25SMarco Elver ClNoSanitize("sanitizer-metadata-nosanitize-attr", 955f605e25SMarco Elver cl::desc("Mark some metadata features uncovered in functions " 965f605e25SMarco Elver "with associated no_sanitize attributes."), 975f605e25SMarco Elver cl::Hidden, cl::init(true)); 985265adc7SMarco Elver 9997c22205SMarco Elver cl::opt<bool> ClEmitCovered("sanitizer-metadata-covered", 10097c22205SMarco Elver cl::desc("Emit PCs for covered functions."), 10197c22205SMarco Elver cl::Hidden, cl::init(false)); 10297c22205SMarco Elver cl::opt<bool> ClEmitAtomics("sanitizer-metadata-atomics", 10397c22205SMarco Elver cl::desc("Emit PCs for atomic operations."), 10497c22205SMarco Elver cl::Hidden, cl::init(false)); 105dbe8c2c3SDmitry Vyukov cl::opt<bool> ClEmitUAR("sanitizer-metadata-uar", 106dbe8c2c3SDmitry Vyukov cl::desc("Emit PCs for start of functions that are " 107dbe8c2c3SDmitry Vyukov "subject for use-after-return checking"), 108dbe8c2c3SDmitry Vyukov cl::Hidden, cl::init(false)); 10997c22205SMarco Elver 11097c22205SMarco Elver //===--- Statistics -------------------------------------------------------===// 11197c22205SMarco Elver 11297c22205SMarco Elver STATISTIC(NumMetadataCovered, "Metadata attached to covered functions"); 11397c22205SMarco Elver STATISTIC(NumMetadataAtomics, "Metadata attached to atomics"); 114dbe8c2c3SDmitry Vyukov STATISTIC(NumMetadataUAR, "Metadata attached to UAR functions"); 11597c22205SMarco Elver 11697c22205SMarco Elver //===----------------------------------------------------------------------===// 11797c22205SMarco Elver 11897c22205SMarco Elver // Apply opt overrides. 11997c22205SMarco Elver SanitizerBinaryMetadataOptions && 12097c22205SMarco Elver transformOptionsFromCl(SanitizerBinaryMetadataOptions &&Opts) { 12197c22205SMarco Elver Opts.Covered |= ClEmitCovered; 12297c22205SMarco Elver Opts.Atomics |= ClEmitAtomics; 123dbe8c2c3SDmitry Vyukov Opts.UAR |= ClEmitUAR; 12497c22205SMarco Elver return std::move(Opts); 12597c22205SMarco Elver } 12697c22205SMarco Elver 12797c22205SMarco Elver class SanitizerBinaryMetadata { 12897c22205SMarco Elver public: 129421215b9SMarco Elver SanitizerBinaryMetadata(Module &M, SanitizerBinaryMetadataOptions Opts, 130421215b9SMarco Elver std::unique_ptr<SpecialCaseList> Ignorelist) 13197c22205SMarco Elver : Mod(M), Options(transformOptionsFromCl(std::move(Opts))), 132421215b9SMarco Elver Ignorelist(std::move(Ignorelist)), TargetTriple(M.getTargetTriple()), 133f5b9e11eSMarco Elver VersionStr(utostr(getVersion())), IRB(M.getContext()) { 13497c22205SMarco Elver // FIXME: Make it work with other formats. 13597c22205SMarco Elver assert(TargetTriple.isOSBinFormatELF() && "ELF only"); 13661ed6495SMarco Elver assert(!(TargetTriple.isNVPTX() || TargetTriple.isAMDGPU()) && 13761ed6495SMarco Elver "Device targets are not supported"); 13897c22205SMarco Elver } 13997c22205SMarco Elver 14097c22205SMarco Elver bool run(); 14197c22205SMarco Elver 14297c22205SMarco Elver private: 14397c22205SMarco Elver uint32_t getVersion() const { 14497c22205SMarco Elver uint32_t Version = kVersionBase; 14597c22205SMarco Elver const auto CM = Mod.getCodeModel(); 14697c22205SMarco Elver if (CM.has_value() && (*CM == CodeModel::Medium || *CM == CodeModel::Large)) 14797c22205SMarco Elver Version |= kVersionPtrSizeRel; 14897c22205SMarco Elver return Version; 14997c22205SMarco Elver } 15097c22205SMarco Elver 15197c22205SMarco Elver void runOn(Function &F, MetadataInfoSet &MIS); 15297c22205SMarco Elver 15397c22205SMarco Elver // Determines which set of metadata to collect for this instruction. 15497c22205SMarco Elver // 15597c22205SMarco Elver // Returns true if covered metadata is required to unambiguously interpret 15697c22205SMarco Elver // other metadata. For example, if we are interested in atomics metadata, any 15797c22205SMarco Elver // function with memory operations (atomic or not) requires covered metadata 15897c22205SMarco Elver // to determine if a memory operation is atomic or not in modules compiled 15997c22205SMarco Elver // with SanitizerBinaryMetadata. 160dbe8c2c3SDmitry Vyukov bool runOn(Instruction &I, MetadataInfoSet &MIS, MDBuilder &MDB, 161bf9814b7SMarco Elver uint64_t &FeatureMask); 16297c22205SMarco Elver 16397c22205SMarco Elver // Get start/end section marker pointer. 16497c22205SMarco Elver GlobalVariable *getSectionMarker(const Twine &MarkerName, Type *Ty); 16597c22205SMarco Elver 16697c22205SMarco Elver // Returns the target-dependent section name. 16797c22205SMarco Elver StringRef getSectionName(StringRef SectionSuffix); 16897c22205SMarco Elver 16997c22205SMarco Elver // Returns the section start marker name. 170f5b9e11eSMarco Elver StringRef getSectionStart(StringRef SectionSuffix); 17197c22205SMarco Elver 17297c22205SMarco Elver // Returns the section end marker name. 173f5b9e11eSMarco Elver StringRef getSectionEnd(StringRef SectionSuffix); 17497c22205SMarco Elver 175764c88a5SMarco Elver // Returns true if the access to the address should be considered "atomic". 176960b4c3bSMarco Elver bool pretendAtomicAccess(const Value *Addr); 177764c88a5SMarco Elver 17897c22205SMarco Elver Module &Mod; 17997c22205SMarco Elver const SanitizerBinaryMetadataOptions Options; 180421215b9SMarco Elver std::unique_ptr<SpecialCaseList> Ignorelist; 18197c22205SMarco Elver const Triple TargetTriple; 182f5b9e11eSMarco Elver const std::string VersionStr; 18397c22205SMarco Elver IRBuilder<> IRB; 184bf9814b7SMarco Elver BumpPtrAllocator Alloc; 185bf9814b7SMarco Elver UniqueStringSaver StringPool{Alloc}; 18697c22205SMarco Elver }; 18797c22205SMarco Elver 18897c22205SMarco Elver bool SanitizerBinaryMetadata::run() { 18997c22205SMarco Elver MetadataInfoSet MIS; 19097c22205SMarco Elver 19197c22205SMarco Elver for (Function &F : Mod) 19297c22205SMarco Elver runOn(F, MIS); 19397c22205SMarco Elver 19497c22205SMarco Elver if (MIS.empty()) 19597c22205SMarco Elver return false; 19697c22205SMarco Elver 19797c22205SMarco Elver // 19897c22205SMarco Elver // Setup constructors and call all initialization functions for requested 19997c22205SMarco Elver // metadata features. 20097c22205SMarco Elver // 20197c22205SMarco Elver 202107185faSFangrui Song auto *PtrTy = IRB.getPtrTy(); 20397c22205SMarco Elver auto *Int32Ty = IRB.getInt32Ty(); 204107185faSFangrui Song const std::array<Type *, 3> InitTypes = {Int32Ty, PtrTy, PtrTy}; 20597c22205SMarco Elver auto *Version = ConstantInt::get(Int32Ty, getVersion()); 20697c22205SMarco Elver 20797c22205SMarco Elver for (const MetadataInfo *MI : MIS) { 20897c22205SMarco Elver const std::array<Value *, InitTypes.size()> InitArgs = { 20997c22205SMarco Elver Version, 210107185faSFangrui Song getSectionMarker(getSectionStart(MI->SectionSuffix), PtrTy), 211107185faSFangrui Song getSectionMarker(getSectionEnd(MI->SectionSuffix), PtrTy), 21297c22205SMarco Elver }; 213f5b9e11eSMarco Elver 214f5b9e11eSMarco Elver // Calls to the initialization functions with different versions cannot be 215f5b9e11eSMarco Elver // merged. Give the structors unique names based on the version, which will 216f5b9e11eSMarco Elver // also be used as the COMDAT key. 217f5b9e11eSMarco Elver const std::string StructorPrefix = (MI->FunctionPrefix + VersionStr).str(); 218f5b9e11eSMarco Elver 2195265adc7SMarco Elver // We declare the _add and _del functions as weak, and only call them if 2205265adc7SMarco Elver // there is a valid symbol linked. This allows building binaries with 2215265adc7SMarco Elver // semantic metadata, but without having callbacks. When a tool that wants 2225265adc7SMarco Elver // the metadata is linked which provides the callbacks, they will be called. 22397c22205SMarco Elver Function *Ctor = 22497c22205SMarco Elver createSanitizerCtorAndInitFunctions( 225f5b9e11eSMarco Elver Mod, StructorPrefix + ".module_ctor", 2265265adc7SMarco Elver (MI->FunctionPrefix + "_add").str(), InitTypes, InitArgs, 2275265adc7SMarco Elver /*VersionCheckName=*/StringRef(), /*Weak=*/ClWeakCallbacks) 22897c22205SMarco Elver .first; 22997c22205SMarco Elver Function *Dtor = 23097c22205SMarco Elver createSanitizerCtorAndInitFunctions( 231f5b9e11eSMarco Elver Mod, StructorPrefix + ".module_dtor", 2325265adc7SMarco Elver (MI->FunctionPrefix + "_del").str(), InitTypes, InitArgs, 2335265adc7SMarco Elver /*VersionCheckName=*/StringRef(), /*Weak=*/ClWeakCallbacks) 23497c22205SMarco Elver .first; 2358c469d16SMarco Elver Constant *CtorComdatKey = nullptr; 2368c469d16SMarco Elver Constant *DtorComdatKey = nullptr; 23797c22205SMarco Elver if (TargetTriple.supportsCOMDAT()) { 2386ce8e716SFangrui Song // Use COMDAT to deduplicate constructor/destructor function. The COMDAT 2396ce8e716SFangrui Song // key needs to be a non-local linkage. 24097c22205SMarco Elver Ctor->setComdat(Mod.getOrInsertComdat(Ctor->getName())); 24197c22205SMarco Elver Dtor->setComdat(Mod.getOrInsertComdat(Dtor->getName())); 2426ce8e716SFangrui Song Ctor->setLinkage(GlobalValue::ExternalLinkage); 2436ce8e716SFangrui Song Dtor->setLinkage(GlobalValue::ExternalLinkage); 2448c469d16SMarco Elver // DSOs should _not_ call another constructor/destructor! 2458c469d16SMarco Elver Ctor->setVisibility(GlobalValue::HiddenVisibility); 2468c469d16SMarco Elver Dtor->setVisibility(GlobalValue::HiddenVisibility); 2478c469d16SMarco Elver CtorComdatKey = Ctor; 2488c469d16SMarco Elver DtorComdatKey = Dtor; 24997c22205SMarco Elver } 2508c469d16SMarco Elver appendToGlobalCtors(Mod, Ctor, kCtorDtorPriority, CtorComdatKey); 2518c469d16SMarco Elver appendToGlobalDtors(Mod, Dtor, kCtorDtorPriority, DtorComdatKey); 25297c22205SMarco Elver } 25397c22205SMarco Elver 25497c22205SMarco Elver return true; 25597c22205SMarco Elver } 25697c22205SMarco Elver 25797c22205SMarco Elver void SanitizerBinaryMetadata::runOn(Function &F, MetadataInfoSet &MIS) { 25897c22205SMarco Elver if (F.empty()) 25997c22205SMarco Elver return; 260*942e872dSAntonio Frighetto // Do not apply any instrumentation for naked functions. 261*942e872dSAntonio Frighetto if (F.hasFnAttribute(Attribute::Naked)) 262*942e872dSAntonio Frighetto return; 26397c22205SMarco Elver if (F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation)) 26497c22205SMarco Elver return; 265421215b9SMarco Elver if (Ignorelist && Ignorelist->inSection("metadata", "fun", F.getName())) 266421215b9SMarco Elver return; 26797c22205SMarco Elver // Don't touch available_externally functions, their actual body is elsewhere. 26897c22205SMarco Elver if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) 26997c22205SMarco Elver return; 27097c22205SMarco Elver 27197c22205SMarco Elver MDBuilder MDB(F.getContext()); 27297c22205SMarco Elver 27397c22205SMarco Elver // The metadata features enabled for this function, stored along covered 27497c22205SMarco Elver // metadata (if enabled). 275bf9814b7SMarco Elver uint64_t FeatureMask = 0; 27697c22205SMarco Elver // Don't emit unnecessary covered metadata for all functions to save space. 27797c22205SMarco Elver bool RequiresCovered = false; 278960b4c3bSMarco Elver 279960b4c3bSMarco Elver if (Options.Atomics || Options.UAR) { 28097c22205SMarco Elver for (BasicBlock &BB : F) 28197c22205SMarco Elver for (Instruction &I : BB) 282dbe8c2c3SDmitry Vyukov RequiresCovered |= runOn(I, MIS, MDB, FeatureMask); 28397c22205SMarco Elver } 28497c22205SMarco Elver 2855f605e25SMarco Elver if (ClNoSanitize && F.hasFnAttribute("no_sanitize_thread")) 2865f605e25SMarco Elver FeatureMask &= ~kSanitizerBinaryMetadataAtomics; 287dbe8c2c3SDmitry Vyukov if (F.isVarArg()) 288dbe8c2c3SDmitry Vyukov FeatureMask &= ~kSanitizerBinaryMetadataUAR; 2895addb736SDmitry Vyukov if (FeatureMask & kSanitizerBinaryMetadataUAR) { 2905addb736SDmitry Vyukov RequiresCovered = true; 291dbe8c2c3SDmitry Vyukov NumMetadataUAR++; 2925addb736SDmitry Vyukov } 293dbe8c2c3SDmitry Vyukov 29497c22205SMarco Elver // Covered metadata is always emitted if explicitly requested, otherwise only 29597c22205SMarco Elver // if some other metadata requires it to unambiguously interpret it for 29697c22205SMarco Elver // modules compiled with SanitizerBinaryMetadata. 297dbe8c2c3SDmitry Vyukov if (Options.Covered || (FeatureMask && RequiresCovered)) { 29897c22205SMarco Elver NumMetadataCovered++; 29997c22205SMarco Elver const auto *MI = &MetadataInfo::Covered; 30097c22205SMarco Elver MIS.insert(MI); 30197c22205SMarco Elver const StringRef Section = getSectionName(MI->SectionSuffix); 302bf9814b7SMarco Elver // The feature mask will be placed after the function size. 303bf9814b7SMarco Elver Constant *CFM = IRB.getInt64(FeatureMask); 30497c22205SMarco Elver F.setMetadata(LLVMContext::MD_pcsections, 30597c22205SMarco Elver MDB.createPCSections({{Section, {CFM}}})); 30697c22205SMarco Elver } 30797c22205SMarco Elver } 30897c22205SMarco Elver 309f7f01599SDmitry Vyukov bool isUARSafeCall(CallInst *CI) { 310f7f01599SDmitry Vyukov auto *F = CI->getCalledFunction(); 311f7f01599SDmitry Vyukov // There are no intrinsic functions that leak arguments. 312f7f01599SDmitry Vyukov // If the called function does not return, the current function 313f7f01599SDmitry Vyukov // does not return as well, so no possibility of use-after-return. 314f7f01599SDmitry Vyukov // Sanitizer function also don't leak or don't return. 315f7f01599SDmitry Vyukov // It's safe to both pass pointers to local variables to them 316f7f01599SDmitry Vyukov // and to tail-call them. 317f7f01599SDmitry Vyukov return F && (F->isIntrinsic() || F->doesNotReturn() || 3183ca4fe80SSimon Pilgrim F->getName().starts_with("__asan_") || 3193ca4fe80SSimon Pilgrim F->getName().starts_with("__hwsan_") || 3203ca4fe80SSimon Pilgrim F->getName().starts_with("__ubsan_") || 3213ca4fe80SSimon Pilgrim F->getName().starts_with("__msan_") || 3223ca4fe80SSimon Pilgrim F->getName().starts_with("__tsan_")); 323f7f01599SDmitry Vyukov } 324f7f01599SDmitry Vyukov 3255addb736SDmitry Vyukov bool hasUseAfterReturnUnsafeUses(Value &V) { 3265addb736SDmitry Vyukov for (User *U : V.users()) { 3275addb736SDmitry Vyukov if (auto *I = dyn_cast<Instruction>(U)) { 3285addb736SDmitry Vyukov if (I->isLifetimeStartOrEnd() || I->isDroppable()) 3295addb736SDmitry Vyukov continue; 330f7f01599SDmitry Vyukov if (auto *CI = dyn_cast<CallInst>(U)) { 331f7f01599SDmitry Vyukov if (isUARSafeCall(CI)) 332f7f01599SDmitry Vyukov continue; 333f7f01599SDmitry Vyukov } 3345addb736SDmitry Vyukov if (isa<LoadInst>(U)) 3355addb736SDmitry Vyukov continue; 3365addb736SDmitry Vyukov if (auto *SI = dyn_cast<StoreInst>(U)) { 3375addb736SDmitry Vyukov // If storing TO the alloca, then the address isn't taken. 3385addb736SDmitry Vyukov if (SI->getOperand(1) == &V) 3395addb736SDmitry Vyukov continue; 3405addb736SDmitry Vyukov } 3415addb736SDmitry Vyukov if (auto *GEPI = dyn_cast<GetElementPtrInst>(U)) { 3425addb736SDmitry Vyukov if (!hasUseAfterReturnUnsafeUses(*GEPI)) 3435addb736SDmitry Vyukov continue; 3445addb736SDmitry Vyukov } else if (auto *BCI = dyn_cast<BitCastInst>(U)) { 3455addb736SDmitry Vyukov if (!hasUseAfterReturnUnsafeUses(*BCI)) 3465addb736SDmitry Vyukov continue; 3475addb736SDmitry Vyukov } 3485addb736SDmitry Vyukov } 3495addb736SDmitry Vyukov return true; 3505addb736SDmitry Vyukov } 3515addb736SDmitry Vyukov return false; 3525addb736SDmitry Vyukov } 3535addb736SDmitry Vyukov 3545addb736SDmitry Vyukov bool useAfterReturnUnsafe(Instruction &I) { 3555addb736SDmitry Vyukov if (isa<AllocaInst>(I)) 3565addb736SDmitry Vyukov return hasUseAfterReturnUnsafeUses(I); 3575addb736SDmitry Vyukov // Tail-called functions are not necessary intercepted 3585addb736SDmitry Vyukov // at runtime because there is no call instruction. 3595addb736SDmitry Vyukov // So conservatively mark the caller as requiring checking. 3605addb736SDmitry Vyukov else if (auto *CI = dyn_cast<CallInst>(&I)) 361f7f01599SDmitry Vyukov return CI->isTailCall() && !isUARSafeCall(CI); 3625addb736SDmitry Vyukov return false; 3635addb736SDmitry Vyukov } 3645addb736SDmitry Vyukov 365960b4c3bSMarco Elver bool SanitizerBinaryMetadata::pretendAtomicAccess(const Value *Addr) { 366960b4c3bSMarco Elver if (!Addr) 367960b4c3bSMarco Elver return false; 368764c88a5SMarco Elver 369764c88a5SMarco Elver Addr = Addr->stripInBoundsOffsets(); 370764c88a5SMarco Elver auto *GV = dyn_cast<GlobalVariable>(Addr); 371764c88a5SMarco Elver if (!GV) 372764c88a5SMarco Elver return false; 373764c88a5SMarco Elver 374960b4c3bSMarco Elver // Some compiler-generated accesses are known racy, to avoid false positives 375960b4c3bSMarco Elver // in data-race analysis pretend they're atomic. 376764c88a5SMarco Elver if (GV->hasSection()) { 377764c88a5SMarco Elver const auto OF = Triple(Mod.getTargetTriple()).getObjectFormat(); 378764c88a5SMarco Elver const auto ProfSec = 379764c88a5SMarco Elver getInstrProfSectionName(IPSK_cnts, OF, /*AddSegmentInfo=*/false); 3803ca4fe80SSimon Pilgrim if (GV->getSection().ends_with(ProfSec)) 381764c88a5SMarco Elver return true; 382764c88a5SMarco Elver } 3833ca4fe80SSimon Pilgrim if (GV->getName().starts_with("__llvm_gcov") || 3843ca4fe80SSimon Pilgrim GV->getName().starts_with("__llvm_gcda")) 385764c88a5SMarco Elver return true; 386764c88a5SMarco Elver 387764c88a5SMarco Elver return false; 388764c88a5SMarco Elver } 389764c88a5SMarco Elver 390960b4c3bSMarco Elver // Returns true if the memory at `Addr` may be shared with other threads. 391960b4c3bSMarco Elver bool maybeSharedMutable(const Value *Addr) { 392960b4c3bSMarco Elver // By default assume memory may be shared. 393960b4c3bSMarco Elver if (!Addr) 394960b4c3bSMarco Elver return true; 395960b4c3bSMarco Elver 396960b4c3bSMarco Elver if (isa<AllocaInst>(getUnderlyingObject(Addr)) && 397960b4c3bSMarco Elver !PointerMayBeCaptured(Addr, true, true)) 398960b4c3bSMarco Elver return false; // Object is on stack but does not escape. 399960b4c3bSMarco Elver 400960b4c3bSMarco Elver Addr = Addr->stripInBoundsOffsets(); 401960b4c3bSMarco Elver if (auto *GV = dyn_cast<GlobalVariable>(Addr)) { 402960b4c3bSMarco Elver if (GV->isConstant()) 403960b4c3bSMarco Elver return false; // Shared, but not mutable. 404960b4c3bSMarco Elver } 405960b4c3bSMarco Elver 406960b4c3bSMarco Elver return true; 407960b4c3bSMarco Elver } 408960b4c3bSMarco Elver 40997c22205SMarco Elver bool SanitizerBinaryMetadata::runOn(Instruction &I, MetadataInfoSet &MIS, 410bf9814b7SMarco Elver MDBuilder &MDB, uint64_t &FeatureMask) { 41197c22205SMarco Elver SmallVector<const MetadataInfo *, 1> InstMetadata; 41297c22205SMarco Elver bool RequiresCovered = false; 41397c22205SMarco Elver 414960b4c3bSMarco Elver // Only call if at least 1 type of metadata is requested. 415960b4c3bSMarco Elver assert(Options.UAR || Options.Atomics); 416960b4c3bSMarco Elver 4175addb736SDmitry Vyukov if (Options.UAR && !(FeatureMask & kSanitizerBinaryMetadataUAR)) { 4185addb736SDmitry Vyukov if (useAfterReturnUnsafe(I)) 419dbe8c2c3SDmitry Vyukov FeatureMask |= kSanitizerBinaryMetadataUAR; 420dbe8c2c3SDmitry Vyukov } 421dbe8c2c3SDmitry Vyukov 422960b4c3bSMarco Elver if (Options.Atomics) { 423960b4c3bSMarco Elver const Value *Addr = nullptr; 424764c88a5SMarco Elver if (auto *SI = dyn_cast<StoreInst>(&I)) 425764c88a5SMarco Elver Addr = SI->getPointerOperand(); 426764c88a5SMarco Elver else if (auto *LI = dyn_cast<LoadInst>(&I)) 427764c88a5SMarco Elver Addr = LI->getPointerOperand(); 428764c88a5SMarco Elver 429960b4c3bSMarco Elver if (I.mayReadOrWriteMemory() && maybeSharedMutable(Addr)) { 430960b4c3bSMarco Elver auto SSID = getAtomicSyncScopeID(&I); 431960b4c3bSMarco Elver if ((SSID.has_value() && *SSID != SyncScope::SingleThread) || 432960b4c3bSMarco Elver pretendAtomicAccess(Addr)) { 43397c22205SMarco Elver NumMetadataAtomics++; 43497c22205SMarco Elver InstMetadata.push_back(&MetadataInfo::Atomics); 43597c22205SMarco Elver } 436960b4c3bSMarco Elver FeatureMask |= kSanitizerBinaryMetadataAtomics; 43797c22205SMarco Elver RequiresCovered = true; 43897c22205SMarco Elver } 439960b4c3bSMarco Elver } 44097c22205SMarco Elver 44197c22205SMarco Elver // Attach MD_pcsections to instruction. 44297c22205SMarco Elver if (!InstMetadata.empty()) { 44397c22205SMarco Elver MIS.insert(InstMetadata.begin(), InstMetadata.end()); 44497c22205SMarco Elver SmallVector<MDBuilder::PCSection, 1> Sections; 44597c22205SMarco Elver for (const auto &MI : InstMetadata) 44697c22205SMarco Elver Sections.push_back({getSectionName(MI->SectionSuffix), {}}); 44797c22205SMarco Elver I.setMetadata(LLVMContext::MD_pcsections, MDB.createPCSections(Sections)); 44897c22205SMarco Elver } 44997c22205SMarco Elver 45097c22205SMarco Elver return RequiresCovered; 45197c22205SMarco Elver } 45297c22205SMarco Elver 45397c22205SMarco Elver GlobalVariable * 45497c22205SMarco Elver SanitizerBinaryMetadata::getSectionMarker(const Twine &MarkerName, Type *Ty) { 455eecb22d8SFangrui Song // Use ExternalWeak so that if all sections are discarded due to section 456eecb22d8SFangrui Song // garbage collection, the linker will not report undefined symbol errors. 45797c22205SMarco Elver auto *Marker = new GlobalVariable(Mod, Ty, /*isConstant=*/false, 458eecb22d8SFangrui Song GlobalVariable::ExternalWeakLinkage, 45997c22205SMarco Elver /*Initializer=*/nullptr, MarkerName); 46097c22205SMarco Elver Marker->setVisibility(GlobalValue::HiddenVisibility); 46197c22205SMarco Elver return Marker; 46297c22205SMarco Elver } 46397c22205SMarco Elver 46497c22205SMarco Elver StringRef SanitizerBinaryMetadata::getSectionName(StringRef SectionSuffix) { 465bf9814b7SMarco Elver // FIXME: Other TargetTriples. 466bf9814b7SMarco Elver // Request ULEB128 encoding for all integer constants. 467f5b9e11eSMarco Elver return StringPool.save(SectionSuffix + VersionStr + "!C"); 46897c22205SMarco Elver } 46997c22205SMarco Elver 470f5b9e11eSMarco Elver StringRef SanitizerBinaryMetadata::getSectionStart(StringRef SectionSuffix) { 471f5b9e11eSMarco Elver // Twine only concatenates 2 strings; with >2 strings, concatenating them 472f5b9e11eSMarco Elver // creates Twine temporaries, and returning the final Twine no longer works 473f5b9e11eSMarco Elver // because we'd end up with a stack-use-after-return. So here we also use the 474f5b9e11eSMarco Elver // StringPool to store the new string. 475f5b9e11eSMarco Elver return StringPool.save("__start_" + SectionSuffix + VersionStr); 47697c22205SMarco Elver } 47797c22205SMarco Elver 478f5b9e11eSMarco Elver StringRef SanitizerBinaryMetadata::getSectionEnd(StringRef SectionSuffix) { 479f5b9e11eSMarco Elver return StringPool.save("__stop_" + SectionSuffix + VersionStr); 48097c22205SMarco Elver } 48197c22205SMarco Elver 48297c22205SMarco Elver } // namespace 48397c22205SMarco Elver 48497c22205SMarco Elver SanitizerBinaryMetadataPass::SanitizerBinaryMetadataPass( 485421215b9SMarco Elver SanitizerBinaryMetadataOptions Opts, ArrayRef<std::string> IgnorelistFiles) 486421215b9SMarco Elver : Options(std::move(Opts)), IgnorelistFiles(std::move(IgnorelistFiles)) {} 48797c22205SMarco Elver 48897c22205SMarco Elver PreservedAnalyses 48997c22205SMarco Elver SanitizerBinaryMetadataPass::run(Module &M, AnalysisManager<Module> &AM) { 490421215b9SMarco Elver std::unique_ptr<SpecialCaseList> Ignorelist; 491421215b9SMarco Elver if (!IgnorelistFiles.empty()) { 492421215b9SMarco Elver Ignorelist = SpecialCaseList::createOrDie(IgnorelistFiles, 493421215b9SMarco Elver *vfs::getRealFileSystem()); 494421215b9SMarco Elver if (Ignorelist->inSection("metadata", "src", M.getSourceFileName())) 495421215b9SMarco Elver return PreservedAnalyses::all(); 496421215b9SMarco Elver } 497421215b9SMarco Elver 498421215b9SMarco Elver SanitizerBinaryMetadata Pass(M, Options, std::move(Ignorelist)); 49997c22205SMarco Elver if (Pass.run()) 50097c22205SMarco Elver return PreservedAnalyses::none(); 50197c22205SMarco Elver return PreservedAnalyses::all(); 50297c22205SMarco Elver } 503