1bdd1243dSDimitry Andric //===- SanitizerBinaryMetadata.cpp - binary analysis sanitizers metadata --===// 2bdd1243dSDimitry Andric // 3bdd1243dSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4bdd1243dSDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5bdd1243dSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6bdd1243dSDimitry Andric // 7bdd1243dSDimitry Andric //===----------------------------------------------------------------------===// 8bdd1243dSDimitry Andric // 9bdd1243dSDimitry Andric // This file is a part of SanitizerBinaryMetadata. 10bdd1243dSDimitry Andric // 11bdd1243dSDimitry Andric //===----------------------------------------------------------------------===// 12bdd1243dSDimitry Andric 13bdd1243dSDimitry Andric #include "llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h" 14bdd1243dSDimitry Andric #include "llvm/ADT/SetVector.h" 15bdd1243dSDimitry Andric #include "llvm/ADT/SmallVector.h" 16bdd1243dSDimitry Andric #include "llvm/ADT/Statistic.h" 17*0fca6ea1SDimitry Andric #include "llvm/ADT/StringExtras.h" 18bdd1243dSDimitry Andric #include "llvm/ADT/StringRef.h" 19bdd1243dSDimitry Andric #include "llvm/ADT/Twine.h" 2006c3fb27SDimitry Andric #include "llvm/Analysis/CaptureTracking.h" 2106c3fb27SDimitry Andric #include "llvm/Analysis/ValueTracking.h" 22bdd1243dSDimitry Andric #include "llvm/IR/Constant.h" 23bdd1243dSDimitry Andric #include "llvm/IR/DerivedTypes.h" 24bdd1243dSDimitry Andric #include "llvm/IR/Function.h" 25bdd1243dSDimitry Andric #include "llvm/IR/GlobalValue.h" 26bdd1243dSDimitry Andric #include "llvm/IR/GlobalVariable.h" 27bdd1243dSDimitry Andric #include "llvm/IR/IRBuilder.h" 28bdd1243dSDimitry Andric #include "llvm/IR/Instruction.h" 29bdd1243dSDimitry Andric #include "llvm/IR/Instructions.h" 30bdd1243dSDimitry Andric #include "llvm/IR/LLVMContext.h" 31bdd1243dSDimitry Andric #include "llvm/IR/MDBuilder.h" 32bdd1243dSDimitry Andric #include "llvm/IR/Metadata.h" 33bdd1243dSDimitry Andric #include "llvm/IR/Module.h" 34bdd1243dSDimitry Andric #include "llvm/IR/Type.h" 35bdd1243dSDimitry Andric #include "llvm/IR/Value.h" 3606c3fb27SDimitry Andric #include "llvm/ProfileData/InstrProf.h" 3706c3fb27SDimitry Andric #include "llvm/Support/Allocator.h" 38bdd1243dSDimitry Andric #include "llvm/Support/CommandLine.h" 39bdd1243dSDimitry Andric #include "llvm/Support/Debug.h" 4006c3fb27SDimitry Andric #include "llvm/Support/SpecialCaseList.h" 4106c3fb27SDimitry Andric #include "llvm/Support/StringSaver.h" 4206c3fb27SDimitry Andric #include "llvm/Support/VirtualFileSystem.h" 4306c3fb27SDimitry Andric #include "llvm/TargetParser/Triple.h" 44bdd1243dSDimitry Andric #include "llvm/Transforms/Utils/ModuleUtils.h" 45bdd1243dSDimitry Andric 46bdd1243dSDimitry Andric #include <array> 47bdd1243dSDimitry Andric #include <cstdint> 4806c3fb27SDimitry Andric #include <memory> 49bdd1243dSDimitry Andric 50bdd1243dSDimitry Andric using namespace llvm; 51bdd1243dSDimitry Andric 52bdd1243dSDimitry Andric #define DEBUG_TYPE "sanmd" 53bdd1243dSDimitry Andric 54bdd1243dSDimitry Andric namespace { 55bdd1243dSDimitry Andric 56bdd1243dSDimitry Andric //===--- Constants --------------------------------------------------------===// 57bdd1243dSDimitry Andric 5806c3fb27SDimitry Andric constexpr uint32_t kVersionBase = 2; // occupies lower 16 bits 59bdd1243dSDimitry Andric constexpr uint32_t kVersionPtrSizeRel = (1u << 16); // offsets are pointer-sized 60bdd1243dSDimitry Andric constexpr int kCtorDtorPriority = 2; 61bdd1243dSDimitry Andric 62bdd1243dSDimitry Andric // Pairs of names of initialization callback functions and which section 63bdd1243dSDimitry Andric // contains the relevant metadata. 64bdd1243dSDimitry Andric class MetadataInfo { 65bdd1243dSDimitry Andric public: 66bdd1243dSDimitry Andric const StringRef FunctionPrefix; 67bdd1243dSDimitry Andric const StringRef SectionSuffix; 68bdd1243dSDimitry Andric 69bdd1243dSDimitry Andric static const MetadataInfo Covered; 70bdd1243dSDimitry Andric static const MetadataInfo Atomics; 71bdd1243dSDimitry Andric 72bdd1243dSDimitry Andric private: 73bdd1243dSDimitry Andric // Forbid construction elsewhere. 74bdd1243dSDimitry Andric explicit constexpr MetadataInfo(StringRef FunctionPrefix, 7506c3fb27SDimitry Andric StringRef SectionSuffix) 7606c3fb27SDimitry Andric : FunctionPrefix(FunctionPrefix), SectionSuffix(SectionSuffix) {} 77bdd1243dSDimitry Andric }; 7806c3fb27SDimitry Andric const MetadataInfo MetadataInfo::Covered{ 7906c3fb27SDimitry Andric "__sanitizer_metadata_covered", kSanitizerBinaryMetadataCoveredSection}; 8006c3fb27SDimitry Andric const MetadataInfo MetadataInfo::Atomics{ 8106c3fb27SDimitry Andric "__sanitizer_metadata_atomics", kSanitizerBinaryMetadataAtomicsSection}; 82bdd1243dSDimitry Andric 83bdd1243dSDimitry Andric // The only instances of MetadataInfo are the constants above, so a set of 84bdd1243dSDimitry Andric // them may simply store pointers to them. To deterministically generate code, 85bdd1243dSDimitry Andric // we need to use a set with stable iteration order, such as SetVector. 86bdd1243dSDimitry Andric using MetadataInfoSet = SetVector<const MetadataInfo *>; 87bdd1243dSDimitry Andric 88bdd1243dSDimitry Andric //===--- Command-line options ---------------------------------------------===// 89bdd1243dSDimitry Andric 90bdd1243dSDimitry Andric cl::opt<bool> ClWeakCallbacks( 91bdd1243dSDimitry Andric "sanitizer-metadata-weak-callbacks", 92bdd1243dSDimitry Andric cl::desc("Declare callbacks extern weak, and only call if non-null."), 93bdd1243dSDimitry Andric cl::Hidden, cl::init(true)); 9406c3fb27SDimitry Andric cl::opt<bool> 9506c3fb27SDimitry Andric ClNoSanitize("sanitizer-metadata-nosanitize-attr", 9606c3fb27SDimitry Andric cl::desc("Mark some metadata features uncovered in functions " 9706c3fb27SDimitry Andric "with associated no_sanitize attributes."), 9806c3fb27SDimitry Andric cl::Hidden, cl::init(true)); 99bdd1243dSDimitry Andric 100bdd1243dSDimitry Andric cl::opt<bool> ClEmitCovered("sanitizer-metadata-covered", 101bdd1243dSDimitry Andric cl::desc("Emit PCs for covered functions."), 102bdd1243dSDimitry Andric cl::Hidden, cl::init(false)); 103bdd1243dSDimitry Andric cl::opt<bool> ClEmitAtomics("sanitizer-metadata-atomics", 104bdd1243dSDimitry Andric cl::desc("Emit PCs for atomic operations."), 105bdd1243dSDimitry Andric cl::Hidden, cl::init(false)); 106bdd1243dSDimitry Andric cl::opt<bool> ClEmitUAR("sanitizer-metadata-uar", 107bdd1243dSDimitry Andric cl::desc("Emit PCs for start of functions that are " 108bdd1243dSDimitry Andric "subject for use-after-return checking"), 109bdd1243dSDimitry Andric cl::Hidden, cl::init(false)); 110bdd1243dSDimitry Andric 111bdd1243dSDimitry Andric //===--- Statistics -------------------------------------------------------===// 112bdd1243dSDimitry Andric 113bdd1243dSDimitry Andric STATISTIC(NumMetadataCovered, "Metadata attached to covered functions"); 114bdd1243dSDimitry Andric STATISTIC(NumMetadataAtomics, "Metadata attached to atomics"); 115bdd1243dSDimitry Andric STATISTIC(NumMetadataUAR, "Metadata attached to UAR functions"); 116bdd1243dSDimitry Andric 117bdd1243dSDimitry Andric //===----------------------------------------------------------------------===// 118bdd1243dSDimitry Andric 119bdd1243dSDimitry Andric // Apply opt overrides. 120bdd1243dSDimitry Andric SanitizerBinaryMetadataOptions && 121bdd1243dSDimitry Andric transformOptionsFromCl(SanitizerBinaryMetadataOptions &&Opts) { 122bdd1243dSDimitry Andric Opts.Covered |= ClEmitCovered; 123bdd1243dSDimitry Andric Opts.Atomics |= ClEmitAtomics; 124bdd1243dSDimitry Andric Opts.UAR |= ClEmitUAR; 125bdd1243dSDimitry Andric return std::move(Opts); 126bdd1243dSDimitry Andric } 127bdd1243dSDimitry Andric 128bdd1243dSDimitry Andric class SanitizerBinaryMetadata { 129bdd1243dSDimitry Andric public: 13006c3fb27SDimitry Andric SanitizerBinaryMetadata(Module &M, SanitizerBinaryMetadataOptions Opts, 13106c3fb27SDimitry Andric std::unique_ptr<SpecialCaseList> Ignorelist) 132bdd1243dSDimitry Andric : Mod(M), Options(transformOptionsFromCl(std::move(Opts))), 13306c3fb27SDimitry Andric Ignorelist(std::move(Ignorelist)), TargetTriple(M.getTargetTriple()), 134*0fca6ea1SDimitry Andric VersionStr(utostr(getVersion())), IRB(M.getContext()) { 135bdd1243dSDimitry Andric // FIXME: Make it work with other formats. 136bdd1243dSDimitry Andric assert(TargetTriple.isOSBinFormatELF() && "ELF only"); 13706c3fb27SDimitry Andric assert(!(TargetTriple.isNVPTX() || TargetTriple.isAMDGPU()) && 13806c3fb27SDimitry Andric "Device targets are not supported"); 139bdd1243dSDimitry Andric } 140bdd1243dSDimitry Andric 141bdd1243dSDimitry Andric bool run(); 142bdd1243dSDimitry Andric 143bdd1243dSDimitry Andric private: 144bdd1243dSDimitry Andric uint32_t getVersion() const { 145bdd1243dSDimitry Andric uint32_t Version = kVersionBase; 146bdd1243dSDimitry Andric const auto CM = Mod.getCodeModel(); 147bdd1243dSDimitry Andric if (CM.has_value() && (*CM == CodeModel::Medium || *CM == CodeModel::Large)) 148bdd1243dSDimitry Andric Version |= kVersionPtrSizeRel; 149bdd1243dSDimitry Andric return Version; 150bdd1243dSDimitry Andric } 151bdd1243dSDimitry Andric 152bdd1243dSDimitry Andric void runOn(Function &F, MetadataInfoSet &MIS); 153bdd1243dSDimitry Andric 154bdd1243dSDimitry Andric // Determines which set of metadata to collect for this instruction. 155bdd1243dSDimitry Andric // 156bdd1243dSDimitry Andric // Returns true if covered metadata is required to unambiguously interpret 157bdd1243dSDimitry Andric // other metadata. For example, if we are interested in atomics metadata, any 158bdd1243dSDimitry Andric // function with memory operations (atomic or not) requires covered metadata 159bdd1243dSDimitry Andric // to determine if a memory operation is atomic or not in modules compiled 160bdd1243dSDimitry Andric // with SanitizerBinaryMetadata. 161bdd1243dSDimitry Andric bool runOn(Instruction &I, MetadataInfoSet &MIS, MDBuilder &MDB, 16206c3fb27SDimitry Andric uint64_t &FeatureMask); 163bdd1243dSDimitry Andric 164bdd1243dSDimitry Andric // Get start/end section marker pointer. 165bdd1243dSDimitry Andric GlobalVariable *getSectionMarker(const Twine &MarkerName, Type *Ty); 166bdd1243dSDimitry Andric 167bdd1243dSDimitry Andric // Returns the target-dependent section name. 168bdd1243dSDimitry Andric StringRef getSectionName(StringRef SectionSuffix); 169bdd1243dSDimitry Andric 170bdd1243dSDimitry Andric // Returns the section start marker name. 171*0fca6ea1SDimitry Andric StringRef getSectionStart(StringRef SectionSuffix); 172bdd1243dSDimitry Andric 173bdd1243dSDimitry Andric // Returns the section end marker name. 174*0fca6ea1SDimitry Andric StringRef getSectionEnd(StringRef SectionSuffix); 175bdd1243dSDimitry Andric 17606c3fb27SDimitry Andric // Returns true if the access to the address should be considered "atomic". 17706c3fb27SDimitry Andric bool pretendAtomicAccess(const Value *Addr); 17806c3fb27SDimitry Andric 179bdd1243dSDimitry Andric Module &Mod; 180bdd1243dSDimitry Andric const SanitizerBinaryMetadataOptions Options; 18106c3fb27SDimitry Andric std::unique_ptr<SpecialCaseList> Ignorelist; 182bdd1243dSDimitry Andric const Triple TargetTriple; 183*0fca6ea1SDimitry Andric const std::string VersionStr; 184bdd1243dSDimitry Andric IRBuilder<> IRB; 18506c3fb27SDimitry Andric BumpPtrAllocator Alloc; 18606c3fb27SDimitry Andric UniqueStringSaver StringPool{Alloc}; 187bdd1243dSDimitry Andric }; 188bdd1243dSDimitry Andric 189bdd1243dSDimitry Andric bool SanitizerBinaryMetadata::run() { 190bdd1243dSDimitry Andric MetadataInfoSet MIS; 191bdd1243dSDimitry Andric 192bdd1243dSDimitry Andric for (Function &F : Mod) 193bdd1243dSDimitry Andric runOn(F, MIS); 194bdd1243dSDimitry Andric 195bdd1243dSDimitry Andric if (MIS.empty()) 196bdd1243dSDimitry Andric return false; 197bdd1243dSDimitry Andric 198bdd1243dSDimitry Andric // 199bdd1243dSDimitry Andric // Setup constructors and call all initialization functions for requested 200bdd1243dSDimitry Andric // metadata features. 201bdd1243dSDimitry Andric // 202bdd1243dSDimitry Andric 2035f757f3fSDimitry Andric auto *PtrTy = IRB.getPtrTy(); 204bdd1243dSDimitry Andric auto *Int32Ty = IRB.getInt32Ty(); 2055f757f3fSDimitry Andric const std::array<Type *, 3> InitTypes = {Int32Ty, PtrTy, PtrTy}; 206bdd1243dSDimitry Andric auto *Version = ConstantInt::get(Int32Ty, getVersion()); 207bdd1243dSDimitry Andric 208bdd1243dSDimitry Andric for (const MetadataInfo *MI : MIS) { 209bdd1243dSDimitry Andric const std::array<Value *, InitTypes.size()> InitArgs = { 210bdd1243dSDimitry Andric Version, 2115f757f3fSDimitry Andric getSectionMarker(getSectionStart(MI->SectionSuffix), PtrTy), 2125f757f3fSDimitry Andric getSectionMarker(getSectionEnd(MI->SectionSuffix), PtrTy), 213bdd1243dSDimitry Andric }; 214*0fca6ea1SDimitry Andric 215*0fca6ea1SDimitry Andric // Calls to the initialization functions with different versions cannot be 216*0fca6ea1SDimitry Andric // merged. Give the structors unique names based on the version, which will 217*0fca6ea1SDimitry Andric // also be used as the COMDAT key. 218*0fca6ea1SDimitry Andric const std::string StructorPrefix = (MI->FunctionPrefix + VersionStr).str(); 219*0fca6ea1SDimitry Andric 220bdd1243dSDimitry Andric // We declare the _add and _del functions as weak, and only call them if 221bdd1243dSDimitry Andric // there is a valid symbol linked. This allows building binaries with 222bdd1243dSDimitry Andric // semantic metadata, but without having callbacks. When a tool that wants 223bdd1243dSDimitry Andric // the metadata is linked which provides the callbacks, they will be called. 224bdd1243dSDimitry Andric Function *Ctor = 225bdd1243dSDimitry Andric createSanitizerCtorAndInitFunctions( 226*0fca6ea1SDimitry Andric Mod, StructorPrefix + ".module_ctor", 227bdd1243dSDimitry Andric (MI->FunctionPrefix + "_add").str(), InitTypes, InitArgs, 228bdd1243dSDimitry Andric /*VersionCheckName=*/StringRef(), /*Weak=*/ClWeakCallbacks) 229bdd1243dSDimitry Andric .first; 230bdd1243dSDimitry Andric Function *Dtor = 231bdd1243dSDimitry Andric createSanitizerCtorAndInitFunctions( 232*0fca6ea1SDimitry Andric Mod, StructorPrefix + ".module_dtor", 233bdd1243dSDimitry Andric (MI->FunctionPrefix + "_del").str(), InitTypes, InitArgs, 234bdd1243dSDimitry Andric /*VersionCheckName=*/StringRef(), /*Weak=*/ClWeakCallbacks) 235bdd1243dSDimitry Andric .first; 23606c3fb27SDimitry Andric Constant *CtorComdatKey = nullptr; 23706c3fb27SDimitry Andric Constant *DtorComdatKey = nullptr; 238bdd1243dSDimitry Andric if (TargetTriple.supportsCOMDAT()) { 23906c3fb27SDimitry Andric // Use COMDAT to deduplicate constructor/destructor function. The COMDAT 24006c3fb27SDimitry Andric // key needs to be a non-local linkage. 241bdd1243dSDimitry Andric Ctor->setComdat(Mod.getOrInsertComdat(Ctor->getName())); 242bdd1243dSDimitry Andric Dtor->setComdat(Mod.getOrInsertComdat(Dtor->getName())); 24306c3fb27SDimitry Andric Ctor->setLinkage(GlobalValue::ExternalLinkage); 24406c3fb27SDimitry Andric Dtor->setLinkage(GlobalValue::ExternalLinkage); 24506c3fb27SDimitry Andric // DSOs should _not_ call another constructor/destructor! 24606c3fb27SDimitry Andric Ctor->setVisibility(GlobalValue::HiddenVisibility); 24706c3fb27SDimitry Andric Dtor->setVisibility(GlobalValue::HiddenVisibility); 24806c3fb27SDimitry Andric CtorComdatKey = Ctor; 24906c3fb27SDimitry Andric DtorComdatKey = Dtor; 250bdd1243dSDimitry Andric } 25106c3fb27SDimitry Andric appendToGlobalCtors(Mod, Ctor, kCtorDtorPriority, CtorComdatKey); 25206c3fb27SDimitry Andric appendToGlobalDtors(Mod, Dtor, kCtorDtorPriority, DtorComdatKey); 253bdd1243dSDimitry Andric } 254bdd1243dSDimitry Andric 255bdd1243dSDimitry Andric return true; 256bdd1243dSDimitry Andric } 257bdd1243dSDimitry Andric 258bdd1243dSDimitry Andric void SanitizerBinaryMetadata::runOn(Function &F, MetadataInfoSet &MIS) { 259bdd1243dSDimitry Andric if (F.empty()) 260bdd1243dSDimitry Andric return; 261bdd1243dSDimitry Andric if (F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation)) 262bdd1243dSDimitry Andric return; 26306c3fb27SDimitry Andric if (Ignorelist && Ignorelist->inSection("metadata", "fun", F.getName())) 26406c3fb27SDimitry Andric return; 265bdd1243dSDimitry Andric // Don't touch available_externally functions, their actual body is elsewhere. 266bdd1243dSDimitry Andric if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) 267bdd1243dSDimitry Andric return; 268bdd1243dSDimitry Andric 269bdd1243dSDimitry Andric MDBuilder MDB(F.getContext()); 270bdd1243dSDimitry Andric 271bdd1243dSDimitry Andric // The metadata features enabled for this function, stored along covered 272bdd1243dSDimitry Andric // metadata (if enabled). 27306c3fb27SDimitry Andric uint64_t FeatureMask = 0; 274bdd1243dSDimitry Andric // Don't emit unnecessary covered metadata for all functions to save space. 275bdd1243dSDimitry Andric bool RequiresCovered = false; 27606c3fb27SDimitry Andric 27706c3fb27SDimitry Andric if (Options.Atomics || Options.UAR) { 278bdd1243dSDimitry Andric for (BasicBlock &BB : F) 279bdd1243dSDimitry Andric for (Instruction &I : BB) 280bdd1243dSDimitry Andric RequiresCovered |= runOn(I, MIS, MDB, FeatureMask); 281bdd1243dSDimitry Andric } 282bdd1243dSDimitry Andric 28306c3fb27SDimitry Andric if (ClNoSanitize && F.hasFnAttribute("no_sanitize_thread")) 28406c3fb27SDimitry Andric FeatureMask &= ~kSanitizerBinaryMetadataAtomics; 285bdd1243dSDimitry Andric if (F.isVarArg()) 286bdd1243dSDimitry Andric FeatureMask &= ~kSanitizerBinaryMetadataUAR; 287bdd1243dSDimitry Andric if (FeatureMask & kSanitizerBinaryMetadataUAR) { 288bdd1243dSDimitry Andric RequiresCovered = true; 289bdd1243dSDimitry Andric NumMetadataUAR++; 290bdd1243dSDimitry Andric } 291bdd1243dSDimitry Andric 292bdd1243dSDimitry Andric // Covered metadata is always emitted if explicitly requested, otherwise only 293bdd1243dSDimitry Andric // if some other metadata requires it to unambiguously interpret it for 294bdd1243dSDimitry Andric // modules compiled with SanitizerBinaryMetadata. 295bdd1243dSDimitry Andric if (Options.Covered || (FeatureMask && RequiresCovered)) { 296bdd1243dSDimitry Andric NumMetadataCovered++; 297bdd1243dSDimitry Andric const auto *MI = &MetadataInfo::Covered; 298bdd1243dSDimitry Andric MIS.insert(MI); 299bdd1243dSDimitry Andric const StringRef Section = getSectionName(MI->SectionSuffix); 30006c3fb27SDimitry Andric // The feature mask will be placed after the function size. 30106c3fb27SDimitry Andric Constant *CFM = IRB.getInt64(FeatureMask); 302bdd1243dSDimitry Andric F.setMetadata(LLVMContext::MD_pcsections, 303bdd1243dSDimitry Andric MDB.createPCSections({{Section, {CFM}}})); 304bdd1243dSDimitry Andric } 305bdd1243dSDimitry Andric } 306bdd1243dSDimitry Andric 307bdd1243dSDimitry Andric bool isUARSafeCall(CallInst *CI) { 308bdd1243dSDimitry Andric auto *F = CI->getCalledFunction(); 309bdd1243dSDimitry Andric // There are no intrinsic functions that leak arguments. 310bdd1243dSDimitry Andric // If the called function does not return, the current function 311bdd1243dSDimitry Andric // does not return as well, so no possibility of use-after-return. 312bdd1243dSDimitry Andric // Sanitizer function also don't leak or don't return. 313bdd1243dSDimitry Andric // It's safe to both pass pointers to local variables to them 314bdd1243dSDimitry Andric // and to tail-call them. 315bdd1243dSDimitry Andric return F && (F->isIntrinsic() || F->doesNotReturn() || 3165f757f3fSDimitry Andric F->getName().starts_with("__asan_") || 3175f757f3fSDimitry Andric F->getName().starts_with("__hwsan_") || 3185f757f3fSDimitry Andric F->getName().starts_with("__ubsan_") || 3195f757f3fSDimitry Andric F->getName().starts_with("__msan_") || 3205f757f3fSDimitry Andric F->getName().starts_with("__tsan_")); 321bdd1243dSDimitry Andric } 322bdd1243dSDimitry Andric 323bdd1243dSDimitry Andric bool hasUseAfterReturnUnsafeUses(Value &V) { 324bdd1243dSDimitry Andric for (User *U : V.users()) { 325bdd1243dSDimitry Andric if (auto *I = dyn_cast<Instruction>(U)) { 326bdd1243dSDimitry Andric if (I->isLifetimeStartOrEnd() || I->isDroppable()) 327bdd1243dSDimitry Andric continue; 328bdd1243dSDimitry Andric if (auto *CI = dyn_cast<CallInst>(U)) { 329bdd1243dSDimitry Andric if (isUARSafeCall(CI)) 330bdd1243dSDimitry Andric continue; 331bdd1243dSDimitry Andric } 332bdd1243dSDimitry Andric if (isa<LoadInst>(U)) 333bdd1243dSDimitry Andric continue; 334bdd1243dSDimitry Andric if (auto *SI = dyn_cast<StoreInst>(U)) { 335bdd1243dSDimitry Andric // If storing TO the alloca, then the address isn't taken. 336bdd1243dSDimitry Andric if (SI->getOperand(1) == &V) 337bdd1243dSDimitry Andric continue; 338bdd1243dSDimitry Andric } 339bdd1243dSDimitry Andric if (auto *GEPI = dyn_cast<GetElementPtrInst>(U)) { 340bdd1243dSDimitry Andric if (!hasUseAfterReturnUnsafeUses(*GEPI)) 341bdd1243dSDimitry Andric continue; 342bdd1243dSDimitry Andric } else if (auto *BCI = dyn_cast<BitCastInst>(U)) { 343bdd1243dSDimitry Andric if (!hasUseAfterReturnUnsafeUses(*BCI)) 344bdd1243dSDimitry Andric continue; 345bdd1243dSDimitry Andric } 346bdd1243dSDimitry Andric } 347bdd1243dSDimitry Andric return true; 348bdd1243dSDimitry Andric } 349bdd1243dSDimitry Andric return false; 350bdd1243dSDimitry Andric } 351bdd1243dSDimitry Andric 352bdd1243dSDimitry Andric bool useAfterReturnUnsafe(Instruction &I) { 353bdd1243dSDimitry Andric if (isa<AllocaInst>(I)) 354bdd1243dSDimitry Andric return hasUseAfterReturnUnsafeUses(I); 355bdd1243dSDimitry Andric // Tail-called functions are not necessary intercepted 356bdd1243dSDimitry Andric // at runtime because there is no call instruction. 357bdd1243dSDimitry Andric // So conservatively mark the caller as requiring checking. 358bdd1243dSDimitry Andric else if (auto *CI = dyn_cast<CallInst>(&I)) 359bdd1243dSDimitry Andric return CI->isTailCall() && !isUARSafeCall(CI); 360bdd1243dSDimitry Andric return false; 361bdd1243dSDimitry Andric } 362bdd1243dSDimitry Andric 36306c3fb27SDimitry Andric bool SanitizerBinaryMetadata::pretendAtomicAccess(const Value *Addr) { 36406c3fb27SDimitry Andric if (!Addr) 36506c3fb27SDimitry Andric return false; 36606c3fb27SDimitry Andric 36706c3fb27SDimitry Andric Addr = Addr->stripInBoundsOffsets(); 36806c3fb27SDimitry Andric auto *GV = dyn_cast<GlobalVariable>(Addr); 36906c3fb27SDimitry Andric if (!GV) 37006c3fb27SDimitry Andric return false; 37106c3fb27SDimitry Andric 37206c3fb27SDimitry Andric // Some compiler-generated accesses are known racy, to avoid false positives 37306c3fb27SDimitry Andric // in data-race analysis pretend they're atomic. 37406c3fb27SDimitry Andric if (GV->hasSection()) { 37506c3fb27SDimitry Andric const auto OF = Triple(Mod.getTargetTriple()).getObjectFormat(); 37606c3fb27SDimitry Andric const auto ProfSec = 37706c3fb27SDimitry Andric getInstrProfSectionName(IPSK_cnts, OF, /*AddSegmentInfo=*/false); 3785f757f3fSDimitry Andric if (GV->getSection().ends_with(ProfSec)) 37906c3fb27SDimitry Andric return true; 38006c3fb27SDimitry Andric } 3815f757f3fSDimitry Andric if (GV->getName().starts_with("__llvm_gcov") || 3825f757f3fSDimitry Andric GV->getName().starts_with("__llvm_gcda")) 38306c3fb27SDimitry Andric return true; 38406c3fb27SDimitry Andric 38506c3fb27SDimitry Andric return false; 38606c3fb27SDimitry Andric } 38706c3fb27SDimitry Andric 38806c3fb27SDimitry Andric // Returns true if the memory at `Addr` may be shared with other threads. 38906c3fb27SDimitry Andric bool maybeSharedMutable(const Value *Addr) { 39006c3fb27SDimitry Andric // By default assume memory may be shared. 39106c3fb27SDimitry Andric if (!Addr) 39206c3fb27SDimitry Andric return true; 39306c3fb27SDimitry Andric 39406c3fb27SDimitry Andric if (isa<AllocaInst>(getUnderlyingObject(Addr)) && 39506c3fb27SDimitry Andric !PointerMayBeCaptured(Addr, true, true)) 39606c3fb27SDimitry Andric return false; // Object is on stack but does not escape. 39706c3fb27SDimitry Andric 39806c3fb27SDimitry Andric Addr = Addr->stripInBoundsOffsets(); 39906c3fb27SDimitry Andric if (auto *GV = dyn_cast<GlobalVariable>(Addr)) { 40006c3fb27SDimitry Andric if (GV->isConstant()) 40106c3fb27SDimitry Andric return false; // Shared, but not mutable. 40206c3fb27SDimitry Andric } 40306c3fb27SDimitry Andric 40406c3fb27SDimitry Andric return true; 40506c3fb27SDimitry Andric } 40606c3fb27SDimitry Andric 407bdd1243dSDimitry Andric bool SanitizerBinaryMetadata::runOn(Instruction &I, MetadataInfoSet &MIS, 40806c3fb27SDimitry Andric MDBuilder &MDB, uint64_t &FeatureMask) { 409bdd1243dSDimitry Andric SmallVector<const MetadataInfo *, 1> InstMetadata; 410bdd1243dSDimitry Andric bool RequiresCovered = false; 411bdd1243dSDimitry Andric 41206c3fb27SDimitry Andric // Only call if at least 1 type of metadata is requested. 41306c3fb27SDimitry Andric assert(Options.UAR || Options.Atomics); 41406c3fb27SDimitry Andric 415bdd1243dSDimitry Andric if (Options.UAR && !(FeatureMask & kSanitizerBinaryMetadataUAR)) { 416bdd1243dSDimitry Andric if (useAfterReturnUnsafe(I)) 417bdd1243dSDimitry Andric FeatureMask |= kSanitizerBinaryMetadataUAR; 418bdd1243dSDimitry Andric } 419bdd1243dSDimitry Andric 42006c3fb27SDimitry Andric if (Options.Atomics) { 42106c3fb27SDimitry Andric const Value *Addr = nullptr; 42206c3fb27SDimitry Andric if (auto *SI = dyn_cast<StoreInst>(&I)) 42306c3fb27SDimitry Andric Addr = SI->getPointerOperand(); 42406c3fb27SDimitry Andric else if (auto *LI = dyn_cast<LoadInst>(&I)) 42506c3fb27SDimitry Andric Addr = LI->getPointerOperand(); 42606c3fb27SDimitry Andric 42706c3fb27SDimitry Andric if (I.mayReadOrWriteMemory() && maybeSharedMutable(Addr)) { 428bdd1243dSDimitry Andric auto SSID = getAtomicSyncScopeID(&I); 42906c3fb27SDimitry Andric if ((SSID.has_value() && *SSID != SyncScope::SingleThread) || 43006c3fb27SDimitry Andric pretendAtomicAccess(Addr)) { 431bdd1243dSDimitry Andric NumMetadataAtomics++; 432bdd1243dSDimitry Andric InstMetadata.push_back(&MetadataInfo::Atomics); 433bdd1243dSDimitry Andric } 43406c3fb27SDimitry Andric FeatureMask |= kSanitizerBinaryMetadataAtomics; 435bdd1243dSDimitry Andric RequiresCovered = true; 436bdd1243dSDimitry Andric } 43706c3fb27SDimitry Andric } 438bdd1243dSDimitry Andric 439bdd1243dSDimitry Andric // Attach MD_pcsections to instruction. 440bdd1243dSDimitry Andric if (!InstMetadata.empty()) { 441bdd1243dSDimitry Andric MIS.insert(InstMetadata.begin(), InstMetadata.end()); 442bdd1243dSDimitry Andric SmallVector<MDBuilder::PCSection, 1> Sections; 443bdd1243dSDimitry Andric for (const auto &MI : InstMetadata) 444bdd1243dSDimitry Andric Sections.push_back({getSectionName(MI->SectionSuffix), {}}); 445bdd1243dSDimitry Andric I.setMetadata(LLVMContext::MD_pcsections, MDB.createPCSections(Sections)); 446bdd1243dSDimitry Andric } 447bdd1243dSDimitry Andric 448bdd1243dSDimitry Andric return RequiresCovered; 449bdd1243dSDimitry Andric } 450bdd1243dSDimitry Andric 451bdd1243dSDimitry Andric GlobalVariable * 452bdd1243dSDimitry Andric SanitizerBinaryMetadata::getSectionMarker(const Twine &MarkerName, Type *Ty) { 453bdd1243dSDimitry Andric // Use ExternalWeak so that if all sections are discarded due to section 454bdd1243dSDimitry Andric // garbage collection, the linker will not report undefined symbol errors. 455bdd1243dSDimitry Andric auto *Marker = new GlobalVariable(Mod, Ty, /*isConstant=*/false, 456bdd1243dSDimitry Andric GlobalVariable::ExternalWeakLinkage, 457bdd1243dSDimitry Andric /*Initializer=*/nullptr, MarkerName); 458bdd1243dSDimitry Andric Marker->setVisibility(GlobalValue::HiddenVisibility); 459bdd1243dSDimitry Andric return Marker; 460bdd1243dSDimitry Andric } 461bdd1243dSDimitry Andric 462bdd1243dSDimitry Andric StringRef SanitizerBinaryMetadata::getSectionName(StringRef SectionSuffix) { 46306c3fb27SDimitry Andric // FIXME: Other TargetTriples. 46406c3fb27SDimitry Andric // Request ULEB128 encoding for all integer constants. 465*0fca6ea1SDimitry Andric return StringPool.save(SectionSuffix + VersionStr + "!C"); 466bdd1243dSDimitry Andric } 467bdd1243dSDimitry Andric 468*0fca6ea1SDimitry Andric StringRef SanitizerBinaryMetadata::getSectionStart(StringRef SectionSuffix) { 469*0fca6ea1SDimitry Andric // Twine only concatenates 2 strings; with >2 strings, concatenating them 470*0fca6ea1SDimitry Andric // creates Twine temporaries, and returning the final Twine no longer works 471*0fca6ea1SDimitry Andric // because we'd end up with a stack-use-after-return. So here we also use the 472*0fca6ea1SDimitry Andric // StringPool to store the new string. 473*0fca6ea1SDimitry Andric return StringPool.save("__start_" + SectionSuffix + VersionStr); 474bdd1243dSDimitry Andric } 475bdd1243dSDimitry Andric 476*0fca6ea1SDimitry Andric StringRef SanitizerBinaryMetadata::getSectionEnd(StringRef SectionSuffix) { 477*0fca6ea1SDimitry Andric return StringPool.save("__stop_" + SectionSuffix + VersionStr); 478bdd1243dSDimitry Andric } 479bdd1243dSDimitry Andric 480bdd1243dSDimitry Andric } // namespace 481bdd1243dSDimitry Andric 482bdd1243dSDimitry Andric SanitizerBinaryMetadataPass::SanitizerBinaryMetadataPass( 48306c3fb27SDimitry Andric SanitizerBinaryMetadataOptions Opts, ArrayRef<std::string> IgnorelistFiles) 48406c3fb27SDimitry Andric : Options(std::move(Opts)), IgnorelistFiles(std::move(IgnorelistFiles)) {} 485bdd1243dSDimitry Andric 486bdd1243dSDimitry Andric PreservedAnalyses 487bdd1243dSDimitry Andric SanitizerBinaryMetadataPass::run(Module &M, AnalysisManager<Module> &AM) { 48806c3fb27SDimitry Andric std::unique_ptr<SpecialCaseList> Ignorelist; 48906c3fb27SDimitry Andric if (!IgnorelistFiles.empty()) { 49006c3fb27SDimitry Andric Ignorelist = SpecialCaseList::createOrDie(IgnorelistFiles, 49106c3fb27SDimitry Andric *vfs::getRealFileSystem()); 49206c3fb27SDimitry Andric if (Ignorelist->inSection("metadata", "src", M.getSourceFileName())) 49306c3fb27SDimitry Andric return PreservedAnalyses::all(); 49406c3fb27SDimitry Andric } 49506c3fb27SDimitry Andric 49606c3fb27SDimitry Andric SanitizerBinaryMetadata Pass(M, Options, std::move(Ignorelist)); 497bdd1243dSDimitry Andric if (Pass.run()) 498bdd1243dSDimitry Andric return PreservedAnalyses::none(); 499bdd1243dSDimitry Andric return PreservedAnalyses::all(); 500bdd1243dSDimitry Andric } 501