xref: /llvm-project/llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp (revision 4d12a14357b136e996f8789786f1b76348b5582b)
197c22205SMarco Elver //===- SanitizerBinaryMetadata.cpp - binary analysis sanitizers metadata --===//
297c22205SMarco Elver //
397c22205SMarco Elver // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
497c22205SMarco Elver // See https://llvm.org/LICENSE.txt for license information.
597c22205SMarco Elver // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
697c22205SMarco Elver //
797c22205SMarco Elver //===----------------------------------------------------------------------===//
897c22205SMarco Elver //
997c22205SMarco Elver // This file is a part of SanitizerBinaryMetadata.
1097c22205SMarco Elver //
1197c22205SMarco Elver //===----------------------------------------------------------------------===//
1297c22205SMarco Elver 
1397c22205SMarco Elver #include "llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h"
1497c22205SMarco Elver #include "llvm/ADT/SetVector.h"
1597c22205SMarco Elver #include "llvm/ADT/SmallVector.h"
1697c22205SMarco Elver #include "llvm/ADT/Statistic.h"
17f5b9e11eSMarco Elver #include "llvm/ADT/StringExtras.h"
1897c22205SMarco Elver #include "llvm/ADT/StringRef.h"
1997c22205SMarco Elver #include "llvm/ADT/Twine.h"
20960b4c3bSMarco Elver #include "llvm/Analysis/CaptureTracking.h"
21960b4c3bSMarco Elver #include "llvm/Analysis/ValueTracking.h"
2297c22205SMarco Elver #include "llvm/IR/Constant.h"
2397c22205SMarco Elver #include "llvm/IR/DerivedTypes.h"
2497c22205SMarco Elver #include "llvm/IR/Function.h"
2597c22205SMarco Elver #include "llvm/IR/GlobalValue.h"
2697c22205SMarco Elver #include "llvm/IR/GlobalVariable.h"
2797c22205SMarco Elver #include "llvm/IR/IRBuilder.h"
2897c22205SMarco Elver #include "llvm/IR/Instruction.h"
2997c22205SMarco Elver #include "llvm/IR/Instructions.h"
3097c22205SMarco Elver #include "llvm/IR/LLVMContext.h"
3197c22205SMarco Elver #include "llvm/IR/MDBuilder.h"
3297c22205SMarco Elver #include "llvm/IR/Metadata.h"
3397c22205SMarco Elver #include "llvm/IR/Module.h"
3497c22205SMarco Elver #include "llvm/IR/Type.h"
3597c22205SMarco Elver #include "llvm/IR/Value.h"
36764c88a5SMarco Elver #include "llvm/ProfileData/InstrProf.h"
37bf9814b7SMarco Elver #include "llvm/Support/Allocator.h"
3897c22205SMarco Elver #include "llvm/Support/CommandLine.h"
39421215b9SMarco Elver #include "llvm/Support/SpecialCaseList.h"
40bf9814b7SMarco Elver #include "llvm/Support/StringSaver.h"
41421215b9SMarco Elver #include "llvm/Support/VirtualFileSystem.h"
4262c7f035SArchibald Elliott #include "llvm/TargetParser/Triple.h"
4397c22205SMarco Elver #include "llvm/Transforms/Utils/ModuleUtils.h"
4497c22205SMarco Elver 
4597c22205SMarco Elver #include <array>
4697c22205SMarco Elver #include <cstdint>
47421215b9SMarco Elver #include <memory>
4897c22205SMarco Elver 
4997c22205SMarco Elver using namespace llvm;
5097c22205SMarco Elver 
5197c22205SMarco Elver #define DEBUG_TYPE "sanmd"
5297c22205SMarco Elver 
5397c22205SMarco Elver namespace {
5497c22205SMarco Elver 
5597c22205SMarco Elver //===--- Constants --------------------------------------------------------===//
5697c22205SMarco Elver 
// Base metadata format version; it occupies the lower 16 bits of the version
// word handed to the runtime callbacks.
constexpr uint32_t kVersionBase = 2;
// Flag bit OR-ed into the version word when offsets are pointer-sized.
constexpr uint32_t kVersionPtrSizeRel = uint32_t{1} << 16;
// Priority with which the generated module constructors/destructors are
// registered.
constexpr int kCtorDtorPriority = 2;
6097c22205SMarco Elver 
6197c22205SMarco Elver // Pairs of names of initialization callback functions and which section
6297c22205SMarco Elver // contains the relevant metadata.
6397c22205SMarco Elver class MetadataInfo {
6497c22205SMarco Elver public:
6597c22205SMarco Elver   const StringRef FunctionPrefix;
6697c22205SMarco Elver   const StringRef SectionSuffix;
6797c22205SMarco Elver 
6897c22205SMarco Elver   static const MetadataInfo Covered;
6997c22205SMarco Elver   static const MetadataInfo Atomics;
7097c22205SMarco Elver 
7197c22205SMarco Elver private:
7297c22205SMarco Elver   // Forbid construction elsewhere.
7397c22205SMarco Elver   explicit constexpr MetadataInfo(StringRef FunctionPrefix,
74960b4c3bSMarco Elver                                   StringRef SectionSuffix)
75960b4c3bSMarco Elver       : FunctionPrefix(FunctionPrefix), SectionSuffix(SectionSuffix) {}
7697c22205SMarco Elver };
77960b4c3bSMarco Elver const MetadataInfo MetadataInfo::Covered{
78960b4c3bSMarco Elver     "__sanitizer_metadata_covered", kSanitizerBinaryMetadataCoveredSection};
79960b4c3bSMarco Elver const MetadataInfo MetadataInfo::Atomics{
80960b4c3bSMarco Elver     "__sanitizer_metadata_atomics", kSanitizerBinaryMetadataAtomicsSection};
8197c22205SMarco Elver 
8297c22205SMarco Elver // The only instances of MetadataInfo are the constants above, so a set of
8397c22205SMarco Elver // them may simply store pointers to them. To deterministically generate code,
8497c22205SMarco Elver // we need to use a set with stable iteration order, such as SetVector.
8597c22205SMarco Elver using MetadataInfoSet = SetVector<const MetadataInfo *>;
8697c22205SMarco Elver 
8797c22205SMarco Elver //===--- Command-line options ---------------------------------------------===//
8897c22205SMarco Elver 
895265adc7SMarco Elver cl::opt<bool> ClWeakCallbacks(
905265adc7SMarco Elver     "sanitizer-metadata-weak-callbacks",
915265adc7SMarco Elver     cl::desc("Declare callbacks extern weak, and only call if non-null."),
925265adc7SMarco Elver     cl::Hidden, cl::init(true));
935f605e25SMarco Elver cl::opt<bool>
945f605e25SMarco Elver     ClNoSanitize("sanitizer-metadata-nosanitize-attr",
955f605e25SMarco Elver                  cl::desc("Mark some metadata features uncovered in functions "
965f605e25SMarco Elver                           "with associated no_sanitize attributes."),
975f605e25SMarco Elver                  cl::Hidden, cl::init(true));
985265adc7SMarco Elver 
9997c22205SMarco Elver cl::opt<bool> ClEmitCovered("sanitizer-metadata-covered",
10097c22205SMarco Elver                             cl::desc("Emit PCs for covered functions."),
10197c22205SMarco Elver                             cl::Hidden, cl::init(false));
10297c22205SMarco Elver cl::opt<bool> ClEmitAtomics("sanitizer-metadata-atomics",
10397c22205SMarco Elver                             cl::desc("Emit PCs for atomic operations."),
10497c22205SMarco Elver                             cl::Hidden, cl::init(false));
105dbe8c2c3SDmitry Vyukov cl::opt<bool> ClEmitUAR("sanitizer-metadata-uar",
106dbe8c2c3SDmitry Vyukov                         cl::desc("Emit PCs for start of functions that are "
107dbe8c2c3SDmitry Vyukov                                  "subject for use-after-return checking"),
108dbe8c2c3SDmitry Vyukov                         cl::Hidden, cl::init(false));
10997c22205SMarco Elver 
11097c22205SMarco Elver //===--- Statistics -------------------------------------------------------===//
11197c22205SMarco Elver 
11297c22205SMarco Elver STATISTIC(NumMetadataCovered, "Metadata attached to covered functions");
11397c22205SMarco Elver STATISTIC(NumMetadataAtomics, "Metadata attached to atomics");
114dbe8c2c3SDmitry Vyukov STATISTIC(NumMetadataUAR, "Metadata attached to UAR functions");
11597c22205SMarco Elver 
11697c22205SMarco Elver //===----------------------------------------------------------------------===//
11797c22205SMarco Elver 
11897c22205SMarco Elver // Apply opt overrides.
11997c22205SMarco Elver SanitizerBinaryMetadataOptions &&
12097c22205SMarco Elver transformOptionsFromCl(SanitizerBinaryMetadataOptions &&Opts) {
12197c22205SMarco Elver   Opts.Covered |= ClEmitCovered;
12297c22205SMarco Elver   Opts.Atomics |= ClEmitAtomics;
123dbe8c2c3SDmitry Vyukov   Opts.UAR |= ClEmitUAR;
12497c22205SMarco Elver   return std::move(Opts);
12597c22205SMarco Elver }
12697c22205SMarco Elver 
12797c22205SMarco Elver class SanitizerBinaryMetadata {
12897c22205SMarco Elver public:
129421215b9SMarco Elver   SanitizerBinaryMetadata(Module &M, SanitizerBinaryMetadataOptions Opts,
130421215b9SMarco Elver                           std::unique_ptr<SpecialCaseList> Ignorelist)
13197c22205SMarco Elver       : Mod(M), Options(transformOptionsFromCl(std::move(Opts))),
132421215b9SMarco Elver         Ignorelist(std::move(Ignorelist)), TargetTriple(M.getTargetTriple()),
133f5b9e11eSMarco Elver         VersionStr(utostr(getVersion())), IRB(M.getContext()) {
13497c22205SMarco Elver     // FIXME: Make it work with other formats.
13597c22205SMarco Elver     assert(TargetTriple.isOSBinFormatELF() && "ELF only");
13661ed6495SMarco Elver     assert(!(TargetTriple.isNVPTX() || TargetTriple.isAMDGPU()) &&
13761ed6495SMarco Elver            "Device targets are not supported");
13897c22205SMarco Elver   }
13997c22205SMarco Elver 
14097c22205SMarco Elver   bool run();
14197c22205SMarco Elver 
14297c22205SMarco Elver private:
14397c22205SMarco Elver   uint32_t getVersion() const {
14497c22205SMarco Elver     uint32_t Version = kVersionBase;
14597c22205SMarco Elver     const auto CM = Mod.getCodeModel();
14697c22205SMarco Elver     if (CM.has_value() && (*CM == CodeModel::Medium || *CM == CodeModel::Large))
14797c22205SMarco Elver       Version |= kVersionPtrSizeRel;
14897c22205SMarco Elver     return Version;
14997c22205SMarco Elver   }
15097c22205SMarco Elver 
15197c22205SMarco Elver   void runOn(Function &F, MetadataInfoSet &MIS);
15297c22205SMarco Elver 
15397c22205SMarco Elver   // Determines which set of metadata to collect for this instruction.
15497c22205SMarco Elver   //
15597c22205SMarco Elver   // Returns true if covered metadata is required to unambiguously interpret
15697c22205SMarco Elver   // other metadata. For example, if we are interested in atomics metadata, any
15797c22205SMarco Elver   // function with memory operations (atomic or not) requires covered metadata
15897c22205SMarco Elver   // to determine if a memory operation is atomic or not in modules compiled
15997c22205SMarco Elver   // with SanitizerBinaryMetadata.
160dbe8c2c3SDmitry Vyukov   bool runOn(Instruction &I, MetadataInfoSet &MIS, MDBuilder &MDB,
161bf9814b7SMarco Elver              uint64_t &FeatureMask);
16297c22205SMarco Elver 
16397c22205SMarco Elver   // Get start/end section marker pointer.
16497c22205SMarco Elver   GlobalVariable *getSectionMarker(const Twine &MarkerName, Type *Ty);
16597c22205SMarco Elver 
16697c22205SMarco Elver   // Returns the target-dependent section name.
16797c22205SMarco Elver   StringRef getSectionName(StringRef SectionSuffix);
16897c22205SMarco Elver 
16997c22205SMarco Elver   // Returns the section start marker name.
170f5b9e11eSMarco Elver   StringRef getSectionStart(StringRef SectionSuffix);
17197c22205SMarco Elver 
17297c22205SMarco Elver   // Returns the section end marker name.
173f5b9e11eSMarco Elver   StringRef getSectionEnd(StringRef SectionSuffix);
17497c22205SMarco Elver 
175764c88a5SMarco Elver   // Returns true if the access to the address should be considered "atomic".
176960b4c3bSMarco Elver   bool pretendAtomicAccess(const Value *Addr);
177764c88a5SMarco Elver 
17897c22205SMarco Elver   Module &Mod;
17997c22205SMarco Elver   const SanitizerBinaryMetadataOptions Options;
180421215b9SMarco Elver   std::unique_ptr<SpecialCaseList> Ignorelist;
18197c22205SMarco Elver   const Triple TargetTriple;
182f5b9e11eSMarco Elver   const std::string VersionStr;
18397c22205SMarco Elver   IRBuilder<> IRB;
184bf9814b7SMarco Elver   BumpPtrAllocator Alloc;
185bf9814b7SMarco Elver   UniqueStringSaver StringPool{Alloc};
18697c22205SMarco Elver };
18797c22205SMarco Elver 
18897c22205SMarco Elver bool SanitizerBinaryMetadata::run() {
18997c22205SMarco Elver   MetadataInfoSet MIS;
19097c22205SMarco Elver 
19197c22205SMarco Elver   for (Function &F : Mod)
19297c22205SMarco Elver     runOn(F, MIS);
19397c22205SMarco Elver 
19497c22205SMarco Elver   if (MIS.empty())
19597c22205SMarco Elver     return false;
19697c22205SMarco Elver 
19797c22205SMarco Elver   //
19897c22205SMarco Elver   // Setup constructors and call all initialization functions for requested
19997c22205SMarco Elver   // metadata features.
20097c22205SMarco Elver   //
20197c22205SMarco Elver 
202107185faSFangrui Song   auto *PtrTy = IRB.getPtrTy();
20397c22205SMarco Elver   auto *Int32Ty = IRB.getInt32Ty();
204107185faSFangrui Song   const std::array<Type *, 3> InitTypes = {Int32Ty, PtrTy, PtrTy};
20597c22205SMarco Elver   auto *Version = ConstantInt::get(Int32Ty, getVersion());
20697c22205SMarco Elver 
20797c22205SMarco Elver   for (const MetadataInfo *MI : MIS) {
20897c22205SMarco Elver     const std::array<Value *, InitTypes.size()> InitArgs = {
20997c22205SMarco Elver         Version,
210107185faSFangrui Song         getSectionMarker(getSectionStart(MI->SectionSuffix), PtrTy),
211107185faSFangrui Song         getSectionMarker(getSectionEnd(MI->SectionSuffix), PtrTy),
21297c22205SMarco Elver     };
213f5b9e11eSMarco Elver 
214f5b9e11eSMarco Elver     // Calls to the initialization functions with different versions cannot be
215f5b9e11eSMarco Elver     // merged. Give the structors unique names based on the version, which will
216f5b9e11eSMarco Elver     // also be used as the COMDAT key.
217f5b9e11eSMarco Elver     const std::string StructorPrefix = (MI->FunctionPrefix + VersionStr).str();
218f5b9e11eSMarco Elver 
2195265adc7SMarco Elver     // We declare the _add and _del functions as weak, and only call them if
2205265adc7SMarco Elver     // there is a valid symbol linked. This allows building binaries with
2215265adc7SMarco Elver     // semantic metadata, but without having callbacks. When a tool that wants
2225265adc7SMarco Elver     // the metadata is linked which provides the callbacks, they will be called.
22397c22205SMarco Elver     Function *Ctor =
22497c22205SMarco Elver         createSanitizerCtorAndInitFunctions(
225f5b9e11eSMarco Elver             Mod, StructorPrefix + ".module_ctor",
2265265adc7SMarco Elver             (MI->FunctionPrefix + "_add").str(), InitTypes, InitArgs,
2275265adc7SMarco Elver             /*VersionCheckName=*/StringRef(), /*Weak=*/ClWeakCallbacks)
22897c22205SMarco Elver             .first;
22997c22205SMarco Elver     Function *Dtor =
23097c22205SMarco Elver         createSanitizerCtorAndInitFunctions(
231f5b9e11eSMarco Elver             Mod, StructorPrefix + ".module_dtor",
2325265adc7SMarco Elver             (MI->FunctionPrefix + "_del").str(), InitTypes, InitArgs,
2335265adc7SMarco Elver             /*VersionCheckName=*/StringRef(), /*Weak=*/ClWeakCallbacks)
23497c22205SMarco Elver             .first;
2358c469d16SMarco Elver     Constant *CtorComdatKey = nullptr;
2368c469d16SMarco Elver     Constant *DtorComdatKey = nullptr;
23797c22205SMarco Elver     if (TargetTriple.supportsCOMDAT()) {
2386ce8e716SFangrui Song       // Use COMDAT to deduplicate constructor/destructor function. The COMDAT
2396ce8e716SFangrui Song       // key needs to be a non-local linkage.
24097c22205SMarco Elver       Ctor->setComdat(Mod.getOrInsertComdat(Ctor->getName()));
24197c22205SMarco Elver       Dtor->setComdat(Mod.getOrInsertComdat(Dtor->getName()));
2426ce8e716SFangrui Song       Ctor->setLinkage(GlobalValue::ExternalLinkage);
2436ce8e716SFangrui Song       Dtor->setLinkage(GlobalValue::ExternalLinkage);
2448c469d16SMarco Elver       // DSOs should _not_ call another constructor/destructor!
2458c469d16SMarco Elver       Ctor->setVisibility(GlobalValue::HiddenVisibility);
2468c469d16SMarco Elver       Dtor->setVisibility(GlobalValue::HiddenVisibility);
2478c469d16SMarco Elver       CtorComdatKey = Ctor;
2488c469d16SMarco Elver       DtorComdatKey = Dtor;
24997c22205SMarco Elver     }
2508c469d16SMarco Elver     appendToGlobalCtors(Mod, Ctor, kCtorDtorPriority, CtorComdatKey);
2518c469d16SMarco Elver     appendToGlobalDtors(Mod, Dtor, kCtorDtorPriority, DtorComdatKey);
25297c22205SMarco Elver   }
25397c22205SMarco Elver 
25497c22205SMarco Elver   return true;
25597c22205SMarco Elver }
25697c22205SMarco Elver 
25797c22205SMarco Elver void SanitizerBinaryMetadata::runOn(Function &F, MetadataInfoSet &MIS) {
25897c22205SMarco Elver   if (F.empty())
25997c22205SMarco Elver     return;
260*942e872dSAntonio Frighetto   // Do not apply any instrumentation for naked functions.
261*942e872dSAntonio Frighetto   if (F.hasFnAttribute(Attribute::Naked))
262*942e872dSAntonio Frighetto     return;
26397c22205SMarco Elver   if (F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation))
26497c22205SMarco Elver     return;
265421215b9SMarco Elver   if (Ignorelist && Ignorelist->inSection("metadata", "fun", F.getName()))
266421215b9SMarco Elver     return;
26797c22205SMarco Elver   // Don't touch available_externally functions, their actual body is elsewhere.
26897c22205SMarco Elver   if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage)
26997c22205SMarco Elver     return;
27097c22205SMarco Elver 
27197c22205SMarco Elver   MDBuilder MDB(F.getContext());
27297c22205SMarco Elver 
27397c22205SMarco Elver   // The metadata features enabled for this function, stored along covered
27497c22205SMarco Elver   // metadata (if enabled).
275bf9814b7SMarco Elver   uint64_t FeatureMask = 0;
27697c22205SMarco Elver   // Don't emit unnecessary covered metadata for all functions to save space.
27797c22205SMarco Elver   bool RequiresCovered = false;
278960b4c3bSMarco Elver 
279960b4c3bSMarco Elver   if (Options.Atomics || Options.UAR) {
28097c22205SMarco Elver     for (BasicBlock &BB : F)
28197c22205SMarco Elver       for (Instruction &I : BB)
282dbe8c2c3SDmitry Vyukov         RequiresCovered |= runOn(I, MIS, MDB, FeatureMask);
28397c22205SMarco Elver   }
28497c22205SMarco Elver 
2855f605e25SMarco Elver   if (ClNoSanitize && F.hasFnAttribute("no_sanitize_thread"))
2865f605e25SMarco Elver     FeatureMask &= ~kSanitizerBinaryMetadataAtomics;
287dbe8c2c3SDmitry Vyukov   if (F.isVarArg())
288dbe8c2c3SDmitry Vyukov     FeatureMask &= ~kSanitizerBinaryMetadataUAR;
2895addb736SDmitry Vyukov   if (FeatureMask & kSanitizerBinaryMetadataUAR) {
2905addb736SDmitry Vyukov     RequiresCovered = true;
291dbe8c2c3SDmitry Vyukov     NumMetadataUAR++;
2925addb736SDmitry Vyukov   }
293dbe8c2c3SDmitry Vyukov 
29497c22205SMarco Elver   // Covered metadata is always emitted if explicitly requested, otherwise only
29597c22205SMarco Elver   // if some other metadata requires it to unambiguously interpret it for
29697c22205SMarco Elver   // modules compiled with SanitizerBinaryMetadata.
297dbe8c2c3SDmitry Vyukov   if (Options.Covered || (FeatureMask && RequiresCovered)) {
29897c22205SMarco Elver     NumMetadataCovered++;
29997c22205SMarco Elver     const auto *MI = &MetadataInfo::Covered;
30097c22205SMarco Elver     MIS.insert(MI);
30197c22205SMarco Elver     const StringRef Section = getSectionName(MI->SectionSuffix);
302bf9814b7SMarco Elver     // The feature mask will be placed after the function size.
303bf9814b7SMarco Elver     Constant *CFM = IRB.getInt64(FeatureMask);
30497c22205SMarco Elver     F.setMetadata(LLVMContext::MD_pcsections,
30597c22205SMarco Elver                   MDB.createPCSections({{Section, {CFM}}}));
30697c22205SMarco Elver   }
30797c22205SMarco Elver }
30897c22205SMarco Elver 
309f7f01599SDmitry Vyukov bool isUARSafeCall(CallInst *CI) {
310f7f01599SDmitry Vyukov   auto *F = CI->getCalledFunction();
311f7f01599SDmitry Vyukov   // There are no intrinsic functions that leak arguments.
312f7f01599SDmitry Vyukov   // If the called function does not return, the current function
313f7f01599SDmitry Vyukov   // does not return as well, so no possibility of use-after-return.
314f7f01599SDmitry Vyukov   // Sanitizer function also don't leak or don't return.
315f7f01599SDmitry Vyukov   // It's safe to both pass pointers to local variables to them
316f7f01599SDmitry Vyukov   // and to tail-call them.
317f7f01599SDmitry Vyukov   return F && (F->isIntrinsic() || F->doesNotReturn() ||
3183ca4fe80SSimon Pilgrim                F->getName().starts_with("__asan_") ||
3193ca4fe80SSimon Pilgrim                F->getName().starts_with("__hwsan_") ||
3203ca4fe80SSimon Pilgrim                F->getName().starts_with("__ubsan_") ||
3213ca4fe80SSimon Pilgrim                F->getName().starts_with("__msan_") ||
3223ca4fe80SSimon Pilgrim                F->getName().starts_with("__tsan_"));
323f7f01599SDmitry Vyukov }
324f7f01599SDmitry Vyukov 
3255addb736SDmitry Vyukov bool hasUseAfterReturnUnsafeUses(Value &V) {
3265addb736SDmitry Vyukov   for (User *U : V.users()) {
3275addb736SDmitry Vyukov     if (auto *I = dyn_cast<Instruction>(U)) {
3285addb736SDmitry Vyukov       if (I->isLifetimeStartOrEnd() || I->isDroppable())
3295addb736SDmitry Vyukov         continue;
330f7f01599SDmitry Vyukov       if (auto *CI = dyn_cast<CallInst>(U)) {
331f7f01599SDmitry Vyukov         if (isUARSafeCall(CI))
332f7f01599SDmitry Vyukov           continue;
333f7f01599SDmitry Vyukov       }
3345addb736SDmitry Vyukov       if (isa<LoadInst>(U))
3355addb736SDmitry Vyukov         continue;
3365addb736SDmitry Vyukov       if (auto *SI = dyn_cast<StoreInst>(U)) {
3375addb736SDmitry Vyukov         // If storing TO the alloca, then the address isn't taken.
3385addb736SDmitry Vyukov         if (SI->getOperand(1) == &V)
3395addb736SDmitry Vyukov           continue;
3405addb736SDmitry Vyukov       }
3415addb736SDmitry Vyukov       if (auto *GEPI = dyn_cast<GetElementPtrInst>(U)) {
3425addb736SDmitry Vyukov         if (!hasUseAfterReturnUnsafeUses(*GEPI))
3435addb736SDmitry Vyukov           continue;
3445addb736SDmitry Vyukov       } else if (auto *BCI = dyn_cast<BitCastInst>(U)) {
3455addb736SDmitry Vyukov         if (!hasUseAfterReturnUnsafeUses(*BCI))
3465addb736SDmitry Vyukov           continue;
3475addb736SDmitry Vyukov       }
3485addb736SDmitry Vyukov     }
3495addb736SDmitry Vyukov     return true;
3505addb736SDmitry Vyukov   }
3515addb736SDmitry Vyukov   return false;
3525addb736SDmitry Vyukov }
3535addb736SDmitry Vyukov 
3545addb736SDmitry Vyukov bool useAfterReturnUnsafe(Instruction &I) {
3555addb736SDmitry Vyukov   if (isa<AllocaInst>(I))
3565addb736SDmitry Vyukov     return hasUseAfterReturnUnsafeUses(I);
3575addb736SDmitry Vyukov   // Tail-called functions are not necessary intercepted
3585addb736SDmitry Vyukov   // at runtime because there is no call instruction.
3595addb736SDmitry Vyukov   // So conservatively mark the caller as requiring checking.
3605addb736SDmitry Vyukov   else if (auto *CI = dyn_cast<CallInst>(&I))
361f7f01599SDmitry Vyukov     return CI->isTailCall() && !isUARSafeCall(CI);
3625addb736SDmitry Vyukov   return false;
3635addb736SDmitry Vyukov }
3645addb736SDmitry Vyukov 
365960b4c3bSMarco Elver bool SanitizerBinaryMetadata::pretendAtomicAccess(const Value *Addr) {
366960b4c3bSMarco Elver   if (!Addr)
367960b4c3bSMarco Elver     return false;
368764c88a5SMarco Elver 
369764c88a5SMarco Elver   Addr = Addr->stripInBoundsOffsets();
370764c88a5SMarco Elver   auto *GV = dyn_cast<GlobalVariable>(Addr);
371764c88a5SMarco Elver   if (!GV)
372764c88a5SMarco Elver     return false;
373764c88a5SMarco Elver 
374960b4c3bSMarco Elver   // Some compiler-generated accesses are known racy, to avoid false positives
375960b4c3bSMarco Elver   // in data-race analysis pretend they're atomic.
376764c88a5SMarco Elver   if (GV->hasSection()) {
377764c88a5SMarco Elver     const auto OF = Triple(Mod.getTargetTriple()).getObjectFormat();
378764c88a5SMarco Elver     const auto ProfSec =
379764c88a5SMarco Elver         getInstrProfSectionName(IPSK_cnts, OF, /*AddSegmentInfo=*/false);
3803ca4fe80SSimon Pilgrim     if (GV->getSection().ends_with(ProfSec))
381764c88a5SMarco Elver       return true;
382764c88a5SMarco Elver   }
3833ca4fe80SSimon Pilgrim   if (GV->getName().starts_with("__llvm_gcov") ||
3843ca4fe80SSimon Pilgrim       GV->getName().starts_with("__llvm_gcda"))
385764c88a5SMarco Elver     return true;
386764c88a5SMarco Elver 
387764c88a5SMarco Elver   return false;
388764c88a5SMarco Elver }
389764c88a5SMarco Elver 
390960b4c3bSMarco Elver // Returns true if the memory at `Addr` may be shared with other threads.
391960b4c3bSMarco Elver bool maybeSharedMutable(const Value *Addr) {
392960b4c3bSMarco Elver   // By default assume memory may be shared.
393960b4c3bSMarco Elver   if (!Addr)
394960b4c3bSMarco Elver     return true;
395960b4c3bSMarco Elver 
396960b4c3bSMarco Elver   if (isa<AllocaInst>(getUnderlyingObject(Addr)) &&
397960b4c3bSMarco Elver       !PointerMayBeCaptured(Addr, true, true))
398960b4c3bSMarco Elver     return false; // Object is on stack but does not escape.
399960b4c3bSMarco Elver 
400960b4c3bSMarco Elver   Addr = Addr->stripInBoundsOffsets();
401960b4c3bSMarco Elver   if (auto *GV = dyn_cast<GlobalVariable>(Addr)) {
402960b4c3bSMarco Elver     if (GV->isConstant())
403960b4c3bSMarco Elver       return false; // Shared, but not mutable.
404960b4c3bSMarco Elver   }
405960b4c3bSMarco Elver 
406960b4c3bSMarco Elver   return true;
407960b4c3bSMarco Elver }
408960b4c3bSMarco Elver 
40997c22205SMarco Elver bool SanitizerBinaryMetadata::runOn(Instruction &I, MetadataInfoSet &MIS,
410bf9814b7SMarco Elver                                     MDBuilder &MDB, uint64_t &FeatureMask) {
41197c22205SMarco Elver   SmallVector<const MetadataInfo *, 1> InstMetadata;
41297c22205SMarco Elver   bool RequiresCovered = false;
41397c22205SMarco Elver 
414960b4c3bSMarco Elver   // Only call if at least 1 type of metadata is requested.
415960b4c3bSMarco Elver   assert(Options.UAR || Options.Atomics);
416960b4c3bSMarco Elver 
4175addb736SDmitry Vyukov   if (Options.UAR && !(FeatureMask & kSanitizerBinaryMetadataUAR)) {
4185addb736SDmitry Vyukov     if (useAfterReturnUnsafe(I))
419dbe8c2c3SDmitry Vyukov       FeatureMask |= kSanitizerBinaryMetadataUAR;
420dbe8c2c3SDmitry Vyukov   }
421dbe8c2c3SDmitry Vyukov 
422960b4c3bSMarco Elver   if (Options.Atomics) {
423960b4c3bSMarco Elver     const Value *Addr = nullptr;
424764c88a5SMarco Elver     if (auto *SI = dyn_cast<StoreInst>(&I))
425764c88a5SMarco Elver       Addr = SI->getPointerOperand();
426764c88a5SMarco Elver     else if (auto *LI = dyn_cast<LoadInst>(&I))
427764c88a5SMarco Elver       Addr = LI->getPointerOperand();
428764c88a5SMarco Elver 
429960b4c3bSMarco Elver     if (I.mayReadOrWriteMemory() && maybeSharedMutable(Addr)) {
430960b4c3bSMarco Elver       auto SSID = getAtomicSyncScopeID(&I);
431960b4c3bSMarco Elver       if ((SSID.has_value() && *SSID != SyncScope::SingleThread) ||
432960b4c3bSMarco Elver           pretendAtomicAccess(Addr)) {
43397c22205SMarco Elver         NumMetadataAtomics++;
43497c22205SMarco Elver         InstMetadata.push_back(&MetadataInfo::Atomics);
43597c22205SMarco Elver       }
436960b4c3bSMarco Elver       FeatureMask |= kSanitizerBinaryMetadataAtomics;
43797c22205SMarco Elver       RequiresCovered = true;
43897c22205SMarco Elver     }
439960b4c3bSMarco Elver   }
44097c22205SMarco Elver 
44197c22205SMarco Elver   // Attach MD_pcsections to instruction.
44297c22205SMarco Elver   if (!InstMetadata.empty()) {
44397c22205SMarco Elver     MIS.insert(InstMetadata.begin(), InstMetadata.end());
44497c22205SMarco Elver     SmallVector<MDBuilder::PCSection, 1> Sections;
44597c22205SMarco Elver     for (const auto &MI : InstMetadata)
44697c22205SMarco Elver       Sections.push_back({getSectionName(MI->SectionSuffix), {}});
44797c22205SMarco Elver     I.setMetadata(LLVMContext::MD_pcsections, MDB.createPCSections(Sections));
44897c22205SMarco Elver   }
44997c22205SMarco Elver 
45097c22205SMarco Elver   return RequiresCovered;
45197c22205SMarco Elver }
45297c22205SMarco Elver 
45397c22205SMarco Elver GlobalVariable *
45497c22205SMarco Elver SanitizerBinaryMetadata::getSectionMarker(const Twine &MarkerName, Type *Ty) {
455eecb22d8SFangrui Song   // Use ExternalWeak so that if all sections are discarded due to section
456eecb22d8SFangrui Song   // garbage collection, the linker will not report undefined symbol errors.
45797c22205SMarco Elver   auto *Marker = new GlobalVariable(Mod, Ty, /*isConstant=*/false,
458eecb22d8SFangrui Song                                     GlobalVariable::ExternalWeakLinkage,
45997c22205SMarco Elver                                     /*Initializer=*/nullptr, MarkerName);
46097c22205SMarco Elver   Marker->setVisibility(GlobalValue::HiddenVisibility);
46197c22205SMarco Elver   return Marker;
46297c22205SMarco Elver }
46397c22205SMarco Elver 
46497c22205SMarco Elver StringRef SanitizerBinaryMetadata::getSectionName(StringRef SectionSuffix) {
465bf9814b7SMarco Elver   // FIXME: Other TargetTriples.
466bf9814b7SMarco Elver   // Request ULEB128 encoding for all integer constants.
467f5b9e11eSMarco Elver   return StringPool.save(SectionSuffix + VersionStr + "!C");
46897c22205SMarco Elver }
46997c22205SMarco Elver 
470f5b9e11eSMarco Elver StringRef SanitizerBinaryMetadata::getSectionStart(StringRef SectionSuffix) {
471f5b9e11eSMarco Elver   // Twine only concatenates 2 strings; with >2 strings, concatenating them
472f5b9e11eSMarco Elver   // creates Twine temporaries, and returning the final Twine no longer works
473f5b9e11eSMarco Elver   // because we'd end up with a stack-use-after-return. So here we also use the
474f5b9e11eSMarco Elver   // StringPool to store the new string.
475f5b9e11eSMarco Elver   return StringPool.save("__start_" + SectionSuffix + VersionStr);
47697c22205SMarco Elver }
47797c22205SMarco Elver 
478f5b9e11eSMarco Elver StringRef SanitizerBinaryMetadata::getSectionEnd(StringRef SectionSuffix) {
479f5b9e11eSMarco Elver   return StringPool.save("__stop_" + SectionSuffix + VersionStr);
48097c22205SMarco Elver }
48197c22205SMarco Elver 
48297c22205SMarco Elver } // namespace
48397c22205SMarco Elver 
48497c22205SMarco Elver SanitizerBinaryMetadataPass::SanitizerBinaryMetadataPass(
485421215b9SMarco Elver     SanitizerBinaryMetadataOptions Opts, ArrayRef<std::string> IgnorelistFiles)
486421215b9SMarco Elver     : Options(std::move(Opts)), IgnorelistFiles(std::move(IgnorelistFiles)) {}
48797c22205SMarco Elver 
48897c22205SMarco Elver PreservedAnalyses
48997c22205SMarco Elver SanitizerBinaryMetadataPass::run(Module &M, AnalysisManager<Module> &AM) {
490421215b9SMarco Elver   std::unique_ptr<SpecialCaseList> Ignorelist;
491421215b9SMarco Elver   if (!IgnorelistFiles.empty()) {
492421215b9SMarco Elver     Ignorelist = SpecialCaseList::createOrDie(IgnorelistFiles,
493421215b9SMarco Elver                                               *vfs::getRealFileSystem());
494421215b9SMarco Elver     if (Ignorelist->inSection("metadata", "src", M.getSourceFileName()))
495421215b9SMarco Elver       return PreservedAnalyses::all();
496421215b9SMarco Elver   }
497421215b9SMarco Elver 
498421215b9SMarco Elver   SanitizerBinaryMetadata Pass(M, Options, std::move(Ignorelist));
49997c22205SMarco Elver   if (Pass.run())
50097c22205SMarco Elver     return PreservedAnalyses::none();
50197c22205SMarco Elver   return PreservedAnalyses::all();
50297c22205SMarco Elver }
503