xref: /freebsd-src/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1bdd1243dSDimitry Andric //===- SanitizerBinaryMetadata.cpp - binary analysis sanitizers metadata --===//
2bdd1243dSDimitry Andric //
3bdd1243dSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4bdd1243dSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5bdd1243dSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6bdd1243dSDimitry Andric //
7bdd1243dSDimitry Andric //===----------------------------------------------------------------------===//
8bdd1243dSDimitry Andric //
9bdd1243dSDimitry Andric // This file is a part of SanitizerBinaryMetadata.
10bdd1243dSDimitry Andric //
11bdd1243dSDimitry Andric //===----------------------------------------------------------------------===//
12bdd1243dSDimitry Andric 
13bdd1243dSDimitry Andric #include "llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h"
14bdd1243dSDimitry Andric #include "llvm/ADT/SetVector.h"
15bdd1243dSDimitry Andric #include "llvm/ADT/SmallVector.h"
16bdd1243dSDimitry Andric #include "llvm/ADT/Statistic.h"
17*0fca6ea1SDimitry Andric #include "llvm/ADT/StringExtras.h"
18bdd1243dSDimitry Andric #include "llvm/ADT/StringRef.h"
19bdd1243dSDimitry Andric #include "llvm/ADT/Twine.h"
2006c3fb27SDimitry Andric #include "llvm/Analysis/CaptureTracking.h"
2106c3fb27SDimitry Andric #include "llvm/Analysis/ValueTracking.h"
22bdd1243dSDimitry Andric #include "llvm/IR/Constant.h"
23bdd1243dSDimitry Andric #include "llvm/IR/DerivedTypes.h"
24bdd1243dSDimitry Andric #include "llvm/IR/Function.h"
25bdd1243dSDimitry Andric #include "llvm/IR/GlobalValue.h"
26bdd1243dSDimitry Andric #include "llvm/IR/GlobalVariable.h"
27bdd1243dSDimitry Andric #include "llvm/IR/IRBuilder.h"
28bdd1243dSDimitry Andric #include "llvm/IR/Instruction.h"
29bdd1243dSDimitry Andric #include "llvm/IR/Instructions.h"
30bdd1243dSDimitry Andric #include "llvm/IR/LLVMContext.h"
31bdd1243dSDimitry Andric #include "llvm/IR/MDBuilder.h"
32bdd1243dSDimitry Andric #include "llvm/IR/Metadata.h"
33bdd1243dSDimitry Andric #include "llvm/IR/Module.h"
34bdd1243dSDimitry Andric #include "llvm/IR/Type.h"
35bdd1243dSDimitry Andric #include "llvm/IR/Value.h"
3606c3fb27SDimitry Andric #include "llvm/ProfileData/InstrProf.h"
3706c3fb27SDimitry Andric #include "llvm/Support/Allocator.h"
38bdd1243dSDimitry Andric #include "llvm/Support/CommandLine.h"
39bdd1243dSDimitry Andric #include "llvm/Support/Debug.h"
4006c3fb27SDimitry Andric #include "llvm/Support/SpecialCaseList.h"
4106c3fb27SDimitry Andric #include "llvm/Support/StringSaver.h"
4206c3fb27SDimitry Andric #include "llvm/Support/VirtualFileSystem.h"
4306c3fb27SDimitry Andric #include "llvm/TargetParser/Triple.h"
44bdd1243dSDimitry Andric #include "llvm/Transforms/Utils/ModuleUtils.h"
45bdd1243dSDimitry Andric 
46bdd1243dSDimitry Andric #include <array>
47bdd1243dSDimitry Andric #include <cstdint>
4806c3fb27SDimitry Andric #include <memory>
49bdd1243dSDimitry Andric 
50bdd1243dSDimitry Andric using namespace llvm;
51bdd1243dSDimitry Andric 
52bdd1243dSDimitry Andric #define DEBUG_TYPE "sanmd"
53bdd1243dSDimitry Andric 
54bdd1243dSDimitry Andric namespace {
55bdd1243dSDimitry Andric 
56bdd1243dSDimitry Andric //===--- Constants --------------------------------------------------------===//
57bdd1243dSDimitry Andric 
// Base metadata format version; it occupies the lower 16 bits of the version
// word, with flag bits placed above it.
constexpr uint32_t kVersionBase = 2;
// Version flag (bit 16): offsets are pointer-sized.
constexpr uint32_t kVersionPtrSizeRel = uint32_t{1} << 16;
// Priority used when registering the module constructors and destructors.
constexpr int kCtorDtorPriority = 2;
61bdd1243dSDimitry Andric 
62bdd1243dSDimitry Andric // Pairs of names of initialization callback functions and which section
63bdd1243dSDimitry Andric // contains the relevant metadata.
64bdd1243dSDimitry Andric class MetadataInfo {
65bdd1243dSDimitry Andric public:
66bdd1243dSDimitry Andric   const StringRef FunctionPrefix;
67bdd1243dSDimitry Andric   const StringRef SectionSuffix;
68bdd1243dSDimitry Andric 
69bdd1243dSDimitry Andric   static const MetadataInfo Covered;
70bdd1243dSDimitry Andric   static const MetadataInfo Atomics;
71bdd1243dSDimitry Andric 
72bdd1243dSDimitry Andric private:
73bdd1243dSDimitry Andric   // Forbid construction elsewhere.
74bdd1243dSDimitry Andric   explicit constexpr MetadataInfo(StringRef FunctionPrefix,
7506c3fb27SDimitry Andric                                   StringRef SectionSuffix)
7606c3fb27SDimitry Andric       : FunctionPrefix(FunctionPrefix), SectionSuffix(SectionSuffix) {}
77bdd1243dSDimitry Andric };
7806c3fb27SDimitry Andric const MetadataInfo MetadataInfo::Covered{
7906c3fb27SDimitry Andric     "__sanitizer_metadata_covered", kSanitizerBinaryMetadataCoveredSection};
8006c3fb27SDimitry Andric const MetadataInfo MetadataInfo::Atomics{
8106c3fb27SDimitry Andric     "__sanitizer_metadata_atomics", kSanitizerBinaryMetadataAtomicsSection};
82bdd1243dSDimitry Andric 
83bdd1243dSDimitry Andric // The only instances of MetadataInfo are the constants above, so a set of
84bdd1243dSDimitry Andric // them may simply store pointers to them. To deterministically generate code,
85bdd1243dSDimitry Andric // we need to use a set with stable iteration order, such as SetVector.
86bdd1243dSDimitry Andric using MetadataInfoSet = SetVector<const MetadataInfo *>;
87bdd1243dSDimitry Andric 
88bdd1243dSDimitry Andric //===--- Command-line options ---------------------------------------------===//
89bdd1243dSDimitry Andric 
90bdd1243dSDimitry Andric cl::opt<bool> ClWeakCallbacks(
91bdd1243dSDimitry Andric     "sanitizer-metadata-weak-callbacks",
92bdd1243dSDimitry Andric     cl::desc("Declare callbacks extern weak, and only call if non-null."),
93bdd1243dSDimitry Andric     cl::Hidden, cl::init(true));
9406c3fb27SDimitry Andric cl::opt<bool>
9506c3fb27SDimitry Andric     ClNoSanitize("sanitizer-metadata-nosanitize-attr",
9606c3fb27SDimitry Andric                  cl::desc("Mark some metadata features uncovered in functions "
9706c3fb27SDimitry Andric                           "with associated no_sanitize attributes."),
9806c3fb27SDimitry Andric                  cl::Hidden, cl::init(true));
99bdd1243dSDimitry Andric 
100bdd1243dSDimitry Andric cl::opt<bool> ClEmitCovered("sanitizer-metadata-covered",
101bdd1243dSDimitry Andric                             cl::desc("Emit PCs for covered functions."),
102bdd1243dSDimitry Andric                             cl::Hidden, cl::init(false));
103bdd1243dSDimitry Andric cl::opt<bool> ClEmitAtomics("sanitizer-metadata-atomics",
104bdd1243dSDimitry Andric                             cl::desc("Emit PCs for atomic operations."),
105bdd1243dSDimitry Andric                             cl::Hidden, cl::init(false));
106bdd1243dSDimitry Andric cl::opt<bool> ClEmitUAR("sanitizer-metadata-uar",
107bdd1243dSDimitry Andric                         cl::desc("Emit PCs for start of functions that are "
108bdd1243dSDimitry Andric                                  "subject for use-after-return checking"),
109bdd1243dSDimitry Andric                         cl::Hidden, cl::init(false));
110bdd1243dSDimitry Andric 
111bdd1243dSDimitry Andric //===--- Statistics -------------------------------------------------------===//
112bdd1243dSDimitry Andric 
113bdd1243dSDimitry Andric STATISTIC(NumMetadataCovered, "Metadata attached to covered functions");
114bdd1243dSDimitry Andric STATISTIC(NumMetadataAtomics, "Metadata attached to atomics");
115bdd1243dSDimitry Andric STATISTIC(NumMetadataUAR, "Metadata attached to UAR functions");
116bdd1243dSDimitry Andric 
117bdd1243dSDimitry Andric //===----------------------------------------------------------------------===//
118bdd1243dSDimitry Andric 
119bdd1243dSDimitry Andric // Apply opt overrides.
120bdd1243dSDimitry Andric SanitizerBinaryMetadataOptions &&
121bdd1243dSDimitry Andric transformOptionsFromCl(SanitizerBinaryMetadataOptions &&Opts) {
122bdd1243dSDimitry Andric   Opts.Covered |= ClEmitCovered;
123bdd1243dSDimitry Andric   Opts.Atomics |= ClEmitAtomics;
124bdd1243dSDimitry Andric   Opts.UAR |= ClEmitUAR;
125bdd1243dSDimitry Andric   return std::move(Opts);
126bdd1243dSDimitry Andric }
127bdd1243dSDimitry Andric 
128bdd1243dSDimitry Andric class SanitizerBinaryMetadata {
129bdd1243dSDimitry Andric public:
13006c3fb27SDimitry Andric   SanitizerBinaryMetadata(Module &M, SanitizerBinaryMetadataOptions Opts,
13106c3fb27SDimitry Andric                           std::unique_ptr<SpecialCaseList> Ignorelist)
132bdd1243dSDimitry Andric       : Mod(M), Options(transformOptionsFromCl(std::move(Opts))),
13306c3fb27SDimitry Andric         Ignorelist(std::move(Ignorelist)), TargetTriple(M.getTargetTriple()),
134*0fca6ea1SDimitry Andric         VersionStr(utostr(getVersion())), IRB(M.getContext()) {
135bdd1243dSDimitry Andric     // FIXME: Make it work with other formats.
136bdd1243dSDimitry Andric     assert(TargetTriple.isOSBinFormatELF() && "ELF only");
13706c3fb27SDimitry Andric     assert(!(TargetTriple.isNVPTX() || TargetTriple.isAMDGPU()) &&
13806c3fb27SDimitry Andric            "Device targets are not supported");
139bdd1243dSDimitry Andric   }
140bdd1243dSDimitry Andric 
141bdd1243dSDimitry Andric   bool run();
142bdd1243dSDimitry Andric 
143bdd1243dSDimitry Andric private:
144bdd1243dSDimitry Andric   uint32_t getVersion() const {
145bdd1243dSDimitry Andric     uint32_t Version = kVersionBase;
146bdd1243dSDimitry Andric     const auto CM = Mod.getCodeModel();
147bdd1243dSDimitry Andric     if (CM.has_value() && (*CM == CodeModel::Medium || *CM == CodeModel::Large))
148bdd1243dSDimitry Andric       Version |= kVersionPtrSizeRel;
149bdd1243dSDimitry Andric     return Version;
150bdd1243dSDimitry Andric   }
151bdd1243dSDimitry Andric 
152bdd1243dSDimitry Andric   void runOn(Function &F, MetadataInfoSet &MIS);
153bdd1243dSDimitry Andric 
154bdd1243dSDimitry Andric   // Determines which set of metadata to collect for this instruction.
155bdd1243dSDimitry Andric   //
156bdd1243dSDimitry Andric   // Returns true if covered metadata is required to unambiguously interpret
157bdd1243dSDimitry Andric   // other metadata. For example, if we are interested in atomics metadata, any
158bdd1243dSDimitry Andric   // function with memory operations (atomic or not) requires covered metadata
159bdd1243dSDimitry Andric   // to determine if a memory operation is atomic or not in modules compiled
160bdd1243dSDimitry Andric   // with SanitizerBinaryMetadata.
161bdd1243dSDimitry Andric   bool runOn(Instruction &I, MetadataInfoSet &MIS, MDBuilder &MDB,
16206c3fb27SDimitry Andric              uint64_t &FeatureMask);
163bdd1243dSDimitry Andric 
164bdd1243dSDimitry Andric   // Get start/end section marker pointer.
165bdd1243dSDimitry Andric   GlobalVariable *getSectionMarker(const Twine &MarkerName, Type *Ty);
166bdd1243dSDimitry Andric 
167bdd1243dSDimitry Andric   // Returns the target-dependent section name.
168bdd1243dSDimitry Andric   StringRef getSectionName(StringRef SectionSuffix);
169bdd1243dSDimitry Andric 
170bdd1243dSDimitry Andric   // Returns the section start marker name.
171*0fca6ea1SDimitry Andric   StringRef getSectionStart(StringRef SectionSuffix);
172bdd1243dSDimitry Andric 
173bdd1243dSDimitry Andric   // Returns the section end marker name.
174*0fca6ea1SDimitry Andric   StringRef getSectionEnd(StringRef SectionSuffix);
175bdd1243dSDimitry Andric 
17606c3fb27SDimitry Andric   // Returns true if the access to the address should be considered "atomic".
17706c3fb27SDimitry Andric   bool pretendAtomicAccess(const Value *Addr);
17806c3fb27SDimitry Andric 
179bdd1243dSDimitry Andric   Module &Mod;
180bdd1243dSDimitry Andric   const SanitizerBinaryMetadataOptions Options;
18106c3fb27SDimitry Andric   std::unique_ptr<SpecialCaseList> Ignorelist;
182bdd1243dSDimitry Andric   const Triple TargetTriple;
183*0fca6ea1SDimitry Andric   const std::string VersionStr;
184bdd1243dSDimitry Andric   IRBuilder<> IRB;
18506c3fb27SDimitry Andric   BumpPtrAllocator Alloc;
18606c3fb27SDimitry Andric   UniqueStringSaver StringPool{Alloc};
187bdd1243dSDimitry Andric };
188bdd1243dSDimitry Andric 
189bdd1243dSDimitry Andric bool SanitizerBinaryMetadata::run() {
190bdd1243dSDimitry Andric   MetadataInfoSet MIS;
191bdd1243dSDimitry Andric 
192bdd1243dSDimitry Andric   for (Function &F : Mod)
193bdd1243dSDimitry Andric     runOn(F, MIS);
194bdd1243dSDimitry Andric 
195bdd1243dSDimitry Andric   if (MIS.empty())
196bdd1243dSDimitry Andric     return false;
197bdd1243dSDimitry Andric 
198bdd1243dSDimitry Andric   //
199bdd1243dSDimitry Andric   // Setup constructors and call all initialization functions for requested
200bdd1243dSDimitry Andric   // metadata features.
201bdd1243dSDimitry Andric   //
202bdd1243dSDimitry Andric 
2035f757f3fSDimitry Andric   auto *PtrTy = IRB.getPtrTy();
204bdd1243dSDimitry Andric   auto *Int32Ty = IRB.getInt32Ty();
2055f757f3fSDimitry Andric   const std::array<Type *, 3> InitTypes = {Int32Ty, PtrTy, PtrTy};
206bdd1243dSDimitry Andric   auto *Version = ConstantInt::get(Int32Ty, getVersion());
207bdd1243dSDimitry Andric 
208bdd1243dSDimitry Andric   for (const MetadataInfo *MI : MIS) {
209bdd1243dSDimitry Andric     const std::array<Value *, InitTypes.size()> InitArgs = {
210bdd1243dSDimitry Andric         Version,
2115f757f3fSDimitry Andric         getSectionMarker(getSectionStart(MI->SectionSuffix), PtrTy),
2125f757f3fSDimitry Andric         getSectionMarker(getSectionEnd(MI->SectionSuffix), PtrTy),
213bdd1243dSDimitry Andric     };
214*0fca6ea1SDimitry Andric 
215*0fca6ea1SDimitry Andric     // Calls to the initialization functions with different versions cannot be
216*0fca6ea1SDimitry Andric     // merged. Give the structors unique names based on the version, which will
217*0fca6ea1SDimitry Andric     // also be used as the COMDAT key.
218*0fca6ea1SDimitry Andric     const std::string StructorPrefix = (MI->FunctionPrefix + VersionStr).str();
219*0fca6ea1SDimitry Andric 
220bdd1243dSDimitry Andric     // We declare the _add and _del functions as weak, and only call them if
221bdd1243dSDimitry Andric     // there is a valid symbol linked. This allows building binaries with
222bdd1243dSDimitry Andric     // semantic metadata, but without having callbacks. When a tool that wants
223bdd1243dSDimitry Andric     // the metadata is linked which provides the callbacks, they will be called.
224bdd1243dSDimitry Andric     Function *Ctor =
225bdd1243dSDimitry Andric         createSanitizerCtorAndInitFunctions(
226*0fca6ea1SDimitry Andric             Mod, StructorPrefix + ".module_ctor",
227bdd1243dSDimitry Andric             (MI->FunctionPrefix + "_add").str(), InitTypes, InitArgs,
228bdd1243dSDimitry Andric             /*VersionCheckName=*/StringRef(), /*Weak=*/ClWeakCallbacks)
229bdd1243dSDimitry Andric             .first;
230bdd1243dSDimitry Andric     Function *Dtor =
231bdd1243dSDimitry Andric         createSanitizerCtorAndInitFunctions(
232*0fca6ea1SDimitry Andric             Mod, StructorPrefix + ".module_dtor",
233bdd1243dSDimitry Andric             (MI->FunctionPrefix + "_del").str(), InitTypes, InitArgs,
234bdd1243dSDimitry Andric             /*VersionCheckName=*/StringRef(), /*Weak=*/ClWeakCallbacks)
235bdd1243dSDimitry Andric             .first;
23606c3fb27SDimitry Andric     Constant *CtorComdatKey = nullptr;
23706c3fb27SDimitry Andric     Constant *DtorComdatKey = nullptr;
238bdd1243dSDimitry Andric     if (TargetTriple.supportsCOMDAT()) {
23906c3fb27SDimitry Andric       // Use COMDAT to deduplicate constructor/destructor function. The COMDAT
24006c3fb27SDimitry Andric       // key needs to be a non-local linkage.
241bdd1243dSDimitry Andric       Ctor->setComdat(Mod.getOrInsertComdat(Ctor->getName()));
242bdd1243dSDimitry Andric       Dtor->setComdat(Mod.getOrInsertComdat(Dtor->getName()));
24306c3fb27SDimitry Andric       Ctor->setLinkage(GlobalValue::ExternalLinkage);
24406c3fb27SDimitry Andric       Dtor->setLinkage(GlobalValue::ExternalLinkage);
24506c3fb27SDimitry Andric       // DSOs should _not_ call another constructor/destructor!
24606c3fb27SDimitry Andric       Ctor->setVisibility(GlobalValue::HiddenVisibility);
24706c3fb27SDimitry Andric       Dtor->setVisibility(GlobalValue::HiddenVisibility);
24806c3fb27SDimitry Andric       CtorComdatKey = Ctor;
24906c3fb27SDimitry Andric       DtorComdatKey = Dtor;
250bdd1243dSDimitry Andric     }
25106c3fb27SDimitry Andric     appendToGlobalCtors(Mod, Ctor, kCtorDtorPriority, CtorComdatKey);
25206c3fb27SDimitry Andric     appendToGlobalDtors(Mod, Dtor, kCtorDtorPriority, DtorComdatKey);
253bdd1243dSDimitry Andric   }
254bdd1243dSDimitry Andric 
255bdd1243dSDimitry Andric   return true;
256bdd1243dSDimitry Andric }
257bdd1243dSDimitry Andric 
258bdd1243dSDimitry Andric void SanitizerBinaryMetadata::runOn(Function &F, MetadataInfoSet &MIS) {
259bdd1243dSDimitry Andric   if (F.empty())
260bdd1243dSDimitry Andric     return;
261bdd1243dSDimitry Andric   if (F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation))
262bdd1243dSDimitry Andric     return;
26306c3fb27SDimitry Andric   if (Ignorelist && Ignorelist->inSection("metadata", "fun", F.getName()))
26406c3fb27SDimitry Andric     return;
265bdd1243dSDimitry Andric   // Don't touch available_externally functions, their actual body is elsewhere.
266bdd1243dSDimitry Andric   if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage)
267bdd1243dSDimitry Andric     return;
268bdd1243dSDimitry Andric 
269bdd1243dSDimitry Andric   MDBuilder MDB(F.getContext());
270bdd1243dSDimitry Andric 
271bdd1243dSDimitry Andric   // The metadata features enabled for this function, stored along covered
272bdd1243dSDimitry Andric   // metadata (if enabled).
27306c3fb27SDimitry Andric   uint64_t FeatureMask = 0;
274bdd1243dSDimitry Andric   // Don't emit unnecessary covered metadata for all functions to save space.
275bdd1243dSDimitry Andric   bool RequiresCovered = false;
27606c3fb27SDimitry Andric 
27706c3fb27SDimitry Andric   if (Options.Atomics || Options.UAR) {
278bdd1243dSDimitry Andric     for (BasicBlock &BB : F)
279bdd1243dSDimitry Andric       for (Instruction &I : BB)
280bdd1243dSDimitry Andric         RequiresCovered |= runOn(I, MIS, MDB, FeatureMask);
281bdd1243dSDimitry Andric   }
282bdd1243dSDimitry Andric 
28306c3fb27SDimitry Andric   if (ClNoSanitize && F.hasFnAttribute("no_sanitize_thread"))
28406c3fb27SDimitry Andric     FeatureMask &= ~kSanitizerBinaryMetadataAtomics;
285bdd1243dSDimitry Andric   if (F.isVarArg())
286bdd1243dSDimitry Andric     FeatureMask &= ~kSanitizerBinaryMetadataUAR;
287bdd1243dSDimitry Andric   if (FeatureMask & kSanitizerBinaryMetadataUAR) {
288bdd1243dSDimitry Andric     RequiresCovered = true;
289bdd1243dSDimitry Andric     NumMetadataUAR++;
290bdd1243dSDimitry Andric   }
291bdd1243dSDimitry Andric 
292bdd1243dSDimitry Andric   // Covered metadata is always emitted if explicitly requested, otherwise only
293bdd1243dSDimitry Andric   // if some other metadata requires it to unambiguously interpret it for
294bdd1243dSDimitry Andric   // modules compiled with SanitizerBinaryMetadata.
295bdd1243dSDimitry Andric   if (Options.Covered || (FeatureMask && RequiresCovered)) {
296bdd1243dSDimitry Andric     NumMetadataCovered++;
297bdd1243dSDimitry Andric     const auto *MI = &MetadataInfo::Covered;
298bdd1243dSDimitry Andric     MIS.insert(MI);
299bdd1243dSDimitry Andric     const StringRef Section = getSectionName(MI->SectionSuffix);
30006c3fb27SDimitry Andric     // The feature mask will be placed after the function size.
30106c3fb27SDimitry Andric     Constant *CFM = IRB.getInt64(FeatureMask);
302bdd1243dSDimitry Andric     F.setMetadata(LLVMContext::MD_pcsections,
303bdd1243dSDimitry Andric                   MDB.createPCSections({{Section, {CFM}}}));
304bdd1243dSDimitry Andric   }
305bdd1243dSDimitry Andric }
306bdd1243dSDimitry Andric 
307bdd1243dSDimitry Andric bool isUARSafeCall(CallInst *CI) {
308bdd1243dSDimitry Andric   auto *F = CI->getCalledFunction();
309bdd1243dSDimitry Andric   // There are no intrinsic functions that leak arguments.
310bdd1243dSDimitry Andric   // If the called function does not return, the current function
311bdd1243dSDimitry Andric   // does not return as well, so no possibility of use-after-return.
312bdd1243dSDimitry Andric   // Sanitizer function also don't leak or don't return.
313bdd1243dSDimitry Andric   // It's safe to both pass pointers to local variables to them
314bdd1243dSDimitry Andric   // and to tail-call them.
315bdd1243dSDimitry Andric   return F && (F->isIntrinsic() || F->doesNotReturn() ||
3165f757f3fSDimitry Andric                F->getName().starts_with("__asan_") ||
3175f757f3fSDimitry Andric                F->getName().starts_with("__hwsan_") ||
3185f757f3fSDimitry Andric                F->getName().starts_with("__ubsan_") ||
3195f757f3fSDimitry Andric                F->getName().starts_with("__msan_") ||
3205f757f3fSDimitry Andric                F->getName().starts_with("__tsan_"));
321bdd1243dSDimitry Andric }
322bdd1243dSDimitry Andric 
323bdd1243dSDimitry Andric bool hasUseAfterReturnUnsafeUses(Value &V) {
324bdd1243dSDimitry Andric   for (User *U : V.users()) {
325bdd1243dSDimitry Andric     if (auto *I = dyn_cast<Instruction>(U)) {
326bdd1243dSDimitry Andric       if (I->isLifetimeStartOrEnd() || I->isDroppable())
327bdd1243dSDimitry Andric         continue;
328bdd1243dSDimitry Andric       if (auto *CI = dyn_cast<CallInst>(U)) {
329bdd1243dSDimitry Andric         if (isUARSafeCall(CI))
330bdd1243dSDimitry Andric           continue;
331bdd1243dSDimitry Andric       }
332bdd1243dSDimitry Andric       if (isa<LoadInst>(U))
333bdd1243dSDimitry Andric         continue;
334bdd1243dSDimitry Andric       if (auto *SI = dyn_cast<StoreInst>(U)) {
335bdd1243dSDimitry Andric         // If storing TO the alloca, then the address isn't taken.
336bdd1243dSDimitry Andric         if (SI->getOperand(1) == &V)
337bdd1243dSDimitry Andric           continue;
338bdd1243dSDimitry Andric       }
339bdd1243dSDimitry Andric       if (auto *GEPI = dyn_cast<GetElementPtrInst>(U)) {
340bdd1243dSDimitry Andric         if (!hasUseAfterReturnUnsafeUses(*GEPI))
341bdd1243dSDimitry Andric           continue;
342bdd1243dSDimitry Andric       } else if (auto *BCI = dyn_cast<BitCastInst>(U)) {
343bdd1243dSDimitry Andric         if (!hasUseAfterReturnUnsafeUses(*BCI))
344bdd1243dSDimitry Andric           continue;
345bdd1243dSDimitry Andric       }
346bdd1243dSDimitry Andric     }
347bdd1243dSDimitry Andric     return true;
348bdd1243dSDimitry Andric   }
349bdd1243dSDimitry Andric   return false;
350bdd1243dSDimitry Andric }
351bdd1243dSDimitry Andric 
352bdd1243dSDimitry Andric bool useAfterReturnUnsafe(Instruction &I) {
353bdd1243dSDimitry Andric   if (isa<AllocaInst>(I))
354bdd1243dSDimitry Andric     return hasUseAfterReturnUnsafeUses(I);
355bdd1243dSDimitry Andric   // Tail-called functions are not necessary intercepted
356bdd1243dSDimitry Andric   // at runtime because there is no call instruction.
357bdd1243dSDimitry Andric   // So conservatively mark the caller as requiring checking.
358bdd1243dSDimitry Andric   else if (auto *CI = dyn_cast<CallInst>(&I))
359bdd1243dSDimitry Andric     return CI->isTailCall() && !isUARSafeCall(CI);
360bdd1243dSDimitry Andric   return false;
361bdd1243dSDimitry Andric }
362bdd1243dSDimitry Andric 
36306c3fb27SDimitry Andric bool SanitizerBinaryMetadata::pretendAtomicAccess(const Value *Addr) {
36406c3fb27SDimitry Andric   if (!Addr)
36506c3fb27SDimitry Andric     return false;
36606c3fb27SDimitry Andric 
36706c3fb27SDimitry Andric   Addr = Addr->stripInBoundsOffsets();
36806c3fb27SDimitry Andric   auto *GV = dyn_cast<GlobalVariable>(Addr);
36906c3fb27SDimitry Andric   if (!GV)
37006c3fb27SDimitry Andric     return false;
37106c3fb27SDimitry Andric 
37206c3fb27SDimitry Andric   // Some compiler-generated accesses are known racy, to avoid false positives
37306c3fb27SDimitry Andric   // in data-race analysis pretend they're atomic.
37406c3fb27SDimitry Andric   if (GV->hasSection()) {
37506c3fb27SDimitry Andric     const auto OF = Triple(Mod.getTargetTriple()).getObjectFormat();
37606c3fb27SDimitry Andric     const auto ProfSec =
37706c3fb27SDimitry Andric         getInstrProfSectionName(IPSK_cnts, OF, /*AddSegmentInfo=*/false);
3785f757f3fSDimitry Andric     if (GV->getSection().ends_with(ProfSec))
37906c3fb27SDimitry Andric       return true;
38006c3fb27SDimitry Andric   }
3815f757f3fSDimitry Andric   if (GV->getName().starts_with("__llvm_gcov") ||
3825f757f3fSDimitry Andric       GV->getName().starts_with("__llvm_gcda"))
38306c3fb27SDimitry Andric     return true;
38406c3fb27SDimitry Andric 
38506c3fb27SDimitry Andric   return false;
38606c3fb27SDimitry Andric }
38706c3fb27SDimitry Andric 
38806c3fb27SDimitry Andric // Returns true if the memory at `Addr` may be shared with other threads.
38906c3fb27SDimitry Andric bool maybeSharedMutable(const Value *Addr) {
39006c3fb27SDimitry Andric   // By default assume memory may be shared.
39106c3fb27SDimitry Andric   if (!Addr)
39206c3fb27SDimitry Andric     return true;
39306c3fb27SDimitry Andric 
39406c3fb27SDimitry Andric   if (isa<AllocaInst>(getUnderlyingObject(Addr)) &&
39506c3fb27SDimitry Andric       !PointerMayBeCaptured(Addr, true, true))
39606c3fb27SDimitry Andric     return false; // Object is on stack but does not escape.
39706c3fb27SDimitry Andric 
39806c3fb27SDimitry Andric   Addr = Addr->stripInBoundsOffsets();
39906c3fb27SDimitry Andric   if (auto *GV = dyn_cast<GlobalVariable>(Addr)) {
40006c3fb27SDimitry Andric     if (GV->isConstant())
40106c3fb27SDimitry Andric       return false; // Shared, but not mutable.
40206c3fb27SDimitry Andric   }
40306c3fb27SDimitry Andric 
40406c3fb27SDimitry Andric   return true;
40506c3fb27SDimitry Andric }
40606c3fb27SDimitry Andric 
407bdd1243dSDimitry Andric bool SanitizerBinaryMetadata::runOn(Instruction &I, MetadataInfoSet &MIS,
40806c3fb27SDimitry Andric                                     MDBuilder &MDB, uint64_t &FeatureMask) {
409bdd1243dSDimitry Andric   SmallVector<const MetadataInfo *, 1> InstMetadata;
410bdd1243dSDimitry Andric   bool RequiresCovered = false;
411bdd1243dSDimitry Andric 
41206c3fb27SDimitry Andric   // Only call if at least 1 type of metadata is requested.
41306c3fb27SDimitry Andric   assert(Options.UAR || Options.Atomics);
41406c3fb27SDimitry Andric 
415bdd1243dSDimitry Andric   if (Options.UAR && !(FeatureMask & kSanitizerBinaryMetadataUAR)) {
416bdd1243dSDimitry Andric     if (useAfterReturnUnsafe(I))
417bdd1243dSDimitry Andric       FeatureMask |= kSanitizerBinaryMetadataUAR;
418bdd1243dSDimitry Andric   }
419bdd1243dSDimitry Andric 
42006c3fb27SDimitry Andric   if (Options.Atomics) {
42106c3fb27SDimitry Andric     const Value *Addr = nullptr;
42206c3fb27SDimitry Andric     if (auto *SI = dyn_cast<StoreInst>(&I))
42306c3fb27SDimitry Andric       Addr = SI->getPointerOperand();
42406c3fb27SDimitry Andric     else if (auto *LI = dyn_cast<LoadInst>(&I))
42506c3fb27SDimitry Andric       Addr = LI->getPointerOperand();
42606c3fb27SDimitry Andric 
42706c3fb27SDimitry Andric     if (I.mayReadOrWriteMemory() && maybeSharedMutable(Addr)) {
428bdd1243dSDimitry Andric       auto SSID = getAtomicSyncScopeID(&I);
42906c3fb27SDimitry Andric       if ((SSID.has_value() && *SSID != SyncScope::SingleThread) ||
43006c3fb27SDimitry Andric           pretendAtomicAccess(Addr)) {
431bdd1243dSDimitry Andric         NumMetadataAtomics++;
432bdd1243dSDimitry Andric         InstMetadata.push_back(&MetadataInfo::Atomics);
433bdd1243dSDimitry Andric       }
43406c3fb27SDimitry Andric       FeatureMask |= kSanitizerBinaryMetadataAtomics;
435bdd1243dSDimitry Andric       RequiresCovered = true;
436bdd1243dSDimitry Andric     }
43706c3fb27SDimitry Andric   }
438bdd1243dSDimitry Andric 
439bdd1243dSDimitry Andric   // Attach MD_pcsections to instruction.
440bdd1243dSDimitry Andric   if (!InstMetadata.empty()) {
441bdd1243dSDimitry Andric     MIS.insert(InstMetadata.begin(), InstMetadata.end());
442bdd1243dSDimitry Andric     SmallVector<MDBuilder::PCSection, 1> Sections;
443bdd1243dSDimitry Andric     for (const auto &MI : InstMetadata)
444bdd1243dSDimitry Andric       Sections.push_back({getSectionName(MI->SectionSuffix), {}});
445bdd1243dSDimitry Andric     I.setMetadata(LLVMContext::MD_pcsections, MDB.createPCSections(Sections));
446bdd1243dSDimitry Andric   }
447bdd1243dSDimitry Andric 
448bdd1243dSDimitry Andric   return RequiresCovered;
449bdd1243dSDimitry Andric }
450bdd1243dSDimitry Andric 
451bdd1243dSDimitry Andric GlobalVariable *
452bdd1243dSDimitry Andric SanitizerBinaryMetadata::getSectionMarker(const Twine &MarkerName, Type *Ty) {
453bdd1243dSDimitry Andric   // Use ExternalWeak so that if all sections are discarded due to section
454bdd1243dSDimitry Andric   // garbage collection, the linker will not report undefined symbol errors.
455bdd1243dSDimitry Andric   auto *Marker = new GlobalVariable(Mod, Ty, /*isConstant=*/false,
456bdd1243dSDimitry Andric                                     GlobalVariable::ExternalWeakLinkage,
457bdd1243dSDimitry Andric                                     /*Initializer=*/nullptr, MarkerName);
458bdd1243dSDimitry Andric   Marker->setVisibility(GlobalValue::HiddenVisibility);
459bdd1243dSDimitry Andric   return Marker;
460bdd1243dSDimitry Andric }
461bdd1243dSDimitry Andric 
462bdd1243dSDimitry Andric StringRef SanitizerBinaryMetadata::getSectionName(StringRef SectionSuffix) {
46306c3fb27SDimitry Andric   // FIXME: Other TargetTriples.
46406c3fb27SDimitry Andric   // Request ULEB128 encoding for all integer constants.
465*0fca6ea1SDimitry Andric   return StringPool.save(SectionSuffix + VersionStr + "!C");
466bdd1243dSDimitry Andric }
467bdd1243dSDimitry Andric 
468*0fca6ea1SDimitry Andric StringRef SanitizerBinaryMetadata::getSectionStart(StringRef SectionSuffix) {
469*0fca6ea1SDimitry Andric   // Twine only concatenates 2 strings; with >2 strings, concatenating them
470*0fca6ea1SDimitry Andric   // creates Twine temporaries, and returning the final Twine no longer works
471*0fca6ea1SDimitry Andric   // because we'd end up with a stack-use-after-return. So here we also use the
472*0fca6ea1SDimitry Andric   // StringPool to store the new string.
473*0fca6ea1SDimitry Andric   return StringPool.save("__start_" + SectionSuffix + VersionStr);
474bdd1243dSDimitry Andric }
475bdd1243dSDimitry Andric 
476*0fca6ea1SDimitry Andric StringRef SanitizerBinaryMetadata::getSectionEnd(StringRef SectionSuffix) {
477*0fca6ea1SDimitry Andric   return StringPool.save("__stop_" + SectionSuffix + VersionStr);
478bdd1243dSDimitry Andric }
479bdd1243dSDimitry Andric 
480bdd1243dSDimitry Andric } // namespace
481bdd1243dSDimitry Andric 
482bdd1243dSDimitry Andric SanitizerBinaryMetadataPass::SanitizerBinaryMetadataPass(
48306c3fb27SDimitry Andric     SanitizerBinaryMetadataOptions Opts, ArrayRef<std::string> IgnorelistFiles)
48406c3fb27SDimitry Andric     : Options(std::move(Opts)), IgnorelistFiles(std::move(IgnorelistFiles)) {}
485bdd1243dSDimitry Andric 
486bdd1243dSDimitry Andric PreservedAnalyses
487bdd1243dSDimitry Andric SanitizerBinaryMetadataPass::run(Module &M, AnalysisManager<Module> &AM) {
48806c3fb27SDimitry Andric   std::unique_ptr<SpecialCaseList> Ignorelist;
48906c3fb27SDimitry Andric   if (!IgnorelistFiles.empty()) {
49006c3fb27SDimitry Andric     Ignorelist = SpecialCaseList::createOrDie(IgnorelistFiles,
49106c3fb27SDimitry Andric                                               *vfs::getRealFileSystem());
49206c3fb27SDimitry Andric     if (Ignorelist->inSection("metadata", "src", M.getSourceFileName()))
49306c3fb27SDimitry Andric       return PreservedAnalyses::all();
49406c3fb27SDimitry Andric   }
49506c3fb27SDimitry Andric 
49606c3fb27SDimitry Andric   SanitizerBinaryMetadata Pass(M, Options, std::move(Ignorelist));
497bdd1243dSDimitry Andric   if (Pass.run())
498bdd1243dSDimitry Andric     return PreservedAnalyses::none();
499bdd1243dSDimitry Andric   return PreservedAnalyses::all();
500bdd1243dSDimitry Andric }
501