xref: /llvm-project/llvm/tools/llvm-cfi-verify/lib/FileAnalysis.h (revision 74ff52345d1203995febf6f090a0ff9e91c92b4a)
//===- FileAnalysis.h -------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_CFI_VERIFY_FILE_ANALYSIS_H
10 #define LLVM_CFI_VERIFY_FILE_ANALYSIS_H
11 
12 #include "llvm/ADT/DenseMap.h"
13 #include "llvm/ADT/SmallSet.h"
14 #include "llvm/BinaryFormat/ELF.h"
15 #include "llvm/DebugInfo/Symbolize/Symbolize.h"
16 #include "llvm/MC/MCAsmInfo.h"
17 #include "llvm/MC/MCContext.h"
18 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
19 #include "llvm/MC/MCInst.h"
20 #include "llvm/MC/MCInstPrinter.h"
21 #include "llvm/MC/MCInstrAnalysis.h"
22 #include "llvm/MC/MCInstrDesc.h"
23 #include "llvm/MC/MCInstrInfo.h"
24 #include "llvm/MC/MCObjectFileInfo.h"
25 #include "llvm/MC/MCRegisterInfo.h"
26 #include "llvm/MC/MCSubtargetInfo.h"
27 #include "llvm/MC/TargetRegistry.h"
28 #include "llvm/Object/Binary.h"
29 #include "llvm/Object/COFF.h"
30 #include "llvm/Object/ELFObjectFile.h"
31 #include "llvm/Object/ObjectFile.h"
32 #include "llvm/Support/Casting.h"
33 #include "llvm/Support/CommandLine.h"
34 #include "llvm/Support/Error.h"
35 #include "llvm/Support/MemoryBuffer.h"
36 #include "llvm/Support/TargetSelect.h"
37 #include "llvm/Support/raw_ostream.h"
38 
39 #include <functional>
40 #include <set>
41 #include <string>
42 
43 namespace llvm {
44 namespace cfi_verify {
45 
// Forward declaration only; this header refers to GraphResult purely by const
// reference, so the complete type is not needed here. The definition lives
// outside this header.
struct GraphResult;

// Global flag declared here and defined in another translation unit.
// NOTE(review): the name suggests it makes the tool ignore DWARF debug
// information -- confirm against the definition before relying on that.
extern bool IgnoreDWARFFlag;
49 
// Outcome of checking whether an indirect control flow instruction is
// protected by CFI. PROTECTED is the only success value; every FAIL_* value
// names one specific reason the check did not succeed.
enum class CFIProtectionStatus {
  // This instruction is protected by CFI.
  PROTECTED,
  // The instruction is not an indirect control flow instruction, and thus
  // shouldn't be protected.
  FAIL_NOT_INDIRECT_CF,
  // There is a path to the instruction that was unexpected.
  FAIL_ORPHANS,
  // There is a path to the instruction from a conditional branch that does not
  // properly check the destination for this vcall/icall.
  FAIL_BAD_CONDITIONAL_BRANCH,
  // One of the operands of the indirect CF instruction is modified between the
  // CFI-check and execution.
  FAIL_REGISTER_CLOBBERED,
  // The instruction referenced does not exist. This normally indicates an
  // error in the program, where you try and validate a graph that was created
  // in a different FileAnalysis object.
  FAIL_INVALID_INSTRUCTION,
};
69 
70 StringRef stringCFIProtectionStatus(CFIProtectionStatus Status);
71 
72 // Disassembler and analysis tool for machine code files. Keeps track of non-
73 // sequential control flows, including indirect control flow instructions.
74 class FileAnalysis {
75 public:
76   // A metadata struct for an instruction.
77   struct Instr {
78     uint64_t VMAddress;       // Virtual memory address of this instruction.
79     MCInst Instruction;       // Instruction.
80     uint64_t InstructionSize; // Size of this instruction.
81     bool Valid; // Is this a valid instruction? If false, Instr::Instruction is
82                 // undefined.
83   };
84 
85   // Construct a FileAnalysis from a file path.
86   static Expected<FileAnalysis> Create(StringRef Filename);
87 
88   // Construct and take ownership of the supplied object. Do not use this
89   // constructor, prefer to use FileAnalysis::Create instead.
90   FileAnalysis(object::OwningBinary<object::Binary> Binary);
91   FileAnalysis() = delete;
92   FileAnalysis(const FileAnalysis &) = delete;
93   FileAnalysis(FileAnalysis &&Other) = default;
94 
95   // Returns the instruction at the provided address. Returns nullptr if there
96   // is no instruction at the provided address.
97   const Instr *getInstruction(uint64_t Address) const;
98 
99   // Returns the instruction at the provided adress, dying if the instruction is
100   // not found.
101   const Instr &getInstructionOrDie(uint64_t Address) const;
102 
103   // Returns a pointer to the previous/next instruction in sequence,
104   // respectively. Returns nullptr if the next/prev instruction doesn't exist,
105   // or if the provided instruction doesn't exist.
106   const Instr *getPrevInstructionSequential(const Instr &InstrMeta) const;
107   const Instr *getNextInstructionSequential(const Instr &InstrMeta) const;
108 
109   // Returns whether this instruction is used by CFI to trap the program.
110   bool isCFITrap(const Instr &InstrMeta) const;
111 
112   // Returns whether this instruction is a call to a function that will trap on
113   // CFI violations (i.e., it serves as a trap in this instance).
114   bool willTrapOnCFIViolation(const Instr &InstrMeta) const;
115 
116   // Returns whether this function can fall through to the next instruction.
117   // Undefined (and bad) instructions cannot fall through, and instruction that
118   // modify the control flow can only fall through if they are conditional
119   // branches or calls.
120   bool canFallThrough(const Instr &InstrMeta) const;
121 
122   // Returns the definitive next instruction. This is different from the next
123   // instruction sequentially as it will follow unconditional branches (assuming
124   // they can be resolved at compile time, i.e. not indirect). This method
125   // returns nullptr if the provided instruction does not transfer control flow
126   // to exactly one instruction that is known deterministically at compile time.
127   // Also returns nullptr if the deterministic target does not exist in this
128   // file.
129   const Instr *getDefiniteNextInstruction(const Instr &InstrMeta) const;
130 
131   // Get a list of deterministic control flows that lead to the provided
132   // instruction. This list includes all static control flow cross-references as
133   // well as the previous instruction if it can fall through.
134   std::set<const Instr *>
135   getDirectControlFlowXRefs(const Instr &InstrMeta) const;
136 
137   // Returns whether this instruction uses a register operand.
138   bool usesRegisterOperand(const Instr &InstrMeta) const;
139 
140   // Returns the list of indirect instructions.
141   const std::set<object::SectionedAddress> &getIndirectInstructions() const;
142 
143   const MCRegisterInfo *getRegisterInfo() const;
144   const MCInstrInfo *getMCInstrInfo() const;
145   const MCInstrAnalysis *getMCInstrAnalysis() const;
146 
147   // Returns the inlining information for the provided address.
148   Expected<DIInliningInfo>
149   symbolizeInlinedCode(object::SectionedAddress Address);
150 
151   // Returns whether the provided Graph represents a protected indirect control
152   // flow instruction in this file.
153   CFIProtectionStatus validateCFIProtection(const GraphResult &Graph) const;
154 
155   // Returns the first place the operand register is clobbered between the CFI-
156   // check and the indirect CF instruction execution. We do this by walking
157   // backwards from the indirect CF and ensuring there is at most one load
158   // involving the operand register (which is the indirect CF itself on x86).
159   // If the register is not modified, returns the address of the indirect CF
160   // instruction. The result is undefined if the provided graph does not fall
161   // under either the FAIL_REGISTER_CLOBBERED or PROTECTED status (see
162   // CFIProtectionStatus).
163   uint64_t indirectCFOperandClobber(const GraphResult& Graph) const;
164 
165   // Prints an instruction to the provided stream using this object's pretty-
166   // printers.
167   void printInstruction(const Instr &InstrMeta, raw_ostream &OS) const;
168 
169 protected:
170   // Construct a blank object with the provided triple and features. Used in
171   // testing, where a sub class will dependency inject protected methods to
172   // allow analysis of raw binary, without requiring a fully valid ELF file.
173   FileAnalysis(const Triple &ObjectTriple, const SubtargetFeatures &Features);
174 
175   // Add an instruction to this object.
176   void addInstruction(const Instr &Instruction);
177 
178   // Disassemble and parse the provided bytes into this object. Instruction
179   // address calculation is done relative to the provided SectionAddress.
180   void parseSectionContents(ArrayRef<uint8_t> SectionBytes,
181                             object::SectionedAddress Address);
182 
183   // Constructs and initialises members required for disassembly.
184   Error initialiseDisassemblyMembers();
185 
186   // Parses code sections from the internal object file. Saves them into the
187   // internal members. Should only be called once by Create().
188   Error parseCodeSections();
189 
190   // Parses the symbol table to look for the addresses of functions that will
191   // trap on CFI violations.
192   Error parseSymbolTable();
193 
194 private:
195   // Members that describe the input file.
196   object::OwningBinary<object::Binary> Binary;
197   const object::ObjectFile *Object = nullptr;
198   Triple ObjectTriple;
199   std::string ArchName;
200   std::string MCPU;
201   const Target *ObjectTarget = nullptr;
202   SubtargetFeatures Features;
203 
204   // Members required for disassembly.
205   std::unique_ptr<const MCRegisterInfo> RegisterInfo;
206   std::unique_ptr<const MCAsmInfo> AsmInfo;
207   std::unique_ptr<MCSubtargetInfo> SubtargetInfo;
208   std::unique_ptr<const MCInstrInfo> MII;
209   std::unique_ptr<MCContext> Context;
210   std::unique_ptr<const MCDisassembler> Disassembler;
211   std::unique_ptr<const MCInstrAnalysis> MIA;
212   std::unique_ptr<MCInstPrinter> Printer;
213 
214   // Symbolizer used for debug information parsing.
215   std::unique_ptr<symbolize::LLVMSymbolizer> Symbolizer;
216 
217   // A mapping between the virtual memory address to the instruction metadata
218   // struct. TODO(hctim): Reimplement this as a sorted vector to avoid per-
219   // insertion allocation.
220   std::map<uint64_t, Instr> Instructions;
221 
222   // Contains a mapping between a specific address, and a list of instructions
223   // that use this address as a branch target (including call instructions).
224   DenseMap<uint64_t, std::vector<uint64_t>> StaticBranchTargetings;
225 
226   // A list of addresses of indirect control flow instructions.
227   std::set<object::SectionedAddress> IndirectInstructions;
228 
229   // The addresses of functions that will trap on CFI violations.
230   SmallSet<uint64_t, 4> TrapOnFailFunctionAddresses;
231 };
232 
233 class UnsupportedDisassembly : public ErrorInfo<UnsupportedDisassembly> {
234 public:
235   static char ID;
236   std::string Text;
237 
238   UnsupportedDisassembly(StringRef Text);
239 
240   void log(raw_ostream &OS) const override;
241   std::error_code convertToErrorCode() const override;
242 };
243 
244 } // namespace cfi_verify
245 } // namespace llvm
246 
247 #endif // LLVM_CFI_VERIFY_FILE_ANALYSIS_H
248