xref: /llvm-project/llvm/tools/llvm-cfi-verify/lib/FileAnalysis.cpp (revision 468919e18231d0c30b5c0f84a87145db06e3554b)
1 //===- FileAnalysis.cpp -----------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "FileAnalysis.h"
10 #include "GraphBuilder.h"
11 
12 #include "llvm/BinaryFormat/ELF.h"
13 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
14 #include "llvm/MC/MCAsmInfo.h"
15 #include "llvm/MC/MCContext.h"
16 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
17 #include "llvm/MC/MCInst.h"
18 #include "llvm/MC/MCInstPrinter.h"
19 #include "llvm/MC/MCInstrAnalysis.h"
20 #include "llvm/MC/MCInstrDesc.h"
21 #include "llvm/MC/MCInstrInfo.h"
22 #include "llvm/MC/MCObjectFileInfo.h"
23 #include "llvm/MC/MCRegisterInfo.h"
24 #include "llvm/MC/MCSubtargetInfo.h"
25 #include "llvm/Object/Binary.h"
26 #include "llvm/Object/COFF.h"
27 #include "llvm/Object/ELFObjectFile.h"
28 #include "llvm/Object/ObjectFile.h"
29 #include "llvm/Support/Casting.h"
30 #include "llvm/Support/CommandLine.h"
31 #include "llvm/Support/Error.h"
32 #include "llvm/Support/MemoryBuffer.h"
33 #include "llvm/Support/TargetRegistry.h"
34 #include "llvm/Support/TargetSelect.h"
35 #include "llvm/Support/raw_ostream.h"
36 
37 
38 using Instr = llvm::cfi_verify::FileAnalysis::Instr;
39 using LLVMSymbolizer = llvm::symbolize::LLVMSymbolizer;
40 
41 namespace llvm {
42 namespace cfi_verify {
43 
// Backing storage for the 'ignore-dwarf' command-line option below (bound via
// cl::location). Read by the section-parsing code in this file to decide
// whether DWARF line information is required.
bool IgnoreDWARFFlag;

// Command-line switch 'ignore-dwarf'. Its value is stored in IgnoreDWARFFlag
// and defaults to false.
static cl::opt<bool, true> IgnoreDWARFArg(
    "ignore-dwarf",
    cl::desc(
        "Ignore all DWARF data. This relaxes the requirements for all "
        "statically linked libraries to have been compiled with '-g', but "
        "will result in false positives for 'CFI unprotected' instructions."),
    cl::location(IgnoreDWARFFlag), cl::init(false));
53 
54 StringRef stringCFIProtectionStatus(CFIProtectionStatus Status) {
55   switch (Status) {
56   case CFIProtectionStatus::PROTECTED:
57     return "PROTECTED";
58   case CFIProtectionStatus::FAIL_NOT_INDIRECT_CF:
59     return "FAIL_NOT_INDIRECT_CF";
60   case CFIProtectionStatus::FAIL_ORPHANS:
61     return "FAIL_ORPHANS";
62   case CFIProtectionStatus::FAIL_BAD_CONDITIONAL_BRANCH:
63     return "FAIL_BAD_CONDITIONAL_BRANCH";
64   case CFIProtectionStatus::FAIL_REGISTER_CLOBBERED:
65     return "FAIL_REGISTER_CLOBBERED";
66   case CFIProtectionStatus::FAIL_INVALID_INSTRUCTION:
67     return "FAIL_INVALID_INSTRUCTION";
68   }
69   llvm_unreachable("Attempted to stringify an unknown enum value.");
70 }
71 
72 Expected<FileAnalysis> FileAnalysis::Create(StringRef Filename) {
73   // Open the filename provided.
74   Expected<object::OwningBinary<object::Binary>> BinaryOrErr =
75       object::createBinary(Filename);
76   if (!BinaryOrErr)
77     return BinaryOrErr.takeError();
78 
79   // Construct the object and allow it to take ownership of the binary.
80   object::OwningBinary<object::Binary> Binary = std::move(BinaryOrErr.get());
81   FileAnalysis Analysis(std::move(Binary));
82 
83   Analysis.Object = dyn_cast<object::ObjectFile>(Analysis.Binary.getBinary());
84   if (!Analysis.Object)
85     return make_error<UnsupportedDisassembly>("Failed to cast object");
86 
87   switch (Analysis.Object->getArch()) {
88     case Triple::x86:
89     case Triple::x86_64:
90     case Triple::aarch64:
91     case Triple::aarch64_be:
92       break;
93     default:
94       return make_error<UnsupportedDisassembly>("Unsupported architecture.");
95   }
96 
97   Analysis.ObjectTriple = Analysis.Object->makeTriple();
98   Analysis.Features = Analysis.Object->getFeatures();
99 
100   // Init the rest of the object.
101   if (auto InitResponse = Analysis.initialiseDisassemblyMembers())
102     return std::move(InitResponse);
103 
104   if (auto SectionParseResponse = Analysis.parseCodeSections())
105     return std::move(SectionParseResponse);
106 
107   if (auto SymbolTableParseResponse = Analysis.parseSymbolTable())
108     return std::move(SymbolTableParseResponse);
109 
110   return std::move(Analysis);
111 }
112 
// Constructs an analysis that takes ownership of \p Binary. No other member is
// set up here; FileAnalysis::Create() performs the remaining initialisation.
FileAnalysis::FileAnalysis(object::OwningBinary<object::Binary> Binary)
    : Binary(std::move(Binary)) {}
115 
// Constructs an analysis from an explicit target triple and feature set,
// without an underlying binary. Only ObjectTriple and Features are stored.
FileAnalysis::FileAnalysis(const Triple &ObjectTriple,
                           const SubtargetFeatures &Features)
    : ObjectTriple(ObjectTriple), Features(Features) {}
119 
120 const Instr *
121 FileAnalysis::getPrevInstructionSequential(const Instr &InstrMeta) const {
122   std::map<uint64_t, Instr>::const_iterator KV =
123       Instructions.find(InstrMeta.VMAddress);
124   if (KV == Instructions.end() || KV == Instructions.begin())
125     return nullptr;
126 
127   if (!(--KV)->second.Valid)
128     return nullptr;
129 
130   return &KV->second;
131 }
132 
133 const Instr *
134 FileAnalysis::getNextInstructionSequential(const Instr &InstrMeta) const {
135   std::map<uint64_t, Instr>::const_iterator KV =
136       Instructions.find(InstrMeta.VMAddress);
137   if (KV == Instructions.end() || ++KV == Instructions.end())
138     return nullptr;
139 
140   if (!KV->second.Valid)
141     return nullptr;
142 
143   return &KV->second;
144 }
145 
146 bool FileAnalysis::usesRegisterOperand(const Instr &InstrMeta) const {
147   for (const auto &Operand : InstrMeta.Instruction) {
148     if (Operand.isReg())
149       return true;
150   }
151   return false;
152 }
153 
154 const Instr *FileAnalysis::getInstruction(uint64_t Address) const {
155   const auto &InstrKV = Instructions.find(Address);
156   if (InstrKV == Instructions.end())
157     return nullptr;
158 
159   return &InstrKV->second;
160 }
161 
162 const Instr &FileAnalysis::getInstructionOrDie(uint64_t Address) const {
163   const auto &InstrKV = Instructions.find(Address);
164   assert(InstrKV != Instructions.end() && "Address doesn't exist.");
165   return InstrKV->second;
166 }
167 
168 bool FileAnalysis::isCFITrap(const Instr &InstrMeta) const {
169   const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode());
170   return InstrDesc.isTrap() || willTrapOnCFIViolation(InstrMeta);
171 }
172 
173 bool FileAnalysis::willTrapOnCFIViolation(const Instr &InstrMeta) const {
174   const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode());
175   if (!InstrDesc.isCall())
176     return false;
177   uint64_t Target;
178   if (!MIA->evaluateBranch(InstrMeta.Instruction, InstrMeta.VMAddress,
179                            InstrMeta.InstructionSize, Target))
180     return false;
181   return TrapOnFailFunctionAddresses.count(Target) > 0;
182 }
183 
184 bool FileAnalysis::canFallThrough(const Instr &InstrMeta) const {
185   if (!InstrMeta.Valid)
186     return false;
187 
188   if (isCFITrap(InstrMeta))
189     return false;
190 
191   const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode());
192   if (InstrDesc.mayAffectControlFlow(InstrMeta.Instruction, *RegisterInfo))
193     return InstrDesc.isConditionalBranch();
194 
195   return true;
196 }
197 
198 const Instr *
199 FileAnalysis::getDefiniteNextInstruction(const Instr &InstrMeta) const {
200   if (!InstrMeta.Valid)
201     return nullptr;
202 
203   if (isCFITrap(InstrMeta))
204     return nullptr;
205 
206   const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode());
207   const Instr *NextMetaPtr;
208   if (InstrDesc.mayAffectControlFlow(InstrMeta.Instruction, *RegisterInfo)) {
209     if (InstrDesc.isConditionalBranch())
210       return nullptr;
211 
212     uint64_t Target;
213     if (!MIA->evaluateBranch(InstrMeta.Instruction, InstrMeta.VMAddress,
214                              InstrMeta.InstructionSize, Target))
215       return nullptr;
216 
217     NextMetaPtr = getInstruction(Target);
218   } else {
219     NextMetaPtr =
220         getInstruction(InstrMeta.VMAddress + InstrMeta.InstructionSize);
221   }
222 
223   if (!NextMetaPtr || !NextMetaPtr->Valid)
224     return nullptr;
225 
226   return NextMetaPtr;
227 }
228 
229 std::set<const Instr *>
230 FileAnalysis::getDirectControlFlowXRefs(const Instr &InstrMeta) const {
231   std::set<const Instr *> CFCrossReferences;
232   const Instr *PrevInstruction = getPrevInstructionSequential(InstrMeta);
233 
234   if (PrevInstruction && canFallThrough(*PrevInstruction))
235     CFCrossReferences.insert(PrevInstruction);
236 
237   const auto &TargetRefsKV = StaticBranchTargetings.find(InstrMeta.VMAddress);
238   if (TargetRefsKV == StaticBranchTargetings.end())
239     return CFCrossReferences;
240 
241   for (uint64_t SourceInstrAddress : TargetRefsKV->second) {
242     const auto &SourceInstrKV = Instructions.find(SourceInstrAddress);
243     if (SourceInstrKV == Instructions.end()) {
244       errs() << "Failed to find source instruction at address "
245              << format_hex(SourceInstrAddress, 2)
246              << " for the cross-reference to instruction at address "
247              << format_hex(InstrMeta.VMAddress, 2) << ".\n";
248       continue;
249     }
250 
251     CFCrossReferences.insert(&SourceInstrKV->second);
252   }
253 
254   return CFCrossReferences;
255 }
256 
// Returns the set of addresses recorded in IndirectInstructions (populated by
// parseSectionContents()).
const std::set<object::SectionedAddress> &
FileAnalysis::getIndirectInstructions() const {
  return IndirectInstructions;
}
261 
// Returns a non-owning pointer to the register info held by this analysis.
const MCRegisterInfo *FileAnalysis::getRegisterInfo() const {
  return RegisterInfo.get();
}
265 
// Returns a non-owning pointer to the instruction info held by this analysis.
const MCInstrInfo *FileAnalysis::getMCInstrInfo() const { return MII.get(); }
267 
// Returns a non-owning pointer to the instruction analysis held by this
// analysis.
const MCInstrAnalysis *FileAnalysis::getMCInstrAnalysis() const {
  return MIA.get();
}
271 
272 Expected<DIInliningInfo>
273 FileAnalysis::symbolizeInlinedCode(object::SectionedAddress Address) {
274   assert(Symbolizer != nullptr && "Symbolizer is invalid.");
275 
276   return Symbolizer->symbolizeInlinedCode(Object->getFileName(), Address);
277 }
278 
279 CFIProtectionStatus
280 FileAnalysis::validateCFIProtection(const GraphResult &Graph) const {
281   const Instr *InstrMetaPtr = getInstruction(Graph.BaseAddress);
282   if (!InstrMetaPtr)
283     return CFIProtectionStatus::FAIL_INVALID_INSTRUCTION;
284 
285   const auto &InstrDesc = MII->get(InstrMetaPtr->Instruction.getOpcode());
286   if (!InstrDesc.mayAffectControlFlow(InstrMetaPtr->Instruction, *RegisterInfo))
287     return CFIProtectionStatus::FAIL_NOT_INDIRECT_CF;
288 
289   if (!usesRegisterOperand(*InstrMetaPtr))
290     return CFIProtectionStatus::FAIL_NOT_INDIRECT_CF;
291 
292   if (!Graph.OrphanedNodes.empty())
293     return CFIProtectionStatus::FAIL_ORPHANS;
294 
295   for (const auto &BranchNode : Graph.ConditionalBranchNodes) {
296     if (!BranchNode.CFIProtection)
297       return CFIProtectionStatus::FAIL_BAD_CONDITIONAL_BRANCH;
298   }
299 
300   if (indirectCFOperandClobber(Graph) != Graph.BaseAddress)
301     return CFIProtectionStatus::FAIL_REGISTER_CLOBBERED;
302 
303   return CFIProtectionStatus::PROTECTED;
304 }
305 
// Searches for an instruction that redefines a register used as an operand by
// the indirect control-flow instruction at Graph.BaseAddress.
//
// For every conditional branch node in \p Graph, the instructions on the side
// of the branch that leads to the indirect CF are walked backwards from the
// indirect CF. Returns the address of the first instruction found that defines
// one of the tracked registers in a way that is not permitted, or
// Graph.BaseAddress if no such instruction exists.
//
// \p Graph must not contain orphaned nodes (asserted).
uint64_t FileAnalysis::indirectCFOperandClobber(const GraphResult &Graph) const {
  assert(Graph.OrphanedNodes.empty() && "Orphaned nodes should be empty.");

  // Get the set of registers we must check to ensure they're not clobbered.
  const Instr &IndirectCF = getInstructionOrDie(Graph.BaseAddress);
  DenseSet<unsigned> RegisterNumbers;
  for (const auto &Operand : IndirectCF.Instruction) {
    if (Operand.isReg())
      RegisterNumbers.insert(Operand.getReg());
  }
  assert(RegisterNumbers.size() && "Zero register operands on indirect CF.");

  // Now check all branches to indirect CFs and ensure no clobbering happens.
  for (const auto &Branch : Graph.ConditionalBranchNodes) {
    // Start from whichever successor of the branch leads to the indirect CF.
    uint64_t Node;
    if (Branch.IndirectCFIsOnTargetPath)
      Node = Branch.Target;
    else
      Node = Branch.Fallthrough;

    // Some architectures (e.g., AArch64) cannot load in an indirect branch, so
    // we allow them one load.
    // The allowance is recomputed for every branch and is only granted when
    // the indirect CF instruction itself does not load.
    bool canLoad = !MII->get(IndirectCF.Instruction.getOpcode()).mayLoad();

    // We walk backwards from the indirect CF.  It is the last node returned by
    // Graph.flattenAddress, so we skip it since we already handled it.
    // Each branch starts from a fresh copy of the indirect CF's registers.
    DenseSet<unsigned> CurRegisterNumbers = RegisterNumbers;
    std::vector<uint64_t> Nodes = Graph.flattenAddress(Node);
    for (auto I = Nodes.rbegin() + 1, E = Nodes.rend(); I != E; ++I) {
      Node = *I;
      const Instr &NodeInstr = getInstructionOrDie(Node);
      const auto &InstrDesc = MII->get(NodeInstr.Instruction.getOpcode());

      for (auto RI = CurRegisterNumbers.begin(), RE = CurRegisterNumbers.end();
           RI != RE; ++RI) {
        unsigned RegNum = *RI;
        if (InstrDesc.hasDefOfPhysReg(NodeInstr.Instruction, RegNum,
                                      *RegisterInfo)) {
          // A definition is only tolerated when it is a load and the single
          // load allowance has not been used yet; otherwise report this node.
          if (!canLoad || !InstrDesc.mayLoad())
            return Node;
          canLoad = false;
          // The loaded register no longer needs tracking above this point.
          CurRegisterNumbers.erase(RI);
          // Add the registers this load reads to those we check for clobbers.
          // Operands from index getNumDefs() onwards are considered here.
          for (unsigned i = InstrDesc.getNumDefs(),
                        e = InstrDesc.getNumOperands(); i != e; i++) {
            const auto Operand = NodeInstr.Instruction.getOperand(i);
            if (Operand.isReg())
              CurRegisterNumbers.insert(Operand.getReg());
          }
          // The set was modified while being iterated, so stop scanning it for
          // this instruction.
          break;
        }
      }
    }
  }

  // No disallowed definition was found on any path.
  return Graph.BaseAddress;
}
363 
364 void FileAnalysis::printInstruction(const Instr &InstrMeta,
365                                     raw_ostream &OS) const {
366   Printer->printInst(&InstrMeta.Instruction, OS, "", *SubtargetInfo.get());
367 }
368 
369 Error FileAnalysis::initialiseDisassemblyMembers() {
370   std::string TripleName = ObjectTriple.getTriple();
371   ArchName = "";
372   MCPU = "";
373   std::string ErrorString;
374 
375   Symbolizer.reset(new LLVMSymbolizer());
376 
377   ObjectTarget =
378       TargetRegistry::lookupTarget(ArchName, ObjectTriple, ErrorString);
379   if (!ObjectTarget)
380     return make_error<UnsupportedDisassembly>(
381         (Twine("Couldn't find target \"") + ObjectTriple.getTriple() +
382          "\", failed with error: " + ErrorString)
383             .str());
384 
385   RegisterInfo.reset(ObjectTarget->createMCRegInfo(TripleName));
386   if (!RegisterInfo)
387     return make_error<UnsupportedDisassembly>(
388         "Failed to initialise RegisterInfo.");
389 
390   AsmInfo.reset(ObjectTarget->createMCAsmInfo(*RegisterInfo, TripleName));
391   if (!AsmInfo)
392     return make_error<UnsupportedDisassembly>("Failed to initialise AsmInfo.");
393 
394   SubtargetInfo.reset(ObjectTarget->createMCSubtargetInfo(
395       TripleName, MCPU, Features.getString()));
396   if (!SubtargetInfo)
397     return make_error<UnsupportedDisassembly>(
398         "Failed to initialise SubtargetInfo.");
399 
400   MII.reset(ObjectTarget->createMCInstrInfo());
401   if (!MII)
402     return make_error<UnsupportedDisassembly>("Failed to initialise MII.");
403 
404   Context.reset(new MCContext(AsmInfo.get(), RegisterInfo.get(), &MOFI));
405 
406   Disassembler.reset(
407       ObjectTarget->createMCDisassembler(*SubtargetInfo, *Context));
408 
409   if (!Disassembler)
410     return make_error<UnsupportedDisassembly>(
411         "No disassembler available for target");
412 
413   MIA.reset(ObjectTarget->createMCInstrAnalysis(MII.get()));
414 
415   Printer.reset(ObjectTarget->createMCInstPrinter(
416       ObjectTriple, AsmInfo->getAssemblerDialect(), *AsmInfo, *MII,
417       *RegisterInfo));
418 
419   return Error::success();
420 }
421 
422 Error FileAnalysis::parseCodeSections() {
423   if (!IgnoreDWARFFlag) {
424     std::unique_ptr<DWARFContext> DWARF = DWARFContext::create(*Object);
425     if (!DWARF)
426       return make_error<StringError>("Could not create DWARF information.",
427                                      inconvertibleErrorCode());
428 
429     bool LineInfoValid = false;
430 
431     for (auto &Unit : DWARF->compile_units()) {
432       const auto &LineTable = DWARF->getLineTableForUnit(Unit.get());
433       if (LineTable && !LineTable->Rows.empty()) {
434         LineInfoValid = true;
435         break;
436       }
437     }
438 
439     if (!LineInfoValid)
440       return make_error<StringError>(
441           "DWARF line information missing. Did you compile with '-g'?",
442           inconvertibleErrorCode());
443   }
444 
445   for (const object::SectionRef &Section : Object->sections()) {
446     // Ensure only executable sections get analysed.
447     if (!(object::ELFSectionRef(Section).getFlags() & ELF::SHF_EXECINSTR))
448       continue;
449 
450     // Avoid checking the PLT since it produces spurious failures on AArch64
451     // when ignoring DWARF data.
452     StringRef SectionName;
453     if (!Section.getName(SectionName) && SectionName == ".plt")
454       continue;
455 
456     Expected<StringRef> Contents = Section.getContents();
457     if (!Contents)
458       return Contents.takeError();
459     ArrayRef<uint8_t> SectionBytes = arrayRefFromStringRef(*Contents);
460 
461     parseSectionContents(SectionBytes,
462                          {Section.getAddress(), Section.getIndex()});
463   }
464   return Error::success();
465 }
466 
467 void FileAnalysis::parseSectionContents(ArrayRef<uint8_t> SectionBytes,
468                                         object::SectionedAddress Address) {
469   assert(Symbolizer && "Symbolizer is uninitialised.");
470   MCInst Instruction;
471   Instr InstrMeta;
472   uint64_t InstructionSize;
473 
474   for (uint64_t Byte = 0; Byte < SectionBytes.size();) {
475     bool ValidInstruction =
476         Disassembler->getInstruction(Instruction, InstructionSize,
477                                      SectionBytes.drop_front(Byte), 0, nulls(),
478                                      outs()) == MCDisassembler::Success;
479 
480     Byte += InstructionSize;
481 
482     uint64_t VMAddress = Address.Address + Byte - InstructionSize;
483     InstrMeta.Instruction = Instruction;
484     InstrMeta.VMAddress = VMAddress;
485     InstrMeta.InstructionSize = InstructionSize;
486     InstrMeta.Valid = ValidInstruction;
487 
488     addInstruction(InstrMeta);
489 
490     if (!ValidInstruction)
491       continue;
492 
493     // Skip additional parsing for instructions that do not affect the control
494     // flow.
495     const auto &InstrDesc = MII->get(Instruction.getOpcode());
496     if (!InstrDesc.mayAffectControlFlow(Instruction, *RegisterInfo))
497       continue;
498 
499     uint64_t Target;
500     if (MIA->evaluateBranch(Instruction, VMAddress, InstructionSize, Target)) {
501       // If the target can be evaluated, it's not indirect.
502       StaticBranchTargetings[Target].push_back(VMAddress);
503       continue;
504     }
505 
506     if (!usesRegisterOperand(InstrMeta))
507       continue;
508 
509     if (InstrDesc.isReturn())
510       continue;
511 
512     // Check if this instruction exists in the range of the DWARF metadata.
513     if (!IgnoreDWARFFlag) {
514       auto LineInfo = Symbolizer->symbolizeCode(
515           Object->getFileName(), {VMAddress, Address.SectionIndex});
516       if (!LineInfo) {
517         handleAllErrors(LineInfo.takeError(), [](const ErrorInfoBase &E) {
518           errs() << "Symbolizer failed to get line: " << E.message() << "\n";
519         });
520         continue;
521       }
522 
523       if (LineInfo->FileName == "<invalid>")
524         continue;
525     }
526 
527     IndirectInstructions.insert({VMAddress, Address.SectionIndex});
528   }
529 }
530 
531 void FileAnalysis::addInstruction(const Instr &Instruction) {
532   const auto &KV =
533       Instructions.insert(std::make_pair(Instruction.VMAddress, Instruction));
534   if (!KV.second) {
535     errs() << "Failed to add instruction at address "
536            << format_hex(Instruction.VMAddress, 2)
537            << ": Instruction at this address already exists.\n";
538     exit(EXIT_FAILURE);
539   }
540 }
541 
542 Error FileAnalysis::parseSymbolTable() {
543   // Functions that will trap on CFI violations.
544   SmallSet<StringRef, 4> TrapOnFailFunctions;
545   TrapOnFailFunctions.insert("__cfi_slowpath");
546   TrapOnFailFunctions.insert("__cfi_slowpath_diag");
547   TrapOnFailFunctions.insert("abort");
548 
549   // Look through the list of symbols for functions that will trap on CFI
550   // violations.
551   for (auto &Sym : Object->symbols()) {
552     auto SymNameOrErr = Sym.getName();
553     if (!SymNameOrErr)
554       consumeError(SymNameOrErr.takeError());
555     else if (TrapOnFailFunctions.count(*SymNameOrErr) > 0) {
556       auto AddrOrErr = Sym.getAddress();
557       if (!AddrOrErr)
558         consumeError(AddrOrErr.takeError());
559       else
560         TrapOnFailFunctionAddresses.insert(*AddrOrErr);
561     }
562   }
563   if (auto *ElfObject = dyn_cast<object::ELFObjectFileBase>(Object)) {
564     for (const auto &Addr : ElfObject->getPltAddresses()) {
565       object::SymbolRef Sym(Addr.first, Object);
566       auto SymNameOrErr = Sym.getName();
567       if (!SymNameOrErr)
568         consumeError(SymNameOrErr.takeError());
569       else if (TrapOnFailFunctions.count(*SymNameOrErr) > 0)
570         TrapOnFailFunctionAddresses.insert(Addr.second);
571     }
572   }
573   return Error::success();
574 }
575 
// Constructs the error with \p Text as its detail message (printed by log()).
UnsupportedDisassembly::UnsupportedDisassembly(StringRef Text) : Text(Text) {}
577 
// Definition of the static ID member of UnsupportedDisassembly.
char UnsupportedDisassembly::ID;
579 void UnsupportedDisassembly::log(raw_ostream &OS) const {
580   OS << "Could not initialise disassembler: " << Text;
581 }
582 
583 std::error_code UnsupportedDisassembly::convertToErrorCode() const {
584   return std::error_code();
585 }
586 
587 } // namespace cfi_verify
588 } // namespace llvm
589