1 //===- FileAnalysis.cpp -----------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "FileAnalysis.h" 10 #include "GraphBuilder.h" 11 12 #include "llvm/BinaryFormat/ELF.h" 13 #include "llvm/DebugInfo/DWARF/DWARFContext.h" 14 #include "llvm/MC/MCAsmInfo.h" 15 #include "llvm/MC/MCContext.h" 16 #include "llvm/MC/MCDisassembler/MCDisassembler.h" 17 #include "llvm/MC/MCInst.h" 18 #include "llvm/MC/MCInstPrinter.h" 19 #include "llvm/MC/MCInstrAnalysis.h" 20 #include "llvm/MC/MCInstrDesc.h" 21 #include "llvm/MC/MCInstrInfo.h" 22 #include "llvm/MC/MCObjectFileInfo.h" 23 #include "llvm/MC/MCRegisterInfo.h" 24 #include "llvm/MC/MCSubtargetInfo.h" 25 #include "llvm/MC/MCTargetOptions.h" 26 #include "llvm/Object/Binary.h" 27 #include "llvm/Object/COFF.h" 28 #include "llvm/Object/ELFObjectFile.h" 29 #include "llvm/Object/ObjectFile.h" 30 #include "llvm/Support/Casting.h" 31 #include "llvm/Support/CommandLine.h" 32 #include "llvm/Support/Error.h" 33 #include "llvm/Support/MemoryBuffer.h" 34 #include "llvm/Support/TargetRegistry.h" 35 #include "llvm/Support/TargetSelect.h" 36 #include "llvm/Support/raw_ostream.h" 37 38 39 using Instr = llvm::cfi_verify::FileAnalysis::Instr; 40 using LLVMSymbolizer = llvm::symbolize::LLVMSymbolizer; 41 42 namespace llvm { 43 namespace cfi_verify { 44 45 bool IgnoreDWARFFlag; 46 47 static cl::opt<bool, true> IgnoreDWARFArg( 48 "ignore-dwarf", 49 cl::desc( 50 "Ignore all DWARF data. This relaxes the requirements for all " 51 "statically linked libraries to have been compiled with '-g', but " 52 "will result in false positives for 'CFI unprotected' instructions."), 53 cl::location(IgnoreDWARFFlag), cl::init(false)); 54 55 StringRef stringCFIProtectionStatus(CFIProtectionStatus Status) { 56 switch (Status) { 57 case CFIProtectionStatus::PROTECTED: 58 return "PROTECTED"; 59 case CFIProtectionStatus::FAIL_NOT_INDIRECT_CF: 60 return "FAIL_NOT_INDIRECT_CF"; 61 case CFIProtectionStatus::FAIL_ORPHANS: 62 return "FAIL_ORPHANS"; 63 case CFIProtectionStatus::FAIL_BAD_CONDITIONAL_BRANCH: 64 return "FAIL_BAD_CONDITIONAL_BRANCH"; 65 case CFIProtectionStatus::FAIL_REGISTER_CLOBBERED: 66 return "FAIL_REGISTER_CLOBBERED"; 67 case CFIProtectionStatus::FAIL_INVALID_INSTRUCTION: 68 return "FAIL_INVALID_INSTRUCTION"; 69 } 70 llvm_unreachable("Attempted to stringify an unknown enum value."); 71 } 72 73 Expected<FileAnalysis> FileAnalysis::Create(StringRef Filename) { 74 // Open the filename provided. 75 Expected<object::OwningBinary<object::Binary>> BinaryOrErr = 76 object::createBinary(Filename); 77 if (!BinaryOrErr) 78 return BinaryOrErr.takeError(); 79 80 // Construct the object and allow it to take ownership of the binary. 81 object::OwningBinary<object::Binary> Binary = std::move(BinaryOrErr.get()); 82 FileAnalysis Analysis(std::move(Binary)); 83 84 Analysis.Object = dyn_cast<object::ObjectFile>(Analysis.Binary.getBinary()); 85 if (!Analysis.Object) 86 return make_error<UnsupportedDisassembly>("Failed to cast object"); 87 88 switch (Analysis.Object->getArch()) { 89 case Triple::x86: 90 case Triple::x86_64: 91 case Triple::aarch64: 92 case Triple::aarch64_be: 93 break; 94 default: 95 return make_error<UnsupportedDisassembly>("Unsupported architecture."); 96 } 97 98 Analysis.ObjectTriple = Analysis.Object->makeTriple(); 99 Analysis.Features = Analysis.Object->getFeatures(); 100 101 // Init the rest of the object. 102 if (auto InitResponse = Analysis.initialiseDisassemblyMembers()) 103 return std::move(InitResponse); 104 105 if (auto SectionParseResponse = Analysis.parseCodeSections()) 106 return std::move(SectionParseResponse); 107 108 if (auto SymbolTableParseResponse = Analysis.parseSymbolTable()) 109 return std::move(SymbolTableParseResponse); 110 111 return std::move(Analysis); 112 } 113 114 FileAnalysis::FileAnalysis(object::OwningBinary<object::Binary> Binary) 115 : Binary(std::move(Binary)) {} 116 117 FileAnalysis::FileAnalysis(const Triple &ObjectTriple, 118 const SubtargetFeatures &Features) 119 : ObjectTriple(ObjectTriple), Features(Features) {} 120 121 const Instr * 122 FileAnalysis::getPrevInstructionSequential(const Instr &InstrMeta) const { 123 std::map<uint64_t, Instr>::const_iterator KV = 124 Instructions.find(InstrMeta.VMAddress); 125 if (KV == Instructions.end() || KV == Instructions.begin()) 126 return nullptr; 127 128 if (!(--KV)->second.Valid) 129 return nullptr; 130 131 return &KV->second; 132 } 133 134 const Instr * 135 FileAnalysis::getNextInstructionSequential(const Instr &InstrMeta) const { 136 std::map<uint64_t, Instr>::const_iterator KV = 137 Instructions.find(InstrMeta.VMAddress); 138 if (KV == Instructions.end() || ++KV == Instructions.end()) 139 return nullptr; 140 141 if (!KV->second.Valid) 142 return nullptr; 143 144 return &KV->second; 145 } 146 147 bool FileAnalysis::usesRegisterOperand(const Instr &InstrMeta) const { 148 for (const auto &Operand : InstrMeta.Instruction) { 149 if (Operand.isReg()) 150 return true; 151 } 152 return false; 153 } 154 155 const Instr *FileAnalysis::getInstruction(uint64_t Address) const { 156 const auto &InstrKV = Instructions.find(Address); 157 if (InstrKV == Instructions.end()) 158 return nullptr; 159 160 return &InstrKV->second; 161 } 162 163 const Instr &FileAnalysis::getInstructionOrDie(uint64_t Address) const { 164 const auto &InstrKV = Instructions.find(Address); 165 assert(InstrKV != Instructions.end() && "Address doesn't exist."); 166 return InstrKV->second; 167 } 168 169 bool FileAnalysis::isCFITrap(const Instr &InstrMeta) const { 170 const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode()); 171 return InstrDesc.isTrap() || willTrapOnCFIViolation(InstrMeta); 172 } 173 174 bool FileAnalysis::willTrapOnCFIViolation(const Instr &InstrMeta) const { 175 const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode()); 176 if (!InstrDesc.isCall()) 177 return false; 178 uint64_t Target; 179 if (!MIA->evaluateBranch(InstrMeta.Instruction, InstrMeta.VMAddress, 180 InstrMeta.InstructionSize, Target)) 181 return false; 182 return TrapOnFailFunctionAddresses.count(Target) > 0; 183 } 184 185 bool FileAnalysis::canFallThrough(const Instr &InstrMeta) const { 186 if (!InstrMeta.Valid) 187 return false; 188 189 if (isCFITrap(InstrMeta)) 190 return false; 191 192 const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode()); 193 if (InstrDesc.mayAffectControlFlow(InstrMeta.Instruction, *RegisterInfo)) 194 return InstrDesc.isConditionalBranch(); 195 196 return true; 197 } 198 199 const Instr * 200 FileAnalysis::getDefiniteNextInstruction(const Instr &InstrMeta) const { 201 if (!InstrMeta.Valid) 202 return nullptr; 203 204 if (isCFITrap(InstrMeta)) 205 return nullptr; 206 207 const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode()); 208 const Instr *NextMetaPtr; 209 if (InstrDesc.mayAffectControlFlow(InstrMeta.Instruction, *RegisterInfo)) { 210 if (InstrDesc.isConditionalBranch()) 211 return nullptr; 212 213 uint64_t Target; 214 if (!MIA->evaluateBranch(InstrMeta.Instruction, InstrMeta.VMAddress, 215 InstrMeta.InstructionSize, Target)) 216 return nullptr; 217 218 NextMetaPtr = getInstruction(Target); 219 } else { 220 NextMetaPtr = 221 getInstruction(InstrMeta.VMAddress + InstrMeta.InstructionSize); 222 } 223 224 if (!NextMetaPtr || !NextMetaPtr->Valid) 225 return nullptr; 226 227 return NextMetaPtr; 228 } 229 230 std::set<const Instr *> 231 FileAnalysis::getDirectControlFlowXRefs(const Instr &InstrMeta) const { 232 std::set<const Instr *> CFCrossReferences; 233 const Instr *PrevInstruction = getPrevInstructionSequential(InstrMeta); 234 235 if (PrevInstruction && canFallThrough(*PrevInstruction)) 236 CFCrossReferences.insert(PrevInstruction); 237 238 const auto &TargetRefsKV = StaticBranchTargetings.find(InstrMeta.VMAddress); 239 if (TargetRefsKV == StaticBranchTargetings.end()) 240 return CFCrossReferences; 241 242 for (uint64_t SourceInstrAddress : TargetRefsKV->second) { 243 const auto &SourceInstrKV = Instructions.find(SourceInstrAddress); 244 if (SourceInstrKV == Instructions.end()) { 245 errs() << "Failed to find source instruction at address " 246 << format_hex(SourceInstrAddress, 2) 247 << " for the cross-reference to instruction at address " 248 << format_hex(InstrMeta.VMAddress, 2) << ".\n"; 249 continue; 250 } 251 252 CFCrossReferences.insert(&SourceInstrKV->second); 253 } 254 255 return CFCrossReferences; 256 } 257 258 const std::set<object::SectionedAddress> & 259 FileAnalysis::getIndirectInstructions() const { 260 return IndirectInstructions; 261 } 262 263 const MCRegisterInfo *FileAnalysis::getRegisterInfo() const { 264 return RegisterInfo.get(); 265 } 266 267 const MCInstrInfo *FileAnalysis::getMCInstrInfo() const { return MII.get(); } 268 269 const MCInstrAnalysis *FileAnalysis::getMCInstrAnalysis() const { 270 return MIA.get(); 271 } 272 273 Expected<DIInliningInfo> 274 FileAnalysis::symbolizeInlinedCode(object::SectionedAddress Address) { 275 assert(Symbolizer != nullptr && "Symbolizer is invalid."); 276 277 return Symbolizer->symbolizeInlinedCode(std::string(Object->getFileName()), 278 Address); 279 } 280 281 CFIProtectionStatus 282 FileAnalysis::validateCFIProtection(const GraphResult &Graph) const { 283 const Instr *InstrMetaPtr = getInstruction(Graph.BaseAddress); 284 if (!InstrMetaPtr) 285 return CFIProtectionStatus::FAIL_INVALID_INSTRUCTION; 286 287 const auto &InstrDesc = MII->get(InstrMetaPtr->Instruction.getOpcode()); 288 if (!InstrDesc.mayAffectControlFlow(InstrMetaPtr->Instruction, *RegisterInfo)) 289 return CFIProtectionStatus::FAIL_NOT_INDIRECT_CF; 290 291 if (!usesRegisterOperand(*InstrMetaPtr)) 292 return CFIProtectionStatus::FAIL_NOT_INDIRECT_CF; 293 294 if (!Graph.OrphanedNodes.empty()) 295 return CFIProtectionStatus::FAIL_ORPHANS; 296 297 for (const auto &BranchNode : Graph.ConditionalBranchNodes) { 298 if (!BranchNode.CFIProtection) 299 return CFIProtectionStatus::FAIL_BAD_CONDITIONAL_BRANCH; 300 } 301 302 if (indirectCFOperandClobber(Graph) != Graph.BaseAddress) 303 return CFIProtectionStatus::FAIL_REGISTER_CLOBBERED; 304 305 return CFIProtectionStatus::PROTECTED; 306 } 307 308 uint64_t FileAnalysis::indirectCFOperandClobber(const GraphResult &Graph) const { 309 assert(Graph.OrphanedNodes.empty() && "Orphaned nodes should be empty."); 310 311 // Get the set of registers we must check to ensure they're not clobbered. 312 const Instr &IndirectCF = getInstructionOrDie(Graph.BaseAddress); 313 DenseSet<unsigned> RegisterNumbers; 314 for (const auto &Operand : IndirectCF.Instruction) { 315 if (Operand.isReg()) 316 RegisterNumbers.insert(Operand.getReg()); 317 } 318 assert(RegisterNumbers.size() && "Zero register operands on indirect CF."); 319 320 // Now check all branches to indirect CFs and ensure no clobbering happens. 321 for (const auto &Branch : Graph.ConditionalBranchNodes) { 322 uint64_t Node; 323 if (Branch.IndirectCFIsOnTargetPath) 324 Node = Branch.Target; 325 else 326 Node = Branch.Fallthrough; 327 328 // Some architectures (e.g., AArch64) cannot load in an indirect branch, so 329 // we allow them one load. 330 bool canLoad = !MII->get(IndirectCF.Instruction.getOpcode()).mayLoad(); 331 332 // We walk backwards from the indirect CF. It is the last node returned by 333 // Graph.flattenAddress, so we skip it since we already handled it. 334 DenseSet<unsigned> CurRegisterNumbers = RegisterNumbers; 335 std::vector<uint64_t> Nodes = Graph.flattenAddress(Node); 336 for (auto I = Nodes.rbegin() + 1, E = Nodes.rend(); I != E; ++I) { 337 Node = *I; 338 const Instr &NodeInstr = getInstructionOrDie(Node); 339 const auto &InstrDesc = MII->get(NodeInstr.Instruction.getOpcode()); 340 341 for (auto RI = CurRegisterNumbers.begin(), RE = CurRegisterNumbers.end(); 342 RI != RE; ++RI) { 343 unsigned RegNum = *RI; 344 if (InstrDesc.hasDefOfPhysReg(NodeInstr.Instruction, RegNum, 345 *RegisterInfo)) { 346 if (!canLoad || !InstrDesc.mayLoad()) 347 return Node; 348 canLoad = false; 349 CurRegisterNumbers.erase(RI); 350 // Add the registers this load reads to those we check for clobbers. 351 for (unsigned i = InstrDesc.getNumDefs(), 352 e = InstrDesc.getNumOperands(); i != e; i++) { 353 const auto Operand = NodeInstr.Instruction.getOperand(i); 354 if (Operand.isReg()) 355 CurRegisterNumbers.insert(Operand.getReg()); 356 } 357 break; 358 } 359 } 360 } 361 } 362 363 return Graph.BaseAddress; 364 } 365 366 void FileAnalysis::printInstruction(const Instr &InstrMeta, 367 raw_ostream &OS) const { 368 Printer->printInst(&InstrMeta.Instruction, 0, "", *SubtargetInfo.get(), OS); 369 } 370 371 Error FileAnalysis::initialiseDisassemblyMembers() { 372 std::string TripleName = ObjectTriple.getTriple(); 373 ArchName = ""; 374 MCPU = ""; 375 std::string ErrorString; 376 377 Symbolizer.reset(new LLVMSymbolizer()); 378 379 ObjectTarget = 380 TargetRegistry::lookupTarget(ArchName, ObjectTriple, ErrorString); 381 if (!ObjectTarget) 382 return make_error<UnsupportedDisassembly>( 383 (Twine("Couldn't find target \"") + ObjectTriple.getTriple() + 384 "\", failed with error: " + ErrorString) 385 .str()); 386 387 RegisterInfo.reset(ObjectTarget->createMCRegInfo(TripleName)); 388 if (!RegisterInfo) 389 return make_error<UnsupportedDisassembly>( 390 "Failed to initialise RegisterInfo."); 391 392 MCTargetOptions MCOptions; 393 AsmInfo.reset( 394 ObjectTarget->createMCAsmInfo(*RegisterInfo, TripleName, MCOptions)); 395 if (!AsmInfo) 396 return make_error<UnsupportedDisassembly>("Failed to initialise AsmInfo."); 397 398 SubtargetInfo.reset(ObjectTarget->createMCSubtargetInfo( 399 TripleName, MCPU, Features.getString())); 400 if (!SubtargetInfo) 401 return make_error<UnsupportedDisassembly>( 402 "Failed to initialise SubtargetInfo."); 403 404 MII.reset(ObjectTarget->createMCInstrInfo()); 405 if (!MII) 406 return make_error<UnsupportedDisassembly>("Failed to initialise MII."); 407 408 Context.reset(new MCContext(AsmInfo.get(), RegisterInfo.get(), &MOFI)); 409 410 Disassembler.reset( 411 ObjectTarget->createMCDisassembler(*SubtargetInfo, *Context)); 412 413 if (!Disassembler) 414 return make_error<UnsupportedDisassembly>( 415 "No disassembler available for target"); 416 417 MIA.reset(ObjectTarget->createMCInstrAnalysis(MII.get())); 418 419 Printer.reset(ObjectTarget->createMCInstPrinter( 420 ObjectTriple, AsmInfo->getAssemblerDialect(), *AsmInfo, *MII, 421 *RegisterInfo)); 422 423 return Error::success(); 424 } 425 426 Error FileAnalysis::parseCodeSections() { 427 if (!IgnoreDWARFFlag) { 428 std::unique_ptr<DWARFContext> DWARF = DWARFContext::create(*Object); 429 if (!DWARF) 430 return make_error<StringError>("Could not create DWARF information.", 431 inconvertibleErrorCode()); 432 433 bool LineInfoValid = false; 434 435 for (auto &Unit : DWARF->compile_units()) { 436 const auto &LineTable = DWARF->getLineTableForUnit(Unit.get()); 437 if (LineTable && !LineTable->Rows.empty()) { 438 LineInfoValid = true; 439 break; 440 } 441 } 442 443 if (!LineInfoValid) 444 return make_error<StringError>( 445 "DWARF line information missing. Did you compile with '-g'?", 446 inconvertibleErrorCode()); 447 } 448 449 for (const object::SectionRef &Section : Object->sections()) { 450 // Ensure only executable sections get analysed. 451 if (!(object::ELFSectionRef(Section).getFlags() & ELF::SHF_EXECINSTR)) 452 continue; 453 454 // Avoid checking the PLT since it produces spurious failures on AArch64 455 // when ignoring DWARF data. 456 Expected<StringRef> NameOrErr = Section.getName(); 457 if (NameOrErr && *NameOrErr == ".plt") 458 continue; 459 consumeError(NameOrErr.takeError()); 460 461 Expected<StringRef> Contents = Section.getContents(); 462 if (!Contents) 463 return Contents.takeError(); 464 ArrayRef<uint8_t> SectionBytes = arrayRefFromStringRef(*Contents); 465 466 parseSectionContents(SectionBytes, 467 {Section.getAddress(), Section.getIndex()}); 468 } 469 return Error::success(); 470 } 471 472 void FileAnalysis::parseSectionContents(ArrayRef<uint8_t> SectionBytes, 473 object::SectionedAddress Address) { 474 assert(Symbolizer && "Symbolizer is uninitialised."); 475 MCInst Instruction; 476 Instr InstrMeta; 477 uint64_t InstructionSize; 478 479 for (uint64_t Byte = 0; Byte < SectionBytes.size();) { 480 bool ValidInstruction = 481 Disassembler->getInstruction(Instruction, InstructionSize, 482 SectionBytes.drop_front(Byte), 0, 483 outs()) == MCDisassembler::Success; 484 485 Byte += InstructionSize; 486 487 uint64_t VMAddress = Address.Address + Byte - InstructionSize; 488 InstrMeta.Instruction = Instruction; 489 InstrMeta.VMAddress = VMAddress; 490 InstrMeta.InstructionSize = InstructionSize; 491 InstrMeta.Valid = ValidInstruction; 492 493 addInstruction(InstrMeta); 494 495 if (!ValidInstruction) 496 continue; 497 498 // Skip additional parsing for instructions that do not affect the control 499 // flow. 500 const auto &InstrDesc = MII->get(Instruction.getOpcode()); 501 if (!InstrDesc.mayAffectControlFlow(Instruction, *RegisterInfo)) 502 continue; 503 504 uint64_t Target; 505 if (MIA->evaluateBranch(Instruction, VMAddress, InstructionSize, Target)) { 506 // If the target can be evaluated, it's not indirect. 507 StaticBranchTargetings[Target].push_back(VMAddress); 508 continue; 509 } 510 511 if (!usesRegisterOperand(InstrMeta)) 512 continue; 513 514 if (InstrDesc.isReturn()) 515 continue; 516 517 // Check if this instruction exists in the range of the DWARF metadata. 518 if (!IgnoreDWARFFlag) { 519 auto LineInfo = 520 Symbolizer->symbolizeCode(std::string(Object->getFileName()), 521 {VMAddress, Address.SectionIndex}); 522 if (!LineInfo) { 523 handleAllErrors(LineInfo.takeError(), [](const ErrorInfoBase &E) { 524 errs() << "Symbolizer failed to get line: " << E.message() << "\n"; 525 }); 526 continue; 527 } 528 529 if (LineInfo->FileName == DILineInfo::BadString) 530 continue; 531 } 532 533 IndirectInstructions.insert({VMAddress, Address.SectionIndex}); 534 } 535 } 536 537 void FileAnalysis::addInstruction(const Instr &Instruction) { 538 const auto &KV = 539 Instructions.insert(std::make_pair(Instruction.VMAddress, Instruction)); 540 if (!KV.second) { 541 errs() << "Failed to add instruction at address " 542 << format_hex(Instruction.VMAddress, 2) 543 << ": Instruction at this address already exists.\n"; 544 exit(EXIT_FAILURE); 545 } 546 } 547 548 Error FileAnalysis::parseSymbolTable() { 549 // Functions that will trap on CFI violations. 550 SmallSet<StringRef, 4> TrapOnFailFunctions; 551 TrapOnFailFunctions.insert("__cfi_slowpath"); 552 TrapOnFailFunctions.insert("__cfi_slowpath_diag"); 553 TrapOnFailFunctions.insert("abort"); 554 555 // Look through the list of symbols for functions that will trap on CFI 556 // violations. 557 for (auto &Sym : Object->symbols()) { 558 auto SymNameOrErr = Sym.getName(); 559 if (!SymNameOrErr) 560 consumeError(SymNameOrErr.takeError()); 561 else if (TrapOnFailFunctions.count(*SymNameOrErr) > 0) { 562 auto AddrOrErr = Sym.getAddress(); 563 if (!AddrOrErr) 564 consumeError(AddrOrErr.takeError()); 565 else 566 TrapOnFailFunctionAddresses.insert(*AddrOrErr); 567 } 568 } 569 if (auto *ElfObject = dyn_cast<object::ELFObjectFileBase>(Object)) { 570 for (const auto &Addr : ElfObject->getPltAddresses()) { 571 object::SymbolRef Sym(Addr.first, Object); 572 auto SymNameOrErr = Sym.getName(); 573 if (!SymNameOrErr) 574 consumeError(SymNameOrErr.takeError()); 575 else if (TrapOnFailFunctions.count(*SymNameOrErr) > 0) 576 TrapOnFailFunctionAddresses.insert(Addr.second); 577 } 578 } 579 return Error::success(); 580 } 581 582 UnsupportedDisassembly::UnsupportedDisassembly(StringRef Text) 583 : Text(std::string(Text)) {} 584 585 char UnsupportedDisassembly::ID; 586 void UnsupportedDisassembly::log(raw_ostream &OS) const { 587 OS << "Could not initialise disassembler: " << Text; 588 } 589 590 std::error_code UnsupportedDisassembly::convertToErrorCode() const { 591 return std::error_code(); 592 } 593 594 } // namespace cfi_verify 595 } // namespace llvm 596