1 //===- FileAnalysis.cpp -----------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "FileAnalysis.h" 10 #include "GraphBuilder.h" 11 12 #include "llvm/BinaryFormat/ELF.h" 13 #include "llvm/DebugInfo/DWARF/DWARFContext.h" 14 #include "llvm/MC/MCAsmInfo.h" 15 #include "llvm/MC/MCContext.h" 16 #include "llvm/MC/MCDisassembler/MCDisassembler.h" 17 #include "llvm/MC/MCInst.h" 18 #include "llvm/MC/MCInstPrinter.h" 19 #include "llvm/MC/MCInstrAnalysis.h" 20 #include "llvm/MC/MCInstrDesc.h" 21 #include "llvm/MC/MCInstrInfo.h" 22 #include "llvm/MC/MCObjectFileInfo.h" 23 #include "llvm/MC/MCRegisterInfo.h" 24 #include "llvm/MC/MCSubtargetInfo.h" 25 #include "llvm/MC/MCTargetOptions.h" 26 #include "llvm/Object/Binary.h" 27 #include "llvm/Object/COFF.h" 28 #include "llvm/Object/ELFObjectFile.h" 29 #include "llvm/Object/ObjectFile.h" 30 #include "llvm/Support/Casting.h" 31 #include "llvm/Support/CommandLine.h" 32 #include "llvm/Support/Error.h" 33 #include "llvm/Support/MemoryBuffer.h" 34 #include "llvm/Support/TargetRegistry.h" 35 #include "llvm/Support/TargetSelect.h" 36 #include "llvm/Support/raw_ostream.h" 37 38 39 using Instr = llvm::cfi_verify::FileAnalysis::Instr; 40 using LLVMSymbolizer = llvm::symbolize::LLVMSymbolizer; 41 42 namespace llvm { 43 namespace cfi_verify { 44 45 bool IgnoreDWARFFlag; 46 47 static cl::opt<bool, true> IgnoreDWARFArg( 48 "ignore-dwarf", 49 cl::desc( 50 "Ignore all DWARF data. This relaxes the requirements for all " 51 "statically linked libraries to have been compiled with '-g', but " 52 "will result in false positives for 'CFI unprotected' instructions."), 53 cl::location(IgnoreDWARFFlag), cl::init(false)); 54 55 StringRef stringCFIProtectionStatus(CFIProtectionStatus Status) { 56 switch (Status) { 57 case CFIProtectionStatus::PROTECTED: 58 return "PROTECTED"; 59 case CFIProtectionStatus::FAIL_NOT_INDIRECT_CF: 60 return "FAIL_NOT_INDIRECT_CF"; 61 case CFIProtectionStatus::FAIL_ORPHANS: 62 return "FAIL_ORPHANS"; 63 case CFIProtectionStatus::FAIL_BAD_CONDITIONAL_BRANCH: 64 return "FAIL_BAD_CONDITIONAL_BRANCH"; 65 case CFIProtectionStatus::FAIL_REGISTER_CLOBBERED: 66 return "FAIL_REGISTER_CLOBBERED"; 67 case CFIProtectionStatus::FAIL_INVALID_INSTRUCTION: 68 return "FAIL_INVALID_INSTRUCTION"; 69 } 70 llvm_unreachable("Attempted to stringify an unknown enum value."); 71 } 72 73 Expected<FileAnalysis> FileAnalysis::Create(StringRef Filename) { 74 // Open the filename provided. 75 Expected<object::OwningBinary<object::Binary>> BinaryOrErr = 76 object::createBinary(Filename); 77 if (!BinaryOrErr) 78 return BinaryOrErr.takeError(); 79 80 // Construct the object and allow it to take ownership of the binary. 81 object::OwningBinary<object::Binary> Binary = std::move(BinaryOrErr.get()); 82 FileAnalysis Analysis(std::move(Binary)); 83 84 Analysis.Object = dyn_cast<object::ObjectFile>(Analysis.Binary.getBinary()); 85 if (!Analysis.Object) 86 return make_error<UnsupportedDisassembly>("Failed to cast object"); 87 88 switch (Analysis.Object->getArch()) { 89 case Triple::x86: 90 case Triple::x86_64: 91 case Triple::aarch64: 92 case Triple::aarch64_be: 93 break; 94 default: 95 return make_error<UnsupportedDisassembly>("Unsupported architecture."); 96 } 97 98 Analysis.ObjectTriple = Analysis.Object->makeTriple(); 99 Analysis.Features = Analysis.Object->getFeatures(); 100 101 // Init the rest of the object. 102 if (auto InitResponse = Analysis.initialiseDisassemblyMembers()) 103 return std::move(InitResponse); 104 105 if (auto SectionParseResponse = Analysis.parseCodeSections()) 106 return std::move(SectionParseResponse); 107 108 if (auto SymbolTableParseResponse = Analysis.parseSymbolTable()) 109 return std::move(SymbolTableParseResponse); 110 111 return std::move(Analysis); 112 } 113 114 FileAnalysis::FileAnalysis(object::OwningBinary<object::Binary> Binary) 115 : Binary(std::move(Binary)) {} 116 117 FileAnalysis::FileAnalysis(const Triple &ObjectTriple, 118 const SubtargetFeatures &Features) 119 : ObjectTriple(ObjectTriple), Features(Features) {} 120 121 const Instr * 122 FileAnalysis::getPrevInstructionSequential(const Instr &InstrMeta) const { 123 std::map<uint64_t, Instr>::const_iterator KV = 124 Instructions.find(InstrMeta.VMAddress); 125 if (KV == Instructions.end() || KV == Instructions.begin()) 126 return nullptr; 127 128 if (!(--KV)->second.Valid) 129 return nullptr; 130 131 return &KV->second; 132 } 133 134 const Instr * 135 FileAnalysis::getNextInstructionSequential(const Instr &InstrMeta) const { 136 std::map<uint64_t, Instr>::const_iterator KV = 137 Instructions.find(InstrMeta.VMAddress); 138 if (KV == Instructions.end() || ++KV == Instructions.end()) 139 return nullptr; 140 141 if (!KV->second.Valid) 142 return nullptr; 143 144 return &KV->second; 145 } 146 147 bool FileAnalysis::usesRegisterOperand(const Instr &InstrMeta) const { 148 for (const auto &Operand : InstrMeta.Instruction) { 149 if (Operand.isReg()) 150 return true; 151 } 152 return false; 153 } 154 155 const Instr *FileAnalysis::getInstruction(uint64_t Address) const { 156 const auto &InstrKV = Instructions.find(Address); 157 if (InstrKV == Instructions.end()) 158 return nullptr; 159 160 return &InstrKV->second; 161 } 162 163 const Instr &FileAnalysis::getInstructionOrDie(uint64_t Address) const { 164 const auto &InstrKV = Instructions.find(Address); 165 assert(InstrKV != Instructions.end() && "Address doesn't exist."); 166 return InstrKV->second; 167 } 168 169 bool FileAnalysis::isCFITrap(const Instr &InstrMeta) const { 170 const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode()); 171 return InstrDesc.isTrap() || willTrapOnCFIViolation(InstrMeta); 172 } 173 174 bool FileAnalysis::willTrapOnCFIViolation(const Instr &InstrMeta) const { 175 const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode()); 176 if (!InstrDesc.isCall()) 177 return false; 178 uint64_t Target; 179 if (!MIA->evaluateBranch(InstrMeta.Instruction, InstrMeta.VMAddress, 180 InstrMeta.InstructionSize, Target)) 181 return false; 182 return TrapOnFailFunctionAddresses.count(Target) > 0; 183 } 184 185 bool FileAnalysis::canFallThrough(const Instr &InstrMeta) const { 186 if (!InstrMeta.Valid) 187 return false; 188 189 if (isCFITrap(InstrMeta)) 190 return false; 191 192 const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode()); 193 if (InstrDesc.mayAffectControlFlow(InstrMeta.Instruction, *RegisterInfo)) 194 return InstrDesc.isConditionalBranch(); 195 196 return true; 197 } 198 199 const Instr * 200 FileAnalysis::getDefiniteNextInstruction(const Instr &InstrMeta) const { 201 if (!InstrMeta.Valid) 202 return nullptr; 203 204 if (isCFITrap(InstrMeta)) 205 return nullptr; 206 207 const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode()); 208 const Instr *NextMetaPtr; 209 if (InstrDesc.mayAffectControlFlow(InstrMeta.Instruction, *RegisterInfo)) { 210 if (InstrDesc.isConditionalBranch()) 211 return nullptr; 212 213 uint64_t Target; 214 if (!MIA->evaluateBranch(InstrMeta.Instruction, InstrMeta.VMAddress, 215 InstrMeta.InstructionSize, Target)) 216 return nullptr; 217 218 NextMetaPtr = getInstruction(Target); 219 } else { 220 NextMetaPtr = 221 getInstruction(InstrMeta.VMAddress + InstrMeta.InstructionSize); 222 } 223 224 if (!NextMetaPtr || !NextMetaPtr->Valid) 225 return nullptr; 226 227 return NextMetaPtr; 228 } 229 230 std::set<const Instr *> 231 FileAnalysis::getDirectControlFlowXRefs(const Instr &InstrMeta) const { 232 std::set<const Instr *> CFCrossReferences; 233 const Instr *PrevInstruction = getPrevInstructionSequential(InstrMeta); 234 235 if (PrevInstruction && canFallThrough(*PrevInstruction)) 236 CFCrossReferences.insert(PrevInstruction); 237 238 const auto &TargetRefsKV = StaticBranchTargetings.find(InstrMeta.VMAddress); 239 if (TargetRefsKV == StaticBranchTargetings.end()) 240 return CFCrossReferences; 241 242 for (uint64_t SourceInstrAddress : TargetRefsKV->second) { 243 const auto &SourceInstrKV = Instructions.find(SourceInstrAddress); 244 if (SourceInstrKV == Instructions.end()) { 245 errs() << "Failed to find source instruction at address " 246 << format_hex(SourceInstrAddress, 2) 247 << " for the cross-reference to instruction at address " 248 << format_hex(InstrMeta.VMAddress, 2) << ".\n"; 249 continue; 250 } 251 252 CFCrossReferences.insert(&SourceInstrKV->second); 253 } 254 255 return CFCrossReferences; 256 } 257 258 const std::set<object::SectionedAddress> & 259 FileAnalysis::getIndirectInstructions() const { 260 return IndirectInstructions; 261 } 262 263 const MCRegisterInfo *FileAnalysis::getRegisterInfo() const { 264 return RegisterInfo.get(); 265 } 266 267 const MCInstrInfo *FileAnalysis::getMCInstrInfo() const { return MII.get(); } 268 269 const MCInstrAnalysis *FileAnalysis::getMCInstrAnalysis() const { 270 return MIA.get(); 271 } 272 273 Expected<DIInliningInfo> 274 FileAnalysis::symbolizeInlinedCode(object::SectionedAddress Address) { 275 assert(Symbolizer != nullptr && "Symbolizer is invalid."); 276 277 return Symbolizer->symbolizeInlinedCode(Object->getFileName(), Address); 278 } 279 280 CFIProtectionStatus 281 FileAnalysis::validateCFIProtection(const GraphResult &Graph) const { 282 const Instr *InstrMetaPtr = getInstruction(Graph.BaseAddress); 283 if (!InstrMetaPtr) 284 return CFIProtectionStatus::FAIL_INVALID_INSTRUCTION; 285 286 const auto &InstrDesc = MII->get(InstrMetaPtr->Instruction.getOpcode()); 287 if (!InstrDesc.mayAffectControlFlow(InstrMetaPtr->Instruction, *RegisterInfo)) 288 return CFIProtectionStatus::FAIL_NOT_INDIRECT_CF; 289 290 if (!usesRegisterOperand(*InstrMetaPtr)) 291 return CFIProtectionStatus::FAIL_NOT_INDIRECT_CF; 292 293 if (!Graph.OrphanedNodes.empty()) 294 return CFIProtectionStatus::FAIL_ORPHANS; 295 296 for (const auto &BranchNode : Graph.ConditionalBranchNodes) { 297 if (!BranchNode.CFIProtection) 298 return CFIProtectionStatus::FAIL_BAD_CONDITIONAL_BRANCH; 299 } 300 301 if (indirectCFOperandClobber(Graph) != Graph.BaseAddress) 302 return CFIProtectionStatus::FAIL_REGISTER_CLOBBERED; 303 304 return CFIProtectionStatus::PROTECTED; 305 } 306 307 uint64_t FileAnalysis::indirectCFOperandClobber(const GraphResult &Graph) const { 308 assert(Graph.OrphanedNodes.empty() && "Orphaned nodes should be empty."); 309 310 // Get the set of registers we must check to ensure they're not clobbered. 311 const Instr &IndirectCF = getInstructionOrDie(Graph.BaseAddress); 312 DenseSet<unsigned> RegisterNumbers; 313 for (const auto &Operand : IndirectCF.Instruction) { 314 if (Operand.isReg()) 315 RegisterNumbers.insert(Operand.getReg()); 316 } 317 assert(RegisterNumbers.size() && "Zero register operands on indirect CF."); 318 319 // Now check all branches to indirect CFs and ensure no clobbering happens. 320 for (const auto &Branch : Graph.ConditionalBranchNodes) { 321 uint64_t Node; 322 if (Branch.IndirectCFIsOnTargetPath) 323 Node = Branch.Target; 324 else 325 Node = Branch.Fallthrough; 326 327 // Some architectures (e.g., AArch64) cannot load in an indirect branch, so 328 // we allow them one load. 329 bool canLoad = !MII->get(IndirectCF.Instruction.getOpcode()).mayLoad(); 330 331 // We walk backwards from the indirect CF. It is the last node returned by 332 // Graph.flattenAddress, so we skip it since we already handled it. 333 DenseSet<unsigned> CurRegisterNumbers = RegisterNumbers; 334 std::vector<uint64_t> Nodes = Graph.flattenAddress(Node); 335 for (auto I = Nodes.rbegin() + 1, E = Nodes.rend(); I != E; ++I) { 336 Node = *I; 337 const Instr &NodeInstr = getInstructionOrDie(Node); 338 const auto &InstrDesc = MII->get(NodeInstr.Instruction.getOpcode()); 339 340 for (auto RI = CurRegisterNumbers.begin(), RE = CurRegisterNumbers.end(); 341 RI != RE; ++RI) { 342 unsigned RegNum = *RI; 343 if (InstrDesc.hasDefOfPhysReg(NodeInstr.Instruction, RegNum, 344 *RegisterInfo)) { 345 if (!canLoad || !InstrDesc.mayLoad()) 346 return Node; 347 canLoad = false; 348 CurRegisterNumbers.erase(RI); 349 // Add the registers this load reads to those we check for clobbers. 350 for (unsigned i = InstrDesc.getNumDefs(), 351 e = InstrDesc.getNumOperands(); i != e; i++) { 352 const auto Operand = NodeInstr.Instruction.getOperand(i); 353 if (Operand.isReg()) 354 CurRegisterNumbers.insert(Operand.getReg()); 355 } 356 break; 357 } 358 } 359 } 360 } 361 362 return Graph.BaseAddress; 363 } 364 365 void FileAnalysis::printInstruction(const Instr &InstrMeta, 366 raw_ostream &OS) const { 367 Printer->printInst(&InstrMeta.Instruction, 0, "", *SubtargetInfo.get(), OS); 368 } 369 370 Error FileAnalysis::initialiseDisassemblyMembers() { 371 std::string TripleName = ObjectTriple.getTriple(); 372 ArchName = ""; 373 MCPU = ""; 374 std::string ErrorString; 375 376 Symbolizer.reset(new LLVMSymbolizer()); 377 378 ObjectTarget = 379 TargetRegistry::lookupTarget(ArchName, ObjectTriple, ErrorString); 380 if (!ObjectTarget) 381 return make_error<UnsupportedDisassembly>( 382 (Twine("Couldn't find target \"") + ObjectTriple.getTriple() + 383 "\", failed with error: " + ErrorString) 384 .str()); 385 386 RegisterInfo.reset(ObjectTarget->createMCRegInfo(TripleName)); 387 if (!RegisterInfo) 388 return make_error<UnsupportedDisassembly>( 389 "Failed to initialise RegisterInfo."); 390 391 MCTargetOptions MCOptions; 392 AsmInfo.reset( 393 ObjectTarget->createMCAsmInfo(*RegisterInfo, TripleName, MCOptions)); 394 if (!AsmInfo) 395 return make_error<UnsupportedDisassembly>("Failed to initialise AsmInfo."); 396 397 SubtargetInfo.reset(ObjectTarget->createMCSubtargetInfo( 398 TripleName, MCPU, Features.getString())); 399 if (!SubtargetInfo) 400 return make_error<UnsupportedDisassembly>( 401 "Failed to initialise SubtargetInfo."); 402 403 MII.reset(ObjectTarget->createMCInstrInfo()); 404 if (!MII) 405 return make_error<UnsupportedDisassembly>("Failed to initialise MII."); 406 407 Context.reset(new MCContext(AsmInfo.get(), RegisterInfo.get(), &MOFI)); 408 409 Disassembler.reset( 410 ObjectTarget->createMCDisassembler(*SubtargetInfo, *Context)); 411 412 if (!Disassembler) 413 return make_error<UnsupportedDisassembly>( 414 "No disassembler available for target"); 415 416 MIA.reset(ObjectTarget->createMCInstrAnalysis(MII.get())); 417 418 Printer.reset(ObjectTarget->createMCInstPrinter( 419 ObjectTriple, AsmInfo->getAssemblerDialect(), *AsmInfo, *MII, 420 *RegisterInfo)); 421 422 return Error::success(); 423 } 424 425 Error FileAnalysis::parseCodeSections() { 426 if (!IgnoreDWARFFlag) { 427 std::unique_ptr<DWARFContext> DWARF = DWARFContext::create(*Object); 428 if (!DWARF) 429 return make_error<StringError>("Could not create DWARF information.", 430 inconvertibleErrorCode()); 431 432 bool LineInfoValid = false; 433 434 for (auto &Unit : DWARF->compile_units()) { 435 const auto &LineTable = DWARF->getLineTableForUnit(Unit.get()); 436 if (LineTable && !LineTable->Rows.empty()) { 437 LineInfoValid = true; 438 break; 439 } 440 } 441 442 if (!LineInfoValid) 443 return make_error<StringError>( 444 "DWARF line information missing. Did you compile with '-g'?", 445 inconvertibleErrorCode()); 446 } 447 448 for (const object::SectionRef &Section : Object->sections()) { 449 // Ensure only executable sections get analysed. 450 if (!(object::ELFSectionRef(Section).getFlags() & ELF::SHF_EXECINSTR)) 451 continue; 452 453 // Avoid checking the PLT since it produces spurious failures on AArch64 454 // when ignoring DWARF data. 455 Expected<StringRef> NameOrErr = Section.getName(); 456 if (NameOrErr && *NameOrErr == ".plt") 457 continue; 458 consumeError(NameOrErr.takeError()); 459 460 Expected<StringRef> Contents = Section.getContents(); 461 if (!Contents) 462 return Contents.takeError(); 463 ArrayRef<uint8_t> SectionBytes = arrayRefFromStringRef(*Contents); 464 465 parseSectionContents(SectionBytes, 466 {Section.getAddress(), Section.getIndex()}); 467 } 468 return Error::success(); 469 } 470 471 void FileAnalysis::parseSectionContents(ArrayRef<uint8_t> SectionBytes, 472 object::SectionedAddress Address) { 473 assert(Symbolizer && "Symbolizer is uninitialised."); 474 MCInst Instruction; 475 Instr InstrMeta; 476 uint64_t InstructionSize; 477 478 for (uint64_t Byte = 0; Byte < SectionBytes.size();) { 479 bool ValidInstruction = 480 Disassembler->getInstruction(Instruction, InstructionSize, 481 SectionBytes.drop_front(Byte), 0, 482 outs()) == MCDisassembler::Success; 483 484 Byte += InstructionSize; 485 486 uint64_t VMAddress = Address.Address + Byte - InstructionSize; 487 InstrMeta.Instruction = Instruction; 488 InstrMeta.VMAddress = VMAddress; 489 InstrMeta.InstructionSize = InstructionSize; 490 InstrMeta.Valid = ValidInstruction; 491 492 addInstruction(InstrMeta); 493 494 if (!ValidInstruction) 495 continue; 496 497 // Skip additional parsing for instructions that do not affect the control 498 // flow. 499 const auto &InstrDesc = MII->get(Instruction.getOpcode()); 500 if (!InstrDesc.mayAffectControlFlow(Instruction, *RegisterInfo)) 501 continue; 502 503 uint64_t Target; 504 if (MIA->evaluateBranch(Instruction, VMAddress, InstructionSize, Target)) { 505 // If the target can be evaluated, it's not indirect. 506 StaticBranchTargetings[Target].push_back(VMAddress); 507 continue; 508 } 509 510 if (!usesRegisterOperand(InstrMeta)) 511 continue; 512 513 if (InstrDesc.isReturn()) 514 continue; 515 516 // Check if this instruction exists in the range of the DWARF metadata. 517 if (!IgnoreDWARFFlag) { 518 auto LineInfo = Symbolizer->symbolizeCode( 519 Object->getFileName(), {VMAddress, Address.SectionIndex}); 520 if (!LineInfo) { 521 handleAllErrors(LineInfo.takeError(), [](const ErrorInfoBase &E) { 522 errs() << "Symbolizer failed to get line: " << E.message() << "\n"; 523 }); 524 continue; 525 } 526 527 if (LineInfo->FileName == DILineInfo::BadString) 528 continue; 529 } 530 531 IndirectInstructions.insert({VMAddress, Address.SectionIndex}); 532 } 533 } 534 535 void FileAnalysis::addInstruction(const Instr &Instruction) { 536 const auto &KV = 537 Instructions.insert(std::make_pair(Instruction.VMAddress, Instruction)); 538 if (!KV.second) { 539 errs() << "Failed to add instruction at address " 540 << format_hex(Instruction.VMAddress, 2) 541 << ": Instruction at this address already exists.\n"; 542 exit(EXIT_FAILURE); 543 } 544 } 545 546 Error FileAnalysis::parseSymbolTable() { 547 // Functions that will trap on CFI violations. 548 SmallSet<StringRef, 4> TrapOnFailFunctions; 549 TrapOnFailFunctions.insert("__cfi_slowpath"); 550 TrapOnFailFunctions.insert("__cfi_slowpath_diag"); 551 TrapOnFailFunctions.insert("abort"); 552 553 // Look through the list of symbols for functions that will trap on CFI 554 // violations. 555 for (auto &Sym : Object->symbols()) { 556 auto SymNameOrErr = Sym.getName(); 557 if (!SymNameOrErr) 558 consumeError(SymNameOrErr.takeError()); 559 else if (TrapOnFailFunctions.count(*SymNameOrErr) > 0) { 560 auto AddrOrErr = Sym.getAddress(); 561 if (!AddrOrErr) 562 consumeError(AddrOrErr.takeError()); 563 else 564 TrapOnFailFunctionAddresses.insert(*AddrOrErr); 565 } 566 } 567 if (auto *ElfObject = dyn_cast<object::ELFObjectFileBase>(Object)) { 568 for (const auto &Addr : ElfObject->getPltAddresses()) { 569 object::SymbolRef Sym(Addr.first, Object); 570 auto SymNameOrErr = Sym.getName(); 571 if (!SymNameOrErr) 572 consumeError(SymNameOrErr.takeError()); 573 else if (TrapOnFailFunctions.count(*SymNameOrErr) > 0) 574 TrapOnFailFunctionAddresses.insert(Addr.second); 575 } 576 } 577 return Error::success(); 578 } 579 580 UnsupportedDisassembly::UnsupportedDisassembly(StringRef Text) : Text(Text) {} 581 582 char UnsupportedDisassembly::ID; 583 void UnsupportedDisassembly::log(raw_ostream &OS) const { 584 OS << "Could not initialise disassembler: " << Text; 585 } 586 587 std::error_code UnsupportedDisassembly::convertToErrorCode() const { 588 return std::error_code(); 589 } 590 591 } // namespace cfi_verify 592 } // namespace llvm 593