1 //===- FileAnalysis.cpp -----------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "FileAnalysis.h" 10 #include "GraphBuilder.h" 11 12 #include "llvm/BinaryFormat/ELF.h" 13 #include "llvm/DebugInfo/DWARF/DWARFContext.h" 14 #include "llvm/MC/MCAsmInfo.h" 15 #include "llvm/MC/MCContext.h" 16 #include "llvm/MC/MCDisassembler/MCDisassembler.h" 17 #include "llvm/MC/MCInst.h" 18 #include "llvm/MC/MCInstPrinter.h" 19 #include "llvm/MC/MCInstrAnalysis.h" 20 #include "llvm/MC/MCInstrDesc.h" 21 #include "llvm/MC/MCInstrInfo.h" 22 #include "llvm/MC/MCObjectFileInfo.h" 23 #include "llvm/MC/MCRegisterInfo.h" 24 #include "llvm/MC/MCSubtargetInfo.h" 25 #include "llvm/Object/Binary.h" 26 #include "llvm/Object/COFF.h" 27 #include "llvm/Object/ELFObjectFile.h" 28 #include "llvm/Object/ObjectFile.h" 29 #include "llvm/Support/Casting.h" 30 #include "llvm/Support/CommandLine.h" 31 #include "llvm/Support/Error.h" 32 #include "llvm/Support/MemoryBuffer.h" 33 #include "llvm/Support/TargetRegistry.h" 34 #include "llvm/Support/TargetSelect.h" 35 #include "llvm/Support/raw_ostream.h" 36 37 38 using Instr = llvm::cfi_verify::FileAnalysis::Instr; 39 using LLVMSymbolizer = llvm::symbolize::LLVMSymbolizer; 40 41 namespace llvm { 42 namespace cfi_verify { 43 44 bool IgnoreDWARFFlag; 45 46 static cl::opt<bool, true> IgnoreDWARFArg( 47 "ignore-dwarf", 48 cl::desc( 49 "Ignore all DWARF data. This relaxes the requirements for all " 50 "statically linked libraries to have been compiled with '-g', but " 51 "will result in false positives for 'CFI unprotected' instructions."), 52 cl::location(IgnoreDWARFFlag), cl::init(false)); 53 54 StringRef stringCFIProtectionStatus(CFIProtectionStatus Status) { 55 switch (Status) { 56 case CFIProtectionStatus::PROTECTED: 57 return "PROTECTED"; 58 case CFIProtectionStatus::FAIL_NOT_INDIRECT_CF: 59 return "FAIL_NOT_INDIRECT_CF"; 60 case CFIProtectionStatus::FAIL_ORPHANS: 61 return "FAIL_ORPHANS"; 62 case CFIProtectionStatus::FAIL_BAD_CONDITIONAL_BRANCH: 63 return "FAIL_BAD_CONDITIONAL_BRANCH"; 64 case CFIProtectionStatus::FAIL_REGISTER_CLOBBERED: 65 return "FAIL_REGISTER_CLOBBERED"; 66 case CFIProtectionStatus::FAIL_INVALID_INSTRUCTION: 67 return "FAIL_INVALID_INSTRUCTION"; 68 } 69 llvm_unreachable("Attempted to stringify an unknown enum value."); 70 } 71 72 Expected<FileAnalysis> FileAnalysis::Create(StringRef Filename) { 73 // Open the filename provided. 74 Expected<object::OwningBinary<object::Binary>> BinaryOrErr = 75 object::createBinary(Filename); 76 if (!BinaryOrErr) 77 return BinaryOrErr.takeError(); 78 79 // Construct the object and allow it to take ownership of the binary. 80 object::OwningBinary<object::Binary> Binary = std::move(BinaryOrErr.get()); 81 FileAnalysis Analysis(std::move(Binary)); 82 83 Analysis.Object = dyn_cast<object::ObjectFile>(Analysis.Binary.getBinary()); 84 if (!Analysis.Object) 85 return make_error<UnsupportedDisassembly>("Failed to cast object"); 86 87 switch (Analysis.Object->getArch()) { 88 case Triple::x86: 89 case Triple::x86_64: 90 case Triple::aarch64: 91 case Triple::aarch64_be: 92 break; 93 default: 94 return make_error<UnsupportedDisassembly>("Unsupported architecture."); 95 } 96 97 Analysis.ObjectTriple = Analysis.Object->makeTriple(); 98 Analysis.Features = Analysis.Object->getFeatures(); 99 100 // Init the rest of the object. 101 if (auto InitResponse = Analysis.initialiseDisassemblyMembers()) 102 return std::move(InitResponse); 103 104 if (auto SectionParseResponse = Analysis.parseCodeSections()) 105 return std::move(SectionParseResponse); 106 107 if (auto SymbolTableParseResponse = Analysis.parseSymbolTable()) 108 return std::move(SymbolTableParseResponse); 109 110 return std::move(Analysis); 111 } 112 113 FileAnalysis::FileAnalysis(object::OwningBinary<object::Binary> Binary) 114 : Binary(std::move(Binary)) {} 115 116 FileAnalysis::FileAnalysis(const Triple &ObjectTriple, 117 const SubtargetFeatures &Features) 118 : ObjectTriple(ObjectTriple), Features(Features) {} 119 120 const Instr * 121 FileAnalysis::getPrevInstructionSequential(const Instr &InstrMeta) const { 122 std::map<uint64_t, Instr>::const_iterator KV = 123 Instructions.find(InstrMeta.VMAddress); 124 if (KV == Instructions.end() || KV == Instructions.begin()) 125 return nullptr; 126 127 if (!(--KV)->second.Valid) 128 return nullptr; 129 130 return &KV->second; 131 } 132 133 const Instr * 134 FileAnalysis::getNextInstructionSequential(const Instr &InstrMeta) const { 135 std::map<uint64_t, Instr>::const_iterator KV = 136 Instructions.find(InstrMeta.VMAddress); 137 if (KV == Instructions.end() || ++KV == Instructions.end()) 138 return nullptr; 139 140 if (!KV->second.Valid) 141 return nullptr; 142 143 return &KV->second; 144 } 145 146 bool FileAnalysis::usesRegisterOperand(const Instr &InstrMeta) const { 147 for (const auto &Operand : InstrMeta.Instruction) { 148 if (Operand.isReg()) 149 return true; 150 } 151 return false; 152 } 153 154 const Instr *FileAnalysis::getInstruction(uint64_t Address) const { 155 const auto &InstrKV = Instructions.find(Address); 156 if (InstrKV == Instructions.end()) 157 return nullptr; 158 159 return &InstrKV->second; 160 } 161 162 const Instr &FileAnalysis::getInstructionOrDie(uint64_t Address) const { 163 const auto &InstrKV = Instructions.find(Address); 164 assert(InstrKV != Instructions.end() && "Address doesn't exist."); 165 return InstrKV->second; 166 } 167 168 bool FileAnalysis::isCFITrap(const Instr &InstrMeta) const { 169 const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode()); 170 return InstrDesc.isTrap() || willTrapOnCFIViolation(InstrMeta); 171 } 172 173 bool FileAnalysis::willTrapOnCFIViolation(const Instr &InstrMeta) const { 174 const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode()); 175 if (!InstrDesc.isCall()) 176 return false; 177 uint64_t Target; 178 if (!MIA->evaluateBranch(InstrMeta.Instruction, InstrMeta.VMAddress, 179 InstrMeta.InstructionSize, Target)) 180 return false; 181 return TrapOnFailFunctionAddresses.count(Target) > 0; 182 } 183 184 bool FileAnalysis::canFallThrough(const Instr &InstrMeta) const { 185 if (!InstrMeta.Valid) 186 return false; 187 188 if (isCFITrap(InstrMeta)) 189 return false; 190 191 const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode()); 192 if (InstrDesc.mayAffectControlFlow(InstrMeta.Instruction, *RegisterInfo)) 193 return InstrDesc.isConditionalBranch(); 194 195 return true; 196 } 197 198 const Instr * 199 FileAnalysis::getDefiniteNextInstruction(const Instr &InstrMeta) const { 200 if (!InstrMeta.Valid) 201 return nullptr; 202 203 if (isCFITrap(InstrMeta)) 204 return nullptr; 205 206 const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode()); 207 const Instr *NextMetaPtr; 208 if (InstrDesc.mayAffectControlFlow(InstrMeta.Instruction, *RegisterInfo)) { 209 if (InstrDesc.isConditionalBranch()) 210 return nullptr; 211 212 uint64_t Target; 213 if (!MIA->evaluateBranch(InstrMeta.Instruction, InstrMeta.VMAddress, 214 InstrMeta.InstructionSize, Target)) 215 return nullptr; 216 217 NextMetaPtr = getInstruction(Target); 218 } else { 219 NextMetaPtr = 220 getInstruction(InstrMeta.VMAddress + InstrMeta.InstructionSize); 221 } 222 223 if (!NextMetaPtr || !NextMetaPtr->Valid) 224 return nullptr; 225 226 return NextMetaPtr; 227 } 228 229 std::set<const Instr *> 230 FileAnalysis::getDirectControlFlowXRefs(const Instr &InstrMeta) const { 231 std::set<const Instr *> CFCrossReferences; 232 const Instr *PrevInstruction = getPrevInstructionSequential(InstrMeta); 233 234 if (PrevInstruction && canFallThrough(*PrevInstruction)) 235 CFCrossReferences.insert(PrevInstruction); 236 237 const auto &TargetRefsKV = StaticBranchTargetings.find(InstrMeta.VMAddress); 238 if (TargetRefsKV == StaticBranchTargetings.end()) 239 return CFCrossReferences; 240 241 for (uint64_t SourceInstrAddress : TargetRefsKV->second) { 242 const auto &SourceInstrKV = Instructions.find(SourceInstrAddress); 243 if (SourceInstrKV == Instructions.end()) { 244 errs() << "Failed to find source instruction at address " 245 << format_hex(SourceInstrAddress, 2) 246 << " for the cross-reference to instruction at address " 247 << format_hex(InstrMeta.VMAddress, 2) << ".\n"; 248 continue; 249 } 250 251 CFCrossReferences.insert(&SourceInstrKV->second); 252 } 253 254 return CFCrossReferences; 255 } 256 257 const std::set<object::SectionedAddress> & 258 FileAnalysis::getIndirectInstructions() const { 259 return IndirectInstructions; 260 } 261 262 const MCRegisterInfo *FileAnalysis::getRegisterInfo() const { 263 return RegisterInfo.get(); 264 } 265 266 const MCInstrInfo *FileAnalysis::getMCInstrInfo() const { return MII.get(); } 267 268 const MCInstrAnalysis *FileAnalysis::getMCInstrAnalysis() const { 269 return MIA.get(); 270 } 271 272 Expected<DIInliningInfo> 273 FileAnalysis::symbolizeInlinedCode(object::SectionedAddress Address) { 274 assert(Symbolizer != nullptr && "Symbolizer is invalid."); 275 276 return Symbolizer->symbolizeInlinedCode(Object->getFileName(), Address); 277 } 278 279 CFIProtectionStatus 280 FileAnalysis::validateCFIProtection(const GraphResult &Graph) const { 281 const Instr *InstrMetaPtr = getInstruction(Graph.BaseAddress); 282 if (!InstrMetaPtr) 283 return CFIProtectionStatus::FAIL_INVALID_INSTRUCTION; 284 285 const auto &InstrDesc = MII->get(InstrMetaPtr->Instruction.getOpcode()); 286 if (!InstrDesc.mayAffectControlFlow(InstrMetaPtr->Instruction, *RegisterInfo)) 287 return CFIProtectionStatus::FAIL_NOT_INDIRECT_CF; 288 289 if (!usesRegisterOperand(*InstrMetaPtr)) 290 return CFIProtectionStatus::FAIL_NOT_INDIRECT_CF; 291 292 if (!Graph.OrphanedNodes.empty()) 293 return CFIProtectionStatus::FAIL_ORPHANS; 294 295 for (const auto &BranchNode : Graph.ConditionalBranchNodes) { 296 if (!BranchNode.CFIProtection) 297 return CFIProtectionStatus::FAIL_BAD_CONDITIONAL_BRANCH; 298 } 299 300 if (indirectCFOperandClobber(Graph) != Graph.BaseAddress) 301 return CFIProtectionStatus::FAIL_REGISTER_CLOBBERED; 302 303 return CFIProtectionStatus::PROTECTED; 304 } 305 306 uint64_t FileAnalysis::indirectCFOperandClobber(const GraphResult &Graph) const { 307 assert(Graph.OrphanedNodes.empty() && "Orphaned nodes should be empty."); 308 309 // Get the set of registers we must check to ensure they're not clobbered. 310 const Instr &IndirectCF = getInstructionOrDie(Graph.BaseAddress); 311 DenseSet<unsigned> RegisterNumbers; 312 for (const auto &Operand : IndirectCF.Instruction) { 313 if (Operand.isReg()) 314 RegisterNumbers.insert(Operand.getReg()); 315 } 316 assert(RegisterNumbers.size() && "Zero register operands on indirect CF."); 317 318 // Now check all branches to indirect CFs and ensure no clobbering happens. 319 for (const auto &Branch : Graph.ConditionalBranchNodes) { 320 uint64_t Node; 321 if (Branch.IndirectCFIsOnTargetPath) 322 Node = Branch.Target; 323 else 324 Node = Branch.Fallthrough; 325 326 // Some architectures (e.g., AArch64) cannot load in an indirect branch, so 327 // we allow them one load. 328 bool canLoad = !MII->get(IndirectCF.Instruction.getOpcode()).mayLoad(); 329 330 // We walk backwards from the indirect CF. It is the last node returned by 331 // Graph.flattenAddress, so we skip it since we already handled it. 332 DenseSet<unsigned> CurRegisterNumbers = RegisterNumbers; 333 std::vector<uint64_t> Nodes = Graph.flattenAddress(Node); 334 for (auto I = Nodes.rbegin() + 1, E = Nodes.rend(); I != E; ++I) { 335 Node = *I; 336 const Instr &NodeInstr = getInstructionOrDie(Node); 337 const auto &InstrDesc = MII->get(NodeInstr.Instruction.getOpcode()); 338 339 for (auto RI = CurRegisterNumbers.begin(), RE = CurRegisterNumbers.end(); 340 RI != RE; ++RI) { 341 unsigned RegNum = *RI; 342 if (InstrDesc.hasDefOfPhysReg(NodeInstr.Instruction, RegNum, 343 *RegisterInfo)) { 344 if (!canLoad || !InstrDesc.mayLoad()) 345 return Node; 346 canLoad = false; 347 CurRegisterNumbers.erase(RI); 348 // Add the registers this load reads to those we check for clobbers. 349 for (unsigned i = InstrDesc.getNumDefs(), 350 e = InstrDesc.getNumOperands(); i != e; i++) { 351 const auto Operand = NodeInstr.Instruction.getOperand(i); 352 if (Operand.isReg()) 353 CurRegisterNumbers.insert(Operand.getReg()); 354 } 355 break; 356 } 357 } 358 } 359 } 360 361 return Graph.BaseAddress; 362 } 363 364 void FileAnalysis::printInstruction(const Instr &InstrMeta, 365 raw_ostream &OS) const { 366 Printer->printInst(&InstrMeta.Instruction, OS, "", *SubtargetInfo.get()); 367 } 368 369 Error FileAnalysis::initialiseDisassemblyMembers() { 370 std::string TripleName = ObjectTriple.getTriple(); 371 ArchName = ""; 372 MCPU = ""; 373 std::string ErrorString; 374 375 Symbolizer.reset(new LLVMSymbolizer()); 376 377 ObjectTarget = 378 TargetRegistry::lookupTarget(ArchName, ObjectTriple, ErrorString); 379 if (!ObjectTarget) 380 return make_error<UnsupportedDisassembly>( 381 (Twine("Couldn't find target \"") + ObjectTriple.getTriple() + 382 "\", failed with error: " + ErrorString) 383 .str()); 384 385 RegisterInfo.reset(ObjectTarget->createMCRegInfo(TripleName)); 386 if (!RegisterInfo) 387 return make_error<UnsupportedDisassembly>( 388 "Failed to initialise RegisterInfo."); 389 390 AsmInfo.reset(ObjectTarget->createMCAsmInfo(*RegisterInfo, TripleName)); 391 if (!AsmInfo) 392 return make_error<UnsupportedDisassembly>("Failed to initialise AsmInfo."); 393 394 SubtargetInfo.reset(ObjectTarget->createMCSubtargetInfo( 395 TripleName, MCPU, Features.getString())); 396 if (!SubtargetInfo) 397 return make_error<UnsupportedDisassembly>( 398 "Failed to initialise SubtargetInfo."); 399 400 MII.reset(ObjectTarget->createMCInstrInfo()); 401 if (!MII) 402 return make_error<UnsupportedDisassembly>("Failed to initialise MII."); 403 404 Context.reset(new MCContext(AsmInfo.get(), RegisterInfo.get(), &MOFI)); 405 406 Disassembler.reset( 407 ObjectTarget->createMCDisassembler(*SubtargetInfo, *Context)); 408 409 if (!Disassembler) 410 return make_error<UnsupportedDisassembly>( 411 "No disassembler available for target"); 412 413 MIA.reset(ObjectTarget->createMCInstrAnalysis(MII.get())); 414 415 Printer.reset(ObjectTarget->createMCInstPrinter( 416 ObjectTriple, AsmInfo->getAssemblerDialect(), *AsmInfo, *MII, 417 *RegisterInfo)); 418 419 return Error::success(); 420 } 421 422 Error FileAnalysis::parseCodeSections() { 423 if (!IgnoreDWARFFlag) { 424 std::unique_ptr<DWARFContext> DWARF = DWARFContext::create(*Object); 425 if (!DWARF) 426 return make_error<StringError>("Could not create DWARF information.", 427 inconvertibleErrorCode()); 428 429 bool LineInfoValid = false; 430 431 for (auto &Unit : DWARF->compile_units()) { 432 const auto &LineTable = DWARF->getLineTableForUnit(Unit.get()); 433 if (LineTable && !LineTable->Rows.empty()) { 434 LineInfoValid = true; 435 break; 436 } 437 } 438 439 if (!LineInfoValid) 440 return make_error<StringError>( 441 "DWARF line information missing. Did you compile with '-g'?", 442 inconvertibleErrorCode()); 443 } 444 445 for (const object::SectionRef &Section : Object->sections()) { 446 // Ensure only executable sections get analysed. 447 if (!(object::ELFSectionRef(Section).getFlags() & ELF::SHF_EXECINSTR)) 448 continue; 449 450 // Avoid checking the PLT since it produces spurious failures on AArch64 451 // when ignoring DWARF data. 452 StringRef SectionName; 453 if (!Section.getName(SectionName) && SectionName == ".plt") 454 continue; 455 456 Expected<StringRef> Contents = Section.getContents(); 457 if (!Contents) 458 return Contents.takeError(); 459 ArrayRef<uint8_t> SectionBytes = arrayRefFromStringRef(*Contents); 460 461 parseSectionContents(SectionBytes, 462 {Section.getAddress(), Section.getIndex()}); 463 } 464 return Error::success(); 465 } 466 467 void FileAnalysis::parseSectionContents(ArrayRef<uint8_t> SectionBytes, 468 object::SectionedAddress Address) { 469 assert(Symbolizer && "Symbolizer is uninitialised."); 470 MCInst Instruction; 471 Instr InstrMeta; 472 uint64_t InstructionSize; 473 474 for (uint64_t Byte = 0; Byte < SectionBytes.size();) { 475 bool ValidInstruction = 476 Disassembler->getInstruction(Instruction, InstructionSize, 477 SectionBytes.drop_front(Byte), 0, nulls(), 478 outs()) == MCDisassembler::Success; 479 480 Byte += InstructionSize; 481 482 uint64_t VMAddress = Address.Address + Byte - InstructionSize; 483 InstrMeta.Instruction = Instruction; 484 InstrMeta.VMAddress = VMAddress; 485 InstrMeta.InstructionSize = InstructionSize; 486 InstrMeta.Valid = ValidInstruction; 487 488 addInstruction(InstrMeta); 489 490 if (!ValidInstruction) 491 continue; 492 493 // Skip additional parsing for instructions that do not affect the control 494 // flow. 495 const auto &InstrDesc = MII->get(Instruction.getOpcode()); 496 if (!InstrDesc.mayAffectControlFlow(Instruction, *RegisterInfo)) 497 continue; 498 499 uint64_t Target; 500 if (MIA->evaluateBranch(Instruction, VMAddress, InstructionSize, Target)) { 501 // If the target can be evaluated, it's not indirect. 502 StaticBranchTargetings[Target].push_back(VMAddress); 503 continue; 504 } 505 506 if (!usesRegisterOperand(InstrMeta)) 507 continue; 508 509 if (InstrDesc.isReturn()) 510 continue; 511 512 // Check if this instruction exists in the range of the DWARF metadata. 513 if (!IgnoreDWARFFlag) { 514 auto LineInfo = Symbolizer->symbolizeCode( 515 Object->getFileName(), {VMAddress, Address.SectionIndex}); 516 if (!LineInfo) { 517 handleAllErrors(LineInfo.takeError(), [](const ErrorInfoBase &E) { 518 errs() << "Symbolizer failed to get line: " << E.message() << "\n"; 519 }); 520 continue; 521 } 522 523 if (LineInfo->FileName == DILineInfo::BadString) 524 continue; 525 } 526 527 IndirectInstructions.insert({VMAddress, Address.SectionIndex}); 528 } 529 } 530 531 void FileAnalysis::addInstruction(const Instr &Instruction) { 532 const auto &KV = 533 Instructions.insert(std::make_pair(Instruction.VMAddress, Instruction)); 534 if (!KV.second) { 535 errs() << "Failed to add instruction at address " 536 << format_hex(Instruction.VMAddress, 2) 537 << ": Instruction at this address already exists.\n"; 538 exit(EXIT_FAILURE); 539 } 540 } 541 542 Error FileAnalysis::parseSymbolTable() { 543 // Functions that will trap on CFI violations. 544 SmallSet<StringRef, 4> TrapOnFailFunctions; 545 TrapOnFailFunctions.insert("__cfi_slowpath"); 546 TrapOnFailFunctions.insert("__cfi_slowpath_diag"); 547 TrapOnFailFunctions.insert("abort"); 548 549 // Look through the list of symbols for functions that will trap on CFI 550 // violations. 551 for (auto &Sym : Object->symbols()) { 552 auto SymNameOrErr = Sym.getName(); 553 if (!SymNameOrErr) 554 consumeError(SymNameOrErr.takeError()); 555 else if (TrapOnFailFunctions.count(*SymNameOrErr) > 0) { 556 auto AddrOrErr = Sym.getAddress(); 557 if (!AddrOrErr) 558 consumeError(AddrOrErr.takeError()); 559 else 560 TrapOnFailFunctionAddresses.insert(*AddrOrErr); 561 } 562 } 563 if (auto *ElfObject = dyn_cast<object::ELFObjectFileBase>(Object)) { 564 for (const auto &Addr : ElfObject->getPltAddresses()) { 565 object::SymbolRef Sym(Addr.first, Object); 566 auto SymNameOrErr = Sym.getName(); 567 if (!SymNameOrErr) 568 consumeError(SymNameOrErr.takeError()); 569 else if (TrapOnFailFunctions.count(*SymNameOrErr) > 0) 570 TrapOnFailFunctionAddresses.insert(Addr.second); 571 } 572 } 573 return Error::success(); 574 } 575 576 UnsupportedDisassembly::UnsupportedDisassembly(StringRef Text) : Text(Text) {} 577 578 char UnsupportedDisassembly::ID; 579 void UnsupportedDisassembly::log(raw_ostream &OS) const { 580 OS << "Could not initialise disassembler: " << Text; 581 } 582 583 std::error_code UnsupportedDisassembly::convertToErrorCode() const { 584 return std::error_code(); 585 } 586 587 } // namespace cfi_verify 588 } // namespace llvm 589