//===-- X86AsmBackend.cpp - X86 Assembler Backend -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86EncodingOptimization.h"
#include "MCTargetDesc/X86FixupKinds.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCValue.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

namespace {
/// A wrapper for holding a mask of the values from X86::AlignBranchBoundaryKind
class X86AlignBranchKind {
private:
  uint8_t AlignBranchKind = 0;

public:
  void operator=(const std::string &Val) {
    if (Val.empty())
      return;
    SmallVector<StringRef, 6> BranchTypes;
    StringRef(Val).split(BranchTypes, '+', -1, false);
    for (auto BranchType : BranchTypes) {
      if (BranchType == "fused")
        addKind(X86::AlignBranchFused);
      else if (BranchType == "jcc")
        addKind(X86::AlignBranchJcc);
      else if (BranchType == "jmp")
        addKind(X86::AlignBranchJmp);
      else if (BranchType == "call")
        addKind(X86::AlignBranchCall);
      else if (BranchType == "ret")
        addKind(X86::AlignBranchRet);
      else if (BranchType == "indirect")
        addKind(X86::AlignBranchIndirect);
      else {
        errs() << "invalid argument " << BranchType.str()
               << " to -x86-align-branch=; each element must be one of: fused, "
                  "jcc, jmp, call, ret, indirect (plus separated).\n";
      }
    }
  }

  operator uint8_t() const { return AlignBranchKind; }
  void addKind(X86::AlignBranchBoundaryKind Value) { AlignBranchKind |= Value; }
};

X86AlignBranchKind X86AlignBranchKindLoc;

cl::opt<unsigned> X86AlignBranchBoundary(
    "x86-align-branch-boundary", cl::init(0),
    cl::desc(
        "Control how the assembler should align branches with NOP. If the "
        "boundary's size is not 0, it should be a power of 2 and no less "
        "than 32. Branches will be aligned to prevent them from crossing "
        "or ending against the boundary of the specified size. "
        "The default value 0 does not align branches."));

cl::opt<X86AlignBranchKind, true, cl::parser<std::string>> X86AlignBranch(
    "x86-align-branch",
    cl::desc(
        "Specify types of branches to align (plus separated list of types):"
        "\njcc indicates conditional jumps"
        "\nfused indicates fused conditional jumps"
        "\njmp indicates direct unconditional jumps"
        "\ncall indicates direct and indirect calls"
        "\nret indicates rets"
        "\nindirect indicates indirect unconditional jumps"),
    cl::location(X86AlignBranchKindLoc));

cl::opt<bool> X86AlignBranchWithin32BBoundaries(
    "x86-branches-within-32B-boundaries", cl::init(false),
    cl::desc(
        "Align selected instructions to mitigate negative performance impact "
        "of Intel's microcode update for errata skx102. May break "
        "assumptions about labels corresponding to particular instructions, "
        "and should be used with caution."));

cl::opt<unsigned> X86PadMaxPrefixSize(
    "x86-pad-max-prefix-size", cl::init(0),
    cl::desc("Maximum number of prefixes to use for padding"));

cl::opt<bool> X86PadForAlign(
    "x86-pad-for-align", cl::init(false), cl::Hidden,
    cl::desc("Pad previous instructions to implement align directives"));

cl::opt<bool> X86PadForBranchAlign(
    "x86-pad-for-branch-align", cl::init(true), cl::Hidden,
    cl::desc("Pad previous instructions to implement branch alignment"));

class X86AsmBackend : public MCAsmBackend {
  const MCSubtargetInfo &STI;
  std::unique_ptr<const MCInstrInfo> MCII;
  X86AlignBranchKind AlignBranchType;
  Align AlignBoundary;
  unsigned TargetPrefixMax = 0;

  MCInst PrevInst;
  unsigned PrevInstOpcode = 0;
  MCBoundaryAlignFragment *PendingBA = nullptr;
  std::pair<MCFragment *, size_t> PrevInstPosition;
  bool IsRightAfterData = false;

  uint8_t determinePaddingPrefix(const MCInst &Inst) const;
  bool isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const;
  bool needAlign(const MCInst &Inst) const;
  bool canPadBranches(MCObjectStreamer &OS) const;
  bool canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const;

public:
  X86AsmBackend(const Target &T, const MCSubtargetInfo &STI)
      : MCAsmBackend(llvm::endianness::little), STI(STI),
        MCII(T.createMCInstrInfo()) {
    if (X86AlignBranchWithin32BBoundaries) {
      // At the moment, this defaults to aligning fused branches, unconditional
      // jumps, and (unfused) conditional jumps with nops. Both the
      // instructions aligned and the alignment method (nop vs prefix) may
      // change in the future.
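      //
      // For illustration only: enabling this flag is roughly equivalent to
      // passing the individual options below explicitly (a sketch; the
      // authoritative defaults are the assignments that follow):
      //
      //   llvm-mc -x86-align-branch-boundary=32 \
      //           -x86-align-branch=fused+jcc+jmp foo.s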
      AlignBoundary = assumeAligned(32);
      AlignBranchType.addKind(X86::AlignBranchFused);
      AlignBranchType.addKind(X86::AlignBranchJcc);
      AlignBranchType.addKind(X86::AlignBranchJmp);
    }
    // Allow overriding defaults set by main flag
    if (X86AlignBranchBoundary.getNumOccurrences())
      AlignBoundary = assumeAligned(X86AlignBranchBoundary);
    if (X86AlignBranch.getNumOccurrences())
      AlignBranchType = X86AlignBranchKindLoc;
    if (X86PadMaxPrefixSize.getNumOccurrences())
      TargetPrefixMax = X86PadMaxPrefixSize;
  }

  bool allowAutoPadding() const override;
  bool allowEnhancedRelaxation() const override;
  void emitInstructionBegin(MCObjectStreamer &OS, const MCInst &Inst,
                            const MCSubtargetInfo &STI);
  void emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst);

  unsigned getNumFixupKinds() const override {
    return X86::NumTargetFixupKinds;
  }

  std::optional<MCFixupKind> getFixupKind(StringRef Name) const override;

  const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;

  bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
                             const MCValue &Target,
                             const MCSubtargetInfo *STI) override;

  void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
                  const MCValue &Target, MutableArrayRef<char> Data,
                  uint64_t Value, bool IsResolved,
                  const MCSubtargetInfo *STI) const override;

  bool mayNeedRelaxation(const MCInst &Inst,
                         const MCSubtargetInfo &STI) const override;

  bool fixupNeedsRelaxation(const MCFixup &Fixup,
                            uint64_t Value) const override;

  void relaxInstruction(MCInst &Inst,
                        const MCSubtargetInfo &STI) const override;

  bool padInstructionViaRelaxation(MCRelaxableFragment &RF,
                                   MCCodeEmitter &Emitter,
                                   unsigned &RemainingSize) const;

  bool padInstructionViaPrefix(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
                               unsigned &RemainingSize) const;

  bool padInstructionEncoding(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
                              unsigned &RemainingSize) const;

  bool finishLayout(const MCAssembler &Asm) const override;

  unsigned getMaximumNopSize(const MCSubtargetInfo &STI) const override;

  bool writeNopData(raw_ostream &OS, uint64_t Count,
                    const MCSubtargetInfo *STI) const override;
};
} // end anonymous namespace

static bool isRelaxableBranch(unsigned Opcode) {
  return Opcode == X86::JCC_1 || Opcode == X86::JMP_1;
}

static unsigned getRelaxedOpcodeBranch(unsigned Opcode,
                                       bool Is16BitMode = false) {
  switch (Opcode) {
  default:
    llvm_unreachable("invalid opcode for branch");
  case X86::JCC_1:
    return (Is16BitMode) ? X86::JCC_2 : X86::JCC_4;
  case X86::JMP_1:
    return (Is16BitMode) ? X86::JMP_2 : X86::JMP_4;
  }
}

static unsigned getRelaxedOpcode(const MCInst &MI, bool Is16BitMode) {
  unsigned Opcode = MI.getOpcode();
  return isRelaxableBranch(Opcode) ?
             getRelaxedOpcodeBranch(Opcode, Is16BitMode)
           : X86::getOpcodeForLongImmediateForm(Opcode);
}

static X86::CondCode getCondFromBranch(const MCInst &MI,
                                       const MCInstrInfo &MCII) {
  unsigned Opcode = MI.getOpcode();
  switch (Opcode) {
  default:
    return X86::COND_INVALID;
  case X86::JCC_1: {
    const MCInstrDesc &Desc = MCII.get(Opcode);
    return static_cast<X86::CondCode>(
        MI.getOperand(Desc.getNumOperands() - 1).getImm());
  }
  }
}

static X86::SecondMacroFusionInstKind
classifySecondInstInMacroFusion(const MCInst &MI, const MCInstrInfo &MCII) {
  X86::CondCode CC = getCondFromBranch(MI, MCII);
  return classifySecondCondCodeInMacroFusion(CC);
}

/// Check if the instruction uses RIP relative addressing.
static bool isRIPRelative(const MCInst &MI, const MCInstrInfo &MCII) {
  unsigned Opcode = MI.getOpcode();
  const MCInstrDesc &Desc = MCII.get(Opcode);
  uint64_t TSFlags = Desc.TSFlags;
  unsigned CurOp = X86II::getOperandBias(Desc);
  int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
  if (MemoryOperand < 0)
    return false;
  unsigned BaseRegNum = MemoryOperand + CurOp + X86::AddrBaseReg;
  unsigned BaseReg = MI.getOperand(BaseRegNum).getReg();
  return (BaseReg == X86::RIP);
}

/// Check if the instruction is a prefix.
static bool isPrefix(unsigned Opcode, const MCInstrInfo &MCII) {
  return X86II::isPrefix(MCII.get(Opcode).TSFlags);
}

/// Check if the instruction is valid as the first instruction in macro fusion.
static bool isFirstMacroFusibleInst(const MCInst &Inst,
                                    const MCInstrInfo &MCII) {
  // An Intel instruction with RIP relative addressing is not macro fusible.
  if (isRIPRelative(Inst, MCII))
    return false;
  X86::FirstMacroFusionInstKind FIK =
      X86::classifyFirstOpcodeInMacroFusion(Inst.getOpcode());
  return FIK != X86::FirstMacroFusionInstKind::Invalid;
}

/// X86 can reduce the number of NOP bytes by padding instructions with
/// prefixes to get better performance in some cases. Here, we determine which
/// prefix is the most suitable.
///
/// If the instruction has a segment override prefix, use the existing one.
/// If the target is 64-bit, use the CS.
/// If the target is 32-bit,
///   - If the instruction has an ESP/EBP base register, use SS.
///   - Otherwise use DS.
uint8_t X86AsmBackend::determinePaddingPrefix(const MCInst &Inst) const {
  assert((STI.hasFeature(X86::Is32Bit) || STI.hasFeature(X86::Is64Bit)) &&
         "Prefixes can be added only in 32-bit or 64-bit mode.");
  const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
  uint64_t TSFlags = Desc.TSFlags;

  // Determine where the memory operand starts, if present.
  int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
  if (MemoryOperand != -1)
    MemoryOperand += X86II::getOperandBias(Desc);

  unsigned SegmentReg = 0;
  if (MemoryOperand >= 0) {
    // Check for explicit segment override on memory operand.
    SegmentReg = Inst.getOperand(MemoryOperand + X86::AddrSegmentReg).getReg();
  }

  switch (TSFlags & X86II::FormMask) {
  default:
    break;
  case X86II::RawFrmDstSrc: {
    // Check segment override opcode prefix as needed (not for %ds).
    if (Inst.getOperand(2).getReg() != X86::DS)
      SegmentReg = Inst.getOperand(2).getReg();
    break;
  }
  case X86II::RawFrmSrc: {
    // Check segment override opcode prefix as needed (not for %ds).
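    // Illustrative example (not taken from the surrounding code): for
    // `movsb %fs:(%esi), %es:(%edi)` the source operand carries an FS
    // override, so FS (0x64) would be returned as the padding prefix rather
    // than the mode-based defaults chosen further below.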
    if (Inst.getOperand(1).getReg() != X86::DS)
      SegmentReg = Inst.getOperand(1).getReg();
    break;
  }
  case X86II::RawFrmMemOffs: {
    // Check segment override opcode prefix as needed.
    SegmentReg = Inst.getOperand(1).getReg();
    break;
  }
  }

  if (SegmentReg != 0)
    return X86::getSegmentOverridePrefixForReg(SegmentReg);

  if (STI.hasFeature(X86::Is64Bit))
    return X86::CS_Encoding;

  if (MemoryOperand >= 0) {
    unsigned BaseRegNum = MemoryOperand + X86::AddrBaseReg;
    unsigned BaseReg = Inst.getOperand(BaseRegNum).getReg();
    if (BaseReg == X86::ESP || BaseReg == X86::EBP)
      return X86::SS_Encoding;
  }
  return X86::DS_Encoding;
}

/// Check if the two instructions will be macro-fused on the target cpu.
bool X86AsmBackend::isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const {
  const MCInstrDesc &InstDesc = MCII->get(Jcc.getOpcode());
  if (!InstDesc.isConditionalBranch())
    return false;
  if (!isFirstMacroFusibleInst(Cmp, *MCII))
    return false;
  const X86::FirstMacroFusionInstKind CmpKind =
      X86::classifyFirstOpcodeInMacroFusion(Cmp.getOpcode());
  const X86::SecondMacroFusionInstKind BranchKind =
      classifySecondInstInMacroFusion(Jcc, *MCII);
  return X86::isMacroFused(CmpKind, BranchKind);
}

/// Check if the instruction has a variant symbol operand.
static bool hasVariantSymbol(const MCInst &MI) {
  for (auto &Operand : MI) {
    if (!Operand.isExpr())
      continue;
    const MCExpr &Expr = *Operand.getExpr();
    if (Expr.getKind() == MCExpr::SymbolRef &&
        cast<MCSymbolRefExpr>(Expr).getKind() != MCSymbolRefExpr::VK_None)
      return true;
  }
  return false;
}

bool X86AsmBackend::allowAutoPadding() const {
  return (AlignBoundary != Align(1) && AlignBranchType != X86::AlignBranchNone);
}

bool X86AsmBackend::allowEnhancedRelaxation() const {
  return allowAutoPadding() && TargetPrefixMax != 0 && X86PadForBranchAlign;
}

/// X86 has certain instructions which enable interrupts exactly one
/// instruction *after* the instruction which stores to SS. Return true if the
/// given instruction may have such an interrupt delay slot.
static bool mayHaveInterruptDelaySlot(unsigned InstOpcode) {
  switch (InstOpcode) {
  case X86::POPSS16:
  case X86::POPSS32:
  case X86::STI:
    return true;

  case X86::MOV16sr:
  case X86::MOV32sr:
  case X86::MOV64sr:
  case X86::MOV16sm:
    // In fact, this is only the case if the first operand is SS. However, as
    // segment moves occur extremely rarely, this is just a minor pessimization.
    return true;
  }
  return false;
}

/// Check if the instruction to be emitted is right after any data.
static bool
isRightAfterData(MCFragment *CurrentFragment,
                 const std::pair<MCFragment *, size_t> &PrevInstPosition) {
  MCFragment *F = CurrentFragment;
  // Since data is always emitted into a DataFragment, our check strategy is
  // simple here.
  //   - If the fragment is a DataFragment
  //     - If it's empty (section start or data after align), return false.
  //     - If it's not the fragment where the previous instruction is,
  //       returns true.
  //     - If it's the fragment holding the previous instruction but its
  //       size changed since the previous instruction was emitted into
  //       it, returns true.
  //     - Otherwise returns false.
  //   - If the fragment is not a DataFragment, returns false.
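  //
  // A hypothetical example of the "size changed" case (for illustration):
  //
  //   .text
  //   .byte 0x0f      # appended to the current DataFragment
  //   jmp foo         # emitted right after that data
  //
  // The fragment already holds bytes that are not part of the previous
  // instruction, so this returns true and canPadInst() will refuse to pad.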
  if (auto *DF = dyn_cast_or_null<MCDataFragment>(F))
    return DF->getContents().size() &&
           (DF != PrevInstPosition.first ||
            DF->getContents().size() != PrevInstPosition.second);

  return false;
}

/// \returns the fragment size if it has instructions, otherwise returns 0.
static size_t getSizeForInstFragment(const MCFragment *F) {
  if (!F || !F->hasInstructions())
    return 0;
  // MCEncodedFragmentWithContents being templated makes this tricky.
  switch (F->getKind()) {
  default:
    llvm_unreachable("Unknown fragment with instructions!");
  case MCFragment::FT_Data:
    return cast<MCDataFragment>(*F).getContents().size();
  case MCFragment::FT_Relaxable:
    return cast<MCRelaxableFragment>(*F).getContents().size();
  case MCFragment::FT_CompactEncodedInst:
    return cast<MCCompactEncodedInstFragment>(*F).getContents().size();
  }
}

/// Return true if we can insert NOP or prefixes automatically before the
/// instruction to be emitted.
bool X86AsmBackend::canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const {
  if (hasVariantSymbol(Inst))
    // Linker may rewrite the instruction with variant symbol operand (e.g.
    // TLSCALL).
    return false;

  if (mayHaveInterruptDelaySlot(PrevInstOpcode))
    // If this instruction follows an interrupt enabling instruction with a one
    // instruction delay, inserting a nop would change behavior.
    return false;

  if (isPrefix(PrevInstOpcode, *MCII))
    // If this instruction follows a prefix, inserting a nop/prefix would
    // change semantics.
    return false;

  if (isPrefix(Inst.getOpcode(), *MCII))
    // If this instruction is a prefix, inserting a prefix would change
    // semantics.
    return false;

  if (IsRightAfterData)
    // If this instruction follows any data, there is no clear
    // instruction boundary, inserting a nop/prefix would change semantics.
    return false;

  return true;
}

bool X86AsmBackend::canPadBranches(MCObjectStreamer &OS) const {
  if (!OS.getAllowAutoPadding())
    return false;
  assert(allowAutoPadding() && "incorrect initialization!");

  // We only pad in the text section.
  if (!OS.getCurrentSectionOnly()->isText())
    return false;

  // To be Done: Currently don't deal with Bundle cases.
  if (OS.getAssembler().isBundlingEnabled())
    return false;

  // Branches only need to be aligned in 32-bit or 64-bit mode.
  if (!(STI.hasFeature(X86::Is64Bit) || STI.hasFeature(X86::Is32Bit)))
    return false;

  return true;
}

/// Check if the instruction needs to be aligned.
bool X86AsmBackend::needAlign(const MCInst &Inst) const {
  const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
  return (Desc.isConditionalBranch() &&
          (AlignBranchType & X86::AlignBranchJcc)) ||
         (Desc.isUnconditionalBranch() &&
          (AlignBranchType & X86::AlignBranchJmp)) ||
         (Desc.isCall() && (AlignBranchType & X86::AlignBranchCall)) ||
         (Desc.isReturn() && (AlignBranchType & X86::AlignBranchRet)) ||
         (Desc.isIndirectBranch() &&
          (AlignBranchType & X86::AlignBranchIndirect));
}

/// Insert BoundaryAlignFragment before instructions to align branches.
void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS,
                                         const MCInst &Inst,
                                         const MCSubtargetInfo &STI) {
  // Used by canPadInst. Done here, because in emitInstructionEnd, the current
  // fragment will have changed.
  IsRightAfterData =
      isRightAfterData(OS.getCurrentFragment(), PrevInstPosition);

  if (!canPadBranches(OS))
    return;

  // NB: PrevInst only valid if canPadBranches is true.
  if (!isMacroFused(PrevInst, Inst))
    // Macro fusion doesn't happen indeed, clear the pending.
    PendingBA = nullptr;

  // When branch padding is enabled (basically the skx102 erratum => unlikely),
  // we call canPadInst (not cheap) twice. However, in the common case, we can
  // avoid unnecessary calls to that, as this is otherwise only used for
  // relaxable fragments.
  if (!canPadInst(Inst, OS))
    return;

  if (PendingBA && PendingBA->getNext() == OS.getCurrentFragment()) {
    // Macro fusion actually happens and there is no other fragment inserted
    // after the previous instruction.
    //
    // Do nothing here since we already inserted a BoundaryAlign fragment when
    // we met the first instruction in the fused pair and we'll tie them
    // together in emitInstructionEnd.
    //
    // Note: When there is at least one fragment, such as MCAlignFragment,
    // inserted after the previous instruction, e.g.
    //
    // \code
    //   cmp %rax %rcx
    //   .align 16
    //   je .Label0
    // \endcode
    //
    // We will treat the JCC as an unfused branch although it may be fused
    // with the CMP.
    return;
  }

  if (needAlign(Inst) || ((AlignBranchType & X86::AlignBranchFused) &&
                          isFirstMacroFusibleInst(Inst, *MCII))) {
    // If we meet an unfused branch or the first instruction in a fusible pair,
    // insert a BoundaryAlign fragment.
    PendingBA = OS.getContext().allocFragment<MCBoundaryAlignFragment>(
        AlignBoundary, STI);
    OS.insert(PendingBA);
  }
}

/// Set the last fragment to be aligned for the BoundaryAlignFragment.
void X86AsmBackend::emitInstructionEnd(MCObjectStreamer &OS,
                                       const MCInst &Inst) {
  MCFragment *CF = OS.getCurrentFragment();
  if (auto *F = dyn_cast_or_null<MCRelaxableFragment>(CF))
    F->setAllowAutoPadding(canPadInst(Inst, OS));

  // Update PrevInstOpcode here, canPadInst() reads that.
  PrevInstOpcode = Inst.getOpcode();
  PrevInstPosition = std::make_pair(CF, getSizeForInstFragment(CF));

  if (!canPadBranches(OS))
    return;

  // PrevInst is only needed if canPadBranches. Copying an MCInst isn't cheap.
  PrevInst = Inst;

  if (!needAlign(Inst) || !PendingBA)
    return;

  // Tie the aligned instructions into a pending BoundaryAlign.
  PendingBA->setLastFragment(CF);
  PendingBA = nullptr;

  // We need to ensure that further data isn't added to the current
  // DataFragment, so that we can get the size of instructions later in
  // MCAssembler::relaxBoundaryAlign. The easiest way is to insert a new empty
  // DataFragment.
  if (isa_and_nonnull<MCDataFragment>(CF))
    OS.insert(OS.getContext().allocFragment<MCDataFragment>());

  // Update the maximum alignment on the current section if necessary.
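  // (Assumed rationale: if the section were only, say, 16-byte aligned, an
  // offset that is a multiple of 32 within the section would not necessarily
  // be a 32-byte boundary in memory, so the boundary alignment above is only
  // meaningful once the section itself is at least AlignBoundary-aligned.)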
  MCSection *Sec = OS.getCurrentSectionOnly();
  Sec->ensureMinAlignment(AlignBoundary);
}

std::optional<MCFixupKind> X86AsmBackend::getFixupKind(StringRef Name) const {
  if (STI.getTargetTriple().isOSBinFormatELF()) {
    unsigned Type;
    if (STI.getTargetTriple().getArch() == Triple::x86_64) {
      Type = llvm::StringSwitch<unsigned>(Name)
#define ELF_RELOC(X, Y) .Case(#X, Y)
#include "llvm/BinaryFormat/ELFRelocs/x86_64.def"
#undef ELF_RELOC
                 .Case("BFD_RELOC_NONE", ELF::R_X86_64_NONE)
                 .Case("BFD_RELOC_8", ELF::R_X86_64_8)
                 .Case("BFD_RELOC_16", ELF::R_X86_64_16)
                 .Case("BFD_RELOC_32", ELF::R_X86_64_32)
                 .Case("BFD_RELOC_64", ELF::R_X86_64_64)
                 .Default(-1u);
    } else {
      Type = llvm::StringSwitch<unsigned>(Name)
#define ELF_RELOC(X, Y) .Case(#X, Y)
#include "llvm/BinaryFormat/ELFRelocs/i386.def"
#undef ELF_RELOC
                 .Case("BFD_RELOC_NONE", ELF::R_386_NONE)
                 .Case("BFD_RELOC_8", ELF::R_386_8)
                 .Case("BFD_RELOC_16", ELF::R_386_16)
                 .Case("BFD_RELOC_32", ELF::R_386_32)
                 .Default(-1u);
    }
    if (Type == -1u)
      return std::nullopt;
    return static_cast<MCFixupKind>(FirstLiteralRelocationKind + Type);
  }
  return MCAsmBackend::getFixupKind(Name);
}

const MCFixupKindInfo &X86AsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
  const static MCFixupKindInfo Infos[X86::NumTargetFixupKinds] = {
      {"reloc_riprel_4byte", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
      {"reloc_riprel_4byte_movq_load", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
      {"reloc_riprel_4byte_relax", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
      {"reloc_riprel_4byte_relax_rex", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
      {"reloc_signed_4byte", 0, 32, 0},
      {"reloc_signed_4byte_relax", 0, 32, 0},
      {"reloc_global_offset_table", 0, 32, 0},
      {"reloc_global_offset_table8", 0, 64, 0},
      {"reloc_branch_4byte_pcrel", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
  };

  // Fixup kinds from .reloc directive are like R_386_NONE/R_X86_64_NONE. They
  // do not require any extra processing.
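  //
  // For example (illustrative), an input such as
  //
  //   .reloc ., BFD_RELOC_32, sym
  //
  // is mapped by getFixupKind() above to FirstLiteralRelocationKind plus
  // R_X86_64_32 (or R_386_32) and is passed through here unmodified.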
  if (Kind >= FirstLiteralRelocationKind)
    return MCAsmBackend::getFixupKindInfo(FK_NONE);

  if (Kind < FirstTargetFixupKind)
    return MCAsmBackend::getFixupKindInfo(Kind);

  assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
         "Invalid kind!");
  assert(Infos[Kind - FirstTargetFixupKind].Name && "Empty fixup name!");
  return Infos[Kind - FirstTargetFixupKind];
}

bool X86AsmBackend::shouldForceRelocation(const MCAssembler &,
                                          const MCFixup &Fixup, const MCValue &,
                                          const MCSubtargetInfo *STI) {
  return Fixup.getKind() >= FirstLiteralRelocationKind;
}

static unsigned getFixupKindSize(unsigned Kind) {
  switch (Kind) {
  default:
    llvm_unreachable("invalid fixup kind!");
  case FK_NONE:
    return 0;
  case FK_PCRel_1:
  case FK_SecRel_1:
  case FK_Data_1:
    return 1;
  case FK_PCRel_2:
  case FK_SecRel_2:
  case FK_Data_2:
    return 2;
  case FK_PCRel_4:
  case X86::reloc_riprel_4byte:
  case X86::reloc_riprel_4byte_relax:
  case X86::reloc_riprel_4byte_relax_rex:
  case X86::reloc_riprel_4byte_movq_load:
  case X86::reloc_signed_4byte:
  case X86::reloc_signed_4byte_relax:
  case X86::reloc_global_offset_table:
  case X86::reloc_branch_4byte_pcrel:
  case FK_SecRel_4:
  case FK_Data_4:
    return 4;
  case FK_PCRel_8:
  case FK_SecRel_8:
  case FK_Data_8:
  case X86::reloc_global_offset_table8:
    return 8;
  }
}

void X86AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
                               const MCValue &Target,
                               MutableArrayRef<char> Data,
                               uint64_t Value, bool IsResolved,
                               const MCSubtargetInfo *STI) const {
  unsigned Kind = Fixup.getKind();
  if (Kind >= FirstLiteralRelocationKind)
    return;
  unsigned Size = getFixupKindSize(Kind);

  assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!");

  int64_t SignedValue = static_cast<int64_t>(Value);
  if ((Target.isAbsolute() || IsResolved) &&
      getFixupKindInfo(Fixup.getKind()).Flags &
          MCFixupKindInfo::FKF_IsPCRel) {
    // Check that the PC-relative fixup fits into the fixup size.
    if (Size > 0 && !isIntN(Size * 8, SignedValue))
      Asm.getContext().reportError(
          Fixup.getLoc(), "value of " + Twine(SignedValue) +
                              " is too large for field of " + Twine(Size) +
                              ((Size == 1) ? " byte." : " bytes."));
  } else {
    // Check that the upper bits are either all zeros or all ones.
    // Specifically ignore overflow/underflow as long as the leakage is
    // limited to the lower bits. This is to remain compatible with
    // other assemblers.
    assert((Size == 0 || isIntN(Size * 8 + 1, SignedValue)) &&
           "Value does not fit in the Fixup field");
  }

  for (unsigned i = 0; i != Size; ++i)
    Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8));
}

bool X86AsmBackend::mayNeedRelaxation(const MCInst &MI,
                                      const MCSubtargetInfo &STI) const {
  unsigned Opcode = MI.getOpcode();
  unsigned SkipOperands = X86::isCCMPCC(Opcode) ? 2 : 0;
  return isRelaxableBranch(Opcode) ||
         (X86::getOpcodeForLongImmediateForm(Opcode) != Opcode &&
          MI.getOperand(MI.getNumOperands() - 1 - SkipOperands).isExpr());
}

bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
                                         uint64_t Value) const {
  // Relax if the value is too big for a (signed) i8.
  return !isInt<8>(Value);
}

// FIXME: Can tblgen help at all here to verify there aren't other instructions
// we can relax?
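//
// A sketch of the branch relaxation performed, with the standard x86 byte
// encodings shown purely for illustration:
//
//   jne .Ltarget    ; JCC_1: 75 rel8   --relax-->  JCC_4: 0f 85 rel32
//   jmp .Ltarget    ; JMP_1: eb rel8   --relax-->  JMP_4: e9 rel32
//
// (In 16-bit mode the rel16 forms JCC_2/JMP_2 are used instead.)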
void X86AsmBackend::relaxInstruction(MCInst &Inst,
                                     const MCSubtargetInfo &STI) const {
  // The only relaxation X86 does is from a 1-byte pcrel to a 4-byte pcrel.
  bool Is16BitMode = STI.hasFeature(X86::Is16Bit);
  unsigned RelaxedOp = getRelaxedOpcode(Inst, Is16BitMode);

  if (RelaxedOp == Inst.getOpcode()) {
    SmallString<256> Tmp;
    raw_svector_ostream OS(Tmp);
    Inst.dump_pretty(OS);
    OS << "\n";
    report_fatal_error("unexpected instruction to relax: " + OS.str());
  }

  Inst.setOpcode(RelaxedOp);
}

bool X86AsmBackend::padInstructionViaPrefix(MCRelaxableFragment &RF,
                                            MCCodeEmitter &Emitter,
                                            unsigned &RemainingSize) const {
  if (!RF.getAllowAutoPadding())
    return false;
  // If the instruction isn't fully relaxed, shifting it around might require a
  // larger value for one of the fixups than can be encoded. The outer loop
  // will also catch this before moving to the next instruction, but we need to
  // prevent padding this single instruction as well.
  if (mayNeedRelaxation(RF.getInst(), *RF.getSubtargetInfo()))
    return false;

  const unsigned OldSize = RF.getContents().size();
  if (OldSize == 15)
    return false;

  const unsigned MaxPossiblePad = std::min(15 - OldSize, RemainingSize);
  const unsigned RemainingPrefixSize = [&]() -> unsigned {
    SmallString<15> Code;
    X86_MC::emitPrefix(Emitter, RF.getInst(), Code, STI);
    assert(Code.size() < 15 && "The number of prefixes must be less than 15.");

    // TODO: It turns out we need a decent amount of plumbing for the target
    // specific bits to determine the number of prefixes it is safe to add.
    // Various targets (older chips mostly, but also the Atom family) encounter
    // decoder stalls with too many prefixes. For testing purposes, we set the
    // value externally for the moment.
    unsigned ExistingPrefixSize = Code.size();
    if (TargetPrefixMax <= ExistingPrefixSize)
      return 0;
    return TargetPrefixMax - ExistingPrefixSize;
  }();
  const unsigned PrefixBytesToAdd =
      std::min(MaxPossiblePad, RemainingPrefixSize);
  if (PrefixBytesToAdd == 0)
    return false;

  const uint8_t Prefix = determinePaddingPrefix(RF.getInst());

  SmallString<256> Code;
  Code.append(PrefixBytesToAdd, Prefix);
  Code.append(RF.getContents().begin(), RF.getContents().end());
  RF.getContents() = Code;

  // Adjust the fixups for the change in offsets.
  for (auto &F : RF.getFixups()) {
    F.setOffset(F.getOffset() + PrefixBytesToAdd);
  }

  RemainingSize -= PrefixBytesToAdd;
  return true;
}

bool X86AsmBackend::padInstructionViaRelaxation(MCRelaxableFragment &RF,
                                                MCCodeEmitter &Emitter,
                                                unsigned &RemainingSize) const {
  if (!mayNeedRelaxation(RF.getInst(), *RF.getSubtargetInfo()))
    // TODO: There are lots of other tricks we could apply for increasing
    // encoding size without impacting performance.
    return false;

  MCInst Relaxed = RF.getInst();
  relaxInstruction(Relaxed, *RF.getSubtargetInfo());

  SmallVector<MCFixup, 4> Fixups;
  SmallString<15> Code;
  Emitter.encodeInstruction(Relaxed, Code, Fixups, *RF.getSubtargetInfo());
  const unsigned OldSize = RF.getContents().size();
  const unsigned NewSize = Code.size();
  assert(NewSize >= OldSize && "size decrease during relaxation?");
  unsigned Delta = NewSize - OldSize;
  if (Delta > RemainingSize)
    return false;
  RF.setInst(Relaxed);
  RF.getContents() = Code;
  RF.getFixups() = Fixups;
  RemainingSize -= Delta;
  return true;
}

bool X86AsmBackend::padInstructionEncoding(MCRelaxableFragment &RF,
                                           MCCodeEmitter &Emitter,
                                           unsigned &RemainingSize) const {
  bool Changed = false;
  if (RemainingSize != 0)
    Changed |= padInstructionViaRelaxation(RF, Emitter, RemainingSize);
  if (RemainingSize != 0)
    Changed |= padInstructionViaPrefix(RF, Emitter, RemainingSize);
  return Changed;
}

bool X86AsmBackend::finishLayout(const MCAssembler &Asm) const {
  // See if we can further relax some instructions to cut down on the number of
  // nop bytes required for code alignment. The actual win is in reducing
  // instruction count, not number of bytes. Modern X86-64 can easily end up
  // decode limited. It is often better to reduce the number of instructions
  // (i.e. eliminate nops) even at the cost of increasing the size and
  // complexity of others.
  if (!X86PadForAlign && !X86PadForBranchAlign)
    return false;

  // The processed regions are delimited by LabeledFragments. -g may have more
  // MCSymbols and therefore different relaxation results. X86PadForAlign is
  // disabled by default to eliminate the -g vs non -g difference.
  DenseSet<MCFragment *> LabeledFragments;
  for (const MCSymbol &S : Asm.symbols())
    LabeledFragments.insert(S.getFragment(false));

  for (MCSection &Sec : Asm) {
    if (!Sec.isText())
      continue;

    SmallVector<MCRelaxableFragment *, 4> Relaxable;
    for (MCSection::iterator I = Sec.begin(), IE = Sec.end(); I != IE; ++I) {
      MCFragment &F = *I;

      if (LabeledFragments.count(&F))
        Relaxable.clear();

      if (F.getKind() == MCFragment::FT_Data ||
          F.getKind() == MCFragment::FT_CompactEncodedInst)
        // Skip and ignore
        continue;

      if (F.getKind() == MCFragment::FT_Relaxable) {
        auto &RF = cast<MCRelaxableFragment>(*I);
        Relaxable.push_back(&RF);
        continue;
      }

      auto canHandle = [](MCFragment &F) -> bool {
        switch (F.getKind()) {
        default:
          return false;
        case MCFragment::FT_Align:
          return X86PadForAlign;
        case MCFragment::FT_BoundaryAlign:
          return X86PadForBranchAlign;
        }
      };
      // For any unhandled kind, assume we can't change layout.
      if (!canHandle(F)) {
        Relaxable.clear();
        continue;
      }

      const uint64_t OrigSize = Asm.computeFragmentSize(F);

      // To keep the effects local, prefer to relax instructions closest to
      // the align directive. This is purely about human understandability
      // of the resulting code. If we later find a reason to expand
      // particular instructions over others, we can adjust.
      unsigned RemainingSize = OrigSize;
      while (!Relaxable.empty() && RemainingSize != 0) {
        auto &RF = *Relaxable.pop_back_val();
        // Give the backend a chance to play any tricks it wishes to increase
        // the encoding size of the given instruction.
        // Target independent code will try further relaxation, but targets
        // may play further tricks.
        padInstructionEncoding(RF, Asm.getEmitter(), RemainingSize);

        // If we have an instruction which hasn't been fully relaxed, we can't
        // skip past it and insert bytes before it. Changing its starting
        // offset might require a larger negative offset than it can encode.
        // We don't need to worry about larger positive offsets as none of the
        // possible offsets between this and our align are visible, and the
        // ones afterwards aren't changing.
        if (mayNeedRelaxation(RF.getInst(), *RF.getSubtargetInfo()))
          break;
      }
      Relaxable.clear();

      // BoundaryAlign explicitly tracks its size (unlike align).
      if (F.getKind() == MCFragment::FT_BoundaryAlign)
        cast<MCBoundaryAlignFragment>(F).setSize(RemainingSize);

      // If we're looking at a boundary align, make sure we don't try to pad
      // its target instructions for some following directive. Doing so would
      // break the alignment of the current boundary align.
      if (auto *BF = dyn_cast<MCBoundaryAlignFragment>(&F)) {
        const MCFragment *LastFragment = BF->getLastFragment();
        if (!LastFragment)
          continue;
        while (&*I != LastFragment)
          ++I;
      }
    }
  }

  return true;
}

unsigned X86AsmBackend::getMaximumNopSize(const MCSubtargetInfo &STI) const {
  if (STI.hasFeature(X86::Is16Bit))
    return 4;
  if (!STI.hasFeature(X86::FeatureNOPL) && !STI.hasFeature(X86::Is64Bit))
    return 1;
  if (STI.hasFeature(X86::TuningFast7ByteNOP))
    return 7;
  if (STI.hasFeature(X86::TuningFast15ByteNOP))
    return 15;
  if (STI.hasFeature(X86::TuningFast11ByteNOP))
    return 11;
  // FIXME: handle 32-bit mode
  // 15-bytes is the longest single NOP instruction, but 10-bytes is
  // commonly the longest that can be efficiently decoded.
  return 10;
}

/// Write a sequence of optimal nops to the output, covering \p Count
/// bytes.
/// \return - true on success, false on failure
bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
                                 const MCSubtargetInfo *STI) const {
  static const char Nops32Bit[10][11] = {
      // nop
      "\x90",
      // xchg %ax,%ax
      "\x66\x90",
      // nopl (%[re]ax)
      "\x0f\x1f\x00",
      // nopl 0(%[re]ax)
      "\x0f\x1f\x40\x00",
      // nopl 0(%[re]ax,%[re]ax,1)
      "\x0f\x1f\x44\x00\x00",
      // nopw 0(%[re]ax,%[re]ax,1)
      "\x66\x0f\x1f\x44\x00\x00",
      // nopl 0L(%[re]ax)
      "\x0f\x1f\x80\x00\x00\x00\x00",
      // nopl 0L(%[re]ax,%[re]ax,1)
      "\x0f\x1f\x84\x00\x00\x00\x00\x00",
      // nopw 0L(%[re]ax,%[re]ax,1)
      "\x66\x0f\x1f\x84\x00\x00\x00\x00\x00",
      // nopw %cs:0L(%[re]ax,%[re]ax,1)
      "\x66\x2e\x0f\x1f\x84\x00\x00\x00\x00\x00",
  };

  // 16-bit mode uses different nop patterns than 32-bit.
  static const char Nops16Bit[4][11] = {
      // nop
      "\x90",
      // xchg %eax,%eax
      "\x66\x90",
      // lea 0(%si),%si
      "\x8d\x74\x00",
      // lea 0w(%si),%si
      "\x8d\xb4\x00\x00",
  };

  const char(*Nops)[11] =
      STI->hasFeature(X86::Is16Bit) ? Nops16Bit : Nops32Bit;

  uint64_t MaxNopLength = (uint64_t)getMaximumNopSize(*STI);

  // Emit as many MaxNopLength NOPs as needed, then emit a NOP of the remaining
  // length.
  do {
    const uint8_t ThisNopLength = (uint8_t)std::min(Count, MaxNopLength);
    const uint8_t Prefixes = ThisNopLength <= 10 ?
        0 : ThisNopLength - 10;
    for (uint8_t i = 0; i < Prefixes; i++)
      OS << '\x66';
    const uint8_t Rest = ThisNopLength - Prefixes;
    if (Rest != 0)
      OS.write(Nops[Rest - 1], Rest);
    Count -= ThisNopLength;
  } while (Count != 0);

  return true;
}

/* *** */

namespace {

class ELFX86AsmBackend : public X86AsmBackend {
public:
  uint8_t OSABI;
  ELFX86AsmBackend(const Target &T, uint8_t OSABI, const MCSubtargetInfo &STI)
      : X86AsmBackend(T, STI), OSABI(OSABI) {}
};

class ELFX86_32AsmBackend : public ELFX86AsmBackend {
public:
  ELFX86_32AsmBackend(const Target &T, uint8_t OSABI,
                      const MCSubtargetInfo &STI)
      : ELFX86AsmBackend(T, OSABI, STI) {}

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI, ELF::EM_386);
  }
};

class ELFX86_X32AsmBackend : public ELFX86AsmBackend {
public:
  ELFX86_X32AsmBackend(const Target &T, uint8_t OSABI,
                       const MCSubtargetInfo &STI)
      : ELFX86AsmBackend(T, OSABI, STI) {}

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
                                    ELF::EM_X86_64);
  }
};

class ELFX86_IAMCUAsmBackend : public ELFX86AsmBackend {
public:
  ELFX86_IAMCUAsmBackend(const Target &T, uint8_t OSABI,
                         const MCSubtargetInfo &STI)
      : ELFX86AsmBackend(T, OSABI, STI) {}

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
                                    ELF::EM_IAMCU);
  }
};

class ELFX86_64AsmBackend : public ELFX86AsmBackend {
public:
  ELFX86_64AsmBackend(const Target &T, uint8_t OSABI,
                      const MCSubtargetInfo &STI)
      : ELFX86AsmBackend(T, OSABI, STI) {}

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    return createX86ELFObjectWriter(/*IsELF64*/ true, OSABI, ELF::EM_X86_64);
  }
};

class WindowsX86AsmBackend : public X86AsmBackend {
  bool Is64Bit;

public:
  WindowsX86AsmBackend(const Target &T, bool is64Bit,
                       const MCSubtargetInfo &STI)
      : X86AsmBackend(T, STI), Is64Bit(is64Bit) {}

  std::optional<MCFixupKind> getFixupKind(StringRef Name) const override {
    return StringSwitch<std::optional<MCFixupKind>>(Name)
        .Case("dir32", FK_Data_4)
        .Case("secrel32", FK_SecRel_4)
        .Case("secidx", FK_SecRel_2)
        .Default(MCAsmBackend::getFixupKind(Name));
  }

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    return createX86WinCOFFObjectWriter(Is64Bit);
  }
};

namespace CU {

/// Compact unwind encoding values.
enum CompactUnwindEncodings {
  /// [RE]BP based frame where [RE]BP is pushed on the stack immediately after
  /// the return address, then [RE]SP is moved to [RE]BP.
  UNWIND_MODE_BP_FRAME = 0x01000000,

  /// A frameless function with a small constant stack size.
  UNWIND_MODE_STACK_IMMD = 0x02000000,

  /// A frameless function with a large constant stack size.
  UNWIND_MODE_STACK_IND = 0x03000000,

  /// No compact unwind encoding is available.
  UNWIND_MODE_DWARF = 0x04000000,

  /// Mask for encoding the frame registers.
  UNWIND_BP_FRAME_REGISTERS = 0x00007FFF,

  /// Mask for encoding the frameless registers.
  UNWIND_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF
};

} // namespace CU

class DarwinX86AsmBackend : public X86AsmBackend {
  const MCRegisterInfo &MRI;

  /// Number of registers that can be saved in a compact unwind encoding.
  enum { CU_NUM_SAVED_REGS = 6 };

  mutable unsigned SavedRegs[CU_NUM_SAVED_REGS];
  Triple TT;
  bool Is64Bit;

  unsigned OffsetSize;    ///< Offset of a "push" instruction.
  unsigned MoveInstrSize; ///< Size of a "move" instruction.
  unsigned StackDivide;   ///< Amount to adjust stack size by.

protected:
  /// Size of a "push" instruction for the given register.
  unsigned PushInstrSize(unsigned Reg) const {
    switch (Reg) {
    case X86::EBX:
    case X86::ECX:
    case X86::EDX:
    case X86::EDI:
    case X86::ESI:
    case X86::EBP:
    case X86::RBX:
    case X86::RBP:
      return 1;
    case X86::R12:
    case X86::R13:
    case X86::R14:
    case X86::R15:
      return 2;
    }
    return 1;
  }

private:
  /// Get the compact unwind number for a given register. The number
  /// corresponds to the enum lists in compact_unwind_encoding.h.
  int getCompactUnwindRegNum(unsigned Reg) const {
    static const MCPhysReg CU32BitRegs[7] = {
        X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
    };
    static const MCPhysReg CU64BitRegs[] = {
        X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
    };
    const MCPhysReg *CURegs = Is64Bit ? CU64BitRegs : CU32BitRegs;
    for (int Idx = 1; *CURegs; ++CURegs, ++Idx)
      if (*CURegs == Reg)
        return Idx;

    return -1;
  }

  /// Return the registers encoded for a compact encoding with a frame
  /// pointer.
  uint32_t encodeCompactUnwindRegistersWithFrame() const {
    // Encode the registers in the order they were saved --- 3-bits per
    // register. The list of saved registers is assumed to be in reverse
    // order. The registers are numbered from 1 to CU_NUM_SAVED_REGS.
    uint32_t RegEnc = 0;
    for (int i = 0, Idx = 0; i != CU_NUM_SAVED_REGS; ++i) {
      unsigned Reg = SavedRegs[i];
      if (Reg == 0)
        break;

      int CURegNum = getCompactUnwindRegNum(Reg);
      if (CURegNum == -1)
        return ~0U;

      // Encode the 3-bit register number in order, skipping over 3-bits for
      // each register.
      RegEnc |= (CURegNum & 0x7) << (Idx++ * 3);
    }

    assert((RegEnc & 0x3FFFF) == RegEnc &&
           "Invalid compact register encoding!");
    return RegEnc;
  }

  /// Create the permutation encoding used with frameless stacks. It is
  /// passed the number of registers to be saved and an array of the registers
  /// saved.
  uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned RegCount) const {
    // The saved registers are numbered from 1 to 6. In order to encode the
    // order in which they were saved, we re-number them according to their
    // place in the register order. The re-numbering is relative to the last
    // re-numbered register.
    // E.g., if we have registers {6, 2, 4, 5} saved in that order:
    //
    //    Orig  Re-Num
    //    ----  ------
    //     6       6
    //     2       2
    //     4       3
    //     5       3
    //
    for (unsigned i = 0; i < RegCount; ++i) {
      int CUReg = getCompactUnwindRegNum(SavedRegs[i]);
      if (CUReg == -1)
        return ~0U;
      SavedRegs[i] = CUReg;
    }

    // Reverse the list.
    std::reverse(&SavedRegs[0], &SavedRegs[CU_NUM_SAVED_REGS]);

    uint32_t RenumRegs[CU_NUM_SAVED_REGS];
    for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i) {
      unsigned Countless = 0;
      for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j)
        if (SavedRegs[j] < SavedRegs[i])
          ++Countless;

      RenumRegs[i] = SavedRegs[i] - Countless - 1;
    }

    // Take the renumbered values and encode them into a 10-bit number.
    uint32_t permutationEncoding = 0;
    switch (RegCount) {
    case 6:
      permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
                             + 6 * RenumRegs[2] + 2 * RenumRegs[3]
                             + RenumRegs[4];
      break;
    case 5:
      permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2]
                             + 6 * RenumRegs[3] + 2 * RenumRegs[4]
                             + RenumRegs[5];
      break;
    case 4:
      permutationEncoding |= 60 * RenumRegs[2] + 12 * RenumRegs[3]
                             + 3 * RenumRegs[4] + RenumRegs[5];
      break;
    case 3:
      permutationEncoding |= 20 * RenumRegs[3] + 4 * RenumRegs[4]
                             + RenumRegs[5];
      break;
    case 2:
      permutationEncoding |= 5 * RenumRegs[4] + RenumRegs[5];
      break;
    case 1:
      permutationEncoding |= RenumRegs[5];
      break;
    }

    assert((permutationEncoding & 0x3FF) == permutationEncoding &&
           "Invalid compact register encoding!");
    return permutationEncoding;
  }

public:
  DarwinX86AsmBackend(const Target &T, const MCRegisterInfo &MRI,
                      const MCSubtargetInfo &STI)
      : X86AsmBackend(T, STI), MRI(MRI), TT(STI.getTargetTriple()),
        Is64Bit(TT.isArch64Bit()) {
    memset(SavedRegs, 0, sizeof(SavedRegs));
    OffsetSize = Is64Bit ? 8 : 4;
    MoveInstrSize = Is64Bit ? 3 : 2;
    StackDivide = Is64Bit ? 8 : 4;
  }

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    uint32_t CPUType = cantFail(MachO::getCPUType(TT));
    uint32_t CPUSubType = cantFail(MachO::getCPUSubType(TT));
    return createX86MachObjectWriter(Is64Bit, CPUType, CPUSubType);
  }

  /// Implementation of algorithm to generate the compact unwind encoding
  /// for the CFI instructions.
  uint64_t generateCompactUnwindEncoding(const MCDwarfFrameInfo *FI,
                                         const MCContext *Ctxt) const override {
    ArrayRef<MCCFIInstruction> Instrs = FI->Instructions;
    if (Instrs.empty())
      return 0;
    if (!isDarwinCanonicalPersonality(FI->Personality) &&
        !Ctxt->emitCompactUnwindNonCanonical())
      return CU::UNWIND_MODE_DWARF;

    // Reset the saved registers.
    unsigned SavedRegIdx = 0;
    memset(SavedRegs, 0, sizeof(SavedRegs));

    bool HasFP = false;

    // Encode that we are using EBP/RBP as the frame pointer.
    uint64_t CompactUnwindEncoding = 0;

    unsigned SubtractInstrIdx = Is64Bit ?
        3 : 2;
    unsigned InstrOffset = 0;
    unsigned StackAdjust = 0;
    uint64_t StackSize = 0;
    int64_t MinAbsOffset = std::numeric_limits<int64_t>::max();

    for (const MCCFIInstruction &Inst : Instrs) {
      switch (Inst.getOperation()) {
      default:
        // Any other CFI directives indicate a frame that we aren't prepared
        // to represent via compact unwind, so just bail out.
        return CU::UNWIND_MODE_DWARF;
      case MCCFIInstruction::OpDefCfaRegister: {
        // Defines a frame pointer. E.g.
        //
        //     movq %rsp, %rbp
        //  L0:
        //     .cfi_def_cfa_register %rbp
        //
        HasFP = true;

        // If the frame pointer is other than esp/rsp, we do not have a way to
        // generate a compact unwinding representation, so bail out.
        if (*MRI.getLLVMRegNum(Inst.getRegister(), true) !=
            (Is64Bit ? X86::RBP : X86::EBP))
          return CU::UNWIND_MODE_DWARF;

        // Reset the counts.
        memset(SavedRegs, 0, sizeof(SavedRegs));
        StackAdjust = 0;
        SavedRegIdx = 0;
        MinAbsOffset = std::numeric_limits<int64_t>::max();
        InstrOffset += MoveInstrSize;
        break;
      }
      case MCCFIInstruction::OpDefCfaOffset: {
        // Defines a new offset for the CFA. E.g.
        //
        //  With frame:
        //
        //     pushq %rbp
        //  L0:
        //     .cfi_def_cfa_offset 16
        //
        //  Without frame:
        //
        //     subq $72, %rsp
        //  L0:
        //     .cfi_def_cfa_offset 80
        //
        StackSize = Inst.getOffset() / StackDivide;
        break;
      }
      case MCCFIInstruction::OpOffset: {
        // Defines a "push" of a callee-saved register. E.g.
        //
        //     pushq %r15
        //     pushq %r14
        //     pushq %rbx
        //  L0:
        //     subq $120, %rsp
        //  L1:
        //     .cfi_offset %rbx, -40
        //     .cfi_offset %r14, -32
        //     .cfi_offset %r15, -24
        //
        if (SavedRegIdx == CU_NUM_SAVED_REGS)
          // If there are too many saved registers, we cannot use a compact
          // unwind encoding.
          return CU::UNWIND_MODE_DWARF;

        unsigned Reg = *MRI.getLLVMRegNum(Inst.getRegister(), true);
        SavedRegs[SavedRegIdx++] = Reg;
        StackAdjust += OffsetSize;
        MinAbsOffset = std::min(MinAbsOffset, std::abs(Inst.getOffset()));
        InstrOffset += PushInstrSize(Reg);
        break;
      }
      }
    }

    StackAdjust /= StackDivide;

    if (HasFP) {
      if ((StackAdjust & 0xFF) != StackAdjust)
        // Offset was too big for a compact unwind encoding.
        return CU::UNWIND_MODE_DWARF;

      // We don't attempt to track a real StackAdjust, so if the saved
      // registers aren't adjacent to rbp we can't cope.
      if (SavedRegIdx != 0 && MinAbsOffset != 3 * (int)OffsetSize)
        return CU::UNWIND_MODE_DWARF;

      // Get the encoding of the saved registers when we have a frame pointer.
      uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame();
      if (RegEnc == ~0U)
        return CU::UNWIND_MODE_DWARF;

      CompactUnwindEncoding |= CU::UNWIND_MODE_BP_FRAME;
      CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16;
      CompactUnwindEncoding |= RegEnc & CU::UNWIND_BP_FRAME_REGISTERS;
    } else {
      SubtractInstrIdx += InstrOffset;
      ++StackAdjust;

      if ((StackSize & 0xFF) == StackSize) {
        // Frameless stack with a small stack size.
        CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IMMD;

        // Encode the stack size.
        CompactUnwindEncoding |= (StackSize & 0xFF) << 16;
      } else {
        if ((StackAdjust & 0x7) != StackAdjust)
          // The extra stack adjustments are too big for us to handle.
          return CU::UNWIND_MODE_DWARF;

        // Frameless stack with an offset too large for us to encode compactly.
        CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IND;

        // Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP'
        // instruction.
        CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16;

        // Encode any extra stack adjustments (done via push instructions).
        CompactUnwindEncoding |= (StackAdjust & 0x7) << 13;
      }

      // Encode the number of registers saved. (Reverse the list first.)
      std::reverse(&SavedRegs[0], &SavedRegs[SavedRegIdx]);
      CompactUnwindEncoding |= (SavedRegIdx & 0x7) << 10;

      // Get the encoding of the saved registers when we don't have a frame
      // pointer.
      uint32_t RegEnc = encodeCompactUnwindRegistersWithoutFrame(SavedRegIdx);
      if (RegEnc == ~0U)
        return CU::UNWIND_MODE_DWARF;

      // Encode the register encoding.
      CompactUnwindEncoding |=
          RegEnc & CU::UNWIND_FRAMELESS_STACK_REG_PERMUTATION;
    }

    return CompactUnwindEncoding;
  }
};

} // end anonymous namespace

MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
                                           const MCSubtargetInfo &STI,
                                           const MCRegisterInfo &MRI,
                                           const MCTargetOptions &Options) {
  const Triple &TheTriple = STI.getTargetTriple();
  if (TheTriple.isOSBinFormatMachO())
    return new DarwinX86AsmBackend(T, MRI, STI);

  if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
    return new WindowsX86AsmBackend(T, false, STI);

  uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());

  if (TheTriple.isOSIAMCU())
    return new ELFX86_IAMCUAsmBackend(T, OSABI, STI);

  return new ELFX86_32AsmBackend(T, OSABI, STI);
}

MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
                                           const MCSubtargetInfo &STI,
                                           const MCRegisterInfo &MRI,
                                           const MCTargetOptions &Options) {
  const Triple &TheTriple = STI.getTargetTriple();
  if (TheTriple.isOSBinFormatMachO())
    return new DarwinX86AsmBackend(T, MRI, STI);

  if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
    return new WindowsX86AsmBackend(T, true, STI);

  if (TheTriple.isUEFI()) {
    assert(TheTriple.isOSBinFormatCOFF() &&
           "Only COFF format is supported in UEFI environment.");
    return new WindowsX86AsmBackend(T, true, STI);
  }

  uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());

  if (TheTriple.isX32())
    return new ELFX86_X32AsmBackend(T, OSABI, STI);
  return new ELFX86_64AsmBackend(T, OSABI, STI);
}

namespace {
class X86ELFStreamer : public MCELFStreamer {
public:
  X86ELFStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> TAB,
                 std::unique_ptr<MCObjectWriter> OW,
                 std::unique_ptr<MCCodeEmitter> Emitter)
      : MCELFStreamer(Context, std::move(TAB), std::move(OW),
                      std::move(Emitter)) {}

  void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
};
} // end anonymous namespace

void X86_MC::emitInstruction(MCObjectStreamer &S, const MCInst &Inst,
                             const MCSubtargetInfo &STI) {
  auto &Backend = static_cast<X86AsmBackend &>(S.getAssembler().getBackend());
  Backend.emitInstructionBegin(S, Inst, STI);
  S.MCObjectStreamer::emitInstruction(Inst, STI);
  Backend.emitInstructionEnd(S, Inst);
}

void X86ELFStreamer::emitInstruction(const MCInst
                                         &Inst,
                                     const MCSubtargetInfo &STI) {
  X86_MC::emitInstruction(*this, Inst, STI);
}

MCStreamer *llvm::createX86ELFStreamer(const Triple &T, MCContext &Context,
                                       std::unique_ptr<MCAsmBackend> &&MAB,
                                       std::unique_ptr<MCObjectWriter> &&MOW,
                                       std::unique_ptr<MCCodeEmitter> &&MCE) {
  return new X86ELFStreamer(Context, std::move(MAB), std::move(MOW),
                            std::move(MCE));
}