//===-- X86AsmBackend.cpp - X86 Assembler Backend -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86EncodingOptimization.h"
#include "MCTargetDesc/X86FixupKinds.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCValue.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

namespace {
/// A wrapper for holding a mask of the values from X86::AlignBranchBoundaryKind
class X86AlignBranchKind {
private:
  uint8_t AlignBranchKind = 0;

public:
  void operator=(const std::string &Val) {
    if (Val.empty())
      return;
    SmallVector<StringRef, 6> BranchTypes;
    StringRef(Val).split(BranchTypes, '+', -1, false);
    for (auto BranchType : BranchTypes) {
      if (BranchType == "fused")
        addKind(X86::AlignBranchFused);
      else if (BranchType == "jcc")
        addKind(X86::AlignBranchJcc);
      else if (BranchType == "jmp")
        addKind(X86::AlignBranchJmp);
      else if (BranchType == "call")
        addKind(X86::AlignBranchCall);
      else if (BranchType == "ret")
        addKind(X86::AlignBranchRet);
      else if (BranchType == "indirect")
        addKind(X86::AlignBranchIndirect);
      else {
        errs() << "invalid argument " << BranchType.str()
               << " to -x86-align-branch=; each element must be one of: "
                  "fused, jcc, jmp, call, ret, indirect (plus-separated).\n";
      }
    }
  }

  operator uint8_t() const { return AlignBranchKind; }
  void addKind(X86::AlignBranchBoundaryKind Value) { AlignBranchKind |= Value; }
};

X86AlignBranchKind X86AlignBranchKindLoc;

cl::opt<unsigned> X86AlignBranchBoundary(
    "x86-align-branch-boundary", cl::init(0),
    cl::desc(
        "Control how the assembler should align branches with NOP. If the "
        "boundary's size is not 0, it should be a power of 2 and no less "
        "than 32. Branches will be aligned to prevent them from crossing or "
        "ending against a boundary of the specified size. The default value "
        "0 does not align branches."));
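// Illustrative usage (hedged; the exact driver spelling may vary by tool and
// version): these are LLVM cl::opt flags, so they can be passed directly to
// MC-layer tools, e.g.
//   llvm-mc -x86-align-branch-boundary=32 -x86-align-branch=fused+jcc+jmp foo.s
// or forwarded from a compiler driver via -mllvm.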
cl::opt<X86AlignBranchKind, true, cl::parser<std::string>> X86AlignBranch(
    "x86-align-branch",
    cl::desc(
        "Specify types of branches to align (plus separated list of types):"
        "\njcc indicates conditional jumps"
        "\nfused indicates fused conditional jumps"
        "\njmp indicates direct unconditional jumps"
        "\ncall indicates direct and indirect calls"
        "\nret indicates rets"
        "\nindirect indicates indirect unconditional jumps"),
    cl::location(X86AlignBranchKindLoc));

cl::opt<bool> X86AlignBranchWithin32BBoundaries(
    "x86-branches-within-32B-boundaries", cl::init(false),
    cl::desc(
        "Align selected instructions to mitigate negative performance impact "
        "of Intel's microcode update for errata skx102. May break "
        "assumptions about labels corresponding to particular instructions, "
        "and should be used with caution."));

cl::opt<unsigned> X86PadMaxPrefixSize(
    "x86-pad-max-prefix-size", cl::init(0),
    cl::desc("Maximum number of prefixes to use for padding"));

cl::opt<bool> X86PadForAlign(
    "x86-pad-for-align", cl::init(false), cl::Hidden,
    cl::desc("Pad previous instructions to implement align directives"));

cl::opt<bool> X86PadForBranchAlign(
    "x86-pad-for-branch-align", cl::init(true), cl::Hidden,
    cl::desc("Pad previous instructions to implement branch alignment"));

class X86AsmBackend : public MCAsmBackend {
  const MCSubtargetInfo &STI;
  std::unique_ptr<const MCInstrInfo> MCII;
  X86AlignBranchKind AlignBranchType;
  Align AlignBoundary;
  unsigned TargetPrefixMax = 0;

  MCInst PrevInst;
  unsigned PrevInstOpcode = 0;
  MCBoundaryAlignFragment *PendingBA = nullptr;
  std::pair<MCFragment *, size_t> PrevInstPosition;
  bool IsRightAfterData = false;

  uint8_t determinePaddingPrefix(const MCInst &Inst) const;
  bool isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const;
  bool needAlign(const MCInst &Inst) const;
  bool canPadBranches(MCObjectStreamer &OS) const;
  bool canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const;

public:
  X86AsmBackend(const Target &T, const MCSubtargetInfo &STI)
      : MCAsmBackend(llvm::endianness::little), STI(STI),
        MCII(T.createMCInstrInfo()) {
    if (X86AlignBranchWithin32BBoundaries) {
      // At the moment, this defaults to aligning fused branches, unconditional
      // jumps, and (unfused) conditional jumps with nops. Both the
      // instructions aligned and the alignment method (nop vs prefix) may
      // change in the future.
      AlignBoundary = assumeAligned(32);
      AlignBranchType.addKind(X86::AlignBranchFused);
      AlignBranchType.addKind(X86::AlignBranchJcc);
      AlignBranchType.addKind(X86::AlignBranchJmp);
    }
    // Allow overriding the defaults set by the main flag.
    if (X86AlignBranchBoundary.getNumOccurrences())
      AlignBoundary = assumeAligned(X86AlignBranchBoundary);
    if (X86AlignBranch.getNumOccurrences())
      AlignBranchType = X86AlignBranchKindLoc;
    if (X86PadMaxPrefixSize.getNumOccurrences())
      TargetPrefixMax = X86PadMaxPrefixSize;
  }

  bool allowAutoPadding() const override;
  bool allowEnhancedRelaxation() const override;
  void emitInstructionBegin(MCObjectStreamer &OS, const MCInst &Inst,
                            const MCSubtargetInfo &STI);
  void emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst);

  unsigned getNumFixupKinds() const override {
    return X86::NumTargetFixupKinds;
  }

  std::optional<MCFixupKind> getFixupKind(StringRef Name) const override;

  const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;

  bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
                             const MCValue &Target, const uint64_t Value,
                             const MCSubtargetInfo *STI) override;

  void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
                  const MCValue &Target, MutableArrayRef<char> Data,
                  uint64_t Value, bool IsResolved,
                  const MCSubtargetInfo *STI) const override;

  bool mayNeedRelaxation(const MCInst &Inst,
                         const MCSubtargetInfo &STI) const override;

  bool fixupNeedsRelaxation(const MCFixup &Fixup,
                            uint64_t Value) const override;

  void relaxInstruction(MCInst &Inst,
                        const MCSubtargetInfo &STI) const override;

  bool padInstructionViaRelaxation(MCRelaxableFragment &RF,
                                   MCCodeEmitter &Emitter,
                                   unsigned &RemainingSize) const;

  bool padInstructionViaPrefix(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
                               unsigned &RemainingSize) const;

  bool padInstructionEncoding(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
                              unsigned &RemainingSize) const;

  void finishLayout(const MCAssembler &Asm) const override;

  unsigned getMaximumNopSize(const MCSubtargetInfo &STI) const override;

  bool writeNopData(raw_ostream &OS, uint64_t Count,
                    const MCSubtargetInfo *STI) const override;
};
} // end anonymous namespace

static bool isRelaxableBranch(unsigned Opcode) {
  return Opcode == X86::JCC_1 || Opcode == X86::JMP_1;
}

static unsigned getRelaxedOpcodeBranch(unsigned Opcode,
                                       bool Is16BitMode = false) {
  switch (Opcode) {
  default:
    llvm_unreachable("invalid opcode for branch");
  case X86::JCC_1:
    return (Is16BitMode) ? X86::JCC_2 : X86::JCC_4;
  case X86::JMP_1:
    return (Is16BitMode) ? X86::JMP_2 : X86::JMP_4;
  }
}

static unsigned getRelaxedOpcode(const MCInst &MI, bool Is16BitMode) {
  unsigned Opcode = MI.getOpcode();
  return isRelaxableBranch(Opcode) ? getRelaxedOpcodeBranch(Opcode, Is16BitMode)
                                   : X86::getOpcodeForLongImmediateForm(Opcode);
}
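// For reference (standard x86 encodings, not specific to this file): the short
// form `jmp rel8` (EB cb) relaxes to `jmp rel32` (E9 cd), and a short
// `jcc rel8` (70+cc cb) relaxes to the two-byte-opcode form 0F 80+cc cd, so
// relaxation grows a branch from 2 bytes to 5 or 6 bytes (rel16 in 16-bit
// mode).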
static X86::CondCode getCondFromBranch(const MCInst &MI,
                                       const MCInstrInfo &MCII) {
  unsigned Opcode = MI.getOpcode();
  switch (Opcode) {
  default:
    return X86::COND_INVALID;
  case X86::JCC_1: {
    const MCInstrDesc &Desc = MCII.get(Opcode);
    return static_cast<X86::CondCode>(
        MI.getOperand(Desc.getNumOperands() - 1).getImm());
  }
  }
}

static X86::SecondMacroFusionInstKind
classifySecondInstInMacroFusion(const MCInst &MI, const MCInstrInfo &MCII) {
  X86::CondCode CC = getCondFromBranch(MI, MCII);
  return classifySecondCondCodeInMacroFusion(CC);
}

/// Check if the instruction uses RIP relative addressing.
static bool isRIPRelative(const MCInst &MI, const MCInstrInfo &MCII) {
  unsigned Opcode = MI.getOpcode();
  const MCInstrDesc &Desc = MCII.get(Opcode);
  uint64_t TSFlags = Desc.TSFlags;
  unsigned CurOp = X86II::getOperandBias(Desc);
  int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
  if (MemoryOperand < 0)
    return false;
  unsigned BaseRegNum = MemoryOperand + CurOp + X86::AddrBaseReg;
  MCRegister BaseReg = MI.getOperand(BaseRegNum).getReg();
  return (BaseReg == X86::RIP);
}

/// Check if the instruction is a prefix.
static bool isPrefix(unsigned Opcode, const MCInstrInfo &MCII) {
  return X86II::isPrefix(MCII.get(Opcode).TSFlags);
}

/// Check if the instruction is valid as the first instruction in macro fusion.
static bool isFirstMacroFusibleInst(const MCInst &Inst,
                                    const MCInstrInfo &MCII) {
  // An Intel instruction with RIP relative addressing is not macro fusible.
  if (isRIPRelative(Inst, MCII))
    return false;
  X86::FirstMacroFusionInstKind FIK =
      X86::classifyFirstOpcodeInMacroFusion(Inst.getOpcode());
  return FIK != X86::FirstMacroFusionInstKind::Invalid;
}

/// X86 can reduce the number of NOP bytes by padding instructions with
/// prefixes, which can give better performance in some cases. Here, we
/// determine which prefix is the most suitable.
///
/// If the instruction has a segment override prefix, use the existing one.
/// If the target is 64-bit, use CS.
/// If the target is 32-bit,
///   - If the instruction has an ESP/EBP base register, use SS.
///   - Otherwise use DS.
uint8_t X86AsmBackend::determinePaddingPrefix(const MCInst &Inst) const {
  assert((STI.hasFeature(X86::Is32Bit) || STI.hasFeature(X86::Is64Bit)) &&
         "Prefixes can be added only in 32-bit or 64-bit mode.");
  const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
  uint64_t TSFlags = Desc.TSFlags;

  // Determine where the memory operand starts, if present.
  int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
  if (MemoryOperand != -1)
    MemoryOperand += X86II::getOperandBias(Desc);

  MCRegister SegmentReg;
  if (MemoryOperand >= 0) {
    // Check for explicit segment override on memory operand.
    SegmentReg = Inst.getOperand(MemoryOperand + X86::AddrSegmentReg).getReg();
  }

  switch (TSFlags & X86II::FormMask) {
  default:
    break;
  case X86II::RawFrmDstSrc: {
    // Check segment override opcode prefix as needed (not for %ds).
    if (Inst.getOperand(2).getReg() != X86::DS)
      SegmentReg = Inst.getOperand(2).getReg();
    break;
  }
  case X86II::RawFrmSrc: {
    // Check segment override opcode prefix as needed (not for %ds).
    if (Inst.getOperand(1).getReg() != X86::DS)
      SegmentReg = Inst.getOperand(1).getReg();
    break;
  }
  case X86II::RawFrmMemOffs: {
    // Check segment override opcode prefix as needed.
    SegmentReg = Inst.getOperand(1).getReg();
    break;
  }
  }

  if (SegmentReg)
    return X86::getSegmentOverridePrefixForReg(SegmentReg);

  if (STI.hasFeature(X86::Is64Bit))
    return X86::CS_Encoding;

  if (MemoryOperand >= 0) {
    unsigned BaseRegNum = MemoryOperand + X86::AddrBaseReg;
    MCRegister BaseReg = Inst.getOperand(BaseRegNum).getReg();
    if (BaseReg == X86::ESP || BaseReg == X86::EBP)
      return X86::SS_Encoding;
  }
  return X86::DS_Encoding;
}
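// Illustration (assumed example using standard segment-override encodings): in
// 64-bit mode the chosen padding prefix is CS (0x2E), which the processor
// ignores, so e.g. `addq %rcx, %rax` (48 01 c8) padded with two prefix bytes
// becomes 2e 2e 48 01 c8 and still executes identically while occupying two
// extra bytes.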
/// Check if the two instructions will be macro-fused on the target cpu.
bool X86AsmBackend::isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const {
  const MCInstrDesc &InstDesc = MCII->get(Jcc.getOpcode());
  if (!InstDesc.isConditionalBranch())
    return false;
  if (!isFirstMacroFusibleInst(Cmp, *MCII))
    return false;
  const X86::FirstMacroFusionInstKind CmpKind =
      X86::classifyFirstOpcodeInMacroFusion(Cmp.getOpcode());
  const X86::SecondMacroFusionInstKind BranchKind =
      classifySecondInstInMacroFusion(Jcc, *MCII);
  return X86::isMacroFused(CmpKind, BranchKind);
}

/// Check if the instruction has a variant symbol operand.
static bool hasVariantSymbol(const MCInst &MI) {
  for (auto &Operand : MI) {
    if (!Operand.isExpr())
      continue;
    const MCExpr &Expr = *Operand.getExpr();
    if (Expr.getKind() == MCExpr::SymbolRef &&
        cast<MCSymbolRefExpr>(Expr).getKind() != MCSymbolRefExpr::VK_None)
      return true;
  }
  return false;
}

bool X86AsmBackend::allowAutoPadding() const {
  return (AlignBoundary != Align(1) && AlignBranchType != X86::AlignBranchNone);
}

bool X86AsmBackend::allowEnhancedRelaxation() const {
  return allowAutoPadding() && TargetPrefixMax != 0 && X86PadForBranchAlign;
}

/// X86 has certain instructions which enable interrupts exactly one
/// instruction *after* the instruction which stores to SS. Return true if the
/// given instruction may have such an interrupt delay slot.
static bool mayHaveInterruptDelaySlot(unsigned InstOpcode) {
  switch (InstOpcode) {
  case X86::POPSS16:
  case X86::POPSS32:
  case X86::STI:
    return true;

  case X86::MOV16sr:
  case X86::MOV32sr:
  case X86::MOV64sr:
  case X86::MOV16sm:
    // In fact, this is only the case if the first operand is SS. However, as
    // segment moves occur extremely rarely, this is just a minor pessimization.
    return true;
  }
  return false;
}

/// Check if the instruction to be emitted is right after any data.
static bool
isRightAfterData(MCFragment *CurrentFragment,
                 const std::pair<MCFragment *, size_t> &PrevInstPosition) {
  MCFragment *F = CurrentFragment;
  // Since data is always emitted into a DataFragment, our check strategy is
  // simple here.
  //   - If the fragment is a DataFragment
  //     - If it's empty (section start or data after align), return false.
  //     - If it's not the fragment where the previous instruction is,
  //       returns true.
  //     - If it's the fragment holding the previous instruction but its
  //       size changed since the previous instruction was emitted into
  //       it, returns true.
  //     - Otherwise returns false.
  //   - If the fragment is not a DataFragment, returns false.
  if (auto *DF = dyn_cast_or_null<MCDataFragment>(F))
    return DF->getContents().size() &&
           (DF != PrevInstPosition.first ||
            DF->getContents().size() != PrevInstPosition.second);

  return false;
}

/// \returns the fragment size if it has instructions, otherwise returns 0.
static size_t getSizeForInstFragment(const MCFragment *F) {
  if (!F || !F->hasInstructions())
    return 0;
  // MCEncodedFragmentWithContents being templated makes this tricky.
  switch (F->getKind()) {
  default:
    llvm_unreachable("Unknown fragment with instructions!");
  case MCFragment::FT_Data:
    return cast<MCDataFragment>(*F).getContents().size();
  case MCFragment::FT_Relaxable:
    return cast<MCRelaxableFragment>(*F).getContents().size();
  }
}

/// Return true if we can insert NOPs or prefixes automatically before the
/// instruction to be emitted.
bool X86AsmBackend::canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const {
  if (hasVariantSymbol(Inst))
    // The linker may rewrite an instruction with a variant symbol operand
    // (e.g. TLSCALL).
    return false;

  if (mayHaveInterruptDelaySlot(PrevInstOpcode))
    // If this instruction follows an interrupt enabling instruction with a one
    // instruction delay, inserting a nop would change behavior.
    return false;

  if (isPrefix(PrevInstOpcode, *MCII))
    // If this instruction follows a prefix, inserting a nop/prefix would
    // change semantics.
    return false;

  if (isPrefix(Inst.getOpcode(), *MCII))
    // If this instruction is a prefix, inserting a prefix would change
    // semantics.
    return false;

  if (IsRightAfterData)
    // If this instruction follows any data, there is no clear instruction
    // boundary; inserting a nop/prefix would change semantics.
    return false;

  return true;
}

bool X86AsmBackend::canPadBranches(MCObjectStreamer &OS) const {
  if (!OS.getAllowAutoPadding())
    return false;
  assert(allowAutoPadding() && "incorrect initialization!");

  // We only pad in the text section.
  if (!OS.getCurrentSectionOnly()->isText())
    return false;

  // TODO: Currently we don't deal with bundle cases.
  if (OS.getAssembler().isBundlingEnabled())
    return false;

  // Branches only need to be aligned in 32-bit or 64-bit mode.
  if (!(STI.hasFeature(X86::Is64Bit) || STI.hasFeature(X86::Is32Bit)))
    return false;

  return true;
}

/// Check if the instruction needs to be aligned.
bool X86AsmBackend::needAlign(const MCInst &Inst) const {
  const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
  return (Desc.isConditionalBranch() &&
          (AlignBranchType & X86::AlignBranchJcc)) ||
         (Desc.isUnconditionalBranch() &&
          (AlignBranchType & X86::AlignBranchJmp)) ||
         (Desc.isCall() && (AlignBranchType & X86::AlignBranchCall)) ||
         (Desc.isReturn() && (AlignBranchType & X86::AlignBranchRet)) ||
         (Desc.isIndirectBranch() &&
          (AlignBranchType & X86::AlignBranchIndirect));
}
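// Sketch of the intended effect (hypothetical addresses, assuming a 32-byte
// boundary and fused-branch alignment enabled): if a cmp/jcc pair would
// otherwise be laid out as
//   0x1e: cmp %rcx, %rax
//   0x21: je  .L0
// and so cross the 0x20 boundary, the BoundaryAlign fragment emitted below
// lets the assembler insert NOPs (or prefix padding) before the cmp so the
// whole fused pair lands inside one 32-byte window.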
/// Insert a BoundaryAlignFragment before instructions to align branches.
void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS,
                                         const MCInst &Inst,
                                         const MCSubtargetInfo &STI) {
  // Used by canPadInst. Done here, because in emitInstructionEnd, the current
  // fragment will have changed.
  IsRightAfterData =
      isRightAfterData(OS.getCurrentFragment(), PrevInstPosition);

  if (!canPadBranches(OS))
    return;

  // NB: PrevInst is only valid if canPadBranches is true.
  if (!isMacroFused(PrevInst, Inst))
    // Macro fusion doesn't actually happen; clear the pending fragment.
    PendingBA = nullptr;

  // When branch padding is enabled (basically the skx102 erratum => unlikely),
  // we call canPadInst (not cheap) twice. However, in the common case, we can
  // avoid unnecessary calls to it, as it is otherwise only used for relaxable
  // fragments.
  if (!canPadInst(Inst, OS))
    return;

  if (PendingBA && PendingBA->getNext() == OS.getCurrentFragment()) {
    // Macro fusion actually happens and there is no other fragment inserted
    // after the previous instruction.
    //
    // Do nothing here since we already inserted a BoundaryAlign fragment when
    // we met the first instruction in the fused pair and we'll tie them
    // together in emitInstructionEnd.
    //
    // Note: When there is at least one fragment, such as MCAlignFragment,
    // inserted after the previous instruction, e.g.
    //
    // \code
    //   cmp %rax %rcx
    //   .align 16
    //   je .Label0
    // \endcode
    //
    // we will treat the JCC as an unfused branch although it may be fused
    // with the CMP.
    return;
  }

  if (needAlign(Inst) || ((AlignBranchType & X86::AlignBranchFused) &&
                          isFirstMacroFusibleInst(Inst, *MCII))) {
    // If we meet an unfused branch or the first instruction in a fusible
    // pair, insert a BoundaryAlign fragment.
    PendingBA = OS.getContext().allocFragment<MCBoundaryAlignFragment>(
        AlignBoundary, STI);
    OS.insert(PendingBA);
  }
}

/// Set the last fragment to be aligned for the BoundaryAlignFragment.
void X86AsmBackend::emitInstructionEnd(MCObjectStreamer &OS,
                                       const MCInst &Inst) {
  MCFragment *CF = OS.getCurrentFragment();
  if (auto *F = dyn_cast_or_null<MCRelaxableFragment>(CF))
    F->setAllowAutoPadding(canPadInst(Inst, OS));

  // Update PrevInstOpcode here; canPadInst() reads it.
  PrevInstOpcode = Inst.getOpcode();
  PrevInstPosition = std::make_pair(CF, getSizeForInstFragment(CF));

  if (!canPadBranches(OS))
    return;

  // PrevInst is only needed if canPadBranches is true. Copying an MCInst
  // isn't cheap.
  PrevInst = Inst;

  if (!needAlign(Inst) || !PendingBA)
    return;

  // Tie the aligned instructions into a pending BoundaryAlign.
  PendingBA->setLastFragment(CF);
  PendingBA = nullptr;

  // We need to ensure that further data isn't added to the current
  // DataFragment, so that we can get the size of instructions later in
  // MCAssembler::relaxBoundaryAlign. The easiest way is to insert a new empty
  // DataFragment.
  if (isa_and_nonnull<MCDataFragment>(CF))
    OS.insert(OS.getContext().allocFragment<MCDataFragment>());

  // Update the maximum alignment on the current section if necessary.
  MCSection *Sec = OS.getCurrentSectionOnly();
  Sec->ensureMinAlignment(AlignBoundary);
}

std::optional<MCFixupKind> X86AsmBackend::getFixupKind(StringRef Name) const {
  if (STI.getTargetTriple().isOSBinFormatELF()) {
    unsigned Type;
    if (STI.getTargetTriple().getArch() == Triple::x86_64) {
      Type = llvm::StringSwitch<unsigned>(Name)
#define ELF_RELOC(X, Y) .Case(#X, Y)
#include "llvm/BinaryFormat/ELFRelocs/x86_64.def"
#undef ELF_RELOC
                 .Case("BFD_RELOC_NONE", ELF::R_X86_64_NONE)
                 .Case("BFD_RELOC_8", ELF::R_X86_64_8)
                 .Case("BFD_RELOC_16", ELF::R_X86_64_16)
                 .Case("BFD_RELOC_32", ELF::R_X86_64_32)
                 .Case("BFD_RELOC_64", ELF::R_X86_64_64)
                 .Default(-1u);
    } else {
      Type = llvm::StringSwitch<unsigned>(Name)
#define ELF_RELOC(X, Y) .Case(#X, Y)
#include "llvm/BinaryFormat/ELFRelocs/i386.def"
#undef ELF_RELOC
                 .Case("BFD_RELOC_NONE", ELF::R_386_NONE)
                 .Case("BFD_RELOC_8", ELF::R_386_8)
                 .Case("BFD_RELOC_16", ELF::R_386_16)
                 .Case("BFD_RELOC_32", ELF::R_386_32)
                 .Default(-1u);
    }
    if (Type == -1u)
      return std::nullopt;
    return static_cast<MCFixupKind>(FirstLiteralRelocationKind + Type);
  }
  return MCAsmBackend::getFixupKind(Name);
}

const MCFixupKindInfo &X86AsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
  const static MCFixupKindInfo Infos[X86::NumTargetFixupKinds] = {
      // clang-format off
      {"reloc_riprel_4byte", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
      {"reloc_riprel_4byte_movq_load", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
      {"reloc_riprel_4byte_movq_load_rex2", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
      {"reloc_riprel_4byte_relax", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
      {"reloc_riprel_4byte_relax_rex", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
      {"reloc_riprel_4byte_relax_rex2", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
      {"reloc_riprel_4byte_relax_evex", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
      {"reloc_signed_4byte", 0, 32, 0},
      {"reloc_signed_4byte_relax", 0, 32, 0},
      {"reloc_global_offset_table", 0, 32, 0},
      {"reloc_global_offset_table8", 0, 64, 0},
      {"reloc_branch_4byte_pcrel", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
      // clang-format on
  };

  // Fixup kinds from .reloc directive are like R_386_NONE/R_X86_64_NONE. They
  // do not require any extra processing.
  if (Kind >= FirstLiteralRelocationKind)
    return MCAsmBackend::getFixupKindInfo(FK_NONE);

  if (Kind < FirstTargetFixupKind)
    return MCAsmBackend::getFixupKindInfo(Kind);

  assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
         "Invalid kind!");
  assert(Infos[Kind - FirstTargetFixupKind].Name && "Empty fixup name!");
  return Infos[Kind - FirstTargetFixupKind];
}

bool X86AsmBackend::shouldForceRelocation(const MCAssembler &,
                                          const MCFixup &Fixup, const MCValue &,
                                          const uint64_t,
                                          const MCSubtargetInfo *STI) {
  return Fixup.getKind() >= FirstLiteralRelocationKind;
}

static unsigned getFixupKindSize(unsigned Kind) {
  switch (Kind) {
  default:
    llvm_unreachable("invalid fixup kind!");
  case FK_NONE:
    return 0;
  case FK_PCRel_1:
  case FK_SecRel_1:
  case FK_Data_1:
    return 1;
  case FK_PCRel_2:
  case FK_SecRel_2:
  case FK_Data_2:
    return 2;
  case FK_PCRel_4:
  case X86::reloc_riprel_4byte:
  case X86::reloc_riprel_4byte_relax:
  case X86::reloc_riprel_4byte_relax_rex:
  case X86::reloc_riprel_4byte_relax_rex2:
  case X86::reloc_riprel_4byte_movq_load:
  case X86::reloc_riprel_4byte_movq_load_rex2:
  case X86::reloc_riprel_4byte_relax_evex:
  case X86::reloc_signed_4byte:
  case X86::reloc_signed_4byte_relax:
  case X86::reloc_global_offset_table:
  case X86::reloc_branch_4byte_pcrel:
  case FK_SecRel_4:
  case FK_Data_4:
    return 4;
  case FK_PCRel_8:
  case FK_SecRel_8:
  case FK_Data_8:
  case X86::reloc_global_offset_table8:
    return 8;
  }
}

void X86AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
                               const MCValue &Target,
                               MutableArrayRef<char> Data,
                               uint64_t Value, bool IsResolved,
                               const MCSubtargetInfo *STI) const {
  unsigned Kind = Fixup.getKind();
  if (Kind >= FirstLiteralRelocationKind)
    return;
  unsigned Size = getFixupKindSize(Kind);

  assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!");

  int64_t SignedValue = static_cast<int64_t>(Value);
  if ((Target.isAbsolute() || IsResolved) &&
      getFixupKindInfo(Fixup.getKind()).Flags &
          MCFixupKindInfo::FKF_IsPCRel) {
    // Check that the PC-relative fixup fits into the fixup size.
    if (Size > 0 && !isIntN(Size * 8, SignedValue))
      Asm.getContext().reportError(
          Fixup.getLoc(), "value of " + Twine(SignedValue) +
                              " is too large for field of " + Twine(Size) +
                              ((Size == 1) ? " byte." : " bytes."));
  } else {
    // Check that the upper bits are either all zeros or all ones.
    // Specifically ignore overflow/underflow as long as the leakage is
    // limited to the lower bits. This is to remain compatible with
    // other assemblers.
    assert((Size == 0 || isIntN(Size * 8 + 1, SignedValue)) &&
           "Value does not fit in the Fixup field");
  }

  for (unsigned i = 0; i != Size; ++i)
    Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8));
}
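// Worked example (illustrative values, tracing the byte loop above): patching
// a 4-byte fixup at offset 3 with Value = 0x11223344 writes the bytes
// 44 33 22 11 into Data[3..6], least-significant byte first, matching x86's
// little-endian encoding.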
bool X86AsmBackend::mayNeedRelaxation(const MCInst &MI,
                                      const MCSubtargetInfo &STI) const {
  unsigned Opcode = MI.getOpcode();
  unsigned SkipOperands = X86::isCCMPCC(Opcode) ? 2 : 0;
  return isRelaxableBranch(Opcode) ||
         (X86::getOpcodeForLongImmediateForm(Opcode) != Opcode &&
          MI.getOperand(MI.getNumOperands() - 1 - SkipOperands).isExpr());
}

bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
                                         uint64_t Value) const {
  // Relax if the value is too big for a (signed) i8.
  return !isInt<8>(Value);
}

// FIXME: Can tblgen help at all here to verify there aren't other instructions
// we can relax?
void X86AsmBackend::relaxInstruction(MCInst &Inst,
                                     const MCSubtargetInfo &STI) const {
  // The only relaxation X86 does is from a 1-byte pcrel to a 4-byte pcrel.
  bool Is16BitMode = STI.hasFeature(X86::Is16Bit);
  unsigned RelaxedOp = getRelaxedOpcode(Inst, Is16BitMode);

  if (RelaxedOp == Inst.getOpcode()) {
    SmallString<256> Tmp;
    raw_svector_ostream OS(Tmp);
    Inst.dump_pretty(OS);
    OS << "\n";
    report_fatal_error("unexpected instruction to relax: " + OS.str());
  }

  Inst.setOpcode(RelaxedOp);
}

bool X86AsmBackend::padInstructionViaPrefix(MCRelaxableFragment &RF,
                                            MCCodeEmitter &Emitter,
                                            unsigned &RemainingSize) const {
  if (!RF.getAllowAutoPadding())
    return false;
  // If the instruction isn't fully relaxed, shifting it around might require a
  // larger value for one of the fixups than can be encoded. The outer loop
  // will also catch this before moving to the next instruction, but we need to
  // prevent padding this single instruction as well.
  if (mayNeedRelaxation(RF.getInst(), *RF.getSubtargetInfo()))
    return false;

  const unsigned OldSize = RF.getContents().size();
  if (OldSize == 15)
    return false;

  const unsigned MaxPossiblePad = std::min(15 - OldSize, RemainingSize);
  const unsigned RemainingPrefixSize = [&]() -> unsigned {
    SmallString<15> Code;
    X86_MC::emitPrefix(Emitter, RF.getInst(), Code, STI);
    assert(Code.size() < 15 && "The number of prefixes must be less than 15.");

    // TODO: It turns out we need a decent amount of plumbing for the target
    // specific bits to determine the number of prefixes it is safe to add.
    // Various targets (older chips mostly, but also the Atom family) encounter
    // decoder stalls with too many prefixes. For testing purposes, we set the
    // value externally for the moment.
    unsigned ExistingPrefixSize = Code.size();
    if (TargetPrefixMax <= ExistingPrefixSize)
      return 0;
    return TargetPrefixMax - ExistingPrefixSize;
  }();
  const unsigned PrefixBytesToAdd =
      std::min(MaxPossiblePad, RemainingPrefixSize);
  if (PrefixBytesToAdd == 0)
    return false;

  const uint8_t Prefix = determinePaddingPrefix(RF.getInst());

  SmallString<256> Code;
  Code.append(PrefixBytesToAdd, Prefix);
  Code.append(RF.getContents().begin(), RF.getContents().end());
  RF.setContents(Code);

  // Adjust the fixups for the change in offsets.
  for (auto &F : RF.getFixups()) {
    F.setOffset(F.getOffset() + PrefixBytesToAdd);
  }

  RemainingSize -= PrefixBytesToAdd;
  return true;
}

bool X86AsmBackend::padInstructionViaRelaxation(MCRelaxableFragment &RF,
                                                MCCodeEmitter &Emitter,
                                                unsigned &RemainingSize) const {
  if (!mayNeedRelaxation(RF.getInst(), *RF.getSubtargetInfo()))
    // TODO: There are lots of other tricks we could apply for increasing
    // encoding size without impacting performance.
    return false;

  MCInst Relaxed = RF.getInst();
  relaxInstruction(Relaxed, *RF.getSubtargetInfo());

  SmallVector<MCFixup, 4> Fixups;
  SmallString<15> Code;
  Emitter.encodeInstruction(Relaxed, Code, Fixups, *RF.getSubtargetInfo());
  const unsigned OldSize = RF.getContents().size();
  const unsigned NewSize = Code.size();
  assert(NewSize >= OldSize && "size decrease during relaxation?");
  unsigned Delta = NewSize - OldSize;
  if (Delta > RemainingSize)
    return false;
  RF.setInst(Relaxed);
  RF.setContents(Code);
  RF.getFixups() = Fixups;
  RemainingSize -= Delta;
  return true;
}

bool X86AsmBackend::padInstructionEncoding(MCRelaxableFragment &RF,
                                           MCCodeEmitter &Emitter,
                                           unsigned &RemainingSize) const {
  bool Changed = false;
  if (RemainingSize != 0)
    Changed |= padInstructionViaRelaxation(RF, Emitter, RemainingSize);
  if (RemainingSize != 0)
    Changed |= padInstructionViaPrefix(RF, Emitter, RemainingSize);
  return Changed;
}

void X86AsmBackend::finishLayout(MCAssembler const &Asm) const {
  // See if we can further relax some instructions to cut down on the number of
  // nop bytes required for code alignment. The actual win is in reducing
  // instruction count, not number of bytes. Modern X86-64 can easily end up
  // decode limited. It is often better to reduce the number of instructions
  // (i.e. eliminate nops) even at the cost of increasing the size and
  // complexity of others.
  if (!X86PadForAlign && !X86PadForBranchAlign)
    return;

  // The processed regions are delimited by LabeledFragments. -g may have more
  // MCSymbols and therefore different relaxation results. X86PadForAlign is
  // disabled by default to eliminate the -g vs non -g difference.
  DenseSet<MCFragment *> LabeledFragments;
  for (const MCSymbol &S : Asm.symbols())
    LabeledFragments.insert(S.getFragment(false));

  for (MCSection &Sec : Asm) {
    if (!Sec.isText())
      continue;

    SmallVector<MCRelaxableFragment *, 4> Relaxable;
    for (MCSection::iterator I = Sec.begin(), IE = Sec.end(); I != IE; ++I) {
      MCFragment &F = *I;

      if (LabeledFragments.count(&F))
        Relaxable.clear();

      if (F.getKind() == MCFragment::FT_Data) // Skip and ignore
        continue;

      if (F.getKind() == MCFragment::FT_Relaxable) {
        auto &RF = cast<MCRelaxableFragment>(*I);
        Relaxable.push_back(&RF);
        continue;
      }

      auto canHandle = [](MCFragment &F) -> bool {
        switch (F.getKind()) {
        default:
          return false;
        case MCFragment::FT_Align:
          return X86PadForAlign;
        case MCFragment::FT_BoundaryAlign:
          return X86PadForBranchAlign;
        }
      };
      // For any unhandled kind, assume we can't change layout.
      if (!canHandle(F)) {
        Relaxable.clear();
        continue;
      }

#ifndef NDEBUG
      const uint64_t OrigOffset = Asm.getFragmentOffset(F);
#endif
      const uint64_t OrigSize = Asm.computeFragmentSize(F);

      // To keep the effects local, prefer to relax instructions closest to
      // the align directive. This is purely about human understandability
      // of the resulting code. If we later find a reason to expand
      // particular instructions over others, we can adjust.
      unsigned RemainingSize = OrigSize;
      while (!Relaxable.empty() && RemainingSize != 0) {
        auto &RF = *Relaxable.pop_back_val();
        // Give the backend a chance to play any tricks it wishes to increase
        // the encoding size of the given instruction. Target independent code
        // will try further relaxation, but targets may play further tricks.
        if (padInstructionEncoding(RF, Asm.getEmitter(), RemainingSize))
          Sec.setHasLayout(false);

        // If we have an instruction which hasn't been fully relaxed, we can't
        // skip past it and insert bytes before it. Changing its starting
        // offset might require a larger negative offset than it can encode.
        // We don't need to worry about larger positive offsets as none of the
        // possible offsets between this and our align are visible, and the
        // ones afterwards aren't changing.
        if (mayNeedRelaxation(RF.getInst(), *RF.getSubtargetInfo()))
          break;
      }
      Relaxable.clear();

      // BoundaryAlign explicitly tracks its size (unlike align).
      if (F.getKind() == MCFragment::FT_BoundaryAlign)
        cast<MCBoundaryAlignFragment>(F).setSize(RemainingSize);

#ifndef NDEBUG
      const uint64_t FinalOffset = Asm.getFragmentOffset(F);
      const uint64_t FinalSize = Asm.computeFragmentSize(F);
      assert(OrigOffset + OrigSize == FinalOffset + FinalSize &&
             "can't move start of next fragment!");
      assert(FinalSize == RemainingSize && "inconsistent size computation?");
#endif

      // If we're looking at a boundary align, make sure we don't try to pad
      // its target instructions for some following directive. Doing so would
      // break the alignment of the current boundary align.
      if (auto *BF = dyn_cast<MCBoundaryAlignFragment>(&F)) {
        const MCFragment *LastFragment = BF->getLastFragment();
        if (!LastFragment)
          continue;
        while (&*I != LastFragment)
          ++I;
      }
    }
  }

  // The layout is done. Mark every fragment as valid.
  for (MCSection &Section : Asm) {
    Asm.getFragmentOffset(*Section.curFragList()->Tail);
    Asm.computeFragmentSize(*Section.curFragList()->Tail);
  }
}

unsigned X86AsmBackend::getMaximumNopSize(const MCSubtargetInfo &STI) const {
  if (STI.hasFeature(X86::Is16Bit))
    return 4;
  if (!STI.hasFeature(X86::FeatureNOPL) && !STI.hasFeature(X86::Is64Bit))
    return 1;
  if (STI.hasFeature(X86::TuningFast7ByteNOP))
    return 7;
  if (STI.hasFeature(X86::TuningFast15ByteNOP))
    return 15;
  if (STI.hasFeature(X86::TuningFast11ByteNOP))
    return 11;
  // FIXME: handle 32-bit mode
  // 15 bytes is the longest single NOP instruction, but 10 bytes is commonly
  // the longest that can be efficiently decoded.
  return 10;
}
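// Example of the padding this produces (tracing writeNopData below with the
// default 10-byte maximum): Count = 12 is emitted as one 10-byte
// `nopw %cs:0L(%rax,%rax,1)` followed by a 2-byte `xchg %ax,%ax` (66 90); on a
// target with TuningFast15ByteNOP, Count = 15 becomes five 0x66 prefixes
// followed by the 10-byte NOP.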
/// Write a sequence of optimal nops to the output, covering \p Count bytes.
/// \return - true on success, false on failure
bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
                                 const MCSubtargetInfo *STI) const {
  static const char Nops32Bit[10][11] = {
      // nop
      "\x90",
      // xchg %ax,%ax
      "\x66\x90",
      // nopl (%[re]ax)
      "\x0f\x1f\x00",
      // nopl 0(%[re]ax)
      "\x0f\x1f\x40\x00",
      // nopl 0(%[re]ax,%[re]ax,1)
      "\x0f\x1f\x44\x00\x00",
      // nopw 0(%[re]ax,%[re]ax,1)
      "\x66\x0f\x1f\x44\x00\x00",
      // nopl 0L(%[re]ax)
      "\x0f\x1f\x80\x00\x00\x00\x00",
      // nopl 0L(%[re]ax,%[re]ax,1)
      "\x0f\x1f\x84\x00\x00\x00\x00\x00",
      // nopw 0L(%[re]ax,%[re]ax,1)
      "\x66\x0f\x1f\x84\x00\x00\x00\x00\x00",
      // nopw %cs:0L(%[re]ax,%[re]ax,1)
      "\x66\x2e\x0f\x1f\x84\x00\x00\x00\x00\x00",
  };

  // 16-bit mode uses different nop patterns than 32-bit.
  static const char Nops16Bit[4][11] = {
      // nop
      "\x90",
      // xchg %eax,%eax
      "\x66\x90",
      // lea 0(%si),%si
      "\x8d\x74\x00",
      // lea 0w(%si),%si
      "\x8d\xb4\x00\x00",
  };

  const char(*Nops)[11] =
      STI->hasFeature(X86::Is16Bit) ? Nops16Bit : Nops32Bit;

  uint64_t MaxNopLength = (uint64_t)getMaximumNopSize(*STI);

  // Emit as many MaxNopLength NOPs as needed, then emit a NOP of the remaining
  // length.
  do {
    const uint8_t ThisNopLength = (uint8_t)std::min(Count, MaxNopLength);
    const uint8_t Prefixes = ThisNopLength <= 10 ? 0 : ThisNopLength - 10;
    for (uint8_t i = 0; i < Prefixes; i++)
      OS << '\x66';
    const uint8_t Rest = ThisNopLength - Prefixes;
    if (Rest != 0)
      OS.write(Nops[Rest - 1], Rest);
    Count -= ThisNopLength;
  } while (Count != 0);

  return true;
}

/* *** */

namespace {

class ELFX86AsmBackend : public X86AsmBackend {
public:
  uint8_t OSABI;
  ELFX86AsmBackend(const Target &T, uint8_t OSABI, const MCSubtargetInfo &STI)
      : X86AsmBackend(T, STI), OSABI(OSABI) {}
};

class ELFX86_32AsmBackend : public ELFX86AsmBackend {
public:
  ELFX86_32AsmBackend(const Target &T, uint8_t OSABI,
                      const MCSubtargetInfo &STI)
      : ELFX86AsmBackend(T, OSABI, STI) {}

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI, ELF::EM_386);
  }
};

class ELFX86_X32AsmBackend : public ELFX86AsmBackend {
public:
  ELFX86_X32AsmBackend(const Target &T, uint8_t OSABI,
                       const MCSubtargetInfo &STI)
      : ELFX86AsmBackend(T, OSABI, STI) {}

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
                                    ELF::EM_X86_64);
  }
};

class ELFX86_IAMCUAsmBackend : public ELFX86AsmBackend {
public:
  ELFX86_IAMCUAsmBackend(const Target &T, uint8_t OSABI,
                         const MCSubtargetInfo &STI)
      : ELFX86AsmBackend(T, OSABI, STI) {}

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
                                    ELF::EM_IAMCU);
  }
};

class ELFX86_64AsmBackend : public ELFX86AsmBackend {
public:
  ELFX86_64AsmBackend(const Target &T, uint8_t OSABI,
                      const MCSubtargetInfo &STI)
      : ELFX86AsmBackend(T, OSABI, STI) {}

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    return createX86ELFObjectWriter(/*IsELF64*/ true, OSABI, ELF::EM_X86_64);
  }
};

class WindowsX86AsmBackend : public X86AsmBackend {
  bool Is64Bit;

public:
  WindowsX86AsmBackend(const Target &T, bool is64Bit,
                       const MCSubtargetInfo &STI)
      : X86AsmBackend(T, STI), Is64Bit(is64Bit) {}

  std::optional<MCFixupKind> getFixupKind(StringRef Name) const override {
    return StringSwitch<std::optional<MCFixupKind>>(Name)
        .Case("dir32", FK_Data_4)
        .Case("secrel32", FK_SecRel_4)
        .Case("secidx", FK_SecRel_2)
        .Default(MCAsmBackend::getFixupKind(Name));
  }

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    return createX86WinCOFFObjectWriter(Is64Bit);
  }
};

namespace CU {

/// Compact unwind encoding values.
enum CompactUnwindEncodings {
  /// [RE]BP based frame where [RE]BP is pushed on the stack immediately after
  /// the return address, then [RE]SP is moved to [RE]BP.
  UNWIND_MODE_BP_FRAME = 0x01000000,

  /// A frameless function with a small constant stack size.
  UNWIND_MODE_STACK_IMMD = 0x02000000,

  /// A frameless function with a large constant stack size.
  UNWIND_MODE_STACK_IND = 0x03000000,

  /// No compact unwind encoding is available.
  UNWIND_MODE_DWARF = 0x04000000,

  /// Mask for encoding the frame registers.
  UNWIND_BP_FRAME_REGISTERS = 0x00007FFF,

  /// Mask for encoding the frameless registers.
  UNWIND_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF
};

} // namespace CU

class DarwinX86AsmBackend : public X86AsmBackend {
  const MCRegisterInfo &MRI;

  /// Number of registers that can be saved in a compact unwind encoding.
  enum { CU_NUM_SAVED_REGS = 6 };

  mutable unsigned SavedRegs[CU_NUM_SAVED_REGS];
  Triple TT;
  bool Is64Bit;

  unsigned OffsetSize;    ///< Offset of a "push" instruction.
  unsigned MoveInstrSize; ///< Size of a "move" instruction.
  unsigned StackDivide;   ///< Amount to adjust stack size by.
protected:
  /// Size of a "push" instruction for the given register.
  unsigned PushInstrSize(unsigned Reg) const {
    switch (Reg) {
    case X86::EBX:
    case X86::ECX:
    case X86::EDX:
    case X86::EDI:
    case X86::ESI:
    case X86::EBP:
    case X86::RBX:
    case X86::RBP:
      return 1;
    case X86::R12:
    case X86::R13:
    case X86::R14:
    case X86::R15:
      return 2;
    }
    return 1;
  }

private:
  /// Get the compact unwind number for a given register. The number
  /// corresponds to the enum lists in compact_unwind_encoding.h.
  int getCompactUnwindRegNum(unsigned Reg) const {
    static const MCPhysReg CU32BitRegs[7] = {
        X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0};
    static const MCPhysReg CU64BitRegs[] = {
        X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0};
    const MCPhysReg *CURegs = Is64Bit ? CU64BitRegs : CU32BitRegs;
    for (int Idx = 1; *CURegs; ++CURegs, ++Idx)
      if (*CURegs == Reg)
        return Idx;

    return -1;
  }
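  // For reference, the numbering getCompactUnwindRegNum() above produces
  // (taken directly from the arrays): 64-bit RBX=1, R12=2, R13=3, R14=4,
  // R15=5, RBP=6; 32-bit EBX=1, ECX=2, EDX=3, EDI=4, ESI=5, EBP=6. Any other
  // register has no compact-unwind number, which forces DWARF unwind info.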
  /// Return the registers encoded for a compact encoding with a frame
  /// pointer.
  uint32_t encodeCompactUnwindRegistersWithFrame() const {
    // Encode the registers in the order they were saved --- 3-bits per
    // register. The list of saved registers is assumed to be in reverse
    // order. The registers are numbered from 1 to CU_NUM_SAVED_REGS.
    uint32_t RegEnc = 0;
    for (int i = 0, Idx = 0; i != CU_NUM_SAVED_REGS; ++i) {
      unsigned Reg = SavedRegs[i];
      if (Reg == 0) break;

      int CURegNum = getCompactUnwindRegNum(Reg);
      if (CURegNum == -1) return ~0U;

      // Encode the 3-bit register number in order, skipping over 3-bits for
      // each register.
      RegEnc |= (CURegNum & 0x7) << (Idx++ * 3);
    }

    assert((RegEnc & 0x3FFFF) == RegEnc &&
           "Invalid compact register encoding!");
    return RegEnc;
  }

  /// Create the permutation encoding used with frameless stacks. It is
  /// passed the number of registers to be saved and an array of the registers
  /// saved.
  uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned RegCount) const {
    // The saved registers are numbered from 1 to 6. In order to encode the
    // order in which they were saved, we re-number them according to their
    // place in the register order. The re-numbering is relative to the last
    // re-numbered register. E.g., if we have registers {6, 2, 4, 5} saved in
    // that order:
    //
    //    Orig  Re-Num
    //    ----  ------
    //     6       6
    //     2       2
    //     4       3
    //     5       3
    //
    for (unsigned i = 0; i < RegCount; ++i) {
      int CUReg = getCompactUnwindRegNum(SavedRegs[i]);
      if (CUReg == -1) return ~0U;
      SavedRegs[i] = CUReg;
    }

    // Reverse the list.
    std::reverse(&SavedRegs[0], &SavedRegs[CU_NUM_SAVED_REGS]);

    uint32_t RenumRegs[CU_NUM_SAVED_REGS];
    for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i) {
      unsigned Countless = 0;
      for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j)
        if (SavedRegs[j] < SavedRegs[i])
          ++Countless;

      RenumRegs[i] = SavedRegs[i] - Countless - 1;
    }

    // Take the renumbered values and encode them into a 10-bit number.
    uint32_t permutationEncoding = 0;
    switch (RegCount) {
    case 6:
      permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
                             + 6 * RenumRegs[2] + 2 * RenumRegs[3]
                             + RenumRegs[4];
      break;
    case 5:
      permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2]
                             + 6 * RenumRegs[3] + 2 * RenumRegs[4]
                             + RenumRegs[5];
      break;
    case 4:
      permutationEncoding |= 60 * RenumRegs[2] + 12 * RenumRegs[3]
                             + 3 * RenumRegs[4] + RenumRegs[5];
      break;
    case 3:
      permutationEncoding |= 20 * RenumRegs[3] + 4 * RenumRegs[4]
                             + RenumRegs[5];
      break;
    case 2:
      permutationEncoding |= 5 * RenumRegs[4] + RenumRegs[5];
      break;
    case 1:
      permutationEncoding |= RenumRegs[5];
      break;
    }

    assert((permutationEncoding & 0x3FF) == permutationEncoding &&
           "Invalid compact register encoding!");
    return permutationEncoding;
  }

public:
  DarwinX86AsmBackend(const Target &T, const MCRegisterInfo &MRI,
                      const MCSubtargetInfo &STI)
      : X86AsmBackend(T, STI), MRI(MRI), TT(STI.getTargetTriple()),
        Is64Bit(TT.isArch64Bit()) {
    memset(SavedRegs, 0, sizeof(SavedRegs));
    OffsetSize = Is64Bit ? 8 : 4;
    MoveInstrSize = Is64Bit ? 3 : 2;
    StackDivide = Is64Bit ? 8 : 4;
  }

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    uint32_t CPUType = cantFail(MachO::getCPUType(TT));
    uint32_t CPUSubType = cantFail(MachO::getCPUSubType(TT));
    return createX86MachObjectWriter(Is64Bit, CPUType, CPUSubType);
  }

  /// Implementation of algorithm to generate the compact unwind encoding
  /// for the CFI instructions.
  uint64_t generateCompactUnwindEncoding(const MCDwarfFrameInfo *FI,
                                         const MCContext *Ctxt) const override {
    ArrayRef<MCCFIInstruction> Instrs = FI->Instructions;
    if (Instrs.empty()) return 0;
    if (!isDarwinCanonicalPersonality(FI->Personality) &&
        !Ctxt->emitCompactUnwindNonCanonical())
      return CU::UNWIND_MODE_DWARF;

    // Reset the saved registers.
    unsigned SavedRegIdx = 0;
    memset(SavedRegs, 0, sizeof(SavedRegs));

    bool HasFP = false;

    // Encode that we are using EBP/RBP as the frame pointer.
    uint64_t CompactUnwindEncoding = 0;

    unsigned SubtractInstrIdx = Is64Bit ? 3 : 2;
    unsigned InstrOffset = 0;
    unsigned StackAdjust = 0;
    uint64_t StackSize = 0;
    int64_t MinAbsOffset = std::numeric_limits<int64_t>::max();

    for (const MCCFIInstruction &Inst : Instrs) {
      switch (Inst.getOperation()) {
      default:
        // Any other CFI directives indicate a frame that we aren't prepared
        // to represent via compact unwind, so just bail out.
        return CU::UNWIND_MODE_DWARF;
      case MCCFIInstruction::OpDefCfaRegister: {
        // Defines a frame pointer. E.g.
        //
        //     movq %rsp, %rbp
        //  L0:
        //     .cfi_def_cfa_register %rbp
        //
        HasFP = true;

        // If the frame pointer is other than esp/rsp, we do not have a way to
        // generate a compact unwinding representation, so bail out.
        if (*MRI.getLLVMRegNum(Inst.getRegister(), true) !=
            (Is64Bit ? X86::RBP : X86::EBP))
          return CU::UNWIND_MODE_DWARF;

        // Reset the counts.
        memset(SavedRegs, 0, sizeof(SavedRegs));
        StackAdjust = 0;
        SavedRegIdx = 0;
        MinAbsOffset = std::numeric_limits<int64_t>::max();
        InstrOffset += MoveInstrSize;
        break;
      }
      case MCCFIInstruction::OpDefCfaOffset: {
        // Defines a new offset for the CFA. E.g.
        //
        //  With frame:
        //
        //     pushq %rbp
        //  L0:
        //     .cfi_def_cfa_offset 16
        //
        //  Without frame:
        //
        //     subq $72, %rsp
        //  L0:
        //     .cfi_def_cfa_offset 80
        //
        StackSize = Inst.getOffset() / StackDivide;
        break;
      }
      case MCCFIInstruction::OpOffset: {
        // Defines a "push" of a callee-saved register. E.g.
        //
        //     pushq %r15
        //     pushq %r14
        //     pushq %rbx
        //  L0:
        //     subq $120, %rsp
        //  L1:
        //     .cfi_offset %rbx, -40
        //     .cfi_offset %r14, -32
        //     .cfi_offset %r15, -24
        //
        if (SavedRegIdx == CU_NUM_SAVED_REGS)
          // If there are too many saved registers, we cannot use a compact
          // unwind encoding.
          return CU::UNWIND_MODE_DWARF;

        MCRegister Reg = *MRI.getLLVMRegNum(Inst.getRegister(), true);
        SavedRegs[SavedRegIdx++] = Reg;
        StackAdjust += OffsetSize;
        MinAbsOffset = std::min(MinAbsOffset, std::abs(Inst.getOffset()));
        InstrOffset += PushInstrSize(Reg);
        break;
      }
      }
    }

    StackAdjust /= StackDivide;

    if (HasFP) {
      if ((StackAdjust & 0xFF) != StackAdjust)
        // Offset was too big for a compact unwind encoding.
        return CU::UNWIND_MODE_DWARF;

      // We don't attempt to track a real StackAdjust, so if the saved
      // registers aren't adjacent to rbp we can't cope.
      if (SavedRegIdx != 0 && MinAbsOffset != 3 * (int)OffsetSize)
        return CU::UNWIND_MODE_DWARF;

      // Get the encoding of the saved registers when we have a frame pointer.
      uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame();
      if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;

      CompactUnwindEncoding |= CU::UNWIND_MODE_BP_FRAME;
      CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16;
      CompactUnwindEncoding |= RegEnc & CU::UNWIND_BP_FRAME_REGISTERS;
    } else {
      SubtractInstrIdx += InstrOffset;
      ++StackAdjust;

      if ((StackSize & 0xFF) == StackSize) {
        // Frameless stack with a small stack size.
        CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IMMD;

        // Encode the stack size.
        CompactUnwindEncoding |= (StackSize & 0xFF) << 16;
      } else {
        if ((StackAdjust & 0x7) != StackAdjust)
          // The extra stack adjustments are too big for us to handle.
          return CU::UNWIND_MODE_DWARF;

        // Frameless stack with an offset too large for us to encode compactly.
        CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IND;

        // Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP'
        // instruction.
        CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16;

        // Encode any extra stack adjustments (done via push instructions).
        CompactUnwindEncoding |= (StackAdjust & 0x7) << 13;
      }

      // Encode the number of registers saved. (Reverse the list first.)
      std::reverse(&SavedRegs[0], &SavedRegs[SavedRegIdx]);
      CompactUnwindEncoding |= (SavedRegIdx & 0x7) << 10;

      // Get the encoding of the saved registers when we don't have a frame
      // pointer.
      uint32_t RegEnc = encodeCompactUnwindRegistersWithoutFrame(SavedRegIdx);
      if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;

      // Encode the register encoding.
      CompactUnwindEncoding |=
          RegEnc & CU::UNWIND_FRAMELESS_STACK_REG_PERMUTATION;
    }

    return CompactUnwindEncoding;
  }
};

} // end anonymous namespace

MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
                                           const MCSubtargetInfo &STI,
                                           const MCRegisterInfo &MRI,
                                           const MCTargetOptions &Options) {
  const Triple &TheTriple = STI.getTargetTriple();
  if (TheTriple.isOSBinFormatMachO())
    return new DarwinX86AsmBackend(T, MRI, STI);

  if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
    return new WindowsX86AsmBackend(T, false, STI);

  uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());

  if (TheTriple.isOSIAMCU())
    return new ELFX86_IAMCUAsmBackend(T, OSABI, STI);

  return new ELFX86_32AsmBackend(T, OSABI, STI);
}

MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
                                           const MCSubtargetInfo &STI,
                                           const MCRegisterInfo &MRI,
                                           const MCTargetOptions &Options) {
  const Triple &TheTriple = STI.getTargetTriple();
  if (TheTriple.isOSBinFormatMachO())
    return new DarwinX86AsmBackend(T, MRI, STI);

  if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
    return new WindowsX86AsmBackend(T, true, STI);

  if (TheTriple.isUEFI()) {
    assert(TheTriple.isOSBinFormatCOFF() &&
           "Only COFF format is supported in UEFI environment.");
    return new WindowsX86AsmBackend(T, true, STI);
  }

  uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());

  if (TheTriple.isX32())
    return new ELFX86_X32AsmBackend(T, OSABI, STI);
  return new ELFX86_64AsmBackend(T, OSABI, STI);
}

namespace {
class X86ELFStreamer : public MCELFStreamer {
public:
  X86ELFStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> TAB,
                 std::unique_ptr<MCObjectWriter> OW,
                 std::unique_ptr<MCCodeEmitter> Emitter)
      : MCELFStreamer(Context, std::move(TAB), std::move(OW),
                      std::move(Emitter)) {}

  void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
};
} // end anonymous namespace

void X86_MC::emitInstruction(MCObjectStreamer &S, const MCInst &Inst,
                             const MCSubtargetInfo &STI) {
  auto &Backend = static_cast<X86AsmBackend &>(S.getAssembler().getBackend());
  Backend.emitInstructionBegin(S, Inst, STI);
  S.MCObjectStreamer::emitInstruction(Inst, STI);
  Backend.emitInstructionEnd(S, Inst);
}

void X86ELFStreamer::emitInstruction(const MCInst &Inst,
                                     const MCSubtargetInfo &STI) {
  X86_MC::emitInstruction(*this, Inst, STI);
}

MCStreamer *llvm::createX86ELFStreamer(const Triple &T, MCContext &Context,
                                       std::unique_ptr<MCAsmBackend> &&MAB,
                                       std::unique_ptr<MCObjectWriter> &&MOW,
                                       std::unique_ptr<MCCodeEmitter> &&MCE) {
  return new X86ELFStreamer(Context, std::move(MAB), std::move(MOW),
                            std::move(MCE));
}