1 //===-- X86AsmBackend.cpp - X86 Assembler Backend -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "MCTargetDesc/X86BaseInfo.h"
10 #include "MCTargetDesc/X86EncodingOptimization.h"
11 #include "MCTargetDesc/X86FixupKinds.h"
12 #include "llvm/ADT/StringSwitch.h"
13 #include "llvm/BinaryFormat/ELF.h"
14 #include "llvm/BinaryFormat/MachO.h"
15 #include "llvm/MC/MCAsmBackend.h"
16 #include "llvm/MC/MCAssembler.h"
17 #include "llvm/MC/MCCodeEmitter.h"
18 #include "llvm/MC/MCContext.h"
19 #include "llvm/MC/MCDwarf.h"
20 #include "llvm/MC/MCELFObjectWriter.h"
21 #include "llvm/MC/MCELFStreamer.h"
22 #include "llvm/MC/MCExpr.h"
23 #include "llvm/MC/MCFixupKindInfo.h"
24 #include "llvm/MC/MCInst.h"
25 #include "llvm/MC/MCInstrInfo.h"
26 #include "llvm/MC/MCObjectStreamer.h"
27 #include "llvm/MC/MCObjectWriter.h"
28 #include "llvm/MC/MCRegisterInfo.h"
29 #include "llvm/MC/MCSubtargetInfo.h"
30 #include "llvm/MC/MCValue.h"
31 #include "llvm/MC/TargetRegistry.h"
32 #include "llvm/Support/CommandLine.h"
33 #include "llvm/Support/ErrorHandling.h"
34 #include "llvm/Support/raw_ostream.h"
35 
36 using namespace llvm;
37 
38 namespace {
39 /// A wrapper for holding a mask of the values from X86::AlignBranchBoundaryKind
40 class X86AlignBranchKind {
41 private:
42   uint8_t AlignBranchKind = 0;
43 
44 public:
45   void operator=(const std::string &Val) {
46     if (Val.empty())
47       return;
48     SmallVector<StringRef, 6> BranchTypes;
49     StringRef(Val).split(BranchTypes, '+', -1, false);
50     for (auto BranchType : BranchTypes) {
51       if (BranchType == "fused")
52         addKind(X86::AlignBranchFused);
53       else if (BranchType == "jcc")
54         addKind(X86::AlignBranchJcc);
55       else if (BranchType == "jmp")
56         addKind(X86::AlignBranchJmp);
57       else if (BranchType == "call")
58         addKind(X86::AlignBranchCall);
59       else if (BranchType == "ret")
60         addKind(X86::AlignBranchRet);
61       else if (BranchType == "indirect")
62         addKind(X86::AlignBranchIndirect);
63       else {
64         errs() << "invalid argument " << BranchType.str()
65                << " to -x86-align-branch=; each element must be one of: fused, "
66                   "jcc, jmp, call, ret, indirect (plus separated).\n";
67       }
68     }
69   }
70 
71   operator uint8_t() const { return AlignBranchKind; }
72   void addKind(X86::AlignBranchBoundaryKind Value) { AlignBranchKind |= Value; }
73 };
74 
75 X86AlignBranchKind X86AlignBranchKindLoc;
76 
77 cl::opt<unsigned> X86AlignBranchBoundary(
78     "x86-align-branch-boundary", cl::init(0),
79     cl::desc(
80         "Control how the assembler should align branches with NOP. If the "
81         "boundary's size is not 0, it should be a power of 2 and no less "
82         "than 32. Branches will be aligned so that they neither cross nor end "
83         "against a boundary of the specified size. The default value 0 does not "
84         "align branches."));
85 
86 cl::opt<X86AlignBranchKind, true, cl::parser<std::string>> X86AlignBranch(
87     "x86-align-branch",
88     cl::desc(
89         "Specify types of branches to align (plus separated list of types):"
90              "\njcc      indicates conditional jumps"
91              "\nfused    indicates fused conditional jumps"
92              "\njmp      indicates direct unconditional jumps"
93              "\ncall     indicates direct and indirect calls"
94              "\nret      indicates rets"
95              "\nindirect indicates indirect unconditional jumps"),
96     cl::location(X86AlignBranchKindLoc));
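
// For example, a hypothetical invocation like
//   llvm-mc -x86-align-branch-boundary=32 -x86-align-branch=fused+jcc+jmp foo.s
// requests that fused branches, conditional jumps, and unconditional jumps be
// padded so they neither cross nor end against a 32-byte boundary.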
97 
98 cl::opt<bool> X86AlignBranchWithin32BBoundaries(
99     "x86-branches-within-32B-boundaries", cl::init(false),
100     cl::desc(
101         "Align selected instructions to mitigate negative performance impact "
102         "of Intel's microcode update for erratum SKX102.  May break "
103         "assumptions about labels corresponding to particular instructions, "
104         "and should be used with caution."));
105 
106 cl::opt<unsigned> X86PadMaxPrefixSize(
107     "x86-pad-max-prefix-size", cl::init(0),
108     cl::desc("Maximum number of prefixes to use for padding"));
109 
110 cl::opt<bool> X86PadForAlign(
111     "x86-pad-for-align", cl::init(false), cl::Hidden,
112     cl::desc("Pad previous instructions to implement align directives"));
113 
114 cl::opt<bool> X86PadForBranchAlign(
115     "x86-pad-for-branch-align", cl::init(true), cl::Hidden,
116     cl::desc("Pad previous instructions to implement branch alignment"));
117 
118 class X86AsmBackend : public MCAsmBackend {
119   const MCSubtargetInfo &STI;
120   std::unique_ptr<const MCInstrInfo> MCII;
121   X86AlignBranchKind AlignBranchType;
122   Align AlignBoundary;
123   unsigned TargetPrefixMax = 0;
124 
125   MCInst PrevInst;
126   unsigned PrevInstOpcode = 0;
127   MCBoundaryAlignFragment *PendingBA = nullptr;
128   std::pair<MCFragment *, size_t> PrevInstPosition;
129   bool IsRightAfterData = false;
130 
131   uint8_t determinePaddingPrefix(const MCInst &Inst) const;
132   bool isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const;
133   bool needAlign(const MCInst &Inst) const;
134   bool canPadBranches(MCObjectStreamer &OS) const;
135   bool canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const;
136 
137 public:
138   X86AsmBackend(const Target &T, const MCSubtargetInfo &STI)
139       : MCAsmBackend(llvm::endianness::little), STI(STI),
140         MCII(T.createMCInstrInfo()) {
141     if (X86AlignBranchWithin32BBoundaries) {
142       // At the moment, this defaults to aligning fused branches, unconditional
143       // jumps, and (unfused) conditional jumps with nops.  Both the
144       // instructions aligned and the alignment method (nop vs prefix) may
145       // change in the future.
146       AlignBoundary = assumeAligned(32);
147       AlignBranchType.addKind(X86::AlignBranchFused);
148       AlignBranchType.addKind(X86::AlignBranchJcc);
149       AlignBranchType.addKind(X86::AlignBranchJmp);
150     }
151     // Allow overriding defaults set by main flag
152     if (X86AlignBranchBoundary.getNumOccurrences())
153       AlignBoundary = assumeAligned(X86AlignBranchBoundary);
154     if (X86AlignBranch.getNumOccurrences())
155       AlignBranchType = X86AlignBranchKindLoc;
156     if (X86PadMaxPrefixSize.getNumOccurrences())
157       TargetPrefixMax = X86PadMaxPrefixSize;
158   }
159 
160   bool allowAutoPadding() const override;
161   bool allowEnhancedRelaxation() const override;
162   void emitInstructionBegin(MCObjectStreamer &OS, const MCInst &Inst,
163                             const MCSubtargetInfo &STI);
164   void emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst);
165 
166   unsigned getNumFixupKinds() const override {
167     return X86::NumTargetFixupKinds;
168   }
169 
170   std::optional<MCFixupKind> getFixupKind(StringRef Name) const override;
171 
172   const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
173 
174   bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
175                              const MCValue &Target, const uint64_t Value,
176                              const MCSubtargetInfo *STI) override;
177 
178   void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
179                   const MCValue &Target, MutableArrayRef<char> Data,
180                   uint64_t Value, bool IsResolved,
181                   const MCSubtargetInfo *STI) const override;
182 
183   bool mayNeedRelaxation(const MCInst &Inst,
184                          const MCSubtargetInfo &STI) const override;
185 
186   bool fixupNeedsRelaxation(const MCFixup &Fixup,
187                             uint64_t Value) const override;
188 
189   void relaxInstruction(MCInst &Inst,
190                         const MCSubtargetInfo &STI) const override;
191 
192   bool padInstructionViaRelaxation(MCRelaxableFragment &RF,
193                                    MCCodeEmitter &Emitter,
194                                    unsigned &RemainingSize) const;
195 
196   bool padInstructionViaPrefix(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
197                                unsigned &RemainingSize) const;
198 
199   bool padInstructionEncoding(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
200                               unsigned &RemainingSize) const;
201 
202   void finishLayout(const MCAssembler &Asm) const override;
203 
204   unsigned getMaximumNopSize(const MCSubtargetInfo &STI) const override;
205 
206   bool writeNopData(raw_ostream &OS, uint64_t Count,
207                     const MCSubtargetInfo *STI) const override;
208 };
209 } // end anonymous namespace
210 
211 static bool isRelaxableBranch(unsigned Opcode) {
212   return Opcode == X86::JCC_1 || Opcode == X86::JMP_1;
213 }
214 
215 static unsigned getRelaxedOpcodeBranch(unsigned Opcode,
216                                        bool Is16BitMode = false) {
217   switch (Opcode) {
218   default:
219     llvm_unreachable("invalid opcode for branch");
220   case X86::JCC_1:
221     return (Is16BitMode) ? X86::JCC_2 : X86::JCC_4;
222   case X86::JMP_1:
223     return (Is16BitMode) ? X86::JMP_2 : X86::JMP_4;
224   }
225 }
226 
227 static unsigned getRelaxedOpcode(const MCInst &MI, bool Is16BitMode) {
228   unsigned Opcode = MI.getOpcode();
229   return isRelaxableBranch(Opcode) ? getRelaxedOpcodeBranch(Opcode, Is16BitMode)
230                                    : X86::getOpcodeForLongImmediateForm(Opcode);
231 }
232 
233 static X86::CondCode getCondFromBranch(const MCInst &MI,
234                                        const MCInstrInfo &MCII) {
235   unsigned Opcode = MI.getOpcode();
236   switch (Opcode) {
237   default:
238     return X86::COND_INVALID;
239   case X86::JCC_1: {
240     const MCInstrDesc &Desc = MCII.get(Opcode);
241     return static_cast<X86::CondCode>(
242         MI.getOperand(Desc.getNumOperands() - 1).getImm());
243   }
244   }
245 }
246 
247 static X86::SecondMacroFusionInstKind
248 classifySecondInstInMacroFusion(const MCInst &MI, const MCInstrInfo &MCII) {
249   X86::CondCode CC = getCondFromBranch(MI, MCII);
250   return classifySecondCondCodeInMacroFusion(CC);
251 }
252 
253 /// Check if the instruction uses RIP relative addressing.
254 static bool isRIPRelative(const MCInst &MI, const MCInstrInfo &MCII) {
255   unsigned Opcode = MI.getOpcode();
256   const MCInstrDesc &Desc = MCII.get(Opcode);
257   uint64_t TSFlags = Desc.TSFlags;
258   unsigned CurOp = X86II::getOperandBias(Desc);
259   int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
260   if (MemoryOperand < 0)
261     return false;
262   unsigned BaseRegNum = MemoryOperand + CurOp + X86::AddrBaseReg;
263   MCRegister BaseReg = MI.getOperand(BaseRegNum).getReg();
264   return (BaseReg == X86::RIP);
265 }
266 
267 /// Check if the instruction is a prefix.
268 static bool isPrefix(unsigned Opcode, const MCInstrInfo &MCII) {
269   return X86II::isPrefix(MCII.get(Opcode).TSFlags);
270 }
271 
272 /// Check if the instruction is valid as the first instruction in macro fusion.
273 static bool isFirstMacroFusibleInst(const MCInst &Inst,
274                                     const MCInstrInfo &MCII) {
275   // An Intel instruction with RIP relative addressing is not macro fusible.
276   if (isRIPRelative(Inst, MCII))
277     return false;
278   X86::FirstMacroFusionInstKind FIK =
279       X86::classifyFirstOpcodeInMacroFusion(Inst.getOpcode());
280   return FIK != X86::FirstMacroFusionInstKind::Invalid;
281 }
282 
283 /// X86 can reduce the number of NOP bytes by padding instructions with
284 /// prefixes, which gives better performance in some cases. Here, we determine
285 /// which prefix is the most suitable.
286 ///
287 /// If the instruction has a segment override prefix, use the existing one.
288 /// If the target is 64-bit, use CS.
289 /// If the target is 32-bit,
290 ///   - If the instruction has an ESP/EBP base register, use SS.
291 ///   - Otherwise use DS.
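/// For example, in 64-bit mode the padding prefix is CS (0x2e); segment
/// overrides are ignored for addressing in that mode, so the extra bytes are
/// harmless.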
292 uint8_t X86AsmBackend::determinePaddingPrefix(const MCInst &Inst) const {
293   assert((STI.hasFeature(X86::Is32Bit) || STI.hasFeature(X86::Is64Bit)) &&
294          "Prefixes can be added only in 32-bit or 64-bit mode.");
295   const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
296   uint64_t TSFlags = Desc.TSFlags;
297 
298   // Determine where the memory operand starts, if present.
299   int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
300   if (MemoryOperand != -1)
301     MemoryOperand += X86II::getOperandBias(Desc);
302 
303   MCRegister SegmentReg;
304   if (MemoryOperand >= 0) {
305     // Check for explicit segment override on memory operand.
306     SegmentReg = Inst.getOperand(MemoryOperand + X86::AddrSegmentReg).getReg();
307   }
308 
309   switch (TSFlags & X86II::FormMask) {
310   default:
311     break;
312   case X86II::RawFrmDstSrc: {
313     // Check segment override opcode prefix as needed (not for %ds).
314     if (Inst.getOperand(2).getReg() != X86::DS)
315       SegmentReg = Inst.getOperand(2).getReg();
316     break;
317   }
318   case X86II::RawFrmSrc: {
319     // Check segment override opcode prefix as needed (not for %ds).
320     if (Inst.getOperand(1).getReg() != X86::DS)
321       SegmentReg = Inst.getOperand(1).getReg();
322     break;
323   }
324   case X86II::RawFrmMemOffs: {
325     // Check segment override opcode prefix as needed.
326     SegmentReg = Inst.getOperand(1).getReg();
327     break;
328   }
329   }
330 
331   if (SegmentReg)
332     return X86::getSegmentOverridePrefixForReg(SegmentReg);
333 
334   if (STI.hasFeature(X86::Is64Bit))
335     return X86::CS_Encoding;
336 
337   if (MemoryOperand >= 0) {
338     unsigned BaseRegNum = MemoryOperand + X86::AddrBaseReg;
339     MCRegister BaseReg = Inst.getOperand(BaseRegNum).getReg();
340     if (BaseReg == X86::ESP || BaseReg == X86::EBP)
341       return X86::SS_Encoding;
342   }
343   return X86::DS_Encoding;
344 }
345 
346 /// Check if the two instructions will be macro-fused on the target cpu.
347 bool X86AsmBackend::isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const {
348   const MCInstrDesc &InstDesc = MCII->get(Jcc.getOpcode());
349   if (!InstDesc.isConditionalBranch())
350     return false;
351   if (!isFirstMacroFusibleInst(Cmp, *MCII))
352     return false;
353   const X86::FirstMacroFusionInstKind CmpKind =
354       X86::classifyFirstOpcodeInMacroFusion(Cmp.getOpcode());
355   const X86::SecondMacroFusionInstKind BranchKind =
356       classifySecondInstInMacroFusion(Jcc, *MCII);
357   return X86::isMacroFused(CmpKind, BranchKind);
358 }
359 
360 /// Check if the instruction has a variant symbol operand.
361 static bool hasVariantSymbol(const MCInst &MI) {
362   for (auto &Operand : MI) {
363     if (!Operand.isExpr())
364       continue;
365     const MCExpr &Expr = *Operand.getExpr();
366     if (Expr.getKind() == MCExpr::SymbolRef &&
367         cast<MCSymbolRefExpr>(Expr).getKind() != MCSymbolRefExpr::VK_None)
368       return true;
369   }
370   return false;
371 }
372 
373 bool X86AsmBackend::allowAutoPadding() const {
374   return (AlignBoundary != Align(1) && AlignBranchType != X86::AlignBranchNone);
375 }
376 
377 bool X86AsmBackend::allowEnhancedRelaxation() const {
378   return allowAutoPadding() && TargetPrefixMax != 0 && X86PadForBranchAlign;
379 }
380 
381 /// X86 has certain instructions which enable interrupts exactly one
382 /// instruction *after* the instruction which stores to SS.  Return true if the
383 /// given instruction may have such an interrupt delay slot.
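/// For example, after "mov %ax, %ss" interrupts are inhibited until the next
/// instruction has executed, so inserting a nop in between would change
/// behavior.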
384 static bool mayHaveInterruptDelaySlot(unsigned InstOpcode) {
385   switch (InstOpcode) {
386   case X86::POPSS16:
387   case X86::POPSS32:
388   case X86::STI:
389     return true;
390 
391   case X86::MOV16sr:
392   case X86::MOV32sr:
393   case X86::MOV64sr:
394   case X86::MOV16sm:
395     // In fact, this is only the case if the first operand is SS. However, as
396     // segment moves occur extremely rarely, this is just a minor pessimization.
397     return true;
398   }
399   return false;
400 }
401 
402 /// Check if the instruction to be emitted is right after any data.
403 static bool
404 isRightAfterData(MCFragment *CurrentFragment,
405                  const std::pair<MCFragment *, size_t> &PrevInstPosition) {
406   MCFragment *F = CurrentFragment;
407   // Since data is always emitted into a DataFragment, our check strategy is
408   // simple here.
409   //   - If the fragment is a DataFragment
410   //     - If it's empty (section start or data after align), returns false.
411   //     - If it's not the fragment where the previous instruction is,
412   //       returns true.
413   //     - If it's the fragment holding the previous instruction but its
414   //       size changed since the previous instruction was emitted into
415   //       it, returns true.
416   //     - Otherwise returns false.
417   //   - If the fragment is not a DataFragment, returns false.
418   if (auto *DF = dyn_cast_or_null<MCDataFragment>(F))
419     return DF->getContents().size() &&
420            (DF != PrevInstPosition.first ||
421             DF->getContents().size() != PrevInstPosition.second);
422 
423   return false;
424 }
425 
426 /// \returns the fragment size if it has instructions, otherwise returns 0.
427 static size_t getSizeForInstFragment(const MCFragment *F) {
428   if (!F || !F->hasInstructions())
429     return 0;
430   // MCEncodedFragmentWithContents being templated makes this tricky.
431   switch (F->getKind()) {
432   default:
433     llvm_unreachable("Unknown fragment with instructions!");
434   case MCFragment::FT_Data:
435     return cast<MCDataFragment>(*F).getContents().size();
436   case MCFragment::FT_Relaxable:
437     return cast<MCRelaxableFragment>(*F).getContents().size();
438   }
439 }
440 
441 /// Return true if we can insert a NOP or prefixes automatically before the
442 /// instruction to be emitted.
443 bool X86AsmBackend::canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const {
444   if (hasVariantSymbol(Inst))
445     // The linker may rewrite the instruction with a variant symbol operand (e.g.
446     // TLSCALL).
447     return false;
448 
449   if (mayHaveInterruptDelaySlot(PrevInstOpcode))
450     // If this instruction follows an interrupt enabling instruction with a one
451     // instruction delay, inserting a nop would change behavior.
452     return false;
453 
454   if (isPrefix(PrevInstOpcode, *MCII))
455     // If this instruction follows a prefix, inserting a nop/prefix would change
456     // semantics.
457     return false;
458 
459   if (isPrefix(Inst.getOpcode(), *MCII))
460     // If this instruction is a prefix, inserting a prefix would change
461     // semantics.
462     return false;
463 
464   if (IsRightAfterData)
465     // If this instruction follows any data, there is no clear
466     // instruction boundary; inserting a nop/prefix would change semantics.
467     return false;
468 
469   return true;
470 }
471 
472 bool X86AsmBackend::canPadBranches(MCObjectStreamer &OS) const {
473   if (!OS.getAllowAutoPadding())
474     return false;
475   assert(allowAutoPadding() && "incorrect initialization!");
476 
477   // We only pad in the text section.
478   if (!OS.getCurrentSectionOnly()->isText())
479     return false;
480 
481   // TODO: We currently don't deal with bundling cases.
482   if (OS.getAssembler().isBundlingEnabled())
483     return false;
484 
485   // Branches only need to be aligned in 32-bit or 64-bit mode.
486   if (!(STI.hasFeature(X86::Is64Bit) || STI.hasFeature(X86::Is32Bit)))
487     return false;
488 
489   return true;
490 }
491 
492 /// Check if the instruction needs to be aligned.
493 bool X86AsmBackend::needAlign(const MCInst &Inst) const {
494   const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
495   return (Desc.isConditionalBranch() &&
496           (AlignBranchType & X86::AlignBranchJcc)) ||
497          (Desc.isUnconditionalBranch() &&
498           (AlignBranchType & X86::AlignBranchJmp)) ||
499          (Desc.isCall() && (AlignBranchType & X86::AlignBranchCall)) ||
500          (Desc.isReturn() && (AlignBranchType & X86::AlignBranchRet)) ||
501          (Desc.isIndirectBranch() &&
502           (AlignBranchType & X86::AlignBranchIndirect));
503 }
504 
505 /// Insert BoundaryAlignFragment before instructions to align branches.
506 void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS,
507                                          const MCInst &Inst, const MCSubtargetInfo &STI) {
508   // Used by canPadInst. Done here, because in emitInstructionEnd, the current
509   // fragment will have changed.
510   IsRightAfterData =
511       isRightAfterData(OS.getCurrentFragment(), PrevInstPosition);
512 
513   if (!canPadBranches(OS))
514     return;
515 
516   // NB: PrevInst only valid if canPadBranches is true.
517   if (!isMacroFused(PrevInst, Inst))
518     // Macro fusion doesn't actually happen; clear the pending fragment.
519     PendingBA = nullptr;
520 
521   // When branch padding is enabled (basically the SKX102 erratum, so unlikely),
522   // we call canPadInst (which is not cheap) twice. However, in the common case,
523   // we can avoid the unnecessary call, as it is otherwise only needed for
524   // relaxable fragments.
525   if (!canPadInst(Inst, OS))
526     return;
527 
528   if (PendingBA && PendingBA->getNext() == OS.getCurrentFragment()) {
529     // Macro fusion actually happens and there is no other fragment inserted
530     // after the previous instruction.
531     //
532     // Do nothing here since we already inserted a BoundaryAlign fragment when
533     // we met the first instruction in the fused pair and we'll tie them
534     // together in emitInstructionEnd.
535     //
536     // Note: When there is at least one fragment, such as MCAlignFragment,
537     // inserted after the previous instruction, e.g.
538     //
539     // \code
540     //   cmp %rax %rcx
541     //   .align 16
542     //   je .Label0
543     // \endcode
544     //
545     // We will treat the JCC as an unfused branch although it may be fused
546     // with the CMP.
547     return;
548   }
549 
550   if (needAlign(Inst) || ((AlignBranchType & X86::AlignBranchFused) &&
551                           isFirstMacroFusibleInst(Inst, *MCII))) {
552     // If we meet an unfused branch or the first instruction in a fusible pair,
553     // insert a BoundaryAlign fragment.
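    // For a fused "cmp; je" pair, for example, the fragment list then looks
    // roughly like "... | BoundaryAlign | <cmp> | <je> | ...", and
    // emitInstructionEnd later ties the BoundaryAlign to the fragment holding
    // the "je".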
554     PendingBA = OS.getContext().allocFragment<MCBoundaryAlignFragment>(
555         AlignBoundary, STI);
556     OS.insert(PendingBA);
557   }
558 }
559 
560 /// Set the last fragment to be aligned for the BoundaryAlignFragment.
561 void X86AsmBackend::emitInstructionEnd(MCObjectStreamer &OS,
562                                        const MCInst &Inst) {
563   MCFragment *CF = OS.getCurrentFragment();
564   if (auto *F = dyn_cast_or_null<MCRelaxableFragment>(CF))
565     F->setAllowAutoPadding(canPadInst(Inst, OS));
566 
567   // Update PrevInstOpcode here, since canPadInst() reads it.
568   PrevInstOpcode = Inst.getOpcode();
569   PrevInstPosition = std::make_pair(CF, getSizeForInstFragment(CF));
570 
571   if (!canPadBranches(OS))
572     return;
573 
574   // PrevInst is only needed if canPadBranches. Copying an MCInst isn't cheap.
575   PrevInst = Inst;
576 
577   if (!needAlign(Inst) || !PendingBA)
578     return;
579 
580   // Tie the aligned instructions into a pending BoundaryAlign.
581   PendingBA->setLastFragment(CF);
582   PendingBA = nullptr;
583 
584   // We need to ensure that further data isn't added to the current
585   // DataFragment, so that we can get the size of instructions later in
586   // MCAssembler::relaxBoundaryAlign. The easiest way is to insert a new empty
587   // DataFragment.
588   if (isa_and_nonnull<MCDataFragment>(CF))
589     OS.insert(OS.getContext().allocFragment<MCDataFragment>());
590 
591   // Update the maximum alignment on the current section if necessary.
592   MCSection *Sec = OS.getCurrentSectionOnly();
593   Sec->ensureMinAlignment(AlignBoundary);
594 }
595 
596 std::optional<MCFixupKind> X86AsmBackend::getFixupKind(StringRef Name) const {
597   if (STI.getTargetTriple().isOSBinFormatELF()) {
598     unsigned Type;
599     if (STI.getTargetTriple().getArch() == Triple::x86_64) {
600       Type = llvm::StringSwitch<unsigned>(Name)
601 #define ELF_RELOC(X, Y) .Case(#X, Y)
602 #include "llvm/BinaryFormat/ELFRelocs/x86_64.def"
603 #undef ELF_RELOC
604                  .Case("BFD_RELOC_NONE", ELF::R_X86_64_NONE)
605                  .Case("BFD_RELOC_8", ELF::R_X86_64_8)
606                  .Case("BFD_RELOC_16", ELF::R_X86_64_16)
607                  .Case("BFD_RELOC_32", ELF::R_X86_64_32)
608                  .Case("BFD_RELOC_64", ELF::R_X86_64_64)
609                  .Default(-1u);
610     } else {
611       Type = llvm::StringSwitch<unsigned>(Name)
612 #define ELF_RELOC(X, Y) .Case(#X, Y)
613 #include "llvm/BinaryFormat/ELFRelocs/i386.def"
614 #undef ELF_RELOC
615                  .Case("BFD_RELOC_NONE", ELF::R_386_NONE)
616                  .Case("BFD_RELOC_8", ELF::R_386_8)
617                  .Case("BFD_RELOC_16", ELF::R_386_16)
618                  .Case("BFD_RELOC_32", ELF::R_386_32)
619                  .Default(-1u);
620     }
621     if (Type == -1u)
622       return std::nullopt;
623     return static_cast<MCFixupKind>(FirstLiteralRelocationKind + Type);
624   }
625   return MCAsmBackend::getFixupKind(Name);
626 }
627 
628 const MCFixupKindInfo &X86AsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
629   const static MCFixupKindInfo Infos[X86::NumTargetFixupKinds] = {
630       // clang-format off
631       {"reloc_riprel_4byte", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
632       {"reloc_riprel_4byte_movq_load", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
633       {"reloc_riprel_4byte_movq_load_rex2", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
634       {"reloc_riprel_4byte_relax", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
635       {"reloc_riprel_4byte_relax_rex", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
636       {"reloc_riprel_4byte_relax_rex2", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
637       {"reloc_riprel_4byte_relax_evex", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
638       {"reloc_signed_4byte", 0, 32, 0},
639       {"reloc_signed_4byte_relax", 0, 32, 0},
640       {"reloc_global_offset_table", 0, 32, 0},
641       {"reloc_global_offset_table8", 0, 64, 0},
642       {"reloc_branch_4byte_pcrel", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
643       // clang-format on
644   };
645 
646   // Fixup kinds from .reloc directive are like R_386_NONE/R_X86_64_NONE. They
647   // do not require any extra processing.
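  // For example, a directive like ".reloc ., BFD_RELOC_NONE, foo" reaches here
  // with a kind at or above FirstLiteralRelocationKind and is passed through
  // untouched.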
648   if (Kind >= FirstLiteralRelocationKind)
649     return MCAsmBackend::getFixupKindInfo(FK_NONE);
650 
651   if (Kind < FirstTargetFixupKind)
652     return MCAsmBackend::getFixupKindInfo(Kind);
653 
654   assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
655          "Invalid kind!");
656   assert(Infos[Kind - FirstTargetFixupKind].Name && "Empty fixup name!");
657   return Infos[Kind - FirstTargetFixupKind];
658 }
659 
660 bool X86AsmBackend::shouldForceRelocation(const MCAssembler &,
661                                           const MCFixup &Fixup, const MCValue &,
662                                           const uint64_t,
663                                           const MCSubtargetInfo *STI) {
664   return Fixup.getKind() >= FirstLiteralRelocationKind;
665 }
666 
667 static unsigned getFixupKindSize(unsigned Kind) {
668   switch (Kind) {
669   default:
670     llvm_unreachable("invalid fixup kind!");
671   case FK_NONE:
672     return 0;
673   case FK_PCRel_1:
674   case FK_SecRel_1:
675   case FK_Data_1:
676     return 1;
677   case FK_PCRel_2:
678   case FK_SecRel_2:
679   case FK_Data_2:
680     return 2;
681   case FK_PCRel_4:
682   case X86::reloc_riprel_4byte:
683   case X86::reloc_riprel_4byte_relax:
684   case X86::reloc_riprel_4byte_relax_rex:
685   case X86::reloc_riprel_4byte_relax_rex2:
686   case X86::reloc_riprel_4byte_movq_load:
687   case X86::reloc_riprel_4byte_movq_load_rex2:
688   case X86::reloc_riprel_4byte_relax_evex:
689   case X86::reloc_signed_4byte:
690   case X86::reloc_signed_4byte_relax:
691   case X86::reloc_global_offset_table:
692   case X86::reloc_branch_4byte_pcrel:
693   case FK_SecRel_4:
694   case FK_Data_4:
695     return 4;
696   case FK_PCRel_8:
697   case FK_SecRel_8:
698   case FK_Data_8:
699   case X86::reloc_global_offset_table8:
700     return 8;
701   }
702 }
703 
704 void X86AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
705                                const MCValue &Target,
706                                MutableArrayRef<char> Data,
707                                uint64_t Value, bool IsResolved,
708                                const MCSubtargetInfo *STI) const {
709   unsigned Kind = Fixup.getKind();
710   if (Kind >= FirstLiteralRelocationKind)
711     return;
712   unsigned Size = getFixupKindSize(Kind);
713 
714   assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!");
715 
716   int64_t SignedValue = static_cast<int64_t>(Value);
717   if ((Target.isAbsolute() || IsResolved) &&
718       getFixupKindInfo(Fixup.getKind()).Flags &
719       MCFixupKindInfo::FKF_IsPCRel) {
720     // Check that the PC-relative fixup value fits into the fixup size.
721     if (Size > 0 && !isIntN(Size * 8, SignedValue))
722       Asm.getContext().reportError(
723                                    Fixup.getLoc(), "value of " + Twine(SignedValue) +
724                                    " is too large for field of " + Twine(Size) +
725                                    ((Size == 1) ? " byte." : " bytes."));
726   } else {
727     // Check that the upper bits are either all zeros or all ones.
728     // Specifically ignore overflow/underflow as long as the leakage is
729     // limited to the lower bits. This is to remain compatible with
730     // other assemblers.
731     assert((Size == 0 || isIntN(Size * 8 + 1, SignedValue)) &&
732            "Value does not fit in the Fixup field");
733   }
734 
735   for (unsigned i = 0; i != Size; ++i)
736     Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8));
737 }
738 
739 bool X86AsmBackend::mayNeedRelaxation(const MCInst &MI,
740                                       const MCSubtargetInfo &STI) const {
741   unsigned Opcode = MI.getOpcode();
742   unsigned SkipOperands = X86::isCCMPCC(Opcode) ? 2 : 0;
743   return isRelaxableBranch(Opcode) ||
744          (X86::getOpcodeForLongImmediateForm(Opcode) != Opcode &&
745           MI.getOperand(MI.getNumOperands() - 1 - SkipOperands).isExpr());
746 }
747 
748 bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
749                                          uint64_t Value) const {
750   // Relax if the value is too big for a (signed) i8.
751   return !isInt<8>(Value);
752 }
753 
754 // FIXME: Can tblgen help at all here to verify there aren't other instructions
755 // we can relax?
756 void X86AsmBackend::relaxInstruction(MCInst &Inst,
757                                      const MCSubtargetInfo &STI) const {
758   // The only relaxation X86 does is from a 1-byte pcrel to a 4-byte pcrel.
759   bool Is16BitMode = STI.hasFeature(X86::Is16Bit);
760   unsigned RelaxedOp = getRelaxedOpcode(Inst, Is16BitMode);
761 
762   if (RelaxedOp == Inst.getOpcode()) {
763     SmallString<256> Tmp;
764     raw_svector_ostream OS(Tmp);
765     Inst.dump_pretty(OS);
766     OS << "\n";
767     report_fatal_error("unexpected instruction to relax: " + OS.str());
768   }
769 
770   Inst.setOpcode(RelaxedOp);
771 }
772 
773 bool X86AsmBackend::padInstructionViaPrefix(MCRelaxableFragment &RF,
774                                             MCCodeEmitter &Emitter,
775                                             unsigned &RemainingSize) const {
776   if (!RF.getAllowAutoPadding())
777     return false;
778   // If the instruction isn't fully relaxed, shifting it around might require a
779   // larger value for one of the fixups than can be encoded.  The outer loop
780   // will also catch this before moving to the next instruction, but we need to
781   // prevent padding this single instruction as well.
782   if (mayNeedRelaxation(RF.getInst(), *RF.getSubtargetInfo()))
783     return false;
784 
785   const unsigned OldSize = RF.getContents().size();
786   if (OldSize == 15)
787     return false;
788 
789   const unsigned MaxPossiblePad = std::min(15 - OldSize, RemainingSize);
790   const unsigned RemainingPrefixSize = [&]() -> unsigned {
791     SmallString<15> Code;
792     X86_MC::emitPrefix(Emitter, RF.getInst(), Code, STI);
793     assert(Code.size() < 15 && "The number of prefixes must be less than 15.");
794 
795     // TODO: It turns out we need a decent amount of plumbing for the target
796     // specific bits to determine the number of prefixes it is safe to add.  Various
797     // targets (older chips mostly, but also Atom family) encounter decoder
798     // stalls with too many prefixes.  For testing purposes, we set the value
799     // externally for the moment.
800     unsigned ExistingPrefixSize = Code.size();
801     if (TargetPrefixMax <= ExistingPrefixSize)
802       return 0;
803     return TargetPrefixMax - ExistingPrefixSize;
804   }();
805   const unsigned PrefixBytesToAdd =
806       std::min(MaxPossiblePad, RemainingPrefixSize);
807   if (PrefixBytesToAdd == 0)
808     return false;
809 
810   const uint8_t Prefix = determinePaddingPrefix(RF.getInst());
811 
812   SmallString<256> Code;
813   Code.append(PrefixBytesToAdd, Prefix);
814   Code.append(RF.getContents().begin(), RF.getContents().end());
815   RF.setContents(Code);
816 
817   // Adjust the fixups for the change in offsets
818   for (auto &F : RF.getFixups()) {
819     F.setOffset(F.getOffset() + PrefixBytesToAdd);
820   }
821 
822   RemainingSize -= PrefixBytesToAdd;
823   return true;
824 }
825 
826 bool X86AsmBackend::padInstructionViaRelaxation(MCRelaxableFragment &RF,
827                                                 MCCodeEmitter &Emitter,
828                                                 unsigned &RemainingSize) const {
829   if (!mayNeedRelaxation(RF.getInst(), *RF.getSubtargetInfo()))
830     // TODO: There are lots of other tricks we could apply for increasing
831     // encoding size without impacting performance.
832     return false;
833 
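  // In 32/64-bit mode this typically rewrites, e.g., a 2-byte "jmp rel8" into
  // the 5-byte "jmp rel32" form, absorbing up to 3 bytes of padding that would
  // otherwise be emitted as NOPs.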
834   MCInst Relaxed = RF.getInst();
835   relaxInstruction(Relaxed, *RF.getSubtargetInfo());
836 
837   SmallVector<MCFixup, 4> Fixups;
838   SmallString<15> Code;
839   Emitter.encodeInstruction(Relaxed, Code, Fixups, *RF.getSubtargetInfo());
840   const unsigned OldSize = RF.getContents().size();
841   const unsigned NewSize = Code.size();
842   assert(NewSize >= OldSize && "size decrease during relaxation?");
843   unsigned Delta = NewSize - OldSize;
844   if (Delta > RemainingSize)
845     return false;
846   RF.setInst(Relaxed);
847   RF.setContents(Code);
848   RF.getFixups() = Fixups;
849   RemainingSize -= Delta;
850   return true;
851 }
852 
853 bool X86AsmBackend::padInstructionEncoding(MCRelaxableFragment &RF,
854                                            MCCodeEmitter &Emitter,
855                                            unsigned &RemainingSize) const {
856   bool Changed = false;
857   if (RemainingSize != 0)
858     Changed |= padInstructionViaRelaxation(RF, Emitter, RemainingSize);
859   if (RemainingSize != 0)
860     Changed |= padInstructionViaPrefix(RF, Emitter, RemainingSize);
861   return Changed;
862 }
863 
864 void X86AsmBackend::finishLayout(MCAssembler const &Asm) const {
865   // See if we can further relax some instructions to cut down on the number of
866   // nop bytes required for code alignment.  The actual win is in reducing
867   // instruction count, not number of bytes.  Modern X86-64 can easily end up
868   // decode limited.  It is often better to reduce the number of instructions
869   // (i.e. eliminate nops) even at the cost of increasing the size and
870   // complexity of others.
871   if (!X86PadForAlign && !X86PadForBranchAlign)
872     return;
873 
874   // The processed regions are delimited by LabeledFragments. -g may have more
875   // MCSymbols and therefore different relaxation results. X86PadForAlign is
876   // disabled by default to eliminate the -g vs non -g difference.
877   DenseSet<MCFragment *> LabeledFragments;
878   for (const MCSymbol &S : Asm.symbols())
879     LabeledFragments.insert(S.getFragment(false));
880 
881   for (MCSection &Sec : Asm) {
882     if (!Sec.isText())
883       continue;
884 
885     SmallVector<MCRelaxableFragment *, 4> Relaxable;
886     for (MCSection::iterator I = Sec.begin(), IE = Sec.end(); I != IE; ++I) {
887       MCFragment &F = *I;
888 
889       if (LabeledFragments.count(&F))
890         Relaxable.clear();
891 
892       if (F.getKind() == MCFragment::FT_Data) // Skip and ignore
893         continue;
894 
895       if (F.getKind() == MCFragment::FT_Relaxable) {
896         auto &RF = cast<MCRelaxableFragment>(*I);
897         Relaxable.push_back(&RF);
898         continue;
899       }
900 
901       auto canHandle = [](MCFragment &F) -> bool {
902         switch (F.getKind()) {
903         default:
904           return false;
905         case MCFragment::FT_Align:
906           return X86PadForAlign;
907         case MCFragment::FT_BoundaryAlign:
908           return X86PadForBranchAlign;
909         }
910       };
911       // For any unhandled kind, assume we can't change layout.
912       if (!canHandle(F)) {
913         Relaxable.clear();
914         continue;
915       }
916 
917 #ifndef NDEBUG
918       const uint64_t OrigOffset = Asm.getFragmentOffset(F);
919 #endif
920       const uint64_t OrigSize = Asm.computeFragmentSize(F);
921 
922       // To keep the effects local, prefer to relax instructions closest to
923       // the align directive.  This is purely about human understandability
924       // of the resulting code.  If we later find a reason to expand
925       // particular instructions over others, we can adjust.
926       unsigned RemainingSize = OrigSize;
927       while (!Relaxable.empty() && RemainingSize != 0) {
928         auto &RF = *Relaxable.pop_back_val();
929         // Give the backend a chance to play any tricks it wishes to increase
930         // the encoding size of the given instruction.  Target independent code
931         // will try further relaxation, but targets may play further tricks.
932         if (padInstructionEncoding(RF, Asm.getEmitter(), RemainingSize))
933           Sec.setHasLayout(false);
934 
935         // If we have an instruction which hasn't been fully relaxed, we can't
936         // skip past it and insert bytes before it.  Changing its starting
937         // offset might require a larger negative offset than it can encode.
938         // We don't need to worry about larger positive offsets as none of the
939         // possible offsets between this and our align are visible, and the
940         // ones afterwards aren't changing.
941         if (mayNeedRelaxation(RF.getInst(), *RF.getSubtargetInfo()))
942           break;
943       }
944       Relaxable.clear();
945 
946       // BoundaryAlign explicitly tracks its size (unlike align).
947       if (F.getKind() == MCFragment::FT_BoundaryAlign)
948         cast<MCBoundaryAlignFragment>(F).setSize(RemainingSize);
949 
950 #ifndef NDEBUG
951       const uint64_t FinalOffset = Asm.getFragmentOffset(F);
952       const uint64_t FinalSize = Asm.computeFragmentSize(F);
953       assert(OrigOffset + OrigSize == FinalOffset + FinalSize &&
954              "can't move start of next fragment!");
955       assert(FinalSize == RemainingSize && "inconsistent size computation?");
956 #endif
957 
958       // If we're looking at a boundary align, make sure we don't try to pad
959       // its target instructions for some following directive.  Doing so would
960       // break the alignment of the current boundary align.
961       if (auto *BF = dyn_cast<MCBoundaryAlignFragment>(&F)) {
962         const MCFragment *LastFragment = BF->getLastFragment();
963         if (!LastFragment)
964           continue;
965         while (&*I != LastFragment)
966           ++I;
967       }
968     }
969   }
970 
971   // The layout is done. Mark every fragment as valid.
972   for (MCSection &Section : Asm) {
973     Asm.getFragmentOffset(*Section.curFragList()->Tail);
974     Asm.computeFragmentSize(*Section.curFragList()->Tail);
975   }
976 }
977 
978 unsigned X86AsmBackend::getMaximumNopSize(const MCSubtargetInfo &STI) const {
979   if (STI.hasFeature(X86::Is16Bit))
980     return 4;
981   if (!STI.hasFeature(X86::FeatureNOPL) && !STI.hasFeature(X86::Is64Bit))
982     return 1;
983   if (STI.hasFeature(X86::TuningFast7ByteNOP))
984     return 7;
985   if (STI.hasFeature(X86::TuningFast15ByteNOP))
986     return 15;
987   if (STI.hasFeature(X86::TuningFast11ByteNOP))
988     return 11;
989   // FIXME: handle 32-bit mode
990   // 15 bytes is the longest single NOP instruction, but 10 bytes is
991   // commonly the longest that can be efficiently decoded.
992   return 10;
993 }
994 
995 /// Write a sequence of optimal nops to the output, covering \p Count
996 /// bytes.
997 /// \return - true on success, false on failure
998 bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
999                                  const MCSubtargetInfo *STI) const {
1000   static const char Nops32Bit[10][11] = {
1001       // nop
1002       "\x90",
1003       // xchg %ax,%ax
1004       "\x66\x90",
1005       // nopl (%[re]ax)
1006       "\x0f\x1f\x00",
1007       // nopl 0(%[re]ax)
1008       "\x0f\x1f\x40\x00",
1009       // nopl 0(%[re]ax,%[re]ax,1)
1010       "\x0f\x1f\x44\x00\x00",
1011       // nopw 0(%[re]ax,%[re]ax,1)
1012       "\x66\x0f\x1f\x44\x00\x00",
1013       // nopl 0L(%[re]ax)
1014       "\x0f\x1f\x80\x00\x00\x00\x00",
1015       // nopl 0L(%[re]ax,%[re]ax,1)
1016       "\x0f\x1f\x84\x00\x00\x00\x00\x00",
1017       // nopw 0L(%[re]ax,%[re]ax,1)
1018       "\x66\x0f\x1f\x84\x00\x00\x00\x00\x00",
1019       // nopw %cs:0L(%[re]ax,%[re]ax,1)
1020       "\x66\x2e\x0f\x1f\x84\x00\x00\x00\x00\x00",
1021   };
1022 
1023   // 16-bit mode uses different nop patterns than 32-bit.
1024   static const char Nops16Bit[4][11] = {
1025       // nop
1026       "\x90",
1027       // xchg %eax,%eax
1028       "\x66\x90",
1029       // lea 0(%si),%si
1030       "\x8d\x74\x00",
1031       // lea 0w(%si),%si
1032       "\x8d\xb4\x00\x00",
1033   };
1034 
1035   const char(*Nops)[11] =
1036       STI->hasFeature(X86::Is16Bit) ? Nops16Bit : Nops32Bit;
1037 
1038   uint64_t MaxNopLength = (uint64_t)getMaximumNopSize(*STI);
1039 
1040   // Emit as many MaxNopLength NOPs as needed, then emit a NOP of the remaining
1041   // length.
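  // For example, assuming a 15-byte maximum, a 17-byte request is emitted as a
  // 15-byte NOP (five 0x66 prefixes in front of the 10-byte form) followed by
  // the 2-byte "xchg %ax,%ax" NOP.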
1042   do {
1043     const uint8_t ThisNopLength = (uint8_t) std::min(Count, MaxNopLength);
1044     const uint8_t Prefixes = ThisNopLength <= 10 ? 0 : ThisNopLength - 10;
1045     for (uint8_t i = 0; i < Prefixes; i++)
1046       OS << '\x66';
1047     const uint8_t Rest = ThisNopLength - Prefixes;
1048     if (Rest != 0)
1049       OS.write(Nops[Rest - 1], Rest);
1050     Count -= ThisNopLength;
1051   } while (Count != 0);
1052 
1053   return true;
1054 }
1055 
1056 /* *** */
1057 
1058 namespace {
1059 
1060 class ELFX86AsmBackend : public X86AsmBackend {
1061 public:
1062   uint8_t OSABI;
1063   ELFX86AsmBackend(const Target &T, uint8_t OSABI, const MCSubtargetInfo &STI)
1064       : X86AsmBackend(T, STI), OSABI(OSABI) {}
1065 };
1066 
1067 class ELFX86_32AsmBackend : public ELFX86AsmBackend {
1068 public:
1069   ELFX86_32AsmBackend(const Target &T, uint8_t OSABI,
1070                       const MCSubtargetInfo &STI)
1071     : ELFX86AsmBackend(T, OSABI, STI) {}
1072 
1073   std::unique_ptr<MCObjectTargetWriter>
1074   createObjectTargetWriter() const override {
1075     return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI, ELF::EM_386);
1076   }
1077 };
1078 
1079 class ELFX86_X32AsmBackend : public ELFX86AsmBackend {
1080 public:
1081   ELFX86_X32AsmBackend(const Target &T, uint8_t OSABI,
1082                        const MCSubtargetInfo &STI)
1083       : ELFX86AsmBackend(T, OSABI, STI) {}
1084 
1085   std::unique_ptr<MCObjectTargetWriter>
1086   createObjectTargetWriter() const override {
1087     return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
1088                                     ELF::EM_X86_64);
1089   }
1090 };
1091 
1092 class ELFX86_IAMCUAsmBackend : public ELFX86AsmBackend {
1093 public:
1094   ELFX86_IAMCUAsmBackend(const Target &T, uint8_t OSABI,
1095                          const MCSubtargetInfo &STI)
1096       : ELFX86AsmBackend(T, OSABI, STI) {}
1097 
1098   std::unique_ptr<MCObjectTargetWriter>
1099   createObjectTargetWriter() const override {
1100     return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
1101                                     ELF::EM_IAMCU);
1102   }
1103 };
1104 
1105 class ELFX86_64AsmBackend : public ELFX86AsmBackend {
1106 public:
1107   ELFX86_64AsmBackend(const Target &T, uint8_t OSABI,
1108                       const MCSubtargetInfo &STI)
1109     : ELFX86AsmBackend(T, OSABI, STI) {}
1110 
1111   std::unique_ptr<MCObjectTargetWriter>
1112   createObjectTargetWriter() const override {
1113     return createX86ELFObjectWriter(/*IsELF64*/ true, OSABI, ELF::EM_X86_64);
1114   }
1115 };
1116 
1117 class WindowsX86AsmBackend : public X86AsmBackend {
1118   bool Is64Bit;
1119 
1120 public:
1121   WindowsX86AsmBackend(const Target &T, bool is64Bit,
1122                        const MCSubtargetInfo &STI)
1123     : X86AsmBackend(T, STI)
1124     , Is64Bit(is64Bit) {
1125   }
1126 
1127   std::optional<MCFixupKind> getFixupKind(StringRef Name) const override {
1128     return StringSwitch<std::optional<MCFixupKind>>(Name)
1129         .Case("dir32", FK_Data_4)
1130         .Case("secrel32", FK_SecRel_4)
1131         .Case("secidx", FK_SecRel_2)
1132         .Default(MCAsmBackend::getFixupKind(Name));
1133   }
1134 
1135   std::unique_ptr<MCObjectTargetWriter>
1136   createObjectTargetWriter() const override {
1137     return createX86WinCOFFObjectWriter(Is64Bit);
1138   }
1139 };
1140 
1141 namespace CU {
1142 
1143   /// Compact unwind encoding values.
1144   enum CompactUnwindEncodings {
1145     /// [RE]BP based frame where [RE]BP is pushed on the stack immediately after
1146     /// the return address, then [RE]SP is moved to [RE]BP.
1147     UNWIND_MODE_BP_FRAME                   = 0x01000000,
1148 
1149     /// A frameless function with a small constant stack size.
1150     UNWIND_MODE_STACK_IMMD                 = 0x02000000,
1151 
1152     /// A frameless function with a large constant stack size.
1153     UNWIND_MODE_STACK_IND                  = 0x03000000,
1154 
1155     /// No compact unwind encoding is available.
1156     UNWIND_MODE_DWARF                      = 0x04000000,
1157 
1158     /// Mask for encoding the frame registers.
1159     UNWIND_BP_FRAME_REGISTERS              = 0x00007FFF,
1160 
1161     /// Mask for encoding the frameless registers.
1162     UNWIND_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF
1163   };
1164 
1165 } // namespace CU
1166 
1167 class DarwinX86AsmBackend : public X86AsmBackend {
1168   const MCRegisterInfo &MRI;
1169 
1170   /// Number of registers that can be saved in a compact unwind encoding.
1171   enum { CU_NUM_SAVED_REGS = 6 };
1172 
1173   mutable unsigned SavedRegs[CU_NUM_SAVED_REGS];
1174   Triple TT;
1175   bool Is64Bit;
1176 
1177   unsigned OffsetSize;                   ///< Stack slot size of a "push" instruction.
1178   unsigned MoveInstrSize;                ///< Size of a "move" instruction.
1179   unsigned StackDivide;                  ///< Amount to adjust stack size by.
1180 protected:
1181   /// Size of a "push" instruction for the given register.
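  /// For example, "pushq %rbx" encodes in one byte, while "pushq %r12" needs a
  /// REX prefix and takes two bytes.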
1182   unsigned PushInstrSize(unsigned Reg) const {
1183     switch (Reg) {
1184       case X86::EBX:
1185       case X86::ECX:
1186       case X86::EDX:
1187       case X86::EDI:
1188       case X86::ESI:
1189       case X86::EBP:
1190       case X86::RBX:
1191       case X86::RBP:
1192         return 1;
1193       case X86::R12:
1194       case X86::R13:
1195       case X86::R14:
1196       case X86::R15:
1197         return 2;
1198     }
1199     return 1;
1200   }
1201 
1202 private:
1203   /// Get the compact unwind number for a given register. The number
1204   /// corresponds to the enum lists in compact_unwind_encoding.h.
1205   int getCompactUnwindRegNum(unsigned Reg) const {
1206     static const MCPhysReg CU32BitRegs[7] = {
1207       X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
1208     };
1209     static const MCPhysReg CU64BitRegs[] = {
1210       X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
1211     };
1212     const MCPhysReg *CURegs = Is64Bit ? CU64BitRegs : CU32BitRegs;
1213     for (int Idx = 1; *CURegs; ++CURegs, ++Idx)
1214       if (*CURegs == Reg)
1215         return Idx;
1216 
1217     return -1;
1218   }
1219 
1220   /// Return the registers encoded for a compact encoding with a frame
1221   /// pointer.
1222   uint32_t encodeCompactUnwindRegistersWithFrame() const {
1223     // Encode the registers in the order they were saved --- 3-bits per
1224     // register. The list of saved registers is assumed to be in reverse
1225     // order. The registers are numbered from 1 to CU_NUM_SAVED_REGS.
1226     uint32_t RegEnc = 0;
1227     for (int i = 0, Idx = 0; i != CU_NUM_SAVED_REGS; ++i) {
1228       unsigned Reg = SavedRegs[i];
1229       if (Reg == 0) break;
1230 
1231       int CURegNum = getCompactUnwindRegNum(Reg);
1232       if (CURegNum == -1) return ~0U;
1233 
1234       // Encode the 3-bit register number in order, skipping over 3-bits for
1235       // each register.
1236       RegEnc |= (CURegNum & 0x7) << (Idx++ * 3);
1237     }
1238 
1239     assert((RegEnc & 0x3FFFF) == RegEnc &&
1240            "Invalid compact register encoding!");
1241     return RegEnc;
1242   }
1243 
1244   /// Create the permutation encoding used with frameless stacks. It is
1245   /// passed the number of registers to be saved and an array of the registers
1246   /// saved.
1247   uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned RegCount) const {
1248     // The saved registers are numbered from 1 to 6. In order to encode the
1249     // order in which they were saved, we re-number them according to their
1250     // place in the register order. The re-numbering is relative to the last
1251     // re-numbered register. E.g., if we have registers {6, 2, 4, 5} saved in
1252     // that order:
1253     //
1254     //    Orig  Re-Num
1255     //    ----  ------
1256     //     6       6
1257     //     2       2
1258     //     4       3
1259     //     5       3
1260     //
1261     for (unsigned i = 0; i < RegCount; ++i) {
1262       int CUReg = getCompactUnwindRegNum(SavedRegs[i]);
1263       if (CUReg == -1) return ~0U;
1264       SavedRegs[i] = CUReg;
1265     }
1266 
1267     // Reverse the list.
1268     std::reverse(&SavedRegs[0], &SavedRegs[CU_NUM_SAVED_REGS]);
1269 
1270     uint32_t RenumRegs[CU_NUM_SAVED_REGS];
1271     for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i){
1272       unsigned Countless = 0;
1273       for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j)
1274         if (SavedRegs[j] < SavedRegs[i])
1275           ++Countless;
1276 
1277       RenumRegs[i] = SavedRegs[i] - Countless - 1;
1278     }
1279 
1280     // Take the renumbered values and encode them into a 10-bit number.
1281     uint32_t permutationEncoding = 0;
1282     switch (RegCount) {
1283     case 6:
1284       permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
1285                              + 6 * RenumRegs[2] +  2 * RenumRegs[3]
1286                              +     RenumRegs[4];
1287       break;
1288     case 5:
1289       permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2]
1290                              + 6 * RenumRegs[3] +  2 * RenumRegs[4]
1291                              +     RenumRegs[5];
1292       break;
1293     case 4:
1294       permutationEncoding |=  60 * RenumRegs[2] + 12 * RenumRegs[3]
1295                              + 3 * RenumRegs[4] +      RenumRegs[5];
1296       break;
1297     case 3:
1298       permutationEncoding |=  20 * RenumRegs[3] +  4 * RenumRegs[4]
1299                              +     RenumRegs[5];
1300       break;
1301     case 2:
1302       permutationEncoding |=   5 * RenumRegs[4] +      RenumRegs[5];
1303       break;
1304     case 1:
1305       permutationEncoding |=       RenumRegs[5];
1306       break;
1307     }
1308 
1309     assert((permutationEncoding & 0x3FF) == permutationEncoding &&
1310            "Invalid compact register encoding!");
1311     return permutationEncoding;
1312   }
1313 
1314 public:
1315   DarwinX86AsmBackend(const Target &T, const MCRegisterInfo &MRI,
1316                       const MCSubtargetInfo &STI)
1317       : X86AsmBackend(T, STI), MRI(MRI), TT(STI.getTargetTriple()),
1318         Is64Bit(TT.isArch64Bit()) {
1319     memset(SavedRegs, 0, sizeof(SavedRegs));
1320     OffsetSize = Is64Bit ? 8 : 4;
1321     MoveInstrSize = Is64Bit ? 3 : 2;
1322     StackDivide = Is64Bit ? 8 : 4;
1323   }
1324 
1325   std::unique_ptr<MCObjectTargetWriter>
1326   createObjectTargetWriter() const override {
1327     uint32_t CPUType = cantFail(MachO::getCPUType(TT));
1328     uint32_t CPUSubType = cantFail(MachO::getCPUSubType(TT));
1329     return createX86MachObjectWriter(Is64Bit, CPUType, CPUSubType);
1330   }
1331 
1332   /// Implementation of algorithm to generate the compact unwind encoding
1333   /// for the CFI instructions.
1334   uint64_t generateCompactUnwindEncoding(const MCDwarfFrameInfo *FI,
1335                                          const MCContext *Ctxt) const override {
1336     ArrayRef<MCCFIInstruction> Instrs = FI->Instructions;
1337     if (Instrs.empty()) return 0;
1338     if (!isDarwinCanonicalPersonality(FI->Personality) &&
1339         !Ctxt->emitCompactUnwindNonCanonical())
1340       return CU::UNWIND_MODE_DWARF;
1341 
1342     // Reset the saved registers.
1343     unsigned SavedRegIdx = 0;
1344     memset(SavedRegs, 0, sizeof(SavedRegs));
1345 
1346     bool HasFP = false;
1347 
1348     // Encode that we are using EBP/RBP as the frame pointer.
1349     uint64_t CompactUnwindEncoding = 0;
1350 
1351     unsigned SubtractInstrIdx = Is64Bit ? 3 : 2;
1352     unsigned InstrOffset = 0;
1353     unsigned StackAdjust = 0;
1354     uint64_t StackSize = 0;
1355     int64_t MinAbsOffset = std::numeric_limits<int64_t>::max();
1356 
1357     for (const MCCFIInstruction &Inst : Instrs) {
1358       switch (Inst.getOperation()) {
1359       default:
1360         // Any other CFI directives indicate a frame that we aren't prepared
1361         // to represent via compact unwind, so just bail out.
1362         return CU::UNWIND_MODE_DWARF;
1363       case MCCFIInstruction::OpDefCfaRegister: {
1364         // Defines a frame pointer. E.g.
1365         //
1366         //     movq %rsp, %rbp
1367         //  L0:
1368         //     .cfi_def_cfa_register %rbp
1369         //
1370         HasFP = true;
1371 
1372         // If the frame pointer is other than ebp/rbp, we do not have a way to
1373         // generate a compact unwinding representation, so bail out.
1374         if (*MRI.getLLVMRegNum(Inst.getRegister(), true) !=
1375             (Is64Bit ? X86::RBP : X86::EBP))
1376           return CU::UNWIND_MODE_DWARF;
1377 
1378         // Reset the counts.
1379         memset(SavedRegs, 0, sizeof(SavedRegs));
1380         StackAdjust = 0;
1381         SavedRegIdx = 0;
1382         MinAbsOffset = std::numeric_limits<int64_t>::max();
1383         InstrOffset += MoveInstrSize;
1384         break;
1385       }
1386       case MCCFIInstruction::OpDefCfaOffset: {
1387         // Defines a new offset for the CFA. E.g.
1388         //
1389         //  With frame:
1390         //
1391         //     pushq %rbp
1392         //  L0:
1393         //     .cfi_def_cfa_offset 16
1394         //
1395         //  Without frame:
1396         //
1397         //     subq $72, %rsp
1398         //  L0:
1399         //     .cfi_def_cfa_offset 80
1400         //
1401         StackSize = Inst.getOffset() / StackDivide;
1402         break;
1403       }
1404       case MCCFIInstruction::OpOffset: {
1405         // Defines a "push" of a callee-saved register. E.g.
1406         //
1407         //     pushq %r15
1408         //     pushq %r14
1409         //     pushq %rbx
1410         //  L0:
1411         //     subq $120, %rsp
1412         //  L1:
1413         //     .cfi_offset %rbx, -40
1414         //     .cfi_offset %r14, -32
1415         //     .cfi_offset %r15, -24
1416         //
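        //  Walking the three .cfi_offset directives above leaves SavedRegs as
        //  {rbx, r14, r15}, StackAdjust at 24 bytes, and MinAbsOffset at 24
        //  (the %r15 slot, closest to the CFA); StackAdjust is converted to
        //  stack-size units once the whole CFI list has been processed.
        //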
1417         if (SavedRegIdx == CU_NUM_SAVED_REGS)
1418           // If there are too many saved registers, we cannot use a compact
1419           // unwind encoding.
1420           return CU::UNWIND_MODE_DWARF;
1421 
1422         MCRegister Reg = *MRI.getLLVMRegNum(Inst.getRegister(), true);
1423         SavedRegs[SavedRegIdx++] = Reg;
1424         StackAdjust += OffsetSize;
1425         MinAbsOffset = std::min(MinAbsOffset, std::abs(Inst.getOffset()));
1426         InstrOffset += PushInstrSize(Reg);
1427         break;
1428       }
1429       }
1430     }
1431 
1432     StackAdjust /= StackDivide; // Convert the byte count to stack-size units.
1433 
1434     if (HasFP) {
1435       if ((StackAdjust & 0xFF) != StackAdjust)
1436         // Offset was too big for a compact unwind encoding.
1437         return CU::UNWIND_MODE_DWARF;
1438 
1439       // We don't attempt to track a real StackAdjust, so if the saved registers
1440       // don't sit immediately below the saved rbp slot, fall back to DWARF.
1441       if (SavedRegIdx != 0 && MinAbsOffset != 3 * (int)OffsetSize)
1442         return CU::UNWIND_MODE_DWARF;
1443 
1444       // Get the encoding of the saved registers when we have a frame pointer.
1445       uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame();
1446       if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;
1447 
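      // For example, a prologue that saves three registers directly below the
      // rbp slot ends up with StackAdjust == 3 after the divide, giving
      // UNWIND_MODE_BP_FRAME | (3 << 16) | RegEnc.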
1448       CompactUnwindEncoding |= CU::UNWIND_MODE_BP_FRAME;
1449       CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16;
1450       CompactUnwindEncoding |= RegEnc & CU::UNWIND_BP_FRAME_REGISTERS;
1451     } else {
1452       SubtractInstrIdx += InstrOffset;
1453       ++StackAdjust;
1454 
1455       if ((StackSize & 0xFF) == StackSize) {
1456         // Frameless stack with a small stack size.
1457         CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IMMD;
1458 
1459         // Encode the stack size.
1460         CompactUnwindEncoding |= (StackSize & 0xFF) << 16;
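        // The 'subq $72, %rsp' frame from the OpDefCfaOffset comment lands
        // here: StackSize == 10 fits in the 8-bit field, so the mode is
        // UNWIND_MODE_STACK_IMMD with 10 in bits 16..23.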
1461       } else {
1462         if ((StackAdjust & 0x7) != StackAdjust)
1463           // The extra stack adjustments are too big for us to handle.
1464           return CU::UNWIND_MODE_DWARF;
1465 
1466         // Frameless stack with a stack size too large to encode directly.
1467         CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IND;
1468 
1469         // Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP'
1470         // instruction.
1471         CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16;
1472 
1473         // Encode any extra stack adjustments (done via push instructions).
1474         CompactUnwindEncoding |= (StackAdjust & 0x7) << 13;
1475       }
1476 
1477       // Encode the number of registers saved. (Reverse the list first.)
1478       std::reverse(&SavedRegs[0], &SavedRegs[SavedRegIdx]);
1479       CompactUnwindEncoding |= (SavedRegIdx & 0x7) << 10;
1480 
1481       // Get the encoding of the saved registers when we don't have a frame
1482       // pointer.
1483       uint32_t RegEnc = encodeCompactUnwindRegistersWithoutFrame(SavedRegIdx);
1484       if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;
1485 
1486       // Encode the register encoding.
1487       CompactUnwindEncoding |=
1488         RegEnc & CU::UNWIND_FRAMELESS_STACK_REG_PERMUTATION;
1489     }
1490 
1491     return CompactUnwindEncoding;
1492   }
1493 };
1494 
1495 } // end anonymous namespace
1496 
1497 MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
1498                                            const MCSubtargetInfo &STI,
1499                                            const MCRegisterInfo &MRI,
1500                                            const MCTargetOptions &Options) {
1501   const Triple &TheTriple = STI.getTargetTriple();
1502   if (TheTriple.isOSBinFormatMachO())
1503     return new DarwinX86AsmBackend(T, MRI, STI);
1504 
1505   if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
1506     return new WindowsX86AsmBackend(T, false, STI);
1507 
1508   uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
1509 
1510   if (TheTriple.isOSIAMCU())
1511     return new ELFX86_IAMCUAsmBackend(T, OSABI, STI);
1512 
1513   return new ELFX86_32AsmBackend(T, OSABI, STI);
1514 }
1515 
1516 MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
1517                                            const MCSubtargetInfo &STI,
1518                                            const MCRegisterInfo &MRI,
1519                                            const MCTargetOptions &Options) {
1520   const Triple &TheTriple = STI.getTargetTriple();
1521   if (TheTriple.isOSBinFormatMachO())
1522     return new DarwinX86AsmBackend(T, MRI, STI);
1523 
1524   if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
1525     return new WindowsX86AsmBackend(T, true, STI);
1526 
1527   if (TheTriple.isUEFI()) {
1528     assert(TheTriple.isOSBinFormatCOFF() &&
1529            "Only COFF format is supported in the UEFI environment.");
1530     return new WindowsX86AsmBackend(T, true, STI);
1531   }
1532 
1533   uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
1534 
1535   if (TheTriple.isX32())
1536     return new ELFX86_X32AsmBackend(T, OSABI, STI);
1537   return new ELFX86_64AsmBackend(T, OSABI, STI);
1538 }
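// A minimal sketch of how these factory functions are normally reached from
// generic MC setup code; the triple string and the Err/STI/MRI names below are
// illustrative only, and registration of the factories with the TargetRegistry
// happens elsewhere in the X86 MC layer:
//
//   std::string Err;
//   const Target *T =
//       TargetRegistry::lookupTarget("x86_64-apple-darwin", Err);
//   std::unique_ptr<MCAsmBackend> MAB(
//       T->createMCAsmBackend(*STI, *MRI, MCTargetOptions()));
//
// The target triple then selects which of the concrete backends above gets
// instantiated.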
1539 
1540 namespace {
1541 class X86ELFStreamer : public MCELFStreamer {
1542 public:
1543   X86ELFStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> TAB,
1544                  std::unique_ptr<MCObjectWriter> OW,
1545                  std::unique_ptr<MCCodeEmitter> Emitter)
1546       : MCELFStreamer(Context, std::move(TAB), std::move(OW),
1547                       std::move(Emitter)) {}
1548 
1549   void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
1550 };
1551 } // end anonymous namespace
1552 
1553 void X86_MC::emitInstruction(MCObjectStreamer &S, const MCInst &Inst,
1554                              const MCSubtargetInfo &STI) {
1555   auto &Backend = static_cast<X86AsmBackend &>(S.getAssembler().getBackend());
1556   Backend.emitInstructionBegin(S, Inst, STI);
1557   S.MCObjectStreamer::emitInstruction(Inst, STI);
1558   Backend.emitInstructionEnd(S, Inst);
1559 }
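// The emitInstructionBegin/emitInstructionEnd hooks above give the X86 backend
// an opportunity to insert padding or alignment fragments around individual
// instructions (this is how the branch-alignment support earlier in this file
// is driven); the instruction itself is still emitted through the regular
// MCObjectStreamer path.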
1560 
1561 void X86ELFStreamer::emitInstruction(const MCInst &Inst,
1562                                      const MCSubtargetInfo &STI) {
1563   X86_MC::emitInstruction(*this, Inst, STI);
1564 }
1565 
1566 MCStreamer *llvm::createX86ELFStreamer(const Triple &T, MCContext &Context,
1567                                        std::unique_ptr<MCAsmBackend> &&MAB,
1568                                        std::unique_ptr<MCObjectWriter> &&MOW,
1569                                        std::unique_ptr<MCCodeEmitter> &&MCE) {
1570   return new X86ELFStreamer(Context, std::move(MAB), std::move(MOW),
1571                             std::move(MCE));
1572 }
1573