xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp (revision 1199d38d8ec764ce8545888b4c091d00441842bf)
1 //===-- X86AsmBackend.cpp - X86 Assembler Backend -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "MCTargetDesc/X86BaseInfo.h"
10 #include "MCTargetDesc/X86EncodingOptimization.h"
11 #include "MCTargetDesc/X86FixupKinds.h"
12 #include "llvm/ADT/StringSwitch.h"
13 #include "llvm/BinaryFormat/ELF.h"
14 #include "llvm/BinaryFormat/MachO.h"
15 #include "llvm/MC/MCAsmBackend.h"
16 #include "llvm/MC/MCAssembler.h"
17 #include "llvm/MC/MCCodeEmitter.h"
18 #include "llvm/MC/MCContext.h"
19 #include "llvm/MC/MCDwarf.h"
20 #include "llvm/MC/MCELFObjectWriter.h"
21 #include "llvm/MC/MCELFStreamer.h"
22 #include "llvm/MC/MCExpr.h"
23 #include "llvm/MC/MCFixupKindInfo.h"
24 #include "llvm/MC/MCInst.h"
25 #include "llvm/MC/MCInstrInfo.h"
26 #include "llvm/MC/MCMachObjectWriter.h"
27 #include "llvm/MC/MCObjectStreamer.h"
28 #include "llvm/MC/MCObjectWriter.h"
29 #include "llvm/MC/MCRegisterInfo.h"
30 #include "llvm/MC/MCSectionMachO.h"
31 #include "llvm/MC/MCSubtargetInfo.h"
32 #include "llvm/MC/MCValue.h"
33 #include "llvm/MC/TargetRegistry.h"
34 #include "llvm/Support/CommandLine.h"
35 #include "llvm/Support/ErrorHandling.h"
36 #include "llvm/Support/raw_ostream.h"
37 
38 using namespace llvm;
39 
40 namespace {
41 /// A wrapper for holding a mask of the values from X86::AlignBranchBoundaryKind
42 class X86AlignBranchKind {
43 private:
44   uint8_t AlignBranchKind = 0;
45 
46 public:
47   void operator=(const std::string &Val) {
48     if (Val.empty())
49       return;
50     SmallVector<StringRef, 6> BranchTypes;
51     StringRef(Val).split(BranchTypes, '+', -1, false);
52     for (auto BranchType : BranchTypes) {
53       if (BranchType == "fused")
54         addKind(X86::AlignBranchFused);
55       else if (BranchType == "jcc")
56         addKind(X86::AlignBranchJcc);
57       else if (BranchType == "jmp")
58         addKind(X86::AlignBranchJmp);
59       else if (BranchType == "call")
60         addKind(X86::AlignBranchCall);
61       else if (BranchType == "ret")
62         addKind(X86::AlignBranchRet);
63       else if (BranchType == "indirect")
64         addKind(X86::AlignBranchIndirect);
65       else {
66         errs() << "invalid argument " << BranchType.str()
67                << " to -x86-align-branch=; each element must be one of: fused, "
68                   "jcc, jmp, call, ret, indirect (plus separated).\n";
69       }
70     }
71   }
72 
73   operator uint8_t() const { return AlignBranchKind; }
74   void addKind(X86::AlignBranchBoundaryKind Value) { AlignBranchKind |= Value; }
75 };
76 
77 X86AlignBranchKind X86AlignBranchKindLoc;
78 
79 cl::opt<unsigned> X86AlignBranchBoundary(
80     "x86-align-branch-boundary", cl::init(0),
81     cl::desc(
82         "Control how the assembler should align branches with NOP. If the "
83         "boundary's size is not 0, it should be a power of 2 and no less "
84         "than 32. Branches will be aligned to prevent from being across or "
85         "than 32. Branches will be aligned to prevent them from crossing or "
86         "ending against a boundary of the specified size. The default value 0 "
87         "does not align branches."));
88 cl::opt<X86AlignBranchKind, true, cl::parser<std::string>> X86AlignBranch(
89     "x86-align-branch",
90     cl::desc(
91         "Specify types of branches to align (plus separated list of types):"
92              "\njcc      indicates conditional jumps"
93              "\nfused    indicates fused conditional jumps"
94              "\njmp      indicates direct unconditional jumps"
95              "\ncall     indicates direct and indirect calls"
96              "\nret      indicates rets"
97              "\nindirect indicates indirect unconditional jumps"),
98     cl::location(X86AlignBranchKindLoc));
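// Illustrative usage (assuming the flags reach the MC layer, e.g. via llc or
// llvm-mc): -x86-align-branch-boundary=32 -x86-align-branch=fused+jcc+jmp
// pads code so the selected branch types neither cross nor end against a
// 32-byte boundary.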
99 
100 cl::opt<bool> X86AlignBranchWithin32BBoundaries(
101     "x86-branches-within-32B-boundaries", cl::init(false),
102     cl::desc(
103         "Align selected instructions to mitigate negative performance impact "
104         "of Intel's microcode update for erratum skx102.  May break "
105         "assumptions about labels corresponding to particular instructions, "
106         "and should be used with caution."));
107 
108 cl::opt<unsigned> X86PadMaxPrefixSize(
109     "x86-pad-max-prefix-size", cl::init(0),
110     cl::desc("Maximum number of prefixes to use for padding"));
111 
112 cl::opt<bool> X86PadForAlign(
113     "x86-pad-for-align", cl::init(false), cl::Hidden,
114     cl::desc("Pad previous instructions to implement align directives"));
115 
116 cl::opt<bool> X86PadForBranchAlign(
117     "x86-pad-for-branch-align", cl::init(true), cl::Hidden,
118     cl::desc("Pad previous instructions to implement branch alignment"));
119 
120 class X86AsmBackend : public MCAsmBackend {
121   const MCSubtargetInfo &STI;
122   std::unique_ptr<const MCInstrInfo> MCII;
123   X86AlignBranchKind AlignBranchType;
124   Align AlignBoundary;
125   unsigned TargetPrefixMax = 0;
126 
127   MCInst PrevInst;
128   unsigned PrevInstOpcode = 0;
129   MCBoundaryAlignFragment *PendingBA = nullptr;
130   std::pair<MCFragment *, size_t> PrevInstPosition;
131   bool IsRightAfterData = false;
132 
133   uint8_t determinePaddingPrefix(const MCInst &Inst) const;
134   bool isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const;
135   bool needAlign(const MCInst &Inst) const;
136   bool canPadBranches(MCObjectStreamer &OS) const;
137   bool canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const;
138 
139 public:
140   X86AsmBackend(const Target &T, const MCSubtargetInfo &STI)
141       : MCAsmBackend(llvm::endianness::little), STI(STI),
142         MCII(T.createMCInstrInfo()) {
143     if (X86AlignBranchWithin32BBoundaries) {
144       // At the moment, this defaults to aligning fused branches, unconditional
145       // jumps, and (unfused) conditional jumps with nops.  Both the
146       // instructions aligned and the alignment method (nop vs prefix) may
147       // change in the future.
148       AlignBoundary = assumeAligned(32);
149       AlignBranchType.addKind(X86::AlignBranchFused);
150       AlignBranchType.addKind(X86::AlignBranchJcc);
151       AlignBranchType.addKind(X86::AlignBranchJmp);
152     }
153     // Allow overriding defaults set by main flag
154     if (X86AlignBranchBoundary.getNumOccurrences())
155       AlignBoundary = assumeAligned(X86AlignBranchBoundary);
156     if (X86AlignBranch.getNumOccurrences())
157       AlignBranchType = X86AlignBranchKindLoc;
158     if (X86PadMaxPrefixSize.getNumOccurrences())
159       TargetPrefixMax = X86PadMaxPrefixSize;
160   }
161 
162   bool allowAutoPadding() const override;
163   bool allowEnhancedRelaxation() const override;
164   void emitInstructionBegin(MCObjectStreamer &OS, const MCInst &Inst,
165                             const MCSubtargetInfo &STI);
166   void emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst);
167 
168   unsigned getNumFixupKinds() const override {
169     return X86::NumTargetFixupKinds;
170   }
171 
172   std::optional<MCFixupKind> getFixupKind(StringRef Name) const override;
173 
174   const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
175 
176   bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
177                              const MCValue &Target,
178                              const MCSubtargetInfo *STI) override;
179 
180   void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
181                   const MCValue &Target, MutableArrayRef<char> Data,
182                   uint64_t Value, bool IsResolved,
183                   const MCSubtargetInfo *STI) const override;
184 
185   bool mayNeedRelaxation(const MCInst &Inst,
186                          const MCSubtargetInfo &STI) const override;
187 
188   bool fixupNeedsRelaxation(const MCFixup &Fixup,
189                             uint64_t Value) const override;
190 
191   void relaxInstruction(MCInst &Inst,
192                         const MCSubtargetInfo &STI) const override;
193 
194   bool padInstructionViaRelaxation(MCRelaxableFragment &RF,
195                                    MCCodeEmitter &Emitter,
196                                    unsigned &RemainingSize) const;
197 
198   bool padInstructionViaPrefix(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
199                                unsigned &RemainingSize) const;
200 
201   bool padInstructionEncoding(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
202                               unsigned &RemainingSize) const;
203 
204   bool finishLayout(const MCAssembler &Asm) const override;
205 
206   unsigned getMaximumNopSize(const MCSubtargetInfo &STI) const override;
207 
208   bool writeNopData(raw_ostream &OS, uint64_t Count,
209                     const MCSubtargetInfo *STI) const override;
210 };
211 } // end anonymous namespace
212 
213 static bool isRelaxableBranch(unsigned Opcode) {
214   return Opcode == X86::JCC_1 || Opcode == X86::JMP_1;
215 }
216 
217 static unsigned getRelaxedOpcodeBranch(unsigned Opcode,
218                                        bool Is16BitMode = false) {
219   switch (Opcode) {
220   default:
221     llvm_unreachable("invalid opcode for branch");
222   case X86::JCC_1:
223     return (Is16BitMode) ? X86::JCC_2 : X86::JCC_4;
224   case X86::JMP_1:
225     return (Is16BitMode) ? X86::JMP_2 : X86::JMP_4;
226   }
227 }
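// Illustrative example: a `jne` assembled as JCC_1 occupies 2 bytes (opcode +
// rel8); relaxing it to JCC_4 gives the 6-byte 0F 8x rel32 form outside
// 16-bit mode.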
228 
229 static unsigned getRelaxedOpcode(const MCInst &MI, bool Is16BitMode) {
230   unsigned Opcode = MI.getOpcode();
231   return isRelaxableBranch(Opcode) ? getRelaxedOpcodeBranch(Opcode, Is16BitMode)
232                                    : X86::getOpcodeForLongImmediateForm(Opcode);
233 }
234 
235 static X86::CondCode getCondFromBranch(const MCInst &MI,
236                                        const MCInstrInfo &MCII) {
237   unsigned Opcode = MI.getOpcode();
238   switch (Opcode) {
239   default:
240     return X86::COND_INVALID;
241   case X86::JCC_1: {
242     const MCInstrDesc &Desc = MCII.get(Opcode);
243     return static_cast<X86::CondCode>(
244         MI.getOperand(Desc.getNumOperands() - 1).getImm());
245   }
246   }
247 }
248 
249 static X86::SecondMacroFusionInstKind
250 classifySecondInstInMacroFusion(const MCInst &MI, const MCInstrInfo &MCII) {
251   X86::CondCode CC = getCondFromBranch(MI, MCII);
252   return classifySecondCondCodeInMacroFusion(CC);
253 }
254 
255 /// Check if the instruction uses RIP relative addressing.
256 static bool isRIPRelative(const MCInst &MI, const MCInstrInfo &MCII) {
257   unsigned Opcode = MI.getOpcode();
258   const MCInstrDesc &Desc = MCII.get(Opcode);
259   uint64_t TSFlags = Desc.TSFlags;
260   unsigned CurOp = X86II::getOperandBias(Desc);
261   int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
262   if (MemoryOperand < 0)
263     return false;
264   unsigned BaseRegNum = MemoryOperand + CurOp + X86::AddrBaseReg;
265   unsigned BaseReg = MI.getOperand(BaseRegNum).getReg();
266   return (BaseReg == X86::RIP);
267 }
268 
269 /// Check if the instruction is a prefix.
270 static bool isPrefix(unsigned Opcode, const MCInstrInfo &MCII) {
271   return X86II::isPrefix(MCII.get(Opcode).TSFlags);
272 }
273 
274 /// Check if the instruction is valid as the first instruction in macro fusion.
275 static bool isFirstMacroFusibleInst(const MCInst &Inst,
276                                     const MCInstrInfo &MCII) {
277   // An Intel instruction with RIP relative addressing is not macro fusible.
278   if (isRIPRelative(Inst, MCII))
279     return false;
280   X86::FirstMacroFusionInstKind FIK =
281       X86::classifyFirstOpcodeInMacroFusion(Inst.getOpcode());
282   return FIK != X86::FirstMacroFusionInstKind::Invalid;
283 }
284 
285 /// X86 can reduce the number of NOP bytes by padding instructions with
286 /// prefixes, which can give better performance in some cases. Here, we
287 /// determine which prefix is the most suitable.
288 ///
289 /// If the instruction has a segment override prefix, use the existing one.
290 /// If the target is 64-bit, use the CS.
291 /// If the target is 32-bit,
292 ///   - If the instruction has a ESP/EBP base register, use SS.
293 ///   - Otherwise use DS.
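///
/// Illustrative example: in 64-bit mode an instruction with no explicit
/// segment override is padded with 0x2E (CS override) bytes, while in 32-bit
/// mode a memory access with an ESP/EBP base gets 0x36 (SS override) bytes.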
294 uint8_t X86AsmBackend::determinePaddingPrefix(const MCInst &Inst) const {
295   assert((STI.hasFeature(X86::Is32Bit) || STI.hasFeature(X86::Is64Bit)) &&
296          "Prefixes can be added only in 32-bit or 64-bit mode.");
297   const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
298   uint64_t TSFlags = Desc.TSFlags;
299 
300   // Determine where the memory operand starts, if present.
301   int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
302   if (MemoryOperand != -1)
303     MemoryOperand += X86II::getOperandBias(Desc);
304 
305   unsigned SegmentReg = 0;
306   if (MemoryOperand >= 0) {
307     // Check for explicit segment override on memory operand.
308     SegmentReg = Inst.getOperand(MemoryOperand + X86::AddrSegmentReg).getReg();
309   }
310 
311   switch (TSFlags & X86II::FormMask) {
312   default:
313     break;
314   case X86II::RawFrmDstSrc: {
315     // Check segment override opcode prefix as needed (not for %ds).
316     if (Inst.getOperand(2).getReg() != X86::DS)
317       SegmentReg = Inst.getOperand(2).getReg();
318     break;
319   }
320   case X86II::RawFrmSrc: {
321     // Check segment override opcode prefix as needed (not for %ds).
322     if (Inst.getOperand(1).getReg() != X86::DS)
323       SegmentReg = Inst.getOperand(1).getReg();
324     break;
325   }
326   case X86II::RawFrmMemOffs: {
327     // Check segment override opcode prefix as needed.
328     SegmentReg = Inst.getOperand(1).getReg();
329     break;
330   }
331   }
332 
333   if (SegmentReg != 0)
334     return X86::getSegmentOverridePrefixForReg(SegmentReg);
335 
336   if (STI.hasFeature(X86::Is64Bit))
337     return X86::CS_Encoding;
338 
339   if (MemoryOperand >= 0) {
340     unsigned BaseRegNum = MemoryOperand + X86::AddrBaseReg;
341     unsigned BaseReg = Inst.getOperand(BaseRegNum).getReg();
342     if (BaseReg == X86::ESP || BaseReg == X86::EBP)
343       return X86::SS_Encoding;
344   }
345   return X86::DS_Encoding;
346 }
347 
348 /// Check if the two instructions will be macro-fused on the target cpu.
349 bool X86AsmBackend::isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const {
350   const MCInstrDesc &InstDesc = MCII->get(Jcc.getOpcode());
351   if (!InstDesc.isConditionalBranch())
352     return false;
353   if (!isFirstMacroFusibleInst(Cmp, *MCII))
354     return false;
355   const X86::FirstMacroFusionInstKind CmpKind =
356       X86::classifyFirstOpcodeInMacroFusion(Cmp.getOpcode());
357   const X86::SecondMacroFusionInstKind BranchKind =
358       classifySecondInstInMacroFusion(Jcc, *MCII);
359   return X86::isMacroFused(CmpKind, BranchKind);
360 }
361 
362 /// Check if the instruction has a variant symbol operand.
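/// (e.g. an operand such as foo@TLSCALL or foo@PLT, whose symbol reference
/// carries a variant kind other than VK_None).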
363 static bool hasVariantSymbol(const MCInst &MI) {
364   for (auto &Operand : MI) {
365     if (!Operand.isExpr())
366       continue;
367     const MCExpr &Expr = *Operand.getExpr();
368     if (Expr.getKind() == MCExpr::SymbolRef &&
369         cast<MCSymbolRefExpr>(Expr).getKind() != MCSymbolRefExpr::VK_None)
370       return true;
371   }
372   return false;
373 }
374 
375 bool X86AsmBackend::allowAutoPadding() const {
376   return (AlignBoundary != Align(1) && AlignBranchType != X86::AlignBranchNone);
377 }
378 
379 bool X86AsmBackend::allowEnhancedRelaxation() const {
380   return allowAutoPadding() && TargetPrefixMax != 0 && X86PadForBranchAlign;
381 }
382 
383 /// X86 has certain instructions which enable interrupts exactly one
384 /// instruction *after* the instruction which stores to SS.  Return true if the
385 /// given instruction may have such an interrupt delay slot.
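/// Illustrative example: in `mov %ax, %ss; mov %ebp, %esp` interrupts are
/// inhibited until after the second instruction; padding a NOP between the
/// two would move that protection onto the NOP instead.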
386 static bool mayHaveInterruptDelaySlot(unsigned InstOpcode) {
387   switch (InstOpcode) {
388   case X86::POPSS16:
389   case X86::POPSS32:
390   case X86::STI:
391     return true;
392 
393   case X86::MOV16sr:
394   case X86::MOV32sr:
395   case X86::MOV64sr:
396   case X86::MOV16sm:
397     // In fact, this is only the case if the first operand is SS. However, as
398     // segment moves occur extremely rarely, this is just a minor pessimization.
399     return true;
400   }
401   return false;
402 }
403 
404 /// Check if the instruction to be emitted is right after any data.
405 static bool
406 isRightAfterData(MCFragment *CurrentFragment,
407                  const std::pair<MCFragment *, size_t> &PrevInstPosition) {
408   MCFragment *F = CurrentFragment;
409   // Since data is always emitted into a DataFragment, our check strategy is
410   // simple here.
411   //   - If the fragment is a DataFragment
412   //     - If it's empty (section start or data after align), return false.
413   //     - If it's not the fragment where the previous instruction is,
414   //       return true.
415   //     - If it's the fragment holding the previous instruction but its
416   //       size changed since the previous instruction was emitted into
417   //       it, return true.
418   //     - Otherwise, return false.
419   //   - If the fragment is not a DataFragment, return false.
420   if (auto *DF = dyn_cast_or_null<MCDataFragment>(F))
421     return DF->getContents().size() &&
422            (DF != PrevInstPosition.first ||
423             DF->getContents().size() != PrevInstPosition.second);
424 
425   return false;
426 }
427 
428 /// \returns the fragment size if it has instructions, otherwise returns 0.
429 static size_t getSizeForInstFragment(const MCFragment *F) {
430   if (!F || !F->hasInstructions())
431     return 0;
432   // MCEncodedFragmentWithContents being templated makes this tricky.
433   switch (F->getKind()) {
434   default:
435     llvm_unreachable("Unknown fragment with instructions!");
436   case MCFragment::FT_Data:
437     return cast<MCDataFragment>(*F).getContents().size();
438   case MCFragment::FT_Relaxable:
439     return cast<MCRelaxableFragment>(*F).getContents().size();
440   case MCFragment::FT_CompactEncodedInst:
441     return cast<MCCompactEncodedInstFragment>(*F).getContents().size();
442   }
443 }
444 
445 /// Return true if we can insert a NOP or prefixes automatically before the
446 /// instruction to be emitted.
447 bool X86AsmBackend::canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const {
448   if (hasVariantSymbol(Inst))
449     // The linker may rewrite an instruction with a variant symbol operand
450     // (e.g. TLSCALL).
451     return false;
452 
453   if (mayHaveInterruptDelaySlot(PrevInstOpcode))
454     // If this instruction follows an interrupt enabling instruction with a one
455     // instruction delay, inserting a nop would change behavior.
456     return false;
457 
458   if (isPrefix(PrevInstOpcode, *MCII))
459     // If this instruction follows a prefix, inserting a nop/prefix would change
460     // semantics.
461     return false;
462 
463   if (isPrefix(Inst.getOpcode(), *MCII))
464     // If this instruction is a prefix, inserting a prefix would change
465     // semantics.
466     return false;
467 
468   if (IsRightAfterData)
469     // If this instruction follows any data, there is no clear
470     // instruction boundary; inserting a nop/prefix would change semantics.
471     return false;
472 
473   return true;
474 }
475 
476 bool X86AsmBackend::canPadBranches(MCObjectStreamer &OS) const {
477   if (!OS.getAllowAutoPadding())
478     return false;
479   assert(allowAutoPadding() && "incorrect initialization!");
480 
481   // We only pad in the text section.
482   if (!OS.getCurrentSectionOnly()->isText())
483     return false;
484 
485   // TODO: We currently don't deal with bundle cases.
486   if (OS.getAssembler().isBundlingEnabled())
487     return false;
488 
489   // Branches only need to be aligned in 32-bit or 64-bit mode.
490   if (!(STI.hasFeature(X86::Is64Bit) || STI.hasFeature(X86::Is32Bit)))
491     return false;
492 
493   return true;
494 }
495 
496 /// Check if the instruction needs to be aligned.
497 bool X86AsmBackend::needAlign(const MCInst &Inst) const {
498   const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
499   return (Desc.isConditionalBranch() &&
500           (AlignBranchType & X86::AlignBranchJcc)) ||
501          (Desc.isUnconditionalBranch() &&
502           (AlignBranchType & X86::AlignBranchJmp)) ||
503          (Desc.isCall() && (AlignBranchType & X86::AlignBranchCall)) ||
504          (Desc.isReturn() && (AlignBranchType & X86::AlignBranchRet)) ||
505          (Desc.isIndirectBranch() &&
506           (AlignBranchType & X86::AlignBranchIndirect));
507 }
508 
509 /// Insert BoundaryAlignFragment before instructions to align branches.
510 void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS,
511                                          const MCInst &Inst, const MCSubtargetInfo &STI) {
512   // Used by canPadInst. Done here, because in emitInstructionEnd, the current
513   // fragment will have changed.
514   IsRightAfterData =
515       isRightAfterData(OS.getCurrentFragment(), PrevInstPosition);
516 
517   if (!canPadBranches(OS))
518     return;
519 
520   // NB: PrevInst only valid if canPadBranches is true.
521   if (!isMacroFused(PrevInst, Inst))
522     // Macro fusion doesn't actually happen; clear the pending fragment.
523     PendingBA = nullptr;
524 
525   // When branch padding is enabled (basically the skx102 erratum => unlikely),
526   // we call canPadInst (not cheap) twice. However, in the common case, we can
527   // avoid unnecessary calls to that, as this is otherwise only used for
528   // relaxable fragments.
529   if (!canPadInst(Inst, OS))
530     return;
531 
532   if (PendingBA && PendingBA->getNext() == OS.getCurrentFragment()) {
533     // Macro fusion actually happens and there is no other fragment inserted
534     // after the previous instruction.
535     //
536     // Do nothing here since we already inserted a BoundaryAlign fragment when
537     // we met the first instruction in the fused pair and we'll tie them
538     // together in emitInstructionEnd.
539     //
540     // Note: When there is at least one fragment, such as MCAlignFragment,
541     // inserted after the previous instruction, e.g.
542     //
543     // \code
544     //   cmp %rax %rcx
545     //   .align 16
546     //   je .Label0
547     // \endcode
548     //
549     // We will treat the JCC as an unfused branch although it may be fused
550     // with the CMP.
551     return;
552   }
553 
554   if (needAlign(Inst) || ((AlignBranchType & X86::AlignBranchFused) &&
555                           isFirstMacroFusibleInst(Inst, *MCII))) {
556     // If we meet an unfused branch or the first instruction in a fusible pair,
557     // insert a BoundaryAlign fragment.
558     PendingBA = OS.getContext().allocFragment<MCBoundaryAlignFragment>(
559         AlignBoundary, STI);
560     OS.insert(PendingBA);
561   }
562 }
563 
564 /// Set the last fragment to be aligned for the BoundaryAlignFragment.
565 void X86AsmBackend::emitInstructionEnd(MCObjectStreamer &OS,
566                                        const MCInst &Inst) {
567   MCFragment *CF = OS.getCurrentFragment();
568   if (auto *F = dyn_cast_or_null<MCRelaxableFragment>(CF))
569     F->setAllowAutoPadding(canPadInst(Inst, OS));
570 
571   // Update PrevInstOpcode here, since canPadInst() reads it.
572   PrevInstOpcode = Inst.getOpcode();
573   PrevInstPosition = std::make_pair(CF, getSizeForInstFragment(CF));
574 
575   if (!canPadBranches(OS))
576     return;
577 
578   // PrevInst is only needed if canPadBranches. Copying an MCInst isn't cheap.
579   PrevInst = Inst;
580 
581   if (!needAlign(Inst) || !PendingBA)
582     return;
583 
584   // Tie the aligned instructions into a pending BoundaryAlign.
585   PendingBA->setLastFragment(CF);
586   PendingBA = nullptr;
587 
588   // We need to ensure that further data isn't added to the current
589   // DataFragment, so that we can get the size of instructions later in
590   // MCAssembler::relaxBoundaryAlign. The easiest way is to insert a new empty
591   // DataFragment.
592   if (isa_and_nonnull<MCDataFragment>(CF))
593     OS.insert(OS.getContext().allocFragment<MCDataFragment>());
594 
595   // Update the maximum alignment on the current section if necessary.
596   MCSection *Sec = OS.getCurrentSectionOnly();
597   Sec->ensureMinAlignment(AlignBoundary);
598 }
599 
600 std::optional<MCFixupKind> X86AsmBackend::getFixupKind(StringRef Name) const {
601   if (STI.getTargetTriple().isOSBinFormatELF()) {
602     unsigned Type;
603     if (STI.getTargetTriple().getArch() == Triple::x86_64) {
604       Type = llvm::StringSwitch<unsigned>(Name)
605 #define ELF_RELOC(X, Y) .Case(#X, Y)
606 #include "llvm/BinaryFormat/ELFRelocs/x86_64.def"
607 #undef ELF_RELOC
608                  .Case("BFD_RELOC_NONE", ELF::R_X86_64_NONE)
609                  .Case("BFD_RELOC_8", ELF::R_X86_64_8)
610                  .Case("BFD_RELOC_16", ELF::R_X86_64_16)
611                  .Case("BFD_RELOC_32", ELF::R_X86_64_32)
612                  .Case("BFD_RELOC_64", ELF::R_X86_64_64)
613                  .Default(-1u);
614     } else {
615       Type = llvm::StringSwitch<unsigned>(Name)
616 #define ELF_RELOC(X, Y) .Case(#X, Y)
617 #include "llvm/BinaryFormat/ELFRelocs/i386.def"
618 #undef ELF_RELOC
619                  .Case("BFD_RELOC_NONE", ELF::R_386_NONE)
620                  .Case("BFD_RELOC_8", ELF::R_386_8)
621                  .Case("BFD_RELOC_16", ELF::R_386_16)
622                  .Case("BFD_RELOC_32", ELF::R_386_32)
623                  .Default(-1u);
624     }
625     if (Type == -1u)
626       return std::nullopt;
627     return static_cast<MCFixupKind>(FirstLiteralRelocationKind + Type);
628   }
629   return MCAsmBackend::getFixupKind(Name);
630 }
631 
632 const MCFixupKindInfo &X86AsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
633   const static MCFixupKindInfo Infos[X86::NumTargetFixupKinds] = {
634       {"reloc_riprel_4byte", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
635       {"reloc_riprel_4byte_movq_load", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
636       {"reloc_riprel_4byte_relax", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
637       {"reloc_riprel_4byte_relax_rex", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
638       {"reloc_signed_4byte", 0, 32, 0},
639       {"reloc_signed_4byte_relax", 0, 32, 0},
640       {"reloc_global_offset_table", 0, 32, 0},
641       {"reloc_global_offset_table8", 0, 64, 0},
642       {"reloc_branch_4byte_pcrel", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
643   };
644 
645   // Fixup kinds from .reloc directive are like R_386_NONE/R_X86_64_NONE. They
646   // do not require any extra processing.
647   if (Kind >= FirstLiteralRelocationKind)
648     return MCAsmBackend::getFixupKindInfo(FK_NONE);
649 
650   if (Kind < FirstTargetFixupKind)
651     return MCAsmBackend::getFixupKindInfo(Kind);
652 
653   assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
654          "Invalid kind!");
655   assert(Infos[Kind - FirstTargetFixupKind].Name && "Empty fixup name!");
656   return Infos[Kind - FirstTargetFixupKind];
657 }
658 
659 bool X86AsmBackend::shouldForceRelocation(const MCAssembler &,
660                                           const MCFixup &Fixup, const MCValue &,
661                                           const MCSubtargetInfo *STI) {
662   return Fixup.getKind() >= FirstLiteralRelocationKind;
663 }
664 
665 static unsigned getFixupKindSize(unsigned Kind) {
666   switch (Kind) {
667   default:
668     llvm_unreachable("invalid fixup kind!");
669   case FK_NONE:
670     return 0;
671   case FK_PCRel_1:
672   case FK_SecRel_1:
673   case FK_Data_1:
674     return 1;
675   case FK_PCRel_2:
676   case FK_SecRel_2:
677   case FK_Data_2:
678     return 2;
679   case FK_PCRel_4:
680   case X86::reloc_riprel_4byte:
681   case X86::reloc_riprel_4byte_relax:
682   case X86::reloc_riprel_4byte_relax_rex:
683   case X86::reloc_riprel_4byte_movq_load:
684   case X86::reloc_signed_4byte:
685   case X86::reloc_signed_4byte_relax:
686   case X86::reloc_global_offset_table:
687   case X86::reloc_branch_4byte_pcrel:
688   case FK_SecRel_4:
689   case FK_Data_4:
690     return 4;
691   case FK_PCRel_8:
692   case FK_SecRel_8:
693   case FK_Data_8:
694   case X86::reloc_global_offset_table8:
695     return 8;
696   }
697 }
698 
699 void X86AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
700                                const MCValue &Target,
701                                MutableArrayRef<char> Data,
702                                uint64_t Value, bool IsResolved,
703                                const MCSubtargetInfo *STI) const {
704   unsigned Kind = Fixup.getKind();
705   if (Kind >= FirstLiteralRelocationKind)
706     return;
707   unsigned Size = getFixupKindSize(Kind);
708 
709   assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!");
710 
711   int64_t SignedValue = static_cast<int64_t>(Value);
712   if ((Target.isAbsolute() || IsResolved) &&
713       getFixupKindInfo(Fixup.getKind()).Flags &
714       MCFixupKindInfo::FKF_IsPCRel) {
715     // Check that the PC-relative fixup fits into the fixup size.
716     if (Size > 0 && !isIntN(Size * 8, SignedValue))
717       Asm.getContext().reportError(
718                                    Fixup.getLoc(), "value of " + Twine(SignedValue) +
719                                    " is too large for field of " + Twine(Size) +
720                                    ((Size == 1) ? " byte." : " bytes."));
721   } else {
722     // Check that the upper bits are either all zeros or all ones.
723     // Specifically ignore overflow/underflow as long as the leakage is
724     // limited to the lower bits. This is to remain compatible with
725     // other assemblers.
726     assert((Size == 0 || isIntN(Size * 8 + 1, SignedValue)) &&
727            "Value does not fit in the Fixup field");
728   }
729 
730   for (unsigned i = 0; i != Size; ++i)
731     Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8));
732 }
733 
734 bool X86AsmBackend::mayNeedRelaxation(const MCInst &MI,
735                                       const MCSubtargetInfo &STI) const {
736   unsigned Opcode = MI.getOpcode();
737   unsigned SkipOperands = X86::isCCMPCC(Opcode) ? 2 : 0;
738   return isRelaxableBranch(Opcode) ||
739          (X86::getOpcodeForLongImmediateForm(Opcode) != Opcode &&
740           MI.getOperand(MI.getNumOperands() - 1 - SkipOperands).isExpr());
741 }
742 
743 bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
744                                          uint64_t Value) const {
745   // Relax if the value is too big for a (signed) i8.
746   return !isInt<8>(Value);
747 }
748 
749 // FIXME: Can tblgen help at all here to verify there aren't other instructions
750 // we can relax?
751 void X86AsmBackend::relaxInstruction(MCInst &Inst,
752                                      const MCSubtargetInfo &STI) const {
753   // The only relaxation X86 does is from a 1-byte pcrel to a 4-byte pcrel.
754   bool Is16BitMode = STI.hasFeature(X86::Is16Bit);
755   unsigned RelaxedOp = getRelaxedOpcode(Inst, Is16BitMode);
756 
757   if (RelaxedOp == Inst.getOpcode()) {
758     SmallString<256> Tmp;
759     raw_svector_ostream OS(Tmp);
760     Inst.dump_pretty(OS);
761     OS << "\n";
762     report_fatal_error("unexpected instruction to relax: " + OS.str());
763   }
764 
765   Inst.setOpcode(RelaxedOp);
766 }
767 
768 bool X86AsmBackend::padInstructionViaPrefix(MCRelaxableFragment &RF,
769                                             MCCodeEmitter &Emitter,
770                                             unsigned &RemainingSize) const {
771   if (!RF.getAllowAutoPadding())
772     return false;
773   // If the instruction isn't fully relaxed, shifting it around might require a
774   // larger value for one of the fixups than can be encoded.  The outer loop
775   // will also catch this before moving to the next instruction, but we need to
776   // prevent padding this single instruction as well.
777   if (mayNeedRelaxation(RF.getInst(), *RF.getSubtargetInfo()))
778     return false;
779 
780   const unsigned OldSize = RF.getContents().size();
781   if (OldSize == 15)
782     return false;
783 
784   const unsigned MaxPossiblePad = std::min(15 - OldSize, RemainingSize);
785   const unsigned RemainingPrefixSize = [&]() -> unsigned {
786     SmallString<15> Code;
787     X86_MC::emitPrefix(Emitter, RF.getInst(), Code, STI);
788     assert(Code.size() < 15 && "The number of prefixes must be less than 15.");
789 
790     // TODO: It turns out we need a decent amount of plumbing for the target-
791     // specific bits to determine the number of prefixes it's safe to add. Various
792     // targets (older chips mostly, but also Atom family) encounter decoder
793     // stalls with too many prefixes.  For testing purposes, we set the value
794     // externally for the moment.
795     unsigned ExistingPrefixSize = Code.size();
796     if (TargetPrefixMax <= ExistingPrefixSize)
797       return 0;
798     return TargetPrefixMax - ExistingPrefixSize;
799   }();
800   const unsigned PrefixBytesToAdd =
801       std::min(MaxPossiblePad, RemainingPrefixSize);
802   if (PrefixBytesToAdd == 0)
803     return false;
804 
805   const uint8_t Prefix = determinePaddingPrefix(RF.getInst());
806 
807   SmallString<256> Code;
808   Code.append(PrefixBytesToAdd, Prefix);
809   Code.append(RF.getContents().begin(), RF.getContents().end());
810   RF.getContents() = Code;
811 
812   // Adjust the fixups for the change in offsets
813   for (auto &F : RF.getFixups()) {
814     F.setOffset(F.getOffset() + PrefixBytesToAdd);
815   }
816 
817   RemainingSize -= PrefixBytesToAdd;
818   return true;
819 }
820 
821 bool X86AsmBackend::padInstructionViaRelaxation(MCRelaxableFragment &RF,
822                                                 MCCodeEmitter &Emitter,
823                                                 unsigned &RemainingSize) const {
824   if (!mayNeedRelaxation(RF.getInst(), *RF.getSubtargetInfo()))
825     // TODO: There are lots of other tricks we could apply for increasing
826     // encoding size without impacting performance.
827     return false;
828 
829   MCInst Relaxed = RF.getInst();
830   relaxInstruction(Relaxed, *RF.getSubtargetInfo());
831 
832   SmallVector<MCFixup, 4> Fixups;
833   SmallString<15> Code;
834   Emitter.encodeInstruction(Relaxed, Code, Fixups, *RF.getSubtargetInfo());
835   const unsigned OldSize = RF.getContents().size();
836   const unsigned NewSize = Code.size();
837   assert(NewSize >= OldSize && "size decrease during relaxation?");
838   unsigned Delta = NewSize - OldSize;
839   if (Delta > RemainingSize)
840     return false;
841   RF.setInst(Relaxed);
842   RF.getContents() = Code;
843   RF.getFixups() = Fixups;
844   RemainingSize -= Delta;
845   return true;
846 }
847 
848 bool X86AsmBackend::padInstructionEncoding(MCRelaxableFragment &RF,
849                                            MCCodeEmitter &Emitter,
850                                            unsigned &RemainingSize) const {
851   bool Changed = false;
852   if (RemainingSize != 0)
853     Changed |= padInstructionViaRelaxation(RF, Emitter, RemainingSize);
854   if (RemainingSize != 0)
855     Changed |= padInstructionViaPrefix(RF, Emitter, RemainingSize);
856   return Changed;
857 }
858 
859 bool X86AsmBackend::finishLayout(const MCAssembler &Asm) const {
860   // See if we can further relax some instructions to cut down on the number of
861   // nop bytes required for code alignment.  The actual win is in reducing
862   // instruction count, not number of bytes.  Modern X86-64 can easily end up
863   // decode limited.  It is often better to reduce the number of instructions
864   // (i.e. eliminate nops) even at the cost of increasing the size and
865   // complexity of others.
866   if (!X86PadForAlign && !X86PadForBranchAlign)
867     return false;
868 
869   // The processed regions are delimited by LabeledFragments. -g may have more
870   // MCSymbols and therefore different relaxation results. X86PadForAlign is
871   // disabled by default to eliminate the -g vs non -g difference.
872   DenseSet<MCFragment *> LabeledFragments;
873   for (const MCSymbol &S : Asm.symbols())
874     LabeledFragments.insert(S.getFragment(false));
875 
876   for (MCSection &Sec : Asm) {
877     if (!Sec.isText())
878       continue;
879 
880     SmallVector<MCRelaxableFragment *, 4> Relaxable;
881     for (MCSection::iterator I = Sec.begin(), IE = Sec.end(); I != IE; ++I) {
882       MCFragment &F = *I;
883 
884       if (LabeledFragments.count(&F))
885         Relaxable.clear();
886 
887       if (F.getKind() == MCFragment::FT_Data ||
888           F.getKind() == MCFragment::FT_CompactEncodedInst)
889         // Skip and ignore
890         continue;
891 
892       if (F.getKind() == MCFragment::FT_Relaxable) {
893         auto &RF = cast<MCRelaxableFragment>(*I);
894         Relaxable.push_back(&RF);
895         continue;
896       }
897 
898       auto canHandle = [](MCFragment &F) -> bool {
899         switch (F.getKind()) {
900         default:
901           return false;
902         case MCFragment::FT_Align:
903           return X86PadForAlign;
904         case MCFragment::FT_BoundaryAlign:
905           return X86PadForBranchAlign;
906         }
907       };
908       // For any unhandled kind, assume we can't change layout.
909       if (!canHandle(F)) {
910         Relaxable.clear();
911         continue;
912       }
913 
914       const uint64_t OrigSize = Asm.computeFragmentSize(F);
915 
916       // To keep the effects local, prefer to relax instructions closest to
917       // the align directive.  This is purely about human understandability
918       // of the resulting code.  If we later find a reason to expand
919       // particular instructions over others, we can adjust.
920       unsigned RemainingSize = OrigSize;
921       while (!Relaxable.empty() && RemainingSize != 0) {
922         auto &RF = *Relaxable.pop_back_val();
923         // Give the backend a chance to play any tricks it wishes to increase
924         // the encoding size of the given instruction.  Target-independent code
925         // will try further relaxation, but targets may play further tricks.
926         padInstructionEncoding(RF, Asm.getEmitter(), RemainingSize);
927 
928         // If we have an instruction which hasn't been fully relaxed, we can't
929         // skip past it and insert bytes before it.  Changing its starting
930         // offset might require a larger negative offset than it can encode.
931         // We don't need to worry about larger positive offsets as none of the
932         // possible offsets between this and our align are visible, and the
933         // ones afterwards aren't changing.
934         if (mayNeedRelaxation(RF.getInst(), *RF.getSubtargetInfo()))
935           break;
936       }
937       Relaxable.clear();
938 
939       // BoundaryAlign explicitly tracks its size (unlike align).
940       if (F.getKind() == MCFragment::FT_BoundaryAlign)
941         cast<MCBoundaryAlignFragment>(F).setSize(RemainingSize);
942 
943       // If we're looking at a boundary align, make sure we don't try to pad
944       // its target instructions for some following directive.  Doing so would
945       // break the alignment of the current boundary align.
946       if (auto *BF = dyn_cast<MCBoundaryAlignFragment>(&F)) {
947         const MCFragment *LastFragment = BF->getLastFragment();
948         if (!LastFragment)
949           continue;
950         while (&*I != LastFragment)
951           ++I;
952       }
953     }
954   }
955 
956   return true;
957 }
958 
959 unsigned X86AsmBackend::getMaximumNopSize(const MCSubtargetInfo &STI) const {
960   if (STI.hasFeature(X86::Is16Bit))
961     return 4;
962   if (!STI.hasFeature(X86::FeatureNOPL) && !STI.hasFeature(X86::Is64Bit))
963     return 1;
964   if (STI.hasFeature(X86::TuningFast7ByteNOP))
965     return 7;
966   if (STI.hasFeature(X86::TuningFast15ByteNOP))
967     return 15;
968   if (STI.hasFeature(X86::TuningFast11ByteNOP))
969     return 11;
970   // FIXME: handle 32-bit mode
971   // 15 bytes is the longest single NOP instruction, but 10 bytes is
972   // commonly the longest that can be efficiently decoded.
973   return 10;
974 }
975 
976 /// Write a sequence of optimal nops to the output, covering \p Count
977 /// bytes.
978 /// \return - true on success, false on failure
979 bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
980                                  const MCSubtargetInfo *STI) const {
981   static const char Nops32Bit[10][11] = {
982       // nop
983       "\x90",
984       // xchg %ax,%ax
985       "\x66\x90",
986       // nopl (%[re]ax)
987       "\x0f\x1f\x00",
988       // nopl 0(%[re]ax)
989       "\x0f\x1f\x40\x00",
990       // nopl 0(%[re]ax,%[re]ax,1)
991       "\x0f\x1f\x44\x00\x00",
992       // nopw 0(%[re]ax,%[re]ax,1)
993       "\x66\x0f\x1f\x44\x00\x00",
994       // nopl 0L(%[re]ax)
995       "\x0f\x1f\x80\x00\x00\x00\x00",
996       // nopl 0L(%[re]ax,%[re]ax,1)
997       "\x0f\x1f\x84\x00\x00\x00\x00\x00",
998       // nopw 0L(%[re]ax,%[re]ax,1)
999       "\x66\x0f\x1f\x84\x00\x00\x00\x00\x00",
1000       // nopw %cs:0L(%[re]ax,%[re]ax,1)
1001       "\x66\x2e\x0f\x1f\x84\x00\x00\x00\x00\x00",
1002   };
1003 
1004   // 16-bit mode uses different nop patterns than 32-bit.
1005   static const char Nops16Bit[4][11] = {
1006       // nop
1007       "\x90",
1008       // xchg %eax,%eax
1009       "\x66\x90",
1010       // lea 0(%si),%si
1011       "\x8d\x74\x00",
1012       // lea 0w(%si),%si
1013       "\x8d\xb4\x00\x00",
1014   };
1015 
1016   const char(*Nops)[11] =
1017       STI->hasFeature(X86::Is16Bit) ? Nops16Bit : Nops32Bit;
1018 
1019   uint64_t MaxNopLength = (uint64_t)getMaximumNopSize(*STI);
1020 
1021   // Emit as many MaxNopLength NOPs as needed, then emit a NOP of the remaining
1022   // length.
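  // Illustrative example: Count = 23 with MaxNopLength = 10 emits two 10-byte
  // NOPs followed by a 3-byte NOP; lengths above 10 bytes are built by
  // prepending 0x66 prefixes to the 10-byte NOP.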
1023   do {
1024     const uint8_t ThisNopLength = (uint8_t) std::min(Count, MaxNopLength);
1025     const uint8_t Prefixes = ThisNopLength <= 10 ? 0 : ThisNopLength - 10;
1026     for (uint8_t i = 0; i < Prefixes; i++)
1027       OS << '\x66';
1028     const uint8_t Rest = ThisNopLength - Prefixes;
1029     if (Rest != 0)
1030       OS.write(Nops[Rest - 1], Rest);
1031     Count -= ThisNopLength;
1032   } while (Count != 0);
1033 
1034   return true;
1035 }
1036 
1037 /* *** */
1038 
1039 namespace {
1040 
1041 class ELFX86AsmBackend : public X86AsmBackend {
1042 public:
1043   uint8_t OSABI;
1044   ELFX86AsmBackend(const Target &T, uint8_t OSABI, const MCSubtargetInfo &STI)
1045       : X86AsmBackend(T, STI), OSABI(OSABI) {}
1046 };
1047 
1048 class ELFX86_32AsmBackend : public ELFX86AsmBackend {
1049 public:
1050   ELFX86_32AsmBackend(const Target &T, uint8_t OSABI,
1051                       const MCSubtargetInfo &STI)
1052     : ELFX86AsmBackend(T, OSABI, STI) {}
1053 
1054   std::unique_ptr<MCObjectTargetWriter>
1055   createObjectTargetWriter() const override {
1056     return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI, ELF::EM_386);
1057   }
1058 };
1059 
1060 class ELFX86_X32AsmBackend : public ELFX86AsmBackend {
1061 public:
1062   ELFX86_X32AsmBackend(const Target &T, uint8_t OSABI,
1063                        const MCSubtargetInfo &STI)
1064       : ELFX86AsmBackend(T, OSABI, STI) {}
1065 
1066   std::unique_ptr<MCObjectTargetWriter>
1067   createObjectTargetWriter() const override {
1068     return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
1069                                     ELF::EM_X86_64);
1070   }
1071 };
1072 
1073 class ELFX86_IAMCUAsmBackend : public ELFX86AsmBackend {
1074 public:
1075   ELFX86_IAMCUAsmBackend(const Target &T, uint8_t OSABI,
1076                          const MCSubtargetInfo &STI)
1077       : ELFX86AsmBackend(T, OSABI, STI) {}
1078 
1079   std::unique_ptr<MCObjectTargetWriter>
1080   createObjectTargetWriter() const override {
1081     return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
1082                                     ELF::EM_IAMCU);
1083   }
1084 };
1085 
1086 class ELFX86_64AsmBackend : public ELFX86AsmBackend {
1087 public:
1088   ELFX86_64AsmBackend(const Target &T, uint8_t OSABI,
1089                       const MCSubtargetInfo &STI)
1090     : ELFX86AsmBackend(T, OSABI, STI) {}
1091 
1092   std::unique_ptr<MCObjectTargetWriter>
1093   createObjectTargetWriter() const override {
1094     return createX86ELFObjectWriter(/*IsELF64*/ true, OSABI, ELF::EM_X86_64);
1095   }
1096 };
1097 
1098 class WindowsX86AsmBackend : public X86AsmBackend {
1099   bool Is64Bit;
1100 
1101 public:
1102   WindowsX86AsmBackend(const Target &T, bool is64Bit,
1103                        const MCSubtargetInfo &STI)
1104     : X86AsmBackend(T, STI)
1105     , Is64Bit(is64Bit) {
1106   }
1107 
1108   std::optional<MCFixupKind> getFixupKind(StringRef Name) const override {
1109     return StringSwitch<std::optional<MCFixupKind>>(Name)
1110         .Case("dir32", FK_Data_4)
1111         .Case("secrel32", FK_SecRel_4)
1112         .Case("secidx", FK_SecRel_2)
1113         .Default(MCAsmBackend::getFixupKind(Name));
1114   }
1115 
1116   std::unique_ptr<MCObjectTargetWriter>
1117   createObjectTargetWriter() const override {
1118     return createX86WinCOFFObjectWriter(Is64Bit);
1119   }
1120 };
1121 
1122 namespace CU {
1123 
1124   /// Compact unwind encoding values.
1125   enum CompactUnwindEncodings {
1126     /// [RE]BP-based frame where [RE]BP is pushed on the stack immediately after
1127     /// the return address, then [RE]SP is moved to [RE]BP.
1128     UNWIND_MODE_BP_FRAME                   = 0x01000000,
1129 
1130     /// A frameless function with a small constant stack size.
1131     UNWIND_MODE_STACK_IMMD                 = 0x02000000,
1132 
1133     /// A frameless function with a large constant stack size.
1134     UNWIND_MODE_STACK_IND                  = 0x03000000,
1135 
1136     /// No compact unwind encoding is available.
1137     UNWIND_MODE_DWARF                      = 0x04000000,
1138 
1139     /// Mask for encoding the frame registers.
1140     UNWIND_BP_FRAME_REGISTERS              = 0x00007FFF,
1141 
1142     /// Mask for encoding the frameless registers.
1143     UNWIND_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF
1144   };
1145 
1146 } // namespace CU
1147 
1148 class DarwinX86AsmBackend : public X86AsmBackend {
1149   const MCRegisterInfo &MRI;
1150 
1151   /// Number of registers that can be saved in a compact unwind encoding.
1152   enum { CU_NUM_SAVED_REGS = 6 };
1153 
1154   mutable unsigned SavedRegs[CU_NUM_SAVED_REGS];
1155   Triple TT;
1156   bool Is64Bit;
1157 
1158   unsigned OffsetSize;                   ///< Size of a pushed register slot.
1159   unsigned MoveInstrSize;                ///< Size of a "move" instruction.
1160   unsigned StackDivide;                  ///< Amount to adjust stack size by.
1161 protected:
1162   /// Size of a "push" instruction for the given register.
1163   unsigned PushInstrSize(unsigned Reg) const {
1164     switch (Reg) {
1165       case X86::EBX:
1166       case X86::ECX:
1167       case X86::EDX:
1168       case X86::EDI:
1169       case X86::ESI:
1170       case X86::EBP:
1171       case X86::RBX:
1172       case X86::RBP:
1173         return 1;
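      // Pushes of R12-R15 need a REX.B prefix, so they are one byte longer
      // than pushes of the registers above.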
1174       case X86::R12:
1175       case X86::R13:
1176       case X86::R14:
1177       case X86::R15:
1178         return 2;
1179     }
1180     return 1;
1181   }
1182 
1183 private:
1184   /// Get the compact unwind number for a given register. The number
1185   /// corresponds to the enum lists in compact_unwind_encoding.h.
1186   int getCompactUnwindRegNum(unsigned Reg) const {
1187     static const MCPhysReg CU32BitRegs[7] = {
1188       X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
1189     };
1190     static const MCPhysReg CU64BitRegs[] = {
1191       X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
1192     };
1193     const MCPhysReg *CURegs = Is64Bit ? CU64BitRegs : CU32BitRegs;
1194     for (int Idx = 1; *CURegs; ++CURegs, ++Idx)
1195       if (*CURegs == Reg)
1196         return Idx;
1197 
1198     return -1;
1199   }
1200 
1201   /// Return the registers encoded for a compact encoding with a frame
1202   /// pointer.
1203   uint32_t encodeCompactUnwindRegistersWithFrame() const {
1204     // Encode the registers in the order they were saved --- 3 bits per
1205     // register. The list of saved registers is assumed to be in reverse
1206     // order. The registers are numbered from 1 to CU_NUM_SAVED_REGS.
1207     uint32_t RegEnc = 0;
1208     for (int i = 0, Idx = 0; i != CU_NUM_SAVED_REGS; ++i) {
1209       unsigned Reg = SavedRegs[i];
1210       if (Reg == 0) break;
1211 
1212       int CURegNum = getCompactUnwindRegNum(Reg);
1213       if (CURegNum == -1) return ~0U;
1214 
1215       // Encode the 3-bit register number in order, skipping over 3 bits for
1216       // each register.
1217       RegEnc |= (CURegNum & 0x7) << (Idx++ * 3);
1218     }
1219 
1220     assert((RegEnc & 0x3FFFF) == RegEnc &&
1221            "Invalid compact register encoding!");
1222     return RegEnc;
1223   }
1224 
1225   /// Create the permutation encoding used with frameless stacks. It is
1226   /// passed the number of registers to be saved and an array of the registers
1227   /// saved.
1228   uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned RegCount) const {
1229     // The saved registers are numbered from 1 to 6. In order to encode the
1230     // order in which they were saved, we re-number them according to their
1231     // place in the register order. The re-numbering is relative to the last
1232     // re-numbered register. E.g., if we have registers {6, 2, 4, 5} saved in
1233     // that order:
1234     //
1235     //    Orig  Re-Num
1236     //    ----  ------
1237     //     6       6
1238     //     2       2
1239     //     4       3
1240     //     5       3
1241     //
1242     for (unsigned i = 0; i < RegCount; ++i) {
1243       int CUReg = getCompactUnwindRegNum(SavedRegs[i]);
1244       if (CUReg == -1) return ~0U;
1245       SavedRegs[i] = CUReg;
1246     }
1247 
1248     // Reverse the list.
1249     std::reverse(&SavedRegs[0], &SavedRegs[CU_NUM_SAVED_REGS]);
1250 
1251     uint32_t RenumRegs[CU_NUM_SAVED_REGS];
1252     for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i){
1253       unsigned Countless = 0;
1254       for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j)
1255         if (SavedRegs[j] < SavedRegs[i])
1256           ++Countless;
1257 
1258       RenumRegs[i] = SavedRegs[i] - Countless - 1;
1259     }
1260 
1261     // Take the renumbered values and encode them into a 10-bit number.
1262     uint32_t permutationEncoding = 0;
1263     switch (RegCount) {
1264     case 6:
1265       permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
1266                              + 6 * RenumRegs[2] +  2 * RenumRegs[3]
1267                              +     RenumRegs[4];
1268       break;
1269     case 5:
1270       permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2]
1271                              + 6 * RenumRegs[3] +  2 * RenumRegs[4]
1272                              +     RenumRegs[5];
1273       break;
1274     case 4:
1275       permutationEncoding |=  60 * RenumRegs[2] + 12 * RenumRegs[3]
1276                              + 3 * RenumRegs[4] +      RenumRegs[5];
1277       break;
1278     case 3:
1279       permutationEncoding |=  20 * RenumRegs[3] +  4 * RenumRegs[4]
1280                              +     RenumRegs[5];
1281       break;
1282     case 2:
1283       permutationEncoding |=   5 * RenumRegs[4] +      RenumRegs[5];
1284       break;
1285     case 1:
1286       permutationEncoding |=       RenumRegs[5];
1287       break;
1288     }
1289 
1290     assert((permutationEncoding & 0x3FF) == permutationEncoding &&
1291            "Invalid compact register encoding!");
1292     return permutationEncoding;
1293   }
1294 
1295 public:
1296   DarwinX86AsmBackend(const Target &T, const MCRegisterInfo &MRI,
1297                       const MCSubtargetInfo &STI)
1298       : X86AsmBackend(T, STI), MRI(MRI), TT(STI.getTargetTriple()),
1299         Is64Bit(TT.isArch64Bit()) {
1300     memset(SavedRegs, 0, sizeof(SavedRegs));
1301     OffsetSize = Is64Bit ? 8 : 4;
1302     MoveInstrSize = Is64Bit ? 3 : 2;
1303     StackDivide = Is64Bit ? 8 : 4;
1304   }
1305 
1306   std::unique_ptr<MCObjectTargetWriter>
1307   createObjectTargetWriter() const override {
1308     uint32_t CPUType = cantFail(MachO::getCPUType(TT));
1309     uint32_t CPUSubType = cantFail(MachO::getCPUSubType(TT));
1310     return createX86MachObjectWriter(Is64Bit, CPUType, CPUSubType);
1311   }
1312 
1313   /// Implementation of algorithm to generate the compact unwind encoding
1314   /// for the CFI instructions.
1315   uint64_t generateCompactUnwindEncoding(const MCDwarfFrameInfo *FI,
1316                                          const MCContext *Ctxt) const override {
1317     ArrayRef<MCCFIInstruction> Instrs = FI->Instructions;
1318     if (Instrs.empty()) return 0;
1319     if (!isDarwinCanonicalPersonality(FI->Personality) &&
1320         !Ctxt->emitCompactUnwindNonCanonical())
1321       return CU::UNWIND_MODE_DWARF;
1322 
1323     // Reset the saved registers.
1324     unsigned SavedRegIdx = 0;
1325     memset(SavedRegs, 0, sizeof(SavedRegs));
1326 
1327     bool HasFP = false;
1328 
1329     // Encode that we are using EBP/RBP as the frame pointer.
1330     uint64_t CompactUnwindEncoding = 0;
1331 
1332     unsigned SubtractInstrIdx = Is64Bit ? 3 : 2;
1333     unsigned InstrOffset = 0;
1334     unsigned StackAdjust = 0;
1335     uint64_t StackSize = 0;
1336     int64_t MinAbsOffset = std::numeric_limits<int64_t>::max();
1337 
1338     for (const MCCFIInstruction &Inst : Instrs) {
1339       switch (Inst.getOperation()) {
1340       default:
1341         // Any other CFI directives indicate a frame that we aren't prepared
1342         // to represent via compact unwind, so just bail out.
1343         return CU::UNWIND_MODE_DWARF;
1344       case MCCFIInstruction::OpDefCfaRegister: {
1345         // Defines a frame pointer. E.g.
1346         //
1347         //     movq %rsp, %rbp
1348         //  L0:
1349         //     .cfi_def_cfa_register %rbp
1350         //
1351         HasFP = true;
1352 
1353         // If the frame pointer is other than ebp/rbp, we do not have a way to
1354         // generate a compact unwinding representation, so bail out.
1355         if (*MRI.getLLVMRegNum(Inst.getRegister(), true) !=
1356             (Is64Bit ? X86::RBP : X86::EBP))
1357           return CU::UNWIND_MODE_DWARF;
1358 
1359         // Reset the counts.
1360         memset(SavedRegs, 0, sizeof(SavedRegs));
1361         StackAdjust = 0;
1362         SavedRegIdx = 0;
1363         MinAbsOffset = std::numeric_limits<int64_t>::max();
1364         InstrOffset += MoveInstrSize;
1365         break;
1366       }
1367       case MCCFIInstruction::OpDefCfaOffset: {
1368         // Defines a new offset for the CFA. E.g.
1369         //
1370         //  With frame:
1371         //
1372         //     pushq %rbp
1373         //  L0:
1374         //     .cfi_def_cfa_offset 16
1375         //
1376         //  Without frame:
1377         //
1378         //     subq $72, %rsp
1379         //  L0:
1380         //     .cfi_def_cfa_offset 80
1381         //
1382         StackSize = Inst.getOffset() / StackDivide;
1383         break;
1384       }
1385       case MCCFIInstruction::OpOffset: {
1386         // Defines a "push" of a callee-saved register. E.g.
1387         //
1388         //     pushq %r15
1389         //     pushq %r14
1390         //     pushq %rbx
1391         //  L0:
1392         //     subq $120, %rsp
1393         //  L1:
1394         //     .cfi_offset %rbx, -40
1395         //     .cfi_offset %r14, -32
1396         //     .cfi_offset %r15, -24
1397         //
1398         if (SavedRegIdx == CU_NUM_SAVED_REGS)
1399           // If there are too many saved registers, we cannot use a compact
1400           // unwind encoding.
1401           return CU::UNWIND_MODE_DWARF;
1402 
1403         unsigned Reg = *MRI.getLLVMRegNum(Inst.getRegister(), true);
1404         SavedRegs[SavedRegIdx++] = Reg;
1405         StackAdjust += OffsetSize;
1406         MinAbsOffset = std::min(MinAbsOffset, std::abs(Inst.getOffset()));
1407         InstrOffset += PushInstrSize(Reg);
1408         break;
1409       }
1410       }
1411     }
1412 
1413     StackAdjust /= StackDivide;
1414 
1415     if (HasFP) {
1416       if ((StackAdjust & 0xFF) != StackAdjust)
1417         // Offset was too big for a compact unwind encoding.
1418         return CU::UNWIND_MODE_DWARF;
1419 
1420       // We don't attempt to track a real StackAdjust, so if the saved registers
1421       // aren't adjacent to rbp we can't cope.
1422       if (SavedRegIdx != 0 && MinAbsOffset != 3 * (int)OffsetSize)
1423         return CU::UNWIND_MODE_DWARF;
1424 
1425       // Get the encoding of the saved registers when we have a frame pointer.
1426       uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame();
1427       if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;
1428 
1429       CompactUnwindEncoding |= CU::UNWIND_MODE_BP_FRAME;
1430       CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16;
1431       CompactUnwindEncoding |= RegEnc & CU::UNWIND_BP_FRAME_REGISTERS;
1432     } else {
1433       SubtractInstrIdx += InstrOffset;
1434       ++StackAdjust;
1435 
1436       if ((StackSize & 0xFF) == StackSize) {
1437         // Frameless stack with a small stack size.
1438         CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IMMD;
1439 
1440         // Encode the stack size.
1441         CompactUnwindEncoding |= (StackSize & 0xFF) << 16;
1442       } else {
1443         if ((StackAdjust & 0x7) != StackAdjust)
1444           // The extra stack adjustments are too big for us to handle.
1445           return CU::UNWIND_MODE_DWARF;
1446 
1447         // Frameless stack with an offset too large for us to encode compactly.
1448         CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IND;
1449 
1450         // Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP'
1451         // instruction.
1452         CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16;
1453 
1454         // Encode any extra stack adjustments (done via push instructions).
1455         CompactUnwindEncoding |= (StackAdjust & 0x7) << 13;
1456       }
1457 
1458       // Encode the number of registers saved. (Reverse the list first.)
1459       std::reverse(&SavedRegs[0], &SavedRegs[SavedRegIdx]);
1460       CompactUnwindEncoding |= (SavedRegIdx & 0x7) << 10;
1461 
1462       // Get the encoding of the saved registers when we don't have a frame
1463       // pointer.
1464       uint32_t RegEnc = encodeCompactUnwindRegistersWithoutFrame(SavedRegIdx);
1465       if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;
1466 
1467       // Encode the register encoding.
1468       CompactUnwindEncoding |=
1469         RegEnc & CU::UNWIND_FRAMELESS_STACK_REG_PERMUTATION;
1470     }
1471 
1472     return CompactUnwindEncoding;
1473   }
1474 };
1475 
1476 } // end anonymous namespace
1477 
1478 MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
1479                                            const MCSubtargetInfo &STI,
1480                                            const MCRegisterInfo &MRI,
1481                                            const MCTargetOptions &Options) {
1482   const Triple &TheTriple = STI.getTargetTriple();
1483   if (TheTriple.isOSBinFormatMachO())
1484     return new DarwinX86AsmBackend(T, MRI, STI);
1485 
1486   if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
1487     return new WindowsX86AsmBackend(T, false, STI);
1488 
1489   uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
1490 
1491   if (TheTriple.isOSIAMCU())
1492     return new ELFX86_IAMCUAsmBackend(T, OSABI, STI);
1493 
1494   return new ELFX86_32AsmBackend(T, OSABI, STI);
1495 }
1496 
1497 MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
1498                                            const MCSubtargetInfo &STI,
1499                                            const MCRegisterInfo &MRI,
1500                                            const MCTargetOptions &Options) {
1501   const Triple &TheTriple = STI.getTargetTriple();
1502   if (TheTriple.isOSBinFormatMachO())
1503     return new DarwinX86AsmBackend(T, MRI, STI);
1504 
1505   if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
1506     return new WindowsX86AsmBackend(T, true, STI);
1507 
1508   if (TheTriple.isUEFI()) {
1509     assert(TheTriple.isOSBinFormatCOFF() &&
1510          "Only COFF format is supported in UEFI environment.");
1511     return new WindowsX86AsmBackend(T, true, STI);
1512   }
1513 
1514   uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
1515 
1516   if (TheTriple.isX32())
1517     return new ELFX86_X32AsmBackend(T, OSABI, STI);
1518   return new ELFX86_64AsmBackend(T, OSABI, STI);
1519 }
1520 
1521 namespace {
1522 class X86ELFStreamer : public MCELFStreamer {
1523 public:
1524   X86ELFStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> TAB,
1525                  std::unique_ptr<MCObjectWriter> OW,
1526                  std::unique_ptr<MCCodeEmitter> Emitter)
1527       : MCELFStreamer(Context, std::move(TAB), std::move(OW),
1528                       std::move(Emitter)) {}
1529 
1530   void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
1531 };
1532 } // end anonymous namespace
1533 
1534 void X86_MC::emitInstruction(MCObjectStreamer &S, const MCInst &Inst,
1535                              const MCSubtargetInfo &STI) {
1536   auto &Backend = static_cast<X86AsmBackend &>(S.getAssembler().getBackend());
1537   Backend.emitInstructionBegin(S, Inst, STI);
1538   S.MCObjectStreamer::emitInstruction(Inst, STI);
1539   Backend.emitInstructionEnd(S, Inst);
1540 }
1541 
1542 void X86ELFStreamer::emitInstruction(const MCInst &Inst,
1543                                      const MCSubtargetInfo &STI) {
1544   X86_MC::emitInstruction(*this, Inst, STI);
1545 }
1546 
1547 MCStreamer *llvm::createX86ELFStreamer(const Triple &T, MCContext &Context,
1548                                        std::unique_ptr<MCAsmBackend> &&MAB,
1549                                        std::unique_ptr<MCObjectWriter> &&MOW,
1550                                        std::unique_ptr<MCCodeEmitter> &&MCE) {
1551   return new X86ELFStreamer(Context, std::move(MAB), std::move(MOW),
1552                             std::move(MCE));
1553 }
1554