xref: /llvm-project/bolt/lib/Target/X86/X86MCPlusBuilder.cpp (revision 34c6c5e72f48de65a7e332033af9566576c1895d)
1 //===- bolt/Target/X86/X86MCPlusBuilder.cpp -------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file provides X86-specific MCPlus builder.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "MCTargetDesc/X86BaseInfo.h"
14 #include "MCTargetDesc/X86EncodingOptimization.h"
15 #include "MCTargetDesc/X86MCTargetDesc.h"
16 #include "X86MCSymbolizer.h"
17 #include "bolt/Core/MCPlus.h"
18 #include "bolt/Core/MCPlusBuilder.h"
19 #include "llvm/BinaryFormat/ELF.h"
20 #include "llvm/MC/MCContext.h"
21 #include "llvm/MC/MCFixupKindInfo.h"
22 #include "llvm/MC/MCInst.h"
23 #include "llvm/MC/MCInstBuilder.h"
24 #include "llvm/MC/MCInstrInfo.h"
25 #include "llvm/MC/MCRegister.h"
26 #include "llvm/MC/MCRegisterInfo.h"
27 #include "llvm/Support/CommandLine.h"
28 #include "llvm/Support/DataExtractor.h"
29 #include "llvm/Support/Debug.h"
30 #include "llvm/Support/Errc.h"
31 #include "llvm/Support/ErrorHandling.h"
32 #include "llvm/Support/ErrorOr.h"
33 #include <set>
34 
35 #define DEBUG_TYPE "mcplus"
36 
37 using namespace llvm;
38 using namespace bolt;
39 
40 namespace opts {
41 
42 extern cl::OptionCategory BoltOptCategory;
43 
44 static cl::opt<bool> X86StripRedundantAddressSize(
45     "x86-strip-redundant-address-size",
46     cl::desc("Remove redundant Address-Size override prefix"), cl::init(true),
47     cl::cat(BoltOptCategory));
48 
49 } // namespace opts
50 
51 namespace {
52 
53 bool isMOVSX64rm32(const MCInst &Inst) {
54   return Inst.getOpcode() == X86::MOVSX64rm32;
55 }
56 
57 bool isADD64rr(const MCInst &Inst) { return Inst.getOpcode() == X86::ADD64rr; }
58 
59 bool isADDri(const MCInst &Inst) {
60   return Inst.getOpcode() == X86::ADD64ri32 ||
61          Inst.getOpcode() == X86::ADD64ri8;
62 }
63 
64 // Create instruction to increment contents of target by 1
65 static InstructionListType createIncMemory(const MCSymbol *Target,
66                                            MCContext *Ctx) {
67   InstructionListType Insts;
68   Insts.emplace_back();
69   Insts.back().setOpcode(X86::LOCK_INC64m);
70   Insts.back().clear();
71   Insts.back().addOperand(MCOperand::createReg(X86::RIP));        // BaseReg
72   Insts.back().addOperand(MCOperand::createImm(1));               // ScaleAmt
73   Insts.back().addOperand(MCOperand::createReg(X86::NoRegister)); // IndexReg
74 
75   Insts.back().addOperand(MCOperand::createExpr(
76       MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None,
77                               *Ctx))); // Displacement
78   Insts.back().addOperand(
79       MCOperand::createReg(X86::NoRegister)); // AddrSegmentReg
80   return Insts;
81 }
82 
83 #define GET_INSTRINFO_OPERAND_TYPES_ENUM
84 #define GET_INSTRINFO_OPERAND_TYPE
85 #define GET_INSTRINFO_MEM_OPERAND_SIZE
86 #include "X86GenInstrInfo.inc"
87 
88 class X86MCPlusBuilder : public MCPlusBuilder {
89 public:
90   using MCPlusBuilder::MCPlusBuilder;
91 
92   std::unique_ptr<MCSymbolizer>
93   createTargetSymbolizer(BinaryFunction &Function,
94                          bool CreateNewSymbols) const override {
95     return std::make_unique<X86MCSymbolizer>(Function, CreateNewSymbols);
96   }
97 
98   bool isBranch(const MCInst &Inst) const override {
99     return Analysis->isBranch(Inst) && !isTailCall(Inst);
100   }
101 
102   bool isNoop(const MCInst &Inst) const override {
103     return X86::isNOP(Inst.getOpcode());
104   }
105 
106   unsigned getCondCode(const MCInst &Inst) const override {
107     unsigned Opcode = Inst.getOpcode();
108     if (X86::isJCC(Opcode))
109       return Inst.getOperand(Info->get(Opcode).NumOperands - 1).getImm();
110     return X86::COND_INVALID;
111   }
112 
113   unsigned getInvertedCondCode(unsigned CC) const override {
114     switch (CC) {
115     default: return X86::COND_INVALID;
116     case X86::COND_E:  return X86::COND_NE;
117     case X86::COND_NE: return X86::COND_E;
118     case X86::COND_L:  return X86::COND_GE;
119     case X86::COND_LE: return X86::COND_G;
120     case X86::COND_G:  return X86::COND_LE;
121     case X86::COND_GE: return X86::COND_L;
122     case X86::COND_B:  return X86::COND_AE;
123     case X86::COND_BE: return X86::COND_A;
124     case X86::COND_A:  return X86::COND_BE;
125     case X86::COND_AE: return X86::COND_B;
126     case X86::COND_S:  return X86::COND_NS;
127     case X86::COND_NS: return X86::COND_S;
128     case X86::COND_P:  return X86::COND_NP;
129     case X86::COND_NP: return X86::COND_P;
130     case X86::COND_O:  return X86::COND_NO;
131     case X86::COND_NO: return X86::COND_O;
132     }
133   }
134 
135   unsigned getCondCodesLogicalOr(unsigned CC1, unsigned CC2) const override {
136     enum DecodedCondCode : uint8_t {
137       DCC_EQUAL = 0x1,
138       DCC_GREATER = 0x2,
139       DCC_LESSER = 0x4,
140       DCC_GREATER_OR_LESSER = 0x6,
141       DCC_UNSIGNED = 0x8,
142       DCC_SIGNED = 0x10,
143       DCC_INVALID = 0x20,
144     };
145 
146     auto decodeCondCode = [&](unsigned CC) -> uint8_t {
147       switch (CC) {
148       default: return DCC_INVALID;
149       case X86::COND_E: return DCC_EQUAL;
150       case X86::COND_NE: return DCC_GREATER | DCC_LESSER;
151       case X86::COND_L: return DCC_LESSER | DCC_SIGNED;
152       case X86::COND_LE: return DCC_EQUAL | DCC_LESSER | DCC_SIGNED;
153       case X86::COND_G: return DCC_GREATER | DCC_SIGNED;
154       case X86::COND_GE: return DCC_GREATER | DCC_EQUAL | DCC_SIGNED;
155       case X86::COND_B: return DCC_LESSER | DCC_UNSIGNED;
156       case X86::COND_BE: return DCC_EQUAL | DCC_LESSER | DCC_UNSIGNED;
157       case X86::COND_A: return DCC_GREATER | DCC_UNSIGNED;
158       case X86::COND_AE: return DCC_GREATER | DCC_EQUAL | DCC_UNSIGNED;
159       }
160     };
161 
162     uint8_t DCC = decodeCondCode(CC1) | decodeCondCode(CC2);
163 
164     if (DCC & DCC_INVALID)
165       return X86::COND_INVALID;
166 
167     if (DCC & DCC_SIGNED && DCC & DCC_UNSIGNED)
168       return X86::COND_INVALID;
169 
170     switch (DCC) {
171     default: return X86::COND_INVALID;
172     case DCC_EQUAL | DCC_LESSER | DCC_SIGNED: return X86::COND_LE;
173     case DCC_EQUAL | DCC_LESSER | DCC_UNSIGNED: return X86::COND_BE;
174     case DCC_EQUAL | DCC_GREATER | DCC_SIGNED: return X86::COND_GE;
175     case DCC_EQUAL | DCC_GREATER | DCC_UNSIGNED: return X86::COND_AE;
176     case DCC_GREATER | DCC_LESSER | DCC_SIGNED: return X86::COND_NE;
177     case DCC_GREATER | DCC_LESSER | DCC_UNSIGNED: return X86::COND_NE;
178     case DCC_GREATER | DCC_LESSER: return X86::COND_NE;
179     case DCC_EQUAL | DCC_SIGNED: return X86::COND_E;
180     case DCC_EQUAL | DCC_UNSIGNED: return X86::COND_E;
181     case DCC_EQUAL: return X86::COND_E;
182     case DCC_LESSER | DCC_SIGNED: return X86::COND_L;
183     case DCC_LESSER | DCC_UNSIGNED: return X86::COND_B;
184     case DCC_GREATER | DCC_SIGNED: return X86::COND_G;
185     case DCC_GREATER | DCC_UNSIGNED: return X86::COND_A;
186     }
187   }
188 
189   bool isValidCondCode(unsigned CC) const override {
190     return (CC != X86::COND_INVALID);
191   }
192 
193   bool isBreakpoint(const MCInst &Inst) const override {
194     return Inst.getOpcode() == X86::INT3;
195   }
196 
197   bool isPrefix(const MCInst &Inst) const override {
198     const MCInstrDesc &Desc = Info->get(Inst.getOpcode());
199     return X86II::isPrefix(Desc.TSFlags);
200   }
201 
202   bool isRep(const MCInst &Inst) const override {
203     return Inst.getFlags() == X86::IP_HAS_REPEAT;
204   }
205 
206   bool deleteREPPrefix(MCInst &Inst) const override {
207     if (Inst.getFlags() == X86::IP_HAS_REPEAT) {
208       Inst.setFlags(0);
209       return true;
210     }
211     return false;
212   }
213 
214   bool isIndirectCall(const MCInst &Inst) const override {
215     return isCall(Inst) &&
216            ((getMemoryOperandNo(Inst) != -1) || Inst.getOperand(0).isReg());
217   }
218 
219   bool isPop(const MCInst &Inst) const override {
220     return getPopSize(Inst) == 0 ? false : true;
221   }
222 
223   bool isTerminateBranch(const MCInst &Inst) const override {
224     return Inst.getOpcode() == X86::ENDBR32 || Inst.getOpcode() == X86::ENDBR64;
225   }
226 
227   int getPopSize(const MCInst &Inst) const override {
228     switch (Inst.getOpcode()) {
229     case X86::POP16r:
230     case X86::POP16rmm:
231     case X86::POP16rmr:
232     case X86::POPF16:
233     case X86::POPA16:
234     case X86::POPDS16:
235     case X86::POPES16:
236     case X86::POPFS16:
237     case X86::POPGS16:
238     case X86::POPSS16:
239       return 2;
240     case X86::POP32r:
241     case X86::POP32rmm:
242     case X86::POP32rmr:
243     case X86::POPA32:
244     case X86::POPDS32:
245     case X86::POPES32:
246     case X86::POPF32:
247     case X86::POPFS32:
248     case X86::POPGS32:
249     case X86::POPSS32:
250       return 4;
251     case X86::POP64r:
252     case X86::POP64rmm:
253     case X86::POP64rmr:
254     case X86::POPF64:
255     case X86::POPFS64:
256     case X86::POPGS64:
257       return 8;
258     }
259     return 0;
260   }
261 
262   bool isPush(const MCInst &Inst) const override {
263     return getPushSize(Inst) == 0 ? false : true;
264   }
265 
266   int getPushSize(const MCInst &Inst) const override {
267     switch (Inst.getOpcode()) {
268     case X86::PUSH16i8:
269     case X86::PUSH16r:
270     case X86::PUSH16rmm:
271     case X86::PUSH16rmr:
272     case X86::PUSHA16:
273     case X86::PUSHCS16:
274     case X86::PUSHDS16:
275     case X86::PUSHES16:
276     case X86::PUSHF16:
277     case X86::PUSHFS16:
278     case X86::PUSHGS16:
279     case X86::PUSHSS16:
280     case X86::PUSH16i:
281       return 2;
282     case X86::PUSH32i8:
283     case X86::PUSH32r:
284     case X86::PUSH32rmm:
285     case X86::PUSH32rmr:
286     case X86::PUSHA32:
287     case X86::PUSHCS32:
288     case X86::PUSHDS32:
289     case X86::PUSHES32:
290     case X86::PUSHF32:
291     case X86::PUSHFS32:
292     case X86::PUSHGS32:
293     case X86::PUSHSS32:
294     case X86::PUSH32i:
295       return 4;
296     case X86::PUSH64i32:
297     case X86::PUSH64i8:
298     case X86::PUSH64r:
299     case X86::PUSH64rmm:
300     case X86::PUSH64rmr:
301     case X86::PUSHF64:
302     case X86::PUSHFS64:
303     case X86::PUSHGS64:
304       return 8;
305     }
306     return 0;
307   }
308 
309   bool isSUB(const MCInst &Inst) const override {
310     return X86::isSUB(Inst.getOpcode());
311   }
312 
313   bool isLEA64r(const MCInst &Inst) const override {
314     return Inst.getOpcode() == X86::LEA64r;
315   }
316 
317   bool isLeave(const MCInst &Inst) const override {
318     return Inst.getOpcode() == X86::LEAVE || Inst.getOpcode() == X86::LEAVE64;
319   }
320 
321   bool isMoveMem2Reg(const MCInst &Inst) const override {
322     switch (Inst.getOpcode()) {
323     case X86::MOV16rm:
324     case X86::MOV32rm:
325     case X86::MOV64rm:
326       return true;
327     }
328     return false;
329   }
330 
331   bool isUnsupportedInstruction(const MCInst &Inst) const override {
332     switch (Inst.getOpcode()) {
333     default:
334       return false;
335 
336     case X86::LOOP:
337     case X86::LOOPE:
338     case X86::LOOPNE:
339     case X86::JECXZ:
340     case X86::JRCXZ:
341       // These have a short displacement, and therefore (often) break after
342       // basic block relayout.
343       return true;
344     }
345   }
346 
347   bool mayLoad(const MCInst &Inst) const override {
348     if (isPop(Inst))
349       return true;
350 
351     int MemOpNo = getMemoryOperandNo(Inst);
352     const MCInstrDesc &MCII = Info->get(Inst.getOpcode());
353 
354     if (MemOpNo == -1)
355       return false;
356 
357     return MCII.mayLoad();
358   }
359 
360   bool mayStore(const MCInst &Inst) const override {
361     if (isPush(Inst))
362       return true;
363 
364     int MemOpNo = getMemoryOperandNo(Inst);
365     const MCInstrDesc &MCII = Info->get(Inst.getOpcode());
366 
367     if (MemOpNo == -1)
368       return false;
369 
370     return MCII.mayStore();
371   }
372 
373   bool isCleanRegXOR(const MCInst &Inst) const override {
374     switch (Inst.getOpcode()) {
375     case X86::XOR16rr:
376     case X86::XOR32rr:
377     case X86::XOR64rr:
378       break;
379     default:
380       return false;
381     }
382     return (Inst.getOperand(0).getReg() == Inst.getOperand(2).getReg());
383   }
384 
385   bool isPacked(const MCInst &Inst) const override {
386     const MCInstrDesc &Desc = Info->get(Inst.getOpcode());
387     return (Desc.TSFlags & X86II::OpPrefixMask) == X86II::PD;
388   }
389 
390   bool shouldRecordCodeRelocation(uint64_t RelType) const override {
391     switch (RelType) {
392     case ELF::R_X86_64_8:
393     case ELF::R_X86_64_16:
394     case ELF::R_X86_64_32:
395     case ELF::R_X86_64_32S:
396     case ELF::R_X86_64_64:
397     case ELF::R_X86_64_PC8:
398     case ELF::R_X86_64_PC32:
399     case ELF::R_X86_64_PC64:
400     case ELF::R_X86_64_GOTPC64:
401     case ELF::R_X86_64_GOTPCRELX:
402     case ELF::R_X86_64_REX_GOTPCRELX:
403       return true;
404     case ELF::R_X86_64_PLT32:
405     case ELF::R_X86_64_GOTPCREL:
406     case ELF::R_X86_64_TPOFF32:
407     case ELF::R_X86_64_GOTTPOFF:
408       return false;
409     default:
410       llvm_unreachable("Unexpected x86 relocation type in code");
411     }
412   }
413 
414   StringRef getTrapFillValue() const override { return StringRef("\314", 1); }
415 
416   struct IndJmpMatcherFrag1 : MCInstMatcher {
417     std::unique_ptr<MCInstMatcher> Base;
418     std::unique_ptr<MCInstMatcher> Scale;
419     std::unique_ptr<MCInstMatcher> Index;
420     std::unique_ptr<MCInstMatcher> Offset;
421 
422     IndJmpMatcherFrag1(std::unique_ptr<MCInstMatcher> Base,
423                        std::unique_ptr<MCInstMatcher> Scale,
424                        std::unique_ptr<MCInstMatcher> Index,
425                        std::unique_ptr<MCInstMatcher> Offset)
426         : Base(std::move(Base)), Scale(std::move(Scale)),
427           Index(std::move(Index)), Offset(std::move(Offset)) {}
428 
429     bool match(const MCRegisterInfo &MRI, MCPlusBuilder &MIB,
430                MutableArrayRef<MCInst> InInstrWindow, int OpNum) override {
431       if (!MCInstMatcher::match(MRI, MIB, InInstrWindow, OpNum))
432         return false;
433 
434       if (CurInst->getOpcode() != X86::JMP64m)
435         return false;
436 
437       int MemOpNo = MIB.getMemoryOperandNo(*CurInst);
438       if (MemOpNo == -1)
439         return false;
440 
441       if (!Base->match(MRI, MIB, this->InstrWindow, MemOpNo + X86::AddrBaseReg))
442         return false;
443       if (!Scale->match(MRI, MIB, this->InstrWindow,
444                         MemOpNo + X86::AddrScaleAmt))
445         return false;
446       if (!Index->match(MRI, MIB, this->InstrWindow,
447                         MemOpNo + X86::AddrIndexReg))
448         return false;
449       if (!Offset->match(MRI, MIB, this->InstrWindow, MemOpNo + X86::AddrDisp))
450         return false;
451       return true;
452     }
453 
454     void annotate(MCPlusBuilder &MIB, StringRef Annotation) override {
455       MIB.addAnnotation(*CurInst, Annotation, true);
456       Base->annotate(MIB, Annotation);
457       Scale->annotate(MIB, Annotation);
458       Index->annotate(MIB, Annotation);
459       Offset->annotate(MIB, Annotation);
460     }
461   };
462 
463   std::unique_ptr<MCInstMatcher>
464   matchIndJmp(std::unique_ptr<MCInstMatcher> Base,
465               std::unique_ptr<MCInstMatcher> Scale,
466               std::unique_ptr<MCInstMatcher> Index,
467               std::unique_ptr<MCInstMatcher> Offset) const override {
468     return std::unique_ptr<MCInstMatcher>(
469         new IndJmpMatcherFrag1(std::move(Base), std::move(Scale),
470                                std::move(Index), std::move(Offset)));
471   }
472 
473   struct IndJmpMatcherFrag2 : MCInstMatcher {
474     std::unique_ptr<MCInstMatcher> Reg;
475 
476     IndJmpMatcherFrag2(std::unique_ptr<MCInstMatcher> Reg)
477         : Reg(std::move(Reg)) {}
478 
479     bool match(const MCRegisterInfo &MRI, MCPlusBuilder &MIB,
480                MutableArrayRef<MCInst> InInstrWindow, int OpNum) override {
481       if (!MCInstMatcher::match(MRI, MIB, InInstrWindow, OpNum))
482         return false;
483 
484       if (CurInst->getOpcode() != X86::JMP64r)
485         return false;
486 
487       return Reg->match(MRI, MIB, this->InstrWindow, 0);
488     }
489 
490     void annotate(MCPlusBuilder &MIB, StringRef Annotation) override {
491       MIB.addAnnotation(*CurInst, Annotation, true);
492       Reg->annotate(MIB, Annotation);
493     }
494   };
495 
496   std::unique_ptr<MCInstMatcher>
497   matchIndJmp(std::unique_ptr<MCInstMatcher> Target) const override {
498     return std::unique_ptr<MCInstMatcher>(
499         new IndJmpMatcherFrag2(std::move(Target)));
500   }
501 
502   struct LoadMatcherFrag1 : MCInstMatcher {
503     std::unique_ptr<MCInstMatcher> Base;
504     std::unique_ptr<MCInstMatcher> Scale;
505     std::unique_ptr<MCInstMatcher> Index;
506     std::unique_ptr<MCInstMatcher> Offset;
507 
508     LoadMatcherFrag1(std::unique_ptr<MCInstMatcher> Base,
509                      std::unique_ptr<MCInstMatcher> Scale,
510                      std::unique_ptr<MCInstMatcher> Index,
511                      std::unique_ptr<MCInstMatcher> Offset)
512         : Base(std::move(Base)), Scale(std::move(Scale)),
513           Index(std::move(Index)), Offset(std::move(Offset)) {}
514 
515     bool match(const MCRegisterInfo &MRI, MCPlusBuilder &MIB,
516                MutableArrayRef<MCInst> InInstrWindow, int OpNum) override {
517       if (!MCInstMatcher::match(MRI, MIB, InInstrWindow, OpNum))
518         return false;
519 
520       if (CurInst->getOpcode() != X86::MOV64rm &&
521           CurInst->getOpcode() != X86::MOVSX64rm32)
522         return false;
523 
524       int MemOpNo = MIB.getMemoryOperandNo(*CurInst);
525       if (MemOpNo == -1)
526         return false;
527 
528       if (!Base->match(MRI, MIB, this->InstrWindow, MemOpNo + X86::AddrBaseReg))
529         return false;
530       if (!Scale->match(MRI, MIB, this->InstrWindow,
531                         MemOpNo + X86::AddrScaleAmt))
532         return false;
533       if (!Index->match(MRI, MIB, this->InstrWindow,
534                         MemOpNo + X86::AddrIndexReg))
535         return false;
536       if (!Offset->match(MRI, MIB, this->InstrWindow, MemOpNo + X86::AddrDisp))
537         return false;
538       return true;
539     }
540 
541     void annotate(MCPlusBuilder &MIB, StringRef Annotation) override {
542       MIB.addAnnotation(*CurInst, Annotation, true);
543       Base->annotate(MIB, Annotation);
544       Scale->annotate(MIB, Annotation);
545       Index->annotate(MIB, Annotation);
546       Offset->annotate(MIB, Annotation);
547     }
548   };
549 
550   std::unique_ptr<MCInstMatcher>
551   matchLoad(std::unique_ptr<MCInstMatcher> Base,
552             std::unique_ptr<MCInstMatcher> Scale,
553             std::unique_ptr<MCInstMatcher> Index,
554             std::unique_ptr<MCInstMatcher> Offset) const override {
555     return std::unique_ptr<MCInstMatcher>(
556         new LoadMatcherFrag1(std::move(Base), std::move(Scale),
557                              std::move(Index), std::move(Offset)));
558   }
559 
560   struct AddMatcher : MCInstMatcher {
561     std::unique_ptr<MCInstMatcher> A;
562     std::unique_ptr<MCInstMatcher> B;
563 
564     AddMatcher(std::unique_ptr<MCInstMatcher> A,
565                std::unique_ptr<MCInstMatcher> B)
566         : A(std::move(A)), B(std::move(B)) {}
567 
568     bool match(const MCRegisterInfo &MRI, MCPlusBuilder &MIB,
569                MutableArrayRef<MCInst> InInstrWindow, int OpNum) override {
570       if (!MCInstMatcher::match(MRI, MIB, InInstrWindow, OpNum))
571         return false;
572 
573       if (CurInst->getOpcode() == X86::ADD64rr ||
574           CurInst->getOpcode() == X86::ADD64rr_DB ||
575           CurInst->getOpcode() == X86::ADD64rr_REV) {
576         if (!A->match(MRI, MIB, this->InstrWindow, 1)) {
577           if (!B->match(MRI, MIB, this->InstrWindow, 1))
578             return false;
579           return A->match(MRI, MIB, this->InstrWindow, 2);
580         }
581 
582         if (B->match(MRI, MIB, this->InstrWindow, 2))
583           return true;
584 
585         if (!B->match(MRI, MIB, this->InstrWindow, 1))
586           return false;
587         return A->match(MRI, MIB, this->InstrWindow, 2);
588       }
589 
590       return false;
591     }
592 
593     void annotate(MCPlusBuilder &MIB, StringRef Annotation) override {
594       MIB.addAnnotation(*CurInst, Annotation, true);
595       A->annotate(MIB, Annotation);
596       B->annotate(MIB, Annotation);
597     }
598   };
599 
600   std::unique_ptr<MCInstMatcher>
601   matchAdd(std::unique_ptr<MCInstMatcher> A,
602            std::unique_ptr<MCInstMatcher> B) const override {
603     return std::unique_ptr<MCInstMatcher>(
604         new AddMatcher(std::move(A), std::move(B)));
605   }
606 
607   struct LEAMatcher : MCInstMatcher {
608     std::unique_ptr<MCInstMatcher> Target;
609 
610     LEAMatcher(std::unique_ptr<MCInstMatcher> Target)
611         : Target(std::move(Target)) {}
612 
613     bool match(const MCRegisterInfo &MRI, MCPlusBuilder &MIB,
614                MutableArrayRef<MCInst> InInstrWindow, int OpNum) override {
615       if (!MCInstMatcher::match(MRI, MIB, InInstrWindow, OpNum))
616         return false;
617 
618       if (CurInst->getOpcode() != X86::LEA64r)
619         return false;
620 
621       if (CurInst->getOperand(1 + X86::AddrScaleAmt).getImm() != 1 ||
622           CurInst->getOperand(1 + X86::AddrIndexReg).getReg() !=
623               X86::NoRegister ||
624           (CurInst->getOperand(1 + X86::AddrBaseReg).getReg() !=
625                X86::NoRegister &&
626            CurInst->getOperand(1 + X86::AddrBaseReg).getReg() != X86::RIP))
627         return false;
628 
629       return Target->match(MRI, MIB, this->InstrWindow, 1 + X86::AddrDisp);
630     }
631 
632     void annotate(MCPlusBuilder &MIB, StringRef Annotation) override {
633       MIB.addAnnotation(*CurInst, Annotation, true);
634       Target->annotate(MIB, Annotation);
635     }
636   };
637 
638   std::unique_ptr<MCInstMatcher>
639   matchLoadAddr(std::unique_ptr<MCInstMatcher> Target) const override {
640     return std::unique_ptr<MCInstMatcher>(new LEAMatcher(std::move(Target)));
641   }
642 
643   bool hasPCRelOperand(const MCInst &Inst) const override {
644     for (const MCOperand &Operand : Inst)
645       if (Operand.isReg() && Operand.getReg() == X86::RIP)
646         return true;
647     return false;
648   }
649 
650   int getMemoryOperandNo(const MCInst &Inst) const override {
651     unsigned Opcode = Inst.getOpcode();
652     const MCInstrDesc &Desc = Info->get(Opcode);
653     int MemOpNo = X86II::getMemoryOperandNo(Desc.TSFlags);
654     if (MemOpNo >= 0)
655       MemOpNo += X86II::getOperandBias(Desc);
656     return MemOpNo;
657   }
658 
659   bool hasEVEXEncoding(const MCInst &Inst) const override {
660     const MCInstrDesc &Desc = Info->get(Inst.getOpcode());
661     return (Desc.TSFlags & X86II::EncodingMask) == X86II::EVEX;
662   }
663 
664   std::optional<X86MemOperand>
665   evaluateX86MemoryOperand(const MCInst &Inst) const override {
666     int MemOpNo = getMemoryOperandNo(Inst);
667     if (MemOpNo < 0)
668       return std::nullopt;
669     unsigned MemOpOffset = static_cast<unsigned>(MemOpNo);
670 
671     if (MemOpOffset + X86::AddrSegmentReg >= MCPlus::getNumPrimeOperands(Inst))
672       return std::nullopt;
673 
674     const MCOperand &Base = Inst.getOperand(MemOpOffset + X86::AddrBaseReg);
675     const MCOperand &Scale = Inst.getOperand(MemOpOffset + X86::AddrScaleAmt);
676     const MCOperand &Index = Inst.getOperand(MemOpOffset + X86::AddrIndexReg);
677     const MCOperand &Disp = Inst.getOperand(MemOpOffset + X86::AddrDisp);
678     const MCOperand &Segment =
679         Inst.getOperand(MemOpOffset + X86::AddrSegmentReg);
680 
681     // Make sure it is a well-formed memory operand.
682     if (!Base.isReg() || !Scale.isImm() || !Index.isReg() ||
683         (!Disp.isImm() && !Disp.isExpr()) || !Segment.isReg())
684       return std::nullopt;
685 
686     X86MemOperand MO;
687     MO.BaseRegNum = Base.getReg();
688     MO.ScaleImm = Scale.getImm();
689     MO.IndexRegNum = Index.getReg();
690     MO.DispImm = Disp.isImm() ? Disp.getImm() : 0;
691     MO.DispExpr = Disp.isExpr() ? Disp.getExpr() : nullptr;
692     MO.SegRegNum = Segment.getReg();
693     return MO;
694   }
695 
696   bool evaluateMemOperandTarget(const MCInst &Inst, uint64_t &Target,
697                                 uint64_t Address,
698                                 uint64_t Size) const override {
699     std::optional<X86MemOperand> MO = evaluateX86MemoryOperand(Inst);
700     if (!MO)
701       return false;
702 
703     // Make sure it's a well-formed addressing we can statically evaluate.
704     if ((MO->BaseRegNum != X86::RIP && MO->BaseRegNum != X86::NoRegister) ||
705         MO->IndexRegNum != X86::NoRegister ||
706         MO->SegRegNum != X86::NoRegister || MO->DispExpr)
707       return false;
708 
709     Target = MO->DispImm;
710     if (MO->BaseRegNum == X86::RIP) {
711       assert(Size != 0 && "instruction size required in order to statically "
712                           "evaluate RIP-relative address");
713       Target += Address + Size;
714     }
715     return true;
716   }
717 
718   MCInst::iterator getMemOperandDisp(MCInst &Inst) const override {
719     int MemOpNo = getMemoryOperandNo(Inst);
720     if (MemOpNo < 0)
721       return Inst.end();
722     return Inst.begin() + (MemOpNo + X86::AddrDisp);
723   }
724 
725   bool replaceMemOperandDisp(MCInst &Inst, MCOperand Operand) const override {
726     MCOperand *OI = getMemOperandDisp(Inst);
727     if (OI == Inst.end())
728       return false;
729     *OI = Operand;
730     return true;
731   }
732 
733   /// Get the registers used as function parameters.
734   /// This function is specific to the x86_64 abi on Linux.
735   BitVector getRegsUsedAsParams() const override {
736     BitVector Regs = BitVector(RegInfo->getNumRegs(), false);
737     Regs |= getAliases(X86::RSI);
738     Regs |= getAliases(X86::RDI);
739     Regs |= getAliases(X86::RDX);
740     Regs |= getAliases(X86::RCX);
741     Regs |= getAliases(X86::R8);
742     Regs |= getAliases(X86::R9);
743     return Regs;
744   }
745 
746   void getCalleeSavedRegs(BitVector &Regs) const override {
747     Regs |= getAliases(X86::RBX);
748     Regs |= getAliases(X86::RBP);
749     Regs |= getAliases(X86::R12);
750     Regs |= getAliases(X86::R13);
751     Regs |= getAliases(X86::R14);
752     Regs |= getAliases(X86::R15);
753   }
754 
755   void getDefaultDefIn(BitVector &Regs) const override {
756     assert(Regs.size() >= RegInfo->getNumRegs() &&
757            "The size of BitVector is less than RegInfo->getNumRegs().");
758     Regs.set(X86::RAX);
759     Regs.set(X86::RCX);
760     Regs.set(X86::RDX);
761     Regs.set(X86::RSI);
762     Regs.set(X86::RDI);
763     Regs.set(X86::R8);
764     Regs.set(X86::R9);
765     Regs.set(X86::XMM0);
766     Regs.set(X86::XMM1);
767     Regs.set(X86::XMM2);
768     Regs.set(X86::XMM3);
769     Regs.set(X86::XMM4);
770     Regs.set(X86::XMM5);
771     Regs.set(X86::XMM6);
772     Regs.set(X86::XMM7);
773   }
774 
775   void getDefaultLiveOut(BitVector &Regs) const override {
776     assert(Regs.size() >= RegInfo->getNumRegs() &&
777            "The size of BitVector is less than RegInfo->getNumRegs().");
778     Regs |= getAliases(X86::RAX);
779     Regs |= getAliases(X86::RDX);
780     Regs |= getAliases(X86::RCX);
781     Regs |= getAliases(X86::XMM0);
782     Regs |= getAliases(X86::XMM1);
783   }
784 
785   void getGPRegs(BitVector &Regs, bool IncludeAlias) const override {
786     if (IncludeAlias) {
787       Regs |= getAliases(X86::RAX);
788       Regs |= getAliases(X86::RBX);
789       Regs |= getAliases(X86::RBP);
790       Regs |= getAliases(X86::RSI);
791       Regs |= getAliases(X86::RDI);
792       Regs |= getAliases(X86::RDX);
793       Regs |= getAliases(X86::RCX);
794       Regs |= getAliases(X86::R8);
795       Regs |= getAliases(X86::R9);
796       Regs |= getAliases(X86::R10);
797       Regs |= getAliases(X86::R11);
798       Regs |= getAliases(X86::R12);
799       Regs |= getAliases(X86::R13);
800       Regs |= getAliases(X86::R14);
801       Regs |= getAliases(X86::R15);
802       return;
803     }
804     Regs.set(X86::RAX);
805     Regs.set(X86::RBX);
806     Regs.set(X86::RBP);
807     Regs.set(X86::RSI);
808     Regs.set(X86::RDI);
809     Regs.set(X86::RDX);
810     Regs.set(X86::RCX);
811     Regs.set(X86::R8);
812     Regs.set(X86::R9);
813     Regs.set(X86::R10);
814     Regs.set(X86::R11);
815     Regs.set(X86::R12);
816     Regs.set(X86::R13);
817     Regs.set(X86::R14);
818     Regs.set(X86::R15);
819   }
820 
821   void getClassicGPRegs(BitVector &Regs) const override {
822     Regs |= getAliases(X86::RAX);
823     Regs |= getAliases(X86::RBX);
824     Regs |= getAliases(X86::RBP);
825     Regs |= getAliases(X86::RSI);
826     Regs |= getAliases(X86::RDI);
827     Regs |= getAliases(X86::RDX);
828     Regs |= getAliases(X86::RCX);
829   }
830 
831   void getRepRegs(BitVector &Regs) const override {
832     Regs |= getAliases(X86::RCX);
833   }
834 
835   MCPhysReg getAliasSized(MCPhysReg Reg, uint8_t Size) const override {
836     Reg = getX86SubSuperRegister(Reg, Size * 8);
837     assert((Reg != X86::NoRegister) && "Invalid register");
838     return Reg;
839   }
840 
841   bool isUpper8BitReg(MCPhysReg Reg) const override {
842     switch (Reg) {
843     case X86::AH:
844     case X86::BH:
845     case X86::CH:
846     case X86::DH:
847       return true;
848     default:
849       return false;
850     }
851   }
852 
853   bool cannotUseREX(const MCInst &Inst) const override {
854     switch (Inst.getOpcode()) {
855     case X86::MOV8mr_NOREX:
856     case X86::MOV8rm_NOREX:
857     case X86::MOV8rr_NOREX:
858     case X86::MOVSX32rm8_NOREX:
859     case X86::MOVSX32rr8_NOREX:
860     case X86::MOVZX32rm8_NOREX:
861     case X86::MOVZX32rr8_NOREX:
862     case X86::MOV8mr:
863     case X86::MOV8rm:
864     case X86::MOV8rr:
865     case X86::MOVSX32rm8:
866     case X86::MOVSX32rr8:
867     case X86::MOVZX32rm8:
868     case X86::MOVZX32rr8:
869     case X86::TEST8ri:
870       for (const MCOperand &Operand : MCPlus::primeOperands(Inst)) {
871         if (!Operand.isReg())
872           continue;
873         if (isUpper8BitReg(Operand.getReg()))
874           return true;
875       }
876       [[fallthrough]];
877     default:
878       return false;
879     }
880   }
881 
882   static uint8_t getMemDataSize(const MCInst &Inst, int MemOpNo) {
883     using namespace llvm::X86;
884     int OpType = getOperandType(Inst.getOpcode(), MemOpNo);
885     return getMemOperandSize(OpType) / 8;
886   }
887 
888   /// Classifying a stack access as *not* "SIMPLE" here means we don't know how
889   /// to change this instruction memory access. It will disable any changes to
890   /// the stack layout, so we can't do the most aggressive form of shrink
891   /// wrapping. We must do so in a way that keeps the original stack layout.
892   /// Otherwise you need to adjust the offset of all instructions accessing the
893   /// stack: we can't do that anymore because there is one instruction that is
894   /// not simple. There are other implications as well. We have heuristics to
895   /// detect when a register is callee-saved and thus eligible for shrink
896   /// wrapping. If you are restoring a register using a non-simple stack access,
897   /// then it is classified as NOT callee-saved, and it disables shrink wrapping
898   /// for *that* register (but not for others).
899   ///
900   /// Classifying a stack access as "size 0" or detecting an indexed memory
901   /// access (to address a vector, for example) here means we know there is a
902   /// stack access, but we can't quite understand how wide is the access in
903   /// bytes. This is very serious because we can't understand how memory
904   /// accesses alias with each other for this function. This will essentially
905   /// disable not only shrink wrapping but all frame analysis, it will fail it
906   /// as "we don't understand this function and we give up on it".
907   bool isStackAccess(const MCInst &Inst, bool &IsLoad, bool &IsStore,
908                      bool &IsStoreFromReg, MCPhysReg &Reg, int32_t &SrcImm,
909                      uint16_t &StackPtrReg, int64_t &StackOffset, uint8_t &Size,
910                      bool &IsSimple, bool &IsIndexed) const override {
911     // Detect simple push/pop cases first
912     if (int Sz = getPushSize(Inst)) {
913       IsLoad = false;
914       IsStore = true;
915       IsStoreFromReg = true;
916       StackPtrReg = X86::RSP;
917       StackOffset = -Sz;
918       Size = Sz;
919       IsSimple = true;
920       if (Inst.getOperand(0).isImm())
921         SrcImm = Inst.getOperand(0).getImm();
922       else if (Inst.getOperand(0).isReg())
923         Reg = Inst.getOperand(0).getReg();
924       else
925         IsSimple = false;
926 
927       return true;
928     }
929     if (int Sz = getPopSize(Inst)) {
930       IsLoad = true;
931       IsStore = false;
932       if (Inst.getNumOperands() == 0 || !Inst.getOperand(0).isReg()) {
933         IsSimple = false;
934       } else {
935         Reg = Inst.getOperand(0).getReg();
936         IsSimple = true;
937       }
938       StackPtrReg = X86::RSP;
939       StackOffset = 0;
940       Size = Sz;
941       return true;
942     }
943 
944     struct InstInfo {
945       // Size in bytes that Inst loads from memory.
946       uint8_t DataSize;
947       bool IsLoad;
948       bool IsStore;
949       bool StoreFromReg;
950       bool Simple;
951     };
952 
953     InstInfo I;
954     int MemOpNo = getMemoryOperandNo(Inst);
955     const MCInstrDesc &MCII = Info->get(Inst.getOpcode());
956     // If it is not dealing with a memory operand, we discard it
957     if (MemOpNo == -1 || MCII.isCall())
958       return false;
959 
960     switch (Inst.getOpcode()) {
961     default: {
962       bool IsLoad = MCII.mayLoad();
963       bool IsStore = MCII.mayStore();
964       // Is it LEA? (deals with memory but is not loading nor storing)
965       if (!IsLoad && !IsStore) {
966         I = {0, IsLoad, IsStore, false, false};
967         break;
968       }
969       uint8_t Sz = getMemDataSize(Inst, MemOpNo);
970       I = {Sz, IsLoad, IsStore, false, false};
971       break;
972     }
973     // Report simple stack accesses
974     case X86::MOV8rm: I = {1, true, false, false, true}; break;
975     case X86::MOV16rm: I = {2, true, false, false, true}; break;
976     case X86::MOV32rm: I = {4, true, false, false, true}; break;
977     case X86::MOV64rm: I = {8, true, false, false, true}; break;
978     case X86::MOV8mr: I = {1, false, true, true, true};  break;
979     case X86::MOV16mr: I = {2, false, true, true, true};  break;
980     case X86::MOV32mr: I = {4, false, true, true, true};  break;
981     case X86::MOV64mr: I = {8, false, true, true, true};  break;
982     case X86::MOV8mi: I = {1, false, true, false, true}; break;
983     case X86::MOV16mi: I = {2, false, true, false, true}; break;
984     case X86::MOV32mi: I = {4, false, true, false, true}; break;
985     } // end switch (Inst.getOpcode())
986 
987     std::optional<X86MemOperand> MO = evaluateX86MemoryOperand(Inst);
988     if (!MO) {
989       LLVM_DEBUG(dbgs() << "Evaluate failed on ");
990       LLVM_DEBUG(Inst.dump());
991       return false;
992     }
993 
994     // Make sure it's a stack access
995     if (MO->BaseRegNum != X86::RBP && MO->BaseRegNum != X86::RSP)
996       return false;
997 
998     IsLoad = I.IsLoad;
999     IsStore = I.IsStore;
1000     IsStoreFromReg = I.StoreFromReg;
1001     Size = I.DataSize;
1002     IsSimple = I.Simple;
1003     StackPtrReg = MO->BaseRegNum;
1004     StackOffset = MO->DispImm;
1005     IsIndexed =
1006         MO->IndexRegNum != X86::NoRegister || MO->SegRegNum != X86::NoRegister;
1007 
1008     if (!I.Simple)
1009       return true;
1010 
1011     // Retrieve related register in simple MOV from/to stack operations.
1012     unsigned MemOpOffset = static_cast<unsigned>(MemOpNo);
1013     if (I.IsLoad) {
1014       MCOperand RegOpnd = Inst.getOperand(0);
1015       assert(RegOpnd.isReg() && "unexpected destination operand");
1016       Reg = RegOpnd.getReg();
1017     } else if (I.IsStore) {
1018       MCOperand SrcOpnd =
1019           Inst.getOperand(MemOpOffset + X86::AddrSegmentReg + 1);
1020       if (I.StoreFromReg) {
1021         assert(SrcOpnd.isReg() && "unexpected source operand");
1022         Reg = SrcOpnd.getReg();
1023       } else {
1024         assert(SrcOpnd.isImm() && "unexpected source operand");
1025         SrcImm = SrcOpnd.getImm();
1026       }
1027     }
1028 
1029     return true;
1030   }
1031 
1032   void changeToPushOrPop(MCInst &Inst) const override {
1033     assert(!isPush(Inst) && !isPop(Inst));
1034 
1035     struct InstInfo {
1036       // Size in bytes that Inst loads from memory.
1037       uint8_t DataSize;
1038       bool IsLoad;
1039       bool StoreFromReg;
1040     };
1041 
1042     InstInfo I;
1043     switch (Inst.getOpcode()) {
1044     default: {
1045       llvm_unreachable("Unhandled opcode");
1046       return;
1047     }
1048     case X86::MOV16rm: I = {2, true, false}; break;
1049     case X86::MOV32rm: I = {4, true, false}; break;
1050     case X86::MOV64rm: I = {8, true, false}; break;
1051     case X86::MOV16mr: I = {2, false, true};  break;
1052     case X86::MOV32mr: I = {4, false, true};  break;
1053     case X86::MOV64mr: I = {8, false, true};  break;
1054     case X86::MOV16mi: I = {2, false, false}; break;
1055     case X86::MOV32mi: I = {4, false, false}; break;
1056     } // end switch (Inst.getOpcode())
1057 
1058     std::optional<X86MemOperand> MO = evaluateX86MemoryOperand(Inst);
1059     if (!MO) {
1060       llvm_unreachable("Evaluate failed");
1061       return;
1062     }
1063     // Make sure it's a stack access
1064     if (MO->BaseRegNum != X86::RBP && MO->BaseRegNum != X86::RSP) {
1065       llvm_unreachable("Not a stack access");
1066       return;
1067     }
1068 
1069     unsigned MemOpOffset = getMemoryOperandNo(Inst);
1070     unsigned NewOpcode = 0;
1071     if (I.IsLoad) {
1072       switch (I.DataSize) {
1073       case 2: NewOpcode = X86::POP16r; break;
1074       case 4: NewOpcode = X86::POP32r; break;
1075       case 8: NewOpcode = X86::POP64r; break;
1076       default:
1077         llvm_unreachable("Unexpected size");
1078       }
1079       unsigned RegOpndNum = Inst.getOperand(0).getReg();
1080       Inst.clear();
1081       Inst.setOpcode(NewOpcode);
1082       Inst.addOperand(MCOperand::createReg(RegOpndNum));
1083     } else {
1084       MCOperand SrcOpnd =
1085           Inst.getOperand(MemOpOffset + X86::AddrSegmentReg + 1);
1086       if (I.StoreFromReg) {
1087         switch (I.DataSize) {
1088         case 2: NewOpcode = X86::PUSH16r; break;
1089         case 4: NewOpcode = X86::PUSH32r; break;
1090         case 8: NewOpcode = X86::PUSH64r; break;
1091         default:
1092           llvm_unreachable("Unexpected size");
1093         }
1094         assert(SrcOpnd.isReg() && "Unexpected source operand");
1095         unsigned RegOpndNum = SrcOpnd.getReg();
1096         Inst.clear();
1097         Inst.setOpcode(NewOpcode);
1098         Inst.addOperand(MCOperand::createReg(RegOpndNum));
1099       } else {
1100         switch (I.DataSize) {
1101         case 2: NewOpcode = X86::PUSH16i8; break;
1102         case 4: NewOpcode = X86::PUSH32i8; break;
1103         case 8: NewOpcode = X86::PUSH64i32; break;
1104         default:
1105           llvm_unreachable("Unexpected size");
1106         }
1107         assert(SrcOpnd.isImm() && "Unexpected source operand");
1108         int64_t SrcImm = SrcOpnd.getImm();
1109         Inst.clear();
1110         Inst.setOpcode(NewOpcode);
1111         Inst.addOperand(MCOperand::createImm(SrcImm));
1112       }
1113     }
1114   }
1115 
1116   bool isStackAdjustment(const MCInst &Inst) const override {
1117     switch (Inst.getOpcode()) {
1118     default:
1119       return false;
1120     case X86::SUB64ri32:
1121     case X86::SUB64ri8:
1122     case X86::ADD64ri32:
1123     case X86::ADD64ri8:
1124     case X86::LEA64r:
1125       break;
1126     }
1127 
1128     return any_of(defOperands(Inst), [](const MCOperand &Op) {
1129       return Op.isReg() && Op.getReg() == X86::RSP;
1130     });
1131   }
1132 
1133   bool
1134   evaluateStackOffsetExpr(const MCInst &Inst, int64_t &Output,
1135                           std::pair<MCPhysReg, int64_t> Input1,
1136                           std::pair<MCPhysReg, int64_t> Input2) const override {
1137 
1138     auto getOperandVal = [&](MCPhysReg Reg) -> ErrorOr<int64_t> {
1139       if (Reg == Input1.first)
1140         return Input1.second;
1141       if (Reg == Input2.first)
1142         return Input2.second;
1143       return make_error_code(errc::result_out_of_range);
1144     };
1145 
1146     switch (Inst.getOpcode()) {
1147     default:
1148       return false;
1149 
1150     case X86::SUB64ri32:
1151     case X86::SUB64ri8:
1152       if (!Inst.getOperand(2).isImm())
1153         return false;
1154       if (ErrorOr<int64_t> InputVal =
1155               getOperandVal(Inst.getOperand(1).getReg()))
1156         Output = *InputVal - Inst.getOperand(2).getImm();
1157       else
1158         return false;
1159       break;
1160     case X86::ADD64ri32:
1161     case X86::ADD64ri8:
1162       if (!Inst.getOperand(2).isImm())
1163         return false;
1164       if (ErrorOr<int64_t> InputVal =
1165               getOperandVal(Inst.getOperand(1).getReg()))
1166         Output = *InputVal + Inst.getOperand(2).getImm();
1167       else
1168         return false;
1169       break;
1170     case X86::ADD64i32:
1171       if (!Inst.getOperand(0).isImm())
1172         return false;
1173       if (ErrorOr<int64_t> InputVal = getOperandVal(X86::RAX))
1174         Output = *InputVal + Inst.getOperand(0).getImm();
1175       else
1176         return false;
1177       break;
1178 
1179     case X86::LEA64r: {
1180       std::optional<X86MemOperand> MO = evaluateX86MemoryOperand(Inst);
1181       if (!MO)
1182         return false;
1183 
1184       if (MO->BaseRegNum == X86::NoRegister ||
1185           MO->IndexRegNum != X86::NoRegister ||
1186           MO->SegRegNum != X86::NoRegister || MO->DispExpr)
1187         return false;
1188 
1189       if (ErrorOr<int64_t> InputVal = getOperandVal(MO->BaseRegNum))
1190         Output = *InputVal + MO->DispImm;
1191       else
1192         return false;
1193 
1194       break;
1195     }
1196     }
1197     return true;
1198   }
1199 
1200   bool isRegToRegMove(const MCInst &Inst, MCPhysReg &From,
1201                       MCPhysReg &To) const override {
1202     switch (Inst.getOpcode()) {
1203     default:
1204       return false;
1205     case X86::LEAVE:
1206     case X86::LEAVE64:
1207       To = getStackPointer();
1208       From = getFramePointer();
1209       return true;
1210     case X86::MOV64rr:
1211       To = Inst.getOperand(0).getReg();
1212       From = Inst.getOperand(1).getReg();
1213       return true;
1214     }
1215   }
1216 
1217   MCPhysReg getStackPointer() const override { return X86::RSP; }
1218   MCPhysReg getFramePointer() const override { return X86::RBP; }
1219   MCPhysReg getFlagsReg() const override { return X86::EFLAGS; }
1220 
1221   bool escapesVariable(const MCInst &Inst,
1222                        bool HasFramePointer) const override {
1223     int MemOpNo = getMemoryOperandNo(Inst);
1224     const MCInstrDesc &MCII = Info->get(Inst.getOpcode());
1225     const unsigned NumDefs = MCII.getNumDefs();
1226     static BitVector SPBPAliases(BitVector(getAliases(X86::RSP)) |=
1227                                  getAliases(X86::RBP));
1228     static BitVector SPAliases(getAliases(X86::RSP));
1229 
1230     // FIXME: PUSH can be technically a leak, but let's ignore this for now
1231     // because a lot of harmless prologue code will spill SP to the stack.
1232     // Unless push is clearly pushing an object address to the stack as
1233     // demonstrated by having a MemOp.
1234     bool IsPush = isPush(Inst);
1235     if (IsPush && MemOpNo == -1)
1236       return false;
1237 
1238     // We use this to detect LEA (has memop but does not access mem)
1239     bool AccessMem = MCII.mayLoad() || MCII.mayStore();
1240     bool DoesLeak = false;
1241     for (int I = 0, E = MCPlus::getNumPrimeOperands(Inst); I != E; ++I) {
1242       // Ignore if SP/BP is used to dereference memory -- that's fine
1243       if (MemOpNo != -1 && !IsPush && AccessMem && I >= MemOpNo &&
1244           I <= MemOpNo + 5)
1245         continue;
1246       // Ignore if someone is writing to SP/BP
1247       if (I < static_cast<int>(NumDefs))
1248         continue;
1249 
1250       const MCOperand &Operand = Inst.getOperand(I);
1251       if (HasFramePointer && Operand.isReg() && SPBPAliases[Operand.getReg()]) {
1252         DoesLeak = true;
1253         break;
1254       }
1255       if (!HasFramePointer && Operand.isReg() && SPAliases[Operand.getReg()]) {
1256         DoesLeak = true;
1257         break;
1258       }
1259     }
1260 
1261     // If potential leak, check if it is not just writing to itself/sp/bp
1262     if (DoesLeak) {
1263       DoesLeak = !any_of(defOperands(Inst), [&](const MCOperand &Operand) {
1264         assert(Operand.isReg());
1265         MCPhysReg Reg = Operand.getReg();
1266         return HasFramePointer ? SPBPAliases[Reg] : SPAliases[Reg];
1267       });
1268     }
1269     return DoesLeak;
1270   }
1271 
1272   bool addToImm(MCInst &Inst, int64_t &Amt, MCContext *Ctx) const override {
1273     unsigned ImmOpNo = -1U;
1274     int MemOpNo = getMemoryOperandNo(Inst);
1275     if (MemOpNo != -1)
1276       ImmOpNo = MemOpNo + X86::AddrDisp;
1277     else
1278       for (unsigned Index = 0; Index < MCPlus::getNumPrimeOperands(Inst);
1279            ++Index)
1280         if (Inst.getOperand(Index).isImm())
1281           ImmOpNo = Index;
1282     if (ImmOpNo == -1U)
1283       return false;
1284 
1285     MCOperand &Operand = Inst.getOperand(ImmOpNo);
1286     Amt += Operand.getImm();
1287     Operand.setImm(Amt);
1288     // Check for the need for relaxation
1289     if (int64_t(Amt) == int64_t(int8_t(Amt)))
1290       return true;
1291 
1292     // Relax instruction
1293     switch (Inst.getOpcode()) {
1294     case X86::SUB64ri8:
1295       Inst.setOpcode(X86::SUB64ri32);
1296       break;
1297     case X86::ADD64ri8:
1298       Inst.setOpcode(X86::ADD64ri32);
1299       break;
1300     default:
1301       // No need for relaxation
1302       break;
1303     }
1304     return true;
1305   }
1306 
1307   /// TODO: this implementation currently works for the most common opcodes that
1308   /// load from memory. It can be extended to work with memory store opcodes as
1309   /// well as more memory load opcodes.
1310   bool replaceMemOperandWithImm(MCInst &Inst, StringRef ConstantData,
1311                                 uint64_t Offset) const override {
1312     enum CheckSignExt : uint8_t {
1313       NOCHECK = 0,
1314       CHECK8,
1315       CHECK32,
1316     };
1317 
1318     using CheckList = std::vector<std::pair<CheckSignExt, unsigned>>;
1319     struct InstInfo {
1320       // Size in bytes that Inst loads from memory.
1321       uint8_t DataSize;
1322 
1323       // True when the target operand has to be duplicated because the opcode
1324       // expects a LHS operand.
1325       bool HasLHS;
1326 
1327       // List of checks and corresponding opcodes to be used. We try to use the
1328       // smallest possible immediate value when various sizes are available,
1329       // hence we may need to check whether a larger constant fits in a smaller
1330       // immediate.
1331       CheckList Checks;
1332     };
1333 
1334     InstInfo I;
1335 
1336     switch (Inst.getOpcode()) {
1337     default: {
1338       switch (getPopSize(Inst)) {
1339       case 2:            I = {2, false, {{NOCHECK, X86::MOV16ri}}};  break;
1340       case 4:            I = {4, false, {{NOCHECK, X86::MOV32ri}}};  break;
1341       case 8:            I = {8, false, {{CHECK32, X86::MOV64ri32},
1342                                          {NOCHECK, X86::MOV64rm}}};  break;
1343       default:           return false;
1344       }
1345       break;
1346     }
1347 
1348     // MOV
1349     case X86::MOV8rm:      I = {1, false, {{NOCHECK, X86::MOV8ri}}};   break;
1350     case X86::MOV16rm:     I = {2, false, {{NOCHECK, X86::MOV16ri}}};  break;
1351     case X86::MOV32rm:     I = {4, false, {{NOCHECK, X86::MOV32ri}}};  break;
1352     case X86::MOV64rm:     I = {8, false, {{CHECK32, X86::MOV64ri32},
1353                                            {NOCHECK, X86::MOV64rm}}};  break;
1354 
1355     // MOVZX
1356     case X86::MOVZX16rm8:  I = {1, false, {{NOCHECK, X86::MOV16ri}}};  break;
1357     case X86::MOVZX32rm8:  I = {1, false, {{NOCHECK, X86::MOV32ri}}};  break;
1358     case X86::MOVZX32rm16: I = {2, false, {{NOCHECK, X86::MOV32ri}}};  break;
1359 
1360     // CMP
1361     case X86::CMP8rm:      I = {1, false, {{NOCHECK, X86::CMP8ri}}};   break;
1362     case X86::CMP16rm:     I = {2, false, {{CHECK8,  X86::CMP16ri8},
1363                                            {NOCHECK, X86::CMP16ri}}};  break;
1364     case X86::CMP32rm:     I = {4, false, {{CHECK8,  X86::CMP32ri8},
1365                                            {NOCHECK, X86::CMP32ri}}};  break;
1366     case X86::CMP64rm:     I = {8, false, {{CHECK8,  X86::CMP64ri8},
1367                                            {CHECK32, X86::CMP64ri32},
1368                                            {NOCHECK, X86::CMP64rm}}};  break;
1369 
1370     // TEST
1371     case X86::TEST8mr:     I = {1, false, {{NOCHECK, X86::TEST8ri}}};  break;
1372     case X86::TEST16mr:    I = {2, false, {{NOCHECK, X86::TEST16ri}}}; break;
1373     case X86::TEST32mr:    I = {4, false, {{NOCHECK, X86::TEST32ri}}}; break;
1374     case X86::TEST64mr:    I = {8, false, {{CHECK32, X86::TEST64ri32},
1375                                            {NOCHECK, X86::TEST64mr}}}; break;
1376 
1377     // ADD
1378     case X86::ADD8rm:      I = {1, true,  {{NOCHECK, X86::ADD8ri}}};   break;
1379     case X86::ADD16rm:     I = {2, true,  {{CHECK8,  X86::ADD16ri8},
1380                                            {NOCHECK, X86::ADD16ri}}};  break;
1381     case X86::ADD32rm:     I = {4, true,  {{CHECK8,  X86::ADD32ri8},
1382                                            {NOCHECK, X86::ADD32ri}}};  break;
1383     case X86::ADD64rm:     I = {8, true,  {{CHECK8,  X86::ADD64ri8},
1384                                            {CHECK32, X86::ADD64ri32},
1385                                            {NOCHECK, X86::ADD64rm}}};  break;
1386 
1387     // SUB
1388     case X86::SUB8rm:      I = {1, true,  {{NOCHECK, X86::SUB8ri}}};   break;
1389     case X86::SUB16rm:     I = {2, true,  {{CHECK8,  X86::SUB16ri8},
1390                                            {NOCHECK, X86::SUB16ri}}};  break;
1391     case X86::SUB32rm:     I = {4, true,  {{CHECK8,  X86::SUB32ri8},
1392                                            {NOCHECK, X86::SUB32ri}}};  break;
1393     case X86::SUB64rm:     I = {8, true,  {{CHECK8,  X86::SUB64ri8},
1394                                            {CHECK32, X86::SUB64ri32},
1395                                            {NOCHECK, X86::SUB64rm}}};  break;
1396 
1397     // AND
1398     case X86::AND8rm:      I = {1, true,  {{NOCHECK, X86::AND8ri}}};   break;
1399     case X86::AND16rm:     I = {2, true,  {{CHECK8,  X86::AND16ri8},
1400                                            {NOCHECK, X86::AND16ri}}};  break;
1401     case X86::AND32rm:     I = {4, true,  {{CHECK8,  X86::AND32ri8},
1402                                            {NOCHECK, X86::AND32ri}}};  break;
1403     case X86::AND64rm:     I = {8, true,  {{CHECK8,  X86::AND64ri8},
1404                                            {CHECK32, X86::AND64ri32},
1405                                            {NOCHECK, X86::AND64rm}}};  break;
1406 
1407     // OR
1408     case X86::OR8rm:       I = {1, true,  {{NOCHECK, X86::OR8ri}}};    break;
1409     case X86::OR16rm:      I = {2, true,  {{CHECK8,  X86::OR16ri8},
1410                                            {NOCHECK, X86::OR16ri}}};   break;
1411     case X86::OR32rm:      I = {4, true,  {{CHECK8,  X86::OR32ri8},
1412                                            {NOCHECK, X86::OR32ri}}};   break;
1413     case X86::OR64rm:      I = {8, true,  {{CHECK8,  X86::OR64ri8},
1414                                            {CHECK32, X86::OR64ri32},
1415                                            {NOCHECK, X86::OR64rm}}};   break;
1416 
1417     // XOR
1418     case X86::XOR8rm:      I = {1, true,  {{NOCHECK, X86::XOR8ri}}};   break;
1419     case X86::XOR16rm:     I = {2, true,  {{CHECK8,  X86::XOR16ri8},
1420                                            {NOCHECK, X86::XOR16ri}}};  break;
1421     case X86::XOR32rm:     I = {4, true,  {{CHECK8,  X86::XOR32ri8},
1422                                            {NOCHECK, X86::XOR32ri}}};  break;
1423     case X86::XOR64rm:     I = {8, true,  {{CHECK8,  X86::XOR64ri8},
1424                                            {CHECK32, X86::XOR64ri32},
1425                                            {NOCHECK, X86::XOR64rm}}};  break;
1426     }
1427 
1428     // Compute the immediate value.
1429     assert(Offset + I.DataSize <= ConstantData.size() &&
1430            "invalid offset for given constant data");
1431     int64_t ImmVal =
1432         DataExtractor(ConstantData, true, 8).getSigned(&Offset, I.DataSize);
1433 
1434     // Compute the new opcode.
1435     unsigned NewOpcode = 0;
1436     for (const std::pair<CheckSignExt, unsigned> &Check : I.Checks) {
1437       NewOpcode = Check.second;
1438       if (Check.first == NOCHECK)
1439         break;
1440       if (Check.first == CHECK8 && isInt<8>(ImmVal))
1441         break;
1442       if (Check.first == CHECK32 && isInt<32>(ImmVal))
1443         break;
1444     }
1445     if (NewOpcode == Inst.getOpcode())
1446       return false;
1447 
1448     // Modify the instruction.
1449     MCOperand ImmOp = MCOperand::createImm(ImmVal);
1450     uint32_t TargetOpNum = 0;
1451     // Test instruction does not follow the regular pattern of putting the
1452     // memory reference of a load (5 MCOperands) last in the list of operands.
1453     // Since it is not modifying the register operand, it is not treated as
1454     // a destination operand and it is not the first operand as it is in the
1455     // other instructions we treat here.
1456     if (NewOpcode == X86::TEST8ri || NewOpcode == X86::TEST16ri ||
1457         NewOpcode == X86::TEST32ri || NewOpcode == X86::TEST64ri32)
1458       TargetOpNum = getMemoryOperandNo(Inst) + X86::AddrNumOperands;
1459 
1460     MCOperand TargetOp = Inst.getOperand(TargetOpNum);
1461     Inst.clear();
1462     Inst.setOpcode(NewOpcode);
1463     Inst.addOperand(TargetOp);
1464     if (I.HasLHS)
1465       Inst.addOperand(TargetOp);
1466     Inst.addOperand(ImmOp);
1467 
1468     return true;
1469   }
1470 
1471   /// TODO: this implementation currently works for the most common opcodes that
1472   /// load from memory. It can be extended to work with memory store opcodes as
1473   /// well as more memory load opcodes.
1474   bool replaceMemOperandWithReg(MCInst &Inst, MCPhysReg RegNum) const override {
1475     unsigned NewOpcode;
1476 
1477     switch (Inst.getOpcode()) {
1478     default: {
1479       switch (getPopSize(Inst)) {
1480       case 2:            NewOpcode = X86::MOV16rr; break;
1481       case 4:            NewOpcode = X86::MOV32rr; break;
1482       case 8:            NewOpcode = X86::MOV64rr; break;
1483       default:           return false;
1484       }
1485       break;
1486     }
1487 
1488     // MOV
1489     case X86::MOV8rm:      NewOpcode = X86::MOV8rr;   break;
1490     case X86::MOV16rm:     NewOpcode = X86::MOV16rr;  break;
1491     case X86::MOV32rm:     NewOpcode = X86::MOV32rr;  break;
1492     case X86::MOV64rm:     NewOpcode = X86::MOV64rr;  break;
1493     }
1494 
1495     // Modify the instruction.
1496     MCOperand RegOp = MCOperand::createReg(RegNum);
1497     MCOperand TargetOp = Inst.getOperand(0);
1498     Inst.clear();
1499     Inst.setOpcode(NewOpcode);
1500     Inst.addOperand(TargetOp);
1501     Inst.addOperand(RegOp);
1502 
1503     return true;
1504   }
1505 
1506   bool isRedundantMove(const MCInst &Inst) const override {
1507     switch (Inst.getOpcode()) {
1508     default:
1509       return false;
1510 
1511     // MOV
1512     case X86::MOV8rr:
1513     case X86::MOV16rr:
1514     case X86::MOV32rr:
1515     case X86::MOV64rr:
1516       break;
1517     }
1518 
1519     assert(Inst.getOperand(0).isReg() && Inst.getOperand(1).isReg());
1520     return Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg();
1521   }
1522 
1523   bool requiresAlignedAddress(const MCInst &Inst) const override {
1524     const MCInstrDesc &Desc = Info->get(Inst.getOpcode());
1525     for (unsigned int I = 0; I < Desc.getNumOperands(); ++I) {
1526       const MCOperandInfo &Op = Desc.operands()[I];
1527       if (Op.OperandType != MCOI::OPERAND_REGISTER)
1528         continue;
1529       if (Op.RegClass == X86::VR128RegClassID)
1530         return true;
1531     }
1532     return false;
1533   }
1534 
1535   bool convertJmpToTailCall(MCInst &Inst) override {
1536     if (isTailCall(Inst))
1537       return false;
1538 
1539     int NewOpcode;
1540     switch (Inst.getOpcode()) {
1541     default:
1542       return false;
1543     case X86::JMP_1:
1544     case X86::JMP_2:
1545     case X86::JMP_4:
1546       NewOpcode = X86::JMP_4;
1547       break;
1548     case X86::JMP16m:
1549     case X86::JMP32m:
1550     case X86::JMP64m:
1551       NewOpcode = X86::JMP32m;
1552       break;
1553     case X86::JMP16r:
1554     case X86::JMP32r:
1555     case X86::JMP64r:
1556       NewOpcode = X86::JMP32r;
1557       break;
1558     }
1559 
1560     Inst.setOpcode(NewOpcode);
1561     setTailCall(Inst);
1562     return true;
1563   }
1564 
1565   bool convertTailCallToJmp(MCInst &Inst) override {
1566     int NewOpcode;
1567     switch (Inst.getOpcode()) {
1568     default:
1569       return false;
1570     case X86::JMP_4:
1571       NewOpcode = X86::JMP_1;
1572       break;
1573     case X86::JMP32m:
1574       NewOpcode = X86::JMP64m;
1575       break;
1576     case X86::JMP32r:
1577       NewOpcode = X86::JMP64r;
1578       break;
1579     }
1580 
1581     Inst.setOpcode(NewOpcode);
1582     removeAnnotation(Inst, MCPlus::MCAnnotation::kTailCall);
1583     clearOffset(Inst);
1584     return true;
1585   }
1586 
1587   bool convertTailCallToCall(MCInst &Inst) override {
1588     int NewOpcode;
1589     switch (Inst.getOpcode()) {
1590     default:
1591       return false;
1592     case X86::JMP_4:
1593       NewOpcode = X86::CALL64pcrel32;
1594       break;
1595     case X86::JMP32m:
1596       NewOpcode = X86::CALL64m;
1597       break;
1598     case X86::JMP32r:
1599       NewOpcode = X86::CALL64r;
1600       break;
1601     }
1602 
1603     Inst.setOpcode(NewOpcode);
1604     removeAnnotation(Inst, MCPlus::MCAnnotation::kTailCall);
1605     return true;
1606   }
1607 
1608   InstructionListType createIndirectPLTCall(MCInst &&DirectCall,
1609                                             const MCSymbol *TargetLocation,
1610                                             MCContext *Ctx) override {
1611     assert((DirectCall.getOpcode() == X86::CALL64pcrel32 ||
1612             (DirectCall.getOpcode() == X86::JMP_4 && isTailCall(DirectCall))) &&
1613            "64-bit direct (tail) call instruction expected");
1614 
1615     InstructionListType Code;
1616     // Create a new indirect call by converting the previous direct call.
1617     MCInst Inst = DirectCall;
1618     const auto NewOpcode =
1619         (Inst.getOpcode() == X86::CALL64pcrel32) ? X86::CALL64m : X86::JMP32m;
1620     Inst.setOpcode(NewOpcode);
1621 
1622     // Replace the first operand and preserve auxiliary operands of
1623     // the instruction.
1624     Inst.erase(Inst.begin());
1625     Inst.insert(Inst.begin(),
1626                 MCOperand::createReg(X86::NoRegister)); // AddrSegmentReg
1627     Inst.insert(Inst.begin(),
1628                 MCOperand::createExpr(                  // Displacement
1629                     MCSymbolRefExpr::create(TargetLocation,
1630                                             MCSymbolRefExpr::VK_None, *Ctx)));
1631     Inst.insert(Inst.begin(),
1632                 MCOperand::createReg(X86::NoRegister)); // IndexReg
1633     Inst.insert(Inst.begin(),
1634                 MCOperand::createImm(1));               // ScaleAmt
1635     Inst.insert(Inst.begin(),
1636                 MCOperand::createReg(X86::RIP));        // BaseReg
1637 
1638     Code.emplace_back(Inst);
1639     return Code;
1640   }
1641 
1642   void convertIndirectCallToLoad(MCInst &Inst, MCPhysReg Reg) override {
1643     bool IsTailCall = isTailCall(Inst);
1644     if (IsTailCall)
1645       removeAnnotation(Inst, MCPlus::MCAnnotation::kTailCall);
1646     if (Inst.getOpcode() == X86::CALL64m ||
1647         (Inst.getOpcode() == X86::JMP32m && IsTailCall)) {
1648       Inst.setOpcode(X86::MOV64rm);
1649       Inst.insert(Inst.begin(), MCOperand::createReg(Reg));
1650       return;
1651     }
1652     if (Inst.getOpcode() == X86::CALL64r ||
1653         (Inst.getOpcode() == X86::JMP32r && IsTailCall)) {
1654       Inst.setOpcode(X86::MOV64rr);
1655       Inst.insert(Inst.begin(), MCOperand::createReg(Reg));
1656       return;
1657     }
1658     LLVM_DEBUG(Inst.dump());
1659     llvm_unreachable("not implemented");
1660   }
1661 
1662   bool shortenInstruction(MCInst &Inst,
1663                           const MCSubtargetInfo &STI) const override {
1664     unsigned OldOpcode = Inst.getOpcode();
1665     unsigned NewOpcode = OldOpcode;
1666 
1667     int MemOpNo = getMemoryOperandNo(Inst);
1668 
1669     // Check and remove redundant Address-Size override prefix.
1670     if (opts::X86StripRedundantAddressSize) {
1671       uint64_t TSFlags = Info->get(OldOpcode).TSFlags;
1672       unsigned Flags = Inst.getFlags();
1673 
1674       if (!X86_MC::needsAddressSizeOverride(Inst, STI, MemOpNo, TSFlags) &&
1675           Flags & X86::IP_HAS_AD_SIZE)
1676         Inst.setFlags(Flags ^ X86::IP_HAS_AD_SIZE);
1677     }
1678 
1679     // Check and remove EIZ/RIZ. These cases represent ambiguous cases where
1680     // SIB byte is present, but no index is used and modrm alone should have
1681     // been enough. Converting to NoRegister effectively removes the SIB byte.
1682     if (MemOpNo >= 0) {
1683       MCOperand &IndexOp =
1684           Inst.getOperand(static_cast<unsigned>(MemOpNo) + X86::AddrIndexReg);
1685       if (IndexOp.getReg() == X86::EIZ || IndexOp.getReg() == X86::RIZ)
1686         IndexOp = MCOperand::createReg(X86::NoRegister);
1687     }
1688 
1689     if (isBranch(Inst)) {
1690       NewOpcode = getShortBranchOpcode(OldOpcode);
1691     } else if (OldOpcode == X86::MOV64ri) {
1692       if (Inst.getOperand(MCPlus::getNumPrimeOperands(Inst) - 1).isImm()) {
1693         const int64_t Imm =
1694             Inst.getOperand(MCPlus::getNumPrimeOperands(Inst) - 1).getImm();
1695         if (int64_t(Imm) == int64_t(int32_t(Imm)))
1696           NewOpcode = X86::MOV64ri32;
1697       }
1698     } else {
1699       // If it's arithmetic instruction check if signed operand fits in 1 byte.
1700       const unsigned ShortOpcode = X86::getOpcodeForShortImmediateForm(OldOpcode);
1701       if (ShortOpcode != OldOpcode &&
1702           Inst.getOperand(MCPlus::getNumPrimeOperands(Inst) - 1).isImm()) {
1703         int64_t Imm =
1704             Inst.getOperand(MCPlus::getNumPrimeOperands(Inst) - 1).getImm();
1705         if (int64_t(Imm) == int64_t(int8_t(Imm)))
1706           NewOpcode = ShortOpcode;
1707       }
1708     }
1709 
1710     if (NewOpcode == OldOpcode)
1711       return false;
1712 
1713     Inst.setOpcode(NewOpcode);
1714     return true;
1715   }
1716 
1717   bool
1718   convertMoveToConditionalMove(MCInst &Inst, unsigned CC, bool AllowStackMemOp,
1719                                bool AllowBasePtrStackMemOp) const override {
1720     // - Register-register moves are OK
1721     // - Stores are filtered out by opcode (no store CMOV)
1722     // - Non-stack loads are prohibited (generally unsafe)
1723     // - Stack loads are OK if AllowStackMemOp is true
1724     // - Stack loads with RBP are OK if AllowBasePtrStackMemOp is true
1725     if (mayLoad(Inst)) {
1726       // If stack memory operands are not allowed, no loads are allowed
1727       if (!AllowStackMemOp)
1728         return false;
1729 
1730       // If stack memory operands are allowed, check if it's a load from stack
1731       bool IsLoad, IsStore, IsStoreFromReg, IsSimple, IsIndexed;
1732       MCPhysReg Reg;
1733       int32_t SrcImm;
1734       uint16_t StackPtrReg;
1735       int64_t StackOffset;
1736       uint8_t Size;
1737       bool IsStackAccess =
1738           isStackAccess(Inst, IsLoad, IsStore, IsStoreFromReg, Reg, SrcImm,
1739                         StackPtrReg, StackOffset, Size, IsSimple, IsIndexed);
1740       // Prohibit non-stack-based loads
1741       if (!IsStackAccess)
1742         return false;
1743       // If stack memory operands are allowed, check if it's RBP-based
1744       if (!AllowBasePtrStackMemOp &&
1745           RegInfo->isSubRegisterEq(X86::RBP, StackPtrReg))
1746         return false;
1747     }
1748 
1749     unsigned NewOpcode = 0;
1750     switch (Inst.getOpcode()) {
1751     case X86::MOV16rr:
1752       NewOpcode = X86::CMOV16rr;
1753       break;
1754     case X86::MOV16rm:
1755       NewOpcode = X86::CMOV16rm;
1756       break;
1757     case X86::MOV32rr:
1758       NewOpcode = X86::CMOV32rr;
1759       break;
1760     case X86::MOV32rm:
1761       NewOpcode = X86::CMOV32rm;
1762       break;
1763     case X86::MOV64rr:
1764       NewOpcode = X86::CMOV64rr;
1765       break;
1766     case X86::MOV64rm:
1767       NewOpcode = X86::CMOV64rm;
1768       break;
1769     default:
1770       return false;
1771     }
1772     Inst.setOpcode(NewOpcode);
1773     // Insert CC at the end of prime operands, before annotations
1774     Inst.insert(Inst.begin() + MCPlus::getNumPrimeOperands(Inst),
1775                 MCOperand::createImm(CC));
1776     // CMOV is a 3-operand MCInst, so duplicate the destination as src1
1777     Inst.insert(Inst.begin(), Inst.getOperand(0));
1778     return true;
1779   }
1780 
1781   bool lowerTailCall(MCInst &Inst) override {
1782     if (Inst.getOpcode() == X86::JMP_4 && isTailCall(Inst)) {
1783       Inst.setOpcode(X86::JMP_1);
1784       removeAnnotation(Inst, MCPlus::MCAnnotation::kTailCall);
1785       return true;
1786     }
1787     return false;
1788   }
1789 
1790   const MCSymbol *getTargetSymbol(const MCInst &Inst,
1791                                   unsigned OpNum = 0) const override {
1792     if (OpNum >= MCPlus::getNumPrimeOperands(Inst))
1793       return nullptr;
1794 
1795     const MCOperand &Op = Inst.getOperand(OpNum);
1796     if (!Op.isExpr())
1797       return nullptr;
1798 
1799     auto *SymExpr = dyn_cast<MCSymbolRefExpr>(Op.getExpr());
1800     if (!SymExpr || SymExpr->getKind() != MCSymbolRefExpr::VK_None)
1801       return nullptr;
1802 
1803     return &SymExpr->getSymbol();
1804   }
1805 
1806   // This is the same as the base class, but since we are overriding one of
1807   // getTargetSymbol's signatures above, we need to override all of them.
1808   const MCSymbol *getTargetSymbol(const MCExpr *Expr) const override {
1809     return &cast<const MCSymbolRefExpr>(Expr)->getSymbol();
1810   }
1811 
1812   bool analyzeBranch(InstructionIterator Begin, InstructionIterator End,
1813                      const MCSymbol *&TBB, const MCSymbol *&FBB,
1814                      MCInst *&CondBranch,
1815                      MCInst *&UncondBranch) const override {
1816     auto I = End;
1817 
1818     // Bottom-up analysis
1819     while (I != Begin) {
1820       --I;
1821 
1822       // Ignore nops and CFIs
1823       if (isPseudo(*I))
1824         continue;
1825 
1826       // Stop when we find the first non-terminator
1827       if (!isTerminator(*I))
1828         break;
1829 
1830       if (!isBranch(*I))
1831         break;
1832 
1833       // Handle unconditional branches.
1834       if ((I->getOpcode() == X86::JMP_1 || I->getOpcode() == X86::JMP_2 ||
1835            I->getOpcode() == X86::JMP_4) &&
1836           !isTailCall(*I)) {
1837         // If any code was seen after this unconditional branch, we've seen
1838         // unreachable code. Ignore them.
1839         CondBranch = nullptr;
1840         UncondBranch = &*I;
1841         const MCSymbol *Sym = getTargetSymbol(*I);
1842         assert(Sym != nullptr &&
1843                "Couldn't extract BB symbol from jump operand");
1844         TBB = Sym;
1845         continue;
1846       }
1847 
1848       // Ignore indirect branches
1849       if (getCondCode(*I) == X86::COND_INVALID)
1850         return false;
1851 
1852       if (CondBranch == nullptr) {
1853         const MCSymbol *TargetBB = getTargetSymbol(*I);
1854         if (TargetBB == nullptr) {
1855           // Unrecognized branch target
1856           return false;
1857         }
1858         FBB = TBB;
1859         TBB = TargetBB;
1860         CondBranch = &*I;
1861         continue;
1862       }
1863 
1864       llvm_unreachable("multiple conditional branches in one BB");
1865     }
1866     return true;
1867   }
1868 
1869   /// Analyzes PIC-style jump table code template and return identified
1870   /// IndirectBranchType, MemLocInstr (all cases) and FixedEntryLoadInstr
1871   /// (POSSIBLE_PIC_FIXED_BRANCH case).
1872   template <typename Itr>
1873   std::tuple<IndirectBranchType, MCInst *, MCInst *>
1874   analyzePICJumpTable(Itr II, Itr IE, MCPhysReg R1, MCPhysReg R2) const {
1875     // Analyze PIC-style jump table code template:
1876     //
1877     //    lea PIC_JUMP_TABLE(%rip), {%r1|%r2}     <- MemLocInstr
1878     //    mov ({%r1|%r2}, %index, 4), {%r2|%r1}
1879     //    add %r2, %r1
1880     //    jmp *%r1
1881     //
1882     // or a fixed indirect jump template:
1883     //
1884     //    movslq En(%rip), {%r2|%r1}              <- FixedEntryLoadInstr
1885     //    lea PIC_JUMP_TABLE(%rip), {%r1|%r2}     <- MemLocInstr
1886     //    add %r2, %r1
1887     //    jmp *%r1
1888     //
1889     // (with any irrelevant instructions in-between)
1890     //
1891     // When we call this helper we've already determined %r1 and %r2, and
1892     // reverse instruction iterator \p II is pointing to the ADD instruction.
1893     //
1894     // PIC jump table looks like following:
1895     //
1896     //   JT:  ----------
1897     //    E1:| L1 - JT  |
1898     //       |----------|
1899     //    E2:| L2 - JT  |
1900     //       |----------|
1901     //       |          |
1902     //          ......
1903     //    En:| Ln - JT  |
1904     //        ----------
1905     //
1906     // Where L1, L2, ..., Ln represent labels in the function.
1907     //
1908     // The actual relocations in the table will be of the form:
1909     //
1910     //   Ln - JT
1911     //    = (Ln - En) + (En - JT)
1912     //    = R_X86_64_PC32(Ln) + En - JT
1913     //    = R_X86_64_PC32(Ln + offsetof(En))
1914     //
1915     auto isRIPRel = [&](X86MemOperand &MO) {
1916       // NB: DispExpr should be set
1917       return MO.DispExpr != nullptr &&
1918              MO.BaseRegNum == RegInfo->getProgramCounter() &&
1919              MO.IndexRegNum == X86::NoRegister &&
1920              MO.SegRegNum == X86::NoRegister;
1921     };
1922     auto isIndexed = [](X86MemOperand &MO, MCPhysReg R) {
1923       // NB: IndexRegNum should be set.
1924       return MO.IndexRegNum != X86::NoRegister && MO.BaseRegNum == R &&
1925              MO.ScaleImm == 4 && MO.DispImm == 0 &&
1926              MO.SegRegNum == X86::NoRegister;
1927     };
1928     LLVM_DEBUG(dbgs() << "Checking for PIC jump table\n");
1929     MCInst *FirstInstr = nullptr;
1930     MCInst *SecondInstr = nullptr;
1931     enum {
1932       NOMATCH = 0,
1933       MATCH_JUMP_TABLE,
1934       MATCH_FIXED_BRANCH,
1935     } MatchingState = NOMATCH;
1936     while (++II != IE) {
1937       MCInst &Instr = *II;
1938       const MCInstrDesc &InstrDesc = Info->get(Instr.getOpcode());
1939       if (!InstrDesc.hasDefOfPhysReg(Instr, R1, *RegInfo) &&
1940           !InstrDesc.hasDefOfPhysReg(Instr, R2, *RegInfo)) {
1941         // Ignore instructions that don't affect R1, R2 registers.
1942         continue;
1943       }
1944       const bool IsMOVSXInstr = isMOVSX64rm32(Instr);
1945       const bool IsLEAInstr = isLEA64r(Instr);
1946       if (MatchingState == NOMATCH) {
1947         if (IsMOVSXInstr)
1948           MatchingState = MATCH_JUMP_TABLE;
1949         else if (IsLEAInstr)
1950           MatchingState = MATCH_FIXED_BRANCH;
1951         else
1952           break;
1953 
1954         // Check if the first instruction is setting %r1 or %r2. In canonical
1955         // form lea sets %r1 and mov sets %r2. If it's the opposite - rename so
1956         // we have to only check a single form.
1957         unsigned DestReg = Instr.getOperand(0).getReg();
1958         MCPhysReg &ExpectReg = MatchingState == MATCH_JUMP_TABLE ? R2 : R1;
1959         if (DestReg != ExpectReg)
1960           std::swap(R1, R2);
1961         if (DestReg != ExpectReg)
1962           break;
1963 
1964         // Verify operands
1965         std::optional<X86MemOperand> MO = evaluateX86MemoryOperand(Instr);
1966         if (!MO)
1967           break;
1968         if ((MatchingState == MATCH_JUMP_TABLE && isIndexed(*MO, R1)) ||
1969             (MatchingState == MATCH_FIXED_BRANCH && isRIPRel(*MO)))
1970           FirstInstr = &Instr;
1971         else
1972           break;
1973       } else {
1974         unsigned ExpectReg = MatchingState == MATCH_JUMP_TABLE ? R1 : R2;
1975         if (!InstrDesc.hasDefOfPhysReg(Instr, ExpectReg, *RegInfo))
1976           continue;
1977         if ((MatchingState == MATCH_JUMP_TABLE && !IsLEAInstr) ||
1978             (MatchingState == MATCH_FIXED_BRANCH && !IsMOVSXInstr))
1979           break;
1980         if (Instr.getOperand(0).getReg() != ExpectReg)
1981           break;
1982 
1983         // Verify operands.
1984         std::optional<X86MemOperand> MO = evaluateX86MemoryOperand(Instr);
1985         if (!MO)
1986           break;
1987         if (!isRIPRel(*MO))
1988           break;
1989         SecondInstr = &Instr;
1990         break;
1991       }
1992     }
1993 
1994     if (!SecondInstr)
1995       return std::make_tuple(IndirectBranchType::UNKNOWN, nullptr, nullptr);
1996 
1997     if (MatchingState == MATCH_FIXED_BRANCH) {
1998       LLVM_DEBUG(dbgs() << "checking potential fixed indirect branch\n");
1999       return std::make_tuple(IndirectBranchType::POSSIBLE_PIC_FIXED_BRANCH,
2000                              FirstInstr, SecondInstr);
2001     }
2002     LLVM_DEBUG(dbgs() << "checking potential PIC jump table\n");
2003     return std::make_tuple(IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE,
2004                            SecondInstr, nullptr);
2005   }
2006 
2007   IndirectBranchType
2008   analyzeIndirectBranch(MCInst &Instruction, InstructionIterator Begin,
2009                         InstructionIterator End, const unsigned PtrSize,
2010                         MCInst *&MemLocInstrOut, unsigned &BaseRegNumOut,
2011                         unsigned &IndexRegNumOut, int64_t &DispValueOut,
2012                         const MCExpr *&DispExprOut, MCInst *&PCRelBaseOut,
2013                         MCInst *&FixedEntryLoadInst) const override {
2014     // Try to find a (base) memory location from where the address for
2015     // the indirect branch is loaded. For X86-64 the memory will be specified
2016     // in the following format:
2017     //
2018     //   {%rip}/{%basereg} + Imm + IndexReg * Scale
2019     //
2020     // We are interested in the cases where Scale == sizeof(uintptr_t) and
2021     // the contents of the memory are presumably an array of pointers to code.
2022     //
2023     // Normal jump table:
2024     //
2025     //    jmp *(JUMP_TABLE, %index, Scale)        <- MemLocInstr
2026     //
2027     //    or
2028     //
2029     //    mov (JUMP_TABLE, %index, Scale), %r1    <- MemLocInstr
2030     //    ...
2031     //    jmp %r1
2032     //
2033     // We handle PIC-style jump tables separately.
2034     //
2035     MemLocInstrOut = nullptr;
2036     BaseRegNumOut = X86::NoRegister;
2037     IndexRegNumOut = X86::NoRegister;
2038     DispValueOut = 0;
2039     DispExprOut = nullptr;
2040     FixedEntryLoadInst = nullptr;
2041 
2042     std::reverse_iterator<InstructionIterator> II(End);
2043     std::reverse_iterator<InstructionIterator> IE(Begin);
2044 
2045     IndirectBranchType Type = IndirectBranchType::UNKNOWN;
2046 
2047     // An instruction referencing memory used by jump instruction (directly or
2048     // via register). This location could be an array of function pointers
2049     // in case of indirect tail call, or a jump table.
2050     MCInst *MemLocInstr = nullptr;
2051 
2052     if (MCPlus::getNumPrimeOperands(Instruction) == 1) {
2053       // If the indirect jump is on register - try to detect if the
2054       // register value is loaded from a memory location.
2055       assert(Instruction.getOperand(0).isReg() && "register operand expected");
2056       const unsigned R1 = Instruction.getOperand(0).getReg();
2057       // Check if one of the previous instructions defines the jump-on register.
2058       for (auto PrevII = II; PrevII != IE; ++PrevII) {
2059         MCInst &PrevInstr = *PrevII;
2060         const MCInstrDesc &PrevInstrDesc = Info->get(PrevInstr.getOpcode());
2061 
2062         if (!PrevInstrDesc.hasDefOfPhysReg(PrevInstr, R1, *RegInfo))
2063           continue;
2064 
2065         if (isMoveMem2Reg(PrevInstr)) {
2066           MemLocInstr = &PrevInstr;
2067           break;
2068         }
2069         if (isADD64rr(PrevInstr)) {
2070           unsigned R2 = PrevInstr.getOperand(2).getReg();
2071           if (R1 == R2)
2072             return IndirectBranchType::UNKNOWN;
2073           std::tie(Type, MemLocInstr, FixedEntryLoadInst) =
2074               analyzePICJumpTable(PrevII, IE, R1, R2);
2075           break;
2076         }
2077         return IndirectBranchType::UNKNOWN;
2078       }
2079       if (!MemLocInstr) {
2080         // No definition seen for the register in this function so far. Could be
2081         // an input parameter - which means it is an external code reference.
2082         // It also could be that the definition happens to be in the code that
2083         // we haven't processed yet. Since we have to be conservative, return
2084         // as UNKNOWN case.
2085         return IndirectBranchType::UNKNOWN;
2086       }
2087     } else {
2088       MemLocInstr = &Instruction;
2089     }
2090 
2091     const MCRegister RIPRegister = RegInfo->getProgramCounter();
2092 
2093     // Analyze the memory location.
2094     std::optional<X86MemOperand> MO = evaluateX86MemoryOperand(*MemLocInstr);
2095     if (!MO)
2096       return IndirectBranchType::UNKNOWN;
2097 
2098     BaseRegNumOut = MO->BaseRegNum;
2099     IndexRegNumOut = MO->IndexRegNum;
2100     DispValueOut = MO->DispImm;
2101     DispExprOut = MO->DispExpr;
2102 
2103     if ((MO->BaseRegNum != X86::NoRegister && MO->BaseRegNum != RIPRegister) ||
2104         MO->SegRegNum != X86::NoRegister)
2105       return IndirectBranchType::UNKNOWN;
2106 
2107     if (MemLocInstr == &Instruction &&
2108         (!MO->ScaleImm || MO->IndexRegNum == X86::NoRegister)) {
2109       MemLocInstrOut = MemLocInstr;
2110       return IndirectBranchType::POSSIBLE_FIXED_BRANCH;
2111     }
2112 
2113     switch (Type) {
2114     case IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE:
2115       if (MO->ScaleImm != 1 || MO->BaseRegNum != RIPRegister)
2116         return IndirectBranchType::UNKNOWN;
2117       break;
2118     case IndirectBranchType::POSSIBLE_PIC_FIXED_BRANCH:
2119       break;
2120     default:
2121       if (MO->ScaleImm != PtrSize)
2122         return IndirectBranchType::UNKNOWN;
2123     }
2124 
2125     MemLocInstrOut = MemLocInstr;
2126 
2127     return Type;
2128   }
2129 
2130   /// Analyze a callsite to see if it could be a virtual method call.  This only
2131   /// checks to see if the overall pattern is satisfied, it does not guarantee
2132   /// that the callsite is a true virtual method call.
2133   /// The format of virtual method calls that are recognized is one of the
2134   /// following:
2135   ///
2136   ///  Form 1: (found in debug code)
2137   ///    add METHOD_OFFSET, %VtableReg
2138   ///    mov (%VtableReg), %MethodReg
2139   ///    ...
2140   ///    call or jmp *%MethodReg
2141   ///
2142   ///  Form 2:
2143   ///    mov METHOD_OFFSET(%VtableReg), %MethodReg
2144   ///    ...
2145   ///    call or jmp *%MethodReg
2146   ///
2147   ///  Form 3:
2148   ///    ...
2149   ///    call or jmp *METHOD_OFFSET(%VtableReg)
2150   ///
2151   bool analyzeVirtualMethodCall(InstructionIterator ForwardBegin,
2152                                 InstructionIterator ForwardEnd,
2153                                 std::vector<MCInst *> &MethodFetchInsns,
2154                                 unsigned &VtableRegNum, unsigned &MethodRegNum,
2155                                 uint64_t &MethodOffset) const override {
2156     VtableRegNum = X86::NoRegister;
2157     MethodRegNum = X86::NoRegister;
2158     MethodOffset = 0;
2159 
2160     std::reverse_iterator<InstructionIterator> Itr(ForwardEnd);
2161     std::reverse_iterator<InstructionIterator> End(ForwardBegin);
2162 
2163     MCInst &CallInst = *Itr++;
2164     assert(isIndirectBranch(CallInst) || isCall(CallInst));
2165 
2166     // The call can just be jmp offset(reg)
2167     if (std::optional<X86MemOperand> MO = evaluateX86MemoryOperand(CallInst)) {
2168       if (!MO->DispExpr && MO->BaseRegNum != X86::RIP &&
2169           MO->BaseRegNum != X86::RBP && MO->BaseRegNum != X86::NoRegister) {
2170         MethodRegNum = MO->BaseRegNum;
2171         if (MO->ScaleImm == 1 && MO->IndexRegNum == X86::NoRegister &&
2172             MO->SegRegNum == X86::NoRegister) {
2173           VtableRegNum = MethodRegNum;
2174           MethodOffset = MO->DispImm;
2175           MethodFetchInsns.push_back(&CallInst);
2176           return true;
2177         }
2178       }
2179       return false;
2180     }
2181     if (CallInst.getOperand(0).isReg())
2182       MethodRegNum = CallInst.getOperand(0).getReg();
2183     else
2184       return false;
2185 
2186     if (MethodRegNum == X86::RIP || MethodRegNum == X86::RBP) {
2187       VtableRegNum = X86::NoRegister;
2188       MethodRegNum = X86::NoRegister;
2189       return false;
2190     }
2191 
2192     // find load from vtable, this may or may not include the method offset
2193     while (Itr != End) {
2194       MCInst &CurInst = *Itr++;
2195       const MCInstrDesc &Desc = Info->get(CurInst.getOpcode());
2196       if (Desc.hasDefOfPhysReg(CurInst, MethodRegNum, *RegInfo)) {
2197         if (!mayLoad(CurInst))
2198           return false;
2199         if (std::optional<X86MemOperand> MO =
2200                 evaluateX86MemoryOperand(CurInst)) {
2201           if (!MO->DispExpr && MO->ScaleImm == 1 &&
2202               MO->BaseRegNum != X86::RIP && MO->BaseRegNum != X86::RBP &&
2203               MO->BaseRegNum != X86::NoRegister &&
2204               MO->IndexRegNum == X86::NoRegister &&
2205               MO->SegRegNum == X86::NoRegister) {
2206             VtableRegNum = MO->BaseRegNum;
2207             MethodOffset = MO->DispImm;
2208             MethodFetchInsns.push_back(&CurInst);
2209             if (MethodOffset != 0)
2210               return true;
2211             break;
2212           }
2213         }
2214         return false;
2215       }
2216     }
2217 
2218     if (!VtableRegNum)
2219       return false;
2220 
2221     // look for any adds affecting the method register.
2222     while (Itr != End) {
2223       MCInst &CurInst = *Itr++;
2224       const MCInstrDesc &Desc = Info->get(CurInst.getOpcode());
2225       if (Desc.hasDefOfPhysReg(CurInst, VtableRegNum, *RegInfo)) {
2226         if (isADDri(CurInst)) {
2227           assert(!MethodOffset);
2228           MethodOffset = CurInst.getOperand(2).getImm();
2229           MethodFetchInsns.insert(MethodFetchInsns.begin(), &CurInst);
2230           break;
2231         }
2232       }
2233     }
2234 
2235     return true;
2236   }
2237 
2238   void createStackPointerIncrement(MCInst &Inst, int Size,
2239                                    bool NoFlagsClobber) const override {
2240     if (NoFlagsClobber) {
2241       Inst.setOpcode(X86::LEA64r);
2242       Inst.clear();
2243       Inst.addOperand(MCOperand::createReg(X86::RSP));
2244       Inst.addOperand(MCOperand::createReg(X86::RSP));        // BaseReg
2245       Inst.addOperand(MCOperand::createImm(1));               // ScaleAmt
2246       Inst.addOperand(MCOperand::createReg(X86::NoRegister)); // IndexReg
2247       Inst.addOperand(MCOperand::createImm(-Size));           // Displacement
2248       Inst.addOperand(MCOperand::createReg(X86::NoRegister)); // AddrSegmentReg
2249       return;
2250     }
2251     Inst.setOpcode(X86::SUB64ri8);
2252     Inst.clear();
2253     Inst.addOperand(MCOperand::createReg(X86::RSP));
2254     Inst.addOperand(MCOperand::createReg(X86::RSP));
2255     Inst.addOperand(MCOperand::createImm(Size));
2256   }
2257 
2258   void createStackPointerDecrement(MCInst &Inst, int Size,
2259                                    bool NoFlagsClobber) const override {
2260     if (NoFlagsClobber) {
2261       Inst.setOpcode(X86::LEA64r);
2262       Inst.clear();
2263       Inst.addOperand(MCOperand::createReg(X86::RSP));
2264       Inst.addOperand(MCOperand::createReg(X86::RSP));        // BaseReg
2265       Inst.addOperand(MCOperand::createImm(1));               // ScaleAmt
2266       Inst.addOperand(MCOperand::createReg(X86::NoRegister)); // IndexReg
2267       Inst.addOperand(MCOperand::createImm(Size));            // Displacement
2268       Inst.addOperand(MCOperand::createReg(X86::NoRegister)); // AddrSegmentReg
2269       return;
2270     }
2271     Inst.setOpcode(X86::ADD64ri8);
2272     Inst.clear();
2273     Inst.addOperand(MCOperand::createReg(X86::RSP));
2274     Inst.addOperand(MCOperand::createReg(X86::RSP));
2275     Inst.addOperand(MCOperand::createImm(Size));
2276   }
2277 
2278   void createSaveToStack(MCInst &Inst, const MCPhysReg &StackReg, int Offset,
2279                          const MCPhysReg &SrcReg, int Size) const override {
2280     unsigned NewOpcode;
2281     switch (Size) {
2282     default:
2283       llvm_unreachable("Invalid operand size");
2284       return;
2285     case 2:      NewOpcode = X86::MOV16mr; break;
2286     case 4:      NewOpcode = X86::MOV32mr; break;
2287     case 8:      NewOpcode = X86::MOV64mr; break;
2288     }
2289     Inst.setOpcode(NewOpcode);
2290     Inst.clear();
2291     Inst.addOperand(MCOperand::createReg(StackReg));        // BaseReg
2292     Inst.addOperand(MCOperand::createImm(1));               // ScaleAmt
2293     Inst.addOperand(MCOperand::createReg(X86::NoRegister)); // IndexReg
2294     Inst.addOperand(MCOperand::createImm(Offset));          // Displacement
2295     Inst.addOperand(MCOperand::createReg(X86::NoRegister)); // AddrSegmentReg
2296     Inst.addOperand(MCOperand::createReg(SrcReg));
2297   }
2298 
2299   void createRestoreFromStack(MCInst &Inst, const MCPhysReg &StackReg,
2300                               int Offset, const MCPhysReg &DstReg,
2301                               int Size) const override {
2302     return createLoad(Inst, StackReg, /*Scale=*/1, /*IndexReg=*/X86::NoRegister,
2303                       Offset, nullptr, /*AddrSegmentReg=*/X86::NoRegister,
2304                       DstReg, Size);
2305   }
2306 
2307   void createLoad(MCInst &Inst, const MCPhysReg &BaseReg, int64_t Scale,
2308                   const MCPhysReg &IndexReg, int64_t Offset,
2309                   const MCExpr *OffsetExpr, const MCPhysReg &AddrSegmentReg,
2310                   const MCPhysReg &DstReg, int Size) const override {
2311     unsigned NewOpcode;
2312     switch (Size) {
2313     default:
2314       llvm_unreachable("Invalid operand size");
2315       return;
2316     case 2:      NewOpcode = X86::MOV16rm; break;
2317     case 4:      NewOpcode = X86::MOV32rm; break;
2318     case 8:      NewOpcode = X86::MOV64rm; break;
2319     }
2320     Inst.setOpcode(NewOpcode);
2321     Inst.clear();
2322     Inst.addOperand(MCOperand::createReg(DstReg));
2323     Inst.addOperand(MCOperand::createReg(BaseReg));
2324     Inst.addOperand(MCOperand::createImm(Scale));
2325     Inst.addOperand(MCOperand::createReg(IndexReg));
2326     if (OffsetExpr)
2327       Inst.addOperand(MCOperand::createExpr(OffsetExpr)); // Displacement
2328     else
2329       Inst.addOperand(MCOperand::createImm(Offset)); // Displacement
2330     Inst.addOperand(MCOperand::createReg(AddrSegmentReg)); // AddrSegmentReg
2331   }
2332 
2333   InstructionListType createLoadImmediate(const MCPhysReg Dest,
2334                                           uint64_t Imm) const override {
2335     InstructionListType Insts;
2336     Insts.emplace_back();
2337     Insts.back().setOpcode(X86::MOV64ri32);
2338     Insts.back().clear();
2339     Insts.back().addOperand(MCOperand::createReg(Dest));
2340     Insts.back().addOperand(MCOperand::createImm(Imm));
2341     return Insts;
2342   }
2343 
2344   void createIJmp32Frag(SmallVectorImpl<MCInst> &Insts,
2345                         const MCOperand &BaseReg, const MCOperand &Scale,
2346                         const MCOperand &IndexReg, const MCOperand &Offset,
2347                         const MCOperand &TmpReg) const override {
2348     // The code fragment we emit here is:
2349     //
2350     //  mov32 (%base, %index, scale), %tmpreg
2351     //  ijmp *(%tmpreg)
2352     //
2353     MCInst IJmp;
2354     IJmp.setOpcode(X86::JMP64r);
2355     IJmp.addOperand(TmpReg);
2356 
2357     MCInst Load;
2358     Load.setOpcode(X86::MOV32rm);
2359     Load.addOperand(TmpReg);
2360     Load.addOperand(BaseReg);
2361     Load.addOperand(Scale);
2362     Load.addOperand(IndexReg);
2363     Load.addOperand(Offset);
2364     Load.addOperand(MCOperand::createReg(X86::NoRegister));
2365 
2366     Insts.push_back(Load);
2367     Insts.push_back(IJmp);
2368   }
2369 
2370   void createNoop(MCInst &Inst) const override {
2371     Inst.setOpcode(X86::NOOP);
2372     Inst.clear();
2373   }
2374 
2375   void createReturn(MCInst &Inst) const override {
2376     Inst.setOpcode(X86::RET64);
2377     Inst.clear();
2378   }
2379 
2380   InstructionListType createInlineMemcpy(bool ReturnEnd) const override {
2381     InstructionListType Code;
2382     if (ReturnEnd)
2383       Code.emplace_back(MCInstBuilder(X86::LEA64r)
2384                             .addReg(X86::RAX)
2385                             .addReg(X86::RDI)
2386                             .addImm(1)
2387                             .addReg(X86::RDX)
2388                             .addImm(0)
2389                             .addReg(X86::NoRegister));
2390     else
2391       Code.emplace_back(MCInstBuilder(X86::MOV64rr)
2392                             .addReg(X86::RAX)
2393                             .addReg(X86::RDI));
2394 
2395     Code.emplace_back(MCInstBuilder(X86::MOV32rr)
2396                           .addReg(X86::ECX)
2397                           .addReg(X86::EDX));
2398     Code.emplace_back(MCInstBuilder(X86::REP_MOVSB_64));
2399 
2400     return Code;
2401   }
2402 
2403   InstructionListType createOneByteMemcpy() const override {
2404     InstructionListType Code;
2405     Code.emplace_back(MCInstBuilder(X86::MOV8rm)
2406                           .addReg(X86::CL)
2407                           .addReg(X86::RSI)
2408                           .addImm(0)
2409                           .addReg(X86::NoRegister)
2410                           .addImm(0)
2411                           .addReg(X86::NoRegister));
2412     Code.emplace_back(MCInstBuilder(X86::MOV8mr)
2413                           .addReg(X86::RDI)
2414                           .addImm(0)
2415                           .addReg(X86::NoRegister)
2416                           .addImm(0)
2417                           .addReg(X86::NoRegister)
2418                           .addReg(X86::CL));
2419     Code.emplace_back(MCInstBuilder(X86::MOV64rr)
2420                           .addReg(X86::RAX)
2421                           .addReg(X86::RDI));
2422     return Code;
2423   }
2424 
2425   InstructionListType createCmpJE(MCPhysReg RegNo, int64_t Imm,
2426                                   const MCSymbol *Target,
2427                                   MCContext *Ctx) const override {
2428     InstructionListType Code;
2429     Code.emplace_back(MCInstBuilder(X86::CMP64ri8)
2430                           .addReg(RegNo)
2431                           .addImm(Imm));
2432     Code.emplace_back(MCInstBuilder(X86::JCC_1)
2433                           .addExpr(MCSymbolRefExpr::create(
2434                               Target, MCSymbolRefExpr::VK_None, *Ctx))
2435                           .addImm(X86::COND_E));
2436     return Code;
2437   }
2438 
2439   std::optional<Relocation>
2440   createRelocation(const MCFixup &Fixup,
2441                    const MCAsmBackend &MAB) const override {
2442     const MCFixupKindInfo &FKI = MAB.getFixupKindInfo(Fixup.getKind());
2443 
2444     assert(FKI.TargetOffset == 0 && "0-bit relocation offset expected");
2445     const uint64_t RelOffset = Fixup.getOffset();
2446 
2447     uint64_t RelType;
2448     if (FKI.Flags & MCFixupKindInfo::FKF_IsPCRel) {
2449       switch (FKI.TargetSize) {
2450       default:
2451         return std::nullopt;
2452       case  8: RelType = ELF::R_X86_64_PC8; break;
2453       case 16: RelType = ELF::R_X86_64_PC16; break;
2454       case 32: RelType = ELF::R_X86_64_PC32; break;
2455       case 64: RelType = ELF::R_X86_64_PC64; break;
2456       }
2457     } else {
2458       switch (FKI.TargetSize) {
2459       default:
2460         return std::nullopt;
2461       case  8: RelType = ELF::R_X86_64_8; break;
2462       case 16: RelType = ELF::R_X86_64_16; break;
2463       case 32: RelType = ELF::R_X86_64_32; break;
2464       case 64: RelType = ELF::R_X86_64_64; break;
2465       }
2466     }
2467 
2468     auto [RelSymbol, RelAddend] = extractFixupExpr(Fixup);
2469 
2470     return Relocation({RelOffset, RelSymbol, RelType, RelAddend, 0});
2471   }
2472 
2473   bool replaceImmWithSymbolRef(MCInst &Inst, const MCSymbol *Symbol,
2474                                int64_t Addend, MCContext *Ctx, int64_t &Value,
2475                                uint64_t RelType) const override {
2476     unsigned ImmOpNo = -1U;
2477 
2478     for (unsigned Index = 0; Index < MCPlus::getNumPrimeOperands(Inst);
2479          ++Index) {
2480       if (Inst.getOperand(Index).isImm()) {
2481         ImmOpNo = Index;
2482         // TODO: this is a bit hacky.  It finds the correct operand by
2483         // searching for a specific immediate value.  If no value is
2484         // provided it defaults to the last immediate operand found.
2485         // This could lead to unexpected results if the instruction
2486         // has more than one immediate with the same value.
2487         if (Inst.getOperand(ImmOpNo).getImm() == Value)
2488           break;
2489       }
2490     }
2491 
2492     if (ImmOpNo == -1U)
2493       return false;
2494 
2495     Value = Inst.getOperand(ImmOpNo).getImm();
2496 
2497     setOperandToSymbolRef(Inst, ImmOpNo, Symbol, Addend, Ctx, RelType);
2498 
2499     return true;
2500   }
2501 
2502   bool replaceRegWithImm(MCInst &Inst, unsigned Register,
2503                          int64_t Imm) const override {
2504 
2505     enum CheckSignExt : uint8_t {
2506       NOCHECK = 0,
2507       CHECK8,
2508       CHECK32,
2509     };
2510 
2511     using CheckList = std::vector<std::pair<CheckSignExt, unsigned>>;
2512     struct InstInfo {
2513       // Size in bytes that Inst loads from memory.
2514       uint8_t DataSize;
2515 
2516       // True when the target operand has to be duplicated because the opcode
2517       // expects a LHS operand.
2518       bool HasLHS;
2519 
2520       // List of checks and corresponding opcodes to be used. We try to use the
2521       // smallest possible immediate value when various sizes are available,
2522       // hence we may need to check whether a larger constant fits in a smaller
2523       // immediate.
2524       CheckList Checks;
2525     };
2526 
2527     InstInfo I;
2528 
2529     switch (Inst.getOpcode()) {
2530     default: {
2531       switch (getPushSize(Inst)) {
2532 
2533       case 2: I = {2, false, {{CHECK8, X86::PUSH16i8}, {NOCHECK, X86::PUSH16i}}}; break;
2534       case 4: I = {4, false, {{CHECK8, X86::PUSH32i8}, {NOCHECK, X86::PUSH32i}}}; break;
2535       case 8: I = {8, false, {{CHECK8, X86::PUSH64i8},
2536                               {CHECK32, X86::PUSH64i32},
2537                               {NOCHECK, Inst.getOpcode()}}}; break;
2538       default: return false;
2539       }
2540       break;
2541     }
2542 
2543     // MOV
2544     case X86::MOV8rr:       I = {1, false, {{NOCHECK, X86::MOV8ri}}}; break;
2545     case X86::MOV16rr:      I = {2, false, {{NOCHECK, X86::MOV16ri}}}; break;
2546     case X86::MOV32rr:      I = {4, false, {{NOCHECK, X86::MOV32ri}}}; break;
2547     case X86::MOV64rr:      I = {8, false, {{CHECK32, X86::MOV64ri32},
2548                                             {NOCHECK, X86::MOV64ri}}}; break;
2549 
2550     case X86::MOV8mr:       I = {1, false, {{NOCHECK, X86::MOV8mi}}}; break;
2551     case X86::MOV16mr:      I = {2, false, {{NOCHECK, X86::MOV16mi}}}; break;
2552     case X86::MOV32mr:      I = {4, false, {{NOCHECK, X86::MOV32mi}}}; break;
2553     case X86::MOV64mr:      I = {8, false, {{CHECK32, X86::MOV64mi32},
2554                                             {NOCHECK, X86::MOV64mr}}}; break;
2555 
2556     // MOVZX
2557     case X86::MOVZX16rr8:   I = {1, false, {{NOCHECK, X86::MOV16ri}}}; break;
2558     case X86::MOVZX32rr8:   I = {1, false, {{NOCHECK, X86::MOV32ri}}}; break;
2559     case X86::MOVZX32rr16:  I = {2, false, {{NOCHECK, X86::MOV32ri}}}; break;
2560 
2561     // CMP
2562     case X86::CMP8rr:       I = {1, false, {{NOCHECK, X86::CMP8ri}}}; break;
2563     case X86::CMP16rr:      I = {2, false, {{CHECK8, X86::CMP16ri8},
2564                                             {NOCHECK, X86::CMP16ri}}}; break;
2565     case X86::CMP32rr:      I = {4, false, {{CHECK8, X86::CMP32ri8},
2566                                             {NOCHECK, X86::CMP32ri}}}; break;
2567     case X86::CMP64rr:      I = {8, false, {{CHECK8, X86::CMP64ri8},
2568                                             {CHECK32, X86::CMP64ri32},
2569                                             {NOCHECK, X86::CMP64rr}}}; break;
2570 
2571     // TEST
2572     case X86::TEST8rr:      I = {1, false, {{NOCHECK, X86::TEST8ri}}}; break;
2573     case X86::TEST16rr:     I = {2, false, {{NOCHECK, X86::TEST16ri}}}; break;
2574     case X86::TEST32rr:     I = {4, false, {{NOCHECK, X86::TEST32ri}}}; break;
2575     case X86::TEST64rr:     I = {8, false, {{CHECK32, X86::TEST64ri32},
2576                                             {NOCHECK, X86::TEST64rr}}}; break;
2577 
2578     // ADD
2579     case X86::ADD8rr:       I = {1, true, {{NOCHECK, X86::ADD8ri}}}; break;
2580     case X86::ADD16rr:      I = {2, true, {{CHECK8, X86::ADD16ri8},
2581                                            {NOCHECK, X86::ADD16ri}}}; break;
2582     case X86::ADD32rr:      I = {4, true, {{CHECK8, X86::ADD32ri8},
2583                                            {NOCHECK, X86::ADD32ri}}}; break;
2584     case X86::ADD64rr:      I = {8, true, {{CHECK8, X86::ADD64ri8},
2585                                            {CHECK32, X86::ADD64ri32},
2586                                            {NOCHECK, X86::ADD64rr}}}; break;
2587 
2588     // SUB
2589     case X86::SUB8rr:       I = {1, true, {{NOCHECK, X86::SUB8ri}}}; break;
2590     case X86::SUB16rr:      I = {2, true, {{CHECK8, X86::SUB16ri8},
2591                                            {NOCHECK, X86::SUB16ri}}}; break;
2592     case X86::SUB32rr:      I = {4, true, {{CHECK8, X86::SUB32ri8},
2593                                            {NOCHECK, X86::SUB32ri}}}; break;
2594     case X86::SUB64rr:      I = {8, true, {{CHECK8, X86::SUB64ri8},
2595                                            {CHECK32, X86::SUB64ri32},
2596                                            {NOCHECK, X86::SUB64rr}}}; break;
2597 
2598     // AND
2599     case X86::AND8rr:       I = {1, true, {{NOCHECK, X86::AND8ri}}}; break;
2600     case X86::AND16rr:      I = {2, true, {{CHECK8, X86::AND16ri8},
2601                                            {NOCHECK, X86::AND16ri}}}; break;
2602     case X86::AND32rr:      I = {4, true, {{CHECK8, X86::AND32ri8},
2603                                            {NOCHECK, X86::AND32ri}}}; break;
2604     case X86::AND64rr:      I = {8, true, {{CHECK8, X86::AND64ri8},
2605                                            {CHECK32, X86::AND64ri32},
2606                                            {NOCHECK, X86::AND64rr}}}; break;
2607 
2608     // OR
2609     case X86::OR8rr:        I = {1, true, {{NOCHECK, X86::OR8ri}}}; break;
2610     case X86::OR16rr:       I = {2, true, {{CHECK8, X86::OR16ri8},
2611                                            {NOCHECK, X86::OR16ri}}}; break;
2612     case X86::OR32rr:       I = {4, true, {{CHECK8, X86::OR32ri8},
2613                                            {NOCHECK, X86::OR32ri}}}; break;
2614     case X86::OR64rr:       I = {8, true, {{CHECK8, X86::OR64ri8},
2615                                            {CHECK32, X86::OR64ri32},
2616                                            {NOCHECK, X86::OR64rr}}}; break;
2617 
2618     // XOR
2619     case X86::XOR8rr:       I = {1, true, {{NOCHECK, X86::XOR8ri}}}; break;
2620     case X86::XOR16rr:      I = {2, true, {{CHECK8, X86::XOR16ri8},
2621                                            {NOCHECK, X86::XOR16ri}}}; break;
2622     case X86::XOR32rr:      I = {4, true, {{CHECK8, X86::XOR32ri8},
2623                                            {NOCHECK, X86::XOR32ri}}}; break;
2624     case X86::XOR64rr:      I = {8, true, {{CHECK8, X86::XOR64ri8},
2625                                            {CHECK32, X86::XOR64ri32},
2626                                            {NOCHECK, X86::XOR64rr}}}; break;
2627     }
2628 
2629     // Compute the new opcode.
2630     unsigned NewOpcode = 0;
2631     for (const std::pair<CheckSignExt, unsigned> &Check : I.Checks) {
2632       NewOpcode = Check.second;
2633       if (Check.first == NOCHECK)
2634         break;
2635       if (Check.first == CHECK8 && isInt<8>(Imm))
2636         break;
2637       if (Check.first == CHECK32 && isInt<32>(Imm))
2638         break;
2639     }
2640     if (NewOpcode == Inst.getOpcode())
2641       return false;
2642 
2643     const MCInstrDesc &InstDesc = Info->get(Inst.getOpcode());
2644 
2645     unsigned NumFound = 0;
2646     for (unsigned Index = InstDesc.getNumDefs() + (I.HasLHS ? 1 : 0),
2647                   E = InstDesc.getNumOperands();
2648          Index != E; ++Index)
2649       if (Inst.getOperand(Index).isReg() &&
2650           Inst.getOperand(Index).getReg() == Register)
2651         NumFound++;
2652 
2653     if (NumFound != 1)
2654       return false;
2655 
2656     MCOperand TargetOp = Inst.getOperand(0);
2657     Inst.clear();
2658     Inst.setOpcode(NewOpcode);
2659     Inst.addOperand(TargetOp);
2660     if (I.HasLHS)
2661       Inst.addOperand(TargetOp);
2662     Inst.addOperand(MCOperand::createImm(Imm));
2663 
2664     return true;
2665   }
2666 
2667   bool replaceRegWithReg(MCInst &Inst, unsigned ToReplace,
2668                          unsigned ReplaceWith) const override {
2669 
2670     // Get the HasLHS value so that iteration can be done
2671     bool HasLHS;
2672     if (X86::isAND(Inst.getOpcode()) || X86::isADD(Inst.getOpcode()) ||
2673         X86::isSUB(Inst.getOpcode())) {
2674       HasLHS = true;
2675     } else if (isPop(Inst) || isPush(Inst) || X86::isCMP(Inst.getOpcode()) ||
2676                X86::isTEST(Inst.getOpcode())) {
2677       HasLHS = false;
2678     } else {
2679       switch (Inst.getOpcode()) {
2680       case X86::MOV8rr:
2681       case X86::MOV8rm:
2682       case X86::MOV8mr:
2683       case X86::MOV8ri:
2684       case X86::MOV16rr:
2685       case X86::MOV16rm:
2686       case X86::MOV16mr:
2687       case X86::MOV16ri:
2688       case X86::MOV32rr:
2689       case X86::MOV32rm:
2690       case X86::MOV32mr:
2691       case X86::MOV32ri:
2692       case X86::MOV64rr:
2693       case X86::MOV64rm:
2694       case X86::MOV64mr:
2695       case X86::MOV64ri:
2696       case X86::MOVZX16rr8:
2697       case X86::MOVZX32rr8:
2698       case X86::MOVZX32rr16:
2699       case X86::MOVSX32rm8:
2700       case X86::MOVSX32rr8:
2701       case X86::MOVSX64rm32:
2702       case X86::LEA64r:
2703         HasLHS = false;
2704         break;
2705       default:
2706         return false;
2707       }
2708     }
2709 
2710     const MCInstrDesc &InstDesc = Info->get(Inst.getOpcode());
2711 
2712     bool FoundOne = false;
2713 
2714     // Iterate only through src operands that arent also dest operands
2715     for (unsigned Index = InstDesc.getNumDefs() + (HasLHS ? 1 : 0),
2716                   E = InstDesc.getNumOperands();
2717          Index != E; ++Index) {
2718       BitVector RegAliases = getAliases(ToReplace, true);
2719       if (!Inst.getOperand(Index).isReg() ||
2720           !RegAliases.test(Inst.getOperand(Index).getReg()))
2721         continue;
2722       // Resize register if needed
2723       unsigned SizedReplaceWith = getAliasSized(
2724           ReplaceWith, getRegSize(Inst.getOperand(Index).getReg()));
2725       MCOperand NewOperand = MCOperand::createReg(SizedReplaceWith);
2726       Inst.getOperand(Index) = NewOperand;
2727       FoundOne = true;
2728     }
2729 
2730     // Return true if at least one operand was replaced
2731     return FoundOne;
2732   }
2733 
2734   void createUncondBranch(MCInst &Inst, const MCSymbol *TBB,
2735                           MCContext *Ctx) const override {
2736     Inst.clear();
2737     Inst.setOpcode(X86::JMP_1);
2738     Inst.clear();
2739     Inst.addOperand(MCOperand::createExpr(
2740         MCSymbolRefExpr::create(TBB, MCSymbolRefExpr::VK_None, *Ctx)));
2741   }
2742 
2743   void createLongUncondBranch(MCInst &Inst, const MCSymbol *Target,
2744                               MCContext *Ctx) const override {
2745     Inst.setOpcode(X86::JMP_4);
2746     Inst.clear();
2747     Inst.addOperand(MCOperand::createExpr(
2748         MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *Ctx)));
2749   }
2750 
2751   void createCall(MCInst &Inst, const MCSymbol *Target,
2752                   MCContext *Ctx) override {
2753     Inst.setOpcode(X86::CALL64pcrel32);
2754     Inst.clear();
2755     Inst.addOperand(MCOperand::createExpr(
2756         MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *Ctx)));
2757   }
2758 
2759   void createTailCall(MCInst &Inst, const MCSymbol *Target,
2760                       MCContext *Ctx) override {
2761     return createDirectCall(Inst, Target, Ctx, /*IsTailCall*/ true);
2762   }
2763 
2764   void createLongTailCall(InstructionListType &Seq, const MCSymbol *Target,
2765                           MCContext *Ctx) override {
2766     Seq.clear();
2767     Seq.emplace_back();
2768     createDirectCall(Seq.back(), Target, Ctx, /*IsTailCall*/ true);
2769   }
2770 
2771   void createTrap(MCInst &Inst) const override {
2772     Inst.clear();
2773     Inst.setOpcode(X86::TRAP);
2774   }
2775 
2776   void createCondBranch(MCInst &Inst, const MCSymbol *Target, unsigned CC,
2777                         MCContext *Ctx) const override {
2778     Inst.setOpcode(X86::JCC_1);
2779     Inst.clear();
2780     Inst.addOperand(MCOperand::createExpr(
2781         MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *Ctx)));
2782     Inst.addOperand(MCOperand::createImm(CC));
2783   }
2784 
2785   void createLongCondBranch(MCInst &Inst, const MCSymbol *Target, unsigned CC,
2786                             MCContext *Ctx) const override {
2787     Inst.setOpcode(X86::JCC_4);
2788     Inst.clear();
2789     Inst.addOperand(MCOperand::createExpr(
2790         MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *Ctx)));
2791     Inst.addOperand(MCOperand::createImm(CC));
2792   }
2793 
2794   void reverseBranchCondition(MCInst &Inst, const MCSymbol *TBB,
2795                               MCContext *Ctx) const override {
2796     unsigned InvCC = getInvertedCondCode(getCondCode(Inst));
2797     assert(InvCC != X86::COND_INVALID && "invalid branch instruction");
2798     Inst.getOperand(Info->get(Inst.getOpcode()).NumOperands - 1).setImm(InvCC);
2799     Inst.getOperand(0) = MCOperand::createExpr(
2800         MCSymbolRefExpr::create(TBB, MCSymbolRefExpr::VK_None, *Ctx));
2801   }
2802 
2803   bool replaceBranchCondition(MCInst &Inst, const MCSymbol *TBB, MCContext *Ctx,
2804                               unsigned CC) const override {
2805     if (CC == X86::COND_INVALID)
2806       return false;
2807     Inst.getOperand(Info->get(Inst.getOpcode()).NumOperands - 1).setImm(CC);
2808     Inst.getOperand(0) = MCOperand::createExpr(
2809         MCSymbolRefExpr::create(TBB, MCSymbolRefExpr::VK_None, *Ctx));
2810     return true;
2811   }
2812 
2813   unsigned getCanonicalBranchCondCode(unsigned CC) const override {
2814     switch (CC) {
2815     default:           return X86::COND_INVALID;
2816 
2817     case X86::COND_E:  return X86::COND_E;
2818     case X86::COND_NE: return X86::COND_E;
2819 
2820     case X86::COND_L:  return X86::COND_L;
2821     case X86::COND_GE: return X86::COND_L;
2822 
2823     case X86::COND_LE: return X86::COND_G;
2824     case X86::COND_G:  return X86::COND_G;
2825 
2826     case X86::COND_B:  return X86::COND_B;
2827     case X86::COND_AE: return X86::COND_B;
2828 
2829     case X86::COND_BE: return X86::COND_A;
2830     case X86::COND_A:  return X86::COND_A;
2831 
2832     case X86::COND_S:  return X86::COND_S;
2833     case X86::COND_NS: return X86::COND_S;
2834 
2835     case X86::COND_P:  return X86::COND_P;
2836     case X86::COND_NP: return X86::COND_P;
2837 
2838     case X86::COND_O:  return X86::COND_O;
2839     case X86::COND_NO: return X86::COND_O;
2840     }
2841   }
2842 
2843   void replaceBranchTarget(MCInst &Inst, const MCSymbol *TBB,
2844                            MCContext *Ctx) const override {
2845     assert((isCall(Inst) || isBranch(Inst)) && !isIndirectBranch(Inst) &&
2846            "Invalid instruction");
2847     Inst.getOperand(0) = MCOperand::createExpr(
2848         MCSymbolRefExpr::create(TBB, MCSymbolRefExpr::VK_None, *Ctx));
2849   }
2850 
2851   MCPhysReg getX86R11() const override { return X86::R11; }
2852 
2853   unsigned getShortBranchOpcode(unsigned Opcode) const override {
2854     switch (Opcode) {
2855     default:
2856       return Opcode;
2857     case X86::JMP_2:
2858       return X86::JMP_1;
2859     case X86::JMP_4:
2860       return X86::JMP_1;
2861     case X86::JCC_2:
2862       return X86::JCC_1;
2863     case X86::JCC_4:
2864       return X86::JCC_1;
2865     }
2866   }
2867 
2868   MCPhysReg getIntArgRegister(unsigned ArgNo) const override {
2869     // FIXME: this should depend on the calling convention.
2870     switch (ArgNo) {
2871     case 0:   return X86::RDI;
2872     case 1:   return X86::RSI;
2873     case 2:   return X86::RDX;
2874     case 3:   return X86::RCX;
2875     case 4:   return X86::R8;
2876     case 5:   return X86::R9;
2877     default:  return getNoRegister();
2878     }
2879   }
2880 
2881   void createPause(MCInst &Inst) const override {
2882     Inst.clear();
2883     Inst.setOpcode(X86::PAUSE);
2884   }
2885 
2886   void createLfence(MCInst &Inst) const override {
2887     Inst.clear();
2888     Inst.setOpcode(X86::LFENCE);
2889   }
2890 
2891   void createDirectCall(MCInst &Inst, const MCSymbol *Target, MCContext *Ctx,
2892                         bool IsTailCall) override {
2893     Inst.clear();
2894     Inst.setOpcode(IsTailCall ? X86::JMP_4 : X86::CALL64pcrel32);
2895     Inst.addOperand(MCOperand::createExpr(
2896         MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *Ctx)));
2897     if (IsTailCall)
2898       setTailCall(Inst);
2899   }
2900 
2901   void createShortJmp(InstructionListType &Seq, const MCSymbol *Target,
2902                       MCContext *Ctx, bool IsTailCall) override {
2903     Seq.clear();
2904     MCInst Inst;
2905     Inst.setOpcode(X86::JMP_1);
2906     Inst.addOperand(MCOperand::createExpr(
2907         MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *Ctx)));
2908     if (IsTailCall)
2909       setTailCall(Inst);
2910     Seq.emplace_back(Inst);
2911   }
2912 
2913   bool isConditionalMove(const MCInst &Inst) const override {
2914     unsigned OpCode = Inst.getOpcode();
2915     return (OpCode == X86::CMOV16rr || OpCode == X86::CMOV32rr ||
2916             OpCode == X86::CMOV64rr);
2917   }
2918 
2919   bool isBranchOnMem(const MCInst &Inst) const override {
2920     unsigned OpCode = Inst.getOpcode();
2921     if (OpCode == X86::CALL64m || (OpCode == X86::JMP32m && isTailCall(Inst)) ||
2922         OpCode == X86::JMP64m)
2923       return true;
2924 
2925     return false;
2926   }
2927 
2928   bool isBranchOnReg(const MCInst &Inst) const override {
2929     unsigned OpCode = Inst.getOpcode();
2930     if (OpCode == X86::CALL64r || (OpCode == X86::JMP32r && isTailCall(Inst)) ||
2931         OpCode == X86::JMP64r)
2932       return true;
2933 
2934     return false;
2935   }
2936 
2937   void createPushRegister(MCInst &Inst, MCPhysReg Reg,
2938                           unsigned Size) const override {
2939     Inst.clear();
2940     unsigned NewOpcode = 0;
2941     if (Reg == X86::EFLAGS) {
2942       switch (Size) {
2943       case 2: NewOpcode = X86::PUSHF16;  break;
2944       case 4: NewOpcode = X86::PUSHF32;  break;
2945       case 8: NewOpcode = X86::PUSHF64;  break;
2946       default:
2947         llvm_unreachable("Unexpected size");
2948       }
2949       Inst.setOpcode(NewOpcode);
2950       return;
2951     }
2952     switch (Size) {
2953     case 2: NewOpcode = X86::PUSH16r;  break;
2954     case 4: NewOpcode = X86::PUSH32r;  break;
2955     case 8: NewOpcode = X86::PUSH64r;  break;
2956     default:
2957       llvm_unreachable("Unexpected size");
2958     }
2959     Inst.setOpcode(NewOpcode);
2960     Inst.addOperand(MCOperand::createReg(Reg));
2961   }
2962 
2963   void createPopRegister(MCInst &Inst, MCPhysReg Reg,
2964                          unsigned Size) const override {
2965     Inst.clear();
2966     unsigned NewOpcode = 0;
2967     if (Reg == X86::EFLAGS) {
2968       switch (Size) {
2969       case 2: NewOpcode = X86::POPF16;  break;
2970       case 4: NewOpcode = X86::POPF32;  break;
2971       case 8: NewOpcode = X86::POPF64;  break;
2972       default:
2973         llvm_unreachable("Unexpected size");
2974       }
2975       Inst.setOpcode(NewOpcode);
2976       return;
2977     }
2978     switch (Size) {
2979     case 2: NewOpcode = X86::POP16r;  break;
2980     case 4: NewOpcode = X86::POP32r;  break;
2981     case 8: NewOpcode = X86::POP64r;  break;
2982     default:
2983       llvm_unreachable("Unexpected size");
2984     }
2985     Inst.setOpcode(NewOpcode);
2986     Inst.addOperand(MCOperand::createReg(Reg));
2987   }
2988 
2989   void createPushFlags(MCInst &Inst, unsigned Size) const override {
2990     return createPushRegister(Inst, X86::EFLAGS, Size);
2991   }
2992 
2993   void createPopFlags(MCInst &Inst, unsigned Size) const override {
2994     return createPopRegister(Inst, X86::EFLAGS, Size);
2995   }
2996 
2997   void createAddRegImm(MCInst &Inst, MCPhysReg Reg, int64_t Value,
2998                        unsigned Size) const {
2999     unsigned int Opcode;
3000     switch (Size) {
3001     case 1: Opcode = X86::ADD8ri; break;
3002     case 2: Opcode = X86::ADD16ri; break;
3003     case 4: Opcode = X86::ADD32ri; break;
3004     default:
3005       llvm_unreachable("Unexpected size");
3006     }
3007     Inst.setOpcode(Opcode);
3008     Inst.clear();
3009     Inst.addOperand(MCOperand::createReg(Reg));
3010     Inst.addOperand(MCOperand::createReg(Reg));
3011     Inst.addOperand(MCOperand::createImm(Value));
3012   }
3013 
3014   void createClearRegWithNoEFlagsUpdate(MCInst &Inst, MCPhysReg Reg,
3015                                         unsigned Size) const {
3016     unsigned int Opcode;
3017     switch (Size) {
3018     case 1: Opcode = X86::MOV8ri; break;
3019     case 2: Opcode = X86::MOV16ri; break;
3020     case 4: Opcode = X86::MOV32ri; break;
3021     // Writing to a 32-bit register always zeros the upper 32 bits of the
3022     // full-width register
3023     case 8:
3024       Opcode = X86::MOV32ri;
3025       Reg = getAliasSized(Reg, 4);
3026       break;
3027     default:
3028       llvm_unreachable("Unexpected size");
3029     }
3030     Inst.setOpcode(Opcode);
3031     Inst.clear();
3032     Inst.addOperand(MCOperand::createReg(Reg));
3033     Inst.addOperand(MCOperand::createImm(0));
3034   }
3035 
3036   void createX86SaveOVFlagToRegister(MCInst &Inst, MCPhysReg Reg) const {
3037     Inst.setOpcode(X86::SETCCr);
3038     Inst.clear();
3039     Inst.addOperand(MCOperand::createReg(Reg));
3040     Inst.addOperand(MCOperand::createImm(X86::COND_O));
3041   }
3042 
3043   void createX86Lahf(MCInst &Inst) const {
3044     Inst.setOpcode(X86::LAHF);
3045     Inst.clear();
3046   }
3047 
3048   void createX86Sahf(MCInst &Inst) const {
3049     Inst.setOpcode(X86::SAHF);
3050     Inst.clear();
3051   }
3052 
3053   InstructionListType
3054   createInstrIncMemory(const MCSymbol *Target, MCContext *Ctx, bool IsLeaf,
3055                        unsigned CodePointerSize) const override {
3056     InstructionListType Instrs(IsLeaf ? 13 : 11);
3057     unsigned int I = 0;
3058 
3059     // Don't clobber application red zone (ABI dependent)
3060     if (IsLeaf)
3061       createStackPointerIncrement(Instrs[I++], 128,
3062                                   /*NoFlagsClobber=*/true);
3063 
3064     // Performance improvements based on the optimization discussed at
3065     // https://reviews.llvm.org/D6629
3066     // LAHF/SAHF are used instead of PUSHF/POPF
3067     // PUSHF
3068     createPushRegister(Instrs[I++], X86::RAX, 8);
3069     createClearRegWithNoEFlagsUpdate(Instrs[I++], X86::RAX, 8);
3070     createX86Lahf(Instrs[I++]);
3071     createPushRegister(Instrs[I++], X86::RAX, 8);
3072     createClearRegWithNoEFlagsUpdate(Instrs[I++], X86::RAX, 8);
3073     createX86SaveOVFlagToRegister(Instrs[I++], X86::AL);
3074     // LOCK INC
3075     InstructionListType IncMem = createIncMemory(Target, Ctx);
3076     assert(IncMem.size() == 1 && "Invalid IncMem size");
3077     std::copy(IncMem.begin(), IncMem.end(), Instrs.begin() + I);
3078     I += IncMem.size();
3079     // POPF
3080     createAddRegImm(Instrs[I++], X86::AL, 127, 1);
3081     createPopRegister(Instrs[I++], X86::RAX, 8);
3082     createX86Sahf(Instrs[I++]);
3083     createPopRegister(Instrs[I++], X86::RAX, 8);
3084 
3085     if (IsLeaf)
3086       createStackPointerDecrement(Instrs[I], 128,
3087                                   /*NoFlagsClobber=*/true);
3088     return Instrs;
3089   }
3090 
3091   void createSwap(MCInst &Inst, MCPhysReg Source, MCPhysReg MemBaseReg,
3092                   int64_t Disp) const {
3093     Inst.setOpcode(X86::XCHG64rm);
3094     Inst.clear();
3095     Inst.addOperand(MCOperand::createReg(Source));
3096     Inst.addOperand(MCOperand::createReg(Source));
3097     Inst.addOperand(MCOperand::createReg(MemBaseReg));      // BaseReg
3098     Inst.addOperand(MCOperand::createImm(1));               // ScaleAmt
3099     Inst.addOperand(MCOperand::createReg(X86::NoRegister)); // IndexReg
3100     Inst.addOperand(MCOperand::createImm(Disp));            // Displacement
3101     Inst.addOperand(MCOperand::createReg(X86::NoRegister)); // AddrSegmentReg
3102   }
3103 
3104   void createIndirectBranch(MCInst &Inst, MCPhysReg MemBaseReg,
3105                             int64_t Disp) const {
3106     Inst.setOpcode(X86::JMP64m);
3107     Inst.clear();
3108     Inst.addOperand(MCOperand::createReg(MemBaseReg));      // BaseReg
3109     Inst.addOperand(MCOperand::createImm(1));               // ScaleAmt
3110     Inst.addOperand(MCOperand::createReg(X86::NoRegister)); // IndexReg
3111     Inst.addOperand(MCOperand::createImm(Disp));            // Displacement
3112     Inst.addOperand(MCOperand::createReg(X86::NoRegister)); // AddrSegmentReg
3113   }
3114 
3115   InstructionListType createInstrumentedIndirectCall(MCInst &&CallInst,
3116                                                      MCSymbol *HandlerFuncAddr,
3117                                                      int CallSiteID,
3118                                                      MCContext *Ctx) override {
3119     // Check if the target address expression used in the original indirect call
3120     // uses the stack pointer, which we are going to clobber.
3121     static BitVector SPAliases(getAliases(X86::RSP));
3122     bool UsesSP = any_of(useOperands(CallInst), [&](const MCOperand &Op) {
3123       return Op.isReg() && SPAliases[Op.getReg()];
3124     });
3125 
3126     InstructionListType Insts;
3127     MCPhysReg TempReg = getIntArgRegister(0);
3128     // Code sequence used to enter indirect call instrumentation helper:
3129     //   push %rdi
3130     //   add $8, %rsp       ;; $rsp may be used in target, so fix it to prev val
3131     //   movq target, %rdi  ;; via convertIndirectCallTargetToLoad
3132     //   sub $8, %rsp       ;; restore correct stack value
3133     //   push %rdi
3134     //   movq $CallSiteID, %rdi
3135     //   push %rdi
3136     //   callq/jmp HandlerFuncAddr
3137     Insts.emplace_back();
3138     createPushRegister(Insts.back(), TempReg, 8);
3139     if (UsesSP) { // Only adjust SP if we really need to
3140       Insts.emplace_back();
3141       createStackPointerDecrement(Insts.back(), 8, /*NoFlagsClobber=*/false);
3142     }
3143     Insts.emplace_back(CallInst);
3144     // Insts.back() and CallInst now share the same annotation instruction.
3145     // Strip it from Insts.back(), only preserving tail call annotation.
3146     stripAnnotations(Insts.back(), /*KeepTC=*/true);
3147     convertIndirectCallToLoad(Insts.back(), TempReg);
3148     if (UsesSP) {
3149       Insts.emplace_back();
3150       createStackPointerIncrement(Insts.back(), 8, /*NoFlagsClobber=*/false);
3151     }
3152     Insts.emplace_back();
3153     createPushRegister(Insts.back(), TempReg, 8);
3154     InstructionListType LoadImm = createLoadImmediate(TempReg, CallSiteID);
3155     Insts.insert(Insts.end(), LoadImm.begin(), LoadImm.end());
3156     Insts.emplace_back();
3157     createPushRegister(Insts.back(), TempReg, 8);
3158 
3159     MCInst &NewCallInst = Insts.emplace_back();
3160     createDirectCall(NewCallInst, HandlerFuncAddr, Ctx, isTailCall(CallInst));
3161 
3162     // Carry over metadata including tail call marker if present.
3163     stripAnnotations(NewCallInst);
3164     moveAnnotations(std::move(CallInst), NewCallInst);
3165 
3166     return Insts;
3167   }
3168 
3169   InstructionListType createInstrumentedIndCallHandlerExitBB() const override {
3170     const MCPhysReg TempReg = getIntArgRegister(0);
3171     // We just need to undo the sequence created for every ind call in
3172     // instrumentIndirectTarget(), which can be accomplished minimally with:
3173     //   popfq
3174     //   pop %rdi
3175     //   add $16, %rsp
3176     //   xchg (%rsp), %rdi
3177     //   jmp *-8(%rsp)
3178     InstructionListType Insts(5);
3179     createPopFlags(Insts[0], 8);
3180     createPopRegister(Insts[1], TempReg, 8);
3181     createStackPointerDecrement(Insts[2], 16, /*NoFlagsClobber=*/false);
3182     createSwap(Insts[3], TempReg, X86::RSP, 0);
3183     createIndirectBranch(Insts[4], X86::RSP, -8);
3184     return Insts;
3185   }
3186 
3187   InstructionListType
3188   createInstrumentedIndTailCallHandlerExitBB() const override {
3189     const MCPhysReg TempReg = getIntArgRegister(0);
3190     // Same thing as above, but for tail calls
3191     //   popfq
3192     //   add $16, %rsp
3193     //   pop %rdi
3194     //   jmp *-16(%rsp)
3195     InstructionListType Insts(4);
3196     createPopFlags(Insts[0], 8);
3197     createStackPointerDecrement(Insts[1], 16, /*NoFlagsClobber=*/false);
3198     createPopRegister(Insts[2], TempReg, 8);
3199     createIndirectBranch(Insts[3], X86::RSP, -16);
3200     return Insts;
3201   }
3202 
3203   InstructionListType
3204   createInstrumentedIndCallHandlerEntryBB(const MCSymbol *InstrTrampoline,
3205                                           const MCSymbol *IndCallHandler,
3206                                           MCContext *Ctx) override {
3207     const MCPhysReg TempReg = getIntArgRegister(0);
3208     // Code sequence used to check whether InstrTampoline was initialized
3209     // and call it if so, returns via IndCallHandler.
3210     //   pushfq
3211     //   mov    InstrTrampoline,%rdi
3212     //   cmp    $0x0,%rdi
3213     //   je     IndCallHandler
3214     //   callq  *%rdi
3215     //   jmpq   IndCallHandler
3216     InstructionListType Insts;
3217     Insts.emplace_back();
3218     createPushFlags(Insts.back(), 8);
3219     Insts.emplace_back();
3220     createMove(Insts.back(), InstrTrampoline, TempReg, Ctx);
3221     InstructionListType cmpJmp = createCmpJE(TempReg, 0, IndCallHandler, Ctx);
3222     Insts.insert(Insts.end(), cmpJmp.begin(), cmpJmp.end());
3223     Insts.emplace_back();
3224     Insts.back().setOpcode(X86::CALL64r);
3225     Insts.back().addOperand(MCOperand::createReg(TempReg));
3226     Insts.emplace_back();
3227     createDirectCall(Insts.back(), IndCallHandler, Ctx, /*IsTailCall*/ true);
3228     return Insts;
3229   }
3230 
3231   InstructionListType createNumCountersGetter(MCContext *Ctx) const override {
3232     InstructionListType Insts(2);
3233     MCSymbol *NumLocs = Ctx->getOrCreateSymbol("__bolt_num_counters");
3234     createMove(Insts[0], NumLocs, X86::EAX, Ctx);
3235     createReturn(Insts[1]);
3236     return Insts;
3237   }
3238 
3239   InstructionListType
3240   createInstrLocationsGetter(MCContext *Ctx) const override {
3241     InstructionListType Insts(2);
3242     MCSymbol *Locs = Ctx->getOrCreateSymbol("__bolt_instr_locations");
3243     createLea(Insts[0], Locs, X86::EAX, Ctx);
3244     createReturn(Insts[1]);
3245     return Insts;
3246   }
3247 
3248   InstructionListType createInstrTablesGetter(MCContext *Ctx) const override {
3249     InstructionListType Insts(2);
3250     MCSymbol *Locs = Ctx->getOrCreateSymbol("__bolt_instr_tables");
3251     createLea(Insts[0], Locs, X86::EAX, Ctx);
3252     createReturn(Insts[1]);
3253     return Insts;
3254   }
3255 
3256   InstructionListType createInstrNumFuncsGetter(MCContext *Ctx) const override {
3257     InstructionListType Insts(2);
3258     MCSymbol *NumFuncs = Ctx->getOrCreateSymbol("__bolt_instr_num_funcs");
3259     createMove(Insts[0], NumFuncs, X86::EAX, Ctx);
3260     createReturn(Insts[1]);
3261     return Insts;
3262   }
3263 
3264   InstructionListType createSymbolTrampoline(const MCSymbol *TgtSym,
3265                                              MCContext *Ctx) override {
3266     InstructionListType Insts(1);
3267     createUncondBranch(Insts[0], TgtSym, Ctx);
3268     return Insts;
3269   }
3270 
3271   BlocksVectorTy indirectCallPromotion(
3272       const MCInst &CallInst,
3273       const std::vector<std::pair<MCSymbol *, uint64_t>> &Targets,
3274       const std::vector<std::pair<MCSymbol *, uint64_t>> &VtableSyms,
3275       const std::vector<MCInst *> &MethodFetchInsns,
3276       const bool MinimizeCodeSize, MCContext *Ctx) override {
3277     const bool IsTailCall = isTailCall(CallInst);
3278     const bool IsJumpTable = getJumpTable(CallInst) != 0;
3279     BlocksVectorTy Results;
3280 
3281     // Label for the current code block.
3282     MCSymbol *NextTarget = nullptr;
3283 
3284     // The join block which contains all the instructions following CallInst.
3285     // MergeBlock remains null if CallInst is a tail call.
3286     MCSymbol *MergeBlock = nullptr;
3287 
3288     unsigned FuncAddrReg = X86::R10;
3289 
3290     const bool LoadElim = !VtableSyms.empty();
3291     assert((!LoadElim || VtableSyms.size() == Targets.size()) &&
3292            "There must be a vtable entry for every method "
3293            "in the targets vector.");
3294 
3295     if (MinimizeCodeSize && !LoadElim) {
3296       std::set<unsigned> UsedRegs;
3297 
3298       for (unsigned int I = 0; I < MCPlus::getNumPrimeOperands(CallInst); ++I) {
3299         const MCOperand &Op = CallInst.getOperand(I);
3300         if (Op.isReg())
3301           UsedRegs.insert(Op.getReg());
3302       }
3303 
3304       if (UsedRegs.count(X86::R10) == 0)
3305         FuncAddrReg = X86::R10;
3306       else if (UsedRegs.count(X86::R11) == 0)
3307         FuncAddrReg = X86::R11;
3308       else
3309         return Results;
3310     }
3311 
3312     const auto jumpToMergeBlock = [&](InstructionListType &NewCall) {
3313       assert(MergeBlock);
3314       NewCall.push_back(CallInst);
3315       MCInst &Merge = NewCall.back();
3316       Merge.clear();
3317       createUncondBranch(Merge, MergeBlock, Ctx);
3318     };
3319 
3320     for (unsigned int i = 0; i < Targets.size(); ++i) {
3321       Results.emplace_back(NextTarget, InstructionListType());
3322       InstructionListType *NewCall = &Results.back().second;
3323 
3324       if (MinimizeCodeSize && !LoadElim) {
3325         // Load the call target into FuncAddrReg.
3326         NewCall->push_back(CallInst); // Copy CallInst in order to get SMLoc
3327         MCInst &Target = NewCall->back();
3328         Target.clear();
3329         Target.setOpcode(X86::MOV64ri32);
3330         Target.addOperand(MCOperand::createReg(FuncAddrReg));
3331         if (Targets[i].first) {
3332           // Is this OK?
3333           Target.addOperand(MCOperand::createExpr(MCSymbolRefExpr::create(
3334               Targets[i].first, MCSymbolRefExpr::VK_None, *Ctx)));
3335         } else {
3336           const uint64_t Addr = Targets[i].second;
3337           // Immediate address is out of sign extended 32 bit range.
3338           if (int64_t(Addr) != int64_t(int32_t(Addr)))
3339             return BlocksVectorTy();
3340 
3341           Target.addOperand(MCOperand::createImm(Addr));
3342         }
3343 
3344         // Compare current call target to a specific address.
3345         NewCall->push_back(CallInst);
3346         MCInst &Compare = NewCall->back();
3347         Compare.clear();
3348         if (isBranchOnReg(CallInst))
3349           Compare.setOpcode(X86::CMP64rr);
3350         else if (CallInst.getOpcode() == X86::CALL64pcrel32)
3351           Compare.setOpcode(X86::CMP64ri32);
3352         else
3353           Compare.setOpcode(X86::CMP64rm);
3354 
3355         Compare.addOperand(MCOperand::createReg(FuncAddrReg));
3356 
3357         // TODO: Would be preferable to only load this value once.
3358         for (unsigned i = 0;
3359              i < Info->get(CallInst.getOpcode()).getNumOperands(); ++i)
3360           if (!CallInst.getOperand(i).isInst())
3361             Compare.addOperand(CallInst.getOperand(i));
3362       } else {
3363         // Compare current call target to a specific address.
3364         NewCall->push_back(CallInst);
3365         MCInst &Compare = NewCall->back();
3366         Compare.clear();
3367         if (isBranchOnReg(CallInst))
3368           Compare.setOpcode(X86::CMP64ri32);
3369         else
3370           Compare.setOpcode(X86::CMP64mi32);
3371 
3372         // Original call address.
3373         for (unsigned i = 0;
3374              i < Info->get(CallInst.getOpcode()).getNumOperands(); ++i)
3375           if (!CallInst.getOperand(i).isInst())
3376             Compare.addOperand(CallInst.getOperand(i));
3377 
3378         // Target address.
3379         if (Targets[i].first || LoadElim) {
3380           const MCSymbol *Sym =
3381               LoadElim ? VtableSyms[i].first : Targets[i].first;
3382           const uint64_t Addend = LoadElim ? VtableSyms[i].second : 0;
3383           const MCExpr *Expr = MCSymbolRefExpr::create(Sym, *Ctx);
3384           if (Addend)
3385             Expr = MCBinaryExpr::createAdd(
3386                 Expr, MCConstantExpr::create(Addend, *Ctx), *Ctx);
3387           Compare.addOperand(MCOperand::createExpr(Expr));
3388         } else {
3389           const uint64_t Addr = Targets[i].second;
3390           // Immediate address is out of sign extended 32 bit range.
3391           if (int64_t(Addr) != int64_t(int32_t(Addr)))
3392             return BlocksVectorTy();
3393 
3394           Compare.addOperand(MCOperand::createImm(Addr));
3395         }
3396       }
3397 
3398       // jump to next target compare.
3399       NextTarget =
3400           Ctx->createNamedTempSymbol(); // generate label for the next block
3401       NewCall->push_back(CallInst);
3402 
3403       if (IsJumpTable) {
3404         MCInst &Je = NewCall->back();
3405 
3406         // Jump to next compare if target addresses don't match.
3407         Je.clear();
3408         Je.setOpcode(X86::JCC_1);
3409         if (Targets[i].first)
3410           Je.addOperand(MCOperand::createExpr(MCSymbolRefExpr::create(
3411               Targets[i].first, MCSymbolRefExpr::VK_None, *Ctx)));
3412         else
3413           Je.addOperand(MCOperand::createImm(Targets[i].second));
3414 
3415         Je.addOperand(MCOperand::createImm(X86::COND_E));
3416         assert(!isInvoke(CallInst));
3417       } else {
3418         MCInst &Jne = NewCall->back();
3419 
3420         // Jump to next compare if target addresses don't match.
3421         Jne.clear();
3422         Jne.setOpcode(X86::JCC_1);
3423         Jne.addOperand(MCOperand::createExpr(MCSymbolRefExpr::create(
3424             NextTarget, MCSymbolRefExpr::VK_None, *Ctx)));
3425         Jne.addOperand(MCOperand::createImm(X86::COND_NE));
3426 
3427         // Call specific target directly.
3428         Results.emplace_back(Ctx->createNamedTempSymbol(),
3429                              InstructionListType());
3430         NewCall = &Results.back().second;
3431         NewCall->push_back(CallInst);
3432         MCInst &CallOrJmp = NewCall->back();
3433 
3434         CallOrJmp.clear();
3435 
3436         if (MinimizeCodeSize && !LoadElim) {
3437           CallOrJmp.setOpcode(IsTailCall ? X86::JMP32r : X86::CALL64r);
3438           CallOrJmp.addOperand(MCOperand::createReg(FuncAddrReg));
3439         } else {
3440           CallOrJmp.setOpcode(IsTailCall ? X86::JMP_4 : X86::CALL64pcrel32);
3441 
3442           if (Targets[i].first)
3443             CallOrJmp.addOperand(MCOperand::createExpr(MCSymbolRefExpr::create(
3444                 Targets[i].first, MCSymbolRefExpr::VK_None, *Ctx)));
3445           else
3446             CallOrJmp.addOperand(MCOperand::createImm(Targets[i].second));
3447         }
3448         if (IsTailCall)
3449           setTailCall(CallOrJmp);
3450 
3451         if (CallOrJmp.getOpcode() == X86::CALL64r ||
3452             CallOrJmp.getOpcode() == X86::CALL64pcrel32) {
3453           if (std::optional<uint32_t> Offset = getOffset(CallInst))
3454             // Annotated as duplicated call
3455             setOffset(CallOrJmp, *Offset);
3456         }
3457 
3458         if (isInvoke(CallInst) && !isInvoke(CallOrJmp)) {
3459           // Copy over any EH or GNU args size information from the original
3460           // call.
3461           std::optional<MCPlus::MCLandingPad> EHInfo = getEHInfo(CallInst);
3462           if (EHInfo)
3463             addEHInfo(CallOrJmp, *EHInfo);
3464           int64_t GnuArgsSize = getGnuArgsSize(CallInst);
3465           if (GnuArgsSize >= 0)
3466             addGnuArgsSize(CallOrJmp, GnuArgsSize);
3467         }
3468 
3469         if (!IsTailCall) {
3470           // The fallthrough block for the most common target should be
3471           // the merge block.
3472           if (i == 0) {
3473             // Fallthrough to merge block.
3474             MergeBlock = Ctx->createNamedTempSymbol();
3475           } else {
3476             // Insert jump to the merge block if we are not doing a fallthrough.
3477             jumpToMergeBlock(*NewCall);
3478           }
3479         }
3480       }
3481     }
3482 
3483     // Cold call block.
3484     Results.emplace_back(NextTarget, InstructionListType());
3485     InstructionListType &NewCall = Results.back().second;
3486     for (const MCInst *Inst : MethodFetchInsns)
3487       if (Inst != &CallInst)
3488         NewCall.push_back(*Inst);
3489     NewCall.push_back(CallInst);
3490 
3491     // Jump to merge block from cold call block
3492     if (!IsTailCall && !IsJumpTable) {
3493       jumpToMergeBlock(NewCall);
3494 
3495       // Record merge block
3496       Results.emplace_back(MergeBlock, InstructionListType());
3497     }
3498 
3499     return Results;
3500   }
3501 
3502   BlocksVectorTy jumpTablePromotion(
3503       const MCInst &IJmpInst,
3504       const std::vector<std::pair<MCSymbol *, uint64_t>> &Targets,
3505       const std::vector<MCInst *> &TargetFetchInsns,
3506       MCContext *Ctx) const override {
3507     assert(getJumpTable(IJmpInst) != 0);
3508     uint16_t IndexReg = getAnnotationAs<uint16_t>(IJmpInst, "JTIndexReg");
3509     if (IndexReg == 0)
3510       return BlocksVectorTy();
3511 
3512     BlocksVectorTy Results;
3513 
3514     // Label for the current code block.
3515     MCSymbol *NextTarget = nullptr;
3516 
3517     for (unsigned int i = 0; i < Targets.size(); ++i) {
3518       Results.emplace_back(NextTarget, InstructionListType());
3519       InstructionListType *CurBB = &Results.back().second;
3520 
3521       // Compare current index to a specific index.
3522       CurBB->emplace_back(MCInst());
3523       MCInst &CompareInst = CurBB->back();
3524       CompareInst.setLoc(IJmpInst.getLoc());
3525       CompareInst.setOpcode(X86::CMP64ri32);
3526       CompareInst.addOperand(MCOperand::createReg(IndexReg));
3527 
3528       const uint64_t CaseIdx = Targets[i].second;
3529       // Immediate address is out of sign extended 32 bit range.
3530       if (int64_t(CaseIdx) != int64_t(int32_t(CaseIdx)))
3531         return BlocksVectorTy();
3532 
3533       CompareInst.addOperand(MCOperand::createImm(CaseIdx));
3534       shortenInstruction(CompareInst, *Ctx->getSubtargetInfo());
3535 
3536       // jump to next target compare.
3537       NextTarget =
3538           Ctx->createNamedTempSymbol(); // generate label for the next block
3539       CurBB->push_back(MCInst());
3540 
3541       MCInst &JEInst = CurBB->back();
3542       JEInst.setLoc(IJmpInst.getLoc());
3543 
3544       // Jump to target if indices match
3545       JEInst.setOpcode(X86::JCC_1);
3546       JEInst.addOperand(MCOperand::createExpr(MCSymbolRefExpr::create(
3547           Targets[i].first, MCSymbolRefExpr::VK_None, *Ctx)));
3548       JEInst.addOperand(MCOperand::createImm(X86::COND_E));
3549     }
3550 
3551     // Cold call block.
3552     Results.emplace_back(NextTarget, InstructionListType());
3553     InstructionListType &CurBB = Results.back().second;
3554     for (const MCInst *Inst : TargetFetchInsns)
3555       if (Inst != &IJmpInst)
3556         CurBB.push_back(*Inst);
3557 
3558     CurBB.push_back(IJmpInst);
3559 
3560     return Results;
3561   }
3562 
3563 private:
3564   void createMove(MCInst &Inst, const MCSymbol *Src, unsigned Reg,
3565                   MCContext *Ctx) const {
3566     Inst.setOpcode(X86::MOV64rm);
3567     Inst.clear();
3568     Inst.addOperand(MCOperand::createReg(Reg));
3569     Inst.addOperand(MCOperand::createReg(X86::RIP));        // BaseReg
3570     Inst.addOperand(MCOperand::createImm(1));               // ScaleAmt
3571     Inst.addOperand(MCOperand::createReg(X86::NoRegister)); // IndexReg
3572     Inst.addOperand(MCOperand::createExpr(
3573         MCSymbolRefExpr::create(Src, MCSymbolRefExpr::VK_None,
3574                                 *Ctx)));                    // Displacement
3575     Inst.addOperand(MCOperand::createReg(X86::NoRegister)); // AddrSegmentReg
3576   }
3577 
3578   void createLea(MCInst &Inst, const MCSymbol *Src, unsigned Reg,
3579                  MCContext *Ctx) const {
3580     Inst.setOpcode(X86::LEA64r);
3581     Inst.clear();
3582     Inst.addOperand(MCOperand::createReg(Reg));
3583     Inst.addOperand(MCOperand::createReg(X86::RIP));        // BaseReg
3584     Inst.addOperand(MCOperand::createImm(1));               // ScaleAmt
3585     Inst.addOperand(MCOperand::createReg(X86::NoRegister)); // IndexReg
3586     Inst.addOperand(MCOperand::createExpr(
3587         MCSymbolRefExpr::create(Src, MCSymbolRefExpr::VK_None,
3588                                 *Ctx)));                    // Displacement
3589     Inst.addOperand(MCOperand::createReg(X86::NoRegister)); // AddrSegmentReg
3590   }
3591 };
3592 
3593 } // namespace
3594 
3595 namespace llvm {
3596 namespace bolt {
3597 
3598 MCPlusBuilder *createX86MCPlusBuilder(const MCInstrAnalysis *Analysis,
3599                                       const MCInstrInfo *Info,
3600                                       const MCRegisterInfo *RegInfo,
3601                                       const MCSubtargetInfo *STI) {
3602   return new X86MCPlusBuilder(Analysis, Info, RegInfo, STI);
3603 }
3604 
3605 } // namespace bolt
3606 } // namespace llvm
3607