//===- utils/TableGen/X86FoldTablesEmitter.cpp - X86 backend-*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This tablegen backend is responsible for emitting the memory fold tables of
// the X86 backend instructions.
//
//===----------------------------------------------------------------------===//

#include "CodeGenInstruction.h"
#include "CodeGenTarget.h"
#include "X86RecognizableInstr.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/X86FoldTablesUtils.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/TableGenBackend.h"
#include <set>

using namespace llvm;
using namespace X86Disassembler;

namespace {
// Represents an entry in the manually mapped instruction set.
struct ManualMapEntry {
  const char *RegInstStr;
  const char *MemInstStr;
  uint16_t Strategy;
};

// List of instructions requiring explicitly aligned memory.
const char *ExplicitAlign[] = {
    "MOVDQA",    "MOVAPS",     "MOVAPD",     "MOVNTPS",    "MOVNTPD",
    "MOVNTDQ",   "MOVNTDQA",   "SHA1MSG1",   "SHA1MSG2",   "SHA1NEXTE",
    "SHA1RNDS4", "SHA256MSG1", "SHA256MSG2", "SHA256RNDS2"};

// List of instructions NOT requiring explicit memory alignment.
const char *ExplicitUnalign[] = {"MOVDQU",    "MOVUPS",    "MOVUPD",
                                 "PCMPESTRM", "PCMPESTRI", "PCMPISTRM",
                                 "PCMPISTRI"};

const ManualMapEntry ManualMapSet[] = {
#define ENTRY(REG, MEM, FLAGS) {#REG, #MEM, FLAGS},
#include "X86ManualFoldTables.def"
};

const std::set<StringRef> NoFoldSet = {
#define NOFOLD(INSN) #INSN,
#include "X86ManualFoldTables.def"
};

static bool isExplicitAlign(const CodeGenInstruction *Inst) {
  return any_of(ExplicitAlign, [Inst](const char *InstStr) {
    return Inst->TheDef->getName().contains(InstStr);
  });
}

static bool isExplicitUnalign(const CodeGenInstruction *Inst) {
  return any_of(ExplicitUnalign, [Inst](const char *InstStr) {
    return Inst->TheDef->getName().contains(InstStr);
  });
}

class X86FoldTablesEmitter {
  RecordKeeper &Records;
  CodeGenTarget Target;

  // Represents an entry in the folding table
  class X86FoldTableEntry {
    const CodeGenInstruction *RegInst;
    const CodeGenInstruction *MemInst;

  public:
    bool NoReverse = false;
    bool NoForward = false;
    bool FoldLoad = false;
    bool FoldStore = false;
    enum BcastType {
      BCAST_NONE,
      BCAST_D,
      BCAST_Q,
      BCAST_SS,
      BCAST_SD,
      BCAST_SH,
    };
    BcastType BroadcastKind = BCAST_NONE;

    Align Alignment;

    X86FoldTableEntry() = default;
    X86FoldTableEntry(const CodeGenInstruction *RegInst,
                      const CodeGenInstruction *MemInst)
        : RegInst(RegInst), MemInst(MemInst) {}

    void print(formatted_raw_ostream &OS) const {
      OS.indent(2);
      OS << "{X86::" << RegInst->TheDef->getName() << ", ";
      OS << "X86::" << MemInst->TheDef->getName() << ", ";

      std::string Attrs;
      if (FoldLoad)
        Attrs += "TB_FOLDED_LOAD|";
      if (FoldStore)
        Attrs += "TB_FOLDED_STORE|";
      if (NoReverse)
        Attrs += "TB_NO_REVERSE|";
      if (NoForward)
        Attrs += "TB_NO_FORWARD|";
      if (Alignment != Align(1))
        Attrs += "TB_ALIGN_" + std::to_string(Alignment.value()) + "|";
      switch (BroadcastKind) {
      case BCAST_NONE:
        break;
      case BCAST_D:
        Attrs += "TB_BCAST_D|";
        break;
      case BCAST_Q:
        Attrs += "TB_BCAST_Q|";
        break;
      case BCAST_SS:
        Attrs += "TB_BCAST_SS|";
        break;
      case BCAST_SD:
        Attrs += "TB_BCAST_SD|";
        break;
      case BCAST_SH:
        Attrs += "TB_BCAST_SH|";
        break;
      }

      StringRef SimplifiedAttrs = StringRef(Attrs).rtrim("|");
      if (SimplifiedAttrs.empty())
        SimplifiedAttrs = "0";

      OS << SimplifiedAttrs << "},\n";
    }
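    // For illustration only: each call prints one row of the arrays emitted by
    // printTable() below, e.g. a hypothetical reg/mem pair might come out as
    //   {X86::FOOrr, X86::FOOrm, TB_NO_REVERSE|TB_ALIGN_16},
    // or as {X86::FOOrr, X86::FOOrm, 0} when no attribute applies.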

#ifndef NDEBUG
    // Check that Uses and Defs are the same after memory folding.
    void checkCorrectness() const {
      auto &RegInstRec = *RegInst->TheDef;
      auto &MemInstRec = *MemInst->TheDef;
      auto ListOfUsesReg = RegInstRec.getValueAsListOfDefs("Uses");
      auto ListOfUsesMem = MemInstRec.getValueAsListOfDefs("Uses");
      auto ListOfDefsReg = RegInstRec.getValueAsListOfDefs("Defs");
      auto ListOfDefsMem = MemInstRec.getValueAsListOfDefs("Defs");
      if (ListOfUsesReg != ListOfUsesMem || ListOfDefsReg != ListOfDefsMem)
        report_fatal_error("Uses/Defs couldn't be changed after folding " +
                           RegInstRec.getName() + " to " +
                           MemInstRec.getName());
    }
#endif
  };

  // NOTE: We check the fold tables are sorted in X86InstrFoldTables.cpp by the
  // enum of the instruction, which is computed in
  // CodeGenTarget::ComputeInstrsByEnum. So we should use the same comparator
  // here.
  // FIXME: Could we share the code with CodeGenTarget::ComputeInstrsByEnum?
  struct CompareInstrsByEnum {
    bool operator()(const CodeGenInstruction *LHS,
                    const CodeGenInstruction *RHS) const {
      assert(LHS && RHS && "LHS and RHS shouldn't be nullptr");
      const auto &D1 = *LHS->TheDef;
      const auto &D2 = *RHS->TheDef;
      return std::make_tuple(!D1.getValueAsBit("isPseudo"), D1.getName()) <
             std::make_tuple(!D2.getValueAsBit("isPseudo"), D2.getName());
    }
  };

  typedef std::map<const CodeGenInstruction *, X86FoldTableEntry,
                   CompareInstrsByEnum>
      FoldTable;
  // Table2Addr - Holds instructions whose memory form performs a
  //              load+store (Read-Modify-Write).
  //
  // Table#i - Holds instructions whose memory form performs a load OR a
  //           store, and whose #i'th operand is folded.
  //
  // BroadcastTable#i - Holds instructions whose memory form performs a
  //                    broadcast load and whose #i'th operand is folded.
  FoldTable Table2Addr;
  FoldTable Table0;
  FoldTable Table1;
  FoldTable Table2;
  FoldTable Table3;
  FoldTable Table4;
  FoldTable BroadcastTable1;
  FoldTable BroadcastTable2;
  FoldTable BroadcastTable3;
  FoldTable BroadcastTable4;

public:
  X86FoldTablesEmitter(RecordKeeper &R) : Records(R), Target(R) {}

  // run - Generate the X86 memory fold tables.
  void run(raw_ostream &OS);

private:
  // Decides to which table to add the entry with the given instructions.
  // S sets the strategy of adding the TB_NO_REVERSE flag.
  void updateTables(const CodeGenInstruction *RegInst,
                    const CodeGenInstruction *MemInst, uint16_t S = 0,
                    bool IsManual = false, bool IsBroadcast = false);

  // Generates X86FoldTableEntry with the given instructions and fills it with
  // the appropriate flags, then adds it to a memory fold table.
  void addEntryWithFlags(FoldTable &Table, const CodeGenInstruction *RegInst,
                         const CodeGenInstruction *MemInst, uint16_t S,
                         unsigned FoldedIdx, bool IsManual);
  // Generates X86FoldTableEntry with the given instructions and adds it to a
  // broadcast table.
  void addBroadcastEntry(FoldTable &Table, const CodeGenInstruction *RegInst,
                         const CodeGenInstruction *MemInst);

  // Print the given table as a static const C++ array of type
  // X86FoldTableEntry.
  void printTable(const FoldTable &Table, StringRef TableName,
                  formatted_raw_ostream &OS) {
    OS << "static const X86FoldTableEntry " << TableName << "[] = {\n";

    for (auto &E : Table)
      E.second.print(OS);

    OS << "};\n\n";
  }
};

// Return true if one of the instruction's operands is an RST register class.
static bool hasRSTRegClass(const CodeGenInstruction *Inst) {
  return any_of(Inst->Operands, [](const CGIOperandList::OperandInfo &OpIn) {
    return OpIn.Rec->getName() == "RST" || OpIn.Rec->getName() == "RSTi";
  });
}

// Return true if one of the instruction's operands is a ptr_rc_tailcall.
static bool hasPtrTailcallRegClass(const CodeGenInstruction *Inst) {
  return any_of(Inst->Operands, [](const CGIOperandList::OperandInfo &OpIn) {
    return OpIn.Rec->getName() == "ptr_rc_tailcall";
  });
}

static uint8_t byteFromBitsInit(const BitsInit *B) {
  unsigned N = B->getNumBits();
  assert(N <= 8 && "Field is too large for uint8_t!");

  uint8_t Value = 0;
  for (unsigned I = 0; I != N; ++I) {
    BitInit *Bit = cast<BitInit>(B->getBit(I));
    Value |= Bit->getValue() << I;
  }
  return Value;
}
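// Note: the loop above packs bit I of the BitsInit into bit I of the result,
// i.e. LSB-first, so a 3-bit field with bit 0 and bit 2 set decodes to
// 0b101 == 5. This is how the Opcode, FormBits and OpMapBits fields are
// decoded in run() below.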

static bool mayFoldFromForm(uint8_t Form) {
  switch (Form) {
  default:
    return Form >= X86Local::MRM0r && Form <= X86Local::MRM7r;
  case X86Local::MRMXr:
  case X86Local::MRMXrCC:
  case X86Local::MRMDestReg:
  case X86Local::MRMSrcReg:
  case X86Local::MRMSrcReg4VOp3:
  case X86Local::MRMSrcRegOp4:
  case X86Local::MRMSrcRegCC:
    return true;
  }
}

static bool mayFoldToForm(uint8_t Form) {
  switch (Form) {
  default:
    return Form >= X86Local::MRM0m && Form <= X86Local::MRM7m;
  case X86Local::MRMXm:
  case X86Local::MRMXmCC:
  case X86Local::MRMDestMem:
  case X86Local::MRMSrcMem:
  case X86Local::MRMSrcMem4VOp3:
  case X86Local::MRMSrcMemOp4:
  case X86Local::MRMSrcMemCC:
    return true;
  }
}

static bool mayFoldFromLeftToRight(uint8_t LHS, uint8_t RHS) {
  switch (LHS) {
  default:
    llvm_unreachable("Unexpected Form!");
  case X86Local::MRM0r:
    return RHS == X86Local::MRM0m;
  case X86Local::MRM1r:
    return RHS == X86Local::MRM1m;
  case X86Local::MRM2r:
    return RHS == X86Local::MRM2m;
  case X86Local::MRM3r:
    return RHS == X86Local::MRM3m;
  case X86Local::MRM4r:
    return RHS == X86Local::MRM4m;
  case X86Local::MRM5r:
    return RHS == X86Local::MRM5m;
  case X86Local::MRM6r:
    return RHS == X86Local::MRM6m;
  case X86Local::MRM7r:
    return RHS == X86Local::MRM7m;
  case X86Local::MRMXr:
    return RHS == X86Local::MRMXm;
  case X86Local::MRMXrCC:
    return RHS == X86Local::MRMXmCC;
  case X86Local::MRMDestReg:
    return RHS == X86Local::MRMDestMem;
  case X86Local::MRMSrcReg:
    return RHS == X86Local::MRMSrcMem;
  case X86Local::MRMSrcReg4VOp3:
    return RHS == X86Local::MRMSrcMem4VOp3;
  case X86Local::MRMSrcRegOp4:
    return RHS == X86Local::MRMSrcMemOp4;
  case X86Local::MRMSrcRegCC:
    return RHS == X86Local::MRMSrcMemCC;
  }
}

static bool isNOREXRegClass(const Record *Op) {
  return Op->getName().contains("_NOREX");
}

// Function object - Operator() returns true if the given Reg instruction
// matches the Mem instruction of this object.
class IsMatch {
  const CodeGenInstruction *MemInst;
  const X86Disassembler::RecognizableInstrBase MemRI;
  bool IsBroadcast;
  const unsigned Variant;

public:
  IsMatch(const CodeGenInstruction *Inst, bool IsBroadcast, unsigned V)
      : MemInst(Inst), MemRI(*MemInst), IsBroadcast(IsBroadcast), Variant(V) {}

  bool operator()(const CodeGenInstruction *RegInst) {
    X86Disassembler::RecognizableInstrBase RegRI(*RegInst);
    const Record *RegRec = RegInst->TheDef;
    const Record *MemRec = MemInst->TheDef;

    // EVEX_B means different things for memory and register forms.
    // register form: rounding control or SAE
    // memory form: broadcast
    if (IsBroadcast && (RegRI.HasEVEX_B || !MemRI.HasEVEX_B))
      return false;
    // EVEX_B indicates NDD for MAP4 instructions
    if (!IsBroadcast && (RegRI.HasEVEX_B || MemRI.HasEVEX_B) &&
        RegRI.OpMap != X86Local::T_MAP4)
      return false;

    if (!mayFoldFromLeftToRight(RegRI.Form, MemRI.Form))
      return false;

    // X86 encoding is crazy, e.g.
    //
    // f3 0f c7 30       vmxon   (%rax)
    // f3 0f c7 f0       senduipi        %rax
    //
    // These two instructions have similar encoding fields but are unrelated.
    if (X86Disassembler::getMnemonic(MemInst, Variant) !=
        X86Disassembler::getMnemonic(RegInst, Variant))
      return false;

    // Return false if any of the following fields does not match.
    if (std::make_tuple(RegRI.Encoding, RegRI.Opcode, RegRI.OpPrefix,
                        RegRI.OpMap, RegRI.OpSize, RegRI.AdSize, RegRI.HasREX_W,
                        RegRI.HasVEX_4V, RegRI.HasVEX_L, RegRI.IgnoresVEX_L,
                        RegRI.IgnoresW, RegRI.HasEVEX_K, RegRI.HasEVEX_KZ,
                        RegRI.HasEVEX_L2, RegRI.HasEVEX_NF,
                        RegRec->getValueAsBit("hasEVEX_RC"),
                        RegRec->getValueAsBit("hasLockPrefix"),
                        RegRec->getValueAsBit("hasNoTrackPrefix"),
                        RegRec->getValueAsBit("EVEX_W1_VEX_W0")) !=
        std::make_tuple(MemRI.Encoding, MemRI.Opcode, MemRI.OpPrefix,
                        MemRI.OpMap, MemRI.OpSize, MemRI.AdSize, MemRI.HasREX_W,
                        MemRI.HasVEX_4V, MemRI.HasVEX_L, MemRI.IgnoresVEX_L,
                        MemRI.IgnoresW, MemRI.HasEVEX_K, MemRI.HasEVEX_KZ,
                        MemRI.HasEVEX_L2, MemRI.HasEVEX_NF,
                        MemRec->getValueAsBit("hasEVEX_RC"),
                        MemRec->getValueAsBit("hasLockPrefix"),
                        MemRec->getValueAsBit("hasNoTrackPrefix"),
                        MemRec->getValueAsBit("EVEX_W1_VEX_W0")))
      return false;

    // Make sure the sizes of the operands of both instructions suit each
    // other. This is needed for instructions with an intrinsic version (_Int),
    // where the only difference is the operand size, e.g. VUCOMISDZrm and
    // VUCOMISDrm_Int. It also matters for instructions whose EVEX version was
    // upgraded to work with k-registers, e.g. VPCMPEQBrm (xmm output register)
    // and VPCMPEQBZ128rm (k output register).
    unsigned MemOutSize = MemRec->getValueAsDag("OutOperandList")->getNumArgs();
    unsigned RegOutSize = RegRec->getValueAsDag("OutOperandList")->getNumArgs();
    unsigned MemInSize = MemRec->getValueAsDag("InOperandList")->getNumArgs();
    unsigned RegInSize = RegRec->getValueAsDag("InOperandList")->getNumArgs();

    // Instructions with one output in their memory form use the memory folded
    // operand as source and destination (Read-Modify-Write).
    unsigned RegStartIdx =
        (MemOutSize + 1 == RegOutSize) && (MemInSize == RegInSize) ? 1 : 0;

    bool FoundFoldedOp = false;
    for (unsigned I = 0, E = MemInst->Operands.size(); I != E; I++) {
      Record *MemOpRec = MemInst->Operands[I].Rec;
      Record *RegOpRec = RegInst->Operands[I + RegStartIdx].Rec;

      if (MemOpRec == RegOpRec)
        continue;

      if (isRegisterOperand(MemOpRec) && isRegisterOperand(RegOpRec) &&
          ((getRegOperandSize(MemOpRec) != getRegOperandSize(RegOpRec)) ||
           (isNOREXRegClass(MemOpRec) != isNOREXRegClass(RegOpRec))))
        return false;

      if (isMemoryOperand(MemOpRec) && isMemoryOperand(RegOpRec) &&
          (getMemOperandSize(MemOpRec) != getMemOperandSize(RegOpRec)))
        return false;

      if (isImmediateOperand(MemOpRec) && isImmediateOperand(RegOpRec) &&
          (MemOpRec->getValueAsDef("Type") != RegOpRec->getValueAsDef("Type")))
        return false;

      // Only one operand can be folded.
      if (FoundFoldedOp)
        return false;

      assert(isRegisterOperand(RegOpRec) && isMemoryOperand(MemOpRec));
      FoundFoldedOp = true;
    }

    return FoundFoldedOp;
  }
};
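// Usage sketch (mirrors run() below): each memory-form instruction is matched
// against the register-form candidates that share its opcode, e.g.
//   find_if(OpcRegInsts, IsMatch(MemInst, /*IsBroadcast=*/false, Variant));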

} // end anonymous namespace

void X86FoldTablesEmitter::addEntryWithFlags(FoldTable &Table,
                                             const CodeGenInstruction *RegInst,
                                             const CodeGenInstruction *MemInst,
                                             uint16_t S, unsigned FoldedIdx,
                                             bool IsManual) {

  assert((IsManual || Table.find(RegInst) == Table.end()) &&
         "Override entry unexpectedly");
  X86FoldTableEntry Result = X86FoldTableEntry(RegInst, MemInst);
  Record *RegRec = RegInst->TheDef;
  Record *MemRec = MemInst->TheDef;

  Result.NoReverse = S & TB_NO_REVERSE;
  Result.NoForward = S & TB_NO_FORWARD;
  Result.FoldLoad = S & TB_FOLDED_LOAD;
  Result.FoldStore = S & TB_FOLDED_STORE;
  Result.Alignment = Align(1ULL << ((S & TB_ALIGN_MASK) >> TB_ALIGN_SHIFT));
  if (IsManual) {
    Table[RegInst] = Result;
    return;
  }

  // Only table0 entries should explicitly specify a load or store flag.
  if (&Table == &Table0) {
    unsigned MemInOpsNum = MemRec->getValueAsDag("InOperandList")->getNumArgs();
    unsigned RegInOpsNum = RegRec->getValueAsDag("InOperandList")->getNumArgs();
    // If the instruction writes to the folded operand, it will appear as an
    // output in the register form instruction and as an input in the memory
    // form instruction.
    // If the instruction reads from the folded operand, it will appear as an
    // input in both forms.
    if (MemInOpsNum == RegInOpsNum)
      Result.FoldLoad = true;
    else
      Result.FoldStore = true;
  }

  Record *RegOpRec = RegInst->Operands[FoldedIdx].Rec;
  Record *MemOpRec = MemInst->Operands[FoldedIdx].Rec;

  // The unfolding code generates a load/store instruction based on the size of
  // the register in the register form instruction. If the register is larger
  // than the memory operand, the unfolded load would read more memory than the
  // folded form and could fault, so do not allow unfolding in that case.
  if (getRegOperandSize(RegOpRec) > getMemOperandSize(MemOpRec))
    Result.NoReverse = true;

  // Check no-kz version's isMoveReg
  StringRef RegInstName = RegRec->getName();
  unsigned DropLen =
      RegInstName.ends_with("rkz") ? 2 : (RegInstName.ends_with("rk") ? 1 : 0);
  Record *BaseDef =
      DropLen ? Records.getDef(RegInstName.drop_back(DropLen)) : nullptr;
  bool IsMoveReg =
      BaseDef ? Target.getInstruction(BaseDef).isMoveReg : RegInst->isMoveReg;
  // A masked load cannot be unfolded to a full load, otherwise it would access
  // unexpected memory. A simple store cannot be unfolded.
  if (IsMoveReg && (BaseDef || Result.FoldStore))
    Result.NoReverse = true;

  uint8_t Enc = byteFromBitsInit(RegRec->getValueAsBitsInit("OpEncBits"));
  if (isExplicitAlign(RegInst)) {
    // The instruction requires explicitly aligned memory.
    BitsInit *VectSize = RegRec->getValueAsBitsInit("VectSize");
    Result.Alignment = Align(byteFromBitsInit(VectSize));
  } else if (!Enc && !isExplicitUnalign(RegInst) &&
             getMemOperandSize(MemOpRec) > 64) {
    // Instructions with XOP/VEX/EVEX encoding do not require alignment while
    // SSE packed vector instructions require a 16-byte alignment.
    Result.Alignment = Align(16);
  }
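  // To summarize the alignment logic above: names matching the ExplicitAlign
  // list (e.g. MOVNTDQ) take their alignment from the VectSize field, legacy
  // encoded (!Enc) SSE instructions with a memory operand wider than 64 bits
  // get a 16-byte alignment, and everything else keeps the alignment derived
  // from the strategy flags S.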
  // Expand is only ever created as a masked instruction. It is not safe to
  // unfold a masked expand because we don't know whether it came from an
  // expand-load intrinsic or from folding a plain load. If it came from an
  // expand-load intrinsic, unfolding to a plain load would read more elements
  // and could trigger a fault.
  if (RegRec->getName().contains("EXPAND"))
    Result.NoReverse = true;

  Table[RegInst] = Result;
}

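// Classify the broadcast element width from the instruction names and the
// execution domain. For example (names assumed, not checked here), an EVEX
// pair such as VPADDDZrr/VPADDDZrmb contains "DZ" and lives in SSEPackedInt,
// so it would be tagged TB_BCAST_D below.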
void X86FoldTablesEmitter::addBroadcastEntry(
    FoldTable &Table, const CodeGenInstruction *RegInst,
    const CodeGenInstruction *MemInst) {

  assert(Table.find(RegInst) == Table.end() && "Override entry unexpectedly");
  X86FoldTableEntry Result = X86FoldTableEntry(RegInst, MemInst);

  Record *RegRec = RegInst->TheDef;
  StringRef RegInstName = RegRec->getName();
  StringRef MemInstName = MemInst->TheDef->getName();
  Record *Domain = RegRec->getValueAsDef("ExeDomain");
  bool IsSSEPackedInt = Domain->getName() == "SSEPackedInt";
  if ((RegInstName.contains("DZ") || RegInstName.contains("DWZ") ||
       RegInstName.contains("Dr") || RegInstName.contains("I32")) &&
      IsSSEPackedInt) {
    assert((MemInstName.contains("DZ") || MemInstName.contains("DWZ") ||
            MemInstName.contains("Dr") || MemInstName.contains("I32")) &&
           "Unmatched names for broadcast");
    Result.BroadcastKind = X86FoldTableEntry::BCAST_D;
  } else if ((RegInstName.contains("QZ") || RegInstName.contains("QBZ") ||
              RegInstName.contains("Qr") || RegInstName.contains("I64")) &&
             IsSSEPackedInt) {
    assert((MemInstName.contains("QZ") || MemInstName.contains("QBZ") ||
            MemInstName.contains("Qr") || MemInstName.contains("I64")) &&
           "Unmatched names for broadcast");
    Result.BroadcastKind = X86FoldTableEntry::BCAST_Q;
  } else if ((RegInstName.contains("PS") || RegInstName.contains("F32") ||
              RegInstName.contains("CPH")) &&
             !RegInstName.contains("PH2PS")) {
    assert((MemInstName.contains("PS") || MemInstName.contains("F32") ||
            MemInstName.contains("CPH")) &&
           "Unmatched names for broadcast");
    Result.BroadcastKind = X86FoldTableEntry::BCAST_SS;
  } else if ((RegInstName.contains("PD") || RegInstName.contains("F64")) &&
             !RegInstName.contains("PH2PD")) {
    assert((MemInstName.contains("PD") || MemInstName.contains("F64")) &&
           "Unmatched names for broadcast");
    Result.BroadcastKind = X86FoldTableEntry::BCAST_SD;
  } else if (RegInstName.contains("PH")) {
    assert(MemInstName.contains("PH") && "Unmatched names for broadcast");
    Result.BroadcastKind = X86FoldTableEntry::BCAST_SH;
  } else {
    errs() << RegInstName << ", " << MemInstName << "\n";
    llvm_unreachable("Name is not canonicalized for broadcast or "
                     "ExeDomain is incorrect");
  }

  Table[RegInst] = Result;
}

void X86FoldTablesEmitter::updateTables(const CodeGenInstruction *RegInst,
                                        const CodeGenInstruction *MemInst,
                                        uint16_t S, bool IsManual,
                                        bool IsBroadcast) {

  Record *RegRec = RegInst->TheDef;
  Record *MemRec = MemInst->TheDef;
  unsigned MemOutSize = MemRec->getValueAsDag("OutOperandList")->getNumArgs();
  unsigned RegOutSize = RegRec->getValueAsDag("OutOperandList")->getNumArgs();
  unsigned MemInSize = MemRec->getValueAsDag("InOperandList")->getNumArgs();
  unsigned RegInSize = RegRec->getValueAsDag("InOperandList")->getNumArgs();

  // Instructions which Read-Modify-Write should be added to Table2Addr.
  if (!MemOutSize && RegOutSize == 1 && MemInSize == RegInSize) {
    assert(!IsBroadcast && "Read-Modify-Write can not be broadcast");
    // X86 would not unfold Read-Modify-Write instructions so add TB_NO_REVERSE.
    addEntryWithFlags(Table2Addr, RegInst, MemInst, S | TB_NO_REVERSE, 0,
                      IsManual);
    return;
  }
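  // The branch above catches, for instance (illustrative, assuming the usual
  // GPR ALU definitions), an ADD32rr/ADD32mr pair: MemOutSize == 0,
  // RegOutSize == 1 and the input counts match, so it goes to Table2Addr.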

  if (MemInSize == RegInSize && MemOutSize == RegOutSize) {
    // Load-Folding cases.
    // If the i'th register form operand is a register and the i'th memory form
    // operand is a memory operand, add instructions to Table#i.
    for (unsigned I = RegOutSize, E = RegInst->Operands.size(); I < E; I++) {
      Record *RegOpRec = RegInst->Operands[I].Rec;
      Record *MemOpRec = MemInst->Operands[I].Rec;
      // PointerLikeRegClass: For instructions like TAILJMPr, TAILJMPr64,
      // TAILJMPr64_REX
      if ((isRegisterOperand(RegOpRec) ||
           RegOpRec->isSubClassOf("PointerLikeRegClass")) &&
          isMemoryOperand(MemOpRec)) {
        switch (I) {
        case 0:
          assert(!IsBroadcast && "BroadcastTable0 needs to be added");
          addEntryWithFlags(Table0, RegInst, MemInst, S, 0, IsManual);
          return;
        case 1:
          IsBroadcast
              ? addBroadcastEntry(BroadcastTable1, RegInst, MemInst)
              : addEntryWithFlags(Table1, RegInst, MemInst, S, 1, IsManual);
          return;
        case 2:
          IsBroadcast
              ? addBroadcastEntry(BroadcastTable2, RegInst, MemInst)
              : addEntryWithFlags(Table2, RegInst, MemInst, S, 2, IsManual);
          return;
        case 3:
          IsBroadcast
              ? addBroadcastEntry(BroadcastTable3, RegInst, MemInst)
              : addEntryWithFlags(Table3, RegInst, MemInst, S, 3, IsManual);
          return;
        case 4:
          IsBroadcast
              ? addBroadcastEntry(BroadcastTable4, RegInst, MemInst)
              : addEntryWithFlags(Table4, RegInst, MemInst, S, 4, IsManual);
          return;
        }
      }
    }
  } else if (MemInSize == RegInSize + 1 && MemOutSize + 1 == RegOutSize) {
    // Store-Folding cases.
    // If the memory form instruction performs a store, the *output* register
    // of the register form instruction disappears and instead a memory *input*
    // operand appears in the memory form instruction.
    // For example:
    //   MOVAPSrr => (outs VR128:$dst), (ins VR128:$src)
    //   MOVAPSmr => (outs), (ins f128mem:$dst, VR128:$src)
    Record *RegOpRec = RegInst->Operands[RegOutSize - 1].Rec;
    Record *MemOpRec = MemInst->Operands[RegOutSize - 1].Rec;
    if (isRegisterOperand(RegOpRec) && isMemoryOperand(MemOpRec) &&
        getRegOperandSize(RegOpRec) == getMemOperandSize(MemOpRec)) {
      assert(!IsBroadcast && "Store can not be broadcast");
      addEntryWithFlags(Table0, RegInst, MemInst, S, 0, IsManual);
    }
  }
}

void X86FoldTablesEmitter::run(raw_ostream &O) {
  formatted_raw_ostream OS(O);

  // Holds all memory instructions
  std::vector<const CodeGenInstruction *> MemInsts;
  // Holds all register instructions - divided according to opcode.
  std::map<uint8_t, std::vector<const CodeGenInstruction *>> RegInsts;

  ArrayRef<const CodeGenInstruction *> NumberedInstructions =
      Target.getInstructionsByEnumValue();

  for (const CodeGenInstruction *Inst : NumberedInstructions) {
    const Record *Rec = Inst->TheDef;
    if (!Rec->isSubClassOf("X86Inst") || Rec->getValueAsBit("isAsmParserOnly"))
      continue;

    if (NoFoldSet.find(Rec->getName()) != NoFoldSet.end())
      continue;

    // A promoted legacy instruction is in EVEX space and has a REX2-encoded
    // alternative. It exists due to HW design and is never emitted by the
    // compiler.
    if (byteFromBitsInit(Rec->getValueAsBitsInit("OpMapBits")) ==
            X86Local::T_MAP4 &&
        byteFromBitsInit(Rec->getValueAsBitsInit("explicitOpPrefixBits")) ==
            X86Local::ExplicitEVEX)
      continue;

    // - Instructions including RST register class operands are not relevant
    //   for memory folding (for further details check the explanation in
    //   lib/Target/X86/X86InstrFPStack.td file).
    // - Some instructions (listed in the manual map above) use the register
    //   class ptr_rc_tailcall, which can be of size 32 or 64. To ensure safe
    //   mapping of these instructions, we manually map them and exclude them
    //   from the automation.
    if (hasRSTRegClass(Inst) || hasPtrTailcallRegClass(Inst))
      continue;

    // Add all the memory form instructions to MemInsts, and all the register
    // form instructions to RegInsts[Opc], where Opc is the opcode of each
    // instruction. This helps reduce the runtime of the backend.
    const BitsInit *FormBits = Rec->getValueAsBitsInit("FormBits");
    uint8_t Form = byteFromBitsInit(FormBits);
    if (mayFoldToForm(Form))
      MemInsts.push_back(Inst);
    else if (mayFoldFromForm(Form)) {
      uint8_t Opc = byteFromBitsInit(Rec->getValueAsBitsInit("Opcode"));
      RegInsts[Opc].push_back(Inst);
    }
  }

  // Create a copy because the register instruction will be removed when a new
  // entry is added into the memory fold tables.
  auto RegInstsForBroadcast = RegInsts;

  Record *AsmWriter = Target.getAsmWriter();
  unsigned Variant = AsmWriter->getValueAsInt("Variant");
  auto FixUp = [&](const CodeGenInstruction *RegInst) {
    StringRef RegInstName = RegInst->TheDef->getName();
    if (RegInstName.ends_with("_REV") || RegInstName.ends_with("_alt"))
      if (auto *RegAltRec = Records.getDef(RegInstName.drop_back(4)))
        RegInst = &Target.getInstruction(RegAltRec);
    return RegInst;
  };
  // For each memory form instruction, try to find its register form
  // instruction.
  for (const CodeGenInstruction *MemInst : MemInsts) {
    uint8_t Opc =
        byteFromBitsInit(MemInst->TheDef->getValueAsBitsInit("Opcode"));

    auto RegInstsIt = RegInsts.find(Opc);
    if (RegInstsIt == RegInsts.end())
      continue;

    // Two forms (memory & register) of the same instruction must have the same
    // opcode.
    std::vector<const CodeGenInstruction *> &OpcRegInsts = RegInstsIt->second;

    // Memory fold tables
    auto Match =
        find_if(OpcRegInsts, IsMatch(MemInst, /*IsBroadcast=*/false, Variant));
    if (Match != OpcRegInsts.end()) {
      updateTables(FixUp(*Match), MemInst);
      OpcRegInsts.erase(Match);
    }

    // Broadcast tables
    StringRef MemInstName = MemInst->TheDef->getName();
    if (!MemInstName.contains("mb") && !MemInstName.contains("mib"))
      continue;
    RegInstsIt = RegInstsForBroadcast.find(Opc);
    assert(RegInstsIt != RegInstsForBroadcast.end() &&
           "Unexpected control flow");
    std::vector<const CodeGenInstruction *> &OpcRegInstsForBroadcast =
        RegInstsIt->second;
    Match = find_if(OpcRegInstsForBroadcast,
                    IsMatch(MemInst, /*IsBroadcast=*/true, Variant));
    if (Match != OpcRegInstsForBroadcast.end()) {
      updateTables(FixUp(*Match), MemInst, 0, /*IsManual=*/false,
                   /*IsBroadcast=*/true);
      OpcRegInstsForBroadcast.erase(Match);
    }
  }

  // Add the manually mapped instructions listed above.
  for (const ManualMapEntry &Entry : ManualMapSet) {
    Record *RegInstIter = Records.getDef(Entry.RegInstStr);
    Record *MemInstIter = Records.getDef(Entry.MemInstStr);

    updateTables(&(Target.getInstruction(RegInstIter)),
                 &(Target.getInstruction(MemInstIter)), Entry.Strategy, true);
  }

#ifndef NDEBUG
  auto CheckMemFoldTable = [](const FoldTable &Table) -> void {
    for (const auto &Record : Table) {
      auto &FoldEntry = Record.second;
      FoldEntry.checkCorrectness();
    }
  };
  CheckMemFoldTable(Table2Addr);
  CheckMemFoldTable(Table0);
  CheckMemFoldTable(Table1);
  CheckMemFoldTable(Table2);
  CheckMemFoldTable(Table3);
  CheckMemFoldTable(Table4);
  CheckMemFoldTable(BroadcastTable1);
  CheckMemFoldTable(BroadcastTable2);
  CheckMemFoldTable(BroadcastTable3);
  CheckMemFoldTable(BroadcastTable4);
#endif
#define PRINT_TABLE(TABLE) printTable(TABLE, #TABLE, OS);
  // Print all tables.
  PRINT_TABLE(Table2Addr)
  PRINT_TABLE(Table0)
  PRINT_TABLE(Table1)
  PRINT_TABLE(Table2)
  PRINT_TABLE(Table3)
  PRINT_TABLE(Table4)
  PRINT_TABLE(BroadcastTable1)
  PRINT_TABLE(BroadcastTable2)
  PRINT_TABLE(BroadcastTable3)
  PRINT_TABLE(BroadcastTable4)
}

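// A brief usage note (assumed from the registration below): this backend is
// selected with "llvm-tblgen -gen-x86-fold-tables" on the X86 target .td
// files, and the generated arrays are meant to be #included by the X86
// backend (conventionally as an X86GenFoldTables.inc file).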
static TableGen::Emitter::OptClass<X86FoldTablesEmitter>
    X("gen-x86-fold-tables", "Generate X86 fold tables");