xref: /llvm-project/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp (revision 4e8c9d28132039a98feb97cec2759cddeb37d934)
1 //===- GlobalISelCombinerMatchTableEmitter.cpp - --------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file Generate a combiner implementation for GlobalISel from a declarative
10 /// syntax using GlobalISelMatchTable.
11 ///
12 /// Usually, TableGen backends use "assert is an error" as a means to report
13 /// invalid input. They try to diagnose common case but don't try very hard and
14 /// crashes can be common. This backend aims to behave closer to how a language
15 /// compiler frontend would behave: we try extra hard to diagnose invalid inputs
16 /// early, and any crash should be considered a bug (= a feature or diagnostic
17 /// is missing).
18 ///
19 /// While this can make the backend a bit more complex than it needs to be, it
20 /// pays off because MIR patterns can get complicated. Giving useful error
21 /// messages to combine writers can help boost their productivity.
22 ///
23 /// As with anything, a good balance has to be found. We also don't want to
24 /// write hundreds of lines of code to detect edge cases. In practice, crashing
25 /// very occasionally, or giving poor errors in some rare instances, is fine.
26 ///
27 //===----------------------------------------------------------------------===//
28 
29 #include "Basic/CodeGenIntrinsics.h"
30 #include "Common/CodeGenInstruction.h"
31 #include "Common/CodeGenTarget.h"
32 #include "Common/GlobalISel/CXXPredicates.h"
33 #include "Common/GlobalISel/CodeExpander.h"
34 #include "Common/GlobalISel/CodeExpansions.h"
35 #include "Common/GlobalISel/CombinerUtils.h"
36 #include "Common/GlobalISel/GlobalISelMatchTable.h"
37 #include "Common/GlobalISel/GlobalISelMatchTableExecutorEmitter.h"
38 #include "Common/GlobalISel/PatternParser.h"
39 #include "Common/GlobalISel/Patterns.h"
40 #include "Common/SubtargetFeatureInfo.h"
41 #include "llvm/ADT/APInt.h"
42 #include "llvm/ADT/EquivalenceClasses.h"
43 #include "llvm/ADT/MapVector.h"
44 #include "llvm/ADT/Statistic.h"
45 #include "llvm/ADT/StringExtras.h"
46 #include "llvm/ADT/StringSet.h"
47 #include "llvm/Support/CommandLine.h"
48 #include "llvm/Support/Debug.h"
49 #include "llvm/Support/PrettyStackTrace.h"
50 #include "llvm/Support/ScopedPrinter.h"
51 #include "llvm/TableGen/Error.h"
52 #include "llvm/TableGen/Record.h"
53 #include "llvm/TableGen/StringMatcher.h"
54 #include "llvm/TableGen/TGTimer.h"
55 #include "llvm/TableGen/TableGenBackend.h"
56 #include <cstdint>
57 
58 using namespace llvm;
59 using namespace llvm::gi;
60 
61 #define DEBUG_TYPE "gicombiner-emitter"
62 
63 namespace {
64 cl::OptionCategory
65     GICombinerEmitterCat("Options for -gen-global-isel-combiner");
66 cl::opt<bool> StopAfterParse(
67     "gicombiner-stop-after-parse",
68     cl::desc("Stop processing after parsing rules and dump state"),
69     cl::cat(GICombinerEmitterCat));
70 cl::list<std::string>
71     SelectedCombiners("combiners", cl::desc("Emit the specified combiners"),
72                       cl::cat(GICombinerEmitterCat), cl::CommaSeparated);
73 cl::opt<bool> DebugCXXPreds(
74     "gicombiner-debug-cxxpreds",
75     cl::desc("Add Contextual/Debug comments to all C++ predicates"),
76     cl::cat(GICombinerEmitterCat));
77 cl::opt<bool> DebugTypeInfer("gicombiner-debug-typeinfer",
78                              cl::desc("Print type inference debug logs"),
79                              cl::cat(GICombinerEmitterCat));
80 
81 constexpr StringLiteral CXXCustomActionPrefix = "GICXXCustomAction_";
82 constexpr StringLiteral CXXPredPrefix = "GICXXPred_MI_Predicate_";
83 constexpr StringLiteral MatchDataClassName = "GIDefMatchData";
84 
85 //===- CodeExpansions Helpers  --------------------------------------------===//
86 
87 void declareInstExpansion(CodeExpansions &CE, const InstructionMatcher &IM,
88                           StringRef Name) {
89   CE.declare(Name, "State.MIs[" + to_string(IM.getInsnVarID()) + "]");
90 }
91 
92 void declareInstExpansion(CodeExpansions &CE, const BuildMIAction &A,
93                           StringRef Name) {
94   // Note: we use redeclare here because this may overwrite a matcher inst
95   // expansion.
96   CE.redeclare(Name, "OutMIs[" + to_string(A.getInsnID()) + "]");
97 }
98 
99 void declareOperandExpansion(CodeExpansions &CE, const OperandMatcher &OM,
100                              StringRef Name) {
101   if (OM.isVariadic()) {
102     CE.declare(Name, "getRemainingOperands(*State.MIs[" +
103                          to_string(OM.getInsnVarID()) + "], " +
104                          to_string(OM.getOpIdx()) + ")");
105   } else {
106     CE.declare(Name, "State.MIs[" + to_string(OM.getInsnVarID()) +
107                          "]->getOperand(" + to_string(OM.getOpIdx()) + ")");
108   }
109 }
110 
111 void declareTempRegExpansion(CodeExpansions &CE, unsigned TempRegID,
112                              StringRef Name) {
113   CE.declare(Name, "State.TempRegisters[" + to_string(TempRegID) + "]");
114 }
115 
116 //===- Misc. Helpers  -----------------------------------------------------===//
117 
118 template <typename Container> auto keys(Container &&C) {
119   return map_range(C, [](auto &Entry) -> auto & { return Entry.first; });
120 }
121 
122 template <typename Container> auto values(Container &&C) {
123   return map_range(C, [](auto &Entry) -> auto & { return Entry.second; });
124 }
125 
126 std::string getIsEnabledPredicateEnumName(unsigned CombinerRuleID) {
127   return "GICXXPred_Simple_IsRule" + to_string(CombinerRuleID) + "Enabled";
128 }
129 
130 //===- MatchTable Helpers  ------------------------------------------------===//
131 
132 LLTCodeGen getLLTCodeGen(const PatternType &PT) {
133   return *MVTToLLT(getValueType(PT.getLLTRecord()));
134 }
135 
136 //===- PrettyStackTrace Helpers  ------------------------------------------===//
137 
138 class PrettyStackTraceParse : public PrettyStackTraceEntry {
139   const Record &Def;
140 
141 public:
142   PrettyStackTraceParse(const Record &Def) : Def(Def) {}
143 
144   void print(raw_ostream &OS) const override {
145     if (Def.isSubClassOf("GICombineRule"))
146       OS << "Parsing GICombineRule '" << Def.getName() << "'";
147     else if (Def.isSubClassOf(PatFrag::ClassName))
148       OS << "Parsing " << PatFrag::ClassName << " '" << Def.getName() << "'";
149     else
150       OS << "Parsing '" << Def.getName() << "'";
151     OS << '\n';
152   }
153 };
154 
155 class PrettyStackTraceEmit : public PrettyStackTraceEntry {
156   const Record &Def;
157   const Pattern *Pat = nullptr;
158 
159 public:
160   PrettyStackTraceEmit(const Record &Def, const Pattern *Pat = nullptr)
161       : Def(Def), Pat(Pat) {}
162 
163   void print(raw_ostream &OS) const override {
164     if (Def.isSubClassOf("GICombineRule"))
165       OS << "Emitting GICombineRule '" << Def.getName() << "'";
166     else if (Def.isSubClassOf(PatFrag::ClassName))
167       OS << "Emitting " << PatFrag::ClassName << " '" << Def.getName() << "'";
168     else
169       OS << "Emitting '" << Def.getName() << "'";
170 
171     if (Pat)
172       OS << " [" << Pat->getKindName() << " '" << Pat->getName() << "']";
173     OS << '\n';
174   }
175 };
176 
177 //===- CombineRuleOperandTypeChecker --------------------------------------===//
178 
179 /// This is a wrapper around OperandTypeChecker specialized for Combiner Rules.
180 /// On top of doing the same things as OperandTypeChecker, this also attempts to
181 /// infer as many types as possible for temporary register defs & immediates in
182 /// apply patterns.
183 ///
184 /// The inference is trivial and leverages the MCOI OperandTypes encoded in
185 /// CodeGenInstructions to infer types across patterns in a CombineRule. It's
186 /// thus very limited and only supports CodeGenInstructions (but that's the main
187 /// use case so it's fine).
188 ///
189 /// We only try to infer untyped operands in apply patterns when they're temp
190 /// reg defs, or immediates. Inference always outputs a `TypeOf<$x>` where $x is
191 /// a named operand from a match pattern.
192 class CombineRuleOperandTypeChecker : private OperandTypeChecker {
193 public:
194   CombineRuleOperandTypeChecker(const Record &RuleDef,
195                                 const OperandTable &MatchOpTable)
196       : OperandTypeChecker(RuleDef.getLoc()), RuleDef(RuleDef),
197         MatchOpTable(MatchOpTable) {}
198 
199   /// Records and checks a 'match' pattern.
200   bool processMatchPattern(InstructionPattern &P);
201 
202   /// Records and checks an 'apply' pattern.
203   bool processApplyPattern(InstructionPattern &P);
204 
205   /// Propagates types, then perform type inference and do a second round of
206   /// propagation in the apply patterns only if any types were inferred.
207   void propagateAndInferTypes();
208 
209 private:
210   /// TypeEquivalenceClasses are groups of operands of an instruction that share
211   /// a common type.
212   ///
213   /// e.g. [[a, b], [c, d]] means a and b have the same type, and c and
214   /// d have the same type too. b/c and a/d don't have to have the same type,
215   /// though.
216   using TypeEquivalenceClasses = EquivalenceClasses<StringRef>;
217 
218   /// \returns true for `OPERAND_GENERIC_` 0 through 5.
219   /// These are the MCOI types that can be registers. The other MCOI types are
220   /// either immediates, or fancier operands used only post-ISel, so we don't
221   /// care about them for combiners.
222   static bool canMCOIOperandTypeBeARegister(StringRef MCOIType) {
223     // Assume OPERAND_GENERIC_0 through 5 can be registers. The other MCOI
224     // OperandTypes are either never used in gMIR, or not relevant (e.g.
225     // OPERAND_GENERIC_IMM, which is definitely never a register).
226     return MCOIType.drop_back(1).ends_with("OPERAND_GENERIC_");
227   }
228 
229   /// Finds the "MCOI::"" operand types for each operand of \p CGP.
230   ///
231   /// This is a bit trickier than it looks because we need to handle variadic
232   /// in/outs.
233   ///
234   /// e.g. for
235   ///   (G_BUILD_VECTOR $vec, $x, $y) ->
236   ///   [MCOI::OPERAND_GENERIC_0, MCOI::OPERAND_GENERIC_1,
237   ///    MCOI::OPERAND_GENERIC_1]
238   ///
239   /// For unknown types (which can happen in variadics where varargs types are
240   /// inconsistent), a unique name is given, e.g. "unknown_type_0".
241   static std::vector<std::string>
242   getMCOIOperandTypes(const CodeGenInstructionPattern &CGP);
243 
244   /// Adds the TypeEquivalenceClasses for \p P in \p OutTECs.
245   void getInstEqClasses(const InstructionPattern &P,
246                         TypeEquivalenceClasses &OutTECs) const;
247 
248   /// Calls `getInstEqClasses` on all patterns of the rule to produce the whole
249   /// rule's TypeEquivalenceClasses.
250   TypeEquivalenceClasses getRuleEqClasses() const;
251 
252   /// Tries to infer the type of the \p ImmOpIdx -th operand of \p IP using \p
253   /// TECs.
254   ///
255   /// This is achieved by trying to find a named operand in \p IP that shares
256   /// the same type as \p ImmOpIdx, and using \ref inferNamedOperandType on that
257   /// operand instead.
258   ///
259   /// \returns the inferred type or an empty PatternType if inference didn't
260   /// succeed.
261   PatternType inferImmediateType(const InstructionPattern &IP,
262                                  unsigned ImmOpIdx,
263                                  const TypeEquivalenceClasses &TECs) const;
264 
265   /// Looks inside \p TECs to infer \p OpName's type.
266   ///
267   /// \returns the inferred type or an empty PatternType if inference didn't
268   /// succeed.
269   PatternType inferNamedOperandType(const InstructionPattern &IP,
270                                     StringRef OpName,
271                                     const TypeEquivalenceClasses &TECs,
272                                     bool AllowSelf = false) const;
273 
274   const Record &RuleDef;
275   SmallVector<InstructionPattern *, 8> MatchPats;
276   SmallVector<InstructionPattern *, 8> ApplyPats;
277 
278   const OperandTable &MatchOpTable;
279 };
280 
281 bool CombineRuleOperandTypeChecker::processMatchPattern(InstructionPattern &P) {
282   MatchPats.push_back(&P);
283   return check(P, /*CheckTypeOf*/ [](const auto &) {
284     // GITypeOf in 'match' is currently always rejected by the
285     // CombineRuleBuilder after inference is done.
286     return true;
287   });
288 }
289 
290 bool CombineRuleOperandTypeChecker::processApplyPattern(InstructionPattern &P) {
291   ApplyPats.push_back(&P);
292   return check(P, /*CheckTypeOf*/ [&](const PatternType &Ty) {
293     // GITypeOf<"$x"> can only be used if "$x" is a matched operand.
294     const auto OpName = Ty.getTypeOfOpName();
295     if (MatchOpTable.lookup(OpName).Found)
296       return true;
297 
298     PrintError(RuleDef.getLoc(), "'" + OpName + "' ('" + Ty.str() +
299                                      "') does not refer to a matched operand!");
300     return false;
301   });
302 }
303 
304 void CombineRuleOperandTypeChecker::propagateAndInferTypes() {
305   /// First step here is to propagate types using the OperandTypeChecker. That
306   /// way we ensure all uses of a given register have consistent types.
307   propagateTypes();
308 
309   /// Build the TypeEquivalenceClasses for the whole rule.
310   const TypeEquivalenceClasses TECs = getRuleEqClasses();
311 
312   /// Look at the apply patterns and find operands that need to be
313   /// inferred. We then try to find an equivalence class that they're a part of
314   /// and select the best operand to use for the `GITypeOf` type. We prioritize
315   /// defs of matched instructions because those are guaranteed to be registers.
316   bool InferredAny = false;
317   for (auto *Pat : ApplyPats) {
318     for (unsigned K = 0; K < Pat->operands_size(); ++K) {
319       auto &Op = Pat->getOperand(K);
320 
321       // We only want to take a look at untyped defs or immediates.
322       if ((!Op.isDef() && !Op.hasImmValue()) || Op.getType())
323         continue;
324 
325       // Infer defs & named immediates.
326       if (Op.isDef() || Op.isNamedImmediate()) {
327         // Check it's not a redefinition of a matched operand.
328         // In such cases, inference is not necessary because we just copy
329         // operands and don't create temporary registers.
330         if (MatchOpTable.lookup(Op.getOperandName()).Found)
331           continue;
332 
333         // Inference is needed here, so try to do it.
334         if (PatternType Ty =
335                 inferNamedOperandType(*Pat, Op.getOperandName(), TECs)) {
336           if (DebugTypeInfer)
337             errs() << "INFER: " << Op.describe() << " -> " << Ty.str() << '\n';
338           Op.setType(Ty);
339           InferredAny = true;
340         }
341 
342         continue;
343       }
344 
345       // Infer immediates
346       if (Op.hasImmValue()) {
347         if (PatternType Ty = inferImmediateType(*Pat, K, TECs)) {
348           if (DebugTypeInfer)
349             errs() << "INFER: " << Op.describe() << " -> " << Ty.str() << '\n';
350           Op.setType(Ty);
351           InferredAny = true;
352         }
353         continue;
354       }
355     }
356   }
357 
358   // If we've inferred any types, we want to propagate them across the apply
359   // patterns. Type inference only adds GITypeOf types that point to Matched
360   // operands, so we definitely don't want to propagate types into the match
361   // patterns as well, otherwise bad things happen.
362   if (InferredAny) {
363     OperandTypeChecker OTC(RuleDef.getLoc());
364     for (auto *Pat : ApplyPats) {
365       if (!OTC.check(*Pat, [&](const auto &) { return true; }))
366         PrintFatalError(RuleDef.getLoc(),
367                         "OperandTypeChecker unexpectedly failed on '" +
368                             Pat->getName() + "' during Type Inference");
369     }
370     OTC.propagateTypes();
371 
372     if (DebugTypeInfer) {
373       errs() << "Apply patterns for rule " << RuleDef.getName()
374              << " after inference:\n";
375       for (auto *Pat : ApplyPats) {
376         errs() << "  ";
377         Pat->print(errs(), /*PrintName*/ true);
378         errs() << '\n';
379       }
380       errs() << '\n';
381     }
382   }
383 }
384 
385 PatternType CombineRuleOperandTypeChecker::inferImmediateType(
386     const InstructionPattern &IP, unsigned ImmOpIdx,
387     const TypeEquivalenceClasses &TECs) const {
388   // We can only infer CGPs (except intrinsics).
389   const auto *CGP = dyn_cast<CodeGenInstructionPattern>(&IP);
390   if (!CGP || CGP->isIntrinsic())
391     return {};
392 
393   // For CGPs, we try to infer immediates by trying to infer another named
394   // operand that shares its type.
395   //
396   // e.g.
397   //    Pattern: G_BUILD_VECTOR $x, $y, 0
398   //    MCOIs:   [MCOI::OPERAND_GENERIC_0, MCOI::OPERAND_GENERIC_1,
399   //              MCOI::OPERAND_GENERIC_1]
400   //    $y has the same type as 0, so we can infer $y and get the type 0 should
401   //    have.
402 
403   // We infer immediates by looking for a named operand that shares the same
404   // MCOI type.
405   const auto MCOITypes = getMCOIOperandTypes(*CGP);
406   StringRef ImmOpTy = MCOITypes[ImmOpIdx];
407 
408   for (const auto &[Idx, Ty] : enumerate(MCOITypes)) {
409     if (Idx != ImmOpIdx && Ty == ImmOpTy) {
410       const auto &Op = IP.getOperand(Idx);
411       if (!Op.isNamedOperand())
412         continue;
413 
414       // Named operand with the same name, try to infer that.
415       if (PatternType InferTy = inferNamedOperandType(IP, Op.getOperandName(),
416                                                       TECs, /*AllowSelf=*/true))
417         return InferTy;
418     }
419   }
420 
421   return {};
422 }
423 
424 PatternType CombineRuleOperandTypeChecker::inferNamedOperandType(
425     const InstructionPattern &IP, StringRef OpName,
426     const TypeEquivalenceClasses &TECs, bool AllowSelf) const {
427   // This is the simplest possible case, we just need to find a TEC that
428   // contains OpName. Look at all operands in equivalence class and try to
429   // find a suitable one. If `AllowSelf` is true, the operand itself is also
430   // considered suitable.
431 
432   // Check for a def of a matched pattern. This is guaranteed to always
433   // be a register so we can blindly use that.
434   StringRef GoodOpName;
435   for (auto It = TECs.findLeader(OpName); It != TECs.member_end(); ++It) {
436     if (!AllowSelf && *It == OpName)
437       continue;
438 
439     const auto LookupRes = MatchOpTable.lookup(*It);
440     if (LookupRes.Def) // Favor defs
441       return PatternType::getTypeOf(*It);
442 
443     // Otherwise just save this in case we don't find any def.
444     if (GoodOpName.empty() && LookupRes.Found)
445       GoodOpName = *It;
446   }
447 
448   if (!GoodOpName.empty())
449     return PatternType::getTypeOf(GoodOpName);
450 
451   // No good operand found, give up.
452   return {};
453 }
454 
455 std::vector<std::string> CombineRuleOperandTypeChecker::getMCOIOperandTypes(
456     const CodeGenInstructionPattern &CGP) {
457   // FIXME?: Should we cache this? We call it twice when inferring immediates.
458 
459   static unsigned UnknownTypeIdx = 0;
460 
461   std::vector<std::string> OpTypes;
462   auto &CGI = CGP.getInst();
463   const Record *VarArgsTy =
464       CGI.TheDef->isSubClassOf("GenericInstruction")
465           ? CGI.TheDef->getValueAsOptionalDef("variadicOpsType")
466           : nullptr;
467   std::string VarArgsTyName =
468       VarArgsTy ? ("MCOI::" + VarArgsTy->getValueAsString("OperandType")).str()
469                 : ("unknown_type_" + Twine(UnknownTypeIdx++)).str();
470 
471   // First, handle defs.
472   for (unsigned K = 0; K < CGI.Operands.NumDefs; ++K)
473     OpTypes.push_back(CGI.Operands[K].OperandType);
474 
475   // Then, handle variadic defs if there are any.
476   if (CGP.hasVariadicDefs()) {
477     for (unsigned K = CGI.Operands.NumDefs; K < CGP.getNumInstDefs(); ++K)
478       OpTypes.push_back(VarArgsTyName);
479   }
480 
481   // If we had variadic defs, the op idx in the pattern won't match the op idx
482   // in the CGI anymore.
483   int CGIOpOffset = int(CGI.Operands.NumDefs) - CGP.getNumInstDefs();
484   assert(CGP.hasVariadicDefs() ? (CGIOpOffset <= 0) : (CGIOpOffset == 0));
485 
486   // Handle all remaining use operands, including variadic ones.
487   for (unsigned K = CGP.getNumInstDefs(); K < CGP.getNumInstOperands(); ++K) {
488     unsigned CGIOpIdx = K + CGIOpOffset;
489     if (CGIOpIdx >= CGI.Operands.size()) {
490       assert(CGP.isVariadic());
491       OpTypes.push_back(VarArgsTyName);
492     } else {
493       OpTypes.push_back(CGI.Operands[CGIOpIdx].OperandType);
494     }
495   }
496 
497   assert(OpTypes.size() == CGP.operands_size());
498   return OpTypes;
499 }
500 
501 void CombineRuleOperandTypeChecker::getInstEqClasses(
502     const InstructionPattern &P, TypeEquivalenceClasses &OutTECs) const {
503   // Determine the TypeEquivalenceClasses by:
504   //    - Getting the MCOI Operand Types.
505   //    - Creating a Map of MCOI Type -> [Operand Indexes]
506   //    - Iterating over the map, filtering types we don't like, and just adding
507   //      the array of Operand Indexes to \p OutTECs.
508 
509   // We can only do this on CodeGenInstructions that aren't intrinsics. Other
510   // InstructionPatterns have no type inference information associated with
511   // them.
512   // TODO: We could try to extract some info from CodeGenIntrinsic to
513   //       guide inference.
514 
515   // TODO: Could we add some inference information to builtins at least? e.g.
516   // ReplaceReg should always replace with a reg of the same type, for instance.
517   // Though, those patterns are often used alone so it might not be worth the
518   // trouble to infer their types.
519   auto *CGP = dyn_cast<CodeGenInstructionPattern>(&P);
520   if (!CGP || CGP->isIntrinsic())
521     return;
522 
523   const auto MCOITypes = getMCOIOperandTypes(*CGP);
524   assert(MCOITypes.size() == P.operands_size());
525 
526   MapVector<StringRef, SmallVector<unsigned, 0>> TyToOpIdx;
527   for (const auto &[Idx, Ty] : enumerate(MCOITypes))
528     TyToOpIdx[Ty].push_back(Idx);
529 
530   if (DebugTypeInfer)
531     errs() << "\tGroups for " << P.getName() << ":\t";
532 
533   for (const auto &[Ty, Idxs] : TyToOpIdx) {
534     if (!canMCOIOperandTypeBeARegister(Ty))
535       continue;
536 
537     if (DebugTypeInfer)
538       errs() << '[';
539     StringRef Sep = "";
540 
541     // We only collect named operands.
542     StringRef Leader;
543     for (unsigned Idx : Idxs) {
544       const auto &Op = P.getOperand(Idx);
545       if (!Op.isNamedOperand())
546         continue;
547 
548       const auto OpName = Op.getOperandName();
549       if (DebugTypeInfer) {
550         errs() << Sep << OpName;
551         Sep = ", ";
552       }
553 
554       if (Leader.empty())
555         OutTECs.insert((Leader = OpName));
556       else
557         OutTECs.unionSets(Leader, OpName);
558     }
559 
560     if (DebugTypeInfer)
561       errs() << "] ";
562   }
563 
564   if (DebugTypeInfer)
565     errs() << '\n';
566 }
567 
568 CombineRuleOperandTypeChecker::TypeEquivalenceClasses
569 CombineRuleOperandTypeChecker::getRuleEqClasses() const {
570   StringMap<unsigned> OpNameToEqClassIdx;
571   TypeEquivalenceClasses TECs;
572 
573   if (DebugTypeInfer)
574     errs() << "Rule Operand Type Equivalence Classes for " << RuleDef.getName()
575            << ":\n";
576 
577   for (const auto *Pat : MatchPats)
578     getInstEqClasses(*Pat, TECs);
579   for (const auto *Pat : ApplyPats)
580     getInstEqClasses(*Pat, TECs);
581 
582   if (DebugTypeInfer) {
583     errs() << "Final Type Equivalence Classes: ";
584     for (auto ClassIt = TECs.begin(); ClassIt != TECs.end(); ++ClassIt) {
585       // only print non-empty classes.
586       if (auto MembIt = TECs.member_begin(ClassIt);
587           MembIt != TECs.member_end()) {
588         errs() << '[';
589         StringRef Sep = "";
590         for (; MembIt != TECs.member_end(); ++MembIt) {
591           errs() << Sep << *MembIt;
592           Sep = ", ";
593         }
594         errs() << "] ";
595       }
596     }
597     errs() << '\n';
598   }
599 
600   return TECs;
601 }
602 
603 //===- MatchData Handling -------------------------------------------------===//
604 struct MatchDataDef {
605   MatchDataDef(StringRef Symbol, StringRef Type) : Symbol(Symbol), Type(Type) {}
606 
607   StringRef Symbol;
608   StringRef Type;
609 
610   /// \returns the desired variable name for this MatchData.
611   std::string getVarName() const {
612     // Add a prefix in case the symbol name is very generic and conflicts with
613     // something else.
614     return "GIMatchData_" + Symbol.str();
615   }
616 };
617 
618 //===- CombineRuleBuilder -------------------------------------------------===//
619 
620 /// Parses combine rule and builds a small intermediate representation to tie
621 /// patterns together and emit RuleMatchers to match them. This may emit more
622 /// than one RuleMatcher, e.g. for `wip_match_opcode`.
623 ///
624 /// Memory management for `Pattern` objects is done through `std::unique_ptr`.
625 /// In most cases, there are two stages to a pattern's lifetime:
626 ///   - Creation in a `parse` function
627 ///     - The unique_ptr is stored in a variable, and may be destroyed if the
628 ///       pattern is found to be semantically invalid.
629 ///   - Ownership transfer into a `PatternMap`
630 ///     - Once a pattern is moved into either the map of Match or Apply
631 ///       patterns, it is known to be valid and it never moves back.
632 class CombineRuleBuilder {
633 public:
634   using PatternMap = MapVector<StringRef, std::unique_ptr<Pattern>>;
635   using PatternAlternatives = DenseMap<const Pattern *, unsigned>;
636 
637   CombineRuleBuilder(const CodeGenTarget &CGT,
638                      SubtargetFeatureInfoMap &SubtargetFeatures,
639                      const Record &RuleDef, unsigned ID,
640                      std::vector<RuleMatcher> &OutRMs)
641       : Parser(CGT, RuleDef.getLoc()), CGT(CGT),
642         SubtargetFeatures(SubtargetFeatures), RuleDef(RuleDef), RuleID(ID),
643         OutRMs(OutRMs) {}
644 
645   /// Parses all fields in the RuleDef record.
646   bool parseAll();
647 
648   /// Emits all RuleMatchers into the vector of RuleMatchers passed in the
649   /// constructor.
650   bool emitRuleMatchers();
651 
652   void print(raw_ostream &OS) const;
653   void dump() const { print(dbgs()); }
654 
655   /// Debug-only verification of invariants.
656 #ifndef NDEBUG
657   void verify() const;
658 #endif
659 
660 private:
661   const CodeGenInstruction &getGConstant() const {
662     return CGT.getInstruction(RuleDef.getRecords().getDef("G_CONSTANT"));
663   }
664 
665   std::optional<LLTCodeGenOrTempType>
666   getLLTCodeGenOrTempType(const PatternType &PT, RuleMatcher &RM);
667 
668   void PrintError(Twine Msg) const { ::PrintError(&RuleDef, Msg); }
669   void PrintWarning(Twine Msg) const { ::PrintWarning(RuleDef.getLoc(), Msg); }
670   void PrintNote(Twine Msg) const { ::PrintNote(RuleDef.getLoc(), Msg); }
671 
672   void print(raw_ostream &OS, const PatternAlternatives &Alts) const;
673 
674   bool addApplyPattern(std::unique_ptr<Pattern> Pat);
675   bool addMatchPattern(std::unique_ptr<Pattern> Pat);
676 
677   /// Adds the expansions from \see MatchDatas to \p CE.
678   void declareAllMatchDatasExpansions(CodeExpansions &CE) const;
679 
680   /// Adds a matcher \p P to \p IM, expanding its code using \p CE.
681   /// Note that the predicate is added on the last InstructionMatcher.
682   ///
683   /// \p Alts is only used if DebugCXXPreds is enabled.
684   void addCXXPredicate(RuleMatcher &M, const CodeExpansions &CE,
685                        const CXXPattern &P, const PatternAlternatives &Alts);
686 
687   bool hasOnlyCXXApplyPatterns() const;
688   bool hasEraseRoot() const;
689 
690   // Infer machine operand types and check their consistency.
691   bool typecheckPatterns();
692 
693   /// For all PatFragPatterns, add a new entry in PatternAlternatives for each
694   /// PatternList it contains. This is multiplicative, so if we have 2
695   /// PatFrags with 3 alternatives each, we get 2*3 permutations added to
696   /// PermutationsToEmit. The "MaxPermutations" field controls how many
697   /// permutations are allowed before an error is emitted and this function
698   /// returns false. This is a simple safeguard to prevent combination of
699   /// PatFrags from generating enormous amounts of rules.
700   bool buildPermutationsToEmit();
701 
702   /// Checks additional semantics of the Patterns.
703   bool checkSemantics();
704 
705   /// Creates a new RuleMatcher with some boilerplate
706   /// settings/actions/predicates, and and adds it to \p OutRMs.
707   /// \see addFeaturePredicates too.
708   ///
709   /// \param Alts Current set of alternatives, for debug comment.
710   /// \param AdditionalComment Comment string to be added to the
711   ///        `DebugCommentAction`.
712   RuleMatcher &addRuleMatcher(const PatternAlternatives &Alts,
713                               Twine AdditionalComment = "");
714   bool addFeaturePredicates(RuleMatcher &M);
715 
716   bool findRoots();
717   bool buildRuleOperandsTable();
718 
719   bool parseDefs(const DagInit &Def);
720 
721   bool emitMatchPattern(CodeExpansions &CE, const PatternAlternatives &Alts,
722                         const InstructionPattern &IP);
723   bool emitMatchPattern(CodeExpansions &CE, const PatternAlternatives &Alts,
724                         const AnyOpcodePattern &AOP);
725 
726   bool emitPatFragMatchPattern(CodeExpansions &CE,
727                                const PatternAlternatives &Alts, RuleMatcher &RM,
728                                InstructionMatcher *IM,
729                                const PatFragPattern &PFP,
730                                DenseSet<const Pattern *> &SeenPats);
731 
732   bool emitApplyPatterns(CodeExpansions &CE, RuleMatcher &M);
733   bool emitCXXMatchApply(CodeExpansions &CE, RuleMatcher &M,
734                          ArrayRef<CXXPattern *> Matchers);
735 
736   // Recursively visits InstructionPatterns from P to build up the
737   // RuleMatcher actions.
738   bool emitInstructionApplyPattern(CodeExpansions &CE, RuleMatcher &M,
739                                    const InstructionPattern &P,
740                                    DenseSet<const Pattern *> &SeenPats,
741                                    StringMap<unsigned> &OperandToTempRegID);
742 
743   bool emitCodeGenInstructionApplyImmOperand(RuleMatcher &M,
744                                              BuildMIAction &DstMI,
745                                              const CodeGenInstructionPattern &P,
746                                              const InstructionOperand &O);
747 
748   bool emitBuiltinApplyPattern(CodeExpansions &CE, RuleMatcher &M,
749                                const BuiltinPattern &P,
750                                StringMap<unsigned> &OperandToTempRegID);
751 
752   // Recursively visits CodeGenInstructionPattern from P to build up the
753   // RuleMatcher/InstructionMatcher. May create new InstructionMatchers as
754   // needed.
755   using OperandMapperFnRef =
756       function_ref<InstructionOperand(const InstructionOperand &)>;
757   using OperandDefLookupFn =
758       function_ref<const InstructionPattern *(StringRef)>;
759   bool emitCodeGenInstructionMatchPattern(
760       CodeExpansions &CE, const PatternAlternatives &Alts, RuleMatcher &M,
761       InstructionMatcher &IM, const CodeGenInstructionPattern &P,
762       DenseSet<const Pattern *> &SeenPats, OperandDefLookupFn LookupOperandDef,
763       OperandMapperFnRef OperandMapper = [](const auto &O) { return O; });
764 
765   PatternParser Parser;
766   const CodeGenTarget &CGT;
767   SubtargetFeatureInfoMap &SubtargetFeatures;
768   const Record &RuleDef;
769   const unsigned RuleID;
770   std::vector<RuleMatcher> &OutRMs;
771 
772   // For InstructionMatcher::addOperand
773   unsigned AllocatedTemporariesBaseID = 0;
774 
775   /// The root of the pattern.
776   StringRef RootName;
777 
778   /// These maps have ownership of the actual Pattern objects.
779   /// They both map a Pattern's name to the Pattern instance.
780   PatternMap MatchPats;
781   PatternMap ApplyPats;
782 
783   /// Operand tables to tie match/apply patterns together.
784   OperandTable MatchOpTable;
785   OperandTable ApplyOpTable;
786 
787   /// Set by findRoots.
788   Pattern *MatchRoot = nullptr;
789   SmallDenseSet<InstructionPattern *, 2> ApplyRoots;
790 
791   SmallVector<MatchDataDef, 2> MatchDatas;
792   SmallVector<PatternAlternatives, 1> PermutationsToEmit;
793 };
794 
795 bool CombineRuleBuilder::parseAll() {
796   auto StackTrace = PrettyStackTraceParse(RuleDef);
797 
798   if (!parseDefs(*RuleDef.getValueAsDag("Defs")))
799     return false;
800 
801   if (!Parser.parsePatternList(
802           *RuleDef.getValueAsDag("Match"),
803           [this](auto Pat) { return addMatchPattern(std::move(Pat)); }, "match",
804           (RuleDef.getName() + "_match").str()))
805     return false;
806 
807   if (!Parser.parsePatternList(
808           *RuleDef.getValueAsDag("Apply"),
809           [this](auto Pat) { return addApplyPattern(std::move(Pat)); }, "apply",
810           (RuleDef.getName() + "_apply").str()))
811     return false;
812 
813   if (!buildRuleOperandsTable() || !typecheckPatterns() || !findRoots() ||
814       !checkSemantics() || !buildPermutationsToEmit())
815     return false;
816   LLVM_DEBUG(verify());
817   return true;
818 }
819 
820 bool CombineRuleBuilder::emitRuleMatchers() {
821   auto StackTrace = PrettyStackTraceEmit(RuleDef);
822 
823   assert(MatchRoot);
824   CodeExpansions CE;
825 
826   assert(!PermutationsToEmit.empty());
827   for (const auto &Alts : PermutationsToEmit) {
828     switch (MatchRoot->getKind()) {
829     case Pattern::K_AnyOpcode: {
830       if (!emitMatchPattern(CE, Alts, *cast<AnyOpcodePattern>(MatchRoot)))
831         return false;
832       break;
833     }
834     case Pattern::K_PatFrag:
835     case Pattern::K_Builtin:
836     case Pattern::K_CodeGenInstruction:
837       if (!emitMatchPattern(CE, Alts, *cast<InstructionPattern>(MatchRoot)))
838         return false;
839       break;
840     case Pattern::K_CXX:
841       PrintError("C++ code cannot be the root of a rule!");
842       return false;
843     default:
844       llvm_unreachable("unknown pattern kind!");
845     }
846   }
847 
848   return true;
849 }
850 
851 void CombineRuleBuilder::print(raw_ostream &OS) const {
852   OS << "(CombineRule name:" << RuleDef.getName() << " id:" << RuleID
853      << " root:" << RootName << '\n';
854 
855   if (!MatchDatas.empty()) {
856     OS << "  (MatchDatas\n";
857     for (const auto &MD : MatchDatas) {
858       OS << "    (MatchDataDef symbol:" << MD.Symbol << " type:" << MD.Type
859          << ")\n";
860     }
861     OS << "  )\n";
862   }
863 
864   const auto &SeenPFs = Parser.getSeenPatFrags();
865   if (!SeenPFs.empty()) {
866     OS << "  (PatFrags\n";
867     for (const auto *PF : Parser.getSeenPatFrags()) {
868       PF->print(OS, /*Indent=*/"    ");
869       OS << '\n';
870     }
871     OS << "  )\n";
872   }
873 
874   const auto DumpPats = [&](StringRef Name, const PatternMap &Pats) {
875     OS << "  (" << Name << " ";
876     if (Pats.empty()) {
877       OS << "<empty>)\n";
878       return;
879     }
880 
881     OS << '\n';
882     for (const auto &[Name, Pat] : Pats) {
883       OS << "    ";
884       if (Pat.get() == MatchRoot)
885         OS << "<match_root>";
886       if (isa<InstructionPattern>(Pat.get()) &&
887           ApplyRoots.contains(cast<InstructionPattern>(Pat.get())))
888         OS << "<apply_root>";
889       OS << Name << ":";
890       Pat->print(OS, /*PrintName=*/false);
891       OS << '\n';
892     }
893     OS << "  )\n";
894   };
895 
896   DumpPats("MatchPats", MatchPats);
897   DumpPats("ApplyPats", ApplyPats);
898 
899   MatchOpTable.print(OS, "MatchPats", /*Indent*/ "  ");
900   ApplyOpTable.print(OS, "ApplyPats", /*Indent*/ "  ");
901 
902   if (PermutationsToEmit.size() > 1) {
903     OS << "  (PermutationsToEmit\n";
904     for (const auto &Perm : PermutationsToEmit) {
905       OS << "    ";
906       print(OS, Perm);
907       OS << ",\n";
908     }
909     OS << "  )\n";
910   }
911 
912   OS << ")\n";
913 }
914 
915 #ifndef NDEBUG
916 void CombineRuleBuilder::verify() const {
917   const auto VerifyPats = [&](const PatternMap &Pats) {
918     for (const auto &[Name, Pat] : Pats) {
919       if (!Pat)
920         PrintFatalError("null pattern in pattern map!");
921 
922       if (Name != Pat->getName()) {
923         Pat->dump();
924         PrintFatalError("Pattern name mismatch! Map name: " + Name +
925                         ", Pat name: " + Pat->getName());
926       }
927 
928       // Sanity check: the map should point to the same data as the Pattern.
929       // Both strings are allocated in the pool using insertStrRef.
930       if (Name.data() != Pat->getName().data()) {
931         dbgs() << "Map StringRef: '" << Name << "' @ "
932                << (const void *)Name.data() << '\n';
933         dbgs() << "Pat String: '" << Pat->getName() << "' @ "
934                << (const void *)Pat->getName().data() << '\n';
935         PrintFatalError("StringRef stored in the PatternMap is not referencing "
936                         "the same string as its Pattern!");
937       }
938     }
939   };
940 
941   VerifyPats(MatchPats);
942   VerifyPats(ApplyPats);
943 
944   // Check there are no wip_match_opcode patterns in the "apply" patterns.
945   if (any_of(ApplyPats,
946              [&](auto &E) { return isa<AnyOpcodePattern>(E.second.get()); })) {
947     dump();
948     PrintFatalError(
949         "illegal wip_match_opcode pattern in the 'apply' patterns!");
950   }
951 
952   // Check there are no nullptrs in ApplyRoots.
953   if (ApplyRoots.contains(nullptr)) {
954     PrintFatalError(
955         "CombineRuleBuilder's ApplyRoots set contains a null pointer!");
956   }
957 }
958 #endif
959 
960 std::optional<LLTCodeGenOrTempType>
961 CombineRuleBuilder::getLLTCodeGenOrTempType(const PatternType &PT,
962                                             RuleMatcher &RM) {
963   assert(!PT.isNone());
964 
965   if (PT.isLLT())
966     return getLLTCodeGen(PT);
967 
968   assert(PT.isTypeOf());
969   auto &OM = RM.getOperandMatcher(PT.getTypeOfOpName());
970   if (OM.isVariadic()) {
971     PrintError("type '" + PT.str() + "' is ill-formed: '" +
972                OM.getSymbolicName() + "' is a variadic pack operand");
973     return std::nullopt;
974   }
975   return OM.getTempTypeIdx(RM);
976 }
977 
978 void CombineRuleBuilder::print(raw_ostream &OS,
979                                const PatternAlternatives &Alts) const {
980   SmallVector<std::string, 1> Strings(
981       map_range(Alts, [](const auto &PatAndPerm) {
982         return PatAndPerm.first->getName().str() + "[" +
983                to_string(PatAndPerm.second) + "]";
984       }));
985   // Sort so output is deterministic for tests. Otherwise it's sorted by pointer
986   // values.
987   sort(Strings);
988   OS << "[" << join(Strings, ", ") << "]";
989 }
990 
991 bool CombineRuleBuilder::addApplyPattern(std::unique_ptr<Pattern> Pat) {
992   StringRef Name = Pat->getName();
993   if (ApplyPats.contains(Name)) {
994     PrintError("'" + Name + "' apply pattern defined more than once!");
995     return false;
996   }
997 
998   if (isa<AnyOpcodePattern>(Pat.get())) {
999     PrintError("'" + Name +
1000                "': wip_match_opcode is not supported in apply patterns");
1001     return false;
1002   }
1003 
1004   if (isa<PatFragPattern>(Pat.get())) {
1005     PrintError("'" + Name + "': using " + PatFrag::ClassName +
1006                " is not supported in apply patterns");
1007     return false;
1008   }
1009 
1010   if (auto *CXXPat = dyn_cast<CXXPattern>(Pat.get()))
1011     CXXPat->setIsApply();
1012 
1013   ApplyPats[Name] = std::move(Pat);
1014   return true;
1015 }
1016 
1017 bool CombineRuleBuilder::addMatchPattern(std::unique_ptr<Pattern> Pat) {
1018   StringRef Name = Pat->getName();
1019   if (MatchPats.contains(Name)) {
1020     PrintError("'" + Name + "' match pattern defined more than once!");
1021     return false;
1022   }
1023 
1024   // For now, none of the builtins can appear in 'match'.
1025   if (const auto *BP = dyn_cast<BuiltinPattern>(Pat.get())) {
1026     PrintError("'" + BP->getInstName() +
1027                "' cannot be used in a 'match' pattern");
1028     return false;
1029   }
1030 
1031   MatchPats[Name] = std::move(Pat);
1032   return true;
1033 }
1034 
1035 void CombineRuleBuilder::declareAllMatchDatasExpansions(
1036     CodeExpansions &CE) const {
1037   for (const auto &MD : MatchDatas)
1038     CE.declare(MD.Symbol, MD.getVarName());
1039 }
1040 
1041 void CombineRuleBuilder::addCXXPredicate(RuleMatcher &M,
1042                                          const CodeExpansions &CE,
1043                                          const CXXPattern &P,
1044                                          const PatternAlternatives &Alts) {
1045   // FIXME: Hack so C++ code is executed last. May not work for more complex
1046   // patterns.
1047   auto &IM = *std::prev(M.insnmatchers().end());
1048   auto Loc = RuleDef.getLoc();
1049   const auto AddComment = [&](raw_ostream &OS) {
1050     OS << "// Pattern Alternatives: ";
1051     print(OS, Alts);
1052     OS << '\n';
1053   };
1054   const auto &ExpandedCode =
1055       DebugCXXPreds ? P.expandCode(CE, Loc, AddComment) : P.expandCode(CE, Loc);
1056   IM->addPredicate<GenericInstructionPredicateMatcher>(
1057       ExpandedCode.getEnumNameWithPrefix(CXXPredPrefix));
1058 }
1059 
1060 bool CombineRuleBuilder::hasOnlyCXXApplyPatterns() const {
1061   return all_of(ApplyPats, [&](auto &Entry) {
1062     return isa<CXXPattern>(Entry.second.get());
1063   });
1064 }
1065 
1066 bool CombineRuleBuilder::hasEraseRoot() const {
1067   return any_of(ApplyPats, [&](auto &Entry) {
1068     if (const auto *BP = dyn_cast<BuiltinPattern>(Entry.second.get()))
1069       return BP->getBuiltinKind() == BI_EraseRoot;
1070     return false;
1071   });
1072 }
1073 
1074 bool CombineRuleBuilder::typecheckPatterns() {
1075   CombineRuleOperandTypeChecker OTC(RuleDef, MatchOpTable);
1076 
1077   for (auto &Pat : values(MatchPats)) {
1078     if (auto *IP = dyn_cast<InstructionPattern>(Pat.get())) {
1079       if (!OTC.processMatchPattern(*IP))
1080         return false;
1081     }
1082   }
1083 
1084   for (auto &Pat : values(ApplyPats)) {
1085     if (auto *IP = dyn_cast<InstructionPattern>(Pat.get())) {
1086       if (!OTC.processApplyPattern(*IP))
1087         return false;
1088     }
1089   }
1090 
1091   OTC.propagateAndInferTypes();
1092 
1093   // Always check this after in case inference adds some special types to the
1094   // match patterns.
1095   for (auto &Pat : values(MatchPats)) {
1096     if (auto *IP = dyn_cast<InstructionPattern>(Pat.get())) {
1097       bool HasDiag = false;
1098       for (const auto &[Idx, Op] : enumerate(IP->operands())) {
1099         if (Op.getType().isTypeOf()) {
1100           PrintError(PatternType::TypeOfClassName +
1101                      " is not supported in 'match' patterns");
1102           PrintNote("operand " + Twine(Idx) + " of '" + IP->getName() +
1103                     "' has type '" + Op.getType().str() + "'");
1104           HasDiag = true;
1105         }
1106       }
1107       if (HasDiag)
1108         return false;
1109     }
1110   }
1111   return true;
1112 }
1113 
1114 bool CombineRuleBuilder::buildPermutationsToEmit() {
1115   PermutationsToEmit.clear();
1116 
1117   // Start with one empty set of alternatives.
1118   PermutationsToEmit.emplace_back();
1119   for (const auto &Pat : values(MatchPats)) {
1120     unsigned NumAlts = 0;
1121     // Note: technically, AnyOpcodePattern also needs permutations, but:
1122     //    - We only allow a single one of them in the root.
1123     //    - They cannot be mixed with any other pattern other than C++ code.
1124     // So we don't really need to take them into account here. We could, but
1125     // that pattern is a hack anyway and the less it's involved, the better.
1126     if (const auto *PFP = dyn_cast<PatFragPattern>(Pat.get()))
1127       NumAlts = PFP->getPatFrag().num_alternatives();
1128     else
1129       continue;
1130 
1131     // For each pattern that needs permutations, multiply the current set of
1132     // alternatives.
1133     auto CurPerms = PermutationsToEmit;
1134     PermutationsToEmit.clear();
1135 
1136     for (const auto &Perm : CurPerms) {
1137       assert(!Perm.count(Pat.get()) && "Pattern already emitted?");
1138       for (unsigned K = 0; K < NumAlts; ++K) {
1139         PatternAlternatives NewPerm = Perm;
1140         NewPerm[Pat.get()] = K;
1141         PermutationsToEmit.emplace_back(std::move(NewPerm));
1142       }
1143     }
1144   }
1145 
1146   if (int64_t MaxPerms = RuleDef.getValueAsInt("MaxPermutations");
1147       MaxPerms > 0) {
1148     if ((int64_t)PermutationsToEmit.size() > MaxPerms) {
1149       PrintError("cannot emit rule '" + RuleDef.getName() + "'; " +
1150                  Twine(PermutationsToEmit.size()) +
1151                  " permutations would be emitted, but the max is " +
1152                  Twine(MaxPerms));
1153       return false;
1154     }
1155   }
1156 
1157   // Ensure we always have a single empty entry, it simplifies the emission
1158   // logic so it doesn't need to handle the case where there are no perms.
1159   if (PermutationsToEmit.empty()) {
1160     PermutationsToEmit.emplace_back();
1161     return true;
1162   }
1163 
1164   return true;
1165 }
1166 
1167 bool CombineRuleBuilder::checkSemantics() {
1168   assert(MatchRoot && "Cannot call this before findRoots()");
1169 
1170   const auto CheckVariadicOperands = [&](const InstructionPattern &IP,
1171                                          bool IsMatch) {
1172     bool HasVariadic = false;
1173     for (auto &Op : IP.operands()) {
1174       if (!Op.getType().isVariadicPack())
1175         continue;
1176 
1177       HasVariadic = true;
1178 
1179       if (IsMatch && &Op != &IP.operands_back()) {
1180         PrintError("'" + IP.getInstName() +
1181                    "': " + PatternType::VariadicClassName +
1182                    " can only be used on the last operand");
1183         return false;
1184       }
1185 
1186       if (Op.isDef()) {
1187         PrintError("'" + IP.getInstName() + "': " +
1188                    PatternType::VariadicClassName + " cannot be used on defs");
1189         return false;
1190       }
1191     }
1192 
1193     if (HasVariadic && !IP.isVariadic()) {
1194       PrintError("cannot use a " + PatternType::VariadicClassName +
1195                  " operand on non-variadic instruction '" + IP.getInstName() +
1196                  "'");
1197       return false;
1198     }
1199 
1200     return true;
1201   };
1202 
1203   bool UsesWipMatchOpcode = false;
1204   for (const auto &Match : MatchPats) {
1205     const auto *Pat = Match.second.get();
1206 
1207     if (const auto *CXXPat = dyn_cast<CXXPattern>(Pat)) {
1208       if (!CXXPat->getRawCode().contains("return "))
1209         PrintWarning("'match' C++ code does not seem to return!");
1210       continue;
1211     }
1212 
1213     if (const auto IP = dyn_cast<InstructionPattern>(Pat)) {
1214       if (!CheckVariadicOperands(*IP, /*IsMatch=*/true))
1215         return false;
1216 
1217       // MIFlags in match cannot use the following syntax: (MIFlags $mi)
1218       if (const auto *CGP = dyn_cast<CodeGenInstructionPattern>(Pat)) {
1219         if (auto *FI = CGP->getMIFlagsInfo()) {
1220           if (!FI->copy_flags().empty()) {
1221             PrintError("'match' patterns cannot refer to flags from other "
1222                        "instructions");
1223             PrintNote("MIFlags in '" + CGP->getName() +
1224                       "' refer to: " + join(FI->copy_flags(), ", "));
1225             return false;
1226           }
1227         }
1228       }
1229       continue;
1230     }
1231 
1232     const auto *AOP = dyn_cast<AnyOpcodePattern>(Pat);
1233     if (!AOP)
1234       continue;
1235 
1236     if (UsesWipMatchOpcode) {
1237       PrintError("wip_opcode_match can only be present once");
1238       return false;
1239     }
1240 
1241     UsesWipMatchOpcode = true;
1242   }
1243 
1244   std::optional<bool> IsUsingCXXPatterns;
1245   for (const auto &Apply : ApplyPats) {
1246     Pattern *Pat = Apply.second.get();
1247     if (IsUsingCXXPatterns) {
1248       if (*IsUsingCXXPatterns != isa<CXXPattern>(Pat)) {
1249         PrintError("'apply' patterns cannot mix C++ code with other types of "
1250                    "patterns");
1251         return false;
1252       }
1253     } else
1254       IsUsingCXXPatterns = isa<CXXPattern>(Pat);
1255 
1256     assert(Pat);
1257     const auto *IP = dyn_cast<InstructionPattern>(Pat);
1258     if (!IP)
1259       continue;
1260 
1261     if (!CheckVariadicOperands(*IP, /*IsMatch=*/false))
1262       return false;
1263 
1264     if (UsesWipMatchOpcode) {
1265       PrintError("cannot use wip_match_opcode in combination with apply "
1266                  "instruction patterns!");
1267       return false;
1268     }
1269 
1270     // Check that the insts mentioned in copy_flags exist.
1271     if (const auto *CGP = dyn_cast<CodeGenInstructionPattern>(IP)) {
1272       if (auto *FI = CGP->getMIFlagsInfo()) {
1273         for (auto InstName : FI->copy_flags()) {
1274           auto It = MatchPats.find(InstName);
1275           if (It == MatchPats.end()) {
1276             PrintError("unknown instruction '$" + InstName +
1277                        "' referenced in MIFlags of '" + CGP->getName() + "'");
1278             return false;
1279           }
1280 
1281           if (!isa<CodeGenInstructionPattern>(It->second.get())) {
1282             PrintError(
1283                 "'$" + InstName +
1284                 "' does not refer to a CodeGenInstruction in MIFlags of '" +
1285                 CGP->getName() + "'");
1286             return false;
1287           }
1288         }
1289       }
1290     }
1291 
1292     const auto *BIP = dyn_cast<BuiltinPattern>(IP);
1293     if (!BIP)
1294       continue;
1295     StringRef Name = BIP->getInstName();
1296 
1297     // (GIEraseInst) has to be the only apply pattern, or it can not be used at
1298     // all. The root cannot have any defs either.
1299     switch (BIP->getBuiltinKind()) {
1300     case BI_EraseRoot: {
1301       if (ApplyPats.size() > 1) {
1302         PrintError(Name + " must be the only 'apply' pattern");
1303         return false;
1304       }
1305 
1306       const auto *IRoot = dyn_cast<CodeGenInstructionPattern>(MatchRoot);
1307       if (!IRoot) {
1308         PrintError(Name + " can only be used if the root is a "
1309                           "CodeGenInstruction or Intrinsic");
1310         return false;
1311       }
1312 
1313       if (IRoot->getNumInstDefs() != 0) {
1314         PrintError(Name + " can only be used if on roots that do "
1315                           "not have any output operand");
1316         PrintNote("'" + IRoot->getInstName() + "' has " +
1317                   Twine(IRoot->getNumInstDefs()) + " output operands");
1318         return false;
1319       }
1320       break;
1321     }
1322     case BI_ReplaceReg: {
1323       // (GIReplaceReg can only be used on the root instruction)
1324       // TODO: When we allow rewriting non-root instructions, also allow this.
1325       StringRef OldRegName = BIP->getOperand(0).getOperandName();
1326       auto *Def = MatchOpTable.getDef(OldRegName);
1327       if (!Def) {
1328         PrintError(Name + " cannot find a matched pattern that defines '" +
1329                    OldRegName + "'");
1330         return false;
1331       }
1332       if (MatchOpTable.getDef(OldRegName) != MatchRoot) {
1333         PrintError(Name + " cannot replace '" + OldRegName +
1334                    "': this builtin can only replace a register defined by the "
1335                    "match root");
1336         return false;
1337       }
1338       break;
1339     }
1340     }
1341   }
1342 
1343   if (!hasOnlyCXXApplyPatterns() && !MatchDatas.empty()) {
1344     PrintError(MatchDataClassName +
1345                " can only be used if 'apply' in entirely written in C++");
1346     return false;
1347   }
1348 
1349   return true;
1350 }
1351 
1352 RuleMatcher &CombineRuleBuilder::addRuleMatcher(const PatternAlternatives &Alts,
1353                                                 Twine AdditionalComment) {
1354   auto &RM = OutRMs.emplace_back(RuleDef.getLoc());
1355   addFeaturePredicates(RM);
1356   RM.setPermanentGISelFlags(GISF_IgnoreCopies);
1357   RM.addRequiredSimplePredicate(getIsEnabledPredicateEnumName(RuleID));
1358 
1359   std::string Comment;
1360   raw_string_ostream CommentOS(Comment);
1361   CommentOS << "Combiner Rule #" << RuleID << ": " << RuleDef.getName();
1362   if (!Alts.empty()) {
1363     CommentOS << " @ ";
1364     print(CommentOS, Alts);
1365   }
1366   if (!AdditionalComment.isTriviallyEmpty())
1367     CommentOS << "; " << AdditionalComment;
1368   RM.addAction<DebugCommentAction>(Comment);
1369   return RM;
1370 }
1371 
1372 bool CombineRuleBuilder::addFeaturePredicates(RuleMatcher &M) {
1373   if (!RuleDef.getValue("Predicates"))
1374     return true;
1375 
1376   const ListInit *Preds = RuleDef.getValueAsListInit("Predicates");
1377   for (const Init *PI : Preds->getValues()) {
1378     const DefInit *Pred = dyn_cast<DefInit>(PI);
1379     if (!Pred)
1380       continue;
1381 
1382     const Record *Def = Pred->getDef();
1383     if (!Def->isSubClassOf("Predicate")) {
1384       ::PrintError(Def, "Unknown 'Predicate' Type");
1385       return false;
1386     }
1387 
1388     if (Def->getValueAsString("CondString").empty())
1389       continue;
1390 
1391     if (SubtargetFeatures.count(Def) == 0) {
1392       SubtargetFeatures.emplace(
1393           Def, SubtargetFeatureInfo(Def, SubtargetFeatures.size()));
1394     }
1395 
1396     M.addRequiredFeature(Def);
1397   }
1398 
1399   return true;
1400 }
1401 
1402 bool CombineRuleBuilder::findRoots() {
1403   const auto Finish = [&]() {
1404     assert(MatchRoot);
1405 
1406     if (hasOnlyCXXApplyPatterns() || hasEraseRoot())
1407       return true;
1408 
1409     auto *IPRoot = dyn_cast<InstructionPattern>(MatchRoot);
1410     if (!IPRoot)
1411       return true;
1412 
1413     if (IPRoot->getNumInstDefs() == 0) {
1414       // No defs to work with -> find the root using the pattern name.
1415       auto It = ApplyPats.find(RootName);
1416       if (It == ApplyPats.end()) {
1417         PrintError("Cannot find root '" + RootName + "' in apply patterns!");
1418         return false;
1419       }
1420 
1421       auto *ApplyRoot = dyn_cast<InstructionPattern>(It->second.get());
1422       if (!ApplyRoot) {
1423         PrintError("apply pattern root '" + RootName +
1424                    "' must be an instruction pattern");
1425         return false;
1426       }
1427 
1428       ApplyRoots.insert(ApplyRoot);
1429       return true;
1430     }
1431 
1432     // Collect all redefinitions of the MatchRoot's defs and put them in
1433     // ApplyRoots.
1434     const auto DefsNeeded = IPRoot->getApplyDefsNeeded();
1435     for (auto &Op : DefsNeeded) {
1436       assert(Op.isDef() && Op.isNamedOperand());
1437       StringRef Name = Op.getOperandName();
1438 
1439       auto *ApplyRedef = ApplyOpTable.getDef(Name);
1440       if (!ApplyRedef) {
1441         PrintError("'" + Name + "' must be redefined in the 'apply' pattern");
1442         return false;
1443       }
1444 
1445       ApplyRoots.insert((InstructionPattern *)ApplyRedef);
1446     }
1447 
1448     if (auto It = ApplyPats.find(RootName); It != ApplyPats.end()) {
1449       if (find(ApplyRoots, It->second.get()) == ApplyRoots.end()) {
1450         PrintError("apply pattern '" + RootName +
1451                    "' is supposed to be a root but it does not redefine any of "
1452                    "the defs of the match root");
1453         return false;
1454       }
1455     }
1456 
1457     return true;
1458   };
1459 
1460   // Look by pattern name, e.g.
1461   //    (G_FNEG $x, $y):$root
1462   if (auto MatchPatIt = MatchPats.find(RootName);
1463       MatchPatIt != MatchPats.end()) {
1464     MatchRoot = MatchPatIt->second.get();
1465     return Finish();
1466   }
1467 
1468   // Look by def:
1469   //    (G_FNEG $root, $y)
1470   auto LookupRes = MatchOpTable.lookup(RootName);
1471   if (!LookupRes.Found) {
1472     PrintError("Cannot find root '" + RootName + "' in match patterns!");
1473     return false;
1474   }
1475 
1476   MatchRoot = LookupRes.Def;
1477   if (!MatchRoot) {
1478     PrintError("Cannot use live-in operand '" + RootName +
1479                "' as match pattern root!");
1480     return false;
1481   }
1482 
1483   return Finish();
1484 }
1485 
1486 bool CombineRuleBuilder::buildRuleOperandsTable() {
1487   const auto DiagnoseRedefMatch = [&](StringRef OpName) {
1488     PrintError("Operand '" + OpName +
1489                "' is defined multiple times in the 'match' patterns");
1490   };
1491 
1492   const auto DiagnoseRedefApply = [&](StringRef OpName) {
1493     PrintError("Operand '" + OpName +
1494                "' is defined multiple times in the 'apply' patterns");
1495   };
1496 
1497   for (auto &Pat : values(MatchPats)) {
1498     auto *IP = dyn_cast<InstructionPattern>(Pat.get());
1499     if (IP && !MatchOpTable.addPattern(IP, DiagnoseRedefMatch))
1500       return false;
1501   }
1502 
1503   for (auto &Pat : values(ApplyPats)) {
1504     auto *IP = dyn_cast<InstructionPattern>(Pat.get());
1505     if (IP && !ApplyOpTable.addPattern(IP, DiagnoseRedefApply))
1506       return false;
1507   }
1508 
1509   return true;
1510 }
1511 
1512 bool CombineRuleBuilder::parseDefs(const DagInit &Def) {
1513   if (Def.getOperatorAsDef(RuleDef.getLoc())->getName() != "defs") {
1514     PrintError("Expected defs operator");
1515     return false;
1516   }
1517 
1518   SmallVector<StringRef> Roots;
1519   for (unsigned I = 0, E = Def.getNumArgs(); I < E; ++I) {
1520     if (isSpecificDef(*Def.getArg(I), "root")) {
1521       Roots.emplace_back(Def.getArgNameStr(I));
1522       continue;
1523     }
1524 
1525     // Subclasses of GIDefMatchData should declare that this rule needs to pass
1526     // data from the match stage to the apply stage, and ensure that the
1527     // generated matcher has a suitable variable for it to do so.
1528     if (const Record *MatchDataRec =
1529             getDefOfSubClass(*Def.getArg(I), MatchDataClassName)) {
1530       MatchDatas.emplace_back(Def.getArgNameStr(I),
1531                               MatchDataRec->getValueAsString("Type"));
1532       continue;
1533     }
1534 
1535     // Otherwise emit an appropriate error message.
1536     if (getDefOfSubClass(*Def.getArg(I), "GIDefKind"))
1537       PrintError("This GIDefKind not implemented in tablegen");
1538     else if (getDefOfSubClass(*Def.getArg(I), "GIDefKindWithArgs"))
1539       PrintError("This GIDefKindWithArgs not implemented in tablegen");
1540     else
1541       PrintError("Expected a subclass of GIDefKind or a sub-dag whose "
1542                  "operator is of type GIDefKindWithArgs");
1543     return false;
1544   }
1545 
1546   if (Roots.size() != 1) {
1547     PrintError("Combine rules must have exactly one root");
1548     return false;
1549   }
1550 
1551   RootName = Roots.front();
1552   return true;
1553 }
1554 
1555 bool CombineRuleBuilder::emitMatchPattern(CodeExpansions &CE,
1556                                           const PatternAlternatives &Alts,
1557                                           const InstructionPattern &IP) {
1558   auto StackTrace = PrettyStackTraceEmit(RuleDef, &IP);
1559 
1560   auto &M = addRuleMatcher(Alts);
1561   InstructionMatcher &IM = M.addInstructionMatcher(IP.getName());
1562   declareInstExpansion(CE, IM, IP.getName());
1563 
1564   DenseSet<const Pattern *> SeenPats;
1565 
1566   const auto FindOperandDef = [&](StringRef Op) -> InstructionPattern * {
1567     return MatchOpTable.getDef(Op);
1568   };
1569 
1570   if (const auto *CGP = dyn_cast<CodeGenInstructionPattern>(&IP)) {
1571     if (!emitCodeGenInstructionMatchPattern(CE, Alts, M, IM, *CGP, SeenPats,
1572                                             FindOperandDef))
1573       return false;
1574   } else if (const auto *PFP = dyn_cast<PatFragPattern>(&IP)) {
1575     if (!PFP->getPatFrag().canBeMatchRoot()) {
1576       PrintError("cannot use '" + PFP->getInstName() + " as match root");
1577       return false;
1578     }
1579 
1580     if (!emitPatFragMatchPattern(CE, Alts, M, &IM, *PFP, SeenPats))
1581       return false;
1582   } else if (isa<BuiltinPattern>(&IP)) {
1583     llvm_unreachable("No match builtins known!");
1584   } else
1585     llvm_unreachable("Unknown kind of InstructionPattern!");
1586 
1587   // Emit remaining patterns
1588   const bool IsUsingCustomCXXAction = hasOnlyCXXApplyPatterns();
1589   SmallVector<CXXPattern *, 2> CXXMatchers;
1590   for (auto &Pat : values(MatchPats)) {
1591     if (SeenPats.contains(Pat.get()))
1592       continue;
1593 
1594     switch (Pat->getKind()) {
1595     case Pattern::K_AnyOpcode:
1596       PrintError("wip_match_opcode can not be used with instruction patterns!");
1597       return false;
1598     case Pattern::K_PatFrag: {
1599       if (!emitPatFragMatchPattern(CE, Alts, M, /*IM*/ nullptr,
1600                                    *cast<PatFragPattern>(Pat.get()), SeenPats))
1601         return false;
1602       continue;
1603     }
1604     case Pattern::K_Builtin:
1605       PrintError("No known match builtins");
1606       return false;
1607     case Pattern::K_CodeGenInstruction:
1608       cast<InstructionPattern>(Pat.get())->reportUnreachable(RuleDef.getLoc());
1609       return false;
1610     case Pattern::K_CXX: {
1611       // Delay emission for top-level C++ matchers (which can use MatchDatas).
1612       if (IsUsingCustomCXXAction)
1613         CXXMatchers.push_back(cast<CXXPattern>(Pat.get()));
1614       else
1615         addCXXPredicate(M, CE, *cast<CXXPattern>(Pat.get()), Alts);
1616       continue;
1617     }
1618     default:
1619       llvm_unreachable("unknown pattern kind!");
1620     }
1621   }
1622 
1623   return IsUsingCustomCXXAction ? emitCXXMatchApply(CE, M, CXXMatchers)
1624                                 : emitApplyPatterns(CE, M);
1625 }
1626 
1627 bool CombineRuleBuilder::emitMatchPattern(CodeExpansions &CE,
1628                                           const PatternAlternatives &Alts,
1629                                           const AnyOpcodePattern &AOP) {
1630   auto StackTrace = PrettyStackTraceEmit(RuleDef, &AOP);
1631 
1632   const bool IsUsingCustomCXXAction = hasOnlyCXXApplyPatterns();
1633   for (const CodeGenInstruction *CGI : AOP.insts()) {
1634     auto &M = addRuleMatcher(Alts, "wip_match_opcode '" +
1635                                        CGI->TheDef->getName() + "'");
1636 
1637     InstructionMatcher &IM = M.addInstructionMatcher(AOP.getName());
1638     declareInstExpansion(CE, IM, AOP.getName());
1639     // declareInstExpansion needs to be identical, otherwise we need to create a
1640     // CodeExpansions object here instead.
1641     assert(IM.getInsnVarID() == 0);
1642 
1643     IM.addPredicate<InstructionOpcodeMatcher>(CGI);
1644 
1645     // Emit remaining patterns.
1646     SmallVector<CXXPattern *, 2> CXXMatchers;
1647     for (auto &Pat : values(MatchPats)) {
1648       if (Pat.get() == &AOP)
1649         continue;
1650 
1651       switch (Pat->getKind()) {
1652       case Pattern::K_AnyOpcode:
1653         PrintError("wip_match_opcode can only be present once!");
1654         return false;
1655       case Pattern::K_PatFrag: {
1656         DenseSet<const Pattern *> SeenPats;
1657         if (!emitPatFragMatchPattern(CE, Alts, M, /*IM*/ nullptr,
1658                                      *cast<PatFragPattern>(Pat.get()),
1659                                      SeenPats))
1660           return false;
1661         continue;
1662       }
1663       case Pattern::K_Builtin:
1664         PrintError("No known match builtins");
1665         return false;
1666       case Pattern::K_CodeGenInstruction:
1667         cast<InstructionPattern>(Pat.get())->reportUnreachable(
1668             RuleDef.getLoc());
1669         return false;
1670       case Pattern::K_CXX: {
1671         // Delay emission for top-level C++ matchers (which can use MatchDatas).
1672         if (IsUsingCustomCXXAction)
1673           CXXMatchers.push_back(cast<CXXPattern>(Pat.get()));
1674         else
1675           addCXXPredicate(M, CE, *cast<CXXPattern>(Pat.get()), Alts);
1676         break;
1677       }
1678       default:
1679         llvm_unreachable("unknown pattern kind!");
1680       }
1681     }
1682 
1683     const bool Res = IsUsingCustomCXXAction
1684                          ? emitCXXMatchApply(CE, M, CXXMatchers)
1685                          : emitApplyPatterns(CE, M);
1686     if (!Res)
1687       return false;
1688   }
1689 
1690   return true;
1691 }
1692 
1693 bool CombineRuleBuilder::emitPatFragMatchPattern(
1694     CodeExpansions &CE, const PatternAlternatives &Alts, RuleMatcher &RM,
1695     InstructionMatcher *IM, const PatFragPattern &PFP,
1696     DenseSet<const Pattern *> &SeenPats) {
1697   auto StackTrace = PrettyStackTraceEmit(RuleDef, &PFP);
1698 
1699   if (!SeenPats.insert(&PFP).second)
1700     return true;
1701 
1702   const auto &PF = PFP.getPatFrag();
1703 
1704   if (!IM) {
1705     // When we don't have an IM, this means this PatFrag isn't reachable from
1706     // the root. This is only acceptable if it doesn't define anything (e.g. a
1707     // pure C++ PatFrag).
1708     if (PF.num_out_params() != 0) {
1709       PFP.reportUnreachable(RuleDef.getLoc());
1710       return false;
1711     }
1712   } else {
1713     // When an IM is provided, this is reachable from the root, and we're
1714     // expecting to have output operands.
1715     // TODO: If we want to allow for multiple roots we'll need a map of IMs
1716     // then, and emission becomes a bit more complicated.
1717     assert(PF.num_roots() == 1);
1718   }
1719 
1720   CodeExpansions PatFragCEs;
1721   if (!PFP.mapInputCodeExpansions(CE, PatFragCEs, RuleDef.getLoc()))
1722     return false;
1723 
1724   // List of {ParamName, ArgName}.
1725   // When all patterns have been emitted, find expansions in PatFragCEs named
1726   // ArgName and add their expansion to CE using ParamName as the key.
1727   SmallVector<std::pair<std::string, std::string>, 4> CEsToImport;
1728 
1729   // Map parameter names to the actual argument.
1730   const auto OperandMapper =
1731       [&](const InstructionOperand &O) -> InstructionOperand {
1732     if (!O.isNamedOperand())
1733       return O;
1734 
1735     StringRef ParamName = O.getOperandName();
1736 
1737     // Not sure what to do with those tbh. They should probably never be here.
1738     assert(!O.isNamedImmediate() && "TODO: handle named imms");
1739     unsigned PIdx = PF.getParamIdx(ParamName);
1740 
1741     // Map parameters to the argument values.
1742     if (PIdx == (unsigned)-1) {
1743       // This is a temp of the PatFragPattern, prefix the name to avoid
1744       // conflicts.
1745       return O.withNewName(
1746           insertStrRef((PFP.getName() + "." + ParamName).str()));
1747     }
1748 
1749     // The operand will be added to PatFragCEs's code expansions using the
1750     // parameter's name. If it's bound to some operand during emission of the
1751     // patterns, we'll want to add it to CE.
1752     auto ArgOp = PFP.getOperand(PIdx);
1753     if (ArgOp.isNamedOperand())
1754       CEsToImport.emplace_back(ArgOp.getOperandName().str(), ParamName);
1755 
1756     if (ArgOp.getType() && O.getType() && ArgOp.getType() != O.getType()) {
1757       StringRef PFName = PF.getName();
1758       PrintWarning("impossible type constraints: operand " + Twine(PIdx) +
1759                    " of '" + PFP.getName() + "' has type '" +
1760                    ArgOp.getType().str() + "', but '" + PFName +
1761                    "' constrains it to '" + O.getType().str() + "'");
1762       if (ArgOp.isNamedOperand())
1763         PrintNote("operand " + Twine(PIdx) + " of '" + PFP.getName() +
1764                   "' is '" + ArgOp.getOperandName() + "'");
1765       if (O.isNamedOperand())
1766         PrintNote("argument " + Twine(PIdx) + " of '" + PFName + "' is '" +
1767                   ParamName + "'");
1768     }
1769 
1770     return ArgOp;
1771   };
1772 
1773   // PatFragPatterns are only made of InstructionPatterns or CXXPatterns.
1774   // Emit instructions from the root.
1775   const auto &FragAlt = PF.getAlternative(Alts.lookup(&PFP));
1776   const auto &FragAltOT = FragAlt.OpTable;
1777   const auto LookupOperandDef =
1778       [&](StringRef Op) -> const InstructionPattern * {
1779     return FragAltOT.getDef(Op);
1780   };
1781 
1782   DenseSet<const Pattern *> PatFragSeenPats;
1783   for (const auto &[Idx, InOp] : enumerate(PF.out_params())) {
1784     if (InOp.Kind != PatFrag::PK_Root)
1785       continue;
1786 
1787     StringRef ParamName = InOp.Name;
1788     const auto *Def = FragAltOT.getDef(ParamName);
1789     assert(Def && "PatFrag::checkSemantics should have emitted an error if "
1790                   "an out operand isn't defined!");
1791     assert(isa<CodeGenInstructionPattern>(Def) &&
1792            "Nested PatFrags not supported yet");
1793 
1794     if (!emitCodeGenInstructionMatchPattern(
1795             PatFragCEs, Alts, RM, *IM, *cast<CodeGenInstructionPattern>(Def),
1796             PatFragSeenPats, LookupOperandDef, OperandMapper))
1797       return false;
1798   }
1799 
1800   // Emit leftovers.
1801   for (const auto &Pat : FragAlt.Pats) {
1802     if (PatFragSeenPats.contains(Pat.get()))
1803       continue;
1804 
1805     if (const auto *CXXPat = dyn_cast<CXXPattern>(Pat.get())) {
1806       addCXXPredicate(RM, PatFragCEs, *CXXPat, Alts);
1807       continue;
1808     }
1809 
1810     if (const auto *IP = dyn_cast<InstructionPattern>(Pat.get())) {
1811       IP->reportUnreachable(PF.getLoc());
1812       return false;
1813     }
1814 
1815     llvm_unreachable("Unexpected pattern kind in PatFrag");
1816   }
1817 
1818   for (const auto &[ParamName, ArgName] : CEsToImport) {
1819     // Note: we're find if ParamName already exists. It just means it's been
1820     // bound before, so we prefer to keep the first binding.
1821     CE.declare(ParamName, PatFragCEs.lookup(ArgName));
1822   }
1823 
1824   return true;
1825 }
1826 
1827 bool CombineRuleBuilder::emitApplyPatterns(CodeExpansions &CE, RuleMatcher &M) {
1828   assert(MatchDatas.empty());
1829 
1830   DenseSet<const Pattern *> SeenPats;
1831   StringMap<unsigned> OperandToTempRegID;
1832 
1833   for (auto *ApplyRoot : ApplyRoots) {
1834     assert(isa<InstructionPattern>(ApplyRoot) &&
1835            "Root can only be a InstructionPattern!");
1836     if (!emitInstructionApplyPattern(CE, M,
1837                                      cast<InstructionPattern>(*ApplyRoot),
1838                                      SeenPats, OperandToTempRegID))
1839       return false;
1840   }
1841 
1842   for (auto &Pat : values(ApplyPats)) {
1843     if (SeenPats.contains(Pat.get()))
1844       continue;
1845 
1846     switch (Pat->getKind()) {
1847     case Pattern::K_AnyOpcode:
1848       llvm_unreachable("Unexpected pattern in apply!");
1849     case Pattern::K_PatFrag:
1850       // TODO: We could support pure C++ PatFrags as a temporary thing.
1851       llvm_unreachable("Unexpected pattern in apply!");
1852     case Pattern::K_Builtin:
1853       if (!emitInstructionApplyPattern(CE, M, cast<BuiltinPattern>(*Pat),
1854                                        SeenPats, OperandToTempRegID))
1855         return false;
1856       break;
1857     case Pattern::K_CodeGenInstruction:
1858       cast<CodeGenInstructionPattern>(*Pat).reportUnreachable(RuleDef.getLoc());
1859       return false;
1860     case Pattern::K_CXX: {
1861       llvm_unreachable(
1862           "CXX Pattern Emission should have been handled earlier!");
1863     }
1864     default:
1865       llvm_unreachable("unknown pattern kind!");
1866     }
1867   }
1868 
1869   // Erase the root.
1870   unsigned RootInsnID =
1871       M.getInsnVarID(M.getInstructionMatcher(MatchRoot->getName()));
1872   M.addAction<EraseInstAction>(RootInsnID);
1873 
1874   return true;
1875 }
1876 
1877 bool CombineRuleBuilder::emitCXXMatchApply(CodeExpansions &CE, RuleMatcher &M,
1878                                            ArrayRef<CXXPattern *> Matchers) {
1879   assert(hasOnlyCXXApplyPatterns());
1880   declareAllMatchDatasExpansions(CE);
1881 
1882   std::string CodeStr;
1883   raw_string_ostream OS(CodeStr);
1884 
1885   for (auto &MD : MatchDatas)
1886     OS << MD.Type << " " << MD.getVarName() << ";\n";
1887 
1888   if (!Matchers.empty()) {
1889     OS << "// Match Patterns\n";
1890     for (auto *M : Matchers) {
1891       OS << "if(![&](){";
1892       CodeExpander Expander(M->getRawCode(), CE, RuleDef.getLoc(),
1893                             /*ShowExpansions=*/false);
1894       Expander.emit(OS);
1895       OS << "}()) {\n"
1896          << "  return false;\n}\n";
1897     }
1898   }
1899 
1900   OS << "// Apply Patterns\n";
1901   ListSeparator LS("\n");
1902   for (auto &Pat : ApplyPats) {
1903     auto *CXXPat = cast<CXXPattern>(Pat.second.get());
1904     CodeExpander Expander(CXXPat->getRawCode(), CE, RuleDef.getLoc(),
1905                           /*ShowExpansions=*/false);
1906     OS << LS;
1907     Expander.emit(OS);
1908   }
1909 
1910   const auto &Code = CXXPredicateCode::getCustomActionCode(CodeStr);
1911   M.setCustomCXXAction(Code.getEnumNameWithPrefix(CXXCustomActionPrefix));
1912   return true;
1913 }
1914 
1915 bool CombineRuleBuilder::emitInstructionApplyPattern(
1916     CodeExpansions &CE, RuleMatcher &M, const InstructionPattern &P,
1917     DenseSet<const Pattern *> &SeenPats,
1918     StringMap<unsigned> &OperandToTempRegID) {
1919   auto StackTrace = PrettyStackTraceEmit(RuleDef, &P);
1920 
1921   if (!SeenPats.insert(&P).second)
1922     return true;
1923 
1924   // First, render the uses.
1925   for (auto &Op : P.named_operands()) {
1926     if (Op.isDef())
1927       continue;
1928 
1929     StringRef OpName = Op.getOperandName();
1930     if (const auto *DefPat = ApplyOpTable.getDef(OpName)) {
1931       if (!emitInstructionApplyPattern(CE, M, *DefPat, SeenPats,
1932                                        OperandToTempRegID))
1933         return false;
1934     } else {
1935       // If we have no def, check this exists in the MatchRoot.
1936       if (!Op.isNamedImmediate() && !MatchOpTable.lookup(OpName).Found) {
1937         PrintError("invalid output operand '" + OpName +
1938                    "': operand is not a live-in of the match pattern, and it "
1939                    "has no definition");
1940         return false;
1941       }
1942     }
1943   }
1944 
1945   if (const auto *BP = dyn_cast<BuiltinPattern>(&P))
1946     return emitBuiltinApplyPattern(CE, M, *BP, OperandToTempRegID);
1947 
1948   if (isa<PatFragPattern>(&P))
1949     llvm_unreachable("PatFragPatterns is not supported in 'apply'!");
1950 
1951   auto &CGIP = cast<CodeGenInstructionPattern>(P);
1952 
1953   // Now render this inst.
1954   auto &DstMI =
1955       M.addAction<BuildMIAction>(M.allocateOutputInsnID(), &CGIP.getInst());
1956 
1957   bool HasEmittedIntrinsicID = false;
1958   const auto EmitIntrinsicID = [&]() {
1959     assert(CGIP.isIntrinsic());
1960     DstMI.addRenderer<IntrinsicIDRenderer>(CGIP.getIntrinsic());
1961     HasEmittedIntrinsicID = true;
1962   };
1963 
1964   for (auto &Op : P.operands()) {
1965     // Emit the intrinsic ID after the last def.
1966     if (CGIP.isIntrinsic() && !Op.isDef() && !HasEmittedIntrinsicID)
1967       EmitIntrinsicID();
1968 
1969     if (Op.isNamedImmediate()) {
1970       PrintError("invalid output operand '" + Op.getOperandName() +
1971                  "': output immediates cannot be named");
1972       PrintNote("while emitting pattern '" + P.getName() + "' (" +
1973                 P.getInstName() + ")");
1974       return false;
1975     }
1976 
1977     if (Op.hasImmValue()) {
1978       if (!emitCodeGenInstructionApplyImmOperand(M, DstMI, CGIP, Op))
1979         return false;
1980       continue;
1981     }
1982 
1983     StringRef OpName = Op.getOperandName();
1984 
1985     // Uses of operand.
1986     if (!Op.isDef()) {
1987       if (auto It = OperandToTempRegID.find(OpName);
1988           It != OperandToTempRegID.end()) {
1989         assert(!MatchOpTable.lookup(OpName).Found &&
1990                "Temp reg is also from match pattern?");
1991         DstMI.addRenderer<TempRegRenderer>(It->second);
1992       } else {
1993         // This should be a match live in or a redef of a matched instr.
1994         // If it's a use of a temporary register, then we messed up somewhere -
1995         // the previous condition should have passed.
1996         assert(MatchOpTable.lookup(OpName).Found &&
1997                !ApplyOpTable.getDef(OpName) && "Temp reg not emitted yet!");
1998         DstMI.addRenderer<CopyRenderer>(OpName);
1999       }
2000       continue;
2001     }
2002 
2003     // Determine what we're dealing with. Are we replacing a matched
2004     // instruction? Creating a new one?
2005     auto OpLookupRes = MatchOpTable.lookup(OpName);
2006     if (OpLookupRes.Found) {
2007       if (OpLookupRes.isLiveIn()) {
2008         // live-in of the match pattern.
2009         PrintError("Cannot define live-in operand '" + OpName +
2010                    "' in the 'apply' pattern");
2011         return false;
2012       }
2013       assert(OpLookupRes.Def);
2014 
2015       // TODO: Handle this. We need to mutate the instr, or delete the old
2016       // one.
2017       //       Likewise, we also need to ensure we redef everything, if the
2018       //       instr has more than one def, we need to redef all or nothing.
2019       if (OpLookupRes.Def != MatchRoot) {
2020         PrintError("redefining an instruction other than the root is not "
2021                    "supported (operand '" +
2022                    OpName + "')");
2023         return false;
2024       }
2025       // redef of a match
2026       DstMI.addRenderer<CopyRenderer>(OpName);
2027       continue;
2028     }
2029 
2030     // Define a new register unique to the apply patterns (AKA a "temp"
2031     // register).
2032     unsigned TempRegID;
2033     if (auto It = OperandToTempRegID.find(OpName);
2034         It != OperandToTempRegID.end()) {
2035       TempRegID = It->second;
2036     } else {
2037       // This is a brand new register.
2038       TempRegID = M.allocateTempRegID();
2039       OperandToTempRegID[OpName] = TempRegID;
2040       const auto Ty = Op.getType();
2041       if (!Ty) {
2042         PrintError("def of a new register '" + OpName +
2043                    "' in the apply patterns must have a type");
2044         return false;
2045       }
2046 
2047       declareTempRegExpansion(CE, TempRegID, OpName);
2048       // Always insert the action at the beginning, otherwise we may end up
2049       // using the temp reg before it's available.
2050       auto Result = getLLTCodeGenOrTempType(Ty, M);
2051       if (!Result)
2052         return false;
2053       M.insertAction<MakeTempRegisterAction>(M.actions_begin(), *Result,
2054                                              TempRegID);
2055     }
2056 
2057     DstMI.addRenderer<TempRegRenderer>(TempRegID, /*IsDef=*/true);
2058   }
2059 
2060   // Some intrinsics have no in operands, ensure the ID is still emitted in such
2061   // cases.
2062   if (CGIP.isIntrinsic() && !HasEmittedIntrinsicID)
2063     EmitIntrinsicID();
2064 
2065   // Render MIFlags
2066   if (const auto *FI = CGIP.getMIFlagsInfo()) {
2067     for (StringRef InstName : FI->copy_flags())
2068       DstMI.addCopiedMIFlags(M.getInstructionMatcher(InstName));
2069     for (StringRef F : FI->set_flags())
2070       DstMI.addSetMIFlags(F);
2071     for (StringRef F : FI->unset_flags())
2072       DstMI.addUnsetMIFlags(F);
2073   }
2074 
2075   // Don't allow mutating opcodes for GISel combiners. We want a more precise
2076   // handling of MIFlags so we require them to be explicitly preserved.
2077   //
2078   // TODO: We don't mutate very often, if at all in combiners, but it'd be nice
2079   // to re-enable this. We'd then need to always clear MIFlags when mutating
2080   // opcodes, and never mutate an inst that we copy flags from.
2081   // DstMI.chooseInsnToMutate(M);
2082   declareInstExpansion(CE, DstMI, P.getName());
2083 
2084   return true;
2085 }
2086 
2087 bool CombineRuleBuilder::emitCodeGenInstructionApplyImmOperand(
2088     RuleMatcher &M, BuildMIAction &DstMI, const CodeGenInstructionPattern &P,
2089     const InstructionOperand &O) {
2090   // If we have a type, we implicitly emit a G_CONSTANT, except for G_CONSTANT
2091   // itself where we emit a CImm.
2092   //
2093   // No type means we emit a simple imm.
2094   // G_CONSTANT is a special case and needs a CImm though so this is likely a
2095   // mistake.
2096   const bool isGConstant = P.is("G_CONSTANT");
2097   const auto Ty = O.getType();
2098   if (!Ty) {
2099     if (isGConstant) {
2100       PrintError("'G_CONSTANT' immediate must be typed!");
2101       PrintNote("while emitting pattern '" + P.getName() + "' (" +
2102                 P.getInstName() + ")");
2103       return false;
2104     }
2105 
2106     DstMI.addRenderer<ImmRenderer>(O.getImmValue());
2107     return true;
2108   }
2109 
2110   auto ImmTy = getLLTCodeGenOrTempType(Ty, M);
2111   if (!ImmTy)
2112     return false;
2113 
2114   if (isGConstant) {
2115     DstMI.addRenderer<ImmRenderer>(O.getImmValue(), *ImmTy);
2116     return true;
2117   }
2118 
2119   unsigned TempRegID = M.allocateTempRegID();
2120   // Ensure MakeTempReg & the BuildConstantAction occur at the beginning.
2121   auto InsertIt = M.insertAction<MakeTempRegisterAction>(M.actions_begin(),
2122                                                          *ImmTy, TempRegID);
2123   M.insertAction<BuildConstantAction>(++InsertIt, TempRegID, O.getImmValue());
2124   DstMI.addRenderer<TempRegRenderer>(TempRegID);
2125   return true;
2126 }
2127 
2128 bool CombineRuleBuilder::emitBuiltinApplyPattern(
2129     CodeExpansions &CE, RuleMatcher &M, const BuiltinPattern &P,
2130     StringMap<unsigned> &OperandToTempRegID) {
2131   const auto Error = [&](Twine Reason) {
2132     PrintError("cannot emit '" + P.getInstName() + "' builtin: " + Reason);
2133     return false;
2134   };
2135 
2136   switch (P.getBuiltinKind()) {
2137   case BI_EraseRoot: {
2138     // Root is always inst 0.
2139     M.addAction<EraseInstAction>(/*InsnID*/ 0);
2140     return true;
2141   }
2142   case BI_ReplaceReg: {
2143     StringRef Old = P.getOperand(0).getOperandName();
2144     StringRef New = P.getOperand(1).getOperandName();
2145 
2146     if (!ApplyOpTable.lookup(New).Found && !MatchOpTable.lookup(New).Found)
2147       return Error("unknown operand '" + Old + "'");
2148 
2149     auto &OldOM = M.getOperandMatcher(Old);
2150     if (auto It = OperandToTempRegID.find(New);
2151         It != OperandToTempRegID.end()) {
2152       // Replace with temp reg.
2153       M.addAction<ReplaceRegAction>(OldOM.getInsnVarID(), OldOM.getOpIdx(),
2154                                     It->second);
2155     } else {
2156       // Replace with matched reg.
2157       auto &NewOM = M.getOperandMatcher(New);
2158       M.addAction<ReplaceRegAction>(OldOM.getInsnVarID(), OldOM.getOpIdx(),
2159                                     NewOM.getInsnVarID(), NewOM.getOpIdx());
2160     }
2161     // checkSemantics should have ensured that we can only rewrite the root.
2162     // Ensure we're deleting it.
2163     assert(MatchOpTable.getDef(Old) == MatchRoot);
2164     return true;
2165   }
2166   }
2167 
2168   llvm_unreachable("Unknown BuiltinKind!");
2169 }
2170 
2171 bool isLiteralImm(const InstructionPattern &P, unsigned OpIdx) {
2172   if (const auto *CGP = dyn_cast<CodeGenInstructionPattern>(&P)) {
2173     StringRef InstName = CGP->getInst().TheDef->getName();
2174     return (InstName == "G_CONSTANT" || InstName == "G_FCONSTANT") &&
2175            OpIdx == 1;
2176   }
2177 
2178   llvm_unreachable("TODO");
2179 }
2180 
2181 bool CombineRuleBuilder::emitCodeGenInstructionMatchPattern(
2182     CodeExpansions &CE, const PatternAlternatives &Alts, RuleMatcher &M,
2183     InstructionMatcher &IM, const CodeGenInstructionPattern &P,
2184     DenseSet<const Pattern *> &SeenPats, OperandDefLookupFn LookupOperandDef,
2185     OperandMapperFnRef OperandMapper) {
2186   auto StackTrace = PrettyStackTraceEmit(RuleDef, &P);
2187 
2188   if (!SeenPats.insert(&P).second)
2189     return true;
2190 
2191   IM.addPredicate<InstructionOpcodeMatcher>(&P.getInst());
2192   declareInstExpansion(CE, IM, P.getName());
2193 
2194   // If this is an intrinsic, check the intrinsic ID.
2195   if (P.isIntrinsic()) {
2196     // The IntrinsicID's operand is the first operand after the defs.
2197     OperandMatcher &OM = IM.addOperand(P.getNumInstDefs(), "$intrinsic_id",
2198                                        AllocatedTemporariesBaseID++);
2199     OM.addPredicate<IntrinsicIDOperandMatcher>(P.getIntrinsic());
2200   }
2201 
2202   // Check flags if needed.
2203   if (const auto *FI = P.getMIFlagsInfo()) {
2204     assert(FI->copy_flags().empty());
2205 
2206     if (const auto &SetF = FI->set_flags(); !SetF.empty())
2207       IM.addPredicate<MIFlagsInstructionPredicateMatcher>(SetF.getArrayRef());
2208     if (const auto &UnsetF = FI->unset_flags(); !UnsetF.empty())
2209       IM.addPredicate<MIFlagsInstructionPredicateMatcher>(UnsetF.getArrayRef(),
2210                                                           /*CheckNot=*/true);
2211   }
2212 
2213   for (auto [Idx, OriginalO] : enumerate(P.operands())) {
2214     // Remap the operand. This is used when emitting InstructionPatterns inside
2215     // PatFrags, so it can remap them to the arguments passed to the pattern.
2216     //
2217     // We use the remapped operand to emit immediates, and for the symbolic
2218     // operand names (in IM.addOperand). CodeExpansions and OperandTable lookups
2219     // still use the original name.
2220     //
2221     // The "def" flag on the remapped operand is always ignored.
2222     auto RemappedO = OperandMapper(OriginalO);
2223     assert(RemappedO.isNamedOperand() == OriginalO.isNamedOperand() &&
2224            "Cannot remap an unnamed operand to a named one!");
2225 
2226     const auto Ty = RemappedO.getType();
2227 
2228     const auto OpName =
2229         RemappedO.isNamedOperand() ? RemappedO.getOperandName().str() : "";
2230 
2231     // For intrinsics, the first use operand is the intrinsic id, so the true
2232     // operand index is shifted by 1.
2233     //
2234     // From now on:
2235     //    Idx = index in the pattern operand list.
2236     //    RealIdx = expected index in the MachineInstr.
2237     const unsigned RealIdx =
2238         (P.isIntrinsic() && !OriginalO.isDef()) ? (Idx + 1) : Idx;
2239 
2240     if (Ty.isVariadicPack() && M.hasOperand(OpName)) {
2241       // TODO: We could add some CheckIsSameOperand opcode variant that checks
2242       // all operands. We could also just emit a C++ code snippet lazily to do
2243       // the check since it's probably fairly rare that we need to do it.
2244       //
2245       // I'm just not sure it's worth the effort at this stage.
2246       PrintError("each instance of a " + PatternType::VariadicClassName +
2247                  " operand must have a unique name within the match patterns");
2248       PrintNote("'" + OpName + "' is used multiple times");
2249       return false;
2250     }
2251 
2252     OperandMatcher &OM =
2253         IM.addOperand(RealIdx, OpName, AllocatedTemporariesBaseID++,
2254                       /*IsVariadic=*/Ty.isVariadicPack());
2255     if (!OpName.empty())
2256       declareOperandExpansion(CE, OM, OriginalO.getOperandName());
2257 
2258     if (Ty.isVariadicPack()) {
2259       // In the presence of variadics, the InstructionMatcher won't insert a
2260       // InstructionNumOperandsMatcher implicitly, so we have to emit our own.
2261       assert((Idx + 1) == P.operands_size() &&
2262              "VariadicPack isn't last operand!");
2263       auto VPTI = Ty.getVariadicPackTypeInfo();
2264       assert(VPTI.Min > 0 && (VPTI.Max == 0 || VPTI.Max > VPTI.Min));
2265       IM.addPredicate<InstructionNumOperandsMatcher>(
2266           RealIdx + VPTI.Min, InstructionNumOperandsMatcher::CheckKind::GE);
2267       if (VPTI.Max) {
2268         IM.addPredicate<InstructionNumOperandsMatcher>(
2269             RealIdx + VPTI.Max, InstructionNumOperandsMatcher::CheckKind::LE);
2270       }
2271       break;
2272     }
2273 
2274     // Handle immediates.
2275     if (RemappedO.hasImmValue()) {
2276       if (isLiteralImm(P, Idx))
2277         OM.addPredicate<LiteralIntOperandMatcher>(RemappedO.getImmValue());
2278       else
2279         OM.addPredicate<ConstantIntOperandMatcher>(RemappedO.getImmValue());
2280     }
2281 
2282     // Handle typed operands, but only bother to check if it hasn't been done
2283     // before.
2284     //
2285     // getOperandMatcher will always return the first OM to have been created
2286     // for that Operand. "OM" here is always a new OperandMatcher.
2287     //
2288     // Always emit a check for unnamed operands.
2289     if (Ty && (OpName.empty() ||
2290                !M.getOperandMatcher(OpName).contains<LLTOperandMatcher>())) {
2291       // TODO: We could support GITypeOf here on the condition that the
2292       // OperandMatcher exists already. Though it's clunky to make this work
2293       // and isn't all that useful so it's just rejected in typecheckPatterns
2294       // at this time.
2295       assert(Ty.isLLT());
2296       OM.addPredicate<LLTOperandMatcher>(getLLTCodeGen(Ty));
2297     }
2298 
2299     // Stop here if the operand is a def, or if it had no name.
2300     if (OriginalO.isDef() || !OriginalO.isNamedOperand())
2301       continue;
2302 
2303     const auto *DefPat = LookupOperandDef(OriginalO.getOperandName());
2304     if (!DefPat)
2305       continue;
2306 
2307     if (OriginalO.hasImmValue()) {
2308       assert(!OpName.empty());
2309       // This is a named immediate that also has a def, that's not okay.
2310       // e.g.
2311       //    (G_SEXT $y, (i32 0))
2312       //    (COPY $x, 42:$y)
2313       PrintError("'" + OpName +
2314                  "' is a named immediate, it cannot be defined by another "
2315                  "instruction");
2316       PrintNote("'" + OpName + "' is defined by '" + DefPat->getName() + "'");
2317       return false;
2318     }
2319 
2320     // From here we know that the operand defines an instruction, and we need to
2321     // emit it.
2322     auto InstOpM =
2323         OM.addPredicate<InstructionOperandMatcher>(M, DefPat->getName());
2324     if (!InstOpM) {
2325       // TODO: copy-pasted from GlobalISelEmitter.cpp. Is it still relevant
2326       // here?
2327       PrintError("Nested instruction '" + DefPat->getName() +
2328                  "' cannot be the same as another operand '" +
2329                  OriginalO.getOperandName() + "'");
2330       return false;
2331     }
2332 
2333     auto &IM = (*InstOpM)->getInsnMatcher();
2334     if (const auto *CGIDef = dyn_cast<CodeGenInstructionPattern>(DefPat)) {
2335       if (!emitCodeGenInstructionMatchPattern(CE, Alts, M, IM, *CGIDef,
2336                                               SeenPats, LookupOperandDef,
2337                                               OperandMapper))
2338         return false;
2339       continue;
2340     }
2341 
2342     if (const auto *PFPDef = dyn_cast<PatFragPattern>(DefPat)) {
2343       if (!emitPatFragMatchPattern(CE, Alts, M, &IM, *PFPDef, SeenPats))
2344         return false;
2345       continue;
2346     }
2347 
2348     llvm_unreachable("unknown type of InstructionPattern");
2349   }
2350 
2351   return true;
2352 }
2353 
2354 //===- GICombinerEmitter --------------------------------------------------===//
2355 
2356 /// Main implementation class. This emits the tablegenerated output.
2357 ///
2358 /// It collects rules, uses `CombineRuleBuilder` to parse them and accumulate
2359 /// RuleMatchers, then takes all the necessary state/data from the various
2360 /// static storage pools and wires them together to emit the match table &
2361 /// associated function/data structures.
2362 class GICombinerEmitter final : public GlobalISelMatchTableExecutorEmitter {
2363   const RecordKeeper &Records;
2364   StringRef Name;
2365   const CodeGenTarget &Target;
2366   const Record *Combiner;
2367   unsigned NextRuleID = 0;
2368 
2369   // List all combine rules (ID, name) imported.
2370   // Note that the combiner rule ID is different from the RuleMatcher ID. The
2371   // latter is internal to the MatchTable, the former is the canonical ID of the
2372   // combine rule used to disable/enable it.
2373   std::vector<std::pair<unsigned, std::string>> AllCombineRules;
2374 
2375   // Keep track of all rules we've seen so far to ensure we don't process
2376   // the same rule twice.
2377   StringSet<> RulesSeen;
2378 
2379   MatchTable buildMatchTable(MutableArrayRef<RuleMatcher> Rules);
2380 
2381   void emitRuleConfigImpl(raw_ostream &OS);
2382 
2383   void emitAdditionalImpl(raw_ostream &OS) override;
2384 
2385   void emitMIPredicateFns(raw_ostream &OS) override;
2386   void emitI64ImmPredicateFns(raw_ostream &OS) override;
2387   void emitAPFloatImmPredicateFns(raw_ostream &OS) override;
2388   void emitAPIntImmPredicateFns(raw_ostream &OS) override;
2389   void emitTestSimplePredicate(raw_ostream &OS) override;
2390   void emitRunCustomAction(raw_ostream &OS) override;
2391 
2392   const CodeGenTarget &getTarget() const override { return Target; }
2393   StringRef getClassName() const override {
2394     return Combiner->getValueAsString("Classname");
2395   }
2396 
2397   StringRef getCombineAllMethodName() const {
2398     return Combiner->getValueAsString("CombineAllMethodName");
2399   }
2400 
2401   std::string getRuleConfigClassName() const {
2402     return getClassName().str() + "RuleConfig";
2403   }
2404 
2405   void gatherRules(std::vector<RuleMatcher> &Rules,
2406                    ArrayRef<const Record *> RulesAndGroups);
2407 
2408 public:
2409   explicit GICombinerEmitter(const RecordKeeper &RK,
2410                              const CodeGenTarget &Target, StringRef Name,
2411                              const Record *Combiner);
2412   ~GICombinerEmitter() {}
2413 
2414   void run(raw_ostream &OS);
2415 };
2416 
2417 void GICombinerEmitter::emitRuleConfigImpl(raw_ostream &OS) {
2418   OS << "struct " << getRuleConfigClassName() << " {\n"
2419      << "  SparseBitVector<> DisabledRules;\n\n"
2420      << "  bool isRuleEnabled(unsigned RuleID) const;\n"
2421      << "  bool parseCommandLineOption();\n"
2422      << "  bool setRuleEnabled(StringRef RuleIdentifier);\n"
2423      << "  bool setRuleDisabled(StringRef RuleIdentifier);\n"
2424      << "};\n\n";
2425 
2426   std::vector<std::pair<std::string, std::string>> Cases;
2427   Cases.reserve(AllCombineRules.size());
2428 
2429   for (const auto &[ID, Name] : AllCombineRules)
2430     Cases.emplace_back(Name, "return " + to_string(ID) + ";\n");
2431 
2432   OS << "static std::optional<uint64_t> getRuleIdxForIdentifier(StringRef "
2433         "RuleIdentifier) {\n"
2434      << "  uint64_t I;\n"
2435      << "  // getAtInteger(...) returns false on success\n"
2436      << "  bool Parsed = !RuleIdentifier.getAsInteger(0, I);\n"
2437      << "  if (Parsed)\n"
2438      << "    return I;\n\n"
2439      << "#ifndef NDEBUG\n";
2440   StringMatcher Matcher("RuleIdentifier", Cases, OS);
2441   Matcher.Emit();
2442   OS << "#endif // ifndef NDEBUG\n\n"
2443      << "  return std::nullopt;\n"
2444      << "}\n";
2445 
2446   OS << "static std::optional<std::pair<uint64_t, uint64_t>> "
2447         "getRuleRangeForIdentifier(StringRef RuleIdentifier) {\n"
2448      << "  std::pair<StringRef, StringRef> RangePair = "
2449         "RuleIdentifier.split('-');\n"
2450      << "  if (!RangePair.second.empty()) {\n"
2451      << "    const auto First = "
2452         "getRuleIdxForIdentifier(RangePair.first);\n"
2453      << "    const auto Last = "
2454         "getRuleIdxForIdentifier(RangePair.second);\n"
2455      << "    if (!First || !Last)\n"
2456      << "      return std::nullopt;\n"
2457      << "    if (First >= Last)\n"
2458      << "      report_fatal_error(\"Beginning of range should be before "
2459         "end of range\");\n"
2460      << "    return {{*First, *Last + 1}};\n"
2461      << "  }\n"
2462      << "  if (RangePair.first == \"*\") {\n"
2463      << "    return {{0, " << AllCombineRules.size() << "}};\n"
2464      << "  }\n"
2465      << "  const auto I = getRuleIdxForIdentifier(RangePair.first);\n"
2466      << "  if (!I)\n"
2467      << "    return std::nullopt;\n"
2468      << "  return {{*I, *I + 1}};\n"
2469      << "}\n\n";
2470 
2471   for (bool Enabled : {true, false}) {
2472     OS << "bool " << getRuleConfigClassName() << "::setRule"
2473        << (Enabled ? "Enabled" : "Disabled") << "(StringRef RuleIdentifier) {\n"
2474        << "  auto MaybeRange = getRuleRangeForIdentifier(RuleIdentifier);\n"
2475        << "  if (!MaybeRange)\n"
2476        << "    return false;\n"
2477        << "  for (auto I = MaybeRange->first; I < MaybeRange->second; ++I)\n"
2478        << "    DisabledRules." << (Enabled ? "reset" : "set") << "(I);\n"
2479        << "  return true;\n"
2480        << "}\n\n";
2481   }
2482 
2483   OS << "static std::vector<std::string> " << Name << "Option;\n"
2484      << "static cl::list<std::string> " << Name << "DisableOption(\n"
2485      << "    \"" << Name.lower() << "-disable-rule\",\n"
2486      << "    cl::desc(\"Disable one or more combiner rules temporarily in "
2487      << "the " << Name << " pass\"),\n"
2488      << "    cl::CommaSeparated,\n"
2489      << "    cl::Hidden,\n"
2490      << "    cl::cat(GICombinerOptionCategory),\n"
2491      << "    cl::callback([](const std::string &Str) {\n"
2492      << "      " << Name << "Option.push_back(Str);\n"
2493      << "    }));\n"
2494      << "static cl::list<std::string> " << Name << "OnlyEnableOption(\n"
2495      << "    \"" << Name.lower() << "-only-enable-rule\",\n"
2496      << "    cl::desc(\"Disable all rules in the " << Name
2497      << " pass then re-enable the specified ones\"),\n"
2498      << "    cl::Hidden,\n"
2499      << "    cl::cat(GICombinerOptionCategory),\n"
2500      << "    cl::callback([](const std::string &CommaSeparatedArg) {\n"
2501      << "      StringRef Str = CommaSeparatedArg;\n"
2502      << "      " << Name << "Option.push_back(\"*\");\n"
2503      << "      do {\n"
2504      << "        auto X = Str.split(\",\");\n"
2505      << "        " << Name << "Option.push_back((\"!\" + X.first).str());\n"
2506      << "        Str = X.second;\n"
2507      << "      } while (!Str.empty());\n"
2508      << "    }));\n"
2509      << "\n\n"
2510      << "bool " << getRuleConfigClassName()
2511      << "::isRuleEnabled(unsigned RuleID) const {\n"
2512      << "    return  !DisabledRules.test(RuleID);\n"
2513      << "}\n"
2514      << "bool " << getRuleConfigClassName() << "::parseCommandLineOption() {\n"
2515      << "  for (StringRef Identifier : " << Name << "Option) {\n"
2516      << "    bool Enabled = Identifier.consume_front(\"!\");\n"
2517      << "    if (Enabled && !setRuleEnabled(Identifier))\n"
2518      << "      return false;\n"
2519      << "    if (!Enabled && !setRuleDisabled(Identifier))\n"
2520      << "      return false;\n"
2521      << "  }\n"
2522      << "  return true;\n"
2523      << "}\n\n";
2524 }
2525 
2526 void GICombinerEmitter::emitAdditionalImpl(raw_ostream &OS) {
2527   OS << "bool " << getClassName() << "::" << getCombineAllMethodName()
2528      << "(MachineInstr &I) const {\n"
2529      << "  const TargetSubtargetInfo &ST = MF.getSubtarget();\n"
2530      << "  const PredicateBitset AvailableFeatures = "
2531         "getAvailableFeatures();\n"
2532      << "  B.setInstrAndDebugLoc(I);\n"
2533      << "  State.MIs.clear();\n"
2534      << "  State.MIs.push_back(&I);\n"
2535      << "  if (executeMatchTable(*this, State, ExecInfo, B"
2536      << ", getMatchTable(), *ST.getInstrInfo(), MRI, "
2537         "*MRI.getTargetRegisterInfo(), *ST.getRegBankInfo(), AvailableFeatures"
2538      << ", /*CoverageInfo*/ nullptr)) {\n"
2539      << "    return true;\n"
2540      << "  }\n\n"
2541      << "  return false;\n"
2542      << "}\n\n";
2543 }
2544 
2545 void GICombinerEmitter::emitMIPredicateFns(raw_ostream &OS) {
2546   auto MatchCode = CXXPredicateCode::getAllMatchCode();
2547   emitMIPredicateFnsImpl<const CXXPredicateCode *>(
2548       OS, "", ArrayRef<const CXXPredicateCode *>(MatchCode),
2549       [](const CXXPredicateCode *C) -> StringRef { return C->BaseEnumName; },
2550       [](const CXXPredicateCode *C) -> StringRef { return C->Code; });
2551 }
2552 
2553 void GICombinerEmitter::emitI64ImmPredicateFns(raw_ostream &OS) {
2554   // Unused, but still needs to be called.
2555   emitImmPredicateFnsImpl<unsigned>(
2556       OS, "I64", "int64_t", {}, [](unsigned) { return ""; },
2557       [](unsigned) { return ""; });
2558 }
2559 
2560 void GICombinerEmitter::emitAPFloatImmPredicateFns(raw_ostream &OS) {
2561   // Unused, but still needs to be called.
2562   emitImmPredicateFnsImpl<unsigned>(
2563       OS, "APFloat", "const APFloat &", {}, [](unsigned) { return ""; },
2564       [](unsigned) { return ""; });
2565 }
2566 
2567 void GICombinerEmitter::emitAPIntImmPredicateFns(raw_ostream &OS) {
2568   // Unused, but still needs to be called.
2569   emitImmPredicateFnsImpl<unsigned>(
2570       OS, "APInt", "const APInt &", {}, [](unsigned) { return ""; },
2571       [](unsigned) { return ""; });
2572 }
2573 
2574 void GICombinerEmitter::emitTestSimplePredicate(raw_ostream &OS) {
2575   if (!AllCombineRules.empty()) {
2576     OS << "enum {\n";
2577     std::string EnumeratorSeparator = " = GICXXPred_Invalid + 1,\n";
2578     // To avoid emitting a switch, we expect that all those rules are in order.
2579     // That way we can just get the RuleID from the enum by subtracting
2580     // (GICXXPred_Invalid + 1).
2581     unsigned ExpectedID = 0;
2582     (void)ExpectedID;
2583     for (const auto &ID : keys(AllCombineRules)) {
2584       assert(ExpectedID++ == ID && "combine rules are not ordered!");
2585       OS << "  " << getIsEnabledPredicateEnumName(ID) << EnumeratorSeparator;
2586       EnumeratorSeparator = ",\n";
2587     }
2588     OS << "};\n\n";
2589   }
2590 
2591   OS << "bool " << getClassName()
2592      << "::testSimplePredicate(unsigned Predicate) const {\n"
2593      << "    return RuleConfig.isRuleEnabled(Predicate - "
2594         "GICXXPred_Invalid - "
2595         "1);\n"
2596      << "}\n";
2597 }
2598 
2599 void GICombinerEmitter::emitRunCustomAction(raw_ostream &OS) {
2600   const auto CustomActionsCode = CXXPredicateCode::getAllCustomActionsCode();
2601 
2602   if (!CustomActionsCode.empty()) {
2603     OS << "enum {\n";
2604     std::string EnumeratorSeparator = " = GICXXCustomAction_Invalid + 1,\n";
2605     for (const auto &CA : CustomActionsCode) {
2606       OS << "  " << CA->getEnumNameWithPrefix(CXXCustomActionPrefix)
2607          << EnumeratorSeparator;
2608       EnumeratorSeparator = ",\n";
2609     }
2610     OS << "};\n";
2611   }
2612 
2613   OS << "bool " << getClassName()
2614      << "::runCustomAction(unsigned ApplyID, const MatcherState &State, "
2615         "NewMIVector &OutMIs) const "
2616         "{\n  Helper.getBuilder().setInstrAndDebugLoc(*State.MIs[0]);\n";
2617   if (!CustomActionsCode.empty()) {
2618     OS << "  switch(ApplyID) {\n";
2619     for (const auto &CA : CustomActionsCode) {
2620       OS << "  case " << CA->getEnumNameWithPrefix(CXXCustomActionPrefix)
2621          << ":{\n"
2622          << "    " << join(split(CA->Code, '\n'), "\n    ") << '\n'
2623          << "    return true;\n";
2624       OS << "  }\n";
2625     }
2626     OS << "  }\n";
2627   }
2628   OS << "  llvm_unreachable(\"Unknown Apply Action\");\n"
2629      << "}\n";
2630 }
2631 
2632 GICombinerEmitter::GICombinerEmitter(const RecordKeeper &RK,
2633                                      const CodeGenTarget &Target,
2634                                      StringRef Name, const Record *Combiner)
2635     : Records(RK), Name(Name), Target(Target), Combiner(Combiner) {}
2636 
2637 MatchTable
2638 GICombinerEmitter::buildMatchTable(MutableArrayRef<RuleMatcher> Rules) {
2639   std::vector<Matcher *> InputRules;
2640   for (Matcher &Rule : Rules)
2641     InputRules.push_back(&Rule);
2642 
2643   unsigned CurrentOrdering = 0;
2644   StringMap<unsigned> OpcodeOrder;
2645   for (RuleMatcher &Rule : Rules) {
2646     const StringRef Opcode = Rule.getOpcode();
2647     assert(!Opcode.empty() && "Didn't expect an undefined opcode");
2648     if (OpcodeOrder.try_emplace(Opcode, CurrentOrdering).second)
2649       ++CurrentOrdering;
2650   }
2651 
2652   llvm::stable_sort(InputRules, [&OpcodeOrder](const Matcher *A,
2653                                                const Matcher *B) {
2654     auto *L = static_cast<const RuleMatcher *>(A);
2655     auto *R = static_cast<const RuleMatcher *>(B);
2656     return std::tuple(OpcodeOrder[L->getOpcode()],
2657                       L->insnmatchers_front().getNumOperandMatchers()) <
2658            std::tuple(OpcodeOrder[R->getOpcode()],
2659                       R->insnmatchers_front().getNumOperandMatchers());
2660   });
2661 
2662   for (Matcher *Rule : InputRules)
2663     Rule->optimize();
2664 
2665   std::vector<std::unique_ptr<Matcher>> MatcherStorage;
2666   std::vector<Matcher *> OptRules =
2667       optimizeRules<GroupMatcher>(InputRules, MatcherStorage);
2668 
2669   for (Matcher *Rule : OptRules)
2670     Rule->optimize();
2671 
2672   OptRules = optimizeRules<SwitchMatcher>(OptRules, MatcherStorage);
2673 
2674   return MatchTable::buildTable(OptRules, /*WithCoverage*/ false,
2675                                 /*IsCombiner*/ true);
2676 }
2677 
2678 /// Recurse into GICombineGroup's and flatten the ruleset into a simple list.
2679 void GICombinerEmitter::gatherRules(std::vector<RuleMatcher> &ActiveRules,
2680                                     ArrayRef<const Record *> RulesAndGroups) {
2681   for (const Record *Rec : RulesAndGroups) {
2682     if (!Rec->isValueUnset("Rules")) {
2683       gatherRules(ActiveRules, Rec->getValueAsListOfDefs("Rules"));
2684       continue;
2685     }
2686 
2687     StringRef RuleName = Rec->getName();
2688     if (!RulesSeen.insert(RuleName).second) {
2689       PrintWarning(Rec->getLoc(),
2690                    "skipping rule '" + Rec->getName() +
2691                        "' because it has already been processed");
2692       continue;
2693     }
2694 
2695     AllCombineRules.emplace_back(NextRuleID, Rec->getName().str());
2696     CombineRuleBuilder CRB(Target, SubtargetFeatures, *Rec, NextRuleID++,
2697                            ActiveRules);
2698 
2699     if (!CRB.parseAll()) {
2700       assert(ErrorsPrinted && "Parsing failed without errors!");
2701       continue;
2702     }
2703 
2704     if (StopAfterParse) {
2705       CRB.print(outs());
2706       continue;
2707     }
2708 
2709     if (!CRB.emitRuleMatchers()) {
2710       assert(ErrorsPrinted && "Emission failed without errors!");
2711       continue;
2712     }
2713   }
2714 }
2715 
2716 void GICombinerEmitter::run(raw_ostream &OS) {
2717   InstructionOpcodeMatcher::initOpcodeValuesMap(Target);
2718   LLTOperandMatcher::initTypeIDValuesMap();
2719 
2720   TGTimer &Timer = Records.getTimer();
2721   Timer.startTimer("Gather rules");
2722   std::vector<RuleMatcher> Rules;
2723   gatherRules(Rules, Combiner->getValueAsListOfDefs("Rules"));
2724   if (ErrorsPrinted)
2725     PrintFatalError(Combiner->getLoc(), "Failed to parse one or more rules");
2726 
2727   if (StopAfterParse)
2728     return;
2729 
2730   Timer.startTimer("Creating Match Table");
2731   unsigned MaxTemporaries = 0;
2732   for (const auto &Rule : Rules)
2733     MaxTemporaries = std::max(MaxTemporaries, Rule.countRendererFns());
2734 
2735   llvm::stable_sort(Rules, [&](const RuleMatcher &A, const RuleMatcher &B) {
2736     if (A.isHigherPriorityThan(B)) {
2737       assert(!B.isHigherPriorityThan(A) && "Cannot be more important "
2738                                            "and less important at "
2739                                            "the same time");
2740       return true;
2741     }
2742     return false;
2743   });
2744 
2745   const MatchTable Table = buildMatchTable(Rules);
2746 
2747   Timer.startTimer("Emit combiner");
2748 
2749   emitSourceFileHeader(getClassName().str() + " Combiner Match Table", OS);
2750 
2751   SmallVector<LLTCodeGen, 16> TypeObjects;
2752   append_range(TypeObjects, KnownTypes);
2753   llvm::sort(TypeObjects);
2754 
2755   // Hack: Avoid empty declarator.
2756   if (TypeObjects.empty())
2757     TypeObjects.push_back(LLT::scalar(1));
2758 
2759   // GET_GICOMBINER_DEPS, which pulls in extra dependencies.
2760   OS << "#ifdef GET_GICOMBINER_DEPS\n"
2761      << "#include \"llvm/ADT/SparseBitVector.h\"\n"
2762      << "namespace llvm {\n"
2763      << "extern cl::OptionCategory GICombinerOptionCategory;\n"
2764      << "} // end namespace llvm\n"
2765      << "#endif // ifdef GET_GICOMBINER_DEPS\n\n";
2766 
2767   // GET_GICOMBINER_TYPES, which needs to be included before the declaration of
2768   // the class.
2769   OS << "#ifdef GET_GICOMBINER_TYPES\n";
2770   emitRuleConfigImpl(OS);
2771   OS << "#endif // ifdef GET_GICOMBINER_TYPES\n\n";
2772   emitPredicateBitset(OS, "GET_GICOMBINER_TYPES");
2773 
2774   // GET_GICOMBINER_CLASS_MEMBERS, which need to be included inside the class.
2775   emitPredicatesDecl(OS, "GET_GICOMBINER_CLASS_MEMBERS");
2776   emitTemporariesDecl(OS, "GET_GICOMBINER_CLASS_MEMBERS");
2777 
2778   // GET_GICOMBINER_IMPL, which needs to be included outside the class.
2779   emitExecutorImpl(OS, Table, TypeObjects, Rules, {}, {},
2780                    "GET_GICOMBINER_IMPL");
2781 
2782   // GET_GICOMBINER_CONSTRUCTOR_INITS, which are in the constructor's
2783   // initializer list.
2784   emitPredicatesInit(OS, "GET_GICOMBINER_CONSTRUCTOR_INITS");
2785   emitTemporariesInit(OS, MaxTemporaries, "GET_GICOMBINER_CONSTRUCTOR_INITS");
2786 }
2787 
2788 } // end anonymous namespace
2789 
2790 //===----------------------------------------------------------------------===//
2791 
2792 static void EmitGICombiner(const RecordKeeper &RK, raw_ostream &OS) {
2793   EnablePrettyStackTrace();
2794   const CodeGenTarget Target(RK);
2795 
2796   if (SelectedCombiners.empty())
2797     PrintFatalError("No combiners selected with -combiners");
2798   for (const auto &Combiner : SelectedCombiners) {
2799     const Record *CombinerDef = RK.getDef(Combiner);
2800     if (!CombinerDef)
2801       PrintFatalError("Could not find " + Combiner);
2802     GICombinerEmitter(RK, Target, Combiner, CombinerDef).run(OS);
2803   }
2804 }
2805 
2806 static TableGen::Emitter::Opt X("gen-global-isel-combiner", EmitGICombiner,
2807                                 "Generate GlobalISel Combiner");
2808